Only use required fault management endpoint

When managing a large number of subclouds on a DC System Controller,
sysinv-api freezes and the controller swacts.
This happens due to sysinv-api RequestContext service catalog having
the fm endpoints of all subclouds, which is too large for sysinv to
handle when using the ZeroMQ RPC backend.

This change filters the RequestContext service catalog to only store
the required endpoint, instead of the endpoints of all subclouds.

TEST PLAN:
PASS: AIO-SX: Install, bootstrap and host-unlock
PASS: DC: Edit the source files with this change, restart sysinv-api
      service, then install and manage a large number of subclouds
PASS: AIO-SX/DC: Perform system commands that interact with fm
      (system health-query, for instance)

Story: 2010087
Task: 46444

Signed-off-by: Alyson Deives Pereira <alyson.deivespereira@windriver.com>
Change-Id: I0a8baad3022a64c25e188ac6f6da24548605785a
This commit is contained in:
Alyson Deives Pereira 2022-12-08 17:13:29 -03:00
parent cd8cedaef0
commit 40d5bcdf71
2 changed files with 40 additions and 14 deletions

View File

@ -12,11 +12,12 @@
# License for the specific language governing permissions and limitations
# under the License.
from sysinv.common.fm import get_fm_region
from sysinv.db import api as dbapi
from sysinv.openstack.common import context
REQUIRED_SERVICE_TYPES = ('faultmanagement',)
FAULT_MANAGEMENT = 'faultmanagement'
REQUIRED_SERVICE_TYPES = (FAULT_MANAGEMENT,)
class RequestContext(context.RequestContext):
@ -52,7 +53,17 @@ class RequestContext(context.RequestContext):
if service_catalog:
# Only include required parts of service_catalog
self.service_catalog = [s for s in service_catalog
if s.get('type') in REQUIRED_SERVICE_TYPES]
if s.get('type', '')
in REQUIRED_SERVICE_TYPES]
for service in self.service_catalog:
if service.get('type') == FAULT_MANAGEMENT:
if 'endpoints' in service:
fm_region = get_fm_region()
fm_endpoints = []
for endpoint in service['endpoints']:
if endpoint.get('region', '') == fm_region:
fm_endpoints.append(endpoint)
service['endpoints'] = fm_endpoints
else:
# if list is empty or none
self.service_catalog = []

View File

@ -10,9 +10,9 @@
from keystoneauth1.access import service_catalog as k_service_catalog
from oslo_config import cfg
from oslo_log import log
from oslo_utils import importutils
from fm_api import constants as fm_constants
from fm_api import fm_api
import fmclient as fm_client
CONF = cfg.CONF
@ -46,7 +46,7 @@ class FmCustomerLog(object):
_fm_api = None
def __init__(self):
self._fm_api = fm_api.FaultAPIs()
self._fm_api = _get_fm_api().FaultAPIs()
def customer_log(self, log_data):
LOG.info("Generating FM Customer Log %s" % log_data)
@ -60,15 +60,15 @@ class FmCustomerLog(object):
fm_event_type = log_data.get('fm_event_type', None)
fm_probable_cause = fm_constants.ALARM_PROBABLE_CAUSE_UNKNOWN
fm_uuid = None
fault = fm_api.Fault(fm_event_id,
fm_event_state,
entity_type,
entity,
fm_severity,
reason_text,
fm_event_type,
fm_probable_cause, "",
False, True)
fault = _get_fm_api().Fault(fm_event_id,
fm_event_state,
entity_type,
entity,
fm_severity,
reason_text,
fm_event_type,
fm_probable_cause, "",
False, True)
response = self._fm_api.set_fault(fault)
if response is None:
@ -103,3 +103,18 @@ def fmclient(context, version=1, endpoint=None):
return fm_client.Client(version=version,
endpoint=endpoint,
auth_token=auth_token)
def get_fm_region():
    """Return the ``os_region_name`` option of the ``fm`` config group."""
    fm_options = CONF.fm
    return fm_options.os_region_name
_FMAPI = None
def _get_fm_api():
    """Return the ``fm_api.fm_api`` module, importing it on first use.

    The import is delayed until the first call for the benefit of unit
    tests. The loaded module is kept in the module-level ``_FMAPI``
    variable, so later calls return the same module object without
    importing again.
    """
    global _FMAPI
    # Already loaded by an earlier call: reuse the cached module.
    if _FMAPI is not None:
        return _FMAPI
    _FMAPI = importutils.import_module('fm_api.fm_api')
    return _FMAPI