Host compute service failure alarm removal

Remove the stale alarm 270.001 (Host compute service failure), which
is raised by the VIM. This might be an old reference to nova.
It’s likely not in use since stx.

Test Plan:
PASS: Verify that, on a load without the changes (alarm removal),
the event log in platform.log shows an entry for the 270.001 alarm.
PASS: Verify that, on a load with the alarm removal changes,
the event log in platform.log does not show an entry for the 270.001 alarm.

Depends-On: https://review.opendev.org/c/starlingx/fault/+/872603

Closes-Bug: 2004744

Change-Id: Icafb079fc2b58fb4126ac325804901ebd3f8f66e
Signed-off-by: Vanathi.Selvaraju <vanathi.selvaraju@windriver.com>
This commit is contained in:
Vanathi.Selvaraju 2023-02-02 18:34:15 -05:00
parent 0df5aaaf5d
commit 65bbbe1f0d
5 changed files with 4 additions and 179 deletions

View File

@ -1,5 +1,5 @@
#
# Copyright (c) 2015-2021 Wind River Systems, Inc.
# Copyright (c) 2015-2023 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -24,8 +24,6 @@ DLOG = debug.debug_get_logger('nfv_plugins.alarm_handlers.fm')
_fm_alarm_id_mapping = dict([
(alarm_objects_v1.ALARM_TYPE.MULTI_NODE_RECOVERY_MODE,
fm_constants.FM_ALARM_ID_VM_MULTI_NODE_RECOVERY_MODE),
(alarm_objects_v1.ALARM_TYPE.HOST_SERVICES_FAILED,
fm_constants.FM_ALARM_ID_HOST_SERVICES_FAILED),
(alarm_objects_v1.ALARM_TYPE.INSTANCE_FAILED,
fm_constants.FM_ALARM_ID_VM_FAILED),
(alarm_objects_v1.ALARM_TYPE.INSTANCE_SCHEDULING_FAILED,

View File

@ -1,5 +1,5 @@
#
# Copyright (c) 2015-2021 Wind River Systems, Inc.
# Copyright (c) 2015-2023 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -32,8 +32,6 @@ _fm_event_id_mapping = dict([
fm_constants.FM_LOG_ID_HOST_SERVICES_ENABLED),
(event_log_objects_v1.EVENT_ID.HOST_SERVICES_DISABLED,
fm_constants.FM_LOG_ID_HOST_SERVICES_DISABLED),
(event_log_objects_v1.EVENT_ID.HOST_SERVICES_FAILED,
fm_constants.FM_LOG_ID_HOST_SERVICES_FAILED),
(event_log_objects_v1.EVENT_ID.HYPERVISOR_STATE_CHANGE,
fm_constants.FM_LOG_ID_HYPERVISOR_STATE_CHANGE),
(event_log_objects_v1.EVENT_ID.INSTANCE_RENAMED,

View File

@ -1,5 +1,5 @@
#
# Copyright (c) 2015-2016 Wind River Systems, Inc.
# Copyright (c) 2015-2023 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -7,8 +7,6 @@ from nfv_common.alarm import * # noqa: F401,F403
from nfv_vim.alarm._general import clear_general_alarm # noqa: F401
from nfv_vim.alarm._general import raise_general_alarm # noqa: F401
from nfv_vim.alarm._host import host_clear_alarm # noqa: F401
from nfv_vim.alarm._host import host_raise_alarm # noqa: F401
from nfv_vim.alarm._instance import instance_clear_alarm # noqa: F401
from nfv_vim.alarm._instance import instance_manage_alarms # noqa: F401
from nfv_vim.alarm._instance import instance_raise_alarm # noqa: F401

View File

@ -1,134 +0,0 @@
#
# Copyright (c) 2015-2016 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import uuid
from nfv_common import alarm
# Alarm Template Definitions
# *** Don't add a period to the end of reason_text, these are not sentences.
#
# Maps an alarm type to the template used to build its alarm data.  The
# 'entity' and 'reason_text' values are %-style format strings that
# _alarm_raise() fills in from a data mapping (host_raise_alarm() supplies
# host_name and additional_text).  'exclude_alarm_context' lists the alarm
# contexts for which _alarm_template_get() returns no template.
_alarm_templates = {
alarm.ALARM_TYPE.HOST_SERVICES_FAILED: {
'entity_type': "host.services",
'entity': "host=%(host_name)s.services=compute",
'event_type': alarm.ALARM_EVENT_TYPE.PROCESSING_ERROR_ALARM,
'severity': alarm.ALARM_SEVERITY.CRITICAL,
'probable_cause': alarm.ALARM_PROBABLE_CAUSE.UNKNOWN,
# additional_text is appended directly after "failure", with no separator
'reason_text': "Host %(host_name)s compute services failure"
"%(additional_text)s",
'repair_action': "Wait for host services recovery to complete; if problem "
"persists contact next level of support",
'exclude_alarm_context': [alarm.ALARM_CONTEXT.TENANT],
},
}
def _alarm_template_get(alarm_type, alarm_context):
    """
    Look up the alarm template for an alarm type in a given alarm context

    Returns None when the alarm type has no template or when the alarm
    context is excluded by that template.  Otherwise returns a new dict
    holding the template fields, with any overrides found under the
    template's optional 'alarm_context_data' entry for this context applied.
    """
    try:
        base = _alarm_templates[alarm_type]
    except KeyError:
        return None

    if alarm_context in base['exclude_alarm_context']:
        return None

    fields = ('entity_type', 'entity', 'event_type', 'severity',
              'probable_cause', 'reason_text', 'repair_action')

    # Start from a copy of the generic template fields
    template = {field: base[field] for field in fields}

    context_data = base.get('alarm_context_data', None)
    if context_data is None or alarm_context not in context_data:
        return template

    # Context specific values take precedence over the generic ones
    overrides = context_data[alarm_context]
    for field in fields:
        if field in overrides:
            template[field] = overrides[field]

    return template
def _alarm_raise(alarm_type, alarm_context, template, data):
    """
    Build alarm data from a template and raise the alarm

    The template's 'entity' and 'reason_text' format strings are filled in
    from data.  A new uuid is generated for the alarm, and the alarm data
    that was raised is returned.
    """
    new_uuid = uuid.uuid4()

    # Positional arguments that follow the alarm context, in the order
    # expected by alarm.AlarmData
    details = (
        template['entity_type'],
        template['entity'] % data,
        template['event_type'],
        template['probable_cause'],
        template['severity'],
        alarm.ALARM_TREND_INDICATION.NO_CHANGE,
        template['reason_text'] % data,
        template['repair_action'],
    )

    raised = alarm.AlarmData(new_uuid, alarm_type, alarm_context, *details)
    alarm.alarm_raise(new_uuid, raised)
    return raised
def host_raise_alarm(host, alarm_type, additional_text=None, alarm_context=None):
    """
    Raise alarms against the host

    host: object whose name attribute is used in the alarm entity and
        reason text
    alarm_type: alarm type used to look up the alarm template
    additional_text: optional text appended to the alarm reason text; None
        is treated as an empty string
    alarm_context: currently ignored, the alarm is always raised for the
        admin context (see the override below)

    Returns the list of alarm data for the alarms that were raised; the
    list is empty when no template applies.
    """
    # The reason text template interpolates additional_text with %s, so a
    # None value would end up as the literal text "None" in the alarm.
    if additional_text is None:
        additional_text = ''

    data = dict()
    data['host_name'] = host.name
    data['additional_text'] = additional_text

    alarm_list = list()

    # For now, override alarm context to be the admin only
    alarm_context = alarm.ALARM_CONTEXT.ADMIN

    if alarm_context is None:
        # Not reachable while the override above is in place; kept so that
        # dropping the override raises the alarm for every alarm context.
        for alarm_context in alarm.ALARM_CONTEXT:
            template = _alarm_template_get(alarm_type, alarm_context)
            if template is not None:
                alarm_data = _alarm_raise(alarm_type, alarm_context, template,
                                          data)
                alarm_list.append(alarm_data)
    else:
        template = _alarm_template_get(alarm_type, alarm_context)
        if template is not None:
            alarm_data = _alarm_raise(alarm_type, alarm_context, template,
                                      data)
            alarm_list.append(alarm_data)

    return alarm_list
def host_clear_alarm(alarm_list):
    """
    Clear the given host alarms

    alarm_list: iterable of alarm data objects; each one is cleared by its
        alarm_uuid
    """
    for raised in alarm_list:
        alarm.alarm_clear(raised.alarm_uuid)

View File

@ -1,5 +1,5 @@
#
# Copyright (c) 2015-2018 Wind River Systems, Inc.
# Copyright (c) 2015-2023 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -14,7 +14,6 @@ from nfv_common.helpers import Singleton
from nfv_vim.objects._object import ObjectData
from nfv_vim import alarm
from nfv_vim import event_log
from nfv_vim import host_fsm
from nfv_vim import nfvi
@ -124,7 +123,6 @@ class Host(ObjectData):
HOST_SERVICE_STATE.ENABLED if self.is_enabled() else \
HOST_SERVICE_STATE.DISABLED
self._alarms = list()
self._events = list()
@property
@ -730,7 +728,6 @@ class Host(ObjectData):
"""
NFVI Host Delete
"""
alarm.host_clear_alarm(self._alarms)
self._fsm.handle_event(host_fsm.HOST_EVENT.DELETE)
def periodic_timer(self):
@ -774,43 +771,11 @@ class Host(ObjectData):
if HOST_SERVICE_STATE.ENABLED == host_service_state:
self._events = event_log.host_issue_log(
self, event_log.EVENT_ID.HOST_SERVICES_ENABLED)
alarm.host_clear_alarm(self._alarms)
self._alarms[:] = list()
elif HOST_SERVICE_STATE.DISABLED == host_service_state:
# Always log the disabled compute service
self._events = event_log.host_issue_log(
self, event_log.EVENT_ID.HOST_SERVICES_DISABLED)
# Clear any previous alarms for this host
alarm.host_clear_alarm(self._alarms)
self._alarms[:] = list()
# Alarm the disabled compute service if the host is still
# enabled and is not being locked. Alarm it as a failure.
if self.nfvi_host_is_enabled():
if reason is None:
additional_text = ''
else:
additional_text = ", %s" % reason
self._alarms = alarm.host_raise_alarm(
self, alarm.ALARM_TYPE.HOST_SERVICES_FAILED,
additional_text=additional_text)
elif HOST_SERVICE_STATE.FAILED == host_service_state:
if reason is None:
additional_text = ''
else:
additional_text = ", %s" % reason
self._events = event_log.host_issue_log(
self, event_log.EVENT_ID.HOST_SERVICES_FAILED,
additional_text=additional_text)
# Clear any previous alarms for this host
alarm.host_clear_alarm(self._alarms)
self._alarms[:] = list()
# Alarm the failed compute service
self._alarms = alarm.host_raise_alarm(
self, alarm.ALARM_TYPE.HOST_SERVICES_FAILED,
additional_text=additional_text)
def nfvi_host_upgrade_status(self, upgrade_inprogress, recover_instances):
"""