Merge "Host compute service failure alarm removal"

This commit is contained in:
Zuul 2023-02-11 13:54:13 +00:00 committed by Gerrit Code Review
commit 319d78f0dc
5 changed files with 4 additions and 179 deletions

View File

@ -1,5 +1,5 @@
#
# Copyright (c) 2015-2021 Wind River Systems, Inc.
# Copyright (c) 2015-2023 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -24,8 +24,6 @@ DLOG = debug.debug_get_logger('nfv_plugins.alarm_handlers.fm')
_fm_alarm_id_mapping = dict([
(alarm_objects_v1.ALARM_TYPE.MULTI_NODE_RECOVERY_MODE,
fm_constants.FM_ALARM_ID_VM_MULTI_NODE_RECOVERY_MODE),
(alarm_objects_v1.ALARM_TYPE.HOST_SERVICES_FAILED,
fm_constants.FM_ALARM_ID_HOST_SERVICES_FAILED),
(alarm_objects_v1.ALARM_TYPE.INSTANCE_FAILED,
fm_constants.FM_ALARM_ID_VM_FAILED),
(alarm_objects_v1.ALARM_TYPE.INSTANCE_SCHEDULING_FAILED,

View File

@ -1,5 +1,5 @@
#
# Copyright (c) 2015-2021 Wind River Systems, Inc.
# Copyright (c) 2015-2023 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -32,8 +32,6 @@ _fm_event_id_mapping = dict([
fm_constants.FM_LOG_ID_HOST_SERVICES_ENABLED),
(event_log_objects_v1.EVENT_ID.HOST_SERVICES_DISABLED,
fm_constants.FM_LOG_ID_HOST_SERVICES_DISABLED),
(event_log_objects_v1.EVENT_ID.HOST_SERVICES_FAILED,
fm_constants.FM_LOG_ID_HOST_SERVICES_FAILED),
(event_log_objects_v1.EVENT_ID.HYPERVISOR_STATE_CHANGE,
fm_constants.FM_LOG_ID_HYPERVISOR_STATE_CHANGE),
(event_log_objects_v1.EVENT_ID.INSTANCE_RENAMED,

View File

@ -1,5 +1,5 @@
#
# Copyright (c) 2015-2016 Wind River Systems, Inc.
# Copyright (c) 2015-2023 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -7,8 +7,6 @@ from nfv_common.alarm import * # noqa: F401,F403
from nfv_vim.alarm._general import clear_general_alarm # noqa: F401
from nfv_vim.alarm._general import raise_general_alarm # noqa: F401
from nfv_vim.alarm._host import host_clear_alarm # noqa: F401
from nfv_vim.alarm._host import host_raise_alarm # noqa: F401
from nfv_vim.alarm._instance import instance_clear_alarm # noqa: F401
from nfv_vim.alarm._instance import instance_manage_alarms # noqa: F401
from nfv_vim.alarm._instance import instance_raise_alarm # noqa: F401

View File

@ -1,134 +0,0 @@
#
# Copyright (c) 2015-2016 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import uuid
from nfv_common import alarm
# Alarm Template Definitions
# *** Don't add a period to the end of reason_text, these are not sentences.
_alarm_templates = {
    # Raised against a host whose compute services have failed.
    alarm.ALARM_TYPE.HOST_SERVICES_FAILED: {
        'entity_type': 'host.services',
        # Filled in with the host name when the alarm is raised.
        'entity': 'host=%(host_name)s.services=compute',
        'event_type': alarm.ALARM_EVENT_TYPE.PROCESSING_ERROR_ALARM,
        'severity': alarm.ALARM_SEVERITY.CRITICAL,
        'probable_cause': alarm.ALARM_PROBABLE_CAUSE.UNKNOWN,
        # additional_text is appended directly after "failure", so callers
        # supply their own leading separator (or an empty string).
        'reason_text': ('Host %(host_name)s compute services failure'
                        '%(additional_text)s'),
        'repair_action': ('Wait for host services recovery to complete; '
                          'if problem '
                          'persists contact next level of support'),
        # Contexts for which no template is returned for this alarm type.
        'exclude_alarm_context': [alarm.ALARM_CONTEXT.TENANT],
    },
}


def _alarm_template_get(alarm_type, alarm_context):
    """
    Build the effective alarm template for an alarm type and alarm context

    Returns None when the alarm type has no entry in _alarm_templates or
    when the alarm context is listed in that entry's
    'exclude_alarm_context'. Otherwise returns a new dict holding the
    entry's base fields, with any overrides found for the alarm context
    under the optional 'alarm_context_data' key applied on top.
    """
    field_names = ('entity_type', 'entity', 'event_type', 'severity',
                   'probable_cause', 'reason_text', 'repair_action')

    if alarm_type not in _alarm_templates:
        return None

    base = _alarm_templates[alarm_type]
    if alarm_context in base['exclude_alarm_context']:
        return None

    # Work on a copy so the module-level definition is never handed out.
    resolved = {name: base[name] for name in field_names}

    per_context = base.get('alarm_context_data', None)
    if per_context is not None and alarm_context in per_context:
        overrides = per_context[alarm_context]
        # Only the fields present in the override replace the base values.
        resolved.update((name, overrides[name])
                        for name in field_names if name in overrides)

    return resolved


def _alarm_raise(alarm_type, alarm_context, template, data):
    """
    Raise a new alarm built from an alarm template and substitution data

    The 'entity' and 'reason_text' template fields are %-formatted with
    data; the remaining template fields are passed through unchanged.
    Returns the alarm data object that was raised.
    """
    new_uuid = uuid.uuid4()

    # Positional order expected by alarm.AlarmData after the uuid, type
    # and context arguments.
    template_args = (
        template['entity_type'],
        template['entity'] % data,
        template['event_type'],
        template['probable_cause'],
        template['severity'],
        alarm.ALARM_TREND_INDICATION.NO_CHANGE,
        template['reason_text'] % data,
        template['repair_action'],
    )

    raised = alarm.AlarmData(new_uuid, alarm_type, alarm_context,
                             *template_args)
    alarm.alarm_raise(new_uuid, raised)
    return raised


def host_raise_alarm(host, alarm_type, additional_text=None, alarm_context=None):
    """
    Raise alarms against the host

    host: object whose name attribute identifies the host in the alarm.
    alarm_type: key used to look up the alarm template.
    additional_text: optional suffix appended to the alarm reason text;
        None is treated as an empty suffix.
    alarm_context: accepted for interface compatibility but currently
        ignored, the admin context is always used (see below).

    Returns the list of alarm data objects raised, which is empty when no
    template applies to the alarm type.
    """
    data = {
        'host_name': host.name,
        # Formatting None through %(additional_text)s would append the
        # literal text "None" to the reason text, so use an empty suffix.
        'additional_text': '' if additional_text is None else additional_text,
    }

    # For now, override alarm context to be the admin only. Because of this
    # override the caller-supplied context can never be None here, so there
    # is no per-context fan-out to perform.
    alarm_context = alarm.ALARM_CONTEXT.ADMIN

    alarm_list = list()
    template = _alarm_template_get(alarm_type, alarm_context)
    if template is not None:
        alarm_list.append(
            _alarm_raise(alarm_type, alarm_context, template, data))

    return alarm_list


def host_clear_alarm(alarm_list):
    """
    Clear alarms previously raised against the host

    Each entry of alarm_list is cleared by its alarm_uuid; the list itself
    is left unmodified.
    """
    for raised in alarm_list:
        alarm.alarm_clear(raised.alarm_uuid)

View File

@ -1,5 +1,5 @@
#
# Copyright (c) 2015-2018 Wind River Systems, Inc.
# Copyright (c) 2015-2023 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -14,7 +14,6 @@ from nfv_common.helpers import Singleton
from nfv_vim.objects._object import ObjectData
from nfv_vim import alarm
from nfv_vim import event_log
from nfv_vim import host_fsm
from nfv_vim import nfvi
@ -124,7 +123,6 @@ class Host(ObjectData):
HOST_SERVICE_STATE.ENABLED if self.is_enabled() else \
HOST_SERVICE_STATE.DISABLED
self._alarms = list()
self._events = list()
@property
@ -730,7 +728,6 @@ class Host(ObjectData):
"""
NFVI Host Delete
"""
alarm.host_clear_alarm(self._alarms)
self._fsm.handle_event(host_fsm.HOST_EVENT.DELETE)
def periodic_timer(self):
@ -774,43 +771,11 @@ class Host(ObjectData):
if HOST_SERVICE_STATE.ENABLED == host_service_state:
self._events = event_log.host_issue_log(
self, event_log.EVENT_ID.HOST_SERVICES_ENABLED)
alarm.host_clear_alarm(self._alarms)
self._alarms[:] = list()
elif HOST_SERVICE_STATE.DISABLED == host_service_state:
# Always log the disabled compute service
self._events = event_log.host_issue_log(
self, event_log.EVENT_ID.HOST_SERVICES_DISABLED)
# Clear any previous alarms for this host
alarm.host_clear_alarm(self._alarms)
self._alarms[:] = list()
# Alarm the disabled compute service if the host is still
# enabled and is not being locked. Alarm it as a failure.
if self.nfvi_host_is_enabled():
if reason is None:
additional_text = ''
else:
additional_text = ", %s" % reason
self._alarms = alarm.host_raise_alarm(
self, alarm.ALARM_TYPE.HOST_SERVICES_FAILED,
additional_text=additional_text)
elif HOST_SERVICE_STATE.FAILED == host_service_state:
if reason is None:
additional_text = ''
else:
additional_text = ", %s" % reason
self._events = event_log.host_issue_log(
self, event_log.EVENT_ID.HOST_SERVICES_FAILED,
additional_text=additional_text)
# Clear any previous alarms for this host
alarm.host_clear_alarm(self._alarms)
self._alarms[:] = list()
# Alarm the failed compute service
self._alarms = alarm.host_raise_alarm(
self, alarm.ALARM_TYPE.HOST_SERVICES_FAILED,
additional_text=additional_text)
def nfvi_host_upgrade_status(self, upgrade_inprogress, recover_instances):
"""