246 lines
10 KiB
Python
Executable File
246 lines
10 KiB
Python
Executable File
#
|
|
# Copyright (c) 2015-2016 Wind River Systems, Inc.
|
|
#
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
#
|
|
from fm_api import constants as fm_constants
|
|
from fm_api import fm_api
|
|
|
|
from nfv_common import debug
|
|
|
|
import nfv_common.alarm.handlers.v1 as alarm_handlers_v1
|
|
import nfv_common.alarm.objects.v1 as alarm_objects_v1
|
|
|
|
from nfv_plugins.alarm_handlers import config
|
|
|
|
# Module-level logger used by every log call in this plugin.
DLOG = debug.debug_get_logger('nfv_plugins.alarm_handlers.fm')
|
|
|
|
# Translation table: NFV alarm type -> Fault Management (FM) alarm id.
# An alarm type without an entry here is reported as unknown by
# raise_alarm() and is skipped by audit_alarms().
_fm_alarm_id_mapping = {
    alarm_objects_v1.ALARM_TYPE.MULTI_NODE_RECOVERY_MODE:
        fm_constants.FM_ALARM_ID_VM_MULTI_NODE_RECOVERY_MODE,
    alarm_objects_v1.ALARM_TYPE.HOST_SERVICES_FAILED:
        fm_constants.FM_ALARM_ID_HOST_SERVICES_FAILED,
    # Instance failures and scheduling failures share one FM alarm id.
    alarm_objects_v1.ALARM_TYPE.INSTANCE_FAILED:
        fm_constants.FM_ALARM_ID_VM_FAILED,
    alarm_objects_v1.ALARM_TYPE.INSTANCE_SCHEDULING_FAILED:
        fm_constants.FM_ALARM_ID_VM_FAILED,
    alarm_objects_v1.ALARM_TYPE.INSTANCE_PAUSED:
        fm_constants.FM_ALARM_ID_VM_PAUSED,
    alarm_objects_v1.ALARM_TYPE.INSTANCE_SUSPENDED:
        fm_constants.FM_ALARM_ID_VM_SUSPENDED,
    alarm_objects_v1.ALARM_TYPE.INSTANCE_STOPPED:
        fm_constants.FM_ALARM_ID_VM_STOPPED,
    alarm_objects_v1.ALARM_TYPE.INSTANCE_REBOOTING:
        fm_constants.FM_ALARM_ID_VM_REBOOTING,
    alarm_objects_v1.ALARM_TYPE.INSTANCE_REBUILDING:
        fm_constants.FM_ALARM_ID_VM_REBUILDING,
    alarm_objects_v1.ALARM_TYPE.INSTANCE_EVACUATING:
        fm_constants.FM_ALARM_ID_VM_EVACUATING,
    alarm_objects_v1.ALARM_TYPE.INSTANCE_LIVE_MIGRATING:
        fm_constants.FM_ALARM_ID_VM_LIVE_MIGRATING,
    alarm_objects_v1.ALARM_TYPE.INSTANCE_COLD_MIGRATING:
        fm_constants.FM_ALARM_ID_VM_COLD_MIGRATING,
    alarm_objects_v1.ALARM_TYPE.INSTANCE_COLD_MIGRATED:
        fm_constants.FM_ALARM_ID_VM_COLD_MIGRATED,
    alarm_objects_v1.ALARM_TYPE.INSTANCE_COLD_MIGRATE_REVERTING:
        fm_constants.FM_ALARM_ID_VM_COLD_MIGRATE_REVERTING,
    alarm_objects_v1.ALARM_TYPE.INSTANCE_RESIZING:
        fm_constants.FM_ALARM_ID_VM_RESIZING,
    alarm_objects_v1.ALARM_TYPE.INSTANCE_RESIZED:
        fm_constants.FM_ALARM_ID_VM_RESIZED,
    alarm_objects_v1.ALARM_TYPE.INSTANCE_RESIZE_REVERTING:
        fm_constants.FM_ALARM_ID_VM_RESIZE_REVERTING,
    alarm_objects_v1.ALARM_TYPE.INSTANCE_GUEST_HEARTBEAT:
        fm_constants.FM_ALARM_ID_VM_GUEST_HEARTBEAT,
    alarm_objects_v1.ALARM_TYPE.INSTANCE_GROUP_POLICY_CONFLICT:
        fm_constants.FM_ALARM_ID_VM_GROUP_POLICY_CONFLICT,
    alarm_objects_v1.ALARM_TYPE.SW_PATCH_AUTO_APPLY_INPROGRESS:
        fm_constants.FM_ALARM_ID_SW_PATCH_AUTO_APPLY_INPROGRESS,
    alarm_objects_v1.ALARM_TYPE.SW_PATCH_AUTO_APPLY_ABORTING:
        fm_constants.FM_ALARM_ID_SW_PATCH_AUTO_APPLY_ABORTING,
    alarm_objects_v1.ALARM_TYPE.SW_PATCH_AUTO_APPLY_FAILED:
        fm_constants.FM_ALARM_ID_SW_PATCH_AUTO_APPLY_FAILED,
    alarm_objects_v1.ALARM_TYPE.SW_UPGRADE_AUTO_APPLY_INPROGRESS:
        fm_constants.FM_ALARM_ID_SW_UPGRADE_AUTO_APPLY_INPROGRESS,
    alarm_objects_v1.ALARM_TYPE.SW_UPGRADE_AUTO_APPLY_ABORTING:
        fm_constants.FM_ALARM_ID_SW_UPGRADE_AUTO_APPLY_ABORTING,
    alarm_objects_v1.ALARM_TYPE.SW_UPGRADE_AUTO_APPLY_FAILED:
        fm_constants.FM_ALARM_ID_SW_UPGRADE_AUTO_APPLY_FAILED,
}
|
|
|
|
# Translation table: NFV alarm event type -> FM alarm type constant.
# Looked up with [] in raise_alarm(), so a missing event type raises KeyError.
_fm_alarm_type_mapping = {
    alarm_objects_v1.ALARM_EVENT_TYPE.COMMUNICATIONS_ALARM:
        fm_constants.FM_ALARM_TYPE_1,
    alarm_objects_v1.ALARM_EVENT_TYPE.QUALITY_OF_SERVICE_ALARM:
        fm_constants.FM_ALARM_TYPE_2,
    alarm_objects_v1.ALARM_EVENT_TYPE.PROCESSING_ERROR_ALARM:
        fm_constants.FM_ALARM_TYPE_3,
    alarm_objects_v1.ALARM_EVENT_TYPE.EQUIPMENT_ALARM:
        fm_constants.FM_ALARM_TYPE_4,
    alarm_objects_v1.ALARM_EVENT_TYPE.ENVIRONMENTAL_ALARM:
        fm_constants.FM_ALARM_TYPE_5,
    alarm_objects_v1.ALARM_EVENT_TYPE.INTEGRITY_VIOLATION:
        fm_constants.FM_ALARM_TYPE_6,
    alarm_objects_v1.ALARM_EVENT_TYPE.OPERATIONAL_VIOLATION:
        fm_constants.FM_ALARM_TYPE_7,
    alarm_objects_v1.ALARM_EVENT_TYPE.PHYSICAL_VIOLATION:
        fm_constants.FM_ALARM_TYPE_8,
    # NOTE(review): the next two event types deliberately share
    # FM_ALARM_TYPE_9 here -- presumably FM has a single combined category
    # for them; confirm against fm_constants.
    alarm_objects_v1.ALARM_EVENT_TYPE.SECURITY_SERVICE_VIOLATION:
        fm_constants.FM_ALARM_TYPE_9,
    alarm_objects_v1.ALARM_EVENT_TYPE.MECHANISM_VIOLATION:
        fm_constants.FM_ALARM_TYPE_9,
    alarm_objects_v1.ALARM_EVENT_TYPE.TIME_DOMAIN_VIOLATION:
        fm_constants.FM_ALARM_TYPE_10,
}
|
|
|
|
# Translation table: NFV probable cause -> FM probable cause constant.
# Looked up with [] in raise_alarm(), so a missing cause raises KeyError.
_fm_alarm_probable_cause = {
    alarm_objects_v1.ALARM_PROBABLE_CAUSE.UNKNOWN:
        fm_constants.ALARM_PROBABLE_CAUSE_UNKNOWN,
    alarm_objects_v1.ALARM_PROBABLE_CAUSE.SOFTWARE_ERROR:
        fm_constants.ALARM_PROBABLE_CAUSE_45,
    alarm_objects_v1.ALARM_PROBABLE_CAUSE.SOFTWARE_PROGRAM_ERROR:
        fm_constants.ALARM_PROBABLE_CAUSE_47,
    alarm_objects_v1.ALARM_PROBABLE_CAUSE.UNDERLYING_RESOURCE_UNAVAILABLE:
        fm_constants.ALARM_PROBABLE_CAUSE_55,
    alarm_objects_v1.ALARM_PROBABLE_CAUSE.PROCEDURAL_ERROR:
        fm_constants.ALARM_PROBABLE_CAUSE_64,
}
|
|
|
|
# Translation table: NFV perceived severity -> FM alarm severity constant.
# Looked up with [] in raise_alarm(), so a missing severity raises KeyError.
_fm_alarm_severity_mapping = {
    alarm_objects_v1.ALARM_SEVERITY.CLEARED:
        fm_constants.FM_ALARM_SEVERITY_CLEAR,
    alarm_objects_v1.ALARM_SEVERITY.WARNING:
        fm_constants.FM_ALARM_SEVERITY_WARNING,
    alarm_objects_v1.ALARM_SEVERITY.MINOR:
        fm_constants.FM_ALARM_SEVERITY_MINOR,
    alarm_objects_v1.ALARM_SEVERITY.MAJOR:
        fm_constants.FM_ALARM_SEVERITY_MAJOR,
    alarm_objects_v1.ALARM_SEVERITY.CRITICAL:
        fm_constants.FM_ALARM_SEVERITY_CRITICAL,
}
|
|
|
|
|
|
class FaultManagement(alarm_handlers_v1.AlarmHandler):
    """
    Fault Management Alarm Handler

    Forwards NFV alarms to the Fault Management (FM) API and keeps a local
    record of what was raised so that audit_alarms() can reconcile the two
    sides.
    """
    _name = 'Fault-Management'
    _version = '1.0.0'
    _provider = 'Wind River'
    _signature = 'e33d7cf6-f270-4256-893e-16266ee4dd2e'

    # alarm uuid -> (alarm_data, fm_uuid); fm_uuid is None while the raise
    # towards FM has not succeeded. Declared on the class, so the same dict
    # is shared by every instance.
    _alarm_db = dict()

    # FM API client; stays None until initialize() is called.
    _fm_api = None

    @property
    def name(self):
        """Name of this alarm handler."""
        return self._name

    @property
    def version(self):
        """Version string of this alarm handler."""
        return self._version

    @property
    def provider(self):
        """Provider of this alarm handler."""
        return self._provider

    @property
    def signature(self):
        """Signature string identifying this alarm handler."""
        return self._signature

    def raise_alarm(self, alarm_uuid, alarm_data):
        """
        Raise an alarm in FM and record the outcome in the alarm db.

        An alarm whose type has no FM alarm id is logged as an error and
        not recorded. Otherwise the alarm is always recorded: with the uuid
        returned by FM on success, or with None when FM returned None.
        A KeyError propagates if the event type, severity or probable cause
        has no FM translation.
        """
        DLOG.debug("Raising alarm, uuid=%s." % alarm_uuid)

        fm_alarm_id = _fm_alarm_id_mapping.get(alarm_data.alarm_type)
        if fm_alarm_id is None:
            DLOG.error("Unknown alarm type (%s) given."
                       % alarm_data.alarm_type)
            return

        event_type = _fm_alarm_type_mapping[alarm_data.event_type]
        severity = _fm_alarm_severity_mapping[alarm_data.perceived_severity]
        probable_cause = _fm_alarm_probable_cause[alarm_data.probable_cause]

        # The fault is built without a uuid (None); the uuid is whatever
        # set_fault() hands back.
        fault = fm_api.Fault(
            fm_alarm_id,
            fm_constants.FM_ALARM_STATE_SET,
            alarm_data.entity_type,
            alarm_data.entity,
            severity,
            alarm_data.specific_problem_text,
            event_type,
            probable_cause,
            alarm_data.proposed_repair_action,
            alarm_data.service_affecting,
            alarm_data.suppression_allowed,
            None,
            timestamp=alarm_data.raised_timestamp)

        fm_uuid = self._fm_api.set_fault(fault)

        # Record the alarm even on failure so the audit can retry it later.
        self._alarm_db[alarm_uuid] = (alarm_data, fm_uuid)

        if fm_uuid is None:
            DLOG.error("Failed to raise alarm, uuid=%s, fm_uuid=%s."
                       % (alarm_uuid, fm_uuid))
        else:
            DLOG.info("Raised alarm, uuid=%s, fm_uuid=%s."
                      % (alarm_uuid, fm_uuid))

    def clear_alarm(self, alarm_uuid):
        """
        Clear a previously raised alarm in FM and forget it locally.

        Unknown alarm uuids are ignored.
        """
        DLOG.debug("Clearing alarm, uuid=%s." % alarm_uuid)

        alarm_data, _ = self._alarm_db.get(alarm_uuid, (None, None))
        if alarm_data is None:
            return

        fm_alarm_id = _fm_alarm_id_mapping[alarm_data.alarm_type]
        if self._fm_api.clear_fault(fm_alarm_id, alarm_data.entity):
            DLOG.info("Cleared alarm, uuid=%s." % alarm_uuid)
        else:
            DLOG.error("Failed to clear alarm, uuid=%s." % alarm_uuid)

        # Always remove the alarm from our alarm db. If we failed to clear
        # the alarm, the audit will clear it later.
        del self._alarm_db[alarm_uuid]

    def _raise_missing_alarms(self, alarm_type, fm_faults):
        """
        Raise again every recorded alarm of alarm_type that was never
        raised successfully or is absent from fm_faults.
        """
        for alarm_uuid, (alarm_data, fm_uuid) in self._alarm_db.items():
            if not (alarm_type == alarm_data.alarm_type):
                continue

            if fm_uuid is not None:
                if any(fm_uuid == fm_fault.uuid for fm_fault in fm_faults):
                    # FM still has this alarm, nothing to do.
                    continue

                DLOG.info("Re-raise of alarm, uuid=%s." % alarm_uuid)

            # raise_alarm() only overwrites the existing db entry, so the
            # set of keys being iterated does not change.
            self.raise_alarm(alarm_uuid, alarm_data)

    def _clear_stale_faults(self, fm_faults):
        """
        Clear in FM every fault of fm_faults whose uuid is not recorded in
        the alarm db.
        """
        for fm_fault in fm_faults:
            recorded = any(fm_uuid == fm_fault.uuid
                           for _, fm_uuid in self._alarm_db.values())
            if recorded:
                continue

            DLOG.info("Clear stale alarm, fm_uuid=%s, fm_alarm_id=%s, "
                      "fm_entity_instance_id=%s."
                      % (fm_fault.uuid, fm_fault.alarm_id,
                         fm_fault.entity_instance_id))

            self._fm_api.clear_fault(fm_fault.alarm_id,
                                     fm_fault.entity_instance_id)

    def audit_alarms(self):
        """
        Reconcile the alarm db with the faults FM currently reports.

        For each alarm type that has an FM alarm id, recorded alarms missing
        from FM are raised again and FM faults unknown to the alarm db are
        cleared.
        """
        DLOG.debug("Auditing alarms.")

        for alarm_type in alarm_objects_v1.ALARM_TYPE:
            fm_alarm_id = _fm_alarm_id_mapping.get(alarm_type)
            if fm_alarm_id is None:
                continue

            fm_faults = self._fm_api.get_faults_by_id(fm_alarm_id)
            if not fm_faults:
                # NOTE(review): when FM reports nothing for this id, recorded
                # alarms of this type are not re-raised in this pass --
                # confirm that is the intended behaviour.
                continue

            # Check for missing alarms needing to be raised
            self._raise_missing_alarms(alarm_type, fm_faults)

            # Check for stale alarms needing to be cleared
            self._clear_stale_faults(fm_faults)

        DLOG.debug("Audited alarms.")

    def initialize(self, config_file):
        """
        Load the plugin configuration and create the FM API client.
        """
        config.load(config_file)
        self._fm_api = fm_api.FaultAPIs()

    def finalize(self):
        """
        Finalize the alarm handler; there is nothing to release.
        """
        pass
|