# Source path: nfv/nfv/nfv-plugins/nfv_plugins/alarm_handlers/fm.py
# (repository listing: 246 lines, 10 KiB, Python, executable file)

#
# Copyright (c) 2015-2016 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from fm_api import constants as fm_constants
from fm_api import fm_api
from nfv_common import debug
import nfv_common.alarm.handlers.v1 as alarm_handlers_v1
import nfv_common.alarm.objects.v1 as alarm_objects_v1
from nfv_plugins.alarm_handlers import config
# Module-level logger shared by every method of the handler defined below.
DLOG = debug.debug_get_logger('nfv_plugins.alarm_handlers.fm')
# Translation table: NFV alarm type -> FM alarm identifier.
# Alarm types absent from this table are rejected by raise_alarm() and
# skipped by audit_alarms().  Note that INSTANCE_FAILED and
# INSTANCE_SCHEDULING_FAILED both translate to the same FM alarm id.
_fm_alarm_id_mapping = {
    alarm_objects_v1.ALARM_TYPE.MULTI_NODE_RECOVERY_MODE:
        fm_constants.FM_ALARM_ID_VM_MULTI_NODE_RECOVERY_MODE,
    alarm_objects_v1.ALARM_TYPE.HOST_SERVICES_FAILED:
        fm_constants.FM_ALARM_ID_HOST_SERVICES_FAILED,
    alarm_objects_v1.ALARM_TYPE.INSTANCE_FAILED:
        fm_constants.FM_ALARM_ID_VM_FAILED,
    alarm_objects_v1.ALARM_TYPE.INSTANCE_SCHEDULING_FAILED:
        fm_constants.FM_ALARM_ID_VM_FAILED,
    alarm_objects_v1.ALARM_TYPE.INSTANCE_PAUSED:
        fm_constants.FM_ALARM_ID_VM_PAUSED,
    alarm_objects_v1.ALARM_TYPE.INSTANCE_SUSPENDED:
        fm_constants.FM_ALARM_ID_VM_SUSPENDED,
    alarm_objects_v1.ALARM_TYPE.INSTANCE_STOPPED:
        fm_constants.FM_ALARM_ID_VM_STOPPED,
    alarm_objects_v1.ALARM_TYPE.INSTANCE_REBOOTING:
        fm_constants.FM_ALARM_ID_VM_REBOOTING,
    alarm_objects_v1.ALARM_TYPE.INSTANCE_REBUILDING:
        fm_constants.FM_ALARM_ID_VM_REBUILDING,
    alarm_objects_v1.ALARM_TYPE.INSTANCE_EVACUATING:
        fm_constants.FM_ALARM_ID_VM_EVACUATING,
    alarm_objects_v1.ALARM_TYPE.INSTANCE_LIVE_MIGRATING:
        fm_constants.FM_ALARM_ID_VM_LIVE_MIGRATING,
    alarm_objects_v1.ALARM_TYPE.INSTANCE_COLD_MIGRATING:
        fm_constants.FM_ALARM_ID_VM_COLD_MIGRATING,
    alarm_objects_v1.ALARM_TYPE.INSTANCE_COLD_MIGRATED:
        fm_constants.FM_ALARM_ID_VM_COLD_MIGRATED,
    alarm_objects_v1.ALARM_TYPE.INSTANCE_COLD_MIGRATE_REVERTING:
        fm_constants.FM_ALARM_ID_VM_COLD_MIGRATE_REVERTING,
    alarm_objects_v1.ALARM_TYPE.INSTANCE_RESIZING:
        fm_constants.FM_ALARM_ID_VM_RESIZING,
    alarm_objects_v1.ALARM_TYPE.INSTANCE_RESIZED:
        fm_constants.FM_ALARM_ID_VM_RESIZED,
    alarm_objects_v1.ALARM_TYPE.INSTANCE_RESIZE_REVERTING:
        fm_constants.FM_ALARM_ID_VM_RESIZE_REVERTING,
    alarm_objects_v1.ALARM_TYPE.INSTANCE_GUEST_HEARTBEAT:
        fm_constants.FM_ALARM_ID_VM_GUEST_HEARTBEAT,
    alarm_objects_v1.ALARM_TYPE.INSTANCE_GROUP_POLICY_CONFLICT:
        fm_constants.FM_ALARM_ID_VM_GROUP_POLICY_CONFLICT,
    alarm_objects_v1.ALARM_TYPE.SW_PATCH_AUTO_APPLY_INPROGRESS:
        fm_constants.FM_ALARM_ID_SW_PATCH_AUTO_APPLY_INPROGRESS,
    alarm_objects_v1.ALARM_TYPE.SW_PATCH_AUTO_APPLY_ABORTING:
        fm_constants.FM_ALARM_ID_SW_PATCH_AUTO_APPLY_ABORTING,
    alarm_objects_v1.ALARM_TYPE.SW_PATCH_AUTO_APPLY_FAILED:
        fm_constants.FM_ALARM_ID_SW_PATCH_AUTO_APPLY_FAILED,
    alarm_objects_v1.ALARM_TYPE.SW_UPGRADE_AUTO_APPLY_INPROGRESS:
        fm_constants.FM_ALARM_ID_SW_UPGRADE_AUTO_APPLY_INPROGRESS,
    alarm_objects_v1.ALARM_TYPE.SW_UPGRADE_AUTO_APPLY_ABORTING:
        fm_constants.FM_ALARM_ID_SW_UPGRADE_AUTO_APPLY_ABORTING,
    alarm_objects_v1.ALARM_TYPE.SW_UPGRADE_AUTO_APPLY_FAILED:
        fm_constants.FM_ALARM_ID_SW_UPGRADE_AUTO_APPLY_FAILED,
}
# Translation table: NFV alarm event type -> FM alarm type constant.
# SECURITY_SERVICE_VIOLATION and MECHANISM_VIOLATION share FM_ALARM_TYPE_9.
_fm_alarm_type_mapping = {
    alarm_objects_v1.ALARM_EVENT_TYPE.COMMUNICATIONS_ALARM:
        fm_constants.FM_ALARM_TYPE_1,
    alarm_objects_v1.ALARM_EVENT_TYPE.QUALITY_OF_SERVICE_ALARM:
        fm_constants.FM_ALARM_TYPE_2,
    alarm_objects_v1.ALARM_EVENT_TYPE.PROCESSING_ERROR_ALARM:
        fm_constants.FM_ALARM_TYPE_3,
    alarm_objects_v1.ALARM_EVENT_TYPE.EQUIPMENT_ALARM:
        fm_constants.FM_ALARM_TYPE_4,
    alarm_objects_v1.ALARM_EVENT_TYPE.ENVIRONMENTAL_ALARM:
        fm_constants.FM_ALARM_TYPE_5,
    alarm_objects_v1.ALARM_EVENT_TYPE.INTEGRITY_VIOLATION:
        fm_constants.FM_ALARM_TYPE_6,
    alarm_objects_v1.ALARM_EVENT_TYPE.OPERATIONAL_VIOLATION:
        fm_constants.FM_ALARM_TYPE_7,
    alarm_objects_v1.ALARM_EVENT_TYPE.PHYSICAL_VIOLATION:
        fm_constants.FM_ALARM_TYPE_8,
    alarm_objects_v1.ALARM_EVENT_TYPE.SECURITY_SERVICE_VIOLATION:
        fm_constants.FM_ALARM_TYPE_9,
    alarm_objects_v1.ALARM_EVENT_TYPE.MECHANISM_VIOLATION:
        fm_constants.FM_ALARM_TYPE_9,
    alarm_objects_v1.ALARM_EVENT_TYPE.TIME_DOMAIN_VIOLATION:
        fm_constants.FM_ALARM_TYPE_10,
}
# Translation table: NFV probable cause -> FM probable cause constant.
# raise_alarm() indexes this table directly, so a probable cause that is not
# listed here raises KeyError there.
_fm_alarm_probable_cause = {
    alarm_objects_v1.ALARM_PROBABLE_CAUSE.UNKNOWN:
        fm_constants.ALARM_PROBABLE_CAUSE_UNKNOWN,
    alarm_objects_v1.ALARM_PROBABLE_CAUSE.SOFTWARE_ERROR:
        fm_constants.ALARM_PROBABLE_CAUSE_45,
    alarm_objects_v1.ALARM_PROBABLE_CAUSE.SOFTWARE_PROGRAM_ERROR:
        fm_constants.ALARM_PROBABLE_CAUSE_47,
    alarm_objects_v1.ALARM_PROBABLE_CAUSE.UNDERLYING_RESOURCE_UNAVAILABLE:
        fm_constants.ALARM_PROBABLE_CAUSE_55,
    alarm_objects_v1.ALARM_PROBABLE_CAUSE.PROCEDURAL_ERROR:
        fm_constants.ALARM_PROBABLE_CAUSE_64,
}
# Translation table: NFV perceived severity -> FM alarm severity constant.
_fm_alarm_severity_mapping = {
    alarm_objects_v1.ALARM_SEVERITY.CLEARED:
        fm_constants.FM_ALARM_SEVERITY_CLEAR,
    alarm_objects_v1.ALARM_SEVERITY.WARNING:
        fm_constants.FM_ALARM_SEVERITY_WARNING,
    alarm_objects_v1.ALARM_SEVERITY.MINOR:
        fm_constants.FM_ALARM_SEVERITY_MINOR,
    alarm_objects_v1.ALARM_SEVERITY.MAJOR:
        fm_constants.FM_ALARM_SEVERITY_MAJOR,
    alarm_objects_v1.ALARM_SEVERITY.CRITICAL:
        fm_constants.FM_ALARM_SEVERITY_CRITICAL,
}
class FaultManagement(alarm_handlers_v1.AlarmHandler):
    """
    Fault Management Alarm Handler

    Forwards alarms to the FM service through fm_api and remembers, per
    alarm uuid, the alarm data together with the uuid FM returned for it.
    """
    _name = 'Fault-Management'
    _version = '1.0.0'
    _provider = 'Wind River'
    _signature = 'e33d7cf6-f270-4256-893e-16266ee4dd2e'

    # alarm_uuid -> (alarm_data, fm_uuid); fm_uuid is None while FM has not
    # acknowledged the alarm.
    # NOTE(review): declared on the class, so the dict is shared by every
    # instance of this handler.
    _alarm_db = {}

    # FM API client; stays None until initialize() has been called.
    _fm_api = None

    @property
    def name(self):
        """Handler name."""
        return self._name

    @property
    def version(self):
        """Handler version string."""
        return self._version

    @property
    def provider(self):
        """Handler provider."""
        return self._provider

    @property
    def signature(self):
        """Handler signature string."""
        return self._signature

    def raise_alarm(self, alarm_uuid, alarm_data):
        """
        Set a fault in FM for the given alarm.

        The alarm is translated with the module mapping tables and sent
        through set_fault().  The alarm is then recorded in the local alarm
        db with the uuid returned by FM, or with None when FM returned
        nothing.  An alarm type without an FM alarm id is logged and
        otherwise ignored.  Event type, severity and probable cause are
        looked up by direct indexing, so unmapped values raise KeyError.
        """
        DLOG.debug("Raising alarm, uuid=%s." % alarm_uuid)

        fm_alarm_id = _fm_alarm_id_mapping.get(alarm_data.alarm_type)
        if fm_alarm_id is None:
            DLOG.error("Unknown alarm type (%s) given."
                       % alarm_data.alarm_type)
            return

        # Keep the lookups in this order: it decides which KeyError surfaces
        # first when more than one field is unmapped.
        fm_alarm_type = _fm_alarm_type_mapping[alarm_data.event_type]
        fm_severity = _fm_alarm_severity_mapping[
            alarm_data.perceived_severity]
        fm_probable_cause = _fm_alarm_probable_cause[
            alarm_data.probable_cause]

        # No FM uuid is known before the fault has been set.
        unset_fm_uuid = None

        fault = fm_api.Fault(
            fm_alarm_id,
            fm_constants.FM_ALARM_STATE_SET,
            alarm_data.entity_type,
            alarm_data.entity,
            fm_severity,
            alarm_data.specific_problem_text,
            fm_alarm_type,
            fm_probable_cause,
            alarm_data.proposed_repair_action,
            alarm_data.service_affecting,
            alarm_data.suppression_allowed,
            unset_fm_uuid,
            timestamp=alarm_data.raised_timestamp)

        assigned_fm_uuid = self._fm_api.set_fault(fault)

        if assigned_fm_uuid is not None:
            self._alarm_db[alarm_uuid] = (alarm_data, assigned_fm_uuid)
            DLOG.info("Raised alarm, uuid=%s, fm_uuid=%s."
                      % (alarm_uuid, assigned_fm_uuid))
        else:
            # Record the alarm without an FM uuid; audit_alarms() raises
            # such entries again.
            self._alarm_db[alarm_uuid] = (alarm_data, None)
            DLOG.error("Failed to raise alarm, uuid=%s, fm_uuid=%s."
                       % (alarm_uuid, unset_fm_uuid))

    def clear_alarm(self, alarm_uuid):
        """
        Clear the FM fault that belongs to the given alarm uuid.

        Nothing happens beyond the debug log when the uuid is not in the
        local alarm db.  Otherwise the fault is cleared in FM and the entry
        is dropped from the local alarm db whatever FM answered.
        """
        DLOG.debug("Clearing alarm, uuid=%s." % alarm_uuid)

        alarm_data, _ = self._alarm_db.get(alarm_uuid, (None, None))
        if alarm_data is None:
            return

        fm_alarm_id = _fm_alarm_id_mapping[alarm_data.alarm_type]
        if self._fm_api.clear_fault(fm_alarm_id, alarm_data.entity):
            DLOG.info("Cleared alarm, uuid=%s." % alarm_uuid)
        else:
            DLOG.error("Failed to clear alarm, uuid=%s." % alarm_uuid)

        # Always remove the alarm from our alarm db.  If we failed to clear
        # the alarm, the audit will clear it later.
        del self._alarm_db[alarm_uuid]

    def audit_alarms(self):
        """
        Reconcile the local alarm db with the faults reported by FM.

        For every alarm type that has an FM alarm id and for which FM
        returns at least one fault: alarms held locally that FM does not
        report (or that never received an FM uuid) are raised again, and FM
        faults whose uuid is not held locally are cleared.
        """
        DLOG.debug("Auditing alarms.")

        for alarm_type in alarm_objects_v1.ALARM_TYPE:
            fm_alarm_id = _fm_alarm_id_mapping.get(alarm_type)
            if fm_alarm_id is None:
                continue

            fm_faults = self._fm_api.get_faults_by_id(fm_alarm_id)
            if not fm_faults:
                # NOTE(review): both checks below are skipped for this type,
                # so locally tracked alarms of this type are not raised
                # again in this pass when FM returns nothing.
                continue

            # Check for missing alarms needing to be raised
            for alarm_uuid, (alarm_data, fm_uuid) in self._alarm_db.items():
                if alarm_type == alarm_data.alarm_type:
                    if fm_uuid is None:
                        self.raise_alarm(alarm_uuid, alarm_data)
                    elif not any(fm_uuid == reported.uuid
                                 for reported in fm_faults):
                        DLOG.info("Re-raise of alarm, uuid=%s." % alarm_uuid)
                        self.raise_alarm(alarm_uuid, alarm_data)

            # Check for stale alarms needing to be cleared
            for reported in fm_faults:
                held_locally = any(
                    held_fm_uuid == reported.uuid
                    for _, held_fm_uuid in self._alarm_db.values())
                if held_locally:
                    continue

                DLOG.info("Clear stale alarm, fm_uuid=%s, fm_alarm_id=%s, "
                          "fm_entity_instance_id=%s."
                          % (reported.uuid, reported.alarm_id,
                             reported.entity_instance_id))
                self._fm_api.clear_fault(reported.alarm_id,
                                         reported.entity_instance_id)

        DLOG.debug("Audited alarms.")

    def initialize(self, config_file):
        """
        Load the plugin configuration and create the FM API client.
        """
        config.load(config_file)
        self._fm_api = fm_api.FaultAPIs()

    def finalize(self):
        """
        Nothing to release; present to satisfy the handler interface.
        """
        return