Merge "Add alarm id, definition for system peer and subcloud peer group"

This commit is contained in:
Zuul 2023-10-13 22:12:47 +00:00 committed by Gerrit Code Review
commit 1fb9ffdc54
2 changed files with 40 additions and 0 deletions

View File

@ -40,6 +40,8 @@ FM_ENTITY_TYPE_IMAGE_CONVERSION = 'fs_name'
FM_ENTITY_TYPE_SUBCLOUD = 'subcloud'
FM_ENTITY_TYPE_APPLICATION = 'k8s_application'
FM_ENTITY_TYPE_CERTIFICATE = 'certificate'
# Entity types for a system peer and a subcloud peer group.
# NOTE(review): presumably used as the entity type of the
# FM_ALARM_ID_DC_SYSTEM_PEER_HEARTBEAT_FAILED and
# FM_ALARM_ID_DC_SUBCLOUD_PEER_GROUP_NOT_MANAGED alarms -- confirm against
# the code that raises them.
FM_ENTITY_TYPE_SYSTEM_PEER = 'system_peer'
FM_ENTITY_TYPE_SUBCLOUD_PEER_GROUP = 'subcloud_peer_group'
# alarm service sub entity values
FM_SERVICE_NETWORKING = 'networking'
@ -130,6 +132,8 @@ FM_LOG_ID_HYPERVISOR_STATE_CHANGE = ALARM_GROUP_HYPERVISOR + ".001"
FM_ALARM_ID_DC_SUBCLOUD_OFFLINE = ALARM_GROUP_DISTRIBUTED_CLOUD + ".001"
FM_ALARM_ID_DC_SUBCLOUD_RESOURCE_OUT_OF_SYNC = ALARM_GROUP_DISTRIBUTED_CLOUD + ".002"
FM_ALARM_ID_DC_SUBCLOUD_BACKUP_FAILED = ALARM_GROUP_DISTRIBUTED_CLOUD + ".003"
# System peer and subcloud peer group alarms.
# NOTE(review): presumably these resolve to the 280.004 and 280.005 entries
# of the event definition YAML -- confirm ALARM_GROUP_DISTRIBUTED_CLOUD is "280".
FM_ALARM_ID_DC_SYSTEM_PEER_HEARTBEAT_FAILED = ALARM_GROUP_DISTRIBUTED_CLOUD + ".004"
FM_ALARM_ID_DC_SUBCLOUD_PEER_GROUP_NOT_MANAGED = ALARM_GROUP_DISTRIBUTED_CLOUD + ".005"
# HA alarm id
FM_ALARM_ID_HA_SERVICE_GROUP_STATE = ALARM_GROUP_HA + ".001"

View File

@ -1113,6 +1113,42 @@
Degrade_Affecting_Severity: none
Context: none
280.004:
    # Alarm for a system peer reported as disconnected; the affected entity
    # is identified as peer=<peer_uuid>.
    # A single ID covers two severities: Description carries one line per
    # severity, in the same order as the Severity list.
    Type: Alarm
    Description: |-
        Critical: Peer <peer_uuid> is in disconnected state. The following subcloud peer groups are impacted: <peer-groups>.
        Major: Peer <peer_uuid> connections in disconnected state.
    Entity_Instance_ID: peer=<peer_uuid>
    Severity: [critical, major]
    Proposed_Repair_Action: "Check the connectivity between the current system and the reported peer site. If the peer system is down, migrate the affected peer group(s) to the current system for continued subcloud management."
    Maintenance_Action:
    # Explicit False rather than an empty (null) value, matching 280.005.
    # NOTE(review): assumes consumers treat a missing value as False -- confirm.
    Inhibit_Alarms: False
    Alarm_Type: communication
    Probable_Cause: unknown
    Service_Affecting: False
    Suppression: True
    Management_Affecting_Severity: none
    Degrade_Affecting_Severity: none
    Context: starlingx
280.005:
    # Alarm for a subcloud peer group that is currently managed by a remote
    # system with a lower priority; the entity names both the peer group and
    # the remote peer.
    Type: Alarm
    # Folded scalar: the lines below are joined with single spaces into one
    # line, with no trailing newline.
    Description: >-
        Subcloud peer group <peer_group_name> is managed by remote system
        <peer_uuid> with a lower priority.
    Entity_Instance_ID: peer_group=<peer_group_name>,peer=<peer_uuid>
    Severity: [major]
    Proposed_Repair_Action: >-
        Check the reported peer group state. Migrate it back to the current
        system if the state is 'rehomed' and the current system is stable.
        Otherwise, wait until these conditions are met.
    Maintenance_Action:
    Inhibit_Alarms: False
    Alarm_Type: other
    Probable_Cause: unknown
    Service_Affecting: False
    Suppression: True
    Management_Affecting_Severity: none
    Degrade_Affecting_Severity: none
    Context: starlingx
#---------------------------------------------------------------------------
# NETWORK
#---------------------------------------------------------------------------