From 04d6b31d95ccaa5db600719cd1d55eff1b261185 Mon Sep 17 00:00:00 2001 From: Yuxing Jiang Date: Fri, 5 May 2023 14:22:59 -0400 Subject: [PATCH] Add System Config Update orch alarms and events System config update alarms are 900.6xx series The new alarms are originated by a new type of vim strategy orchestrating configuration update. The new alarms are similar in numbering and wording to the kube upgrade auto apply 900.4xx series alarms and logs. System config update in-progress alarm is 900.010. System config update aborted alarm is 900.011. Story: 2010719 Task: 47947 Change-Id: Ieb6e68adf359ac7b0489d15bb33cb4b4a9f3ef3f Signed-off-by: Yuxing Jiang --- fm-api/source/fm_api/constants.py | 23 ++++ fm-doc/fm_doc/events.yaml | 190 ++++++++++++++++++++++++++++++ 2 files changed, 213 insertions(+) diff --git a/fm-api/source/fm_api/constants.py b/fm-api/source/fm_api/constants.py index 284fcd2e..16694584 100755 --- a/fm-api/source/fm_api/constants.py +++ b/fm-api/source/fm_api/constants.py @@ -305,6 +305,14 @@ FM_ALARM_ID_KUBE_ROOTCA_UPDATE_IN_PROGRESS = ALARM_GROUP_SW_MGMT + ".008" # Kubernetes RootCA Update abort alarm id FM_ALARM_ID_KUBE_ROOTCA_UPDATE_ABORTED = ALARM_GROUP_SW_MGMT + ".009" +# The SYSTEM_CONFIG_UPDATE alarms are originated by vim strategy which is the +# same as the other sw-mgmt alarms, put them in the same group +# System Config Update alarm id +FM_ALARM_ID_SYSTEM_CONFIG_UPDATE_IN_PROGRESS = ALARM_GROUP_SW_MGMT + ".010" + +# System Config Update abort alarm id +FM_ALARM_ID_SYSTEM_CONFIG_UPDATE_ABORTED = ALARM_GROUP_SW_MGMT + ".011" + # Security log id FM_LOG_ID_INVALID_PASSWORD = ALARM_GROUP_SECURITY + ".001" FM_LOG_ID_USER_LOCKOUT = ALARM_GROUP_SECURITY + ".002" @@ -400,6 +408,21 @@ FM_LOG_ID_KUBE_ROOTCA_UPDATE_AUTO_APPLY_ABORT_REJECTED = ALARM_GROUP_SW_MGMT + " FM_LOG_ID_KUBE_ROOTCA_UPDATE_AUTO_APPLY_ABORT_FAILED = ALARM_GROUP_SW_MGMT + ".520" FM_LOG_ID_KUBE_ROOTCA_UPDATE_AUTO_APPLY_ABORTED = ALARM_GROUP_SW_MGMT + ".521" 
+FM_ALARM_ID_SYSTEM_CONFIG_UPDATE_AUTO_APPLY_INPROGRESS = ALARM_GROUP_SW_MGMT + ".601" +FM_ALARM_ID_SYSTEM_CONFIG_UPDATE_AUTO_APPLY_ABORTING = ALARM_GROUP_SW_MGMT + ".602" +FM_ALARM_ID_SYSTEM_CONFIG_UPDATE_AUTO_APPLY_FAILED = ALARM_GROUP_SW_MGMT + ".603" + +FM_LOG_ID_SYSTEM_CONFIG_UPDATE_AUTO_APPLY_START = ALARM_GROUP_SW_MGMT + ".611" +FM_LOG_ID_SYSTEM_CONFIG_UPDATE_AUTO_APPLY_INPROGRESS = ALARM_GROUP_SW_MGMT + ".612" +FM_LOG_ID_SYSTEM_CONFIG_UPDATE_AUTO_APPLY_REJECTED = ALARM_GROUP_SW_MGMT + ".613" +FM_LOG_ID_SYSTEM_CONFIG_UPDATE_AUTO_APPLY_CANCELLED = ALARM_GROUP_SW_MGMT + ".614" +FM_LOG_ID_SYSTEM_CONFIG_UPDATE_AUTO_APPLY_FAILED = ALARM_GROUP_SW_MGMT + ".615" +FM_LOG_ID_SYSTEM_CONFIG_UPDATE_AUTO_APPLY_COMPLETED = ALARM_GROUP_SW_MGMT + ".616" +FM_LOG_ID_SYSTEM_CONFIG_UPDATE_AUTO_APPLY_ABORT = ALARM_GROUP_SW_MGMT + ".617" +FM_LOG_ID_SYSTEM_CONFIG_UPDATE_AUTO_APPLY_ABORTING = ALARM_GROUP_SW_MGMT + ".618" +FM_LOG_ID_SYSTEM_CONFIG_UPDATE_AUTO_APPLY_ABORT_REJECTED = ALARM_GROUP_SW_MGMT + ".619" +FM_LOG_ID_SYSTEM_CONFIG_UPDATE_AUTO_APPLY_ABORT_FAILED = ALARM_GROUP_SW_MGMT + ".620" +FM_LOG_ID_SYSTEM_CONFIG_UPDATE_AUTO_APPLY_ABORTED = ALARM_GROUP_SW_MGMT + ".621" FM_ALARM_STATE_SET = 'set' FM_ALARM_STATE_CLEAR = 'clear' diff --git a/fm-doc/fm_doc/events.yaml b/fm-doc/fm_doc/events.yaml index e48141e0..a35ffd34 100755 --- a/fm-doc/fm_doc/events.yaml +++ b/fm-doc/fm_doc/events.yaml @@ -3534,6 +3534,38 @@ Degrade_Affecting_Severity: none Context: starlingx +900.010: + Type: Alarm + Description: System Config update in progress + Entity_Instance_ID: host=controller + Severity: minor + Proposed_Repair_Action: Wait for system config update to complete + Maintenance_Action: + Inhibit_Alarms: + Alarm_Type: operational-violation + Probable_Cause: unspecified-reason + Service_Affecting: False + Suppression: False + Management_Affecting_Severity: warning + Degrade_Affecting_Severity: none + Context: starlingx + +900.011: + Type: Alarm + Description: System Config update aborted, 
configurations may not be fully updated + Entity_Instance_ID: host=<hostname> + Severity: minor + Proposed_Repair_Action: Lock the host, wait for the host resource in the deployment namespace to become in-sync, then unlock the host + Maintenance_Action: + Inhibit_Alarms: + Alarm_Type: operational-violation + Probable_Cause: unspecified-reason + Service_Affecting: False + Suppression: False + Management_Affecting_Severity: warning + Degrade_Affecting_Severity: none + Context: starlingx + 900.101: Type: Alarm Description: Software patch auto-apply in progress @@ -4323,4 +4355,162 @@ Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx + +900.601: + Type: Alarm + Description: System config update auto-apply in progress + Entity_Instance_ID: orchestration=system-config-update + Severity: major + Proposed_Repair_Action: Wait for system config update auto-apply to complete; if problem persists contact next level of support + Maintenance_Action: + Inhibit_Alarms: + Alarm_Type: equipment + Probable_Cause: unspecified-reason + Service_Affecting: True + Suppression: True + Management_Affecting_Severity: warning + Degrade_Affecting_Severity: none + Context: starlingx + +900.602: + Type: Alarm + Description: System config update auto-apply aborting + Entity_Instance_ID: orchestration=system-config-update + Severity: major + Proposed_Repair_Action: Wait for system config update auto-apply abort to complete; if problem persists contact next level of support + Maintenance_Action: + Inhibit_Alarms: + Alarm_Type: equipment + Probable_Cause: unspecified-reason + Service_Affecting: True + Suppression: True + Management_Affecting_Severity: warning + Degrade_Affecting_Severity: none + Context: starlingx + +900.603: + Type: Alarm + Description: System config update auto-apply failed. 
Command "sw-manager system-config-update-strategy apply" failed + Entity_Instance_ID: orchestration=system-config-update + Severity: critical + Proposed_Repair_Action: Attempt to apply system config update manually; if problem persists contact next level of support + Maintenance_Action: + Inhibit_Alarms: + Alarm_Type: equipment + Probable_Cause: underlying-resource-unavailable + Service_Affecting: True + Suppression: True + Management_Affecting_Severity: warning + Degrade_Affecting_Severity: none + Context: starlingx + +900.611: + Type: Log + Description: System config update auto-apply start + Entity_Instance_ID: orchestration=system-config-update + Severity: critical + Alarm_Type: equipment + Probable_Cause: unspecified-reason + Service_Affecting: False + Context: starlingx + +900.612: + Type: Log + Description: System config update auto-apply in progress + Entity_Instance_ID: orchestration=system-config-update + Severity: critical + Alarm_Type: equipment + Probable_Cause: unspecified-reason + Service_Affecting: False + Context: starlingx + +900.613: + Type: Log + Description: System config update auto-apply rejected + Entity_Instance_ID: orchestration=system-config-update + Severity: critical + Alarm_Type: equipment + Probable_Cause: unspecified-reason + Service_Affecting: False + Context: starlingx + +900.614: + Type: Log + Description: System config update auto-apply cancelled + Entity_Instance_ID: orchestration=system-config-update + Severity: critical + Alarm_Type: equipment + Probable_Cause: unspecified-reason + Service_Affecting: False + Context: starlingx + +900.615: + Type: Log + Description: System config update auto-apply failed + Entity_Instance_ID: orchestration=system-config-update + Severity: critical + Alarm_Type: equipment + Probable_Cause: unspecified-reason + Service_Affecting: False + Context: starlingx + +900.616: + Type: Log + Description: System config update auto-apply completed + Entity_Instance_ID: orchestration=system-config-update + Severity: 
critical + Alarm_Type: equipment + Probable_Cause: unspecified-reason + Service_Affecting: False + Context: starlingx + +900.617: + Type: Log + Description: System config update auto-apply abort + Entity_Instance_ID: orchestration=system-config-update + Severity: critical + Alarm_Type: equipment + Probable_Cause: unspecified-reason + Service_Affecting: False + Context: starlingx + +900.618: + Type: Log + Description: System config update auto-apply aborting + Entity_Instance_ID: orchestration=system-config-update + Severity: critical + Alarm_Type: equipment + Probable_Cause: unspecified-reason + Service_Affecting: False + Context: starlingx + +900.619: + Type: Log + Description: System config update auto-apply abort rejected + Entity_Instance_ID: orchestration=system-config-update + Severity: critical + Alarm_Type: equipment + Probable_Cause: unspecified-reason + Service_Affecting: False + Context: starlingx + +900.620: + Type: Log + Description: System config update auto-apply abort failed + Entity_Instance_ID: orchestration=system-config-update + Severity: critical + Alarm_Type: equipment + Probable_Cause: unspecified-reason + Service_Affecting: False + Context: starlingx + +900.621: + Type: Log + Description: System config update auto-apply aborted + Entity_Instance_ID: orchestration=system-config-update + Severity: critical + Alarm_Type: equipment + Probable_Cause: unspecified-reason + Service_Affecting: False + Context: starlingx ...