Add 850.002 alarm for K8S cluster unavailable

This adds the 850.002 alarm to indicate that the K8S cluster is unavailable due to an unhealthy/failed kube-apiserver. Test Plan: PASS: The kube-apiserver was interrupted/stopped by changing its configuration files, and the alarm was raised. PASS: The alarm was cleared when the configuration was reset and the kube-apiserver was restarted. Change-Id: I335179ea98ef63d7c35c89d82328a52ab2391f5c Signed-off-by: rakshith mr <rakshith.mr@windriver.com>
2024-01-29 08:20:34 -05:00 · 2024-01-29 08:20:34 -05:00 · bfd1f29714
parent 1684514c79
commit bfd1f29714
2 changed files with 20 additions and 0 deletions
--- a/fm-api/source/fm_api/constants.py
+++ b/fm-api/source/fm_api/constants.py
@@ -309,6 +309,9 @@ FM_ALARM_ID_KUBE_ROOTCA_UPDATE_IN_PROGRESS = ALARM_GROUP_SW_MGMT + ".008"
 # Kubernetes RootCA Update abort alarm id
 FM_ALARM_ID_KUBE_ROOTCA_UPDATE_ABORTED = ALARM_GROUP_SW_MGMT + ".009"

+# Kubernetes cluster Down alarm id
+FM_ALARM_ID_KUBE_CLUSTER_DOWN = ALARM_GROUP_K8S + ".002"
+
 # The SYSTEM_CONFIG_UPDATE alarms are originated by vim strategy which is the
 # same as the other sw-mgmt alarms, put them in the same group
 # System Config Update alarm id
--- a/fm-doc/fm_doc/events.yaml
+++ b/fm-doc/fm_doc/events.yaml
@@ -3431,6 +3431,23 @@
    Degrade_Affecting_Severity: none
    Context: none

+850.002:
+    Type: Alarm
+    Description: K8s cluster unreachable
+    Entity_Instance_ID: kubernetes=k8s-cluster
+    Severity: major
+    Proposed_Repair_Action: "If problem persists
+                             contact next level of support."
+    Maintenance_Action:
+    Inhibit_Alarms:
+    Alarm_Type: communication
+    Probable_Cause: out-of-service
+    Service_Affecting: True
+    Suppression: False
+    Management_Affecting_Severity: none
+    Degrade_Affecting_Severity: none
+    Context: none
+
 #---------------------------------------------------------------------------
 #   SOFTWARE
 #---------------------------------------------------------------------------