From df3fffe42656df80f28ec3b9c01b4474cfc07a92 Mon Sep 17 00:00:00 2001 From: rakshith mr Date: Mon, 29 Jan 2024 08:20:34 -0500 Subject: [PATCH] New alarm 850.002 - used when k8s cluster is unreachable Configuring new alarm 850.002, which will be used when the K8s periodic audit happens and any of the endpoint health checks fail. Test Plan: PASS: Performed tox test locally and packages were built successfully PASS: Verify that a k8s orchestrated upgrade is blocked if it is tried while the alarm is set, and that it completes if it is tried once the alarm is cleared. Story: 2011037 Task: 49535 Change-Id: I335179ea98ef63d7c35c89d82328a52ab2391f5c Signed-off-by: rakshith mr --- fm-api/source/fm_api/constants.py | 3 ++- fm-doc/fm_doc/events.yaml | 19 ++++++++++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/fm-api/source/fm_api/constants.py b/fm-api/source/fm_api/constants.py index a890215e..54370124 100755 --- a/fm-api/source/fm_api/constants.py +++ b/fm-api/source/fm_api/constants.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2013-2023 Wind River Systems, Inc. +# Copyright (c) 2013-2024 Wind River Systems, Inc. # # SPDX-License-Identifier: Apache-2.0 # @@ -114,6 +114,7 @@ FM_ALARM_ID_STORAGE_BACKEND_FAILED = ALARM_GROUP_STORAGE + ".104" # Kubernetes Resource Alarms FM_ALARM_ID_K8S_RESOURCE_PV = ALARM_GROUP_K8S + ".001" +FM_ALARM_ID_K8S_CLUSTER_DOWN = ALARM_GROUP_K8S + ".002" # Deployment Alarm id diff --git a/fm-doc/fm_doc/events.yaml b/fm-doc/fm_doc/events.yaml index 47e39cde..57b5d513 100755 --- a/fm-doc/fm_doc/events.yaml +++ b/fm-doc/fm_doc/events.yaml @@ -1,7 +1,7 @@ --- # -# Copyright (c) 2013-2023 Wind River Systems, Inc. +# Copyright (c) 2013-2024 Wind River Systems, Inc. 
# # SPDX-License-Identifier: Apache-2.0 # @@ -3431,6 +3431,23 @@ Degrade_Affecting_Severity: none Context: none +850.002: + Type: Alarm + Description: Kubernetes cluster unreachable + Entity_Instance_ID: kubernetes=k8s-health-check-failed + Severity: major + Proposed_Repair_Action: "If problem persists + contact next level of support." + Maintenance_Action: + Inhibit_Alarms: + Alarm_Type: communication + Probable_Cause: out-of-service + Service_Affecting: True + Suppression: False + Management_Affecting_Severity: major + Degrade_Affecting_Severity: none + Context: none + #--------------------------------------------------------------------------- # SOFTWARE #---------------------------------------------------------------------------