Merge "Ignore certain alarms during kube upgrade pre-check"
This commit is contained in:
commit
4af71c2e29
|
@ -14,6 +14,12 @@ from dcmanager.common import utils
|
||||||
from dcmanager.db import api as db_api
|
from dcmanager.db import api as db_api
|
||||||
from dcmanager.orchestrator.states.base import BaseState
|
from dcmanager.orchestrator.states.base import BaseState
|
||||||
|
|
||||||
|
# The following alarms can occur during a VIM-orchestrated k8s upgrade on the subcloud.
|
||||||
|
# By ignoring these alarms, a subcloud k8s upgrade can be
|
||||||
|
# retried after a failure using the DC orchestrator.
|
||||||
|
ALARM_IGNORE_LIST = ['100.003', '200.001', '700.004', '750.006',
|
||||||
|
'900.007', '900.401']
|
||||||
|
|
||||||
|
|
||||||
class KubeUpgradePreCheckState(BaseState):
|
class KubeUpgradePreCheckState(BaseState):
|
||||||
"""Perform pre check operations to determine if kube upgrade is required"""
|
"""Perform pre check operations to determine if kube upgrade is required"""
|
||||||
|
@ -46,6 +52,22 @@ class KubeUpgradePreCheckState(BaseState):
|
||||||
system_health)
|
system_health)
|
||||||
if not fails or (len(fails) == 1 and failed_alarm_check and no_mgmt_alarms):
|
if not fails or (len(fails) == 1 and failed_alarm_check and no_mgmt_alarms):
|
||||||
self.info_log(strategy_step, "Kubernetes upgrade health check passed.")
|
self.info_log(strategy_step, "Kubernetes upgrade health check passed.")
|
||||||
|
elif (len(fails) == 1 and failed_alarm_check):
|
||||||
|
alarms = self.get_fm_client(self.region_name).get_alarms()
|
||||||
|
for alarm in alarms:
|
||||||
|
if alarm.alarm_id not in ALARM_IGNORE_LIST:
|
||||||
|
if alarm.mgmt_affecting == "True":
|
||||||
|
error_desc_msg = ("Kubernetes upgrade health check failed due to alarm %s. "
|
||||||
|
"Kubernetes upgrade health: \n %s" %
|
||||||
|
(alarm.alarm_id, system_health))
|
||||||
|
db_api.subcloud_update(
|
||||||
|
self.context, strategy_step.subcloud_id,
|
||||||
|
error_description=error_desc_msg)
|
||||||
|
self.error_log(strategy_step, "\n" + system_health)
|
||||||
|
raise Exception(("Kubernetes upgrade health check failed due to alarm %s. "
|
||||||
|
"Please run 'system health-query-kube-upgrade' "
|
||||||
|
"command on the subcloud or %s on central for details." %
|
||||||
|
(alarm.alarm_id, ERROR_DESC_CMD)))
|
||||||
else:
|
else:
|
||||||
error_desc_msg = ("Kubernetes upgrade health check failed. \n %s" %
|
error_desc_msg = ("Kubernetes upgrade health check failed. \n %s" %
|
||||||
system_health)
|
system_health)
|
||||||
|
|
|
@ -14,6 +14,7 @@ from dcmanager.common.consts import STRATEGY_STATE_KUBE_UPGRADE_PRE_CHECK
|
||||||
from dcmanager.db.sqlalchemy import api as db_api
|
from dcmanager.db.sqlalchemy import api as db_api
|
||||||
|
|
||||||
from dcmanager.tests.unit.common import fake_strategy
|
from dcmanager.tests.unit.common import fake_strategy
|
||||||
|
from dcmanager.tests.unit.orchestrator.states.fakes import FakeAlarm
|
||||||
from dcmanager.tests.unit.orchestrator.states.fakes import FakeKubeUpgrade
|
from dcmanager.tests.unit.orchestrator.states.fakes import FakeKubeUpgrade
|
||||||
from dcmanager.tests.unit.orchestrator.states.fakes import FakeKubeVersion
|
from dcmanager.tests.unit.orchestrator.states.fakes import FakeKubeVersion
|
||||||
from dcmanager.tests.unit.orchestrator.states.fakes \
|
from dcmanager.tests.unit.orchestrator.states.fakes \
|
||||||
|
@ -58,6 +59,10 @@ KUBERNETES_UPGRADE_HEALTH_RESPONSE_NON_MGMT_AFFECTING_ALARM = \
|
||||||
"All kubernetes control plane pods are ready: [OK]\n" \
|
"All kubernetes control plane pods are ready: [OK]\n" \
|
||||||
"All kubernetes applications are in a valid state: [OK]"
|
"All kubernetes applications are in a valid state: [OK]"
|
||||||
|
|
||||||
|
MEMORY_THRESHOLD_ALARM = FakeAlarm('100.101', 'True')
|
||||||
|
KUBERNETES_UPGRADE_ALARM = FakeAlarm('900.007', 'True')
|
||||||
|
CEPH_ALARM = FakeAlarm('250.002', 'False')
|
||||||
|
|
||||||
|
|
||||||
class TestKubeUpgradePreCheckStage(TestKubeUpgradeState):
|
class TestKubeUpgradePreCheckStage(TestKubeUpgradeState):
|
||||||
|
|
||||||
|
@ -76,6 +81,7 @@ class TestKubeUpgradePreCheckStage(TestKubeUpgradeState):
|
||||||
self.sysinv_client.get_kube_upgrades = mock.MagicMock()
|
self.sysinv_client.get_kube_upgrades = mock.MagicMock()
|
||||||
self.sysinv_client.get_kube_upgrades.return_value = []
|
self.sysinv_client.get_kube_upgrades.return_value = []
|
||||||
|
|
||||||
|
self.fm_client.get_alarms = mock.MagicMock()
|
||||||
self.sysinv_client.get_kube_upgrade_health = mock.MagicMock()
|
self.sysinv_client.get_kube_upgrade_health = mock.MagicMock()
|
||||||
self.sysinv_client.get_kube_upgrade_health.return_value = KUBERNETES_UPGRADE_HEALTH_RESPONSE_SUCCESS
|
self.sysinv_client.get_kube_upgrade_health.return_value = KUBERNETES_UPGRADE_HEALTH_RESPONSE_SUCCESS
|
||||||
|
|
||||||
|
@ -119,6 +125,7 @@ class TestKubeUpgradePreCheckStage(TestKubeUpgradeState):
|
||||||
self.subcloud.id,
|
self.subcloud.id,
|
||||||
deploy_status=DEPLOY_STATE_DONE)
|
deploy_status=DEPLOY_STATE_DONE)
|
||||||
|
|
||||||
|
self.fm_client.get_alarms.return_value = [MEMORY_THRESHOLD_ALARM, KUBERNETES_UPGRADE_ALARM]
|
||||||
self.sysinv_client.get_kube_upgrade_health.return_value = KUBERNETES_UPGRADE_HEALTH_RESPONSE_MGMT_AFFECTING_ALARM
|
self.sysinv_client.get_kube_upgrade_health.return_value = KUBERNETES_UPGRADE_HEALTH_RESPONSE_MGMT_AFFECTING_ALARM
|
||||||
self.sysinv_client.get_kube_upgrades.return_value = [FakeKubeUpgrade()]
|
self.sysinv_client.get_kube_upgrades.return_value = [FakeKubeUpgrade()]
|
||||||
self.sysinv_client.get_kube_versions.return_value = [
|
self.sysinv_client.get_kube_versions.return_value = [
|
||||||
|
@ -129,10 +136,30 @@ class TestKubeUpgradePreCheckStage(TestKubeUpgradeState):
|
||||||
]
|
]
|
||||||
self.worker.perform_state_action(self.strategy_step)
|
self.worker.perform_state_action(self.strategy_step)
|
||||||
self.sysinv_client.get_kube_upgrade_health.assert_called_once()
|
self.sysinv_client.get_kube_upgrade_health.assert_called_once()
|
||||||
|
|
||||||
self.assert_step_updated(self.strategy_step.subcloud_id,
|
self.assert_step_updated(self.strategy_step.subcloud_id,
|
||||||
STRATEGY_STATE_FAILED)
|
STRATEGY_STATE_FAILED)
|
||||||
|
|
||||||
|
def test_pre_check_subcloud_failed_health_check_with_allowed_management_alarms(self):
|
||||||
|
"""Test pre check step where subcloud has management affecting alarms"""
|
||||||
|
|
||||||
|
db_api.subcloud_update(self.ctx,
|
||||||
|
self.subcloud.id,
|
||||||
|
deploy_status=DEPLOY_STATE_DONE)
|
||||||
|
|
||||||
|
self.fm_client.get_alarms.return_value = [CEPH_ALARM, KUBERNETES_UPGRADE_ALARM]
|
||||||
|
self.sysinv_client.get_kube_upgrade_health.return_value = KUBERNETES_UPGRADE_HEALTH_RESPONSE_MGMT_AFFECTING_ALARM
|
||||||
|
self.sysinv_client.get_kube_upgrades.return_value = [FakeKubeUpgrade()]
|
||||||
|
self.sysinv_client.get_kube_versions.return_value = [
|
||||||
|
FakeKubeVersion(obj_id=1,
|
||||||
|
version=UPGRADED_KUBE_VERSION,
|
||||||
|
target=True,
|
||||||
|
state='active'),
|
||||||
|
]
|
||||||
|
self.worker.perform_state_action(self.strategy_step)
|
||||||
|
self.sysinv_client.get_kube_upgrade_health.assert_called_once()
|
||||||
|
self.assert_step_updated(self.strategy_step.subcloud_id,
|
||||||
|
STRATEGY_STATE_KUBE_CREATING_VIM_KUBE_UPGRADE_STRATEGY)
|
||||||
|
|
||||||
def test_pre_check_subcloud_failed_health_check_with_non_management_alarms(self):
|
def test_pre_check_subcloud_failed_health_check_with_non_management_alarms(self):
|
||||||
"""Test pre check step where subcloud has non-management affecting alarms"""
|
"""Test pre check step where subcloud has non-management affecting alarms"""
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue