diff --git a/nfv/nfv-vim/nfv_vim/objects/_kube_upgrade.py b/nfv/nfv-vim/nfv_vim/objects/_kube_upgrade.py index 61d2d3af..953222bd 100644 --- a/nfv/nfv-vim/nfv_vim/objects/_kube_upgrade.py +++ b/nfv/nfv-vim/nfv_vim/objects/_kube_upgrade.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2020-2021 Wind River Systems, Inc. +# Copyright (c) 2020-2023 Wind River Systems, Inc. # # SPDX-License-Identifier: Apache-2.0 # @@ -18,6 +18,7 @@ from nfv_vim.objects._sw_update import SW_UPDATE_TYPE from nfv_vim.objects._sw_update import SwUpdate DLOG = debug.debug_get_logger('nfv_vim.objects.kube_upgrade') +DEFAULT_KUBE_AUDIT_RATE = 5 class KubeUpgrade(SwUpdate): @@ -29,8 +30,8 @@ class KubeUpgrade(SwUpdate): sw_update_type=SW_UPDATE_TYPE.KUBE_UPGRADE, sw_update_uuid=sw_update_uuid, strategy_data=strategy_data) - # TODO(abailey): we do not appear to populate _kube_upgrade_hosts - # consider removing + # these next two values are used by the audit + self._kube_upgrade = None self._kube_upgrade_hosts = list() def strategy_build(self, @@ -140,18 +141,10 @@ class KubeUpgrade(SwUpdate): elif (self.strategy.is_apply_failed() or self.strategy.is_apply_timed_out()): - for kube_upgrade_host in self._kube_upgrade_hosts: - if not self._alarms: - self._alarms = alarm.raise_sw_update_alarm( - self.alarm_type(SW_UPDATE_ALARM_TYPES.APPLY_FAILED)) - event_log.sw_update_issue_log( - self.event_id(SW_UPDATE_EVENT_IDS.APPLY_FAILED)) - break - - else: - if self._alarms: - alarm.clear_sw_update_alarm(self._alarms) - return False + # we do not raise additional alarms + if self._alarms: + alarm.clear_sw_update_alarm(self._alarms) + return False elif self.strategy.is_aborting(): if not self._alarms: @@ -167,6 +160,48 @@ class KubeUpgrade(SwUpdate): return True + @coroutine + def nfvi_kube_upgrade_callback(self, timer_id): + """ + Audit Kube Upgrade Callback + """ + from nfv_vim import strategy + response = (yield) + + if response['completed']: + DLOG.debug("Audit-Kube-Upgrade callback, response=%s." % response) + last_state = self._kube_upgrade.state if self._kube_upgrade else None + self._kube_upgrade = response['result-data'] + current_state = self._kube_upgrade.state if self._kube_upgrade else None + if last_state != current_state: + self.handle_event(strategy.STRATEGY_EVENT.KUBE_UPGRADE_CHANGED, + self._kube_upgrade) + else: + DLOG.error("Audit-Kube-Upgrade callback, not completed, " + "response=%s." % response) + + self._nfvi_audit_inprogress = False + + @coroutine + def nfvi_kube_host_upgrade_list_callback(self, timer_id): + """ + Audit Kube Host Upgrade Callback + """ + from nfv_vim import strategy + response = (yield) + + if response['completed']: + DLOG.debug("Audit-Kube-Host-Upgrade callback, response=%s." % response) + self._kube_upgrade_hosts = response['result-data'] + # todo(abailey): this needs to detect the change + self.handle_event(strategy.STRATEGY_EVENT.KUBE_HOST_UPGRADE_CHANGED, + self._kube_upgrade) + else: + DLOG.error("Audit-Kube-Upgrade callback, not completed, " + "response=%s." % response) + + self._nfvi_audit_inprogress = False + @coroutine def nfvi_audit(self): """ @@ -184,15 +219,35 @@ class KubeUpgrade(SwUpdate): self._nfvi_audit_inprogress = True while self._nfvi_audit_inprogress: timer_id = (yield) + # nfvi_alarms_callback sets timer to 2 seconds + # leave timer at 2 seconds for the next two audit calls - # nfvi_alarms_callback sets timer to 2 seconds. reset back to 30 - timers.timers_reschedule_timer(timer_id, 30) + DLOG.debug("Audit kube upgrade, timer_id=%s." % timer_id) + nfvi.nfvi_get_kube_upgrade( + self.nfvi_kube_upgrade_callback(timer_id)) + self._nfvi_audit_inprogress = True + while self._nfvi_audit_inprogress: + timer_id = (yield) + current_state = self._kube_upgrade.state if self._kube_upgrade else None + # only audit the kube hosts when upgrading kubelets + if current_state in ["upgrading-kubelets", + "upgraded-kubelets"]: + DLOG.debug("Audit kube upgrade hosts, timer_id=%s." % timer_id) + nfvi.nfvi_get_kube_host_upgrade_list( + self.nfvi_kube_host_upgrade_list_callback(timer_id)) + + self._nfvi_audit_inprogress = True + while self._nfvi_audit_inprogress: + timer_id = (yield) + + # set timer to DEFAULT_KUBE_AUDIT_RATE + timers.timers_reschedule_timer(timer_id, DEFAULT_KUBE_AUDIT_RATE) if not self.nfvi_update(): DLOG.info("Audit no longer needed.") break - DLOG.verbose("Audit kube upgrade still running, timer_id=%s." % - timer_id) + DLOG.debug("Audit kube upgrade still running, timer_id=%s." % + timer_id) self._nfvi_timer_id = None diff --git a/nfv/nfv-vim/nfv_vim/strategy/_strategy_defs.py b/nfv/nfv-vim/nfv_vim/strategy/_strategy_defs.py index e146fd39..221f30dd 100755 --- a/nfv/nfv-vim/nfv_vim/strategy/_strategy_defs.py +++ b/nfv/nfv-vim/nfv_vim/strategy/_strategy_defs.py @@ -30,11 +30,13 @@ class EventNames(object): MIGRATE_INSTANCES_FAILED = Constant('migrate-instances-failed') KUBE_HOST_CORDON_FAILED = Constant('kube-host-cordon-failed') KUBE_HOST_UNCORDON_FAILED = Constant('kube-host-uncordon-failed') + KUBE_HOST_UPGRADE_CHANGED = Constant('kube-host-upgrade-changed') KUBE_HOST_UPGRADE_CONTROL_PLANE_FAILED = \ Constant('kube-host-upgrade-control-plane-failed') KUBE_HOST_UPGRADE_KUBELET_FAILED = \ Constant('kube-host-upgrade-kubelet-failed') KUBE_ROOTCA_UPDATE_HOST_FAILED = Constant('kube-rootca-update-host-failed') + KUBE_UPGRADE_CHANGED = Constant('kube-upgrade-changed') # Constants diff --git a/nfv/nfv-vim/nfv_vim/strategy/_strategy_steps.py b/nfv/nfv-vim/nfv_vim/strategy/_strategy_steps.py index d481d252..e1b12de1 100755 --- a/nfv/nfv-vim/nfv_vim/strategy/_strategy_steps.py +++ b/nfv/nfv-vim/nfv_vim/strategy/_strategy_steps.py @@ -3882,14 +3882,18 @@ class AbstractKubeUpgradeStep(AbstractStrategyStep): from nfv_vim import nfvi DLOG.debug("Step (%s) handle event (%s)." % (self._name, event)) - if event == STRATEGY_EVENT.HOST_AUDIT: + if event == STRATEGY_EVENT.KUBE_UPGRADE_CHANGED: + # todo(abailey): use event data rather than re-querying + self._query_inprogress = True + nfvi.nfvi_get_kube_upgrade(self._get_kube_upgrade_callback()) + return True + elif event == STRATEGY_EVENT.HOST_AUDIT: if 0 == self._wait_time: self._wait_time = timers.get_monotonic_timestamp_in_ms() - now_ms = timers.get_monotonic_timestamp_in_ms() secs_expired = (now_ms - self._wait_time) // 1000 - # Wait at least 60 seconds before checking upgrade for first time - if 60 <= secs_expired and not self._query_inprogress: + # Wait 30 seconds before checking kube upgrade for first time + if 30 <= secs_expired and not self._query_inprogress: self._query_inprogress = True nfvi.nfvi_get_kube_upgrade(self._get_kube_upgrade_callback()) return True @@ -4379,8 +4383,8 @@ class KubeHostUpgradeControlPlaneStep(AbstractKubeHostUpgradeStep): from nfv_vim import directors - DLOG.info("Step (%s) apply to hostnames (%s)." - % (self._name, self._host_names)) + DLOG.debug("Step (%s) apply to hostnames (%s)." + % (self._name, self._host_names)) host_director = directors.get_host_director() operation = \ host_director.kube_upgrade_hosts_control_plane(self._host_names, @@ -4456,14 +4460,19 @@ class KubeHostUpgradeKubeletStep(AbstractKubeHostListUpgradeStep): result, "kube host upgrade kubelet (%s) failed" % host.name) return True + elif event == STRATEGY_EVENT.KUBE_HOST_UPGRADE_CHANGED: + self._query_inprogress = True + nfvi.nfvi_get_kube_host_upgrade_list( + self._get_kube_host_upgrade_list_callback()) + return True elif event == STRATEGY_EVENT.HOST_AUDIT: if 0 == self._wait_time: self._wait_time = timers.get_monotonic_timestamp_in_ms() now_ms = timers.get_monotonic_timestamp_in_ms() secs_expired = (now_ms - self._wait_time) // 1000 - # Wait at least 60 seconds before checking upgrade for first time - if 60 <= secs_expired and not self._query_inprogress: + # Wait at least 30 seconds before checking kube hosts for first time + if 30 <= secs_expired and not self._query_inprogress: self._query_inprogress = True nfvi.nfvi_get_kube_host_upgrade_list( self._get_kube_host_upgrade_list_callback()) @@ -4475,13 +4484,12 @@ class KubeHostUpgradeKubeletStep(AbstractKubeHostListUpgradeStep): from nfv_vim import directors - DLOG.info("Step (%s) apply to hostnames (%s)." - % (self._name, self._host_names)) + DLOG.debug("Step (%s) apply to hostnames (%s)." + % (self._name, self._host_names)) host_director = directors.get_host_director() operation = \ host_director.kube_upgrade_hosts_kubelet(self._host_names, self._force) - if operation.is_inprogress(): return strategy.STRATEGY_STEP_RESULT.WAIT, "" elif operation.is_failed():