Audit kube upgrade changes more frequently
The kube upgrade orchestrator used the built-in host-audit events (emitted at a thirty-second interval) to determine when a kube-upgrade or kube-host-upgrade query should be invoked. This meant that kube-upgrade steps would typically need at least two of those intervals to detect even a relatively quick transition. Now the kube-upgrade audit is responsible for the kube upgrade and kube host upgrade queries, and runs at its own interval (5 seconds) so that the steps can detect completion more rapidly. As a result, an AIO-SX kube upgrade is two to three minutes faster. Test Plan: PASS: AIO-SX kube upgrade Story: 2010565 Task: 48173 Signed-off-by: Al Bailey <al.bailey@windriver.com> Change-Id: Ib4878322d0846b8df935f643352b028ff11fc184
This commit is contained in:
parent
c908b9625d
commit
f1f7fe3292
|
@ -1,5 +1,5 @@
|
||||||
#
|
#
|
||||||
# Copyright (c) 2020-2021 Wind River Systems, Inc.
|
# Copyright (c) 2020-2023 Wind River Systems, Inc.
|
||||||
#
|
#
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
#
|
#
|
||||||
|
@ -18,6 +18,7 @@ from nfv_vim.objects._sw_update import SW_UPDATE_TYPE
|
||||||
from nfv_vim.objects._sw_update import SwUpdate
|
from nfv_vim.objects._sw_update import SwUpdate
|
||||||
|
|
||||||
DLOG = debug.debug_get_logger('nfv_vim.objects.kube_upgrade')
|
DLOG = debug.debug_get_logger('nfv_vim.objects.kube_upgrade')
|
||||||
|
DEFAULT_KUBE_AUDIT_RATE = 5
|
||||||
|
|
||||||
|
|
||||||
class KubeUpgrade(SwUpdate):
|
class KubeUpgrade(SwUpdate):
|
||||||
|
@ -29,8 +30,8 @@ class KubeUpgrade(SwUpdate):
|
||||||
sw_update_type=SW_UPDATE_TYPE.KUBE_UPGRADE,
|
sw_update_type=SW_UPDATE_TYPE.KUBE_UPGRADE,
|
||||||
sw_update_uuid=sw_update_uuid,
|
sw_update_uuid=sw_update_uuid,
|
||||||
strategy_data=strategy_data)
|
strategy_data=strategy_data)
|
||||||
# TODO(abailey): we do not appear to populate _kube_upgrade_hosts
|
# these next two values are used by the audit
|
||||||
# consider removing
|
self._kube_upgrade = None
|
||||||
self._kube_upgrade_hosts = list()
|
self._kube_upgrade_hosts = list()
|
||||||
|
|
||||||
def strategy_build(self,
|
def strategy_build(self,
|
||||||
|
@ -140,18 +141,10 @@ class KubeUpgrade(SwUpdate):
|
||||||
|
|
||||||
elif (self.strategy.is_apply_failed() or
|
elif (self.strategy.is_apply_failed() or
|
||||||
self.strategy.is_apply_timed_out()):
|
self.strategy.is_apply_timed_out()):
|
||||||
for kube_upgrade_host in self._kube_upgrade_hosts:
|
# we do not raise additional alarms
|
||||||
if not self._alarms:
|
if self._alarms:
|
||||||
self._alarms = alarm.raise_sw_update_alarm(
|
alarm.clear_sw_update_alarm(self._alarms)
|
||||||
self.alarm_type(SW_UPDATE_ALARM_TYPES.APPLY_FAILED))
|
return False
|
||||||
event_log.sw_update_issue_log(
|
|
||||||
self.event_id(SW_UPDATE_EVENT_IDS.APPLY_FAILED))
|
|
||||||
break
|
|
||||||
|
|
||||||
else:
|
|
||||||
if self._alarms:
|
|
||||||
alarm.clear_sw_update_alarm(self._alarms)
|
|
||||||
return False
|
|
||||||
|
|
||||||
elif self.strategy.is_aborting():
|
elif self.strategy.is_aborting():
|
||||||
if not self._alarms:
|
if not self._alarms:
|
||||||
|
@ -167,6 +160,48 @@ class KubeUpgrade(SwUpdate):
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
@coroutine
def nfvi_kube_upgrade_callback(self, timer_id):
    """
    Audit Kube Upgrade Callback

    Receives the response of the kube upgrade query through (yield).
    When the query completed, the result data is cached in
    self._kube_upgrade and, if the upgrade state differs from the
    previously cached one, a KUBE_UPGRADE_CHANGED strategy event is
    raised with the new kube upgrade as event data. When the query did
    not complete, the response is logged as an error. Afterwards the
    audit in-progress flag is cleared.

    timer_id is accepted but not used in this body.
    """
    from nfv_vim import strategy

    response = (yield)

    if not response['completed']:
        DLOG.error("Audit-Kube-Upgrade callback, not completed, "
                   "response=%s." % response)
    else:
        DLOG.debug("Audit-Kube-Upgrade callback, response=%s." % response)

        # state of the cached upgrade before it is overwritten
        previous = self._kube_upgrade
        state_before = previous.state if previous else None

        latest = response['result-data']
        self._kube_upgrade = latest
        state_after = latest.state if latest else None

        # only notify the strategy when the state actually moved
        if state_before != state_after:
            self.handle_event(strategy.STRATEGY_EVENT.KUBE_UPGRADE_CHANGED,
                              latest)

    self._nfvi_audit_inprogress = False
|
||||||
|
|
||||||
|
@coroutine
def nfvi_kube_host_upgrade_list_callback(self, timer_id):
    """
    Audit Kube Host Upgrade Callback

    Receives the response of the kube host upgrade list query through
    (yield). When the query completed, the result data is cached in
    self._kube_upgrade_hosts and a KUBE_HOST_UPGRADE_CHANGED strategy
    event is raised. When the query did not complete, the response is
    logged as an error. Afterwards the audit in-progress flag is
    cleared.

    timer_id is accepted but not used in this body.
    """
    from nfv_vim import strategy

    response = (yield)

    if response['completed']:
        DLOG.debug("Audit-Kube-Host-Upgrade callback, response=%s."
                   % response)
        self._kube_upgrade_hosts = response['result-data']
        # todo(abailey): this needs to detect the change
        # NOTE(review): the event data passed here is the cached kube
        # upgrade, not the refreshed host list -- confirm that this is
        # what the event handlers expect.
        self.handle_event(strategy.STRATEGY_EVENT.KUBE_HOST_UPGRADE_CHANGED,
                          self._kube_upgrade)
    else:
        # The message names this callback so that a failed host list
        # query is not mistaken for a failed kube upgrade query.
        DLOG.error("Audit-Kube-Host-Upgrade callback, not completed, "
                   "response=%s." % response)

    self._nfvi_audit_inprogress = False
|
||||||
|
|
||||||
@coroutine
|
@coroutine
|
||||||
def nfvi_audit(self):
|
def nfvi_audit(self):
|
||||||
"""
|
"""
|
||||||
|
@ -184,15 +219,35 @@ class KubeUpgrade(SwUpdate):
|
||||||
self._nfvi_audit_inprogress = True
|
self._nfvi_audit_inprogress = True
|
||||||
while self._nfvi_audit_inprogress:
|
while self._nfvi_audit_inprogress:
|
||||||
timer_id = (yield)
|
timer_id = (yield)
|
||||||
|
# nfvi_alarms_callback sets timer to 2 seconds
|
||||||
|
# leave timer at 2 seconds for the next two audit calls
|
||||||
|
|
||||||
# nfvi_alarms_callback sets timer to 2 seconds. reset back to 30
|
DLOG.debug("Audit kube upgrade, timer_id=%s." % timer_id)
|
||||||
timers.timers_reschedule_timer(timer_id, 30)
|
nfvi.nfvi_get_kube_upgrade(
|
||||||
|
self.nfvi_kube_upgrade_callback(timer_id))
|
||||||
|
self._nfvi_audit_inprogress = True
|
||||||
|
while self._nfvi_audit_inprogress:
|
||||||
|
timer_id = (yield)
|
||||||
|
|
||||||
|
current_state = self._kube_upgrade.state if self._kube_upgrade else None
|
||||||
|
# only audit the kube hosts when upgrading kubelets
|
||||||
|
if current_state in ["upgrading-kubelets",
|
||||||
|
"upgraded-kubelets"]:
|
||||||
|
DLOG.debug("Audit kube upgrade hosts, timer_id=%s." % timer_id)
|
||||||
|
nfvi.nfvi_get_kube_host_upgrade_list(
|
||||||
|
self.nfvi_kube_host_upgrade_list_callback(timer_id))
|
||||||
|
|
||||||
|
self._nfvi_audit_inprogress = True
|
||||||
|
while self._nfvi_audit_inprogress:
|
||||||
|
timer_id = (yield)
|
||||||
|
|
||||||
|
# set timer to DEFAULT_KUBE_AUDIT_RATE
|
||||||
|
timers.timers_reschedule_timer(timer_id, DEFAULT_KUBE_AUDIT_RATE)
|
||||||
if not self.nfvi_update():
|
if not self.nfvi_update():
|
||||||
DLOG.info("Audit no longer needed.")
|
DLOG.info("Audit no longer needed.")
|
||||||
break
|
break
|
||||||
|
|
||||||
DLOG.verbose("Audit kube upgrade still running, timer_id=%s." %
|
DLOG.debug("Audit kube upgrade still running, timer_id=%s." %
|
||||||
timer_id)
|
timer_id)
|
||||||
|
|
||||||
self._nfvi_timer_id = None
|
self._nfvi_timer_id = None
|
||||||
|
|
|
@ -30,11 +30,13 @@ class EventNames(object):
|
||||||
MIGRATE_INSTANCES_FAILED = Constant('migrate-instances-failed')
|
MIGRATE_INSTANCES_FAILED = Constant('migrate-instances-failed')
|
||||||
KUBE_HOST_CORDON_FAILED = Constant('kube-host-cordon-failed')
|
KUBE_HOST_CORDON_FAILED = Constant('kube-host-cordon-failed')
|
||||||
KUBE_HOST_UNCORDON_FAILED = Constant('kube-host-uncordon-failed')
|
KUBE_HOST_UNCORDON_FAILED = Constant('kube-host-uncordon-failed')
|
||||||
|
KUBE_HOST_UPGRADE_CHANGED = Constant('kube-host-upgrade-changed')
|
||||||
KUBE_HOST_UPGRADE_CONTROL_PLANE_FAILED = \
|
KUBE_HOST_UPGRADE_CONTROL_PLANE_FAILED = \
|
||||||
Constant('kube-host-upgrade-control-plane-failed')
|
Constant('kube-host-upgrade-control-plane-failed')
|
||||||
KUBE_HOST_UPGRADE_KUBELET_FAILED = \
|
KUBE_HOST_UPGRADE_KUBELET_FAILED = \
|
||||||
Constant('kube-host-upgrade-kubelet-failed')
|
Constant('kube-host-upgrade-kubelet-failed')
|
||||||
KUBE_ROOTCA_UPDATE_HOST_FAILED = Constant('kube-rootca-update-host-failed')
|
KUBE_ROOTCA_UPDATE_HOST_FAILED = Constant('kube-rootca-update-host-failed')
|
||||||
|
KUBE_UPGRADE_CHANGED = Constant('kube-upgrade-changed')
|
||||||
|
|
||||||
|
|
||||||
# Constants
|
# Constants
|
||||||
|
|
|
@ -3882,14 +3882,18 @@ class AbstractKubeUpgradeStep(AbstractStrategyStep):
|
||||||
from nfv_vim import nfvi
|
from nfv_vim import nfvi
|
||||||
|
|
||||||
DLOG.debug("Step (%s) handle event (%s)." % (self._name, event))
|
DLOG.debug("Step (%s) handle event (%s)." % (self._name, event))
|
||||||
if event == STRATEGY_EVENT.HOST_AUDIT:
|
if event == STRATEGY_EVENT.KUBE_UPGRADE_CHANGED:
|
||||||
|
# todo(abailey): use event data rather than re-querying
|
||||||
|
self._query_inprogress = True
|
||||||
|
nfvi.nfvi_get_kube_upgrade(self._get_kube_upgrade_callback())
|
||||||
|
return True
|
||||||
|
elif event == STRATEGY_EVENT.HOST_AUDIT:
|
||||||
if 0 == self._wait_time:
|
if 0 == self._wait_time:
|
||||||
self._wait_time = timers.get_monotonic_timestamp_in_ms()
|
self._wait_time = timers.get_monotonic_timestamp_in_ms()
|
||||||
|
|
||||||
now_ms = timers.get_monotonic_timestamp_in_ms()
|
now_ms = timers.get_monotonic_timestamp_in_ms()
|
||||||
secs_expired = (now_ms - self._wait_time) // 1000
|
secs_expired = (now_ms - self._wait_time) // 1000
|
||||||
# Wait at least 60 seconds before checking upgrade for first time
|
# Wait 30 seconds before checking kube upgrade for first time
|
||||||
if 60 <= secs_expired and not self._query_inprogress:
|
if 30 <= secs_expired and not self._query_inprogress:
|
||||||
self._query_inprogress = True
|
self._query_inprogress = True
|
||||||
nfvi.nfvi_get_kube_upgrade(self._get_kube_upgrade_callback())
|
nfvi.nfvi_get_kube_upgrade(self._get_kube_upgrade_callback())
|
||||||
return True
|
return True
|
||||||
|
@ -4379,8 +4383,8 @@ class KubeHostUpgradeControlPlaneStep(AbstractKubeHostUpgradeStep):
|
||||||
|
|
||||||
from nfv_vim import directors
|
from nfv_vim import directors
|
||||||
|
|
||||||
DLOG.info("Step (%s) apply to hostnames (%s)."
|
DLOG.debug("Step (%s) apply to hostnames (%s)."
|
||||||
% (self._name, self._host_names))
|
% (self._name, self._host_names))
|
||||||
host_director = directors.get_host_director()
|
host_director = directors.get_host_director()
|
||||||
operation = \
|
operation = \
|
||||||
host_director.kube_upgrade_hosts_control_plane(self._host_names,
|
host_director.kube_upgrade_hosts_control_plane(self._host_names,
|
||||||
|
@ -4456,14 +4460,19 @@ class KubeHostUpgradeKubeletStep(AbstractKubeHostListUpgradeStep):
|
||||||
result,
|
result,
|
||||||
"kube host upgrade kubelet (%s) failed" % host.name)
|
"kube host upgrade kubelet (%s) failed" % host.name)
|
||||||
return True
|
return True
|
||||||
|
elif event == STRATEGY_EVENT.KUBE_HOST_UPGRADE_CHANGED:
|
||||||
|
self._query_inprogress = True
|
||||||
|
nfvi.nfvi_get_kube_host_upgrade_list(
|
||||||
|
self._get_kube_host_upgrade_list_callback())
|
||||||
|
return True
|
||||||
elif event == STRATEGY_EVENT.HOST_AUDIT:
|
elif event == STRATEGY_EVENT.HOST_AUDIT:
|
||||||
if 0 == self._wait_time:
|
if 0 == self._wait_time:
|
||||||
self._wait_time = timers.get_monotonic_timestamp_in_ms()
|
self._wait_time = timers.get_monotonic_timestamp_in_ms()
|
||||||
|
|
||||||
now_ms = timers.get_monotonic_timestamp_in_ms()
|
now_ms = timers.get_monotonic_timestamp_in_ms()
|
||||||
secs_expired = (now_ms - self._wait_time) // 1000
|
secs_expired = (now_ms - self._wait_time) // 1000
|
||||||
# Wait at least 60 seconds before checking upgrade for first time
|
# Wait at least 30 seconds before checking kube hosts for first time
|
||||||
if 60 <= secs_expired and not self._query_inprogress:
|
if 30 <= secs_expired and not self._query_inprogress:
|
||||||
self._query_inprogress = True
|
self._query_inprogress = True
|
||||||
nfvi.nfvi_get_kube_host_upgrade_list(
|
nfvi.nfvi_get_kube_host_upgrade_list(
|
||||||
self._get_kube_host_upgrade_list_callback())
|
self._get_kube_host_upgrade_list_callback())
|
||||||
|
@ -4475,13 +4484,12 @@ class KubeHostUpgradeKubeletStep(AbstractKubeHostListUpgradeStep):
|
||||||
|
|
||||||
from nfv_vim import directors
|
from nfv_vim import directors
|
||||||
|
|
||||||
DLOG.info("Step (%s) apply to hostnames (%s)."
|
DLOG.debug("Step (%s) apply to hostnames (%s)."
|
||||||
% (self._name, self._host_names))
|
% (self._name, self._host_names))
|
||||||
host_director = directors.get_host_director()
|
host_director = directors.get_host_director()
|
||||||
operation = \
|
operation = \
|
||||||
host_director.kube_upgrade_hosts_kubelet(self._host_names,
|
host_director.kube_upgrade_hosts_kubelet(self._host_names,
|
||||||
self._force)
|
self._force)
|
||||||
|
|
||||||
if operation.is_inprogress():
|
if operation.is_inprogress():
|
||||||
return strategy.STRATEGY_STEP_RESULT.WAIT, ""
|
return strategy.STRATEGY_STEP_RESULT.WAIT, ""
|
||||||
elif operation.is_failed():
|
elif operation.is_failed():
|
||||||
|
|
Loading…
Reference in New Issue