Optimize kubelet upgrade phase using VIM orchestrator.

Optimize the kubelet upgrade by reducing the wait
time and adding more logs for debugging. The
wait time is removed because the additional
kubelet state check is in place (Closes-Bug: 2044209),
along with an existing timeout of 900 seconds.

Test Plan:
PASSED: On a DX system, removed the wait time of 60 sec
and tested 3 consecutive k8s upgrades thrice.
PASSED: On a DX system, tweaked the code and tested
the kubelet upgrade retry timeout (900 sec).
PASSED: On a DX system, tweaked the code and verified
the existing behaviour that no retry occurs
in case of failure.

Closes-Bug: 2045776

Change-Id: I321d7eae5ef7ebd29c1c6aca97992e3e20acb457
Signed-off-by: Vanathi.Selvaraju <vanathi.selvaraju@windriver.com>
This commit is contained in:
Vanathi.Selvaraju 2023-12-05 13:31:37 -05:00 committed by Vanathi Selvaraju
parent 71406be649
commit 42442c85d9
1 changed files with 9 additions and 8 deletions

View File

@ -4737,6 +4737,8 @@ class KubeHostUpgradeKubeletStep(AbstractKubeHostListUpgradeStep):
if (k_host.kubelet_version == self._to_version and if (k_host.kubelet_version == self._to_version and
k_host.status == nfvi.objects.v1.KUBE_HOST_UPGRADE_STATE. k_host.status == nfvi.objects.v1.KUBE_HOST_UPGRADE_STATE.
KUBE_HOST_UPGRADED_KUBELET): KUBE_HOST_UPGRADED_KUBELET):
DLOG.info("Kubelet upgraded to version %s for host %s"
% (self._to_version, host_uuid))
match_count += 1 match_count += 1
host_count += 1 host_count += 1
# break out of inner loop, since uuids match # break out of inner loop, since uuids match
@ -4746,12 +4748,14 @@ class KubeHostUpgradeKubeletStep(AbstractKubeHostListUpgradeStep):
break break
if match_count == len(self._host_uuids): if match_count == len(self._host_uuids):
result = strategy.STRATEGY_STEP_RESULT.SUCCESS result = strategy.STRATEGY_STEP_RESULT.SUCCESS
DLOG.info("Kubelet upgrade completed")
self.stage.step_complete(result, "") self.stage.step_complete(result, "")
else: else:
# keep waiting for kubelet state to change # keep waiting for kubelet state to change
pass pass
else: else:
result = strategy.STRATEGY_STEP_RESULT.FAILED result = strategy.STRATEGY_STEP_RESULT.FAILED
DLOG.info("Kubelet upgrade failed")
self.stage.step_complete(result, response['reason']) self.stage.step_complete(result, response['reason'])
def handle_event(self, event, event_data=None): def handle_event(self, event, event_data=None):
@ -4772,19 +4776,16 @@ class KubeHostUpgradeKubeletStep(AbstractKubeHostListUpgradeStep):
"kube host upgrade kubelet (%s) failed" % host.name) "kube host upgrade kubelet (%s) failed" % host.name)
return True return True
elif event == STRATEGY_EVENT.KUBE_HOST_UPGRADE_CHANGED: elif event == STRATEGY_EVENT.KUBE_HOST_UPGRADE_CHANGED:
DLOG.info("Event %s in progress" % (STRATEGY_EVENT.KUBE_HOST_UPGRADE_CHANGED))
self._query_inprogress = True self._query_inprogress = True
nfvi.nfvi_get_kube_host_upgrade_list( nfvi.nfvi_get_kube_host_upgrade_list(
self._get_kube_host_upgrade_list_callback()) self._get_kube_host_upgrade_list_callback())
return True return True
elif event == STRATEGY_EVENT.HOST_AUDIT: elif event == STRATEGY_EVENT.HOST_AUDIT:
if 0 == self._wait_time: DLOG.info("Event %s in progress" % (STRATEGY_EVENT.HOST_AUDIT))
self._wait_time = timers.get_monotonic_timestamp_in_ms() # Wait time not required as we have a timeout initialized
# in init method.
now_ms = timers.get_monotonic_timestamp_in_ms() if not self._query_inprogress:
secs_expired = (now_ms - self._wait_time) // 1000
# Wait at least 60 seconds before checking kube hosts for first time
# todo: reduce the delay to 15 and retry for 2mins.
if 60 <= secs_expired and not self._query_inprogress:
self._query_inprogress = True self._query_inprogress = True
nfvi.nfvi_get_kube_host_upgrade_list( nfvi.nfvi_get_kube_host_upgrade_list(
self._get_kube_host_upgrade_list_callback()) self._get_kube_host_upgrade_list_callback())