From b732e56b43576fffd601175f1abad50399ed3c9c Mon Sep 17 00:00:00 2001
From: Igor Soares
Date: Wed, 24 Jan 2024 19:34:25 -0300
Subject: [PATCH] Account for new Kubernetes upgrade statuses

Update the Kubernetes upgrade orchestration code to account for two new
statuses: upgrade-starting and upgrade-starting-failed. The new statuses
were introduced to support updating StarlingX applications during the
kube-upgrade-start step.

Test Plan:
PASS: build-pkgs -a && build-image
PASS: sw-manager kube-upgrade-strategy create --to-version v1.27.5
      sw-manager kube-upgrade-strategy apply
      Check if Kubernetes upgrade started and finished successfully.
      sw-manager kube-upgrade-strategy delete
PASS: sw-manager kube-upgrade-strategy create --to-version v1.27.5
      sw-manager kube-upgrade-strategy apply
      sw-manager kube-upgrade-strategy abort
      Check if Kubernetes upgrade was successfully aborted.
      sw-manager kube-upgrade-strategy delete
PASS: Fresh install AIO-SX
      Create a platform-integ-apps updated tarball without the
      metadata.yaml file and copy it over to
      /usr/local/share/applications/helm/.
      sw-manager kube-upgrade-strategy create --to-version v1.27.5
      sw-manager kube-upgrade-strategy apply
      Confirm that the Kubernetes upgrade failed.
      Check if Kubernetes upgrade was successfully aborted.
      sw-manager kube-upgrade-strategy delete
PASS: Fresh install AIO-DX
      Create a platform-integ-apps updated tarball without the
      metadata.yaml file and copy it over to
      /usr/local/share/applications/helm/.
      sw-manager kube-upgrade-strategy create --to-version v1.27.5
      sw-manager kube-upgrade-strategy apply
      Confirm that the Kubernetes upgrade failed.
      sw-manager kube-upgrade-strategy delete
      Copy a working platform-integ-apps tarball to
      /usr/local/share/applications/helm/.
      sw-manager kube-upgrade-strategy create --to-version v1.27.5
      sw-manager kube-upgrade-strategy apply
      Confirm that the Kubernetes upgrade was resumed and successfully
      finished.
PASS: Fresh install AIO-SX with Kubernetes 1.24.4 sw-manager kube-upgrade-strategy create --to-version v1.27.5 sw-manager kube-upgrade-strategy apply Check if Kubernetes upgrade started and finished successfully. Story: 2010929 Task: 49461 Depends-on: https://review.opendev.org/c/starlingx/config/+/905005 Change-Id: I1a8b86c9ecf8cc21a9cb25ee57d6930944cef261 Signed-off-by: Igor Soares --- .../tests/test_kube_upgrade_strategy.py | 36 ++++++++++++++++++- .../nfv_vim/nfvi/objects/v1/_kube_upgrade.py | 2 ++ nfv/nfv-vim/nfv_vim/strategy/_strategy.py | 4 +++ .../nfv_vim/strategy/_strategy_steps.py | 7 ++-- 4 files changed, 43 insertions(+), 6 deletions(-) diff --git a/nfv/nfv-tests/nfv_unit_tests/tests/test_kube_upgrade_strategy.py b/nfv/nfv-tests/nfv_unit_tests/tests/test_kube_upgrade_strategy.py index 2ae59587..5849f08f 100755 --- a/nfv/nfv-tests/nfv_unit_tests/tests/test_kube_upgrade_strategy.py +++ b/nfv/nfv-tests/nfv_unit_tests/tests/test_kube_upgrade_strategy.py @@ -269,7 +269,8 @@ class ApplyStageMixin(object): 'total_steps': 1, 'steps': [ {'name': 'kube-upgrade-start', - 'success_state': 'upgrade-started'}, + 'success_state': 'upgrade-started', + 'fail_state': 'upgrade-starting-failed'}, ], } @@ -623,6 +624,39 @@ class TestSimplexApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase, self.worker_list = [] self.storage_list = [] + def test_resume_after_starting_failed(self): + """ + Test the kube_upgrade strategy creation when the upgrade had previously + stopped with 'upgrade-starting-failed' + It is expected to resume at the 'upgrade-starting' stage + """ + kube_upgrade = self._create_kube_upgrade_obj( + KUBE_UPGRADE_STATE.KUBE_UPGRADE_STARTING_FAILED, + self.default_from_version, + self.default_to_version) + stages = [ + self._kube_upgrade_start_stage(), + self._kube_upgrade_download_images_stage(), + self._kube_upgrade_networking_stage(), + self._kube_upgrade_storage_stage(), + ] + if self.is_simplex(): + stages.append(self._kube_host_cordon_stage()) + for 
ver in self.kube_versions: + stages.append(self._kube_upgrade_first_control_plane_stage(ver)) + stages.extend(self._kube_upgrade_kubelet_stages( + ver, + self.std_controller_list, + self.aio_controller_list, + self.worker_list)) + if self.is_simplex(): + stages.append(self._kube_host_uncordon_stage()) + stages.extend([ + self._kube_upgrade_complete_stage(), + self._kube_upgrade_cleanup_stage(), + ]) + self.validate_apply_phase(self.is_simplex(), kube_upgrade, stages) + def test_resume_after_download_images_failed(self): """ Test the kube_upgrade strategy creation when the upgrade had previously diff --git a/nfv/nfv-vim/nfv_vim/nfvi/objects/v1/_kube_upgrade.py b/nfv/nfv-vim/nfv_vim/nfvi/objects/v1/_kube_upgrade.py index 328a08b2..04d50364 100755 --- a/nfv/nfv-vim/nfv_vim/nfvi/objects/v1/_kube_upgrade.py +++ b/nfv/nfv-vim/nfv_vim/nfvi/objects/v1/_kube_upgrade.py @@ -37,6 +37,8 @@ class KubeUpgradeState(Constants): Maintaining the same order as defined in kubernetes.py """ + KUBE_UPGRADE_STARTING = Constant('upgrade-starting') + KUBE_UPGRADE_STARTING_FAILED = Constant('upgrade-starting-failed') KUBE_UPGRADE_STARTED = Constant('upgrade-started') KUBE_UPGRADE_DOWNLOADING_IMAGES = Constant('downloading-images') KUBE_UPGRADE_DOWNLOADING_IMAGES_FAILED = Constant('downloading-images-failed') diff --git a/nfv/nfv-vim/nfv_vim/strategy/_strategy.py b/nfv/nfv-vim/nfv_vim/strategy/_strategy.py index 464a94d1..7fc833ee 100755 --- a/nfv/nfv-vim/nfv_vim/strategy/_strategy.py +++ b/nfv/nfv-vim/nfv_vim/strategy/_strategy.py @@ -3800,6 +3800,10 @@ class KubeUpgradeStrategy(SwUpdateStrategy, # Note: there are no resume states for actions that are still running # ie: KUBE_UPGRADE_DOWNLOADING_IMAGES RESUME_STATE = { + # If upgrade start failed, allow to restart + nfvi.objects.v1.KUBE_UPGRADE_STATE.KUBE_UPGRADE_STARTING_FAILED: + self._add_kube_upgrade_start_stage, + # after upgrade-started -> download images nfvi.objects.v1.KUBE_UPGRADE_STATE.KUBE_UPGRADE_STARTED: 
self._add_kube_upgrade_download_images_stage, diff --git a/nfv/nfv-vim/nfv_vim/strategy/_strategy_steps.py b/nfv/nfv-vim/nfv_vim/strategy/_strategy_steps.py index a32d7e86..1b9227d4 100755 --- a/nfv/nfv-vim/nfv_vim/strategy/_strategy_steps.py +++ b/nfv/nfv-vim/nfv_vim/strategy/_strategy_steps.py @@ -4135,7 +4135,8 @@ class KubeUpgradeStartStep(AbstractKubeUpgradeStep): super(KubeUpgradeStartStep, self).__init__( STRATEGY_STEP_NAME.KUBE_UPGRADE_START, nfvi.objects.v1.KUBE_UPGRADE_STATE.KUBE_UPGRADE_STARTED, - None) # there is no failure state if upgrade-start fails + nfvi.objects.v1.KUBE_UPGRADE_STATE.KUBE_UPGRADE_STARTING_FAILED, + timeout_in_secs=1800) # next 2 attributes must be persisted through from_dict/as_dict self._to_version = to_version self._force = force @@ -4171,13 +4172,9 @@ class KubeUpgradeStartStep(AbstractKubeUpgradeStep): response = (yield) DLOG.debug("%s callback response=%s." % (self._name, response)) - # kube-upgrade-start will return a result when it completes, - # so we do not want to use handle_event if response['completed']: if self.strategy is not None: self.strategy.nfvi_kube_upgrade = response['result-data'] - result = strategy.STRATEGY_STEP_RESULT.SUCCESS - self.stage.step_complete(result, "") else: result = strategy.STRATEGY_STEP_RESULT.FAILED self.stage.step_complete(result, response['reason'])