From 0f83fc11690e3b7b1e7ff081d0375085db651630 Mon Sep 17 00:00:00 2001
From: Al Bailey
Date: Thu, 23 Feb 2023 19:27:00 +0000
Subject: [PATCH] Combine multiple kube-upgrades into one strategy

This algorithm change is Simplex only.

The algorithm for a multi-version k8s upgrade is:
- system kube-upgrade-start
- system kube-upgrade-download-images
- system kube-upgrade-networking
- system kube-host-cordon controller-0 (future)
- loop from current version to final version
  - system kube-host-upgrade controller-0 control-plane
  - system kube-host-upgrade controller-0 kubelet
- system kube-host-uncordon controller-0 (future)
- system kube-upgrade-complete
- system kube-upgrade-delete

This change does the following additional cleanup:
- remove patch-apply intermediate steps during kube-upgrade
- remove all patching mixins from the kube upgrade strategy

Test Plan:
PASS: (multi-upgrade) AIO-SX kube upgrade orchestration v1.21.8 to v1.24.4
PASS: (single upgrade) AIO-DX kube-upgrade orchestration v1.21.8 to v1.22.5

Depends-On: https://review.opendev.org/c/starlingx/config/+/877988
Story: 2010565
Task: 47741
Signed-off-by: Al Bailey
Change-Id: Id654212e198321c6518b8feaa85cd5301167735c
---
 .../tests/sw_update_testcase.py           |  12 +-
 .../tests/test_kube_upgrade_strategy.py   | 655 +++++++-----------
 nfv/nfv-vim/nfv_vim/strategy/_strategy.py | 453 +++++-------
 .../nfv_vim/strategy/_strategy_steps.py   |  27 +-
 4 files changed, 444 insertions(+), 703 deletions(-)

diff --git a/nfv/nfv-tests/nfv_unit_tests/tests/sw_update_testcase.py b/nfv/nfv-tests/nfv_unit_tests/tests/sw_update_testcase.py
index 10d665d4..90a1c51c 100755
--- a/nfv/nfv-tests/nfv_unit_tests/tests/sw_update_testcase.py
+++ b/nfv/nfv-tests/nfv_unit_tests/tests/sw_update_testcase.py
@@ -25,8 +25,10 @@ from nfv_vim.tables._table import Table
 from nfv_unit_tests.tests import testcase
 from nfv_unit_tests.tests import utils
 
-
+# change the following 2 values to assist with
+# unit test comparison between json structures
 DEBUG_PRINTING = False
+DEBUG_DEPTH = 3
 
 
 def validate_strategy_persists(strategy):
@@ -42,9 +44,9 @@ def validate_strategy_persists(strategy):
     if DEBUG_PRINTING:
         if strategy.as_dict() != new_strategy.as_dict():
             print("==================== Strategy ====================")
-            pprint.pprint(strategy.as_dict())
+            pprint.pprint(strategy.as_dict(), depth=DEBUG_DEPTH)
             print("============== Converted Strategy ================")
-            pprint.pprint(new_strategy.as_dict())
+            pprint.pprint(new_strategy.as_dict(), depth=DEBUG_DEPTH)
 
     assert strategy.as_dict() == new_strategy.as_dict(), \
         "Strategy changed when converting to/from dict"
@@ -57,9 +59,9 @@ def validate_phase(phase, expected_results):
     """
     if DEBUG_PRINTING:
         print("====================== Phase Results ========================")
-        pprint.pprint(phase)
+        pprint.pprint(phase, depth=DEBUG_DEPTH)
         print("===================== Expected Results ======================")
-        pprint.pprint(expected_results)
+        pprint.pprint(expected_results, depth=DEBUG_DEPTH)
 
     for key in expected_results:
         if key == 'stages':
diff --git a/nfv/nfv-tests/nfv_unit_tests/tests/test_kube_upgrade_strategy.py b/nfv/nfv-tests/nfv_unit_tests/tests/test_kube_upgrade_strategy.py
index 5f702e57..f9273ec4 100755
--- a/nfv/nfv-tests/nfv_unit_tests/tests/test_kube_upgrade_strategy.py
+++ b/nfv/nfv-tests/nfv_unit_tests/tests/test_kube_upgrade_strategy.py
@@ -9,10 +9,8 @@ import uuid
 
 from nfv_common import strategy as common_strategy
 from nfv_vim import nfvi
-from nfv_vim.nfvi.objects.v1 import HostSwPatch
 from nfv_vim.nfvi.objects.v1 import 
KUBE_UPGRADE_STATE from nfv_vim.nfvi.objects.v1 import KubeVersion -from nfv_vim.nfvi.objects.v1 import SwPatch from nfv_vim.objects import KubeUpgrade from nfv_vim.objects import SW_UPDATE_ALARM_RESTRICTION from nfv_vim.objects import SW_UPDATE_APPLY_TYPE @@ -23,13 +21,11 @@ from nfv_unit_tests.tests import sw_update_testcase FROM_KUBE_VERSION = '1.2.3' -TO_KUBE_VERSION = '1.2.4' - +MID_KUBE_VERSION = '1.2.4' +HIGH_KUBE_VERSION = '1.2.5' +DEFAULT_TO_VERSION = MID_KUBE_VERSION FAKE_LOAD = '12.01' -KUBE_PATCH_1 = 'KUBE.1' # the control plane patch -KUBE_PATCH_2 = 'KUBE.2' # the kubelet patch - @mock.patch('nfv_vim.event_log._instance._event_issue', sw_update_testcase.fake_event_issue) @@ -49,7 +45,7 @@ class TestBuildStrategy(sw_update_testcase.SwUpdateStrategyTestCase): max_parallel_worker_hosts=10, default_instance_action=SW_UPDATE_INSTANCE_ACTION.STOP_START, alarm_restrictions=SW_UPDATE_ALARM_RESTRICTION.STRICT, - to_version=TO_KUBE_VERSION, + to_version=MID_KUBE_VERSION, single_controller=False, nfvi_kube_upgrade=None): """ @@ -97,8 +93,6 @@ class TestBuildStrategy(sw_update_testcase.SwUpdateStrategyTestCase): {'name': 'query-kube-versions'}, {'name': 'query-kube-upgrade'}, {'name': 'query-kube-host-upgrade'}, - {'name': 'query-sw-patches'}, - {'name': 'query-sw-patch-hosts'}, ] expected_results = { 'total_stages': 1, @@ -113,18 +107,39 @@ class TestBuildStrategy(sw_update_testcase.SwUpdateStrategyTestCase): class SimplexKubeUpgradeMixin(object): - FAKE_PATCH_HOSTS_LIST = [ - HostSwPatch('controller-0', # name - 'controller', # personality - FAKE_LOAD, # sw_version - False, # requires reboot - False, # patch_current - 'idle', # state - False, # patch_failed - False), # interim_state - ] FAKE_KUBE_HOST_UPGRADES_LIST = [] + # simplex sets the versions as available + FAKE_KUBE_VERSIONS_LIST = [ + KubeVersion( + FROM_KUBE_VERSION, # kube_version + 'active', # state + True, # target + [], # upgrade_from + [], # downgrade_to + [], # applied_patches + [] # available_patches + ), + KubeVersion( + MID_KUBE_VERSION, # kube_version + 'available', # state + False, # target + [FROM_KUBE_VERSION], # upgrade_from + [], # downgrade_to + [], # applied_patches + [] # available_patches + ), + KubeVersion( + HIGH_KUBE_VERSION, # kube_version + 'available', # state + False, # target + [MID_KUBE_VERSION], # upgrade_from + [], # downgrade_to + [], # applied_patches + [] # available_patches + ), + ] + def setUp(self): super(SimplexKubeUpgradeMixin, self).setUp() @@ -136,27 +151,9 @@ class SimplexKubeUpgradeMixin(object): class DuplexKubeUpgradeMixin(object): - FAKE_PATCH_HOSTS_LIST = [ - HostSwPatch('controller-0', 'controller', FAKE_LOAD, - False, False, 'idle', False, False), - HostSwPatch('controller-1', 'controller', FAKE_LOAD, - False, False, 'idle', False, False), - ] FAKE_KUBE_HOST_UPGRADES_LIST = [] - def setUp(self): - super(DuplexKubeUpgradeMixin, self).setUp() - - def is_simplex(self): - return False - - def is_duplex(self): - return True - - -class KubePatchMixin(object): - """This Mixin represents the patches for a kube upgrade in proper state""" - + # duplex sets only one version as available FAKE_KUBE_VERSIONS_LIST = [ KubeVersion( FROM_KUBE_VERSION, # kube_version @@ -168,111 +165,38 @@ class KubePatchMixin(object): [] # available_patches ), KubeVersion( - TO_KUBE_VERSION, # kube_version + MID_KUBE_VERSION, # kube_version 'available', # state False, # target [FROM_KUBE_VERSION], # upgrade_from [], # downgrade_to - [KUBE_PATCH_1], # applied_patches - [KUBE_PATCH_2] # available_patches - ) - ] - 
- FAKE_PATCHES_LIST = [ - SwPatch(KUBE_PATCH_1, FAKE_LOAD, 'Applied', 'Applied'), - SwPatch(KUBE_PATCH_2, FAKE_LOAD, 'Available', 'Available'), + [], # applied_patches + [] # available_patches + ), + KubeVersion( + HIGH_KUBE_VERSION, # kube_version + 'unavailable', # state + False, # target + [MID_KUBE_VERSION], # upgrade_from + [], # downgrade_to + [], # applied_patches + [] # available_patches + ), ] def setUp(self): - super(KubePatchMixin, self).setUp() + super(DuplexKubeUpgradeMixin, self).setUp() - def _kube_upgrade_patch_storage_stage(self, host_list, reboot): - steps = [ - {'name': 'query-alarms', }, - {'name': 'sw-patch-hosts', - 'entity_type': 'hosts', - 'entity_names': host_list, - }, - {'name': 'system-stabilize', - 'timeout': 30, - }, - ] - return { - 'name': 'sw-patch-storage-hosts', - 'total_steps': len(steps), - 'steps': steps, - } + def is_simplex(self): + return False - def _kube_upgrade_patch_worker_stage(self, host_list, reboot): - steps = [ - {'name': 'query-alarms', }, - {'name': 'sw-patch-hosts', - 'entity_type': 'hosts', - 'entity_names': host_list, - }, - {'name': 'system-stabilize', - 'timeout': 30, - }, - ] - return { - 'name': 'sw-patch-worker-hosts', - 'total_steps': len(steps), - 'steps': steps, - } - - def _kube_upgrade_patch_controller_stage(self, host_list, reboot): - steps = [ - {'name': 'query-alarms', }, - {'name': 'sw-patch-hosts', - 'entity_type': 'hosts', - 'entity_names': host_list, - }, - {'name': 'system-stabilize', - 'timeout': 30, - }, - ] - return { - 'name': 'sw-patch-controllers', - 'total_steps': len(steps), - 'steps': steps, - } - - def _kube_upgrade_patch_stage(self, - std_controller_list=None, - worker_list=None, - storage_list=None): - """hosts are patched in the following order - controllers, storage, then workers - """ - patch_stages = [] - patch_stage = { - 'name': 'kube-upgrade-patch', - 'total_steps': 1, - 'steps': [{'name': 'apply-patches', - 'entity_type': 'patches', - 'entity_names': ['KUBE.2']}, - ], - } - patch_stages.append(patch_stage) - - for host_name in std_controller_list: - patch_stages.append( - self._kube_upgrade_patch_controller_stage([host_name], False)) - if storage_list: - for sub_list in storage_list: - patch_stages.append( - self._kube_upgrade_patch_storage_stage(sub_list, False)) - if worker_list: - for sub_list in worker_list: - patch_stages.append( - self._kube_upgrade_patch_worker_stage(sub_list, False)) - return patch_stages + def is_duplex(self): + return True class ApplyStageMixin(object): """This Mixin will not work unless combined with other mixins. 
- PatchMixin - to provide the setup patches and kube versions - HostMixin - to provide the patch hosts and kube host upgrade states + HostMixin - to provide the kube host upgrade states """ # override any of these prior to calling setup in classes that use mixin @@ -283,28 +207,27 @@ class ApplyStageMixin(object): worker_apply_type = SW_UPDATE_APPLY_TYPE.SERIAL default_instance_action = SW_UPDATE_INSTANCE_ACTION.STOP_START + # for multi-kube upgrade: 'to' and 'kube_versions' should be updated + default_from_version = FROM_KUBE_VERSION + default_to_version = MID_KUBE_VERSION + # steps when performing control plane and kubelet upversion + kube_versions = [MID_KUBE_VERSION, ] + def setUp(self): super(ApplyStageMixin, self).setUp() - def _create_kube_upgrade_obj(self, - state, - from_version=FROM_KUBE_VERSION, - to_version=TO_KUBE_VERSION): - """ - Create a kube upgrade db object - """ + def _create_kube_upgrade_obj(self, state, from_version, to_version): + """Create a kube upgrade db object""" return nfvi.objects.v1.KubeUpgrade(state=state, from_version=from_version, to_version=to_version) def _create_built_kube_upgrade_strategy(self, sw_update_obj, - to_version=TO_KUBE_VERSION, + to_version, single_controller=False, kube_upgrade=None, alarms_list=None, - patch_list=None, - patch_hosts_list=None, kube_versions_list=None, kube_hosts_list=None): """ @@ -328,13 +251,6 @@ class ApplyStageMixin(object): # If any of the input lists are None, replace with defaults # this is done to prevent passing a list as a default - if patch_list is None: - patch_list = self.FAKE_PATCHES_LIST - strategy.nfvi_sw_patches = patch_list - - if patch_hosts_list is None: - patch_hosts_list = self.FAKE_PATCH_HOSTS_LIST - strategy.nfvi_sw_patch_hosts = patch_hosts_list if kube_versions_list is None: kube_versions_list = self.FAKE_KUBE_VERSIONS_LIST @@ -367,9 +283,9 @@ class ApplyStageMixin(object): ], } - def _kube_upgrade_first_control_plane_stage(self): + def _kube_upgrade_first_control_plane_stage(self, ver): return { - 'name': 'kube-upgrade-first-control-plane', + 'name': 'kube-upgrade-first-control-plane %s' % ver, 'total_steps': 1, 'steps': [ {'name': 'kube-host-upgrade-control-plane', @@ -378,6 +294,18 @@ class ApplyStageMixin(object): ], } + def _kube_upgrade_second_control_plane_stage(self, ver): + """This stage only executes on a duplex system""" + return { + 'name': 'kube-upgrade-second-control-plane %s' % ver, + 'total_steps': 1, + 'steps': [ + {'name': 'kube-host-upgrade-control-plane', + 'success_state': 'upgraded-second-master', + 'fail_state': 'upgrading-second-master-failed'}, + ], + } + def _kube_upgrade_networking_stage(self): return { 'name': 'kube-upgrade-networking', @@ -389,18 +317,6 @@ class ApplyStageMixin(object): ], } - def _kube_upgrade_second_control_plane_stage(self): - """This stage only executes on a duplex system""" - return { - 'name': 'kube-upgrade-second-control-plane', - 'total_steps': 1, - 'steps': [ - {'name': 'kube-host-upgrade-control-plane', - 'success_state': 'upgraded-second-master', - 'fail_state': 'upgrading-second-master-failed'}, - ], - } - def _kube_upgrade_complete_stage(self): return { 'name': 'kube-upgrade-complete', @@ -420,7 +336,7 @@ class ApplyStageMixin(object): ], } - def _kube_upgrade_kubelet_controller_stage(self, host, do_lock=True): + def _kube_upgrade_kubelet_controller_stage(self, host, ver, do_lock=True): """duplex needs to swact/lock/unlock whereas simplex does not""" if do_lock: steps = [ @@ -448,14 +364,16 @@ class ApplyStageMixin(object): 'entity_type': 
'hosts', }, {'name': 'system-stabilize', }, ] + stage_name = "kube-upgrade-kubelet %s" % ver return { - 'name': 'kube-upgrade-kubelets-controllers', + 'name': stage_name, 'total_steps': len(steps), 'steps': steps, } def _kube_upgrade_kubelet_worker_stage(self, hosts, + ver, do_lock=True, do_swact=False): steps = [{'name': 'query-alarms', }] @@ -477,13 +395,15 @@ class ApplyStageMixin(object): 'entity_type': 'hosts', }) steps.append({'name': 'wait-alarms-clear', }) + stage_name = "kube-upgrade-kubelet %s" % ver return { - 'name': 'kube-upgrade-kubelets-workers', + 'name': stage_name, 'total_steps': len(steps), 'steps': steps, } def _kube_upgrade_kubelet_stages(self, + ver, std_controller_list, aio_controller_list, worker_list): @@ -495,17 +415,19 @@ class ApplyStageMixin(object): kubelet_stages.append( self._kube_upgrade_kubelet_controller_stage( host_name, + ver, self.is_duplex())) # lock is duplex only for host_name in aio_controller_list: kubelet_stages.append( self._kube_upgrade_kubelet_worker_stage( [host_name], + ver, do_lock=self.is_duplex(), # lock is duplex only do_swact=self.is_duplex())) # swact only if we lock for sub_list in worker_list: # kubelet workers are lock but not controllers, so no swact kubelet_stages.append( - self._kube_upgrade_kubelet_worker_stage(sub_list, True, False)) + self._kube_upgrade_kubelet_worker_stage(sub_list, ver, True, False)) return kubelet_stages def validate_apply_phase(self, single_controller, kube_upgrade, stages): @@ -515,6 +437,7 @@ class ApplyStageMixin(object): # create a strategy for a system with no existing kube_upgrade strategy = self._create_built_kube_upgrade_strategy( update_obj, + self.default_to_version, single_controller=single_controller, kube_upgrade=kube_upgrade) @@ -534,19 +457,20 @@ class ApplyStageMixin(object): def build_stage_list(self, std_controller_list=None, aio_controller_list=None, - patch_worker_list=None, worker_list=None, storage_list=None, add_start=True, add_download=True, - add_first_plane=True, add_networking=True, - add_second_plane=True, - add_patches=True, + add_first_control_plane=True, + add_second_control_plane=True, add_kubelets=True, add_complete=True, add_cleanup=True): - """The order of the host_list determines the patch and kubelets""" + """The order of the host_list determines the kubelets""" + # We never add a second control plane on a simplex + if self.is_simplex(): + add_second_control_plane = False stages = [] if add_start: stages.append(self._kube_upgrade_start_stage()) @@ -554,22 +478,17 @@ class ApplyStageMixin(object): stages.append(self._kube_upgrade_download_images_stage()) if add_networking: stages.append(self._kube_upgrade_networking_stage()) - if add_first_plane: - stages.append(self._kube_upgrade_first_control_plane_stage()) - if add_second_plane: - stages.append(self._kube_upgrade_second_control_plane_stage()) - if add_patches: - # patches are not processed like kubelets. 
- # AIO controllers are processed with the worker list - stages.extend(self._kube_upgrade_patch_stage( - std_controller_list=std_controller_list, - worker_list=patch_worker_list, - storage_list=storage_list)) - if add_kubelets: - # there are no kubelets on storage - stages.extend(self._kube_upgrade_kubelet_stages(std_controller_list, - aio_controller_list, - worker_list)) + for ver in self.kube_versions: + if add_first_control_plane: + stages.append(self._kube_upgrade_first_control_plane_stage(ver)) + if add_second_control_plane: + stages.append(self._kube_upgrade_second_control_plane_stage(ver)) + if add_kubelets: + # there are no kubelets on storage + stages.extend(self._kube_upgrade_kubelet_stages(ver, + std_controller_list, + aio_controller_list, + worker_list)) if add_complete: stages.append(self._kube_upgrade_complete_stage()) if add_cleanup: @@ -587,10 +506,8 @@ class ApplyStageMixin(object): stages = self.build_stage_list( std_controller_list=self.std_controller_list, aio_controller_list=self.aio_controller_list, - patch_worker_list=self.patch_worker_list, worker_list=self.worker_list, - storage_list=self.storage_list, - add_second_plane=self.is_duplex()) + storage_list=self.storage_list) self.validate_apply_phase(self.is_simplex(), kube_upgrade, stages) def test_resume_after_upgrade_started(self): @@ -601,16 +518,16 @@ class ApplyStageMixin(object): 'downloading images' stage """ kube_upgrade = self._create_kube_upgrade_obj( - KUBE_UPGRADE_STATE.KUBE_UPGRADE_STARTED) + KUBE_UPGRADE_STATE.KUBE_UPGRADE_STARTED, + self.default_from_version, + self.default_to_version) # explicity bypass the start stage stages = self.build_stage_list( std_controller_list=self.std_controller_list, aio_controller_list=self.aio_controller_list, - patch_worker_list=self.patch_worker_list, worker_list=self.worker_list, storage_list=self.storage_list, - add_start=False, - add_second_plane=self.is_duplex()) + add_start=False) self.validate_apply_phase(self.is_simplex(), kube_upgrade, stages) def test_resume_after_upgrade_complete(self): @@ -620,7 +537,9 @@ class ApplyStageMixin(object): It is expected to resume at the cleanup stage """ kube_upgrade = self._create_kube_upgrade_obj( - KUBE_UPGRADE_STATE.KUBE_UPGRADE_COMPLETE) + KUBE_UPGRADE_STATE.KUBE_UPGRADE_COMPLETE, + self.default_from_version, + self.default_to_version) # not using build_stage_list utility since the list of stages is small stages = [ self._kube_upgrade_cleanup_stage(), @@ -628,6 +547,14 @@ class ApplyStageMixin(object): self.validate_apply_phase(self.is_simplex(), kube_upgrade, stages) +class MultiApplyStageMixin(ApplyStageMixin): + default_to_version = HIGH_KUBE_VERSION + kube_versions = [MID_KUBE_VERSION, HIGH_KUBE_VERSION, ] + + def setUp(self): + super(MultiApplyStageMixin, self).setUp() + + @mock.patch('nfv_vim.event_log._instance._event_issue', sw_update_testcase.fake_event_issue) @mock.patch('nfv_vim.objects._sw_update.SwUpdate.save', @@ -637,7 +564,6 @@ class ApplyStageMixin(object): @mock.patch('nfv_vim.nfvi.nfvi_compute_plugin_disabled', sw_update_testcase.fake_nfvi_compute_plugin_disabled) class TestSimplexApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase, - KubePatchMixin, ApplyStageMixin, SimplexKubeUpgradeMixin): def setUp(self): @@ -645,7 +571,6 @@ class TestSimplexApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase, self.create_host('controller-0', aio=True) # AIO will be patched in the worker list self.std_controller_list = [] - self.patch_worker_list = [['controller-0']] # nested list # AIO kubelet phase does 
not process controller with the workers self.aio_controller_list = ['controller-0'] self.worker_list = [] @@ -658,21 +583,20 @@ class TestSimplexApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase, It is expected to resume at the 'downloading images' stage """ kube_upgrade = self._create_kube_upgrade_obj( - KUBE_UPGRADE_STATE.KUBE_UPGRADE_DOWNLOADING_IMAGES_FAILED) + KUBE_UPGRADE_STATE.KUBE_UPGRADE_DOWNLOADING_IMAGES_FAILED, + self.default_from_version, + self.default_to_version) stages = [ self._kube_upgrade_download_images_stage(), self._kube_upgrade_networking_stage(), - self._kube_upgrade_first_control_plane_stage(), ] - stages.extend( - self._kube_upgrade_patch_stage( - std_controller_list=self.std_controller_list, - worker_list=self.patch_worker_list, - storage_list=self.storage_list)) - stages.extend( - self._kube_upgrade_kubelet_stages(self.std_controller_list, - self.aio_controller_list, - self.worker_list)) + for ver in self.kube_versions: + stages.append(self._kube_upgrade_first_control_plane_stage(ver)) + stages.extend(self._kube_upgrade_kubelet_stages( + ver, + self.std_controller_list, + self.aio_controller_list, + self.worker_list)) stages.extend([ self._kube_upgrade_complete_stage(), self._kube_upgrade_cleanup_stage(), @@ -686,20 +610,20 @@ class TestSimplexApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase, It is expected to resume at the 'first control plane' stage. """ kube_upgrade = self._create_kube_upgrade_obj( - KUBE_UPGRADE_STATE.KUBE_UPGRADE_DOWNLOADED_IMAGES) + KUBE_UPGRADE_STATE.KUBE_UPGRADE_DOWNLOADED_IMAGES, + self.default_from_version, + self.default_to_version) stages = [ self._kube_upgrade_networking_stage(), - self._kube_upgrade_first_control_plane_stage(), ] - stages.extend( - self._kube_upgrade_patch_stage( - std_controller_list=self.std_controller_list, - worker_list=self.patch_worker_list, - storage_list=self.storage_list)) - stages.extend( - self._kube_upgrade_kubelet_stages(self.std_controller_list, - self.aio_controller_list, - self.worker_list)) + for ver in self.kube_versions: + stages.append(self._kube_upgrade_first_control_plane_stage( + ver)) + stages.extend(self._kube_upgrade_kubelet_stages( + ver, + self.std_controller_list, + self.aio_controller_list, + self.worker_list)) stages.extend([ self._kube_upgrade_complete_stage(), self._kube_upgrade_cleanup_stage(), @@ -713,19 +637,18 @@ class TestSimplexApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase, It is expected to resume and retry the 'first control plane' stage """ kube_upgrade = self._create_kube_upgrade_obj( - KUBE_UPGRADE_STATE.KUBE_UPGRADING_FIRST_MASTER_FAILED) - stages = [ - self._kube_upgrade_first_control_plane_stage(), - ] - stages.extend( - self._kube_upgrade_patch_stage( - std_controller_list=self.std_controller_list, - worker_list=self.patch_worker_list, - storage_list=self.storage_list)) - stages.extend( - self._kube_upgrade_kubelet_stages(self.std_controller_list, - self.aio_controller_list, - self.worker_list)) + KUBE_UPGRADE_STATE.KUBE_UPGRADING_FIRST_MASTER_FAILED, + self.default_from_version, + self.default_to_version) + stages = [] + for ver in self.kube_versions: + stages.append(self._kube_upgrade_first_control_plane_stage( + ver)) + stages.extend(self._kube_upgrade_kubelet_stages( + ver, + self.std_controller_list, + self.aio_controller_list, + self.worker_list)) stages.extend([ self._kube_upgrade_complete_stage(), self._kube_upgrade_cleanup_stage(), @@ -739,17 +662,16 @@ class TestSimplexApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase, It 
is expected to resume at the second control plane stage in duplex """ kube_upgrade = self._create_kube_upgrade_obj( - KUBE_UPGRADE_STATE.KUBE_UPGRADED_FIRST_MASTER) + KUBE_UPGRADE_STATE.KUBE_UPGRADED_FIRST_MASTER, + self.default_from_version, + self.default_to_version) stages = [] - stages.extend( - self._kube_upgrade_patch_stage( - std_controller_list=self.std_controller_list, - worker_list=self.patch_worker_list, - storage_list=self.storage_list)) - stages.extend( - self._kube_upgrade_kubelet_stages(self.std_controller_list, - self.aio_controller_list, - self.worker_list)) + for ver in self.kube_versions: + stages.extend(self._kube_upgrade_kubelet_stages( + ver, + self.std_controller_list, + self.aio_controller_list, + self.worker_list)) stages.extend([ self._kube_upgrade_complete_stage(), self._kube_upgrade_cleanup_stage(), @@ -763,20 +685,20 @@ class TestSimplexApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase, It is expected to retry and resume at the networking stage """ kube_upgrade = self._create_kube_upgrade_obj( - KUBE_UPGRADE_STATE.KUBE_UPGRADING_NETWORKING_FAILED) + KUBE_UPGRADE_STATE.KUBE_UPGRADING_NETWORKING_FAILED, + self.default_from_version, + self.default_to_version) stages = [ self._kube_upgrade_networking_stage(), - self._kube_upgrade_first_control_plane_stage(), ] - stages.extend( - self._kube_upgrade_patch_stage( - std_controller_list=self.std_controller_list, - worker_list=self.patch_worker_list, - storage_list=self.storage_list)) - stages.extend( - self._kube_upgrade_kubelet_stages(self.std_controller_list, - self.aio_controller_list, - self.worker_list)) + for ver in self.kube_versions: + stages.append(self._kube_upgrade_first_control_plane_stage( + ver)) + stages.extend(self._kube_upgrade_kubelet_stages( + ver, + self.std_controller_list, + self.aio_controller_list, + self.worker_list)) stages.extend([ self._kube_upgrade_complete_stage(), self._kube_upgrade_cleanup_stage(), @@ -787,22 +709,21 @@ class TestSimplexApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase, """ Test the kube_upgrade strategy creation when there is only a simplex and the upgrade had previously stopped after successful networking. - It is expected to resume at the patch stage + It is expected to resume at the first control plane """ kube_upgrade = self._create_kube_upgrade_obj( - KUBE_UPGRADE_STATE.KUBE_UPGRADED_NETWORKING) - stages = [ - self._kube_upgrade_first_control_plane_stage(), - ] - stages.extend( - self._kube_upgrade_patch_stage( - std_controller_list=self.std_controller_list, - worker_list=self.patch_worker_list, - storage_list=self.storage_list)) - stages.extend( - self._kube_upgrade_kubelet_stages(self.std_controller_list, - self.aio_controller_list, - self.worker_list)) + KUBE_UPGRADE_STATE.KUBE_UPGRADED_NETWORKING, + self.default_from_version, + self.default_to_version) + stages = [] + for ver in self.kube_versions: + stages.append(self._kube_upgrade_first_control_plane_stage( + ver)) + stages.extend(self._kube_upgrade_kubelet_stages( + ver, + self.std_controller_list, + self.aio_controller_list, + self.worker_list)) stages.extend([ self._kube_upgrade_complete_stage(), self._kube_upgrade_cleanup_stage(), @@ -815,20 +736,19 @@ class TestSimplexApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase, and the upgrade had previously stopped after a second control plane state is encountered. There should never be a second control plane state in a simplex, so - the stages should skip over it to the patch stage. + the stages should skip over it to the kubelet stage. 
""" kube_upgrade = self._create_kube_upgrade_obj( - KUBE_UPGRADE_STATE.KUBE_UPGRADED_SECOND_MASTER) + KUBE_UPGRADE_STATE.KUBE_UPGRADED_SECOND_MASTER, + self.default_from_version, + self.default_to_version) stages = [] - stages.extend( - self._kube_upgrade_patch_stage( - std_controller_list=self.std_controller_list, - worker_list=self.patch_worker_list, - storage_list=self.storage_list)) - stages.extend( - self._kube_upgrade_kubelet_stages(self.std_controller_list, - self.aio_controller_list, - self.worker_list)) + for ver in self.kube_versions: + stages.extend(self._kube_upgrade_kubelet_stages( + ver, + self.std_controller_list, + self.aio_controller_list, + self.worker_list)) stages.extend([ self._kube_upgrade_complete_stage(), self._kube_upgrade_cleanup_stage(), @@ -840,21 +760,20 @@ class TestSimplexApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase, Test the kube_upgrade strategy creation when there is only a simplex and the upgrade had previously stopped after a second control plane failure state is encountered. - There should never be a second control plane state in a simplex, so - the stages should skip over it to the patch stage. + There should never be a second control plane state in a simplex + so the logic should just proceed to the kubelets """ kube_upgrade = self._create_kube_upgrade_obj( - KUBE_UPGRADE_STATE.KUBE_UPGRADING_SECOND_MASTER_FAILED) + KUBE_UPGRADE_STATE.KUBE_UPGRADING_SECOND_MASTER_FAILED, + self.default_from_version, + self.default_to_version) stages = [] - stages.extend( - self._kube_upgrade_patch_stage( - std_controller_list=self.std_controller_list, - worker_list=self.patch_worker_list, - storage_list=self.storage_list)) - stages.extend( - self._kube_upgrade_kubelet_stages(self.std_controller_list, - self.aio_controller_list, - self.worker_list)) + for ver in self.kube_versions: + stages.extend(self._kube_upgrade_kubelet_stages( + ver, + self.std_controller_list, + self.aio_controller_list, + self.worker_list)) stages.extend([ self._kube_upgrade_complete_stage(), self._kube_upgrade_cleanup_stage(), @@ -862,6 +781,29 @@ class TestSimplexApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase, self.validate_apply_phase(self.is_simplex(), kube_upgrade, stages) +@mock.patch('nfv_vim.event_log._instance._event_issue', + sw_update_testcase.fake_event_issue) +@mock.patch('nfv_vim.objects._sw_update.SwUpdate.save', + sw_update_testcase.fake_save) +@mock.patch('nfv_vim.objects._sw_update.timers.timers_create_timer', + sw_update_testcase.fake_timer) +@mock.patch('nfv_vim.nfvi.nfvi_compute_plugin_disabled', + sw_update_testcase.fake_nfvi_compute_plugin_disabled) +class TestSimplexMultiApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase, + MultiApplyStageMixin, + SimplexKubeUpgradeMixin): + """This test class can be updated to resume from partial control plane""" + + def setUp(self): + super(TestSimplexMultiApplyStrategy, self).setUp() + self.create_host('controller-0', aio=True) + # AIO kubelet phase does not process controller with the workers + self.std_controller_list = [] + self.aio_controller_list = ['controller-0', ] + self.worker_list = [] + self.storage_list = [] + + @mock.patch('nfv_vim.event_log._instance._event_issue', sw_update_testcase.fake_event_issue) @mock.patch('nfv_vim.objects._sw_update.SwUpdate.save', @@ -871,18 +813,15 @@ class TestSimplexApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase, @mock.patch('nfv_vim.nfvi.nfvi_compute_plugin_disabled', sw_update_testcase.fake_nfvi_compute_plugin_disabled) class 
TestDuplexApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase, - KubePatchMixin, ApplyStageMixin, DuplexKubeUpgradeMixin): def setUp(self): super(TestDuplexApplyStrategy, self).setUp() self.create_host('controller-0', aio=True) self.create_host('controller-1', aio=True) - # AIO will be patched in the worker list # AIO kubelet phase does not process controller with the workers self.std_controller_list = [] self.aio_controller_list = ['controller-1', 'controller-0'] - self.patch_worker_list = [['controller-0'], ['controller-1']] self.worker_list = [] self.storage_list = [] @@ -896,7 +835,6 @@ class TestDuplexApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase, @mock.patch('nfv_vim.nfvi.nfvi_compute_plugin_disabled', sw_update_testcase.fake_nfvi_compute_plugin_disabled) class TestDuplexPlusApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase, - KubePatchMixin, ApplyStageMixin, DuplexKubeUpgradeMixin): def setUp(self): @@ -908,19 +846,9 @@ class TestDuplexPlusApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase, # AIO will be patched in the worker list # AIO kubelet phase does not process controller with the workers self.std_controller_list = [] - self.patch_worker_list = [['controller-0'], ['controller-1'], ['compute-0']] self.aio_controller_list = ['controller-1', 'controller-0'] self.worker_list = [['compute-0']] # A nested list self.storage_list = [] - self.FAKE_PATCH_HOSTS_LIST.append( - HostSwPatch('compute-0', # name - 'worker', # personality - FAKE_LOAD, # sw_version - False, # requires reboot - False, # patch_current - 'idle', # state - False, # patch_failed - False)) # interim_state @mock.patch('nfv_vim.event_log._instance._event_issue', @@ -933,7 +861,6 @@ class TestDuplexPlusApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase, sw_update_testcase.fake_nfvi_compute_plugin_disabled) class TestDuplexPlusApplyStrategyTwoWorkers( sw_update_testcase.SwUpdateStrategyTestCase, - KubePatchMixin, ApplyStageMixin, DuplexKubeUpgradeMixin): @@ -947,29 +874,9 @@ class TestDuplexPlusApplyStrategyTwoWorkers( # AIO kubelet phase does not process controller with the workers self.std_controller_list = [] self.aio_controller_list = ['controller-1', 'controller-0'] - self.patch_worker_list = [['controller-0'], ['controller-1'], ['compute-0'], ['compute-1']] self.worker_list = [['compute-0'], ['compute-1']] # nested serial list self.storage_list = [] - self.FAKE_PATCH_HOSTS_LIST.append( - HostSwPatch('compute-0', # name - 'worker', # personality - FAKE_LOAD, # sw_version - False, # requires reboot - False, # patch_current - 'idle', # state - False, # patch_failed - False)) # interim_state - self.FAKE_PATCH_HOSTS_LIST.append( - HostSwPatch('compute-1', # name - 'worker', # personality - FAKE_LOAD, # sw_version - False, # requires reboot - False, # patch_current - 'idle', # state - False, # patch_failed - False)) # interim_state - @mock.patch('nfv_vim.event_log._instance._event_issue', sw_update_testcase.fake_event_issue) @@ -981,7 +888,6 @@ class TestDuplexPlusApplyStrategyTwoWorkers( sw_update_testcase.fake_nfvi_compute_plugin_disabled) class TestDuplexPlusApplyStrategyTwoWorkersParallel( sw_update_testcase.SwUpdateStrategyTestCase, - KubePatchMixin, ApplyStageMixin, DuplexKubeUpgradeMixin): @@ -997,27 +903,8 @@ class TestDuplexPlusApplyStrategyTwoWorkersParallel( # AIO kubelet phase does not process controller with the workers self.std_controller_list = [] self.aio_controller_list = ['controller-1', 'controller-0'] - self.patch_worker_list = [['controller-0'], 
['controller-1'], ['compute-0', 'compute-1']] self.worker_list = [['compute-0', 'compute-1']] # nested parallel list self.storage_list = [] - self.FAKE_PATCH_HOSTS_LIST.append( - HostSwPatch('compute-0', # name - 'worker', # personality - FAKE_LOAD, # sw_version - False, # requires reboot - False, # patch_current - 'idle', # state - False, # patch_failed - False)) # interim_state - self.FAKE_PATCH_HOSTS_LIST.append( - HostSwPatch('compute-1', # name - 'worker', # personality - FAKE_LOAD, # sw_version - False, # requires reboot - False, # patch_current - 'idle', # state - False, # patch_failed - False)) # interim_state @mock.patch('nfv_vim.event_log._instance._event_issue', @@ -1030,7 +917,6 @@ class TestDuplexPlusApplyStrategyTwoWorkersParallel( sw_update_testcase.fake_nfvi_compute_plugin_disabled) class TestDuplexPlusApplyStrategyTwoStorage( sw_update_testcase.SwUpdateStrategyTestCase, - KubePatchMixin, ApplyStageMixin, DuplexKubeUpgradeMixin): @@ -1044,27 +930,8 @@ class TestDuplexPlusApplyStrategyTwoStorage( # AIO kubelet phase does not process controller with the workers self.std_controller_list = [] self.aio_controller_list = ['controller-1', 'controller-0'] - self.patch_worker_list = [['controller-0'], ['controller-1']] self.worker_list = [] self.storage_list = [['storage-0'], ['storage-1']] # serial - self.FAKE_PATCH_HOSTS_LIST.append( - HostSwPatch('storage-0', # name - 'storage', # personality - FAKE_LOAD, # sw_version - False, # requires reboot - False, # patch_current - 'idle', # state - False, # patch_failed - False)) # interim_state - self.FAKE_PATCH_HOSTS_LIST.append( - HostSwPatch('storage-1', # name - 'storage', # personality - FAKE_LOAD, # sw_version - False, # requires reboot - False, # patch_current - 'idle', # state - False, # patch_failed - False)) # interim_state @mock.patch('nfv_vim.event_log._instance._event_issue', @@ -1077,7 +944,6 @@ class TestDuplexPlusApplyStrategyTwoStorage( sw_update_testcase.fake_nfvi_compute_plugin_disabled) class TestDuplexPlusApplyStrategyTwoStorageParallel( sw_update_testcase.SwUpdateStrategyTestCase, - KubePatchMixin, ApplyStageMixin, DuplexKubeUpgradeMixin): @@ -1093,27 +959,8 @@ class TestDuplexPlusApplyStrategyTwoStorageParallel( # AIO kubelet phase does not process controller with the workers self.std_controller_list = [] self.aio_controller_list = ['controller-1', 'controller-0'] - self.patch_worker_list = [['controller-0'], ['controller-1']] self.worker_list = [] self.storage_list = [['storage-0', 'storage-1']] # parallel - self.FAKE_PATCH_HOSTS_LIST.append( - HostSwPatch('storage-0', # name - 'storage', # personality - FAKE_LOAD, # sw_version - False, # requires reboot - False, # patch_current - 'idle', # state - False, # patch_failed - False)) # interim_state - self.FAKE_PATCH_HOSTS_LIST.append( - HostSwPatch('storage-1', # name - 'storage', # personality - FAKE_LOAD, # sw_version - False, # requires reboot - False, # patch_current - 'idle', # state - False, # patch_failed - False)) # interim_state @mock.patch('nfv_vim.event_log._instance._event_issue', @@ -1126,7 +973,6 @@ class TestDuplexPlusApplyStrategyTwoStorageParallel( sw_update_testcase.fake_nfvi_compute_plugin_disabled) class TestStandardTwoWorkerTwoStorage( sw_update_testcase.SwUpdateStrategyTestCase, - KubePatchMixin, ApplyStageMixin, DuplexKubeUpgradeMixin): @@ -1141,42 +987,5 @@ class TestStandardTwoWorkerTwoStorage( self.create_host('storage-1') self.std_controller_list = ['controller-1', 'controller-0'] self.aio_controller_list = [] - self.patch_worker_list = 
[['compute-0'], ['compute-1']] self.worker_list = [['compute-0'], ['compute-1']] self.storage_list = [['storage-0'], ['storage-1']] - self.FAKE_PATCH_HOSTS_LIST.append( - HostSwPatch('storage-0', # name - 'storage', # personality - FAKE_LOAD, # sw_version - False, # requires reboot - False, # patch_current - 'idle', # state - False, # patch_failed - False)) # interim_state - self.FAKE_PATCH_HOSTS_LIST.append( - HostSwPatch('storage-1', # name - 'storage', # personality - FAKE_LOAD, # sw_version - False, # requires reboot - False, # patch_current - 'idle', # state - False, # patch_failed - False)) # interim_state - self.FAKE_PATCH_HOSTS_LIST.append( - HostSwPatch('compute-0', # name - 'worker', # personality - FAKE_LOAD, # sw_version - False, # requires reboot - False, # patch_current - 'idle', # state - False, # patch_failed - False)) # interim_state - self.FAKE_PATCH_HOSTS_LIST.append( - HostSwPatch('compute-1', # name - 'worker', # personality - FAKE_LOAD, # sw_version - False, # requires reboot - False, # patch_current - 'idle', # state - False, # patch_failed - False)) # interim_state diff --git a/nfv/nfv-vim/nfv_vim/strategy/_strategy.py b/nfv/nfv-vim/nfv_vim/strategy/_strategy.py index 7e5ff41b..34f64980 100755 --- a/nfv/nfv-vim/nfv_vim/strategy/_strategy.py +++ b/nfv/nfv-vim/nfv_vim/strategy/_strategy.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2015-2021 Wind River Systems, Inc. +# Copyright (c) 2015-2023 Wind River Systems, Inc. # # SPDX-License-Identifier: Apache-2.0 # @@ -869,7 +869,8 @@ class UpdateControllerHostsMixin(object): controllers, reboot, strategy_stage_name, - host_action_step): + host_action_step, + extra_args=None): """ Add controller software stages for a controller list to a strategy """ @@ -911,7 +912,10 @@ class UpdateControllerHostsMixin(object): stage.add_step(strategy.SwactHostsStep(host_list)) stage.add_step(strategy.LockHostsStep(host_list)) # Add the action step for these hosts (patch, etc..) - stage.add_step(host_action_step(host_list)) + if extra_args is None: + stage.add_step(host_action_step(host_list)) + else: + stage.add_step(host_action_step(host_list, extra_args)) if reboot: # Cannot unlock right away after certain actions # like SwPatchHostsStep @@ -941,7 +945,10 @@ class UpdateControllerHostsMixin(object): stage.add_step(strategy.SwactHostsStep(host_list)) stage.add_step(strategy.LockHostsStep(host_list)) # Add the action step for the local_hosts (patch, etc..) 
- stage.add_step(host_action_step(host_list)) + if extra_args is None: + stage.add_step(host_action_step(host_list)) + else: + stage.add_step(host_action_step(host_list, extra_args)) if reboot: # Cannot unlock right away after certain actions # like SwPatchHostsStep @@ -984,13 +991,14 @@ class PatchControllerHostsMixin(UpdateControllerHostsMixin): class UpgradeKubeletControllerHostsMixin(UpdateControllerHostsMixin): - def _add_kubelet_controller_strategy_stages(self, controllers, reboot): + def _add_kubelet_controller_strategy_stages(self, controllers, to_version, reboot, stage_name): from nfv_vim import strategy return self._add_update_controller_strategy_stages( controllers, reboot, - strategy.STRATEGY_STAGE_NAME.KUBE_UPGRADE_KUBELETS_CONTROLLERS, - strategy.KubeHostUpgradeKubeletStep) + stage_name, + strategy.KubeHostUpgradeKubeletStep, + extra_args=to_version) class UpdateStorageHostsMixin(object): @@ -1075,7 +1083,8 @@ class UpdateWorkerHostsMixin(object): worker_hosts, reboot, strategy_stage_name, - host_action_step): + host_action_step, + extra_args=None): """ Add worker update stages to a strategy The strategy_stage_name is the type of stage (patch, kube, etc..) @@ -1169,7 +1178,10 @@ class UpdateWorkerHostsMixin(object): hosts_to_lock, wait_until_disabled=wait_until_disabled)) # Add the action step for these hosts (patch, etc..) - stage.add_step(host_action_step(host_list)) + if extra_args is None: + stage.add_step(host_action_step(host_list)) + else: + stage.add_step(host_action_step(host_list, extra_args)) if reboot: # Cannot unlock right away after the action step @@ -1226,13 +1238,14 @@ class PatchWorkerHostsMixin(UpdateWorkerHostsMixin): class UpgradeKubeletWorkerHostsMixin(UpdateWorkerHostsMixin): - def _add_kubelet_worker_strategy_stages(self, worker_hosts, reboot): + def _add_kubelet_worker_strategy_stages(self, worker_hosts, to_version, reboot, stage_name): from nfv_vim import strategy return self._add_update_worker_strategy_stages( worker_hosts, reboot, - strategy.STRATEGY_STAGE_NAME.KUBE_UPGRADE_KUBELETS_WORKERS, - strategy.KubeHostUpgradeKubeletStep) + stage_name, + strategy.KubeHostUpgradeKubeletStep, + extra_args=to_version) ################################################################### @@ -2958,11 +2971,6 @@ class KubeUpgradeStrategy(SwUpdateStrategy, QueryKubeUpgradesMixin, QueryKubeHostUpgradesMixin, QueryKubeVersionsMixin, - QuerySwPatchesMixin, - QuerySwPatchHostsMixin, - PatchControllerHostsMixin, - PatchStorageHostsMixin, - PatchWorkerHostsMixin, UpgradeKubeletControllerHostsMixin, UpgradeKubeletWorkerHostsMixin): """ @@ -3002,7 +3010,6 @@ class KubeUpgradeStrategy(SwUpdateStrategy, '280.002', # Subcloud resource out-of-sync '700.004', # VM stopped '750.006', # Configuration change requires reapply of cert-manager - '900.001', # Patch in progress (kube orch uses patching) '900.007', # Kube Upgrade in progress '900.401', # kube-upgrade-auto-apply-inprogress ] @@ -3038,12 +3045,69 @@ class KubeUpgradeStrategy(SwUpdateStrategy, stage.add_step(strategy.QueryKubeVersionsStep()) stage.add_step(strategy.QueryKubeUpgradeStep()) stage.add_step(strategy.QueryKubeHostUpgradeStep()) - stage.add_step(strategy.QuerySwPatchesStep()) - stage.add_step(strategy.QuerySwPatchHostsStep()) self.build_phase.add_stage(stage) super(KubeUpgradeStrategy, self).build() + def _get_kube_version_steps(self, target_version, kube_list): + """Returns an ordered list for a multi-version kubernetes upgrade + + Returns an ordered list of kubernetes versions to complete the upgrade + If the 
target is already the active version, the list will be empty + Raises an exception if the kubernetes chain is broken + """ + # convert the kube_list into a dictionary indexed by version + kube_dict = {} + for kube in kube_list: + kube_dict[kube['kube_version']] = kube + + # Populate the kube_sequence + # Start with the target version and traverse based on the + # 'upgrade_from' field. + # The loop ends when we reach the active/partial version + # The loop always inserts at the 'front' of the kube_sequence + kube_sequence = [] + ver = target_version + loop_count = 0 + while True: + # We should never encounter a version that is not in the dict + kube = kube_dict.get(ver) + if kube is None: + # We do not raise an exception. if the lowest version is + # 'partial' its 'upgrade_from' will not exist in the dict, + # so we can stop iterating + break + + # We do not add the 'active' version to the front of the list + # since it will not be updated + if kube['state'] == 'active': + # active means we are at the end of the sequence + break + + # Add to the kube_sequence if it is any state other than 'active' + kube_sequence.insert(0, ver) + + # 'partial' means we have started updating that version + # There can be two partial states if the control plane + # was updated, but the kubelet was not, so add only the first + if kube['state'] == 'partial': + # if its partial there is no need for another loop + break + + # 'upgrade_from' value is a list of versions however the + # list should only ever be a single entry so we get the first + # value and allow an exception to be raised if the list is empty + ver = kube['upgrade_from'][0] + # go around the loop again... + + # We should NEVER get into an infinite loop, but if the kube-version entries + # in sysinv are malformed, we do not want to spin forever + loop_count += 1 + if loop_count > 100: + raise Exception("Invalid kubernetes dependency chain detected") + + return kube_sequence + def _kubelet_map(self): """Map the host kubelet versions by the host uuid. 
Leave the kubelet version empty, if the status is not None, @@ -3104,34 +3168,87 @@ class KubeUpgradeStrategy(SwUpdateStrategy, strategy.STRATEGY_STAGE_NAME.KUBE_UPGRADE_NETWORKING) stage.add_step(strategy.KubeUpgradeNetworkingStep()) self.apply_phase.add_stage(stage) - # Next stage after networking is second control plane (if duplex) - self._add_kube_upgrade_first_control_plane_stage() - def _add_kube_upgrade_first_control_plane_stage(self): - """ - Add first controller control plane kube upgrade stage - This stage only occurs after networking - It then proceeds to the next stage - """ + # need to update control plane and kubelet per-version + self._add_kube_update_stages() + + def _add_kube_update_stages(self): + # for a particular version, the order is: + # - first control plane + # - second control plane + # - kubelets + + from nfv_vim import nfvi + from nfv_vim import strategy + first_host = self.get_first_host() + second_host = self.get_second_host() + ver_list = self._get_kube_version_steps(self._to_version, + self._nfvi_kube_versions_list) + + prev_state = None + if self.nfvi_kube_upgrade is not None: + prev_state = self.nfvi_kube_upgrade.state + + skip_first = False + skip_second = False + if prev_state in [nfvi.objects.v1.KUBE_UPGRADE_STATE.KUBE_UPGRADED_FIRST_MASTER, + nfvi.objects.v1.KUBE_UPGRADE_STATE.KUBE_UPGRADING_SECOND_MASTER, + nfvi.objects.v1.KUBE_UPGRADE_STATE.KUBE_UPGRADING_SECOND_MASTER_FAILED]: + # we have already proceeded past first control plane + skip_first = True + elif prev_state in [nfvi.objects.v1.KUBE_UPGRADE_STATE.KUBE_UPGRADED_SECOND_MASTER, + nfvi.objects.v1.KUBE_UPGRADE_STATE.KUBE_UPGRADING_KUBELETS]: + # we have already proceeded past first control plane and second control plane + skip_first = True + skip_second = True + + for kube_ver in ver_list: + DLOG.info("Examining %s " % kube_ver) + + # first control plane + if skip_first: + # skip only occurs on the first loop + skip_first = False + else: + self._add_kube_upgrade_first_control_plane_stage(first_host, kube_ver) + + # second control plane + if skip_second: + skip_second = False + else: + self._add_kube_upgrade_second_control_plane_stage(second_host, kube_ver) + + # kubelets + self._add_kube_upgrade_kubelets_stage(kube_ver) + # kubelets can 'fail' the build. 
Return abruptly if it does + # todo(abailey): change this once all lock/unlock are removed from kubelet + if self._state == strategy.STRATEGY_STATE.BUILD_FAILED: + return + + # after this loop is kube upgrade complete stage + self._add_kube_upgrade_complete_stage() + + def _add_kube_upgrade_first_control_plane_stage(self, first_host, kube_ver): + """Add first controller control plane kube upgrade stage""" from nfv_vim import nfvi from nfv_vim import strategy - stage = strategy.StrategyStage( - strategy.STRATEGY_STAGE_NAME.KUBE_UPGRADE_FIRST_CONTROL_PLANE) + stage_name = "%s %s" % (strategy.STRATEGY_STAGE_NAME.KUBE_UPGRADE_FIRST_CONTROL_PLANE, kube_ver) + stage = strategy.StrategyStage(stage_name) first_host = self.get_first_host() # force argument is ignored by control plane API force = True stage.add_step(strategy.KubeHostUpgradeControlPlaneStep( first_host, + kube_ver, force, nfvi.objects.v1.KUBE_UPGRADE_STATE.KUBE_UPGRADED_FIRST_MASTER, nfvi.objects.v1.KUBE_UPGRADE_STATE.KUBE_UPGRADING_FIRST_MASTER_FAILED) ) self.apply_phase.add_stage(stage) - # Next stage after first control plane is second control plane - self._add_kube_upgrade_second_control_plane_stage() + return True - def _add_kube_upgrade_second_control_plane_stage(self): + def _add_kube_upgrade_second_control_plane_stage(self, second_host, kube_ver): """ Add second control plane kube upgrade stage This stage only occurs after networking and if this is a duplex. @@ -3140,236 +3257,26 @@ class KubeUpgradeStrategy(SwUpdateStrategy, from nfv_vim import nfvi from nfv_vim import strategy - second_host = self.get_second_host() if second_host is not None: # force argument is ignored by control plane API force = True - stage = strategy.StrategyStage( - strategy.STRATEGY_STAGE_NAME.KUBE_UPGRADE_SECOND_CONTROL_PLANE) + stage_name = "%s %s" % (strategy.STRATEGY_STAGE_NAME.KUBE_UPGRADE_SECOND_CONTROL_PLANE, kube_ver) + stage = strategy.StrategyStage(stage_name) stage.add_step(strategy.KubeHostUpgradeControlPlaneStep( second_host, + kube_ver, force, nfvi.objects.v1.KUBE_UPGRADE_STATE.KUBE_UPGRADED_SECOND_MASTER, nfvi.objects.v1.KUBE_UPGRADE_STATE.KUBE_UPGRADING_SECOND_MASTER_FAILED) ) self.apply_phase.add_stage(stage) - # Next stage after second control plane is to apply kube patch - self._add_kube_upgrade_patch_stage() + return True + return False - def _check_host_patch(self, host, new_patches): - """ - Check a host for whether it is patch current. - :returns: (Boolean,Boolean) host is patch current, host needs reboot - """ - # If any new patches have been applied, assume the host will need it. - # If a patch was controller or worker only then this assumption - # may not be true. + def _add_kube_upgrade_kubelets_stage(self, kube_ver): + # todo(abailey): This can be completely redone when lock + # and unlock are completely obsoleted - # There is no way in the vim to determine from a patch if a reboot - # will be required until after the patch is applied - if new_patches: - return (False, False) - - for host_entry in self._nfvi_sw_patch_hosts: - if host_entry['name'] == host.name: - return (host_entry['patch_current'], - host_entry['requires_reboot']) - - # Did not find a matching entry in the sw patch hosts list. - # We cannot determine if it is patch current - return (False, False) - - def _add_kube_upgrade_patch_stage(self): - """ - Add patch steps for the kubelet patch - If required 'applied' patches have not already been applied, fail this - stage. This stage is meant to apply the patches tagged as 'available' - for the kube upgrade. 
The patches are then installed on the hosts. - """ - from nfv_vim import strategy - from nfv_vim import tables - - applied_patches = None - available_patches = None - for kube_version_object in self.nfvi_kube_versions_list: - if kube_version_object['kube_version'] == self._to_version: - applied_patches = kube_version_object['applied_patches'] - available_patches = kube_version_object['available_patches'] - break - - # todo(abailey): handle 'committed' state - - # This section validates the 'applied_patches' for a kube upgrade. - # Note: validation fails on the first required patch in wrong state - # it does not indicate all pre-requisite patches that are invalid. - if applied_patches: - for kube_patch in applied_patches: - matching_patch = None - for patch in self.nfvi_sw_patches: - if patch['name'] == kube_patch: - matching_patch = patch - break - # - Fail if the required patch is missing - # - Fail if the required patch is not applied - # - Fail if the required patch is not installed on all hosts - if matching_patch is None: - self.report_build_failure("Missing a required patch: [%s]" - % kube_patch) - return - elif matching_patch['repo_state'] != PATCH_REPO_STATE_APPLIED: - self.report_build_failure( - "Required pre-applied patch: [%s] is not applied." - % kube_patch) - return - elif matching_patch['patch_state'] != PATCH_STATE_APPLIED: - self.report_build_failure( - "Required patch: [%s] is not installed on all hosts." - % kube_patch) - return - else: - DLOG.debug("Verified patch: [%s] is applied and installed" - % kube_patch) - - # This section validates the 'available_patches' for a kube upgrade. - # It also sets up the apply and install steps. - # 'available_patches' are the patches that need to be applied and - # installed on all hosts during kube upgrade orchestration after the - # control plane has been setup. - patches_to_apply = [] - patches_need_host_install = False - if available_patches: - for kube_patch in available_patches: - matching_patch = None - for patch in self.nfvi_sw_patches: - if patch['name'] == kube_patch: - matching_patch = patch - break - # - Fail if the required patch is missing - # - Apply the patch if it is not yet applied - # - Install the patch on any hosts where it is not installed. 
- if matching_patch is None: - self.report_build_failure("Missing a required patch: [%s]" - % kube_patch) - return - # if there is an applied_patch that is not applied, fail - elif matching_patch['repo_state'] != PATCH_REPO_STATE_APPLIED: - DLOG.debug("Preparing to apply available patch %s" - % kube_patch) - patches_to_apply.append(kube_patch) - # we apply the patch, so it must be installed on the hosts - patches_need_host_install = True - elif matching_patch['patch_state'] != PATCH_STATE_APPLIED: - # One of the patches is not fully installed on all hosts - patches_need_host_install = True - else: - DLOG.debug("Skipping available patch %s already applied" - % kube_patch) - - if patches_to_apply: - # Add a stage to 'apply' the patches - stage = strategy.StrategyStage( - strategy.STRATEGY_STAGE_NAME.KUBE_UPGRADE_PATCH) - stage.add_step(strategy.ApplySwPatchesStep(patches_to_apply)) - self.apply_phase.add_stage(stage) - - if patches_to_apply or patches_need_host_install: - # add stages to host-install the patches on the different hosts - - # each of the lists has its own stage if it is not empty - # kubernetes does not run on storage hosts, but it has kube rpms - controller_0_reboot = [] - controller_0_no_reboot = [] - controller_1_reboot = [] - controller_1_no_reboot = [] - worker_hosts_reboot = [] - worker_hosts_no_reboot = [] - storage_hosts_reboot = [] - storage_hosts_no_reboot = [] - - # todo(abailey): refactor the code duplication from SwPatch - host_table = tables.tables_get_host_table() - for host in list(host_table.values()): - # filter the host out if we do not need to patch it - current, reboot = self._check_host_patch(host, - patches_to_apply) - if not current: - if HOST_NAME.CONTROLLER_0 == host.name: - if reboot: - controller_0_reboot.append(host) - else: - controller_0_no_reboot.append(host) - elif HOST_NAME.CONTROLLER_1 == host.name: - if reboot: - controller_1_reboot.append(host) - else: - controller_1_no_reboot.append(host) - elif HOST_PERSONALITY.STORAGE in host.personality: - if reboot: - storage_hosts_reboot.append(host) - else: - storage_hosts_no_reboot.append(host) - - # above, An AIO will be added to the controller list, but - # ignored internally by _add_controller_strategy_stages - # so we add it also to the worker list - if HOST_PERSONALITY.WORKER in host.personality: - # Ignore worker hosts that are powered down - if not host.is_offline(): - if reboot: - worker_hosts_reboot.append(host) - else: - worker_hosts_no_reboot.append(host) - - # always process but no-reboot before reboot - # for controllers of same mode, controller-1 before controller-0 - STRATEGY_CREATION_COMMANDS = [ - # controller-1 no-reboot - (self._add_controller_strategy_stages, - controller_1_no_reboot, - False), - (self._add_controller_strategy_stages, - controller_0_no_reboot, - False), - (self._add_controller_strategy_stages, - controller_1_reboot, - True), - (self._add_controller_strategy_stages, - controller_0_reboot, - True), - # then storage - (self._add_storage_strategy_stages, - storage_hosts_no_reboot, - False), - (self._add_storage_strategy_stages, - storage_hosts_reboot, - True), - # workers last - (self._add_worker_strategy_stages, - worker_hosts_no_reboot, - False), - (self._add_worker_strategy_stages, - worker_hosts_reboot, - True) - ] - - for add_strategy_stages_function, host_list, reboot in \ - STRATEGY_CREATION_COMMANDS: - if host_list: - # sort each host list by name before adding stages - sorted_host_list = sorted(host_list, - key=lambda host: host.name) - success, reason = 
@@ -3392,6 +3299,9 @@ class KubeUpgradeStrategy(SwUpdateStrategy,
             if kubelet_map.get(host.uuid) == self._to_version:
                 DLOG.info("Host %s kubelet already up to date" % host.name)
                 continue
+            if kubelet_map.get(host.uuid) == kube_ver:
+                DLOG.info("Host %s kubelet already at interim version" % host.name)
+                continue
             if HOST_PERSONALITY.CONTROLLER in host.personality:
                 if HOST_NAME.CONTROLLER_0 == host.name:
                     if HOST_PERSONALITY.WORKER in host.personality:
@@ -3412,34 +3322,38 @@ class KubeUpgradeStrategy(SwUpdateStrategy,
 
         # kubelet order is: controller-1, controller-0 then workers
         # storage nodes can be skipped
+        # we only include 'reboot' in a duplex env (includes workers)
+        reboot_default = not self._single_controller  # We do NOT reboot an AIO-SX host
         HOST_STAGES = [
             (self._add_kubelet_controller_strategy_stages,
              controller_1_std,
-             True),
+             reboot_default),
             (self._add_kubelet_controller_strategy_stages,
              controller_0_std,
-             True),
+             reboot_default),
             (self._add_kubelet_worker_strategy_stages,
              controller_1_workers,
-             True),
+             reboot_default),
             (self._add_kubelet_worker_strategy_stages,
              controller_0_workers,
-             not self._single_controller),  # We do NOT reboot an AIO-SX host
+             reboot_default),
             (self._add_kubelet_worker_strategy_stages,
              worker_hosts,
-             True)
+             reboot_default)
         ]
+        stage_name = "kube-upgrade-kubelet %s" % kube_ver
         for add_kubelet_stages_function, host_list, reboot in HOST_STAGES:
             if host_list:
                 sorted_host_list = sorted(host_list,
                                           key=lambda host: host.name)
                 success, reason = add_kubelet_stages_function(sorted_host_list,
-                                                              reboot)
+                                                              kube_ver,
+                                                              reboot,
+                                                              stage_name)
+                # todo(abailey): We need to revisit whether this can ever fail
                 if not success:
                     self.report_build_failure(reason)
                     return
 
-        # stage after kubelets is kube upgrade complete stage
-        self._add_kube_upgrade_complete_stage()
 
     def _add_kube_upgrade_complete_stage(self):
         """
@@ -3546,29 +3460,29 @@ class KubeUpgradeStrategy(SwUpdateStrategy,
 
             # After networking -> upgrade first control plane
             nfvi.objects.v1.KUBE_UPGRADE_STATE.KUBE_UPGRADED_NETWORKING:
-                self._add_kube_upgrade_first_control_plane_stage,
+                self._add_kube_update_stages,
 
             # if upgrading first control plane failed, resume there
             nfvi.objects.v1.KUBE_UPGRADE_STATE.KUBE_UPGRADING_FIRST_MASTER_FAILED:
-                self._add_kube_upgrade_first_control_plane_stage,
+                self._add_kube_update_stages,
 
             # After first control plane -> upgrade second control plane
             nfvi.objects.v1.KUBE_UPGRADE_STATE.KUBE_UPGRADED_FIRST_MASTER:
-                self._add_kube_upgrade_second_control_plane_stage,
+                self._add_kube_update_stages,
 
-            # if upgrading second control plane failed, resume there
+            # Re-attempt second control plane
            nfvi.objects.v1.KUBE_UPGRADE_STATE.KUBE_UPGRADING_SECOND_MASTER_FAILED:
-                self._add_kube_upgrade_second_control_plane_stage,
+                self._add_kube_update_stages,
 
-            # After second control plane , proceed with patching
+            # After second control plane, do kubelets
             nfvi.objects.v1.KUBE_UPGRADE_STATE.KUBE_UPGRADED_SECOND_MASTER:
-                self._add_kube_upgrade_patch_stage,
+                self._add_kube_update_stages,
 
-            # kubelets are next kube upgrade phase after second patch applied
+            # kubelets transition to 'complete' when they are done
             nfvi.objects.v1.KUBE_UPGRADE_STATE.KUBE_UPGRADING_KUBELETS:
-                self._add_kube_upgrade_kubelets_stage,
+                self._add_kube_update_stages,
 
-            # kubelets applied and upgrade is completed, delete the upgrade
+            # upgrade is completed, delete the upgrade
             nfvi.objects.v1.KUBE_UPGRADE_STATE.KUBE_UPGRADE_COMPLETE:
                 self._add_kube_upgrade_cleanup_stage,
         }
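The table above now funnels every intermediate kube-upgrade state into the same handler, so a strategy built after a partial upgrade re-enters the per-version loop rather than a state-specific stage. A reduced, standalone illustration of that resume pattern follows; the state names and handlers here are simplified stand-ins, not the real nfv_vim constants.

UPGRADED_NETWORKING = 'upgraded-networking'
UPGRADED_FIRST_MASTER = 'upgraded-first-master'
UPGRADE_COMPLETE = 'upgrade-complete'


def add_kube_update_stages():
    # a single handler re-enters the control-plane/kubelet loop
    return 'resume control-plane/kubelet loop'


def add_cleanup_stage():
    return 'delete the kube upgrade'


RESUME_STATE = {
    UPGRADED_NETWORKING: add_kube_update_stages,
    UPGRADED_FIRST_MASTER: add_kube_update_stages,
    UPGRADE_COMPLETE: add_cleanup_stage,
}


def resume(current_state):
    handler = RESUME_STATE.get(current_state)
    if handler is None:
        raise ValueError("cannot resume from state: %s" % current_state)
    return handler()


assert resume(UPGRADED_FIRST_MASTER) == 'resume control-plane/kubelet loop'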
@@ -3627,6 +3541,7 @@ class KubeUpgradeStrategy(SwUpdateStrategy,
             self._add_kube_upgrade_start_stage()
         else:
             # Determine which stage to resume at
+            # this is complicated due to the 'loop'
             current_state = self.nfvi_kube_upgrade.state
             resume_from_stage = RESUME_STATE.get(current_state)
             if resume_from_stage is None:
diff --git a/nfv/nfv-vim/nfv_vim/strategy/_strategy_steps.py b/nfv/nfv-vim/nfv_vim/strategy/_strategy_steps.py
index 1dd65c9e..bbaea035 100755
--- a/nfv/nfv-vim/nfv_vim/strategy/_strategy_steps.py
+++ b/nfv/nfv-vim/nfv_vim/strategy/_strategy_steps.py
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2015-2022 Wind River Systems, Inc.
+# Copyright (c) 2015-2023 Wind River Systems, Inc.
 #
 # SPDX-License-Identifier: Apache-2.0
 #
@@ -4112,6 +4112,7 @@ class AbstractKubeHostUpgradeStep(AbstractKubeUpgradeStep):
     """
 
     def __init__(self, host,
+                 to_version,
                  force,
                  step_name,
                  success_state,
@@ -4122,6 +4123,7 @@ class AbstractKubeHostUpgradeStep(AbstractKubeUpgradeStep):
             success_state,
             fail_state,
             timeout_in_secs=timeout_in_secs)
+        self._to_version = to_version
         self._force = force
         # This class accepts only a single host
         # but serializes as a list of hosts (list size of one)
@@ -4137,6 +4139,7 @@ class AbstractKubeHostUpgradeStep(AbstractKubeUpgradeStep):
         Returns the step object initialized using the given dictionary
         """
         super(AbstractKubeHostUpgradeStep, self).from_dict(data)
+        self._to_version = data['to_version']
         self._force = data['force']
         self._hosts = list()
         self._host_uuids = list()
@@ -4154,6 +4157,7 @@ class AbstractKubeHostUpgradeStep(AbstractKubeUpgradeStep):
         Represent the step as a dictionary
         """
         data = super(AbstractKubeHostUpgradeStep, self).as_dict()
+        data['to_version'] = self._to_version
         data['force'] = self._force
         data['entity_type'] = 'hosts'
         data['entity_names'] = self._host_names
@@ -4166,9 +4170,11 @@ class AbstractKubeHostListUpgradeStep(AbstractKubeUpgradeStep):
     This operation issues a host command, which updates the kube upgrade object
     It operates on a list of hosts
+    Kube host operations can have intermediate (to_version) steps
     """
 
     def __init__(self, hosts,
+                 to_version,
                  force,
                  step_name,
                  success_state,
@@ -4179,6 +4185,7 @@ class AbstractKubeHostListUpgradeStep(AbstractKubeUpgradeStep):
             success_state,
             fail_state,
             timeout_in_secs=timeout_in_secs)
+        self._to_version = to_version
         self._force = force
         self._hosts = hosts
         self._host_names = list()
@@ -4192,6 +4199,7 @@ class AbstractKubeHostListUpgradeStep(AbstractKubeUpgradeStep):
         Returns the step object initialized using the given dictionary
         """
         super(AbstractKubeHostListUpgradeStep, self).from_dict(data)
+        self._to_version = data['to_version']
         self._force = data['force']
         self._hosts = list()
         self._host_uuids = list()
@@ -4209,6 +4217,7 @@ class AbstractKubeHostListUpgradeStep(AbstractKubeUpgradeStep):
         Represent the step as a dictionary
         """
         data = super(AbstractKubeHostListUpgradeStep, self).as_dict()
+        data['to_version'] = self._to_version
         data['force'] = self._force
         data['entity_type'] = 'hosts'
         data['entity_names'] = self._host_names
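The as_dict()/from_dict() changes above matter because strategy steps are persisted as dictionaries; an attribute that is not written out and read back is lost when a saved strategy is rebuilt. A minimal sketch of that round-trip, using a toy class rather than the real step hierarchy:

class ToyKubeHostStep(object):
    """Toy stand-in showing why 'to_version' must round-trip through a dict."""

    def __init__(self, hosts=None, to_version=None, force=False):
        self._hosts = hosts or []
        self._to_version = to_version
        self._force = force

    def as_dict(self):
        # every attribute that must survive persistence goes in here
        return {
            'entity_names': list(self._hosts),
            'to_version': self._to_version,
            'force': self._force,
        }

    @classmethod
    def from_dict(cls, data):
        # and must be restored here, or it silently reverts to a default
        return cls(hosts=data['entity_names'],
                   to_version=data['to_version'],
                   force=data['force'])


step = ToyKubeHostStep(hosts=['controller-0'], to_version='1.2.4', force=True)
restored = ToyKubeHostStep.from_dict(step.as_dict())
assert restored.as_dict() == step.as_dict()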
@@ -4222,14 +4231,16 @@ class KubeHostUpgradeControlPlaneStep(AbstractKubeHostUpgradeStep):
     This operation issues a host command, which updates the kube upgrade object
     """
 
-    def __init__(self, host, force, target_state, target_failure_state):
+    def __init__(self, host, to_version, force, target_state, target_failure_state,
+                 timeout_in_secs=600):
         super(KubeHostUpgradeControlPlaneStep, self).__init__(
             host,
+            to_version,
             force,
             STRATEGY_STEP_NAME.KUBE_HOST_UPGRADE_CONTROL_PLANE,
             target_state,
             target_failure_state,
-            timeout_in_secs=600)
+            timeout_in_secs)
 
     def handle_event(self, event, event_data=None):
         """
@@ -4275,12 +4286,12 @@ class KubeHostUpgradeKubeletStep(AbstractKubeHostListUpgradeStep):
     This operation issues a host command, which indirectly updates the kube
     upgrade object, however additional calls to other hosts do not change it.
-    This step should only be invoked on locked hosts.
     """
 
-    def __init__(self, hosts, force=True):
+    def __init__(self, hosts, to_version, force=True):
         super(KubeHostUpgradeKubeletStep, self).__init__(
             hosts,
+            to_version,
             force,
             STRATEGY_STEP_NAME.KUBE_HOST_UPGRADE_KUBELET,
             None,  # there is no kube upgrade success state for kubelets
@@ -4303,7 +4314,7 @@ class KubeHostUpgradeKubeletStep(AbstractKubeHostListUpgradeStep):
         for host_uuid in self._host_uuids:
             for k_host in self.strategy.nfvi_kube_host_upgrade_list:
                 if k_host.host_uuid == host_uuid:
-                    if k_host.kubelet_version == self.strategy.to_version:
+                    if k_host.kubelet_version == self._to_version:
                         match_count += 1
                     host_count += 1
                     # break out of inner loop, since uuids match
@@ -4378,7 +4389,9 @@ def strategy_step_rebuild_from_dict(data):
     """
     rebuild_map = {
         STRATEGY_STEP_NAME.APPLY_PATCHES: ApplySwPatchesStep,
+        #
         # kube rootca update steps
+        #
         STRATEGY_STEP_NAME.KUBE_ROOTCA_UPDATE_ABORT:
             KubeRootcaUpdateAbortStep,
         STRATEGY_STEP_NAME.KUBE_ROOTCA_UPDATE_COMPLETE:
@@ -4403,7 +4416,9 @@ def strategy_step_rebuild_from_dict(data):
             QueryKubeRootcaUpdateStep,
         STRATEGY_STEP_NAME.QUERY_KUBE_ROOTCA_HOST_UPDATES:
             QueryKubeRootcaHostUpdatesStep,
+        #
         # kube upgrade steps
+        #
         STRATEGY_STEP_NAME.KUBE_HOST_UPGRADE_CONTROL_PLANE:
             KubeHostUpgradeControlPlaneStep,
         STRATEGY_STEP_NAME.KUBE_HOST_UPGRADE_KUBELET: