diff --git a/nfv/nfv-plugins/nfv_plugins/nfvi_plugins/nfvi_infrastructure_api.py b/nfv/nfv-plugins/nfv_plugins/nfvi_plugins/nfvi_infrastructure_api.py index eaa4f6cb..657782cd 100755 --- a/nfv/nfv-plugins/nfv_plugins/nfvi_plugins/nfvi_infrastructure_api.py +++ b/nfv/nfv-plugins/nfv_plugins/nfvi_plugins/nfvi_infrastructure_api.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2015-2021 Wind River Systems, Inc. +# Copyright (c) 2015-2023 Wind River Systems, Inc. # # SPDX-License-Identifier: Apache-2.0 # @@ -3183,6 +3183,120 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI): callback.send(response) callback.close() + def kube_host_cordon(self, future, host_uuid, host_name, force, callback): + """ + Cordon a host + """ + + # ignoring the force argument for now + response = dict() + response['completed'] = False + response['host_uuid'] = host_uuid + response['host_name'] = host_name + response['reason'] = '' + + action_type = 'kube-host-cordon' + sysinv_method = sysinv.kube_host_cordon + try: + future.set_timeouts(config.CONF.get('nfvi-timeouts', None)) + + if self._platform_token is None or \ + self._platform_token.is_expired(): + future.work(openstack.get_token, self._platform_directory) + future.result = (yield) + + if not future.result.is_complete() or \ + future.result.data is None: + DLOG.error("OpenStack get-token did not complete, " + "host_uuid=%s." 
% host_uuid) + return + + self._platform_token = future.result.data + + # cordon wants a hostname and not a host_uuid + future.work(sysinv_method, self._platform_token, host_name, force) + future.result = (yield) + + if not future.result.is_complete(): + return + + response['completed'] = True + + except exceptions.OpenStackRestAPIException as e: + if httplib.UNAUTHORIZED == e.http_status_code: + response['error-code'] = nfvi.NFVI_ERROR_CODE.TOKEN_EXPIRED + if self._platform_token is not None: + self._platform_token.set_expired() + + else: + DLOG.exception("Caught exception while trying to %s " + "a host %s, error=%s." % (action_type, host_name, e)) + response['reason'] = e.http_response_reason + + except Exception as e: + DLOG.exception("Caught exception while trying to %s a " + "host %s, error=%s." % (action_type, host_name, e)) + + finally: + callback.send(response) + callback.close() + + def kube_host_uncordon(self, future, host_uuid, host_name, force, callback): + """ + Uncordon a host + """ + response = dict() + response['completed'] = False + response['host_uuid'] = host_uuid + response['host_name'] = host_name + response['reason'] = '' + + action_type = 'kube-host-uncordon' + sysinv_method = sysinv.kube_host_uncordon + try: + future.set_timeouts(config.CONF.get('nfvi-timeouts', None)) + + if self._platform_token is None or \ + self._platform_token.is_expired(): + future.work(openstack.get_token, self._platform_directory) + future.result = (yield) + + if not future.result.is_complete() or \ + future.result.data is None: + DLOG.error("OpenStack get-token did not complete, " + "host_uuid=%s." 
% host_uuid) + return + + self._platform_token = future.result.data + + # uncordon wants a hostname and not a host_uuid + future.work(sysinv_method, self._platform_token, host_name, force) + future.result = (yield) + + if not future.result.is_complete(): + return + + response['completed'] = True + + except exceptions.OpenStackRestAPIException as e: + if httplib.UNAUTHORIZED == e.http_status_code: + response['error-code'] = nfvi.NFVI_ERROR_CODE.TOKEN_EXPIRED + if self._platform_token is not None: + self._platform_token.set_expired() + + else: + DLOG.exception("Caught exception while trying to %s " + "a host %s, error=%s." % (action_type, host_name, e)) + response['reason'] = e.http_response_reason + + except Exception as e: + DLOG.exception("Caught exception while trying to %s a " + "host %s, error=%s." % (action_type, host_name, e)) + + finally: + callback.send(response) + callback.close() + def lock_host(self, future, host_uuid, host_name, callback): """ Lock a host diff --git a/nfv/nfv-plugins/nfv_plugins/nfvi_plugins/openstack/sysinv.py b/nfv/nfv-plugins/nfv_plugins/nfvi_plugins/openstack/sysinv.py index 13717826..bb6a2599 100755 --- a/nfv/nfv-plugins/nfv_plugins/nfvi_plugins/openstack/sysinv.py +++ b/nfv/nfv-plugins/nfv_plugins/nfvi_plugins/openstack/sysinv.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2015-2021 Wind River Systems, Inc. +# Copyright (c) 2015-2023 Wind River Systems, Inc. 
# # SPDX-License-Identifier: Apache-2.0 # @@ -383,7 +383,12 @@ def kube_upgrade_start(token, to_version, force=False, alarm_ignore_list=None): return response -def _patch_kube_upgrade_state(token, new_value): +def api_data_patch(path, value, op="replace"): + # the 'path' is prefixed with a leading '/' + return {'path': '/' + path, 'value': value, 'op': op} + + +def _patch_kube_upgrade_state(token, new_value, hostname=None): url = token.get_service_url(PLATFORM_SERVICE.SYSINV) if url is None: raise ValueError("OpenStack SysInv URL is invalid") @@ -395,11 +400,10 @@ def _patch_kube_upgrade_state(token, new_value): api_cmd_headers['User-Agent'] = "vim/1.0" api_cmd_payload = list() - host_data = dict() - host_data['path'] = "/state" - host_data['value'] = new_value - host_data['op'] = "replace" - api_cmd_payload.append(host_data) + api_cmd_payload.append(api_data_patch('state', new_value)) + # some kube upgrade patch commands take a hostname + if hostname is not None: + api_cmd_payload.append(api_data_patch('hostname', hostname)) return rest_api_request(token, "PATCH", @@ -449,6 +453,23 @@ def kube_upgrade_networking(token): return _patch_kube_upgrade_state(token, "upgrading-networking") +def kube_host_cordon(token, hostname, force): + """ + system kube-host-cordon + force is a 'string' but is currently unused + """ + # cordon needs a 'hostname' + return _patch_kube_upgrade_state(token, "cordon-started", hostname=hostname) + + +def kube_host_uncordon(token, hostname, force): + """ + system kube-host-uncordon + force is a 'string' but is currently unused + """ + return _patch_kube_upgrade_state(token, "uncordon-started", hostname=hostname) + + def _kube_host_upgrade(token, host_uuid, target_operation, force): """ Invoke a POST for a host kube-upgrade operation diff --git a/nfv/nfv-tests/nfv_unit_tests/tests/test_kube_upgrade_strategy.py b/nfv/nfv-tests/nfv_unit_tests/tests/test_kube_upgrade_strategy.py index f9273ec4..3c9c560e 100755 --- 
a/nfv/nfv-tests/nfv_unit_tests/tests/test_kube_upgrade_strategy.py +++ b/nfv/nfv-tests/nfv_unit_tests/tests/test_kube_upgrade_strategy.py @@ -283,6 +283,28 @@ class ApplyStageMixin(object): ], } + def _kube_host_cordon_stage(self, ver="N/A"): + return { + 'name': 'kube-host-cordon', + 'total_steps': 1, + 'steps': [ + {'name': 'kube-host-cordon', + 'success_state': 'cordon-complete', + 'fail_state': 'cordon-failed'}, + ], + } + + def _kube_host_uncordon_stage(self, ver="N/A"): + return { + 'name': 'kube-host-uncordon', + 'total_steps': 1, + 'steps': [ + {'name': 'kube-host-uncordon', + 'success_state': 'uncordon-complete', + 'fail_state': 'uncordon-failed'}, + ], + } + def _kube_upgrade_first_control_plane_stage(self, ver): return { 'name': 'kube-upgrade-first-control-plane %s' % ver, @@ -462,15 +484,21 @@ class ApplyStageMixin(object): add_start=True, add_download=True, add_networking=True, + add_cordon=True, add_first_control_plane=True, add_second_control_plane=True, add_kubelets=True, + add_uncordon=True, add_complete=True, add_cleanup=True): """The order of the host_list determines the kubelets""" # We never add a second control plane on a simplex if self.is_simplex(): add_second_control_plane = False + # we do not support cordon and uncordon in duplex + if self.is_duplex(): + add_cordon = False + add_uncordon = False stages = [] if add_start: stages.append(self._kube_upgrade_start_stage()) @@ -478,6 +506,8 @@ class ApplyStageMixin(object): stages.append(self._kube_upgrade_download_images_stage()) if add_networking: stages.append(self._kube_upgrade_networking_stage()) + if add_cordon: + stages.append(self._kube_host_cordon_stage()) for ver in self.kube_versions: if add_first_control_plane: stages.append(self._kube_upgrade_first_control_plane_stage(ver)) @@ -489,6 +519,8 @@ class ApplyStageMixin(object): std_controller_list, aio_controller_list, worker_list)) + if add_uncordon: + stages.append(self._kube_host_uncordon_stage()) if add_complete: 
stages.append(self._kube_upgrade_complete_stage()) if add_cleanup: @@ -590,6 +622,8 @@ class TestSimplexApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase, self._kube_upgrade_download_images_stage(), self._kube_upgrade_networking_stage(), ] + if self.is_simplex(): + stages.append(self._kube_host_cordon_stage()) for ver in self.kube_versions: stages.append(self._kube_upgrade_first_control_plane_stage(ver)) stages.extend(self._kube_upgrade_kubelet_stages( @@ -597,6 +631,8 @@ class TestSimplexApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase, self.std_controller_list, self.aio_controller_list, self.worker_list)) + if self.is_simplex(): + stages.append(self._kube_host_uncordon_stage()) stages.extend([ self._kube_upgrade_complete_stage(), self._kube_upgrade_cleanup_stage(), @@ -616,6 +652,8 @@ class TestSimplexApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase, stages = [ self._kube_upgrade_networking_stage(), ] + if self.is_simplex(): + stages.append(self._kube_host_cordon_stage()) for ver in self.kube_versions: stages.append(self._kube_upgrade_first_control_plane_stage( ver)) @@ -624,6 +662,8 @@ class TestSimplexApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase, self.std_controller_list, self.aio_controller_list, self.worker_list)) + if self.is_simplex(): + stages.append(self._kube_host_uncordon_stage()) stages.extend([ self._kube_upgrade_complete_stage(), self._kube_upgrade_cleanup_stage(), @@ -649,6 +689,8 @@ class TestSimplexApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase, self.std_controller_list, self.aio_controller_list, self.worker_list)) + if self.is_simplex(): + stages.append(self._kube_host_uncordon_stage()) stages.extend([ self._kube_upgrade_complete_stage(), self._kube_upgrade_cleanup_stage(), @@ -672,6 +714,8 @@ class TestSimplexApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase, self.std_controller_list, self.aio_controller_list, self.worker_list)) + if self.is_simplex(): + 
stages.append(self._kube_host_uncordon_stage()) stages.extend([ self._kube_upgrade_complete_stage(), self._kube_upgrade_cleanup_stage(), @@ -691,6 +735,8 @@ class TestSimplexApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase, stages = [ self._kube_upgrade_networking_stage(), ] + if self.is_simplex(): + stages.append(self._kube_host_cordon_stage()) for ver in self.kube_versions: stages.append(self._kube_upgrade_first_control_plane_stage( ver)) @@ -699,6 +745,8 @@ class TestSimplexApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase, self.std_controller_list, self.aio_controller_list, self.worker_list)) + if self.is_simplex(): + stages.append(self._kube_host_uncordon_stage()) stages.extend([ self._kube_upgrade_complete_stage(), self._kube_upgrade_cleanup_stage(), @@ -716,6 +764,8 @@ class TestSimplexApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase, self.default_from_version, self.default_to_version) stages = [] + if self.is_simplex(): + stages.append(self._kube_host_cordon_stage()) for ver in self.kube_versions: stages.append(self._kube_upgrade_first_control_plane_stage( ver)) @@ -724,6 +774,8 @@ class TestSimplexApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase, self.std_controller_list, self.aio_controller_list, self.worker_list)) + if self.is_simplex(): + stages.append(self._kube_host_uncordon_stage()) stages.extend([ self._kube_upgrade_complete_stage(), self._kube_upgrade_cleanup_stage(), @@ -749,6 +801,8 @@ class TestSimplexApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase, self.std_controller_list, self.aio_controller_list, self.worker_list)) + if self.is_simplex(): + stages.append(self._kube_host_uncordon_stage()) stages.extend([ self._kube_upgrade_complete_stage(), self._kube_upgrade_cleanup_stage(), @@ -774,6 +828,8 @@ class TestSimplexApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase, self.std_controller_list, self.aio_controller_list, self.worker_list)) + if self.is_simplex(): + 
stages.append(self._kube_host_uncordon_stage()) stages.extend([ self._kube_upgrade_complete_stage(), self._kube_upgrade_cleanup_stage(), diff --git a/nfv/nfv-vim/nfv_vim/directors/_host_director.py b/nfv/nfv-vim/nfv_vim/directors/_host_director.py index fda7799e..ec0b11f6 100755 --- a/nfv/nfv-vim/nfv_vim/directors/_host_director.py +++ b/nfv/nfv-vim/nfv_vim/directors/_host_director.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2015-2021 Wind River Systems, Inc. +# Copyright (c) 2015-2023 Wind River Systems, Inc. # # SPDX-License-Identifier: Apache-2.0 # @@ -50,7 +50,7 @@ class HostDirector(object): return if self._host_operation is None: - DLOG.verbose("No host %s operation inprogress." % host.name) + DLOG.verbose("No host %s operation in progress." % host.name) return if OPERATION_TYPE.LOCK_HOSTS != self._host_operation.operation_type: @@ -89,7 +89,7 @@ class HostDirector(object): return if self._host_operation is None: - DLOG.verbose("No host %s operation inprogress." % host.name) + DLOG.verbose("No host %s operation in progress." % host.name) return if OPERATION_TYPE.DISABLE_HOST_SERVICES != \ @@ -145,7 +145,7 @@ class HostDirector(object): return if self._host_operation is None: - DLOG.verbose("No host %s operation inprogress." % host.name) + DLOG.verbose("No host %s operation in progress." % host.name) return if OPERATION_TYPE.ENABLE_HOST_SERVICES != \ @@ -206,7 +206,7 @@ class HostDirector(object): return if self._host_operation is None: - DLOG.verbose("No host %s operation inprogress." % host.name) + DLOG.verbose("No host %s operation in progress." % host.name) return if OPERATION_TYPE.UNLOCK_HOSTS != self._host_operation.operation_type: @@ -244,7 +244,7 @@ class HostDirector(object): return if self._host_operation is None: - DLOG.verbose("No host %s operation inprogress." % host.name) + DLOG.verbose("No host %s operation in progress." 
% host.name) return if OPERATION_TYPE.REBOOT_HOSTS != self._host_operation.operation_type: @@ -282,7 +282,7 @@ class HostDirector(object): return if self._host_operation is None: - DLOG.verbose("No host %s operation inprogress." % host.name) + DLOG.verbose("No host %s operation in progress." % host.name) return if OPERATION_TYPE.UPGRADE_HOSTS != self._host_operation.operation_type: @@ -320,7 +320,7 @@ class HostDirector(object): return if self._host_operation is None: - DLOG.verbose("No host %s operation inprogress." % host.name) + DLOG.verbose("No host %s operation in progress." % host.name) return if OPERATION_TYPE.SWACT_HOSTS != self._host_operation.operation_type: @@ -358,7 +358,7 @@ class HostDirector(object): return if self._host_operation is None: - DLOG.verbose("No host %s operation inprogress." % host.name) + DLOG.verbose("No host %s operation in progress." % host.name) return if OPERATION_TYPE.FW_UPDATE_HOSTS != self._host_operation.operation_type: @@ -395,7 +395,7 @@ class HostDirector(object): return if self._host_operation is None: - DLOG.verbose("No host %s operation inprogress." % host.name) + DLOG.verbose("No host %s operation in progress." % host.name) return if OPERATION_TYPE.FW_UPDATE_ABORT_HOSTS != self._host_operation.operation_type: @@ -515,11 +515,11 @@ class HostDirector(object): @staticmethod def host_audit(host): """ - Notifies the host director that a host audit is inprogress + Notifies the host director that a host audit is in progress """ from nfv_vim import directors - DLOG.verbose("Notify other directors that a host %s audit is inprogress." + DLOG.verbose("Notify other directors that a host %s audit is in progress." 
% host.name) instance_director = directors.get_instance_director() instance_director.host_audit(host) @@ -530,11 +530,11 @@ class HostDirector(object): @staticmethod def host_abort(host): """ - Notifies the host director that a host abort is inprogress + Notifies the host director that a host abort is in progress """ from nfv_vim import directors - DLOG.info("Notify other directors that a host %s abort is inprogress." + DLOG.info("Notify other directors that a host %s abort is in progress." % host.name) instance_director = directors.get_instance_director() instance_director.host_operation_cancel(host.name) @@ -824,7 +824,7 @@ class HostDirector(object): return if self._host_operation is None: - DLOG.verbose("No host %s operation inprogress." % host.name) + DLOG.verbose("No host %s operation in progress." % host.name) return if OPERATION_TYPE.KUBE_UPGRADE_HOSTS != self._host_operation.operation_type: @@ -883,6 +883,162 @@ class HostDirector(object): return host_operation + # cordon + @coroutine + def _nfvi_kube_host_cordon_callback(self): + """ + NFVI Kube Host Cordon Callback + """ + from nfv_vim import directors + + response = (yield) + DLOG.verbose("NFVI Kube Host Cordon response=%s." % response) + if not response['completed']: + DLOG.info("Kube Host Upgrade Cordon failed. Host:%s, reason=%s." + % (response['host_name'], response['reason'])) + + host_table = tables.tables_get_host_table() + host = host_table.get(response['host_name'], None) + if host is None: + DLOG.verbose("Host %s does not exist." % response['host_name']) + return + + if self._host_operation is None: + DLOG.verbose("No host %s operation in progress." % host.name) + return + + if OPERATION_TYPE.KUBE_UPGRADE_HOSTS != self._host_operation.operation_type: + DLOG.verbose("Unexpected host %s operation %s, ignoring." 
+ % (host.name, self._host_operation.operation_type)) + return + + sw_mgmt_director = directors.get_sw_mgmt_director() + sw_mgmt_director.kube_host_cordon_failed(host) + + def _nfvi_kube_host_cordon(self, + host_uuid, + host_name, + force): + """ + NFVI Kube Host Cordon + """ + nfvi.nfvi_kube_host_cordon( + host_uuid, + host_name, + force, + self._nfvi_kube_host_cordon_callback()) + + def kube_host_cordon(self, host_names, force): + """ + Kube Host Cordon for multiple hosts + """ + DLOG.info("Kube Host Cordon for hosts: %s" % host_names) + + host_operation = \ + Operation(OPERATION_TYPE.KUBE_UPGRADE_HOSTS) + + if self._host_operation is not None: + DLOG.debug("Canceling previous host operation %s, before " + "continuing with host operation %s." + % (self._host_operation.operation_type, + host_operation.operation_type)) + self._host_operation = None + + host_table = tables.tables_get_host_table() + for host_name in host_names: + host = host_table.get(host_name, None) + if host is None: + reason = "Unknown host %s given." % host_name + DLOG.info(reason) + host_operation.set_failed(reason) + return host_operation + + host_operation.add_host(host.name, OPERATION_STATE.INPROGRESS) + self._nfvi_kube_host_cordon(host.uuid, + host.name, + force) + if host_operation.is_inprogress(): + self._host_operation = host_operation + return host_operation + + # uncordon + @coroutine + def _nfvi_kube_host_uncordon_callback(self): + """ + NFVI Kube Host Uncordon Callback + """ + from nfv_vim import directors + + response = (yield) + DLOG.verbose("NFVI Kube Host Uncordon response=%s." % response) + if not response['completed']: + DLOG.info("Kube Host Upgrade Uncordon failed. Host:%s, reason=%s." + % (response['host_name'], response['reason'])) + + host_table = tables.tables_get_host_table() + host = host_table.get(response['host_name'], None) + if host is None: + DLOG.verbose("Host %s does not exist." 
% response['host_name']) + return + + if self._host_operation is None: + DLOG.verbose("No host %s operation in progress." % host.name) + return + + if OPERATION_TYPE.KUBE_UPGRADE_HOSTS != self._host_operation.operation_type: + DLOG.verbose("Unexpected host %s operation %s, ignoring." + % (host.name, self._host_operation.operation_type)) + return + + sw_mgmt_director = directors.get_sw_mgmt_director() + sw_mgmt_director.kube_host_uncordon_failed(host) + + def _nfvi_kube_host_uncordon(self, + host_uuid, + host_name, + force): + """ + NFVI Kube Host Uncordon + """ + nfvi.nfvi_kube_host_uncordon( + host_uuid, + host_name, + force, + self._nfvi_kube_host_uncordon_callback()) + + def kube_host_uncordon(self, host_names, force): + """ + Kube Host Uncordon for multiple hosts + """ + DLOG.info("Kube Host Uncordon for hosts: %s" % host_names) + + host_operation = \ + Operation(OPERATION_TYPE.KUBE_UPGRADE_HOSTS) + + if self._host_operation is not None: + DLOG.debug("Canceling previous host operation %s, before " + "continuing with host operation %s." + % (self._host_operation.operation_type, + host_operation.operation_type)) + self._host_operation = None + + host_table = tables.tables_get_host_table() + for host_name in host_names: + host = host_table.get(host_name, None) + if host is None: + reason = "Unknown host %s given." % host_name + DLOG.info(reason) + host_operation.set_failed(reason) + return host_operation + + host_operation.add_host(host.name, OPERATION_STATE.INPROGRESS) + self._nfvi_kube_host_uncordon(host.uuid, + host.name, + force) + if host_operation.is_inprogress(): + self._host_operation = host_operation + return host_operation + @coroutine def _nfvi_kube_host_upgrade_kubelet_callback(self): """ @@ -904,7 +1060,7 @@ class HostDirector(object): return if self._host_operation is None: - DLOG.verbose("No host %s operation inprogress." % host.name) + DLOG.verbose("No host %s operation in progress." 
% host.name) return if OPERATION_TYPE.KUBE_UPGRADE_HOSTS != self._host_operation.operation_type: @@ -978,7 +1134,7 @@ class HostDirector(object): return if self._host_operation is None: - DLOG.verbose("No host %s operation inprogress." % host.name) + DLOG.verbose("No host %s operation in progress." % host.name) return if OPERATION_TYPE.KUBE_ROOTCA_UPDATE_HOSTS \ diff --git a/nfv/nfv-vim/nfv_vim/directors/_sw_mgmt_director.py b/nfv/nfv-vim/nfv_vim/directors/_sw_mgmt_director.py index 7d62b530..cba96732 100755 --- a/nfv/nfv-vim/nfv_vim/directors/_sw_mgmt_director.py +++ b/nfv/nfv-vim/nfv_vim/directors/_sw_mgmt_director.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2015-2021 Wind River Systems, Inc. +# Copyright (c) 2015-2023 Wind River Systems, Inc. # # SPDX-License-Identifier: Apache-2.0 # @@ -281,6 +281,22 @@ class SwMgmtDirector(object): self._sw_update.handle_event( strategy.STRATEGY_EVENT.ENABLE_HOST_SERVICES_FAILED, host) + def kube_host_cordon_failed(self, host): + """ + Called when a kube host cordon fails + """ + if self._sw_update is not None: + self._sw_update.handle_event( + strategy.STRATEGY_EVENT.KUBE_HOST_CORDON_FAILED, host) + + def kube_host_uncordon_failed(self, host): + """ + Called when a kube host uncordon fails + """ + if self._sw_update is not None: + self._sw_update.handle_event( + strategy.STRATEGY_EVENT.KUBE_HOST_UNCORDON_FAILED, host) + def host_unlock_failed(self, host): """ Called when a unlock of a host failed diff --git a/nfv/nfv-vim/nfv_vim/nfvi/__init__.py b/nfv/nfv-vim/nfv_vim/nfvi/__init__.py index fca28abf..d64faec5 100755 --- a/nfv/nfv-vim/nfv_vim/nfvi/__init__.py +++ b/nfv/nfv-vim/nfv_vim/nfvi/__init__.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2015-2021 Wind River Systems, Inc. +# Copyright (c) 2015-2023 Wind River Systems, Inc. 
# # SPDX-License-Identifier: Apache-2.0 # @@ -112,6 +112,8 @@ from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_get_terminating_pods from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_get_upgrade # noqa: F401 from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_host_device_image_update # noqa: F401 from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_host_device_image_update_abort # noqa: F401 +from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_kube_host_cordon # noqa: F401 +from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_kube_host_uncordon # noqa: F401 from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_kube_host_upgrade_control_plane # noqa: F401 from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_kube_host_upgrade_kubelet # noqa: F401 from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_kube_rootca_update_abort # noqa: F401 diff --git a/nfv/nfv-vim/nfv_vim/nfvi/_nfvi_infrastructure_module.py b/nfv/nfv-vim/nfv_vim/nfvi/_nfvi_infrastructure_module.py index 718856a8..76f34561 100755 --- a/nfv/nfv-vim/nfv_vim/nfvi/_nfvi_infrastructure_module.py +++ b/nfv/nfv-vim/nfv_vim/nfvi/_nfvi_infrastructure_module.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2015-2021 Wind River Systems, Inc. +# Copyright (c) 2015-2023 Wind River Systems, Inc. 
# # SPDX-License-Identifier: Apache-2.0 # @@ -100,6 +100,32 @@ def nfvi_host_device_image_update_abort(host_uuid, host_name, callback): return cmd_id +def nfvi_kube_host_cordon(host_uuid, host_name, force, callback): + """ + Kube Host Upgrade Cordon + """ + cmd_id = _infrastructure_plugin.invoke_plugin( + 'kube_host_cordon', + host_uuid, + host_name, + force, + callback=callback) + return cmd_id + + +def nfvi_kube_host_uncordon(host_uuid, host_name, force, callback): + """ + Kube Host Upgrade Uncordon + """ + cmd_id = _infrastructure_plugin.invoke_plugin( + 'kube_host_uncordon', + host_uuid, + host_name, + force, + callback=callback) + return cmd_id + + def nfvi_kube_host_upgrade_control_plane(host_uuid, host_name, force, callback): """ Kube Host Upgrade Control Plane diff --git a/nfv/nfv-vim/nfv_vim/nfvi/objects/v1/_kube_upgrade.py b/nfv/nfv-vim/nfv_vim/nfvi/objects/v1/_kube_upgrade.py index d8f10b9d..d3d782ed 100755 --- a/nfv/nfv-vim/nfv_vim/nfvi/objects/v1/_kube_upgrade.py +++ b/nfv/nfv-vim/nfv_vim/nfvi/objects/v1/_kube_upgrade.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2016-2021 Wind River Systems, Inc. +# Copyright (c) 2016-2023 Wind River Systems, Inc. 
# # SPDX-License-Identifier: Apache-2.0 # @@ -51,6 +51,12 @@ class KubeUpgradeState(Constants): KUBE_UPGRADED_SECOND_MASTER = Constant('upgraded-second-master') KUBE_UPGRADING_KUBELETS = Constant('upgrading-kubelets') KUBE_UPGRADE_COMPLETE = Constant('upgrade-complete') + KUBE_HOST_CORDON = Constant('cordon-started') + KUBE_HOST_CORDON_COMPLETE = Constant('cordon-complete') + KUBE_HOST_CORDON_FAILED = Constant('cordon-failed') + KUBE_HOST_UNCORDON = Constant('uncordon-started') + KUBE_HOST_UNCORDON_COMPLETE = Constant('uncordon-complete') + KUBE_HOST_UNCORDON_FAILED = Constant('uncordon-failed') # Kube Upgrade Constant Instantiation diff --git a/nfv/nfv-vim/nfv_vim/strategy/__init__.py b/nfv/nfv-vim/nfv_vim/strategy/__init__.py index 3b721982..13992997 100755 --- a/nfv/nfv-vim/nfv_vim/strategy/__init__.py +++ b/nfv/nfv-vim/nfv_vim/strategy/__init__.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2015-2021 Wind River Systems, Inc. +# Copyright (c) 2015-2023 Wind River Systems, Inc. # # SPDX-License-Identifier: Apache-2.0 # @@ -16,6 +16,8 @@ from nfv_vim.strategy._strategy_steps import ApplySwPatchesStep # noqa: F401 from nfv_vim.strategy._strategy_steps import DisableHostServicesStep # noqa: F401 from nfv_vim.strategy._strategy_steps import FwUpdateAbortHostsStep # noqa: F401 from nfv_vim.strategy._strategy_steps import FwUpdateHostsStep # noqa: F401 +from nfv_vim.strategy._strategy_steps import KubeHostCordonStep # noqa: F401 +from nfv_vim.strategy._strategy_steps import KubeHostUncordonStep # noqa: F401 from nfv_vim.strategy._strategy_steps import KubeHostUpgradeControlPlaneStep # noqa: F401 from nfv_vim.strategy._strategy_steps import KubeHostUpgradeKubeletStep # noqa: F401 from nfv_vim.strategy._strategy_steps import KubeRootcaUpdateCompleteStep # noqa: F401 diff --git a/nfv/nfv-vim/nfv_vim/strategy/_strategy.py b/nfv/nfv-vim/nfv_vim/strategy/_strategy.py index 34f64980..d4af102f 100755 --- a/nfv/nfv-vim/nfv_vim/strategy/_strategy.py +++ 
b/nfv/nfv-vim/nfv_vim/strategy/_strategy.py @@ -3097,13 +3097,16 @@ class KubeUpgradeStrategy(SwUpdateStrategy, # 'upgrade_from' value is a list of versions however the # list should only ever be a single entry so we get the first # value and allow an exception to be raised if the list is empty + # todo(abailey): if the list contains more than one entry the + # algorithm may not work, since it may not converge at the active version. ver = kube['upgrade_from'][0] + # go around the loop again... # We should NEVER get into an infinite loop, but if the kube-version entries # in sysinv are malformed, we do not want to spin forever loop_count += 1 - if loop_count > 100: + if loop_count > 10: raise Exception("Invalid kubernetes dependency chain detected") return kube_sequence @@ -3169,21 +3172,55 @@ class KubeUpgradeStrategy(SwUpdateStrategy, stage.add_step(strategy.KubeUpgradeNetworkingStep()) self.apply_phase.add_stage(stage) - # need to update control plane and kubelet per-version - self._add_kube_update_stages() + # Next stage after networking is cordon + self._add_kube_host_cordon_stage() - def _add_kube_update_stages(self): - # for a particular version, the order is: - # - first control plane - # - second control plane - # - kubelets + def _add_kube_host_cordon_stage(self): + """Add host cordon stage for a host""" + # simplex only from nfv_vim import nfvi from nfv_vim import strategy + + first_host = self.get_first_host() + second_host = self.get_second_host() + is_simplex = second_host is None + if is_simplex: + # todo(abailey): add rollback support to trigger uncordon + stage = strategy.StrategyStage( + strategy.STRATEGY_STAGE_NAME.KUBE_HOST_CORDON) + stage.add_step(strategy.KubeHostCordonStep( + first_host, + self._to_version, + False, # force + nfvi.objects.v1.KUBE_UPGRADE_STATE.KUBE_HOST_CORDON_COMPLETE, + nfvi.objects.v1.KUBE_UPGRADE_STATE.KUBE_HOST_CORDON_FAILED) + ) + self.apply_phase.add_stage(stage) + self._add_kube_update_stages() + + def 
_add_kube_update_stages(self): + """Stages for control plane, kubelet and cordon""" + # Algorithm + # ------------------------- + # Simplex: + # - loop over kube versions + # - control plane + # - kubelet + # ------------------------- + # Duplex: + # - loop over kube versions + # - first control plane + # - second control plane + # - kubelets + # ------------------------- + from nfv_vim import nfvi + from nfv_vim import strategy + first_host = self.get_first_host() second_host = self.get_second_host() ver_list = self._get_kube_version_steps(self._to_version, - self._nfvi_kube_versions_list) + self._nfvi_kube_versions_list) prev_state = None if self.nfvi_kube_upgrade is not None: @@ -3225,6 +3262,29 @@ class KubeUpgradeStrategy(SwUpdateStrategy, if self._state == strategy.STRATEGY_STATE.BUILD_FAILED: return + self._add_kube_host_uncordon_stage() + + def _add_kube_host_uncordon_stage(self): + """Add host uncordon stage for a host""" + # simplex only + + from nfv_vim import nfvi + from nfv_vim import strategy + + first_host = self.get_first_host() + second_host = self.get_second_host() + is_simplex = second_host is None + if is_simplex: + stage = strategy.StrategyStage( + strategy.STRATEGY_STAGE_NAME.KUBE_HOST_UNCORDON) + stage.add_step(strategy.KubeHostUncordonStep( + first_host, + self._to_version, + False, # force + nfvi.objects.v1.KUBE_UPGRADE_STATE.KUBE_HOST_UNCORDON_COMPLETE, + nfvi.objects.v1.KUBE_UPGRADE_STATE.KUBE_HOST_UNCORDON_FAILED) + ) + self.apply_phase.add_stage(stage) # after this loop is kube upgrade complete stage self._add_kube_upgrade_complete_stage() @@ -3458,8 +3518,16 @@ class KubeUpgradeStrategy(SwUpdateStrategy, nfvi.objects.v1.KUBE_UPGRADE_STATE.KUBE_UPGRADING_NETWORKING_FAILED: self._add_kube_upgrade_networking_stage, - # After networking -> upgrade first control plane + # After networking -> cordon nfvi.objects.v1.KUBE_UPGRADE_STATE.KUBE_UPGRADED_NETWORKING: + self._add_kube_host_cordon_stage, + + # If the state is cordon-failed, resume at 
cordon stage + nfvi.objects.v1.KUBE_UPGRADE_STATE.KUBE_HOST_CORDON_FAILED: + self._add_kube_host_cordon_stage, + + # If the state is cordon-complete, resume at update stages + nfvi.objects.v1.KUBE_UPGRADE_STATE.KUBE_HOST_CORDON_COMPLETE: self._add_kube_update_stages, # if upgrading first control plane failed, resume there @@ -3478,10 +3546,18 @@ class KubeUpgradeStrategy(SwUpdateStrategy, nfvi.objects.v1.KUBE_UPGRADE_STATE.KUBE_UPGRADED_SECOND_MASTER: self._add_kube_update_stages, - # kubelets transition to 'complete' when they are done + # kubelets transition to 'uncordon' after they are done nfvi.objects.v1.KUBE_UPGRADE_STATE.KUBE_UPGRADING_KUBELETS: self._add_kube_update_stages, + # If the state is uncordon-failed, resume at uncordon stage + nfvi.objects.v1.KUBE_UPGRADE_STATE.KUBE_HOST_UNCORDON_FAILED: + self._add_kube_host_uncordon_stage, + + # If the state is uncordon-complete, resume at complete stage + nfvi.objects.v1.KUBE_UPGRADE_STATE.KUBE_HOST_UNCORDON_COMPLETE: + self._add_kube_upgrade_complete_stage, + # upgrade is completed, delete the upgrade nfvi.objects.v1.KUBE_UPGRADE_STATE.KUBE_UPGRADE_COMPLETE: self._add_kube_upgrade_cleanup_stage, diff --git a/nfv/nfv-vim/nfv_vim/strategy/_strategy_defs.py b/nfv/nfv-vim/nfv_vim/strategy/_strategy_defs.py index 1643ffc7..e146fd39 100755 --- a/nfv/nfv-vim/nfv_vim/strategy/_strategy_defs.py +++ b/nfv/nfv-vim/nfv_vim/strategy/_strategy_defs.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2015-2021 Wind River Systems, Inc. +# Copyright (c) 2015-2023 Wind River Systems, Inc.
# # SPDX-License-Identifier: Apache-2.0 # @@ -28,6 +28,8 @@ class EventNames(object): DISABLE_HOST_SERVICES_FAILED = Constant('disable-host-services-failed') ENABLE_HOST_SERVICES_FAILED = Constant('enable-host-services-failed') MIGRATE_INSTANCES_FAILED = Constant('migrate-instances-failed') + KUBE_HOST_CORDON_FAILED = Constant('kube-host-cordon-failed') + KUBE_HOST_UNCORDON_FAILED = Constant('kube-host-uncordon-failed') KUBE_HOST_UPGRADE_CONTROL_PLANE_FAILED = \ Constant('kube-host-upgrade-control-plane-failed') KUBE_HOST_UPGRADE_KUBELET_FAILED = \ diff --git a/nfv/nfv-vim/nfv_vim/strategy/_strategy_stages.py b/nfv/nfv-vim/nfv_vim/strategy/_strategy_stages.py index ccc9311b..e4dbdb23 100755 --- a/nfv/nfv-vim/nfv_vim/strategy/_strategy_stages.py +++ b/nfv/nfv-vim/nfv_vim/strategy/_strategy_stages.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2015-2021 Wind River Systems, Inc. +# Copyright (c) 2015-2023 Wind River Systems, Inc. # # SPDX-License-Identifier: Apache-2.0 # @@ -53,6 +53,8 @@ class StrategyStageNames(Constants): KUBE_ROOTCA_UPDATE_QUERY = Constant('kube-rootca-update-query') KUBE_ROOTCA_UPDATE_START = Constant('kube-rootca-update-start') # kube upgrade stages + KUBE_HOST_CORDON = Constant('kube-host-cordon') + KUBE_HOST_UNCORDON = Constant('kube-host-uncordon') KUBE_UPGRADE_QUERY = Constant('kube-upgrade-query') KUBE_UPGRADE_START = Constant('kube-upgrade-start') KUBE_UPGRADE_DOWNLOAD_IMAGES = Constant('kube-upgrade-download-images') diff --git a/nfv/nfv-vim/nfv_vim/strategy/_strategy_steps.py b/nfv/nfv-vim/nfv_vim/strategy/_strategy_steps.py index bbaea035..2454e93e 100755 --- a/nfv/nfv-vim/nfv_vim/strategy/_strategy_steps.py +++ b/nfv/nfv-vim/nfv_vim/strategy/_strategy_steps.py @@ -74,6 +74,8 @@ class StrategyStepNames(Constants): QUERY_KUBE_HOST_UPGRADE = Constant('query-kube-host-upgrade') QUERY_KUBE_UPGRADE = Constant('query-kube-upgrade') QUERY_KUBE_VERSIONS = Constant('query-kube-versions') + KUBE_HOST_CORDON = Constant('kube-host-cordon') + 
KUBE_HOST_UNCORDON = Constant('kube-host-uncordon') KUBE_UPGRADE_START = Constant('kube-upgrade-start') KUBE_UPGRADE_CLEANUP = Constant('kube-upgrade-cleanup') KUBE_UPGRADE_COMPLETE = Constant('kube-upgrade-complete') @@ -88,6 +90,14 @@ class StrategyStepNames(Constants): STRATEGY_STEP_NAME = StrategyStepNames() +def validate_operation(operation): + if operation.is_inprogress(): + return strategy.STRATEGY_STEP_RESULT.WAIT, "" + elif operation.is_failed(): + return strategy.STRATEGY_STEP_RESULT.FAILED, operation.reason + return strategy.STRATEGY_STEP_RESULT.SUCCESS, "" + + class AbstractStrategyStep(strategy.StrategyStep): """An abstract base class for strategy steps""" @@ -4225,6 +4235,100 @@ class AbstractKubeHostListUpgradeStep(AbstractKubeUpgradeStep): return data +class KubeHostCordonStep(AbstractKubeHostUpgradeStep): + """Kube Host Cordon - Strategy Step""" + + def __init__(self, host, to_version, force, target_state, target_failure_state, + timeout_in_secs=600): + super(KubeHostCordonStep, self).__init__( + host, + to_version, + force, + STRATEGY_STEP_NAME.KUBE_HOST_CORDON, + target_state, + target_failure_state, + timeout_in_secs) + + def handle_event(self, event, event_data=None): + """ + Handle Host events - does not query kube host upgrade list but + instead queries kube host upgrade directly. + """ + DLOG.debug("Step (%s) handle event (%s)." % (self._name, event)) + + if event == STRATEGY_EVENT.KUBE_HOST_CORDON_FAILED: + host = event_data + if host is not None and host.name in self._host_names: + result = strategy.STRATEGY_STEP_RESULT.FAILED + self.stage.step_complete( + result, + "kube host cordon (%s) failed" % host.name) + return True + # return handle_event of parent class + return super(KubeHostCordonStep, self).handle_event( + event, event_data=event_data) + + def apply(self): + """Kube Host Cordon""" + + from nfv_vim import directors + + DLOG.info("Step (%s) apply to hostnames (%s)." 
+ % (self._name, self._host_names)) + host_director = directors.get_host_director() + operation = \ + host_director.kube_host_cordon(self._host_names, + self._force) + return validate_operation(operation) + + +class KubeHostUncordonStep(AbstractKubeHostUpgradeStep): + """Kube Host Uncordon - Strategy Step""" + + def __init__(self, host, to_version, force, target_state, target_failure_state, + timeout_in_secs=600): + super(KubeHostUncordonStep, self).__init__( + host, + to_version, + force, + STRATEGY_STEP_NAME.KUBE_HOST_UNCORDON, + target_state, + target_failure_state, + timeout_in_secs) + + def handle_event(self, event, event_data=None): + """ + Handle Host events - does not query kube host upgrade list but + instead queries kube host upgrade directly. + """ + DLOG.debug("Step (%s) handle event (%s)." % (self._name, event)) + + if event == STRATEGY_EVENT.KUBE_HOST_UNCORDON_FAILED: + host = event_data + if host is not None and host.name in self._host_names: + result = strategy.STRATEGY_STEP_RESULT.FAILED + self.stage.step_complete( + result, + "kube host uncordon (%s) failed" % host.name) + return True + # return handle_event of parent class + return super(KubeHostUncordonStep, self).handle_event( + event, event_data=event_data) + + def apply(self): + """Kube Host Uncordon""" + + from nfv_vim import directors + + DLOG.info("Step (%s) apply to hostnames (%s)." 
+ % (self._name, self._host_names)) + host_director = directors.get_host_director() + operation = \ + host_director.kube_host_uncordon(self._host_names, + self._force) + return validate_operation(operation) + + class KubeHostUpgradeControlPlaneStep(AbstractKubeHostUpgradeStep): """Kube Host Upgrade Control Plane - Strategy Step @@ -4272,13 +4376,7 @@ class KubeHostUpgradeControlPlaneStep(AbstractKubeHostUpgradeStep): operation = \ host_director.kube_upgrade_hosts_control_plane(self._host_names, self._force) - - if operation.is_inprogress(): - return strategy.STRATEGY_STEP_RESULT.WAIT, "" - elif operation.is_failed(): - return strategy.STRATEGY_STEP_RESULT.FAILED, operation.reason - - return strategy.STRATEGY_STEP_RESULT.SUCCESS, "" + return validate_operation(operation) class KubeHostUpgradeKubeletStep(AbstractKubeHostListUpgradeStep): @@ -4419,6 +4517,8 @@ def strategy_step_rebuild_from_dict(data): # # kube upgrade steps # + STRATEGY_STEP_NAME.KUBE_HOST_CORDON: KubeHostCordonStep, + STRATEGY_STEP_NAME.KUBE_HOST_UNCORDON: KubeHostUncordonStep, STRATEGY_STEP_NAME.KUBE_HOST_UPGRADE_CONTROL_PLANE: KubeHostUpgradeControlPlaneStep, STRATEGY_STEP_NAME.KUBE_HOST_UPGRADE_KUBELET: