From 4c42927040f93ff68f3521b8f2408b26de8d4212 Mon Sep 17 00:00:00 2001 From: Saba Touheed Mujawar Date: Tue, 5 Mar 2024 08:06:16 -0500 Subject: [PATCH] Add retry robustness for Kubernetes upgrade control plane In the case of a rare intermittent failure behaviour during the upgrading control plane step where puppet hits timeout first before the upgrade is completed or kubeadm hits its own Upgrade Manifest timeout (at 5m). This change will retry running the process by reporting failure to conductor when puppet manifest apply fails. Since it is using RPC to send messages with options, we don't get the return code directly and hence, cannot use a retry decorator. So we use the sysinv report callback feature to handle the success/failure path. TEST PLAN: PASS: Perform simplex and duplex k8s upgrade successfully. PASS: Install iso successfully. PASS: Manually send STOP signal to pause the process so that puppet manifest timeout and check whether retry code works and in retry attempts the upgrade completes. PASS: Manually decrease the puppet timeout to very low number and verify that code retries 2 times and updates failure state PASS: Perform orchestrated k8s upgrade, Manually send STOP signal to pause the kubeadm process during step upgrading-first-master and perform system kube-upgrade-abort. Verify that upgrade-aborted successfully and also verify that code does not try the retry mechanism for k8s upgrade control-plane as it is not in desired KUBE_UPGRADING_FIRST_MASTER or KUBE_UPGRADING_SECOND_MASTER state PASS: Perform manual k8s upgrade, for k8s upgrade control-plane failure perform manual upgrade-abort successfully. Perform Orchestrated k8s upgrade, for k8s upgrade control-plane failure after retries nfv aborts automatically. 
Closes-Bug: 2056326 Depends-on: https://review.opendev.org/c/starlingx/nfv/+/912806 https://review.opendev.org/c/starlingx/stx-puppet/+/911945 https://review.opendev.org/c/starlingx/integ/+/913422 Change-Id: I5dc3b87530be89d623b40da650b7ff04c69f1cc5 Signed-off-by: Saba Touheed Mujawar --- .../sysinv/sysinv/sysinv/common/constants.py | 3 + .../sysinv/sysinv/sysinv/conductor/manager.py | 155 +++-- sysinv/sysinv/sysinv/sysinv/puppet/common.py | 1 + .../sysinv/tests/conductor/test_manager.py | 588 +++++++----------- 4 files changed, 344 insertions(+), 403 deletions(-) diff --git a/sysinv/sysinv/sysinv/sysinv/common/constants.py b/sysinv/sysinv/sysinv/sysinv/common/constants.py index 5b97fa2dd1..babe094440 100644 --- a/sysinv/sysinv/sysinv/sysinv/common/constants.py +++ b/sysinv/sysinv/sysinv/sysinv/common/constants.py @@ -2484,6 +2484,9 @@ CSTATE_PATH = "/sys/devices/system/cpu/cpu0/cpuidle" # Auto-recovery limits for kube upgrade abort AUTO_RECOVERY_COUNT = 3 +# Auto-recovery limits for kube upgrade control-plane +CONTROL_PLANE_RETRY_COUNT = 2 + # Puppet Runtime Manifest constants RUNTIME_CONFIG_APPLY_TIMEOUT_IN_SECS = 600 RUNTIME_CONFIG_STATE_PENDING = "pending" diff --git a/sysinv/sysinv/sysinv/sysinv/conductor/manager.py b/sysinv/sysinv/sysinv/sysinv/conductor/manager.py index 4b7717c846..0257166089 100644 --- a/sysinv/sysinv/sysinv/sysinv/conductor/manager.py +++ b/sysinv/sysinv/sysinv/sysinv/conductor/manager.py @@ -10253,6 +10253,58 @@ class ConductorManager(service.PeriodicService): context, config_uuid=active_controller.config_target, config_dict=config_dict, skip_update_config=True) + def handle_k8s_upgrade_control_plane_failure(self, context, kube_upgrade_obj, + host_uuid, puppet_class): + kube_upgrade_obj.recovery_attempts += 1 + kube_upgrade_obj.save() + + personalities = [constants.CONTROLLER] + config_uuid = self._config_update_hosts(context, personalities, + [host_uuid]) + + # Apply the runtime manifest to upgrade the control plane + config_dict = { + 
"personalities": personalities, + "host_uuids": [host_uuid], + "classes": [puppet_class], + puppet_common.REPORT_STATUS_CFG: + puppet_common.REPORT_UPGRADE_CONTROL_PLANE + } + self._config_apply_runtime_manifest(context, config_uuid, config_dict, + skip_update_config=True) + + def handle_k8s_upgrade_control_plane_success(self, context, kube_upgrade_obj, host_uuid, + new_state, fail_state): + host_obj = objects.host.get_by_uuid(context, host_uuid) + host_name = host_obj.hostname + kube_host_upgrade_obj = objects.kube_host_upgrade.get_by_host_id( + context, host_obj.id) + target_version = kube_host_upgrade_obj.target_version + kube_operator = kubernetes.KubeOperator() + + cp_versions = kube_operator.kube_get_control_plane_versions() + + LOG.info("Checking control plane update on host %s, " + "cp_versions = %s, target_version = %s" % + (host_name, cp_versions, target_version)) + + if cp_versions.get(host_name, None) != target_version: + LOG.warning("Control plane upgrade failed for host %s" % + host_name) + kube_host_upgrade_obj.status = \ + kubernetes.KUBE_HOST_UPGRADING_CONTROL_PLANE_FAILED + kube_host_upgrade_obj.save() + kube_upgrade_obj.state = fail_state + kube_upgrade_obj.save() + return + + # The control plane update was successful + LOG.info("Control plane was updated for host %s" % host_name) + kube_host_upgrade_obj.status = None + kube_host_upgrade_obj.save() + kube_upgrade_obj.state = new_state + kube_upgrade_obj.save() + def report_config_status(self, context, iconfig, status, error=None): """ Callback from Sysinv Agent on manifest apply success or failure @@ -10462,6 +10514,48 @@ class ConductorManager(service.PeriodicService): kube_upgrade_obj.state = kubernetes.KUBE_UPGRADE_ABORTING_FAILED kube_upgrade_obj.save() self.kube_upgrade_abort_recovery(context) + elif reported_cfg == puppet_common.REPORT_UPGRADE_CONTROL_PLANE: + # The agent is reporting the runtime kube_upgrade_control_plane has been applied. 
+ kube_upgrade_obj = objects.kube_upgrade.get_one(context) + host_obj = objects.host.get_by_uuid(context, host_uuid) + if kube_upgrade_obj.state == kubernetes.KUBE_UPGRADING_FIRST_MASTER: + puppet_class = 'platform::kubernetes::upgrade_first_control_plane' + new_state = kubernetes.KUBE_UPGRADED_FIRST_MASTER + fail_state = kubernetes.KUBE_UPGRADING_FIRST_MASTER_FAILED + elif kube_upgrade_obj.state == kubernetes.KUBE_UPGRADING_SECOND_MASTER: + puppet_class = 'platform::kubernetes::upgrade_control_plane' + new_state = kubernetes.KUBE_UPGRADED_SECOND_MASTER + fail_state = kubernetes.KUBE_UPGRADING_SECOND_MASTER_FAILED + else: + # Handle the case during orchestrated k8s upgrade where nfv + # times out earlier than the puppet timeout and updates the + # k8s upgrade state to upgrade-aborted + LOG.info("Skipping retry: Kubernetes upgrade state %s is not in %s, or %s" + % (kube_upgrade_obj.state, kubernetes.KUBE_UPGRADING_FIRST_MASTER, + kubernetes.KUBE_UPGRADING_SECOND_MASTER)) + return + if status == puppet_common.REPORT_SUCCESS: + # Upgrade control-plane action was successful. + success = True + self.handle_k8s_upgrade_control_plane_success(context, kube_upgrade_obj, host_uuid, + new_state, fail_state) + if status == puppet_common.REPORT_FAILURE: + # Upgrade control-plane action failed to apply puppet manifest. 
+ if kube_upgrade_obj.recovery_attempts < constants.CONTROL_PLANE_RETRY_COUNT: + LOG.info("k8s upgrade control plane failed - retrying attempt %s" + % kube_upgrade_obj.recovery_attempts) + self.handle_k8s_upgrade_control_plane_failure(context, kube_upgrade_obj, + host_uuid, puppet_class) + else: + LOG.warning("k8s upgrade control plane failed %s times, giving up" + % constants.AUTO_RECOVERY_COUNT) + kube_upgrade_obj.state = fail_state + kube_upgrade_obj.save() + kube_host_upgrade_obj = objects.kube_host_upgrade.get_by_host_id( + context, host_obj.id) + kube_host_upgrade_obj.status = \ + kubernetes.KUBE_HOST_UPGRADING_CONTROL_PLANE_FAILED + kube_host_upgrade_obj.save() else: LOG.error("Reported configuration '%(cfg)s' is not handled by" " report_config_status! iconfig: %(iconfig)s" % @@ -17024,6 +17118,8 @@ class ConductorManager(service.PeriodicService): context, host_obj.id) target_version = kube_host_upgrade_obj.target_version kube_upgrade_obj = objects.kube_upgrade.get_one(context) + kube_upgrade_obj.recovery_attempts = 0 + kube_upgrade_obj.save() kube_operator = kubernetes.KubeOperator() current_versions = kube_operator.kube_get_kubelet_versions() system = self.dbapi.isystem_get_one() @@ -17041,7 +17137,6 @@ class ConductorManager(service.PeriodicService): kube_host_upgrade_obj.save() puppet_class = 'platform::kubernetes::upgrade_first_control_plane' - new_state = kubernetes.KUBE_UPGRADED_FIRST_MASTER fail_state = kubernetes.KUBE_UPGRADING_FIRST_MASTER_FAILED # Drop any removed/unsupported feature gates before we upgrade to a @@ -17086,7 +17181,6 @@ class ConductorManager(service.PeriodicService): elif kube_upgrade_obj.state == kubernetes.KUBE_UPGRADING_SECOND_MASTER: puppet_class = 'platform::kubernetes::upgrade_control_plane' - new_state = kubernetes.KUBE_UPGRADED_SECOND_MASTER fail_state = kubernetes.KUBE_UPGRADING_SECOND_MASTER_FAILED else: raise exception.SysinvException(_( @@ -17102,7 +17196,9 @@ class ConductorManager(service.PeriodicService): 
config_dict = { "personalities": personalities, "host_uuids": [host_uuid], - "classes": [puppet_class] + "classes": [puppet_class], + puppet_common.REPORT_STATUS_CFG: + puppet_common.REPORT_UPGRADE_CONTROL_PLANE } try: self._config_apply_runtime_manifest(context, config_uuid, config_dict) @@ -17111,57 +17207,6 @@ class ConductorManager(service.PeriodicService): (host_name, config_uuid)) manifest_apply_failed_state(context, fail_state, host_obj) - # Wait for the manifest to be applied - elapsed = 0 - LOG.info("Waiting for config apply on host = %s" % host_name) - while elapsed < kubernetes.MANIFEST_APPLY_TIMEOUT: - elapsed += kubernetes.MANIFEST_APPLY_INTERVAL - greenthread.sleep(kubernetes.MANIFEST_APPLY_INTERVAL) - host_obj = objects.host.get_by_uuid(context, host_uuid) - if host_obj.config_target == host_obj.config_applied: - LOG.info("Config was applied for host %s" % host_name) - break - LOG.info("Waiting for config apply on host %s" % host_name) - else: - LOG.warning("Manifest apply failed for host %s" % host_name) - manifest_apply_failed_state(context, fail_state, host_obj) - - # Wait for the control plane pods to start with the new version - elapsed = 0 - LOG.info("Waiting for control plane update on host %s, " - "target_version = %s" % (host_name, target_version)) - while elapsed < kubernetes.POD_START_TIMEOUT: - elapsed += kubernetes.POD_START_INTERVAL - greenthread.sleep(kubernetes.POD_START_INTERVAL) - cp_versions = kube_operator.kube_get_control_plane_versions() - if cp_versions.get(host_name, None) == target_version: - LOG.info("Control plane was updated for host %s" % host_name) - break - LOG.info("Waiting for control plane update on host %s, " - "cp_versions = %s, target_version = %s" % - (host_name, cp_versions, target_version)) - else: - LOG.warning("Control plane upgrade failed for host %s" % - host_name) - kube_host_upgrade_obj = objects.kube_host_upgrade.get_by_host_id( - context, host_obj.id) - kube_host_upgrade_obj.status = \ - 
kubernetes.KUBE_HOST_UPGRADING_CONTROL_PLANE_FAILED - kube_host_upgrade_obj.save() - kube_upgrade_obj = objects.kube_upgrade.get_one(context) - kube_upgrade_obj.state = fail_state - kube_upgrade_obj.save() - return - - # The control plane update was successful - kube_host_upgrade_obj = objects.kube_host_upgrade.get_by_host_id( - context, host_obj.id) - kube_host_upgrade_obj.status = None - kube_host_upgrade_obj.save() - kube_upgrade_obj = objects.kube_upgrade.get_one(context) - kube_upgrade_obj.state = new_state - kube_upgrade_obj.save() - def kube_upgrade_kubelet(self, context, host_uuid): """Upgrade the kubernetes kubelet on this host""" @@ -17339,6 +17384,8 @@ class ConductorManager(service.PeriodicService): """ kube_upgrade_obj = objects.kube_upgrade.get_one(context) + kube_upgrade_obj.recovery_attempts = 0 + kube_upgrade_obj.save() controller_hosts = self.dbapi.ihost_get_by_personality( constants.CONTROLLER) system = self.dbapi.isystem_get_one() diff --git a/sysinv/sysinv/sysinv/sysinv/puppet/common.py b/sysinv/sysinv/sysinv/sysinv/puppet/common.py index b3f4cb8fbf..2340b3a2c2 100644 --- a/sysinv/sysinv/sysinv/sysinv/puppet/common.py +++ b/sysinv/sysinv/sysinv/sysinv/puppet/common.py @@ -57,6 +57,7 @@ REPORT_HTTP_CONFIG = 'http_config' REPORT_KERNEL_CONFIG = 'host_kernel_config' REPORT_UPGRADE_ABORT = 'upgrade_abort' REPORT_APPARMOR_CONFIG = 'host_apparmor_config' +REPORT_UPGRADE_CONTROL_PLANE = 'upgrade_control_plane' def puppet_apply_manifest(ip_address, personality, diff --git a/sysinv/sysinv/sysinv/sysinv/tests/conductor/test_manager.py b/sysinv/sysinv/sysinv/sysinv/tests/conductor/test_manager.py index cbe8ded963..40290a0c5c 100644 --- a/sysinv/sysinv/sysinv/sysinv/tests/conductor/test_manager.py +++ b/sysinv/sysinv/sysinv/sysinv/tests/conductor/test_manager.py @@ -1501,7 +1501,13 @@ class ManagerTestCase(base.DbTestCase): self.assertEqual(updated_upgrade.state, kubernetes.KUBE_UPGRADE_UNCORDON_COMPLETE) - def 
test_kube_upgrade_control_plane_first_master(self): + @mock.patch('sysinv.conductor.manager.' + 'ConductorManager._config_apply_runtime_manifest') + @mock.patch('sysinv.conductor.manager.' + 'ConductorManager._config_update_hosts') + def test_kube_upgrade_control_plane_first_master(self, mock_config_update_hosts, + mock_config_apply_runtime_manifest): + mock_config_update_hosts.return_value = "6c5aa183-4884-46e6-b86a-b29e6b08dedb" # Create an upgrade utils.create_test_kube_upgrade( from_version='v1.42.1', @@ -1574,349 +1580,25 @@ class ManagerTestCase(base.DbTestCase): # Upgrade the control plane self.service.kube_upgrade_control_plane(self.context, c0.uuid) - # Verify that the upgrade state was updated - updated_upgrade = self.dbapi.kube_upgrade_get_one() - self.assertEqual(updated_upgrade.state, - kubernetes.KUBE_UPGRADED_FIRST_MASTER) + personalities = [constants.CONTROLLER] + config_dict = { + "personalities": personalities, + "host_uuids": [c0.uuid], + "classes": ['platform::kubernetes::upgrade_first_control_plane'], + puppet_common.REPORT_STATUS_CFG: + puppet_common.REPORT_UPGRADE_CONTROL_PLANE + } - # Verify that the host upgrade status was cleared - updated_host_upgrade = self.dbapi.kube_host_upgrade_get(1) - self.assertEqual(updated_host_upgrade.status, None) + mock_config_apply_runtime_manifest.assert_called_with(mock.ANY, '6c5aa183-4884-46e6-b86a-b29e6b08dedb', + config_dict) - def test_kube_upgrade_control_plane_first_master_simplex(self): - system_dict = self.system.as_dict() - system_dict['system_mode'] = constants.SYSTEM_MODE_SIMPLEX - self.dbapi.isystem_update(self.system.uuid, system_dict) - # Create an upgrade - utils.create_test_kube_upgrade( - from_version='v1.41.1', - to_version='v1.43.1', - state=kubernetes.KUBE_UPGRADING_FIRST_MASTER, - ) - - # Create controller-0 - config_uuid = str(uuid.uuid4()) - c0 = self._create_test_ihost( - personality=constants.CONTROLLER, - hostname='controller-0', - uuid=str(uuid.uuid4()), - config_status=None, - 
config_applied=config_uuid, - config_target=config_uuid, - invprovision=constants.PROVISIONED, - administrative=constants.ADMIN_UNLOCKED, - operational=constants.OPERATIONAL_ENABLED, - availability=constants.AVAILABILITY_ONLINE, - ) - # Set the target version for controller-0 - self.dbapi.kube_host_upgrade_update(1, {'target_version': 'v1.43.1'}) - # Make the control plane upgrade pass - self.kube_get_control_plane_versions_result = { - 'controller-0': 'v1.42.2'} - - self.kube_get_kubelet_versions_result = { - 'controller-0': 'v1.41.1'} - - mock_sanitize_feature_gates_bootstrap_config_file = mock.MagicMock() - p = mock.patch( - 'sysinv.conductor.manager.ConductorManager.sanitize_feature_gates_bootstrap_config_file', - mock_sanitize_feature_gates_bootstrap_config_file) - p.start().return_value = 0 - self.addCleanup(p.stop) - - mock_sanitize_feature_gates_service_parameters = mock.MagicMock() - p2 = mock.patch( - 'sysinv.conductor.manager.ConductorManager.sanitize_feature_gates_service_parameters', - mock_sanitize_feature_gates_service_parameters) - p2.start().return_value = 0 - self.addCleanup(p2.stop) - - mock_sanitize_feature_gates_kubeadm_configmap = mock.MagicMock() - p2 = mock.patch( - 'sysinv.conductor.manager.ConductorManager.sanitize_feature_gates_kubeadm_configmap', - mock_sanitize_feature_gates_kubeadm_configmap) - p2.start().return_value = 0 - self.addCleanup(p2.stop) - - mock_sanitize_feature_gates_kubelet_configmap = mock.MagicMock() - p2 = mock.patch( - 'sysinv.conductor.manager.ConductorManager.sanitize_feature_gates_kubelet_configmap', - mock_sanitize_feature_gates_kubelet_configmap) - p2.start().return_value = 0 - self.addCleanup(p2.stop) - - mock_sanitize_image_repository_kubeadm_configmap = mock.MagicMock() - p2 = mock.patch( - 'sysinv.conductor.manager.ConductorManager.sanitize_image_repository_kubeadm_configmap', - mock_sanitize_image_repository_kubeadm_configmap) - p2.start().return_value = 0 - self.addCleanup(p2.stop) - - self.service._kube = 
FakeKubeOperator() - - # Speed up the test - kubernetes.MANIFEST_APPLY_INTERVAL = 1 - kubernetes.POD_START_INTERVAL = 1 - - # Upgrade the control plane - self.service.kube_upgrade_control_plane(self.context, c0.uuid) - - # Verify that the upgrade state was updated - updated_upgrade = self.dbapi.kube_upgrade_get_one() - self.assertEqual(updated_upgrade.state, - kubernetes.KUBE_UPGRADED_FIRST_MASTER) - - # Verify that the host upgrade status was cleared - updated_host_upgrade = self.dbapi.kube_host_upgrade_get(1) - self.assertEqual(updated_host_upgrade.status, None) - - def test_kube_upgrade_control_plane_first_master_simplex_failed(self): - system_dict = self.system.as_dict() - system_dict['system_mode'] = constants.SYSTEM_MODE_SIMPLEX - self.dbapi.isystem_update(self.system.uuid, system_dict) - # Create an upgrade - utils.create_test_kube_upgrade( - from_version='v1.41.1', - to_version='v1.43.1', - state=kubernetes.KUBE_UPGRADING_FIRST_MASTER, - ) - # Create controller-0 - config_uuid = str(uuid.uuid4()) - c0 = self._create_test_ihost( - personality=constants.CONTROLLER, - hostname='controller-0', - uuid=str(uuid.uuid4()), - config_status=None, - config_applied=config_uuid, - config_target=config_uuid, - invprovision=constants.PROVISIONED, - administrative=constants.ADMIN_UNLOCKED, - operational=constants.OPERATIONAL_ENABLED, - availability=constants.AVAILABILITY_ONLINE, - ) - # Set the target version for controller-0 - self.dbapi.kube_host_upgrade_update(1, {'target_version': 'v1.43.1'}) - # Check the control plane upgrade - self.kube_get_control_plane_versions_result = { - 'controller-0': 'v1.43.1'} - - self.kube_get_kubelet_versions_result = { - 'controller-0': 'v1.41.1'} - - mock_sanitize_feature_gates_bootstrap_config_file = mock.MagicMock() - p = mock.patch( - 'sysinv.conductor.manager.ConductorManager.sanitize_feature_gates_bootstrap_config_file', - mock_sanitize_feature_gates_bootstrap_config_file) - p.start().return_value = 0 - self.addCleanup(p.stop) - - 
mock_sanitize_feature_gates_service_parameters = mock.MagicMock() - p2 = mock.patch( - 'sysinv.conductor.manager.ConductorManager.sanitize_feature_gates_service_parameters', - mock_sanitize_feature_gates_service_parameters) - p2.start().return_value = 0 - self.addCleanup(p2.stop) - - mock_sanitize_feature_gates_kubeadm_configmap = mock.MagicMock() - p2 = mock.patch( - 'sysinv.conductor.manager.ConductorManager.sanitize_feature_gates_kubeadm_configmap', - mock_sanitize_feature_gates_kubeadm_configmap) - p2.start().return_value = 0 - self.addCleanup(p2.stop) - - mock_sanitize_feature_gates_kubelet_configmap = mock.MagicMock() - p2 = mock.patch( - 'sysinv.conductor.manager.ConductorManager.sanitize_feature_gates_kubelet_configmap', - mock_sanitize_feature_gates_kubelet_configmap) - p2.start().return_value = 0 - self.addCleanup(p2.stop) - - mock_sanitize_image_repository_kubeadm_configmap = mock.MagicMock() - p2 = mock.patch( - 'sysinv.conductor.manager.ConductorManager.sanitize_image_repository_kubeadm_configmap', - mock_sanitize_image_repository_kubeadm_configmap) - p2.start().return_value = 0 - self.addCleanup(p2.stop) - - self.service._kube = FakeKubeOperator() - - # Speed up the test - kubernetes.MANIFEST_APPLY_INTERVAL = 1 - kubernetes.POD_START_INTERVAL = 1 - kubernetes.POD_START_TIMEOUT = 1 - - # Upgrade the control plane - self.service.kube_upgrade_control_plane(self.context, c0.uuid) - - # Verify that the upgrade state was updated - updated_upgrade = self.dbapi.kube_upgrade_get_one() - self.assertEqual(updated_upgrade.state, - kubernetes.KUBE_UPGRADING_FIRST_MASTER_FAILED) - - # Verify that the host upgrade status was set - updated_host_upgrade = self.dbapi.kube_host_upgrade_get(1) - self.assertEqual(updated_host_upgrade.status, - kubernetes.KUBE_HOST_UPGRADING_CONTROL_PLANE_FAILED) - - def test_kube_upgrade_control_plane_first_master_manifest_timeout(self): - # Create an upgrade - utils.create_test_kube_upgrade( - from_version='v1.42.1', - 
to_version='v1.42.2', - state=kubernetes.KUBE_UPGRADING_FIRST_MASTER, - ) - # Create controller-0 - config_uuid = str(uuid.uuid4()) - c0 = self._create_test_ihost( - personality=constants.CONTROLLER, - hostname='controller-0', - uuid=str(uuid.uuid4()), - config_status=None, - config_applied=config_uuid, - config_target=config_uuid, - invprovision=constants.PROVISIONED, - administrative=constants.ADMIN_UNLOCKED, - operational=constants.OPERATIONAL_ENABLED, - availability=constants.AVAILABILITY_ONLINE, - ) - # Set the target version for controller-0 - self.dbapi.kube_host_upgrade_update(1, {'target_version': 'v1.42.2'}) - # Make the manifest apply fail - self.fail_config_apply_runtime_manifest = True - - mock_sanitize_feature_gates_bootstrap_config_file = mock.MagicMock() - p = mock.patch( - 'sysinv.conductor.manager.ConductorManager.sanitize_feature_gates_bootstrap_config_file', - mock_sanitize_feature_gates_bootstrap_config_file) - p.start().return_value = 0 - self.addCleanup(p.stop) - - mock_sanitize_feature_gates_service_parameters = mock.MagicMock() - p2 = mock.patch( - 'sysinv.conductor.manager.ConductorManager.sanitize_feature_gates_service_parameters', - mock_sanitize_feature_gates_service_parameters) - p2.start().return_value = 0 - self.addCleanup(p2.stop) - - mock_sanitize_feature_gates_kubeadm_configmap = mock.MagicMock() - p2 = mock.patch( - 'sysinv.conductor.manager.ConductorManager.sanitize_feature_gates_kubeadm_configmap', - mock_sanitize_feature_gates_kubeadm_configmap) - p2.start().return_value = 0 - self.addCleanup(p2.stop) - - mock_sanitize_feature_gates_kubelet_configmap = mock.MagicMock() - p2 = mock.patch( - 'sysinv.conductor.manager.ConductorManager.sanitize_feature_gates_kubelet_configmap', - mock_sanitize_feature_gates_kubelet_configmap) - p2.start().return_value = 0 - self.addCleanup(p2.stop) - - mock_sanitize_image_repository_kubeadm_configmap = mock.MagicMock() - p2 = mock.patch( - 
'sysinv.conductor.manager.ConductorManager.sanitize_image_repository_kubeadm_configmap', - mock_sanitize_image_repository_kubeadm_configmap) - p2.start().return_value = 0 - self.addCleanup(p2.stop) - - self.service._kube = FakeKubeOperator() - - # Speed up the test - kubernetes.MANIFEST_APPLY_INTERVAL = 1 - kubernetes.MANIFEST_APPLY_TIMEOUT = 1 - - # Upgrade the control plane - self.service.kube_upgrade_control_plane(self.context, c0.uuid) - - # Verify that the upgrade state was updated - updated_upgrade = self.dbapi.kube_upgrade_get_one() - self.assertEqual(updated_upgrade.state, - kubernetes.KUBE_UPGRADING_FIRST_MASTER_FAILED) - - # Verify that the host upgrade status was set - updated_host_upgrade = self.dbapi.kube_host_upgrade_get(1) - self.assertEqual(updated_host_upgrade.status, - kubernetes.KUBE_HOST_UPGRADING_CONTROL_PLANE_FAILED) - - def test_kube_upgrade_control_plane_first_master_upgrade_fail(self): - # Create an upgrade - utils.create_test_kube_upgrade( - from_version='v1.42.1', - to_version='v1.42.2', - state=kubernetes.KUBE_UPGRADING_FIRST_MASTER, - ) - # Create controller-0 - config_uuid = str(uuid.uuid4()) - c0 = self._create_test_ihost( - personality=constants.CONTROLLER, - hostname='controller-0', - uuid=str(uuid.uuid4()), - config_status=None, - config_applied=config_uuid, - config_target=config_uuid, - invprovision=constants.PROVISIONED, - administrative=constants.ADMIN_UNLOCKED, - operational=constants.OPERATIONAL_ENABLED, - availability=constants.AVAILABILITY_ONLINE, - ) - # Set the target version for controller-0 - self.dbapi.kube_host_upgrade_update(1, {'target_version': 'v1.42.2'}) - - mock_sanitize_feature_gates_bootstrap_config_file = mock.MagicMock() - p = mock.patch( - 'sysinv.conductor.manager.ConductorManager.sanitize_feature_gates_bootstrap_config_file', - mock_sanitize_feature_gates_bootstrap_config_file) - p.start().return_value = 0 - self.addCleanup(p.stop) - - mock_sanitize_feature_gates_service_parameters = mock.MagicMock() - p2 
= mock.patch( - 'sysinv.conductor.manager.ConductorManager.sanitize_feature_gates_service_parameters', - mock_sanitize_feature_gates_service_parameters) - p2.start().return_value = 0 - self.addCleanup(p2.stop) - - mock_sanitize_feature_gates_kubeadm_configmap = mock.MagicMock() - p2 = mock.patch( - 'sysinv.conductor.manager.ConductorManager.sanitize_feature_gates_kubeadm_configmap', - mock_sanitize_feature_gates_kubeadm_configmap) - p2.start().return_value = 0 - self.addCleanup(p2.stop) - - mock_sanitize_feature_gates_kubelet_configmap = mock.MagicMock() - p2 = mock.patch( - 'sysinv.conductor.manager.ConductorManager.sanitize_feature_gates_kubelet_configmap', - mock_sanitize_feature_gates_kubelet_configmap) - p2.start().return_value = 0 - self.addCleanup(p2.stop) - - mock_sanitize_image_repository_kubeadm_configmap = mock.MagicMock() - p2 = mock.patch( - 'sysinv.conductor.manager.ConductorManager.sanitize_image_repository_kubeadm_configmap', - mock_sanitize_image_repository_kubeadm_configmap) - p2.start().return_value = 0 - self.addCleanup(p2.stop) - - self.service._kube = FakeKubeOperator() - - # Speed up the test - kubernetes.MANIFEST_APPLY_INTERVAL = 1 - kubernetes.POD_START_INTERVAL = 1 - kubernetes.POD_START_TIMEOUT = 1 - - # Upgrade the control plane - self.service.kube_upgrade_control_plane(self.context, c0.uuid) - - # Verify that the upgrade state was updated - updated_upgrade = self.dbapi.kube_upgrade_get_one() - self.assertEqual(updated_upgrade.state, - kubernetes.KUBE_UPGRADING_FIRST_MASTER_FAILED) - - # Verify that the host upgrade status was cleared - updated_host_upgrade = self.dbapi.kube_host_upgrade_get(1) - self.assertIsNotNone(updated_host_upgrade.status) - - def test_kube_upgrade_control_plane_second_master(self): + @mock.patch('sysinv.conductor.manager.' + 'ConductorManager._config_apply_runtime_manifest') + @mock.patch('sysinv.conductor.manager.' 
+ 'ConductorManager._config_update_hosts') + def test_kube_upgrade_control_plane_second_master(self, mock_config_update_hosts, + mock_config_apply_runtime_manifest): + mock_config_update_hosts.return_value = "6c5aa183-4884-46e6-b86a-b29e6b08dedb" # Create an upgrade utils.create_test_kube_upgrade( from_version='v1.42.1', @@ -1972,14 +1654,222 @@ class ManagerTestCase(base.DbTestCase): # Upgrade the control plane self.service.kube_upgrade_control_plane(self.context, c1.uuid) - # Verify that the upgrade state was updated - updated_upgrade = self.dbapi.kube_upgrade_get_one() - self.assertEqual(updated_upgrade.state, - kubernetes.KUBE_UPGRADED_SECOND_MASTER) + personalities = [constants.CONTROLLER] + config_dict = { + "personalities": personalities, + "host_uuids": [c1.uuid], + "classes": ['platform::kubernetes::upgrade_control_plane'], + puppet_common.REPORT_STATUS_CFG: + puppet_common.REPORT_UPGRADE_CONTROL_PLANE + } - # Verify that the host upgrade status was cleared - updated_host_upgrade = self.dbapi.kube_host_upgrade_get(1) - self.assertEqual(updated_host_upgrade.status, None) + mock_config_apply_runtime_manifest.assert_called_with(mock.ANY, '6c5aa183-4884-46e6-b86a-b29e6b08dedb', + config_dict) + + @mock.patch('sysinv.conductor.manager.' + 'ConductorManager._config_apply_runtime_manifest') + @mock.patch('sysinv.conductor.manager.' 
+ 'ConductorManager._config_update_hosts') + def test_handle_k8s_upgrade_control_plane_failure_first_master(self, mock_config_update_hosts, + mock_config_apply_runtime_manifest): + mock_config_update_hosts.return_value = "273cfafd-886d-43ec-9478-8328727b34cc" + utils.create_test_kube_upgrade( + from_version='v1.42.1', + to_version='v1.42.2', + state=kubernetes.KUBE_UPGRADING_FIRST_MASTER, + ) + + # Create controller-0 + config_uuid = str(uuid.uuid4()) + c0 = self._create_test_ihost( + personality=constants.CONTROLLER, + hostname='controller-0', + uuid=str(uuid.uuid4()), + config_status=None, + config_applied=config_uuid, + config_target=config_uuid, + invprovision=constants.PROVISIONED, + administrative=constants.ADMIN_UNLOCKED, + operational=constants.OPERATIONAL_ENABLED, + availability=constants.AVAILABILITY_ONLINE, + ) + # Set the target version for controller-0 + self.dbapi.kube_host_upgrade_update(1, {'target_version': 'v1.42.2'}) + + puppet_class = 'platform::kubernetes::upgrade_first_control_plane' + + kube_upgrade_obj = objects.kube_upgrade.get_one(context) + self.service.handle_k8s_upgrade_control_plane_failure(self.context, + kube_upgrade_obj, c0.uuid, puppet_class) + personalities = [constants.CONTROLLER] + config_dict = { + "personalities": personalities, + "host_uuids": [c0.uuid], + "classes": [puppet_class], + puppet_common.REPORT_STATUS_CFG: + puppet_common.REPORT_UPGRADE_CONTROL_PLANE + } + + mock_config_apply_runtime_manifest.assert_called_with(mock.ANY, '273cfafd-886d-43ec-9478-8328727b34cc', + config_dict, skip_update_config=True) + + def test_handle_k8s_upgrade_control_plane_success_first_master(self): + # Create controller-0 + config_uuid = str(uuid.uuid4()) + c0 = self._create_test_ihost( + personality=constants.CONTROLLER, + hostname='controller-0', + uuid=str(uuid.uuid4()), + config_status=None, + config_applied=config_uuid, + config_target=config_uuid, + invprovision=constants.PROVISIONED, + administrative=constants.ADMIN_UNLOCKED, + 
operational=constants.OPERATIONAL_ENABLED, + availability=constants.AVAILABILITY_ONLINE, + ) + # Set the target version for controller-0 + self.dbapi.kube_host_upgrade_update(1, {'target_version': 'v1.42.2'}) + + utils.create_test_kube_upgrade( + from_version='v1.42.1', + to_version='v1.42.2', + state=kubernetes.KUBE_UPGRADED_FIRST_MASTER, + ) + + self.kube_get_control_plane_versions_result = { + 'controller-0': 'v1.42.2'} + + new_state = kubernetes.KUBE_UPGRADED_FIRST_MASTER + fail_state = kubernetes.KUBE_UPGRADING_FIRST_MASTER_FAILED + + kube_upgrade_obj = objects.kube_upgrade.get_one(context) + self.service.handle_k8s_upgrade_control_plane_success(self.context, + kube_upgrade_obj, c0.uuid, new_state, fail_state) + self.assertEqual(kube_upgrade_obj.state, + new_state) + + @mock.patch('sysinv.conductor.manager.' + 'ConductorManager._config_apply_runtime_manifest') + @mock.patch('sysinv.conductor.manager.' + 'ConductorManager._config_update_hosts') + def test_handle_k8s_upgrade_control_plane_failure_second_master(self, mock_config_update_hosts, + mock_config_apply_runtime_manifest): + mock_config_update_hosts.return_value = "273cfafd-886d-43ec-9478-8328727b34cc" + utils.create_test_kube_upgrade( + from_version='v1.42.1', + to_version='v1.42.2', + state=kubernetes.KUBE_UPGRADING_SECOND_MASTER, + ) + + # Create controller-0 + config_uuid = str(uuid.uuid4()) + self._create_test_ihost( + personality=constants.CONTROLLER, + hostname='controller-0', + uuid=str(uuid.uuid4()), + config_status=None, + config_applied=config_uuid, + config_target=config_uuid, + invprovision=constants.PROVISIONED, + administrative=constants.ADMIN_UNLOCKED, + operational=constants.OPERATIONAL_ENABLED, + availability=constants.AVAILABILITY_ONLINE, + ) + # Set the target version for controller-0 + self.dbapi.kube_host_upgrade_update(1, {'target_version': 'v1.42.2'}) + + # Create controller-1 + config_uuid = str(uuid.uuid4()) + c1 = self._create_test_ihost( + personality=constants.CONTROLLER, + 
hostname='controller-1', + uuid=str(uuid.uuid4()), + config_status=None, + config_applied=config_uuid, + config_target=config_uuid, + invprovision=constants.PROVISIONED, + administrative=constants.ADMIN_UNLOCKED, + operational=constants.OPERATIONAL_ENABLED, + availability=constants.AVAILABILITY_ONLINE, + mgmt_mac='00:11:22:33:44:56', + mgmt_ip='1.2.3.5', + ) + # Set the target version for controller-1 + self.dbapi.kube_host_upgrade_update(2, {'target_version': 'v1.42.2'}) + + puppet_class = 'platform::kubernetes::upgrade_control_plane' + + kube_upgrade_obj = objects.kube_upgrade.get_one(context) + self.service.handle_k8s_upgrade_control_plane_failure(self.context, + kube_upgrade_obj, c1.uuid, puppet_class) + personalities = [constants.CONTROLLER] + config_dict = { + "personalities": personalities, + "host_uuids": [c1.uuid], + "classes": [puppet_class], + puppet_common.REPORT_STATUS_CFG: + puppet_common.REPORT_UPGRADE_CONTROL_PLANE + } + + mock_config_apply_runtime_manifest.assert_called_with(mock.ANY, '273cfafd-886d-43ec-9478-8328727b34cc', + config_dict, skip_update_config=True) + + def test_handle_k8s_upgrade_control_plane_success_second_master(self): + # Create controller-0 + config_uuid = str(uuid.uuid4()) + self._create_test_ihost( + personality=constants.CONTROLLER, + hostname='controller-0', + uuid=str(uuid.uuid4()), + config_status=None, + config_applied=config_uuid, + config_target=config_uuid, + invprovision=constants.PROVISIONED, + administrative=constants.ADMIN_UNLOCKED, + operational=constants.OPERATIONAL_ENABLED, + availability=constants.AVAILABILITY_ONLINE, + ) + # Set the target version for controller-0 + self.dbapi.kube_host_upgrade_update(1, {'target_version': 'v1.42.2'}) + # Create controller-1 + config_uuid = str(uuid.uuid4()) + c1 = self._create_test_ihost( + personality=constants.CONTROLLER, + hostname='controller-1', + uuid=str(uuid.uuid4()), + config_status=None, + config_applied=config_uuid, + config_target=config_uuid, + 
invprovision=constants.PROVISIONED, + administrative=constants.ADMIN_UNLOCKED, + operational=constants.OPERATIONAL_ENABLED, + availability=constants.AVAILABILITY_ONLINE, + mgmt_mac='00:11:22:33:44:56', + mgmt_ip='1.2.3.5', + ) + # Set the target version for controller-1 + self.dbapi.kube_host_upgrade_update(2, {'target_version': 'v1.42.2'}) + # Make the control plane upgrade pass + self.kube_get_control_plane_versions_result = { + 'controller-0': 'v1.42.2', + 'controller-1': 'v1.42.2', + 'worker-0': 'v1.42.1'} + + utils.create_test_kube_upgrade( + from_version='v1.42.1', + to_version='v1.42.2', + state=kubernetes.KUBE_UPGRADED_SECOND_MASTER, + ) + + new_state = kubernetes.KUBE_UPGRADED_SECOND_MASTER + fail_state = kubernetes.KUBE_UPGRADING_SECOND_MASTER_FAILED + + kube_upgrade_obj = objects.kube_upgrade.get_one(context) + self.service.handle_k8s_upgrade_control_plane_success(self.context, + kube_upgrade_obj, c1.uuid, new_state, fail_state) + self.assertEqual(kube_upgrade_obj.state, + new_state) def test_kube_upgrade_kubelet_controller(self): # Create an upgrade