Add retry robustness for Kubernetes upgrade control plane

This addresses a rare intermittent failure during the control plane
upgrade step, where puppet hits its timeout before the upgrade
completes, or kubeadm hits its own upgrade manifest timeout (5m).

This change retries the process by reporting a failure to the
conductor when the puppet manifest apply fails. Since the manifest is
applied via RPC with options, we do not get the return code directly
and therefore cannot use a retry decorator. Instead, we use the sysinv
report callback feature to handle the success/failure paths.
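
As a rough, minimal sketch of the resulting flow (hypothetical names;
the real change wires this through REPORT_STATUS_CFG and the
report_config_status callback shown in the diff below):

    RETRY_LIMIT = 2

    class Conductor(object):
        def __init__(self):
            self.recovery_attempts = 0

        def apply_manifest(self, on_report):
            # Stand-in for _config_apply_runtime_manifest(): the apply
            # is fire-and-forget over RPC, so no return code is
            # available here; the agent reports back asynchronously.
            status = "failure" if self.recovery_attempts < 1 else "success"
            on_report(status)

        def report_config_status(self, status):
            # Agent callback: decide between success, retry, give up.
            if status == "success":
                print("control plane upgraded")
            elif self.recovery_attempts < RETRY_LIMIT:
                self.recovery_attempts += 1
                print("retrying, attempt %d" % self.recovery_attempts)
                self.apply_manifest(self.report_config_status)
            else:
                print("giving up, marking upgrade failed")

    c = Conductor()
    c.apply_manifest(c.report_config_status)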

TEST PLAN:
PASS: Perform simplex and duplex k8s upgrade successfully.
PASS: Install ISO successfully.
PASS: Manually send a STOP signal to pause the process so that the
      puppet manifest times out, then verify that the retry code runs
      and the upgrade completes on a retry attempt.
PASS: Manually decrease the puppet timeout to a very low value and
      verify that the code retries 2 times and then sets the failure
      state.
PASS: Perform an orchestrated k8s upgrade, manually send a STOP
      signal to pause the kubeadm process during the
      upgrading-first-master step, and perform system
      kube-upgrade-abort. Verify that the upgrade aborted
      successfully, and that the retry mechanism is not triggered
      for k8s upgrade control-plane since the upgrade is no longer
      in the KUBE_UPGRADING_FIRST_MASTER or
      KUBE_UPGRADING_SECOND_MASTER state.
PASS: Perform a manual k8s upgrade; on k8s upgrade control-plane
      failure, perform a manual upgrade-abort successfully.
      Perform an orchestrated k8s upgrade; on k8s upgrade
      control-plane failure after the retries, nfv aborts
      automatically.

Closes-Bug: 2056326

Depends-On: https://review.opendev.org/c/starlingx/nfv/+/912806
Depends-On: https://review.opendev.org/c/starlingx/stx-puppet/+/911945
Depends-On: https://review.opendev.org/c/starlingx/integ/+/913422

Change-Id: I5dc3b87530be89d623b40da650b7ff04c69f1cc5
Signed-off-by: Saba Touheed Mujawar <sabatouheed.mujawar@windriver.com>

@@ -2484,6 +2484,9 @@ CSTATE_PATH = "/sys/devices/system/cpu/cpu0/cpuidle"
# Auto-recovery limits for kube upgrade abort
AUTO_RECOVERY_COUNT = 3
# Auto-recovery limits for kube upgrade control-plane
CONTROL_PLANE_RETRY_COUNT = 2
# Puppet Runtime Manifest constants
RUNTIME_CONFIG_APPLY_TIMEOUT_IN_SECS = 600
RUNTIME_CONFIG_STATE_PENDING = "pending"

@@ -10253,6 +10253,58 @@ class ConductorManager(service.PeriodicService):
            context, config_uuid=active_controller.config_target,
            config_dict=config_dict, skip_update_config=True)

    def handle_k8s_upgrade_control_plane_failure(self, context, kube_upgrade_obj,
                                                 host_uuid, puppet_class):
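        """Retry the k8s control plane upgrade after a failed manifest apply.

        Bumps recovery_attempts and re-applies the puppet runtime manifest,
        tagged so the agent reports the result back via report_config_status.
        """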
        kube_upgrade_obj.recovery_attempts += 1
        kube_upgrade_obj.save()
        personalities = [constants.CONTROLLER]
        config_uuid = self._config_update_hosts(context, personalities,
                                                [host_uuid])
        # Apply the runtime manifest to upgrade the control plane
        config_dict = {
            "personalities": personalities,
            "host_uuids": [host_uuid],
            "classes": [puppet_class],
            puppet_common.REPORT_STATUS_CFG:
                puppet_common.REPORT_UPGRADE_CONTROL_PLANE
        }
        self._config_apply_runtime_manifest(context, config_uuid, config_dict,
                                            skip_update_config=True)

    def handle_k8s_upgrade_control_plane_success(self, context, kube_upgrade_obj,
                                                 host_uuid, new_state, fail_state):
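        """Handle a successfully applied control plane upgrade manifest.

        Confirms the host actually runs the target control plane version,
        then advances the upgrade to new_state, or to fail_state if the
        version check fails.
        """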
        host_obj = objects.host.get_by_uuid(context, host_uuid)
        host_name = host_obj.hostname
        kube_host_upgrade_obj = objects.kube_host_upgrade.get_by_host_id(
            context, host_obj.id)
        target_version = kube_host_upgrade_obj.target_version
        kube_operator = kubernetes.KubeOperator()
        cp_versions = kube_operator.kube_get_control_plane_versions()
        LOG.info("Checking control plane update on host %s, "
                 "cp_versions = %s, target_version = %s" %
                 (host_name, cp_versions, target_version))
        if cp_versions.get(host_name, None) != target_version:
            LOG.warning("Control plane upgrade failed for host %s" %
                        host_name)
            kube_host_upgrade_obj.status = \
                kubernetes.KUBE_HOST_UPGRADING_CONTROL_PLANE_FAILED
            kube_host_upgrade_obj.save()
            kube_upgrade_obj.state = fail_state
            kube_upgrade_obj.save()
            return
        # The control plane update was successful
        LOG.info("Control plane was updated for host %s" % host_name)
        kube_host_upgrade_obj.status = None
        kube_host_upgrade_obj.save()
        kube_upgrade_obj.state = new_state
        kube_upgrade_obj.save()

    def report_config_status(self, context, iconfig, status, error=None):
        """ Callback from Sysinv Agent on manifest apply success or failure
@@ -10462,6 +10514,48 @@ class ConductorManager(service.PeriodicService):
                kube_upgrade_obj.state = kubernetes.KUBE_UPGRADE_ABORTING_FAILED
                kube_upgrade_obj.save()
                self.kube_upgrade_abort_recovery(context)
        elif reported_cfg == puppet_common.REPORT_UPGRADE_CONTROL_PLANE:
            # The agent is reporting that the kube upgrade control plane
            # runtime manifest has been applied.
            kube_upgrade_obj = objects.kube_upgrade.get_one(context)
            host_obj = objects.host.get_by_uuid(context, host_uuid)
            if kube_upgrade_obj.state == kubernetes.KUBE_UPGRADING_FIRST_MASTER:
                puppet_class = 'platform::kubernetes::upgrade_first_control_plane'
                new_state = kubernetes.KUBE_UPGRADED_FIRST_MASTER
                fail_state = kubernetes.KUBE_UPGRADING_FIRST_MASTER_FAILED
            elif kube_upgrade_obj.state == kubernetes.KUBE_UPGRADING_SECOND_MASTER:
                puppet_class = 'platform::kubernetes::upgrade_control_plane'
                new_state = kubernetes.KUBE_UPGRADED_SECOND_MASTER
                fail_state = kubernetes.KUBE_UPGRADING_SECOND_MASTER_FAILED
            else:
                # Handles the case during an orchestrated k8s upgrade where
                # the nfv timeout fires before the puppet timeout and moves
                # the k8s upgrade to the upgrade-aborted state.
                LOG.info("Skipping retry: Kubernetes upgrade state %s is not %s or %s"
                         % (kube_upgrade_obj.state,
                            kubernetes.KUBE_UPGRADING_FIRST_MASTER,
                            kubernetes.KUBE_UPGRADING_SECOND_MASTER))
                return
            if status == puppet_common.REPORT_SUCCESS:
                # Upgrade control-plane action was successful.
                success = True
                self.handle_k8s_upgrade_control_plane_success(context, kube_upgrade_obj, host_uuid,
                                                              new_state, fail_state)
            if status == puppet_common.REPORT_FAILURE:
                # Upgrade control-plane action failed to apply the puppet manifest.
                if kube_upgrade_obj.recovery_attempts < constants.CONTROL_PLANE_RETRY_COUNT:
                    LOG.info("k8s upgrade control plane failed - retrying attempt %s"
                             % kube_upgrade_obj.recovery_attempts)
                    self.handle_k8s_upgrade_control_plane_failure(context, kube_upgrade_obj,
                                                                  host_uuid, puppet_class)
                else:
                    LOG.warning("k8s upgrade control plane failed %s times, giving up"
                                % constants.CONTROL_PLANE_RETRY_COUNT)
                    kube_upgrade_obj.state = fail_state
                    kube_upgrade_obj.save()
                    kube_host_upgrade_obj = objects.kube_host_upgrade.get_by_host_id(
                        context, host_obj.id)
                    kube_host_upgrade_obj.status = \
                        kubernetes.KUBE_HOST_UPGRADING_CONTROL_PLANE_FAILED
                    kube_host_upgrade_obj.save()
        else:
            LOG.error("Reported configuration '%(cfg)s' is not handled by"
                      " report_config_status! iconfig: %(iconfig)s" %
@@ -17024,6 +17118,8 @@ class ConductorManager(service.PeriodicService):
            context, host_obj.id)
        target_version = kube_host_upgrade_obj.target_version
        kube_upgrade_obj = objects.kube_upgrade.get_one(context)
        kube_upgrade_obj.recovery_attempts = 0
        kube_upgrade_obj.save()
        kube_operator = kubernetes.KubeOperator()
        current_versions = kube_operator.kube_get_kubelet_versions()
        system = self.dbapi.isystem_get_one()
@@ -17041,7 +17137,6 @@ class ConductorManager(service.PeriodicService):
            kube_host_upgrade_obj.save()
            puppet_class = 'platform::kubernetes::upgrade_first_control_plane'
            new_state = kubernetes.KUBE_UPGRADED_FIRST_MASTER
            fail_state = kubernetes.KUBE_UPGRADING_FIRST_MASTER_FAILED

            # Drop any removed/unsupported feature gates before we upgrade to a
@@ -17086,7 +17181,6 @@ class ConductorManager(service.PeriodicService):
        elif kube_upgrade_obj.state == kubernetes.KUBE_UPGRADING_SECOND_MASTER:
            puppet_class = 'platform::kubernetes::upgrade_control_plane'
            new_state = kubernetes.KUBE_UPGRADED_SECOND_MASTER
            fail_state = kubernetes.KUBE_UPGRADING_SECOND_MASTER_FAILED
        else:
            raise exception.SysinvException(_(
@@ -17102,7 +17196,9 @@ class ConductorManager(service.PeriodicService):
        config_dict = {
            "personalities": personalities,
            "host_uuids": [host_uuid],
            "classes": [puppet_class]
            "classes": [puppet_class],
            puppet_common.REPORT_STATUS_CFG:
                puppet_common.REPORT_UPGRADE_CONTROL_PLANE
        }
        try:
            self._config_apply_runtime_manifest(context, config_uuid, config_dict)
@@ -17111,57 +17207,6 @@ class ConductorManager(service.PeriodicService):
                        (host_name, config_uuid))
            manifest_apply_failed_state(context, fail_state, host_obj)

        # Wait for the manifest to be applied
        elapsed = 0
        LOG.info("Waiting for config apply on host = %s" % host_name)
        while elapsed < kubernetes.MANIFEST_APPLY_TIMEOUT:
            elapsed += kubernetes.MANIFEST_APPLY_INTERVAL
            greenthread.sleep(kubernetes.MANIFEST_APPLY_INTERVAL)
            host_obj = objects.host.get_by_uuid(context, host_uuid)
            if host_obj.config_target == host_obj.config_applied:
                LOG.info("Config was applied for host %s" % host_name)
                break
            LOG.info("Waiting for config apply on host %s" % host_name)
        else:
            LOG.warning("Manifest apply failed for host %s" % host_name)
            manifest_apply_failed_state(context, fail_state, host_obj)

        # Wait for the control plane pods to start with the new version
        elapsed = 0
        LOG.info("Waiting for control plane update on host %s, "
                 "target_version = %s" % (host_name, target_version))
        while elapsed < kubernetes.POD_START_TIMEOUT:
            elapsed += kubernetes.POD_START_INTERVAL
            greenthread.sleep(kubernetes.POD_START_INTERVAL)
            cp_versions = kube_operator.kube_get_control_plane_versions()
            if cp_versions.get(host_name, None) == target_version:
                LOG.info("Control plane was updated for host %s" % host_name)
                break
            LOG.info("Waiting for control plane update on host %s, "
                     "cp_versions = %s, target_version = %s" %
                     (host_name, cp_versions, target_version))
        else:
            LOG.warning("Control plane upgrade failed for host %s" %
                        host_name)
            kube_host_upgrade_obj = objects.kube_host_upgrade.get_by_host_id(
                context, host_obj.id)
            kube_host_upgrade_obj.status = \
                kubernetes.KUBE_HOST_UPGRADING_CONTROL_PLANE_FAILED
            kube_host_upgrade_obj.save()
            kube_upgrade_obj = objects.kube_upgrade.get_one(context)
            kube_upgrade_obj.state = fail_state
            kube_upgrade_obj.save()
            return

        # The control plane update was successful
        kube_host_upgrade_obj = objects.kube_host_upgrade.get_by_host_id(
            context, host_obj.id)
        kube_host_upgrade_obj.status = None
        kube_host_upgrade_obj.save()
        kube_upgrade_obj = objects.kube_upgrade.get_one(context)
        kube_upgrade_obj.state = new_state
        kube_upgrade_obj.save()

    def kube_upgrade_kubelet(self, context, host_uuid):
        """Upgrade the kubernetes kubelet on this host"""
@@ -17339,6 +17384,8 @@ class ConductorManager(service.PeriodicService):
        """
        kube_upgrade_obj = objects.kube_upgrade.get_one(context)
        kube_upgrade_obj.recovery_attempts = 0
        kube_upgrade_obj.save()
        controller_hosts = self.dbapi.ihost_get_by_personality(
            constants.CONTROLLER)
        system = self.dbapi.isystem_get_one()

View File

@@ -57,6 +57,7 @@ REPORT_HTTP_CONFIG = 'http_config'
REPORT_KERNEL_CONFIG = 'host_kernel_config'
REPORT_UPGRADE_ABORT = 'upgrade_abort'
REPORT_APPARMOR_CONFIG = 'host_apparmor_config'
REPORT_UPGRADE_CONTROL_PLANE = 'upgrade_control_plane'


def puppet_apply_manifest(ip_address, personality,

@@ -1501,7 +1501,13 @@ class ManagerTestCase(base.DbTestCase):
        self.assertEqual(updated_upgrade.state,
                         kubernetes.KUBE_UPGRADE_UNCORDON_COMPLETE)

    def test_kube_upgrade_control_plane_first_master(self):
    @mock.patch('sysinv.conductor.manager.'
                'ConductorManager._config_apply_runtime_manifest')
    @mock.patch('sysinv.conductor.manager.'
                'ConductorManager._config_update_hosts')
    def test_kube_upgrade_control_plane_first_master(self, mock_config_update_hosts,
                                                     mock_config_apply_runtime_manifest):
        mock_config_update_hosts.return_value = "6c5aa183-4884-46e6-b86a-b29e6b08dedb"
        # Create an upgrade
        utils.create_test_kube_upgrade(
            from_version='v1.42.1',
@@ -1574,349 +1580,25 @@ class ManagerTestCase(base.DbTestCase):
        # Upgrade the control plane
        self.service.kube_upgrade_control_plane(self.context, c0.uuid)

        # Verify that the upgrade state was updated
        updated_upgrade = self.dbapi.kube_upgrade_get_one()
        self.assertEqual(updated_upgrade.state,
                         kubernetes.KUBE_UPGRADED_FIRST_MASTER)
        personalities = [constants.CONTROLLER]
        config_dict = {
            "personalities": personalities,
            "host_uuids": [c0.uuid],
            "classes": ['platform::kubernetes::upgrade_first_control_plane'],
            puppet_common.REPORT_STATUS_CFG:
                puppet_common.REPORT_UPGRADE_CONTROL_PLANE
        }
        # Verify that the host upgrade status was cleared
        updated_host_upgrade = self.dbapi.kube_host_upgrade_get(1)
        self.assertEqual(updated_host_upgrade.status, None)
        mock_config_apply_runtime_manifest.assert_called_with(mock.ANY, '6c5aa183-4884-46e6-b86a-b29e6b08dedb',
                                                              config_dict)

    def test_kube_upgrade_control_plane_first_master_simplex(self):
        system_dict = self.system.as_dict()
        system_dict['system_mode'] = constants.SYSTEM_MODE_SIMPLEX
        self.dbapi.isystem_update(self.system.uuid, system_dict)

        # Create an upgrade
        utils.create_test_kube_upgrade(
            from_version='v1.41.1',
            to_version='v1.43.1',
            state=kubernetes.KUBE_UPGRADING_FIRST_MASTER,
        )
        # Create controller-0
        config_uuid = str(uuid.uuid4())
        c0 = self._create_test_ihost(
            personality=constants.CONTROLLER,
            hostname='controller-0',
            uuid=str(uuid.uuid4()),
            config_status=None,
            config_applied=config_uuid,
            config_target=config_uuid,
            invprovision=constants.PROVISIONED,
            administrative=constants.ADMIN_UNLOCKED,
            operational=constants.OPERATIONAL_ENABLED,
            availability=constants.AVAILABILITY_ONLINE,
        )
        # Set the target version for controller-0
        self.dbapi.kube_host_upgrade_update(1, {'target_version': 'v1.43.1'})
        # Make the control plane upgrade pass
        self.kube_get_control_plane_versions_result = {
            'controller-0': 'v1.42.2'}
        self.kube_get_kubelet_versions_result = {
            'controller-0': 'v1.41.1'}

        mock_sanitize_feature_gates_bootstrap_config_file = mock.MagicMock()
        p = mock.patch(
            'sysinv.conductor.manager.ConductorManager.sanitize_feature_gates_bootstrap_config_file',
            mock_sanitize_feature_gates_bootstrap_config_file)
        p.start().return_value = 0
        self.addCleanup(p.stop)

        mock_sanitize_feature_gates_service_parameters = mock.MagicMock()
        p2 = mock.patch(
            'sysinv.conductor.manager.ConductorManager.sanitize_feature_gates_service_parameters',
            mock_sanitize_feature_gates_service_parameters)
        p2.start().return_value = 0
        self.addCleanup(p2.stop)

        mock_sanitize_feature_gates_kubeadm_configmap = mock.MagicMock()
        p2 = mock.patch(
            'sysinv.conductor.manager.ConductorManager.sanitize_feature_gates_kubeadm_configmap',
            mock_sanitize_feature_gates_kubeadm_configmap)
        p2.start().return_value = 0
        self.addCleanup(p2.stop)

        mock_sanitize_feature_gates_kubelet_configmap = mock.MagicMock()
        p2 = mock.patch(
            'sysinv.conductor.manager.ConductorManager.sanitize_feature_gates_kubelet_configmap',
            mock_sanitize_feature_gates_kubelet_configmap)
        p2.start().return_value = 0
        self.addCleanup(p2.stop)

        mock_sanitize_image_repository_kubeadm_configmap = mock.MagicMock()
        p2 = mock.patch(
            'sysinv.conductor.manager.ConductorManager.sanitize_image_repository_kubeadm_configmap',
            mock_sanitize_image_repository_kubeadm_configmap)
        p2.start().return_value = 0
        self.addCleanup(p2.stop)

        self.service._kube = FakeKubeOperator()

        # Speed up the test
        kubernetes.MANIFEST_APPLY_INTERVAL = 1
        kubernetes.POD_START_INTERVAL = 1

        # Upgrade the control plane
        self.service.kube_upgrade_control_plane(self.context, c0.uuid)

        # Verify that the upgrade state was updated
        updated_upgrade = self.dbapi.kube_upgrade_get_one()
        self.assertEqual(updated_upgrade.state,
                         kubernetes.KUBE_UPGRADED_FIRST_MASTER)

        # Verify that the host upgrade status was cleared
        updated_host_upgrade = self.dbapi.kube_host_upgrade_get(1)
        self.assertEqual(updated_host_upgrade.status, None)

    def test_kube_upgrade_control_plane_first_master_simplex_failed(self):
        system_dict = self.system.as_dict()
        system_dict['system_mode'] = constants.SYSTEM_MODE_SIMPLEX
        self.dbapi.isystem_update(self.system.uuid, system_dict)

        # Create an upgrade
        utils.create_test_kube_upgrade(
            from_version='v1.41.1',
            to_version='v1.43.1',
            state=kubernetes.KUBE_UPGRADING_FIRST_MASTER,
        )
        # Create controller-0
        config_uuid = str(uuid.uuid4())
        c0 = self._create_test_ihost(
            personality=constants.CONTROLLER,
            hostname='controller-0',
            uuid=str(uuid.uuid4()),
            config_status=None,
            config_applied=config_uuid,
            config_target=config_uuid,
            invprovision=constants.PROVISIONED,
            administrative=constants.ADMIN_UNLOCKED,
            operational=constants.OPERATIONAL_ENABLED,
            availability=constants.AVAILABILITY_ONLINE,
        )
        # Set the target version for controller-0
        self.dbapi.kube_host_upgrade_update(1, {'target_version': 'v1.43.1'})
        # Check the control plane upgrade
        self.kube_get_control_plane_versions_result = {
            'controller-0': 'v1.43.1'}
        self.kube_get_kubelet_versions_result = {
            'controller-0': 'v1.41.1'}

        mock_sanitize_feature_gates_bootstrap_config_file = mock.MagicMock()
        p = mock.patch(
            'sysinv.conductor.manager.ConductorManager.sanitize_feature_gates_bootstrap_config_file',
            mock_sanitize_feature_gates_bootstrap_config_file)
        p.start().return_value = 0
        self.addCleanup(p.stop)

        mock_sanitize_feature_gates_service_parameters = mock.MagicMock()
        p2 = mock.patch(
            'sysinv.conductor.manager.ConductorManager.sanitize_feature_gates_service_parameters',
            mock_sanitize_feature_gates_service_parameters)
        p2.start().return_value = 0
        self.addCleanup(p2.stop)

        mock_sanitize_feature_gates_kubeadm_configmap = mock.MagicMock()
        p2 = mock.patch(
            'sysinv.conductor.manager.ConductorManager.sanitize_feature_gates_kubeadm_configmap',
            mock_sanitize_feature_gates_kubeadm_configmap)
        p2.start().return_value = 0
        self.addCleanup(p2.stop)

        mock_sanitize_feature_gates_kubelet_configmap = mock.MagicMock()
        p2 = mock.patch(
            'sysinv.conductor.manager.ConductorManager.sanitize_feature_gates_kubelet_configmap',
            mock_sanitize_feature_gates_kubelet_configmap)
        p2.start().return_value = 0
        self.addCleanup(p2.stop)

        mock_sanitize_image_repository_kubeadm_configmap = mock.MagicMock()
        p2 = mock.patch(
            'sysinv.conductor.manager.ConductorManager.sanitize_image_repository_kubeadm_configmap',
            mock_sanitize_image_repository_kubeadm_configmap)
        p2.start().return_value = 0
        self.addCleanup(p2.stop)

        self.service._kube = FakeKubeOperator()

        # Speed up the test
        kubernetes.MANIFEST_APPLY_INTERVAL = 1
        kubernetes.POD_START_INTERVAL = 1
        kubernetes.POD_START_TIMEOUT = 1

        # Upgrade the control plane
        self.service.kube_upgrade_control_plane(self.context, c0.uuid)

        # Verify that the upgrade state was updated
        updated_upgrade = self.dbapi.kube_upgrade_get_one()
        self.assertEqual(updated_upgrade.state,
                         kubernetes.KUBE_UPGRADING_FIRST_MASTER_FAILED)

        # Verify that the host upgrade status was set
        updated_host_upgrade = self.dbapi.kube_host_upgrade_get(1)
        self.assertEqual(updated_host_upgrade.status,
                         kubernetes.KUBE_HOST_UPGRADING_CONTROL_PLANE_FAILED)

    def test_kube_upgrade_control_plane_first_master_manifest_timeout(self):
        # Create an upgrade
        utils.create_test_kube_upgrade(
            from_version='v1.42.1',
            to_version='v1.42.2',
            state=kubernetes.KUBE_UPGRADING_FIRST_MASTER,
        )
        # Create controller-0
        config_uuid = str(uuid.uuid4())
        c0 = self._create_test_ihost(
            personality=constants.CONTROLLER,
            hostname='controller-0',
            uuid=str(uuid.uuid4()),
            config_status=None,
            config_applied=config_uuid,
            config_target=config_uuid,
            invprovision=constants.PROVISIONED,
            administrative=constants.ADMIN_UNLOCKED,
            operational=constants.OPERATIONAL_ENABLED,
            availability=constants.AVAILABILITY_ONLINE,
        )
        # Set the target version for controller-0
        self.dbapi.kube_host_upgrade_update(1, {'target_version': 'v1.42.2'})
        # Make the manifest apply fail
        self.fail_config_apply_runtime_manifest = True

        mock_sanitize_feature_gates_bootstrap_config_file = mock.MagicMock()
        p = mock.patch(
            'sysinv.conductor.manager.ConductorManager.sanitize_feature_gates_bootstrap_config_file',
            mock_sanitize_feature_gates_bootstrap_config_file)
        p.start().return_value = 0
        self.addCleanup(p.stop)

        mock_sanitize_feature_gates_service_parameters = mock.MagicMock()
        p2 = mock.patch(
            'sysinv.conductor.manager.ConductorManager.sanitize_feature_gates_service_parameters',
            mock_sanitize_feature_gates_service_parameters)
        p2.start().return_value = 0
        self.addCleanup(p2.stop)

        mock_sanitize_feature_gates_kubeadm_configmap = mock.MagicMock()
        p2 = mock.patch(
            'sysinv.conductor.manager.ConductorManager.sanitize_feature_gates_kubeadm_configmap',
            mock_sanitize_feature_gates_kubeadm_configmap)
        p2.start().return_value = 0
        self.addCleanup(p2.stop)

        mock_sanitize_feature_gates_kubelet_configmap = mock.MagicMock()
        p2 = mock.patch(
            'sysinv.conductor.manager.ConductorManager.sanitize_feature_gates_kubelet_configmap',
            mock_sanitize_feature_gates_kubelet_configmap)
        p2.start().return_value = 0
        self.addCleanup(p2.stop)

        mock_sanitize_image_repository_kubeadm_configmap = mock.MagicMock()
        p2 = mock.patch(
            'sysinv.conductor.manager.ConductorManager.sanitize_image_repository_kubeadm_configmap',
            mock_sanitize_image_repository_kubeadm_configmap)
        p2.start().return_value = 0
        self.addCleanup(p2.stop)

        self.service._kube = FakeKubeOperator()

        # Speed up the test
        kubernetes.MANIFEST_APPLY_INTERVAL = 1
        kubernetes.MANIFEST_APPLY_TIMEOUT = 1

        # Upgrade the control plane
        self.service.kube_upgrade_control_plane(self.context, c0.uuid)

        # Verify that the upgrade state was updated
        updated_upgrade = self.dbapi.kube_upgrade_get_one()
        self.assertEqual(updated_upgrade.state,
                         kubernetes.KUBE_UPGRADING_FIRST_MASTER_FAILED)

        # Verify that the host upgrade status was set
        updated_host_upgrade = self.dbapi.kube_host_upgrade_get(1)
        self.assertEqual(updated_host_upgrade.status,
                         kubernetes.KUBE_HOST_UPGRADING_CONTROL_PLANE_FAILED)

    def test_kube_upgrade_control_plane_first_master_upgrade_fail(self):
        # Create an upgrade
        utils.create_test_kube_upgrade(
            from_version='v1.42.1',
            to_version='v1.42.2',
            state=kubernetes.KUBE_UPGRADING_FIRST_MASTER,
        )
        # Create controller-0
        config_uuid = str(uuid.uuid4())
        c0 = self._create_test_ihost(
            personality=constants.CONTROLLER,
            hostname='controller-0',
            uuid=str(uuid.uuid4()),
            config_status=None,
            config_applied=config_uuid,
            config_target=config_uuid,
            invprovision=constants.PROVISIONED,
            administrative=constants.ADMIN_UNLOCKED,
            operational=constants.OPERATIONAL_ENABLED,
            availability=constants.AVAILABILITY_ONLINE,
        )
        # Set the target version for controller-0
        self.dbapi.kube_host_upgrade_update(1, {'target_version': 'v1.42.2'})

        mock_sanitize_feature_gates_bootstrap_config_file = mock.MagicMock()
        p = mock.patch(
            'sysinv.conductor.manager.ConductorManager.sanitize_feature_gates_bootstrap_config_file',
            mock_sanitize_feature_gates_bootstrap_config_file)
        p.start().return_value = 0
        self.addCleanup(p.stop)

        mock_sanitize_feature_gates_service_parameters = mock.MagicMock()
        p2 = mock.patch(
            'sysinv.conductor.manager.ConductorManager.sanitize_feature_gates_service_parameters',
            mock_sanitize_feature_gates_service_parameters)
        p2.start().return_value = 0
        self.addCleanup(p2.stop)

        mock_sanitize_feature_gates_kubeadm_configmap = mock.MagicMock()
        p2 = mock.patch(
            'sysinv.conductor.manager.ConductorManager.sanitize_feature_gates_kubeadm_configmap',
            mock_sanitize_feature_gates_kubeadm_configmap)
        p2.start().return_value = 0
        self.addCleanup(p2.stop)

        mock_sanitize_feature_gates_kubelet_configmap = mock.MagicMock()
        p2 = mock.patch(
            'sysinv.conductor.manager.ConductorManager.sanitize_feature_gates_kubelet_configmap',
            mock_sanitize_feature_gates_kubelet_configmap)
        p2.start().return_value = 0
        self.addCleanup(p2.stop)

        mock_sanitize_image_repository_kubeadm_configmap = mock.MagicMock()
        p2 = mock.patch(
            'sysinv.conductor.manager.ConductorManager.sanitize_image_repository_kubeadm_configmap',
            mock_sanitize_image_repository_kubeadm_configmap)
        p2.start().return_value = 0
        self.addCleanup(p2.stop)

        self.service._kube = FakeKubeOperator()

        # Speed up the test
        kubernetes.MANIFEST_APPLY_INTERVAL = 1
        kubernetes.POD_START_INTERVAL = 1
        kubernetes.POD_START_TIMEOUT = 1

        # Upgrade the control plane
        self.service.kube_upgrade_control_plane(self.context, c0.uuid)

        # Verify that the upgrade state was updated
        updated_upgrade = self.dbapi.kube_upgrade_get_one()
        self.assertEqual(updated_upgrade.state,
                         kubernetes.KUBE_UPGRADING_FIRST_MASTER_FAILED)

        # Verify that the host upgrade status was set
        updated_host_upgrade = self.dbapi.kube_host_upgrade_get(1)
        self.assertIsNotNone(updated_host_upgrade.status)

    def test_kube_upgrade_control_plane_second_master(self):
    @mock.patch('sysinv.conductor.manager.'
                'ConductorManager._config_apply_runtime_manifest')
    @mock.patch('sysinv.conductor.manager.'
                'ConductorManager._config_update_hosts')
    def test_kube_upgrade_control_plane_second_master(self, mock_config_update_hosts,
                                                      mock_config_apply_runtime_manifest):
        mock_config_update_hosts.return_value = "6c5aa183-4884-46e6-b86a-b29e6b08dedb"
        # Create an upgrade
        utils.create_test_kube_upgrade(
            from_version='v1.42.1',
@@ -1972,14 +1654,222 @@ class ManagerTestCase(base.DbTestCase):
        # Upgrade the control plane
        self.service.kube_upgrade_control_plane(self.context, c1.uuid)

        # Verify that the upgrade state was updated
        updated_upgrade = self.dbapi.kube_upgrade_get_one()
        self.assertEqual(updated_upgrade.state,
                         kubernetes.KUBE_UPGRADED_SECOND_MASTER)
        personalities = [constants.CONTROLLER]
        config_dict = {
            "personalities": personalities,
            "host_uuids": [c1.uuid],
            "classes": ['platform::kubernetes::upgrade_control_plane'],
            puppet_common.REPORT_STATUS_CFG:
                puppet_common.REPORT_UPGRADE_CONTROL_PLANE
        }
        # Verify that the host upgrade status was cleared
        updated_host_upgrade = self.dbapi.kube_host_upgrade_get(1)
        self.assertEqual(updated_host_upgrade.status, None)
        mock_config_apply_runtime_manifest.assert_called_with(mock.ANY, '6c5aa183-4884-46e6-b86a-b29e6b08dedb',
                                                              config_dict)

    @mock.patch('sysinv.conductor.manager.'
                'ConductorManager._config_apply_runtime_manifest')
    @mock.patch('sysinv.conductor.manager.'
                'ConductorManager._config_update_hosts')
    def test_handle_k8s_upgrade_control_plane_failure_first_master(self, mock_config_update_hosts,
                                                                   mock_config_apply_runtime_manifest):
        mock_config_update_hosts.return_value = "273cfafd-886d-43ec-9478-8328727b34cc"
        utils.create_test_kube_upgrade(
            from_version='v1.42.1',
            to_version='v1.42.2',
            state=kubernetes.KUBE_UPGRADING_FIRST_MASTER,
        )
        # Create controller-0
        config_uuid = str(uuid.uuid4())
        c0 = self._create_test_ihost(
            personality=constants.CONTROLLER,
            hostname='controller-0',
            uuid=str(uuid.uuid4()),
            config_status=None,
            config_applied=config_uuid,
            config_target=config_uuid,
            invprovision=constants.PROVISIONED,
            administrative=constants.ADMIN_UNLOCKED,
            operational=constants.OPERATIONAL_ENABLED,
            availability=constants.AVAILABILITY_ONLINE,
        )
        # Set the target version for controller-0
        self.dbapi.kube_host_upgrade_update(1, {'target_version': 'v1.42.2'})
        puppet_class = 'platform::kubernetes::upgrade_first_control_plane'
        kube_upgrade_obj = objects.kube_upgrade.get_one(self.context)
        self.service.handle_k8s_upgrade_control_plane_failure(
            self.context, kube_upgrade_obj, c0.uuid, puppet_class)

        personalities = [constants.CONTROLLER]
        config_dict = {
            "personalities": personalities,
            "host_uuids": [c0.uuid],
            "classes": [puppet_class],
            puppet_common.REPORT_STATUS_CFG:
                puppet_common.REPORT_UPGRADE_CONTROL_PLANE
        }
        mock_config_apply_runtime_manifest.assert_called_with(mock.ANY, '273cfafd-886d-43ec-9478-8328727b34cc',
                                                              config_dict, skip_update_config=True)

    def test_handle_k8s_upgrade_control_plane_success_first_master(self):
        # Create controller-0
        config_uuid = str(uuid.uuid4())
        c0 = self._create_test_ihost(
            personality=constants.CONTROLLER,
            hostname='controller-0',
            uuid=str(uuid.uuid4()),
            config_status=None,
            config_applied=config_uuid,
            config_target=config_uuid,
            invprovision=constants.PROVISIONED,
            administrative=constants.ADMIN_UNLOCKED,
            operational=constants.OPERATIONAL_ENABLED,
            availability=constants.AVAILABILITY_ONLINE,
        )
        # Set the target version for controller-0
        self.dbapi.kube_host_upgrade_update(1, {'target_version': 'v1.42.2'})
        utils.create_test_kube_upgrade(
            from_version='v1.42.1',
            to_version='v1.42.2',
            state=kubernetes.KUBE_UPGRADED_FIRST_MASTER,
        )
        self.kube_get_control_plane_versions_result = {
            'controller-0': 'v1.42.2'}

        new_state = kubernetes.KUBE_UPGRADED_FIRST_MASTER
        fail_state = kubernetes.KUBE_UPGRADING_FIRST_MASTER_FAILED
        kube_upgrade_obj = objects.kube_upgrade.get_one(self.context)
        self.service.handle_k8s_upgrade_control_plane_success(
            self.context, kube_upgrade_obj, c0.uuid, new_state, fail_state)
        self.assertEqual(kube_upgrade_obj.state, new_state)

    @mock.patch('sysinv.conductor.manager.'
                'ConductorManager._config_apply_runtime_manifest')
    @mock.patch('sysinv.conductor.manager.'
                'ConductorManager._config_update_hosts')
    def test_handle_k8s_upgrade_control_plane_failure_second_master(self, mock_config_update_hosts,
                                                                    mock_config_apply_runtime_manifest):
        mock_config_update_hosts.return_value = "273cfafd-886d-43ec-9478-8328727b34cc"
        utils.create_test_kube_upgrade(
            from_version='v1.42.1',
            to_version='v1.42.2',
            state=kubernetes.KUBE_UPGRADING_SECOND_MASTER,
        )
        # Create controller-0
        config_uuid = str(uuid.uuid4())
        self._create_test_ihost(
            personality=constants.CONTROLLER,
            hostname='controller-0',
            uuid=str(uuid.uuid4()),
            config_status=None,
            config_applied=config_uuid,
            config_target=config_uuid,
            invprovision=constants.PROVISIONED,
            administrative=constants.ADMIN_UNLOCKED,
            operational=constants.OPERATIONAL_ENABLED,
            availability=constants.AVAILABILITY_ONLINE,
        )
        # Set the target version for controller-0
        self.dbapi.kube_host_upgrade_update(1, {'target_version': 'v1.42.2'})
        # Create controller-1
        config_uuid = str(uuid.uuid4())
        c1 = self._create_test_ihost(
            personality=constants.CONTROLLER,
            hostname='controller-1',
            uuid=str(uuid.uuid4()),
            config_status=None,
            config_applied=config_uuid,
            config_target=config_uuid,
            invprovision=constants.PROVISIONED,
            administrative=constants.ADMIN_UNLOCKED,
            operational=constants.OPERATIONAL_ENABLED,
            availability=constants.AVAILABILITY_ONLINE,
            mgmt_mac='00:11:22:33:44:56',
            mgmt_ip='1.2.3.5',
        )
        # Set the target version for controller-1
        self.dbapi.kube_host_upgrade_update(2, {'target_version': 'v1.42.2'})
        puppet_class = 'platform::kubernetes::upgrade_control_plane'
        kube_upgrade_obj = objects.kube_upgrade.get_one(self.context)
        self.service.handle_k8s_upgrade_control_plane_failure(
            self.context, kube_upgrade_obj, c1.uuid, puppet_class)

        personalities = [constants.CONTROLLER]
        config_dict = {
            "personalities": personalities,
            "host_uuids": [c1.uuid],
            "classes": [puppet_class],
            puppet_common.REPORT_STATUS_CFG:
                puppet_common.REPORT_UPGRADE_CONTROL_PLANE
        }
        mock_config_apply_runtime_manifest.assert_called_with(mock.ANY, '273cfafd-886d-43ec-9478-8328727b34cc',
                                                              config_dict, skip_update_config=True)

    def test_handle_k8s_upgrade_control_plane_success_second_master(self):
        # Create controller-0
        config_uuid = str(uuid.uuid4())
        self._create_test_ihost(
            personality=constants.CONTROLLER,
            hostname='controller-0',
            uuid=str(uuid.uuid4()),
            config_status=None,
            config_applied=config_uuid,
            config_target=config_uuid,
            invprovision=constants.PROVISIONED,
            administrative=constants.ADMIN_UNLOCKED,
            operational=constants.OPERATIONAL_ENABLED,
            availability=constants.AVAILABILITY_ONLINE,
        )
        # Set the target version for controller-0
        self.dbapi.kube_host_upgrade_update(1, {'target_version': 'v1.42.2'})
        # Create controller-1
        config_uuid = str(uuid.uuid4())
        c1 = self._create_test_ihost(
            personality=constants.CONTROLLER,
            hostname='controller-1',
            uuid=str(uuid.uuid4()),
            config_status=None,
            config_applied=config_uuid,
            config_target=config_uuid,
            invprovision=constants.PROVISIONED,
            administrative=constants.ADMIN_UNLOCKED,
            operational=constants.OPERATIONAL_ENABLED,
            availability=constants.AVAILABILITY_ONLINE,
            mgmt_mac='00:11:22:33:44:56',
            mgmt_ip='1.2.3.5',
        )
        # Set the target version for controller-1
        self.dbapi.kube_host_upgrade_update(2, {'target_version': 'v1.42.2'})
        # Make the control plane upgrade pass
        self.kube_get_control_plane_versions_result = {
            'controller-0': 'v1.42.2',
            'controller-1': 'v1.42.2',
            'worker-0': 'v1.42.1'}
        utils.create_test_kube_upgrade(
            from_version='v1.42.1',
            to_version='v1.42.2',
            state=kubernetes.KUBE_UPGRADED_SECOND_MASTER,
        )
        new_state = kubernetes.KUBE_UPGRADED_SECOND_MASTER
        fail_state = kubernetes.KUBE_UPGRADING_SECOND_MASTER_FAILED
        kube_upgrade_obj = objects.kube_upgrade.get_one(self.context)
        self.service.handle_k8s_upgrade_control_plane_success(
            self.context, kube_upgrade_obj, c1.uuid, new_state, fail_state)
        self.assertEqual(kube_upgrade_obj.state, new_state)

    def test_kube_upgrade_kubelet_controller(self):
        # Create an upgrade