From c645ce21d6272e92b4546ded808205088874f4bd Mon Sep 17 00:00:00 2001 From: Chris Friesen Date: Fri, 24 Nov 2023 02:00:39 -0600 Subject: [PATCH] disable image gc when doing k8s upgrade Static pods cannot use image pull secrets, so it's important that the control plane images are not garbage-collected while we're doing a Kubernetes upgrade otherwise the upgrade can fail. Accordingly we want to disable garbage-collecting the images, then pre-pull the new images, then do the actual K8s upgrade, then re-enable image garbage collection. For duplex systems we can disable garbage collection from the puppet manifest, but for simplex puppet isn't involved so we have to do it from sysinv. The re-enabling of the image garbage collection happens when we upgrade kubelet to the final desired version. It's done in the puppet commit linked below. Depends-On: https://review.opendev.org/c/starlingx/stx-puppet/+/901778 TEST-PLAN: PASS: Perform single-version K8s upgrade on AIO-SX, ensure upgrade passes and image garbage collection is disabled when we download images and re-enabled when kubelet gets upgraded. Closes-Bug: 2044493 Change-Id: Ide258768c3b05a01c4e903e52380a348c2fcae65 Signed-off-by: Chris Friesen --- sysinv/sysinv/sysinv/sysinv/conductor/manager.py | 16 ++++++++++++++++ .../sysinv/tests/conductor/test_manager.py | 15 +++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/sysinv/sysinv/sysinv/sysinv/conductor/manager.py b/sysinv/sysinv/sysinv/sysinv/conductor/manager.py index ec815d86f1..f857a6ad52 100644 --- a/sysinv/sysinv/sysinv/sysinv/conductor/manager.py +++ b/sysinv/sysinv/sysinv/sysinv/conductor/manager.py @@ -15958,6 +15958,22 @@ class ConductorManager(service.PeriodicService): else: next_versions = [kube_version] + # For simplex systems, disable image garbage collection by kubelet + # during the K8s upgrade. For duplex this will be done on each controller + # by the puppet manifest called below. 
It wants to be done before we + # pull the images so that they can't be garbage collected by kubelet + # before they're needed. + if system.system_mode == constants.SYSTEM_MODE_SIMPLEX: + try: + # Call the helper script used by the puppet manifest. + subprocess.check_call( # pylint: disable=not-callable + ["/bin/bash", + "/usr/share/puppet/modules/platform/files/disable_image_gc.sh"], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL) + except subprocess.CalledProcessError: + LOG.error("Failed to call disable_image_gc.sh, continuing anyway.") + for k8s_version in next_versions: LOG.info("executing playbook: %s for version %s" % (constants.ANSIBLE_KUBE_PUSH_IMAGES_PLAYBOOK, k8s_version)) diff --git a/sysinv/sysinv/sysinv/sysinv/tests/conductor/test_manager.py b/sysinv/sysinv/sysinv/sysinv/tests/conductor/test_manager.py index 2627902769..2ac57b707b 100644 --- a/sysinv/sysinv/sysinv/sysinv/tests/conductor/test_manager.py +++ b/sysinv/sysinv/sysinv/sysinv/tests/conductor/test_manager.py @@ -25,6 +25,7 @@ import copy import mock import os.path +import subprocess import tempfile import uuid @@ -1051,6 +1052,13 @@ class ManagerTestCase(base.DbTestCase): p.start().return_value = ['v1.42.2', 'v1.43.1'] self.addCleanup(p.stop) + # Mock subprocess check_call + mock_check_call = mock.MagicMock() + p = mock.patch('eventlet.green.subprocess.check_call', mock_check_call) + mock_subprocess_check_call = p.start() + mock_subprocess_check_call.return_value = 0 + self.addCleanup(p.stop) + next_versions = kubernetes.KubeOperator().kube_get_higher_patch_version('v1.41.1', 'v1.43.1') mock_run_playbook = mock.MagicMock() @@ -1067,6 +1075,13 @@ class ManagerTestCase(base.DbTestCase): constants.ANSIBLE_KUBE_PUSH_IMAGES_PLAYBOOK] mock_run_playbook.assert_any_call(playbook_cmd) + # Verify that we called the script to turn off image garbage collection + mock_subprocess_check_call.assert_called_with( + ["/bin/bash", "/usr/share/puppet/modules/platform/files/disable_image_gc.sh"], + 
stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + # Verify that the upgrade state was updated updated_upgrade = self.dbapi.kube_upgrade_get_one() self.assertEqual(updated_upgrade.state,