Nfv upgrade orchestration for kube-upgrade-storage

This adds the kube-upgrade-storage step to the orchestration of the k8s
upgrade.

The VIM build stages stay the same, but the VIM apply stage changes: a
storage upgrade stage is added right after the networking upgrade, and
every stage after that is unchanged.

Test Plan:
 PASS: Run a kubernetes upgrade with the kube-upgrade-storage step and
       observe that the image for volume-snapshot-controller is changed
 PASS: Run the kube-upgrade-storage with an unexpected state and
       expect failure.

Story: 2010877
Task: 48588

Change-Id: Ib5ff848ed67c3e57c9cfcf6d1a41abbc192a7935
Signed-off-by: Luiz Felipe Kina <LuizFelipe.EiskeKina@windriver.com>
Signed-off-by: Gabriel de Araújo Cabral <gabriel.cabral@windriver.com>
This commit is contained in:
Luiz Felipe Kina 2023-07-25 07:40:14 -04:00 committed by Gabriel de Araújo Cabral
parent 6dba3df3e3
commit b90eeb1436
10 changed files with 221 additions and 2 deletions

View File

@ -2074,6 +2074,64 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI):
callback.send(response)
callback.close()
def kube_upgrade_storage(self, future, callback):
    """
    Start kube upgrade storage

    Generator-based worker: each ``future.work(...)`` call is followed by a
    ``yield`` that receives the result of that work item.

    The response dict sent to the callback always carries:
      'completed'   - True only when the storage upgrade request finished
      'reason'      - empty on success and on an expired token, otherwise
                      a description of why the request failed
    and optionally:
      'result-data' - KubeUpgrade object built from the returned state,
                      from_version and to_version (success only)
      'error-code'  - TOKEN_EXPIRED when the REST call was unauthorized

    :param future: used to schedule the get-token and sysinv work items
    :param callback: sent the response dict, then closed, on every path
    """
    response = dict()
    response['completed'] = False
    response['reason'] = ''
    action_type = 'kube-upgrade-storage'

    try:
        future.set_timeouts(config.CONF.get('nfvi-timeouts', None))

        # Refresh the platform token first if it is missing or expired.
        if self._platform_token is None or \
                self._platform_token.is_expired():
            future.work(openstack.get_token, self._platform_directory)
            future.result = (yield)

            if not future.result.is_complete() or \
                    future.result.data is None:
                reason = "OpenStack get-token did not complete."
                DLOG.error(reason)
                # Report why the step failed instead of an empty reason.
                response['reason'] = reason
                return

            self._platform_token = future.result.data

        future.work(sysinv.kube_upgrade_storage, self._platform_token)
        future.result = (yield)

        if not future.result.is_complete():
            reason = "%s did not complete." % action_type
            DLOG.error(reason)
            response['reason'] = reason
            return

        kube_upgrade_data = future.result.data
        kube_upgrade_obj = nfvi.objects.v1.KubeUpgrade(
            kube_upgrade_data['state'],
            kube_upgrade_data['from_version'],
            kube_upgrade_data['to_version'])

        response['result-data'] = kube_upgrade_obj
        response['completed'] = True

    except exceptions.OpenStackRestAPIException as e:
        if httplib.UNAUTHORIZED == e.http_status_code:
            # Flag the token as expired; the reason is left empty here,
            # exactly as before.
            response['error-code'] = nfvi.NFVI_ERROR_CODE.TOKEN_EXPIRED
            if self._platform_token is not None:
                self._platform_token.set_expired()

        else:
            reason = ("Caught API exception while trying %s. error=%s"
                      % (action_type, e))
            DLOG.exception(reason)
            response['reason'] = reason

    except Exception as e:
        reason = ("Caught exception while trying %s. error=%s"
                  % (action_type, e))
        DLOG.exception(reason)
        response['reason'] = reason

    finally:
        # The callback is always answered and closed, including on the
        # early returns above.
        callback.send(response)
        callback.close()
def kube_upgrade_start(self, future, to_version, force, alarm_ignore_list,
callback):
"""

View File

@ -443,6 +443,13 @@ def kube_upgrade_networking(token):
return _patch_kube_upgrade_state(token, "upgrading-networking")
def kube_upgrade_storage(token):
    """
    Ask System Inventory to kube upgrade storage

    Patches the kube upgrade state to "upgrading-storage" and returns
    whatever _patch_kube_upgrade_state returns.
    """
    target_state = "upgrading-storage"
    return _patch_kube_upgrade_state(token, target_state)
def kube_host_cordon(token, hostname, force):
"""
system kube-host-cordon <host>

View File

@ -340,6 +340,17 @@ class ApplyStageMixin(object):
],
}
def _kube_upgrade_storage_stage(self):
return {
'name': 'kube-upgrade-storage',
'total_steps': 1,
'steps': [
{'name': 'kube-upgrade-storage',
'success_state': 'upgraded-storage',
'fail_state': 'upgrading-storage-failed'},
],
}
def _kube_upgrade_complete_stage(self):
return {
'name': 'kube-upgrade-complete',
@ -485,6 +496,7 @@ class ApplyStageMixin(object):
add_start=True,
add_download=True,
add_networking=True,
add_storage=True,
add_cordon=True,
add_first_control_plane=True,
add_second_control_plane=True,
@ -507,6 +519,8 @@ class ApplyStageMixin(object):
stages.append(self._kube_upgrade_download_images_stage())
if add_networking:
stages.append(self._kube_upgrade_networking_stage())
if add_storage:
stages.append(self._kube_upgrade_storage_stage())
if add_cordon:
stages.append(self._kube_host_cordon_stage())
for ver in self.kube_versions:
@ -622,6 +636,7 @@ class TestSimplexApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase,
stages = [
self._kube_upgrade_download_images_stage(),
self._kube_upgrade_networking_stage(),
self._kube_upgrade_storage_stage(),
]
if self.is_simplex():
stages.append(self._kube_host_cordon_stage())
@ -652,6 +667,7 @@ class TestSimplexApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase,
self.default_to_version)
stages = [
self._kube_upgrade_networking_stage(),
self._kube_upgrade_storage_stage(),
]
if self.is_simplex():
stages.append(self._kube_host_cordon_stage())
@ -735,6 +751,7 @@ class TestSimplexApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase,
self.default_to_version)
stages = [
self._kube_upgrade_networking_stage(),
self._kube_upgrade_storage_stage()
]
if self.is_simplex():
stages.append(self._kube_host_cordon_stage())
@ -758,12 +775,74 @@ class TestSimplexApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase,
"""
Test the kube_upgrade strategy creation when there is only a simplex
and the upgrade had previously stopped after successful networking.
It is expected to resume at the first control plane
It is expected to resume at the storage stage
"""
kube_upgrade = self._create_kube_upgrade_obj(
KUBE_UPGRADE_STATE.KUBE_UPGRADED_NETWORKING,
self.default_from_version,
self.default_to_version)
stages = [
self._kube_upgrade_storage_stage()
]
if self.is_simplex():
stages.append(self._kube_host_cordon_stage())
for ver in self.kube_versions:
stages.append(self._kube_upgrade_first_control_plane_stage(
ver))
stages.extend(self._kube_upgrade_kubelet_stages(
ver,
self.std_controller_list,
self.aio_controller_list,
self.worker_list))
if self.is_simplex():
stages.append(self._kube_host_uncordon_stage())
stages.extend([
self._kube_upgrade_complete_stage(),
self._kube_upgrade_cleanup_stage(),
])
self.validate_apply_phase(self.is_simplex(), kube_upgrade, stages)
def test_resume_after_storage_failed(self):
    """
    Test the kube_upgrade strategy creation when the upgrade had
    previously failed during storage (state upgrading-storage-failed).
    It is expected to retry and resume at the storage stage.

    Expected apply stages, in order: storage, cordon (simplex only), then
    for each kube version the first control plane stage followed by the
    kubelet stages, uncordon (simplex only), complete and cleanup.
    """
    upgrade_obj = self._create_kube_upgrade_obj(
        KUBE_UPGRADE_STATE.KUBE_UPGRADING_STORAGE_FAILED,
        self.default_from_version,
        self.default_to_version)

    # The retry starts again at the storage stage.
    expected_stages = [self._kube_upgrade_storage_stage()]
    if self.is_simplex():
        expected_stages.append(self._kube_host_cordon_stage())

    for kube_version in self.kube_versions:
        expected_stages.append(
            self._kube_upgrade_first_control_plane_stage(kube_version))
        expected_stages += self._kube_upgrade_kubelet_stages(
            kube_version,
            self.std_controller_list,
            self.aio_controller_list,
            self.worker_list)

    if self.is_simplex():
        expected_stages.append(self._kube_host_uncordon_stage())

    expected_stages += [
        self._kube_upgrade_complete_stage(),
        self._kube_upgrade_cleanup_stage(),
    ]
    self.validate_apply_phase(self.is_simplex(), upgrade_obj,
                              expected_stages)
def test_resume_after_storage_succeeded(self):
"""
Test the kube_upgrade strategy creation when there is only a simplex
and the upgrade had previously stopped after successful storage.
It is expected to resume at the first control plane
"""
kube_upgrade = self._create_kube_upgrade_obj(
KUBE_UPGRADE_STATE.KUBE_UPGRADED_STORAGE,
self.default_from_version,
self.default_to_version)
stages = []
if self.is_simplex():
stages.append(self._kube_host_cordon_stage())

View File

@ -130,6 +130,7 @@ from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_kube_upgrade_complete
from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_kube_upgrade_download_images # noqa: F401
from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_kube_upgrade_networking # noqa: F401
from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_kube_upgrade_start # noqa: F401
from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_kube_upgrade_storage # noqa: F401
from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_list_deployment_hosts # noqa: F401
from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_lock_host # noqa: F401
from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_notify_host_failed # noqa: F401

View File

@ -299,6 +299,15 @@ def nfvi_kube_upgrade_networking(callback):
return cmd_id
def nfvi_kube_upgrade_storage(callback):
    """
    Kube Upgrade Storage

    Invokes the 'kube_upgrade_storage' operation on the infrastructure
    plugin with the given callback and returns the value that
    invoke_plugin hands back (named cmd_id in the sibling wrappers).
    """
    return _infrastructure_plugin.invoke_plugin('kube_upgrade_storage',
                                                callback=callback)
def nfvi_kube_upgrade_start(to_version, force, alarm_ignore_list, callback):
"""
Kube Upgrade Start

View File

@ -44,6 +44,9 @@ class KubeUpgradeState(Constants):
KUBE_UPGRADING_NETWORKING = Constant('upgrading-networking')
KUBE_UPGRADING_NETWORKING_FAILED = Constant('upgrading-networking-failed')
KUBE_UPGRADED_NETWORKING = Constant('upgraded-networking')
KUBE_UPGRADING_STORAGE = Constant('upgrading-storage')
KUBE_UPGRADING_STORAGE_FAILED = Constant('upgrading-storage-failed')
KUBE_UPGRADED_STORAGE = Constant('upgraded-storage')
KUBE_UPGRADING_FIRST_MASTER = Constant('upgrading-first-master')
KUBE_UPGRADING_FIRST_MASTER_FAILED = Constant('upgrading-first-master-failed')
KUBE_UPGRADED_FIRST_MASTER = Constant('upgraded-first-master')

View File

@ -36,6 +36,7 @@ from nfv_vim.strategy._strategy_steps import KubeUpgradeCompleteStep # noqa: F4
from nfv_vim.strategy._strategy_steps import KubeUpgradeDownloadImagesStep # noqa: F401
from nfv_vim.strategy._strategy_steps import KubeUpgradeNetworkingStep # noqa: F401
from nfv_vim.strategy._strategy_steps import KubeUpgradeStartStep # noqa: F401
from nfv_vim.strategy._strategy_steps import KubeUpgradeStorageStep # noqa: F401
from nfv_vim.strategy._strategy_steps import LockHostsStep # noqa: F401
from nfv_vim.strategy._strategy_steps import MigrateInstancesFromHostStep # noqa: F401
from nfv_vim.strategy._strategy_steps import MigrateInstancesStep # noqa: F401

View File

@ -3458,6 +3458,22 @@ class KubeUpgradeStrategy(SwUpdateStrategy,
stage.add_step(strategy.KubeUpgradeNetworkingStep())
self.apply_phase.add_stage(stage)
# Next stage after networking is upgrade storage
self._add_kube_upgrade_storage_stage()
def _add_kube_upgrade_storage_stage(self):
    """
    Add kube upgrade storage stage.
    This stage only occurs after upgrade networking.
    Once it is added to the apply phase, the host cordon stage is added
    next.
    """
    from nfv_vim import strategy

    storage_stage = strategy.StrategyStage(
        strategy.STRATEGY_STAGE_NAME.KUBE_UPGRADE_STORAGE)
    storage_stage.add_step(strategy.KubeUpgradeStorageStep())
    self.apply_phase.add_stage(storage_stage)

    # Next stage after storage is cordon
    self._add_kube_host_cordon_stage()
@ -3794,8 +3810,16 @@ class KubeUpgradeStrategy(SwUpdateStrategy,
nfvi.objects.v1.KUBE_UPGRADE_STATE.KUBE_UPGRADING_NETWORKING_FAILED:
self._add_kube_upgrade_networking_stage,
# After networking -> cordon
# After networking -> upgrade storage
nfvi.objects.v1.KUBE_UPGRADE_STATE.KUBE_UPGRADED_NETWORKING:
self._add_kube_upgrade_storage_stage,
# if storage state failed, resync at storage state
nfvi.objects.v1.KUBE_UPGRADE_STATE.KUBE_UPGRADING_STORAGE_FAILED:
self._add_kube_upgrade_storage_stage,
# After storage -> cordon
nfvi.objects.v1.KUBE_UPGRADE_STATE.KUBE_UPGRADED_STORAGE:
self._add_kube_host_cordon_stage,
# If the state is cordon-failed, resume at cordon stage

View File

@ -66,6 +66,7 @@ class StrategyStageNames(Constants):
KUBE_UPGRADE_FIRST_CONTROL_PLANE = \
Constant('kube-upgrade-first-control-plane')
KUBE_UPGRADE_NETWORKING = Constant('kube-upgrade-networking')
KUBE_UPGRADE_STORAGE = Constant('kube-upgrade-storage')
KUBE_UPGRADE_SECOND_CONTROL_PLANE = \
Constant('kube-upgrade-second-control-plane')
KUBE_UPGRADE_PATCH = Constant('kube-upgrade-patch')

View File

@ -83,6 +83,7 @@ class StrategyStepNames(Constants):
KUBE_UPGRADE_COMPLETE = Constant('kube-upgrade-complete')
KUBE_UPGRADE_DOWNLOAD_IMAGES = Constant('kube-upgrade-download-images')
KUBE_UPGRADE_NETWORKING = Constant('kube-upgrade-networking')
KUBE_UPGRADE_STORAGE = Constant('kube-upgrade-storage')
KUBE_HOST_UPGRADE_CONTROL_PLANE = \
Constant('kube-host-upgrade-control-plane')
KUBE_HOST_UPGRADE_KUBELET = Constant('kube-host-upgrade-kubelet')
@ -4403,6 +4404,40 @@ class KubeUpgradeNetworkingStep(AbstractKubeUpgradeStep):
return strategy.STRATEGY_STEP_RESULT.WAIT, ""
class KubeUpgradeStorageStep(AbstractKubeUpgradeStep):
    """Kube Upgrade Storage - Strategy Step

    Requests the storage upgrade through nfvi and then waits. The success
    state is upgraded-storage and the failure state is
    upgrading-storage-failed.
    """

    def __init__(self):
        from nfv_vim import nfvi

        upgrade_states = nfvi.objects.v1.KUBE_UPGRADE_STATE
        super(KubeUpgradeStorageStep, self).__init__(
            STRATEGY_STEP_NAME.KUBE_UPGRADE_STORAGE,
            upgrade_states.KUBE_UPGRADED_STORAGE,
            upgrade_states.KUBE_UPGRADING_STORAGE_FAILED,
            timeout_in_secs=900)

    @coroutine
    def _response_callback(self):
        """Kube Upgrade Storage - Callback

        Receives the response dict for the storage upgrade request. An
        incomplete request fails the step with the reported reason; a
        completed one stores the returned kube upgrade object on the
        strategy, if one is attached.
        """
        response = (yield)
        DLOG.debug("%s callback response=%s." % (self._name, response))

        if not response['completed']:
            failed = strategy.STRATEGY_STEP_RESULT.FAILED
            self.stage.step_complete(failed, response['reason'])
            return

        if self.strategy is not None:
            self.strategy.nfvi_kube_upgrade = response['result-data']

    def apply(self):
        """Kube Upgrade Storage

        Issues the storage upgrade request and returns a WAIT result with
        an empty reason.
        """
        from nfv_vim import nfvi

        nfvi.nfvi_kube_upgrade_storage(self._response_callback())
        return strategy.STRATEGY_STEP_RESULT.WAIT, ""
class AbstractKubeHostUpgradeStep(AbstractKubeUpgradeStep):
"""Kube Upgrade Host - Abstract Strategy Step
@ -4851,6 +4886,7 @@ def strategy_step_rebuild_from_dict(data):
STRATEGY_STEP_NAME.KUBE_UPGRADE_DOWNLOAD_IMAGES:
KubeUpgradeDownloadImagesStep,
STRATEGY_STEP_NAME.KUBE_UPGRADE_NETWORKING: KubeUpgradeNetworkingStep,
STRATEGY_STEP_NAME.KUBE_UPGRADE_STORAGE: KubeUpgradeStorageStep,
STRATEGY_STEP_NAME.KUBE_UPGRADE_START: KubeUpgradeStartStep,
STRATEGY_STEP_NAME.QUERY_KUBE_HOST_UPGRADE: QueryKubeHostUpgradeStep,
STRATEGY_STEP_NAME.QUERY_KUBE_UPGRADE: QueryKubeUpgradeStep,