Add host cordon steps to kube upgrade orch

When updating the control plane and kubelets, the
host needs to be cordoned to prevent it from
doing Kubernetes work during that time period:

system kube-host-cordon <host>
 < update control plane >
 < update kubelet>
system kube-host-uncordon <host>

Currently only supported for simplex.

Depends-On: https://review.opendev.org/c/starlingx/config/+/880333

Test Plan:
   PASS: AIO-SX single kube upgrade (1.24 -> 1.25)
   PASS: Resume AIO-SX single kube upgrade after cordon started.
   PEND: AIO-SX multi-kube upgrade
   PEND: AIO-DX kube upgrade

Story: 2010565
Task: 47772
Signed-off-by: Al Bailey <al.bailey@windriver.com>
Change-Id: I54262d4ff31a2da005fffb6d30bb6872ee52f6d4
This commit is contained in:
Al Bailey 2023-04-03 12:57:53 +00:00
parent 4ff5e50e91
commit a4280ebf59
13 changed files with 629 additions and 50 deletions

View File

@ -1,5 +1,5 @@
#
# Copyright (c) 2015-2021 Wind River Systems, Inc.
# Copyright (c) 2015-2023 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -3183,6 +3183,120 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI):
callback.send(response)
callback.close()
def kube_host_cordon(self, future, host_uuid, host_name, force, callback):
"""
Cordon a host

Generator-style plugin operation: work is queued on 'future' and the
function yields until each result is available. The outcome is always
reported by sending a response dict ('completed', 'host_uuid',
'host_name', 'reason', and 'error-code' when the token was rejected)
to 'callback', which is then closed.
"""
# ignoring the force argument for now
# (it is still forwarded to the sysinv call below)
response = dict()
# pessimistic default; only flipped once the sysinv request completes
response['completed'] = False
response['host_uuid'] = host_uuid
response['host_name'] = host_name
response['reason'] = ''
# action_type is only used in the exception log messages
action_type = 'kube-host-cordon'
sysinv_method = sysinv.kube_host_cordon
try:
future.set_timeouts(config.CONF.get('nfvi-timeouts', None))
# fetch a new platform token when none is cached or it has expired
if self._platform_token is None or \
self._platform_token.is_expired():
future.work(openstack.get_token, self._platform_directory)
future.result = (yield)
if not future.result.is_complete() or \
future.result.data is None:
DLOG.error("OpenStack get-token did not complete, "
"host_uuid=%s." % host_uuid)
return
self._platform_token = future.result.data
# cordon wants a hostname and not a host_uuid
future.work(sysinv_method, self._platform_token, host_name, force)
future.result = (yield)
# an incomplete request leaves response['completed'] False
if not future.result.is_complete():
return
response['completed'] = True
except exceptions.OpenStackRestAPIException as e:
# an UNAUTHORIZED reply is reported as an expired token and the
# cached token is marked expired
if httplib.UNAUTHORIZED == e.http_status_code:
response['error-code'] = nfvi.NFVI_ERROR_CODE.TOKEN_EXPIRED
if self._platform_token is not None:
self._platform_token.set_expired()
else:
DLOG.exception("Caught exception while trying to %s "
"a host %s, error=%s." % (action_type, host_name, e))
# pass the HTTP failure reason back through the response
response['reason'] = e.http_response_reason
except Exception as e:
DLOG.exception("Caught exception while trying to %s a "
"host %s, error=%s." % (action_type, host_name, e))
finally:
# the response is delivered on every exit path, including early returns
callback.send(response)
callback.close()
def kube_host_uncordon(self, future, host_uuid, host_name, force, callback):
"""
Uncordon a host

Generator-style plugin operation: work is queued on 'future' and the
function yields until each result is available. The outcome is always
reported by sending a response dict ('completed', 'host_uuid',
'host_name', 'reason', and 'error-code' when the token was rejected)
to 'callback', which is then closed.
"""
response = dict()
# pessimistic default; only flipped once the sysinv request completes
response['completed'] = False
response['host_uuid'] = host_uuid
response['host_name'] = host_name
response['reason'] = ''
# action_type is only used in the exception log messages
action_type = 'kube-host-uncordon'
sysinv_method = sysinv.kube_host_uncordon
try:
future.set_timeouts(config.CONF.get('nfvi-timeouts', None))
# fetch a new platform token when none is cached or it has expired
if self._platform_token is None or \
self._platform_token.is_expired():
future.work(openstack.get_token, self._platform_directory)
future.result = (yield)
if not future.result.is_complete() or \
future.result.data is None:
DLOG.error("OpenStack get-token did not complete, "
"host_uuid=%s." % host_uuid)
return
self._platform_token = future.result.data
# uncordon wants a hostname and not a host_uuid
future.work(sysinv_method, self._platform_token, host_name, force)
future.result = (yield)
# an incomplete request leaves response['completed'] False
if not future.result.is_complete():
return
response['completed'] = True
except exceptions.OpenStackRestAPIException as e:
# an UNAUTHORIZED reply is reported as an expired token and the
# cached token is marked expired
if httplib.UNAUTHORIZED == e.http_status_code:
response['error-code'] = nfvi.NFVI_ERROR_CODE.TOKEN_EXPIRED
if self._platform_token is not None:
self._platform_token.set_expired()
else:
DLOG.exception("Caught exception while trying to %s "
"a host %s, error=%s." % (action_type, host_name, e))
# pass the HTTP failure reason back through the response
response['reason'] = e.http_response_reason
except Exception as e:
DLOG.exception("Caught exception while trying to %s a "
"host %s, error=%s." % (action_type, host_name, e))
finally:
# the response is delivered on every exit path, including early returns
callback.send(response)
callback.close()
def lock_host(self, future, host_uuid, host_name, callback):
"""
Lock a host

View File

@ -1,5 +1,5 @@
#
# Copyright (c) 2015-2021 Wind River Systems, Inc.
# Copyright (c) 2015-2023 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -383,7 +383,12 @@ def kube_upgrade_start(token, to_version, force=False, alarm_ignore_list=None):
return response
def _patch_kube_upgrade_state(token, new_value):
def api_data_patch(path, value, op="replace"):
    """
    Build a single patch entry for a REST PATCH payload.

    :param path: attribute name to patch, with or without a leading '/'
    :param value: value to apply to the attribute
    :param op: patch operation name, 'replace' by default
    :returns: dict with the keys 'path', 'value' and 'op'
    """
    # The emitted path always carries exactly one leading '/'. Stripping
    # first keeps a caller that already passes '/state' from producing
    # '//state'.
    return {'path': '/' + path.lstrip('/'), 'value': value, 'op': op}
def _patch_kube_upgrade_state(token, new_value, hostname=None):
url = token.get_service_url(PLATFORM_SERVICE.SYSINV)
if url is None:
raise ValueError("OpenStack SysInv URL is invalid")
@ -395,11 +400,10 @@ def _patch_kube_upgrade_state(token, new_value):
api_cmd_headers['User-Agent'] = "vim/1.0"
api_cmd_payload = list()
host_data = dict()
host_data['path'] = "/state"
host_data['value'] = new_value
host_data['op'] = "replace"
api_cmd_payload.append(host_data)
api_cmd_payload.append(api_data_patch('state', new_value))
# some kube upgrade patch commands take a hostname
if hostname is not None:
api_cmd_payload.append(api_data_patch('hostname', hostname))
return rest_api_request(token,
"PATCH",
@ -449,6 +453,23 @@ def kube_upgrade_networking(token):
return _patch_kube_upgrade_state(token, "upgrading-networking")
def kube_host_cordon(token, hostname, force):
    """
    Counterpart of: system kube-host-cordon <host>

    Patches the kube upgrade state to 'cordon-started' for the given
    hostname. The force argument (described by the author as a 'string')
    is accepted but not used.
    """
    # the cordon request must identify the host by its hostname
    requested_state = "cordon-started"
    return _patch_kube_upgrade_state(token,
                                     requested_state,
                                     hostname=hostname)
def kube_host_uncordon(token, hostname, force):
    """
    Counterpart of: system kube-host-uncordon <host>

    Patches the kube upgrade state to 'uncordon-started' for the given
    hostname. The force argument (described by the author as a 'string')
    is accepted but not used.
    """
    requested_state = "uncordon-started"
    return _patch_kube_upgrade_state(token,
                                     requested_state,
                                     hostname=hostname)
def _kube_host_upgrade(token, host_uuid, target_operation, force):
"""
Invoke a POST for a host kube-upgrade operation

View File

@ -283,6 +283,28 @@ class ApplyStageMixin(object):
],
}
def _kube_host_cordon_stage(self, ver="N/A"):
return {
'name': 'kube-host-cordon',
'total_steps': 1,
'steps': [
{'name': 'kube-host-cordon',
'success_state': 'cordon-complete',
'fail_state': 'cordon-failed'},
],
}
def _kube_host_uncordon_stage(self, ver="N/A"):
return {
'name': 'kube-host-uncordon',
'total_steps': 1,
'steps': [
{'name': 'kube-host-uncordon',
'success_state': 'uncordon-complete',
'fail_state': 'uncordon-failed'},
],
}
def _kube_upgrade_first_control_plane_stage(self, ver):
return {
'name': 'kube-upgrade-first-control-plane %s' % ver,
@ -462,15 +484,21 @@ class ApplyStageMixin(object):
add_start=True,
add_download=True,
add_networking=True,
add_cordon=True,
add_first_control_plane=True,
add_second_control_plane=True,
add_kubelets=True,
add_uncordon=True,
add_complete=True,
add_cleanup=True):
"""The order of the host_list determines the kubelets"""
# We never add a second control plane on a simplex
if self.is_simplex():
add_second_control_plane = False
# we do not support cordon and uncordon in duplex
if self.is_duplex():
add_cordon = False
add_uncordon = False
stages = []
if add_start:
stages.append(self._kube_upgrade_start_stage())
@ -478,6 +506,8 @@ class ApplyStageMixin(object):
stages.append(self._kube_upgrade_download_images_stage())
if add_networking:
stages.append(self._kube_upgrade_networking_stage())
if add_cordon:
stages.append(self._kube_host_cordon_stage())
for ver in self.kube_versions:
if add_first_control_plane:
stages.append(self._kube_upgrade_first_control_plane_stage(ver))
@ -489,6 +519,8 @@ class ApplyStageMixin(object):
std_controller_list,
aio_controller_list,
worker_list))
if add_uncordon:
stages.append(self._kube_host_uncordon_stage())
if add_complete:
stages.append(self._kube_upgrade_complete_stage())
if add_cleanup:
@ -590,6 +622,8 @@ class TestSimplexApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase,
self._kube_upgrade_download_images_stage(),
self._kube_upgrade_networking_stage(),
]
if self.is_simplex():
stages.append(self._kube_host_cordon_stage())
for ver in self.kube_versions:
stages.append(self._kube_upgrade_first_control_plane_stage(ver))
stages.extend(self._kube_upgrade_kubelet_stages(
@ -597,6 +631,8 @@ class TestSimplexApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase,
self.std_controller_list,
self.aio_controller_list,
self.worker_list))
if self.is_simplex():
stages.append(self._kube_host_uncordon_stage())
stages.extend([
self._kube_upgrade_complete_stage(),
self._kube_upgrade_cleanup_stage(),
@ -616,6 +652,8 @@ class TestSimplexApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase,
stages = [
self._kube_upgrade_networking_stage(),
]
if self.is_simplex():
stages.append(self._kube_host_cordon_stage())
for ver in self.kube_versions:
stages.append(self._kube_upgrade_first_control_plane_stage(
ver))
@ -624,6 +662,8 @@ class TestSimplexApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase,
self.std_controller_list,
self.aio_controller_list,
self.worker_list))
if self.is_simplex():
stages.append(self._kube_host_uncordon_stage())
stages.extend([
self._kube_upgrade_complete_stage(),
self._kube_upgrade_cleanup_stage(),
@ -649,6 +689,8 @@ class TestSimplexApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase,
self.std_controller_list,
self.aio_controller_list,
self.worker_list))
if self.is_simplex():
stages.append(self._kube_host_uncordon_stage())
stages.extend([
self._kube_upgrade_complete_stage(),
self._kube_upgrade_cleanup_stage(),
@ -672,6 +714,8 @@ class TestSimplexApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase,
self.std_controller_list,
self.aio_controller_list,
self.worker_list))
if self.is_simplex():
stages.append(self._kube_host_uncordon_stage())
stages.extend([
self._kube_upgrade_complete_stage(),
self._kube_upgrade_cleanup_stage(),
@ -691,6 +735,8 @@ class TestSimplexApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase,
stages = [
self._kube_upgrade_networking_stage(),
]
if self.is_simplex():
stages.append(self._kube_host_cordon_stage())
for ver in self.kube_versions:
stages.append(self._kube_upgrade_first_control_plane_stage(
ver))
@ -699,6 +745,8 @@ class TestSimplexApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase,
self.std_controller_list,
self.aio_controller_list,
self.worker_list))
if self.is_simplex():
stages.append(self._kube_host_uncordon_stage())
stages.extend([
self._kube_upgrade_complete_stage(),
self._kube_upgrade_cleanup_stage(),
@ -716,6 +764,8 @@ class TestSimplexApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase,
self.default_from_version,
self.default_to_version)
stages = []
if self.is_simplex():
stages.append(self._kube_host_cordon_stage())
for ver in self.kube_versions:
stages.append(self._kube_upgrade_first_control_plane_stage(
ver))
@ -724,6 +774,8 @@ class TestSimplexApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase,
self.std_controller_list,
self.aio_controller_list,
self.worker_list))
if self.is_simplex():
stages.append(self._kube_host_uncordon_stage())
stages.extend([
self._kube_upgrade_complete_stage(),
self._kube_upgrade_cleanup_stage(),
@ -749,6 +801,8 @@ class TestSimplexApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase,
self.std_controller_list,
self.aio_controller_list,
self.worker_list))
if self.is_simplex():
stages.append(self._kube_host_uncordon_stage())
stages.extend([
self._kube_upgrade_complete_stage(),
self._kube_upgrade_cleanup_stage(),
@ -774,6 +828,8 @@ class TestSimplexApplyStrategy(sw_update_testcase.SwUpdateStrategyTestCase,
self.std_controller_list,
self.aio_controller_list,
self.worker_list))
if self.is_simplex():
stages.append(self._kube_host_uncordon_stage())
stages.extend([
self._kube_upgrade_complete_stage(),
self._kube_upgrade_cleanup_stage(),

View File

@ -1,5 +1,5 @@
#
# Copyright (c) 2015-2021 Wind River Systems, Inc.
# Copyright (c) 2015-2023 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -50,7 +50,7 @@ class HostDirector(object):
return
if self._host_operation is None:
DLOG.verbose("No host %s operation inprogress." % host.name)
DLOG.verbose("No host %s operation in progress." % host.name)
return
if OPERATION_TYPE.LOCK_HOSTS != self._host_operation.operation_type:
@ -89,7 +89,7 @@ class HostDirector(object):
return
if self._host_operation is None:
DLOG.verbose("No host %s operation inprogress." % host.name)
DLOG.verbose("No host %s operation in progress." % host.name)
return
if OPERATION_TYPE.DISABLE_HOST_SERVICES != \
@ -145,7 +145,7 @@ class HostDirector(object):
return
if self._host_operation is None:
DLOG.verbose("No host %s operation inprogress." % host.name)
DLOG.verbose("No host %s operation in progress." % host.name)
return
if OPERATION_TYPE.ENABLE_HOST_SERVICES != \
@ -206,7 +206,7 @@ class HostDirector(object):
return
if self._host_operation is None:
DLOG.verbose("No host %s operation inprogress." % host.name)
DLOG.verbose("No host %s operation in progress." % host.name)
return
if OPERATION_TYPE.UNLOCK_HOSTS != self._host_operation.operation_type:
@ -244,7 +244,7 @@ class HostDirector(object):
return
if self._host_operation is None:
DLOG.verbose("No host %s operation inprogress." % host.name)
DLOG.verbose("No host %s operation in progress." % host.name)
return
if OPERATION_TYPE.REBOOT_HOSTS != self._host_operation.operation_type:
@ -282,7 +282,7 @@ class HostDirector(object):
return
if self._host_operation is None:
DLOG.verbose("No host %s operation inprogress." % host.name)
DLOG.verbose("No host %s operation in progress." % host.name)
return
if OPERATION_TYPE.UPGRADE_HOSTS != self._host_operation.operation_type:
@ -320,7 +320,7 @@ class HostDirector(object):
return
if self._host_operation is None:
DLOG.verbose("No host %s operation inprogress." % host.name)
DLOG.verbose("No host %s operation in progress." % host.name)
return
if OPERATION_TYPE.SWACT_HOSTS != self._host_operation.operation_type:
@ -358,7 +358,7 @@ class HostDirector(object):
return
if self._host_operation is None:
DLOG.verbose("No host %s operation inprogress." % host.name)
DLOG.verbose("No host %s operation in progress." % host.name)
return
if OPERATION_TYPE.FW_UPDATE_HOSTS != self._host_operation.operation_type:
@ -395,7 +395,7 @@ class HostDirector(object):
return
if self._host_operation is None:
DLOG.verbose("No host %s operation inprogress." % host.name)
DLOG.verbose("No host %s operation in progress." % host.name)
return
if OPERATION_TYPE.FW_UPDATE_ABORT_HOSTS != self._host_operation.operation_type:
@ -515,11 +515,11 @@ class HostDirector(object):
@staticmethod
def host_audit(host):
"""
Notifies the host director that a host audit is inprogress
Notifies the host director that a host audit is in progress
"""
from nfv_vim import directors
DLOG.verbose("Notify other directors that a host %s audit is inprogress."
DLOG.verbose("Notify other directors that a host %s audit is in progress."
% host.name)
instance_director = directors.get_instance_director()
instance_director.host_audit(host)
@ -530,11 +530,11 @@ class HostDirector(object):
@staticmethod
def host_abort(host):
"""
Notifies the host director that a host abort is inprogress
Notifies the host director that a host abort is in progress
"""
from nfv_vim import directors
DLOG.info("Notify other directors that a host %s abort is inprogress."
DLOG.info("Notify other directors that a host %s abort is in progress."
% host.name)
instance_director = directors.get_instance_director()
instance_director.host_operation_cancel(host.name)
@ -824,7 +824,7 @@ class HostDirector(object):
return
if self._host_operation is None:
DLOG.verbose("No host %s operation inprogress." % host.name)
DLOG.verbose("No host %s operation in progress." % host.name)
return
if OPERATION_TYPE.KUBE_UPGRADE_HOSTS != self._host_operation.operation_type:
@ -883,6 +883,162 @@ class HostDirector(object):
return host_operation
# cordon
@coroutine
def _nfvi_kube_host_cordon_callback(self):
    """
    NFVI Kube Host Cordon Callback

    Receives the cordon response and, when it did not complete, notifies
    the software management director that the cordon failed for the host,
    provided the host is known and a kube upgrade host operation is the
    one currently tracked.
    """
    from nfv_vim import directors

    response = (yield)
    DLOG.verbose("NFVI Kube Host Cordon response=%s." % response)

    # a completed cordon needs no further handling here
    if response['completed']:
        return

    failed_host_name = response['host_name']
    DLOG.info("Kube Host Upgrade Cordon failed. Host:%s, reason=%s."
              % (failed_host_name, response['reason']))

    host = tables.tables_get_host_table().get(failed_host_name, None)
    if host is None:
        DLOG.verbose("Host %s does not exist." % failed_host_name)
        return

    if self._host_operation is None:
        DLOG.verbose("No host %s operation in progress." % host.name)
        return

    tracked_type = self._host_operation.operation_type
    if OPERATION_TYPE.KUBE_UPGRADE_HOSTS != tracked_type:
        DLOG.verbose("Unexpected host %s operation %s, ignoring."
                     % (host.name, tracked_type))
        return

    directors.get_sw_mgmt_director().kube_host_cordon_failed(host)
def _nfvi_kube_host_cordon(self, host_uuid, host_name, force):
    """
    NFVI Kube Host Cordon

    Issues the NFVI cordon request for one host, with a new cordon
    callback attached to receive the response.
    """
    nfvi.nfvi_kube_host_cordon(host_uuid, host_name, force,
                               self._nfvi_kube_host_cordon_callback())
def kube_host_cordon(self, host_names, force):
    """
    Kube Host Cordon for multiple hosts

    Creates a kube upgrade host operation, requests a cordon for each
    named host and returns the operation. An unknown host name marks the
    operation as failed and returns it immediately.
    """
    DLOG.info("Kube Host Cordon for hosts: %s" % host_names)

    cordon_operation = Operation(OPERATION_TYPE.KUBE_UPGRADE_HOSTS)

    # a new request replaces whatever operation was being tracked
    if self._host_operation is not None:
        DLOG.debug("Canceling previous host operation %s, before "
                   "continuing with host operation %s."
                   % (self._host_operation.operation_type,
                      cordon_operation.operation_type))
        self._host_operation = None

    known_hosts = tables.tables_get_host_table()
    for requested_name in host_names:
        host = known_hosts.get(requested_name, None)
        if host is None:
            reason = "Unknown host %s given." % requested_name
            DLOG.info(reason)
            cordon_operation.set_failed(reason)
            return cordon_operation

        cordon_operation.add_host(host.name, OPERATION_STATE.INPROGRESS)
        self._nfvi_kube_host_cordon(host.uuid, host.name, force)

    # only an operation that is still in progress is tracked
    if cordon_operation.is_inprogress():
        self._host_operation = cordon_operation

    return cordon_operation
# uncordon
@coroutine
def _nfvi_kube_host_uncordon_callback(self):
    """
    NFVI Kube Host Uncordon Callback

    Receives the uncordon response and, when it did not complete,
    notifies the software management director that the uncordon failed
    for the host, provided the host is known and a kube upgrade host
    operation is the one currently tracked.
    """
    from nfv_vim import directors

    response = (yield)
    DLOG.verbose("NFVI Kube Host Uncordon response=%s." % response)

    # a completed uncordon needs no further handling here
    if response['completed']:
        return

    failed_host_name = response['host_name']
    DLOG.info("Kube Host Upgrade Uncordon failed. Host:%s, reason=%s."
              % (failed_host_name, response['reason']))

    host = tables.tables_get_host_table().get(failed_host_name, None)
    if host is None:
        DLOG.verbose("Host %s does not exist." % failed_host_name)
        return

    if self._host_operation is None:
        DLOG.verbose("No host %s operation in progress." % host.name)
        return

    tracked_type = self._host_operation.operation_type
    if OPERATION_TYPE.KUBE_UPGRADE_HOSTS != tracked_type:
        DLOG.verbose("Unexpected host %s operation %s, ignoring."
                     % (host.name, tracked_type))
        return

    directors.get_sw_mgmt_director().kube_host_uncordon_failed(host)
def _nfvi_kube_host_uncordon(self, host_uuid, host_name, force):
    """
    NFVI Kube Host Uncordon

    Issues the NFVI uncordon request for one host, with a new uncordon
    callback attached to receive the response.
    """
    nfvi.nfvi_kube_host_uncordon(host_uuid, host_name, force,
                                 self._nfvi_kube_host_uncordon_callback())
def kube_host_uncordon(self, host_names, force):
    """
    Kube Host Uncordon for multiple hosts

    Creates a kube upgrade host operation, requests an uncordon for each
    named host and returns the operation. An unknown host name marks the
    operation as failed and returns it immediately.
    """
    DLOG.info("Kube Host Uncordon for hosts: %s" % host_names)

    uncordon_operation = Operation(OPERATION_TYPE.KUBE_UPGRADE_HOSTS)

    # a new request replaces whatever operation was being tracked
    if self._host_operation is not None:
        DLOG.debug("Canceling previous host operation %s, before "
                   "continuing with host operation %s."
                   % (self._host_operation.operation_type,
                      uncordon_operation.operation_type))
        self._host_operation = None

    known_hosts = tables.tables_get_host_table()
    for requested_name in host_names:
        host = known_hosts.get(requested_name, None)
        if host is None:
            reason = "Unknown host %s given." % requested_name
            DLOG.info(reason)
            uncordon_operation.set_failed(reason)
            return uncordon_operation

        uncordon_operation.add_host(host.name, OPERATION_STATE.INPROGRESS)
        self._nfvi_kube_host_uncordon(host.uuid, host.name, force)

    # only an operation that is still in progress is tracked
    if uncordon_operation.is_inprogress():
        self._host_operation = uncordon_operation

    return uncordon_operation
@coroutine
def _nfvi_kube_host_upgrade_kubelet_callback(self):
"""
@ -904,7 +1060,7 @@ class HostDirector(object):
return
if self._host_operation is None:
DLOG.verbose("No host %s operation inprogress." % host.name)
DLOG.verbose("No host %s operation in progress." % host.name)
return
if OPERATION_TYPE.KUBE_UPGRADE_HOSTS != self._host_operation.operation_type:
@ -978,7 +1134,7 @@ class HostDirector(object):
return
if self._host_operation is None:
DLOG.verbose("No host %s operation inprogress." % host.name)
DLOG.verbose("No host %s operation in progress." % host.name)
return
if OPERATION_TYPE.KUBE_ROOTCA_UPDATE_HOSTS \

View File

@ -1,5 +1,5 @@
#
# Copyright (c) 2015-2021 Wind River Systems, Inc.
# Copyright (c) 2015-2023 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -281,6 +281,22 @@ class SwMgmtDirector(object):
self._sw_update.handle_event(
strategy.STRATEGY_EVENT.ENABLE_HOST_SERVICES_FAILED, host)
def kube_host_cordon_failed(self, host):
    """
    Called when a kube host cordon fails

    Forwards a KUBE_HOST_CORDON_FAILED strategy event for the host to the
    current software update, if there is one.
    """
    active_update = self._sw_update
    if active_update is None:
        return

    active_update.handle_event(
        strategy.STRATEGY_EVENT.KUBE_HOST_CORDON_FAILED, host)
def kube_host_uncordon_failed(self, host):
    """
    Called when a kube host uncordon fails

    Forwards a KUBE_HOST_UNCORDON_FAILED strategy event for the host to
    the current software update, if there is one.
    """
    active_update = self._sw_update
    if active_update is None:
        return

    active_update.handle_event(
        strategy.STRATEGY_EVENT.KUBE_HOST_UNCORDON_FAILED, host)
def host_unlock_failed(self, host):
"""
Called when a unlock of a host failed

View File

@ -1,5 +1,5 @@
#
# Copyright (c) 2015-2021 Wind River Systems, Inc.
# Copyright (c) 2015-2023 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -112,6 +112,8 @@ from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_get_terminating_pods
from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_get_upgrade # noqa: F401
from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_host_device_image_update # noqa: F401
from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_host_device_image_update_abort # noqa: F401
from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_kube_host_cordon # noqa: F401
from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_kube_host_uncordon # noqa: F401
from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_kube_host_upgrade_control_plane # noqa: F401
from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_kube_host_upgrade_kubelet # noqa: F401
from nfv_vim.nfvi._nfvi_infrastructure_module import nfvi_kube_rootca_update_abort # noqa: F401

View File

@ -1,5 +1,5 @@
#
# Copyright (c) 2015-2021 Wind River Systems, Inc.
# Copyright (c) 2015-2023 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -100,6 +100,32 @@ def nfvi_host_device_image_update_abort(host_uuid, host_name, callback):
return cmd_id
def nfvi_kube_host_cordon(host_uuid, host_name, force, callback):
    """
    Kube Host Cordon

    Invokes the 'kube_host_cordon' operation on the infrastructure plugin
    and returns the value produced by invoke_plugin.
    """
    plugin_args = (host_uuid, host_name, force)
    return _infrastructure_plugin.invoke_plugin(
        'kube_host_cordon', *plugin_args, callback=callback)
def nfvi_kube_host_uncordon(host_uuid, host_name, force, callback):
    """
    Kube Host Uncordon

    Invokes the 'kube_host_uncordon' operation on the infrastructure
    plugin and returns the value produced by invoke_plugin.
    """
    plugin_args = (host_uuid, host_name, force)
    return _infrastructure_plugin.invoke_plugin(
        'kube_host_uncordon', *plugin_args, callback=callback)
def nfvi_kube_host_upgrade_control_plane(host_uuid, host_name, force, callback):
"""
Kube Host Upgrade Control Plane

View File

@ -1,5 +1,5 @@
#
# Copyright (c) 2016-2021 Wind River Systems, Inc.
# Copyright (c) 2016-2023 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -51,6 +51,12 @@ class KubeUpgradeState(Constants):
KUBE_UPGRADED_SECOND_MASTER = Constant('upgraded-second-master')
KUBE_UPGRADING_KUBELETS = Constant('upgrading-kubelets')
KUBE_UPGRADE_COMPLETE = Constant('upgrade-complete')
KUBE_HOST_CORDON = Constant('cordon-started')
KUBE_HOST_CORDON_COMPLETE = Constant('cordon-complete')
KUBE_HOST_CORDON_FAILED = Constant('cordon-failed')
KUBE_HOST_UNCORDON = Constant('uncordon-started')
KUBE_HOST_UNCORDON_COMPLETE = Constant('uncordon-complete')
KUBE_HOST_UNCORDON_FAILED = Constant('uncordon-failed')
# Kube Upgrade Constant Instantiation

View File

@ -1,5 +1,5 @@
#
# Copyright (c) 2015-2021 Wind River Systems, Inc.
# Copyright (c) 2015-2023 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -16,6 +16,8 @@ from nfv_vim.strategy._strategy_steps import ApplySwPatchesStep # noqa: F401
from nfv_vim.strategy._strategy_steps import DisableHostServicesStep # noqa: F401
from nfv_vim.strategy._strategy_steps import FwUpdateAbortHostsStep # noqa: F401
from nfv_vim.strategy._strategy_steps import FwUpdateHostsStep # noqa: F401
from nfv_vim.strategy._strategy_steps import KubeHostCordonStep # noqa: F401
from nfv_vim.strategy._strategy_steps import KubeHostUncordonStep # noqa: F401
from nfv_vim.strategy._strategy_steps import KubeHostUpgradeControlPlaneStep # noqa: F401
from nfv_vim.strategy._strategy_steps import KubeHostUpgradeKubeletStep # noqa: F401
from nfv_vim.strategy._strategy_steps import KubeRootcaUpdateCompleteStep # noqa: F401

View File

@ -3097,13 +3097,16 @@ class KubeUpgradeStrategy(SwUpdateStrategy,
# 'upgrade_from' value is a list of versions however the
# list should only ever be a single entry so we get the first
# value and allow an exception to be raised if the list is empty
# todo(abailey): if the list contains more than one entry the
# algorithm may not work, since it may not converge at the active version.
ver = kube['upgrade_from'][0]
# go around the loop again...
# We should NEVER get into an infinite loop, but if the kube-version entries
# in sysinv are malformed, we do not want to spin forever
loop_count += 1
if loop_count > 100:
if loop_count > 10:
raise Exception("Invalid kubernetes dependency chain detected")
return kube_sequence
@ -3169,21 +3172,55 @@ class KubeUpgradeStrategy(SwUpdateStrategy,
stage.add_step(strategy.KubeUpgradeNetworkingStep())
self.apply_phase.add_stage(stage)
# need to update control plane and kubelet per-version
self._add_kube_update_stages()
# Next stage after networking is cordon
self._add_kube_host_cordon_stage()
def _add_kube_update_stages(self):
# for a particular version, the order is:
# - first control plane
# - second control plane
# - kubelets
def _add_kube_host_cordon_stage(self):
    """Add the host cordon stage (simplex only), then the update stages.

    A cordon stage for the first host is appended to the apply phase only
    when there is no second host. The control plane / kubelet update
    stages are added afterwards in either case.
    """
    from nfv_vim import nfvi
    from nfv_vim import strategy

    cordon_host = self.get_first_host()
    other_host = self.get_second_host()

    # no second host means this is a simplex system
    if other_host is None:
        # todo(abailey): add rollback support to trigger uncordon
        kube_states = nfvi.objects.v1.KUBE_UPGRADE_STATE
        cordon_stage = strategy.StrategyStage(
            strategy.STRATEGY_STAGE_NAME.KUBE_HOST_CORDON)
        cordon_step = strategy.KubeHostCordonStep(
            cordon_host,
            self._to_version,
            False,  # force
            kube_states.KUBE_HOST_CORDON_COMPLETE,
            kube_states.KUBE_HOST_CORDON_FAILED)
        cordon_stage.add_step(cordon_step)
        self.apply_phase.add_stage(cordon_stage)

    self._add_kube_update_stages()
def _add_kube_update_stages(self):
"""Stages for control plane, kubelet and cordon"""
# Algorithm
# -------------------------
# Simplex:
# - loop over kube versions
# - control plane
# - kubelet
# -------------------------
# Duplex:
# - loop over kube versions
# - first control plane
# - second control plane
# - kubelets
# -------------------------
from nfv_vim import nfvi
from nfv_vim import strategy
first_host = self.get_first_host()
second_host = self.get_second_host()
ver_list = self._get_kube_version_steps(self._to_version,
self._nfvi_kube_versions_list)
self._nfvi_kube_versions_list)
prev_state = None
if self.nfvi_kube_upgrade is not None:
@ -3225,6 +3262,29 @@ class KubeUpgradeStrategy(SwUpdateStrategy,
if self._state == strategy.STRATEGY_STATE.BUILD_FAILED:
return
self._add_kube_host_uncordon_stage()
def _add_kube_host_uncordon_stage(self):
    """Add the host uncordon stage (simplex only), then the complete stage.

    An uncordon stage for the first host is appended to the apply phase
    only when there is no second host. The kube upgrade complete stage is
    added afterwards in either case.
    """
    from nfv_vim import nfvi
    from nfv_vim import strategy

    uncordon_host = self.get_first_host()
    other_host = self.get_second_host()

    # no second host means this is a simplex system
    if other_host is None:
        kube_states = nfvi.objects.v1.KUBE_UPGRADE_STATE
        uncordon_stage = strategy.StrategyStage(
            strategy.STRATEGY_STAGE_NAME.KUBE_HOST_UNCORDON)
        uncordon_step = strategy.KubeHostUncordonStep(
            uncordon_host,
            self._to_version,
            False,  # force
            kube_states.KUBE_HOST_UNCORDON_COMPLETE,
            kube_states.KUBE_HOST_UNCORDON_FAILED)
        uncordon_stage.add_step(uncordon_step)
        self.apply_phase.add_stage(uncordon_stage)

    # the kube upgrade complete stage follows the uncordon
    self._add_kube_upgrade_complete_stage()
@ -3458,8 +3518,16 @@ class KubeUpgradeStrategy(SwUpdateStrategy,
nfvi.objects.v1.KUBE_UPGRADE_STATE.KUBE_UPGRADING_NETWORKING_FAILED:
self._add_kube_upgrade_networking_stage,
# After networking -> upgrade first control plane
# After networking -> cordon
nfvi.objects.v1.KUBE_UPGRADE_STATE.KUBE_UPGRADED_NETWORKING:
self._add_kube_host_cordon_stage,
# If the state is cordon-failed, resume at cordon stage
nfvi.objects.v1.KUBE_UPGRADE_STATE.KUBE_HOST_CORDON_FAILED:
self._add_kube_host_cordon_stage,
# If the state is cordon-complete, resume at update stages
nfvi.objects.v1.KUBE_UPGRADE_STATE.KUBE_HOST_CORDON_COMPLETE:
self._add_kube_update_stages,
# if upgrading first control plane failed, resume there
@ -3478,10 +3546,18 @@ class KubeUpgradeStrategy(SwUpdateStrategy,
nfvi.objects.v1.KUBE_UPGRADE_STATE.KUBE_UPGRADED_SECOND_MASTER:
self._add_kube_update_stages,
# kubelets transition to 'complete' when they are done
# kubelets transition to 'uncordon' after they are done
nfvi.objects.v1.KUBE_UPGRADE_STATE.KUBE_UPGRADING_KUBELETS:
self._add_kube_update_stages,
# If the state is uncordon-failed, resume at uncordon stage
nfvi.objects.v1.KUBE_UPGRADE_STATE.KUBE_HOST_UNCORDON_FAILED:
self._add_kube_host_uncordon_stage,
# If the state is uncordon-complete, resume at complete stage
nfvi.objects.v1.KUBE_UPGRADE_STATE.KUBE_HOST_UNCORDON_COMPLETE:
self._add_kube_upgrade_complete_stage,
# upgrade is completed, delete the upgrade
nfvi.objects.v1.KUBE_UPGRADE_STATE.KUBE_UPGRADE_COMPLETE:
self._add_kube_upgrade_cleanup_stage,

View File

@ -1,5 +1,5 @@
#
# Copyright (c) 2015-2021 Wind River Systems, Inc.
# Copyright (c) 2015-2023 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -28,6 +28,8 @@ class EventNames(object):
DISABLE_HOST_SERVICES_FAILED = Constant('disable-host-services-failed')
ENABLE_HOST_SERVICES_FAILED = Constant('enable-host-services-failed')
MIGRATE_INSTANCES_FAILED = Constant('migrate-instances-failed')
KUBE_HOST_CORDON_FAILED = Constant('kube-host-cordon-failed')
KUBE_HOST_UNCORDON_FAILED = Constant('kube-host-uncordon-failed')
KUBE_HOST_UPGRADE_CONTROL_PLANE_FAILED = \
Constant('kube-host-upgrade-control-plane-failed')
KUBE_HOST_UPGRADE_KUBELET_FAILED = \

View File

@ -1,5 +1,5 @@
#
# Copyright (c) 2015-2021 Wind River Systems, Inc.
# Copyright (c) 2015-2023 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -53,6 +53,8 @@ class StrategyStageNames(Constants):
KUBE_ROOTCA_UPDATE_QUERY = Constant('kube-rootca-update-query')
KUBE_ROOTCA_UPDATE_START = Constant('kube-rootca-update-start')
# kube upgrade stages
KUBE_HOST_CORDON = Constant('kube-host-cordon')
KUBE_HOST_UNCORDON = Constant('kube-host-uncordon')
KUBE_UPGRADE_QUERY = Constant('kube-upgrade-query')
KUBE_UPGRADE_START = Constant('kube-upgrade-start')
KUBE_UPGRADE_DOWNLOAD_IMAGES = Constant('kube-upgrade-download-images')

View File

@ -74,6 +74,8 @@ class StrategyStepNames(Constants):
QUERY_KUBE_HOST_UPGRADE = Constant('query-kube-host-upgrade')
QUERY_KUBE_UPGRADE = Constant('query-kube-upgrade')
QUERY_KUBE_VERSIONS = Constant('query-kube-versions')
KUBE_HOST_CORDON = Constant('kube-host-cordon')
KUBE_HOST_UNCORDON = Constant('kube-host-uncordon')
KUBE_UPGRADE_START = Constant('kube-upgrade-start')
KUBE_UPGRADE_CLEANUP = Constant('kube-upgrade-cleanup')
KUBE_UPGRADE_COMPLETE = Constant('kube-upgrade-complete')
@ -88,6 +90,14 @@ class StrategyStepNames(Constants):
STRATEGY_STEP_NAME = StrategyStepNames()
def validate_operation(operation):
    """Map a host operation's status to a (step result, reason) pair.

    In progress -> WAIT with an empty reason; failed -> FAILED with the
    operation's reason; anything else -> SUCCESS with an empty reason.
    """
    step_results = strategy.STRATEGY_STEP_RESULT

    if operation.is_inprogress():
        return step_results.WAIT, ""

    if operation.is_failed():
        return step_results.FAILED, operation.reason

    return step_results.SUCCESS, ""
class AbstractStrategyStep(strategy.StrategyStep):
"""An abstract base class for strategy steps"""
@ -4225,6 +4235,100 @@ class AbstractKubeHostListUpgradeStep(AbstractKubeUpgradeStep):
return data
class KubeHostCordonStep(AbstractKubeHostUpgradeStep):
    """Kube Host Cordon - Strategy Step"""

    def __init__(self, host, to_version, force, target_state,
                 target_failure_state, timeout_in_secs=600):
        # the step name is fixed; everything else is passed to the parent
        super(KubeHostCordonStep, self).__init__(
            host, to_version, force,
            STRATEGY_STEP_NAME.KUBE_HOST_CORDON,
            target_state, target_failure_state, timeout_in_secs)

    def handle_event(self, event, event_data=None):
        """
        Handle Host events.

        A cordon-failed event for one of this step's hosts completes the
        step as FAILED and returns True. Every other event is delegated
        to the parent class.
        """
        DLOG.debug("Step (%s) handle event (%s)." % (self._name, event))

        is_cordon_failure = (event == STRATEGY_EVENT.KUBE_HOST_CORDON_FAILED)
        if is_cordon_failure and event_data is not None \
                and event_data.name in self._host_names:
            self.stage.step_complete(
                strategy.STRATEGY_STEP_RESULT.FAILED,
                "kube host cordon (%s) failed" % event_data.name)
            return True

        # anything else is handled by the parent class
        return super(KubeHostCordonStep, self).handle_event(
            event, event_data=event_data)

    def apply(self):
        """Kube Host Cordon"""
        from nfv_vim import directors

        DLOG.info("Step (%s) apply to hostnames (%s)."
                  % (self._name, self._host_names))
        cordon_operation = directors.get_host_director().kube_host_cordon(
            self._host_names, self._force)
        return validate_operation(cordon_operation)
class KubeHostUncordonStep(AbstractKubeHostUpgradeStep):
    """Kube Host Uncordon - Strategy Step"""

    def __init__(self, host, to_version, force, target_state,
                 target_failure_state, timeout_in_secs=600):
        # the step name is fixed; everything else is passed to the parent
        super(KubeHostUncordonStep, self).__init__(
            host, to_version, force,
            STRATEGY_STEP_NAME.KUBE_HOST_UNCORDON,
            target_state, target_failure_state, timeout_in_secs)

    def handle_event(self, event, event_data=None):
        """
        Handle Host events.

        An uncordon-failed event for one of this step's hosts completes
        the step as FAILED and returns True. Every other event is
        delegated to the parent class.
        """
        DLOG.debug("Step (%s) handle event (%s)." % (self._name, event))

        is_uncordon_failure = \
            (event == STRATEGY_EVENT.KUBE_HOST_UNCORDON_FAILED)
        if is_uncordon_failure and event_data is not None \
                and event_data.name in self._host_names:
            self.stage.step_complete(
                strategy.STRATEGY_STEP_RESULT.FAILED,
                "kube host uncordon (%s) failed" % event_data.name)
            return True

        # anything else is handled by the parent class
        return super(KubeHostUncordonStep, self).handle_event(
            event, event_data=event_data)

    def apply(self):
        """Kube Host Uncordon"""
        from nfv_vim import directors

        DLOG.info("Step (%s) apply to hostnames (%s)."
                  % (self._name, self._host_names))
        uncordon_operation = directors.get_host_director().kube_host_uncordon(
            self._host_names, self._force)
        return validate_operation(uncordon_operation)
class KubeHostUpgradeControlPlaneStep(AbstractKubeHostUpgradeStep):
"""Kube Host Upgrade Control Plane - Strategy Step
@ -4272,13 +4376,7 @@ class KubeHostUpgradeControlPlaneStep(AbstractKubeHostUpgradeStep):
operation = \
host_director.kube_upgrade_hosts_control_plane(self._host_names,
self._force)
if operation.is_inprogress():
return strategy.STRATEGY_STEP_RESULT.WAIT, ""
elif operation.is_failed():
return strategy.STRATEGY_STEP_RESULT.FAILED, operation.reason
return strategy.STRATEGY_STEP_RESULT.SUCCESS, ""
return validate_operation(operation)
class KubeHostUpgradeKubeletStep(AbstractKubeHostListUpgradeStep):
@ -4419,6 +4517,8 @@ def strategy_step_rebuild_from_dict(data):
#
# kube upgrade steps
#
STRATEGY_STEP_NAME.KUBE_HOST_CORDON: KubeHostCordonStep,
STRATEGY_STEP_NAME.KUBE_HOST_UNCORDON: KubeHostUncordonStep,
STRATEGY_STEP_NAME.KUBE_HOST_UPGRADE_CONTROL_PLANE:
KubeHostUpgradeControlPlaneStep,
STRATEGY_STEP_NAME.KUBE_HOST_UPGRADE_KUBELET: