sw-deploy precheck and start API

sw-deploy precheck and start synchronous
APIs implementation.

Test Plan:
PASSED: On a DX system, when an active alarm is present,
perform precheck on an in-service patch; precheck fails.
PASSED: On a DX system, there is no active alarm and
system is in healthy state, perform precheck on an
in-service patch, precheck is successful.
PASSED: On a DX system, there is no active alarm and
system is in healthy state, perform precheck on in-service
patch, precheck is successful.
PASSED: On a DX system, try to perform precheck on
an in-service patch when there is another minor release
which is in 'deploy-start' state, precheck fails.
PASSED: On a DX system, perform software deploy start on
an in-service patch, with change in release state,
start API is successful.
PASSED: On DX system, perform software deploy start
on an in-service patch, with no change in release state,
start API fails.
TODO: On an SX system, perform an end-to-end sw-deploy strategy
update using an in-service patch.

Story: 2011045
Task: 49911

Change-Id: I265c8a1f9fbadf04275e0af788614d094c23315c
Signed-off-by: Vanathi.Selvaraju <vanathi.selvaraju@windriver.com>
This commit is contained in:
Vanathi.Selvaraju 2024-04-22 11:43:42 -04:00
parent 6142d9f116
commit 1511be7533
10 changed files with 97 additions and 56 deletions

View File

@ -1,5 +1,5 @@
# #
# Copyright (c) 2015-2016 Wind River Systems, Inc. # Copyright (c) 2015-2016,2024 Wind River Systems, Inc.
# #
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# #
@ -57,7 +57,9 @@ class TaskFuture(object):
if timeout_in_secs is None: if timeout_in_secs is None:
# WARNING: Any change to the default timeout must be reflected in # WARNING: Any change to the default timeout must be reflected in
# the timeouts used for any work being done. # the timeouts used for any work being done.
timeout_in_secs = 20 # The timeout to be changed back to 20sec when the start
# software-api is async for patch-release
timeout_in_secs = 600
elif 0 >= timeout_in_secs: elif 0 >= timeout_in_secs:
timeout_in_secs = None # No timeout wanted, wait forever timeout_in_secs = None # No timeout wanted, wait forever

View File

@ -1,5 +1,5 @@
# #
# Copyright (c) 2015-2016 Wind River Systems, Inc. # Copyright (c) 2015-2016,2024 Wind River Systems, Inc.
# #
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# #
@ -94,7 +94,7 @@ class Thread(object):
self._stall_timestamp_ms = \ self._stall_timestamp_ms = \
timers.get_monotonic_timestamp_in_ms() timers.get_monotonic_timestamp_in_ms()
DLOG.error("Thread %s stalled, progress_marker=%s, " DLOG.warn("Thread %s stalled, progress_marker=%s, "
"elapsed_secs=%s." % (self._name, "elapsed_secs=%s." % (self._name,
self._progress_marker.value, self._progress_marker.value,
self.stall_elapsed_secs)) self.stall_elapsed_secs))

View File

@ -1,5 +1,5 @@
# #
# Copyright (c) 2015-2018 Wind River Systems, Inc. # Copyright (c) 2015-2018,2024 Wind River Systems, Inc.
# #
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# #
@ -125,3 +125,4 @@ glance.upload_image_data_by_file=180
glance.upload_image_data_by_url=180 glance.upload_image_data_by_url=180
sysinv=60 sysinv=60
patching.apply_patch=180 patching.apply_patch=180
usm=60

View File

@ -2297,17 +2297,13 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI):
future.work(usm.sw_deploy_precheck, self._platform_token, release) future.work(usm.sw_deploy_precheck, self._platform_token, release)
future.result = (yield) future.result = (yield)
if not future.result.is_complete(): if not future.result.is_complete():
DLOG.error("USM software deploy precheck did not complete.") DLOG.error("USM software deploy precheck did not complete.")
return return
upgrade_obj = nfvi.objects.v1.Upgrade( precheck_data = future.result.data['error']
release,
None,
None)
response['result-data'] = upgrade_obj response['result-data'] = precheck_data
response['completed'] = True response['completed'] = True
except exceptions.OpenStackRestAPIException as e: except exceptions.OpenStackRestAPIException as e:
@ -2328,7 +2324,7 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI):
callback.send(response) callback.send(response)
callback.close() callback.close()
def upgrade_start(self, future, release, callback): def sw_deploy_start(self, future, release, callback):
""" """
Start a USM software deploy Start a USM software deploy
""" """
@ -2337,9 +2333,7 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI):
response['reason'] = '' response['reason'] = ''
try: try:
upgrade_data = future.result.data
future.set_timeouts(config.CONF.get('nfvi-timeouts', None)) future.set_timeouts(config.CONF.get('nfvi-timeouts', None))
if self._platform_token is None or \ if self._platform_token is None or \
self._platform_token.is_expired(): self._platform_token.is_expired():
future.work(openstack.get_token, self._platform_directory) future.work(openstack.get_token, self._platform_directory)
@ -2354,17 +2348,22 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI):
future.work(usm.sw_deploy_start, self._platform_token, release) future.work(usm.sw_deploy_start, self._platform_token, release)
future.result = (yield) future.result = (yield)
if not future.result.is_complete(): if not future.result.is_complete():
DLOG.error("USM software deploy start did not complete.") DLOG.error("USM software deploy start did not complete.")
return return
upgrade_obj = nfvi.objects.v1.Upgrade( # todo (vselvara): remove the state check here once the api is changed
release, # to async. state check to be done in _strategy_steps.py
upgrade_data, future.work(usm.sw_deploy_get_release, self._platform_token, release)
None) future.result = (yield)
if not future.result.is_complete():
DLOG.error("USM software deploy get release did not complete.")
return
response['result-data'] = upgrade_obj release_data = future.result.data
start_state_data = release_data["metadata"].get(release, None)
response['result-data'] = start_state_data
response['completed'] = True response['completed'] = True
except exceptions.OpenStackRestAPIException as e: except exceptions.OpenStackRestAPIException as e:

View File

@ -1,5 +1,5 @@
# #
# Copyright (c) 2015-2023 Wind River Systems, Inc. # Copyright (c) 2015-2024 Wind River Systems, Inc.
# #
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# #
@ -473,7 +473,7 @@ def rest_api_request(token,
api_cmd, api_cmd,
api_cmd_headers=None, api_cmd_headers=None,
api_cmd_payload=None, api_cmd_payload=None,
timeout_in_secs=20, timeout_in_secs=600,
file_to_post=None): file_to_post=None):
""" """
Make a rest-api request using the given token Make a rest-api request using the given token

View File

@ -56,7 +56,7 @@ def _api_post(token, url, payload, headers=None):
url, url,
headers, headers,
json.dumps(payload), json.dumps(payload),
timeout_in_secs=REST_API_REQUEST_TIMEOUT) timeout_in_secs=600)
return response return response
@ -88,7 +88,8 @@ def sw_deploy_precheck(token, release, force=False):
Ask USM to precheck before a deployment Ask USM to precheck before a deployment
""" """
uri = f"deploy_precheck/{release}/force" if force else f"deploy_precheck/{release}" uri = (f"deploy_precheck/{release}/force?region_name=RegionOne" if
force else f"deploy_precheck/{release}?region_name=RegionOne")
url = _usm_api_cmd(token, uri) url = _usm_api_cmd(token, uri)
response = _api_post(token, url, {}) response = _api_post(token, url, {})
return response return response

View File

@ -205,11 +205,9 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'total_stages': 1, 'total_stages': 1,
'stages': [ 'stages': [
{'name': 'sw-upgrade-start', {'name': 'sw-upgrade-start',
'total_steps': 4, 'total_steps': 3,
'steps': [ 'steps': [
{'name': 'query-alarms'}, {'name': 'query-alarms'},
{'name': 'sw-deploy-precheck',
'release': release},
{'name': 'start-upgrade', {'name': 'start-upgrade',
'release': release}, 'release': release},
{'name': 'system-stabilize', {'name': 'system-stabilize',
@ -253,11 +251,9 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'total_stages': 1, 'total_stages': 1,
'stages': [ 'stages': [
{'name': 'sw-upgrade-start', {'name': 'sw-upgrade-start',
'total_steps': 4, 'total_steps': 3,
'steps': [ 'steps': [
{'name': 'query-alarms'}, {'name': 'query-alarms'},
{'name': 'sw-deploy-precheck',
'release': release},
{'name': 'start-upgrade', {'name': 'start-upgrade',
'release': release}, 'release': release},
{'name': 'system-stabilize', {'name': 'system-stabilize',
@ -302,13 +298,11 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'total_stages': 1, 'total_stages': 1,
'stages': [ 'stages': [
{'name': 'sw-upgrade-start', {'name': 'sw-upgrade-start',
'total_steps': 6, 'total_steps': 5,
'steps': [ 'steps': [
{'name': 'query-alarms'}, {'name': 'query-alarms'},
{'name': 'swact-hosts', {'name': 'swact-hosts',
'entity_names': ['controller-1']}, 'entity_names': ['controller-1']},
{'name': 'sw-deploy-precheck',
'release': release},
{'name': 'start-upgrade', {'name': 'start-upgrade',
'release': release}, 'release': release},
{'name': 'system-stabilize', {'name': 'system-stabilize',
@ -994,10 +988,9 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'stages': [ 'stages': [
{ {
'name': 'sw-upgrade-start', 'name': 'sw-upgrade-start',
'total_steps': 4, 'total_steps': 3,
'steps': [ 'steps': [
{'name': 'query-alarms'}, {'name': 'query-alarms'},
{'name': 'sw-deploy-precheck', 'release': release},
{'name': 'start-upgrade', 'release': release}, {'name': 'start-upgrade', 'release': release},
{'name': 'system-stabilize', 'timeout': 60}, {'name': 'system-stabilize', 'timeout': 60},
], ],
@ -1060,10 +1053,9 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'stages': [ 'stages': [
{ {
'name': 'sw-upgrade-start', 'name': 'sw-upgrade-start',
'total_steps': 4, 'total_steps': 3,
'steps': [ 'steps': [
{'name': 'query-alarms'}, {'name': 'query-alarms'},
{'name': 'sw-deploy-precheck', 'release': release},
{'name': 'start-upgrade', 'release': release}, {'name': 'start-upgrade', 'release': release},
{'name': 'system-stabilize', 'timeout': 60}, {'name': 'system-stabilize', 'timeout': 60},
], ],
@ -1130,10 +1122,9 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'stages': [ 'stages': [
{ {
'name': 'sw-upgrade-start', 'name': 'sw-upgrade-start',
'total_steps': 4, 'total_steps': 3,
'steps': [ 'steps': [
{'name': 'query-alarms'}, {'name': 'query-alarms'},
{'name': 'sw-deploy-precheck', 'release': release},
{'name': 'start-upgrade', 'release': release}, {'name': 'start-upgrade', 'release': release},
{'name': 'system-stabilize', 'timeout': 60}, {'name': 'system-stabilize', 'timeout': 60},
], ],
@ -1205,10 +1196,9 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'stages': [ 'stages': [
{ {
'name': 'sw-upgrade-start', 'name': 'sw-upgrade-start',
'total_steps': 4, 'total_steps': 3,
'steps': [ 'steps': [
{'name': 'query-alarms'}, {'name': 'query-alarms'},
{'name': 'sw-deploy-precheck', 'release': release},
{'name': 'start-upgrade', 'release': release}, {'name': 'start-upgrade', 'release': release},
{'name': 'system-stabilize', 'timeout': 60}, {'name': 'system-stabilize', 'timeout': 60},
], ],
@ -1292,10 +1282,9 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'stages': [ 'stages': [
{ {
'name': 'sw-upgrade-start', 'name': 'sw-upgrade-start',
'total_steps': 4, 'total_steps': 3,
'steps': [ 'steps': [
{'name': 'query-alarms'}, {'name': 'query-alarms'},
{'name': 'sw-deploy-precheck', 'release': release},
{'name': 'start-upgrade', 'release': release}, {'name': 'start-upgrade', 'release': release},
{'name': 'system-stabilize', 'timeout': 60}, {'name': 'system-stabilize', 'timeout': 60},
], ],
@ -1389,10 +1378,9 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'stages': [ 'stages': [
{ {
'name': 'sw-upgrade-start', 'name': 'sw-upgrade-start',
'total_steps': 4, 'total_steps': 3,
'steps': [ 'steps': [
{'name': 'query-alarms'}, {'name': 'query-alarms'},
{'name': 'sw-deploy-precheck', 'release': release},
{'name': 'start-upgrade', 'release': release}, {'name': 'start-upgrade', 'release': release},
{'name': 'system-stabilize', 'timeout': 60}, {'name': 'system-stabilize', 'timeout': 60},
], ],

View File

@ -1400,11 +1400,9 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'total_stages': 8, 'total_stages': 8,
'stages': [ 'stages': [
{'name': 'sw-upgrade-start', {'name': 'sw-upgrade-start',
'total_steps': 4, 'total_steps': 3,
'steps': [ 'steps': [
{'name': 'query-alarms'}, {'name': 'query-alarms'},
{'name': 'sw-deploy-precheck',
'release': strategy.nfvi_upgrade.release},
{'name': 'start-upgrade', {'name': 'start-upgrade',
'release': strategy.nfvi_upgrade.release}, 'release': strategy.nfvi_upgrade.release},
{'name': 'system-stabilize', {'name': 'system-stabilize',
@ -1570,13 +1568,11 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'total_stages': 6, 'total_stages': 6,
'stages': [ 'stages': [
{'name': 'sw-upgrade-start', {'name': 'sw-upgrade-start',
'total_steps': 6, 'total_steps': 5,
'steps': [ 'steps': [
{'name': 'query-alarms'}, {'name': 'query-alarms'},
{'name': 'swact-hosts', {'name': 'swact-hosts',
'entity_names': ['controller-1']}, 'entity_names': ['controller-1']},
{'name': 'sw-deploy-precheck',
'release': strategy.nfvi_upgrade.release},
{'name': 'start-upgrade', {'name': 'start-upgrade',
'release': strategy.nfvi_upgrade.release}, 'release': strategy.nfvi_upgrade.release},
{'name': 'system-stabilize', {'name': 'system-stabilize',

View File

@ -10,6 +10,7 @@ import weakref
from nfv_common import debug from nfv_common import debug
from nfv_common.helpers import Constant from nfv_common.helpers import Constant
from nfv_common.helpers import Constants from nfv_common.helpers import Constants
from nfv_common.helpers import coroutine
from nfv_common.helpers import get_local_host_name from nfv_common.helpers import get_local_host_name
from nfv_common.helpers import Singleton from nfv_common.helpers import Singleton
from nfv_common import strategy from nfv_common import strategy
@ -1804,6 +1805,48 @@ class SwUpgradeStrategy(
""" """
self._nfvi_upgrade = nfvi_upgrade self._nfvi_upgrade = nfvi_upgrade
# Coroutine-style callback for the sw-deploy precheck request: it receives a
# single response dict and, unless the precheck passed, marks the strategy
# build phase as failed and reports that to sw_update_obj.
# NOTE(review): indentation is not preserved in this view; the nesting
# described by the comments below is inferred from the branch order and the
# log messages -- confirm against the real file.
@coroutine
def _sw_deploy_precheck_callback(self):
"""
Software deploy precheck callback
"""
# Wait for the response dict to be sent into this coroutine.
response = (yield)
DLOG.debug("sw-deploy precheck callback response=%s." % response)
if response['completed']:
# A falsy 'result-data' is treated as a passing precheck; nothing else
# is done in that case apart from logging.
if not response['result-data']:
DLOG.debug("sw-deploy precheck completed %s" % response['result-data'])
else:
# Non-empty 'result-data' means the precheck reported a problem: fail
# the build phase with a fixed reason string.
reason = "sw-deploy precheck failed"
# todo: (vselvara) to display the entire error response
# reason = response['result-data']
DLOG.warn(reason)
self._state = strategy.STRATEGY_STATE.BUILD_FAILED
self.build_phase.result = strategy.STRATEGY_PHASE_RESULT.FAILED
self.build_phase.result_reason = reason
self.sw_update_obj.strategy_build_complete(
False, self.build_phase.result_reason)
self.save()
else:
# The request itself did not complete: fail the build phase with a
# distinct reason so the two failure modes can be told apart.
reason = "sw-deploy precheck operation not completed"
DLOG.warn(reason)
self._state = strategy.STRATEGY_STATE.BUILD_FAILED
self.build_phase.result = strategy.STRATEGY_PHASE_RESULT.FAILED
self.build_phase.result_reason = reason
self.sw_update_obj.strategy_build_complete(
False, self.build_phase.result_reason)
self.save()
# Explicit end of the callback; no value is returned.
return
def swdeploy_precheck(self, release):
    """
    Request a software deploy precheck for the given release

    A new _sw_deploy_precheck_callback coroutine is handed to the nfvi
    infrastructure module as the callback for the request.
    """
    # Imported inside the method, as in the original code.
    from nfv_vim import nfvi

    DLOG.info("Software deploy precheck for %s." % release)
    infrastructure = nfvi._nfvi_infrastructure_module
    infrastructure.nfvi_sw_deploy_precheck(
        release, self._sw_deploy_precheck_callback())
def build(self): def build(self):
""" """
Build the strategy Build the strategy
@ -1812,6 +1855,7 @@ class SwUpgradeStrategy(
stage = strategy.StrategyStage(strategy.STRATEGY_STAGE_NAME.SW_UPGRADE_QUERY) stage = strategy.StrategyStage(strategy.STRATEGY_STAGE_NAME.SW_UPGRADE_QUERY)
stage.add_step(strategy.QueryAlarmsStep(ignore_alarms=self._ignore_alarms)) stage.add_step(strategy.QueryAlarmsStep(ignore_alarms=self._ignore_alarms))
self.swdeploy_precheck(release=self._release)
stage.add_step(strategy.QueryUpgradeStep(release=self._release)) stage.add_step(strategy.QueryUpgradeStep(release=self._release))
self.build_phase.add_stage(stage) self.build_phase.add_stage(stage)
@ -1852,7 +1896,6 @@ class SwUpgradeStrategy(
if self.nfvi_upgrade.is_available: if self.nfvi_upgrade.is_available:
# sw-deploy start must be done on controller-0 # sw-deploy start must be done on controller-0
self._swact_fix(stage, HOST_NAME.CONTROLLER_1) self._swact_fix(stage, HOST_NAME.CONTROLLER_1)
stage.add_step(strategy.SwDeployPrecheckStep(release=self._release))
stage.add_step(strategy.UpgradeStartStep(release=self._release)) stage.add_step(strategy.UpgradeStartStep(release=self._release))
stage.add_step(strategy.SystemStabilizeStep()) stage.add_step(strategy.SystemStabilizeStep())
# sw-deploy host must first be on controller-1 # sw-deploy host must first be on controller-1

View File

@ -24,6 +24,11 @@ KUBE_CERT_UPDATE_TRUSTBOTHCAS = "trust-both-cas"
KUBE_CERT_UPDATE_TRUSTNEWCA = "trust-new-ca" KUBE_CERT_UPDATE_TRUSTNEWCA = "trust-new-ca"
KUBE_CERT_UPDATE_UPDATECERTS = "update-certs" KUBE_CERT_UPDATE_UPDATECERTS = "update-certs"
# sw-deploy strategy constants
# todo: (vselvara) based on the new modified states
SW_DEPLOY_START = 'deploying-start'
SW_DEPLOY_REMOVE = 'removing'
@six.add_metaclass(Singleton) @six.add_metaclass(Singleton)
class StrategyStepNames(Constants): class StrategyStepNames(Constants):
@ -1107,14 +1112,20 @@ class UpgradeStartStep(strategy.StrategyStep):
""" """
response = (yield) response = (yield)
DLOG.debug("Start-Upgrade callback response=%s." % response) DLOG.debug("Start-Upgrade callback response=%s." % response)
if response['completed']: if response['completed']:
# TODO(jkraitbe): Consider updating self.strategy.nfvi_upgrade.hosts_info # TODO(jkraitbe): Consider updating self.strategy.nfvi_upgrade.hosts_info
result = strategy.STRATEGY_STEP_RESULT.SUCCESS if (response['result-data']['state'] == SW_DEPLOY_START or
self.stage.step_complete(result, "") response['result-data']['state'] == SW_DEPLOY_REMOVE):
result = strategy.STRATEGY_STEP_RESULT.SUCCESS
self.stage.step_complete(result, "")
else:
reason = "Software deploy not started %s" % response['result-data']['state']
result = strategy.STRATEGY_STEP_RESULT.FAILED, reason
self.stage.step_complete(result, "")
else: else:
# TODO(jkraitbe): Add error message # TODO(jkraitbe): Add error message
result = strategy.STRATEGY_STEP_RESULT.FAILED reason = "Software deploy start not completed"
result = strategy.STRATEGY_STEP_RESULT.FAILED, reason
self.stage.step_complete(result, "") self.stage.step_complete(result, "")
def apply(self): def apply(self):