Add retry at nfv orchestration level
This commit introduces retry on failure for cases such as the kubelet killing pods due to resource contention during a Kubernetes upgrade. Test Plan: PASS: Simulate a kube-apiserver pod failure by adding a wrong resource in the REST API request and check that the query is retried. PASS: Verify that the Kubernetes orchestrated update works with the changes on AIO-SX. PASS: Verify that the changes work on AIO-DX, with the strategy created on controller-0 and applied on controller-1. Closes-Bug: #2053236 Change-Id: I816b09bb0cd767380e5093d4732d161e4cc8cb24 Signed-off-by: sshathee <shunmugam.shatheesh@windriver.com>
This commit is contained in:
parent
d730d8edc2
commit
9c73d3b254
|
@ -1,5 +1,5 @@
|
||||||
#
|
#
|
||||||
# Copyright (c) 2015-2023 Wind River Systems, Inc.
|
# Copyright (c) 2015-2024 Wind River Systems, Inc.
|
||||||
#
|
#
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
#
|
#
|
||||||
|
@ -27,6 +27,7 @@ class HostDirector(object):
|
||||||
"""
|
"""
|
||||||
Host Director
|
Host Director
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self._host_operation = None
|
self._host_operation = None
|
||||||
|
|
||||||
|
@ -1280,6 +1281,25 @@ class HostDirector(object):
|
||||||
|
|
||||||
return host_operation
|
return host_operation
|
||||||
|
|
||||||
|
@coroutine
def _nfvi_get_kube_host_upgrade_list_callback(self):
    """
    Callback for the kube host upgrade list query.

    Receives the query response through the generator's yield, logs it at
    debug level, and passes it unchanged to the software management
    director's kube_host_upgrade_list().
    """
    from nfv_vim import directors

    query_response = (yield)
    DLOG.debug("Get kube host upgrade list callback response=%s."
               % query_response)
    directors.get_sw_mgmt_director().kube_host_upgrade_list(query_response)
|
||||||
|
|
||||||
|
def _nfvi_get_kube_host_upgrade_list(self):
    """
    Issue the NFVI kube host upgrade list query.

    A new _nfvi_get_kube_host_upgrade_list_callback instance is created
    and handed to nfvi.nfvi_get_kube_host_upgrade_list().
    """
    upgrade_list_callback = self._nfvi_get_kube_host_upgrade_list_callback()
    nfvi.nfvi_get_kube_host_upgrade_list(upgrade_list_callback)
|
||||||
|
|
||||||
|
|
||||||
def get_host_director():
|
def get_host_director():
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -450,6 +450,18 @@ class SwMgmtDirector(object):
|
||||||
self._sw_update.handle_event(
|
self._sw_update.handle_event(
|
||||||
strategy.STRATEGY_EVENT.MIGRATE_INSTANCES_FAILED, reason)
|
strategy.STRATEGY_EVENT.MIGRATE_INSTANCES_FAILED, reason)
|
||||||
|
|
||||||
|
def kube_host_upgrade_list(self, event_data):
    """
    Turn a kube host upgrade list response into a strategy event.

    QUERY_KUBE_HOST_UPGRADE_COMPLETED is selected when
    event_data['completed'] is truthy, QUERY_KUBE_HOST_UPGRADE_FAILED
    otherwise. The event and event_data are passed to the current software
    update's handle_event(); nothing is dispatched when there is no
    software update.
    """
    query_completed = event_data['completed']
    events = strategy.STRATEGY_EVENT
    event = (events.QUERY_KUBE_HOST_UPGRADE_COMPLETED if query_completed
             else events.QUERY_KUBE_HOST_UPGRADE_FAILED)

    if self._sw_update is None:
        return

    self._sw_update.handle_event(event, event_data)
|
||||||
|
|
||||||
|
|
||||||
def get_sw_mgmt_director():
|
def get_sw_mgmt_director():
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -37,6 +37,9 @@ class EventNames(object):
|
||||||
Constant('kube-host-upgrade-kubelet-failed')
|
Constant('kube-host-upgrade-kubelet-failed')
|
||||||
KUBE_ROOTCA_UPDATE_HOST_FAILED = Constant('kube-rootca-update-host-failed')
|
KUBE_ROOTCA_UPDATE_HOST_FAILED = Constant('kube-rootca-update-host-failed')
|
||||||
KUBE_UPGRADE_CHANGED = Constant('kube-upgrade-changed')
|
KUBE_UPGRADE_CHANGED = Constant('kube-upgrade-changed')
|
||||||
|
QUERY_KUBE_HOST_UPGRADE_FAILED = Constant('query-kube-host-upgrade-failed')
|
||||||
|
QUERY_KUBE_HOST_UPGRADE_COMPLETED = \
|
||||||
|
Constant('query-kube-host-upgrade-completed')
|
||||||
|
|
||||||
|
|
||||||
# Constants
|
# Constants
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
#
|
#
|
||||||
# Copyright (c) 2015-2023 Wind River Systems, Inc.
|
# Copyright (c) 2015-2024 Wind River Systems, Inc.
|
||||||
#
|
#
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
#
|
#
|
||||||
|
@ -3930,9 +3930,12 @@ class QueryKubeHostUpgradeStep(AbstractStrategyStep):
|
||||||
"""
|
"""
|
||||||
Query Kube Host Upgrade list
|
Query Kube Host Upgrade list
|
||||||
"""
|
"""
|
||||||
def __init__(self):
|
MAX_RETRIES = 3
|
||||||
|
|
||||||
|
def __init__(self, retry_count=MAX_RETRIES):
|
||||||
super(QueryKubeHostUpgradeStep, self).__init__(
|
super(QueryKubeHostUpgradeStep, self).__init__(
|
||||||
STRATEGY_STEP_NAME.QUERY_KUBE_HOST_UPGRADE, timeout_in_secs=60)
|
STRATEGY_STEP_NAME.QUERY_KUBE_HOST_UPGRADE, timeout_in_secs=200)
|
||||||
|
self._retry_count = retry_count
|
||||||
|
|
||||||
@coroutine
|
@coroutine
|
||||||
def _get_kube_host_upgrade_list_callback(self):
|
def _get_kube_host_upgrade_list_callback(self):
|
||||||
|
@ -3957,12 +3960,46 @@ class QueryKubeHostUpgradeStep(AbstractStrategyStep):
|
||||||
"""
|
"""
|
||||||
Query Kube Host Upgrade List
|
Query Kube Host Upgrade List
|
||||||
"""
|
"""
|
||||||
from nfv_vim import nfvi
|
from nfv_vim import directors
|
||||||
|
|
||||||
DLOG.info("Step (%s) apply." % self._name)
|
DLOG.info("Step (%s) apply." % self._name)
|
||||||
nfvi.nfvi_get_kube_host_upgrade_list(
|
|
||||||
self._get_kube_host_upgrade_list_callback())
|
host_director = directors.get_host_director()
|
||||||
|
host_director._nfvi_get_kube_host_upgrade_list()
|
||||||
return strategy.STRATEGY_STEP_RESULT.WAIT, ""
|
return strategy.STRATEGY_STEP_RESULT.WAIT, ""
|
||||||
|
|
||||||
|
def handle_event(self, event, event_data=None):
    """
    Handle Query Kube Host upgrade event

    On QUERY_KUBE_HOST_UPGRADE_FAILED the query is re-issued through the
    host director while event data is present and retries remain;
    otherwise the step is completed as FAILED. On
    QUERY_KUBE_HOST_UPGRADE_COMPLETED the 'result-data' entry is stored on
    the strategy (when event data and strategy are both available) and the
    step is completed as SUCCESS.

    :param event: strategy event delivered to this step
    :param event_data: optional dict; 'reason' is read on failure and
        'result-data' on completion
    :return: True for the failed-query event, False in every other case
    """
    from nfv_vim import directors

    if event == STRATEGY_EVENT.QUERY_KUBE_HOST_UPGRADE_FAILED:
        if event_data is not None and self._retry_count > 0:
            # if kube host upgrade list fails and we have retries,
            # re-trigger the function
            DLOG.info("Step (%s) retry due to failure for (%s)."
                      % (self._name, str(event_data["reason"])))

            self._retry_count -= 1
            host_director = directors.get_host_director()
            host_director._nfvi_get_kube_host_upgrade_list()
        else:
            # if kube host upgrade list fails and we are out of retries, fail
            # This branch is also taken when event_data is None, so the
            # reason must not be looked up unconditionally.
            reason = "" if event_data is None else event_data['reason']
            result = strategy.STRATEGY_STEP_RESULT.FAILED
            self.stage.step_complete(result, reason)
        return True

    elif event == STRATEGY_EVENT.QUERY_KUBE_HOST_UPGRADE_COMPLETED:
        if event_data is not None and self.strategy is not None:
            self.strategy.nfvi_kube_host_upgrade_list = \
                event_data['result-data']

        result = strategy.STRATEGY_STEP_RESULT.SUCCESS
        self.stage.step_complete(result, "")
        # NOTE(review): this handled event falls through to the final
        # "return False" -- confirm that is intended.

    return False
|
||||||
|
|
||||||
|
|
||||||
class AbstractKubeUpgradeStep(AbstractStrategyStep):
|
class AbstractKubeUpgradeStep(AbstractStrategyStep):
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue