163 lines
7.2 KiB
Python
163 lines
7.2 KiB
Python
#
|
|
# Copyright (c) 2023 Wind River Systems, Inc.
|
|
#
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
#
|
|
import time
|
|
|
|
from dcmanager.common import consts
|
|
from dcmanager.common.exceptions import StrategyStoppedException
|
|
from dcmanager.common import utils
|
|
from dcmanager.db import api as db_api
|
|
from dcmanager.orchestrator.states.base import BaseState
|
|
|
|
ACTIVATING_COMPLETED_STATES = ['activation-complete',
|
|
'aborting']
|
|
|
|
ACTIVATING_RETRY_STATES = ['activation-failed', ]
|
|
|
|
ACTIVATING_IN_PROGRESS_STATES = ['activating', 'activating-hosts', ]
|
|
|
|
# Max time: 60 minutes = 60 queries x 60 seconds sleep between queries
|
|
DEFAULT_MAX_QUERIES = 60
|
|
DEFAULT_SLEEP_DURATION = 60
|
|
MAX_FAILED_RETRIES = 10
|
|
|
|
|
|
class ActivatingUpgradeState(BaseState):
|
|
"""Upgrade state actions for activating an upgrade"""
|
|
|
|
def __init__(self, region_name):
|
|
super(ActivatingUpgradeState, self).__init__(
|
|
next_state=consts.STRATEGY_STATE_COMPLETING_UPGRADE, region_name=region_name)
|
|
# max time to wait (in seconds) is: sleep_duration * max_queries
|
|
self.sleep_duration = DEFAULT_SLEEP_DURATION
|
|
self.max_queries = DEFAULT_MAX_QUERIES
|
|
self.max_failed_retries = MAX_FAILED_RETRIES
|
|
|
|
def get_upgrade_state(self, strategy_step):
|
|
try:
|
|
upgrades = self.get_sysinv_client(
|
|
strategy_step.subcloud.region_name).get_upgrades()
|
|
|
|
except Exception as exception:
|
|
self.warn_log(strategy_step,
|
|
"Encountered exception: %s, "
|
|
"retry upgrade activation for subcloud %s."
|
|
% (str(exception), strategy_step.subcloud.name))
|
|
return ACTIVATING_RETRY_STATES[0]
|
|
|
|
if len(upgrades) == 0:
|
|
raise Exception("No upgrades were found to activate")
|
|
|
|
# The list of upgrades will never contain more than one entry.
|
|
return upgrades[0].state
|
|
|
|
def perform_state_action(self, strategy_step):
|
|
"""Activate an upgrade on a subcloud
|
|
|
|
Returns the next state in the state machine on success.
|
|
Any exceptions raised by this method set the strategy to FAILED.
|
|
"""
|
|
try:
|
|
upgrade_state = self.get_upgrade_state(strategy_step)
|
|
except Exception as ex:
|
|
self.info_log(strategy_step, "%s for %s."
|
|
% (str(ex), strategy_step.subcloud.name))
|
|
return self.next_state
|
|
|
|
# Check if an existing upgrade is already activated
|
|
if upgrade_state in ACTIVATING_COMPLETED_STATES:
|
|
self.info_log(strategy_step,
|
|
"Already in an activating state:%s" % upgrade_state)
|
|
return self.next_state
|
|
|
|
# Need to loop
|
|
# - attempt an initial activate one or more times
|
|
# - loop until state changed to a activating completed state
|
|
# - re-attempt activate if activation fails
|
|
audit_counter = 0
|
|
activate_retry_counter = 0
|
|
first_activate = True
|
|
while True:
|
|
# If event handler stop has been triggered, fail the state
|
|
if self.stopped():
|
|
raise StrategyStoppedException()
|
|
|
|
# if max retries have occurred, fail the state
|
|
if activate_retry_counter >= self.max_failed_retries:
|
|
error_msg = utils.get_failure_msg(strategy_step.subcloud.region_name)
|
|
db_api.subcloud_update(
|
|
self.context, strategy_step.subcloud_id,
|
|
error_description=error_msg[0:consts.ERROR_DESCRIPTION_LENGTH])
|
|
details = ("Failed to activate upgrade. Please check "
|
|
"sysinv.log on the subcloud or "
|
|
"%s on central for details." %
|
|
(consts.ERROR_DESC_CMD))
|
|
raise Exception(details)
|
|
|
|
# We may need multiple attempts to issue the first activate
|
|
# if keystone is down, impacting the ability to send the activate
|
|
if first_activate:
|
|
# invoke the API 'upgrade-activate'.
|
|
# Normally only auth failures deserve retry
|
|
# (no upgrade found, bad host state, auth)
|
|
try:
|
|
self.get_sysinv_client(
|
|
strategy_step.subcloud.region_name).upgrade_activate()
|
|
first_activate = False # clear first activation flag
|
|
activate_retry_counter = 0 # reset activation retries
|
|
except Exception as exception:
|
|
# increment the retry counter on failure
|
|
activate_retry_counter += 1
|
|
self.warn_log(strategy_step,
|
|
"Encountered exception: %s, "
|
|
"retry upgrade activation for subcloud %s."
|
|
% (str(exception),
|
|
strategy_step.subcloud.name))
|
|
# cannot flow into the remaining code. sleep / continue
|
|
time.sleep(self.sleep_duration)
|
|
continue
|
|
|
|
upgrade_state = self.get_upgrade_state(strategy_step)
|
|
if upgrade_state in ACTIVATING_RETRY_STATES:
|
|
# We failed. Better try again
|
|
activate_retry_counter += 1
|
|
self.info_log(strategy_step,
|
|
"Activation failed, retrying... State=%s"
|
|
% upgrade_state)
|
|
try:
|
|
self.get_sysinv_client(
|
|
strategy_step.subcloud.region_name).upgrade_activate()
|
|
except Exception as exception:
|
|
self.warn_log(strategy_step,
|
|
"Encountered exception: %s, "
|
|
"retry upgrade activation for subcloud %s."
|
|
% (str(exception),
|
|
strategy_step.subcloud.name))
|
|
elif upgrade_state in ACTIVATING_IN_PROGRESS_STATES:
|
|
self.info_log(strategy_step,
|
|
"Activation in progress, waiting... State=%s"
|
|
% upgrade_state)
|
|
elif upgrade_state in ACTIVATING_COMPLETED_STATES:
|
|
self.info_log(strategy_step,
|
|
"Activation completed. State=%s"
|
|
% upgrade_state)
|
|
break
|
|
audit_counter += 1
|
|
if audit_counter >= self.max_queries:
|
|
error_msg = utils.get_failure_msg(strategy_step.subcloud.region_name)
|
|
db_api.subcloud_update(
|
|
self.context, strategy_step.subcloud_id,
|
|
error_description=error_msg[0:consts.ERROR_DESCRIPTION_LENGTH])
|
|
details = ("Timeout waiting for activation to complete. "
|
|
"Please check sysinv.log on the subcloud or "
|
|
"%s on central for details." %
|
|
(consts.ERROR_DESC_CMD))
|
|
raise Exception(details)
|
|
time.sleep(self.sleep_duration)
|
|
|
|
# When we return from this method without throwing an exception, the
|
|
# state machine can proceed to the next state
|
|
return self.next_state
|