Merge "Backend support for dcmanager prestage orchestration"

This commit is contained in:
Zuul 2022-03-03 16:48:26 +00:00 committed by Gerrit Code Review
commit e654fba247
15 changed files with 658 additions and 202 deletions

View File

@ -221,8 +221,8 @@ class SubcloudsController(object):
else:
if field == 'sysadmin_password':
try:
payload['sysadmin_password'] = base64.b64decode(
val).decode('utf-8')
base64.b64decode(val).decode('utf-8')
payload['sysadmin_password'] = val
except Exception:
pecan.abort(
400,
@ -1423,10 +1423,16 @@ class SubcloudsController(object):
payload = self._get_prestage_payload(request)
payload['subcloud_name'] = subcloud.name
try:
payload['oam_floating_ip'] = \
prestage.validate_prestage_subcloud(subcloud, payload)
prestage.global_prestage_validate(payload)
except exceptions.PrestagePreCheckFailedException as exc:
LOG.exception("validate_prestage_subcloud failed")
LOG.exception("global_prestage_validate failed")
pecan.abort(400, _(str(exc)))
try:
payload['oam_floating_ip'] = \
prestage.validate_prestage(subcloud, payload)
except exceptions.PrestagePreCheckFailedException as exc:
LOG.exception("validate_prestage failed")
pecan.abort(400, _(str(exc)))
try:

View File

@ -1,5 +1,5 @@
# Copyright (c) 2017 Ericsson AB.
# Copyright (c) 2017-2021 Wind River Systems, Inc.
# Copyright (c) 2017-2022 Wind River Systems, Inc.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
@ -39,13 +39,15 @@ SUPPORTED_STRATEGY_TYPES = [
consts.SW_UPDATE_TYPE_KUBE_ROOTCA_UPDATE,
consts.SW_UPDATE_TYPE_KUBERNETES,
consts.SW_UPDATE_TYPE_PATCH,
consts.SW_UPDATE_TYPE_PRESTAGE,
consts.SW_UPDATE_TYPE_UPGRADE
]
# some strategies allow force for all subclouds
FORCE_ALL_TYPES = [
consts.SW_UPDATE_TYPE_KUBE_ROOTCA_UPDATE,
consts.SW_UPDATE_TYPE_KUBERNETES
consts.SW_UPDATE_TYPE_KUBERNETES,
consts.SW_UPDATE_TYPE_PRESTAGE
]

View File

@ -79,6 +79,7 @@ SW_UPDATE_TYPE_FIRMWARE = "firmware"
SW_UPDATE_TYPE_KUBE_ROOTCA_UPDATE = "kube-rootca-update"
SW_UPDATE_TYPE_KUBERNETES = "kubernetes"
SW_UPDATE_TYPE_PATCH = "patch"
SW_UPDATE_TYPE_PRESTAGE = "prestage"
SW_UPDATE_TYPE_UPGRADE = "upgrade"
# Software update states
@ -164,6 +165,12 @@ STRATEGY_STATE_CREATING_VIM_KUBE_ROOTCA_UPDATE_STRATEGY = \
STRATEGY_STATE_APPLYING_VIM_KUBE_ROOTCA_UPDATE_STRATEGY = \
"applying vim kube rootca update strategy"
# Prestage orchestration states (ordered)
STRATEGY_STATE_PRESTAGE_PRE_CHECK = "prestage-precheck"
STRATEGY_STATE_PRESTAGE_PREPARE = "prestage-prepare"
STRATEGY_STATE_PRESTAGE_PACKAGES = "prestaging-packages"
STRATEGY_STATE_PRESTAGE_IMAGES = "prestaging-images"
# Subcloud deploy status states
DEPLOY_STATE_NONE = 'not-deployed'
DEPLOY_STATE_PRE_DEPLOY = 'pre-deploy'
@ -201,12 +208,11 @@ UPGRADE_STATE_ACTIVATION_FAILED = 'activation-failed'
UPGRADE_STATE_ACTIVATION_COMPLETE = 'activation-complete'
# Prestage States
PRESTAGE_STATE_PREPARE = 'prestage-prepare'
PRESTAGE_STATE_PACKAGES = 'prestaging-packages'
PRESTAGE_STATE_IMAGES = 'prestaging-images'
PRESTAGE_STATE_PREPARE = STRATEGY_STATE_PRESTAGE_PREPARE
PRESTAGE_STATE_PACKAGES = STRATEGY_STATE_PRESTAGE_PACKAGES
PRESTAGE_STATE_IMAGES = STRATEGY_STATE_PRESTAGE_IMAGES
PRESTAGE_STATE_FAILED = 'prestage-failed'
PRESTAGE_STATE_COMPLETE = 'prestage-complete'
PRESTAGE_FILE_POSTFIX = '_prestage.yml'
# Alarm aggregation
ALARMS_DISABLED = "disabled"
@ -262,6 +268,8 @@ EXTRA_ARGS_TO_VERSION = 'to-version'
EXTRA_ARGS_CERT_FILE = 'cert-file'
EXTRA_ARGS_EXPIRY_DATE = 'expiry-date'
EXTRA_ARGS_SUBJECT = 'subject'
EXTRA_ARGS_SYSADMIN_PASSWORD = 'sysadmin_password'
EXTRA_ARGS_FORCE = 'force'
# Device Image Bitstream Types
BITSTREAM_TYPE_ROOT_KEY = 'root-key'

View File

@ -195,7 +195,25 @@ class PreCheckFailedException(DCManagerException):
class PrestagePreCheckFailedException(DCManagerException):
    """Raised when a prestage pre-check fails.

    Carries an 'orch_skip' attribute, indicating that the subcloud can be
    skipped during orchestrated prestage operations.
    """

    message = _("Subcloud %(subcloud)s prestage precheck failed: %(details)s")

    def __init__(self, subcloud, details, orch_skip=False):
        """Build the failure message for the given subcloud.

        :param subcloud: subcloud name, or None when failing during the
            global (not subcloud-specific) prestage validation
        :param details: description of the failure
        :param orch_skip: True if orchestration may skip this subcloud
        """
        self.orch_skip = orch_skip
        # Interpolate AFTER the translation lookup: the message id handed
        # to _() must be the constant template, otherwise the formatted
        # text can never match a catalog entry.
        if subcloud is None:
            self.message = _("Prestage failed: %s") % details
        elif orch_skip:
            self.message = (_("Prestage skipped '%s': %s")
                            % (subcloud, details))
        else:
            self.message = (_("Prestage failed '%s': %s")
                            % (subcloud, details))
        super(PrestagePreCheckFailedException, self).__init__()
class VaultLoadMissingError(DCManagerException):

View File

@ -20,8 +20,8 @@ Common prestaging operations.
These are shared across dcmanager (SubcloudManager) and orchestration.
"""
import base64
import os
import subprocess
import threading
from oslo_log import log as logging
@ -35,23 +35,25 @@ from dccommon.exceptions import PlaybookExecutionFailed
from dccommon.utils import run_playbook
from dcmanager.common import consts
from dcmanager.common.consts import PRESTAGE_FILE_POSTFIX
from dcmanager.common import exceptions
from dcmanager.common import utils
from dcmanager.db import api as db_api
LOG = logging.getLogger(__name__)
PREPARE_PRESTAGE_PACKAGES_SCRIPT = \
'/usr/local/bin/prepare-prestage-packages.sh'
DEPLOY_BASE_DIR = DEPLOY_DIR + '/' + SW_VERSION
PREPARE_PRESTAGE_PACKAGES_OUTPUT_PATH = DEPLOY_BASE_DIR + '/prestage/shared'
PREPARE_PRESTAGE_PACKAGES_IMAGES_LIST \
= DEPLOY_BASE_DIR + '/prestage_images_image_list.txt'
PRESTAGE_PREPARATION_COMPLETED_FILE = os.path.join(
PREPARE_PRESTAGE_PACKAGES_OUTPUT_PATH, '.prestage_preparation_completed')
PRESTAGE_PREPARATION_FAILED_FILE = os.path.join(
DEPLOY_BASE_DIR, '.prestage_preparation_failed')
ANSIBLE_PREPARE_PRESTAGE_PACKAGES_PLAYBOOK = \
"/usr/share/ansible/stx-ansible/playbooks/prepare_prestage_packages.yml"
ANSIBLE_PRESTAGE_SUBCLOUD_PACKAGES_PLAYBOOK = \
"/usr/share/ansible/stx-ansible/playbooks/prestage_sw_packages.yml"
ANSIBLE_PRESTAGE_SUBCLOUD_IMAGES_PLAYBOOK = \
"/usr/share/ansible/stx-ansible/playbooks/prestage_images.yml"
ANSIBLE_PRESTAGE_INVENTORY_SUFFIX = '_prestage_inventory.yml'
def is_deploy_status_prestage(deploy_status):
@ -85,7 +87,69 @@ def is_system_controller_upgrading():
return len(_get_system_controller_upgrades()) != 0
def validate_prestage_subcloud(subcloud, payload):
def global_prestage_validate(payload):
    """Run the prestage checks that are not tied to a specific subcloud.

    :param payload: request payload; must carry a non-empty
        'sysadmin_password' that can be base64 decoded to utf-8
    :raises PrestagePreCheckFailedException: if the system controller is
        upgrading, or the password is missing, empty or not decodable
    """
    if is_system_controller_upgrading():
        raise exceptions.PrestagePreCheckFailedException(
            subcloud=consts.SYSTEM_CONTROLLER_NAME,
            details='Prestage operations not allowed while system'
                    ' controller upgrade is in progress.')

    encoded_password = payload.get('sysadmin_password')
    if encoded_password is None or encoded_password == '':
        raise exceptions.PrestagePreCheckFailedException(
            subcloud=None,
            orch_skip=False,
            details="Missing required parameter 'sysadmin_password'")

    # Only confirm the password can be decoded here; the decoded value is
    # discarded (it is decoded again when running ansible).
    try:
        base64.b64decode(encoded_password).decode('utf-8')
    except Exception as ex:
        raise exceptions.PrestagePreCheckFailedException(
            subcloud=None,
            orch_skip=False,
            details="Failed to decode subcloud sysadmin_password,"
                    " verify the password is base64 encoded."
                    " Details: %s" % ex)
def initial_subcloud_validate(subcloud):
    """Perform the basic validation of a subcloud prestage operation.

    The subcloud must be online, managed, and in one of the deploy states
    from which prestaging is allowed. Checks run in that order and the
    first failing one is reported.

    :param subcloud: subcloud object; name, availability_status,
        management_state and deploy_status are read
    :raises PrestagePreCheckFailedException: (with orch_skip=True) on
        the first failed check
    """
    LOG.debug("Validating subcloud prestage '%s'", subcloud.name)

    def _skippable_failure(details):
        # Every failure here allows orchestration to skip the subcloud
        return exceptions.PrestagePreCheckFailedException(
            subcloud=subcloud.name,
            orch_skip=True,
            details=details)

    if subcloud.availability_status != consts.AVAILABILITY_ONLINE:
        raise _skippable_failure("Subcloud is offline.")

    if subcloud.management_state != consts.MANAGEMENT_MANAGED:
        raise _skippable_failure("Subcloud is not managed.")

    permitted_states = [consts.DEPLOY_STATE_DONE,
                        consts.PRESTAGE_STATE_FAILED,
                        consts.PRESTAGE_STATE_COMPLETE]
    if subcloud.deploy_status not in permitted_states:
        raise _skippable_failure(
            "Prestage operation is only allowed while"
            " subcloud deploy status is one of: %s."
            " The current deploy status is %s."
            % (', '.join(permitted_states), subcloud.deploy_status))
def validate_prestage(subcloud, payload):
"""Validate a subcloud prestage operation.
Prestage conditions validation
@ -96,60 +160,70 @@ def validate_prestage_subcloud(subcloud, payload):
- Subcloud has no management-affecting alarms (unless force=true)
Raises a PrestageCheckFailedException on failure.
Returns the oam_floating_ip for subsequent use by ansible.
"""
force = payload['force']
LOG.debug("Validating subcloud prestage '%s', force=%s",
subcloud.name, force)
LOG.debug("Validating subcloud prestage '%s'", subcloud.name)
if subcloud.availability_status != consts.AVAILABILITY_ONLINE:
raise exceptions.PrestagePreCheckFailedException(
subcloud=subcloud.name, details="Subcloud is offline")
# re-run the initial validation
initial_subcloud_validate(subcloud)
subcloud_type, system_health, oam_floating_ip = \
_get_prestage_subcloud_info(subcloud.name)
# TODO(kmacleod) for orchestration, make sure we check this
# as part of strategy create
if is_system_controller_upgrading():
if subcloud_type != consts.SYSTEM_MODE_SIMPLEX:
raise exceptions.PrestagePreCheckFailedException(
subcloud=subcloud.name,
details='Prestage operations not allowed while system'
' controller upgrade is in progress.')
orch_skip=True,
details="Prestage operation is only accepted for a simplex"
" subcloud.")
if (subcloud_type != consts.SYSTEM_MODE_SIMPLEX or
subcloud.management_state != consts.MANAGEMENT_MANAGED):
raise exceptions.PrestagePreCheckFailedException(
subcloud=subcloud.name,
details='Prestage operation is only accepted for a simplex'
' subcloud that is currently online and managed.')
if (not force
if (not payload['force']
and not utils.pre_check_management_affected_alarm(system_health)):
raise exceptions.PrestagePreCheckFailedException(
subcloud=subcloud.name,
details='There is management affecting alarm(s) in the'
' subcloud. Please resolve the alarm condition(s)'
' or use --force option and try again.')
allowed_deploy_states = [consts.DEPLOY_STATE_DONE,
consts.PRESTAGE_STATE_FAILED,
consts.PRESTAGE_STATE_COMPLETE]
if subcloud.deploy_status not in allowed_deploy_states:
raise exceptions.PrestagePreCheckFailedException(
subcloud=subcloud.name,
details='Prestage operation is only allowed while'
' subcloud deploy status is one of: %s.'
' The current deploy status is %s.'
% (', '.join(allowed_deploy_states), subcloud.deploy_status))
orch_skip=False,
details="Subcloud has management affecting alarm(s)."
" Please resolve the alarm condition(s)"
" or use --force option and try again.")
return oam_floating_ip
@utils.synchronized('prestage-prepare-cleanup', external=True)
def cleanup_failed_preparation():
    """Delete the 'preparation failed' marker left by a previous run."""
    if not os.path.exists(PRESTAGE_PREPARATION_FAILED_FILE):
        # Nothing to clean up
        return
    LOG.debug("Cleanup: removing %s", PRESTAGE_PREPARATION_FAILED_FILE)
    os.remove(PRESTAGE_PREPARATION_FAILED_FILE)
def prestage_start(context, subcloud_id):
    """Set the subcloud deploy status to prestage-prepare.

    :returns: the result of the database subcloud update
    """
    return db_api.subcloud_update(
        context,
        subcloud_id,
        deploy_status=consts.PRESTAGE_STATE_PREPARE)
def prestage_complete(context, subcloud_id):
    """Set the subcloud deploy status to prestage-complete."""
    final_status = consts.PRESTAGE_STATE_COMPLETE
    db_api.subcloud_update(context, subcloud_id, deploy_status=final_status)
def prestage_fail(context, subcloud_id):
    """Set the subcloud deploy status to prestage-failed."""
    final_status = consts.PRESTAGE_STATE_FAILED
    db_api.subcloud_update(context, subcloud_id, deploy_status=final_status)
def is_upgrade(subcloud_version):
    """Return True if subcloud_version differs from SW_VERSION."""
    same_version = (SW_VERSION == subcloud_version)
    return not same_version
def prestage_subcloud(context, payload):
"""Subcloud prestaging
This is the standalone (not orchestrated) prestage implementation.
4 phases:
1. Prestage validation (already done by this point)
- Subcloud exists, is online, is managed, is AIO-SX
@ -173,14 +247,12 @@ def prestage_subcloud(context, payload):
subcloud=subcloud_name,
details="Subcloud does not exist")
# TODO(kmacleod) fail if prestaging orchestration in progress
subcloud = db_api.subcloud_update(
context, subcloud.id,
deploy_status=consts.PRESTAGE_STATE_PREPARE)
cleanup_failed_preparation()
subcloud = prestage_start(context, subcloud.id)
try:
apply_thread = threading.Thread(
target=run_prestage_thread, args=(context, subcloud, payload))
target=_prestage_standalone_thread,
args=(context, subcloud, payload))
apply_thread.start()
@ -188,50 +260,83 @@ def prestage_subcloud(context, payload):
except Exception:
LOG.exception("Subcloud prestaging failed %s" % subcloud_name)
db_api.subcloud_update(
context, subcloud_name,
deploy_status=consts.PRESTAGE_STATE_FAILED)
prestage_fail(context, subcloud.id)
def run_prestage_thread(context, subcloud, payload):
def _sync_run_prestage_prepare_packages(context, subcloud, payload):
    """Run the prepare prestage packages ansible playbook.

    Aborts immediately if the 'preparation failed' marker file exists.
    If the playbook run fails, the marker file is created and the
    exception is re-raised.

    :param context: request context passed to the database API
    :param subcloud: subcloud object; name, id and software_version
        are read
    :param payload: dict providing 'sysadmin_password' and
        'oam_floating_ip'
    :raises Exception: if a previous preparation failed, or if the
        playbook run fails
    """
    if os.path.exists(PRESTAGE_PREPARATION_FAILED_FILE):
        # LOG.warning rather than LOG.warn: 'warn' is a deprecated alias
        LOG.warning("Subcloud %s prestage preparation aborted due to "
                    "previous %s failure", subcloud.name,
                    consts.PRESTAGE_STATE_PREPARE)
        raise Exception("Aborted due to previous %s failure"
                        % consts.PRESTAGE_STATE_PREPARE)

    LOG.info("Running prepare prestage ansible script, version=%s "
             "(subcloud_id=%s)", SW_VERSION, subcloud.id)
    db_api.subcloud_update(context,
                           subcloud.id,
                           deploy_status=consts.PRESTAGE_STATE_PREPARE)

    # Ansible inventory filename for the specified subcloud
    ansible_subcloud_inventory_file = \
        utils.get_ansible_filename(subcloud.name,
                                   ANSIBLE_PRESTAGE_INVENTORY_SUFFIX)

    extra_vars_str = "current_software_version=%s previous_software_version=%s" \
        % (SW_VERSION, subcloud.software_version)

    try:
        _run_ansible(context,
                     ["ansible-playbook",
                      ANSIBLE_PREPARE_PRESTAGE_PACKAGES_PLAYBOOK,
                      "--inventory", ansible_subcloud_inventory_file,
                      "--extra-vars", extra_vars_str],
                     "prepare",
                     subcloud,
                     consts.PRESTAGE_STATE_PREPARE,
                     payload['sysadmin_password'],
                     payload['oam_floating_ip'],
                     ansible_subcloud_inventory_file)
    except Exception:
        # Flag the failure on file system so that other orchestrated
        # strategy steps in this run fail immediately. This file is
        # removed at the start of each orchestrated/standalone run.
        # Opening in append mode creates the file if it doesn't exist:
        with open(PRESTAGE_PREPARATION_FAILED_FILE, 'a'):
            pass
        raise

    LOG.info("Prepare prestage ansible successful")
@utils.synchronized('prestage-prepare-packages', external=True)
def prestage_prepare(context, subcloud, payload):
    """Run the prepare prestage packages playbook, but only if needed.

    Nothing is run when the subcloud software version is not an upgrade
    (reinstall case), or when the 'preparation completed' marker file
    already exists.
    """
    if not is_upgrade(subcloud.software_version):
        LOG.info("Skipping prestage package preparation (reinstall)")
        return

    if os.path.exists(PRESTAGE_PREPARATION_COMPLETED_FILE):
        LOG.info(
            "Skipping prestage package preparation (not required)")
        return

    _sync_run_prestage_prepare_packages(context, subcloud, payload)
def _prestage_standalone_thread(context, subcloud, payload):
"""Run the prestage operations inside a separate thread"""
try:
is_upgrade = SW_VERSION != subcloud.software_version
prestage_prepare(context, subcloud, payload)
prestage_packages(context, subcloud, payload)
prestage_images(context, subcloud, payload)
if is_upgrade:
# TODO(kmacleod): check for '.prestage_prepation_completed' file instead
# of Packages directory (not in place yet)
if not os.path.exists(
os.path.join(PREPARE_PRESTAGE_PACKAGES_OUTPUT_PATH,
'Packages')):
if not _run_prestage_prepare_packages(
context, subcloud.id):
db_api.subcloud_update(
context, subcloud.id,
deploy_status=consts.PRESTAGE_STATE_FAILED)
return
else:
LOG.info(
"Skipping prestage package preparation (not required)")
else:
LOG.info("Skipping prestage package preparation (reinstall)")
if _run_prestage_ansible(
context, subcloud, payload['oam_floating_ip'],
payload['sysadmin_password'], is_upgrade):
db_api.subcloud_update(
context, subcloud.id,
deploy_status=consts.PRESTAGE_STATE_COMPLETE)
LOG.info("Prestage complete: %s", subcloud.name)
else:
db_api.subcloud_update(
context, subcloud.id,
deploy_status=consts.PRESTAGE_STATE_FAILED)
prestage_complete(context, subcloud.id)
LOG.info("Prestage complete: %s", subcloud.name)
except Exception:
LOG.exception("Unexpected exception")
db_api.subcloud_update(context, subcloud.id,
deploy_status=consts.PRESTAGE_STATE_FAILED)
prestage_fail(context, subcloud.id)
raise
def _get_prestage_subcloud_info(subcloud_name):
@ -257,116 +362,115 @@ def _get_prestage_subcloud_info(subcloud_name):
LOG.exception(e)
raise exceptions.PrestagePreCheckFailedException(
subcloud=subcloud_name,
details='Failed to retrieve subcloud system mode and system health.')
details="Failed to retrieve subcloud system mode and system health.")
@utils.synchronized('prestage-prepare-packages', external=True)
def _run_prestage_prepare_packages(context, subcloud_id):
def _run_ansible(context, prestage_command, phase,
subcloud, deploy_status,
sysadmin_password, oam_floating_ip,
ansible_subcloud_inventory_file):
if deploy_status == consts.PRESTAGE_STATE_PREPARE:
LOG.info("Preparing prestage shared packages for subcloud: %s, version: %s",
subcloud.name, SW_VERSION)
else:
LOG.info("Prestaging %s for subcloud: %s, version: %s",
phase, subcloud.name, SW_VERSION)
LOG.info("Running prestage prepare packages script, version=%s "
"(subcloud_id=%s)", SW_VERSION, subcloud_id)
db_api.subcloud_update(context,
subcloud_id,
deploy_status=consts.PRESTAGE_STATE_PREPARE)
if not os.path.exists(PREPARE_PRESTAGE_PACKAGES_SCRIPT):
LOG.error("Prepare prestage packages script does not exist: %s",
PREPARE_PRESTAGE_PACKAGES_SCRIPT)
return False
LOG.info("Executing script: %s --release-id %s",
PREPARE_PRESTAGE_PACKAGES_SCRIPT, SW_VERSION)
try:
output = subprocess.check_output(
[PREPARE_PRESTAGE_PACKAGES_SCRIPT, "--release-id", SW_VERSION],
stderr=subprocess.STDOUT)
LOG.info("%s output:\n%s", PREPARE_PRESTAGE_PACKAGES_SCRIPT, output)
except subprocess.CalledProcessError:
LOG.exception("Failed to prepare prestage packages for %s", SW_VERSION)
return False
LOG.info("Prestage prepare packages successful")
return True
def _run_prestage_ansible(context, subcloud, oam_floating_ip,
sysadmin_password, is_upgrade):
subcloud.id,
deploy_status=deploy_status)
log_file = os.path.join(consts.DC_ANSIBLE_LOG_DIR, subcloud.name) + \
'_prestage_playbook_output.log'
# Ansible inventory filename for the specified subcloud
ansible_subcloud_inventory_file = \
utils.get_ansible_filename(subcloud.name, PRESTAGE_FILE_POSTFIX)
'_playbook_output.log'
# Create the ansible inventory for the new subcloud
utils.create_subcloud_inventory_with_admin_creds(
subcloud.name,
ansible_subcloud_inventory_file,
oam_floating_ip,
ansible_pass=sysadmin_password)
def _run_ansible(prestage_command, phase, deploy_status):
LOG.info("Prestaging %s for subcloud: %s, version: %s",
phase, subcloud.name, SW_VERSION)
db_api.subcloud_update(context,
subcloud.id,
deploy_status=deploy_status)
try:
run_playbook(log_file, prestage_command)
except PlaybookExecutionFailed:
LOG.error("Failed to run the prestage %s playbook"
" for subcloud %s, check individual log at "
"%s for detailed output.",
phase, subcloud.name, log_file)
return False
LOG.info("Prestage %s successful for subcloud %s",
phase, subcloud.name)
return True
# Always run prestage_packages.yml playbook.
#
# Pass the image_list_file to the prestage_images.yml playbook if the
# prestage images file has been uploaded for the target software
# version. If this file does not exist and the prestage is for upgrade,
# skip calling prestage_images.yml playbook.
#
# Ensure the final state is either prestage-failed or prestage-complete
# regardless whether prestage_images.yml playbook is skipped or not.
#
ansible_pass=base64.b64decode(sysadmin_password).decode('utf-8'))
try:
extra_vars_str = "software_version=%s" % SW_VERSION
if not _run_ansible(["ansible-playbook",
ANSIBLE_PRESTAGE_SUBCLOUD_PACKAGES_PLAYBOOK,
"--inventory", ansible_subcloud_inventory_file,
"--extra-vars", extra_vars_str],
"packages",
consts.PRESTAGE_STATE_PACKAGES):
return False
image_list_exists = \
os.path.exists(PREPARE_PRESTAGE_PACKAGES_IMAGES_LIST)
LOG.debug("prestage images list: %s, exists: %s",
PREPARE_PRESTAGE_PACKAGES_IMAGES_LIST, image_list_exists)
if is_upgrade and image_list_exists:
extra_vars_str += (" image_list_file=%s" %
PREPARE_PRESTAGE_PACKAGES_IMAGES_LIST)
if not is_upgrade or (is_upgrade and image_list_exists):
if not _run_ansible(["ansible-playbook",
ANSIBLE_PRESTAGE_SUBCLOUD_IMAGES_PLAYBOOK,
"--inventory",
ansible_subcloud_inventory_file,
"--extra-vars", extra_vars_str],
"images",
consts.PRESTAGE_STATE_IMAGES):
return False
else:
LOG.info("Skipping ansible prestage images step, upgrade: %s,"
" image_list_exists: %s",
is_upgrade, image_list_exists)
return True
run_playbook(log_file, prestage_command)
except PlaybookExecutionFailed as ex:
msg = ("Prestaging %s failed for subcloud %s,"
" check individual log at %s for detailed output."
% (phase, subcloud.name, log_file))
LOG.exception("%s: %s", msg, ex)
raise Exception(msg)
finally:
utils.delete_subcloud_inventory(ansible_subcloud_inventory_file)
LOG.info("Prestage %s successful for subcloud %s",
phase, subcloud.name)
def prestage_packages(context, subcloud, payload):
    """Run the prestage packages ansible playbook for the subcloud.

    :param context: request context, forwarded to _run_ansible
    :param subcloud: subcloud object; its name selects the inventory file
    :param payload: dict providing 'sysadmin_password' and
        'oam_floating_ip'
    """
    # Ansible inventory filename for the specified subcloud
    inventory_file = utils.get_ansible_filename(
        subcloud.name, ANSIBLE_PRESTAGE_INVENTORY_SUFFIX)

    playbook_command = [
        "ansible-playbook",
        ANSIBLE_PRESTAGE_SUBCLOUD_PACKAGES_PLAYBOOK,
        "--inventory", inventory_file,
        "--extra-vars", "software_version=%s" % SW_VERSION,
    ]

    _run_ansible(context,
                 playbook_command,
                 "packages",
                 subcloud,
                 consts.PRESTAGE_STATE_PACKAGES,
                 payload['sysadmin_password'],
                 payload['oam_floating_ip'],
                 inventory_file)
def prestage_images(context, subcloud, payload):
    """Run the prestage images ansible playbook, if applicable.

    Approach:

    If a prestage images list file (name starting with 'prestage_images')
    is found in DEPLOY_BASE_DIR and the prestage is for an upgrade, pass
    it to the playbook as image_list_file.

    If no such file exists and the prestage is for an upgrade, skip
    calling the playbook.

    If the prestage is not for an upgrade, always run the playbook
    (without image_list_file).

    This function does not set the final prestage-complete /
    prestage-failed deploy status; that is left to the caller.

    :param context: request context, forwarded to _run_ansible
    :param subcloud: subcloud object; name and software_version are read
    :param payload: dict providing 'sysadmin_password' and
        'oam_floating_ip'
    """
    # get_filename_by_prefix() lists the directory and yields None when
    # no entry matches. os.path.join() raises on a None component, so
    # only build the path once a filename has actually been found.
    image_list_file = None
    if os.path.isdir(DEPLOY_BASE_DIR):
        image_list_filename = utils.get_filename_by_prefix(
            DEPLOY_BASE_DIR, 'prestage_images')
        if image_list_filename:
            image_list_file = os.path.join(DEPLOY_BASE_DIR,
                                           image_list_filename)
    image_list_exists = \
        image_list_file is not None and os.path.exists(image_list_file)
    LOG.debug("prestage images list: %s, exists: %s",
              image_list_file, image_list_exists)

    upgrade = is_upgrade(subcloud.software_version)
    if upgrade and not image_list_exists:
        LOG.info("Skipping ansible prestage images step, upgrade: %s,"
                 " image_list_exists: %s",
                 upgrade, image_list_exists)
        return

    extra_vars_str = "software_version=%s" % SW_VERSION
    if upgrade:
        # image_list_exists is necessarily True at this point
        extra_vars_str += (" image_list_file=%s" % image_list_file)

    # Ansible inventory filename for the specified subcloud
    ansible_subcloud_inventory_file = \
        utils.get_ansible_filename(subcloud.name,
                                   ANSIBLE_PRESTAGE_INVENTORY_SUFFIX)
    _run_ansible(context,
                 ["ansible-playbook",
                  ANSIBLE_PRESTAGE_SUBCLOUD_IMAGES_PLAYBOOK,
                  "--inventory", ansible_subcloud_inventory_file,
                  "--extra-vars", extra_vars_str],
                 "images",
                 subcloud,
                 consts.PRESTAGE_STATE_IMAGES,
                 payload['sysadmin_password'],
                 payload['oam_floating_ip'],
                 ansible_subcloud_inventory_file)

View File

@ -284,6 +284,10 @@ def synchronized(name, external=True, fair=False):
def get_filename_by_prefix(dir_path, prefix):
    """Return the first entry of 'dir_path' whose name starts with 'prefix'.

    Only the base filename is returned (dir_path is not included).
    None is returned when no entry matches.
    """
    matching_names = (entry for entry in os.listdir(dir_path)
                      if entry.startswith(prefix))
    return next(matching_names, None)

View File

@ -351,11 +351,13 @@ def sw_update_strategy_get(context, update_type=None):
return IMPL.sw_update_strategy_get(context, update_type=update_type)
def sw_update_strategy_update(context, state=None, update_type=None):
def sw_update_strategy_update(context, state=None,
update_type=None, additional_args=None):
"""Update a sw update or raise if it does not exist."""
return IMPL.sw_update_strategy_update(context,
state,
update_type=update_type)
update_type=update_type,
additional_args=additional_args)
def sw_update_strategy_destroy(context, update_type=None):

View File

@ -540,12 +540,20 @@ def sw_update_strategy_create(context, type, subcloud_apply_type,
@require_admin_context
def sw_update_strategy_update(context, state=None, update_type=None):
def sw_update_strategy_update(context, state=None,
update_type=None, additional_args=None):
with write_session() as session:
sw_update_strategy_ref = \
sw_update_strategy_get(context, update_type=update_type)
if state is not None:
sw_update_strategy_ref.state = state
if additional_args is not None:
if sw_update_strategy_ref.extra_args is None:
sw_update_strategy_ref.extra_args = additional_args
else:
# extend the existing dictionary
sw_update_strategy_ref.extra_args = dict(
sw_update_strategy_ref.extra_args, **additional_args)
sw_update_strategy_ref.save(session)
return sw_update_strategy_ref

View File

@ -429,6 +429,7 @@ class OrchThread(threading.Thread):
LOG.exception("(%s) exception during delete"
% self.update_type)
raise e
LOG.info("(%s) Finished deleting update strategy" % self.update_type)
def delete_subcloud_strategy(self, strategy_step):
"""Delete the update strategy in this subcloud
@ -448,6 +449,10 @@ class OrchThread(threading.Thread):
def do_delete_subcloud_strategy(self, strategy_step):
"""Delete the vim strategy in this subcloud"""
if self.vim_strategy_name is None:
return
region = self.get_region_name(strategy_step)
LOG.info("(%s) Deleting vim strategy:(%s) for region:(%s)"

View File

@ -0,0 +1,51 @@
# Copyright (c) 2022 Wind River Systems, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from oslo_log import log as logging
from dcmanager.common import consts
from dcmanager.orchestrator.orch_thread import OrchThread
from dcmanager.orchestrator.states.prestage import states
LOG = logging.getLogger(__name__)
class PrestageOrchThread(OrchThread):
    """Orchestration thread for the prestage strategy type."""

    # Operator class for each prestage orchestration state (every state
    # must have one). Entries are in their typical execution order.
    STATE_OPERATORS = {
        consts.STRATEGY_STATE_PRESTAGE_PRE_CHECK:
            states.PrestagePreCheckState,
        consts.STRATEGY_STATE_PRESTAGE_PREPARE:
            states.PrestagePrepareState,
        consts.STRATEGY_STATE_PRESTAGE_PACKAGES:
            states.PrestagePackagesState,
        consts.STRATEGY_STATE_PRESTAGE_IMAGES:
            states.PrestageImagesState,
    }

    def __init__(self, strategy_lock, audit_rpc_client):
        # NOTE(review): the positional None is presumably the vim strategy
        # name (prestage has none) - confirm against OrchThread.__init__
        super(PrestageOrchThread, self).__init__(
            strategy_lock,
            audit_rpc_client,
            consts.SW_UPDATE_TYPE_PRESTAGE,
            None,
            consts.STRATEGY_STATE_PRESTAGE_PRE_CHECK)

    def trigger_audit(self):
        """Trigger an audit (intentionally a no-op for prestage)."""

View File

@ -0,0 +1,174 @@
#
# Copyright (c) 2022 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import abc
from oslo_log import log as logging
from dcmanager.common import consts
from dcmanager.common import exceptions
from dcmanager.common import prestage
from dcmanager.common import utils
from dcmanager.db import api as db_api
from dcmanager.orchestrator.states.base import BaseState
LOG = logging.getLogger(__name__)
class PrestageState(BaseState):
    """Common base class of the prestage orchestration states."""

    def __init__(self, next_state, region_name):
        super(PrestageState, self).__init__(
            next_state=next_state, region_name=region_name)

    @abc.abstractmethod
    def _do_state_action(self, strategy_step):
        """State specific work; provided by each concrete state."""

    def perform_state_action(self, strategy_step):
        """Run the state action with common error handling.

        On any exception the subcloud is marked as prestage failed and
        the exception is re-raised. Otherwise the next state is returned.
        """
        try:
            self._do_state_action(strategy_step)
        except Exception:
            prestage.prestage_fail(self.context, strategy_step.subcloud.id)
            raise
        else:
            # state machine can proceed to the next state
            return self.next_state
class PrestagePreCheckState(PrestageState):
    """Prestage pre-check state: validate the subcloud before prestaging."""

    def __init__(self, region_name):
        super(PrestagePreCheckState, self).__init__(
            next_state=consts.STRATEGY_STATE_PRESTAGE_PREPARE,
            region_name=region_name)

    @utils.synchronized('prestage-update-extra-args', external=True)
    def _update_oam_floating_ip(self, strategy_step, oam_floating_ip):
        """Record the subcloud oam_floating_ip in the strategy extra args.

        The 'oam_floating_ip_dict' entry (keyed by subcloud name) is
        updated if present, created otherwise, then written back through
        the database API.
        """
        # refresh the extra_args
        current_args = utils.get_sw_update_strategy_extra_args(self.context)
        if 'oam_floating_ip_dict' not in current_args:
            LOG.debug("Creating oam_floating_ip_dict: %s: %s",
                      strategy_step.subcloud.name, oam_floating_ip)
            ip_by_subcloud = {
                strategy_step.subcloud.name: oam_floating_ip
            }
        else:
            LOG.debug("Updating oam_floating_ip_dict: %s: %s",
                      strategy_step.subcloud.name, oam_floating_ip)
            ip_by_subcloud = current_args['oam_floating_ip_dict']
            ip_by_subcloud[strategy_step.subcloud.name] = oam_floating_ip

        db_api.sw_update_strategy_update(
            self.context, state=None, update_type=None,
            additional_args={'oam_floating_ip_dict': ip_by_subcloud})

    def _do_state_action(self, strategy_step):
        """Validate the subcloud and mark the prestage as started.

        A pre-check failure flagged 'orch_skip' is not an error: the
        skip reason is stored in the strategy step details and the next
        state is overridden to complete. Any other pre-check failure is
        logged and re-raised.
        """
        extra_args = utils.get_sw_update_strategy_extra_args(self.context)
        if extra_args is None:
            message = "Prestage pre-check: missing all mandatory arguments"
            self.error_log(strategy_step, message)
            raise Exception(message)

        payload = dict(
            sysadmin_password=extra_args['sysadmin_password'],
            force=extra_args['force'])

        try:
            oam_floating_ip = prestage.validate_prestage(
                strategy_step.subcloud, payload)
            self._update_oam_floating_ip(strategy_step, oam_floating_ip)

            if strategy_step.stage == 1:
                # Note: this cleanup happens for every subcloud, but they
                # are all processed before moving on to the next strategy
                # step
                # TODO(kmacleod) although this is a quick check, it is
                # synchronized, so we may want to figure out a better
                # way to only run this once
                prestage.cleanup_failed_preparation()

            prestage.prestage_start(self.context, strategy_step.subcloud.id)
        except exceptions.PrestagePreCheckFailedException as ex:
            if not ex.orch_skip:
                self.info_log(strategy_step, "Pre-check failed: %s" % ex)
                raise

            self.info_log(strategy_step,
                          "Pre-check: skipping subcloud: %s" % ex)
            # Update the details to show that this subcloud has been skipped
            db_api.strategy_step_update(self.context,
                                        strategy_step.subcloud.id,
                                        details=str(ex))
            self.override_next_state(consts.STRATEGY_STATE_COMPLETE)
        else:
            self.info_log(strategy_step, "Pre-check pass")
class PrestagePrepareState(PrestageState):
    """Prestage prepare state: run the package preparation step."""

    def __init__(self, region_name):
        super(PrestagePrepareState, self).__init__(
            next_state=consts.STRATEGY_STATE_PRESTAGE_PACKAGES,
            region_name=region_name)

    def _do_state_action(self, strategy_step):
        """Build the payload from the strategy extra args and prepare."""
        strategy_args = utils.get_sw_update_strategy_extra_args(self.context)
        subcloud = strategy_step.subcloud
        payload = dict(
            sysadmin_password=strategy_args['sysadmin_password'],
            oam_floating_ip=(
                strategy_args['oam_floating_ip_dict'][subcloud.name]),
            force=strategy_args['force'])
        prestage.prestage_prepare(self.context, strategy_step.subcloud, payload)
        self.info_log(strategy_step, "Prepare finished")
class PrestagePackagesState(PrestageState):
    """Prestage packages state: run the packages prestaging step."""

    def __init__(self, region_name):
        super(PrestagePackagesState, self).__init__(
            next_state=consts.STRATEGY_STATE_PRESTAGE_IMAGES,
            region_name=region_name)

    def _do_state_action(self, strategy_step):
        """Build the payload from the strategy extra args and prestage."""
        strategy_args = utils.get_sw_update_strategy_extra_args(self.context)
        subcloud = strategy_step.subcloud
        payload = dict(
            sysadmin_password=strategy_args['sysadmin_password'],
            oam_floating_ip=(
                strategy_args['oam_floating_ip_dict'][subcloud.name]),
            force=strategy_args['force'])
        prestage.prestage_packages(self.context,
                                   strategy_step.subcloud,
                                   payload)
        self.info_log(strategy_step, "Packages finished")
class PrestageImagesState(PrestageState):
    """Prestage images state: final step of the prestage strategy."""

    def __init__(self, region_name):
        super(PrestageImagesState, self).__init__(
            next_state=consts.STRATEGY_STATE_COMPLETE,
            region_name=region_name)

    def _do_state_action(self, strategy_step):
        """Prestage the images, then mark the subcloud prestage complete."""
        strategy_args = utils.get_sw_update_strategy_extra_args(self.context)
        subcloud = strategy_step.subcloud
        payload = dict(
            sysadmin_password=strategy_args['sysadmin_password'],
            oam_floating_ip=(
                strategy_args['oam_floating_ip_dict'][subcloud.name]),
            force=strategy_args['force'])
        prestage.prestage_images(self.context, strategy_step.subcloud, payload)
        self.info_log(strategy_step, "Images finished")
        # Reached only when the images step did not raise
        prestage.prestage_complete(self.context, strategy_step.subcloud.id)

View File

@ -24,6 +24,7 @@ from dcmanager.audit import rpcapi as dcmanager_audit_rpc_client
from dcmanager.common import consts
from dcmanager.common import exceptions
from dcmanager.common import manager
from dcmanager.common import prestage
from dcmanager.common import utils
from dcmanager.db import api as db_api
from dcmanager.orchestrator.fw_update_orch_thread import FwUpdateOrchThread
@ -32,6 +33,7 @@ from dcmanager.orchestrator.kube_rootca_update_orch_thread \
from dcmanager.orchestrator.kube_upgrade_orch_thread \
import KubeUpgradeOrchThread
from dcmanager.orchestrator.patch_orch_thread import PatchOrchThread
from dcmanager.orchestrator.prestage_orch_thread import PrestageOrchThread
from dcmanager.orchestrator.sw_upgrade_orch_thread import SwUpgradeOrchThread
from dcorch.common import consts as dcorch_consts
@ -77,6 +79,10 @@ class SwUpdateManager(manager.Manager):
self.audit_rpc_client)
self.kube_rootca_update_orch_thread.start()
self.prestage_orch_thread = PrestageOrchThread(self.strategy_lock,
self.audit_rpc_client)
self.prestage_orch_thread.start()
def stop(self):
# Stop (and join) the worker threads
# - patch orchestration thread
@ -88,12 +94,15 @@ class SwUpdateManager(manager.Manager):
# - fw update orchestration thread
self.fw_update_orch_thread.stop()
self.fw_update_orch_thread.join()
# - kube upgrade orchestration thread
# - kube upgrade orchestration thread
self.kube_upgrade_orch_thread.stop()
self.kube_upgrade_orch_thread.join()
# - kube rootca update orchestration thread
self.kube_rootca_update_orch_thread.stop()
self.kube_rootca_update_orch_thread.join()
# - prestage orchestration thread
self.prestage_orch_thread.stop()
self.prestage_orch_thread.join()
def _validate_subcloud_status_sync(self, strategy_type,
subcloud_status, force,
@ -148,6 +157,12 @@ class SwUpdateManager(manager.Manager):
dcorch_consts.ENDPOINT_TYPE_KUBE_ROOTCA and
subcloud_status.sync_status ==
consts.SYNC_STATUS_OUT_OF_SYNC)
elif strategy_type == consts.SW_UPDATE_TYPE_PRESTAGE:
# For prestage we reuse the ENDPOINT_TYPE_LOAD.
# We just need to key off a unique endpoint,
# so that the strategy is created only once.
return (subcloud_status.endpoint_type
== dcorch_consts.ENDPOINT_TYPE_LOAD)
# Unimplemented strategy_type status check. Log an error
LOG.error("_validate_subcloud_status_sync for %s not implemented" %
strategy_type)
@ -295,6 +310,7 @@ class SwUpdateManager(manager.Manager):
# Has the user specified a specific subcloud?
# todo(abailey): refactor this code to use classes
cloud_name = payload.get('cloud_name')
prestage_global_validated = False
if cloud_name and cloud_name != consts.SYSTEM_CONTROLLER_NAME:
# Make sure subcloud exists
try:
@ -354,6 +370,15 @@ class SwUpdateManager(manager.Manager):
raise exceptions.BadRequest(
resource='strategy',
msg='Subcloud %s does not require patching' % cloud_name)
elif strategy_type == consts.SW_UPDATE_TYPE_PRESTAGE:
# Do initial validation for subcloud
try:
prestage.global_prestage_validate(payload)
prestage_global_validated = True
prestage.initial_subcloud_validate(subcloud)
except exceptions.PrestagePreCheckFailedException as ex:
raise exceptions.BadRequest(resource='strategy',
msg=str(ex))
extra_args = None
# kube rootca update orchestration supports extra creation args
@ -373,6 +398,20 @@ class SwUpdateManager(manager.Manager):
consts.EXTRA_ARGS_TO_VERSION:
payload.get(consts.EXTRA_ARGS_TO_VERSION),
}
elif strategy_type == consts.SW_UPDATE_TYPE_PRESTAGE:
if not prestage_global_validated:
try:
prestage.global_prestage_validate(payload)
except exceptions.PrestagePreCheckFailedException as ex:
raise exceptions.BadRequest(
resource='strategy',
msg=str(ex))
extra_args = {
consts.EXTRA_ARGS_SYSADMIN_PASSWORD:
payload.get(consts.EXTRA_ARGS_SYSADMIN_PASSWORD),
consts.EXTRA_ARGS_FORCE: force
}
# Don't create a strategy if any of the subclouds is online and the
# relevant sync status is unknown. Offline subcloud is skipped unless
@ -386,6 +425,7 @@ class SwUpdateManager(manager.Manager):
else:
subclouds = db_api.subcloud_get_all_with_status(context)
subclouds_processed = list()
for subcloud, subcloud_status in subclouds:
if (cloud_name and subcloud.name != cloud_name or
subcloud.management_state != consts.MANAGEMENT_MANAGED):
@ -448,6 +488,16 @@ class SwUpdateManager(manager.Manager):
resource='strategy',
msg='Kube rootca update sync status is unknown for '
'one or more subclouds')
elif strategy_type == consts.SW_UPDATE_TYPE_PRESTAGE:
if subcloud.name not in subclouds_processed:
# Do initial validation for subcloud
try:
prestage.initial_subcloud_validate(subcloud)
except exceptions.PrestagePreCheckFailedException:
LOG.warn("Excluding subcloud from prestage strategy: %s",
subcloud.name)
continue
subclouds_processed.append(subcloud.name)
# handle extra_args processing such as staging to the vault
self._process_extra_args_creation(strategy_type, extra_args)
@ -519,7 +569,10 @@ class SwUpdateManager(manager.Manager):
status,
force,
subcloud.availability_status):
LOG.debug("Created for %s" % subcloud.id)
LOG.debug("Creating strategy_step for endpoint_type: %s, "
"sync_status: %s, subcloud: %s, id: %s",
status.endpoint_type, status.sync_status,
subcloud.name, subcloud.id)
db_api.strategy_step_create(
context,
subcloud.id,

View File

@ -1,4 +1,4 @@
# Copyright (c) 2017-2021 Wind River Systems, Inc.
# Copyright (c) 2017-2022 Wind River Systems, Inc.
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
@ -164,6 +164,20 @@ class TestSwUpdate(base.DCManagerTestCase):
self.fake_kube_rootca_update_orch_thread
self.addCleanup(p.stop)
if strategy_type == consts.SW_UPDATE_TYPE_PRESTAGE:
sw_update_manager.PrestageOrchThread.stopped = lambda x: False
worker = \
sw_update_manager.PrestageOrchThread(mock_strategy_lock,
mock_dcmanager_audit_api)
else:
# mock the prestage orch thread
self.fake_prestage_orch_thread = FakeOrchThread()
p = mock.patch.object(sw_update_manager, 'PrestageOrchThread')
self.mock_prestage_orch_thread = p.start()
self.mock_prestage_orch_thread.return_value = \
self.fake_prestage_orch_thread
self.addCleanup(p.stop)
return worker
def setup_subcloud(self):

View File

@ -595,6 +595,13 @@ class TestSwUpdateManager(base.DCManagerTestCase):
self.fake_kube_rootca_update_orch_thread
self.addCleanup(p.stop)
self.fake_prestage_orch_thread = FakeOrchThread()
p = mock.patch.object(sw_update_manager, 'PrestageOrchThread')
self.mock_prestage_orch_thread = p.start()
self.mock_prestage_orch_thread.return_value = \
self.fake_prestage_orch_thread
self.addCleanup(p.stop)
# Mock the dcmanager audit API
self.fake_dcmanager_audit_api = FakeDCManagerAuditAPI()
p = mock.patch('dcmanager.audit.rpcapi.ManagerAuditClient')