# Copyright (c) 2023 Wind River Systems, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or # implied. # See the License for the specific language governing permissions and # limitations under the License. # """ Common prestaging operations. These are shared across dcmanager (SubcloudManager) and orchestration. """ import base64 import os import threading from oslo_config import cfg from oslo_log import log as logging from tsconfig.tsconfig import SW_VERSION from dccommon import consts as dccommon_consts from dccommon.drivers.openstack.sdk_platform import OpenStackDriver from dccommon.drivers.openstack.sysinv_v1 import SysinvClient from dccommon.exceptions import PlaybookExecutionFailed from dccommon.exceptions import PlaybookExecutionTimeout from dccommon.utils import run_playbook from dcmanager.common import consts from dcmanager.common import exceptions from dcmanager.common import utils from dcmanager.db import api as db_api LOG = logging.getLogger(__name__) CONF = cfg.CONF PRESTAGING_REPO_DIR = '/var/run/prestaging_repo' DEPLOY_BASE_DIR = dccommon_consts.DEPLOY_DIR + '/' + SW_VERSION PRESTAGE_PREPARATION_COMPLETED_FILE = os.path.join( PRESTAGING_REPO_DIR, '.prestage_preparation_completed') PRESTAGE_PREPARATION_FAILED_FILE = os.path.join( DEPLOY_BASE_DIR, '.prestage_preparation_failed') ANSIBLE_PREPARE_PRESTAGE_PACKAGES_PLAYBOOK = \ "/usr/share/ansible/stx-ansible/playbooks/prepare_prestage_packages.yml" ANSIBLE_PRESTAGE_SUBCLOUD_PACKAGES_PLAYBOOK = \ "/usr/share/ansible/stx-ansible/playbooks/prestage_sw_packages.yml" ANSIBLE_PRESTAGE_SUBCLOUD_IMAGES_PLAYBOOK = \ "/usr/share/ansible/stx-ansible/playbooks/prestage_images.yml" ANSIBLE_PRESTAGE_INVENTORY_SUFFIX = '_prestage_inventory.yml' LAST_SW_VERSION_IN_CENTOS = "22.06" def is_deploy_status_prestage(deploy_status): return deploy_status in (consts.PRESTAGE_STATE_PREPARE, consts.PRESTAGE_STATE_PACKAGES, consts.PRESTAGE_STATE_IMAGES, consts.PRESTAGE_STATE_FAILED, consts.PRESTAGE_STATE_COMPLETE) def _get_system_controller_upgrades(): # get a cached keystone client (and token) try: os_client = OpenStackDriver( region_name=dccommon_consts.SYSTEM_CONTROLLER_NAME, region_clients=None) except Exception: LOG.exception("Failed to get keystone client for %s", dccommon_consts.SYSTEM_CONTROLLER_NAME) raise ks_client = os_client.keystone_client sysinv_client = SysinvClient( dccommon_consts.SYSTEM_CONTROLLER_NAME, ks_client.session, endpoint=ks_client.endpoint_cache.get_endpoint('sysinv')) return sysinv_client.get_upgrades() def is_system_controller_upgrading(): return len(_get_system_controller_upgrades()) != 0 def global_prestage_validate(payload): """Global prestage validation (not subcloud-specific)""" if is_system_controller_upgrading(): raise exceptions.PrestagePreCheckFailedException( subcloud=dccommon_consts.SYSTEM_CONTROLLER_NAME, details='Prestage operations not allowed while system' ' controller upgrade is in progress.') if ('sysadmin_password' not in payload or payload['sysadmin_password'] is None or payload['sysadmin_password'] == ''): raise exceptions.PrestagePreCheckFailedException( subcloud=None, orch_skip=False, details="Missing required parameter 'sysadmin_password'") # Ensure we can decode the sysadmin_password # (we decode again when running ansible) try: base64.b64decode(payload['sysadmin_password']).decode('utf-8') except Exception as ex: raise exceptions.PrestagePreCheckFailedException( subcloud=None, orch_skip=False, details="Failed to decode subcloud sysadmin_password," " verify the password is base64 encoded." " Details: %s" % ex) def initial_subcloud_validate(subcloud): """Basic validation a subcloud prestage operation. Raises a PrestageCheckFailedException on failure. """ LOG.debug("Validating subcloud prestage '%s'", subcloud.name) if subcloud.availability_status != dccommon_consts.AVAILABILITY_ONLINE: raise exceptions.PrestagePreCheckFailedException( subcloud=subcloud.name, orch_skip=True, details="Subcloud is offline.") if subcloud.management_state != dccommon_consts.MANAGEMENT_MANAGED: raise exceptions.PrestagePreCheckFailedException( subcloud=subcloud.name, orch_skip=True, details="Subcloud is not managed.") allowed_deploy_states = [consts.DEPLOY_STATE_DONE, consts.PRESTAGE_STATE_FAILED, consts.PRESTAGE_STATE_COMPLETE] if subcloud.deploy_status not in allowed_deploy_states: raise exceptions.PrestagePreCheckFailedException( subcloud=subcloud.name, orch_skip=True, details="Prestage operation is only allowed while" " subcloud deploy status is one of: %s." " The current deploy status is %s." % (', '.join(allowed_deploy_states), subcloud.deploy_status)) def validate_prestage(subcloud, payload): """Validate a subcloud prestage operation. Prestage conditions validation - Subcloud exists - Subcloud is an AIO-SX - Subcloud is online - Subcloud is managed - Subcloud has no management-affecting alarms (unless force=true) Raises a PrestageCheckFailedException on failure. """ LOG.debug("Validating subcloud prestage '%s'", subcloud.name) # re-run the initial validation initial_subcloud_validate(subcloud) subcloud_type, system_health, oam_floating_ip = \ _get_prestage_subcloud_info(subcloud.name) if subcloud_type != consts.SYSTEM_MODE_SIMPLEX: raise exceptions.PrestagePreCheckFailedException( subcloud=subcloud.name, orch_skip=True, details="Prestage operation is only accepted for a simplex" " subcloud.") if (not payload['force'] and not utils.pre_check_management_affected_alarm(system_health)): raise exceptions.PrestagePreCheckFailedException( subcloud=subcloud.name, orch_skip=False, details="Subcloud has management affecting alarm(s)." " Please resolve the alarm condition(s)" " or use --force option and try again.") return oam_floating_ip @utils.synchronized('prestage-prepare-cleanup', external=True) def cleanup_failed_preparation(): """Remove the preparation failed file if it exists from a previous run""" if os.path.exists(PRESTAGE_PREPARATION_FAILED_FILE): LOG.debug("Cleanup: removing %s", PRESTAGE_PREPARATION_FAILED_FILE) os.remove(PRESTAGE_PREPARATION_FAILED_FILE) def prestage_start(context, subcloud_id): subcloud = db_api.subcloud_update( context, subcloud_id, deploy_status=consts.PRESTAGE_STATE_PREPARE) return subcloud def prestage_complete(context, subcloud_id): db_api.subcloud_update( context, subcloud_id, deploy_status=consts.PRESTAGE_STATE_COMPLETE) def prestage_fail(context, subcloud_id): db_api.subcloud_update( context, subcloud_id, deploy_status=consts.PRESTAGE_STATE_FAILED) def is_upgrade(subcloud_version): return SW_VERSION != subcloud_version def prestage_subcloud(context, payload): """Subcloud prestaging This is the standalone (not orchestrated) prestage implementation. 4 phases: 1. Prestage validation (already done by this point) - Subcloud exists, is online, is managed, is AIO-SX - Subcloud has no management-affecting alarms (unless force is given) 2. Packages preparation - prestage-prepare-packages.sh 3. Packages prestaging - run prestage_packages.yml ansible playbook 4. Images prestaging - run prestage_images.yml ansible playbook """ subcloud_name = payload['subcloud_name'] LOG.info("Prestaging subcloud: %s, force=%s" % (subcloud_name, payload['force'])) try: subcloud = db_api.subcloud_get_by_name(context, subcloud_name) except exceptions.SubcloudNameNotFound: LOG.info("Prestage validation failure: " "subcloud '%s' does not exist", subcloud_name) raise exceptions.PrestagePreCheckFailedException( subcloud=subcloud_name, details="Subcloud does not exist") cleanup_failed_preparation() subcloud = prestage_start(context, subcloud.id) try: apply_thread = threading.Thread( target=_prestage_standalone_thread, args=(context, subcloud, payload)) apply_thread.start() return db_api.subcloud_db_model_to_dict(subcloud) except Exception: LOG.exception("Subcloud prestaging failed %s" % subcloud_name) prestage_fail(context, subcloud.id) def _sync_run_prestage_prepare_packages(context, subcloud, payload): """Run prepare prestage packages ansible script.""" if os.path.exists(PRESTAGE_PREPARATION_FAILED_FILE): LOG.warn("Subcloud %s prestage preparation aborted due to " "previous %s failure", subcloud.name, consts.PRESTAGE_STATE_PREPARE) raise Exception("Aborted due to previous %s failure" % consts.PRESTAGE_STATE_PREPARE) LOG.info("Running prepare prestage ansible script, version=%s " "(subcloud_id=%s)", SW_VERSION, subcloud.id) db_api.subcloud_update(context, subcloud.id, deploy_status=consts.PRESTAGE_STATE_PREPARE) # Ansible inventory filename for the specified subcloud ansible_subcloud_inventory_file = \ utils.get_ansible_filename(subcloud.name, ANSIBLE_PRESTAGE_INVENTORY_SUFFIX) extra_vars_str = "current_software_version=%s previous_software_version=%s" \ % (SW_VERSION, subcloud.software_version) try: _run_ansible(context, ["ansible-playbook", ANSIBLE_PREPARE_PRESTAGE_PACKAGES_PLAYBOOK, "--inventory", ansible_subcloud_inventory_file, "--extra-vars", extra_vars_str], "prepare", subcloud, consts.PRESTAGE_STATE_PREPARE, payload['sysadmin_password'], payload['oam_floating_ip'], ansible_subcloud_inventory_file, consts.PRESTAGE_PREPARE_TIMEOUT) except Exception: # Flag the failure on file system so that other orchestrated # strategy steps in this run fail immediately. This file is # removed at the start of each orchestrated/standalone run. # This creates the file if it doesn't exist: with open(PRESTAGE_PREPARATION_FAILED_FILE, 'a'): pass raise LOG.info("Prepare prestage ansible successful") # TODO(Shrikumar): Cleanup this function, especially the comparison for # software versions. # Rationale: In CentOS, prestage_prepare is required; in Debian, it is not. @utils.synchronized('prestage-prepare-packages', external=True) def prestage_prepare(context, subcloud, payload): """Run the prepare prestage packages playbook if required.""" if SW_VERSION > LAST_SW_VERSION_IN_CENTOS: LOG.info("Skipping prestage package preparation in Debian") return if is_upgrade(subcloud.software_version): if not os.path.exists(PRESTAGE_PREPARATION_COMPLETED_FILE): _sync_run_prestage_prepare_packages(context, subcloud, payload) else: LOG.info( "Skipping prestage package preparation (not required)") else: LOG.info("Skipping prestage package preparation (reinstall)") def _prestage_standalone_thread(context, subcloud, payload): """Run the prestage operations inside a separate thread""" try: prestage_prepare(context, subcloud, payload) prestage_packages(context, subcloud, payload) prestage_images(context, subcloud, payload) prestage_complete(context, subcloud.id) LOG.info("Prestage complete: %s", subcloud.name) except Exception: prestage_fail(context, subcloud.id) raise def _get_prestage_subcloud_info(subcloud_name): """Retrieve prestage data from the subcloud. Pull all required data here in order to minimize keystone/sysinv client interactions. """ try: os_client = OpenStackDriver(region_name=subcloud_name, region_clients=None) keystone_client = os_client.keystone_client endpoint = keystone_client.endpoint_cache.get_endpoint('sysinv') sysinv_client = SysinvClient(subcloud_name, keystone_client.session, endpoint=endpoint) mode = sysinv_client.get_system().system_mode health = sysinv_client.get_system_health() oam_floating_ip = sysinv_client.get_oam_addresses().oam_floating_ip return mode, health, oam_floating_ip except Exception as e: LOG.exception(e) raise exceptions.PrestagePreCheckFailedException( subcloud=subcloud_name, details="Failed to retrieve subcloud system mode and system health.") def _run_ansible(context, prestage_command, phase, subcloud, deploy_status, sysadmin_password, oam_floating_ip, ansible_subcloud_inventory_file, timeout_seconds=None): if not timeout_seconds: # We always want to set a timeout in prestaging operations: timeout_seconds = CONF.playbook_timeout if deploy_status == consts.PRESTAGE_STATE_PREPARE: LOG.info(("Preparing prestage shared packages for subcloud: %s, " "version: %s, timeout: %ss"), subcloud.name, SW_VERSION, timeout_seconds) else: LOG.info("Prestaging %s for subcloud: %s, version: %s, timeout: %ss", phase, subcloud.name, SW_VERSION, timeout_seconds) db_api.subcloud_update(context, subcloud.id, deploy_status=deploy_status) log_file = os.path.join(consts.DC_ANSIBLE_LOG_DIR, subcloud.name) + \ '_playbook_output.log' # Create the ansible inventory for the new subcloud utils.create_subcloud_inventory_with_admin_creds( subcloud.name, ansible_subcloud_inventory_file, oam_floating_ip, ansible_pass=base64.b64decode(sysadmin_password).decode('utf-8')) try: run_playbook(log_file, prestage_command, timeout=timeout_seconds, register_cleanup=True) except PlaybookExecutionFailed as ex: timeout_msg = '' if isinstance(ex, PlaybookExecutionTimeout): timeout_msg = ' (TIMEOUT)' msg = ("Prestaging %s failed%s for subcloud %s," " check individual log at %s for detailed output." % (phase, timeout_msg, subcloud.name, log_file)) LOG.exception("%s: %s", msg, ex) raise Exception(msg) finally: utils.delete_subcloud_inventory(ansible_subcloud_inventory_file) LOG.info("Prestage %s successful for subcloud %s", phase, subcloud.name) def prestage_packages(context, subcloud, payload): """Run the prestage packages ansible script.""" # Ansible inventory filename for the specified subcloud ansible_subcloud_inventory_file = \ utils.get_ansible_filename(subcloud.name, ANSIBLE_PRESTAGE_INVENTORY_SUFFIX) extra_vars_str = "software_version=%s" % SW_VERSION _run_ansible(context, ["ansible-playbook", ANSIBLE_PRESTAGE_SUBCLOUD_PACKAGES_PLAYBOOK, "--inventory", ansible_subcloud_inventory_file, "--extra-vars", extra_vars_str], "packages", subcloud, consts.PRESTAGE_STATE_PACKAGES, payload['sysadmin_password'], payload['oam_floating_ip'], ansible_subcloud_inventory_file) def prestage_images(context, subcloud, payload): """Run the prestage images ansible script. Approach: If the prestage images file has been uploaded for the target software version then pass the image_list_file to the prestage_images.yml playbook If the images file does not exist and the prestage is for upgrade, skip calling prestage_images.yml playbook. Ensure the final state is either prestage-failed or prestage-complete regardless of whether prestage_images.yml playbook is executed or skipped. """ upgrade = is_upgrade(subcloud.software_version) extra_vars_str = "software_version=%s" % SW_VERSION image_list_file = None if upgrade: image_list_filename = utils.get_filename_by_prefix(DEPLOY_BASE_DIR, 'prestage_images') if image_list_filename: image_list_file = os.path.join(DEPLOY_BASE_DIR, image_list_filename) # include this file in the ansible args: extra_vars_str += (" image_list_file=%s" % image_list_file) LOG.debug("prestage images list file: %s", image_list_file) else: LOG.debug("prestage images list file does not exist") # There are only two scenarios where we want to run ansible # for prestaging images: # 1. reinstall # 2. upgrade, with supplied image list if not upgrade or (upgrade and image_list_file): # Ansible inventory filename for the specified subcloud ansible_subcloud_inventory_file = \ utils.get_ansible_filename(subcloud.name, ANSIBLE_PRESTAGE_INVENTORY_SUFFIX) _run_ansible(context, ["ansible-playbook", ANSIBLE_PRESTAGE_SUBCLOUD_IMAGES_PLAYBOOK, "--inventory", ansible_subcloud_inventory_file, "--extra-vars", extra_vars_str], "images", subcloud, consts.PRESTAGE_STATE_IMAGES, payload['sysadmin_password'], payload['oam_floating_ip'], ansible_subcloud_inventory_file, timeout_seconds=CONF.playbook_timeout * 2) else: LOG.info("Skipping ansible prestage images step, upgrade: %s," " image_list_file: %s", upgrade, image_list_file)