# Copyright (c) 2022-2023 Wind River Systems, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Common prestaging operations.

These are shared across dcmanager (SubcloudManager) and orchestration.
"""

import base64
import os
import threading

from oslo_config import cfg
from oslo_log import log as logging

from tsconfig.tsconfig import SW_VERSION

from dccommon import consts as dccommon_consts
from dccommon.drivers.openstack.sdk_platform import OpenStackDriver
from dccommon.drivers.openstack.sysinv_v1 import SysinvClient
from dccommon.exceptions import PlaybookExecutionFailed
from dccommon.exceptions import PlaybookExecutionTimeout
from dccommon.utils import run_playbook

from dcmanager.common import consts
from dcmanager.common import exceptions
from dcmanager.common import utils
from dcmanager.db import api as db_api

LOG = logging.getLogger(__name__)

CONF = cfg.CONF

DEPLOY_BASE_DIR = dccommon_consts.DEPLOY_DIR
ANSIBLE_PRESTAGE_SUBCLOUD_PACKAGES_PLAYBOOK = \
    "/usr/share/ansible/stx-ansible/playbooks/prestage_sw_packages.yml"
ANSIBLE_PRESTAGE_SUBCLOUD_IMAGES_PLAYBOOK = \
    "/usr/share/ansible/stx-ansible/playbooks/prestage_images.yml"
ANSIBLE_PRESTAGE_INVENTORY_SUFFIX = '_prestage_inventory.yml'


def is_deploy_status_prestage(deploy_status):
    """Return True if deploy_status is one of the prestage states."""
    return deploy_status in (consts.PRESTAGE_STATE_PACKAGES,
                             consts.PRESTAGE_STATE_IMAGES,
                             consts.PRESTAGE_STATE_FAILED,
                             consts.PRESTAGE_STATE_COMPLETE)


def _get_system_controller_upgrades():
    """Return the result of sysinv get_upgrades() on the system controller.

    Any failure to build the OpenStack driver is logged and re-raised.
    """
    # get a cached keystone client (and token)
    try:
        os_client = OpenStackDriver(
            region_name=dccommon_consts.SYSTEM_CONTROLLER_NAME,
            region_clients=None)
    except Exception:
        LOG.exception("Failed to get keystone client for %s",
                      dccommon_consts.SYSTEM_CONTROLLER_NAME)
        raise

    ks_client = os_client.keystone_client
    sysinv_client = SysinvClient(
        dccommon_consts.SYSTEM_CONTROLLER_NAME, ks_client.session,
        endpoint=ks_client.endpoint_cache.get_endpoint('sysinv'))

    return sysinv_client.get_upgrades()


def is_system_controller_upgrading():
    """Return True if the system controller reports any upgrade entry."""
    return bool(_get_system_controller_upgrades())


def _get_prestage_software_version(payload):
    """Return the release to prestage: the requested one, else SW_VERSION.

    A release key that is present but empty (None or '') is treated the
    same as an absent key, matching how validate_prestage interprets it.
    """
    return payload.get(consts.PRESTAGE_REQUEST_RELEASE) or SW_VERSION


def global_prestage_validate(payload):
    """Global prestage validation (not subcloud-specific)

    Checks that the system controller is not upgrading and that the
    payload carries a non-empty, base64-decodable 'sysadmin_password'.

    Raises a PrestagePreCheckFailedException on failure.
    """
    if is_system_controller_upgrading():
        raise exceptions.PrestagePreCheckFailedException(
            subcloud=dccommon_consts.SYSTEM_CONTROLLER_NAME,
            details='Prestage operations not allowed while system'
                    ' controller upgrade is in progress.')

    # Missing key, None and empty values are all reported as "missing"
    if not payload.get('sysadmin_password'):
        raise exceptions.PrestagePreCheckFailedException(
            subcloud=None,
            orch_skip=False,
            details="Missing required parameter 'sysadmin_password'")

    # Ensure we can decode the sysadmin_password
    # (we decode again when running ansible)
    try:
        base64.b64decode(payload['sysadmin_password']).decode('utf-8')
    except Exception as ex:
        raise exceptions.PrestagePreCheckFailedException(
            subcloud=None,
            orch_skip=False,
            details="Failed to decode subcloud sysadmin_password,"
                    " verify the password is base64 encoded."
                    " Details: %s" % ex)


def initial_subcloud_validate(subcloud, installed_loads, software_version):
    """Basic validation of a subcloud prestage operation.

    Checks, in order: the subcloud is online, is managed, has no backup
    in progress, is in an allowed deploy state, and (when a release is
    given) that the release is either the subcloud's own software version
    or one of installed_loads.

    Raises a PrestagePreCheckFailedException on failure.
    """
    LOG.debug("Validating subcloud prestage '%s'", subcloud.name)

    if subcloud.availability_status != dccommon_consts.AVAILABILITY_ONLINE:
        raise exceptions.PrestagePreCheckFailedException(
            subcloud=subcloud.name,
            orch_skip=True,
            details="Subcloud is offline.")

    if subcloud.management_state != dccommon_consts.MANAGEMENT_MANAGED:
        raise exceptions.PrestagePreCheckFailedException(
            subcloud=subcloud.name,
            orch_skip=True,
            details="Subcloud is not managed.")

    if subcloud.backup_status in consts.STATES_FOR_ONGOING_BACKUP:
        raise exceptions.PrestagePreCheckFailedException(
            subcloud=subcloud.name,
            orch_skip=True,
            details="Prestage operation is not allowed while"
                    " backup is in progress.")

    allowed_deploy_states = [consts.DEPLOY_STATE_DONE,
                             consts.PRESTAGE_STATE_FAILED,
                             consts.PRESTAGE_STATE_COMPLETE]
    if subcloud.deploy_status not in allowed_deploy_states:
        raise exceptions.PrestagePreCheckFailedException(
            subcloud=subcloud.name,
            orch_skip=True,
            details="Prestage operation is only allowed while"
                    " subcloud deploy status is one of: %s."
                    " The current deploy status is %s."
                    % (', '.join(allowed_deploy_states),
                       subcloud.deploy_status))

    # The request software version must be either the same as the software
    # version of the subcloud or any active/inactive/imported load on the
    # system controller (can be checked with "system load-list" command).
    if software_version and \
            software_version != subcloud.software_version and \
            software_version not in installed_loads:
        raise exceptions.PrestagePreCheckFailedException(
            subcloud=subcloud.name,
            orch_skip=True,
            details="Specified release is not supported. "
                    "%s version must first be imported" % software_version)


def validate_prestage(subcloud, payload):
    """Validate a subcloud prestage operation.

    Prestage conditions validation
      - Subcloud exists
      - Subcloud is an AIO-SX
      - Subcloud is online
      - Subcloud is managed
      - Subcloud backup operation is not in progress
      - Subcloud has no management-affecting alarms (unless force=true)

    A missing 'force' entry in the payload is treated as force=False.

    Returns the subcloud's OAM floating IP.

    Raises a PrestagePreCheckFailedException on failure.
    """
    LOG.debug("Validating subcloud prestage '%s'", subcloud.name)

    installed_loads = []
    software_version = payload.get(consts.PRESTAGE_REQUEST_RELEASE) or None
    if software_version:
        # Only query the system controller loads when a release was requested
        installed_loads = utils.get_systemcontroller_installed_loads()

    # re-run the initial validation
    initial_subcloud_validate(subcloud, installed_loads, software_version)

    subcloud_type, system_health, oam_floating_ip = \
        _get_prestage_subcloud_info(subcloud)

    if subcloud_type != consts.SYSTEM_MODE_SIMPLEX:
        raise exceptions.PrestagePreCheckFailedException(
            subcloud=subcloud.name,
            orch_skip=True,
            details="Prestage operation is only accepted for a simplex"
                    " subcloud.")

    if (not payload.get('force')
            and not utils.pre_check_management_affected_alarm(system_health)):
        raise exceptions.PrestagePreCheckFailedException(
            subcloud=subcloud.name,
            orch_skip=False,
            details="Subcloud has management affecting alarm(s)."
                    " Please resolve the alarm condition(s)"
                    " or use --force option and try again.")

    return oam_floating_ip


def prestage_start(context, subcloud_id):
    """Set the subcloud deploy status to prestaging-packages.

    Returns the value returned by db_api.subcloud_update.
    """
    subcloud = db_api.subcloud_update(
        context, subcloud_id,
        deploy_status=consts.PRESTAGE_STATE_PACKAGES)
    return subcloud


def prestage_complete(context, subcloud_id):
    """Set the subcloud deploy status to prestage-complete."""
    db_api.subcloud_update(
        context, subcloud_id,
        deploy_status=consts.PRESTAGE_STATE_COMPLETE)


def prestage_fail(context, subcloud_id):
    """Set the subcloud deploy status to prestage-failed."""
    db_api.subcloud_update(
        context, subcloud_id,
        deploy_status=consts.PRESTAGE_STATE_FAILED)


def is_local(subcloud_version, specified_version):
    """Return True if the specified version equals the subcloud version."""
    return subcloud_version == specified_version


def prestage_subcloud(context, payload):
    """Subcloud prestaging

    This is the standalone (not orchestrated) prestage implementation.

    3 phases:
    1. Prestage validation (already done by this point)
        - Subcloud exists, is online, is managed, is AIO-SX
        - Subcloud has no management-affecting alarms (unless force is given)
    2. Packages prestaging
        - run prestage_sw_packages.yml ansible playbook
    3. Images prestaging
        - run prestage_images.yml ansible playbook

    Phases 2 and 3 run in a background thread; this function returns the
    subcloud as a dict as soon as the thread has been started.

    Raises a PrestagePreCheckFailedException if the subcloud named in the
    payload does not exist.
    """
    subcloud_name = payload['subcloud_name']
    LOG.info("Prestaging subcloud: %s, force=%s",
             subcloud_name, payload.get('force'))
    try:
        subcloud = db_api.subcloud_get_by_name(context, subcloud_name)
    except exceptions.SubcloudNameNotFound:
        LOG.info("Prestage validation failure: "
                 "subcloud '%s' does not exist", subcloud_name)
        raise exceptions.PrestagePreCheckFailedException(
            subcloud=subcloud_name,
            details="Subcloud does not exist")

    subcloud = prestage_start(context, subcloud.id)
    try:
        apply_thread = threading.Thread(
            target=_prestage_standalone_thread,
            args=(context, subcloud, payload))

        apply_thread.start()

        return db_api.subcloud_db_model_to_dict(subcloud)

    except Exception:
        # NOTE(review): the error is logged and the subcloud is marked
        # prestage-failed, but nothing is re-raised, so the caller
        # receives None in this case - confirm this is intended.
        LOG.exception("Subcloud prestaging failed %s", subcloud_name)
        prestage_fail(context, subcloud.id)


def _prestage_standalone_thread(context, subcloud, payload):
    """Run the prestage operations inside a separate thread

    Packages are prestaged first, then images. On any exception the
    subcloud is marked prestage-failed and the exception is re-raised.
    """
    try:
        prestage_packages(context, subcloud, payload)
        prestage_images(context, subcloud, payload)

        prestage_complete(context, subcloud.id)
        LOG.info("Prestage complete: %s", subcloud.name)

    except Exception:
        prestage_fail(context, subcloud.id)
        raise


def _get_prestage_subcloud_info(subcloud):
    """Retrieve prestage data from the subcloud.

    Pull all required data here in order to minimize keystone/sysinv client
    interactions.

    Returns a (system_mode, system_health, oam_floating_ip) tuple.

    Raises a PrestagePreCheckFailedException if any of the lookups fail.
    """
    try:
        os_client = OpenStackDriver(region_name=subcloud.region_name,
                                    region_clients=None)
        keystone_client = os_client.keystone_client
        endpoint = keystone_client.endpoint_cache.get_endpoint('sysinv')
        sysinv_client = SysinvClient(subcloud.region_name,
                                     keystone_client.session,
                                     endpoint=endpoint)
        mode = sysinv_client.get_system().system_mode
        health = sysinv_client.get_system_health()
        oam_floating_ip = sysinv_client.get_oam_addresses().oam_floating_ip
        return mode, health, oam_floating_ip

    except Exception as e:
        LOG.exception(e)
        raise exceptions.PrestagePreCheckFailedException(
            subcloud=subcloud.name,
            details="Failed to retrieve subcloud system mode and system health.")


def _run_ansible(context, prestage_command, phase,
                 subcloud, deploy_status,
                 sysadmin_password, oam_floating_ip,
                 software_version,
                 ansible_subcloud_inventory_file,
                 timeout_seconds=None):
    """Run one prestage playbook against a subcloud.

    Updates the subcloud deploy status to deploy_status, writes a
    temporary ansible inventory file, runs prestage_command and always
    removes the inventory file afterwards.

    :param prestage_command: ansible-playbook command line, as a list
    :param phase: short phase name used in log messages
    :param timeout_seconds: playbook timeout; CONF.playbook_timeout is
        used when this is None or 0
    :raises Exception: if the playbook fails or times out
    """
    if not timeout_seconds:
        # We always want to set a timeout in prestaging operations:
        timeout_seconds = CONF.playbook_timeout

    LOG.info("Prestaging %s for subcloud: %s, version: %s, timeout: %ss",
             phase, subcloud.name, software_version, timeout_seconds)

    db_api.subcloud_update(context,
                           subcloud.id,
                           deploy_status=deploy_status)
    log_file = os.path.join(consts.DC_ANSIBLE_LOG_DIR, subcloud.name) + \
        '_playbook_output.log'

    # Create the ansible inventory for the new subcloud
    utils.create_subcloud_inventory_with_admin_creds(
        subcloud.name,
        ansible_subcloud_inventory_file,
        oam_floating_ip,
        ansible_pass=utils.decode_and_normalize_passwd(sysadmin_password))

    try:
        run_playbook(log_file, prestage_command,
                     timeout=timeout_seconds, register_cleanup=True)
    except PlaybookExecutionFailed as ex:
        # A timeout is reported through a subclass of the same exception
        timeout_msg = ''
        if isinstance(ex, PlaybookExecutionTimeout):
            timeout_msg = ' (TIMEOUT)'
        msg = ("Prestaging %s failed%s for subcloud %s,"
               " check individual log at %s for detailed output."
               % (phase, timeout_msg, subcloud.name, log_file))
        LOG.exception("%s: %s", msg, ex)
        raise Exception(msg)
    finally:
        utils.delete_subcloud_inventory(ansible_subcloud_inventory_file)

    LOG.info("Prestage %s successful for subcloud %s",
             phase, subcloud.name)


def prestage_packages(context, subcloud, payload):
    """Run the prestage packages ansible script.

    The release is taken from the payload, falling back to SW_VERSION
    when no release (or an empty one) was requested.
    """

    # Ansible inventory filename for the specified subcloud
    ansible_subcloud_inventory_file = \
        utils.get_ansible_filename(subcloud.name,
                                   ANSIBLE_PRESTAGE_INVENTORY_SUFFIX)

    prestage_software_version = _get_prestage_software_version(payload)
    extra_vars_str = "software_version=%s" % prestage_software_version

    _run_ansible(context,
                 ["ansible-playbook",
                  ANSIBLE_PRESTAGE_SUBCLOUD_PACKAGES_PLAYBOOK,
                  "--inventory", ansible_subcloud_inventory_file,
                  "--extra-vars", extra_vars_str],
                 "packages",
                 subcloud,
                 consts.PRESTAGE_STATE_PACKAGES,
                 payload['sysadmin_password'],
                 payload['oam_floating_ip'],
                 prestage_software_version,
                 ansible_subcloud_inventory_file)


def prestage_images(context, subcloud, payload):
    """Run the prestage images ansible script.

    If the prestage images file has been uploaded, include the fully
    qualified path name in the extra vars before invoking the
    prestage_images.yml playbook.

    If the prestage images file has not been uploaded, only proceed
    with images prestage if the prestage source is local.

    Ensure the final state is either prestage-failed or prestage-complete
    regardless of whether prestage_images.yml playbook is executed or skipped.

    The release is taken from the payload, falling back to SW_VERSION
    when no release (or an empty one) was requested.
    """
    prestage_software_version = _get_prestage_software_version(payload)
    extra_vars_str = "software_version=%s" % prestage_software_version

    image_list_filename = None
    deploy_dir = os.path.join(DEPLOY_BASE_DIR, prestage_software_version)
    if os.path.isdir(deploy_dir):
        image_list_filename = utils.get_filename_by_prefix(deploy_dir,
                                                           'prestage_images')
    if image_list_filename:
        image_list_file = os.path.join(deploy_dir, image_list_filename)
        # include this file in the ansible args:
        extra_vars_str += (" image_list_file=%s" % image_list_file)
        LOG.debug("prestage images list file: %s", image_list_file)
    else:
        LOG.debug("prestage images list file does not exist")
        if prestage_software_version != subcloud.software_version:
            # Prestage source is remote but there is no images list file so
            # skip the images prestage.
            LOG.info("Images prestage is skipped for %s as the prestage images "
                     "list for release %s has not been uploaded and the "
                     "subcloud is running a different load than %s.",
                     subcloud.name, prestage_software_version,
                     prestage_software_version)
            return

    # Ansible inventory filename for the specified subcloud
    ansible_subcloud_inventory_file = \
        utils.get_ansible_filename(subcloud.name,
                                   ANSIBLE_PRESTAGE_INVENTORY_SUFFIX)
    # The images playbook is given twice the configured playbook timeout
    _run_ansible(context,
                 ["ansible-playbook",
                  ANSIBLE_PRESTAGE_SUBCLOUD_IMAGES_PLAYBOOK,
                  "--inventory", ansible_subcloud_inventory_file,
                  "--extra-vars", extra_vars_str],
                 "images",
                 subcloud,
                 consts.PRESTAGE_STATE_IMAGES,
                 payload['sysadmin_password'],
                 payload['oam_floating_ip'],
                 prestage_software_version,
                 ansible_subcloud_inventory_file,
                 timeout_seconds=CONF.playbook_timeout * 2)