# Copyright (c) 2022-2024 Wind River Systems, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or # implied. # See the License for the specific language governing permissions and # limitations under the License. # """ Common prestaging operations. These are shared across dcmanager (SubcloudManager) and orchestration. """ import base64 import os import threading from oslo_config import cfg from oslo_log import log as logging from tsconfig.tsconfig import SW_VERSION from dccommon import consts as dccommon_consts from dccommon.drivers.openstack.sdk_platform import OpenStackDriver from dccommon.drivers.openstack.sysinv_v1 import SysinvClient from dccommon.exceptions import PlaybookExecutionFailed from dccommon.exceptions import PlaybookExecutionTimeout from dccommon.utils import AnsiblePlaybook from dcmanager.common import consts from dcmanager.common import exceptions from dcmanager.common import utils from dcmanager.db import api as db_api LOG = logging.getLogger(__name__) CONF = cfg.CONF DEPLOY_BASE_DIR = dccommon_consts.DEPLOY_DIR ANSIBLE_PRESTAGE_SUBCLOUD_PACKAGES_PLAYBOOK = \ "/usr/share/ansible/stx-ansible/playbooks/prestage_sw_packages.yml" ANSIBLE_PRESTAGE_SUBCLOUD_IMAGES_PLAYBOOK = \ "/usr/share/ansible/stx-ansible/playbooks/prestage_images.yml" ANSIBLE_PRESTAGE_INVENTORY_SUFFIX = '_prestage_inventory.yml' PRINT_PRESTAGE_VERSIONS_TASK = \ 'prestage\/prestage-versions : Print prestage versions' PRESTAGE_VERSIONS_KEY_STR = 'prestage_versions:' def _get_system_controller_upgrades(): # get a cached keystone client (and token) try: os_client = OpenStackDriver( region_name=dccommon_consts.SYSTEM_CONTROLLER_NAME, region_clients=None) except Exception: LOG.exception("Failed to get keystone client for %s", dccommon_consts.SYSTEM_CONTROLLER_NAME) raise ks_client = os_client.keystone_client sysinv_client = SysinvClient( dccommon_consts.SYSTEM_CONTROLLER_NAME, ks_client.session, endpoint=ks_client.endpoint_cache.get_endpoint('sysinv')) return sysinv_client.get_upgrades() def is_system_controller_upgrading(): return len(_get_system_controller_upgrades()) != 0 def global_prestage_validate(payload): """Global prestage validation (not subcloud-specific)""" if is_system_controller_upgrading(): raise exceptions.PrestagePreCheckFailedException( subcloud=dccommon_consts.SYSTEM_CONTROLLER_NAME, details='Prestage operations not allowed while system' ' controller upgrade is in progress.') if ('sysadmin_password' not in payload or payload['sysadmin_password'] is None or payload['sysadmin_password'] == ''): raise exceptions.PrestagePreCheckFailedException( subcloud=None, orch_skip=False, details="Missing required parameter 'sysadmin_password'") # Ensure we can decode the sysadmin_password # (we decode again when running ansible) try: base64.b64decode(payload['sysadmin_password']).decode('utf-8') except Exception as ex: raise exceptions.PrestagePreCheckFailedException( subcloud=None, orch_skip=False, details="Failed to decode subcloud sysadmin_password," " verify the password is base64 encoded." " Details: %s" % ex) def initial_subcloud_validate(subcloud, installed_loads, software_version): """Basic validation a subcloud prestage operation. Raises a PrestageCheckFailedException on failure. """ LOG.debug("Validating subcloud prestage '%s'", subcloud.name) if subcloud.availability_status != dccommon_consts.AVAILABILITY_ONLINE: raise exceptions.PrestagePreCheckFailedException( subcloud=subcloud.name, orch_skip=True, details="Subcloud is offline.") if subcloud.management_state != dccommon_consts.MANAGEMENT_MANAGED: raise exceptions.PrestagePreCheckFailedException( subcloud=subcloud.name, orch_skip=True, details="Subcloud is not managed.") if subcloud.backup_status in consts.STATES_FOR_ONGOING_BACKUP: raise exceptions.PrestagePreCheckFailedException( subcloud=subcloud.name, orch_skip=True, details="Prestage operation is not allowed while" " backup is in progress.") if subcloud.deploy_status != consts.DEPLOY_STATE_DONE: raise exceptions.PrestagePreCheckFailedException( subcloud=subcloud.name, orch_skip=True, details="Prestage operation is not allowed when" " subcloud deploy is not completed.") allowed_prestage_states = [consts.PRESTAGE_STATE_FAILED, consts.PRESTAGE_STATE_COMPLETE] if (subcloud.prestage_status and (subcloud.prestage_status not in allowed_prestage_states)): raise exceptions.PrestagePreCheckFailedException( subcloud=subcloud.name, orch_skip=True, details="Prestage operation is only allowed while" " subcloud prestage status is one of: %s." " The current prestage status is %s." % (', '.join(allowed_prestage_states), subcloud.prestage_status)) # The request software version must be either the same as the software version # of the subcloud or any active/inactive/imported load on the system controller # (can be checked with "system load-list" command). if software_version and \ software_version != subcloud.software_version and \ software_version not in installed_loads: raise exceptions.PrestagePreCheckFailedException( subcloud=subcloud.name, orch_skip=True, details="Specified release is not supported. " "%s version must first be imported" % software_version) def validate_prestage(subcloud, payload): """Validate a subcloud prestage operation. Prestage conditions validation - Subcloud exists - Subcloud is an AIO-SX - Subcloud is online - Subcloud is managed - Subcloud backup operation is not in progress - Subcloud has no management-affecting alarms (unless force=true) Raises a PrestageCheckFailedException on failure. """ LOG.debug("Validating subcloud prestage '%s'", subcloud.name) installed_loads = [] software_version = None if payload.get(consts.PRESTAGE_REQUEST_RELEASE): software_version = payload.get(consts.PRESTAGE_REQUEST_RELEASE) installed_loads = utils.get_systemcontroller_installed_loads() # re-run the initial validation initial_subcloud_validate(subcloud, installed_loads, software_version) subcloud_type, system_health, oam_floating_ip = \ _get_prestage_subcloud_info(subcloud) if subcloud_type != consts.SYSTEM_MODE_SIMPLEX: raise exceptions.PrestagePreCheckFailedException( subcloud=subcloud.name, orch_skip=True, details="Prestage operation is only accepted for a simplex" " subcloud.") if (not payload['force'] and not utils.pre_check_management_affected_alarm(system_health)): raise exceptions.PrestagePreCheckFailedException( subcloud=subcloud.name, orch_skip=False, details="Subcloud has management affecting alarm(s)." " Please resolve the alarm condition(s)" " or use --force option and try again.") return oam_floating_ip def prestage_start(context, subcloud_id): subcloud = db_api.subcloud_update( context, subcloud_id, prestage_status=consts.PRESTAGE_STATE_PACKAGES) return subcloud def prestage_complete(context, subcloud_id, prestage_versions): db_api.subcloud_update( context, subcloud_id, prestage_status=consts.PRESTAGE_STATE_COMPLETE, prestage_versions=prestage_versions) def prestage_fail(context, subcloud_id): db_api.subcloud_update( context, subcloud_id, prestage_status=consts.PRESTAGE_STATE_FAILED) def is_local(subcloud_version, specified_version): return subcloud_version == specified_version def prestage_subcloud(context, payload): """Subcloud prestaging This is the standalone (not orchestrated) prestage implementation. 3 phases: 1. Prestage validation (already done by this point) - Subcloud exists, is online, is managed, is AIO-SX - Subcloud has no management-affecting alarms (unless force is given) 2. Packages prestaging - run prestage_packages.yml ansible playbook 3. Images prestaging - run prestage_images.yml ansible playbook """ subcloud_name = payload['subcloud_name'] LOG.info("Prestaging subcloud: %s, force=%s" % (subcloud_name, payload['force'])) try: subcloud = db_api.subcloud_get_by_name(context, subcloud_name) except exceptions.SubcloudNameNotFound: LOG.info("Prestage validation failure: " "subcloud '%s' does not exist", subcloud_name) raise exceptions.PrestagePreCheckFailedException( subcloud=subcloud_name, details="Subcloud does not exist") subcloud = prestage_start(context, subcloud.id) try: apply_thread = threading.Thread( target=_prestage_standalone_thread, args=(context, subcloud, payload)) apply_thread.start() return db_api.subcloud_db_model_to_dict(subcloud) except Exception: LOG.exception("Subcloud prestaging failed %s" % subcloud_name) prestage_fail(context, subcloud.id) def _prestage_standalone_thread(context, subcloud, payload): """Run the prestage operations inside a separate thread""" log_file = utils.get_subcloud_ansible_log_file(subcloud.name) try: prestage_packages(context, subcloud, payload) # Get the prestage versions from the logs generated by # the prestage packages playbook prestage_versions = utils.get_msg_output_info( log_file, PRINT_PRESTAGE_VERSIONS_TASK, PRESTAGE_VERSIONS_KEY_STR) prestage_images(context, subcloud, payload) prestage_complete(context, subcloud.id, prestage_versions) LOG.info("Prestage complete: %s", subcloud.name) except Exception: prestage_fail(context, subcloud.id) raise def _get_prestage_subcloud_info(subcloud): """Retrieve prestage data from the subcloud. Pull all required data here in order to minimize keystone/sysinv client interactions. """ try: os_client = OpenStackDriver(region_name=subcloud.region_name, region_clients=None) keystone_client = os_client.keystone_client endpoint = keystone_client.endpoint_cache.get_endpoint('sysinv') sysinv_client = SysinvClient(subcloud.region_name, keystone_client.session, endpoint=endpoint) mode = sysinv_client.get_system().system_mode health = sysinv_client.get_system_health() oam_floating_ip = sysinv_client.get_oam_addresses().oam_floating_ip return mode, health, oam_floating_ip except Exception as e: LOG.exception(e) raise exceptions.PrestagePreCheckFailedException( subcloud=subcloud.name, details="Failed to retrieve subcloud system mode and system health.") def _run_ansible(context, prestage_command, phase, subcloud, prestage_status, sysadmin_password, oam_floating_ip, software_version, ansible_subcloud_inventory_file, timeout_seconds=None): if not timeout_seconds: # We always want to set a timeout in prestaging operations: timeout_seconds = CONF.playbook_timeout LOG.info("Prestaging %s for subcloud: %s, version: %s, timeout: %ss", phase, subcloud.name, software_version, timeout_seconds) db_api.subcloud_update(context, subcloud.id, prestage_status=prestage_status) # Create the ansible inventory for the new subcloud utils.create_subcloud_inventory_with_admin_creds( subcloud.name, ansible_subcloud_inventory_file, oam_floating_ip, ansible_pass=utils.decode_and_normalize_passwd(sysadmin_password)) log_file = utils.get_subcloud_ansible_log_file(subcloud.name) try: ansible = AnsiblePlaybook(subcloud.name) ansible.run_playbook(log_file, prestage_command, timeout=timeout_seconds, register_cleanup=True) except PlaybookExecutionFailed as ex: timeout_msg = '' if isinstance(ex, PlaybookExecutionTimeout): timeout_msg = ' (TIMEOUT)' msg = ("Prestaging %s failed%s for subcloud %s," " check individual log at %s for detailed output." % (phase, timeout_msg, subcloud.name, log_file)) LOG.exception("%s: %s", msg, ex) raise Exception(msg) finally: utils.delete_subcloud_inventory(ansible_subcloud_inventory_file) LOG.info("Prestage %s successful for subcloud %s", phase, subcloud.name) def prestage_packages(context, subcloud, payload): """Run the prestage packages ansible script.""" # Ansible inventory filename for the specified subcloud ansible_subcloud_inventory_file = \ utils.get_ansible_filename(subcloud.name, ANSIBLE_PRESTAGE_INVENTORY_SUFFIX) prestage_software_version = payload.get( consts.PRESTAGE_REQUEST_RELEASE, SW_VERSION) extra_vars_str = "software_version=%s" % prestage_software_version _run_ansible(context, ["ansible-playbook", ANSIBLE_PRESTAGE_SUBCLOUD_PACKAGES_PLAYBOOK, "--inventory", ansible_subcloud_inventory_file, "--extra-vars", extra_vars_str], "packages", subcloud, consts.PRESTAGE_STATE_PACKAGES, payload['sysadmin_password'], payload['oam_floating_ip'], prestage_software_version, ansible_subcloud_inventory_file) def prestage_images(context, subcloud, payload): """Run the prestage images ansible script. If the prestage images file has been uploaded, include the fully qualified path name in the extra vars before invoking the prestage_images.yml playbook. If the prestage images file has not been uploaded, only proceed with images prestage if the prestage source is local. Ensure the final state is either prestage-failed or prestage-complete regardless of whether prestage_images.yml playbook is executed or skipped. """ prestage_software_version = payload.get( consts.PRESTAGE_REQUEST_RELEASE, SW_VERSION) extra_vars_str = "software_version=%s" % prestage_software_version image_list_filename = None deploy_dir = os.path.join(DEPLOY_BASE_DIR, prestage_software_version) if os.path.isdir(deploy_dir): image_list_filename = utils.get_filename_by_prefix(deploy_dir, 'prestage_images') if image_list_filename: image_list_file = os.path.join(deploy_dir, image_list_filename) # include this file in the ansible args: extra_vars_str += (" image_list_file=%s" % image_list_file) LOG.debug("prestage images list file: %s", image_list_file) else: LOG.debug("prestage images list file does not exist") if prestage_software_version != subcloud.software_version: # Prestage source is remote but there is no images list file so # skip the images prestage. LOG.info("Images prestage is skipped for %s as the prestage images " "list for release %s has not been uploaded and the " "subcloud is running a different load than %s." % (subcloud.name, prestage_software_version, prestage_software_version)) return # Ansible inventory filename for the specified subcloud ansible_subcloud_inventory_file = \ utils.get_ansible_filename(subcloud.name, ANSIBLE_PRESTAGE_INVENTORY_SUFFIX) _run_ansible(context, ["ansible-playbook", ANSIBLE_PRESTAGE_SUBCLOUD_IMAGES_PLAYBOOK, "--inventory", ansible_subcloud_inventory_file, "--extra-vars", extra_vars_str], "images", subcloud, consts.PRESTAGE_STATE_IMAGES, payload['sysadmin_password'], payload['oam_floating_ip'], prestage_software_version, ansible_subcloud_inventory_file, timeout_seconds=CONF.playbook_timeout * 2)