# Copyright 2017 Ericsson AB.
# Copyright (c) 2017-2023 Wind River Systems, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from __future__ import division

import collections
import datetime
import filecmp
import functools
import json
import os
import shutil
import threading
import time

from eventlet import greenpool
from fm_api import constants as fm_const
from fm_api import fm_api
import keyring
import netaddr
from oslo_log import log as logging
from oslo_messaging import RemoteError

from tsconfig.tsconfig import CONFIG_PATH
from tsconfig.tsconfig import SW_VERSION

from dccommon import consts as dccommon_consts
from dccommon.drivers.openstack.sdk_platform import OpenStackDriver
from dccommon.drivers.openstack.sysinv_v1 import SysinvClient
from dccommon.exceptions import PlaybookExecutionFailed
from dccommon import kubeoperator
from dccommon.subcloud_install import SubcloudInstall
from dccommon.subcloud_install import SubcloudShutdown
from dccommon.utils import run_playbook
from dccommon.utils import RunAnsible

from dcmanager.audit import rpcapi as dcmanager_audit_rpc_client
from dcmanager.common import consts
from dcmanager.common.consts import INVENTORY_FILE_POSTFIX
from dcmanager.common import context as dcmanager_context
from dcmanager.common import exceptions
from dcmanager.common.exceptions import DCManagerException
from dcmanager.common.i18n import _
from dcmanager.common import manager
from dcmanager.common import phased_subcloud_deploy as psd_common
from dcmanager.common import prestage
from dcmanager.common import utils
from dcmanager.db import api as db_api
from dcmanager.db.sqlalchemy.models import Subcloud
from dcmanager.rpc import client as dcmanager_rpc_client
from dcorch.rpc import client as dcorch_rpc_client

LOG = logging.getLogger(__name__)

# Name of our distributed cloud addn_hosts file for dnsmasq
# to read. This file is referenced in dnsmasq.conf
ADDN_HOSTS_DC = 'dnsmasq.addn_hosts_dc'

# Subcloud configuration paths
ANSIBLE_SUBCLOUD_BACKUP_CREATE_PLAYBOOK = \
    '/usr/share/ansible/stx-ansible/playbooks/create_subcloud_backup.yml'
ANSIBLE_SUBCLOUD_BACKUP_DELETE_PLAYBOOK = \
    '/usr/share/ansible/stx-ansible/playbooks/delete_subcloud_backup.yml'
ANSIBLE_SUBCLOUD_BACKUP_RESTORE_PLAYBOOK = \
    '/usr/share/ansible/stx-ansible/playbooks/restore_subcloud_backup.yml'
ANSIBLE_SUBCLOUD_PLAYBOOK = \
    '/usr/share/ansible/stx-ansible/playbooks/bootstrap.yml'
ANSIBLE_SUBCLOUD_REHOME_PLAYBOOK = \
    '/usr/share/ansible/stx-ansible/playbooks/rehome_subcloud.yml'
ANSIBLE_SUBCLOUD_UPDATE_PLAYBOOK = \
    '/usr/share/ansible/stx-ansible/playbooks/update_subcloud.yml'

USERS_TO_REPLICATE = [
    'sysinv',
    'patching',
    'vim',
    'mtce',
    'fm',
    'barbican',
    'dcmanager'
]

# The timeout of the rehome playbook is set to 180 seconds as it takes a
# long time for privilege escalation before resetting the host route and
# LDAP server address in a subcloud.
REHOME_PLAYBOOK_TIMEOUT = "180" # 180 seconds UPDATE_PLAYBOOK_TIMEOUT = "180" SC_INTERMEDIATE_CERT_DURATION = "8760h" # 1 year = 24 hours x 365 SC_INTERMEDIATE_CERT_RENEW_BEFORE = "720h" # 30 days CERT_NAMESPACE = "dc-cert" TRANSITORY_STATES = { consts.DEPLOY_STATE_NONE: consts.DEPLOY_STATE_DEPLOY_PREP_FAILED, consts.DEPLOY_STATE_PRE_DEPLOY: consts.DEPLOY_STATE_DEPLOY_PREP_FAILED, consts.DEPLOY_STATE_CREATING: consts.DEPLOY_STATE_CREATE_FAILED, consts.DEPLOY_STATE_PRE_INSTALL: consts.DEPLOY_STATE_PRE_INSTALL_FAILED, consts.DEPLOY_STATE_INSTALLING: consts.DEPLOY_STATE_INSTALL_FAILED, consts.DEPLOY_STATE_PRE_BOOTSTRAP: consts.DEPLOY_STATE_PRE_BOOTSTRAP_FAILED, consts.DEPLOY_STATE_BOOTSTRAPPING: consts.DEPLOY_STATE_BOOTSTRAP_FAILED, consts.DEPLOY_STATE_PRE_CONFIG: consts.DEPLOY_STATE_PRE_CONFIG_FAILED, consts.DEPLOY_STATE_CONFIGURING: consts.DEPLOY_STATE_CONFIG_FAILED, consts.DEPLOY_STATE_DEPLOYING: consts.DEPLOY_STATE_DEPLOY_FAILED, consts.DEPLOY_STATE_ABORTING_INSTALL: consts.DEPLOY_STATE_INSTALL_FAILED, consts.DEPLOY_STATE_ABORTING_BOOTSTRAP: consts.DEPLOY_STATE_BOOTSTRAP_FAILED, consts.DEPLOY_STATE_ABORTING_CONFIG: consts.DEPLOY_STATE_CONFIG_FAILED, consts.DEPLOY_STATE_MIGRATING_DATA: consts.DEPLOY_STATE_DATA_MIGRATION_FAILED, consts.DEPLOY_STATE_PRE_RESTORE: consts.DEPLOY_STATE_RESTORE_PREP_FAILED, consts.DEPLOY_STATE_RESTORING: consts.DEPLOY_STATE_RESTORE_FAILED, consts.PRESTAGE_STATE_PACKAGES: consts.PRESTAGE_STATE_FAILED, consts.PRESTAGE_STATE_IMAGES: consts.PRESTAGE_STATE_FAILED, } TRANSITORY_BACKUP_STATES = { consts.BACKUP_STATE_VALIDATING: consts.BACKUP_STATE_VALIDATE_FAILED, consts.BACKUP_STATE_PRE_BACKUP: consts.BACKUP_STATE_PREP_FAILED, consts.BACKUP_STATE_IN_PROGRESS: consts.BACKUP_STATE_FAILED } MAX_PARALLEL_SUBCLOUD_BACKUP_CREATE = 250 MAX_PARALLEL_SUBCLOUD_BACKUP_DELETE = 250 MAX_PARALLEL_SUBCLOUD_BACKUP_RESTORE = 100 CENTRAL_BACKUP_DIR = '/opt/dc-vault/backups' ENDPOINT_URLS = { dccommon_consts.ENDPOINT_TYPE_PLATFORM: "https://{}:6386/v1", dccommon_consts.ENDPOINT_TYPE_IDENTITY: "https://{}:5001/v3", dccommon_consts.ENDPOINT_TYPE_PATCHING: "https://{}:5492", dccommon_consts.ENDPOINT_TYPE_FM: "https://{}:18003", dccommon_consts.ENDPOINT_TYPE_NFV: "https://{}:4546", dccommon_consts.ENDPOINT_TYPE_SOFTWARE: "https://{}:5498", } class SubcloudManager(manager.Manager): """Manages tasks related to subclouds.""" regionone_data = collections.defaultdict(dict) def __init__(self, *args, **kwargs): LOG.debug(_('SubcloudManager initialization...')) super(SubcloudManager, self).__init__(service_name="subcloud_manager", *args, **kwargs) self.context = dcmanager_context.get_admin_context() self.dcorch_rpc_client = dcorch_rpc_client.EngineClient() self.fm_api = fm_api.FaultAPIs() self.audit_rpc_client = dcmanager_audit_rpc_client.ManagerAuditClient() self.state_rpc_client = dcmanager_rpc_client.SubcloudStateClient() @staticmethod def _get_subcloud_cert_name(subcloud_name): cert_name = "%s-adminep-ca-certificate" % subcloud_name return cert_name @staticmethod def _get_subcloud_cert_secret_name(subcloud_name): secret_name = "%s-adminep-ca-certificate" % subcloud_name return secret_name @staticmethod def _create_intermediate_ca_cert(payload): subcloud_name = payload["name"] cert_name = SubcloudManager._get_subcloud_cert_name(subcloud_name) secret_name = SubcloudManager._get_subcloud_cert_secret_name( subcloud_name) cert = { "apiVersion": "%s/%s" % (kubeoperator.CERT_MANAGER_GROUP, kubeoperator.CERT_MANAGER_VERSION), "kind": "Certificate", "metadata": { "namespace": CERT_NAMESPACE, "name": 
cert_name }, "spec": { "secretName": secret_name, "duration": SC_INTERMEDIATE_CERT_DURATION, "renewBefore": SC_INTERMEDIATE_CERT_RENEW_BEFORE, "issuerRef": { "kind": "Issuer", "name": "dc-adminep-root-ca-issuer" }, "commonName": cert_name, "isCA": True, }, } kube = kubeoperator.KubeOperator() kube.apply_cert_manager_certificate(CERT_NAMESPACE, cert_name, cert) for count in range(1, 20): secret = kube.kube_get_secret(secret_name, CERT_NAMESPACE) if not hasattr(secret, 'data'): time.sleep(1) LOG.debug('Wait for %s ... %s' % (secret_name, count)) continue data = secret.data if ('ca.crt' not in data or 'tls.crt' not in data or 'tls.key' not in data) or \ not (data['ca.crt'] and data['tls.crt'] and data['tls.key']): # ca cert, certificate and key pair are needed and must exist # for creating an intermediate ca. If not, certificate is not # ready yet. time.sleep(1) LOG.debug('Wait for %s ... %s' % (secret_name, count)) continue payload['dc_root_ca_cert'] = data['ca.crt'] payload['sc_ca_cert'] = data['tls.crt'] payload['sc_ca_key'] = data['tls.key'] return raise Exception("Secret for certificate %s is not ready." % cert_name) # TODO(kmacleod) switch to using utils.get_ansible_filename @staticmethod def _get_ansible_filename(subcloud_name, postfix='.yml'): ansible_filename = os.path.join( dccommon_consts.ANSIBLE_OVERRIDES_PATH, subcloud_name + postfix) return ansible_filename def compose_install_command(self, subcloud_name, ansible_subcloud_inventory_file, software_version=None): install_command = [ "ansible-playbook", dccommon_consts.ANSIBLE_SUBCLOUD_INSTALL_PLAYBOOK, "-i", ansible_subcloud_inventory_file, "--limit", subcloud_name, "-e", "@%s" % dccommon_consts.ANSIBLE_OVERRIDES_PATH + "/" + subcloud_name + '/' + "install_values.yml", "-e", "install_release_version=%s" % software_version if software_version else SW_VERSION] return install_command def compose_bootstrap_command(self, subcloud_name, ansible_subcloud_inventory_file, software_version=None): bootstrap_command = [ "ansible-playbook", utils.get_playbook_for_software_version( ANSIBLE_SUBCLOUD_PLAYBOOK, software_version), "-i", ansible_subcloud_inventory_file, "--limit", subcloud_name ] # Add the overrides dir and region_name so the playbook knows # which overrides to load bootstrap_command += [ "-e", str("override_files_dir='%s' region_name=%s") % ( dccommon_consts.ANSIBLE_OVERRIDES_PATH, subcloud_name), "-e", "install_release_version=%s" % software_version if software_version else SW_VERSION] return bootstrap_command def compose_config_command(self, subcloud_name, ansible_subcloud_inventory_file, payload): config_command = [ "ansible-playbook", payload[consts.DEPLOY_PLAYBOOK], "-e", "@%s" % dccommon_consts.ANSIBLE_OVERRIDES_PATH + "/" + subcloud_name + '_deploy_values.yml', "-i", ansible_subcloud_inventory_file, "--limit", subcloud_name ] return config_command def compose_backup_command(self, subcloud_name, ansible_subcloud_inventory_file): backup_command = [ "ansible-playbook", ANSIBLE_SUBCLOUD_BACKUP_CREATE_PLAYBOOK, "-i", ansible_subcloud_inventory_file, "--limit", subcloud_name, "-e", "subcloud_bnr_overrides=%s" % dccommon_consts.ANSIBLE_OVERRIDES_PATH + "/" + subcloud_name + "_backup_create_values.yml"] return backup_command def compose_backup_delete_command(self, subcloud_name, ansible_subcloud_inventory_file): backup_command = [ "ansible-playbook", ANSIBLE_SUBCLOUD_BACKUP_DELETE_PLAYBOOK, "-i", ansible_subcloud_inventory_file, "--limit", subcloud_name, "-e", "subcloud_bnr_overrides=%s" % dccommon_consts.ANSIBLE_OVERRIDES_PATH + 
"/" + subcloud_name + "_backup_delete_values.yml"] return backup_command def compose_backup_restore_command(self, subcloud_name, ansible_subcloud_inventory_file): backup_command = [ "ansible-playbook", ANSIBLE_SUBCLOUD_BACKUP_RESTORE_PLAYBOOK, "-i", ansible_subcloud_inventory_file, "--limit", subcloud_name, "-e", "subcloud_bnr_overrides=%s" % dccommon_consts.ANSIBLE_OVERRIDES_PATH + "/" + subcloud_name + "_backup_restore_values.yml"] return backup_command def compose_update_command(self, subcloud_name, ansible_subcloud_inventory_file): subcloud_update_command = [ "ansible-playbook", ANSIBLE_SUBCLOUD_UPDATE_PLAYBOOK, "-i", ansible_subcloud_inventory_file, "--limit", subcloud_name, "--timeout", UPDATE_PLAYBOOK_TIMEOUT, "-e", "subcloud_update_overrides=%s" % dccommon_consts.ANSIBLE_OVERRIDES_PATH + "/" + subcloud_name + "_update_values.yml"] return subcloud_update_command def compose_rehome_command(self, subcloud_name, ansible_subcloud_inventory_file, software_version): rehome_command = [ "ansible-playbook", utils.get_playbook_for_software_version( ANSIBLE_SUBCLOUD_REHOME_PLAYBOOK, software_version), "-i", ansible_subcloud_inventory_file, "--limit", subcloud_name, "--timeout", REHOME_PLAYBOOK_TIMEOUT, "-e", str("override_files_dir='%s' region_name=%s") % ( dccommon_consts.ANSIBLE_OVERRIDES_PATH, subcloud_name)] return rehome_command def rehome_subcloud(self, context, subcloud, payload): # Ansible inventory filename for the specified subcloud ansible_subcloud_inventory_file = self._get_ansible_filename( subcloud.name, INVENTORY_FILE_POSTFIX) rehome_command = self.compose_rehome_command( subcloud.name, ansible_subcloud_inventory_file, subcloud.software_version) self.run_deploy_thread(subcloud, payload, context, rehome_command=rehome_command) def add_subcloud(self, context, subcloud_id, payload): """Add subcloud and notify orchestrators. :param context: request context object :param subcloud_id: id of the subcloud :param payload: subcloud configuration """ LOG.info(f"Adding subcloud {payload['name']}.") rehoming = payload.get('migrate', '').lower() == "true" payload['ansible_ssh_pass'] = payload['sysadmin_password'] # Create the subcloud subcloud = self.subcloud_deploy_create(context, subcloud_id, payload, rehoming, return_as_dict=False) # Return if create failed if rehoming: success_state = consts.DEPLOY_STATE_PRE_REHOME else: success_state = consts.DEPLOY_STATE_CREATED if subcloud.deploy_status != success_state: return # Rehome subcloud if rehoming: self.rehome_subcloud(context, subcloud, payload) return # Define which deploy phases should be run phases_to_run = [] if consts.INSTALL_VALUES in payload: phases_to_run.append(consts.DEPLOY_PHASE_INSTALL) phases_to_run.append(consts.DEPLOY_PHASE_BOOTSTRAP) if consts.DEPLOY_CONFIG in payload: phases_to_run.append(consts.DEPLOY_PHASE_CONFIG) # Finish adding the subcloud by running the deploy phases succeeded = self.run_deploy_phases( context, subcloud_id, payload, phases_to_run) if succeeded: subcloud = db_api.subcloud_update( context, subcloud_id, deploy_status=consts.DEPLOY_STATE_DONE) LOG.info(f"Finished adding subcloud {subcloud['name']}.") def reconfigure_subcloud(self, context, subcloud_id, payload): """Reconfigure subcloud :param context: request context object :param subcloud_id: id of the subcloud :param payload: subcloud configuration """ LOG.info("Reconfiguring subcloud %s." 
% subcloud_id) subcloud = db_api.subcloud_update( context, subcloud_id, deploy_status=consts.DEPLOY_STATE_PRE_DEPLOY) try: # Ansible inventory filename for the specified subcloud ansible_subcloud_inventory_file = self._get_ansible_filename( subcloud.name, INVENTORY_FILE_POSTFIX) config_command = None if "deploy_playbook" in payload: self._prepare_for_deployment(payload, subcloud.name) config_command = self.compose_config_command( subcloud.name, ansible_subcloud_inventory_file, payload) del payload['sysadmin_password'] apply_thread = threading.Thread( target=self.run_deploy_thread, args=(subcloud, payload, context, None, None, config_command)) apply_thread.start() return db_api.subcloud_db_model_to_dict(subcloud) except Exception: LOG.exception("Failed to create subcloud %s" % subcloud.name) # If we failed to create the subcloud, update the # deployment status db_api.subcloud_update( context, subcloud_id, deploy_status=consts.DEPLOY_STATE_DEPLOY_PREP_FAILED) def reinstall_subcloud(self, context, subcloud_id, payload): """Reinstall subcloud :param context: request context object :param subcloud_id: subcloud id from db :param payload: subcloud reinstall """ # Retrieve the subcloud details from the database subcloud = db_api.subcloud_get(context, subcloud_id) LOG.info("Reinstalling subcloud %s." % subcloud_id) try: ansible_subcloud_inventory_file = self._get_ansible_filename( subcloud.name, INVENTORY_FILE_POSTFIX) m_ks_client = OpenStackDriver( region_name=dccommon_consts.DEFAULT_REGION_NAME, region_clients=None).keystone_client cached_regionone_data = self._get_cached_regionone_data(m_ks_client) self._populate_payload_with_cached_keystone_data( cached_regionone_data, payload) payload['install_values']['ansible_ssh_pass'] = \ payload['sysadmin_password'] payload['install_values']['ansible_become_pass'] = \ payload['sysadmin_password'] payload['bootstrap-address'] = \ payload['install_values']['bootstrap_address'] config_command = None if "deploy_playbook" in payload: self._prepare_for_deployment(payload, subcloud.name) config_command = self.compose_config_command( subcloud.name, ansible_subcloud_inventory_file, payload) del payload['sysadmin_password'] payload['users'] = dict() for user in USERS_TO_REPLICATE: payload['users'][user] = \ str(keyring.get_password( user, dccommon_consts.SERVICES_USER_NAME)) utils.create_subcloud_inventory(payload, ansible_subcloud_inventory_file) self._create_intermediate_ca_cert(payload) self._write_subcloud_ansible_config(cached_regionone_data, payload) install_command = self.compose_install_command( subcloud.name, ansible_subcloud_inventory_file, payload['software_version']) bootstrap_command = self.compose_bootstrap_command( subcloud.name, ansible_subcloud_inventory_file, payload['software_version']) network_reconfig = utils.has_network_reconfig(payload, subcloud) apply_thread = threading.Thread( target=self.run_deploy_thread, args=(subcloud, payload, context, install_command, bootstrap_command, config_command, None, network_reconfig)) apply_thread.start() return db_api.subcloud_db_model_to_dict(subcloud) except Exception: LOG.exception("Failed to reinstall subcloud %s" % subcloud.name) # If we failed to reinstall the subcloud, update the # deployment status db_api.subcloud_update( context, subcloud_id, deploy_status=consts.DEPLOY_STATE_PRE_INSTALL_FAILED) def redeploy_subcloud(self, context, subcloud_id, payload): """Redeploy subcloud :param context: request context object :param subcloud_id: subcloud id from db :param payload: subcloud redeploy """ # 
Retrieve the subcloud details from the database subcloud = db_api.subcloud_get(context, subcloud_id) LOG.info("Redeploying subcloud %s." % subcloud.name) # Define which deploy phases to run phases_to_run = [consts.DEPLOY_PHASE_INSTALL, consts.DEPLOY_PHASE_BOOTSTRAP] if consts.DEPLOY_CONFIG in payload: phases_to_run.append(consts.DEPLOY_PHASE_CONFIG) succeeded = self.run_deploy_phases(context, subcloud_id, payload, phases_to_run) if succeeded: db_api.subcloud_update( context, subcloud.id, deploy_status=consts.DEPLOY_STATE_DONE, error_description=consts.ERROR_DESC_EMPTY) LOG.info(f"Finished redeploying subcloud {subcloud['name']}.") def create_subcloud_backups(self, context, payload): """Backup subcloud or group of subclouds :param context: request context object :param payload: subcloud backup create detail """ subcloud_id = payload.get('subcloud') group_id = payload.get('group') # Retrieve either a single subcloud or all subclouds in a group subclouds = [db_api.subcloud_get(context, subcloud_id)] if subcloud_id\ else db_api.subcloud_get_for_group(context, group_id) self._filter_subclouds_with_ongoing_backup(subclouds) self._update_backup_status(context, subclouds, consts.BACKUP_STATE_INITIAL) # Validate the subclouds and filter the ones applicable for backup self._update_backup_status(context, subclouds, consts.BACKUP_STATE_VALIDATING) subclouds_to_backup, invalid_subclouds = \ self._validate_subclouds_for_backup(subclouds, 'create') self._mark_invalid_subclouds_for_backup(context, invalid_subclouds) # Use thread pool to limit number of operations in parallel backup_pool = greenpool.GreenPool(size=MAX_PARALLEL_SUBCLOUD_BACKUP_CREATE) # Spawn threads to back up each applicable subcloud backup_function = functools.partial(self._backup_subcloud, context, payload) self._run_parallel_group_operation('backup create', backup_function, backup_pool, subclouds_to_backup) LOG.info("Subcloud backup operation finished") def delete_subcloud_backups(self, context, release_version, payload): """Delete backups for subcloud or group of subclouds for a given release :param context: request context object :param release_version Backup release version to be deleted :param payload: subcloud backup delete detail """ local_delete = payload.get('local_only') subclouds_to_delete_backup, invalid_subclouds = \ self._filter_subclouds_for_backup_delete(context, payload, local_delete) # Spawn threads to back up each applicable subcloud backup_delete_function = functools.partial( self._delete_subcloud_backup, context, payload, release_version) # Use thread pool to limit number of operations in parallel max_parallel_operations = MAX_PARALLEL_SUBCLOUD_BACKUP_DELETE backup_delete_pool = greenpool.GreenPool(size=max_parallel_operations) failed_subclouds = self._run_parallel_group_operation( 'backup delete', backup_delete_function, backup_delete_pool, subclouds_to_delete_backup) LOG.info("Subcloud backup delete operation finished") return self._subcloud_operation_notice('delete', subclouds_to_delete_backup, failed_subclouds, invalid_subclouds) def restore_subcloud_backups(self, context, payload): """Restore a subcloud or group of subclouds from backup data :param context: request context object :param payload: restore backup subcloud detail """ subcloud_id = payload.get('subcloud') group_id = payload.get('group') # Initialize subclouds lists restore_subclouds, invalid_subclouds, failed_subclouds = ( list(), list(), list()) # Retrieve either a single subcloud or all subclouds in a group subclouds = ( 
[db_api.subcloud_get(context, subcloud_id)] if subcloud_id else db_api.subcloud_get_for_group(context, group_id) ) restore_subclouds, invalid_subclouds = ( self._validate_subclouds_for_backup(subclouds, 'restore') ) if restore_subclouds: # Use thread pool to limit number of operations in parallel restore_pool = greenpool.GreenPool( size=MAX_PARALLEL_SUBCLOUD_BACKUP_RESTORE) # Spawn threads to back up each applicable subcloud restore_function = functools.partial( self._restore_subcloud_backup, context, payload) failed_subclouds = self._run_parallel_group_operation( 'backup restore', restore_function, restore_pool, restore_subclouds ) restored_subclouds = len(restore_subclouds) - len(failed_subclouds) LOG.info("Subcloud restore backup operation finished.\n" "Restored subclouds: %s. Invalid subclouds: %s. " "Failed subclouds: %s." % (restored_subclouds, len(invalid_subclouds), len(failed_subclouds))) return self._subcloud_operation_notice('restore', restore_subclouds, failed_subclouds, invalid_subclouds) def _deploy_bootstrap_prep(self, context, subcloud, payload: dict, ansible_subcloud_inventory_file): """Run the preparation steps needed to run the bootstrap operation :param context: target request context object :param subcloud: subcloud model object :param payload: bootstrap request parameters :param ansible_subcloud_inventory_file: the ansible inventory file path :return: ansible command needed to run the bootstrap playbook """ network_reconfig = utils.has_network_reconfig(payload, subcloud) if network_reconfig: self._configure_system_controller_network(context, payload, subcloud, update_db=False) # Regenerate the addn_hosts_dc file self._create_addn_hosts_dc(context) # Update subcloud subcloud = db_api.subcloud_update( context, subcloud.id, description=payload.get("description"), management_subnet=utils.get_management_subnet(payload), management_gateway_ip=utils.get_management_gateway_address( payload), management_start_ip=utils.get_management_start_address( payload), management_end_ip=utils.get_management_end_address(payload), systemcontroller_gateway_ip=payload.get( "systemcontroller_gateway_address"), location=payload.get("location"), deploy_status=consts.DEPLOY_STATE_PRE_BOOTSTRAP) # Populate payload with passwords payload['ansible_become_pass'] = payload['sysadmin_password'] payload['ansible_ssh_pass'] = payload['sysadmin_password'] payload['admin_password'] = str(keyring.get_password('CGCS', 'admin')) payload_without_sysadmin_password = payload.copy() if 'sysadmin_password' in payload_without_sysadmin_password: del payload_without_sysadmin_password['sysadmin_password'] # Update the ansible overrides file overrides_file = os.path.join(dccommon_consts.ANSIBLE_OVERRIDES_PATH, subcloud.name + '.yml') utils.update_values_on_yaml_file(overrides_file, payload_without_sysadmin_password) # Update the ansible inventory for the subcloud utils.create_subcloud_inventory(payload, ansible_subcloud_inventory_file) bootstrap_command = self.compose_bootstrap_command( subcloud.name, ansible_subcloud_inventory_file, subcloud.software_version) return bootstrap_command def _deploy_config_prep(self, subcloud, payload: dict, ansible_subcloud_inventory_file): """Run the preparation steps needed to run the config operation :param subcloud: target subcloud model object :param payload: config request parameters :param ansible_subcloud_inventory_file: the ansible inventory file path :return: ansible command needed to run the config playbook """ self._prepare_for_deployment(payload, subcloud.name) 
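        # Compose the ansible-playbook invocation that applies the user's
        # deploy/config playbook against this subcloud's inventory file.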
config_command = self.compose_config_command( subcloud.name, ansible_subcloud_inventory_file, payload) return config_command def _deploy_install_prep(self, subcloud, payload: dict, ansible_subcloud_inventory_file): """Run the preparation steps needed to run the install operation :param subcloud: target subcloud model object :param payload: install request parameters :param ansible_subcloud_inventory_file: the ansible inventory file path :return: ansible command needed to run the install playbook """ payload['install_values']['ansible_ssh_pass'] = \ payload['sysadmin_password'] payload['install_values']['ansible_become_pass'] = \ payload['sysadmin_password'] # If all update_values already exists on override file or are # the same as the existing ones, the update won't happen # and the file will remain untouched bootstrap_file = psd_common.get_config_file_path(subcloud.name, consts.BOOTSTRAP_VALUES) update_values = {'software_version': payload['software_version'], 'bmc_password': payload['bmc_password'], 'ansible_ssh_pass': payload['sysadmin_password'], 'ansible_become_pass': payload['sysadmin_password'] } utils.update_values_on_yaml_file(bootstrap_file, update_values) install_command = self.compose_install_command( subcloud.name, ansible_subcloud_inventory_file, payload['software_version']) return install_command def subcloud_deploy_abort(self, context, subcloud_id, deploy_status): """Abort the subcloud deploy :param context: request context object :param subcloud_id: subcloud id from db :param deploy_status: subcloud deploy status from db """ LOG.info("Aborting deployment of subcloud %s." % subcloud_id) subcloud = utils.update_abort_status(context, subcloud_id, deploy_status) try: run_ansible = RunAnsible() aborted = run_ansible.run_abort(subcloud.name) if not aborted: LOG.warning("Ansible deploy phase subprocess of %s " "was terminated before it could be aborted" % subcloud.name) # let the main phase thread handle the state update return if subcloud.deploy_status == consts.DEPLOY_STATE_ABORTING_INSTALL: # First delete the k8s job and pod, stopping the current # installation attempt if exists # Then send shutdown signal to subcloud kube = kubeoperator.KubeOperator() shutdown_subcloud = SubcloudShutdown(subcloud.name) namespace = dccommon_consts.RVMC_NAME_PREFIX jobname = '%s-%s' % (namespace, subcloud.name) pod_basename = '%s-' % jobname all_pods = kube.get_pods_by_namespace(namespace) desired_pod = next((s for s in all_pods if pod_basename in s), None) if desired_pod: kube.kube_delete_job(jobname, namespace) kube.kube_delete_pod(desired_pod, namespace) while kube.pod_exists(desired_pod, namespace): time.sleep(2) shutdown_subcloud.send_shutdown_signal() except Exception as ex: LOG.error("Subcloud deploy abort failed for subcloud %s" % subcloud.name) utils.update_abort_status(context, subcloud.id, subcloud.deploy_status, abort_failed=True) # exception is logged above raise ex LOG.info("Successfully aborted deployment of %s" % subcloud.name) utils.update_abort_status(context, subcloud.id, subcloud.deploy_status) def subcloud_deploy_resume(self, context, subcloud_id, subcloud_name, payload: dict, deploy_states_to_run): """Resume the subcloud deployment :param context: request context object :param subcloud_id: subcloud id from db :param subcloud_name: name of the subcloud :param payload: subcloud resume payload :param deploy_states_to_run: deploy phases pending execution """ LOG.info("Resuming deployment of subcloud %s. 
                 "Deploy phases to be executed: %s" % (
                     subcloud_name, ', '.join(deploy_states_to_run)))
        self.run_deploy_phases(context, subcloud_id, payload,
                               deploy_states_to_run)

    def subcloud_deploy_create(self, context, subcloud_id, payload,
                               rehoming=False, return_as_dict=True):
        """Create subcloud and notify orchestrators.

        :param context: request context object
        :param subcloud_id: subcloud_id from db
        :param payload: subcloud configuration
        :param rehoming: flag indicating if this is part of a rehoming
                         operation
        :param return_as_dict: converts the subcloud DB object to a dict
                               before returning
        :return: resulting subcloud DB object or dictionary
        """
        LOG.info("Creating subcloud %s." % payload['name'])

        if rehoming:
            deploy_state = consts.DEPLOY_STATE_PRE_REHOME
        else:
            deploy_state = consts.DEPLOY_STATE_CREATING

        subcloud = db_api.subcloud_update(
            context, subcloud_id,
            deploy_status=deploy_state)

        try:
            # Create a new route to this subcloud on the management interface
            # on both controllers.
            m_ks_client = OpenStackDriver(
                region_name=dccommon_consts.DEFAULT_REGION_NAME,
                region_clients=None).keystone_client
            subcloud_subnet = netaddr.IPNetwork(
                utils.get_management_subnet(payload))
            endpoint = m_ks_client.endpoint_cache.get_endpoint('sysinv')
            sysinv_client = SysinvClient(dccommon_consts.DEFAULT_REGION_NAME,
                                         m_ks_client.session,
                                         endpoint=endpoint)
            LOG.debug("Getting cached regionone data for %s" % subcloud.name)
            cached_regionone_data = self._get_cached_regionone_data(
                m_ks_client, sysinv_client)
            for mgmt_if_uuid in cached_regionone_data['mgmt_interface_uuids']:
                sysinv_client.create_route(
                    mgmt_if_uuid,
                    str(subcloud_subnet.ip),
                    subcloud_subnet.prefixlen,
                    payload['systemcontroller_gateway_address'],
                    1)

            # Create endpoints to this subcloud on the
            # management-start-ip of the subcloud which will be allocated
            # as the floating Management IP of the Subcloud if the
            # Address Pool is not shared. In case the endpoint entries
            # are incorrect, or the management IP of the subcloud is changed
            # in the future, the subcloud will not go managed or will show up
            # as out of sync. To fix this, use OpenStack endpoint commands
            # on the SystemController to change the subcloud endpoints.
            # The non-identity endpoints are added to facilitate horizon
            # access from the System Controller to the subcloud.
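            # Build the per-service admin endpoint entries for this subcloud
            # from ENDPOINT_URLS; e.g. the platform (sysinv) admin endpoint
            # becomes https://<management-start-ip>:6386/v1.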
endpoint_config = [] endpoint_ip = utils.get_management_start_address(payload) if netaddr.IPAddress(endpoint_ip).version == 6: endpoint_ip = '[' + endpoint_ip + ']' for service in m_ks_client.services_list: admin_endpoint_url = ENDPOINT_URLS.get(service.type, None) if admin_endpoint_url: admin_endpoint_url = admin_endpoint_url.format(endpoint_ip) endpoint_config.append( {"id": service.id, "admin_endpoint_url": admin_endpoint_url}) if len(endpoint_config) < len(ENDPOINT_URLS): raise exceptions.BadRequest( resource='subcloud', msg='Missing service in SystemController') for endpoint in endpoint_config: try: m_ks_client.keystone_client.endpoints.create( endpoint["id"], endpoint['admin_endpoint_url'], interface=dccommon_consts.KS_ENDPOINT_ADMIN, region=subcloud.name) except Exception as e: # Keystone service must be temporarily busy, retry LOG.error(str(e)) m_ks_client.keystone_client.endpoints.create( endpoint["id"], endpoint['admin_endpoint_url'], interface=dccommon_consts.KS_ENDPOINT_ADMIN, region=subcloud.name) # Inform orchestrator that subcloud has been added self.dcorch_rpc_client.add_subcloud( context, subcloud.name, subcloud.software_version) # create entry into alarm summary table, will get real values later alarm_updates = {'critical_alarms': -1, 'major_alarms': -1, 'minor_alarms': -1, 'warnings': -1, 'cloud_status': consts.ALARMS_DISABLED} db_api.subcloud_alarms_create(context, subcloud.name, alarm_updates) # Regenerate the addn_hosts_dc file self._create_addn_hosts_dc(context) self._populate_payload_with_cached_keystone_data( cached_regionone_data, payload, populate_passwords=False) if "deploy_playbook" in payload: self._prepare_for_deployment(payload, subcloud.name, populate_passwords=False) payload['users'] = {} for user in USERS_TO_REPLICATE: payload['users'][user] = \ str(keyring.get_password( user, dccommon_consts.SERVICES_USER_NAME)) # Ansible inventory filename for the specified subcloud ansible_subcloud_inventory_file = utils.get_ansible_filename( subcloud.name, INVENTORY_FILE_POSTFIX) # Create the ansible inventory for the new subcloud utils.create_subcloud_inventory(payload, ansible_subcloud_inventory_file) # create subcloud intermediate certificate and pass in keys self._create_intermediate_ca_cert(payload) # Write this subclouds overrides to file # NOTE: This file should not be deleted if subcloud add fails # as it is used for debugging self._write_subcloud_ansible_config(cached_regionone_data, payload) if not rehoming: deploy_state = consts.DEPLOY_STATE_CREATED except Exception: LOG.exception("Failed to create subcloud %s" % payload['name']) # If we failed to create the subcloud, update the deployment status if rehoming: deploy_state = consts.DEPLOY_STATE_REHOME_PREP_FAILED else: deploy_state = consts.DEPLOY_STATE_CREATE_FAILED subcloud = db_api.subcloud_update( context, subcloud.id, deploy_status=deploy_state) # The RPC call must return the subcloud as a dictionary, otherwise it # should return the DB object for dcmanager internal use (subcloud add) if return_as_dict: subcloud = db_api.subcloud_db_model_to_dict(subcloud) return subcloud def subcloud_deploy_install(self, context, subcloud_id, payload: dict) -> bool: """Install subcloud :param context: request context object :param subcloud_id: subcloud id from db :param payload: subcloud Install :return: success status """ # Retrieve the subcloud details from the database subcloud = db_api.subcloud_update( context, subcloud_id, software_version=payload['software_version'], deploy_status=consts.DEPLOY_STATE_PRE_INSTALL, 
data_install=json.dumps(payload['install_values'])) LOG.info("Installing subcloud %s." % subcloud.name) try: log_file = ( os.path.join(consts.DC_ANSIBLE_LOG_DIR, subcloud.name) + "_playbook_output.log" ) ansible_subcloud_inventory_file = self._get_ansible_filename( subcloud.name, INVENTORY_FILE_POSTFIX) install_command = self._deploy_install_prep( subcloud, payload, ansible_subcloud_inventory_file) install_success = self._run_subcloud_install( context, subcloud, install_command, log_file, payload['install_values'], abortable=True) if install_success: db_api.subcloud_update( context, subcloud.id, deploy_status=consts.DEPLOY_STATE_INSTALLED, error_description=consts.ERROR_DESC_EMPTY) return install_success except Exception: LOG.exception("Failed to install subcloud %s" % subcloud.name) # If we failed to install the subcloud, # update the deployment status db_api.subcloud_update( context, subcloud_id, deploy_status=consts.DEPLOY_STATE_PRE_INSTALL_FAILED) return False def subcloud_deploy_bootstrap(self, context, subcloud_id, payload): """Bootstrap subcloud :param context: request context object :param subcloud_id: subcloud_id from db :param payload: subcloud bootstrap configuration :return: success status """ LOG.info("Bootstrapping subcloud %s." % payload['name']) # Retrieve the subcloud details from the database subcloud = db_api.subcloud_get(context, subcloud_id) try: log_file = ( os.path.join(consts.DC_ANSIBLE_LOG_DIR, subcloud.name) + "_playbook_output.log" ) ansible_subcloud_inventory_file = self._get_ansible_filename( subcloud.name, INVENTORY_FILE_POSTFIX) bootstrap_command = self._deploy_bootstrap_prep( context, subcloud, payload, ansible_subcloud_inventory_file) bootstrap_success = self._run_subcloud_bootstrap( context, subcloud, bootstrap_command, log_file) return bootstrap_success except Exception: LOG.exception("Failed to bootstrap subcloud %s" % payload['name']) db_api.subcloud_update( context, subcloud_id, deploy_status=consts.DEPLOY_STATE_PRE_BOOTSTRAP_FAILED) return False def subcloud_deploy_config(self, context, subcloud_id, payload: dict) -> bool: """Configure subcloud :param context: request context object :param subcloud_id: subcloud_id from db :param payload: subcloud configuration :return: success status """ LOG.info("Configuring subcloud %s." % subcloud_id) subcloud = db_api.subcloud_update( context, subcloud_id, deploy_status=consts.DEPLOY_STATE_PRE_CONFIG) try: log_file = ( os.path.join(consts.DC_ANSIBLE_LOG_DIR, subcloud.name) + "_playbook_output.log" ) # Ansible inventory filename for the specified subcloud ansible_subcloud_inventory_file = self._get_ansible_filename( subcloud.name, INVENTORY_FILE_POSTFIX) self._prepare_for_deployment(payload, subcloud.name) config_command = self.compose_config_command( subcloud.name, ansible_subcloud_inventory_file, payload) config_success = self._run_subcloud_config(subcloud, context, config_command, log_file) return config_success except Exception: LOG.exception("Failed to configure %s" % subcloud.name) db_api.subcloud_update( context, subcloud_id, deploy_status=consts.DEPLOY_STATE_PRE_CONFIG_FAILED) return False def subcloud_deploy_complete(self, context, subcloud_id): """Completes the subcloud deployment. :param context: request context object :param subcloud_id: subcloud_id from db :return: resulting subcloud dictionary """ LOG.info("Completing subcloud %s deployment." 
                 % subcloud_id)

        # Just update the deploy status
        subcloud = db_api.subcloud_update(
            context, subcloud_id,
            deploy_status=consts.DEPLOY_STATE_DONE)

        LOG.info("Subcloud %s deploy status set to: %s"
                 % (subcloud_id, consts.DEPLOY_STATE_DONE))

        return db_api.subcloud_db_model_to_dict(subcloud)

    def _subcloud_operation_notice(
            self, operation, restore_subclouds, failed_subclouds,
            invalid_subclouds):
        all_failed = ((not set(restore_subclouds) - set(failed_subclouds))
                      and not invalid_subclouds)
        if all_failed:
            LOG.error("Backup %s failed for all applied subclouds" % operation)
            raise exceptions.SubcloudBackupOperationFailed(operation=operation)

        if invalid_subclouds:
            self._warn_for_invalid_subclouds_on_backup_operation(
                invalid_subclouds)
        if failed_subclouds:
            self._warn_for_failed_subclouds_on_backup_operation(
                operation, failed_subclouds)
        if invalid_subclouds or failed_subclouds:
            return self._build_subcloud_operation_notice(
                operation, failed_subclouds, invalid_subclouds)
        return

    def _filter_subclouds_with_ongoing_backup(self, subclouds):
        i = 0
        while i < len(subclouds):
            subcloud = subclouds[i]
            if subcloud.backup_status in consts.STATES_FOR_ONGOING_BACKUP:
                LOG.info(_('Subcloud %s already has a backup operation in '
                           'progress' % subcloud.name))
                subclouds.pop(i)
            else:
                i += 1

    def _validate_subclouds_for_backup(self, subclouds, operation):
        valid_subclouds = []
        invalid_subclouds = []
        for subcloud in subclouds:
            is_valid = False
            try:
                if utils.is_valid_for_backup_operation(operation, subcloud):
                    is_valid = True
            except exceptions.ValidateFail:
                is_valid = False
            if is_valid:
                valid_subclouds.append(subcloud)
            else:
                invalid_subclouds.append(subcloud)
        return valid_subclouds, invalid_subclouds

    @staticmethod
    def _mark_invalid_subclouds_for_backup(context, invalid_subclouds):
        try:
            invalid_ids = {subcloud.id for subcloud in invalid_subclouds}
            invalid_names = {subcloud.name for subcloud in invalid_subclouds}
            if invalid_ids:
                # Set state on subclouds that failed validation
                LOG.warn('The following subclouds are not online and/or managed '
                         'and/or in a valid deploy state, and will not be backed '
                         'up: %s', ', '.join(list(invalid_names)))
                SubcloudManager._update_backup_status_by_ids(
                    context, invalid_ids,
                    consts.BACKUP_STATE_VALIDATE_FAILED)
        except DCManagerException as ex:
            LOG.exception("Subcloud backup validation failed")
            raise ex

    @staticmethod
    def _warn_for_invalid_subclouds_on_backup_operation(invalid_subclouds):
        invalid_names = {subcloud.name for subcloud in invalid_subclouds}
        LOG.warn('The following subclouds were not online and/or in a valid '
                 'deploy/management state, and thus could not be reached '
                 'for the backup operation: %s', ', '.join(list(invalid_names)))

    @staticmethod
    def _warn_for_failed_subclouds_on_backup_operation(operation,
                                                       failed_subclouds):
        failed_names = {subcloud.name for subcloud in failed_subclouds}
        LOG.warn('Backup %s operation failed for some subclouds, '
                 'check previous logs for details. 
Failed subclouds: %s' % (operation, ', '.join(list(failed_names)))) @staticmethod def _update_backup_status(context, subclouds, backup_status): subcloud_ids = [subcloud.id for subcloud in subclouds] return SubcloudManager.\ _update_backup_status_by_ids(context, subcloud_ids, backup_status) @staticmethod def _update_backup_status_by_ids(context, subcloud_ids, backup_status): validate_state_form = { Subcloud.backup_status.name: backup_status } db_api.subcloud_bulk_update_by_ids(context, subcloud_ids, validate_state_form) @staticmethod def _run_parallel_group_operation(op_type, op_function, thread_pool, subclouds): failed_subclouds = [] processed = 0 for subcloud, success in thread_pool.imap(op_function, subclouds): processed += 1 if not success: failed_subclouds.append(subcloud) completion = float(processed) / float(len(subclouds)) * 100 remaining = len(subclouds) - processed LOG.info("Processed subcloud %s for %s (operation %.0f%% " "complete, %d subcloud(s) remaining)" % (subcloud.name, op_type, completion, remaining)) return failed_subclouds def _backup_subcloud(self, context, payload, subcloud): try: # Health check validation if not utils.is_subcloud_healthy(subcloud.name): db_api.subcloud_update( context, subcloud.id, backup_status=consts.BACKUP_STATE_VALIDATE_FAILED, ) LOG.info( ("Subcloud %s is not in good health for subcloud-backup create") % subcloud.name ) return subcloud, False db_api.subcloud_update( context, subcloud.id, backup_status=consts.BACKUP_STATE_PRE_BACKUP, ) subcloud_inventory_file = self._create_subcloud_inventory_file(subcloud) # Prepare for backup overrides_file = self._create_overrides_for_backup_or_restore( 'create', payload, subcloud.name ) backup_command = self.compose_backup_command( subcloud.name, subcloud_inventory_file) self._clear_subcloud_backup_failure_alarm_if_exists(subcloud) except Exception: self._fail_subcloud_backup_prep(context, subcloud) return subcloud, False local_only = payload.get('local_only') or False success = self._run_subcloud_backup_create_playbook( subcloud, backup_command, context, local_only) if success: utils.delete_subcloud_inventory(overrides_file) return subcloud, success def _filter_subclouds_for_backup_delete(self, context, payload, local_delete): subcloud_id = payload.get('subcloud') group_id = payload.get('group') # Retrieve either a single subcloud or all subclouds in a group subclouds = [db_api.subcloud_get(context, subcloud_id)] if subcloud_id \ else db_api.subcloud_get_for_group(context, group_id) invalid_subclouds = [] # Subcloud state validation only required for local delete if local_delete: # Use same criteria defined for subcloud backup create subclouds_to_delete_backup, invalid_subclouds = \ self._validate_subclouds_for_backup(subclouds, 'delete') else: # Otherwise, validation is unnecessary, since connection is not required subclouds_to_delete_backup = subclouds return subclouds_to_delete_backup, invalid_subclouds def _delete_subcloud_backup(self, context, payload, release_version, subcloud): try: overrides_file = self._create_overrides_for_backup_delete( payload, subcloud.name, release_version ) inventory_file = self._create_subcloud_inventory_file(subcloud) delete_command = self.compose_backup_delete_command( subcloud.name, inventory_file) except Exception: LOG.exception("Failed to prepare subcloud %s for backup delete" % subcloud.name) return subcloud, False success = self._run_subcloud_backup_delete_playbook(context, subcloud, delete_command) if success: utils.delete_subcloud_inventory(overrides_file) return 
subcloud, success def _restore_subcloud_backup(self, context, payload, subcloud): log_file = (os.path.join(consts.DC_ANSIBLE_LOG_DIR, subcloud.name) + '_playbook_output.log') data_install = json.loads(subcloud.data_install) try: db_api.subcloud_update( context, subcloud.id, deploy_status=consts.DEPLOY_STATE_PRE_RESTORE ) subcloud_inventory_file = self._create_subcloud_inventory_file( subcloud, data_install=data_install) # Prepare for restore overrides_file = self._create_overrides_for_backup_or_restore( 'restore', payload, subcloud.name ) restore_command = self.compose_backup_restore_command( subcloud.name, subcloud_inventory_file) except Exception: db_api.subcloud_update( context, subcloud.id, deploy_status=consts.DEPLOY_STATE_RESTORE_PREP_FAILED ) LOG.exception("Failed to prepare subcloud %s for backup restore" % subcloud.name) return subcloud, False if payload.get('with_install'): software_version = payload.get('software_version') install_command = self.compose_install_command( subcloud.name, subcloud_inventory_file, software_version) # Update data_install with missing data matching_iso, _ = utils.get_vault_load_files(software_version) data_install['software_version'] = software_version data_install['image'] = matching_iso data_install['ansible_ssh_pass'] = payload['sysadmin_password'] data_install['ansible_become_pass'] = payload['sysadmin_password'] install_success = self._run_subcloud_install( context, subcloud, install_command, log_file, data_install) if not install_success: return subcloud, False success = self._run_subcloud_backup_restore_playbook( subcloud, restore_command, context, log_file) if success: utils.delete_subcloud_inventory(overrides_file) return subcloud, success @staticmethod def _build_subcloud_operation_notice(operation, failed_subclouds, invalid_subclouds): invalid_subcloud_names = [subcloud.name for subcloud in invalid_subclouds] failed_subcloud_names = [subcloud.name for subcloud in failed_subclouds] notice = "Subcloud backup %s operation completed with warnings:\n" % operation if invalid_subclouds: notice += ("The following subclouds were skipped for local backup " "%s operation: %s." % (operation, ' ,'.join(invalid_subcloud_names))) if failed_subclouds: notice += ("The following subclouds failed during backup " "%s operation: %s." 
% (operation, ' ,'.join(failed_subcloud_names))) return notice def _create_subcloud_inventory_file(self, subcloud, data_install=None): # Ansible inventory filename for the specified subcloud ansible_subcloud_inventory_file = self._get_ansible_filename( subcloud.name, INVENTORY_FILE_POSTFIX) oam_fip = None # Restore is restrict to Redfish enabled subclouds if data_install: oam_fip = data_install.get('bootstrap_address') else: # Use subcloud floating IP for host reachability keystone_client = OpenStackDriver( region_name=subcloud.name, region_clients=None).keystone_client oam_fip = utils.get_oam_addresses(subcloud.name, keystone_client)\ .oam_floating_ip # Add parameters used to generate inventory subcloud_params = {'name': subcloud.name, 'bootstrap-address': oam_fip} utils.create_subcloud_inventory(subcloud_params, ansible_subcloud_inventory_file) return ansible_subcloud_inventory_file def _create_overrides_for_backup_or_restore(self, op, payload, subcloud_name): # Set override names as expected by the playbook if not payload.get('override_values'): payload['override_values'] = {} payload['override_values']['local'] = \ payload['local_only'] or False if op == 'create': payload['override_values']['backup_registry_images'] = \ payload['registry_images'] or False suffix = 'backup_create_values' else: payload['override_values']['restore_registry_images'] = \ payload['registry_images'] or False suffix = 'backup_restore_values' if not payload['local_only']: payload['override_values']['central_backup_dir'] = CENTRAL_BACKUP_DIR payload['override_values']['ansible_ssh_pass'] = \ payload['sysadmin_password'] payload['override_values']['ansible_become_pass'] = \ payload['sysadmin_password'] payload['override_values']['admin_password'] = \ str(keyring.get_password('CGCS', 'admin')) if payload.get('backup_values'): LOG.info('Backup create: Received backup_values for subcloud %s' % subcloud_name) for key, value in payload.get('backup_values').items(): payload['override_values'][key] = value elif payload.get('restore_values'): LOG.info('Backup restore: Received restore_values for subcloud %s' % subcloud_name) for key, value in payload.get('restore_values').items(): payload['override_values'][key] = value return self._create_backup_overrides_file(payload, subcloud_name, suffix) def _create_overrides_for_backup_delete(self, payload, subcloud_name, release_version): # Set override names as expected by the playbook if not payload.get('override_values'): payload['override_values'] = {} payload['override_values']['software_version'] = release_version payload['override_values']['local'] = \ payload['local_only'] or False if not payload['local_only']: payload['override_values']['central_backup_dir'] = CENTRAL_BACKUP_DIR else: payload['override_values']['ansible_ssh_pass'] = \ payload['sysadmin_password'] payload['override_values']['ansible_become_pass'] = \ payload['sysadmin_password'] return self._create_backup_overrides_file( payload, subcloud_name, 'backup_delete_values' ) def _create_backup_overrides_file(self, payload, subcloud_name, filename_suffix): backup_overrides_file = os.path.join( dccommon_consts.ANSIBLE_OVERRIDES_PATH, subcloud_name + '_' + filename_suffix + '.yml') with open(backup_overrides_file, 'w') as f_out: f_out.write( '---\n' ) for k, v in payload['override_values'].items(): f_out.write("%s: %s\n" % (k, v)) return backup_overrides_file def _run_subcloud_backup_create_playbook(self, subcloud, backup_command, context, local_only): log_file = os.path.join(consts.DC_ANSIBLE_LOG_DIR, 
                                subcloud.name) + \
            '_playbook_output.log'

        db_api.subcloud_update(
            context, subcloud.id,
            backup_status=consts.BACKUP_STATE_IN_PROGRESS,
            error_description=consts.ERROR_DESC_EMPTY)

        # Run the subcloud backup playbook
        try:
            run_playbook(log_file, backup_command)

            # Decide between complete-local or complete-central
            if local_only:
                backup_status = consts.BACKUP_STATE_COMPLETE_LOCAL
            else:
                backup_status = consts.BACKUP_STATE_COMPLETE_CENTRAL

            db_api.subcloud_update(
                context, subcloud.id,
                backup_status=backup_status,
                backup_datetime=datetime.datetime.utcnow())

            LOG.info("Successfully backed up subcloud %s" % subcloud.name)
            return True
        except PlaybookExecutionFailed:
            self._fail_subcloud_backup_operation(context, log_file, subcloud)
            return False

    @staticmethod
    def _run_subcloud_backup_delete_playbook(context, subcloud, delete_command):
        log_file = os.path.join(consts.DC_ANSIBLE_LOG_DIR, subcloud.name) + \
            '_playbook_output.log'

        try:
            # Run the subcloud backup delete playbook
            run_playbook(log_file, delete_command)

            # Set backup status to unknown after delete, since most recent
            # backup may have been deleted
            db_api.subcloud_bulk_update_by_ids(
                context, [subcloud.id],
                {Subcloud.backup_status.name: consts.BACKUP_STATE_UNKNOWN,
                 Subcloud.backup_datetime.name: None})

            LOG.info("Successfully deleted backup for subcloud %s"
                     % subcloud.name)
            return True
        except PlaybookExecutionFailed:
            LOG.error("Failed to delete backup for subcloud %s, check individual "
                      "log at %s for detailed output."
                      % (subcloud.name, log_file))
            msg = utils.find_ansible_error_msg(
                subcloud.name, log_file, consts.BACKUP_STATE_FAILED)
            LOG.error(msg)
            db_api.subcloud_update(
                context, subcloud.id,
                error_description=msg[0:consts.ERROR_DESCRIPTION_LENGTH])
            return False

    def _run_subcloud_backup_restore_playbook(
            self, subcloud, restore_command, context, log_file):
        db_api.subcloud_update(
            context, subcloud.id,
            deploy_status=consts.DEPLOY_STATE_RESTORING,
            error_description=consts.ERROR_DESC_EMPTY
        )
        # Run the subcloud backup restore playbook
        try:
            run_playbook(log_file, restore_command)
            LOG.info("Successfully restored subcloud %s" % subcloud.name)
            db_api.subcloud_update(
                context, subcloud.id,
                deploy_status=consts.DEPLOY_STATE_DONE
            )
            return True
        except PlaybookExecutionFailed:
            msg = utils.find_ansible_error_msg(
                subcloud.name, log_file, consts.DEPLOY_STATE_RESTORING)
            LOG.error(msg)
            db_api.subcloud_update(
                context, subcloud.id,
                deploy_status=consts.DEPLOY_STATE_RESTORE_FAILED,
                error_description=msg[0:consts.ERROR_DESCRIPTION_LENGTH]
            )
            return False

    @staticmethod
    def _fail_subcloud_backup_prep(context, subcloud):
        LOG.exception("Failed to prepare subcloud %s for backup"
                      % subcloud.name)
        db_api.subcloud_update(
            context, subcloud.id,
            backup_status=consts.BACKUP_STATE_PREP_FAILED)

    def _fail_subcloud_backup_operation(self, context, log_file, subcloud):
        msg = utils.find_ansible_error_msg(
            subcloud.name, log_file, consts.BACKUP_STATE_IN_PROGRESS)
        LOG.error(msg)
        db_api.subcloud_update(
            context, subcloud.id,
            backup_status=consts.BACKUP_STATE_FAILED,
            error_description=msg[0:consts.ERROR_DESCRIPTION_LENGTH])
        self._set_subcloud_backup_failure_alarm(subcloud)

    def _clear_subcloud_backup_failure_alarm_if_exists(self, subcloud):
        entity_instance_id = "subcloud=%s" % subcloud.name
        try:
            fault = self.fm_api.get_fault(
                fm_const.FM_ALARM_ID_DC_SUBCLOUD_BACKUP_FAILED,
                entity_instance_id)
            if fault:
                self.fm_api.clear_fault(
                    fm_const.FM_ALARM_ID_DC_SUBCLOUD_BACKUP_FAILED,  # noqa
                    entity_instance_id)
        except Exception as e:
            LOG.exception(e)

    def _set_subcloud_backup_failure_alarm(self, subcloud):
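        """Raise the FM alarm indicating that a backup of this subcloud failed."""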
entity_instance_id = "subcloud=%s" % subcloud.name try: fault = fm_api.Fault( alarm_id=fm_const.FM_ALARM_ID_DC_SUBCLOUD_BACKUP_FAILED, # noqa alarm_state=fm_const.FM_ALARM_STATE_SET, entity_type_id=fm_const.FM_ENTITY_TYPE_SUBCLOUD, entity_instance_id=entity_instance_id, severity=fm_const.FM_ALARM_SEVERITY_MINOR, reason_text=("Subcloud Backup Failure (subcloud=%s)" % subcloud.name), alarm_type=fm_const.FM_ALARM_TYPE_3, probable_cause=fm_const.ALARM_PROBABLE_CAUSE_UNKNOWN, proposed_repair_action="Retry subcloud backup after checking input " "file. If problem persists, please contact " "next level of support.", service_affecting=False) self.fm_api.set_fault(fault) except Exception as e: LOG.exception(e) def run_deploy_thread(self, subcloud, payload, context, install_command=None, bootstrap_command=None, config_command=None, rehome_command=None, network_reconfig=None): try: self._run_deploy(subcloud, payload, context, install_command, bootstrap_command, config_command, rehome_command, network_reconfig) except Exception as ex: LOG.exception("run_deploy failed") raise ex def _run_deploy(self, subcloud, payload, context, install_command, bootstrap_command, config_command, rehome_command, network_reconfig): log_file = ( os.path.join(consts.DC_ANSIBLE_LOG_DIR, subcloud.name) + "_playbook_output.log" ) if install_command: install_success = self._run_subcloud_install( context, subcloud, install_command, log_file, payload['install_values'] ) if not install_success: return if bootstrap_command: try: # Update the subcloud to bootstrapping db_api.subcloud_update( context, subcloud.id, deploy_status=consts.DEPLOY_STATE_BOOTSTRAPPING, error_description=consts.ERROR_DESC_EMPTY) except Exception: LOG.error("DB subcloud_update failed") # exception is logged above raise # Run the ansible boostrap-subcloud playbook LOG.info("Starting bootstrap of %s" % subcloud.name) try: run_playbook(log_file, bootstrap_command) except PlaybookExecutionFailed: msg = utils.find_ansible_error_msg( subcloud.name, log_file, consts.DEPLOY_STATE_BOOTSTRAPPING) LOG.error(msg) db_api.subcloud_update( context, subcloud.id, deploy_status=consts.DEPLOY_STATE_BOOTSTRAP_FAILED, error_description=msg[0:consts.ERROR_DESCRIPTION_LENGTH]) return LOG.info("Successfully bootstrapped %s" % subcloud.name) if config_command: # Run the custom deploy playbook LOG.info("Starting deploy of %s" % subcloud.name) db_api.subcloud_update( context, subcloud.id, deploy_status=consts.DEPLOY_STATE_DEPLOYING, error_description=consts.ERROR_DESC_EMPTY) try: run_playbook(log_file, config_command) except PlaybookExecutionFailed: msg = utils.find_ansible_error_msg( subcloud.name, log_file, consts.DEPLOY_STATE_DEPLOYING) LOG.error(msg) db_api.subcloud_update( context, subcloud.id, deploy_status=consts.DEPLOY_STATE_DEPLOY_FAILED, error_description=msg[0:consts.ERROR_DESCRIPTION_LENGTH]) return LOG.info("Successfully deployed %s" % subcloud.name) if rehome_command: # Update the deploy status to rehoming db_api.subcloud_update( context, subcloud.id, deploy_status=consts.DEPLOY_STATE_REHOMING) # Run the rehome-subcloud playbook try: run_playbook(log_file, rehome_command) except PlaybookExecutionFailed: msg = "Failed to run the subcloud rehome playbook" \ " for subcloud %s, check individual log at " \ "%s for detailed output." 
% ( subcloud.name, log_file) LOG.error(msg) db_api.subcloud_update( context, subcloud.id, deploy_status=consts.DEPLOY_STATE_REHOME_FAILED) return LOG.info("Successfully rehomed subcloud %s" % subcloud.name) if network_reconfig: self._configure_system_controller_network(context, payload, subcloud) subcloud = db_api.subcloud_update( context, subcloud.id, description=payload.get('description', subcloud.description), management_subnet=utils.get_management_subnet(payload), management_gateway_ip=utils.get_management_gateway_address(payload), management_start_ip=utils.get_management_start_address(payload), management_end_ip=utils.get_management_end_address(payload), location=payload.get('location', subcloud.location), group_id=payload.get('group_id', subcloud.group_id), data_install=payload.get('data_install', subcloud.data_install) ) # Regenerate the addn_hosts_dc file self._create_addn_hosts_dc(context) db_api.subcloud_update( context, subcloud.id, deploy_status=consts.DEPLOY_STATE_DONE, error_description=consts.ERROR_DESC_EMPTY) def run_deploy_phases(self, context, subcloud_id, payload, deploy_phases_to_run): """Run one or more deployment phases, ensuring correct order :param context: request context object :param subcloud_id: subcloud id from db :param payload: deploy phases payload :param deploy_phases_to_run: deploy phases that should run """ try: succeeded = True if consts.DEPLOY_PHASE_INSTALL in deploy_phases_to_run: succeeded = self.subcloud_deploy_install( context, subcloud_id, payload) if succeeded and consts.DEPLOY_PHASE_BOOTSTRAP in deploy_phases_to_run: succeeded = self.subcloud_deploy_bootstrap( context, subcloud_id, payload) if succeeded and consts.DEPLOY_PHASE_CONFIG in deploy_phases_to_run: succeeded = self.subcloud_deploy_config( context, subcloud_id, payload) return succeeded except Exception as ex: LOG.exception("run_deploy_phases failed") raise ex def _run_subcloud_config(self, subcloud, context, config_command, log_file): # Run the custom deploy playbook LOG.info("Starting deploy of %s" % subcloud.name) db_api.subcloud_update( context, subcloud.id, deploy_status=consts.DEPLOY_STATE_CONFIGURING, error_description=consts.ERROR_DESC_EMPTY) try: run_ansible = RunAnsible() aborted = run_ansible.exec_playbook( log_file, config_command, subcloud.name) except PlaybookExecutionFailed: msg = utils.find_ansible_error_msg( subcloud.name, log_file, consts.DEPLOY_STATE_CONFIGURING) LOG.error(msg) db_api.subcloud_update( context, subcloud.id, deploy_status=consts.DEPLOY_STATE_CONFIG_FAILED, error_description=msg[0:consts.ERROR_DESCRIPTION_LENGTH]) return False if aborted: return False LOG.info("Successfully deployed %s" % subcloud.name) db_api.subcloud_update( context, subcloud.id, deploy_status=consts.DEPLOY_STATE_DONE, error_description=consts.ERROR_DESC_EMPTY) @staticmethod def _run_subcloud_install(context, subcloud, install_command, log_file, payload, abortable=False): software_version = str(payload['software_version']) LOG.info("Preparing remote install of %s, version: %s", subcloud.name, software_version) if (subcloud.deploy_status != consts.DEPLOY_STATE_PRE_INSTALL or subcloud.software_version != software_version): db_api.subcloud_update( context, subcloud.id, deploy_status=consts.DEPLOY_STATE_PRE_INSTALL, software_version=software_version) try: install = SubcloudInstall(context, subcloud.name) install.prep(dccommon_consts.ANSIBLE_OVERRIDES_PATH, payload) except Exception as e: LOG.exception(e) db_api.subcloud_update( context, subcloud.id, 
deploy_status=consts.DEPLOY_STATE_PRE_INSTALL_FAILED) LOG.error(str(e)) install.cleanup(software_version) return False # Run the remote install playbook LOG.info("Starting remote install of %s" % subcloud.name) db_api.subcloud_update( context, subcloud.id, deploy_status=consts.DEPLOY_STATE_INSTALLING, error_description=consts.ERROR_DESC_EMPTY) try: aborted = install.install( consts.DC_ANSIBLE_LOG_DIR, install_command, abortable=abortable) except Exception as e: msg = utils.find_ansible_error_msg( subcloud.name, log_file, consts.DEPLOY_STATE_INSTALLING) LOG.error(str(e)) LOG.error(msg) db_api.subcloud_update( context, subcloud.id, deploy_status=consts.DEPLOY_STATE_INSTALL_FAILED, error_description=msg[0:consts.ERROR_DESCRIPTION_LENGTH]) install.cleanup(software_version) return False install.cleanup(software_version) if aborted: return False LOG.info("Successfully installed %s" % subcloud.name) return True def _run_subcloud_bootstrap(self, context, subcloud, bootstrap_command, log_file): # Update the subcloud deploy_status to bootstrapping db_api.subcloud_update( context, subcloud.id, deploy_status=consts.DEPLOY_STATE_BOOTSTRAPPING, error_description=consts.ERROR_DESC_EMPTY) # Run the ansible subcloud boostrap playbook LOG.info("Starting bootstrap of %s" % subcloud.name) try: run_ansible = RunAnsible() aborted = run_ansible.exec_playbook( log_file, bootstrap_command, subcloud.name) except PlaybookExecutionFailed: msg = utils.find_ansible_error_msg( subcloud.name, log_file, consts.DEPLOY_STATE_BOOTSTRAPPING) LOG.error(msg) db_api.subcloud_update( context, subcloud.id, deploy_status=consts.DEPLOY_STATE_BOOTSTRAP_FAILED, error_description=msg[0:consts.ERROR_DESCRIPTION_LENGTH]) return False if aborted: return False db_api.subcloud_update( context, subcloud.id, deploy_status=consts.DEPLOY_STATE_BOOTSTRAPPED, error_description=consts.ERROR_DESC_EMPTY) LOG.info("Successfully bootstrapped %s" % subcloud.name) return True def _create_addn_hosts_dc(self, context): """Generate the addn_hosts_dc file for hostname/ip translation""" addn_hosts_dc = os.path.join(CONFIG_PATH, ADDN_HOSTS_DC) addn_hosts_dc_temp = addn_hosts_dc + '.temp' subclouds = db_api.subcloud_get_all(context) with open(addn_hosts_dc_temp, 'w') as f_out_addn_dc_temp: for subcloud in subclouds: addn_dc_line = subcloud.management_start_ip + ' ' + \ subcloud.name + '\n' f_out_addn_dc_temp.write(addn_dc_line) # if no more subclouds, create empty file so dnsmasq does not # emit an error log. if not subclouds: f_out_addn_dc_temp.write(' ') if not filecmp.cmp(addn_hosts_dc_temp, addn_hosts_dc): os.rename(addn_hosts_dc_temp, addn_hosts_dc) # restart dnsmasq so it can re-read our addn_hosts file. 
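            # For illustration only (addresses and names below are
            # hypothetical, not taken from a real deployment): each managed
            # subcloud contributes one "<management_start_ip> <name>" line to
            # the generated file, e.g.
            #
            #   192.168.101.2 subcloud1
            #   192.168.102.2 subcloud2
            #
            # Sending SIGHUP makes dnsmasq re-read its addn-hosts file without
            # a full service restart, which is why a HUP is sent below.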
os.system("pkill -HUP dnsmasq") def _write_subcloud_ansible_config(self, cached_regionone_data, payload): """Create the override file for usage with the specified subcloud""" overrides_file = os.path.join(dccommon_consts.ANSIBLE_OVERRIDES_PATH, payload['name'] + '.yml') mgmt_pool = cached_regionone_data['mgmt_pool'] mgmt_floating_ip = mgmt_pool.floating_address mgmt_subnet = "%s/%d" % (mgmt_pool.network, mgmt_pool.prefix) oam_addresses = cached_regionone_data['oam_addresses'] oam_floating_ip = oam_addresses.oam_floating_ip oam_subnet = oam_addresses.oam_subnet with open(overrides_file, 'w') as f_out_overrides_file: f_out_overrides_file.write( '---' '\nregion_config: yes' '\ndistributed_cloud_role: subcloud' '\nsystem_controller_subnet: ' + mgmt_subnet + '\nsystem_controller_floating_address: ' + mgmt_floating_ip + '\nsystem_controller_oam_subnet: ' + oam_subnet + '\nsystem_controller_oam_floating_address: ' + oam_floating_ip + '\n' ) for k, v in payload.items(): if k not in ['deploy_playbook', 'deploy_values', 'deploy_config', 'deploy_chart', 'deploy_overrides', 'install_values']: f_out_overrides_file.write("%s: %s\n" % (k, json.dumps(v))) def _write_deploy_files(self, payload, subcloud_name): """Create the deploy value files for the subcloud""" deploy_values_file = os.path.join( dccommon_consts.ANSIBLE_OVERRIDES_PATH, subcloud_name + '_deploy_values.yml') with open(deploy_values_file, 'w') as f_out_deploy_values_file: json.dump(payload['deploy_values'], f_out_deploy_values_file) def _prepare_for_deployment(self, payload, subcloud_name, populate_passwords=True): payload['deploy_values'] = dict() if populate_passwords: payload['deploy_values']['ansible_become_pass'] = \ payload['sysadmin_password'] payload['deploy_values']['ansible_ssh_pass'] = \ payload['sysadmin_password'] payload['deploy_values']['admin_password'] = \ str(keyring.get_password('CGCS', 'admin')) payload['deploy_values']['deployment_config'] = \ payload[consts.DEPLOY_CONFIG] payload['deploy_values']['deployment_manager_chart'] = \ payload[consts.DEPLOY_CHART] payload['deploy_values']['deployment_manager_overrides'] = \ payload[consts.DEPLOY_OVERRIDES] payload['deploy_values']['user_uploaded_artifacts'] = \ payload["user_uploaded_artifacts"] self._write_deploy_files(payload, subcloud_name) def _delete_subcloud_routes(self, keystone_client, subcloud): """Delete the routes to this subcloud""" # Delete the route to this subcloud on the management interface on # both controllers. 
management_subnet = netaddr.IPNetwork(subcloud.management_subnet) endpoint = keystone_client.endpoint_cache.get_endpoint('sysinv') sysinv_client = SysinvClient(dccommon_consts.DEFAULT_REGION_NAME, keystone_client.session, endpoint=endpoint) cached_regionone_data = self._get_cached_regionone_data(keystone_client, sysinv_client) for mgmt_if_uuid in cached_regionone_data['mgmt_interface_uuids']: sysinv_client.delete_route(mgmt_if_uuid, str(management_subnet.ip), management_subnet.prefixlen, str(netaddr.IPAddress(subcloud.systemcontroller_gateway_ip)), 1) @staticmethod def _delete_subcloud_cert(subcloud_name): cert_name = SubcloudManager._get_subcloud_cert_name(subcloud_name) secret_name = SubcloudManager._get_subcloud_cert_secret_name( subcloud_name) kube = kubeoperator.KubeOperator() kube.delete_cert_manager_certificate(CERT_NAMESPACE, cert_name) kube.kube_delete_secret(secret_name, CERT_NAMESPACE) LOG.info("cert %s and secret %s are deleted" % (cert_name, secret_name)) def _remove_subcloud_details(self, context, subcloud, ansible_subcloud_inventory_file): """Remove subcloud details from database and inform orchestrators""" # Inform orchestrators that subcloud has been deleted try: self.dcorch_rpc_client.del_subcloud(context, subcloud.name) except RemoteError as e: # TODO(kmacleod): this should be caught as explicit remote exception # Fix when centos/python2 is no longer supported if "SubcloudNotFound" in str(e): pass # delete the associated alarm entry try: db_api.subcloud_alarms_delete(context, subcloud.name) except RemoteError as e: # TODO(kmacleod): fix same with above if "SubcloudNotFound" in str(e): pass # We only delete subcloud endpoints, region and user information # in the Central Region. The subcloud is already unmanaged and powered # down so is not accessible. Therefore set up a session with the # Central Region Keystone ONLY. keystone_client = OpenStackDriver( region_name=dccommon_consts.DEFAULT_REGION_NAME, region_clients=None).keystone_client # Delete keystone endpoints for subcloud keystone_client.delete_endpoints(subcloud.name) keystone_client.delete_region(subcloud.name) # Delete the routes to this subcloud self._delete_subcloud_routes(keystone_client, subcloud) # Remove the subcloud from the database try: db_api.subcloud_destroy(context, subcloud.id) except Exception as e: LOG.exception(e) raise e # Delete the ansible inventory for the new subcloud utils.delete_subcloud_inventory(ansible_subcloud_inventory_file) # Delete the subcloud intermediate certificate SubcloudManager._delete_subcloud_cert(subcloud.name) # Delete the subcloud backup path self._delete_subcloud_backup_data(subcloud.name) # Regenerate the addn_hosts_dc file self._create_addn_hosts_dc(context) @staticmethod def _delete_subcloud_backup_data(subcloud_name): try: backup_path = os.path.join(CENTRAL_BACKUP_DIR, subcloud_name) if os.path.exists(backup_path): shutil.rmtree(backup_path) except Exception as e: LOG.exception(e) def delete_subcloud(self, context, subcloud_id): """Delete subcloud and notify orchestrators. :param context: request context object. :param subcloud_id: id of subcloud to delete """ LOG.info("Deleting subcloud %s." 
                 % subcloud_id)

        # Retrieve the subcloud details from the database
        subcloud = db_api.subcloud_get(context, subcloud_id)

        # Semantic checking
        if subcloud.management_state != dccommon_consts.MANAGEMENT_UNMANAGED:
            raise exceptions.SubcloudNotUnmanaged()
        if subcloud.availability_status == \
                dccommon_consts.AVAILABILITY_ONLINE:
            raise exceptions.SubcloudNotOffline()

        # Ansible inventory filename for the specified subcloud
        ansible_subcloud_inventory_file = self._get_ansible_filename(
            subcloud.name, INVENTORY_FILE_POSTFIX)

        self._remove_subcloud_details(context,
                                      subcloud,
                                      ansible_subcloud_inventory_file)

        # Clear any subcloud alarms.
        # Note that endpoint out-of-sync alarms should have been cleared when
        # the subcloud was unmanaged and the endpoint sync statuses were set
        # to unknown.
        #
        # TODO(kmacleod): Until an API is available to clear all alarms
        # for a subcloud, we manually clear the following:
        # - subcloud offline
        # - subcloud resource out of sync
        # - subcloud backup failure
        for alarm_id, entity_instance_id in (
                (fm_const.FM_ALARM_ID_DC_SUBCLOUD_OFFLINE,
                 "subcloud=%s" % subcloud.name),
                (fm_const.FM_ALARM_ID_DC_SUBCLOUD_RESOURCE_OUT_OF_SYNC,
                 "subcloud=%s.resource=%s" %
                 (subcloud.name, dccommon_consts.ENDPOINT_TYPE_DC_CERT)),
                (fm_const.FM_ALARM_ID_DC_SUBCLOUD_BACKUP_FAILED,
                 "subcloud=%s" % subcloud.name)):
            try:
                fault = self.fm_api.get_fault(alarm_id, entity_instance_id)
                if fault:
                    self.fm_api.clear_fault(alarm_id, entity_instance_id)
            except Exception as e:
                LOG.info("Problem clearing fault for subcloud %s, "
                         "alarm_id=%s" % (subcloud.name, alarm_id))
                LOG.exception(e)
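    # As a quick orientation for the method below (an illustrative summary
    # derived from its checks, not an exhaustive state table), the
    # manage/unmanage transitions reduce to:
    #
    #   unmanaged -> unmanaged : rejected (already unmanaged)
    #   managed   -> managed   : rejected (already managed)
    #   unmanaged -> managed   : allowed only if the subcloud is online and
    #                            deploy_status is complete or a prestage
    #                            state, unless force is set
    #   any other value        : InternalError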
    def update_subcloud(self, context, subcloud_id, management_state=None,
                        description=None, location=None, group_id=None,
                        data_install=None, force=None):
        """Update subcloud and notify orchestrators.

        :param context: request context object
        :param subcloud_id: id of subcloud to update
        :param management_state: new management state
        :param description: new description
        :param location: new location
        :param group_id: new subcloud group id
        :param data_install: subcloud install values
        :param force: force flag
        """
        LOG.info("Updating subcloud %s." % subcloud_id)

        # Get the subcloud details from the database
        subcloud = db_api.subcloud_get(context, subcloud_id)
        original_management_state = subcloud.management_state

        # Semantic checking
        if management_state:
            if management_state == dccommon_consts.MANAGEMENT_UNMANAGED:
                if subcloud.management_state == \
                        dccommon_consts.MANAGEMENT_UNMANAGED:
                    LOG.warning("Subcloud %s already unmanaged" % subcloud_id)
                    raise exceptions.BadRequest(
                        resource='subcloud',
                        msg='Subcloud is already unmanaged')
            elif management_state == dccommon_consts.MANAGEMENT_MANAGED:
                if subcloud.management_state == \
                        dccommon_consts.MANAGEMENT_MANAGED:
                    LOG.warning("Subcloud %s already managed" % subcloud_id)
                    raise exceptions.BadRequest(
                        resource='subcloud',
                        msg='Subcloud is already managed')
                elif not force:
                    if (subcloud.deploy_status != consts.DEPLOY_STATE_DONE and
                            not prestage.is_deploy_status_prestage(
                                subcloud.deploy_status)):
                        LOG.warning("Subcloud %s can be managed only when "
                                    "deploy_status is complete" % subcloud_id)
                        raise exceptions.BadRequest(
                            resource='subcloud',
                            msg='Subcloud can be managed only if deploy '
                                'status is complete')
                if subcloud.availability_status != \
                        dccommon_consts.AVAILABILITY_ONLINE:
                    LOG.warning("Subcloud %s is not online" % subcloud_id)
                    raise exceptions.SubcloudNotOnline()
            else:
                LOG.error("Invalid management_state %s" % management_state)
                raise exceptions.InternalError()

        subcloud = db_api.subcloud_update(
            context,
            subcloud_id,
            management_state=management_state,
            description=description,
            location=location,
            group_id=group_id,
            data_install=data_install
        )

        # Inform orchestrators that subcloud has been updated
        if management_state:
            try:
                # Inform orchestrator of state change
                self.dcorch_rpc_client.update_subcloud_states(
                    context,
                    subcloud.name,
                    management_state,
                    subcloud.availability_status)

                LOG.info('Notifying dcorch, subcloud:%s management: %s, '
                         'availability:%s' % (subcloud.name,
                                              management_state,
                                              subcloud.availability_status))
            except Exception as e:
                LOG.exception(e)
                LOG.warning('Problem informing dcorch of subcloud state '
                            'change, reverting to original state, '
                            'subcloud: %s' % subcloud.name)
                management_state = original_management_state
                subcloud = \
                    db_api.subcloud_update(context, subcloud_id,
                                           management_state=management_state,
                                           description=description,
                                           location=location)

            if management_state == dccommon_consts.MANAGEMENT_UNMANAGED:
                # set all endpoint statuses to unknown, except the dc-cert
                # endpoint which continues to be audited for unmanaged
                # subclouds
                self.state_rpc_client.update_subcloud_endpoint_status_sync(
                    context,
                    subcloud_name=subcloud.name,
                    endpoint_type=None,
                    sync_status=dccommon_consts.SYNC_STATUS_UNKNOWN,
                    ignore_endpoints=[dccommon_consts.ENDPOINT_TYPE_DC_CERT])
            elif management_state == dccommon_consts.MANAGEMENT_MANAGED:
                # Subcloud is managed
                # Tell cert-mon to audit endpoint certificate
                LOG.info('Request certmon audit for %s' % subcloud.name)
                dc_notification = \
                    dcmanager_rpc_client.DCManagerNotifications()
                dc_notification.subcloud_managed(context, subcloud.name)

        return db_api.subcloud_db_model_to_dict(subcloud)

    def update_subcloud_with_network_reconfig(self, context, subcloud_id,
                                              payload):
        subcloud = db_api.subcloud_get(context, subcloud_id)
        subcloud = db_api.subcloud_update(
            context, subcloud.id,
            deploy_status=consts.DEPLOY_STATE_RECONFIGURING_NETWORK
        )
        subcloud_name = payload['name']
        try:
            self._create_intermediate_ca_cert(payload)
            subcloud_inventory_file = self._get_ansible_filename(
                subcloud_name, INVENTORY_FILE_POSTFIX)
            subcloud_params = {'name': subcloud_name,
                               'bootstrap-address':
payload.get('bootstrap_address')} utils.create_subcloud_inventory(subcloud_params, subcloud_inventory_file) overrides_file = self._create_subcloud_update_overrides_file( payload, subcloud_name, 'update_values') update_command = self.compose_update_command( subcloud_name, subcloud_inventory_file) except Exception: LOG.exception( "Failed to prepare subcloud %s for update." % subcloud_name) return try: apply_thread = threading.Thread( target=self._run_network_reconfiguration, args=(subcloud_name, update_command, overrides_file, payload, context, subcloud)) apply_thread.start() except Exception: LOG.exception("Failed to update subcloud %s" % subcloud_name) def _run_network_reconfiguration( self, subcloud_name, update_command, overrides_file, payload, context, subcloud ): log_file = (os.path.join(consts.DC_ANSIBLE_LOG_DIR, subcloud_name) + '_playbook_output.log') subcloud_id = subcloud.id try: run_playbook(log_file, update_command) utils.delete_subcloud_inventory(overrides_file) except PlaybookExecutionFailed: msg = utils.find_ansible_error_msg( subcloud_name, log_file, consts.DEPLOY_STATE_RECONFIGURING_NETWORK) LOG.error(msg) db_api.subcloud_update( context, subcloud_id, deploy_status=consts.DEPLOY_STATE_RECONFIGURING_NETWORK_FAILED, error_description=msg[0:consts.ERROR_DESCRIPTION_LENGTH] ) return self._configure_system_controller_network(context, payload, subcloud) db_api.subcloud_update( context, subcloud_id, deploy_status=consts.DEPLOY_STATE_DONE ) subcloud = db_api.subcloud_update( context, subcloud_id, description=payload.get('description', subcloud.description), management_subnet=payload.get('management_subnet'), management_gateway_ip=payload.get('management_gateway_ip'), management_start_ip=payload.get('management_start_ip'), management_end_ip=payload.get('management_end_ip'), location=payload.get('location', subcloud.location), group_id=payload.get('group_id', subcloud.group_id), data_install=payload.get('data_install', subcloud.data_install) ) # Regenerate the addn_hosts_dc file self._create_addn_hosts_dc(context) def _configure_system_controller_network(self, context, payload, subcloud, update_db=True): """Configure system controller network :param context: request context object :param payload: subcloud bootstrap configuration :param subcloud: subcloud model object :param update_db: whether it should update the db on success/failure """ subcloud_name = subcloud.name subcloud_id = subcloud.id sys_controller_gw_ip = payload.get("systemcontroller_gateway_address", subcloud.systemcontroller_gateway_ip) try: m_ks_client = OpenStackDriver( region_name=dccommon_consts.DEFAULT_REGION_NAME, region_clients=None).keystone_client self._create_subcloud_route(payload, m_ks_client, sys_controller_gw_ip) except Exception: LOG.exception( "Failed to create route to subcloud %s." 
% subcloud_name) if update_db: db_api.subcloud_update( context, subcloud_id, deploy_status=consts.DEPLOY_STATE_RECONFIGURING_NETWORK_FAILED, error_description=consts.ERROR_DESC_EMPTY ) return try: self._update_services_endpoint( context, payload, subcloud_name, m_ks_client) except Exception: LOG.exception("Failed to update subcloud %s endpoints" % subcloud_name) if update_db: db_api.subcloud_update( context, subcloud_id, deploy_status=consts.DEPLOY_STATE_RECONFIGURING_NETWORK_FAILED, error_description=consts.ERROR_DESC_EMPTY ) return # Delete old routes if utils.get_management_subnet(payload) != subcloud.management_subnet: self._delete_subcloud_routes(m_ks_client, subcloud) def _create_subcloud_route(self, payload, keystone_client, systemcontroller_gateway_ip): subcloud_subnet = netaddr.IPNetwork(utils.get_management_subnet(payload)) endpoint = keystone_client.endpoint_cache.get_endpoint('sysinv') sysinv_client = SysinvClient(dccommon_consts.DEFAULT_REGION_NAME, keystone_client.session, endpoint=endpoint) cached_regionone_data = self._get_cached_regionone_data( keystone_client, sysinv_client) for mgmt_if_uuid in cached_regionone_data['mgmt_interface_uuids']: sysinv_client.create_route(mgmt_if_uuid, str(subcloud_subnet.ip), subcloud_subnet.prefixlen, systemcontroller_gateway_ip, 1) def _update_services_endpoint( self, context, payload, subcloud_name, m_ks_client): endpoint_ip = utils.get_management_start_address(payload) if netaddr.IPAddress(endpoint_ip).version == 6: endpoint_ip = f"[{endpoint_ip}]" services_endpoints = { "keystone": "https://{}:5001/v3".format(endpoint_ip), "sysinv": "https://{}:6386/v1".format(endpoint_ip), "fm": "https://{}:18003".format(endpoint_ip), "patching": "https://{}:5492".format(endpoint_ip), "vim": "https://{}:4546".format(endpoint_ip), "usm": "https://{}:5498".format(endpoint_ip), } for endpoint in m_ks_client.keystone_client.endpoints.list( region=subcloud_name): service_type = m_ks_client.keystone_client.services.get( endpoint.service_id).type if service_type == dccommon_consts.ENDPOINT_TYPE_PLATFORM: admin_endpoint_url = services_endpoints.get('sysinv') elif service_type == dccommon_consts.ENDPOINT_TYPE_IDENTITY: admin_endpoint_url = services_endpoints.get('keystone') elif service_type == dccommon_consts.ENDPOINT_TYPE_PATCHING: admin_endpoint_url = services_endpoints.get('patching') elif service_type == dccommon_consts.ENDPOINT_TYPE_FM: admin_endpoint_url = services_endpoints.get('fm') elif service_type == dccommon_consts.ENDPOINT_TYPE_NFV: admin_endpoint_url = services_endpoints.get('vim') elif service_type == dccommon_consts.ENDPOINT_TYPE_SOFTWARE: admin_endpoint_url = services_endpoints.get('usm') else: LOG.exception("Endpoint Type Error: %s" % service_type) m_ks_client.keystone_client.endpoints.update( endpoint, url=admin_endpoint_url) LOG.info("Update services endpoint to %s in subcloud %s" % ( endpoint_ip, subcloud_name)) # Update service URLs in subcloud endpoint cache self.audit_rpc_client.trigger_subcloud_endpoints_update( context, subcloud_name, services_endpoints) self.dcorch_rpc_client.update_subcloud_endpoints( context, subcloud_name, services_endpoints) # Update sysinv URL in cert-mon cache dc_notification = dcmanager_rpc_client.DCManagerNotifications() dc_notification.subcloud_sysinv_endpoint_update( context, subcloud_name, services_endpoints.get("sysinv")) def _create_subcloud_update_overrides_file( self, payload, subcloud_name, filename_suffix): update_overrides_file = os.path.join( dccommon_consts.ANSIBLE_OVERRIDES_PATH, subcloud_name + 
'_' + filename_suffix + '.yml') self._update_override_values(payload) with open(update_overrides_file, 'w', encoding='UTF-8') as f_out: f_out.write('---\n') for key, value in payload['override_values'].items(): if key in ['ansible_ssh_pass', 'ansible_become_pass']: f_out.write(f"{key}: {value}\n") else: f_out.write(f"{key}: {json.dumps(value)}\n") return update_overrides_file def _update_override_values(self, payload): if not payload.get('override_values'): payload['override_values'] = {} payload['override_values']['ansible_ssh_pass'] = ( payload['sysadmin_password']) payload['override_values']['ansible_become_pass'] = ( payload['sysadmin_password']) payload['override_values']['sc_gateway_address'] = ( payload['management_gateway_ip']) payload['override_values']['sc_floating_address'] = ( payload['management_start_ip']) payload['override_values']['system_controller_network'] = ( payload['system_controller_network']) payload['override_values']['system_controller_network_prefix'] = ( payload['system_controller_network_prefix']) payload['override_values']['sc_subnet'] = payload['management_subnet'] payload['override_values']['dc_root_ca_cert'] = payload['dc_root_ca_cert'] payload['override_values']['sc_ca_cert'] = payload['sc_ca_cert'] payload['override_values']['sc_ca_key'] = payload['sc_ca_key'] def update_subcloud_sync_endpoint_type(self, context, subcloud_name, endpoint_type_list, openstack_installed): operation = 'add' if openstack_installed else 'remove' func_switcher = { 'add': ( self.dcorch_rpc_client.add_subcloud_sync_endpoint_type, db_api.subcloud_status_create ), 'remove': ( self.dcorch_rpc_client.remove_subcloud_sync_endpoint_type, db_api.subcloud_status_delete ) } try: subcloud = db_api.subcloud_get_by_name(context, subcloud_name) except Exception: LOG.exception("Failed to get subcloud by name: %s" % subcloud_name) raise try: # Notify dcorch to add/remove sync endpoint type list func_switcher[operation][0](self.context, subcloud_name, endpoint_type_list) LOG.info('Notifying dcorch, subcloud: %s new sync endpoint: %s' % (subcloud_name, endpoint_type_list)) # Update subcloud status table by adding/removing openstack sync # endpoint types for endpoint_type in endpoint_type_list: func_switcher[operation][1](self.context, subcloud.id, endpoint_type) # Update openstack_installed of subcloud table db_api.subcloud_update(self.context, subcloud.id, openstack_installed=openstack_installed) except Exception: LOG.exception('Problem informing dcorch of subcloud sync endpoint' ' type change, subcloud: %s' % subcloud_name) def handle_subcloud_operations_in_progress(self): """Identify subclouds in transitory stages and update subcloud state to failure. """ LOG.info('Identifying subclouds in transitory stages.') subclouds = db_api.subcloud_get_all(self.context) for subcloud in subclouds: # Identify subclouds in transitory states new_deploy_status = TRANSITORY_STATES.get(subcloud.deploy_status) new_backup_status = TRANSITORY_BACKUP_STATES.get(subcloud.backup_status) # update deploy and backup states to the corresponding failure states if new_deploy_status or new_backup_status: if new_deploy_status: LOG.info("Changing subcloud %s deploy status from %s to %s." % (subcloud.name, subcloud.deploy_status, new_deploy_status)) if new_backup_status: LOG.info("Changing subcloud %s backup status from %s to %s." 
% (subcloud.name, subcloud.backup_status, new_backup_status)) db_api.subcloud_update( self.context, subcloud.id, deploy_status=new_deploy_status or subcloud.deploy_status, backup_status=new_backup_status or subcloud.backup_status ) @staticmethod def prestage_subcloud(context, payload): """Subcloud prestaging""" return prestage.prestage_subcloud(context, payload) @utils.synchronized("regionone-data-cache", external=False) def _get_cached_regionone_data(self, regionone_keystone_client, regionone_sysinv_client=None): if (not SubcloudManager.regionone_data or SubcloudManager.regionone_data['expiry'] <= datetime.datetime.utcnow()): user_list = regionone_keystone_client.get_enabled_users(id_only=False) for user in user_list: if user.name == dccommon_consts.ADMIN_USER_NAME: SubcloudManager.regionone_data['admin_user_id'] = user.id elif user.name == dccommon_consts.SYSINV_USER_NAME: SubcloudManager.regionone_data['sysinv_user_id'] = user.id elif user.name == dccommon_consts.DCMANAGER_USER_NAME: SubcloudManager.regionone_data['dcmanager_user_id'] = user.id project_list = regionone_keystone_client.get_enabled_projects(id_only=False) for project in project_list: if project.name == dccommon_consts.ADMIN_PROJECT_NAME: SubcloudManager.regionone_data['admin_project_id'] = project.id elif project.name == dccommon_consts.SERVICES_USER_NAME: SubcloudManager.regionone_data['services_project_id'] = project.id if regionone_sysinv_client is None: endpoint = regionone_keystone_client.endpoint_cache.get_endpoint('sysinv') regionone_sysinv_client = SysinvClient( dccommon_consts.DEFAULT_REGION_NAME, regionone_keystone_client.session, endpoint=endpoint) controllers = regionone_sysinv_client.get_controller_hosts() mgmt_interface_uuids = [] for controller in controllers: mgmt_interface = regionone_sysinv_client.get_management_interface( controller.hostname) if mgmt_interface is not None: mgmt_interface_uuids.append(mgmt_interface.uuid) SubcloudManager.regionone_data['mgmt_interface_uuids'] = mgmt_interface_uuids SubcloudManager.regionone_data['mgmt_pool'] = \ regionone_sysinv_client.get_management_address_pool() SubcloudManager.regionone_data['oam_addresses'] = \ regionone_sysinv_client.get_oam_addresses() SubcloudManager.regionone_data['expiry'] = \ datetime.datetime.utcnow() + datetime.timedelta(hours=1) LOG.info("RegionOne cached data updated %s" % SubcloudManager.regionone_data) cached_regionone_data = SubcloudManager.regionone_data return cached_regionone_data def _populate_payload_with_cached_keystone_data(self, cached_data, payload, populate_passwords=True): payload['system_controller_keystone_admin_user_id'] = \ cached_data['admin_user_id'] payload['system_controller_keystone_admin_project_id'] = \ cached_data['admin_project_id'] payload['system_controller_keystone_services_project_id'] = \ cached_data['services_project_id'] payload['system_controller_keystone_sysinv_user_id'] = \ cached_data['sysinv_user_id'] payload['system_controller_keystone_dcmanager_user_id'] = \ cached_data['dcmanager_user_id'] if populate_passwords: # While at it, add the admin and service user passwords to the # payload so they get copied to the overrides file payload['ansible_become_pass'] = payload['sysadmin_password'] payload['ansible_ssh_pass'] = payload['sysadmin_password'] payload['admin_password'] = str(keyring.get_password('CGCS', 'admin'))
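    # Illustrative sketch only (call sites and values are simplified, not an
    # exact trace): the RegionOne data cached above carries a one-hour
    # 'expiry', so back-to-back subcloud operations reuse a single set of
    # keystone/sysinv lookups:
    #
    #   cached = self._get_cached_regionone_data(ks_client)  # queries RegionOne
    #   ...
    #   cached = self._get_cached_regionone_data(ks_client)  # served from the
    #                                                         # class-level cache
    #
    # _populate_payload_with_cached_keystone_data() then copies the cached
    # user and project IDs (for example
    # payload['system_controller_keystone_admin_user_id']) into the payload
    # that _write_subcloud_ansible_config() renders into the ansible
    # overrides file.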