# # Copyright (c) 2014-2017 Wind River Systems, Inc. # # SPDX-License-Identifier: Apache-2.0 # """ Backup & Restore """ import copy import filecmp import fileinput import os import glob import shutil import stat import subprocess import tarfile import tempfile import textwrap import time from fm_api import constants as fm_constants from fm_api import fm_api from sysinv.common import constants as sysinv_constants from common import log from common import constants from common.exceptions import BackupFail, RestoreFail from common.exceptions import KeystoneFail, SysInvFail import openstack import tsconfig.tsconfig as tsconfig import utils import sysinv_api as sysinv from six.moves import input LOG = log.get_logger(__name__) DEVNULL = open(os.devnull, 'w') RESTORE_COMPLETE = "restore-complete" RESTORE_RERUN_REQUIRED = "restore-rerun-required" # Backup/restore related constants backup_in_progress = tsconfig.BACKUP_IN_PROGRESS_FLAG restore_in_progress = tsconfig.RESTORE_IN_PROGRESS_FLAG restore_system_ready = tsconfig.RESTORE_SYSTEM_FLAG restore_patching_complete = '/etc/platform/.restore_patching_complete' node_is_patched = '/var/run/node_is_patched' keyring_permdir = os.path.join('/opt/platform/.keyring', tsconfig.SW_VERSION) ceph_permdir = os.path.join(tsconfig.CONFIG_PATH, 'ceph-config') ldap_permdir = '/var/lib/openldap-data' ceilometer_permdir = '/opt/cgcs/ceilometer/' + tsconfig.SW_VERSION glance_permdir = '/opt/cgcs/glance' patching_permdir = '/opt/patching' patching_repo_permdir = '/www/pages/updates' home_permdir = '/home' cinder_permdir = '/opt/cgcs/cinder' extension_permdir = '/opt/extension' patch_vault_permdir = '/opt/patch-vault' def get_backup_databases(cinder_config=False): """ Retrieve database lists for backup. 
:return: backup_databases and backup_database_skip_tables """ # Databases common to all configurations REGION_LOCAL_DATABASES = ('postgres', 'template1', 'nova', 'sysinv', 'neutron', 'heat', 'nova_api', 'aodh', 'murano', 'magnum', 'panko', 'ironic', 'nova_cell0', 'gnocchi', 'fm', 'barbican') REGION_SHARED_DATABASES = ('glance', 'keystone') if cinder_config: REGION_SHARED_DATABASES += ('cinder', ) # Indicates which tables have to be dropped for a certain database. DB_TABLE_SKIP_MAPPING = { 'fm': ('alarm',), 'gnocchi': ('metric', 'resource'), 'dcorch': ('orch_job', 'orch_request', 'resource', 'subcloud_resource'), } if tsconfig.region_config == 'yes': BACKUP_DATABASES = REGION_LOCAL_DATABASES # Add databases which are optional in secondary regions(and subclouds) shared_services = sysinv.get_shared_services() for service_type in ["image", "volume"]: if service_type not in shared_services: service = 'glance' if service_type == "image" else 'cinder' BACKUP_DATABASES += (service, ) else: # Add additional databases for non-region configuration and for the # primary region in region deployments. BACKUP_DATABASES = REGION_LOCAL_DATABASES + REGION_SHARED_DATABASES # Add distributed cloud databases if tsconfig.distributed_cloud_role == \ sysinv_constants.DISTRIBUTED_CLOUD_ROLE_SYSTEMCONTROLLER: BACKUP_DATABASES += ('dcmanager', 'dcorch') # We generate the tables to be skipped for each database # mentioned in BACKUP_DATABASES. 
We explicitly list # skip tables in DB_TABLE_SKIP_MAPPING BACKUP_DB_SKIP_TABLES = dict( map(lambda x: [x, DB_TABLE_SKIP_MAPPING.get(x, ())], BACKUP_DATABASES)) return BACKUP_DATABASES, BACKUP_DB_SKIP_TABLES def check_load_versions(archive, staging_dir): match = False try: member = archive.getmember('etc/build.info') archive.extract(member, path=staging_dir) match = filecmp.cmp('/etc/build.info', staging_dir + '/etc/build.info') shutil.rmtree(staging_dir + '/etc') except Exception as e: LOG.exception(e) raise RestoreFail("Unable to verify load version in backup file. " "Invalid backup file.") if not match: LOG.error("Load version mismatch.") raise RestoreFail("Load version of backup does not match the " "version of the installed load.") def get_subfunctions(filename): """ Retrieves the subfunctions from a platform.conf file. :param filename: file to retrieve subfunctions from :return: a list of the subfunctions or None if no subfunctions exist """ matchstr = 'subfunction=' with open(filename, 'r') as f: for line in f: if matchstr in line: parsed = line.split('=') return parsed[1].rstrip().split(",") return def check_load_subfunctions(archive, staging_dir): """ Verify that the subfunctions in the backup match the installed load. :param archive: backup archive :param staging_dir: staging directory :return: raises exception if the subfunctions do not match """ match = False backup_subfunctions = None try: member = archive.getmember('etc/platform/platform.conf') archive.extract(member, path=staging_dir) backup_subfunctions = get_subfunctions(staging_dir + '/etc/platform/platform.conf') shutil.rmtree(staging_dir + '/etc') if set(backup_subfunctions) ^ set(tsconfig.subfunctions): # The set of subfunctions do not match match = False else: match = True except Exception: LOG.exception("Unable to verify subfunctions in backup file") raise RestoreFail("Unable to verify subfunctions in backup file. 
" "Invalid backup file.") if not match: LOG.error("Subfunction mismatch - backup: %s, installed: %s" % (str(backup_subfunctions), str(tsconfig.subfunctions))) raise RestoreFail("Subfunctions in backup load (%s) do not match the " "subfunctions of the installed load (%s)." % (str(backup_subfunctions), str(tsconfig.subfunctions))) def file_exists_in_archive(archive, file_path): """ Check if file exists in archive """ try: archive.getmember(file_path) return True except KeyError: LOG.info("File %s is not in archive." % file_path) return False def filter_directory(archive, directory): for tarinfo in archive: if tarinfo.name.split('/')[0] == directory: yield tarinfo def backup_etc_size(): """ Backup etc size estimate """ try: total_size = utils.directory_get_size('/etc') return total_size except OSError: LOG.error("Failed to estimate backup etc size.") raise BackupFail("Failed to estimate backup etc size") def backup_etc(archive): """ Backup etc """ try: archive.add('/etc', arcname='etc') except tarfile.TarError: LOG.error("Failed to backup etc.") raise BackupFail("Failed to backup etc") def restore_etc_file(archive, dest_dir, etc_file): """ Restore etc file """ try: # Change the name of this file to remove the leading path member = archive.getmember('etc/' + etc_file) # Copy the member to avoid changing the name for future operations on # this member. 
temp_member = copy.copy(member) temp_member.name = os.path.basename(temp_member.name) archive.extract(temp_member, path=dest_dir) except tarfile.TarError: LOG.error("Failed to restore etc file.") raise RestoreFail("Failed to restore etc file") def restore_etc_ssl_dir(archive, configpath=constants.CONFIG_WORKDIR): """ Restore the etc SSL dir """ def filter_etc_ssl_private(members): for tarinfo in members: if 'etc/ssl/private' in tarinfo.name: yield tarinfo if file_exists_in_archive(archive, 'config/server-cert.pem'): restore_config_file( archive, configpath, 'server-cert.pem') if file_exists_in_archive(archive, 'etc/ssl/private'): # NOTE: This will include all TPM certificate files if TPM was # enabled on the backed up system. However in that case, this # restoration is only done for the first controller and TPM # will need to be reconfigured once duplex controller (if any) # is restored. archive.extractall(path='/', members=filter_etc_ssl_private(archive)) def restore_ceph_external_config_files(archive, staging_dir): # Restore ceph-config. if file_exists_in_archive(archive, "config/ceph-config"): restore_config_dir(archive, staging_dir, 'ceph-config', ceph_permdir) # Copy the file to /etc/ceph. # There might be no files to copy, so don't check the return code. 
def backup_config_size(config_permdir):
    """ Backup configuration size estimate """
    try:
        return(utils.directory_get_size(config_permdir))
    except OSError:
        LOG.error("Failed to estimate backup configuration size.")
        raise BackupFail("Failed to estimate backup configuration size")


def backup_config(archive, config_permdir):
    """ Backup configuration """
    try:
        # The config dir is versioned, but we're only grabbing the current
        # release
        archive.add(config_permdir, arcname='config')
    except tarfile.TarError:
        LOG.error("Failed to backup config.")
        raise BackupFail("Failed to backup configuration")


def restore_config_file(archive, dest_dir, config_file):
    """ Restore configuration file

    Extracts config/<config_file> from the archive into dest_dir with the
    leading 'config/' path stripped.
    """
    try:
        # Change the name of this file to remove the leading path
        member = archive.getmember('config/' + config_file)
        # Copy the member to avoid changing the name for future operations on
        # this member.
        temp_member = copy.copy(member)
        temp_member.name = os.path.basename(temp_member.name)
        archive.extract(temp_member, path=dest_dir)
    except tarfile.TarError:
        LOG.error("Failed to restore config file %s." % config_file)
        raise RestoreFail("Failed to restore configuration")


def restore_configuration(archive, staging_dir):
    """ Restore configuration

    Recreates the config workdir, restores platform.conf (rewriting the
    install UUID and stripping host-specific entries), then restores
    branding, banner, ssh config, hosts, certificates and firewall rules.
    :raises RestoreFail: on any unrecoverable extraction/copy error
    """
    try:
        os.makedirs(constants.CONFIG_WORKDIR, stat.S_IRWXU | stat.S_IRGRP |
                    stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH)
    except OSError:
        LOG.error("Failed to create config directory: %s",
                  constants.CONFIG_WORKDIR)
        raise RestoreFail("Failed to restore configuration files")

    # Restore cgcs_config file from original installation for historical
    # purposes. Not used to restore the system as the information in this
    # file is out of date (not updated after original installation).
    restore_config_file(archive, constants.CONFIG_WORKDIR, 'cgcs_config')

    # Restore platform.conf file and update as necessary. The file will be
    # created in a temporary location and then moved into place when it is
    # complete to prevent access to a partially created file.
    restore_etc_file(archive, staging_dir, 'platform/platform.conf')
    temp_platform_conf_file = os.path.join(tsconfig.PLATFORM_CONF_PATH,
                                           'platform.conf.temp')
    shutil.copyfile(os.path.join(staging_dir, 'platform.conf'),
                    temp_platform_conf_file)

    install_uuid = utils.get_install_uuid()
    # inplace=1 redirects stdout into the file being edited, so the py2
    # print statements below rewrite the file line by line.
    for line in fileinput.FileInput(temp_platform_conf_file, inplace=1):
        if line.startswith("INSTALL_UUID="):
            # The INSTALL_UUID must be updated to match the new INSTALL_UUID
            # which was generated when this controller was installed prior to
            # doing the restore.
            print "INSTALL_UUID=%s" % install_uuid
        elif line.startswith("management_interface=") or \
                line.startswith("oam_interface=") or \
                line.startswith("infrastructure_interface=") or \
                line.startswith("UUID="):
            # Strip out any entries that are host specific as the backup can
            # be done on either controller. The application of the
            # platform_conf manifest will add these back in.
            pass
        else:
            print line,
    fileinput.close()

    # Move updated platform.conf file into place.
    os.rename(temp_platform_conf_file, tsconfig.PLATFORM_CONF_FILE)
    # Kick tsconfig to reload the platform.conf file
    tsconfig._load()

    # Restore branding
    restore_config_dir(archive, staging_dir, 'branding', '/opt/branding/')

    # Restore banner customization
    restore_config_dir(archive, staging_dir, 'banner/etc', '/opt/banner')

    # Restore ssh configuration
    restore_config_dir(archive, staging_dir, 'ssh_config',
                       constants.CONFIG_WORKDIR + '/ssh_config')

    # Configure hostname
    utils.configure_hostname('controller-0')

    # Restore hosts file
    restore_etc_file(archive, '/etc', 'hosts')
    restore_etc_file(archive, constants.CONFIG_WORKDIR, 'hosts')

    # Restore certificate files
    restore_etc_ssl_dir(archive)

    # Restore firewall rules file if it is in the archive
    if file_exists_in_archive(archive, 'config/iptables.rules'):
        restore_config_file(
            archive, constants.CONFIG_WORKDIR, 'iptables.rules')
        restore_etc_file(archive, tsconfig.PLATFORM_CONF_PATH,
                         'platform/iptables.rules')


def filter_pxelinux(archive):
    """Yield archive members under config/pxelinux.cfg."""
    for tarinfo in archive:
        if tarinfo.name.find('config/pxelinux.cfg') == 0:
            yield tarinfo


def restore_dnsmasq(archive, config_permdir):
    """ Restore dnsmasq

    Restores dnsmasq host/lease files to /etc and the config permdir, and
    replaces the pxelinux.cfg directory via a temporary staging area.
    """
    try:
        etc_files = ['hosts']
        perm_files = ['hosts',
                      'dnsmasq.hosts', 'dnsmasq.leases',
                      'dnsmasq.addn_hosts']

        for etc_file in etc_files:
            restore_config_file(archive, '/etc', etc_file)

        for perm_file in perm_files:
            restore_config_file(archive, config_permdir, perm_file)

        # Extract distributed cloud addn_hosts file if present in archive.
        if file_exists_in_archive(
                archive, 'config/dnsmasq.addn_hosts_dc'):
            restore_config_file(archive, config_permdir,
                                'dnsmasq.addn_hosts_dc')

        tmpdir = tempfile.mkdtemp(prefix="pxerestore_")

        archive.extractall(tmpdir,
                           members=filter_pxelinux(archive))

        if os.path.exists(tmpdir + '/config/pxelinux.cfg'):
            shutil.rmtree(config_permdir + 'pxelinux.cfg',
                          ignore_errors=True)
            shutil.move(tmpdir + '/config/pxelinux.cfg', config_permdir)

        shutil.rmtree(tmpdir, ignore_errors=True)
    except (shutil.Error, subprocess.CalledProcessError, tarfile.TarError):
        LOG.error("Failed to restore dnsmasq config.")
        raise RestoreFail("Failed to restore dnsmasq files")
def backup_puppet_data_size(puppet_permdir):
    """Return an estimate, in bytes, of the puppet data backup size."""
    try:
        return utils.directory_get_size(puppet_permdir)
    except OSError:
        LOG.error("Failed to estimate backup puppet data size.")
        raise BackupFail("Failed to estimate backup puppet data size")


def backup_puppet_data(archive, puppet_permdir):
    """Add the current release's puppet hieradata to the archive."""
    try:
        # The puppet dir is versioned, but we're only grabbing the current
        # release
        archive.add(puppet_permdir, arcname='hieradata')
    except tarfile.TarError:
        LOG.error("Failed to backup puppet data.")
        raise BackupFail("Failed to backup puppet data")


def restore_static_puppet_data(archive, puppet_workdir):
    """Restore only the static hieradata files (static/secure_static)."""
    try:
        member = archive.getmember('hieradata/static.yaml')
        archive.extract(member, path=os.path.dirname(puppet_workdir))

        member = archive.getmember('hieradata/secure_static.yaml')
        archive.extract(member, path=os.path.dirname(puppet_workdir))
    except tarfile.TarError:
        LOG.error("Failed to restore static puppet data.")
        raise RestoreFail("Failed to restore static puppet data")
    except OSError:
        # Best effort: a missing parent dir is not fatal here.
        pass


def restore_puppet_data(archive, puppet_workdir):
    """Restore the whole puppet hieradata tree."""
    try:
        archive.extractall(
            path=os.path.dirname(puppet_workdir),
            members=filter_directory(archive,
                                     os.path.basename(puppet_workdir)))
    except tarfile.TarError:
        LOG.error("Failed to restore puppet data.")
        raise RestoreFail("Failed to restore puppet data")
    except OSError:
        pass


def backup_cinder_config(archive):
    """Backup the cinder iscsi target configuration, if present.

    On setups without LVM backends the file is absent, so this is a no-op.
    """
    # If the iscsi target config file exists, add it to the archive
    if os.path.exists(cinder_permdir + '/iscsi-target/saveconfig.json'):
        archive.add(
            cinder_permdir + '/iscsi-target/saveconfig.json',
            arcname='cinder/saveconfig.json')


def restore_cinder_file(archive, dest_dir, cinder_file):
    """Extract cinder/<cinder_file> into dest_dir with the path stripped."""
    try:
        # Change the name of this file to remove the leading path
        member = archive.getmember('cinder/' + cinder_file)
        # Copy the member to avoid changing the name for future operations on
        # this member.
        temp_member = copy.copy(member)
        temp_member.name = os.path.basename(temp_member.name)
        archive.extract(temp_member, path=dest_dir)
    except tarfile.TarError:
        LOG.error("Failed to restore cinder file %s." % cinder_file)
        raise RestoreFail("Failed to restore configuration")


def restore_cinder_config(archive):
    """Restore cinder config files"""
    # If the iscsi target config file is present in the archive,
    # restore it.
    if file_exists_in_archive(archive, 'cinder/saveconfig.json'):
        restore_cinder_file(
            archive, cinder_permdir + '/iscsi-target',
            'saveconfig.json')
        # Also create a copy of the original file as the volume
        # restore procedure changes this file and breaks the
        # valid nova settings.
        shutil.copyfile(
            cinder_permdir + '/iscsi-target/saveconfig.json',
            cinder_permdir + '/iscsi-target/saveconfig.json.bck')


def backup_cinder_size(cinder_permdir):
    """Return the size of the cinder iscsi config backup (0 when absent)."""
    try:
        if not os.path.exists(
                cinder_permdir + '/iscsi-target/saveconfig.json'):
            return 0
        statinfo = os.stat(cinder_permdir + '/iscsi-target/saveconfig.json')
        return statinfo.st_size
    except OSError:
        LOG.error("Failed to estimate backup cinder size.")
        raise BackupFail("Failed to estimate backup cinder size")


def backup_keyring_size(keyring_permdir):
    """Return an estimate, in bytes, of the keyring backup size."""
    try:
        return utils.directory_get_size(keyring_permdir)
    except OSError:
        LOG.error("Failed to estimate backup keyring size.")
        raise BackupFail("Failed to estimate backup keyring size")


def backup_keyring(archive, keyring_permdir):
    """Add the keyring directory to the archive as '.keyring'."""
    try:
        archive.add(keyring_permdir, arcname='.keyring')
    except tarfile.TarError:
        LOG.error("Failed to backup keyring.")
        raise BackupFail("Failed to backup keyring configuration")


def restore_keyring(archive, keyring_permdir):
    """Replace keyring_permdir with the archive's '.keyring' contents."""
    try:
        shutil.rmtree(keyring_permdir, ignore_errors=False)
        members = filter_directory(archive, '.keyring')
        temp_members = list()
        # remove .keyring and .keyring/ from the member path since they are
        # extracted to keyring_permdir: /opt/platform/.keyring/release
        for m in members:
            temp_member = copy.copy(m)
            lst = temp_member.name.split('.keyring/')
            if len(lst) > 1:
                temp_member.name = lst[1]
                temp_members.append(temp_member)
        archive.extractall(path=keyring_permdir,
                           members=temp_members)
    except (tarfile.TarError, shutil.Error):
        LOG.error("Failed to restore keyring.")
        shutil.rmtree(keyring_permdir, ignore_errors=True)
        raise RestoreFail("Failed to restore keyring configuration")


def prefetch_keyring(archive):
    """Prefetch keyring configuration into /tmp for manifest use."""
    keyring_tmpdir = '/tmp/.keyring'
    python_keyring_tmpdir = '/tmp/python_keyring'
    try:
        shutil.rmtree(keyring_tmpdir, ignore_errors=True)
        shutil.rmtree(python_keyring_tmpdir, ignore_errors=True)
        archive.extractall(
            path=os.path.dirname(keyring_tmpdir),
            members=filter_directory(archive,
                                     os.path.basename(keyring_tmpdir)))

        shutil.move(keyring_tmpdir + '/python_keyring',
                    python_keyring_tmpdir)
    except (tarfile.TarError, shutil.Error):
        LOG.error("Failed to restore keyring.")
        shutil.rmtree(keyring_tmpdir, ignore_errors=True)
        shutil.rmtree(python_keyring_tmpdir, ignore_errors=True)
        raise RestoreFail("Failed to restore keyring configuration")


def cleanup_prefetched_keyring():
    """Remove the /tmp keyring directories created by prefetch_keyring."""
    try:
        keyring_tmpdir = '/tmp/.keyring'
        python_keyring_tmpdir = '/tmp/python_keyring'

        shutil.rmtree(keyring_tmpdir, ignore_errors=True)
        shutil.rmtree(python_keyring_tmpdir, ignore_errors=True)
    except shutil.Error:
        LOG.error("Failed to cleanup keyring.")
        raise RestoreFail("Failed to cleanup fetched keyring")


def backup_ldap_size():
    """Estimate the ldap backup size by piping slapcat output to wc -c."""
    try:
        total_size = 0

        proc = subprocess.Popen(
            ['slapcat -d 0 -F /etc/openldap/schema | wc -c'], shell=True,
            stdout=subprocess.PIPE)

        for line in proc.stdout:
            total_size = int(line)
            break

        proc.communicate()

        return total_size

    except subprocess.CalledProcessError:
        LOG.error("Failed to estimate backup ldap size.")
        raise BackupFail("Failed to estimate backup ldap size")


def backup_ldap(archive, staging_dir):
    """Dump the ldap database with slapcat and add it to the archive."""
    try:
        ldap_staging_dir = staging_dir + '/ldap'
        # 0o655 keeps the original 0655 mode, written in the modern octal
        # form the file already uses elsewhere (see restore_ldap's 0o755).
        os.mkdir(ldap_staging_dir, 0o655)

        subprocess.check_call([
            'slapcat', '-d', '0', '-F', '/etc/openldap/schema',
            '-l', (ldap_staging_dir + '/ldap.db')], stdout=DEVNULL)

        archive.add(ldap_staging_dir + '/ldap.db', arcname='ldap.db')

    except (OSError, subprocess.CalledProcessError, tarfile.TarError):
        LOG.error("Failed to backup ldap database.")
        raise BackupFail("Failed to backup ldap configuration")


def restore_ldap(archive, ldap_permdir, staging_dir):
    """Restore the ldap database from the archive via slapadd.

    The openldap service is stopped during the reload and restarted in the
    finally clause even on failure.
    """
    try:
        ldap_staging_dir = staging_dir + '/ldap'
        archive.extract('ldap.db', path=ldap_staging_dir)

        utils.stop_lsb_service('openldap')

        subprocess.call(['rm', '-rf', ldap_permdir], stdout=DEVNULL)
        os.mkdir(ldap_permdir, 0o755)

        subprocess.check_call(['slapadd', '-F', '/etc/openldap/schema',
                               '-l', ldap_staging_dir + '/ldap.db'],
                              stdout=DEVNULL, stderr=DEVNULL)
    except (subprocess.CalledProcessError, OSError, tarfile.TarError):
        LOG.error("Failed to restore ldap database.")
        raise RestoreFail("Failed to restore ldap configuration")
    finally:
        utils.start_lsb_service('openldap')
path=ldap_staging_dir) utils.stop_lsb_service('openldap') subprocess.call(['rm', '-rf', ldap_permdir], stdout=DEVNULL) os.mkdir(ldap_permdir, 0o755) subprocess.check_call(['slapadd', '-F', '/etc/openldap/schema', '-l', ldap_staging_dir + '/ldap.db'], stdout=DEVNULL, stderr=DEVNULL) except (subprocess.CalledProcessError, OSError, tarfile.TarError): LOG.error("Failed to restore ldap database.") raise RestoreFail("Failed to restore ldap configuration") finally: utils.start_lsb_service('openldap') def backup_postgres_size(cinder_config=False): """ Backup postgres size estimate """ try: total_size = 0 # Backup roles, table spaces and schemas for databases. proc = subprocess.Popen([('sudo -u postgres pg_dumpall --clean ' + '--schema-only | wc -c')], shell=True, stdout=subprocess.PIPE, stderr=DEVNULL) for line in proc.stdout: total_size = int(line) break proc.communicate() # get backup database backup_databases, backup_db_skip_tables = get_backup_databases( cinder_config) # Backup data for databases. for _, db_elem in enumerate(backup_databases): db_cmd = 'sudo -u postgres pg_dump --format=plain --inserts ' db_cmd += '--disable-triggers --data-only %s ' % db_elem for _, table_elem in enumerate(backup_db_skip_tables[db_elem]): db_cmd += '--exclude-table=%s ' % table_elem db_cmd += '| wc -c' proc = subprocess.Popen([db_cmd], shell=True, stdout=subprocess.PIPE, stderr=DEVNULL) for line in proc.stdout: total_size += int(line) break proc.communicate() return total_size except subprocess.CalledProcessError: LOG.error("Failed to estimate backup database size.") raise BackupFail("Failed to estimate backup database size") def backup_postgres(archive, staging_dir, cinder_config=False): """ Backup postgres configuration """ try: postgres_staging_dir = staging_dir + '/postgres' os.mkdir(postgres_staging_dir, 0655) # Backup roles, table spaces and schemas for databases. 
subprocess.check_call([('sudo -u postgres pg_dumpall --clean ' + '--schema-only' + '> %s/%s' % (postgres_staging_dir, 'postgres.sql.config'))], shell=True, stderr=DEVNULL) # get backup database backup_databases, backup_db_skip_tables = get_backup_databases( cinder_config) # Backup data for databases. for _, db_elem in enumerate(backup_databases): db_cmd = 'sudo -u postgres pg_dump --format=plain --inserts ' db_cmd += '--disable-triggers --data-only %s ' % db_elem for _, table_elem in enumerate(backup_db_skip_tables[db_elem]): db_cmd += '--exclude-table=%s ' % table_elem db_cmd += '> %s/%s.sql.data' % (postgres_staging_dir, db_elem) subprocess.check_call([db_cmd], shell=True, stderr=DEVNULL) archive.add(postgres_staging_dir, arcname='postgres') except (OSError, subprocess.CalledProcessError, tarfile.TarError): LOG.error("Failed to backup postgres databases.") raise BackupFail("Failed to backup database configuration") def restore_postgres(archive, staging_dir): """ Restore postgres configuration """ try: postgres_staging_dir = staging_dir + '/postgres' archive.extractall(path=staging_dir, members=filter_directory(archive, 'postgres')) utils.start_service("postgresql") # Restore roles, table spaces and schemas for databases. subprocess.check_call(["sudo", "-u", "postgres", "psql", "-f", postgres_staging_dir + '/postgres.sql.config', "postgres"], stdout=DEVNULL, stderr=DEVNULL) # Restore data for databases. for data in glob.glob(postgres_staging_dir + '/*.sql.data'): db_elem = data.split('/')[-1].split('.')[0] subprocess.check_call(["sudo", "-u", "postgres", "psql", "-f", data, db_elem], stdout=DEVNULL) except (OSError, subprocess.CalledProcessError, tarfile.TarError) as e: LOG.error("Failed to restore postgres databases. 
Error: %s", e) raise RestoreFail("Failed to restore database configuration") finally: utils.stop_service('postgresql') def backup_ceilometer_size(ceilometer_permdir): """ Backup ceilometer size estimate """ try: statinfo = os.stat(ceilometer_permdir + '/pipeline.yaml') return statinfo.st_size except OSError: LOG.error("Failed to estimate backup ceilometer size.") raise BackupFail("Failed to estimate backup ceilometer size") def backup_ceilometer(archive, ceilometer_permdir): """ Backup ceilometer """ try: archive.add(ceilometer_permdir + '/pipeline.yaml', arcname='pipeline.yaml') except tarfile.TarError: LOG.error("Failed to backup ceilometer.") raise BackupFail("Failed to backup ceilometer") def restore_ceilometer(archive, ceilometer_permdir): """ Restore ceilometer """ try: archive.extract('pipeline.yaml', path=ceilometer_permdir) except tarfile.TarError: LOG.error("Failed to restore ceilometer") raise RestoreFail("Failed to restore ceilometer") def filter_config_dir(archive, directory): for tarinfo in archive: if tarinfo.name.find('config/' + directory) == 0: yield tarinfo def restore_config_dir(archive, staging_dir, config_dir, dest_dir): """ Restore configuration directory if it exists """ try: archive.extractall(staging_dir, members=filter_config_dir(archive, config_dir)) # Copy files from backup to dest dir if (os.path.exists(staging_dir + '/config/' + config_dir) and os.listdir(staging_dir + '/config/' + config_dir)): subprocess.call(["mkdir", "-p", dest_dir]) try: for f in glob.glob( staging_dir + '/config/' + config_dir + '/*'): subprocess.check_call(["cp", "-p", f, dest_dir]) except IOError: LOG.warning("Failed to copy %s files" % config_dir) except (subprocess.CalledProcessError, tarfile.TarError): LOG.info("No custom %s config was found during restore." 
def backup_std_dir_size(directory):
    """Return an estimate, in bytes, of the backup size of directory."""
    try:
        return utils.directory_get_size(directory)
    except OSError:
        LOG.error("Failed to estimate backup size for %s" % directory)
        raise BackupFail("Failed to estimate backup size for %s" % directory)


def backup_std_dir(archive, directory):
    """Add an entire directory tree to the archive under its basename."""
    try:
        archive.add(directory, arcname=os.path.basename(directory))
    except tarfile.TarError:
        LOG.error("Failed to backup %s" % directory)
        raise BackupFail("Failed to backup %s" % directory)


def restore_std_dir(archive, directory):
    """Replace directory with its counterpart from the archive."""
    try:
        shutil.rmtree(directory, ignore_errors=True)
        # Verify that archive contains this directory
        try:
            archive.getmember(os.path.basename(directory))
        except KeyError:
            LOG.error("Archive does not contain directory %s" % directory)
            raise RestoreFail("Invalid backup file - missing directory %s" %
                              directory)
        archive.extractall(
            path=os.path.dirname(directory),
            members=filter_directory(archive, os.path.basename(directory)))
    except (shutil.Error, tarfile.TarError):
        LOG.error("Failed to restore %s" % directory)
        raise RestoreFail("Failed to restore %s" % directory)


def configure_loopback_interface(archive):
    """ Restore and apply configuration for loopback interface """
    utils.remove_interface_config_files()
    restore_etc_file(
        archive, utils.NETWORK_SCRIPTS_PATH,
        'sysconfig/network-scripts/' + utils.NETWORK_SCRIPTS_LOOPBACK)
    utils.restart_networking()


def backup_ceph_crush_map(archive, staging_dir):
    """Dump the ceph crush map with 'ceph osd getcrushmap' and archive it."""
    try:
        ceph_staging_dir = os.path.join(staging_dir, 'ceph')
        # 0o655 keeps the original 0655 mode, written in the modern octal
        # form already used elsewhere in this file.
        os.mkdir(ceph_staging_dir, 0o655)
        crushmap_file = os.path.join(ceph_staging_dir,
                                     sysinv_constants.CEPH_CRUSH_MAP_BACKUP)
        subprocess.check_call(['ceph', 'osd', 'getcrushmap',
                               '-o', crushmap_file], stdout=DEVNULL,
                              stderr=DEVNULL)
        archive.add(crushmap_file, arcname='ceph/' +
                    sysinv_constants.CEPH_CRUSH_MAP_BACKUP)
    except Exception as e:
        LOG.error('Failed to backup ceph crush map. Reason: {}'.format(e))
        raise BackupFail('Failed to backup ceph crush map')


def restore_ceph_crush_map(archive):
    """Restore the ceph crush map into the sysinv config path, if present.

    The original implementation checked file_exists_in_archive() twice
    (once before the try and once inside it); the redundant inner check
    has been removed.
    """
    crush_map_file = 'ceph/' + sysinv_constants.CEPH_CRUSH_MAP_BACKUP
    if not file_exists_in_archive(archive, crush_map_file):
        return

    try:
        member = archive.getmember(crush_map_file)
        # Copy the member to avoid changing the name for future
        # operations on this member.
        temp_member = copy.copy(member)
        temp_member.name = os.path.basename(temp_member.name)
        archive.extract(temp_member,
                        path=sysinv_constants.SYSINV_CONFIG_PATH)

    except tarfile.TarError as e:
        LOG.error('Failed to restore crush map file. Reason: {}'.format(e))
        raise RestoreFail('Failed to restore crush map file')


def check_size(archive_dir, cinder_config):
    """Check if there is enough space to create backup.

    Sums the per-component size estimates plus a 1 GiB staging overhead
    and compares against the free space in archive_dir.
    :raises BackupFail: when free space is insufficient
    """
    backup_overhead_bytes = 1024 ** 3  # extra GB for staging directory

    # backup_cinder_size() will return 0 if cinder/lvm is not configured,
    # So no need to add extra check here.
    backup_size = (backup_overhead_bytes +
                   backup_etc_size() +
                   backup_config_size(tsconfig.CONFIG_PATH) +
                   backup_puppet_data_size(constants.HIERADATA_PERMDIR) +
                   backup_keyring_size(keyring_permdir) +
                   backup_ldap_size() +
                   backup_postgres_size(cinder_config) +
                   backup_ceilometer_size(ceilometer_permdir) +
                   backup_std_dir_size(glance_permdir) +
                   backup_std_dir_size(home_permdir) +
                   backup_std_dir_size(patching_permdir) +
                   backup_std_dir_size(patching_repo_permdir) +
                   backup_std_dir_size(extension_permdir) +
                   backup_std_dir_size(patch_vault_permdir) +
                   backup_cinder_size(cinder_permdir)
                   )

    archive_dir_free_space = \
        utils.filesystem_get_free_space(archive_dir)

    if backup_size > archive_dir_free_space:
        print ("Archive directory (%s) does not have enough free "
               "space (%s), estimated backup size is %s."
               % (archive_dir, utils.print_bytes(archive_dir_free_space),
                  utils.print_bytes(backup_size)))

        raise BackupFail("Not enough free space for backup.")
def backup(backup_name, archive_dir, clone=False):
    """Backup configuration.

    Orchestrates the full system backup: raises a backup-in-progress
    alarm, runs the per-component backup steps into <backup_name>_system.tgz
    (and <backup_name>_images.tgz unless in a region config), and cleans up
    the flag file, alarm and staging directory in all cases.
    :param backup_name: basename for the generated tarball(s)
    :param archive_dir: directory that receives the tarballs
    :param clone: when True, skip size checks and patching-related steps
    :raises BackupFail: on precondition or backup-step failure
    """

    if not os.path.isdir(archive_dir):
        raise BackupFail("Archive directory (%s) not found." % archive_dir)

    if not utils.is_active("management-ip"):
        raise BackupFail(
            "Backups can only be performed from the active controller.")

    if os.path.isfile(backup_in_progress):
        raise BackupFail("Backup already in progress.")
    else:
        # Flag file acts as the cross-process in-progress lock.
        open(backup_in_progress, 'w')

    fmApi = fm_api.FaultAPIs()
    entity_instance_id = "%s=%s" % (fm_constants.FM_ENTITY_TYPE_HOST,
                                    sysinv_constants.CONTROLLER_HOSTNAME)
    fault = fm_api.Fault(alarm_id=fm_constants.FM_ALARM_ID_BACKUP_IN_PROGRESS,
                         alarm_state=fm_constants.FM_ALARM_STATE_SET,
                         entity_type_id=fm_constants.FM_ENTITY_TYPE_HOST,
                         entity_instance_id=entity_instance_id,
                         severity=fm_constants.FM_ALARM_SEVERITY_MINOR,
                         reason_text=("System Backup in progress."),
                         # operational
                         alarm_type=fm_constants.FM_ALARM_TYPE_7,
                         # congestion
                         probable_cause=fm_constants.ALARM_PROBABLE_CAUSE_8,
                         proposed_repair_action=("No action required."),
                         service_affecting=False)

    fmApi.set_fault(fault)

    # Cinder is considered configured when any storage backend advertises
    # the cinder service.
    cinder_config = False
    backend_services = sysinv.get_storage_backend_services()
    for services in backend_services.values():
        if (services is not None and
                services.find(sysinv_constants.SB_SVC_CINDER) != -1):
            cinder_config = True
            break

    staging_dir = None
    system_tar_path = None
    images_tar_path = None
    warnings = ''
    try:
        os.chdir('/')

        if not clone:
            check_size(archive_dir, cinder_config)

        print ("\nPerforming backup (this might take several minutes):")
        staging_dir = tempfile.mkdtemp(dir=archive_dir)

        system_tar_path = os.path.join(archive_dir,
                                       backup_name + '_system.tgz')
        system_archive = tarfile.open(system_tar_path, "w:gz")
        images_tar_path = os.path.join(archive_dir,
                                       backup_name + '_images.tgz')

        step = 1
        total_steps = 15

        if sysinv_constants.SB_TYPE_CEPH in backend_services.keys():
            total_steps += 1

        if tsconfig.region_config == "yes":
            # We don't run the glance backup step
            total_steps -= 1

        # Step 1: Backup etc
        backup_etc(system_archive)
        utils.progress(total_steps, step, 'backup etc', 'DONE')
        step += 1

        # Step 2: Backup configuration
        backup_config(system_archive, tsconfig.CONFIG_PATH)
        utils.progress(total_steps, step, 'backup configuration', 'DONE')
        step += 1

        # Step 3: Backup puppet data
        backup_puppet_data(system_archive, constants.HIERADATA_PERMDIR)
        utils.progress(total_steps, step, 'backup puppet data', 'DONE')
        step += 1

        # Step 4: Backup keyring
        backup_keyring(system_archive, keyring_permdir)
        utils.progress(total_steps, step, 'backup keyring', 'DONE')
        step += 1

        # Step 5: Backup ldap
        backup_ldap(system_archive, staging_dir)
        utils.progress(total_steps, step, 'backup ldap', 'DONE')
        step += 1

        # Step 6: Backup postgres
        backup_postgres(system_archive, staging_dir, cinder_config)
        utils.progress(total_steps, step, 'backup postgres', 'DONE')
        step += 1

        # Step 7: Backup ceilometer
        backup_ceilometer(system_archive, ceilometer_permdir)
        utils.progress(total_steps, step, 'backup ceilometer', 'DONE')
        step += 1

        if tsconfig.region_config != "yes":
            # Step 8: Backup glance
            images_archive = tarfile.open(images_tar_path, "w:gz")
            backup_std_dir(images_archive, glance_permdir)
            images_archive.close()
            utils.progress(total_steps, step, 'backup glance', 'DONE')
            step += 1

        # Step 9: Backup home
        backup_std_dir(system_archive, home_permdir)
        utils.progress(total_steps, step, 'backup home directory', 'DONE')
        step += 1

        # Step 10: Backup patching
        if not clone:
            backup_std_dir(system_archive, patching_permdir)
            utils.progress(total_steps, step, 'backup patching', 'DONE')
        step += 1

        # Step 11: Backup patching repo
        if not clone:
            backup_std_dir(system_archive, patching_repo_permdir)
            utils.progress(total_steps, step, 'backup patching repo', 'DONE')
        step += 1

        # Step 12: Backup extension filesystem
        backup_std_dir(system_archive, extension_permdir)
        utils.progress(total_steps, step, 'backup extension filesystem '
                       'directory', 'DONE')
        step += 1

        # Step 13: Backup patch-vault filesystem
        if os.path.exists(patch_vault_permdir):
            backup_std_dir(system_archive, patch_vault_permdir)
            utils.progress(total_steps, step, 'backup patch-vault filesystem '
                           'directory', 'DONE')
        step += 1

        # Step 14: Backup cinder config/LVM config
        # No need to add extra check here as if cinder/LVM is not configured,
        # ../iscsi-target/saveconfig.json will be absent, so this function will
        # do nothing.
        backup_cinder_config(system_archive)
        utils.progress(total_steps, step, 'backup cinder/LVM config', 'DONE')
        step += 1

        # Step 15: Backup ceph crush map
        if sysinv_constants.SB_TYPE_CEPH in backend_services.keys():
            backup_ceph_crush_map(system_archive, staging_dir)
            utils.progress(total_steps, step, 'backup ceph crush map', 'DONE')
            step += 1

        # Step 16: Create archive
        system_archive.close()
        utils.progress(total_steps, step, 'create archive', 'DONE')
        step += 1

    except Exception:
        # Remove partial tarballs so a failed backup leaves no debris.
        if system_tar_path and os.path.isfile(system_tar_path):
            os.remove(system_tar_path)
        if images_tar_path and os.path.isfile(images_tar_path):
            os.remove(images_tar_path)
        raise
    finally:
        fmApi.clear_fault(fm_constants.FM_ALARM_ID_BACKUP_IN_PROGRESS,
                          entity_instance_id)
        os.remove(backup_in_progress)
        if staging_dir:
            shutil.rmtree(staging_dir, ignore_errors=True)

    system_msg = "System backup file created"
    images_msg = "Images backup file created"
    if not clone:
        system_msg += ": " + system_tar_path
        images_msg += ": " + images_tar_path

    print system_msg
    if tsconfig.region_config != "yes":
        print images_msg

    if warnings != '':
        print "WARNING: The following problems occurred:"
        print textwrap.fill(warnings, 80)


def create_restore_runtime_config(filename):
    """ Create any runtime parameters needed for Restore."""
    config = {}
    # We need to re-enable Openstack password rules, which
    # were previously disabled while the controller manifests
    # were applying during a Restore
    config['classes'] = ['keystone::security_compliance']
    utils.create_manifest_runtime_config(filename, config)


def overwrite_iscsi_target_config():
    """
    Overwrite the current iscsi target config file with the one
    from the backup archive.
    """

    if not os.path.exists(
            cinder_permdir + '/iscsi-target/saveconfig.json'):
        LOG.info("Restore: Missing current saveconfig.json file")
        return

    if not os.path.exists(
            cinder_permdir + '/iscsi-target/saveconfig.json.bck'):
        LOG.info("Restore: Missing backup saveconfig.json file")
        return

    os.remove(cinder_permdir + '/iscsi-target/saveconfig.json')
    shutil.copyfile(
        cinder_permdir + '/iscsi-target/saveconfig.json.bck',
        cinder_permdir + '/iscsi-target/saveconfig.json')

    os.remove(cinder_permdir + '/iscsi-target/saveconfig.json.bck')
    subprocess.call(["targetctl", "restore"], stdout=DEVNULL, stderr=DEVNULL)


def restore_complete():
    """
    Restore proper ISCSI configuration file after cinder restore.
    Enable compute functionality for AIO system.
    :return: True if compute-config-complete is executed
    """
    if utils.get_system_type() == sysinv_constants.TIS_AIO_BUILD:
        if not os.path.isfile(restore_system_ready):
            print textwrap.fill(
                "--restore-complete can only be run "
                "after restore-system has completed "
                "successfully", 80
            )
            return False

        # The iscsi target config file must be overwritten with the
        # original file from the backup archive.
        # This is due to the cinder restore process actually changing
        # this file. These changes cause VMs that were present at
        # backup time to not boot up properly anymore.
        # The original iscsi config file has the proper settings so
        # we use that.
        overwrite_iscsi_target_config()

        print ("\nApplying compute manifests for %s. 
" % (utils.get_controller_hostname())) print ("Node will reboot on completion.") sysinv.do_compute_config_complete(utils.get_controller_hostname()) # show in-progress log on console every 30 seconds # until self reboot or timeout os.remove(restore_system_ready) time.sleep(30) for i in range(1, 10): print("compute manifest apply in progress ... ") time.sleep(30) raise RestoreFail("Timeout running compute manifests, " "reboot did not occur") else: if not os.path.isfile(restore_system_ready): print textwrap.fill( "--restore-complete can only be run " "after restore-system has completed " "successfully", 80 ) return False overwrite_iscsi_target_config() os.remove(restore_system_ready) return True def restore_system(backup_file, include_storage_reinstall=False, clone=False): """Restoring system configuration.""" if (os.path.exists(constants.CGCS_CONFIG_FILE) or os.path.exists(tsconfig.CONFIG_PATH) or os.path.exists(constants.INITIAL_CONFIG_COMPLETE_FILE)): print textwrap.fill( "Configuration has already been done. " "A system restore operation can only be done " "immediately after the load has been installed.", 80) print raise RestoreFail("System configuration already completed") if not os.path.isabs(backup_file): raise RestoreFail("Backup file (%s) not found. Full path is " "required." 
% backup_file) if os.path.isfile(restore_in_progress): raise RestoreFail("Restore already in progress.") else: open(restore_in_progress, 'w') # Add newline to console log for install-clone scenario newline = clone staging_dir = None try: try: with open(os.devnull, "w") as fnull: subprocess.check_call(["vgdisplay", "cgts-vg"], stdout=fnull, stderr=fnull) except subprocess.CalledProcessError: LOG.error("The cgts-vg volume group was not found") raise RestoreFail("Volume groups not configured") print "\nRestoring system (this will take several minutes):" # Use /scratch for the staging dir for now, # until /opt/backups is available staging_dir = tempfile.mkdtemp(dir='/scratch') # Permission change required or postgres restore fails subprocess.call(['chmod', 'a+rx', staging_dir], stdout=DEVNULL) os.chdir('/') step = 1 total_steps = 24 # Step 1: Open archive and verify installed load matches backup try: archive = tarfile.open(backup_file) except tarfile.TarError as e: LOG.exception(e) raise RestoreFail("Error opening backup file. Invalid backup " "file.") check_load_versions(archive, staging_dir) check_load_subfunctions(archive, staging_dir) utils.progress(total_steps, step, 'open archive', 'DONE', newline) step += 1 # Patching is potentially a multi-phase step. # If the controller is impacted by patches from the backup, # it must be rebooted before continuing the restore. # If this is the second pass through, we can skip over this. 
if not os.path.isfile(restore_patching_complete) and not clone: # Step 2: Restore patching restore_std_dir(archive, patching_permdir) utils.progress(total_steps, step, 'restore patching', 'DONE', newline) step += 1 # Step 3: Restore patching repo restore_std_dir(archive, patching_repo_permdir) utils.progress(total_steps, step, 'restore patching repo', 'DONE', newline) step += 1 # Step 4: Apply patches try: subprocess.check_output(["sw-patch", "install-local"]) except subprocess.CalledProcessError: LOG.error("Failed to install patches") raise RestoreFail("Failed to install patches") utils.progress(total_steps, step, 'install patches', 'DONE', newline) step += 1 open(restore_patching_complete, 'w') # If the controller was impacted by patches, we need to reboot. if os.path.isfile(node_is_patched): if not clone: print ("\nThis controller has been patched. " + "A reboot is required.") print ("After the reboot is complete, " + "re-execute the restore command.") while True: user_input = input( "Enter 'reboot' to reboot controller: ") if user_input == 'reboot': break LOG.info("This controller has been patched. Rebooting now") print("\nThis controller has been patched. Rebooting now\n\n") time.sleep(5) os.remove(restore_in_progress) if staging_dir: shutil.rmtree(staging_dir, ignore_errors=True) subprocess.call("reboot") else: # We need to restart the patch controller and agent, since # we setup the repo and patch store outside its control with open(os.devnull, "w") as devnull: subprocess.call( ["systemctl", "restart", "sw-patch-controller-daemon.service"], stdout=devnull, stderr=devnull) subprocess.call( ["systemctl", "restart", "sw-patch-agent.service"], stdout=devnull, stderr=devnull) if clone: # No patches were applied, return to cloning code # to run validation code. 
return RESTORE_RERUN_REQUIRED else: # Add the skipped steps step += 3 if os.path.isfile(node_is_patched): # If we get here, it means the node was patched by the user # AFTER the restore applied patches and rebooted, but didn't # reboot. # This means the patch lineup no longer matches what's in the # backup, but we can't (and probably shouldn't) prevent that. # However, since this will ultimately cause the node to fail # the goenabled step, we can fail immediately and force the # user to reboot. print ("\nThis controller has been patched, but not rebooted.") print ("Please reboot before continuing the restore process.") raise RestoreFail("Controller node patched without rebooting") # Flag can now be cleared if os.path.exists(restore_patching_complete): os.remove(restore_patching_complete) # Prefetch keyring prefetch_keyring(archive) # Step 5: Restore configuration restore_configuration(archive, staging_dir) # In AIO SX systems, the loopback interface is used as the management # interface. However, the application of the interface manifest will # not configure the necessary addresses on the loopback interface (see # apply_network_config.sh for details). So, we need to configure the # loopback interface here. if tsconfig.system_mode == sysinv_constants.SYSTEM_MODE_SIMPLEX: configure_loopback_interface(archive) # Write the simplex flag utils.write_simplex_flag() utils.progress(total_steps, step, 'restore configuration', 'DONE', newline) step += 1 # Step 6: Apply restore bootstrap manifest controller_0_address = utils.get_address_from_hosts_file( 'controller-0') restore_static_puppet_data(archive, constants.HIERADATA_WORKDIR) try: utils.apply_manifest(controller_0_address, sysinv_constants.CONTROLLER, 'bootstrap', constants.HIERADATA_WORKDIR) except Exception as e: LOG.exception(e) raise RestoreFail( 'Failed to apply bootstrap manifest. 
' 'See /var/log/puppet/latest/puppet.log for details.') utils.progress(total_steps, step, 'apply bootstrap manifest', 'DONE', newline) step += 1 # Step 7: Restore puppet data restore_puppet_data(archive, constants.HIERADATA_WORKDIR) utils.progress(total_steps, step, 'restore puppet data', 'DONE', newline) step += 1 # Step 8: Persist configuration utils.persist_config() utils.progress(total_steps, step, 'persist configuration', 'DONE', newline) step += 1 # Step 9: Apply controller manifest try: utils.apply_manifest(controller_0_address, sysinv_constants.CONTROLLER, 'controller', constants.HIERADATA_PERMDIR) except Exception as e: LOG.exception(e) raise RestoreFail( 'Failed to apply controller manifest. ' 'See /var/log/puppet/latest/puppet.log for details.') utils.progress(total_steps, step, 'apply controller manifest', 'DONE', newline) step += 1 # Step 10: Apply runtime controller manifests restore_filename = os.path.join(staging_dir, 'restore.yaml') create_restore_runtime_config(restore_filename) try: utils.apply_manifest(controller_0_address, sysinv_constants.CONTROLLER, 'runtime', constants.HIERADATA_PERMDIR, runtime_filename=restore_filename) except Exception as e: LOG.exception(e) raise RestoreFail( 'Failed to apply runtime controller manifest. 
' 'See /var/log/puppet/latest/puppet.log for details.') utils.progress(total_steps, step, 'apply runtime controller manifest', 'DONE', newline) step += 1 # Move the staging dir under /opt/backups, now that it's setup shutil.rmtree(staging_dir, ignore_errors=True) staging_dir = tempfile.mkdtemp(dir=constants.BACKUPS_PATH) # Permission change required or postgres restore fails subprocess.call(['chmod', 'a+rx', staging_dir], stdout=DEVNULL) # Step 11: Restore cinder config file restore_cinder_config(archive) utils.progress(total_steps, step, 'restore cinder config', 'DONE', newline) step += 1 # Step 12: Apply banner customization utils.apply_banner_customization() utils.progress(total_steps, step, 'apply banner customization', 'DONE', newline) step += 1 # Step 13: Restore dnsmasq and pxeboot config restore_dnsmasq(archive, tsconfig.CONFIG_PATH) utils.progress(total_steps, step, 'restore dnsmasq', 'DONE', newline) step += 1 # Step 14: Restore keyring restore_keyring(archive, keyring_permdir) utils.progress(total_steps, step, 'restore keyring', 'DONE', newline) step += 1 # Step 15: Restore ldap restore_ldap(archive, ldap_permdir, staging_dir) utils.progress(total_steps, step, 'restore ldap', 'DONE', newline) step += 1 # Step 16: Restore postgres restore_postgres(archive, staging_dir) utils.progress(total_steps, step, 'restore postgres', 'DONE', newline) step += 1 # Step 17: Restore ceilometer restore_ceilometer(archive, ceilometer_permdir) utils.progress(total_steps, step, 'restore ceilometer', 'DONE', newline) step += 1 # Step 18: Restore ceph crush map restore_ceph_crush_map(archive) utils.progress(total_steps, step, 'restore ceph crush map', 'DONE', newline) step += 1 # Step 19: Restore home restore_std_dir(archive, home_permdir) utils.progress(total_steps, step, 'restore home directory', 'DONE', newline) step += 1 # Step 20: Restore extension filesystem restore_std_dir(archive, extension_permdir) utils.progress(total_steps, step, 'restore extension filesystem ' 
'directory', 'DONE', newline) step += 1 # Step 21: Restore patch-vault filesystem if file_exists_in_archive(archive, os.path.basename(patch_vault_permdir)): restore_std_dir(archive, patch_vault_permdir) utils.progress(total_steps, step, 'restore patch-vault filesystem ' 'directory', 'DONE', newline) step += 1 # Step 22: Restore external ceph configuration files. restore_ceph_external_config_files(archive, staging_dir) utils.progress(total_steps, step, 'restore CEPH external config', 'DONE', newline) step += 1 # Step 23: Shutdown file systems archive.close() shutil.rmtree(staging_dir, ignore_errors=True) utils.shutdown_file_systems() utils.progress(total_steps, step, 'shutdown file systems', 'DONE', newline) step += 1 # Step 24: Recover services utils.mtce_restart() utils.mark_config_complete() time.sleep(120) for service in ['sysinv-conductor', 'sysinv-inv']: if not utils.wait_sm_service(service): raise RestoreFail("Services have failed to initialize.") utils.progress(total_steps, step, 'recover services', 'DONE', newline) step += 1 if tsconfig.system_mode != sysinv_constants.SYSTEM_MODE_SIMPLEX: print "\nRestoring node states (this will take several minutes):" backend_services = sysinv.get_storage_backend_services() with openstack.OpenStack() as client: # On ceph setups storage nodes take about 90 seconds # to become locked. 
Setting the timeout to 120 seconds # for such setups lock_timeout = 60 if sysinv_constants.SB_TYPE_CEPH in backend_services.keys(): lock_timeout = 120 failed_lock_host = False skip_hosts = ['controller-0'] if not include_storage_reinstall: storage_hosts = \ sysinv.get_hosts(client.admin_token, client.conf['region_name'], personality='storage') if storage_hosts: install_uuid = utils.get_install_uuid() for h in storage_hosts: skip_hosts.append(h.name) # Update install_uuid on the storage node client.sysinv.ihost.update_install_uuid( h.uuid, install_uuid) skip_hosts_count = len(skip_hosts) # Wait for nodes to be identified as disabled before attempting # to lock hosts. Even if after 3 minute nodes are still not # identified as disabled, we still continue the restore. if not client.wait_for_hosts_disabled( exempt_hostnames=skip_hosts, timeout=180): LOG.info("At least one node is not in a disabling state. " "Continuing.") print "\nLocking nodes:" try: failed_hosts = client.lock_hosts(skip_hosts, utils.progress, timeout=lock_timeout) # Don't power off nodes that could not be locked if len(failed_hosts) > 0: skip_hosts.append(failed_hosts) except (KeystoneFail, SysInvFail) as e: LOG.exception(e) failed_lock_host = True if not failed_lock_host: print "\nPowering-off nodes:" try: client.power_off_hosts(skip_hosts, utils.progress, timeout=60) except (KeystoneFail, SysInvFail) as e: LOG.exception(e) # this is somehow expected if failed_lock_host or len(skip_hosts) > skip_hosts_count: if include_storage_reinstall: print textwrap.fill( "Failed to lock at least one node. " + "Please lock the unlocked nodes manually.", 80 ) else: print textwrap.fill( "Failed to lock at least one node. 
" + "Please lock the unlocked controller-1 or " + "compute nodes manually.", 80 ) if not clone: print textwrap.fill( "Before continuing to the next step in the restore, " + "please ensure all nodes other than controller-0 " + "and storage nodes, if they are not being " + "reinstalled, are powered off. Please refer to the " + "system administration guide for more details.", 80 ) finally: os.remove(restore_in_progress) if staging_dir: shutil.rmtree(staging_dir, ignore_errors=True) cleanup_prefetched_keyring() fmApi = fm_api.FaultAPIs() entity_instance_id = "%s=%s" % (fm_constants.FM_ENTITY_TYPE_HOST, sysinv_constants.CONTROLLER_HOSTNAME) fault = fm_api.Fault( alarm_id=fm_constants.FM_ALARM_ID_BACKUP_IN_PROGRESS, alarm_state=fm_constants.FM_ALARM_STATE_MSG, entity_type_id=fm_constants.FM_ENTITY_TYPE_HOST, entity_instance_id=entity_instance_id, severity=fm_constants.FM_ALARM_SEVERITY_MINOR, reason_text=("System Restore complete."), # other alarm_type=fm_constants.FM_ALARM_TYPE_0, # unknown probable_cause=fm_constants.ALARM_PROBABLE_CAUSE_UNKNOWN, proposed_repair_action=(""), service_affecting=False) fmApi.set_fault(fault) # Mark system restore as complete if (utils.get_controller_hostname() == sysinv_constants.CONTROLLER_0_HOSTNAME): # Create the flag file that permits the # restore_complete command option. utils.touch(restore_system_ready) return RESTORE_COMPLETE def restore_images(backup_file, clone=False): """Restoring images.""" if not os.path.exists(constants.INITIAL_CONFIG_COMPLETE_FILE): print textwrap.fill( "System restore has not been done. " "An image restore operation can only be done after " "the system restore has been completed.", 80) print raise RestoreFail("System restore required") if not os.path.isabs(backup_file): raise RestoreFail("Backup file (%s) not found. Full path is " "required." 
% backup_file) if os.path.isfile(restore_in_progress): raise RestoreFail("Restore already in progress.") else: open(restore_in_progress, 'w') # Add newline to console log for install-clone scenario newline = clone try: print "\nRestoring images (this will take several minutes):" os.chdir('/') step = 1 total_steps = 2 # Step 1: Open archive try: archive = tarfile.open(backup_file) except tarfile.TarError as e: LOG.exception(e) raise RestoreFail("Error opening backup file. Invalid backup " "file.") utils.progress(total_steps, step, 'open archive', 'DONE', newline) step += 1 # Step 2: Restore glance restore_std_dir(archive, glance_permdir) utils.progress(total_steps, step, 'restore glance', 'DONE', newline) step += 1 archive.close() finally: os.remove(restore_in_progress)