#
# Copyright (c) 2014-2017 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
"""
Backup & Restore
"""
import copy
import filecmp
import fileinput
import os
import glob
import shutil
import stat
import subprocess
import tarfile
import tempfile
import textwrap
import time
from fm_api import constants as fm_constants
from fm_api import fm_api
from sysinv.common import constants as sysinv_constants
from common import log
from common import constants
from common.exceptions import BackupFail, RestoreFail
from common.exceptions import KeystoneFail, SysInvFail
import openstack
import tsconfig.tsconfig as tsconfig
import utils
import sysinv_api as sysinv
from six.moves import input
LOG = log.get_logger(__name__)
DEVNULL = open(os.devnull, 'w')
RESTORE_COMPLETE = "restore-complete"
RESTORE_RERUN_REQUIRED = "restore-rerun-required"
# Backup/restore related constants
backup_in_progress = tsconfig.BACKUP_IN_PROGRESS_FLAG
restore_in_progress = tsconfig.RESTORE_IN_PROGRESS_FLAG
restore_system_ready = tsconfig.RESTORE_SYSTEM_FLAG
restore_patching_complete = '/etc/platform/.restore_patching_complete'
node_is_patched = '/var/run/node_is_patched'
keyring_permdir = os.path.join('/opt/platform/.keyring', tsconfig.SW_VERSION)
ceph_permdir = os.path.join(tsconfig.CONFIG_PATH, 'ceph-config')
ldap_permdir = '/var/lib/openldap-data'
ceilometer_permdir = '/opt/cgcs/ceilometer/' + tsconfig.SW_VERSION
glance_permdir = '/opt/cgcs/glance'
patching_permdir = '/opt/patching'
patching_repo_permdir = '/www/pages/updates'
home_permdir = '/home'
cinder_permdir = '/opt/cgcs/cinder'
extension_permdir = '/opt/extension'
patch_vault_permdir = '/opt/patch-vault'
def get_backup_databases(cinder_config=False):
"""
Retrieve database lists for backup.
:return: backup_databases and backup_database_skip_tables
"""
# Databases common to all configurations
REGION_LOCAL_DATABASES = ('postgres', 'template1', 'nova', 'sysinv',
'neutron', 'heat', 'nova_api',
'aodh', 'murano', 'magnum', 'panko', 'ironic',
'nova_cell0', 'gnocchi', 'fm', 'barbican')
REGION_SHARED_DATABASES = ('glance', 'keystone')
if cinder_config:
REGION_SHARED_DATABASES += ('cinder', )
# Indicates which tables have to be dropped for a certain database.
DB_TABLE_SKIP_MAPPING = {
'fm': ('alarm',),
'gnocchi': ('metric', 'resource'),
'dcorch': ('orch_job',
'orch_request',
'resource',
'subcloud_resource'), }
if tsconfig.region_config == 'yes':
BACKUP_DATABASES = REGION_LOCAL_DATABASES
        # Add databases which are optional in secondary regions (and subclouds)
shared_services = sysinv.get_shared_services()
for service_type in ["image", "volume"]:
if service_type not in shared_services:
service = 'glance' if service_type == "image" else 'cinder'
BACKUP_DATABASES += (service, )
else:
# Add additional databases for non-region configuration and for the
# primary region in region deployments.
BACKUP_DATABASES = REGION_LOCAL_DATABASES + REGION_SHARED_DATABASES
# Add distributed cloud databases
if tsconfig.distributed_cloud_role == \
sysinv_constants.DISTRIBUTED_CLOUD_ROLE_SYSTEMCONTROLLER:
BACKUP_DATABASES += ('dcmanager', 'dcorch')
    # Generate the skip-table list for each database mentioned in
    # BACKUP_DATABASES, using the explicit entries in DB_TABLE_SKIP_MAPPING
    # (databases without an entry skip no tables).
BACKUP_DB_SKIP_TABLES = dict(
map(lambda x: [x, DB_TABLE_SKIP_MAPPING.get(x, ())],
BACKUP_DATABASES))
return BACKUP_DATABASES, BACKUP_DB_SKIP_TABLES
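
# Example (illustrative; the actual contents depend on the runtime
# tsconfig settings):
#
#   dbs, skip_tables = get_backup_databases(cinder_config=True)
#   # On a non-region system, dbs is REGION_LOCAL_DATABASES plus
#   # ('glance', 'keystone', 'cinder'), and skip_tables maps each of
#   # those databases to its DB_TABLE_SKIP_MAPPING entry, e.g.
#   # skip_tables['fm'] == ('alarm',) and skip_tables['nova'] == ().
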
def check_load_versions(archive, staging_dir):
match = False
try:
member = archive.getmember('etc/build.info')
archive.extract(member, path=staging_dir)
match = filecmp.cmp('/etc/build.info', staging_dir + '/etc/build.info')
shutil.rmtree(staging_dir + '/etc')
except Exception as e:
LOG.exception(e)
raise RestoreFail("Unable to verify load version in backup file. "
"Invalid backup file.")
if not match:
LOG.error("Load version mismatch.")
raise RestoreFail("Load version of backup does not match the "
"version of the installed load.")
def get_subfunctions(filename):
"""
Retrieves the subfunctions from a platform.conf file.
:param filename: file to retrieve subfunctions from
:return: a list of the subfunctions or None if no subfunctions exist
"""
matchstr = 'subfunction='
with open(filename, 'r') as f:
for line in f:
if matchstr in line:
parsed = line.split('=')
return parsed[1].rstrip().split(",")
return
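
# For example, a platform.conf line of the form
#   subfunction=controller,compute
# makes get_subfunctions() return ['controller', 'compute'].
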
def check_load_subfunctions(archive, staging_dir):
"""
Verify that the subfunctions in the backup match the installed load.
:param archive: backup archive
:param staging_dir: staging directory
    :raises RestoreFail: if the subfunctions do not match
"""
match = False
backup_subfunctions = None
try:
member = archive.getmember('etc/platform/platform.conf')
archive.extract(member, path=staging_dir)
backup_subfunctions = get_subfunctions(staging_dir +
'/etc/platform/platform.conf')
shutil.rmtree(staging_dir + '/etc')
if set(backup_subfunctions) ^ set(tsconfig.subfunctions):
            # The sets of subfunctions do not match
match = False
else:
match = True
except Exception:
LOG.exception("Unable to verify subfunctions in backup file")
raise RestoreFail("Unable to verify subfunctions in backup file. "
"Invalid backup file.")
if not match:
LOG.error("Subfunction mismatch - backup: %s, installed: %s" %
(str(backup_subfunctions), str(tsconfig.subfunctions)))
raise RestoreFail("Subfunctions in backup load (%s) do not match the "
"subfunctions of the installed load (%s)." %
(str(backup_subfunctions),
str(tsconfig.subfunctions)))
def file_exists_in_archive(archive, file_path):
""" Check if file exists in archive """
try:
archive.getmember(file_path)
return True
except KeyError:
LOG.info("File %s is not in archive." % file_path)
return False
def filter_directory(archive, directory):
for tarinfo in archive:
if tarinfo.name.split('/')[0] == directory:
yield tarinfo
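
# Usage sketch (paths illustrative; restore_puppet_data() below does this
# for the real puppet workdir):
#
#   with tarfile.open('/tmp/backup_system.tgz') as archive:
#       archive.extractall(path='/tmp/stage',
#                          members=filter_directory(archive, 'hieradata'))
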
def backup_etc_size():
""" Backup etc size estimate """
try:
total_size = utils.directory_get_size('/etc')
return total_size
except OSError:
LOG.error("Failed to estimate backup etc size.")
raise BackupFail("Failed to estimate backup etc size")
def backup_etc(archive):
""" Backup etc """
try:
archive.add('/etc', arcname='etc')
except tarfile.TarError:
LOG.error("Failed to backup etc.")
raise BackupFail("Failed to backup etc")
def restore_etc_file(archive, dest_dir, etc_file):
""" Restore etc file """
try:
# Change the name of this file to remove the leading path
member = archive.getmember('etc/' + etc_file)
# Copy the member to avoid changing the name for future operations on
# this member.
temp_member = copy.copy(member)
temp_member.name = os.path.basename(temp_member.name)
archive.extract(temp_member, path=dest_dir)
except tarfile.TarError:
LOG.error("Failed to restore etc file.")
raise RestoreFail("Failed to restore etc file")
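
# The copy-and-rename above flattens the archive path on extraction.
# For instance (destination illustrative):
#
#   restore_etc_file(archive, '/tmp/stage', 'hosts')
#   # extracts member 'etc/hosts' to /tmp/stage/hosts,
#   # not /tmp/stage/etc/hosts
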
def restore_etc_ssl_dir(archive, configpath=constants.CONFIG_WORKDIR):
""" Restore the etc SSL dir """
def filter_etc_ssl_private(members):
for tarinfo in members:
if 'etc/ssl/private' in tarinfo.name:
yield tarinfo
if file_exists_in_archive(archive, 'config/server-cert.pem'):
restore_config_file(
archive, configpath, 'server-cert.pem')
if file_exists_in_archive(archive, 'etc/ssl/private'):
# NOTE: This will include all TPM certificate files if TPM was
# enabled on the backed up system. However in that case, this
# restoration is only done for the first controller and TPM
        # will need to be reconfigured once the duplex controller (if
        # any) is restored.
archive.extractall(path='/',
members=filter_etc_ssl_private(archive))
def restore_ceph_external_config_files(archive, staging_dir):
# Restore ceph-config.
if file_exists_in_archive(archive, "config/ceph-config"):
restore_config_dir(archive, staging_dir, 'ceph-config', ceph_permdir)
# Copy the file to /etc/ceph.
# There might be no files to copy, so don't check the return code.
cp_command = ('cp -Rp ' + os.path.join(ceph_permdir, '*') +
' /etc/ceph/')
subprocess.call(cp_command, shell=True)
def backup_config_size(config_permdir):
""" Backup configuration size estimate """
try:
        return utils.directory_get_size(config_permdir)
except OSError:
LOG.error("Failed to estimate backup configuration size.")
raise BackupFail("Failed to estimate backup configuration size")
def backup_config(archive, config_permdir):
""" Backup configuration """
try:
# The config dir is versioned, but we're only grabbing the current
# release
archive.add(config_permdir, arcname='config')
except tarfile.TarError:
LOG.error("Failed to backup config.")
raise BackupFail("Failed to backup configuration")
def restore_config_file(archive, dest_dir, config_file):
""" Restore configuration file """
try:
# Change the name of this file to remove the leading path
member = archive.getmember('config/' + config_file)
# Copy the member to avoid changing the name for future operations on
# this member.
temp_member = copy.copy(member)
temp_member.name = os.path.basename(temp_member.name)
archive.extract(temp_member, path=dest_dir)
except tarfile.TarError:
LOG.error("Failed to restore config file %s." % config_file)
raise RestoreFail("Failed to restore configuration")
def restore_configuration(archive, staging_dir):
""" Restore configuration """
try:
os.makedirs(constants.CONFIG_WORKDIR, stat.S_IRWXU | stat.S_IRGRP |
stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH)
except OSError:
LOG.error("Failed to create config directory: %s",
constants.CONFIG_WORKDIR)
raise RestoreFail("Failed to restore configuration files")
# Restore cgcs_config file from original installation for historical
# purposes. Not used to restore the system as the information in this
# file is out of date (not updated after original installation).
restore_config_file(archive, constants.CONFIG_WORKDIR, 'cgcs_config')
# Restore platform.conf file and update as necessary. The file will be
# created in a temporary location and then moved into place when it is
# complete to prevent access to a partially created file.
restore_etc_file(archive, staging_dir, 'platform/platform.conf')
temp_platform_conf_file = os.path.join(tsconfig.PLATFORM_CONF_PATH,
'platform.conf.temp')
shutil.copyfile(os.path.join(staging_dir, 'platform.conf'),
temp_platform_conf_file)
install_uuid = utils.get_install_uuid()
for line in fileinput.FileInput(temp_platform_conf_file, inplace=1):
if line.startswith("INSTALL_UUID="):
# The INSTALL_UUID must be updated to match the new INSTALL_UUID
# which was generated when this controller was installed prior to
# doing the restore.
print "INSTALL_UUID=%s" % install_uuid
elif line.startswith("management_interface=") or \
line.startswith("oam_interface=") or \
line.startswith("infrastructure_interface=") or \
line.startswith("UUID="):
# Strip out any entries that are host specific as the backup can
# be done on either controller. The application of the
# platform_conf manifest will add these back in.
pass
else:
print line,
fileinput.close()
# Move updated platform.conf file into place.
os.rename(temp_platform_conf_file, tsconfig.PLATFORM_CONF_FILE)
# Kick tsconfig to reload the platform.conf file
tsconfig._load()
# Restore branding
restore_config_dir(archive, staging_dir, 'branding', '/opt/branding/')
# Restore banner customization
restore_config_dir(archive, staging_dir, 'banner/etc', '/opt/banner')
# Restore ssh configuration
restore_config_dir(archive, staging_dir, 'ssh_config',
constants.CONFIG_WORKDIR + '/ssh_config')
# Configure hostname
utils.configure_hostname('controller-0')
# Restore hosts file
restore_etc_file(archive, '/etc', 'hosts')
restore_etc_file(archive, constants.CONFIG_WORKDIR, 'hosts')
# Restore certificate files
restore_etc_ssl_dir(archive)
# Restore firewall rules file if it is in the archive
if file_exists_in_archive(archive, 'config/iptables.rules'):
restore_config_file(
archive, constants.CONFIG_WORKDIR, 'iptables.rules')
restore_etc_file(archive, tsconfig.PLATFORM_CONF_PATH,
'platform/iptables.rules')
def filter_pxelinux(archive):
for tarinfo in archive:
        if tarinfo.name.startswith('config/pxelinux.cfg'):
yield tarinfo
def restore_dnsmasq(archive, config_permdir):
""" Restore dnsmasq """
try:
etc_files = ['hosts']
perm_files = ['hosts',
'dnsmasq.hosts', 'dnsmasq.leases',
'dnsmasq.addn_hosts']
for etc_file in etc_files:
restore_config_file(archive, '/etc', etc_file)
for perm_file in perm_files:
restore_config_file(archive, config_permdir, perm_file)
# Extract distributed cloud addn_hosts file if present in archive.
if file_exists_in_archive(
archive, 'config/dnsmasq.addn_hosts_dc'):
restore_config_file(archive, config_permdir,
'dnsmasq.addn_hosts_dc')
tmpdir = tempfile.mkdtemp(prefix="pxerestore_")
archive.extractall(tmpdir,
members=filter_pxelinux(archive))
if os.path.exists(tmpdir + '/config/pxelinux.cfg'):
shutil.rmtree(config_permdir + 'pxelinux.cfg', ignore_errors=True)
shutil.move(tmpdir + '/config/pxelinux.cfg', config_permdir)
shutil.rmtree(tmpdir, ignore_errors=True)
except (shutil.Error, subprocess.CalledProcessError, tarfile.TarError):
LOG.error("Failed to restore dnsmasq config.")
raise RestoreFail("Failed to restore dnsmasq files")
def backup_puppet_data_size(puppet_permdir):
""" Backup puppet data size estimate """
try:
        return utils.directory_get_size(puppet_permdir)
except OSError:
LOG.error("Failed to estimate backup puppet data size.")
raise BackupFail("Failed to estimate backup puppet data size")
def backup_puppet_data(archive, puppet_permdir):
""" Backup puppet data """
try:
# The puppet dir is versioned, but we're only grabbing the current
# release
archive.add(puppet_permdir, arcname='hieradata')
except tarfile.TarError:
LOG.error("Failed to backup puppet data.")
raise BackupFail("Failed to backup puppet data")
def restore_static_puppet_data(archive, puppet_workdir):
""" Restore static puppet data """
try:
member = archive.getmember('hieradata/static.yaml')
archive.extract(member, path=os.path.dirname(puppet_workdir))
member = archive.getmember('hieradata/secure_static.yaml')
archive.extract(member, path=os.path.dirname(puppet_workdir))
except tarfile.TarError:
LOG.error("Failed to restore static puppet data.")
raise RestoreFail("Failed to restore static puppet data")
except OSError:
pass
def restore_puppet_data(archive, puppet_workdir):
""" Restore puppet data """
try:
archive.extractall(
path=os.path.dirname(puppet_workdir),
members=filter_directory(archive,
os.path.basename(puppet_workdir)))
except tarfile.TarError:
LOG.error("Failed to restore puppet data.")
raise RestoreFail("Failed to restore puppet data")
except OSError:
pass
def backup_cinder_config(archive):
""" Backup cinder configuration """
# If the iscsi target config file exists, add it to the archive
# On setups without LVM backends this file is absent
if os.path.exists(cinder_permdir + '/iscsi-target/saveconfig.json'):
archive.add(
cinder_permdir + '/iscsi-target/saveconfig.json',
arcname='cinder/saveconfig.json')
def restore_cinder_file(archive, dest_dir, cinder_file):
""" Restore cinder file """
try:
# Change the name of this file to remove the leading path
member = archive.getmember('cinder/' + cinder_file)
# Copy the member to avoid changing the name for future operations on
# this member.
temp_member = copy.copy(member)
temp_member.name = os.path.basename(temp_member.name)
archive.extract(temp_member, path=dest_dir)
except tarfile.TarError:
LOG.error("Failed to restore cinder file %s." % cinder_file)
raise RestoreFail("Failed to restore configuration")
def restore_cinder_config(archive):
"""Restore cinder config files"""
# If the iscsi target config file is present in the archive,
# restore it.
if file_exists_in_archive(archive, 'cinder/saveconfig.json'):
restore_cinder_file(
archive, cinder_permdir + '/iscsi-target',
'saveconfig.json')
# Also create a copy of the original file as the volume
# restore procedure changes this file and breaks the
# valid nova settings.
shutil.copyfile(
cinder_permdir + '/iscsi-target/saveconfig.json',
cinder_permdir + '/iscsi-target/saveconfig.json.bck')
def backup_cinder_size(cinder_permdir):
""" Backup cinder size estimate """
try:
if not os.path.exists(
cinder_permdir + '/iscsi-target/saveconfig.json'):
return 0
statinfo = os.stat(cinder_permdir + '/iscsi-target/saveconfig.json')
return statinfo.st_size
except OSError:
LOG.error("Failed to estimate backup cinder size.")
raise BackupFail("Failed to estimate backup cinder size")
def backup_keyring_size(keyring_permdir):
""" Backup keyring size estimate """
try:
        return utils.directory_get_size(keyring_permdir)
except OSError:
LOG.error("Failed to estimate backup keyring size.")
raise BackupFail("Failed to estimate backup keyring size")
def backup_keyring(archive, keyring_permdir):
""" Backup keyring configuration """
try:
archive.add(keyring_permdir, arcname='.keyring')
except tarfile.TarError:
LOG.error("Failed to backup keyring.")
raise BackupFail("Failed to backup keyring configuration")
def restore_keyring(archive, keyring_permdir):
""" Restore keyring configuration """
try:
shutil.rmtree(keyring_permdir, ignore_errors=False)
members = filter_directory(archive, '.keyring')
temp_members = list()
# remove .keyring and .keyring/ from the member path since they are
# extracted to keyring_permdir: /opt/platform/.keyring/release
for m in members:
temp_member = copy.copy(m)
lst = temp_member.name.split('.keyring/')
if len(lst) > 1:
temp_member.name = lst[1]
temp_members.append(temp_member)
archive.extractall(path=keyring_permdir, members=temp_members)
except (tarfile.TarError, shutil.Error):
LOG.error("Failed to restore keyring.")
shutil.rmtree(keyring_permdir, ignore_errors=True)
raise RestoreFail("Failed to restore keyring configuration")
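
# Path rewriting example (member name illustrative): an archive member
# '.keyring/python_keyring/keyring.cfg' is extracted as
# 'python_keyring/keyring.cfg' relative to keyring_permdir, because the
# loop above strips everything up to and including '.keyring/'.
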
def prefetch_keyring(archive):
""" Prefetch keyring configuration for manifest use """
keyring_tmpdir = '/tmp/.keyring'
python_keyring_tmpdir = '/tmp/python_keyring'
try:
shutil.rmtree(keyring_tmpdir, ignore_errors=True)
shutil.rmtree(python_keyring_tmpdir, ignore_errors=True)
archive.extractall(
path=os.path.dirname(keyring_tmpdir),
members=filter_directory(archive,
os.path.basename(keyring_tmpdir)))
shutil.move(keyring_tmpdir + '/python_keyring', python_keyring_tmpdir)
except (tarfile.TarError, shutil.Error):
LOG.error("Failed to restore keyring.")
shutil.rmtree(keyring_tmpdir, ignore_errors=True)
shutil.rmtree(python_keyring_tmpdir, ignore_errors=True)
raise RestoreFail("Failed to restore keyring configuration")
def cleanup_prefetched_keyring():
""" Cleanup fetched keyring """
try:
keyring_tmpdir = '/tmp/.keyring'
python_keyring_tmpdir = '/tmp/python_keyring'
shutil.rmtree(keyring_tmpdir, ignore_errors=True)
shutil.rmtree(python_keyring_tmpdir, ignore_errors=True)
except shutil.Error:
LOG.error("Failed to cleanup keyring.")
raise RestoreFail("Failed to cleanup fetched keyring")
def backup_ldap_size():
""" Backup ldap size estimate """
try:
total_size = 0
proc = subprocess.Popen(
['slapcat -d 0 -F /etc/openldap/schema | wc -c'],
shell=True, stdout=subprocess.PIPE)
for line in proc.stdout:
total_size = int(line)
break
proc.communicate()
return total_size
except subprocess.CalledProcessError:
LOG.error("Failed to estimate backup ldap size.")
raise BackupFail("Failed to estimate backup ldap size")
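
# Sizing note: the pipeline above measures the LDAP dump without writing
# it to disk. It is the same command the function runs, i.e.:
#
#   slapcat -d 0 -F /etc/openldap/schema | wc -c
#
# and the first (only) line of output is the byte count.
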
def backup_ldap(archive, staging_dir):
""" Backup ldap configuration """
try:
ldap_staging_dir = staging_dir + '/ldap'
        os.mkdir(ldap_staging_dir, 0o655)
subprocess.check_call([
'slapcat', '-d', '0', '-F', '/etc/openldap/schema',
'-l', (ldap_staging_dir + '/ldap.db')], stdout=DEVNULL)
archive.add(ldap_staging_dir + '/ldap.db', arcname='ldap.db')
except (OSError, subprocess.CalledProcessError, tarfile.TarError):
LOG.error("Failed to backup ldap database.")
raise BackupFail("Failed to backup ldap configuration")
def restore_ldap(archive, ldap_permdir, staging_dir):
""" Restore ldap configuration """
try:
ldap_staging_dir = staging_dir + '/ldap'
archive.extract('ldap.db', path=ldap_staging_dir)
utils.stop_lsb_service('openldap')
subprocess.call(['rm', '-rf', ldap_permdir], stdout=DEVNULL)
os.mkdir(ldap_permdir, 0o755)
subprocess.check_call(['slapadd', '-F', '/etc/openldap/schema',
'-l', ldap_staging_dir + '/ldap.db'],
stdout=DEVNULL, stderr=DEVNULL)
except (subprocess.CalledProcessError, OSError, tarfile.TarError):
LOG.error("Failed to restore ldap database.")
raise RestoreFail("Failed to restore ldap configuration")
finally:
utils.start_lsb_service('openldap')
def backup_postgres_size(cinder_config=False):
""" Backup postgres size estimate """
try:
total_size = 0
# Backup roles, table spaces and schemas for databases.
proc = subprocess.Popen([('sudo -u postgres pg_dumpall --clean ' +
'--schema-only | wc -c')], shell=True,
stdout=subprocess.PIPE, stderr=DEVNULL)
for line in proc.stdout:
total_size = int(line)
break
proc.communicate()
        # Get the list of databases to back up
backup_databases, backup_db_skip_tables = get_backup_databases(
cinder_config)
# Backup data for databases.
for _, db_elem in enumerate(backup_databases):
db_cmd = 'sudo -u postgres pg_dump --format=plain --inserts '
db_cmd += '--disable-triggers --data-only %s ' % db_elem
for _, table_elem in enumerate(backup_db_skip_tables[db_elem]):
db_cmd += '--exclude-table=%s ' % table_elem
db_cmd += '| wc -c'
proc = subprocess.Popen([db_cmd], shell=True,
stdout=subprocess.PIPE, stderr=DEVNULL)
for line in proc.stdout:
total_size += int(line)
break
proc.communicate()
return total_size
except subprocess.CalledProcessError:
LOG.error("Failed to estimate backup database size.")
raise BackupFail("Failed to estimate backup database size")
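
# For a database with skip tables, the generated sizing command looks
# like this (illustrative, using the 'gnocchi' entry from
# DB_TABLE_SKIP_MAPPING above):
#
#   sudo -u postgres pg_dump --format=plain --inserts --disable-triggers \
#       --data-only gnocchi --exclude-table=metric \
#       --exclude-table=resource | wc -c
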
def backup_postgres(archive, staging_dir, cinder_config=False):
""" Backup postgres configuration """
try:
postgres_staging_dir = staging_dir + '/postgres'
        os.mkdir(postgres_staging_dir, 0o655)
# Backup roles, table spaces and schemas for databases.
subprocess.check_call([('sudo -u postgres pg_dumpall --clean ' +
'--schema-only' +
'> %s/%s' % (postgres_staging_dir,
'postgres.sql.config'))],
shell=True, stderr=DEVNULL)
        # Get the list of databases to back up
backup_databases, backup_db_skip_tables = get_backup_databases(
cinder_config)
# Backup data for databases.
for _, db_elem in enumerate(backup_databases):
db_cmd = 'sudo -u postgres pg_dump --format=plain --inserts '
db_cmd += '--disable-triggers --data-only %s ' % db_elem
for _, table_elem in enumerate(backup_db_skip_tables[db_elem]):
db_cmd += '--exclude-table=%s ' % table_elem
db_cmd += '> %s/%s.sql.data' % (postgres_staging_dir, db_elem)
subprocess.check_call([db_cmd], shell=True, stderr=DEVNULL)
archive.add(postgres_staging_dir, arcname='postgres')
except (OSError, subprocess.CalledProcessError, tarfile.TarError):
LOG.error("Failed to backup postgres databases.")
raise BackupFail("Failed to backup database configuration")
def restore_postgres(archive, staging_dir):
""" Restore postgres configuration """
try:
postgres_staging_dir = staging_dir + '/postgres'
archive.extractall(path=staging_dir,
members=filter_directory(archive, 'postgres'))
utils.start_service("postgresql")
# Restore roles, table spaces and schemas for databases.
subprocess.check_call(["sudo", "-u", "postgres", "psql", "-f",
postgres_staging_dir +
'/postgres.sql.config', "postgres"],
stdout=DEVNULL, stderr=DEVNULL)
# Restore data for databases.
for data in glob.glob(postgres_staging_dir + '/*.sql.data'):
db_elem = data.split('/')[-1].split('.')[0]
subprocess.check_call(["sudo", "-u", "postgres", "psql", "-f",
data, db_elem],
stdout=DEVNULL)
except (OSError, subprocess.CalledProcessError, tarfile.TarError) as e:
LOG.error("Failed to restore postgres databases. Error: %s", e)
raise RestoreFail("Failed to restore database configuration")
finally:
utils.stop_service('postgresql')
def backup_ceilometer_size(ceilometer_permdir):
""" Backup ceilometer size estimate """
try:
statinfo = os.stat(ceilometer_permdir + '/pipeline.yaml')
return statinfo.st_size
except OSError:
LOG.error("Failed to estimate backup ceilometer size.")
raise BackupFail("Failed to estimate backup ceilometer size")
def backup_ceilometer(archive, ceilometer_permdir):
""" Backup ceilometer """
try:
archive.add(ceilometer_permdir + '/pipeline.yaml',
arcname='pipeline.yaml')
except tarfile.TarError:
LOG.error("Failed to backup ceilometer.")
raise BackupFail("Failed to backup ceilometer")
def restore_ceilometer(archive, ceilometer_permdir):
""" Restore ceilometer """
try:
archive.extract('pipeline.yaml', path=ceilometer_permdir)
except tarfile.TarError:
LOG.error("Failed to restore ceilometer")
raise RestoreFail("Failed to restore ceilometer")
def filter_config_dir(archive, directory):
for tarinfo in archive:
        if tarinfo.name.startswith('config/' + directory):
yield tarinfo
def restore_config_dir(archive, staging_dir, config_dir, dest_dir):
""" Restore configuration directory if it exists """
try:
archive.extractall(staging_dir,
members=filter_config_dir(archive, config_dir))
# Copy files from backup to dest dir
if (os.path.exists(staging_dir + '/config/' + config_dir) and
os.listdir(staging_dir + '/config/' + config_dir)):
subprocess.call(["mkdir", "-p", dest_dir])
try:
for f in glob.glob(
staging_dir + '/config/' + config_dir + '/*'):
subprocess.check_call(["cp", "-p", f, dest_dir])
except IOError:
LOG.warning("Failed to copy %s files" % config_dir)
except (subprocess.CalledProcessError, tarfile.TarError):
LOG.info("No custom %s config was found during restore." % config_dir)
def backup_std_dir_size(directory):
""" Backup standard directory size estimate """
try:
return utils.directory_get_size(directory)
except OSError:
LOG.error("Failed to estimate backup size for %s" % directory)
raise BackupFail("Failed to estimate backup size for %s" % directory)
def backup_std_dir(archive, directory):
""" Backup standard directory """
try:
archive.add(directory, arcname=os.path.basename(directory))
except tarfile.TarError:
LOG.error("Failed to backup %s" % directory)
raise BackupFail("Failed to backup %s" % directory)
def restore_std_dir(archive, directory):
""" Restore standard directory """
try:
shutil.rmtree(directory, ignore_errors=True)
# Verify that archive contains this directory
try:
archive.getmember(os.path.basename(directory))
except KeyError:
LOG.error("Archive does not contain directory %s" % directory)
raise RestoreFail("Invalid backup file - missing directory %s" %
directory)
archive.extractall(
path=os.path.dirname(directory),
members=filter_directory(archive, os.path.basename(directory)))
except (shutil.Error, tarfile.TarError):
LOG.error("Failed to restore %s" % directory)
raise RestoreFail("Failed to restore %s" % directory)
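
# Usage sketch (path illustrative): restore_std_dir(archive, '/home')
# wipes /home, verifies the archive has a top-level 'home' member, and
# re-extracts that member tree into place.
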
def configure_loopback_interface(archive):
""" Restore and apply configuration for loopback interface """
utils.remove_interface_config_files()
restore_etc_file(
archive, utils.NETWORK_SCRIPTS_PATH,
'sysconfig/network-scripts/' + utils.NETWORK_SCRIPTS_LOOPBACK)
utils.restart_networking()
def backup_ceph_crush_map(archive, staging_dir):
""" Backup ceph crush map """
try:
ceph_staging_dir = os.path.join(staging_dir, 'ceph')
        os.mkdir(ceph_staging_dir, 0o655)
crushmap_file = os.path.join(ceph_staging_dir,
sysinv_constants.CEPH_CRUSH_MAP_BACKUP)
subprocess.check_call(['ceph', 'osd', 'getcrushmap',
'-o', crushmap_file], stdout=DEVNULL,
stderr=DEVNULL)
archive.add(crushmap_file, arcname='ceph/' +
sysinv_constants.CEPH_CRUSH_MAP_BACKUP)
except Exception as e:
LOG.error('Failed to backup ceph crush map. Reason: {}'.format(e))
raise BackupFail('Failed to backup ceph crush map')
def restore_ceph_crush_map(archive):
""" Restore ceph crush map """
if not file_exists_in_archive(archive, 'ceph/' +
sysinv_constants.CEPH_CRUSH_MAP_BACKUP):
return
try:
crush_map_file = 'ceph/' + sysinv_constants.CEPH_CRUSH_MAP_BACKUP
if file_exists_in_archive(archive, crush_map_file):
member = archive.getmember(crush_map_file)
# Copy the member to avoid changing the name for future
# operations on this member.
temp_member = copy.copy(member)
temp_member.name = os.path.basename(temp_member.name)
archive.extract(temp_member,
path=sysinv_constants.SYSINV_CONFIG_PATH)
except tarfile.TarError as e:
LOG.error('Failed to restore crush map file. Reason: {}'.format(e))
raise RestoreFail('Failed to restore crush map file')
def check_size(archive_dir, cinder_config):
"""Check if there is enough space to create backup."""
    backup_overhead_bytes = 1024 ** 3  # extra GiB for the staging directory
    # backup_cinder_size() will return 0 if cinder/lvm is not configured,
    # so no extra check is needed here.
backup_size = (backup_overhead_bytes +
backup_etc_size() +
backup_config_size(tsconfig.CONFIG_PATH) +
backup_puppet_data_size(constants.HIERADATA_PERMDIR) +
backup_keyring_size(keyring_permdir) +
backup_ldap_size() +
backup_postgres_size(cinder_config) +
backup_ceilometer_size(ceilometer_permdir) +
backup_std_dir_size(glance_permdir) +
backup_std_dir_size(home_permdir) +
backup_std_dir_size(patching_permdir) +
backup_std_dir_size(patching_repo_permdir) +
backup_std_dir_size(extension_permdir) +
backup_std_dir_size(patch_vault_permdir) +
backup_cinder_size(cinder_permdir)
)
archive_dir_free_space = \
utils.filesystem_get_free_space(archive_dir)
if backup_size > archive_dir_free_space:
print ("Archive directory (%s) does not have enough free "
"space (%s), estimated backup size is %s." %
(archive_dir, utils.print_bytes(archive_dir_free_space),
utils.print_bytes(backup_size)))
raise BackupFail("Not enough free space for backup.")
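
# Rough arithmetic example (hypothetical numbers): if the estimators
# above total 3 GiB of content, the requirement becomes 3 GiB plus the
# 1 GiB staging overhead, so an archive_dir with only 3.5 GiB free
# would raise BackupFail.
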
def backup(backup_name, archive_dir, clone=False):
"""Backup configuration."""
if not os.path.isdir(archive_dir):
raise BackupFail("Archive directory (%s) not found." % archive_dir)
if not utils.is_active("management-ip"):
raise BackupFail(
"Backups can only be performed from the active controller.")
if os.path.isfile(backup_in_progress):
raise BackupFail("Backup already in progress.")
else:
open(backup_in_progress, 'w')
fmApi = fm_api.FaultAPIs()
entity_instance_id = "%s=%s" % (fm_constants.FM_ENTITY_TYPE_HOST,
sysinv_constants.CONTROLLER_HOSTNAME)
fault = fm_api.Fault(alarm_id=fm_constants.FM_ALARM_ID_BACKUP_IN_PROGRESS,
alarm_state=fm_constants.FM_ALARM_STATE_SET,
entity_type_id=fm_constants.FM_ENTITY_TYPE_HOST,
entity_instance_id=entity_instance_id,
severity=fm_constants.FM_ALARM_SEVERITY_MINOR,
reason_text=("System Backup in progress."),
# operational
alarm_type=fm_constants.FM_ALARM_TYPE_7,
# congestion
probable_cause=fm_constants.ALARM_PROBABLE_CAUSE_8,
proposed_repair_action=("No action required."),
service_affecting=False)
fmApi.set_fault(fault)
cinder_config = False
backend_services = sysinv.get_storage_backend_services()
for services in backend_services.values():
if (services is not None and
services.find(sysinv_constants.SB_SVC_CINDER) != -1):
cinder_config = True
break
staging_dir = None
system_tar_path = None
images_tar_path = None
warnings = ''
try:
os.chdir('/')
if not clone:
check_size(archive_dir, cinder_config)
print ("\nPerforming backup (this might take several minutes):")
staging_dir = tempfile.mkdtemp(dir=archive_dir)
system_tar_path = os.path.join(archive_dir,
backup_name + '_system.tgz')
system_archive = tarfile.open(system_tar_path, "w:gz")
images_tar_path = os.path.join(archive_dir,
backup_name + '_images.tgz')
step = 1
total_steps = 15
if sysinv_constants.SB_TYPE_CEPH in backend_services.keys():
total_steps += 1
if tsconfig.region_config == "yes":
# We don't run the glance backup step
total_steps -= 1
# Step 1: Backup etc
backup_etc(system_archive)
utils.progress(total_steps, step, 'backup etc', 'DONE')
step += 1
# Step 2: Backup configuration
backup_config(system_archive, tsconfig.CONFIG_PATH)
utils.progress(total_steps, step, 'backup configuration', 'DONE')
step += 1
# Step 3: Backup puppet data
backup_puppet_data(system_archive, constants.HIERADATA_PERMDIR)
utils.progress(total_steps, step, 'backup puppet data', 'DONE')
step += 1
# Step 4: Backup keyring
backup_keyring(system_archive, keyring_permdir)
utils.progress(total_steps, step, 'backup keyring', 'DONE')
step += 1
# Step 5: Backup ldap
backup_ldap(system_archive, staging_dir)
utils.progress(total_steps, step, 'backup ldap', 'DONE')
step += 1
# Step 6: Backup postgres
backup_postgres(system_archive, staging_dir, cinder_config)
utils.progress(total_steps, step, 'backup postgres', 'DONE')
step += 1
# Step 7: Backup ceilometer
backup_ceilometer(system_archive, ceilometer_permdir)
utils.progress(total_steps, step, 'backup ceilometer', 'DONE')
step += 1
if tsconfig.region_config != "yes":
# Step 8: Backup glance
images_archive = tarfile.open(images_tar_path, "w:gz")
backup_std_dir(images_archive, glance_permdir)
images_archive.close()
utils.progress(total_steps, step, 'backup glance', 'DONE')
step += 1
# Step 9: Backup home
backup_std_dir(system_archive, home_permdir)
utils.progress(total_steps, step, 'backup home directory', 'DONE')
step += 1
# Step 10: Backup patching
if not clone:
backup_std_dir(system_archive, patching_permdir)
utils.progress(total_steps, step, 'backup patching', 'DONE')
step += 1
# Step 11: Backup patching repo
if not clone:
backup_std_dir(system_archive, patching_repo_permdir)
utils.progress(total_steps, step, 'backup patching repo', 'DONE')
step += 1
# Step 12: Backup extension filesystem
backup_std_dir(system_archive, extension_permdir)
utils.progress(total_steps, step, 'backup extension filesystem '
'directory', 'DONE')
step += 1
# Step 13: Backup patch-vault filesystem
if os.path.exists(patch_vault_permdir):
backup_std_dir(system_archive, patch_vault_permdir)
utils.progress(total_steps, step, 'backup patch-vault filesystem '
'directory', 'DONE')
step += 1
# Step 14: Backup cinder config/LVM config
# No need to add extra check here as if cinder/LVM is not configured,
# ../iscsi-target/saveconfig.json will be absent, so this function will
# do nothing.
backup_cinder_config(system_archive)
utils.progress(total_steps, step, 'backup cinder/LVM config', 'DONE')
step += 1
# Step 15: Backup ceph crush map
if sysinv_constants.SB_TYPE_CEPH in backend_services.keys():
backup_ceph_crush_map(system_archive, staging_dir)
utils.progress(total_steps, step, 'backup ceph crush map', 'DONE')
step += 1
# Step 16: Create archive
system_archive.close()
utils.progress(total_steps, step, 'create archive', 'DONE')
step += 1
except Exception:
if system_tar_path and os.path.isfile(system_tar_path):
os.remove(system_tar_path)
if images_tar_path and os.path.isfile(images_tar_path):
os.remove(images_tar_path)
raise
finally:
fmApi.clear_fault(fm_constants.FM_ALARM_ID_BACKUP_IN_PROGRESS,
entity_instance_id)
os.remove(backup_in_progress)
if staging_dir:
shutil.rmtree(staging_dir, ignore_errors=True)
system_msg = "System backup file created"
images_msg = "Images backup file created"
if not clone:
system_msg += ": " + system_tar_path
images_msg += ": " + images_tar_path
print system_msg
if tsconfig.region_config != "yes":
print images_msg
if warnings != '':
print "WARNING: The following problems occurred:"
print textwrap.fill(warnings, 80)
def create_restore_runtime_config(filename):
""" Create any runtime parameters needed for Restore."""
config = {}
# We need to re-enable Openstack password rules, which
# were previously disabled while the controller manifests
# were applying during a Restore
config['classes'] = ['keystone::security_compliance']
utils.create_manifest_runtime_config(filename, config)
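
# Usage note: the file written here is consumed in Step 10 of
# restore_system() below, where it is passed to utils.apply_manifest(...,
# runtime_filename=restore_filename) so that puppet re-applies
# keystone::security_compliance and re-enables the password rules.
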
def overwrite_iscsi_target_config():
"""
Overwrite the current iscsi target config file with the one
from the backup archive.
"""
if not os.path.exists(
cinder_permdir + '/iscsi-target/saveconfig.json'):
LOG.info("Restore: Missing current saveconfig.json file")
return
if not os.path.exists(
cinder_permdir + '/iscsi-target/saveconfig.json.bck'):
LOG.info("Restore: Missing backup saveconfig.json file")
return
os.remove(cinder_permdir + '/iscsi-target/saveconfig.json')
shutil.copyfile(
cinder_permdir + '/iscsi-target/saveconfig.json.bck',
cinder_permdir + '/iscsi-target/saveconfig.json')
os.remove(cinder_permdir + '/iscsi-target/saveconfig.json.bck')
subprocess.call(["targetctl", "restore"], stdout=DEVNULL, stderr=DEVNULL)
def restore_complete():
"""
Restore proper ISCSI configuration file after cinder restore.
Enable compute functionality for AIO system.
:return: True if compute-config-complete is executed
"""
if utils.get_system_type() == sysinv_constants.TIS_AIO_BUILD:
if not os.path.isfile(restore_system_ready):
print textwrap.fill(
"--restore-complete can only be run "
"after restore-system has completed "
"successfully", 80
)
return False
# The iscsi target config file must be overwritten with the
# original file from the backup archive.
# This is due to the cinder restore process actually changing
# this file. These changes cause VMs that were present at
# backup time to not boot up properly anymore.
        # The original iscsi config file has the proper settings, so we
        # use that.
overwrite_iscsi_target_config()
print ("\nApplying compute manifests for %s. " %
(utils.get_controller_hostname()))
print ("Node will reboot on completion.")
sysinv.do_compute_config_complete(utils.get_controller_hostname())
# show in-progress log on console every 30 seconds
# until self reboot or timeout
os.remove(restore_system_ready)
time.sleep(30)
for i in range(1, 10):
print("compute manifest apply in progress ... ")
time.sleep(30)
raise RestoreFail("Timeout running compute manifests, "
"reboot did not occur")
else:
if not os.path.isfile(restore_system_ready):
print textwrap.fill(
"--restore-complete can only be run "
"after restore-system has completed "
"successfully", 80
)
return False
overwrite_iscsi_target_config()
os.remove(restore_system_ready)
return True
def restore_system(backup_file, include_storage_reinstall=False, clone=False):
"""Restoring system configuration."""
if (os.path.exists(constants.CGCS_CONFIG_FILE) or
os.path.exists(tsconfig.CONFIG_PATH) or
os.path.exists(constants.INITIAL_CONFIG_COMPLETE_FILE)):
print textwrap.fill(
"Configuration has already been done. "
"A system restore operation can only be done "
"immediately after the load has been installed.", 80)
print
raise RestoreFail("System configuration already completed")
if not os.path.isabs(backup_file):
raise RestoreFail("Backup file (%s) not found. Full path is "
"required." % backup_file)
if os.path.isfile(restore_in_progress):
raise RestoreFail("Restore already in progress.")
else:
open(restore_in_progress, 'w')
# Add newline to console log for install-clone scenario
newline = clone
staging_dir = None
try:
try:
with open(os.devnull, "w") as fnull:
subprocess.check_call(["vgdisplay", "cgts-vg"],
stdout=fnull,
stderr=fnull)
except subprocess.CalledProcessError:
LOG.error("The cgts-vg volume group was not found")
raise RestoreFail("Volume groups not configured")
print "\nRestoring system (this will take several minutes):"
# Use /scratch for the staging dir for now,
# until /opt/backups is available
staging_dir = tempfile.mkdtemp(dir='/scratch')
# Permission change required or postgres restore fails
subprocess.call(['chmod', 'a+rx', staging_dir], stdout=DEVNULL)
os.chdir('/')
step = 1
total_steps = 24
# Step 1: Open archive and verify installed load matches backup
try:
archive = tarfile.open(backup_file)
except tarfile.TarError as e:
LOG.exception(e)
raise RestoreFail("Error opening backup file. Invalid backup "
"file.")
check_load_versions(archive, staging_dir)
check_load_subfunctions(archive, staging_dir)
utils.progress(total_steps, step, 'open archive', 'DONE', newline)
step += 1
# Patching is potentially a multi-phase step.
# If the controller is impacted by patches from the backup,
# it must be rebooted before continuing the restore.
# If this is the second pass through, we can skip over this.
if not os.path.isfile(restore_patching_complete) and not clone:
# Step 2: Restore patching
restore_std_dir(archive, patching_permdir)
utils.progress(total_steps, step, 'restore patching', 'DONE',
newline)
step += 1
# Step 3: Restore patching repo
restore_std_dir(archive, patching_repo_permdir)
utils.progress(total_steps, step, 'restore patching repo', 'DONE',
newline)
step += 1
# Step 4: Apply patches
try:
subprocess.check_output(["sw-patch", "install-local"])
except subprocess.CalledProcessError:
LOG.error("Failed to install patches")
raise RestoreFail("Failed to install patches")
utils.progress(total_steps, step, 'install patches', 'DONE',
newline)
step += 1
open(restore_patching_complete, 'w')
# If the controller was impacted by patches, we need to reboot.
if os.path.isfile(node_is_patched):
if not clone:
print ("\nThis controller has been patched. " +
"A reboot is required.")
print ("After the reboot is complete, " +
"re-execute the restore command.")
while True:
user_input = input(
"Enter 'reboot' to reboot controller: ")
if user_input == 'reboot':
break
LOG.info("This controller has been patched. Rebooting now")
print("\nThis controller has been patched. Rebooting now\n\n")
time.sleep(5)
os.remove(restore_in_progress)
if staging_dir:
shutil.rmtree(staging_dir, ignore_errors=True)
subprocess.call("reboot")
else:
                # We need to restart the patch controller and agent, since
                # we set up the repo and patch store outside its control
with open(os.devnull, "w") as devnull:
subprocess.call(
["systemctl",
"restart",
"sw-patch-controller-daemon.service"],
stdout=devnull, stderr=devnull)
subprocess.call(
["systemctl",
"restart",
"sw-patch-agent.service"],
stdout=devnull, stderr=devnull)
if clone:
# No patches were applied, return to cloning code
# to run validation code.
return RESTORE_RERUN_REQUIRED
else:
# Add the skipped steps
step += 3
if os.path.isfile(node_is_patched):
            # If we get here, it means the user patched the node AFTER
            # the restore applied its patches and rebooted, but has not
            # rebooted since.
# This means the patch lineup no longer matches what's in the
# backup, but we can't (and probably shouldn't) prevent that.
# However, since this will ultimately cause the node to fail
# the goenabled step, we can fail immediately and force the
# user to reboot.
print ("\nThis controller has been patched, but not rebooted.")
print ("Please reboot before continuing the restore process.")
raise RestoreFail("Controller node patched without rebooting")
# Flag can now be cleared
if os.path.exists(restore_patching_complete):
os.remove(restore_patching_complete)
# Prefetch keyring
prefetch_keyring(archive)
# Step 5: Restore configuration
restore_configuration(archive, staging_dir)
# In AIO SX systems, the loopback interface is used as the management
# interface. However, the application of the interface manifest will
# not configure the necessary addresses on the loopback interface (see
# apply_network_config.sh for details). So, we need to configure the
# loopback interface here.
if tsconfig.system_mode == sysinv_constants.SYSTEM_MODE_SIMPLEX:
configure_loopback_interface(archive)
# Write the simplex flag
utils.write_simplex_flag()
utils.progress(total_steps, step, 'restore configuration', 'DONE',
newline)
step += 1
# Step 6: Apply restore bootstrap manifest
controller_0_address = utils.get_address_from_hosts_file(
'controller-0')
restore_static_puppet_data(archive, constants.HIERADATA_WORKDIR)
try:
utils.apply_manifest(controller_0_address,
sysinv_constants.CONTROLLER,
'bootstrap',
constants.HIERADATA_WORKDIR)
except Exception as e:
LOG.exception(e)
raise RestoreFail(
'Failed to apply bootstrap manifest. '
'See /var/log/puppet/latest/puppet.log for details.')
utils.progress(total_steps, step, 'apply bootstrap manifest', 'DONE',
newline)
step += 1
# Step 7: Restore puppet data
restore_puppet_data(archive, constants.HIERADATA_WORKDIR)
utils.progress(total_steps, step, 'restore puppet data', 'DONE',
newline)
step += 1
# Step 8: Persist configuration
utils.persist_config()
utils.progress(total_steps, step, 'persist configuration', 'DONE',
newline)
step += 1
# Step 9: Apply controller manifest
try:
utils.apply_manifest(controller_0_address,
sysinv_constants.CONTROLLER,
'controller',
constants.HIERADATA_PERMDIR)
except Exception as e:
LOG.exception(e)
raise RestoreFail(
'Failed to apply controller manifest. '
'See /var/log/puppet/latest/puppet.log for details.')
utils.progress(total_steps, step, 'apply controller manifest', 'DONE',
newline)
step += 1
# Step 10: Apply runtime controller manifests
restore_filename = os.path.join(staging_dir, 'restore.yaml')
create_restore_runtime_config(restore_filename)
try:
utils.apply_manifest(controller_0_address,
sysinv_constants.CONTROLLER,
'runtime',
constants.HIERADATA_PERMDIR,
runtime_filename=restore_filename)
except Exception as e:
LOG.exception(e)
raise RestoreFail(
'Failed to apply runtime controller manifest. '
'See /var/log/puppet/latest/puppet.log for details.')
utils.progress(total_steps, step,
'apply runtime controller manifest', 'DONE',
newline)
step += 1
        # Move the staging dir under /opt/backups, now that it's set up
shutil.rmtree(staging_dir, ignore_errors=True)
staging_dir = tempfile.mkdtemp(dir=constants.BACKUPS_PATH)
# Permission change required or postgres restore fails
subprocess.call(['chmod', 'a+rx', staging_dir], stdout=DEVNULL)
# Step 11: Restore cinder config file
restore_cinder_config(archive)
utils.progress(total_steps, step, 'restore cinder config', 'DONE',
newline)
step += 1
# Step 12: Apply banner customization
utils.apply_banner_customization()
utils.progress(total_steps, step, 'apply banner customization', 'DONE',
newline)
step += 1
# Step 13: Restore dnsmasq and pxeboot config
restore_dnsmasq(archive, tsconfig.CONFIG_PATH)
utils.progress(total_steps, step, 'restore dnsmasq', 'DONE', newline)
step += 1
# Step 14: Restore keyring
restore_keyring(archive, keyring_permdir)
utils.progress(total_steps, step, 'restore keyring', 'DONE', newline)
step += 1
# Step 15: Restore ldap
restore_ldap(archive, ldap_permdir, staging_dir)
utils.progress(total_steps, step, 'restore ldap', 'DONE', newline)
step += 1
# Step 16: Restore postgres
restore_postgres(archive, staging_dir)
utils.progress(total_steps, step, 'restore postgres', 'DONE', newline)
step += 1
# Step 17: Restore ceilometer
restore_ceilometer(archive, ceilometer_permdir)
utils.progress(total_steps, step, 'restore ceilometer', 'DONE',
newline)
step += 1
# Step 18: Restore ceph crush map
restore_ceph_crush_map(archive)
utils.progress(total_steps, step, 'restore ceph crush map', 'DONE',
newline)
step += 1
# Step 19: Restore home
restore_std_dir(archive, home_permdir)
utils.progress(total_steps, step, 'restore home directory', 'DONE',
newline)
step += 1
# Step 20: Restore extension filesystem
restore_std_dir(archive, extension_permdir)
utils.progress(total_steps, step, 'restore extension filesystem '
'directory', 'DONE', newline)
step += 1
# Step 21: Restore patch-vault filesystem
if file_exists_in_archive(archive,
os.path.basename(patch_vault_permdir)):
restore_std_dir(archive, patch_vault_permdir)
utils.progress(total_steps, step, 'restore patch-vault filesystem '
'directory', 'DONE', newline)
step += 1
# Step 22: Restore external ceph configuration files.
restore_ceph_external_config_files(archive, staging_dir)
utils.progress(total_steps, step, 'restore CEPH external config',
'DONE', newline)
step += 1
# Step 23: Shutdown file systems
archive.close()
shutil.rmtree(staging_dir, ignore_errors=True)
utils.shutdown_file_systems()
utils.progress(total_steps, step, 'shutdown file systems', 'DONE',
newline)
step += 1
# Step 24: Recover services
utils.mtce_restart()
utils.mark_config_complete()
time.sleep(120)
for service in ['sysinv-conductor', 'sysinv-inv']:
if not utils.wait_sm_service(service):
raise RestoreFail("Services have failed to initialize.")
utils.progress(total_steps, step, 'recover services', 'DONE', newline)
step += 1
if tsconfig.system_mode != sysinv_constants.SYSTEM_MODE_SIMPLEX:
print "\nRestoring node states (this will take several minutes):"
backend_services = sysinv.get_storage_backend_services()
with openstack.OpenStack() as client:
                # On ceph setups storage nodes take about 90 seconds
                # to become locked, so raise the timeout to 120 seconds
                # for such setups.
lock_timeout = 60
if sysinv_constants.SB_TYPE_CEPH in backend_services.keys():
lock_timeout = 120
failed_lock_host = False
skip_hosts = ['controller-0']
if not include_storage_reinstall:
storage_hosts = \
sysinv.get_hosts(client.admin_token,
client.conf['region_name'],
personality='storage')
if storage_hosts:
install_uuid = utils.get_install_uuid()
for h in storage_hosts:
skip_hosts.append(h.name)
# Update install_uuid on the storage node
client.sysinv.ihost.update_install_uuid(
h.uuid,
install_uuid)
skip_hosts_count = len(skip_hosts)
                # Wait for nodes to be identified as disabled before
                # attempting to lock hosts. Even if nodes are still not
                # identified as disabled after 3 minutes, we continue
                # the restore.
if not client.wait_for_hosts_disabled(
exempt_hostnames=skip_hosts,
timeout=180):
LOG.info("At least one node is not in a disabling state. "
"Continuing.")
print "\nLocking nodes:"
try:
failed_hosts = client.lock_hosts(skip_hosts,
utils.progress,
timeout=lock_timeout)
# Don't power off nodes that could not be locked
if len(failed_hosts) > 0:
                        skip_hosts.extend(failed_hosts)
except (KeystoneFail, SysInvFail) as e:
LOG.exception(e)
failed_lock_host = True
if not failed_lock_host:
print "\nPowering-off nodes:"
try:
client.power_off_hosts(skip_hosts,
utils.progress,
timeout=60)
except (KeystoneFail, SysInvFail) as e:
LOG.exception(e)
                        # Power-off failures are somewhat expected here.
if failed_lock_host or len(skip_hosts) > skip_hosts_count:
if include_storage_reinstall:
print textwrap.fill(
"Failed to lock at least one node. " +
"Please lock the unlocked nodes manually.", 80
)
else:
print textwrap.fill(
"Failed to lock at least one node. " +
"Please lock the unlocked controller-1 or " +
"compute nodes manually.", 80
)
if not clone:
print textwrap.fill(
"Before continuing to the next step in the restore, " +
"please ensure all nodes other than controller-0 " +
"and storage nodes, if they are not being " +
"reinstalled, are powered off. Please refer to the " +
"system administration guide for more details.", 80
)
finally:
os.remove(restore_in_progress)
if staging_dir:
shutil.rmtree(staging_dir, ignore_errors=True)
cleanup_prefetched_keyring()
fmApi = fm_api.FaultAPIs()
entity_instance_id = "%s=%s" % (fm_constants.FM_ENTITY_TYPE_HOST,
sysinv_constants.CONTROLLER_HOSTNAME)
fault = fm_api.Fault(
alarm_id=fm_constants.FM_ALARM_ID_BACKUP_IN_PROGRESS,
alarm_state=fm_constants.FM_ALARM_STATE_MSG,
entity_type_id=fm_constants.FM_ENTITY_TYPE_HOST,
entity_instance_id=entity_instance_id,
severity=fm_constants.FM_ALARM_SEVERITY_MINOR,
reason_text=("System Restore complete."),
# other
alarm_type=fm_constants.FM_ALARM_TYPE_0,
# unknown
probable_cause=fm_constants.ALARM_PROBABLE_CAUSE_UNKNOWN,
proposed_repair_action=(""),
service_affecting=False)
fmApi.set_fault(fault)
# Mark system restore as complete
if (utils.get_controller_hostname() ==
sysinv_constants.CONTROLLER_0_HOSTNAME):
# Create the flag file that permits the
# restore_complete command option.
utils.touch(restore_system_ready)
return RESTORE_COMPLETE
def restore_images(backup_file, clone=False):
"""Restoring images."""
if not os.path.exists(constants.INITIAL_CONFIG_COMPLETE_FILE):
print textwrap.fill(
"System restore has not been done. "
"An image restore operation can only be done after "
"the system restore has been completed.", 80)
print
raise RestoreFail("System restore required")
if not os.path.isabs(backup_file):
raise RestoreFail("Backup file (%s) not found. Full path is "
"required." % backup_file)
if os.path.isfile(restore_in_progress):
raise RestoreFail("Restore already in progress.")
else:
open(restore_in_progress, 'w')
# Add newline to console log for install-clone scenario
newline = clone
try:
print "\nRestoring images (this will take several minutes):"
os.chdir('/')
step = 1
total_steps = 2
# Step 1: Open archive
try:
archive = tarfile.open(backup_file)
except tarfile.TarError as e:
LOG.exception(e)
raise RestoreFail("Error opening backup file. Invalid backup "
"file.")
utils.progress(total_steps, step, 'open archive', 'DONE', newline)
step += 1
# Step 2: Restore glance
restore_std_dir(archive, glance_permdir)
utils.progress(total_steps, step, 'restore glance', 'DONE',
newline)
step += 1
archive.close()
finally:
os.remove(restore_in_progress)