#
# Copyright (c) 2014-2017 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
"""
Backup & Restore
"""
import copy
import filecmp
import fileinput
import os
import glob
import shutil
import stat
import subprocess
import tarfile
import tempfile
import textwrap
import time
from fm_api import constants as fm_constants
from fm_api import fm_api
from sysinv.common import constants as sysinv_constants
from common import log
from common import constants
from common.exceptions import BackupFail, RestoreFail
from common.exceptions import KeystoneFail, SysInvFail
import openstack
import tsconfig.tsconfig as tsconfig
import utils
import sysinv_api as sysinv
from six.moves import input
LOG = log.get_logger(__name__)
DEVNULL = open(os.devnull, 'w')
RESTORE_COMPLETE = "restore-complete"
RESTORE_RERUN_REQUIRED = "restore-rerun-required"
# Backup/restore related constants
backup_in_progress = tsconfig.BACKUP_IN_PROGRESS_FLAG
restore_in_progress = tsconfig.RESTORE_IN_PROGRESS_FLAG
restore_system_ready = tsconfig.RESTORE_SYSTEM_FLAG
restore_patching_complete = '/etc/platform/.restore_patching_complete'
node_is_patched = '/var/run/node_is_patched'
keyring_permdir = os.path.join('/opt/platform/.keyring', tsconfig.SW_VERSION)
ceph_permdir = os.path.join(tsconfig.CONFIG_PATH, 'ceph-config')
ldap_permdir = '/var/lib/openldap-data'
ceilometer_permdir = '/opt/cgcs/ceilometer/' + tsconfig.SW_VERSION
glance_permdir = '/opt/cgcs/glance'
patching_permdir = '/opt/patching'
patching_repo_permdir = '/www/pages/updates'
home_permdir = '/home'
cinder_permdir = '/opt/cgcs/cinder'
extension_permdir = '/opt/extension'
patch_vault_permdir = '/opt/patch-vault'
def get_backup_databases(cinder_config=False):
"""
Retrieve database lists for backup.
:return: backup_databases and backup_database_skip_tables
"""
# Databases common to all configurations
REGION_LOCAL_DATABASES = ('postgres', 'template1', 'nova', 'sysinv',
'neutron', 'heat', 'nova_api',
'aodh', 'murano', 'magnum', 'panko', 'ironic',
'nova_cell0', 'gnocchi', 'fm', 'barbican')
REGION_SHARED_DATABASES = ('glance', 'keystone')
if cinder_config:
REGION_SHARED_DATABASES += ('cinder', )
# Indicates which tables have to be dropped for a certain database.
DB_TABLE_SKIP_MAPPING = {
'fm': ('alarm',),
'gnocchi': ('metric', 'resource'),
'dcorch': ('orch_job',
'orch_request',
'resource',
'subcloud_resource'), }
if tsconfig.region_config == 'yes':
BACKUP_DATABASES = REGION_LOCAL_DATABASES
        # Add databases which are optional in secondary regions (and subclouds)
shared_services = sysinv.get_shared_services()
for service_type in ["image", "volume"]:
if service_type not in shared_services:
service = 'glance' if service_type == "image" else 'cinder'
BACKUP_DATABASES += (service, )
else:
# Add additional databases for non-region configuration and for the
# primary region in region deployments.
BACKUP_DATABASES = REGION_LOCAL_DATABASES + REGION_SHARED_DATABASES
# Add distributed cloud databases
if tsconfig.distributed_cloud_role == \
sysinv_constants.DISTRIBUTED_CLOUD_ROLE_SYSTEMCONTROLLER:
BACKUP_DATABASES += ('dcmanager', 'dcorch')
    # Generate the skip-table list for each database mentioned in
    # BACKUP_DATABASES, using the explicit entries in DB_TABLE_SKIP_MAPPING
    # (databases without an entry skip no tables).
BACKUP_DB_SKIP_TABLES = dict(
map(lambda x: [x, DB_TABLE_SKIP_MAPPING.get(x, ())],
BACKUP_DATABASES))
return BACKUP_DATABASES, BACKUP_DB_SKIP_TABLES
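
# Example (illustrative; the actual contents depend on the runtime
# tsconfig settings):
#
#   dbs, skip_tables = get_backup_databases(cinder_config=True)
#   # On a non-region system, dbs is REGION_LOCAL_DATABASES plus
#   # ('glance', 'keystone', 'cinder'), and skip_tables maps each of
#   # those databases to its DB_TABLE_SKIP_MAPPING entry, e.g.
#   # skip_tables['fm'] == ('alarm',) and skip_tables['nova'] == ().
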
def check_load_versions(archive, staging_dir):
match = False
try:
member = archive.getmember('etc/build.info')
archive.extract(member, path=staging_dir)
match = filecmp.cmp('/etc/build.info', staging_dir + '/etc/build.info')
shutil.rmtree(staging_dir + '/etc')
except Exception as e:
LOG.exception(e)
raise RestoreFail("Unable to verify load version in backup file. "
"Invalid backup file.")
if not match:
LOG.error("Load version mismatch.")
raise RestoreFail("Load version of backup does not match the "
"version of the installed load.")
def get_subfunctions(filename):
"""
Retrieves the subfunctions from a platform.conf file.
:param filename: file to retrieve subfunctions from
:return: a list of the subfunctions or None if no subfunctions exist
"""
matchstr = 'subfunction='
with open(filename, 'r') as f:
for line in f:
if matchstr in line:
parsed = line.split('=')
return parsed[1].rstrip().split(",")
return
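
# For example, a platform.conf line of the form
#   subfunction=controller,compute
# makes get_subfunctions() return ['controller', 'compute'].
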
def check_load_subfunctions(archive, staging_dir):
"""
Verify that the subfunctions in the backup match the installed load.
:param archive: backup archive
:param staging_dir: staging directory
    :raises RestoreFail: if the subfunctions do not match
"""
match = False
backup_subfunctions = None
try:
member = archive.getmember('etc/platform/platform.conf')
archive.extract(member, path=staging_dir)
backup_subfunctions = get_subfunctions(staging_dir +
'/etc/platform/platform.conf')
shutil.rmtree(staging_dir + '/etc')
if set(backup_subfunctions) ^ set(tsconfig.subfunctions):
            # The sets of subfunctions do not match
match = False
else:
match = True
except Exception:
LOG.exception("Unable to verify subfunctions in backup file")
raise RestoreFail("Unable to verify subfunctions in backup file. "
"Invalid backup file.")
if not match:
LOG.error("Subfunction mismatch - backup: %s, installed: %s" %
(str(backup_subfunctions), str(tsconfig.subfunctions)))
raise RestoreFail("Subfunctions in backup load (%s) do not match the "
"subfunctions of the installed load (%s)." %
(str(backup_subfunctions),
str(tsconfig.subfunctions)))
def file_exists_in_archive(archive, file_path):
""" Check if file exists in archive """
try:
archive.getmember(file_path)
return True
except KeyError:
LOG.info("File %s is not in archive." % file_path)
return False
def filter_directory(archive, directory):
for tarinfo in archive:
if tarinfo.name.split('/')[0] == directory:
yield tarinfo
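
# Usage sketch (paths illustrative; restore_puppet_data() below does this
# for the real puppet workdir):
#
#   with tarfile.open('/tmp/backup_system.tgz') as archive:
#       archive.extractall(path='/tmp/stage',
#                          members=filter_directory(archive, 'hieradata'))
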
def backup_etc_size():
""" Backup etc size estimate """
try:
total_size = utils.directory_get_size('/etc')
return total_size
except OSError:
LOG.error("Failed to estimate backup etc size.")
raise BackupFail("Failed to estimate backup etc size")
def backup_etc(archive):
""" Backup etc """
try:
archive.add('/etc', arcname='etc')
except tarfile.TarError:
LOG.error("Failed to backup etc.")
raise BackupFail("Failed to backup etc")
def restore_etc_file(archive, dest_dir, etc_file):
""" Restore etc file """
try:
# Change the name of this file to remove the leading path
member = archive.getmember('etc/' + etc_file)
# Copy the member to avoid changing the name for future operations on
# this member.
temp_member = copy.copy(member)
temp_member.name = os.path.basename(temp_member.name)
archive.extract(temp_member, path=dest_dir)
except tarfile.TarError:
LOG.error("Failed to restore etc file.")
raise RestoreFail("Failed to restore etc file")
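
# The copy-and-rename above flattens the archive path on extraction.
# For instance (destination illustrative):
#
#   restore_etc_file(archive, '/tmp/stage', 'hosts')
#   # extracts member 'etc/hosts' to /tmp/stage/hosts,
#   # not /tmp/stage/etc/hosts
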
def restore_etc_ssl_dir(archive, configpath=constants.CONFIG_WORKDIR):
""" Restore the etc SSL dir """
def filter_etc_ssl_private(members):
for tarinfo in members:
if 'etc/ssl/private' in tarinfo.name:
yield tarinfo
if file_exists_in_archive(archive, 'config/server-cert.pem'):
restore_config_file(
archive, configpath, 'server-cert.pem')
if file_exists_in_archive(archive, 'etc/ssl/private'):
# NOTE: This will include all TPM certificate files if TPM was
# enabled on the backed up system. However in that case, this
# restoration is only done for the first controller and TPM
        # will need to be reconfigured once the duplex controller (if
        # any) is restored.
archive.extractall(path='/',
members=filter_etc_ssl_private(archive))
def restore_ceph_external_config_files(archive, staging_dir):
# Restore ceph-config.
if file_exists_in_archive(archive, "config/ceph-config"):
restore_config_dir(archive, staging_dir, 'ceph-config', ceph_permdir)
# Copy the file to /etc/ceph.
# There might be no files to copy, so don't check the return code.
cp_command = ('cp -Rp ' + os.path.join(ceph_permdir, '*') +
' /etc/ceph/')
subprocess.call(cp_command, shell=True)
def backup_config_size(config_permdir):
""" Backup configuration size estimate """
try:
        return utils.directory_get_size(config_permdir)
except OSError:
LOG.error("Failed to estimate backup configuration size.")
raise BackupFail("Failed to estimate backup configuration size")
def backup_config(archive, config_permdir):
""" Backup configuration """
try:
# The config dir is versioned, but we're only grabbing the current
# release
archive.add(config_permdir, arcname='config')
except tarfile.TarError:
LOG.error("Failed to backup config.")
raise BackupFail("Failed to backup configuration")
def restore_config_file(archive, dest_dir, config_file):
""" Restore configuration file """
try:
# Change the name of this file to remove the leading path
member = archive.getmember('config/' + config_file)
# Copy the member to avoid changing the name for future operations on
# this member.
temp_member = copy.copy(member)
temp_member.name = os.path.basename(temp_member.name)
archive.extract(temp_member, path=dest_dir)
except tarfile.TarError:
LOG.error("Failed to restore config file %s." % config_file)
raise RestoreFail("Failed to restore configuration")
def restore_configuration(archive, staging_dir):
""" Restore configuration """
try:
os.makedirs(constants.CONFIG_WORKDIR, stat.S_IRWXU | stat.S_IRGRP |
stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH)
except OSError:
LOG.error("Failed to create config directory: %s",
constants.CONFIG_WORKDIR)
raise RestoreFail("Failed to restore configuration files")
# Restore cgcs_config file from original installation for historical
# purposes. Not used to restore the system as the information in this
# file is out of date (not updated after original installation).
restore_config_file(archive, constants.CONFIG_WORKDIR, 'cgcs_config')
# Restore platform.conf file and update as necessary. The file will be
# created in a temporary location and then moved into place when it is
# complete to prevent access to a partially created file.
restore_etc_file(archive, staging_dir, 'platform/platform.conf')
temp_platform_conf_file = os.path.join(tsconfig.PLATFORM_CONF_PATH,
'platform.conf.temp')
shutil.copyfile(os.path.join(staging_dir, 'platform.conf'),
temp_platform_conf_file)
install_uuid = utils.get_install_uuid()
for line in fileinput.FileInput(temp_platform_conf_file, inplace=1):
if line.startswith("INSTALL_UUID="):
# The INSTALL_UUID must be updated to match the new INSTALL_UUID
# which was generated when this controller was installed prior to
# doing the restore.
print "INSTALL_UUID=%s" % install_uuid
elif line.startswith("management_interface=") or \
line.startswith("oam_interface=") or \
line.startswith("infrastructure_interface=") or \
line.startswith("UUID="):
# Strip out any entries that are host specific as the backup can
# be done on either controller. The application of the
# platform_conf manifest will add these back in.
pass
else:
print line,
fileinput.close()
# Move updated platform.conf file into place.
os.rename(temp_platform_conf_file, tsconfig.PLATFORM_CONF_FILE)
# Kick tsconfig to reload the platform.conf file
tsconfig._load()
# Restore branding
restore_config_dir(archive, staging_dir, 'branding', '/opt/branding/')
# Restore banner customization
restore_config_dir(archive, staging_dir, 'banner/etc', '/opt/banner')
# Restore ssh configuration
restore_config_dir(archive, staging_dir, 'ssh_config',
constants.CONFIG_WORKDIR + '/ssh_config')
# Configure hostname
utils.configure_hostname('controller-0')
# Restore hosts file
restore_etc_file(archive, '/etc', 'hosts')
restore_etc_file(archive, constants.CONFIG_WORKDIR, 'hosts')
# Restore certificate files
restore_etc_ssl_dir(archive)
# Restore firewall rules file if it is in the archive
if file_exists_in_archive(archive, 'config/iptables.rules'):
restore_config_file(
archive, constants.CONFIG_WORKDIR, 'iptables.rules')
restore_etc_file(archive, tsconfig.PLATFORM_CONF_PATH,
'platform/iptables.rules')
def filter_pxelinux(archive):
for tarinfo in archive:
        if tarinfo.name.startswith('config/pxelinux.cfg'):
yield tarinfo
def restore_dnsmasq(archive, config_permdir):
""" Restore dnsmasq """
try:
etc_files = ['hosts']
perm_files = ['hosts',
'dnsmasq.hosts', 'dnsmasq.leases',
'dnsmasq.addn_hosts']
for etc_file in etc_files:
restore_config_file(archive, '/etc', etc_file)
for perm_file in perm_files:
restore_config_file(archive, config_permdir, perm_file)
# Extract distributed cloud addn_hosts file if present in archive.
if file_exists_in_archive(
archive, 'config/dnsmasq.addn_hosts_dc'):
restore_config_file(archive, config_permdir,
'dnsmasq.addn_hosts_dc')
tmpdir = tempfile.mkdtemp(prefix="pxerestore_")
archive.extractall(tmpdir,
members=filter_pxelinux(archive))
if os.path.exists(tmpdir + '/config/pxelinux.cfg'):
shutil.rmtree(config_permdir + 'pxelinux.cfg', ignore_errors=True)
shutil.move(tmpdir + '/config/pxelinux.cfg', config_permdir)
shutil.rmtree(tmpdir, ignore_errors=True)
except (shutil.Error, subprocess.CalledProcessError, tarfile.TarError):
LOG.error("Failed to restore dnsmasq config.")
raise RestoreFail("Failed to restore dnsmasq files")
def backup_puppet_data_size(puppet_permdir):
""" Backup puppet data size estimate """
try:
        return utils.directory_get_size(puppet_permdir)
except OSError:
LOG.error("Failed to estimate backup puppet data size.")
raise BackupFail("Failed to estimate backup puppet data size")
def backup_puppet_data(archive, puppet_permdir):
""" Backup puppet data """
try:
# The puppet dir is versioned, but we're only grabbing the current
# release
archive.add(puppet_permdir, arcname='hieradata')
except tarfile.TarError:
LOG.error("Failed to backup puppet data.")
raise BackupFail("Failed to backup puppet data")
def restore_static_puppet_data(archive, puppet_workdir):
""" Restore static puppet data """
try:
member = archive.getmember('hieradata/static.yaml')
archive.extract(member, path=os.path.dirname(puppet_workdir))
member = archive.getmember('hieradata/secure_static.yaml')
archive.extract(member, path=os.path.dirname(puppet_workdir))
except tarfile.TarError:
LOG.error("Failed to restore static puppet data.")
raise RestoreFail("Failed to restore static puppet data")
except OSError:
pass
def restore_puppet_data(archive, puppet_workdir):
""" Restore puppet data """
try:
archive.extractall(
path=os.path.dirname(puppet_workdir),
members=filter_directory(archive,
os.path.basename(puppet_workdir)))
except tarfile.TarError:
LOG.error("Failed to restore puppet data.")
raise RestoreFail("Failed to restore puppet data")
except OSError:
pass
def backup_cinder_config(archive):
""" Backup cinder configuration """
# If the iscsi target config file exists, add it to the archive
# On setups without LVM backends this file is absent
if os.path.exists(cinder_permdir + '/iscsi-target/saveconfig.json'):
archive.add(
cinder_permdir + '/iscsi-target/saveconfig.json',
arcname='cinder/saveconfig.json')
def restore_cinder_file(archive, dest_dir, cinder_file):
""" Restore cinder file """
try:
# Change the name of this file to remove the leading path
member = archive.getmember('cinder/' + cinder_file)
# Copy the member to avoid changing the name for future operations on
# this member.
temp_member = copy.copy(member)
temp_member.name = os.path.basename(temp_member.name)
archive.extract(temp_member, path=dest_dir)
except tarfile.TarError:
LOG.error("Failed to restore cinder file %s." % cinder_file)
raise RestoreFail("Failed to restore configuration")
def restore_cinder_config(archive):
"""Restore cinder config files"""
# If the iscsi target config file is present in the archive,
# restore it.
if file_exists_in_archive(archive, 'cinder/saveconfig.json'):
restore_cinder_file(
archive, cinder_permdir + '/iscsi-target',
'saveconfig.json')
# Also create a copy of the original file as the volume
# restore procedure changes this file and breaks the
# valid nova settings.
shutil.copyfile(
cinder_permdir + '/iscsi-target/saveconfig.json',
cinder_permdir + '/iscsi-target/saveconfig.json.bck')
def backup_cinder_size(cinder_permdir):
""" Backup cinder size estimate """
try:
if not os.path.exists(
cinder_permdir + '/iscsi-target/saveconfig.json'):
return 0
statinfo = os.stat(cinder_permdir + '/iscsi-target/saveconfig.json')
return statinfo.st_size
except OSError:
LOG.error("Failed to estimate backup cinder size.")
raise BackupFail("Failed to estimate backup cinder size")
def backup_keyring_size(keyring_permdir):
""" Backup keyring size estimate """
try:
        return utils.directory_get_size(keyring_permdir)
except OSError:
LOG.error("Failed to estimate backup keyring size.")
raise BackupFail("Failed to estimate backup keyring size")
def backup_keyring(archive, keyring_permdir):
""" Backup keyring configuration """
try:
archive.add(keyring_permdir, arcname='.keyring')
except tarfile.TarError:
LOG.error("Failed to backup keyring.")
raise BackupFail("Failed to backup keyring configuration")
def restore_keyring(archive, keyring_permdir):
""" Restore keyring configuration """
try:
shutil.rmtree(keyring_permdir, ignore_errors=False)
members = filter_directory(archive, '.keyring')
temp_members = list()
# remove .keyring and .keyring/ from the member path since they are
# extracted to keyring_permdir: /opt/platform/.keyring/release
for m in members:
temp_member = copy.copy(m)
lst = temp_member.name.split('.keyring/')
if len(lst) > 1:
temp_member.name = lst[1]
temp_members.append(temp_member)
archive.extractall(path=keyring_permdir, members=temp_members)
except (tarfile.TarError, shutil.Error):
LOG.error("Failed to restore keyring.")
shutil.rmtree(keyring_permdir, ignore_errors=True)
raise RestoreFail("Failed to restore keyring configuration")
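
# Path rewriting example (member name illustrative): an archive member
# '.keyring/python_keyring/keyring.cfg' is extracted as
# 'python_keyring/keyring.cfg' relative to keyring_permdir, because the
# loop above strips everything up to and including '.keyring/'.
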
def prefetch_keyring(archive):
""" Prefetch keyring configuration for manifest use """
keyring_tmpdir = '/tmp/.keyring'
python_keyring_tmpdir = '/tmp/python_keyring'
try:
shutil.rmtree(keyring_tmpdir, ignore_errors=True)
shutil.rmtree(python_keyring_tmpdir, ignore_errors=True)
archive.extractall(
path=os.path.dirname(keyring_tmpdir),
members=filter_directory(archive,
os.path.basename(keyring_tmpdir)))
shutil.move(keyring_tmpdir + '/python_keyring', python_keyring_tmpdir)
except (tarfile.TarError, shutil.Error):
LOG.error("Failed to restore keyring.")
shutil.rmtree(keyring_tmpdir, ignore_errors=True)
shutil.rmtree(python_keyring_tmpdir, ignore_errors=True)
raise RestoreFail("Failed to restore keyring configuration")
def cleanup_prefetched_keyring():
""" Cleanup fetched keyring """
try:
keyring_tmpdir = '/tmp/.keyring'
python_keyring_tmpdir = '/tmp/python_keyring'
shutil.rmtree(keyring_tmpdir, ignore_errors=True)
shutil.rmtree(python_keyring_tmpdir, ignore_errors=True)
except shutil.Error:
LOG.error("Failed to cleanup keyring.")
raise RestoreFail("Failed to cleanup fetched keyring")
def backup_ldap_size():
""" Backup ldap size estimate """
try:
total_size = 0
proc = subprocess.Popen(
['slapcat -d 0 -F /etc/openldap/schema | wc -c'],
shell=True, stdout=subprocess.PIPE)
for line in proc.stdout:
total_size = int(line)
break
proc.communicate()
return total_size
except subprocess.CalledProcessError:
LOG.error("Failed to estimate backup ldap size.")
raise BackupFail("Failed to estimate backup ldap size")
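
# Sizing note: the pipeline above measures the LDAP dump without writing
# it to disk. It is the same command the function runs, i.e.:
#
#   slapcat -d 0 -F /etc/openldap/schema | wc -c
#
# and the first (only) line of output is the byte count.
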
def backup_ldap(archive, staging_dir):
""" Backup ldap configuration """
try:
ldap_staging_dir = staging_dir + '/ldap'
        os.mkdir(ldap_staging_dir, 0o655)
subprocess.check_call([
'slapcat', '-d', '0', '-F', '/etc/openldap/schema',
'-l', (ldap_staging_dir + '/ldap.db')], stdout=DEVNULL)
archive.add(ldap_staging_dir + '/ldap.db', arcname='ldap.db')
except (OSError, subprocess.CalledProcessError, tarfile.TarError):
LOG.error("Failed to backup ldap database.")
raise BackupFail("Failed to backup ldap configuration")
def restore_ldap(archive, ldap_permdir, staging_dir):
""" Restore ldap configuration """
try:
ldap_staging_dir = staging_dir + '/ldap'
archive.extract('ldap.db', path=ldap_staging_dir)
utils.stop_lsb_service('openldap')
subprocess.call(['rm', '-rf', ldap_permdir], stdout=DEVNULL)
os.mkdir(ldap_permdir, 0o755)
subprocess.check_call(['slapadd', '-F', '/etc/openldap/schema',
'-l', ldap_staging_dir + '/ldap.db'],
stdout=DEVNULL, stderr=DEVNULL)
except (subprocess.CalledProcessError, OSError, tarfile.TarError):
LOG.error("Failed to restore ldap database.")
raise RestoreFail("Failed to restore ldap configuration")
finally:
utils.start_lsb_service('openldap')
def backup_postgres_size(cinder_config=False):
""" Backup postgres size estimate """
try:
total_size = 0
# Backup roles, table spaces and schemas for databases.
proc = subprocess.Popen([('sudo -u postgres pg_dumpall --clean ' +
'--schema-only | wc -c')], shell=True,
stdout=subprocess.PIPE, stderr=DEVNULL)
for line in proc.stdout:
total_size = int(line)
break
proc.communicate()
        # Get the list of databases to back up
backup_databases, backup_db_skip_tables = get_backup_databases(
cinder_config)
# Backup data for databases.
for _, db_elem in enumerate(backup_databases):
db_cmd = 'sudo -u postgres pg_dump --format=plain --inserts '
db_cmd += '--disable-triggers --data-only %s ' % db_elem
for _, table_elem in enumerate(backup_db_skip_tables[db_elem]):
db_cmd += '--exclude-table=%s ' % table_elem
db_cmd += '| wc -c'
proc = subprocess.Popen([db_cmd], shell=True,
stdout=subprocess.PIPE, stderr=DEVNULL)
for line in proc.stdout:
total_size += int(line)
break
proc.communicate()
return total_size
except subprocess.CalledProcessError:
LOG.error("Failed to estimate backup database size.")
raise BackupFail("Failed to estimate backup database size")
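
# For a database with skip tables, the generated sizing command looks
# like this (illustrative, using the 'gnocchi' entry from
# DB_TABLE_SKIP_MAPPING above):
#
#   sudo -u postgres pg_dump --format=plain --inserts --disable-triggers \
#       --data-only gnocchi --exclude-table=metric \
#       --exclude-table=resource | wc -c
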
def backup_postgres(archive, staging_dir, cinder_config=False):
""" Backup postgres configuration """
try:
postgres_staging_dir = staging_dir + '/postgres'
        os.mkdir(postgres_staging_dir, 0o655)
# Backup roles, table spaces and schemas for databases.
subprocess.check_call([('sudo -u postgres pg_dumpall --clean ' +
'--schema-only' +
'> %s/%s' % (postgres_staging_dir,
'postgres.sql.config'))],
shell=True, stderr=DEVNULL)
        # Get the list of databases to back up
backup_databases, backup_db_skip_tables = get_backup_databases(
cinder_config)
# Backup data for databases.
for _, db_elem in enumerate(backup_databases):
db_cmd = 'sudo -u postgres pg_dump --format=plain --inserts '
db_cmd += '--disable-triggers --data-only %s ' % db_elem
for _, table_elem in enumerate(backup_db_skip_tables[db_elem]):
db_cmd += '--exclude-table=%s ' % table_elem
db_cmd += '> %s/%s.sql.data' % (postgres_staging_dir, db_elem)
subprocess.check_call([db_cmd], shell=True, stderr=DEVNULL)
archive.add(postgres_staging_dir, arcname='postgres')
except (OSError, subprocess.CalledProcessError, tarfile.TarError):
LOG.error("Failed to backup postgres databases.")
raise BackupFail("Failed to backup database configuration")
def restore_postgres(archive, staging_dir):
""" Restore postgres configuration """
try:
postgres_staging_dir = staging_dir + '/postgres'
archive.extractall(path=staging_dir,
members=filter_directory(archive, 'postgres'))
utils.start_service("postgresql")
# Restore roles, table spaces and schemas for databases.
subprocess.check_call(["sudo", "-u", "postgres", "psql", "-f",
postgres_staging_dir +
'/postgres.sql.config', "postgres"],
stdout=DEVNULL, stderr=DEVNULL)
# Restore data for databases.
for data in glob.glob(postgres_staging_dir + '/*.sql.data'):
db_elem = data.split('/')[-1].split('.')[0]
subprocess.check_call(["sudo", "-u", "postgres", "psql", "-f",
data, db_elem],
stdout=DEVNULL)
except (OSError, subprocess.CalledProcessError, tarfile.TarError) as e:
LOG.error("Failed to restore postgres databases. Error: %s", e)
raise RestoreFail("Failed to restore database configuration")
finally:
utils.stop_service('postgresql')
def backup_ceilometer_size(ceilometer_permdir):
""" Backup ceilometer size estimate """
try:
statinfo = os.stat(ceilometer_permdir + '/pipeline.yaml')
return statinfo.st_size
except OSError:
LOG.error("Failed to estimate backup ceilometer size.")
raise BackupFail("Failed to estimate backup ceilometer size")
def backup_ceilometer(archive, ceilometer_permdir):
""" Backup ceilometer """
try:
archive.add(ceilometer_permdir + '/pipeline.yaml',
arcname='pipeline.yaml')
except tarfile.TarError:
LOG.error("Failed to backup ceilometer.")
raise BackupFail("Failed to backup ceilometer")
def restore_ceilometer(archive, ceilometer_permdir):
""" Restore ceilometer """
try:
archive.extract('pipeline.yaml', path=ceilometer_permdir)
except tarfile.TarError:
LOG.error("Failed to restore ceilometer")
raise RestoreFail("Failed to restore ceilometer")
def filter_config_dir(archive, directory):
for tarinfo in archive:
        if tarinfo.name.startswith('config/' + directory):
yield tarinfo
def restore_config_dir(archive, staging_dir, config_dir, dest_dir):
""" Restore configuration directory if it exists """
try:
archive.extractall(staging_dir,
members=filter_config_dir(archive, config_dir))
# Copy files from backup to dest dir
if (os.path.exists(staging_dir + '/config/' + config_dir) and
os.listdir(staging_dir + '/config/' + config_dir)):
subprocess.call(["mkdir", "-p", dest_dir])
try:
for f in glob.glob(
staging_dir + '/config/' + config_dir + '/*'):
subprocess.check_call(["cp", "-p", f, dest_dir])
except IOError:
LOG.warning("Failed to copy %s files" % config_dir)
except (subprocess.CalledProcessError, tarfile.TarError):
LOG.info("No custom %s config was found during restore." % config_dir)
def backup_std_dir_size(directory):
""" Backup standard directory size estimate """
try:
return utils.directory_get_size(directory)
except OSError:
LOG.error("Failed to estimate backup size for %s" % directory)
raise BackupFail("Failed to estimate backup size for %s" % directory)
def backup_std_dir(archive, directory):
""" Backup standard directory """
try:
archive.add(directory, arcname=os.path.basename(directory))
except tarfile.TarError:
LOG.error("Failed to backup %s" % directory)
raise BackupFail("Failed to backup %s" % directory)
def restore_std_dir(archive, directory):
""" Restore standard directory """
try:
shutil.rmtree(directory, ignore_errors=True)
# Verify that archive contains this directory
try:
archive.getmember(os.path.basename(directory))
except KeyError:
LOG.error("Archive does not contain directory %s" % directory)
raise RestoreFail("Invalid backup file - missing directory %s" %
directory)
archive.extractall(
path=os.path.dirname(directory),
members=filter_directory(archive, os.path.basename(directory)))
except (shutil.Error, tarfile.TarError):
LOG.error("Failed to restore %s" % directory)
raise RestoreFail("Failed to restore %s" % directory)
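
# Usage sketch (path illustrative): restore_std_dir(archive, '/home')
# wipes /home, verifies the archive has a top-level 'home' member, and
# re-extracts that member tree into place.
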
def configure_loopback_interface(archive):
""" Restore and apply configuration for loopback interface """
utils.remove_interface_config_files()
restore_etc_file(
archive, utils.NETWORK_SCRIPTS_PATH,
'sysconfig/network-scripts/' + utils.NETWORK_SCRIPTS_LOOPBACK)
utils.restart_networking()
def backup_ceph_crush_map(archive, staging_dir):
""" Backup ceph crush map """
try:
ceph_staging_dir = os.path.join(staging_dir, 'ceph')
        os.mkdir(ceph_staging_dir, 0o655)
crushmap_file = os.path.join(ceph_staging_dir,
sysinv_constants.CEPH_CRUSH_MAP_BACKUP)
subprocess.check_call(['ceph', 'osd', 'getcrushmap',
'-o', crushmap_file], stdout=DEVNULL,
stderr=DEVNULL)
archive.add(crushmap_file, arcname='ceph/' +
sysinv_constants.CEPH_CRUSH_MAP_BACKUP)
except Exception as e:
LOG.error('Failed to backup ceph crush map. Reason: {}'.format(e))
raise BackupFail('Failed to backup ceph crush map')
def restore_ceph_crush_map(archive):
""" Restore ceph crush map """
if not file_exists_in_archive(archive, 'ceph/' +
sysinv_constants.CEPH_CRUSH_MAP_BACKUP):
return
try:
crush_map_file = 'ceph/' + sysinv_constants.CEPH_CRUSH_MAP_BACKUP
if file_exists_in_archive(archive, crush_map_file):
member = archive.getmember(crush_map_file)
# Copy the member to avoid changing the name for future
# operations on this member.
temp_member = copy.copy(member)
temp_member.name = os.path.basename(temp_member.name)
archive.extract(temp_member,
path=sysinv_constants.SYSINV_CONFIG_PATH)
except tarfile.TarError as e:
LOG.error('Failed to restore crush map file. Reason: {}'.format(e))
raise RestoreFail('Failed to restore crush map file')
def check_size(archive_dir, cinder_config):
"""Check if there is enough space to create backup."""
    backup_overhead_bytes = 1024 ** 3  # extra GiB for the staging directory
    # backup_cinder_size() will return 0 if cinder/lvm is not configured,
    # so no extra check is needed here.
backup_size = (backup_overhead_bytes +
backup_etc_size() +
backup_config_size(tsconfig.CONFIG_PATH) +
backup_puppet_data_size(constants.HIERADATA_PERMDIR) +
backup_keyring_size(keyring_permdir) +
backup_ldap_size() +
backup_postgres_size(cinder_config) +
backup_ceilometer_size(ceilometer_permdir) +
backup_std_dir_size(glance_permdir) +
backup_std_dir_size(home_permdir) +
backup_std_dir_size(patching_permdir) +
backup_std_dir_size(patching_repo_permdir) +
backup_std_dir_size(extension_permdir) +
backup_std_dir_size(patch_vault_permdir) +
backup_cinder_size(cinder_permdir)
)
archive_dir_free_space = \
utils.filesystem_get_free_space(archive_dir)
if backup_size > archive_dir_free_space:
print ("Archive directory (%s) does not have enough free "
"space (%s), estimated backup size is %s." %
(archive_dir, utils.print_bytes(archive_dir_free_space),
utils.print_bytes(backup_size)))
raise BackupFail("Not enough free space for backup.")
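
# Rough arithmetic example (hypothetical numbers): if the estimators
# above total 3 GiB of content, the requirement becomes 3 GiB plus the
# 1 GiB staging overhead, so an archive_dir with only 3.5 GiB free
# would raise BackupFail.
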
def backup(backup_name, archive_dir, clone=False):
"""Backup configuration."""
if not os.path.isdir(archive_dir):
raise BackupFail("Archive directory (%s) not found." % archive_dir)
if not utils.is_active("management-ip"):
raise BackupFail(
"Backups can only be performed from the active controller.")
if os.path.isfile(backup_in_progress):
raise BackupFail("Backup already in progress.")
else:
open(backup_in_progress, 'w')
fmApi = fm_api.FaultAPIs()
entity_instance_id = "%s=%s" % (fm_constants.FM_ENTITY_TYPE_HOST,
sysinv_constants.CONTROLLER_HOSTNAME)
fault = fm_api.Fault(alarm_id=fm_constants.FM_ALARM_ID_BACKUP_IN_PROGRESS,
alarm_state=fm_constants.FM_ALARM_STATE_SET,
entity_type_id=fm_constants.FM_ENTITY_TYPE_HOST,
entity_instance_id=entity_instance_id,
severity=fm_constants.FM_ALARM_SEVERITY_MINOR,
reason_text=("System Backup in progress."),
# operational
alarm_type=fm_constants.FM_ALARM_TYPE_7,
# congestion
probable_cause=fm_constants.ALARM_PROBABLE_CAUSE_8,
proposed_repair_action=("No action required."),
service_affecting=False)
fmApi.set_fault(fault)
cinder_config = False
backend_services = sysinv.get_storage_backend_services()
for services in backend_services.values():
if (services is not None and
services.find(sysinv_constants.SB_SVC_CINDER) != -1):
cinder_config = True
break
staging_dir = None
system_tar_path = None
images_tar_path = None
warnings = ''
try:
os.chdir('/')
if not clone:
check_size(archive_dir, cinder_config)
print ("\nPerforming backup (this might take several minutes):")
staging_dir = tempfile.mkdtemp(dir=archive_dir)
system_tar_path = os.path.join(archive_dir,
backup_name + '_system.tgz')
system_archive = tarfile.open(system_tar_path, "w:gz")
images_tar_path = os.path.join(archive_dir,
backup_name + '_images.tgz')
step = 1
total_steps = 15
if sysinv_constants.SB_TYPE_CEPH in backend_services.keys():
total_steps += 1
if tsconfig.region_config == "yes":
# We don't run the glance backup step
total_steps -= 1
# Step 1: Backup etc
backup_etc(system_archive)
utils.progress(total_steps, step, 'backup etc', 'DONE')
step += 1
# Step 2: Backup configuration
backup_config(system_archive, tsconfig.CONFIG_PATH)
utils.progress(total_steps, step, 'backup configuration', 'DONE')
step += 1
# Step 3: Backup puppet data
backup_puppet_data(system_archive, constants.HIERADATA_PERMDIR)
utils.progress(total_steps, step, 'backup puppet data', 'DONE')
step += 1
# Step 4: Backup keyring
backup_keyring(system_archive, keyring_permdir)
utils.progress(total_steps, step, 'backup keyring', 'DONE')
step += 1
# Step 5: Backup ldap
backup_ldap(system_archive, staging_dir)
utils.progress(total_steps, step, 'backup ldap', 'DONE')
step += 1
# Step 6: Backup postgres
backup_postgres(system_archive, staging_dir, cinder_config)
utils.progress(total_steps, step, 'backup postgres', 'DONE')
step += 1
# Step 7: Backup ceilometer
backup_ceilometer(system_archive, ceilometer_permdir)
utils.progress(total_steps, step, 'backup ceilometer', 'DONE')
step += 1
if tsconfig.region_config != "yes":
# Step 8: Backup glance
images_archive = tarfile.open(images_tar_path, "w:gz")
backup_std_dir(images_archive, glance_permdir)
images_archive.close()
utils.progress(total_steps, step, 'backup glance', 'DONE')
step += 1
# Step 9: Backup home
backup_std_dir(system_archive, home_permdir)
utils.progress(total_steps, step, 'backup home directory', 'DONE')
step += 1
# Step 10: Backup patching
if not clone:
backup_std_dir(system_archive, patching_permdir)
utils.progress(total_steps, step, 'backup patching', 'DONE')
step += 1
# Step 11: Backup patching repo
if not clone:
backup_std_dir(system_archive, patching_repo_permdir)
utils.progress(total_steps, step, 'backup patching repo', 'DONE')
step += 1
# Step 12: Backup extension filesystem
backup_std_dir(system_archive, extension_permdir)
utils.progress(total_steps, step, 'backup extension filesystem '
'directory', 'DONE')
step += 1
# Step 13: Backup patch-vault filesystem
if os.path.exists(patch_vault_permdir):
backup_std_dir(system_archive, patch_vault_permdir)
utils.progress(total_steps, step, 'backup patch-vault filesystem '
'directory', 'DONE')
step += 1
# Step 14: Backup cinder config/LVM config
# No need to add extra check here as if cinder/LVM is not configured,
# ../iscsi-target/saveconfig.json will be absent, so this function will
# do nothing.
backup_cinder_config(system_archive)
utils.progress(total_steps, step, 'backup cinder/LVM config', 'DONE')
step += 1
# Step 15: Backup ceph crush map
if sysinv_constants.SB_TYPE_CEPH in backend_services.keys():
backup_ceph_crush_map(system_archive, staging_dir)
utils.progress(total_steps, step, 'backup ceph crush map', 'DONE')
step += 1
# Step 16: Create archive
system_archive.close()
utils.progress(total_steps, step, 'create archive', 'DONE')
step += 1
except Exception:
if system_tar_path and os.path.isfile(system_tar_path):
os.remove(system_tar_path)
if images_tar_path and os.path.isfile(images_tar_path):
os.remove(images_tar_path)
raise
finally:
fmApi.clear_fault(fm_constants.FM_ALARM_ID_BACKUP_IN_PROGRESS,
entity_instance_id)
os.remove(backup_in_progress)
if staging_dir:
shutil.rmtree(staging_dir, ignore_errors=True)
system_msg = "System backup file created"
images_msg = "Images backup file created"
if not clone:
system_msg += ": " + system_tar_path
images_msg += ": " + images_tar_path
print system_msg
if tsconfig.region_config != "yes":
print images_msg
if warnings != '':
print "WARNING: The following problems occurred:"
print textwrap.fill(warnings, 80)
def create_restore_runtime_config(filename):
""" Create any runtime parameters needed for Restore."""
config = {}
# We need to re-enable Openstack password rules, which
# were previously disabled while the controller manifests
# were applying during a Restore
config['classes'] = ['keystone::security_compliance']
utils.create_manifest_runtime_config(filename, config)
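
# Usage note: the file written here is consumed in Step 10 of
# restore_system() below, where it is passed to utils.apply_manifest(...,
# runtime_filename=restore_filename) so that puppet re-applies
# keystone::security_compliance and re-enables the password rules.
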
def overwrite_iscsi_target_config():
"""
Overwrite the current iscsi target config file with the one
from the backup archive.
"""
if not os.path.exists(
cinder_permdir + '/iscsi-target/saveconfig.json'):
LOG.info("Restore: Missing current saveconfig.json file")
return
if not os.path.exists(
cinder_permdir + '/iscsi-target/saveconfig.json.bck'):
LOG.info("Restore: Missing backup saveconfig.json file")
return
os.remove(cinder_permdir + '/iscsi-target/saveconfig.json')
shutil.copyfile(
cinder_permdir + '/iscsi-target/saveconfig.json.bck',
cinder_permdir + '/iscsi-target/saveconfig.json')
os.remove(cinder_permdir + '/iscsi-target/saveconfig.json.bck')
subprocess.call(["targetctl", "restore"], stdout=DEVNULL, stderr=DEVNULL)
def restore_complete():
"""
Restore proper ISCSI configuration file after cinder restore.
Enable compute functionality for AIO system.
:return: True if compute-config-complete is executed
"""
if utils.get_system_type() == sysinv_constants.TIS_AIO_BUILD:
if not os.path.isfile(restore_system_ready):
print textwrap.fill(
"--restore-complete can only be run "
"after restore-system has completed "
"successfully", 80
)
return False
# The iscsi target config file must be overwritten with the
# original file from the backup archive.
# This is due to the cinder restore process actually changing
# this file. These changes cause VMs that were present at
# backup time to not boot up properly anymore.
        # The original iscsi config file has the proper settings, so we
        # use that.
overwrite_iscsi_target_config()
print ("\nApplying compute manifests for %s. " %
(utils.get_controller_hostname()))
print ("Node will reboot on completion.")
sysinv.do_compute_config_complete(utils.get_controller_hostname())
# show in-progress log on console every 30 seconds
# until self reboot or timeout
os.remove(restore_system_ready)
time.sleep(30)
for i in range(1, 10):
print("compute manifest apply in progress ... ")
time.sleep(30)
raise RestoreFail("Timeout running compute manifests, "
"reboot did not occur")
else:
if not os.path.isfile(restore_system_ready):
print textwrap.fill(
"--restore-complete can only be run "
"after restore-system has completed "
"successfully", 80
)
return False
overwrite_iscsi_target_config()
os.remove(restore_system_ready)
return True
def restore_system(backup_file, include_storage_reinstall=False, clone=False):
"""Restoring system configuration."""
if (os.path.exists(constants.CGCS_CONFIG_FILE) or
os.path.exists(tsconfig.CONFIG_PATH) or
os.path.exists(constants.INITIAL_CONFIG_COMPLETE_FILE)):
print textwrap.fill(
"Configuration has already been done. "
"A system restore operation can only be done "
"immediately after the load has been installed.", 80)
print
raise RestoreFail("System configuration already completed")
if not os.path.isabs(backup_file):
raise RestoreFail("Backup file (%s) not found. Full path is "
"required." % backup_file)
if os.path.isfile(restore_in_progress):
raise RestoreFail("Restore already in progress.")
else:
open(restore_in_progress, 'w')
# Add newline to console log for install-clone scenario
newline = clone
staging_dir = None
try:
try:
with open(os.devnull, "w") as fnull:
subprocess.check_call(["vgdisplay", "cgts-vg"],
stdout=fnull,
stderr=fnull)
except subprocess.CalledProcessError:
LOG.error("The cgts-vg volume group was not found")
raise RestoreFail("Volume groups not configured")
print "\nRestoring system (this will take several minutes):"
# Use /scratch for the staging dir for now,
# until /opt/backups is available
staging_dir = tempfile.mkdtemp(dir='/scratch')
# Permission change required or postgres restore fails
subprocess.call(['chmod', 'a+rx', staging_dir], stdout=DEVNULL)
os.chdir('/')
step = 1
total_steps = 24
# Step 1: Open archive and verify installed load matches backup
try:
archive = tarfile.open(backup_file)
except tarfile.TarError as e:
LOG.exception(e)
raise RestoreFail("Error opening backup file. Invalid backup "
"file.")
check_load_versions(archive, staging_dir)
check_load_subfunctions(archive, staging_dir)
utils.progress(total_steps, step, 'open archive', 'DONE', newline)
step += 1
# Patching is potentially a multi-phase step.
# If the controller is impacted by patches from the backup,
# it must be rebooted before continuing the restore.
# If this is the second pass through, we can skip over this.
if not os.path.isfile(restore_patching_complete) and not clone:
# Step 2: Restore patching
restore_std_dir(archive, patching_permdir)
utils.progress(total_steps, step, 'restore patching', 'DONE',
newline)
step += 1
# Step 3: Restore patching repo
restore_std_dir(archive, patching_repo_permdir)
utils.progress(total_steps, step, 'restore patching repo', 'DONE',
newline)
step += 1
# Step 4: Apply patches
try:
subprocess.check_output(["sw-patch", "install-local"])
except subprocess.CalledProcessError:
LOG.error("Failed to install patches")
raise RestoreFail("Failed to install patches")
utils.progress(total_steps, step, 'install patches', 'DONE',
newline)
step += 1
open(restore_patching_complete, 'w')
# If the controller was impacted by patches, we need to reboot.
if os.path.isfile(node_is_patched):
if not clone:
print ("\nThis controller has been patched. " +
"A reboot is required.")
print ("After the reboot is complete, " +
"re-execute the restore command.")
while True:
user_input = input(
"Enter 'reboot' to reboot controller: ")
if user_input == 'reboot':
break
LOG.info("This controller has been patched. Rebooting now")
print("\nThis controller has been patched. Rebooting now\n\n")
time.sleep(5)
os.remove(restore_in_progress)
if staging_dir:
shutil.rmtree(staging_dir, ignore_errors=True)
subprocess.call("reboot")
else:
                # We need to restart the patch controller and agent, since
                # we set up the repo and patch store outside its control
with open(os.devnull, "w") as devnull:
subprocess.call(
["systemctl",
"restart",
"sw-patch-controller-daemon.service"],
stdout=devnull, stderr=devnull)
subprocess.call(
["systemctl",
"restart",
"sw-patch-agent.service"],
stdout=devnull, stderr=devnull)
if clone:
# No patches were applied, return to cloning code
# to run validation code.
return RESTORE_RERUN_REQUIRED
else:
# Add the skipped steps
step += 3
if os.path.isfile(node_is_patched):
            # If we get here, it means the user patched the node AFTER
            # the restore applied its patches and rebooted, but has not
            # rebooted since.
# This means the patch lineup no longer matches what's in the
# backup, but we can't (and probably shouldn't) prevent that.
# However, since this will ultimately cause the node to fail
# the goenabled step, we can fail immediately and force the
# user to reboot.
print ("\nThis controller has been patched, but not rebooted.")
print ("Please reboot before continuing the restore process.")
raise RestoreFail("Controller node patched without rebooting")
# Flag can now be cleared
if os.path.exists(restore_patching_complete):
os.remove(restore_patching_complete)
# Prefetch keyring
prefetch_keyring(archive)
# Step 5: Restore configuration
restore_configuration(archive, staging_dir)
# In AIO SX systems, the loopback interface is used as the management
# interface. However, the application of the interface manifest will
# not configure the necessary addresses on the loopback interface (see
# apply_network_config.sh for details). So, we need to configure the
# loopback interface here.
if tsconfig.system_mode == sysinv_constants.SYSTEM_MODE_SIMPLEX:
configure_loopback_interface(archive)
# Write the simplex flag
utils.write_simplex_flag()
utils.progress(total_steps, step, 'restore configuration', 'DONE',
newline)
step += 1
# Step 6: Apply restore bootstrap manifest
controller_0_address = utils.get_address_from_hosts_file(
'controller-0')
restore_static_puppet_data(archive, constants.HIERADATA_WORKDIR)
try:
utils.apply_manifest(controller_0_address,
sysinv_constants.CONTROLLER,
'bootstrap',
constants.HIERADATA_WORKDIR)
except Exception as e:
LOG.exception(e)
raise RestoreFail(
'Failed to apply bootstrap manifest. '
'See /var/log/puppet/latest/puppet.log for details.')
utils.progress(total_steps, step, 'apply bootstrap manifest', 'DONE',
newline)
step += 1
# Step 7: Restore puppet data
restore_puppet_data(archive, constants.HIERADATA_WORKDIR)
utils.progress(total_steps, step, 'restore puppet data', 'DONE',
newline)
step += 1
# Step 8: Persist configuration
utils.persist_config()
utils.progress(total_steps, step, 'persist configuration', 'DONE',
newline)
step += 1
# Step 9: Apply controller manifest
try:
utils.apply_manifest(controller_0_address,
sysinv_constants.CONTROLLER,
'controller',
constants.HIERADATA_PERMDIR)
except Exception as e:
LOG.exception(e)
raise RestoreFail(
'Failed to apply controller manifest. '
'See /var/log/puppet/latest/puppet.log for details.')
utils.progress(total_steps, step, 'apply controller manifest', 'DONE',
newline)
step += 1
# Step 10: Apply runtime controller manifests
restore_filename = os.path.join(staging_dir, 'restore.yaml')
create_restore_runtime_config(restore_filename)
try:
utils.apply_manifest(controller_0_address,
sysinv_constants.CONTROLLER,
'runtime',
constants.HIERADATA_PERMDIR,
runtime_filename=restore_filename)
except Exception as e:
LOG.exception(e)
raise RestoreFail(
'Failed to apply runtime controller manifest. '
'See /var/log/puppet/latest/puppet.log for details.')
utils.progress(total_steps, step,
'apply runtime controller manifest', 'DONE',
newline)
step += 1
        # Move the staging dir under /opt/backups, now that it's set up
shutil.rmtree(staging_dir, ignore_errors=True)
staging_dir = tempfile.mkdtemp(dir=constants.BACKUPS_PATH)
# Permission change required or postgres restore fails
subprocess.call(['chmod', 'a+rx', staging_dir], stdout=DEVNULL)
# Step 11: Restore cinder config file
restore_cinder_config(archive)
utils.progress(total_steps, step, 'restore cinder config', 'DONE',
newline)
step += 1
# Step 12: Apply banner customization
utils.apply_banner_customization()
utils.progress(total_steps, step, 'apply banner customization', 'DONE',
newline)
step += 1
# Step 13: Restore dnsmasq and pxeboot config
restore_dnsmasq(archive, tsconfig.CONFIG_PATH)
utils.progress(total_steps, step, 'restore dnsmasq', 'DONE', newline)
step += 1
# Step 14: Restore keyring
restore_keyring(archive, keyring_permdir)
utils.progress(total_steps, step, 'restore keyring', 'DONE', newline)
step += 1
# Step 15: Restore ldap
restore_ldap(archive, ldap_permdir, staging_dir)
utils.progress(total_steps, step, 'restore ldap', 'DONE', newline)
step += 1
# Step 16: Restore postgres
restore_postgres(archive, staging_dir)
utils.progress(total_steps, step, 'restore postgres', 'DONE', newline)
step += 1
# Step 17: Restore ceilometer
restore_ceilometer(archive, ceilometer_permdir)
utils.progress(total_steps, step, 'restore ceilometer', 'DONE',
newline)
step += 1
# Step 18: Restore ceph crush map
restore_ceph_crush_map(archive)
utils.progress(total_steps, step, 'restore ceph crush map', 'DONE',
newline)
step += 1
# Step 19: Restore home
restore_std_dir(archive, home_permdir)
utils.progress(total_steps, step, 'restore home directory', 'DONE',
newline)
step += 1
# Step 20: Restore extension filesystem
restore_std_dir(archive, extension_permdir)
utils.progress(total_steps, step, 'restore extension filesystem '
'directory', 'DONE', newline)
step += 1
# Step 21: Restore patch-vault filesystem
if file_exists_in_archive(archive,
os.path.basename(patch_vault_permdir)):
restore_std_dir(archive, patch_vault_permdir)
utils.progress(total_steps, step, 'restore patch-vault filesystem '
'directory', 'DONE', newline)
step += 1
# Step 22: Restore external ceph configuration files.
restore_ceph_external_config_files(archive, staging_dir)
utils.progress(total_steps, step, 'restore CEPH external config',
'DONE', newline)
step += 1
# Step 23: Shutdown file systems
archive.close()
shutil.rmtree(staging_dir, ignore_errors=True)
utils.shutdown_file_systems()
utils.progress(total_steps, step, 'shutdown file systems', 'DONE',
newline)
step += 1
# Step 24: Recover services
utils.mtce_restart()
utils.mark_config_complete()
time.sleep(120)
for service in ['sysinv-conductor', 'sysinv-inv']:
if not utils.wait_sm_service(service):
raise RestoreFail("Services have failed to initialize.")
utils.progress(total_steps, step, 'recover services', 'DONE', newline)
step += 1
if tsconfig.system_mode != sysinv_constants.SYSTEM_MODE_SIMPLEX:
print "\nRestoring node states (this will take several minutes):"
backend_services = sysinv.get_storage_backend_services()
with openstack.OpenStack() as client:
                # On ceph setups storage nodes take about 90 seconds
                # to become locked, so raise the timeout to 120 seconds
                # for such setups.
lock_timeout = 60
if sysinv_constants.SB_TYPE_CEPH in backend_services.keys():
lock_timeout = 120
failed_lock_host = False
skip_hosts = ['controller-0']
if not include_storage_reinstall:
storage_hosts = \
sysinv.get_hosts(client.admin_token,
client.conf['region_name'],
personality='storage')
if storage_hosts:
install_uuid = utils.get_install_uuid()
for h in storage_hosts:
skip_hosts.append(h.name)
# Update install_uuid on the storage node
client.sysinv.ihost.update_install_uuid(
h.uuid,
install_uuid)
skip_hosts_count = len(skip_hosts)
                # Wait for nodes to be identified as disabled before
                # attempting to lock hosts. Even if nodes are still not
                # identified as disabled after 3 minutes, we continue
                # the restore.
if not client.wait_for_hosts_disabled(
exempt_hostnames=skip_hosts,
timeout=180):
LOG.info("At least one node is not in a disabling state. "
"Continuing.")
print "\nLocking nodes:"
try:
failed_hosts = client.lock_hosts(skip_hosts,
utils.progress,
timeout=lock_timeout)
# Don't power off nodes that could not be locked
if len(failed_hosts) > 0:
                        skip_hosts.extend(failed_hosts)
except (KeystoneFail, SysInvFail) as e:
LOG.exception(e)
failed_lock_host = True
if not failed_lock_host:
print "\nPowering-off nodes:"
try:
client.power_off_hosts(skip_hosts,
utils.progress,
timeout=60)
except (KeystoneFail, SysInvFail) as e:
LOG.exception(e)
                        # Power-off failures are somewhat expected here.
if failed_lock_host or len(skip_hosts) > skip_hosts_count:
if include_storage_reinstall:
print textwrap.fill(
"Failed to lock at least one node. " +
"Please lock the unlocked nodes manually.", 80
)
else:
print textwrap.fill(
"Failed to lock at least one node. " +
"Please lock the unlocked controller-1 or " +
"compute nodes manually.", 80
)
if not clone:
print textwrap.fill(
"Before continuing to the next step in the restore, " +
"please ensure all nodes other than controller-0 " +
"and storage nodes, if they are not being " +
"reinstalled, are powered off. Please refer to the " +
"system administration guide for more details.", 80
)
finally:
os.remove(restore_in_progress)
if staging_dir:
shutil.rmtree(staging_dir, ignore_errors=True)
cleanup_prefetched_keyring()
fmApi = fm_api.FaultAPIs()
entity_instance_id = "%s=%s" % (fm_constants.FM_ENTITY_TYPE_HOST,
sysinv_constants.CONTROLLER_HOSTNAME)
fault = fm_api.Fault(
alarm_id=fm_constants.FM_ALARM_ID_BACKUP_IN_PROGRESS,
alarm_state=fm_constants.FM_ALARM_STATE_MSG,
entity_type_id=fm_constants.FM_ENTITY_TYPE_HOST,
entity_instance_id=entity_instance_id,
severity=fm_constants.FM_ALARM_SEVERITY_MINOR,
reason_text=("System Restore complete."),
# other
alarm_type=fm_constants.FM_ALARM_TYPE_0,
# unknown
probable_cause=fm_constants.ALARM_PROBABLE_CAUSE_UNKNOWN,
proposed_repair_action=(""),
service_affecting=False)
fmApi.set_fault(fault)
# Mark system restore as complete
if (utils.get_controller_hostname() ==
sysinv_constants.CONTROLLER_0_HOSTNAME):
# Create the flag file that permits the
# restore_complete command option.
utils.touch(restore_system_ready)
return RESTORE_COMPLETE
def restore_images(backup_file, clone=False):
"""Restoring images."""
if not os.path.exists(constants.INITIAL_CONFIG_COMPLETE_FILE):
print textwrap.fill(
"System restore has not been done. "
"An image restore operation can only be done after "
"the system restore has been completed.", 80)
print
raise RestoreFail("System restore required")
if not os.path.isabs(backup_file):
raise RestoreFail("Backup file (%s) not found. Full path is "
"required." % backup_file)
if os.path.isfile(restore_in_progress):
raise RestoreFail("Restore already in progress.")
else:
open(restore_in_progress, 'w')
# Add newline to console log for install-clone scenario
newline = clone
try:
print "\nRestoring images (this will take several minutes):"
os.chdir('/')
step = 1
total_steps = 2
# Step 1: Open archive
try:
archive = tarfile.open(backup_file)
except tarfile.TarError as e:
LOG.exception(e)
raise RestoreFail("Error opening backup file. Invalid backup "
"file.")
utils.progress(total_steps, step, 'open archive', 'DONE', newline)
step += 1
# Step 2: Restore glance
restore_std_dir(archive, glance_permdir)
utils.progress(total_steps, step, 'restore glance', 'DONE',
newline)
step += 1
archive.close()
finally:
os.remove(restore_in_progress)