From 2b6f92da1b21d4f5e7a04796965c41bf4a356cfa Mon Sep 17 00:00:00 2001
From: Wei Zhou
Date: Fri, 26 Oct 2018 12:00:43 -0400
Subject: [PATCH] Support an Alternative Backup Restore Procedure

This code change supports system backup and restore without
reinstalling storage nodes.

The config_controller command is modified as follows:
A new option, "include-storage-reinstall" or "exclude-storage-reinstall",
is added to the system restore command "config_controller
--restore-system", with the default set to exclude-storage-reinstall.

Add a new command "tidy_storage_post_restore" to be run after the
restore. It scans the Cinder/Glance databases and the rbd backend for
any discrepancies between the two and generates a user action log file.

This code change has been tested in a virtual box as well as a
hardware lab.

Story: 2004184
Task: 27672

Change-Id: I5e64fde70b977ea4bc3a5927bcbd852a393caec2
Signed-off-by: Wei Zhou
---
 controllerconfig/centos/build_srpm.data       |   2 +-
 .../controllerconfig/backup_restore.py        |  41 +-
 .../controllerconfig/common/exceptions.py     |   5 +
 .../controllerconfig/systemconfig.py          |  30 +-
 .../controllerconfig/tidy_storage.py          | 578 ++++++++++++++++++
 controllerconfig/controllerconfig/setup.py    |   3 +-
 sysinv/cgts-client/centos/build_srpm.data     |   2 +-
 .../cgts-client/cgtsclient/v1/ihost.py        |   5 +
 sysinv/sysinv/centos/build_srpm.data          |   2 +-
 sysinv/sysinv/sysinv/sysinv/agent/manager.py  |  25 +
 sysinv/sysinv/sysinv/sysinv/agent/rpcapi.py   |  20 +
 .../sysinv/sysinv/api/controllers/v1/host.py  |  20 +
 .../sysinv/sysinv/sysinv/conductor/manager.py |   8 +
 .../sysinv/sysinv/sysinv/conductor/rpcapi.py  |  13 +
 14 files changed, 736 insertions(+), 18 deletions(-)
 create mode 100644 controllerconfig/controllerconfig/controllerconfig/tidy_storage.py

diff --git a/controllerconfig/centos/build_srpm.data b/controllerconfig/centos/build_srpm.data
index 5a4185072f..52319a2da8 100755
--- a/controllerconfig/centos/build_srpm.data
+++ b/controllerconfig/centos/build_srpm.data
@@ -1,2 +1,2 @@
 SRC_DIR="controllerconfig"
-TIS_PATCH_VER=147
+TIS_PATCH_VER=148
diff --git a/controllerconfig/controllerconfig/controllerconfig/backup_restore.py b/controllerconfig/controllerconfig/controllerconfig/backup_restore.py
index 34bbbcf82c..f9bed53f80 100644
--- a/controllerconfig/controllerconfig/controllerconfig/backup_restore.py
+++ b/controllerconfig/controllerconfig/controllerconfig/backup_restore.py
@@ -35,7 +35,6 @@ import utils
 import sysinv_api as sysinv
 from six.moves import input
-
 LOG = log.get_logger(__name__)
 DEVNULL = open(os.devnull, 'w')
@@ -1253,7 +1252,7 @@ def restore_complete():
     return True
-def restore_system(backup_file, clone=False):
+def restore_system(backup_file, include_storage_reinstall=False, clone=False):
     """Restoring system configuration."""
     if (os.path.exists(constants.CGCS_CONFIG_FILE) or
@@ -1596,6 +1595,22 @@ def restore_system(backup_file, clone=False):
         failed_lock_host = False
         skip_hosts = ['controller-0']
+        if not include_storage_reinstall:
+            storage_hosts = \
+                sysinv.get_hosts(client.admin_token,
+                                 client.conf['region_name'],
+                                 personality='storage')
+            if storage_hosts:
+                install_uuid = utils.get_install_uuid()
+                for h in storage_hosts:
+                    skip_hosts.append(h.name)
+
+                    # Update install_uuid on the storage node
+                    client.sysinv.ihost.update_install_uuid(
+                        h.uuid,
+                        install_uuid)
+
+        skip_hosts_count = len(skip_hosts)
         # Wait for nodes to be identified as disabled before attempting
         # to lock hosts.
Even if after 3 minute nodes are still not @@ -1629,18 +1644,26 @@ def restore_system(backup_file, clone=False): LOG.exception(e) # this is somehow expected - if failed_lock_host or len(skip_hosts) > 1: - print textwrap.fill( - "Failed to lock at least one node. " + - "Please lock the unlocked nodes manually.", 80 - ) + if failed_lock_host or len(skip_hosts) > skip_hosts_count: + if include_storage_reinstall: + print textwrap.fill( + "Failed to lock at least one node. " + + "Please lock the unlocked nodes manually.", 80 + ) + else: + print textwrap.fill( + "Failed to lock at least one node. " + + "Please lock the unlocked controller-1 or " + + "compute nodes manually.", 80 + ) if not clone: print textwrap.fill( "Before continuing to the next step in the restore, " + "please ensure all nodes other than controller-0 " + - "are powered off. Please refer to the system " + - "administration guide for more details.", 80 + "and storage nodes, if they are not being " + + "reinstalled, are powered off. Please refer to the " + + "system administration guide for more details.", 80 ) finally: diff --git a/controllerconfig/controllerconfig/controllerconfig/common/exceptions.py b/controllerconfig/controllerconfig/controllerconfig/common/exceptions.py index e0d26183be..afbcd3bb33 100644 --- a/controllerconfig/controllerconfig/controllerconfig/common/exceptions.py +++ b/controllerconfig/controllerconfig/controllerconfig/common/exceptions.py @@ -49,3 +49,8 @@ class UserQuit(ConfigError): class CloneFail(ConfigError): """Clone error.""" pass + + +class TidyStorageFail(ConfigError): + """Tidy storage error.""" + pass diff --git a/controllerconfig/controllerconfig/controllerconfig/systemconfig.py b/controllerconfig/controllerconfig/controllerconfig/systemconfig.py index ceb68000e9..340651cdbc 100644 --- a/controllerconfig/controllerconfig/controllerconfig/systemconfig.py +++ b/controllerconfig/controllerconfig/controllerconfig/systemconfig.py @@ -286,13 +286,16 @@ def show_help(): "the given file name\n" "--clone-status Status of the last installation of " "cloned image\n" - "--restore-system Restore system configuration from backup " + "--restore-system " + " " + "\n" + " Restore system configuration from backup " "file with\n" " the given name, full path required\n" "--restore-images Restore images from backup file with the " "given name,\n" " full path required\n" - "--restore-complete Complete restore of controller-0" + "--restore-complete Complete restore of controller-0\n" "--allow-ssh Allow configuration to be executed in " "ssh\n" % sys.argv[0]) @@ -327,6 +330,7 @@ def main(): do_default_config = False do_backup = False do_system_restore = False + include_storage_reinstall = False do_images_restore = False do_complete_restore = False do_clone = False @@ -365,9 +369,24 @@ def main(): elif sys.argv[arg] == "--restore-system": arg += 1 if arg < len(sys.argv): - backup_name = sys.argv[arg] + if sys.argv[arg] in ["include-storage-reinstall", + "exclude-storage-reinstall"]: + if sys.argv[arg] == "include-storage-reinstall": + include_storage_reinstall = True + arg += 1 + if arg < len(sys.argv): + backup_name = sys.argv[arg] + else: + print textwrap.fill( + "--restore-system requires the filename " + " of the backup", 80) + exit(1) + else: + backup_name = sys.argv[arg] else: - print "--restore-system requires the filename of the backup" + print textwrap.fill( + "--restore-system requires the filename " + "of the backup", 80) exit(1) do_system_restore = True elif sys.argv[arg] == "--restore-images": @@ -473,7 
+492,8 @@ def main(): backup_restore.backup(backup_name, archive_dir) print "\nBackup complete" elif do_system_restore: - backup_restore.restore_system(backup_name) + backup_restore.restore_system(backup_name, + include_storage_reinstall) print "\nSystem restore complete" elif do_images_restore: backup_restore.restore_images(backup_name) diff --git a/controllerconfig/controllerconfig/controllerconfig/tidy_storage.py b/controllerconfig/controllerconfig/controllerconfig/tidy_storage.py new file mode 100644 index 0000000000..42e748abe3 --- /dev/null +++ b/controllerconfig/controllerconfig/controllerconfig/tidy_storage.py @@ -0,0 +1,578 @@ +""" + +Copyright (c) 2015-2018 Wind River Systems, Inc. + +SPDX-License-Identifier: Apache-2.0 + +""" + +import numpy as np +import os +import subprocess +import sys +import textwrap +import time + +from keystoneclient.auth.identity import v3 +from keystoneauth1 import session as ksc_session +from cinderclient.v3 import client as cinder_client_v3 +from glanceclient import Client + +from cinderclient import utils as c_utils +from controllerconfig.common import log +from controllerconfig.common.rest_api_utils import get_token +from controllerconfig.common.exceptions import TidyStorageFail + +LOG = log.get_logger(__name__) + +KEYSTONE_AUTH_SERVER_RETRY_CNT = 60 +KEYSTONE_AUTH_SERVER_WAIT = 1 # 1sec wait per retry + +search_opts = {'all_tenants': 1} + + +class OpenStack(object): + + def __init__(self): + self.admin_token = None + self.conf = {} + self.cinder_client = None + self.glance_client_v1 = None + self.glance_client_v2 = None + + try: + self.conf['admin_user'] = os.environ['OS_USERNAME'] + self.conf['admin_pwd'] = os.environ['OS_PASSWORD'] + self.conf['admin_tenant'] = os.environ['OS_PROJECT_NAME'] + self.conf['auth_url'] = os.environ['OS_AUTH_URL'] + self.conf['region_name'] = os.environ['OS_REGION_NAME'] + self.conf['user_domain'] = os.environ['OS_USER_DOMAIN_NAME'] + self.conf['project_domain'] = os.environ['OS_PROJECT_DOMAIN_NAME'] + except KeyError: + LOG.error("Please source openstack service credentials file.") + raise TidyStorageFail("Please source openstack credentials file.") + + def __enter__(self): + if not self._connect(): + raise Exception('Failed to connect') + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self._disconnect() + + def __del__(self): + self._disconnect() + + def _connect(self): + """ Connect to an OpenStack instance """ + + if self.admin_token is not None: + self._disconnect() + + # Try to obtain an admin token from keystone + for _ in range(KEYSTONE_AUTH_SERVER_RETRY_CNT): + self.admin_token = get_token(self.conf['auth_url'], + self.conf['admin_tenant'], + self.conf['admin_user'], + self.conf['admin_pwd'], + self.conf['user_domain'], + self.conf['project_domain']) + if self.admin_token: + break + time.sleep(KEYSTONE_AUTH_SERVER_WAIT) + + return self.admin_token is not None + + def _disconnect(self): + """ Disconnect from an OpenStack instance """ + self.admin_token = None + + @property + def get_cinder_client(self): + if not self.cinder_client: + auth = v3.Password(auth_url=self.conf['auth_url'], + username=self.conf['admin_user'], + password=self.conf['admin_pwd'], + user_domain_name=self.conf['user_domain'], + project_name=self.conf['admin_tenant'], + project_domain_name=self.conf['project_domain']) + + self.cinder_client = cinder_client_v3.Client( + session=ksc_session.Session(auth=auth), + auth_url=self.conf['auth_url'], + endpoint_type='internalURL', + region_name="RegionOne") + + return 
self.cinder_client + + @property + def get_glance_client(self): + if not self.glance_client_v1 or not self.glance_client_v2: + auth = v3.Password(auth_url=self.conf['auth_url'], + username=self.conf['admin_user'], + password=self.conf['admin_pwd'], + user_domain_name=self.conf['user_domain'], + project_name=self.conf['admin_tenant'], + project_domain_name=self.conf['project_domain']) + + self.glance_client_v1 = Client( + '1', session=ksc_session.Session(auth=auth)) + self.glance_client_v2 = Client( + '2', session=ksc_session.Session(auth=auth)) + + return self.glance_client_v1, self.glance_client_v2 + + +def show_help(): + print ("Usage: %s " % sys.argv[0]) + print textwrap.fill( + "Tidy storage post system restore. Check user actions " + "in the generated user_action_log_file.", 80) + + +def tidy_storage(result_file): + """ + Search Glance images DB and rbd images pool for any discrepancy + between the two. + - If an image is in Glance images DB but not in rbd images pool, + list the image and suggested actions to take in a log file. + - If an image is in rbd images pool but not in Glance images DB, + create a Glance image in Glance images DB to associate with the + backend data. List the image and suggested actions to take in a log + file. + + Search Cinder volumes DB and rbd cinder-volumes pool for any discrepancy + between the two. + - If a volume is in Cinder volumes DB but not in rbd cinder-volumes + pool, set the volume state to "error". List the volume and suggested + actions to take in a log file. + - If a volume is in rbd cinder-volumes pool but not in Cinder volumes + DB, remove any snapshot(s) assoicated with this volume in rbd pool and + create a volume in Cinder volumes DB to associate with the backend + data. List the volume and suggested actions to take in a log file. + - If a volume is in both Cinder volumes DB and rbd cinder-volumes pool + and it has snapshot(s) in the rbd pool, re-create the snapshot in + Cinder if it doesn't exist. + + Clean up Cinder snapshots DB if the snapshot doesn't have backend data. 
+ + """ + with OpenStack() as client: + # Check Glance images + print("Scanning Glance images in DB and rbd images pool...\n") + try: + g_client_v1, g_client_v2 = client.get_glance_client + image_l = g_client_v2.images.list() + image_id_l = [image['id'].encode('utf-8') for image in image_l] + + output = subprocess.check_output( + ["rbd", + "ls", + "--pool", + "images"], + stderr=subprocess.STDOUT) + except subprocess.CalledProcessError: + LOG.error("Failed to access rbd images pool") + raise TidyStorageFail("Failed to access rbd images pool") + except Exception as e: + LOG.exception(e) + raise TidyStorageFail("Failed to list Glance images") + + rbd_image_l = [i for i in output.split('\n') if i != ""] + + print("Images in Glance images DB: %s \n" % image_id_l) + print("Images in rbd images pool: %s \n" % rbd_image_l) + + in_glance_only = np.setdiff1d(image_id_l, rbd_image_l) + in_rbd_image_only = np.setdiff1d(rbd_image_l, image_id_l) + + print("Images in Glance images DB only: %s \n" % in_glance_only) + print("Images in rbd images pool only: %s \n" % in_rbd_image_only) + + if in_rbd_image_only.size != 0: + output = subprocess.check_output( + ["grep", + "fsid", + "/etc/ceph/ceph.conf"], + stderr=subprocess.STDOUT) + + ceph_cluster = [i.strip() for i in output.split('=') + if i.find('fsid') == -1] + + fields = dict() + for image in in_rbd_image_only: + try: + img_file = 'rbd:images/{}'.format(image) + output = subprocess.check_output( + ["qemu-img", "info", img_file], stderr=subprocess.STDOUT) + + fields['disk_format'] = 'qcow2' + for line in output.split('\n'): + if 'file format:' in line: + fields['disk_format'] = line.split(':')[1].strip() + break + + fields['name'] = 'found-image-%s' % image + fields['id'] = image + fields['container_format'] = 'bare' + fields['location'] = \ + 'rbd://{}/images/{}/snap'.format(ceph_cluster[0], + image) + + print ("Creating a Glance image %s ...\n " % fields['name']) + g_client_v1.images.create(**fields) + except subprocess.CalledProcessError: + LOG.error("Failed to access rbd image %s" % image) + raise TidyStorageFail("Failed to access rbd image") + except Exception as e: + LOG.exception(e) + raise TidyStorageFail("Failed to create glance image") + + # Check cinder volume snapshots. Do it before "cinder manage" + # operation as "cinder manage" does not support keeping the same + # volume id. 
+ print("Scanning Cinder snapshots in DB and rbd cinder-volumes " + "pool...\n") + try: + c_client = client.get_cinder_client + snap_l = c_client.volume_snapshots.list(search_opts=search_opts) + except Exception as e: + LOG.exception(e) + raise TidyStorageFail("Failed to get Cinder snapshots") + + snaps_no_backend_vol = [] + for snap in snap_l: + print ("Check if volume snapshot %s has backend " % snap.name) + try: + output = subprocess.check_output( + ["rbd", "ls", "--pool", "cinder-volumes"], + stderr=subprocess.STDOUT) + except subprocess.CalledProcessError: + LOG.error("Failed to access rbd cinder-volumes pool") + raise TidyStorageFail( + "Failed to access rbd cinder-volumes pool") + + found_vol = False + for line in output.split('\n'): + if snap.volume_id in line: + found_vol = True + break + + if found_vol: + volume = 'cinder-volumes/volume-{}'.format(snap.volume_id) + try: + output = subprocess.check_output( + ["rbd", "snap", "list", volume], + stderr=subprocess.STDOUT) + + keep_snap = False + for line in output.split('\n'): + if snap.id in line: + keep_snap = True + break + except subprocess.CalledProcessError: + LOG.info("Failed to list snapshots for volume %s in " + "rbd cinder-volumes pool" + % snap.volume_id) + raise TidyStorageFail("Failed to list snapshots in rbd.") + + if not keep_snap: + try: + print ("Volume snapshot %s has no backend data. " + "Deleting it from Cinder...\n" % snap.name) + + c_client.volume_snapshots.delete(c_utils.find_resource( + c_client.volume_snapshots, snap.id), force=True) + + except Exception as e: + LOG.exception(e) + raise TidyStorageFail( + "Failed to delete volume snapshot") + + else: + # Volume snapshot that doesn't have backend volume cannot + # be deleted. If the backend volume is restored later, then + # the snapshot can be deleted. So for now we will add these + # snapshots in the user action log. 
+ snaps_no_backend_vol.append(snap) + + # Check Cinder volumes + print("Scanning Cinder volumes in DB and rbd cinder-volumes pool...\n") + try: + volume_l = c_client.volumes.list(search_opts=search_opts) + v_t_d = c_client.volume_types.default() + avail_zones = c_client.availability_zones.list() + pools = c_client.pools.list() + except Exception as e: + LOG.exception(e) + raise TidyStorageFail("Failed to get Cinder volume info") + + if pools: + host = pools[0].name + + if v_t_d is None: + v_t_d = 'ceph' + else: + v_t_d = v_t_d.name + + cinder_volume_l = [i.id.encode('utf-8') for i in volume_l] + + if avail_zones: + avail_z = avail_zones[0].zoneName + + try: + output = subprocess.check_output( + ["rbd", "ls", "--pool", "cinder-volumes"], + stderr=subprocess.STDOUT) + except subprocess.CalledProcessError: + LOG.error("Failed to access rbd cinder-volumes pool") + raise TidyStorageFail("Failed to access rbd cinder-volumes pool") + + rbd_volume_l = [i[7:] for i in output.split('\n') if i != ""] + + print("Volumes in Cinder volumes DB: %s \n" % cinder_volume_l) + print("Volumes in rbd pool: %s \n" % rbd_volume_l) + + in_cinder_only = np.setdiff1d(cinder_volume_l, rbd_volume_l) + in_rbd_volume_only = np.setdiff1d(rbd_volume_l, cinder_volume_l) + in_cinder_and_rbd = np.intersect1d(cinder_volume_l, rbd_volume_l) + + print("Volumes in Cinder volumes DB only: %s \n" % in_cinder_only) + print("Volumes in rbd pool only: %s \n" % in_rbd_volume_only) + print("Volumes in Cinder volumes DB and rbd pool: %s \n" + % in_cinder_and_rbd) + + for vol_id in in_rbd_volume_only: + volume = 'cinder-volumes/volume-{}'.format(vol_id) + try: + # Find out if the volume is a bootable one + output = subprocess.check_output( + ["rbd", "info", volume], + stderr=subprocess.STDOUT) + + bootable = False + for line in output.split('\n'): + if 'parent: images/' in line: + bootable = True + break + + # Find out if the volume has any snapshots. + print("Checking if volume %s has snapshots...\n" % vol_id) + output = subprocess.check_output( + ["rbd", "snap", "list", volume], stderr=subprocess.STDOUT) + + snap_l = [item.strip() for item in output.split(' ') + if item.find('snapshot-') != -1] + + # Returned volume id (vol.id) will be different from vol_id + try: + vol = c_client.volumes.manage( + host=host, + ref={'source-name': 'volume-%s' % vol_id}, + name='found-volume-%s' % vol_id, + description='manage a volume', + volume_type=v_t_d, + availability_zone=avail_z, + bootable=bootable) + + print("Creating volume found-volume-%s in Cinder...\n" + % vol_id) + except Exception as e: + LOG.exception(e) + raise TidyStorageFail("Failed to manage volume") + + try: + for snap in snap_l: + # Manage a snapshot for a managed volume is not + # supported in rbd. So we just remove the snapshot. 
+ + # Remove the snapshot + print (textwrap.fill( + "Removing snapshot %s from volume %s " + "in rbd...\n" % (snap, vol_id), 76)) + del_snap = '{}@{}'.format(volume, snap) + output = subprocess.check_output( + ["rbd", "snap", "unprotect", del_snap], + stderr=subprocess.STDOUT) + + output = subprocess.check_output( + ["rbd", "snap", "rm", del_snap], + stderr=subprocess.STDOUT) + + except Exception as e: + LOG.exception(e) + raise TidyStorageFail("Failed to manage volume snapshot") + except subprocess.CalledProcessError: + LOG.error("Failed to access volume %s in cinder-volumes pool" + % vol_id) + raise TidyStorageFail("Failed to access rbd image") + + for vol in in_cinder_only: + try: + c_client.volumes.reset_state( + c_utils.find_volume(c_client, vol), state='error') + print("Setting state to error for volume %s \n" % vol) + except Exception as e: + LOG.error("Failed to update volume to error state for %s" + % vol) + raise TidyStorageFail("Failed to update volume to error state") + + # For volumes that are in Cinder volumes DB and rbd cinder-volumes + # pool, we check if any volume snapshot needs to be re-created + try: + c_s_l = c_client.volume_snapshots.list(search_opts=search_opts) + cinder_snap_l = ['snapshot-{}'.format(snap.id) for snap in c_s_l] + except Exception as e: + LOG.exception(e) + raise TidyStorageFail("Failed to get Cinder snapshots") + + for vol_id in in_cinder_and_rbd: + volume = 'cinder-volumes/volume-{}'.format(vol_id) + try: + # Find out if the volume has any snapshots. + print("Checking if volume %s has snapshots...\n" % vol_id) + output = subprocess.check_output( + ["rbd", "snap", "list", volume], + stderr=subprocess.STDOUT) + + snap_l = [item.strip() for item in output.split(' ') + if item.find('snapshot-') != -1] + + for snap in snap_l: + if snap not in cinder_snap_l: + print ("Creating volume snapshot found-%s " + "in Cinder...\n" % snap) + + c_client.volume_snapshots.manage( + volume_id=vol_id, + ref={'source-name': snap}, + name='found-%s' % snap, + description='manage a snapshot') + except subprocess.CalledProcessError: + LOG.error("Failed to access snapshot for volume %s" + % vol_id) + raise TidyStorageFail("Failed to access volume snapshot") + except Exception as e: + LOG.exception(e) + raise TidyStorageFail("Failed to manage Cinder snapshot") + + try: + with open(result_file, 'w') as f: + f.write('\n%s\n' % ('-' * 80)) + f.write(textwrap.fill( + "Following images are found in Ceph images pool but " + "not in Glance. These images were created after the " + "system backup was done. If you do not want to keep " + "them, you can delete them by " + "\"glance image-delete \" command.", 80)) + f.write("\n\n") + f.write('{0[0]:<40}{0[1]:<50}\n'.format(['ID', 'NAME'])) + image_l = g_client_v2.images.list() + for image in image_l: + if image['name'].find("found-image") != -1: + f.write('{0[0]:<40}{0[1]:<50}\n'.format( + [image['id'].encode('utf-8'), image['name']])) + + f.write("\n") + f.write('\n%s\n' % ('-' * 80)) + f.write(textwrap.fill( + "Following images are found in Glance without backend " + "data associated with. These images were deleted after " + "the system backup was done. 
You can delete them by " + "\"glance image-delete \" command or follow the B&R " + "document to restore the image.", 80)) + f.write("\n\n") + f.write('{0[0]:<40}{0[1]:<50}\n'.format(['ID', 'NAME'])) + image_l = g_client_v2.images.list() + for image in image_l: + if (in_glance_only.size != 0 and + image['id'].encode('utf-8') in in_glance_only): + f.write('{0[0]:<40}{0[1]:<50}\n'.format( + [image['id'].encode('utf-8'), image['name']])) + + f.write("\n") + f.write('\n%s\n' % ('-' * 80)) + f.write(textwrap.fill( + "Following volumes are found in Ceph cinder-volumes " + "pool but not in Cinder. These volumes were created " + "after the system backup was done. If you do not want " + "to keep them you can delete them by " + "\"cinder delete \" command.", 80)) + f.write("\n\n") + f.write('{0[0]:<40}{0[1]:<50}\n'.format(['ID', 'NAME'])) + volume_l = c_client.volumes.list(search_opts=search_opts) + for volume in volume_l: + if volume.name.find("found-") != -1: + f.write('{0[0]:<40}{0[1]:<50}\n'.format( + [volume.id.encode('utf-8'), volume.name])) + + f.write("\n") + f.write('\n%s\n' % ('-' * 80)) + f.write(textwrap.fill( + "Following volumes are found in Cinder without backend " + "data associated with. These volumes were deleted " + "after the system backup was done. You can delete them " + "by \"cinder delete \" command or follow the B&R " + "document to restore the cinder volume.", 80)) + f.write("\n\n") + f.write('{0[0]:<40}{0[1]:<50}\n'.format(['ID', 'NAME'])) + volume_l = c_client.volumes.list(search_opts=search_opts) + for volume in volume_l: + if (in_cinder_only.size != 0 and + volume.id in in_cinder_only): + f.write('{0[0]:<40}{0[1]:<50}\n'.format( + [volume.id.encode('utf-8'), volume.name])) + + f.write("\n") + f.write('\n%s\n' % ('-' * 80)) + f.write(textwrap.fill( + "Following volume snapshots are found in Ceph but not in " + "Cinder. These volume snapshots were created after the " + "system backup was done. If you do not want to keep them " + "you can delete them by \"cinder snapshot-delete \" " + "command.", 80)) + f.write("\n\n") + f.write('{0[0]:<40}{0[1]:<50}\n'.format(['ID', 'NAME'])) + snap_l = c_client.volume_snapshots.list( + search_opts=search_opts) + for snap in snap_l: + if snap.name.find("found-") != -1: + f.write('{0[0]:<40}{0[1]:<50}\n'.format( + [snap.id.encode('utf-8'), snap.name])) + + f.write("\n") + f.write('\n%s\n' % ('-' * 80)) + f.write(textwrap.fill( + "Following volume snapshots are found in Cinder without " + "backend volumes. 
If you want to delete them, you can do " + "so by \"cinder snapshot-delete \" after backend " + "volumes are restored.", 80)) + f.write("\n\n") + f.write('{0[0]:<40}{0[1]:<50}\n'.format(['ID', 'NAME'])) + for snap in snaps_no_backend_vol: + f.write('{0[0]:<40}{0[1]:<50}\n'.format( + [snap.id.encode('utf-8'), snap.name])) + + f.write("\n\n") + + except IOError: + raise TidyStorageFail("Failed to open file: %s" % result_file) + + +def main(): + if (len(sys.argv) < 2 or + sys.argv[1] in ['--help', '-h', '-?']): + show_help() + exit(1) + + log.configure() + + result_file = sys.argv[1] + + try: + open(result_file, 'w') + except IOError: + raise TidyStorageFail("Failed to open file: %s" % result_file) + exit(1) + + tidy_storage(result_file) diff --git a/controllerconfig/controllerconfig/setup.py b/controllerconfig/controllerconfig/setup.py index c018963772..4f50d7a667 100644 --- a/controllerconfig/controllerconfig/setup.py +++ b/controllerconfig/controllerconfig/setup.py @@ -23,7 +23,8 @@ setup( 'config_management = controllerconfig.config_management:main', 'upgrade_controller = controllerconfig.upgrades.controller:main', 'upgrade_controller_simplex = ' - 'controllerconfig.upgrades.controller:simplex_main' + 'controllerconfig.upgrades.controller:simplex_main', + 'tidy_storage_post_restore = controllerconfig.tidy_storage:main' ], } ) diff --git a/sysinv/cgts-client/centos/build_srpm.data b/sysinv/cgts-client/centos/build_srpm.data index 9936e62623..f4cf7d398b 100644 --- a/sysinv/cgts-client/centos/build_srpm.data +++ b/sysinv/cgts-client/centos/build_srpm.data @@ -1,2 +1,2 @@ SRC_DIR="cgts-client" -TIS_PATCH_VER=60 +TIS_PATCH_VER=61 diff --git a/sysinv/cgts-client/cgts-client/cgtsclient/v1/ihost.py b/sysinv/cgts-client/cgts-client/cgtsclient/v1/ihost.py index 72ee1863ec..13920fc065 100644 --- a/sysinv/cgts-client/cgts-client/cgtsclient/v1/ihost.py +++ b/sysinv/cgts-client/cgts-client/cgtsclient/v1/ihost.py @@ -107,6 +107,11 @@ class ihostManager(base.Manager): data = [data] return [obj_class(self, res, loaded=True) for res in data if res] + def update_install_uuid(self, hostid, install_uuid): + path = self._path(hostid) + "/state/update_install_uuid" + + self.api.json_request('PUT', path, body=install_uuid) + def delete(self, ihost_id): return self._delete(self._path(ihost_id)) diff --git a/sysinv/sysinv/centos/build_srpm.data b/sysinv/sysinv/centos/build_srpm.data index 2ed257b5b1..43df451e49 100644 --- a/sysinv/sysinv/centos/build_srpm.data +++ b/sysinv/sysinv/centos/build_srpm.data @@ -1,2 +1,2 @@ SRC_DIR="sysinv" -TIS_PATCH_VER=288 +TIS_PATCH_VER=289 diff --git a/sysinv/sysinv/sysinv/sysinv/agent/manager.py b/sysinv/sysinv/sysinv/sysinv/agent/manager.py index 25f5452f19..bef3ceab64 100644 --- a/sysinv/sysinv/sysinv/sysinv/agent/manager.py +++ b/sysinv/sysinv/sysinv/sysinv/agent/manager.py @@ -35,6 +35,7 @@ Commands (from conductors) are received via RPC calls. import errno import fcntl +import fileinput import os import retrying import shutil @@ -1291,6 +1292,30 @@ class AgentManager(service.PeriodicService): return + def iconfig_update_install_uuid(self, context, host_uuid, install_uuid): + """Update install_uuid in /etc/platform/platform.conf + + :param context: request context. 
+ :param host_uuid: The host uuid to update the install_uuid + :param install_uuid: The updated install_uuid that will be + : written into /etc/platform/platform.conf + """ + + LOG.debug("iconfig_update_install_uuid " + "host_uuid=%s install_uuid=%s" % (host_uuid, install_uuid)) + + if self._ihost_uuid and self._ihost_uuid == host_uuid: + temp_platform_conf_file = os.path.join(tsc.PLATFORM_CONF_PATH, + 'platform.conf.temp') + shutil.copyfile(tsc.PLATFORM_CONF_FILE, temp_platform_conf_file) + for line in fileinput.FileInput(temp_platform_conf_file, inplace=1): + if line.startswith("INSTALL_UUID="): + print "INSTALL_UUID=%s" % install_uuid + else: + print line, + fileinput.close() + os.rename(temp_platform_conf_file, tsc.PLATFORM_CONF_FILE) + @utils.synchronized(LOCK_AGENT_ACTION, external=False) def iconfig_update_file(self, context, iconfig_uuid, iconfig_dict): """Configure the iiconfig_uuid, by updating file based upon diff --git a/sysinv/sysinv/sysinv/sysinv/agent/rpcapi.py b/sysinv/sysinv/sysinv/sysinv/agent/rpcapi.py index 6204ed0edf..9879bbd604 100644 --- a/sysinv/sysinv/sysinv/sysinv/agent/rpcapi.py +++ b/sysinv/sysinv/sysinv/sysinv/agent/rpcapi.py @@ -109,6 +109,26 @@ class AgentAPI(sysinv.openstack.common.rpc.proxy.RpcProxy): return retval + def iconfig_update_install_uuid(self, context, host_uuid, install_uuid): + """Asynchronously, have the agent update install_uuid in + /etc/platform/platform.conf + + :param context: request context. + :param host_uuid: The host uuid to update the install_uuid + :param install_uuid: The updated install_uuid that will be + : written into /etc/platform/platform.conf + """ + + LOG.debug("AgentApi.iconfig_update_install_uuid: fanout_cast: sending" + " install_uuid %s to agent" % install_uuid) + + retval = self.fanout_cast(context, self.make_msg( + 'iconfig_update_install_uuid', + host_uuid=host_uuid, + install_uuid=install_uuid)) + + return retval + def config_apply_runtime_manifest(self, context, config_uuid, config_dict): """Asynchronously have the agent apply the specified manifest based upon the config_dict (including personalities). diff --git a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py index ed53fbfa75..0b6f77ee7c 100644 --- a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py +++ b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py @@ -157,6 +157,9 @@ class HostProvisionStateController(rest.RestController): raise NotImplementedError() +LOCK_NAME_STATE = 'HostStatesController' + + class HostStates(base.APIBase): """API representation of the states of a ihost.""" @@ -180,6 +183,7 @@ class HostStates(base.APIBase): class HostStatesController(rest.RestController): _custom_actions = { 'host_cpus_modify': ['PUT'], + 'update_install_uuid': ['PUT'], } # GET ihosts//state @@ -200,6 +204,22 @@ class HostStatesController(rest.RestController): sort_key=None, sort_dir=None) + # PUT ihosts//state/update_install_uuid + @cutils.synchronized(LOCK_NAME_STATE) + @wsme_pecan.wsexpose(HostStates, types.uuid, body=unicode) + def update_install_uuid(self, host_uuid, install_uuid): + """ Update install_uuid in /etc/platform/platform.conf + on the specified host. + :param host_uuid: UUID of the host + :param install_uuid: install_uuid. 
+ """ + LOG.info("update_install_uuid host_uuid=%s install_uuid=%s" % + (host_uuid, install_uuid)) + + pecan.request.rpcapi.update_install_uuid(pecan.request.context, + host_uuid, + install_uuid) + # PUT ihosts//state/host_cpus_modify @cutils.synchronized(cpu_api.LOCK_NAME) @wsme_pecan.wsexpose(cpu_api.CPUCollection, types.uuid, body=[unicode]) diff --git a/sysinv/sysinv/sysinv/sysinv/conductor/manager.py b/sysinv/sysinv/sysinv/sysinv/conductor/manager.py index 5e8a90494b..66dc4224b9 100644 --- a/sysinv/sysinv/sysinv/sysinv/conductor/manager.py +++ b/sysinv/sysinv/sysinv/sysinv/conductor/manager.py @@ -5640,6 +5640,14 @@ class ConductorManager(service.PeriodicService): 'capabilities': capabilities} self.dbapi.service_update(cinder_service.name, values) + def update_install_uuid(self, context, host_uuid, install_uuid): + """ Update install_uuid on the specified host """ + + LOG.info("update_install_uuid host_uuid=%s install_uuid=%s " + % (host_uuid, install_uuid)) + rpcapi = agent_rpcapi.AgentAPI() + rpcapi.iconfig_update_install_uuid(context, host_uuid, install_uuid) + def update_ceph_config(self, context, sb_uuid, services): """Update the manifests for Cinder Ceph backend""" diff --git a/sysinv/sysinv/sysinv/sysinv/conductor/rpcapi.py b/sysinv/sysinv/sysinv/sysinv/conductor/rpcapi.py index e6813888f5..a8bb1c505f 100644 --- a/sysinv/sysinv/sysinv/sysinv/conductor/rpcapi.py +++ b/sysinv/sysinv/sysinv/sysinv/conductor/rpcapi.py @@ -837,6 +837,19 @@ class ConductorAPI(sysinv.openstack.common.rpc.proxy.RpcProxy): return self.call(context, self.make_msg('update_lvm_cinder_config')) + def update_install_uuid(self, context, host_uuid, install_uuid): + """Synchronously, have an agent update install_uuid on + a host. + + :param context: request context. + :parm host_uuid: host uuid to update the install_uuid + :parm install_uuid: install_uuid + """ + return self.call(context, + self.make_msg('update_install_uuid', + host_uuid=host_uuid, + install_uuid=install_uuid)) + def update_ceph_config(self, context, sb_uuid, services): """Synchronously, have the conductor update Ceph on a controller