diff --git a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/partition.py b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/partition.py index 32fe665212..2d55986207 100644 --- a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/partition.py +++ b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/partition.py @@ -691,6 +691,8 @@ def _create(partition): idiskid, {'available_mib': new_available_mib}) + partition_config_flag = constants.PARTITION_CONFIG_FLAG % (forihostid) + cutils.touch(partition_config_flag) try: # Update the database new_partition = pecan.request.dbapi.partition_create(forihostid, @@ -707,11 +709,13 @@ def _create(partition): # Instruct puppet to implement the change pecan.request.rpcapi.update_partition_config(pecan.request.context, partition) - except exception.HTTPNotFound: - msg = _("Creating partition failed for host %s ") % (ihost['hostname']) - raise wsme.exc.ClientSideError(msg) except exception.PartitionAlreadyExists: msg = _("Disk partition %s already exists." % partition.get('device_path')) + cutils.remove(partition_config_flag) + raise wsme.exc.ClientSideError(msg) + except Exception: + msg = _("Creating partition failed for host %s ") % (ihost['hostname']) + cutils.remove(partition_config_flag) raise wsme.exc.ClientSideError(msg) return new_partition diff --git a/sysinv/sysinv/sysinv/sysinv/common/constants.py b/sysinv/sysinv/sysinv/sysinv/common/constants.py index 693467b097..1389afb9a6 100644 --- a/sysinv/sysinv/sysinv/sysinv/common/constants.py +++ b/sysinv/sysinv/sysinv/sysinv/common/constants.py @@ -1594,6 +1594,9 @@ SYSINV_FIRST_REPORT_FLAG = os.path.join(SYSINV_VOLATILE_PATH, SYSINV_REPORTED = os.path.join(SYSINV_VOLATILE_PATH, ".sysinv_reported") +PARTITION_CONFIG_FLAG = os.path.join( + SYSINV_VOLATILE_PATH, ".sysinv_partition_config_%s") + NETWORK_CONFIG_LOCK_FILE = os.path.join( tsc.VOLATILE_PATH, "apply_network_config.lock") diff --git a/sysinv/sysinv/sysinv/sysinv/common/utils.py b/sysinv/sysinv/sysinv/sysinv/common/utils.py index 
eb936fe9a7..f4f2739b46 100644 --- a/sysinv/sysinv/sysinv/sysinv/common/utils.py +++ b/sysinv/sysinv/sysinv/sysinv/common/utils.py @@ -928,6 +928,11 @@ def touch(fname): os.utime(fname, None) +def remove(fname): + if os.path.exists(fname): + os.remove(fname) + + def symlink_force(source, link_name): """ Force creation of a symlink Params: diff --git a/sysinv/sysinv/sysinv/sysinv/conductor/manager.py b/sysinv/sysinv/sysinv/sysinv/conductor/manager.py index d6e085487f..1428f98d3d 100644 --- a/sysinv/sysinv/sysinv/sysinv/conductor/manager.py +++ b/sysinv/sysinv/sysinv/sysinv/conductor/manager.py @@ -399,6 +399,8 @@ class ConductorManager(service.PeriodicService): self._sx_to_dx_post_migration_actions(system) + self._clear_partition_config_flags() + LOG.info("sysinv-conductor start committed system=%s" % system.as_dict()) @@ -676,6 +678,11 @@ class ConductorManager(service.PeriodicService): constants.CINDER_BACKEND_CEPH, task=constants.SB_TASK_RESTORE) + def _clear_partition_config_flags(self): + files = constants.PARTITION_CONFIG_FLAG % ("*") + for fname in glob.glob(files): + cutils.remove(fname) + def _clear_stuck_loads(self): load_stuck_states = [constants.IMPORTING_LOAD_STATE] @@ -4968,6 +4975,7 @@ class ConductorManager(service.PeriodicService): LOG.debug("PART conductor-manager partition: %s" % str(partition)) # Get host. host_uuid = partition.get('ihost_uuid') + forihostid = partition.get('forihostid') try: db_host = self.dbapi.ihost_get(host_uuid) except exception.ServerNotFound: @@ -4998,6 +5006,10 @@ class ConductorManager(service.PeriodicService): config_dict, force=force_apply, filter_classes=[self.PUPPET_RUNTIME_CLASS_PARTITIONS]) + # The flag is cleared because the manifest class has already been added + # using the _add_runtime_class_apply_in_progress() method + # within _config_apply_runtime_manifest(). 
+ cutils.remove(constants.PARTITION_CONFIG_FLAG % (forihostid)) def ipartition_update_by_ihost(self, context, ihost_uuid, ipart_dict_array, first_report=False): @@ -5014,8 +5026,10 @@ class ConductorManager(service.PeriodicService): LOG.exception("Invalid ihost_uuid %s" % ihost_uuid) return + upgrade_in_progress = False try: self.dbapi.software_upgrade_get_one() + upgrade_in_progress = True except exception.NotFound: # No upgrade in progress pass @@ -5032,14 +5046,20 @@ class ConductorManager(service.PeriodicService): db_host.hostname) return - if first_report and self._check_runtime_class_apply_in_progress([self.PUPPET_RUNTIME_CLASS_PARTITIONS], - host_uuids=ihost_uuid): - self._clear_runtime_class_apply_in_progress(classes_list=[self.PUPPET_RUNTIME_CLASS_PARTITIONS], - host_uuids=ihost_uuid) - # Get the id of the host. forihostid = db_host['id'] + partition_config_flag = constants.PARTITION_CONFIG_FLAG % (forihostid) + + # Receiving first_report=True means the sysinv-agent on that host has just started. + # This means that if there were any puppet manifests running, they have been + # terminated, so we need to clear the list of runtime manifests in progress + # below and also remove the partition config flag from that host, to avoid a false positive. + if first_report: + self._clear_runtime_class_apply_in_progress(classes_list=[self.PUPPET_RUNTIME_CLASS_PARTITIONS], + host_uuids=ihost_uuid) + cutils.remove(partition_config_flag) + # Obtain the partitions, disks and physical volumes that are currently # present in the DB. db_parts = self.dbapi.partition_get_by_ihost(ihost_uuid) @@ -5085,8 +5105,13 @@ class ConductorManager(service.PeriodicService): # Handle database to fix partitions with the status 'stuck' # in creating/deleting/modifying. 
- if not self._check_runtime_class_apply_in_progress([self.PUPPET_RUNTIME_CLASS_PARTITIONS]): - if db_part.device_path not in ipart_device_paths: + if not os.path.exists(partition_config_flag) and \ + not self._check_runtime_class_apply_in_progress([self.PUPPET_RUNTIME_CLASS_PARTITIONS], + host_uuids=ihost_uuid): + if db_part.device_path not in ipart_device_paths and \ + not upgrade_in_progress and \ + not os.path.exists(tsc.RESTORE_IN_PROGRESS_FLAG) and \ + db_part.status != constants.PARTITION_CREATE_ON_UNLOCK_STATUS: self.dbapi.partition_destroy(db_part.uuid) LOG.info("Delete DB partition stuck: %s" % str(db_part.items())) elif db_part.status == constants.PARTITION_MODIFYING_STATUS: