diff --git a/distributedcloud/dccommon/drivers/openstack/dcmanager_v1.py b/distributedcloud/dccommon/drivers/openstack/dcmanager_v1.py
index 490543b36..eff57bbc9 100644
--- a/distributedcloud/dccommon/drivers/openstack/dcmanager_v1.py
+++ b/distributedcloud/dccommon/drivers/openstack/dcmanager_v1.py
@@ -427,7 +427,8 @@ class DcmanagerClient(base.DriverBase):
             raise ValueError("subcloud_ref is required.")
 
         url = f"{self.endpoint}/subclouds/{subcloud_ref}"
-        headers = {"X-Auth-Token": self.token}
+        headers = {"X-Auth-Token": self.token,
+                   "User-Agent": consts.DCMANAGER_V1_HTTP_AGENT}
 
         response = requests.delete(url, headers=headers,
                                    timeout=self.timeout)
diff --git a/distributedcloud/dccommon/tests/unit/drivers/test_dcmanager_v1.py b/distributedcloud/dccommon/tests/unit/drivers/test_dcmanager_v1.py
index 26b192ef1..b37b269af 100644
--- a/distributedcloud/dccommon/tests/unit/drivers/test_dcmanager_v1.py
+++ b/distributedcloud/dccommon/tests/unit/drivers/test_dcmanager_v1.py
@@ -362,7 +362,8 @@ class TestDcmanagerClient(base.DCCommonTestCase):
         result = client.delete_subcloud(SUBCLOUD_NAME)
         mock_delete.assert_called_once_with(
             FAKE_ENDPOINT + '/subclouds/' + SUBCLOUD_NAME,
-            headers={"X-Auth-Token": FAKE_TOKEN},
+            headers={"X-Auth-Token": FAKE_TOKEN,
+                     "User-Agent": dccommon_consts.DCMANAGER_V1_HTTP_AGENT},
             timeout=FAKE_TIMEOUT
         )
         self.assertEqual(result, '')
diff --git a/distributedcloud/dcmanager/api/controllers/v1/subclouds.py b/distributedcloud/dcmanager/api/controllers/v1/subclouds.py
index 7b04851ea..92c42964a 100644
--- a/distributedcloud/dcmanager/api/controllers/v1/subclouds.py
+++ b/distributedcloud/dcmanager/api/controllers/v1/subclouds.py
@@ -648,6 +648,100 @@ class SubcloudsController(object):
         if not payload:
             pecan.abort(400, _('Body required'))
 
+        peer_group = payload.get('peer_group')
+        # Verify the peer_group is valid
+        peer_group_id = None
+        if peer_group is not None:
+            # peer_group may be passed in the payload as an int or str
+            peer_group = str(peer_group)
+            # Get current site system information
+            local_system_uuid = utils.get_local_system().uuid
+            # Check if the user wants to remove the subcloud
+            # from its subcloud peer group by passing
+            # peer_group_id as 'none'. In that case the
+            # string 'none' is forwarded as the
+            # peer_group_id, and update_subcloud()
+            # handles it by setting the peer_group_id
+            # field in the database to None.
+            if peer_group.lower() == 'none':
+                if subcloud.peer_group_id is not None:
+                    # Get the peer group of the subcloud
+                    original_pgrp = db_api.subcloud_peer_group_get(
+                        context, subcloud.peer_group_id)
+                    # Abort if the system leader is not on this site
+                    if original_pgrp.system_leader_id != local_system_uuid:
+                        pecan.abort(400, _("Removing subcloud from a "
+                                           "peer group not led by the "
+                                           "current site is prohibited."))
+                    # Get associations by peer group id
+                    associations = db_api.\
+                        peer_group_association_get_by_peer_group_id(
+                            context, original_pgrp.id)
+                    for association in associations:
+                        system_peer = db_api.system_peer_get(
+                            context, association.system_peer_id)
+                        # If the system peer is available, do not allow
+                        # removal from a secondary peer group
+                        if system_peer.availability_state == consts.\
+                            SYSTEM_PEER_AVAILABILITY_STATE_AVAILABLE \
+                                and original_pgrp.group_priority > 0:
+                            pecan.abort(400, _(
+                                "Removing subcloud from a peer group "
+                                "associated with an available system peer "
+                                "is prohibited."))
+                peer_group_id = 'none'
+            else:
+                if subcloud.peer_group_id is not None and \
+                        str(subcloud.peer_group_id) != peer_group:
+                    original_pgrp = utils.subcloud_peer_group_get_by_ref(
+                        context, str(subcloud.peer_group_id))
+                    if original_pgrp and original_pgrp.group_priority > 0:
+                        pecan.abort(400, _(
+                            "Cannot update subcloud to a new peer group "
+                            "if the original peer group has non-zero "
+                            "priority."))
+                pgrp = utils.subcloud_peer_group_get_by_ref(context, peer_group)
+                if not pgrp:
+                    pecan.abort(400, _('Invalid peer group'))
+                if not utils.is_req_from_another_dc(request):
+                    if pgrp.group_priority > 0:
+                        pecan.abort(400, _("Cannot set the subcloud to a peer"
+                                           " group with non-zero priority."))
+                    elif pgrp.system_leader_id != local_system_uuid:
+                        pecan.abort(400, _("Updating a subcloud to a peer "
+                                           "group that is not led by the "
+                                           "current site is prohibited."))
+                    elif not (
+                        subcloud.deploy_status == consts.DEPLOY_STATE_DONE
+                        and subcloud.management_state ==
+                        dccommon_consts.MANAGEMENT_MANAGED
+                        and subcloud.availability_status ==
+                            dccommon_consts.AVAILABILITY_ONLINE):
+                        pecan.abort(400, _("Only subclouds that are "
+                                           "managed and online can be "
+                                           "added to a peer group."))
+                peer_group_id = pgrp.id
+
+        # A subcloud can only be updated from the primary site
+        # because the sync command can only be issued in the
+        # site where the SPG was created.
+        if subcloud.peer_group_id is not None and peer_group_id is None \
+                and not utils.is_req_from_another_dc(request):
+            # Get the peer group of the subcloud
+            original_pgrp = db_api.subcloud_peer_group_get(
+                context, subcloud.peer_group_id)
+            if original_pgrp.group_priority > 0:
+                pecan.abort(400, _("Subcloud update is only allowed when "
+                                   "its peer group priority value is 0."))
+            # Get current site system information
+            local_system_uuid = utils.get_local_system().uuid
+            # A subcloud in a peer group on the primary site can
+            # only be updated if the primary site leads the peer group.
+            if original_pgrp.system_leader_id != local_system_uuid:
+                pecan.abort(400, _("Updating a subcloud in a "
+                                   "peer group not led by the "
+                                   "current site is prohibited."))
+
         # Rename the subcloud
         new_subcloud_name = payload.get('name')
         if new_subcloud_name is not None:
@@ -736,7 +830,6 @@ class SubcloudsController(object):
         description = payload.get('description')
         location = payload.get('location')
         bootstrap_values = payload.get('bootstrap_values')
-        peer_group = payload.get('peer_group')
         bootstrap_address = payload.get('bootstrap_address')
 
         # If the migrate flag is present we need to update the deploy status
@@ -752,6 +845,11 @@
                 management_state not in [dccommon_consts.MANAGEMENT_UNMANAGED,
                                          dccommon_consts.MANAGEMENT_MANAGED]:
             pecan.abort(400, _('Invalid management-state'))
+        if management_state and subcloud.peer_group_id is not None \
+                and not utils.is_req_from_another_dc(request):
+            pecan.abort(400, _('Cannot update the management state of a '
+                               'subcloud that is associated with '
+                               'a peer group.'))
 
         force_flag = payload.get('force')
         if force_flag is not None:
@@ -776,41 +874,6 @@
                 exceptions.SubcloudGroupNotFound):
             pecan.abort(400, _('Invalid group'))
 
-        # Verify the peer_group is valid
-        peer_group_id = None
-        if peer_group is not None:
-            # peer_group may be passed in the payload as an int or str
-            peer_group = str(peer_group)
-            # Check if user wants to remove a subcloud
-            # from a subcloud-peer-group by
-            # setting peer_group_id as 'none',
-            # then we will pass 'none' string as
-            # the peer_group_id,
-            # update_subcloud() will handle it and
-            # Set the peer_group_id DB into None.
-            if peer_group.lower() == 'none':
-                peer_group_id = 'none'
-            else:
-                pgrp = utils.subcloud_peer_group_get_by_ref(context, peer_group)
-                if not pgrp:
-                    pecan.abort(400, _('Invalid peer group'))
-                if not utils.is_req_from_another_dc(request):
-                    if pgrp.group_priority > 0:
-                        pecan.abort(400, _("Cannot set the subcloud to a peer"
-                                           " group with non-zero priority."))
-                    elif not (
-                        subcloud.deploy_status in [
-                            consts.DEPLOY_STATE_DONE,
-                            consts.PRESTAGE_STATE_COMPLETE
-                        ] and subcloud.management_state ==
-                        dccommon_consts.MANAGEMENT_MANAGED
-                        and subcloud.availability_status ==
-                        dccommon_consts.AVAILABILITY_ONLINE):
-                        pecan.abort(400, _("Only subclouds that are "
-                                           "managed and online can be "
-                                           "added to a peer group."))
-                    peer_group_id = pgrp.id
-
         if consts.INSTALL_VALUES in payload:
             # install_values of secondary subclouds are validated on
             # peer site
@@ -998,6 +1061,20 @@
             pecan.abort(404, _('Subcloud not found'))
 
         subcloud_id = subcloud.id
+        peer_group_id = subcloud.peer_group_id
+        subcloud_management_state = subcloud.management_state
+
+        # Check if the subcloud is in "managed" status
+        if subcloud_management_state == dccommon_consts.MANAGEMENT_MANAGED \
+                and not utils.is_req_from_another_dc(request):
+            pecan.abort(400, _('Cannot delete a subcloud that is in '
+                               '"managed" status'))
+
+        # Check if the subcloud is part of a peer group
+        if peer_group_id is not None and \
+                not utils.is_req_from_another_dc(request):
+            pecan.abort(400, _('Cannot delete a subcloud that is part of '
+                               'a peer group on this site'))
 
         try:
             # Ask dcmanager-manager to delete the subcloud.
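Reviewer note: the peer-group validation added to subclouds.py above is easiest to follow as a single decision tree. The sketch below restates that flow in plain Python under simplifying assumptions: dataclass stand-ins replace the DB objects, ValueError replaces pecan.abort, and the per-association system-peer availability check and the managed/online check are omitted for brevity. All names here are illustrative, not dcmanager APIs.

    from dataclasses import dataclass
    from typing import Optional

    @dataclass
    class PeerGroup:                 # illustrative stand-in for the DB record
        id: int
        group_priority: int
        system_leader_id: str

    def resolve_peer_group_id(requested: str,
                              current: Optional[PeerGroup],
                              target: Optional[PeerGroup],
                              local_uuid: str,
                              from_peer_dc: bool) -> str:
        """Return 'none' or the target group id, or raise on a
        prohibited update (mirrors the abort order above)."""
        if requested.lower() == 'none':
            # Removal is only allowed on the site leading the group.
            if current is not None and \
                    current.system_leader_id != local_uuid:
                raise ValueError("group not led by the current site")
            return 'none'
        # Leaving a non-zero-priority group for another is prohibited.
        if current is not None and target is not None and \
                current.id != target.id and current.group_priority > 0:
            raise ValueError("original group has non-zero priority")
        if target is None:
            raise ValueError("invalid peer group")
        if not from_peer_dc:
            # Local requests may only join zero-priority groups
            # that are led by this site.
            if target.group_priority > 0:
                raise ValueError("target group has non-zero priority")
            if target.system_leader_id != local_uuid:
                raise ValueError("target group not led by this site")
        return str(target.id)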
diff --git a/distributedcloud/dcmanager/manager/peer_group_audit_manager.py b/distributedcloud/dcmanager/manager/peer_group_audit_manager.py
index fc11a7dc2..d49b7efd2 100644
--- a/distributedcloud/dcmanager/manager/peer_group_audit_manager.py
+++ b/distributedcloud/dcmanager/manager/peer_group_audit_manager.py
@@ -51,9 +51,9 @@ class PeerGroupAuditManager(manager.Manager):
                 peer_group_name)
             return subclouds
         except Exception:
-            LOG.exception("Failed to get subclouds of peer group %s "
-                          "from DC: %s" %
-                          (peer_group_name, system_peer.peer_name))
+            LOG.exception(f"Failed to get subclouds of peer group "
+                          f"{peer_group_name} from DC: "
+                          f"{system_peer.peer_name}")
 
     def _update_remote_peer_group_migration_status(self,
                                                    system_peer,
@@ -65,37 +65,58 @@
         }
         dc_client.update_subcloud_peer_group(peer_group_name,
                                              **peer_group_kwargs)
-        LOG.info("Updated Subcloud Peer Group %s on "
-                 "peer site %s, set migration_status to: %s" %
-                 (peer_group_name, system_peer.peer_name, migration_status))
+        LOG.info(f"Updated Subcloud Peer Group {peer_group_name} on "
+                 f"peer site {system_peer.peer_name}, set migration_status "
+                 f"to: {migration_status}")
 
-    def _get_local_subclouds_to_update(self,
-                                       local_peer_group,
-                                       remote_subclouds):
+    def _get_local_subclouds_to_update_and_delete(self,
+                                                  local_peer_group,
+                                                  remote_subclouds):
         local_subclouds_to_update = list()
-        remote_managed_subcloud_region_names = list()
+        local_subclouds_to_delete = list()
+        remote_subclouds_dict = {remote_subcloud.get('region-name'):
+                                 remote_subcloud for remote_subcloud
+                                 in remote_subclouds}
         local_subclouds = db_api.subcloud_get_for_peer_group(
             self.context, local_peer_group.id)
 
-        # get the 'managed+online' remote subclouds
-        for remote_subcloud in remote_subclouds:
-            if (remote_subcloud.get('management-state') ==
-                    dccommon_consts.MANAGEMENT_MANAGED and
-                    remote_subcloud.get('availability-status') ==
-                    dccommon_consts.AVAILABILITY_ONLINE):
-                remote_managed_subcloud_region_names.append(
-                    remote_subcloud.get('region-name'))
-
-        # Compare with the 'non-secondary' local subclouds
         for local_subcloud in local_subclouds:
-            if local_subcloud.region_name in \
-                remote_managed_subcloud_region_names \
-                and not utils.subcloud_is_secondary_state(
-                    local_subcloud.deploy_status):
+            remote_subcloud = remote_subclouds_dict.get(
+                local_subcloud.region_name)
+            if remote_subcloud:
+                # Check if the remote subcloud meets the conditions for
+                # update: it is 'managed' and the local subcloud is not
+                # in 'secondary' status
+                if (remote_subcloud.get('management-state') ==
+                        dccommon_consts.MANAGEMENT_MANAGED and
+                        not utils.subcloud_is_secondary_state(
+                            local_subcloud.deploy_status)):
+                    local_subclouds_to_update.append(local_subcloud)
+            else:
+                local_subclouds_to_delete.append(local_subcloud)
 
-                local_subclouds_to_update.append(local_subcloud)
+        return local_subclouds_to_update, local_subclouds_to_delete
 
-        return local_subclouds_to_update
+    def _set_local_subcloud_to_secondary(self, subcloud):
+        try:
+            LOG.info("Set local subcloud %s to secondary" % subcloud.name)
+            # Unmanaging a subcloud that is already in
+            # 'unmanaged' state raises an exception.
+            if subcloud.management_state != \
+                    dccommon_consts.MANAGEMENT_UNMANAGED:
+                self.subcloud_manager.update_subcloud(
+                    self.context,
+                    subcloud.id,
+                    management_state=dccommon_consts.
+                    MANAGEMENT_UNMANAGED)
+            self.subcloud_manager.update_subcloud(
+                self.context,
+                subcloud.id,
+                deploy_status=consts.DEPLOY_STATE_SECONDARY)
+        except Exception as e:
+            LOG.exception(f"Failed to update local non-secondary "
+                          f"and offline subcloud [{subcloud.name}], err: {e}")
+            raise e
 
     def audit(self, system_peer, remote_peer_group, local_peer_group):
         if local_peer_group.migration_status == consts.PEER_GROUP_MIGRATING:
@@ -120,9 +141,9 @@
         if remote_peer_group.get("migration_status") == \
                 consts.PEER_GROUP_MIGRATING:
             # Unmanaged all local subclouds of peer group
-            LOG.info("Unmanaged all local subclouds of peer group %s "
-                     "since remote is in migrating state" %
-                     local_peer_group.peer_group_name)
+            LOG.info(f"Unmanaging all local subclouds of peer group "
+                     f"{local_peer_group.peer_group_name} "
+                     f"since remote is in migrating state")
             subclouds = db_api.subcloud_get_for_peer_group(self.context,
                                                            local_peer_group.id)
             for subcloud in subclouds:
@@ -152,8 +173,8 @@
                         subcloud.id,
                         deploy_status=consts.DEPLOY_STATE_REHOME_PENDING)
                 except Exception as e:
-                    LOG.exception("Fail to unmanage local subcloud %s, err: "
-                                  "%s" % (subcloud.name, e))
+                    LOG.exception(f"Failed to unmanage local subcloud "
+                                  f"{subcloud.name}, err: {e}")
                     raise e
             self.require_audit_flag = False
 
@@ -167,39 +188,29 @@
                 system_peer,
                 remote_peer_group.get("peer_group_name"))
 
-            if not remote_subclouds:
-                LOG.error("No subclouds in remote DC:%s's peer group %s" %
-                          (system_peer.peer_name,
-                           remote_peer_group.get("peer_group_name")))
-                return
-            local_subclouds_to_update = \
-                self._get_local_subclouds_to_update(local_peer_group,
-                                                    remote_subclouds)
+            local_subclouds_to_update, local_subclouds_to_delete = \
+                self._get_local_subclouds_to_update_and_delete(
+                    local_peer_group, remote_subclouds)
 
             for subcloud in local_subclouds_to_update:
+                self._set_local_subcloud_to_secondary(subcloud)
+
+            # Set local subclouds that do not exist in the peer
+            # site's SPG to secondary status, then delete them
+            for subcloud in local_subclouds_to_delete:
+                self._set_local_subcloud_to_secondary(subcloud)
                 try:
-                    LOG.info("Set secondary to local subcloud %s" %
-                             subcloud.name)
-                    # There will be an exception when unmanage
-                    # a subcloud in 'unamaged' state.
-                    if subcloud.management_state != \
-                        dccommon_consts.MANAGEMENT_UNMANAGED:
-                        self.subcloud_manager.update_subcloud(
-                            self.context,
-                            subcloud.id,
-                            management_state=dccommon_consts.
-                            MANAGEMENT_UNMANAGED)
-                    self.subcloud_manager.update_subcloud(
-                        self.context,
-                        subcloud.id,
-                        deploy_status=consts.DEPLOY_STATE_SECONDARY)
+                    self.subcloud_manager.delete_subcloud(
+                        self.context, subcloud.id)
+                    LOG.info(f"Deleted local subcloud {subcloud.name}")
                 except Exception as e:
-                    LOG.exception("Failed to update local non-secondary "
-                                  "and offline subcloud [%s], err: %s" %
-                                  (subcloud.name, e))
+                    LOG.exception(f"Failed to delete local subcloud "
+                                  f"[{subcloud.name}] that does not exist "
+                                  f"under the same subcloud_peer_group on "
+                                  f"peer site, err: {e}")
                     raise e
 
-            if local_subclouds_to_update:
+            if local_subclouds_to_update or local_subclouds_to_delete:
                 self._clear_or_raise_alarm(system_peer,
                                            local_peer_group,
                                            remote_peer_group)
@@ -229,10 +240,10 @@
         entity_instance_id = "peer_group=%s,peer=%s" % \
             (local_peer_group.peer_group_name, system_peer.peer_uuid)
         if local_peer_group.group_priority < remote_peer_group.get('group_priority'):
-            LOG.warning("Alarm: local subcloud peer group [%s] "
-                        "is managed by remote system [%s]" %
-                        (local_peer_group.peer_group_name,
-                         system_peer.peer_name))
+            LOG.warning("Alarm: local subcloud peer group ["
+                        f"{local_peer_group.peer_group_name}] "
+                        f"is managed by remote system ["
+                        f"{system_peer.peer_name}]")
             try:
                 fault = fm_api.Fault(
                     alarm_id=fm_const.
@@ -266,15 +277,15 @@
                 fm_const.FM_ALARM_ID_DC_SUBCLOUD_PEER_GROUP_NOT_MANAGED,
                 entity_instance_id)
             if fault:
-                LOG.info("Clear alarm: %s" % entity_instance_id)
+                LOG.info(f"Clear alarm: {entity_instance_id}")
                 self.fm_api.clear_fault(
                     fm_const.FM_ALARM_ID_DC_SUBCLOUD_PEER_GROUP_NOT_MANAGED,
                     entity_instance_id)
         except Exception:
             LOG.exception(
-                "Problem clearing fault [%s], alarm_id=%s" %
-                (entity_instance_id,
-                 fm_const.FM_ALARM_ID_DC_SUBCLOUD_PEER_GROUP_NOT_MANAGED))
+                f"Problem clearing fault [{entity_instance_id}], "
+                f"alarm_id="
+                f"{fm_const.FM_ALARM_ID_DC_SUBCLOUD_PEER_GROUP_NOT_MANAGED}")
 
     def _do_audit(self, system_peer, remote_peer_group, local_peer_group):
         with self.thread_lock:
@@ -286,15 +297,14 @@
     def stop(self):
         if self.thread:
             self.thread.join()
-            LOG.info("stopped peer group %s audit thread" % self.peer_group_id)
+            LOG.info(f"stopped peer group {self.peer_group_id} audit thread")
         else:
-            LOG.info("No peer group %s audit thread to stop" %
-                     self.peer_group_id)
+            LOG.info(f"No peer group {self.peer_group_id} audit thread to stop")
 
     def start(self, system_peer, remote_peer_group, local_peer_group):
         if self.thread_lock.locked():
-            LOG.warning('Audit thread for %s has already started' %
-                        local_peer_group.peer_group_name)
+            LOG.warning(f"Audit thread for {local_peer_group.peer_group_name} "
+                        f"has already started")
         else:
             self.thread = threading.Thread(
                 target=self._do_audit,
@@ -305,8 +315,8 @@
                            system_peer,
                            remote_peer_group,
                            local_peer_group):
-        LOG.info("Audit peer group [%s] with remote system %s" %
-                 (local_peer_group.peer_group_name, system_peer.peer_name))
+        LOG.info(f"Audit peer group [{local_peer_group.peer_group_name}] "
+                 f"with remote system {system_peer.peer_name}")
         self.start(system_peer, remote_peer_group, local_peer_group)
 
     @staticmethod
@@ -332,6 +342,6 @@
             if response:
                 return response
         except Exception:
-            LOG.exception("Failed to send audit request for peer group %s "
-                          "to DC: %s" %
-                          (peer_group.peer_group_name, system.peer_name))
+            LOG.exception("Failed to send audit request for peer group "
+                          f"{peer_group.peer_group_name} to DC: "
+                          f"{system.peer_name}")
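Reviewer note: the audit-side rework is, at its core, a partition of the local subclouds by their presence on the peer site, followed by the secondary-then-delete sequence for the absent ones. A minimal sketch of that partition under simplifying assumptions (plain dicts stand in for the DB objects and the remote API payload; names are illustrative, not the dcmanager API):

    def partition_local_subclouds(local_subclouds, remote_subclouds,
                                  is_secondary):
        # Index remote subclouds by region name, as the new
        # _get_local_subclouds_to_update_and_delete() does.
        remote_by_region = {r['region-name']: r for r in remote_subclouds}
        to_update, to_delete = [], []
        for local in local_subclouds:
            remote = remote_by_region.get(local['region_name'])
            if remote:
                # Present on the peer: update only when managed there
                # and not already secondary locally.
                if (remote['management-state'] == 'managed'
                        and not is_secondary(local['deploy_status'])):
                    to_update.append(local)
            else:
                # Absent on the peer: set to secondary, then delete.
                to_delete.append(local)
        return to_update, to_delete

    # Example: 'sc2' is missing from the remote SPG, so it is deleted.
    locals_ = [{'region_name': 'sc1', 'deploy_status': 'complete'},
               {'region_name': 'sc2', 'deploy_status': 'complete'}]
    remotes = [{'region-name': 'sc1', 'management-state': 'managed'}]
    to_update, to_delete = partition_local_subclouds(
        locals_, remotes, lambda status: status == 'secondary')
    assert [s['region_name'] for s in to_update] == ['sc1']
    assert [s['region_name'] for s in to_delete] == ['sc2']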