diff --git a/distributedcloud/dcmanager/api/controllers/v1/peer_group_association.py b/distributedcloud/dcmanager/api/controllers/v1/peer_group_association.py
index d13d8e09f..715338148 100644
--- a/distributedcloud/dcmanager/api/controllers/v1/peer_group_association.py
+++ b/distributedcloud/dcmanager/api/controllers/v1/peer_group_association.py
@@ -37,7 +37,8 @@ ASSOCIATION_SYNC_STATUS_LIST = \
     [consts.ASSOCIATION_SYNC_STATUS_SYNCING,
      consts.ASSOCIATION_SYNC_STATUS_IN_SYNC,
      consts.ASSOCIATION_SYNC_STATUS_OUT_OF_SYNC,
-     consts.ASSOCIATION_SYNC_STATUS_FAILED]
+     consts.ASSOCIATION_SYNC_STATUS_FAILED,
+     consts.ASSOCIATION_SYNC_STATUS_UNKNOWN]
 
 
 class PeerGroupAssociationsController(restcomm.GenericPathController):
@@ -336,8 +337,8 @@ class PeerGroupAssociationsController(restcomm.GenericPathController):
         try:
             # Ask dcmanager-manager to update the subcloud peer group priority
             # to peer site. It will do the real work...
-            return self.rpc_client.update_subcloud_peer_group(context,
-                                                              association.id)
+            return self.rpc_client.sync_subcloud_peer_group_only(
+                context, association.id)
         except RemoteError as e:
             pecan.abort(httpclient.UNPROCESSABLE_ENTITY, e.value)
         except Exception as e:
diff --git a/distributedcloud/dcmanager/api/controllers/v1/subcloud_peer_group.py b/distributedcloud/dcmanager/api/controllers/v1/subcloud_peer_group.py
index 35fcfb46c..f38b0653d 100644
--- a/distributedcloud/dcmanager/api/controllers/v1/subcloud_peer_group.py
+++ b/distributedcloud/dcmanager/api/controllers/v1/subcloud_peer_group.py
@@ -290,6 +290,25 @@ class SubcloudPeerGroupsController(restcomm.GenericPathController):
         ):
             pecan.abort(httpclient.BAD_REQUEST, _('nothing to update'))
 
+        # Flag indicating whether the update needs to be synced to
+        # the peer site(s).
+        sync_needed = ((peer_group_name and
+                        peer_group_name != group.peer_group_name) or
+                       (group_state and
+                        group_state != group.group_state) or
+                       (max_subcloud_rehoming is not None and
+                        max_subcloud_rehoming != group.max_subcloud_rehoming))
+
+        any_update = (sync_needed or
+                      ((group_priority is not None and
+                        group_priority != group.group_priority) or
+                       (system_leader_id and
+                        system_leader_id != group.system_leader_id) or
+                       (system_leader_name and
+                        system_leader_name != group.system_leader_name)))
+        if not any_update:
+            return db_api.subcloud_peer_group_db_model_to_dict(group)
+
         # Check value is not None or empty before calling validation function
         if (peer_group_name is not None and
                 not utils.validate_name(peer_group_name,
@@ -322,6 +341,27 @@ class SubcloudPeerGroupsController(restcomm.GenericPathController):
                 pecan.abort(httpclient.BAD_REQUEST,
                             _('Invalid migration_status'))
 
+        # Update on peer site(s)
+        if (not utils.is_req_from_another_dc(request)) and sync_needed:
+            success_peer_ids, failed_peer_ids = \
+                self.rpc_client.update_subcloud_peer_group(
+                    context, group.id, group_state, max_subcloud_rehoming,
+                    group.peer_group_name, peer_group_name)
+            if failed_peer_ids:
+                if not success_peer_ids:
+                    # Reject if the update failed on all peer sites
+                    pecan.abort(
+                        httpclient.FORBIDDEN,
+                        _('Unable to sync the update to the peer site(s)'))
+                # The local update continues if the failure is only
+                # partial.
+ LOG.error(f"Failed to sync the subcloud peer group " + f"{group.id} update on the peer site(s) " + f"{failed_peer_ids} with the values: " + f"{group_state=}, " + f"{max_subcloud_rehoming=}, " + f"{peer_group_name=}") + try: updated_peer_group = db_api.subcloud_peer_group_update( context, diff --git a/distributedcloud/dcmanager/api/controllers/v1/subclouds.py b/distributedcloud/dcmanager/api/controllers/v1/subclouds.py index 92c42964a..55582d9cb 100644 --- a/distributedcloud/dcmanager/api/controllers/v1/subclouds.py +++ b/distributedcloud/dcmanager/api/controllers/v1/subclouds.py @@ -648,14 +648,34 @@ class SubcloudsController(object): if not payload: pecan.abort(400, _('Body required')) + # Create a set to store the affected SPG(s) that need to be + # synced due to the subcloud update. This set will be utilized to + # update the sync_status in the corresponding PGA on each site. + sync_peer_groups = set() + + req_from_another_dc = utils.is_req_from_another_dc(request) + original_pgrp = None + leader_on_local_site = False + if subcloud.peer_group_id is not None: + # Get the original peer group of the subcloud + original_pgrp = db_api.subcloud_peer_group_get( + context, subcloud.peer_group_id) + leader_on_local_site = utils.is_leader_on_local_site(original_pgrp) + # A sync command is required after updating a subcloud + # in an SPG that is already associated with a PGA on the primary + # and leader site. The existence of the PGA will be checked + # by the update_association_sync_status method later. + if (original_pgrp.group_priority == 0 and + leader_on_local_site and + not req_from_another_dc): + sync_peer_groups.add(subcloud.peer_group_id) + peer_group = payload.get('peer_group') # Verify the peer_group is valid peer_group_id = None if peer_group is not None: # peer_group may be passed in the payload as an int or str peer_group = str(peer_group) - # Get current site system information - local_system_uuid = utils.get_local_system().uuid # Check if user wants to remove a subcloud # from a subcloud-peer-group by # setting peer_group_id as 'none', @@ -664,12 +684,9 @@ class SubcloudsController(object): # update_subcloud() will handle it and # Set the peer_group_id DB into None. 
if peer_group.lower() == 'none': - if subcloud.peer_group_id is not None: - # Get the peer group of the subcloud - original_pgrp = db_api.subcloud_peer_group_get( - context, subcloud.peer_group_id) + if original_pgrp: # Check the system leader is not on this site - if original_pgrp.system_leader_id != local_system_uuid: + if not leader_on_local_site: pecan.abort(400, _("Removing subcloud from a " "peer group not led by the " "current site is prohibited.")) @@ -691,23 +708,19 @@ class SubcloudsController(object): "is prohibited.")) peer_group_id = 'none' else: - if subcloud.peer_group_id is not None and \ + if original_pgrp and original_pgrp.group_priority > 0 and \ str(subcloud.peer_group_id) != peer_group: - original_pgrp = utils.subcloud_peer_group_get_by_ref( - context, str(subcloud.peer_group_id)) - if original_pgrp and original_pgrp.group_priority > 0: - pecan.abort(400, _( - "Cannot update subcloud to a new peer group " - "if the original peer group has non-zero " - "priority.")) + pecan.abort(400, _( + "Cannot update subcloud to a new peer group " + "if the original peer group has non-zero priority.")) pgrp = utils.subcloud_peer_group_get_by_ref(context, peer_group) if not pgrp: pecan.abort(400, _('Invalid peer group')) - if not utils.is_req_from_another_dc(request): + if not req_from_another_dc: if pgrp.group_priority > 0: pecan.abort(400, _("Cannot set the subcloud to a peer" " group with non-zero priority.")) - elif pgrp.system_leader_id != local_system_uuid: + elif not utils.is_leader_on_local_site(pgrp): pecan.abort(400, _("Update subcloud to a peer " "group that is not led by the " "current site is prohibited.")) @@ -720,24 +733,19 @@ class SubcloudsController(object): pecan.abort(400, _("Only subclouds that are " "managed and online can be " "added to a peer group.")) + sync_peer_groups.add(pgrp.id) peer_group_id = pgrp.id # Subcloud can only be updated while it is managed in # the primary site because the sync command can only be issued # in the site where the SPG was created. - if subcloud.peer_group_id is not None and peer_group_id is None \ - and not utils.is_req_from_another_dc(request): - # Get the peer group of the subcloud - original_pgrp = db_api.subcloud_peer_group_get( - context, subcloud.peer_group_id) + if original_pgrp and peer_group_id is None and not req_from_another_dc: if original_pgrp.group_priority > 0: pecan.abort(400, _("Subcloud update is only allowed when " "its peer group priority value is 0.")) - # Get current site system information - local_system_uuid = utils.get_local_system().uuid # Updating a subcloud under the peer group on primary site # that the peer group should be led by the primary site. - if original_pgrp.system_leader_id != local_system_uuid: + if not leader_on_local_site: pecan.abort(400, _("Updating subcloud from a " "peer group not led by the " "current site is prohibited.")) @@ -900,6 +908,23 @@ class SubcloudsController(object): bootstrap_values=bootstrap_values, bootstrap_address=bootstrap_address, deploy_status=deploy_status) + + if sync_peer_groups: + # Collect the affected peer group association IDs. + association_ids = set() + for pg_id in sync_peer_groups: + association_ids.update( + self.dcmanager_rpc_client. + update_association_sync_status( + context, pg_id, + consts.ASSOCIATION_SYNC_STATUS_OUT_OF_SYNC)) + # Generate an informative message for reminding the operator + # that the sync command(s) should be executed. 
+ info_message = utils.generate_sync_info_message( + association_ids) + if info_message: + subcloud["info_message"] = info_message + return subcloud except RemoteError as e: pecan.abort(422, e.value) diff --git a/distributedcloud/dcmanager/common/exceptions.py b/distributedcloud/dcmanager/common/exceptions.py index 7e7bf4f41..3c9bf0031 100644 --- a/distributedcloud/dcmanager/common/exceptions.py +++ b/distributedcloud/dcmanager/common/exceptions.py @@ -210,6 +210,10 @@ class SubcloudBackupOperationFailed(DCManagerException): "'dcmanager subcloud error' command for details") +class SubcloudSyncFailedException(DCManagerException): + message = _("Failed to sync subcloud update to peer site %(peer_id)s") + + class ConnectionRefused(DCManagerException): message = _("Connection to the service endpoint is refused") diff --git a/distributedcloud/dcmanager/common/utils.py b/distributedcloud/dcmanager/common/utils.py index ade803632..1685c8f78 100644 --- a/distributedcloud/dcmanager/common/utils.py +++ b/distributedcloud/dcmanager/common/utils.py @@ -1595,3 +1595,19 @@ def get_msg_output_info(log_file, target_task, target_str): def get_subcloud_ansible_log_file(subcloud_name): return os.path.join(consts.DC_ANSIBLE_LOG_DIR, subcloud_name + '_playbook_output.log') + + +def is_leader_on_local_site(peer_group): + return peer_group.system_leader_id == get_local_system().uuid + + +def generate_sync_info_message(association_ids): + info_message = None + if association_ids: + info_message = ("The operation has caused the SPG to be out-of-sync. " + "Please run peer-group-association sync command to push " + "the subcloud peer group changes to the peer site:\n") + for association_id in association_ids: + info_message += (f"$ dcmanager peer-group-association" + f" sync {association_id}\n") + return info_message diff --git a/distributedcloud/dcmanager/db/sqlalchemy/api.py b/distributedcloud/dcmanager/db/sqlalchemy/api.py index 81a6cbb52..39e52ca12 100644 --- a/distributedcloud/dcmanager/db/sqlalchemy/api.py +++ b/distributedcloud/dcmanager/db/sqlalchemy/api.py @@ -1272,7 +1272,10 @@ def peer_group_association_update(context, if sync_status is not None: association_ref.sync_status = sync_status if sync_message is not None: - association_ref.sync_message = sync_message + if str(sync_message).lower() == 'none': + association_ref.sync_message = None + else: + association_ref.sync_message = sync_message association_ref.save(session) return association_ref diff --git a/distributedcloud/dcmanager/manager/peer_group_audit_manager.py b/distributedcloud/dcmanager/manager/peer_group_audit_manager.py index 39f7e43ab..cf287ae17 100644 --- a/distributedcloud/dcmanager/manager/peer_group_audit_manager.py +++ b/distributedcloud/dcmanager/manager/peer_group_audit_manager.py @@ -176,7 +176,7 @@ class PeerGroupAuditManager(manager.Manager): LOG.exception(f"Fail to unmanage local subcloud " f"{subcloud.name}, err: {e}") raise e - SystemPeerManager.update_sync_status_on_peer_site( + SystemPeerManager.update_sync_status( self.context, system_peer, consts.ASSOCIATION_SYNC_STATUS_OUT_OF_SYNC, local_peer_group, remote_peer_group) @@ -208,7 +208,7 @@ class PeerGroupAuditManager(manager.Manager): self.context, subcloud.id) LOG.info(f"Deleted local subcloud {subcloud.name}") except Exception as e: - SystemPeerManager.update_sync_status_on_peer_site( + SystemPeerManager.update_sync_status( self.context, system_peer, consts.ASSOCIATION_SYNC_STATUS_OUT_OF_SYNC, local_peer_group, remote_peer_group) @@ -232,7 +232,7 @@ class 
PeerGroupAuditManager(manager.Manager): system_peer, remote_peer_group.get("peer_group_name"), None) - SystemPeerManager.update_sync_status_on_peer_site( + SystemPeerManager.update_sync_status( self.context, system_peer, consts.ASSOCIATION_SYNC_STATUS_IN_SYNC, local_peer_group, remote_peer_group) diff --git a/distributedcloud/dcmanager/manager/peer_monitor_manager.py b/distributedcloud/dcmanager/manager/peer_monitor_manager.py index 408900b8f..afdd61a85 100644 --- a/distributedcloud/dcmanager/manager/peer_monitor_manager.py +++ b/distributedcloud/dcmanager/manager/peer_monitor_manager.py @@ -161,7 +161,7 @@ class PeerMonitor(object): sync_status = consts.ASSOCIATION_SYNC_STATUS_IN_SYNC dc_local_pg = db_api.subcloud_peer_group_get( self.context, association.peer_group_id) - SystemPeerManager.update_sync_status_on_peer_site( + SystemPeerManager.update_sync_status( self.context, self.peer, sync_status, dc_local_pg, association=association) diff --git a/distributedcloud/dcmanager/manager/service.py b/distributedcloud/dcmanager/manager/service.py index 83fc82d82..dd75f14b7 100644 --- a/distributedcloud/dcmanager/manager/service.py +++ b/distributedcloud/dcmanager/manager/service.py @@ -337,6 +337,16 @@ class DCManagerService(service.Service): return self.system_peer_manager.sync_subcloud_peer_group( context, association_id, sync_subclouds) + @request_context + def update_subcloud_peer_group(self, context, peer_group_id, + group_state, max_subcloud_rehoming, + group_name, new_group_name=None): + LOG.info("Handling update_subcloud_peer_group request for " + "peer group %s" % peer_group_id) + return self.system_peer_manager.update_subcloud_peer_group( + context, peer_group_id, group_state, max_subcloud_rehoming, + group_name, new_group_name) + @request_context def delete_peer_group_association(self, context, association_id): LOG.info("Handling delete_peer_group_association request for: %s", @@ -344,6 +354,15 @@ class DCManagerService(service.Service): return self.system_peer_manager.delete_peer_group_association( context, association_id) + @request_context + def update_association_sync_status(self, context, peer_group_id, + sync_status, sync_message=None): + # Updates peer group association sync_status + LOG.info("Handling update_peer_association_sync_status request for: %s" + % peer_group_id) + return self.system_peer_manager.update_association_sync_status( + context, peer_group_id, sync_status, sync_message) + def _stop_rpc_server(self): # Stop RPC connection to prevent new requests LOG.debug(_("Attempting to stop RPC service...")) diff --git a/distributedcloud/dcmanager/manager/system_peer_manager.py b/distributedcloud/dcmanager/manager/system_peer_manager.py index 320272e21..b2a15dc34 100644 --- a/distributedcloud/dcmanager/manager/system_peer_manager.py +++ b/distributedcloud/dcmanager/manager/system_peer_manager.py @@ -66,10 +66,9 @@ class SystemPeerManager(manager.Manager): return [association] if association else [] @staticmethod - def update_sync_status_on_peer_site(ctx, peer, sync_status, local_pg=None, - remote_pg=None, message="None", - association=None): - """Update sync status of association on peer site. + def update_sync_status(ctx, peer, sync_status, local_pg=None, + remote_pg=None, message="None", association=None): + """Update sync status of association. 
This function updates the sync status of the association on the
         peer site and then updates the sync status of the association on the
@@ -84,7 +83,7 @@ class SystemPeerManager(manager.Manager):
         :param association: peer group association object
         """
 
-        def _update_association_on_peer_site(association, peer, sync_status,
+        def _update_association_on_peer_site(peer, sync_status,
                                              local_pg, remote_pg, message):
             try:
                 # Get peer site dcmanager client
@@ -120,7 +119,6 @@ class SystemPeerManager(manager.Manager):
 
             return sync_status, message
 
-        associations = list()
         if association is None:
             associations = SystemPeerManager.get_local_associations(
                 ctx, peer, local_pg)
@@ -138,7 +136,7 @@ class SystemPeerManager(manager.Manager):
             local_pg = local_pg if local_pg is not None else db_api.\
                 subcloud_peer_group_get(ctx, association.peer_group_id)
             sync_status, message = _update_association_on_peer_site(
-                association, peer, sync_status, local_pg, remote_pg, message)
+                peer, sync_status, local_pg, remote_pg, message)
 
             if association.sync_status == sync_status and sync_status != \
                     consts.ASSOCIATION_SYNC_STATUS_FAILED:
@@ -535,6 +533,120 @@ class SystemPeerManager(manager.Manager):
             failed_message,
             dc_peer_association_id)
 
+    def update_association_sync_status(self, context, peer_group_id,
+                                       sync_status, sync_message=None):
+        """Update PGA sync status on primary and peer site(s).
+
+        The update of the PGA sync status is always triggered on the primary
+        site, so this method can only be called on the primary site.
+
+        :param context: request context object
+        :param peer_group_id: id of the peer group used to fetch corresponding PGA
+        :param sync_status: the sync status to be updated to
+        :param sync_message: the sync message to be updated to
+        """
+        # Collect the association IDs to be synced.
+        out_of_sync_associations_ids = set()
+        local_peer_gp = db_api.subcloud_peer_group_get(self.context, peer_group_id)
+        # Get associations by peer group id
+        associations = db_api.peer_group_association_get_by_peer_group_id(
+            context, peer_group_id)
+        if not associations:
+            LOG.debug("No association found for peer group %s" % peer_group_id)
+        else:
+            for association in associations:
+                if sync_status == consts.ASSOCIATION_SYNC_STATUS_OUT_OF_SYNC:
+                    out_of_sync_associations_ids.add(association.id)
+
+                pre_sync_status = association.sync_status
+                new_sync_status = sync_status
+                new_sync_message = sync_message
+                if sync_status in (consts.ASSOCIATION_SYNC_STATUS_IN_SYNC,
+                                   consts.ASSOCIATION_SYNC_STATUS_OUT_OF_SYNC):
+                    # We don't need sync_message for the in-sync and
+                    # out-of-sync statuses, so clear any previously
+                    # remaining message for these two statuses.
+                    new_sync_message = 'None'
+
+                # If the local sync_status is already set to unknown, indicating
+                # that the peer site is unreachable, change the sync_status
+                # to failed directly on the local site without trying to sync
+                # the status to the peer site. When the peer site becomes
+                # reachable, the primary site monitor will update the failed
+                # status to out-of-sync on both sites.
+                if pre_sync_status == consts.ASSOCIATION_SYNC_STATUS_UNKNOWN:
+                    if sync_status != consts.ASSOCIATION_SYNC_STATUS_IN_SYNC:
+                        new_sync_status = consts.ASSOCIATION_SYNC_STATUS_FAILED
+                        new_sync_message = ("Failed to update sync_status, "
+                                            "because the peer site is unreachable.")
+                # Update peer site peer association sync_status
+                else:
+                    # Get system peer by peer id from the association
+                    system_peer = db_api.system_peer_get(
+                        context, association.system_peer_id)
+                    if pre_sync_status != sync_status:
+                        SystemPeerManager.update_sync_status(
+                            context, system_peer, sync_status, local_peer_gp,
+                            None, new_sync_message, association)
+                        # Already updated sync_status on both peer and local sites
+                        continue
+
+                if pre_sync_status != new_sync_status:
+                    # Update local site peer association sync_status
+                    db_api.peer_group_association_update(
+                        context,
+                        association.id,
+                        sync_status=new_sync_status,
+                        sync_message=new_sync_message)
+                    LOG.debug(
+                        f"Updated Local Peer Group Association {association.id} "
+                        f"sync_status to {new_sync_status}.")
+
+        return out_of_sync_associations_ids
+
+    def update_subcloud_peer_group(self, context, peer_group_id,
+                                   group_state, max_subcloud_rehoming,
+                                   group_name, new_group_name=None):
+        # Collect the success and failed peer ids.
+        success_peer_ids = set()
+        failed_peer_ids = set()
+
+        # Get associations by peer group id
+        associations = db_api.peer_group_association_get_by_peer_group_id(
+            context, peer_group_id)
+        if not associations:
+            LOG.info("No association found for peer group %s" % peer_group_id)
+        else:
+            for association in associations:
+                # Get system peer by peer id from the association
+                system_peer = db_api.system_peer_get(
+                    context, association.system_peer_id)
+                # Only sync to an 'available' system peer
+                if system_peer.availability_state != \
+                        consts.SYSTEM_PEER_AVAILABILITY_STATE_AVAILABLE:
+                    LOG.warning("Peer system %s offline" % system_peer.id)
+                    failed_peer_ids.add(system_peer.id)
+                else:
+                    peer_group_kwargs = {
+                        'peer-group-name': new_group_name,
+                        'group-state': group_state,
+                        'max-subcloud-rehoming': max_subcloud_rehoming
+                    }
+                    try:
+                        # Get a client for sending request to this system peer
+                        dc_client = self.get_peer_dc_client(system_peer)
+                        # Make an API call to update peer group on peer site
+                        dc_client.update_subcloud_peer_group(
+                            group_name, **peer_group_kwargs)
+                        success_peer_ids.add(system_peer.id)
+                    except Exception:
+                        LOG.error(f"Failed to update Subcloud Peer Group "
+                                  f"{group_name} on peer site {system_peer.id}"
+                                  f" with the values: {peer_group_kwargs}")
+                        failed_peer_ids.add(system_peer.id)
+
+        return success_peer_ids, failed_peer_ids
+
     def _get_non_primary_association(self, dc_client, dc_peer_system_peer_id,
                                      dc_peer_pg_id):
         """Get non-primary Association from peer site."""
@@ -690,9 +802,14 @@ class SystemPeerManager(manager.Manager):
             error_msg = self._sync_subclouds(context, peer, dc_local_pg.id,
                                              dc_peer_pg_id)
             if len(error_msg) > 0:
+                LOG.error(f"Failed to sync subcloud(s) in the Subcloud "
+                          f"Peer Group {peer_group_name}: "
+                          f"{json.dumps(error_msg)}")
                 association_update['sync_status'] = \
                     consts.ASSOCIATION_SYNC_STATUS_FAILED
-                association_update['sync_message'] = json.dumps(error_msg)
+                association_update['sync_message'] = \
+                    (f"Failed to sync {error_msg.keys()} in the Subcloud "
+                     f"Peer Group {peer_group_name}.")
             association = self._update_sync_status(
                 context, association_id, **association_update)
 
@@ -779,8 +896,14 @@ class SystemPeerManager(manager.Manager):
             'peer subcloud clean', clean_function,
             delete_pool, subclouds)
         if
delete_error_msg: + LOG.error(f"Failed to delete subcloud(s) from " + f"the Subcloud Peer Group {peer_group_name} " + f"on peer site: {json.dumps(delete_error_msg)}") + sync_message = (f"Deletion of {delete_error_msg.keys()} from " + f"the Subcloud Peer Group {peer_group_name} " + f"on the peer site failed.") self._update_sync_status_to_failed(context, association_id, - json.dumps(delete_error_msg)) + sync_message) return # System Peer does not exist on peer site, delete peer group diff --git a/distributedcloud/dcmanager/rpc/client.py b/distributedcloud/dcmanager/rpc/client.py index 88cf6d486..ac4ec50d2 100644 --- a/distributedcloud/dcmanager/rpc/client.py +++ b/distributedcloud/dcmanager/rpc/client.py @@ -265,15 +265,33 @@ class ManagerClient(RPCClient): return self.cast(ctxt, self.make_msg( 'sync_subcloud_peer_group', association_id=association_id)) - def update_subcloud_peer_group(self, ctxt, association_id): + def sync_subcloud_peer_group_only(self, ctxt, association_id): + # Without synchronizing subclouds return self.call(ctxt, self.make_msg( 'sync_subcloud_peer_group', association_id=association_id, sync_subclouds=False)) + def update_subcloud_peer_group(self, ctxt, peer_group_id, + group_state, max_subcloud_rehoming, + group_name, new_group_name=None): + return self.call(ctxt, self.make_msg( + 'update_subcloud_peer_group', + peer_group_id=peer_group_id, + group_state=group_state, + max_subcloud_rehoming=max_subcloud_rehoming, + group_name=group_name, new_group_name=new_group_name)) + def delete_peer_group_association(self, ctxt, association_id): return self.call(ctxt, self.make_msg('delete_peer_group_association', association_id=association_id)) + def update_association_sync_status(self, ctxt, peer_group_id, + sync_status, sync_message=None): + return self.call(ctxt, self.make_msg('update_association_sync_status', + peer_group_id=peer_group_id, + sync_status=sync_status, + sync_message=sync_message)) + def peer_monitor_notify(self, ctxt): return self.call(ctxt, self.make_msg('peer_monitor_notify')) diff --git a/distributedcloud/dcmanager/tests/unit/api/v1/controllers/test_peer_group_association.py b/distributedcloud/dcmanager/tests/unit/api/v1/controllers/test_peer_group_association.py index c14c9c8b3..3f4cb29ec 100644 --- a/distributedcloud/dcmanager/tests/unit/api/v1/controllers/test_peer_group_association.py +++ b/distributedcloud/dcmanager/tests/unit/api/v1/controllers/test_peer_group_association.py @@ -294,7 +294,7 @@ class TestPeerGroupAssociationUpdate(testroot.DCManagerApiTest, @mock.patch.object(rpc_client, 'ManagerClient') def test_update_success(self, mock_client): - mock_client().update_subcloud_peer_group.return_value = { + mock_client().sync_subcloud_peer_group_only.return_value = { 'peer-group-priority': SAMPLE_PEER_GROUP_PRIORITY_UPDATED } context = utils.dummy_context() diff --git a/distributedcloud/dcmanager/tests/unit/api/v1/controllers/test_subcloud_peer_group.py b/distributedcloud/dcmanager/tests/unit/api/v1/controllers/test_subcloud_peer_group.py index fc4add3cb..fe902f209 100644 --- a/distributedcloud/dcmanager/tests/unit/api/v1/controllers/test_subcloud_peer_group.py +++ b/distributedcloud/dcmanager/tests/unit/api/v1/controllers/test_subcloud_peer_group.py @@ -18,8 +18,12 @@ from dcmanager.tests import utils SAMPLE_SUBCLOUD_PEER_GROUP_NAME = 'GroupX' SAMPLE_SUBCLOUD_PEER_GROUP_MAX_SUBCLOUDS_REHOMING = 50 +SAMPLE_SUBCLOUD_PEER_GROUP_STATE = 'enabled' SAMPLE_SUBCLOUD_PEER_GROUP_PIRORITY = 0 +NEW_SUBCLOUD_PEER_GROUP_NAME = 'GroupY' 
+NEW_SUBCLOUD_PEER_GROUP_MAX_SUBCLOUDS_REHOMING = 20 + API_PREFIX = '/v1.0/subcloud-peer-groups' RESULT_KEY = 'subcloud_peer_groups' EXPECTED_FIELDS = ["id", @@ -74,7 +78,8 @@ class SubcloudPeerGroupAPIMixin(APIMixin): ), 'system_leader_name': kw.get('system_leader_name', 'dc-test'), 'group_priority': kw.get('group_priority', '0'), - 'group_state': kw.get('group_state', 'enabled'), + 'group_state': kw.get( + 'group_state', SAMPLE_SUBCLOUD_PEER_GROUP_STATE), 'max_subcloud_rehoming': kw.get( 'max_subcloud_rehoming', SAMPLE_SUBCLOUD_PEER_GROUP_MAX_SUBCLOUDS_REHOMING @@ -343,6 +348,29 @@ class TestSubcloudPeerGroupUpdate(testroot.DCManagerApiTest, self.assertEqual(response.content_type, 'text/plain') self.assertEqual(response.status_code, http_client.BAD_REQUEST) + @mock.patch.object(rpc_client, 'ManagerClient') + def test_rename_subcloud_peer_group(self, mock_client): + mock_client().update_subcloud_peer_group.return_value = \ + (set(), set(mock.MagicMock())) + context = utils.dummy_context() + single_obj = self._create_db_object(context) + update_data = { + 'peer-group-name': NEW_SUBCLOUD_PEER_GROUP_NAME, + 'max-subcloud-rehoming': + NEW_SUBCLOUD_PEER_GROUP_MAX_SUBCLOUDS_REHOMING + } + response = self.app.patch_json(self.get_single_url(single_obj.id), + headers=self.get_api_headers(), + params=update_data, + expect_errors=False) + self.assertEqual(response.status_code, http_client.OK) + + mock_client().update_subcloud_peer_group.assert_called_once_with( + mock.ANY, single_obj.id, None, + NEW_SUBCLOUD_PEER_GROUP_MAX_SUBCLOUDS_REHOMING, + SAMPLE_SUBCLOUD_PEER_GROUP_NAME, + NEW_SUBCLOUD_PEER_GROUP_NAME) + class TestSubcloudPeerGroupDelete(testroot.DCManagerApiTest, SubcloudPeerGroupAPIMixin): diff --git a/distributedcloud/dcmanager/tests/unit/api/v1/controllers/test_subclouds.py b/distributedcloud/dcmanager/tests/unit/api/v1/controllers/test_subclouds.py index 43a5953dd..d6a96ef40 100644 --- a/distributedcloud/dcmanager/tests/unit/api/v1/controllers/test_subclouds.py +++ b/distributedcloud/dcmanager/tests/unit/api/v1/controllers/test_subclouds.py @@ -45,6 +45,7 @@ from dcmanager.tests.unit.api.v1.controllers.mixins import PostMixin from dcmanager.tests.unit.common import fake_strategy from dcmanager.tests.unit.common import fake_subcloud from dcmanager.tests.unit.fakes import FakeVimStrategy +from dcmanager.tests.unit.manager import test_system_peer_manager from dcmanager.tests import utils SAMPLE_SUBCLOUD_NAME = "SubcloudX" @@ -62,6 +63,7 @@ FAKE_SUBCLOUD_INSTALL_VALUES_WITH_PERSISTENT_SIZE = ( ) FAKE_SUBCLOUD_BOOTSTRAP_PAYLOAD = fake_subcloud.FAKE_SUBCLOUD_BOOTSTRAP_PAYLOAD OAM_FLOATING_IP = "10.10.10.12" +FAKE_PEER_GROUP_ID = 10 FAKE_PATCH = {"value": {"patchstate": "Partial-Apply"}} @@ -1313,6 +1315,9 @@ class TestSubcloudAPIOther(testroot.DCManagerApiTest): self.assertEqual( dccommon_consts.MANAGEMENT_UNMANAGED, updated_subcloud.management_state ) + # Verify that the update PGA sync_status is not called since subcloud + # isn't associated with an SPG. + self.mock_rpc_client().update_association_sync_status.assert_not_called() @mock.patch.object(subclouds.SubcloudsController, "_get_patch_data") def test_update_subcloud_group_value(self, mock_get_patch_data): @@ -1407,6 +1412,10 @@ class TestSubcloudAPIOther(testroot.DCManagerApiTest): deploy_status=None) self.assertEqual(response.status_int, 200) + # Verify that the update PGA sync_status is not called since subcloud + # isn't associated with an SPG. 
+ self.mock_rpc_client().update_association_sync_status.assert_not_called() + @mock.patch.object(subclouds.SubcloudsController, "_check_existing_vim_strategy") @mock.patch.object(psd_common, "get_network_address_pool") @@ -1739,6 +1748,106 @@ class TestSubcloudAPIOther(testroot.DCManagerApiTest): ) self.mock_rpc_client().update_subcloud_endpoint_status.assert_not_called() + @mock.patch.object(subclouds.SubcloudsController, "_get_patch_data") + @mock.patch.object(cutils, 'is_leader_on_local_site') + @mock.patch.object(cutils, 'subcloud_peer_group_get_by_ref') + def test_add_subcloud_to_peer_group(self, mock_get_peer_group, + mock_is_leader_on_local_site, + mock_get_patch_data): + peer_group = test_system_peer_manager.TestSystemPeerManager. \ + create_subcloud_peer_group_static( + self.ctx, peer_group_name='SubcloudPeerGroup1') + mock_get_peer_group.return_value = peer_group + subcloud = fake_subcloud.create_fake_subcloud(self.ctx) + subcloud = db_api.subcloud_update( + self.ctx, + subcloud.id, + availability_status=dccommon_consts.AVAILABILITY_ONLINE, + deploy_status=consts.DEPLOY_STATE_DONE, + management_state=dccommon_consts.MANAGEMENT_MANAGED, + prestage_status=consts.PRESTAGE_STATE_COMPLETE + ) + mock_is_leader_on_local_site.return_value = True + data = {"peer_group": peer_group.id} + self.mock_rpc_client().update_subcloud.return_value = True + mock_get_patch_data.return_value = data + response = self.app.patch_json( + FAKE_URL + "/" + str(subcloud.id), headers=FAKE_HEADERS, + params=data + ) + self.assertEqual(response.status_int, 200) + self.mock_rpc_client().update_subcloud.assert_called_once() + self.mock_rpc_client().update_association_sync_status. \ + assert_called_once() + + @mock.patch.object(subclouds.SubcloudsController, "_get_patch_data") + @mock.patch.object(cutils, 'is_leader_on_local_site') + @mock.patch.object(cutils, 'subcloud_peer_group_get_by_ref') + def test_remove_subcloud_from_peer_group(self, mock_get_peer_group, + mock_is_leader_on_local_site, + mock_get_patch_data): + peer_group = test_system_peer_manager.TestSystemPeerManager. \ + create_subcloud_peer_group_static( + self.ctx, peer_group_name='SubcloudPeerGroup1') + mock_get_peer_group.return_value = peer_group + subcloud = fake_subcloud.create_fake_subcloud(self.ctx) + subcloud = db_api.subcloud_update( + self.ctx, + subcloud.id, + availability_status=dccommon_consts.AVAILABILITY_ONLINE, + deploy_status=consts.DEPLOY_STATE_DONE, + management_state=dccommon_consts.MANAGEMENT_MANAGED, + prestage_status=consts.PRESTAGE_STATE_COMPLETE, + peer_group_id=peer_group.id + ) + mock_is_leader_on_local_site.return_value = True + data = {"peer_group": "none"} + self.mock_rpc_client().update_subcloud.return_value = True + mock_get_patch_data.return_value = data + response = self.app.patch_json( + FAKE_URL + "/" + str(subcloud.id), headers=FAKE_HEADERS, + params=data + ) + self.assertEqual(response.status_int, 200) + self.mock_rpc_client().update_subcloud.assert_called_once() + self.mock_rpc_client().update_association_sync_status. \ + assert_called_once() + + @mock.patch.object(subclouds.SubcloudsController, "_get_patch_data") + @mock.patch.object(cutils, 'is_leader_on_local_site') + @mock.patch.object(cutils, 'subcloud_peer_group_get_by_ref') + def test_update_subcloud_on_non_primary_site(self, mock_get_peer_group, + mock_is_leader_on_local_site, + mock_get_patch_data): + peer_group = test_system_peer_manager.TestSystemPeerManager. 
\ + create_subcloud_peer_group_static( + self.ctx, peer_group_name='SubcloudPeerGroup1', group_priority=1) + mock_get_peer_group.return_value = peer_group + subcloud = fake_subcloud.create_fake_subcloud(self.ctx) + subcloud = db_api.subcloud_update( + self.ctx, + subcloud.id, + availability_status=dccommon_consts.AVAILABILITY_ONLINE, + deploy_status=consts.DEPLOY_STATE_DONE, + management_state=dccommon_consts.MANAGEMENT_MANAGED, + prestage_status=consts.PRESTAGE_STATE_COMPLETE, + peer_group_id=peer_group.id + ) + mock_is_leader_on_local_site.return_value = True + data = {"bootstrap-address": "10.10.10.11"} + mock_get_patch_data.return_value = data + six.assertRaisesRegex( + self, + webtest.app.AppError, + "400 *", + self.app.patch_json, + FAKE_URL + "/" + str(subcloud.id), + headers=FAKE_HEADERS, + params=data, + ) + self.mock_rpc_client().update_subcloud.assert_not_called() + self.mock_rpc_client().update_association_sync_status.assert_not_called() + def test_get_config_file_path(self): bootstrap_file = psd_common.get_config_file_path("subcloud1") install_values = psd_common.get_config_file_path( diff --git a/distributedcloud/dcmanager/tests/unit/manager/test_system_peer_manager.py b/distributedcloud/dcmanager/tests/unit/manager/test_system_peer_manager.py index a9dcb49d9..419a5cc46 100644 --- a/distributedcloud/dcmanager/tests/unit/manager/test_system_peer_manager.py +++ b/distributedcloud/dcmanager/tests/unit/manager/test_system_peer_manager.py @@ -40,6 +40,9 @@ FAKE_SITE0_PEER_GROUP_STATE = 'enabled' # FAKE SUBCLOUD PEER GROUP DATA (SITE1) FAKE_SITE1_PEER_GROUP_ID = 9 +FAKE_SITE1_PEER_GROUP_NAME = 'PeerGroup2' +FAKE_SITE1_PEER_GROUP_MAX_SUBCLOUDS_REHOMING = 20 +FAKE_SITE1_PEER_GROUP_STATE = 'enabled' # FAKE SYSTEM PEER DATA (SITE1) FAKE_SITE1_SYSTEM_PEER_ID = 10 @@ -156,7 +159,9 @@ class TestSystemPeerManager(base.DCManagerTestCase): 'endpoint': FAKE_MANAGER_ENDPOINT, 'username': FAKE_MANAGER_USERNAME, 'password': FAKE_MANAGER_PASSWORD, - 'gateway_ip': FAKE_PEER_CONTROLLER_GATEWAY_IP + 'gateway_ip': FAKE_PEER_CONTROLLER_GATEWAY_IP, + 'availability_state': + consts.SYSTEM_PEER_AVAILABILITY_STATE_AVAILABLE } values.update(kwargs) return db_api.system_peer_create(ctxt, **values) @@ -492,8 +497,7 @@ class TestSystemPeerManager(base.DCManagerTestCase): 'utils.get_local_system') @mock.patch('dcmanager.manager.system_peer_manager.' 
'SystemPeerManager.get_peer_dc_client') - def test_update_sync_status_on_peer_site( - self, mock_client, mock_utils): + def test_update_sync_status(self, mock_client, mock_utils): mock_dc_client = mock.MagicMock() mock_dc_client().get_subcloud_peer_group = mock.MagicMock() mock_dc_client().get_system_peer = mock.MagicMock() @@ -521,7 +525,7 @@ class TestSystemPeerManager(base.DCManagerTestCase): return_value = {'id': FAKE_SITE1_ASSOCIATION_ID} spm = system_peer_manager.SystemPeerManager(mock.MagicMock()) - spm.update_sync_status_on_peer_site( + spm.update_sync_status( self.ctx, peer, consts.ASSOCIATION_SYNC_STATUS_IN_SYNC) mock_dc_client().get_subcloud_peer_group.assert_called_once_with( @@ -539,3 +543,101 @@ class TestSystemPeerManager(base.DCManagerTestCase): self.ctx, association.id) self.assertEqual(consts.ASSOCIATION_SYNC_STATUS_IN_SYNC, association_new.sync_status) + + @mock.patch.object(system_peer_manager.SystemPeerManager, + 'update_sync_status') + def test_update_association_sync_status(self, mock_update_sync_status): + peer = self.create_system_peer_static( + self.ctx, + peer_name='SystemPeer1') + peer_group = self.create_subcloud_peer_group_static( + self.ctx, + peer_group_name='SubcloudPeerGroup1') + self.create_peer_group_association_static( + self.ctx, + system_peer_id=peer.id, + peer_group_id=peer_group.id, + sync_status=consts.ASSOCIATION_SYNC_STATUS_IN_SYNC) + + spm = system_peer_manager.SystemPeerManager(mock.MagicMock()) + spm.update_association_sync_status( + self.ctx, peer_group.id, + consts.ASSOCIATION_SYNC_STATUS_OUT_OF_SYNC) + + mock_update_sync_status.assert_called_once_with( + self.ctx, mock.ANY, consts.ASSOCIATION_SYNC_STATUS_OUT_OF_SYNC, + mock.ANY, None, 'None', mock.ANY) + + @mock.patch.object(system_peer_manager.SystemPeerManager, + 'update_sync_status') + def test_update_association_sync_status_when_peer_site_down( + self, mock_update_sync_status): + peer = self.create_system_peer_static( + self.ctx, + peer_name='SystemPeer1') + peer_group = self.create_subcloud_peer_group_static( + self.ctx, + peer_group_name='SubcloudPeerGroup1') + self.create_peer_group_association_static( + self.ctx, + system_peer_id=peer.id, + peer_group_id=peer_group.id, + sync_status=consts.ASSOCIATION_SYNC_STATUS_UNKNOWN) + + spm = system_peer_manager.SystemPeerManager(mock.MagicMock()) + spm.update_association_sync_status( + self.ctx, peer_group.id, + consts.ASSOCIATION_SYNC_STATUS_OUT_OF_SYNC) + + association = db_api. 
\ + peer_group_association_get_by_peer_group_and_system_peer_id( + self.ctx, peer_group.id, peer.id) + self.assertEqual(consts.ASSOCIATION_SYNC_STATUS_FAILED, + association.sync_status) + self.assertEqual("Failed to update sync_status, " + "because the peer site is unreachable.", + association.sync_message) + + mock_update_sync_status.assert_not_called() + + @mock.patch.object(system_peer_manager, 'PeerSiteDriver') + @mock.patch.object(system_peer_manager, 'SysinvClient') + @mock.patch.object(system_peer_manager, 'DcmanagerClient') + def test_update_subcloud_peer_group(self, + mock_dc_client, + mock_sysinv_client, + mock_keystone_client): + mock_keystone_client().keystone_client = FakeKeystoneClient() + mock_sysinv_client.return_value = FakeSysinvClient() + mock_dc_client.return_value = FakeDcmanagerClient() + mock_dc_client().get_subcloud_peer_group = mock.MagicMock() + mock_dc_client().update_subcloud_peer_group = mock.MagicMock() + mock_dc_client().get_system_peer = mock.MagicMock() + + peer = self.create_system_peer_static( + self.ctx, + peer_name='SystemPeer1') + peer_group = self.create_subcloud_peer_group_static( + self.ctx, + peer_group_name=FAKE_SITE0_PEER_GROUP_NAME) + self.create_peer_group_association_static( + self.ctx, + system_peer_id=peer.id, + peer_group_id=peer_group.id) + + spm = system_peer_manager.SystemPeerManager(mock.MagicMock()) + spm.update_subcloud_peer_group( + self.ctx, peer_group.id, + FAKE_SITE1_PEER_GROUP_STATE, + FAKE_SITE1_PEER_GROUP_MAX_SUBCLOUDS_REHOMING, + FAKE_SITE0_PEER_GROUP_NAME, + FAKE_SITE1_PEER_GROUP_NAME) + + peer_group_kwargs = { + 'peer-group-name': FAKE_SITE1_PEER_GROUP_NAME, + 'group-state': FAKE_SITE1_PEER_GROUP_STATE, + 'max-subcloud-rehoming': + FAKE_SITE1_PEER_GROUP_MAX_SUBCLOUDS_REHOMING + } + mock_dc_client().update_subcloud_peer_group.assert_called_once_with( + FAKE_SITE0_PEER_GROUP_NAME, **peer_group_kwargs)
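
For reference, the sync-status handling introduced by SystemPeerManager.update_association_sync_status reduces to a small decision rule. The following standalone sketch is not part of the change; it mirrors that rule with simplified names, and the constant values are assumptions used only so the unknown-to-failed short circuit can be read and exercised in isolation.

# Standalone sketch (assumed constant values) of the PGA sync-status rule.
ASSOCIATION_SYNC_STATUS_IN_SYNC = 'in-sync'
ASSOCIATION_SYNC_STATUS_OUT_OF_SYNC = 'out-of-sync'
ASSOCIATION_SYNC_STATUS_UNKNOWN = 'unknown'
ASSOCIATION_SYNC_STATUS_FAILED = 'failed'


def resolve_local_status(pre_sync_status, requested_status):
    """Return (new_status, new_message, push_to_peer) for the local PGA."""
    message = None
    if requested_status in (ASSOCIATION_SYNC_STATUS_IN_SYNC,
                            ASSOCIATION_SYNC_STATUS_OUT_OF_SYNC):
        # No sync_message is kept for these two statuses.
        message = 'None'
    if pre_sync_status == ASSOCIATION_SYNC_STATUS_UNKNOWN:
        # The peer site is unreachable: fail locally and skip the peer update.
        if requested_status != ASSOCIATION_SYNC_STATUS_IN_SYNC:
            return (ASSOCIATION_SYNC_STATUS_FAILED,
                    "Failed to update sync_status, "
                    "because the peer site is unreachable.",
                    False)
        return requested_status, message, False
    # Reachable peer: the status is pushed to the peer site first, but only
    # when it actually changes.
    return requested_status, message, pre_sync_status != requested_status


status, msg, push = resolve_local_status(ASSOCIATION_SYNC_STATUS_UNKNOWN,
                                         ASSOCIATION_SYNC_STATUS_OUT_OF_SYNC)
assert status == ASSOCIATION_SYNC_STATUS_FAILED and push is False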
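
The two new ManagerClient RPC calls return plain collections that the API layer inspects: update_subcloud_peer_group yields (success_peer_ids, failed_peer_ids) and update_association_sync_status yields the IDs of the associations that went out-of-sync. The sketch below uses a stub client; the stub class and its return values are assumptions standing in for the real RPC plumbing, and only illustrate how a caller is expected to interpret the results.

class StubManagerClient:
    """Stand-in for the real ManagerClient; return shapes are assumed."""

    def update_subcloud_peer_group(self, ctxt, peer_group_id, group_state,
                                   max_subcloud_rehoming, group_name,
                                   new_group_name=None):
        # Returns (success_peer_ids, failed_peer_ids)
        return {1}, {2}

    def update_association_sync_status(self, ctxt, peer_group_id,
                                       sync_status, sync_message=None):
        # Returns the IDs of the associations that went out-of-sync
        return {12}


client = StubManagerClient()

success, failed = client.update_subcloud_peer_group(
    None, 3, 'enabled', 20, 'GroupX', new_group_name='GroupY')
if failed and not success:
    print("all peer sites failed: reject the local update")
elif failed:
    print(f"partial failure on peer site(s) {failed}: "
          "continue locally and log the error")

association_ids = client.update_association_sync_status(None, 3, 'out-of-sync')
for association_id in sorted(association_ids):
    print(f"$ dcmanager peer-group-association sync {association_id}")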
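
Finally, the operator-facing reminder built by generate_sync_info_message can be exercised on its own. The helper below is reproduced from the change for illustration only, and the association ID passed to it is an arbitrary example.

def generate_sync_info_message(association_ids):
    info_message = None
    if association_ids:
        info_message = ("The operation has caused the SPG to be out-of-sync. "
                        "Please run peer-group-association sync command to push "
                        "the subcloud peer group changes to the peer site:\n")
        for association_id in association_ids:
            info_message += (f"$ dcmanager peer-group-association"
                             f" sync {association_id}\n")
    return info_message


# Prints the reminder attached to the subcloud PATCH response as
# "info_message", ending with:
#   $ dcmanager peer-group-association sync 2
print(generate_sync_info_message([2]))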