diff --git a/api-ref/source/api-ref-dcmanager-v1.rst b/api-ref/source/api-ref-dcmanager-v1.rst index b128c2804..85bd8cbd3 100644 --- a/api-ref/source/api-ref-dcmanager-v1.rst +++ b/api-ref/source/api-ref-dcmanager-v1.rst @@ -2333,7 +2333,7 @@ Response - peer-controller-gateway-address: peer_controller_gateway_address - administrative-state: administrative_state - heartbeat-interval: heartbeat_interval - - heartbeat-failure-threshold: heartbeat_failure_threshold + - heartbeat-failure-threshold: heartbeat_failure_threshold - heartbeat-failure-policy: heartbeat_failure_policy - heartbeat-maintenance-timeout: heartbeat_maintenance_timeout - created-at: created_at @@ -2377,7 +2377,7 @@ serviceUnavailable (503) - peer_controller_gateway_address: peer_controller_gateway_address - administrative_state: administrative_state - heartbeat_interval: heartbeat_interval - - heartbeat_failure_threshold: heartbeat_failure_threshold + - heartbeat_failure_threshold: heartbeat_failure_threshold - heartbeat_failure_policy: heartbeat_failure_policy - heartbeat_maintenance_timeout: heartbeat_maintenance_timeout @@ -2400,7 +2400,7 @@ Request Example - peer-controller-gateway-address: peer_controller_gateway_address - administrative-state: administrative_state - heartbeat-interval: heartbeat_interval - - heartbeat-failure-threshold: heartbeat_failure_threshold + - heartbeat-failure-threshold: heartbeat_failure_threshold - heartbeat-failure-policy: heartbeat_failure_policy - heartbeat-maintenance-timeout: heartbeat_maintenance_timeout - created-at: created_at @@ -2449,7 +2449,7 @@ This operation does not accept a request body. - peer-controller-gateway-address: peer_controller_gateway_address - administrative-state: administrative_state - heartbeat-interval: heartbeat_interval - - heartbeat-failure-threshold: heartbeat_failure_threshold + - heartbeat-failure-threshold: heartbeat_failure_threshold - heartbeat-failure-policy: heartbeat_failure_policy - heartbeat-maintenance-timeout: heartbeat_maintenance_timeout - created-at: created_at @@ -2561,7 +2561,7 @@ serviceUnavailable (503) - peer_controller_gateway_address: peer_controller_gateway_address - administrative_state: administrative_state - heartbeat_interval: heartbeat_interval - - heartbeat_failure_threshold: heartbeat_failure_threshold + - heartbeat_failure_threshold: heartbeat_failure_threshold - heartbeat_failure_policy: heartbeat_failure_policy - heartbeat_maintenance_timeout: heartbeat_maintenance_timeout @@ -2582,7 +2582,7 @@ Request Example - peer-controller-gateway-address: peer_controller_gateway_address - administrative-state: administrative_state - heartbeat-interval: heartbeat_interval - - heartbeat-failure-threshold: heartbeat_failure_threshold + - heartbeat-failure-threshold: heartbeat_failure_threshold - heartbeat-failure-policy: heartbeat_failure_policy - heartbeat-maintenance-timeout: heartbeat_maintenance_timeout - created-at: created_at @@ -2625,7 +2625,7 @@ Subcloud Peer Groups Subcloud Peer Groups are logical groupings managed by a central System Controller. It's a group of the current managed subclouds which are supposed to be duplicated -in a peer site as secondary subclouds +in a peer site as secondary subclouds ****************************** Lists all subcloud peer groups @@ -3040,6 +3040,7 @@ internalServerError (500), serviceUnavailable (503) - peer-group-id: association_peer_group_id - system-peer-id: system_peer_id - peer-group-priority: association_peer_group_priority + - association-type: association_type - sync-status: association_sync_status - created-at: created_at - updated-at: updated_at @@ -3089,6 +3090,7 @@ Request Example - peer-group-id: association_peer_group_id - system-peer-id: system_peer_id - peer-group-priority: association_peer_group_priority + - association-type: association_type - sync-status: association_sync_status - sync-message: association_sync_message - created-at: created_at @@ -3133,6 +3135,7 @@ This operation does not accept a request body. - peer-group-id: association_peer_group_id - system-peer-id: system_peer_id - peer-group-priority: association_peer_group_priority + - association-type: association_type - sync-status: association_sync_status - sync-message: association_sync_message - created-at: created_at @@ -3175,6 +3178,7 @@ internalServerError (500), serviceUnavailable (503) - peer-group-id: association_peer_group_id - system-peer-id: system_peer_id - peer-group-priority: association_peer_group_priority + - association-type: association_type - sync-status: association_sync_status - sync-message: association_sync_message - created-at: created_at @@ -3213,6 +3217,7 @@ serviceUnavailable (503) - associate_id: peer_group_association_uri - peer_group_priority: association_peer_group_priority + - sync-status: association_sync_status Request Example ---------------- @@ -3227,6 +3232,7 @@ Request Example - peer-group-id: association_peer_group_id - system-peer-id: system_peer_id - peer-group-priority: association_peer_group_priority + - association-type: association_type - sync-status: association_sync_status - sync-message: association_sync_message - created-at: created_at diff --git a/api-ref/source/parameters.yaml b/api-ref/source/parameters.yaml index 5e53ab231..4d9786fbb 100644 --- a/api-ref/source/parameters.yaml +++ b/api-ref/source/parameters.yaml @@ -108,6 +108,12 @@ association_sync_status: in: body required: true type: string +association_type: + description: | + The type of association. + in: body + required: true + type: string availability_status: description: | The availability status of the subcloud. diff --git a/api-ref/source/samples/peer-group-associations/association-get-response.json b/api-ref/source/samples/peer-group-associations/association-get-response.json index 3aef416e5..7f69208fd 100644 --- a/api-ref/source/samples/peer-group-associations/association-get-response.json +++ b/api-ref/source/samples/peer-group-associations/association-get-response.json @@ -2,6 +2,7 @@ "id": 9, "peer-group-id": 1, "system-peer-id": 1, + "association-type": "primary", "peer-group-priority": 1, "sync-status": "synced", "sync-message": null, diff --git a/api-ref/source/samples/peer-group-associations/association-patch-response.json b/api-ref/source/samples/peer-group-associations/association-patch-response.json index 0d0b3917d..6e3c3df28 100644 --- a/api-ref/source/samples/peer-group-associations/association-patch-response.json +++ b/api-ref/source/samples/peer-group-associations/association-patch-response.json @@ -2,6 +2,7 @@ "id": 9, "peer-group-id": 1, "system-peer-id": 1, + "association-type": "primary", "peer-group-priority": 99, "sync-status": "synced", "sync-message": null, diff --git a/api-ref/source/samples/peer-group-associations/associations-get-response.json b/api-ref/source/samples/peer-group-associations/associations-get-response.json index 05152196e..2a3e5eb3d 100644 --- a/api-ref/source/samples/peer-group-associations/associations-get-response.json +++ b/api-ref/source/samples/peer-group-associations/associations-get-response.json @@ -4,6 +4,7 @@ "id": 9, "peer-group-id": 1, "system-peer-id": 1, + "association-type": "primary", "peer-group-priority": 1, "sync-status": "synced", "created-at": "2023-08-21 09:24:07.394961", diff --git a/api-ref/source/samples/peer-group-associations/associations-post-response.json b/api-ref/source/samples/peer-group-associations/associations-post-response.json index d01bacea8..0b6e9b6f6 100644 --- a/api-ref/source/samples/peer-group-associations/associations-post-response.json +++ b/api-ref/source/samples/peer-group-associations/associations-post-response.json @@ -2,6 +2,7 @@ "id": 9, "peer-group-id": 1, "system-peer-id": 1, + "association-type": "primary", "peer-group-priority": 1, "sync-status": "syncing", "sync-message": null, diff --git a/distributedcloud/dccommon/drivers/openstack/dcmanager_v1.py b/distributedcloud/dccommon/drivers/openstack/dcmanager_v1.py index 22d527d14..56bfe440a 100644 --- a/distributedcloud/dccommon/drivers/openstack/dcmanager_v1.py +++ b/distributedcloud/dccommon/drivers/openstack/dcmanager_v1.py @@ -33,6 +33,27 @@ class DcmanagerClient(base.DriverBase): self.token = session.get_token() self.timeout = timeout + def get_system_peer(self, system_peer_uuid): + """Get system peer.""" + if system_peer_uuid is None: + raise ValueError("system_peer_uuid is required.") + url = f"{self.endpoint}/system-peers/{system_peer_uuid}" + + headers = {"X-Auth-Token": self.token} + response = requests.get(url, headers=headers, timeout=self.timeout) + + if response.status_code == 200: + return response.json() + else: + if response.status_code == 404 and \ + 'System Peer not found' in response.text: + raise exceptions.SystemPeerNotFound( + system_peer=system_peer_uuid) + message = "Get SystemPeer: system_peer_uuid %s failed with RC: %d" \ + % (system_peer_uuid, response.status_code) + LOG.error(message) + raise Exception(message) + def get_subcloud(self, subcloud_ref, is_region_name=False): """Get subcloud.""" if subcloud_ref is None: @@ -57,7 +78,7 @@ class DcmanagerClient(base.DriverBase): def get_subcloud_list(self): """Get subcloud list.""" - url = self.endpoint + '/subclouds' + url = f"{self.endpoint}/subclouds" headers = {"X-Auth-Token": self.token} response = requests.get(url, headers=headers, timeout=self.timeout) @@ -73,7 +94,7 @@ class DcmanagerClient(base.DriverBase): def get_subcloud_group_list(self): """Get subcloud group list.""" - url = self.endpoint + '/subcloud-groups' + url = f"{self.endpoint}/subcloud-groups" headers = {"X-Auth-Token": self.token} response = requests.get(url, headers=headers, timeout=self.timeout) @@ -89,7 +110,7 @@ class DcmanagerClient(base.DriverBase): def get_subcloud_peer_group_list(self): """Get subcloud peer group list.""" - url = self.endpoint + '/subcloud-peer-groups' + url = f"{self.endpoint}/subcloud-peer-groups" headers = {"X-Auth-Token": self.token} response = requests.get(url, headers=headers, timeout=self.timeout) @@ -147,9 +168,35 @@ class DcmanagerClient(base.DriverBase): LOG.error(message) raise Exception(message) + def get_peer_group_association_with_peer_id_and_pg_id(self, peer_id, + pg_id): + """Get peer group association with peer id and PG id.""" + for association in self.get_peer_group_association_list(): + if peer_id == association.get('system-peer-id') and \ + pg_id == association.get('peer-group-id'): + return association + raise exceptions.PeerGroupAssociationNotFound( + association_id=None) + + def get_peer_group_association_list(self): + """Get peer group association list.""" + url = f"{self.endpoint}/peer-group-associations" + + headers = {"X-Auth-Token": self.token} + response = requests.get(url, headers=headers, timeout=self.timeout) + + if response.status_code == 200: + data = response.json() + return data.get('peer_group_associations', []) + else: + message = "Get Peer Group Association list failed with RC: %d" % \ + response.status_code + LOG.error(message) + raise Exception(message) + def add_subcloud_peer_group(self, **kwargs): """Add a subcloud peer group.""" - url = self.endpoint + '/subcloud-peer-groups' + url = f"{self.endpoint}/subcloud-peer-groups" headers = {"X-Auth-Token": self.token, "Content-Type": "application/json"} @@ -166,7 +213,7 @@ class DcmanagerClient(base.DriverBase): def add_subcloud_with_secondary_status(self, files, data): """Add a subcloud with secondary status.""" - url = self.endpoint + '/subclouds' + url = f"{self.endpoint}/subclouds" # If not explicitly specified, set 'secondary' to true by default. # This action adds a secondary subcloud with rehoming data in the @@ -197,6 +244,49 @@ class DcmanagerClient(base.DriverBase): LOG.error(message) raise Exception(message) + def add_peer_group_association(self, **kwargs): + """Add a peer group association.""" + url = f"{self.endpoint}/peer-group-associations" + + headers = {"X-Auth-Token": self.token, + "Content-Type": "application/json"} + response = requests.post(url, json=kwargs, headers=headers, + timeout=self.timeout) + + if response.status_code == 200: + return response.json() + else: + message = "Add Peer Group Association: %s, failed with RC: %d" % \ + (kwargs, response.status_code) + LOG.error(message) + raise Exception(message) + + def update_peer_group_association_sync_status(self, association_id, + sync_status): + """Update the peer group association sync_status.""" + if association_id is None: + raise ValueError("association_id is required.") + url = f"{self.endpoint}/peer-group-associations/{association_id}" + update_kwargs = {"sync_status": sync_status} + + headers = {"X-Auth-Token": self.token, + "Content-Type": "application/json"} + response = requests.patch(url, json=update_kwargs, headers=headers, + timeout=self.timeout) + + if response.status_code == 200: + return response.json() + else: + if response.status_code == 404 and \ + 'Peer Group Association not found' in response.text: + raise exceptions.PeerGroupAssociationNotFound( + association_id=association_id) + message = "Update Peer Group Association: association_id %s, " \ + "sync_status %s, failed with RC: %d" % ( + association_id, sync_status, response.status_code) + LOG.error(message) + raise Exception(message) + def update_subcloud_peer_group(self, peer_group_ref, **kwargs): """Update the subcloud peer group.""" if peer_group_ref is None: @@ -280,6 +370,28 @@ class DcmanagerClient(base.DriverBase): LOG.error(message) raise Exception(message) + def delete_peer_group_association(self, association_id): + """Delete the peer group association.""" + if association_id is None: + raise ValueError("association_id is required.") + url = f"{self.endpoint}/peer-group-associations/{association_id}" + + headers = {"X-Auth-Token": self.token} + response = requests.delete(url, headers=headers, + timeout=self.timeout) + + if response.status_code == 200: + return response.json() + else: + if response.status_code == 404 and \ + 'Peer Group Association not found' in response.text: + raise exceptions.PeerGroupAssociationNotFound( + association_id=association_id) + message = "Delete Peer Group Association: association_id %s " \ + "failed with RC: %d" % (association_id, response.status_code) + LOG.error(message) + raise Exception(message) + def delete_subcloud_peer_group(self, peer_group_ref): """Delete the subcloud peer group.""" if peer_group_ref is None: diff --git a/distributedcloud/dccommon/exceptions.py b/distributedcloud/dccommon/exceptions.py index 40863f616..da174d4bc 100644 --- a/distributedcloud/dccommon/exceptions.py +++ b/distributedcloud/dccommon/exceptions.py @@ -125,6 +125,10 @@ class ApiException(DCCommonException): message = _("%(endpoint)s failed with status code: %(rc)d") +class SystemPeerNotFound(NotFound): + message = _("System Peer %(system_peer)s not found") + + class SubcloudNotFound(NotFound): message = _("Subcloud %(subcloud_ref)s not found") @@ -133,6 +137,10 @@ class SubcloudPeerGroupNotFound(NotFound): message = _("Subcloud Peer Group %(peer_group_ref)s not found") +class PeerGroupAssociationNotFound(NotFound): + message = _("Peer Group Association %(association_id)s not found") + + class SubcloudPeerGroupDeleteFailedAssociated(DCCommonException): message = _("Subcloud Peer Group %(peer_group_ref)s delete failed " "cause it is associated with a system peer.") diff --git a/distributedcloud/dcmanager/api/controllers/v1/peer_group_association.py b/distributedcloud/dcmanager/api/controllers/v1/peer_group_association.py index 2f6da213b..5c7143085 100644 --- a/distributedcloud/dcmanager/api/controllers/v1/peer_group_association.py +++ b/distributedcloud/dcmanager/api/controllers/v1/peer_group_association.py @@ -31,8 +31,14 @@ from dcmanager.rpc import client as rpc_client CONF = cfg.CONF LOG = logging.getLogger(__name__) +PEER_GROUP_PRIMARY_PRIORITY = 0 MIN_PEER_GROUP_ASSOCIATION_PRIORITY = 1 MAX_PEER_GROUP_ASSOCIATION_PRIORITY = 65536 +ASSOCIATION_SYNC_STATUS_LIST = \ + [consts.ASSOCIATION_SYNC_STATUS_SYNCING, + consts.ASSOCIATION_SYNC_STATUS_IN_SYNC, + consts.ASSOCIATION_SYNC_STATUS_OUT_OF_SYNC, + consts.ASSOCIATION_SYNC_STATUS_FAILED] class PeerGroupAssociationsController(restcomm.GenericPathController): @@ -150,6 +156,12 @@ class PeerGroupAssociationsController(restcomm.GenericPathController): return False return True + def _validate_sync_status(self, sync_status): + if sync_status not in ASSOCIATION_SYNC_STATUS_LIST: + LOG.debug("Invalid sync_status: %s" % sync_status) + return False + return True + @index.when(method='POST', template='json') def post(self): """Create a new peer group association.""" @@ -174,20 +186,22 @@ class PeerGroupAssociationsController(restcomm.GenericPathController): peer_group_priority = payload.get('peer_group_priority') peer_group = db_api.subcloud_peer_group_get(context, peer_group_id) - if (peer_group.group_priority == 0 and not peer_group_priority) or \ - (peer_group.group_priority > 0 and peer_group_priority): - pecan.abort(httpclient.BAD_REQUEST, - _('Peer Group Association create with peer_group_' - 'priority is required when the subcloud peer group ' - 'priority is 0, and it is not allowed when the ' - 'subcloud peer group priority is greater than 0.')) - - if peer_group_priority and not self._validate_peer_group_priority( - peer_group_priority): + if peer_group_priority is not None and not \ + self._validate_peer_group_priority(peer_group_priority): pecan.abort(httpclient.BAD_REQUEST, _('Invalid peer_group_priority')) - sync_enabled = peer_group.group_priority == 0 + if (peer_group.group_priority == PEER_GROUP_PRIMARY_PRIORITY and + peer_group_priority is None) or ( + peer_group.group_priority > PEER_GROUP_PRIMARY_PRIORITY and + peer_group_priority is not None): + pecan.abort(httpclient.BAD_REQUEST, + _('Peer Group Association create is not allowed when ' + 'the subcloud peer group priority is greater than 0 ' + 'and it is required when the subcloud peer group ' + 'priority is 0.')) + + is_primary = peer_group.group_priority == PEER_GROUP_PRIMARY_PRIORITY # only one combination of peer_group_id + system_peer_id can exists association = None @@ -198,9 +212,8 @@ class PeerGroupAssociationsController(restcomm.GenericPathController): peer_group_id, system_peer_id) except exception.PeerGroupAssociationCombinationNotFound: - LOG.warning("Peer Group Association Combination Not Found, " - "peer_group_id: %s, system_peer_id: %s" - % (peer_group_id, system_peer_id)) + # This is a normal scenario, no need to log or raise an error + pass except Exception as e: LOG.warning("Peer Group Association get failed: %s;" "peer_group_id: %s, system_peer_id: %s" @@ -209,22 +222,22 @@ class PeerGroupAssociationsController(restcomm.GenericPathController): _('peer_group_association_get_by_peer_group_and_' 'system_peer_id failed: %s' % e)) if association: - LOG.info("Failed to create Peer group association, association \ - with peer_group_id:[%s],system_peer_id:[%s] \ - already exists" % (peer_group_id, system_peer_id)) + LOG.warning("Failed to create Peer group association, association " + "with peer_group_id:[%s],system_peer_id:[%s] " + "already exists" % (peer_group_id, system_peer_id)) pecan.abort(httpclient.BAD_REQUEST, _('A Peer group association with same peer_group_id, ' 'system_peer_id already exists')) # Create the peer group association try: - sync_status = consts.ASSOCIATION_SYNC_STATUS_SYNCING if \ - sync_enabled else consts.ASSOCIATION_SYNC_STATUS_DISABLED + association_type = consts.ASSOCIATION_TYPE_PRIMARY if is_primary \ + else consts.ASSOCIATION_TYPE_NON_PRIMARY association = db_api.peer_group_association_create( context, peer_group_id, system_peer_id, peer_group_priority, - sync_status) + association_type, consts.ASSOCIATION_SYNC_STATUS_SYNCING) - if sync_enabled: + if is_primary: # Sync the subcloud peer group to peer site self.rpc_client.sync_subcloud_peer_group(context, association.id) else: @@ -237,6 +250,103 @@ class PeerGroupAssociationsController(restcomm.GenericPathController): pecan.abort(httpclient.INTERNAL_SERVER_ERROR, _('Unable to create peer group association')) + def _sync_association(self, context, association, is_non_primary): + if is_non_primary: + self.rpc_client.peer_monitor_notify(context) + pecan.abort(httpclient.BAD_REQUEST, + _('Peer Group Association sync is not allowed ' + 'when the association type is non-primary. But the ' + 'peer monitor notify was triggered.')) + else: + peer_group = db_api.subcloud_peer_group_get( + context, association.peer_group_id) + if not self._validate_peer_group_leader_id(peer_group. + system_leader_id): + pecan.abort(httpclient.BAD_REQUEST, + _('Peer Group Association sync is not allowed when ' + 'the subcloud peer group system_leader_id is not ' + 'the current system controller UUID.')) + try: + # Sync the subcloud peer group to peer site + self.rpc_client.sync_subcloud_peer_group(context, + association.id) + association = db_api.peer_group_association_update( + context, id=association.id, + sync_status=consts.ASSOCIATION_SYNC_STATUS_SYNCING, + sync_message='None') + return db_api.peer_group_association_db_model_to_dict( + association) + except RemoteError as e: + pecan.abort(httpclient.UNPROCESSABLE_ENTITY, e.value) + except Exception as e: + # additional exceptions. + LOG.exception(e) + pecan.abort(httpclient.INTERNAL_SERVER_ERROR, + _('Unable to sync peer group association')) + + def _update_association(self, context, association, is_non_primary): + payload = self._get_payload(request) + if not payload: + pecan.abort(httpclient.BAD_REQUEST, _('Body required')) + + peer_group_priority = payload.get('peer_group_priority') + sync_status = payload.get('sync_status') + # Check value is not None or empty before calling validate + if not (peer_group_priority is not None or sync_status): + pecan.abort(httpclient.BAD_REQUEST, _('nothing to update')) + elif peer_group_priority is not None and sync_status: + pecan.abort(httpclient.BAD_REQUEST, + _('peer_group_priority and sync_status cannot be ' + 'updated at the same time.')) + if peer_group_priority is not None: + if not self._validate_peer_group_priority(peer_group_priority): + pecan.abort(httpclient.BAD_REQUEST, + _('Invalid peer_group_priority')) + if is_non_primary: + self.rpc_client.peer_monitor_notify(context) + pecan.abort(httpclient.BAD_REQUEST, + _('Peer Group Association peer_group_priority is ' + 'not allowed to update when the association type ' + 'is non-primary.')) + else: + db_api.peer_group_association_update( + context, id=association.id, + peer_group_priority=peer_group_priority) + if sync_status: + if not self._validate_sync_status(sync_status): + pecan.abort(httpclient.BAD_REQUEST, + _('Invalid sync_status')) + + if not is_non_primary: + self.rpc_client.peer_monitor_notify(context) + pecan.abort(httpclient.BAD_REQUEST, + _('Peer Group Association sync_status is not ' + 'allowed to update when the association type is ' + 'primary.')) + else: + sync_message = 'Primary association sync to current site ' + \ + 'failed.' if sync_status == \ + consts.ASSOCIATION_SYNC_STATUS_FAILED else 'None' + association = db_api.peer_group_association_update( + context, id=association.id, sync_status=sync_status, + sync_message=sync_message) + self.rpc_client.peer_monitor_notify(context) + return db_api.peer_group_association_db_model_to_dict( + association) + + try: + # Ask dcmanager-manager to update the subcloud peer group priority + # to peer site. It will do the real work... + return self.rpc_client.update_subcloud_peer_group(context, + association.id) + except RemoteError as e: + pecan.abort(httpclient.UNPROCESSABLE_ENTITY, e.value) + except Exception as e: + # additional exceptions. + LOG.exception(e) + pecan.abort(httpclient.INTERNAL_SERVER_ERROR, + _('Unable to update peer group association')) + @index.when(method='PATCH', template='json') def patch(self, association_id, sync=False): """Update a peer group association. @@ -262,64 +372,13 @@ class PeerGroupAssociationsController(restcomm.GenericPathController): pecan.abort(httpclient.NOT_FOUND, _('Peer Group Association not found')) - sync_disabled = association.sync_status == consts.\ - ASSOCIATION_SYNC_STATUS_DISABLED - if sync_disabled: - pecan.abort(httpclient.BAD_REQUEST, - _('Peer Group Association sync or update is not allowed' - ' when the sync_status is disabled.')) + is_non_primary = association.association_type == consts.\ + ASSOCIATION_TYPE_NON_PRIMARY if sync: - peer_group = db_api.subcloud_peer_group_get( - context, association.peer_group_id) - if not self._validate_peer_group_leader_id(peer_group. - system_leader_id): - pecan.abort(httpclient.BAD_REQUEST, - _('Peer Group Association sync is not allowed when ' - 'the subcloud peer group system_leader_id is not ' - 'the current system controller UUID.')) - try: - # Sync the subcloud peer group to peer site - self.rpc_client.sync_subcloud_peer_group(context, - association.id) - association = db_api.peer_group_association_update( - context, id=association_id, - sync_status=consts.ASSOCIATION_SYNC_STATUS_SYNCING, - sync_message='None') - return db_api.peer_group_association_db_model_to_dict( - association) - except RemoteError as e: - pecan.abort(httpclient.UNPROCESSABLE_ENTITY, e.value) - except Exception as e: - # additional exceptions. - LOG.exception(e) - pecan.abort(httpclient.INTERNAL_SERVER_ERROR, - _('Unable to sync peer group association')) - - payload = self._get_payload(request) - if not payload: - pecan.abort(httpclient.BAD_REQUEST, _('Body required')) - - peer_group_priority = payload.get('peer_group_priority') - # Check value is not None or empty before calling validate - if not peer_group_priority: - pecan.abort(httpclient.BAD_REQUEST, _('nothing to update')) - if not self._validate_peer_group_priority(peer_group_priority): - pecan.abort(httpclient.BAD_REQUEST, - _('Invalid peer_group_priority')) - - try: - # Ask dcmanager-manager to update the subcloud peer group priority - # to peer site. It will do the real work... - return self.rpc_client.update_subcloud_peer_group( - context, association.id, peer_group_priority) - except RemoteError as e: - pecan.abort(httpclient.UNPROCESSABLE_ENTITY, e.value) - except Exception as e: - # additional exceptions. - LOG.exception(e) - pecan.abort(httpclient.INTERNAL_SERVER_ERROR, - _('Unable to update peer group association')) + return self._sync_association(context, association, is_non_primary) + else: + return self._update_association(context, association, is_non_primary) @index.when(method='delete', template='json') def delete(self, association_id): @@ -342,17 +401,18 @@ class PeerGroupAssociationsController(restcomm.GenericPathController): try: association = db_api.peer_group_association_get(context, association_id) - sync_disabled = association.sync_status == consts.\ - ASSOCIATION_SYNC_STATUS_DISABLED - if sync_disabled: + is_non_primary = association.association_type == consts.\ + ASSOCIATION_TYPE_NON_PRIMARY + if is_non_primary: result = db_api.peer_group_association_destroy(context, association_id) self.rpc_client.peer_monitor_notify(context) return result - # Ask system-peer-manager to delete the association. - # It will do all the real work... - return self.rpc_client.delete_peer_group_association( - context, association_id) + else: + # Ask system-peer-manager to delete the association. + # It will do all the real work... + return self.rpc_client.delete_peer_group_association( + context, association_id) except exception.PeerGroupAssociationNotFound: pecan.abort(httpclient.NOT_FOUND, _('Peer Group Association not found')) diff --git a/distributedcloud/dcmanager/api/controllers/v1/subcloud_peer_group.py b/distributedcloud/dcmanager/api/controllers/v1/subcloud_peer_group.py index ad7769466..2116da781 100644 --- a/distributedcloud/dcmanager/api/controllers/v1/subcloud_peer_group.py +++ b/distributedcloud/dcmanager/api/controllers/v1/subcloud_peer_group.py @@ -539,10 +539,11 @@ class SubcloudPeerGroupsController(restcomm.GenericPathController): LOG.info("Subcloud Peer Group [%s] not found" % group_ref) pecan.abort(httpclient.NOT_FOUND, _('Subcloud Peer Group not found')) - LOG.info("Handling delete subcloud peer group request for: %s" % group) + LOG.info("Handling delete subcloud peer group request for: %s" % + group) # A peer group cannot be deleted if it is used by any associations - association = db_api.peer_group_association_get_by_peer_group_id(context, - group.id) + association = db_api.peer_group_association_get_by_peer_group_id( + context, group.id) if len(association) > 0: pecan.abort(httpclient.BAD_REQUEST, _("Cannot delete a peer group " diff --git a/distributedcloud/dcmanager/api/controllers/v1/subclouds.py b/distributedcloud/dcmanager/api/controllers/v1/subclouds.py index b0b413450..6ada99d73 100644 --- a/distributedcloud/dcmanager/api/controllers/v1/subclouds.py +++ b/distributedcloud/dcmanager/api/controllers/v1/subclouds.py @@ -556,7 +556,7 @@ class SubcloudsController(object): # Ask dcmanager-manager to add the subcloud. # It will do all the real work... # If the subcloud is secondary, it will be synchronous operation. - # A normal subcloud add will be a synchronous operation. + # A normal subcloud add will be asynchronous operation. if 'secondary' in payload: self.dcmanager_rpc_client.add_secondary_subcloud( context, subcloud.id, payload) diff --git a/distributedcloud/dcmanager/api/controllers/v1/system_peers.py b/distributedcloud/dcmanager/api/controllers/v1/system_peers.py old mode 100755 new mode 100644 diff --git a/distributedcloud/dcmanager/common/consts.py b/distributedcloud/dcmanager/common/consts.py index 9dcabd06d..72f47ce68 100644 --- a/distributedcloud/dcmanager/common/consts.py +++ b/distributedcloud/dcmanager/common/consts.py @@ -466,11 +466,15 @@ PEER_GROUP_MIGRATING = 'migrating' PEER_GROUP_MIGRATION_COMPLETE = 'complete' PEER_GROUP_MIGRATION_NONE = 'none' +# Peer group association type +ASSOCIATION_TYPE_PRIMARY = 'primary' +ASSOCIATION_TYPE_NON_PRIMARY = 'non-primary' + # Peer group association sync status ASSOCIATION_SYNC_STATUS_SYNCING = 'syncing' -ASSOCIATION_SYNC_STATUS_SYNCED = 'synced' +ASSOCIATION_SYNC_STATUS_IN_SYNC = 'in-sync' ASSOCIATION_SYNC_STATUS_FAILED = 'failed' -ASSOCIATION_SYNC_STATUS_DISABLED = 'disabled' +ASSOCIATION_SYNC_STATUS_OUT_OF_SYNC = 'out-of-sync' # Peer monitor heartbeat policy HEARTBEAT_FAILURE_POLICY_ALARM = 'alarm' diff --git a/distributedcloud/dcmanager/db/api.py b/distributedcloud/dcmanager/db/api.py index 457550786..4d651b81d 100644 --- a/distributedcloud/dcmanager/db/api.py +++ b/distributedcloud/dcmanager/db/api.py @@ -555,6 +555,7 @@ def peer_group_association_db_model_to_dict(peer_group_association): "peer-group-id": peer_group_association.peer_group_id, "system-peer-id": peer_group_association.system_peer_id, "peer-group-priority": peer_group_association.peer_group_priority, + "association-type": peer_group_association.association_type, "sync-status": peer_group_association.sync_status, "sync-message": peer_group_association.sync_message, "created-at": peer_group_association.created_at, @@ -563,13 +564,14 @@ def peer_group_association_db_model_to_dict(peer_group_association): def peer_group_association_create(context, peer_group_id, system_peer_id, - peer_group_priority, sync_status=None, - sync_message=None): + peer_group_priority, association_type=None, + sync_status=None, sync_message=None): """Create a peer_group_association.""" return IMPL.peer_group_association_create(context, peer_group_id, system_peer_id, peer_group_priority, + association_type, sync_status, sync_message) diff --git a/distributedcloud/dcmanager/db/sqlalchemy/api.py b/distributedcloud/dcmanager/db/sqlalchemy/api.py index 69d6b8004..f98676985 100644 --- a/distributedcloud/dcmanager/db/sqlalchemy/api.py +++ b/distributedcloud/dcmanager/db/sqlalchemy/api.py @@ -1241,6 +1241,7 @@ def peer_group_association_create(context, peer_group_id, system_peer_id, peer_group_priority, + association_type, sync_status, sync_message): with write_session() as session: @@ -1248,6 +1249,7 @@ def peer_group_association_create(context, peer_group_association_ref.peer_group_id = peer_group_id peer_group_association_ref.system_peer_id = system_peer_id peer_group_association_ref.peer_group_priority = peer_group_priority + peer_group_association_ref.association_type = association_type peer_group_association_ref.sync_status = sync_status peer_group_association_ref.sync_message = sync_message session.add(peer_group_association_ref) diff --git a/distributedcloud/dcmanager/db/sqlalchemy/migrate_repo/versions/014_add_subcloud_peer_group_and_association.py b/distributedcloud/dcmanager/db/sqlalchemy/migrate_repo/versions/014_add_subcloud_peer_group_and_association.py index 535afffa8..9d794e969 100644 --- a/distributedcloud/dcmanager/db/sqlalchemy/migrate_repo/versions/014_add_subcloud_peer_group_and_association.py +++ b/distributedcloud/dcmanager/db/sqlalchemy/migrate_repo/versions/014_add_subcloud_peer_group_and_association.py @@ -87,6 +87,7 @@ def upgrade(migrate_engine): sqlalchemy.ForeignKey('system_peer.id', ondelete='CASCADE')), sqlalchemy.Column('peer_group_priority', sqlalchemy.Integer), + sqlalchemy.Column('association_type', sqlalchemy.String(255)), sqlalchemy.Column('sync_status', sqlalchemy.String(255)), sqlalchemy.Column('sync_message', sqlalchemy.Text), sqlalchemy.Column('reserved_1', sqlalchemy.Text), diff --git a/distributedcloud/dcmanager/db/sqlalchemy/models.py b/distributedcloud/dcmanager/db/sqlalchemy/models.py index 3ab05d375..e637b31ed 100644 --- a/distributedcloud/dcmanager/db/sqlalchemy/models.py +++ b/distributedcloud/dcmanager/db/sqlalchemy/models.py @@ -155,6 +155,7 @@ class PeerGroupAssociation(BASE, DCManagerBase): id = Column(Integer, primary_key=True, autoincrement=True, nullable=False) peer_group_id = Column(Integer) system_peer_id = Column(Integer) + association_type = Column(String(255)) peer_group_priority = Column(Integer) sync_status = Column(String(255)) sync_message = Column(Text()) diff --git a/distributedcloud/dcmanager/manager/peer_group_audit_manager.py b/distributedcloud/dcmanager/manager/peer_group_audit_manager.py index 28377a5bc..f370a5270 100644 --- a/distributedcloud/dcmanager/manager/peer_group_audit_manager.py +++ b/distributedcloud/dcmanager/manager/peer_group_audit_manager.py @@ -156,11 +156,12 @@ class PeerGroupAuditManager(manager.Manager): raise e self.require_audit_flag = False - # if remote subcloud peer group's migration_status is 'complete', - # Get remote subclouds, for 'managed+online' subclouds, - # set 'unmanaged+secondary' to local on same subclouds + # if remote subcloud peer group's migration_status is 'complete' + # or self.require_audit_flag is True, get remote subclouds. + # For 'managed+online' subclouds, set 'unmanaged+secondary' to + # local on same subclouds elif remote_peer_group.get("migration_status") == \ - consts.PEER_GROUP_MIGRATION_COMPLETE: + consts.PEER_GROUP_MIGRATION_COMPLETE or self.require_audit_flag: remote_subclouds = \ self._get_subclouds_by_peer_group_from_system_peer( system_peer, diff --git a/distributedcloud/dcmanager/manager/peer_monitor_manager.py b/distributedcloud/dcmanager/manager/peer_monitor_manager.py index 44d4365d2..0f836280c 100644 --- a/distributedcloud/dcmanager/manager/peer_monitor_manager.py +++ b/distributedcloud/dcmanager/manager/peer_monitor_manager.py @@ -216,6 +216,7 @@ class PeerMonitor(object): pgam.PeerGroupAuditManager(self.subcloud_manager, peer_group_id) self.peer_group_id_set = peer_group_id_set + self._set_require_audit_flag_to_associated_peer_groups() def start(self): if self.thread is not None: diff --git a/distributedcloud/dcmanager/manager/service.py b/distributedcloud/dcmanager/manager/service.py index 4afa2b7be..ae58c19b5 100644 --- a/distributedcloud/dcmanager/manager/service.py +++ b/distributedcloud/dcmanager/manager/service.py @@ -115,6 +115,7 @@ class DCManagerService(service.Service): os.makedirs(dccommon_consts.ANSIBLE_OVERRIDES_PATH, 0o600, exist_ok=True) self.subcloud_manager.handle_subcloud_operations_in_progress() + self.system_peer_manager.handle_association_operations_in_progress() # Send notify to peer monitor. self.peer_monitor_manager.peer_monitor_notify(self.context) @@ -332,11 +333,11 @@ class DCManagerService(service.Service): @request_context def sync_subcloud_peer_group(self, context, association_id, - sync_subclouds=True, priority=None): + sync_subclouds=True): LOG.info("Handling sync_subcloud_peer_group request for: %s", association_id) return self.system_peer_manager.sync_subcloud_peer_group( - context, association_id, sync_subclouds, priority) + context, association_id, sync_subclouds) @request_context def delete_peer_group_association(self, context, association_id): diff --git a/distributedcloud/dcmanager/manager/system_peer_manager.py b/distributedcloud/dcmanager/manager/system_peer_manager.py index f3e7988e5..ce3afdd13 100644 --- a/distributedcloud/dcmanager/manager/system_peer_manager.py +++ b/distributedcloud/dcmanager/manager/system_peer_manager.py @@ -19,9 +19,11 @@ from dccommon.drivers.openstack.peer_site import PeerSiteDriver from dccommon.drivers.openstack.sysinv_v1 import SysinvClient from dccommon import exceptions as dccommon_exceptions from dcmanager.common import consts +from dcmanager.common import context as dcmanager_context from dcmanager.common import exceptions from dcmanager.common.i18n import _ from dcmanager.common import manager +from dcmanager.common import utils from dcmanager.db import api as db_api LOG = logging.getLogger(__name__) @@ -32,12 +34,17 @@ MAX_PARALLEL_SUBCLOUD_DELETE = 10 VERIFY_SUBCLOUD_SYNC_VALID = 'valid' VERIFY_SUBCLOUD_SYNC_IGNORE = 'ignore' +TRANSITORY_STATES = { + consts.ASSOCIATION_SYNC_STATUS_SYNCING: consts.ASSOCIATION_SYNC_STATUS_FAILED +} + class SystemPeerManager(manager.Manager): """Manages tasks related to system peers.""" def __init__(self, peer_monitor_manager, *args, **kwargs): LOG.debug(_('SystemPeerManager initialization...')) + self.context = dcmanager_context.get_admin_context() self.peer_monitor_manager = peer_monitor_manager super(SystemPeerManager, self).__init__( service_name="system_peer_manager", *args, **kwargs) @@ -192,7 +199,7 @@ class SystemPeerManager(manager.Manager): files, data) LOG.info(f"Updated Subcloud {dc_peer_subcloud.get('name')} " "(region_name: " - f"{dc_peer_subcloud.get('region_name')}) on peer " + f"{dc_peer_subcloud.get('region-name')}) on peer " "site.") else: # Create subcloud on peer site if not exist @@ -200,15 +207,18 @@ class SystemPeerManager(manager.Manager): add_subcloud_with_secondary_status(files, data) LOG.info(f"Created Subcloud {dc_peer_subcloud.get('name')} " "(region_name: " - f"{dc_peer_subcloud.get('region_name')}) on peer " + f"{dc_peer_subcloud.get('region-name')}) on peer " "site.") LOG.debug(f"Updating subcloud {subcloud_name} (region_name: " f"{region_name}) with subcloud peer group id " f"{dc_peer_pg_id} on peer site.") - # Update subcloud associated peer group on peer site + # Update subcloud associated peer group on peer site. + # The peer_group update will check the header and should + # use the region_name as subcloud_ref. peer_subcloud = dc_client.update_subcloud( - subcloud_name, files=None, data={"peer_group": - str(dc_peer_pg_id)}) + dc_peer_subcloud.get('region-name'), files=None, + data={"peer_group": str(dc_peer_pg_id)}, + is_region_name=True) # Need to check the subcloud only in secondary, otherwise it # should be recorded as a failure. @@ -382,14 +392,73 @@ class SystemPeerManager(manager.Manager): return error_msg + def _update_sync_status(self, context, association_id, sync_status, + sync_message, dc_peer_association_id=None, + dc_client=None, **kwargs): + """Update sync status of association.""" + if dc_peer_association_id is not None: + if dc_client is None: + association = db_api.peer_group_association_get(context, + association_id) + peer = db_api.system_peer_get(context, + association.system_peer_id) + dc_client = self.get_peer_dc_client(peer) + dc_client.update_peer_group_association_sync_status( + dc_peer_association_id, sync_status) + LOG.info(f"Updated non-primary Peer Group Association " + f"{dc_peer_association_id} sync_status to {sync_status}.") + return db_api.peer_group_association_update( + context, association_id, sync_status=sync_status, + sync_message=sync_message, **kwargs) + + def _update_sync_status_to_failed(self, context, association_id, + failed_message, + dc_peer_association_id=None): + """Update sync status to failed.""" + return self._update_sync_status(context, association_id, + consts.ASSOCIATION_SYNC_STATUS_FAILED, + failed_message, + dc_peer_association_id) + + def _get_non_primary_association(self, dc_client, dc_peer_system_peer_id, + dc_peer_pg_id): + """Get non-primary Association from peer site.""" + try: + return dc_client.get_peer_group_association_with_peer_id_and_pg_id( + dc_peer_system_peer_id, dc_peer_pg_id) + except dccommon_exceptions.PeerGroupAssociationNotFound: + LOG.error(f"Peer Group association does not exist on peer site." + f"Peer Group ID: {dc_peer_pg_id}, Peer System Peer ID: " + f"{dc_peer_system_peer_id}") + return None + + def _get_peer_site_pg_by_name(self, dc_client, peer_group_name): + """Get remote Peer Group from peer site by name.""" + try: + return dc_client.get_subcloud_peer_group(peer_group_name) + except dccommon_exceptions.SubcloudPeerGroupNotFound: + LOG.error(f"Peer Group {peer_group_name} does not exist on peer " + f"site.") + return None + + def _get_peer_site_system_peer(self, dc_client, peer_uuid=None): + """Get System Peer from peer site.""" + try: + peer_uuid = peer_uuid if peer_uuid is not None else \ + utils.get_local_system().uuid + return dc_client.get_system_peer(peer_uuid) + except dccommon_exceptions.SystemPeerNotFound: + LOG.error(f"Peer Site System Peer {peer_uuid} does not exist.") + return None + def sync_subcloud_peer_group(self, context, association_id, - sync_subclouds=True, priority=None): + sync_subclouds=True): """Sync subcloud peer group to peer site. This function synchronizes subcloud peer groups from current site to peer site, supporting two scenarios: - 1. When creating an association between the system peer and a subcloud + 1. When creating the association between the system peer and a subcloud peer group. This function creates the subcloud peer group on the peer site and synchronizes the subclouds to it. @@ -411,13 +480,12 @@ class SystemPeerManager(manager.Manager): dc_local_pg = db_api.subcloud_peer_group_get(context, association.peer_group_id) peer_group_name = dc_local_pg.peer_group_name + dc_peer_association_id = None try: - sysinv_client = self.get_peer_sysinv_client(peer) - # Check if the system_uuid of the peer site matches with the # peer_uuid - system = sysinv_client.get_system() + system = self.get_peer_sysinv_client(peer).get_system() if system.uuid != peer.peer_uuid: LOG.error(f"Peer site system uuid {system.uuid} does not match " f"with the peer_uuid {peer.peer_uuid}") @@ -426,6 +494,65 @@ class SystemPeerManager(manager.Manager): dc_client = self.get_peer_dc_client(peer) + # Get current site system information + local_system_uuid = utils.get_local_system().uuid + + # Get peer site system peer + dc_peer_system_peer = self._get_peer_site_system_peer( + dc_client, local_system_uuid) + + if dc_peer_system_peer is None: + failed_message = f"System Peer {local_system_uuid} does not" + \ + " exist on peer site." + return db_api.peer_group_association_db_model_to_dict( + self._update_sync_status_to_failed(context, association_id, + failed_message)) + dc_peer_system_peer_id = dc_peer_system_peer.get('id') + + # Get peer site peer group, create if not exist + dc_peer_pg = self._get_peer_site_pg_by_name(dc_client, + peer_group_name) + if dc_peer_pg is None: + peer_group_kwargs = { + 'group-priority': association.peer_group_priority, + 'group-state': dc_local_pg.group_state, + 'system-leader-id': dc_local_pg.system_leader_id, + 'system-leader-name': dc_local_pg.system_leader_name, + 'max-subcloud-rehoming': dc_local_pg.max_subcloud_rehoming + } + peer_group_kwargs['peer-group-name'] = peer_group_name + dc_peer_pg = dc_client.add_subcloud_peer_group( + **peer_group_kwargs) + LOG.info(f"Created Subcloud Peer Group {peer_group_name} on " + f"peer site. ID is {dc_peer_pg.get('id')}.") + dc_peer_pg_id = dc_peer_pg.get('id') + dc_peer_pg_priority = dc_peer_pg.get('group_priority') + # Check if the peer group priority is 0, if so, raise exception + if dc_peer_pg_priority == 0: + LOG.error(f"Skip update. Peer Site {peer_group_name} " + f"has priority 0.") + raise exceptions.SubcloudPeerGroupHasWrongPriority( + priority=dc_peer_pg_priority) + + # Get peer site non-primary association, create if not exist + dc_peer_association = self._get_non_primary_association( + dc_client, dc_peer_system_peer_id, dc_peer_pg_id) + if dc_peer_association is None: + non_primary_association_kwargs = { + 'peer_group_id': dc_peer_pg_id, + 'system_peer_id': dc_peer_system_peer_id + } + dc_peer_association = dc_client.add_peer_group_association( + **non_primary_association_kwargs) + LOG.info(f"Created \"non-primary\" Peer Group Association " + f"{dc_peer_association.get('id')} on peer site.") + dc_peer_association_id = dc_peer_association.get("id") + + # Update peer group association sync status to syncing + dc_client.update_peer_group_association_sync_status( + dc_peer_association_id, consts.ASSOCIATION_SYNC_STATUS_SYNCING) + + # Update peer group on peer site peer_group_kwargs = { 'group-priority': association.peer_group_priority, 'group-state': dc_local_pg.group_state, @@ -433,34 +560,16 @@ class SystemPeerManager(manager.Manager): 'system-leader-name': dc_local_pg.system_leader_name, 'max-subcloud-rehoming': dc_local_pg.max_subcloud_rehoming } - if priority: - peer_group_kwargs['group-priority'] = priority - try: - dc_peer_pg = dc_client.get_subcloud_peer_group( - peer_group_name) - dc_peer_pg_id = dc_peer_pg.get('id') - dc_peer_pg_priority = dc_peer_pg.get('group_priority') - if dc_peer_pg_priority == 0: - LOG.error(f"Skip update. Peer Site {peer_group_name} " - f"has priority 0.") - raise exceptions.SubcloudPeerGroupHasWrongPriority( - priority=dc_peer_pg_priority) - - dc_peer_pg = dc_client.update_subcloud_peer_group( - peer_group_name, **peer_group_kwargs) - LOG.info(f"Updated Subcloud Peer Group {peer_group_name} on " - f"peer site, ID is {dc_peer_pg.get('id')}.") - except dccommon_exceptions.SubcloudPeerGroupNotFound: - peer_group_kwargs['peer-group-name'] = peer_group_name - dc_peer_pg = dc_client.add_subcloud_peer_group( - **peer_group_kwargs) - dc_peer_pg_id = dc_peer_pg.get('id') - LOG.info(f"Created Subcloud Peer Group {peer_group_name} on " - f"peer site, ID is {dc_peer_pg_id}.") + dc_peer_pg = dc_client.update_subcloud_peer_group( + peer_group_name, **peer_group_kwargs) + LOG.info(f"Updated Subcloud Peer Group {peer_group_name} on " + f"peer site, ID is {dc_peer_pg.get('id')}.") association_update = { - 'sync_status': consts.ASSOCIATION_SYNC_STATUS_SYNCED, - 'sync_message': None + 'sync_status': consts.ASSOCIATION_SYNC_STATUS_IN_SYNC, + 'sync_message': 'None', + 'dc_peer_association_id': dc_peer_association_id, + 'dc_client': dc_client } if sync_subclouds: error_msg = self._sync_subclouds(context, peer, dc_local_pg.id, @@ -469,9 +578,7 @@ class SystemPeerManager(manager.Manager): association_update['sync_status'] = \ consts.ASSOCIATION_SYNC_STATUS_FAILED association_update['sync_message'] = json.dumps(error_msg) - if priority: - association_update['peer_group_priority'] = priority - association = db_api.peer_group_association_update( + association = self._update_sync_status( context, association_id, **association_update) self.peer_monitor_manager.peer_monitor_notify(context) @@ -481,12 +588,17 @@ class SystemPeerManager(manager.Manager): except Exception as exception: LOG.exception(f"Failed to sync peer group {peer_group_name} to " f"peer site {peer.peer_name}") - db_api.peer_group_association_update( - context, association_id, - sync_status=consts.ASSOCIATION_SYNC_STATUS_FAILED, - sync_message=str(exception)) + self._update_sync_status_to_failed(context, association_id, + str(exception), + dc_peer_association_id) raise exception + def _delete_primary_association(self, context, association_id): + """Delete primary peer group association.""" + result = db_api.peer_group_association_destroy(context, association_id) + self.peer_monitor_manager.peer_monitor_notify(context) + return result + def delete_peer_group_association(self, context, association_id): """Delete association and remove related association from peer site. @@ -499,25 +611,52 @@ class SystemPeerManager(manager.Manager): association = db_api.peer_group_association_get(context, association_id) peer = db_api.system_peer_get(context, association.system_peer_id) - peer_group = db_api.subcloud_peer_group_get(context, - association.peer_group_id) + dc_local_pg = db_api.subcloud_peer_group_get(context, + association.peer_group_id) + peer_group_name = dc_local_pg.peer_group_name try: + # Check if the system_uuid of the peer site matches with the + # peer_uuid + system = self.get_peer_sysinv_client(peer).get_system() + if system.uuid != peer.peer_uuid: + LOG.warning(f"Peer site system uuid {system.uuid} does not " + f"match with the peer_uuid {peer.peer_uuid}") + return self._delete_primary_association(context, association_id) + dc_client = self.get_peer_dc_client(peer) - dc_peer_pg = dc_client.get_subcloud_peer_group( - peer_group.peer_group_name) + + # Get current site system information + local_system_uuid = utils.get_local_system().uuid + + # Get peer site system peer + dc_peer_system_peer = self._get_peer_site_system_peer( + dc_client, local_system_uuid) + + # Get peer site peer group + dc_peer_pg = self._get_peer_site_pg_by_name(dc_client, + peer_group_name) + + if dc_peer_pg is None: + # peer group does not exist on peer site, the association should + # be deleted + LOG.warning(f"Subcloud Peer Group {peer_group_name} does " + f"not exist on peer site.") + return self._delete_primary_association(context, association_id) + + dc_peer_pg_id = dc_peer_pg.get('id') dc_peer_pg_priority = dc_peer_pg.get('group_priority') + # Check if the peer group priority is 0, if so, raise exception if dc_peer_pg_priority == 0: - LOG.error(f"Failed to delete peer_group_association. " - f"Peer Group {peer_group.peer_group_name} " - f"has priority 0 on peer site.") + LOG.error(f"Failed to delete peer_group_association. Peer Group" + f" {peer_group_name} has priority 0 on peer site.") raise exceptions.SubcloudPeerGroupHasWrongPriority( priority=dc_peer_pg_priority) # Use thread pool to limit number of operations in parallel delete_pool = greenpool.GreenPool(size=MAX_PARALLEL_SUBCLOUD_DELETE) subclouds = db_api.subcloud_get_for_peer_group(context, - peer_group.id) + dc_local_pg.id) # Spawn threads to delete each subcloud clean_function = functools.partial(self._delete_subcloud, dc_client) @@ -525,28 +664,78 @@ class SystemPeerManager(manager.Manager): 'peer subcloud clean', clean_function, delete_pool, subclouds) if delete_error_msg: - association = db_api.peer_group_association_update( - context, association_id, - sync_status=consts.ASSOCIATION_SYNC_STATUS_FAILED, - sync_message=json.dumps(delete_error_msg)) + self._update_sync_status_to_failed(context, association_id, + json.dumps(delete_error_msg)) return - try: - dc_client.delete_subcloud_peer_group(peer_group.peer_group_name) - LOG.info("Deleted Subcloud Peer Group " - f"{peer_group.peer_group_name} on peer site.") - except dccommon_exceptions.SubcloudPeerGroupNotFound: - LOG.debug(f"Subcloud Peer Group {peer_group.peer_group_name} " - "does not exist on peer site.") - except dccommon_exceptions.SubcloudPeerGroupDeleteFailedAssociated: - LOG.debug(f"Subcloud Peer Group {peer_group.peer_group_name} " - "delete failed as it is associated with system peer " - "on peer site.") + # System Peer does not exist on peer site, delete peer group + if dc_peer_system_peer is None: + try: + dc_client.delete_subcloud_peer_group(peer_group_name) + LOG.info(f"Deleted Subcloud Peer Group {peer_group_name} " + f"on peer site.") + except dccommon_exceptions.\ + SubcloudPeerGroupDeleteFailedAssociated: + LOG.error(f"Subcloud Peer Group {peer_group_name} " + "delete failed as it is associated with System " + "Peer on peer site.") + return self._delete_primary_association(context, association_id) + dc_peer_system_peer_id = dc_peer_system_peer.get('id') - db_api.peer_group_association_destroy(context, association_id) - self.peer_monitor_manager.peer_monitor_notify(context) + # Get peer site non-primary association + dc_peer_association = self._get_non_primary_association( + dc_client, dc_peer_system_peer_id, dc_peer_pg_id) + # Delete peer group association on peer site if exist + if dc_peer_association is not None: + dc_peer_association_id = dc_peer_association.get("id") + dc_client.delete_peer_group_association( + dc_peer_association_id) + elif dc_peer_association is None: + LOG.warning(f"PeerGroupAssociation does not exist on peer site." + f"Peer Group ID: {dc_peer_pg_id}, peer site System " + f"Peer ID: {dc_peer_system_peer_id}") + + try: + dc_client.delete_subcloud_peer_group(peer_group_name) + LOG.info("Deleted Subcloud Peer Group " + f"{peer_group_name} on peer site.") + except dccommon_exceptions.SubcloudPeerGroupDeleteFailedAssociated: + failed_message = f"Subcloud Peer Group {peer_group_name} " \ + + "delete failed as it is associated with system peer " \ + + "on peer site." + self._update_sync_status_to_failed(context, association_id, + failed_message) + LOG.error(failed_message) + raise + + return self._delete_primary_association(context, association_id) except Exception as exception: LOG.exception("Failed to delete peer_group_association " f"{association.id}") raise exception + + def handle_association_operations_in_progress(self): + """Identify associations in transitory stages and update association + + state to failure. + """ + + LOG.info('Identifying associations in transitory stages.') + + associations = db_api.peer_group_association_get_all(self.context) + + for association in associations: + # Identify associations in transitory states + new_sync_status = TRANSITORY_STATES.get(association.sync_status) + + # update syncing states to the corresponding failure states + if new_sync_status: + LOG.info(f"Changing association {association.id} sync status " + f"from {association.sync_status} to {new_sync_status}") + + db_api.peer_group_association_update( + self.context, + association.id, + sync_status=new_sync_status or association.sync_status, + sync_message="Service restart during syncing") diff --git a/distributedcloud/dcmanager/rpc/client.py b/distributedcloud/dcmanager/rpc/client.py index 91b300ce7..5491c3b9d 100644 --- a/distributedcloud/dcmanager/rpc/client.py +++ b/distributedcloud/dcmanager/rpc/client.py @@ -265,10 +265,10 @@ class ManagerClient(RPCClient): return self.cast(ctxt, self.make_msg( 'sync_subcloud_peer_group', association_id=association_id)) - def update_subcloud_peer_group(self, ctxt, association_id, priority): + def update_subcloud_peer_group(self, ctxt, association_id): return self.call(ctxt, self.make_msg( 'sync_subcloud_peer_group', association_id=association_id, - sync_subclouds=False, priority=priority)) + sync_subclouds=False)) def delete_peer_group_association(self, ctxt, association_id): return self.call(ctxt, self.make_msg('delete_peer_group_association', diff --git a/distributedcloud/dcmanager/tests/unit/api/v1/controllers/test_peer_group_association.py b/distributedcloud/dcmanager/tests/unit/api/v1/controllers/test_peer_group_association.py index a4b00f7c7..1fafabea2 100644 --- a/distributedcloud/dcmanager/tests/unit/api/v1/controllers/test_peer_group_association.py +++ b/distributedcloud/dcmanager/tests/unit/api/v1/controllers/test_peer_group_association.py @@ -48,6 +48,7 @@ SAMPLE_PEER_GROUP_PRIORITY = 1 SAMPLE_PEER_GROUP_PRIORITY_UPDATED = 99 SAMPLE_SYNC_STATUS = 'synced' SAMPLE_SYNC_MESSAGE = 'None' +SAMPLE_ASSOCIATION_TYPE = 'primary' class FakeSystem(object): @@ -142,7 +143,9 @@ class PeerGroupAssociationAPIMixin(APIMixin): 'peer_group_priority': kw.get('peer_group_priority', SAMPLE_PEER_GROUP_PRIORITY), 'sync_status': kw.get('sync_status', SAMPLE_SYNC_STATUS), - 'sync_message': kw.get('sync_message', SAMPLE_SYNC_MESSAGE) + 'sync_message': kw.get('sync_message', SAMPLE_SYNC_MESSAGE), + 'association_type': kw.get('association_type', + SAMPLE_ASSOCIATION_TYPE) } return association diff --git a/distributedcloud/dcmanager/tests/unit/manager/test_system_peer_manager.py b/distributedcloud/dcmanager/tests/unit/manager/test_system_peer_manager.py index b280b6b09..0ce0b4ffc 100644 --- a/distributedcloud/dcmanager/tests/unit/manager/test_system_peer_manager.py +++ b/distributedcloud/dcmanager/tests/unit/manager/test_system_peer_manager.py @@ -41,17 +41,19 @@ FAKE_SITE1_PEER_GROUP_ID = 9 # FAKE SUBCLOUD DATA (SITE1) FAKE_SITE1_SUBCLOUD1_ID = 11 +FAKE_SITE1_SUBCLOUD1_REGION_NAME = str(uuid.uuid4()) FAKE_SITE1_SUBCLOUD1_DEPLOY_STATUS = 'secondary' FAKE_SITE1_SUBCLOUD1_DATA = {"id": FAKE_SITE1_SUBCLOUD1_ID, "name": "subcloud1", - "region-name": "subcloud1", + "region-name": FAKE_SITE1_SUBCLOUD1_REGION_NAME, "deploy-status": FAKE_SITE1_SUBCLOUD1_DEPLOY_STATUS} FAKE_SITE1_SUBCLOUD2_ID = 12 +FAKE_SITE1_SUBCLOUD2_REGION_NAME = str(uuid.uuid4()) FAKE_SITE1_SUBCLOUD2_DEPLOY_STATUS = 'secondary-failed' FAKE_SITE1_SUBCLOUD2_DATA = {"id": FAKE_SITE1_SUBCLOUD2_ID, "name": "subcloud2", - "region-name": "subcloud2", + "region-name": FAKE_SITE1_SUBCLOUD2_REGION_NAME, "deploy-status": FAKE_SITE1_SUBCLOUD2_DEPLOY_STATUS} FAKE_SITE1_SUBCLOUD3_ID = 13 @@ -62,7 +64,7 @@ FAKE_SITE1_SUBCLOUD3_DATA = {"id": FAKE_SITE1_SUBCLOUD3_ID, "deploy-status": FAKE_SITE1_SUBCLOUD3_DEPLOY_STATUS} -# FAKE PEER GROUP ASSOCIATION DATA +# FAKE PEER GROUP ASSOCIATION DATA (SITE0) FAKE_ASSOCIATION_PEER_GROUP_ID = \ FAKE_SITE0_PEER_GROUP_ID FAKE_ASSOCIATION_SYSTEM_PEER_ID = \ @@ -70,6 +72,10 @@ FAKE_ASSOCIATION_SYSTEM_PEER_ID = \ FAKE_ASSOCIATION_PEER_GROUP_PRIORITY = 1 FAKE_ASSOCIATION_SYNC_STATUS = 'synced' FAKE_ASSOCIATION_SYNC_MESSAGE = 'None' +FAKE_ASSOCIATION_TYPE = 'primary' + +# FAKE PEER GROUP ASSOCIATION DATA (SITE1) +FAKE_SITE1_ASSOCIATION_ID = 10 class FakeDCManagerAuditAPI(object): @@ -172,7 +178,8 @@ class TestSystemPeerManager(base.DCManagerTestCase): "peer_group_id": FAKE_ASSOCIATION_PEER_GROUP_ID, "peer_group_priority": FAKE_ASSOCIATION_PEER_GROUP_PRIORITY, "sync_status": FAKE_ASSOCIATION_SYNC_STATUS, - "sync_message": FAKE_ASSOCIATION_SYNC_MESSAGE + "sync_message": FAKE_ASSOCIATION_SYNC_MESSAGE, + "association_type": FAKE_ASSOCIATION_TYPE } values.update(kwargs) return db_api.peer_group_association_create(ctxt, **values) @@ -193,6 +200,8 @@ class TestSystemPeerManager(base.DCManagerTestCase): mock_sysinv_client.return_value = FakeSysinvClient() mock_dc_client.return_value = FakeDcmanagerClient() mock_dc_client().add_subcloud_with_secondary_status = mock.MagicMock() + mock_dc_client().add_subcloud_with_secondary_status.return_value = { + "region-name": FAKE_SITE1_SUBCLOUD2_REGION_NAME} mock_dc_client().delete_subcloud = mock.MagicMock() peer = self.create_system_peer_static( @@ -247,8 +256,12 @@ class TestSystemPeerManager(base.DCManagerTestCase): ]) mock_dc_client().update_subcloud.assert_has_calls([ mock.call('subcloud1', mock.ANY, mock.ANY), - mock.call('subcloud1', files=None, - data={'peer_group': str(FAKE_SITE1_PEER_GROUP_ID)}) + mock.call(FAKE_SITE1_SUBCLOUD1_REGION_NAME, files=None, + data={'peer_group': str(FAKE_SITE1_PEER_GROUP_ID)}, + is_region_name=True), + mock.call(FAKE_SITE1_SUBCLOUD2_REGION_NAME, files=None, + data={'peer_group': str(FAKE_SITE1_PEER_GROUP_ID)}, + is_region_name=True) ]) mock_dc_client().add_subcloud_with_secondary_status. \ assert_called_once() @@ -256,6 +269,7 @@ class TestSystemPeerManager(base.DCManagerTestCase): @mock.patch.object( system_peer_manager.SystemPeerManager, '_sync_subclouds') + @mock.patch.object(system_peer_manager, 'utils') @mock.patch.object(system_peer_manager, 'PeerSiteDriver') @mock.patch.object(system_peer_manager, 'SysinvClient') @mock.patch.object(system_peer_manager, 'DcmanagerClient') @@ -263,6 +277,7 @@ class TestSystemPeerManager(base.DCManagerTestCase): mock_dc_client, mock_sysinv_client, mock_keystone_client, + mock_utils, mock_sync_subclouds): mock_sync_subclouds.return_value = True mock_keystone_client().keystone_client = FakeKeystoneClient() @@ -270,6 +285,12 @@ class TestSystemPeerManager(base.DCManagerTestCase): mock_dc_client.return_value = FakeDcmanagerClient() mock_dc_client().get_subcloud_peer_group = mock.MagicMock() mock_dc_client().update_subcloud_peer_group = mock.MagicMock() + mock_dc_client().get_system_peer = mock.MagicMock() + mock_dc_client().get_peer_group_association_with_peer_id_and_pg_id = \ + mock.MagicMock() + mock_dc_client().update_peer_group_association_sync_status = \ + mock.MagicMock() + mock_utils().get_local_system = mock.MagicMock() peer = self.create_system_peer_static( self.ctx, @@ -291,12 +312,14 @@ class TestSystemPeerManager(base.DCManagerTestCase): @mock.patch.object( system_peer_manager.SystemPeerManager, '_sync_subclouds') + @mock.patch.object(system_peer_manager, 'utils') @mock.patch.object(system_peer_manager, 'PeerSiteDriver') @mock.patch.object(system_peer_manager, 'SysinvClient') @mock.patch.object(system_peer_manager, 'DcmanagerClient') def test_sync_subcloud_peer_group_not_exist(self, mock_dc_client, mock_sysinv_client, mock_keystone_client, + mock_utils, mock_sync_subclouds): mock_sync_subclouds.return_value = True mock_keystone_client().keystone_client = FakeKeystoneClient() @@ -305,6 +328,12 @@ class TestSystemPeerManager(base.DCManagerTestCase): mock_dc_client().get_subcloud_peer_group = mock.MagicMock() mock_dc_client().add_subcloud_peer_group = mock.MagicMock() mock_dc_client().update_subcloud_peer_group = mock.MagicMock() + mock_dc_client().get_system_peer = mock.MagicMock() + mock_dc_client().get_peer_group_association_with_peer_id_and_pg_id = \ + mock.MagicMock() + mock_dc_client().update_peer_group_association_sync_status = \ + mock.MagicMock() + mock_utils().get_local_system = mock.MagicMock() peer = self.create_system_peer_static( self.ctx, @@ -333,17 +362,27 @@ class TestSystemPeerManager(base.DCManagerTestCase): 'system-leader-name': peer_group.system_leader_name, 'max-subcloud-rehoming': peer_group.max_subcloud_rehoming }) - mock_dc_client().update_subcloud_peer_group.assert_not_called() + @mock.patch.object(system_peer_manager, 'utils') @mock.patch.object(system_peer_manager, 'PeerSiteDriver') + @mock.patch.object(system_peer_manager, 'SysinvClient') @mock.patch.object(system_peer_manager, 'DcmanagerClient') def test_delete_peer_group_association(self, mock_dc_client, - mock_keystone_client): + mock_sysinv_client, + mock_keystone_client, + mock_utils): mock_keystone_client().keystone_client = FakeKeystoneClient() + mock_sysinv_client.return_value = FakeSysinvClient() mock_dc_client.return_value = FakeDcmanagerClient() mock_dc_client().delete_subcloud_peer_group = mock.MagicMock() mock_dc_client().delete_subcloud = mock.MagicMock() + mock_dc_client().get_subcloud_peer_group = mock.MagicMock() + mock_dc_client().get_system_peer = mock.MagicMock() + mock_dc_client().get_peer_group_association_with_peer_id_and_pg_id = \ + mock.MagicMock() + mock_dc_client().delete_peer_group_association = mock.MagicMock() + mock_utils().get_local_system = mock.MagicMock() peer = self.create_system_peer_static( self.ctx, @@ -370,11 +409,8 @@ class TestSystemPeerManager(base.DCManagerTestCase): mock_dc_client().get_subcloud = mock.MagicMock() mock_dc_client().get_subcloud.side_effect = [ peer_subcloud1, peer_subcloud2] - - mock_dc_client().get_subcloud_peer_group = mock.MagicMock() - mock_dc_client().get_subcloud_peer_group.return_value = { - 'group_priority': 1 - } + mock_dc_client().get_peer_group_association_with_peer_id_and_pg_id.\ + return_value = {'id': FAKE_SITE1_ASSOCIATION_ID} spm = system_peer_manager.SystemPeerManager(mock.MagicMock()) spm.delete_peer_group_association(self.ctx, association.id) @@ -385,6 +421,61 @@ class TestSystemPeerManager(base.DCManagerTestCase): ]) mock_dc_client().delete_subcloud_peer_group.assert_called_once_with( peer_group.peer_group_name) + mock_dc_client().delete_peer_group_association.assert_called_once_with( + FAKE_SITE1_ASSOCIATION_ID) + + associations = db_api.peer_group_association_get_all(self.ctx) + self.assertEqual(0, len(associations)) + + @mock.patch.object(system_peer_manager, 'utils') + @mock.patch.object(system_peer_manager, 'PeerSiteDriver') + @mock.patch.object(system_peer_manager, 'SysinvClient') + @mock.patch.object(system_peer_manager, 'DcmanagerClient') + def test_delete_peer_group_association_peer_site_association_not_exsit( + self, mock_dc_client, mock_sysinv_client, mock_keystone_client, + mock_utils): + mock_keystone_client().keystone_client = FakeKeystoneClient() + mock_sysinv_client.return_value = FakeSysinvClient() + mock_dc_client.return_value = FakeDcmanagerClient() + mock_dc_client().delete_subcloud_peer_group = mock.MagicMock() + mock_dc_client().delete_subcloud = mock.MagicMock() + mock_dc_client().get_subcloud_peer_group = mock.MagicMock() + mock_dc_client().get_system_peer = mock.MagicMock() + mock_dc_client().get_peer_group_association_with_peer_id_and_pg_id = \ + mock.MagicMock() + mock_dc_client().delete_peer_group_association = mock.MagicMock() + mock_utils().get_local_system = mock.MagicMock() + + peer = self.create_system_peer_static( + self.ctx, + peer_name='SystemPeer1') + peer_group = self.create_subcloud_peer_group_static( + self.ctx, + peer_group_name='SubcloudPeerGroup1') + # Create local dc subcloud1 mock data in database + subcloud1 = self.create_subcloud_with_pg_static( + self.ctx, + peer_group_id=peer_group.id, + name='subcloud1') + association = self.create_peer_group_association_static( + self.ctx, + system_peer_id=peer.id, + peer_group_id=peer_group.id) + peer_subcloud1 = FAKE_SITE1_SUBCLOUD1_DATA + mock_dc_client().get_subcloud = mock.MagicMock() + mock_dc_client().get_subcloud.side_effect = [ + peer_subcloud1, dccommon_exceptions.SubcloudNotFound] + mock_dc_client().get_peer_group_association_with_peer_id_and_pg_id.\ + side_effect = [dccommon_exceptions.PeerGroupAssociationNotFound] + + spm = system_peer_manager.SystemPeerManager(mock.MagicMock()) + spm.delete_peer_group_association(self.ctx, association.id) + + mock_dc_client().delete_subcloud.assert_has_calls([ + mock.call(subcloud1.name)]) + mock_dc_client().delete_subcloud_peer_group.assert_called_once_with( + peer_group.peer_group_name) + mock_dc_client().delete_peer_group_association.assert_not_called() associations = db_api.peer_group_association_get_all(self.ctx) self.assertEqual(0, len(associations))