Fix unable to determine the SPG sync state if one site is down

If Site1 (the local site) is down while setting up the protection
group, the subcloud peer group sync state is unable to determine.
This commit will automatically create the non-primary association on
Site2 (the peer site) when creating a primary association, and
update the sync state to the non-primary association. Then the
operator can check the sync state on Site2 if Site1 is down.

Test Plan:
- PASS: Create a primary association and check the non-primary
        association on peer site. It was created, and sync_status
        will follow the primary association's sync_status.
- PASS: Delete the primary association and check the non-primary
        association on peer site. It was deleted.
- PASS: If you restart the "dcmanager-manager service" in the local
        site while the association sync_status is in "syncing", the
        sync_status will transition to "failed".
- PASS: Create a primary association and wait for the sync_status
        change to "in-sync". Delete the subcloud peer group on peer
        site, the deletion will fail because it is associating to
        the non-primary association.

Closes-Bug: 2046809

Change-Id: Ia917d0dc7c65fbea1e222fb52dbec79fdbe65b65
Signed-off-by: Zhang Rong(Jon) <rong.zhang@windriver.com>
This commit is contained in:
Zhang Rong(Jon) 2023-12-19 15:53:06 +08:00
parent b273a32927
commit e422731760
24 changed files with 687 additions and 194 deletions

View File

@ -2333,7 +2333,7 @@ Response
- peer-controller-gateway-address: peer_controller_gateway_address
- administrative-state: administrative_state
- heartbeat-interval: heartbeat_interval
- heartbeat-failure-threshold: heartbeat_failure_threshold
- heartbeat-failure-threshold: heartbeat_failure_threshold
- heartbeat-failure-policy: heartbeat_failure_policy
- heartbeat-maintenance-timeout: heartbeat_maintenance_timeout
- created-at: created_at
@ -2377,7 +2377,7 @@ serviceUnavailable (503)
- peer_controller_gateway_address: peer_controller_gateway_address
- administrative_state: administrative_state
- heartbeat_interval: heartbeat_interval
- heartbeat_failure_threshold: heartbeat_failure_threshold
- heartbeat_failure_threshold: heartbeat_failure_threshold
- heartbeat_failure_policy: heartbeat_failure_policy
- heartbeat_maintenance_timeout: heartbeat_maintenance_timeout
@ -2400,7 +2400,7 @@ Request Example
- peer-controller-gateway-address: peer_controller_gateway_address
- administrative-state: administrative_state
- heartbeat-interval: heartbeat_interval
- heartbeat-failure-threshold: heartbeat_failure_threshold
- heartbeat-failure-threshold: heartbeat_failure_threshold
- heartbeat-failure-policy: heartbeat_failure_policy
- heartbeat-maintenance-timeout: heartbeat_maintenance_timeout
- created-at: created_at
@ -2449,7 +2449,7 @@ This operation does not accept a request body.
- peer-controller-gateway-address: peer_controller_gateway_address
- administrative-state: administrative_state
- heartbeat-interval: heartbeat_interval
- heartbeat-failure-threshold: heartbeat_failure_threshold
- heartbeat-failure-threshold: heartbeat_failure_threshold
- heartbeat-failure-policy: heartbeat_failure_policy
- heartbeat-maintenance-timeout: heartbeat_maintenance_timeout
- created-at: created_at
@ -2561,7 +2561,7 @@ serviceUnavailable (503)
- peer_controller_gateway_address: peer_controller_gateway_address
- administrative_state: administrative_state
- heartbeat_interval: heartbeat_interval
- heartbeat_failure_threshold: heartbeat_failure_threshold
- heartbeat_failure_threshold: heartbeat_failure_threshold
- heartbeat_failure_policy: heartbeat_failure_policy
- heartbeat_maintenance_timeout: heartbeat_maintenance_timeout
@ -2582,7 +2582,7 @@ Request Example
- peer-controller-gateway-address: peer_controller_gateway_address
- administrative-state: administrative_state
- heartbeat-interval: heartbeat_interval
- heartbeat-failure-threshold: heartbeat_failure_threshold
- heartbeat-failure-threshold: heartbeat_failure_threshold
- heartbeat-failure-policy: heartbeat_failure_policy
- heartbeat-maintenance-timeout: heartbeat_maintenance_timeout
- created-at: created_at
@ -2625,7 +2625,7 @@ Subcloud Peer Groups
Subcloud Peer Groups are logical groupings managed by a central System Controller.
It's a group of the current managed subclouds which are supposed to be duplicated
in a peer site as secondary subclouds
in a peer site as secondary subclouds
******************************
Lists all subcloud peer groups
@ -3040,6 +3040,7 @@ internalServerError (500), serviceUnavailable (503)
- peer-group-id: association_peer_group_id
- system-peer-id: system_peer_id
- peer-group-priority: association_peer_group_priority
- association-type: association_type
- sync-status: association_sync_status
- created-at: created_at
- updated-at: updated_at
@ -3089,6 +3090,7 @@ Request Example
- peer-group-id: association_peer_group_id
- system-peer-id: system_peer_id
- peer-group-priority: association_peer_group_priority
- association-type: association_type
- sync-status: association_sync_status
- sync-message: association_sync_message
- created-at: created_at
@ -3133,6 +3135,7 @@ This operation does not accept a request body.
- peer-group-id: association_peer_group_id
- system-peer-id: system_peer_id
- peer-group-priority: association_peer_group_priority
- association-type: association_type
- sync-status: association_sync_status
- sync-message: association_sync_message
- created-at: created_at
@ -3175,6 +3178,7 @@ internalServerError (500), serviceUnavailable (503)
- peer-group-id: association_peer_group_id
- system-peer-id: system_peer_id
- peer-group-priority: association_peer_group_priority
- association-type: association_type
- sync-status: association_sync_status
- sync-message: association_sync_message
- created-at: created_at
@ -3213,6 +3217,7 @@ serviceUnavailable (503)
- associate_id: peer_group_association_uri
- peer_group_priority: association_peer_group_priority
- sync-status: association_sync_status
Request Example
----------------
@ -3227,6 +3232,7 @@ Request Example
- peer-group-id: association_peer_group_id
- system-peer-id: system_peer_id
- peer-group-priority: association_peer_group_priority
- association-type: association_type
- sync-status: association_sync_status
- sync-message: association_sync_message
- created-at: created_at

View File

@ -108,6 +108,12 @@ association_sync_status:
in: body
required: true
type: string
association_type:
description: |
The type of association.
in: body
required: true
type: string
availability_status:
description: |
The availability status of the subcloud.

View File

@ -2,6 +2,7 @@
"id": 9,
"peer-group-id": 1,
"system-peer-id": 1,
"association-type": "primary",
"peer-group-priority": 1,
"sync-status": "synced",
"sync-message": null,

View File

@ -2,6 +2,7 @@
"id": 9,
"peer-group-id": 1,
"system-peer-id": 1,
"association-type": "primary",
"peer-group-priority": 99,
"sync-status": "synced",
"sync-message": null,

View File

@ -4,6 +4,7 @@
"id": 9,
"peer-group-id": 1,
"system-peer-id": 1,
"association-type": "primary",
"peer-group-priority": 1,
"sync-status": "synced",
"created-at": "2023-08-21 09:24:07.394961",

View File

@ -2,6 +2,7 @@
"id": 9,
"peer-group-id": 1,
"system-peer-id": 1,
"association-type": "primary",
"peer-group-priority": 1,
"sync-status": "syncing",
"sync-message": null,

View File

@ -33,6 +33,27 @@ class DcmanagerClient(base.DriverBase):
self.token = session.get_token()
self.timeout = timeout
def get_system_peer(self, system_peer_uuid):
"""Get system peer."""
if system_peer_uuid is None:
raise ValueError("system_peer_uuid is required.")
url = f"{self.endpoint}/system-peers/{system_peer_uuid}"
headers = {"X-Auth-Token": self.token}
response = requests.get(url, headers=headers, timeout=self.timeout)
if response.status_code == 200:
return response.json()
else:
if response.status_code == 404 and \
'System Peer not found' in response.text:
raise exceptions.SystemPeerNotFound(
system_peer=system_peer_uuid)
message = "Get SystemPeer: system_peer_uuid %s failed with RC: %d" \
% (system_peer_uuid, response.status_code)
LOG.error(message)
raise Exception(message)
def get_subcloud(self, subcloud_ref, is_region_name=False):
"""Get subcloud."""
if subcloud_ref is None:
@ -57,7 +78,7 @@ class DcmanagerClient(base.DriverBase):
def get_subcloud_list(self):
"""Get subcloud list."""
url = self.endpoint + '/subclouds'
url = f"{self.endpoint}/subclouds"
headers = {"X-Auth-Token": self.token}
response = requests.get(url, headers=headers, timeout=self.timeout)
@ -73,7 +94,7 @@ class DcmanagerClient(base.DriverBase):
def get_subcloud_group_list(self):
"""Get subcloud group list."""
url = self.endpoint + '/subcloud-groups'
url = f"{self.endpoint}/subcloud-groups"
headers = {"X-Auth-Token": self.token}
response = requests.get(url, headers=headers, timeout=self.timeout)
@ -89,7 +110,7 @@ class DcmanagerClient(base.DriverBase):
def get_subcloud_peer_group_list(self):
"""Get subcloud peer group list."""
url = self.endpoint + '/subcloud-peer-groups'
url = f"{self.endpoint}/subcloud-peer-groups"
headers = {"X-Auth-Token": self.token}
response = requests.get(url, headers=headers, timeout=self.timeout)
@ -147,9 +168,35 @@ class DcmanagerClient(base.DriverBase):
LOG.error(message)
raise Exception(message)
def get_peer_group_association_with_peer_id_and_pg_id(self, peer_id,
pg_id):
"""Get peer group association with peer id and PG id."""
for association in self.get_peer_group_association_list():
if peer_id == association.get('system-peer-id') and \
pg_id == association.get('peer-group-id'):
return association
raise exceptions.PeerGroupAssociationNotFound(
association_id=None)
def get_peer_group_association_list(self):
"""Get peer group association list."""
url = f"{self.endpoint}/peer-group-associations"
headers = {"X-Auth-Token": self.token}
response = requests.get(url, headers=headers, timeout=self.timeout)
if response.status_code == 200:
data = response.json()
return data.get('peer_group_associations', [])
else:
message = "Get Peer Group Association list failed with RC: %d" % \
response.status_code
LOG.error(message)
raise Exception(message)
def add_subcloud_peer_group(self, **kwargs):
"""Add a subcloud peer group."""
url = self.endpoint + '/subcloud-peer-groups'
url = f"{self.endpoint}/subcloud-peer-groups"
headers = {"X-Auth-Token": self.token,
"Content-Type": "application/json"}
@ -166,7 +213,7 @@ class DcmanagerClient(base.DriverBase):
def add_subcloud_with_secondary_status(self, files, data):
"""Add a subcloud with secondary status."""
url = self.endpoint + '/subclouds'
url = f"{self.endpoint}/subclouds"
# If not explicitly specified, set 'secondary' to true by default.
# This action adds a secondary subcloud with rehoming data in the
@ -197,6 +244,49 @@ class DcmanagerClient(base.DriverBase):
LOG.error(message)
raise Exception(message)
def add_peer_group_association(self, **kwargs):
"""Add a peer group association."""
url = f"{self.endpoint}/peer-group-associations"
headers = {"X-Auth-Token": self.token,
"Content-Type": "application/json"}
response = requests.post(url, json=kwargs, headers=headers,
timeout=self.timeout)
if response.status_code == 200:
return response.json()
else:
message = "Add Peer Group Association: %s, failed with RC: %d" % \
(kwargs, response.status_code)
LOG.error(message)
raise Exception(message)
def update_peer_group_association_sync_status(self, association_id,
sync_status):
"""Update the peer group association sync_status."""
if association_id is None:
raise ValueError("association_id is required.")
url = f"{self.endpoint}/peer-group-associations/{association_id}"
update_kwargs = {"sync_status": sync_status}
headers = {"X-Auth-Token": self.token,
"Content-Type": "application/json"}
response = requests.patch(url, json=update_kwargs, headers=headers,
timeout=self.timeout)
if response.status_code == 200:
return response.json()
else:
if response.status_code == 404 and \
'Peer Group Association not found' in response.text:
raise exceptions.PeerGroupAssociationNotFound(
association_id=association_id)
message = "Update Peer Group Association: association_id %s, " \
"sync_status %s, failed with RC: %d" % (
association_id, sync_status, response.status_code)
LOG.error(message)
raise Exception(message)
def update_subcloud_peer_group(self, peer_group_ref, **kwargs):
"""Update the subcloud peer group."""
if peer_group_ref is None:
@ -280,6 +370,28 @@ class DcmanagerClient(base.DriverBase):
LOG.error(message)
raise Exception(message)
def delete_peer_group_association(self, association_id):
"""Delete the peer group association."""
if association_id is None:
raise ValueError("association_id is required.")
url = f"{self.endpoint}/peer-group-associations/{association_id}"
headers = {"X-Auth-Token": self.token}
response = requests.delete(url, headers=headers,
timeout=self.timeout)
if response.status_code == 200:
return response.json()
else:
if response.status_code == 404 and \
'Peer Group Association not found' in response.text:
raise exceptions.PeerGroupAssociationNotFound(
association_id=association_id)
message = "Delete Peer Group Association: association_id %s " \
"failed with RC: %d" % (association_id, response.status_code)
LOG.error(message)
raise Exception(message)
def delete_subcloud_peer_group(self, peer_group_ref):
"""Delete the subcloud peer group."""
if peer_group_ref is None:

View File

@ -125,6 +125,10 @@ class ApiException(DCCommonException):
message = _("%(endpoint)s failed with status code: %(rc)d")
class SystemPeerNotFound(NotFound):
message = _("System Peer %(system_peer)s not found")
class SubcloudNotFound(NotFound):
message = _("Subcloud %(subcloud_ref)s not found")
@ -133,6 +137,10 @@ class SubcloudPeerGroupNotFound(NotFound):
message = _("Subcloud Peer Group %(peer_group_ref)s not found")
class PeerGroupAssociationNotFound(NotFound):
message = _("Peer Group Association %(association_id)s not found")
class SubcloudPeerGroupDeleteFailedAssociated(DCCommonException):
message = _("Subcloud Peer Group %(peer_group_ref)s delete failed "
"cause it is associated with a system peer.")

View File

@ -31,8 +31,14 @@ from dcmanager.rpc import client as rpc_client
CONF = cfg.CONF
LOG = logging.getLogger(__name__)
PEER_GROUP_PRIMARY_PRIORITY = 0
MIN_PEER_GROUP_ASSOCIATION_PRIORITY = 1
MAX_PEER_GROUP_ASSOCIATION_PRIORITY = 65536
ASSOCIATION_SYNC_STATUS_LIST = \
[consts.ASSOCIATION_SYNC_STATUS_SYNCING,
consts.ASSOCIATION_SYNC_STATUS_IN_SYNC,
consts.ASSOCIATION_SYNC_STATUS_OUT_OF_SYNC,
consts.ASSOCIATION_SYNC_STATUS_FAILED]
class PeerGroupAssociationsController(restcomm.GenericPathController):
@ -150,6 +156,12 @@ class PeerGroupAssociationsController(restcomm.GenericPathController):
return False
return True
def _validate_sync_status(self, sync_status):
if sync_status not in ASSOCIATION_SYNC_STATUS_LIST:
LOG.debug("Invalid sync_status: %s" % sync_status)
return False
return True
@index.when(method='POST', template='json')
def post(self):
"""Create a new peer group association."""
@ -174,20 +186,22 @@ class PeerGroupAssociationsController(restcomm.GenericPathController):
peer_group_priority = payload.get('peer_group_priority')
peer_group = db_api.subcloud_peer_group_get(context, peer_group_id)
if (peer_group.group_priority == 0 and not peer_group_priority) or \
(peer_group.group_priority > 0 and peer_group_priority):
pecan.abort(httpclient.BAD_REQUEST,
_('Peer Group Association create with peer_group_'
'priority is required when the subcloud peer group '
'priority is 0, and it is not allowed when the '
'subcloud peer group priority is greater than 0.'))
if peer_group_priority and not self._validate_peer_group_priority(
peer_group_priority):
if peer_group_priority is not None and not \
self._validate_peer_group_priority(peer_group_priority):
pecan.abort(httpclient.BAD_REQUEST,
_('Invalid peer_group_priority'))
sync_enabled = peer_group.group_priority == 0
if (peer_group.group_priority == PEER_GROUP_PRIMARY_PRIORITY and
peer_group_priority is None) or (
peer_group.group_priority > PEER_GROUP_PRIMARY_PRIORITY and
peer_group_priority is not None):
pecan.abort(httpclient.BAD_REQUEST,
_('Peer Group Association create is not allowed when '
'the subcloud peer group priority is greater than 0 '
'and it is required when the subcloud peer group '
'priority is 0.'))
is_primary = peer_group.group_priority == PEER_GROUP_PRIMARY_PRIORITY
# only one combination of peer_group_id + system_peer_id can exists
association = None
@ -198,9 +212,8 @@ class PeerGroupAssociationsController(restcomm.GenericPathController):
peer_group_id,
system_peer_id)
except exception.PeerGroupAssociationCombinationNotFound:
LOG.warning("Peer Group Association Combination Not Found, "
"peer_group_id: %s, system_peer_id: %s"
% (peer_group_id, system_peer_id))
# This is a normal scenario, no need to log or raise an error
pass
except Exception as e:
LOG.warning("Peer Group Association get failed: %s;"
"peer_group_id: %s, system_peer_id: %s"
@ -209,22 +222,22 @@ class PeerGroupAssociationsController(restcomm.GenericPathController):
_('peer_group_association_get_by_peer_group_and_'
'system_peer_id failed: %s' % e))
if association:
LOG.info("Failed to create Peer group association, association \
with peer_group_id:[%s],system_peer_id:[%s] \
already exists" % (peer_group_id, system_peer_id))
LOG.warning("Failed to create Peer group association, association "
"with peer_group_id:[%s],system_peer_id:[%s] "
"already exists" % (peer_group_id, system_peer_id))
pecan.abort(httpclient.BAD_REQUEST,
_('A Peer group association with same peer_group_id, '
'system_peer_id already exists'))
# Create the peer group association
try:
sync_status = consts.ASSOCIATION_SYNC_STATUS_SYNCING if \
sync_enabled else consts.ASSOCIATION_SYNC_STATUS_DISABLED
association_type = consts.ASSOCIATION_TYPE_PRIMARY if is_primary \
else consts.ASSOCIATION_TYPE_NON_PRIMARY
association = db_api.peer_group_association_create(
context, peer_group_id, system_peer_id, peer_group_priority,
sync_status)
association_type, consts.ASSOCIATION_SYNC_STATUS_SYNCING)
if sync_enabled:
if is_primary:
# Sync the subcloud peer group to peer site
self.rpc_client.sync_subcloud_peer_group(context, association.id)
else:
@ -237,6 +250,103 @@ class PeerGroupAssociationsController(restcomm.GenericPathController):
pecan.abort(httpclient.INTERNAL_SERVER_ERROR,
_('Unable to create peer group association'))
def _sync_association(self, context, association, is_non_primary):
if is_non_primary:
self.rpc_client.peer_monitor_notify(context)
pecan.abort(httpclient.BAD_REQUEST,
_('Peer Group Association sync is not allowed '
'when the association type is non-primary. But the '
'peer monitor notify was triggered.'))
else:
peer_group = db_api.subcloud_peer_group_get(
context, association.peer_group_id)
if not self._validate_peer_group_leader_id(peer_group.
system_leader_id):
pecan.abort(httpclient.BAD_REQUEST,
_('Peer Group Association sync is not allowed when '
'the subcloud peer group system_leader_id is not '
'the current system controller UUID.'))
try:
# Sync the subcloud peer group to peer site
self.rpc_client.sync_subcloud_peer_group(context,
association.id)
association = db_api.peer_group_association_update(
context, id=association.id,
sync_status=consts.ASSOCIATION_SYNC_STATUS_SYNCING,
sync_message='None')
return db_api.peer_group_association_db_model_to_dict(
association)
except RemoteError as e:
pecan.abort(httpclient.UNPROCESSABLE_ENTITY, e.value)
except Exception as e:
# additional exceptions.
LOG.exception(e)
pecan.abort(httpclient.INTERNAL_SERVER_ERROR,
_('Unable to sync peer group association'))
def _update_association(self, context, association, is_non_primary):
payload = self._get_payload(request)
if not payload:
pecan.abort(httpclient.BAD_REQUEST, _('Body required'))
peer_group_priority = payload.get('peer_group_priority')
sync_status = payload.get('sync_status')
# Check value is not None or empty before calling validate
if not (peer_group_priority is not None or sync_status):
pecan.abort(httpclient.BAD_REQUEST, _('nothing to update'))
elif peer_group_priority is not None and sync_status:
pecan.abort(httpclient.BAD_REQUEST,
_('peer_group_priority and sync_status cannot be '
'updated at the same time.'))
if peer_group_priority is not None:
if not self._validate_peer_group_priority(peer_group_priority):
pecan.abort(httpclient.BAD_REQUEST,
_('Invalid peer_group_priority'))
if is_non_primary:
self.rpc_client.peer_monitor_notify(context)
pecan.abort(httpclient.BAD_REQUEST,
_('Peer Group Association peer_group_priority is '
'not allowed to update when the association type '
'is non-primary.'))
else:
db_api.peer_group_association_update(
context, id=association.id,
peer_group_priority=peer_group_priority)
if sync_status:
if not self._validate_sync_status(sync_status):
pecan.abort(httpclient.BAD_REQUEST,
_('Invalid sync_status'))
if not is_non_primary:
self.rpc_client.peer_monitor_notify(context)
pecan.abort(httpclient.BAD_REQUEST,
_('Peer Group Association sync_status is not '
'allowed to update when the association type is '
'primary.'))
else:
sync_message = 'Primary association sync to current site ' + \
'failed.' if sync_status == \
consts.ASSOCIATION_SYNC_STATUS_FAILED else 'None'
association = db_api.peer_group_association_update(
context, id=association.id, sync_status=sync_status,
sync_message=sync_message)
self.rpc_client.peer_monitor_notify(context)
return db_api.peer_group_association_db_model_to_dict(
association)
try:
# Ask dcmanager-manager to update the subcloud peer group priority
# to peer site. It will do the real work...
return self.rpc_client.update_subcloud_peer_group(context,
association.id)
except RemoteError as e:
pecan.abort(httpclient.UNPROCESSABLE_ENTITY, e.value)
except Exception as e:
# additional exceptions.
LOG.exception(e)
pecan.abort(httpclient.INTERNAL_SERVER_ERROR,
_('Unable to update peer group association'))
@index.when(method='PATCH', template='json')
def patch(self, association_id, sync=False):
"""Update a peer group association.
@ -262,64 +372,13 @@ class PeerGroupAssociationsController(restcomm.GenericPathController):
pecan.abort(httpclient.NOT_FOUND,
_('Peer Group Association not found'))
sync_disabled = association.sync_status == consts.\
ASSOCIATION_SYNC_STATUS_DISABLED
if sync_disabled:
pecan.abort(httpclient.BAD_REQUEST,
_('Peer Group Association sync or update is not allowed'
' when the sync_status is disabled.'))
is_non_primary = association.association_type == consts.\
ASSOCIATION_TYPE_NON_PRIMARY
if sync:
peer_group = db_api.subcloud_peer_group_get(
context, association.peer_group_id)
if not self._validate_peer_group_leader_id(peer_group.
system_leader_id):
pecan.abort(httpclient.BAD_REQUEST,
_('Peer Group Association sync is not allowed when '
'the subcloud peer group system_leader_id is not '
'the current system controller UUID.'))
try:
# Sync the subcloud peer group to peer site
self.rpc_client.sync_subcloud_peer_group(context,
association.id)
association = db_api.peer_group_association_update(
context, id=association_id,
sync_status=consts.ASSOCIATION_SYNC_STATUS_SYNCING,
sync_message='None')
return db_api.peer_group_association_db_model_to_dict(
association)
except RemoteError as e:
pecan.abort(httpclient.UNPROCESSABLE_ENTITY, e.value)
except Exception as e:
# additional exceptions.
LOG.exception(e)
pecan.abort(httpclient.INTERNAL_SERVER_ERROR,
_('Unable to sync peer group association'))
payload = self._get_payload(request)
if not payload:
pecan.abort(httpclient.BAD_REQUEST, _('Body required'))
peer_group_priority = payload.get('peer_group_priority')
# Check value is not None or empty before calling validate
if not peer_group_priority:
pecan.abort(httpclient.BAD_REQUEST, _('nothing to update'))
if not self._validate_peer_group_priority(peer_group_priority):
pecan.abort(httpclient.BAD_REQUEST,
_('Invalid peer_group_priority'))
try:
# Ask dcmanager-manager to update the subcloud peer group priority
# to peer site. It will do the real work...
return self.rpc_client.update_subcloud_peer_group(
context, association.id, peer_group_priority)
except RemoteError as e:
pecan.abort(httpclient.UNPROCESSABLE_ENTITY, e.value)
except Exception as e:
# additional exceptions.
LOG.exception(e)
pecan.abort(httpclient.INTERNAL_SERVER_ERROR,
_('Unable to update peer group association'))
return self._sync_association(context, association, is_non_primary)
else:
return self._update_association(context, association, is_non_primary)
@index.when(method='delete', template='json')
def delete(self, association_id):
@ -342,17 +401,18 @@ class PeerGroupAssociationsController(restcomm.GenericPathController):
try:
association = db_api.peer_group_association_get(context,
association_id)
sync_disabled = association.sync_status == consts.\
ASSOCIATION_SYNC_STATUS_DISABLED
if sync_disabled:
is_non_primary = association.association_type == consts.\
ASSOCIATION_TYPE_NON_PRIMARY
if is_non_primary:
result = db_api.peer_group_association_destroy(context,
association_id)
self.rpc_client.peer_monitor_notify(context)
return result
# Ask system-peer-manager to delete the association.
# It will do all the real work...
return self.rpc_client.delete_peer_group_association(
context, association_id)
else:
# Ask system-peer-manager to delete the association.
# It will do all the real work...
return self.rpc_client.delete_peer_group_association(
context, association_id)
except exception.PeerGroupAssociationNotFound:
pecan.abort(httpclient.NOT_FOUND,
_('Peer Group Association not found'))

View File

@ -539,10 +539,11 @@ class SubcloudPeerGroupsController(restcomm.GenericPathController):
LOG.info("Subcloud Peer Group [%s] not found" % group_ref)
pecan.abort(httpclient.NOT_FOUND, _('Subcloud Peer Group not found'))
LOG.info("Handling delete subcloud peer group request for: %s" % group)
LOG.info("Handling delete subcloud peer group request for: %s" %
group)
# A peer group cannot be deleted if it is used by any associations
association = db_api.peer_group_association_get_by_peer_group_id(context,
group.id)
association = db_api.peer_group_association_get_by_peer_group_id(
context, group.id)
if len(association) > 0:
pecan.abort(httpclient.BAD_REQUEST,
_("Cannot delete a peer group "

View File

@ -556,7 +556,7 @@ class SubcloudsController(object):
# Ask dcmanager-manager to add the subcloud.
# It will do all the real work...
# If the subcloud is secondary, it will be synchronous operation.
# A normal subcloud add will be a synchronous operation.
# A normal subcloud add will be asynchronous operation.
if 'secondary' in payload:
self.dcmanager_rpc_client.add_secondary_subcloud(
context, subcloud.id, payload)

View File

View File

@ -466,11 +466,15 @@ PEER_GROUP_MIGRATING = 'migrating'
PEER_GROUP_MIGRATION_COMPLETE = 'complete'
PEER_GROUP_MIGRATION_NONE = 'none'
# Peer group association type
ASSOCIATION_TYPE_PRIMARY = 'primary'
ASSOCIATION_TYPE_NON_PRIMARY = 'non-primary'
# Peer group association sync status
ASSOCIATION_SYNC_STATUS_SYNCING = 'syncing'
ASSOCIATION_SYNC_STATUS_SYNCED = 'synced'
ASSOCIATION_SYNC_STATUS_IN_SYNC = 'in-sync'
ASSOCIATION_SYNC_STATUS_FAILED = 'failed'
ASSOCIATION_SYNC_STATUS_DISABLED = 'disabled'
ASSOCIATION_SYNC_STATUS_OUT_OF_SYNC = 'out-of-sync'
# Peer monitor heartbeat policy
HEARTBEAT_FAILURE_POLICY_ALARM = 'alarm'

View File

@ -555,6 +555,7 @@ def peer_group_association_db_model_to_dict(peer_group_association):
"peer-group-id": peer_group_association.peer_group_id,
"system-peer-id": peer_group_association.system_peer_id,
"peer-group-priority": peer_group_association.peer_group_priority,
"association-type": peer_group_association.association_type,
"sync-status": peer_group_association.sync_status,
"sync-message": peer_group_association.sync_message,
"created-at": peer_group_association.created_at,
@ -563,13 +564,14 @@ def peer_group_association_db_model_to_dict(peer_group_association):
def peer_group_association_create(context, peer_group_id, system_peer_id,
peer_group_priority, sync_status=None,
sync_message=None):
peer_group_priority, association_type=None,
sync_status=None, sync_message=None):
"""Create a peer_group_association."""
return IMPL.peer_group_association_create(context,
peer_group_id,
system_peer_id,
peer_group_priority,
association_type,
sync_status,
sync_message)

View File

@ -1241,6 +1241,7 @@ def peer_group_association_create(context,
peer_group_id,
system_peer_id,
peer_group_priority,
association_type,
sync_status,
sync_message):
with write_session() as session:
@ -1248,6 +1249,7 @@ def peer_group_association_create(context,
peer_group_association_ref.peer_group_id = peer_group_id
peer_group_association_ref.system_peer_id = system_peer_id
peer_group_association_ref.peer_group_priority = peer_group_priority
peer_group_association_ref.association_type = association_type
peer_group_association_ref.sync_status = sync_status
peer_group_association_ref.sync_message = sync_message
session.add(peer_group_association_ref)

View File

@ -87,6 +87,7 @@ def upgrade(migrate_engine):
sqlalchemy.ForeignKey('system_peer.id',
ondelete='CASCADE')),
sqlalchemy.Column('peer_group_priority', sqlalchemy.Integer),
sqlalchemy.Column('association_type', sqlalchemy.String(255)),
sqlalchemy.Column('sync_status', sqlalchemy.String(255)),
sqlalchemy.Column('sync_message', sqlalchemy.Text),
sqlalchemy.Column('reserved_1', sqlalchemy.Text),

View File

@ -155,6 +155,7 @@ class PeerGroupAssociation(BASE, DCManagerBase):
id = Column(Integer, primary_key=True, autoincrement=True, nullable=False)
peer_group_id = Column(Integer)
system_peer_id = Column(Integer)
association_type = Column(String(255))
peer_group_priority = Column(Integer)
sync_status = Column(String(255))
sync_message = Column(Text())

View File

@ -156,11 +156,12 @@ class PeerGroupAuditManager(manager.Manager):
raise e
self.require_audit_flag = False
# if remote subcloud peer group's migration_status is 'complete',
# Get remote subclouds, for 'managed+online' subclouds,
# set 'unmanaged+secondary' to local on same subclouds
# if remote subcloud peer group's migration_status is 'complete'
# or self.require_audit_flag is True, get remote subclouds.
# For 'managed+online' subclouds, set 'unmanaged+secondary' to
# local on same subclouds
elif remote_peer_group.get("migration_status") == \
consts.PEER_GROUP_MIGRATION_COMPLETE:
consts.PEER_GROUP_MIGRATION_COMPLETE or self.require_audit_flag:
remote_subclouds = \
self._get_subclouds_by_peer_group_from_system_peer(
system_peer,

View File

@ -216,6 +216,7 @@ class PeerMonitor(object):
pgam.PeerGroupAuditManager(self.subcloud_manager,
peer_group_id)
self.peer_group_id_set = peer_group_id_set
self._set_require_audit_flag_to_associated_peer_groups()
def start(self):
if self.thread is not None:

View File

@ -115,6 +115,7 @@ class DCManagerService(service.Service):
os.makedirs(dccommon_consts.ANSIBLE_OVERRIDES_PATH, 0o600, exist_ok=True)
self.subcloud_manager.handle_subcloud_operations_in_progress()
self.system_peer_manager.handle_association_operations_in_progress()
# Send notify to peer monitor.
self.peer_monitor_manager.peer_monitor_notify(self.context)
@ -332,11 +333,11 @@ class DCManagerService(service.Service):
@request_context
def sync_subcloud_peer_group(self, context, association_id,
sync_subclouds=True, priority=None):
sync_subclouds=True):
LOG.info("Handling sync_subcloud_peer_group request for: %s",
association_id)
return self.system_peer_manager.sync_subcloud_peer_group(
context, association_id, sync_subclouds, priority)
context, association_id, sync_subclouds)
@request_context
def delete_peer_group_association(self, context, association_id):

View File

@ -19,9 +19,11 @@ from dccommon.drivers.openstack.peer_site import PeerSiteDriver
from dccommon.drivers.openstack.sysinv_v1 import SysinvClient
from dccommon import exceptions as dccommon_exceptions
from dcmanager.common import consts
from dcmanager.common import context as dcmanager_context
from dcmanager.common import exceptions
from dcmanager.common.i18n import _
from dcmanager.common import manager
from dcmanager.common import utils
from dcmanager.db import api as db_api
LOG = logging.getLogger(__name__)
@ -32,12 +34,17 @@ MAX_PARALLEL_SUBCLOUD_DELETE = 10
VERIFY_SUBCLOUD_SYNC_VALID = 'valid'
VERIFY_SUBCLOUD_SYNC_IGNORE = 'ignore'
TRANSITORY_STATES = {
consts.ASSOCIATION_SYNC_STATUS_SYNCING: consts.ASSOCIATION_SYNC_STATUS_FAILED
}
class SystemPeerManager(manager.Manager):
"""Manages tasks related to system peers."""
def __init__(self, peer_monitor_manager, *args, **kwargs):
LOG.debug(_('SystemPeerManager initialization...'))
self.context = dcmanager_context.get_admin_context()
self.peer_monitor_manager = peer_monitor_manager
super(SystemPeerManager, self).__init__(
service_name="system_peer_manager", *args, **kwargs)
@ -192,7 +199,7 @@ class SystemPeerManager(manager.Manager):
files, data)
LOG.info(f"Updated Subcloud {dc_peer_subcloud.get('name')} "
"(region_name: "
f"{dc_peer_subcloud.get('region_name')}) on peer "
f"{dc_peer_subcloud.get('region-name')}) on peer "
"site.")
else:
# Create subcloud on peer site if not exist
@ -200,15 +207,18 @@ class SystemPeerManager(manager.Manager):
add_subcloud_with_secondary_status(files, data)
LOG.info(f"Created Subcloud {dc_peer_subcloud.get('name')} "
"(region_name: "
f"{dc_peer_subcloud.get('region_name')}) on peer "
f"{dc_peer_subcloud.get('region-name')}) on peer "
"site.")
LOG.debug(f"Updating subcloud {subcloud_name} (region_name: "
f"{region_name}) with subcloud peer group id "
f"{dc_peer_pg_id} on peer site.")
# Update subcloud associated peer group on peer site
# Update subcloud associated peer group on peer site.
# The peer_group update will check the header and should
# use the region_name as subcloud_ref.
peer_subcloud = dc_client.update_subcloud(
subcloud_name, files=None, data={"peer_group":
str(dc_peer_pg_id)})
dc_peer_subcloud.get('region-name'), files=None,
data={"peer_group": str(dc_peer_pg_id)},
is_region_name=True)
# Need to check the subcloud only in secondary, otherwise it
# should be recorded as a failure.
@ -382,14 +392,73 @@ class SystemPeerManager(manager.Manager):
return error_msg
def _update_sync_status(self, context, association_id, sync_status,
sync_message, dc_peer_association_id=None,
dc_client=None, **kwargs):
"""Update sync status of association."""
if dc_peer_association_id is not None:
if dc_client is None:
association = db_api.peer_group_association_get(context,
association_id)
peer = db_api.system_peer_get(context,
association.system_peer_id)
dc_client = self.get_peer_dc_client(peer)
dc_client.update_peer_group_association_sync_status(
dc_peer_association_id, sync_status)
LOG.info(f"Updated non-primary Peer Group Association "
f"{dc_peer_association_id} sync_status to {sync_status}.")
return db_api.peer_group_association_update(
context, association_id, sync_status=sync_status,
sync_message=sync_message, **kwargs)
def _update_sync_status_to_failed(self, context, association_id,
failed_message,
dc_peer_association_id=None):
"""Update sync status to failed."""
return self._update_sync_status(context, association_id,
consts.ASSOCIATION_SYNC_STATUS_FAILED,
failed_message,
dc_peer_association_id)
def _get_non_primary_association(self, dc_client, dc_peer_system_peer_id,
dc_peer_pg_id):
"""Get non-primary Association from peer site."""
try:
return dc_client.get_peer_group_association_with_peer_id_and_pg_id(
dc_peer_system_peer_id, dc_peer_pg_id)
except dccommon_exceptions.PeerGroupAssociationNotFound:
LOG.error(f"Peer Group association does not exist on peer site."
f"Peer Group ID: {dc_peer_pg_id}, Peer System Peer ID: "
f"{dc_peer_system_peer_id}")
return None
def _get_peer_site_pg_by_name(self, dc_client, peer_group_name):
"""Get remote Peer Group from peer site by name."""
try:
return dc_client.get_subcloud_peer_group(peer_group_name)
except dccommon_exceptions.SubcloudPeerGroupNotFound:
LOG.error(f"Peer Group {peer_group_name} does not exist on peer "
f"site.")
return None
def _get_peer_site_system_peer(self, dc_client, peer_uuid=None):
"""Get System Peer from peer site."""
try:
peer_uuid = peer_uuid if peer_uuid is not None else \
utils.get_local_system().uuid
return dc_client.get_system_peer(peer_uuid)
except dccommon_exceptions.SystemPeerNotFound:
LOG.error(f"Peer Site System Peer {peer_uuid} does not exist.")
return None
def sync_subcloud_peer_group(self, context, association_id,
sync_subclouds=True, priority=None):
sync_subclouds=True):
"""Sync subcloud peer group to peer site.
This function synchronizes subcloud peer groups from current site
to peer site, supporting two scenarios:
1. When creating an association between the system peer and a subcloud
1. When creating the association between the system peer and a subcloud
peer group. This function creates the subcloud peer group on the
peer site and synchronizes the subclouds to it.
@ -411,13 +480,12 @@ class SystemPeerManager(manager.Manager):
dc_local_pg = db_api.subcloud_peer_group_get(context,
association.peer_group_id)
peer_group_name = dc_local_pg.peer_group_name
dc_peer_association_id = None
try:
sysinv_client = self.get_peer_sysinv_client(peer)
# Check if the system_uuid of the peer site matches with the
# peer_uuid
system = sysinv_client.get_system()
system = self.get_peer_sysinv_client(peer).get_system()
if system.uuid != peer.peer_uuid:
LOG.error(f"Peer site system uuid {system.uuid} does not match "
f"with the peer_uuid {peer.peer_uuid}")
@ -426,6 +494,65 @@ class SystemPeerManager(manager.Manager):
dc_client = self.get_peer_dc_client(peer)
# Get current site system information
local_system_uuid = utils.get_local_system().uuid
# Get peer site system peer
dc_peer_system_peer = self._get_peer_site_system_peer(
dc_client, local_system_uuid)
if dc_peer_system_peer is None:
failed_message = f"System Peer {local_system_uuid} does not" + \
" exist on peer site."
return db_api.peer_group_association_db_model_to_dict(
self._update_sync_status_to_failed(context, association_id,
failed_message))
dc_peer_system_peer_id = dc_peer_system_peer.get('id')
# Get peer site peer group, create if not exist
dc_peer_pg = self._get_peer_site_pg_by_name(dc_client,
peer_group_name)
if dc_peer_pg is None:
peer_group_kwargs = {
'group-priority': association.peer_group_priority,
'group-state': dc_local_pg.group_state,
'system-leader-id': dc_local_pg.system_leader_id,
'system-leader-name': dc_local_pg.system_leader_name,
'max-subcloud-rehoming': dc_local_pg.max_subcloud_rehoming
}
peer_group_kwargs['peer-group-name'] = peer_group_name
dc_peer_pg = dc_client.add_subcloud_peer_group(
**peer_group_kwargs)
LOG.info(f"Created Subcloud Peer Group {peer_group_name} on "
f"peer site. ID is {dc_peer_pg.get('id')}.")
dc_peer_pg_id = dc_peer_pg.get('id')
dc_peer_pg_priority = dc_peer_pg.get('group_priority')
# Check if the peer group priority is 0, if so, raise exception
if dc_peer_pg_priority == 0:
LOG.error(f"Skip update. Peer Site {peer_group_name} "
f"has priority 0.")
raise exceptions.SubcloudPeerGroupHasWrongPriority(
priority=dc_peer_pg_priority)
# Get peer site non-primary association, create if not exist
dc_peer_association = self._get_non_primary_association(
dc_client, dc_peer_system_peer_id, dc_peer_pg_id)
if dc_peer_association is None:
non_primary_association_kwargs = {
'peer_group_id': dc_peer_pg_id,
'system_peer_id': dc_peer_system_peer_id
}
dc_peer_association = dc_client.add_peer_group_association(
**non_primary_association_kwargs)
LOG.info(f"Created \"non-primary\" Peer Group Association "
f"{dc_peer_association.get('id')} on peer site.")
dc_peer_association_id = dc_peer_association.get("id")
# Update peer group association sync status to syncing
dc_client.update_peer_group_association_sync_status(
dc_peer_association_id, consts.ASSOCIATION_SYNC_STATUS_SYNCING)
# Update peer group on peer site
peer_group_kwargs = {
'group-priority': association.peer_group_priority,
'group-state': dc_local_pg.group_state,
@ -433,34 +560,16 @@ class SystemPeerManager(manager.Manager):
'system-leader-name': dc_local_pg.system_leader_name,
'max-subcloud-rehoming': dc_local_pg.max_subcloud_rehoming
}
if priority:
peer_group_kwargs['group-priority'] = priority
try:
dc_peer_pg = dc_client.get_subcloud_peer_group(
peer_group_name)
dc_peer_pg_id = dc_peer_pg.get('id')
dc_peer_pg_priority = dc_peer_pg.get('group_priority')
if dc_peer_pg_priority == 0:
LOG.error(f"Skip update. Peer Site {peer_group_name} "
f"has priority 0.")
raise exceptions.SubcloudPeerGroupHasWrongPriority(
priority=dc_peer_pg_priority)
dc_peer_pg = dc_client.update_subcloud_peer_group(
peer_group_name, **peer_group_kwargs)
LOG.info(f"Updated Subcloud Peer Group {peer_group_name} on "
f"peer site, ID is {dc_peer_pg.get('id')}.")
except dccommon_exceptions.SubcloudPeerGroupNotFound:
peer_group_kwargs['peer-group-name'] = peer_group_name
dc_peer_pg = dc_client.add_subcloud_peer_group(
**peer_group_kwargs)
dc_peer_pg_id = dc_peer_pg.get('id')
LOG.info(f"Created Subcloud Peer Group {peer_group_name} on "
f"peer site, ID is {dc_peer_pg_id}.")
dc_peer_pg = dc_client.update_subcloud_peer_group(
peer_group_name, **peer_group_kwargs)
LOG.info(f"Updated Subcloud Peer Group {peer_group_name} on "
f"peer site, ID is {dc_peer_pg.get('id')}.")
association_update = {
'sync_status': consts.ASSOCIATION_SYNC_STATUS_SYNCED,
'sync_message': None
'sync_status': consts.ASSOCIATION_SYNC_STATUS_IN_SYNC,
'sync_message': 'None',
'dc_peer_association_id': dc_peer_association_id,
'dc_client': dc_client
}
if sync_subclouds:
error_msg = self._sync_subclouds(context, peer, dc_local_pg.id,
@ -469,9 +578,7 @@ class SystemPeerManager(manager.Manager):
association_update['sync_status'] = \
consts.ASSOCIATION_SYNC_STATUS_FAILED
association_update['sync_message'] = json.dumps(error_msg)
if priority:
association_update['peer_group_priority'] = priority
association = db_api.peer_group_association_update(
association = self._update_sync_status(
context, association_id, **association_update)
self.peer_monitor_manager.peer_monitor_notify(context)
@ -481,12 +588,17 @@ class SystemPeerManager(manager.Manager):
except Exception as exception:
LOG.exception(f"Failed to sync peer group {peer_group_name} to "
f"peer site {peer.peer_name}")
db_api.peer_group_association_update(
context, association_id,
sync_status=consts.ASSOCIATION_SYNC_STATUS_FAILED,
sync_message=str(exception))
self._update_sync_status_to_failed(context, association_id,
str(exception),
dc_peer_association_id)
raise exception
def _delete_primary_association(self, context, association_id):
"""Delete primary peer group association."""
result = db_api.peer_group_association_destroy(context, association_id)
self.peer_monitor_manager.peer_monitor_notify(context)
return result
def delete_peer_group_association(self, context, association_id):
"""Delete association and remove related association from peer site.
@ -499,25 +611,52 @@ class SystemPeerManager(manager.Manager):
association = db_api.peer_group_association_get(context,
association_id)
peer = db_api.system_peer_get(context, association.system_peer_id)
peer_group = db_api.subcloud_peer_group_get(context,
association.peer_group_id)
dc_local_pg = db_api.subcloud_peer_group_get(context,
association.peer_group_id)
peer_group_name = dc_local_pg.peer_group_name
try:
# Check if the system_uuid of the peer site matches with the
# peer_uuid
system = self.get_peer_sysinv_client(peer).get_system()
if system.uuid != peer.peer_uuid:
LOG.warning(f"Peer site system uuid {system.uuid} does not "
f"match with the peer_uuid {peer.peer_uuid}")
return self._delete_primary_association(context, association_id)
dc_client = self.get_peer_dc_client(peer)
dc_peer_pg = dc_client.get_subcloud_peer_group(
peer_group.peer_group_name)
# Get current site system information
local_system_uuid = utils.get_local_system().uuid
# Get peer site system peer
dc_peer_system_peer = self._get_peer_site_system_peer(
dc_client, local_system_uuid)
# Get peer site peer group
dc_peer_pg = self._get_peer_site_pg_by_name(dc_client,
peer_group_name)
if dc_peer_pg is None:
# peer group does not exist on peer site, the association should
# be deleted
LOG.warning(f"Subcloud Peer Group {peer_group_name} does "
f"not exist on peer site.")
return self._delete_primary_association(context, association_id)
dc_peer_pg_id = dc_peer_pg.get('id')
dc_peer_pg_priority = dc_peer_pg.get('group_priority')
# Check if the peer group priority is 0, if so, raise exception
if dc_peer_pg_priority == 0:
LOG.error(f"Failed to delete peer_group_association. "
f"Peer Group {peer_group.peer_group_name} "
f"has priority 0 on peer site.")
LOG.error(f"Failed to delete peer_group_association. Peer Group"
f" {peer_group_name} has priority 0 on peer site.")
raise exceptions.SubcloudPeerGroupHasWrongPriority(
priority=dc_peer_pg_priority)
# Use thread pool to limit number of operations in parallel
delete_pool = greenpool.GreenPool(size=MAX_PARALLEL_SUBCLOUD_DELETE)
subclouds = db_api.subcloud_get_for_peer_group(context,
peer_group.id)
dc_local_pg.id)
# Spawn threads to delete each subcloud
clean_function = functools.partial(self._delete_subcloud, dc_client)
@ -525,28 +664,78 @@ class SystemPeerManager(manager.Manager):
'peer subcloud clean', clean_function, delete_pool, subclouds)
if delete_error_msg:
association = db_api.peer_group_association_update(
context, association_id,
sync_status=consts.ASSOCIATION_SYNC_STATUS_FAILED,
sync_message=json.dumps(delete_error_msg))
self._update_sync_status_to_failed(context, association_id,
json.dumps(delete_error_msg))
return
try:
dc_client.delete_subcloud_peer_group(peer_group.peer_group_name)
LOG.info("Deleted Subcloud Peer Group "
f"{peer_group.peer_group_name} on peer site.")
except dccommon_exceptions.SubcloudPeerGroupNotFound:
LOG.debug(f"Subcloud Peer Group {peer_group.peer_group_name} "
"does not exist on peer site.")
except dccommon_exceptions.SubcloudPeerGroupDeleteFailedAssociated:
LOG.debug(f"Subcloud Peer Group {peer_group.peer_group_name} "
"delete failed as it is associated with system peer "
"on peer site.")
# System Peer does not exist on peer site, delete peer group
if dc_peer_system_peer is None:
try:
dc_client.delete_subcloud_peer_group(peer_group_name)
LOG.info(f"Deleted Subcloud Peer Group {peer_group_name} "
f"on peer site.")
except dccommon_exceptions.\
SubcloudPeerGroupDeleteFailedAssociated:
LOG.error(f"Subcloud Peer Group {peer_group_name} "
"delete failed as it is associated with System "
"Peer on peer site.")
return self._delete_primary_association(context, association_id)
dc_peer_system_peer_id = dc_peer_system_peer.get('id')
db_api.peer_group_association_destroy(context, association_id)
self.peer_monitor_manager.peer_monitor_notify(context)
# Get peer site non-primary association
dc_peer_association = self._get_non_primary_association(
dc_client, dc_peer_system_peer_id, dc_peer_pg_id)
# Delete peer group association on peer site if exist
if dc_peer_association is not None:
dc_peer_association_id = dc_peer_association.get("id")
dc_client.delete_peer_group_association(
dc_peer_association_id)
elif dc_peer_association is None:
LOG.warning(f"PeerGroupAssociation does not exist on peer site."
f"Peer Group ID: {dc_peer_pg_id}, peer site System "
f"Peer ID: {dc_peer_system_peer_id}")
try:
dc_client.delete_subcloud_peer_group(peer_group_name)
LOG.info("Deleted Subcloud Peer Group "
f"{peer_group_name} on peer site.")
except dccommon_exceptions.SubcloudPeerGroupDeleteFailedAssociated:
failed_message = f"Subcloud Peer Group {peer_group_name} " \
+ "delete failed as it is associated with system peer " \
+ "on peer site."
self._update_sync_status_to_failed(context, association_id,
failed_message)
LOG.error(failed_message)
raise
return self._delete_primary_association(context, association_id)
except Exception as exception:
LOG.exception("Failed to delete peer_group_association "
f"{association.id}")
raise exception
def handle_association_operations_in_progress(self):
"""Identify associations in transitory stages and update association
state to failure.
"""
LOG.info('Identifying associations in transitory stages.')
associations = db_api.peer_group_association_get_all(self.context)
for association in associations:
# Identify associations in transitory states
new_sync_status = TRANSITORY_STATES.get(association.sync_status)
# update syncing states to the corresponding failure states
if new_sync_status:
LOG.info(f"Changing association {association.id} sync status "
f"from {association.sync_status} to {new_sync_status}")
db_api.peer_group_association_update(
self.context,
association.id,
sync_status=new_sync_status or association.sync_status,
sync_message="Service restart during syncing")

View File

@ -265,10 +265,10 @@ class ManagerClient(RPCClient):
return self.cast(ctxt, self.make_msg(
'sync_subcloud_peer_group', association_id=association_id))
def update_subcloud_peer_group(self, ctxt, association_id, priority):
def update_subcloud_peer_group(self, ctxt, association_id):
return self.call(ctxt, self.make_msg(
'sync_subcloud_peer_group', association_id=association_id,
sync_subclouds=False, priority=priority))
sync_subclouds=False))
def delete_peer_group_association(self, ctxt, association_id):
return self.call(ctxt, self.make_msg('delete_peer_group_association',

View File

@ -48,6 +48,7 @@ SAMPLE_PEER_GROUP_PRIORITY = 1
SAMPLE_PEER_GROUP_PRIORITY_UPDATED = 99
SAMPLE_SYNC_STATUS = 'synced'
SAMPLE_SYNC_MESSAGE = 'None'
SAMPLE_ASSOCIATION_TYPE = 'primary'
class FakeSystem(object):
@ -142,7 +143,9 @@ class PeerGroupAssociationAPIMixin(APIMixin):
'peer_group_priority': kw.get('peer_group_priority',
SAMPLE_PEER_GROUP_PRIORITY),
'sync_status': kw.get('sync_status', SAMPLE_SYNC_STATUS),
'sync_message': kw.get('sync_message', SAMPLE_SYNC_MESSAGE)
'sync_message': kw.get('sync_message', SAMPLE_SYNC_MESSAGE),
'association_type': kw.get('association_type',
SAMPLE_ASSOCIATION_TYPE)
}
return association

View File

@ -41,17 +41,19 @@ FAKE_SITE1_PEER_GROUP_ID = 9
# FAKE SUBCLOUD DATA (SITE1)
FAKE_SITE1_SUBCLOUD1_ID = 11
FAKE_SITE1_SUBCLOUD1_REGION_NAME = str(uuid.uuid4())
FAKE_SITE1_SUBCLOUD1_DEPLOY_STATUS = 'secondary'
FAKE_SITE1_SUBCLOUD1_DATA = {"id": FAKE_SITE1_SUBCLOUD1_ID,
"name": "subcloud1",
"region-name": "subcloud1",
"region-name": FAKE_SITE1_SUBCLOUD1_REGION_NAME,
"deploy-status":
FAKE_SITE1_SUBCLOUD1_DEPLOY_STATUS}
FAKE_SITE1_SUBCLOUD2_ID = 12
FAKE_SITE1_SUBCLOUD2_REGION_NAME = str(uuid.uuid4())
FAKE_SITE1_SUBCLOUD2_DEPLOY_STATUS = 'secondary-failed'
FAKE_SITE1_SUBCLOUD2_DATA = {"id": FAKE_SITE1_SUBCLOUD2_ID,
"name": "subcloud2",
"region-name": "subcloud2",
"region-name": FAKE_SITE1_SUBCLOUD2_REGION_NAME,
"deploy-status":
FAKE_SITE1_SUBCLOUD2_DEPLOY_STATUS}
FAKE_SITE1_SUBCLOUD3_ID = 13
@ -62,7 +64,7 @@ FAKE_SITE1_SUBCLOUD3_DATA = {"id": FAKE_SITE1_SUBCLOUD3_ID,
"deploy-status":
FAKE_SITE1_SUBCLOUD3_DEPLOY_STATUS}
# FAKE PEER GROUP ASSOCIATION DATA
# FAKE PEER GROUP ASSOCIATION DATA (SITE0)
FAKE_ASSOCIATION_PEER_GROUP_ID = \
FAKE_SITE0_PEER_GROUP_ID
FAKE_ASSOCIATION_SYSTEM_PEER_ID = \
@ -70,6 +72,10 @@ FAKE_ASSOCIATION_SYSTEM_PEER_ID = \
FAKE_ASSOCIATION_PEER_GROUP_PRIORITY = 1
FAKE_ASSOCIATION_SYNC_STATUS = 'synced'
FAKE_ASSOCIATION_SYNC_MESSAGE = 'None'
FAKE_ASSOCIATION_TYPE = 'primary'
# FAKE PEER GROUP ASSOCIATION DATA (SITE1)
FAKE_SITE1_ASSOCIATION_ID = 10
class FakeDCManagerAuditAPI(object):
@ -172,7 +178,8 @@ class TestSystemPeerManager(base.DCManagerTestCase):
"peer_group_id": FAKE_ASSOCIATION_PEER_GROUP_ID,
"peer_group_priority": FAKE_ASSOCIATION_PEER_GROUP_PRIORITY,
"sync_status": FAKE_ASSOCIATION_SYNC_STATUS,
"sync_message": FAKE_ASSOCIATION_SYNC_MESSAGE
"sync_message": FAKE_ASSOCIATION_SYNC_MESSAGE,
"association_type": FAKE_ASSOCIATION_TYPE
}
values.update(kwargs)
return db_api.peer_group_association_create(ctxt, **values)
@ -193,6 +200,8 @@ class TestSystemPeerManager(base.DCManagerTestCase):
mock_sysinv_client.return_value = FakeSysinvClient()
mock_dc_client.return_value = FakeDcmanagerClient()
mock_dc_client().add_subcloud_with_secondary_status = mock.MagicMock()
mock_dc_client().add_subcloud_with_secondary_status.return_value = {
"region-name": FAKE_SITE1_SUBCLOUD2_REGION_NAME}
mock_dc_client().delete_subcloud = mock.MagicMock()
peer = self.create_system_peer_static(
@ -247,8 +256,12 @@ class TestSystemPeerManager(base.DCManagerTestCase):
])
mock_dc_client().update_subcloud.assert_has_calls([
mock.call('subcloud1', mock.ANY, mock.ANY),
mock.call('subcloud1', files=None,
data={'peer_group': str(FAKE_SITE1_PEER_GROUP_ID)})
mock.call(FAKE_SITE1_SUBCLOUD1_REGION_NAME, files=None,
data={'peer_group': str(FAKE_SITE1_PEER_GROUP_ID)},
is_region_name=True),
mock.call(FAKE_SITE1_SUBCLOUD2_REGION_NAME, files=None,
data={'peer_group': str(FAKE_SITE1_PEER_GROUP_ID)},
is_region_name=True)
])
mock_dc_client().add_subcloud_with_secondary_status. \
assert_called_once()
@ -256,6 +269,7 @@ class TestSystemPeerManager(base.DCManagerTestCase):
@mock.patch.object(
system_peer_manager.SystemPeerManager, '_sync_subclouds')
@mock.patch.object(system_peer_manager, 'utils')
@mock.patch.object(system_peer_manager, 'PeerSiteDriver')
@mock.patch.object(system_peer_manager, 'SysinvClient')
@mock.patch.object(system_peer_manager, 'DcmanagerClient')
@ -263,6 +277,7 @@ class TestSystemPeerManager(base.DCManagerTestCase):
mock_dc_client,
mock_sysinv_client,
mock_keystone_client,
mock_utils,
mock_sync_subclouds):
mock_sync_subclouds.return_value = True
mock_keystone_client().keystone_client = FakeKeystoneClient()
@ -270,6 +285,12 @@ class TestSystemPeerManager(base.DCManagerTestCase):
mock_dc_client.return_value = FakeDcmanagerClient()
mock_dc_client().get_subcloud_peer_group = mock.MagicMock()
mock_dc_client().update_subcloud_peer_group = mock.MagicMock()
mock_dc_client().get_system_peer = mock.MagicMock()
mock_dc_client().get_peer_group_association_with_peer_id_and_pg_id = \
mock.MagicMock()
mock_dc_client().update_peer_group_association_sync_status = \
mock.MagicMock()
mock_utils().get_local_system = mock.MagicMock()
peer = self.create_system_peer_static(
self.ctx,
@ -291,12 +312,14 @@ class TestSystemPeerManager(base.DCManagerTestCase):
@mock.patch.object(
system_peer_manager.SystemPeerManager, '_sync_subclouds')
@mock.patch.object(system_peer_manager, 'utils')
@mock.patch.object(system_peer_manager, 'PeerSiteDriver')
@mock.patch.object(system_peer_manager, 'SysinvClient')
@mock.patch.object(system_peer_manager, 'DcmanagerClient')
def test_sync_subcloud_peer_group_not_exist(self, mock_dc_client,
mock_sysinv_client,
mock_keystone_client,
mock_utils,
mock_sync_subclouds):
mock_sync_subclouds.return_value = True
mock_keystone_client().keystone_client = FakeKeystoneClient()
@ -305,6 +328,12 @@ class TestSystemPeerManager(base.DCManagerTestCase):
mock_dc_client().get_subcloud_peer_group = mock.MagicMock()
mock_dc_client().add_subcloud_peer_group = mock.MagicMock()
mock_dc_client().update_subcloud_peer_group = mock.MagicMock()
mock_dc_client().get_system_peer = mock.MagicMock()
mock_dc_client().get_peer_group_association_with_peer_id_and_pg_id = \
mock.MagicMock()
mock_dc_client().update_peer_group_association_sync_status = \
mock.MagicMock()
mock_utils().get_local_system = mock.MagicMock()
peer = self.create_system_peer_static(
self.ctx,
@ -333,17 +362,27 @@ class TestSystemPeerManager(base.DCManagerTestCase):
'system-leader-name': peer_group.system_leader_name,
'max-subcloud-rehoming': peer_group.max_subcloud_rehoming
})
mock_dc_client().update_subcloud_peer_group.assert_not_called()
@mock.patch.object(system_peer_manager, 'utils')
@mock.patch.object(system_peer_manager, 'PeerSiteDriver')
@mock.patch.object(system_peer_manager, 'SysinvClient')
@mock.patch.object(system_peer_manager, 'DcmanagerClient')
def test_delete_peer_group_association(self,
mock_dc_client,
mock_keystone_client):
mock_sysinv_client,
mock_keystone_client,
mock_utils):
mock_keystone_client().keystone_client = FakeKeystoneClient()
mock_sysinv_client.return_value = FakeSysinvClient()
mock_dc_client.return_value = FakeDcmanagerClient()
mock_dc_client().delete_subcloud_peer_group = mock.MagicMock()
mock_dc_client().delete_subcloud = mock.MagicMock()
mock_dc_client().get_subcloud_peer_group = mock.MagicMock()
mock_dc_client().get_system_peer = mock.MagicMock()
mock_dc_client().get_peer_group_association_with_peer_id_and_pg_id = \
mock.MagicMock()
mock_dc_client().delete_peer_group_association = mock.MagicMock()
mock_utils().get_local_system = mock.MagicMock()
peer = self.create_system_peer_static(
self.ctx,
@ -370,11 +409,8 @@ class TestSystemPeerManager(base.DCManagerTestCase):
mock_dc_client().get_subcloud = mock.MagicMock()
mock_dc_client().get_subcloud.side_effect = [
peer_subcloud1, peer_subcloud2]
mock_dc_client().get_subcloud_peer_group = mock.MagicMock()
mock_dc_client().get_subcloud_peer_group.return_value = {
'group_priority': 1
}
mock_dc_client().get_peer_group_association_with_peer_id_and_pg_id.\
return_value = {'id': FAKE_SITE1_ASSOCIATION_ID}
spm = system_peer_manager.SystemPeerManager(mock.MagicMock())
spm.delete_peer_group_association(self.ctx, association.id)
@ -385,6 +421,61 @@ class TestSystemPeerManager(base.DCManagerTestCase):
])
mock_dc_client().delete_subcloud_peer_group.assert_called_once_with(
peer_group.peer_group_name)
mock_dc_client().delete_peer_group_association.assert_called_once_with(
FAKE_SITE1_ASSOCIATION_ID)
associations = db_api.peer_group_association_get_all(self.ctx)
self.assertEqual(0, len(associations))
@mock.patch.object(system_peer_manager, 'utils')
@mock.patch.object(system_peer_manager, 'PeerSiteDriver')
@mock.patch.object(system_peer_manager, 'SysinvClient')
@mock.patch.object(system_peer_manager, 'DcmanagerClient')
def test_delete_peer_group_association_peer_site_association_not_exsit(
self, mock_dc_client, mock_sysinv_client, mock_keystone_client,
mock_utils):
mock_keystone_client().keystone_client = FakeKeystoneClient()
mock_sysinv_client.return_value = FakeSysinvClient()
mock_dc_client.return_value = FakeDcmanagerClient()
mock_dc_client().delete_subcloud_peer_group = mock.MagicMock()
mock_dc_client().delete_subcloud = mock.MagicMock()
mock_dc_client().get_subcloud_peer_group = mock.MagicMock()
mock_dc_client().get_system_peer = mock.MagicMock()
mock_dc_client().get_peer_group_association_with_peer_id_and_pg_id = \
mock.MagicMock()
mock_dc_client().delete_peer_group_association = mock.MagicMock()
mock_utils().get_local_system = mock.MagicMock()
peer = self.create_system_peer_static(
self.ctx,
peer_name='SystemPeer1')
peer_group = self.create_subcloud_peer_group_static(
self.ctx,
peer_group_name='SubcloudPeerGroup1')
# Create local dc subcloud1 mock data in database
subcloud1 = self.create_subcloud_with_pg_static(
self.ctx,
peer_group_id=peer_group.id,
name='subcloud1')
association = self.create_peer_group_association_static(
self.ctx,
system_peer_id=peer.id,
peer_group_id=peer_group.id)
peer_subcloud1 = FAKE_SITE1_SUBCLOUD1_DATA
mock_dc_client().get_subcloud = mock.MagicMock()
mock_dc_client().get_subcloud.side_effect = [
peer_subcloud1, dccommon_exceptions.SubcloudNotFound]
mock_dc_client().get_peer_group_association_with_peer_id_and_pg_id.\
side_effect = [dccommon_exceptions.PeerGroupAssociationNotFound]
spm = system_peer_manager.SystemPeerManager(mock.MagicMock())
spm.delete_peer_group_association(self.ctx, association.id)
mock_dc_client().delete_subcloud.assert_has_calls([
mock.call(subcloud1.name)])
mock_dc_client().delete_subcloud_peer_group.assert_called_once_with(
peer_group.peer_group_name)
mock_dc_client().delete_peer_group_association.assert_not_called()
associations = db_api.peer_group_association_get_all(self.ctx)
self.assertEqual(0, len(associations))