Merge "Auditor automatic reconciliation of subclouds"
This commit is contained in:
commit
75296e1caa
|
@ -427,7 +427,8 @@ class DcmanagerClient(base.DriverBase):
|
|||
raise ValueError("subcloud_ref is required.")
|
||||
url = f"{self.endpoint}/subclouds/{subcloud_ref}"
|
||||
|
||||
headers = {"X-Auth-Token": self.token}
|
||||
headers = {"X-Auth-Token": self.token,
|
||||
"User-Agent": consts.DCMANAGER_V1_HTTP_AGENT}
|
||||
response = requests.delete(url, headers=headers,
|
||||
timeout=self.timeout)
|
||||
|
||||
|
|
|
@ -362,7 +362,8 @@ class TestDcmanagerClient(base.DCCommonTestCase):
|
|||
result = client.delete_subcloud(SUBCLOUD_NAME)
|
||||
mock_delete.assert_called_once_with(
|
||||
FAKE_ENDPOINT + '/subclouds/' + SUBCLOUD_NAME,
|
||||
headers={"X-Auth-Token": FAKE_TOKEN},
|
||||
headers={"X-Auth-Token": FAKE_TOKEN,
|
||||
"User-Agent": dccommon_consts.DCMANAGER_V1_HTTP_AGENT},
|
||||
timeout=FAKE_TIMEOUT
|
||||
)
|
||||
self.assertEqual(result, '')
|
||||
|
|
|
@ -648,6 +648,100 @@ class SubcloudsController(object):
|
|||
if not payload:
|
||||
pecan.abort(400, _('Body required'))
|
||||
|
||||
peer_group = payload.get('peer_group')
|
||||
# Verify the peer_group is valid
|
||||
peer_group_id = None
|
||||
if peer_group is not None:
|
||||
# peer_group may be passed in the payload as an int or str
|
||||
peer_group = str(peer_group)
|
||||
# Get current site system information
|
||||
local_system_uuid = utils.get_local_system().uuid
|
||||
# Check if user wants to remove a subcloud
|
||||
# from a subcloud-peer-group by
|
||||
# setting peer_group_id as 'none',
|
||||
# then we will pass 'none' string as
|
||||
# the peer_group_id,
|
||||
# update_subcloud() will handle it and
|
||||
# Set the peer_group_id DB into None.
|
||||
if peer_group.lower() == 'none':
|
||||
if subcloud.peer_group_id is not None:
|
||||
# Get the peer group of the subcloud
|
||||
original_pgrp = db_api.subcloud_peer_group_get(
|
||||
context, subcloud.peer_group_id)
|
||||
# Check the system leader is not on this site
|
||||
if original_pgrp.system_leader_id != local_system_uuid:
|
||||
pecan.abort(400, _("Removing subcloud from a "
|
||||
"peer group not led by the "
|
||||
"current site is prohibited."))
|
||||
# Get associations by peer group id
|
||||
associations = db_api.\
|
||||
peer_group_association_get_by_peer_group_id(
|
||||
context, original_pgrp.id)
|
||||
for association in associations:
|
||||
system_peer = db_api.system_peer_get(
|
||||
context, association.system_peer_id)
|
||||
# If the system peer is available, do not allow
|
||||
# removing the subcloud from a secondary peer group
|
||||
if system_peer.availability_state == consts.\
|
||||
SYSTEM_PEER_AVAILABILITY_STATE_AVAILABLE \
|
||||
and original_pgrp.group_priority > 0:
|
||||
pecan.abort(400, _(
|
||||
"Removing subcloud from a peer group "
|
||||
"associated with an available system peer "
|
||||
"is prohibited."))
|
||||
peer_group_id = 'none'
|
||||
else:
|
||||
if subcloud.peer_group_id is not None and \
|
||||
str(subcloud.peer_group_id) != peer_group:
|
||||
original_pgrp = utils.subcloud_peer_group_get_by_ref(
|
||||
context, str(subcloud.peer_group_id))
|
||||
if original_pgrp and original_pgrp.group_priority > 0:
|
||||
pecan.abort(400, _(
|
||||
"Cannot update subcloud to a new peer group "
|
||||
"if the original peer group has non-zero "
|
||||
"priority."))
|
||||
pgrp = utils.subcloud_peer_group_get_by_ref(context, peer_group)
|
||||
if not pgrp:
|
||||
pecan.abort(400, _('Invalid peer group'))
|
||||
if not utils.is_req_from_another_dc(request):
|
||||
if pgrp.group_priority > 0:
|
||||
pecan.abort(400, _("Cannot set the subcloud to a peer"
|
||||
" group with non-zero priority."))
|
||||
elif pgrp.system_leader_id != local_system_uuid:
|
||||
pecan.abort(400, _("Update subcloud to a peer "
|
||||
"group that is not led by the "
|
||||
"current site is prohibited."))
|
||||
elif not (
|
||||
subcloud.deploy_status == consts.DEPLOY_STATE_DONE
|
||||
and subcloud.management_state ==
|
||||
dccommon_consts.MANAGEMENT_MANAGED
|
||||
and subcloud.availability_status ==
|
||||
dccommon_consts.AVAILABILITY_ONLINE):
|
||||
pecan.abort(400, _("Only subclouds that are "
|
||||
"managed and online can be "
|
||||
"added to a peer group."))
|
||||
peer_group_id = pgrp.id
|
||||
|
||||
# Subcloud can only be updated while it is managed in
|
||||
# the primary site because the sync command can only be issued
|
||||
# in the site where the SPG was created.
|
||||
if subcloud.peer_group_id is not None and peer_group_id is None \
|
||||
and not utils.is_req_from_another_dc(request):
|
||||
# Get the peer group of the subcloud
|
||||
original_pgrp = db_api.subcloud_peer_group_get(
|
||||
context, subcloud.peer_group_id)
|
||||
if original_pgrp.group_priority > 0:
|
||||
pecan.abort(400, _("Subcloud update is only allowed when "
|
||||
"its peer group priority value is 0."))
|
||||
# Get current site system information
|
||||
local_system_uuid = utils.get_local_system().uuid
|
||||
# When updating a subcloud that belongs to a peer group on the
|
||||
# primary site, the peer group must be led by the primary site.
|
||||
if original_pgrp.system_leader_id != local_system_uuid:
|
||||
pecan.abort(400, _("Updating subcloud from a "
|
||||
"peer group not led by the "
|
||||
"current site is prohibited."))
|
||||
|
||||
# Rename the subcloud
|
||||
new_subcloud_name = payload.get('name')
|
||||
if new_subcloud_name is not None:
|
||||
|
@ -736,7 +830,6 @@ class SubcloudsController(object):
|
|||
description = payload.get('description')
|
||||
location = payload.get('location')
|
||||
bootstrap_values = payload.get('bootstrap_values')
|
||||
peer_group = payload.get('peer_group')
|
||||
bootstrap_address = payload.get('bootstrap_address')
|
||||
|
||||
# If the migrate flag is present we need to update the deploy status
|
||||
|
@ -752,6 +845,11 @@ class SubcloudsController(object):
|
|||
management_state not in [dccommon_consts.MANAGEMENT_UNMANAGED,
|
||||
dccommon_consts.MANAGEMENT_MANAGED]:
|
||||
pecan.abort(400, _('Invalid management-state'))
|
||||
if management_state and subcloud.peer_group_id is not None \
|
||||
and not utils.is_req_from_another_dc(request):
|
||||
pecan.abort(400, _('Cannot update the management state of a '
|
||||
'subcloud that is associated with '
|
||||
'a peer group.'))
|
||||
|
||||
force_flag = payload.get('force')
|
||||
if force_flag is not None:
|
||||
|
@ -776,41 +874,6 @@ class SubcloudsController(object):
|
|||
exceptions.SubcloudGroupNotFound):
|
||||
pecan.abort(400, _('Invalid group'))
|
||||
|
||||
# Verify the peer_group is valid
|
||||
peer_group_id = None
|
||||
if peer_group is not None:
|
||||
# peer_group may be passed in the payload as an int or str
|
||||
peer_group = str(peer_group)
|
||||
# Check if user wants to remove a subcloud
|
||||
# from a subcloud-peer-group by
|
||||
# setting peer_group_id as 'none',
|
||||
# then we will pass 'none' string as
|
||||
# the peer_group_id,
|
||||
# update_subcloud() will handle it and
|
||||
# Set the peer_group_id DB into None.
|
||||
if peer_group.lower() == 'none':
|
||||
peer_group_id = 'none'
|
||||
else:
|
||||
pgrp = utils.subcloud_peer_group_get_by_ref(context, peer_group)
|
||||
if not pgrp:
|
||||
pecan.abort(400, _('Invalid peer group'))
|
||||
if not utils.is_req_from_another_dc(request):
|
||||
if pgrp.group_priority > 0:
|
||||
pecan.abort(400, _("Cannot set the subcloud to a peer"
|
||||
" group with non-zero priority."))
|
||||
elif not (
|
||||
subcloud.deploy_status in [
|
||||
consts.DEPLOY_STATE_DONE,
|
||||
consts.PRESTAGE_STATE_COMPLETE
|
||||
] and subcloud.management_state ==
|
||||
dccommon_consts.MANAGEMENT_MANAGED
|
||||
and subcloud.availability_status ==
|
||||
dccommon_consts.AVAILABILITY_ONLINE):
|
||||
pecan.abort(400, _("Only subclouds that are "
|
||||
"managed and online can be "
|
||||
"added to a peer group."))
|
||||
peer_group_id = pgrp.id
|
||||
|
||||
if consts.INSTALL_VALUES in payload:
|
||||
# install_values of secondary subclouds are validated on
|
||||
# peer site
|
||||
|
@ -998,6 +1061,20 @@ class SubcloudsController(object):
|
|||
pecan.abort(404, _('Subcloud not found'))
|
||||
|
||||
subcloud_id = subcloud.id
|
||||
peer_group_id = subcloud.peer_group_id
|
||||
subcloud_management_state = subcloud.management_state
|
||||
|
||||
# Check if the subcloud is "managed" status
|
||||
if subcloud_management_state == dccommon_consts.MANAGEMENT_MANAGED \
|
||||
and not utils.is_req_from_another_dc(request):
|
||||
pecan.abort(400, _('Cannot delete a subcloud that is "managed" '
|
||||
'status'))
|
||||
|
||||
# Check if the subcloud is part of a peer group
|
||||
if peer_group_id is not None and \
|
||||
not utils.is_req_from_another_dc(request):
|
||||
pecan.abort(400, _('Cannot delete a subcloud that is part of '
|
||||
'a peer group on this site'))
|
||||
|
||||
try:
|
||||
# Ask dcmanager-manager to delete the subcloud.
|
||||
|
|
|
@ -51,9 +51,9 @@ class PeerGroupAuditManager(manager.Manager):
|
|||
peer_group_name)
|
||||
return subclouds
|
||||
except Exception:
|
||||
LOG.exception("Failed to get subclouds of peer group %s "
|
||||
"from DC: %s" %
|
||||
(peer_group_name, system_peer.peer_name))
|
||||
LOG.exception(f"Failed to get subclouds of peer group "
|
||||
f"{peer_group_name} from DC: "
|
||||
f"{system_peer.peer_name}")
|
||||
|
||||
def _update_remote_peer_group_migration_status(self,
|
||||
system_peer,
|
||||
|
@ -65,37 +65,58 @@ class PeerGroupAuditManager(manager.Manager):
|
|||
}
|
||||
dc_client.update_subcloud_peer_group(peer_group_name,
|
||||
**peer_group_kwargs)
|
||||
LOG.info("Updated Subcloud Peer Group %s on "
|
||||
"peer site %s, set migration_status to: %s" %
|
||||
(peer_group_name, system_peer.peer_name, migration_status))
|
||||
LOG.info(f"Updated Subcloud Peer Group {peer_group_name} on "
|
||||
f"peer site {system_peer.peer_name}, set migration_status "
|
||||
f"to: {migration_status}")
|
||||
|
||||
def _get_local_subclouds_to_update(self,
|
||||
def _get_local_subclouds_to_update_and_delete(self,
|
||||
local_peer_group,
|
||||
remote_subclouds):
|
||||
local_subclouds_to_update = list()
|
||||
remote_managed_subcloud_region_names = list()
|
||||
local_subclouds_to_delete = list()
|
||||
remote_subclouds_dict = {remote_subcloud.get('region-name'):
|
||||
remote_subcloud for remote_subcloud
|
||||
in remote_subclouds}
|
||||
local_subclouds = db_api.subcloud_get_for_peer_group(
|
||||
self.context, local_peer_group.id)
|
||||
|
||||
# get the 'managed+online' remote subclouds
|
||||
for remote_subcloud in remote_subclouds:
|
||||
for local_subcloud in local_subclouds:
|
||||
remote_subcloud = remote_subclouds_dict.get(
|
||||
local_subcloud.region_name)
|
||||
if remote_subcloud:
|
||||
# Check if the remote subcloud meets the conditions for update
|
||||
# if it is 'managed' and the local subcloud is not
|
||||
# in 'secondary' status
|
||||
if (remote_subcloud.get('management-state') ==
|
||||
dccommon_consts.MANAGEMENT_MANAGED and
|
||||
remote_subcloud.get('availability-status') ==
|
||||
dccommon_consts.AVAILABILITY_ONLINE):
|
||||
remote_managed_subcloud_region_names.append(
|
||||
remote_subcloud.get('region-name'))
|
||||
|
||||
# Compare with the 'non-secondary' local subclouds
|
||||
for local_subcloud in local_subclouds:
|
||||
if local_subcloud.region_name in \
|
||||
remote_managed_subcloud_region_names \
|
||||
and not utils.subcloud_is_secondary_state(
|
||||
local_subcloud.deploy_status):
|
||||
|
||||
not utils.subcloud_is_secondary_state(
|
||||
local_subcloud.deploy_status)):
|
||||
local_subclouds_to_update.append(local_subcloud)
|
||||
else:
|
||||
local_subclouds_to_delete.append(local_subcloud)
|
||||
|
||||
return local_subclouds_to_update
|
||||
return local_subclouds_to_update, local_subclouds_to_delete
|
||||
|
||||
def _set_local_subcloud_to_secondary(self, subcloud):
|
||||
try:
|
||||
LOG.info("Set local subcloud %s to secondary" % subcloud.name)
|
||||
# There will be an exception when unmanage
|
||||
# a subcloud in 'unmanaged' state.
|
||||
if subcloud.management_state != \
|
||||
dccommon_consts.MANAGEMENT_UNMANAGED:
|
||||
self.subcloud_manager.update_subcloud(
|
||||
self.context,
|
||||
subcloud.id,
|
||||
management_state=dccommon_consts.
|
||||
MANAGEMENT_UNMANAGED)
|
||||
self.subcloud_manager.update_subcloud(
|
||||
self.context,
|
||||
subcloud.id,
|
||||
deploy_status=consts.DEPLOY_STATE_SECONDARY)
|
||||
except Exception as e:
|
||||
LOG.exception(f"Failed to update local non-secondary "
|
||||
f"and offline subcloud [{subcloud.name}], err: {e}")
|
||||
raise e
|
||||
|
||||
def audit(self, system_peer, remote_peer_group, local_peer_group):
|
||||
if local_peer_group.migration_status == consts.PEER_GROUP_MIGRATING:
|
||||
|
@ -120,9 +141,9 @@ class PeerGroupAuditManager(manager.Manager):
|
|||
if remote_peer_group.get("migration_status") == \
|
||||
consts.PEER_GROUP_MIGRATING:
|
||||
# Unmanage all local subclouds of the peer group
|
||||
LOG.info("Unmanaged all local subclouds of peer group %s "
|
||||
"since remote is in migrating state" %
|
||||
local_peer_group.peer_group_name)
|
||||
LOG.info(f"Unmanaged all local subclouds of peer group "
|
||||
f"{local_peer_group.peer_group_name} "
|
||||
f"since remote is in migrating state")
|
||||
subclouds = db_api.subcloud_get_for_peer_group(self.context,
|
||||
local_peer_group.id)
|
||||
for subcloud in subclouds:
|
||||
|
@ -152,8 +173,8 @@ class PeerGroupAuditManager(manager.Manager):
|
|||
subcloud.id,
|
||||
deploy_status=consts.DEPLOY_STATE_REHOME_PENDING)
|
||||
except Exception as e:
|
||||
LOG.exception("Fail to unmanage local subcloud %s, err: "
|
||||
"%s" % (subcloud.name, e))
|
||||
LOG.exception(f"Fail to unmanage local subcloud "
|
||||
f"{subcloud.name}, err: {e}")
|
||||
raise e
|
||||
self.require_audit_flag = False
|
||||
|
||||
|
@ -167,39 +188,29 @@ class PeerGroupAuditManager(manager.Manager):
|
|||
system_peer,
|
||||
remote_peer_group.get("peer_group_name"))
|
||||
|
||||
if not remote_subclouds:
|
||||
LOG.error("No subclouds in remote DC:%s's peer group %s" %
|
||||
(system_peer.peer_name,
|
||||
remote_peer_group.get("peer_group_name")))
|
||||
return
|
||||
local_subclouds_to_update = \
|
||||
self._get_local_subclouds_to_update(local_peer_group,
|
||||
remote_subclouds)
|
||||
local_subclouds_to_update, local_subclouds_to_delete = \
|
||||
self._get_local_subclouds_to_update_and_delete(
|
||||
local_peer_group, remote_subclouds)
|
||||
|
||||
for subcloud in local_subclouds_to_update:
|
||||
self._set_local_subcloud_to_secondary(subcloud)
|
||||
|
||||
# Set local subclouds that do not exist in the peer site's SPG to
|
||||
# secondary status, then delete them
|
||||
for subcloud in local_subclouds_to_delete:
|
||||
self._set_local_subcloud_to_secondary(subcloud)
|
||||
try:
|
||||
LOG.info("Set secondary to local subcloud %s" %
|
||||
subcloud.name)
|
||||
# There will be an exception when unmanage
|
||||
# a subcloud in 'unmanaged' state.
|
||||
if subcloud.management_state != \
|
||||
dccommon_consts.MANAGEMENT_UNMANAGED:
|
||||
self.subcloud_manager.update_subcloud(
|
||||
self.context,
|
||||
subcloud.id,
|
||||
management_state=dccommon_consts.
|
||||
MANAGEMENT_UNMANAGED)
|
||||
self.subcloud_manager.update_subcloud(
|
||||
self.context,
|
||||
subcloud.id,
|
||||
deploy_status=consts.DEPLOY_STATE_SECONDARY)
|
||||
self.subcloud_manager.delete_subcloud(
|
||||
self.context, subcloud.id)
|
||||
LOG.info(f"Deleted local subcloud {subcloud.name}")
|
||||
except Exception as e:
|
||||
LOG.exception("Failed to update local non-secondary "
|
||||
"and offline subcloud [%s], err: %s" %
|
||||
(subcloud.name, e))
|
||||
LOG.exception(f"Failed to delete local subcloud "
|
||||
f"[{subcloud.name}] that does not exist "
|
||||
f"under the same subcloud_peer_group on "
|
||||
f"peer site, err: {e}")
|
||||
raise e
|
||||
|
||||
if local_subclouds_to_update:
|
||||
if local_subclouds_to_update or local_subclouds_to_delete:
|
||||
self._clear_or_raise_alarm(system_peer,
|
||||
local_peer_group,
|
||||
remote_peer_group)
|
||||
|
@ -229,10 +240,10 @@ class PeerGroupAuditManager(manager.Manager):
|
|||
entity_instance_id = "peer_group=%s,peer=%s" % \
|
||||
(local_peer_group.peer_group_name, system_peer.peer_uuid)
|
||||
if local_peer_group.group_priority < remote_peer_group.get('group_priority'):
|
||||
LOG.warning("Alarm: local subcloud peer group [%s] "
|
||||
"is managed by remote system [%s]" %
|
||||
(local_peer_group.peer_group_name,
|
||||
system_peer.peer_name))
|
||||
LOG.warning("Alarm: local subcloud peer group ["
|
||||
f"{local_peer_group.peer_group_name}] "
|
||||
f"is managed by remote system ["
|
||||
f"{system_peer.peer_name}]")
|
||||
try:
|
||||
fault = fm_api.Fault(
|
||||
alarm_id=fm_const.
|
||||
|
@ -266,15 +277,15 @@ class PeerGroupAuditManager(manager.Manager):
|
|||
fm_const.FM_ALARM_ID_DC_SUBCLOUD_PEER_GROUP_NOT_MANAGED,
|
||||
entity_instance_id)
|
||||
if fault:
|
||||
LOG.info("Clear alarm: %s" % entity_instance_id)
|
||||
LOG.info(f"Clear alarm: {entity_instance_id}")
|
||||
self.fm_api.clear_fault(
|
||||
fm_const.FM_ALARM_ID_DC_SUBCLOUD_PEER_GROUP_NOT_MANAGED,
|
||||
entity_instance_id)
|
||||
except Exception:
|
||||
LOG.exception(
|
||||
"Problem clearing fault [%s], alarm_id=%s" %
|
||||
(entity_instance_id,
|
||||
fm_const.FM_ALARM_ID_DC_SUBCLOUD_PEER_GROUP_NOT_MANAGED))
|
||||
f"Problem clearing fault [{entity_instance_id}], "
|
||||
f"alarm_id="
|
||||
f"{fm_const.FM_ALARM_ID_DC_SUBCLOUD_PEER_GROUP_NOT_MANAGED}")
|
||||
|
||||
def _do_audit(self, system_peer, remote_peer_group, local_peer_group):
|
||||
with self.thread_lock:
|
||||
|
@ -286,15 +297,14 @@ class PeerGroupAuditManager(manager.Manager):
|
|||
def stop(self):
|
||||
if self.thread:
|
||||
self.thread.join()
|
||||
LOG.info("stopped peer group %s audit thread" % self.peer_group_id)
|
||||
LOG.info(f"stopped peer group {self.peer_group_id} audit thread")
|
||||
else:
|
||||
LOG.info("No peer group %s audit thread to stop" %
|
||||
self.peer_group_id)
|
||||
LOG.info(f"No peer group {self.peer_group_id} audit thread to stop")
|
||||
|
||||
def start(self, system_peer, remote_peer_group, local_peer_group):
|
||||
if self.thread_lock.locked():
|
||||
LOG.warning('Audit thread for %s has already started' %
|
||||
local_peer_group.peer_group_name)
|
||||
LOG.warning(f"Audit thread for {local_peer_group.peer_group_name} "
|
||||
f"has already started")
|
||||
else:
|
||||
self.thread = threading.Thread(
|
||||
target=self._do_audit,
|
||||
|
@ -305,8 +315,8 @@ class PeerGroupAuditManager(manager.Manager):
|
|||
system_peer,
|
||||
remote_peer_group,
|
||||
local_peer_group):
|
||||
LOG.info("Audit peer group [%s] with remote system %s" %
|
||||
(local_peer_group.peer_group_name, system_peer.peer_name))
|
||||
LOG.info(f"Audit peer group [{local_peer_group.peer_group_name}] "
|
||||
f"with remote system {system_peer.peer_name}")
|
||||
self.start(system_peer, remote_peer_group, local_peer_group)
|
||||
|
||||
@staticmethod
|
||||
|
@ -332,6 +342,6 @@ class PeerGroupAuditManager(manager.Manager):
|
|||
if response:
|
||||
return response
|
||||
except Exception:
|
||||
LOG.exception("Failed to send audit request for peer group %s "
|
||||
"to DC: %s" %
|
||||
(peer_group.peer_group_name, system.peer_name))
|
||||
LOG.exception("Failed to send audit request for peer group "
|
||||
f"{peer_group.peer_group_name} to DC: "
|
||||
f"{system.peer_name}")
|
||||
|
|
Loading…
Reference in New Issue