Trigger subcloud patch and load audits by updating identity

During the first round of subcloud audits triggered by the subcloud
manage call, there is a race condition between dcdbsync and the
subcloud audit. The subcloud patch audit fails to authenticate because
the keystone database is still synchronizing and the original keystone
patching user and its role cannot be found. As a result, the patching
and load sync statuses remain "unknown" until the next periodic audit.

This commit removes the patch and load audit triggers from the manage
action and instead triggers them when the identity endpoint status is
updated from unknown to any other status.

After this change, the patch audit is expected to run, and the patching
sync status to be updated, right after the identity status moves out of
unknown.

Test plan:
1. Deployed several subclouds with this change.
2. Manage all the subclouds at the same time; no RC 401 error is returned
from the patching audit. All the subclouds go in-sync in a short time.
3. Unmanage all the subclouds and manage them again; all the subcloud
endpoints except the "dc-cert_sync_status" go in-sync -> unknown ->
in-sync without any error. The subcloud patch and load audits are
triggered after the identity endpoints turn back to in-sync.
4. Leave the subclouds for 1 hr; the calls from dcorch that update the
identity status to in-sync do not trigger an extra round of audits.
5. Add a user to update the identity status from in-sync -> out-of-sync
-> in-sync, extra subcloud patch and load audits are not triggered.
6. Apply a patch to the SystemController, the patch audit should
detect and raise the appropriate subcloud alarms.

Closes-bug: 1949477
Signed-off-by: Yuxing Jiang <yuxing.jiang@windriver.com>
Change-Id: I6b88fa6e5d9fd86b47f9662112be137ce80ed9cd
This commit is contained in:
Yuxing Jiang 2021-11-03 15:04:13 -04:00
parent 7398c99877
commit f55c356ab2
8 changed files with 162 additions and 16 deletions

View File

@ -75,8 +75,13 @@ class ManagerAuditClient(object):
def trigger_load_audit(self, ctxt):
return self.cast(ctxt, self.make_msg('trigger_load_audit'))
def trigger_subcloud_audits(self, ctxt, subcloud_id):
def trigger_subcloud_audits(self, ctxt, subcloud_id, exclude_endpoints=None):
return self.cast(ctxt, self.make_msg('trigger_subcloud_audits',
subcloud_id=subcloud_id,
exclude_endpoints=exclude_endpoints))
def trigger_subcloud_patch_load_audits(self, ctxt, subcloud_id):
return self.cast(ctxt, self.make_msg('trigger_subcloud_patch_load_audits',
subcloud_id=subcloud_id))

View File

@ -147,10 +147,18 @@ class DCManagerAuditService(service.Service):
return self.subcloud_audit_manager.trigger_load_audit(context)
@request_context
def trigger_subcloud_audits(self, context, subcloud_id):
def trigger_subcloud_audits(self, context, subcloud_id, exclude_endpoints):
"""Trigger all subcloud audits for one subcloud."""
LOG.info("Trigger all audits for subcloud %s", subcloud_id)
LOG.info("Trigger all audits for subcloud %s except endpoints %s" %
(subcloud_id, exclude_endpoints))
return self.subcloud_audit_manager.trigger_subcloud_audits(
context, subcloud_id, exclude_endpoints)
@request_context
def trigger_subcloud_patch_load_audits(self, context, subcloud_id):
"""Trigger patch and load audits for one subcloud."""
LOG.info("Trigger patch and load audits for subcloud %s", subcloud_id)
return self.subcloud_audit_manager.trigger_subcloud_patch_load_audits(
context, subcloud_id)

View File

@ -186,7 +186,7 @@ class SubcloudAuditManager(manager.Manager):
def reset_force_patch_audit(cls):
cls.force_patch_audit = False
def trigger_subcloud_audits(self, context, subcloud_id):
def trigger_subcloud_audits(self, context, subcloud_id, exclude_endpoints):
"""Trigger all subcloud audits for one subcloud."""
values = {
'patch_audit_requested': True,
@ -195,6 +195,22 @@ class SubcloudAuditManager(manager.Manager):
'kubernetes_audit_requested': True,
'kube_rootca_update_audit_requested': True,
}
# For the endpoints excluded in the audit, set it to False in db
# to disable the audit explicitly.
if exclude_endpoints:
for exclude_endpoint in exclude_endpoints:
exclude_request = dcorch_consts.ENDPOINT_AUDIT_REQUESTS.get(
exclude_endpoint)
if exclude_request:
values.update({exclude_request: False})
db_api.subcloud_audits_update(context, subcloud_id, values)
def trigger_subcloud_patch_load_audits(self, context, subcloud_id):
"""Trigger subcloud patch and load audits for one subcloud."""
values = {
"patch_audit_requested": True,
"load_audit_requested": True,
}
db_api.subcloud_audits_update(context, subcloud_id, values)
def periodic_subcloud_audit(self):

View File

@ -1253,9 +1253,13 @@ class SubcloudManager(manager.Manager):
LOG.info('Request for managed audit for %s' % subcloud.name)
dc_notification = rpc_client.DCManagerNotifications()
dc_notification.subcloud_managed(context, subcloud.name)
# Trigger all the audits for the subcloud so it can update the
# sync status ASAP.
self.audit_rpc_client.trigger_subcloud_audits(context, subcloud_id)
# Since sysinv user is sync'ed during bootstrap, trigger the
# related audits. Patch and load audits are delayed until the
# identity resource synchronized by dcdbsync is complete.
exclude_endpoints = [dcorch_consts.ENDPOINT_TYPE_PATCHING,
dcorch_consts.ENDPOINT_TYPE_LOAD]
self.audit_rpc_client.trigger_subcloud_audits(
context, subcloud_id, exclude_endpoints)
return db_api.subcloud_db_model_to_dict(subcloud)
@ -1278,6 +1282,7 @@ class SubcloudManager(manager.Manager):
subcloud_status_list = []
subcloud = None
original_identity_status = None
# retrieve the info from the db for this subcloud.
# subcloud_id should not be None
try:
@ -1287,6 +1292,9 @@ class SubcloudManager(manager.Manager):
subcloud_status_list.append(
db_api.subcloud_endpoint_status_db_model_to_dict(
subcloud_status))
if subcloud_status.endpoint_type == \
dcorch_consts.ENDPOINT_TYPE_IDENTITY:
original_identity_status = subcloud_status.sync_status
except Exception as e:
LOG.exception(e)
raise e
@ -1318,6 +1326,16 @@ class SubcloudManager(manager.Manager):
endpoint_type,
sync_status)
# Trigger subcloud patch and load audits for the subcloud after
# its identity endpoint turns to other status from unknown
if endpoint_type == dcorch_consts.ENDPOINT_TYPE_IDENTITY \
and sync_status != consts.SYNC_STATUS_UNKNOWN \
and original_identity_status == consts.SYNC_STATUS_UNKNOWN:
LOG.debug('Request for patch and load audit for %s after updating '
'identity out of unknown' % subcloud.name)
self.audit_rpc_client.trigger_subcloud_patch_load_audits(
context, subcloud_id)
entity_instance_id = "subcloud=%s.resource=%s" % \
(subcloud.name, endpoint_type)
fault = self.fm_api.get_fault(

View File

@ -26,6 +26,7 @@ from dcmanager.audit import subcloud_audit_manager
from dcmanager.db.sqlalchemy import api as db_api
from dcmanager.tests import base
from dcorch.common import consts as dcorch_consts
class FakeAuditWorkerAPI(object):
@ -296,7 +297,7 @@ class TestAuditManager(base.DCManagerTestCase):
def test_audit_one_subcloud(self):
subcloud = self.create_subcloud_static(self.ctx)
am = subcloud_audit_manager.SubcloudAuditManager()
am.trigger_subcloud_audits(self.ctx, subcloud.id)
am.trigger_subcloud_audits(self.ctx, subcloud.id, None)
# Subaudits should be requested.
result = db_api.subcloud_audits_get(self.ctx, subcloud.id)
self.assertEqual(result['patch_audit_requested'], True)
@ -305,6 +306,20 @@ class TestAuditManager(base.DCManagerTestCase):
self.assertEqual(result['kubernetes_audit_requested'], True)
self.assertEqual(result['kube_rootca_update_audit_requested'], True)
def test_audit_one_subcloud_exclude_endpoints(self):
subcloud = self.create_subcloud_static(self.ctx)
am = subcloud_audit_manager.SubcloudAuditManager()
exclude_endpoints = [dcorch_consts.ENDPOINT_TYPE_PATCHING,
dcorch_consts.ENDPOINT_TYPE_LOAD]
am.trigger_subcloud_audits(self.ctx, subcloud.id, exclude_endpoints)
# Verify subaudits be requested.
result = db_api.subcloud_audits_get(self.ctx, subcloud.id)
self.assertEqual(result['patch_audit_requested'], False)
self.assertEqual(result['firmware_audit_requested'], True)
self.assertEqual(result['load_audit_requested'], False)
self.assertEqual(result['kubernetes_audit_requested'], True)
self.assertEqual(result['kube_rootca_update_audit_requested'], True)
def test_trigger_load_audit(self):
subcloud = self.create_subcloud_static(self.ctx)
am = subcloud_audit_manager.SubcloudAuditManager()
@ -313,3 +328,16 @@ class TestAuditManager(base.DCManagerTestCase):
result = db_api.subcloud_audits_get(self.ctx, subcloud.id)
self.assertEqual(result['patch_audit_requested'], False)
self.assertEqual(result['load_audit_requested'], True)
def test_trigger_one_subcloud_patch_load_audits(self):
subcloud = self.create_subcloud_static(self.ctx)
am = subcloud_audit_manager.SubcloudAuditManager()
am.trigger_subcloud_patch_load_audits(self.ctx, subcloud.id)
# Subcloud patch and load audits should be requested.
result = db_api.subcloud_audits_get(self.ctx, subcloud.id)
self.assertEqual(result['patch_audit_requested'], True)
self.assertEqual(result['load_audit_requested'], True)
# Other audits should not be requested
self.assertEqual(result['firmware_audit_requested'], False)
self.assertEqual(result['kubernetes_audit_requested'], False)
self.assertEqual(result['kube_rootca_update_audit_requested'], False)

View File

@ -1084,7 +1084,7 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
# Now pretend someone triggered all the subaudits in the DB
# after the subcloud audit was triggered but before it ran.
am.trigger_subcloud_audits(self.ctx, subcloud.id)
am.trigger_subcloud_audits(self.ctx, subcloud.id, None)
# Make sure all subaudits are requested in DB
audits = db_api.subcloud_audits_get(self.ctx, subcloud.id)

View File

@ -45,6 +45,7 @@ class FakeDCManagerAuditAPI(object):
def __init__(self):
self.trigger_subcloud_audits = mock.MagicMock()
self.trigger_subcloud_patch_load_audits = mock.MagicMock()
class FakeDCOrchAPI(object):
@ -491,8 +492,11 @@ class TestSubcloudManager(base.DCManagerTestCase):
fake_dcmanager_notification.subcloud_managed.assert_called_once_with(
self.ctx, subcloud.name)
self.fake_dcmanager_audit_api.trigger_subcloud_audits.assert_called_once_with(
self.ctx, subcloud.id)
exclude_endpoints = [dcorch_consts.ENDPOINT_TYPE_PATCHING,
dcorch_consts.ENDPOINT_TYPE_LOAD]
self.fake_dcmanager_audit_api.trigger_subcloud_audits.\
assert_called_once_with(self.ctx, subcloud.id, exclude_endpoints)
# Verify subcloud was updated with correct values
updated_subcloud = db_api.subcloud_get_by_name(self.ctx, subcloud.name)
@ -528,8 +532,10 @@ class TestSubcloudManager(base.DCManagerTestCase):
fake_dcmanager_cermon_api.subcloud_managed.assert_called_once_with(
self.ctx, subcloud.name)
self.fake_dcmanager_audit_api.trigger_subcloud_audits.assert_called_once_with(
self.ctx, subcloud.id)
exclude_endpoints = [dcorch_consts.ENDPOINT_TYPE_PATCHING,
dcorch_consts.ENDPOINT_TYPE_LOAD]
self.fake_dcmanager_audit_api.trigger_subcloud_audits.\
assert_called_once_with(self.ctx, subcloud.id, exclude_endpoints)
# Verify subcloud was updated with correct values
updated_subcloud = db_api.subcloud_get_by_name(self.ctx, subcloud.name)
@ -991,6 +997,11 @@ class TestSubcloudManager(base.DCManagerTestCase):
endpoint_type=endpoint,
sync_status=consts.SYNC_STATUS_IN_SYNC)
# We trigger a subcloud audits after updating the identity from unknown
# to in-sync
self.fake_dcmanager_audit_api.trigger_subcloud_patch_load_audits.\
assert_called_once_with(self.ctx, subcloud.id)
# Audit fails once
audit_fail_count = 1
sm.update_subcloud_availability(self.ctx, subcloud.name,
@ -1029,9 +1040,58 @@ class TestSubcloudManager(base.DCManagerTestCase):
self.assertEqual(subcloud_status.sync_status,
consts.SYNC_STATUS_UNKNOWN)
# Verify we did not trigger subcloud audits
self.fake_dcmanager_audit_api.trigger_subcloud_audits.\
assert_not_called()
def test_update_subcloud_identity_endpoint(self):
subcloud = self.create_subcloud_static(self.ctx, name='subcloud1')
self.assertIsNotNone(subcloud)
sm = subcloud_manager.SubcloudManager()
# Set the subcloud to online/managed
db_api.subcloud_update(self.ctx, subcloud.id,
management_state=consts.MANAGEMENT_MANAGED,
availability_status=consts.AVAILABILITY_ONLINE)
# Create identity endpoints statuses
endpoint = dcorch_consts.ENDPOINT_TYPE_IDENTITY
db_api.subcloud_status_create(
self.ctx, subcloud.id, endpoint)
for original_sync_status in [consts.SYNC_STATUS_IN_SYNC,
consts.SYNC_STATUS_OUT_OF_SYNC,
consts.SYNC_STATUS_UNKNOWN]:
for new_sync_status in [consts.SYNC_STATUS_IN_SYNC,
consts.SYNC_STATUS_OUT_OF_SYNC,
consts.SYNC_STATUS_UNKNOWN]:
# Update identity to the original status
sm.update_subcloud_endpoint_status(
self.ctx, subcloud_name=subcloud.name,
endpoint_type=endpoint,
sync_status=original_sync_status)
# Get the count of the trigger already called
original_trigger_subcloud_patch_load_audits_count = \
self.fake_dcmanager_audit_api.trigger_subcloud_patch_load_audits.call_count
# Update identity to new status and get the count of the trigger again
sm.update_subcloud_endpoint_status(
self.ctx, subcloud_name=subcloud.name,
endpoint_type=endpoint,
sync_status=new_sync_status)
new_trigger_subcloud_patch_load_audits_count = \
self.fake_dcmanager_audit_api.trigger_subcloud_patch_load_audits.call_count
trigger_count = new_trigger_subcloud_patch_load_audits_count - \
original_trigger_subcloud_patch_load_audits_count
if original_sync_status == consts.SYNC_STATUS_UNKNOWN and \
new_sync_status != consts.SYNC_STATUS_UNKNOWN:
# Verify the subcloud patch and load audit is triggered once
self.assertEqual(trigger_count, 1)
else:
# Verify the subcloud patch and load audit is not triggered
self.assertEqual(trigger_count, 0)
def test_update_subcloud_sync_endpoint_type(self):
subcloud = self.create_subcloud_static(self.ctx, name='subcloud1')

View File

@ -143,6 +143,17 @@ ENDPOINT_TYPES_LIST = [ENDPOINT_TYPE_PLATFORM,
ENDPOINT_TYPE_KUBERNETES,
ENDPOINT_TYPE_KUBE_ROOTCA]
# All endpoint audit requests
# TODO(yuxing): move some constants to dccommon as part of general refactoring
# for maintainability in a future commit.
ENDPOINT_AUDIT_REQUESTS = {
ENDPOINT_TYPE_FIRMWARE: 'firmware_audit_requested',
ENDPOINT_TYPE_KUBERNETES: 'kubernetes_audit_requested',
ENDPOINT_TYPE_KUBE_ROOTCA: 'kube_rootca_update_audit_requested',
ENDPOINT_TYPE_LOAD: 'load_audit_requested',
ENDPOINT_TYPE_PATCHING: 'patch_audit_requested',
}
# Dcorch sync endpoint types
SYNC_ENDPOINT_TYPES_LIST = [ENDPOINT_TYPE_PLATFORM,
ENDPOINT_TYPE_IDENTITY]