diff --git a/distributedcloud/dccommon/drivers/openstack/sdk_platform.py b/distributedcloud/dccommon/drivers/openstack/sdk_platform.py
index 13ae9060f..3d993fce6 100644
--- a/distributedcloud/dccommon/drivers/openstack/sdk_platform.py
+++ b/distributedcloud/dccommon/drivers/openstack/sdk_platform.py
@@ -83,6 +83,9 @@ class OpenStackDriver(object):
                 OpenStackDriver.update_region_clients(region_name,
                                                       KEYSTONE_CLIENT_NAME,
                                                       self.keystone_client)
+                # Clear client object cache
+                OpenStackDriver.os_clients_dict[region_name] = \
+                    collections.defaultdict(dict)
             except Exception as exception:
                 LOG.error('keystone_client region %s error: %s' %
                           (region_name, str(exception)))
@@ -185,14 +188,18 @@ class OpenStackDriver(object):
                     OpenStackDriver._identity_tokens[region_name],
                     include_catalog=False)
                 if token != OpenStackDriver._identity_tokens[region_name]:
-                    LOG.debug("%s: updating token %s to %s" %
+                    LOG.debug("%s: AccessInfo changed %s to %s" %
                               (region_name,
                                OpenStackDriver._identity_tokens[region_name],
                                token))
-                    OpenStackDriver._identity_tokens[region_name] = token
+                    OpenStackDriver._identity_tokens[region_name] = None
+                    OpenStackDriver.os_clients_dict[region_name] = \
+                        collections.defaultdict(dict)
+                    return False
 
         except Exception as exception:
-            LOG.info('_is_token_valid handle: %s', str(exception))
+            LOG.info('_is_token_valid handle: region: %s error: %s',
+                     region_name, str(exception))
             # Reset the cached dictionary
             OpenStackDriver.os_clients_dict[region_name] = \
                 collections.defaultdict(dict)
diff --git a/distributedcloud/dccommon/endpoint_cache.py b/distributedcloud/dccommon/endpoint_cache.py
index 343873c2f..fc5172d3f 100644
--- a/distributedcloud/dccommon/endpoint_cache.py
+++ b/distributedcloud/dccommon/endpoint_cache.py
@@ -23,7 +23,6 @@
 import collections
 import threading
 
-from keystoneauth1 import exceptions as keystone_exceptions
 from keystoneauth1 import loading
 from keystoneauth1 import session
 
@@ -107,10 +106,6 @@ class EndpointCache(object):
                 CONF.endpoint_cache.password,
                 CONF.endpoint_cache.project_name,
                 CONF.endpoint_cache.project_domain_name)
-            # check if the current session is valid and get an admin session
-            # if necessary
-            self.admin_session = EndpointCache.get_admin_backup_session(
-                self.admin_session, CONF.endpoint_cache.username, sc_auth_url)
 
             self.keystone_client = ks_client.Client(
                 session=self.admin_session,
@@ -140,33 +135,6 @@ class EndpointCache(object):
             auth=user_auth, additional_headers=consts.USER_HEADER,
             timeout=timeout)
 
-    @classmethod
-    def get_admin_backup_session(cls, admin_session, user_name, auth_url):
-        """Validate a session and open an admin session if it fails.
-
-        This method is require to handle an upgrade to stx 4.0 and it
-        can be removed in stx 5.0.
- - """ - - try: - admin_session.get_auth_headers() - except keystone_exceptions.Unauthorized: - # this will only happen briefly during an upgrade to stx 4.0 - # just until the dcorch has synced the dcmanager user to each - # subcloud - LOG.info("Failed to authenticate user:%s, use %s user instead" - % (user_name, - CONF.cache.admin_username)) - admin_session = EndpointCache.get_admin_session( - auth_url, - CONF.cache.admin_username, - CONF.cache.admin_user_domain_name, - CONF.cache.admin_password, - CONF.cache.admin_tenant, - CONF.cache.admin_project_domain_name) - return admin_session - @staticmethod def _is_central_cloud(region_id): central_cloud_regions = [consts.CLOUD_0, consts.VIRTUAL_MASTER_CLOUD] diff --git a/distributedcloud/dcmanager/common/consts.py b/distributedcloud/dcmanager/common/consts.py index 1df6be577..ca9f6794c 100644 --- a/distributedcloud/dcmanager/common/consts.py +++ b/distributedcloud/dcmanager/common/consts.py @@ -75,7 +75,11 @@ ENDPOINT_TYPE = "endpoint_type" SERVICE_GROUP_STATUS_ACTIVE = "active" # Availability fail count -AVAIL_FAIL_COUNT_TO_ALARM = 1 +# we don't want to alarm first failure since there are +# cases where we expect a transient failure in the +# subcloud (e.g. haproxy process restart to update +# certificates) +AVAIL_FAIL_COUNT_TO_ALARM = 2 AVAIL_FAIL_COUNT_MAX = 9999 # Software update strategy types diff --git a/distributedcloud/dcmanager/tests/unit/audit/test_subcloud_audit_worker_manager.py b/distributedcloud/dcmanager/tests/unit/audit/test_subcloud_audit_worker_manager.py index e083df398..8c6a473bc 100644 --- a/distributedcloud/dcmanager/tests/unit/audit/test_subcloud_audit_worker_manager.py +++ b/distributedcloud/dcmanager/tests/unit/audit/test_subcloud_audit_worker_manager.py @@ -583,8 +583,7 @@ class TestAuditWorkerManager(base.DCManagerTestCase): audit_fail_count = 1 self.fake_dcmanager_api.update_subcloud_availability.\ assert_called_with(mock.ANY, subcloud.name, - consts.AVAILABILITY_OFFLINE, - False, audit_fail_count) + None, False, audit_fail_count) # Update the DB like dcmanager would do. 
         subcloud = db_api.subcloud_update(
@@ -605,23 +604,27 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
 
         audit_fail_count = audit_fail_count + 1
 
-        # Verify the subcloud availability didn't change, just the fail count
+        # Verify the subcloud goes offline
         self.fake_dcmanager_api.update_subcloud_availability.\
             assert_called_with(mock.ANY, subcloud.name,
                                None, False, audit_fail_count)
 
-        # Verify alarm update is not called
-        self.fake_alarm_aggr.update_alarm_summary.assert_not_called()
+        # Verify alarm update is called only once
+        self.fake_alarm_aggr.update_alarm_summary.assert_called_once_with(
+            subcloud.name, self.fake_openstack_client.fm_client)
 
-        # Verify patch audit is not called
-        self.fake_patch_audit.subcloud_patch_audit.assert_not_called()
+        # Verify patch audit is called only once
+        self.fake_patch_audit.subcloud_patch_audit.assert_called_once_with(
+            subcloud.name, mock.ANY, True)
 
-        # Verify firmware audit is not called
-        self.fake_firmware_audit.subcloud_firmware_audit.assert_not_called()
+        # Verify firmware audit is called
+        self.fake_firmware_audit.subcloud_firmware_audit.assert_called_once_with(
+            subcloud.name, mock.ANY)
 
-        # Verify firmware audit is not called
-        self.fake_kubernetes_audit.subcloud_kubernetes_audit.assert_not_called()
+        # Verify kubernetes audit is called
+        self.fake_kubernetes_audit.subcloud_kubernetes_audit.assert_called_once_with(
+            subcloud.name, mock.ANY)
 
     def test_audit_subcloud_offline_no_change(self):
         subcloud = self.create_subcloud_static(self.ctx, name='subcloud1')
diff --git a/distributedcloud/dcorch/engine/sync_thread.py b/distributedcloud/dcorch/engine/sync_thread.py
index a0b91313c..c67743f05 100644
--- a/distributedcloud/dcorch/engine/sync_thread.py
+++ b/distributedcloud/dcorch/engine/sync_thread.py
@@ -193,10 +193,6 @@ class SyncThread(object):
                 config.admin_project_domain_name,
                 timeout=60)
 
-            if config is cfg.CONF.endpoint_cache:
-                self.sc_admin_session = EndpointCache.get_admin_backup_session(
-                    self.sc_admin_session, config.username, sc_auth_url)
-
     def initial_sync(self):
         # Return True to indicate initial sync success
         return True
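
Note on the sdk_platform.py change (not part of the patch itself): _is_token_valid() now drops the cached AccessInfo and the per-region client cache, and returns False, whenever the validated token no longer matches the cached one, so callers rebuild their clients against a fresh keystone session instead of reusing clients bound to a renewed or revoked token. A minimal standalone sketch of that pattern follows; RegionClientCache, validate_token() and build_clients() are hypothetical stand-ins for the OpenStackDriver/keystone plumbing, not dcmanager APIs.

# Standalone sketch only -- hypothetical names, not dcmanager code.
import collections


class RegionClientCache(object):
    """Cache per-region clients and rebuild them when the token changes."""

    def __init__(self, validate_token, build_clients):
        self._validate_token = validate_token   # region -> current AccessInfo
        self._build_clients = build_clients     # region -> fresh client set
        self._tokens = {}                       # region -> cached AccessInfo
        self._clients = collections.defaultdict(dict)

    def _is_token_valid(self, region):
        try:
            current = self._validate_token(region)
            if self._tokens.get(region) != current:
                # AccessInfo changed (token renewed or revoked): the cached
                # clients hold a stale session, so force a rebuild.
                self._tokens[region] = None
                self._clients[region] = collections.defaultdict(dict)
                return False
            return True
        except Exception:
            # Validation itself failed: reset the cache and start over.
            self._tokens[region] = None
            self._clients[region] = collections.defaultdict(dict)
            return False

    def get_clients(self, region):
        # Rebuild the clients on a cache miss or whenever the token changed.
        if not self._is_token_valid(region) or not self._clients[region]:
            self._clients[region] = self._build_clients(region)
            self._tokens[region] = self._validate_token(region)
        return self._clients[region]

Returning False (rather than silently refreshing the cached token in place, as the old code did) pushes the caller down the same path as a cache miss, so the client rebuild logic lives in one place.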