From be4062b08fcefec1d9f7f7e102cf3a59d2e7c5e4 Mon Sep 17 00:00:00 2001
From: Li Zhu
Date: Mon, 25 Mar 2024 09:21:07 -0400
Subject: [PATCH] Add additional GEO-Redundancy unit tests

Add unit tests for peer_group_audit_manager.py and subcloud
bootstrap-address update.

Closes-Bug: 2057981

Change-Id: I7bbc9d26fb698bade7e955b303cdbd30f87c7776
Signed-off-by: lzhu1
---
 .../unit/api/v1/controllers/test_subclouds.py | 153 ++++++++-
 .../manager/test_peer_group_audit_manager.py  | 278 ++++++++++++++++++
 2 files changed, 430 insertions(+), 1 deletion(-)
 create mode 100644 distributedcloud/dcmanager/tests/unit/manager/test_peer_group_audit_manager.py

diff --git a/distributedcloud/dcmanager/tests/unit/api/v1/controllers/test_subclouds.py b/distributedcloud/dcmanager/tests/unit/api/v1/controllers/test_subclouds.py
index 21c6eb909..2ca1e6a83 100644
--- a/distributedcloud/dcmanager/tests/unit/api/v1/controllers/test_subclouds.py
+++ b/distributedcloud/dcmanager/tests/unit/api/v1/controllers/test_subclouds.py
@@ -1874,7 +1874,7 @@ class TestSubcloudAPIOther(testroot.DCManagerApiTest):
             peer_group_id=peer_group.id
         )
         mock_is_leader_on_local_site.return_value = True
-        data = {"bootstrap-address": "10.10.10.11"}
+        data = {"description": "test subcloud"}
         mock_get_patch_data.return_value = data
         six.assertRaisesRegex(
             self,
@@ -1888,6 +1888,157 @@ class TestSubcloudAPIOther(testroot.DCManagerApiTest):
         self.mock_rpc_client().update_subcloud.assert_not_called()
         self.mock_rpc_client().update_association_sync_status.assert_not_called()
 
+    @mock.patch.object(subclouds.SubcloudsController, "_get_patch_data")
+    @mock.patch.object(cutils, 'is_leader_on_local_site')
+    @mock.patch.object(cutils, 'subcloud_peer_group_get_by_ref')
+    def test_update_subcloud_bootstrap_address_on_primary_site(
+            self, mock_get_peer_group, mock_is_leader_on_local_site,
+            mock_get_patch_data):
+        # On a peer group with the primary priority, a bootstrap_address
+        # update returns 200, is forwarded to the RPC client and triggers
+        # an association sync status update.
+        peer_group = test_system_peer_manager.TestSystemPeerManager. \
+            create_subcloud_peer_group_static(
+                self.ctx,
+                peer_group_name='SubcloudPeerGroup1',
+                group_priority=consts.PEER_GROUP_PRIMARY_PRIORITY)
+        mock_get_peer_group.return_value = peer_group
+        subcloud = fake_subcloud.create_fake_subcloud(self.ctx)
+        subcloud = db_api.subcloud_update(
+            self.ctx,
+            subcloud.id,
+            availability_status=dccommon_consts.AVAILABILITY_ONLINE,
+            deploy_status=consts.DEPLOY_STATE_DONE,
+            management_state=dccommon_consts.MANAGEMENT_MANAGED,
+            prestage_status=consts.PRESTAGE_STATE_COMPLETE,
+            peer_group_id=peer_group.id
+        )
+        mock_is_leader_on_local_site.return_value = True
+        data = {"bootstrap_address": "192.168.10.22"}
+        self.mock_rpc_client().update_subcloud.return_value = True
+        mock_get_patch_data.return_value = data
+        response = self.app.patch_json(
+            f"{FAKE_URL}/{subcloud.id}", headers=FAKE_HEADERS,
+            params=data
+        )
+        self.assertEqual(response.status_int, 200)
+        self.mock_rpc_client().update_subcloud.assert_called_once_with(
+            mock.ANY,
+            subcloud.id,
+            management_state=None,
+            description=None,
+            location=None,
+            group_id=None,
+            data_install=None,
+            force=None,
+            peer_group_id=None,
+            bootstrap_values=None,
+            bootstrap_address=data["bootstrap_address"],
+            deploy_status=None)
+        self.mock_rpc_client().update_association_sync_status. \
+            assert_called_once()
+
+    def update_subcloud_bootstrap_address_on_non_primary_site(
+            self, mock_get_peer_group, mock_is_leader_on_local_site,
+            mock_get_patch_data, primary_site_availability_state):
+        # Shared fixture for the non-primary site tests: creates a system peer
+        # in the given availability state, a priority-1 peer group associated
+        # with it and a rehome-failed subcloud in that group, then stubs the
+        # patch payload. Returns (association, subcloud, patch_data).
+        peer = test_system_peer_manager.TestSystemPeerManager. \
+            create_system_peer_static(
+                self.ctx, peer_name='SystemPeer1')
+        peer = db_api.system_peer_update(
+            self.ctx, peer.id,
+            availability_state=primary_site_availability_state)
+        peer_group = test_system_peer_manager.TestSystemPeerManager. \
+            create_subcloud_peer_group_static(
+                self.ctx, peer_group_name='SubcloudPeerGroup1', group_priority=1)
+        association = test_system_peer_manager.TestSystemPeerManager. \
+            create_peer_group_association_static(
+                self.ctx, system_peer_id=peer.id, peer_group_id=peer_group.id)
+        mock_get_peer_group.return_value = peer_group
+        subcloud = fake_subcloud.create_fake_subcloud(self.ctx)
+        subcloud = db_api.subcloud_update(
+            self.ctx,
+            subcloud.id,
+            availability_status=dccommon_consts.AVAILABILITY_ONLINE,
+            deploy_status=consts.DEPLOY_STATE_REHOME_FAILED,
+            management_state=dccommon_consts.MANAGEMENT_MANAGED,
+            prestage_status=consts.PRESTAGE_STATE_COMPLETE,
+            peer_group_id=peer_group.id
+        )
+        mock_is_leader_on_local_site.return_value = True
+        patch_data = {"bootstrap_address": "192.168.10.22"}
+        self.mock_rpc_client().update_subcloud.return_value = True
+        mock_get_patch_data.return_value = patch_data
+        return association, subcloud, patch_data
+
+    @mock.patch.object(subclouds.SubcloudsController, "_get_patch_data")
+    @mock.patch.object(cutils, 'is_leader_on_local_site')
+    @mock.patch.object(cutils, 'subcloud_peer_group_get_by_ref')
+    def test_update_subcloud_bootstrap_address_when_primary_site_is_available(
+            self, mock_get_peer_group, mock_is_leader_on_local_site,
+            mock_get_patch_data):
+        # With the system peer marked available, the update is rejected with
+        # a 400 and neither the RPC client nor the sync status is touched.
+        association, subcloud, patch_data = \
+            self.update_subcloud_bootstrap_address_on_non_primary_site(
+                mock_get_peer_group,
+                mock_is_leader_on_local_site,
+                mock_get_patch_data,
+                consts.SYSTEM_PEER_AVAILABILITY_STATE_AVAILABLE)
+        six.assertRaisesRegex(
+            self,
+            webtest.app.AppError,
+            "400 *",
+            self.app.patch_json,
+            FAKE_URL + "/" + str(subcloud.id),
+            headers=FAKE_HEADERS,
+            params=patch_data,
+        )
+        self.mock_rpc_client().update_subcloud.assert_not_called()
+        self.mock_rpc_client().update_association_sync_status.assert_not_called()
+        self.assertEqual(consts.ASSOCIATION_SYNC_STATUS_IN_SYNC,
+                         db_api.peer_group_association_get(
+                             self.ctx, association.id).sync_status)
+
+    @mock.patch.object(subclouds.SubcloudsController, "_get_patch_data")
+    @mock.patch.object(cutils, 'is_leader_on_local_site')
+    @mock.patch.object(cutils, 'subcloud_peer_group_get_by_ref')
+    def test_update_subcloud_bootstrap_address_when_primary_site_is_unavailable(
+            self, mock_get_peer_group, mock_is_leader_on_local_site,
+            mock_get_patch_data):
+        # With the system peer marked unavailable, the update is accepted
+        # and the association ends up out-of-sync.
+        association, subcloud, patch_data = \
+            self.update_subcloud_bootstrap_address_on_non_primary_site(
+                mock_get_peer_group,
+                mock_is_leader_on_local_site,
+                mock_get_patch_data,
+                consts.SYSTEM_PEER_AVAILABILITY_STATE_UNAVAILABLE)
+        response = self.app.patch_json(
+            f"{FAKE_URL}/{subcloud.id}", headers=FAKE_HEADERS,
+            params=patch_data
+        )
+        self.assertEqual(response.status_int, 200)
+        self.mock_rpc_client().update_subcloud.assert_called_once_with(
+            mock.ANY,
+            subcloud.id,
+            management_state=None,
+            description=None,
+            location=None,
+            group_id=None,
+            data_install=None,
+            force=None,
+            peer_group_id=None,
+            bootstrap_values=None,
+            bootstrap_address=patch_data["bootstrap_address"],
+            deploy_status=None)
+        self.assertEqual(consts.ASSOCIATION_SYNC_STATUS_OUT_OF_SYNC,
+                         db_api.peer_group_association_get(
+                             self.ctx, association.id).sync_status)
+
     def test_get_config_file_path(self):
         bootstrap_file = psd_common.get_config_file_path("subcloud1")
         install_values = psd_common.get_config_file_path(
diff --git a/distributedcloud/dcmanager/tests/unit/manager/test_peer_group_audit_manager.py b/distributedcloud/dcmanager/tests/unit/manager/test_peer_group_audit_manager.py
new file mode 100644
index 000000000..a2dfdfaa5
--- /dev/null
+++ b/distributedcloud/dcmanager/tests/unit/manager/test_peer_group_audit_manager.py
@@ -0,0 +1,278 @@
+#
+# Copyright (c) 2024 Wind River Systems, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+import copy
+import json
+import uuid
+
+import mock
+
+from dccommon import consts as dccommon_consts
+from dcmanager.common import consts
+from dcmanager.common import utils
+from dcmanager.db.sqlalchemy import api as db_api
+from dcmanager.manager import peer_group_audit_manager
+from dcmanager.manager.system_peer_manager import SystemPeerManager
+from dcmanager.tests.base import DCManagerTestCase
+from dcmanager.tests.unit.manager import test_peer_monitor_manager as tpm
+from dcmanager.tests.unit.manager import test_system_peer_manager as tsm
+
+# FAKE SUBCLOUD PEER GROUP DATA (SITE1)
+FAKE_SITE1_PEER_GROUP_ID = 9
+FAKE_SITE1_PEER_GROUP_NAME = 'PeerGroup2'
+FAKE_SITE1_PEER_GROUP_SYSTEM_LEADER_ID = tpm.FAKE_SYSTEM_PEER_UUID  # SITE1 UUID
+FAKE_SITE1_PEER_GROUP_SYSTEM_LEADER_NAME = tpm.FAKE_SYSTEM_PEER_NAME  # SITE1 NAME
+FAKE_SITE1_PEER_GROUP_MAX_SUBCLOUDS_REHOMING = 20
+FAKE_SITE1_PEER_GROUP_PRIORITY = 1
+FAKE_SITE1_PEER_GROUP_STATE = 'enabled'
+FAKE_SITE1_PEER_GROUP_MIGRATION_STATUS = consts.PEER_GROUP_MIGRATION_COMPLETE
+FAKE_SITE1_PEER_GROUP_DATA = {
+    "peer_group_name": FAKE_SITE1_PEER_GROUP_NAME,
+    "system_leader_id": FAKE_SITE1_PEER_GROUP_SYSTEM_LEADER_ID,
+    "system_leader_name": FAKE_SITE1_PEER_GROUP_SYSTEM_LEADER_NAME,
+    "group_priority": FAKE_SITE1_PEER_GROUP_PRIORITY,
+    "group_state": FAKE_SITE1_PEER_GROUP_STATE,
+    "max_subcloud_rehoming": FAKE_SITE1_PEER_GROUP_MAX_SUBCLOUDS_REHOMING,
+    "migration_status": FAKE_SITE1_PEER_GROUP_MIGRATION_STATUS
+}
+
+# FAKE SUBCLOUD
+FAKE_SUBCLOUD1_REGION_NAME = str(uuid.uuid4())
+FAKE_SUBCLOUD1_NAME = 'subcloud1'
+FAKE_SUBCLOUD2_REGION_NAME = str(uuid.uuid4())
+FAKE_SUBCLOUD2_NAME = 'subcloud2'
+FAKE_SUBCLOUD3_REGION_NAME = str(uuid.uuid4())
+FAKE_SUBCLOUD3_NAME = 'subcloud3'
+
+# FAKE SUBCLOUD REHOME DATA
+FAKE_REHOME_DATA1 = {
+    "saved_payload": {
+        "bootstrap-address": "192.168.10.11",
+        "systemcontroller_gateway_address": "192.168.204.101"
+    }
+}
+FAKE_REHOME_DATA2 = {
+    "saved_payload": {
+        "bootstrap-address": "192.168.10.12",
+        "systemcontroller_gateway_address": "192.168.204.101"
+    }
+}
+FAKE_REHOME_DATA3 = {
+    "saved_payload": {
+        "bootstrap-address": "192.168.10.13",
+        "systemcontroller_gateway_address": "192.168.204.101"
+    }
+}
+
+# FAKE SUBCLOUD DATA (SITE1)
+FAKE_SITE1_SUBCLOUD1_ID = 11
+FAKE_SITE1_SUBCLOUD1_REGION_NAME = FAKE_SUBCLOUD1_REGION_NAME
+FAKE_SITE1_SUBCLOUD1_DEPLOY_STATUS = consts.DEPLOY_STATE_DONE
+FAKE_SITE1_SUBCLOUD1_MANAGEMENT_STATE = dccommon_consts.MANAGEMENT_MANAGED
+FAKE_SITE1_SUBCLOUD1_PEER_GROUP_ID = FAKE_SITE1_PEER_GROUP_ID
+FAKE_SITE1_SUBCLOUD1_DATA = {
+    "id": FAKE_SITE1_SUBCLOUD1_ID,
+    "name": FAKE_SUBCLOUD1_NAME,
+    "region-name": FAKE_SITE1_SUBCLOUD1_REGION_NAME,
+    "deploy-status": FAKE_SITE1_SUBCLOUD1_DEPLOY_STATUS,
+    "management-state": FAKE_SITE1_SUBCLOUD1_MANAGEMENT_STATE,
+    "peer_group_id": FAKE_SITE1_SUBCLOUD1_PEER_GROUP_ID,
+    "rehome_data": json.dumps(FAKE_REHOME_DATA1)
+}
+FAKE_SITE1_SUBCLOUD2_ID = 12
+FAKE_SITE1_SUBCLOUD2_REGION_NAME = FAKE_SUBCLOUD2_REGION_NAME
+FAKE_SITE1_SUBCLOUD2_DEPLOY_STATUS = consts.DEPLOY_STATE_DONE
+FAKE_SITE1_SUBCLOUD2_MANAGEMENT_STATE = dccommon_consts.MANAGEMENT_MANAGED
+FAKE_SITE1_SUBCLOUD2_PEER_GROUP_ID = FAKE_SITE1_PEER_GROUP_ID
+FAKE_SITE1_SUBCLOUD2_DATA = {
+    "id": FAKE_SITE1_SUBCLOUD2_ID,
+    "name": FAKE_SUBCLOUD2_NAME,
+    "region-name": FAKE_SITE1_SUBCLOUD2_REGION_NAME,
+    "deploy-status": FAKE_SITE1_SUBCLOUD2_DEPLOY_STATUS,
+    "management-state": FAKE_SITE1_SUBCLOUD2_MANAGEMENT_STATE,
+    "peer_group_id": FAKE_SITE1_SUBCLOUD2_PEER_GROUP_ID,
+    # To test syncing rehome_data from site1(remote) to site0(local),
+    # we set the rehome_data to data3 instead of data2 for remote subcloud2
+    "rehome_data": json.dumps(FAKE_REHOME_DATA3)
+}
+
+
+class TestPeerGroupAudit(DCManagerTestCase):
+    """Tests for PeerGroupAuditManager.audit() after a completed migration."""
+
+    def setUp(self):
+        """Create local site DB fixtures, remote site data and patches."""
+        super(TestPeerGroupAudit, self).setUp()
+
+        self.peer = tpm.TestPeerMonitor.create_system_peer_static(
+            self.ctx,
+            peer_name='SystemPeer1')
+        self.peer_group = tsm.TestSystemPeerManager. \
+            create_subcloud_peer_group_static(
+                self.ctx,
+                peer_group_name='SubcloudPeerGroup1')
+        # Create local dc subcloud1 mock data in database
+        self.subcloud1 = tsm.TestSystemPeerManager.create_subcloud_with_pg_static(
+            self.ctx,
+            peer_group_id=self.peer_group.id,
+            rehome_data=json.dumps(FAKE_REHOME_DATA1),
+            name=FAKE_SUBCLOUD1_NAME,
+            region_name=FAKE_SUBCLOUD1_REGION_NAME,
+            deploy_status=consts.DEPLOY_STATE_REHOME_PENDING)
+        # Create local dc subcloud2 mock data in database
+        self.subcloud2 = tsm.TestSystemPeerManager.create_subcloud_with_pg_static(
+            self.ctx,
+            peer_group_id=self.peer_group.id,
+            rehome_data=json.dumps(FAKE_REHOME_DATA2),
+            name=FAKE_SUBCLOUD2_NAME,
+            region_name=FAKE_SUBCLOUD2_REGION_NAME,
+            deploy_status=consts.DEPLOY_STATE_REHOME_PENDING)
+        # Create local dc subcloud3 mock data in database
+        self.subcloud3 = tsm.TestSystemPeerManager.create_subcloud_with_pg_static(
+            self.ctx,
+            peer_group_id=self.peer_group.id,
+            rehome_data=json.dumps(FAKE_REHOME_DATA3),
+            name=FAKE_SUBCLOUD3_NAME,
+            region_name=FAKE_SUBCLOUD3_REGION_NAME,
+            deploy_status=consts.DEPLOY_STATE_REHOME_PENDING)
+        # Remote subclouds
+        self.peer_subcloud1 = copy.deepcopy(FAKE_SITE1_SUBCLOUD1_DATA)
+        self.peer_subcloud2 = copy.deepcopy(FAKE_SITE1_SUBCLOUD2_DATA)
+        # Remote peer group (copied so tests cannot alter the shared constant)
+        self.remote_peer_group = copy.deepcopy(FAKE_SITE1_PEER_GROUP_DATA)
+
+        # Initialize mock objects; every patch is undone during test cleanup
+        self.mock_update_sync_status = self._start_patch(
+            SystemPeerManager, 'update_sync_status')
+        self.mock_get_peer_dc_client = self._start_patch(
+            SystemPeerManager, 'get_peer_dc_client')
+        self._start_patch(utils, 'get_local_system')
+
+    def _start_patch(self, target, attribute):
+        """Patch target.attribute for this test and return the started mock."""
+        patcher = mock.patch.object(target, attribute)
+        started_mock = patcher.start()
+        # Register the patcher's bound stop method itself. Calling stop() on
+        # the started mock would undo nothing and register a MagicMock.
+        self.addCleanup(patcher.stop)
+        return started_mock
+
+    def run_audit(self):
+        """Audit against a mocked peer DC client; return the audit manager."""
+        self.mock_dc_client = mock.MagicMock()
+        self.mock_subcloud_manager = mock.MagicMock()
+        self.mock_get_peer_dc_client.return_value = self.mock_dc_client()
+        self.mock_dc_client().get_subcloud_list_by_peer_group.return_value = [
+            self.peer_subcloud1, self.peer_subcloud2]
+        self.mock_dc_client().get_system_peer.return_value = mock.MagicMock()
+        self.mock_dc_client().get_peer_group_association_with_peer_id_and_pg_id. \
+            return_value = {
+                "sync-status": consts.ASSOCIATION_SYNC_STATUS_OUT_OF_SYNC}
+        pm = peer_group_audit_manager.PeerGroupAuditManager(
+            self.mock_subcloud_manager, FAKE_SITE1_PEER_GROUP_ID)
+        pm._set_local_subcloud_to_secondary = mock.MagicMock(
+            wraps=pm._set_local_subcloud_to_secondary)
+        pm.audit(self.peer, self.remote_peer_group, self.peer_group)
+        return pm
+
+    def set_subcloud_rehome_failed(self, subcloud):
+        """Mark a remote subcloud dict as rehome-failed and unmanaged."""
+        subcloud["deploy-status"] = consts.DEPLOY_STATE_REHOME_FAILED
+        subcloud["management-state"] = dccommon_consts.MANAGEMENT_UNMANAGED
+
+    def test_audit_migration_complete_with_all_success(self):
+        pm = self.run_audit()
+
+        # Verify all of three local subclouds are set as secondary,
+        # even including subcloud3, which is deleted afterward
+        self.assertEqual(3, pm._set_local_subcloud_to_secondary.call_count)
+        # Verify that the rehome_data of the local site subcloud2 is updated
+        # from data2 to data3, syncing from the remote site subcloud2
+        self.assertEqual(
+            json.dumps(FAKE_REHOME_DATA3),
+            db_api.subcloud_get(self.ctx, self.subcloud2.id).rehome_data
+        )
+        # Verify that the subcloud3 is deleted because it doesn't
+        # exist in the peer site
+        self.mock_subcloud_manager.delete_subcloud.assert_called_with(
+            pm.context, self.subcloud3.id)
+        # Verify that the system leader id is updated to the peer site uuid
+        self.assertEqual(
+            tpm.FAKE_SITE1_SYSTEM_UUID,
+            db_api.subcloud_peer_group_get(self.ctx, self.peer_group.id)
+            .system_leader_id
+        )
+        # Verify that the migration status of the remote peer group is updated
+        # to None since the migration completed
+        self.mock_dc_client().update_subcloud_peer_group.assert_called_with(
+            self.remote_peer_group.get("peer_group_name"), migration_status=None)
+        # Verify that the PGA sync status is updated to in-sync
+        self.mock_update_sync_status.assert_called_with(
+            pm.context, self.peer, consts.ASSOCIATION_SYNC_STATUS_IN_SYNC,
+            self.peer_group, self.remote_peer_group)
+
+    def test_audit_migration_complete_with_partial_failure(self):
+        # Remove local subcloud3
+        db_api.subcloud_destroy(self.ctx, self.subcloud3.id)
+        # Remote subclouds: subcloud1 success and subcloud2 failed
+        self.set_subcloud_rehome_failed(self.peer_subcloud2)
+
+        pm = self.run_audit()
+
+        # Verify that only subcloud1, the successful one, is set as secondary
+        self.assertEqual(1, pm._set_local_subcloud_to_secondary.call_count)
+        self.mock_subcloud_manager.delete_subcloud.assert_not_called()
+        # Verify that the local subcloud2 is also set to rehome-failed
+        self.assertEqual(
+            consts.DEPLOY_STATE_REHOME_FAILED,
+            db_api.subcloud_get(self.ctx, self.subcloud2.id).deploy_status
+        )
+        # Verify that the system leader id is updated to the peer site uuid
+        self.assertEqual(
+            tpm.FAKE_SITE1_SYSTEM_UUID,
+            db_api.subcloud_peer_group_get(self.ctx, self.peer_group.id)
+            .system_leader_id
+        )
+        # Verify that the migration status of the remote peer group is updated
+        # to None since the migration completed
+        self.mock_dc_client().update_subcloud_peer_group.assert_called_with(
+            self.remote_peer_group.get('peer_group_name'), migration_status=None)
+        # Verify that the PGA sync status remains out-of-sync due to rehome failure
+        self.mock_update_sync_status.assert_not_called()
+
+    def test_audit_migration_complete_with_all_failed(self):
+        # Remove local subcloud3
+        db_api.subcloud_destroy(self.ctx, self.subcloud3.id)
+        # Remote subclouds: both failed
+        self.set_subcloud_rehome_failed(self.peer_subcloud1)
+        self.set_subcloud_rehome_failed(self.peer_subcloud2)
+
+        pm = self.run_audit()
+
+        # Verify that none of the subclouds are set as secondary,
+        # as all of them are rehome-failed.
+        pm._set_local_subcloud_to_secondary.assert_not_called()
+        self.mock_subcloud_manager.delete_subcloud.assert_not_called()
+        # Verify that the local subclouds are also set to rehome-failed
+        self.assertEqual(
+            consts.DEPLOY_STATE_REHOME_FAILED,
+            db_api.subcloud_get(self.ctx, self.subcloud1.id).deploy_status
+        )
+        self.assertEqual(
+            consts.DEPLOY_STATE_REHOME_FAILED,
+            db_api.subcloud_get(self.ctx, self.subcloud2.id).deploy_status
+        )
+        # Verify that the system leader id is updated to the peer site uuid
+        self.assertEqual(
+            tpm.FAKE_SITE1_SYSTEM_UUID,
+            db_api.subcloud_peer_group_get(self.ctx, self.peer_group.id)
+            .system_leader_id
+        )
+        # Verify that the migration status of the remote peer group is updated
+        # to None since the migration completed
+        self.mock_dc_client().update_subcloud_peer_group.assert_called_with(
+            self.remote_peer_group.get('peer_group_name'), migration_status=None)
+        # Verify that the PGA sync status remains out-of-sync due to rehome failure
+        self.mock_update_sync_status.assert_not_called()