diff --git a/api-ref/source/api-ref-dcmanager-v1.rst b/api-ref/source/api-ref-dcmanager-v1.rst
index fd28c2124..b128c2804 100644
--- a/api-ref/source/api-ref-dcmanager-v1.rst
+++ b/api-ref/source/api-ref-dcmanager-v1.rst
@@ -343,6 +343,10 @@ The attributes of a subcloud which are modifiable:
 
 - bootstrap_address
 
+Extra flags:
+
+- migrate
+
 **Normal response codes**
 
 200
@@ -371,6 +375,7 @@ serviceUnavailable (503)
    - bootstrap-address: bootstrap_address
    - sysadmin-password: sysadmin_password
    - bootstrap-values: bootstrap_values_for_rehome
+   - migrate: rehome_pending
 
 Request Example
 ----------------
diff --git a/api-ref/source/parameters.yaml b/api-ref/source/parameters.yaml
index 4f17780d9..5e53ab231 100644
--- a/api-ref/source/parameters.yaml
+++ b/api-ref/source/parameters.yaml
@@ -470,6 +470,13 @@ rehome_data:
   in: body
   required: true
   type: string
+rehome_pending:
+  description: |
+    A flag indicating if the subcloud will be rehomed away. Its deploy status
+    will be set to 'rehome-pending'
+  in: body
+  required: false
+  type: boolean
 release:
   description: |
     The subcloud software version.
diff --git a/distributedcloud/dcmanager/api/controllers/v1/subclouds.py b/distributedcloud/dcmanager/api/controllers/v1/subclouds.py
index 20b88afa8..b0b413450 100644
--- a/distributedcloud/dcmanager/api/controllers/v1/subclouds.py
+++ b/distributedcloud/dcmanager/api/controllers/v1/subclouds.py
@@ -54,6 +54,7 @@ from dcmanager.db import api as db_api
 from dcmanager.rpc import client as rpc_client
 from fm_api.constants import FM_ALARM_ID_UNSYNCHRONIZED_RESOURCE
 
+
 CONF = cfg.CONF
 
 LOG = logging.getLogger(__name__)
@@ -319,6 +320,30 @@ class SubcloudsController(object):
             pecan.abort(500, _("Unable to migrate subcloud %s, "
                                "need sysadmin_password" % subcloud.name))
 
+    def _validate_rehome_pending(self, subcloud, management_state):
+        unmanaged = dccommon_consts.MANAGEMENT_UNMANAGED
+        error_msg = None
+
+        # Can only set the subcloud to rehome-pending
+        # if the deployment is done
+        if subcloud.deploy_status != consts.DEPLOY_STATE_DONE:
+            error_msg = (
+                "The deploy status can only be updated to "
+                f"'{consts.DEPLOY_STATE_REHOME_PENDING}' if the current "
+                f"deploy status is '{consts.DEPLOY_STATE_DONE}'")
+
+        # Can only set the subcloud to rehome-pending if the subcloud is
+        # being unmanaged or is already unmanaged
+        if management_state != unmanaged and (
+            management_state or subcloud.management_state != unmanaged
+        ):
+            error_msg = (
+                f"Subcloud must be {unmanaged} for its deploy status to "
+                f"be updated to '{consts.DEPLOY_STATE_REHOME_PENDING}'")
+
+        if error_msg:
+            pecan.abort(400, error_msg)
+
     @staticmethod
     def _append_static_err_content(subcloud):
         err_dict = consts.ERR_MSG_DICT
@@ -668,6 +693,14 @@ class SubcloudsController(object):
         peer_group = payload.get('peer_group')
         bootstrap_address = payload.get('bootstrap_address')
 
+        # If the migrate flag is present we need to update the deploy status
+        # to consts.DEPLOY_STATE_REHOME_PENDING
+        deploy_status = None
+        if (payload.get('migrate') == 'true' and subcloud.deploy_status !=
+                consts.DEPLOY_STATE_REHOME_PENDING):
+            self._validate_rehome_pending(subcloud, management_state)
+            deploy_status = consts.DEPLOY_STATE_REHOME_PENDING
+
         # Syntax checking
         if management_state and \
             management_state not in [dccommon_consts.MANAGEMENT_UNMANAGED,
@@ -733,7 +766,8 @@ class SubcloudsController(object):
                 force=force_flag,
                 peer_group_id=peer_group_id,
                 bootstrap_values=bootstrap_values,
-                bootstrap_address=bootstrap_address)
+                bootstrap_address=bootstrap_address,
+                deploy_status=deploy_status)
             return subcloud
         except RemoteError as e:
             pecan.abort(422, e.value)
diff --git a/distributedcloud/dcmanager/common/consts.py b/distributedcloud/dcmanager/common/consts.py
index 57af3dd43..291415bf3 100644
--- a/distributedcloud/dcmanager/common/consts.py
+++ b/distributedcloud/dcmanager/common/consts.py
@@ -230,9 +230,12 @@ DEPLOY_STATE_RESTORE_PREP_FAILED = 'restore-prep-failed'
 DEPLOY_STATE_RESTORING = 'restoring'
 DEPLOY_STATE_RESTORE_FAILED = 'restore-failed'
 DEPLOY_STATE_PRE_REHOME = 'pre-rehome'
+# If any of the following rehoming or secondary statuses
+# are modified, cert-mon code will need to be updated.
 DEPLOY_STATE_REHOMING = 'rehoming'
 DEPLOY_STATE_REHOME_FAILED = 'rehome-failed'
 DEPLOY_STATE_REHOME_PREP_FAILED = 'rehome-prep-failed'
+DEPLOY_STATE_REHOME_PENDING = 'rehome-pending'
 DEPLOY_STATE_SECONDARY = 'secondary'
 DEPLOY_STATE_SECONDARY_FAILED = 'secondary-failed'
 DEPLOY_STATE_DONE = 'complete'
diff --git a/distributedcloud/dcmanager/manager/subcloud_manager.py b/distributedcloud/dcmanager/manager/subcloud_manager.py
index 2d3b52883..b542a1e31 100644
--- a/distributedcloud/dcmanager/manager/subcloud_manager.py
+++ b/distributedcloud/dcmanager/manager/subcloud_manager.py
@@ -2508,33 +2508,7 @@ class SubcloudManager(manager.Manager):
         except Exception as e:
             LOG.exception(e)
 
-    def delete_subcloud(self, context, subcloud_id):
-        """Delete subcloud and notify orchestrators.
-
-        :param context: request context object.
-        :param subcloud_id: id of subcloud to delete
-        """
-        LOG.info("Deleting subcloud %s." % subcloud_id)
-
-        # Retrieve the subcloud details from the database
-        subcloud = db_api.subcloud_get(context, subcloud_id)
-
-        # Semantic checking
-        if subcloud.management_state != dccommon_consts.MANAGEMENT_UNMANAGED:
-            raise exceptions.SubcloudNotUnmanaged()
-
-        if subcloud.availability_status == \
-                dccommon_consts.AVAILABILITY_ONLINE:
-            raise exceptions.SubcloudNotOffline()
-
-        # Ansible inventory filename for the specified subcloud
-        ansible_subcloud_inventory_file = self._get_ansible_filename(
-            subcloud.name, INVENTORY_FILE_POSTFIX)
-
-        self._remove_subcloud_details(context,
-                                      subcloud,
-                                      ansible_subcloud_inventory_file)
-
+    def _clear_subcloud_alarms(self, subcloud: Subcloud):
         # Clear any subcloud alarms.
         # Note that endpoint out-of-sync alarms should have been cleared when
         # the subcloud was unmanaged and the endpoint sync statuses were set to
@@ -2565,6 +2539,35 @@
                     (subcloud.name, alarm_id))
                 LOG.exception(e)
 
+    def delete_subcloud(self, context, subcloud_id):
+        """Delete subcloud and notify orchestrators.
+
+        :param context: request context object.
+        :param subcloud_id: id of subcloud to delete
+        """
+        LOG.info("Deleting subcloud %s." % subcloud_id)
+
+        # Retrieve the subcloud details from the database
+        subcloud = db_api.subcloud_get(context, subcloud_id)
+
+        # Semantic checking
+        if subcloud.management_state != dccommon_consts.MANAGEMENT_UNMANAGED:
+            raise exceptions.SubcloudNotUnmanaged()
+
+        if subcloud.availability_status == \
+                dccommon_consts.AVAILABILITY_ONLINE:
+            raise exceptions.SubcloudNotOffline()
+
+        # Ansible inventory filename for the specified subcloud
+        ansible_subcloud_inventory_file = self._get_ansible_filename(
+            subcloud.name, INVENTORY_FILE_POSTFIX)
+
+        self._remove_subcloud_details(context,
+                                      subcloud,
+                                      ansible_subcloud_inventory_file)
+
+        self._clear_subcloud_alarms(subcloud)
+
     def rename_subcloud(self,
                         context,
                         subcloud_id,
@@ -2621,73 +2624,54 @@
 
         return subcloud_name
 
-    def update_subcloud(self,
-                        context,
-                        subcloud_id,
-                        management_state=None,
-                        description=None,
-                        location=None,
-                        group_id=None,
-                        data_install=None,
-                        force=None,
-                        deploy_status=None,
-                        peer_group_id=None,
-                        bootstrap_values=None,
-                        bootstrap_address=None):
-        """Update subcloud and notify orchestrators.
+    def _validate_management_state_update(self, new_management_state: str,
+                                          new_deploy_status: str,
+                                          subcloud: Subcloud, force: bool):
+        if new_management_state == dccommon_consts.MANAGEMENT_UNMANAGED:
+            if subcloud.management_state == dccommon_consts.MANAGEMENT_UNMANAGED:
+                msg = f"Subcloud {subcloud.name} already unmanaged"
+                LOG.warning(msg)
+                raise exceptions.BadRequest(resource="subcloud", msg=msg)
 
-        :param context: request context object
-        :param subcloud_id: id of subcloud to update
-        :param management_state: new management state
-        :param description: new description
-        :param location: new location
-        :param group_id: new subcloud group id
-        :param data_install: subcloud install values
-        :param force: force flag
-        :param deploy_status: update to expected deploy status
-        :param peer_group_id: id of peer group
-        :param bootstrap_values: bootstrap_values yaml content
-        :param bootstrap_address: oam IP for rehome
-        """
+        elif new_management_state == dccommon_consts.MANAGEMENT_MANAGED:
+            if subcloud.management_state == dccommon_consts.MANAGEMENT_MANAGED:
+                msg = f"Subcloud {subcloud.name} already managed"
+                LOG.warning(msg)
+                raise exceptions.BadRequest(resource="subcloud", msg=msg)
 
-        LOG.info("Updating subcloud %s." % subcloud_id)
+        if force:
+            # No need for further validation
+            return
 
-        # Get the subcloud details from the database
-        subcloud = db_api.subcloud_get(context, subcloud_id)
-        original_management_state = subcloud.management_state
+        deploy_status_complete = (
+            subcloud.deploy_status == consts.DEPLOY_STATE_DONE
+            or prestage.is_deploy_status_prestage(subcloud.deploy_status)
+        )
+        allowed_deploy_transition = (
+            subcloud.deploy_status == consts.DEPLOY_STATE_REHOME_PENDING
+            and new_deploy_status == consts.DEPLOY_STATE_DONE
+        )
 
-        # Semantic checking
-        if management_state:
-            if management_state == dccommon_consts.MANAGEMENT_UNMANAGED:
-                if subcloud.management_state == dccommon_consts.MANAGEMENT_UNMANAGED:
-                    LOG.warning("Subcloud %s already unmanaged" % subcloud_id)
-                    raise exceptions.BadRequest(
-                        resource='subcloud',
-                        msg='Subcloud is already unmanaged')
-            elif management_state == dccommon_consts.MANAGEMENT_MANAGED:
-                if subcloud.management_state == dccommon_consts.MANAGEMENT_MANAGED:
-                    LOG.warning("Subcloud %s already managed" % subcloud_id)
-                    raise exceptions.BadRequest(
-                        resource='subcloud',
-                        msg='Subcloud is already managed')
-                elif not force:
-                    if (subcloud.deploy_status != consts.DEPLOY_STATE_DONE and
-                            not prestage.is_deploy_status_prestage(
-                                subcloud.deploy_status)):
-                        LOG.warning("Subcloud %s can be managed only when"
-                                    "deploy_status is complete" % subcloud_id)
-                        raise exceptions.BadRequest(
-                            resource='subcloud',
-                            msg='Subcloud can be managed only if deploy status is complete')
-                if subcloud.availability_status != \
-                        dccommon_consts.AVAILABILITY_ONLINE:
-                    LOG.warning("Subcloud %s is not online" % subcloud_id)
-                    raise exceptions.SubcloudNotOnline()
-            else:
-                LOG.error("Invalid management_state %s" % management_state)
-                raise exceptions.InternalError()
+        if not deploy_status_complete and not allowed_deploy_transition:
+            msg = (f"Unable to manage {subcloud.name}: its deploy_status "
+                   f"must be either '{consts.DEPLOY_STATE_DONE}' or "
+                   f"'{consts.DEPLOY_STATE_REHOME_PENDING}'")
+            LOG.warning(msg)
+            raise exceptions.BadRequest(resource="subcloud", msg=msg)
 
-        # update bootstrap values into rehome_data
+        if (subcloud.availability_status !=
+                dccommon_consts.AVAILABILITY_ONLINE):
+            LOG.warning(f"Subcloud {subcloud.name} is not online")
+            raise exceptions.SubcloudNotOnline()
+
+        # The management state can be 'unmanaged', 'managed' or None (which
+        # means that it's not being changed), any other value is invalid
+        elif new_management_state is not None:
+            LOG.error(f"Invalid management_state {new_management_state}")
+            raise exceptions.InvalidInputError()
+
+    def _prepare_rehome_data(self, subcloud: Subcloud,
+                             bootstrap_values, bootstrap_address):
         rehome_data_dict = None
         # load the existing data if it exists
         if subcloud.rehome_data:
@@ -2721,44 +2705,6 @@
                 if _bootstrap_address:
                     rehome_data_dict['saved_payload']['bootstrap-address'] = _bootstrap_address
 
-        # update deploy status, ONLY apply for unmanaged subcloud
-        new_deploy_status = None
-        if deploy_status is not None:
-            if subcloud.management_state != dccommon_consts.MANAGEMENT_UNMANAGED:
-                raise exceptions.BadRequest(
-                    resource='subcloud',
-                    msg='deploy_status can only be updated on unmanaged subcloud')
-            new_deploy_status = deploy_status
-            # set all endpoint statuses to unknown
-            # no endpoint will be audited for secondary
-            # subclouds
-            self.state_rpc_client.update_subcloud_endpoint_status_sync(
-                context,
-                subcloud_name=subcloud.name,
-                endpoint_type=None,
-                sync_status=dccommon_consts.SYNC_STATUS_UNKNOWN)
-
-            # clear existing fault alarm of secondary subcloud
-            for alarm_id, entity_instance_id in (
-                    (fm_const.FM_ALARM_ID_DC_SUBCLOUD_OFFLINE,
-                     "subcloud=%s" % subcloud.name),
-                    (fm_const.FM_ALARM_ID_DC_SUBCLOUD_RESOURCE_OUT_OF_SYNC,
-                     "subcloud=%s.resource=%s" %
-                     (subcloud.name, dccommon_consts.ENDPOINT_TYPE_DC_CERT)),
-                    (fm_const.FM_ALARM_ID_DC_SUBCLOUD_BACKUP_FAILED,
-                     "subcloud=%s" % subcloud.name)):
-                try:
-                    fault = self.fm_api.get_fault(alarm_id,
-                                                  entity_instance_id)
-                    if fault:
-                        self.fm_api.clear_fault(alarm_id,
-                                                entity_instance_id)
-                except Exception as e:
-                    LOG.info(
-                        "Failed to clear fault for subcloud %s, alarm_id=%s" %
-                        (subcloud.name, alarm_id))
-                    LOG.exception(e)
-
         # update bootstrap_address
         if bootstrap_address:
             if rehome_data_dict is None:
@@ -2768,10 +2714,79 @@
                 pecan.abort(400, 'Cannot update bootstrap_address into rehome data, '
                             'need to import bootstrap_values first')
             rehome_data_dict['saved_payload']['bootstrap-address'] = bootstrap_address
+        rehome_data = None
         if rehome_data_dict:
             rehome_data = json.dumps(rehome_data_dict)
-        else:
-            rehome_data = None
+
+        return rehome_data
+
+    def update_subcloud(self,
+                        context,
+                        subcloud_id,
+                        management_state=None,
+                        description=None,
+                        location=None,
+                        group_id=None,
+                        data_install=None,
+                        force=None,
+                        deploy_status=None,
+                        peer_group_id=None,
+                        bootstrap_values=None,
+                        bootstrap_address=None):
+        """Update subcloud and notify orchestrators.
+
+        :param context: request context object
+        :param subcloud_id: id of subcloud to update
+        :param management_state: new management state
+        :param description: new description
+        :param location: new location
+        :param group_id: new subcloud group id
+        :param data_install: subcloud install values
+        :param force: force flag
+        :param deploy_status: update to expected deploy status
+        :param peer_group_id: id of peer group
+        :param bootstrap_values: bootstrap_values yaml content
+        :param bootstrap_address: oam IP for rehome
+        """
+
+        LOG.info("Updating subcloud %s." % subcloud_id)
+
+        # Get the subcloud details from the database
+        subcloud: Subcloud = db_api.subcloud_get(context, subcloud_id)
+        original_management_state = subcloud.management_state
+        original_deploy_status = subcloud.deploy_status
+
+        # When trying to manage a 'rehome-pending' subcloud, revert its deploy
+        # status back to 'complete' if it's not specified
+        if (management_state == dccommon_consts.MANAGEMENT_MANAGED and
+                subcloud.deploy_status == consts.DEPLOY_STATE_REHOME_PENDING and
+                not deploy_status):
+            deploy_status = consts.DEPLOY_STATE_DONE
+
+        # management_state semantic checking
+        self._validate_management_state_update(management_state, deploy_status,
+                                               subcloud, force)
+
+        # Update bootstrap values into rehome_data
+        rehome_data = self._prepare_rehome_data(subcloud, bootstrap_values,
+                                                bootstrap_address)
+        if deploy_status:
+            msg = None
+            # Only update deploy_status if subcloud is or will be unmanaged
+            if dccommon_consts.MANAGEMENT_UNMANAGED not in (
+                    management_state, subcloud.management_state):
+                msg = ("Unable to update deploy_status of subcloud "
+                       f"{subcloud.name} to {deploy_status}: subcloud "
+                       "must also be unmanaged")
+            # Only allow managing if the deploy status is also set to 'complete'
+            if (management_state == dccommon_consts.MANAGEMENT_MANAGED and
+                    deploy_status != consts.DEPLOY_STATE_DONE):
+                msg = (f"Unable to manage {subcloud.name} while also updating "
+                       f"its deploy_status to {deploy_status}: not allowed")
+            if msg:
+                LOG.warning(msg)
+                raise exceptions.BadRequest(resource='subcloud', msg=msg)
+
         subcloud = db_api.subcloud_update(
             context,
             subcloud_id,
@@ -2780,7 +2795,7 @@
             location=location,
             group_id=group_id,
             data_install=data_install,
-            deploy_status=new_deploy_status,
+            deploy_status=deploy_status,
             peer_group_id=peer_group_id,
             rehome_data=rehome_data
         )
@@ -2806,11 +2821,16 @@
                       'state change, resume to original state, subcloud: %s' %
                       subcloud.name)
             management_state = original_management_state
+            # Also revert the deploy_status otherwise we could have a
+            # managed subcloud with the 'secondary' or other invalid deploy
+            # status/management state combination.
+            deploy_status = original_deploy_status
             subcloud = \
                 db_api.subcloud_update(context, subcloud_id,
                                        management_state=management_state,
                                        description=description,
-                                       location=location)
+                                       location=location,
+                                       deploy_status=deploy_status)
 
         if management_state == dccommon_consts.MANAGEMENT_UNMANAGED:
             # set all endpoint statuses to unknown, except the dc-cert
@@ -2830,6 +2850,20 @@
             dc_notification = dcmanager_rpc_client.DCManagerNotifications()
             dc_notification.subcloud_managed(context, subcloud.region_name)
 
+        # Set all endpoint statuses to unknown, no endpoint
+        # will be audited for secondary or rehome-pending subclouds
+        if subcloud.deploy_status in (consts.DEPLOY_STATE_SECONDARY,
+                                      consts.DEPLOY_STATE_REHOME_PENDING):
+            self.state_rpc_client.update_subcloud_endpoint_status_sync(
+                context,
+                subcloud_name=subcloud.name,
+                endpoint_type=None,
+                sync_status=dccommon_consts.SYNC_STATUS_UNKNOWN)
+
+        # Clear existing fault alarm of secondary subcloud
+        if subcloud.deploy_status == consts.DEPLOY_STATE_SECONDARY:
+            self._clear_subcloud_alarms(subcloud)
+
         return db_api.subcloud_db_model_to_dict(subcloud)
 
     def update_subcloud_with_network_reconfig(self, context, subcloud_id, payload):
diff --git a/distributedcloud/dcmanager/tests/unit/api/v1/controllers/test_subclouds.py b/distributedcloud/dcmanager/tests/unit/api/v1/controllers/test_subclouds.py
index 6397cddf2..aa90cb95a 100644
--- a/distributedcloud/dcmanager/tests/unit/api/v1/controllers/test_subclouds.py
+++ b/distributedcloud/dcmanager/tests/unit/api/v1/controllers/test_subclouds.py
@@ -1281,7 +1281,8 @@ class TestSubcloudAPIOther(testroot.DCManagerApiTest):
             force=None,
             peer_group_id=None,
             bootstrap_values=None,
-            bootstrap_address=None)
+            bootstrap_address=None,
+            deploy_status=None)
         self.assertEqual(response.status_int, 200)
 
     @mock.patch.object(psd_common, 'get_network_address_pool')
@@ -1355,7 +1356,8 @@
             force=None,
             peer_group_id=None,
             bootstrap_values=None,
-            bootstrap_address=None)
+            bootstrap_address=None,
+            deploy_status=None)
         self.assertEqual(response.status_int, 200)
 
     @mock.patch.object(subclouds.SubcloudsController, '_get_patch_data')
@@ -1395,7 +1397,8 @@
             force=None,
             peer_group_id=None,
             bootstrap_values=None,
-            bootstrap_address=None)
+            bootstrap_address=None,
+            deploy_status=None)
         self.assertEqual(response.status_int, 200)
 
     @mock.patch.object(subclouds.SubcloudsController, '_get_patch_data')
@@ -1461,7 +1464,8 @@
             force=True,
             peer_group_id=None,
             bootstrap_values=None,
-            bootstrap_address=None)
+            bootstrap_address=None,
+            deploy_status=None)
         self.assertEqual(response.status_int, 200)
 
     @mock.patch.object(subclouds.SubcloudsController, '_get_updatestatus_payload')