diff --git a/puppet-manifests/src/modules/platform/manifests/ceph.pp b/puppet-manifests/src/modules/platform/manifests/ceph.pp
index 769bd554ee..9050457e58 100644
--- a/puppet-manifests/src/modules/platform/manifests/ceph.pp
+++ b/puppet-manifests/src/modules/platform/manifests/ceph.pp
@@ -1,5 +1,6 @@
 class platform::ceph::params(
   $service_enabled = false,
+  $skip_osds_during_restore = false,
   $cluster_uuid = undef,
   $cluster_name = 'ceph',
   $authentication_type = 'none',
@@ -375,31 +376,36 @@ class platform::ceph::osds(
   $journal_config = {},
 ) inherits ::platform::ceph::params {
 
-  file { '/var/lib/ceph/osd':
-    ensure => 'directory',
-    path   => '/var/lib/ceph/osd',
-    owner  => 'root',
-    group  => 'root',
-    mode   => '0755',
+  # skip_osds_during_restore is set to true when the default primary
+  # ceph backend "ceph-store" has "restore" as its task and it is
+  # not an AIO system.
+  if ! $skip_osds_during_restore {
+    file { '/var/lib/ceph/osd':
+      ensure => 'directory',
+      path   => '/var/lib/ceph/osd',
+      owner  => 'root',
+      group  => 'root',
+      mode   => '0755',
+    }
+
+    # Ensure ceph.conf is complete before configuring OSDs
+    Class['::ceph'] -> Platform_ceph_osd <| |>
+
+    # Journal disks need to be prepared before the OSDs are configured
+    Platform_ceph_journal <| |> -> Platform_ceph_osd <| |>
+    # Crush locations in ceph.conf need to be set before the OSDs are configured
+    Osd_crush_location <| |> -> Platform_ceph_osd <| |>
+
+    # default configuration for all ceph object resources
+    Ceph::Osd {
+      cluster => $cluster_name,
+      cluster_uuid => $cluster_uuid,
+    }
+
+    create_resources('osd_crush_location', $osd_config)
+    create_resources('platform_ceph_osd', $osd_config)
+    create_resources('platform_ceph_journal', $journal_config)
   }
-
-  # Ensure ceph.conf is complete before configuring OSDs
-  Class['::ceph'] -> Platform_ceph_osd <| |>
-
-  # Journal disks need to be prepared before the OSDs are configured
-  Platform_ceph_journal <| |> -> Platform_ceph_osd <| |>
-  # Crush locations in ceph.conf need to be set before the OSDs are configured
-  Osd_crush_location <| |> -> Platform_ceph_osd <| |>
-
-  # default configuration for all ceph object resources
-  Ceph::Osd {
-    cluster => $cluster_name,
-    cluster_uuid => $cluster_uuid,
-  }
-
-  create_resources('osd_crush_location', $osd_config)
-  create_resources('platform_ceph_osd', $osd_config)
-  create_resources('platform_ceph_journal', $journal_config)
 }
 
 class platform::ceph::haproxy
diff --git a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py
index 33be74beae..c494484144 100644
--- a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py
+++ b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py
@@ -2202,6 +2202,15 @@ class HostController(rest.RestController):
             LOG.info("Update host memory for (%s)" % ihost_obj['hostname'])
             pecan.request.rpcapi.update_host_memory(pecan.request.context,
                                                     ihost_obj['uuid'])
+
+        # The restore_in_progress flag file is needed to bypass vim and
+        # application re-apply when issuing the first unlock command during
+        # restore. Once the command is accepted by mtce, it can be removed.
+        if (os.path.isfile(tsc.RESTORE_IN_PROGRESS_FLAG) and
+                patched_ihost.get('action') in
+                [constants.UNLOCK_ACTION, constants.FORCE_UNLOCK_ACTION]):
+            os.remove(tsc.RESTORE_IN_PROGRESS_FLAG)
+
         return Host.convert_with_links(ihost_obj)
 
     def _vim_host_add(self, ihost):
@@ -4448,8 +4457,7 @@ class HostController(rest.RestController):
         else:
             return False
 
-    @staticmethod
-    def _update_add_ceph_state():
+    def _update_add_ceph_state(self):
 
         api = pecan.request.dbapi
         backend = StorageBackendConfig.get_configuring_backend(api)
@@ -4556,18 +4564,63 @@ class HostController(rest.RestController):
             except Exception as e:
                 raise wsme.exc.ClientSideError(
                     _("Restore Ceph config failed: %s" % e))
-        elif cutils.is_aio_system(pecan.request.dbapi):
-            # For AIO, ceph config restore is done in puppet when ceph
+        elif cutils.is_aio_simplex_system(pecan.request.dbapi):
+            # For AIO-SX, ceph config restore is done in puppet when ceph
             # manifest is applied on first unlock. The
             # initial_config_complete flag is set after first unlock.
-            # Once one controller is up, ceph cluster should be operational.
+            # Once one controller is up, the ceph cluster should be fully
+            # operational.
             LOG.info("This is AIO-SX... Ceph backend task is RESTORE")
             if cutils.is_initial_config_complete():
                 LOG.info("This is AIO-SX... clear ceph backend task to None")
                 api.storage_backend_update(backend.uuid, {'task': None})
+        elif cutils.is_aio_duplex_system(pecan.request.dbapi):
+            # For AIO-DX, ceph config restore is done in puppet when ceph
+            # manifest is applied on first unlock. The 2nd osd is created
+            # in puppet when controller-1 is unlocked. Once both
+            # controllers are up, the ceph cluster should be fully operational.
+            LOG.info("This is AIO-DX... Ceph backend task is RESTORE")
+            c_hosts = api.ihost_get_by_personality(
+                constants.CONTROLLER
+            )
+
+            ctlr_enabled = 0
+            for c_host in c_hosts:
+                if c_host.operational == constants.OPERATIONAL_ENABLED:
+                    ctlr_enabled = ctlr_enabled + 1
+
+            if ctlr_enabled == len(c_hosts):
+                LOG.info("This is AIO-DX... clear ceph backend task to None")
+                api.storage_backend_update(backend.uuid, {'task': None})
         else:
-            # TODO(Wei): Need more work to restore ceph for 2+2
-            pass
+            # This is ceph restore for a standard non-storage configuration.
+            # Ceph config restore is done via sysinv after both ceph
+            # monitors are available.
+            LOG.info("This is 2+2... Ceph backend task is RESTORE")
+            active_mons, required_mons, __ = \
+                self._ceph.get_monitors_status(pecan.request.dbapi)
+            if required_mons > active_mons:
+                LOG.info("Not enough monitors yet to restore ceph config.")
+            else:
+                # By clearing the ceph backend task to None, OSDs will be
+                # created by applying runtime manifests.
+                LOG.info("This is 2+2... clear ceph backend task to None")
+                api.storage_backend_update(backend.uuid, {'task': None})
+
+                # Apply runtime manifests to create OSDs on the two
+                # controller nodes.
+                c_hosts = api.ihost_get_by_personality(
+                    constants.CONTROLLER)
+
+                runtime_manifests = True
+                for c_host in c_hosts:
+                    istors = pecan.request.dbapi.istor_get_by_ihost(c_host.uuid)
+                    for stor in istors:
+                        pecan.request.rpcapi.update_ceph_osd_config(
+                            pecan.request.context,
+                            c_host,
+                            stor.uuid,
+                            runtime_manifests)
 
     @staticmethod
     def update_ihost_action(action, hostupdate):
diff --git a/sysinv/sysinv/sysinv/sysinv/common/ceph.py b/sysinv/sysinv/sysinv/sysinv/common/ceph.py
index e0831a04c9..3242c5f184 100644
--- a/sysinv/sysinv/sysinv/sysinv/common/ceph.py
+++ b/sysinv/sysinv/sysinv/sysinv/common/ceph.py
@@ -735,9 +735,12 @@ def fix_crushmap(dbapi=None):
 
     # Check if a backup crushmap exists. If it does, that means
     # it is during restore. We need to restore the backup crushmap
-    # instead of generating it. For AIO system, the backup crushmap
-    # is stored in /etc/sysinv. For non-AIO system, it is stored in
-    # /opt/platform/sysinv.
+    # instead of generating it. For non-AIO systems it is stored in
+    # /opt/platform/sysinv, which is a drbd filesystem. For AIO systems
+    # it is stored in /etc/sysinv, because when controller-0 is unlocked
+    # for the first time the crushmap is set through ceph puppet before
+    # /opt/platform is mounted.
+
     if cutils.is_aio_system(dbapi):
         backup = os.path.join(constants.CEPH_CRUSH_MAP_BACKUP_DIR_FOR_AIO,
                               constants.CEPH_CRUSH_MAP_BACKUP)
diff --git a/sysinv/sysinv/sysinv/sysinv/common/utils.py b/sysinv/sysinv/sysinv/sysinv/common/utils.py
index b2a963d87c..62164c7cd2 100644
--- a/sysinv/sysinv/sysinv/sysinv/common/utils.py
+++ b/sysinv/sysinv/sysinv/sysinv/common/utils.py
@@ -2037,6 +2037,11 @@ def is_inventory_config_complete(dbapi, forihostid):
     return False
 
 
+def is_std_system(dbapi):
+    system = dbapi.isystem_get_one()
+    return system.system_type == constants.TIS_STD_BUILD
+
+
 def is_aio_system(dbapi):
     system = dbapi.isystem_get_one()
     return system.system_type == constants.TIS_AIO_BUILD
diff --git a/sysinv/sysinv/sysinv/sysinv/puppet/ceph.py b/sysinv/sysinv/sysinv/sysinv/puppet/ceph.py
index 73c17fa977..2b7484ce91 100644
--- a/sysinv/sysinv/sysinv/sysinv/puppet/ceph.py
+++ b/sysinv/sysinv/sysinv/sysinv/puppet/ceph.py
@@ -83,6 +83,10 @@ class CephPuppet(openstack.OpenstackBasePuppet):
 
         ksuser = self._get_service_user_name(self.SERVICE_NAME_RGW)
 
+        skip_osds_during_restore = \
+            (utils.is_std_system(self.dbapi) and
+             ceph_backend.task == constants.SB_TASK_RESTORE)
+
         config = {
             'ceph::ms_bind_ipv6': ms_bind_ipv6,
 
@@ -112,6 +116,8 @@ class CephPuppet(openstack.OpenstackBasePuppet):
                 self._get_service_user_domain_name(),
             'platform::ceph::params::rgw_admin_project':
                 self._get_service_tenant_name(),
+            'platform::ceph::params::skip_osds_during_restore':
+                skip_osds_during_restore,
         }
 
         if utils.is_openstack_applied(self.dbapi):
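
Note (illustrative, not part of the patch): the standalone sketch below condenses the restore branching that the host.py hunks add to _update_add_ceph_state(). The boolean inputs and the returned action strings are hypothetical stand-ins for the sysinv dbapi, rpcapi and StorageBackendConfig plumbing used in the real code:

# Illustrative sketch only (not the sysinv implementation). It mirrors the
# branching added for the ceph backend RESTORE task: AIO-SX waits for the
# initial configuration to complete, AIO-DX waits for both controllers to be
# enabled, and a standard (2+2) system waits for enough ceph monitors before
# clearing the task and applying runtime manifests to create the OSDs.


def decide_restore_action(is_aio_simplex, is_aio_duplex,
                          initial_config_complete,
                          controllers_enabled, controllers_total,
                          active_mons, required_mons):
    """Return the action the conductor side would take for a RESTORE task."""
    if is_aio_simplex:
        # Puppet restores ceph on first unlock; clear the task once the
        # initial configuration is complete.
        return 'clear-task' if initial_config_complete else 'wait'
    if is_aio_duplex:
        # Clear the task only once both controllers are enabled.
        if controllers_enabled == controllers_total:
            return 'clear-task'
        return 'wait'
    # Standard (2+2): wait for enough monitors, then clear the task and apply
    # runtime manifests so the OSDs are created on both controllers.
    if active_mons < required_mons:
        return 'wait'
    return 'clear-task-and-apply-osd-manifests'


if __name__ == '__main__':
    # Example: standard system with both monitors up.
    print(decide_restore_action(False, False, True, 2, 2,
                                active_mons=2, required_mons=2))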
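
Note (illustrative, not part of the patch): the puppet guard in platform::ceph::osds is driven by the single skip_osds_during_restore hiera value derived in sysinv/puppet/ceph.py. A minimal sketch of that condition, with assumed stand-in constant values so it can be exercised on its own:

# Illustrative sketch only. In sysinv the flag is computed from the database
# via utils.is_std_system() and the configured ceph backend; here both inputs
# are reduced to plain values. The constant values below are stand-ins and
# may differ from sysinv.common.constants.

TIS_STD_BUILD = 'Standard'      # stand-in for constants.TIS_STD_BUILD
SB_TASK_RESTORE = 'restore'     # stand-in for constants.SB_TASK_RESTORE


def skip_osds_during_restore(system_type, backend_task):
    """True only for a standard (non-AIO) system whose ceph backend still
    carries the restore task; platform::ceph::osds then skips OSD creation
    and sysinv creates the OSDs later through runtime manifests."""
    return (system_type == TIS_STD_BUILD and
            backend_task == SB_TASK_RESTORE)


assert skip_osds_during_restore(TIS_STD_BUILD, SB_TASK_RESTORE)
assert not skip_osds_during_restore(TIS_STD_BUILD, None)
assert not skip_osds_during_restore('All-in-one', SB_TASK_RESTORE)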