Platform restore for AIO-DX and Standard no-storage configuration

This commit adds support for platform restore of AIO-DX and Standard
no-storage configurations using the restore_platform playbook:
 - For AIO-DX, the restored ceph crushmap is loaded through puppet
   when controller-0 is unlocked for the first time. OSDs are
   created on the controller nodes during controller unlock.
 - For Standard no-storage configurations, the restored ceph crushmap
   is loaded through sysinv once ceph monitor quorum is formed. OSDs
   are created on the controller nodes by applying ceph osd runtime
   manifests.
 - The .restore_in_progress flag file is removed as part of the first
   unlock of controller-0.
A rough sketch of this per-configuration dispatch is included after
the diffstat below.

Change-Id: I65bfc67cf90e894d125eb6c860139b26d17b562e
Story: 2004761
Task: 35965
Signed-off-by: Wei Zhou <wei.zhou@windriver.com>
Wei Zhou 2019-07-23 16:23:35 -04:00
parent 524c62c426
commit edb8206bf7
5 changed files with 107 additions and 34 deletions
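
For orientation, here is a minimal Python sketch (not part of the change itself) of the per-configuration dispatch described above. The function name, arguments, and the "aio-sx"/"aio-dx" labels are illustrative only; the authoritative logic is the _update_add_ceph_state() hunk further down.

# Illustrative sketch only -- mirrors the branching added to
# _update_add_ceph_state() in this commit; names and arguments are
# hypothetical simplifications of the sysinv helpers.
def can_clear_ceph_restore_task(system_type, initial_config_complete,
                                enabled_controllers, total_controllers,
                                active_mons, required_mons):
    """Return True when the ceph backend 'restore' task can be cleared."""
    if system_type == "aio-sx":
        # Crushmap and OSDs are restored by puppet on the first unlock;
        # done once initial configuration is complete.
        return initial_config_complete
    if system_type == "aio-dx":
        # OSDs are created by puppet as each controller is unlocked;
        # wait until both controllers are enabled.
        return enabled_controllers == total_controllers
    # Standard no-storage: wait for ceph monitor quorum, after which
    # sysinv applies runtime manifests to create the controller OSDs.
    return active_mons >= required_mons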


@@ -1,5 +1,6 @@
 class platform::ceph::params(
   $service_enabled = false,
+  $skip_osds_during_restore = false,
   $cluster_uuid = undef,
   $cluster_name = 'ceph',
   $authentication_type = 'none',
@@ -375,31 +376,36 @@ class platform::ceph::osds(
   $journal_config = {},
 ) inherits ::platform::ceph::params {
-  file { '/var/lib/ceph/osd':
-    ensure => 'directory',
-    path   => '/var/lib/ceph/osd',
-    owner  => 'root',
-    group  => 'root',
-    mode   => '0755',
-  }
+  # skip_osds_during_restore is set to true when the default primary
+  # ceph backend "ceph-store" has "restore" as its task and it is
+  # not an AIO system.
+  if ! $skip_osds_during_restore {
+    file { '/var/lib/ceph/osd':
+      ensure => 'directory',
+      path   => '/var/lib/ceph/osd',
+      owner  => 'root',
+      group  => 'root',
+      mode   => '0755',
+    }

-  # Ensure ceph.conf is complete before configuring OSDs
-  Class['::ceph'] -> Platform_ceph_osd <| |>
+    # Ensure ceph.conf is complete before configuring OSDs
+    Class['::ceph'] -> Platform_ceph_osd <| |>

-  # Journal disks need to be prepared before the OSDs are configured
-  Platform_ceph_journal <| |> -> Platform_ceph_osd <| |>
+    # Journal disks need to be prepared before the OSDs are configured
+    Platform_ceph_journal <| |> -> Platform_ceph_osd <| |>

-  # Crush locations in ceph.conf need to be set before the OSDs are configured
-  Osd_crush_location <| |> -> Platform_ceph_osd <| |>
+    # Crush locations in ceph.conf need to be set before the OSDs are configured
+    Osd_crush_location <| |> -> Platform_ceph_osd <| |>

-  # default configuration for all ceph object resources
-  Ceph::Osd {
-    cluster => $cluster_name,
-    cluster_uuid => $cluster_uuid,
-  }
+    # default configuration for all ceph object resources
+    Ceph::Osd {
+      cluster => $cluster_name,
+      cluster_uuid => $cluster_uuid,
+    }

-  create_resources('osd_crush_location', $osd_config)
-  create_resources('platform_ceph_osd', $osd_config)
-  create_resources('platform_ceph_journal', $journal_config)
+    create_resources('osd_crush_location', $osd_config)
+    create_resources('platform_ceph_osd', $osd_config)
+    create_resources('platform_ceph_journal', $journal_config)
+  }
 }

 class platform::ceph::haproxy
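
The entire OSD configuration block is now gated by the new skip_osds_during_restore parameter. A hedged Python rendering of the predicate that feeds this parameter (it is computed by the sysinv puppet plugin in the last file of this diff; the function below and its 'restore' literal are illustrative stand-ins, not sysinv API):

# Sketch of the condition behind platform::ceph::params::skip_osds_during_restore.
# The 'restore' literal stands in for constants.SB_TASK_RESTORE.
def compute_skip_osds_during_restore(is_std_system, ceph_backend_task,
                                     restore_task='restore'):
    # OSD setup is skipped only on a Standard (non-AIO) system whose
    # primary ceph backend still carries the restore task; AIO systems
    # create their OSDs through this manifest at unlock time.
    return is_std_system and ceph_backend_task == restore_task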


@@ -2202,6 +2202,15 @@ class HostController(rest.RestController):
             LOG.info("Update host memory for (%s)" % ihost_obj['hostname'])
             pecan.request.rpcapi.update_host_memory(pecan.request.context,
                                                     ihost_obj['uuid'])
+
+        # The restore_in_progress flag file is needed to bypass vim and
+        # application re-apply when issuing the first unlock command during
+        # restore. Once the command is accepted by mtce, it can be removed.
+        if (os.path.isfile(tsc.RESTORE_IN_PROGRESS_FLAG) and
+                patched_ihost.get('action') in
+                [constants.UNLOCK_ACTION, constants.FORCE_UNLOCK_ACTION]):
+            os.remove(tsc.RESTORE_IN_PROGRESS_FLAG)
+
         return Host.convert_with_links(ihost_obj)

     def _vim_host_add(self, ihost):
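
The cleanup above uses a check-then-remove pattern on the flag file. For reference, a race-tolerant variant would simply ignore a missing file; this is only a sketch with a hypothetical flag path, not what the commit does (the commit uses tsc.RESTORE_IN_PROGRESS_FLAG):

import errno
import os

# Hypothetical path for illustration only.
RESTORE_IN_PROGRESS_FLAG = '/etc/platform/.restore_in_progress'

def clear_restore_flag(flag_path=RESTORE_IN_PROGRESS_FLAG):
    """Remove the restore flag, tolerating it already being gone."""
    try:
        os.remove(flag_path)
    except OSError as e:
        if e.errno != errno.ENOENT:
            raise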
@@ -4448,8 +4457,7 @@ class HostController(rest.RestController):
         else:
             return False

-    @staticmethod
-    def _update_add_ceph_state():
+    def _update_add_ceph_state(self):

         api = pecan.request.dbapi
         backend = StorageBackendConfig.get_configuring_backend(api)
@@ -4556,18 +4564,63 @@ class HostController(rest.RestController):
                 except Exception as e:
                     raise wsme.exc.ClientSideError(
                         _("Restore Ceph config failed: %s" % e))
-            elif cutils.is_aio_system(pecan.request.dbapi):
-                # For AIO, ceph config restore is done in puppet when ceph
+            elif cutils.is_aio_simplex_system(pecan.request.dbapi):
+                # For AIO-SX, ceph config restore is done in puppet when ceph
                 # manifest is applied on first unlock. The
                 # initial_config_complete flag is set after first unlock.
-                # Once one controller is up, ceph cluster should be operational.
+                # Once one controller is up, ceph cluster should be fully
+                # operational.
                 LOG.info("This is AIO-SX... Ceph backend task is RESTORE")
                 if cutils.is_initial_config_complete():
                     LOG.info("This is AIO-SX... clear ceph backend task to None")
                     api.storage_backend_update(backend.uuid, {'task': None})
+            elif cutils.is_aio_duplex_system(pecan.request.dbapi):
+                # For AIO-DX, ceph config restore is done in puppet when ceph
+                # manifest is applied on first unlock. The 2nd osd is created
+                # in puppet when controller-1 is unlocked. Once both
+                # controllers are up, Ceph cluster should be fully operational.
+                LOG.info("This is AIO-DX... Ceph backend task is RESTORE")
+                c_hosts = api.ihost_get_by_personality(
+                    constants.CONTROLLER
+                )
+
+                ctlr_enabled = 0
+                for c_host in c_hosts:
+                    if c_host.operational == constants.OPERATIONAL_ENABLED:
+                        ctlr_enabled = ctlr_enabled + 1
+
+                if ctlr_enabled == len(c_hosts):
+                    LOG.info("This is AIO-DX... clear ceph backend task to None")
+                    api.storage_backend_update(backend.uuid, {'task': None})
             else:
-                # TODO(Wei): Need more work to restore ceph for 2+2
-                pass
+                # This is ceph restore for standard non-storage configuration.
+                # Ceph config restore is done via sysinv after both ceph
+                # monitors are available.
+                LOG.info("This is 2+2... Ceph backend task is RESTORE")
+                active_mons, required_mons, __ = \
+                    self._ceph.get_monitors_status(pecan.request.dbapi)
+                if required_mons > active_mons:
+                    LOG.info("Not enough monitors yet to restore ceph config.")
+                else:
+                    # By clearing ceph backend task to None osds will be
+                    # created thru applying runtime manifests.
+                    LOG.info("This is 2+2... clear ceph backend task to None")
+                    api.storage_backend_update(backend.uuid, {'task': None})
+
+                    # Apply runtime manifests to create OSDs on two controller
+                    # nodes.
+                    c_hosts = api.ihost_get_by_personality(
+                        constants.CONTROLLER)
+
+                    runtime_manifests = True
+                    for c_host in c_hosts:
+                        istors = pecan.request.dbapi.istor_get_by_ihost(c_host.uuid)
+                        for stor in istors:
+                            pecan.request.rpcapi.update_ceph_osd_config(
+                                pecan.request.context,
+                                c_host,
+                                stor.uuid,
+                                runtime_manifests)

     @staticmethod
     def update_ihost_action(action, hostupdate):
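
As a side note on the AIO-DX branch above, the enabled-controller count can also be written more compactly; this is only an equivalent sketch (assuming host objects expose .operational and that 'enabled' stands in for constants.OPERATIONAL_ENABLED), not a suggested change to the commit:

# Equivalent, more idiomatic form of the AIO-DX readiness check above.
def all_controllers_enabled(c_hosts, operational_enabled='enabled'):
    return all(h.operational == operational_enabled for h in c_hosts)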


@@ -735,9 +735,12 @@ def fix_crushmap(dbapi=None):
     # Check if a backup crushmap exists. If it does, that means
     # it is during restore. We need to restore the backup crushmap
-    # instead of generating it. For AIO system, the backup crushmap
-    # is stored in /etc/sysinv. For non-AIO system, it is stored in
-    # /opt/platform/sysinv.
+    # instead of generating it. For non-AIO system, it is stored in
+    # /opt/platform/sysinv which is a drbd fs. For AIO systems because
+    # when unlocking controller-0 for the first time, the crushmap is
+    # set thru ceph puppet when /opt/platform is not mounted yet, we
+    # store the crushmap in /etc/sysinv.
     if cutils.is_aio_system(dbapi):
         backup = os.path.join(constants.CEPH_CRUSH_MAP_BACKUP_DIR_FOR_AIO,
                               constants.CEPH_CRUSH_MAP_BACKUP)
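
The updated comment explains why the backup crushmap location differs by system type; a small sketch of the resulting path selection follows (directory and file names are illustrative defaults, the real values come from sysinv constants):

import os

# Illustrative values; the code uses constants.CEPH_CRUSH_MAP_BACKUP and
# the per-system-type backup directory constants.
CEPH_CRUSH_MAP_BACKUP = 'crushmap.bin.backup'
AIO_BACKUP_DIR = '/etc/sysinv'               # available before /opt/platform mounts
NON_AIO_BACKUP_DIR = '/opt/platform/sysinv'  # replicated DRBD filesystem

def crushmap_backup_path(is_aio_system):
    # AIO: puppet loads the crushmap on first unlock, before the
    # /opt/platform DRBD filesystem is mounted, so use /etc/sysinv.
    # Non-AIO: sysinv loads it later, so the DRBD path works.
    backup_dir = AIO_BACKUP_DIR if is_aio_system else NON_AIO_BACKUP_DIR
    return os.path.join(backup_dir, CEPH_CRUSH_MAP_BACKUP)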


@@ -2037,6 +2037,11 @@ def is_inventory_config_complete(dbapi, forihostid):
         return False


+def is_std_system(dbapi):
+    system = dbapi.isystem_get_one()
+    return system.system_type == constants.TIS_STD_BUILD
+
+
 def is_aio_system(dbapi):
     system = dbapi.isystem_get_one()
     return system.system_type == constants.TIS_AIO_BUILD


@@ -83,6 +83,10 @@ class CephPuppet(openstack.OpenstackBasePuppet):
         ksuser = self._get_service_user_name(self.SERVICE_NAME_RGW)

+        skip_osds_during_restore = \
+            (utils.is_std_system(self.dbapi) and
+             ceph_backend.task == constants.SB_TASK_RESTORE)
+
         config = {
             'ceph::ms_bind_ipv6': ms_bind_ipv6,
@@ -112,6 +116,8 @@ class CephPuppet(openstack.OpenstackBasePuppet):
                 self._get_service_user_domain_name(),
             'platform::ceph::params::rgw_admin_project':
                 self._get_service_tenant_name(),
+            'platform::ceph::params::skip_osds_during_restore':
+                skip_osds_during_restore,
         }

         if utils.is_openstack_applied(self.dbapi):
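
The new hiera key is what connects this plugin to the Puppet guard in the first file. A minimal sketch of how such a flag could end up in generated hieradata (the writer function is hypothetical; only the key name comes from the diff):

import yaml

# Hypothetical helper: sysinv puppet plugins return dicts of hiera keys,
# which are merged and written out for puppet to consume.
def render_ceph_restore_hiera(skip_osds_during_restore):
    config = {
        'platform::ceph::params::skip_osds_during_restore':
            bool(skip_osds_during_restore),
    }
    return yaml.safe_dump(config, default_flow_style=False)

# A Standard system mid-restore would yield:
#   platform::ceph::params::skip_osds_during_restore: true
print(render_ceph_restore_hiera(True))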