Platform restore for AIO-DX and Standard no-storage configuration
This commit adds support for platform restore of AIO-DX and Standard
no-storage configurations using the restore_platform playbook:

- For AIO-DX, the restored ceph crushmap is loaded through puppet when
  controller-0 is unlocked for the first time. OSDs are created on the
  controller nodes during controller unlock.
- For Standard no-storage configuration, the restored ceph crushmap is
  loaded through sysinv once the ceph monitor quorum is formed. OSDs are
  created on the controller nodes by applying ceph osd runtime manifests.
- The .restore_in_progress flag file is removed as part of the first
  unlock of controller-0.

Change-Id: I65bfc67cf90e894d125eb6c860139b26d17b562e
Story: 2004761
Task: 35965
Signed-off-by: Wei Zhou <wei.zhou@windriver.com>
parent 524c62c426
commit edb8206bf7
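For readability, here is a minimal, self-contained sketch of the per-configuration decision that the sysinv change below implements. The function name, string return values, and flat parameters are illustrative assumptions for this sketch only; they are not sysinv code.

# Illustrative sketch only: models the restore decision described in the
# commit message, not the actual sysinv implementation.
def ceph_restore_action(system_type, controllers_enabled, total_controllers,
                        active_mons, required_mons):
    """Decide what to do with the ceph backend 'restore' task."""
    if system_type == "AIO-SX":
        # Crushmap is loaded by puppet on the first unlock; once initial
        # config is complete the task can simply be cleared.
        return "clear-task"
    if system_type == "AIO-DX":
        # OSDs are created by puppet when each controller is unlocked;
        # clear the task only after both controllers are enabled.
        if controllers_enabled == total_controllers:
            return "clear-task"
        return "wait"
    # Standard no-storage: wait for enough ceph monitors, then clear the
    # task so OSDs get created by applying ceph osd runtime manifests.
    if active_mons < required_mons:
        return "wait"
    return "clear-task-and-apply-osd-manifests"


# Example: a 2+2 system with both monitors up proceeds to OSD creation.
print(ceph_restore_action("Standard", 2, 2, active_mons=2, required_mons=2))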
@@ -1,5 +1,6 @@
 class platform::ceph::params(
   $service_enabled = false,
+  $skip_osds_during_restore = false,
   $cluster_uuid = undef,
   $cluster_name = 'ceph',
   $authentication_type = 'none',
@@ -375,31 +376,36 @@ class platform::ceph::osds(
   $journal_config = {},
 ) inherits ::platform::ceph::params {

-  file { '/var/lib/ceph/osd':
-    ensure => 'directory',
-    path => '/var/lib/ceph/osd',
-    owner => 'root',
-    group => 'root',
-    mode => '0755',
-  }
+  # skip_osds_during_restore is set to true when the default primary
+  # ceph backend "ceph-store" has "restore" as its task and it is
+  # not an AIO system.
+  if ! $skip_osds_during_restore {
+    file { '/var/lib/ceph/osd':
+      ensure => 'directory',
+      path => '/var/lib/ceph/osd',
+      owner => 'root',
+      group => 'root',
+      mode => '0755',
+    }

-  # Ensure ceph.conf is complete before configuring OSDs
-  Class['::ceph'] -> Platform_ceph_osd <| |>
+    # Ensure ceph.conf is complete before configuring OSDs
+    Class['::ceph'] -> Platform_ceph_osd <| |>

-  # Journal disks need to be prepared before the OSDs are configured
-  Platform_ceph_journal <| |> -> Platform_ceph_osd <| |>
-  # Crush locations in ceph.conf need to be set before the OSDs are configured
-  Osd_crush_location <| |> -> Platform_ceph_osd <| |>
+    # Journal disks need to be prepared before the OSDs are configured
+    Platform_ceph_journal <| |> -> Platform_ceph_osd <| |>
+    # Crush locations in ceph.conf need to be set before the OSDs are configured
+    Osd_crush_location <| |> -> Platform_ceph_osd <| |>

-  # default configuration for all ceph object resources
-  Ceph::Osd {
-    cluster => $cluster_name,
-    cluster_uuid => $cluster_uuid,
-  }
+    # default configuration for all ceph object resources
+    Ceph::Osd {
+      cluster => $cluster_name,
+      cluster_uuid => $cluster_uuid,
+    }

-  create_resources('osd_crush_location', $osd_config)
-  create_resources('platform_ceph_osd', $osd_config)
-  create_resources('platform_ceph_journal', $journal_config)
+    create_resources('osd_crush_location', $osd_config)
+    create_resources('platform_ceph_osd', $osd_config)
+    create_resources('platform_ceph_journal', $journal_config)
+  }
 }

 class platform::ceph::haproxy
@@ -2202,6 +2202,15 @@ class HostController(rest.RestController):
         LOG.info("Update host memory for (%s)" % ihost_obj['hostname'])
         pecan.request.rpcapi.update_host_memory(pecan.request.context,
                                                 ihost_obj['uuid'])
+
+        # The restore_in_progress flag file is needed to bypass vim and
+        # application re-apply when issuing the first unlock command during
+        # restore. Once the command is accepted by mtce, it can be removed.
+        if (os.path.isfile(tsc.RESTORE_IN_PROGRESS_FLAG) and
+                patched_ihost.get('action') in
+                [constants.UNLOCK_ACTION, constants.FORCE_UNLOCK_ACTION]):
+            os.remove(tsc.RESTORE_IN_PROGRESS_FLAG)
+
         return Host.convert_with_links(ihost_obj)

     def _vim_host_add(self, ihost):
@@ -4448,8 +4457,7 @@ class HostController(rest.RestController):
         else:
             return False

-    @staticmethod
-    def _update_add_ceph_state():
+    def _update_add_ceph_state(self):
         api = pecan.request.dbapi

         backend = StorageBackendConfig.get_configuring_backend(api)
@@ -4556,18 +4564,63 @@ class HostController(rest.RestController):
             except Exception as e:
                 raise wsme.exc.ClientSideError(
                     _("Restore Ceph config failed: %s" % e))
-        elif cutils.is_aio_system(pecan.request.dbapi):
-            # For AIO, ceph config restore is done in puppet when ceph
+        elif cutils.is_aio_simplex_system(pecan.request.dbapi):
+            # For AIO-SX, ceph config restore is done in puppet when ceph
             # manifest is applied on first unlock. The
             # initial_config_complete flag is set after first unlock.
-            # Once one controller is up, ceph cluster should be operational.
+            # Once one controller is up, ceph cluster should be fully
+            # operational.
             LOG.info("This is AIO-SX... Ceph backend task is RESTORE")
             if cutils.is_initial_config_complete():
                 LOG.info("This is AIO-SX... clear ceph backend task to None")
                 api.storage_backend_update(backend.uuid, {'task': None})
+        elif cutils.is_aio_duplex_system(pecan.request.dbapi):
+            # For AIO-DX, ceph config restore is done in puppet when ceph
+            # manifest is applied on first unlock. The 2nd osd is created
+            # in puppet when controller-1 is unlocked. Once both
+            # controllers are up, Ceph cluster should be fully operational.
+            LOG.info("This is AIO-DX... Ceph backend task is RESTORE")
+            c_hosts = api.ihost_get_by_personality(
+                constants.CONTROLLER
+            )
+
+            ctlr_enabled = 0
+            for c_host in c_hosts:
+                if c_host.operational == constants.OPERATIONAL_ENABLED:
+                    ctlr_enabled = ctlr_enabled + 1
+
+            if ctlr_enabled == len(c_hosts):
+                LOG.info("This is AIO-DX... clear ceph backend task to None")
+                api.storage_backend_update(backend.uuid, {'task': None})
         else:
-            # TODO(Wei): Need more work to restore ceph for 2+2
-            pass
+            # This is ceph restore for standard non-storage configuration.
+            # Ceph config restore is done via sysinv after both ceph
+            # monitors are available.
+            LOG.info("This is 2+2... Ceph backend task is RESTORE")
+            active_mons, required_mons, __ = \
+                self._ceph.get_monitors_status(pecan.request.dbapi)
+            if required_mons > active_mons:
+                LOG.info("Not enough monitors yet to restore ceph config.")
+            else:
+                # By clearing ceph backend task to None osds will be
+                # created thru applying runtime manifests.
+                LOG.info("This is 2+2... clear ceph backend task to None")
+                api.storage_backend_update(backend.uuid, {'task': None})
+
+                # Apply runtime manifests to create OSDs on two controller
+                # nodes.
+                c_hosts = api.ihost_get_by_personality(
+                    constants.CONTROLLER)
+
+                runtime_manifests = True
+                for c_host in c_hosts:
+                    istors = pecan.request.dbapi.istor_get_by_ihost(c_host.uuid)
+                    for stor in istors:
+                        pecan.request.rpcapi.update_ceph_osd_config(
+                            pecan.request.context,
+                            c_host,
+                            stor.uuid,
+                            runtime_manifests)

     @staticmethod
     def update_ihost_action(action, hostupdate):
@@ -735,9 +735,12 @@ def fix_crushmap(dbapi=None):

     # Check if a backup crushmap exists. If it does, that means
     # it is during restore. We need to restore the backup crushmap
-    # instead of generating it. For AIO system, the backup crushmap
-    # is stored in /etc/sysinv. For non-AIO system, it is stored in
-    # /opt/platform/sysinv.
+    # instead of generating it. For non-AIO system, it is stored in
+    # /opt/platform/sysinv which is a drbd fs. For AIO systems because
+    # when unlocking controller-0 for the first time, the crushmap is
+    # set thru ceph puppet when /opt/platform is not mounted yet, we
+    # store the crushmap in /etc/sysinv.
+
     if cutils.is_aio_system(dbapi):
         backup = os.path.join(constants.CEPH_CRUSH_MAP_BACKUP_DIR_FOR_AIO,
                               constants.CEPH_CRUSH_MAP_BACKUP)
@@ -2037,6 +2037,11 @@ def is_inventory_config_complete(dbapi, forihostid):
         return False


+def is_std_system(dbapi):
+    system = dbapi.isystem_get_one()
+    return system.system_type == constants.TIS_STD_BUILD
+
+
 def is_aio_system(dbapi):
     system = dbapi.isystem_get_one()
     return system.system_type == constants.TIS_AIO_BUILD
@@ -83,6 +83,10 @@ class CephPuppet(openstack.OpenstackBasePuppet):

         ksuser = self._get_service_user_name(self.SERVICE_NAME_RGW)

+        skip_osds_during_restore = \
+            (utils.is_std_system(self.dbapi) and
+             ceph_backend.task == constants.SB_TASK_RESTORE)
+
         config = {
             'ceph::ms_bind_ipv6': ms_bind_ipv6,
@@ -112,6 +116,8 @@ class CephPuppet(openstack.OpenstackBasePuppet):
                 self._get_service_user_domain_name(),
             'platform::ceph::params::rgw_admin_project':
                 self._get_service_tenant_name(),
+            'platform::ceph::params::skip_osds_during_restore':
+                skip_osds_during_restore,
         }

         if utils.is_openstack_applied(self.dbapi):