Merge "Restore containerized platform using Ansible restore_platform playbook"
This commit is contained in:
commit
d1939669b9
|
@ -250,7 +250,12 @@ class platform::ceph::monitor
|
||||||
$crushmap_txt = '/etc/sysinv/crushmap-aio-sx.txt'
|
$crushmap_txt = '/etc/sysinv/crushmap-aio-sx.txt'
|
||||||
}
|
}
|
||||||
$crushmap_bin = '/etc/sysinv/crushmap.bin'
|
$crushmap_bin = '/etc/sysinv/crushmap.bin'
|
||||||
|
$crushmap_bin_backup = '/etc/sysinv/crushmap.bin.backup'
|
||||||
Ceph::Mon <| |>
|
Ceph::Mon <| |>
|
||||||
|
-> exec { 'Copy crushmap if backup exists':
|
||||||
|
command => "mv -f ${crushmap_bin_backup} ${crushmap_bin}",
|
||||||
|
onlyif => "test -f ${crushmap_bin_backup}",
|
||||||
|
}
|
||||||
-> exec { 'Compile crushmap':
|
-> exec { 'Compile crushmap':
|
||||||
command => "crushtool -c ${crushmap_txt} -o ${crushmap_bin}",
|
command => "crushtool -c ${crushmap_txt} -o ${crushmap_bin}",
|
||||||
onlyif => "test ! -f ${crushmap_bin}",
|
onlyif => "test ! -f ${crushmap_bin}",
|
||||||
|
|
|
@ -354,8 +354,7 @@ class platform::config::controller::post
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ! $::platform::params::controller_upgrade and
|
if ! $::platform::params::controller_upgrade {
|
||||||
! str2bool($::is_restore_in_progress) {
|
|
||||||
file { '/etc/platform/.initial_config_complete':
|
file { '/etc/platform/.initial_config_complete':
|
||||||
ensure => present,
|
ensure => present,
|
||||||
}
|
}
|
||||||
|
|
|
@ -2134,8 +2134,10 @@ class HostController(rest.RestController):
|
||||||
ihost_obj['uuid'],
|
ihost_obj['uuid'],
|
||||||
ibm_msg_dict)
|
ibm_msg_dict)
|
||||||
|
|
||||||
# Trigger a system app reapply if the host has been unlocked
|
# Trigger a system app reapply if the host has been unlocked.
|
||||||
if (patched_ihost.get('action') in
|
# Only trigger the reapply if it is not during restore.
|
||||||
|
if (not os.path.isfile(tsc.RESTORE_IN_PROGRESS_FLAG) and
|
||||||
|
patched_ihost.get('action') in
|
||||||
[constants.UNLOCK_ACTION, constants.FORCE_UNLOCK_ACTION]):
|
[constants.UNLOCK_ACTION, constants.FORCE_UNLOCK_ACTION]):
|
||||||
self._reapply_system_app()
|
self._reapply_system_app()
|
||||||
|
|
||||||
|
@ -4681,8 +4683,8 @@ class HostController(rest.RestController):
|
||||||
)
|
)
|
||||||
|
|
||||||
if ihosts:
|
if ihosts:
|
||||||
# For storage setup, no change is required.
|
# TODO (Wei) Need to revisit storage setup.
|
||||||
LOG.info("This is a storage setup. No change.")
|
LOG.info("This is a storage setup. Will need to revisit.")
|
||||||
storage_enabled = 0
|
storage_enabled = 0
|
||||||
for ihost in ihosts:
|
for ihost in ihosts:
|
||||||
if ihost.operational == constants.OPERATIONAL_ENABLED:
|
if ihost.operational == constants.OPERATIONAL_ENABLED:
|
||||||
|
@ -4699,18 +4701,16 @@ class HostController(rest.RestController):
|
||||||
raise wsme.exc.ClientSideError(
|
raise wsme.exc.ClientSideError(
|
||||||
_("Restore Ceph config failed: %s" % e))
|
_("Restore Ceph config failed: %s" % e))
|
||||||
elif cutils.is_aio_system(pecan.request.dbapi):
|
elif cutils.is_aio_system(pecan.request.dbapi):
|
||||||
# TODO(wz): Need more work to restore ceph for AIO
|
# For AIO, ceph config restore is done in puppet when ceph
|
||||||
LOG.info("For an AIO system, Restore crushmap...")
|
# manifest is applied on first unlock. The
|
||||||
try:
|
# initial_config_complete flag is set after first unlock.
|
||||||
if not pecan.request.rpcapi.restore_ceph_config(
|
# Once one controller is up, ceph cluster should be operational.
|
||||||
pecan.request.context, after_storage_enabled=True):
|
LOG.info("This is AIO-SX... Ceph backend task is RESTORE")
|
||||||
raise Exception("restore_ceph_config returned false")
|
if cutils.is_initial_config_complete():
|
||||||
except Exception as e:
|
LOG.info("This is AIO-SX... clear ceph backend task to None")
|
||||||
raise wsme.exc.ClientSideError(
|
api.storage_backend_update(backend.uuid, {'task': None})
|
||||||
_("Restore Ceph config failed: %s" % e))
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# TODO(wz): Need more work to restore ceph for 2+2
|
# TODO(Wei): Need more work to restore ceph for 2+2
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -5057,11 +5057,12 @@ class HostController(rest.RestController):
|
||||||
self.check_unlock_patching(hostupdate, force_unlock)
|
self.check_unlock_patching(hostupdate, force_unlock)
|
||||||
|
|
||||||
hostupdate.configure_required = True
|
hostupdate.configure_required = True
|
||||||
if (os.path.isfile(constants.ANSIBLE_BOOTSTRAP_FLAG) and
|
if ((os.path.isfile(constants.ANSIBLE_BOOTSTRAP_FLAG) or
|
||||||
|
os.path.isfile(tsc.RESTORE_IN_PROGRESS_FLAG)) and
|
||||||
hostupdate.ihost_patch['hostname'] ==
|
hostupdate.ihost_patch['hostname'] ==
|
||||||
constants.CONTROLLER_0_HOSTNAME):
|
constants.CONTROLLER_0_HOSTNAME):
|
||||||
# For the first unlock of the initial controller bootstrapped by
|
# For the first unlock of the initial controller bootstrapped by
|
||||||
# Ansible, don't notify vim.
|
# Ansible or the first unlock during restore, don't notify vim.
|
||||||
hostupdate.notify_vim = False
|
hostupdate.notify_vim = False
|
||||||
else:
|
else:
|
||||||
hostupdate.notify_vim = True
|
hostupdate.notify_vim = True
|
||||||
|
|
|
@ -12,6 +12,7 @@
|
||||||
|
|
||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
|
|
||||||
|
import shutil
|
||||||
import subprocess
|
import subprocess
|
||||||
import os
|
import os
|
||||||
import pecan
|
import pecan
|
||||||
|
@ -718,7 +719,8 @@ def fix_crushmap(dbapi=None):
|
||||||
LOG.info("Not enough monitors yet available to fix crushmap.")
|
LOG.info("Not enough monitors yet available to fix crushmap.")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# Crushmap may be already loaded through puppet, avoid doing it twice.
|
# For AIO system, crushmap should be already loaded through puppet.
|
||||||
|
# If it was loaded, set the crushmap flag to avoid loading it twice.
|
||||||
default_ceph_tier_name = constants.SB_TIER_DEFAULT_NAMES[
|
default_ceph_tier_name = constants.SB_TIER_DEFAULT_NAMES[
|
||||||
constants.SB_TIER_TYPE_CEPH] + constants.CEPH_CRUSH_TIER_SUFFIX
|
constants.SB_TIER_TYPE_CEPH] + constants.CEPH_CRUSH_TIER_SUFFIX
|
||||||
rule_is_present, __, __ = _operator._crush_rule_status(default_ceph_tier_name)
|
rule_is_present, __, __ = _operator._crush_rule_status(default_ceph_tier_name)
|
||||||
|
@ -726,28 +728,53 @@ def fix_crushmap(dbapi=None):
|
||||||
_create_crushmap_flag_file()
|
_create_crushmap_flag_file()
|
||||||
return False
|
return False
|
||||||
|
|
||||||
stor_model = get_ceph_storage_model(dbapi)
|
|
||||||
if stor_model == constants.CEPH_AIO_SX_MODEL:
|
|
||||||
crushmap_txt = "/etc/sysinv/crushmap-aio-sx.txt"
|
|
||||||
elif stor_model == constants.CEPH_CONTROLLER_MODEL:
|
|
||||||
crushmap_txt = "/etc/sysinv/crushmap-controller-model.txt"
|
|
||||||
else:
|
|
||||||
crushmap_txt = "/etc/sysinv/crushmap-storage-model.txt"
|
|
||||||
LOG.info("Updating crushmap with: %s" % crushmap_txt)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Compile crushmap
|
# For AIO system, crushmap should already be loaded through
|
||||||
|
# puppet. If for any reason it is not, as a precaution we set
|
||||||
|
# the crushmap here.
|
||||||
|
|
||||||
|
# Check if a backup crushmap exists. If it does, that means
|
||||||
|
# it is during restore. We need to restore the backup crushmap
|
||||||
|
# instead of generating it. For AIO system, the backup crushmap
|
||||||
|
# is stored in /etc/sysinv. For non-AIO system, it is stored in
|
||||||
|
# /opt/platform/sysinv.
|
||||||
|
if cutils.is_aio_system(dbapi):
|
||||||
|
backup = os.path.join(constants.CEPH_CRUSH_MAP_BACKUP_DIR_FOR_AIO,
|
||||||
|
constants.CEPH_CRUSH_MAP_BACKUP)
|
||||||
|
else:
|
||||||
|
backup = os.path.join(constants.SYSINV_CONFIG_PATH,
|
||||||
|
constants.CEPH_CRUSH_MAP_BACKUP)
|
||||||
crushmap_bin = "/etc/sysinv/crushmap.bin"
|
crushmap_bin = "/etc/sysinv/crushmap.bin"
|
||||||
subprocess.check_output("crushtool -c %s "
|
if os.path.exists(backup):
|
||||||
"-o %s" % (crushmap_txt, crushmap_bin),
|
shutil.copyfile(backup, crushmap_bin)
|
||||||
|
else:
|
||||||
|
stor_model = get_ceph_storage_model(dbapi)
|
||||||
|
if stor_model == constants.CEPH_AIO_SX_MODEL:
|
||||||
|
crushmap_txt = "/etc/sysinv/crushmap-aio-sx.txt"
|
||||||
|
elif stor_model == constants.CEPH_CONTROLLER_MODEL:
|
||||||
|
crushmap_txt = "/etc/sysinv/crushmap-controller-model.txt"
|
||||||
|
elif stor_model == constants.CEPH_STORAGE_MODEL:
|
||||||
|
crushmap_txt = "/etc/sysinv/crushmap-storage-model.txt"
|
||||||
|
else:
|
||||||
|
reason = "Error: Undefined ceph storage model %s" % stor_model
|
||||||
|
raise exception.CephCrushMapNotApplied(reason=reason)
|
||||||
|
LOG.info("Updating crushmap with: %s" % crushmap_txt)
|
||||||
|
|
||||||
|
# Compile crushmap
|
||||||
|
subprocess.check_output("crushtool -c %s "
|
||||||
|
"-o %s" % (crushmap_txt, crushmap_bin),
|
||||||
stderr=subprocess.STDOUT, shell=True)
|
stderr=subprocess.STDOUT, shell=True)
|
||||||
# Set crushmap
|
# Set crushmap
|
||||||
subprocess.check_output("ceph osd setcrushmap -i %s" % crushmap_bin,
|
subprocess.check_output("ceph osd setcrushmap -i %s" % crushmap_bin,
|
||||||
stderr=subprocess.STDOUT, shell=True)
|
stderr=subprocess.STDOUT, shell=True)
|
||||||
except subprocess.CalledProcessError as e:
|
|
||||||
|
if os.path.exists(backup):
|
||||||
|
os.remove(backup)
|
||||||
|
except (IOError, subprocess.CalledProcessError) as e:
|
||||||
# May not be critical, depends on where this is called.
|
# May not be critical, depends on where this is called.
|
||||||
reason = "Error: %s Output: %s" % (str(e), e.output)
|
reason = "Error: %s Output: %s" % (str(e), e.output)
|
||||||
raise exception.CephCrushMapNotApplied(reason=reason)
|
raise exception.CephCrushMapNotApplied(reason=reason)
|
||||||
|
|
||||||
_create_crushmap_flag_file()
|
_create_crushmap_flag_file()
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
|
@ -864,6 +864,7 @@ CEPH_REPLICATION_GROUP0_HOSTS = {
|
||||||
CEPH_MANAGER_RPC_TOPIC = "sysinv.ceph_manager"
|
CEPH_MANAGER_RPC_TOPIC = "sysinv.ceph_manager"
|
||||||
CEPH_MANAGER_RPC_VERSION = "1.0"
|
CEPH_MANAGER_RPC_VERSION = "1.0"
|
||||||
|
|
||||||
|
CEPH_CRUSH_MAP_BACKUP_DIR_FOR_AIO = '/etc/sysinv'
|
||||||
CEPH_CRUSH_MAP_BACKUP = 'crushmap.bin.backup'
|
CEPH_CRUSH_MAP_BACKUP = 'crushmap.bin.backup'
|
||||||
CEPH_CRUSH_MAP_APPLIED = '.crushmap_applied'
|
CEPH_CRUSH_MAP_APPLIED = '.crushmap_applied'
|
||||||
CEPH_CRUSH_MAP_DEPTH = 3
|
CEPH_CRUSH_MAP_DEPTH = 3
|
||||||
|
|
|
@ -348,6 +348,8 @@ class CephOperator(object):
|
||||||
'recognized as operational.')
|
'recognized as operational.')
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
# TODO (Wei): This function is not invoked during AIO system restore.
|
||||||
|
# It will be revisited in the non-AIO system restore tasks.
|
||||||
try:
|
try:
|
||||||
backup = os.path.join(constants.SYSINV_CONFIG_PATH,
|
backup = os.path.join(constants.SYSINV_CONFIG_PATH,
|
||||||
constants.CEPH_CRUSH_MAP_BACKUP)
|
constants.CEPH_CRUSH_MAP_BACKUP)
|
||||||
|
|
Loading…
Reference in New Issue