Merge "Restore containerized platform using Ansible restore_platform playbook"

Zuul 2019-07-19 22:27:27 +00:00 committed by Gerrit Code Review
commit d1939669b9
6 changed files with 68 additions and 33 deletions

View File

@@ -250,7 +250,12 @@ class platform::ceph::monitor
       $crushmap_txt = '/etc/sysinv/crushmap-aio-sx.txt'
     }
     $crushmap_bin = '/etc/sysinv/crushmap.bin'
+    $crushmap_bin_backup = '/etc/sysinv/crushmap.bin.backup'
     Ceph::Mon <| |>
+    -> exec { 'Copy crushmap if backup exists':
+      command => "mv -f ${crushmap_bin_backup} ${crushmap_bin}",
+      onlyif  => "test -f ${crushmap_bin_backup}",
+    }
     -> exec { 'Compile crushmap':
       command => "crushtool -c ${crushmap_txt} -o ${crushmap_bin}",
       onlyif  => "test ! -f ${crushmap_bin}",
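In plain terms, the chained execs above restore a saved crushmap when a backup exists and only compile a fresh one when no binary map is present. A minimal Python sketch of the same restore-or-compile logic (paths taken from the manifest; the function name is illustrative, not part of the commit):

import os
import shutil
import subprocess

CRUSHMAP_TXT = '/etc/sysinv/crushmap-aio-sx.txt'
CRUSHMAP_BIN = '/etc/sysinv/crushmap.bin'
CRUSHMAP_BIN_BACKUP = '/etc/sysinv/crushmap.bin.backup'

def ensure_crushmap():
    # 'Copy crushmap if backup exists': prefer the map saved before the restore.
    if os.path.isfile(CRUSHMAP_BIN_BACKUP):
        shutil.move(CRUSHMAP_BIN_BACKUP, CRUSHMAP_BIN)
    # 'Compile crushmap': only compile when no binary map exists, so a
    # restored backup is never overwritten.
    if not os.path.isfile(CRUSHMAP_BIN):
        subprocess.check_call(
            ['crushtool', '-c', CRUSHMAP_TXT, '-o', CRUSHMAP_BIN])

Because both execs are guarded by onlyif, the manifest stays idempotent across reapplies.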

View File

@@ -354,8 +354,7 @@ class platform::config::controller::post
     }
   }
-  if ! $::platform::params::controller_upgrade and
-    ! str2bool($::is_restore_in_progress) {
+  if ! $::platform::params::controller_upgrade {
     file { '/etc/platform/.initial_config_complete':
       ensure => present,
     }

View File

@@ -2134,8 +2134,10 @@ class HostController(rest.RestController):
                ihost_obj['uuid'],
                ibm_msg_dict)
-        # Trigger a system app reapply if the host has been unlocked
-        if (patched_ihost.get('action') in
+        # Trigger a system app reapply if the host has been unlocked.
+        # Only trigger the reapply if it is not during restore.
+        if (not os.path.isfile(tsc.RESTORE_IN_PROGRESS_FLAG) and
+                patched_ihost.get('action') in
                 [constants.UNLOCK_ACTION, constants.FORCE_UNLOCK_ACTION]):
             self._reapply_system_app()
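Condensed, the new guard means a system app reapply happens only for unlock actions outside of a restore. A standalone sketch of that decision (the helper name is hypothetical, and the action strings are placeholders standing in for constants.UNLOCK_ACTION and constants.FORCE_UNLOCK_ACTION):

def should_reapply_system_app(action, restore_in_progress):
    # Mirror of the condition above: skip the reapply while a platform
    # restore is in progress, otherwise reapply on (force) unlock.
    if restore_in_progress:
        return False
    return action in ('unlock', 'force-unlock')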
@@ -4681,8 +4683,8 @@ class HostController(rest.RestController):
                 )
             if ihosts:
-                # For storage setup, no change is required.
-                LOG.info("This is a storage setup. No change.")
+                # TODO (Wei) Need to revisit storage setup.
+                LOG.info("This is a storage setup. Will need to revisit.")
                 storage_enabled = 0
                 for ihost in ihosts:
                     if ihost.operational == constants.OPERATIONAL_ENABLED:
@@ -4699,18 +4701,16 @@ class HostController(rest.RestController):
                     raise wsme.exc.ClientSideError(
                         _("Restore Ceph config failed: %s" % e))
             elif cutils.is_aio_system(pecan.request.dbapi):
-                # TODO(wz): Need more work to restore ceph for AIO
-                LOG.info("For an AIO system, Restore crushmap...")
-                try:
-                    if not pecan.request.rpcapi.restore_ceph_config(
-                            pecan.request.context, after_storage_enabled=True):
-                        raise Exception("restore_ceph_config returned false")
-                except Exception as e:
-                    raise wsme.exc.ClientSideError(
-                        _("Restore Ceph config failed: %s" % e))
+                # For AIO, ceph config restore is done in puppet when ceph
+                # manifest is applied on first unlock. The
+                # initial_config_complete flag is set after first unlock.
+                # Once one controller is up, ceph cluster should be operational.
+                LOG.info("This is AIO-SX... Ceph backend task is RESTORE")
+                if cutils.is_initial_config_complete():
+                    LOG.info("This is AIO-SX... clear ceph backend task to None")
+                    api.storage_backend_update(backend.uuid, {'task': None})
             else:
-                # TODO(wz): Need more work to restore ceph for 2+2
+                # TODO(Wei): Need more work to restore ceph for 2+2
                 pass

     @staticmethod
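The AIO branch now defers the actual Ceph restore to puppet and only clears the backend's RESTORE task once the initial configuration is complete. A small standalone sketch of that bookkeeping step (the helper name is hypothetical; api and backend stand for the sysinv DB API handle and storage backend object used above):

def clear_restore_task_if_ready(api, backend, initial_config_complete):
    # Puppet restores Ceph at the first unlock; once initial config is
    # complete the cluster should be operational, so drop the RESTORE task.
    if initial_config_complete:
        api.storage_backend_update(backend.uuid, {'task': None})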
@@ -5057,11 +5057,12 @@ class HostController(rest.RestController):
         self.check_unlock_patching(hostupdate, force_unlock)

         hostupdate.configure_required = True
-        if (os.path.isfile(constants.ANSIBLE_BOOTSTRAP_FLAG) and
+        if ((os.path.isfile(constants.ANSIBLE_BOOTSTRAP_FLAG) or
+                os.path.isfile(tsc.RESTORE_IN_PROGRESS_FLAG)) and
                 hostupdate.ihost_patch['hostname'] ==
                 constants.CONTROLLER_0_HOSTNAME):
             # For the first unlock of the initial controller bootstrapped by
-            # Ansible, don't notify vim.
+            # Ansible or the first unlock during restore, don't notify vim.
             hostupdate.notify_vim = False
         else:
             hostupdate.notify_vim = True
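The unlock path now skips the VIM notification for the first unlock of controller-0 during either Ansible bootstrap or a restore. A self-contained sketch of the boolean (flag checks are passed in as booleans and the helper name is hypothetical):

def should_notify_vim(hostname, bootstrap_in_progress, restore_in_progress):
    # First unlock of controller-0 after Ansible bootstrap, or during a
    # restore, is not reported to the VIM; every other unlock is.
    first_controller_unlock = ((bootstrap_in_progress or restore_in_progress)
                               and hostname == 'controller-0')
    return not first_controller_unlock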

View File

@@ -12,6 +12,7 @@
 from __future__ import absolute_import
+import shutil
 import subprocess
 import os
 import pecan
@@ -718,7 +719,8 @@ def fix_crushmap(dbapi=None):
                 LOG.info("Not enough monitors yet available to fix crushmap.")
                 return False
-        # Crushmap may be already loaded thorough puppet, avoid doing it twice.
+        # For AIO system, crushmap should be already loaded through puppet.
+        # If it was loaded, set the crushmap flag to avoid loading it twice.
         default_ceph_tier_name = constants.SB_TIER_DEFAULT_NAMES[
             constants.SB_TIER_TYPE_CEPH] + constants.CEPH_CRUSH_TIER_SUFFIX
         rule_is_present, __, __ = _operator._crush_rule_status(default_ceph_tier_name)
@@ -726,28 +728,53 @@
             _create_crushmap_flag_file()
             return False
-        stor_model = get_ceph_storage_model(dbapi)
-        if stor_model == constants.CEPH_AIO_SX_MODEL:
-            crushmap_txt = "/etc/sysinv/crushmap-aio-sx.txt"
-        elif stor_model == constants.CEPH_CONTROLLER_MODEL:
-            crushmap_txt = "/etc/sysinv/crushmap-controller-model.txt"
-        else:
-            crushmap_txt = "/etc/sysinv/crushmap-storage-model.txt"
-        LOG.info("Updating crushmap with: %s" % crushmap_txt)
         try:
-            # Compile crushmap
+            # For AIO system, crushmap should already be loaded through
+            # puppet. If for any reason it is not, as a precaution we set
+            # the crushmap here.
+            # Check if a backup crushmap exists. If it does, that means
+            # it is during restore. We need to restore the backup crushmap
+            # instead of generating it. For AIO system, the backup crushmap
+            # is stored in /etc/sysinv. For non-AIO system, it is stored in
+            # /opt/platform/sysinv.
+            if cutils.is_aio_system(dbapi):
+                backup = os.path.join(constants.CEPH_CRUSH_MAP_BACKUP_DIR_FOR_AIO,
+                                      constants.CEPH_CRUSH_MAP_BACKUP)
+            else:
+                backup = os.path.join(constants.SYSINV_CONFIG_PATH,
+                                      constants.CEPH_CRUSH_MAP_BACKUP)
             crushmap_bin = "/etc/sysinv/crushmap.bin"
-            subprocess.check_output("crushtool -c %s "
-                                    "-o %s" % (crushmap_txt, crushmap_bin),
+            if os.path.exists(backup):
+                shutil.copyfile(backup, crushmap_bin)
+            else:
+                stor_model = get_ceph_storage_model(dbapi)
+                if stor_model == constants.CEPH_AIO_SX_MODEL:
+                    crushmap_txt = "/etc/sysinv/crushmap-aio-sx.txt"
+                elif stor_model == constants.CEPH_CONTROLLER_MODEL:
+                    crushmap_txt = "/etc/sysinv/crushmap-controller-model.txt"
+                elif stor_model == constants.CEPH_STORAGE_MODEL:
+                    crushmap_txt = "/etc/sysinv/crushmap-storage-model.txt"
+                else:
+                    reason = "Error: Undefined ceph storage model %s" % stor_model
+                    raise exception.CephCrushMapNotApplied(reason=reason)
+                LOG.info("Updating crushmap with: %s" % crushmap_txt)
+                # Compile crushmap
+                subprocess.check_output("crushtool -c %s "
+                                        "-o %s" % (crushmap_txt, crushmap_bin),
                                         stderr=subprocess.STDOUT, shell=True)
             # Set crushmap
             subprocess.check_output("ceph osd setcrushmap -i %s" % crushmap_bin,
                                     stderr=subprocess.STDOUT, shell=True)
-        except subprocess.CalledProcessError as e:
+            if os.path.exists(backup):
+                os.remove(backup)
+        except (IOError, subprocess.CalledProcessError) as e:
             # May not be critical, depends on where this is called.
             reason = "Error: %s Output: %s" % (str(e), e.output)
             raise exception.CephCrushMapNotApplied(reason=reason)
         _create_crushmap_flag_file()
     return True
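Putting the hunk together, fix_crushmap() now prefers a backup crushmap left behind by the restore, falls back to compiling one for the detected storage model, and then consumes the backup. A condensed, standalone sketch of that flow (directory values mirror the comment above; error handling, the storage-model lookup, and the applied-flag file are omitted, and the function name is illustrative):

import os
import shutil
import subprocess

CRUSHMAP_BIN = '/etc/sysinv/crushmap.bin'
BACKUP_NAME = 'crushmap.bin.backup'

def apply_crushmap(is_aio_system, crushmap_txt):
    # Backup location differs per system type, as in the added comment:
    # /etc/sysinv for AIO, /opt/platform/sysinv (SYSINV_CONFIG_PATH) otherwise.
    backup_dir = '/etc/sysinv' if is_aio_system else '/opt/platform/sysinv'
    backup = os.path.join(backup_dir, BACKUP_NAME)
    if os.path.exists(backup):
        # A backup only exists during restore: reuse it instead of compiling.
        shutil.copyfile(backup, CRUSHMAP_BIN)
    else:
        subprocess.check_output(
            "crushtool -c %s -o %s" % (crushmap_txt, CRUSHMAP_BIN),
            stderr=subprocess.STDOUT, shell=True)
    subprocess.check_output(
        "ceph osd setcrushmap -i %s" % CRUSHMAP_BIN,
        stderr=subprocess.STDOUT, shell=True)
    # Consume the backup so a later call compiles a fresh map instead.
    if os.path.exists(backup):
        os.remove(backup)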

View File

@@ -864,6 +864,7 @@ CEPH_REPLICATION_GROUP0_HOSTS = {
 CEPH_MANAGER_RPC_TOPIC = "sysinv.ceph_manager"
 CEPH_MANAGER_RPC_VERSION = "1.0"
+CEPH_CRUSH_MAP_BACKUP_DIR_FOR_AIO = '/etc/sysinv'
 CEPH_CRUSH_MAP_BACKUP = 'crushmap.bin.backup'
 CEPH_CRUSH_MAP_APPLIED = '.crushmap_applied'
 CEPH_CRUSH_MAP_DEPTH = 3

View File

@@ -348,6 +348,8 @@ class CephOperator(object):
                          'recognized as operational.')
                 return False
+        # TODO (Wei): This function is not invoked during AIO system restore.
+        # It will be revisited in the non-AIO system restore tasks.
         try:
             backup = os.path.join(constants.SYSINV_CONFIG_PATH,
                                   constants.CEPH_CRUSH_MAP_BACKUP)