Merge "Restore containerized platform using Ansible restore_platform playbook"

Zuul 2019-07-19 22:27:27 +00:00 committed by Gerrit Code Review
commit d1939669b9
6 changed files with 68 additions and 33 deletions

View File

@@ -250,7 +250,12 @@ class platform::ceph::monitor
     $crushmap_txt = '/etc/sysinv/crushmap-aio-sx.txt'
   }
   $crushmap_bin = '/etc/sysinv/crushmap.bin'
+  $crushmap_bin_backup = '/etc/sysinv/crushmap.bin.backup'
   Ceph::Mon <| |>
+  -> exec { 'Copy crushmap if backup exists':
+    command => "mv -f ${crushmap_bin_backup} ${crushmap_bin}",
+    onlyif  => "test -f ${crushmap_bin_backup}",
+  }
   -> exec { 'Compile crushmap':
     command => "crushtool -c ${crushmap_txt} -o ${crushmap_bin}",
     onlyif  => "test ! -f ${crushmap_bin}",
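
The ordering above is what makes the restore path work: if the restore left a crushmap.bin.backup behind, the first exec moves it into place, and the 'Compile crushmap' exec then skips itself because its onlyif guard finds crushmap.bin already present. A minimal Python sketch of the same decision, using only the three paths shown in the hunk (the function name is illustrative):

    import os
    import shutil
    import subprocess

    CRUSHMAP_TXT = '/etc/sysinv/crushmap-aio-sx.txt'
    CRUSHMAP_BIN = '/etc/sysinv/crushmap.bin'
    CRUSHMAP_BIN_BACKUP = '/etc/sysinv/crushmap.bin.backup'

    def ensure_crushmap_bin():
        # 'Copy crushmap if backup exists': a backup means a restore is in
        # progress, so reuse the saved crushmap instead of compiling one.
        if os.path.isfile(CRUSHMAP_BIN_BACKUP):
            shutil.move(CRUSHMAP_BIN_BACKUP, CRUSHMAP_BIN)
        # 'Compile crushmap': only when no crushmap.bin exists yet; the
        # restored copy above therefore suppresses this step.
        if not os.path.isfile(CRUSHMAP_BIN):
            subprocess.check_call(
                ['crushtool', '-c', CRUSHMAP_TXT, '-o', CRUSHMAP_BIN])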

View File

@@ -354,8 +354,7 @@ class platform::config::controller::post
     }
   }

-  if ! $::platform::params::controller_upgrade and
-      ! str2bool($::is_restore_in_progress) {
+  if ! $::platform::params::controller_upgrade {
     file { '/etc/platform/.initial_config_complete':
       ensure => present,
     }

View File

@@ -2134,8 +2134,10 @@ class HostController(rest.RestController):
                 ihost_obj['uuid'],
                 ibm_msg_dict)

-            # Trigger a system app reapply if the host has been unlocked
-            if (patched_ihost.get('action') in
+            # Trigger a system app reapply if the host has been unlocked.
+            # Only trigger the reapply if it is not during restore.
+            if (not os.path.isfile(tsc.RESTORE_IN_PROGRESS_FLAG) and
+                    patched_ihost.get('action') in
                     [constants.UNLOCK_ACTION, constants.FORCE_UNLOCK_ACTION]):
                 self._reapply_system_app()

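The restore gate here is just a flag file: the restore procedure drops tsc.RESTORE_IN_PROGRESS_FLAG on disk (presumably via the restore playbook), and unlock-triggered side effects check for it before running. A hedged sketch of the pattern, with the flag path and action names invented for illustration:

    import os

    # Hypothetical path; the real value comes from tsconfig (tsc).
    RESTORE_IN_PROGRESS_FLAG = '/etc/platform/.restore_in_progress'
    UNLOCK_ACTIONS = ('unlock', 'force-unlock')

    def maybe_reapply_system_app(action, reapply):
        # While a restore is in progress, the restore flow owns the
        # sequencing, so the unlock must not trigger an app reapply.
        if os.path.isfile(RESTORE_IN_PROGRESS_FLAG):
            return False
        if action in UNLOCK_ACTIONS:
            reapply()
            return True
        return False
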
@@ -4681,8 +4683,8 @@ class HostController(rest.RestController):
                 )

         if ihosts:
-            # For storage setup, no change is required.
-            LOG.info("This is a storage setup. No change.")
+            # TODO (Wei) Need to revisit storage setup.
+            LOG.info("This is a storage setup. Will need to revisit.")
             storage_enabled = 0
             for ihost in ihosts:
                 if ihost.operational == constants.OPERATIONAL_ENABLED:
@@ -4699,18 +4701,16 @@ class HostController(rest.RestController):
                     raise wsme.exc.ClientSideError(
                         _("Restore Ceph config failed: %s" % e))
             elif cutils.is_aio_system(pecan.request.dbapi):
-                # TODO(wz): Need more work to restore ceph for AIO
-                LOG.info("For an AIO system, Restore crushmap...")
-                try:
-                    if not pecan.request.rpcapi.restore_ceph_config(
-                            pecan.request.context, after_storage_enabled=True):
-                        raise Exception("restore_ceph_config returned false")
-                except Exception as e:
-                    raise wsme.exc.ClientSideError(
-                        _("Restore Ceph config failed: %s" % e))
+                # For AIO, ceph config restore is done in puppet when ceph
+                # manifest is applied on first unlock. The
+                # initial_config_complete flag is set after first unlock.
+                # Once one controller is up, ceph cluster should be operational.
+                LOG.info("This is AIO-SX... Ceph backend task is RESTORE")
+                if cutils.is_initial_config_complete():
+                    LOG.info("This is AIO-SX... clear ceph backend task to None")
+                    api.storage_backend_update(backend.uuid, {'task': None})
             else:
-                # TODO(wz): Need more work to restore ceph for 2+2
+                # TODO(Wei): Need more work to restore ceph for 2+2
                 pass

     @staticmethod
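
On AIO-SX the new code defers Ceph recovery entirely to the puppet manifest applied at first unlock; this handler only clears the backend's RESTORE task once cutils.is_initial_config_complete() reports that the first unlock has finished. A sketch of that bookkeeping, assuming a storage_backend_update API shaped like the call in the hunk:

    def clear_ceph_restore_task(api, backend, initial_config_complete):
        # Before the first unlock completes, leave task=RESTORE in place
        # so status reporting still shows the restore as pending.
        if not initial_config_complete:
            return
        # Puppet has applied the ceph manifest by now, so the cluster
        # should be operational; drop the RESTORE marker.
        api.storage_backend_update(backend.uuid, {'task': None})
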
@@ -5057,11 +5057,12 @@ class HostController(rest.RestController):
             self.check_unlock_patching(hostupdate, force_unlock)

             hostupdate.configure_required = True
-            if (os.path.isfile(constants.ANSIBLE_BOOTSTRAP_FLAG) and
+            if ((os.path.isfile(constants.ANSIBLE_BOOTSTRAP_FLAG) or
+                 os.path.isfile(tsc.RESTORE_IN_PROGRESS_FLAG)) and
                     hostupdate.ihost_patch['hostname'] ==
                     constants.CONTROLLER_0_HOSTNAME):
                 # For the first unlock of the initial controller bootstrapped by
-                # Ansible, don't notify vim.
+                # Ansible or the first unlock during restore, don't notify vim.
                 hostupdate.notify_vim = False
             else:
                 hostupdate.notify_vim = True
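
The unlock path applies the same flag-file idea to VIM notification: on controller-0's first unlock, whether after an Ansible bootstrap or during a restore, VIM is not yet managing the host and must not be notified. The compound condition reduces to a small predicate (flag paths passed in for illustration; the hostname literal stands in for constants.CONTROLLER_0_HOSTNAME):

    import os

    def should_skip_vim_notify(hostname, bootstrap_flag, restore_flag):
        # First unlock of controller-0, after bootstrap or during restore.
        first_unlock = (os.path.isfile(bootstrap_flag) or
                        os.path.isfile(restore_flag))
        return first_unlock and hostname == 'controller-0'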

View File

@@ -12,6 +12,7 @@

 from __future__ import absolute_import

+import shutil
 import subprocess
 import os
 import pecan
@@ -718,7 +719,8 @@ def fix_crushmap(dbapi=None):
             LOG.info("Not enough monitors yet available to fix crushmap.")
             return False

-    # Crushmap may be already loaded thorough puppet, avoid doing it twice.
+    # For AIO system, crushmap should be already loaded through puppet.
+    # If it was loaded, set the crushmap flag to avoid loading it twice.
     default_ceph_tier_name = constants.SB_TIER_DEFAULT_NAMES[
         constants.SB_TIER_TYPE_CEPH] + constants.CEPH_CRUSH_TIER_SUFFIX
     rule_is_present, __, __ = _operator._crush_rule_status(default_ceph_tier_name)
@@ -726,28 +728,53 @@ def fix_crushmap(dbapi=None):
         _create_crushmap_flag_file()
         return False

-    stor_model = get_ceph_storage_model(dbapi)
-    if stor_model == constants.CEPH_AIO_SX_MODEL:
-        crushmap_txt = "/etc/sysinv/crushmap-aio-sx.txt"
-    elif stor_model == constants.CEPH_CONTROLLER_MODEL:
-        crushmap_txt = "/etc/sysinv/crushmap-controller-model.txt"
-    else:
-        crushmap_txt = "/etc/sysinv/crushmap-storage-model.txt"
-    LOG.info("Updating crushmap with: %s" % crushmap_txt)
     try:
-        # Compile crushmap
+        # For AIO system, crushmap should already be loaded through
+        # puppet. If for any reason it is not, as a precaution we set
+        # the crushmap here.
+
+        # Check if a backup crushmap exists. If it does, that means
+        # it is during restore. We need to restore the backup crushmap
+        # instead of generating it. For AIO system, the backup crushmap
+        # is stored in /etc/sysinv. For non-AIO system, it is stored in
+        # /opt/platform/sysinv.
+        if cutils.is_aio_system(dbapi):
+            backup = os.path.join(constants.CEPH_CRUSH_MAP_BACKUP_DIR_FOR_AIO,
+                                  constants.CEPH_CRUSH_MAP_BACKUP)
+        else:
+            backup = os.path.join(constants.SYSINV_CONFIG_PATH,
+                                  constants.CEPH_CRUSH_MAP_BACKUP)
         crushmap_bin = "/etc/sysinv/crushmap.bin"
-        subprocess.check_output("crushtool -c %s "
-                                "-o %s" % (crushmap_txt, crushmap_bin),
-                                stderr=subprocess.STDOUT, shell=True)
+        if os.path.exists(backup):
+            shutil.copyfile(backup, crushmap_bin)
+        else:
+            stor_model = get_ceph_storage_model(dbapi)
+            if stor_model == constants.CEPH_AIO_SX_MODEL:
+                crushmap_txt = "/etc/sysinv/crushmap-aio-sx.txt"
+            elif stor_model == constants.CEPH_CONTROLLER_MODEL:
+                crushmap_txt = "/etc/sysinv/crushmap-controller-model.txt"
+            elif stor_model == constants.CEPH_STORAGE_MODEL:
+                crushmap_txt = "/etc/sysinv/crushmap-storage-model.txt"
+            else:
+                reason = "Error: Undefined ceph storage model %s" % stor_model
+                raise exception.CephCrushMapNotApplied(reason=reason)
+            LOG.info("Updating crushmap with: %s" % crushmap_txt)
+            # Compile crushmap
+            subprocess.check_output("crushtool -c %s "
+                                    "-o %s" % (crushmap_txt, crushmap_bin),
+                                    stderr=subprocess.STDOUT, shell=True)
         # Set crushmap
         subprocess.check_output("ceph osd setcrushmap -i %s" % crushmap_bin,
                                 stderr=subprocess.STDOUT, shell=True)
-    except subprocess.CalledProcessError as e:
+
+        if os.path.exists(backup):
+            os.remove(backup)
+    except (IOError, subprocess.CalledProcessError) as e:
         # May not be critical, depends on where this is called.
         reason = "Error: %s Output: %s" % (str(e), e.output)
         raise exception.CephCrushMapNotApplied(reason=reason)

     _create_crushmap_flag_file()
     return True
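
Taken together, the new fix_crushmap() flow is: pick the backup location by system type, restore the saved binary crushmap if one exists, otherwise compile the model-specific text map, apply it, and delete the backup only after a successful apply so a failure leaves it available for retry. A condensed, self-contained sketch of that flow (arguments are illustrative; error handling is reduced to the exceptions the diff catches):

    import os
    import shutil
    import subprocess

    def apply_crushmap(backup, crushmap_txt,
                       crushmap_bin='/etc/sysinv/crushmap.bin'):
        if os.path.exists(backup):
            # Restore path: reuse the crushmap saved before the backup.
            shutil.copyfile(backup, crushmap_bin)
        else:
            # Fresh path: compile the model-specific text crushmap.
            subprocess.check_output(
                'crushtool -c %s -o %s' % (crushmap_txt, crushmap_bin),
                stderr=subprocess.STDOUT, shell=True)
        subprocess.check_output('ceph osd setcrushmap -i %s' % crushmap_bin,
                                stderr=subprocess.STDOUT, shell=True)
        # Remove the backup only after a successful setcrushmap, so a
        # failed attempt can be retried from the same backup.
        if os.path.exists(backup):
            os.remove(backup)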

View File

@@ -864,6 +864,7 @@ CEPH_REPLICATION_GROUP0_HOSTS = {
 CEPH_MANAGER_RPC_TOPIC = "sysinv.ceph_manager"
 CEPH_MANAGER_RPC_VERSION = "1.0"

+CEPH_CRUSH_MAP_BACKUP_DIR_FOR_AIO = '/etc/sysinv'
 CEPH_CRUSH_MAP_BACKUP = 'crushmap.bin.backup'
 CEPH_CRUSH_MAP_APPLIED = '.crushmap_applied'
 CEPH_CRUSH_MAP_DEPTH = 3
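
With the new constant, the backup path resolves differently per system type; roughly as below (the SYSINV_CONFIG_PATH value is an assumption based on the /opt/platform/sysinv comment in the fix_crushmap hunk):

    import os

    CEPH_CRUSH_MAP_BACKUP_DIR_FOR_AIO = '/etc/sysinv'
    CEPH_CRUSH_MAP_BACKUP = 'crushmap.bin.backup'
    SYSINV_CONFIG_PATH = '/opt/platform/sysinv'  # assumed value

    # AIO: stored in /etc/sysinv, per the fix_crushmap comment above.
    aio_backup = os.path.join(CEPH_CRUSH_MAP_BACKUP_DIR_FOR_AIO,
                              CEPH_CRUSH_MAP_BACKUP)

    # Non-AIO: stored under the sysinv config path on /opt/platform.
    non_aio_backup = os.path.join(SYSINV_CONFIG_PATH,
                                  CEPH_CRUSH_MAP_BACKUP)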

View File

@@ -348,6 +348,8 @@ class CephOperator(object):
                      'recognized as operational.')
             return False

+        # TODO (Wei): This function is not invoked during AIO system restore.
+        # It will be revisited in the non-AIO system restore tasks.
         try:
             backup = os.path.join(constants.SYSINV_CONFIG_PATH,
                                   constants.CEPH_CRUSH_MAP_BACKUP)