Restore containerized platform using Ansible restore_platform playbook
This commit supports platform restore for AIO-SX using the restore_platform playbook: 1. During AIO-SX restore, the restored Ceph crushmap is loaded through puppet. 2. Vim is bypassed when unlocking controller-0 for the first time. 3. When unlocking controller-0 for the first time, app_reapply is skipped for the stx-openstack application. 4. After controller-0 is unlocked, the Ceph backend task is set to None. Change-Id: I36d27b162334e5a2f0371793243f2301b5fec1eb Story: 2004761 Task: 33645 Signed-off-by: Wei Zhou <wei.zhou@windriver.com>
This commit is contained in:
parent
38f7697bdc
commit
6ef1e829fd
|
@ -250,7 +250,12 @@ class platform::ceph::monitor
|
|||
$crushmap_txt = '/etc/sysinv/crushmap-aio-sx.txt'
|
||||
}
|
||||
$crushmap_bin = '/etc/sysinv/crushmap.bin'
|
||||
$crushmap_bin_backup = '/etc/sysinv/crushmap.bin.backup'
|
||||
Ceph::Mon <| |>
|
||||
-> exec { 'Copy crushmap if backup exists':
|
||||
command => "mv -f ${crushmap_bin_backup} ${crushmap_bin}",
|
||||
onlyif => "test -f ${crushmap_bin_backup}",
|
||||
}
|
||||
-> exec { 'Compile crushmap':
|
||||
command => "crushtool -c ${crushmap_txt} -o ${crushmap_bin}",
|
||||
onlyif => "test ! -f ${crushmap_bin}",
|
||||
|
|
|
@ -354,8 +354,7 @@ class platform::config::controller::post
|
|||
}
|
||||
}
|
||||
|
||||
if ! $::platform::params::controller_upgrade and
|
||||
! str2bool($::is_restore_in_progress) {
|
||||
if ! $::platform::params::controller_upgrade {
|
||||
file { '/etc/platform/.initial_config_complete':
|
||||
ensure => present,
|
||||
}
|
||||
|
|
|
@ -2134,8 +2134,10 @@ class HostController(rest.RestController):
|
|||
ihost_obj['uuid'],
|
||||
ibm_msg_dict)
|
||||
|
||||
# Trigger a system app reapply if the host has been unlocked
|
||||
if (patched_ihost.get('action') in
|
||||
# Trigger a system app reapply if the host has been unlocked.
|
||||
# Only trigger the reapply if it is not during restore.
|
||||
if (not os.path.isfile(tsc.RESTORE_IN_PROGRESS_FLAG) and
|
||||
patched_ihost.get('action') in
|
||||
[constants.UNLOCK_ACTION, constants.FORCE_UNLOCK_ACTION]):
|
||||
self._reapply_system_app()
|
||||
|
||||
|
@ -4681,8 +4683,8 @@ class HostController(rest.RestController):
|
|||
)
|
||||
|
||||
if ihosts:
|
||||
# For storage setup, no change is required.
|
||||
LOG.info("This is a storage setup. No change.")
|
||||
# TODO (Wei) Need to revisit storage setup.
|
||||
LOG.info("This is a storage setup. Will need to revisit.")
|
||||
storage_enabled = 0
|
||||
for ihost in ihosts:
|
||||
if ihost.operational == constants.OPERATIONAL_ENABLED:
|
||||
|
@ -4699,18 +4701,16 @@ class HostController(rest.RestController):
|
|||
raise wsme.exc.ClientSideError(
|
||||
_("Restore Ceph config failed: %s" % e))
|
||||
elif cutils.is_aio_system(pecan.request.dbapi):
|
||||
# TODO(wz): Need more work to restore ceph for AIO
|
||||
LOG.info("For an AIO system, Restore crushmap...")
|
||||
try:
|
||||
if not pecan.request.rpcapi.restore_ceph_config(
|
||||
pecan.request.context, after_storage_enabled=True):
|
||||
raise Exception("restore_ceph_config returned false")
|
||||
except Exception as e:
|
||||
raise wsme.exc.ClientSideError(
|
||||
_("Restore Ceph config failed: %s" % e))
|
||||
|
||||
# For AIO, ceph config restore is done in puppet when ceph
|
||||
# manifest is applied on first unlock. The
|
||||
# initial_config_complete flag is set after first unlock.
|
||||
# Once one controller is up, ceph cluster should be operational.
|
||||
LOG.info("This is AIO-SX... Ceph backend task is RESTORE")
|
||||
if cutils.is_initial_config_complete():
|
||||
LOG.info("This is AIO-SX... clear ceph backend task to None")
|
||||
api.storage_backend_update(backend.uuid, {'task': None})
|
||||
else:
|
||||
# TODO(wz): Need more work to restore ceph for 2+2
|
||||
# TODO(Wei): Need more work to restore ceph for 2+2
|
||||
pass
|
||||
|
||||
@staticmethod
|
||||
|
@ -5057,11 +5057,12 @@ class HostController(rest.RestController):
|
|||
self.check_unlock_patching(hostupdate, force_unlock)
|
||||
|
||||
hostupdate.configure_required = True
|
||||
if (os.path.isfile(constants.ANSIBLE_BOOTSTRAP_FLAG) and
|
||||
if ((os.path.isfile(constants.ANSIBLE_BOOTSTRAP_FLAG) or
|
||||
os.path.isfile(tsc.RESTORE_IN_PROGRESS_FLAG)) and
|
||||
hostupdate.ihost_patch['hostname'] ==
|
||||
constants.CONTROLLER_0_HOSTNAME):
|
||||
# For the first unlock of the initial controller bootstrapped by
|
||||
# Ansible, don't notify vim.
|
||||
# Ansible or the first unlock during restore, don't notify vim.
|
||||
hostupdate.notify_vim = False
|
||||
else:
|
||||
hostupdate.notify_vim = True
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
|
||||
from __future__ import absolute_import
|
||||
|
||||
import shutil
|
||||
import subprocess
|
||||
import os
|
||||
import pecan
|
||||
|
@ -709,7 +710,8 @@ def fix_crushmap(dbapi=None):
|
|||
LOG.info("Not enough monitors yet available to fix crushmap.")
|
||||
return False
|
||||
|
||||
# Crushmap may be already loaded thorough puppet, avoid doing it twice.
|
||||
# For AIO system, crushmap should be already loaded through puppet.
|
||||
# If it was loaded, set the crushmap flag to avoid loading it twice.
|
||||
default_ceph_tier_name = constants.SB_TIER_DEFAULT_NAMES[
|
||||
constants.SB_TIER_TYPE_CEPH] + constants.CEPH_CRUSH_TIER_SUFFIX
|
||||
rule_is_present, __, __ = _operator._crush_rule_status(default_ceph_tier_name)
|
||||
|
@ -717,28 +719,53 @@ def fix_crushmap(dbapi=None):
|
|||
_create_crushmap_flag_file()
|
||||
return False
|
||||
|
||||
stor_model = get_ceph_storage_model(dbapi)
|
||||
if stor_model == constants.CEPH_AIO_SX_MODEL:
|
||||
crushmap_txt = "/etc/sysinv/crushmap-aio-sx.txt"
|
||||
elif stor_model == constants.CEPH_CONTROLLER_MODEL:
|
||||
crushmap_txt = "/etc/sysinv/crushmap-controller-model.txt"
|
||||
else:
|
||||
crushmap_txt = "/etc/sysinv/crushmap-storage-model.txt"
|
||||
LOG.info("Updating crushmap with: %s" % crushmap_txt)
|
||||
|
||||
try:
|
||||
# Compile crushmap
|
||||
# For AIO system, crushmap should alreadby be loaded through
|
||||
# puppet. If for any reason it is not, as a precaution we set
|
||||
# the crushmap here.
|
||||
|
||||
# Check if a backup crushmap exists. If it does, that means
|
||||
# it is during restore. We need to restore the backup crushmap
|
||||
# instead of generating it. For AIO system, the backup crushmap
|
||||
# is stored in /etc/sysinv. For non-AIO system, it is stored in
|
||||
# /opt/platform/sysinv.
|
||||
if cutils.is_aio_system(dbapi):
|
||||
backup = os.path.join(constants.CEPH_CRUSH_MAP_BACKUP_DIR_FOR_AIO,
|
||||
constants.CEPH_CRUSH_MAP_BACKUP)
|
||||
else:
|
||||
backup = os.path.join(constants.SYSINV_CONFIG_PATH,
|
||||
constants.CEPH_CRUSH_MAP_BACKUP)
|
||||
crushmap_bin = "/etc/sysinv/crushmap.bin"
|
||||
subprocess.check_output("crushtool -c %s "
|
||||
"-o %s" % (crushmap_txt, crushmap_bin),
|
||||
if os.path.exists(backup):
|
||||
shutil.copyfile(backup, crushmap_bin)
|
||||
else:
|
||||
stor_model = get_ceph_storage_model(dbapi)
|
||||
if stor_model == constants.CEPH_AIO_SX_MODEL:
|
||||
crushmap_txt = "/etc/sysinv/crushmap-aio-sx.txt"
|
||||
elif stor_model == constants.CEPH_CONTROLLER_MODEL:
|
||||
crushmap_txt = "/etc/sysinv/crushmap-controller-model.txt"
|
||||
elif stor_model == constants.CEPH_STORAGE_MODEL:
|
||||
crushmap_txt = "/etc/sysinv/crushmap-storage-model.txt"
|
||||
else:
|
||||
reason = "Error: Undefined ceph storage model %s" % stor_model
|
||||
raise exception.CephCrushMapNotApplied(reason=reason)
|
||||
LOG.info("Updating crushmap with: %s" % crushmap_txt)
|
||||
|
||||
# Compile crushmap
|
||||
subprocess.check_output("crushtool -c %s "
|
||||
"-o %s" % (crushmap_txt, crushmap_bin),
|
||||
stderr=subprocess.STDOUT, shell=True)
|
||||
# Set crushmap
|
||||
subprocess.check_output("ceph osd setcrushmap -i %s" % crushmap_bin,
|
||||
stderr=subprocess.STDOUT, shell=True)
|
||||
except subprocess.CalledProcessError as e:
|
||||
|
||||
if os.path.exists(backup):
|
||||
os.remove(backup)
|
||||
except (IOError, subprocess.CalledProcessError) as e:
|
||||
# May not be critical, depends on where this is called.
|
||||
reason = "Error: %s Output: %s" % (str(e), e.output)
|
||||
raise exception.CephCrushMapNotApplied(reason=reason)
|
||||
|
||||
_create_crushmap_flag_file()
|
||||
|
||||
return True
|
||||
|
|
|
@ -856,6 +856,7 @@ CEPH_REPLICATION_GROUP0_HOSTS = {
|
|||
CEPH_MANAGER_RPC_TOPIC = "sysinv.ceph_manager"
|
||||
CEPH_MANAGER_RPC_VERSION = "1.0"
|
||||
|
||||
CEPH_CRUSH_MAP_BACKUP_DIR_FOR_AIO = '/etc/sysinv'
|
||||
CEPH_CRUSH_MAP_BACKUP = 'crushmap.bin.backup'
|
||||
CEPH_CRUSH_MAP_APPLIED = '.crushmap_applied'
|
||||
CEPH_CRUSH_MAP_DEPTH = 3
|
||||
|
|
|
@ -348,6 +348,8 @@ class CephOperator(object):
|
|||
'recognized as operational.')
|
||||
return False
|
||||
|
||||
# TODO (Wei): This function is not invoked during AIO system restore.
|
||||
# It will be revisited in the non-AIO system restore tasks.
|
||||
try:
|
||||
backup = os.path.join(constants.SYSINV_CONFIG_PATH,
|
||||
constants.CEPH_CRUSH_MAP_BACKUP)
|
||||
|
|
Loading…
Reference in New Issue