Update upgrade code for removing Ceph Cache Tiering

Story: 2002884
Task: 22846

Change-Id: I31cf3eb7de935676790cf7e4c1b40307d7110390
Signed-off-by: Don Penney <don.penney@windriver.com>
Signed-off-by: Jack Ding <jack.ding@windriver.com>
This commit is contained in:
Robert Church 2018-05-21 09:10:15 -05:00 committed by Jack Ding
parent 8a8aaeecde
commit c1c51db08d
3 changed files with 23 additions and 30 deletions

View File

@ -65,7 +65,7 @@ CEPH_MANAGER_TOPIC = 'sysinv.ceph_manager'
SYSINV_CONFIG_FILE = '/etc/sysinv/sysinv.conf' SYSINV_CONFIG_FILE = '/etc/sysinv/sysinv.conf'
# Titanium Cloud version strings # Titanium Cloud version strings
TITANIUM_SERVER_VERSION_16_10 = '16.10' TITANIUM_SERVER_VERSION_18_03 = '18.03'
CEPH_HEALTH_WARN_REQUIRE_JEWEL_OSDS_NOT_SET = ( CEPH_HEALTH_WARN_REQUIRE_JEWEL_OSDS_NOT_SET = (
"all OSDs are running jewel or later but the " "all OSDs are running jewel or later but the "

View File

@ -22,20 +22,18 @@ import exception
LOG = logging.getLogger(__name__) LOG = logging.getLogger(__name__)
# When upgrading from 16.10 to 17.x Ceph goes from Hammer release # In 18.03 R5, ceph cache tiering was disabled and prevented from being
# to Jewel release. After all storage nodes are upgraded to 17.x # re-enabled. When upgrading from 18.03 (R5) to R6 we need to remove the
# the cluster is in HEALTH_WARN until administrator explicitly # cache-tier from the crushmap ceph-cache-tiering
# enables require_jewel_osds flag - which signals Ceph that it
# can safely transition from Hammer to Jewel
# #
# This class is needed only when upgrading from 16.10 to 17.x # This class is needed only when upgrading from R5 to R6
# TODO: remove it after 1st 17.x release # TODO: remove it after 1st R6 release
# #
class HandleUpgradesMixin(object): class HandleUpgradesMixin(object):
def __init__(self, service): def __init__(self, service):
self.service = service self.service = service
self.surpress_require_jewel_osds_warning = False self.wait_for_upgrade_complete = False
def setup(self, config): def setup(self, config):
self._set_upgrade(self.service.retry_get_software_upgrade_status()) self._set_upgrade(self.service.retry_get_software_upgrade_status())
@ -45,9 +43,10 @@ class HandleUpgradesMixin(object):
from_version = upgrade.get('from_version') from_version = upgrade.get('from_version')
if (state if (state
and state != constants.UPGRADE_COMPLETED and state != constants.UPGRADE_COMPLETED
and from_version == constants.TITANIUM_SERVER_VERSION_16_10): and from_version == constants.TITANIUM_SERVER_VERSION_18_03):
LOG.info(_LI("Surpress require_jewel_osds health warning"))
self.surpress_require_jewel_osds_warning = True LOG.info(_LI("Wait for caph upgrade to complete before monitoring cluster."))
self.wait_for_upgrade_complete = True
def set_flag_require_jewel_osds(self): def set_flag_require_jewel_osds(self):
try: try:
@ -73,7 +72,7 @@ class HandleUpgradesMixin(object):
health = self.auto_heal(health) health = self.auto_heal(health)
# filter out require_jewel_osds warning # filter out require_jewel_osds warning
# #
if not self.surpress_require_jewel_osds_warning: if not self.wait_for_upgrade_complete:
return health return health
if health['health'] != constants.CEPH_HEALTH_WARN: if health['health'] != constants.CEPH_HEALTH_WARN:
return health return health
@ -114,17 +113,16 @@ class HandleUpgradesMixin(object):
state = upgrade.get('state') state = upgrade.get('state')
# suppress require_jewel_osds in case upgrade is # suppress require_jewel_osds in case upgrade is
# in progress but not completed or aborting # in progress but not completed or aborting
if (not self.surpress_require_jewel_osds_warning if (not self.wait_for_upgrade_complete
and (upgrade.get('from_version') and (upgrade.get('from_version')
== constants.TITANIUM_SERVER_VERSION_16_10) == constants.TITANIUM_SERVER_VERSION_18_03)
and state not in [ and state not in [
None, None,
constants.UPGRADE_COMPLETED, constants.UPGRADE_COMPLETED,
constants.UPGRADE_ABORTING, constants.UPGRADE_ABORTING,
constants.UPGRADE_ABORT_COMPLETING, constants.UPGRADE_ABORT_COMPLETING,
constants.UPGRADE_ABORTING_ROLLBACK]): constants.UPGRADE_ABORTING_ROLLBACK]):
LOG.info(_LI("Surpress require_jewel_osds health warning")) self.wait_for_upgrade_complete = True
self.surpress_require_jewel_osds_warning = True
# set require_jewel_osds in case upgrade is # set require_jewel_osds in case upgrade is
# not in progress or completed # not in progress or completed
if (state in [None, constants.UPGRADE_COMPLETED]): if (state in [None, constants.UPGRADE_COMPLETED]):
@ -135,16 +133,14 @@ class HandleUpgradesMixin(object):
self.set_flag_require_jewel_osds() self.set_flag_require_jewel_osds()
health = self._remove_require_jewel_osds_warning(health) health = self._remove_require_jewel_osds_warning(health)
LOG.info(_LI("Unsurpress require_jewel_osds health warning")) LOG.info(_LI("Unsurpress require_jewel_osds health warning"))
self.surpress_require_jewel_osds_warning = False self.wait_for_upgrade_complete = False
# unsuppress require_jewel_osds in case upgrade # unsuppress require_jewel_osds in case upgrade
# is aborting # is aborting
if (self.surpress_require_jewel_osds_warning if (state in [
and state in [ constants.UPGRADE_ABORTING,
constants.UPGRADE_ABORTING, constants.UPGRADE_ABORT_COMPLETING,
constants.UPGRADE_ABORT_COMPLETING, constants.UPGRADE_ABORTING_ROLLBACK]):
constants.UPGRADE_ABORTING_ROLLBACK]): self.wait_for_upgrade_complete = False
LOG.info(_LI("Unsurpress require_jewel_osds health warning"))
self.surpress_require_jewel_osds_warning = False
return health return health

View File

@ -97,9 +97,6 @@ class RpcEndpoint(PeriodicTasks):
return self.service.monitor.cluster_is_up return self.service.monitor.cluster_is_up
# This class is needed only when upgrading from 16.10 to 17.x
# TODO: remove it after 1st 17.x release
#
class SysinvConductorUpgradeApi(object): class SysinvConductorUpgradeApi(object):
def __init__(self): def __init__(self):
self.sysinv_conductor = None self.sysinv_conductor = None
@ -113,10 +110,10 @@ class SysinvConductorUpgradeApi(object):
return upgrade return upgrade
@retry(wait_fixed=1000, @retry(wait_fixed=1000,
retry_on_exception=lambda exception: retry_on_exception=lambda e:
LOG.warn(_LW( LOG.warn(_LW(
"Getting software upgrade status failed " "Getting software upgrade status failed "
"with: %s. Retrying... ") % str(exception)) or True) "with: %s. Retrying... ") % str(e)) or True)
def retry_get_software_upgrade_status(self): def retry_get_software_upgrade_status(self):
return self.get_software_upgrade_status() return self.get_software_upgrade_status()