diff --git a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py
index b4088c5890..15b6be42d0 100644
--- a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py
+++ b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py
@@ -2283,7 +2283,7 @@ class HostController(rest.RestController):
         if (personality is not None and
                 personality.find(constants.STORAGE_HOSTNAME) != -1 and
                 not skip_ceph_checks):
-            num_monitors, required_monitors, quorum_names = \
+            num_monitors, required_monitors, __ = \
                 self._ceph.get_monitors_status(pecan.request.dbapi)
             if num_monitors < required_monitors:
                 raise wsme.exc.ClientSideError(_(
@@ -2292,7 +2292,7 @@ class HostController(rest.RestController):
                     "enabled hosts with monitors are required. Please"
                     " ensure hosts with monitors are unlocked and "
                     "enabled.") %
-                    (num_monitors, constants.MIN_STOR_MONITORS))
+                    (num_monitors, required_monitors))
 
             # If it is the last storage node to delete, we need to delete
             # ceph osd pools and update additional tier status to "defined"
@@ -5180,7 +5180,7 @@ class HostController(rest.RestController):
                     "enabled hosts with monitors are required. Please"
                     " ensure hosts with monitors are unlocked and "
                     "enabled.") %
-                    (num_monitors, constants.MIN_STOR_MONITORS))
+                    (num_monitors, required_monitors))
 
         if not force:
             # sm-lock-pre-check
@@ -5409,7 +5409,7 @@ class HostController(rest.RestController):
                     _("Can not unlock a storage node without any Storage Volumes configured"))
 
             ceph_helper = ceph.CephApiOperator()
-            num_monitors, required_monitors, quorum_names = \
+            num_monitors, required_monitors, __ = \
                 ceph_helper.get_monitors_status(pecan.request.dbapi)
             if num_monitors < required_monitors:
                 raise wsme.exc.ClientSideError(
@@ -5418,7 +5418,7 @@ class HostController(rest.RestController):
                     "enabled hosts with monitors are required. Please"
                     " ensure hosts with monitors are unlocked and "
                     "enabled.") %
-                    (num_monitors, constants.MIN_STOR_MONITORS))
+                    (num_monitors, required_monitors))
 
         # Check Ceph configuration, if it is wiped out (in the Backup & Restore
         # process) then restore the configuration.
@@ -5714,7 +5714,7 @@ class HostController(rest.RestController):
                     "enabled hosts with monitors are required. Please"
                     " ensure hosts with monitors are unlocked and "
                     "enabled.") %
-                    (num_monitors, constants.MIN_STOR_MONITORS))
+                    (num_monitors, required_monitors))
 
         storage_nodes = pecan.request.dbapi.ihost_get_by_personality(
             constants.STORAGE)
@@ -5861,7 +5861,7 @@ class HostController(rest.RestController):
                         "enabled hosts with monitors are required. "
                         "Please ensure hosts with monitors are "
                        "unlocked and enabled.") %
-                        (num_monitors, constants.MIN_STOR_MONITORS))
+                        (num_monitors, required_monitors))
 
     def check_unlock_interfaces(self, hostupdate):
         """Semantic check for interfaces on host-unlock."""
diff --git a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/storage.py b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/storage.py
index 079a647ebe..0f2b83ed1d 100644
--- a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/storage.py
+++ b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/storage.py
@@ -575,7 +575,7 @@ def _check_host(stor):
     # semantic check: whether at least 2 unlocked hosts are monitors
     if not utils.is_aio_system(pecan.request.dbapi):
         ceph_helper = ceph.CephApiOperator()
-        num_monitors, required_monitors, quorum_names = \
+        num_monitors, required_monitors, __ = \
             ceph_helper.get_monitors_status(pecan.request.dbapi)
         # CGTS 503 for now update monitors requirement until controller-0 is
         # inventoried
diff --git a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/storage_tier.py b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/storage_tier.py
index 4eaf4ed542..d236c28c19 100644
--- a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/storage_tier.py
+++ b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/storage_tier.py
@@ -305,7 +305,7 @@ class StorageTierController(rest.RestController):
             raise exception.PatchError(patch=patch, reason=e)
 
         # Semantic Checks
-        _check("modify", tier.as_dict())
+        _check(self, "modify", tier.as_dict())
         try:
             # Update only the fields that have changed
             for field in objects.storage_tier.fields:
@@ -396,7 +396,7 @@ def _pre_patch_checks(tier_obj, patch_obj):
                                    "cannot be changed.") % tier_obj.name)
 
 
-def _check(op, tier):
+def _check(self, op, tier):
     # Semantic checks
     LOG.debug("storage_tier: Semantic check for %s operation" % op)
 
@@ -410,10 +410,16 @@ def _check(op, tier):
             raise wsme.exc.ClientSideError(_("Storage tier (%s) "
                                              "already present." % tier['name']))
 
+        # Deny adding secondary tier if initial configuration is not done.
+        if not cutils.is_initial_config_complete():
+            msg = _("Operation denied. Adding secondary tiers to a cluster requires "
+                    "initial configuration to be complete and controller node unlocked.")
+            raise wsme.exc.ClientSideError(msg)
+
         if utils.is_aio_system(pecan.request.dbapi):
             # Deny adding secondary tiers if primary tier backend is not configured
-            # for cluster. When secondary tier is added we also query ceph to create
-            # pools and set replication therefore cluster has to be up.
+            # for cluster.
             clusterId = tier.get('forclusterid') or tier.get('cluster_uuid')
             cluster_tiers = pecan.request.dbapi.storage_tier_get_by_cluster(clusterId)
             configured = False if cluster_tiers else True
@@ -430,9 +436,20 @@ def _check(op, tier):
                 msg = _("Operation denied. Adding secondary tiers to a cluster requires "
                         "primary tier storage backend of this cluster to be configured.")
                 raise wsme.exc.ClientSideError(msg)
+        else:
+            # Deny adding secondary tier if ceph is down on standard
+            num_monitors, required_monitors, __ = \
+                self._ceph.get_monitors_status(pecan.request.dbapi)
+            if num_monitors < required_monitors:
+                raise wsme.exc.ClientSideError(_(
+                    "Operation denied. Ceph is not operational. "
+                    "Only %d storage monitor available. "
+                    "At least %s unlocked and enabled hosts with "
+                    "monitors are required. Please ensure hosts "
+                    "with monitors are unlocked and enabled.") %
+                    (num_monitors, required_monitors))
 
     elif op == "delete":
-
         if tier['name'] == constants.SB_TIER_DEFAULT_NAMES[
                 constants.SB_TIER_TYPE_CEPH]:
             raise wsme.exc.ClientSideError(_("Storage Tier %s cannot be "
@@ -484,7 +501,7 @@ def _create(self, tier, iprofile=None):
     tier = _set_defaults(tier)
 
     # Semantic checks
-    tier = _check("add", tier)
+    tier = _check(self, "add", tier)
 
     LOG.info("storage_tier._create with validated params: %s" % tier)
 
@@ -524,7 +541,7 @@ def _delete(self, tier_uuid):
     tier = objects.storage_tier.get_by_uuid(pecan.request.context, tier_uuid)
 
     # Semantic checks
-    _check("delete", tier.as_dict())
+    _check(self, "delete", tier.as_dict())
 
     # update the crushmap by removing the tier
     try:
diff --git a/sysinv/sysinv/sysinv/sysinv/common/ceph.py b/sysinv/sysinv/sysinv/sysinv/common/ceph.py
index c8eec777e9..f0aa9e77c2 100644
--- a/sysinv/sysinv/sysinv/sysinv/common/ceph.py
+++ b/sysinv/sysinv/sysinv/sysinv/common/ceph.py
@@ -629,13 +629,15 @@ class CephApiOperator(object):
         return True
 
     def get_monitors_status(self, db_api):
-        # first check that the monitors are available in sysinv
-        num_active_monitors = 0
         num_inv_monitors = 0
-        required_monitors = constants.MIN_STOR_MONITORS
+        if utils.is_aio_system(pecan.request.dbapi):
+            required_monitors = constants.MIN_STOR_MONITORS_AIO
+        else:
+            required_monitors = constants.MIN_STOR_MONITORS_MULTINODE
         quorum_names = []
         inventory_monitor_names = []
 
+        # first check that the monitors are available in sysinv
         monitor_list = db_api.ceph_mon_get_list()
         for mon in monitor_list:
             ihost = db_api.ihost_get(mon['forihostid'])
@@ -670,9 +672,16 @@ class CephApiOperator(object):
         # the intersection of the sysinv reported unlocked-available monitor
         # hosts and the monitors reported in the quorum via the ceph API.
         active_monitors = list(set(inventory_monitor_names) & set(quorum_names))
-        LOG.info("Active ceph monitors = %s" % str(active_monitors))
-
         num_active_monitors = len(active_monitors)
+        if (num_inv_monitors and num_active_monitors == 0 and
+                cutils.is_initial_config_complete() and
+                not utils.is_aio_system(pecan.request.dbapi)):
+            # The active controller always has a monitor.
+            # We are on standard or storage, initial configuration
+            # was completed and Ceph is down so we can't check if
+            # it is working. Assume it is.
+            num_active_monitors = 1
+        LOG.info("Active ceph monitors = %s" % str(active_monitors))
 
         return num_active_monitors, required_monitors, active_monitors
diff --git a/sysinv/sysinv/sysinv/sysinv/common/constants.py b/sysinv/sysinv/sysinv/sysinv/common/constants.py
index 2e5f59f38e..fbe5f2688f 100644
--- a/sysinv/sysinv/sysinv/sysinv/common/constants.py
+++ b/sysinv/sysinv/sysinv/sysinv/common/constants.py
@@ -473,7 +473,8 @@ CEPH_AIO_SX_MODEL = 'aio-sx'
 CEPH_UNDEFINED_MODEL = 'undefined'
 
 # Storage: Minimum number of monitors
-MIN_STOR_MONITORS = 2
+MIN_STOR_MONITORS_MULTINODE = 2
+MIN_STOR_MONITORS_AIO = 1
 
 BACKUP_OVERHEAD = 20
diff --git a/sysinv/sysinv/sysinv/sysinv/tests/api/test_storage_tier.py b/sysinv/sysinv/sysinv/sysinv/tests/api/test_storage_tier.py
index 08bf8cd4f4..2e6b0a10e3 100644
--- a/sysinv/sysinv/sysinv/sysinv/tests/api/test_storage_tier.py
+++ b/sysinv/sysinv/sysinv/sysinv/tests/api/test_storage_tier.py
@@ -33,6 +33,7 @@ from sysinv.conductor import manager
 from sysinv.conductor import rpcapi
 from sysinv.common import ceph as ceph_utils
 from sysinv.common import constants
+from sysinv.common import utils as cutils
 from sysinv.common.storage_backend_conf import StorageBackendConfig
 from sysinv.db import api as dbapi
 from sysinv.openstack.common import context
@@ -44,10 +45,17 @@ from sysinv.tests.db import utils as dbutils
 class StorageTierIndependentTCs(base.FunctionalTest):
 
     set_crushmap_patcher = mock.patch.object(ceph_utils.CephApiOperator, 'set_crushmap')
+    set_monitors_status_patcher = mock.patch.object(ceph_utils.CephApiOperator, 'get_monitors_status')
+    set_is_initial_config_patcher = mock.patch.object(cutils, 'is_initial_config_complete')
 
     def setUp(self):
         super(StorageTierIndependentTCs, self).setUp()
         self.mock_set_crushmap = self.set_crushmap_patcher.start()
+        self.set_monitors_status_patcher = self.set_monitors_status_patcher.start()
+        self.set_monitors_status_patcher.return_value = \
+            [3, 2, ['controller-0', 'controller-1', 'storage-0']]
+        self.set_is_initial_config_patcher = self.set_is_initial_config_patcher.start()
+        self.set_is_initial_config_patcher.return_value = True
         self.system = dbutils.create_test_isystem()
         self.cluster = dbutils.create_test_cluster(system_id=self.system.id, name='ceph_cluster')
         self.load = dbutils.create_test_load()
@@ -56,6 +64,8 @@ class StorageTierIndependentTCs(base.FunctionalTest):
     def tearDown(self):
         super(StorageTierIndependentTCs, self).tearDown()
         self.set_crushmap_patcher.stop()
+        self.set_monitors_status_patcher = self.set_monitors_status_patcher.stop()
+        self.set_is_initial_config_patcher.stop()
 
     def assertDeleted(self, fullPath):
         self.get_json(fullPath, expect_errors=True)  # Make sure this line raises an error
@@ -531,10 +541,17 @@ class StorageTierDependentTCs(base.FunctionalTest):
 
     set_crushmap_patcher = mock.patch.object(ceph_utils.CephApiOperator, 'set_crushmap')
+    set_monitors_status_patcher = mock.patch.object(ceph_utils.CephApiOperator, 'get_monitors_status')
+    set_is_initial_config_patcher = mock.patch.object(cutils, 'is_initial_config_complete')
 
     def setUp(self):
         super(StorageTierDependentTCs, self).setUp()
         self.mock_set_crushmap = self.set_crushmap_patcher.start()
+        self.set_monitors_status_patcher = self.set_monitors_status_patcher.start()
+        self.set_monitors_status_patcher.return_value = \
+            [3, 2, ['controller-0', 'controller-1', 'storage-0']]
+        self.set_is_initial_config_patcher = self.set_is_initial_config_patcher.start()
+        self.set_is_initial_config_patcher.return_value = True
         self.service = manager.ConductorManager('test-host', 'test-topic')
         self.service.dbapi = dbapi.get_instance()
         self.context = context.get_admin_context()
@@ -547,6 +564,8 @@ class StorageTierDependentTCs(base.FunctionalTest):
     def tearDown(self):
         super(StorageTierDependentTCs, self).tearDown()
         self.set_crushmap_patcher.stop()
+        self.set_monitors_status_patcher = self.set_monitors_status_patcher.stop()
+        self.set_is_initial_config_patcher.stop()
 
     def assertDeleted(self, fullPath):
         self.get_json(fullPath, expect_errors=True)  # Make sure this line raises an error
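
A minimal, self-contained sketch of the caller pattern this diff converges on (illustrative only, not part of the patch): get_monitors_status() now derives required_monitors from the system type (MIN_STOR_MONITORS_AIO vs MIN_STOR_MONITORS_MULTINODE), assumes the active controller's monitor is alive when the Ceph API cannot be queried on a non-AIO system after initial configuration, and callers report required_monitors in their error messages instead of the old MIN_STOR_MONITORS constant. The function names mirror the patch, but the signatures, the semantic_check_ceph_operational() helper, and the explicit arguments that stand in for the sysinv database and Ceph REST API lookups are assumptions made for the sketch.

# Illustrative sketch only: a stand-in for ceph.CephApiOperator.get_monitors_status()
# with database and Ceph REST API lookups replaced by explicit arguments.

MIN_STOR_MONITORS_MULTINODE = 2
MIN_STOR_MONITORS_AIO = 1


def get_monitors_status(inventory_monitor_names, quorum_names, is_aio,
                        initial_config_complete=True):
    """Return (num_active_monitors, required_monitors, active_monitors)."""
    if is_aio:
        required_monitors = MIN_STOR_MONITORS_AIO
    else:
        required_monitors = MIN_STOR_MONITORS_MULTINODE
    # Active monitors: known to sysinv (unlocked/enabled) and in the Ceph quorum.
    active_monitors = list(set(inventory_monitor_names) & set(quorum_names))
    num_active_monitors = len(active_monitors)
    if (inventory_monitor_names and num_active_monitors == 0 and
            initial_config_complete and not is_aio):
        # Ceph could not be queried; the active controller always runs a
        # monitor, so assume one is up instead of reporting zero.
        num_active_monitors = 1
    return num_active_monitors, required_monitors, active_monitors


def semantic_check_ceph_operational(inventory_monitor_names, quorum_names, is_aio):
    """Mirror the semantic-check pattern used by the API controllers."""
    num_monitors, required_monitors, __ = get_monitors_status(
        inventory_monitor_names, quorum_names, is_aio)
    if num_monitors < required_monitors:
        # The message now reports required_monitors rather than a fixed constant.
        raise ValueError("Ceph is not operational: only %d storage monitor(s) "
                         "available, at least %d required." %
                         (num_monitors, required_monitors))


if __name__ == "__main__":
    # AIO system: a single in-quorum monitor now satisfies the check.
    semantic_check_ceph_operational(['controller-0'], ['controller-0'], is_aio=True)
    # Standard system with the Ceph API unreachable: one monitor is assumed
    # active, which is still below the multinode minimum of two.
    try:
        semantic_check_ceph_operational(
            ['controller-0', 'controller-1', 'storage-0'], [], is_aio=False)
    except ValueError as exc:
        print(exc)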