diff --git a/sysinv/cgts-client/cgts-client/cgtsclient/tests/v1/test_ihost.py b/sysinv/cgts-client/cgts-client/cgtsclient/tests/v1/test_ihost.py index 818a986613..22d439ef48 100644 --- a/sysinv/cgts-client/cgts-client/cgtsclient/tests/v1/test_ihost.py +++ b/sysinv/cgts-client/cgts-client/cgtsclient/tests/v1/test_ihost.py @@ -15,7 +15,7 @@ # License for the specific language governing permissions and limitations # under the License. # -# Copyright (c) 2013-2014 Wind River Systems, Inc. +# Copyright (c) 2013-2023 Wind River Systems, Inc. # @@ -54,6 +54,14 @@ UPDATED_IHOST = copy.deepcopy(IHOST) NEW_LOC = 'newlocOttawa' UPDATED_IHOST['location'] = NEW_LOC +KERNEL = {'ihost_uuid': IHOST['uuid'], + 'hostname': IHOST['hostname'], + 'kernel_provisioned': 'standard', + 'kernel_running': 'standard'} + +UPDATED_KERNEL = copy.deepcopy(KERNEL) +NEW_KERNEL = 'lowlatency' +UPDATED_KERNEL['kernel_provisioned'] = NEW_KERNEL fixtures = { '/v1/ihosts': @@ -89,6 +97,17 @@ fixtures = { {"ports": [PORT]}, ), }, + '/v1/ihosts/%s/kernel' % IHOST['uuid']: + { + 'GET': ( + {}, + KERNEL, + ), + 'PATCH': ( + {}, + UPDATED_KERNEL, + ), + }, } @@ -142,3 +161,25 @@ class HostManagerTest(testtools.TestCase): ] self.assertEqual(self.api.calls, expect) self.assertEqual(ihost.location, NEW_LOC) + + def test_host_kernel_modify(self): + patch = {'op': 'replace', + 'value': NEW_KERNEL, + 'path': '/kernel_provisioned'} + kernel = self.mgr.host_kernel_modify(hostid=IHOST['uuid'], + patch=patch) + expect = [ + ('PATCH', '/v1/ihosts/%s/kernel' % IHOST['uuid'], {}, patch), + ] + self.assertEqual(self.api.calls, expect) + self.assertEqual(kernel.kernel_provisioned, NEW_KERNEL) + self.assertEqual(kernel.kernel_running, 'standard') + + def test_host_kernel_show(self): + kernel = self.mgr.host_kernel_show(hostid=IHOST['uuid']) + expect = [ + ('GET', '/v1/ihosts/%s/kernel' % IHOST['uuid'], {}, None), + ] + self.assertEqual(self.api.calls, expect) + self.assertEqual(kernel.kernel_provisioned, 'standard') + 
self.assertEqual(kernel.kernel_running, 'standard') diff --git a/sysinv/cgts-client/cgts-client/cgtsclient/tests/v1/test_ihost_shell.py b/sysinv/cgts-client/cgts-client/cgtsclient/tests/v1/test_ihost_shell.py index af919fc116..9fdb7fd6a5 100644 --- a/sysinv/cgts-client/cgts-client/cgtsclient/tests/v1/test_ihost_shell.py +++ b/sysinv/cgts-client/cgts-client/cgtsclient/tests/v1/test_ihost_shell.py @@ -1,13 +1,15 @@ # -# Copyright (c) 2019 Wind River Systems, Inc. +# Copyright (c) 2019-2023 Wind River Systems, Inc. # # SPDX-License-Identifier: Apache-2.0 # import mock +import yaml from cgtsclient.tests import test_shell from cgtsclient.v1.ihost import ihost +from cgtsclient.v1.ihost import ihost_kernel from cgtsclient.v1.kube_host_upgrade import KubeHostUpgrade FAKE_KUBE_HOST_UPGRADE = { @@ -61,6 +63,12 @@ FAKE_IHOST_3 = { 'personality': 'storage', } +FAKE_KERNEL = { + 'hostname': 'controller-0', + 'kernel_provisioned': 'lowlatency', + 'kernel_running': 'lowlatency', +} + class HostTest(test_shell.ShellTest): @@ -210,3 +218,20 @@ class HostTest(test_shell.ShellTest): results) self.assertIn(str(FAKE_KUBE_HOST_UPGRADE_2['status']), results) + + @mock.patch('cgtsclient.v1.ihost.ihostManager.host_kernel_show') + def test_host_kernel_show(self, mock_host_kernel_show): + """Returns a single kernel """ + self.make_env() + mock_host_kernel_show.return_value = ihost_kernel(None, + FAKE_KERNEL, + True) + results = self.shell("host-kernel-show --format=yaml " + f"{FAKE_KERNEL['hostname']}") + kernel = yaml.safe_load(results) + self.assertEqual(kernel['hostname'], + FAKE_KERNEL['hostname']) + self.assertEqual(kernel['kernel_provisioned'], + FAKE_KERNEL['kernel_provisioned']) + self.assertEqual(kernel['kernel_running'], + FAKE_KERNEL['kernel_running']) diff --git a/sysinv/cgts-client/cgts-client/cgtsclient/v1/iHost_shell.py b/sysinv/cgts-client/cgts-client/cgtsclient/v1/iHost_shell.py index b32c38a0a2..02fca07c20 100755 --- 
a/sysinv/cgts-client/cgts-client/cgtsclient/v1/iHost_shell.py +++ b/sysinv/cgts-client/cgts-client/cgtsclient/v1/iHost_shell.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2013-2021 Wind River Systems, Inc. +# Copyright (c) 2013-2023 Wind River Systems, Inc. # # SPDX-License-Identifier: Apache-2.0 # @@ -711,3 +711,46 @@ def do_host_cpu_max_frequency_modify(cc, args): except exc.HTTPNotFound: raise exc.CommandError('host not found: %s' % args.hostnameorid) _print_ihost_show(ihost) + + +@utils.arg('hostnameorid', + metavar='', + help="Name or ID of host") +@utils.arg('kernel', + metavar='', + choices=['standard', 'lowlatency'], + help="Kernel image is either standard or lowlatency") +def do_host_kernel_modify(cc, args): + """ + Modify the kernel image to either standard or lowlatency. + """ + attributes = {'kernel_provisioned': args.kernel} + patch = utils.dict_to_patch(attributes) + + ihost = ihost_utils._find_ihost(cc, args.hostnameorid) + try: + cc.ihost.host_kernel_modify(ihost.uuid, patch) + except exc.HTTPNotFound: + raise exc.CommandError('Host not found: %s' % args.hostnameorid) + + +def _print_kernel_show(kernel, output_format=None): + fields = ['hostname', 'kernel_provisioned', 'kernel_running'] + data_list = [(f, getattr(kernel, f, '')) for f in fields] + data = dict(data_list) + utils.print_dict_with_format(data, wrap=72, output_format=output_format) + + +@utils.arg('hostnameorid', metavar='', + help="Name or ID of host") +@utils.arg('--format', + choices=['table', 'yaml', 'value'], + help="specify the output format, defaults to table") +def do_host_kernel_show(cc, args): + """Show kernel attributes.""" + ihost = ihost_utils._find_ihost(cc, args.hostnameorid) + try: + kernel = cc.ihost.host_kernel_show(ihost.uuid) + except exc.HTTPNotFound: + raise exc.CommandError('Host not found: %s' % args.hostnameorid) + _print_kernel_show(kernel, args.format) diff --git a/sysinv/cgts-client/cgts-client/cgtsclient/v1/ihost.py 
b/sysinv/cgts-client/cgts-client/cgtsclient/v1/ihost.py index 6879b56134..3b8e0463b6 100644 --- a/sysinv/cgts-client/cgts-client/cgtsclient/v1/ihost.py +++ b/sysinv/cgts-client/cgts-client/cgtsclient/v1/ihost.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2013-2021 Wind River Systems, Inc. +# Copyright (c) 2013-2023 Wind River Systems, Inc. # # SPDX-License-Identifier: Apache-2.0 # @@ -27,6 +27,11 @@ class ihost(base.Resource): return "" % self._info +class ihost_kernel(base.Resource): + def __repr__(self): + return "" % self._info + + class ihostManager(base.Manager): resource_class = ihost @@ -151,6 +156,16 @@ class ihostManager(base.Manager): resp, body = self.api.json_request('POST', path) return self.resource_class(self, body) + def host_kernel_modify(self, hostid, patch): + url = self._path(hostid) + "/kernel" + resp, body = self.api.json_request('PATCH', url, body=patch) + return ihost_kernel(self, body) + + def host_kernel_show(self, hostid): + url = self._path(hostid) + "/kernel" + resp, body = self.api.json_request('GET', url) + return ihost_kernel(self, body) + def _find_ihost(cc, ihost_id): if ihost_id.isdigit() or utils.is_uuid_like(ihost_id): diff --git a/sysinv/sysinv/sysinv/sysinv/agent/manager.py b/sysinv/sysinv/sysinv/sysinv/agent/manager.py index cb01fe713e..8412615f0d 100644 --- a/sysinv/sysinv/sysinv/sysinv/agent/manager.py +++ b/sysinv/sysinv/sysinv/sysinv/agent/manager.py @@ -154,6 +154,7 @@ class AgentManager(service.PeriodicService): LVG = 'lvg' HOST_FILESYSTEMS = 'host_filesystems' K8S_DEVICE_PLUGIN = 'k8s_device_plugin' + KERNEL = 'kernel' # Note that this set must be extended when there are # additional inventory required for the initial @@ -168,7 +169,8 @@ class AgentManager(service.PeriodicService): PV, LVG, HOST_FILESYSTEMS, - K8S_DEVICE_PLUGIN} + K8S_DEVICE_PLUGIN, + KERNEL} def __init__(self, host, topic): self.host = host @@ -725,6 +727,21 @@ class AgentManager(service.PeriodicService): LOG.exception("Sysinv Agent exception updating 
pci_device.") pass + def _get_kernel_running(self): + """Get the running kernel + Examples: + lowlatency - 5.10.0-6-rt-amd64 + standard - 5.10.0-6-amd64 + Returns: + str: running kernel either standard or lowlatency + """ + kernel_release = os.uname().release + if '-rt-' in kernel_release: + kernel_running = constants.KERNEL_LOWLATENCY + else: + kernel_running = constants.KERNEL_STANDARD + return kernel_running + def ihost_inv_get_and_report(self, icontext): """Collect data for an ihost. @@ -920,6 +937,19 @@ class AgentManager(service.PeriodicService): LOG.exception("Sysinv Agent exception updating ilvg conductor.") pass + kernel_running = self._get_kernel_running() + try: + rpcapi.report_kernel_running(icontext, + ihost['uuid'], + kernel_running) + self._inventory_reported.add(self.KERNEL) + except RemoteError as e: + LOG.error("report_kernel_running " + f"RemoteError exc_type={e.exc_type}") + except exception.SysinvException: + LOG.exception("Sysinv Agent exception updating kernel conductor.") + pass + if constants.WORKER in self.subfunctions_list_get(): platform_interfaces = [] # retrieve the mgmt interfaces and associated numa nodes diff --git a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py index e626b520e4..5f37f7ebfc 100644 --- a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py +++ b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py @@ -88,6 +88,7 @@ from sysinv.api.controllers.v1 import vim_api from sysinv.api.controllers.v1 import patch_api from sysinv.api.controllers.v1 import ptp_instance from sysinv.api.controllers.v1 import ptp_interface +from sysinv.api.controllers.v1 import kernel from sysinv.common import ceph from sysinv.common import constants from sysinv.common import device @@ -1145,6 +1146,9 @@ class HostController(rest.RestController): ptp_interfaces = ptp_interface.PtpInterfaceController(parent="ihosts") "Expose PTP interfaces as a sub-element of ihosts" + kernel = 
kernel.KernelController() + "Expose kernel as a sub-element of ihosts" + _custom_actions = { 'detail': ['GET'], 'bulk_add': ['POST'], diff --git a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/kernel.py b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/kernel.py new file mode 100644 index 0000000000..683ba79234 --- /dev/null +++ b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/kernel.py @@ -0,0 +1,257 @@ +# vim: tabstop=4 shiftwidth=4 softtabstop=4 + +# +# SPDX-License-Identifier: Apache-2.0 +# +# Copyright (c) 2023 Wind River Systems, Inc. +# + +import jsonpatch +import pecan +from pecan import rest +import wsme +from wsme import types as wtypes +import wsmeext.pecan as wsme_pecan + +from oslo_log import log + +from sysinv._i18n import _ +from sysinv.api.controllers.v1 import base +from sysinv.api.controllers.v1 import types +from sysinv.api.controllers.v1 import utils +from sysinv.api.controllers.v1 import link +from sysinv.common import utils as cutils +from sysinv import objects + +from sysinv.common import constants + +from typing import Set + +LOCK_NAME = 'KernelController' + +LOG = log.getLogger(__name__) + + +class KernelPatchType(types.JsonPatchType): + + @staticmethod + def mandatory_attrs(): + return ['ihost_uuid', + 'hostname', + 'kernel_provisioned', + 'kernel_running'] + + +class Kernel(base.APIBase): + """ + API representation of the kernel configuration of a ihost. 
+ """ + ihost_uuid = types.uuid + "The UUID of the host of this kernel" + + hostname = wtypes.text + "The name of the host of this kernel" + + kernel_provisioned = wtypes.text + "The provisioned kernel of the ihost" + + kernel_running = wtypes.text + "The running kernel of the ihost" + + links = [link.Link] + "A list containing a self link and associated kernel links" + + def __init__(self, **kwargs): + self.fields = ['ihost_uuid', + 'hostname', + 'kernel_provisioned', + 'kernel_running', + 'links'] + self.hostname = kwargs.get('hostname') + self.ihost_uuid = kwargs.get('uuid') + self.kernel_provisioned = kwargs.get('kernel_provisioned') + self.kernel_running = kwargs.get('kernel_running') + + # if 'kernel_provisioned' key is missing use 'subfunctions' key instead + if self.kernel_provisioned is None: + if constants.LOWLATENCY in kwargs.get(constants.SUBFUNCTIONS): + self.kernel_provisioned = constants.KERNEL_LOWLATENCY + else: + self.kernel_provisioned = constants.KERNEL_STANDARD + + @staticmethod + def _create_subfunctions_str(subfunctions_set: Set): + """Generate the subfunctions string using the set + Preserves expected order + + Args: + subfunctions_set (Set): set of subfunction names (modified in place) + """ + expected_order = [constants.CONTROLLER, + constants.WORKER, + constants.STORAGE, + constants.LOWLATENCY] + subfunctions_list = [] + for i in expected_order: + if i in subfunctions_set: + subfunctions_list.append(i) + subfunctions_set.discard(i) + + for i in subfunctions_set: + subfunctions_list.append(i) + + return ','.join(subfunctions_list) + + def _update_kernel(self, ihost, kernel: str): + """ Update the kernel value + + Args: + ihost: rpc ihost object + kernel (str): kernel value + """ + LOG.info( + f"Updating kernel {self.hostname} " + f"[running={self.kernel_running} " + f"provisioned={self.kernel_provisioned}] " + f"to {kernel}]" + ) + + if self.kernel_provisioned == kernel and self.kernel_running == kernel: + return None + + if kernel == constants.KERNEL_LOWLATENCY: + 
lowlatency = True + else: + lowlatency = False + + subfunctions = ihost.get(constants.SUBFUNCTIONS) or "" + subfunctions_set = set(subfunctions.split(',')) + + if lowlatency is True: + subfunctions_set.add(constants.LOWLATENCY) + else: + subfunctions_set.discard(constants.LOWLATENCY) + + updated_subfunctions = Kernel._create_subfunctions_str(subfunctions_set) + updates = \ + { + constants.SUBFUNCTIONS: updated_subfunctions + } + + ihost.save_changes(pecan.request.context, updates) + pecan.request.rpcapi.kernel_runtime_manifests(pecan.request.context, + self.ihost_uuid) + self.kernel_provisioned = kernel + + @classmethod + def convert_with_links(cls, ihost): + ihost_dict = ihost.as_dict() + kernel = Kernel(**ihost_dict) + url_arg = f"{ihost.uuid}/kernel" + kernel.links = [link.Link.make_link('self', + pecan.request.host_url, + 'ihosts', url_arg), + link.Link.make_link('bookmark', + pecan.request.host_url, + 'ihosts', url_arg, + bookmark=True) + ] + return kernel + + +class KernelController(rest.RestController): + + @staticmethod + def _check_host(ihost): + if ihost.administrative != constants.ADMIN_LOCKED: + raise wsme.exc.ClientSideError(_('Host must be locked.')) + + if constants.WORKER not in ihost.subfunctions: + raise wsme.exc.ClientSideError(_('Can only modify worker nodes.')) + + @staticmethod + def _check_patch(patch): + KERNEL_PATH = '/kernel_provisioned' + + if not isinstance(patch, list): + patch = [patch] + + utils.validate_patch(patch) + + supported_ops = ['replace'] + supported_paths = [KERNEL_PATH] + supported_kernels = constants.SUPPORTED_KERNELS + for p in patch: + path = p["path"] + op = p["op"] + value = p["value"] + + if path not in supported_paths: + error_msg = f"Path in not supported: {path}" + raise wsme.exc.ClientSideError(_(error_msg)) + + if op not in supported_ops: + error_msg = f"Operation in not supported: {op}" + raise wsme.exc.ClientSideError(_(error_msg)) + + if path == KERNEL_PATH and value not in supported_kernels: + error_msg = 
f"Supported kernels: {supported_kernels}" + raise wsme.exc.ClientSideError(_(error_msg)) + + # GET ihosts/{ihost_uuid}/kernel + @wsme_pecan.wsexpose(Kernel, types.uuid) + def get(self, ihost_uuid): + """Query information of a specific host kernel + + Args: + ihost_uuid (uuid): UUID of the host + + Returns: + Kernel: Kernel API object + """ + ihost = objects.host.get_by_uuid(pecan.request.context, ihost_uuid) + kernel = Kernel.convert_with_links(ihost) + return kernel + + # PATCH ihosts/{ihost_uuid}/kernel + @cutils.synchronized(LOCK_NAME) + @wsme.validate(types.uuid, [KernelPatchType]) + @wsme_pecan.wsexpose(Kernel, types.uuid, + body=[KernelPatchType]) + def patch(self, ihost_uuid, patch): + """Modify a host kernel's configuration. + + Example: + /v1/ihosts/{ihost_uuid}/kernel + patch + [ + { + "op" : "replace", + "path" : "/kernel_provisioned", + "value" : "lowlatency" + } + ] + + Args: + ihost_uuid (uuid): UUID of the host + patch (json): kernel patch + """ + ihost = objects.host.get_by_uuid(pecan.request.context, ihost_uuid) + + KernelController._check_host(ihost) + KernelController._check_patch(patch) + + patch_obj = jsonpatch.JsonPatch(patch) + kernel_obj = Kernel.convert_with_links(ihost) + kernel_dict = kernel_obj.as_dict() + + try: + patched_kernel_dict = jsonpatch.apply_patch(kernel_dict, patch_obj) + except jsonpatch.JsonPatchException as inst: + LOG.exception(inst) + error_msg = f"Update Kernel Error: {inst}" + raise wsme.exc.ClientSideError(_(error_msg)) + + kernel_value = patched_kernel_dict.get('kernel_provisioned') + kernel_obj._update_kernel(ihost, kernel_value) + return kernel_obj diff --git a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/types.py b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/types.py index 7b67e02100..ddc555a715 100644 --- a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/types.py +++ b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/types.py @@ -16,7 +16,7 @@ # License for the specific language governing permissions and limitations # under the License. 
# -# Copyright (c) 2013-2015 Wind River Systems, Inc. +# Copyright (c) 2013-2023 Wind River Systems, Inc. # import re @@ -205,7 +205,7 @@ class JsonPatchType(wtypes.Base): @staticmethod def mandatory_attrs(): - """Retruns a list of mandatory attributes. + """Returns a list of mandatory attributes. Mandatory attributes can't be removed from the document. This method should be overwritten by derived class. diff --git a/sysinv/sysinv/sysinv/sysinv/common/constants.py b/sysinv/sysinv/sysinv/sysinv/common/constants.py index ceba23966d..65f1f65f6e 100644 --- a/sysinv/sysinv/sysinv/sysinv/common/constants.py +++ b/sysinv/sysinv/sysinv/sysinv/common/constants.py @@ -122,6 +122,12 @@ PERSONALITIES = [CONTROLLER, STORAGE, WORKER, EDGEWORKER] SUBFUNCTIONS = 'subfunctions' LOWLATENCY = 'lowlatency' +# Kernel types +KERNEL_LOWLATENCY = 'lowlatency' +KERNEL_STANDARD = 'standard' + +SUPPORTED_KERNELS = [KERNEL_LOWLATENCY, KERNEL_STANDARD] + # CPU functions PLATFORM_FUNCTION = "Platform" VSWITCH_FUNCTION = "Vswitch" diff --git a/sysinv/sysinv/sysinv/sysinv/conductor/manager.py b/sysinv/sysinv/sysinv/sysinv/conductor/manager.py index 52d210c49a..2407850528 100644 --- a/sysinv/sysinv/sysinv/sysinv/conductor/manager.py +++ b/sysinv/sysinv/sysinv/sysinv/conductor/manager.py @@ -128,7 +128,6 @@ from sysinv.helm.lifecycle_hook import LifecycleHookInfo from sysinv.zmq_rpc.zmq_rpc import ZmqRpcServer from sysinv.zmq_rpc.zmq_rpc import is_rpc_hybrid_mode_active - MANAGER_TOPIC = 'sysinv.conductor_manager' LOG = log.getLogger(__name__) @@ -1232,6 +1231,181 @@ class ConductorManager(service.PeriodicService): self._config_apply_runtime_manifest(context, config_uuid, config_dict) + def kernel_runtime_manifests(self, context, ihost_uuid): + """Execute kernel runtime manifests + Uses db lowlatency value from ihost.subfunctions + + :param context: an admin context + :param ihost_uuid: uuid of host getting kernel config update + + """ + try: + host = self.dbapi.ihost_get(ihost_uuid) + except 
exception.ServerNotFound: + LOG.error(f'Host not found {ihost_uuid}') + return None + + personalities = [host['personality']] + host_uuids = [host['uuid']] + config_uuid = self._config_update_hosts( + context=context, + personalities=personalities, + host_uuids=host_uuids, + reboot=True) # TODO: check if reboot is required + config_dict = { + "personalities": personalities, + "host_uuids": host_uuids, + "classes": [ + 'platform::grub::kernel_image::runtime', + 'platform::config::file::subfunctions::lowlatency::runtime' + ] + } + self._config_apply_runtime_manifest(context, config_uuid, config_dict) + + def report_kernel_running(self, context, ihost_uuid, kernel_running: str): + """Report from sysinv agent with the running kernel of that host + + :param context: admin context + :param ihost_uuid: host uuid + :param kernel_running (str): the running kernel + """ + ihost_uuid = ihost_uuid.strip() + try: + host = self.dbapi.ihost_get(ihost_uuid) + except exception.ServerNotFound: + LOG.info(f'Report from uuid={ihost_uuid} ' + f'kernel_running={kernel_running}') + LOG.error(f'Host not found {ihost_uuid}') + return None + + hostname = host['hostname'] + LOG.info(f'Report from {hostname} running kernel={kernel_running}') + + # validate reported running kernel + if kernel_running not in constants.SUPPORTED_KERNELS: + error_msg = (f'{hostname} reported unexpected ' + f'kernel_running {kernel_running}') + LOG.error(error_msg) + raise exception.SysinvException(_(error_msg)) + + # update db with kernel_running update and reload host object + host.save_changes(context, {'kernel_running': kernel_running}) + host = self.dbapi.ihost_get(ihost_uuid) + LOG.info(f"DB updated {hostname} " + f"kernel_running={host['kernel_running']}") + + # raise and clear running kernel mismatch alarms + self._update_controllers_kernel_mismatch_alarms() + self._update_kernel_provisioned_mismatch_alarm(host) + + def _clear_kernel_mismatch_alarm(self, alarm_id: str, hostname: str): + """Clear alarm that 
matches the alarm id and hostname + entity_instance_id includes host={hostname}.kernel=... + match the hostname + + Args: + alarm_id (str): alarm id + hostname (str): hostname + """ + entity_instance_id_partial = f"host={hostname}" + alarms = self.fm_api.get_faults_by_id(alarm_id) + if alarms is None: + return None + + for alarm in alarms: + if entity_instance_id_partial in alarm.entity_instance_id: + entity_instance_id = alarm.entity_instance_id + LOG.info(f"Clearing alarm {alarm_id} {entity_instance_id}") + self.fm_api.clear_fault(alarm_id, entity_instance_id) + + def _update_controllers_kernel_mismatch_alarms(self): + """ Raise or clear the 100.120 alarm + Controllers running mismatched kernels. + compares the 2 controllers running kernels + """ + alarm_id = fm_constants.FM_ALARM_ID_CONTROLLERS_KERNEL_MISMATCH + pra = _(fm_constants.FM_PRA_CONTROLLERS_KERNEL_MISMATCH) + reason_text = _("Controllers running mismatched kernels.") + + controller_kernels = set() + controllers = self.dbapi.ihost_get_by_personality(constants.CONTROLLER) + if len(controllers) != 2: + return None + + for host in controllers: + hostname = host['hostname'] + kernel_running = host['kernel_running'] + # incomplete info, wait for reports from all controller agents + if not kernel_running: + LOG.info(f"{hostname} missing running kernel info") + return None + controller_kernels.add(kernel_running) + + if len(controller_kernels) == 1: + # all running kernels match + for host in controllers: + hostname = host['hostname'] + self._clear_kernel_mismatch_alarm(alarm_id, hostname) + return None + + # detected mismatched running kernels + for host in controllers: + hostname = host['hostname'] + kernel_running = host['kernel_running'] + entity_instance_id = f"host={hostname}.kernel={kernel_running}" + fault = fm_api.Fault( + alarm_id=alarm_id, + alarm_state=fm_constants.FM_ALARM_STATE_SET, + entity_type_id=fm_constants.FM_ENTITY_TYPE_HOST, + entity_instance_id=entity_instance_id, + 
severity=fm_constants.FM_ALARM_SEVERITY_MINOR, + reason_text=reason_text, + alarm_type=fm_constants.FM_ALARM_TYPE_4, + probable_cause=fm_constants.ALARM_PROBABLE_CAUSE_65, + proposed_repair_action=pra, + service_affecting=False) + LOG.info(f"Raising alarm {alarm_id} {entity_instance_id}") + self.fm_api.set_fault(fault) + + def _update_kernel_provisioned_mismatch_alarm(self, host): + """Raise or clear the 100.121 alarm + Host not running the provisioned kernel. + Compares the provisioned kernel vs the running kernel of the + specified host + + Args: + host: the host the alarm is against + """ + alarm_id = fm_constants.FM_ALARM_ID_PROVISIONED_KERNEL_MISMATCH + pra = _(fm_constants.FM_PRA_PROVISIONED_KERNEL_MISMATCH) + reason_text = _("Host not running the provisioned kernel.") + + if constants.LOWLATENCY in host[constants.SUBFUNCTIONS]: + kernel_provisioned = constants.KERNEL_LOWLATENCY + else: + kernel_provisioned = constants.KERNEL_STANDARD + + hostname = host['hostname'] + kernel_running = host['kernel_running'] + entity_instance_id = f"host={hostname}.kernel={kernel_running}" + + if kernel_running != kernel_provisioned: + fault = fm_api.Fault( + alarm_id=alarm_id, + alarm_state=fm_constants.FM_ALARM_STATE_SET, + entity_type_id=fm_constants.FM_ENTITY_TYPE_HOST, + entity_instance_id=entity_instance_id, + severity=fm_constants.FM_ALARM_SEVERITY_MAJOR, + reason_text=reason_text, + alarm_type=fm_constants.FM_ALARM_TYPE_4, + probable_cause=fm_constants.ALARM_PROBABLE_CAUSE_65, + proposed_repair_action=pra, + service_affecting=False) + LOG.info(f"Raising alarm {alarm_id} {entity_instance_id}") + self.fm_api.set_fault(fault) + else: + self._clear_kernel_mismatch_alarm(alarm_id, hostname) + def _update_pxe_config(self, host, load=None): """Set up the PXE config file for this host so it can run the installer. 
@@ -7093,7 +7267,6 @@ class ConductorManager(service.PeriodicService): self.evaluate_apps_reapply( context, trigger={'type': constants.APP_EVALUATE_REAPPLY_TYPE_DETECTED_SWACT}) - else: LOG.info("Initial save active controller {}" "".format(new_active)) @@ -15778,7 +15951,7 @@ class ConductorManager(service.PeriodicService): """ LOG.info("Entering device_update_by_host %s %s" % (host_uuid, fpga_device_dict_array)) - host_uuid.strip() + host_uuid = host_uuid.strip() try: host = self.dbapi.ihost_get(host_uuid) except exception.ServerNotFound: diff --git a/sysinv/sysinv/sysinv/sysinv/conductor/rpcapi.py b/sysinv/sysinv/sysinv/sysinv/conductor/rpcapi.py index 370e9e2f2f..b6a42c9a0d 100644 --- a/sysinv/sysinv/sysinv/sysinv/conductor/rpcapi.py +++ b/sysinv/sysinv/sysinv/sysinv/conductor/rpcapi.py @@ -2192,3 +2192,27 @@ class ConductorAPI(sysinv.openstack.common.rpc.proxy.RpcProxy): return self.call(context, self.make_msg('update_host_max_cpu_mhz_configured', host=host)) + + def kernel_runtime_manifests(self, context, ihost_uuid): + """Synchronously, execute runtime manifests to update kernel + + :param context: request context. 
+ :param ihost_uuid: uuid of host getting kernel config update + + """ + + return self.call(context, + self.make_msg('kernel_runtime_manifests', + ihost_uuid=ihost_uuid)) + + def report_kernel_running(self, context, ihost_uuid, kernel_running: str): + """Report the local running kernel to the Conductor + + :param context: request context + :param ihost_uuid: uuid of the host sending the update + :param kernel_running (str): the running kernel + """ + return self.cast(context, + self.make_msg('report_kernel_running', + ihost_uuid=ihost_uuid, + kernel_running=kernel_running)) diff --git a/sysinv/sysinv/sysinv/sysinv/db/sqlalchemy/migrate_repo/versions/129_kernel_running.py b/sysinv/sysinv/sysinv/sysinv/db/sqlalchemy/migrate_repo/versions/129_kernel_running.py new file mode 100644 index 0000000000..52388efcb0 --- /dev/null +++ b/sysinv/sysinv/sysinv/sysinv/db/sqlalchemy/migrate_repo/versions/129_kernel_running.py @@ -0,0 +1,22 @@ +# +# Copyright (c) 2023 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +from sqlalchemy import Column, MetaData, Table +from sqlalchemy import String + + +def upgrade(migrate_engine): + meta = MetaData() + meta.bind = migrate_engine + migrate_engine.connect() + i_host = Table('i_host', meta, autoload=True) + i_host.create_column(Column('kernel_running', String(64))) + + +def downgrade(migrate_engine): + meta = MetaData() + meta.bind = migrate_engine + raise NotImplementedError('SysInv database downgrade is unsupported.') diff --git a/sysinv/sysinv/sysinv/sysinv/db/sqlalchemy/models.py b/sysinv/sysinv/sysinv/sysinv/db/sqlalchemy/models.py index 915530cd3c..0e414689b0 100644 --- a/sysinv/sysinv/sysinv/sysinv/db/sqlalchemy/models.py +++ b/sysinv/sysinv/sysinv/sysinv/db/sqlalchemy/models.py @@ -15,7 +15,7 @@ # License for the specific language governing permissions and limitations # under the License. # -# Copyright (c) 2013-2022 Wind River Systems, Inc. +# Copyright (c) 2013-2023 Wind River Systems, Inc. 
# # SPDX-License-Identifier: Apache-2.0 # @@ -204,6 +204,7 @@ class ihost(Base): bm_username = Column(String(255)) personality = Column(invPersonalityEnum) + kernel_running = Column(String(64)) subfunctions = Column(String(255)) subfunction_oper = Column(operEnum, default="disabled") subfunction_avail = Column(availEnum, default="not-installed") diff --git a/sysinv/sysinv/sysinv/sysinv/objects/host.py b/sysinv/sysinv/sysinv/sysinv/objects/host.py index a1acc2dd2f..6ae376dd63 100644 --- a/sysinv/sysinv/sysinv/sysinv/objects/host.py +++ b/sysinv/sysinv/sysinv/sysinv/objects/host.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2013-2022 Wind River Systems, Inc. +# Copyright (c) 2013-2023 Wind River Systems, Inc. # # SPDX-License-Identifier: Apache-2.0 # @@ -36,7 +36,8 @@ class Host(base.SysinvObject): dbapi = db_api.get_instance() - fields = { + fields = \ + { 'id': int, 'forisystemid': utils.int_or_none, 'isystem_uuid': utils.str_or_none, @@ -47,6 +48,7 @@ class Host(base.SysinvObject): # 'updated_at': utils.datetime_str_or_none, 'hostname': utils.str_or_none, 'personality': utils.str_or_none, + 'kernel_running': utils.str_or_none, 'subfunctions': utils.str_or_none, 'subfunction_oper': utils.str_or_none, 'subfunction_avail': utils.str_or_none, @@ -105,7 +107,7 @@ class Host(base.SysinvObject): 'reboot_needed': utils.bool_or_none, 'max_cpu_mhz_configured': utils.str_or_none, 'max_cpu_mhz_allowed': utils.str_or_none - } + } _foreign_fields = { 'isystem_uuid': 'system:uuid', diff --git a/sysinv/sysinv/sysinv/sysinv/tests/api/test_kernel.py b/sysinv/sysinv/sysinv/sysinv/tests/api/test_kernel.py new file mode 100644 index 0000000000..8522e717e0 --- /dev/null +++ b/sysinv/sysinv/sysinv/sysinv/tests/api/test_kernel.py @@ -0,0 +1,302 @@ +# vim: tabstop=4 shiftwidth=4 softtabstop=4 +# -*- encoding: utf-8 -*- +# +# +# Copyright (c) 2023 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +""" +Tests for the API /ihosts//kernel methods. 
+""" +import mock +from six.moves import http_client +from sysinv.common import constants +from sysinv.tests.api import base +from sysinv.tests.db import base as dbbase + + +class FakeConductorAPI(object): + + def __init__(self): + self.kernel_runtime_manifests = mock.MagicMock() + + +class FakeException(Exception): + pass + + +class TestKernel(base.FunctionalTest, dbbase.BaseHostTestCase): + # API_HEADERS are a generic header passed to most API calls + API_HEADERS = {'User-Agent': 'sysinv-test'} + + def _setup_configuration(self): + pass + + def _setup_context(self): + self.fake_conductor_api = FakeConductorAPI() + p = mock.patch('sysinv.conductor.rpcapiproxy.ConductorAPI') + self.mock_conductor_api = p.start() + self.mock_conductor_api.return_value = self.fake_conductor_api + self.addCleanup(p.stop) + + def setUp(self): + super(TestKernel, self).setUp() + self._setup_context() + + def _get_path(self, host_uuid): + return f'/ihosts/{host_uuid}/kernel' + + def _create_host(self, personality, subfunction=None, + mgmt_mac=None, mgmt_ip=None, + admin=None, + invprovision=constants.PROVISIONED, **kw): + host = self._create_test_host(personality=personality, + subfunction=subfunction, + administrative=(admin or + constants.ADMIN_UNLOCKED), + invprovision=invprovision, + **kw) + return host + + +class TestPatchKernel(TestKernel): + def setUp(self): + super(TestPatchKernel, self).setUp() + + def test_update_worker_kernel_noop(self): + """ Test updating the kernel from + standard -> standard of a locked worker node + """ + worker = self._create_host(constants.WORKER, + admin=constants.ADMIN_LOCKED) + hostid = worker['uuid'] + hostname = worker['hostname'] + + # Verify DB - lowlatency is not in subfunctions before update + host = self._get_test_host_by_hostname(hostname) + subfunctions = host[constants.SUBFUNCTIONS] + self.assertNotIn(constants.LOWLATENCY, subfunctions) + + url = '%s' % self._get_path(hostid) + response = self.patch_dict_json(url, + 
headers=self.API_HEADERS, + kernel_provisioned=constants.KERNEL_STANDARD, + expect_errors=False) + + self.assertEqual('application/json', response.content_type) + self.assertEqual(http_client.OK, response.status_code) + self.assertEqual(constants.KERNEL_STANDARD, + response.json['kernel_provisioned']) + + # Verify DB - lowlatency is not in subfunctions after update + host = self._get_test_host_by_hostname(hostname) + subfunctions = host[constants.SUBFUNCTIONS] + self.assertNotIn(constants.LOWLATENCY, subfunctions) + + # Verify that the method that updates kernel config is not called + self.fake_conductor_api.kernel_runtime_manifests.assert_not_called() + + def test_update_worker_kernel(self): + """ Test updating the kernel from + standard -> lowlatency of a locked worker node + """ + worker = self._create_host(constants.WORKER, + admin=constants.ADMIN_LOCKED) + hostid = worker['uuid'] + hostname = worker['hostname'] + + # Verify DB - lowlatency is not in subfunctions before update + host = self._get_test_host_by_hostname(hostname) + subfunctions = host[constants.SUBFUNCTIONS] + self.assertNotIn(constants.LOWLATENCY, subfunctions) + + url = '%s' % self._get_path(hostid) + response = self.patch_dict_json(url, + headers=self.API_HEADERS, + kernel_provisioned=constants.KERNEL_LOWLATENCY, + expect_errors=False) + + self.assertEqual('application/json', response.content_type) + self.assertEqual(http_client.OK, response.status_code) + self.assertEqual(constants.KERNEL_LOWLATENCY, + response.json['kernel_provisioned']) + + # Verify DB - lowlatency is in subfunctions after update + host = self._get_test_host_by_hostname(hostname) + subfunctions = host[constants.SUBFUNCTIONS] + self.assertIn(constants.LOWLATENCY, subfunctions) + + # Verify that the method that updates kernel config is called once + self.fake_conductor_api.kernel_runtime_manifests.assert_called_once() + + def test_update_lowlatency_worker_kernel(self): + """ Test updating the kernel from + lowlatency -> 
standard of a locked worker node + """ + worker = self._create_host(constants.WORKER, + subfunction=constants.LOWLATENCY, + kernel_running=constants.KERNEL_LOWLATENCY, + admin=constants.ADMIN_LOCKED) + hostid = worker['uuid'] + hostname = worker['hostname'] + + # Verify DB - lowlatency is in subfunctions before update + host = self._get_test_host_by_hostname(hostname) + subfunctions = host[constants.SUBFUNCTIONS] + self.assertIn(constants.LOWLATENCY, subfunctions) + + url = '%s' % self._get_path(hostid) + response = self.patch_dict_json(url, + headers=self.API_HEADERS, + kernel_provisioned=constants.KERNEL_STANDARD, + expect_errors=False) + + self.assertEqual('application/json', response.content_type) + self.assertEqual(http_client.OK, response.status_code) + self.assertEqual(constants.KERNEL_STANDARD, response.json['kernel_provisioned']) + + # Verify DB - lowlatency is not in subfunctions after update + host = self._get_test_host_by_hostname(hostname) + subfunctions = host[constants.SUBFUNCTIONS] + self.assertNotIn(constants.LOWLATENCY, subfunctions) + + # Verify that the method that updates kernel config is called once + self.fake_conductor_api.kernel_runtime_manifests.assert_called_once() + + def test_update_unlocked_worker_kernel(self): + """ Test updating the kernel from + standard -> lowlatency of an unlocked worker node + """ + worker = self._create_host(constants.WORKER, + admin=constants.ADMIN_UNLOCKED) + hostid = worker['uuid'] + hostname = worker['hostname'] + + # Verify DB - lowlatency is not in subfunctions before update + host = self._get_test_host_by_hostname(hostname) + subfunctions = host[constants.SUBFUNCTIONS] + self.assertNotIn(constants.LOWLATENCY, subfunctions) + + url = '%s' % self._get_path(hostid) + response = self.patch_dict_json(url, + headers=self.API_HEADERS, + kernel_provisioned=constants.KERNEL_LOWLATENCY, + expect_errors=True) + + self.assertEqual(http_client.BAD_REQUEST, response.status_code) + self.assertEqual('application/json',
response.content_type) + self.assertTrue(response.json['error_message']) + self.assertIn('Host must be locked', + response.json['error_message']) + + # Verify DB - lowlatency is not in subfunctions after update + host = self._get_test_host_by_hostname(hostname) + subfunctions = host[constants.SUBFUNCTIONS] + self.assertNotIn(constants.LOWLATENCY, subfunctions) + + # Verify that the method that updates kernel config is not called + self.fake_conductor_api.kernel_runtime_manifests.assert_not_called() + + def test_update_storage_kernel(self): + """ Test updating the kernel from + standard -> lowlatency of a locked storage node + """ + storage = self._create_host(constants.STORAGE, + admin=constants.ADMIN_LOCKED) + hostid = storage['uuid'] + hostname = storage['hostname'] + + # Verify DB - lowlatency is not in subfunctions before update + host = self._get_test_host_by_hostname(hostname) + subfunctions = host[constants.SUBFUNCTIONS] + self.assertNotIn(constants.LOWLATENCY, subfunctions) + + url = '%s' % self._get_path(hostid) + response = self.patch_dict_json(url, + headers=self.API_HEADERS, + kernel_provisioned=constants.KERNEL_LOWLATENCY, + expect_errors=True) + + self.assertEqual(http_client.BAD_REQUEST, response.status_code) + self.assertEqual('application/json', response.content_type) + self.assertTrue(response.json['error_message']) + self.assertIn('Can only modify worker nodes', + response.json['error_message']) + + # Verify DB - lowlatency is not in subfunctions after update + host = self._get_test_host_by_hostname(hostname) + subfunctions = host[constants.SUBFUNCTIONS] + self.assertNotIn(constants.LOWLATENCY, subfunctions) + + # Verify that the method that updates kernel config is not called + self.fake_conductor_api.kernel_runtime_manifests.assert_not_called() + + def test_update_standard_controller_kernel(self): + """ Test updating the kernel from + standard -> lowlatency of a locked standard controller node + """ + controller = 
self._create_host(constants.CONTROLLER, + admin=constants.ADMIN_LOCKED) + hostid = controller['uuid'] + hostname = controller['hostname'] + + # Verify DB - lowlatency is not in subfunctions before update + host = self._get_test_host_by_hostname(hostname) + subfunctions = host[constants.SUBFUNCTIONS] + self.assertNotIn(constants.LOWLATENCY, subfunctions) + + url = '%s' % self._get_path(hostid) + response = self.patch_dict_json(url, + headers=self.API_HEADERS, + kernel_provisioned=constants.KERNEL_LOWLATENCY, + expect_errors=True) + + self.assertEqual(http_client.BAD_REQUEST, response.status_code) + self.assertEqual('application/json', response.content_type) + self.assertTrue(response.json['error_message']) + self.assertIn('Can only modify worker nodes', + response.json['error_message']) + + # Verify DB - lowlatency is not in subfunctions after update + host = self._get_test_host_by_hostname(hostname) + subfunctions = host[constants.SUBFUNCTIONS] + self.assertNotIn(constants.LOWLATENCY, subfunctions) + + # Verify that the method that updates kernel config is not called + self.fake_conductor_api.kernel_runtime_manifests.assert_not_called() + + def test_update_aio_controller_kernel(self): + """ Test updating the kernel from + standard -> lowlatency of a locked ALL-IN-ONE controller node + """ + controller = self._create_host(constants.CONTROLLER, + subfunction=constants.WORKER, + admin=constants.ADMIN_LOCKED) + hostid = controller['uuid'] + hostname = controller['hostname'] + + # Verify DB - lowlatency is not in subfunctions before update + host = self._get_test_host_by_hostname(hostname) + subfunctions = host[constants.SUBFUNCTIONS] + self.assertNotIn(constants.LOWLATENCY, subfunctions) + + url = '%s' % self._get_path(hostid) + response = self.patch_dict_json(url, + headers=self.API_HEADERS, + kernel_provisioned=constants.KERNEL_LOWLATENCY, + expect_errors=False) + + self.assertEqual('application/json', response.content_type) + self.assertEqual(http_client.OK, 
response.status_code) + self.assertEqual(constants.KERNEL_LOWLATENCY, + response.json['kernel_provisioned']) + + # Verify DB - lowlatency is in subfunctions after update + host = self._get_test_host_by_hostname(hostname) + subfunctions = host[constants.SUBFUNCTIONS] + self.assertIn(constants.LOWLATENCY, subfunctions) + + # Verify that the method that updates kernel config is called once + self.fake_conductor_api.kernel_runtime_manifests.assert_called_once() diff --git a/sysinv/sysinv/sysinv/sysinv/tests/conductor/test_manager.py b/sysinv/sysinv/sysinv/sysinv/tests/conductor/test_manager.py index 7f2befafdb..4e1debac7c 100644 --- a/sysinv/sysinv/sysinv/sysinv/tests/conductor/test_manager.py +++ b/sysinv/sysinv/sysinv/sysinv/tests/conductor/test_manager.py @@ -551,6 +551,7 @@ class ManagerTestCase(base.DbTestCase): self.service._ceph_mon_create = mock.Mock() self.service._sx_to_dx_post_migration_actions = mock.Mock() self.alarm_raised = False + self.kernel_alarms = {} def tearDown(self): super(ManagerTestCase, self).tearDown() @@ -5088,6 +5089,226 @@ class ManagerTestCase(base.DbTestCase): expected_ouput = ['intelgpu', 'intelqat', 'intelfpga'] self.assertEqual(actual_output, expected_ouput) + def _kernel_alarms_fix_keys(self, alarm_id, entity_id=None): + """Create the nested dictionary keys if they are missing + Prevents KeyError exceptions + """ + if alarm_id not in self.kernel_alarms: + self.kernel_alarms[alarm_id] = {} + if entity_id and entity_id not in self.kernel_alarms[alarm_id]: + self.kernel_alarms[alarm_id][entity_id] = None + + def _kernel_set_fault(self, fault): + self._kernel_alarms_fix_keys(fault.alarm_id) + self.kernel_alarms[fault.alarm_id][fault.entity_instance_id] = fault + + def _kernel_clear_fault(self, alarm_id, entity_id): + self._kernel_alarms_fix_keys(alarm_id, entity_id) + self.kernel_alarms[alarm_id][entity_id] = None + + def _kernel_get_faults_by_id(self, alarm_id): + faults = [] + self._kernel_alarms_fix_keys(alarm_id) + for fault in 
self.kernel_alarms[alarm_id].values(): + if fault is not None: + faults.append(fault) + if not faults: + faults = None + return faults + + def _is_kernel_alarm_raised(self, alarm_id, hostname): + self._kernel_alarms_fix_keys(alarm_id) + entity_id = None + for key in self.kernel_alarms[alarm_id].keys(): + entity_id_partial = f"host={hostname}.kernel=" + if entity_id_partial in key: + entity_id = key + return self.kernel_alarms[alarm_id][entity_id] is not None + + return False + + @mock.patch('sysinv.conductor.manager.' + 'ConductorManager._config_apply_runtime_manifest') + @mock.patch('sysinv.conductor.manager.' + 'ConductorManager._config_update_hosts') + def test_kernel_runtime_manifests(self, + mock_config_update_hosts, + mock_config_apply_runtime_manifest): + self._create_test_ihosts() + ihost_hostname = 'controller-0' + ihost = self.service.get_ihost_by_hostname(self.context, + ihost_hostname) + ihost_uuid = ihost['uuid'] + personalities = [ihost['personality']] + host_uuids = [ihost_uuid] + config_dict = { + "personalities": personalities, + "host_uuids": host_uuids, + "classes": [ + 'platform::grub::kernel_image::runtime', + 'platform::config::file::subfunctions::lowlatency::runtime' + ] + } + config_uuid = '1234' + mock_config_update_hosts.return_value = config_uuid + self.service.kernel_runtime_manifests(context=self.context, + ihost_uuid=ihost_uuid) + + mock_config_update_hosts.assert_called_once() + mock_config_apply_runtime_manifest.assert_called_once_with(mock.ANY, + config_uuid, + config_dict) + + @mock.patch('sysinv.conductor.manager.' + 'ConductorManager._config_apply_runtime_manifest') + @mock.patch('sysinv.conductor.manager.' 
+ 'ConductorManager._config_update_hosts') + def test_kernel_runtime_manifests_no_host(self, + mock_config_update_hosts, + mock_apply_runtime_manifest): + ihost_uuid = str(uuid.uuid4()) + self.service.kernel_runtime_manifests(context=self.context, + ihost_uuid=ihost_uuid) + + mock_config_update_hosts.assert_not_called() + mock_apply_runtime_manifest.assert_not_called() + + def test_host_kernel_mismatch_alarm(self): + """Test raising and clearing 100.121 alarm id""" + alarm_id = fm_constants.FM_ALARM_ID_PROVISIONED_KERNEL_MISMATCH + + self.service.fm_api.set_fault.side_effect = self._kernel_set_fault + self.service.fm_api.clear_fault.side_effect = self._kernel_clear_fault + self.service.fm_api.get_faults_by_id.side_effect = \ + self._kernel_get_faults_by_id + + # Create controller-0 + ihost_hostname = 'controller-0' + config_uuid = str(uuid.uuid4()) + self._create_test_ihost( + personality=constants.CONTROLLER, + hostname=ihost_hostname, + uuid=str(uuid.uuid4()), + config_status=None, + config_applied=config_uuid, + config_target=config_uuid, + invprovision=constants.PROVISIONED, + administrative=constants.ADMIN_UNLOCKED, + operational=constants.OPERATIONAL_ENABLED, + availability=constants.AVAILABILITY_ONLINE, + mgmt_mac='00:11:22:33:44:55', + mgmt_ip='1.2.3.4') + + ihost = self.service.get_ihost_by_hostname(self.context, + ihost_hostname) + + # before - no alarm + self.assertFalse(self._is_kernel_alarm_raised(alarm_id, ihost_hostname)) + + # simulate a running kernel update from controller-0 agent + kernel_running = constants.KERNEL_LOWLATENCY + self.service.report_kernel_running(self.context, + ihost['uuid'], + kernel_running) + + # after kernel=lowlatency update - alarm raised + self.assertTrue(self._is_kernel_alarm_raised(alarm_id, ihost_hostname)) + + # simulate a running kernel update from controller-0 agent + kernel_running = constants.KERNEL_STANDARD + self.service.report_kernel_running(self.context, + ihost['uuid'], + kernel_running) + + # after 
kernel=standard update - alarm cleared + self.assertFalse(self._is_kernel_alarm_raised(alarm_id, ihost_hostname)) + + def test_controllers_kernel_mismatch_alarms(self): + """Test raising and clearing 100.120 alarm id""" + alarm_id = fm_constants.FM_ALARM_ID_CONTROLLERS_KERNEL_MISMATCH + + self.service.fm_api.set_fault.side_effect = self._kernel_set_fault + self.service.fm_api.clear_fault.side_effect = self._kernel_clear_fault + self.service.fm_api.get_faults_by_id.side_effect = \ + self._kernel_get_faults_by_id + + # Create controller-0 + ihost_hostname0 = 'controller-0' + config_uuid = str(uuid.uuid4()) + controller_0_uuid = str(uuid.uuid4()) + self._create_test_ihost( + personality=constants.CONTROLLER, + hostname=ihost_hostname0, + uuid=controller_0_uuid, + config_status=None, + config_applied=config_uuid, + config_target=config_uuid, + invprovision=constants.PROVISIONED, + administrative=constants.ADMIN_UNLOCKED, + operational=constants.OPERATIONAL_ENABLED, + availability=constants.AVAILABILITY_ONLINE, + mgmt_mac='00:11:22:33:44:55', + mgmt_ip='1.2.3.4') + + # Create controller-1 + ihost_hostname1 = 'controller-1' + config_uuid = str(uuid.uuid4()) + controller_1_uuid = str(uuid.uuid4()) + self._create_test_ihost( + personality=constants.CONTROLLER, + hostname=ihost_hostname1, + uuid=controller_1_uuid, + config_status=None, + config_applied=config_uuid, + config_target=config_uuid, + invprovision=constants.PROVISIONED, + administrative=constants.ADMIN_UNLOCKED, + operational=constants.OPERATIONAL_ENABLED, + availability=constants.AVAILABILITY_ONLINE, + mgmt_mac='22:44:33:55:11:66', + mgmt_ip='1.2.3.5') + + # before - no alarm + self.assertFalse(self._is_kernel_alarm_raised(alarm_id, + ihost_hostname0)) + self.assertFalse(self._is_kernel_alarm_raised(alarm_id, + ihost_hostname1)) + + # simulate a running kernel update from controller-0 agent + kernel_running = constants.KERNEL_STANDARD + self.service.report_kernel_running(self.context, + controller_0_uuid, + 
kernel_running) + + self.assertFalse(self._is_kernel_alarm_raised(alarm_id, + ihost_hostname0)) + self.assertFalse(self._is_kernel_alarm_raised(alarm_id, + ihost_hostname1)) + + # simulate a running kernel update from controller-1 agent + kernel_running = constants.KERNEL_LOWLATENCY + self.service.report_kernel_running(self.context, + controller_1_uuid, + kernel_running) + + # 2 alarms raised - for each controller + self.assertTrue(self._is_kernel_alarm_raised(alarm_id, + ihost_hostname0)) + self.assertTrue(self._is_kernel_alarm_raised(alarm_id, + ihost_hostname1)) + + # simulate a running kernel update from controller-0 agent + kernel_running = constants.KERNEL_LOWLATENCY + self.service.report_kernel_running(self.context, + controller_0_uuid, + kernel_running) + + # 2 alarms cleared - for each controller + self.assertFalse(self._is_kernel_alarm_raised(alarm_id, + ihost_hostname0)) + self.assertFalse(self._is_kernel_alarm_raised(alarm_id, + ihost_hostname1)) + @mock.patch('sysinv.conductor.manager.verify_files', lambda x, y: True) @mock.patch('sysinv.conductor.manager.cutils.ISO', mock.MagicMock()) diff --git a/sysinv/sysinv/sysinv/sysinv/tests/conductor/test_rpcapi.py b/sysinv/sysinv/sysinv/sysinv/tests/conductor/test_rpcapi.py index 26aba14081..ced9e1888f 100644 --- a/sysinv/sysinv/sysinv/sysinv/tests/conductor/test_rpcapi.py +++ b/sysinv/sysinv/sysinv/sysinv/tests/conductor/test_rpcapi.py @@ -33,6 +33,7 @@ from sysinv.db import api as dbapi from sysinv.openstack.common import rpc from sysinv.tests.db import base from sysinv.tests.db import utils as dbutils +from sysinv.common import constants CONF = cfg.CONF @@ -101,3 +102,14 @@ class RPCAPITestCase(base.DbTestCase): def test_update_dnsmasq_config(self): self._test_rpcapi('update_dnsmasq_config', 'call') + + def test_kernel_runtime_manifests(self): + self._test_rpcapi('kernel_runtime_manifests', + 'call', + ihost_uuid=self.fake_ihost['uuid']) + + def test_report_kernel_running(self): + 
self._test_rpcapi('report_kernel_running', + 'cast', + ihost_uuid=self.fake_ihost['uuid'], + kernel_running=constants.KERNEL_LOWLATENCY) diff --git a/sysinv/sysinv/sysinv/sysinv/tests/db/utils.py b/sysinv/sysinv/sysinv/sysinv/tests/db/utils.py index 71c0be8766..5969c0248c 100644 --- a/sysinv/sysinv/sysinv/sysinv/tests/db/utils.py +++ b/sysinv/sysinv/sysinv/sysinv/tests/db/utils.py @@ -15,7 +15,7 @@ # License for the specific language governing permissions and limitations # under the License. # -# Copyright (c) 2013-2022 Wind River Systems, Inc. +# Copyright (c) 2013-2023 Wind River Systems, Inc. # """Sysinv test utilities.""" @@ -111,11 +111,13 @@ def post_get_test_ihost(**kw): del inv['action_state'] del inv['recordtype'] del inv['uuid'] + del inv['kernel_running'] return inv def get_test_ihost(**kw): - inv = { + inv = \ + { 'id': kw.get('id', 123), 'forisystemid': kw.get('forisystemid', None), 'peer_id': kw.get('peer_id', None), @@ -139,6 +141,7 @@ def get_test_ihost(**kw): 'action': kw.get('action', "none"), 'task': kw.get('task', None), 'capabilities': kw.get('capabilities', {}), + 'kernel_running': kw.get('kernel_running', constants.KERNEL_STANDARD), 'subfunctions': kw.get('subfunctions', "ihostsubfunctions"), 'subfunction_oper': kw.get('subfunction_oper', "disabled"), 'subfunction_avail': kw.get('subfunction_avail', "not-installed"), @@ -168,7 +171,7 @@ def get_test_ihost(**kw): 'clock_synchronization': kw.get('clock_synchronization', constants.NTP), 'max_cpu_mhz_configured': kw.get('max_cpu_mhz_configured', ''), 'max_cpu_mhz_allowed': kw.get('max_cpu_mhz_allowed', '') - } + } return inv