diff --git a/sysinv/cgts-client/cgts-client/cgtsclient/v1/iHost_shell.py b/sysinv/cgts-client/cgts-client/cgtsclient/v1/iHost_shell.py index 02fca07c20..e976d8a664 100755 --- a/sysinv/cgts-client/cgts-client/cgtsclient/v1/iHost_shell.py +++ b/sysinv/cgts-client/cgts-client/cgtsclient/v1/iHost_shell.py @@ -37,7 +37,8 @@ def _print_ihost_show(ihost, columns=None, output_format=None): 'install_state', 'install_state_info', 'inv_state', 'clock_synchronization', 'device_image_update', 'reboot_needed', 'max_cpu_mhz_configured', - 'max_cpu_mhz_allowed', 'apparmor'] + 'min_cpu_mhz_allowed', 'max_cpu_mhz_allowed', + 'cstates_available', 'apparmor'] optional_fields = ['vsc_controllers', 'ttys_dcd'] if ihost.subfunctions != ihost.personality: fields.append('subfunctions') diff --git a/sysinv/sysinv/sysinv/sysinv/agent/manager.py b/sysinv/sysinv/sysinv/sysinv/agent/manager.py index be904e341d..d6974b24de 100644 --- a/sysinv/sysinv/sysinv/sysinv/agent/manager.py +++ b/sysinv/sysinv/sysinv/sysinv/agent/manager.py @@ -334,6 +334,22 @@ class AgentManager(service.PeriodicService): return constants.CONFIGURABLE return constants.NOT_CONFIGURABLE + def _get_min_cpu_mhz_allowed(self): + """Get minimum CPU frequency from lscpu + + Returns: + int: minimum CPU frequency in MHz + """ + output = utils.execute( + "lscpu | grep 'CPU min MHz' | awk '{ print $4 }' | cut -d ',' -f 1", + shell=True) + + if isinstance(output, tuple): + default_min = output[0] + if default_min: + LOG.info("Default CPU min frequency: {}".format(default_min)) + return int(default_min.split('.')[0]) + def _get_max_cpu_mhz_allowed(self): output = utils.execute( "lscpu | grep 'CPU max MHz' | awk '{ print $4 }' | cut -d ',' -f 1", @@ -345,6 +361,24 @@ class AgentManager(service.PeriodicService): LOG.info("Default CPU max frequency: {}".format(default_max)) return int(default_max.split('.')[0]) + def _get_cstates_names(self): + """Get the names of available c-state on the system. + + Returns: + list(string,..): A list of c-state names + """ + states = os.listdir(constants.CSTATE_PATH) + cstates = [] + + for state in states: + with open(os.path.join(constants.CSTATE_PATH, state + "/name"), + 'r') as file: + c_name = file.readline() + cstates.append(c_name.split('\n')[0]) + + cstates.sort() + return cstates + def _force_grub_update(self): """ Force update the grub on the first AIO controller after the initial config is completed @@ -742,6 +776,61 @@ class AgentManager(service.PeriodicService): kernel_running = constants.KERNEL_STANDARD return kernel_running + def _report_cstates_and_frequency_update(self, context, + ihost, rpcapi=None): + """Evaluate if minimum frequency, maximum frequency or cstates + are changed. If yes, report to conductor. + """ + if ihost is None: + return + + freq_dict = {} + try: + min_freq = self._get_min_cpu_mhz_allowed() + max_freq = self._get_max_cpu_mhz_allowed() + + if min_freq != ihost.min_cpu_mhz_allowed: + ihost.min_cpu_mhz_allowed = min_freq + freq_dict.update({ + constants.IHOST_MIN_CPU_MHZ_ALLOWED: + min_freq + }) + + if max_freq != ihost.max_cpu_mhz_allowed: + ihost.max_cpu_mhz_allowed = max_freq + freq_dict.update({ + constants.IHOST_MAX_CPU_MHZ_ALLOWED: + max_freq + }) + + if os.path.isfile(os.path.join(constants.CSTATE_PATH, + "state0/name")): + cstates_names = self._get_cstates_names() + if utils.cstates_need_update(ihost.cstates_available, + cstates_names): + ihost.cstates_available = ','.join(cstates_names) + freq_dict.update({ + constants.IHOST_CSTATES_AVAILABLE: + ','.join(cstates_names) + }) + except OSError as ex: + LOG.warning("Something wrong occurs during the cpu frequency" + f" search. {ex}") + return + + if len(freq_dict) == 0: + return + + if rpcapi is None: + rpcapi = conductor_rpcapi.ConductorAPI( + topic=conductor_rpcapi.MANAGER_TOPIC) + + LOG.info(f"Reporting CStates or Frequency changes {ihost['uuid']}" + f" -> {freq_dict}") + rpcapi.cstates_and_frequency_update_by_ihost(context, + ihost['uuid'], + freq_dict) + def ihost_inv_get_and_report(self, icontext): """Collect data for an ihost. @@ -856,6 +945,13 @@ class AgentManager(service.PeriodicService): "conductor.") pass + try: + self._report_cstates_and_frequency_update(icontext, ihost, rpcapi) + except exception.SysinvException as ex: + LOG.exception("Something wrong occurs during the cpu frequency" + f" search. {ex}") + pass + self._report_port_inventory(icontext, rpcapi, port_list, pci_device_list) diff --git a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/cpu_utils.py b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/cpu_utils.py index be2de86d3b..1c9f880fc4 100644 --- a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/cpu_utils.py +++ b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/cpu_utils.py @@ -154,6 +154,18 @@ def restructure_host_cpu_data(host): host.cpu_lists[cpu.numa_node].append(int(cpu.cpu)) +def check_power_manager(host): + """Check if power manager is present. If so, CPU MHZ + cannot be configured.""" + + labels = pecan.request.dbapi.label_get_by_host(host) + + if cutils.has_power_management_enabled(labels): + raise wsme.exc.ClientSideError( + "Host CPU MHz cannot be configured " + "if Power Manager is enabled.") + + def check_core_allocations(host, cpu_counts, cpu_lists=None): """Check that minimum and maximum core values are respected.""" diff --git a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py index 5f37f7ebfc..8ef6ee0bf0 100644 --- a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py +++ b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py @@ -560,9 +560,15 @@ class Host(base.APIBase): max_cpu_mhz_configured = wtypes.text "Represent the CPU max frequency" + min_cpu_mhz_allowed = wtypes.text + "Represent the default CPU min frequency" + max_cpu_mhz_allowed = wtypes.text "Represent the default CPU max frequency" + cstates_available = wtypes.text + "Represent the CStates available to use" + iscsi_initiator_name = wtypes.text "The iscsi initiator name (only used for worker hosts)" @@ -598,7 +604,8 @@ class Host(base.APIBase): 'install_state', 'install_state_info', 'iscsi_initiator_name', 'device_image_update', 'reboot_needed', 'inv_state', 'clock_synchronization', - 'max_cpu_mhz_configured', 'max_cpu_mhz_allowed', + 'max_cpu_mhz_configured', 'min_cpu_mhz_allowed', + 'max_cpu_mhz_allowed', 'cstates_available', 'apparmor'] fields = minimum_fields if not expand else None @@ -2897,6 +2904,8 @@ class HostController(rest.RestController): % (personality, load.software_version)) def _check_max_cpu_mhz_configured(self, host): + cpu_utils.check_power_manager(host.ihost_patch.get('uuid')) + # Max CPU frequency requested by the user and the maximum frequency # allowed by the CPU. max_cpu_mhz_configured = str(host.ihost_patch.get('max_cpu_mhz_configured', '')) diff --git a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/label.py b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/label.py index 0b4a1fd034..a8d2db8bcb 100644 --- a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/label.py +++ b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/label.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2022 Wind River Systems, Inc. +# Copyright (c) 2018-2023 Wind River Systems, Inc. # # SPDX-License-Identifier: Apache-2.0 # @@ -153,10 +153,15 @@ class LabelController(rest.RestController): sort_dir=sort_dir) def _apply_manifest_after_label_operation(self, uuid, keys): - if common.LABEL_DISABLE_NOHZ_FULL in keys: + if (common.LABEL_DISABLE_NOHZ_FULL in keys or + constants.KUBE_POWER_MANAGER_LABEL in keys): pecan.request.rpcapi.update_grub_config( pecan.request.context, uuid) + if constants.KUBE_POWER_MANAGER_LABEL in keys: + pecan.request.rpcapi.configure_power_manager( + pecan.request.context) + @wsme_pecan.wsexpose(LabelCollection, types.uuid, types.uuid, int, wtypes.text, wtypes.text) def get_all(self, uuid=None, marker=None, limit=None, @@ -362,6 +367,11 @@ def _semantic_check_worker_labels(body): raise wsme.exc.ClientSideError( _( "Invalid value for %s label." % constants.KUBE_CPU_MANAGER_LABEL)) + elif label_key == constants.KUBE_POWER_MANAGER_LABEL: + if label_value != constants.KUBE_POWER_MANAGER_VALUE: + raise wsme.exc.ClientSideError( + _( + "Invalid value for %s label." % constants.KUBE_POWER_MANAGER_LABEL)) def _get_system_enabled_k8s_plugins(): diff --git a/sysinv/sysinv/sysinv/sysinv/common/constants.py b/sysinv/sysinv/sysinv/sysinv/common/constants.py index e1c96dd94d..7a35338bee 100644 --- a/sysinv/sysinv/sysinv/sysinv/common/constants.py +++ b/sysinv/sysinv/sysinv/sysinv/common/constants.py @@ -208,7 +208,9 @@ PATCH_DEFAULT_TIMEOUT_IN_SECS = 6 # ihost field attributes IHOST_STOR_FUNCTION = 'stor_function' IHOST_IS_MAX_CPU_MHZ_CONFIGURABLE = 'is_max_cpu_configurable' +IHOST_MIN_CPU_MHZ_ALLOWED = 'min_cpu_mhz_allowed' IHOST_MAX_CPU_MHZ_ALLOWED = 'max_cpu_mhz_allowed' +IHOST_CSTATES_AVAILABLE = 'cstates_available' # ihost config_status field values CONFIG_STATUS_OUT_OF_DATE = "Config out-of-date" @@ -1960,6 +1962,8 @@ APP_EVALUATE_REAPPLY_HOST_AVAILABILITY = 'host-availability-updated' APP_EVALUATE_REAPPLY_TYPE_SYSTEM_MODIFY = 'system-modify' APP_EVALUATE_REAPPLY_TYPE_DETECTED_SWACT = 'detected-swact' APP_EVALUATE_REAPPLY_TYPE_KUBE_UPGRADE_COMPLETE = 'kube-upgrade-complete' +APP_EVALUATE_REAPPLY_TYPE_HOST_ADD_LABEL = 'host-label-assign' +APP_EVALUATE_REAPPLY_TYPE_HOST_MODIFY = 'host-modify' APP_EVALUATE_REAPPLY_TRIGGER_TO_METADATA_MAP = { UNLOCK_ACTION: @@ -1987,7 +1991,11 @@ APP_EVALUATE_REAPPLY_TRIGGER_TO_METADATA_MAP = { APP_EVALUATE_REAPPLY_TYPE_HOST_DELETE: APP_EVALUATE_REAPPLY_TYPE_HOST_DELETE, APP_EVALUATE_REAPPLY_TYPE_SYSTEM_MODIFY: - APP_EVALUATE_REAPPLY_TYPE_SYSTEM_MODIFY + APP_EVALUATE_REAPPLY_TYPE_SYSTEM_MODIFY, + APP_EVALUATE_REAPPLY_TYPE_HOST_ADD_LABEL: + APP_EVALUATE_REAPPLY_TYPE_HOST_ADD_LABEL, + APP_EVALUATE_REAPPLY_TYPE_HOST_MODIFY: + APP_EVALUATE_REAPPLY_TYPE_HOST_MODIFY } # Progress constants @@ -2039,6 +2047,7 @@ SRIOVDP_LABEL = 'sriovdp=enabled' KUBE_TOPOLOGY_MANAGER_LABEL = 'kube-topology-mgr-policy' KUBE_CPU_MANAGER_LABEL = 'kube-cpu-mgr-policy' KUBE_IGNORE_ISOL_CPU_LABEL = 'kube-ignore-isol-cpus=enabled' +KUBE_POWER_MANAGER_LABEL = 'power-management' # Accepted label values KUBE_TOPOLOGY_MANAGER_VALUES = [ @@ -2051,6 +2060,7 @@ KUBE_CPU_MANAGER_VALUES = [ 'none', 'static' ] +KUBE_POWER_MANAGER_VALUE = 'enabled' # Default DNS service domain DEFAULT_DNS_SERVICE_DOMAIN = 'cluster.local' @@ -2380,3 +2390,7 @@ PLATFORM_FIREWALL_SM_PORT_2 = 2223 PLATFORM_FIREWALL_NTP_PORT = 123 PLATFORM_FIREWALL_PTP_PORT = 319 PLATFORM_FIREWALL_PTP_PORT = 320 + +# CState support. Whether the path exists depends on hardware support and driver availability. +# Validating the existence of the path is important. +CSTATE_PATH = "/sys/devices/system/cpu/cpu0/cpuidle" diff --git a/sysinv/sysinv/sysinv/sysinv/common/utils.py b/sysinv/sysinv/sysinv/sysinv/common/utils.py index 4cc7411f68..2e8d9c4dc7 100644 --- a/sysinv/sysinv/sysinv/sysinv/common/utils.py +++ b/sysinv/sysinv/sysinv/sysinv/common/utils.py @@ -2632,6 +2632,19 @@ def has_sriovdp_enabled(labels): return False +def has_power_management_enabled(labels): + """Returns true if the power-management=enabled label is set """ + if not labels: + return False + + for label in labels: + if label.label_key == constants.KUBE_POWER_MANAGER_LABEL and label.label_value: + return constants.KUBE_POWER_MANAGER_VALUE == label.label_value.lower() + + # We haven't found the power-management node key. Return False + return False + + def has_disable_nohz_full_enabled(labels): """Returns true if the disable-nohz-full=enabled label is set """ if not labels: @@ -3877,3 +3890,33 @@ def checkout_ostree(ostree_repo, commit, target_dir, subpath): raise exception.SysinvException( "Error checkout ostree commit: %s" % (error), ) + + +def cstates_need_update(old_cstates, new_cstates): + if old_cstates is None: + return True + if new_cstates is None: + return False + + old_cstates_list = [] + if isinstance(old_cstates, str): + if old_cstates.strip() == '': + return True + old_cstates_list = old_cstates.split(',') + else: + old_cstates_list = old_cstates + + new_cstates_list = [] + if isinstance(new_cstates, str): + if new_cstates.strip() == '': + return False + new_cstates_list = new_cstates.split(',') + else: + new_cstates_list = new_cstates + + if len(old_cstates_list) != len(new_cstates_list): + return True + diff = [v for v in old_cstates_list if v not in new_cstates_list] + if len(diff) > 0: + return True + return False diff --git a/sysinv/sysinv/sysinv/sysinv/conductor/manager.py b/sysinv/sysinv/sysinv/sysinv/conductor/manager.py index 710c30be90..e62b4076c2 100644 --- a/sysinv/sysinv/sysinv/sysinv/conductor/manager.py +++ b/sysinv/sysinv/sysinv/sysinv/conductor/manager.py @@ -12822,6 +12822,20 @@ class ConductorManager(service.PeriodicService): """ self._update_pxe_config(host, load) + def cstates_and_frequency_update_by_ihost(self, context, + ihost_uuid, freq_dict): + if ihost_uuid is None or freq_dict is None: + return + + if len(freq_dict) > 0: + try: + self.dbapi.ihost_update(ihost_uuid, freq_dict) + self.evaluate_apps_reapply(context, trigger={ + 'type': constants.APP_EVALUATE_REAPPLY_TYPE_HOST_MODIFY}) + except (RuntimeError, Exception): + LOG.warning("An error occurred during the cstates and frequency update. " + f"{traceback.format_exc()}") + def load_update_by_host(self, context, ihost_id, sw_version): """Update the host_upgrade table with the running SW_VERSION @@ -14330,19 +14344,26 @@ class ConductorManager(service.PeriodicService): raise exception.SysinvException(_(msg)) def update_host_max_cpu_mhz_configured(self, context, host): - personalities = [constants.WORKER] + labels = self.dbapi.label_get_by_host(host['uuid']) - config_uuid = self._config_update_hosts(context, - personalities, - [host['uuid']]) - config_dict = { - "personalities": personalities, - "host_uuids": [host['uuid']], - "classes": ['platform::compute::config::runtime'] - } - self._config_apply_runtime_manifest(context, - config_uuid, - config_dict) + if not cutils.has_power_management_enabled(labels): + personalities = [constants.WORKER] + + config_uuid = self._config_update_hosts(context, + personalities, + [host['uuid']]) + config_dict = { + "personalities": personalities, + "host_uuids": [host['uuid']], + "classes": ['platform::compute::config::runtime'] + } + self._config_apply_runtime_manifest(context, + config_uuid, + config_dict) + + def configure_power_manager(self, context): + self.evaluate_apps_reapply(context, trigger={ + 'type': constants.APP_EVALUATE_REAPPLY_TYPE_HOST_ADD_LABEL}) def update_admin_ep_certificate(self, context): """ diff --git a/sysinv/sysinv/sysinv/sysinv/conductor/rpcapi.py b/sysinv/sysinv/sysinv/sysinv/conductor/rpcapi.py index 200c0c0dbd..48c5a890a8 100644 --- a/sysinv/sysinv/sysinv/sysinv/conductor/rpcapi.py +++ b/sysinv/sysinv/sysinv/sysinv/conductor/rpcapi.py @@ -2232,6 +2232,31 @@ class ConductorAPI(sysinv.openstack.common.rpc.proxy.RpcProxy): ihost_uuid=ihost_uuid, kernel_running=kernel_running)) + def configure_power_manager(self, context): + """Synchronously, execute application reapply to update host + power profiles and c-states for Kubernetes Power Manager. + + :param context: request context. + + """ + return self.call(context, + self.make_msg('configure_power_manager')) + + def cstates_and_frequency_update_by_ihost(self, context, + ihost_uuid, freq_dict): + """Synchronously, execute update of min, and max frequency, and cstates + available on host. + + :param context: request context. + :param host_uuid: the uuid of the host + :param freq_dict: dict with params to update + + """ + return self.call(context, + self.make_msg('cstates_and_frequency_update_by_ihost', + ihost_uuid=ihost_uuid, + freq_dict=freq_dict)) + def request_firewall_runtime_update(self, context, host_uuid): """ Sent from sysinv-agent, request the firewall update via runtime manifest diff --git a/sysinv/sysinv/sysinv/sysinv/db/sqlalchemy/migrate_repo/versions/130_min_cpu_frequency_and_cstates.py b/sysinv/sysinv/sysinv/sysinv/db/sqlalchemy/migrate_repo/versions/130_min_cpu_frequency_and_cstates.py new file mode 100644 index 0000000000..dbdb9d6734 --- /dev/null +++ b/sysinv/sysinv/sysinv/sysinv/db/sqlalchemy/migrate_repo/versions/130_min_cpu_frequency_and_cstates.py @@ -0,0 +1,20 @@ +# +# Copyright (c) 2023 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +from sqlalchemy import Column, MetaData, Table +from sqlalchemy import String + + +def upgrade(migrate_engine): + meta = MetaData() + meta.bind = migrate_engine + host_table = Table('i_host', meta, autoload=True) + host_table.create_column(Column('min_cpu_mhz_allowed', String(64))) + host_table.create_column(Column('cstates_available', String(255))) + + +def downgrade(migrate_engine): + raise NotImplementedError('SysInv database downgrade is unsupported.') diff --git a/sysinv/sysinv/sysinv/sysinv/db/sqlalchemy/models.py b/sysinv/sysinv/sysinv/sysinv/db/sqlalchemy/models.py index 0e414689b0..e59fe8c087 100644 --- a/sysinv/sysinv/sysinv/sysinv/db/sqlalchemy/models.py +++ b/sysinv/sysinv/sysinv/sysinv/db/sqlalchemy/models.py @@ -244,8 +244,11 @@ class ihost(Base): device_image_update = Column(String(64)) reboot_needed = Column(Boolean, nullable=False, default=False) max_cpu_mhz_configured = Column(String(64)) # in MHz + min_cpu_mhz_allowed = Column(String(64)) # in MHz max_cpu_mhz_allowed = Column(String(64)) # in MHz + cstates_available = Column(String(255)) + forisystemid = Column(Integer, ForeignKey('i_system.id', ondelete='CASCADE')) peer_id = Column(Integer, diff --git a/sysinv/sysinv/sysinv/sysinv/objects/host.py b/sysinv/sysinv/sysinv/sysinv/objects/host.py index 6ae376dd63..78735c339c 100644 --- a/sysinv/sysinv/sysinv/sysinv/objects/host.py +++ b/sysinv/sysinv/sysinv/sysinv/objects/host.py @@ -106,8 +106,10 @@ class Host(base.SysinvObject): 'device_image_update': utils.str_or_none, 'reboot_needed': utils.bool_or_none, 'max_cpu_mhz_configured': utils.str_or_none, - 'max_cpu_mhz_allowed': utils.str_or_none - } + 'min_cpu_mhz_allowed': utils.str_or_none, + 'max_cpu_mhz_allowed': utils.str_or_none, + 'cstates_available': utils.str_or_none + } _foreign_fields = { 'isystem_uuid': 'system:uuid', diff --git a/sysinv/sysinv/sysinv/sysinv/tests/db/utils.py b/sysinv/sysinv/sysinv/sysinv/tests/db/utils.py index 5969c0248c..89105c8eac 100644 --- a/sysinv/sysinv/sysinv/sysinv/tests/db/utils.py +++ b/sysinv/sysinv/sysinv/sysinv/tests/db/utils.py @@ -170,8 +170,10 @@ def get_test_ihost(**kw): 'inv_state': kw.get('inv_state', 'inventoried'), 'clock_synchronization': kw.get('clock_synchronization', constants.NTP), 'max_cpu_mhz_configured': kw.get('max_cpu_mhz_configured', ''), - 'max_cpu_mhz_allowed': kw.get('max_cpu_mhz_allowed', '') - } + 'min_cpu_mhz_allowed': kw.get('min_cpu_mhz_allowed', ''), + 'max_cpu_mhz_allowed': kw.get('max_cpu_mhz_allowed', ''), + 'cstates_available': kw.get('cstates_available', '') + } return inv