Merge "Add support to Power Manager Profiles config"

This commit is contained in:
Zuul 2023-08-16 21:58:01 +00:00 committed by Gerrit Code Review
commit 0e616f6c91
13 changed files with 279 additions and 21 deletions

View File

@ -37,7 +37,8 @@ def _print_ihost_show(ihost, columns=None, output_format=None):
'install_state', 'install_state_info', 'inv_state', 'install_state', 'install_state_info', 'inv_state',
'clock_synchronization', 'device_image_update', 'clock_synchronization', 'device_image_update',
'reboot_needed', 'max_cpu_mhz_configured', 'reboot_needed', 'max_cpu_mhz_configured',
'max_cpu_mhz_allowed', 'apparmor'] 'min_cpu_mhz_allowed', 'max_cpu_mhz_allowed',
'cstates_available', 'apparmor']
optional_fields = ['vsc_controllers', 'ttys_dcd'] optional_fields = ['vsc_controllers', 'ttys_dcd']
if ihost.subfunctions != ihost.personality: if ihost.subfunctions != ihost.personality:
fields.append('subfunctions') fields.append('subfunctions')

View File

@ -334,6 +334,22 @@ class AgentManager(service.PeriodicService):
return constants.CONFIGURABLE return constants.CONFIGURABLE
return constants.NOT_CONFIGURABLE return constants.NOT_CONFIGURABLE
def _get_min_cpu_mhz_allowed(self):
    """Read the minimum CPU frequency reported by ``lscpu``.

    The 'CPU min MHz' value is extracted through a shell pipeline and
    truncated to its integer part (everything before the first '.').

    Returns:
        int: minimum CPU frequency in MHz, or None when the command
        result is not a tuple or its first element is empty.
    """
    result = utils.execute(
        "lscpu | grep 'CPU min MHz' | awk '{ print $4 }' | cut -d ',' -f 1",
        shell=True)
    if not isinstance(result, tuple):
        return None

    # First element of the tuple is the text produced by the pipeline.
    reported = result[0]
    if not reported:
        return None

    LOG.info("Default CPU min frequency: {}".format(reported))
    whole_mhz = reported.partition('.')[0]
    return int(whole_mhz)
def _get_max_cpu_mhz_allowed(self): def _get_max_cpu_mhz_allowed(self):
output = utils.execute( output = utils.execute(
"lscpu | grep 'CPU max MHz' | awk '{ print $4 }' | cut -d ',' -f 1", "lscpu | grep 'CPU max MHz' | awk '{ print $4 }' | cut -d ',' -f 1",
@ -345,6 +361,24 @@ class AgentManager(service.PeriodicService):
LOG.info("Default CPU max frequency: {}".format(default_max)) LOG.info("Default CPU max frequency: {}".format(default_max))
return int(default_max.split('.')[0]) return int(default_max.split('.')[0])
def _get_cstates_names(self):
    """Collect the c-state names exposed under ``constants.CSTATE_PATH``.

    Every entry of that directory is expected to contain a ``name`` file;
    the first line of each file (without its newline) is one c-state name.

    Returns:
        list(string,..): the c-state names, sorted.
    """
    names = []
    for entry in os.listdir(constants.CSTATE_PATH):
        name_path = os.path.join(constants.CSTATE_PATH, entry + "/name")
        with open(name_path, 'r') as name_file:
            first_line = name_file.readline()
        # Keep only the text that precedes the line terminator.
        names.append(first_line.partition('\n')[0])
    return sorted(names)
def _force_grub_update(self): def _force_grub_update(self):
""" Force update the grub on the first AIO controller after the initial """ Force update the grub on the first AIO controller after the initial
config is completed config is completed
@ -742,6 +776,61 @@ class AgentManager(service.PeriodicService):
kernel_running = constants.KERNEL_STANDARD kernel_running = constants.KERNEL_STANDARD
return kernel_running return kernel_running
def _report_cstates_and_frequency_update(self, context,
                                         ihost, rpcapi=None):
    """Report changed CPU frequency limits or c-states to the conductor.

    The minimum/maximum CPU frequency and (when the cpuidle sysfs tree is
    present) the available c-state names are compared with the values held
    by ``ihost``. Changed values are written back to ``ihost`` and sent to
    the conductor in a single call; nothing is sent when nothing changed.

    :param context: request context passed through to the conductor call.
    :param ihost: host object to compare against; nothing is done if None.
    :param rpcapi: conductor RPC API to use; a new ``ConductorAPI`` is
                   created when omitted.
    """
    if ihost is None:
        return

    def _changed(detected, stored):
        # Detected frequencies are ints while the host record keeps them
        # as text. Comparing the raw values would always report a change,
        # so both sides are compared in textual form (None stays None).
        detected_text = None if detected is None else str(detected)
        stored_text = None if stored is None else str(stored)
        return detected_text != stored_text

    freq_dict = {}
    try:
        min_freq = self._get_min_cpu_mhz_allowed()
        max_freq = self._get_max_cpu_mhz_allowed()

        if _changed(min_freq, ihost.min_cpu_mhz_allowed):
            ihost.min_cpu_mhz_allowed = min_freq
            freq_dict[constants.IHOST_MIN_CPU_MHZ_ALLOWED] = min_freq

        if _changed(max_freq, ihost.max_cpu_mhz_allowed):
            ihost.max_cpu_mhz_allowed = max_freq
            freq_dict[constants.IHOST_MAX_CPU_MHZ_ALLOWED] = max_freq

        # The cpuidle directory only exists with hardware/driver support,
        # so c-states are inspected only when state0 is present.
        if os.path.isfile(os.path.join(constants.CSTATE_PATH,
                                       "state0/name")):
            cstates_names = self._get_cstates_names()
            if utils.cstates_need_update(ihost.cstates_available,
                                         cstates_names):
                joined_cstates = ','.join(cstates_names)
                ihost.cstates_available = joined_cstates
                freq_dict[constants.IHOST_CSTATES_AVAILABLE] = joined_cstates
    except OSError as ex:
        LOG.warning("Something wrong occurs during the cpu frequency"
                    f" search. {ex}")
        return

    if not freq_dict:
        return

    if rpcapi is None:
        rpcapi = conductor_rpcapi.ConductorAPI(
            topic=conductor_rpcapi.MANAGER_TOPIC)

    LOG.info(f"Reporting CStates or Frequency changes {ihost['uuid']}"
             f" -> {freq_dict}")
    rpcapi.cstates_and_frequency_update_by_ihost(context,
                                                 ihost['uuid'],
                                                 freq_dict)
def ihost_inv_get_and_report(self, icontext): def ihost_inv_get_and_report(self, icontext):
"""Collect data for an ihost. """Collect data for an ihost.
@ -856,6 +945,13 @@ class AgentManager(service.PeriodicService):
"conductor.") "conductor.")
pass pass
try:
self._report_cstates_and_frequency_update(icontext, ihost, rpcapi)
except exception.SysinvException as ex:
LOG.exception("Something wrong occurs during the cpu frequency"
f" search. {ex}")
pass
self._report_port_inventory(icontext, rpcapi, self._report_port_inventory(icontext, rpcapi,
port_list, pci_device_list) port_list, pci_device_list)

View File

@ -154,6 +154,18 @@ def restructure_host_cpu_data(host):
host.cpu_lists[cpu.numa_node].append(int(cpu.cpu)) host.cpu_lists[cpu.numa_node].append(int(cpu.cpu))
def check_power_manager(host):
    """Reject CPU MHz configuration when Power Manager is enabled.

    Looks up the labels of ``host`` and raises if they indicate that power
    management is enabled.

    :param host: host identifier handed to ``label_get_by_host``.
    :raises wsme.exc.ClientSideError: if power management is enabled.
    """
    host_labels = pecan.request.dbapi.label_get_by_host(host)
    if not cutils.has_power_management_enabled(host_labels):
        return
    raise wsme.exc.ClientSideError(
        "Host CPU MHz cannot be configured "
        "if Power Manager is enabled.")
def check_core_allocations(host, cpu_counts, cpu_lists=None): def check_core_allocations(host, cpu_counts, cpu_lists=None):
"""Check that minimum and maximum core values are respected.""" """Check that minimum and maximum core values are respected."""

View File

@ -560,9 +560,15 @@ class Host(base.APIBase):
max_cpu_mhz_configured = wtypes.text max_cpu_mhz_configured = wtypes.text
"Represent the CPU max frequency" "Represent the CPU max frequency"
min_cpu_mhz_allowed = wtypes.text
"Represent the default CPU min frequency"
max_cpu_mhz_allowed = wtypes.text max_cpu_mhz_allowed = wtypes.text
"Represent the default CPU max frequency" "Represent the default CPU max frequency"
cstates_available = wtypes.text
"Represent the CStates available to use"
iscsi_initiator_name = wtypes.text iscsi_initiator_name = wtypes.text
"The iscsi initiator name (only used for worker hosts)" "The iscsi initiator name (only used for worker hosts)"
@ -598,7 +604,8 @@ class Host(base.APIBase):
'install_state', 'install_state_info', 'install_state', 'install_state_info',
'iscsi_initiator_name', 'device_image_update', 'iscsi_initiator_name', 'device_image_update',
'reboot_needed', 'inv_state', 'clock_synchronization', 'reboot_needed', 'inv_state', 'clock_synchronization',
'max_cpu_mhz_configured', 'max_cpu_mhz_allowed', 'max_cpu_mhz_configured', 'min_cpu_mhz_allowed',
'max_cpu_mhz_allowed', 'cstates_available',
'apparmor'] 'apparmor']
fields = minimum_fields if not expand else None fields = minimum_fields if not expand else None
@ -2897,6 +2904,8 @@ class HostController(rest.RestController):
% (personality, load.software_version)) % (personality, load.software_version))
def _check_max_cpu_mhz_configured(self, host): def _check_max_cpu_mhz_configured(self, host):
cpu_utils.check_power_manager(host.ihost_patch.get('uuid'))
# Max CPU frequency requested by the user and the maximum frequency # Max CPU frequency requested by the user and the maximum frequency
# allowed by the CPU. # allowed by the CPU.
max_cpu_mhz_configured = str(host.ihost_patch.get('max_cpu_mhz_configured', '')) max_cpu_mhz_configured = str(host.ihost_patch.get('max_cpu_mhz_configured', ''))

View File

@ -1,4 +1,4 @@
# Copyright (c) 2018-2022 Wind River Systems, Inc. # Copyright (c) 2018-2023 Wind River Systems, Inc.
# #
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# #
@ -153,10 +153,15 @@ class LabelController(rest.RestController):
sort_dir=sort_dir) sort_dir=sort_dir)
def _apply_manifest_after_label_operation(self, uuid, keys): def _apply_manifest_after_label_operation(self, uuid, keys):
if common.LABEL_DISABLE_NOHZ_FULL in keys: if (common.LABEL_DISABLE_NOHZ_FULL in keys or
constants.KUBE_POWER_MANAGER_LABEL in keys):
pecan.request.rpcapi.update_grub_config( pecan.request.rpcapi.update_grub_config(
pecan.request.context, uuid) pecan.request.context, uuid)
if constants.KUBE_POWER_MANAGER_LABEL in keys:
pecan.request.rpcapi.configure_power_manager(
pecan.request.context)
@wsme_pecan.wsexpose(LabelCollection, types.uuid, types.uuid, @wsme_pecan.wsexpose(LabelCollection, types.uuid, types.uuid,
int, wtypes.text, wtypes.text) int, wtypes.text, wtypes.text)
def get_all(self, uuid=None, marker=None, limit=None, def get_all(self, uuid=None, marker=None, limit=None,
@ -362,6 +367,11 @@ def _semantic_check_worker_labels(body):
raise wsme.exc.ClientSideError( raise wsme.exc.ClientSideError(
_( _(
"Invalid value for %s label." % constants.KUBE_CPU_MANAGER_LABEL)) "Invalid value for %s label." % constants.KUBE_CPU_MANAGER_LABEL))
elif label_key == constants.KUBE_POWER_MANAGER_LABEL:
if label_value != constants.KUBE_POWER_MANAGER_VALUE:
raise wsme.exc.ClientSideError(
_(
"Invalid value for %s label." % constants.KUBE_POWER_MANAGER_LABEL))
def _get_system_enabled_k8s_plugins(): def _get_system_enabled_k8s_plugins():

View File

@ -208,7 +208,9 @@ PATCH_DEFAULT_TIMEOUT_IN_SECS = 6
# ihost field attributes # ihost field attributes
IHOST_STOR_FUNCTION = 'stor_function' IHOST_STOR_FUNCTION = 'stor_function'
IHOST_IS_MAX_CPU_MHZ_CONFIGURABLE = 'is_max_cpu_configurable' IHOST_IS_MAX_CPU_MHZ_CONFIGURABLE = 'is_max_cpu_configurable'
IHOST_MIN_CPU_MHZ_ALLOWED = 'min_cpu_mhz_allowed'
IHOST_MAX_CPU_MHZ_ALLOWED = 'max_cpu_mhz_allowed' IHOST_MAX_CPU_MHZ_ALLOWED = 'max_cpu_mhz_allowed'
IHOST_CSTATES_AVAILABLE = 'cstates_available'
# ihost config_status field values # ihost config_status field values
CONFIG_STATUS_OUT_OF_DATE = "Config out-of-date" CONFIG_STATUS_OUT_OF_DATE = "Config out-of-date"
@ -1960,6 +1962,8 @@ APP_EVALUATE_REAPPLY_HOST_AVAILABILITY = 'host-availability-updated'
APP_EVALUATE_REAPPLY_TYPE_SYSTEM_MODIFY = 'system-modify' APP_EVALUATE_REAPPLY_TYPE_SYSTEM_MODIFY = 'system-modify'
APP_EVALUATE_REAPPLY_TYPE_DETECTED_SWACT = 'detected-swact' APP_EVALUATE_REAPPLY_TYPE_DETECTED_SWACT = 'detected-swact'
APP_EVALUATE_REAPPLY_TYPE_KUBE_UPGRADE_COMPLETE = 'kube-upgrade-complete' APP_EVALUATE_REAPPLY_TYPE_KUBE_UPGRADE_COMPLETE = 'kube-upgrade-complete'
APP_EVALUATE_REAPPLY_TYPE_HOST_ADD_LABEL = 'host-label-assign'
APP_EVALUATE_REAPPLY_TYPE_HOST_MODIFY = 'host-modify'
APP_EVALUATE_REAPPLY_TRIGGER_TO_METADATA_MAP = { APP_EVALUATE_REAPPLY_TRIGGER_TO_METADATA_MAP = {
UNLOCK_ACTION: UNLOCK_ACTION:
@ -1987,7 +1991,11 @@ APP_EVALUATE_REAPPLY_TRIGGER_TO_METADATA_MAP = {
APP_EVALUATE_REAPPLY_TYPE_HOST_DELETE: APP_EVALUATE_REAPPLY_TYPE_HOST_DELETE:
APP_EVALUATE_REAPPLY_TYPE_HOST_DELETE, APP_EVALUATE_REAPPLY_TYPE_HOST_DELETE,
APP_EVALUATE_REAPPLY_TYPE_SYSTEM_MODIFY: APP_EVALUATE_REAPPLY_TYPE_SYSTEM_MODIFY:
APP_EVALUATE_REAPPLY_TYPE_SYSTEM_MODIFY APP_EVALUATE_REAPPLY_TYPE_SYSTEM_MODIFY,
APP_EVALUATE_REAPPLY_TYPE_HOST_ADD_LABEL:
APP_EVALUATE_REAPPLY_TYPE_HOST_ADD_LABEL,
APP_EVALUATE_REAPPLY_TYPE_HOST_MODIFY:
APP_EVALUATE_REAPPLY_TYPE_HOST_MODIFY
} }
# Progress constants # Progress constants
@ -2039,6 +2047,7 @@ SRIOVDP_LABEL = 'sriovdp=enabled'
KUBE_TOPOLOGY_MANAGER_LABEL = 'kube-topology-mgr-policy' KUBE_TOPOLOGY_MANAGER_LABEL = 'kube-topology-mgr-policy'
KUBE_CPU_MANAGER_LABEL = 'kube-cpu-mgr-policy' KUBE_CPU_MANAGER_LABEL = 'kube-cpu-mgr-policy'
KUBE_IGNORE_ISOL_CPU_LABEL = 'kube-ignore-isol-cpus=enabled' KUBE_IGNORE_ISOL_CPU_LABEL = 'kube-ignore-isol-cpus=enabled'
KUBE_POWER_MANAGER_LABEL = 'power-management'
# Accepted label values # Accepted label values
KUBE_TOPOLOGY_MANAGER_VALUES = [ KUBE_TOPOLOGY_MANAGER_VALUES = [
@ -2051,6 +2060,7 @@ KUBE_CPU_MANAGER_VALUES = [
'none', 'none',
'static' 'static'
] ]
KUBE_POWER_MANAGER_VALUE = 'enabled'
# Default DNS service domain # Default DNS service domain
DEFAULT_DNS_SERVICE_DOMAIN = 'cluster.local' DEFAULT_DNS_SERVICE_DOMAIN = 'cluster.local'
@ -2380,3 +2390,7 @@ PLATFORM_FIREWALL_SM_PORT_2 = 2223
PLATFORM_FIREWALL_NTP_PORT = 123 PLATFORM_FIREWALL_NTP_PORT = 123
PLATFORM_FIREWALL_PTP_PORT = 319 PLATFORM_FIREWALL_PTP_PORT = 319
PLATFORM_FIREWALL_PTP_PORT = 320 PLATFORM_FIREWALL_PTP_PORT = 320
# CState support. Whether the path exists depends on hardware support and driver availability.
# Validating the existence of the path is important.
CSTATE_PATH = "/sys/devices/system/cpu/cpu0/cpuidle"

View File

@ -2632,6 +2632,19 @@ def has_sriovdp_enabled(labels):
return False return False
def has_power_management_enabled(labels):
    """Tell whether the power-management=enabled label is set.

    The first label whose key is the power-management key and whose value
    is non-empty decides the result; its value is compared
    case-insensitively with the accepted value.

    :param labels: iterable of label objects exposing ``label_key`` and
                   ``label_value``; may be None or empty.
    :returns: True if the deciding label carries the accepted value,
              False otherwise (including when no such label exists).
    """
    for entry in labels or ():
        if entry.label_key != constants.KUBE_POWER_MANAGER_LABEL:
            continue
        if entry.label_value:
            return entry.label_value.lower() == constants.KUBE_POWER_MANAGER_VALUE

    # No power-management label with a value was found.
    return False
def has_disable_nohz_full_enabled(labels): def has_disable_nohz_full_enabled(labels):
"""Returns true if the disable-nohz-full=enabled label is set """ """Returns true if the disable-nohz-full=enabled label is set """
if not labels: if not labels:
@ -3877,3 +3890,33 @@ def checkout_ostree(ostree_repo, commit, target_dir, subpath):
raise exception.SysinvException( raise exception.SysinvException(
"Error checkout ostree commit: %s" % (error), "Error checkout ostree commit: %s" % (error),
) )
def cstates_need_update(old_cstates, new_cstates):
    """Decide whether the stored c-state list must be replaced.

    Both arguments may be a comma-separated string or a sequence of
    c-state names. Order is ignored when comparing.

    :param old_cstates: c-states currently stored (str, sequence or None).
    :param new_cstates: c-states just detected (str, sequence or None).
    :returns: True when nothing is stored yet (None or blank string) or
              when the stored names differ from the detected ones; False
              when nothing was detected (None or blank string) or both
              hold the same names.
    """
    if old_cstates is None:
        return True
    if new_cstates is None:
        return False

    if isinstance(old_cstates, str):
        if old_cstates.strip() == '':
            return True
        old_names = old_cstates.split(',')
    else:
        old_names = old_cstates

    if isinstance(new_cstates, str):
        if new_cstates.strip() == '':
            return False
        new_names = new_cstates.split(',')
    else:
        new_names = new_cstates

    # Compare as multisets: a one-directional membership test would treat
    # "C1,C1" and "C1,C2" as equal because every old name is also present
    # in the new list.
    return sorted(old_names) != sorted(new_names)

View File

@ -12822,6 +12822,20 @@ class ConductorManager(service.PeriodicService):
""" """
self._update_pxe_config(host, load) self._update_pxe_config(host, load)
def cstates_and_frequency_update_by_ihost(self, context,
                                          ihost_uuid, freq_dict):
    """Store reported frequency/c-state values for a host.

    Writes ``freq_dict`` to the host record and then requests an
    application reapply evaluation with the host-modify trigger. Any
    exception raised by those two steps is logged as a warning and not
    propagated.

    :param context: request context.
    :param ihost_uuid: uuid of the host to update; no-op when None.
    :param freq_dict: dict of host fields to update; no-op when None or
                      empty.
    """
    if ihost_uuid is None or freq_dict is None:
        return
    if len(freq_dict) == 0:
        return

    try:
        self.dbapi.ihost_update(ihost_uuid, freq_dict)
        reapply_trigger = {
            'type': constants.APP_EVALUATE_REAPPLY_TYPE_HOST_MODIFY,
        }
        self.evaluate_apps_reapply(context, trigger=reapply_trigger)
    except Exception:
        LOG.warning("An error occurred during the cstates and frequency update. "
                    f"{traceback.format_exc()}")
def load_update_by_host(self, context, ihost_id, sw_version): def load_update_by_host(self, context, ihost_id, sw_version):
"""Update the host_upgrade table with the running SW_VERSION """Update the host_upgrade table with the running SW_VERSION
@ -14330,19 +14344,26 @@ class ConductorManager(service.PeriodicService):
raise exception.SysinvException(_(msg)) raise exception.SysinvException(_(msg))
def update_host_max_cpu_mhz_configured(self, context, host): def update_host_max_cpu_mhz_configured(self, context, host):
personalities = [constants.WORKER] labels = self.dbapi.label_get_by_host(host['uuid'])
config_uuid = self._config_update_hosts(context, if not cutils.has_power_management_enabled(labels):
personalities, personalities = [constants.WORKER]
[host['uuid']])
config_dict = { config_uuid = self._config_update_hosts(context,
"personalities": personalities, personalities,
"host_uuids": [host['uuid']], [host['uuid']])
"classes": ['platform::compute::config::runtime'] config_dict = {
} "personalities": personalities,
self._config_apply_runtime_manifest(context, "host_uuids": [host['uuid']],
config_uuid, "classes": ['platform::compute::config::runtime']
config_dict) }
self._config_apply_runtime_manifest(context,
config_uuid,
config_dict)
def configure_power_manager(self, context):
    """Request an application reapply evaluation for a host label assignment.

    :param context: request context.
    """
    reapply_trigger = {
        'type': constants.APP_EVALUATE_REAPPLY_TYPE_HOST_ADD_LABEL,
    }
    self.evaluate_apps_reapply(context, trigger=reapply_trigger)
def update_admin_ep_certificate(self, context): def update_admin_ep_certificate(self, context):
""" """

View File

@ -2232,6 +2232,31 @@ class ConductorAPI(sysinv.openstack.common.rpc.proxy.RpcProxy):
ihost_uuid=ihost_uuid, ihost_uuid=ihost_uuid,
kernel_running=kernel_running)) kernel_running=kernel_running))
def configure_power_manager(self, context):
    """Synchronously ask the conductor to run ``configure_power_manager``.

    Used to execute an application reapply so that host power profiles and
    c-states are updated for the Kubernetes Power Manager.

    :param context: request context.
    :returns: the result of the synchronous call.
    """
    request = self.make_msg('configure_power_manager')
    return self.call(context, request)
def cstates_and_frequency_update_by_ihost(self, context,
                                          ihost_uuid, freq_dict):
    """Synchronously send min/max frequency and c-state updates for a host.

    :param context: request context.
    :param ihost_uuid: the uuid of the host
    :param freq_dict: dict with params to update
    :returns: the result of the synchronous call.
    """
    request = self.make_msg('cstates_and_frequency_update_by_ihost',
                            ihost_uuid=ihost_uuid,
                            freq_dict=freq_dict)
    return self.call(context, request)
def request_firewall_runtime_update(self, context, host_uuid): def request_firewall_runtime_update(self, context, host_uuid):
""" Sent from sysinv-agent, request the firewall update via runtime manifest """ Sent from sysinv-agent, request the firewall update via runtime manifest

View File

@ -0,0 +1,20 @@
#
# Copyright (c) 2023 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from sqlalchemy import Column, MetaData, Table
from sqlalchemy import String
def upgrade(migrate_engine):
    """Add ``min_cpu_mhz_allowed`` and ``cstates_available`` to ``i_host``.

    :param migrate_engine: engine the table metadata is bound to.
    """
    metadata = MetaData()
    metadata.bind = migrate_engine

    i_host = Table('i_host', metadata, autoload=True)

    # (column name, string length) for each column introduced here.
    new_columns = (
        ('min_cpu_mhz_allowed', 64),
        ('cstates_available', 255),
    )
    for column_name, max_length in new_columns:
        i_host.create_column(Column(column_name, String(max_length)))
def downgrade(migrate_engine):
    """Refuse to downgrade; always raises.

    :param migrate_engine: unused.
    :raises NotImplementedError: always.
    """
    reason = 'SysInv database downgrade is unsupported.'
    raise NotImplementedError(reason)

View File

@ -244,8 +244,11 @@ class ihost(Base):
device_image_update = Column(String(64)) device_image_update = Column(String(64))
reboot_needed = Column(Boolean, nullable=False, default=False) reboot_needed = Column(Boolean, nullable=False, default=False)
max_cpu_mhz_configured = Column(String(64)) # in MHz max_cpu_mhz_configured = Column(String(64)) # in MHz
min_cpu_mhz_allowed = Column(String(64)) # in MHz
max_cpu_mhz_allowed = Column(String(64)) # in MHz max_cpu_mhz_allowed = Column(String(64)) # in MHz
cstates_available = Column(String(255))
forisystemid = Column(Integer, forisystemid = Column(Integer,
ForeignKey('i_system.id', ondelete='CASCADE')) ForeignKey('i_system.id', ondelete='CASCADE'))
peer_id = Column(Integer, peer_id = Column(Integer,

View File

@ -106,8 +106,10 @@ class Host(base.SysinvObject):
'device_image_update': utils.str_or_none, 'device_image_update': utils.str_or_none,
'reboot_needed': utils.bool_or_none, 'reboot_needed': utils.bool_or_none,
'max_cpu_mhz_configured': utils.str_or_none, 'max_cpu_mhz_configured': utils.str_or_none,
'max_cpu_mhz_allowed': utils.str_or_none 'min_cpu_mhz_allowed': utils.str_or_none,
} 'max_cpu_mhz_allowed': utils.str_or_none,
'cstates_available': utils.str_or_none
}
_foreign_fields = { _foreign_fields = {
'isystem_uuid': 'system:uuid', 'isystem_uuid': 'system:uuid',

View File

@ -170,8 +170,10 @@ def get_test_ihost(**kw):
'inv_state': kw.get('inv_state', 'inventoried'), 'inv_state': kw.get('inv_state', 'inventoried'),
'clock_synchronization': kw.get('clock_synchronization', constants.NTP), 'clock_synchronization': kw.get('clock_synchronization', constants.NTP),
'max_cpu_mhz_configured': kw.get('max_cpu_mhz_configured', ''), 'max_cpu_mhz_configured': kw.get('max_cpu_mhz_configured', ''),
'max_cpu_mhz_allowed': kw.get('max_cpu_mhz_allowed', '') 'min_cpu_mhz_allowed': kw.get('min_cpu_mhz_allowed', ''),
} 'max_cpu_mhz_allowed': kw.get('max_cpu_mhz_allowed', ''),
'cstates_available': kw.get('cstates_available', '')
}
return inv return inv