Merge "Add support to Power Manager Profiles config"

This commit is contained in:
Zuul 2023-08-16 21:58:01 +00:00 committed by Gerrit Code Review
commit 0e616f6c91
13 changed files with 279 additions and 21 deletions

View File

@ -37,7 +37,8 @@ def _print_ihost_show(ihost, columns=None, output_format=None):
'install_state', 'install_state_info', 'inv_state',
'clock_synchronization', 'device_image_update',
'reboot_needed', 'max_cpu_mhz_configured',
'max_cpu_mhz_allowed', 'apparmor']
'min_cpu_mhz_allowed', 'max_cpu_mhz_allowed',
'cstates_available', 'apparmor']
optional_fields = ['vsc_controllers', 'ttys_dcd']
if ihost.subfunctions != ihost.personality:
fields.append('subfunctions')

View File

@ -334,6 +334,22 @@ class AgentManager(service.PeriodicService):
return constants.CONFIGURABLE
return constants.NOT_CONFIGURABLE
def _get_min_cpu_mhz_allowed(self):
    """Get minimum CPU frequency from lscpu

    Returns:
        int: minimum CPU frequency in MHz, or None when the command
        output does not carry a usable 'CPU min MHz' value
    """
    output = utils.execute(
        "lscpu | grep 'CPU min MHz' | awk '{ print $4 }' | cut -d ',' -f 1",
        shell=True)

    # Only a tuple result is inspected; its first item is used as the
    # command output (presumably stdout -- confirm against utils.execute).
    # Binding a default here avoids an unbound local for any other result.
    default_min = output[0] if isinstance(output, tuple) else None

    if default_min:
        # Drop the trailing newline (and any stray blanks) so that a
        # whitespace-only answer is treated as "no value" instead of
        # making int() raise.
        default_min = default_min.strip()

    if not default_min:
        return None

    LOG.info("Default CPU min frequency: {}".format(default_min))
    # lscpu prints a decimal value (e.g. 800.0000); keep the integer part.
    return int(default_min.split('.')[0])
def _get_max_cpu_mhz_allowed(self):
output = utils.execute(
"lscpu | grep 'CPU max MHz' | awk '{ print $4 }' | cut -d ',' -f 1",
@ -345,6 +361,24 @@ class AgentManager(service.PeriodicService):
LOG.info("Default CPU max frequency: {}".format(default_max))
return int(default_max.split('.')[0])
def _get_cstates_names(self):
    """Collect the c-state names exposed under constants.CSTATE_PATH.

    Every entry of that directory is expected to hold a ``name`` file;
    the first line of each file (without its newline) is one c-state name.

    Returns:
        list(string,..): the c-state names, sorted
    """
    names = []
    for entry in os.listdir(constants.CSTATE_PATH):
        name_path = os.path.join(constants.CSTATE_PATH, entry + "/name")
        with open(name_path, 'r') as name_file:
            first_line = name_file.readline()
        # Keep only the text that precedes the line terminator.
        names.append(first_line.partition('\n')[0])

    return sorted(names)
def _force_grub_update(self):
""" Force update the grub on the first AIO controller after the initial
config is completed
@ -742,6 +776,61 @@ class AgentManager(service.PeriodicService):
kernel_running = constants.KERNEL_STANDARD
return kernel_running
def _report_cstates_and_frequency_update(self, context,
                                         ihost, rpcapi=None):
    """Evaluate if minimum frequency, maximum frequency or cstates
    are changed. If yes, report to conductor.

    :param context: request context forwarded to the conductor RPC
    :param ihost: host object; nothing is done when it is None
    :param rpcapi: conductor RPC API; a ConductorAPI bound to
                   MANAGER_TOPIC is created when omitted
    """
    if ihost is None:
        return

    def _differs(detected, stored):
        # The detected frequencies are ints while the host fields are
        # stored as strings, so both sides are compared as text; a plain
        # `!=` would flag a change on every single inventory pass.
        detected_text = None if detected is None else str(detected)
        stored_text = None if stored is None else str(stored)
        return detected_text != stored_text

    freq_dict = {}

    try:
        min_freq = self._get_min_cpu_mhz_allowed()
        max_freq = self._get_max_cpu_mhz_allowed()

        if _differs(min_freq, ihost.min_cpu_mhz_allowed):
            ihost.min_cpu_mhz_allowed = min_freq
            freq_dict[constants.IHOST_MIN_CPU_MHZ_ALLOWED] = min_freq

        if _differs(max_freq, ihost.max_cpu_mhz_allowed):
            ihost.max_cpu_mhz_allowed = max_freq
            freq_dict[constants.IHOST_MAX_CPU_MHZ_ALLOWED] = max_freq

        # The cpuidle directory is only inspected when its first state
        # entry exposes a name file.
        if os.path.isfile(os.path.join(constants.CSTATE_PATH,
                                       "state0/name")):
            cstates_names = self._get_cstates_names()
            if utils.cstates_need_update(ihost.cstates_available,
                                         cstates_names):
                joined_cstates = ','.join(cstates_names)
                ihost.cstates_available = joined_cstates
                freq_dict[constants.IHOST_CSTATES_AVAILABLE] = \
                    joined_cstates
    except (OSError, ValueError) as ex:
        # ValueError covers an unparsable frequency string; without it the
        # error would escape this best-effort report.
        LOG.warning("Something wrong occurs during the cpu frequency"
                    f" search. {ex}")
        return

    if not freq_dict:
        return

    if rpcapi is None:
        rpcapi = conductor_rpcapi.ConductorAPI(
            topic=conductor_rpcapi.MANAGER_TOPIC)

    LOG.info(f"Reporting CStates or Frequency changes {ihost['uuid']}"
             f" -> {freq_dict}")

    rpcapi.cstates_and_frequency_update_by_ihost(context,
                                                 ihost['uuid'],
                                                 freq_dict)
def ihost_inv_get_and_report(self, icontext):
"""Collect data for an ihost.
@ -856,6 +945,13 @@ class AgentManager(service.PeriodicService):
"conductor.")
pass
try:
self._report_cstates_and_frequency_update(icontext, ihost, rpcapi)
except exception.SysinvException as ex:
LOG.exception("Something wrong occurs during the cpu frequency"
f" search. {ex}")
pass
self._report_port_inventory(icontext, rpcapi,
port_list, pci_device_list)

View File

@ -154,6 +154,18 @@ def restructure_host_cpu_data(host):
host.cpu_lists[cpu.numa_node].append(int(cpu.cpu))
def check_power_manager(host):
    """Refuse CPU MHz configuration on hosts managed by Power Manager.

    :param host: host identifier handed to ``label_get_by_host``
    :raises wsme.exc.ClientSideError: when the host labels enable
        power management
    """
    host_labels = pecan.request.dbapi.label_get_by_host(host)
    if not cutils.has_power_management_enabled(host_labels):
        return

    raise wsme.exc.ClientSideError(
        "Host CPU MHz cannot be configured "
        "if Power Manager is enabled.")
def check_core_allocations(host, cpu_counts, cpu_lists=None):
"""Check that minimum and maximum core values are respected."""

View File

@ -560,9 +560,15 @@ class Host(base.APIBase):
max_cpu_mhz_configured = wtypes.text
"Represent the CPU max frequency"
min_cpu_mhz_allowed = wtypes.text
"Represent the default CPU min frequency"
max_cpu_mhz_allowed = wtypes.text
"Represent the default CPU max frequency"
cstates_available = wtypes.text
"Represent the CStates available to use"
iscsi_initiator_name = wtypes.text
"The iscsi initiator name (only used for worker hosts)"
@ -598,7 +604,8 @@ class Host(base.APIBase):
'install_state', 'install_state_info',
'iscsi_initiator_name', 'device_image_update',
'reboot_needed', 'inv_state', 'clock_synchronization',
'max_cpu_mhz_configured', 'max_cpu_mhz_allowed',
'max_cpu_mhz_configured', 'min_cpu_mhz_allowed',
'max_cpu_mhz_allowed', 'cstates_available',
'apparmor']
fields = minimum_fields if not expand else None
@ -2897,6 +2904,8 @@ class HostController(rest.RestController):
% (personality, load.software_version))
def _check_max_cpu_mhz_configured(self, host):
cpu_utils.check_power_manager(host.ihost_patch.get('uuid'))
# Max CPU frequency requested by the user and the maximum frequency
# allowed by the CPU.
max_cpu_mhz_configured = str(host.ihost_patch.get('max_cpu_mhz_configured', ''))

View File

@ -1,4 +1,4 @@
# Copyright (c) 2018-2022 Wind River Systems, Inc.
# Copyright (c) 2018-2023 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -153,10 +153,15 @@ class LabelController(rest.RestController):
sort_dir=sort_dir)
def _apply_manifest_after_label_operation(self, uuid, keys):
if common.LABEL_DISABLE_NOHZ_FULL in keys:
if (common.LABEL_DISABLE_NOHZ_FULL in keys or
constants.KUBE_POWER_MANAGER_LABEL in keys):
pecan.request.rpcapi.update_grub_config(
pecan.request.context, uuid)
if constants.KUBE_POWER_MANAGER_LABEL in keys:
pecan.request.rpcapi.configure_power_manager(
pecan.request.context)
@wsme_pecan.wsexpose(LabelCollection, types.uuid, types.uuid,
int, wtypes.text, wtypes.text)
def get_all(self, uuid=None, marker=None, limit=None,
@ -362,6 +367,11 @@ def _semantic_check_worker_labels(body):
raise wsme.exc.ClientSideError(
_(
"Invalid value for %s label." % constants.KUBE_CPU_MANAGER_LABEL))
elif label_key == constants.KUBE_POWER_MANAGER_LABEL:
if label_value != constants.KUBE_POWER_MANAGER_VALUE:
raise wsme.exc.ClientSideError(
_(
"Invalid value for %s label." % constants.KUBE_POWER_MANAGER_LABEL))
def _get_system_enabled_k8s_plugins():

View File

@ -208,7 +208,9 @@ PATCH_DEFAULT_TIMEOUT_IN_SECS = 6
# ihost field attributes
IHOST_STOR_FUNCTION = 'stor_function'
IHOST_IS_MAX_CPU_MHZ_CONFIGURABLE = 'is_max_cpu_configurable'
IHOST_MIN_CPU_MHZ_ALLOWED = 'min_cpu_mhz_allowed'
IHOST_MAX_CPU_MHZ_ALLOWED = 'max_cpu_mhz_allowed'
IHOST_CSTATES_AVAILABLE = 'cstates_available'
# ihost config_status field values
CONFIG_STATUS_OUT_OF_DATE = "Config out-of-date"
@ -1960,6 +1962,8 @@ APP_EVALUATE_REAPPLY_HOST_AVAILABILITY = 'host-availability-updated'
APP_EVALUATE_REAPPLY_TYPE_SYSTEM_MODIFY = 'system-modify'
APP_EVALUATE_REAPPLY_TYPE_DETECTED_SWACT = 'detected-swact'
APP_EVALUATE_REAPPLY_TYPE_KUBE_UPGRADE_COMPLETE = 'kube-upgrade-complete'
APP_EVALUATE_REAPPLY_TYPE_HOST_ADD_LABEL = 'host-label-assign'
APP_EVALUATE_REAPPLY_TYPE_HOST_MODIFY = 'host-modify'
APP_EVALUATE_REAPPLY_TRIGGER_TO_METADATA_MAP = {
UNLOCK_ACTION:
@ -1987,7 +1991,11 @@ APP_EVALUATE_REAPPLY_TRIGGER_TO_METADATA_MAP = {
APP_EVALUATE_REAPPLY_TYPE_HOST_DELETE:
APP_EVALUATE_REAPPLY_TYPE_HOST_DELETE,
APP_EVALUATE_REAPPLY_TYPE_SYSTEM_MODIFY:
APP_EVALUATE_REAPPLY_TYPE_SYSTEM_MODIFY
APP_EVALUATE_REAPPLY_TYPE_SYSTEM_MODIFY,
APP_EVALUATE_REAPPLY_TYPE_HOST_ADD_LABEL:
APP_EVALUATE_REAPPLY_TYPE_HOST_ADD_LABEL,
APP_EVALUATE_REAPPLY_TYPE_HOST_MODIFY:
APP_EVALUATE_REAPPLY_TYPE_HOST_MODIFY
}
# Progress constants
@ -2039,6 +2047,7 @@ SRIOVDP_LABEL = 'sriovdp=enabled'
KUBE_TOPOLOGY_MANAGER_LABEL = 'kube-topology-mgr-policy'
KUBE_CPU_MANAGER_LABEL = 'kube-cpu-mgr-policy'
KUBE_IGNORE_ISOL_CPU_LABEL = 'kube-ignore-isol-cpus=enabled'
KUBE_POWER_MANAGER_LABEL = 'power-management'
# Accepted label values
KUBE_TOPOLOGY_MANAGER_VALUES = [
@ -2051,6 +2060,7 @@ KUBE_CPU_MANAGER_VALUES = [
'none',
'static'
]
KUBE_POWER_MANAGER_VALUE = 'enabled'
# Default DNS service domain
DEFAULT_DNS_SERVICE_DOMAIN = 'cluster.local'
@ -2380,3 +2390,7 @@ PLATFORM_FIREWALL_SM_PORT_2 = 2223
PLATFORM_FIREWALL_NTP_PORT = 123
# NOTE(review): PLATFORM_FIREWALL_PTP_PORT is assigned twice below, so the
# second value (320) replaces the first (319) once the module is loaded.
# Presumably two distinct constants were intended -- confirm with the users
# of this name before changing it.
PLATFORM_FIREWALL_PTP_PORT = 319
PLATFORM_FIREWALL_PTP_PORT = 320
# CState support. Whether the path exists depends on hardware support and driver availability.
# Validating the existence of the path is important.
# Directory listed to discover the c-state entries of cpu0.
CSTATE_PATH = "/sys/devices/system/cpu/cpu0/cpuidle"

View File

@ -2632,6 +2632,19 @@ def has_sriovdp_enabled(labels):
return False
def has_power_management_enabled(labels):
    """Tell whether the labels carry power-management=enabled.

    The first label whose key is the power-manager key and whose value is
    non-empty decides the answer; its value is compared case-insensitively.

    :param labels: iterable of label objects exposing ``label_key`` and
                   ``label_value``; may be None or empty
    :returns: True when power management is enabled, False otherwise
    """
    for entry in labels or ():
        if entry.label_key != constants.KUBE_POWER_MANAGER_LABEL:
            continue
        if not entry.label_value:
            continue
        return entry.label_value.lower() == constants.KUBE_POWER_MANAGER_VALUE

    # No usable power-management label was found.
    return False
def has_disable_nohz_full_enabled(labels):
"""Returns true if the disable-nohz-full=enabled label is set """
if not labels:
@ -3877,3 +3890,33 @@ def checkout_ostree(ostree_repo, commit, target_dir, subpath):
raise exception.SysinvException(
"Error checkout ostree commit: %s" % (error),
)
def cstates_need_update(old_cstates, new_cstates):
    """Tell whether the recorded c-states must be replaced by new ones.

    Each argument may be None, a comma-separated string or a sequence of
    c-state names.

    :param old_cstates: c-states currently recorded
    :param new_cstates: c-states just detected
    :returns: True when nothing usable is recorded yet (None or a blank
              string); False when nothing usable was detected (None or a
              blank string); otherwise True only when the two collections
              differ, order being ignored
    """
    if old_cstates is None:
        return True
    if new_cstates is None:
        return False

    if isinstance(old_cstates, str):
        if old_cstates.strip() == '':
            return True
        old_cstates = old_cstates.split(',')

    if isinstance(new_cstates, str):
        if new_cstates.strip() == '':
            return False
        new_cstates = new_cstates.split(',')

    # Compare the sorted contents rather than a one-way membership test:
    # with repeated names, two equally long lists can differ even though
    # every old entry also appears in the new list.
    return sorted(old_cstates) != sorted(new_cstates)

View File

@ -12822,6 +12822,20 @@ class ConductorManager(service.PeriodicService):
"""
self._update_pxe_config(host, load)
def cstates_and_frequency_update_by_ihost(self, context,
                                          ihost_uuid, freq_dict):
    """Store the reported frequency/c-state values and re-evaluate apps.

    :param context: request context
    :param ihost_uuid: uuid of the host to update; None is ignored
    :param freq_dict: dict of host fields to update; None or empty
                      is ignored
    """
    if ihost_uuid is None or freq_dict is None:
        return
    if len(freq_dict) == 0:
        return

    try:
        self.dbapi.ihost_update(ihost_uuid, freq_dict)
        reapply_trigger = {
            'type': constants.APP_EVALUATE_REAPPLY_TYPE_HOST_MODIFY}
        self.evaluate_apps_reapply(context, trigger=reapply_trigger)
    except Exception:
        # Failures are logged with their traceback and not propagated.
        LOG.warning("An error occurred during the cstates and frequency update. "
                    f"{traceback.format_exc()}")
def load_update_by_host(self, context, ihost_id, sw_version):
"""Update the host_upgrade table with the running SW_VERSION
@ -14330,19 +14344,26 @@ class ConductorManager(service.PeriodicService):
raise exception.SysinvException(_(msg))
def update_host_max_cpu_mhz_configured(self, context, host):
personalities = [constants.WORKER]
labels = self.dbapi.label_get_by_host(host['uuid'])
config_uuid = self._config_update_hosts(context,
personalities,
[host['uuid']])
config_dict = {
"personalities": personalities,
"host_uuids": [host['uuid']],
"classes": ['platform::compute::config::runtime']
}
self._config_apply_runtime_manifest(context,
config_uuid,
config_dict)
if not cutils.has_power_management_enabled(labels):
personalities = [constants.WORKER]
config_uuid = self._config_update_hosts(context,
personalities,
[host['uuid']])
config_dict = {
"personalities": personalities,
"host_uuids": [host['uuid']],
"classes": ['platform::compute::config::runtime']
}
self._config_apply_runtime_manifest(context,
config_uuid,
config_dict)
def configure_power_manager(self, context):
    """Trigger an application reapply evaluation for a host label assignment.

    :param context: request context
    """
    label_trigger = {
        'type': constants.APP_EVALUATE_REAPPLY_TYPE_HOST_ADD_LABEL}
    self.evaluate_apps_reapply(context, trigger=label_trigger)
def update_admin_ep_certificate(self, context):
"""

View File

@ -2232,6 +2232,31 @@ class ConductorAPI(sysinv.openstack.common.rpc.proxy.RpcProxy):
ihost_uuid=ihost_uuid,
kernel_running=kernel_running))
def configure_power_manager(self, context):
    """Synchronously ask the conductor to run 'configure_power_manager'.

    :param context: request context.
    :returns: whatever the synchronous call returns
    """
    request = self.make_msg('configure_power_manager')
    return self.call(context, request)
def cstates_and_frequency_update_by_ihost(self, context,
                                          ihost_uuid, freq_dict):
    """Synchronously send a host's min/max frequency and c-states update.

    :param context: request context.
    :param ihost_uuid: the uuid of the host
    :param freq_dict: dict with params to update
    :returns: whatever the synchronous call returns
    """
    request = self.make_msg('cstates_and_frequency_update_by_ihost',
                            ihost_uuid=ihost_uuid,
                            freq_dict=freq_dict)
    return self.call(context, request)
def request_firewall_runtime_update(self, context, host_uuid):
""" Sent from sysinv-agent, request the firewall update via runtime manifest

View File

@ -0,0 +1,20 @@
#
# Copyright (c) 2023 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from sqlalchemy import Column, MetaData, Table
from sqlalchemy import String
def upgrade(migrate_engine):
    """Add the min_cpu_mhz_allowed and cstates_available columns to i_host.

    :param migrate_engine: engine bound to the metadata used to load
                           the i_host table
    """
    metadata = MetaData()
    metadata.bind = migrate_engine

    i_host = Table('i_host', metadata, autoload=True)
    new_columns = (
        Column('min_cpu_mhz_allowed', String(64)),
        Column('cstates_available', String(255)),
    )
    for new_column in new_columns:
        i_host.create_column(new_column)
def downgrade(migrate_engine):
    """Always fail: this migration cannot be reverted.

    :param migrate_engine: unused
    :raises NotImplementedError: unconditionally
    """
    reason = 'SysInv database downgrade is unsupported.'
    raise NotImplementedError(reason)

View File

@ -244,8 +244,11 @@ class ihost(Base):
device_image_update = Column(String(64))
reboot_needed = Column(Boolean, nullable=False, default=False)
max_cpu_mhz_configured = Column(String(64)) # in MHz
min_cpu_mhz_allowed = Column(String(64)) # in MHz
max_cpu_mhz_allowed = Column(String(64)) # in MHz
cstates_available = Column(String(255))
forisystemid = Column(Integer,
ForeignKey('i_system.id', ondelete='CASCADE'))
peer_id = Column(Integer,

View File

@ -106,8 +106,10 @@ class Host(base.SysinvObject):
'device_image_update': utils.str_or_none,
'reboot_needed': utils.bool_or_none,
'max_cpu_mhz_configured': utils.str_or_none,
'max_cpu_mhz_allowed': utils.str_or_none
}
'min_cpu_mhz_allowed': utils.str_or_none,
'max_cpu_mhz_allowed': utils.str_or_none,
'cstates_available': utils.str_or_none
}
_foreign_fields = {
'isystem_uuid': 'system:uuid',

View File

@ -170,8 +170,10 @@ def get_test_ihost(**kw):
'inv_state': kw.get('inv_state', 'inventoried'),
'clock_synchronization': kw.get('clock_synchronization', constants.NTP),
'max_cpu_mhz_configured': kw.get('max_cpu_mhz_configured', ''),
'max_cpu_mhz_allowed': kw.get('max_cpu_mhz_allowed', '')
}
'min_cpu_mhz_allowed': kw.get('min_cpu_mhz_allowed', ''),
'max_cpu_mhz_allowed': kw.get('max_cpu_mhz_allowed', ''),
'cstates_available': kw.get('cstates_available', '')
}
return inv