Add support to Power Manager Profiles config

To run properly, after the  installation process of  the
Kubernetes Power Manager, the  system must be configured
to publish shared, and c-state profiles (used to support
standard power settings).

For that reason, this information needs to be available
in the ihost information.

Also, to enable the usage of the Kubernetes Power Manager
only on user-chosen nodes, a new label will be used. When
the  "power-management=enabled"  label is assigned to the
node, the Kubernetes PM will be  deployed, and power pro-
files will  be built.  The  existence  of the label  will
also  inhibit  setting the  maximum CPU frequency, native
to StarlingX.

This commit  aims to  add  new  information about minimum
frequency, and CStates available on CPUs in each ihost.

TEST PLAN:
PASS: AIO-SX bootstrap successfully
PASS: Standard bootstrap successfully
PASS: 'power-management' label assignment to nodes
      successfully
PASS: Nodes lock and unlock
PASS: Presence of minimum frequency and CStates infor-
      mation in system host-show for each node
PASS: Update information of cstates after BIOS/UEFI
      changes

Story: 2010773
Task: 48494

Author: Davi Frossard <dbarrosf@windriver.com>
Co-author: Eduardo Alberti <eduardo.alberti@windriver.com>
Co-author: Romão Martines <romaomatheus.martinesdejesus@windriver.com>

Change-Id: I8da74f0309fa2f5a3e4c3e684057c5217659940a
Signed-off-by: Eduardo Juliano Alberti <eduardo.alberti@windriver.com>
This commit is contained in:
Eduardo Juliano Alberti 2023-07-07 09:00:23 -03:00 committed by Eduardo Alberti
parent c307d07951
commit b05dfdbe81
13 changed files with 279 additions and 21 deletions

View File

@ -37,7 +37,8 @@ def _print_ihost_show(ihost, columns=None, output_format=None):
'install_state', 'install_state_info', 'inv_state',
'clock_synchronization', 'device_image_update',
'reboot_needed', 'max_cpu_mhz_configured',
'max_cpu_mhz_allowed', 'apparmor']
'min_cpu_mhz_allowed', 'max_cpu_mhz_allowed',
'cstates_available', 'apparmor']
optional_fields = ['vsc_controllers', 'ttys_dcd']
if ihost.subfunctions != ihost.personality:
fields.append('subfunctions')

View File

@ -334,6 +334,22 @@ class AgentManager(service.PeriodicService):
return constants.CONFIGURABLE
return constants.NOT_CONFIGURABLE
def _get_min_cpu_mhz_allowed(self):
    """Read the minimum CPU frequency reported by lscpu.

    The 'CPU min MHz' line of lscpu is filtered through grep/awk/cut and
    the integer part of the reported value is kept.

    Returns:
        int: minimum CPU frequency in MHz, or None (implicitly) when the
            command result is not a tuple or its first element is empty.
    """
    result = utils.execute(
        "lscpu | grep 'CPU min MHz' | awk '{ print $4 }' | cut -d ',' -f 1",
        shell=True)

    # Only a tuple result carries the command output as first element.
    if not isinstance(result, tuple):
        return None

    reported = result[0]
    if not reported:
        return None

    LOG.info("Default CPU min frequency: {}".format(reported))
    # Drop the fractional part ("800.0000" -> 800).
    whole_part = reported.split('.')[0]
    return int(whole_part)
def _get_max_cpu_mhz_allowed(self):
output = utils.execute(
"lscpu | grep 'CPU max MHz' | awk '{ print $4 }' | cut -d ',' -f 1",
@ -345,6 +361,24 @@ class AgentManager(service.PeriodicService):
LOG.info("Default CPU max frequency: {}".format(default_max))
return int(default_max.split('.')[0])
def _get_cstates_names(self):
    """Collect the c-state names exposed under constants.CSTATE_PATH.

    Every entry of the directory is expected to contain a "name" file
    whose first line holds the c-state name.

    Returns:
        list(string,..): c-state names in sorted order
    """
    names = []
    for entry in os.listdir(constants.CSTATE_PATH):
        name_file = os.path.join(constants.CSTATE_PATH, entry + "/name")
        with open(name_file, 'r') as handle:
            first_line = handle.readline()
        # Keep only the text before the line terminator.
        names.append(first_line.split('\n')[0])

    return sorted(names)
def _force_grub_update(self):
""" Force update the grub on the first AIO controller after the initial
config is completed
@ -742,6 +776,61 @@ class AgentManager(service.PeriodicService):
kernel_running = constants.KERNEL_STANDARD
return kernel_running
def _report_cstates_and_frequency_update(self, context,
                                         ihost, rpcapi=None):
    """Report CPU frequency limits and c-states to the conductor on change.

    The minimum/maximum CPU frequency and the available c-states are read
    from the running system and compared with the values held by ihost.
    Only the attributes that differ are updated on ihost and sent to the
    conductor; nothing is sent when there is no difference.

    Args:
        context: request context forwarded to the conductor RPC call.
        ihost: host record to compare against; nothing is done when None.
        rpcapi: conductor RPC API; a ConductorAPI bound to MANAGER_TOPIC
            is created when omitted.
    """
    if ihost is None:
        return

    def _differs(stored, detected):
        # The detected frequencies are ints while the host fields are
        # declared as strings (str_or_none), so a raw "!=" reports a
        # change even when the values match. Compare the text forms.
        stored_text = None if stored is None else str(stored)
        detected_text = None if detected is None else str(detected)
        return stored_text != detected_text

    freq_dict = {}
    try:
        min_freq = self._get_min_cpu_mhz_allowed()
        max_freq = self._get_max_cpu_mhz_allowed()

        if _differs(ihost.min_cpu_mhz_allowed, min_freq):
            ihost.min_cpu_mhz_allowed = min_freq
            freq_dict[constants.IHOST_MIN_CPU_MHZ_ALLOWED] = min_freq

        if _differs(ihost.max_cpu_mhz_allowed, max_freq):
            ihost.max_cpu_mhz_allowed = max_freq
            freq_dict[constants.IHOST_MAX_CPU_MHZ_ALLOWED] = max_freq

        # The cpuidle path is only inspected when state0 exposes a name.
        if os.path.isfile(os.path.join(constants.CSTATE_PATH,
                                       "state0/name")):
            cstates_names = self._get_cstates_names()
            if utils.cstates_need_update(ihost.cstates_available,
                                         cstates_names):
                joined_names = ','.join(cstates_names)
                ihost.cstates_available = joined_names
                freq_dict[constants.IHOST_CSTATES_AVAILABLE] = joined_names
    except (OSError, ValueError) as ex:
        # ValueError covers lscpu output that cannot be parsed as an
        # integer; like an OSError it must not abort the inventory report.
        LOG.warning("Something wrong occurs during the cpu frequency"
                    f" search. {ex}")
        return

    if not freq_dict:
        return

    if rpcapi is None:
        rpcapi = conductor_rpcapi.ConductorAPI(
            topic=conductor_rpcapi.MANAGER_TOPIC)

    LOG.info(f"Reporting CStates or Frequency changes {ihost['uuid']}"
             f" -> {freq_dict}")
    rpcapi.cstates_and_frequency_update_by_ihost(context,
                                                 ihost['uuid'],
                                                 freq_dict)
def ihost_inv_get_and_report(self, icontext):
"""Collect data for an ihost.
@ -856,6 +945,13 @@ class AgentManager(service.PeriodicService):
"conductor.")
pass
try:
self._report_cstates_and_frequency_update(icontext, ihost, rpcapi)
except exception.SysinvException as ex:
LOG.exception("Something wrong occurs during the cpu frequency"
f" search. {ex}")
pass
self._report_port_inventory(icontext, rpcapi,
port_list, pci_device_list)

View File

@ -154,6 +154,18 @@ def restructure_host_cpu_data(host):
host.cpu_lists[cpu.numa_node].append(int(cpu.cpu))
def check_power_manager(host):
    """Reject CPU MHz configuration when the host uses the Power Manager.

    :param host: host identifier handed to dbapi.label_get_by_host
    :raises wsme.exc.ClientSideError: when the host labels enable power
        management
    """
    host_labels = pecan.request.dbapi.label_get_by_host(host)
    if not cutils.has_power_management_enabled(host_labels):
        return

    raise wsme.exc.ClientSideError(
        "Host CPU MHz cannot be configured if Power Manager is enabled.")
def check_core_allocations(host, cpu_counts, cpu_lists=None):
"""Check that minimum and maximum core values are respected."""

View File

@ -560,9 +560,15 @@ class Host(base.APIBase):
max_cpu_mhz_configured = wtypes.text
"Represent the CPU max frequency"
min_cpu_mhz_allowed = wtypes.text
"Represent the default CPU min frequency"
max_cpu_mhz_allowed = wtypes.text
"Represent the default CPU max frequency"
cstates_available = wtypes.text
"Represent the CStates available to use"
iscsi_initiator_name = wtypes.text
"The iscsi initiator name (only used for worker hosts)"
@ -598,7 +604,8 @@ class Host(base.APIBase):
'install_state', 'install_state_info',
'iscsi_initiator_name', 'device_image_update',
'reboot_needed', 'inv_state', 'clock_synchronization',
'max_cpu_mhz_configured', 'max_cpu_mhz_allowed',
'max_cpu_mhz_configured', 'min_cpu_mhz_allowed',
'max_cpu_mhz_allowed', 'cstates_available',
'apparmor']
fields = minimum_fields if not expand else None
@ -2897,6 +2904,8 @@ class HostController(rest.RestController):
% (personality, load.software_version))
def _check_max_cpu_mhz_configured(self, host):
cpu_utils.check_power_manager(host.ihost_patch.get('uuid'))
# Max CPU frequency requested by the user and the maximum frequency
# allowed by the CPU.
max_cpu_mhz_configured = str(host.ihost_patch.get('max_cpu_mhz_configured', ''))

View File

@ -1,4 +1,4 @@
# Copyright (c) 2018-2022 Wind River Systems, Inc.
# Copyright (c) 2018-2023 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -153,10 +153,15 @@ class LabelController(rest.RestController):
sort_dir=sort_dir)
def _apply_manifest_after_label_operation(self, uuid, keys):
if common.LABEL_DISABLE_NOHZ_FULL in keys:
if (common.LABEL_DISABLE_NOHZ_FULL in keys or
constants.KUBE_POWER_MANAGER_LABEL in keys):
pecan.request.rpcapi.update_grub_config(
pecan.request.context, uuid)
if constants.KUBE_POWER_MANAGER_LABEL in keys:
pecan.request.rpcapi.configure_power_manager(
pecan.request.context)
@wsme_pecan.wsexpose(LabelCollection, types.uuid, types.uuid,
int, wtypes.text, wtypes.text)
def get_all(self, uuid=None, marker=None, limit=None,
@ -362,6 +367,11 @@ def _semantic_check_worker_labels(body):
raise wsme.exc.ClientSideError(
_(
"Invalid value for %s label." % constants.KUBE_CPU_MANAGER_LABEL))
elif label_key == constants.KUBE_POWER_MANAGER_LABEL:
if label_value != constants.KUBE_POWER_MANAGER_VALUE:
raise wsme.exc.ClientSideError(
_(
"Invalid value for %s label." % constants.KUBE_POWER_MANAGER_LABEL))
def _get_system_enabled_k8s_plugins():

View File

@ -208,7 +208,9 @@ PATCH_DEFAULT_TIMEOUT_IN_SECS = 6
# ihost field attributes
IHOST_STOR_FUNCTION = 'stor_function'
IHOST_IS_MAX_CPU_MHZ_CONFIGURABLE = 'is_max_cpu_configurable'
IHOST_MIN_CPU_MHZ_ALLOWED = 'min_cpu_mhz_allowed'
IHOST_MAX_CPU_MHZ_ALLOWED = 'max_cpu_mhz_allowed'
IHOST_CSTATES_AVAILABLE = 'cstates_available'
# ihost config_status field values
CONFIG_STATUS_OUT_OF_DATE = "Config out-of-date"
@ -1955,6 +1957,8 @@ APP_EVALUATE_REAPPLY_HOST_AVAILABILITY = 'host-availability-updated'
APP_EVALUATE_REAPPLY_TYPE_SYSTEM_MODIFY = 'system-modify'
APP_EVALUATE_REAPPLY_TYPE_DETECTED_SWACT = 'detected-swact'
APP_EVALUATE_REAPPLY_TYPE_KUBE_UPGRADE_COMPLETE = 'kube-upgrade-complete'
APP_EVALUATE_REAPPLY_TYPE_HOST_ADD_LABEL = 'host-label-assign'
APP_EVALUATE_REAPPLY_TYPE_HOST_MODIFY = 'host-modify'
APP_EVALUATE_REAPPLY_TRIGGER_TO_METADATA_MAP = {
UNLOCK_ACTION:
@ -1982,7 +1986,11 @@ APP_EVALUATE_REAPPLY_TRIGGER_TO_METADATA_MAP = {
APP_EVALUATE_REAPPLY_TYPE_HOST_DELETE:
APP_EVALUATE_REAPPLY_TYPE_HOST_DELETE,
APP_EVALUATE_REAPPLY_TYPE_SYSTEM_MODIFY:
APP_EVALUATE_REAPPLY_TYPE_SYSTEM_MODIFY
APP_EVALUATE_REAPPLY_TYPE_SYSTEM_MODIFY,
APP_EVALUATE_REAPPLY_TYPE_HOST_ADD_LABEL:
APP_EVALUATE_REAPPLY_TYPE_HOST_ADD_LABEL,
APP_EVALUATE_REAPPLY_TYPE_HOST_MODIFY:
APP_EVALUATE_REAPPLY_TYPE_HOST_MODIFY
}
# Progress constants
@ -2034,6 +2042,7 @@ SRIOVDP_LABEL = 'sriovdp=enabled'
KUBE_TOPOLOGY_MANAGER_LABEL = 'kube-topology-mgr-policy'
KUBE_CPU_MANAGER_LABEL = 'kube-cpu-mgr-policy'
KUBE_IGNORE_ISOL_CPU_LABEL = 'kube-ignore-isol-cpus=enabled'
KUBE_POWER_MANAGER_LABEL = 'power-management'
# Accepted label values
KUBE_TOPOLOGY_MANAGER_VALUES = [
@ -2046,6 +2055,7 @@ KUBE_CPU_MANAGER_VALUES = [
'none',
'static'
]
KUBE_POWER_MANAGER_VALUE = 'enabled'
# Default DNS service domain
DEFAULT_DNS_SERVICE_DOMAIN = 'cluster.local'
@ -2366,3 +2376,7 @@ PLATFORM_FIREWALL_SM_PORT_2 = 2223
PLATFORM_FIREWALL_NTP_PORT = 123
PLATFORM_FIREWALL_PTP_PORT = 319
PLATFORM_FIREWALL_PTP_PORT = 320
# CState support. Whether the path exists depends on hardware support and driver availability.
# Validating the existence of the path is important.
CSTATE_PATH = "/sys/devices/system/cpu/cpu0/cpuidle"

View File

@ -2632,6 +2632,19 @@ def has_sriovdp_enabled(labels):
return False
def has_power_management_enabled(labels):
    """Tell whether the power-management=enabled label is among labels.

    The first label whose key is the power-management key and whose value
    is non-empty decides the result; its value is compared
    case-insensitively. False is returned for an empty/None label list or
    when no such label exists.
    """
    for entry in labels or ():
        if entry.label_key != constants.KUBE_POWER_MANAGER_LABEL:
            continue
        value = entry.label_value
        if value:
            return constants.KUBE_POWER_MANAGER_VALUE == value.lower()

    # No usable power-management label was found.
    return False
def has_disable_nohz_full_enabled(labels):
"""Returns true if the disable-nohz-full=enabled label is set """
if not labels:
@ -3877,3 +3890,33 @@ def checkout_ostree(ostree_repo, commit, target_dir, subpath):
raise exception.SysinvException(
"Error checkout ostree commit: %s" % (error),
)
def cstates_need_update(old_cstates, new_cstates):
    """Tell whether the stored c-state list must be replaced by a new one.

    Each argument is either a comma-separated string of names or a list
    of names. The comparison ignores ordering but honours duplicates, so
    two lists of equal length that hold different names are always
    reported as different.

    :param old_cstates: currently stored c-states (str, list or None)
    :param new_cstates: freshly detected c-states (str, list or None)
    :returns: True when there is no stored value (None or blank string)
              or when the stored and detected names differ; False when
              there is nothing new to store (None or blank string) or
              when both hold the same names
    """
    if old_cstates is None:
        return True
    if new_cstates is None:
        return False

    if isinstance(old_cstates, str):
        if old_cstates.strip() == '':
            return True
        old_cstates_list = old_cstates.split(',')
    else:
        old_cstates_list = old_cstates

    if isinstance(new_cstates, str):
        if new_cstates.strip() == '':
            return False
        new_cstates_list = new_cstates.split(',')
    else:
        new_cstates_list = new_cstates

    # A one-way membership test misses changes when a name is repeated;
    # comparing the sorted sequences checks both directions and the length.
    return sorted(old_cstates_list) != sorted(new_cstates_list)

View File

@ -12713,6 +12713,20 @@ class ConductorManager(service.PeriodicService):
"""
self._update_pxe_config(host, load)
def cstates_and_frequency_update_by_ihost(self, context,
                                          ihost_uuid, freq_dict):
    """Store reported frequency/c-state values and re-evaluate apps.

    :param context: request context
    :param ihost_uuid: uuid of the host to update; ignored when None
    :param freq_dict: host attributes to update; ignored when None or
        empty

    Any error raised by the update or the app re-evaluation is logged as
    a warning and not propagated.
    """
    if ihost_uuid is None or freq_dict is None:
        return
    if len(freq_dict) == 0:
        return

    reapply_trigger = {
        'type': constants.APP_EVALUATE_REAPPLY_TYPE_HOST_MODIFY}
    try:
        self.dbapi.ihost_update(ihost_uuid, freq_dict)
        self.evaluate_apps_reapply(context, trigger=reapply_trigger)
    except Exception:
        LOG.warning("An error occurred during the cstates and frequency update. "
                    f"{traceback.format_exc()}")
def load_update_by_host(self, context, ihost_id, sw_version):
"""Update the host_upgrade table with the running SW_VERSION
@ -14206,19 +14220,26 @@ class ConductorManager(service.PeriodicService):
raise exception.SysinvException(_(msg))
def update_host_max_cpu_mhz_configured(self, context, host):
personalities = [constants.WORKER]
labels = self.dbapi.label_get_by_host(host['uuid'])
config_uuid = self._config_update_hosts(context,
personalities,
[host['uuid']])
config_dict = {
"personalities": personalities,
"host_uuids": [host['uuid']],
"classes": ['platform::compute::config::runtime']
}
self._config_apply_runtime_manifest(context,
config_uuid,
config_dict)
if not cutils.has_power_management_enabled(labels):
personalities = [constants.WORKER]
config_uuid = self._config_update_hosts(context,
personalities,
[host['uuid']])
config_dict = {
"personalities": personalities,
"host_uuids": [host['uuid']],
"classes": ['platform::compute::config::runtime']
}
self._config_apply_runtime_manifest(context,
config_uuid,
config_dict)
def configure_power_manager(self, context):
    """Trigger an application reapply evaluation for a host label change.

    :param context: request context
    """
    label_trigger = {
        'type': constants.APP_EVALUATE_REAPPLY_TYPE_HOST_ADD_LABEL}
    self.evaluate_apps_reapply(context, trigger=label_trigger)
def update_admin_ep_certificate(self, context):
"""

View File

@ -2217,6 +2217,31 @@ class ConductorAPI(sysinv.openstack.common.rpc.proxy.RpcProxy):
ihost_uuid=ihost_uuid,
kernel_running=kernel_running))
def configure_power_manager(self, context):
    """Synchronously, ask the conductor to run 'configure_power_manager'
    so host power profiles and c-states for Kubernetes Power Manager are
    updated through an application reapply.

    :param context: request context.
    :returns: the result of the RPC call
    """
    message = self.make_msg('configure_power_manager')
    return self.call(context, message)
def cstates_and_frequency_update_by_ihost(self, context,
                                          ihost_uuid, freq_dict):
    """Synchronously, send the min/max frequency and available cstates
    of a host to the conductor.

    :param context: request context.
    :param ihost_uuid: the uuid of the host
    :param freq_dict: dict with params to update
    :returns: the result of the RPC call
    """
    message = self.make_msg('cstates_and_frequency_update_by_ihost',
                            ihost_uuid=ihost_uuid,
                            freq_dict=freq_dict)
    return self.call(context, message)
def request_firewall_runtime_update(self, context, host_uuid):
""" Sent from sysinv-agent, request the firewall update via runtime manifest

View File

@ -0,0 +1,20 @@
#
# Copyright (c) 2023 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
from sqlalchemy import Column, MetaData, Table
from sqlalchemy import String
def upgrade(migrate_engine):
    """Add the min_cpu_mhz_allowed and cstates_available columns to i_host.

    :param migrate_engine: engine the metadata is bound to
    """
    meta = MetaData()
    meta.bind = migrate_engine

    host_table = Table('i_host', meta, autoload=True)

    # (column name, string length) in creation order.
    new_columns = (
        ('min_cpu_mhz_allowed', 64),
        ('cstates_available', 255),
    )
    for column_name, length in new_columns:
        host_table.create_column(Column(column_name, String(length)))
def downgrade(migrate_engine):
    """Refuse to downgrade the database.

    :param migrate_engine: unused
    :raises NotImplementedError: always
    """
    reason = 'SysInv database downgrade is unsupported.'
    raise NotImplementedError(reason)

View File

@ -244,8 +244,11 @@ class ihost(Base):
device_image_update = Column(String(64))
reboot_needed = Column(Boolean, nullable=False, default=False)
max_cpu_mhz_configured = Column(String(64)) # in MHz
min_cpu_mhz_allowed = Column(String(64)) # in MHz
max_cpu_mhz_allowed = Column(String(64)) # in MHz
cstates_available = Column(String(255))
forisystemid = Column(Integer,
ForeignKey('i_system.id', ondelete='CASCADE'))
peer_id = Column(Integer,

View File

@ -106,8 +106,10 @@ class Host(base.SysinvObject):
'device_image_update': utils.str_or_none,
'reboot_needed': utils.bool_or_none,
'max_cpu_mhz_configured': utils.str_or_none,
'max_cpu_mhz_allowed': utils.str_or_none
}
'min_cpu_mhz_allowed': utils.str_or_none,
'max_cpu_mhz_allowed': utils.str_or_none,
'cstates_available': utils.str_or_none
}
_foreign_fields = {
'isystem_uuid': 'system:uuid',

View File

@ -170,8 +170,10 @@ def get_test_ihost(**kw):
'inv_state': kw.get('inv_state', 'inventoried'),
'clock_synchronization': kw.get('clock_synchronization', constants.NTP),
'max_cpu_mhz_configured': kw.get('max_cpu_mhz_configured', ''),
'max_cpu_mhz_allowed': kw.get('max_cpu_mhz_allowed', '')
}
'min_cpu_mhz_allowed': kw.get('min_cpu_mhz_allowed', ''),
'max_cpu_mhz_allowed': kw.get('max_cpu_mhz_allowed', ''),
'cstates_available': kw.get('cstates_available', '')
}
return inv