Merge "Revert "Add functionality for intel gpu device plugin""

This commit is contained in:
Zuul 2023-09-22 19:31:08 +00:00 committed by Gerrit Code Review
commit deac018faf
10 changed files with 8 additions and 226 deletions

View File

@ -153,7 +153,6 @@ class AgentManager(service.PeriodicService):
PV = 'pv'
LVG = 'lvg'
HOST_FILESYSTEMS = 'host_filesystems'
K8S_DEVICE_PLUGIN = 'k8s_device_plugin'
KERNEL = 'kernel'
# Note that this set must be extended when there are
@ -169,7 +168,6 @@ class AgentManager(service.PeriodicService):
PV,
LVG,
HOST_FILESYSTEMS,
K8S_DEVICE_PLUGIN,
KERNEL}
def __init__(self, host, topic):
@ -954,8 +952,6 @@ class AgentManager(service.PeriodicService):
self._report_port_inventory(icontext, rpcapi,
port_list, pci_device_list)
self._report_supported_device_plugin(icontext, rpcapi, pci_device_list)
# Find list of numa_nodes and cpus for this ihost
inumas, icpus = self._inode_operator.inodes_get_inumas_icpus()
@ -1079,36 +1075,6 @@ class AgentManager(service.PeriodicService):
self._report_to_conductor_iplatform_avail()
self._iconfig_read_config_reported = config_uuid
@retrying.retry(wait_fixed=15 * 1000, stop_max_delay=300 * 1000,
                retry_on_exception=_retry_on_missing_host_uuid)
def _report_supported_device_plugin(self, context, rpcapi, pci_device_list=None):
    """Report the k8s device plugins supported by this host to the conductor.

    The plugins are derived from the PCI device list by
    ``self._ipci_operator.get_supported_device_plugins``. The
    ``K8S_DEVICE_PLUGIN`` inventory item is marked as reported when there
    is nothing to report or when the conductor call succeeds; it is left
    unmarked when the call fails.

    :param context: request context forwarded to the conductor RPC
    :param rpcapi: conductor RPC API used to send the report
    :param pci_device_list: PCI devices to inspect; when None the list is
        taken from ``self._get_ports_inventory()``
    :raises exception.LocalHostUUIDNotFound: when the host uuid is not yet
        known (the ``retrying`` decorator decides whether to retry via
        ``_retry_on_missing_host_uuid``)
    """
    if not self._ihost_uuid:
        raise exception.LocalHostUUIDNotFound()

    if pci_device_list is None:
        # Only the PCI device list of the inventory triple is needed here.
        _, pci_device_list, _ = self._get_ports_inventory()

    device_plugins = self._ipci_operator.get_supported_device_plugins(pci_device_list)
    if not device_plugins:
        # self.K8S_DEVICE_PLUGIN is an expected inventory report, so need to add it.
        self._inventory_reported.add(self.K8S_DEVICE_PLUGIN)
        LOG.info("No plugins proposed by PCIOperator.")
        return

    try:
        rpcapi.device_plugin_labels_update_by_ihost(context,
                                                    self._ihost_uuid,
                                                    device_plugins)
    except RemoteError as e:
        LOG.error("device_plugin_labels_update_by_ihost RemoteError exc_type=%s",
                  e.exc_type)
    except exception.SysinvException:
        LOG.exception("Sysinv Agent uncaught exception updating device plugin labels.")
    else:
        # Mark the inventory item as reported only after a successful update.
        self._inventory_reported.add(self.K8S_DEVICE_PLUGIN)
def subfunctions_get(self):
""" returns subfunctions on this host.
"""

View File

@ -179,31 +179,6 @@ class PCIDevice(object):
return "<PCIDevice '%s'>" % str(self)
class DevicePlugin(object):
    '''Class to record specific information of each k8s device plugins

    Subclasses override get_plugin() to report the plugin they stand for.
    '''

    def get_plugin(self, pci_device_list):
        '''Return the plugin matching pci_device_list, or None.

        The base implementation matches nothing and always returns None.

        :param pci_device_list: PCI devices to inspect (unused here)
        :returns: None
        '''
        return None
class IntelGPUDp(DevicePlugin):
    '''Detects whether the Intel GPU k8s device plugin applies to a host.'''

    def get_plugin(self, pci_device_list):
        '''Return the Intel GPU plugin name if a matching device is present.

        A device matches when its 'pclass' contains
        constants.GPU_DEVICE_PCLASS_VGA and its 'driver' contains
        constants.GPU_DEVICE_DRIVER_I915.

        :param pci_device_list: iterable of mappings with 'pclass' and
            'driver' entries
        :returns: constants.K8S_INTEL_GPU_DEVICE_PLUGIN, or None when no
            device matches
        '''
        for device in pci_device_list:
            pclass = device['pclass']
            # A missing class description cannot match; skip it instead of
            # failing on the substring test.
            if not pclass or constants.GPU_DEVICE_PCLASS_VGA not in pclass:
                continue
            driver = device['driver']
            if driver and constants.GPU_DEVICE_DRIVER_I915 in driver:
                return constants.K8S_INTEL_GPU_DEVICE_PLUGIN
        return None
# Device plugin detectors iterated by get_supported_device_plugins().
DEVICE_PLUGIN_LIST = [IntelGPUDp()]
class PCIOperator(object):
'''Class to encapsulate PCI operations for System Inventory'''
@ -809,13 +784,3 @@ class PCIOperator(object):
pci_attrs_array.append(attrs)
return pci_attrs_array
def get_supported_device_plugins(self, pci_device_list):
    '''Collect the plugins reported by every entry of DEVICE_PLUGIN_LIST.

    :param pci_device_list: PCI devices handed to each detector
    :returns: list of the non-None get_plugin() results, in
        DEVICE_PLUGIN_LIST order
    '''
    candidates = (detector.get_plugin(pci_device_list)
                  for detector in DEVICE_PLUGIN_LIST)
    return [found for found in candidates if found is not None]

View File

@ -395,8 +395,7 @@ def _get_system_enabled_k8s_plugins():
def _semantic_check_intel_gpu_plugins_labels(host):
pci_devices = pecan.request.dbapi.pci_device_get_by_host(host.id)
for pci_device in pci_devices:
if (constants.GPU_DEVICE_PCLASS_VGA in pci_device.pclass and
pci_device.driver == constants.GPU_DEVICE_DRIVER_I915):
if ("VGA" in pci_device.pclass and pci_device.driver == "i915"):
return
raise wsme.exc.ClientSideError("Host %s does not support Intel GPU device plugin." % (host.hostname))
@ -410,7 +409,8 @@ def _semantic_check_k8s_plugins_labels(host, body):
if plugins is None:
return
for label in body.keys():
if label in plugins:
if label == constants.K8S_INTEL_GPU_DEVICE_PLUGIN:
for label_key, label_value in body.items():
label = label_key + "=" + label_value
if label in plugins.values():
if label == constants.KUBE_INTEL_GPU_DEVICE_PLUGIN_LABEL:
_semantic_check_intel_gpu_plugins_labels(host)

View File

@ -2242,9 +2242,7 @@ HOST_BM_VALID_PROVISIONED_TYPE_LIST = [HOST_BM_TYPE_DYNAMIC,
# K8s device plugins
DEVICE_PLUGINS_FILE = "enabled_kube_plugins"
ENABLED_KUBE_PLUGINS = os.path.join(tsc.CONFIG_PATH, DEVICE_PLUGINS_FILE)
K8S_INTEL_GPU_DEVICE_PLUGIN = "intelgpu"
GPU_DEVICE_DRIVER_I915 = "i915"
GPU_DEVICE_PCLASS_VGA = "VGA"
KUBE_INTEL_GPU_DEVICE_PLUGIN_LABEL = "intelgpu=enabled"
# Port on which ceph manager and ceph-mgr listens
CEPH_MGR_PORT = 7999

View File

@ -33,7 +33,6 @@ import errno
import filecmp
import glob
import hashlib
import json
import io
import math
import os
@ -5771,55 +5770,6 @@ class ConductorManager(service.PeriodicService):
tsc.install_uuid)
greenthread.sleep(constants.FIX_INSTALL_UUID_INTERVAL_SECS)
def _get_enabled_kube_plugins(self):
    """Load the enabled k8s device plugins from constants.ENABLED_KUBE_PLUGINS.

    :returns: the JSON-decoded content of the file, or None when the file
        does not exist or cannot be read or parsed
    """
    # this file will be generated after initial config process if the
    # kubernetes device plugin list is not empty.
    if not os.path.isfile(constants.ENABLED_KUBE_PLUGINS):
        return None

    try:
        # The context manager closes the file even when parsing fails.
        with open(constants.ENABLED_KUBE_PLUGINS) as file_object:
            return json.loads(file_object.read())
    except Exception as e:
        # Best effort: an unreadable file is treated as "no plugins enabled".
        LOG.error("failed to get kube_plugin list from file. "
                  "exception: %s", str(e))
        return None
def device_plugin_labels_update_by_ihost(self, context,
                                         host_uuid, device_plugins):
    """Assign device plugins to an ihost with the supplied data.

    Creates a ``<plugin>=enabled`` label on the host for every requested
    plugin that is also listed by ``_get_enabled_kube_plugins()``. Labels
    that already exist are left untouched.

    :param context: an admin context
    :param host_uuid: host uuid unique id
    :param device_plugins: kubernetes device plugins request to assign
    """
    enabled_kube_plugins = self._get_enabled_kube_plugins()
    if not enabled_kube_plugins:
        LOG.info("Vendor k8s device plugin list is empty. "
                 "Set parameters in ansible override file if required.")
        return

    # str.strip() returns a new string, so the result must be rebound.
    host_uuid = host_uuid.strip()
    try:
        ihost = self.dbapi.ihost_get(host_uuid)
    except exception.ServerNotFound:
        LOG.exception("Invalid host_uuid %s", host_uuid)
        return

    for device_plugin in device_plugins:
        if device_plugin not in enabled_kube_plugins:
            continue

        label = {
            'label_key': device_plugin,
            'label_value': 'enabled',
            'host_id': ihost.id,
        }
        try:
            self.dbapi.label_create(host_uuid, label)
        except exception.HostLabelAlreadyExists:
            # The label is already present; nothing to do.
            pass
@periodic_task.periodic_task(spacing=CONF.conductor_periodic_task_intervals.agent_update_request)
def _agent_update_request(self, context):
"""

View File

@ -468,22 +468,6 @@ class ConductorAPI(sysinv.openstack.common.rpc.proxy.RpcProxy):
return self.cast(context, self.make_msg('update_partition_config',
partition=partition))
def device_plugin_labels_update_by_ihost(self, context,
                                         host_uuid, device_plugins):
    """Ask the conductor to assign device plugin labels to an ihost.

    Builds a 'device_plugin_labels_update_by_ihost' message and sends it
    with self.call().

    :param context: an admin context
    :param host_uuid: host uuid unique id
    :param device_plugins: kubernetes device plugins request to assign
    :returns: whatever self.call() returns for the request
    """
    request = self.make_msg('device_plugin_labels_update_by_ihost',
                            host_uuid=host_uuid,
                            device_plugins=device_plugins)
    return self.call(context, request)
def iplatform_update_by_ihost(self, context,
ihost_uuid, imsg_dict):
"""Create or update memory for an ihost with the supplied data.

View File

@ -25,7 +25,6 @@ class FakeConductorAPI(object):
self.finalize_delete_load = mock.MagicMock()
self.create_host_filesystems = mock.MagicMock()
self.update_host_max_cpu_mhz_configured = mock.MagicMock()
self.device_plugin_labels_update_by_ihost = mock.MagicMock()
self.is_virtual_system_config_result = False
self.isystem = isystem
@ -475,32 +474,6 @@ class TestHostFileSystems(base.TestCase):
expected_filesystems)
self.assertEqual(self.agent_manager._prev_fs, expected_filesystems)
def test_report_supported_device_plugin(self):
    """The agent forwards the supported plugins to the conductor and
    records the k8s_device_plugin inventory item as reported."""
    # Mock _get_ports_inventory
    self.mock_get_ports_inventory = mock.MagicMock()
    self.mock_get_ports_inventory.return_value = [
        "", ['k8s_device_plugin', 'port', 'pci_device'], ""]
    ports_patcher = mock.patch(
        'sysinv.agent.manager.AgentManager._get_ports_inventory',
        self.mock_get_ports_inventory)
    ports_patcher.start()
    self.addCleanup(ports_patcher.stop)

    # Mock get_supported_device_plugins
    self.mock_get_supported_device_plugins = mock.MagicMock()
    self.mock_get_supported_device_plugins.return_value = ['intelgpu']
    plugins_patcher = mock.patch(
        'sysinv.agent.pci.PCIOperator.get_supported_device_plugins',
        self.mock_get_supported_device_plugins)
    plugins_patcher.start()
    self.addCleanup(plugins_patcher.stop)

    self.agent_manager._ihost_uuid = "f47ac10b-58cc-4372-a567-0e02b2c3d479"

    self.agent_manager._report_supported_device_plugin(
        self.context, self.fake_conductor_api)

    # Verify the expected label_key and label_value to be called
    update_call = self.fake_conductor_api.device_plugin_labels_update_by_ihost
    update_call.assert_called_with(self.context,
                                   self.agent_manager._ihost_uuid,
                                   ['intelgpu'])
    self.assertIn('k8s_device_plugin', self.agent_manager._inventory_reported)
@mock.patch('sysinv.agent.manager.os.path.isfile', mock.MagicMock())
@mock.patch('sysinv.agent.manager.subprocess.check_call', mock.MagicMock())

View File

@ -155,43 +155,6 @@ class TestPciOperator(base.TestCase):
result = self.pci_operator.get_pci_sriov_vf_module_name(pfaddr, vfaddrs)
assert result is None
def test_get_supported_device_plugins(self):
    """Only the VGA device bound to the i915 driver yields 'intelgpu'."""
    columns = ('name', 'pclass_id', 'pclass', 'driver')
    rows = (
        ('pci_0000_00_01_1', '01018a', 'IDE interface', 'ata_piix'),
        ('pci_0000_00_02_0', '030000', 'VGA compatible controller', 'i915'),
        ('pci_0000_00_05_0', '040100', 'Multimedia audio controller', None),
        ('pci_0000_00_0d_0', '010601', 'VGA compatible controller', 'ahci'),
        ('pci_0000_00_01_2', '01018b', 'VGA compatible controller', None),
    )
    self.fake_pci_device_list = [dict(zip(columns, row)) for row in rows]

    # Verify the expected label_key and label_value with the actual output
    reported = self.pci_operator.get_supported_device_plugins(
        self.fake_pci_device_list)
    self.assertEqual(['intelgpu'], reported)
class TestAgentOperator(base.TestCase):

View File

@ -28,7 +28,8 @@ def mock_helm_override_get(dbapi, app_name, chart_name, namespace):
def mock_get_system_enabled_k8s_plugins_return_plugins():
return ["intelgpu", "intelqat"]
return {"intel-gpu-plugin": "intelgpu=enabled",
"intel-qat-plugin": "intelqat=enabled"}
def mock_get_system_enabled_k8s_plugins_return_none():

View File

@ -5133,24 +5133,6 @@ class ManagerTestCase(base.DbTestCase):
self.assertEqual(endpoints, config_dict)
def mock_os_file_path(self):
    """Patch os.path.isfile to return True for the rest of the test."""
    # return_value must be configured on the mock that replaces
    # os.path.isfile; setting it on the patcher object has no effect.
    isfile_patcher = mock.patch('os.path.isfile', return_value=True)
    isfile_patcher.start()
    self.addCleanup(isfile_patcher.stop)
def test_get_enabled_kube_plugins(self):
    """_get_enabled_kube_plugins returns every plugin listed in the file."""
    self.mock_os_file_path()

    file_content = '''["intelgpu", "intelqat", "intelfpga"]'''
    open_patcher = mock.patch('builtins.open',
                              mock.mock_open(read_data=file_content))
    open_patcher.start()
    self.addCleanup(open_patcher.stop)

    # Verify the _get_enabled_kube_plugins return all plugins when all are enabled
    self.assertEqual(self.service._get_enabled_kube_plugins(),
                     ['intelgpu', 'intelqat', 'intelfpga'])
def _kernel_alarms_fix_keys(self, alarm_id, entity_id=None):
"""Create the nested dictionary keys if they are missing
Prevents KeyError exceptions