From 9505cefcfef5ab47f4afa5a9466859e228a30524 Mon Sep 17 00:00:00 2001 From: amothuku1 Date: Tue, 12 Sep 2023 13:32:01 -0400 Subject: [PATCH] Revert "Add functionality for intel gpu device plugin" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reason for revert: The functionality added here will be a part of a different application, and the code here is now obsolete. This reverts commit 878509: Add functionality for Intel gpu device plugin https://review.opendev.org/c/starlingx/config/+/878509?usp=search. Test Plan: PASS: Bootstrap is successful PASS: system host-unlock with no alarms PASS: verified "kubectl get nodes controller-0 --show-labels" command doesn't show "intelgpu=enabled" label. PASS: verified "kubectl get ds -A | grep 'gpu-plugin'" command shows daemonset is offline PASS: Designer build of Debian AIO-SX and deployment without QAT and GPU functionality code. Reference to the application where these functionalities will be added: https://opendev.org/starlingx/app-intel-device-plugins The same storyboard can be tracked for further updates. 
Story: 2010604 Task: 48739 Change-Id: I5d64cd8bf2439237b459347cb095bdc7f5bb420f Signed-off-by: Aman Pandae --- sysinv/sysinv/sysinv/sysinv/agent/manager.py | 34 ------------- sysinv/sysinv/sysinv/sysinv/agent/pci.py | 35 ------------- .../sysinv/sysinv/api/controllers/v1/label.py | 10 ++-- .../sysinv/sysinv/sysinv/common/constants.py | 4 +- .../sysinv/sysinv/sysinv/conductor/manager.py | 50 ------------------- .../sysinv/sysinv/sysinv/conductor/rpcapi.py | 16 ------ .../sysinv/sysinv/tests/agent/test_manager.py | 27 ---------- .../sysinv/sysinv/tests/agent/test_pci.py | 37 -------------- .../sysinv/sysinv/tests/api/test_label.py | 3 +- .../sysinv/tests/conductor/test_manager.py | 18 ------- 10 files changed, 8 insertions(+), 226 deletions(-) diff --git a/sysinv/sysinv/sysinv/sysinv/agent/manager.py b/sysinv/sysinv/sysinv/sysinv/agent/manager.py index 6512b73866..5bc46c306a 100644 --- a/sysinv/sysinv/sysinv/sysinv/agent/manager.py +++ b/sysinv/sysinv/sysinv/sysinv/agent/manager.py @@ -153,7 +153,6 @@ class AgentManager(service.PeriodicService): PV = 'pv' LVG = 'lvg' HOST_FILESYSTEMS = 'host_filesystems' - K8S_DEVICE_PLUGIN = 'k8s_device_plugin' KERNEL = 'kernel' # Note that this set must be extended when there are @@ -169,7 +168,6 @@ class AgentManager(service.PeriodicService): PV, LVG, HOST_FILESYSTEMS, - K8S_DEVICE_PLUGIN, KERNEL} def __init__(self, host, topic): @@ -955,8 +953,6 @@ class AgentManager(service.PeriodicService): self._report_port_inventory(icontext, rpcapi, port_list, pci_device_list) - self._report_supported_device_plugin(icontext, rpcapi, pci_device_list) - # Find list of numa_nodes and cpus for this ihost inumas, icpus = self._inode_operator.inodes_get_inumas_icpus() @@ -1080,36 +1076,6 @@ class AgentManager(service.PeriodicService): self._report_to_conductor_iplatform_avail() self._iconfig_read_config_reported = config_uuid - @retrying.retry(wait_fixed=15 * 1000, stop_max_delay=300 * 1000, - retry_on_exception=_retry_on_missing_host_uuid) - def 
_report_supported_device_plugin(self, context, rpcapi, pci_device_list=None): - - if not self._ihost_uuid: - raise exception.LocalHostUUIDNotFound() - - if pci_device_list is None: - port_list, pci_device_list, host_macs = self._get_ports_inventory() - - device_plugins = self._ipci_operator.get_supported_device_plugins(pci_device_list) - - if not device_plugins: - # self.K8S_DEVICE_PLUGIN is an expected inventory report, so need to add it. - self._inventory_reported.add(self.K8S_DEVICE_PLUGIN) - LOG.info("No plugins proposed by PCIOperator.") - return - - try: - rpcapi.device_plugin_labels_update_by_ihost(context, - self._ihost_uuid, - device_plugins) - self._inventory_reported.add(self.K8S_DEVICE_PLUGIN) - except RemoteError as e: - LOG.error("device_plugin_labels_update_by_ihost RemoteError exc_type=%s" % - e.exc_type) - except exception.SysinvException: - LOG.exception("Sysinv Agent uncaught exception updating device plugin labels.") - pass - def subfunctions_get(self): """ returns subfunctions on this host. 
""" diff --git a/sysinv/sysinv/sysinv/sysinv/agent/pci.py b/sysinv/sysinv/sysinv/sysinv/agent/pci.py index 6b2750fa1b..8b1baa1846 100644 --- a/sysinv/sysinv/sysinv/sysinv/agent/pci.py +++ b/sysinv/sysinv/sysinv/sysinv/agent/pci.py @@ -179,31 +179,6 @@ class PCIDevice(object): return "" % str(self) -class DevicePlugin(object): - '''Class to record specific information of each k8s device plugins''' - def __init__(self): - return - - def get_plugin(self, pci_device_list): - return None - - -class IntelGPUDp(DevicePlugin): - def __init__(self): - return - - def get_plugin(self, pci_device_list): - - for device in pci_device_list: - if constants.GPU_DEVICE_PCLASS_VGA in device['pclass'] and device['driver'] is not None: - if constants.GPU_DEVICE_DRIVER_I915 in device['driver']: - return constants.K8S_INTEL_GPU_DEVICE_PLUGIN - return None - - -DEVICE_PLUGIN_LIST = [IntelGPUDp()] - - class PCIOperator(object): '''Class to encapsulate PCI operations for System Inventory''' @@ -809,13 +784,3 @@ class PCIOperator(object): pci_attrs_array.append(attrs) return pci_attrs_array - - def get_supported_device_plugins(self, pci_device_list): - - plugins = [] - for device_plugin in DEVICE_PLUGIN_LIST: - plugin = device_plugin.get_plugin(pci_device_list) - if plugin is not None: - plugins.append(plugin) - - return plugins diff --git a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/label.py b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/label.py index 24e6dcda30..e9838886d6 100644 --- a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/label.py +++ b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/label.py @@ -395,8 +395,7 @@ def _get_system_enabled_k8s_plugins(): def _semantic_check_intel_gpu_plugins_labels(host): pci_devices = pecan.request.dbapi.pci_device_get_by_host(host.id) for pci_device in pci_devices: - if (constants.GPU_DEVICE_PCLASS_VGA in pci_device.pclass and - pci_device.driver == constants.GPU_DEVICE_DRIVER_I915): + if ("VGA" in pci_device.pclass and pci_device.driver == 
"i915"): return raise wsme.exc.ClientSideError("Host %s does not support Intel GPU device plugin." % (host.hostname)) @@ -410,7 +409,8 @@ def _semantic_check_k8s_plugins_labels(host, body): if plugins is None: return - for label in body.keys(): - if label in plugins: - if label == constants.K8S_INTEL_GPU_DEVICE_PLUGIN: + for label_key, label_value in body.items(): + label = label_key + "=" + label_value + if label in plugins.values(): + if label == constants.KUBE_INTEL_GPU_DEVICE_PLUGIN_LABEL: _semantic_check_intel_gpu_plugins_labels(host) diff --git a/sysinv/sysinv/sysinv/sysinv/common/constants.py b/sysinv/sysinv/sysinv/sysinv/common/constants.py index e6d3758f2c..da35c1276e 100644 --- a/sysinv/sysinv/sysinv/sysinv/common/constants.py +++ b/sysinv/sysinv/sysinv/sysinv/common/constants.py @@ -2230,9 +2230,7 @@ HOST_BM_VALID_PROVISIONED_TYPE_LIST = [HOST_BM_TYPE_DYNAMIC, # K8s device plugins DEVICE_PLUGINS_FILE = "enabled_kube_plugins" ENABLED_KUBE_PLUGINS = os.path.join(tsc.CONFIG_PATH, DEVICE_PLUGINS_FILE) -K8S_INTEL_GPU_DEVICE_PLUGIN = "intelgpu" -GPU_DEVICE_DRIVER_I915 = "i915" -GPU_DEVICE_PCLASS_VGA = "VGA" +KUBE_INTEL_GPU_DEVICE_PLUGIN_LABEL = "intelgpu=enabled" # Port on which ceph manager and ceph-mgr listens CEPH_MGR_PORT = 7999 diff --git a/sysinv/sysinv/sysinv/sysinv/conductor/manager.py b/sysinv/sysinv/sysinv/sysinv/conductor/manager.py index b955f11875..8ccbf0a5c6 100644 --- a/sysinv/sysinv/sysinv/sysinv/conductor/manager.py +++ b/sysinv/sysinv/sysinv/sysinv/conductor/manager.py @@ -33,7 +33,6 @@ import errno import filecmp import glob import hashlib -import json import io import math import os @@ -5745,55 +5744,6 @@ class ConductorManager(service.PeriodicService): tsc.install_uuid) greenthread.sleep(constants.FIX_INSTALL_UUID_INTERVAL_SECS) - def _get_enabled_kube_plugins(self): - - # this file will be generated after initial config process if the - # kubernetes device plugin list is not empty. 
- if not os.path.isfile(constants.ENABLED_KUBE_PLUGINS): - return None - - try: - file_object = open(constants.ENABLED_KUBE_PLUGINS) - plugins = json.loads(file_object.read()) - return plugins - except Exception as e: - LOG.error("failed to get kube_plugin list from file. \ - exception: %s" % str(e)) - return None - - def device_plugin_labels_update_by_ihost(self, context, - host_uuid, device_plugins): - - """Assign device plugins to an ihost with the supplied data. - - :param context: an admin context - :param host_uuid: host uuid unique id - :param device_plugins: kubernetes device plugins request to assign - """ - enabled_kube_plugins = self._get_enabled_kube_plugins() - if not enabled_kube_plugins: - LOG.info("Vendor k8s device plugin list is empty. \ - Set parameters in ansible override file if required.") - return - - host_uuid.strip() - try: - ihost = self.dbapi.ihost_get(host_uuid) - except exception.ServerNotFound: - LOG.exception("Invalid host_uuid %s" % host_uuid) - return - - for device_plugin in device_plugins: - if device_plugin not in enabled_kube_plugins: - continue - - label = {} - label.update({'label_key': device_plugin, 'label_value': 'enabled', 'host_id': ihost.id}) - try: - self.dbapi.label_create(host_uuid, label) - except exception.HostLabelAlreadyExists: - pass - @periodic_task.periodic_task(spacing=CONF.conductor_periodic_task_intervals.agent_update_request) def _agent_update_request(self, context): """ diff --git a/sysinv/sysinv/sysinv/sysinv/conductor/rpcapi.py b/sysinv/sysinv/sysinv/sysinv/conductor/rpcapi.py index b878fa48f6..f7b92b40b9 100644 --- a/sysinv/sysinv/sysinv/sysinv/conductor/rpcapi.py +++ b/sysinv/sysinv/sysinv/sysinv/conductor/rpcapi.py @@ -468,22 +468,6 @@ class ConductorAPI(sysinv.openstack.common.rpc.proxy.RpcProxy): return self.cast(context, self.make_msg('update_partition_config', partition=partition)) - def device_plugin_labels_update_by_ihost(self, context, - host_uuid, device_plugins): - - """Assign device plugin 
labels to an ihost with the supplied data. - - :param context: an admin context - :param host_uuid: host uuid unique id - :param device_plugins: kubernetes device plugins request to assign - :returns: pass or fail - """ - - return self.call(context, - self.make_msg('device_plugin_labels_update_by_ihost', - host_uuid=host_uuid, - device_plugins=device_plugins)) - def iplatform_update_by_ihost(self, context, ihost_uuid, imsg_dict): """Create or update memory for an ihost with the supplied data. diff --git a/sysinv/sysinv/sysinv/sysinv/tests/agent/test_manager.py b/sysinv/sysinv/sysinv/sysinv/tests/agent/test_manager.py index 842296aeb4..3deb91fc74 100644 --- a/sysinv/sysinv/sysinv/sysinv/tests/agent/test_manager.py +++ b/sysinv/sysinv/sysinv/sysinv/tests/agent/test_manager.py @@ -25,7 +25,6 @@ class FakeConductorAPI(object): self.finalize_delete_load = mock.MagicMock() self.create_host_filesystems = mock.MagicMock() self.update_host_max_cpu_mhz_configured = mock.MagicMock() - self.device_plugin_labels_update_by_ihost = mock.MagicMock() self.is_virtual_system_config_result = False self.isystem = isystem @@ -475,32 +474,6 @@ class TestHostFileSystems(base.TestCase): expected_filesystems) self.assertEqual(self.agent_manager._prev_fs, expected_filesystems) - def test_report_supported_device_plugin(self): - - # Mock _get_ports_inventory - self.mock_get_ports_inventory = mock.MagicMock() - p = mock.patch('sysinv.agent.manager.AgentManager._get_ports_inventory', - self.mock_get_ports_inventory) - p.start().return_value = ["", ['k8s_device_plugin', 'port', 'pci_device'], ""] - self.addCleanup(p.stop) - - # Mock get_supported_device_plugins - self.mock_get_supported_device_plugins = mock.MagicMock() - p = mock.patch('sysinv.agent.pci.PCIOperator.get_supported_device_plugins', - self.mock_get_supported_device_plugins) - p.start().return_value = ['intelgpu'] - self.addCleanup(p.stop) - - self.agent_manager._ihost_uuid = "f47ac10b-58cc-4372-a567-0e02b2c3d479" - # Verify the 
expected label_key and label_value to be called - expected_label = ['intelgpu'] - self.agent_manager._report_supported_device_plugin(self.context, self.fake_conductor_api) - self.fake_conductor_api.device_plugin_labels_update_by_ihost.assert_called_with( - self.context, - self.agent_manager._ihost_uuid, - expected_label) - self.assertIn('k8s_device_plugin', self.agent_manager._inventory_reported) - @mock.patch('sysinv.agent.manager.os.path.isfile', mock.MagicMock()) @mock.patch('sysinv.agent.manager.subprocess.check_call', mock.MagicMock()) diff --git a/sysinv/sysinv/sysinv/sysinv/tests/agent/test_pci.py b/sysinv/sysinv/sysinv/sysinv/tests/agent/test_pci.py index 4681782ee8..e167834b98 100644 --- a/sysinv/sysinv/sysinv/sysinv/tests/agent/test_pci.py +++ b/sysinv/sysinv/sysinv/sysinv/tests/agent/test_pci.py @@ -155,43 +155,6 @@ class TestPciOperator(base.TestCase): result = self.pci_operator.get_pci_sriov_vf_module_name(pfaddr, vfaddrs) assert result is None - def test_get_supported_device_plugins(self): - self.fake_pci_device_list = [ - { - 'name': 'pci_0000_00_01_1', - 'pclass_id': '01018a', - 'pclass': 'IDE interface', - 'driver': 'ata_piix', - }, - { - 'name': 'pci_0000_00_02_0', - 'pclass_id': '030000', - 'pclass': 'VGA compatible controller', - 'driver': 'i915' - }, - { - 'name': 'pci_0000_00_05_0', - 'pclass_id': '040100', - 'pclass': 'Multimedia audio controller', - 'driver': None - }, - { - 'name': 'pci_0000_00_0d_0', - 'pclass_id': '010601', - 'pclass': 'VGA compatible controller', - 'driver': 'ahci' - }, - { - 'name': 'pci_0000_00_01_2', - 'pclass_id': '01018b', - 'pclass': 'VGA compatible controller', - 'driver': None - }] - # Verify the expected label_key and label_value with the actual output - expected_label = ['intelgpu'] - actual_label = self.pci_operator.get_supported_device_plugins(self.fake_pci_device_list) - self.assertEqual(expected_label, actual_label) - class TestAgentOperator(base.TestCase): diff --git 
a/sysinv/sysinv/sysinv/sysinv/tests/api/test_label.py b/sysinv/sysinv/sysinv/sysinv/tests/api/test_label.py index b3b3d94280..54e82e079e 100644 --- a/sysinv/sysinv/sysinv/sysinv/tests/api/test_label.py +++ b/sysinv/sysinv/sysinv/sysinv/tests/api/test_label.py @@ -28,7 +28,8 @@ def mock_helm_override_get(dbapi, app_name, chart_name, namespace): def mock_get_system_enabled_k8s_plugins_return_plugins(): - return ["intelgpu", "intelqat"] + return {"intel-gpu-plugin": "intelgpu=enabled", + "intel-qat-plugin": "intelqat=enabled"} def mock_get_system_enabled_k8s_plugins_return_none(): diff --git a/sysinv/sysinv/sysinv/sysinv/tests/conductor/test_manager.py b/sysinv/sysinv/sysinv/sysinv/tests/conductor/test_manager.py index 9c5758e7ec..0ffdf00927 100644 --- a/sysinv/sysinv/sysinv/sysinv/tests/conductor/test_manager.py +++ b/sysinv/sysinv/sysinv/sysinv/tests/conductor/test_manager.py @@ -5133,24 +5133,6 @@ class ManagerTestCase(base.DbTestCase): self.assertEqual(endpoints, config_dict) - def mock_os_file_path(self): - # Mock _get_kube_plugin_labels os.path.isfile - mock_isfile = mock.patch('os.path.isfile') - mock_isfile.return_value = True - mock_isfile.start() - self.addCleanup(mock_isfile.stop) - - def test_get_enabled_kube_plugins(self): - self.mock_os_file_path() - open_mock = mock.mock_open(read_data='''["intelgpu", "intelqat", "intelfpga"]''') - open_patch = mock.patch('builtins.open', open_mock) - open_patch.start() - self.addCleanup(open_patch.stop) - # Verify the _get_enabled_kube_plugins return all plugins when all are enabled - actual_output = self.service._get_enabled_kube_plugins() - expected_ouput = ['intelgpu', 'intelqat', 'intelfpga'] - self.assertEqual(actual_output, expected_ouput) - def _kernel_alarms_fix_keys(self, alarm_id, entity_id=None): """Create the nested dictionary keys if they are missing Prevents KeyError exceptions