From 07a07c6bccb762f028e6e1d8b0d370d2d97ca6e8 Mon Sep 17 00:00:00 2001 From: zhipengl Date: Wed, 27 Feb 2019 23:24:36 +0800 Subject: [PATCH] Implement Pci Interrupt Affinity Agent Create an agent which runs on each worker node to do pci interrupt affinity work. nova-sriov installed by this new package instead of old nova-utils. Below test done and pass, see detailed test spec in story link. 1) deployment test with/without openstack application 2) Periodic audit pci irq affinity 3) Remove VM without sriov pci port 4) Remove VM with sriov pci port 5) Add VM without sriov pci port 6) Add VM with sriov pci port 7) Add VM without pci_irq_affinity_mask 8) Add VM without cpu policy set 9) VM resize test 10) Remove one pci port for VM Code framework is like below +------------+ +--------------+ +------------+ | | | | | | | | | | | | | Agent.py | -----> | affinity.py | -----> | driver.py | | | | | | | | Daemon | | Conduct | | Drv | | | | | | | +------------+ +--------------+ +------------+ Story: 2004600 Task: 28850 Depends-on: https://review.opendev.org/#/c/640263/ Depends-on: https://review.opendev.org/#/c/654415/ Change-Id: Ie668036efe4d0013fed8cd45805f0321692c76f0 Signed-off-by: zhipengl --- centos_iso_image.inc | 4 +- centos_pkg_dirs | 2 +- utilities/nova-utils/centos/build_srpm.data | 2 - utilities/nova-utils/centos/nova-utils.spec | 38 --- utilities/pci-irq-affinity-agent/PKG-INFO | 7 + .../centos/build_srpm.data | 3 + .../centos/pci-irq-affinity.spec | 70 +++++ .../files}/LICENSE | 0 .../pci-irq-affinity-agent/files/config.ini | 22 ++ .../files}/nova-sriov | 0 .../files/pci-irq-affinity-agent | 120 ++++++++ .../files/pci-irq-affinity-agent.conf | 9 + .../files/pci-irq-affinity-agent.service | 14 + .../pci_irq_affinity/__init__.py | 0 .../pci_irq_affinity/affinity.py | 92 ++++++ .../pci_irq_affinity/agent.py | 206 +++++++++++++ .../pci_irq_affinity/config.py | 45 +++ .../pci_irq_affinity/driver.py | 141 +++++++++ .../pci_irq_affinity/guest.py | 265 ++++++++++++++++ 
.../pci_irq_affinity/instance.py | 82 +++++ .../pci_irq_affinity/pci_irq_affinity/log.py | 28 ++ .../pci_irq_affinity/nova_provider.py | 139 +++++++++ .../pci_irq_affinity/utils.py | 291 ++++++++++++++++++ .../pci_irq_affinity/setup.py | 35 +++ 24 files changed, 1572 insertions(+), 43 deletions(-) delete mode 100644 utilities/nova-utils/centos/build_srpm.data delete mode 100644 utilities/nova-utils/centos/nova-utils.spec create mode 100644 utilities/pci-irq-affinity-agent/PKG-INFO create mode 100644 utilities/pci-irq-affinity-agent/centos/build_srpm.data create mode 100644 utilities/pci-irq-affinity-agent/centos/pci-irq-affinity.spec rename utilities/{nova-utils/nova-utils => pci-irq-affinity-agent/files}/LICENSE (100%) create mode 100644 utilities/pci-irq-affinity-agent/files/config.ini rename utilities/{nova-utils/nova-utils => pci-irq-affinity-agent/files}/nova-sriov (100%) create mode 100755 utilities/pci-irq-affinity-agent/files/pci-irq-affinity-agent create mode 100644 utilities/pci-irq-affinity-agent/files/pci-irq-affinity-agent.conf create mode 100644 utilities/pci-irq-affinity-agent/files/pci-irq-affinity-agent.service create mode 100644 utilities/pci-irq-affinity-agent/pci_irq_affinity/pci_irq_affinity/__init__.py create mode 100644 utilities/pci-irq-affinity-agent/pci_irq_affinity/pci_irq_affinity/affinity.py create mode 100644 utilities/pci-irq-affinity-agent/pci_irq_affinity/pci_irq_affinity/agent.py create mode 100644 utilities/pci-irq-affinity-agent/pci_irq_affinity/pci_irq_affinity/config.py create mode 100644 utilities/pci-irq-affinity-agent/pci_irq_affinity/pci_irq_affinity/driver.py create mode 100644 utilities/pci-irq-affinity-agent/pci_irq_affinity/pci_irq_affinity/guest.py create mode 100644 utilities/pci-irq-affinity-agent/pci_irq_affinity/pci_irq_affinity/instance.py create mode 100644 utilities/pci-irq-affinity-agent/pci_irq_affinity/pci_irq_affinity/log.py create mode 100644 
utilities/pci-irq-affinity-agent/pci_irq_affinity/pci_irq_affinity/nova_provider.py create mode 100644 utilities/pci-irq-affinity-agent/pci_irq_affinity/pci_irq_affinity/utils.py create mode 100644 utilities/pci-irq-affinity-agent/pci_irq_affinity/setup.py diff --git a/centos_iso_image.inc b/centos_iso_image.inc index 29c39b47f..293c40d75 100644 --- a/centos_iso_image.inc +++ b/centos_iso_image.inc @@ -44,8 +44,8 @@ python-smartpm # lldpd lldpd -# nova-utils -nova-utils +# pci-irq-affinity-agent +pci-irq-affinity-agent # mlx4-config mlx4-config diff --git a/centos_pkg_dirs b/centos_pkg_dirs index 1e292863b..47a5d5ffc 100644 --- a/centos_pkg_dirs +++ b/centos_pkg_dirs @@ -27,7 +27,7 @@ python/python-voluptuous networking/lldpd logging/logrotate logging/logrotate-config -utilities/nova-utils +utilities/pci-irq-affinity-agent security/shim-unsigned security/shim-signed base/sudo diff --git a/utilities/nova-utils/centos/build_srpm.data b/utilities/nova-utils/centos/build_srpm.data deleted file mode 100644 index 4620f4786..000000000 --- a/utilities/nova-utils/centos/build_srpm.data +++ /dev/null @@ -1,2 +0,0 @@ -COPY_LIST="$PKG_BASE/nova-utils/*" -TIS_PATCH_VER=1 diff --git a/utilities/nova-utils/centos/nova-utils.spec b/utilities/nova-utils/centos/nova-utils.spec deleted file mode 100644 index dc0dd5f2f..000000000 --- a/utilities/nova-utils/centos/nova-utils.spec +++ /dev/null @@ -1,38 +0,0 @@ -Summary: nova-utils version 1.0-r1 -Name: nova-utils -Version: 1.0 -Release: %{tis_patch_ver}%{?_tis_dist} -License: Apache-2.0 -Group: development -Packager: Wind River -URL: unknown - -Source0: LICENSE -Source1: nova-sriov - -%description -Nova utilities package - -%package -n nova-utils-devel -Summary: nova-utils - Development files -Group: devel -Requires: nova-utils = %{version}-%{release} - -%description -n nova-utils-devel -Nova utilities package This package contains symbolic links, header files, -and related items necessary for software development. 
- -%install -rm -rf $RPM_BUILD_ROOT -mkdir -p $RPM_BUILD_ROOT/%{_bindir} -install -m 0755 %{SOURCE1} $RPM_BUILD_ROOT/%{_bindir}/nova-sriov -mkdir -p $RPM_BUILD_ROOT/%{_defaultdocdir}/%{name}-%{version} -install -m 644 %{SOURCE0} $RPM_BUILD_ROOT/%{_defaultdocdir}/%{name}-%{version} - -%files -%defattr(-,root,root,-) -%{_bindir}/nova-sriov -%{_defaultdocdir}/%{name}-%{version} - -%files -n nova-utils-devel -%defattr(-,root,root,-) diff --git a/utilities/pci-irq-affinity-agent/PKG-INFO b/utilities/pci-irq-affinity-agent/PKG-INFO new file mode 100644 index 000000000..cb14bb3eb --- /dev/null +++ b/utilities/pci-irq-affinity-agent/PKG-INFO @@ -0,0 +1,7 @@ +Metadata-Version: 1.2 +Name: PCIInterruptAffinityAgent +Version: 1.0 +Summary: PCI Interrupt Affinity Agent Package +Author: StarlingX +License: Apache-2.0 +Platform: UNKNOWN diff --git a/utilities/pci-irq-affinity-agent/centos/build_srpm.data b/utilities/pci-irq-affinity-agent/centos/build_srpm.data new file mode 100644 index 000000000..38fcf6e53 --- /dev/null +++ b/utilities/pci-irq-affinity-agent/centos/build_srpm.data @@ -0,0 +1,3 @@ +SRC_DIR="pci_irq_affinity" +COPY_LIST_TO_TAR="files/*" +TIS_PATCH_VER=1 diff --git a/utilities/pci-irq-affinity-agent/centos/pci-irq-affinity.spec b/utilities/pci-irq-affinity-agent/centos/pci-irq-affinity.spec new file mode 100644 index 000000000..fc8d9853b --- /dev/null +++ b/utilities/pci-irq-affinity-agent/centos/pci-irq-affinity.spec @@ -0,0 +1,70 @@ +Summary: StarlingX PCI Interrupt Affinity Agent Package +Name: pci-irq-affinity-agent +Version: 1.0 +Release: %{tis_patch_ver}%{?_tis_dist} +License: Apache-2.0 +Group: base +Packager: StarlingX +URL: unknown + +Source0: %{name}-%{version}.tar.gz + +Requires: python-novaclient +BuildRequires: python-setuptools +BuildRequires: systemd-devel + +%description +StarlingX PCI Interrupt Affinity Agent Package + +%define local_etc_initd /etc/init.d/ +%define local_etc_pmond /etc/pmon.d/ +%define pythonroot /usr/lib64/python2.7/site-packages 
+%define debug_package %{nil} + +%prep +%setup + +# Remove bundled egg-info +rm -rf *.egg-info + +%build +%{__python} setup.py build + +%install +%{__python} setup.py install --root=%{buildroot} \ + --install-lib=%{pythonroot} \ + --prefix=/usr \ + --install-data=/usr/share \ + --single-version-externally-managed + +%{__install} -d -m 755 %{buildroot}%{local_etc_initd} +%{__install} -p -D -m 755 pci-irq-affinity-agent %{buildroot}%{local_etc_initd}/pci-irq-affinity-agent + +%{__install} -d -m 755 %{buildroot}%{local_etc_pmond} +%{__install} -p -D -m 644 pci-irq-affinity-agent.conf %{buildroot}%{local_etc_pmond}/pci-irq-affinity-agent.conf +%{__install} -p -D -m 644 pci-irq-affinity-agent.service %{buildroot}%{_unitdir}/pci-irq-affinity-agent.service + +%{__install} -d %{buildroot}%{_bindir} +%{__install} -p -D -m 755 nova-sriov %{buildroot}%{_bindir}/nova-sriov + +%{__install} -d %{buildroot}%{_sysconfdir}/pci_irq_affinity +%{__install} -p -D -m 600 config.ini %{buildroot}%{_sysconfdir}/pci_irq_affinity/config.ini + +%post +/usr/bin/systemctl enable pci-irq-affinity-agent.service >/dev/null 2>&1 + +%clean +rm -rf $RPM_BUILD_ROOT + +%files +%defattr(-,root,root,-) +%doc LICENSE +%{local_etc_initd}/pci-irq-affinity-agent +%{local_etc_pmond}/pci-irq-affinity-agent.conf +%{_unitdir}/pci-irq-affinity-agent.service +%{pythonroot}/pci_irq_affinity/* +%{pythonroot}/pci_irq_affinity_agent-%{version}*.egg-info + +%{_bindir}/pci-irq-affinity-agent +%{_bindir}/nova-sriov +%config(noreplace) %{_sysconfdir}/pci_irq_affinity/config.ini diff --git a/utilities/nova-utils/nova-utils/LICENSE b/utilities/pci-irq-affinity-agent/files/LICENSE similarity index 100% rename from utilities/nova-utils/nova-utils/LICENSE rename to utilities/pci-irq-affinity-agent/files/LICENSE diff --git a/utilities/pci-irq-affinity-agent/files/config.ini b/utilities/pci-irq-affinity-agent/files/config.ini new file mode 100644 index 000000000..50fd8870c --- /dev/null +++ 
b/utilities/pci-irq-affinity-agent/files/config.ini @@ -0,0 +1,22 @@ +# +# Copyright (c) 2019 StarlingX. +# +# SPDX-License-Identifier: Apache-2.0 +# +[openstack] +openstack_enabled=False +username=admin +tenant=admin +authorization_protocol=http +authorization_ip=192.168.204.2 +authorization_port=5000 +user_domain_name=Default +project_domain_name=Default +keyring_service=CGCS + +[amqp] +host=192.168.204.2 +port=5672 +user_id=guest +password=guest +virt_host=/ diff --git a/utilities/nova-utils/nova-utils/nova-sriov b/utilities/pci-irq-affinity-agent/files/nova-sriov similarity index 100% rename from utilities/nova-utils/nova-utils/nova-sriov rename to utilities/pci-irq-affinity-agent/files/nova-sriov diff --git a/utilities/pci-irq-affinity-agent/files/pci-irq-affinity-agent b/utilities/pci-irq-affinity-agent/files/pci-irq-affinity-agent new file mode 100755 index 000000000..600b3d894 --- /dev/null +++ b/utilities/pci-irq-affinity-agent/files/pci-irq-affinity-agent @@ -0,0 +1,120 @@ +#! /bin/sh +# +# Copyright (c) 2019 StarlingX. +# +# SPDX-License-Identifier: Apache-2.0 +# + +# +# chkconfig: 2345 75 25 +# +### BEGIN INIT INFO +# Provides: pci-irq-affinity-agent +### END INIT INFO + +source /etc/init.d/functions + +PLATFORM_CONF="/etc/platform/platform.conf" +NODETYPE="" +DAEMON_NAME="pci-irq-affinity-agent" +AFFINITYAGENT="/usr/bin/${DAEMON_NAME}" + +daemon_pidfile="/var/run/${DAEMON_NAME}.pid" + +if [ -f ${PLATFORM_CONF} ] ; then + source ${PLATFORM_CONF} + NODETYPE=${nodetype} +else + logger "$0: ${PLATFORM_CONF} is missing" + exit 1 +fi + +if [ ! -f "${AFFINITYAGENT}" ] ; then + logger "$0: ${AFFINITYAGENT} is missing" + exit 1 +fi + +RETVAL=0 + +PATH=/sbin:/usr/sbin:/bin:/usr/bin:/usr/local/bin +export PATH + +case "$1" in + start) + # Check for installation failure + if [ -f /etc/platform/installation_failed ] ; then + logger "$0: /etc/platform/installation_failed flag is set. Aborting." 
+ exit 1 + fi + + if [ ${NODETYPE} = "worker" ] ; then + echo -n "Setting up config for pci-irq-affinity-agent: " + + if [ -e ${daemon_pidfile} ] ; then + echo "Killing existing process before starting new" + pid=`cat ${daemon_pidfile}` + kill -TERM $pid + rm -f ${daemon_pidfile} + fi + + echo -n "Starting pci-irq-affinity-agent: " + /bin/sh -c "${AFFINITYAGENT}"' >> /dev/null 2>&1 & echo $!' > ${daemon_pidfile} + RETVAL=$? + if [ $RETVAL -eq 0 ] ; then + echo "OK" + touch /var/lock/subsys/${DAEMON_NAME} + else + echo "FAIL" + fi + fi + ;; + + stop) + if [ ${NODETYPE} = "worker" ] ; then + echo -n "Stopping pci-irq-affinity-agent: " + + if [ -e ${daemon_pidfile} ] ; then + pid=`cat ${daemon_pidfile}` + kill -TERM $pid + rm -f ${daemon_pidfile} + rm -f /var/lock/subsys/${DAEMON_NAME} + echo "OK" + else + echo "FAIL" + fi + fi + ;; + + restart) + $0 stop + sleep 1 + $0 start + ;; + + status) + if [ -e ${daemon_pidfile} ] ; then + pid=`cat ${daemon_pidfile}` + ps -p $pid | grep -v "PID TTY" >> /dev/null 2>&1 + if [ $? 
-eq 0 ] ; then + echo "pci-irq-affinity-agent is running" + RETVAL=0 + else + echo "pci-irq-affinity-agent is not running" + RETVAL=1 + fi + else + echo "pci-irq-affinity-agent is not running ; no pidfile" + RETVAL=1 + fi + ;; + + condrestart) + [ -f /var/lock/subsys/$DAEMON_NAME ] && $0 restart + ;; + + *) + echo "usage: $0 { start | stop | status | restart | condrestart | status }" + ;; +esac + +exit $RETVAL diff --git a/utilities/pci-irq-affinity-agent/files/pci-irq-affinity-agent.conf b/utilities/pci-irq-affinity-agent/files/pci-irq-affinity-agent.conf new file mode 100644 index 000000000..544cee0f7 --- /dev/null +++ b/utilities/pci-irq-affinity-agent/files/pci-irq-affinity-agent.conf @@ -0,0 +1,9 @@ +[process] +process = pci-irq-affinity-agent +pidfile = /var/run/pci-irq-affinity-agent.pid +script = /etc/init.d/pci-irq-affinity-agent +style = lsb ; ocf or lsb +severity = major ; minor, major, critical +restarts = 3 ; restarts before error assertion +interval = 5 ; number of seconds to wait between restarts +debounce = 20 ; number of seconds to wait before degrade clear diff --git a/utilities/pci-irq-affinity-agent/files/pci-irq-affinity-agent.service b/utilities/pci-irq-affinity-agent/files/pci-irq-affinity-agent.service new file mode 100644 index 000000000..737d75a4e --- /dev/null +++ b/utilities/pci-irq-affinity-agent/files/pci-irq-affinity-agent.service @@ -0,0 +1,14 @@ +[Unit] +Description=StarlingX PCI Interrupt Affinity Agent +After=sysinv-agent.service +Before=pmon.service + +[Service] +Type=forking +RemainAfterExit=yes +ExecStart=/etc/init.d/pci-irq-affinity-agent start +ExecStop=/etc/init.d/pci-irq-affinity-agent stop +PIDFile=/var/run/pci-irq-affinity-agent.pid + +[Install] +WantedBy=multi-user.target diff --git a/utilities/pci-irq-affinity-agent/pci_irq_affinity/pci_irq_affinity/__init__.py b/utilities/pci-irq-affinity-agent/pci_irq_affinity/pci_irq_affinity/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git 
a/utilities/pci-irq-affinity-agent/pci_irq_affinity/pci_irq_affinity/affinity.py b/utilities/pci-irq-affinity-agent/pci_irq_affinity/pci_irq_affinity/affinity.py new file mode 100644 index 000000000..88bb1f923 --- /dev/null +++ b/utilities/pci-irq-affinity-agent/pci_irq_affinity/pci_irq_affinity/affinity.py @@ -0,0 +1,92 @@ +# +# Copyright (c) 2019 StarlingX. +# +# SPDX-License-Identifier: Apache-2.0 +# + +# vim: tabstop=4 shiftwidth=4 softtabstop=4 + +# All Rights Reserved. +# + +""" Define pci_irq_affinity_provider class""" + +import utils as pci_utils +from driver import AffinePciIrqDriver +from nova_provider import novaClient +from log import LOG + + +class pci_irq_affinity_provider: + def __init__(self): + self.affinePciIrqDriver = AffinePciIrqDriver() + self.inst_dict = {} + + def reset_irq_affinity(self, uuid, irqs=None, msi_irqs=None): + """Reset irq affinity for instance + + The instance has already been deleted or + related PCI not used by it anymore. + """ + if irqs or msi_irqs: + # reset irq affinity for specified irqs + _irqs = irqs + _msi_irqs = msi_irqs + + elif uuid in self.inst_dict: + # reset all irq affinity for deleted instance + _irqs = self.inst_dict[uuid][0] + _msi_irqs = self.inst_dict[uuid][1] + else: + LOG.debug("No pci affinity need to be reset for instance=%s!" % uuid) + return + + try: + with open('/proc/irq/default_smp_affinity') as f: + cpulist = f.readline().strip() + LOG.debug("default smp affinity bitmap:%s" % cpulist) + + for x in [_irqs, _msi_irqs]: + if x: + pci_utils.set_irq_affinity(True, x, cpulist) + + except Exception as e: + LOG.error("Failed to reset smp affinity! error=%s" % e) + + LOG.info("Reset smp affinity done for instance=%s!" % uuid) + + def instance_irq_pcpulist_update(self, uuid, irqs, msi_irqs, cpulist): + if uuid in self.inst_dict: + _prev = self.inst_dict[uuid] + # get irqs that no longer appear. 
+ _irqs = _prev[0].difference(irqs) + _msi_irqs = _prev[1].difference(msi_irqs) + + # reset pci affinity for those pcis not used by instance anymore + if (len(_irqs) + len(_msi_irqs)) > 0: + self.reset_irq_affinity(uuid, _irqs, _msi_irqs) + + self.inst_dict[uuid] = [irqs, msi_irqs, cpulist] + LOG.debug(self.inst_dict) + + def affine_pci_dev_instance(self, instance, wait_for_irqs=True): + if instance is not None: + if instance.get_cpu_policy() == 'dedicated' and instance.get_pci_devices(): + LOG.debug("Instance=%s use dedicated cpu policy!!!" % instance.uuid) + irqs, msi_irqs, cpulist = \ + self.affinePciIrqDriver.affine_pci_dev_irqs(instance, wait_for_irqs) + # record instance on which pci affinity has been applied + self.instance_irq_pcpulist_update(instance.uuid, irqs, msi_irqs, cpulist) + return + + def audit_pci_irq_affinity(self): + # audit instance PCI devices periodically + filters = {'vm_state': 'active', + 'task_state': None, + 'deleted': False} + instances = novaClient.get_instances(filters) + for inst in instances: + self.affine_pci_dev_instance(inst, wait_for_irqs=False) + + +pciIrqAffinity = pci_irq_affinity_provider() diff --git a/utilities/pci-irq-affinity-agent/pci_irq_affinity/pci_irq_affinity/agent.py b/utilities/pci-irq-affinity-agent/pci_irq_affinity/pci_irq_affinity/agent.py new file mode 100644 index 000000000..c50c7388d --- /dev/null +++ b/utilities/pci-irq-affinity-agent/pci_irq_affinity/pci_irq_affinity/agent.py @@ -0,0 +1,206 @@ +# +# Copyright (c) 2019 StarlingX. +# +# SPDX-License-Identifier: Apache-2.0 +# + +# vim: tabstop=4 shiftwidth=4 softtabstop=4 + +# All Rights Reserved. 
+# + +""" Pci interrupt affinity agent daemon entry""" + +import six +import json +import sys +import signal +import re +import eventlet +import threading +import time + +from oslo_service import periodic_task +from oslo_service import service +import oslo_messaging + +from config import CONF +from config import sysconfig +from nova_provider import novaClient +from affinity import pciIrqAffinity +from log import LOG + +stay_on = True + + +class EventType: + CREATE = 'compute.instance.create.end' + DELETE = 'compute.instance.delete.end' + RESIZE = 'compute.instance.resize.confirm.end' + + +def process_signal_handler(signum, frame): + """Process Signal Handler""" + global stay_on + + if signum in [signal.SIGTERM, signal.SIGINT, signal.SIGTSTP]: + stay_on = False + else: + LOG.info("Ignoring signal %s" % signum) + + +def get_inst(instance_uuid, callback): + # get instance info from nova + inst = novaClient.get_instance(instance_uuid) + if inst is not None: + LOG.debug("inst:%s" % inst) + callback(inst) + + +def query_instance_callback(inst): + LOG.debug("query inst:%s" % inst) + pciIrqAffinity.affine_pci_dev_instance(inst) + + +@periodic_task.periodic_task(spacing=CONF.pci_affine_interval) +def audit_affinity(self, context): + pciIrqAffinity.audit_pci_irq_affinity() + + +def audit_work(srv, callback): + srv.tg.add_dynamic_timer(callback, None, None, None) + srv.tg.wait() + + +def audits_initialize(): + """Init periodic audit task for pci interrupt affinity check""" + srv = service.Service() + periodicTasks = periodic_task.PeriodicTasks(CONF) + periodicTasks.add_periodic_task(audit_affinity) + thread = threading.Thread(target=audit_work, args=(srv, periodicTasks.run_periodic_tasks)) + thread.start() + return srv + + +class InstCreateNotificationEp(object): + filter_rule = oslo_messaging.NotificationFilter( + event_type=EventType.CREATE) + + def info(self, ctxt, publisher_id, event_type, payload, metadata): + uuid = payload.get('instance_id', None) + 
self.instance_create_handler(uuid) + + def instance_create_handler(self, instance_uuid): + if instance_uuid is not None: + LOG.info("instance_created: uuid=%s." % instance_uuid) + eventlet.spawn(get_inst, instance_uuid, query_instance_callback).wait() + + +class InstResizeNotificationEp(object): + filter_rule = oslo_messaging.NotificationFilter( + event_type=EventType.RESIZE) + + def info(self, ctxt, publisher_id, event_type, payload, metadata): + uuid = payload.get('instance_id', None) + self.instance_resize_handler(uuid) + + def instance_resize_handler(self, instance_uuid): + if instance_uuid is not None: + LOG.info("instance_resized: uuid=%s." % instance_uuid) + eventlet.spawn(get_inst, instance_uuid, query_instance_callback).wait() + + +class InstDelNotificationEp(object): + filter_rule = oslo_messaging.NotificationFilter( + event_type=EventType.DELETE) + + def info(self, ctxt, publisher_id, event_type, payload, metadata): + uuid = payload.get('instance_id', None) + self.instance_delete_handler(uuid) + + def instance_delete_handler(self, instance_uuid): + if instance_uuid is not None: + LOG.info("instance_deleted: uuid=%s." % instance_uuid) + pciIrqAffinity.reset_irq_affinity(instance_uuid) + + +def get_rabbit_config(): + """Get rabbit config info from specific system config file.""" + + rabbit_cfg = {} + rabbit_session = 'amqp' + options = ['host', 'port', 'user_id', 'password', + 'virt_host'] + try: + for option in options: + rabbit_cfg[option] = sysconfig.get(rabbit_session, option) + + except Exception as e: + LOG.error("Could not read all required rabbitmq configuration! 
Err=%s" % e) + rabbit_cfg = {} + + return rabbit_cfg + + +def rpc_work(srv): + srv.start() + srv.wait() + + +def start_rabbitmq_client(): + """Start Rabbitmq client to listen instance notifications from Nova""" + cfg = get_rabbit_config() + rabbit_url = "rabbit://%s:%s@%s:%s/%s" % (cfg['user_id'], cfg['password'], + cfg['host'], cfg['port'], cfg['virt_host']) + LOG.info("rabbit host=%s port=%s" % (cfg['host'], cfg['port'])) + + target = oslo_messaging.Target(exchange="nova", topic="notifications", server="info", + version="2.1", fanout=True) + transport = oslo_messaging.get_notification_transport(CONF, url=rabbit_url) + endpoints = [InstCreateNotificationEp(), + InstResizeNotificationEp(), + InstDelNotificationEp()] + + server = oslo_messaging.get_notification_listener(transport, [target], + endpoints, "threading") + thread = threading.Thread(target=rpc_work, args=(server,)) + thread.start() + LOG.info("Rabbitmq Client Started!") + + return server + + +def process_main(): + """Entry function for PCI Interrupt Affinity Agent""" + + LOG.info("Enter PCIInterruptAffinity Agent") + + try: + signal.signal(signal.SIGTSTP, process_signal_handler) + openstack_enabled = sysconfig.get('openstack', 'openstack_enabled') + if openstack_enabled == 'true': + novaClient.open_libvirt_connect() + audit_srv = audits_initialize() + rabbit_client = start_rabbitmq_client() + + while stay_on: + time.sleep(1) + + except KeyboardInterrupt: + LOG.info("keyboard Interrupt received.") + pass + + except Exception as e: + LOG.info("%s" % e) + sys.exit(200) + + finally: + LOG.error("proces_main finalized!!!") + if openstack_enabled == 'true': + novaClient.close_libvirt_connect() + audit_srv.tg.stop() + rabbit_client.stop() + + +if __name__ == '__main__': + process_main() diff --git a/utilities/pci-irq-affinity-agent/pci_irq_affinity/pci_irq_affinity/config.py b/utilities/pci-irq-affinity-agent/pci_irq_affinity/pci_irq_affinity/config.py new file mode 100644 index 000000000..327a98522 --- /dev/null +++ 
b/utilities/pci-irq-affinity-agent/pci_irq_affinity/pci_irq_affinity/config.py @@ -0,0 +1,45 @@ +# +# Copyright (c) 2019 StarlingX. +# +# SPDX-License-Identifier: Apache-2.0 +# + +# vim: tabstop=4 shiftwidth=4 softtabstop=4 + +# All Rights Reserved. +# + +""" Define configuration info for pci-irq-affinity-agent""" + +from six.moves import configparser +from oslo_config import cfg + +pci_irq_affinity_opts = [ + cfg.IntOpt('pci_affine_interval', + default=60, + help='Number of seconds between pci affinity updates'), + cfg.IntOpt('msi_irq_timeout', + default=45, + help='Number of seconds to wait for msi irq configuration'), + cfg.IntOpt('msi_irq_since', + default=6, + help='Number of seconds to wait for msi irqs to stabilize.'), + cfg.IntOpt('msi_irq_check_interval', + default=2, + help='Check interval in seconds for msi irqs to stabilize.'), + cfg.StrOpt('config_file', + default='/etc/pci_irq_affinity/config.ini', + help='Get config info from specific config file.'), +] + +CONF = cfg.CONF + + +def register_opts(conf): + conf.register_opts(pci_irq_affinity_opts) + + +register_opts(CONF) + +sysconfig = configparser.ConfigParser() +sysconfig.read(CONF.config_file) diff --git a/utilities/pci-irq-affinity-agent/pci_irq_affinity/pci_irq_affinity/driver.py b/utilities/pci-irq-affinity-agent/pci_irq_affinity/pci_irq_affinity/driver.py new file mode 100644 index 000000000..9f9c2ca3d --- /dev/null +++ b/utilities/pci-irq-affinity-agent/pci_irq_affinity/pci_irq_affinity/driver.py @@ -0,0 +1,141 @@ +# +# Copyright (c) 2019 StarlingX. +# +# SPDX-License-Identifier: Apache-2.0 +# + +# vim: tabstop=4 shiftwidth=4 softtabstop=4 + +# All Rights Reserved. 
+# + +""" Define AffinePciIrqDriver class""" + +from oslo_service import loopingcall +from oslo_concurrency import lockutils +import utils as pci_utils +import instance +from config import CONF +from log import LOG +from nova_provider import novaClient + +synchronized = lockutils.synchronized_with_prefix('pci_irq_affinity-') + + +class AffinePciIrqDriver: + + def __init__(self): + self._msi_irq_count = {} + self._msi_irq_since = {} + self._msi_irq_elapsed = {} + + def affine_pci_dev_irqs(self, inst, wait_for_irqs=True): + """Affine PCI device irqs to VM's pcpus.""" + + def _wait_for_msi_irqs(self, inst): + """Check if each pci device has the expected number of msi irqs.""" + _prev = self._msi_irq_count.copy() + addrs = set() + + for pci_dev in inst.pci_devices: + addr = pci_dev.address + addrs.update([addr]) + try: + irqs, msi_irqs = pci_utils.get_irqs_by_pci_address(addr) + except Exception as e: + msi_irqs = set() + LOG.error('_wait_for_msi_irqs: pci_addr=%(A)s, error=%(E)s' % + {'A': addr, 'E': e}) + self._msi_irq_count[addr] = len(msi_irqs) + self._msi_irq_elapsed[addr] += \ + CONF.msi_irq_check_interval + if _prev[addr] == self._msi_irq_count[addr]: + self._msi_irq_since[addr] += \ + CONF.msi_irq_check_interval + else: + self._msi_irq_since[addr] = 0 + + # Done when msi irq counts have not changed for some time + if all((self._msi_irq_count[k] > 0) and + (self._msi_irq_since[k] >= CONF.msi_irq_since) + for k in addrs): + raise loopingcall.LoopingCallDone() + + # Abort due to timeout + if all(self._msi_irq_elapsed[k] >= CONF.msi_irq_timeout + for k in addrs): + msg = ("reached %(timeout)s seconds timeout, waiting for " + "msi irqs of pci_addrs: %(addrs)s") % { + 'timeout': CONF.msi_irq_timeout, + 'addrs': list(addrs)} + LOG.warning(msg) + raise loopingcall.LoopingCallDone() + + # Determine how many msi irqs we expect to be configured. + if len(inst.get_pci_devices()) == 0: + return + + # Initialize msi irq tracking. 
+ for pci_dev in inst.pci_devices: + if wait_for_irqs or (pci_dev.address not in self._msi_irq_count): + self._msi_irq_count[pci_dev.address] = 0 + self._msi_irq_since[pci_dev.address] = 0 + self._msi_irq_elapsed[pci_dev.address] = 0 + + # Wait for msi irqs to be configured. + if wait_for_irqs: + timer = loopingcall.FixedIntervalLoopingCall( + _wait_for_msi_irqs, self, inst) + timer.start(interval=CONF.msi_irq_check_interval).wait() + + @synchronized(inst.uuid) + def do_affine_pci_dev_instance(refresh_need): + """Set pci device irq affinity for this instance.""" + + _irqs = set() + _msi_irqs = set() + # refresh instance info. + if refresh_need: + _inst = novaClient.get_instance(inst.uuid) + if _inst is None: + return + + numa_topology = _inst.get_numa_topology() + extra_spec = _inst.get_extra_spec() + for pci_dev in _inst.pci_devices: + try: + irqs, msi_irqs, pci_numa_node, pci_cpulist = \ + pci_utils.set_irqs_affinity_by_pci_address( + pci_dev.address, extra_spec, numa_topology) + except Exception as e: + irqs = set() + msi_irqs = set() + pci_numa_node = None + pci_cpulist = '' + LOG.error("Could not affine irqs for pci_addr:%(A)s, " + "error: %(E)s" % {"A": pci_dev.address, "E": e}) + + # Log irqs affined when there is a change in the counts. 
#
# Copyright (c) 2019 StarlingX.
#
# SPDX-License-Identifier: Apache-2.0
#

# vim: tabstop=4 shiftwidth=4 softtabstop=4

# All Rights Reserved.
#

""" Encapsulate libvirt related interfaces"""

import libvirt
import os
import sys
import signal
from xml.dom import minidom
from xml.etree import ElementTree
from log import LOG

debug = 0
# libvirt timeout parameters
LIBVIRT_TIMEOUT_SEC = 5.0
# Number of host cpus; refreshed by get_host_cpu_topology().
total_cpus = 0

# Numeric virDomainState value -> human readable name.
_DOMAIN_STATES = {
    0: 'NOSTATE',
    1: 'Running',
    2: 'Blocked',
    3: 'Paused',
    4: 'Shutdown',
    5: 'Shutoff',
    6: 'Crashed',
    7: 'pmSuspended',
    8: 'Last',
}


def range_to_list(csv_range=None):
    """Convert a string of comma separated ranges into a list of integers.

    E.g., '1-3,8-9,15' is converted to [1,2,3,8,9,15]

    :param csv_range: range string, or None/'' for no values
    :returns: expanded list of integers, in the order given
    """
    if not csv_range:
        return []
    expanded = []
    for item in csv_range.split(','):
        # Materialize the bounds: map() is not subscriptable on Python 3.
        bounds = [int(bound) for bound in item.split('-')]
        expanded.extend(range(bounds[0], bounds[-1] + 1))
    return expanded


def _translate_virDomainState(state):
    """Return human readable virtual domain state string.

    Unrecognized state values are reported as 'Unknown' instead of raising.
    """
    return _DOMAIN_STATES.get(state, 'Unknown')


def _mask_to_cpulist(mask=0):
    """Create cpulist from mask.

    :param mask: cpuset bitmask (bit N set means cpu N is a member)
    :returns cpulist: list of cpu ids in ascending order; empty when the
                      mask is None, zero or negative
    """
    cpulist = []
    if mask is None or mask <= 0:
        return cpulist

    # Walk the set bits instead of assuming a maximum number of cpus.
    cpu = 0
    while mask:
        if mask & 1:
            cpulist.append(cpu)
        mask >>= 1
        cpu += 1
    return cpulist


class suppress_stdout_stderr(object):
    """A context manager for doing a "deep suppression" of stdout and stderr.

    File descriptors 1 and 2 are redirected to os.devnull while the context
    is active, so output written by compiled code is suppressed as well.
    Exceptions raised inside the context still propagate.

    Every descriptor opened on entry is closed on exit, so the manager can
    be used repeatedly without leaking file descriptors.
    """
    def __init__(self):
        # Descriptors are acquired in __enter__ so that merely constructing
        # the object does not consume any.
        self.null_fds = []
        self.save_fds = []

    def __enter__(self):
        # Open a pair of null files
        self.null_fds = [os.open(os.devnull, os.O_RDWR) for _ in range(2)]
        # Save the actual stdout (1) and stderr (2) file descriptors.
        self.save_fds = [os.dup(1), os.dup(2)]
        # Point stdout and stderr at the null files.
        os.dup2(self.null_fds[0], 1)
        os.dup2(self.null_fds[1], 2)
        return self

    def __exit__(self, *_):
        # Re-assign the real stdout/stderr back to (1) and (2)
        os.dup2(self.save_fds[0], 1)
        os.dup2(self.save_fds[1], 2)
        # Close the null files and the saved duplicates.
        for fd in self.null_fds + self.save_fds:
            os.close(fd)
        self.null_fds = []
        self.save_fds = []


class TimeoutError(Exception):
    pass


def timeout_handler(signum, frame):
    """SIGALRM handler: abort the interrupted call with TimeoutError."""
    raise TimeoutError('timeout')


def connect_to_libvirt():
    """Connect to local libvirt.

    The read-only connection attempt is bounded by LIBVIRT_TIMEOUT_SEC using
    SIGALRM.

    :returns: the connection object returned by libvirt.openReadOnly()
    :raises TimeoutError: when the connection attempt times out
    :raises Exception: any error raised by libvirt is propagated unchanged
    """
    duri = "qemu:///system"
    previous_handler = signal.signal(signal.SIGALRM, timeout_handler)
    signal.setitimer(signal.ITIMER_REAL, LIBVIRT_TIMEOUT_SEC)
    try:
        with suppress_stdout_stderr():
            return libvirt.openReadOnly(duri)
    finally:
        # Always disarm the timer and give SIGALRM back to its previous
        # owner, whether the connection succeeded or not.
        signal.setitimer(signal.ITIMER_REAL, 0)
        if previous_handler is not None:
            signal.signal(signal.SIGALRM, previous_handler)


def get_host_cpu_topology():
    """Enumerate logical cpu topology using socket_id, core_id, thread_id.

    This generates the following dictionary:
    topology[socket_id][core_id][thread_id] = cpu_id

    As a side effect the module level total_cpus is refreshed.
    """
    global total_cpus

    # Connect to local libvirt hypervisor; the connection is only needed to
    # fetch the capabilities and is closed even if that call fails.
    conn = connect_to_libvirt()
    try:
        caps_str = conn.getCapabilities()
    finally:
        conn.close()

    doc = ElementTree.fromstring(caps_str)
    caps = minidom.parseString(caps_str)
    caps_host = caps.getElementsByTagName('host')[0]
    caps_cells = caps_host.getElementsByTagName('cells')[0]
    total_cpus = caps_cells.getElementsByTagName('cpu').length

    thread_cnt = {}
    topology = {}
    for cell in doc.findall('./host/topology/cells/cell'):
        for cpu in cell.findall('./cpus/cpu'):
            # obtain core_id, cpu_id, and socket_id; ignore 'siblings' since
            # that can be inferred by enumeration of thread_id.
            core_id = int(cpu.get('core_id'))
            cpu_id = int(cpu.get('id'))
            socket_id = int(cpu.get('socket_id'))

            # thread_id's are enumerated assuming cpu_id is already sorted
            cores = thread_cnt.setdefault(socket_id, {})
            thread_id = cores.get(core_id, -1) + 1
            cores[core_id] = thread_id

            # save topology[socket_id][core_id][thread_id]
            topology.setdefault(socket_id, {}).setdefault(
                core_id, {})[thread_id] = cpu_id
    return topology


def _get_vfio_pci_addrs(dom_xml):
    """Collect the host PCI addresses of vfio-driven guest interfaces.

    :param dom_xml: parsed domain XML (ElementTree element)
    :returns: set of 'dddd:bb:ss.f' formatted PCI address strings
    """
    pci_addrs = set()
    for interface in dom_xml.findall('./devices/interface'):
        driver = interface.find('driver')
        if driver is None or \
                not (driver.get('name') or '').startswith('vfio'):
            continue
        addr_tag = interface.find('source/address')
        if addr_tag is None or addr_tag.get('type') != 'pci':
            continue
        try:
            # The XML attributes are strings; they are parsed as base 16
            # (an optional '0x' prefix is accepted) before being formatted.
            domain, bus, slot, function = [
                int(addr_tag.get(attr), 16)
                for attr in ('domain', 'bus', 'slot', 'function')]
        except (TypeError, ValueError) as e:
            LOG.warning("Ignoring malformed pci address %s! error=%s"
                        % (addr_tag.attrib, e))
            continue
        pci_addrs.add("%04x:%02x:%02x.%01x" % (domain, bus, slot, function))
    return pci_addrs


def get_guest_domain_info(dom):
    """Obtain cpulist of pcpus in the order of vcpus.

    This applies to either pinned or floating vcpus, Note that the cpuinfo
    pcpu value can be stale if we scale down cpus since it reports
    cpu-last-run. For this reason use cpumap = d_vcpus[1][vcpu], instead of
    cpuinfo (i.e., vcpu, state, cpuTime, pcpu = d_vcpus[0][vcpu]).

    :param dom: libvirt domain object
    :returns: dictionary with keys 'uuid', 'state', 'IsCpuPinned',
              'nr_vcpus', 'nodelist', 'cpulist', 'cpu_pinning', 'pci_addrs'
    """
    uuid = dom.UUIDString()
    d_state, _max_mem_kib, _memory_kib, d_nrVirtCpu, _cpu_time = dom.info()
    try:
        with suppress_stdout_stderr():
            d_vcpus = dom.vcpus()
    except Exception:
        # vcpu details are unavailable: fall back to an all-False cpumap
        # for every vcpu so the loop below finds no affinity.
        d_vcpus = ([], d_nrVirtCpu * [tuple(total_cpus * [False])])

    cpulist_d = {}
    cpuset_total = 0
    up_total = 0
    for vcpu in range(d_nrVirtCpu):
        for cpu, up in enumerate(d_vcpus[1][vcpu]):
            if up:
                # The highest permitted pcpu wins for each vcpu.
                cpulist_d[vcpu] = cpu
                cpuset_total |= 1 << cpu
                up_total += 1
    cpulist_f = _mask_to_cpulist(mask=cpuset_total)
    cpulist_p = [cpulist_d[key] for key in sorted(cpulist_d)]

    # Determine if floating or pinned, display appropriate cpulist
    # (more permitted pcpus than vcpus means the vcpus float).
    if up_total > d_nrVirtCpu:
        d_cpulist = cpulist_f
        cpu_pinned = False
    else:
        d_cpulist = cpulist_p
        cpu_pinned = True

    # Determine list of numa nodes (the hard way)
    dom_xml = ElementTree.fromstring(dom.XMLDesc(0))
    nodeset = set()
    for elem in dom_xml.findall('./numatune/memnode'):
        nodeset.update(range_to_list(elem.get('nodeset')))
    d_nodelist = sorted(nodeset)

    # Update dictionary with per-domain information
    domain = {
        'uuid': uuid,
        'state': _translate_virDomainState(d_state),
        'IsCpuPinned': cpu_pinned,
        'nr_vcpus': d_nrVirtCpu,
        'nodelist': d_nodelist,
        'cpulist': d_cpulist,
        'cpu_pinning': cpulist_d,
        'pci_addrs': _get_vfio_pci_addrs(dom_xml)
    }
    return domain


def get_guest_domain_by_uuid(conn, uuid):
    """Look up a domain by uuid and return its info dictionary.

    :param conn: libvirt connection
    :param uuid: domain uuid string
    :returns: dictionary built by get_guest_domain_info(), or None when the
              domain lookup fails
    """
    try:
        dom = conn.lookupByUUIDString(uuid)
    except Exception as e:
        LOG.warning("Failed to get domain for uuid=%s! error=%s" % (uuid, e))
        return None
    return get_guest_domain_info(dom)
#

""" Define instance related class"""

from log import LOG


class numa_cell(object):
    """One guest NUMA cell: its id, its vcpu ids and the vcpu->pcpu map."""

    def __init__(self, id, cpuset, cpu_pinning):
        self.id = id
        self.cpuset = cpuset
        self.cpu_pinning = cpu_pinning


class numa_topology(object):
    """NUMA topology of one instance: a collection of numa_cell objects."""

    def __init__(self, uuid, cells):
        self.instance_uuid = uuid
        self.cells = cells

    def vcpu_to_pcpu(self, vcpu):
        """Return (cell, pcpu) for the given vcpu.

        :raises KeyError: when no cell pins the vcpu
        """
        for cell in self.cells:
            if vcpu in cell.cpu_pinning:
                return cell, cell.cpu_pinning[vcpu]
        raise KeyError('Unable to find pCPU for vCPU %d' % vcpu)


class pci_device(object):
    """A PCI device attached to an instance, identified by its address.

    Equality and hashing are based on the address so that a set of devices
    holds at most one entry per PCI address.
    """

    def __init__(self, addr):
        self.address = addr
        self.dev_id = ""
        self.dev_type = ""
        self.vendor_id = ""
        self.product_id = ""

    def __eq__(self, other):
        if not isinstance(other, pci_device):
            return NotImplemented
        return self.address == other.address

    def __ne__(self, other):
        # Python 2 does not derive __ne__ from __eq__.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        return hash(self.address)

    def __repr__(self):
        return "pci_device(%r)" % (self.address,)


class instance(object):
    """An instance together with the libvirt derived cpu and pci details."""

    def __init__(self, uuid, name, extra_spec):
        self.uuid = uuid
        self.name = name
        self.extra_spec = extra_spec
        self.pci_devices = set()
        self.numa_topology = None
        self.cpu_policy = 'shared'

    def update(self, domain):
        """Refresh numa topology, cpu policy and pci devices.

        :param domain: dictionary providing the keys 'nodelist',
                       'nr_vcpus', 'cpu_pinning', 'IsCpuPinned' and
                       'pci_addrs'
        """
        vcpus = list(range(domain['nr_vcpus']))
        cells = set()
        for node_id in domain['nodelist']:
            LOG.debug("cell_id=%s, vcpuset=%s, cpu_pinning=%s",
                      node_id, vcpus, domain['cpu_pinning'])
            # Every cell receives its own copy of the vcpu list.
            cells.add(numa_cell(node_id, list(vcpus), domain['cpu_pinning']))

        self.numa_topology = numa_topology(self.uuid, cells)
        self.cpu_policy = 'dedicated' if domain['IsCpuPinned'] else 'shared'

        # A device already known by address keeps its existing entry.
        for pci_addr in domain['pci_addrs']:
            self.pci_devices.add(pci_device(pci_addr))

    def get_cpu_policy(self):
        return self.cpu_policy

    def get_numa_topology(self):
        return self.numa_topology

    def get_extra_spec(self):
        return self.extra_spec

    def get_pci_devices(self):
        return self.pci_devices
#
# Copyright (c) 2019 StarlingX.
#
# SPDX-License-Identifier: Apache-2.0
#

# vim: tabstop=4 shiftwidth=4 softtabstop=4

# All Rights Reserved.
#

""" Define Logger class for this agent"""

import logging
import logging.handlers

_syslog_facility = 'local1'

# Layout of every record emitted by the agent.
_LOG_FORMAT = ("%(asctime)s %(threadName)s[%(process)d] "
               "%(name)s.%(pathname)s.%(lineno)d - %(levelname)s "
               "%(message)s")

# Records go to the local syslog socket under the facility chosen above.
formatter = logging.Formatter(_LOG_FORMAT)
handler = logging.handlers.SysLogHandler(address='/dev/log',
                                         facility=_syslog_facility)
handler.setFormatter(formatter)

# Shared logger imported by the other agent modules.
LOG = logging.getLogger("pci-interrupt-affinity")
LOG.setLevel(logging.INFO)
LOG.addHandler(handler)
""" Define NovaProvider class
This class wraps novaclient access interface and expose get_instance() and
get_instances() to other agent classes.
"""

import keyring
from novaclient import client
from keystoneauth1 import loading
from keystoneauth1 import session
import socket
from log import LOG
from config import CONF
from config import sysconfig
import instance
import guest


class NovaProvider(object):
    """Access point to nova (instances, flavors) and to local libvirt."""

    def __init__(self):
        self._creds = self._get_keystone_creds()
        self._auth = self._get_auth(self._creds)
        self._hostname = self.get_hostname()
        self._conn = None

    def get_hostname(self):
        """Return the hostname of the local node."""
        return socket.gethostname()

    def _get_keystone_creds(self):
        """Read keystone credentials from the configuration and keyring.

        :returns: dictionary of credential options, or None when they
                  cannot be obtained
        """
        creds = {}
        openstackSession = 'openstack'
        options = ['username', 'user_domain_name', 'project_name',
                   'project_domain_name', 'keyring_service', 'auth_url']

        try:
            for option in options:
                creds[option] = sysconfig.get(openstackSession, option)

            # 'keyring_service' only selects the keyring entry; it is
            # removed from the credentials once the password is fetched.
            creds['password'] = keyring.get_password(
                creds.pop('keyring_service'), creds['username'])

        except Exception as e:
            LOG.error("Could not get keystone creds configuration! Err=%s" % e)
            creds = None

        return creds

    def _get_auth(self, creds):
        """Build a keystone password auth plugin, or None without creds."""
        if creds is None:
            return None
        loader = loading.get_plugin_loader('password')
        return loader.load_from_options(**creds)

    def get_nova(self):
        """Return a nova client bound to a new keystone session.

        :raises Exception: when the client cannot be created
        """
        try:
            sess = session.Session(auth=self._auth)
            return client.Client('2.1', session=sess)
        except Exception as e:
            # Keep the root cause in the log; callers only see the generic
            # exception below.
            LOG.warning("Failed to connect to nova! error=%s" % e)
            raise Exception("could not connect nova!")

    def open_libvirt_connect(self):
        """Open the libvirt connection and refresh host cpu topology."""
        self._conn = guest.connect_to_libvirt()
        guest.get_host_cpu_topology()

    def close_libvirt_connect(self):
        """Close the libvirt connection; a no-op when none is open."""
        if self._conn is not None:
            self._conn.close()
            self._conn = None

    def get_instance(self, uuid):
        """Return the instance with the given uuid if it runs on this host.

        :param uuid: instance uuid
        :returns: instance object, or None when nova cannot provide the
                  instance or the instance is hosted elsewhere
        """
        try:
            nova = self.get_nova()
            server = nova.servers.get(uuid)
            flavor_info = nova.flavors.get(server.flavor["id"])
            hostname = server.__dict__['OS-EXT-SRV-ATTR:host']
        except Exception as e:
            LOG.warning("Could not get instance=%s from Nova! error=%s"
                        % (uuid, e))
            return None

        LOG.debug('GET VM:%s in node:%s' % (server.name, hostname))

        if hostname != self._hostname:
            LOG.debug('The VM is not in current host!')
            return None

        inst = instance.instance(uuid, server.name, flavor_info.get_keys())
        # get numa topology and pci info from libvirt
        try:
            domain = guest.get_guest_domain_by_uuid(self._conn, uuid)
            if domain:
                inst.update(domain)
        except Exception as e:
            LOG.warning("Failed to access libvirt! error=%s" % e)
        return inst

    def get_instances(self, filters):
        """Return the local instances whose flavor has dedicated cpus.

        :param filters: nova search options; the dictionary is not
                        modified, the local hostname is added to a copy
        :returns: set of instance objects; empty on failure
        """
        instances = set()
        try:
            nova = self.get_nova()
            search_opts = dict(filters or {})
            search_opts['host'] = self._hostname
            servers = nova.servers.list(detailed=True,
                                        search_opts=search_opts)
            flavors = dict((flavor.id, flavor)
                           for flavor in nova.flavors.list())

            # Extra specs are fetched once per flavor, not once per server.
            extra_specs = {}
            for server in servers:
                flavor_id = server.flavor["id"]
                if flavor_id not in flavors:
                    continue
                if flavor_id not in extra_specs:
                    extra_specs[flavor_id] = flavors[flavor_id].get_keys()
                extra_spec = extra_specs[flavor_id]
                if extra_spec.get('hw:cpu_policy') == 'dedicated':
                    instances.add(
                        instance.instance(server.id, server.name, extra_spec))

            # get numa topology and pci info from libvirt
            for inst in instances:
                domain = guest.get_guest_domain_by_uuid(self._conn, inst.uuid)
                if domain:
                    inst.update(domain)
                else:
                    # One missing domain must not abort the remaining ones.
                    LOG.warning("No libvirt domain found for instance=%s"
                                % inst.uuid)
        except Exception as e:
            LOG.warning("Failed to get instances info! error=%s" % e)

        return instances


if sysconfig.get('openstack', 'openstack_enabled') == 'true':
    novaClient = NovaProvider()
else:
    novaClient = None
#

""" Define utility functions for this agent"""

import os
import errno
from itertools import groupby

from log import LOG
import instance


def list_to_range(input_list=None):
    """Convert a list into a string of comma separate ranges.

    E.g., [1,2,3,8,9,15] is converted to '1-3,8-9,15'

    The values are sorted first, so callers may pass them in any order
    (e.g. a list built from a set).

    :param input_list: iterable of integers, or None
    :returns: range string; '' when input_list is None or empty
    """
    if input_list is None:
        return ''
    ordered = sorted(input_list)
    if len(ordered) < 3:
        return ','.join(str(x) for x in ordered)

    # Consecutive values share the same (index - value) difference, which
    # groupby uses to split the sorted list into runs. The key function
    # receives each (index, value) pair as a single argument.
    runs = (list(run) for _, run in groupby(enumerate(ordered),
                                            lambda pair: pair[0] - pair[1]))
    # A run of one element is printed once; longer runs as 'first-last'.
    return ','.join(
        '-'.join(map(str, (run[0][1], run[-1][1])[:len(run)]))
        for run in runs)


def parse_cpu_spec(spec):
    """Parse a CPU set specification.

    Each element in the list is either a single CPU number, a range of
    CPU numbers, or a caret followed by a CPU number to be excluded
    from a previous range.

    :param spec: cpu set string eg "1-4,^3,6"

    :returns: a set of CPU indexes
    :raises Exception: when an element is not a valid number or range
    """
    cpuset_ids = set()
    cpuset_reject_ids = set()
    for rule in spec.split(','):
        rule = rule.strip()
        # Handle multi ','
        if len(rule) < 1:
            continue
        # Note the count limit in the .split() call
        range_parts = rule.split('-', 1)
        if len(range_parts) > 1:
            reject = False
            if range_parts[0] and range_parts[0][0] == '^':
                reject = True
                range_parts[0] = str(range_parts[0][1:])

            # So, this was a range; start by converting the parts to ints
            try:
                start, end = [int(p.strip()) for p in range_parts]
            except ValueError:
                raise Exception("Invalid range expression %r" % rule)
            # Make sure it's a valid range
            if start > end:
                raise Exception("Invalid range expression %r" % rule)
            # Add available CPU ids to set
            if not reject:
                cpuset_ids |= set(range(start, end + 1))
            else:
                cpuset_reject_ids |= set(range(start, end + 1))
        elif rule[0] == '^':
            # Not a range, the rule is an exclusion rule; convert to int
            try:
                cpuset_reject_ids.add(int(rule[1:].strip()))
            except ValueError:
                raise Exception("Invalid exclusion expression %r" % rule)
        else:
            # OK, a single CPU to include; convert to int
            try:
                cpuset_ids.add(int(rule))
            except ValueError:
                raise Exception("Invalid inclusion expression %r" % rule)

    # Use sets to handle the exclusion rules for us
    cpuset_ids -= cpuset_reject_ids

    return cpuset_ids


def _get_pci_irq_affinity_mask(extra_spec):
    """Parse pci irq affinity mask based on flavor extra-spec.

    :param extra_spec: flavor extra-spec dictionary, or None
    :returns: set of vcpu ids selected by 'hw:pci_irq_affinity_mask', or
              None when the extra-spec does not set a mask
    :raises Exception: when the mask selects no cpu or cannot be parsed
    """
    if not extra_spec or 'hw:pci_irq_affinity_mask' not in extra_spec:
        LOG.info('Not set pci_irq_affinity_mask!')
        return None

    pci_irq_affinity_mask = extra_spec['hw:pci_irq_affinity_mask']
    LOG.info("pci_irq_affinity_mask: %s" % pci_irq_affinity_mask)

    cpuset_ids = parse_cpu_spec(pci_irq_affinity_mask)
    if not cpuset_ids:
        raise Exception("No CPUs available after parsing %r"
                        % pci_irq_affinity_mask)
    return cpuset_ids


def get_irqs_by_pci_address(pci_addr):
    """Get list of PCI IRQs based on a VF's pci address

    :param pci_addr: PCI address
    :return: irqs, msi_irqs
    :raises Exception: ("PciDeviceNotFoundById ...") in case the pci device
                       is not found, or when there is an underlying problem
                       getting associated irqs
    """
    irqs = set()
    msi_irqs = set()

    dev_path = "/sys/bus/pci/devices/%s" % (pci_addr)
    if not os.path.isdir(dev_path):
        raise Exception("PciDeviceNotFoundById id = %r" % pci_addr)

    _irqs = set()
    irq_path = "%s/irq" % (dev_path)
    try:
        with open(irq_path) as f:
            values = [int(x) for x in f.readline().split()]
        _irqs.update(x for x in values if x > 0)
    except Exception as e:
        LOG.error('get_irqs_by_pci_address: '
                  'pci_addr=%(A)s: irq_path=%(P)s; error=%(E)s',
                  {'A': pci_addr, 'P': irq_path, 'E': e})
        raise Exception("PciDeviceNotFoundById id = %r" % pci_addr)

    _msi_irqs = set()
    msi_path = "%s/msi_irqs" % (dev_path)
    try:
        values = [int(x) for x in os.listdir(msi_path)]
        _msi_irqs.update(x for x in values if x > 0)
    except Exception as e:
        # msi_path disappears during configuration; do not treat
        # non-existence as fatal
        if isinstance(e, OSError) and e.errno == errno.ENOENT:
            return (irqs, msi_irqs)
        LOG.error('get_irqs_by_pci_address: '
                  'pci_addr=%(A)s: msi_path=%(P)s; error=%(E)s',
                  {'A': pci_addr, 'P': msi_path, 'E': e})
        raise Exception("PciDeviceNotFoundById id = %r" % pci_addr)

    # Return only configured irqs, ignore any that are missing.
    irqs.update(irq for irq in _irqs
                if os.path.isdir("/proc/irq/%s" % (irq)))
    msi_irqs.update(irq for irq in _msi_irqs
                    if os.path.isdir("/proc/irq/%s" % (irq)))
    return (irqs, msi_irqs)


def get_pci_irqs_pinned_cpuset(extra_spec=None, numa_topology=None,
                               pci_numa_node=None):
    """Get pinned cpuset where pci irq are affined.

    :param extra_spec: extra_spec
    :param pci_numa_node: numa node of a specific PCI device
    :param numa_topology: instance numa topology
    :return: cpuset, cpulist
    """
    cpuset = set()
    cpulist = ''

    LOG.debug("extra_spec:%s, topo:%s, numa_node:%s"
              % (extra_spec, numa_topology, pci_numa_node))
    if numa_topology is None or pci_numa_node is None or pci_numa_node < 0:
        return (cpuset, cpulist)

    # Determine full affinity cpuset, but restrict to pci's numa node
    for cell in numa_topology.cells:
        if cell.id == pci_numa_node and cell.cpu_pinning is not None:
            cpuset.update(set(cell.cpu_pinning.values()))
    LOG.info("pinning pcpu list:%s" % cpuset)

    # Use extra-spec hw:pci_irq_affinity_mask only when the instance is pinned.
    if cpuset:
        pci_cpuset = _get_pci_irq_affinity_mask(extra_spec)
        if pci_cpuset:
            # The mask replaces the node restricted cpuset computed above.
            cpuset = set()
            for cell in numa_topology.cells:
                if cell.cpu_pinning is None:
                    continue
                for vcpu in cell.cpuset:
                    if vcpu in pci_cpuset:
                        _vcpu_cell, pcpu = numa_topology.vcpu_to_pcpu(vcpu)
                        cpuset.add(pcpu)

    cpulist = list_to_range(input_list=list(cpuset))
    return (cpuset, cpulist)


def set_irq_affinity(set_bitmap, irqs, cpulist):
    """Set irq affinity to the specified cpulist for list of irqs.

    Write failures are logged and skipped, they are not raised.

    :param set_bitmap: True: set bitmap file, False: set list file
    :param irqs: irq list
    :param cpulist: cpu list
    :return: set of irqs whose affinity file was written
    """
    _irqs = set()

    if set_bitmap:
        filename = 'smp_affinity'
    else:
        filename = 'smp_affinity_list'

    for irq in irqs:
        irq_aff_path = "/proc/irq/%s/%s" % (irq, filename)
        try:
            with open(irq_aff_path, 'w') as f:
                f.write(cpulist)
            _irqs.add(irq)
        except Exception as e:
            LOG.warning("Failed to write pci affine file:%(F)s, irq:%(I)s, "
                        "error=%(E)s"
                        % {"F": filename, "I": irq, "E": e})
    return _irqs


def set_irqs_affinity_by_pci_address(pci_addr, extra_spec=None,
                                     numa_topology=None):
    """Set cpu affinity for list of PCI IRQs with a VF's pci address,

    Restrict cpuset to the numa node of the PCI.

    :param pci_addr: PCI address
    :param extra_spec: extra_spec
    :param numa_topology: instance numa topology
    :return: irqs, msi_irqs, numa_node, cpulist
    :raises Exception: ("PciDeviceNotFoundById ...") in case the pci device
                       is not found, or when there is an underlying problem
                       getting associated irqs
    """
    irqs = set()
    msi_irqs = set()
    numa_node = None
    cpulist = ''

    if numa_topology is None:
        return (irqs, msi_irqs, numa_node, cpulist)

    # Get the irqs associated with pci addr
    _irqs, _msi_irqs = get_irqs_by_pci_address(pci_addr)
    LOG.debug("pci: %s, irqs: %s, msi_irqs: %s" % (pci_addr, _irqs, _msi_irqs))

    # Obtain physical numa_node for this pci addr
    numa_path = "/sys/bus/pci/devices/%s/numa_node" % (pci_addr)
    try:
        with open(numa_path) as f:
            numa_node = [int(x) for x in f.readline().split()][0]
    except Exception as e:
        LOG.error('set_irqs_affinity_by_pci_address: '
                  'pci_addr=%(A)s: numa_path=%(P)s; error=%(E)s',
                  {'A': pci_addr, 'P': numa_path, 'E': e})
        raise Exception("PciDeviceNotFoundById id = %r" % pci_addr)
    # Skip irq configuration if there is no associated numa node
    if numa_node is None or numa_node < 0:
        return (irqs, msi_irqs, numa_node, cpulist)

    # Determine the pinned cpuset where irqs are to be affined
    cpuset, cpulist = get_pci_irqs_pinned_cpuset(extra_spec,
                                                 numa_topology,
                                                 numa_node)

    LOG.debug("cpuset where irqs are to be affined:%s or %s"
              % (cpuset, cpulist))

    # Skip irq configuration if there are no pinned cpus
    if not cpuset:
        return (irqs, msi_irqs, numa_node, cpulist)

    # Set IRQ affinity, but do not treat errors as fatal.
    irqs = set_irq_affinity(False, _irqs, cpulist)
    msi_irqs = set_irq_affinity(False, _msi_irqs, cpulist)
    return (irqs, msi_irqs, numa_node, cpulist)


# ---------------------------------------------------------------------------
# Next file in the patch:
# utilities/pci-irq-affinity-agent/pci_irq_affinity/setup.py
# ---------------------------------------------------------------------------
#
# Copyright (c) 2019 StarlingX.
#
# SPDX-License-Identifier: Apache-2.0
#
# flake8: noqa
#
from setuptools import setup, find_packages

setup(
    name='pci-irq-affinity-agent',
    description='PCI Interrupt Affinity Agent',
    version='1.0.0',
    classifiers=[
        'Environment :: OpenStack',
        'Intended Audience :: Information Technology',
        'Intended Audience :: System Administrators',
        'License :: OSI Approved :: Apache Software License',
        'Operating System :: POSIX :: Linux',
        'Programming Language :: Python',
        'Programming Language :: Python :: 2',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 2.6',
    ],
    license='Apache-2.0',
    platforms=['any'],
    # 'provides' takes a list of names; a bare string is split into
    # single characters.
    provides=['pci_irq_affinity_agent'],
    packages=find_packages(),
    include_package_data=False,
    entry_points={
        'console_scripts': [
            'pci-irq-affinity-agent = pci_irq_affinity.agent:process_main',
        ],
    }
)