diff --git a/centos_iso_image.inc b/centos_iso_image.inc index 29c39b47f..293c40d75 100644 --- a/centos_iso_image.inc +++ b/centos_iso_image.inc @@ -44,8 +44,8 @@ python-smartpm # lldpd lldpd -# nova-utils -nova-utils +# pci-irq-affinity-agent +pci-irq-affinity-agent # mlx4-config mlx4-config diff --git a/centos_pkg_dirs b/centos_pkg_dirs index 1e292863b..47a5d5ffc 100644 --- a/centos_pkg_dirs +++ b/centos_pkg_dirs @@ -27,7 +27,7 @@ python/python-voluptuous networking/lldpd logging/logrotate logging/logrotate-config -utilities/nova-utils +utilities/pci-irq-affinity-agent security/shim-unsigned security/shim-signed base/sudo diff --git a/utilities/nova-utils/centos/build_srpm.data b/utilities/nova-utils/centos/build_srpm.data deleted file mode 100644 index 4620f4786..000000000 --- a/utilities/nova-utils/centos/build_srpm.data +++ /dev/null @@ -1,2 +0,0 @@ -COPY_LIST="$PKG_BASE/nova-utils/*" -TIS_PATCH_VER=1 diff --git a/utilities/nova-utils/centos/nova-utils.spec b/utilities/nova-utils/centos/nova-utils.spec deleted file mode 100644 index dc0dd5f2f..000000000 --- a/utilities/nova-utils/centos/nova-utils.spec +++ /dev/null @@ -1,38 +0,0 @@ -Summary: nova-utils version 1.0-r1 -Name: nova-utils -Version: 1.0 -Release: %{tis_patch_ver}%{?_tis_dist} -License: Apache-2.0 -Group: development -Packager: Wind River -URL: unknown - -Source0: LICENSE -Source1: nova-sriov - -%description -Nova utilities package - -%package -n nova-utils-devel -Summary: nova-utils - Development files -Group: devel -Requires: nova-utils = %{version}-%{release} - -%description -n nova-utils-devel -Nova utilities package This package contains symbolic links, header files, -and related items necessary for software development. 
- -%install -rm -rf $RPM_BUILD_ROOT -mkdir -p $RPM_BUILD_ROOT/%{_bindir} -install -m 0755 %{SOURCE1} $RPM_BUILD_ROOT/%{_bindir}/nova-sriov -mkdir -p $RPM_BUILD_ROOT/%{_defaultdocdir}/%{name}-%{version} -install -m 644 %{SOURCE0} $RPM_BUILD_ROOT/%{_defaultdocdir}/%{name}-%{version} - -%files -%defattr(-,root,root,-) -%{_bindir}/nova-sriov -%{_defaultdocdir}/%{name}-%{version} - -%files -n nova-utils-devel -%defattr(-,root,root,-) diff --git a/utilities/pci-irq-affinity-agent/PKG-INFO b/utilities/pci-irq-affinity-agent/PKG-INFO new file mode 100644 index 000000000..cb14bb3eb --- /dev/null +++ b/utilities/pci-irq-affinity-agent/PKG-INFO @@ -0,0 +1,7 @@ +Metadata-Version: 1.2 +Name: PCIInterruptAffinityAgent +Version: 1.0 +Summary: PCI Interrupt Affinity Agent Package +Author: StarlingX +License: Apache-2.0 +Platform: UNKNOWN diff --git a/utilities/pci-irq-affinity-agent/centos/build_srpm.data b/utilities/pci-irq-affinity-agent/centos/build_srpm.data new file mode 100644 index 000000000..38fcf6e53 --- /dev/null +++ b/utilities/pci-irq-affinity-agent/centos/build_srpm.data @@ -0,0 +1,3 @@ +SRC_DIR="pci_irq_affinity" +COPY_LIST_TO_TAR="files/*" +TIS_PATCH_VER=1 diff --git a/utilities/pci-irq-affinity-agent/centos/pci-irq-affinity.spec b/utilities/pci-irq-affinity-agent/centos/pci-irq-affinity.spec new file mode 100644 index 000000000..fc8d9853b --- /dev/null +++ b/utilities/pci-irq-affinity-agent/centos/pci-irq-affinity.spec @@ -0,0 +1,70 @@ +Summary: StarlingX PCI Interrupt Affinity Agent Package +Name: pci-irq-affinity-agent +Version: 1.0 +Release: %{tis_patch_ver}%{?_tis_dist} +License: Apache-2.0 +Group: base +Packager: StarlingX +URL: unknown + +Source0: %{name}-%{version}.tar.gz + +Requires: python-novaclient +BuildRequires: python-setuptools +BuildRequires: systemd-devel + +%description +StarlingX PCI Interrupt Affinity Agent Package + +%define local_etc_initd /etc/init.d/ +%define local_etc_pmond /etc/pmon.d/ +%define pythonroot /usr/lib64/python2.7/site-packages 
+%define debug_package %{nil} + +%prep +%setup + +# Remove bundled egg-info +rm -rf *.egg-info + +%build +%{__python} setup.py build + +%install +%{__python} setup.py install --root=%{buildroot} \ + --install-lib=%{pythonroot} \ + --prefix=/usr \ + --install-data=/usr/share \ + --single-version-externally-managed + +%{__install} -d -m 755 %{buildroot}%{local_etc_initd} +%{__install} -p -D -m 755 pci-irq-affinity-agent %{buildroot}%{local_etc_initd}/pci-irq-affinity-agent + +%{__install} -d -m 755 %{buildroot}%{local_etc_pmond} +%{__install} -p -D -m 644 pci-irq-affinity-agent.conf %{buildroot}%{local_etc_pmond}/pci-irq-affinity-agent.conf +%{__install} -p -D -m 644 pci-irq-affinity-agent.service %{buildroot}%{_unitdir}/pci-irq-affinity-agent.service + +%{__install} -d %{buildroot}%{_bindir} +%{__install} -p -D -m 755 nova-sriov %{buildroot}%{_bindir}/nova-sriov + +%{__install} -d %{buildroot}%{_sysconfdir}/pci_irq_affinity +%{__install} -p -D -m 600 config.ini %{buildroot}%{_sysconfdir}/pci_irq_affinity/config.ini + +%post +/usr/bin/systemctl enable pci-irq-affinity-agent.service >/dev/null 2>&1 + +%clean +rm -rf $RPM_BUILD_ROOT + +%files +%defattr(-,root,root,-) +%doc LICENSE +%{local_etc_initd}/pci-irq-affinity-agent +%{local_etc_pmond}/pci-irq-affinity-agent.conf +%{_unitdir}/pci-irq-affinity-agent.service +%{pythonroot}/pci_irq_affinity/* +%{pythonroot}/pci_irq_affinity_agent-%{version}*.egg-info + +%{_bindir}/pci-irq-affinity-agent +%{_bindir}/nova-sriov +%config(noreplace) %{_sysconfdir}/pci_irq_affinity/config.ini diff --git a/utilities/nova-utils/nova-utils/LICENSE b/utilities/pci-irq-affinity-agent/files/LICENSE similarity index 100% rename from utilities/nova-utils/nova-utils/LICENSE rename to utilities/pci-irq-affinity-agent/files/LICENSE diff --git a/utilities/pci-irq-affinity-agent/files/config.ini b/utilities/pci-irq-affinity-agent/files/config.ini new file mode 100644 index 000000000..50fd8870c --- /dev/null +++ 
b/utilities/pci-irq-affinity-agent/files/config.ini @@ -0,0 +1,22 @@ +# +# Copyright (c) 2019 StarlingX. +# +# SPDX-License-Identifier: Apache-2.0 +# +[openstack] +openstack_enabled=False +username=admin +tenant=admin +authorization_protocol=http +authorization_ip=192.168.204.2 +authorization_port=5000 +user_domain_name=Default +project_domain_name=Default +keyring_service=CGCS + +[amqp] +host=192.168.204.2 +port=5672 +user_id=guest +password=guest +virt_host=/ diff --git a/utilities/nova-utils/nova-utils/nova-sriov b/utilities/pci-irq-affinity-agent/files/nova-sriov similarity index 100% rename from utilities/nova-utils/nova-utils/nova-sriov rename to utilities/pci-irq-affinity-agent/files/nova-sriov diff --git a/utilities/pci-irq-affinity-agent/files/pci-irq-affinity-agent b/utilities/pci-irq-affinity-agent/files/pci-irq-affinity-agent new file mode 100755 index 000000000..600b3d894 --- /dev/null +++ b/utilities/pci-irq-affinity-agent/files/pci-irq-affinity-agent @@ -0,0 +1,120 @@ +#! /bin/sh +# +# Copyright (c) 2019 StarlingX. +# +# SPDX-License-Identifier: Apache-2.0 +# + +# +# chkconfig: 2345 75 25 +# +### BEGIN INIT INFO +# Provides: pci-irq-affinity-agent +### END INIT INFO + +source /etc/init.d/functions + +PLATFORM_CONF="/etc/platform/platform.conf" +NODETYPE="" +DAEMON_NAME="pci-irq-affinity-agent" +AFFINITYAGENT="/usr/bin/${DAEMON_NAME}" + +daemon_pidfile="/var/run/${DAEMON_NAME}.pid" + +if [ -f ${PLATFORM_CONF} ] ; then + source ${PLATFORM_CONF} + NODETYPE=${nodetype} +else + logger "$0: ${PLATFORM_CONF} is missing" + exit 1 +fi + +if [ ! -f "${AFFINITYAGENT}" ] ; then + logger "$0: ${AFFINITYAGENT} is missing" + exit 1 +fi + +RETVAL=0 + +PATH=/sbin:/usr/sbin:/bin:/usr/bin:/usr/local/bin +export PATH + +case "$1" in + start) + # Check for installation failure + if [ -f /etc/platform/installation_failed ] ; then + logger "$0: /etc/platform/installation_failed flag is set. Aborting." 
+ exit 1 + fi + + if [ ${NODETYPE} = "worker" ] ; then + echo -n "Setting up config for pci-irq-affinity-agent: " + + if [ -e ${daemon_pidfile} ] ; then + echo "Killing existing process before starting new" + pid=`cat ${daemon_pidfile}` + kill -TERM $pid + rm -f ${daemon_pidfile} + fi + + echo -n "Starting pci-irq-affinity-agent: " + /bin/sh -c "${AFFINITYAGENT}"' >> /dev/null 2>&1 & echo $!' > ${daemon_pidfile} + RETVAL=$? + if [ $RETVAL -eq 0 ] ; then + echo "OK" + touch /var/lock/subsys/${DAEMON_NAME} + else + echo "FAIL" + fi + fi + ;; + + stop) + if [ ${NODETYPE} = "worker" ] ; then + echo -n "Stopping pci-irq-affinity-agent: " + + if [ -e ${daemon_pidfile} ] ; then + pid=`cat ${daemon_pidfile}` + kill -TERM $pid + rm -f ${daemon_pidfile} + rm -f /var/lock/subsys/${DAEMON_NAME} + echo "OK" + else + echo "FAIL" + fi + fi + ;; + + restart) + $0 stop + sleep 1 + $0 start + ;; + + status) + if [ -e ${daemon_pidfile} ] ; then + pid=`cat ${daemon_pidfile}` + ps -p $pid | grep -v "PID TTY" >> /dev/null 2>&1 + if [ $? 
-eq 0 ] ; then + echo "pci-irq-affinity-agent is running" + RETVAL=0 + else + echo "pci-irq-affinity-agent is not running" + RETVAL=1 + fi + else + echo "pci-irq-affinity-agent is not running ; no pidfile" + RETVAL=1 + fi + ;; + + condrestart) + [ -f /var/lock/subsys/$DAEMON_NAME ] && $0 restart + ;; + + *) + echo "usage: $0 { start | stop | status | restart | condrestart | status }" + ;; +esac + +exit $RETVAL diff --git a/utilities/pci-irq-affinity-agent/files/pci-irq-affinity-agent.conf b/utilities/pci-irq-affinity-agent/files/pci-irq-affinity-agent.conf new file mode 100644 index 000000000..544cee0f7 --- /dev/null +++ b/utilities/pci-irq-affinity-agent/files/pci-irq-affinity-agent.conf @@ -0,0 +1,9 @@ +[process] +process = pci-irq-affinity-agent +pidfile = /var/run/pci-irq-affinity-agent.pid +script = /etc/init.d/pci-irq-affinity-agent +style = lsb ; ocf or lsb +severity = major ; minor, major, critical +restarts = 3 ; restarts before error assertion +interval = 5 ; number of seconds to wait between restarts +debounce = 20 ; number of seconds to wait before degrade clear diff --git a/utilities/pci-irq-affinity-agent/files/pci-irq-affinity-agent.service b/utilities/pci-irq-affinity-agent/files/pci-irq-affinity-agent.service new file mode 100644 index 000000000..737d75a4e --- /dev/null +++ b/utilities/pci-irq-affinity-agent/files/pci-irq-affinity-agent.service @@ -0,0 +1,14 @@ +[Unit] +Description=StarlingX PCI Interrupt Affinity Agent +After=sysinv-agent.service +Before=pmon.service + +[Service] +Type=forking +RemainAfterExit=yes +ExecStart=/etc/init.d/pci-irq-affinity-agent start +ExecStop=/etc/init.d/pci-irq-affinity-agent stop +PIDFile=/var/run/pci-irq-affinity-agent.pid + +[Install] +WantedBy=multi-user.target diff --git a/utilities/pci-irq-affinity-agent/pci_irq_affinity/pci_irq_affinity/__init__.py b/utilities/pci-irq-affinity-agent/pci_irq_affinity/pci_irq_affinity/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git 
class pci_irq_affinity_provider(object):
    """Apply, record and reset PCI irq affinity for instances.

    ``inst_dict`` maps an instance uuid to ``[irqs, msi_irqs, cpulist]`` as
    stored by :meth:`instance_irq_pcpulist_update`.
    """

    def __init__(self):
        self.affinePciIrqDriver = AffinePciIrqDriver()
        self.inst_dict = {}

    def reset_irq_affinity(self, uuid, irqs=None, msi_irqs=None):
        """Reset irq affinity for instance

        The instance has already been deleted or
        related PCI not used by it anymore.

        :param uuid: instance uuid, used for the ``inst_dict`` lookup and
                     in log messages.
        :param irqs: optional collection of irqs to reset.
        :param msi_irqs: optional collection of msi irqs to reset.

        When neither collection is given, the irqs recorded for ``uuid`` in
        ``inst_dict`` are used; if nothing is recorded the call is a no-op.
        Errors are logged, never raised.
        """
        if irqs or msi_irqs:
            # reset irq affinity for specified irqs
            _irqs = irqs
            _msi_irqs = msi_irqs
        elif uuid in self.inst_dict:
            # reset all irq affinity for deleted instance
            _irqs = self.inst_dict[uuid][0]
            _msi_irqs = self.inst_dict[uuid][1]
        else:
            LOG.debug("No pci affinity need to be reset for instance=%s!" % uuid)
            return

        try:
            with open('/proc/irq/default_smp_affinity') as f:
                cpulist = f.readline().strip()
            LOG.debug("default smp affinity bitmap:%s" % cpulist)

            for x in (_irqs, _msi_irqs):
                # Either collection may be None (only one of them supplied)
                # or empty; a truth test covers both, where len() would
                # raise on None and abort the remaining reset.
                if x:
                    pci_utils.set_irq_affinity(True, x, cpulist)
        except Exception as e:
            LOG.error("Failed to reset smp affinity! error=%s" % e)
        else:
            # Only report success when nothing above failed.
            LOG.info("Reset smp affinity done for instance=%s!" % uuid)

    def instance_irq_pcpulist_update(self, uuid, irqs, msi_irqs, cpulist):
        """Record the irqs affined for ``uuid`` and reset the stale ones.

        Irqs that were recorded previously but are absent from the new
        ``irqs`` / ``msi_irqs`` sets are handed to
        :meth:`reset_irq_affinity` before the record is overwritten.
        """
        if uuid in self.inst_dict:
            _prev = self.inst_dict[uuid]
            # get irqs that not appear anymore.
            _irqs = _prev[0].difference(irqs)
            _msi_irqs = _prev[1].difference(msi_irqs)

            # reset pci affinity for those pcis not used by instance anymore
            if _irqs or _msi_irqs:
                self.reset_irq_affinity(uuid, _irqs, _msi_irqs)

        self.inst_dict[uuid] = [irqs, msi_irqs, cpulist]
        LOG.debug(self.inst_dict)

    def affine_pci_dev_instance(self, instance, wait_for_irqs=True):
        """Affine the PCI irqs of ``instance`` when it qualifies.

        Only instances whose cpu policy is 'dedicated' and that have PCI
        devices are processed. ``wait_for_irqs`` is passed through to the
        driver.
        """
        if instance is None:
            return
        if instance.get_cpu_policy() != 'dedicated' or not instance.get_pci_devices():
            return

        LOG.debug("Instance=%s use dedicated cpu policy!!!" % instance.uuid)
        result = self.affinePciIrqDriver.affine_pci_dev_irqs(instance, wait_for_irqs)
        if result is None:
            # The driver returns nothing when it had no work to do or could
            # not refresh the instance; unpacking None would raise TypeError.
            return
        irqs, msi_irqs, cpulist = result
        # record instance on which pci affinity has been applied
        self.instance_irq_pcpulist_update(instance.uuid, irqs, msi_irqs, cpulist)

    def audit_pci_irq_affinity(self):
        """Re-affine every active instance returned by the nova client."""
        # audit instance PCI devices periodically
        filters = {'vm_state': 'active',
                   'task_state': None,
                   'deleted': False}
        instances = novaClient.get_instances(filters)
        for inst in instances:
            self.affine_pci_dev_instance(inst, wait_for_irqs=False)


pciIrqAffinity = pci_irq_affinity_provider()
# Main loop flag; cleared by process_signal_handler to end process_main.
stay_on = True


class EventType:
    """Notification event types the agent listens for."""
    CREATE = 'compute.instance.create.end'
    DELETE = 'compute.instance.delete.end'
    RESIZE = 'compute.instance.resize.confirm.end'


def process_signal_handler(signum, frame):
    """Process Signal Handler

    SIGTERM, SIGINT and SIGTSTP clear ``stay_on`` so the main loop exits;
    any other signal is only logged.
    """
    global stay_on

    if signum in [signal.SIGTERM, signal.SIGINT, signal.SIGTSTP]:
        stay_on = False
    else:
        # The format string previously had no placeholder, so this line
        # raised TypeError instead of logging.
        LOG.info("Ignoring signal %s" % signum)


def get_inst(instance_uuid, callback):
    """Fetch an instance from nova and pass it to ``callback`` if found."""
    # get instance info from nova
    inst = novaClient.get_instance(instance_uuid)
    if inst is not None:
        LOG.debug("inst:%s" % inst)
        callback(inst)


def query_instance_callback(inst):
    """Affine the PCI device irqs of a freshly queried instance."""
    LOG.debug("query inst:%s" % inst)
    pciIrqAffinity.affine_pci_dev_instance(inst)


@periodic_task.periodic_task(spacing=CONF.pci_affine_interval)
def audit_affinity(self, context):
    """Periodic task body: audit irq affinity of all instances."""
    pciIrqAffinity.audit_pci_irq_affinity()


def audit_work(srv, callback):
    """Run ``callback`` as a dynamic timer on the service's thread group."""
    srv.tg.add_dynamic_timer(callback, None, None, None)
    srv.tg.wait()


def audits_initialize():
    """Init periodic audit task for pci interrupt affinity check

    :returns: the oslo service object whose thread group runs the audit.
    """
    srv = service.Service()
    periodicTasks = periodic_task.PeriodicTasks(CONF)
    periodicTasks.add_periodic_task(audit_affinity)
    thread = threading.Thread(target=audit_work, args=(srv, periodicTasks.run_periodic_tasks))
    thread.start()
    return srv


class InstCreateNotificationEp(object):
    """Notification endpoint for instance-create events."""
    filter_rule = oslo_messaging.NotificationFilter(
        event_type=EventType.CREATE)

    def info(self, ctxt, publisher_id, event_type, payload, metadata):
        uuid = payload.get('instance_id')
        self.instance_create_handler(uuid)

    def instance_create_handler(self, instance_uuid):
        if instance_uuid is not None:
            LOG.info("instance_created: uuid=%s." % instance_uuid)
            eventlet.spawn(get_inst, instance_uuid, query_instance_callback).wait()


class InstResizeNotificationEp(object):
    """Notification endpoint for instance-resize-confirm events."""
    filter_rule = oslo_messaging.NotificationFilter(
        event_type=EventType.RESIZE)

    def info(self, ctxt, publisher_id, event_type, payload, metadata):
        uuid = payload.get('instance_id')
        self.instance_resize_handler(uuid)

    def instance_resize_handler(self, instance_uuid):
        if instance_uuid is not None:
            LOG.info("instance_resized: uuid=%s." % instance_uuid)
            eventlet.spawn(get_inst, instance_uuid, query_instance_callback).wait()


class InstDelNotificationEp(object):
    """Notification endpoint for instance-delete events."""
    filter_rule = oslo_messaging.NotificationFilter(
        event_type=EventType.DELETE)

    def info(self, ctxt, publisher_id, event_type, payload, metadata):
        uuid = payload.get('instance_id')
        self.instance_delete_handler(uuid)

    def instance_delete_handler(self, instance_uuid):
        if instance_uuid is not None:
            LOG.info("instance_deleted: uuid=%s." % instance_uuid)
            pciIrqAffinity.reset_irq_affinity(instance_uuid)


def get_rabbit_config():
    """Get rabbit config info from specific system config file.

    :returns: dict with keys host, port, user_id, password and virt_host
              read from the 'amqp' section, or an empty dict when any of
              them cannot be read.
    """

    rabbit_cfg = {}
    rabbit_session = 'amqp'
    options = ['host', 'port', 'user_id', 'password',
               'virt_host']
    try:
        for option in options:
            rabbit_cfg[option] = sysconfig.get(rabbit_session, option)

    except Exception as e:
        LOG.error("Could not read all required rabbitmq configuration! Err=%s" % e)
        rabbit_cfg = {}

    return rabbit_cfg


def rpc_work(srv):
    """Start the notification listener and block until it finishes."""
    srv.start()
    srv.wait()


def start_rabbitmq_client():
    """Start Rabbitmq client to listen instance notifications from Nova

    :returns: the notification listener, already started on its own thread.
    :raises KeyError: when the rabbit configuration could not be read
                      (get_rabbit_config() returned an empty dict).
    """
    cfg = get_rabbit_config()
    rabbit_url = "rabbit://%s:%s@%s:%s/%s" % (cfg['user_id'], cfg['password'],
                                              cfg['host'], cfg['port'], cfg['virt_host'])
    # Never log rabbit_url itself: it embeds the broker password.
    LOG.info("Connecting to rabbitmq at %s:%s, virt_host=%s" %
             (cfg['host'], cfg['port'], cfg['virt_host']))

    target = oslo_messaging.Target(exchange="nova", topic="notifications", server="info",
                                   version="2.1", fanout=True)
    transport = oslo_messaging.get_notification_transport(CONF, url=rabbit_url)
    endpoints = [InstCreateNotificationEp(),
                 InstResizeNotificationEp(),
                 InstDelNotificationEp()]

    server = oslo_messaging.get_notification_listener(transport, [target],
                                                      endpoints, "threading")
    thread = threading.Thread(target=rpc_work, args=(server,))
    thread.start()
    LOG.info("Rabbitmq Client Started!")

    return server


def process_main():
    """Entry function for PCI Interrupt Affinity Agent

    Starts the audit task and the notification listener when openstack is
    enabled in the system config, then idles until a handled signal clears
    ``stay_on``. Exits with status 200 on an unexpected exception.
    """

    LOG.info("Enter PCIInterruptAffinity Agent")

    # Bound before the try block so the finally clause can always read them,
    # even when start-up fails part way through.
    openstack_enabled = False
    audit_srv = None
    rabbit_client = None

    try:
        signal.signal(signal.SIGTSTP, process_signal_handler)
        # SIGTERM is handled by process_signal_handler too; without this
        # registration the default action ends the process with no cleanup.
        signal.signal(signal.SIGTERM, process_signal_handler)
        # getboolean accepts true/True/1/yes/on, where the former string
        # comparison matched only the exact lowercase 'true'.
        openstack_enabled = sysconfig.getboolean('openstack', 'openstack_enabled')
        if openstack_enabled:
            novaClient.open_libvirt_connect()
            audit_srv = audits_initialize()
            rabbit_client = start_rabbitmq_client()

        while stay_on:
            time.sleep(1)

    except KeyboardInterrupt:
        LOG.info("keyboard Interrupt received.")

    except Exception as e:
        LOG.info("%s" % e)
        sys.exit(200)

    finally:
        LOG.error("proces_main finalized!!!")
        if openstack_enabled:
            novaClient.close_libvirt_connect()
        if audit_srv is not None:
            audit_srv.tg.stop()
        if rabbit_client is not None:
            rabbit_client.stop()


if __name__ == '__main__':
    process_main()
def _int_opt(name, default, help_text):
    """Build one integer option of the agent."""
    return cfg.IntOpt(name, default=default, help=help_text)


# Options of the agent: timing knobs plus the path of the ini file that is
# loaded into ``sysconfig`` below.
pci_irq_affinity_opts = [
    _int_opt('pci_affine_interval', 60,
             'Number of seconds between pci affinity updates'),
    _int_opt('msi_irq_timeout', 45,
             'Number of seconds to wait for msi irq configuration'),
    _int_opt('msi_irq_since', 6,
             'Number of seconds to wait for msi irqs to stabilize.'),
    _int_opt('msi_irq_check_interval', 2,
             'Check interval in seconds for msi irqs to stabilize.'),
    cfg.StrOpt('config_file',
               default='/etc/pci_irq_affinity/config.ini',
               help='Get config info from specific config file.'),
]

CONF = cfg.CONF


def register_opts(conf):
    """Register ``pci_irq_affinity_opts`` on the given config object."""
    conf.register_opts(pci_irq_affinity_opts)


register_opts(CONF)

# Parser for the file named by the ``config_file`` option.
sysconfig = configparser.ConfigParser()
sysconfig.read(CONF.config_file)
synchronized = lockutils.synchronized_with_prefix('pci_irq_affinity-')


class AffinePciIrqDriver(object):
    """Affine the irqs of an instance's PCI devices.

    Per-PCI-address bookkeeping used while waiting for msi irqs:
      _msi_irq_count   -- number of msi irqs seen at the last check
      _msi_irq_since   -- seconds the count has been unchanged
      _msi_irq_elapsed -- seconds spent waiting in total
    """

    def __init__(self):
        self._msi_irq_count = {}
        self._msi_irq_since = {}
        self._msi_irq_elapsed = {}

    def affine_pci_dev_irqs(self, inst, wait_for_irqs=True):
        """Affine PCI device irqs to VM's pcpus.

        :param inst: instance object exposing uuid, pci_devices,
                     get_pci_devices(), get_numa_topology() and
                     get_extra_spec().
        :param wait_for_irqs: when true, first poll until the msi irq counts
                              settle or the timeout expires, then re-read
                              the instance from nova before affining.
        :returns: ``(irqs, msi_irqs, cpulist)`` where cpulist is the one
                  reported for the last device processed, or ``None`` when
                  the instance has no PCI devices or could not be re-read.
        """

        def _wait_for_msi_irqs(self, inst):
            """Check if each pci device has the expected number of msi irqs."""
            _prev = self._msi_irq_count.copy()
            addrs = set()

            for pci_dev in inst.pci_devices:
                addr = pci_dev.address
                addrs.add(addr)
                try:
                    irqs, msi_irqs = pci_utils.get_irqs_by_pci_address(addr)
                except Exception as e:
                    msi_irqs = set()
                    LOG.error('_wait_for_msi_irqs: pci_addr=%(A)s, error=%(E)s' %
                              {'A': addr, 'E': e})
                self._msi_irq_count[addr] = len(msi_irqs)
                self._msi_irq_elapsed[addr] += \
                    CONF.msi_irq_check_interval
                if _prev[addr] == self._msi_irq_count[addr]:
                    self._msi_irq_since[addr] += \
                        CONF.msi_irq_check_interval
                else:
                    self._msi_irq_since[addr] = 0

            # Done when msi irq counts have not changed for some time
            if all((self._msi_irq_count[k] > 0) and
                   (self._msi_irq_since[k] >= CONF.msi_irq_since)
                   for k in addrs):
                raise loopingcall.LoopingCallDone()

            # Abort due to timeout
            if all(self._msi_irq_elapsed[k] >= CONF.msi_irq_timeout
                   for k in addrs):
                msg = ("reached %(timeout)s seconds timeout, waiting for "
                       "msi irqs of pci_addrs: %(addrs)s") % {
                    'timeout': CONF.msi_irq_timeout,
                    'addrs': list(addrs)}
                LOG.warning(msg)
                raise loopingcall.LoopingCallDone()

        # Determine how many msi irqs we expect to be configured.
        if not inst.get_pci_devices():
            return None

        # Initialize msi irq tracking.
        for pci_dev in inst.pci_devices:
            if wait_for_irqs or (pci_dev.address not in self._msi_irq_count):
                self._msi_irq_count[pci_dev.address] = 0
                self._msi_irq_since[pci_dev.address] = 0
                self._msi_irq_elapsed[pci_dev.address] = 0

        # Wait for msi irqs to be configured.
        if wait_for_irqs:
            timer = loopingcall.FixedIntervalLoopingCall(
                _wait_for_msi_irqs, self, inst)
            timer.start(interval=CONF.msi_irq_check_interval).wait()

        @synchronized(inst.uuid)
        def do_affine_pci_dev_instance(refresh_need):
            """Set pci device irq affinity for this instance."""

            _irqs = set()
            _msi_irqs = set()
            # Bound up front so the return below is valid even when the
            # device loop runs zero times.
            pci_cpulist = ''
            # Use the caller's instance unless a refresh is requested; this
            # name was previously unbound when refresh_need was false.
            _inst = inst
            # refresh instance info.
            if refresh_need:
                _inst = novaClient.get_instance(inst.uuid)
                if _inst is None:
                    return None

            numa_topology = _inst.get_numa_topology()
            extra_spec = _inst.get_extra_spec()
            for pci_dev in _inst.pci_devices:
                try:
                    irqs, msi_irqs, pci_numa_node, pci_cpulist = \
                        pci_utils.set_irqs_affinity_by_pci_address(
                            pci_dev.address, extra_spec, numa_topology)
                except Exception as e:
                    irqs = set()
                    msi_irqs = set()
                    pci_numa_node = None
                    pci_cpulist = ''
                    LOG.error("Could not affine irqs for pci_addr:%(A)s, "
                              "error: %(E)s" % {"A": pci_dev.address, "E": e})

                # Log irqs affined when there is a change in the counts.
                # .get(): a device that only exists on the refreshed
                # instance has no tracking entry yet.
                msi_irq_count = len(msi_irqs)
                if ((msi_irq_count != self._msi_irq_count.get(pci_dev.address)) or
                        wait_for_irqs):
                    self._msi_irq_count[pci_dev.address] = msi_irq_count
                    LOG.info(("Instance=%(U)s: IRQs affined for pci_addr=%(A)s, "
                              "dev_id=%(D)s, dev_type=%(T)s, "
                              "vendor_id=%(V)s, product_id=%(P)s, "
                              "irqs=%(I)s, msi_irqs=%(M)s, "
                              "numa_node=%(N)s, cpulist=%(C)s")
                             % {'U': inst.uuid,
                                'A': pci_dev.address,
                                'D': pci_dev.dev_id,
                                'T': pci_dev.dev_type,
                                'V': pci_dev.vendor_id,
                                'P': pci_dev.product_id,
                                'I': ', '.join(map(str, irqs)),
                                'M': ', '.join(map(str, msi_irqs)),
                                'N': pci_numa_node, 'C': pci_cpulist})
                _irqs.update(irqs)
                _msi_irqs.update(msi_irqs)
            return (_irqs, _msi_irqs, pci_cpulist)
        return do_affine_pci_dev_instance(wait_for_irqs)
def range_to_list(csv_range=None):
    """Convert a string of comma separate ranges into an expanded list of integers.

    E.g., '1-3,8-9,15' is converted to [1,2,3,8,9,15]

    :param csv_range: string such as '1-3,8-9,15'; None or '' gives [].
    :returns: list of integers in the order the ranges appear.
    """
    if not csv_range:
        return []
    expanded = []
    for part in csv_range.split(','):
        # A real list is needed: indexing the result of map() only works on
        # Python 2.
        bounds = [int(bound) for bound in part.split('-')]
        expanded.extend(range(bounds[0], bounds[-1] + 1))
    return expanded


def _translate_virDomainState(state):
    """Return human readable virtual domain state string.

    :raises KeyError: for a state number outside 0..8.
    """
    states = {
        0: 'NOSTATE',
        1: 'Running',
        2: 'Blocked',
        3: 'Paused',
        4: 'Shutdown',
        5: 'Shutoff',
        6: 'Crashed',
        7: 'pmSuspended',
        8: 'Last',
    }
    return states[state]


def _mask_to_cpulist(mask=0):
    """Create cpulist from a cpuset bit mask.

    :param mask: cpuset mask; bit N set means cpu N is a member
    :returns cpulist: list of cpu ids in ascending order; empty when mask is
                      None, zero or negative
    """
    if mask is None or mask <= 0:
        return []
    # bit_length() bounds the scan by the highest set bit instead of a
    # fixed maximum cpu count.
    return [cpu for cpu in range(mask.bit_length()) if (1 << cpu) & mask]


class suppress_stdout_stderr(object):
    """A context manager for doing a "deep suppression" of stdout and stderr in Python

    i.e. will suppress all print, even if the print originates in a compiled C/Fortran
    sub-function.
    This will not suppress raised exceptions, since exceptions are printed
    to stderr just before a script exits, and after the context manager has
    exited (at least, I think that is why it lets exceptions through).

    An instance is single use: every descriptor it opened is closed on exit.
    """
    def __init__(self):
        # Open a pair of null files
        self.null_fds = [os.open(os.devnull, os.O_RDWR) for x in range(2)]
        # Save the actual stdout (1) and stderr (2) file descriptors.
        self.save_fds = (os.dup(1), os.dup(2))

    def __enter__(self):
        # Assign the null pointers to stdout and stderr.
        os.dup2(self.null_fds[0], 1)
        os.dup2(self.null_fds[1], 2)

    def __exit__(self, *_):
        # Re-assign the real stdout/stderr back to (1) and (2)
        os.dup2(self.save_fds[0], 1)
        os.dup2(self.save_fds[1], 2)
        # Close the null files and the saved duplicates; leaving the
        # duplicates open leaked two descriptors per use.
        for fd in list(self.null_fds) + list(self.save_fds):
            os.close(fd)


class TimeoutError(Exception):
    """Raised by timeout_handler when the SIGALRM timer fires."""
    pass


def timeout_handler(signum, frame):
    """Signal handler that turns an alarm into a TimeoutError."""
    raise TimeoutError('timeout')


def connect_to_libvirt():
    """Connect to local libvirt.

    The open call is bounded by a LIBVIRT_TIMEOUT_SEC real-time timer.

    :returns: read-only libvirt connection to qemu:///system
    :raises TimeoutError: when the timer expires first
    :raises Exception: any error from libvirt is propagated unchanged
    """
    duri = "qemu:///system"
    previous_handler = signal.signal(signal.SIGALRM, timeout_handler)
    try:
        signal.setitimer(signal.ITIMER_REAL, LIBVIRT_TIMEOUT_SEC)
        with suppress_stdout_stderr():
            conn = libvirt.openReadOnly(duri)
    finally:
        # Always disarm the timer and put back whatever handler was
        # installed before, on success and on failure alike.
        signal.setitimer(signal.ITIMER_REAL, 0)
        if previous_handler is not None:
            signal.signal(signal.SIGALRM, previous_handler)
    return conn


def get_host_cpu_topology():
    """Enumerate logical cpu topology using socket_id, core_id, thread_id.

    This generates the following dictionary:
    topology[socket_id][core_id][thread_id] = cpu_id

    As a side effect the module global ``total_cpus`` is set to the number
    of cpu elements found under the host cells of the capabilities XML.
    """
    global total_cpus

    # Connect to local libvirt hypervisor
    conn = connect_to_libvirt()
    try:
        # Get host capabilities
        caps_str = conn.getCapabilities()
    finally:
        # Close even when the query fails so the connection is not leaked.
        conn.close()

    doc = ElementTree.fromstring(caps_str)
    cells_elem = doc.find('./host/topology/cells')
    total_cpus = 0 if cells_elem is None else len(list(cells_elem.iter('cpu')))

    Thread_cnt = {}
    topology = {}
    for cell in doc.findall('./host/topology/cells/cell'):
        for cpu in cell.findall('./cpus/cpu'):
            # obtain core_id, cpu_id, and socket_id; ignore 'siblings' since
            # that can be inferred by enumeration of thread_id.
            core_id = int(cpu.get('core_id'))
            cpu_id = int(cpu.get('id'))
            socket_id = int(cpu.get('socket_id'))

            # thread_id's are enumerated assuming cpu_id is already sorted
            per_socket = Thread_cnt.setdefault(socket_id, {})
            if core_id in per_socket:
                per_socket[core_id] += 1
            else:
                per_socket[core_id] = 0
            thread_id = per_socket[core_id]

            # save topology[socket_id][core_id][thread_id]
            topology.setdefault(socket_id, {}).setdefault(core_id, {})[thread_id] = cpu_id
    return topology


def _vfio_pci_addresses(dom_xml):
    """Collect 'dddd:bb:ss.f' PCI addresses of vfio-driven interfaces.

    Reads the ``source/address`` element of every ``devices/interface``
    whose ``driver`` name starts with 'vfio'.
    """
    pci_addrs = set()
    for interface in dom_xml.findall('./devices/interface'):
        driver = interface.find('driver')
        # Interfaces without a driver element or driver name are not vfio.
        if driver is None or not (driver.get('name') or '').startswith('vfio'):
            continue
        addr_tag = interface.find('source/address')
        if addr_tag is None or addr_tag.get('type') != 'pci':
            continue
        try:
            # Attributes are strings such as '0x83'; %x needs integers.
            pci_addr = "%04x:%02x:%02x.%01x" % tuple(
                int(addr_tag.get(field), 16)
                for field in ('domain', 'bus', 'slot', 'function'))
        except (TypeError, ValueError) as e:
            LOG.warning("Skipping malformed pci address, error=%s" % e)
            continue
        pci_addrs.add(pci_addr)
    return pci_addrs


def get_guest_domain_info(dom):
    """Obtain cpulist of pcpus in the order of vcpus.

    This applies to either pinned or floating vcpus, Note that the cpuinfo
    pcpu value can be stale if we scale down cpus since it reports cpu-last-run.
    For this reason use cpumap = d_vcpus[1][vcpu], instead of cpuinfo
    (i.e., vcpu, state, cpuTime, pcpu = d_vcpus[0][vcpu]).

    :param dom: libvirt domain object; UUIDString(), info(), vcpus() and
                XMLDesc() are called on it.
    :returns: dict with keys uuid, state, IsCpuPinned, nr_vcpus, nodelist,
              cpulist, cpu_pinning and pci_addrs.
    """
    uuid = dom.UUIDString()
    d_state, d_maxMem_KiB, d_memory_KiB, \
        d_nrVirtCpu, d_cpuTime = dom.info()
    try:
        with suppress_stdout_stderr():
            d_vcpus = dom.vcpus()
    except Exception:
        # Fall back to "no cpu is up" maps; only element [1] is read below.
        d_vcpus = ([], d_nrVirtCpu * [tuple(total_cpus * [False])])

    cpulist_d = {}
    cpuset_total = 0
    up_total = 0
    for vcpu in range(d_nrVirtCpu):
        cpuset = 0
        for cpu, up in enumerate(d_vcpus[1][vcpu]):
            if up:
                cpulist_d[vcpu] = cpu
                cpuset |= 1 << cpu
                up_total += 1
        cpuset_total |= cpuset
    cpulist_f = _mask_to_cpulist(mask=cpuset_total)
    cpulist_p = [cpulist_d[key] for key in sorted(cpulist_d)]

    # Determine if floating or pinned, display appropriate cpulist
    if up_total > d_nrVirtCpu:
        d_cpulist = cpulist_f
        cpu_pinned = False
    else:
        d_cpulist = cpulist_p
        cpu_pinned = True

    # Determine list of numa nodes (the hard way)
    dom_xml = ElementTree.fromstring(dom.XMLDesc(0))
    nodeset = set()
    for elem in dom_xml.findall('./numatune/memnode'):
        nodeset.update(range_to_list(elem.get('nodeset')))
    d_nodelist = sorted(nodeset)

    # Get pci info.
    pci_addrs = _vfio_pci_addresses(dom_xml)

    # Update dictionary with per-domain information
    domain = {
        'uuid': uuid,
        'state': _translate_virDomainState(d_state),
        'IsCpuPinned': cpu_pinned,
        'nr_vcpus': d_nrVirtCpu,
        'nodelist': d_nodelist,
        'cpulist': d_cpulist,
        'cpu_pinning': cpulist_d,
        'pci_addrs': pci_addrs
    }
    return domain


def get_guest_domain_by_uuid(conn, uuid):
    """Look up a domain by uuid and return its info dict, or None.

    None is returned, with a warning logged, when the lookup raises.
    """
    try:
        dom = conn.lookupByUUIDString(uuid)
    except Exception as e:
        LOG.warning("Failed to get domain for uuid=%s! error=%s" % (uuid, e))
        return None
    domain = get_guest_domain_info(dom)
    return domain
class numa_cell:
    """One guest NUMA cell: its id, its vcpu set and the vcpu->pcpu map."""

    def __init__(self, id, cpuset, cpu_pinning):
        self.id, self.cpuset, self.cpu_pinning = id, cpuset, cpu_pinning


class numa_topology:
    """NUMA cells belonging to a single instance."""

    def __init__(self, uuid, cells):
        self.instance_uuid = uuid
        self.cells = cells

    def vcpu_to_pcpu(self, vcpu):
        """Return (cell, pcpu) for the first cell that pins *vcpu*.

        Raises KeyError when no cell has a pinning entry for the vcpu.
        """
        for candidate in self.cells:
            pinning = candidate.cpu_pinning
            if vcpu in pinning:
                return candidate, pinning[vcpu]
        raise KeyError('Unable to find pCPU for vCPU %d' % vcpu)


class pci_device:
    """A PCI device identified by address; other fields start empty."""

    def __init__(self, addr):
        self.address = addr
        self.dev_id = self.dev_type = ""
        self.vendor_id = self.product_id = ""


class instance:
    """An instance with its flavor extra specs, cpu policy and devices."""

    def __init__(self, uuid, name, extra_spec):
        self.uuid = uuid
        self.name = name
        self.extra_spec = extra_spec
        self.pci_devices = set()
        self.numa_topology = None
        self.cpu_policy = 'shared'

    def update(self, domain):
        """Refresh topology, cpu policy and pci devices from a domain dict.

        Reads the 'nodelist', 'nr_vcpus', 'cpu_pinning', 'IsCpuPinned' and
        'pci_addrs' keys of *domain*.
        """
        node_cells = set()
        for numa_id in domain['nodelist']:
            # Every cell receives the full vcpu range and pinning map.
            vcpus = range(domain['nr_vcpus'])
            pinning = domain['cpu_pinning']
            node_cells.add(numa_cell(numa_id, vcpus, pinning))
            LOG.debug("cell_id=%s, vcpuset=%s, cpu_pinning=%s"
                      % (numa_id, vcpus, pinning))

        self.numa_topology = numa_topology(self.uuid, node_cells)
        self.cpu_policy = 'dedicated' if domain['IsCpuPinned'] else 'shared'

        # Devices are added to whatever earlier calls already recorded.
        for address in domain['pci_addrs']:
            self.pci_devices.add(pci_device(address))

    def get_cpu_policy(self):
        return self.cpu_policy

    def get_numa_topology(self):
        return self.numa_topology

    def get_extra_spec(self):
        return self.extra_spec

    def get_pci_devices(self):
        return self.pci_devices
# Syslog facility name passed to the SysLogHandler below.
_syslog_facility = 'local1'


# Shared logger for the agent; other modules use `from log import LOG`.
LOG = logging.getLogger("pci-interrupt-affinity")
# Each record carries timestamp, thread name, pid, logger name, source
# path and line number, level and message.
formatter = logging.Formatter("%(asctime)s %(threadName)s[%(process)d] "
                              "%(name)s.%(pathname)s.%(lineno)d - %(levelname)s "
                              "%(message)s")
# Records are sent to the local syslog socket at /dev/log.
handler = logging.handlers.SysLogHandler(address='/dev/log',
                                         facility=_syslog_facility)
handler.setFormatter(formatter)
LOG.addHandler(handler)
# INFO threshold: LOG.debug() calls are dropped unless the level is lowered.
LOG.setLevel(logging.INFO)
class NovaProvider:
    """Wrap novaclient and libvirt access for the agent.

    Exposes get_instance() and get_instances(), which combine the flavor
    extra specs reported by Nova with the numa/pci information read from
    libvirt for instances running on this host.
    """

    def __init__(self):
        self._creds = self._get_keystone_creds()
        self._auth = self._get_auth(self._creds)
        self._hostname = self.get_hostname()
        self._conn = None

    def get_hostname(self):
        """Return the local host name as reported by the socket module."""
        return socket.gethostname()

    def _get_keystone_creds(self):
        """Read keystone credentials from sysconfig and the keyring.

        :returns: dict of password-plugin options, or None when any option
                  or the password could not be obtained
        """
        creds = {}
        openstackSession = 'openstack'
        options = ['username', 'user_domain_name', 'project_name',
                   'project_domain_name', 'keyring_service', 'auth_url']

        try:
            for option in options:
                creds[option] = sysconfig.get(openstackSession, option)

            # 'keyring_service' only selects the keyring entry; it is not an
            # auth option, so it is removed from the dict.
            creds['password'] = keyring.get_password(creds.pop('keyring_service'),
                                                     creds['username'])

        except Exception as e:
            LOG.error("Could not get keystone creds configuration! Err=%s" % e)
            creds = None

        return creds

    def _get_auth(self, creds):
        """Build a keystone password auth plugin; None when creds is None."""
        if creds is None:
            return None
        loader = loading.get_plugin_loader('password')
        return loader.load_from_options(**creds)

    def get_nova(self):
        """Return a nova client (API 2.1) bound to a new keystone session.

        :raises Exception: when the session or client cannot be created
        """
        try:
            sess = session.Session(auth=self._auth)
            return client.Client('2.1', session=sess)
        except Exception as e:
            # Keep the cause in the log; callers only see the generic error.
            LOG.warning("Failed to connect to nova! error=%s" % e)
            raise Exception("could not connect nova!")

    def open_libvirt_connect(self):
        """Open the libvirt connection used by the get_instance* methods."""
        self._conn = guest.connect_to_libvirt()
        guest.get_host_cpu_topology()

    def close_libvirt_connect(self):
        """Close the libvirt connection; a no-op when none is open."""
        if self._conn is None:
            return
        try:
            self._conn.close()
        finally:
            self._conn = None

    def get_instance(self, uuid):
        """Return the instance with this uuid if it runs on this host.

        :param uuid: instance uuid
        :returns: instance object, or None when Nova cannot provide the
                  server or the server is on another host
        """
        try:
            nova = self.get_nova()
            server = nova.servers.get(uuid)
            flavor_info = nova.flavors.get(server.flavor["id"])
            hostname = server.__dict__['OS-EXT-SRV-ATTR:host']
        except Exception as e:
            LOG.warning("Could not get instance=%s from Nova! error=%s" % (uuid, e))
            return None

        LOG.debug('GET VM:%s in node:%s' % (server.name, hostname))

        if hostname != self._hostname:
            LOG.debug('The VM is not in current host!')
            return None

        inst = instance.instance(uuid, server.name, flavor_info.get_keys())
        # get numa topology and pci info from libvirt
        try:
            domain = guest.get_guest_domain_by_uuid(self._conn, uuid)
            if domain:
                inst.update(domain)
        except Exception as e:
            LOG.warning("Failed to access libvirt! error=%s" % e)
        return inst

    def get_instances(self, filters):
        """Return the cpu-dedicated instances hosted on this node.

        :param filters: nova search options; not modified. The 'host'
                        option is always set to the local host name.
        :returns: set of instance objects whose flavor has
                  hw:cpu_policy == 'dedicated'
        """
        instances = set()
        try:
            nova = self.get_nova()
            # Work on a copy so the caller's dict does not gain a 'host' key.
            search_opts = dict(filters or {})
            search_opts['host'] = self._hostname
            servers = nova.servers.list(detailed=True, search_opts=search_opts)
            flavors = dict((flavor.id, flavor) for flavor in nova.flavors.list())

            # Extra specs are fetched once per flavor, not once per server.
            extra_specs = {}
            for server in servers:
                flavor_id = server.flavor["id"]
                if flavor_id not in flavors:
                    continue
                if flavor_id not in extra_specs:
                    extra_specs[flavor_id] = flavors[flavor_id].get_keys()
                if extra_specs[flavor_id].get('hw:cpu_policy') == 'dedicated':
                    # Each instance gets its own copy of the extra specs.
                    instances.add(instance.instance(server.id, server.name,
                                                    dict(extra_specs[flavor_id])))

            # get numa topology and pci info from libvirt
            for inst in instances:
                domain = guest.get_guest_domain_by_uuid(self._conn, inst.uuid)
                # The lookup yields None on failure; skip the update so one
                # missing domain does not abort the remaining instances.
                if domain:
                    inst.update(domain)
        except Exception as e:
            LOG.warning("Failed to get instances info! error=%s" % e)

        return instances
def list_to_range(input_list=None):
    """Convert a list into a string of comma separated ranges.

    E.g., [1,2,3,8,9,15] is converted to '1-3,8-9,15'

    Lists shorter than three elements are returned as plain comma separated
    values. Runs are only detected between neighbouring elements, so the
    input is expected to be in ascending order.

    :param input_list: list of integers, or None
    :returns: range string; '' for None or an empty list
    """
    if input_list is None:
        return ''
    if len(input_list) < 3:
        return ','.join(str(x) for x in input_list)

    # Within a run of consecutive integers (index - value) is constant, so
    # grouping on it splits the list into runs. groupby hands the key one
    # (index, value) tuple, hence the single-argument lambda.
    runs = (list(run) for _, run in groupby(enumerate(input_list),
                                            lambda pair: pair[0] - pair[1]))
    # A one-element run renders as 'a'; longer runs render as 'first-last'.
    return ','.join(
        '-'.join(map(str, (run[0][1], run[-1][1])[:len(run)])) for run in runs)


def parse_cpu_spec(spec):
    """Parse a CPU set specification.

    Each element in the list is either a single CPU number, a range of
    CPU numbers, or a caret followed by a CPU number to be excluded
    from a previous range.

    :param spec: cpu set string eg "1-4,^3,6"

    :returns: a set of CPU indexes
    :raises Exception: on a malformed range, exclusion or inclusion rule
    """
    cpuset_ids = set()
    cpuset_reject_ids = set()
    for rule in spec.split(','):
        rule = rule.strip()
        # Handle multi ','
        if not rule:
            continue
        # Note the count limit in the .split() call
        range_parts = rule.split('-', 1)
        if len(range_parts) > 1:
            reject = range_parts[0].startswith('^')
            if reject:
                range_parts[0] = range_parts[0][1:]

            # So, this was a range; start by converting the parts to ints
            try:
                start, end = [int(p.strip()) for p in range_parts]
            except ValueError:
                raise Exception("Invalid range expression %r" % rule)
            # Make sure it's a valid range
            if start > end:
                raise Exception("Invalid range expression %r" % rule)
            # Add the range to the include or the exclude set
            target = cpuset_reject_ids if reject else cpuset_ids
            target.update(range(start, end + 1))
        elif rule[0] == '^':
            # Not a range, the rule is an exclusion rule; convert to int
            try:
                cpuset_reject_ids.add(int(rule[1:].strip()))
            except ValueError:
                raise Exception("Invalid exclusion expression %r" % rule)
        else:
            # OK, a single CPU to include; convert to int
            try:
                cpuset_ids.add(int(rule))
            except ValueError:
                raise Exception("Invalid inclusion expression %r" % rule)

    # Use sets to handle the exclusion rules for us
    cpuset_ids -= cpuset_reject_ids

    return cpuset_ids


def _get_pci_irq_affinity_mask(extra_spec):
    """Parse pci irq affinity mask based on flavor extra-spec.

    :param extra_spec: flavor extra-spec dict; None is treated as empty
    :returns: set of vcpu ids named by hw:pci_irq_affinity_mask, or None
              when the extra-spec does not set it
    :raises Exception: when the mask is set but selects no cpu
    """
    if not extra_spec or 'hw:pci_irq_affinity_mask' not in extra_spec:
        LOG.info('Not set pci_irq_affinity_mask!')
        return None

    pci_irq_affinity_mask = extra_spec['hw:pci_irq_affinity_mask']
    LOG.info("pci_irq_affinity_mask: %s" % pci_irq_affinity_mask)

    cpuset_ids = parse_cpu_spec(pci_irq_affinity_mask)
    if not cpuset_ids:
        raise Exception("No CPUs available after parsing %r" % pci_irq_affinity_mask)
    return cpuset_ids
def _pci_lookup_error(pci_addr, path_label, path, err):
    """Log a failed sysfs read and return the exception to raise for it."""
    LOG.error('get_irqs_by_pci_address: '
              'pci_addr=%(A)s: %(L)s=%(P)s; error=%(E)s',
              {'A': pci_addr, 'L': path_label, 'P': path, 'E': err})
    return Exception("PciDeviceNotFoundById id = %r" % pci_addr)


def get_irqs_by_pci_address(pci_addr):
    """Get list of PCI IRQs based on a VF's pci address

    Raises an Exception whose message starts with PciDeviceNotFoundById
    in case the pci device is not found, or when there is an underlying
    problem getting associated irqs.

    :param pci_addr: PCI address
    :return: irqs, msi_irqs
    """
    irqs = set()
    msi_irqs = set()

    dev_path = "/sys/bus/pci/devices/%s" % (pci_addr)
    if not os.path.isdir(dev_path):
        raise Exception("PciDeviceNotFoundById id = %r" % pci_addr)

    _irqs = set()
    irq_path = "%s/irq" % (dev_path)
    try:
        with open(irq_path) as f:
            # Only positive irq numbers are kept.
            _irqs.update(int(x) for x in f.readline().split() if int(x) > 0)
    except Exception as e:
        raise _pci_lookup_error(pci_addr, 'irq_path', irq_path, e)

    _msi_irqs = set()
    msi_path = "%s/msi_irqs" % (dev_path)
    try:
        _msi_irqs.update(int(x) for x in os.listdir(msi_path) if int(x) > 0)
    except OSError as e:
        # msi_path disappears during configuration; do not treat
        # non-existence as fatal. Both result sets are returned empty here,
        # including any irq already read from irq_path.
        if e.errno == errno.ENOENT:
            return (irqs, msi_irqs)
        raise _pci_lookup_error(pci_addr, 'msi_path', msi_path, e)
    except Exception as e:
        raise _pci_lookup_error(pci_addr, 'msi_path', msi_path, e)

    # Return only configured irqs, ignore any that are missing.
    irqs.update(irq for irq in _irqs
                if os.path.isdir("/proc/irq/%s" % (irq)))
    msi_irqs.update(irq for irq in _msi_irqs
                    if os.path.isdir("/proc/irq/%s" % (irq)))
    return (irqs, msi_irqs)


def get_pci_irqs_pinned_cpuset(extra_spec=None, numa_topology=None,
                               pci_numa_node=None):
    """Get pinned cpuset where pci irq are affined.

    :param extra_spec: extra_spec
    :param pci_numa_node: numa node of a specific PCI device
    :param numa_topology: instance numa topology
    :return: cpuset, cpulist
    """
    cpuset = set()
    cpulist = ''

    LOG.debug("extra_spec:%s, topo:%s, numa_node:%s" % (extra_spec, numa_topology, pci_numa_node))
    if numa_topology is None or pci_numa_node is None or pci_numa_node < 0:
        return (cpuset, cpulist)

    # Determine full affinity cpuset, but restrict to pci's numa node
    for cell in numa_topology.cells:
        if cell.id == pci_numa_node and cell.cpu_pinning is not None:
            cpuset.update(cell.cpu_pinning.values())
    LOG.info("pinning pcpu list:%s" % cpuset)

    # Use extra-spec hw:pci_irq_affinity_mask only when the instance is pinned.
    if cpuset:
        # extra_spec defaults to None; treat that as "no mask configured".
        pci_cpuset = None
        if extra_spec is not None:
            pci_cpuset = _get_pci_irq_affinity_mask(extra_spec)
        if pci_cpuset:
            # The mask replaces the per-node set: every masked vcpu of every
            # pinned cell is translated to its pcpu.
            cpuset = set()
            for cell in numa_topology.cells:
                if cell.cpu_pinning is None:
                    continue
                for vcpu in cell.cpuset:
                    if vcpu in pci_cpuset:
                        _, pcpu = numa_topology.vcpu_to_pcpu(vcpu)
                        cpuset.add(pcpu)

    # Set iteration order is arbitrary, while list_to_range only merges
    # neighbouring values; sort so consecutive cpus collapse into ranges.
    cpulist = list_to_range(input_list=sorted(cpuset))
    return (cpuset, cpulist)
def set_irq_affinity(set_bitmap, irqs, cpulist):
    """Write *cpulist* into the affinity file of each irq in *irqs*.

    :param set_bitmap: True: set bitmap file, False: set list file
    :param irqs: irq list
    :param cpulist: cpu list
    :return: set of irqs whose affinity file was written successfully
    """
    filename = 'smp_affinity' if set_bitmap else 'smp_affinity_list'

    updated = set()
    for irq in irqs:
        irq_aff_path = "/proc/irq/%s/%s" % (irq, filename)
        try:
            with open(irq_aff_path, 'w') as aff_file:
                aff_file.write(cpulist)
        except Exception as e:
            # A failed write is logged and the irq is left out of the result.
            LOG.warning("Failed to write pci affine file:%(F)s, irq:%(I)s, "
                        "error=%(E)s"
                        % {"F": filename, "I": irq, "E": e})
        else:
            updated.add(irq)
    return updated


def set_irqs_affinity_by_pci_address(pci_addr, extra_spec=None,
                                     numa_topology=None):
    """Affine the irqs of a VF's pci address to the instance's pinned cpus.

    The cpuset is restricted to the numa node of the PCI device.
    Raises an Exception whose message starts with PciDeviceNotFoundById
    when the numa node of the device cannot be read; errors raised while
    looking up the device irqs propagate unchanged.

    :param pci_addr: PCI address
    :param extra_spec: extra_spec
    :param numa_topology: instance numa topology
    :return: irqs, msi_irqs, numa_node, cpulist
    """
    irqs, msi_irqs = set(), set()
    numa_node = None
    cpulist = ''

    # Nothing to do without a topology.
    if numa_topology is None:
        return (irqs, msi_irqs, numa_node, cpulist)

    # Get the irqs associated with pci addr
    found_irqs, found_msi_irqs = get_irqs_by_pci_address(pci_addr)
    LOG.debug("pci: %s, irqs: %s, msi_irqs: %s" % (pci_addr, found_irqs, found_msi_irqs))

    # Obtain physical numa_node for this pci addr
    numa_path = "/sys/bus/pci/devices/%s/numa_node" % (pci_addr)
    try:
        with open(numa_path) as numa_file:
            fields = [int(token) for token in numa_file.readline().split()]
            numa_node = fields[0]
    except Exception as e:
        LOG.error('set_irqs_affinity_by_pci_address: '
                  'pci_addr=%(A)s: numa_path=%(P)s; error=%(E)s',
                  {'A': pci_addr, 'P': numa_path, 'E': e})
        raise Exception("PciDeviceNotFoundById id = %r" % pci_addr)

    # Skip irq configuration if there is no associated numa node
    if numa_node is None or numa_node < 0:
        return (irqs, msi_irqs, numa_node, cpulist)

    # Determine the pinned cpuset where irqs are to be affined
    cpuset, cpulist = get_pci_irqs_pinned_cpuset(extra_spec,
                                                 numa_topology,
                                                 numa_node)

    LOG.debug("cpuset where irqs are to be affined:%s or %s" % (cpuset, cpulist))

    # Only touch the affinity files when there are pinned cpus; write
    # errors are not treated as fatal.
    if cpuset:
        irqs = set_irq_affinity(False, found_irqs, cpulist)
        msi_irqs = set_irq_affinity(False, found_msi_irqs, cpulist)
    return (irqs, msi_irqs, numa_node, cpulist)