# config/sysinv/sysinv/sysinv/sysinv/agent/node.py
#
# 602 lines
# 22 KiB
# Python

#
# Copyright (c) 2013-2016 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# vim: tabstop=4 shiftwidth=4 softtabstop=4
# All Rights Reserved.
#
"""Inventory NUMA node utilities and helper functions."""
import os
from os import listdir
from os.path import isfile, join
import re
import subprocess
from sysinv.openstack.common import log as logging
import tsconfig.tsconfig as tsc
LOG = logging.getLogger(__name__)
# Memory required by vswitch on each socket (in MB)
VSWITCH_MEMORY_MB = 1024

# Size of one kilobyte; also the factor between adjacent KB/MB/GB units
SIZE_KB = 1024

# Size of 2 megabytes, expressed in kilobytes (2048)
SIZE_2M_KB = 2 * SIZE_KB

# Size of 1 gigabyte, expressed in kilobytes (1048576)
SIZE_1G_KB = SIZE_KB * SIZE_KB

# Size of 2 megabytes, expressed in megabytes
SIZE_2M_MB = SIZE_2M_KB // SIZE_KB

# Size of 1 gigabyte, expressed in megabytes
SIZE_1G_MB = SIZE_1G_KB // SIZE_KB

# Minimum amount of memory for a controller node (in MB)
CONTROLLER_MIN_MB = 6000

# Minimum amount of memory for a worker node (in MB)
COMPUTE_MIN_MB = 1600

# Minimum amount of memory for a secondary worker node (in MB)
COMPUTE_MIN_NON_0_MB = 500
class CPU(object):
    '''Class to encapsulate CPU data for System Inventory.

    Two instances compare equal when their cpu, numa_node, core and thread
    values match; cpu_family, cpu_model and revision are carried along but
    do not take part in comparison or hashing.
    '''

    def __init__(self, cpu, numa_node, core, thread,
                 cpu_family=None, cpu_model=None, revision=None):
        '''Construct a CPU object with the given values.

        :param cpu: logical cpu identifier
        :param numa_node: numa node the cpu belongs to
        :param core: core identifier
        :param thread: thread identifier
        :param cpu_family: optional cpu family
        :param cpu_model: optional cpu model
        :param revision: optional revision
        '''
        self.cpu = cpu
        self.numa_node = numa_node
        self.core = core
        self.thread = thread
        self.cpu_family = cpu_family
        self.cpu_model = cpu_model
        self.revision = revision

        # self.allocated_functions = mgmt (usu. 0), vswitch

    def _key(self):
        '''Return the tuple of fields that identify this cpu.'''
        return (self.cpu, self.numa_node, self.core, self.thread)

    def __eq__(self, rhs):
        # Stay duck-typed (any object exposing the four fields compares),
        # but let Python fall back to its default handling instead of
        # raising AttributeError for unrelated objects such as None.
        try:
            rhs_key = (rhs.cpu, rhs.numa_node, rhs.core, rhs.thread)
        except AttributeError:
            return NotImplemented
        return self._key() == rhs_key

    def __ne__(self, rhs):
        # Derived from __eq__ so the two can never disagree.
        result = self.__eq__(rhs)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        # Defined together with __eq__ so equal instances hash alike; do not
        # mutate the identifying fields while an instance is in a set/dict.
        return hash(self._key())

    def __str__(self):
        return "%s [%s] [%s] [%s]" % (self.cpu, self.numa_node,
                                      self.core, self.thread)

    def __repr__(self):
        return "<CPU '%s'>" % str(self)
class NodeOperator(object):
    '''Class to encapsulate CPU operations for System Inventory.

    Besides the logical cpu topology (parsed from /proc/cpuinfo) this class
    also reports per-numa-node memory, read from the per-node sysfs files
    under /sys/devices/system/node/node<N>/.
    '''

    # One "Node <n> <Field>: <value>" line of a per-node meminfo file;
    # group 1 is the field name and group 2 its numeric value.
    _RE_NODE_MEMINFO = re.compile(r'^Node\s+\d+\s+(\w+):\s+(\d+)')

    def __init__(self):
        self.num_cpus = 0
        self.num_nodes = 0
        self.float_cpuset = 0
        self.total_memory_mb = 0
        self.free_memory_mb = 0
        self.total_memory_nodes_mb = []
        self.free_memory_nodes_mb = []
        self.topology = {}

        # self._get_cpu_topology()
        # self._get_total_memory_mb()
        # self._get_total_memory_nodes_mb()
        # self._get_free_memory_mb()
        # self._get_free_memory_nodes_mb()

    def _is_strict(self):
        '''Check whether /proc/sys/vm/overcommit_memory is set to 2.

        :param self
        :returns True when the value read is 2, False otherwise or when the
                 setting cannot be read or parsed
        '''
        try:
            with open("/proc/sys/vm/overcommit_memory", 'r') as infile:
                return int(infile.read()) == 2
        except (IOError, OSError, ValueError) as e:
            LOG.info("Failed to check for overcommit, error (%s)", e)
        return False

    def convert_range_string_to_list(self, s):
        '''Expand a range string such as "0-3,8" into a sorted list of ints.

        :param s: comma separated items, each a single integer or an
                  inclusive "a-b" range; empty items are skipped
        :returns sorted list of integers (empty list for a blank string)
        '''
        olist = []
        for part in s.strip().split(','):
            part = part.strip()
            if not part:
                # blank input or a stray/trailing comma
                continue
            if '-' in part:
                a, b = part.split('-')
                olist.extend(range(int(a), int(b) + 1))
            else:
                olist.append(int(part))
        olist.sort()
        return olist

    def inodes_get_inumas_icpus(self):
        '''Enumerate logical cpu topology based on parsing /proc/cpuinfo
        as function of socket_id, core_id, and thread_id.  This updates
        topology.

        :param self
        :updates self.num_cpus - number of logical cpus
        :updates self.num_nodes - number of sockets; maps to number of
                 numa nodes
        :updates self.topology[socket_id][core_id][thread_id] = cpu
        :returns tuple (inumas, icpus): a list of numa node attribute dicts
                 and a list of logical cpu attribute dicts
        '''
        self.num_cpus = 0
        self.num_nodes = 0
        self.topology = {}

        thread_cnt = {}
        cpu = socket_id = -1
        re_processor = re.compile(r'^[Pp]rocessor\s+:\s+(\d+)')
        re_socket = re.compile(r'^physical id\s+:\s+(\d+)')
        re_core = re.compile(r'^core id\s+:\s+(\d+)')
        re_cpu_family = re.compile(r'^cpu family\s+:\s+(\d+)')
        re_cpu_model = re.compile(r'^model name\s+:\s+(\w+)')

        inumas = []
        icpus = []
        sockets = []

        with open('/proc/cpuinfo', 'r') as infile:
            # Attributes gathered for the processor stanza currently being
            # read; flushed into icpus when its "core id" line is reached.
            icpu_attrs = {}

            for line in infile:
                match = re_processor.search(line)
                if match:
                    cpu = int(match.group(1))
                    socket_id = -1
                    self.num_cpus += 1
                    continue

                if re_cpu_family.search(line):
                    icpu_attrs['cpu_family'] = line.split(':', 1)[1].strip()
                    continue

                if re_cpu_model.search(line):
                    icpu_attrs['cpu_model'] = line.split(':', 1)[1].strip()
                    continue

                match = re_socket.search(line)
                if match:
                    socket_id = int(match.group(1))
                    if socket_id not in sockets:
                        sockets.append(socket_id)
                        inumas.append({
                            'numa_node': socket_id,
                            'capabilities': {},
                        })
                    continue

                match = re_core.search(line)
                if match:
                    core_id = int(match.group(1))

                    # The n-th cpu seen on a given (socket, core) pair is
                    # reported as thread n, counting from 0.
                    core_threads = thread_cnt.setdefault(socket_id, {})
                    if core_id in core_threads:
                        core_threads[core_id] += 1
                    else:
                        core_threads[core_id] = 0
                    thread_id = core_threads[core_id]

                    cores = self.topology.setdefault(socket_id, {})
                    cores.setdefault(core_id, {})[thread_id] = cpu

                    icpu_attrs.update({
                        'cpu': cpu,
                        'numa_node': socket_id,
                        'core': core_id,
                        'thread': thread_id,
                        'capabilities': {},
                    })
                    icpus.append(icpu_attrs)
                    icpu_attrs = {}
                    continue

        self.num_nodes = len(self.topology)

        # In the case topology not detected, hard-code structures: a single
        # socket with one single-threaded core per logical cpu.
        if self.num_nodes == 0:
            n_sockets, n_cores, n_threads = (1, int(self.num_cpus), 1)
            self.topology = {}
            for socket_id in range(n_sockets):
                self.topology[socket_id] = {}
                if socket_id not in sockets:
                    sockets.append(socket_id)
                    inumas.append({
                        'numa_node': socket_id,
                        'capabilities': {},
                    })
                for core_id in range(n_cores):
                    self.topology[socket_id][core_id] = {}

            # Define Thread-Socket-Core order for logical cpu enumeration.
            # Each logical cpu is reported exactly once, here, with its
            # enumerated id.
            cpu = 0
            for thread_id in range(n_threads):
                for core_id in range(n_cores):
                    for socket_id in range(n_sockets):
                        self.topology[socket_id][core_id][thread_id] = cpu
                        icpus.append({
                            'cpu': cpu,
                            'numa_node': socket_id,
                            'core': core_id,
                            'thread': thread_id,
                            'capabilities': {},
                        })
                        cpu += 1
            self.num_nodes = len(self.topology)

        LOG.debug("inumas= %s, icpus = %s", inumas, icpus)

        return inumas, icpus

    def _get_immediate_subdirs(self, dir):
        '''Return the names of the directories located directly in dir.'''
        return [name for name in listdir(dir)
                if os.path.isdir(join(dir, name))]

    def _read_node_meminfo(self, node):
        '''Parse /sys/devices/system/node/node<node>/meminfo.

        :param node: numa node number
        :returns dict mapping each parsed field name (e.g. 'MemTotal') to
                 its integer value; lines that do not have the
                 "Node <n> <Field>: <value>" form are skipped
        :raises IOError when the file cannot be opened or read
        '''
        values = {}
        meminfo = "/sys/devices/system/node/node%d/meminfo" % node
        with open(meminfo, 'r') as infile:
            for line in infile:
                match = self._RE_NODE_MEMINFO.search(line)
                if match:
                    field = match.group(1)
                    values[field] = values.get(field, 0) + \
                        int(match.group(2))
        return values

    def _read_node_hugepages(self, node):
        '''Scan the hugepage pools of one numa node.

        Reads nr_hugepages and free_hugepages from every subdirectory of
        /sys/devices/system/node/node<node>/hugepages.  A pool whose name
        ends in "-1048576kB" is handled as 1G; every other pool is handled
        as 2M.

        :param node: numa node number
        :returns tuple (attr, total_hp_mb, free_hp_mb): the vswitch/vm
                 hugepage attributes plus the total and free hugepage
                 memory in MB; whatever was collected before an IO error
                 is still returned
        '''
        attr = {}
        total_hp_mb = 0  # Total memory (MB) currently configured in HPs
        free_hp_mb = 0
        hugepages = "/sys/devices/system/node/node%d/hugepages" % node

        try:
            subdirs = self._get_immediate_subdirs(hugepages)

            for subdir in subdirs:
                if subdir.partition('-')[2].startswith("1048576kB"):
                    size = SIZE_1G_MB
                else:
                    size = SIZE_2M_MB

                nr_hugepages = 0
                free_hugepages = 0

                mydir = join(hugepages, subdir)
                for fname in listdir(mydir):
                    path = join(mydir, fname)
                    if not isfile(path):
                        continue
                    if fname.startswith("nr_hugepages"):
                        with open(path, 'r') as f:
                            nr_hugepages = int(f.readline())
                    elif fname.startswith("free_hugepages"):
                        with open(path, 'r') as f:
                            free_hugepages = int(f.readline())

                total_hp_mb += nr_hugepages * size
                free_hp_mb += free_hugepages * size

                # Libvirt hugepages can be 1G and 2M
                if size == SIZE_1G_MB:
                    vswitch_hugepages_nr = VSWITCH_MEMORY_MB // size
                    hp_attr = {
                        'vswitch_hugepages_size_mib': size,
                        'vswitch_hugepages_nr': vswitch_hugepages_nr,
                        'vswitch_hugepages_avail': 0,
                        'vm_hugepages_nr_1G':
                            (nr_hugepages - vswitch_hugepages_nr),
                        'vm_hugepages_avail_1G': free_hugepages,
                        'vm_hugepages_use_1G': 'True'
                    }
                else:
                    hp_attr = {}
                    if len(subdirs) == 1:
                        # No 1G hugepage support.
                        vswitch_hugepages_nr = VSWITCH_MEMORY_MB // size
                        hp_attr = {
                            'vswitch_hugepages_size_mib': size,
                            'vswitch_hugepages_nr': vswitch_hugepages_nr,
                            'vswitch_hugepages_avail': 0,
                            'vm_hugepages_use_1G': 'False',
                        }
                    else:
                        # vswitch will use 1G hugepages
                        vswitch_hugepages_nr = 0

                    hp_attr.update({
                        'vm_hugepages_avail_2M': free_hugepages,
                        'vm_hugepages_nr_2M':
                            (nr_hugepages - vswitch_hugepages_nr)
                    })

                attr.update(hp_attr)

        except (IOError, OSError) as e:
            # best effort: IO errors (eg. file missing) are not fatal
            LOG.debug("Cannot read hugepages of node %d (%s)", node, e)

        return attr, total_hp_mb, free_hp_mb

    def _get_pss_mb(self):
        '''Sum the "Pss:" values of all /proc/*/smaps files.

        The sum is computed by a shell pipeline which divides it by 1024
        before printing it; the caller treats the result as MB.

        :returns the printed integer, or 0 when the command cannot be
                 executed or its output is not an integer
        '''
        cmd = 'cat /proc/*/smaps 2>/dev/null | awk \'/^Pss:/ ' \
              '{a += $2;} END {printf "%d\\n", a/1024.0;}\''
        result = ''
        try:
            proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                    shell=True)
            # communicate() drains the pipe and reaps the child process
            result = proc.communicate()[0].strip()
            return int(result)
        except OSError as e:
            LOG.error("Failed to execute (%s) OS error (%d)", cmd,
                      e.errno)
        except ValueError:
            LOG.error("Cannot calculate PSS (%s), unexpected output (%r)",
                      cmd, result)
        return 0

    def _get_base_reserved_mb(self, node):
        '''Read the base reserved memory of a node from
        /etc/platform/worker_reserved.conf.

        Parses the WORKER_BASE_RESERVED line, whose value is a bracketed,
        whitespace separated list of "node<N>:<size>MB..." items.

        :param node: numa node number
        :returns reserved memory (MB) configured for the node, 0 if the
                 node is not listed
        :raises IOError when the file cannot be opened
        '''
        base_mem_mb = 0
        with open('/etc/platform/worker_reserved.conf', 'r') as infile:
            for line in infile:
                if "WORKER_BASE_RESERVED" in line:
                    val = line.split("=")
                    # drop the enclosing brackets around the list
                    base_reserves = val[1].strip('\n')[1:-1]
                    for reserve in base_reserves.split():
                        reserve = reserve.split(":")
                        if reserve[0].strip('"') == "node%d" % node:
                            base_mem_mb = int(reserve[1].strip('MB'))
        return base_mem_mb

    def _inode_get_memory_hugepages(self):
        """Collect hugepage info, including vswitch, and vm.
        Collect platform reserved if config.

        :param self
        :returns list of memory nodes and attributes; the list is empty
                 when the initial worker config is complete but the worker
                 config of the current boot is not
        """
        imemory = []

        initial_worker_config_completed = \
            os.path.exists(tsc.INITIAL_WORKER_CONFIG_COMPLETE)

        # check if it is initial report before the huge pages are allocated
        initial_report = not initial_worker_config_completed

        # do not send report if the initial worker config is completed and
        # worker config has not finished, i.e. during subsequent
        # reboot before the manifest allocates the huge pages
        worker_config_completed = \
            os.path.exists(tsc.VOLATILE_WORKER_CONFIG_COMPLETE)
        if initial_worker_config_completed and not worker_config_completed:
            return imemory

        for node in range(self.num_nodes):
            # Check vswitch and libvirt memory: record total number and
            # number free for each configured hugepage size of this node
            attr, total_hp_mb, free_hp_mb = self._read_node_hugepages(node)

            # Get the free and total memory from meminfo for this node
            free_kb = 0   # Free Memory (KB) available
            total_kb = 0  # Total Memory (KB)
            try:
                meminfo = self._read_node_meminfo(node)
            except IOError:
                # silently ignore IO errors (eg. file missing)
                meminfo = None

            if meminfo is not None:
                total_kb = meminfo.get('MemTotal', 0)
                free_kb = (meminfo.get('MemFree', 0) +
                           meminfo.get('FilePages', 0) +
                           meminfo.get('SReclaimable', 0))
                if self._is_strict():
                    # strict accounting: free is what is left to commit
                    free_kb = (meminfo.get('CommitLimit', 0) -
                               meminfo.get('Committed_AS', 0))

            # Calculate PSS (accounted against node 0 only)
            pss_mb = self._get_pss_mb() if node == 0 else 0

            # need to multiply total_mb by 1024
            node_total_kb = total_hp_mb * SIZE_KB + free_kb + pss_mb * SIZE_KB

            # Read base memory from worker_reserved.conf
            base_mem_mb = self._get_base_reserved_mb(node)

            # On small systems, clip memory overhead to more reasonable
            # minimal settings
            if (total_kb // SIZE_KB - base_mem_mb) < 1000:
                if node == 0:
                    base_mem_mb = COMPUTE_MIN_MB
                    if tsc.nodetype == 'controller':
                        base_mem_mb += CONTROLLER_MIN_MB
                else:
                    base_mem_mb = COMPUTE_MIN_NON_0_MB

            eng_kb = node_total_kb - base_mem_mb * SIZE_KB

            vswitch_mem_kb = (attr.get('vswitch_hugepages_size_mib', 0) *
                              attr.get('vswitch_hugepages_nr', 0) * SIZE_KB)

            vm_kb = eng_kb - vswitch_mem_kb

            # whole pages only: floor division keeps the counts integral
            max_vm_pages_2mb = vm_kb // SIZE_2M_KB
            max_vm_pages_1gb = vm_kb // SIZE_1G_KB

            attr.update({
                'vm_hugepages_possible_2M': max_vm_pages_2mb,
                'vm_hugepages_possible_1G': max_vm_pages_1gb,
            })

            # calculate 90% 2M pages if it is initial report and the huge
            # pages have not been allocated
            if initial_report:
                # NOTE(review): this makes the reported 2M page count a
                # float; kept as is - confirm whether consumers expect int
                max_vm_pages_2mb = max_vm_pages_2mb * 0.9
                total_hp_mb += int(max_vm_pages_2mb * SIZE_2M_MB)
                free_hp_mb = total_hp_mb
                attr.update({
                    'vm_hugepages_nr_2M': max_vm_pages_2mb,
                    'vm_hugepages_avail_2M': max_vm_pages_2mb,
                    'vm_hugepages_nr_1G': 0
                })

            attr.update({
                'numa_node': node,
                'memtotal_mib': total_hp_mb,
                'memavail_mib': free_hp_mb,
                'hugepages_configured': 'True',
                'node_memtotal_mib': node_total_kb // 1024,
            })

            imemory.append(attr)

        return imemory

    def _inode_get_memory_nonhugepages(self):
        '''Collect nonhugepage info, including platform reserved if config.

        :param self
        :updates self.total_memory_nodes_mb - total memory (MB) of each
                 node, rebuilt on every call
        :returns list of memory nodes and attributes
        '''
        imemory = []
        self.total_memory_mb = 0
        # start from scratch so repeated calls do not grow the list
        self.total_memory_nodes_mb = []

        for node in range(self.num_nodes):
            try:
                meminfo = self._read_node_meminfo(node)
            except IOError:
                # silently ignore IO errors (eg. file missing)
                meminfo = {}

            total_kb = meminfo.get('MemTotal', 0)
            free_kb = (meminfo.get('MemFree', 0) +
                       meminfo.get('FilePages', 0) +
                       meminfo.get('SReclaimable', 0))

            total_mb = total_kb // 1024
            free_mb = free_kb // 1024
            self.total_memory_nodes_mb.append(total_mb)
            imemory.append({
                'numa_node': node,
                'memtotal_mib': total_mb,
                'memavail_mib': free_mb,
                'hugepages_configured': 'False',
            })

        return imemory

    def inodes_get_imemory(self):
        '''Collect logical memory topology.

        Hugepage accounting is used when /etc/platform/worker_reserved.conf
        exists, plain per-node accounting otherwise.

        :param self
        :returns list of memory nodes and attributes
        '''
        if os.path.isfile("/etc/platform/worker_reserved.conf"):
            imemory = self._inode_get_memory_hugepages()
        else:
            imemory = self._inode_get_memory_nonhugepages()

        LOG.debug("imemory= %s", imemory)

        return imemory