Merge "OVS: fix memory pool allocation for virtual environment"

Zuul 2018-10-18 21:02:51 +00:00 committed by Gerrit Code Review
commit f7df12c09b
1 changed file with 167 additions and 168 deletions


@@ -17,16 +17,38 @@ from os.path import isfile, join
import re
import subprocess
from sysinv.common import utils
from sysinv.openstack.common import log as logging
import tsconfig.tsconfig as tsc
LOG = logging.getLogger(__name__)
# Defines per-socket vswitch memory requirements (in MB) for both real and
# virtual deployments
VSWITCH_REAL_MEMORY_MB = 1024
VSWITCH_VIRTUAL_MEMORY_MB = 512
# Defines per-socket vswitch memory requirements (in MB)
VSWITCH_MEMORY_MB = 1024
# Defines the size of one kilobyte
SIZE_KB = 1024
# Defines the size of 2 megabytes in kilobyte units
SIZE_2M_KB = 2048
# Defines the size of 1 gigabyte in kilobyte units
SIZE_1G_KB = 1048576
# Defines the size of 2 megabytes in megabyte units
SIZE_2M_MB = int(SIZE_2M_KB / SIZE_KB)
# Defines the size of 1 gigabyte in megabyte units
SIZE_1G_MB = int(SIZE_1G_KB / SIZE_KB)
# Defines the minimum size of memory for a controller node in megabyte units
CONTROLLER_MIN_MB = 6000
# Defines the minimum size of memory for a compute node in megabyte units
COMPUTE_MIN_MB = 1600
# Defines the minimum size of memory for a secondary compute node in megabyte
# units
COMPUTE_MIN_NON_0_MB = 500
class CPU:
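
For reference, these constants are what later maps a kernel hugepage directory name to a page size in megabytes; a minimal sketch of that conversion (the directory names are the standard sysfs ones, and the helper name is illustrative only):

    SIZE_KB = 1024
    SIZE_2M_KB = 2048
    SIZE_1G_KB = 1048576
    SIZE_2M_MB = int(SIZE_2M_KB / SIZE_KB)   # 2
    SIZE_1G_MB = int(SIZE_1G_KB / SIZE_KB)   # 1024

    def hugepage_size_mb(subdir):
        """Map e.g. 'hugepages-1048576kB' to its page size in MB."""
        if subdir.split('-')[1].startswith("1048576kB"):
            return SIZE_1G_MB
        return SIZE_2M_MB

    assert hugepage_size_mb("hugepages-1048576kB") == 1024
    assert hugepage_size_mb("hugepages-2048kB") == 2
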
@@ -73,17 +95,17 @@ class NodeOperator(object):
self.num_cpus = 0
self.num_nodes = 0
self.float_cpuset = 0
self.total_memory_MiB = 0
self.free_memory_MiB = 0
self.total_memory_nodes_MiB = []
self.free_memory_nodes_MiB = []
self.total_memory_mb = 0
self.free_memory_mb = 0
self.total_memory_nodes_mb = []
self.free_memory_nodes_mb = []
self.topology = {}
# self._get_cpu_topology()
# self._get_total_memory_MiB()
# self._get_total_memory_nodes_MiB()
# self._get_free_memory_MiB()
# self._get_free_memory_nodes_MiB()
# self._get_total_memory_mb()
# self._get_total_memory_nodes_mb()
# self._get_free_memory_mb()
# self._get_free_memory_nodes_mb()
def _is_strict(self):
with open(os.devnull, "w") as fnull:
@@ -128,7 +150,7 @@ class NodeOperator(object):
self.num_nodes = 0
self.topology = {}
Thread_cnt = {}
thread_cnt = {}
cpu = socket_id = core_id = thread_id = -1
re_processor = re.compile(r'^[Pp]rocessor\s+:\s+(\d+)')
re_socket = re.compile(r'^physical id\s+:\s+(\d+)')
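
A condensed, runnable sketch of how these patterns drive the topology scan below; the 'core id' pattern is assumed here, since this hunk only shows the processor and physical-id expressions, and the thread id falls out of counting repeat sightings of a (socket, core) pair:

    import re
    from collections import defaultdict

    re_processor = re.compile(r'^[Pp]rocessor\s+:\s+(\d+)')
    re_socket = re.compile(r'^physical id\s+:\s+(\d+)')
    re_core = re.compile(r'^core id\s+:\s+(\d+)')  # assumed; not shown in this hunk

    topology = {}
    thread_cnt = defaultdict(lambda: defaultdict(int))
    cpu = socket_id = -1
    with open('/proc/cpuinfo') as f:
        for line in f:
            m = re_processor.match(line)
            if m:
                cpu = int(m.group(1))
                continue
            m = re_socket.match(line)
            if m:
                socket_id = int(m.group(1))
                continue
            m = re_core.match(line)
            if m:
                core_id = int(m.group(1))
                # nth sighting of this (socket, core) pair => thread n
                thread_id = thread_cnt[socket_id][core_id]
                thread_cnt[socket_id][core_id] += 1
                topology.setdefault(socket_id, {}) \
                        .setdefault(core_id, {})[thread_id] = cpu
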
@@ -173,9 +195,9 @@ class NodeOperator(object):
if socket_id not in sockets:
sockets.append(socket_id)
attrs = {
'numa_node': socket_id,
'capabilities': {},
}
'numa_node': socket_id,
'capabilities': {},
}
inumas.append(attrs)
continue
@@ -183,13 +205,13 @@ class NodeOperator(object):
if match:
core_id = int(match.group(1))
if socket_id not in Thread_cnt:
Thread_cnt[socket_id] = {}
if core_id not in Thread_cnt[socket_id]:
Thread_cnt[socket_id][core_id] = 0
if socket_id not in thread_cnt:
thread_cnt[socket_id] = {}
if core_id not in thread_cnt[socket_id]:
thread_cnt[socket_id][core_id] = 0
else:
Thread_cnt[socket_id][core_id] += 1
thread_id = Thread_cnt[socket_id][core_id]
thread_cnt[socket_id][core_id] += 1
thread_id = thread_cnt[socket_id][core_id]
if socket_id not in self.topology:
self.topology[socket_id] = {}
@@ -197,12 +219,13 @@ class NodeOperator(object):
self.topology[socket_id][core_id] = {}
self.topology[socket_id][core_id][thread_id] = cpu
attrs = {'cpu': cpu,
'numa_node': socket_id,
'core': core_id,
'thread': thread_id,
'capabilities': {},
}
attrs = {
'cpu': cpu,
'numa_node': socket_id,
'core': core_id,
'thread': thread_id,
'capabilities': {},
}
icpu_attrs.update(attrs)
icpus.append(icpu_attrs)
icpu_attrs = {}
@@ -219,22 +242,21 @@ class NodeOperator(object):
if socket_id not in sockets:
sockets.append(socket_id)
attrs = {
'numa_node': socket_id,
'capabilities': {},
}
'numa_node': socket_id,
'capabilities': {},
}
inumas.append(attrs)
for core_id in range(n_cores):
self.topology[socket_id][core_id] = {}
for thread_id in range(n_threads):
self.topology[socket_id][core_id][thread_id] = 0
attrs = {
'cpu': cpu,
'numa_node': socket_id,
'core': core_id,
'thread': thread_id,
'capabilities': {},
}
'cpu': cpu,
'numa_node': socket_id,
'core': core_id,
'thread': thread_id,
'capabilities': {},
}
icpus.append(attrs)
# Define Thread-Socket-Core order for logical cpu enumeration
@@ -245,19 +267,18 @@ class NodeOperator(object):
if socket_id not in sockets:
sockets.append(socket_id)
attrs = {
'numa_node': socket_id,
'capabilities': {},
}
'numa_node': socket_id,
'capabilities': {},
}
inumas.append(attrs)
self.topology[socket_id][core_id][thread_id] = cpu
attrs = {
'cpu': cpu,
'numa_node': socket_id,
'core': core_id,
'thread': thread_id,
'capabilities': {},
}
'cpu': cpu,
'numa_node': socket_id,
'core': core_id,
'thread': thread_id,
'capabilities': {},
}
icpus.append(attrs)
cpu += 1
self.num_nodes = len(self.topology.keys())
@@ -270,26 +291,6 @@ class NodeOperator(object):
return [name for name in listdir(dir)
if os.path.isdir(join(dir, name))]
def _set_default_vswitch_hugesize(self):
"""
Set the default memory size for vswitch hugepages when it must fallback
to 2MB pages because there are no 1GB pages. In a virtual environment
we set a smaller amount of memory because vswitch is configured to use
a smaller mbuf pool. In non-virtual environments we use the same
amount of memory as we would if 1GB pages were available.
"""
hugepage_size = 2
if utils.is_virtual():
vswitch_hugepages_nr = VSWITCH_VIRTUAL_MEMORY_MB / hugepage_size
else:
vswitch_hugepages_nr = VSWITCH_REAL_MEMORY_MB / hugepage_size
# Create a new set of dict attributes
hp_attr = {'vswitch_hugepages_size_mib': hugepage_size,
'vswitch_hugepages_nr': vswitch_hugepages_nr,
'vswitch_hugepages_avail': 0}
return hp_attr
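
The replacement for this removed method appears inline in the 2M fallback branch further below: the pool is now sized from the single VSWITCH_MEMORY_MB constant instead of branching on utils.is_virtual(). A condensed sketch of that fallback, with an illustrative helper name:

    VSWITCH_MEMORY_MB = 1024  # from the constants block above

    def default_vswitch_hp_attr(size_mb):
        """Vswitch hugepage attributes when falling back to 2MB pages
        because no 1G pages are configured; helper name is illustrative."""
        return {
            'vswitch_hugepages_size_mib': size_mb,
            'vswitch_hugepages_nr': VSWITCH_MEMORY_MB // size_mb,
            'vswitch_hugepages_avail': 0,
        }

    # With only 2MB pages available, the pool is 512 pages (1024 MB):
    assert default_vswitch_hp_attr(2)['vswitch_hugepages_nr'] == 512
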
def _inode_get_memory_hugepages(self):
"""Collect hugepage info, including vswitch, and vm.
Collect platform reserved if config.
@@ -298,12 +299,6 @@ class NodeOperator(object):
"""
imemory = []
Ki = 1024
SZ_2M_Ki = 2048
SZ_1G_Ki = 1048576
controller_min_MB = 6000
compute_min_MB = 1600
compute_min_non0_MB = 500
initial_compute_config_completed = \
os.path.exists(tsc.INITIAL_COMPUTE_CONFIG_COMPLETE)
@@ -322,8 +317,8 @@ class NodeOperator(object):
for node in range(self.num_nodes):
attr = {}
Total_HP_MiB = 0 # Total memory (MiB) currently configured in HPs
Free_HP_MiB = 0
total_hp_mb = 0 # Total memory (MB) currently configured in HPs
free_hp_mb = 0
# Check vswitch and libvirt memory
# Loop through configured hugepage sizes of this node and record
@@ -336,13 +331,10 @@ class NodeOperator(object):
for subdir in subdirs:
hp_attr = {}
sizesplit = subdir.split('-')
# role via size; also from /etc/nova/compute_reserved.conf
if sizesplit[1].startswith("1048576kB"):
hugepages_role = "vswitch"
size = int(SZ_1G_Ki / Ki)
size = SIZE_1G_MB
else:
hugepages_role = "vm"
size = int(SZ_2M_Ki / Ki)
size = SIZE_2M_MB
nr_hugepages = 0
free_hugepages = 0
@@ -358,33 +350,40 @@ class NodeOperator(object):
if file.startswith("free_hugepages"):
free_hugepages = int(f.readline())
Total_HP_MiB = Total_HP_MiB + int(nr_hugepages * size)
Free_HP_MiB = Free_HP_MiB + int(free_hugepages * size)
total_hp_mb = total_hp_mb + int(nr_hugepages * size)
free_hp_mb = free_hp_mb + int(free_hugepages * size)
# Libvirt hugepages can now be 1G and 2M, can't only look
# at 2M pages
if hugepages_role == "vswitch":
vswitch_hugepages_nr = VSWITCH_REAL_MEMORY_MB / size
# Libvirt hugepages can be 1G and 2M
if size == SIZE_1G_MB:
vswitch_hugepages_nr = VSWITCH_MEMORY_MB / size
hp_attr = {
'vswitch_hugepages_size_mib': size,
'vswitch_hugepages_nr': vswitch_hugepages_nr,
'vswitch_hugepages_avail': 0,
'vm_hugepages_nr_1G':
(nr_hugepages - vswitch_hugepages_nr),
'vm_hugepages_avail_1G': free_hugepages,
'vm_hugepages_use_1G': 'True'
}
'vswitch_hugepages_size_mib': size,
'vswitch_hugepages_nr': vswitch_hugepages_nr,
'vswitch_hugepages_avail': 0,
'vm_hugepages_nr_1G':
(nr_hugepages - vswitch_hugepages_nr),
'vm_hugepages_avail_1G': free_hugepages,
'vm_hugepages_use_1G': 'True'
}
else:
if len(subdirs) == 1:
hp_attr = self._set_default_vswitch_hugesize()
# No 1G hugepage support.
vswitch_hugepages_nr = VSWITCH_MEMORY_MB / size
hp_attr = {
'vswitch_hugepages_size_mib': size,
'vswitch_hugepages_nr': vswitch_hugepages_nr,
'vswitch_hugepages_avail': 0,
}
hp_attr.update({'vm_hugepages_use_1G': 'False'})
else:
# vswitch will use 1G hugepages
vswitch_hugepages_nr = 0
vswitch_hugepages_nr = hp_attr.get('vswitch_hugepages_nr', 0)
hp_attr.update({
'vm_hugepages_avail_2M': free_hugepages,
'vm_hugepages_nr_2M':
(nr_hugepages - vswitch_hugepages_nr)
})
})
attr.update(hp_attr)
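
A self-contained sketch of the per-size sysfs reads this loop performs; the base path shown is the standard kernel per-node hugepage layout, which this hunk does not spell out:

    import os

    def read_hugepage_counts(node, subdir):
        """Return (nr_hugepages, free_hugepages) for e.g.
        /sys/devices/system/node/node0/hugepages/hugepages-2048kB/."""
        base = "/sys/devices/system/node/node%d/hugepages/%s" % (node, subdir)
        counts = {'nr_hugepages': 0, 'free_hugepages': 0}
        for name in counts:
            try:
                with open(os.path.join(base, name)) as f:
                    counts[name] = int(f.readline())
            except IOError:
                pass  # treat a missing file as zero pages
        return counts['nr_hugepages'], counts['free_hugepages']
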
@@ -393,19 +392,19 @@ class NodeOperator(object):
pass
# Get the free and total memory from meminfo for this node
re_node_MemTotal = re.compile(r'^Node\s+\d+\s+\MemTotal:\s+(\d+)')
re_node_MemFree = re.compile(r'^Node\s+\d+\s+\MemFree:\s+(\d+)')
re_node_FilePages = \
re_node_memtotal = re.compile(r'^Node\s+\d+\s+\MemTotal:\s+(\d+)')
re_node_memfree = re.compile(r'^Node\s+\d+\s+\MemFree:\s+(\d+)')
re_node_filepages = \
re.compile(r'^Node\s+\d+\s+\FilePages:\s+(\d+)')
re_node_SReclaim = \
re_node_sreclaim = \
re.compile(r'^Node\s+\d+\s+\SReclaimable:\s+(\d+)')
re_node_CommitLimit = \
re_node_commitlimit = \
re.compile(r'^Node\s+\d+\s+\CommitLimit:\s+(\d+)')
re_node_Committed_AS = \
re_node_committed_as = \
re.compile(r'^Node\s+\d+\s+\'Committed_AS:\s+(\d+)')
Free_KiB = 0 # Free Memory (KiB) available
Total_KiB = 0 # Total Memory (KiB)
free_kb = 0 # Free Memory (KB) available
total_kb = 0 # Total Memory (KB)
limit = 0 # only used in strict accounting
committed = 0 # only used in strict accounting
@@ -413,40 +412,40 @@ class NodeOperator(object):
try:
with open(meminfo, 'r') as infile:
for line in infile:
match = re_node_MemTotal.search(line)
match = re_node_memtotal.search(line)
if match:
Total_KiB += int(match.group(1))
total_kb += int(match.group(1))
continue
match = re_node_MemFree.search(line)
match = re_node_memfree.search(line)
if match:
Free_KiB += int(match.group(1))
free_kb += int(match.group(1))
continue
match = re_node_FilePages.search(line)
match = re_node_filepages.search(line)
if match:
Free_KiB += int(match.group(1))
free_kb += int(match.group(1))
continue
match = re_node_SReclaim.search(line)
match = re_node_sreclaim.search(line)
if match:
Free_KiB += int(match.group(1))
free_kb += int(match.group(1))
continue
match = re_node_CommitLimit.search(line)
match = re_node_commitlimit.search(line)
if match:
limit = int(match.group(1))
continue
match = re_node_Committed_AS.search(line)
match = re_node_committed_as.search(line)
if match:
committed = int(match.group(1))
continue
if self._is_strict():
Free_KiB = limit - committed
free_kb = limit - committed
except IOError:
# silently ignore IO errors (eg. file missing)
pass
# Calculate PSS
Pss_MiB = 0
pss_mb = 0
if node == 0:
cmd = 'cat /proc/*/smaps 2>/dev/null | awk \'/^Pss:/ ' \
'{a += $2;} END {printf "%d\\n", a/1024.0;}\''
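
The shell pipeline sums every Pss: field (kB) across all processes and converts the total to MB; a pure-Python equivalent, restricted to numeric pids, for reference:

    import glob

    def total_pss_mb():
        """Sum Pss (proportional set size) across /proc/*/smaps, in MB."""
        total_kb = 0
        for path in glob.glob('/proc/[0-9]*/smaps'):
            try:
                with open(path) as f:
                    for line in f:
                        if line.startswith('Pss:'):
                            total_kb += int(line.split()[1])
            except IOError:
                pass  # process may have exited; mirrors the 2>/dev/null
        return total_kb // 1024
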
@@ -454,7 +453,7 @@ class NodeOperator(object):
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
shell=True)
result = proc.stdout.read().strip()
Pss_MiB = int(result)
pss_mb = int(result)
except subprocess.CalledProcessError as e:
LOG.error("Cannot calculate PSS (%s) (%d)", cmd,
e.returncode)
@@ -462,11 +461,11 @@ class NodeOperator(object):
LOG.error("Failed to execute (%s) OS error (%d)", cmd,
e.errno)
# need to multiply Total_MiB by 1024 to match compute_huge
node_total_kib = Total_HP_MiB * Ki + Free_KiB + Pss_MiB * Ki
# need to multiply total_mb by 1024 to match compute_huge
node_total_kb = total_hp_mb * SIZE_KB + free_kb + pss_mb * SIZE_KB
# Read base memory from compute_reserved.conf
base_mem_MiB = 0
base_mem_mb = 0
with open('/etc/nova/compute_reserved.conf', 'r') as infile:
for line in infile:
if "COMPUTE_BASE_RESERVED" in line:
@@ -475,52 +474,52 @@ class NodeOperator(object):
for reserve in base_reserves.split():
reserve = reserve.split(":")
if reserve[0].strip('"') == "node%d" % node:
base_mem_MiB = int(reserve[1].strip('MB'))
base_mem_mb = int(reserve[1].strip('MB'))
# On small systems, clip memory overhead to more reasonable minimal
# settings
if (Total_KiB / Ki - base_mem_MiB) < 1000:
if (total_kb / SIZE_KB - base_mem_mb) < 1000:
if node == 0:
base_mem_MiB = compute_min_MB
base_mem_mb = COMPUTE_MIN_MB
if tsc.nodetype == 'controller':
base_mem_MiB += controller_min_MB
base_mem_mb += CONTROLLER_MIN_MB
else:
base_mem_MiB = compute_min_non0_MB
base_mem_mb = COMPUTE_MIN_NON_0_MB
Eng_KiB = node_total_kib - base_mem_MiB * Ki
eng_kb = node_total_kb - base_mem_mb * SIZE_KB
vswitch_mem_kib = (attr.get('vswitch_hugepages_size_mib', 0) *
attr.get('vswitch_hugepages_nr', 0) * Ki)
vswitch_mem_kb = (attr.get('vswitch_hugepages_size_mib', 0) *
attr.get('vswitch_hugepages_nr', 0) * SIZE_KB)
VM_KiB = (Eng_KiB - vswitch_mem_kib)
vm_kb = (eng_kb - vswitch_mem_kb)
max_vm_pages_2M = VM_KiB / SZ_2M_Ki
max_vm_pages_1G = VM_KiB / SZ_1G_Ki
max_vm_pages_2mb = vm_kb / SIZE_2M_KB
max_vm_pages_1gb = vm_kb / SIZE_1G_KB
attr.update({
'vm_hugepages_possible_2M': max_vm_pages_2M,
'vm_hugepages_possible_1G': max_vm_pages_1G,
'vm_hugepages_possible_2M': max_vm_pages_2mb,
'vm_hugepages_possible_1G': max_vm_pages_1gb,
})
# calculate 90% 2M pages if it is initial report and the huge
# pages have not been allocated
if initial_report:
max_vm_pages_2M = max_vm_pages_2M * 0.9
Total_HP_MiB += int(max_vm_pages_2M * (SZ_2M_Ki / Ki))
Free_HP_MiB = Total_HP_MiB
max_vm_pages_2mb = max_vm_pages_2mb * 0.9
total_hp_mb += int(max_vm_pages_2mb * (SIZE_2M_KB / SIZE_KB))
free_hp_mb = total_hp_mb
attr.update({
'vm_hugepages_nr_2M': max_vm_pages_2M,
'vm_hugepages_avail_2M': max_vm_pages_2M,
'vm_hugepages_nr_2M': max_vm_pages_2mb,
'vm_hugepages_avail_2M': max_vm_pages_2mb,
'vm_hugepages_nr_1G': 0
})
attr.update({
'numa_node': node,
'memtotal_mib': Total_HP_MiB,
'memavail_mib': Free_HP_MiB,
'memtotal_mib': total_hp_mb,
'memavail_mib': free_hp_mb,
'hugepages_configured': 'True',
'node_memtotal_mib': node_total_kib / 1024,
})
'node_memtotal_mib': node_total_kb / 1024,
})
imemory.append(attr)
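
To make the accounting above concrete, a worked example with assumed round numbers (16 GB reported for node 0 of a compute host, a 1 GB vswitch pool, integer division written explicitly):

    SIZE_KB, SIZE_2M_KB, SIZE_1G_KB = 1024, 2048, 1048576

    node_total_kb = 16 * 1024 * 1024      # assumed: 16 GB on this node
    base_mem_mb = 1600                    # COMPUTE_MIN_MB clipped on node 0
    vswitch_mem_kb = 1024 * SIZE_KB       # size_mib * nr * SIZE_KB = 1 GB pool

    eng_kb = node_total_kb - base_mem_mb * SIZE_KB   # engineered memory
    vm_kb = eng_kb - vswitch_mem_kb                  # left for VM hugepages
    max_vm_pages_2mb = vm_kb // SIZE_2M_KB           # 6880 possible 2M pages
    max_vm_pages_1gb = vm_kb // SIZE_1G_KB           # 13 possible 1G pages

    # On an initial report, 90% of the possible 2M pages are pre-assigned:
    initial_2mb = int(max_vm_pages_2mb * 0.9)             # 6192 pages
    total_hp_mb = initial_2mb * (SIZE_2M_KB // SIZE_KB)   # 12384 MB configured
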
@@ -533,53 +532,53 @@ class NodeOperator(object):
'''
imemory = []
self.total_memory_MiB = 0
self.total_memory_mb = 0
re_node_MemTotal = re.compile(r'^Node\s+\d+\s+\MemTotal:\s+(\d+)')
re_node_MemFree = re.compile(r'^Node\s+\d+\s+\MemFree:\s+(\d+)')
re_node_FilePages = re.compile(r'^Node\s+\d+\s+\FilePages:\s+(\d+)')
re_node_SReclaim = re.compile(r'^Node\s+\d+\s+\SReclaimable:\s+(\d+)')
re_node_memtotal = re.compile(r'^Node\s+\d+\s+\MemTotal:\s+(\d+)')
re_node_memfree = re.compile(r'^Node\s+\d+\s+\MemFree:\s+(\d+)')
re_node_filepages = re.compile(r'^Node\s+\d+\s+\FilePages:\s+(\d+)')
re_node_sreclaim = re.compile(r'^Node\s+\d+\s+\SReclaimable:\s+(\d+)')
for node in range(self.num_nodes):
attr = {}
Total_MiB = 0
Free_MiB = 0
total_mb = 0
free_mb = 0
meminfo = "/sys/devices/system/node/node%d/meminfo" % node
try:
with open(meminfo, 'r') as infile:
for line in infile:
match = re_node_MemTotal.search(line)
match = re_node_memtotal.search(line)
if match:
Total_MiB += int(match.group(1))
total_mb += int(match.group(1))
continue
match = re_node_MemFree.search(line)
match = re_node_memfree.search(line)
if match:
Free_MiB += int(match.group(1))
free_mb += int(match.group(1))
continue
match = re_node_FilePages.search(line)
match = re_node_filepages.search(line)
if match:
Free_MiB += int(match.group(1))
free_mb += int(match.group(1))
continue
match = re_node_SReclaim.search(line)
match = re_node_sreclaim.search(line)
if match:
Free_MiB += int(match.group(1))
free_mb += int(match.group(1))
continue
except IOError:
# silently ignore IO errors (eg. file missing)
pass
Total_MiB /= 1024
Free_MiB /= 1024
self.total_memory_nodes_MiB.append(Total_MiB)
total_mb /= 1024
free_mb /= 1024
self.total_memory_nodes_mb.append(total_mb)
attr = {
'numa_node': node,
'memtotal_mib': Total_MiB,
'memavail_mib': Free_MiB,
'hugepages_configured': 'False',
}
'numa_node': node,
'memtotal_mib': total_mb,
'memavail_mib': free_mb,
'hugepages_configured': 'False',
}
imemory.append(attr)