From 74baed87deef6c5565581b71875aa44d07271663 Mon Sep 17 00:00:00 2001 From: Steven Webster Date: Mon, 17 Dec 2018 15:41:54 -0500 Subject: [PATCH] Enable configurable vswitch memory Currently, a DPDK-enabled vswitch makes use of a fixed 1G hugepage allocation per socket to enable an optimized datapath. In the case of OVS-DPDK, this can cause an issue when changing the MTU of one or more interfaces, as a separate mempool is allocated for each MTU size. If the minimal mempool size(s) cannot fit into the 1G page, DPDK memory initialization will fail. This commit allows an operator to configure the amount of hugepage memory allocated to each socket on a host, which can enable jumbo frame support for OVS-DPDK. The system host-memory-modify command has been modified to accept vswitch hugepage configuration via the function (-f) flag, e.g.: system host-memory-modify -f vswitch -1G 4 Story: 2004472 Task: 28162 Change-Id: Ie58d1b8317b4eb3c498524db6bd53ffff3bf1a39 Signed-off-by: Steven Webster --- .../src/modules/platform/manifests/compute.pp | 7 +- .../src/modules/platform/manifests/vswitch.pp | 14 ++ .../templates/ovs.disable-dpdk-init.erb | 5 + .../cgts-client/cgtsclient/v1/iHost_shell.py | 8 +- .../cgtsclient/v1/imemory_shell.py | 39 +++- .../cgtsclient/v1/iprofile_shell.py | 34 +++- .../sysinv/etc/sysinv/profileSchema.xsd | 2 + sysinv/sysinv/sysinv/sysinv/agent/node.py | 78 ++++++-- .../sysinv/sysinv/api/controllers/v1/host.py | 16 +- .../sysinv/api/controllers/v1/memory.py | 178 ++++++++++++++---- .../sysinv/api/controllers/v1/profile.py | 10 +- .../sysinv/sysinv/sysinv/conductor/manager.py | 1 + sysinv/sysinv/sysinv/sysinv/puppet/ovs.py | 21 ++- .../sysinv/sysinv/sysinv/puppet/platform.py | 33 +++- .../sysinv/sysinv/tests/api/test_profile.py | 2 + 15 files changed, 377 insertions(+), 71 deletions(-) create mode 100644 puppet-manifests/src/modules/platform/templates/ovs.disable-dpdk-init.erb diff --git a/puppet-manifests/src/modules/platform/manifests/compute.pp b/puppet-manifests/src/modules/platform/manifests/compute.pp index ffc840c4a0..3337a293b9 100644 --- a/puppet-manifests/src/modules/platform/manifests/compute.pp +++ b/puppet-manifests/src/modules/platform/manifests/compute.pp @@ -21,6 +21,7 @@ class platform::compute::grub::params ( $n_cpus = '', $cpu_options = '', $m_hugepages = 'hugepagesz=2M hugepages=0', + $g_hugepages = undef, $default_pgsz = 'default_hugepagesz=2M', $keys = [ 'kvm-intel.eptad', @@ -42,7 +43,11 @@ class platform::compute::grub::params ( } if $::is_gb_page_supported { - $gb_hugepages = "hugepagesz=1G hugepages=${::number_of_numa_nodes}" + if $g_hugepages != undef { + $gb_hugepages = $g_hugepages + } else { + $gb_hugepages = "hugepagesz=1G hugepages=${::number_of_numa_nodes}" + } } else { $gb_hugepages = '' } diff --git a/puppet-manifests/src/modules/platform/manifests/vswitch.pp b/puppet-manifests/src/modules/platform/manifests/vswitch.pp index cb9992801e..674b0006dc 100644 --- a/puppet-manifests/src/modules/platform/manifests/vswitch.pp +++ b/puppet-manifests/src/modules/platform/manifests/vswitch.pp @@ -95,6 +95,20 @@ class platform::vswitch::ovs( } elsif $::platform::params::vswitch_type == 'ovs-dpdk' { include ::vswitch::dpdk + # Since OVS socket memory is configurable, it is required to start the + # ovsdb server and disable DPDK initialization before the openvswitch + # service runs to prevent any previously stored OVSDB configuration from + # being used before the new Vs_config gets applied.
+ service { 'ovsdb-server': + ensure => 'running', + before => Service['openvswitch'], + } + exec { 'disable dpdk initialization': + command => template('platform/ovs.disable-dpdk-init.erb'), + provider => shell, + require => Service['ovsdb-server'] + } + Exec['vfio-iommu-mode'] -> Platform::Vswitch::Ovs::Device<||> -> Platform::Vswitch::Ovs::Bridge<||> diff --git a/puppet-manifests/src/modules/platform/templates/ovs.disable-dpdk-init.erb b/puppet-manifests/src/modules/platform/templates/ovs.disable-dpdk-init.erb new file mode 100644 index 0000000000..79eb67e42a --- /dev/null +++ b/puppet-manifests/src/modules/platform/templates/ovs.disable-dpdk-init.erb @@ -0,0 +1,5 @@ +# Disable DPDK initialization in ovsdb +# ovs-vsctl is not used here as it can fail after the initial start of ovsdb +# (even though the dpdk-init parameter actually gets applied). +ovsdb-client -v transact '["Open_vSwitch", {"op" : "mutate", "table": "Open_vSwitch", "where": [], "mutations" : [["other_config","delete", ["map",[["dpdk-init", "true"]]]]]}]' +ovsdb-client -v transact '["Open_vSwitch", {"op" : "mutate", "table": "Open_vSwitch", "where": [], "mutations" : [["other_config","insert", ["map",[["dpdk-init", "false"]]]]]}]' \ No newline at end of file diff --git a/sysinv/cgts-client/cgts-client/cgtsclient/v1/iHost_shell.py b/sysinv/cgts-client/cgts-client/cgtsclient/v1/iHost_shell.py index b24c02002e..47f7400764 100755 --- a/sysinv/cgts-client/cgts-client/cgtsclient/v1/iHost_shell.py +++ b/sysinv/cgts-client/cgts-client/cgtsclient/v1/iHost_shell.py @@ -609,10 +609,14 @@ def do_host_apply_memprofile(cc, args): # Echo list of new host memory imemory = cc.imemory.list(host.uuid) field_labels = ['uuid', 'vm_hugepages_1G', 'vm_hugepages_2M', - 'vm_hugepages_2M_pending', 'vm_hugepages_1G_pending'] + 'vm_hugepages_2M_pending', 'vm_hugepages_1G_pending', + 'vswitch_hugepages_nr', 'vswitch_hugepages_size_reqd', + 'vswitch_hugepages_size_mib'] fields = ['uuid', 'vm_hugepages_nr_1G', 'vm_hugepages_nr_2M', - 'vm_hugepages_nr_2M_pending', 'vm_hugepages_nr_1G_pending'] + 'vm_hugepages_nr_2M_pending', 'vm_hugepages_nr_1G_pending', + 'vswitch_hugepages_nr', 'vswitch_hugepages_reqd', + 'vswitch_hugepages_size_mib'] utils.print_list(imemory, fields, field_labels, sortby=0) diff --git a/sysinv/cgts-client/cgts-client/cgtsclient/v1/imemory_shell.py b/sysinv/cgts-client/cgts-client/cgtsclient/v1/imemory_shell.py index 601f198f29..761f983434 100644 --- a/sysinv/cgts-client/cgts-client/cgtsclient/v1/imemory_shell.py +++ b/sysinv/cgts-client/cgts-client/cgtsclient/v1/imemory_shell.py @@ -23,6 +23,7 @@ def _print_imemory_show(imemory): 'vswitch_hugepages_size_mib', 'vswitch_hugepages_nr', 'vswitch_hugepages_avail', + 'vswitch_hugepages_reqd', 'vm_hugepages_nr_4K', 'vm_hugepages_nr_2M', 'vm_hugepages_nr_2M_pending', @@ -39,6 +40,7 @@ def _print_imemory_show(imemory): 'vSwitch Huge Pages: Size (MiB)', ' Total', ' Available', + ' Required', 'Application Pages (4K): Total', 'Application Huge Pages (2M): Total', ' Total Pending', @@ -112,6 +114,7 @@ def do_host_memory_list(cc, args): 'vswitch_hugepages_size_mib', 'vswitch_hugepages_nr', 'vswitch_hugepages_avail', + 'vswitch_hugepages_reqd', 'vm_hugepages_nr_4K', 'vm_hugepages_nr_2M', 'vm_hugepages_avail_2M', @@ -129,6 +132,7 @@ def do_host_memory_list(cc, args): 'vs_hp_size(MiB)', 'vs_hp_total', 'vs_hp_avail', + 'vs_hp_reqd', 'vm_total_4K', 'vm_hp_total_2M', 'vm_hp_avail_2M', @@ -150,18 +154,24 @@ def do_host_memory_list(cc, args): @utils.arg('-m', '--platform_reserved_mib', metavar='', 
help='The amount of platform memory (MiB) for the numa node') -@utils.arg('-2M', '--vm_hugepages_nr_2M_pending', +@utils.arg('-2M', '--hugepages_nr_2M_pending', metavar='<2M hugepages number>', help='The number of 2M vm huge pages for the numa node') -@utils.arg('-1G', '--vm_hugepages_nr_1G_pending', +@utils.arg('-1G', '--hugepages_nr_1G_pending', metavar='<1G hugepages number>', help='The number of 1G vm huge pages for the numa node') +@utils.arg('-f', '--function', + metavar='', + choices=['vswitch', 'vm'], + default='vm', + help='The Memory Function.') def do_host_memory_modify(cc, args): """Modify platform reserved and/or application huge page memory attributes for worker nodes.""" rwfields = ['platform_reserved_mib', - 'vm_hugepages_nr_2M_pending', - 'vm_hugepages_nr_1G_pending'] + 'hugepages_nr_2M_pending', + 'hugepages_nr_1G_pending', + 'function'] ihost = ihost_utils._find_ihost(cc, args.hostnameorid) @@ -185,10 +195,31 @@ def do_host_memory_modify(cc, args): raise exc.CommandError('Processor not found: host %s processor %s' % (ihost.hostname, args.numa_node)) + function = user_specified_fields.get('function') + vswitch_hp_size_mib = None + patch = [] for (k, v) in user_specified_fields.items(): + if k == 'function': + continue + if function == 'vswitch': + if k == 'hugepages_nr_2M_pending': + vswitch_hp_size_mib = 2 + k = 'vswitch_hugepages_reqd' + elif k == 'hugepages_nr_1G_pending': + vswitch_hp_size_mib = 1024 + k = 'vswitch_hugepages_reqd' + else: + if k == 'hugepages_nr_2M_pending': + k = 'vm_hugepages_nr_2M_pending' + elif k == 'hugepages_nr_1G_pending': + k = 'vm_hugepages_nr_1G_pending' + patch.append({'op': 'replace', 'path': '/' + k, 'value': v}) if patch: + if vswitch_hp_size_mib: + patch.append({'op': 'replace', 'path': '/' + 'vswitch_hugepages_size_mib', 'value': vswitch_hp_size_mib}) + imemory = cc.imemory.update(mem.uuid, patch) _print_imemory_show(imemory) diff --git a/sysinv/cgts-client/cgts-client/cgtsclient/v1/iprofile_shell.py b/sysinv/cgts-client/cgts-client/cgtsclient/v1/iprofile_shell.py index b8d062286b..e812b7e7e8 100644 --- a/sysinv/cgts-client/cgts-client/cgtsclient/v1/iprofile_shell.py +++ b/sysinv/cgts-client/cgts-client/cgtsclient/v1/iprofile_shell.py @@ -552,12 +552,32 @@ def get_memoryconfig_1G(iprofile): return str +def get_memoryconfig_vswitch_nr(iprofile): + str = '' + for memory in iprofile.memory: + if str != '': + str = str + "; " + str = str + "%s" % (memory.vswitch_hugepages_reqd) + return str + + +def get_memoryconfig_vswitch_size(iprofile): + str = '' + for memory in iprofile.memory: + if str != '': + str = str + "; " + str = str + "%s" % (memory.vswitch_hugepages_size_mib) + return str + + def get_memprofile_data(cc, iprofile): iprofile.memory = cc.iprofile.list_imemorys(iprofile.uuid) iprofile.nodes = cc.iprofile.list_inodes(iprofile.uuid) iprofile.platform_reserved_mib = get_memoryconfig_platform(iprofile) iprofile.vm_hugepages_2M = get_memoryconfig_2M(iprofile) iprofile.vm_hugepages_1G = get_memoryconfig_1G(iprofile) + iprofile.vswitch_hugepages_nr = get_memoryconfig_vswitch_nr(iprofile) + iprofile.vswitch_hugepages_size_mib = get_memoryconfig_vswitch_size(iprofile) def do_memprofile_list(cc, args): @@ -567,19 +587,25 @@ def do_memprofile_list(cc, args): profile.platform_reserved_mib = get_memoryconfig_platform(profile) profile.vm_hugepages_2M = get_memoryconfig_2M(profile) profile.vm_hugepages_1G = get_memoryconfig_1G(profile) + profile.vswitch_hugepages_nr = get_memoryconfig_vswitch_nr(profile) + profile.vswitch_hugepages_size_mib = 
get_memoryconfig_vswitch_size(profile) field_labels = ['uuid', 'name', 'platform_reserved_mib', - 'vm_hugepages_2M', 'vm_hugepages_1G'] + 'vm_hugepages_2M', 'vm_hugepages_1G', + 'vswitch_hugepages_nr', 'vswitch_hugepages_size_mib'] fields = ['uuid', 'profilename', 'platform_reserved_mib', - 'vm_hugepages_2M', 'vm_hugepages_1G'] + 'vm_hugepages_2M', 'vm_hugepages_1G', + 'vswitch_hugepages_nr', 'vswitch_hugepages_size_mib'] utils.print_list(profiles, fields, field_labels, sortby=0) def _print_memprofile_show(memoryprofile): fields = ['profilename', 'platform_reserved_mib', 'vm_hugepages_2M', - 'vm_hugepages_1G', 'uuid', 'created_at', 'updated_at'] + 'vm_hugepages_1G', 'vswitch_hugepages_nr', + 'vswitch_hugepages_size_mib', 'uuid', 'created_at', 'updated_at'] labels = ['name', 'platform_reserved_mib', 'vm_hugepages_2M', - 'vm_hugepages_1G', 'uuid', 'created_at', 'updated_at'] + 'vm_hugepages_1G', 'vswitch_hugepages_nr', + 'vswitch_hugepages_size_mib', 'uuid', 'created_at', 'updated_at'] data = [(f, getattr(memoryprofile, f, '')) for f in fields] utils.print_tuple_list(data, labels) diff --git a/sysinv/sysinv/sysinv/etc/sysinv/profileSchema.xsd b/sysinv/sysinv/sysinv/etc/sysinv/profileSchema.xsd index e8fc5b3b30..cbf7eb07a0 100644 --- a/sysinv/sysinv/sysinv/etc/sysinv/profileSchema.xsd +++ b/sysinv/sysinv/sysinv/etc/sysinv/profileSchema.xsd @@ -54,6 +54,8 @@ + + diff --git a/sysinv/sysinv/sysinv/sysinv/agent/node.py b/sysinv/sysinv/sysinv/sysinv/agent/node.py index 0a1d4cd3e0..ec921d9129 100644 --- a/sysinv/sysinv/sysinv/sysinv/agent/node.py +++ b/sysinv/sysinv/sysinv/sysinv/agent/node.py @@ -292,6 +292,36 @@ class NodeOperator(object): return [name for name in listdir(dir) if os.path.isdir(join(dir, name))] + def _get_vswitch_reserved_memory(self, node): + # Read vswitch memory from worker_reserved.conf + + vswitch_hugepages_nr = 0 + vswitch_hugepages_size = 0 + try: + with open('/etc/platform/worker_reserved.conf', 'r') as infile: + for line in infile: + if line.startswith("COMPUTE_VSWITCH_MEMORY="): + val = line.split("=") + vswitch_reserves = val[1].strip('\n')[1:-1] + for idx, reserve in enumerate(vswitch_reserves.split()): + if idx != node: + continue + reserve = reserve.split(":") + if reserve[0].strip('"') == "node%d" % node: + pages_nr = re.sub('[^0-9]', '', reserve[2]) + pages_size = reserve[1] + + vswitch_hugepages_nr = int(pages_nr) + if pages_size == "1048576kB": + vswitch_hugepages_size = SIZE_1G_MB + else: + vswitch_hugepages_size = SIZE_2M_MB + break + except Exception as e: + LOG.debug("Could not read vswitch reserved memory: %s", e) + + return vswitch_hugepages_nr, vswitch_hugepages_size + def _inode_get_memory_hugepages(self): """Collect hugepage info, including vswitch, and vm. Collect platform reserved if config. 
@@ -354,36 +384,50 @@ class NodeOperator(object): total_hp_mb = total_hp_mb + int(nr_hugepages * size) free_hp_mb = free_hp_mb + int(free_hugepages * size) + vs_hp_nr, vs_hp_size = self._get_vswitch_reserved_memory( + node) + if vs_hp_nr == 0 or vs_hp_size == 0: + vs_hp_nr = VSWITCH_MEMORY_MB // size + vs_hp_size = size + # Libvirt hugepages can be 1G and 2M if size == SIZE_1G_MB: - vswitch_hugepages_nr = VSWITCH_MEMORY_MB / size - hp_attr = { - 'vswitch_hugepages_size_mib': size, - 'vswitch_hugepages_nr': vswitch_hugepages_nr, - 'vswitch_hugepages_avail': 0, - 'vm_hugepages_nr_1G': - (nr_hugepages - vswitch_hugepages_nr), + hp_attr = {} + if vs_hp_size == size: + nr_hugepages -= vs_hp_nr + hp_attr.update({ + 'vswitch_hugepages_size_mib': vs_hp_size, + 'vswitch_hugepages_nr': vs_hp_nr, + 'vswitch_hugepages_avail': 0 + }) + hp_attr.update({ + 'vm_hugepages_nr_1G': nr_hugepages, 'vm_hugepages_avail_1G': free_hugepages, 'vm_hugepages_use_1G': 'True' - } + }) else: if len(subdirs) == 1: # No 1G hugepage support. - vswitch_hugepages_nr = VSWITCH_MEMORY_MB / size hp_attr = { - 'vswitch_hugepages_size_mib': size, - 'vswitch_hugepages_nr': vswitch_hugepages_nr, - 'vswitch_hugepages_avail': 0, + 'vm_hugepages_use_1G': 'False', + 'vswitch_hugepages_size_mib': vs_hp_size, + 'vswitch_hugepages_nr': vs_hp_nr, + 'vswitch_hugepages_avail': 0 } - hp_attr.update({'vm_hugepages_use_1G': 'False'}) else: - # vswitch will use 1G hugpages - vswitch_hugepages_nr = 0 + hp_attr = {} + if vs_hp_size == size and initial_report is False: + # User manually set 2M pages + nr_hugepages -= vs_hp_nr + hp_attr.update({ + 'vswitch_hugepages_size_mib': vs_hp_size, + 'vswitch_hugepages_nr': vs_hp_nr, + 'vswitch_hugepages_avail': 0 + }) hp_attr.update({ 'vm_hugepages_avail_2M': free_hugepages, - 'vm_hugepages_nr_2M': - (nr_hugepages - vswitch_hugepages_nr) + 'vm_hugepages_nr_2M': nr_hugepages }) attr.update(hp_attr) diff --git a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py index e62e34a64d..222af7411d 100644 --- a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py +++ b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py @@ -3506,7 +3506,10 @@ class HostController(rest.RestController): memtotal = m.node_memtotal_mib allocated = m.platform_reserved_mib if m.hugepages_configured: - allocated += m.vswitch_hugepages_nr * m.vswitch_hugepages_size_mib + if m.vswitch_hugepages_reqd is not None: + allocated += m.vswitch_hugepages_reqd * m.vswitch_hugepages_size_mib + else: + allocated += m.vswitch_hugepages_nr * m.vswitch_hugepages_size_mib if m.vm_hugepages_nr_2M_pending is not None: allocated += constants.MIB_2M * m.vm_hugepages_nr_2M_pending pending_2M_memory = True @@ -5062,6 +5065,8 @@ class HostController(rest.RestController): mib_reserved_disk_io = 0 align_2M_memory = False align_1G_memory = False + vswitch_hp_size = None + for node in ihost_inodes: # If the reserved memory has changed (eg, due to patch that # changes common/constants.py), then push updated reserved memory @@ -5071,6 +5076,15 @@ class HostController(rest.RestController): # of the original setting. 
self._auto_adjust_memory_for_node(ihost, node) + mems = pecan.request.dbapi.imemory_get_by_inode(node['id']) + for m in mems: + if not vswitch_hp_size: + vswitch_hp_size = m.vswitch_hugepages_size_mib + else: + if m.vswitch_hugepages_size_mib != vswitch_hp_size: + raise wsme.exc.ClientSideError(_( + "Mismatched vswitch socket memory hugepage size.")) + # check whether the pending hugepages changes and the current # platform reserved memory fit within the total memory available mib_reserved_node, pending_2M_memory, pending_1G_memory = \ diff --git a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/memory.py b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/memory.py index a7f713485c..bbd4e4837d 100644 --- a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/memory.py +++ b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/memory.py @@ -34,6 +34,7 @@ from sysinv.api.controllers.v1 import collection from sysinv.api.controllers.v1 import link from sysinv.api.controllers.v1 import types from sysinv.api.controllers.v1 import utils +from sysinv.common import constants from sysinv.common import exception from sysinv.common import utils as cutils from sysinv import objects @@ -390,6 +391,9 @@ class MemoryController(rest.RestController): vm_hugepages_nr_2M_pending = None vm_hugepages_nr_1G_pending = None + vswitch_hugepages_reqd = None + vswitch_hugepages_size_mib = None + platform_reserved_mib = None for p in patch: if p['path'] == '/platform_reserved_mib': @@ -400,6 +404,12 @@ class MemoryController(rest.RestController): if p['path'] == '/vm_hugepages_nr_1G_pending': vm_hugepages_nr_1G_pending = p['value'] + if p['path'] == '/vswitch_hugepages_reqd': + vswitch_hugepages_reqd = p['value'] + + if p['path'] == '/vswitch_hugepages_size_mib': + vswitch_hugepages_size_mib = p['value'] + # The host must be locked if host_id: _check_host(host_id) @@ -410,7 +420,8 @@ class MemoryController(rest.RestController): try: # Semantics checks and update hugepage memory accounting patch = _check_huge_values(rpc_port, patch, - vm_hugepages_nr_2M_pending, vm_hugepages_nr_1G_pending) + vm_hugepages_nr_2M_pending, vm_hugepages_nr_1G_pending, + vswitch_hugepages_reqd, vswitch_hugepages_size_mib) except wsme.exc.ClientSideError as e: inode = pecan.request.dbapi.inode_get(inode_id=rpc_port.forinodeid) numa_node = inode.numa_node @@ -419,7 +430,8 @@ class MemoryController(rest.RestController): # Semantics checks for platform memory _check_memory(rpc_port, host_id, platform_reserved_mib, - vm_hugepages_nr_2M_pending, vm_hugepages_nr_1G_pending) + vm_hugepages_nr_2M_pending, vm_hugepages_nr_1G_pending, + vswitch_hugepages_reqd, vswitch_hugepages_size_mib) # only allow patching allocated_function and capabilities # replace ihost_uuid and inode_uuid with corresponding @@ -472,6 +484,9 @@ class MemoryController(rest.RestController): def _update(mem_uuid, mem_values): + vswitch_hugepages_reqd = None + vswitch_hugepages_size_mib = None + rpc_port = objects.memory.get_by_uuid(pecan.request.context, mem_uuid) if 'forihostid' in rpc_port: ihostId = rpc_port['forihostid'] @@ -483,29 +498,37 @@ def _update(mem_uuid, mem_values): if 'platform_reserved_mib' in mem_values: platform_reserved_mib = mem_values['platform_reserved_mib'] + if 'vswitch_hugepages_size_mib' in mem_values: + vswitch_hugepages_size_mib = mem_values['vswitch_hugepages_size_mib'] + + if 'vswitch_hugepages_reqd' in mem_values: + vswitch_hugepages_reqd = mem_values['vswitch_hugepages_reqd'] + if 'vm_hugepages_nr_2M_pending' in mem_values: vm_hugepages_nr_2M_pending = 
mem_values['vm_hugepages_nr_2M_pending'] if 'vm_hugepages_nr_1G_pending' in mem_values: vm_hugepages_nr_1G_pending = mem_values['vm_hugepages_nr_1G_pending'] - # The host must be locked - if host_id: - _check_host(host_id) - else: - raise wsme.exc.ClientSideError(( - "Hostname or uuid must be defined")) + # The host must be locked + if host_id: + _check_host(host_id) + else: + raise wsme.exc.ClientSideError(( + "Hostname or uuid must be defined")) - # Semantics checks and update hugepage memory accounting - mem_values = _check_huge_values(rpc_port, mem_values, - vm_hugepages_nr_2M_pending, vm_hugepages_nr_1G_pending) + # Semantics checks and update hugepage memory accounting + mem_values = _check_huge_values(rpc_port, mem_values, + vm_hugepages_nr_2M_pending, vm_hugepages_nr_1G_pending, + vswitch_hugepages_reqd, vswitch_hugepages_size_mib) - # Semantics checks for platform memory - _check_memory(rpc_port, host_id, platform_reserved_mib, - vm_hugepages_nr_2M_pending, vm_hugepages_nr_1G_pending) + # Semantics checks for platform memory + _check_memory(rpc_port, host_id, platform_reserved_mib, + vm_hugepages_nr_2M_pending, vm_hugepages_nr_1G_pending, + vswitch_hugepages_reqd) - # update memory values - pecan.request.dbapi.imemory_update(mem_uuid, mem_values) + # update memory values + pecan.request.dbapi.imemory_update(mem_uuid, mem_values) def _check_host(ihost): @@ -523,7 +546,8 @@ def _check_host(ihost): def _check_memory(rpc_port, ihost, platform_reserved_mib=None, - vm_hugepages_nr_2M_pending=None, vm_hugepages_nr_1G_pending=None): + vm_hugepages_nr_2M_pending=None, vm_hugepages_nr_1G_pending=None, + vswitch_hugepages_reqd=None, vswitch_hugepages_size_mib=None): if platform_reserved_mib: # Check for invalid characters try: @@ -577,7 +601,11 @@ def _check_memory(rpc_port, ihost, platform_reserved_mib=None, LOG.debug("vm total=%s" % (mem_alloc)) vs_hp_size = rpc_port['vswitch_hugepages_size_mib'] - vs_hp_nr = rpc_port['vswitch_hugepages_nr'] + if vswitch_hugepages_reqd: + vs_hp_nr = int(vswitch_hugepages_reqd) + elif rpc_port['vswitch_hugepages_nr']: + vs_hp_nr = int(rpc_port['vswitch_hugepages_nr']) + mem_alloc += vs_hp_size * vs_hp_nr LOG.debug("vs_hp_nr=%s vs_hp_size=%s" % (vs_hp_nr, vs_hp_size)) LOG.debug("memTotal %s mem_alloc %s" % (node_memtotal_mib, mem_alloc)) @@ -605,13 +633,93 @@ def _check_memory(rpc_port, ihost, platform_reserved_mib=None, LOG.info(msg) -def _check_huge_values(rpc_port, patch, vm_hugepages_nr_2M=None, - vm_hugepages_nr_1G=None): +def _check_vswitch_huge_values(rpc_port, patch, vm_hugepages_nr_2M=None, + vm_hugepages_nr_1G=None, + vswitch_hugepages_reqd=None, + vswitch_hugepages_size_mib=None): - if rpc_port['vm_hugepages_use_1G'] == 'False' and vm_hugepages_nr_1G: - # cannot provision 1G huge pages if the processor does not support them + if vswitch_hugepages_reqd and not vswitch_hugepages_size_mib: raise wsme.exc.ClientSideError(_( - "Processor does not support 1G huge pages.")) + "No vswitch hugepage size specified.")) + + if vswitch_hugepages_reqd: + try: + val = int(vswitch_hugepages_reqd) + except ValueError: + raise wsme.exc.ClientSideError(_( + "Vswitch huge pages must be a number")) + if val <= 0: + raise wsme.exc.ClientSideError(_( + "Vswitch huge pages must be greater than zero")) + + if vswitch_hugepages_size_mib: + try: + val = int(vswitch_hugepages_size_mib) + except ValueError: + raise wsme.exc.ClientSideError(_( + "Vswitch huge pages must be a number")) + if val <= 0: + raise wsme.exc.ClientSideError(_( + "Vswitch huge pages size (Mib) must be 
greater than zero")) + if ((val & (val - 1)) != 0): + raise wsme.exc.ClientSideError(_( + "Vswitch hugepage size (Mib) must be a power of 2")) + + # None == unchanged + if vswitch_hugepages_reqd is not None: + new_vs_pages = int(vswitch_hugepages_reqd) + elif rpc_port['vswitch_hugepages_nr']: + new_vs_pages = rpc_port['vswitch_hugepages_nr'] + else: + new_vs_pages = 0 + + # None == unchanged + if vswitch_hugepages_size_mib is not None: + vs_hp_size_mib = int(vswitch_hugepages_size_mib) + elif rpc_port['vswitch_hugepages_size_mib']: + vs_hp_size_mib = rpc_port['vswitch_hugepages_size_mib'] + else: + # default + vs_hp_size_mib = constants.MIB_2M + + vs_hp_reqd_mib = new_vs_pages * vs_hp_size_mib + + # Throttle the maximum amount of memory that vswitch can take to + # 90% of usable memory to account for fluctuations in the reported + # node mem total. + vs_hp_avail_mib = 0.9 * (rpc_port['node_memtotal_mib'] - + rpc_port['platform_reserved_mib'] - + vm_hugepages_nr_2M*constants.MIB_2M - + vm_hugepages_nr_1G*constants.MIB_1G) + + if vs_hp_avail_mib < vs_hp_reqd_mib: + if vs_hp_size_mib == constants.MIB_2M: + vs_possible_2M = int(vs_hp_avail_mib / constants.MIB_2M) + msg = _("No available space for 2M vswitch huge page allocation, " + "max 2M vswitch pages: %d") % vs_possible_2M + elif vs_hp_size_mib == constants.MIB_1G: + vs_possible_1G = int(vs_hp_avail_mib / constants.MIB_1G) + msg = _("No available space for 1G vswitch huge page allocation, " + "max 1G vswitch pages: %d") % vs_possible_1G + else: + msg = _("No available space for vswitch huge page allocation, " + "max memory (MB): %d") % vs_hp_avail_mib + raise wsme.exc.ClientSideError(msg) + + return patch + + +def _check_huge_values(rpc_port, patch, vm_hugepages_nr_2M=None, + vm_hugepages_nr_1G=None, vswitch_hugepages_reqd=None, + vswitch_hugepages_size_mib=None): + + if rpc_port['vm_hugepages_use_1G'] == 'False': + vs_hp_size = vswitch_hugepages_size_mib + if (vm_hugepages_nr_1G or vs_hp_size == constants.MIB_1G): + # cannot provision 1G huge pages if the processor does not support + # them + raise wsme.exc.ClientSideError(_( + "Processor does not support 1G huge pages.")) # Check for invalid characters if vm_hugepages_nr_2M: @@ -675,17 +783,17 @@ def _check_huge_values(rpc_port, patch, vm_hugepages_nr_2M=None, if rpc_port['vm_hugepages_possible_1G']: vm_possible_1G = int(rpc_port['vm_hugepages_possible_1G']) - LOG.debug("max possible 2M pages: %s, max possible 1G pages: %s" % + LOG.debug("max possible 2M VM pages: %s, max possible 1G VM pages: %s" % (vm_possible_2M, vm_possible_1G)) if vm_possible_2M < new_2M_pages: - msg = _("No available space for 2M huge page allocation, " - "max 2M pages: %d") % vm_possible_2M + msg = _("No available space for 2M VM huge page allocation, " + "max 2M VM pages: %d") % vm_possible_2M raise wsme.exc.ClientSideError(msg) if vm_possible_1G < new_1G_pages: - msg = _("No available space for 1G huge page allocation, " - "max 1G pages: %d") % vm_possible_1G + msg = _("No available space for 1G VM huge page allocation, " + "max 1G VM pages: %d") % vm_possible_1G raise wsme.exc.ClientSideError(msg) # always use vm_possible_2M to compare, @@ -693,18 +801,22 @@ def _check_huge_values(rpc_port, patch, vm_hugepages_nr_2M=None, max_1G = int((vm_possible_2M - new_2M_pages) / num_2M_for_1G) max_2M = vm_possible_2M - new_1G_pages * num_2M_for_1G if new_2M_pages > 0 and new_1G_pages > 0: - msg = _("No available space for new settings." + msg = _("No available space for new VM hugepage settings." 
"Max 1G pages is %s when 2M is %s, or " "Max 2M pages is %s when 1G is %s." % ( max_1G, new_2M_pages, max_2M, new_1G_pages )) elif new_1G_pages > 0: - msg = _("No available space for 1G huge page allocation, " - "max 1G pages: %d") % vm_possible_1G + msg = _("No available space for 1G VM huge page allocation, " + "max 1G VM pages: %d") % vm_possible_1G else: - msg = _("No available space for 2M huge page allocation, " - "max 2M pages: %d") % vm_possible_2M + msg = _("No available space for 2M VM huge page allocation, " + "max 2M VM pages: %d") % vm_possible_2M raise wsme.exc.ClientSideError(msg) + _check_vswitch_huge_values( + rpc_port, patch, new_2M_pages, new_1G_pages, + vswitch_hugepages_reqd, vswitch_hugepages_size_mib) + return patch diff --git a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/profile.py b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/profile.py index 3cddb8a4ea..438c0127fb 100644 --- a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/profile.py +++ b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/profile.py @@ -1532,6 +1532,8 @@ def _create_mem_profile(profile_name, profile_node): platform_reserved = get_mem_assignment(profile_node, "platformReservedMiB") vm_hp_2m = get_mem_assignment(profile_node, "vmHugePages2M") vm_hp_1g = get_mem_assignment(profile_node, "vmHugePages1G") + vs_hp_nr = get_mem_assignment(profile_node, "vsHugePagesNr") + vs_hp_sz = get_mem_assignment(profile_node, "vsHugePagesSz") except profile_utils.InvalidProfileData as e: return "Error", _('error: CPU profile %s is invalid') % profile_name, e.message @@ -1559,6 +1561,8 @@ def _create_mem_profile(profile_name, profile_node): mdict['platform_reserved_mib'] = get_mem_size(platform_reserved, node_idx) mdict['vm_hugepages_nr_2M_pending'] = get_mem_size(vm_hp_2m, node_idx) mdict['vm_hugepages_nr_1G_pending'] = get_mem_size(vm_hp_1g, node_idx) + mdict['vswitch_hugepages_reqd'] = get_mem_size(vs_hp_nr, node_idx) + mdict['vswitch_hugepages_size_mib'] = get_mem_size(vs_hp_sz, node_idx) pecan.request.dbapi.imemory_create(iprofile_id, mdict) node_idx += 1 @@ -2326,6 +2330,8 @@ def memoryprofile_copy_data(host, profile): mdict['platform_reserved_mib'] = m.platform_reserved_mib mdict['vm_hugepages_nr_2M_pending'] = m.vm_hugepages_nr_2M mdict['vm_hugepages_nr_1G_pending'] = m.vm_hugepages_nr_1G + mdict['vswitch_hugepages_reqd'] = m.vswitch_hugepages_nr + mdict['vswitch_hugepages_size_mib'] = m.vswitch_hugepages_size_mib newmemory = pecan.request.dbapi.imemory_create(iprofile_id, mdict) # if memory wasn't actualy created, @@ -3177,7 +3183,9 @@ def memoryprofile_apply_to_host(host, profile): if int(host_inode.numa_node) == int(profile_inode.numa_node): data = {'vm_hugepages_nr_2M_pending': pmem.vm_hugepages_nr_2M_pending, 'vm_hugepages_nr_1G_pending': pmem.vm_hugepages_nr_1G_pending, - 'platform_reserved_mib': pmem.platform_reserved_mib} + 'platform_reserved_mib': pmem.platform_reserved_mib, + 'vswitch_hugepages_reqd': pmem.vswitch_hugepages_reqd, + 'vswitch_hugepages_size_mib': pmem.vswitch_hugepages_size_mib} try: memory_api._update(hmem.uuid, data) except wsme.exc.ClientSideError as cse: diff --git a/sysinv/sysinv/sysinv/sysinv/conductor/manager.py b/sysinv/sysinv/sysinv/sysinv/conductor/manager.py index cb9f5e4ede..9ca824f654 100644 --- a/sysinv/sysinv/sysinv/sysinv/conductor/manager.py +++ b/sysinv/sysinv/sysinv/sysinv/conductor/manager.py @@ -2825,6 +2825,7 @@ class ConductorManager(service.PeriodicService): if ihost.administrative == constants.ADMIN_UNLOCKED: mem_dict['vm_hugepages_nr_2M_pending'] = None 
mem_dict['vm_hugepages_nr_1G_pending'] = None + mem_dict['vswitch_hugepages_reqd'] = None try: imems = self.dbapi.imemory_get_by_ihost_inode(ihost_uuid, diff --git a/sysinv/sysinv/sysinv/sysinv/puppet/ovs.py b/sysinv/sysinv/sysinv/sysinv/puppet/ovs.py index 0fffffb52a..ced5410d35 100644 --- a/sysinv/sysinv/sysinv/sysinv/puppet/ovs.py +++ b/sysinv/sysinv/sysinv/sysinv/puppet/ovs.py @@ -331,18 +331,33 @@ class OVSPuppet(base.BasePuppet): def _get_memory_config(self, host): vswitch_memory = [] + config = {} + vswitch_size = 0 host_memory = self.dbapi.imemory_get_by_ihost(host.id) for memory in host_memory: vswitch_size = memory.vswitch_hugepages_size_mib - vswitch_pages = memory.vswitch_hugepages_nr + vswitch_pages = memory.vswitch_hugepages_reqd \ + if memory.vswitch_hugepages_reqd is not None \ + else memory.vswitch_hugepages_nr + + if vswitch_pages == 0: + vswitch_pages = memory.vswitch_hugepages_nr + vswitch_memory.append(str(vswitch_size * vswitch_pages)) dpdk_socket_mem = self.quoted_str(','.join(vswitch_memory)) - return { + config.update({ 'vswitch::dpdk::socket_mem': dpdk_socket_mem - } + }) + + if vswitch_size == constants.MIB_2M: + config.update({ + 'platform::vswitch::params::hugepage_dir': '/mnt/huge-2048kB' + }) + + return config def _get_virtual_config(self, host): config = {} diff --git a/sysinv/sysinv/sysinv/sysinv/puppet/platform.py b/sysinv/sysinv/sysinv/sysinv/puppet/platform.py index 0210470927..b31e7d7751 100644 --- a/sysinv/sysinv/sysinv/sysinv/puppet/platform.py +++ b/sysinv/sysinv/sysinv/sysinv/puppet/platform.py @@ -670,6 +670,8 @@ class PlatformPuppet(base.BasePuppet): vm_2M_pages = [] vm_1G_pages = [] + vs_pages_updated = False + for node, memory_list in memory_numa_list.items(): memory = memory_list[0] @@ -682,7 +684,13 @@ class PlatformPuppet(base.BasePuppet): platform_nodes.append(platform_node) vswitch_size = memory.vswitch_hugepages_size_mib - vswitch_pages = memory.vswitch_hugepages_nr + vswitch_pages = memory.vswitch_hugepages_reqd \ + if memory.vswitch_hugepages_reqd is not None \ + else memory.vswitch_hugepages_nr + + if vswitch_pages == 0: + vswitch_pages = memory.vswitch_hugepages_nr + vswitch_node = "\"node%d:%dkB:%d\"" % ( node, vswitch_size * 1024, vswitch_pages) vswitch_nodes.append(vswitch_node) @@ -700,11 +708,11 @@ class PlatformPuppet(base.BasePuppet): total_hugepages_1G = vm_hugepages_nr_1G if memory.vswitch_hugepages_size_mib == constants.MIB_2M: - total_hugepages_2M += memory.vswitch_hugepages_nr - vswitch_2M_page += memory.vswitch_hugepages_nr + total_hugepages_2M += vswitch_pages + vswitch_2M_page += vswitch_pages elif memory.vswitch_hugepages_size_mib == constants.MIB_1G: - total_hugepages_1G += memory.vswitch_hugepages_nr - vswitch_1G_page += memory.vswitch_hugepages_nr + total_hugepages_1G += vswitch_pages + vswitch_1G_page += vswitch_pages vswitch_2M_pages.append(vswitch_2M_page) vswitch_1G_pages.append(vswitch_1G_page) @@ -720,6 +728,10 @@ class PlatformPuppet(base.BasePuppet): vm_2M_pages.append(vm_hugepages_nr_2M) vm_1G_pages.append(vm_hugepages_nr_1G) + if (memory.vswitch_hugepages_reqd and + vswitch_pages != memory.vswitch_hugepages_nr): + vs_pages_updated = True + platform_reserved_memory = "(%s)" % ' '.join(platform_nodes) vswitch_reserved_memory = "(%s)" % ' '.join(vswitch_nodes) @@ -752,6 +764,17 @@ class PlatformPuppet(base.BasePuppet): 'platform::compute::hugepage::params::vm_1G_pages': vm_1G, }) + if vs_pages_updated: + grub_hugepages_1G = "hugepagesz=1G hugepages=%d" % ( + sum(vswitch_1G_pages) + sum(vm_1G_pages)) + 
config.update({ + 'platform::compute::grub::params::g_hugepages': + grub_hugepages_1G, + }) + if sum(vswitch_2M_pages) > 0: + config.update({ + 'platform::vswitch::params::hugepage_dir': '/mnt/huge-2048kB' + }) return config diff --git a/sysinv/sysinv/sysinv/sysinv/tests/api/test_profile.py b/sysinv/sysinv/sysinv/sysinv/tests/api/test_profile.py index a9b758b2c7..3643c1b798 100644 --- a/sysinv/sysinv/sysinv/sysinv/tests/api/test_profile.py +++ b/sysinv/sysinv/sysinv/sysinv/tests/api/test_profile.py @@ -328,6 +328,8 @@ class ProfileApplyTestCase(ProfileTestCase): profile_r['imemorys'][0]['vm_hugepages_nr_2M_pending']) self.assertEqual(hostmem_r['imemorys'][0]['vm_hugepages_nr_1G_pending'], profile_r['imemorys'][0]['vm_hugepages_nr_1G_pending']) + self.assertEqual(hostmem_r['imemorys'][0]['vswitch_hugepages_reqd'], + profile_r['imemorys'][0]['vswitch_hugepages_reqd']) def test_apply_storage_success(self): self.profile["profiletype"] = constants.PROFILE_TYPE_LOCAL_STORAGE
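
The following is a minimal, standalone sketch of the client-side mapping introduced above in cgtsclient/v1/imemory_shell.py: it shows how the new -f/--function flag, combined with -2M/-1G, is translated into a JSON patch against vswitch_hugepages_reqd and vswitch_hugepages_size_mib (for -f vswitch) or the existing vm_hugepages_nr_2M_pending/vm_hugepages_nr_1G_pending fields (for the default -f vm). The build_patch() helper and the MIB_2M/MIB_1G constants are illustrative stand-ins, not part of the change.

# Illustrative sketch only -- mirrors the argument handling added to
# cgtsclient/v1/imemory_shell.py; build_patch() is a hypothetical helper.

MIB_2M = 2      # hugepage sizes in MiB, mirroring the values used in the patch
MIB_1G = 1024


def build_patch(function, hugepages_nr_2M_pending=None,
                hugepages_nr_1G_pending=None):
    """Translate host-memory-modify arguments into a JSON patch list."""
    patch = []
    vswitch_hp_size_mib = None

    fields = {'hugepages_nr_2M_pending': hugepages_nr_2M_pending,
              'hugepages_nr_1G_pending': hugepages_nr_1G_pending}

    for key, value in fields.items():
        if value is None:
            continue
        if function == 'vswitch':
            # vswitch pages are requested through vswitch_hugepages_reqd;
            # the page size travels separately in vswitch_hugepages_size_mib
            vswitch_hp_size_mib = MIB_2M if '2M' in key else MIB_1G
            key = 'vswitch_hugepages_reqd'
        else:
            # the default 'vm' function keeps the original pending fields
            key = 'vm_' + key
        patch.append({'op': 'replace', 'path': '/' + key, 'value': value})

    if vswitch_hp_size_mib:
        patch.append({'op': 'replace',
                      'path': '/vswitch_hugepages_size_mib',
                      'value': vswitch_hp_size_mib})
    return patch


# e.g. system host-memory-modify -f vswitch -1G 4
print(build_patch('vswitch', hugepages_nr_1G_pending=4))
# [{'op': 'replace', 'path': '/vswitch_hugepages_reqd', 'value': 4},
#  {'op': 'replace', 'path': '/vswitch_hugepages_size_mib', 'value': 1024}]

Carrying the requested count in vswitch_hugepages_reqd, rather than overwriting vswitch_hugepages_nr directly, keeps the agent-reported current allocation separate from the pending request, mirroring how the vm_hugepages_nr_*_pending fields already behave.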