diff --git a/puppet-manifests/src/modules/platform/manifests/compute.pp b/puppet-manifests/src/modules/platform/manifests/compute.pp
index ffc840c4a0..3337a293b9 100644
--- a/puppet-manifests/src/modules/platform/manifests/compute.pp
+++ b/puppet-manifests/src/modules/platform/manifests/compute.pp
@@ -21,6 +21,7 @@ class platform::compute::grub::params (
   $n_cpus = '',
   $cpu_options = '',
   $m_hugepages = 'hugepagesz=2M hugepages=0',
+  $g_hugepages = undef,
   $default_pgsz = 'default_hugepagesz=2M',
   $keys = [
     'kvm-intel.eptad',
@@ -42,7 +43,11 @@ class platform::compute::grub::params (
   }

   if $::is_gb_page_supported {
-    $gb_hugepages = "hugepagesz=1G hugepages=${::number_of_numa_nodes}"
+    if $g_hugepages != undef {
+      $gb_hugepages = $g_hugepages
+    } else {
+      $gb_hugepages = "hugepagesz=1G hugepages=${::number_of_numa_nodes}"
+    }
   } else {
     $gb_hugepages = ''
   }
diff --git a/puppet-manifests/src/modules/platform/manifests/vswitch.pp b/puppet-manifests/src/modules/platform/manifests/vswitch.pp
index cb9992801e..674b0006dc 100644
--- a/puppet-manifests/src/modules/platform/manifests/vswitch.pp
+++ b/puppet-manifests/src/modules/platform/manifests/vswitch.pp
@@ -95,6 +95,20 @@ class platform::vswitch::ovs(
   } elsif $::platform::params::vswitch_type == 'ovs-dpdk' {
     include ::vswitch::dpdk

+    # Because OVS socket memory is configurable, the ovsdb server must be
+    # started and DPDK initialization disabled before the openvswitch
+    # service runs, so that previously stored OVSDB configuration is not
+    # used before the new Vs_config is applied.
+    service { 'ovsdb-server':
+      ensure => 'running',
+      before => Service['openvswitch'],
+    }
+
+    exec { 'disable dpdk initialization':
+      command  => template('platform/ovs.disable-dpdk-init.erb'),
+      provider => shell,
+      require  => Service['ovsdb-server'],
+    }
+
     Exec['vfio-iommu-mode']
     -> Platform::Vswitch::Ovs::Device<||>
     -> Platform::Vswitch::Ovs::Bridge<||>
diff --git a/puppet-manifests/src/modules/platform/templates/ovs.disable-dpdk-init.erb b/puppet-manifests/src/modules/platform/templates/ovs.disable-dpdk-init.erb
new file mode 100644
index 0000000000..79eb67e42a
--- /dev/null
+++ b/puppet-manifests/src/modules/platform/templates/ovs.disable-dpdk-init.erb
@@ -0,0 +1,5 @@
+# Disable DPDK initialization in ovsdb
+# ovs-vsctl is not used here as it can fail after the initial start of ovsdb
+# (even though the dpdk-init parameter actually gets applied).
+ovsdb-client -v transact '["Open_vSwitch", {"op" : "mutate", "table": "Open_vSwitch", "where": [], "mutations" : [["other_config","delete", ["map",[["dpdk-init", "true"]]]]]}]'
+ovsdb-client -v transact '["Open_vSwitch", {"op" : "mutate", "table": "Open_vSwitch", "where": [], "mutations" : [["other_config","insert", ["map",[["dpdk-init", "false"]]]]]}]'
\ No newline at end of file
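
Annotation (not part of the patch): a minimal Python sketch of the JSON-RPC transact payload the template sends, to make the two-step mutation explicit. The "delete then insert" sequence is used because, per OVSDB mutate semantics, "insert" on a map column is a no-op when the key already exists; deleting any stale dpdk-init=true pair first ensures the value ends up "false".

    import json

    def dpdk_init_mutations():
        # Mirrors ovs.disable-dpdk-init.erb: drop a stale dpdk-init=true
        # pair, then insert dpdk-init=false into other_config.
        ops = []
        for verb, val in (("delete", "true"), ("insert", "false")):
            ops.append({
                "op": "mutate",
                "table": "Open_vSwitch",
                "where": [],
                "mutations": [["other_config", verb,
                               ["map", [["dpdk-init", val]]]]],
            })
        return json.dumps(["Open_vSwitch"] + ops)

    print(dpdk_init_mutations())
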
diff --git a/sysinv/cgts-client/cgts-client/cgtsclient/v1/iHost_shell.py b/sysinv/cgts-client/cgts-client/cgtsclient/v1/iHost_shell.py
index b24c02002e..47f7400764 100755
--- a/sysinv/cgts-client/cgts-client/cgtsclient/v1/iHost_shell.py
+++ b/sysinv/cgts-client/cgts-client/cgtsclient/v1/iHost_shell.py
@@ -609,10 +609,14 @@ def do_host_apply_memprofile(cc, args):

     # Echo list of new host memory
     imemory = cc.imemory.list(host.uuid)
     field_labels = ['uuid', 'vm_hugepages_1G', 'vm_hugepages_2M',
-                    'vm_hugepages_2M_pending', 'vm_hugepages_1G_pending']
+                    'vm_hugepages_2M_pending', 'vm_hugepages_1G_pending',
+                    'vswitch_hugepages_nr', 'vswitch_hugepages_reqd',
+                    'vswitch_hugepages_size_mib']
     fields = ['uuid', 'vm_hugepages_nr_1G', 'vm_hugepages_nr_2M',
-              'vm_hugepages_nr_2M_pending', 'vm_hugepages_nr_1G_pending']
+              'vm_hugepages_nr_2M_pending', 'vm_hugepages_nr_1G_pending',
+              'vswitch_hugepages_nr', 'vswitch_hugepages_reqd',
+              'vswitch_hugepages_size_mib']
     utils.print_list(imemory, fields, field_labels, sortby=0)
diff --git a/sysinv/cgts-client/cgts-client/cgtsclient/v1/imemory_shell.py b/sysinv/cgts-client/cgts-client/cgtsclient/v1/imemory_shell.py
index 601f198f29..761f983434 100644
--- a/sysinv/cgts-client/cgts-client/cgtsclient/v1/imemory_shell.py
+++ b/sysinv/cgts-client/cgts-client/cgtsclient/v1/imemory_shell.py
@@ -23,6 +23,7 @@ def _print_imemory_show(imemory):
         'vswitch_hugepages_size_mib',
         'vswitch_hugepages_nr',
         'vswitch_hugepages_avail',
+        'vswitch_hugepages_reqd',
         'vm_hugepages_nr_4K',
         'vm_hugepages_nr_2M',
         'vm_hugepages_nr_2M_pending',
@@ -39,6 +40,7 @@ def _print_imemory_show(imemory):
         'vSwitch Huge Pages: Size (MiB)',
         '                    Total',
         '                    Available',
+        '                    Required',
         'Application Pages (4K): Total',
         'Application Huge Pages (2M): Total',
         '                             Total Pending',
@@ -112,6 +114,7 @@ def do_host_memory_list(cc, args):
              'vswitch_hugepages_size_mib',
              'vswitch_hugepages_nr',
              'vswitch_hugepages_avail',
+             'vswitch_hugepages_reqd',
              'vm_hugepages_nr_4K',
              'vm_hugepages_nr_2M',
              'vm_hugepages_avail_2M',
@@ -129,6 +132,7 @@ def do_host_memory_list(cc, args):
                     'vs_hp_size(MiB)',
                     'vs_hp_total',
                     'vs_hp_avail',
+                    'vs_hp_reqd',
                     'vm_total_4K',
                     'vm_hp_total_2M',
                     'vm_hp_avail_2M',
@@ -150,18 +154,24 @@ def do_host_memory_list(cc, args):
 @utils.arg('-m', '--platform_reserved_mib',
            metavar='<mib>',
            help='The amount of platform memory (MiB) for the numa node')
-@utils.arg('-2M', '--vm_hugepages_nr_2M_pending',
+@utils.arg('-2M', '--hugepages_nr_2M_pending',
            metavar='<2M hugepages number>',
            help='The number of 2M vm huge pages for the numa node')
-@utils.arg('-1G', '--vm_hugepages_nr_1G_pending',
+@utils.arg('-1G', '--hugepages_nr_1G_pending',
            metavar='<1G hugepages number>',
            help='The number of 1G vm huge pages for the numa node')
+@utils.arg('-f', '--function',
+           metavar='<function>',
+           choices=['vswitch', 'vm'],
+           default='vm',
+           help='The memory function.')
 def do_host_memory_modify(cc, args):
     """Modify platform reserved and/or application huge page memory
        attributes for worker nodes."""

     rwfields = ['platform_reserved_mib',
-                'vm_hugepages_nr_2M_pending',
-                'vm_hugepages_nr_1G_pending']
+                'hugepages_nr_2M_pending',
+                'hugepages_nr_1G_pending',
+                'function']

     ihost = ihost_utils._find_ihost(cc, args.hostnameorid)
@@ -185,10 +195,31 @@ def do_host_memory_modify(cc, args):
         raise exc.CommandError('Processor not found: host %s processor %s' %
                                (ihost.hostname, args.numa_node))

+    function = user_specified_fields.get('function')
+    vswitch_hp_size_mib = None
+
     patch = []
     for (k, v) in user_specified_fields.items():
+        if k == 'function':
+            continue
+        if function == 'vswitch':
+            if k == 'hugepages_nr_2M_pending':
+                vswitch_hp_size_mib = 2
+                k = 'vswitch_hugepages_reqd'
+            elif k == 'hugepages_nr_1G_pending':
+                vswitch_hp_size_mib = 1024
+                k = 'vswitch_hugepages_reqd'
+        else:
+            if k == 'hugepages_nr_2M_pending':
+                k = 'vm_hugepages_nr_2M_pending'
+            elif k == 'hugepages_nr_1G_pending':
+                k = 'vm_hugepages_nr_1G_pending'
+
         patch.append({'op': 'replace', 'path': '/' + k, 'value': v})

     if patch:
+        if vswitch_hp_size_mib:
+            patch.append({'op': 'replace',
+                          'path': '/vswitch_hugepages_size_mib',
+                          'value': vswitch_hp_size_mib})
+
         imemory = cc.imemory.update(mem.uuid, patch)
         _print_imemory_show(imemory)
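
Annotation (not part of the patch): a worked sketch of the patch this remapping builds. Running the modified command as, say, "system host-memory-modify -f vswitch -1G 2 worker-0 0" (hostname and NUMA node hypothetical) routes the 1G count to vswitch_hugepages_reqd and appends the page size, producing the equivalent of:

    # argparse passes the page count through as a string; the size is
    # the integer set in the loop above (1024 MiB for 1G pages).
    patch = [
        {'op': 'replace', 'path': '/vswitch_hugepages_reqd', 'value': '2'},
        {'op': 'replace', 'path': '/vswitch_hugepages_size_mib', 'value': 1024},
    ]
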
diff --git a/sysinv/cgts-client/cgts-client/cgtsclient/v1/iprofile_shell.py b/sysinv/cgts-client/cgts-client/cgtsclient/v1/iprofile_shell.py
index b8d062286b..e812b7e7e8 100644
--- a/sysinv/cgts-client/cgts-client/cgtsclient/v1/iprofile_shell.py
+++ b/sysinv/cgts-client/cgts-client/cgtsclient/v1/iprofile_shell.py
@@ -552,12 +552,32 @@ def get_memoryconfig_1G(iprofile):
     return str


+def get_memoryconfig_vswitch_nr(iprofile):
+    str = ''
+    for memory in iprofile.memory:
+        if str != '':
+            str = str + "; "
+        str = str + "%s" % (memory.vswitch_hugepages_reqd)
+    return str
+
+
+def get_memoryconfig_vswitch_size(iprofile):
+    str = ''
+    for memory in iprofile.memory:
+        if str != '':
+            str = str + "; "
+        str = str + "%s" % (memory.vswitch_hugepages_size_mib)
+    return str
+
+
 def get_memprofile_data(cc, iprofile):
     iprofile.memory = cc.iprofile.list_imemorys(iprofile.uuid)
     iprofile.nodes = cc.iprofile.list_inodes(iprofile.uuid)
     iprofile.platform_reserved_mib = get_memoryconfig_platform(iprofile)
     iprofile.vm_hugepages_2M = get_memoryconfig_2M(iprofile)
     iprofile.vm_hugepages_1G = get_memoryconfig_1G(iprofile)
+    iprofile.vswitch_hugepages_nr = get_memoryconfig_vswitch_nr(iprofile)
+    iprofile.vswitch_hugepages_size_mib = get_memoryconfig_vswitch_size(iprofile)


 def do_memprofile_list(cc, args):
@@ -567,19 +587,25 @@ def do_memprofile_list(cc, args):
         profile.platform_reserved_mib = get_memoryconfig_platform(profile)
         profile.vm_hugepages_2M = get_memoryconfig_2M(profile)
         profile.vm_hugepages_1G = get_memoryconfig_1G(profile)
+        profile.vswitch_hugepages_nr = get_memoryconfig_vswitch_nr(profile)
+        profile.vswitch_hugepages_size_mib = get_memoryconfig_vswitch_size(profile)

     field_labels = ['uuid', 'name', 'platform_reserved_mib',
-                    'vm_hugepages_2M', 'vm_hugepages_1G']
+                    'vm_hugepages_2M', 'vm_hugepages_1G',
+                    'vswitch_hugepages_nr', 'vswitch_hugepages_size_mib']
     fields = ['uuid', 'profilename', 'platform_reserved_mib',
-              'vm_hugepages_2M', 'vm_hugepages_1G']
+              'vm_hugepages_2M', 'vm_hugepages_1G',
+              'vswitch_hugepages_nr', 'vswitch_hugepages_size_mib']
     utils.print_list(profiles, fields, field_labels, sortby=0)


 def _print_memprofile_show(memoryprofile):
     fields = ['profilename', 'platform_reserved_mib', 'vm_hugepages_2M',
-              'vm_hugepages_1G', 'uuid', 'created_at', 'updated_at']
+              'vm_hugepages_1G', 'vswitch_hugepages_nr',
+              'vswitch_hugepages_size_mib', 'uuid', 'created_at', 'updated_at']

     labels = ['name', 'platform_reserved_mib', 'vm_hugepages_2M',
-              'vm_hugepages_1G', 'uuid', 'created_at', 'updated_at']
+              'vm_hugepages_1G', 'vswitch_hugepages_nr',
+              'vswitch_hugepages_size_mib', 'uuid', 'created_at', 'updated_at']

     data = [(f, getattr(memoryprofile, f, '')) for f in fields]
     utils.print_tuple_list(data, labels)
diff --git a/sysinv/sysinv/sysinv/etc/sysinv/profileSchema.xsd b/sysinv/sysinv/sysinv/etc/sysinv/profileSchema.xsd
index e8fc5b3b30..cbf7eb07a0 100644
--- a/sysinv/sysinv/sysinv/etc/sysinv/profileSchema.xsd
+++ b/sysinv/sysinv/sysinv/etc/sysinv/profileSchema.xsd
@@ -54,6 +54,8 @@
+        <xs:element name="vsHugePagesNr" minOccurs="0"/>
+        <xs:element name="vsHugePagesSz" minOccurs="0"/>
diff --git a/sysinv/sysinv/sysinv/sysinv/agent/node.py b/sysinv/sysinv/sysinv/sysinv/agent/node.py
index 0a1d4cd3e0..ec921d9129 100644
--- a/sysinv/sysinv/sysinv/sysinv/agent/node.py
+++ b/sysinv/sysinv/sysinv/sysinv/agent/node.py
@@ -292,6 +292,36 @@ class NodeOperator(object):
         return [name for name in listdir(dir)
                 if os.path.isdir(join(dir, name))]

+    def _get_vswitch_reserved_memory(self, node):
+        # Read vswitch memory from worker_reserved.conf
+        vswitch_hugepages_nr = 0
+        vswitch_hugepages_size = 0
+        try:
+            with open('/etc/platform/worker_reserved.conf', 'r') as infile:
+                for line in infile:
+                    if line.startswith("COMPUTE_VSWITCH_MEMORY="):
+                        val = line.split("=")
+                        vswitch_reserves = val[1].strip('\n')[1:-1]
+                        for idx, reserve in enumerate(vswitch_reserves.split()):
+                            if idx != node:
+                                continue
+                            reserve = reserve.split(":")
+                            if reserve[0].strip('"') == "node%d" % node:
+                                pages_nr = re.sub('[^0-9]', '', reserve[2])
+                                pages_size = reserve[1]
+                                vswitch_hugepages_nr = int(pages_nr)
+                                if pages_size == "1048576kB":
+                                    vswitch_hugepages_size = SIZE_1G_MB
+                                else:
+                                    vswitch_hugepages_size = SIZE_2M_MB
+                            break
+        except Exception as e:
+            LOG.debug("Could not read vswitch reserved memory: %s", e)
+
+        return vswitch_hugepages_nr, vswitch_hugepages_size
+
     def _inode_get_memory_hugepages(self):
         """Collect hugepage info, including vswitch, and vm.
           Collect platform reserved if config.
@@ -354,36 +384,50 @@ class NodeOperator(object):
                     total_hp_mb = total_hp_mb + int(nr_hugepages * size)
                     free_hp_mb = free_hp_mb + int(free_hugepages * size)

+                    vs_hp_nr, vs_hp_size = self._get_vswitch_reserved_memory(
+                        node)
+                    if vs_hp_nr == 0 or vs_hp_size == 0:
+                        vs_hp_nr = VSWITCH_MEMORY_MB // size
+                        vs_hp_size = size
+
                     # Libvirt hugepages can be 1G and 2M
                     if size == SIZE_1G_MB:
-                        vswitch_hugepages_nr = VSWITCH_MEMORY_MB / size
-                        hp_attr = {
-                            'vswitch_hugepages_size_mib': size,
-                            'vswitch_hugepages_nr': vswitch_hugepages_nr,
-                            'vswitch_hugepages_avail': 0,
-                            'vm_hugepages_nr_1G':
-                                (nr_hugepages - vswitch_hugepages_nr),
+                        hp_attr = {}
+                        if vs_hp_size == size:
+                            nr_hugepages -= vs_hp_nr
+                            hp_attr.update({
+                                'vswitch_hugepages_size_mib': vs_hp_size,
+                                'vswitch_hugepages_nr': vs_hp_nr,
+                                'vswitch_hugepages_avail': 0
+                            })
+                        hp_attr.update({
+                            'vm_hugepages_nr_1G': nr_hugepages,
                             'vm_hugepages_avail_1G': free_hugepages,
                             'vm_hugepages_use_1G': 'True'
-                        }
+                        })
                     else:
                         if len(subdirs) == 1:
                             # No 1G hugepage support.
-                            vswitch_hugepages_nr = VSWITCH_MEMORY_MB / size
                             hp_attr = {
-                                'vswitch_hugepages_size_mib': size,
-                                'vswitch_hugepages_nr': vswitch_hugepages_nr,
-                                'vswitch_hugepages_avail': 0,
+                                'vm_hugepages_use_1G': 'False',
+                                'vswitch_hugepages_size_mib': vs_hp_size,
+                                'vswitch_hugepages_nr': vs_hp_nr,
+                                'vswitch_hugepages_avail': 0
                             }
-                            hp_attr.update({'vm_hugepages_use_1G': 'False'})
                         else:
-                            # vswitch will use 1G hugpages
-                            vswitch_hugepages_nr = 0
+                            hp_attr = {}
+                            if vs_hp_size == size and initial_report is False:
+                                # User manually set 2M pages
+                                nr_hugepages -= vs_hp_nr
+                                hp_attr.update({
+                                    'vswitch_hugepages_size_mib': vs_hp_size,
+                                    'vswitch_hugepages_nr': vs_hp_nr,
+                                    'vswitch_hugepages_avail': 0
+                                })
                         hp_attr.update({
                             'vm_hugepages_avail_2M': free_hugepages,
-                            'vm_hugepages_nr_2M':
-                                (nr_hugepages - vswitch_hugepages_nr)
+                            'vm_hugepages_nr_2M': nr_hugepages
                         })

                     attr.update(hp_attr)
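
Annotation (not part of the patch): a self-contained sketch of the parsing done by _get_vswitch_reserved_memory above, run against a sample COMPUTE_VSWITCH_MEMORY line whose format is inferred from the code (the sample values are invented):

    import re

    line = 'COMPUTE_VSWITCH_MEMORY=("node0:1048576kB:1" "node1:2048kB:512")'
    # Strip the variable name and the surrounding parentheses.
    reserves = line.split("=")[1].strip('\n')[1:-1]
    for idx, reserve in enumerate(reserves.split()):
        name, size, nr = reserve.split(":")
        print(idx, name.strip('"'), size, int(re.sub('[^0-9]', '', nr)))
    # -> 0 node0 1048576kB 1     (one 1G page on node 0)
    # -> 1 node1 2048kB 512      (512 2M pages on node 1)
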
diff --git a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py
index 3526299f0e..78ada95f2d 100644
--- a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py
+++ b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py
@@ -3511,7 +3511,10 @@ class HostController(rest.RestController):
             memtotal = m.node_memtotal_mib
             allocated = m.platform_reserved_mib
             if m.hugepages_configured:
-                allocated += m.vswitch_hugepages_nr * m.vswitch_hugepages_size_mib
+                if m.vswitch_hugepages_reqd is not None:
+                    allocated += m.vswitch_hugepages_reqd * m.vswitch_hugepages_size_mib
+                else:
+                    allocated += m.vswitch_hugepages_nr * m.vswitch_hugepages_size_mib
             if m.vm_hugepages_nr_2M_pending is not None:
                 allocated += constants.MIB_2M * m.vm_hugepages_nr_2M_pending
                 pending_2M_memory = True
@@ -5088,6 +5091,8 @@ class HostController(rest.RestController):
         mib_reserved_disk_io = 0
         align_2M_memory = False
         align_1G_memory = False
+        vswitch_hp_size = None
+
         for node in ihost_inodes:
             # If the reserved memory has changed (eg, due to patch that
             # changes common/constants.py), then push updated reserved memory
@@ -5097,6 +5102,15 @@ class HostController(rest.RestController):
             # of the original setting.
             self._auto_adjust_memory_for_node(ihost, node)

+            mems = pecan.request.dbapi.imemory_get_by_inode(node['id'])
+            for m in mems:
+                if not vswitch_hp_size:
+                    vswitch_hp_size = m.vswitch_hugepages_size_mib
+                else:
+                    if m.vswitch_hugepages_size_mib != vswitch_hp_size:
+                        raise wsme.exc.ClientSideError(_(
+                            "Mismatched vswitch socket memory hugepage size."))
+
             # check whether the pending hugepages changes and the current
             # platform reserved memory fit within the total memory available
             mib_reserved_node, pending_2M_memory, pending_1G_memory = \
diff --git a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/memory.py b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/memory.py
index a7f713485c..bbd4e4837d 100644
--- a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/memory.py
+++ b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/memory.py
@@ -34,6 +34,7 @@ from sysinv.api.controllers.v1 import collection
 from sysinv.api.controllers.v1 import link
 from sysinv.api.controllers.v1 import types
 from sysinv.api.controllers.v1 import utils
+from sysinv.common import constants
 from sysinv.common import exception
 from sysinv.common import utils as cutils
 from sysinv import objects
@@ -390,6 +391,9 @@ class MemoryController(rest.RestController):
         vm_hugepages_nr_2M_pending = None
         vm_hugepages_nr_1G_pending = None
+        vswitch_hugepages_reqd = None
+        vswitch_hugepages_size_mib = None
+
         platform_reserved_mib = None
         for p in patch:
             if p['path'] == '/platform_reserved_mib':
@@ -400,6 +404,12 @@ class MemoryController(rest.RestController):
             if p['path'] == '/vm_hugepages_nr_1G_pending':
                 vm_hugepages_nr_1G_pending = p['value']

+            if p['path'] == '/vswitch_hugepages_reqd':
+                vswitch_hugepages_reqd = p['value']
+
+            if p['path'] == '/vswitch_hugepages_size_mib':
+                vswitch_hugepages_size_mib = p['value']
+
         # The host must be locked
         if host_id:
             _check_host(host_id)
@@ -410,7 +420,8 @@ class MemoryController(rest.RestController):
         try:
             # Semantics checks and update hugepage memory accounting
             patch = _check_huge_values(rpc_port, patch,
-                vm_hugepages_nr_2M_pending, vm_hugepages_nr_1G_pending)
+                vm_hugepages_nr_2M_pending, vm_hugepages_nr_1G_pending,
+                vswitch_hugepages_reqd, vswitch_hugepages_size_mib)
         except wsme.exc.ClientSideError as e:
             inode = pecan.request.dbapi.inode_get(inode_id=rpc_port.forinodeid)
             numa_node = inode.numa_node
@@ -419,7 +430,8 @@ class MemoryController(rest.RestController):

         # Semantics checks for platform memory
         _check_memory(rpc_port, host_id, platform_reserved_mib,
-                      vm_hugepages_nr_2M_pending, vm_hugepages_nr_1G_pending)
+                      vm_hugepages_nr_2M_pending, vm_hugepages_nr_1G_pending,
+                      vswitch_hugepages_reqd, vswitch_hugepages_size_mib)

         # only allow patching allocated_function and capabilities
         # replace ihost_uuid and inode_uuid with corresponding
@@ -472,6 +484,9 @@
 def _update(mem_uuid, mem_values):
+    vswitch_hugepages_reqd = None
+    vswitch_hugepages_size_mib = None
+
     rpc_port = objects.memory.get_by_uuid(pecan.request.context, mem_uuid)
     if 'forihostid' in rpc_port:
         ihostId = rpc_port['forihostid']
@@ -483,29 +498,37 @@ def _update(mem_uuid, mem_values):
     if 'platform_reserved_mib' in mem_values:
         platform_reserved_mib = mem_values['platform_reserved_mib']

+    if 'vswitch_hugepages_size_mib' in mem_values:
+        vswitch_hugepages_size_mib = mem_values['vswitch_hugepages_size_mib']
+
+    if 'vswitch_hugepages_reqd' in mem_values:
+        vswitch_hugepages_reqd = mem_values['vswitch_hugepages_reqd']
+
     if 'vm_hugepages_nr_2M_pending' in mem_values:
         vm_hugepages_nr_2M_pending = \
             mem_values['vm_hugepages_nr_2M_pending']

     if 'vm_hugepages_nr_1G_pending' in mem_values:
         vm_hugepages_nr_1G_pending = mem_values['vm_hugepages_nr_1G_pending']

-    # The host must be locked
-    if host_id:
-        _check_host(host_id)
-    else:
-        raise wsme.exc.ClientSideError((
-            "Hostname or uuid must be defined"))
+    # The host must be locked
+    if host_id:
+        _check_host(host_id)
+    else:
+        raise wsme.exc.ClientSideError((
+            "Hostname or uuid must be defined"))

-    # Semantics checks and update hugepage memory accounting
-    mem_values = _check_huge_values(rpc_port, mem_values,
-        vm_hugepages_nr_2M_pending, vm_hugepages_nr_1G_pending)
+    # Semantics checks and update hugepage memory accounting
+    mem_values = _check_huge_values(rpc_port, mem_values,
+        vm_hugepages_nr_2M_pending, vm_hugepages_nr_1G_pending,
+        vswitch_hugepages_reqd, vswitch_hugepages_size_mib)

-    # Semantics checks for platform memory
-    _check_memory(rpc_port, host_id, platform_reserved_mib,
-                  vm_hugepages_nr_2M_pending, vm_hugepages_nr_1G_pending)
+    # Semantics checks for platform memory
+    _check_memory(rpc_port, host_id, platform_reserved_mib,
+                  vm_hugepages_nr_2M_pending, vm_hugepages_nr_1G_pending,
+                  vswitch_hugepages_reqd)

-    # update memory values
-    pecan.request.dbapi.imemory_update(mem_uuid, mem_values)
+    # update memory values
+    pecan.request.dbapi.imemory_update(mem_uuid, mem_values)


 def _check_host(ihost):
@@ -523,7 +546,8 @@ def _check_host(ihost):


 def _check_memory(rpc_port, ihost, platform_reserved_mib=None,
-                  vm_hugepages_nr_2M_pending=None, vm_hugepages_nr_1G_pending=None):
+                  vm_hugepages_nr_2M_pending=None, vm_hugepages_nr_1G_pending=None,
+                  vswitch_hugepages_reqd=None, vswitch_hugepages_size_mib=None):
     if platform_reserved_mib:
         # Check for invalid characters
         try:
@@ -577,7 +601,13 @@ def _check_memory(rpc_port, ihost, platform_reserved_mib=None,
         LOG.debug("vm total=%s" % (mem_alloc))

         vs_hp_size = rpc_port['vswitch_hugepages_size_mib']
-        vs_hp_nr = rpc_port['vswitch_hugepages_nr']
+        if vswitch_hugepages_reqd:
+            vs_hp_nr = int(vswitch_hugepages_reqd)
+        elif rpc_port['vswitch_hugepages_nr']:
+            vs_hp_nr = int(rpc_port['vswitch_hugepages_nr'])
+        else:
+            vs_hp_nr = 0
+
         mem_alloc += vs_hp_size * vs_hp_nr
         LOG.debug("vs_hp_nr=%s vs_hp_size=%s" % (vs_hp_nr, vs_hp_size))
         LOG.debug("memTotal %s mem_alloc %s" % (node_memtotal_mib, mem_alloc))
greater than zero")) + if ((val & (val - 1)) != 0): + raise wsme.exc.ClientSideError(_( + "Vswitch hugepage size (Mib) must be a power of 2")) + + # None == unchanged + if vswitch_hugepages_reqd is not None: + new_vs_pages = int(vswitch_hugepages_reqd) + elif rpc_port['vswitch_hugepages_nr']: + new_vs_pages = rpc_port['vswitch_hugepages_nr'] + else: + new_vs_pages = 0 + + # None == unchanged + if vswitch_hugepages_size_mib is not None: + vs_hp_size_mib = int(vswitch_hugepages_size_mib) + elif rpc_port['vswitch_hugepages_size_mib']: + vs_hp_size_mib = rpc_port['vswitch_hugepages_size_mib'] + else: + # default + vs_hp_size_mib = constants.MIB_2M + + vs_hp_reqd_mib = new_vs_pages * vs_hp_size_mib + + # Throttle the maximum amount of memory that vswitch can take to + # 90% of usable memory to account for fluctuations in the reported + # node mem total. + vs_hp_avail_mib = 0.9 * (rpc_port['node_memtotal_mib'] - + rpc_port['platform_reserved_mib'] - + vm_hugepages_nr_2M*constants.MIB_2M - + vm_hugepages_nr_1G*constants.MIB_1G) + + if vs_hp_avail_mib < vs_hp_reqd_mib: + if vs_hp_size_mib == constants.MIB_2M: + vs_possible_2M = int(vs_hp_avail_mib / constants.MIB_2M) + msg = _("No available space for 2M vswitch huge page allocation, " + "max 2M vswitch pages: %d") % vs_possible_2M + elif vs_hp_size_mib == constants.MIB_1G: + vs_possible_1G = int(vs_hp_avail_mib / constants.MIB_1G) + msg = _("No available space for 1G vswitch huge page allocation, " + "max 1G vswitch pages: %d") % vs_possible_1G + else: + msg = _("No available space for vswitch huge page allocation, " + "max memory (MB): %d") % vs_hp_avail_mib + raise wsme.exc.ClientSideError(msg) + + return patch + + +def _check_huge_values(rpc_port, patch, vm_hugepages_nr_2M=None, + vm_hugepages_nr_1G=None, vswitch_hugepages_reqd=None, + vswitch_hugepages_size_mib=None): + + if rpc_port['vm_hugepages_use_1G'] == 'False': + vs_hp_size = vswitch_hugepages_size_mib + if (vm_hugepages_nr_1G or vs_hp_size == constants.MIB_1G): + # cannot provision 1G huge pages if the processor does not support + # them + raise wsme.exc.ClientSideError(_( + "Processor does not support 1G huge pages.")) # Check for invalid characters if vm_hugepages_nr_2M: @@ -675,17 +783,17 @@ def _check_huge_values(rpc_port, patch, vm_hugepages_nr_2M=None, if rpc_port['vm_hugepages_possible_1G']: vm_possible_1G = int(rpc_port['vm_hugepages_possible_1G']) - LOG.debug("max possible 2M pages: %s, max possible 1G pages: %s" % + LOG.debug("max possible 2M VM pages: %s, max possible 1G VM pages: %s" % (vm_possible_2M, vm_possible_1G)) if vm_possible_2M < new_2M_pages: - msg = _("No available space for 2M huge page allocation, " - "max 2M pages: %d") % vm_possible_2M + msg = _("No available space for 2M VM huge page allocation, " + "max 2M VM pages: %d") % vm_possible_2M raise wsme.exc.ClientSideError(msg) if vm_possible_1G < new_1G_pages: - msg = _("No available space for 1G huge page allocation, " - "max 1G pages: %d") % vm_possible_1G + msg = _("No available space for 1G VM huge page allocation, " + "max 1G VM pages: %d") % vm_possible_1G raise wsme.exc.ClientSideError(msg) # always use vm_possible_2M to compare, @@ -693,18 +801,22 @@ def _check_huge_values(rpc_port, patch, vm_hugepages_nr_2M=None, max_1G = int((vm_possible_2M - new_2M_pages) / num_2M_for_1G) max_2M = vm_possible_2M - new_1G_pages * num_2M_for_1G if new_2M_pages > 0 and new_1G_pages > 0: - msg = _("No available space for new settings." + msg = _("No available space for new VM hugepage settings." 
"Max 1G pages is %s when 2M is %s, or " "Max 2M pages is %s when 1G is %s." % ( max_1G, new_2M_pages, max_2M, new_1G_pages )) elif new_1G_pages > 0: - msg = _("No available space for 1G huge page allocation, " - "max 1G pages: %d") % vm_possible_1G + msg = _("No available space for 1G VM huge page allocation, " + "max 1G VM pages: %d") % vm_possible_1G else: - msg = _("No available space for 2M huge page allocation, " - "max 2M pages: %d") % vm_possible_2M + msg = _("No available space for 2M VM huge page allocation, " + "max 2M VM pages: %d") % vm_possible_2M raise wsme.exc.ClientSideError(msg) + _check_vswitch_huge_values( + rpc_port, patch, new_2M_pages, new_1G_pages, + vswitch_hugepages_reqd, vswitch_hugepages_size_mib) + return patch diff --git a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/profile.py b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/profile.py index 3cddb8a4ea..438c0127fb 100644 --- a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/profile.py +++ b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/profile.py @@ -1532,6 +1532,8 @@ def _create_mem_profile(profile_name, profile_node): platform_reserved = get_mem_assignment(profile_node, "platformReservedMiB") vm_hp_2m = get_mem_assignment(profile_node, "vmHugePages2M") vm_hp_1g = get_mem_assignment(profile_node, "vmHugePages1G") + vs_hp_nr = get_mem_assignment(profile_node, "vsHugePagesNr") + vs_hp_sz = get_mem_assignment(profile_node, "vsHugePagesSz") except profile_utils.InvalidProfileData as e: return "Error", _('error: CPU profile %s is invalid') % profile_name, e.message @@ -1559,6 +1561,8 @@ def _create_mem_profile(profile_name, profile_node): mdict['platform_reserved_mib'] = get_mem_size(platform_reserved, node_idx) mdict['vm_hugepages_nr_2M_pending'] = get_mem_size(vm_hp_2m, node_idx) mdict['vm_hugepages_nr_1G_pending'] = get_mem_size(vm_hp_1g, node_idx) + mdict['vswitch_hugepages_reqd'] = get_mem_size(vs_hp_nr, node_idx) + mdict['vswitch_hugepages_size_mib'] = get_mem_size(vs_hp_sz, node_idx) pecan.request.dbapi.imemory_create(iprofile_id, mdict) node_idx += 1 @@ -2326,6 +2330,8 @@ def memoryprofile_copy_data(host, profile): mdict['platform_reserved_mib'] = m.platform_reserved_mib mdict['vm_hugepages_nr_2M_pending'] = m.vm_hugepages_nr_2M mdict['vm_hugepages_nr_1G_pending'] = m.vm_hugepages_nr_1G + mdict['vswitch_hugepages_reqd'] = m.vswitch_hugepages_nr + mdict['vswitch_hugepages_size_mib'] = m.vswitch_hugepages_size_mib newmemory = pecan.request.dbapi.imemory_create(iprofile_id, mdict) # if memory wasn't actualy created, @@ -3177,7 +3183,9 @@ def memoryprofile_apply_to_host(host, profile): if int(host_inode.numa_node) == int(profile_inode.numa_node): data = {'vm_hugepages_nr_2M_pending': pmem.vm_hugepages_nr_2M_pending, 'vm_hugepages_nr_1G_pending': pmem.vm_hugepages_nr_1G_pending, - 'platform_reserved_mib': pmem.platform_reserved_mib} + 'platform_reserved_mib': pmem.platform_reserved_mib, + 'vswitch_hugepages_reqd': pmem.vswitch_hugepages_reqd, + 'vswitch_hugepages_size_mib': pmem.vswitch_hugepages_size_mib} try: memory_api._update(hmem.uuid, data) except wsme.exc.ClientSideError as cse: diff --git a/sysinv/sysinv/sysinv/sysinv/conductor/manager.py b/sysinv/sysinv/sysinv/sysinv/conductor/manager.py index ef66d21c64..4a7f33541a 100644 --- a/sysinv/sysinv/sysinv/sysinv/conductor/manager.py +++ b/sysinv/sysinv/sysinv/sysinv/conductor/manager.py @@ -2834,6 +2834,7 @@ class ConductorManager(service.PeriodicService): if ihost.administrative == constants.ADMIN_UNLOCKED: mem_dict['vm_hugepages_nr_2M_pending'] = None 
diff --git a/sysinv/sysinv/sysinv/sysinv/puppet/ovs.py b/sysinv/sysinv/sysinv/sysinv/puppet/ovs.py
index 0fffffb52a..ced5410d35 100644
--- a/sysinv/sysinv/sysinv/sysinv/puppet/ovs.py
+++ b/sysinv/sysinv/sysinv/sysinv/puppet/ovs.py
@@ -331,18 +331,33 @@ class OVSPuppet(base.BasePuppet):

     def _get_memory_config(self, host):
         vswitch_memory = []
+        config = {}
+        vswitch_size = 0

         host_memory = self.dbapi.imemory_get_by_ihost(host.id)
         for memory in host_memory:
             vswitch_size = memory.vswitch_hugepages_size_mib
-            vswitch_pages = memory.vswitch_hugepages_nr
+            vswitch_pages = memory.vswitch_hugepages_reqd \
+                if memory.vswitch_hugepages_reqd is not None \
+                else memory.vswitch_hugepages_nr
+
+            if vswitch_pages == 0:
+                vswitch_pages = memory.vswitch_hugepages_nr
+
             vswitch_memory.append(str(vswitch_size * vswitch_pages))

         dpdk_socket_mem = self.quoted_str(','.join(vswitch_memory))

-        return {
+        config.update({
             'vswitch::dpdk::socket_mem': dpdk_socket_mem
-        }
+        })
+
+        if vswitch_size == constants.MIB_2M:
+            config.update({
+                'platform::vswitch::params::hugepage_dir': '/mnt/huge-2048kB'
+            })
+
+        return config

     def _get_virtual_config(self, host):
         config = {}
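
Annotation (not part of the patch): an illustrative result of _get_memory_config for a hypothetical two-NUMA-node worker with 1024 MiB of vswitch memory per node in 2M pages, assuming quoted_str wraps the comma-joined list in double quotes:

    # Hieradata sketch; keys come from the code above, values are invented.
    config = {
        'vswitch::dpdk::socket_mem': '"1024,1024"',
        'platform::vswitch::params::hugepage_dir': '/mnt/huge-2048kB',
    }
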
diff --git a/sysinv/sysinv/sysinv/sysinv/puppet/platform.py b/sysinv/sysinv/sysinv/sysinv/puppet/platform.py
index 933bfd19ef..79392e94ff 100644
--- a/sysinv/sysinv/sysinv/sysinv/puppet/platform.py
+++ b/sysinv/sysinv/sysinv/sysinv/puppet/platform.py
@@ -674,6 +674,8 @@ class PlatformPuppet(base.BasePuppet):
         vm_2M_pages = []
         vm_1G_pages = []

+        vs_pages_updated = False
+
         for node, memory_list in memory_numa_list.items():
             memory = memory_list[0]

@@ -686,7 +688,13 @@ class PlatformPuppet(base.BasePuppet):
             platform_nodes.append(platform_node)

             vswitch_size = memory.vswitch_hugepages_size_mib
-            vswitch_pages = memory.vswitch_hugepages_nr
+            vswitch_pages = memory.vswitch_hugepages_reqd \
+                if memory.vswitch_hugepages_reqd is not None \
+                else memory.vswitch_hugepages_nr
+
+            if vswitch_pages == 0:
+                vswitch_pages = memory.vswitch_hugepages_nr
+
             vswitch_node = "\"node%d:%dkB:%d\"" % (
                 node, vswitch_size * 1024, vswitch_pages)
             vswitch_nodes.append(vswitch_node)
@@ -704,11 +712,11 @@ class PlatformPuppet(base.BasePuppet):
             total_hugepages_1G = vm_hugepages_nr_1G

             if memory.vswitch_hugepages_size_mib == constants.MIB_2M:
-                total_hugepages_2M += memory.vswitch_hugepages_nr
-                vswitch_2M_page += memory.vswitch_hugepages_nr
+                total_hugepages_2M += vswitch_pages
+                vswitch_2M_page += vswitch_pages
             elif memory.vswitch_hugepages_size_mib == constants.MIB_1G:
-                total_hugepages_1G += memory.vswitch_hugepages_nr
-                vswitch_1G_page += memory.vswitch_hugepages_nr
+                total_hugepages_1G += vswitch_pages
+                vswitch_1G_page += vswitch_pages

             vswitch_2M_pages.append(vswitch_2M_page)
             vswitch_1G_pages.append(vswitch_1G_page)
@@ -724,6 +732,10 @@ class PlatformPuppet(base.BasePuppet):
             vm_2M_pages.append(vm_hugepages_nr_2M)
             vm_1G_pages.append(vm_hugepages_nr_1G)

+            if (memory.vswitch_hugepages_reqd and
+                    vswitch_pages != memory.vswitch_hugepages_nr):
+                vs_pages_updated = True
+
         platform_reserved_memory = "(%s)" % ' '.join(platform_nodes)
         vswitch_reserved_memory = "(%s)" % ' '.join(vswitch_nodes)

@@ -756,6 +768,17 @@ class PlatformPuppet(base.BasePuppet):
             'platform::compute::hugepage::params::vm_1G_pages': vm_1G,
         })

+        if vs_pages_updated:
+            grub_hugepages_1G = "hugepagesz=1G hugepages=%d" % (
+                sum(vswitch_1G_pages) + sum(vm_1G_pages))
+            config.update({
+                'platform::compute::grub::params::g_hugepages':
+                    grub_hugepages_1G,
+            })
+            if sum(vswitch_2M_pages) > 0:
+                config.update({
+                    'platform::vswitch::params::hugepage_dir': '/mnt/huge-2048kB'
+                })
         return config
diff --git a/sysinv/sysinv/sysinv/sysinv/tests/api/test_profile.py b/sysinv/sysinv/sysinv/sysinv/tests/api/test_profile.py
index a9b758b2c7..3643c1b798 100644
--- a/sysinv/sysinv/sysinv/sysinv/tests/api/test_profile.py
+++ b/sysinv/sysinv/sysinv/sysinv/tests/api/test_profile.py
@@ -328,6 +328,8 @@ class ProfileApplyTestCase(ProfileTestCase):
                          profile_r['imemorys'][0]['vm_hugepages_nr_2M_pending'])
         self.assertEqual(hostmem_r['imemorys'][0]['vm_hugepages_nr_1G_pending'],
                          profile_r['imemorys'][0]['vm_hugepages_nr_1G_pending'])
+        self.assertEqual(hostmem_r['imemorys'][0]['vswitch_hugepages_reqd'],
+                         profile_r['imemorys'][0]['vswitch_hugepages_reqd'])

     def test_apply_storage_success(self):
         self.profile["profiletype"] = constants.PROFILE_TYPE_LOCAL_STORAGE
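
Annotation (not part of the patch): a worked sketch of the grub override that ties platform.py back to the new $g_hugepages parameter in compute.pp. With invented per-node page counts, the generated value replaces the default "hugepagesz=1G hugepages=${::number_of_numa_nodes}" kernel argument:

    vswitch_1G_pages = [1, 1]   # hypothetical per-node vswitch 1G pages
    vm_1G_pages = [4, 4]        # hypothetical per-node VM 1G pages
    grub_hugepages_1G = "hugepagesz=1G hugepages=%d" % (
        sum(vswitch_1G_pages) + sum(vm_1G_pages))
    print(grub_hugepages_1G)    # -> hugepagesz=1G hugepages=10
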