Enable configurable vswitch memory

Currently, a DPDK-enabled vswitch makes use of a fixed allocation of
one 1G hugepage per NUMA node to enable an optimized datapath.

In the case of OVS-DPDK, this can cause an issue when changing the
MTU of one or more interfaces, as a separate mempool is allocated
for each MTU size.  If the minimal mempool size(s) cannot fit within
the 1G page, DPDK memory initialization will fail.

This commit allows an operator to configure the amount of hugepage
memory allocated to each socket on a host, which can enable
jumbo frame support for OVS-DPDK.

The system host-memory-modify command has been modified to accept
vswitch hugepage configuration via the function (-f) flag, e.g.:

system host-memory-modify -f vswitch -1G 4 <worker_name> <node>
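The requested allocation can then be confirmed via the host memory
listing, which now reports the vswitch hugepage fields (vs_hp_reqd,
vs_hp_total, vs_hp_size(MiB)):

system host-memory-list <worker_name>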

Story: 2004472
Task: 28162
Change-Id: Ie58d1b8317b4eb3c498524db6bd53ffff3bf1a39
Signed-off-by: Steven Webster <steven.webster@windriver.com>
Steven Webster 2018-12-17 15:41:54 -05:00
parent 1b8a344edd
commit 74baed87de
15 changed files with 377 additions and 71 deletions

View File

@@ -21,6 +21,7 @@ class platform::compute::grub::params (
$n_cpus = '',
$cpu_options = '',
$m_hugepages = 'hugepagesz=2M hugepages=0',
$g_hugepages = undef,
$default_pgsz = 'default_hugepagesz=2M',
$keys = [
'kvm-intel.eptad',
@@ -42,7 +43,11 @@ class platform::compute::grub::params (
}
if $::is_gb_page_supported {
$gb_hugepages = "hugepagesz=1G hugepages=${::number_of_numa_nodes}"
if $g_hugepages != undef {
$gb_hugepages = $g_hugepages
} else {
$gb_hugepages = "hugepagesz=1G hugepages=${::number_of_numa_nodes}"
}
} else {
$gb_hugepages = ''
}
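For illustration (hypothetical values): with ::number_of_numa_nodes == 2
and no override, $gb_hugepages resolves to "hugepagesz=1G hugepages=2";
when sysinv supplies g_hugepages (see the platform puppet change below),
that string, e.g. "hugepagesz=1G hugepages=12", is used verbatim instead.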

View File

@@ -95,6 +95,20 @@ class platform::vswitch::ovs(
} elsif $::platform::params::vswitch_type == 'ovs-dpdk' {
include ::vswitch::dpdk
# Since OVS socket memory is configurable, the ovsdb server must be
# started and DPDK initialization disabled before the openvswitch
# service runs, to prevent any previously stored OVSDB configuration
# from being used before the new Vs_config gets applied.
service { 'ovsdb-server':
ensure => 'running',
before => Service['openvswitch'],
}
exec { 'disable dpdk initialization':
command => template('platform/ovs.disable-dpdk-init.erb'),
provider => shell,
require => Service['ovsdb-server']
}
Exec['vfio-iommu-mode']
-> Platform::Vswitch::Ovs::Device<||>
-> Platform::Vswitch::Ovs::Bridge<||>

View File

@@ -0,0 +1,5 @@
# Disable DPDK initialization in ovsdb
# ovs-vsctl is not used here as it can fail after the initial start of ovsdb
# (even though the dpdk-init parameter actually gets applied).
ovsdb-client -v transact '["Open_vSwitch", {"op" : "mutate", "table": "Open_vSwitch", "where": [], "mutations" : [["other_config","delete", ["map",[["dpdk-init", "true"]]]]]}]'
ovsdb-client -v transact '["Open_vSwitch", {"op" : "mutate", "table": "Open_vSwitch", "where": [], "mutations" : [["other_config","insert", ["map",[["dpdk-init", "false"]]]]]}]'
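# Note: the two transactions above should be roughly equivalent to
# "ovs-vsctl --no-wait set Open_vSwitch . other_config:dpdk-init=false",
# modulo the reliability issue described at the top of this file.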

View File

@@ -609,10 +609,14 @@ def do_host_apply_memprofile(cc, args):
# Echo list of new host memory
imemory = cc.imemory.list(host.uuid)
field_labels = ['uuid', 'vm_hugepages_1G', 'vm_hugepages_2M',
'vm_hugepages_2M_pending', 'vm_hugepages_1G_pending']
'vm_hugepages_2M_pending', 'vm_hugepages_1G_pending',
'vswitch_hugepages_nr', 'vswitch_hugepages_size_reqd',
'vswitch_hugepages_size_mib']
fields = ['uuid', 'vm_hugepages_nr_1G', 'vm_hugepages_nr_2M',
'vm_hugepages_nr_2M_pending', 'vm_hugepages_nr_1G_pending']
'vm_hugepages_nr_2M_pending', 'vm_hugepages_nr_1G_pending',
'vswitch_hugepages_nr', 'vswitch_hugepages_reqd',
'vswitch_hugepages_size_mib']
utils.print_list(imemory, fields, field_labels, sortby=0)

View File

@@ -23,6 +23,7 @@ def _print_imemory_show(imemory):
'vswitch_hugepages_size_mib',
'vswitch_hugepages_nr',
'vswitch_hugepages_avail',
'vswitch_hugepages_reqd',
'vm_hugepages_nr_4K',
'vm_hugepages_nr_2M',
'vm_hugepages_nr_2M_pending',
@@ -39,6 +40,7 @@ def _print_imemory_show(imemory):
'vSwitch Huge Pages: Size (MiB)',
' Total',
' Available',
' Required',
'Application Pages (4K): Total',
'Application Huge Pages (2M): Total',
' Total Pending',
@@ -112,6 +114,7 @@ def do_host_memory_list(cc, args):
'vswitch_hugepages_size_mib',
'vswitch_hugepages_nr',
'vswitch_hugepages_avail',
'vswitch_hugepages_reqd',
'vm_hugepages_nr_4K',
'vm_hugepages_nr_2M',
'vm_hugepages_avail_2M',
@@ -129,6 +132,7 @@ def do_host_memory_list(cc, args):
'vs_hp_size(MiB)',
'vs_hp_total',
'vs_hp_avail',
'vs_hp_reqd',
'vm_total_4K',
'vm_hp_total_2M',
'vm_hp_avail_2M',
@@ -150,18 +154,24 @@ def do_host_memory_list(cc, args):
@utils.arg('-m', '--platform_reserved_mib',
metavar='<Platform Reserved MiB>',
help='The amount of platform memory (MiB) for the numa node')
@utils.arg('-2M', '--vm_hugepages_nr_2M_pending',
@utils.arg('-2M', '--hugepages_nr_2M_pending',
metavar='<2M hugepages number>',
help='The number of 2M huge pages for the numa node')
@utils.arg('-1G', '--vm_hugepages_nr_1G_pending',
@utils.arg('-1G', '--hugepages_nr_1G_pending',
metavar='<1G hugepages number>',
help='The number of 1G huge pages for the numa node')
@utils.arg('-f', '--function',
metavar='<function>',
choices=['vswitch', 'vm'],
default='vm',
help='The Memory Function.')
def do_host_memory_modify(cc, args):
"""Modify platform reserved and/or application huge page memory attributes for worker nodes."""
rwfields = ['platform_reserved_mib',
'vm_hugepages_nr_2M_pending',
'vm_hugepages_nr_1G_pending']
'hugepages_nr_2M_pending',
'hugepages_nr_1G_pending',
'function']
ihost = ihost_utils._find_ihost(cc, args.hostnameorid)
@@ -185,10 +195,31 @@ def do_host_memory_modify(cc, args):
raise exc.CommandError('Processor not found: host %s processor %s' %
(ihost.hostname, args.numa_node))
function = user_specified_fields.get('function')
vswitch_hp_size_mib = None
patch = []
for (k, v) in user_specified_fields.items():
if k == 'function':
continue
if function == 'vswitch':
if k == 'hugepages_nr_2M_pending':
vswitch_hp_size_mib = 2
k = 'vswitch_hugepages_reqd'
elif k == 'hugepages_nr_1G_pending':
vswitch_hp_size_mib = 1024
k = 'vswitch_hugepages_reqd'
else:
if k == 'hugepages_nr_2M_pending':
k = 'vm_hugepages_nr_2M_pending'
elif k == 'hugepages_nr_1G_pending':
k = 'vm_hugepages_nr_1G_pending'
patch.append({'op': 'replace', 'path': '/' + k, 'value': v})
if patch:
if vswitch_hp_size_mib:
patch.append({'op': 'replace', 'path': '/vswitch_hugepages_size_mib',
              'value': vswitch_hp_size_mib})
imemory = cc.imemory.update(mem.uuid, patch)
_print_imemory_show(imemory)
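For the commit-message example (-f vswitch -1G 4), this loop produces a
patch along the lines of (illustrative; CLI values arrive as strings):

patch = [
    {'op': 'replace', 'path': '/vswitch_hugepages_reqd', 'value': '4'},
    {'op': 'replace', 'path': '/vswitch_hugepages_size_mib', 'value': 1024},
]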

View File

@@ -552,12 +552,32 @@ def get_memoryconfig_1G(iprofile):
return str
def get_memoryconfig_vswitch_nr(iprofile):
str = ''
for memory in iprofile.memory:
if str != '':
str = str + "; "
str = str + "%s" % (memory.vswitch_hugepages_reqd)
return str
def get_memoryconfig_vswitch_size(iprofile):
str = ''
for memory in iprofile.memory:
if str != '':
str = str + "; "
str = str + "%s" % (memory.vswitch_hugepages_size_mib)
return str
def get_memprofile_data(cc, iprofile):
iprofile.memory = cc.iprofile.list_imemorys(iprofile.uuid)
iprofile.nodes = cc.iprofile.list_inodes(iprofile.uuid)
iprofile.platform_reserved_mib = get_memoryconfig_platform(iprofile)
iprofile.vm_hugepages_2M = get_memoryconfig_2M(iprofile)
iprofile.vm_hugepages_1G = get_memoryconfig_1G(iprofile)
iprofile.vswitch_hugepages_nr = get_memoryconfig_vswitch_nr(iprofile)
iprofile.vswitch_hugepages_size_mib = get_memoryconfig_vswitch_size(iprofile)
def do_memprofile_list(cc, args):
@@ -567,19 +587,25 @@ def do_memprofile_list(cc, args):
profile.platform_reserved_mib = get_memoryconfig_platform(profile)
profile.vm_hugepages_2M = get_memoryconfig_2M(profile)
profile.vm_hugepages_1G = get_memoryconfig_1G(profile)
profile.vswitch_hugepages_nr = get_memoryconfig_vswitch_nr(profile)
profile.vswitch_hugepages_size_mib = get_memoryconfig_vswitch_size(profile)
field_labels = ['uuid', 'name', 'platform_reserved_mib',
'vm_hugepages_2M', 'vm_hugepages_1G']
'vm_hugepages_2M', 'vm_hugepages_1G',
'vswitch_hugepages_nr', 'vswitch_hugepages_size_mib']
fields = ['uuid', 'profilename', 'platform_reserved_mib',
'vm_hugepages_2M', 'vm_hugepages_1G']
'vm_hugepages_2M', 'vm_hugepages_1G',
'vswitch_hugepages_nr', 'vswitch_hugepages_size_mib']
utils.print_list(profiles, fields, field_labels, sortby=0)
def _print_memprofile_show(memoryprofile):
fields = ['profilename', 'platform_reserved_mib', 'vm_hugepages_2M',
'vm_hugepages_1G', 'uuid', 'created_at', 'updated_at']
'vm_hugepages_1G', 'vswitch_hugepages_nr',
'vswitch_hugepages_size_mib', 'uuid', 'created_at', 'updated_at']
labels = ['name', 'platform_reserved_mib', 'vm_hugepages_2M',
'vm_hugepages_1G', 'uuid', 'created_at', 'updated_at']
'vm_hugepages_1G', 'vswitch_hugepages_nr',
'vswitch_hugepages_size_mib', 'uuid', 'created_at', 'updated_at']
data = [(f, getattr(memoryprofile, f, '')) for f in fields]
utils.print_tuple_list(data, labels)

View File

@@ -54,6 +54,8 @@
<xs:element name="platformReservedMiB" type="memoryAllocation" minOccurs="0" maxOccurs="1"/>
<xs:element name="vmHugePages2M" type="memoryAllocation" minOccurs="0" maxOccurs="1"/>
<xs:element name="vmHugePages1G" type="memoryAllocation" minOccurs="0" maxOccurs="1"/>
<xs:element name="vsHugePagesNr" type="memoryAllocation" minOccurs="0" maxOccurs="1"/>
<xs:element name="vsHugePagesSz" type="memoryAllocation" minOccurs="0" maxOccurs="1"/>
</xs:sequence>
<xs:attribute type="xs:string" name="name" use="required" />
</xs:complexType>

View File

@@ -292,6 +292,36 @@ class NodeOperator(object):
return [name for name in listdir(dir)
if os.path.isdir(join(dir, name))]
def _get_vswitch_reserved_memory(self, node):
# Read vswitch memory from worker_reserved.conf
vswitch_hugepages_nr = 0
vswitch_hugepages_size = 0
try:
with open('/etc/platform/worker_reserved.conf', 'r') as infile:
for line in infile:
if line.startswith("COMPUTE_VSWITCH_MEMORY="):
val = line.split("=")
vswitch_reserves = val[1].strip('\n')[1:-1]
for idx, reserve in enumerate(vswitch_reserves.split()):
if idx != node:
continue
reserve = reserve.split(":")
if reserve[0].strip('"') == "node%d" % node:
pages_nr = re.sub('[^0-9]', '', reserve[2])
pages_size = reserve[1]
vswitch_hugepages_nr = int(pages_nr)
if pages_size == "1048576kB":
vswitch_hugepages_size = SIZE_1G_MB
else:
vswitch_hugepages_size = SIZE_2M_MB
break
except Exception as e:
LOG.debug("Could not read vswitch reserved memory: %s", e)
return vswitch_hugepages_nr, vswitch_hugepages_size
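The line format this parser expects, inferred from the split logic above
(hypothetical example; one "node<N>:<size>:<pages>" entry per NUMA node):

COMPUTE_VSWITCH_MEMORY=("node0:1048576kB:1" "node1:2048kB:512")

Here node0 reserves one 1G page and node1 reserves 512 x 2M pages.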
def _inode_get_memory_hugepages(self):
"""Collect hugepage info, including vswitch, and vm.
Collect platform reserved if config.
@@ -354,36 +384,50 @@ class NodeOperator(object):
total_hp_mb = total_hp_mb + int(nr_hugepages * size)
free_hp_mb = free_hp_mb + int(free_hugepages * size)
vs_hp_nr, vs_hp_size = self._get_vswitch_reserved_memory(
node)
if vs_hp_nr == 0 or vs_hp_size == 0:
vs_hp_nr = VSWITCH_MEMORY_MB // size
vs_hp_size = size
# Libvirt hugepages can be 1G and 2M
if size == SIZE_1G_MB:
vswitch_hugepages_nr = VSWITCH_MEMORY_MB / size
hp_attr = {
'vswitch_hugepages_size_mib': size,
'vswitch_hugepages_nr': vswitch_hugepages_nr,
'vswitch_hugepages_avail': 0,
'vm_hugepages_nr_1G':
(nr_hugepages - vswitch_hugepages_nr),
hp_attr = {}
if vs_hp_size == size:
nr_hugepages -= vs_hp_nr
hp_attr.update({
'vswitch_hugepages_size_mib': vs_hp_size,
'vswitch_hugepages_nr': vs_hp_nr,
'vswitch_hugepages_avail': 0
})
hp_attr.update({
'vm_hugepages_nr_1G': nr_hugepages,
'vm_hugepages_avail_1G': free_hugepages,
'vm_hugepages_use_1G': 'True'
}
})
else:
if len(subdirs) == 1:
# No 1G hugepage support.
vswitch_hugepages_nr = VSWITCH_MEMORY_MB / size
hp_attr = {
'vswitch_hugepages_size_mib': size,
'vswitch_hugepages_nr': vswitch_hugepages_nr,
'vswitch_hugepages_avail': 0,
'vm_hugepages_use_1G': 'False',
'vswitch_hugepages_size_mib': vs_hp_size,
'vswitch_hugepages_nr': vs_hp_nr,
'vswitch_hugepages_avail': 0
}
hp_attr.update({'vm_hugepages_use_1G': 'False'})
else:
# vswitch will use 1G hugepages
vswitch_hugepages_nr = 0
hp_attr = {}
if vs_hp_size == size and initial_report is False:
# User manually set 2M pages
nr_hugepages -= vs_hp_nr
hp_attr.update({
'vswitch_hugepages_size_mib': vs_hp_size,
'vswitch_hugepages_nr': vs_hp_nr,
'vswitch_hugepages_avail': 0
})
hp_attr.update({
'vm_hugepages_avail_2M': free_hugepages,
'vm_hugepages_nr_2M':
(nr_hugepages - vswitch_hugepages_nr)
'vm_hugepages_nr_2M': nr_hugepages
})
attr.update(hp_attr)
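Worked illustration (hypothetical numbers): a NUMA node exposing ten 1G
kernel hugepages with "node0:1048576kB:1" reserved for vswitch reports
vswitch_hugepages_nr=1, vswitch_hugepages_size_mib=1024 and
vm_hugepages_nr_1G=9, since the vswitch pages are subtracted from the
pool before the VM count is recorded.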

View File

@@ -3506,7 +3506,10 @@ class HostController(rest.RestController):
memtotal = m.node_memtotal_mib
allocated = m.platform_reserved_mib
if m.hugepages_configured:
allocated += m.vswitch_hugepages_nr * m.vswitch_hugepages_size_mib
if m.vswitch_hugepages_reqd is not None:
allocated += m.vswitch_hugepages_reqd * m.vswitch_hugepages_size_mib
else:
allocated += m.vswitch_hugepages_nr * m.vswitch_hugepages_size_mib
if m.vm_hugepages_nr_2M_pending is not None:
allocated += constants.MIB_2M * m.vm_hugepages_nr_2M_pending
pending_2M_memory = True
@@ -5062,6 +5065,8 @@ class HostController(rest.RestController):
mib_reserved_disk_io = 0
align_2M_memory = False
align_1G_memory = False
vswitch_hp_size = None
for node in ihost_inodes:
# If the reserved memory has changed (eg, due to patch that
# changes common/constants.py), then push updated reserved memory
@@ -5071,6 +5076,15 @@ class HostController(rest.RestController):
# of the original setting.
self._auto_adjust_memory_for_node(ihost, node)
mems = pecan.request.dbapi.imemory_get_by_inode(node['id'])
for m in mems:
if not vswitch_hp_size:
vswitch_hp_size = m.vswitch_hugepages_size_mib
else:
if m.vswitch_hugepages_size_mib != vswitch_hp_size:
raise wsme.exc.ClientSideError(_(
"Mismatched vswitch socket memory hugepage size."))
# check whether the pending hugepage changes and the current
# platform reserved memory fit within the total memory available
mib_reserved_node, pending_2M_memory, pending_1G_memory = \

View File

@@ -34,6 +34,7 @@ from sysinv.api.controllers.v1 import collection
from sysinv.api.controllers.v1 import link
from sysinv.api.controllers.v1 import types
from sysinv.api.controllers.v1 import utils
from sysinv.common import constants
from sysinv.common import exception
from sysinv.common import utils as cutils
from sysinv import objects
@@ -390,6 +391,9 @@ class MemoryController(rest.RestController):
vm_hugepages_nr_2M_pending = None
vm_hugepages_nr_1G_pending = None
vswitch_hugepages_reqd = None
vswitch_hugepages_size_mib = None
platform_reserved_mib = None
for p in patch:
if p['path'] == '/platform_reserved_mib':
@@ -400,6 +404,12 @@ class MemoryController(rest.RestController):
if p['path'] == '/vm_hugepages_nr_1G_pending':
vm_hugepages_nr_1G_pending = p['value']
if p['path'] == '/vswitch_hugepages_reqd':
vswitch_hugepages_reqd = p['value']
if p['path'] == '/vswitch_hugepages_size_mib':
vswitch_hugepages_size_mib = p['value']
# The host must be locked
if host_id:
_check_host(host_id)
@@ -410,7 +420,8 @@ class MemoryController(rest.RestController):
try:
# Semantics checks and update hugepage memory accounting
patch = _check_huge_values(rpc_port, patch,
vm_hugepages_nr_2M_pending, vm_hugepages_nr_1G_pending)
vm_hugepages_nr_2M_pending, vm_hugepages_nr_1G_pending,
vswitch_hugepages_reqd, vswitch_hugepages_size_mib)
except wsme.exc.ClientSideError as e:
inode = pecan.request.dbapi.inode_get(inode_id=rpc_port.forinodeid)
numa_node = inode.numa_node
@@ -419,7 +430,8 @@ class MemoryController(rest.RestController):
# Semantics checks for platform memory
_check_memory(rpc_port, host_id, platform_reserved_mib,
vm_hugepages_nr_2M_pending, vm_hugepages_nr_1G_pending)
vm_hugepages_nr_2M_pending, vm_hugepages_nr_1G_pending,
vswitch_hugepages_reqd, vswitch_hugepages_size_mib)
# only allow patching allocated_function and capabilities
# replace ihost_uuid and inode_uuid with corresponding
@@ -472,6 +484,9 @@ class MemoryController(rest.RestController):
def _update(mem_uuid, mem_values):
vswitch_hugepages_reqd = None
vswitch_hugepages_size_mib = None
rpc_port = objects.memory.get_by_uuid(pecan.request.context, mem_uuid)
if 'forihostid' in rpc_port:
ihostId = rpc_port['forihostid']
@@ -483,29 +498,37 @@ def _update(mem_uuid, mem_values):
if 'platform_reserved_mib' in mem_values:
platform_reserved_mib = mem_values['platform_reserved_mib']
if 'vswitch_hugepages_size_mib' in mem_values:
vswitch_hugepages_size_mib = mem_values['vswitch_hugepages_size_mib']
if 'vswitch_hugepages_reqd' in mem_values:
vswitch_hugepages_reqd = mem_values['vswitch_hugepages_reqd']
if 'vm_hugepages_nr_2M_pending' in mem_values:
vm_hugepages_nr_2M_pending = mem_values['vm_hugepages_nr_2M_pending']
if 'vm_hugepages_nr_1G_pending' in mem_values:
vm_hugepages_nr_1G_pending = mem_values['vm_hugepages_nr_1G_pending']
# The host must be locked
if host_id:
_check_host(host_id)
else:
raise wsme.exc.ClientSideError((
"Hostname or uuid must be defined"))
# Semantics checks and update hugepage memory accounting
mem_values = _check_huge_values(rpc_port, mem_values,
vm_hugepages_nr_2M_pending, vm_hugepages_nr_1G_pending)
# Semantics checks and update hugepage memory accounting
mem_values = _check_huge_values(rpc_port, mem_values,
vm_hugepages_nr_2M_pending, vm_hugepages_nr_1G_pending,
vswitch_hugepages_reqd, vswitch_hugepages_size_mib)
# Semantics checks for platform memory
_check_memory(rpc_port, host_id, platform_reserved_mib,
vm_hugepages_nr_2M_pending, vm_hugepages_nr_1G_pending)
# Semantics checks for platform memory
_check_memory(rpc_port, host_id, platform_reserved_mib,
vm_hugepages_nr_2M_pending, vm_hugepages_nr_1G_pending,
vswitch_hugepages_reqd)
# update memory values
pecan.request.dbapi.imemory_update(mem_uuid, mem_values)
def _check_host(ihost):
@@ -523,7 +546,8 @@ def _check_host(ihost):
def _check_memory(rpc_port, ihost, platform_reserved_mib=None,
vm_hugepages_nr_2M_pending=None, vm_hugepages_nr_1G_pending=None):
vm_hugepages_nr_2M_pending=None, vm_hugepages_nr_1G_pending=None,
vswitch_hugepages_reqd=None, vswitch_hugepages_size_mib=None):
if platform_reserved_mib:
# Check for invalid characters
try:
@@ -577,7 +601,11 @@ def _check_memory(rpc_port, ihost, platform_reserved_mib=None,
LOG.debug("vm total=%s" % (mem_alloc))
vs_hp_size = rpc_port['vswitch_hugepages_size_mib']
vs_hp_nr = rpc_port['vswitch_hugepages_nr']
if vswitch_hugepages_reqd:
vs_hp_nr = int(vswitch_hugepages_reqd)
elif rpc_port['vswitch_hugepages_nr']:
vs_hp_nr = int(rpc_port['vswitch_hugepages_nr'])
mem_alloc += vs_hp_size * vs_hp_nr
LOG.debug("vs_hp_nr=%s vs_hp_size=%s" % (vs_hp_nr, vs_hp_size))
LOG.debug("memTotal %s mem_alloc %s" % (node_memtotal_mib, mem_alloc))
@@ -605,13 +633,93 @@ def _check_memory(rpc_port, ihost, platform_reserved_mib=None,
LOG.info(msg)
def _check_huge_values(rpc_port, patch, vm_hugepages_nr_2M=None,
vm_hugepages_nr_1G=None):
def _check_vswitch_huge_values(rpc_port, patch, vm_hugepages_nr_2M=None,
vm_hugepages_nr_1G=None,
vswitch_hugepages_reqd=None,
vswitch_hugepages_size_mib=None):
if rpc_port['vm_hugepages_use_1G'] == 'False' and vm_hugepages_nr_1G:
# cannot provision 1G huge pages if the processor does not support them
if vswitch_hugepages_reqd and not vswitch_hugepages_size_mib:
raise wsme.exc.ClientSideError(_(
"Processor does not support 1G huge pages."))
"No vswitch hugepage size specified."))
if vswitch_hugepages_reqd:
try:
val = int(vswitch_hugepages_reqd)
except ValueError:
raise wsme.exc.ClientSideError(_(
"Vswitch huge pages must be a number"))
if val <= 0:
raise wsme.exc.ClientSideError(_(
"Vswitch huge pages must be greater than zero"))
if vswitch_hugepages_size_mib:
try:
val = int(vswitch_hugepages_size_mib)
except ValueError:
raise wsme.exc.ClientSideError(_(
"Vswitch huge pages must be a number"))
if val <= 0:
raise wsme.exc.ClientSideError(_(
"Vswitch huge pages size (Mib) must be greater than zero"))
if ((val & (val - 1)) != 0):
raise wsme.exc.ClientSideError(_(
"Vswitch hugepage size (Mib) must be a power of 2"))
# None == unchanged
if vswitch_hugepages_reqd is not None:
new_vs_pages = int(vswitch_hugepages_reqd)
elif rpc_port['vswitch_hugepages_nr']:
new_vs_pages = rpc_port['vswitch_hugepages_nr']
else:
new_vs_pages = 0
# None == unchanged
if vswitch_hugepages_size_mib is not None:
vs_hp_size_mib = int(vswitch_hugepages_size_mib)
elif rpc_port['vswitch_hugepages_size_mib']:
vs_hp_size_mib = rpc_port['vswitch_hugepages_size_mib']
else:
# default
vs_hp_size_mib = constants.MIB_2M
vs_hp_reqd_mib = new_vs_pages * vs_hp_size_mib
# Throttle the maximum amount of memory that vswitch can take to
# 90% of usable memory to account for fluctuations in the reported
# node mem total.
vs_hp_avail_mib = 0.9 * (rpc_port['node_memtotal_mib'] -
rpc_port['platform_reserved_mib'] -
vm_hugepages_nr_2M*constants.MIB_2M -
vm_hugepages_nr_1G*constants.MIB_1G)
if vs_hp_avail_mib < vs_hp_reqd_mib:
if vs_hp_size_mib == constants.MIB_2M:
vs_possible_2M = int(vs_hp_avail_mib / constants.MIB_2M)
msg = _("No available space for 2M vswitch huge page allocation, "
"max 2M vswitch pages: %d") % vs_possible_2M
elif vs_hp_size_mib == constants.MIB_1G:
vs_possible_1G = int(vs_hp_avail_mib / constants.MIB_1G)
msg = _("No available space for 1G vswitch huge page allocation, "
"max 1G vswitch pages: %d") % vs_possible_1G
else:
msg = _("No available space for vswitch huge page allocation, "
"max memory (MB): %d") % vs_hp_avail_mib
raise wsme.exc.ClientSideError(msg)
return patch
def _check_huge_values(rpc_port, patch, vm_hugepages_nr_2M=None,
vm_hugepages_nr_1G=None, vswitch_hugepages_reqd=None,
vswitch_hugepages_size_mib=None):
if rpc_port['vm_hugepages_use_1G'] == 'False':
vs_hp_size = vswitch_hugepages_size_mib
if (vm_hugepages_nr_1G or vs_hp_size == constants.MIB_1G):
# cannot provision 1G huge pages if the processor does not support
# them
raise wsme.exc.ClientSideError(_(
"Processor does not support 1G huge pages."))
# Check for invalid characters
if vm_hugepages_nr_2M:
@@ -675,17 +783,17 @@ def _check_huge_values(rpc_port, patch, vm_hugepages_nr_2M=None,
if rpc_port['vm_hugepages_possible_1G']:
vm_possible_1G = int(rpc_port['vm_hugepages_possible_1G'])
LOG.debug("max possible 2M pages: %s, max possible 1G pages: %s" %
LOG.debug("max possible 2M VM pages: %s, max possible 1G VM pages: %s" %
(vm_possible_2M, vm_possible_1G))
if vm_possible_2M < new_2M_pages:
msg = _("No available space for 2M huge page allocation, "
"max 2M pages: %d") % vm_possible_2M
msg = _("No available space for 2M VM huge page allocation, "
"max 2M VM pages: %d") % vm_possible_2M
raise wsme.exc.ClientSideError(msg)
if vm_possible_1G < new_1G_pages:
msg = _("No available space for 1G huge page allocation, "
"max 1G pages: %d") % vm_possible_1G
msg = _("No available space for 1G VM huge page allocation, "
"max 1G VM pages: %d") % vm_possible_1G
raise wsme.exc.ClientSideError(msg)
# always use vm_possible_2M to compare,
@@ -693,18 +801,22 @@ def _check_huge_values(rpc_port, patch, vm_hugepages_nr_2M=None,
max_1G = int((vm_possible_2M - new_2M_pages) / num_2M_for_1G)
max_2M = vm_possible_2M - new_1G_pages * num_2M_for_1G
if new_2M_pages > 0 and new_1G_pages > 0:
msg = _("No available space for new settings."
msg = _("No available space for new VM hugepage settings."
"Max 1G pages is %s when 2M is %s, or "
"Max 2M pages is %s when 1G is %s." % (
max_1G, new_2M_pages, max_2M, new_1G_pages
))
elif new_1G_pages > 0:
msg = _("No available space for 1G huge page allocation, "
"max 1G pages: %d") % vm_possible_1G
msg = _("No available space for 1G VM huge page allocation, "
"max 1G VM pages: %d") % vm_possible_1G
else:
msg = _("No available space for 2M huge page allocation, "
"max 2M pages: %d") % vm_possible_2M
msg = _("No available space for 2M VM huge page allocation, "
"max 2M VM pages: %d") % vm_possible_2M
raise wsme.exc.ClientSideError(msg)
_check_vswitch_huge_values(
rpc_port, patch, new_2M_pages, new_1G_pages,
vswitch_hugepages_reqd, vswitch_hugepages_size_mib)
return patch
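A rough illustration of the 90% throttle in _check_vswitch_huge_values
(a minimal sketch with made-up values, not part of the change):

MIB_1G = 1024
node_memtotal_mib = 128 * 1024       # hypothetical node total
platform_reserved_mib = 10 * 1024    # hypothetical platform reservation
vm_hugepages_mib = 0                 # no VM hugepages requested
vs_hp_avail_mib = 0.9 * (node_memtotal_mib - platform_reserved_mib -
                         vm_hugepages_mib)
print(int(vs_hp_avail_mib / MIB_1G))  # at most 106 1G vswitch pages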

View File

@@ -1532,6 +1532,8 @@ def _create_mem_profile(profile_name, profile_node):
platform_reserved = get_mem_assignment(profile_node, "platformReservedMiB")
vm_hp_2m = get_mem_assignment(profile_node, "vmHugePages2M")
vm_hp_1g = get_mem_assignment(profile_node, "vmHugePages1G")
vs_hp_nr = get_mem_assignment(profile_node, "vsHugePagesNr")
vs_hp_sz = get_mem_assignment(profile_node, "vsHugePagesSz")
except profile_utils.InvalidProfileData as e:
return "Error", _('error: CPU profile %s is invalid') % profile_name, e.message
@@ -1559,6 +1561,8 @@ def _create_mem_profile(profile_name, profile_node):
mdict['platform_reserved_mib'] = get_mem_size(platform_reserved, node_idx)
mdict['vm_hugepages_nr_2M_pending'] = get_mem_size(vm_hp_2m, node_idx)
mdict['vm_hugepages_nr_1G_pending'] = get_mem_size(vm_hp_1g, node_idx)
mdict['vswitch_hugepages_reqd'] = get_mem_size(vs_hp_nr, node_idx)
mdict['vswitch_hugepages_size_mib'] = get_mem_size(vs_hp_sz, node_idx)
pecan.request.dbapi.imemory_create(iprofile_id, mdict)
node_idx += 1
@@ -2326,6 +2330,8 @@ def memoryprofile_copy_data(host, profile):
mdict['platform_reserved_mib'] = m.platform_reserved_mib
mdict['vm_hugepages_nr_2M_pending'] = m.vm_hugepages_nr_2M
mdict['vm_hugepages_nr_1G_pending'] = m.vm_hugepages_nr_1G
mdict['vswitch_hugepages_reqd'] = m.vswitch_hugepages_nr
mdict['vswitch_hugepages_size_mib'] = m.vswitch_hugepages_size_mib
newmemory = pecan.request.dbapi.imemory_create(iprofile_id, mdict)
# if memory wasn't actually created,
@@ -3177,7 +3183,9 @@ def memoryprofile_apply_to_host(host, profile):
if int(host_inode.numa_node) == int(profile_inode.numa_node):
data = {'vm_hugepages_nr_2M_pending': pmem.vm_hugepages_nr_2M_pending,
'vm_hugepages_nr_1G_pending': pmem.vm_hugepages_nr_1G_pending,
'platform_reserved_mib': pmem.platform_reserved_mib}
'platform_reserved_mib': pmem.platform_reserved_mib,
'vswitch_hugepages_reqd': pmem.vswitch_hugepages_reqd,
'vswitch_hugepages_size_mib': pmem.vswitch_hugepages_size_mib}
try:
memory_api._update(hmem.uuid, data)
except wsme.exc.ClientSideError as cse:

View File

@@ -2825,6 +2825,7 @@ class ConductorManager(service.PeriodicService):
if ihost.administrative == constants.ADMIN_UNLOCKED:
mem_dict['vm_hugepages_nr_2M_pending'] = None
mem_dict['vm_hugepages_nr_1G_pending'] = None
mem_dict['vswitch_hugepages_reqd'] = None
try:
imems = self.dbapi.imemory_get_by_ihost_inode(ihost_uuid,

View File

@@ -331,18 +331,33 @@ class OVSPuppet(base.BasePuppet):
def _get_memory_config(self, host):
vswitch_memory = []
config = {}
vswitch_size = 0
host_memory = self.dbapi.imemory_get_by_ihost(host.id)
for memory in host_memory:
vswitch_size = memory.vswitch_hugepages_size_mib
vswitch_pages = memory.vswitch_hugepages_nr
vswitch_pages = memory.vswitch_hugepages_reqd \
if memory.vswitch_hugepages_reqd is not None \
else memory.vswitch_hugepages_nr
if vswitch_pages == 0:
vswitch_pages = memory.vswitch_hugepages_nr
vswitch_memory.append(str(vswitch_size * vswitch_pages))
dpdk_socket_mem = self.quoted_str(','.join(vswitch_memory))
return {
config.update({
'vswitch::dpdk::socket_mem': dpdk_socket_mem
}
})
if vswitch_size == constants.MIB_2M:
config.update({
'platform::vswitch::params::hugepage_dir': '/mnt/huge-2048kB'
})
return config
def _get_virtual_config(self, host):
config = {}

View File

@@ -670,6 +670,8 @@ class PlatformPuppet(base.BasePuppet):
vm_2M_pages = []
vm_1G_pages = []
vs_pages_updated = False
for node, memory_list in memory_numa_list.items():
memory = memory_list[0]
@@ -682,7 +684,13 @@ class PlatformPuppet(base.BasePuppet):
platform_nodes.append(platform_node)
vswitch_size = memory.vswitch_hugepages_size_mib
vswitch_pages = memory.vswitch_hugepages_nr
vswitch_pages = memory.vswitch_hugepages_reqd \
if memory.vswitch_hugepages_reqd is not None \
else memory.vswitch_hugepages_nr
if vswitch_pages == 0:
vswitch_pages = memory.vswitch_hugepages_nr
vswitch_node = "\"node%d:%dkB:%d\"" % (
node, vswitch_size * 1024, vswitch_pages)
vswitch_nodes.append(vswitch_node)
@@ -700,11 +708,11 @@ class PlatformPuppet(base.BasePuppet):
total_hugepages_1G = vm_hugepages_nr_1G
if memory.vswitch_hugepages_size_mib == constants.MIB_2M:
total_hugepages_2M += memory.vswitch_hugepages_nr
vswitch_2M_page += memory.vswitch_hugepages_nr
total_hugepages_2M += vswitch_pages
vswitch_2M_page += vswitch_pages
elif memory.vswitch_hugepages_size_mib == constants.MIB_1G:
total_hugepages_1G += memory.vswitch_hugepages_nr
vswitch_1G_page += memory.vswitch_hugepages_nr
total_hugepages_1G += vswitch_pages
vswitch_1G_page += vswitch_pages
vswitch_2M_pages.append(vswitch_2M_page)
vswitch_1G_pages.append(vswitch_1G_page)
@@ -720,6 +728,10 @@ class PlatformPuppet(base.BasePuppet):
vm_2M_pages.append(vm_hugepages_nr_2M)
vm_1G_pages.append(vm_hugepages_nr_1G)
if (memory.vswitch_hugepages_reqd and
vswitch_pages != memory.vswitch_hugepages_nr):
vs_pages_updated = True
platform_reserved_memory = "(%s)" % ' '.join(platform_nodes)
vswitch_reserved_memory = "(%s)" % ' '.join(vswitch_nodes)
@@ -752,6 +764,17 @@ class PlatformPuppet(base.BasePuppet):
'platform::compute::hugepage::params::vm_1G_pages':
vm_1G,
})
if vs_pages_updated:
grub_hugepages_1G = "hugepagesz=1G hugepages=%d" % (
sum(vswitch_1G_pages) + sum(vm_1G_pages))
config.update({
'platform::compute::grub::params::g_hugepages':
grub_hugepages_1G,
})
if sum(vswitch_2M_pages) > 0:
config.update({
'platform::vswitch::params::hugepage_dir': '/mnt/huge-2048kB'
})
return config
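For example (illustrative numbers): four 1G vswitch pages plus eight 1G VM
pages across all nodes would yield

platform::compute::grub::params::g_hugepages: "hugepagesz=1G hugepages=12"

which the grub manifest change above consumes in place of its
per-NUMA-node default.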

View File

@@ -328,6 +328,8 @@ class ProfileApplyTestCase(ProfileTestCase):
profile_r['imemorys'][0]['vm_hugepages_nr_2M_pending'])
self.assertEqual(hostmem_r['imemorys'][0]['vm_hugepages_nr_1G_pending'],
profile_r['imemorys'][0]['vm_hugepages_nr_1G_pending'])
self.assertEqual(hostmem_r['imemorys'][0]['vswitch_hugepages_reqd'],
profile_r['imemorys'][0]['vswitch_hugepages_reqd'])
def test_apply_storage_success(self):
self.profile["profiletype"] = constants.PROFILE_TYPE_LOCAL_STORAGE