From 47735c6ab945552d58ca8307bea5a0129b690f8b Mon Sep 17 00:00:00 2001
From: Tao Liu <tao.liu@windriver.com>
Date: Thu, 15 Aug 2019 10:24:40 -0400
Subject: [PATCH] Support Single huge page size for openstack worker node

Kubernetes only supports a single huge page size per worker
node. Prior to Kubernetes 1.15, the huge page feature could
be disabled via a feature gate. In Kubernetes 1.15, the
feature gate has been removed, so huge page support is always
enabled.

This update removes the conditional disabling of the hugepage
feature and enforces the provisioning of a single page size
per worker.

When the vswitch type is set to ovs-dpdk or avs, the application
huge page size must match the vswitch huge page size.

This update also changes the auto-provisioning of VM huge
pages from 2M to 1G, as there is no auto-provisioning in a
virtual environment.

Story: 2006295
Task: 36006

Change-Id: I84d4959b420584fdcdf8a8664a6f4855c08ec989
Signed-off-by: Tao Liu <tao.liu@windriver.com>
---
 .../modules/platform/manifests/kubernetes.pp   |  6 ++----
 .../platform/templates/kubeadm.yaml.erb        |  2 --
 .../platform/templates/kubelet.conf.erb        |  2 +-
 .../sysinv/sysinv/api/controllers/v1/host.py   | 18 ++++++++----------
 .../sysinv/sysinv/api/controllers/v1/memory.py | 13 +++++++++++++
 5 files changed, 24 insertions(+), 17 deletions(-)

diff --git a/puppet-manifests/src/modules/platform/manifests/kubernetes.pp b/puppet-manifests/src/modules/platform/manifests/kubernetes.pp
index 950f25113f..e57c38980d 100644
--- a/puppet-manifests/src/modules/platform/manifests/kubernetes.pp
+++ b/puppet-manifests/src/modules/platform/manifests/kubernetes.pp
@@ -115,11 +115,10 @@ class platform::kubernetes::kubeadm {
     $k8s_registry = 'k8s.gcr.io'
   }
 
-  # Configure kubelet hugepage and cpumanager options
+  # Configure kubelet cpumanager options
   if str2bool($::is_worker_subfunction)
     and !('openstack-compute-node'
           in $host_labels) {
-    $k8s_hugepage = true
     $k8s_cpu_manager_opts = join([
       '--cpu-manager-policy=static',
       '--system-reserved-cgroup=/system.slice',
@@ -129,12 +128,11 @@ class platform::kubernetes::kubeadm {
         "memory=${k8s_reserved_mem}Mi"])
       ], ' ')
   } else {
-    $k8s_hugepage = false
     $k8s_cpu_manager_opts = '--cpu-manager-policy=none'
   }
 
   # Enable kubelet extra parameters that are node specific such as
-  # hugepages and cpumanager
+  # cpumanager
   file { '/etc/sysconfig/kubelet':
     ensure  => file,
     content => template('platform/kubelet.conf.erb'),
diff --git a/puppet-manifests/src/modules/platform/templates/kubeadm.yaml.erb b/puppet-manifests/src/modules/platform/templates/kubeadm.yaml.erb
index a965f1342c..42c7ddc585 100644
--- a/puppet-manifests/src/modules/platform/templates/kubeadm.yaml.erb
+++ b/puppet-manifests/src/modules/platform/templates/kubeadm.yaml.erb
@@ -41,8 +41,6 @@ apiVersion: kubelet.config.k8s.io/v1beta1
 configMapAndSecretChangeDetectionStrategy: Cache
 nodeStatusUpdateFrequency: "4s"
 failSwapOn: false
-featureGates:
-  HugePages: false
 cgroupRoot: "/k8s-infra"
 ---
 kind: KubeProxyConfiguration
diff --git a/puppet-manifests/src/modules/platform/templates/kubelet.conf.erb b/puppet-manifests/src/modules/platform/templates/kubelet.conf.erb
index 898254bcc7..9eabca34a6 100644
--- a/puppet-manifests/src/modules/platform/templates/kubelet.conf.erb
+++ b/puppet-manifests/src/modules/platform/templates/kubelet.conf.erb
@@ -1,2 +1,2 @@
 # Overrides config file for kubelet
-KUBELET_EXTRA_ARGS=--node-ip=<%= @node_ip %> --feature-gates=HugePages=<%= @k8s_hugepage %> <%= @k8s_cpu_manager_opts %>
+KUBELET_EXTRA_ARGS=--node-ip=<%= @node_ip %> <%= @k8s_cpu_manager_opts %>
diff --git a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py
index 1a0502c292..5ac5a7a7b2 100644
--- a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py
+++ b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py
@@ -3486,9 +3486,9 @@ class HostController(rest.RestController):
                 pending_2M_memory, pending_1G_memory)
 
     @staticmethod
-    def _check_memory_for_non_openstack(ihost):
+    def _check_memory_for_single_size(ihost):
         """
-        Perform memory semantic checks on a non openstack worker.
+        Perform memory semantic checks on a worker node.
         It restricts the huge page allocation to either a 2M or 1G
         pool.
         """
@@ -3606,16 +3606,16 @@ class HostController(rest.RestController):
                                                  constants.MIB_2M)
                         value.update({'vm_hugepages_nr_2M': vm_hugepages_nr_2M})
 
-                    # calculate 90% 2M pages if the huge pages have not been
+                    # calculate 90% 1G pages if the huge pages have not been
                     # allocated and the compute label is set
                     if cutils.has_openstack_compute(labels) and \
                             vm_hugepages_nr_2M == 0 and \
                             vm_hugepages_nr_1G == 0 and \
                             vm_mem_mib > 0 and \
                             cutils.is_default_huge_pages_required(ihost):
-                        vm_hugepages_nr_2M = int((hp_possible_mib * 0.9 - vs_mem_mib) /
-                                                 constants.MIB_2M)
-                        value.update({'vm_hugepages_nr_2M': vm_hugepages_nr_2M})
+                        vm_hugepages_nr_1G = int((hp_possible_mib * 0.9 - vs_mem_mib) /
+                                                 constants.MIB_1G)
+                        value.update({'vm_hugepages_nr_1G': vm_hugepages_nr_1G})
 
                     vm_hugepages_4K = vm_mem_mib
                     vm_hugepages_4K -= \
@@ -5221,10 +5221,8 @@ class HostController(rest.RestController):
         # Check if cpu assignments are valid
         self._semantic_check_worker_cpu_assignments(ihost)
 
-        # for non-openstack worker node, only allow allocating huge pages
-        # for a single size
-        if not utils.is_openstack_compute(ihost):
-            self._check_memory_for_non_openstack(ihost)
+        # only allow allocating huge pages for a single size
+        self._check_memory_for_single_size(ihost)
 
         # check if the platform reserved memory is valid
         ihost_inodes = pecan.request.dbapi.inode_get_by_ihost(ihost['uuid'])
diff --git a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/memory.py b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/memory.py
index 044e27a84d..eda1d136dd 100644
--- a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/memory.py
+++ b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/memory.py
@@ -741,6 +741,19 @@ def _check_huge_values(rpc_port, patch, vm_hugepages_nr_2M=None,
         vs_hp_size_mib = constants.MIB_2M
     vs_hp_reqd_mib = new_vs_pages * vs_hp_size_mib
 
+    if new_2M_pages != 0 or new_1G_pages != 0:
+        if utils.get_vswitch_type() != constants.VSWITCH_TYPE_NONE:
+            if vs_hp_size_mib == constants.MIB_1G:
+                if new_2M_pages != 0:
+                    raise wsme.exc.ClientSideError(_(
+                        "Only 1G huge page allocation is supported"))
+            elif new_1G_pages != 0:
+                raise wsme.exc.ClientSideError(_(
+                    "Only 2M huge page allocation is supported"))
+        elif new_2M_pages != 0 and new_1G_pages != 0:
+            raise wsme.exc.ClientSideError(_(
+                "Host only supports single huge page size."))
+
     # The size of possible hugepages is the node mem total - platform reserved
     base_mem_mib = rpc_port['platform_reserved_mib']
     if platform_reserved_mib: