From 47735c6ab945552d58ca8307bea5a0129b690f8b Mon Sep 17 00:00:00 2001 From: Tao Liu Date: Thu, 15 Aug 2019 10:24:40 -0400 Subject: [PATCH] Support Single huge page size for openstack worker node Kubernetes only supports a single huge page size per worker node. Prior to kubernetes 1.15, the huge page feature could be disabled via a feature gate. In kubernetes 1.15, the feature gate has been removed so huge page support is always on in k8s. This update removes the conditional disabling of the hugepage feature and enforces the provisioning of a single page size per worker. When vswitch type is set to ovs-dpdk or avs, the application huge pages size goes with the vswitch huge pages size. This update also changes the auto-provisioning of VM huge pages to 1G as there is no auto-provisioning in virtual environment. Story: 2006295 Task: 36006 Change-Id: I84d4959b420584fdcdf8a8664a6f4855c08ec989 Signed-off-by: Tao Liu --- .../modules/platform/manifests/kubernetes.pp | 6 ++---- .../platform/templates/kubeadm.yaml.erb | 2 -- .../platform/templates/kubelet.conf.erb | 2 +- .../sysinv/sysinv/api/controllers/v1/host.py | 18 ++++++++---------- .../sysinv/sysinv/api/controllers/v1/memory.py | 13 +++++++++++++ 5 files changed, 24 insertions(+), 17 deletions(-) diff --git a/puppet-manifests/src/modules/platform/manifests/kubernetes.pp b/puppet-manifests/src/modules/platform/manifests/kubernetes.pp index 950f25113f..e57c38980d 100644 --- a/puppet-manifests/src/modules/platform/manifests/kubernetes.pp +++ b/puppet-manifests/src/modules/platform/manifests/kubernetes.pp @@ -115,11 +115,10 @@ class platform::kubernetes::kubeadm { $k8s_registry = 'k8s.gcr.io' } - # Configure kubelet hugepage and cpumanager options + # Configure kubelet cpumanager options if str2bool($::is_worker_subfunction) and !('openstack-compute-node' in $host_labels) { - $k8s_hugepage = true $k8s_cpu_manager_opts = join([ '--cpu-manager-policy=static', '--system-reserved-cgroup=/system.slice', @@ -129,12 +128,11 @@ class platform::kubernetes::kubeadm { "memory=${k8s_reserved_mem}Mi"]) ], ' ') } else { - $k8s_hugepage = false $k8s_cpu_manager_opts = '--cpu-manager-policy=none' } # Enable kubelet extra parameters that are node specific such as - # hugepages and cpumanager + # cpumanager file { '/etc/sysconfig/kubelet': ensure => file, content => template('platform/kubelet.conf.erb'), diff --git a/puppet-manifests/src/modules/platform/templates/kubeadm.yaml.erb b/puppet-manifests/src/modules/platform/templates/kubeadm.yaml.erb index a965f1342c..42c7ddc585 100644 --- a/puppet-manifests/src/modules/platform/templates/kubeadm.yaml.erb +++ b/puppet-manifests/src/modules/platform/templates/kubeadm.yaml.erb @@ -41,8 +41,6 @@ apiVersion: kubelet.config.k8s.io/v1beta1 configMapAndSecretChangeDetectionStrategy: Cache nodeStatusUpdateFrequency: "4s" failSwapOn: false -featureGates: - HugePages: false cgroupRoot: "/k8s-infra" --- kind: KubeProxyConfiguration diff --git a/puppet-manifests/src/modules/platform/templates/kubelet.conf.erb b/puppet-manifests/src/modules/platform/templates/kubelet.conf.erb index 898254bcc7..9eabca34a6 100644 --- a/puppet-manifests/src/modules/platform/templates/kubelet.conf.erb +++ b/puppet-manifests/src/modules/platform/templates/kubelet.conf.erb @@ -1,2 +1,2 @@ # Overrides config file for kubelet -KUBELET_EXTRA_ARGS=--node-ip=<%= @node_ip %> --feature-gates=HugePages=<%= @k8s_hugepage %> <%= @k8s_cpu_manager_opts %> +KUBELET_EXTRA_ARGS=--node-ip=<%= @node_ip %> <%= @k8s_cpu_manager_opts %> diff --git a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py index 1a0502c292..5ac5a7a7b2 100644 --- a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py +++ b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py @@ -3486,9 +3486,9 @@ class HostController(rest.RestController): pending_2M_memory, pending_1G_memory) @staticmethod - def _check_memory_for_non_openstack(ihost): + def _check_memory_for_single_size(ihost): """ - Perform memory semantic checks on a non openstack worker. + Perform memory semantic checks on a worker node. It restricts the huge page allocation to either a 2M or 1G pool. """ @@ -3606,16 +3606,16 @@ class HostController(rest.RestController): constants.MIB_2M) value.update({'vm_hugepages_nr_2M': vm_hugepages_nr_2M}) - # calculate 90% 2M pages if the huge pages have not been + # calculate 90% 1G pages if the huge pages have not been # allocated and the compute label is set if cutils.has_openstack_compute(labels) and \ vm_hugepages_nr_2M == 0 and \ vm_hugepages_nr_1G == 0 and \ vm_mem_mib > 0 and \ cutils.is_default_huge_pages_required(ihost): - vm_hugepages_nr_2M = int((hp_possible_mib * 0.9 - vs_mem_mib) / - constants.MIB_2M) - value.update({'vm_hugepages_nr_2M': vm_hugepages_nr_2M}) + vm_hugepages_nr_1G = int((hp_possible_mib * 0.9 - vs_mem_mib) / + constants.MIB_1G) + value.update({'vm_hugepages_nr_1G': vm_hugepages_nr_1G}) vm_hugepages_4K = vm_mem_mib vm_hugepages_4K -= \ @@ -5221,10 +5221,8 @@ class HostController(rest.RestController): # Check if cpu assignments are valid self._semantic_check_worker_cpu_assignments(ihost) - # for non-openstack worker node, only allow allocating huge pages - # for a single size - if not utils.is_openstack_compute(ihost): - self._check_memory_for_non_openstack(ihost) + # only allow allocating huge pages for a single size + self._check_memory_for_single_size(ihost) # check if the platform reserved memory is valid ihost_inodes = pecan.request.dbapi.inode_get_by_ihost(ihost['uuid']) diff --git a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/memory.py b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/memory.py index 044e27a84d..eda1d136dd 100644 --- a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/memory.py +++ b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/memory.py @@ -741,6 +741,19 @@ def _check_huge_values(rpc_port, patch, vm_hugepages_nr_2M=None, vs_hp_size_mib = constants.MIB_2M vs_hp_reqd_mib = new_vs_pages * vs_hp_size_mib + if new_2M_pages != 0 or new_1G_pages != 0: + if utils.get_vswitch_type() != constants.VSWITCH_TYPE_NONE: + if vs_hp_size_mib == constants.MIB_1G: + if new_2M_pages != 0: + raise wsme.exc.ClientSideError(_( + "Only 1G huge page allocation is supported")) + elif new_1G_pages != 0: + raise wsme.exc.ClientSideError(_( + "Only 2M huge page allocation is supported")) + elif new_2M_pages != 0 and new_1G_pages != 0: + raise wsme.exc.ClientSideError(_( + "Host only supports single huge page size.")) + # The size of possible hugepages is the node mem total - platform reserved base_mem_mib = rpc_port['platform_reserved_mib'] if platform_reserved_mib: