Support a single huge page size per openstack worker node

Kubernetes only supports a single huge page size per worker
node. Prior to kubernetes 1.15, the huge page feature could
be disabled via a feature gate. In kubernetes 1.15, the
feature gate has been removed so huge page support is always
on in k8s.

This update removes the conditional disabling of the hugepage
feature and enforces the provisioning of a single page size
per worker.

When the vswitch type is set to ovs-dpdk or avs, the application
huge page size follows the vswitch huge page size.

This update also changes the auto-provisioning of VM huge
pages to 1G, since huge pages are not auto-provisioned in
virtual environments.

Story: 2006295
Task: 36006

Change-Id: I84d4959b420584fdcdf8a8664a6f4855c08ec989
Signed-off-by: Tao Liu <tao.liu@windriver.com>
This commit is contained in:
Tao Liu 2019-08-15 10:24:40 -04:00
parent c6a18c4833
commit 47735c6ab9
5 changed files with 24 additions and 17 deletions

View File

@ -115,11 +115,10 @@ class platform::kubernetes::kubeadm {
$k8s_registry = 'k8s.gcr.io'
}
# Configure kubelet hugepage and cpumanager options
# Configure kubelet cpumanager options
if str2bool($::is_worker_subfunction)
and !('openstack-compute-node'
in $host_labels) {
$k8s_hugepage = true
$k8s_cpu_manager_opts = join([
'--cpu-manager-policy=static',
'--system-reserved-cgroup=/system.slice',
@ -129,12 +128,11 @@ class platform::kubernetes::kubeadm {
"memory=${k8s_reserved_mem}Mi"])
], ' ')
} else {
$k8s_hugepage = false
$k8s_cpu_manager_opts = '--cpu-manager-policy=none'
}
# Enable kubelet extra parameters that are node specific such as
# hugepages and cpumanager
# cpumanager
file { '/etc/sysconfig/kubelet':
ensure => file,
content => template('platform/kubelet.conf.erb'),

View File

@ -41,8 +41,6 @@ apiVersion: kubelet.config.k8s.io/v1beta1
configMapAndSecretChangeDetectionStrategy: Cache
nodeStatusUpdateFrequency: "4s"
failSwapOn: false
featureGates:
HugePages: false
cgroupRoot: "/k8s-infra"
---
kind: KubeProxyConfiguration

View File

@ -1,2 +1,2 @@
# Overrides config file for kubelet
KUBELET_EXTRA_ARGS=--node-ip=<%= @node_ip %> --feature-gates=HugePages=<%= @k8s_hugepage %> <%= @k8s_cpu_manager_opts %>
KUBELET_EXTRA_ARGS=--node-ip=<%= @node_ip %> <%= @k8s_cpu_manager_opts %>

View File

@ -3486,9 +3486,9 @@ class HostController(rest.RestController):
pending_2M_memory, pending_1G_memory)
@staticmethod
def _check_memory_for_non_openstack(ihost):
def _check_memory_for_single_size(ihost):
"""
Perform memory semantic checks on a non openstack worker.
Perform memory semantic checks on a worker node.
It restricts the huge page allocation to either a 2M or 1G
pool.
"""
@ -3606,16 +3606,16 @@ class HostController(rest.RestController):
constants.MIB_2M)
value.update({'vm_hugepages_nr_2M': vm_hugepages_nr_2M})
# calculate 90% 2M pages if the huge pages have not been
# calculate 90% 1G pages if the huge pages have not been
# allocated and the compute label is set
if cutils.has_openstack_compute(labels) and \
vm_hugepages_nr_2M == 0 and \
vm_hugepages_nr_1G == 0 and \
vm_mem_mib > 0 and \
cutils.is_default_huge_pages_required(ihost):
vm_hugepages_nr_2M = int((hp_possible_mib * 0.9 - vs_mem_mib) /
constants.MIB_2M)
value.update({'vm_hugepages_nr_2M': vm_hugepages_nr_2M})
vm_hugepages_nr_1G = int((hp_possible_mib * 0.9 - vs_mem_mib) /
constants.MIB_1G)
value.update({'vm_hugepages_nr_1G': vm_hugepages_nr_1G})
vm_hugepages_4K = vm_mem_mib
vm_hugepages_4K -= \
@ -5221,10 +5221,8 @@ class HostController(rest.RestController):
# Check if cpu assignments are valid
self._semantic_check_worker_cpu_assignments(ihost)
# for non-openstack worker node, only allow allocating huge pages
# for a single size
if not utils.is_openstack_compute(ihost):
self._check_memory_for_non_openstack(ihost)
# only allow allocating huge pages for a single size
self._check_memory_for_single_size(ihost)
# check if the platform reserved memory is valid
ihost_inodes = pecan.request.dbapi.inode_get_by_ihost(ihost['uuid'])

View File

@ -741,6 +741,19 @@ def _check_huge_values(rpc_port, patch, vm_hugepages_nr_2M=None,
vs_hp_size_mib = constants.MIB_2M
vs_hp_reqd_mib = new_vs_pages * vs_hp_size_mib
if new_2M_pages != 0 or new_1G_pages != 0:
if utils.get_vswitch_type() != constants.VSWITCH_TYPE_NONE:
if vs_hp_size_mib == constants.MIB_1G:
if new_2M_pages != 0:
raise wsme.exc.ClientSideError(_(
"Only 1G huge page allocation is supported"))
elif new_1G_pages != 0:
raise wsme.exc.ClientSideError(_(
"Only 2M huge page allocation is supported"))
elif new_2M_pages != 0 and new_1G_pages != 0:
raise wsme.exc.ClientSideError(_(
"Host only supports single huge page size."))
# The size of possible hugepages is the node mem total - platform reserved
base_mem_mib = rpc_port['platform_reserved_mib']
if platform_reserved_mib: