Add staged kubernetes version 1.20.9

Multiple versions of kubernetes are required to support upgrade.

This adds staged version of kubernetes 1.20.9, built with a
specific version of golang.

All subpackage versions are included in the iso image without
collisions.

The following patches are ported to specific kubernetes version:
kubelet-cpumanager-disable-CFS-quota-throttling-for-.patch
kubelet-cpumanager-keep-normal-containers-off-reserv.patch
kubelet-cpumanager-infrastructure-pods-use-system-re.patch
kubelet-cpumanager-introduce-concept-of-isolated-CPU.patch
kubeadm-create-platform-pods-with-zero-CPU-resources.patch
enable-support-for-kubernetes-to-ignore-isolcpus.patch

Story: 2008972
Task: 43056

Signed-off-by: Jim Gauld <james.gauld@windriver.com>
Change-Id: Ie19612f1980690be073ab2236afbb9ccefe504e5
This commit is contained in:
Jim Gauld 2021-09-13 14:53:42 -04:00
parent c4898b0bd9
commit 94517398e4
16 changed files with 6063 additions and 0 deletions

View File

@ -162,6 +162,9 @@ kubernetes-1.18.1-client
kubernetes-1.19.13-node
kubernetes-1.19.13-kubeadm
kubernetes-1.19.13-client
kubernetes-1.20.9-node
kubernetes-1.20.9-kubeadm
kubernetes-1.20.9-client
containerd
k8s-pod-recovery
containernetworking-plugins

View File

@ -56,6 +56,7 @@ kubernetes/containerd
kubernetes/cni/plugins
kubernetes/kubernetes-1.18.1
kubernetes/kubernetes-1.19.13
kubernetes/kubernetes-1.20.9
kubernetes/kubernetes-unversioned
kubernetes/docker-distribution
kubernetes/etcd

View File

@ -33,6 +33,7 @@ keycodemapdb-16e5b07.tar.gz#keycodemapdb#https://github.com/CendioOssman/keycode
kubernetes-contrib-v1.18.1.tar.gz#kubernetes-contrib-1.18.1#https://github.com/kubernetes-retired/contrib/tarball/89f6948e24578fed2a90a87871b2263729f90ac3#http##
kubernetes-v1.18.1.tar.gz#kubernetes-1.18.1#https://github.com/kubernetes/kubernetes/archive/7879fc12a63337efff607952a323df90cdc7a335.tar.gz#http##
kubernetes-v1.19.13.tar.gz#kubernetes-1.19.13#https://github.com/kubernetes/kubernetes/archive/refs/tags/v1.19.13.tar.gz#http##
kubernetes-v1.20.9.tar.gz#kubernetes-1.20.9#https://github.com/kubernetes/kubernetes/archive/refs/tags/v1.20.9.tar.gz#http##
kvm-unit-tests.git-4ea7633.tar.bz2#kvm-unit-tests#https://git.kernel.org/pub/scm/virt/kvm/kvm-unit-tests.git/snapshot/kvm-unit-tests-20171020.tar.gz#http##
ldapscripts-2.0.8.tgz#ldapscripts-2.0.8#https://sourceforge.net/projects/ldapscripts/files/ldapscripts/ldapscripts-2.0.8/ldapscripts-2.0.8.tgz/download#http##
libtpms-0.6.0-4f0d59d.tar.gz#libtpms-0.6.0#https://github.com/stefanberger/libtpms/tarball/c421ca0f4d00c0caceeda8d62c1efb2b7e47db04#http##

View File

@ -0,0 +1,9 @@
The spec file used here was from the kubernetes 1.10.0 src rpm.
The orig file is included to help show modifications made to that
spec file, to help understand which changes were needed and to
assist with future upversioning.
The contrib tarball does not have the same versioning as kubernetes and
there is little activity in that repo.
The version for the contrib tarball is arbitrary.

View File

@ -0,0 +1,5 @@
VERSION=1.20.9
TAR_NAME=kubernetes
TAR="$TAR_NAME-v$VERSION.tar.gz"
COPY_LIST="${CGCS_BASE}/downloads/$TAR $FILES_BASE/*"
TIS_PATCH_VER=PKG_GITREVCOUNT

View File

@ -0,0 +1,79 @@
From 80fc45845ac260819108a6a6dabb9da7c0fd111f Mon Sep 17 00:00:00 2001
From: Chris Friesen <chris.friesen@windriver.com>
Date: Fri, 23 Oct 2020 17:46:10 -0600
Subject: [PATCH 6/6] enable support for kubernetes to ignore isolcpus
The normal mechanisms for allocating isolated CPUs do not allow
a mix of isolated and exclusive CPUs in the same container. In
order to allow this in *very* limited cases where the pod spec
is known in advance we will add the ability to disable the normal
isolcpus behaviour.
If the file "/etc/kubernetes/ignore_isolcpus" exists, then kubelet
will basically forget everything it knows about isolcpus and just
treat them like regular CPUs.
The admin user can then rely on the fact that CPU allocation is
deterministic to ensure that the isolcpus they configure end up being
allocated to the correct pods.
---
pkg/kubelet/cm/cpumanager/cpu_manager.go | 9 +++++++++
pkg/kubelet/cm/cpumanager/policy_static.go | 8 ++++++++
2 files changed, 17 insertions(+)
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go
index 8470431c07c..fd0bdeeee07 100644
--- a/pkg/kubelet/cm/cpumanager/cpu_manager.go
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go
@@ -19,6 +19,7 @@ package cpumanager
import (
"fmt"
"math"
+ "os"
"sync"
"time"
"strings"
@@ -56,6 +57,14 @@ const cpuManagerStateFileName = "cpu_manager_state"
// get the system-level isolated CPUs
func getIsolcpus() cpuset.CPUSet {
+
+ // This is a gross hack to basically turn off awareness of isolcpus to enable
+ // isolated cpus to be allocated to pods the same way as non-isolated CPUs.
+ if _, err := os.Stat("/etc/kubernetes/ignore_isolcpus"); err == nil {
+ klog.Infof("[cpumanager] turning off isolcpus awareness")
+ return cpuset.NewCPUSet()
+ }
+
dat, err := ioutil.ReadFile("/sys/devices/system/cpu/isolated")
if err != nil {
klog.Errorf("[cpumanager] unable to read sysfs isolcpus subdir")
diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go
index 4acd5609748..78c5f0f2576 100644
--- a/pkg/kubelet/cm/cpumanager/policy_static.go
+++ b/pkg/kubelet/cm/cpumanager/policy_static.go
@@ -18,6 +18,7 @@ package cpumanager
import (
"fmt"
+ "os"
"strconv"
v1 "k8s.io/api/core/v1"
@@ -614,6 +615,13 @@ func isKubeInfra(pod *v1.Pod) bool {
// get the isolated CPUs (if any) from the devices associated with a specific container
func (p *staticPolicy) podIsolCPUs(pod *v1.Pod, container *v1.Container) cpuset.CPUSet {
+
+ // This is a gross hack to basically turn off awareness of isolcpus to enable
+ // isolated cpus to be allocated to pods the same way as non-isolated CPUs.
+ if _, err := os.Stat("/etc/kubernetes/ignore_isolcpus"); err == nil {
+ return cpuset.NewCPUSet()
+ }
+
// NOTE: This is required for TestStaticPolicyAdd() since makePod() does
// not create UID. We also need a way to properly stub devicemanager.
if len(string(pod.UID)) == 0 {
--
2.17.1

View File

@ -0,0 +1,84 @@
MDSFORMANPAGES="kube-apiserver.md kube-controller-manager.md kube-proxy.md kube-scheduler.md kubelet.md"
# remove comments from man pages
for manpage in ${MDSFORMANPAGES}; do
pos=$(grep -n "<\!-- END MUNGE: UNVERSIONED_WARNING -->" ${manpage} | cut -d':' -f1)
if [ -n ${pos} ]; then
sed -i "1,${pos}{/.*/d}" ${manpage}
fi
done
# for each man page add NAME and SYNOPSIS section
# kube-apiserver
sed -i -s "s/## kube-apiserver/# NAME\nkube-apiserver \- Provides the API for kubernetes orchestration.\n\n# SYNOPSIS\n**kube-apiserver** [OPTIONS]\n/" kube-apiserver.md
cat << 'EOF' >> kube-apiserver.md
# EXAMPLES
```
/usr/bin/kube-apiserver --logtostderr=true --v=0 --etcd_servers=http://127.0.0.1:4001 --insecure_bind_address=127.0.0.1 --insecure_port=8080 --kubelet_port=10250 --service-cluster-ip-range=10.1.1.0/24 --allow_privileged=false
```
EOF
# kube-controller-manager
sed -i -s "s/## kube-controller-manager/# NAME\nkube-controller-manager \- Enforces kubernetes services.\n\n# SYNOPSIS\n**kube-controller-manager** [OPTIONS]\n/" kube-controller-manager.md
cat << 'EOF' >> kube-controller-manager.md
# EXAMPLES
```
/usr/bin/kube-controller-manager --logtostderr=true --v=0 --master=127.0.0.1:8080
```
EOF
# kube-proxy
sed -i -s "s/## kube-proxy/# NAME\nkube-proxy \- Provides network proxy services.\n\n# SYNOPSIS\n**kube-proxy** [OPTIONS]\n/" kube-proxy.md
cat << 'EOF' >> kube-proxy.md
# EXAMPLES
```
/usr/bin/kube-proxy --logtostderr=true --v=0 --master=http://127.0.0.1:8080
```
EOF
# kube-scheduler
sed -i -s "s/## kube-scheduler/# NAME\nkube-scheduler \- Schedules containers on hosts.\n\n# SYNOPSIS\n**kube-scheduler** [OPTIONS]\n/" kube-scheduler.md
cat << 'EOF' >> kube-scheduler.md
# EXAMPLES
```
/usr/bin/kube-scheduler --logtostderr=true --v=0 --master=127.0.0.1:8080
```
EOF
# kubelet
sed -i -s "s/## kubelet/# NAME\nkubelet \- Processes a container manifest so the containers are launched according to how they are described.\n\n# SYNOPSIS\n**kubelet** [OPTIONS]\n/" kubelet.md
cat << 'EOF' >> kubelet.md
# EXAMPLES
```
/usr/bin/kubelet --logtostderr=true --v=0 --api_servers=http://127.0.0.1:8080 --address=127.0.0.1 --port=10250 --hostname_override=127.0.0.1 --allow-privileged=false
```
EOF
# for all man-pages
for md in $MDSFORMANPAGES; do
# correct section names
sed -i -s "s/### Synopsis/# DESCRIPTION/" $md
sed -i -s "s/### Options/# OPTIONS/" $md
# add header
sed -i "s/# NAME/% KUBERNETES(1) kubernetes User Manuals\n# NAME/" $md
# modify list of options
# options with no value in ""
sed -i -r 's/(^ )(-[^":][^":]*)(:)(.*)/\*\*\2\*\*\n\t\4\n/' $md
# option with value in ""
sed -i -r 's/(^ )(-[^":][^":]*)("[^"]*")(:)(.*)/\*\*\2\3\*\*\n\t\5\n/' $md
# options in -s, --long
sed -i -r 's/(^ )(-[a-z], -[^":][^":]*)(:)(.*)/\*\*\2\*\*\n\t\4\n/' $md
sed -i -r 's/(^ )(-[a-z], -[^":][^":]*)("[^"]*")(:)(.*)/\*\*\2\3\*\*\n\t\5\n/' $md
# remove ```
sed -i 's/```//' $md
# remove all lines starting with ######
sed -i 's/^######.*//' $md
# modify footer
sed -i -r "s/^\[!\[Analytics\].*//" $md
# md does not contain section => taking 1
name="${md%.md}"
go-md2man -in $md -out man/man1/$name.1
done

View File

@ -0,0 +1,108 @@
From 62575aa6d34c52dffb02535a526f6361cdedb300 Mon Sep 17 00:00:00 2001
From: Chris Friesen <chris.friesen@windriver.com>
Date: Fri, 3 Sep 2021 18:05:15 -0400
Subject: [PATCH 5/6] kubeadm: create platform pods with zero CPU resources
We want to specify zero CPU resources when creating the manifests
for the static platform pods, as a workaround for the lack of
separate resource tracking for platform resources.
We also specify zero CPU resources for the coredns deployment.
manifests.go appears to be the main file for this, not sure if the
others are used but I changed them just in case.
Signed-off-by: Jim Gauld <james.gauld@windriver.com>
---
cluster/addons/dns/coredns/coredns.yaml.base | 2 +-
cluster/addons/dns/coredns/coredns.yaml.in | 2 +-
cluster/addons/dns/coredns/coredns.yaml.sed | 2 +-
cmd/kubeadm/app/phases/addons/dns/manifests.go | 2 +-
cmd/kubeadm/app/phases/controlplane/manifests.go | 6 +++---
5 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/cluster/addons/dns/coredns/coredns.yaml.base b/cluster/addons/dns/coredns/coredns.yaml.base
index 460db6317db..30873a81f18 100644
--- a/cluster/addons/dns/coredns/coredns.yaml.base
+++ b/cluster/addons/dns/coredns/coredns.yaml.base
@@ -138,7 +138,7 @@ spec:
limits:
memory: __DNS__MEMORY__LIMIT__
requests:
- cpu: 100m
+ cpu: 0
memory: 70Mi
args: [ "-conf", "/etc/coredns/Corefile" ]
volumeMounts:
diff --git a/cluster/addons/dns/coredns/coredns.yaml.in b/cluster/addons/dns/coredns/coredns.yaml.in
index 35fd52f15cd..51d963282ea 100644
--- a/cluster/addons/dns/coredns/coredns.yaml.in
+++ b/cluster/addons/dns/coredns/coredns.yaml.in
@@ -138,7 +138,7 @@ spec:
limits:
memory: 'dns_memory_limit'
requests:
- cpu: 100m
+ cpu: 0
memory: 70Mi
args: [ "-conf", "/etc/coredns/Corefile" ]
volumeMounts:
diff --git a/cluster/addons/dns/coredns/coredns.yaml.sed b/cluster/addons/dns/coredns/coredns.yaml.sed
index ebe0c7182e8..dab87353509 100644
--- a/cluster/addons/dns/coredns/coredns.yaml.sed
+++ b/cluster/addons/dns/coredns/coredns.yaml.sed
@@ -138,7 +138,7 @@ spec:
limits:
memory: $DNS_MEMORY_LIMIT
requests:
- cpu: 100m
+ cpu: 0
memory: 70Mi
args: [ "-conf", "/etc/coredns/Corefile" ]
volumeMounts:
diff --git a/cmd/kubeadm/app/phases/addons/dns/manifests.go b/cmd/kubeadm/app/phases/addons/dns/manifests.go
index 014cbd773c2..18ce45d1e85 100644
--- a/cmd/kubeadm/app/phases/addons/dns/manifests.go
+++ b/cmd/kubeadm/app/phases/addons/dns/manifests.go
@@ -254,7 +254,7 @@ spec:
limits:
memory: 170Mi
requests:
- cpu: 100m
+ cpu: 0
memory: 70Mi
args: [ "-conf", "/etc/coredns/Corefile" ]
volumeMounts:
diff --git a/cmd/kubeadm/app/phases/controlplane/manifests.go b/cmd/kubeadm/app/phases/controlplane/manifests.go
index 8181bea63a4..4c4b4448dd4 100644
--- a/cmd/kubeadm/app/phases/controlplane/manifests.go
+++ b/cmd/kubeadm/app/phases/controlplane/manifests.go
@@ -60,7 +60,7 @@ func GetStaticPodSpecs(cfg *kubeadmapi.ClusterConfiguration, endpoint *kubeadmap
LivenessProbe: staticpodutil.LivenessProbe(staticpodutil.GetAPIServerProbeAddress(endpoint), "/livez", int(endpoint.BindPort), v1.URISchemeHTTPS),
ReadinessProbe: staticpodutil.ReadinessProbe(staticpodutil.GetAPIServerProbeAddress(endpoint), "/readyz", int(endpoint.BindPort), v1.URISchemeHTTPS),
StartupProbe: staticpodutil.StartupProbe(staticpodutil.GetAPIServerProbeAddress(endpoint), "/livez", int(endpoint.BindPort), v1.URISchemeHTTPS, cfg.APIServer.TimeoutForControlPlane),
- Resources: staticpodutil.ComponentResources("250m"),
+ Resources: staticpodutil.ComponentResources("0"),
Env: kubeadmutil.GetProxyEnvVars(),
}, mounts.GetVolumes(kubeadmconstants.KubeAPIServer),
map[string]string{kubeadmconstants.KubeAPIServerAdvertiseAddressEndpointAnnotationKey: endpoint.String()}),
@@ -72,7 +72,7 @@ func GetStaticPodSpecs(cfg *kubeadmapi.ClusterConfiguration, endpoint *kubeadmap
VolumeMounts: staticpodutil.VolumeMountMapToSlice(mounts.GetVolumeMounts(kubeadmconstants.KubeControllerManager)),
LivenessProbe: staticpodutil.LivenessProbe(staticpodutil.GetControllerManagerProbeAddress(cfg), "/healthz", kubeadmconstants.KubeControllerManagerPort, v1.URISchemeHTTPS),
StartupProbe: staticpodutil.StartupProbe(staticpodutil.GetControllerManagerProbeAddress(cfg), "/healthz", kubeadmconstants.KubeControllerManagerPort, v1.URISchemeHTTPS, cfg.APIServer.TimeoutForControlPlane),
- Resources: staticpodutil.ComponentResources("200m"),
+ Resources: staticpodutil.ComponentResources("0"),
Env: kubeadmutil.GetProxyEnvVars(),
}, mounts.GetVolumes(kubeadmconstants.KubeControllerManager), nil),
kubeadmconstants.KubeScheduler: staticpodutil.ComponentPod(v1.Container{
@@ -83,7 +83,7 @@ func GetStaticPodSpecs(cfg *kubeadmapi.ClusterConfiguration, endpoint *kubeadmap
VolumeMounts: staticpodutil.VolumeMountMapToSlice(mounts.GetVolumeMounts(kubeadmconstants.KubeScheduler)),
LivenessProbe: staticpodutil.LivenessProbe(staticpodutil.GetSchedulerProbeAddress(cfg), "/healthz", kubeadmconstants.KubeSchedulerPort, v1.URISchemeHTTPS),
StartupProbe: staticpodutil.StartupProbe(staticpodutil.GetSchedulerProbeAddress(cfg), "/healthz", kubeadmconstants.KubeSchedulerPort, v1.URISchemeHTTPS, cfg.APIServer.TimeoutForControlPlane),
- Resources: staticpodutil.ComponentResources("100m"),
+ Resources: staticpodutil.ComponentResources("0"),
Env: kubeadmutil.GetProxyEnvVars(),
}, mounts.GetVolumes(kubeadmconstants.KubeScheduler), nil),
}
--
2.17.1

View File

@ -0,0 +1,17 @@
# Note: This dropin only works with kubeadm and kubelet v1.11+
[Service]
Environment="KUBELET_KUBECONFIG_ARGS=--bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubelet.conf --kubeconfig=/etc/kubernetes/kubelet.conf"
Environment="KUBELET_CONFIG_ARGS=--config=/var/lib/kubelet/config.yaml"
# This is a file that "kubeadm init" and "kubeadm join" generates at runtime, populating the KUBELET_KUBEADM_ARGS variable dynamically
EnvironmentFile=-/var/lib/kubelet/kubeadm-flags.env
# This is a file that the user can use for overrides of the kubelet args as a last resort. Preferably, the user should use
# the .NodeRegistration.KubeletExtraArgs object in the configuration files instead. KUBELET_EXTRA_ARGS should be sourced from this file.
EnvironmentFile=-/etc/sysconfig/kubelet
ExecStart=
ExecStart=/usr/bin/kubelet $KUBELET_KUBECONFIG_ARGS $KUBELET_CONFIG_ARGS $KUBELET_KUBEADM_ARGS $KUBELET_EXTRA_ARGS
ExecStartPre=-/usr/bin/kubelet-cgroup-setup.sh
ExecStartPost=/bin/bash -c 'echo $MAINPID > /var/run/kubelet.pid;'
ExecStopPost=/bin/rm -f /var/run/kubelet.pid
Restart=always
StartLimitInterval=0
RestartSec=10

View File

@ -0,0 +1,132 @@
#!/bin/bash
#
# Copyright (c) 2019 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# This script does minimal cgroup setup for kubelet. This creates k8s-infra
# cgroup for a minimal set of resource controllers, and configures cpuset
# attributes to span all online cpus and nodes. This will do nothing if
# the k8s-infra cgroup already exists (i.e., assume already configured).
# NOTE: The creation of directories under /sys/fs/cgroup is volatile, and
# does not persist reboots. The cpuset.mems and cpuset.cpus is later updated
# by puppet kubernetes.pp manifest.
#
# Define minimal path
PATH=/bin:/usr/bin:/usr/local/bin
# Log info message to /var/log/daemon.log
function LOG {
logger -p daemon.info "$0($$): $@"
}
# Log error message to /var/log/daemon.log
function ERROR {
logger -s -p daemon.error "$0($$): ERROR: $@"
}
# Create minimal cgroup directories and configure cpuset attributes if required
function create_cgroup {
local cg_name=$1
local cg_nodeset=$2
local cg_cpuset=$3
local CGROUP=/sys/fs/cgroup
local CONTROLLERS_AUTO_DELETED=("pids" "hugetlb")
local CONTROLLERS_PRESERVED=("cpuset" "memory" "cpu,cpuacct" "systemd")
local cnt=''
local CGDIR=''
local RC=0
# Ensure that these cgroups are created every time as they are auto deleted
for cnt in ${CONTROLLERS_AUTO_DELETED[@]}; do
CGDIR=${CGROUP}/${cnt}/${cg_name}
if [ -d ${CGDIR} ]; then
LOG "Nothing to do, already configured: ${CGDIR}."
continue
fi
LOG "Creating: ${CGDIR}"
mkdir -p ${CGDIR}
RC=$?
if [ ${RC} -ne 0 ]; then
ERROR "Creating: ${CGDIR}, rc=${RC}"
exit ${RC}
fi
done
# These cgroups are preserved so if any of these are encountered additional
# cgroup setup is not required
for cnt in ${CONTROLLERS_PRESERVED[@]}; do
CGDIR=${CGROUP}/${cnt}/${cg_name}
if [ -d ${CGDIR} ]; then
LOG "Nothing to do, already configured: ${CGDIR}."
exit ${RC}
fi
LOG "Creating: ${CGDIR}"
mkdir -p ${CGDIR}
RC=$?
if [ ${RC} -ne 0 ]; then
ERROR "Creating: ${CGDIR}, rc=${RC}"
exit ${RC}
fi
done
# Customize cpuset attributes
LOG "Configuring cgroup: ${cg_name}, nodeset: ${cg_nodeset}, cpuset: ${cg_cpuset}"
CGDIR=${CGROUP}/cpuset/${cg_name}
local CGMEMS=${CGDIR}/cpuset.mems
local CGCPUS=${CGDIR}/cpuset.cpus
local CGTASKS=${CGDIR}/tasks
# Assign cgroup memory nodeset
LOG "Assign nodeset ${cg_nodeset} to ${CGMEMS}"
/bin/echo ${cg_nodeset} > ${CGMEMS}
RC=$?
if [ ${RC} -ne 0 ]; then
ERROR "Unable to write to: ${CGMEMS}, rc=${RC}"
exit ${RC}
fi
# Assign cgroup cpus
LOG "Assign cpuset ${cg_cpuset} to ${CGCPUS}"
/bin/echo ${cg_cpuset} > ${CGCPUS}
RC=$?
if [ ${RC} -ne 0 ]; then
ERROR "Assigning: ${cg_cpuset} to ${CGCPUS}, rc=${RC}"
exit ${RC}
fi
# Set file ownership
chown root:root ${CGMEMS} ${CGCPUS} ${CGTASKS}
RC=$?
if [ ${RC} -ne 0 ]; then
ERROR "Setting owner for: ${CGMEMS}, ${CGCPUS}, ${CGTASKS}, rc=${RC}"
exit ${RC}
fi
# Set file mode permissions
chmod 644 ${CGMEMS} ${CGCPUS} ${CGTASKS}
RC=$?
if [ ${RC} -ne 0 ]; then
ERROR "Setting mode for: ${CGMEMS}, ${CGCPUS}, ${CGTASKS}, rc=${RC}"
exit ${RC}
fi
return ${RC}
}
if [ $UID -ne 0 ]; then
ERROR "Require sudo/root."
exit 1
fi
# Configure default kubepods cpuset to span all online cpus and nodes.
ONLINE_NODESET=$(/bin/cat /sys/devices/system/node/online)
ONLINE_CPUSET=$(/bin/cat /sys/devices/system/cpu/online)
# Configure kubelet cgroup to match cgroupRoot.
create_cgroup 'k8s-infra' ${ONLINE_NODESET} ${ONLINE_CPUSET}
exit $?

View File

@ -0,0 +1,111 @@
From 019172946c0146eca91d611595866c70a8ed3ddb Mon Sep 17 00:00:00 2001
From: Jim Gauld <james.gauld@windriver.com>
Date: Fri, 3 Sep 2021 14:10:46 -0400
Subject: [PATCH 1/6] kubelet cpumanager disable CFS quota throttling for
Guaranteed pods
This disables CFS CPU quota to avoid performance degradation due to
Linux kernel CFS quota implementation. Note that 4.18 kernel attempts
to solve the CFS throttling problem, but there are reports that it is
not completely effective.
This disables CFS quota throttling for Guaranteed pods for both
parent and container cgroups by writing -1 to cgroup cpu.cfs_quota_us.
Disabling has a dramatic latency improvement for HTTP response times.
Signed-off-by: Jim Gauld <james.gauld@windriver.com>
---
pkg/kubelet/cm/cpumanager/cpu_manager.go | 22 ++++++++++++++++++++++
pkg/kubelet/cm/helpers_linux.go | 5 +++++
pkg/kubelet/cm/helpers_linux_test.go | 8 ++++----
3 files changed, 31 insertions(+), 4 deletions(-)
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go
index 44368efc441..88cfbc1fa83 100644
--- a/pkg/kubelet/cm/cpumanager/cpu_manager.go
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go
@@ -36,6 +36,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/config"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/status"
+ v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
)
// ActivePodsFunc is a function that returns a list of pods to reconcile.
@@ -242,6 +243,14 @@ func (m *manager) AddContainer(p *v1.Pod, c *v1.Container, containerID string) e
// Get the CPUs assigned to the container during Allocate()
// (or fall back to the default CPUSet if none were assigned).
cpus := m.state.GetCPUSetOrDefault(string(p.UID), c.Name)
+
+ // Guaranteed PODs should not have CFS quota throttle
+ if m.policy.Name() == string(PolicyStatic) && v1qos.GetPodQOS(p) == v1.PodQOSGuaranteed {
+ err := m.disableContainerCPUQuota(containerID)
+ if err != nil {
+ klog.Errorf("[cpumanager] AddContainer disable CPU Quota error: %v", err)
+ }
+ }
m.Unlock()
if !cpus.IsEmpty() {
@@ -489,3 +498,16 @@ func (m *manager) GetCPUs(podUID, containerName string) []int64 {
}
return result
}
+
+func (m *manager) disableContainerCPUQuota(containerID string) error {
+ // Disable CFS CPU quota to avoid performance degradation due to
+ // Linux kernel CFS throttle implementation.
+ // NOTE: 4.18 kernel attempts to solve CFS throttling problem,
+ // but there are reports that it is not completely effective.
+ return m.containerRuntime.UpdateContainerResources(
+ containerID,
+ &runtimeapi.LinuxContainerResources{
+ CpuPeriod: 100000,
+ CpuQuota: -1,
+ })
+}
diff --git a/pkg/kubelet/cm/helpers_linux.go b/pkg/kubelet/cm/helpers_linux.go
index 9b115ab5380..d3185e1e958 100644
--- a/pkg/kubelet/cm/helpers_linux.go
+++ b/pkg/kubelet/cm/helpers_linux.go
@@ -166,6 +166,11 @@ func ResourceConfigForPod(pod *v1.Pod, enforceCPULimits bool, cpuPeriod uint64)
// determine the qos class
qosClass := v1qos.GetPodQOS(pod)
+ // disable cfs quota for guaranteed pods
+ if qosClass == v1.PodQOSGuaranteed {
+ cpuQuota = int64(-1)
+ }
+
// build the result
result := &ResourceConfig{}
if qosClass == v1.PodQOSGuaranteed {
diff --git a/pkg/kubelet/cm/helpers_linux_test.go b/pkg/kubelet/cm/helpers_linux_test.go
index 56d765fbc22..0c43afe5875 100644
--- a/pkg/kubelet/cm/helpers_linux_test.go
+++ b/pkg/kubelet/cm/helpers_linux_test.go
@@ -63,8 +63,8 @@ func TestResourceConfigForPod(t *testing.T) {
burstablePartialShares := MilliCPUToShares(200)
burstableQuota := MilliCPUToQuota(200, int64(defaultQuotaPeriod))
guaranteedShares := MilliCPUToShares(100)
- guaranteedQuota := MilliCPUToQuota(100, int64(defaultQuotaPeriod))
- guaranteedTunedQuota := MilliCPUToQuota(100, int64(tunedQuotaPeriod))
+ guaranteedQuota := int64(-1)
+ guaranteedTunedQuota := int64(-1)
memoryQuantity = resource.MustParse("100Mi")
cpuNoLimit := int64(-1)
guaranteedMemory := memoryQuantity.Value()
@@ -283,8 +283,8 @@ func TestResourceConfigForPodWithCustomCPUCFSQuotaPeriod(t *testing.T) {
burstablePartialShares := MilliCPUToShares(200)
burstableQuota := MilliCPUToQuota(200, int64(defaultQuotaPeriod))
guaranteedShares := MilliCPUToShares(100)
- guaranteedQuota := MilliCPUToQuota(100, int64(defaultQuotaPeriod))
- guaranteedTunedQuota := MilliCPUToQuota(100, int64(tunedQuotaPeriod))
+ guaranteedQuota := int64(-1)
+ guaranteedTunedQuota := int64(-1)
memoryQuantity = resource.MustParse("100Mi")
cpuNoLimit := int64(-1)
guaranteedMemory := memoryQuantity.Value()
--
2.17.1

View File

@ -0,0 +1,139 @@
From 5471fc2f03d1d14ceb250bf98f400cee9feb6983 Mon Sep 17 00:00:00 2001
From: Jim Gauld <james.gauld@windriver.com>
Date: Fri, 3 Sep 2021 15:57:58 -0400
Subject: [PATCH 3/6] kubelet cpumanager infrastructure pods use system
reserved CPUs
This assigns system infrastructure pods to the "reserved" cpuset
to isolate them from the shared pool of CPUs.
Infrastructure pods include any pods that belong to the kube-system,
armada, cert-manager, vault, platform-deployment-manager, portieris,
or notification namespaces.
The implementation is a bit simplistic, it is assumed that the
"reserved" cpuset is large enough to handle all infrastructure pods
CPU allocations.
This also prevents infrastucture pods from using Guaranteed resources.
Signed-off-by: Jim Gauld <james.gauld@windriver.com>
---
pkg/kubelet/cm/cpumanager/policy_static.go | 44 +++++++++++++++++++
.../cm/cpumanager/policy_static_test.go | 19 +++++++-
2 files changed, 62 insertions(+), 1 deletion(-)
diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go
index e892d63641b..ab3206c5dc4 100644
--- a/pkg/kubelet/cm/cpumanager/policy_static.go
+++ b/pkg/kubelet/cm/cpumanager/policy_static.go
@@ -33,6 +33,11 @@ import (
// PolicyStatic is the name of the static policy
const PolicyStatic policyName = "static"
+// Define namespaces used by platform infrastructure pods
+var infraNamespaces = [...]string{
+ "kube-system", "armada", "cert-manager", "platform-deployment-manager", "portieris", "vault", "notification",
+}
+
// staticPolicy is a CPU manager policy that does not change CPU
// assignments for exclusively pinned guaranteed containers after the main
// container process starts.
@@ -233,6 +238,31 @@ func (p *staticPolicy) updateCPUsToReuse(pod *v1.Pod, container *v1.Container, c
}
func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Container) error {
+ // Process infra pods before guaranteed pods
+ if isKubeInfra(pod) {
+ // Container belongs in reserved pool.
+ // We don't want to fall through to the p.guaranteedCPUs() clause below so return either nil or error.
+ if _, ok := s.GetCPUSet(string(pod.UID), container.Name); ok {
+ klog.Infof("[cpumanager] static policy: reserved container already present in state, skipping " +
+ "(namespace: %s, pod UID: %s, pod: %s, container: %s)",
+ pod.Namespace, string(pod.UID), pod.Name, container.Name)
+ return nil
+ }
+
+ cpuset := p.reserved
+ if cpuset.IsEmpty() {
+ // If this happens then someone messed up.
+ return fmt.Errorf("[cpumanager] static policy: reserved container unable to allocate cpus " +
+ "(namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v, reserved:%v",
+ pod.Namespace, string(pod.UID), pod.Name, container.Name, cpuset, p.reserved)
+ }
+ s.SetCPUSet(string(pod.UID), container.Name, cpuset)
+ klog.Infof("[cpumanager] static policy: reserved: AddContainer " +
+ "(namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v",
+ pod.Namespace, string(pod.UID), pod.Name, container.Name, cpuset)
+ return nil
+ }
+
if numCPUs := p.guaranteedCPUs(pod, container); numCPUs != 0 {
klog.Infof("[cpumanager] static policy: Allocate (pod: %s, container: %s)", format.Pod(pod), container.Name)
// container belongs in an exclusively allocated pool
@@ -322,6 +352,10 @@ func (p *staticPolicy) guaranteedCPUs(pod *v1.Pod, container *v1.Container) int
if cpuQuantity.Value()*1000 != cpuQuantity.MilliValue() {
return 0
}
+ // Infrastructure pods use reserved CPUs even if they're in the Guaranteed QoS class
+ if isKubeInfra(pod) {
+ return 0
+ }
// Safe downcast to do for all systems with < 2.1 billion CPUs.
// Per the language spec, `int` is guaranteed to be at least 32 bits wide.
// https://golang.org/ref/spec#Numeric_types
@@ -524,3 +558,13 @@ func (p *staticPolicy) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, reu
return hints
}
+
+// check if a given pod is in a platform infrastructure namespace
+func isKubeInfra(pod *v1.Pod) bool {
+ for _, namespace := range infraNamespaces {
+ if namespace == pod.Namespace {
+ return true
+ }
+ }
+ return false
+}
\ No newline at end of file
diff --git a/pkg/kubelet/cm/cpumanager/policy_static_test.go b/pkg/kubelet/cm/cpumanager/policy_static_test.go
index 9c7e4f146ff..5cfd9a8e24e 100644
--- a/pkg/kubelet/cm/cpumanager/policy_static_test.go
+++ b/pkg/kubelet/cm/cpumanager/policy_static_test.go
@@ -747,7 +747,8 @@ func TestStaticPolicyStartWithResvList(t *testing.T) {
}
func TestStaticPolicyAddWithResvList(t *testing.T) {
-
+ infraPod := makePod("fakePod", "fakeContainer2", "200m", "200m")
+ infraPod.Namespace = "kube-system"
testCases := []staticPolicyTestWithResvList{
{
description: "GuPodSingleCore, SingleSocketHT, ExpectError",
@@ -789,6 +790,22 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
expCPUAlloc: true,
expCSet: cpuset.NewCPUSet(4, 5),
},
+ {
+ description: "InfraPod, SingleSocketHT, ExpectAllocReserved",
+ topo: topoSingleSocketHT,
+ numReservedCPUs: 2,
+ reserved: cpuset.NewCPUSet(0, 1),
+ stAssignments: state.ContainerCPUAssignments{
+ "fakePod": map[string]cpuset.CPUSet{
+ "fakeContainer100": cpuset.NewCPUSet(2, 3, 6, 7),
+ },
+ },
+ stDefaultCPUSet: cpuset.NewCPUSet(4, 5),
+ pod: infraPod,
+ expErr: nil,
+ expCPUAlloc: true,
+ expCSet: cpuset.NewCPUSet(0, 1),
+ },
}
testExcl := true
--
2.17.1

View File

@ -0,0 +1,526 @@
From 05db95e27509e60022a62c1001be2191ba42d2a3 Mon Sep 17 00:00:00 2001
From: Jim Gauld <james.gauld@windriver.com>
Date: Fri, 3 Sep 2021 17:30:31 -0400
Subject: [PATCH 4/6] kubelet cpumanager introduce concept of isolated CPUs
This introduces the concept of "isolated CPUs", which are CPUs that
have been isolated at the kernel level via the "isolcpus" kernel boot
parameter.
When starting the kubelet process, two separate sets of reserved CPUs
may be specified. With this change CPUs reserved via
'--system-reserved=cpu' will be used for infrastructure pods while the
isolated CPUs should be reserved via '--kube-reserved=cpu' to cause
kubelet to skip over them for "normal" CPU resource tracking. The
kubelet code will double-check that the specified isolated CPUs match
what the kernel exposes in "/sys/devices/system/cpu/isolated".
A plugin (outside the scope of this commit) will expose the isolated
CPUs to kubelet via the device plugin API.
If a pod specifies some number of "isolcpus" resources, the device
manager will allocate them. In this code we check whether such
resources have been allocated, and if so we set the container cpuset to
the isolated CPUs. This does mean that it really only makes sense to
specify "isolcpus" resources for best-effort or burstable pods, not for
guaranteed ones since that would throw off the accounting code. In
order to ensure the accounting still works as designed, if "isolcpus"
are specified for guaranteed pods, the affinity will be set to the
non-isolated CPUs.
Signed-off-by: Jim Gauld <james.gauld@windriver.com>
Co-authored-by: Chris Friesen <chris.friesen@windriver.com>
---
pkg/kubelet/cm/container_manager_linux.go | 1 +
pkg/kubelet/cm/cpumanager/cpu_manager.go | 31 ++++++-
pkg/kubelet/cm/cpumanager/cpu_manager_test.go | 13 ++-
pkg/kubelet/cm/cpumanager/policy_static.go | 84 +++++++++++++++++--
.../cm/cpumanager/policy_static_test.go | 44 ++++++++--
5 files changed, 155 insertions(+), 18 deletions(-)
diff --git a/pkg/kubelet/cm/container_manager_linux.go b/pkg/kubelet/cm/container_manager_linux.go
index eeea6a8b7e4..4f250b2a6ca 100644
--- a/pkg/kubelet/cm/container_manager_linux.go
+++ b/pkg/kubelet/cm/container_manager_linux.go
@@ -333,6 +333,7 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I
cm.GetNodeAllocatableReservation(),
nodeConfig.KubeletRootDir,
cm.topologyManager,
+ cm.deviceManager,
)
if err != nil {
klog.Errorf("failed to initialize cpu manager: %v", err)
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go
index a0586c7b860..8470431c07c 100644
--- a/pkg/kubelet/cm/cpumanager/cpu_manager.go
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go
@@ -21,6 +21,8 @@ import (
"math"
"sync"
"time"
+ "strings"
+ "io/ioutil"
cadvisorapi "github.com/google/cadvisor/info/v1"
v1 "k8s.io/api/core/v1"
@@ -34,6 +36,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
"k8s.io/kubernetes/pkg/kubelet/config"
+ "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/status"
v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
@@ -51,6 +54,25 @@ type policyName string
// cpuManagerStateFileName is the file name where cpu manager stores its state
const cpuManagerStateFileName = "cpu_manager_state"
+// get the system-level isolated CPUs
+func getIsolcpus() cpuset.CPUSet {
+ dat, err := ioutil.ReadFile("/sys/devices/system/cpu/isolated")
+ if err != nil {
+ klog.Errorf("[cpumanager] unable to read sysfs isolcpus subdir")
+ return cpuset.NewCPUSet()
+ }
+
+ // The isolated cpus string ends in a newline
+ cpustring := strings.TrimSuffix(string(dat), "\n")
+ cset, err := cpuset.Parse(cpustring)
+ if err != nil {
+ klog.Errorf("[cpumanager] unable to parse sysfs isolcpus string to cpuset")
+ return cpuset.NewCPUSet()
+ }
+
+ return cset
+}
+
// Manager interface provides methods for Kubelet to manage pod cpus.
type Manager interface {
// Start is called during Kubelet initialization.
@@ -136,7 +158,7 @@ func (s *sourcesReadyStub) AddSource(source string) {}
func (s *sourcesReadyStub) AllReady() bool { return true }
// NewManager creates new cpu manager based on provided policy
-func NewManager(cpuPolicyName string, reconcilePeriod time.Duration, machineInfo *cadvisorapi.MachineInfo, specificCPUs cpuset.CPUSet, nodeAllocatableReservation v1.ResourceList, stateFileDirectory string, affinity topologymanager.Store) (Manager, error) {
+func NewManager(cpuPolicyName string, reconcilePeriod time.Duration, machineInfo *cadvisorapi.MachineInfo, specificCPUs cpuset.CPUSet, nodeAllocatableReservation v1.ResourceList, stateFileDirectory string, affinity topologymanager.Store, deviceManager devicemanager.Manager) (Manager, error) {
var topo *topology.CPUTopology
var policy Policy
@@ -173,8 +195,11 @@ func NewManager(cpuPolicyName string, reconcilePeriod time.Duration, machineInfo
// NOTE: Set excludeReserved unconditionally to exclude reserved CPUs from default cpuset.
// This variable is primarily to make testing easier.
excludeReserved := true
- policy, err = NewStaticPolicy(topo, numReservedCPUs, specificCPUs, affinity, excludeReserved)
-
+ // isolCPUs is the set of kernel-isolated CPUs. They should be a subset of specificCPUs or
+ // of the CPUs that NewStaticPolicy() will pick if numReservedCPUs is set. It's only in the
+ // argument list here for ease of testing, it's really internal to the policy.
+ isolCPUs := getIsolcpus()
+ policy, err = NewStaticPolicy(topo, numReservedCPUs, specificCPUs, isolCPUs, affinity, deviceManager, excludeReserved)
if err != nil {
return nil, fmt.Errorf("new static policy error: %v", err)
}
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
index a155791e75f..7a6ea90b3c5 100644
--- a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
@@ -38,6 +38,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology"
"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
+ "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager"
)
type mockState struct {
@@ -211,6 +212,7 @@ func makeMultiContainerPod(initCPUs, appCPUs []struct{ request, limit string })
}
func TestCPUManagerAdd(t *testing.T) {
+ testDM, _ := devicemanager.NewManagerStub()
testExcl := false
testPolicy, _ := NewStaticPolicy(
&topology.CPUTopology{
@@ -226,7 +228,9 @@ func TestCPUManagerAdd(t *testing.T) {
},
0,
cpuset.NewCPUSet(),
+ cpuset.NewCPUSet(),
topologymanager.NewFakeManager(),
+ testDM,
testExcl)
testCases := []struct {
description string
@@ -483,8 +487,9 @@ func TestCPUManagerAddWithInitContainers(t *testing.T) {
}
testExcl := false
+ testDM, _ := devicemanager.NewManagerStub()
for _, testCase := range testCases {
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testExcl)
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testDM, testExcl)
state := &mockState{
assignments: testCase.stAssignments,
@@ -636,7 +641,8 @@ func TestCPUManagerGenerate(t *testing.T) {
}
defer os.RemoveAll(sDir)
- mgr, err := NewManager(testCase.cpuPolicyName, 5*time.Second, machineInfo, cpuset.NewCPUSet(), testCase.nodeAllocatableReservation, sDir, topologymanager.NewFakeManager())
+ testDM, err := devicemanager.NewManagerStub()
+ mgr, err := NewManager(testCase.cpuPolicyName, 5*time.Second, machineInfo, cpuset.NewCPUSet(), testCase.nodeAllocatableReservation, sDir, topologymanager.NewFakeManager(), testDM)
if testCase.expectedError != nil {
if !strings.Contains(err.Error(), testCase.expectedError.Error()) {
t.Errorf("Unexpected error message. Have: %s wants %s", err.Error(), testCase.expectedError.Error())
@@ -991,6 +997,7 @@ func TestReconcileState(t *testing.T) {
// the following tests are with --reserved-cpus configured
func TestCPUManagerAddWithResvList(t *testing.T) {
testExcl := false
+ testDM, _ := devicemanager.NewManagerStub()
testPolicy, _ := NewStaticPolicy(
&topology.CPUTopology{
NumCPUs: 4,
@@ -1005,7 +1012,9 @@ func TestCPUManagerAddWithResvList(t *testing.T) {
},
1,
cpuset.NewCPUSet(0),
+ cpuset.NewCPUSet(),
topologymanager.NewFakeManager(),
+ testDM,
testExcl)
testCases := []struct {
description string
diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go
index ab3206c5dc4..4acd5609748 100644
--- a/pkg/kubelet/cm/cpumanager/policy_static.go
+++ b/pkg/kubelet/cm/cpumanager/policy_static.go
@@ -18,6 +18,7 @@ package cpumanager
import (
"fmt"
+ "strconv"
v1 "k8s.io/api/core/v1"
"k8s.io/klog/v2"
@@ -28,6 +29,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
"k8s.io/kubernetes/pkg/kubelet/util/format"
+ "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager"
)
// PolicyStatic is the name of the static policy
@@ -81,6 +83,10 @@ type staticPolicy struct {
topology *topology.CPUTopology
// set of CPUs that is not available for exclusive assignment
reserved cpuset.CPUSet
+ // subset of reserved CPUs with isolcpus attribute
+ isolcpus cpuset.CPUSet
+ // parent containerManager, used to get device list
+ deviceManager devicemanager.Manager
// If true, default CPUSet should exclude reserved CPUs
excludeReserved bool
// topology manager reference to get container Topology affinity
@@ -95,7 +101,7 @@ var _ Policy = &staticPolicy{}
// NewStaticPolicy returns a CPU manager policy that does not change CPU
// assignments for exclusively pinned guaranteed containers after the main
// container process starts.
-func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reservedCPUs cpuset.CPUSet, affinity topologymanager.Store, excludeReserved bool) (Policy, error) {
+func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reservedCPUs cpuset.CPUSet, isolCPUs cpuset.CPUSet, affinity topologymanager.Store, deviceManager devicemanager.Manager, excludeReserved bool) (Policy, error) {
allCPUs := topology.CPUDetails.CPUs()
var reserved cpuset.CPUSet
if reservedCPUs.Size() > 0 {
@@ -116,9 +122,17 @@ func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reserv
klog.Infof("[cpumanager] reserved %d CPUs (\"%s\") not available for exclusive assignment", reserved.Size(), reserved)
+ if !isolCPUs.IsSubsetOf(reserved) {
+ klog.Errorf("[cpumanager] isolCPUs %v is not a subset of reserved %v", isolCPUs, reserved)
+ reserved = reserved.Union(isolCPUs)
+ klog.Warningf("[cpumanager] mismatch isolCPUs %v, force reserved %v", isolCPUs, reserved)
+ }
+
return &staticPolicy{
topology: topology,
reserved: reserved,
+ isolcpus: isolCPUs,
+ deviceManager: deviceManager,
excludeReserved: excludeReserved,
affinity: affinity,
cpusToReuse: make(map[string]cpuset.CPUSet),
@@ -155,8 +169,8 @@ func (p *staticPolicy) validateState(s state.State) error {
} else {
s.SetDefaultCPUSet(allCPUs)
}
- klog.Infof("[cpumanager] static policy: CPUSet: allCPUs:%v, reserved:%v, default:%v\n",
- allCPUs, p.reserved, s.GetDefaultCPUSet())
+ klog.Infof("[cpumanager] static policy: CPUSet: allCPUs:%v, reserved:%v, isolcpus:%v, default:%v\n",
+ allCPUs, p.reserved, p.isolcpus, s.GetDefaultCPUSet())
return nil
}
@@ -249,12 +263,12 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai
return nil
}
- cpuset := p.reserved
+ cpuset := p.reserved.Clone().Difference(p.isolcpus)
if cpuset.IsEmpty() {
// If this happens then someone messed up.
return fmt.Errorf("[cpumanager] static policy: reserved container unable to allocate cpus " +
- "(namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v, reserved:%v",
- pod.Namespace, string(pod.UID), pod.Name, container.Name, cpuset, p.reserved)
+ "(namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v, reserved:%v, isolcpus:%v",
+ pod.Namespace, string(pod.UID), pod.Name, container.Name, cpuset, p.reserved, p.isolcpus)
}
s.SetCPUSet(string(pod.UID), container.Name, cpuset)
klog.Infof("[cpumanager] static policy: reserved: AddContainer " +
@@ -285,8 +299,37 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai
}
s.SetCPUSet(string(pod.UID), container.Name, cpuset)
p.updateCPUsToReuse(pod, container, cpuset)
+ klog.Infof("[cpumanager] guaranteed: AddContainer " +
+ "(namespace: %s, pod UID: %s, pod: %s, container: %s); numCPUS=%d, cpuset=%v",
+ pod.Namespace, string(pod.UID), pod.Name, container.Name, numCPUs, cpuset)
+ return nil
+ }
+ if isolcpus := p.podIsolCPUs(pod, container); isolcpus.Size() > 0 {
+ // container has requested isolated CPUs
+ if set, ok := s.GetCPUSet(string(pod.UID), container.Name); ok {
+ if set.Equals(isolcpus) {
+ klog.Infof("[cpumanager] isolcpus container already present in state, skipping " +
+ "(namespace: %s, pod UID: %s, pod: %s, container: %s)",
+ pod.Namespace, string(pod.UID), pod.Name, container.Name)
+ return nil
+ } else {
+ klog.Infof("[cpumanager] isolcpus container state has cpus %v, should be %v" +
+ "(namespace: %s, pod UID: %s, pod: %s, container: %s)",
+ isolcpus, set, pod.Namespace, string(pod.UID), pod.Name, container.Name)
+ }
+ }
+ // Note that we do not do anything about init containers here.
+ // It looks like devices are allocated per-pod based on effective requests/limits
+ // and extra devices from initContainers are not freed up when the regular containers start.
+ // TODO: confirm this is still true for 1.20
+ s.SetCPUSet(string(pod.UID), container.Name, isolcpus)
+ klog.Infof("[cpumanager] isolcpus: AddContainer " +
+ "(namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v",
+ pod.Namespace, string(pod.UID), pod.Name, container.Name, isolcpus)
+ return nil
}
+
// container belongs in the shared pool (nothing to do; use default cpuset)
return nil
}
@@ -567,4 +610,33 @@ func isKubeInfra(pod *v1.Pod) bool {
}
}
return false
+}
+
+// get the isolated CPUs (if any) from the devices associated with a specific container
+func (p *staticPolicy) podIsolCPUs(pod *v1.Pod, container *v1.Container) cpuset.CPUSet {
+ // NOTE: This is required for TestStaticPolicyAdd() since makePod() does
+ // not create UID. We also need a way to properly stub devicemanager.
+ if len(string(pod.UID)) == 0 {
+ return cpuset.NewCPUSet()
+ }
+ devices := p.deviceManager.GetDevices(string(pod.UID), container.Name)
+ for _, dev := range devices {
+ // this resource name needs to match the isolcpus device plugin
+ if dev.ResourceName == "windriver.com/isolcpus" {
+ cpuStrList := dev.DeviceIds
+ if len(cpuStrList) > 0 {
+ cpuSet := cpuset.NewCPUSet()
+ // loop over the list of strings, convert each one to int, add to cpuset
+ for _, cpuStr := range cpuStrList {
+ cpu, err := strconv.Atoi(cpuStr)
+ if err != nil {
+ panic(err)
+ }
+ cpuSet = cpuSet.Union(cpuset.NewCPUSet(cpu))
+ }
+ return cpuSet
+ }
+ }
+ }
+ return cpuset.NewCPUSet()
}
\ No newline at end of file
diff --git a/pkg/kubelet/cm/cpumanager/policy_static_test.go b/pkg/kubelet/cm/cpumanager/policy_static_test.go
index 5cfd9a8e24e..8307aa1e3f0 100644
--- a/pkg/kubelet/cm/cpumanager/policy_static_test.go
+++ b/pkg/kubelet/cm/cpumanager/policy_static_test.go
@@ -27,6 +27,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
+ "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager"
)
type staticPolicyTest struct {
@@ -45,8 +46,9 @@ type staticPolicyTest struct {
}
func TestStaticPolicyName(t *testing.T) {
+ testDM, _ := devicemanager.NewManagerStub()
testExcl := false
- policy, _ := NewStaticPolicy(topoSingleSocketHT, 1, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testExcl)
+ policy, _ := NewStaticPolicy(topoSingleSocketHT, 1, cpuset.NewCPUSet(), cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testDM, testExcl)
policyName := policy.Name()
if policyName != "static" {
@@ -56,6 +58,7 @@ func TestStaticPolicyName(t *testing.T) {
}
func TestStaticPolicyStart(t *testing.T) {
+ testDM, _ := devicemanager.NewManagerStub()
testCases := []staticPolicyTest{
{
description: "non-corrupted state",
@@ -131,7 +134,7 @@ func TestStaticPolicyStart(t *testing.T) {
}
for _, testCase := range testCases {
t.Run(testCase.description, func(t *testing.T) {
- p, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testCase.excludeReserved)
+ p, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testDM, testCase.excludeReserved)
policy := p.(*staticPolicy)
st := &mockState{
assignments: testCase.stAssignments,
@@ -179,6 +182,7 @@ func TestStaticPolicyAdd(t *testing.T) {
largeTopoSock0CPUSet := largeTopoSock0Builder.Result()
largeTopoSock1CPUSet := largeTopoSock1Builder.Result()
+ testDM, _ := devicemanager.NewManagerStub()
testCases := []staticPolicyTest{
{
description: "GuPodSingleCore, SingleSocketHT, ExpectError",
@@ -447,7 +451,7 @@ func TestStaticPolicyAdd(t *testing.T) {
}
for _, testCase := range testCases {
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testCase.excludeReserved)
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testDM, testCase.excludeReserved)
st := &mockState{
assignments: testCase.stAssignments,
@@ -490,6 +494,7 @@ func TestStaticPolicyAdd(t *testing.T) {
}
func TestStaticPolicyRemove(t *testing.T) {
+ testDM, _ := devicemanager.NewManagerStub()
excludeReserved := false
testCases := []staticPolicyTest{
{
@@ -549,7 +554,7 @@ func TestStaticPolicyRemove(t *testing.T) {
}
for _, testCase := range testCases {
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), excludeReserved)
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testDM, excludeReserved)
st := &mockState{
assignments: testCase.stAssignments,
@@ -571,6 +576,7 @@ func TestStaticPolicyRemove(t *testing.T) {
}
func TestTopologyAwareAllocateCPUs(t *testing.T) {
+ testDM, _ := devicemanager.NewManagerStub()
excludeReserved := false
testCases := []struct {
description string
@@ -640,7 +646,7 @@ func TestTopologyAwareAllocateCPUs(t *testing.T) {
},
}
for _, tc := range testCases {
- p, _ := NewStaticPolicy(tc.topo, 0, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), excludeReserved)
+ p, _ := NewStaticPolicy(tc.topo, 0, cpuset.NewCPUSet(), cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testDM, excludeReserved)
policy := p.(*staticPolicy)
st := &mockState{
assignments: tc.stAssignments,
@@ -673,6 +679,7 @@ type staticPolicyTestWithResvList struct {
topo *topology.CPUTopology
numReservedCPUs int
reserved cpuset.CPUSet
+ isolcpus cpuset.CPUSet
stAssignments state.ContainerCPUAssignments
stDefaultCPUSet cpuset.CPUSet
pod *v1.Pod
@@ -713,9 +720,10 @@ func TestStaticPolicyStartWithResvList(t *testing.T) {
},
}
testExcl := false
+ testDM, _ := devicemanager.NewManagerStub()
for _, testCase := range testCases {
t.Run(testCase.description, func(t *testing.T) {
- p, err := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), testExcl)
+ p, err := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testDM, testExcl)
if !reflect.DeepEqual(err, testCase.expNewErr) {
t.Errorf("StaticPolicy Start() error (%v). expected error: %v but got: %v",
testCase.description, testCase.expNewErr, err)
@@ -755,6 +763,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
topo: topoSingleSocketHT,
numReservedCPUs: 1,
reserved: cpuset.NewCPUSet(0),
+ isolcpus: cpuset.NewCPUSet(),
stAssignments: state.ContainerCPUAssignments{},
stDefaultCPUSet: cpuset.NewCPUSet(1, 2, 3, 4, 5, 6, 7),
pod: makePod("fakePod", "fakeContainer2", "8000m", "8000m"),
@@ -767,6 +776,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
topo: topoSingleSocketHT,
numReservedCPUs: 2,
reserved: cpuset.NewCPUSet(0, 1),
+ isolcpus: cpuset.NewCPUSet(),
stAssignments: state.ContainerCPUAssignments{},
stDefaultCPUSet: cpuset.NewCPUSet(2, 3, 4, 5, 6, 7),
pod: makePod("fakePod", "fakeContainer2", "1000m", "1000m"),
@@ -779,6 +789,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
topo: topoSingleSocketHT,
numReservedCPUs: 2,
reserved: cpuset.NewCPUSet(0, 1),
+ isolcpus: cpuset.NewCPUSet(),
stAssignments: state.ContainerCPUAssignments{
"fakePod": map[string]cpuset.CPUSet{
"fakeContainer100": cpuset.NewCPUSet(2, 3, 6, 7),
@@ -795,6 +806,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
topo: topoSingleSocketHT,
numReservedCPUs: 2,
reserved: cpuset.NewCPUSet(0, 1),
+ isolcpus: cpuset.NewCPUSet(),
stAssignments: state.ContainerCPUAssignments{
"fakePod": map[string]cpuset.CPUSet{
"fakeContainer100": cpuset.NewCPUSet(2, 3, 6, 7),
@@ -806,11 +818,29 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
expCPUAlloc: true,
expCSet: cpuset.NewCPUSet(0, 1),
},
+ {
+ description: "InfraPod, SingleSocketHT, Isolcpus, ExpectAllocReserved",
+ topo: topoSingleSocketHT,
+ numReservedCPUs: 2,
+ reserved: cpuset.NewCPUSet(0, 1),
+ isolcpus: cpuset.NewCPUSet(1),
+ stAssignments: state.ContainerCPUAssignments{
+ "fakePod": map[string]cpuset.CPUSet{
+ "fakeContainer100": cpuset.NewCPUSet(2, 3, 6, 7),
+ },
+ },
+ stDefaultCPUSet: cpuset.NewCPUSet(4, 5),
+ pod: infraPod,
+ expErr: nil,
+ expCPUAlloc: true,
+ expCSet: cpuset.NewCPUSet(0),
+ },
}
testExcl := true
+ testDM, _ := devicemanager.NewManagerStub()
for _, testCase := range testCases {
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), testExcl)
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, testCase.isolcpus, topologymanager.NewFakeManager(), testDM, testExcl)
st := &mockState{
assignments: testCase.stAssignments,
--
2.17.1

View File

@ -0,0 +1,313 @@
From c85d0d1a42fc5989f2e989daf46fdedeebf486a4 Mon Sep 17 00:00:00 2001
From: Jim Gauld <james.gauld@windriver.com>
Date: Fri, 3 Sep 2021 15:31:31 -0400
Subject: [PATCH 2/6] kubelet cpumanager keep normal containers off reserved
CPUs
When starting the kubelet process, two separate sets of reserved CPUs
may be specified. With this change CPUs reserved via '--system-reserved=cpu'
or '--kube-reserved=cpu' will be ignored by kubernetes itself. A small
tweak to the default CPU affinity ensures that "normal" Kubernetes
pods won't run on the reserved CPUs.
Signed-off-by: Jim Gauld <james.gauld@windriver.com>
---
pkg/kubelet/cm/cpumanager/cpu_manager.go | 6 +++-
pkg/kubelet/cm/cpumanager/cpu_manager_test.go | 11 ++++--
pkg/kubelet/cm/cpumanager/policy_static.go | 29 ++++++++++++---
.../cm/cpumanager/policy_static_test.go | 35 +++++++++++++------
4 files changed, 62 insertions(+), 19 deletions(-)
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go
index 88cfbc1fa83..a0586c7b860 100644
--- a/pkg/kubelet/cm/cpumanager/cpu_manager.go
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go
@@ -170,7 +170,11 @@ func NewManager(cpuPolicyName string, reconcilePeriod time.Duration, machineInfo
// exclusively allocated.
reservedCPUsFloat := float64(reservedCPUs.MilliValue()) / 1000
numReservedCPUs := int(math.Ceil(reservedCPUsFloat))
- policy, err = NewStaticPolicy(topo, numReservedCPUs, specificCPUs, affinity)
+ // NOTE: Set excludeReserved unconditionally to exclude reserved CPUs from default cpuset.
+ // This variable is primarily to make testing easier.
+ excludeReserved := true
+ policy, err = NewStaticPolicy(topo, numReservedCPUs, specificCPUs, affinity, excludeReserved)
+
if err != nil {
return nil, fmt.Errorf("new static policy error: %v", err)
}
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
index 34b170be234..a155791e75f 100644
--- a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
@@ -211,6 +211,7 @@ func makeMultiContainerPod(initCPUs, appCPUs []struct{ request, limit string })
}
func TestCPUManagerAdd(t *testing.T) {
+ testExcl := false
testPolicy, _ := NewStaticPolicy(
&topology.CPUTopology{
NumCPUs: 4,
@@ -225,7 +226,8 @@ func TestCPUManagerAdd(t *testing.T) {
},
0,
cpuset.NewCPUSet(),
- topologymanager.NewFakeManager())
+ topologymanager.NewFakeManager(),
+ testExcl)
testCases := []struct {
description string
updateErr error
@@ -480,8 +482,9 @@ func TestCPUManagerAddWithInitContainers(t *testing.T) {
},
}
+ testExcl := false
for _, testCase := range testCases {
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager())
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testExcl)
state := &mockState{
assignments: testCase.stAssignments,
@@ -987,6 +990,7 @@ func TestReconcileState(t *testing.T) {
// above test cases are without kubelet --reserved-cpus cmd option
// the following tests are with --reserved-cpus configured
func TestCPUManagerAddWithResvList(t *testing.T) {
+ testExcl := false
testPolicy, _ := NewStaticPolicy(
&topology.CPUTopology{
NumCPUs: 4,
@@ -1001,7 +1005,8 @@ func TestCPUManagerAddWithResvList(t *testing.T) {
},
1,
cpuset.NewCPUSet(0),
- topologymanager.NewFakeManager())
+ topologymanager.NewFakeManager(),
+ testExcl)
testCases := []struct {
description string
updateErr error
diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go
index c3309ef7280..e892d63641b 100644
--- a/pkg/kubelet/cm/cpumanager/policy_static.go
+++ b/pkg/kubelet/cm/cpumanager/policy_static.go
@@ -76,6 +76,8 @@ type staticPolicy struct {
topology *topology.CPUTopology
// set of CPUs that is not available for exclusive assignment
reserved cpuset.CPUSet
+ // If true, default CPUSet should exclude reserved CPUs
+ excludeReserved bool
// topology manager reference to get container Topology affinity
affinity topologymanager.Store
// set of CPUs to reuse across allocations in a pod
@@ -88,7 +90,7 @@ var _ Policy = &staticPolicy{}
// NewStaticPolicy returns a CPU manager policy that does not change CPU
// assignments for exclusively pinned guaranteed containers after the main
// container process starts.
-func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reservedCPUs cpuset.CPUSet, affinity topologymanager.Store) (Policy, error) {
+func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reservedCPUs cpuset.CPUSet, affinity topologymanager.Store, excludeReserved bool) (Policy, error) {
allCPUs := topology.CPUDetails.CPUs()
var reserved cpuset.CPUSet
if reservedCPUs.Size() > 0 {
@@ -112,6 +114,7 @@ func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reserv
return &staticPolicy{
topology: topology,
reserved: reserved,
+ excludeReserved: excludeReserved,
affinity: affinity,
cpusToReuse: make(map[string]cpuset.CPUSet),
}, nil
@@ -140,7 +143,15 @@ func (p *staticPolicy) validateState(s state.State) error {
}
// state is empty initialize
allCPUs := p.topology.CPUDetails.CPUs()
- s.SetDefaultCPUSet(allCPUs)
+ if p.excludeReserved {
+ // Exclude reserved CPUs from the default CPUSet to keep containers off them
+ // unless explicitly affined.
+ s.SetDefaultCPUSet(allCPUs.Difference(p.reserved))
+ } else {
+ s.SetDefaultCPUSet(allCPUs)
+ }
+ klog.Infof("[cpumanager] static policy: CPUSet: allCPUs:%v, reserved:%v, default:%v\n",
+ allCPUs, p.reserved, s.GetDefaultCPUSet())
return nil
}
@@ -148,9 +159,11 @@ func (p *staticPolicy) validateState(s state.State) error {
// 1. Check if the reserved cpuset is not part of default cpuset because:
// - kube/system reserved have changed (increased) - may lead to some containers not being able to start
// - user tampered with file
- if !p.reserved.Intersection(tmpDefaultCPUset).Equals(p.reserved) {
- return fmt.Errorf("not all reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
- p.reserved.String(), tmpDefaultCPUset.String())
+ if !p.excludeReserved {
+ if !p.reserved.Intersection(tmpDefaultCPUset).Equals(p.reserved) {
+ return fmt.Errorf("not all reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
+ p.reserved.String(), tmpDefaultCPUset.String())
+ }
}
// 2. Check if state for static policy is consistent
@@ -179,6 +192,9 @@ func (p *staticPolicy) validateState(s state.State) error {
}
}
totalKnownCPUs = totalKnownCPUs.UnionAll(tmpCPUSets)
+ if p.excludeReserved {
+ totalKnownCPUs = totalKnownCPUs.Union(p.reserved)
+ }
if !totalKnownCPUs.Equals(p.topology.CPUDetails.CPUs()) {
return fmt.Errorf("current set of available CPUs \"%s\" doesn't match with CPUs in state \"%s\"",
p.topology.CPUDetails.CPUs().String(), totalKnownCPUs.String())
@@ -249,6 +265,9 @@ func (p *staticPolicy) RemoveContainer(s state.State, podUID string, containerNa
klog.Infof("[cpumanager] static policy: RemoveContainer (pod: %s, container: %s)", podUID, containerName)
if toRelease, ok := s.GetCPUSet(podUID, containerName); ok {
s.Delete(podUID, containerName)
+ if p.excludeReserved {
+ toRelease = toRelease.Difference(p.reserved)
+ }
// Mutate the shared pool, adding released cpus.
s.SetDefaultCPUSet(s.GetDefaultCPUSet().Union(toRelease))
}
diff --git a/pkg/kubelet/cm/cpumanager/policy_static_test.go b/pkg/kubelet/cm/cpumanager/policy_static_test.go
index b4b46c68c17..9c7e4f146ff 100644
--- a/pkg/kubelet/cm/cpumanager/policy_static_test.go
+++ b/pkg/kubelet/cm/cpumanager/policy_static_test.go
@@ -33,6 +33,7 @@ type staticPolicyTest struct {
description string
topo *topology.CPUTopology
numReservedCPUs int
+ excludeReserved bool
podUID string
containerName string
stAssignments state.ContainerCPUAssignments
@@ -44,7 +45,8 @@ type staticPolicyTest struct {
}
func TestStaticPolicyName(t *testing.T) {
- policy, _ := NewStaticPolicy(topoSingleSocketHT, 1, cpuset.NewCPUSet(), topologymanager.NewFakeManager())
+ testExcl := false
+ policy, _ := NewStaticPolicy(topoSingleSocketHT, 1, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testExcl)
policyName := policy.Name()
if policyName != "static" {
@@ -74,6 +76,15 @@ func TestStaticPolicyStart(t *testing.T) {
stDefaultCPUSet: cpuset.NewCPUSet(),
expCSet: cpuset.NewCPUSet(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11),
},
+ {
+ description: "empty cpuset exclude reserved",
+ topo: topoDualSocketHT,
+ numReservedCPUs: 2,
+ excludeReserved: true,
+ stAssignments: state.ContainerCPUAssignments{},
+ stDefaultCPUSet: cpuset.NewCPUSet(),
+ expCSet: cpuset.NewCPUSet(1, 2, 3, 4, 5, 7, 8, 9, 10, 11),
+ },
{
description: "reserved cores 0 & 6 are not present in available cpuset",
topo: topoDualSocketHT,
@@ -120,7 +131,7 @@ func TestStaticPolicyStart(t *testing.T) {
}
for _, testCase := range testCases {
t.Run(testCase.description, func(t *testing.T) {
- p, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager())
+ p, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testCase.excludeReserved)
policy := p.(*staticPolicy)
st := &mockState{
assignments: testCase.stAssignments,
@@ -436,7 +447,7 @@ func TestStaticPolicyAdd(t *testing.T) {
}
for _, testCase := range testCases {
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager())
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testCase.excludeReserved)
st := &mockState{
assignments: testCase.stAssignments,
@@ -479,6 +490,7 @@ func TestStaticPolicyAdd(t *testing.T) {
}
func TestStaticPolicyRemove(t *testing.T) {
+ excludeReserved := false
testCases := []staticPolicyTest{
{
description: "SingleSocketHT, DeAllocOneContainer",
@@ -537,7 +549,7 @@ func TestStaticPolicyRemove(t *testing.T) {
}
for _, testCase := range testCases {
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager())
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), excludeReserved)
st := &mockState{
assignments: testCase.stAssignments,
@@ -559,6 +571,7 @@ func TestStaticPolicyRemove(t *testing.T) {
}
func TestTopologyAwareAllocateCPUs(t *testing.T) {
+ excludeReserved := false
testCases := []struct {
description string
topo *topology.CPUTopology
@@ -627,7 +640,7 @@ func TestTopologyAwareAllocateCPUs(t *testing.T) {
},
}
for _, tc := range testCases {
- p, _ := NewStaticPolicy(tc.topo, 0, cpuset.NewCPUSet(), topologymanager.NewFakeManager())
+ p, _ := NewStaticPolicy(tc.topo, 0, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), excludeReserved)
policy := p.(*staticPolicy)
st := &mockState{
assignments: tc.stAssignments,
@@ -699,9 +712,10 @@ func TestStaticPolicyStartWithResvList(t *testing.T) {
expNewErr: fmt.Errorf("[cpumanager] unable to reserve the required amount of CPUs (size of 0-1 did not equal 1)"),
},
}
+ testExcl := false
for _, testCase := range testCases {
t.Run(testCase.description, func(t *testing.T) {
- p, err := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager())
+ p, err := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), testExcl)
if !reflect.DeepEqual(err, testCase.expNewErr) {
t.Errorf("StaticPolicy Start() error (%v). expected error: %v but got: %v",
testCase.description, testCase.expNewErr, err)
@@ -741,7 +755,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
numReservedCPUs: 1,
reserved: cpuset.NewCPUSet(0),
stAssignments: state.ContainerCPUAssignments{},
- stDefaultCPUSet: cpuset.NewCPUSet(0, 1, 2, 3, 4, 5, 6, 7),
+ stDefaultCPUSet: cpuset.NewCPUSet(1, 2, 3, 4, 5, 6, 7),
pod: makePod("fakePod", "fakeContainer2", "8000m", "8000m"),
expErr: fmt.Errorf("not enough cpus available to satisfy request"),
expCPUAlloc: false,
@@ -753,7 +767,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
numReservedCPUs: 2,
reserved: cpuset.NewCPUSet(0, 1),
stAssignments: state.ContainerCPUAssignments{},
- stDefaultCPUSet: cpuset.NewCPUSet(0, 1, 2, 3, 4, 5, 6, 7),
+ stDefaultCPUSet: cpuset.NewCPUSet(2, 3, 4, 5, 6, 7),
pod: makePod("fakePod", "fakeContainer2", "1000m", "1000m"),
expErr: nil,
expCPUAlloc: true,
@@ -769,7 +783,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
"fakeContainer100": cpuset.NewCPUSet(2, 3, 6, 7),
},
},
- stDefaultCPUSet: cpuset.NewCPUSet(0, 1, 4, 5),
+ stDefaultCPUSet: cpuset.NewCPUSet(4, 5),
pod: makePod("fakePod", "fakeContainer3", "2000m", "2000m"),
expErr: nil,
expCPUAlloc: true,
@@ -777,8 +791,9 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
},
}
+ testExcl := true
for _, testCase := range testCases {
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager())
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), testExcl)
st := &mockState{
assignments: testCase.stAssignments,
--
2.17.1

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff