From 019172946c0146eca91d611595866c70a8ed3ddb Mon Sep 17 00:00:00 2001 From: Jim Gauld Date: Fri, 3 Sep 2021 14:10:46 -0400 Subject: [PATCH 1/6] kubelet cpumanager disable CFS quota throttling for Guaranteed pods This disables the CFS CPU quota to avoid performance degradation caused by the Linux kernel's CFS quota implementation. Note that the 4.18 kernel attempts to solve the CFS throttling problem, but there are reports that it is not completely effective. CFS quota throttling is disabled for Guaranteed pods for both parent and container cgroups by writing -1 to the cgroup's cpu.cfs_quota_us; the container-level change is applied only when the static CPU manager policy is in use. Disabling the quota dramatically improves latency for HTTP response times. Signed-off-by: Jim Gauld --- pkg/kubelet/cm/cpumanager/cpu_manager.go | 22 ++++++++++++++++++++++ pkg/kubelet/cm/helpers_linux.go | 5 +++++ pkg/kubelet/cm/helpers_linux_test.go | 8 ++++---- 3 files changed, 31 insertions(+), 4 deletions(-) diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go index 44368efc441..88cfbc1fa83 100644 --- a/pkg/kubelet/cm/cpumanager/cpu_manager.go +++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go @@ -36,6 +36,7 @@ import ( "k8s.io/kubernetes/pkg/kubelet/config" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" "k8s.io/kubernetes/pkg/kubelet/status" + v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos" ) // ActivePodsFunc is a function that returns a list of pods to reconcile. @@ -242,6 +243,14 @@ func (m *manager) AddContainer(p *v1.Pod, c *v1.Container, containerID string) e // Get the CPUs assigned to the container during Allocate() // (or fall back to the default CPUSet if none were assigned). 
cpus := m.state.GetCPUSetOrDefault(string(p.UID), c.Name) + + // Guaranteed PODs should not have CFS quota throttle + if m.policy.Name() == string(PolicyStatic) && v1qos.GetPodQOS(p) == v1.PodQOSGuaranteed { + err := m.disableContainerCPUQuota(containerID) + if err != nil { + klog.Errorf("[cpumanager] AddContainer disable CPU Quota error: %v", err) + } + } m.Unlock() if !cpus.IsEmpty() { @@ -489,3 +498,16 @@ func (m *manager) GetCPUs(podUID, containerName string) []int64 { } return result } + +func (m *manager) disableContainerCPUQuota(containerID string) error { + // Disable CFS CPU quota to avoid performance degradation due to + // Linux kernel CFS throttle implementation. + // NOTE: 4.18 kernel attempts to solve CFS throttling problem, + // but there are reports that it is not completely effective. + return m.containerRuntime.UpdateContainerResources( + containerID, + &runtimeapi.LinuxContainerResources{ + CpuPeriod: 100000, + CpuQuota: -1, + }) +} diff --git a/pkg/kubelet/cm/helpers_linux.go b/pkg/kubelet/cm/helpers_linux.go index 9b115ab5380..d3185e1e958 100644 --- a/pkg/kubelet/cm/helpers_linux.go +++ b/pkg/kubelet/cm/helpers_linux.go @@ -166,6 +166,11 @@ func ResourceConfigForPod(pod *v1.Pod, enforceCPULimits bool, cpuPeriod uint64) // determine the qos class qosClass := v1qos.GetPodQOS(pod) + // disable cfs quota for guaranteed pods + if qosClass == v1.PodQOSGuaranteed { + cpuQuota = int64(-1) + } + // build the result result := &ResourceConfig{} if qosClass == v1.PodQOSGuaranteed { diff --git a/pkg/kubelet/cm/helpers_linux_test.go b/pkg/kubelet/cm/helpers_linux_test.go index 56d765fbc22..0c43afe5875 100644 --- a/pkg/kubelet/cm/helpers_linux_test.go +++ b/pkg/kubelet/cm/helpers_linux_test.go @@ -63,8 +63,8 @@ func TestResourceConfigForPod(t *testing.T) { burstablePartialShares := MilliCPUToShares(200) burstableQuota := MilliCPUToQuota(200, int64(defaultQuotaPeriod)) guaranteedShares := MilliCPUToShares(100) - guaranteedQuota := MilliCPUToQuota(100, 
int64(defaultQuotaPeriod)) - guaranteedTunedQuota := MilliCPUToQuota(100, int64(tunedQuotaPeriod)) + guaranteedQuota := int64(-1) + guaranteedTunedQuota := int64(-1) memoryQuantity = resource.MustParse("100Mi") cpuNoLimit := int64(-1) guaranteedMemory := memoryQuantity.Value() @@ -283,8 +283,8 @@ func TestResourceConfigForPodWithCustomCPUCFSQuotaPeriod(t *testing.T) { burstablePartialShares := MilliCPUToShares(200) burstableQuota := MilliCPUToQuota(200, int64(defaultQuotaPeriod)) guaranteedShares := MilliCPUToShares(100) - guaranteedQuota := MilliCPUToQuota(100, int64(defaultQuotaPeriod)) - guaranteedTunedQuota := MilliCPUToQuota(100, int64(tunedQuotaPeriod)) + guaranteedQuota := int64(-1) + guaranteedTunedQuota := int64(-1) memoryQuantity = resource.MustParse("100Mi") cpuNoLimit := int64(-1) guaranteedMemory := memoryQuantity.Value() -- 2.17.1