# config/puppet-manifests/src/modules/platform/manifests/compute.pp

class platform::compute::params (
  $worker_cpu_list = '',
  $platform_cpu_list = '',
  $reserved_vswitch_cores = '',
  $reserved_platform_cores = '',
  $worker_base_reserved = '',
  $compute_vswitch_reserved = '',
) { }
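
# Writes /etc/platform/worker_reserved.conf from template so the reserved
# worker/platform CPU settings are available to other services, and orders the
# file ahead of the collectd restart exec. On systems that are not All-in-one
# it also installs a systemd CPUAffinity drop-in for platform services.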
class platform::compute::config
  inherits ::platform::compute::params {

  include ::platform::collectd::restart

  file { '/etc/platform/worker_reserved.conf':
    ensure  => 'present',
    replace => true,
    content => template('platform/worker_reserved.conf.erb')
  }
  -> Exec['collectd-restart']

  if $::platform::params::system_type != 'All-in-one' {
    file { '/etc/systemd/system.conf.d/platform-cpuaffinity.conf':
      ensure  => 'present',
      replace => true,
      content => template('platform/systemd-system-cpuaffinity.conf.erb')
    }
  }
}
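
# Runtime wrapper used to reapply the worker reserved configuration.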
class platform::compute::config::runtime {
  include ::platform::compute::config
}
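
# Kernel boot argument fragments (hugepage sizes/counts, CPU isolation
# options, etc.) that are combined into $grub_updates for grubby below.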
class platform::compute::grub::params (
  $n_cpus = '',
  $cpu_options = '',
  $m_hugepages = 'hugepagesz=2M hugepages=0',
  $g_hugepages = undef,
  $default_pgsz = 'default_hugepagesz=2M',
  $keys = [
    'kvm-intel.eptad',
    'default_hugepagesz',
    'hugepagesz',
    'hugepages',
    'isolcpus',
    'nohz_full',
    'rcu_nocbs',
    'kthread_cpus',
    'irqaffinity',
  ],
) {

  if $::is_broadwell_processor {
    $eptad = 'kvm-intel.eptad=0'
  } else {
    $eptad = ''
  }

  if $::is_gb_page_supported and $::platform::params::vswitch_type != 'none' {
    if $g_hugepages != undef {
      $gb_hugepages = $g_hugepages
    } else {
      $gb_hugepages = "hugepagesz=1G hugepages=${::number_of_numa_nodes}"
    }
  } else {
    $gb_hugepages = ''
  }

  $grub_updates = strip("${eptad} ${gb_hugepages} ${m_hugepages} ${default_pgsz} ${cpu_options}")
}
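
# Removes all managed kernel arguments from every kernel entry and then
# re-adds the computed set via grubby.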
class platform::compute::grub::update
  inherits ::platform::compute::grub::params {

  notice('Updating grub configuration')

  $to_be_removed = join($keys, ' ')
  exec { 'Remove the cpu arguments':
    command => "grubby --update-kernel=ALL --remove-args='${to_be_removed}'",
  }
  -> exec { 'Add the cpu arguments':
    command => "grubby --update-kernel=ALL --args='${grub_updates}'",
  }
}
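
# Applies the grub update and reboots so the corrected kernel arguments
# take effect.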
class platform::compute::grub::recovery {
  notice('Update Grub and Reboot')

  class {'platform::compute::grub::update': } -> Exec['reboot-recovery']

  exec { 'reboot-recovery':
    command => 'reboot',
  }
}
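
# Compares the configured logical CPU count and kernel boot arguments against
# what is actually present on the host. On success /var/run/worker_goenabled
# is created; on a boot-argument mismatch the recovery class is pulled in to
# rewrite grub and reboot.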
class platform::compute::grub::audit
  inherits ::platform::compute::grub::params {

  if ! str2bool($::is_initial_config_primary) {
    notice('Audit CPU and Grub Configuration')

    $expected_n_cpus = Integer($::number_of_logical_cpus)
    $n_cpus_ok = ($n_cpus == $expected_n_cpus)

    $cmd_ok = check_grub_config($grub_updates)

    if $cmd_ok and $n_cpus_ok {
      $ensure = present
      notice('CPU and Boot Argument audit passed.')
    } else {
      $ensure = absent
      if !$cmd_ok {
        notice('Kernel Boot Argument Mismatch')
        include ::platform::compute::grub::recovery
      } else {
        notice("Mismatched CPUs: Found=${n_cpus}, Expected=${expected_n_cpus}")
      }
    }

    file { '/var/run/worker_goenabled':
      ensure => $ensure,
      owner  => 'root',
      group  => 'root',
      mode   => '0644',
    }
  }
}
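
# Runtime entry point to rewrite the kernel boot arguments.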
class platform::compute::grub::runtime {
  include ::platform::compute::grub::update
}

# Mounts virtual hugetlbfs filesystems for each supported page size
class platform::compute::hugetlbf {

  if str2bool($::is_hugetlbfs_enabled) {

    $fs_list = generate('/bin/bash', '-c', 'ls -1d /sys/kernel/mm/hugepages/hugepages-*')
    $array = split($fs_list, '\n')
    $array.each | String $val | {
      $page_name = generate('/bin/bash', '-c', "basename ${val}")
      $page_size = strip(regsubst($page_name, 'hugepages-', ''))
      $hugemnt = "/mnt/huge-${page_size}"
      $options = "pagesize=${page_size}"

      # TODO: Once all the code is switched over to use the /dev
      # mount point we can get rid of this mount point.
      notice("Mounting hugetlbfs at: ${hugemnt}")
      exec { "create ${hugemnt}":
        command => "mkdir -p ${hugemnt}",
        onlyif  => "test ! -d ${hugemnt}",
      }
      -> mount { $hugemnt:
        ensure   => 'mounted',
        device   => 'none',
        fstype   => 'hugetlbfs',
        name     => $hugemnt,
        options  => $options,
        atboot   => 'yes',
        remounts => true,
      }

      # The libvirt helm chart expects hugepages to be mounted
      # under /dev so let's do that.
      $hugemnt2 = "/dev/huge-${page_size}"
      notice("Mounting hugetlbfs at: ${hugemnt2}")
      file { $hugemnt2:
        ensure => 'directory',
        owner  => 'root',
        group  => 'root',
        mode   => '0755',
      }
      -> mount { $hugemnt2:
        ensure   => 'mounted',
        device   => 'none',
        fstype   => 'hugetlbfs',
        name     => $hugemnt2,
        options  => $options,
        atboot   => 'yes',
        remounts => true,
      }
    }

    # The libvirt helm chart also assumes that the default hugepage size
    # will be mounted at /dev/hugepages so let's make that happen too.
    # Once we upstream a fix to the helm chart to automatically determine
    # the mountpoint then we can remove this.
    $page_size = '2M'
    $hugemnt = '/dev/hugepages'
    $options = "pagesize=${page_size}"

    notice("Mounting hugetlbfs at: ${hugemnt}")
    exec { "create ${hugemnt}":
      command => "mkdir -p ${hugemnt}",
      onlyif  => "test ! -d ${hugemnt}",
    }
    -> mount { $hugemnt:
      ensure   => 'mounted',
      device   => 'none',
      fstype   => 'hugetlbfs',
      name     => $hugemnt,
      options  => $options,
      atboot   => 'yes',
      remounts => true,
    }
  }
}
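
# Hugepage allocation parameters. The nr_hugepages_2M/nr_hugepages_1G values
# are space-separated node:pagesize:count triples consumed by
# platform::compute::allocate below.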
# lint:ignore:variable_is_lowercase
class platform::compute::hugepage::params (
  $nr_hugepages_2M = undef,
  $nr_hugepages_1G = undef,
  $vswitch_2M_pages = '',
  $vswitch_1G_pages = '',
  $vm_4K_pages = '',
  $vm_2M_pages = '',
  $vm_1G_pages = '',
) {}
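
# Writes a hugepage count into the given sysfs nr_hugepages path, skipping
# paths that do not exist on this node.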
define allocate_pages (
  $path,
  $page_count,
) {
  exec { "Allocate ${page_count} ${path}":
    command => "echo ${page_count} > ${path}",
    onlyif  => "test -f ${path}",
  }
}
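
# Illustrative example only; the real node, page size and count come from the
# hugepage facts and hieradata:
#   allocate_pages { 'node0 2048kB':
#     path       => '/sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages',
#     page_count => '512',
#   }
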
# Allocates HugeTLB memory according to the attributes specified in the
# nr_hugepages_2M and nr_hugepages_1G parameters.
class platform::compute::allocate
  inherits ::platform::compute::hugepage::params {

  # determine the node file system
  if str2bool($::is_per_numa_supported) {
    $nodefs = '/sys/devices/system/node'
  } else {
    $nodefs = '/sys/kernel/mm'
  }

  if $nr_hugepages_2M != undef {
    $nr_hugepages_2M_array = regsubst($nr_hugepages_2M, '[\(\)\"]', '', 'G').split(' ')
    $nr_hugepages_2M_array.each | String $val | {
      $per_node_2M = $val.split(':')
      if size($per_node_2M) == 3 {
        $node = $per_node_2M[0]
        $page_size = $per_node_2M[1]
        allocate_pages { "Start ${node} ${page_size}":
          path       => "${nodefs}/${node}/hugepages/hugepages-${page_size}/nr_hugepages",
          page_count => $per_node_2M[2],
        }
      }
    }
  }

  if $nr_hugepages_1G != undef {
    $nr_hugepages_1G_array = regsubst($nr_hugepages_1G, '[\(\)\"]', '', 'G').split(' ')
    $nr_hugepages_1G_array.each | String $val | {
      $per_node_1G = $val.split(':')
      if size($per_node_1G) == 3 {
        $node = $per_node_1G[0]
        $page_size = $per_node_1G[1]
        allocate_pages { "Start ${node} ${page_size}":
          path       => "${nodefs}/${node}/hugepages/hugepages-${page_size}/nr_hugepages",
          page_count => $per_node_1G[2],
        }
      }
    }
  }
}
# lint:endignore:variable_is_lowercase
# Mount resctrl to allow Cache Allocation Technology per VM
class platform::compute::resctrl {

  if str2bool($::is_resctrl_supported) {
    mount { '/sys/fs/resctrl':
      ensure   => 'mounted',
      device   => 'resctrl',
      fstype   => 'resctrl',
      name     => '/sys/fs/resctrl',
      atboot   => 'yes',
      remounts => true,
    }
  }
}
# Set Power Management QoS resume latency constraints for CPUs.
# The PM QoS resume latency limit is set to shallow C-state for vswitch CPUs.
# All other CPUs are allowed to go to the deepest C-state available.
class platform::compute::pmqos (
  $low_wakeup_cpus = '',
  $hight_wakeup_cpus = '',
) {

  if str2bool($::is_worker_subfunction) and str2bool($::is_lowlatency_subfunction) {

    $script = '/usr/bin/set-cpu-wakeup-latency.sh'

    if $low_wakeup_cpus != '""' {
      # Set low wakeup latency (shallow C-state) for vswitch CPUs using PM QoS interface
      exec { 'low-wakeup-latency':
        command   => "${script} low ${low_wakeup_cpus}",
        onlyif    => "test -f ${script}",
        logoutput => true,
      }
    }

    if $hight_wakeup_cpus != '""' {
      # Set high wakeup latency (deep C-state) for non-vswitch CPUs using PM QoS interface
      exec { 'high-wakeup-latency':
        command   => "${script} high ${hight_wakeup_cpus}",
        onlyif    => "test -f ${script}",
        logoutput => true,
      }
    }
  }
}
# Set systemd machine.slice cgroup cpuset to be used with VMs,
# and configure this cpuset to span all logical cpus and numa nodes.
# NOTES:
# - The parent directory cpuset spans all online cpus and numa nodes.
# - Setting the machine.slice cpuset prevents this from inheriting
#   kubernetes libvirt pod's cpuset, since machine.slice cgroup will be
#   created when a VM is launched if it does not already exist.
# - systemd automatically mounts cgroups and controllers, so don't need
#   to do that here.
class platform::compute::machine {
  $parent_dir = '/sys/fs/cgroup/cpuset'
  $parent_mems = "${parent_dir}/cpuset.mems"
  $parent_cpus = "${parent_dir}/cpuset.cpus"
  $machine_dir = "${parent_dir}/machine.slice"
  $machine_mems = "${machine_dir}/cpuset.mems"
  $machine_cpus = "${machine_dir}/cpuset.cpus"

  notice("Create ${machine_dir}")
  file { $machine_dir :
    ensure => directory,
    owner  => 'root',
    group  => 'root',
    mode   => '0700',
  }
  -> exec { "Create ${machine_mems}" :
    command => "/bin/cat ${parent_mems} > ${machine_mems}",
  }
  -> exec { "Create ${machine_cpus}" :
    command => "/bin/cat ${parent_cpus} > ${machine_cpus}",
  }
}
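
# Manages the kvm_timer_advance_setup service and its configuration file.
# When enabled, the config is written from template and the service is
# enabled and ordered to start before kubelet; when disabled, the service is
# explicitly disabled since the service resource never enables/disables
# services here.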
class platform::compute::kvm_timer_advance(
  $enabled = false,
  $vcpu_pin_set = undef
) {
  if $enabled {
    # include the declaration of the kubelet service
    include ::platform::kubernetes::worker

    file { '/etc/kvm-timer-advance/kvm-timer-advance.conf':
      ensure  => 'present',
      replace => true,
      content => template('platform/kvm_timer_advance.conf.erb')
    }
    -> service { 'kvm_timer_advance_setup':
      ensure => 'running',
      enable => true,
      before => Service['kubelet'],
    }
    # A separate enable is required since we have modified the service resource
    # to never enable/disable services in puppet.
    -> exec { 'Enable kvm_timer_advance_setup':
      command => '/usr/bin/systemctl enable kvm_timer_advance_setup.service',
    }
  } else {
    # A disable is required since we have modified the service resource
    # to never enable/disable services in puppet and stop has no effect.
    exec { 'Disable kvm_timer_advance_setup':
      command => '/usr/bin/systemctl disable kvm_timer_advance_setup.service',
    }
  }
}
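
# Top-level worker compute class: applies the audit, hugepage, power
# management, cgroup and configuration classes above, and orders itself
# before platform::vswitch.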
class platform::compute {

  Class[$name] -> Class['::platform::vswitch']

  require ::platform::compute::grub::audit
  require ::platform::compute::hugetlbf
  require ::platform::compute::allocate
  require ::platform::compute::pmqos
  require ::platform::compute::resctrl
  require ::platform::compute::machine
  require ::platform::compute::config
  require ::platform::compute::kvm_timer_advance
}