323 lines
12 KiB
Bash
Executable File
323 lines
12 KiB
Bash
Executable File
#!/bin/bash
|
|
################################################################################
|
|
# Copyright (c) 2017 Wind River Systems, Inc.
|
|
#
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
#
|
|
################################################################################
|
|
# Define minimal path
|
|
PATH=/bin:/usr/bin:/usr/local/bin
|
|
|
|
. /etc/platform/platform.conf
|
|
LOG_FUNCTIONS=${LOG_FUNCTIONS:-"/etc/init.d/log_functions.sh"}
|
|
CPUMAP_FUNCTIONS=${CPUMAP_FUNCTIONS:-"/etc/init.d/cpumap_functions.sh"}
|
|
[[ -e ${LOG_FUNCTIONS} ]] && source ${LOG_FUNCTIONS}
|
|
[[ -e ${CPUMAP_FUNCTIONS} ]] && source ${CPUMAP_FUNCTIONS}
|
|
|
|
# Enable debug logs and tag them
|
|
LOG_DEBUG=1
|
|
TAG="TASKAFFINITY:"
|
|
|
|
TASK_AFFINING_INCOMPLETE="/etc/platform/.task_affining_incomplete"
|
|
N_CPUS=$(cat /proc/cpuinfo 2>/dev/null | \
|
|
awk '/^[pP]rocessor/ { n +=1 } END { print (n>0) ? n : 1}')
|
|
FULLSET_CPUS="0-"$((N_CPUS-1))
|
|
FULLSET_MASK=$(cpulist_to_cpumap ${FULLSET_CPUS} ${N_CPUS})
|
|
PLATFORM_CPUS=$(get_platform_cpu_list)
|
|
PLATFORM_CPULIST=$(get_platform_cpu_list| \
|
|
perl -pe 's/(\d+)-(\d+)/join(",",$1..$2)/eg'| \
|
|
sed 's/,/ /g')
|
|
VSWITCH_CPULIST=$(get_vswitch_cpu_list| \
|
|
perl -pe 's/(\d+)-(\d+)/join(",",$1..$2)/eg'| \
|
|
sed 's/,/ /g')
|
|
IDLE_MARK=95.0
|
|
KERNEL=`uname -a`
|
|
|
|
################################################################################
|
|
# Check if a given core is one of the platform cores
|
|
################################################################################
|
|
function is_platform_core {
|
|
local core=$1
|
|
for CPU in ${PLATFORM_CPULIST}; do
|
|
if [ $core -eq $CPU ]; then
|
|
return 1
|
|
fi
|
|
done
|
|
return 0
|
|
}
|
|
|
|
################################################################################
|
|
# Check if a given core is one of the vswitch cores
|
|
################################################################################
|
|
function is_vswitch_core {
|
|
local core=$1
|
|
for CPU in ${VSWITCH_CPULIST}; do
|
|
if [ $core -eq $CPU ]; then
|
|
return 1
|
|
fi
|
|
done
|
|
return 0
|
|
}
|
|
|
|
################################################################################
|
|
# An audit and corrective action following a swact
|
|
################################################################################
|
|
function audit_and_reaffine {
|
|
local mask=$1
|
|
local cmd_str=""
|
|
local tasklist
|
|
|
|
cmd_str="ps-sched.sh|awk '(\$9==\"$mask\") {print \$2}'"
|
|
|
|
tasklist=($(eval $cmd_str))
|
|
# log_debug "cmd str = $cmd_str"
|
|
log_debug "${TAG} There are ${#tasklist[@]} tasks to reaffine."
|
|
|
|
for task in ${tasklist[@]}; do
|
|
taskset -acp ${PLATFORM_CPUS} $task &> /dev/null
|
|
rc=$?
|
|
[[ $rc -ne 0 ]] && log_error "Failed to set CPU affinity for pid $pid, rc=$rc"
|
|
done
|
|
tasklist=($(eval $cmd_str))
|
|
[[ ${#tasklist[@]} -eq 0 ]] && return 0 || return 1
|
|
}
|
|
|
|
################################################################################
|
|
# The following function is used to verify that any sleeping management tasks
|
|
# that are on non-platform cores can be migrated to platform cores as soon as
|
|
# they are scheduled. It can be invoked either manually or from goenableCompute
|
|
# script as a scheduled job (with a few minute delay) if desired.
|
|
# The induced tasks migration should be done after all VMs have been restored
|
|
# following a host reboot in AIO, hence the delay.
|
|
################################################################################
|
|
function move_inactive_threads_to_platform_cores {
|
|
local tasklist
|
|
local cmd_str=""
|
|
|
|
# Compile a list of non-kernel & non-vswitch/VM related threads that are not
|
|
# on platform cores.
|
|
# e.g. if the platform cpulist value is "0 8", the resulting command to be
|
|
# evaluated should look like this:
|
|
# ps-sched.sh|grep -v vswitch|awk '($10!=0 && $10!=8 && $3!=2) {if(NR>1)print $2}'
|
|
cmd_str="ps-sched.sh|grep -v vswitch|awk '("
|
|
for cpu_num in ${PLATFORM_CPULIST}; do
|
|
cmd_str=$cmd_str"\$10!="${cpu_num}" && "
|
|
done
|
|
cmd_str=$cmd_str"\$3!=2) {if(NR>1)print \$2}'"
|
|
echo "selection string = $cmd_str"
|
|
tasklist=($(eval $cmd_str))
|
|
log_debug "${TAG} There are ${#tasklist[@]} number of tasks to be moved."
|
|
|
|
# These sleep tasks are stuck on the wrong core(s). They need to be woken up
|
|
# so they can be migrated to the right ones. Attaching and detaching strace
|
|
# momentarily to the task does the trick.
|
|
for task in ${tasklist[@]}; do
|
|
strace -p $task 2>/dev/null &
|
|
pid=$!
|
|
sleep 0.1
|
|
kill -SIGINT $pid
|
|
done
|
|
tasklist=($(eval $cmd_str))
|
|
[[ ${#tasklist[@]} -eq 0 ]] && return 0 || return 1
|
|
}
|
|
|
|
################################################################################
|
|
# The following function is called by affine-platform.sh to affine tasks to
|
|
# all available cores during initial startup and subsequent host reboots.
|
|
################################################################################
|
|
function affine_tasks_to_all_cores {
|
|
local pidlist
|
|
local rc=0
|
|
|
|
if [[ "${KERNEL}" == *" RT "* ]]; then
|
|
return 0
|
|
fi
|
|
|
|
log_debug "${TAG} Affining all tasks to CPU (${FULLSET_CPUS})"
|
|
|
|
pidlist=$(ps --ppid 2 -p 2 --deselect -o pid= | awk '{ print $1; }')
|
|
for pid in ${pidlist[@]}; do
|
|
ppid=$(ps -o ppid= -p $pid |tr -d '[:space:]')
|
|
if [ -z $ppid ] || [ $ppid -eq 2 ]; then
|
|
continue
|
|
fi
|
|
log_debug "Affining pid $pid, parent pid = $ppid"
|
|
taskset --all-tasks --pid --cpu-list ${FULLSET_CPUS} $pid &> /dev/null
|
|
rc=$?
|
|
[[ $rc -ne 0 ]] && log_error "Failed to set CPU affinity for pid $pid, rc=$rc"
|
|
done
|
|
# Write the cpu list to a temp file which will be read and removed when
|
|
# the tasks are reaffined back to platform cores later on.
|
|
echo ${FULLSET_CPUS} > ${TASK_AFFINING_INCOMPLETE}
|
|
|
|
return $rc
|
|
}
|
|
|
|
################################################################################
|
|
# The following function can be called by any platform service that needs to
|
|
# temporarily make use of idle VM cores to run a short-duration, service
|
|
# critical and cpu intensive operation in AIO. For instance, sm can levearage
|
|
# the idle cores to speed up swact activity.
|
|
#
|
|
# At the end of the operation, regarless of the result, the service must be
|
|
# calling function affine_tasks_to_platform_cores to re-affine platform tasks
|
|
# back to their assigned core(s).
|
|
#
|
|
# Kernel, vswitch and VM related tasks are untouched.
|
|
################################################################################
|
|
function affine_tasks_to_idle_cores {
|
|
local cpulist
|
|
local cpuocc_list
|
|
local vswitch_pid
|
|
local pidlist
|
|
local idle_cpulist
|
|
local platform_cpus
|
|
local rc=0
|
|
local cpu=0
|
|
|
|
if [ -f ${TASK_AFFINING_INCOMPLETE} ]; then
|
|
read cpulist < ${TASK_AFFINING_INCOMPLETE}
|
|
log_debug "${TAG} Tasks have already been affined to CPU ($cpulist)."
|
|
return 0
|
|
fi
|
|
|
|
if [[ "${KERNEL}" == *" RT "* ]]; then
|
|
return 0
|
|
fi
|
|
|
|
# Compile a list of cpus with idle percentage greater than 95% in the last
|
|
# 5 seconds.
|
|
cpuocc_list=($(sar -P ALL 1 5|grep Average|awk '{if(NR>2)print $8}'))
|
|
|
|
for idle_value in ${cpuocc_list[@]}; do
|
|
is_vswitch_core $cpu
|
|
if [ $? -eq 1 ]; then
|
|
cpu=$(($cpu+1))
|
|
continue
|
|
fi
|
|
|
|
is_platform_core $cpu
|
|
if [ $? -eq 1 ]; then
|
|
# Platform core is added to the idle list by default
|
|
idle_cpulist=$idle_cpulist$cpu","
|
|
else
|
|
# Non platform core is added to the idle list if it is more than 95% idle
|
|
[[ $(echo "$idle_value > ${IDLE_MARK}"|bc) -eq 1 ]] && idle_cpulist=$idle_cpulist$cpu","
|
|
fi
|
|
cpu=$(($cpu+1))
|
|
done
|
|
|
|
idle_cpulist=$(echo $idle_cpulist|sed 's/.$//')
|
|
platform_affinity_mask=$(cpulist_to_cpumap ${PLATFORM_CPUS} ${N_CPUS} \
|
|
|awk '{print tolower($0)}')
|
|
|
|
log_debug "${TAG} Affining all tasks to idle CPU ($idle_cpulist)"
|
|
|
|
vswitch_pid=$(pgrep vswitch)
|
|
pidlist=$(ps --ppid 2 -p 2 --deselect -o pid= | awk '{ print $1; }')
|
|
for pid in ${pidlist[@]}; do
|
|
ppid=$(ps -o ppid= -p $pid |tr -d '[:space:]')
|
|
if [ -z $ppid ] || [ $ppid -eq 2 ] || [ "$pid" = "$vswitch_pid" ]; then
|
|
continue
|
|
fi
|
|
pid_affinity_mask=$(taskset -p $pid | awk '{print $6}')
|
|
if [ "${pid_affinity_mask}" == "${platform_affinity_mask}" ]; then
|
|
# log_debug "Affining pid $pid to idle cores..."
|
|
taskset --all-tasks --pid --cpu-list $idle_cpulist $pid &> /dev/null
|
|
rc=$?
|
|
[[ $rc -ne 0 ]] && log_error "Failed to set CPU affinity for pid $pid, rc=$rc"
|
|
fi
|
|
done
|
|
|
|
# Save the cpu list to the temp file which will be read and removed when
|
|
# tasks are reaffined to the platform cores later on.
|
|
echo $idle_cpulist > ${TASK_AFFINING_INCOMPLETE}
|
|
return $rc
|
|
}
|
|
|
|
################################################################################
|
|
# The following function is called by either:
|
|
# a) nova-compute wrapper script during AIO system initial bringup or reboot
|
|
# or
|
|
# b) sm at the end of swact sequence
|
|
# to re-affine management tasks back to the platform cores.
|
|
################################################################################
|
|
function affine_tasks_to_platform_cores {
|
|
local cpulist
|
|
local pidlist
|
|
local rc=0
|
|
local count=0
|
|
|
|
if [ ! -f ${TASK_AFFINING_INCOMPLETE} ]; then
|
|
dbg_str="${TAG} Either tasks have never been affined to all/idle cores or"
|
|
dbg_str=$dbg_str" they have already been reaffined to platform cores."
|
|
log_debug "$dbg_str"
|
|
return 0
|
|
fi
|
|
|
|
read cpulist < ${TASK_AFFINING_INCOMPLETE}
|
|
affinity_mask=$(cpulist_to_cpumap $cpulist ${N_CPUS}|awk '{print tolower($0)}')
|
|
|
|
log_debug "${TAG} Reaffining tasks to platform cores (${PLATFORM_CPUS})..."
|
|
pidlist=$(ps --ppid 2 -p 2 --deselect -o pid= | awk '{ print $1; }')
|
|
for pid in ${pidlist[@]}; do
|
|
# log_debug "Processing pid $pid..."
|
|
pid_affinity_mask=$(taskset -p $pid | awk '{print $6}')
|
|
# Only management tasks need to be reaffined. Kernel, vswitch and VM related
|
|
# tasks were not affined previously so they should have different affinity
|
|
# mask(s).
|
|
if [ "${pid_affinity_mask}" == "${affinity_mask}" ]; then
|
|
count=$(($count+1))
|
|
# log_debug "Affining pid $pid to platform cores..."
|
|
taskset --all-tasks --pid --cpu-list ${PLATFORM_CPUS} $pid &> /dev/null
|
|
rc=$?
|
|
[[ $rc -ne 0 ]] && log_error "Failed to set CPU affinity for pid $pid, rc=$rc"
|
|
fi
|
|
done
|
|
|
|
# A workaround for lack of "end of swact" state
|
|
fullmask=$(echo ${FULLSET_MASK} | awk '{print tolower($0)}')
|
|
if [ "${affinity_mask}" != "${fullmask}" ]; then
|
|
log_debug "${TAG} Schedule an audit and cleanup"
|
|
(sleep 60; audit_and_reaffine "0x"$affinity_mask) &
|
|
fi
|
|
|
|
rm -rf ${TASK_AFFINING_INCOMPLETE}
|
|
log_debug "${TAG} $count tasks were reaffined to platform cores."
|
|
|
|
return $rc
|
|
}
|
|
|
|
################################################################################
|
|
# The following function can be leveraged by cron tasks
|
|
################################################################################
|
|
function get_most_idle_core {
|
|
local cpuocc_list
|
|
local cpu=0
|
|
local most_idle_value=${IDLE_MARK}
|
|
local most_idle_cpu=0
|
|
|
|
if [[ "${KERNEL}" == *" RT "* ]]; then
|
|
echo $cpu
|
|
return
|
|
fi
|
|
|
|
cpuocc_list=($(sar -P ALL 1 5|grep Average|awk '{if(NR>2)print $8}'))
|
|
|
|
for idle_value in ${cpuocc_list[@]}; do
|
|
is_vswitch_core $cpu
|
|
if [ $? -eq 1 ]; then
|
|
cpu=$(($cpu+1))
|
|
continue
|
|
fi
|
|
|
|
if [ $(echo "$idle_value > $most_idle_value"|bc) -eq 1 ]; then
|
|
most_idle_value=$idle_value
|
|
most_idle_cpu=$cpu
|
|
fi
|
|
cpu=$(($cpu+1))
|
|
done
|
|
|
|
echo $most_idle_cpu
|
|
}
|