#!/bin/bash
|
|
#
|
|
# Copyright (c) 2020-2021 Wind River Systems, Inc.
|
|
#
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
#
|
|
|
|
#
|
|
# chkconfig: 2345 76 25
|
|
#
|
|
### BEGIN INIT INFO
|
|
# Provides: k8s-pod-recovery
|
|
# Default-Start: 3 5
|
|
# Required-Start:
|
|
# Required-Stop:
|
|
# Default-Stop: 0 1 2 6
|
|
# Short-Description: Service to recover pods after host boot
|
|
### END INIT INFO
|
|
|
|
# Pull in platform-wide settings (nodetype, system mode, etc.).
. /etc/platform/platform.conf

export PATH=/sbin:/usr/sbin:/bin:/usr/bin:/usr/local/bin:/usr/local/sbin
export KUBECONFIG=/etc/kubernetes/admin.conf

# Drop-in directory scanned for extra namespace declarations (*.conf files).
CONF_DIR=/etc/k8s-post-recovery.d
# Default polling interval (seconds) used while waiting on boot/pod states.
SLEEP_DELAY_SEC=15

# Quote "$0" so a script path containing whitespace does not word-split.
NAME=$(basename "$0")
PIDFILE=/var/run/${NAME}.pid
HOST=$(hostname)
|
|
|
|
# Log info message to /var/log/daemon.log
|
|
# Emit an info-level message to syslog (lands in /var/log/daemon.log),
# tagged with the script name and PID.
function LOG {
    logger -p daemon.info -t "${NAME}($$): " "$@"
}
|
|
|
|
# Log error message to /var/log/daemon.log
|
|
# Emit an error-level message to syslog (lands in /var/log/daemon.log),
# tagged with the script name and PID.
function ERROR {
    logger -p daemon.error -t "${NAME}($$): " "$@"
}
|
|
|
|
function _check_for_k8s_config {
    # A node that was never configured for Kubernetes has no pods to
    # recover; in that case log the fact and exit successfully.
    [ -f ${KUBECONFIG} ] && return 0
    LOG "${KUBECONFIG} does not exist. No pods to recover."
    exit 0
}
|
|
|
|
function _check_for_existing_process {
    # Abort if another instantiation of this script is already running;
    # otherwise clean up any stale pidfile and record our own PID.
    if [ -e "${PIDFILE}" ]; then
        PID=$(cat "${PIDFILE}")
        # Guard the /proc read: the recorded PID may no longer exist
        # (stale pidfile after an unclean shutdown). The original
        # unguarded 'cat /proc/$PID/comm' spilled an error and left
        # PROCESS empty, making the unquoted test expression malformed.
        PROCESS=$(cat /proc/"${PID}"/comm 2>/dev/null)
        if [ -n "${PID}" ] && [ -e "/proc/${PID}" ] && [ "${PROCESS}" = "${NAME}" ]; then
            ERROR "Aborting, ${PID} already running: ${PIDFILE}."
            exit 1
        else
            OUT=$(rm -v -f "${PIDFILE}")
            LOG "${OUT}"
        fi
    fi

    # Create pidfile to indicate the script is running
    echo $$ > "${PIDFILE}"
}
|
|
|
|
function _wait_for_systemd {
    # Block until systemd reports that boot has completed. A 'degraded'
    # system (some units failed) still counts as booted.
    until systemctl is-system-running | grep -q -e running -e degraded; do
        LOG "Waiting for systemd to finish booting..."
        sleep ${SLEEP_DELAY_SEC}
    done
}
|
|
|
|
function _do_cni_cache_cleanup {
    # Remove stale CNI cache files (not associated with any running pod)
    # that are more than one hour old. Failures are logged but not fatal.
    LOG "Starting CNI cache cleanup..."
    if ! k8s-cni-cache-cleanup -o 1 -d; then
        ERROR "Failed to run CNI cache cleanup."
    fi
}
|
|
|
|
function _wait_for_pod_stabilization {
    # Poll the cluster until the number of pods NOT in Running/Completed
    # state stays unchanged for a run of consecutive polls.
    #   $1: extra arguments appended to 'kubectl get pods'
    #   $2: seconds between polls
    #   $3: consecutive stable polls required before returning
    local extra_args=$1
    local time_between_polls=$2
    local stable_cycles=$3

    # NOTE: deliberately not 'local' -- left readable after return.
    last_count=0
    stability_count=0

    while [[ $stability_count -lt $stable_cycles ]] ; do
        pods_in_flux=$(KUBECONFIG=/etc/kubernetes/admin.conf kubectl get pods --no-headers --all-namespaces $extra_args | grep -v -e Running -e Completed | wc -l)
        if [[ $pods_in_flux -eq $last_count ]]; then
            LOG "Pods transitions are stable... for $((stability_count*time_between_polls)) seconds."
            stability_count=$((stability_count+1))
        else
            # Count changed: restart the stability window.
            LOG "Waiting on pod transitions to stabilize... $pods_in_flux pods are not Running/Completed"
            last_count=$pods_in_flux
            stability_count=0
        fi
        sleep $time_between_polls
    done
}
|
|
|
|
function _unknown_pods {
    # $1: actions <recover|verify>
    #
    # Recover (delete) or verify pods stuck in Unknown state on this
    # host, limited to a fixed namespace list plus any namespaces
    # declared in ${CONF_DIR}/*.conf drop-in files.
    SUPPORTED_NAMESPACES=('armada' 'openstack' 'monitor')

    # Merge in namespaces from drop-in conf files ('namespace=<ns>' lines).
    shopt -s nullglob
    for conf_file in ${CONF_DIR}/*.conf; do
        grep -q '^namespace=' $conf_file || continue
        SUPPORTED_NAMESPACES+=($(grep '^namespace=' $conf_file | awk -F '=' '{print $2}'))
    done

    case "$1" in
        recover)
            # Recovers pods that are: Running/Unknown and Pending/Init:Unknown
            for ns in ${SUPPORTED_NAMESPACES[@]}; do
                PODS=$(kubectl get pods -n $ns --field-selector spec.nodeName=${HOST} 2>/dev/null | awk /Unknown/'{print $1}')
                for pod in $PODS ; do
                    LOG "Unknown pods: Recovering: $ns/$pod"
                    kubectl delete pods -n $ns $pod --wait=false
                done
            done
            ;;
        verify)
            for ns in ${SUPPORTED_NAMESPACES[@]}; do
                PODS=$(kubectl get pods -n $ns --field-selector spec.nodeName=${HOST} 2>/dev/null | awk /Unknown/'{print $1}')
                if [ -z "${PODS}" ]; then
                    LOG "Unknown pods: None present for namespace: $ns"
                else
                    ERROR "Unknown pods: still present for namespace: $ns"
                fi
            done
            ;;
        *)
            ERROR "Unknown action: $1"
            ;;
    esac
}
|
|
|
|
function _outofhugepages_pods {
    # $1: actions <recover|verify>
    #
    # Recover (delete) or verify pods on this host stuck in
    # OutOfhugepages state, across every namespace in the cluster.

    # First column of 'kubectl get ns', skipping the header row.
    NAMESPACES=$(kubectl get ns | tail -n +2 | awk '{ print $1 }')

    case "$1" in
        recover)
            # Recovers pods that are: Running/OutOfhugepages
            for ns in ${NAMESPACES[@]}; do
                PODS=$(kubectl get pods -n $ns --field-selector spec.nodeName=${HOST} 2>/dev/null | awk /OutOfhugepages/'{print $1}')
                for pod in $PODS ; do
                    LOG "OutOfhugepages pods: Recovering: $ns/$pod"
                    kubectl delete pods -n $ns $pod --wait=false
                done
            done
            ;;
        verify)
            for ns in ${NAMESPACES[@]}; do
                PODS=$(kubectl get pods -n $ns --field-selector spec.nodeName=${HOST} 2>/dev/null | awk /OutOfhugepages/'{print $1}')
                if [ -z "${PODS}" ]; then
                    LOG "OutOfhugepages pods: None present for namespace: $ns"
                else
                    ERROR "OutOfhugepages pods: still present for namespace: $ns"
                fi
            done
            ;;
        *)
            ERROR "Unknown action: $1"
            ;;
    esac
}
|
|
|
|
function _node_affinity_pods {
    # $1: actions <recover|verify>
    #
    # Pods scheduled on this node before a reboot can be left in a
    # Failed state with reason NodeAffinity; delete them so their
    # controllers reschedule fresh copies.

    if [ "$1" == 'recover' ]; then
        PODS=$(kubectl get pods --all-namespaces --field-selector status.phase=Failed,spec.nodeName=${HOST} 2>/dev/null | awk /NodeAffinity/'{print $1"/"$2}')
        for pod in $PODS ; do
            LOG "NodeAffinity pods: Recovering: $pod"
            # ${pod//// } turns "namespace/name" into "namespace name",
            # supplying both arguments to 'kubectl delete pods -n'.
            kubectl delete pods -n ${pod//// } --wait=false
        done
    elif [ "$1" == 'verify' ]; then
        # BUGFIX: the awk pattern here was misspelled 'NodeAffnity', so
        # verification could never detect lingering NodeAffinity pods.
        PODS=$(kubectl get pods --all-namespaces --field-selector status.phase=Failed,spec.nodeName=${HOST} 2>/dev/null | awk /NodeAffinity/'{print $1"/"$2}')
        if [ -z "${PODS}" ]; then
            LOG "NodeAffinity pods: None present."
        else
            ERROR "NodeAffinity pods: still present"
        fi
    else
        ERROR "Unknown action: $1"
    fi

}
|
|
|
|
function _labeled_pods {
    # $1: actions <recover|verify>
    #
    # Recover (delete) or verify pods on this host carrying the
    # restart-on-reboot=true label. Before deleting them, make sure the
    # SRIOV device plugin is ready so restarted pods can reattach their
    # devices.

    if [ "$1" == 'recover' ]; then
        POLLING_INTERVAL=5
        STABILITY_COUNT=6
        # Wait for the labeled pods on this host to stop transitioning
        # before acting on them.
        _wait_for_pod_stabilization "--selector=restart-on-reboot=true --field-selector=spec.nodeName=${HOST}" $POLLING_INTERVAL $STABILITY_COUNT

        # Each entry is "namespace/name".
        PODS=$(kubectl get pods --all-namespaces --no-headers --field-selector=spec.nodeName=${HOST} --selector=restart-on-reboot=true 2>/dev/null | awk '{print $1"/"$2}')

        # Don't have to restart device-plugin if no labeled pods are present. System may not be configured for SRIOV.
        if [ ! -z "${PODS}" ]; then
            LOG "Waiting for SRIOV device plugin pod to become available"

            # Check if device-plugin is ready, but do not wait
            kubectl wait pods -n kube-system --selector=app=sriovdp --field-selector=spec.nodeName=${HOST} --for=condition=Ready --timeout=0s

            # If device plugin is not ready, restart it and wait
            if [ "$?" -ne 0 ]; then
                kubectl delete pods -n kube-system --selector=app=sriovdp --field-selector=spec.nodeName=${HOST} --wait=false
                kubectl wait pods -n kube-system --selector=app=sriovdp --field-selector=spec.nodeName=${HOST} --for=condition=Ready --timeout=360s

                # Non-fatal: continue with recovery even if the plugin
                # never became Ready.
                if [ "$?" -ne 0 ]; then
                    ERROR "SRIOV device plugin timed out on ready wait. Continuing anyway. SRIOV pods may not recover."
                fi
            fi
        fi

        # Delete pods with the restart-on-reboot=true label
        # (${pod//// } expands "namespace/name" into "namespace name").
        for pod in $PODS; do
            LOG "restart-on-reboot labeled pods: Recovering: ${pod//// }"
            kubectl delete pods -n ${pod//// } --wait=false
        done
    elif [ "$1" == 'verify' ]; then
        PODS=$(kubectl get pods --all-namespaces --no-headers --field-selector=spec.nodeName=${HOST} --selector=restart-on-reboot=true 2>/dev/null | awk '{print $1"/"$2}')
        for pod in $PODS; do
            LOG "restart-on-reboot labeled pods: Verifying: ${pod//// }"
            # Third column of 'kubectl get pod' output is STATUS.
            STATUS=$(kubectl get pod --no-headers -n ${pod//// } 2>/dev/null | awk '{print $3}')
            if [[ "${STATUS}" != "Running" ]]; then
                ERROR "$pod: not recovered: $STATUS"
            else
                LOG "$pod: recovered"
            fi
        done
    else
        ERROR "Unknown action: $1"
    fi
}
|
|
|
|
function _force_reset_pods {
    # $1: actions <recover|verify>
    #
    # The openstack libvirt pod can sit in a Running but unusable state
    # after a host boot, so treat "Running without all conditions True"
    # as needing a restart.

    # Nothing to do when the openstack namespace is absent.
    if ! kubectl get namespace openstack > /dev/null 2>&1; then
        return 0
    fi

    # Custom-columns output (header row then pods), e.g.:
    #   NAME                                         STATUS    CONDITIONS            NODE
    #   libvirt-libvirt-controller-0-937646f6-xst4r  Running   True,True,True,True   controller-0
    # Conditions order: Initialized, Ready, ContainersReady, PodScheduled.
    # Keep only pods on this host whose conditions are not all True.
    CUSTOM_COLUMNS='custom-columns=NAME:.metadata.name,STATUS:status.phase,CONDITIONS:status.conditions[*].status,NODE:spec.nodeName'
    FIELD_SELECTOR="spec.nodeName=${HOST}"
    PODS=$(kubectl get pods -n openstack -l application=libvirt --field-selector ${FIELD_SELECTOR} -o ${CUSTOM_COLUMNS} | grep -v NAME | grep -v 'True,True,True,True' | awk '{print $1}')

    case "$1" in
        recover)
            for pod in $PODS ; do
                LOG "Recovering libvirt pod: $pod"
                kubectl delete pods -n openstack $pod --wait=false
            done
            ;;
        verify)
            if [ -z "${PODS}" ]; then
                LOG "Openstack libvirt pod on ${HOST} is running."
            else
                ERROR "Openstack libvirt pod on ${HOST} has not been recovered."
            fi
            ;;
        *)
            ERROR "Unknown action: $1"
            ;;
    esac
}
|
|
|
|
function _examine_pods {
    # $1: actions <recover|verify>
    local action=$1

    # Pods labeled restart-on-reboot=true are handled before anything else.
    _labeled_pods $action

    # Let cluster-wide pod transitions settle before inspecting states.
    _wait_for_pod_stabilization "" $SLEEP_DELAY_SEC 6

    # Run each recovery/verification check in turn.
    _unknown_pods $action
    _node_affinity_pods $action
    _force_reset_pods $action
    _outofhugepages_pods $action
}
|
|
|
|
|
|
function start {
    # Bail out early when the node is unconfigured for k8s, or when
    # another instance of this script is already active.
    _check_for_k8s_config
    _check_for_existing_process

    LOG "Starting."

    # Once boot has settled: recover stuck pods, verify the results,
    # then clean up stale CNI cache entries.
    _wait_for_systemd
    _examine_pods 'recover'
    _examine_pods 'verify'
    _do_cni_cache_cleanup
}
|
|
|
|
function stop {
    # Nothing to tear down; just record the stop request.
    LOG "Stopping."
}
|
|
|
|
function status {
    # Status reporting is not implemented for this service.
    return 0
}
|
|
|
|
function reset {
    # Reset is not implemented for this service.
    return 0
}
|
|
|
|
# Init-script entry point: dispatch on the requested service action.
case "$1" in
    start)
        start
        ;;
    stop)
        stop
        ;;
    # restart/reload are equivalent here: stop is a no-op, then recover.
    restart|force-reload|reload)
        stop
        start
        ;;
    status)
        status
        ;;
    reset)
        reset
        ;;
    *)
        echo "Usage: $0 {start|stop|force-reload|restart|reload|status|reset}"
        exit 1
        ;;
esac

exit 0
|