Introduce k8s pod recovery service

Add a recovery service, started by systemd on host boot, that waits
for pod transitions to stabilize and then takes corrective action for
the following conditions:
- Delete to restart pods stuck in an Unknown or Init:Unknown state in
  the 'openstack' and 'monitor' namespaces.
- Delete to restart Failed pods stuck in a NodeAffinity state that occur
  in any namespace.
- Delete to restart the libvirt pod in the 'openstack' namespace when
  any of its conditions (Initialized, Ready, ContainersReady,
  PodScheduled) are not True.

This will only recover pods specific to the host where the service is
installed.
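
For reference, node-local selection relies on a kubectl field selector
keyed on the hostname (illustrative invocation; the service script
below applies the same selector):

  kubectl get pods --all-namespaces --field-selector spec.nodeName=$(hostname)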

This service is installed on all controller types. There is currently no
evidence that we need this on dedicated worker nodes.

Each of these conditions should be re-evaluated after the next k8s
component rebase to determine whether any of these recovery actions
can be removed.

Change-Id: I0e304d1a2b0425624881f3b2d9c77f6568844196
Closes-Bug: #1893977
Signed-off-by: Robert Church <robert.church@windriver.com>
Author: Robert Church <robert.church@windriver.com>
Date:   2020-09-02 00:59:44 -04:00
Parent: a944973da4
Commit: 17c1b8894d
6 changed files with 319 additions and 0 deletions


@@ -174,6 +174,7 @@ kubernetes-node
kubernetes-kubeadm
kubernetes-client
containerd
k8s-pod-recovery
# resource-agents
resource-agents


@@ -61,6 +61,7 @@ kubernetes/helm
kubernetes/chartmuseum
kubernetes/armada-helm-toolkit
kubernetes/armada
kubernetes/k8s-pod-recovery
grub/grubby
base/dpkg
base/cluster-resource-agents


@@ -0,0 +1,4 @@
SRC_DIR="."
COPY_LIST="$FILES_BASE/*"
TIS_PATCH_VER=PKG_GITREVCOUNT


@@ -0,0 +1,247 @@
#!/bin/bash
#
# Copyright (c) 2020 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
#
# chkconfig: 2345 76 25
#
### BEGIN INIT INFO
# Provides: k8s-pod-recovery
# Default-Start: 3 5
# Required-Start:
# Required-Stop:
# Default-Stop: 0 1 2 6
# Short-Description: Service to recover pods after host boot
### END INIT INFO
. /etc/platform/platform.conf
export PATH=/sbin:/usr/sbin:/bin:/usr/bin:/usr/local/bin
export KUBECONFIG=/etc/kubernetes/admin.conf
SLEEP_DELAY_SEC=15
NAME=$(basename $0)
PIDFILE=/var/run/${NAME}.pid
HOST=$(hostname)
# Log info message to /var/log/daemon.log
function LOG {
logger -p daemon.info -t "${NAME}($$): " "$@"
}
# Log error message to /var/log/daemon.log
function ERROR {
logger -p daemon.error -t "${NAME}($$): " "$@"
}
function _check_for_k8s_config {
# If this node has not been configured, then there is nothing to recover
if [ ! -f ${KUBECONFIG} ]; then
LOG "${KUBECONFIG} does not exist. No pods to recover."
exit 0
fi
}
function _check_for_existing_process {
# Abort if another instantiation is already running
if [ -e ${PIDFILE} ]; then
PID=$(cat ${PIDFILE})
PROCESS=$(cat /proc/${PID}/comm 2>/dev/null)
if [ -n "${PID}" -a -e "/proc/${PID}" -a "${PROCESS}" == "${NAME}" ]; then
ERROR "Aborting, ${PID} already running: ${PIDFILE}."
exit 1
else
OUT=$(rm -v -f ${PIDFILE})
LOG "${OUT}"
fi
fi
# Create pidfile to indicate the script is running
echo $$ > ${PIDFILE}
}
function _wait_for_systemd {
while true; do
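# 'is-system-running' reports 'running' when startup is complete, or
# 'degraded' when the system is up but some units have failed; either
# state means boot-time unit activation has settled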
if systemctl is-system-running | grep -q -e running -e degraded; then
break
fi
LOG "Waiting for systemd to finish booting..."
sleep ${SLEEP_DELAY_SEC}
done
}
function _wait_for_pod_stabilization {
last_count=0
stability_count=0
NINETY_SEC_COUNT=$((90/SLEEP_DELAY_SEC))
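# Number of consecutive unchanged polls (90s / SLEEP_DELAY_SEC) required
# before the pod list is considered stable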
while true ; do
pods_in_flux=$(kubectl get pods --no-headers --all-namespaces | grep -v -e Running -e Completed | wc -l)
if [[ $pods_in_flux -ne $last_count ]]; then
LOG "Waiting on pod transitions to stabilize... $pods_in_flux pods are not Running/Completed"
last_count=$pods_in_flux
stability_count=0
else
LOG "Pods transitions are stable... for $((stability_count*${SLEEP_DELAY_SEC})) seconds."
if [[ $stability_count -eq $NINETY_SEC_COUNT ]]; then
break
fi
stability_count=$((stability_count+1))
fi
sleep ${SLEEP_DELAY_SEC}
done
}
function _unknown_pods {
# $1: action <recover|verify>
# Target specific namespaces and pods on this host
SUPPORTED_NAMESPACES=('openstack' 'monitor')
if [ "$1" == 'recover' ]; then
# Recovers pods that are: Running/Unknown and Pending/Init:Unknown
for ns in "${SUPPORTED_NAMESPACES[@]}"; do
PODS=$(kubectl get pods -n $ns --field-selector spec.nodeName=${HOST} 2>/dev/null | awk /Unknown/'{print $1}')
for pod in $PODS ; do
LOG "Unknown pods: Recovering: $ns/$pod"
kubectl delete pods -n $ns $pod --wait=false
done
done
elif [ "$1" == 'verify' ]; then
for ns in "${SUPPORTED_NAMESPACES[@]}"; do
PODS=$(kubectl get pods -n $ns --field-selector spec.nodeName=${HOST} 2>/dev/null | awk /Unknown/'{print $1}')
if [ -z "${PODS}" ]; then
LOG "Unknown pods: None present for namespace: $ns"
else
ERROR "Unknown pods: still present for namespace: $ns"
fi
done
else
ERROR "Unknown action: $1"
fi
}
function _node_affinity_pods {
# $1: action <recover|verify>
if [ "$1" == 'recover' ]; then
PODS=$(kubectl get pods --all-namespaces --field-selector status.phase=Failed,spec.nodeName=${HOST} 2>/dev/null | awk /NodeAffinity/'{print $1"/"$2}')
for pod in $PODS ; do
LOG "NodeAffinity pods: Recovering: $pod"
kubectl delete pods -n ${pod//// } --wait=false
done
elif [ "$1" == 'verify' ]; then
PODS=$(kubectl get pods --all-namespaces --field-selector status.phase=Failed,spec.nodeName=${HOST} 2>/dev/null | awk /NodeAffinity/'{print $1"/"$2}')
if [ -z "${PODS}" ]; then
LOG "NodeAffinity pods: None present."
else
ERROR "NodeAffinity pods: still present"
fi
else
ERROR "Unknown action: $1"
fi
}
function _force_reset_pods {
# $1: action <recover|verify>
# Handle resetting openstack libvirt pod as it sometimes is in a Running but
# unusable state
if kubectl get namespace openstack > /dev/null 2>&1; then
# Get the libvirt pods on this host that are Running without all
# conditions True
#
# Conditions:
# Initialized True
# Ready True
# ContainersReady True
# PodScheduled True
#
# NAME STATUS CONDITIONS NODE
# libvirt-libvirt-controller-0-937646f6-xst4r Running True,True,True,True controller-0
#
CUSTOM_COLUMNS='custom-columns=NAME:.metadata.name,STATUS:status.phase,CONDITIONS:status.conditions[*].status,NODE:spec.nodeName'
FIELD_SELECTOR="spec.nodeName=${HOST}"
PODS=$(kubectl get pods -n openstack -l application=libvirt --field-selector ${FIELD_SELECTOR} -o ${CUSTOM_COLUMNS} | grep -v NAME | grep -v 'True,True,True,True' | awk '{print $1}')
if [ "$1" == 'recover' ]; then
for pod in $PODS ; do
LOG "Recovering libvirt pod: $pod"
kubectl delete pods -n openstack $pod --wait=false
done
elif [ "$1" == 'verify' ]; then
if [ -z "${PODS}" ]; then
LOG "Openstack libvirt pod on ${HOST} is running."
else
ERROR "Openstack libvirt pod on ${HOST} has not been recovered."
fi
else
ERROR "Unknown action: $1"
fi
fi
}
function _examine_pods {
# $1: action <recover|verify>
# Wait for pod transitions to stop
_wait_for_pod_stabilization
# Check for recovery actions
_unknown_pods $1
_node_affinity_pods $1
_force_reset_pods $1
}
function start {
_check_for_k8s_config
_check_for_existing_process
LOG "Starting."
_wait_for_systemd
_examine_pods 'recover'
_examine_pods 'verify'
}
function stop {
LOG "Stopping."
}
function status {
:
}
function reset {
:
}
case "$1" in
start)
start
;;
stop)
stop
;;
restart|force-reload|reload)
stop
start
;;
status)
status
;;
reset)
reset
;;
*)
echo "Usage: $0 {start|stop|force-reload|restart|reload|status|reset}"
exit 1
;;
esac
exit 0


@@ -0,0 +1,14 @@
[Unit]
Description=Kubernetes Pods Recovery Service
After=sw-patch.service
After=kubelet.service
Requires=kubelet.service
[Service]
Type=simple
ExecStart=/usr/local/sbin/k8s-pod-recovery start
ExecStop=/usr/local/sbin/k8s-pod-recovery stop
PIDFile=/var/run/k8s-pod-recovery.pid
[Install]
WantedBy=multi-user.target
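
Once installed, the service can be checked manually; illustrative
commands (the script logs to /var/log/daemon.log via logger):

  systemctl status k8s-pod-recovery.service
  grep k8s-pod-recovery /var/log/daemon.log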


@@ -0,0 +1,52 @@
Name: k8s-pod-recovery
Version: 1.0
Release: 0%{?_tis_dist}.%{tis_patch_ver}
Summary: Kubernetes Pod Recovery Service
License: Apache-2.0
Group: base
Packager: Wind River <info@windriver.com>
URL: unknown
Source0: k8s-pod-recovery
Source1: k8s-pod-recovery.service
Requires: /bin/bash
Requires: systemd
%description
%{summary}
%define local_dir /usr/local
%define local_sbindir %{local_dir}/sbin
%prep
%install
install -d %{buildroot}%{local_sbindir}
install -m 755 %{SOURCE0} %{buildroot}%{local_sbindir}/k8s-pod-recovery
install -p -D -m 644 %{SOURCE1} %{buildroot}%{_unitdir}/k8s-pod-recovery.service
%post
if [ $1 -eq 1 ]; then
# Package install: enable and start it
/usr/bin/systemctl enable k8s-pod-recovery.service > /dev/null 2>&1 || :
/usr/bin/systemctl start k8s-pod-recovery.service > /dev/null 2>&1 || :
else
# Package upgrade: reenable in case [Install] changes and restart to pick up
# new actions
if /usr/bin/systemctl --quiet is-enabled k8s-pod-recovery.service ; then
/usr/bin/systemctl reenable k8s-pod-recovery.service > /dev/null 2>&1 || :
/usr/bin/systemctl restart k8s-pod-recovery.service > /dev/null 2>&1 || :
fi
fi
%preun
if [ $1 -eq 0 ]; then
/usr/bin/systemctl stop k8s-pod-recovery.service > /dev/null 2>&1 || :
/usr/bin/systemctl disable k8s-pod-recovery.service > /dev/null 2>&1 || :
fi
%files
%defattr(-,root,root,-)
%{local_sbindir}/k8s-pod-recovery
%{_unitdir}/k8s-pod-recovery.service