From 17c1b8894deeb973dfb29a5fcac9fd630591b649 Mon Sep 17 00:00:00 2001
From: Robert Church
Date: Wed, 2 Sep 2020 00:59:44 -0400
Subject: [PATCH] Introduce k8s pod recovery service

Add a recovery service, started by systemd on host boot, that waits for
pod transitions to stabilize and then takes corrective action for the
following set of conditions:

- Delete to restart pods stuck in an Unknown or Init:Unknown state for
  the 'openstack' and 'monitor' namespaces.
- Delete to restart Failed pods stuck in a NodeAffinity state, in any
  namespace.
- Delete to restart the libvirt pod in the 'openstack' namespace when
  any of its conditions (Initialized, Ready, ContainersReady,
  PodScheduled) are not True.

This only recovers pods scheduled to the host where the service is
installed.

This service is installed on all controller types. There is currently
no evidence that we need it on dedicated worker nodes.

Each of these conditions should be evaluated after the next k8s
component rebase to determine if any of these recovery actions can be
removed.

Change-Id: I0e304d1a2b0425624881f3b2d9c77f6568844196
Closes-Bug: #1893977
Signed-off-by: Robert Church
---
 centos_iso_image.inc                          |   1 +
 centos_pkg_dirs                               |   1 +
 .../k8s-pod-recovery/centos/build_srpm.data   |   4 +
 .../centos/files/k8s-pod-recovery             | 247 ++++++++++++++++++
 .../centos/files/k8s-pod-recovery.service     |  14 +
 .../centos/k8s-pod-recovery.spec              |  52 ++++
 6 files changed, 319 insertions(+)
 create mode 100644 kubernetes/k8s-pod-recovery/centos/build_srpm.data
 create mode 100755 kubernetes/k8s-pod-recovery/centos/files/k8s-pod-recovery
 create mode 100644 kubernetes/k8s-pod-recovery/centos/files/k8s-pod-recovery.service
 create mode 100644 kubernetes/k8s-pod-recovery/centos/k8s-pod-recovery.spec

diff --git a/centos_iso_image.inc b/centos_iso_image.inc
index 048d001e0..af18217fb 100644
--- a/centos_iso_image.inc
+++ b/centos_iso_image.inc
@@ -174,6 +174,7 @@ kubernetes-node
 kubernetes-kubeadm
 kubernetes-client
 containerd
+k8s-pod-recovery
 
 # resource-agents
 resource-agents
diff --git a/centos_pkg_dirs b/centos_pkg_dirs
index eadeb773f..0c9f26a17 100644
--- a/centos_pkg_dirs
+++ b/centos_pkg_dirs
@@ -61,6 +61,7 @@ kubernetes/helm
 kubernetes/chartmuseum
 kubernetes/armada-helm-toolkit
 kubernetes/armada
+kubernetes/k8s-pod-recovery
 grub/grubby
 base/dpkg
 base/cluster-resource-agents
diff --git a/kubernetes/k8s-pod-recovery/centos/build_srpm.data b/kubernetes/k8s-pod-recovery/centos/build_srpm.data
new file mode 100644
index 000000000..2f3c17bc1
--- /dev/null
+++ b/kubernetes/k8s-pod-recovery/centos/build_srpm.data
@@ -0,0 +1,4 @@
+SRC_DIR="."
+COPY_LIST="$FILES_BASE/*"
+
+TIS_PATCH_VER=PKG_GITREVCOUNT
diff --git a/kubernetes/k8s-pod-recovery/centos/files/k8s-pod-recovery b/kubernetes/k8s-pod-recovery/centos/files/k8s-pod-recovery
new file mode 100755
index 000000000..68b965cd0
--- /dev/null
+++ b/kubernetes/k8s-pod-recovery/centos/files/k8s-pod-recovery
@@ -0,0 +1,247 @@
+#!/bin/bash
+#
+# Copyright (c) 2020 Wind River Systems, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+#
+# chkconfig: 2345 76 25
+#
+### BEGIN INIT INFO
+# Provides:          k8s-pod-recovery
+# Default-Start:     3 5
+# Required-Start:
+# Required-Stop:
+# Default-Stop:      0 1 2 6
+# Short-Description: Service to recover pods after host boot
+### END INIT INFO
+
+. /etc/platform/platform.conf
+
+export PATH=/sbin:/usr/sbin:/bin:/usr/bin:/usr/local/bin
+export KUBECONFIG=/etc/kubernetes/admin.conf
+SLEEP_DELAY_SEC=15
+
+NAME=$(basename $0)
+PIDFILE=/var/run/${NAME}.pid
+HOST=$(hostname)
+
+# Log info message to /var/log/daemon.log
+function LOG {
+    logger -p daemon.info -t "${NAME}($$): " "$@"
+}
+
+# Log error message to /var/log/daemon.log
+function ERROR {
+    logger -p daemon.error -t "${NAME}($$): " "$@"
+}
+
+function _check_for_k8s_config {
+    # If this node has not been configured, then there is nothing to recover
+    if [ ! -f ${KUBECONFIG} ]; then
+        LOG "${KUBECONFIG} does not exist. No pods to recover."
+        exit 0
+    fi
+}
+
+function _check_for_existing_process {
+    # Abort if another instance is already running
+    if [ -e ${PIDFILE} ]; then
+        PID=$(cat ${PIDFILE})
+        PROCESS=$(cat /proc/${PID}/comm 2>/dev/null)
+        if [ -n "${PID}" -a -e /proc/${PID} -a "${PROCESS}" == "${NAME}" ]; then
+            ERROR "Aborting, ${PID} already running: ${PIDFILE}."
+            exit 1
+        else
+            OUT=$(rm -v -f ${PIDFILE})
+            LOG "${OUT}"
+        fi
+    fi
+
+    # Create pidfile to indicate the script is running
+    echo $$ > ${PIDFILE}
+}
+
+function _wait_for_systemd {
+    while true; do
+        if systemctl is-system-running | grep -q -e running -e degraded; then
+            break
+        fi
+        LOG "Waiting for systemd to finish booting..."
+        sleep ${SLEEP_DELAY_SEC}
+    done
+}
+
+function _wait_for_pod_stabilization {
+    last_count=0
+    stability_count=0
+    NINETY_SEC_COUNT=$((90/SLEEP_DELAY_SEC))
+    while true ; do
+        pods_in_flux=$(KUBECONFIG=/etc/kubernetes/admin.conf kubectl get pods --no-headers --all-namespaces | grep -v -e Running -e Completed | wc -l)
+        if [[ $pods_in_flux -ne $last_count ]]; then
+            LOG "Waiting on pod transitions to stabilize... $pods_in_flux pods are not Running/Completed"
+            last_count=$pods_in_flux
+            stability_count=0
+        else
+            LOG "Pod transitions are stable... for $((stability_count*${SLEEP_DELAY_SEC})) seconds."
+            if [[ $stability_count -eq $NINETY_SEC_COUNT ]]; then
+                break
+            fi
+            stability_count=$((stability_count+1))
+        fi
+        sleep ${SLEEP_DELAY_SEC}
+    done
+}
+
+function _unknown_pods {
+    # $1: actions
+
+    # Target specific namespaces and pods on this host
+    SUPPORTED_NAMESPACES=('openstack' 'monitor')
+
+    if [ "$1" == 'recover' ]; then
+        # Recovers pods that are: Running/Unknown and Pending/Init:Unknown
+        for ns in ${SUPPORTED_NAMESPACES[@]}; do
+            PODS=$(kubectl get pods -n $ns --field-selector spec.nodeName=${HOST} 2>/dev/null | awk /Unknown/'{print $1}')
+            for pod in $PODS ; do
+                LOG "Unknown pods: Recovering: $ns/$pod"
+                kubectl delete pods -n $ns $pod --wait=false
+            done
+        done
+    elif [ "$1" == 'verify' ]; then
+        for ns in ${SUPPORTED_NAMESPACES[@]}; do
+            PODS=$(kubectl get pods -n $ns --field-selector spec.nodeName=${HOST} 2>/dev/null | awk /Unknown/'{print $1}')
+            if [ -z "${PODS}" ]; then
+                LOG "Unknown pods: None present for namespace: $ns"
+            else
+                ERROR "Unknown pods: still present for namespace: $ns"
+            fi
+        done
+    else
+        ERROR "Unknown action: $1"
+    fi
+}
+
+function _node_affinity_pods {
+    # $1: actions
+
+    if [ "$1" == 'recover' ]; then
+        PODS=$(kubectl get pods --all-namespaces --field-selector status.phase=Failed,spec.nodeName=${HOST} 2>/dev/null | awk /NodeAffinity/'{print $1"/"$2}')
+        for pod in $PODS ; do
+            LOG "NodeAffinity pods: Recovering: $pod"
+            kubectl delete pods -n ${pod//// } --wait=false
+        done
+    elif [ "$1" == 'verify' ]; then
+        PODS=$(kubectl get pods --all-namespaces --field-selector status.phase=Failed,spec.nodeName=${HOST} 2>/dev/null | awk /NodeAffinity/'{print $1"/"$2}')
+        if [ -z "${PODS}" ]; then
+            LOG "NodeAffinity pods: None present."
+        else
+            ERROR "NodeAffinity pods: still present"
+        fi
+    else
+        ERROR "Unknown action: $1"
+    fi
+
+}
+
+function _force_reset_pods {
+    # $1: actions
+
+    # Handle resetting the openstack libvirt pod as it is sometimes in a
+    # Running but unusable state
+    if kubectl get namespace openstack > /dev/null 2>&1; then
+
+        # Get the libvirt pods on this host that are Running without all
+        # conditions True
+        #
+        # Conditions:
+        #   Initialized      True
+        #   Ready            True
+        #   ContainersReady  True
+        #   PodScheduled     True
+        #
+        # NAME                                          STATUS    CONDITIONS            NODE
+        # libvirt-libvirt-controller-0-937646f6-xst4r   Running   True,True,True,True   controller-0
+        #
+        CUSTOM_COLUMNS='custom-columns=NAME:.metadata.name,STATUS:status.phase,CONDITIONS:status.conditions[*].status,NODE:spec.nodeName'
+        FIELD_SELECTOR="spec.nodeName=${HOST}"
+        PODS=$(kubectl get pods -n openstack -l application=libvirt --field-selector ${FIELD_SELECTOR} -o ${CUSTOM_COLUMNS} | grep -v NAME | grep -v 'True,True,True,True' | awk '{print $1}')
+
+        if [ "$1" == 'recover' ]; then
+            for pod in $PODS ; do
+                LOG "Recovering libvirt pod: $pod"
+                kubectl delete pods -n openstack $pod --wait=false
+            done
+        elif [ "$1" == 'verify' ]; then
+            if [ -z "${PODS}" ]; then
+                LOG "Openstack libvirt pod on ${HOST} is running."
+            else
+                ERROR "Openstack libvirt pod on ${HOST} has not been recovered."
+            fi
+        else
+            ERROR "Unknown action: $1"
+        fi
+    fi
+}
+
+function _examine_pods {
+    # $1: actions
+
+    # Wait for pod transitions to stop
+    _wait_for_pod_stabilization
+
+    # Check for recovery actions
+    _unknown_pods $1
+    _node_affinity_pods $1
+    _force_reset_pods $1
+}
+
+
+function start {
+    _check_for_k8s_config
+    _check_for_existing_process
+
+    LOG "Starting."
+
+    _wait_for_systemd
+    _examine_pods 'recover'
+    _examine_pods 'verify'
+}
+
+function stop {
+    LOG "Stopping."
+} + +function status { + : +} + +function reset { + : +} + +case "$1" in + start) + start + ;; + stop) + stop + ;; + restart|force-reload|reload) + stop + start + ;; + status) + status + ;; + reset) + reset + ;; + *) + echo "Usage: $0 {start|stop|force-reload|restart|reload|status|reset}" + exit 1 + ;; +esac + +exit 0 diff --git a/kubernetes/k8s-pod-recovery/centos/files/k8s-pod-recovery.service b/kubernetes/k8s-pod-recovery/centos/files/k8s-pod-recovery.service new file mode 100644 index 000000000..113d0efd4 --- /dev/null +++ b/kubernetes/k8s-pod-recovery/centos/files/k8s-pod-recovery.service @@ -0,0 +1,14 @@ +[Unit] +Description=Kubernetes Pods Recovery Service +After=sw-patch.service +After=kubelet.service +Requires=kubelet.service + +[Service] +Type=simple +ExecStart=/usr/local/sbin/k8s-pod-recovery start +ExecStop=/usr/local/sbin/k8s-pod-recovery stop +PIDFile=/var/run/k8s-pod-recovery.pid + +[Install] +WantedBy=multi-user.target diff --git a/kubernetes/k8s-pod-recovery/centos/k8s-pod-recovery.spec b/kubernetes/k8s-pod-recovery/centos/k8s-pod-recovery.spec new file mode 100644 index 000000000..082dd6cc6 --- /dev/null +++ b/kubernetes/k8s-pod-recovery/centos/k8s-pod-recovery.spec @@ -0,0 +1,52 @@ +Name: k8s-pod-recovery +Version: 1.0 +Release: 0%{?_tis_dist}.%{tis_patch_ver} +Summary: Kubernetes Pod Recovery Service +License: Apache-2.0 +Group: base +Packager: Wind River +URL: unknown +Source0: k8s-pod-recovery +Source1: k8s-pod-recovery.service + +Requires: /bin/bash +Requires: systemd + +%description +%{summary} + +%define local_dir /usr/local +%define local_sbindir %{local_dir}/sbin + +%prep + +%install +install -d %{buildroot}%{local_sbindir} +install -m 755 %{SOURCE0} %{buildroot}%{local_sbindir}/k8s-pod-recovery +install -p -D -m 644 %{SOURCE1} %{buildroot}%{_unitdir}/k8s-pod-recovery.service + +%post +if [ $1 -eq 1 ]; then + # Package install: enable and start it + /usr/bin/systemctl enable k8s-pod-recovery.service > /dev/null 2>&1 || : + /usr/bin/systemctl start k8s-pod-recovery.service > /dev/null 2>&1 || : +else + # Package upgrade: reenable in case [Install] changes and restart to pick up + # new actions + if /usr/bin/systemctl --quiet is-enabled k8s-pod-recovery.service ; then + /usr/bin/systemctl reenable k8s-pod-recovery.service > /dev/null 2>&1 || : + /usr/bin/systemctl restart k8s-pod-recovery.service > /dev/null 2>&1 || : + fi +fi + +%preun +if [ $1 -eq 0 ]; then + /usr/bin/systemctl stop k8s-pod-recovery.service > /dev/null 2>&1 || : + /usr/bin/systemctl disable k8s-pod-recovery.service > /dev/null 2>&1 || : +fi + + +%files +%defattr(-,root,root,-) +%{local_sbindir}/k8s-pod-recovery +%{_unitdir}/k8s-pod-recovery.service