upstream/openstack/python-cinder/centos/files/restart-cinder

166 lines
6.1 KiB
Bash

#!/bin/bash
#
# Copyright (c) 2016 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
#
# The patching subsystem provides a patch-functions bash source file
# with useful function and variable definitions.
#
. /etc/patching/patch-functions
#
# We can now check to see what type of node we're on, if it's locked, etc,
# and act accordingly
#
#
# Declare an overall script return code
#
declare -i GLOBAL_RC=$PATCH_STATUS_OK
#
# Handle restarting Cinder services
#
# Each SERVICES element is one whitespace-separated record:
#   "<service_name> <timeout> <initialize_interval> <behavior after timeout>"
#
# Note: elements are separated by the newlines alone. Do NOT end the lines
# with a backslash: a backslash-newline is removed by the shell before word
# splitting, so unless the next line starts with whitespace the adjacent
# quoted strings are glued into a single array element.
SERVICES=(
    "cinder-volume 30 30 kill"
    "cinder-scheduler 50 20 kill"
    "cinder-backup 30 30 kill"
    "cinder-api 30 20 kill"
)
# where:
# <service_name> = name of the executable file as reported by ps
# <timeout> = how long (in seconds) to wait for the process to shut down gracefully
# <behavior after timeout> = either 'kill' the process with SIGKILL or 'leave' it running.
# The idea is to avoid leaving degraded processes behind; it is better to have
# new ones re-spawned.
# <initialize_interval> = how long (in seconds) to wait for the new process to be considered available.
# The processes are restarted by sm by running the ocf scripts at
# /usr/lib/ocf/resource.d/openstack/cinder-*. These scripts have a very good service init
# monitoring routine. We just have to make sure that they don't hang while restarting.
# The values are taken from SM, but we don't wait for any retry.
# Note: the cinder-volume timeout is set to 180 secs in sm, which is too long for our
# controlled restart.
#
# Print the PID recorded in a service's pid file.
#
# Arguments:
#   $1 - service name; the file read is <pid_dir>/<service>.pid
#   $2 - (optional) directory holding the pid files,
#        defaults to /var/run/resource-agents
# Outputs:
#   The contents of the pid file on stdout, or an empty line when the
#   file does not exist or cannot be read.
# Returns:
#   0 in both cases; callers distinguish them by testing for an empty result.
#
function get_pid {
    local service=$1
    local pid_dir=${2:-/var/run/resource-agents}
    local pid_file="$pid_dir/$service.pid"
    # Keep the result local so that a direct (non-subshell) call cannot
    # overwrite a caller's own PID variable.
    local pid=""

    if [[ -r "$pid_file" ]]
    then
        pid=$(<"$pid_file")
    fi
    echo "$pid"
}
#
# Restart, one at a time, every cinder service listed in SERVICES.
#
# Does nothing unless is_controller succeeds (cinder services only run on the
# controller) and $PATCH_FLAGDIR/cinder.restarted does not exist yet.
#
# For each SERVICES record whose service sm-query reports as 'enabled-active':
#   1. send SIGTERM to the PID found via get_pid;
#   2. poll get_pid for up to <timeout> seconds until it reports another PID;
#   3. if the PID is unchanged and <behavior after timeout> is 'kill', send
#      SIGKILL and poll for up to 10 more seconds for a new, non-empty PID;
#   4. if a new process was seen, poll sm-query for up to
#      <initialize_interval> seconds until it reports 'enabled-active';
#   5. if sm-query still does not report 'enabled-active', record the failure
#      and stop processing the remaining services.
#
# Globals:
#   SERVICES, PATCH_FLAGDIR, PATCH_STATUS_FAILED (read)
#   GLOBAL_RC (set to $PATCH_STATUS_FAILED on failure)
# Files:
#   $PATCH_FLAGDIR/cinder.restarted  - created up front, removed again on
#                                      failure so the restart can be retried
#   $PATCH_FLAGDIR/cinder.restarting - created here; not removed by this
#                                      function
#
function restart_cinder_services {
    local entry service timeout initialize_interval after_timeout
    local new_process pid new_pid deadline

    # Cinder services only run on the controller
    if ! is_controller
    then
        return 0
    fi

    # The restart was already handled earlier
    if [ -f "$PATCH_FLAGDIR/cinder.restarted" ]
    then
        return 0
    fi

    touch "$PATCH_FLAGDIR/cinder.restarted"
    # cinder-volume uses this to know that its new process was restarted by in-service patching
    touch "$PATCH_FLAGDIR/cinder.restarting"

    for entry in "${SERVICES[@]}"; do
        # Split the record into its fields without touching the positional parameters
        read -r service timeout initialize_interval after_timeout <<< "$entry"
        new_process="false"

        # Check SM to see if service is running; skip it otherwise
        if ! sm-query service "$service" | grep -q 'enabled-active'
        then
            continue
        fi

        loginfo "$0: Restarting $service"

        # Get PID and send the restart signal to the process
        pid=$(get_pid "$service")
        if [[ -n "$pid" ]]
        then
            kill -s TERM "$pid"
        else
            # Never call kill without a target
            loginfo "$0: No PID found for $service, cannot send SIGTERM"
        fi

        # Wait up to $timeout seconds for service to gracefully recover
        deadline=$(( SECONDS + timeout ))
        while (( deadline >= SECONDS ))
        do
            # Check to see if we have a new process
            new_pid=$(get_pid "$service")
            if [[ "$pid" != "$new_pid" ]]
            then
                new_process="true"
                break
            fi
            # Still old process? Let's wait 5 seconds and check again
            sleep 5
        done

        # Re-check after the last sleep: the loop above exits without
        # looking at the PID again once the deadline has passed.
        new_pid=$(get_pid "$service")
        if [[ "$pid" != "$new_pid" ]]
        then
            # The PID changed (possibly during the final sleep); make sure
            # the initialisation wait below is not skipped.
            new_process="true"
        elif [[ "$after_timeout" == "kill" ]]
        then
            # We have the old process still running: do a hard restart
            loginfo "$0: Old process of $service failed to gracefully terminate in $timeout, killing it!"
            if [[ -n "$pid" ]]
            then
                kill -s KILL "$pid"
            fi
            # Wait for a new process to be restarted by sm
            deadline=$(( SECONDS + 10 ))
            while (( deadline >= SECONDS ))
            do
                sleep 1
                new_pid=$(get_pid "$service")
                if [[ -n "$new_pid" && "$pid" != "$new_pid" ]]
                then
                    loginfo "$0: New process of $service started"
                    new_process="true"
                    break
                fi
            done
        fi

        # Wait for the new process to complete initialisation
        if [[ "$new_process" == "true" ]]
        then
            deadline=$(( SECONDS + initialize_interval ))
            while (( deadline >= SECONDS ))
            do
                # Note: Services are restarted by sm which runs the ocf start script.
                # Sm reports enabled-active only *after* those scripts return success
                if sm-query service "$service" | grep -q 'enabled-active'
                then
                    loginfo "$0: New process of $service started correctly"
                    break
                fi
                sleep 1
            done
        fi

        if ! sm-query service "$service" | grep -q 'enabled-active'
        then
            # Still not running! Clear the flag and mark the RC as failed.
            # The flag created above is cinder.restarted (not <service>.restarted).
            loginfo "$0: Failed to restart $service"
            rm -f "$PATCH_FLAGDIR/cinder.restarted"
            GLOBAL_RC=$PATCH_STATUS_FAILED
            # Emit the current SM state of the failed service
            sm-query service "$service"
            # Note: break if any process in the SERVICES list fails
            break
        fi
    done
}

restart_cinder_services
#
# Remove the in-progress marker, then exit the script with the overall
# return code
#
# The path is quoted so that a flag directory containing whitespace or glob
# characters is still handled as a single argument.
rm -f -- "$PATCH_FLAGDIR/cinder.restarting"
# GLOBAL_RC is declared with the integer attribute, so it needs no quoting
exit $GLOBAL_RC