integ/utilities/platform-util/scripts/patch-restart-mtce

480 lines
15 KiB
Bash
Executable File

#!/bin/bash
#
# Copyright (c) 2016 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
##############################################################################
#
# This script supports no-reboot patching of any single or
# combination of maintenance processes specified on the command line.
#
# Calling sequence:
#
#    patch-restart-mtce [-c|--clean] process1 process2 process3 ...
#    if [ $? != 0 ] ; then
#        restart action failed
#    fi
#
###############################################################################
#
# The patching subsystem provides a patch-functions bash source file
# with useful function and variable definitions. This script uses
# loginfo, PATCH_FLAGDIR, PATCH_STATUS_OK and PATCH_STATUS_FAILED from it;
# the is_controller / is_cpe names referenced further down are presumably
# defined there as well.
#
if [ -e "/etc/patching/patch-functions" ] ; then
    . /etc/patching/patch-functions
fi

# Fall back to a stderr logger when no loginfo function was provided, so
# the log calls in this script do not fail with "command not found".
if ! declare -F loginfo > /dev/null ; then
    loginfo()
    {
        printf '%s\n' "$*" >&2
    }
fi

loginfo "----------------------------------------------"
loginfo "Maintenance No-Reboot Patching Restart Request"

#
# Declare an overall script return code.
# It starts out as "failed" and is only changed once success is known.
# The :-1 default matters because 'declare -i' evaluates an empty value
# to 0, which would turn the failure default into success if
# PATCH_STATUS_FAILED were not set.
#
declare -i GLOBAL_RC=${PATCH_STATUS_FAILED:-1}

#if [ ! -e $PATCH_FLAGDIR ] ; then
# mkdir -p $PATCH_FLAGDIR
#fi

# if set with -c or --clean options then the flag files for
# each process are removed at the start.
CLEAN=false

#
# Completion status ; stored in PID index
#
DISABLED="disabled"
NOPID="not-running"
SKIPPED="skipped"
RESTARTED="restarted"

#
# process query and restart executables
#
SM_RESTART_EXEC="sm-restart-safe"
SM_QUERY_EXEC="sm-query"
PMON_RESTART_EXEC="pmon-restart"

#
# Struct indexes ; positions of the ':' separated fields of a list entry
#
PROCESS_INDEX=0
PID_INDEX=1
ALIAS_INDEX=2

#
# Process Struct and List [ name ] [ pid | status ] [ alias ]
#
# Entries are strings of the form "name:pid-or-status:alias" for SM managed
# processes and "name:pid-or-status" for PMON managed processes.
#
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# NOTE TO PATCH WRITERS: Name the processes you want no-reboot patch restarted
#                        on the command line of this script.
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# The process restart control structure.
# Both start out as an empty string and become arrays as entries are added.
declare sm_managed_processes=""
declare pmon_managed_processes=""
# Build the process lists from the command line.
# Every argument should be a valid maintenance process name: the name of
# the binary, not the SM alias. The supported names are the case patterns
# below. -c or --clean sets CLEAN so that flag files are removed first.
#
# Entries are created as "name:0:alias" (SM managed) or "name:0" (PMON
# managed); the 0 is a placeholder that is later replaced by a PID or a
# status string.
#
# The current list is expanded unquoted on purpose, so that an empty
# placeholder value contributes no element to the rebuilt array.
while (( $# > 0 )) ; do
    process="${1}"
    shift

    case "${process}" in

        -c|--clean)
            CLEAN=true
            ;;

        # Maintenance Processes - SM managed
        mtcAgent)
            sm_managed_processes=( ${sm_managed_processes[@]} "mtcAgent:0:mtc-agent" )
            ;;
        guestAgent)
            sm_managed_processes=( ${sm_managed_processes[@]} "guestAgent:0:guest-agent" )
            ;;
        hwmond)
            sm_managed_processes=( ${sm_managed_processes[@]} "hwmond:0:hw-mon" )
            ;;

        # Maintenance Processes - PMON managed
        pmond|guestServer|hbsAgent|mtcClient|hbsClient|rmond|hostwd|fsmond|mtclogd|mtcalarmd|lmond)
            pmon_managed_processes=( ${pmon_managed_processes[@]} "${process}:0" )
            ;;

        # Anything else is reported and otherwise ignored
        *)
            loginfo "Unknown process:${process}"
            ;;
    esac
done
# Assume both groupings are done until we know they are not
sm_done=true
pmon_done=true

# Restart the requested SM managed processes.
#
# The -n test looks at element 0 of the list, which is empty until at
# least one SM managed process has been requested.
#
# is_controller is run as a command (expected to come from the sourced
# patch-functions file). Inside '[ ... ]' it would only be a non-empty
# string, which always tests true.
if [[ -n "${sm_managed_processes}" ]] && is_controller ; then

    # Pass 1: record current process IDs
    index=0
    for DAEMON in "${sm_managed_processes[@]}" ; do
        # Split on ':' only, so a multi-PID value ("123 456") stays in one field
        IFS=':' read -r -a info <<< "${DAEMON}"

        if [[ "${CLEAN}" = true ]] ; then
            rm -f "${PATCH_FLAGDIR}/${info[${PROCESS_INDEX}]}.restarted"
        fi

        info[${PID_INDEX}]=$(pidof "${info[${PROCESS_INDEX}]}")
        if [[ -z "${info[${PID_INDEX}]}" ]] ; then
            loginfo "${info[${PROCESS_INDEX}]} is not running"
            info[${PID_INDEX}]="${NOPID}"
        fi

        # Save the PID or NOPID status to the process line
        sm_managed_processes[${index}]="${info[${PROCESS_INDEX}]}:${info[${PID_INDEX}]}:${info[${ALIAS_INDEX}]}"
        index=$(( index + 1 ))
    done

    # Pass 2: restart the processes
    index=0
    for DAEMON in "${sm_managed_processes[@]}" ; do
        IFS=':' read -r -a info <<< "${DAEMON}"

        # A flag file means this process was already restarted earlier
        if [[ -e "${PATCH_FLAGDIR}/${info[${PROCESS_INDEX}]}.restarted" ]] ; then
            info[${PID_INDEX}]="${SKIPPED}"

            # Add the status to the process line
            sm_managed_processes[${index}]="${info[${PROCESS_INDEX}]}:${info[${PID_INDEX}]}:${info[${ALIAS_INDEX}]}"
            index=$(( index + 1 ))
            continue
        fi

        sm_query_result=$(${SM_QUERY_EXEC} service "${info[${ALIAS_INDEX}]}")
        if [[ "${sm_query_result}" == *"enabled-active"* ]] ; then

            # Save the original PID
            info[${PID_INDEX}]=$(pidof "${info[${PROCESS_INDEX}]}")
            if [[ -n "${info[${PID_INDEX}]}" ]] ; then
                loginfo "sm-restart of ${info[${PROCESS_INDEX}]} [pid:${info[${PID_INDEX}]}]"
                touch "${PATCH_FLAGDIR}/${info[${PROCESS_INDEX}]}.restarted"
                ${SM_RESTART_EXEC} service "${info[${ALIAS_INDEX}]}"
                # At least one restart is now pending and must be monitored
                sm_done=false
                sleep 5
            else
                loginfo "${info[${PROCESS_INDEX}]} is not running ; must be on inactive controller"
                info[${PID_INDEX}]="${NOPID}"
            fi

        elif [[ "${sm_query_result}" == *"is enabling"* ]] ; then
            info[${PID_INDEX}]="${NOPID}"
            loginfo "sm-restart ${info[${PROCESS_INDEX}]} ; [in progress] ; [pid:${info[${PID_INDEX}]}]"

        else
            info[${PID_INDEX}]="${DISABLED}"
            loginfo "${info[${PROCESS_INDEX}]} is not active"
        fi

        # Add the PID or status to the process line
        sm_managed_processes[${index}]="${info[${PROCESS_INDEX}]}:${info[${PID_INDEX}]}:${info[${ALIAS_INDEX}]}"
        index=$(( index + 1 ))
    done
fi
# Restart the requested PMON managed processes.
# For each entry the final status (original PID, "skipped" or "disabled")
# is worked out first and then written back to the list once.
if [[ -n "${pmon_managed_processes}" ]] ; then

    echo "DEBUG: pmon_managed_processes:${pmon_managed_processes}"

    index=0
    for entry in "${pmon_managed_processes[@]}" ; do
        IFS=':' read -r -a fields <<< "${entry}"
        name="${fields[${PROCESS_INDEX}]}"
        flag="${PATCH_FLAGDIR}/${name}.restarted"

        # With -c/--clean any record of a previous restart is dropped first
        if [[ "${CLEAN}" = true ]] ; then
            rm -f "${flag}"
        fi

        if [[ -e "${flag}" ]] ; then
            # Already restarted earlier ; leave the process alone
            status="${SKIPPED}"
        else
            # Save the original PID
            status=$(pidof "${name}")
            if [[ -z "${status}" ]] ; then
                status="${DISABLED}"
                loginfo "${name} is not active"
            else
                loginfo "pmon-restart of ${name} [pid:${status}]"
                touch "${flag}"
                ${PMON_RESTART_EXEC} "${name}"
                pmon_done=false
                sleep 2

                ################################################################
                # Special Handling Section
                #
                # - pmond needs 30 seconds to restart before it will start
                #   monitoring processes. That delay might be removable in
                #   the daemon config file, but for now it is there and has
                #   to be waited out.
                ################################################################
                if [[ "${name}" == "pmond" ]] ; then
                    sleep 30
                fi
            fi
        fi

        # Save the updated PID or other status to the process line
        pmon_managed_processes[${index}]="${name}:${status}"
        index=$(( index + 1 ))
    done
fi
# Early out: if neither restart pass left anything pending there is
# nothing to monitor, so report both lists and exit with the OK status.
if [[ "${sm_done}" = true && "${pmon_done}" = true ]] ; then
    GLOBAL_RC=${PATCH_STATUS_OK}
    loginfo " SM Processes: ${sm_managed_processes[@]}"
    loginfo "PMON Processes: ${pmon_managed_processes[@]}"
    loginfo "Maintenance No-Reboot Patching Status: ${GLOBAL_RC} - nothing to do."
    exit "${GLOBAL_RC}"
fi
# Monitor the restarted processes until every pending restart has been
# confirmed or the timeout expires.
#
# SECONDS is reset so that the timeout is measured from this point
# rather than from the start of the shell: time zero now plus TIMEOUT
# seconds.
#
SECONDS=0
TIMEOUT=120
UNTIL=$(( SECONDS + TIMEOUT ))
loginfo "restart timeout is ${TIMEOUT}"

# Log the name of every list entry whose status field matches, preceded
# by a heading that is logged once, and only if there is a match.
#   $1   - status to match (e.g. ${RESTARTED} or ${SKIPPED})
#   $2   - heading text
#   $3.. - list entries in "name:pid-or-status[:alias]" form
log_processes_with_status()
{
    local wanted="${1}"
    local heading="${2}"
    shift 2

    local entry
    local -a fields
    local logged=false

    for entry in "$@" ; do
        IFS=':' read -r -a fields <<< "${entry}"
        if [[ "${fields[${PID_INDEX}]}" == "${wanted}" ]] ; then
            if [[ "${logged}" = false ]] ; then
                loginfo "${heading}"
                logged=true
            fi
            loginfo "... process: ${fields[${PROCESS_INDEX}]}"
        fi
    done
}

while (( UNTIL >= SECONDS )) ; do

    #########################################################################
    # SM managed processes ; only monitored while a restart is pending
    #########################################################################
    if [[ "${sm_done}" = false ]] ; then

        # Start assuming every SM restart has completed
        sm_not_done=false

        # is_controller / is_cpe are run as commands (expected to come from
        # the sourced patch-functions file). Inside '[ ... ]' they would
        # only be non-empty strings, which always test true.
        if is_controller || is_cpe ; then
            index=0
            for DAEMON in "${sm_managed_processes[@]}" ; do
                # Split on ':' only, so a multi-PID value stays in one field
                IFS=':' read -r -a info <<< "${DAEMON}"

                # Don't waste time on processes that are skipped due to a
                # past restart, have already restarted, are disabled or
                # were not running in the first place.
                case "${info[${PID_INDEX}]}" in
                    "${SKIPPED}"|"${RESTARTED}"|"${DISABLED}"|"${NOPID}")
                        index=$(( index + 1 ))
                        continue
                        ;;
                esac

                if [[ $(${SM_QUERY_EXEC} service "${info[${ALIAS_INDEX}]}") == *"enabled-active"* ]] ; then
                    # Restarted once the service is active again under a
                    # PID that differs from the one saved before the restart
                    if new_pid=$(pidof "${info[${PROCESS_INDEX}]}") && [[ -n "${new_pid}" ]] ; then
                        if [[ "${info[${PID_INDEX}]}" != "${new_pid}" ]] ; then
                            loginfo "${info[${PROCESS_INDEX}]} ${RESTARTED} ok [pid:${info[${PID_INDEX}]} -> ${new_pid}]"
                            info[${PID_INDEX}]="${RESTARTED}"
                        fi
                    fi
                fi

                # Not done as long as there is one process not restarted
                if [[ "${info[${PID_INDEX}]}" != "${RESTARTED}" ]] ; then
                    sm_not_done=true
                fi

                # Add the PID or status to the process line
                sm_managed_processes[${index}]="${info[${PROCESS_INDEX}]}:${info[${PID_INDEX}]}:${info[${ALIAS_INDEX}]}"
                index=$(( index + 1 ))
            done
        fi

        # When the SM restarts are done print a summary ; only once,
        # because sm_done keeps this section from being entered again.
        if [[ "${sm_not_done}" = false ]] ; then
            sm_done=true
            log_processes_with_status "${RESTARTED}" \
                "The following 'sm managed' processes have been 'restarted'" \
                "${sm_managed_processes[@]}"
            log_processes_with_status "${SKIPPED}" \
                "The following 'sm managed' processes have been 'skipped' ; due to previous restart" \
                "${sm_managed_processes[@]}"
        fi
    fi

    #########################################################################
    # For all nodes ....
    #########################################################################
    # Loop over all PMON processes looking for complete restarts.
    # Update process struct PID field as status is learned.
    if [[ "${pmon_done}" = false ]] ; then

        # Start assuming every PMON restart has completed
        pmon_not_done=false

        index=0
        for DAEMON in "${pmon_managed_processes[@]}" ; do
            IFS=':' read -r -a info <<< "${DAEMON}"

            # Don't waste time on processes that are skipped due to a past
            # restart, have already restarted, are disabled or were not
            # running in the first place.
            case "${info[${PID_INDEX}]}" in
                "${SKIPPED}"|"${RESTARTED}"|"${DISABLED}"|"${NOPID}")
                    index=$(( index + 1 ))
                    continue
                    ;;
            esac

            # The process counts as restarted as soon as pidof reports a PID.
            # NOTE(review): unlike the SM check above, the reported PID is not
            # compared with the saved one ; confirm that this is intended.
            if new_pid=$(pidof "${info[${PROCESS_INDEX}]}") && [[ -n "${new_pid}" ]] ; then
                loginfo "${info[${PROCESS_INDEX}]} ${RESTARTED} ok [PID: ${info[${PID_INDEX}]} -> ${new_pid}]"
                info[${PID_INDEX}]="${RESTARTED}"
            fi

            # Set not done as long as there is one process not restarted
            if [[ "${info[${PID_INDEX}]}" != "${RESTARTED}" ]] ; then
                pmon_not_done=true
            fi

            # Add the PID or status to the process line
            pmon_managed_processes[${index}]="${info[${PROCESS_INDEX}]}:${info[${PID_INDEX}]}"
            index=$(( index + 1 ))
        done

        # When all PMON restarts are done print a summary ; only once,
        # because pmon_done keeps this section from being entered again.
        if [[ "${pmon_not_done}" = false ]] ; then
            pmon_done=true
            log_processes_with_status "${RESTARTED}" \
                "The following 'pmon managed' processes have been 'restarted'" \
                "${pmon_managed_processes[@]}"
            log_processes_with_status "${SKIPPED}" \
                "The following 'pmon managed' processes have been 'skipped' ; due to previous restart" \
                "${pmon_managed_processes[@]}"
        fi
    fi

    # Check for done. If this is not met before the timeout expires, the
    # failure status that GLOBAL_RC still holds is returned.
    if [[ "${sm_done}" = true && "${pmon_done}" = true ]] ; then
        GLOBAL_RC=${PATCH_STATUS_OK}
        break
    fi

    sleep 1
done

loginfo "Maintenance No-Reboot Patching Status: ${GLOBAL_RC}"
exit ${GLOBAL_RC}