#!/bin/bash
#
# Copyright (c) 2016 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
##############################################################################
#
# This script supports no-reboot patching of any single or
# combination of maintenance processes specified on the command line.
#
# Calling sequence:
#
#     mtce-restart [-c|--clean] process1 process2 process3 ...
#     if [ $? != 0 ] ; then
#         restart action failed
#     fi
#
# Exit status: PATCH_STATUS_OK when there was nothing to restart or every
# issued restart was observed to complete before the timeout ; otherwise
# PATCH_STATUS_FAILED.
#
###############################################################################
#
# The patching subsystem provides a patch-functions bash source file
# with useful function and variable definitions.
#
if [ -e "/etc/patching/patch-functions" ] ; then
    . /etc/patching/patch-functions
fi

loginfo "----------------------------------------------"
loginfo "Maintenance No-Reboot Patching Restart Request"

#
# Declare an overall script return code.
# The ':-' defaults only apply if patch-functions did not define the status
# values ; without them an unset PATCH_STATUS_FAILED would evaluate to 0 in
# this integer variable and a failure would be reported as success.
#
declare -i GLOBAL_RC=${PATCH_STATUS_FAILED:-1}

#if [ ! -e $PATCH_FLAGDIR ] ; then
#    mkdir -p $PATCH_FLAGDIR
#fi

# if set with -c or --clean options then the flag files for
# each process are removed at the start.
CLEAN=false

#
# Completion status ; stored in PID index
#
DISABLED="disabled"
NOPID="not-running"
SKIPPED="skipped"
RESTARTED="restarted"

#
# process query and restart executables
#
SM_RESTART_EXEC="sm-restart-safe"
SM_QUERY_EXEC="sm-query"
PMON_RESTART_EXEC="pmon-restart"

#
# Struct indexes
#
PROCESS_INDEX=0
PID_INDEX=1
ALIAS_INDEX=2

#
# Process Struct and List
#
#    SM   managed entry:  name:<pid | status>:alias
#    PMON managed entry:  name:<pid | status>
#
# Entries are split on ':' only, so a pid field that holds several
# space separated PIDs stays in one field.
#
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# NOTE TO PATCH WRITERS: Pass the names of the processes you want no-reboot
#                        patch restarted on the command line.
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# The process restart control structure
declare -a sm_managed_processes=()
declare -a pmon_managed_processes=()

#######################################
# Log the name of every process entry whose status field matches.
# The header is logged once, and only if at least one entry matches.
# Globals:   PROCESS_INDEX, PID_INDEX (read)
# Arguments: $1 - status value to match against the pid/status field
#            $2 - header line to log before the first match
#            $3.. - process entries ( name:status[:alias] )
#######################################
log_processes_with_status() {
    local status="${1}"
    local header="${2}"
    shift 2

    local entry
    local header_logged=false
    local -a fields
    for entry in "$@" ; do
        IFS=':' read -r -a fields <<< "${entry}"
        if [[ "${fields[${PID_INDEX}]}" == "${status}" ]] ; then
            if [ "${header_logged}" = false ] ; then
                loginfo "${header}"
                header_logged=true
            fi
            loginfo "... process: ${fields[${PROCESS_INDEX}]}"
        fi
    done
}

# Build the process list.
# All arguments should be a valid maintenance process name.
# The name of the binary, not the SM alias.
# See the list below for supported process names.
while (( $# > 0 )) ; do
    process="${1}"
    case "${process}" in
        -c|--clean)
            CLEAN=true
            ;;

        # Maintenance Processes - SM managed ( name:pid:alias )
        "mtcAgent")
            sm_managed_processes+=("mtcAgent:0:mtc-agent")
            ;;
        "guestAgent")
            sm_managed_processes+=("guestAgent:0:guest-agent")
            ;;
        "hwmond")
            sm_managed_processes+=("hwmond:0:hw-mon")
            ;;

        # Maintenance Processes - PMON managed ( name:pid )
        "pmond"|"guestServer"|"hbsAgent"|"mtcClient"|"hbsClient"|"rmond"|\
        "hostwd"|"fsmond"|"mtclogd"|"mtcalarmd"|"lmond")
            pmon_managed_processes+=("${process}:0")
            ;;

        *)
            loginfo "Unknown process:${process}"
            ;;
    esac
    shift
done

# Assume both groupings are done until we know they are not
sm_done=true
pmon_done=true

# 'not done' trackers ; recomputed on every pass of the monitor loop
sm_not_done=true
pmon_not_done=true

# is_controller is invoked as a command here. Inside '[ ... ]' the bare word
# is just a non-empty string, which always tests true.
if (( ${#sm_managed_processes[@]} > 0 )) && is_controller ; then

    # Record current process IDs
    index=0
    for DAEMON in "${sm_managed_processes[@]}" ; do
        IFS=':' read -r -a info <<< "${DAEMON}"
        if [ "${CLEAN}" = true ] ; then
            rm -f "${PATCH_FLAGDIR}/${info[${PROCESS_INDEX}]}.restarted"
        fi
        info[${PID_INDEX}]=$(pidof "${info[${PROCESS_INDEX}]}")
        if [ -z "${info[${PID_INDEX}]}" ] ; then
            loginfo "${info[${PROCESS_INDEX}]} is not running"
            info[${PID_INDEX}]="${NOPID}"
        fi
        # Save the PID or NOPID status to the process line
        sm_managed_processes[${index}]="${info[${PROCESS_INDEX}]}:${info[${PID_INDEX}]}:${info[${ALIAS_INDEX}]}"
        ((index++))
    done

    # Restart the processes
    index=0
    for DAEMON in "${sm_managed_processes[@]}" ; do
        IFS=':' read -r -a info <<< "${DAEMON}"

        # A flag file means this process was already restarted by an
        # earlier invocation ; do not restart it again.
        if [ -e "${PATCH_FLAGDIR}/${info[${PROCESS_INDEX}]}.restarted" ] ; then
            info[${PID_INDEX}]="${SKIPPED}"
            # Add the status to the process line
            sm_managed_processes[${index}]="${info[${PROCESS_INDEX}]}:${info[${PID_INDEX}]}:${info[${ALIAS_INDEX}]}"
            ((index++))
            continue
        fi

        sm_query_result=$(${SM_QUERY_EXEC} service "${info[${ALIAS_INDEX}]}")
        if [[ "${sm_query_result}" == *"enabled-active"* ]] ; then
            # Save the original PID
            info[${PID_INDEX}]=$(pidof "${info[${PROCESS_INDEX}]}")
            if [ -n "${info[${PID_INDEX}]}" ] ; then
                loginfo "sm-restart of ${info[${PROCESS_INDEX}]} [pid:${info[${PID_INDEX}]}]"
                touch "${PATCH_FLAGDIR}/${info[${PROCESS_INDEX}]}.restarted"
                ${SM_RESTART_EXEC} service "${info[${ALIAS_INDEX}]}"
                sm_done=false
                sleep 5
            else
                loginfo "${info[${PROCESS_INDEX}]} is not running ; must be on inactive controller"
                info[${PID_INDEX}]="${NOPID}"
            fi
        elif [[ "${sm_query_result}" == *"is enabling"* ]] ; then
            info[${PID_INDEX}]="${NOPID}"
            loginfo "sm-restart ${info[${PROCESS_INDEX}]} ; [in progress] ; [pid:${info[${PID_INDEX}]}]"
        else
            info[${PID_INDEX}]="${DISABLED}"
            loginfo "${info[${PROCESS_INDEX}]} is not active"
        fi

        # Add the PID to the process line
        sm_managed_processes[${index}]="${info[${PROCESS_INDEX}]}:${info[${PID_INDEX}]}:${info[${ALIAS_INDEX}]}"
        ((index++))
    done
fi

if (( ${#pmon_managed_processes[@]} > 0 )) ; then

    echo "DEBUG: pmon_managed_processes:${pmon_managed_processes}"

    # Restart the pmond processes
    index=0
    for DAEMON in "${pmon_managed_processes[@]}" ; do
        IFS=':' read -r -a info <<< "${DAEMON}"
        if [ "${CLEAN}" = true ] ; then
            rm -f "${PATCH_FLAGDIR}/${info[${PROCESS_INDEX}]}.restarted"
        fi

        # A flag file means this process was already restarted by an
        # earlier invocation ; do not restart it again.
        if [ -e "${PATCH_FLAGDIR}/${info[${PROCESS_INDEX}]}.restarted" ] ; then
            info[${PID_INDEX}]="${SKIPPED}"
            pmon_managed_processes[${index}]="${info[${PROCESS_INDEX}]}:${info[${PID_INDEX}]}"
            ((index++))
            continue
        fi

        # Save the original PID
        info[${PID_INDEX}]=$(pidof "${info[${PROCESS_INDEX}]}")
        if [ -n "${info[${PID_INDEX}]}" ] ; then
            loginfo "pmon-restart of ${info[${PROCESS_INDEX}]} [pid:${info[${PID_INDEX}]}]"
            touch "${PATCH_FLAGDIR}/${info[${PROCESS_INDEX}]}.restarted"
            ${PMON_RESTART_EXEC} "${info[${PROCESS_INDEX}]}"
            pmon_done=false
            sleep 2

            ####################################################################
            # Special Handling Section
            #
            # - pmond needs 30 seconds to restart before it will start
            #   monitoring processes. We can maybe remove that in the daemon
            #   config file but for now its there and we have to wait.
            ####################################################################
            if [ "${info[${PROCESS_INDEX}]}" == "pmond" ] ; then
                sleep 30
            fi
        else
            info[${PID_INDEX}]="${DISABLED}"
            loginfo "${info[${PROCESS_INDEX}]} is not active"
        fi

        # Save the updated PID or other status to the process line
        pmon_managed_processes[${index}]="${info[${PROCESS_INDEX}]}:${info[${PID_INDEX}]}"
        ((index++))
    done
fi

# check for done. No restart was issued, so there is nothing to monitor.
if [ "${sm_done}" = true ] && [ "${pmon_done}" = true ] ; then
    GLOBAL_RC=${PATCH_STATUS_OK:-0}
    loginfo " SM Processes: ${sm_managed_processes[@]}"
    loginfo "PMON Processes: ${pmon_managed_processes[@]}"
    loginfo "Maintenance No-Reboot Patching Status: ${GLOBAL_RC} - nothing to do."
    exit ${GLOBAL_RC}
fi

# Monitor the restart of SM and PMON processes
#
# Don't want to start from the beginning of the shell.
# Want time zero now plus TIMEOUT seconds, so the bash SECONDS
# counter is reset before the deadline is computed.
#
SECONDS=0
TIMEOUT=120
UNTIL=$(( SECONDS + TIMEOUT ))
loginfo "restart timeout is ${TIMEOUT}"

while (( UNTIL >= SECONDS )) ; do

    if [ "${sm_done}" = false ] ; then
        # Called as commands ; see the note on is_controller above
        if is_controller || is_cpe ; then
            # Start by assuming we are done ; any process that has not
            # restarted yet flips this back to true below.
            sm_not_done=false
            index=0
            for DAEMON in "${sm_managed_processes[@]}" ; do
                IFS=':' read -r -a info <<< "${DAEMON}"

                # Don't waste time on processes that are being skipped due
                # to a past restart, have already restarted, are disabled
                # or were not running.
                case "${info[${PID_INDEX}]}" in
                    "${SKIPPED}"|"${RESTARTED}"|"${DISABLED}"|"${NOPID}")
                        ((index++))
                        continue
                        ;;
                esac

                if [[ "$(${SM_QUERY_EXEC} service "${info[${ALIAS_INDEX}]}")" == *"enabled-active"* ]] ; then
                    # The restart is complete once the service is active
                    # again under a PID that differs from the saved one.
                    if new_pid=$(pidof "${info[${PROCESS_INDEX}]}") && [[ -n "${new_pid}" ]] ; then
                        if [[ "${info[${PID_INDEX}]}" != "${new_pid}" ]] ; then
                            loginfo "${info[${PROCESS_INDEX}]} ${RESTARTED} ok [pid:${info[${PID_INDEX}]} -> ${new_pid}]"
                            info[${PID_INDEX}]="${RESTARTED}"
                        fi
                    fi
                fi

                if [ "${info[${PID_INDEX}]}" != "${RESTARTED}" ] ; then
                    sm_not_done=true
                fi

                # Add the PID to the process line
                sm_managed_processes[${index}]="${info[${PROCESS_INDEX}]}:${info[${PID_INDEX}]}:${info[${ALIAS_INDEX}]}"
                ((index++))
            done
        fi

        # log when SM restarts are done ; print a summary only once
        if [ "${sm_not_done}" = false ] && [ "${sm_done}" = false ] ; then
            sm_done=true
            log_processes_with_status "${RESTARTED}" \
                "The following 'sm managed' processes have been 'restarted'" \
                "${sm_managed_processes[@]}"
            log_processes_with_status "${SKIPPED}" \
                "The following 'sm managed' processes have been 'skipped' ; due to previous restart" \
                "${sm_managed_processes[@]}"
        fi
    fi

    #########################################################################
    # For all nodes ....
    #########################################################################

    # Loop over all PMON processes looking for complete restarts.
    # Update process struct PID field as status is learned.
    if [ "${pmon_done}" = false ] ; then

        # Start by assuming we are done ; any process that has not
        # restarted yet flips this back to true below.
        pmon_not_done=false
        index=0
        for DAEMON in "${pmon_managed_processes[@]}" ; do
            IFS=':' read -r -a info <<< "${DAEMON}"

            # Don't waste time on processes that are being skipped due
            # to a past restart, have already restarted, are disabled
            # or were not running.
            case "${info[${PID_INDEX}]}" in
                "${SKIPPED}"|"${RESTARTED}"|"${DISABLED}"|"${NOPID}")
                    ((index++))
                    continue
                    ;;
            esac

            # Set the process as restarted as soon as it runs under a new
            # pid. The saved pid is compared so that a process that is
            # still running under its old pid is not reported as restarted.
            if new_pid=$(pidof "${info[${PROCESS_INDEX}]}") && [[ -n "${new_pid}" ]] ; then
                if [[ "${info[${PID_INDEX}]}" != "${new_pid}" ]] ; then
                    loginfo "${info[${PROCESS_INDEX}]} ${RESTARTED} ok [PID: ${info[${PID_INDEX}]} -> ${new_pid}]"
                    info[${PID_INDEX}]="${RESTARTED}"
                fi
            fi

            # Set not done as long as there is one process not restarted
            if [ "${info[${PID_INDEX}]}" != "${RESTARTED}" ] ; then
                pmon_not_done=true
            fi

            # Add the PID to the process line
            pmon_managed_processes[${index}]="${info[${PROCESS_INDEX}]}:${info[${PID_INDEX}]}"
            ((index++))
        done
    fi

    # log when all pmond restarts are done ; print a summary only once
    if [ "${pmon_not_done}" = false ] && [ "${pmon_done}" = false ] ; then
        pmon_done=true
        log_processes_with_status "${RESTARTED}" \
            "The following 'pmon managed' processes have been 'restarted'" \
            "${pmon_managed_processes[@]}"
        log_processes_with_status "${SKIPPED}" \
            "The following 'pmon managed' processes have been 'skipped' ; due to previous restart" \
            "${pmon_managed_processes[@]}"
    fi

    # check for done. If this is not met in timeout then fail is returned
    if [ "${sm_done}" = true ] && [ "${pmon_done}" = true ] ; then
        GLOBAL_RC=${PATCH_STATUS_OK:-0}
        break
    fi
    sleep 1
done

loginfo "Maintenance No-Reboot Patching Status: ${GLOBAL_RC}"

exit ${GLOBAL_RC}