Merge "ceph-init-wrapper use flock instead of flag files"

This commit is contained in:
Zuul 2019-09-09 19:34:31 +00:00 committed by Gerrit Code Review
commit 4b6a275e4f
1 changed files with 133 additions and 149 deletions

View File

@ -39,7 +39,6 @@ source /etc/platform/platform.conf
CEPH_SCRIPT="/etc/init.d/ceph" CEPH_SCRIPT="/etc/init.d/ceph"
CEPH_FILE="$VOLATILE_PATH/.ceph_started" CEPH_FILE="$VOLATILE_PATH/.ceph_started"
CEPH_RESTARTING_FILE="$VOLATILE_PATH/.ceph_restarting"
CEPH_GET_MON_STATUS_FILE="$VOLATILE_PATH/.ceph_getting_mon_status" CEPH_GET_MON_STATUS_FILE="$VOLATILE_PATH/.ceph_getting_mon_status"
CEPH_GET_OSD_STATUS_FILE="$VOLATILE_PATH/.ceph_getting_osd_status" CEPH_GET_OSD_STATUS_FILE="$VOLATILE_PATH/.ceph_getting_osd_status"
CEPH_STATUS_FAILURE_TEXT_FILE="/tmp/ceph_status_failure.txt" CEPH_STATUS_FAILURE_TEXT_FILE="/tmp/ceph_status_failure.txt"
@ -60,111 +59,97 @@ mkdir -p $DATA_PATH # make sure folder exists
MONITORING_INTERVAL=15 MONITORING_INTERVAL=15
TRACE_LOOP_INTERVAL=5 TRACE_LOOP_INTERVAL=5
GET_OSD_STATUS_TIMEOUT=120
GET_MONITOR_STATUS_TIMEOUT=30
CEPH_STATUS_TIMEOUT=20 CEPH_STATUS_TIMEOUT=20
WAIT_FOR_CMD=1 LOCK_CEPH_MON_SERVICE_FILE="$VOLATILE_PATH/.ceph_mon_status"
MONITOR_COMMAND=0 LOCK_CEPH_OSD_SERVICE_FILE="$VOLATILE_PATH/.ceph_osd_status"
OSD_COMMAND=0 LOCK_CEPH_MON_STATUS_FILE="$VOLATILE_PATH/.ceph_mon_service"
LOCK_CEPH_OSD_STATUS_FILE="$VOLATILE_PATH/.ceph_osd_service"
# Seconds to wait for ceph status to finish before
# continuing to execute a service action
MONITOR_STATUS_TIMEOUT=30
MAX_STATUS_TIMEOUT=120
RC=0 RC=0
# SM can only pass arguments through environment variable
# when ARGS is not empty use it to extend command line arguments
args=("$@") args=("$@")
if [ ! -z $ARGS ]; then if [ ! -z $ARGS ]; then
IFS=";" read -r -a new_args <<< "$ARGS" IFS=";" read -r -a new_args <<< "$ARGS"
args+=("${new_args[@]}") args+=("${new_args[@]}")
fi fi
check_command_type () with_service_lock ()
{ {
if [[ $# -eq 0 ]]; then local target="$1"; shift
MONITOR_COMMAND=1 [ -z "${target}" ] && target="mon osd"
OSD_COMMAND=1
elif [[ "$1" == "osd"* ]]; then
OSD_COMMAND=1
elif [[ "$1" == "mon"* ]]; then
MONITOR_COMMAND=1
else
exit 1
fi
} # Run in sub-shell so we don't leak file descriptors
# used for locking service actions
(
# Grab service locks
wlog "-" INFO "Grab service locks"
[[ "${target}" == *"mon"* ]] && flock ${LOCK_CEPH_MON_SERVICE_FD}
[[ "${target}" == *"osd"* ]] && flock ${LOCK_CEPH_OSD_SERVICE_FD}
wait_for_status () # Try to lock status with a timeout in case status is stuck
{ wlog "-" INFO "Lock service status"
local STATUS_TIMEOUT=0 deadline=$((SECONDS + MAX_STATUS_TIMEOUT + 1))
if [[ "${target}" == *"mon"* ]]; then
# For a general "ceph status" command which includes checks flock --exclusive --timeout ${MONITOR_STATUS_TIMEOUT} ${LOCK_CEPH_MON_STATUS_FD}
# for both monitors and OSDS, we use the OSD timeout. fi
if [[ $OSD_COMMAND == 1 ]]; then if [[ "${target}" == *"osd"* ]]; then
STATUS_TIMEOUT=$GET_OSD_STATUS_TIMEOUT timeout=$((deadline - SECONDS))
elif [[ $MONITOR_COMMAND == 1 ]]; then if [[ $timeout -gt 0 ]]; then
STATUS_TIMEOUT=$GET_MONITOR_STATUS_TIMEOUT flock --exclusive --timeout ${timeout} ${LOCK_CEPH_OSD_STATUS_FD}
fi fi
timeout_expiry=$((${SECONDS} + ${STATUS_TIMEOUT}))
while [ ${SECONDS} -le ${timeout_expiry} ]; do
if [[ $MONITOR_COMMAND == 1 ]] && [[ ! -f ${CEPH_GET_MON_STATUS_FILE} ]]; then
break
fi fi
if [[ $OSD_COMMAND == 1 ]] && [[ ! -f ${CEPH_GET_OSD_STATUS_FILE} ]]; then # Close lock file descriptors so they are
break # not inherited by the spawned process then
fi # run service action
wlog "-" INFO "Run service action: $@"
"$@" {LOCK_CEPH_MON_SERVICE_FD}>&- \
{LOCK_CEPH_MON_STATUS_FD}>&- \
{LOCK_CEPH_OSD_SERVICE_FD}>&- \
{LOCK_CEPH_OSD_STATUS_FD}>&-
sleep 1 ) {LOCK_CEPH_MON_SERVICE_FD}>${LOCK_CEPH_MON_SERVICE_FILE} \
done {LOCK_CEPH_MON_STATUS_FD}>${LOCK_CEPH_MON_STATUS_FILE} \
{LOCK_CEPH_OSD_SERVICE_FD}>${LOCK_CEPH_OSD_SERVICE_FILE} \
if [ $timeout -eq 0 ]; then {LOCK_CEPH_OSD_STATUS_FD}>${LOCK_CEPH_OSD_STATUS_FILE}
wlog "-" "WARN" "Getting status takes more than ${STATUS_TIMEOUT}s, continuing" RC=$?
if [[ $MONITOR_COMMAND == 1 ]]; then
rm -f $CEPH_GET_MON_STATUS_FILE
fi
if [[ $OSD_COMMAND == 1 ]]; then
rm -f $CEPH_GET_OSD_STATUS_FILE
fi
fi
} }
start () start ()
{ {
if [ -f ${CEPH_FILE} ]; then if [ ! -f ${CEPH_FILE} ]; then
wlog "-" INFO "Ceph START $1 command received"
wait_for_status
${CEPH_SCRIPT} start $1
wlog "-" INFO "Ceph START $1 command finished."
RC=$?
else
# Ceph is not running on this node, return success # Ceph is not running on this node, return success
exit 0 exit 0
fi fi
wlog "-" INFO "Ceph START $1 command received"
with_service_lock "$1" ${CEPH_SCRIPT} start $1
wlog "-" INFO "Ceph START $1 command finished."
} }
stop () stop ()
{ {
wlog "-" INFO "Ceph STOP $1 command received." wlog "-" INFO "Ceph STOP $1 command received."
wait_for_status with_service_lock "$1" ${CEPH_SCRIPT} stop $1
${CEPH_SCRIPT} stop $1
wlog "-" INFO "Ceph STOP $1 command finished." wlog "-" INFO "Ceph STOP $1 command finished."
} }
restart () restart ()
{ {
if [ -f ${CEPH_FILE} ]; then if [ ! -f ${CEPH_FILE} ]; then
wlog "-" INFO "Ceph RESTART $1 command received."
wait_for_status
touch $CEPH_RESTARTING_FILE
${CEPH_SCRIPT} restart $1
rm -f $CEPH_RESTARTING_FILE
wlog "-" INFO "Ceph RESTART $1 command finished."
else
# Ceph is not running on this node, return success # Ceph is not running on this node, return success
exit 0 exit 0
fi fi
wlog "-" INFO "Ceph RESTART $1 command received."
with_service_lock "$1" ${CEPH_SCRIPT} restart $1
wlog "-" INFO "Ceph RESTART $1 command finished."
} }
log_and_restart_blocked_osds () log_and_restart_blocked_osds ()
@ -221,6 +206,14 @@ log_and_kill_hung_procs ()
status () status ()
{ {
local target="$1" # no shift here
[ -z "${target}" ] && target="mon osd"
if [ ! -f ${CEPH_FILE} ]; then
# Ceph is not running on this node, return success
exit 0
fi
if [[ "$system_type" == "All-in-one" ]] && [[ "$system_mode" != "simplex" ]] && [[ "$1" == "osd" ]]; then if [[ "$system_type" == "All-in-one" ]] && [[ "$system_mode" != "simplex" ]] && [[ "$1" == "osd" ]]; then
timeout $CEPH_STATUS_TIMEOUT ceph -s timeout $CEPH_STATUS_TIMEOUT ceph -s
if [ "$?" -ne 0 ]; then if [ "$?" -ne 0 ]; then
@ -231,111 +224,102 @@ status ()
fi fi
fi fi
if [ -f ${CEPH_RESTARTING_FILE} ]; then # Report success while ceph mon is running a service action
# Ceph is restarting, we don't report state changes on the first pass # otherwise mark ceph mon status as in progress
rm -f ${CEPH_RESTARTING_FILE} exec {LOCK_CEPH_MON_STATUS_FD}>${LOCK_CEPH_MON_STATUS_FILE}
exit 0 if [[ "${target}" == *"mon"* ]]; then
flock --shared --nonblock ${LOCK_CEPH_MON_SERVICE_FILE} true
if [[ $? -ne 0 ]]; then
exit 0
fi
# Lock will be released when script exits
flock --shared ${LOCK_CEPH_MON_STATUS_FD}
fi fi
if [ -f ${CEPH_FILE} ]; then # Report success while ceph osd is running a service action
# Make sure the script does not 'exit' between here and the 'rm -f' below # otherwise mark ceph osd status as in progress
# or the checkpoint file will be left behind exec {LOCK_CEPH_OSD_STATUS_FD}>${LOCK_CEPH_OSD_STATUS_FILE}
if [[ $MONITOR_COMMAND == 1 ]]; then if [[ "${target}" == *"osd"* ]]; then
touch -f ${CEPH_GET_MON_STATUS_FILE} flock --shared --nonblock ${LOCK_CEPH_OSD_SERVICE_FILE} true
if [[ $? -ne 0 ]]; then
exit 0
fi fi
# Lock will be released when script exits
flock --shared ${LOCK_CEPH_OSD_STATUS_FD}
fi
if [[ $OSD_COMMAND == 1 ]]; then result=`${CEPH_SCRIPT} status $1 {LOCK_CEPH_MON_STATUS_FD}>&- {LOCK_CEPH_OSD_STATUS_FD}>&-`
touch -f ${CEPH_GET_OSD_STATUS_FILE} RC=$?
if [ "$RC" -ne 0 ]; then
erred_procs=`echo "$result" | sort | uniq | awk ' /not running|dead|failed/ {printf "%s ", $1}' | sed 's/://g' | sed 's/, $//g'`
hung_procs=`echo "$result" | sort | uniq | awk ' /hung/ {printf "%s ", $1}' | sed 's/://g' | sed 's/, $//g'`
blocked_ops_procs=`echo "$result" | sort | uniq | awk ' /blocked ops/ {printf "%s ", $1}' | sed 's/://g' | sed 's/, $//g'`
invalid=0
host=`hostname`
if [[ "$system_type" == "All-in-one" ]] && [[ "$system_mode" != "simplex" ]]; then
# On 2 node configuration we have a floating monitor
host="controller"
fi fi
for i in $(echo $erred_procs $hung_procs); do
result=`${CEPH_SCRIPT} status $1` if [[ "$i" =~ osd.?[0-9]?[0-9]|mon.$host ]]; then
RC=$? continue
if [ "$RC" -ne 0 ]; then
erred_procs=`echo "$result" | sort | uniq | awk ' /not running|dead|failed/ {printf "%s ", $1}' | sed 's/://g' | sed 's/, $//g'`
hung_procs=`echo "$result" | sort | uniq | awk ' /hung/ {printf "%s ", $1}' | sed 's/://g' | sed 's/, $//g'`
blocked_ops_procs=`echo "$result" | sort | uniq | awk ' /blocked ops/ {printf "%s ", $1}' | sed 's/://g' | sed 's/, $//g'`
invalid=0
host=`hostname`
if [[ "$system_type" == "All-in-one" ]] && [[ "$system_mode" != "simplex" ]]; then
# On 2 node configuration we have a floating monitor
host="controller"
fi
for i in $(echo $erred_procs $hung_procs); do
if [[ "$i" =~ osd.?[0-9]?[0-9]|mon.$host ]]; then
continue
else
invalid=1
fi
done
log_and_restart_blocked_osds $blocked_ops_procs
log_and_kill_hung_procs $hung_procs
hung_procs_text=""
for i in $(echo $hung_procs); do
hung_procs_text+="$i(process hung) "
done
rm -f $CEPH_STATUS_FAILURE_TEXT_FILE
if [ $invalid -eq 0 ]; then
text=""
for i in $erred_procs; do
text+="$i, "
done
for i in $hung_procs; do
text+="$i (process hang), "
done
echo "$text" | tr -d '\n' > $CEPH_STATUS_FAILURE_TEXT_FILE
else else
echo "$host: '${CEPH_SCRIPT} status $1' result contains invalid process names: $erred_procs" invalid=1
echo "Undetermined osd or monitor id" > $CEPH_STATUS_FAILURE_TEXT_FILE
fi fi
fi done
if [[ $MONITOR_COMMAND == 1 ]]; then log_and_restart_blocked_osds $blocked_ops_procs
rm -f ${CEPH_GET_MON_STATUS_FILE} log_and_kill_hung_procs $hung_procs
fi
if [[ $OSD_COMMAND == 1 ]]; then hung_procs_text=""
rm -f ${CEPH_GET_OSD_STATUS_FILE} for i in $(echo $hung_procs); do
fi hung_procs_text+="$i(process hung) "
done
if [[ $RC == 0 ]] && [[ "$1" == "mon" ]] && [[ "$system_type" == "All-in-one" ]] && [[ "$system_mode" != "simplex" ]]; then rm -f $CEPH_STATUS_FAILURE_TEXT_FILE
# SM needs exit code != 0 from 'status mon' argument of the init script on if [ $invalid -eq 0 ]; then
# standby controller otherwise it thinks that the monitor is running and text=""
# tries to stop it. for i in $erred_procs; do
# '/etc/init.d/ceph status mon' checks the status of monitors configured in text+="$i, "
# /etc/ceph/ceph.conf and if it should be running on current host. done
# If it should not be running it just exits with code 0. This is what for i in $hung_procs; do
# happens on the standby controller. text+="$i (process hang), "
# When floating monitor is running on active controller /var/lib/ceph/mon of done
# standby is not mounted (Ceph monitor partition is DRBD synced). echo "$text" | tr -d '\n' > $CEPH_STATUS_FAILURE_TEXT_FILE
test -e "/var/lib/ceph/mon/ceph-controller" else
if [ "$?" -ne 0 ]; then echo "$host: '${CEPH_SCRIPT} status $1' result contains invalid process names: $erred_procs"
exit 3 echo "Undetermined osd or monitor id" > $CEPH_STATUS_FAILURE_TEXT_FILE
fi fi
fi
if [[ $RC == 0 ]] && [[ "$1" == "mon" ]] && [[ "$system_type" == "All-in-one" ]] && [[ "$system_mode" != "simplex" ]]; then
# SM needs exit code != 0 from 'status mon' argument of the init script on
# standby controller otherwise it thinks that the monitor is running and
# tries to stop it.
# '/etc/init.d/ceph status mon' checks the status of monitors configured in
# /etc/ceph/ceph.conf and if it should be running on current host.
# If it should not be running it just exits with code 0. This is what
# happens on the standby controller.
# When floating monitor is running on active controller /var/lib/ceph/mon of
# standby is not mounted (Ceph monitor partition is DRBD synced).
test -e "/var/lib/ceph/mon/ceph-controller"
if [ "$?" -ne 0 ]; then
exit 3
fi fi
else
# Ceph is not running on this node, return success
exit 0
fi fi
} }
case "${args[0]}" in case "${args[0]}" in
start) start)
check_command_type ${args[1]}
start ${args[1]} start ${args[1]}
;; ;;
stop) stop)
check_command_type ${args[1]}
stop ${args[1]} stop ${args[1]}
;; ;;
restart) restart)
check_command_type ${args[1]}
restart ${args[1]} restart ${args[1]}
;; ;;
status) status)
check_command_type ${args[1]}
status ${args[1]} status ${args[1]}
;; ;;
*) *)