ceph-init-wrapper: use flock instead of flag files

When a swact occurs and ceph-init-wrapper is slow to respond
to a status request, it gets killed by SM. As a result, the
flag file that marks a status request as in progress is left
behind.

When the controller swacts back, ceph-init-wrapper sees a status
request in progress and waits for it to finish (with a timeout).
Because it does not respond fast enough, SM tries to start
ceph-init-wrapper again to get the ceph-mon service up and running.

This happens a couple of times until the service is declared
"failed" and the controller swacts back.

To fix this, we need to use flock instead of flag files, because
the locks are automatically released by the OS when the process
is killed.

Change-Id: If1912e8575258a4f79321d8435c8ae1b96b78b98
Closes-bug: 1840176
Signed-off-by: Daniel Badea <daniel.badea@windriver.com>
This commit is contained in:
Daniel Badea 2019-08-26 06:59:28 +00:00 committed by dbadea
parent 43fda7f16a
commit 9faad45703
1 changed files with 133 additions and 149 deletions

View File

@ -39,7 +39,6 @@ source /etc/platform/platform.conf
CEPH_SCRIPT="/etc/init.d/ceph" CEPH_SCRIPT="/etc/init.d/ceph"
CEPH_FILE="$VOLATILE_PATH/.ceph_started" CEPH_FILE="$VOLATILE_PATH/.ceph_started"
CEPH_RESTARTING_FILE="$VOLATILE_PATH/.ceph_restarting"
CEPH_GET_MON_STATUS_FILE="$VOLATILE_PATH/.ceph_getting_mon_status" CEPH_GET_MON_STATUS_FILE="$VOLATILE_PATH/.ceph_getting_mon_status"
CEPH_GET_OSD_STATUS_FILE="$VOLATILE_PATH/.ceph_getting_osd_status" CEPH_GET_OSD_STATUS_FILE="$VOLATILE_PATH/.ceph_getting_osd_status"
CEPH_STATUS_FAILURE_TEXT_FILE="/tmp/ceph_status_failure.txt" CEPH_STATUS_FAILURE_TEXT_FILE="/tmp/ceph_status_failure.txt"
@ -60,111 +59,97 @@ mkdir -p $DATA_PATH # make sure folder exists
MONITORING_INTERVAL=15 MONITORING_INTERVAL=15
TRACE_LOOP_INTERVAL=5 TRACE_LOOP_INTERVAL=5
GET_OSD_STATUS_TIMEOUT=120
GET_MONITOR_STATUS_TIMEOUT=30
CEPH_STATUS_TIMEOUT=20 CEPH_STATUS_TIMEOUT=20
WAIT_FOR_CMD=1 LOCK_CEPH_MON_SERVICE_FILE="$VOLATILE_PATH/.ceph_mon_status"
MONITOR_COMMAND=0 LOCK_CEPH_OSD_SERVICE_FILE="$VOLATILE_PATH/.ceph_osd_status"
OSD_COMMAND=0 LOCK_CEPH_MON_STATUS_FILE="$VOLATILE_PATH/.ceph_mon_service"
LOCK_CEPH_OSD_STATUS_FILE="$VOLATILE_PATH/.ceph_osd_service"
# Seconds to wait for ceph status to finish before
# continuing to execute a service action
MONITOR_STATUS_TIMEOUT=30
MAX_STATUS_TIMEOUT=120
RC=0 RC=0
# SM can only pass arguments through environment variable
# when ARGS is not empty use it to extend command line arguments
args=("$@") args=("$@")
if [ ! -z $ARGS ]; then if [ ! -z $ARGS ]; then
IFS=";" read -r -a new_args <<< "$ARGS" IFS=";" read -r -a new_args <<< "$ARGS"
args+=("${new_args[@]}") args+=("${new_args[@]}")
fi fi
check_command_type () with_service_lock ()
{ {
if [[ $# -eq 0 ]]; then local target="$1"; shift
MONITOR_COMMAND=1 [ -z "${target}" ] && target="mon osd"
OSD_COMMAND=1
elif [[ "$1" == "osd"* ]]; then # Run in sub-shell so we don't leak file descriptors
OSD_COMMAND=1 # used for locking service actions
elif [[ "$1" == "mon"* ]]; then (
MONITOR_COMMAND=1 # Grab service locks
else wlog "-" INFO "Grab service locks"
exit 1 [[ "${target}" == *"mon"* ]] && flock ${LOCK_CEPH_MON_SERVICE_FD}
[[ "${target}" == *"osd"* ]] && flock ${LOCK_CEPH_OSD_SERVICE_FD}
# Try to lock status with a timeout in case status is stuck
wlog "-" INFO "Lock service status"
deadline=$((SECONDS + MAX_STATUS_TIMEOUT + 1))
if [[ "${target}" == *"mon"* ]]; then
flock --exclusive --timeout ${MONITOR_STATUS_TIMEOUT} ${LOCK_CEPH_MON_STATUS_FD}
fi fi
if [[ "${target}" == *"osd"* ]]; then
} timeout=$((deadline - SECONDS))
if [[ $timeout -gt 0 ]]; then
wait_for_status () flock --exclusive --timeout ${timeout} ${LOCK_CEPH_OSD_STATUS_FD}
{
local STATUS_TIMEOUT=0
# For a general "ceph status" command which includes checks
# for both monitors and OSDS, we use the OSD timeout.
if [[ $OSD_COMMAND == 1 ]]; then
STATUS_TIMEOUT=$GET_OSD_STATUS_TIMEOUT
elif [[ $MONITOR_COMMAND == 1 ]]; then
STATUS_TIMEOUT=$GET_MONITOR_STATUS_TIMEOUT
fi
timeout_expiry=$((${SECONDS} + ${STATUS_TIMEOUT}))
while [ ${SECONDS} -le ${timeout_expiry} ]; do
if [[ $MONITOR_COMMAND == 1 ]] && [[ ! -f ${CEPH_GET_MON_STATUS_FILE} ]]; then
break
fi
if [[ $OSD_COMMAND == 1 ]] && [[ ! -f ${CEPH_GET_OSD_STATUS_FILE} ]]; then
break
fi
sleep 1
done
if [ $timeout -eq 0 ]; then
wlog "-" "WARN" "Getting status takes more than ${STATUS_TIMEOUT}s, continuing"
if [[ $MONITOR_COMMAND == 1 ]]; then
rm -f $CEPH_GET_MON_STATUS_FILE
fi
if [[ $OSD_COMMAND == 1 ]]; then
rm -f $CEPH_GET_OSD_STATUS_FILE
fi fi
fi fi
# Close lock file descriptors so they are
# not inherited by the spawned process then
# run service action
wlog "-" INFO "Run service action: $@"
"$@" {LOCK_CEPH_MON_SERVICE_FD}>&- \
{LOCK_CEPH_MON_STATUS_FD}>&- \
{LOCK_CEPH_OSD_SERVICE_FD}>&- \
{LOCK_CEPH_OSD_STATUS_FD}>&-
) {LOCK_CEPH_MON_SERVICE_FD}>${LOCK_CEPH_MON_SERVICE_FILE} \
{LOCK_CEPH_MON_STATUS_FD}>${LOCK_CEPH_MON_STATUS_FILE} \
{LOCK_CEPH_OSD_SERVICE_FD}>${LOCK_CEPH_OSD_SERVICE_FILE} \
{LOCK_CEPH_OSD_STATUS_FD}>${LOCK_CEPH_OSD_STATUS_FILE}
RC=$?
} }
start () start ()
{ {
if [ -f ${CEPH_FILE} ]; then if [ ! -f ${CEPH_FILE} ]; then
wlog "-" INFO "Ceph START $1 command received"
wait_for_status
${CEPH_SCRIPT} start $1
wlog "-" INFO "Ceph START $1 command finished."
RC=$?
else
# Ceph is not running on this node, return success # Ceph is not running on this node, return success
exit 0 exit 0
fi fi
wlog "-" INFO "Ceph START $1 command received"
with_service_lock "$1" ${CEPH_SCRIPT} start $1
wlog "-" INFO "Ceph START $1 command finished."
} }
stop () stop ()
{ {
wlog "-" INFO "Ceph STOP $1 command received." wlog "-" INFO "Ceph STOP $1 command received."
wait_for_status with_service_lock "$1" ${CEPH_SCRIPT} stop $1
${CEPH_SCRIPT} stop $1
wlog "-" INFO "Ceph STOP $1 command finished." wlog "-" INFO "Ceph STOP $1 command finished."
} }
restart () restart ()
{ {
if [ -f ${CEPH_FILE} ]; then if [ ! -f ${CEPH_FILE} ]; then
wlog "-" INFO "Ceph RESTART $1 command received."
wait_for_status
touch $CEPH_RESTARTING_FILE
${CEPH_SCRIPT} restart $1
rm -f $CEPH_RESTARTING_FILE
wlog "-" INFO "Ceph RESTART $1 command finished."
else
# Ceph is not running on this node, return success # Ceph is not running on this node, return success
exit 0 exit 0
fi fi
wlog "-" INFO "Ceph RESTART $1 command received."
with_service_lock "$1" ${CEPH_SCRIPT} restart $1
wlog "-" INFO "Ceph RESTART $1 command finished."
} }
log_and_restart_blocked_osds () log_and_restart_blocked_osds ()
@ -221,6 +206,14 @@ log_and_kill_hung_procs ()
status () status ()
{ {
local target="$1" # no shift here
[ -z "${target}" ] && target="mon osd"
if [ ! -f ${CEPH_FILE} ]; then
# Ceph is not running on this node, return success
exit 0
fi
if [[ "$system_type" == "All-in-one" ]] && [[ "$system_mode" != "simplex" ]] && [[ "$1" == "osd" ]]; then if [[ "$system_type" == "All-in-one" ]] && [[ "$system_mode" != "simplex" ]] && [[ "$1" == "osd" ]]; then
timeout $CEPH_STATUS_TIMEOUT ceph -s timeout $CEPH_STATUS_TIMEOUT ceph -s
if [ "$?" -ne 0 ]; then if [ "$?" -ne 0 ]; then
@ -231,23 +224,30 @@ status ()
fi fi
fi fi
if [ -f ${CEPH_RESTARTING_FILE} ]; then # Report success while ceph mon is running a service action
# Ceph is restarting, we don't report state changes on the first pass # otherwise mark ceph mon status is in progress
rm -f ${CEPH_RESTARTING_FILE} exec {LOCK_CEPH_MON_STATUS_FD}>${LOCK_CEPH_MON_STATUS_FILE}
if [[ "${target}" == *"mon"* ]]; then
flock --shared --nonblock ${LOCK_CEPH_MON_SERVICE_FILE} true
if [[ $? -ne 0 ]]; then
exit 0 exit 0
fi fi
if [ -f ${CEPH_FILE} ]; then # Lock will be released when script exits
# Make sure the script does not 'exit' between here and the 'rm -f' below flock --shared ${LOCK_CEPH_MON_STATUS_FD}
# or the checkpoint file will be left behind fi
if [[ $MONITOR_COMMAND == 1 ]]; then # Report success while ceph mon is running a service action
touch -f ${CEPH_GET_MON_STATUS_FILE} # otherwise mark ceph osd status is in progress
exec {LOCK_CEPH_OSD_STATUS_FD}>${LOCK_CEPH_OSD_STATUS_FILE}
if [[ "${target}" == *"osd"* ]]; then
flock --shared --nonblock ${LOCK_CEPH_OSD_SERVICE_FILE} true
if [[ $? -ne 0 ]]; then
exit 0
fi
# Lock will be released when script exits
flock --shared ${LOCK_CEPH_OSD_STATUS_FD}
fi fi
if [[ $OSD_COMMAND == 1 ]]; then result=`${CEPH_SCRIPT} status $1 {LOCK_CEPH_MON_STATUS_FD}>&- {LOCK_CEPH_OSD_STATUS_FD}>&-`
touch -f ${CEPH_GET_OSD_STATUS_FILE}
fi
result=`${CEPH_SCRIPT} status $1`
RC=$? RC=$?
if [ "$RC" -ne 0 ]; then if [ "$RC" -ne 0 ]; then
erred_procs=`echo "$result" | sort | uniq | awk ' /not running|dead|failed/ {printf "%s ", $1}' | sed 's/://g' | sed 's/, $//g'` erred_procs=`echo "$result" | sort | uniq | awk ' /not running|dead|failed/ {printf "%s ", $1}' | sed 's/://g' | sed 's/, $//g'`
@ -291,14 +291,6 @@ status ()
fi fi
fi fi
if [[ $MONITOR_COMMAND == 1 ]]; then
rm -f ${CEPH_GET_MON_STATUS_FILE}
fi
if [[ $OSD_COMMAND == 1 ]]; then
rm -f ${CEPH_GET_OSD_STATUS_FILE}
fi
if [[ $RC == 0 ]] && [[ "$1" == "mon" ]] && [[ "$system_type" == "All-in-one" ]] && [[ "$system_mode" != "simplex" ]]; then if [[ $RC == 0 ]] && [[ "$1" == "mon" ]] && [[ "$system_type" == "All-in-one" ]] && [[ "$system_mode" != "simplex" ]]; then
# SM needs exit code != 0 from 'status mon' argument of the init script on # SM needs exit code != 0 from 'status mon' argument of the init script on
# standby controller otherwise it thinks that the monitor is running and # standby controller otherwise it thinks that the monitor is running and
@ -314,28 +306,20 @@ status ()
exit 3 exit 3
fi fi
fi fi
else
# Ceph is not running on this node, return success
exit 0
fi
} }
case "${args[0]}" in case "${args[0]}" in
start) start)
check_command_type ${args[1]}
start ${args[1]} start ${args[1]}
;; ;;
stop) stop)
check_command_type ${args[1]}
stop ${args[1]} stop ${args[1]}
;; ;;
restart) restart)
check_command_type ${args[1]}
restart ${args[1]} restart ${args[1]}
;; ;;
status) status)
check_command_type ${args[1]}
status ${args[1]} status ${args[1]}
;; ;;
*) *)