ceph-init-wrapper: use flock instead of flag files

When a swact occurs and ceph-init-wrapper is slow to respond to a status request, it gets killed by SM. This means the corresponding flag file that marks status as in progress is left behind. When the controller swacts back, ceph-init-wrapper sees status in progress and waits for it to finish (with a timeout). Because it does not respond fast enough, SM tries to start ceph-init-wrapper again to get the ceph-mon service up and running. This happens a couple of times until the service is declared failed and the controller swacts back. To fix this we need to use flock instead of flag files, as the locks will be automatically released by the OS when the process is killed. Change-Id: If1912e8575258a4f79321d8435c8ae1b96b78b98 Closes-bug: 1840176 Signed-off-by: Daniel Badea <daniel.badea@windriver.com>
2019-08-26 06:59:28 +00:00 · 2019-08-26 06:59:28 +00:00 · 9faad45703
parent 43fda7f16a
commit 9faad45703
1 changed files with 133 additions and 149 deletions
--- a/ceph/ceph/files/ceph-init-wrapper.sh
+++ b/ceph/ceph/files/ceph-init-wrapper.sh
@ -39,7 +39,6 @@ source /etc/platform/platform.conf
 CEPH_SCRIPT="/etc/init.d/ceph"
 CEPH_FILE="$VOLATILE_PATH/.ceph_started"
 CEPH_RESTARTING_FILE="$VOLATILE_PATH/.ceph_restarting"
 CEPH_GET_MON_STATUS_FILE="$VOLATILE_PATH/.ceph_getting_mon_status"
 CEPH_GET_OSD_STATUS_FILE="$VOLATILE_PATH/.ceph_getting_osd_status"
 CEPH_STATUS_FAILURE_TEXT_FILE="/tmp/ceph_status_failure.txt"
@ -60,111 +59,97 @@ mkdir -p $DATA_PATH                   # make sure folder exists
 MONITORING_INTERVAL=15
 TRACE_LOOP_INTERVAL=5
 GET_OSD_STATUS_TIMEOUT=120
 GET_MONITOR_STATUS_TIMEOUT=30
 CEPH_STATUS_TIMEOUT=20
-WAIT_FOR_CMD=1
+LOCK_CEPH_MON_SERVICE_FILE="$VOLATILE_PATH/.ceph_mon_status"
-MONITOR_COMMAND=0
+LOCK_CEPH_OSD_SERVICE_FILE="$VOLATILE_PATH/.ceph_osd_status"
-OSD_COMMAND=0
+LOCK_CEPH_MON_STATUS_FILE="$VOLATILE_PATH/.ceph_mon_service"
 LOCK_CEPH_OSD_STATUS_FILE="$VOLATILE_PATH/.ceph_osd_service"
 # Seconds to wait for ceph status to finish before
 # continuing to execute a service action
 MONITOR_STATUS_TIMEOUT=30
 MAX_STATUS_TIMEOUT=120
 RC=0
 # SM can only pass arguments through environment variable
 # when ARGS is not empty use it to extend command line arguments
 args=("$@")
 if [ ! -z $ARGS ]; then
    IFS=";" read -r -a new_args <<< "$ARGS"
    args+=("${new_args[@]}")
 fi
-check_command_type ()
+with_service_lock ()
 {
-    if [[ $# -eq 0 ]]; then
+    local target="$1"; shift
-        MONITOR_COMMAND=1
+    [ -z "${target}" ] && target="mon osd"
-        OSD_COMMAND=1
+
-    elif [[ "$1" == "osd"* ]]; then
+    # Run in sub-shell so we don't leak file descriptors
-        OSD_COMMAND=1
+    # used for locking service actions
-    elif [[ "$1" == "mon"* ]]; then
+    (
-        MONITOR_COMMAND=1
+        # Grab service locks
-    else
+        wlog "-" INFO "Grab service locks"
-        exit 1
+        [[ "${target}" == *"mon"* ]] && flock ${LOCK_CEPH_MON_SERVICE_FD}
        [[ "${target}" == *"osd"* ]] && flock ${LOCK_CEPH_OSD_SERVICE_FD}
        # Try to lock status with a timeout in case status is stuck
        wlog "-" INFO "Lock service status"
        deadline=$((SECONDS + MAX_STATUS_TIMEOUT + 1))
        if [[ "${target}" == *"mon"* ]]; then
            flock --exclusive --timeout ${MONITOR_STATUS_TIMEOUT} ${LOCK_CEPH_MON_STATUS_FD}
        fi
-
+        if [[ "${target}" == *"osd"* ]]; then
-}
+            timeout=$((deadline - SECONDS))
-
+            if [[ $timeout -gt 0 ]]; then
-wait_for_status ()
+                flock --exclusive --timeout ${timeout} ${LOCK_CEPH_OSD_STATUS_FD}
 {
    local STATUS_TIMEOUT=0
    # For a general "ceph status" command which includes checks
    # for both monitors and OSDS, we use the OSD timeout.
    if [[ $OSD_COMMAND == 1 ]]; then
        STATUS_TIMEOUT=$GET_OSD_STATUS_TIMEOUT
    elif [[ $MONITOR_COMMAND == 1 ]]; then
        STATUS_TIMEOUT=$GET_MONITOR_STATUS_TIMEOUT
    fi
    timeout_expiry=$((${SECONDS} + ${STATUS_TIMEOUT}))
    while [ ${SECONDS} -le ${timeout_expiry} ]; do
        if [[ $MONITOR_COMMAND == 1 ]] && [[ ! -f ${CEPH_GET_MON_STATUS_FILE} ]]; then
            break
        fi
        if [[ $OSD_COMMAND == 1 ]] && [[ ! -f ${CEPH_GET_OSD_STATUS_FILE} ]]; then
            break
        fi
        sleep 1
    done
    if [ $timeout -eq 0 ]; then
        wlog "-" "WARN" "Getting status takes more than ${STATUS_TIMEOUT}s, continuing"
        if [[ $MONITOR_COMMAND == 1 ]]; then
            rm -f $CEPH_GET_MON_STATUS_FILE
        fi
        if [[ $OSD_COMMAND == 1 ]]; then
            rm -f $CEPH_GET_OSD_STATUS_FILE
            fi
        fi
        # Close lock file descriptors so they are
        # not inherited by the spawned process then
        # run service action
        wlog "-" INFO "Run service action: $@"
        "$@" {LOCK_CEPH_MON_SERVICE_FD}>&- \
             {LOCK_CEPH_MON_STATUS_FD}>&- \
             {LOCK_CEPH_OSD_SERVICE_FD}>&- \
             {LOCK_CEPH_OSD_STATUS_FD}>&-
    ) {LOCK_CEPH_MON_SERVICE_FD}>${LOCK_CEPH_MON_SERVICE_FILE} \
      {LOCK_CEPH_MON_STATUS_FD}>${LOCK_CEPH_MON_STATUS_FILE} \
      {LOCK_CEPH_OSD_SERVICE_FD}>${LOCK_CEPH_OSD_SERVICE_FILE} \
      {LOCK_CEPH_OSD_STATUS_FD}>${LOCK_CEPH_OSD_STATUS_FILE}
    RC=$?
 }
 start ()
 {
-    if [ -f ${CEPH_FILE} ]; then
+    if [ ! -f ${CEPH_FILE} ]; then
        wlog "-" INFO "Ceph START $1 command received"
        wait_for_status
        ${CEPH_SCRIPT} start $1
        wlog "-" INFO "Ceph START $1 command finished."
        RC=$?
    else
        # Ceph is not running on this node, return success
        exit 0
    fi
    wlog "-" INFO "Ceph START $1 command received"
    with_service_lock "$1" ${CEPH_SCRIPT} start $1
    wlog "-" INFO "Ceph START $1 command finished."
 }
 stop ()
 {
    wlog "-" INFO "Ceph STOP $1 command received."
-    wait_for_status
+    with_service_lock "$1" ${CEPH_SCRIPT} stop $1
    ${CEPH_SCRIPT} stop $1
    wlog "-" INFO "Ceph STOP $1 command finished."
 }
 restart ()
 {
-    if [ -f ${CEPH_FILE} ]; then
+    if [ ! -f ${CEPH_FILE} ]; then
        wlog "-" INFO "Ceph RESTART $1 command received."
        wait_for_status
        touch $CEPH_RESTARTING_FILE
        ${CEPH_SCRIPT} restart $1
        rm -f $CEPH_RESTARTING_FILE
        wlog "-" INFO "Ceph RESTART $1 command finished."
    else
        # Ceph is not running on this node, return success
        exit 0
    fi
-
+    wlog "-" INFO "Ceph RESTART $1 command received."
    with_service_lock "$1" ${CEPH_SCRIPT} restart $1
    wlog "-" INFO "Ceph RESTART $1 command finished."
 }
 log_and_restart_blocked_osds ()
@ -221,6 +206,14 @@ log_and_kill_hung_procs ()
 status ()
 {
    local target="$1"  # no shift here
    [ -z "${target}" ] && target="mon osd"
    if [ ! -f ${CEPH_FILE} ]; then
        # Ceph is not running on this node, return success
        exit 0
    fi
    if [[ "$system_type" == "All-in-one" ]] && [[ "$system_mode" != "simplex" ]] && [[ "$1" == "osd" ]]; then
        timeout $CEPH_STATUS_TIMEOUT ceph -s
        if [ "$?" -ne 0 ]; then
@ -231,23 +224,30 @@ status ()
        fi
    fi
-    if [ -f ${CEPH_RESTARTING_FILE} ]; then
+    # Report success while ceph mon is running a service action
-        # Ceph is restarting, we don't report state changes on the first pass
+    # otherwise mark ceph mon status is in progress
-        rm -f ${CEPH_RESTARTING_FILE}
+    exec {LOCK_CEPH_MON_STATUS_FD}>${LOCK_CEPH_MON_STATUS_FILE}
    if [[ "${target}" == *"mon"* ]]; then
        flock --shared --nonblock ${LOCK_CEPH_MON_SERVICE_FILE} true
        if [[ $? -ne 0 ]]; then
            exit 0
        fi
-    if [ -f ${CEPH_FILE} ]; then
+        # Lock will be released when script exits
-        # Make sure the script does not 'exit' between here and the 'rm -f' below
+        flock --shared ${LOCK_CEPH_MON_STATUS_FD}
-        # or the checkpoint file will be left behind
+    fi
-        if [[ $MONITOR_COMMAND == 1 ]]; then
+    # Report success while ceph osd is running a service action
-            touch -f ${CEPH_GET_MON_STATUS_FILE}
+    # otherwise mark ceph osd status is in progress
    exec {LOCK_CEPH_OSD_STATUS_FD}>${LOCK_CEPH_OSD_STATUS_FILE}
    if [[ "${target}" == *"osd"* ]]; then
        flock --shared --nonblock ${LOCK_CEPH_OSD_SERVICE_FILE} true
        if [[ $? -ne 0 ]]; then
            exit 0
        fi
        # Lock will be released when script exits
        flock --shared ${LOCK_CEPH_OSD_STATUS_FD}
    fi
-        if [[ $OSD_COMMAND == 1 ]]; then
+    result=`${CEPH_SCRIPT} status $1 {LOCK_CEPH_MON_STATUS_FD}>&- {LOCK_CEPH_OSD_STATUS_FD}>&-`
            touch -f ${CEPH_GET_OSD_STATUS_FILE}
        fi
        result=`${CEPH_SCRIPT} status $1`
    RC=$?
    if [ "$RC" -ne 0 ]; then
        erred_procs=`echo "$result" | sort | uniq | awk ' /not running|dead|failed/ {printf "%s ", $1}' | sed 's/://g' | sed 's/, $//g'`
@ -291,14 +291,6 @@ status ()
        fi
    fi
        if [[ $MONITOR_COMMAND == 1 ]]; then
            rm -f ${CEPH_GET_MON_STATUS_FILE}
        fi
        if [[ $OSD_COMMAND == 1 ]]; then
            rm -f ${CEPH_GET_OSD_STATUS_FILE}
        fi
    if [[ $RC == 0 ]] && [[ "$1" == "mon" ]] && [[ "$system_type" == "All-in-one" ]] && [[ "$system_mode" != "simplex" ]]; then
        # SM needs exit code != 0 from 'status mon' argument of the init script on
        # standby controller otherwise it thinks that the monitor is running and
@ -314,28 +306,20 @@ status ()
            exit 3
        fi
    fi
    else
        # Ceph is not running on this node, return success
        exit 0
    fi
 }
 case "${args[0]}" in
    start)
        check_command_type ${args[1]}
        start ${args[1]}
        ;;
    stop)
        check_command_type ${args[1]}
        stop ${args[1]}
        ;;
    restart)
        check_command_type ${args[1]}
        restart ${args[1]}
        ;;
    status)
        check_command_type ${args[1]}
        status ${args[1]}
        ;;
    *)