diff --git a/ceph/ceph/debian/deb_folder/ceph-base.ceph.init b/ceph/ceph/debian/deb_folder/ceph-base.ceph.init index badd1ea9b..d29d2849a 100755 --- a/ceph/ceph/debian/deb_folder/ceph-base.ceph.init +++ b/ceph/ceph/debian/deb_folder/ceph-base.ceph.init @@ -612,8 +612,10 @@ stop_daemon() { ## command line options options= +IFS=" " read -r -a args <<< "$@" +wlog "-" INFO "$*" -OPTS=$(${GETOPT} -n 'init-ceph' -o 'hvam:c:' -l 'help,verbose,valgrind,novalgrind,allhosts,restart,norestart,btrfs,nobtrfs,fsmount,nofsmount,btrfsumount,fsumount,conf:,cluster:,hostname:' -- "$@") +OPTS=$(${GETOPT} -n 'init-ceph' -o 'hvam:c:' -l 'help,verbose,valgrind,novalgrind,allhosts,restart,norestart,btrfs,nobtrfs,fsmount,nofsmount,btrfsumount,fsumount,conf:,cluster:,hostname:' -- "${args[@]}") if [ $? != 0 ] then exit 1 @@ -735,11 +737,51 @@ if [ "$command" = "stop" -o "$command" = "onestop" ]; then what="$new_order" fi -# Check if the monitors are up before starting any mds -# This is needed only for Standard deployments . /etc/platform/platform.conf + +# On AIO-DX, pmon monitors the ceph-mds process. +# If ceph-mon is not running, ceph-mds will hang when starting. +# Check if we are trying to bring up ceph-mds and ceph-mon is not ready yet +if [ "${system_type}" == "All-in-one" ] && [ "${system_mode}" == "duplex" ]; then + if [ "${command}" = "start" -o "${command}" = "onestart" ]; then + what_out= + what_mds= + re="\s*mon" + if [[ ${what} =~ ${re} ]]; then + has_mon=1 + else + has_mon=0 + CEPH_STATUS='' + execute_ceph_cmd CEPH_STATUS "ceph status" "ceph -s" + if [ $? -eq 0 ]; then + has_mon=1 + fi + fi + for name in ${what}; do + type=$(echo "${name}" | cut -c 1-3) + if [ "${type}" == "mds" ]; then + what_mds+=" ${name}" # keep every mds entry, not only the last one + continue + fi + what_out+=" ${name}" + done + if [ ${has_mon} -eq 1 ] && [ ! 
-z "${what_mds}" ]; then + what_out+=" ${what_mds}" + fi + what="${what_out}" + fi + # If the variable 'what' is empty, then it was trying to bring up ceph-mds but ceph-mon is not active. + # When ceph-mon is not active, we cannot execute ceph-mds yet, thus returning error. + if [ -z "${what}" ]; then + EXIT_STATUS=1 + fi +fi + +# Check if the monitors are up before starting any mds +# This is needed only for Standard deployments + if [ "$system_type" == "Standard" ]; then CEPH_STATUS='' execute_ceph_cmd CEPH_STATUS "ceph status" "ceph -s" @@ -999,8 +1041,15 @@ EOF [ -n "$post_stop" ] && do_cmd "$post_stop" [ -n "$lockfile" ] && [ "$?" -eq 0 ] && rm -f $lockfile # flush journal to data disk in background - if [ "$type" = "osd" ];then - $(/usr/bin/ceph-osd -i $id --flush-journal) & + if [ "${type}" = "osd" ];then + CMD_OUTPUT='' + execute_ceph_cmd CMD_OUTPUT "Ceph Status" "ceph -s" + if [ $? == 0 ]; then + wlog "${name}" "INFO" "Flushing journal" + /usr/bin/ceph-osd -i "$id" --flush-journal > /dev/null & + else + wlog "${name}" "INFO" "Skipping journal flush" + fi fi wlog $name "INFO" "Process stopped, setting state to $ST_STOPPED" save_proc_state $name $ST_STOPPED diff --git a/ceph/ceph/debian/deb_folder/ceph-base.install b/ceph/ceph/debian/deb_folder/ceph-base.install index 9cecc1aa1..549058c0c 100644 --- a/ceph/ceph/debian/deb_folder/ceph-base.install +++ b/ceph/ceph/debian/deb_folder/ceph-base.install @@ -26,6 +26,7 @@ etc/init.d/ceph etc/init.d/mgr-restful-plugin etc/init.d/ceph-init-wrapper etc/ceph/ceph.conf.pmon +etc/ceph/ceph-mds.conf.pmon etc/ceph/ceph.conf etc/services.d/* usr/sbin/ceph-preshutdown.sh diff --git a/ceph/ceph/debian/deb_folder/rules b/ceph/ceph/debian/deb_folder/rules index 7709b7aff..a4590e681 100755 --- a/ceph/ceph/debian/deb_folder/rules +++ b/ceph/ceph/debian/deb_folder/rules @@ -6,13 +6,15 @@ SOURCE1 := ceph.sh SOURCE2 := mgr-restful-plugin.py SOURCE3 := ceph.conf.pmon -SOURCE4 := ceph-init-wrapper.sh -SOURCE5 := ceph.conf -SOURCE6 := 
ceph-manage-journal.py -SOURCE7 := ceph.service -SOURCE8 := mgr-restful-plugin.service -SOURCE9 := ceph-preshutdown.sh -SOURCE10 := starlingx-docker-override.conf +SOURCE4 := ceph-mds.conf.pmon +SOURCE5 := ceph-init-wrapper.sh +SOURCE6 := ceph.conf +SOURCE7 := ceph-manage-journal.py +SOURCE8 := ceph.service +SOURCE9 := mgr-restful-plugin.service +SOURCE10 := ceph-preshutdown.sh +SOURCE11 := starlingx-docker-override.conf + # Paths export DESTDIR = $(CURDIR)/debian/tmp @@ -188,13 +190,14 @@ override_dh_auto_install: install -D -m 750 ${SOURCE1} $(DESTDIR)/${SYSCONFDIR}/services.d/worker/ install -D -m 750 ${SOURCE2} $(DESTDIR)/${INITDIR}/mgr-restful-plugin install -D -m 750 ${SOURCE3} $(DESTDIR)/${SYSCONFDIR}/ceph/ - install -D -m 750 ${SOURCE4} $(DESTDIR)/${INITDIR}/ceph-init-wrapper - install -D -m 640 ${SOURCE5} $(DESTDIR)/${SYSCONFDIR}/ceph/ - install -D -m 700 ${SOURCE6} $(DESTDIR)/${SBINDIR}/ceph-manage-journal - install -D -m 644 ${SOURCE7} $(DESTDIR)/${UNITDIR}/ceph.service - install -D -m 644 ${SOURCE8} $(DESTDIR)/${UNITDIR}/mgr-restful-plugin.service - install -D -m 700 ${SOURCE9} $(DESTDIR)/${SBINDIR}/ceph-preshutdown.sh - install -D -m 644 ${SOURCE10} $(DESTDIR)/${UNITDIR}/docker.service.d/starlingx-docker-override.conf + install -D -m 750 ${SOURCE4} $(DESTDIR)/${SYSCONFDIR}/ceph/ + install -D -m 750 ${SOURCE5} $(DESTDIR)/${INITDIR}/ceph-init-wrapper + install -D -m 640 ${SOURCE6} $(DESTDIR)/${SYSCONFDIR}/ceph/ + install -D -m 700 ${SOURCE7} $(DESTDIR)/${SBINDIR}/ceph-manage-journal + install -D -m 644 ${SOURCE8} $(DESTDIR)/${UNITDIR}/ceph.service + install -D -m 644 ${SOURCE9} $(DESTDIR)/${UNITDIR}/mgr-restful-plugin.service + install -D -m 700 ${SOURCE10} $(DESTDIR)/${SBINDIR}/ceph-preshutdown.sh + install -D -m 644 ${SOURCE11} $(DESTDIR)/${UNITDIR}/docker.service.d/starlingx-docker-override.conf install -m 750 src/init-radosgw $(DESTDIR)/${INITDIR}/ceph-radosgw sed -i '/### END INIT INFO/a SYSTEMCTL_SKIP_REDIRECT=1' $(DESTDIR)/${INITDIR}/ceph-radosgw 
install -m 750 src/init-rbdmap $(DESTDIR)/${INITDIR}/rbdmap @@ -275,6 +278,7 @@ override_dh_fixperms: -Xceph.sh \ -Xmgr-restful-plugin \ -Xceph.conf.pmon \ + -Xceph-mds.conf.pmon \ -Xceph-init-wrapper \ -Xceph.conf \ -Xceph-manage-journal \ diff --git a/ceph/ceph/files/ceph-init-wrapper.sh b/ceph/ceph/files/ceph-init-wrapper.sh index 33f56c441..3b1c1ce19 100755 --- a/ceph/ceph/files/ceph-init-wrapper.sh +++ b/ceph/ceph/files/ceph-init-wrapper.sh @@ -1,6 +1,6 @@ #!/bin/bash # -# Copyright (c) 2019 Wind River Systems, Inc. +# Copyright (c) 2019-2023 Wind River Systems, Inc. # # SPDX-License-Identifier: Apache-2.0 # @@ -14,8 +14,8 @@ # "/var/run/.ceph_started" when ceph is running and remove it when # is not. # -# The script also extracts one or more ceph process names that are -# reported as 'not running' or 'dead' or 'failed' by '/etc/intit.d/ceph status' +# The script also extracts one or more ceph process names that are +# reported as 'not running' or 'dead' or 'failed' by '/etc/init.d/ceph status' # and writes the names to a text file: /tmp/ceph_status_failure.txt for # pmond to access. The pmond adds the text to logs and alarms. Example of text # samples written to file by this script are: @@ -24,7 +24,7 @@ # 'mon.storage-0' # 'mon.storage-0, osd.2' # -# Moreover, for processes that are reported as 'hung' by '/etc/intit.d/ceph status' +# Moreover, for processes that are reported as 'hung' by '/etc/init.d/ceph status' # the script will try increase their logging to 'debug' for a configurable interval. # With logging increased it will outputs a few stack traces then, at the end of this # interval, it dumps its stack core and kills it. 
@@ -43,6 +43,14 @@ CEPH_GET_MON_STATUS_FILE="$VOLATILE_PATH/.ceph_getting_mon_status" CEPH_GET_OSD_STATUS_FILE="$VOLATILE_PATH/.ceph_getting_osd_status" CEPH_STATUS_FAILURE_TEXT_FILE="/tmp/ceph_status_failure.txt" +# For All-in-one duplex, set some variables +if [ "${system_type}" == "All-in-one" ] && [ "${system_mode}" == "duplex" ]; then + CEPH_MON_LIB_PATH=/var/lib/ceph/mon + CEPH_LAST_ACTIVE_CONTROLLER_0_FLAG="${CEPH_MON_LIB_PATH}/.last_ceph_mon_active_controller_0" + CEPH_LAST_ACTIVE_CONTROLLER_1_FLAG="${CEPH_MON_LIB_PATH}/.last_ceph_mon_active_controller_1" + CEPH_LAST_ACTIVE_CONTROLLER_FLAG="${CEPH_MON_LIB_PATH}/.last_ceph_mon_active_${HOSTNAME/-/_}" +fi + BINDIR=/usr/bin SBINDIR=/usr/sbin if grep -q "Debian" /etc/os-release; then @@ -85,6 +93,114 @@ if [ ! -z $ARGS ]; then args+=("${new_args[@]}") fi +# Verify if drbd-cephmon is in sync, checking the output of 'drbdadm dstate' +# Return 0 on success and 1 if drbd-cephmon is not ready +is_drbd_cephmon_in_sync () +{ + local DRBD_CEPHMON_STATUS=$(drbdadm dstate drbd-cephmon) + wlog "-" INFO "drbd-cephmon status: ${DRBD_CEPHMON_STATUS}" + if [ "${DRBD_CEPHMON_STATUS}" == "UpToDate/UpToDate" ]; then + return 0 + fi + return 1 +} + +# Verify if drbd-cephmon role is primary, checking the output of 'drbdadm role' +# Return 0 on success and 1 if drbd-cephmon is not primary +is_drbd_cephmon_primary () +{ + drbdadm role drbd-cephmon | grep -q 'Primary/' + if [ $? -eq 0 ]; then + wlog "-" INFO "drbd-cephmon role is Primary" + return 0 + fi + wlog "-" INFO "drbd-cephmon role is NOT Primary" + return 1 +} + +# Verify if drbd-cephmon partition is mounted. +# Return 0 on success and 1 if drbd-cephmon partition is not mounted +is_drbd_cephmon_mounted () +{ + findmnt -no SOURCE "${CEPH_MON_LIB_PATH}" | grep -q drbd + if [ $? 
-eq 0 ]; then + wlog "-" INFO "drbd-cephmon partition is mounted" + return 0 + fi + wlog "-" INFO "drbd-cephmon partition is NOT mounted" + return 1 +} + +# Verify if ceph mon can be started on AIO-DX configuration. +# This function must be called only on AIO-DX. +# Return 0 on success and 1 if ceph mon cannot be started +can_start_ceph_mon () +{ + local times="" + + # Verify if drbd-cephmon has role Primary + # Retries 10 times, 1 second interval + for times in {9..0}; do + is_drbd_cephmon_primary + if [ $? -eq 0 ]; then + times=-1 + break; + fi + sleep 1 + done + + if [ ${times} -eq 0 ]; then + wlog "-" ERROR "drbd-cephmon is not primary, cannot start ceph mon" + return 1 + fi + + # Check if drbd-cephmon partition is mounted + # Retries 10 times, 1 second interval + for times in {9..0}; do + is_drbd_cephmon_mounted + if [ $? -eq 0 ]; then + times=-1 + break; + fi + sleep 1 + done + + if [ ${times} -eq 0 ]; then + wlog "-" ERROR "drbd-cephmon is not mounted, cannot start ceph mon" + return 1 + fi + + # Ceph mon was last active in this controller. Can run safely. + if [ -f "${CEPH_LAST_ACTIVE_CONTROLLER_FLAG}" ]; then + return 0 + fi + + # Check if last active ceph-mon was in another controller + if [ "${CEPH_LAST_ACTIVE_CONTROLLER_FLAG}" == "${CEPH_LAST_ACTIVE_CONTROLLER_0_FLAG}" ]; then + local CEPH_OTHER_ACTIVE_CONTROLLER_FLAG="${CEPH_LAST_ACTIVE_CONTROLLER_1_FLAG}" + else + local CEPH_OTHER_ACTIVE_CONTROLLER_FLAG="${CEPH_LAST_ACTIVE_CONTROLLER_0_FLAG}" + fi + if [ -f "${CEPH_OTHER_ACTIVE_CONTROLLER_FLAG}" ]; then + # Verify drbd-cephmon status + for times in {9..0}; do + is_drbd_cephmon_in_sync + if [ $? -eq 0 ]; then + # drbd-cephmon is in sync, it is safe to run. 
+ return 0 + fi + sleep 1 + done + + # drbd-cephmon is not in sync, it is not safe to run + wlog "-" ERROR "drbd-cephmon is not in sync, cannot start ceph mon" + return 1 + fi + + # This is safe to run ceph mon + return 0 +} + with_service_lock () { local target="$1"; shift @@ -133,9 +249,45 @@ start () # Ceph is not running on this node, return success exit 0 fi - wlog "-" INFO "Ceph START $1 command received" - with_service_lock "$1" ${CEPH_SCRIPT} start $1 - wlog "-" INFO "Ceph START $1 command finished." + + local service="$1" + + # For AIO-DX, the mon service has special treatment + if [ "${service}" == "mon" ] && [ "${system_type}" == "All-in-one" ] && [ "${system_mode}" == "duplex" ]; then + # After the first controller unlock, ceph-mon is started by + # puppet-ceph module via sysvinit using /etc/init.d/ceph directly. + # Setting the controller-0 flag to the default prevents + # another controller from starting before any host-swact. + if ! compgen -G "${CEPH_MON_LIB_PATH}/.last_ceph_mon_active_controller_*" > /dev/null; then + touch "${CEPH_LAST_ACTIVE_CONTROLLER_0_FLAG}" + fi + + # NOTE: After an uncontrolled swact, force-starting the ceph-mon service + # requires renaming the flag file to the desired controller. + can_start_ceph_mon + if [ $? -ne 0 ]; then + wlog "-" ERROR "Ceph mon cannot be started now." + exit 1 + fi + fi + + # Start the service + wlog "-" INFO "Ceph START ${service} command received" + with_service_lock "${service}" ${CEPH_SCRIPT} start ${service} + wlog "-" INFO "Ceph START ${service} command finished." 
+ + # For AIO-DX, the mon service has special treatment + if [ "${service}" == "mon" ] && [ "${system_type}" == "All-in-one" ] && [ "${system_mode}" == "duplex" ]; then + # If ceph-mon is successfully running, clear old flags and set the new one + # RC global variable is set by the with_service_lock function trying to start ceph-mon + if [ ${RC} -eq 0 ]; then + # Remove old flags + rm -f "${CEPH_LAST_ACTIVE_CONTROLLER_0_FLAG}" + rm -f "${CEPH_LAST_ACTIVE_CONTROLLER_1_FLAG}" + # Create new flag + touch "${CEPH_LAST_ACTIVE_CONTROLLER_FLAG}" + fi + fi } stop () diff --git a/ceph/ceph/files/ceph-mds.conf.pmon b/ceph/ceph/files/ceph-mds.conf.pmon new file mode 100644 index 000000000..f02b22b42 --- /dev/null +++ b/ceph/ceph/files/ceph-mds.conf.pmon @@ -0,0 +1,26 @@ +[process] +process = ceph-mds +script = /etc/init.d/ceph + +style = lsb +severity = major ; minor, major, critical +restarts = 5 ; restart retries before error assertion +interval = 30 ; number of seconds to wait between restarts + +mode = status ; Monitoring mode: passive (default) or active + ; passive: process death monitoring (default: always) + ; active : heartbeat monitoring, i.e. 
request / response messaging + ; status : determine process health by executing the "status" command + ; "start" is used to start the process(es) again + ; ignore : do not monitor or stop monitoring + +; Status and Active Monitoring Options + +period = 30 ; monitor period in seconds +timeout = 120 ; for active mode, messaging timeout period in seconds, must be shorter than period + ; for status mode, max amount of time for a command to execute + +; Status Monitoring Options +start_arg = start mds ; start argument for the script +status_arg = status mds ; status argument for the script +status_failure_text = /tmp/ceph_status_failure.txt ; text to be added to alarms or logs, this is optional diff --git a/ceph/ceph/files/ceph.sh b/ceph/ceph/files/ceph.sh index e646a149f..be72061e0 100644 --- a/ceph/ceph/files/ceph.sh +++ b/ceph/ceph/files/ceph.sh @@ -1,4 +1,8 @@ #!/bin/bash +# +# Copyright (c) 2023 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 INITDIR=/etc/init.d LOGFILE=/var/log/ceph/ceph-init.log @@ -22,20 +26,17 @@ logecho () start () { - SERVICES="" - if [[ "$system_type" == "All-in-one" ]] && [[ "$system_mode" == "duplex" ]]; then - # In an AIO-DX configuration SM manages the floating MON and OSDs. Here - # we defer starting OSDs directly via the init script to allow SM to - # start them at the appropriate time. This will eliminate a race between - # MTC and SM starting OSDs simultaneously. Continue to start MON/MDS - # service here so that MDS is operational after the monitor is up. - SERVICES="mon mds" + if [[ "$system_type" != "All-in-one" ]] || [[ "$system_mode" != "duplex" ]]; then + logecho "Starting ceph services..." + ${INITDIR}/ceph start >> ${LOGFILE} 2>&1 + RC=$? + else + # In an AIO-DX configuration SM manages the floating MON and OSDs and pmon manages + # the ceph-mds process. Here we defer starting all ceph processes to allow SM and pmon + # to start them at the appropriate time. 
+ RC=0 fi - logecho "Starting ceph ${SERVICES} services..." - ${INITDIR}/ceph start ${SERVICES} >> ${LOGFILE} 2>&1 - RC=$? - if [ ! -f ${CEPH_FILE} ]; then touch ${CEPH_FILE} fi