Merge "AIO-DX Ceph Optimizations"

This commit is contained in:
Zuul 2023-03-09 14:44:23 +00:00 committed by Gerrit Code Review
commit e49a70fd01
6 changed files with 271 additions and 38 deletions

View File

@ -612,8 +612,10 @@ stop_daemon() {
## command line options
options=
IFS=" " read -r -a args <<< "$@"
wlog "-" INFO "$@"
OPTS=$(${GETOPT} -n 'init-ceph' -o 'hvam:c:' -l 'help,verbose,valgrind,novalgrind,allhosts,restart,norestart,btrfs,nobtrfs,fsmount,nofsmount,btrfsumount,fsumount,conf:,cluster:,hostname:' -- "$@")
OPTS=$(${GETOPT} -n 'init-ceph' -o 'hvam:c:' -l 'help,verbose,valgrind,novalgrind,allhosts,restart,norestart,btrfs,nobtrfs,fsmount,nofsmount,btrfsumount,fsumount,conf:,cluster:,hostname:' -- "${args[@]}")
if [ $? != 0 ]
then
exit 1
@ -735,11 +737,51 @@ if [ "$command" = "stop" -o "$command" = "onestop" ]; then
what="$new_order"
fi
# Check if the monitors are up before starting any mds
# This is needed only for Standard deployments
. /etc/platform/platform.conf
# On AIO-DX, pmon monitors the ceph-mds process.
# If ceph-mon is not running, ceph-mds will hang when starting, so mds
# daemons are only kept in 'what' when a monitor is part of this request
# or when "ceph -s" succeeds.
if [ "${system_type}" == "All-in-one" ] && [ "${system_mode}" == "duplex" ]; then
    if [ "${command}" = "start" -o "${command}" = "onestart" ]; then
        what_out=
        what_mds=
        re="\s*mon"
        if [[ ${what} =~ ${re} ]]; then
            # A monitor is requested in this same invocation.
            has_mon=1
        else
            has_mon=0
            CEPH_STATUS=''
            # A zero return from execute_ceph_cmd is treated as "monitor available".
            execute_ceph_cmd CEPH_STATUS "ceph status" "ceph -s"
            if [ $? -eq 0 ]; then
                has_mon=1
            fi
        fi
        # Split the request: mds names are set aside, everything else is kept in order.
        for name in ${what}; do
            # First three characters of the daemon name, e.g. 'mds' for 'mds.controller-0'.
            type="${name:0:3}"
            if [ "${type}" == "mds" ]; then
                # Accumulate every mds name; keeping only the last one would
                # silently drop the other mds daemons from the request.
                what_mds+=" ${name}"
                continue
            fi
            what_out+=" ${name}"
        done
        # Re-append the mds daemons (after all other daemons) only when a monitor is available.
        if [ ${has_mon} -eq 1 ] && [ -n "${what_mds}" ]; then
            what_out+="${what_mds}"
        fi
        what="${what_out}"
    fi
    # If the variable 'what' is empty, the request was to bring up ceph-mds while ceph-mon is not active.
    # When ceph-mon is not active, ceph-mds cannot be started yet, so an error status is recorded.
    if [ -z "${what}" ]; then
        EXIT_STATUS=1
    fi
fi
# Check if the monitors are up before starting any mds
# This is needed only for Standard deployments
if [ "$system_type" == "Standard" ]; then
CEPH_STATUS=''
execute_ceph_cmd CEPH_STATUS "ceph status" "ceph -s"
@ -999,8 +1041,15 @@ EOF
[ -n "$post_stop" ] && do_cmd "$post_stop"
[ -n "$lockfile" ] && [ "$?" -eq 0 ] && rm -f $lockfile
# flush journal to data disk in background
if [ "$type" = "osd" ];then
$(/usr/bin/ceph-osd -i $id --flush-journal) &
if [ "${type}" = "osd" ];then
    CMD_OUTPUT=''
    # Only flush the journal when "ceph -s" succeeds; otherwise skip it.
    execute_ceph_cmd CMD_OUTPUT "Ceph Status" "ceph -s"
    if [ $? -eq 0 ]; then
        wlog "${name}" "INFO" "Flushing journal"
        # Run the flush directly in the background. Wrapping it in $(...)
        # would make the shell try to execute the command's stdout as a
        # second command. stdout is discarded, as it was never displayed.
        /usr/bin/ceph-osd -i "${id}" --flush-journal > /dev/null &
    else
        wlog "${name}" "INFO" "Skipping journal flush"
    fi
fi
wlog $name "INFO" "Process stopped, setting state to $ST_STOPPED"
save_proc_state $name $ST_STOPPED

View File

@ -26,6 +26,7 @@ etc/init.d/ceph
etc/init.d/mgr-restful-plugin
etc/init.d/ceph-init-wrapper
etc/ceph/ceph.conf.pmon
etc/ceph/ceph-mds.conf.pmon
etc/ceph/ceph.conf
etc/services.d/*
usr/sbin/ceph-preshutdown.sh

View File

@ -6,13 +6,15 @@
SOURCE1 := ceph.sh
SOURCE2 := mgr-restful-plugin.py
SOURCE3 := ceph.conf.pmon
SOURCE4 := ceph-init-wrapper.sh
SOURCE5 := ceph.conf
SOURCE6 := ceph-manage-journal.py
SOURCE7 := ceph.service
SOURCE8 := mgr-restful-plugin.service
SOURCE9 := ceph-preshutdown.sh
SOURCE10 := starlingx-docker-override.conf
SOURCE4 := ceph-mds.conf.pmon
SOURCE5 := ceph-init-wrapper.sh
SOURCE6 := ceph.conf
SOURCE7 := ceph-manage-journal.py
SOURCE8 := ceph.service
SOURCE9 := mgr-restful-plugin.service
SOURCE10 := ceph-preshutdown.sh
SOURCE11 := starlingx-docker-override.conf
# Paths
export DESTDIR = $(CURDIR)/debian/tmp
@ -188,13 +190,14 @@ override_dh_auto_install:
install -D -m 750 ${SOURCE1} $(DESTDIR)/${SYSCONFDIR}/services.d/worker/
install -D -m 750 ${SOURCE2} $(DESTDIR)/${INITDIR}/mgr-restful-plugin
install -D -m 750 ${SOURCE3} $(DESTDIR)/${SYSCONFDIR}/ceph/
install -D -m 750 ${SOURCE4} $(DESTDIR)/${INITDIR}/ceph-init-wrapper
install -D -m 640 ${SOURCE5} $(DESTDIR)/${SYSCONFDIR}/ceph/
install -D -m 700 ${SOURCE6} $(DESTDIR)/${SBINDIR}/ceph-manage-journal
install -D -m 644 ${SOURCE7} $(DESTDIR)/${UNITDIR}/ceph.service
install -D -m 644 ${SOURCE8} $(DESTDIR)/${UNITDIR}/mgr-restful-plugin.service
install -D -m 700 ${SOURCE9} $(DESTDIR)/${SBINDIR}/ceph-preshutdown.sh
install -D -m 644 ${SOURCE10} $(DESTDIR)/${UNITDIR}/docker.service.d/starlingx-docker-override.conf
install -D -m 750 ${SOURCE4} $(DESTDIR)/${SYSCONFDIR}/ceph/
install -D -m 750 ${SOURCE5} $(DESTDIR)/${INITDIR}/ceph-init-wrapper
install -D -m 640 ${SOURCE6} $(DESTDIR)/${SYSCONFDIR}/ceph/
install -D -m 700 ${SOURCE7} $(DESTDIR)/${SBINDIR}/ceph-manage-journal
install -D -m 644 ${SOURCE8} $(DESTDIR)/${UNITDIR}/ceph.service
install -D -m 644 ${SOURCE9} $(DESTDIR)/${UNITDIR}/mgr-restful-plugin.service
install -D -m 700 ${SOURCE10} $(DESTDIR)/${SBINDIR}/ceph-preshutdown.sh
install -D -m 644 ${SOURCE11} $(DESTDIR)/${UNITDIR}/docker.service.d/starlingx-docker-override.conf
install -m 750 src/init-radosgw $(DESTDIR)/${INITDIR}/ceph-radosgw
sed -i '/### END INIT INFO/a SYSTEMCTL_SKIP_REDIRECT=1' $(DESTDIR)/${INITDIR}/ceph-radosgw
install -m 750 src/init-rbdmap $(DESTDIR)/${INITDIR}/rbdmap
@ -275,6 +278,7 @@ override_dh_fixperms:
-Xceph.sh \
-Xmgr-restful-plugin \
-Xceph.conf.pmon \
-Xceph-mds.conf.pmon \
-Xceph-init-wrapper \
-Xceph.conf \
-Xceph-manage-journal \

View File

@ -1,6 +1,6 @@
#!/bin/bash
#
# Copyright (c) 2019 Wind River Systems, Inc.
# Copyright (c) 2019-2023 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -14,8 +14,8 @@
# "/var/run/.ceph_started" when ceph is running and remove it when
# is not.
#
# The script also extracts one or more ceph process names that are
# reported as 'not running' or 'dead' or 'failed' by '/etc/intit.d/ceph status'
# The script also extracts one or more ceph process names that are
# reported as 'not running' or 'dead' or 'failed' by '/etc/init.d/ceph status'
# and writes the names to a text file: /tmp/ceph_status_failure.txt for
# pmond to access. The pmond adds the text to logs and alarms. Example of text
# samples written to file by this script are:
@ -24,7 +24,7 @@
# 'mon.storage-0'
# 'mon.storage-0, osd.2'
#
# Moreover, for processes that are reported as 'hung' by '/etc/intit.d/ceph status'
# Moreover, for processes that are reported as 'hung' by '/etc/init.d/ceph status'
# the script will try to increase their logging to 'debug' for a configurable interval.
# With logging increased it will output a few stack traces; then, at the end of this
# interval, it dumps its stack core and kills it.
@ -43,6 +43,14 @@ CEPH_GET_MON_STATUS_FILE="$VOLATILE_PATH/.ceph_getting_mon_status"
CEPH_GET_OSD_STATUS_FILE="$VOLATILE_PATH/.ceph_getting_osd_status"
CEPH_STATUS_FAILURE_TEXT_FILE="/tmp/ceph_status_failure.txt"
# All-in-one duplex only: locations of the "last active ceph-mon" flag files
if [[ "${system_type}" == "All-in-one" && "${system_mode}" == "duplex" ]]; then
    CEPH_MON_LIB_PATH=/var/lib/ceph/mon
    # One flag per controller
    CEPH_LAST_ACTIVE_CONTROLLER_0_FLAG="${CEPH_MON_LIB_PATH}/.last_ceph_mon_active_controller_0"
    CEPH_LAST_ACTIVE_CONTROLLER_1_FLAG="${CEPH_MON_LIB_PATH}/.last_ceph_mon_active_controller_1"
    # Flag for this host: HOSTNAME with its first '-' replaced by '_'
    CEPH_LAST_ACTIVE_CONTROLLER_FLAG="${CEPH_MON_LIB_PATH}/.last_ceph_mon_active_${HOSTNAME/-/_}"
fi
BINDIR=/usr/bin
SBINDIR=/usr/sbin
if grep -q "Debian" /etc/os-release; then
@ -85,6 +93,114 @@ if [ ! -z $ARGS ]; then
args+=("${new_args[@]}")
fi
# Check whether drbd-cephmon is in sync, using the output of 'drbdadm dstate'.
# Return 0 when the reported state is "UpToDate/UpToDate" and 1 otherwise.
is_drbd_cephmon_in_sync ()
{
    local dstate=$(drbdadm dstate drbd-cephmon)

    wlog "-" INFO "drbd-cephmon status: ${dstate}"

    # The comparison result (0 = match, 1 = no match) is the function's return value.
    [[ "${dstate}" == "UpToDate/UpToDate" ]]
}
# Check whether the drbd-cephmon role is primary, using the output of 'drbdadm role'.
# Return 0 when the output contains 'Primary/' and 1 otherwise.
is_drbd_cephmon_primary ()
{
    if ! drbdadm role drbd-cephmon | grep -q 'Primary/'; then
        wlog "-" INFO "drbd-cephmon role is NOT Primary"
        return 1
    fi

    wlog "-" INFO "drbd-cephmon role is Primary"
    return 0
}
# Check whether the drbd-cephmon partition is mounted: the mount source that
# findmnt reports for ${CEPH_MON_LIB_PATH} must contain 'drbd'.
# Return 0 when it is mounted and 1 otherwise.
is_drbd_cephmon_mounted ()
{
    if ! findmnt -no SOURCE "${CEPH_MON_LIB_PATH}" | grep -q drbd; then
        wlog "-" INFO "drbd-cephmon partition is NOT mounted"
        return 1
    fi

    wlog "-" INFO "drbd-cephmon partition is mounted"
    return 0
}
# Run the given check function up to 10 times, sleeping 1 second after each
# failed attempt. Return 0 as soon as the check succeeds, 1 if all attempts fail.
_drbd_cephmon_retry ()
{
    local check="$1"
    local attempt=""

    for attempt in {1..10}; do
        if "${check}"; then
            return 0
        fi
        sleep 1
    done
    return 1
}

# Decide whether ceph mon may be started on an AIO-DX configuration.
# This function must be called only on AIO-DX.
# Return 0 when it is safe to start ceph mon and 1 when it is not.
can_start_ceph_mon ()
{
    local other_flag=""

    # drbd-cephmon must have role Primary (10 attempts, 1 second apart)
    if ! _drbd_cephmon_retry is_drbd_cephmon_primary; then
        wlog "-" ERROR "drbd-cephmon is not primary, cannot start ceph mon"
        return 1
    fi

    # The drbd-cephmon partition must be mounted (10 attempts, 1 second apart)
    if ! _drbd_cephmon_retry is_drbd_cephmon_mounted; then
        wlog "-" ERROR "drbd-cephmon is not mounted, cannot start ceph mon"
        return 1
    fi

    # Ceph mon was last active on this controller: safe to run.
    if [ -f "${CEPH_LAST_ACTIVE_CONTROLLER_FLAG}" ]; then
        return 0
    fi

    # Work out which flag belongs to the other controller
    other_flag="${CEPH_LAST_ACTIVE_CONTROLLER_0_FLAG}"
    if [ "${CEPH_LAST_ACTIVE_CONTROLLER_FLAG}" == "${CEPH_LAST_ACTIVE_CONTROLLER_0_FLAG}" ]; then
        other_flag="${CEPH_LAST_ACTIVE_CONTROLLER_1_FLAG}"
    fi

    # No flag from the other controller either: safe to run.
    if [ ! -f "${other_flag}" ]; then
        return 0
    fi

    # Ceph mon was last active on the other controller: only safe once
    # drbd-cephmon reports being in sync.
    if _drbd_cephmon_retry is_drbd_cephmon_in_sync; then
        return 0
    fi

    wlog "-" ERROR "drbd-cephmon is not in sync, cannot start ceph mon"
    return 1
}
with_service_lock ()
{
local target="$1"; shift
@ -133,9 +249,45 @@ start ()
# Ceph is not running on this node, return success
exit 0
fi
wlog "-" INFO "Ceph START $1 command received"
with_service_lock "$1" ${CEPH_SCRIPT} start $1
wlog "-" INFO "Ceph START $1 command finished."
local service="$1"
# For AIO-DX, the mon service has special treatment
if [ "${service}" == "mon" ] && [ "${system_type}" == "All-in-one" ] && [ "${system_mode}" == "duplex" ]; then
    # After the first controller unlock, ceph-mon is started by
    # puppet-ceph module via sysvinit using /etc/init.d/ceph directly.
    # Setting the controller-0 flag as the default prevents
    # another controller from starting before any host-swact.
    #
    # compgen -G succeeds when at least one flag file matches the pattern.
    # A glob inside "[ -e ... ]" expands to several operands when more than
    # one flag exists, which makes the test itself fail with an error.
    if ! compgen -G "${CEPH_MON_LIB_PATH}/.last_ceph_mon_active_controller_*" > /dev/null; then
        touch "${CEPH_LAST_ACTIVE_CONTROLLER_0_FLAG}"
    fi

    # NOTE: In case of an uncontrolled swact, forcing the ceph-mon service to
    # start requires renaming the flag to the desired controller.
    if ! can_start_ceph_mon; then
        wlog "-" ERROR "Ceph mon cannot be started now."
        exit 1
    fi
fi
# Start the service
wlog "-" INFO "Ceph START ${service} command received"
with_service_lock "${service}" ${CEPH_SCRIPT} start ${service}
wlog "-" INFO "Ceph START ${service} command finished."
# For AIO-DX, the mon service has special treatment
if [ "${service}" == "mon" ] && [ "${system_type}" == "All-in-one" ] && [ "${system_mode}" == "duplex" ]; then
# If ceph-mon is successfully running, clear old flags and set the new one
# RC global variable is set by the with_service_lock function trying to start ceph-mon
if [ ${RC} -eq 0 ]; then
# Remove old flags
rm -f "${CEPH_LAST_ACTIVE_CONTROLLER_0_FLAG}"
rm -f "${CEPH_LAST_ACTIVE_CONTROLLER_1_FLAG}"
# Create new flag
touch "${CEPH_LAST_ACTIVE_CONTROLLER_FLAG}"
fi
fi
}
stop ()

View File

@ -0,0 +1,26 @@
[process]
process = ceph-mds
script = /etc/init.d/ceph
style = lsb
severity = major ; minor, major, critical
restarts = 5 ; restart retries before error assertion
interval = 30 ; number of seconds to wait between restarts
mode = status ; Monitoring mode: passive (default), active, status or ignore
; passive: process death monitoring (default: always)
; active : heartbeat monitoring, i.e. request / response messaging
; status : determine process health with executing "status" command
; "start" is used to start the process(es) again
; ignore : do not monitor or stop monitoring
; Status and Active Monitoring Options
period = 30 ; monitor period in seconds
timeout = 120 ; for active mode, messaging timeout period in seconds, must be shorter than period
; for status mode, max amount of time for a command to execute
; Status Monitoring Options
start_arg = start mds ; start argument for the script
status_arg = status mds ; status argument for the script
status_failure_text = /tmp/ceph_status_failure.txt ; text to be added to alarms or logs, this is optional

View File

@ -1,4 +1,8 @@
#!/bin/bash
#
# Copyright (c) 2023 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
INITDIR=/etc/init.d
LOGFILE=/var/log/ceph/ceph-init.log
@ -22,20 +26,17 @@ logecho ()
start ()
{
SERVICES=""
if [[ "$system_type" == "All-in-one" ]] && [[ "$system_mode" == "duplex" ]]; then
# In an AIO-DX configuration SM manages the floating MON and OSDs. Here
# we defer starting OSDs directly via the init script to allow SM to
# start them at the appropriate time. This will eliminate a race between
# MTC and SM starting OSDs simultaneously. Continue to start MON/MDS
# service here so that MDS is operational after the monitor is up.
SERVICES="mon mds"
if [[ "$system_type" != "All-in-one" ]] || [[ "$system_mode" != "duplex" ]]; then
logecho "Starting ceph services..."
${INITDIR}/ceph start >> ${LOGFILE} 2>&1
RC=$?
else
# In an AIO-DX configuration SM manages the floating MON and OSDs and pmon manages
# the ceph-mds process. Here we defer starting all ceph process to allow SM and pmon
# to start them at the appropriate time.
RC=0
fi
logecho "Starting ceph ${SERVICES} services..."
${INITDIR}/ceph start ${SERVICES} >> ${LOGFILE} 2>&1
RC=$?
if [ ! -f ${CEPH_FILE} ]; then
touch ${CEPH_FILE}
fi