From ad20667beb19fa16927cefc3e31c8034e66e8679 Mon Sep 17 00:00:00 2001 From: Felipe Sanches Zanoni Date: Sat, 10 Jun 2023 09:41:48 -0300 Subject: [PATCH] Fix Ceph processes start race condition When nodes are unlocked, mtc will start ceph processes after a successful boot. This was also observed at the end of the AIO-SX optimized restore playbook. This causes a race condition with sm and pmon, leading to failures in some scenarios. To avoid this, the script called by mtc will not start ceph processes anymore. It will only set the flag to enable ceph to run on the node and the processes will be started by sm or pmon later. Test Plan: For each installation setup do: - Fresh install and verify Ceph is running with HEALTH_OK status; - Swact controllers and verify Ceph has HEALTH_OK status; - Run DOR (Dead Office Recovery) and verify Ceph has HEALTH_OK status; - Lock/Unlock Controllers/Storage nodes and check Ceph has HEALTH_OK status; - Reboot active Controller and check Ceph has HEALTH_OK status. PASS: AIO-SX PASS: AIO-DX PASS: Standard (2+2) PASS: Standard with dedicated Storage (2+2+2) PASS: B&R AIO-SX PASS: B&R Optimized AIO-SX Closes-bug: 2023445 Change-Id: I0c81749c6db1e17761aa8aca6276eff50f135959 Signed-off-by: Felipe Sanches Zanoni --- ceph/ceph/files/ceph.sh | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/ceph/ceph/files/ceph.sh b/ceph/ceph/files/ceph.sh index be72061e0..d9d1806e8 100644 --- a/ceph/ceph/files/ceph.sh +++ b/ceph/ceph/files/ceph.sh @@ -26,17 +26,10 @@ logecho () start () { - if [[ "$system_type" != "All-in-one" ]] || [[ "$system_mode" != "duplex" ]]; then - logecho "Starting ceph services..." - ${INITDIR}/ceph start >> ${LOGFILE} 2>&1 - RC=$? - else - # In an AIO-DX configuration SM manages the floating MON and OSDs and pmon manages - # the ceph-mds process. Here we defer starting all ceph process to allow SM and pmon - # to start them at the appropriate time.
- RC=0 - fi - + # Defer ceph initialization to avoid race conditions. Let SM and Pmon start the + # processes at the appropriate time. + # Set the flag to let ceph start later. + logecho "Setting flag to enable ceph processes to start." if [ ! -f ${CEPH_FILE} ]; then touch ${CEPH_FILE} fi @@ -55,7 +48,7 @@ stop () rm -f ${CEPH_FILE} fi - ${INITDIR}/ceph stop >> ${LOGFILE} 2>&1 + ${INITDIR}/ceph-init-wrapper stop >> ${LOGFILE} 2>&1 RC=$? }