diff --git a/mtce-common/cgts-mtce-common-1.0/common/nodeCmds.h b/mtce-common/cgts-mtce-common-1.0/common/nodeCmds.h index 47ff9fdb..7944e61e 100644 --- a/mtce-common/cgts-mtce-common-1.0/common/nodeCmds.h +++ b/mtce-common/cgts-mtce-common-1.0/common/nodeCmds.h @@ -56,6 +56,7 @@ typedef enum MTC_CMD_STAGE__HOST_SERVICES_SEND_CMD, MTC_CMD_STAGE__HOST_SERVICES_RECV_ACK, MTC_CMD_STAGE__HOST_SERVICES_WAIT_FOR_RESULT, + MTC_CMD_STAGE__STORAGE_LOCK_DELAY, /* Common command done stage */ MTC_CMD_STAGE__DONE, diff --git a/mtce-common/cgts-mtce-common-1.0/common/nodeTimers.h b/mtce-common/cgts-mtce-common-1.0/common/nodeTimers.h index 154ffc7d..7371c3bd 100755 --- a/mtce-common/cgts-mtce-common-1.0/common/nodeTimers.h +++ b/mtce-common/cgts-mtce-common-1.0/common/nodeTimers.h @@ -88,6 +88,7 @@ #define MTC_MIN_ONLINE_PERIOD_SECS (7) #define MTC_RETRY_WAIT (5) #define MTC_AGENT_TIMEOUT_EXTENSION (5) +#define MTC_LOCK_CEPH_DELAY (90) /** Host must stay enabled for this long for the * failed_recovery_counter to get cleared */ diff --git a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcCmdHdlr.cpp b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcCmdHdlr.cpp index 98c9d9de..1277b131 100644 --- a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcCmdHdlr.cpp +++ b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcCmdHdlr.cpp @@ -231,6 +231,40 @@ int nodeLinkClass::cmd_handler ( struct nodeLinkClass::node * node_ptr ) dlog ("%s %s request ack (legacy mode)\n", node_ptr->hostname.c_str(), node_ptr->host_services_req.name.c_str()); + + // Upgrades that lock storage nodes can + // lead to storage corruption if ceph isn't given + // enough time to shut down. + // + // The following special case for storage node + // lock forces a 90 sec holdoff for pre-upgrade storage + // hosts ; i.e. legacy mode. + // + if ( is_storage(node_ptr) ) + { + ilog ("%s waiting for ceph OSD shutdown\n", node_ptr->hostname.c_str()); + mtcTimer_reset ( node_ptr->mtcCmd_timer ); + mtcTimer_start ( node_ptr->mtcCmd_timer, mtcTimer_handler, MTC_LOCK_CEPH_DELAY ); + node_ptr->mtcCmd_work_fifo_ptr->stage = MTC_CMD_STAGE__STORAGE_LOCK_DELAY ; + } + else + { + node_ptr->mtcCmd_work_fifo_ptr->status = + node_ptr->host_services_req.status = PASS ; + + node_ptr->mtcCmd_work_fifo_ptr->stage = MTC_CMD_STAGE__DONE ; + } + } + break ; + } + case MTC_CMD_STAGE__STORAGE_LOCK_DELAY: + { + /* wait for the timer to expire before moving on */ + if ( mtcTimer_expired ( node_ptr->mtcCmd_timer ) ) + { + ilog ("%s ceph OSD shutdown wait complete\n", + node_ptr->hostname.c_str()); + node_ptr->mtcCmd_work_fifo_ptr->status = node_ptr->host_services_req.status = PASS ;