Add 90s delay before locking storage node for upgrade

Adds support to the mtcAgent for detecting the absence of the 'host
services execution enhancement feature' in the mtcClient and falls back
to the pre-upgrade behavior in that case. When the mtcAgent locks a
storage node that is running the pre-upgrade version, it waits 90
seconds before declaring that storage host locked-disabled.

Story: 2002886
Task: 22847
Change-Id: I99fb5576e027621019adb5eff553d52773f608db
Signed-off-by: Jack Ding <jack.ding@windriver.com>
This commit is contained in:
Eric MacDonald 2018-05-02 10:31:29 -05:00 committed by Jack Ding
parent 51d572ceed
commit 7be3b9085a
3 changed files with 36 additions and 0 deletions

View File

@ -56,6 +56,7 @@ typedef enum
MTC_CMD_STAGE__HOST_SERVICES_SEND_CMD,
MTC_CMD_STAGE__HOST_SERVICES_RECV_ACK,
MTC_CMD_STAGE__HOST_SERVICES_WAIT_FOR_RESULT,
MTC_CMD_STAGE__STORAGE_LOCK_DELAY,
/* Common command done stage */
MTC_CMD_STAGE__DONE,

View File

@ -88,6 +88,7 @@
#define MTC_MIN_ONLINE_PERIOD_SECS (7)
#define MTC_RETRY_WAIT (5)
#define MTC_AGENT_TIMEOUT_EXTENSION (5)
#define MTC_LOCK_CEPH_DELAY (90) /* secs to hold off a legacy-mode storage host lock so ceph OSDs can shut down */
/** Host must stay enabled for this long for the
* failed_recovery_counter to get cleared */

View File

@ -231,6 +231,40 @@ int nodeLinkClass::cmd_handler ( struct nodeLinkClass::node * node_ptr )
dlog ("%s %s request ack (legacy mode)\n",
node_ptr->hostname.c_str(),
node_ptr->host_services_req.name.c_str());
// Upgrades that lock storage nodes can
// lead to storage corruption if ceph isn't given
// enough time to shut down.
//
// The following special case for storage node
// lock forces a 90 sec holdoff for pre-upgrade storage
// hosts ; i.e. legacy mode.
//
if ( is_storage(node_ptr) )
{
ilog ("%s waiting for ceph OSD shutdown\n", node_ptr->hostname.c_str());
mtcTimer_reset ( node_ptr->mtcCmd_timer );
mtcTimer_start ( node_ptr->mtcCmd_timer, mtcTimer_handler, MTC_LOCK_CEPH_DELAY );
node_ptr->mtcCmd_work_fifo_ptr->stage = MTC_CMD_STAGE__STORAGE_LOCK_DELAY ;
}
else
{
node_ptr->mtcCmd_work_fifo_ptr->status =
node_ptr->host_services_req.status = PASS ;
node_ptr->mtcCmd_work_fifo_ptr->stage = MTC_CMD_STAGE__DONE ;
}
}
break ;
}
case MTC_CMD_STAGE__STORAGE_LOCK_DELAY:
{
/* wait for the timer to expire before moving on */
if ( mtcTimer_expired ( node_ptr->mtcCmd_timer ) )
{
ilog ("%s ceph OSD shutdown wait complete\n",
node_ptr->hostname.c_str());
node_ptr->mtcCmd_work_fifo_ptr->status =
node_ptr->host_services_req.status = PASS ;