Add mtce support for manifest apply over initial controller unlock

The introduction of Ansible requires the execution of a manifest
as part of the first controller's initial unlock.

Unfortunately, maintenance issues the lazy self reboot immediately
upon receiving the unlock command, interrupting the in-progress
manifest apply.

This update detects the initial self reboot of the only
provisioned host and waits, up to a timeout period, for an
unlock ready signal that is provided by successful completion
of the 'initial-unlock-manifest'.

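Seeing the unlock ready signal prior to the timeout allows
the unlock self reboot to proceed normally. If the timeout
expires first, the host is returned to the locked-disabled
state so that the unlock can be retried.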

Depends-On:https://review.openstack.org/#/c/643914
Story:2004695
Task:30243

Test Plan:
PASS: Verify timeout handling - allowing retry
PASS: Verify with signal - immediate
PASS: Verify with signal - before timeout

Change-Id: I3633e772310c36af5df57364f66c14f037b2ea8f
Signed-off-by: Eric MacDonald <eric.macdonald@windriver.com>
This commit is contained in:
Eric MacDonald 2019-03-29 09:06:27 -04:00
parent f10b9a5170
commit 543a89eaf6
3 changed files with 43 additions and 0 deletions

View File

@ -107,6 +107,7 @@ void daemon_exit ( void );
#define NODE_RESET_FILE ((const char *)"/var/run/.node_reset")
#define SMGMT_DEGRADED_FILE ((const char *)"/var/run/.sm_degraded")
#define SMGMT_UNHEALTHY_FILE ((const char *)"/var/run/.sm_node_unhealthy")
/* Flag file checked by the enable handler when the only provisioned host
 * is unlocked in simplex mode; the unlock self reboot is held off until
 * this file is present or the wait timer expires.
 * NOTE(review): per the change description the file is produced by
 * successful completion of the initial unlock manifest - confirm creator. */
#define UNLOCK_READY_FILE ((const char *)"/etc/platform/.unlock_ready")
/** path to and module init file name */
#define MTCE_CONF_FILE ((const char *)"/etc/mtc.conf")
@ -284,6 +285,7 @@ void daemon_exit ( void );
#define MTC_TASK_SELF_UNLOCK_MSG "Unlocking active controller, please stand-by while it reboots"
#define MTC_TASK_FAILED_SWACT_REQ "Critical failure.Requesting SWACT to enabled standby controller"
#define MTC_TASK_FAILED_NO_BACKUP "Critical failure.Please provision/enable standby controller"
/* Task string posted by the enable handler when it starts waiting for
 * UNLOCK_READY_FILE ahead of the unlock self reboot */
#define MTC_TASK_MANIFEST_APPLY "Applying manifest, please stand-by ..."
#define COMMAND_RETRY_DELAY (8) /* from sshUtil.h */
#define COMMAND_DELAY (2) /* from sshUtil.h */

View File

@ -1414,6 +1414,9 @@ public:
/** Number of provisioned hosts (nodes) */
int hosts ;
/** True while the enable handler is waiting for UNLOCK_READY_FILE
 * (only one provisioned host, simplex mode) */
bool unlock_ready_wait ;
/** Host has been deleted */
bool host_deleted ;

View File

@ -474,6 +474,44 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr )
else
aio = false ;
if (( this->hosts == 1 ) &&
( daemon_is_file_present (PLATFORM_SIMPLEX_MODE) == true ))
{
/* Check for first pass through case where we need to
* start the timer */
if ( this->unlock_ready_wait == false )
{
if ( daemon_is_file_present(UNLOCK_READY_FILE) == false )
{
mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_MINS_5 );
mtcInvApi_update_task_now ( node_ptr, MTC_TASK_MANIFEST_APPLY );
this->unlock_ready_wait = true ;
return (PASS);
}
}
else
{
if ( daemon_is_file_present(UNLOCK_READY_FILE) == true )
{
mtcTimer_reset(node_ptr->mtcTimer);
/* fall through to proceed with self reboot */
}
else if ( node_ptr->mtcTimer.ring == true )
{
this->unlock_ready_wait = false ;
mtcInvApi_update_task_now ( node_ptr, "Manifest apply timeout ; Unlock to retry" );
mtcInvApi_update_states_now ( node_ptr, "locked", "disabled" , "online", "disabled", "offline" );
adminActionChange ( node_ptr, MTC_ADMIN_ACTION__NONE );
return (PASS);
}
else
{
/* wait longer */
return (RETRY);
}
}
}
mtcInvApi_update_states_now ( node_ptr, "unlocked", "disabled" , "offline", "disabled", "offline" );
mtcInvApi_update_task_now ( node_ptr, aio ? MTC_TASK_CPE_SX_UNLOCK_MSG : MTC_TASK_SELF_UNLOCK_MSG );