Mtce: Enable offline handler during Graceful recovery

Story: 2002882
Task: 22845

Change-Id: Ie5e43a0fe150d277514ef75b9e4c9461951efc26
Signed-off-by: Jack Ding <jack.ding@windriver.com>
This commit is contained in:
Eric MacDonald 2018-06-08 10:44:14 -04:00 committed by Jack Ding
parent 76fbef1d01
commit 23d9dd711c
3 changed files with 18 additions and 12 deletions

View File

@ -540,6 +540,7 @@ nodeLinkClass::node* nodeLinkClass::addNode( string hostname )
ptr->reboot_cmd_ack_mgmnt = false ; ptr->reboot_cmd_ack_mgmnt = false ;
ptr->reboot_cmd_ack_infra = false ; ptr->reboot_cmd_ack_infra = false ;
ptr->offline_log_throttle = 0 ;
ptr->offline_log_reported = true ; ptr->offline_log_reported = true ;
ptr->online_log_reported = false ; ptr->online_log_reported = false ;
@ -2448,6 +2449,7 @@ void nodeLinkClass::start_offline_handler ( struct nodeLinkClass::node * node_pt
operState_enum_to_str(node_ptr->operState).c_str(), operState_enum_to_str(node_ptr->operState).c_str(),
availStatus_enum_to_str(node_ptr->availStatus).c_str(), availStatus_enum_to_str(node_ptr->availStatus).c_str(),
offlineStage_saved); offlineStage_saved);
node_ptr->offline_log_throttle = 0;
} }
void nodeLinkClass::stop_offline_handler ( struct nodeLinkClass::node * node_ptr ) void nodeLinkClass::stop_offline_handler ( struct nodeLinkClass::node * node_ptr )
@ -2462,6 +2464,7 @@ void nodeLinkClass::stop_offline_handler ( struct nodeLinkClass::node * node_ptr
node_ptr->offlineStage); node_ptr->offlineStage);
node_ptr->offlineStage = MTC_OFFLINE__IDLE ; node_ptr->offlineStage = MTC_OFFLINE__IDLE ;
} }
node_ptr->offline_log_throttle = 0;
} }
string nodeLinkClass::get_host ( string uuid ) string nodeLinkClass::get_host ( string uuid )

View File

@ -315,8 +315,8 @@ private:
bool mtcAlive_online ; /* this is consumed by online and offline handler */ bool mtcAlive_online ; /* this is consumed by online and offline handler */
bool mtcAlive_offline ; /* this is consumed by reset progression handler */ bool mtcAlive_offline ; /* this is consumed by reset progression handler */
int offline_search_count ; /* count back-2-back mtcAlive request misses */ int offline_search_count ; /* count back-2-back mtcAlive request misses */
int offline_log_throttle ; /* throttle offline handler logs */
bool offline_log_reported ; /* prevents offline/online log flooding when */ bool offline_log_reported ; /* prevents offline/online log flooding when */
bool online_log_reported ; /* availStatus switches between these states */ bool online_log_reported ; /* availStatus switches between these states */
/* and failed */ /* and failed */

View File

@ -1605,6 +1605,9 @@ int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr )
* it to compare as a decision point later on in recovery handling */ * it to compare as a decision point later on in recovery handling */
node_ptr->uptime_save = node_ptr->uptime ; node_ptr->uptime_save = node_ptr->uptime ;
/* send mtcAlive requests */
start_offline_handler ( node_ptr );
/* A host is considered failed if it goes away for more /* A host is considered failed if it goes away for more
* than a Loss Of Communication Recovery Timeout specified as mtc.ini * than a Loss Of Communication Recovery Timeout specified as mtc.ini
* configuration option 'loc_recovery_timeout' time in seconds. */ * configuration option 'loc_recovery_timeout' time in seconds. */
@ -1631,6 +1634,8 @@ int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr )
node_ptr->hostname.c_str(), node_ptr->hostname.c_str(),
node_ptr->was_dor_recovery_mode ? " (DOR)" : "" ); node_ptr->was_dor_recovery_mode ? " (DOR)" : "" );
stop_offline_handler ( node_ptr );
/* Check to see if the host is/got configured correctly */ /* Check to see if the host is/got configured correctly */
if ( (node_ptr->mtce_flags & MTC_FLAG__I_AM_CONFIGURED) == 0 ) if ( (node_ptr->mtce_flags & MTC_FLAG__I_AM_CONFIGURED) == 0 )
{ {
@ -1737,6 +1742,8 @@ int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr )
/* A timer ring indicates that the host is not up */ /* A timer ring indicates that the host is not up */
else if ( node_ptr->mtcTimer.ring == true ) else if ( node_ptr->mtcTimer.ring == true )
{ {
stop_offline_handler ( node_ptr );
/* So now this means the node is failed /* So now this means the node is failed
* we need to stop services and transition into * we need to stop services and transition into
* a longer 'waiting' for the asynchronous mtcAlive * a longer 'waiting' for the asynchronous mtcAlive
@ -3107,7 +3114,7 @@ int nodeLinkClass::offline_handler ( struct nodeLinkClass::node * node_ptr )
{ {
node_ptr->mtcAlive_mgmnt = false ; node_ptr->mtcAlive_mgmnt = false ;
node_ptr->mtcAlive_infra = false ; node_ptr->mtcAlive_infra = false ;
node_ptr->offline_log_throttle = 0 ;
node_ptr->offline_search_count = 0 ; node_ptr->offline_search_count = 0 ;
mtcTimer_reset ( node_ptr->offline_timer ); mtcTimer_reset ( node_ptr->offline_timer );
@ -3156,11 +3163,6 @@ int nodeLinkClass::offline_handler ( struct nodeLinkClass::node * node_ptr )
plog ("%s offline (external)\n", node_ptr->hostname.c_str()); plog ("%s offline (external)\n", node_ptr->hostname.c_str());
node_ptr->offlineStage = MTC_OFFLINE__IDLE ; node_ptr->offlineStage = MTC_OFFLINE__IDLE ;
} }
else if ( node_ptr->operState == MTC_OPER_STATE__ENABLED )
{
slog ("%s offline search while 'enabled' ; invalid\n", node_ptr->hostname.c_str());
node_ptr->offlineStage = MTC_OFFLINE__IDLE ;
}
else if ( !node_ptr->mtcAlive_mgmnt && !node_ptr->mtcAlive_infra ) else if ( !node_ptr->mtcAlive_mgmnt && !node_ptr->mtcAlive_infra )
{ {
if ( ++node_ptr->offline_search_count > offline_threshold ) if ( ++node_ptr->offline_search_count > offline_threshold )
@ -3192,10 +3194,11 @@ int nodeLinkClass::offline_handler ( struct nodeLinkClass::node * node_ptr )
node_ptr->mtcAlive_online = true ; node_ptr->mtcAlive_online = true ;
if ( node_ptr->mtcAlive_mgmnt || node_ptr->mtcAlive_infra ) if ( node_ptr->mtcAlive_mgmnt || node_ptr->mtcAlive_infra )
{ {
ilog ("%s still seeing mtcAlive (%c:%c)\n", ilog_throttled ( node_ptr->offline_log_throttle, 10,
node_ptr->hostname.c_str(), "%s still seeing mtcAlive (%c:%c)\n",
node_ptr->mtcAlive_mgmnt ? 'Y' : 'n', node_ptr->hostname.c_str(),
node_ptr->mtcAlive_infra ? 'Y' : 'n'); node_ptr->mtcAlive_mgmnt ? 'Y' : 'n',
node_ptr->mtcAlive_infra ? 'Y' : 'n');
} }
else else
{ {