From 23d9dd711c1d0cf4be65b90d85d559489a3969fb Mon Sep 17 00:00:00 2001 From: Eric MacDonald Date: Fri, 8 Jun 2018 10:44:14 -0400 Subject: [PATCH] Mtce: Enable offline handler during Graceful recovery Story: 2002882 Task: 22845 Change-Id: Ie5e43a0fe150d277514ef75b9e4c9461951efc26 Signed-off-by: Jack Ding --- .../cgts-mtce-common-1.0/common/nodeClass.cpp | 3 +++ .../cgts-mtce-common-1.0/common/nodeClass.h | 4 ++-- .../maintenance/mtcNodeHdlrs.cpp | 23 +++++++++++-------- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/mtce-common/cgts-mtce-common-1.0/common/nodeClass.cpp b/mtce-common/cgts-mtce-common-1.0/common/nodeClass.cpp index d096e81a..4352950e 100755 --- a/mtce-common/cgts-mtce-common-1.0/common/nodeClass.cpp +++ b/mtce-common/cgts-mtce-common-1.0/common/nodeClass.cpp @@ -540,6 +540,7 @@ nodeLinkClass::node* nodeLinkClass::addNode( string hostname ) ptr->reboot_cmd_ack_mgmnt = false ; ptr->reboot_cmd_ack_infra = false ; + ptr->offline_log_throttle = 0 ; ptr->offline_log_reported = true ; ptr->online_log_reported = false ; @@ -2448,6 +2449,7 @@ void nodeLinkClass::start_offline_handler ( struct nodeLinkClass::node * node_pt operState_enum_to_str(node_ptr->operState).c_str(), availStatus_enum_to_str(node_ptr->availStatus).c_str(), offlineStage_saved); + node_ptr->offline_log_throttle = 0; } void nodeLinkClass::stop_offline_handler ( struct nodeLinkClass::node * node_ptr ) @@ -2462,6 +2464,7 @@ void nodeLinkClass::stop_offline_handler ( struct nodeLinkClass::node * node_ptr node_ptr->offlineStage); node_ptr->offlineStage = MTC_OFFLINE__IDLE ; } + node_ptr->offline_log_throttle = 0; } string nodeLinkClass::get_host ( string uuid ) diff --git a/mtce-common/cgts-mtce-common-1.0/common/nodeClass.h b/mtce-common/cgts-mtce-common-1.0/common/nodeClass.h index 1e2c80f4..8940f39b 100755 --- a/mtce-common/cgts-mtce-common-1.0/common/nodeClass.h +++ b/mtce-common/cgts-mtce-common-1.0/common/nodeClass.h @@ -315,8 +315,8 @@ private: bool mtcAlive_online ; /* this is consumed by online and offline handler */ bool mtcAlive_offline ; /* this is consumed by reset progression handler */ - int offline_search_count ; /* count back-2-back mtcAlive request misses */ - + int offline_search_count ; /* count back-2-back mtcAlive request misses */ + int offline_log_throttle ; /* throttle offline handler logs */ bool offline_log_reported ; /* prevents offline/online log flooding when */ bool online_log_reported ; /* availStatus switches between these states */ /* and failed */ diff --git a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeHdlrs.cpp b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeHdlrs.cpp index ffb6e44d..febfb164 100755 --- a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeHdlrs.cpp +++ b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeHdlrs.cpp @@ -1605,6 +1605,9 @@ int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr ) * it to compare as a dicision point later on in recovery handling */ node_ptr->uptime_save = node_ptr->uptime ; + /* send mtcAlive requests */ + start_offline_handler ( node_ptr ); + /* A host is considered failed if it goes away for more * than a Loss Of Communication Recovery Timeout specified as mtc.ini * configuration option 'loc_recovery_timeout' time in seconds. */ @@ -1631,6 +1634,8 @@ int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr ) node_ptr->hostname.c_str(), node_ptr->was_dor_recovery_mode ? " (DOR)" : "" ); + stop_offline_handler ( node_ptr ); + /* Check to see if the host is/got configured correctly */ if ( (node_ptr->mtce_flags & MTC_FLAG__I_AM_CONFIGURED) == 0 ) { @@ -1737,6 +1742,8 @@ int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr ) /* A timer ring indicates that the host is not up */ else if ( node_ptr->mtcTimer.ring == true ) { + stop_offline_handler ( node_ptr ); + /* So now this means the node is failed * we need to stop services and transition into * a longer 'waiting' for the asynchronous mtcAlive @@ -3107,7 +3114,7 @@ int nodeLinkClass::offline_handler ( struct nodeLinkClass::node * node_ptr ) { node_ptr->mtcAlive_mgmnt = false ; node_ptr->mtcAlive_infra = false ; - + node_ptr->offline_log_throttle = 0 ; node_ptr->offline_search_count = 0 ; mtcTimer_reset ( node_ptr->offline_timer ); @@ -3156,11 +3163,6 @@ int nodeLinkClass::offline_handler ( struct nodeLinkClass::node * node_ptr ) plog ("%s offline (external)\n", node_ptr->hostname.c_str()); node_ptr->offlineStage = MTC_OFFLINE__IDLE ; } - else if ( node_ptr->operState == MTC_OPER_STATE__ENABLED ) - { - slog ("%s offline search while 'enabled' ; invalid\n", node_ptr->hostname.c_str()); - node_ptr->offlineStage = MTC_OFFLINE__IDLE ; - } else if ( !node_ptr->mtcAlive_mgmnt && !node_ptr->mtcAlive_infra ) { if ( ++node_ptr->offline_search_count > offline_threshold ) @@ -3192,10 +3194,11 @@ int nodeLinkClass::offline_handler ( struct nodeLinkClass::node * node_ptr ) node_ptr->mtcAlive_online = true ; if ( node_ptr->mtcAlive_mgmnt || node_ptr->mtcAlive_infra ) { - ilog ("%s still seeing mtcAlive (%c:%c)\n", - node_ptr->hostname.c_str(), - node_ptr->mtcAlive_mgmnt ? 'Y' : 'n', - node_ptr->mtcAlive_infra ? 'Y' : 'n'); + ilog_throttled ( node_ptr->offline_log_throttle, 10, + "%s still seeing mtcAlive (%c:%c)\n", + node_ptr->hostname.c_str(), + node_ptr->mtcAlive_mgmnt ? 'Y' : 'n', + node_ptr->mtcAlive_infra ? 'Y' : 'n'); } else {