From 484d662cb748747aea4c5137c340cc7ac316d21c Mon Sep 17 00:00:00 2001 From: Eric MacDonald Date: Wed, 16 Dec 2020 21:16:48 -0500 Subject: [PATCH] Fix hbsAgent log flooding when SM heartbeat fails persistently If the SM part of this update is missing or the SM heartbeat is missing for a long period of time the hbsAgent produces 5 logs every 10 seconds reporting the missing SM heartbeat. This is a follow-up update to its parent update https://review.opendev.org/c/starlingx/metal/+/751558 This update throttles the warning log and corresponding cluster dump when SM heartbeat is persistently missing. PASS: Verify hbsAgent service and log behavior when SM heartbeat is persistently missing. Change-Id: Ib379ed5d37b5349ca170b5661a930b6a71c2bed1 Partial-Fix: 1895350 Signed-off-by: Eric MacDonald --- mtce/src/heartbeat/hbsAgent.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/mtce/src/heartbeat/hbsAgent.cpp b/mtce/src/heartbeat/hbsAgent.cpp index 4eec5a29..c8cb0305 100644 --- a/mtce/src/heartbeat/hbsAgent.cpp +++ b/mtce/src/heartbeat/hbsAgent.cpp @@ -1521,6 +1521,7 @@ void hbs_sm_handler ( void ) * False if time delta is greater * ***************************************************************************/ +#define HUGE_NUMBER_B2B_SM_HEARTBEAT_MISSES (10000) bool manage_sm_heartbeat ( void ) { struct timespec ts ; @@ -1532,8 +1533,9 @@ bool manage_sm_heartbeat ( void ) if ( delta_in_ms > SM_HEARTBEAT_PULSE_PERIOD_MSECS ) { sm_heartbeat_count = 0; - if (( ++sm_heartbeat_count_b2b_misses < 20 )|| - (!( sm_heartbeat_count_b2b_misses % 100 ))) + if ((( ++sm_heartbeat_count_b2b_misses < 20 ) || + (!( sm_heartbeat_count_b2b_misses % 1000 ))) && + ( sm_heartbeat_count_b2b_misses < HUGE_NUMBER_B2B_SM_HEARTBEAT_MISSES )) { wlog("SM Heartbeat missing since %ld.%03ld secs ago ; HBS Period Misses:%3d ; Running HB Count:%4d", delta.secs, delta.msecs, @@ -2523,7 +2525,9 @@ void daemon_service_run ( void ) } } /* log cluster throttled */ - if (( heartbeat_ok == false ) && ( !( sm_heartbeat_count_b2b_misses % 100 ))) + if ((( heartbeat_ok == false ) && + ( !( sm_heartbeat_count_b2b_misses % 1000 ))) && + ( sm_heartbeat_count_b2b_misses < HUGE_NUMBER_B2B_SM_HEARTBEAT_MISSES )) { hbs_state_audit ( ); }