diff --git a/mtce-common/src/common/msgClass.cpp b/mtce-common/src/common/msgClass.cpp index af284c25..2caf7185 100644 --- a/mtce-common/src/common/msgClass.cpp +++ b/mtce-common/src/common/msgClass.cpp @@ -1041,19 +1041,25 @@ int msgClassRx::initSocket(bool allow_any, bool is_multicast) * @param IP protocol * @param name of interface to bind to, or null otherwise */ -msgClassTx::msgClassTx(const char* address, int port, int proto, const char* interface) +msgClassTx::msgClassTx(const char* address, int port, int proto, const char* interface, bool quiet) { this->dst_addr = new msgClassAddr(address, port, proto); this->src_addr = new msgClassAddr(address, port, proto); if(interface) { - ilog ("Creating %s socket on port %d with address: %s\n", interface, port, address); + if ( !quiet) + { + ilog ("Creating %s socket on port %d with address: %s\n", interface, port, address); + } this->interface = new char[strlen(interface)+1]; snprintf(this->interface, strlen(interface)+1, "%s", interface); } else { - ilog ("Creating socket on port %d with address: %s\n", port, address); + if ( !quiet) + { + ilog ("Creating socket on port %d with address: %s\n", port, address); + } this->interface = NULL; } this->return_status = initSocket(); diff --git a/mtce-common/src/common/msgClass.h b/mtce-common/src/common/msgClass.h index cbeee29f..abcc88a7 100644 --- a/mtce-common/src/common/msgClass.h +++ b/mtce-common/src/common/msgClass.h @@ -207,7 +207,7 @@ private: class msgClassTx : public msgClassSock { public: - msgClassTx(const char* address, int port, int proto, const char* interface=NULL); + msgClassTx(const char* address, int port, int proto, const char* interface=NULL, bool quiet=false); msgClassTx(const msgClassAddr& addr, const char* interface=NULL); private: int initSocket(); diff --git a/mtce-common/src/common/pingUtil.cpp b/mtce-common/src/common/pingUtil.cpp index 01edd64d..308ebb02 100644 --- a/mtce-common/src/common/pingUtil.cpp +++ b/mtce-common/src/common/pingUtil.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017 Wind River Systems, Inc. + * Copyright (c) 2015-2017,2020 Wind River Systems, Inc. * * SPDX-License-Identifier: Apache-2.0 * @@ -89,6 +89,7 @@ int pingUtil_init ( string hostname, ping_info_type & ping_info, const char * ip ping_info.sequence = getpid() ; ping_info.recv_retries = 0 ; ping_info.send_retries = 0 ; + ping_info.fail_debounce = 0 ; ping_info.requested= false ; ping_info.received = false ; ping_info.recv_flush_highwater=2; @@ -97,7 +98,7 @@ int pingUtil_init ( string hostname, ping_info_type & ping_info, const char * ip ping_info.monitoring = false ; ping_info.stage = PINGUTIL_MONITOR_STAGE__OPEN ; - ping_info.sock = new msgClassTx(ip_address, 0, IPPROTO_RAW, NULL ); + ping_info.sock = new msgClassTx(ip_address, 0, IPPROTO_RAW, NULL, true); /* Validate the socket setup */ if ( ping_info.sock == NULL ) @@ -194,7 +195,6 @@ int pingUtil_send ( ping_info_type & ping_info ) } ping_info.sequence++ ; - ping_info.recv_retries = 0; if ( ping_info.ipv6_mode == false ) { @@ -238,8 +238,6 @@ int pingUtil_send ( ping_info_type & ping_info ) bytes = ping_info.sock->write( (const char*)&ping6_tx, sizeof(ping6_tx_message_type)); } - ping_info.recv_retries = 0; - if ( bytes <= 0 ) { wlog ("%s ping %s send failed (rc:%d) (%d:%m)\n", ping_info.hostname.c_str(), ping_info.ip.c_str(), bytes, errno ); @@ -508,10 +506,19 @@ int pingUtil_recv ( ping_info_type & ping_info, if ( ping_info.monitoring == false ) { /* ... only want the log when we are first connecting */ - ilog ("%s ping to %s ok ; (try %d)", + if ( ping_info.recv_retries ) + { + ilog ("%s ping %s ok ; (try %d)", + ping_info.hostname.c_str(), + ping_info.ip.c_str(), + ping_info.recv_retries+1); + } + else + { + ilog ("%s ping %s ok", ping_info.hostname.c_str(), - ping_info.ip.c_str(), - ping_info.recv_retries+1); + ping_info.ip.c_str()); + } } else { @@ -575,6 +582,7 @@ void pingUtil_fini ( ping_info_type & ping_info ) ping_info.recv_retries = 0; ping_info.send_retries = 0; + ping_info.fail_debounce = 0; ping_info.sequence = 0; ping_info.identity = 0; @@ -585,10 +593,7 @@ void pingUtil_fini ( ping_info_type & ping_info ) void pingUtil_restart ( ping_info_type & ping_info ) { - ilog ("%s ping monitor restart", ping_info.hostname.c_str()); - ping_info.ok = false ; - ping_info.send_retries = 0 ; - ping_info.monitoring = false ; + dlog ("%s ping monitor restart", ping_info.hostname.c_str()); pingUtil_fini (ping_info); pingUtil_init (ping_info.hostname, ping_info, ping_info.ip.data()); @@ -668,7 +673,7 @@ int pingUtil_acc_monitor ( ping_info_type & ping_info ) if ( ++ping_info.send_retries > PING_MAX_SEND_RETRIES ) { - elog ("%s ping to %s failed\n", + dlog ("%s ping %s failed ; max send retries\n", ping_info.hostname.c_str(), ping_info.ip.c_str()); @@ -676,7 +681,9 @@ int pingUtil_acc_monitor ( ping_info_type & ping_info ) } else if ( pingUtil_send ( ping_info ) ) { - elog ("%s failed to send bmc ping\n", ping_info.hostname.c_str()); + wlog ("%s ping %s send failed\n", + ping_info.hostname.c_str(), + ping_info.ip.c_str()); ping_info.stage = PINGUTIL_MONITOR_STAGE__FAIL ; } else @@ -694,6 +701,10 @@ int pingUtil_acc_monitor ( ping_info_type & ping_info ) mtcTimer_reset ( ping_info.timer ); } mtcTimer_start_msec ( ping_info.timer, ping_info.timer_handler, PING_WAIT_TIMER_MSEC ); + + /* send was ok so clear its retry counter */ + ping_info.send_retries = 0 ; + ping_info.stage = PINGUTIL_MONITOR_STAGE__RECV ; } } @@ -711,25 +722,17 @@ int pingUtil_acc_monitor ( ping_info_type & ping_info ) { if ( ++ping_info.recv_retries > (PING_MAX_RECV_RETRIES) ) { - /* only print this log once on the resend attempt */ - if ( ping_info.send_retries >= PING_MAX_SEND_RETRIES ) - { - mlog ("%s ping recv from %s missed ; identity:%04x sequence:%04x (try %d of %d)\n", - ping_info.hostname.c_str(), - ping_info.ip.c_str(), - ping_info.identity, - ping_info.sequence, - ping_info.recv_retries-1, - PING_MAX_RECV_RETRIES); - } - ping_info.stage = PINGUTIL_MONITOR_STAGE__SEND ; - break ; + ping_info.recv_retries = 0; + dlog ("%s ping %s failed ; max recv retries\n", + ping_info.hostname.c_str(), + ping_info.ip.c_str()); + ping_info.stage = PINGUTIL_MONITOR_STAGE__FAIL ; } else { - blog1 ("%s retrying ping\n", ping_info.hostname.c_str()); + blog1 ("%s retrying ping\n", ping_info.hostname.c_str()); + mtcTimer_start_msec ( ping_info.timer, ping_info.timer_handler, PING_RETRY_DELAY_MSECS ); } - mtcTimer_start_msec ( ping_info.timer, ping_info.timer_handler, PING_RETRY_DELAY_MSECS ); } else { @@ -746,13 +749,7 @@ int pingUtil_acc_monitor ( ping_info_type & ping_info ) ping_info.send_retries = 0 ; ping_info.recv_retries = 0 ; - -#ifdef WANT_FIT_TESTING - if ( daemon_want_fit ( FIT_CODE__FAST_PING_AUDIT_HOST, ping_info.hostname ) == true ) - interval = 3 ; - if ( daemon_want_fit ( FIT_CODE__FAST_PING_AUDIT_ALL ) == true ) - interval = 3 ; -#endif + ping_info.fail_debounce = 0 ; mtcTimer_start ( ping_info.timer, ping_info.timer_handler, interval ); ping_info.stage = PINGUTIL_MONITOR_STAGE__WAIT ; } @@ -766,14 +763,30 @@ int pingUtil_acc_monitor ( ping_info_type & ping_info ) } case PINGUTIL_MONITOR_STAGE__FAIL: { - ping_info.ok = false ; - ping_info.send_retries = 0 ; - ping_info.monitoring = false ; - pingUtil_fini (ping_info); - pingUtil_init (ping_info.hostname, ping_info, ping_info.ip.data()); + // Assume socket reinit unless thresholding */ + bool reinit = true ; + if ( ping_info.ok == true ) + { + if ( ++ping_info.fail_debounce < PING_FAIL_DEBOUNCE_THLD ) + { + reinit = false ; + wlog("%s ping %s miss (%d of %d)", + ping_info.hostname.c_str(), + ping_info.ip.c_str(), + ping_info.fail_debounce, + PING_FAIL_DEBOUNCE_THLD ); + } + } + if ( reinit == true ) + { + elog("%s ping %s fail", ping_info.hostname.c_str(), + ping_info.ip.c_str()); + pingUtil_fini (ping_info); + pingUtil_init (ping_info.hostname, ping_info, ping_info.ip.data()); + } mtcTimer_reset ( ping_info.timer ); - mtcTimer_start ( ping_info.timer, ping_info.timer_handler, PING_MONITOR_INTERVAL ); + mtcTimer_start ( ping_info.timer, ping_info.timer_handler, PING_FAIL_RETRY_DELAY ); ping_info.stage = PINGUTIL_MONITOR_STAGE__WAIT; break ; } diff --git a/mtce-common/src/common/pingUtil.h b/mtce-common/src/common/pingUtil.h index 045ffb7e..7a9e4f22 100644 --- a/mtce-common/src/common/pingUtil.h +++ b/mtce-common/src/common/pingUtil.h @@ -2,7 +2,7 @@ #define __INCLUDE_PINGUTIL_H__ /* - * Copyright (c) 2015-2017 Wind River Systems, Inc. + * Copyright (c) 2015-2017,2020 Wind River Systems, Inc. * * SPDX-License-Identifier: Apache-2.0 * @@ -47,8 +47,8 @@ using namespace std; #define PING_WAIT_TIMER_MSEC (200) #define PING_RETRY_DELAY_MSECS (200) #define PING_MONITOR_INTERVAL (60) -#define PING_MISS_RETRY_DELAY (5) - +#define PING_FAIL_RETRY_DELAY (30) +#define PING_FAIL_DEBOUNCE_THLD (3) #define PING_MESSAGE_LEN (80) typedef enum @@ -73,6 +73,7 @@ typedef struct unsigned short sequence ; int send_retries ; int recv_retries ; + int fail_debounce ; bool ipv6_mode ; bool received ; bool requested ; diff --git a/mtce/centos/build_srpm.data b/mtce/centos/build_srpm.data index 7bbb93a8..5bc6d613 100644 --- a/mtce/centos/build_srpm.data +++ b/mtce/centos/build_srpm.data @@ -1,3 +1,3 @@ SRC_DIR="src" -TIS_PATCH_VER=156 +TIS_PATCH_VER=157 BUILD_IS_SLOW=5 diff --git a/mtce/src/common/nodeClass.cpp b/mtce/src/common/nodeClass.cpp index d3775089..ae43fe64 100755 --- a/mtce/src/common/nodeClass.cpp +++ b/mtce/src/common/nodeClass.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2016 Wind River Systems, Inc. + * Copyright (c) 2013-2020 Wind River Systems, Inc. * * SPDX-License-Identifier: Apache-2.0 * @@ -4322,7 +4322,9 @@ void nodeLinkClass::bmc_access_data_init ( struct nodeLinkClass::node * node_ptr node_ptr->bm_ping_info.stage = PINGUTIL_MONITOR_STAGE__OPEN ; mtcTimer_reset ( node_ptr->bm_ping_info.timer ); node_ptr->bm_ping_info.timer_handler = &mtcTimer_handler ; + node_ptr->bm_ping_info.ip = node_ptr->bm_ip ; + node_ptr->bm_ping_info.hostname = node_ptr->hostname ; node_ptr->bmc_protocol_learning = false ; diff --git a/mtce/src/hwmon/hwmonClass.cpp b/mtce/src/hwmon/hwmonClass.cpp index ab131c4f..7ccef7b5 100644 --- a/mtce/src/hwmon/hwmonClass.cpp +++ b/mtce/src/hwmon/hwmonClass.cpp @@ -85,6 +85,9 @@ void hwmonHostClass::bmc_data_init ( struct hwmonHostClass::hwmon_host * host_pt host_ptr->accessible = false; host_ptr->degraded = false ; + host_ptr->ping_info.ip = host_ptr->bm_ip ; + host_ptr->ping_info.hostname = host_ptr->hostname ; + hwmon_del_groups ( host_ptr ); hwmon_del_sensors ( host_ptr ); diff --git a/mtce/src/hwmon/hwmonFsm.cpp b/mtce/src/hwmon/hwmonFsm.cpp index 0971a944..dcecacaa 100644 --- a/mtce/src/hwmon/hwmonFsm.cpp +++ b/mtce/src/hwmon/hwmonFsm.cpp @@ -96,25 +96,9 @@ void hwmonHostClass::hwmon_fsm ( void ) pingUtil_acc_monitor ( host_ptr->ping_info ); - if ( host_ptr->ping_info.ok == false ) - { - /* Auto correct key ping information ; should never occur but if it does ... */ - if (( host_ptr->ping_info.hostname.empty()) || - ( hostUtil_is_valid_ip_addr(host_ptr->ping_info.ip ) == false )) - { - slog ("%s host ping info missing ; (%d:%d)\n", - host_ptr->hostname.c_str(), - host_ptr->ping_info.hostname.empty(), - host_ptr->ping_info.ip.empty()); - - host_ptr->ping_info.hostname = host_ptr->hostname ; - host_ptr->ping_info.ip = host_ptr->bm_ip ; - } - } - /* Check to see if sensor monitoring for this host is * disabled or the bm password has not yet been learned */ - else if (( host_ptr->monitor == false ) || ( host_ptr->bm_pw.empty())) + if (( host_ptr->monitor == false ) || ( host_ptr->bm_pw.empty())) { /* ... make sure the thread sits in the * idle state while disabled or there diff --git a/mtce/src/hwmon/hwmonHdlr.cpp b/mtce/src/hwmon/hwmonHdlr.cpp index 4eae4eb8..f06506f2 100644 --- a/mtce/src/hwmon/hwmonHdlr.cpp +++ b/mtce/src/hwmon/hwmonHdlr.cpp @@ -1165,12 +1165,12 @@ int hwmonHostClass::bmc_sensor_monitor ( struct hwmonHostClass::hwmon_host * hos { if ( host_ptr->bmc_thread_info.status == FAIL_SYSTEM_CALL ) { - elog ("%s '%s' system call failed (retry %d of %d) (%s)", + elog ("%s '%s' system call failed (retry %d of %d)", host_ptr->bmc_thread_info.log_prefix, bmcUtil_getCmd_str( host_ptr->bmc_thread_info.command).c_str(), host_ptr->bmc_thread_ctrl.retries, - MAX_THREAD_RETRIES, host_ptr->thread_extra_info.bm_pw.c_str()); + MAX_THREAD_RETRIES); } else if (( host_ptr->bmc_thread_ctrl.retries == (MAX_THREAD_RETRIES-1)) || ( host_ptr->bmc_thread_ctrl.retries == 1 )) diff --git a/mtce/src/maintenance/mtcNodeHdlrs.cpp b/mtce/src/maintenance/mtcNodeHdlrs.cpp index 07777c85..16951d11 100755 --- a/mtce/src/maintenance/mtcNodeHdlrs.cpp +++ b/mtce/src/maintenance/mtcNodeHdlrs.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2018 Wind River Systems, Inc. + * Copyright (c) 2013-2020 Wind River Systems, Inc. * * SPDX-License-Identifier: Apache-2.0 * @@ -3901,7 +3901,7 @@ int nodeLinkClass::reset_handler ( struct nodeLinkClass::node * node_ptr ) if ( node_ptr->bmc_accessible == false ) { - wlog ("%s Power Off request rejected ; BMC not accessible ; retry in %d seconds \n", + wlog ("%s Reset request rejected ; BMC not accessible ; retry in %d seconds \n", node_ptr->hostname.c_str(), MTC_POWER_ACTION_RETRY_DELAY); @@ -6185,29 +6185,10 @@ int nodeLinkClass::bmc_handler ( struct nodeLinkClass::node * node_ptr ) pingUtil_acc_monitor ( node_ptr->bm_ping_info ); } - /***************************************************************** - * Manage bmc creds refresh - ****************************************************************/ - if ( node_ptr->bm_ping_info.ok == false ) - { - /* Auto correct key ping information ; - * should never occur but if it does ... */ - if (( node_ptr->bm_ping_info.hostname.empty()) || - ( node_ptr->bm_ping_info.ip.empty())) - { - node_ptr->bm_ping_info.hostname = node_ptr->hostname ; - node_ptr->bm_ping_info.ip = node_ptr->bm_ip ; - } - if ( ! node_ptr->bm_pw.empty() ) - { - node_ptr->bm_pw.clear(); - } - } - /***************************************************************** * Manage getting the bm password but only when ping is ok ****************************************************************/ - else if ( node_ptr->bm_pw.empty() ) + if ( node_ptr->bm_pw.empty() ) { barbicanSecret_type * secret = secretUtil_manage_secret( node_ptr->secretEvent, node_ptr->hostname,