diff --git a/mtce-common/src/common/fitCodes.h b/mtce-common/src/common/fitCodes.h index cfbe7119..023f3483 100644 --- a/mtce-common/src/common/fitCodes.h +++ b/mtce-common/src/common/fitCodes.h @@ -131,6 +131,11 @@ #define FIT_CODE__STOP_HOST_SERVICES (71) #define FIT_CODE__SOCKET_SETUP (72) +#define FIT_CODE__READ_JSON_FROM_FILE (73) + +#define FIT_CODE__HTTP_WORKQUEUE_OPERATION_FAILED (75) +#define FIT_CODE__HTTP_WORKQUEUE_REQUEST_TIMEOUT (76) +#define FIT_CODE__HTTP_WORKQUEUE_CONNECTION_LOSS (77) /***************** Process Fit Codes ********************************/ diff --git a/mtce-common/src/common/httpUtil.h b/mtce-common/src/common/httpUtil.h index c1956c3f..27f4724b 100644 --- a/mtce-common/src/common/httpUtil.h +++ b/mtce-common/src/common/httpUtil.h @@ -2,10 +2,10 @@ #define __INCLUDE_HTTPUTIL_H__ /* - * Copyright (c) 2013, 2016 Wind River Systems, Inc. -* -* SPDX-License-Identifier: Apache-2.0 -* + * Copyright (c) 2013, 2016, 2024 Wind River Systems, Inc. + * + * SPDX-License-Identifier: Apache-2.0 + * */ #include /* for ... string */ @@ -93,12 +93,14 @@ typedef enum { HTTP__RECEIVE_WAIT = 1, HTTP__RECEIVE = 2, HTTP__FAILURE = 3, - HTTP__DONE_FAIL = 4, - HTTP__DONE_PASS = 5, - HTTP__STAGES = 6 + HTTP__RETRY_WAIT = 4, + HTTP__DONE_FAIL = 5, + HTTP__DONE_PASS = 6, + HTTP__STAGES = 7 } httpStages_enum ; #define HTTP_RECEIVE_WAIT_MSEC (10) +#define HTTP_RETRY_WAIT_SECS (10) typedef struct { @@ -142,7 +144,7 @@ typedef enum { SYSINV_CONFIG_SHOW, SYSINV_CONFIG_MODIFY, - + SYSINV_SENSOR_LOAD, SYSINV_SENSOR_LOAD_GROUPS, SYSINV_SENSOR_LOAD_GROUP, diff --git a/mtce-common/src/common/logMacros.h b/mtce-common/src/common/logMacros.h index b6c758e6..55f6d293 100644 --- a/mtce-common/src/common/logMacros.h +++ b/mtce-common/src/common/logMacros.h @@ -1,10 +1,10 @@ #ifndef __INCLUDE_NODELOG_HH__ #define __INCLUDE_NODELOG_HH__ /* - * Copyright (c) 2013-2017,2023 Wind River Systems, Inc. 
-* -* SPDX-License-Identifier: Apache-2.0 -* + * Copyright (c) 2013-2017, 2023-2024 Wind River Systems, Inc. + * + * SPDX-License-Identifier: Apache-2.0 + * */ /** @@ -116,6 +116,7 @@ typedef struct int start_delay ; /**< startup delay, added for pmon */ int api_retries ; /**< api retries before failure */ int bmc_reset_delay ; /**< secs delay before bmc reset */ + int http_retry_wait ; /**< secs to wait between http req retries */ int hostwd_failure_threshold ; /**< allowed # of missed pmon/hostwd messages */ bool hostwd_reboot_on_err ; /**< should hostwd reboot on fault detected */ bool hostwd_kdump_on_stall ; /**< sysrq crash dump on quorum msg'ing stall */ diff --git a/mtce/src/common/nodeClass.cpp b/mtce/src/common/nodeClass.cpp index bb97253d..7a4ce958 100755 --- a/mtce/src/common/nodeClass.cpp +++ b/mtce/src/common/nodeClass.cpp @@ -1,8 +1,8 @@ /* * Copyright (c) 2013-2020, 2023 Wind River Systems, Inc. -* -* SPDX-License-Identifier: Apache-2.0 -* + * + * SPDX-License-Identifier: Apache-2.0 + * */ /** @@ -314,6 +314,7 @@ nodeLinkClass::nodeLinkClass() sysinv_timeout = HTTP_SYSINV_CRIT_TIMEOUT ; sysinv_noncrit_timeout = HTTP_SYSINV_NONC_TIMEOUT ; work_queue_timeout = MTC_WORKQUEUE_TIMEOUT ; + http_retry_wait = HTTP_RETRY_WAIT_SECS ; /* Init the auto recovery threshold and intervals to zero until * modified by daemon config */ diff --git a/mtce/src/common/nodeClass.h b/mtce/src/common/nodeClass.h index 514b25ef..1cd03813 100755 --- a/mtce/src/common/nodeClass.h +++ b/mtce/src/common/nodeClass.h @@ -1866,6 +1866,9 @@ public: * time for crashdumps to complete. 
*/ int bmc_reset_delay ; + /** seconds to wait between http request retries */ + int http_retry_wait ; + /* collectd event handler */ int collectd_notify_handler ( string & hostname, string & resource, diff --git a/mtce/src/maintenance/mtcNodeCtrl.cpp b/mtce/src/maintenance/mtcNodeCtrl.cpp index f18efc23..e5bdf25e 100644 --- a/mtce/src/maintenance/mtcNodeCtrl.cpp +++ b/mtce/src/maintenance/mtcNodeCtrl.cpp @@ -376,6 +376,11 @@ static int mtc_config_handler ( void * user, config_ptr->bmc_reset_delay = atoi(value); mtcInv.bmc_reset_delay = config_ptr->bmc_reset_delay ; } + else if (MATCH("agent", "http_retry_wait")) + { + config_ptr->http_retry_wait = atoi(value); + mtcInv.http_retry_wait = config_ptr->http_retry_wait ; + } else if (MATCH("timeouts", "failsafe_shutdown_delay")) { config_ptr->failsafe_shutdown_delay = atoi(value); @@ -692,6 +697,7 @@ int daemon_configure ( void ) ilog ("TokenRefresh: %3d secs\n" , mtcInv.token_refresh_rate); ilog ("API Retries : %3d secs\n" , mtcInv.api_retries); ilog ("Reset Delay : %3d secs\n" , mtcInv.bmc_reset_delay); + ilog ("HTTP Retry : %3d secs\n" , mtcInv.http_retry_wait); /* Verify loaded config against an expected mask * as an ini file fault detection method */ diff --git a/mtce/src/maintenance/mtcVimApi.cpp b/mtce/src/maintenance/mtcVimApi.cpp index a5169d14..fe58f1ab 100644 --- a/mtce/src/maintenance/mtcVimApi.cpp +++ b/mtce/src/maintenance/mtcVimApi.cpp @@ -69,6 +69,23 @@ string nodeLinkClass::mtcVimApi_state_get ( string hostname, int & http_status_c http_status_code = HTTP_NOTFOUND ; return ( payload ); } + #ifdef WANT_FIT_TESTING + static const char * fit_file = "/var/run/fit/mtcVimApi_state_get"; + if ( daemon_want_fit ( FIT_CODE__READ_JSON_FROM_FILE, hostname, "mtcVimApi_state_get")) + { + if ( daemon_is_file_present (fit_file) ) + { + payload = daemon_read_file(fit_file); + ilog("%s FIT Json: %s", hostname.c_str(), payload.c_str()); + return (payload); + } + else + { + slog("%s FIT file %s not found ; aborting fit", 
hostname.c_str(), fit_file); + } + } + #endif + payload = ("{\"") ; payload.append (MTC_JSON_INV_ADMIN); payload.append ("\":\""); @@ -246,6 +263,22 @@ int nodeLinkClass::mtcVimApi_state_change ( struct nodeLinkClass::node * node_pt node_ptr->httpReq.payload = "{\"state-change\": " ; node_ptr->httpReq.payload.append (mtcVimApi_state_get ( node_ptr->hostname , http_status_code )); + #ifdef WANT_FIT_TESTING + static const char * fit_file = "/var/run/fit/mtcVimApi_state_change"; + if ( daemon_want_fit ( FIT_CODE__READ_JSON_FROM_FILE, node_ptr->hostname, "mtcVimApi_state_change" )) + { + if ( daemon_is_file_present (fit_file) ) + { + node_ptr->httpReq.payload = daemon_read_file(fit_file); + ilog("%s FIT Json: %s", node_ptr->hostname.c_str(), node_ptr->httpReq.payload.c_str()); + } + else + { + slog("%s FIT file %s not found ; aborting fit", node_ptr->hostname.c_str(), fit_file); + } + } + #endif + if (( request == VIM_HOST_FAILED ) || ( request == VIM_DPORT_FAILED )) { wlog ("%s %s\n", node_ptr->hostname.c_str(), node_ptr->httpReq.payload.c_str()); diff --git a/mtce/src/maintenance/mtcWorkQueue.cpp b/mtce/src/maintenance/mtcWorkQueue.cpp index 3e74e3ea..2ef0a8c7 100644 --- a/mtce/src/maintenance/mtcWorkQueue.cpp +++ b/mtce/src/maintenance/mtcWorkQueue.cpp @@ -1,8 +1,8 @@ /* - * Copyright (c) 2013, 2016 Wind River Systems, Inc. -* -* SPDX-License-Identifier: Apache-2.0 -* + * Copyright (c) 2013, 2016, 2023-2024 Wind River Systems, Inc. 
+ * + * SPDX-License-Identifier: Apache-2.0 + * */ /** @@ -38,7 +38,7 @@ string _get_work_state_str ( httpStages_enum state ) else if ( state == HTTP__RECEIVE ) return (" Rx"); else if ( state == HTTP__FAILURE ) return (" Er "); else if ( state == HTTP__RECEIVE_WAIT ) return ("Wait"); - else + else { elog ("Invalid Http Work Queue State: %d\n", state ); return ("----"); @@ -58,7 +58,7 @@ void nodeLinkClass::workQueue_dump ( struct nodeLinkClass::node * node_ptr ) node_ptr->libEvent_work_fifo_ptr != node_ptr->libEvent_work_fifo.end(); node_ptr->libEvent_work_fifo_ptr ++ ) { - syslog ( LOG_INFO, "| %-4s | %5d | %-12s | %-7s | %-13s | %15s:%d | %s\n", + syslog ( LOG_INFO, "| %-4s | %5d | %-12s | %-7s | %-13s | %15s:%d | %s\n", _get_work_state_str(node_ptr->libEvent_work_fifo_ptr->state).c_str(), node_ptr->libEvent_work_fifo_ptr->sequence, node_ptr->libEvent_work_fifo_ptr->hostname.c_str(), @@ -101,11 +101,11 @@ void nodeLinkClass::doneQueue_dump ( struct nodeLinkClass::node * node_ptr ) node_ptr->libEvent_done_fifo_ptr != node_ptr->libEvent_done_fifo.end(); node_ptr->libEvent_done_fifo_ptr ++ ) { - syslog ( LOG_INFO, "%15s httpReq doneQueue:%5d - %s '%s' -> Status:%d\n", - node_ptr->libEvent_done_fifo_ptr->hostname.c_str(), - node_ptr->libEvent_done_fifo_ptr->sequence, - node_ptr->libEvent_done_fifo_ptr->service.c_str(), - node_ptr->libEvent_done_fifo_ptr->operation.c_str(), + syslog ( LOG_INFO, "%15s httpReq doneQueue:%5d - %s '%s' -> Status:%d\n", + node_ptr->libEvent_done_fifo_ptr->hostname.c_str(), + node_ptr->libEvent_done_fifo_ptr->sequence, + node_ptr->libEvent_done_fifo_ptr->service.c_str(), + node_ptr->libEvent_done_fifo_ptr->operation.c_str(), node_ptr->libEvent_done_fifo_ptr->status ); } } @@ -240,9 +240,9 @@ int nodeLinkClass::doneQueue_dequeue ( libEvent & event ) * Description: This is a Per Host Finite State Machine (FSM) that * processes the work queue for the supplied host's * node pointer. 
- * + * * Constructs: - * + * * node_ptr->libEvent_work_fifo - the current work queue/fifo * node_ptr->libEvent_done_fifo - queue/fifo of completed requests * @@ -255,17 +255,17 @@ int nodeLinkClass::doneQueue_dequeue ( libEvent & event ) * * In process libEvents are copied from the callers work queue to * its thisReq. - * + * * Completed events including execution status are copied to the host's * done fifo. - * - * Failed events may be retried up to max_retries as specified by + * + * Failed events may be retried up to max_retries as specified by * the callers libEvent. * * @param event is a reference to the callers libEvent. * * @return an integer with values of PASS, FAIL, RETRY - * + * * ************************************************************************/ int nodeLinkClass::workQueue_process ( struct nodeLinkClass::node * node_ptr ) @@ -280,18 +280,18 @@ int nodeLinkClass::workQueue_process ( struct nodeLinkClass::node * node_ptr ) * responses */ if ( node_ptr->libEvent_done_fifo.size() > 10 ) { - qlog ("%s Done Queue has %ld elements\n", + qlog ("%s Done Queue has %ld elements\n", node_ptr->hostname.c_str(), node_ptr->libEvent_done_fifo.size()); /* TODO: look at the status of the commands and print a log of those that failed */ - + /* Remove the first 8 - its a fifo the first ones at the front are the oldest */ for ( int i=0 ; i < 8 ; i++ ) { node_ptr->libEvent_done_fifo.pop_front(); } - qlog ("%s Done Queue has %ld elements remaining\n", + qlog ("%s Done Queue has %ld elements remaining\n", node_ptr->hostname.c_str(), node_ptr->libEvent_done_fifo.size()); } @@ -299,8 +299,8 @@ int nodeLinkClass::workQueue_process ( struct nodeLinkClass::node * node_ptr ) if ( node_ptr->libEvent_work_fifo.empty() ) { - // qlog_throttled ( node_ptr->no_work_log_throttle, 300, - // "%s Idle ... \n", + // qlog_throttled ( node_ptr->no_work_log_throttle, 300, + // "%s Idle ... 
\n", // node_ptr->hostname.c_str()); node_ptr->no_work_log_throttle = 0 ; return (PASS); @@ -317,7 +317,7 @@ int nodeLinkClass::workQueue_process ( struct nodeLinkClass::node * node_ptr ) node_ptr->libEvent_work_fifo_ptr != node_ptr->libEvent_work_fifo.end(); node_ptr->libEvent_work_fifo_ptr ++ ) { - syslog ( LOG_INFO, "| %-4s | %5d | %-12s | %-7s | %-13s | %3d | %15s:%d | %s\n", + syslog ( LOG_INFO, "| %-4s | %5d | %-12s | %-7s | %-13s | %3d | %15s:%d | %s\n", _get_work_state_str(node_ptr->libEvent_work_fifo_ptr->state).c_str(), node_ptr->libEvent_work_fifo_ptr->sequence, node_ptr->libEvent_work_fifo_ptr->hostname.c_str(), @@ -331,7 +331,6 @@ int nodeLinkClass::workQueue_process ( struct nodeLinkClass::node * node_ptr ) syslog ( LOG_INFO, "+------+-------+--------------+---------+--------------+-----+----------------------+\n"); } - int size = node_ptr->libEvent_work_fifo.size() ; if ( size > QUEUE_OVERLOAD ) { @@ -354,18 +353,18 @@ int nodeLinkClass::workQueue_process ( struct nodeLinkClass::node * node_ptr ) case HTTP__TRANSMIT: { node_ptr->thisReq = node_ptr->libEvent_work_fifo.front(); - + qlog ("%s Transmitted\n", node_ptr->thisReq.log_prefix.c_str() ); rc = mtcHttpUtil_api_request ( node_ptr->thisReq ) ; if ( rc ) { - node_ptr->libEvent_work_fifo_ptr->state = + node_ptr->libEvent_work_fifo_ptr->state = node_ptr->thisReq.state = HTTP__FAILURE ; } else { - node_ptr->libEvent_work_fifo_ptr->state = + node_ptr->libEvent_work_fifo_ptr->state = node_ptr->thisReq.state = HTTP__RECEIVE_WAIT ; if ( node_ptr->http_timer.tid ) @@ -374,7 +373,7 @@ int nodeLinkClass::workQueue_process ( struct nodeLinkClass::node * node_ptr ) if ( rc != PASS ) { elog ("%s failed to start http command timer ; failing command\n", node_ptr->thisReq.log_prefix.c_str()); - node_ptr->libEvent_work_fifo_ptr->state = + node_ptr->libEvent_work_fifo_ptr->state = node_ptr->thisReq.state = HTTP__FAILURE ; } } @@ -408,20 +407,20 @@ int nodeLinkClass::workQueue_process ( struct nodeLinkClass::node * 
node_ptr ) slog ("%s has unexpected null HTTP request base pointer\n", node_ptr->thisReq.log_prefix.c_str()); - node_ptr->libEvent_work_fifo_ptr->state = + node_ptr->libEvent_work_fifo_ptr->state = node_ptr->thisReq.state = HTTP__FAILURE ; break ; } - + int msec_timeout = (node_ptr->thisReq.timeout*1000); int wait_time = (++node_ptr->thisReq.rx_retry_cnt)*HTTP_RECEIVE_WAIT_MSEC ; rc = mtcHttpUtil_receive ( node_ptr->thisReq ); if ( rc == RETRY ) { - node_ptr->libEvent_work_fifo_ptr->state = + node_ptr->libEvent_work_fifo_ptr->state = node_ptr->thisReq.state = HTTP__RECEIVE_WAIT ; - mtcTimer_start_msec ( node_ptr->http_timer, mtcTimer_handler, HTTP_RECEIVE_WAIT_MSEC ); + mtcTimer_start_msec ( node_ptr->http_timer, mtcTimer_handler, HTTP_RECEIVE_WAIT_MSEC ); if ((wait_time > (msec_timeout/4)) && ( node_ptr->thisReq.low_wm == false ) ) { @@ -449,48 +448,66 @@ int nodeLinkClass::workQueue_process ( struct nodeLinkClass::node * node_ptr ) /* Only print every 16 starting with 2 */ if ( (node_ptr->thisReq.rx_retry_cnt & 0xF) == 2 ) { - qlog ("%s rx_retry_cnt:%d\n", + qlog ("%s rx_retry_cnt:%d\n", node_ptr->thisReq.log_prefix.c_str(), node_ptr->thisReq.rx_retry_cnt ); } break ; } } + #ifdef WANT_FIT_TESTING + if ( daemon_want_fit ( FIT_CODE__HTTP_WORKQUEUE_OPERATION_FAILED, node_ptr->hostname, "" )) + { + ilog("%s FIT Operation Failed: %s", node_ptr->hostname.c_str(), node_ptr->httpReq.payload.c_str()); + node_ptr->thisReq.status = FAIL_AUTHENTICATION ; + rc = FAIL_OPERATION ; + } + else if ( daemon_want_fit ( FIT_CODE__HTTP_WORKQUEUE_REQUEST_TIMEOUT, node_ptr->hostname, "" )) + { + ilog("%s FIT Request Timeout Failed: %s", node_ptr->hostname.c_str(), node_ptr->httpReq.payload.c_str()); + rc = FAIL_TIMEOUT ; + } + else if ( daemon_want_fit ( FIT_CODE__HTTP_WORKQUEUE_CONNECTION_LOSS, node_ptr->hostname, "" )) + { + ilog("%s FIT Connection Loss: %s", node_ptr->hostname.c_str(), node_ptr->httpReq.payload.c_str()); + node_ptr->thisReq.status = rc = FAIL_HTTP_ZERO_STATUS ; + } 
+ #endif if ( rc != PASS ) { - node_ptr->libEvent_work_fifo_ptr->state = + node_ptr->libEvent_work_fifo_ptr->state = node_ptr->thisReq.state = HTTP__FAILURE ; } else { if ( node_ptr->thisReq.cur_retries ) { - ilog ("%s Completed (after %d retries) (took %d of %d msecs)\n", - node_ptr->thisReq.log_prefix.c_str(), - node_ptr->thisReq.cur_retries, wait_time, + ilog ("%s Completed (after %d retries) (took %d of %d msecs)\n", + node_ptr->thisReq.log_prefix.c_str(), + node_ptr->thisReq.cur_retries, wait_time, node_ptr->thisReq.timeout*1000); } else { - qlog ("%s Completed (took %d of %d msecs)\n", - node_ptr->thisReq.log_prefix.c_str(), - wait_time, + qlog ("%s Completed (took %d of %d msecs)\n", + node_ptr->thisReq.log_prefix.c_str(), + wait_time, node_ptr->thisReq.timeout*1000); } node_ptr->thisReq.exec_time_msec = wait_time ; node_ptr->thisReq.rx_retry_cnt = 0 ; - + mtcHttpUtil_free_conn ( node_ptr->thisReq ); mtcHttpUtil_free_base ( node_ptr->thisReq ); - /* Don't add success responses to non-critical commands like + /* Don't add success responses to non-critical commands like * "update uptime" and "update task" to the done queue */ if ( !node_ptr->thisReq.noncritical ) { /* Copy done event to the done queue */ node_ptr->libEvent_done_fifo.push_back(node_ptr->thisReq); - + } /* Pop that done event off the work queue */ node_ptr->libEvent_work_fifo.pop_front(); @@ -503,21 +520,21 @@ int nodeLinkClass::workQueue_process ( struct nodeLinkClass::node * node_ptr ) mtcHttpUtil_free_conn ( node_ptr->thisReq ); mtcHttpUtil_free_base ( node_ptr->thisReq ); - + node_ptr->http_retries_cur++ ; node_ptr->thisReq.cur_retries++ ; - if ( node_ptr->thisReq.noncritical == true ) - { + if ( node_ptr->thisReq.noncritical == true ) + { if ( node_ptr->thisReq.cur_retries > node_ptr->thisReq.max_retries ) { node_ptr->oper_failures++ ; wlog ("%s retry conjestion abort of non-critical command (%d:%d)\n", - node_ptr->thisReq.log_prefix.c_str(), + node_ptr->thisReq.log_prefix.c_str(), 
node_ptr->thisReq.cur_retries, node_ptr->thisReq.max_retries ); - + /* Pop this aborted event off the work queue */ node_ptr->libEvent_work_fifo.pop_front(); } @@ -561,7 +578,7 @@ int nodeLinkClass::workQueue_process ( struct nodeLinkClass::node * node_ptr ) node_ptr->thisReq.max_retries, node_ptr->thisReq.timeout, node_ptr->thisReq.noncritical ? "No" : "Yes" ); - + node_ptr->thisReq.response.clear(); node_ptr->thisReq.status = PASS ; @@ -569,10 +586,10 @@ int nodeLinkClass::workQueue_process ( struct nodeLinkClass::node * node_ptr ) node_ptr->thisReq.active = false ; node_ptr->thisReq.response_len= 0 ; - /* + /* * If this is an inventory request ... * - * 1. Init the inv struct + * 1. Init the inv struct * 2. increase the timeout if is a critical command * * */ @@ -583,30 +600,42 @@ int nodeLinkClass::workQueue_process ( struct nodeLinkClass::node * node_ptr ) { int temp = node_ptr->libEvent_work_fifo_ptr->timeout ; - /* + /* * Increase and update the timeout value for critical commands * in hope that it will succeed on he next go around. 
*/ node_ptr->libEvent_work_fifo_ptr->timeout += get_mtcInv_ptr()->sysinv_timeout ; - dlog ("%s timeout extended from %d to %d secs\n", + dlog ("%s timeout extended from %d to %d secs\n", node_ptr->thisReq.log_prefix.c_str(), temp, node_ptr->libEvent_work_fifo_ptr->timeout ); } } /* Save the retry count */ - node_ptr->libEvent_work_fifo_ptr->cur_retries = + node_ptr->libEvent_work_fifo_ptr->cur_retries = node_ptr->thisReq.cur_retries ; - node_ptr->libEvent_work_fifo_ptr->state = + mtcTimer_start ( node_ptr->http_timer, mtcTimer_handler, HTTP_RETRY_WAIT_SECS ); + node_ptr->libEvent_work_fifo_ptr->state = + node_ptr->thisReq.state = HTTP__RETRY_WAIT ; + dlog ("%s %d sec retry wait started", node_ptr->thisReq.log_prefix.c_str(), HTTP_RETRY_WAIT_SECS); + } + break ; + } + case HTTP__RETRY_WAIT: + { + if ( node_ptr->http_timer.ring == true ) + { + dlog ("%s %d sec retry wait expired", node_ptr->thisReq.log_prefix.c_str(), HTTP_RETRY_WAIT_SECS); + node_ptr->libEvent_work_fifo_ptr->state = node_ptr->thisReq.state = HTTP__TRANSMIT ; } break ; } default: { - slog ("%s Bad libEvent work state (%d) ; clearing work/done queue\n", - node_ptr->hostname.c_str(), + slog ("%s Bad libEvent work state (%d) ; clearing work/done queue\n", + node_ptr->hostname.c_str(), node_ptr->libEvent_work_fifo_ptr->state ); node_ptr->libEvent_work_fifo.clear(); node_ptr->libEvent_done_fifo.clear(); @@ -623,7 +652,7 @@ int nodeLinkClass::workQueue_process ( struct nodeLinkClass::node * node_ptr ) * Description: To handle the pathalogical case where an event seems to * have timed out at the callers level then this interface * can be called to delete it from the work queue. 
- * + * * @param node_ptr so that the hosts work queue can be found * @param sequence to specify the specific sequence number to remove * @return always PASS since there is nothing the caller can or needs @@ -660,7 +689,7 @@ int nodeLinkClass::workQueue_del_cmd ( struct nodeLinkClass::node * node_ptr, in * * Description: Removes all items from the done queue. * - * Returns a failure, the sequence number of the first command + * Returns a failure, the sequence number of the first command * in the done queue that did not PASS. * */ @@ -717,7 +746,7 @@ int nodeLinkClass::doneQueue_purge ( struct nodeLinkClass::node * node_ptr ) { qlog ("%s all (%d) priority queued operations passed (qlog)\n", node_ptr->hostname.c_str(), size ); } - + qlog ("%s purging %d items from doneQueue\n", node_ptr->hostname.c_str(), size ); node_ptr->libEvent_done_fifo.clear(); } @@ -738,7 +767,7 @@ int nodeLinkClass::workQueue_purge ( struct nodeLinkClass::node * node_ptr ) { /* TODO: find out how to force close a connection. * Don't free the connection if it is in the receiving state or - * we might get a segfault + * we might get a segfault * There is only ever one connection open at a time for a specific host * so its only 'thisReq' we need to worry about. */ if ( node_ptr->libEvent_work_fifo_ptr->state != HTTP__RECEIVE ) @@ -754,12 +783,12 @@ int nodeLinkClass::workQueue_purge ( struct nodeLinkClass::node * node_ptr ) { if ( node_ptr->libEvent_work_fifo_ptr->state == HTTP__TRANSMIT ) { - wlog ("%s ... was not executed\n", + wlog ("%s ... was not executed\n", node_ptr->libEvent_work_fifo_ptr->log_prefix.c_str()); } else { - wlog ("%s ... did not complete (%s)\n", + wlog ("%s ... 
did not complete (%s)\n", node_ptr->libEvent_work_fifo_ptr->log_prefix.c_str(), _get_work_state_str(node_ptr->libEvent_work_fifo_ptr->state).c_str()); } @@ -771,7 +800,7 @@ int nodeLinkClass::workQueue_purge ( struct nodeLinkClass::node * node_ptr ) { qlog ("%s all work done\n", node_ptr->hostname.c_str()); } - + // node_ptr->libEvent_work_fifo_ptr->state = HTTP__TRANSMIT ; return (PASS); } @@ -793,7 +822,7 @@ int nodeLinkClass::workQueue_done ( struct nodeLinkClass::node * node_ptr ) node_ptr->libEvent_work_fifo_ptr++ ) { /* Don't report work queue timeout if there are only noncritical - * commands left in the work queue. Such commands might be + * commands left in the work queue. Such commands might be * "update uptime" and "update task" */ if ( !node_ptr->libEvent_work_fifo_ptr->noncritical ) { @@ -862,7 +891,6 @@ bool nodeLinkClass::workQueue_present ( libEvent & event ) } } } - wlog ("%s ... not found in work queue\n", event.log_prefix.c_str()); return (false); } diff --git a/mtce/src/scripts/mtc.conf b/mtce/src/scripts/mtc.conf index 74b56206..53f252b0 100644 --- a/mtce/src/scripts/mtc.conf +++ b/mtce/src/scripts/mtc.conf @@ -78,6 +78,8 @@ bmc_reset_delay = 300 ; seconds to wait before issuing a bmc ; ACK reboot requests. The delay gives ; time for crashdumps to complete. +http_retry_wait = 10 ; secs to wait between http request retries + [client] ; Client Configuration scheduling_priority = 45 ; realtime scheduling; range of 1 .. 99