Merge "Add a wait time between http request retries"

This commit is contained in:
Zuul 2024-02-13 16:45:26 +00:00 committed by Gerrit Code Review
commit 739c508e92
9 changed files with 162 additions and 81 deletions

View File

@ -131,6 +131,11 @@
#define FIT_CODE__STOP_HOST_SERVICES (71)
#define FIT_CODE__SOCKET_SETUP (72)
#define FIT_CODE__READ_JSON_FROM_FILE (73)
#define FIT_CODE__HTTP_WORKQUEUE_OPERATION_FAILED (75)
#define FIT_CODE__HTTP_WORKQUEUE_REQUEST_TIMEOUT (76)
#define FIT_CODE__HTTP_WORKQUEUE_CONNECTION_LOSS (77)
/***************** Process Fit Codes ********************************/

View File

@ -2,7 +2,7 @@
#define __INCLUDE_HTTPUTIL_H__
/*
* Copyright (c) 2013, 2016 Wind River Systems, Inc.
* Copyright (c) 2013, 2016, 2024 Wind River Systems, Inc.
*
* SPDX-License-Identifier: Apache-2.0
*
@ -93,12 +93,14 @@ typedef enum {
HTTP__RECEIVE_WAIT = 1,
HTTP__RECEIVE = 2,
HTTP__FAILURE = 3,
HTTP__DONE_FAIL = 4,
HTTP__DONE_PASS = 5,
HTTP__STAGES = 6
HTTP__RETRY_WAIT = 4,
HTTP__DONE_FAIL = 5,
HTTP__DONE_PASS = 6,
HTTP__STAGES = 7
} httpStages_enum ;
#define HTTP_RECEIVE_WAIT_MSEC (10)
#define HTTP_RETRY_WAIT_SECS (10)
typedef struct
{

View File

@ -1,7 +1,7 @@
#ifndef __INCLUDE_NODELOG_HH__
#define __INCLUDE_NODELOG_HH__
/*
* Copyright (c) 2013-2017,2023 Wind River Systems, Inc.
* Copyright (c) 2013-2017, 2023-2024 Wind River Systems, Inc.
*
* SPDX-License-Identifier: Apache-2.0
*
@ -116,6 +116,7 @@ typedef struct
int start_delay ; /**< startup delay, added for pmon */
int api_retries ; /**< api retries before failure */
int bmc_reset_delay ; /**< secs delay before bmc reset */
int http_retry_wait ; /**< secs to wait between http request retries */
int hostwd_failure_threshold ; /**< allowed # of missed pmon/hostwd messages */
bool hostwd_reboot_on_err ; /**< should hostwd reboot on fault detected */
bool hostwd_kdump_on_stall ; /**< sysrq crash dump on quorum msg'ing stall */

View File

@ -314,6 +314,7 @@ nodeLinkClass::nodeLinkClass()
sysinv_timeout = HTTP_SYSINV_CRIT_TIMEOUT ;
sysinv_noncrit_timeout = HTTP_SYSINV_NONC_TIMEOUT ;
work_queue_timeout = MTC_WORKQUEUE_TIMEOUT ;
http_retry_wait = HTTP_RETRY_WAIT_SECS ;
/* Init the auto recovery threshold and intervals to zero until
* modified by daemon config */

View File

@ -1866,6 +1866,9 @@ public:
* time for crashdumps to complete. */
int bmc_reset_delay ;
/** seconds to wait between http request retries */
int http_retry_wait ;
/* collectd event handler */
int collectd_notify_handler ( string & hostname,
string & resource,

View File

@ -376,6 +376,11 @@ static int mtc_config_handler ( void * user,
config_ptr->bmc_reset_delay = atoi(value);
mtcInv.bmc_reset_delay = config_ptr->bmc_reset_delay ;
}
else if (MATCH("agent", "http_retry_wait"))
{
config_ptr->http_retry_wait = atoi(value);
mtcInv.http_retry_wait = config_ptr->http_retry_wait ;
}
else if (MATCH("timeouts", "failsafe_shutdown_delay"))
{
config_ptr->failsafe_shutdown_delay = atoi(value);
@ -692,6 +697,7 @@ int daemon_configure ( void )
ilog ("TokenRefresh: %3d secs\n" , mtcInv.token_refresh_rate);
ilog ("API Retries : %3d secs\n" , mtcInv.api_retries);
ilog ("Reset Delay : %3d secs\n" , mtcInv.bmc_reset_delay);
ilog ("HTTP Retry : %3d secs\n" , mtcInv.http_retry_wait);
/* Verify loaded config against an expected mask
* as an ini file fault detection method */

View File

@ -69,6 +69,23 @@ string nodeLinkClass::mtcVimApi_state_get ( string hostname, int & http_status_c
http_status_code = HTTP_NOTFOUND ;
return ( payload );
}
#ifdef WANT_FIT_TESTING
/* Fault insertion hook ; only compiled in when WANT_FIT_TESTING is defined.
 *
 * If daemon_want_fit accepts a FIT_CODE__READ_JSON_FROM_FILE request for
 * this hostname tagged "mtcVimApi_state_get", and the fit file exists,
 * the file content is returned as the payload in place of the one that
 * would otherwise be built below. */
static const char * fit_file = "/var/run/fit/mtcVimApi_state_get";
if ( daemon_want_fit ( FIT_CODE__READ_JSON_FROM_FILE, hostname, "mtcVimApi_state_get"))
{
if ( daemon_is_file_present (fit_file) )
{
/* hand back the file content unmodified */
payload = daemon_read_file(fit_file);
ilog("%s FIT Json: %s", hostname.c_str(), payload.c_str());
return (payload);
}
else
{
/* no fit file ; only log and carry on with normal payload construction */
slog("%s FIT file %s not found ; aborting fit", hostname.c_str(), fit_file);
}
}
#endif
payload = ("{\"") ;
payload.append (MTC_JSON_INV_ADMIN);
payload.append ("\":\"");
@ -246,6 +263,22 @@ int nodeLinkClass::mtcVimApi_state_change ( struct nodeLinkClass::node * node_pt
node_ptr->httpReq.payload = "{\"state-change\": " ;
node_ptr->httpReq.payload.append (mtcVimApi_state_get ( node_ptr->hostname , http_status_code ));
#ifdef WANT_FIT_TESTING
/* Fault insertion hook ; only compiled in when WANT_FIT_TESTING is defined.
 *
 * If daemon_want_fit accepts a FIT_CODE__READ_JSON_FROM_FILE request for
 * this host tagged "mtcVimApi_state_change", and the fit file exists,
 * node_ptr->httpReq.payload is overwritten with the file content. */
static const char * fit_file = "/var/run/fit/mtcVimApi_state_change";
if ( daemon_want_fit ( FIT_CODE__READ_JSON_FROM_FILE, node_ptr->hostname, "mtcVimApi_state_change" ))
{
if ( daemon_is_file_present (fit_file) )
{
/* replace the request payload with the file content */
node_ptr->httpReq.payload = daemon_read_file(fit_file);
ilog("%s FIT Json: %s", node_ptr->hostname.c_str(), node_ptr->httpReq.payload.c_str());
}
else
{
/* no fit file ; only log, the payload is left as it was */
slog("%s FIT file %s not found ; aborting fit", node_ptr->hostname.c_str(), fit_file);
}
}
#endif
if (( request == VIM_HOST_FAILED ) || ( request == VIM_DPORT_FAILED ))
{
wlog ("%s %s\n", node_ptr->hostname.c_str(), node_ptr->httpReq.payload.c_str());

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2013, 2016 Wind River Systems, Inc.
* Copyright (c) 2013, 2016, 2023-2024 Wind River Systems, Inc.
*
* SPDX-License-Identifier: Apache-2.0
*
@ -331,7 +331,6 @@ int nodeLinkClass::workQueue_process ( struct nodeLinkClass::node * node_ptr )
syslog ( LOG_INFO, "+------+-------+--------------+---------+--------------+-----+----------------------+\n");
}
int size = node_ptr->libEvent_work_fifo.size() ;
if ( size > QUEUE_OVERLOAD )
{
@ -456,6 +455,24 @@ int nodeLinkClass::workQueue_process ( struct nodeLinkClass::node * node_ptr )
break ;
}
}
#ifdef WANT_FIT_TESTING
/* Fault insertion hooks ; only compiled in when WANT_FIT_TESTING is defined.
 *
 * Each branch overrides rc (and in two cases thisReq.status) with a
 * failure code so that the 'rc != PASS' handling that follows is
 * exercised. At most one of the three fit codes is applied per pass
 * because the checks are chained with else-if. */
if ( daemon_want_fit ( FIT_CODE__HTTP_WORKQUEUE_OPERATION_FAILED, node_ptr->hostname, "" ))
{
/* operation failure ; request status is set to FAIL_AUTHENTICATION */
ilog("%s FIT Operation Failed: %s", node_ptr->hostname.c_str(), node_ptr->httpReq.payload.c_str());
node_ptr->thisReq.status = FAIL_AUTHENTICATION ;
rc = FAIL_OPERATION ;
}
else if ( daemon_want_fit ( FIT_CODE__HTTP_WORKQUEUE_REQUEST_TIMEOUT, node_ptr->hostname, "" ))
{
/* request timeout ; only rc is changed, thisReq.status is left as is */
ilog("%s FIT Request Timeout Failed: %s", node_ptr->hostname.c_str(), node_ptr->httpReq.payload.c_str());
rc = FAIL_TIMEOUT ;
}
else if ( daemon_want_fit ( FIT_CODE__HTTP_WORKQUEUE_CONNECTION_LOSS, node_ptr->hostname, "" ))
{
/* connection loss ; both rc and request status get FAIL_HTTP_ZERO_STATUS */
ilog("%s FIT Connection Loss: %s", node_ptr->hostname.c_str(), node_ptr->httpReq.payload.c_str());
node_ptr->thisReq.status = rc = FAIL_HTTP_ZERO_STATUS ;
}
#endif
if ( rc != PASS )
{
node_ptr->libEvent_work_fifo_ptr->state =
@ -598,6 +615,18 @@ int nodeLinkClass::workQueue_process ( struct nodeLinkClass::node * node_ptr )
node_ptr->libEvent_work_fifo_ptr->cur_retries =
node_ptr->thisReq.cur_retries ;
mtcTimer_start ( node_ptr->http_timer, mtcTimer_handler, HTTP_RETRY_WAIT_SECS );
node_ptr->libEvent_work_fifo_ptr->state =
node_ptr->thisReq.state = HTTP__RETRY_WAIT ;
dlog ("%s %d sec retry wait started", node_ptr->thisReq.log_prefix.c_str(), HTTP_RETRY_WAIT_SECS);
}
break ;
}
case HTTP__RETRY_WAIT:
{
if ( node_ptr->http_timer.ring == true )
{
dlog ("%s %d sec retry wait expired", node_ptr->thisReq.log_prefix.c_str(), HTTP_RETRY_WAIT_SECS);
node_ptr->libEvent_work_fifo_ptr->state =
node_ptr->thisReq.state = HTTP__TRANSMIT ;
}
@ -862,7 +891,6 @@ bool nodeLinkClass::workQueue_present ( libEvent & event )
}
}
}
wlog ("%s ... not found in work queue\n", event.log_prefix.c_str());
return (false);
}

View File

@ -78,6 +78,8 @@ bmc_reset_delay = 300 ; seconds to wait before issuing a bmc
; ACK reboot requests. The delay gives
; time for crashdumps to complete.
http_retry_wait = 10 ; secs to wait between http request retries
[client] ; Client Configuration
scheduling_priority = 45 ; realtime scheduling; range of 1 .. 99