Mtce: Improve non-blocking http request dispatch
Maintenance is seen to intermittently fail Swact requests early after initial system provisioning, without logging an error reason, only to always succeed later on. The issue is difficult to reproduce so this update adds extra logging to this code path and implements a speculative fix. The event_base_loop calls' non-zero return code is never being logged. The libevent documentation states that this API will return 1 while the target has not yet provided any data. Theory is, because the call is local, that normally it returns with data even on the first dispatch case. However, during early system configuration, when the system is busy, that first dispatch does not complete immediately like it normally does later on. Speculation is, instead it returns a 1 stating retry but the existing code path treats that as a failure. This update modifies the code to return a PASS if the command dispatch returns a 1 while the error case of -1 gets enhanced logging and continues to be treated as a failure. Test Plan: PASS: Swact 5 times PASS: Lock/Unlock Host PASS: Large System DOR Related Bug: https://bugs.launchpad.net/starlingx/+bug/1791381 Change-Id: I19b22e07d3224b2e9dd3f3569ecbe9aed7d9402f Signed-off-by: Eric MacDonald <eric.macdonald@windriver.com>
This commit is contained in:
parent
74c5f89ab4
commit
316032b904
|
@ -1,3 +1,3 @@
|
|||
SRC_DIR="cgts-mtce-common-1.0"
|
||||
TIS_PATCH_VER=137
|
||||
TIS_PATCH_VER=138
|
||||
BUILD_IS_SLOW=5
|
||||
|
|
|
@ -112,6 +112,7 @@
|
|||
#define FIT_CODE__LOCK_HOST (31)
|
||||
#define FIT_CODE__FORCE_LOCK_HOST (32)
|
||||
#define FIT_CODE__UNLOCK_HOST (33)
|
||||
#define FIT_CODE__FAIL_SWACT (34)
|
||||
|
||||
#define FIT_CODE__FM_SET_ALARM (40)
|
||||
#define FIT_CODE__FM_GET_ALARM (41)
|
||||
|
|
|
@ -1344,8 +1344,8 @@ int daemon_load_fit ( void )
|
|||
{
|
||||
daemon_log_value ( "/var/run/fit/fithits", "hits =", __fit_info.hits );
|
||||
daemon_remove_file ( "/var/run/fit/fitdone" );
|
||||
daemon_rename_file ( FIT__INFO_FILEPATH, FIT__INFO_FILENAME, FIT__INFO_FILENAME_RENAMED );
|
||||
}
|
||||
daemon_rename_file ( FIT__INFO_FILEPATH, FIT__INFO_FILENAME, FIT__INFO_FILENAME_RENAMED );
|
||||
|
||||
#endif
|
||||
return (PASS);
|
||||
|
|
|
@ -260,7 +260,7 @@ int mtcHttpUtil_request_old ( libEvent & event,
|
|||
void(*hdlr)(struct evhttp_request *, void *))
|
||||
{
|
||||
int rc = PASS ;
|
||||
|
||||
|
||||
/* make a new request and bind the event handler to it */
|
||||
event.req = evhttp_request_new( hdlr , event.base );
|
||||
if ( ! event.req )
|
||||
|
@ -275,7 +275,7 @@ int mtcHttpUtil_request_new ( libEvent & event,
|
|||
void(*hdlr)(struct evhttp_request *, void *))
|
||||
{
|
||||
int rc = PASS ;
|
||||
|
||||
|
||||
/* make a new request and bind the event handler to it */
|
||||
event.req = evhttp_request_new( hdlr , &event );
|
||||
if ( ! event.req )
|
||||
|
@ -374,15 +374,15 @@ int mtcHttpUtil_header_add ( libEvent * ptr, http_headers_type * hdrs_ptr )
|
|||
|
||||
if ( hdrs_ptr->entries > MAX_HEADERS )
|
||||
{
|
||||
elog ("%s Too many headers (%d:%d)\n",
|
||||
elog ("%s Too many headers (%d:%d)\n",
|
||||
ptr->hostname.c_str(), MAX_HEADERS, hdrs_ptr->entries );
|
||||
return FAIL ;
|
||||
}
|
||||
for ( int i = 0 ; i < hdrs_ptr->entries ; i++ )
|
||||
{
|
||||
/* Add the header */
|
||||
rc = evhttp_add_header( ptr->req->output_headers,
|
||||
hdrs_ptr->entry[i].key.c_str() ,
|
||||
rc = evhttp_add_header( ptr->req->output_headers,
|
||||
hdrs_ptr->entry[i].key.c_str() ,
|
||||
hdrs_ptr->entry[i].value.c_str());
|
||||
if ( rc )
|
||||
{
|
||||
|
@ -391,12 +391,12 @@ int mtcHttpUtil_header_add ( libEvent * ptr, http_headers_type * hdrs_ptr )
|
|||
hdrs_ptr->entry[i].value.c_str());
|
||||
rc = FAIL ;
|
||||
break ;
|
||||
}
|
||||
}
|
||||
}
|
||||
return (rc);
|
||||
}
|
||||
|
||||
//int mtcHttpUtil_request_make ( libEvent * ptr,
|
||||
//int mtcHttpUtil_request_make ( libEvent * ptr,
|
||||
// enum evhttp_cmd_type type,
|
||||
// string path )
|
||||
//{
|
||||
|
@ -451,7 +451,7 @@ int mtcHttpUtil_status ( libEvent & event )
|
|||
}
|
||||
default:
|
||||
{
|
||||
hlog3 ("%s Status: %d\n", event.hostname.c_str(), event.status );
|
||||
wlog ("%s Status: %d\n", event.hostname.c_str(), event.status );
|
||||
rc = event.status ;
|
||||
break;
|
||||
}
|
||||
|
@ -476,16 +476,16 @@ int mtcHttpUtil_status ( libEvent & event )
|
|||
int mtcHttpUtil_api_request ( libEvent & event )
|
||||
|
||||
{
|
||||
http_headers_type hdrs ;
|
||||
http_headers_type hdrs ;
|
||||
int hdr_entry = 0 ;
|
||||
int rc = FAIL ;
|
||||
void(*handler)(struct evhttp_request *, void *) = NULL ;
|
||||
|
||||
|
||||
/* Default to PUT */
|
||||
event.type = EVHTTP_REQ_PUT ;
|
||||
|
||||
if (( event.request == SERVICE_NONE ) ||
|
||||
( event.request >= SERVICE_LAST ))
|
||||
if (( event.request == SERVICE_NONE ) ||
|
||||
( event.request >= SERVICE_LAST ))
|
||||
{
|
||||
slog ("Invalid request %d\n", event.request);
|
||||
event.status = FAIL_BAD_PARM ;
|
||||
|
@ -577,7 +577,7 @@ int mtcHttpUtil_api_request ( libEvent & event )
|
|||
|
||||
event.payload.append (",\"task\":\"\"");
|
||||
event.payload.append (",\"action\":\"none\"");
|
||||
|
||||
|
||||
event.payload.append (",\"personality\":\"");
|
||||
event.payload.append ( event.inv_info.type );
|
||||
event.payload.append ("\"");
|
||||
|
@ -593,9 +593,9 @@ int mtcHttpUtil_api_request ( libEvent & event )
|
|||
event.payload.append (",\"availability\":\"");
|
||||
event.payload.append ( event.inv_info.avail );
|
||||
event.payload.append ("\"");
|
||||
|
||||
|
||||
event.payload.append (",\"bm_ip\":\"\"");
|
||||
|
||||
|
||||
if ( !event.inv_info.name.compare("controller-0") )
|
||||
{
|
||||
event.payload.append (",\"invprovision\":\"provisioned\"");
|
||||
|
@ -662,7 +662,7 @@ int mtcHttpUtil_api_request ( libEvent & event )
|
|||
event.timeout = HTTP_VIM_TIMEOUT ;
|
||||
}
|
||||
|
||||
else if (( event.request == SMGR_QUERY_SWACT ) ||
|
||||
else if (( event.request == SMGR_QUERY_SWACT ) ||
|
||||
( event.request == SMGR_START_SWACT ) ||
|
||||
( event.request == SMGR_HOST_LOCKED ) ||
|
||||
( event.request == SMGR_HOST_UNLOCKED ) ||
|
||||
|
@ -719,7 +719,7 @@ int mtcHttpUtil_api_request ( libEvent & event )
|
|||
jlog ("%s Address : %s\n", event.hostname.c_str(), event.token.url.c_str());
|
||||
}
|
||||
|
||||
if (( event.type != EVHTTP_REQ_GET ) &&
|
||||
if (( event.type != EVHTTP_REQ_GET ) &&
|
||||
( event.type != EVHTTP_REQ_DELETE ))
|
||||
{
|
||||
/* Add payload to the output buffer but only for PUT, POST and PATCH requests */
|
||||
|
@ -730,15 +730,15 @@ int mtcHttpUtil_api_request ( libEvent & event )
|
|||
}
|
||||
if ( daemon_get_cfg_ptr()->debug_json )
|
||||
{
|
||||
if ((!string_contains(event.payload,"token")) &&
|
||||
if ((!string_contains(event.payload,"token")) &&
|
||||
(!string_contains(event.payload,"assword")))
|
||||
{
|
||||
jlog ("%s Payload : %s\n", event.hostname.c_str(),
|
||||
jlog ("%s Payload : %s\n", event.hostname.c_str(),
|
||||
event.payload.c_str() );
|
||||
}
|
||||
else
|
||||
{
|
||||
jlog ("%s Payload : ... contains private content ...\n",
|
||||
jlog ("%s Payload : ... contains private content ...\n",
|
||||
event.hostname.c_str());
|
||||
|
||||
}
|
||||
|
@ -754,7 +754,7 @@ int mtcHttpUtil_api_request ( libEvent & event )
|
|||
hdrs.entry[hdr_entry].value = "admin";
|
||||
hdr_entry++;
|
||||
|
||||
if (( event.type != EVHTTP_REQ_GET ) &&
|
||||
if (( event.type != EVHTTP_REQ_GET ) &&
|
||||
( event.type != EVHTTP_REQ_DELETE ))
|
||||
{
|
||||
hdrs.entry[hdr_entry].key = "Content-Length" ;
|
||||
|
@ -765,7 +765,7 @@ int mtcHttpUtil_api_request ( libEvent & event )
|
|||
hdrs.entry[hdr_entry].key = "User-Agent" ;
|
||||
hdrs.entry[hdr_entry].value = "mtce/1.0" ;
|
||||
hdr_entry++;
|
||||
|
||||
|
||||
hdrs.entry[hdr_entry].key = "Content-Type" ;
|
||||
hdrs.entry[hdr_entry].value = "application/json" ;
|
||||
hdr_entry++;
|
||||
|
@ -800,13 +800,13 @@ int mtcHttpUtil_api_request ( libEvent & event )
|
|||
event.status = FAIL_HEADER_ADD ;
|
||||
goto mtcHttpUtil_api_request_done ;
|
||||
}
|
||||
|
||||
|
||||
/* get some timestamps and log the request */
|
||||
snprintf (&event.req_str[0], MAX_API_LOG_LEN-1,
|
||||
snprintf (&event.req_str[0], MAX_API_LOG_LEN-1,
|
||||
"\n%s [%5d] %s %s '%s' seq:%d -> Address : %s:%d %s %s ... %s",
|
||||
pt(), getpid(),
|
||||
pt(), getpid(),
|
||||
event.hostname.c_str(),
|
||||
event.service.c_str(),
|
||||
event.service.c_str(),
|
||||
event.operation.c_str(),
|
||||
event.sequence, event.ip.c_str(), event.port,
|
||||
getHttpCmdType_str( event.type ),
|
||||
|
@ -842,10 +842,10 @@ int mtcHttpUtil_api_request ( libEvent & event )
|
|||
event.log_prefix.append (" ");
|
||||
event.log_prefix.append (event.operation) ;
|
||||
hlog ("%s Requested (blocking) (to:%d)\n", event.log_prefix.c_str(), event.timeout);
|
||||
|
||||
|
||||
/* Send the message with timeout */
|
||||
event_base_dispatch(event.base);
|
||||
|
||||
|
||||
goto mtcHttpUtil_api_request_done ;
|
||||
}
|
||||
else if (( event.request == SYSINV_UPDATE ) ||
|
||||
|
@ -865,13 +865,50 @@ int mtcHttpUtil_api_request ( libEvent & event )
|
|||
{
|
||||
if ( event.operation.compare(SYSINV_OPER__UPDATE_UPTIME) )
|
||||
{
|
||||
hlog ("%s Dispatched (to:%d)\n", event.log_prefix.c_str(), event.timeout);
|
||||
hlog ("%s Dispatched (to:%d)\n", event.log_prefix.c_str(), event.timeout);
|
||||
}
|
||||
|
||||
/* TODO: Set a command timer to free up the resources
|
||||
* and deal with the error if the handler never runs */
|
||||
/*
|
||||
* non-blocking event_base_loop can return ...
|
||||
*
|
||||
* 0 - command complete ; data available
|
||||
* 1 - command dispatched but not complete ; no data available
|
||||
* -1 - error in dispatch ; check errno
|
||||
*
|
||||
*/
|
||||
event.active = true ;
|
||||
return (event_base_loop(event.base, EVLOOP_NONBLOCK));
|
||||
rc = event_base_loop(event.base, EVLOOP_NONBLOCK);
|
||||
|
||||
#ifdef WANT_FIT_TESTING
|
||||
string value = "" ;
|
||||
if ( daemon_want_fit ( FIT_CODE__FAIL_SWACT, event.hostname, "query", value ))
|
||||
{
|
||||
if ( value == "-1" )
|
||||
rc = -1 ;
|
||||
else
|
||||
rc = atoi(value.data());
|
||||
}
|
||||
#endif
|
||||
if (( rc == 0 ) || // Dispatched and done with Data ready
|
||||
( rc == 1 )) // Dispatched but no response yet
|
||||
{
|
||||
if (( event.request == SMGR_QUERY_SWACT ) ||
|
||||
( event.request == SMGR_START_SWACT ))
|
||||
{
|
||||
ilog ("%s dispatched%s\n",
|
||||
event.log_prefix.c_str(),
|
||||
rc ? "" : " ; data ready" );
|
||||
}
|
||||
rc = PASS ;
|
||||
}
|
||||
else
|
||||
{
|
||||
elog ("%s command dispatch failed (%d)\n",
|
||||
event.log_prefix.c_str(), errno );
|
||||
event.active = false ;
|
||||
rc = FAIL_REQUEST ;
|
||||
}
|
||||
return (rc);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -881,7 +918,7 @@ int mtcHttpUtil_api_request ( libEvent & event )
|
|||
event.log_prefix.append (event.service) ;
|
||||
event.log_prefix.append (" ");
|
||||
event.log_prefix.append (event.operation) ;
|
||||
slog ("%s Requested (blocking) (to:%d) ----------------------------------------\n", event.log_prefix.c_str(), event.timeout );
|
||||
slog ("%s Requested (blocking) (to:%d) ----------------------------------------\n", event.log_prefix.c_str(), event.timeout );
|
||||
|
||||
event_base_dispatch(event.base);
|
||||
|
||||
|
@ -890,10 +927,10 @@ int mtcHttpUtil_api_request ( libEvent & event )
|
|||
}
|
||||
else
|
||||
{
|
||||
elog ("%s Call to 'evhttp_make_request' failed (rc:%d)\n",
|
||||
elog ("%s Call to 'evhttp_make_request' failed (rc:%d)\n",
|
||||
event.hostname.c_str(), rc);
|
||||
}
|
||||
|
||||
|
||||
return (FAIL_MAKE_REQUEST);
|
||||
|
||||
mtcHttpUtil_api_request_done:
|
||||
|
|
|
@ -58,6 +58,8 @@ void nodeLinkClass::mtcSmgrApi_handler ( struct evhttp_request *req, void *arg )
|
|||
|
||||
mtcSmgrApi_handler_out:
|
||||
|
||||
mtcHttpUtil_log_event ( smgrEvent );
|
||||
|
||||
if ( smgrEvent.blocking == true )
|
||||
{
|
||||
mtcHttpUtil_free_conn ( smgrEvent );
|
||||
|
@ -120,6 +122,15 @@ int nodeLinkClass::mtcSmgrApi_request ( struct nodeLinkClass::node * node_ptr, m
|
|||
elog ("%s failed to allocate libEvent memory (%d)\n", node_ptr->hostname.c_str(), rc );
|
||||
return (rc);
|
||||
}
|
||||
|
||||
#ifdef WANT_FIT_TESTING
|
||||
string value = "" ;
|
||||
if ( daemon_want_fit ( FIT_CODE__FAIL_SWACT, node_ptr->hostname, "port", value ))
|
||||
{
|
||||
smgrEvent.port = atoi(value.data());
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Set the common context of this new operation */
|
||||
smgrEvent.status = RETRY ;
|
||||
smgrEvent.hostname = node_ptr->hostname ;
|
||||
|
@ -143,7 +154,13 @@ int nodeLinkClass::mtcSmgrApi_request ( struct nodeLinkClass::node * node_ptr, m
|
|||
ilog ("%s sending 'query services' request to HA Service Manager\n",
|
||||
smgrEvent.hostname.c_str());
|
||||
|
||||
return ( mtcHttpUtil_api_request ( smgrEvent )) ;
|
||||
rc = mtcHttpUtil_api_request ( smgrEvent ) ;
|
||||
if ( rc )
|
||||
{
|
||||
elog ("%s mtcHttpUtil_api_request (rc:%d)\n",
|
||||
node_ptr->hostname.c_str(), rc );
|
||||
}
|
||||
return ( rc ) ;
|
||||
}
|
||||
else if ( operation == CONTROLLER_SWACT )
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue