Mtce: Improve non-blocking http request dispatch

Maintenance is seen to intermittently fail Swact requests early
after initial system provisioning, without logging an error
reason, only to always succeed later on.

The issue is difficult to reproduce so this update adds extra
logging to this code path and implements a speculative fix.

The event_base_loop calls' non-zero return code is never being
logged. The libevent documentation states that this API will
return 1 while the target has not yet provided any data.

Theory is, because the call is local, that normally it returns
with data even on the first dispatch case. However, during early
system configuration, when the system is busy, that first dispatch
does not complete immediately like it normally does later on.

Speculation is, instead it returns a 1 stating retry but the
existing code path treats that as a failure.

This update modifies the code to return a PASS if the command
dispatch returns a 1 while the error case of -1 gets enhanced
logging and continues to be treated as a failure.

Test Plan:
PASS: Swact 5 times
PASS: Lock/Unlock Host
PASS: Large System DOR

Related Bug: https://bugs.launchpad.net/starlingx/+bug/1791381
Change-Id: I19b22e07d3224b2e9dd3f3569ecbe9aed7d9402f
Signed-off-by: Eric MacDonald <eric.macdonald@windriver.com>
This commit is contained in:
Eric MacDonald 2018-09-10 14:38:52 -04:00
parent 74c5f89ab4
commit 316032b904
5 changed files with 93 additions and 38 deletions

View File

@ -1,3 +1,3 @@
SRC_DIR="cgts-mtce-common-1.0"
TIS_PATCH_VER=137
TIS_PATCH_VER=138
BUILD_IS_SLOW=5

View File

@ -112,6 +112,7 @@
#define FIT_CODE__LOCK_HOST (31)
#define FIT_CODE__FORCE_LOCK_HOST (32)
#define FIT_CODE__UNLOCK_HOST (33)
#define FIT_CODE__FAIL_SWACT (34)
#define FIT_CODE__FM_SET_ALARM (40)
#define FIT_CODE__FM_GET_ALARM (41)

View File

@ -1344,8 +1344,8 @@ int daemon_load_fit ( void )
{
daemon_log_value ( "/var/run/fit/fithits", "hits =", __fit_info.hits );
daemon_remove_file ( "/var/run/fit/fitdone" );
daemon_rename_file ( FIT__INFO_FILEPATH, FIT__INFO_FILENAME, FIT__INFO_FILENAME_RENAMED );
}
daemon_rename_file ( FIT__INFO_FILEPATH, FIT__INFO_FILENAME, FIT__INFO_FILENAME_RENAMED );
#endif
return (PASS);

View File

@ -260,7 +260,7 @@ int mtcHttpUtil_request_old ( libEvent & event,
void(*hdlr)(struct evhttp_request *, void *))
{
int rc = PASS ;
/* make a new request and bind the event handler to it */
event.req = evhttp_request_new( hdlr , event.base );
if ( ! event.req )
@ -275,7 +275,7 @@ int mtcHttpUtil_request_new ( libEvent & event,
void(*hdlr)(struct evhttp_request *, void *))
{
int rc = PASS ;
/* make a new request and bind the event handler to it */
event.req = evhttp_request_new( hdlr , &event );
if ( ! event.req )
@ -374,15 +374,15 @@ int mtcHttpUtil_header_add ( libEvent * ptr, http_headers_type * hdrs_ptr )
if ( hdrs_ptr->entries > MAX_HEADERS )
{
elog ("%s Too many headers (%d:%d)\n",
elog ("%s Too many headers (%d:%d)\n",
ptr->hostname.c_str(), MAX_HEADERS, hdrs_ptr->entries );
return FAIL ;
}
for ( int i = 0 ; i < hdrs_ptr->entries ; i++ )
{
/* Add the header */
rc = evhttp_add_header( ptr->req->output_headers,
hdrs_ptr->entry[i].key.c_str() ,
rc = evhttp_add_header( ptr->req->output_headers,
hdrs_ptr->entry[i].key.c_str() ,
hdrs_ptr->entry[i].value.c_str());
if ( rc )
{
@ -391,12 +391,12 @@ int mtcHttpUtil_header_add ( libEvent * ptr, http_headers_type * hdrs_ptr )
hdrs_ptr->entry[i].value.c_str());
rc = FAIL ;
break ;
}
}
}
return (rc);
}
//int mtcHttpUtil_request_make ( libEvent * ptr,
//int mtcHttpUtil_request_make ( libEvent * ptr,
// enum evhttp_cmd_type type,
// string path )
//{
@ -451,7 +451,7 @@ int mtcHttpUtil_status ( libEvent & event )
}
default:
{
hlog3 ("%s Status: %d\n", event.hostname.c_str(), event.status );
wlog ("%s Status: %d\n", event.hostname.c_str(), event.status );
rc = event.status ;
break;
}
@ -476,16 +476,16 @@ int mtcHttpUtil_status ( libEvent & event )
int mtcHttpUtil_api_request ( libEvent & event )
{
http_headers_type hdrs ;
http_headers_type hdrs ;
int hdr_entry = 0 ;
int rc = FAIL ;
void(*handler)(struct evhttp_request *, void *) = NULL ;
/* Default to PUT */
event.type = EVHTTP_REQ_PUT ;
if (( event.request == SERVICE_NONE ) ||
( event.request >= SERVICE_LAST ))
if (( event.request == SERVICE_NONE ) ||
( event.request >= SERVICE_LAST ))
{
slog ("Invalid request %d\n", event.request);
event.status = FAIL_BAD_PARM ;
@ -577,7 +577,7 @@ int mtcHttpUtil_api_request ( libEvent & event )
event.payload.append (",\"task\":\"\"");
event.payload.append (",\"action\":\"none\"");
event.payload.append (",\"personality\":\"");
event.payload.append ( event.inv_info.type );
event.payload.append ("\"");
@ -593,9 +593,9 @@ int mtcHttpUtil_api_request ( libEvent & event )
event.payload.append (",\"availability\":\"");
event.payload.append ( event.inv_info.avail );
event.payload.append ("\"");
event.payload.append (",\"bm_ip\":\"\"");
if ( !event.inv_info.name.compare("controller-0") )
{
event.payload.append (",\"invprovision\":\"provisioned\"");
@ -662,7 +662,7 @@ int mtcHttpUtil_api_request ( libEvent & event )
event.timeout = HTTP_VIM_TIMEOUT ;
}
else if (( event.request == SMGR_QUERY_SWACT ) ||
else if (( event.request == SMGR_QUERY_SWACT ) ||
( event.request == SMGR_START_SWACT ) ||
( event.request == SMGR_HOST_LOCKED ) ||
( event.request == SMGR_HOST_UNLOCKED ) ||
@ -719,7 +719,7 @@ int mtcHttpUtil_api_request ( libEvent & event )
jlog ("%s Address : %s\n", event.hostname.c_str(), event.token.url.c_str());
}
if (( event.type != EVHTTP_REQ_GET ) &&
if (( event.type != EVHTTP_REQ_GET ) &&
( event.type != EVHTTP_REQ_DELETE ))
{
/* Add payload to the output buffer but only for PUT, POST and PATCH requests */
@ -730,15 +730,15 @@ int mtcHttpUtil_api_request ( libEvent & event )
}
if ( daemon_get_cfg_ptr()->debug_json )
{
if ((!string_contains(event.payload,"token")) &&
if ((!string_contains(event.payload,"token")) &&
(!string_contains(event.payload,"assword")))
{
jlog ("%s Payload : %s\n", event.hostname.c_str(),
jlog ("%s Payload : %s\n", event.hostname.c_str(),
event.payload.c_str() );
}
else
{
jlog ("%s Payload : ... contains private content ...\n",
jlog ("%s Payload : ... contains private content ...\n",
event.hostname.c_str());
}
@ -754,7 +754,7 @@ int mtcHttpUtil_api_request ( libEvent & event )
hdrs.entry[hdr_entry].value = "admin";
hdr_entry++;
if (( event.type != EVHTTP_REQ_GET ) &&
if (( event.type != EVHTTP_REQ_GET ) &&
( event.type != EVHTTP_REQ_DELETE ))
{
hdrs.entry[hdr_entry].key = "Content-Length" ;
@ -765,7 +765,7 @@ int mtcHttpUtil_api_request ( libEvent & event )
hdrs.entry[hdr_entry].key = "User-Agent" ;
hdrs.entry[hdr_entry].value = "mtce/1.0" ;
hdr_entry++;
hdrs.entry[hdr_entry].key = "Content-Type" ;
hdrs.entry[hdr_entry].value = "application/json" ;
hdr_entry++;
@ -800,13 +800,13 @@ int mtcHttpUtil_api_request ( libEvent & event )
event.status = FAIL_HEADER_ADD ;
goto mtcHttpUtil_api_request_done ;
}
/* get some timestamps and log the request */
snprintf (&event.req_str[0], MAX_API_LOG_LEN-1,
snprintf (&event.req_str[0], MAX_API_LOG_LEN-1,
"\n%s [%5d] %s %s '%s' seq:%d -> Address : %s:%d %s %s ... %s",
pt(), getpid(),
pt(), getpid(),
event.hostname.c_str(),
event.service.c_str(),
event.service.c_str(),
event.operation.c_str(),
event.sequence, event.ip.c_str(), event.port,
getHttpCmdType_str( event.type ),
@ -842,10 +842,10 @@ int mtcHttpUtil_api_request ( libEvent & event )
event.log_prefix.append (" ");
event.log_prefix.append (event.operation) ;
hlog ("%s Requested (blocking) (to:%d)\n", event.log_prefix.c_str(), event.timeout);
/* Send the message with timeout */
event_base_dispatch(event.base);
goto mtcHttpUtil_api_request_done ;
}
else if (( event.request == SYSINV_UPDATE ) ||
@ -865,13 +865,50 @@ int mtcHttpUtil_api_request ( libEvent & event )
{
if ( event.operation.compare(SYSINV_OPER__UPDATE_UPTIME) )
{
hlog ("%s Dispatched (to:%d)\n", event.log_prefix.c_str(), event.timeout);
hlog ("%s Dispatched (to:%d)\n", event.log_prefix.c_str(), event.timeout);
}
/* TODO: Set a command timer to free up the resources
* and deal with the error if the handler never runs */
/*
* non-blocking event_base_loop can return ...
*
* 0 - command complete ; data available
* 1 - command dispatched but not complete ; no data available
* -1 - error in dispatch ; check errno
*
*/
event.active = true ;
return (event_base_loop(event.base, EVLOOP_NONBLOCK));
rc = event_base_loop(event.base, EVLOOP_NONBLOCK);
#ifdef WANT_FIT_TESTING
string value = "" ;
if ( daemon_want_fit ( FIT_CODE__FAIL_SWACT, event.hostname, "query", value ))
{
if ( value == "-1" )
rc = -1 ;
else
rc = atoi(value.data());
}
#endif
if (( rc == 0 ) || // Dispatched and done with Data ready
( rc == 1 )) // Dispatched but no response yet
{
if (( event.request == SMGR_QUERY_SWACT ) ||
( event.request == SMGR_START_SWACT ))
{
ilog ("%s dispatched%s\n",
event.log_prefix.c_str(),
rc ? "" : " ; data ready" );
}
rc = PASS ;
}
else
{
elog ("%s command dispatch failed (%d)\n",
event.log_prefix.c_str(), errno );
event.active = false ;
rc = FAIL_REQUEST ;
}
return (rc);
}
else
{
@ -881,7 +918,7 @@ int mtcHttpUtil_api_request ( libEvent & event )
event.log_prefix.append (event.service) ;
event.log_prefix.append (" ");
event.log_prefix.append (event.operation) ;
slog ("%s Requested (blocking) (to:%d) ----------------------------------------\n", event.log_prefix.c_str(), event.timeout );
slog ("%s Requested (blocking) (to:%d) ----------------------------------------\n", event.log_prefix.c_str(), event.timeout );
event_base_dispatch(event.base);
@ -890,10 +927,10 @@ int mtcHttpUtil_api_request ( libEvent & event )
}
else
{
elog ("%s Call to 'evhttp_make_request' failed (rc:%d)\n",
elog ("%s Call to 'evhttp_make_request' failed (rc:%d)\n",
event.hostname.c_str(), rc);
}
return (FAIL_MAKE_REQUEST);
mtcHttpUtil_api_request_done:

View File

@ -58,6 +58,8 @@ void nodeLinkClass::mtcSmgrApi_handler ( struct evhttp_request *req, void *arg )
mtcSmgrApi_handler_out:
mtcHttpUtil_log_event ( smgrEvent );
if ( smgrEvent.blocking == true )
{
mtcHttpUtil_free_conn ( smgrEvent );
@ -120,6 +122,15 @@ int nodeLinkClass::mtcSmgrApi_request ( struct nodeLinkClass::node * node_ptr, m
elog ("%s failed to allocate libEvent memory (%d)\n", node_ptr->hostname.c_str(), rc );
return (rc);
}
#ifdef WANT_FIT_TESTING
string value = "" ;
if ( daemon_want_fit ( FIT_CODE__FAIL_SWACT, node_ptr->hostname, "port", value ))
{
smgrEvent.port = atoi(value.data());
}
#endif
/* Set the common context of this new operation */
smgrEvent.status = RETRY ;
smgrEvent.hostname = node_ptr->hostname ;
@ -143,7 +154,13 @@ int nodeLinkClass::mtcSmgrApi_request ( struct nodeLinkClass::node * node_ptr, m
ilog ("%s sending 'query services' request to HA Service Manager\n",
smgrEvent.hostname.c_str());
return ( mtcHttpUtil_api_request ( smgrEvent )) ;
rc = mtcHttpUtil_api_request ( smgrEvent ) ;
if ( rc )
{
elog ("%s mtcHttpUtil_api_request (rc:%d)\n",
node_ptr->hostname.c_str(), rc );
}
return ( rc ) ;
}
else if ( operation == CONTROLLER_SWACT )
{