Mtce: Improve non-blocking http request dispatch
Maintenance is seen to intermittently fail Swact requests early after initial system provisioning, without logging an error reason, only to always succeed later on. The issue is difficult to reproduce, so this update adds extra logging to this code path and implements a speculative fix. The non-zero return code of the event_base_loop call is never logged. The libevent documentation states that this API returns 1 while the target has not yet provided any data. The theory is that, because the call is local, it normally returns with data even on the first dispatch. However, during early system configuration, when the system is busy, that first dispatch does not complete immediately as it normally does later on. The speculation is that it instead returns 1, meaning retry, but the existing code path treats that as a failure. This update modifies the code to return PASS if the command dispatch returns 1, while the error case of -1 gets enhanced logging and continues to be treated as a failure. Test Plan: PASS: Swact 5 times; PASS: Lock/Unlock Host; PASS: Large System DOR. Related Bug: https://bugs.launchpad.net/starlingx/+bug/1791381 Change-Id: I19b22e07d3224b2e9dd3f3569ecbe9aed7d9402f Signed-off-by: Eric MacDonald <eric.macdonald@windriver.com>
This commit is contained in:
parent
74c5f89ab4
commit
316032b904
|
@ -1,3 +1,3 @@
|
|||
SRC_DIR="cgts-mtce-common-1.0"
|
||||
TIS_PATCH_VER=137
|
||||
TIS_PATCH_VER=138
|
||||
BUILD_IS_SLOW=5
|
||||
|
|
|
@ -112,6 +112,7 @@
|
|||
#define FIT_CODE__LOCK_HOST (31)
|
||||
#define FIT_CODE__FORCE_LOCK_HOST (32)
|
||||
#define FIT_CODE__UNLOCK_HOST (33)
|
||||
#define FIT_CODE__FAIL_SWACT (34)
|
||||
|
||||
#define FIT_CODE__FM_SET_ALARM (40)
|
||||
#define FIT_CODE__FM_GET_ALARM (41)
|
||||
|
|
|
@ -1344,8 +1344,8 @@ int daemon_load_fit ( void )
|
|||
{
|
||||
daemon_log_value ( "/var/run/fit/fithits", "hits =", __fit_info.hits );
|
||||
daemon_remove_file ( "/var/run/fit/fitdone" );
|
||||
daemon_rename_file ( FIT__INFO_FILEPATH, FIT__INFO_FILENAME, FIT__INFO_FILENAME_RENAMED );
|
||||
}
|
||||
daemon_rename_file ( FIT__INFO_FILEPATH, FIT__INFO_FILENAME, FIT__INFO_FILENAME_RENAMED );
|
||||
|
||||
#endif
|
||||
return (PASS);
|
||||
|
|
|
@ -451,7 +451,7 @@ int mtcHttpUtil_status ( libEvent & event )
|
|||
}
|
||||
default:
|
||||
{
|
||||
hlog3 ("%s Status: %d\n", event.hostname.c_str(), event.status );
|
||||
wlog ("%s Status: %d\n", event.hostname.c_str(), event.status );
|
||||
rc = event.status ;
|
||||
break;
|
||||
}
|
||||
|
@ -868,10 +868,47 @@ int mtcHttpUtil_api_request ( libEvent & event )
|
|||
hlog ("%s Dispatched (to:%d)\n", event.log_prefix.c_str(), event.timeout);
|
||||
}
|
||||
|
||||
/* TODO: Set a command timer to free up the resources
|
||||
* and deal with the error if the handler never runs */
|
||||
/*
|
||||
* non-blocking event_base_loop can return ...
|
||||
*
|
||||
* 0 - command complete ; data available
|
||||
* 1 - command dispatched but not complete ; no data available
|
||||
* -1 - error in dispatch ; check errno
|
||||
*
|
||||
*/
|
||||
event.active = true ;
|
||||
return (event_base_loop(event.base, EVLOOP_NONBLOCK));
|
||||
rc = event_base_loop(event.base, EVLOOP_NONBLOCK);
|
||||
|
||||
#ifdef WANT_FIT_TESTING
|
||||
string value = "" ;
|
||||
if ( daemon_want_fit ( FIT_CODE__FAIL_SWACT, event.hostname, "query", value ))
|
||||
{
|
||||
if ( value == "-1" )
|
||||
rc = -1 ;
|
||||
else
|
||||
rc = atoi(value.data());
|
||||
}
|
||||
#endif
|
||||
if (( rc == 0 ) || // Dispatched and done with Data ready
|
||||
( rc == 1 )) // Dispatched but no response yet
|
||||
{
|
||||
if (( event.request == SMGR_QUERY_SWACT ) ||
|
||||
( event.request == SMGR_START_SWACT ))
|
||||
{
|
||||
ilog ("%s dispatched%s\n",
|
||||
event.log_prefix.c_str(),
|
||||
rc ? "" : " ; data ready" );
|
||||
}
|
||||
rc = PASS ;
|
||||
}
|
||||
else
|
||||
{
|
||||
elog ("%s command dispatch failed (%d)\n",
|
||||
event.log_prefix.c_str(), errno );
|
||||
event.active = false ;
|
||||
rc = FAIL_REQUEST ;
|
||||
}
|
||||
return (rc);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
|
@ -58,6 +58,8 @@ void nodeLinkClass::mtcSmgrApi_handler ( struct evhttp_request *req, void *arg )
|
|||
|
||||
mtcSmgrApi_handler_out:
|
||||
|
||||
mtcHttpUtil_log_event ( smgrEvent );
|
||||
|
||||
if ( smgrEvent.blocking == true )
|
||||
{
|
||||
mtcHttpUtil_free_conn ( smgrEvent );
|
||||
|
@ -120,6 +122,15 @@ int nodeLinkClass::mtcSmgrApi_request ( struct nodeLinkClass::node * node_ptr, m
|
|||
elog ("%s failed to allocate libEvent memory (%d)\n", node_ptr->hostname.c_str(), rc );
|
||||
return (rc);
|
||||
}
|
||||
|
||||
#ifdef WANT_FIT_TESTING
|
||||
string value = "" ;
|
||||
if ( daemon_want_fit ( FIT_CODE__FAIL_SWACT, node_ptr->hostname, "port", value ))
|
||||
{
|
||||
smgrEvent.port = atoi(value.data());
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Set the common context of this new operation */
|
||||
smgrEvent.status = RETRY ;
|
||||
smgrEvent.hostname = node_ptr->hostname ;
|
||||
|
@ -143,7 +154,13 @@ int nodeLinkClass::mtcSmgrApi_request ( struct nodeLinkClass::node * node_ptr, m
|
|||
ilog ("%s sending 'query services' request to HA Service Manager\n",
|
||||
smgrEvent.hostname.c_str());
|
||||
|
||||
return ( mtcHttpUtil_api_request ( smgrEvent )) ;
|
||||
rc = mtcHttpUtil_api_request ( smgrEvent ) ;
|
||||
if ( rc )
|
||||
{
|
||||
elog ("%s mtcHttpUtil_api_request (rc:%d)\n",
|
||||
node_ptr->hostname.c_str(), rc );
|
||||
}
|
||||
return ( rc ) ;
|
||||
}
|
||||
else if ( operation == CONTROLLER_SWACT )
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue