Merge "Mtce: Implement all token fetches as non-blocking operations."

This commit is contained in:
Zuul 2018-06-28 13:39:01 +00:00 committed by Gerrit Code Review
commit 4765055961
9 changed files with 334 additions and 114 deletions

View File

@ -103,6 +103,8 @@
#define FIT_CODE__NO_PULSE_REQUEST (15)
#define FIT_CODE__NO_PULSE_RESPONSE (16)
#define FIT_CODE__TOKEN (17)
#define FIT_CODE__FAST_PING_AUDIT_HOST (20)
#define FIT_CODE__FAST_PING_AUDIT_ALL (21)

View File

@ -911,7 +911,7 @@ int httpUtil_api_request ( libEvent & event )
event.status = RETRY ;
if ( event.blocking == true )
{
hlog ("%s Requested (blocking) (to:%d)\n", event.log_prefix.c_str(), event.timeout);
hlog ("%s Requested (blocking) (timeout:%d secs)\n", event.log_prefix.c_str(), event.timeout);
/* Send the message with timeout */
event_base_dispatch(event.base);
@ -920,7 +920,7 @@ int httpUtil_api_request ( libEvent & event )
}
else if ( event.request == KEYSTONE_GET_TOKEN )
{
httpUtil_event_info (event);
hlog ("%s Requested (non-blocking) (timeout:%d secs)\n", event.log_prefix.c_str(), event.timeout);
event.active = true ;
event.status = event_base_loop(event.base, EVLOOP_NONBLOCK);
httpUtil_latency_log ( event, label.c_str(), __LINE__, MAX_DELAY_B4_LATENCY_LOG ); /* Should be immediate ; non blocking */
@ -929,7 +929,7 @@ int httpUtil_api_request ( libEvent & event )
}
else
{
hlog ("%s Requested (blocking) (to:%d)\n", event.log_prefix.c_str(), event.timeout );
hlog ("%s Requested (blocking) (timeout:%d secs)\n", event.log_prefix.c_str(), event.timeout );
event_base_dispatch(event.base);
httpUtil_latency_log ( event, label.c_str(), __LINE__, MAX_DELAY_B4_LATENCY_LOG ) ;
goto httpUtil_api_request_done ;

View File

@ -46,7 +46,7 @@ using namespace std;
#define HTTP_SYSINV_CRIT_TIMEOUT (20)
#define HTTP_SYSINV_NONC_TIMEOUT (10)
#define HTTP_TOKEN_TIMEOUT (15)
#define HTTP_TOKEN_TIMEOUT (30)
#define HTTP_KEYSTONE_GET_TIMEOUT (10)
#define HTTP_SMGR_TIMEOUT (20)
#define HTTP_VIM_TIMEOUT (20)

View File

@ -30,6 +30,7 @@
#include "tokenUtil.h" /* for ... this module header */
#define GET_SERVICE_LIST_LABEL ((const char *)"OS-KSADM:services")
#define TOKEN_REFRESH_RETRY_DELAY (5)
/* The static token used for authentication by any
* daemon that includes this module */
@ -37,6 +38,301 @@ static keyToken_type __token__ ;
keyToken_type * tokenUtil_get_ptr ( void ) { return &__token__ ; };
keyToken_type tokenUtil_get_token ( void ) { return __token__ ; };
/* hold off for TOKEN_REFRESH_RETRY_DELAY seconds before trying again. */
static int __retries = 0 ;
static void __retry_holdoff( int delay )
{
for ( int i = 0 ; i < delay ; i++ )
{
daemon_signal_hdlr ();
__retries++ ;
sleep (1);
}
}
/***************************************************************************
*
* Name : tokenUtil_get_first
*
* Description: Get the first token. Should only be called at process startup
* time before entering the main loop.
*
* Assumptions: The token request is non-blocking but this interface does not
* return until a token has been acquired.
* The signal handler is serviced.
*
* Behavior : Issue non-blocking token request. If the request fails
* immediately the the far end did not connect and we backoff
* for RETRY_DELAY seconds. With a successful send we loop for
* up to 2 seconds longer than the specified timeout, Any failure
* results in a retry after RETRY_DELAY seconds.
* Return as soon as we get a token.
*
* Returns : nothing
*
***************************************************************************/
void tokenUtil_get_first ( libEvent & event, string & hostname )
{
int rc = FAIL ; /* error staus local variable */
bool got_token = false ; /* exit criteria when true */
bool blocking = false ; /* token request is non_blocking */
int log_throttle = 0 ; /* control var to prevent log flooding */
__retries = 0 ; /* count how long it took to get the token */
dlog ("%s Requesting initial token\n", hostname.c_str());
do
{
log_throttle = 0 ;
__token__.token.clear(); /* start with an empty token */
/* Issue the token request in non-blocking form */
rc = tokenUtil_new_token ( event, hostname, blocking );
#ifdef WANT_FIT_TESTING
string data = "" ;
if ( daemon_want_fit ( FIT_CODE__TOKEN , hostname, "fail_request", data ))
rc = atoi(data.data()) ;
#endif
if ( rc == PASS )
{
for ( int i = 0 ; i < (HTTP_TOKEN_TIMEOUT+2) ; i++ )
{
daemon_signal_hdlr ();
/* Look for the response */
if ( event.base )
{
/* look for the response */
event_base_loop( event.base, EVLOOP_NONBLOCK );
/* response is received once the active state is false */
if ( event.active )
{
ilog_throttled ( log_throttle, 20,
"%s waiting on token request completion loop:%d\n",
hostname.c_str(), i);
__retries++ ;
sleep (1);
}
else
{
#ifdef WANT_FIT_TESTING
string data = "" ;
if ( daemon_want_fit ( FIT_CODE__TOKEN, hostname, "timeout", data ))
{
event.status = atoi(data.data());
__token__.token.clear();
}
#endif
if ( event.status == PASS )
{
ilog ("%s got token after %d seconds\n",
hostname.c_str(),
__retries );
got_token = true ;
break ; /* will exit below if there is a token */
}
else
{
/* report warning for timeout and error otherwise */
if ( event.status == FAIL_TIMEOUT )
{
wlog ("%s token request timeout after %d seconds\n",
hostname.c_str(),
__retries);
}
else
{
elog ("%s token request failed after %d seconds (rc:%d)",
hostname.c_str(),
__retries,
event.status );
}
/* check connection pointers ; for debug and failure rca */
if ( ( event.base == NULL ) || ( event.conn == NULL ))
{
wlog ("%s ... base:%p conn:%p retries:%d\n",
hostname.c_str(),
event.base,
event.conn,
__retries);
}
break ; /* will retry below */
} /* end else status fail */
} /* end else no active */
}
else
{
/* should not get here with a null base pointer
* but if we do then break and try again. */
slog ("%s unexpected null event base (%d) - retrying\n",
hostname.c_str(),
__retries );
break ; /* retry below */
}
}
/* Check for a response string */
if ( __token__.token.empty() )
{
elog ("%s no token ; %d second hold-off before retry\n",
hostname.c_str(), TOKEN_REFRESH_RETRY_DELAY);
__retry_holdoff(TOKEN_REFRESH_RETRY_DELAY);
}
}
else
{
elog ("%s token request failed (rc:%d:%d) active:%d\n",
hostname.c_str(),
rc,
event.status,
event.active );
__retry_holdoff(TOKEN_REFRESH_RETRY_DELAY);
}
httpUtil_free_conn ( event );
httpUtil_free_base ( event );
} while (( __token__.token.empty() ) || ( got_token == false )) ;
dlog ("%s took %d seconds to get token\n", hostname.c_str(), __retries );
/* wait 5 seconds for sysinv to be ready if the number of retries > 1 */
if ( __retries > 1 )
{
__retry_holdoff (3);
}
}
/****************************************************************************
*
* Name : tokenUtil_manage_token
*
* Purpose : Manage token refresh and failure retries
*
* Description: There should always be an active token refresh timer running.
* If there is none (swerr) then one is started.
*
* Any maintenance daemon that needs to periodically refresh its
* token must periodicslly call this API as part of its main loop.
*
* All error conditions are handled with a small hold-off retry
* by timer rater than inline wait like in get_first.
*
* Returns : Nothing
*
***************************************************************************/
void tokenUtil_manage_token ( libEvent & event,
string & hostname,
int & refresh_rate,
struct mtc_timer & token_refresh_timer,
void (*handler)(int, siginfo_t*, void*))
{
#ifdef WANT_FIT_TESTING
if ( daemon_want_fit ( FIT_CODE__TOKEN, hostname , "corrupt" ))
tokenUtil_fail_token ();
#endif
if ( token_refresh_timer.ring == true )
{
bool blocking = false ; /* token request is non_blocking */
int _rr = refresh_rate ; /* local copy of refresh rate */
dlog ("%s renewing token\n", hostname.c_str());
/* this is a non-blocking call with the 'false' spec */
int rc = tokenUtil_new_token ( event, hostname, blocking );
if ( rc )
{
/* go for a retry by delayed refresh if the request fails */
__token__.delay = true ;
}
#ifdef WANT_FIT_TESTING
else if ( daemon_want_fit ( FIT_CODE__TOKEN, hostname, "null_base" ))
httpUtil_free_base ( event );
#endif
if ( __token__.delay == true )
{
__token__.delay = false ;
_rr = TOKEN_REFRESH_RETRY_DELAY ;
}
mtcTimer_start(token_refresh_timer,handler,_rr );
}
else if ( token_refresh_timer.active == false )
{
slog ("%s no active token refresh timer ; starting new\n", hostname.c_str());
mtcTimer_start(token_refresh_timer,handler,TOKEN_REFRESH_RETRY_DELAY);
}
else if ( __token__.delay == true )
{
ilog ( "Token Refresh in %d seconds\n", TOKEN_REFRESH_RETRY_DELAY );
mtcTimer_stop ( token_refresh_timer );
__token__.delay = false ;
/* force refresh of token in 5 seconds */
mtcTimer_start(token_refresh_timer,handler,TOKEN_REFRESH_RETRY_DELAY);
}
else if ( event.active == true )
{
/* Look for the response */
if ( event.base )
{
event_base_loop( event.base, EVLOOP_NONBLOCK );
}
else
{
/* should not get here. event active while base is null
* try and recover from this error case. */
__token__.delay = true ;
event.active = false ;
}
}
else if ( event.base )
{
#ifdef WANT_FIT_TESTING
string data = "" ;
if ( daemon_want_fit ( FIT_CODE__TOKEN, hostname, "refresh", data ))
__token__.token.clear();
#endif
/* Check for a response string */
if ( __token__.token.empty() )
{
elog ("%s no token ; %d second hold-off before retry\n",
hostname.c_str(), TOKEN_REFRESH_RETRY_DELAY );
/* force refresh of token in 5 seconds */
mtcTimer_reset(token_refresh_timer);
mtcTimer_start(token_refresh_timer,handler,TOKEN_REFRESH_RETRY_DELAY);
}
dlog ("%s freeing token event base and conn data\n", hostname.c_str());
httpUtil_free_conn ( event );
httpUtil_free_base ( event );
}
#ifdef WANT_FIT_TESTING
if ( daemon_want_fit ( FIT_CODE__TOKEN, hostname, "cancel_timer" ))
mtcTimer_reset ( token_refresh_timer );
#endif
}
void tokenUtil_log_refresh ( void )
{
if ( __token__.refreshed == true )
@ -58,7 +354,7 @@ int tokenUtil_token_refresh ( libEvent & event, string hostname )
if ( event.status != PASS )
{
event.status = tokenUtil_new_token( event, hostname );
event.status = tokenUtil_new_token( event, hostname );
}
else
{
@ -148,7 +444,7 @@ string _get_keystone_prefix_path ( )
int tokenUtil_handler ( libEvent & event )
{
jsonUtil_auth_type info ;
string hn = event.hostname ;
int rc = event.status ;
@ -185,6 +481,7 @@ int tokenUtil_handler ( libEvent & event )
token_ptr->token = token_str ;
token_ptr->url = info.adminURL ;
token_ptr->refreshed = true ;
}
}
else if ( event.request == KEYSTONE_GET_ENDPOINT_LIST )
@ -288,6 +585,7 @@ int tokenUtil_handler ( libEvent & event )
event.information.c_str(),
event.label.c_str());
}
event.active = false ;
return (event.status);
}
else if ( event.request == KEYSTONE_GET_SERVICE_LIST )
@ -393,6 +691,7 @@ int tokenUtil_handler ( libEvent & event )
{
dlog ("%s Token Refresh O.K.\n", event.hostname.c_str());
}
event.active = false ;
return (rc);
}
@ -403,7 +702,7 @@ void tokenUtil_fail_token ( void )
}
/* fetches an authorization token as a blocking request */
int tokenUtil_new_token ( libEvent & event, string hostname )
int tokenUtil_new_token ( libEvent & event, string hostname, bool blocking )
{
ilog ("%s Requesting Authentication Token\n", hostname.c_str());
@ -418,7 +717,8 @@ int tokenUtil_new_token ( libEvent & event, string hostname )
dlog ("%s fetching new token\n", event.hostname.c_str());
event.prefix_path = _get_keystone_prefix_path();
event.blocking = true ;
event.blocking = blocking ;
// event.blocking = true ;
event.request = KEYSTONE_GET_TOKEN ;
event.operation = "get new" ;
event.type = EVHTTP_REQ_POST ;

View File

@ -42,7 +42,8 @@ keyToken_type * tokenUtil_get_ptr ( void );
keyToken_type tokenUtil_get_token ( void );
int tokenUtil_handler ( libEvent & event );
int tokenUtil_new_token ( libEvent & event, string hostname );
int tokenUtil_new_token ( libEvent & event, string hostname, bool blocking=true );
void tokenUtil_get_first ( libEvent & event, string & hostname );
int tokenUtil_token_refresh( libEvent & event, string hostname );
int tokenUtil_get_endpoints( libEvent & event, string service_uuid );
string tokenUtil_get_svc_uuid ( libEvent & event, string service_name );
@ -50,9 +51,15 @@ string tokenUtil_get_svc_uuid ( libEvent & event, string service_name )
void tokenUtil_fail_token ( void );
void tokenUtil_log_refresh ( void );
int keystone_config_handler ( void * user,
int keystone_config_handler ( void * user,
const char * section,
const char * name,
const char * value);
void tokenUtil_manage_token ( libEvent & event,
string & hostname,
int & refresh_rate,
struct mtc_timer & token_refresh_timer,
void (*handler)(int, siginfo_t*, void*));
#endif /* __INCLUDE_TOKENUTIL_H__ */

View File

@ -253,7 +253,6 @@ void hwmon_service ( hwmon_ctrl_type * ctrl_ptr )
daemon_config_type * config_ptr = daemon_get_cfg_ptr();
hwmon_socket_type * sock_ptr = getSock_ptr();
keyToken_type * token_ptr = tokenUtil_get_ptr();
hostInv.hostBase.my_hostname = ctrl_ptr->my_hostname ;
hostInv.hostBase.my_local_ip = ctrl_ptr->my_local_ip ;
@ -362,37 +361,11 @@ void hwmon_service ( hwmon_ctrl_type * ctrl_ptr )
/* Handle refreshing the authentication token */
tokenUtil_log_refresh ();
if ( hwmonTimer_token.ring == true )
{
hwmonTimer_token.ring = false ;
tokenUtil_new_token ( ctrl_ptr->httpEvent, ctrl_ptr->my_hostname );
/* If the token fetch fails then we retry in a shorter interval */
if ( token_ptr->delay == true )
{
mtcTimer_start( hwmonTimer_token, hwmonTimer_handler, 5 );
token_ptr->delay = false ;
}
/* otherwise at the normal refresh rate */
else
{
mtcTimer_start ( hwmonTimer_token, hwmonTimer_handler, config_ptr->token_refresh_rate ) ;
}
}
/* handle the spontaneous error case */
else if ( token_ptr->delay == true )
{
mtcTimer_stop ( hwmonTimer_token );
mtcTimer_start( hwmonTimer_token, hwmonTimer_handler, 5 );
token_ptr->delay = false ;
}
/* Always ensure that the token refesh timer is running */
if ( hwmonTimer_token.tid == NULL )
{
slog ("auto-restarting dead token refresh timer\n");
mtcTimer_start ( hwmonTimer_token, hwmonTimer_handler, config_ptr->token_refresh_rate ) ;
}
tokenUtil_manage_token ( ctrl_ptr->httpEvent,
ctrl_ptr->my_hostname,
config_ptr->token_refresh_rate,
hwmonTimer_token,
hwmonTimer_handler );
/* Run the FSM */
hostInv.hwmon_fsm ( ) ;

View File

@ -371,30 +371,14 @@ void daemon_service_run ( void )
}
}
/* Get an Authentication Token */
ilog ("%s Requesting initial token\n", hwmon_ctrl.my_hostname.c_str() );
do
{
// rc = mtcInv.mtcKeyApi_get_token ( mtcInv.my_hostname );
rc = tokenUtil_new_token ( hwmon_ctrl.httpEvent, hwmon_ctrl.my_hostname );
if ( rc )
{
elog ("Failed to get authentication token (%d)\n", rc );
sleep (2);
}
if ( hwmon_ctrl.httpEvent.base )
{
slog ("%s token base:%p\n",
hwmon_ctrl.my_hostname.c_str(),
hwmon_ctrl.httpEvent.base );
}
} while ( rc != PASS ) ;
/* Get the initial token.
* This call does not return until a token is received */
tokenUtil_get_first ( hwmon_ctrl.httpEvent, hwmon_ctrl.my_hostname );
#ifdef WANT_FIT_TESTING
if ( daemon_want_fit ( FIT_CODE__HWMON__CORRUPT_TOKEN ))
{
tokenUtil_fail_token ();
}
#endif
/* enable the base level signal handler latency monitor */
daemon_latency_monitor (true);

View File

@ -929,25 +929,9 @@ int _self_provision ( void )
daemon_exit();
}
/* Get an Authentication Token */
ilog ("%s Requesting initial token\n", mtcInv.my_hostname.c_str() );
do
{
rc = tokenUtil_new_token ( mtcInv.tokenEvent, mtcInv.my_hostname );
if ( rc )
{
elog ("Failed to get authentication token (%d)\n", rc );
}
/* Give system inventory some time after start-up.
* We see frequent first request failures requiring retry when this
* wait was inside the above if clause. Moving it here ensure there
* is always a small wait after the token fetch on process startup. */
daemon_signal_hdlr ();
sleep (5);
daemon_signal_hdlr ();
} while ( rc != PASS ) ;
/* Get the initial token.
* This call does not return until a token is received */
tokenUtil_get_first ( mtcInv.tokenEvent, mtcInv.my_hostname );
#ifdef WANT_FIT_TESTING
if ( daemon_want_fit ( FIT_CODE__CORRUPT_TOKEN, mtcInv.my_hostname ))
@ -1296,6 +1280,7 @@ void daemon_service_run ( void )
/* Run Maintenance service forever */
for ( ; ; )
{
daemon_signal_hdlr ();
/**
* Can't just run 'mtcHttpSvr_look' off select as it is seen to miss events.
* Would like to use event_base_loopexit with event_base_loopcontinue
@ -1304,11 +1289,11 @@ void daemon_service_run ( void )
* mtcHttpSvr_work ( mtce_event );
**/
mtcHttpSvr_look ( mtce_event );
tokenUtil_manage_token ( mtcInv.tokenEvent, mtcInv.my_hostname, mtcInv.token_refresh_rate, mtcInv.mtcTimer_token, mtcTimer_handler );
tokenUtil_log_refresh ();
if ( mtcInv_ptr->num_hosts () == 0 )
{
// mtcHttpSvr_look ( mtce_event );
sleep (1);
continue ;
}

View File

@ -35,8 +35,6 @@ int nodeLinkClass::fsm ( struct nodeLinkClass::node * node_ptr )
{
int rc = PASS ;
keyToken_type * __token_ptr = tokenUtil_get_ptr();
if ( node_ptr == NULL )
{
slog ("Null Node Pointer\n");
@ -92,35 +90,6 @@ int nodeLinkClass::fsm ( struct nodeLinkClass::node * node_ptr )
*/
nodeLinkClass::online_handler ( node_ptr );
if ( this->mtcTimer_token.ring == true )
{
dlog ("%s renewing token\n", my_hostname.c_str());
/* this is a blocking call */
tokenUtil_new_token ( tokenEvent, my_hostname );
if ( __token_ptr->delay == true )
{
__token_ptr->delay = false ;
mtcTimer_start( mtcTimer_token, mtcTimer_handler, 5 );
}
else
{
mtcTimer_start ( mtcTimer_token, mtcTimer_handler, token_refresh_rate );
}
}
else if ( __token_ptr->delay == true )
{
ilog ( "Token Refresh in 5 seconds\n" );
if ( mtcTimer_token.tid )
mtcTimer_stop ( mtcTimer_token );
__token_ptr->delay = false ;
/* force refresh of token in 5 seconds */
mtcTimer_start( mtcTimer_token, mtcTimer_handler, 5 );
}
if ( node_ptr->adminAction == MTC_ADMIN_ACTION__DELETE )
{
flog ("%s -> Delete Action\n", node_ptr->hostname.c_str());