diff --git a/mtce-common/cgts-mtce-common-1.0/common/fitCodes.h b/mtce-common/cgts-mtce-common-1.0/common/fitCodes.h index 2f041d5e..74a9d2b8 100644 --- a/mtce-common/cgts-mtce-common-1.0/common/fitCodes.h +++ b/mtce-common/cgts-mtce-common-1.0/common/fitCodes.h @@ -103,6 +103,8 @@ #define FIT_CODE__NO_PULSE_REQUEST (15) #define FIT_CODE__NO_PULSE_RESPONSE (16) +#define FIT_CODE__TOKEN (17) + #define FIT_CODE__FAST_PING_AUDIT_HOST (20) #define FIT_CODE__FAST_PING_AUDIT_ALL (21) diff --git a/mtce-common/cgts-mtce-common-1.0/common/httpUtil.cpp b/mtce-common/cgts-mtce-common-1.0/common/httpUtil.cpp index e626961e..1370ecc8 100644 --- a/mtce-common/cgts-mtce-common-1.0/common/httpUtil.cpp +++ b/mtce-common/cgts-mtce-common-1.0/common/httpUtil.cpp @@ -911,7 +911,7 @@ int httpUtil_api_request ( libEvent & event ) event.status = RETRY ; if ( event.blocking == true ) { - hlog ("%s Requested (blocking) (to:%d)\n", event.log_prefix.c_str(), event.timeout); + hlog ("%s Requested (blocking) (timeout:%d secs)\n", event.log_prefix.c_str(), event.timeout); /* Send the message with timeout */ event_base_dispatch(event.base); @@ -920,7 +920,7 @@ int httpUtil_api_request ( libEvent & event ) } else if ( event.request == KEYSTONE_GET_TOKEN ) { - httpUtil_event_info (event); + hlog ("%s Requested (non-blocking) (timeout:%d secs)\n", event.log_prefix.c_str(), event.timeout); event.active = true ; event.status = event_base_loop(event.base, EVLOOP_NONBLOCK); httpUtil_latency_log ( event, label.c_str(), __LINE__, MAX_DELAY_B4_LATENCY_LOG ); /* Should be immediate ; non blocking */ @@ -929,7 +929,7 @@ int httpUtil_api_request ( libEvent & event ) } else { - hlog ("%s Requested (blocking) (to:%d)\n", event.log_prefix.c_str(), event.timeout ); + hlog ("%s Requested (blocking) (timeout:%d secs)\n", event.log_prefix.c_str(), event.timeout ); event_base_dispatch(event.base); httpUtil_latency_log ( event, label.c_str(), __LINE__, MAX_DELAY_B4_LATENCY_LOG ) ; goto httpUtil_api_request_done ; diff --git a/mtce-common/cgts-mtce-common-1.0/common/httpUtil.h b/mtce-common/cgts-mtce-common-1.0/common/httpUtil.h index 294f2a51..f39c460d 100644 --- a/mtce-common/cgts-mtce-common-1.0/common/httpUtil.h +++ b/mtce-common/cgts-mtce-common-1.0/common/httpUtil.h @@ -46,7 +46,7 @@ using namespace std; #define HTTP_SYSINV_CRIT_TIMEOUT (20) #define HTTP_SYSINV_NONC_TIMEOUT (10) -#define HTTP_TOKEN_TIMEOUT (15) +#define HTTP_TOKEN_TIMEOUT (30) #define HTTP_KEYSTONE_GET_TIMEOUT (10) #define HTTP_SMGR_TIMEOUT (20) #define HTTP_VIM_TIMEOUT (20) diff --git a/mtce-common/cgts-mtce-common-1.0/common/tokenUtil.cpp b/mtce-common/cgts-mtce-common-1.0/common/tokenUtil.cpp index 0613839e..251ecf56 100644 --- a/mtce-common/cgts-mtce-common-1.0/common/tokenUtil.cpp +++ b/mtce-common/cgts-mtce-common-1.0/common/tokenUtil.cpp @@ -30,6 +30,7 @@ #include "tokenUtil.h" /* for ... this module header */ #define GET_SERVICE_LIST_LABEL ((const char *)"OS-KSADM:services") +#define TOKEN_REFRESH_RETRY_DELAY (5) /* The static token used for authentication by any * daemon that includes this module */ @@ -37,6 +38,301 @@ static keyToken_type __token__ ; keyToken_type * tokenUtil_get_ptr ( void ) { return &__token__ ; }; keyToken_type tokenUtil_get_token ( void ) { return __token__ ; }; + +/* hold off for TOKEN_REFRESH_RETRY_DELAY seconds before trying again. */ +static int __retries = 0 ; +static void __retry_holdoff( int delay ) +{ + for ( int i = 0 ; i < delay ; i++ ) + { + daemon_signal_hdlr (); + __retries++ ; + sleep (1); + } +} + +/*************************************************************************** + * + * Name : tokenUtil_get_first + * + * Description: Get the first token. Should only be called at process startup + * time before entering the main loop. + * + * Assumptions: The token request is non-blocking but this interface does not + * return until a token has been acquired. + * The signal handler is serviced. + * + * Behavior : Issue non-blocking token request. If the request fails + * immediately the the far end did not connect and we backoff + * for RETRY_DELAY seconds. With a successful send we loop for + * up to 2 seconds longer than the specified timeout, Any failure + * results in a retry after RETRY_DELAY seconds. + * Return as soon as we get a token. + * + * Returns : nothing + * + ***************************************************************************/ +void tokenUtil_get_first ( libEvent & event, string & hostname ) +{ + int rc = FAIL ; /* error staus local variable */ + bool got_token = false ; /* exit criteria when true */ + bool blocking = false ; /* token request is non_blocking */ + int log_throttle = 0 ; /* control var to prevent log flooding */ + + __retries = 0 ; /* count how long it took to get the token */ + + dlog ("%s Requesting initial token\n", hostname.c_str()); + + do + { + log_throttle = 0 ; + + __token__.token.clear(); /* start with an empty token */ + + /* Issue the token request in non-blocking form */ + rc = tokenUtil_new_token ( event, hostname, blocking ); + +#ifdef WANT_FIT_TESTING + string data = "" ; + if ( daemon_want_fit ( FIT_CODE__TOKEN , hostname, "fail_request", data )) + rc = atoi(data.data()) ; +#endif + + if ( rc == PASS ) + { + for ( int i = 0 ; i < (HTTP_TOKEN_TIMEOUT+2) ; i++ ) + { + daemon_signal_hdlr (); + + /* Look for the response */ + if ( event.base ) + { + /* look for the response */ + event_base_loop( event.base, EVLOOP_NONBLOCK ); + + /* response is received once the active state is false */ + if ( event.active ) + { + ilog_throttled ( log_throttle, 20, + "%s waiting on token request completion loop:%d\n", + hostname.c_str(), i); + __retries++ ; + sleep (1); + } + else + { + +#ifdef WANT_FIT_TESTING + string data = "" ; + if ( daemon_want_fit ( FIT_CODE__TOKEN, hostname, "timeout", data )) + { + event.status = atoi(data.data()); + __token__.token.clear(); + } +#endif + + if ( event.status == PASS ) + { + ilog ("%s got token after %d seconds\n", + hostname.c_str(), + __retries ); + got_token = true ; + break ; /* will exit below if there is a token */ + } + else + { + /* report warning for timeout and error otherwise */ + if ( event.status == FAIL_TIMEOUT ) + { + wlog ("%s token request timeout after %d seconds\n", + hostname.c_str(), + __retries); + } + else + { + elog ("%s token request failed after %d seconds (rc:%d)", + hostname.c_str(), + __retries, + event.status ); + } + + /* check connection pointers ; for debug and failure rca */ + if ( ( event.base == NULL ) || ( event.conn == NULL )) + { + wlog ("%s ... base:%p conn:%p retries:%d\n", + hostname.c_str(), + event.base, + event.conn, + __retries); + } + break ; /* will retry below */ + } /* end else status fail */ + } /* end else no active */ + } + else + { + /* should not get here with a null base pointer + * but if we do then break and try again. */ + slog ("%s unexpected null event base (%d) - retrying\n", + hostname.c_str(), + __retries ); + + break ; /* retry below */ + } + } + + /* Check for a response string */ + if ( __token__.token.empty() ) + { + elog ("%s no token ; %d second hold-off before retry\n", + hostname.c_str(), TOKEN_REFRESH_RETRY_DELAY); + __retry_holdoff(TOKEN_REFRESH_RETRY_DELAY); + } + } + else + { + elog ("%s token request failed (rc:%d:%d) active:%d\n", + hostname.c_str(), + rc, + event.status, + event.active ); + + __retry_holdoff(TOKEN_REFRESH_RETRY_DELAY); + } + + httpUtil_free_conn ( event ); + httpUtil_free_base ( event ); + + } while (( __token__.token.empty() ) || ( got_token == false )) ; + + dlog ("%s took %d seconds to get token\n", hostname.c_str(), __retries ); + + /* wait 5 seconds for sysinv to be ready if the number of retries > 1 */ + if ( __retries > 1 ) + { + __retry_holdoff (3); + } +} + +/**************************************************************************** + * + * Name : tokenUtil_manage_token + * + * Purpose : Manage token refresh and failure retries + * + * Description: There should always be an active token refresh timer running. + * If there is none (swerr) then one is started. + * + * Any maintenance daemon that needs to periodically refresh its + * token must periodicslly call this API as part of its main loop. + * + * All error conditions are handled with a small hold-off retry + * by timer rater than inline wait like in get_first. + * + * Returns : Nothing + * + ***************************************************************************/ +void tokenUtil_manage_token ( libEvent & event, + string & hostname, + int & refresh_rate, + struct mtc_timer & token_refresh_timer, + void (*handler)(int, siginfo_t*, void*)) +{ + +#ifdef WANT_FIT_TESTING + if ( daemon_want_fit ( FIT_CODE__TOKEN, hostname , "corrupt" )) + tokenUtil_fail_token (); +#endif + + if ( token_refresh_timer.ring == true ) + { + bool blocking = false ; /* token request is non_blocking */ + int _rr = refresh_rate ; /* local copy of refresh rate */ + + dlog ("%s renewing token\n", hostname.c_str()); + + /* this is a non-blocking call with the 'false' spec */ + int rc = tokenUtil_new_token ( event, hostname, blocking ); + if ( rc ) + { + /* go for a retry by delayed refresh if the request fails */ + __token__.delay = true ; + } + +#ifdef WANT_FIT_TESTING + else if ( daemon_want_fit ( FIT_CODE__TOKEN, hostname, "null_base" )) + httpUtil_free_base ( event ); +#endif + + if ( __token__.delay == true ) + { + __token__.delay = false ; + _rr = TOKEN_REFRESH_RETRY_DELAY ; + } + mtcTimer_start(token_refresh_timer,handler,_rr ); + } + else if ( token_refresh_timer.active == false ) + { + slog ("%s no active token refresh timer ; starting new\n", hostname.c_str()); + mtcTimer_start(token_refresh_timer,handler,TOKEN_REFRESH_RETRY_DELAY); + } + else if ( __token__.delay == true ) + { + ilog ( "Token Refresh in %d seconds\n", TOKEN_REFRESH_RETRY_DELAY ); + mtcTimer_stop ( token_refresh_timer ); + + __token__.delay = false ; + + /* force refresh of token in 5 seconds */ + mtcTimer_start(token_refresh_timer,handler,TOKEN_REFRESH_RETRY_DELAY); + } + else if ( event.active == true ) + { + /* Look for the response */ + if ( event.base ) + { + event_base_loop( event.base, EVLOOP_NONBLOCK ); + } + else + { + /* should not get here. event active while base is null + * try and recover from this error case. */ + __token__.delay = true ; + event.active = false ; + } + } + else if ( event.base ) + { + +#ifdef WANT_FIT_TESTING + string data = "" ; + if ( daemon_want_fit ( FIT_CODE__TOKEN, hostname, "refresh", data )) + __token__.token.clear(); +#endif + + /* Check for a response string */ + if ( __token__.token.empty() ) + { + elog ("%s no token ; %d second hold-off before retry\n", + hostname.c_str(), TOKEN_REFRESH_RETRY_DELAY ); + + /* force refresh of token in 5 seconds */ + mtcTimer_reset(token_refresh_timer); + mtcTimer_start(token_refresh_timer,handler,TOKEN_REFRESH_RETRY_DELAY); + } + + dlog ("%s freeing token event base and conn data\n", hostname.c_str()); + httpUtil_free_conn ( event ); + httpUtil_free_base ( event ); + } + +#ifdef WANT_FIT_TESTING + if ( daemon_want_fit ( FIT_CODE__TOKEN, hostname, "cancel_timer" )) + mtcTimer_reset ( token_refresh_timer ); +#endif +} + + void tokenUtil_log_refresh ( void ) { if ( __token__.refreshed == true ) @@ -58,7 +354,7 @@ int tokenUtil_token_refresh ( libEvent & event, string hostname ) if ( event.status != PASS ) { - event.status = tokenUtil_new_token( event, hostname ); + event.status = tokenUtil_new_token( event, hostname ); } else { @@ -148,7 +444,7 @@ string _get_keystone_prefix_path ( ) int tokenUtil_handler ( libEvent & event ) { jsonUtil_auth_type info ; - + string hn = event.hostname ; int rc = event.status ; @@ -185,6 +481,7 @@ int tokenUtil_handler ( libEvent & event ) token_ptr->token = token_str ; token_ptr->url = info.adminURL ; token_ptr->refreshed = true ; + } } else if ( event.request == KEYSTONE_GET_ENDPOINT_LIST ) @@ -288,6 +585,7 @@ int tokenUtil_handler ( libEvent & event ) event.information.c_str(), event.label.c_str()); } + event.active = false ; return (event.status); } else if ( event.request == KEYSTONE_GET_SERVICE_LIST ) @@ -393,6 +691,7 @@ int tokenUtil_handler ( libEvent & event ) { dlog ("%s Token Refresh O.K.\n", event.hostname.c_str()); } + event.active = false ; return (rc); } @@ -403,7 +702,7 @@ void tokenUtil_fail_token ( void ) } /* fetches an authorization token as a blocking request */ -int tokenUtil_new_token ( libEvent & event, string hostname ) +int tokenUtil_new_token ( libEvent & event, string hostname, bool blocking ) { ilog ("%s Requesting Authentication Token\n", hostname.c_str()); @@ -418,7 +717,8 @@ int tokenUtil_new_token ( libEvent & event, string hostname ) dlog ("%s fetching new token\n", event.hostname.c_str()); event.prefix_path = _get_keystone_prefix_path(); - event.blocking = true ; + event.blocking = blocking ; + // event.blocking = true ; event.request = KEYSTONE_GET_TOKEN ; event.operation = "get new" ; event.type = EVHTTP_REQ_POST ; diff --git a/mtce-common/cgts-mtce-common-1.0/common/tokenUtil.h b/mtce-common/cgts-mtce-common-1.0/common/tokenUtil.h index 1937ddc0..7e269561 100644 --- a/mtce-common/cgts-mtce-common-1.0/common/tokenUtil.h +++ b/mtce-common/cgts-mtce-common-1.0/common/tokenUtil.h @@ -42,7 +42,8 @@ keyToken_type * tokenUtil_get_ptr ( void ); keyToken_type tokenUtil_get_token ( void ); int tokenUtil_handler ( libEvent & event ); -int tokenUtil_new_token ( libEvent & event, string hostname ); +int tokenUtil_new_token ( libEvent & event, string hostname, bool blocking=true ); +void tokenUtil_get_first ( libEvent & event, string & hostname ); int tokenUtil_token_refresh( libEvent & event, string hostname ); int tokenUtil_get_endpoints( libEvent & event, string service_uuid ); string tokenUtil_get_svc_uuid ( libEvent & event, string service_name ); @@ -50,9 +51,15 @@ string tokenUtil_get_svc_uuid ( libEvent & event, string service_name ) void tokenUtil_fail_token ( void ); void tokenUtil_log_refresh ( void ); -int keystone_config_handler ( void * user, +int keystone_config_handler ( void * user, const char * section, const char * name, const char * value); +void tokenUtil_manage_token ( libEvent & event, + string & hostname, + int & refresh_rate, + struct mtc_timer & token_refresh_timer, + void (*handler)(int, siginfo_t*, void*)); + #endif /* __INCLUDE_TOKENUTIL_H__ */ diff --git a/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonHdlr.cpp b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonHdlr.cpp index d4369e22..e4d41f54 100644 --- a/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonHdlr.cpp +++ b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonHdlr.cpp @@ -253,7 +253,6 @@ void hwmon_service ( hwmon_ctrl_type * ctrl_ptr ) daemon_config_type * config_ptr = daemon_get_cfg_ptr(); hwmon_socket_type * sock_ptr = getSock_ptr(); - keyToken_type * token_ptr = tokenUtil_get_ptr(); hostInv.hostBase.my_hostname = ctrl_ptr->my_hostname ; hostInv.hostBase.my_local_ip = ctrl_ptr->my_local_ip ; @@ -362,37 +361,11 @@ void hwmon_service ( hwmon_ctrl_type * ctrl_ptr ) /* Handle refreshing the authentication token */ tokenUtil_log_refresh (); - if ( hwmonTimer_token.ring == true ) - { - hwmonTimer_token.ring = false ; - tokenUtil_new_token ( ctrl_ptr->httpEvent, ctrl_ptr->my_hostname ); - - /* If the token fetch fails then we retry in a shorter interval */ - if ( token_ptr->delay == true ) - { - mtcTimer_start( hwmonTimer_token, hwmonTimer_handler, 5 ); - token_ptr->delay = false ; - } - /* otherwise at the normal refresh rate */ - else - { - mtcTimer_start ( hwmonTimer_token, hwmonTimer_handler, config_ptr->token_refresh_rate ) ; - } - } - /* handle the spontaneous error case */ - else if ( token_ptr->delay == true ) - { - mtcTimer_stop ( hwmonTimer_token ); - mtcTimer_start( hwmonTimer_token, hwmonTimer_handler, 5 ); - token_ptr->delay = false ; - } - - /* Always ensure that the token refesh timer is running */ - if ( hwmonTimer_token.tid == NULL ) - { - slog ("auto-restarting dead token refresh timer\n"); - mtcTimer_start ( hwmonTimer_token, hwmonTimer_handler, config_ptr->token_refresh_rate ) ; - } + tokenUtil_manage_token ( ctrl_ptr->httpEvent, + ctrl_ptr->my_hostname, + config_ptr->token_refresh_rate, + hwmonTimer_token, + hwmonTimer_handler ); /* Run the FSM */ hostInv.hwmon_fsm ( ) ; diff --git a/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonInit.cpp b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonInit.cpp index 4e57ff94..92174004 100644 --- a/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonInit.cpp +++ b/mtce-common/cgts-mtce-common-1.0/hwmon/hwmonInit.cpp @@ -371,30 +371,14 @@ void daemon_service_run ( void ) } } - /* Get an Authentication Token */ - ilog ("%s Requesting initial token\n", hwmon_ctrl.my_hostname.c_str() ); - do - { - // rc = mtcInv.mtcKeyApi_get_token ( mtcInv.my_hostname ); - rc = tokenUtil_new_token ( hwmon_ctrl.httpEvent, hwmon_ctrl.my_hostname ); - if ( rc ) - { - elog ("Failed to get authentication token (%d)\n", rc ); - sleep (2); - } - if ( hwmon_ctrl.httpEvent.base ) - { - slog ("%s token base:%p\n", - hwmon_ctrl.my_hostname.c_str(), - hwmon_ctrl.httpEvent.base ); - } - - } while ( rc != PASS ) ; + /* Get the initial token. + * This call does not return until a token is received */ + tokenUtil_get_first ( hwmon_ctrl.httpEvent, hwmon_ctrl.my_hostname ); +#ifdef WANT_FIT_TESTING if ( daemon_want_fit ( FIT_CODE__HWMON__CORRUPT_TOKEN )) - { tokenUtil_fail_token (); - } +#endif /* enable the base level signal handler latency monitor */ daemon_latency_monitor (true); diff --git a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeCtrl.cpp b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeCtrl.cpp index e288c3d4..9fc79453 100755 --- a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeCtrl.cpp +++ b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeCtrl.cpp @@ -929,25 +929,9 @@ int _self_provision ( void ) daemon_exit(); } - /* Get an Authentication Token */ - ilog ("%s Requesting initial token\n", mtcInv.my_hostname.c_str() ); - do - { - rc = tokenUtil_new_token ( mtcInv.tokenEvent, mtcInv.my_hostname ); - if ( rc ) - { - elog ("Failed to get authentication token (%d)\n", rc ); - } - - /* Give system inventory some time after start-up. - * We see frequent first request failures requiring retry when this - * wait was inside the above if clause. Moving it here ensure there - * is always a small wait after the token fetch on process startup. */ - daemon_signal_hdlr (); - sleep (5); - daemon_signal_hdlr (); - - } while ( rc != PASS ) ; + /* Get the initial token. + * This call does not return until a token is received */ + tokenUtil_get_first ( mtcInv.tokenEvent, mtcInv.my_hostname ); #ifdef WANT_FIT_TESTING if ( daemon_want_fit ( FIT_CODE__CORRUPT_TOKEN, mtcInv.my_hostname )) @@ -1299,6 +1283,7 @@ void daemon_service_run ( void ) /* Run Maintenance service forever */ for ( ; ; ) { + daemon_signal_hdlr (); /** * Can't just run 'mtcHttpSvr_look' off select as it is seen to miss events. * Would like to use event_base_loopexit with event_base_loopcontinue @@ -1307,11 +1292,11 @@ void daemon_service_run ( void ) * mtcHttpSvr_work ( mtce_event ); **/ mtcHttpSvr_look ( mtce_event ); + tokenUtil_manage_token ( mtcInv.tokenEvent, mtcInv.my_hostname, mtcInv.token_refresh_rate, mtcInv.mtcTimer_token, mtcTimer_handler ); tokenUtil_log_refresh (); if ( mtcInv_ptr->num_hosts () == 0 ) { - // mtcHttpSvr_look ( mtce_event ); sleep (1); continue ; } diff --git a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeFsm.cpp b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeFsm.cpp index 493b356e..8ee70593 100755 --- a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeFsm.cpp +++ b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeFsm.cpp @@ -35,8 +35,6 @@ int nodeLinkClass::fsm ( struct nodeLinkClass::node * node_ptr ) { int rc = PASS ; - keyToken_type * __token_ptr = tokenUtil_get_ptr(); - if ( node_ptr == NULL ) { slog ("Null Node Pointer\n"); @@ -92,35 +90,6 @@ int nodeLinkClass::fsm ( struct nodeLinkClass::node * node_ptr ) */ nodeLinkClass::online_handler ( node_ptr ); - if ( this->mtcTimer_token.ring == true ) - { - dlog ("%s renewing token\n", my_hostname.c_str()); - - /* this is a blocking call */ - tokenUtil_new_token ( tokenEvent, my_hostname ); - - if ( __token_ptr->delay == true ) - { - __token_ptr->delay = false ; - mtcTimer_start( mtcTimer_token, mtcTimer_handler, 5 ); - } - else - { - mtcTimer_start ( mtcTimer_token, mtcTimer_handler, token_refresh_rate ); - } - } - else if ( __token_ptr->delay == true ) - { - ilog ( "Token Refresh in 5 seconds\n" ); - if ( mtcTimer_token.tid ) - mtcTimer_stop ( mtcTimer_token ); - - __token_ptr->delay = false ; - - /* force refresh of token in 5 seconds */ - mtcTimer_start( mtcTimer_token, mtcTimer_handler, 5 ); - } - if ( node_ptr->adminAction == MTC_ADMIN_ACTION__DELETE ) { flog ("%s -> Delete Action\n", node_ptr->hostname.c_str());