Merge "Mtce: Avoid running subfunction FSM for AIO-DX compute only hosts"

This commit is contained in:
Zuul 2018-06-28 13:39:02 +00:00 committed by Gerrit Code Review
commit 25ee1b19a4
5 changed files with 121 additions and 101 deletions

View File

@ -2609,7 +2609,7 @@ int nodeLinkClass::add_host ( node_inv_type & inv )
node_ptr->operState = operState_str_to_enum (inv.oper.data ());
node_ptr->availStatus = availStatus_str_to_enum (inv.avail.data());
if ( CPE_SYSTEM )
if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
{
node_ptr->operState_subf = operState_str_to_enum (inv.oper_subf.data());
node_ptr->availStatus_subf = availStatus_str_to_enum (inv.avail_subf.data());
@ -2720,7 +2720,7 @@ int nodeLinkClass::add_host ( node_inv_type & inv )
node_ptr->operState = operState_str_to_enum (inv.oper.data ());
node_ptr->availStatus = availStatus_str_to_enum (inv.avail.data());
if ( CPE_SYSTEM )
if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
{
node_ptr->operState_subf = operState_str_to_enum (inv.oper_subf.data());
node_ptr->availStatus_subf = availStatus_str_to_enum (inv.avail_subf.data());
@ -2737,7 +2737,7 @@ int nodeLinkClass::add_host ( node_inv_type & inv )
node_ptr->operState = operState_str_to_enum (inv.oper.data ());
node_ptr->availStatus = availStatus_str_to_enum (inv.avail.data());
if ( CPE_SYSTEM )
if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
{
node_ptr->operState_subf = operState_str_to_enum (inv.oper_subf.data());
node_ptr->availStatus_subf = availStatus_str_to_enum (inv.avail_subf.data());
@ -2755,7 +2755,7 @@ int nodeLinkClass::add_host ( node_inv_type & inv )
node_ptr->operState = operState_str_to_enum (inv.oper.data ());
node_ptr->availStatus = availStatus_str_to_enum (inv.avail.data());
if ( CPE_SYSTEM )
if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
{
node_ptr->operState_subf = operState_str_to_enum (inv.oper_subf.data());
node_ptr->availStatus_subf = availStatus_str_to_enum (inv.avail_subf.data());
@ -2773,12 +2773,12 @@ int nodeLinkClass::add_host ( node_inv_type & inv )
node_ptr->operState = operState_str_to_enum (inv.oper.data ());
node_ptr->availStatus = availStatus_str_to_enum (inv.avail.data());
if ( CPE_SYSTEM )
if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
{
node_ptr->operState_subf = operState_str_to_enum (inv.oper_subf.data());
node_ptr->availStatus_subf = availStatus_str_to_enum (inv.avail_subf.data());
}
;
adminActionChange ( node_ptr, MTC_ADMIN_ACTION__REBOOT );
}
else if ( !inv.action.compare ("reset") && validStates )
@ -2791,7 +2791,7 @@ int nodeLinkClass::add_host ( node_inv_type & inv )
node_ptr->operState = operState_str_to_enum (inv.oper.data ());
node_ptr->availStatus = availStatus_str_to_enum (inv.avail.data());
if ( CPE_SYSTEM )
if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
{
node_ptr->operState_subf = operState_str_to_enum (inv.oper_subf.data());
node_ptr->availStatus_subf = availStatus_str_to_enum (inv.avail_subf.data());
@ -2842,7 +2842,7 @@ int nodeLinkClass::add_host ( node_inv_type & inv )
node_ptr->operState = MTC_OPER_STATE__DISABLED ;
node_ptr->availStatus = MTC_AVAIL_STATUS__OFFLINE ;
if ( CPE_SYSTEM )
if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
{
node_ptr->operState_subf = MTC_OPER_STATE__DISABLED ;
node_ptr->availStatus_subf = MTC_AVAIL_STATUS__OFFLINE ;
@ -2860,7 +2860,7 @@ int nodeLinkClass::add_host ( node_inv_type & inv )
node_ptr->operState = operState_str_to_enum (inv.oper.data ());
node_ptr->availStatus = availStatus_str_to_enum (inv.avail.data());
if ( CPE_SYSTEM )
if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
{
node_ptr->operState_subf = operState_str_to_enum (inv.oper_subf.data());
node_ptr->availStatus_subf = availStatus_str_to_enum (inv.avail_subf.data());
@ -3678,8 +3678,8 @@ void nodeLinkClass::set_mtce_flags ( string hostname, int flags )
}
/* Deal with sub-function if combo host */
if ( CPE_SYSTEM )
/* Deal with sub-function if AIO controller host */
if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
{
if ( flags & MTC_FLAG__SUBF_GOENABLED )
{
@ -5426,7 +5426,7 @@ int nodeLinkClass::degrade_resource_raise ( string & hostname,
* This utility handles critical process failure event notifications.
* Typically this interface will force a host re-enable through reset.
*
* For CPE Simplex this failure sets the auto recovery bool
* For AIO Simplex this failure sets the auto recovery bool
* so that the main enable FSM can handle it through a thresholded
* self reboot.
*
@ -5768,29 +5768,45 @@ string nodeLinkClass::get_node_subfunction_str ( string hostname )
bool nodeLinkClass::is_controller ( string & hostname )
{
nodeLinkClass::node * node_ptr = getNode ( hostname );
if ( node_ptr )
{
return is_controller(node_ptr);
}
return false ;
}
/** Check if a node is a compute */
bool nodeLinkClass::is_compute ( string & hostname )
{
nodeLinkClass::node * node_ptr = getNode ( hostname );
if ( node_ptr )
{
return is_compute(node_ptr);
}
return false ;
}
/** Check if a node is a compute */
bool nodeLinkClass::is_compute_subfunction ( string & hostname )
{
nodeLinkClass::node * node_ptr = getNode ( hostname );
if ( node_ptr )
{
return is_compute_subfunction(node_ptr);
}
return false ;
}
/** Check if a node is a storage */
bool nodeLinkClass::is_storage ( string & hostname )
{
nodeLinkClass::node * node_ptr = getNode ( hostname );
if ( node_ptr )
{
return is_storage(node_ptr);
}
return false ;
}
/** Maintenance FSM Test Case Setup procedure */
int nodeLinkClass::set_enableStage ( string & hostname,
@ -8330,18 +8346,9 @@ void nodeLinkClass::mem_log_state1 ( struct nodeLinkClass::node * node_ptr )
mem_log (str);
op = operState_enum_to_str(node_ptr->operState_subf) ;
av = availStatus_enum_to_str(node_ptr->availStatus_subf);
if ( node_ptr->subfunction_str.empty() )
if ( ! node_ptr->subfunction_str.empty() )
{
snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tFunction: %s %s-%s-%s\n",
node_ptr->hostname.c_str(),
node_ptr->function_str.c_str(),
ad.c_str(),
op.c_str(),
av.c_str());
}
else
{
snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tFunctions %s-%s %s-%s-%s\n",
snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tSub-Functions: %s-%s %s-%s-%s\n",
node_ptr->hostname.c_str(),
node_ptr->function_str.c_str(),
node_ptr->subfunction_str.c_str(),
@ -8518,7 +8525,7 @@ void nodeLinkClass::mem_log_type_info ( struct nodeLinkClass::node * node_ptr )
node_ptr->function);
mem_log (str);
if ( CPE_SYSTEM )
if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
{
snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tSub-Function: %s (%u) (SubFunc Enabled:%c)\n",
node_ptr->hostname.c_str(),

View File

@ -1007,12 +1007,12 @@ int daemon_init ( string iface, string nodeType_str )
if ( is_combo_system ( my_nodetype ) == true )
{
daemon_wait_for_file ( GOENABLED_SUBF_PASS , 0);
ilog ("GOENABLE (small system)\n");
ilog ("GOENABLE (AIO Host)\n");
}
else
{
daemon_wait_for_file ( GOENABLED_MAIN_PASS , 0);
ilog ("GOENABLE (large system)\n");
ilog ("GOENABLE\n");
}
/* Configure the client */

View File

@ -940,7 +940,7 @@ int nodeLinkClass::mtcInvApi_update_states_now ( struct nodeLinkClass::node * no
else
avail = " " ;
if ( CPE_SYSTEM )
if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
{
if ( ! oper_subf.empty() )
{
@ -982,7 +982,7 @@ int nodeLinkClass::mtcInvApi_update_states_now ( struct nodeLinkClass::node * no
this->sysinvEvent.payload.erase(len-1,1);
this->sysinvEvent.payload.append ( "]");
if ( CPE_SYSTEM )
if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
{
ilog ("%s %s-%s-%s %s-%s\n",
node_ptr->hostname.c_str(),

View File

@ -1629,20 +1629,33 @@ int run_hostservices_scripts ( unsigned int cmd )
* one for controller and 1 for compute */
if ( ctrl.system_type != SYSTEM_TYPE__NORMAL )
{
string dir = "" ;
if ( action == "stop" )
{
std::list<string> more_scripts ;
if ( cmd == MTC_CMD_STOP_COMPUTE_SVCS )
{
/* only add the controller if we get a compute stop
* and this host has a controller nodetype function */
if (ctrl.nodetype & CONTROLLER_TYPE)
{
dir = SERVICES_DIR ;
dir.append("/controller");
}
else
}
else if ( cmd == MTC_CMD_STOP_CONTROL_SVCS )
{
/* add the compute stop if we get a controller stop
* and this host has a compute nodetype function */
if (ctrl.nodetype & COMPUTE_TYPE)
{
dir = SERVICES_DIR ;
dir.append("/compute");
}
}
if ( ! dir.empty() )
{
if ( load_filenames_in_dir ( dir.data(), more_scripts ) != PASS )
{
ctrl.active_script_set = NO_SCRIPTS ;
@ -1655,6 +1668,7 @@ int run_hostservices_scripts ( unsigned int cmd )
}
}
}
}
if ( scripts.empty() )
{

View File

@ -600,7 +600,7 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr )
this->autorecovery_disabled = true ;
}
if ( CPE_SYSTEM )
if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
{
/* Raise Critical Compute Function Alarm */
alarm_compute_failure ( node_ptr , FM_ALARM_SEVERITY_CRITICAL );
@ -613,7 +613,7 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr )
node_ptr->graceful_recovery_counter = 0 ;
node_ptr->health_threshold_counter = 0 ;
if ( CPE_SYSTEM )
if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
{
node_ptr->inservice_failed_subf = true ;
subfStateChange ( node_ptr, MTC_OPER_STATE__DISABLED,
@ -1262,8 +1262,16 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr )
}
else /* success path */
{
/* heartbeat is only started now in a normal system. */
if (( LARGE_SYSTEM ) && ( NOT_THIS_HOST ))
/* Don't start the self heartbeat for the active controller.
* Also, in AIO , hosts that have a controller function also
* have a compute function and the heartbeat for those hosts
* are started at the end of the subfunction handler. */
if (( THIS_HOST ) ||
(( CPE_SYSTEM ) && ( is_controller(node_ptr)) ))
{
enableStageChange ( node_ptr, MTC_ENABLE__STATE_CHANGE );
}
else
{
/* allow the fsm to wait for up to 1 minute for the
* hbsClient's ready event before starting heartbeat
@ -1271,12 +1279,6 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr )
mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_MINS_1 );
enableStageChange ( node_ptr, MTC_ENABLE__HEARTBEAT_WAIT );
}
else
{
/* The heartbeat soak will be started in the enable
* subfunction handler in the combined system. */
enableStageChange ( node_ptr, MTC_ENABLE__STATE_CHANGE );
}
}
break ;
}
@ -1446,7 +1448,7 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr )
enableStageChange ( node_ptr, MTC_ENABLE__START );
if ( CPE_SYSTEM )
if (( CPE_SYSTEM ) && ( is_controller(node_ptr)))
{
ilog ("%s running compute sub-function enable handler\n", node_ptr->hostname.c_str());
mtcInvApi_update_task ( node_ptr, MTC_TASK_ENABLING_SUBF );
@ -1749,7 +1751,7 @@ int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr )
MTC_OPER_STATE__DISABLED,
MTC_AVAIL_STATUS__FAILED );
if ( CPE_SYSTEM )
if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
{
subfStateChange ( node_ptr, MTC_OPER_STATE__DISABLED,
MTC_AVAIL_STATUS__FAILED );
@ -2140,7 +2142,7 @@ int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr )
{
/* The active controller would never get/be here but
* if it did then just fall through to change state. */
if ( CPE_SYSTEM )
if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
{
/* Here we need to run the sub-fnction goenable and start
* host services if this is the other controller in a AIO
@ -2368,7 +2370,7 @@ int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr )
}
case MTC_RECOVERY__STATE_CHANGE:
{
if ( CPE_SYSTEM )
if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
{
/* Set node as unlocked-enabled */
subfStateChange ( node_ptr, MTC_OPER_STATE__ENABLED,
@ -2646,7 +2648,7 @@ int nodeLinkClass::disable_handler ( struct nodeLinkClass::node * node_ptr )
MTC_OPER_STATE__DISABLED,
locked_status );
if ( CPE_SYSTEM )
if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
{
subfStateChange ( node_ptr, MTC_OPER_STATE__DISABLED,
locked_status );
@ -3290,7 +3292,7 @@ int nodeLinkClass::online_handler ( struct nodeLinkClass::node * node_ptr )
/* otherwise change state */
mtcInvApi_update_state(node_ptr, MTC_JSON_INV_AVAIL,"offline" );
if ( CPE_SYSTEM )
if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
{
mtcInvApi_update_state(node_ptr, MTC_JSON_INV_AVAIL_SUBF,"offline" );
}
@ -3331,7 +3333,7 @@ int nodeLinkClass::online_handler ( struct nodeLinkClass::node * node_ptr )
node_ptr->hostname.c_str());
mtcInvApi_update_state ( node_ptr, MTC_JSON_INV_AVAIL, "online" );
if ( CPE_SYSTEM )
if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
{
mtcInvApi_update_state ( node_ptr, MTC_JSON_INV_AVAIL_SUBF, "online" );
}
@ -5318,7 +5320,7 @@ int nodeLinkClass::add_handler ( struct nodeLinkClass::node * node_ptr )
node_ptr->uuid.length() ? node_ptr->uuid.c_str() : "" );
if ( CPE_SYSTEM )
if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
{
if ( daemon_is_file_present ( CONFIG_COMPLETE_COMPUTE ) == false )
{
@ -5578,12 +5580,17 @@ int nodeLinkClass::add_handler ( struct nodeLinkClass::node * node_ptr )
if (( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) &&
( node_ptr->operState == MTC_OPER_STATE__ENABLED ))
{
if (( NOT_THIS_HOST ) && ( LARGE_SYSTEM ))
/* start the heartbeat service in all cases except for
* THIS host and CPE controller hosts */
if ( NOT_THIS_HOST )
{
if (( LARGE_SYSTEM ) ||
(( CPE_SYSTEM ) && ( is_controller(node_ptr) == false )))
{
send_hbs_command ( node_ptr->hostname, MTC_CMD_START_HOST );
}
}
}
/* Only run hardware monitor if the bm ip is provisioned */
if (( hostUtil_is_valid_bm_type ( node_ptr->bm_type )) &&
( hostUtil_is_valid_ip_addr ( node_ptr->bm_ip )))
@ -5627,19 +5634,11 @@ int nodeLinkClass::add_handler ( struct nodeLinkClass::node * node_ptr )
}
/* Special Add handling for the AIO system */
if ( CPE_SYSTEM )
if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
{
if (( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) &&
( node_ptr->operState == MTC_OPER_STATE__ENABLED ))
{
/* If AIO and unlocked-enabled and not this host then
* start the heartbeat service
* (inactive controller case) */
if ( NOT_THIS_HOST )
{
send_hbs_command ( node_ptr->hostname, MTC_CMD_START_HOST );
}
/* In AIO if in DOR mode and the host is unlocked enabled
* we need to run the subfunction handler and request
* to start host services. */
@ -6353,7 +6352,7 @@ int nodeLinkClass::insv_test_handler ( struct nodeLinkClass::node * node_ptr )
* and we have only a single enabled controller (which must be this one)
* and the alarm is not already raised.
**/
if ( CPE_SYSTEM )
if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
{
if (( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) &&
( node_ptr->operState == MTC_OPER_STATE__ENABLED ) &&