Merge "Mtce: Avoid running subfunction FSM for AIO-DX compute only hosts"

This commit is contained in:
Zuul 2018-06-28 13:39:02 +00:00 committed by Gerrit Code Review
commit 25ee1b19a4
5 changed files with 121 additions and 101 deletions

View File

@ -2609,7 +2609,7 @@ int nodeLinkClass::add_host ( node_inv_type & inv )
node_ptr->operState = operState_str_to_enum (inv.oper.data ());
node_ptr->availStatus = availStatus_str_to_enum (inv.avail.data());
if ( CPE_SYSTEM )
if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
{
node_ptr->operState_subf = operState_str_to_enum (inv.oper_subf.data());
node_ptr->availStatus_subf = availStatus_str_to_enum (inv.avail_subf.data());
@ -2720,7 +2720,7 @@ int nodeLinkClass::add_host ( node_inv_type & inv )
node_ptr->operState = operState_str_to_enum (inv.oper.data ());
node_ptr->availStatus = availStatus_str_to_enum (inv.avail.data());
if ( CPE_SYSTEM )
if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
{
node_ptr->operState_subf = operState_str_to_enum (inv.oper_subf.data());
node_ptr->availStatus_subf = availStatus_str_to_enum (inv.avail_subf.data());
@ -2736,13 +2736,13 @@ int nodeLinkClass::add_host ( node_inv_type & inv )
node_ptr->adminState = adminState_str_to_enum (inv.admin.data());
node_ptr->operState = operState_str_to_enum (inv.oper.data ());
node_ptr->availStatus = availStatus_str_to_enum (inv.avail.data());
if ( CPE_SYSTEM )
if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
{
node_ptr->operState_subf = operState_str_to_enum (inv.oper_subf.data());
node_ptr->availStatus_subf = availStatus_str_to_enum (inv.avail_subf.data());
}
adminActionChange ( node_ptr, MTC_ADMIN_ACTION__LOCK );
}
else if ( !inv.action.compare ("force-lock") && validStates )
@ -2754,8 +2754,8 @@ int nodeLinkClass::add_host ( node_inv_type & inv )
node_ptr->adminState = adminState_str_to_enum (inv.admin.data());
node_ptr->operState = operState_str_to_enum (inv.oper.data ());
node_ptr->availStatus = availStatus_str_to_enum (inv.avail.data());
if ( CPE_SYSTEM )
if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
{
node_ptr->operState_subf = operState_str_to_enum (inv.oper_subf.data());
node_ptr->availStatus_subf = availStatus_str_to_enum (inv.avail_subf.data());
@ -2766,58 +2766,58 @@ int nodeLinkClass::add_host ( node_inv_type & inv )
else if ( !inv.action.compare ("reboot") && validStates )
{
ilog ("%s Added with 'reboot' in 'locked' state\n", node_ptr->hostname.c_str());
print_inv ( inv );
node_ptr->adminState = adminState_str_to_enum (inv.admin.data());
node_ptr->operState = operState_str_to_enum (inv.oper.data ());
node_ptr->availStatus = availStatus_str_to_enum (inv.avail.data());
if ( CPE_SYSTEM )
if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
{
node_ptr->operState_subf = operState_str_to_enum (inv.oper_subf.data());
node_ptr->availStatus_subf = availStatus_str_to_enum (inv.avail_subf.data());
}
;
adminActionChange ( node_ptr, MTC_ADMIN_ACTION__REBOOT );
}
else if ( !inv.action.compare ("reset") && validStates )
{
ilog ("%s Added with 'reset' in 'locked' state\n", node_ptr->hostname.c_str());
print_inv ( inv );
node_ptr->adminState = adminState_str_to_enum (inv.admin.data());
node_ptr->operState = operState_str_to_enum (inv.oper.data ());
node_ptr->availStatus = availStatus_str_to_enum (inv.avail.data());
if ( CPE_SYSTEM )
if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
{
node_ptr->operState_subf = operState_str_to_enum (inv.oper_subf.data());
node_ptr->availStatus_subf = availStatus_str_to_enum (inv.avail_subf.data());
}
adminActionChange ( node_ptr, MTC_ADMIN_ACTION__RESET );
}
}
else if ( !inv.action.compare ("power-off") && validStates )
{
ilog ("%s Added in a 'locked' and 'power-off' state\n", node_ptr->hostname.c_str());
print_inv ( inv );
node_ptr->adminState = MTC_ADMIN_STATE__LOCKED ;
node_ptr->operState = MTC_OPER_STATE__DISABLED;
node_ptr->availStatus = MTC_AVAIL_STATUS__POWERED_OFF ;
node_ptr->operState_subf = MTC_OPER_STATE__DISABLED ;
node_ptr->availStatus_subf = MTC_AVAIL_STATUS__POWERED_OFF ;
adminActionChange ( node_ptr, MTC_ADMIN_ACTION__POWEROFF );
}
else if ( !inv.action.compare ("power-on") && validStates )
{
ilog ("%s Added with 'power-on' in 'locked' state\n", node_ptr->hostname.c_str());
print_inv ( inv );
node_ptr->adminState = MTC_ADMIN_STATE__LOCKED ;
@ -2832,9 +2832,9 @@ int nodeLinkClass::add_host ( node_inv_type & inv )
}
else
{
wlog ("%s Need add Action support for '%s' action\n", node_ptr->hostname.c_str(),
wlog ("%s Need add Action support for '%s' action\n", node_ptr->hostname.c_str(),
inv.action.c_str());
print_inv ( inv );
/* Load in maintenance states */
@ -2842,7 +2842,7 @@ int nodeLinkClass::add_host ( node_inv_type & inv )
node_ptr->operState = MTC_OPER_STATE__DISABLED ;
node_ptr->availStatus = MTC_AVAIL_STATUS__OFFLINE ;
if ( CPE_SYSTEM )
if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
{
node_ptr->operState_subf = MTC_OPER_STATE__DISABLED ;
node_ptr->availStatus_subf = MTC_AVAIL_STATUS__OFFLINE ;
@ -2859,8 +2859,8 @@ int nodeLinkClass::add_host ( node_inv_type & inv )
node_ptr->adminState = adminState_str_to_enum (inv.admin.data());
node_ptr->operState = operState_str_to_enum (inv.oper.data ());
node_ptr->availStatus = availStatus_str_to_enum (inv.avail.data());
if ( CPE_SYSTEM )
if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
{
node_ptr->operState_subf = operState_str_to_enum (inv.oper_subf.data());
node_ptr->availStatus_subf = availStatus_str_to_enum (inv.avail_subf.data());
@ -3678,8 +3678,8 @@ void nodeLinkClass::set_mtce_flags ( string hostname, int flags )
}
/* Deal with sub-function if combo host */
if ( CPE_SYSTEM )
/* Deal with sub-function if AIO controller host */
if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
{
if ( flags & MTC_FLAG__SUBF_GOENABLED )
{
@ -5426,7 +5426,7 @@ int nodeLinkClass::degrade_resource_raise ( string & hostname,
* This utility handles critical process failure event notifications.
* Typically this interface will force a host re-enable through reset.
*
* For CPE Simplex this failure sets the auto recovery bool
* For AIO Simplex this failure sets the auto recovery bool
* so that the main enable FSM can handle it through a thresholded
* self reboot.
*
@ -5768,32 +5768,48 @@ string nodeLinkClass::get_node_subfunction_str ( string hostname )
bool nodeLinkClass::is_controller ( string & hostname )
{
nodeLinkClass::node * node_ptr = getNode ( hostname );
return is_controller(node_ptr);
if ( node_ptr )
{
return is_controller(node_ptr);
}
return false ;
}
/** Check if a node is a compute */
bool nodeLinkClass::is_compute ( string & hostname )
{
nodeLinkClass::node * node_ptr = getNode ( hostname );
return is_compute(node_ptr);
if ( node_ptr )
{
return is_compute(node_ptr);
}
return false ;
}
/** Check if a node is a compute */
bool nodeLinkClass::is_compute_subfunction ( string & hostname )
{
nodeLinkClass::node * node_ptr = getNode ( hostname );
return is_compute_subfunction(node_ptr);
if ( node_ptr )
{
return is_compute_subfunction(node_ptr);
}
return false ;
}
/** Check if a node is a storage */
bool nodeLinkClass::is_storage ( string & hostname )
{
nodeLinkClass::node * node_ptr = getNode ( hostname );
return is_storage(node_ptr);
if ( node_ptr )
{
return is_storage(node_ptr);
}
return false ;
}
/** Maintenance FSM Test Case Setup procedure */
int nodeLinkClass::set_enableStage ( string & hostname,
int nodeLinkClass::set_enableStage ( string & hostname,
mtc_enableStages_enum stage )
{
nodeLinkClass::node * node_ptr = getNode ( hostname ) ;
@ -8306,10 +8322,10 @@ void nodeLinkClass::mem_log_bm ( struct nodeLinkClass::node * node_ptr )
void nodeLinkClass::mem_log_identity ( struct nodeLinkClass::node * node_ptr )
{
char str[MAX_MEM_LOG_DATA] ;
snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\t%s %s (%u)\n",
node_ptr->hostname.c_str(),
snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\t%s %s (%u)\n",
node_ptr->hostname.c_str(),
node_ptr->uuid.c_str(),
node_ptr->type.c_str(),
node_ptr->type.c_str(),
node_ptr->nodetype);
mem_log (str);
}
@ -8321,32 +8337,23 @@ void nodeLinkClass::mem_log_state1 ( struct nodeLinkClass::node * node_ptr )
string op = operState_enum_to_str(node_ptr->operState) ;
string av = availStatus_enum_to_str(node_ptr->availStatus);
snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\t%s-%s-%s degrade_mask:%08x\n",
node_ptr->hostname.c_str(),
snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\t%s-%s-%s degrade_mask:%08x\n",
node_ptr->hostname.c_str(),
ad.c_str(),
op.c_str(),
op.c_str(),
av.c_str(),
node_ptr->degrade_mask);
mem_log (str);
op = operState_enum_to_str(node_ptr->operState_subf) ;
av = availStatus_enum_to_str(node_ptr->availStatus_subf);
if ( node_ptr->subfunction_str.empty() )
if ( ! node_ptr->subfunction_str.empty() )
{
snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tFunction: %s %s-%s-%s\n",
node_ptr->hostname.c_str(),
node_ptr->function_str.c_str(),
ad.c_str(),
op.c_str(),
av.c_str());
}
else
{
snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tFunctions %s-%s %s-%s-%s\n",
snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tSub-Functions: %s-%s %s-%s-%s\n",
node_ptr->hostname.c_str(),
node_ptr->function_str.c_str(),
node_ptr->subfunction_str.c_str(),
ad.c_str(),
op.c_str(),
op.c_str(),
av.c_str());
}
mem_log (str);
@ -8357,10 +8364,10 @@ void nodeLinkClass::mem_log_state2 ( struct nodeLinkClass::node * node_ptr )
char str[MAX_MEM_LOG_DATA] ;
string aa = adminAction_enum_to_str(node_ptr->adminAction) ;
snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tmtcAction:%s invAction:%s Task:%s\n",
node_ptr->hostname.c_str(),
snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tmtcAction:%s invAction:%s Task:%s\n",
node_ptr->hostname.c_str(),
aa.c_str(),
node_ptr->action.c_str(),
node_ptr->action.c_str(),
node_ptr->task.c_str());
mem_log (str);
}
@ -8518,7 +8525,7 @@ void nodeLinkClass::mem_log_type_info ( struct nodeLinkClass::node * node_ptr )
node_ptr->function);
mem_log (str);
if ( CPE_SYSTEM )
if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
{
snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tSub-Function: %s (%u) (SubFunc Enabled:%c)\n",
node_ptr->hostname.c_str(),

View File

@ -1007,12 +1007,12 @@ int daemon_init ( string iface, string nodeType_str )
if ( is_combo_system ( my_nodetype ) == true )
{
daemon_wait_for_file ( GOENABLED_SUBF_PASS , 0);
ilog ("GOENABLE (small system)\n");
ilog ("GOENABLE (AIO Host)\n");
}
else
{
daemon_wait_for_file ( GOENABLED_MAIN_PASS , 0);
ilog ("GOENABLE (large system)\n");
ilog ("GOENABLE\n");
}
/* Configure the client */

View File

@ -940,7 +940,7 @@ int nodeLinkClass::mtcInvApi_update_states_now ( struct nodeLinkClass::node * no
else
avail = " " ;
if ( CPE_SYSTEM )
if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
{
if ( ! oper_subf.empty() )
{
@ -982,7 +982,7 @@ int nodeLinkClass::mtcInvApi_update_states_now ( struct nodeLinkClass::node * no
this->sysinvEvent.payload.erase(len-1,1);
this->sysinvEvent.payload.append ( "]");
if ( CPE_SYSTEM )
if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
{
ilog ("%s %s-%s-%s %s-%s\n",
node_ptr->hostname.c_str(),

View File

@ -1629,29 +1629,43 @@ int run_hostservices_scripts ( unsigned int cmd )
* one for controller and 1 for compute */
if ( ctrl.system_type != SYSTEM_TYPE__NORMAL )
{
string dir = "" ;
if ( action == "stop" )
{
std::list<string> more_scripts ;
if ( cmd == MTC_CMD_STOP_COMPUTE_SVCS )
{
dir = SERVICES_DIR ;
dir.append("/controller");
/* only add the controller if we get a compute stop
* and this host has a controller nodetype function */
if (ctrl.nodetype & CONTROLLER_TYPE)
{
dir = SERVICES_DIR ;
dir.append("/controller");
}
}
else
else if ( cmd == MTC_CMD_STOP_CONTROL_SVCS )
{
dir = SERVICES_DIR ;
dir.append("/compute");
/* add the compute stop if we get a controller stop
* and this host has a compute nodetype function */
if (ctrl.nodetype & COMPUTE_TYPE)
{
dir = SERVICES_DIR ;
dir.append("/compute");
}
}
if ( load_filenames_in_dir ( dir.data(), more_scripts ) != PASS )
if ( ! dir.empty() )
{
ctrl.active_script_set = NO_SCRIPTS ;
return (FAIL_READ_FILES) ;
}
if ( load_filenames_in_dir ( dir.data(), more_scripts ) != PASS )
{
ctrl.active_script_set = NO_SCRIPTS ;
return (FAIL_READ_FILES) ;
}
if ( ! more_scripts.empty() )
{
scripts.merge(more_scripts);
if ( ! more_scripts.empty() )
{
scripts.merge(more_scripts);
}
}
}
}

View File

@ -600,7 +600,7 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr )
this->autorecovery_disabled = true ;
}
if ( CPE_SYSTEM )
if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
{
/* Raise Critical Compute Function Alarm */
alarm_compute_failure ( node_ptr , FM_ALARM_SEVERITY_CRITICAL );
@ -613,7 +613,7 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr )
node_ptr->graceful_recovery_counter = 0 ;
node_ptr->health_threshold_counter = 0 ;
if ( CPE_SYSTEM )
if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
{
node_ptr->inservice_failed_subf = true ;
subfStateChange ( node_ptr, MTC_OPER_STATE__DISABLED,
@ -1262,8 +1262,16 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr )
}
else /* success path */
{
/* heartbeat is only started now in a normal system. */
if (( LARGE_SYSTEM ) && ( NOT_THIS_HOST ))
/* Don't start the self heartbeat for the active controller.
* Also, in AIO , hosts that have a controller function also
* have a compute function and the heartbeat for those hosts
* are started at the end of the subfunction handler. */
if (( THIS_HOST ) ||
(( CPE_SYSTEM ) && ( is_controller(node_ptr)) ))
{
enableStageChange ( node_ptr, MTC_ENABLE__STATE_CHANGE );
}
else
{
/* allow the fsm to wait for up to 1 minute for the
* hbsClient's ready event before starting heartbeat
@ -1271,12 +1279,6 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr )
mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_MINS_1 );
enableStageChange ( node_ptr, MTC_ENABLE__HEARTBEAT_WAIT );
}
else
{
/* The heartbeat soak will be started in the enable
* subfunction handler in the combined system. */
enableStageChange ( node_ptr, MTC_ENABLE__STATE_CHANGE );
}
}
break ;
}
@ -1446,7 +1448,7 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr )
enableStageChange ( node_ptr, MTC_ENABLE__START );
if ( CPE_SYSTEM )
if (( CPE_SYSTEM ) && ( is_controller(node_ptr)))
{
ilog ("%s running compute sub-function enable handler\n", node_ptr->hostname.c_str());
mtcInvApi_update_task ( node_ptr, MTC_TASK_ENABLING_SUBF );
@ -1749,7 +1751,7 @@ int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr )
MTC_OPER_STATE__DISABLED,
MTC_AVAIL_STATUS__FAILED );
if ( CPE_SYSTEM )
if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
{
subfStateChange ( node_ptr, MTC_OPER_STATE__DISABLED,
MTC_AVAIL_STATUS__FAILED );
@ -2140,7 +2142,7 @@ int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr )
{
/* The active controller would never get/be here but
* if it did then just fall through to change state. */
if ( CPE_SYSTEM )
if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
{
/* Here we need to run the sub-fnction goenable and start
* host services if this is the other controller in a AIO
@ -2368,7 +2370,7 @@ int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr )
}
case MTC_RECOVERY__STATE_CHANGE:
{
if ( CPE_SYSTEM )
if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
{
/* Set node as unlocked-enabled */
subfStateChange ( node_ptr, MTC_OPER_STATE__ENABLED,
@ -2646,7 +2648,7 @@ int nodeLinkClass::disable_handler ( struct nodeLinkClass::node * node_ptr )
MTC_OPER_STATE__DISABLED,
locked_status );
if ( CPE_SYSTEM )
if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
{
subfStateChange ( node_ptr, MTC_OPER_STATE__DISABLED,
locked_status );
@ -3290,7 +3292,7 @@ int nodeLinkClass::online_handler ( struct nodeLinkClass::node * node_ptr )
/* otherwise change state */
mtcInvApi_update_state(node_ptr, MTC_JSON_INV_AVAIL,"offline" );
if ( CPE_SYSTEM )
if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
{
mtcInvApi_update_state(node_ptr, MTC_JSON_INV_AVAIL_SUBF,"offline" );
}
@ -3331,7 +3333,7 @@ int nodeLinkClass::online_handler ( struct nodeLinkClass::node * node_ptr )
node_ptr->hostname.c_str());
mtcInvApi_update_state ( node_ptr, MTC_JSON_INV_AVAIL, "online" );
if ( CPE_SYSTEM )
if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
{
mtcInvApi_update_state ( node_ptr, MTC_JSON_INV_AVAIL_SUBF, "online" );
}
@ -5318,7 +5320,7 @@ int nodeLinkClass::add_handler ( struct nodeLinkClass::node * node_ptr )
node_ptr->uuid.length() ? node_ptr->uuid.c_str() : "" );
if ( CPE_SYSTEM )
if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
{
if ( daemon_is_file_present ( CONFIG_COMPLETE_COMPUTE ) == false )
{
@ -5578,12 +5580,17 @@ int nodeLinkClass::add_handler ( struct nodeLinkClass::node * node_ptr )
if (( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) &&
( node_ptr->operState == MTC_OPER_STATE__ENABLED ))
{
if (( NOT_THIS_HOST ) && ( LARGE_SYSTEM ))
/* start the heartbeat service in all cases except for
* THIS host and CPE controller hosts */
if ( NOT_THIS_HOST )
{
send_hbs_command ( node_ptr->hostname, MTC_CMD_START_HOST );
if (( LARGE_SYSTEM ) ||
(( CPE_SYSTEM ) && ( is_controller(node_ptr) == false )))
{
send_hbs_command ( node_ptr->hostname, MTC_CMD_START_HOST );
}
}
}
/* Only run hardware monitor if the bm ip is provisioned */
if (( hostUtil_is_valid_bm_type ( node_ptr->bm_type )) &&
( hostUtil_is_valid_ip_addr ( node_ptr->bm_ip )))
@ -5627,19 +5634,11 @@ int nodeLinkClass::add_handler ( struct nodeLinkClass::node * node_ptr )
}
/* Special Add handling for the AIO system */
if ( CPE_SYSTEM )
if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
{
if (( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) &&
( node_ptr->operState == MTC_OPER_STATE__ENABLED ))
{
/* If AIO and unlocked-enabled and not this host then
* start the heartbeat service
* (inactive controller case) */
if ( NOT_THIS_HOST )
{
send_hbs_command ( node_ptr->hostname, MTC_CMD_START_HOST );
}
/* In AIO if in DOR mode and the host is unlocked enabled
* we need to run the subfunction handler and request
* to start host services. */
@ -6353,7 +6352,7 @@ int nodeLinkClass::insv_test_handler ( struct nodeLinkClass::node * node_ptr )
* and we have only a single enabled controller (which must be this one)
* and the alarm is not already raised.
**/
if ( CPE_SYSTEM )
if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
{
if (( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) &&
( node_ptr->operState == MTC_OPER_STATE__ENABLED ) &&