Merge "Maintenance Redfish support usability enhancements."

This commit is contained in:
Zuul 2019-10-10 18:38:21 +00:00 committed by Gerrit Code Review
commit f2cba8f89b
10 changed files with 187 additions and 149 deletions

View File

@ -90,7 +90,11 @@ string bmcUtil_getProtocol_str ( bmc_protocol_enum protocol )
{
case BMC_PROTOCOL__REDFISHTOOL: return(BMC_PROTOCOL__REDFISHTOOL_STR);
case BMC_PROTOCOL__IPMITOOL: return(BMC_PROTOCOL__IPMITOOL_STR);
default: return("unknown");
default:
{
blog ("unknown bmc protocol %d", protocol );
return("unknown");
}
}
}
@ -329,10 +333,12 @@ bmc_protocol_enum bmcUtil_read_hwmond_protocol ( string hostname )
bmc_protocol_enum protocol = BMC_PROTOCOL__IPMITOOL ;
string hwmond_proto_filename = BMC_HWMON_TMP_DIR + hostname ;
string proto_str = daemon_read_file ( hwmond_proto_filename.data() ) ;
if ( strcmp (proto_str.data(), BMC_PROTOCOL__REDFISHTOOL_STR) )
protocol = BMC_PROTOCOL__REDFISHTOOL ;
if ( daemon_is_file_present ( hwmond_proto_filename.data() ) == true )
{
string proto_str = daemon_read_file ( hwmond_proto_filename.data() ) ;
if ( !strcmp (proto_str.data(), BMC_PROTOCOL__REDFISHTOOL_STR) )
protocol = BMC_PROTOCOL__REDFISHTOOL ;
}
return protocol;
}

View File

@ -803,6 +803,7 @@ int load_filenames_in_dir ( const char * directory, std::list<string> & filelist
{
DIR *d;
struct dirent *dir;
int rc = PASS ;
/* Clear the content of the config file list and running counter */
filelist.clear ();
@ -813,7 +814,7 @@ int load_filenames_in_dir ( const char * directory, std::list<string> & filelist
while ((dir = readdir(d)) != NULL)
{
dlog3 ("File: %s\n", dir->d_name);
if ( strcmp ( dir->d_name , "." ) &&
if ( strcmp ( dir->d_name , "." ) &&
strcmp ( dir->d_name , ".." ))
{
string temp = directory ;
@ -828,8 +829,9 @@ int load_filenames_in_dir ( const char * directory, std::list<string> & filelist
else
{
elog ("Failed to open %s\n", directory );
rc = FAIL_FILE_ACCESS ;
}
return(PASS);
return(rc);
}
int setup_child ( bool close_file_descriptors )

View File

@ -4066,6 +4066,7 @@ void nodeLinkClass::bmc_access_data_init ( struct nodeLinkClass::node * node_ptr
{
if ( node_ptr )
{
node_ptr->bm_pw.clear();
node_ptr->bmc_accessible = false ;
node_ptr->bm_ping_info.ok = false ;
node_ptr->bmc_info_query_active = false ;
@ -4148,12 +4149,8 @@ int nodeLinkClass::set_bm_prov ( struct nodeLinkClass::node * node_ptr, bool sta
secret->reference.clear() ;
secret->payload.clear() ;
secret->stage = MTC_SECRET__START ;
mtcTimer_start ( node_ptr->bm_timer, mtcTimer_handler, SECRET_START_DELAY );
}
mtcTimer_start( node_ptr->bm_timer, mtcTimer_handler, SECRET_START_DELAY );
node_ptr->thread_extra_info.bm_pw.clear() ;
node_ptr->thread_extra_info.bm_ip = node_ptr->bm_ip ;
node_ptr->thread_extra_info.bm_un = node_ptr->bm_un ;
send_hwmon_command(node_ptr->hostname, MTC_CMD_ADD_HOST);
send_hwmon_command(node_ptr->hostname, MTC_CMD_START_HOST);

View File

@ -504,21 +504,32 @@ int hwmonHostClass::set_bm_prov ( struct hwmonHostClass::hwmon_host * host_ptr,
{
rc = PASS ;
/* Clear the alarm if we are starting fresh from an unprovisioned state */
bool connect = false ;
bool reconnect = false ;
if (( host_ptr->bm_provisioned == false ) && ( state == true ))
connect = true ;
else if (( host_ptr->bm_provisioned == true ) && ( state == true ))
reconnect = true ;
if ( connect || reconnect )
{
ilog ("%s board management controller is being provisioned\n", host_ptr->hostname.c_str());
ilog ("%s setting up ping socket\n", host_ptr->hostname.c_str() );
ilog ("%s board management controller is being %sprovisioned\n",
host_ptr->hostname.c_str(),
host_ptr->bm_provisioned ? "re":"" );
/* ---------------------------------------
* Init bmc data based on monitoring mode
* ---------------------------------------*/
blog ("%s setting up ping socket\n", host_ptr->hostname.c_str() );
mtcTimer_reset ( host_ptr->ping_info.timer ) ;
host_ptr->ping_info.stage = PINGUTIL_MONITOR_STAGE__OPEN ;
host_ptr->ping_info.ip = host_ptr->bm_ip ;
host_ptr->ping_info.hostname = host_ptr->hostname ;
bmc_data_init ( host_ptr );
if ( connect || host_ptr->relearn )
bmc_data_init ( host_ptr );
string host_uuid = hostBase.get_uuid( host_ptr->hostname );
barbicanSecret_type * secret = secretUtil_find_secret( host_uuid );
@ -534,6 +545,7 @@ int hwmonHostClass::set_bm_prov ( struct hwmonHostClass::hwmon_host * host_ptr,
host_ptr->thread_extra_info.bm_ip = host_ptr->bm_ip ;
host_ptr->thread_extra_info.bm_un = host_ptr->bm_un ;
}
/* handle the case going from provisioned to not provisioned */
if (( host_ptr->bm_provisioned == true ) && ( state == false ))
{

View File

@ -96,59 +96,6 @@ void hwmonHostClass::hwmon_fsm ( void )
pingUtil_acc_monitor ( host_ptr->ping_info );
/* Check to see if sensor monitoring for this host is disabled.
* If it is ... */
if (( host_ptr->monitor == false ) || ( host_ptr->bm_pw.empty()))
{
/* ... make sure the thread sits in the
* idle state while disabled or there is no pw learned yet */
if ( thread_idle ( host_ptr->bmc_thread_ctrl ) == false )
{
if ( thread_done ( host_ptr->bmc_thread_ctrl ) == true )
{
host_ptr->bmc_thread_ctrl.done = true ;
}
else
{
thread_kill ( host_ptr->bmc_thread_ctrl, host_ptr->bmc_thread_info );
}
}
if ( host_ptr->bm_pw.empty( ))
{
string host_uuid = hostBase.get_uuid(host_ptr->hostname);
barbicanSecret_type * secret =
secretUtil_manage_secret( host_ptr->secretEvent,
host_ptr->hostname,
host_uuid,
host_ptr->secretTimer,
hwmonTimer_handler );
if ( secret->stage == MTC_SECRET__GET_PWD_RECV )
{
host_ptr->thread_extra_info.bm_pw = host_ptr->bm_pw = secret->payload ;
}
ilog_throttled (host_ptr->empty_secret_log_throttle, 50,
"%s waiting on bm password learn", host_ptr->hostname.c_str());
}
continue ;
}
else if (( host_ptr->accessible == false ) && ( host_ptr->ping_info.ok == true ))
{
ilog ("%s bmc is accessible\n", host_ptr->hostname.c_str());
host_ptr->accessible = true ;
}
else if (( host_ptr->accessible == true ) && ( host_ptr->ping_info.ok == false ))
{
wlog ("%s bmc access lost\n", host_ptr->hostname.c_str());
thread_kill ( host_ptr->bmc_thread_ctrl, host_ptr->bmc_thread_info );
host_ptr->accessible = false ;
host_ptr->sensor_query_count = 0 ;
host_ptr->bmc_fw_version.clear();
host_ptr->ping_info.stage = PINGUTIL_MONITOR_STAGE__FAIL ;
}
if ( host_ptr->ping_info.ok == false )
{
/* Auto correct key ping information ; should never occur but if it does ... */
@ -163,15 +110,81 @@ void hwmonHostClass::hwmon_fsm ( void )
host_ptr->ping_info.hostname = host_ptr->hostname ;
host_ptr->ping_info.ip = host_ptr->bm_ip ;
}
// pingUtil_acc_monitor ( host_ptr->ping_info );
}
/* Check to see if sensor monitoring for this host is
* disabled or the bm password has not yet been learned */
else if (( host_ptr->monitor == false ) || ( host_ptr->bm_pw.empty()))
{
/* ... make sure the thread sits in the
* idle state while disabled or there
* is no pw learned yet */
if ( thread_idle ( host_ptr->bmc_thread_ctrl ) == false )
{
if ( thread_done ( host_ptr->bmc_thread_ctrl ) == true )
{
host_ptr->bmc_thread_ctrl.done = true ;
}
else
{
thread_kill ( host_ptr->bmc_thread_ctrl, host_ptr->bmc_thread_info );
}
}
/* Only try and get the password if sensor monitoring
* is enabled */
if (( host_ptr->monitor ) && ( host_ptr->bm_pw.empty( )))
{
string host_uuid = hostBase.get_uuid(host_ptr->hostname);
barbicanSecret_type * secret =
secretUtil_manage_secret( host_ptr->secretEvent,
host_ptr->hostname,
host_uuid,
host_ptr->secretTimer,
hwmonTimer_handler );
if ( secret->stage == MTC_SECRET__GET_PWD_RECV )
{
    /* The secret fetch has completed ; cache the payload as this
     * host's bm password. */
    host_ptr->bm_pw = secret->payload ;

    /* Prefix the log with this host's own name, the same way every
     * other log in this handler does. */
    ilog ("%s bmc credentials received",
              host_ptr->hostname.c_str());
}
else
{
    /* Still waiting on the secret ; throttle the log so that it
     * is not emitted on every pass. */
    ilog_throttled (host_ptr->empty_secret_log_throttle, 50,
                    "%s waiting on bm credentials", host_ptr->hostname.c_str());
}
}
continue ;
}
else if (( host_ptr->accessible == false ) && ( host_ptr->ping_info.ok == true ) && ( !host_ptr->bm_pw.empty() ))
{
ilog ("%s bmc is accessible ; using %s\n",
host_ptr->hostname.c_str(),
bmcUtil_getProtocol_str(host_ptr->protocol).c_str());
host_ptr->accessible = true ;
}
else if (( host_ptr->accessible == true ) && ( host_ptr->ping_info.ok == false ))
{
wlog ("%s bmc access lost, changed or being retried ; using %s\n",
host_ptr->hostname.c_str(),
bmcUtil_getProtocol_str(host_ptr->protocol).c_str());
thread_kill ( host_ptr->bmc_thread_ctrl, host_ptr->bmc_thread_info );
host_ptr->accessible = false ;
host_ptr->sensor_query_count = 0 ;
host_ptr->bmc_fw_version.clear();
host_ptr->bm_pw.clear();
host_ptr->ping_info.stage = PINGUTIL_MONITOR_STAGE__FAIL ;
}
#ifdef WANT_FIT_TESTING
if ( daemon_want_fit ( FIT_CODE__EMPTY_BM_PASSWORD ))
{
host_ptr->thread_extra_info.bm_pw = "" ;
}
#endif
if ( host_ptr->accessible )
if (( host_ptr->accessible ) && ( !host_ptr->bm_pw.empty()))
{
/* typical success path */
hwmonHostClass::bmc_sensor_monitor ( host_ptr );

View File

@ -1205,19 +1205,33 @@ int hwmonHostClass::bmc_sensor_monitor ( struct hwmonHostClass::hwmon_host * hos
/* Consume done results */
mtcTimer_stop ( host_ptr->monitor_ctrl.timer );
if ( host_ptr->bmc_thread_info.status ) // == FAIL_SYSTEM_CALL )
if ( host_ptr->bmc_thread_info.status )
{
if ( ++host_ptr->bmc_thread_ctrl.retries < MAX_THREAD_RETRIES )
host_ptr->bmc_thread_ctrl.retries++ ;
if (!(host_ptr->bmc_thread_ctrl.retries>MAX_THREAD_RETRIES))
{
elog ("%s %s thread %2d failed (rc:%d) (try %d of %d) (%d:%d)\n",
host_ptr->bmc_thread_ctrl.hostname.c_str(),
host_ptr->bmc_thread_ctrl.name.c_str(),
host_ptr->bmc_thread_info.command,
host_ptr->bmc_thread_info.status,
host_ptr->bmc_thread_ctrl.retries,
MAX_THREAD_RETRIES,
host_ptr->bmc_thread_info.progress,
host_ptr->bmc_thread_info.runcount);
if ( host_ptr->bmc_thread_info.status == FAIL_SYSTEM_CALL )
{
    /* Report which command failed and the retry count only.
     * The bm password must never be written to the log, so it is
     * deliberately not part of this message. */
    elog ("%s '%s' system call failed (retry %d of %d)",
              host_ptr->bmc_thread_info.log_prefix,
              bmcUtil_getCmd_str(
                  host_ptr->bmc_thread_info.command).c_str(),
              host_ptr->bmc_thread_ctrl.retries,
              MAX_THREAD_RETRIES);
}
else if (( host_ptr->bmc_thread_ctrl.retries == (MAX_THREAD_RETRIES-1)) ||
( host_ptr->bmc_thread_ctrl.retries == 1 ))
{
elog ("%s '%s' failed (rc:%d) (retry %d of %d) (%d:%d)\n",
host_ptr->bmc_thread_info.log_prefix,
bmcUtil_getCmd_str(
host_ptr->bmc_thread_info.command).c_str(),
host_ptr->bmc_thread_info.status,
host_ptr->bmc_thread_ctrl.retries,
MAX_THREAD_RETRIES,
host_ptr->bmc_thread_info.progress,
host_ptr->bmc_thread_info.runcount);
}
/* don't flood the logs with the same error data over and over */
if ( host_ptr->bmc_thread_ctrl.retries == 1 )
@ -1234,22 +1248,6 @@ int hwmonHostClass::bmc_sensor_monitor ( struct hwmonHostClass::hwmon_host * hos
HWMON_SENSOR_MONITOR__DELAY );
break ;
}
#ifdef WANT_THIS
/* don't flood the logs with the same error data over and over */
if ( host_ptr->bmc_thread_ctrl.retries > 1 )
{
wlog ("%s %s thread '%d' command is done ; (%d:%d) (rc:%d)\n",
host_ptr->bmc_thread_ctrl.hostname.c_str(),
host_ptr->bmc_thread_ctrl.name.c_str(),
host_ptr->bmc_thread_info.command,
host_ptr->bmc_thread_info.progress,
host_ptr->bmc_thread_info.runcount,
host_ptr->bmc_thread_info.status);
blog ("%s ... data: %s\n",
host_ptr->bmc_thread_ctrl.hostname.c_str(),
host_ptr->bmc_thread_info.status_string.c_str());
}
#endif
}
else
{
@ -1636,7 +1634,7 @@ int hwmonHostClass::bmc_sensor_monitor ( struct hwmonHostClass::hwmon_host * hos
if ( severity != ptr->severity)
{
blog ("%s %s status change ; %s:%s -> %s\n",
ilog ("%s %s status change ; %s:%s -> %s\n",
host_ptr->hostname.c_str(),
ptr->sensorname.c_str(),
get_severity(ptr->severity).c_str(),

View File

@ -37,11 +37,6 @@
static event_type hwmon_event ;
void hwmonHttp_server_init ( void )
{
}
/* Cleanup */
void hwmonHttp_server_fini ( void )
{

View File

@ -296,7 +296,6 @@ int daemon_init ( string iface, string nodetype )
}
threadUtil_init ( hwmonTimer_handler ) ;
ilog ("BMC Acc Mode: %s\n", "ipmi/ipmitool" );
/* override the config reload for the startup case */
obj_ptr->config_reload = false ;
@ -304,7 +303,6 @@ int daemon_init ( string iface, string nodetype )
/* Init the hwmon service timers */
hwmon_timer_init ();
daemon_make_dir(IPMITOOL_OUTPUT_DIR) ;
#ifdef WANT_FIT_TESTING
daemon_make_dir(FIT__INFO_FILEPATH);

View File

@ -993,8 +993,8 @@ static int _redfishUtil_send_request( thread_info_type * info_ptr, string & data
}
dlog_t ("%s password filename : %s\n",
info_ptr->log_prefix,
info_ptr->password_file.c_str());
info_ptr->log_prefix,
info_ptr->password_file.c_str());
/*************** Create the output filename ***************/
datafile = bmcUtil_create_data_fn (info_ptr->hostname, file_suffix, BMC_PROTOCOL__REDFISHTOOL ) ;
@ -1007,8 +1007,8 @@ static int _redfishUtil_send_request( thread_info_type * info_ptr, string & data
datafile );
dlog_t ("%s query cmd: %s\n",
info_ptr->log_prefix,
request.c_str());
info_ptr->log_prefix,
request.c_str());
if (( info_ptr->command == BMC_THREAD_CMD__READ_SENSORS ) &&
( daemon_is_file_present ( MTC_CMD_FIT__SENSOR_DATA )))

View File

@ -6201,7 +6201,9 @@ int nodeLinkClass::bmc_handler ( struct nodeLinkClass::node * node_ptr )
}
#endif
/* Handle BMC access method changes */
/*****************************************************************
* Handle BMC access method changes
****************************************************************/
if ( node_ptr->bmc_access_method_changed )
{
node_ptr->bmc_access_method_changed = false ;
@ -6216,6 +6218,9 @@ int nodeLinkClass::bmc_handler ( struct nodeLinkClass::node * node_ptr )
pingUtil_fini ( node_ptr->bm_ping_info );
node_ptr->bm_ping_info.stage = PINGUTIL_MONITOR_STAGE__OPEN ;
/* force re-fetch of the BMC password */
node_ptr->bm_pw.clear();
/* start a timer that will raise the BM Access alarm
* if we are not accessible by the time it expires */
mtcTimer_reset ( node_ptr->bm_timer );
@ -6224,6 +6229,52 @@ int nodeLinkClass::bmc_handler ( struct nodeLinkClass::node * node_ptr )
mtcTimer_start ( node_ptr->bmc_access_timer, mtcTimer_handler, MTC_MINS_2 );
}
/*****************************************************************
* Run the ping monitor if BMC provisioned and ip address is valid
*****************************************************************/
if (( node_ptr->bmc_provisioned ) &&
( hostUtil_is_valid_ip_addr ( node_ptr->bm_ping_info.ip )))
{
pingUtil_acc_monitor ( node_ptr->bm_ping_info );
}
/*****************************************************************
* Manage bmc creds refresh
****************************************************************/
if ( node_ptr->bm_ping_info.ok == false )
{
/* Auto correct key ping information ;
* should never occur but if it does ... */
if (( node_ptr->bm_ping_info.hostname.empty()) ||
( node_ptr->bm_ping_info.ip.empty()))
{
node_ptr->bm_ping_info.hostname = node_ptr->hostname ;
node_ptr->bm_ping_info.ip = node_ptr->bm_ip ;
}
if ( ! node_ptr->bm_pw.empty() )
{
node_ptr->bm_pw.clear();
}
}
/*****************************************************************
* Manage getting the bm password but only when ping is ok
****************************************************************/
else if ( node_ptr->bm_pw.empty() )
{
barbicanSecret_type * secret = secretUtil_manage_secret( node_ptr->secretEvent,
node_ptr->hostname,
node_ptr->uuid,
node_ptr->bm_timer,
mtcTimer_handler );
if ( secret->stage == MTC_SECRET__GET_PWD_RECV )
{
node_ptr->bm_pw = secret->payload ;
ilog ("%s bmc credentials received",
node_ptr->hostname.c_str());
}
}
if (( node_ptr->bmc_accessible == true ) &&
( node_ptr->bm_ping_info.ok == false ))
{
@ -6258,38 +6309,12 @@ int nodeLinkClass::bmc_handler ( struct nodeLinkClass::node * node_ptr )
mtcTimer_reset ( node_ptr->bmc_audit_timer );
}
/* manage bmc creds refresh ; not expected but should be handled */
if ( node_ptr->bm_ping_info.ok == false )
{
/* Auto correct key ping information ;
* should never occur but if it does ... */
if (( node_ptr->bm_ping_info.hostname.empty()) ||
( node_ptr->bm_ping_info.ip.empty()))
{
node_ptr->bm_ping_info.hostname = node_ptr->hostname ;
node_ptr->bm_ping_info.ip = node_ptr->bm_ip ;
}
}
/* manage getting the bm password */
if ( node_ptr->thread_extra_info.bm_pw.empty() )
{
barbicanSecret_type * secret = secretUtil_manage_secret( node_ptr->secretEvent,
node_ptr->hostname,
node_ptr->uuid,
node_ptr->bm_timer,
mtcTimer_handler );
if ( secret->stage == MTC_SECRET__GET_PWD_RECV )
{
node_ptr->thread_extra_info.bm_pw = node_ptr->bm_pw = secret->payload ;
}
}
/* If the BMC protocol has not yet been learned then do so.
* Default is ipmi unless the target host responds to a
* redfish root query with a minimum version number ; 1.0 */
else if (( node_ptr->bm_ping_info.ok == true ) &&
( node_ptr->bmc_protocol_learned == false ))
if (( node_ptr->bm_ping_info.ok == true ) &&
(!node_ptr->bm_pw.empty()) &&
( node_ptr->bmc_protocol_learned == false ))
{
if ( node_ptr->bmc_protocol_learning == false )
{
@ -6673,20 +6698,12 @@ int nodeLinkClass::bmc_handler ( struct nodeLinkClass::node * node_ptr )
} /* end handling ipmi query, info, restart cause, power state */
} /* end main condition handling */
/*****************************************************************
* Run the ping monitor if BMC provisioned and ip address is valid
*****************************************************************/
if (( node_ptr->bmc_provisioned ) &&
( hostUtil_is_valid_ip_addr ( node_ptr->bm_ping_info.ip )))
{
pingUtil_acc_monitor ( node_ptr->bm_ping_info );
}
/* BMC Access Audit for Redfish.
* - used to refresh the host power state */
if (( node_ptr->bmc_protocol == BMC_PROTOCOL__REDFISHTOOL ) &&
( node_ptr->bmc_provisioned ) &&
( node_ptr->bmc_accessible ) &&
(!node_ptr->bm_pw.empty() ) &&
( mtcTimer_expired ( node_ptr->bmc_audit_timer ) == true ) &&
( mtcTimer_expired ( node_ptr->bm_timer ) == true ))
{