Maintenance Redfish support usability enhancements.

This update is the result of changes made during a suite of
end-to-end provisioning, reprovisioning and deprovisioning
customer experience tests of the maintenance Redfish support
feature.

1. Force reconnection and password fetch on provisioning changes
2. Force reconnection and password fetch on persistent connection failures
3. Fix Redfish protocol learning (inverted strcmp result check) in hardware monitor
4. Improve logging for some typical error paths.

Test Plan:

PASS: Verify handling of reprovisioning BMC between hosts that support
             different protocols.
PASS: Verify handling of reprovisioning the ip address of a host that leads to a
             different protocol selection.
PASS: Verify manual relearn handling to recover from errors that result from
             the above case.
PASS: Verify host BMC deprovisioning handling and cleanup.
PASS: Verify sensor monitoring.
PASS: Verify hwmond sticks with a selected protocol once a sensor model
             has been created using that protocol.
PASS: Verify handling of BMC reprovision - ip address change only
PASS: Verify handling of BMC reprovision - username change only
FAIL: Verify handling of BMC reprovision - password change only
             https://bugs.launchpad.net/starlingx/+bug/1846418

Change-Id: I4bf52a5dc3c97d7794ff623c881dff7886234e79
Closes-Bug: #1846212
Story: 2005861
Task: 36606
Signed-off-by: Eric MacDonald <eric.macdonald@windriver.com>
This commit is contained in:
Eric MacDonald 2019-10-03 11:56:42 -04:00
parent 01818fdf09
commit 4c541f50d4
10 changed files with 187 additions and 149 deletions

View File

@ -90,7 +90,11 @@ string bmcUtil_getProtocol_str ( bmc_protocol_enum protocol )
{
case BMC_PROTOCOL__REDFISHTOOL: return(BMC_PROTOCOL__REDFISHTOOL_STR);
case BMC_PROTOCOL__IPMITOOL: return(BMC_PROTOCOL__IPMITOOL_STR);
default: return("unknown");
default:
{
blog ("unknown bmc protocol %d", protocol );
return("unknown");
}
}
}
@ -329,10 +333,12 @@ bmc_protocol_enum bmcUtil_read_hwmond_protocol ( string hostname )
bmc_protocol_enum protocol = BMC_PROTOCOL__IPMITOOL ;
string hwmond_proto_filename = BMC_HWMON_TMP_DIR + hostname ;
string proto_str = daemon_read_file ( hwmond_proto_filename.data() ) ;
if ( strcmp (proto_str.data(), BMC_PROTOCOL__REDFISHTOOL_STR) )
protocol = BMC_PROTOCOL__REDFISHTOOL ;
if ( daemon_is_file_present ( hwmond_proto_filename.data() ) == true )
{
string proto_str = daemon_read_file ( hwmond_proto_filename.data() ) ;
if ( !strcmp (proto_str.data(), BMC_PROTOCOL__REDFISHTOOL_STR) )
protocol = BMC_PROTOCOL__REDFISHTOOL ;
}
return protocol;
}

View File

@ -803,6 +803,7 @@ int load_filenames_in_dir ( const char * directory, std::list<string> & filelist
{
DIR *d;
struct dirent *dir;
int rc = PASS ;
/* Clear the content of the config file list and running counter */
filelist.clear ();
@ -813,7 +814,7 @@ int load_filenames_in_dir ( const char * directory, std::list<string> & filelist
while ((dir = readdir(d)) != NULL)
{
dlog3 ("File: %s\n", dir->d_name);
if ( strcmp ( dir->d_name , "." ) &&
if ( strcmp ( dir->d_name , "." ) &&
strcmp ( dir->d_name , ".." ))
{
string temp = directory ;
@ -828,8 +829,9 @@ int load_filenames_in_dir ( const char * directory, std::list<string> & filelist
else
{
elog ("Failed to open %s\n", directory );
rc = FAIL_FILE_ACCESS ;
}
return(PASS);
return(rc);
}
int setup_child ( bool close_file_descriptors )

View File

@ -4066,6 +4066,7 @@ void nodeLinkClass::bmc_access_data_init ( struct nodeLinkClass::node * node_ptr
{
if ( node_ptr )
{
node_ptr->bm_pw.clear();
node_ptr->bmc_accessible = false ;
node_ptr->bm_ping_info.ok = false ;
node_ptr->bmc_info_query_active = false ;
@ -4148,12 +4149,8 @@ int nodeLinkClass::set_bm_prov ( struct nodeLinkClass::node * node_ptr, bool sta
secret->reference.clear() ;
secret->payload.clear() ;
secret->stage = MTC_SECRET__START ;
mtcTimer_start ( node_ptr->bm_timer, mtcTimer_handler, SECRET_START_DELAY );
}
mtcTimer_start( node_ptr->bm_timer, mtcTimer_handler, SECRET_START_DELAY );
node_ptr->thread_extra_info.bm_pw.clear() ;
node_ptr->thread_extra_info.bm_ip = node_ptr->bm_ip ;
node_ptr->thread_extra_info.bm_un = node_ptr->bm_un ;
send_hwmon_command(node_ptr->hostname, MTC_CMD_ADD_HOST);
send_hwmon_command(node_ptr->hostname, MTC_CMD_START_HOST);

View File

@ -504,21 +504,32 @@ int hwmonHostClass::set_bm_prov ( struct hwmonHostClass::hwmon_host * host_ptr,
{
rc = PASS ;
/* Clear the alarm if we are starting fresh from an unprovisioned state */
bool connect = false ;
bool reconnect = false ;
if (( host_ptr->bm_provisioned == false ) && ( state == true ))
connect = true ;
else if (( host_ptr->bm_provisioned == true ) && ( state == true ))
reconnect = true ;
if ( connect || reconnect )
{
ilog ("%s board management controller is being provisioned\n", host_ptr->hostname.c_str());
ilog ("%s setting up ping socket\n", host_ptr->hostname.c_str() );
ilog ("%s board management controller is being %sprovisioned\n",
host_ptr->hostname.c_str(),
host_ptr->bm_provisioned ? "re":"" );
/* ---------------------------------------
* Init bmc data based on monitoring mode
* ---------------------------------------*/
blog ("%s setting up ping socket\n", host_ptr->hostname.c_str() );
mtcTimer_reset ( host_ptr->ping_info.timer ) ;
host_ptr->ping_info.stage = PINGUTIL_MONITOR_STAGE__OPEN ;
host_ptr->ping_info.ip = host_ptr->bm_ip ;
host_ptr->ping_info.hostname = host_ptr->hostname ;
bmc_data_init ( host_ptr );
if ( connect || host_ptr->relearn )
bmc_data_init ( host_ptr );
string host_uuid = hostBase.get_uuid( host_ptr->hostname );
barbicanSecret_type * secret = secretUtil_find_secret( host_uuid );
@ -534,6 +545,7 @@ int hwmonHostClass::set_bm_prov ( struct hwmonHostClass::hwmon_host * host_ptr,
host_ptr->thread_extra_info.bm_ip = host_ptr->bm_ip ;
host_ptr->thread_extra_info.bm_un = host_ptr->bm_un ;
}
/* handle the case going from provisioned to not provisioned */
if (( host_ptr->bm_provisioned == true ) && ( state == false ))
{

View File

@ -96,59 +96,6 @@ void hwmonHostClass::hwmon_fsm ( void )
pingUtil_acc_monitor ( host_ptr->ping_info );
/* Check to see if sensor monitoring for this host is disabled.
* If it is ... */
if (( host_ptr->monitor == false ) || ( host_ptr->bm_pw.empty()))
{
/* ... make sure the thread sits in the
* idle state while disabled or there is no pw learned yet */
if ( thread_idle ( host_ptr->bmc_thread_ctrl ) == false )
{
if ( thread_done ( host_ptr->bmc_thread_ctrl ) == true )
{
host_ptr->bmc_thread_ctrl.done = true ;
}
else
{
thread_kill ( host_ptr->bmc_thread_ctrl, host_ptr->bmc_thread_info );
}
}
if ( host_ptr->bm_pw.empty( ))
{
string host_uuid = hostBase.get_uuid(host_ptr->hostname);
barbicanSecret_type * secret =
secretUtil_manage_secret( host_ptr->secretEvent,
host_ptr->hostname,
host_uuid,
host_ptr->secretTimer,
hwmonTimer_handler );
if ( secret->stage == MTC_SECRET__GET_PWD_RECV )
{
host_ptr->thread_extra_info.bm_pw = host_ptr->bm_pw = secret->payload ;
}
ilog_throttled (host_ptr->empty_secret_log_throttle, 50,
"%s waiting on bm password learn", host_ptr->hostname.c_str());
}
continue ;
}
else if (( host_ptr->accessible == false ) && ( host_ptr->ping_info.ok == true ))
{
ilog ("%s bmc is accessible\n", host_ptr->hostname.c_str());
host_ptr->accessible = true ;
}
else if (( host_ptr->accessible == true ) && ( host_ptr->ping_info.ok == false ))
{
wlog ("%s bmc access lost\n", host_ptr->hostname.c_str());
thread_kill ( host_ptr->bmc_thread_ctrl, host_ptr->bmc_thread_info );
host_ptr->accessible = false ;
host_ptr->sensor_query_count = 0 ;
host_ptr->bmc_fw_version.clear();
host_ptr->ping_info.stage = PINGUTIL_MONITOR_STAGE__FAIL ;
}
if ( host_ptr->ping_info.ok == false )
{
/* Auto correct key ping information ; should never occur but if it does ... */
@ -163,15 +110,81 @@ void hwmonHostClass::hwmon_fsm ( void )
host_ptr->ping_info.hostname = host_ptr->hostname ;
host_ptr->ping_info.ip = host_ptr->bm_ip ;
}
// pingUtil_acc_monitor ( host_ptr->ping_info );
}
/* Check to see if sensor monitoring for this host is
* disabled or the bm password has not yet been learned */
else if (( host_ptr->monitor == false ) || ( host_ptr->bm_pw.empty()))
{
/* ... make sure the thread sits in the
* idle state while disabled or there
* is no pw learned yet */
if ( thread_idle ( host_ptr->bmc_thread_ctrl ) == false )
{
if ( thread_done ( host_ptr->bmc_thread_ctrl ) == true )
{
host_ptr->bmc_thread_ctrl.done = true ;
}
else
{
thread_kill ( host_ptr->bmc_thread_ctrl, host_ptr->bmc_thread_info );
}
}
/* Only try and get the password if sensor monitoring
* is enabled */
if (( host_ptr->monitor ) && ( host_ptr->bm_pw.empty( )))
{
string host_uuid = hostBase.get_uuid(host_ptr->hostname);
barbicanSecret_type * secret =
secretUtil_manage_secret( host_ptr->secretEvent,
host_ptr->hostname,
host_uuid,
host_ptr->secretTimer,
hwmonTimer_handler );
if ( secret->stage == MTC_SECRET__GET_PWD_RECV )
{
host_ptr->bm_pw = secret->payload ;
ilog ("%s bmc credentials received",
hostname.c_str());
}
else
{
ilog_throttled (host_ptr->empty_secret_log_throttle, 50,
"%s waiting on bm credentials", host_ptr->hostname.c_str());
}
}
continue ;
}
else if (( host_ptr->accessible == false ) && ( host_ptr->ping_info.ok == true ) && ( !host_ptr->bm_pw.empty() ))
{
ilog ("%s bmc is accessible ; using %s\n",
host_ptr->hostname.c_str(),
bmcUtil_getProtocol_str(host_ptr->protocol).c_str());
host_ptr->accessible = true ;
}
else if (( host_ptr->accessible == true ) && ( host_ptr->ping_info.ok == false ))
{
wlog ("%s bmc access lost, changed or being retried ; using %s\n",
host_ptr->hostname.c_str(),
bmcUtil_getProtocol_str(host_ptr->protocol).c_str());
thread_kill ( host_ptr->bmc_thread_ctrl, host_ptr->bmc_thread_info );
host_ptr->accessible = false ;
host_ptr->sensor_query_count = 0 ;
host_ptr->bmc_fw_version.clear();
host_ptr->bm_pw.clear();
host_ptr->ping_info.stage = PINGUTIL_MONITOR_STAGE__FAIL ;
}
#ifdef WANT_FIT_TESTING
if ( daemon_want_fit ( FIT_CODE__EMPTY_BM_PASSWORD ))
{
host_ptr->thread_extra_info.bm_pw = "" ;
}
#endif
if ( host_ptr->accessible )
if (( host_ptr->accessible ) && ( !host_ptr->bm_pw.empty()))
{
/* typical success path */
hwmonHostClass::bmc_sensor_monitor ( host_ptr );

View File

@ -1205,19 +1205,33 @@ int hwmonHostClass::bmc_sensor_monitor ( struct hwmonHostClass::hwmon_host * hos
/* Consume done results */
mtcTimer_stop ( host_ptr->monitor_ctrl.timer );
if ( host_ptr->bmc_thread_info.status ) // == FAIL_SYSTEM_CALL )
if ( host_ptr->bmc_thread_info.status )
{
if ( ++host_ptr->bmc_thread_ctrl.retries < MAX_THREAD_RETRIES )
host_ptr->bmc_thread_ctrl.retries++ ;
if (!(host_ptr->bmc_thread_ctrl.retries>MAX_THREAD_RETRIES))
{
elog ("%s %s thread %2d failed (rc:%d) (try %d of %d) (%d:%d)\n",
host_ptr->bmc_thread_ctrl.hostname.c_str(),
host_ptr->bmc_thread_ctrl.name.c_str(),
host_ptr->bmc_thread_info.command,
host_ptr->bmc_thread_info.status,
host_ptr->bmc_thread_ctrl.retries,
MAX_THREAD_RETRIES,
host_ptr->bmc_thread_info.progress,
host_ptr->bmc_thread_info.runcount);
if ( host_ptr->bmc_thread_info.status == FAIL_SYSTEM_CALL )
{
elog ("%s '%s' system call failed (retry %d of %d) (%s)",
host_ptr->bmc_thread_info.log_prefix,
bmcUtil_getCmd_str(
host_ptr->bmc_thread_info.command).c_str(),
host_ptr->bmc_thread_ctrl.retries,
MAX_THREAD_RETRIES, host_ptr->thread_extra_info.bm_pw.c_str());
}
else if (( host_ptr->bmc_thread_ctrl.retries == (MAX_THREAD_RETRIES-1)) ||
( host_ptr->bmc_thread_ctrl.retries == 1 ))
{
elog ("%s '%s' failed (rc:%d) (retry %d of %d) (%d:%d)\n",
host_ptr->bmc_thread_info.log_prefix,
bmcUtil_getCmd_str(
host_ptr->bmc_thread_info.command).c_str(),
host_ptr->bmc_thread_info.status,
host_ptr->bmc_thread_ctrl.retries,
MAX_THREAD_RETRIES,
host_ptr->bmc_thread_info.progress,
host_ptr->bmc_thread_info.runcount);
}
/* don't flood the logs with the same error data over and over */
if ( host_ptr->bmc_thread_ctrl.retries == 1 )
@ -1234,22 +1248,6 @@ int hwmonHostClass::bmc_sensor_monitor ( struct hwmonHostClass::hwmon_host * hos
HWMON_SENSOR_MONITOR__DELAY );
break ;
}
#ifdef WANT_THIS
/* don't flood the logs with the same error data over and over */
if ( host_ptr->bmc_thread_ctrl.retries > 1 )
{
wlog ("%s %s thread '%d' command is done ; (%d:%d) (rc:%d)\n",
host_ptr->bmc_thread_ctrl.hostname.c_str(),
host_ptr->bmc_thread_ctrl.name.c_str(),
host_ptr->bmc_thread_info.command,
host_ptr->bmc_thread_info.progress,
host_ptr->bmc_thread_info.runcount,
host_ptr->bmc_thread_info.status);
blog ("%s ... data: %s\n",
host_ptr->bmc_thread_ctrl.hostname.c_str(),
host_ptr->bmc_thread_info.status_string.c_str());
}
#endif
}
else
{
@ -1636,7 +1634,7 @@ int hwmonHostClass::bmc_sensor_monitor ( struct hwmonHostClass::hwmon_host * hos
if ( severity != ptr->severity)
{
blog ("%s %s status change ; %s:%s -> %s\n",
ilog ("%s %s status change ; %s:%s -> %s\n",
host_ptr->hostname.c_str(),
ptr->sensorname.c_str(),
get_severity(ptr->severity).c_str(),

View File

@ -37,11 +37,6 @@
static event_type hwmon_event ;
void hwmonHttp_server_init ( void )
{
}
/* Cleanup */
void hwmonHttp_server_fini ( void )
{

View File

@ -296,7 +296,6 @@ int daemon_init ( string iface, string nodetype )
}
threadUtil_init ( hwmonTimer_handler ) ;
ilog ("BMC Acc Mode: %s\n", "ipmi/ipmitool" );
/* override the config reload for the startup case */
obj_ptr->config_reload = false ;
@ -304,7 +303,6 @@ int daemon_init ( string iface, string nodetype )
/* Init the hwmon service timers */
hwmon_timer_init ();
daemon_make_dir(IPMITOOL_OUTPUT_DIR) ;
#ifdef WANT_FIT_TESTING
daemon_make_dir(FIT__INFO_FILEPATH);

View File

@ -993,8 +993,8 @@ static int _redfishUtil_send_request( thread_info_type * info_ptr, string & data
}
dlog_t ("%s password filename : %s\n",
info_ptr->log_prefix,
info_ptr->password_file.c_str());
info_ptr->log_prefix,
info_ptr->password_file.c_str());
/*************** Create the output filename ***************/
datafile = bmcUtil_create_data_fn (info_ptr->hostname, file_suffix, BMC_PROTOCOL__REDFISHTOOL ) ;
@ -1007,8 +1007,8 @@ static int _redfishUtil_send_request( thread_info_type * info_ptr, string & data
datafile );
dlog_t ("%s query cmd: %s\n",
info_ptr->log_prefix,
request.c_str());
info_ptr->log_prefix,
request.c_str());
if (( info_ptr->command == BMC_THREAD_CMD__READ_SENSORS ) &&
( daemon_is_file_present ( MTC_CMD_FIT__SENSOR_DATA )))

View File

@ -6201,7 +6201,9 @@ int nodeLinkClass::bmc_handler ( struct nodeLinkClass::node * node_ptr )
}
#endif
/* Handle BMC access method changes */
/*****************************************************************
* Handle BMC access method changes
****************************************************************/
if ( node_ptr->bmc_access_method_changed )
{
node_ptr->bmc_access_method_changed = false ;
@ -6216,6 +6218,9 @@ int nodeLinkClass::bmc_handler ( struct nodeLinkClass::node * node_ptr )
pingUtil_fini ( node_ptr->bm_ping_info );
node_ptr->bm_ping_info.stage = PINGUTIL_MONITOR_STAGE__OPEN ;
/* force re-fetch of the BMC password */
node_ptr->bm_pw.clear();
/* start a timer that will raise the BM Access alarm
* if we are not accessible by the time it expires */
mtcTimer_reset ( node_ptr->bm_timer );
@ -6224,6 +6229,52 @@ int nodeLinkClass::bmc_handler ( struct nodeLinkClass::node * node_ptr )
mtcTimer_start ( node_ptr->bmc_access_timer, mtcTimer_handler, MTC_MINS_2 );
}
/*****************************************************************
* Run the ping monitor if BMC provisioned and ip address is valid
*****************************************************************/
if (( node_ptr->bmc_provisioned ) &&
( hostUtil_is_valid_ip_addr ( node_ptr->bm_ping_info.ip )))
{
pingUtil_acc_monitor ( node_ptr->bm_ping_info );
}
/*****************************************************************
* Manage bmc creds refresh
****************************************************************/
if ( node_ptr->bm_ping_info.ok == false )
{
/* Auto correct key ping information ;
* should never occur but if it does ... */
if (( node_ptr->bm_ping_info.hostname.empty()) ||
( node_ptr->bm_ping_info.ip.empty()))
{
node_ptr->bm_ping_info.hostname = node_ptr->hostname ;
node_ptr->bm_ping_info.ip = node_ptr->bm_ip ;
}
if ( ! node_ptr->bm_pw.empty() )
{
node_ptr->bm_pw.clear();
}
}
/*****************************************************************
* Manage getting the bm password but only when ping is ok
****************************************************************/
else if ( node_ptr->bm_pw.empty() )
{
barbicanSecret_type * secret = secretUtil_manage_secret( node_ptr->secretEvent,
node_ptr->hostname,
node_ptr->uuid,
node_ptr->bm_timer,
mtcTimer_handler );
if ( secret->stage == MTC_SECRET__GET_PWD_RECV )
{
node_ptr->bm_pw = secret->payload ;
ilog ("%s bmc credentials received",
node_ptr->hostname.c_str());
}
}
if (( node_ptr->bmc_accessible == true ) &&
( node_ptr->bm_ping_info.ok == false ))
{
@ -6258,38 +6309,12 @@ int nodeLinkClass::bmc_handler ( struct nodeLinkClass::node * node_ptr )
mtcTimer_reset ( node_ptr->bmc_audit_timer );
}
/* manage bmc creds refresh ; not expected but should be handled */
if ( node_ptr->bm_ping_info.ok == false )
{
/* Auto correct key ping information ;
* should never occur but if it does ... */
if (( node_ptr->bm_ping_info.hostname.empty()) ||
( node_ptr->bm_ping_info.ip.empty()))
{
node_ptr->bm_ping_info.hostname = node_ptr->hostname ;
node_ptr->bm_ping_info.ip = node_ptr->bm_ip ;
}
}
/* manage getting the bm password */
if ( node_ptr->thread_extra_info.bm_pw.empty() )
{
barbicanSecret_type * secret = secretUtil_manage_secret( node_ptr->secretEvent,
node_ptr->hostname,
node_ptr->uuid,
node_ptr->bm_timer,
mtcTimer_handler );
if ( secret->stage == MTC_SECRET__GET_PWD_RECV )
{
node_ptr->thread_extra_info.bm_pw = node_ptr->bm_pw = secret->payload ;
}
}
/* If the BMC protocol has not yet been learned then do so.
* Default is ipmi unless the target host responds to a
* redfish root query with a minimum version number ; 1.0 */
else if (( node_ptr->bm_ping_info.ok == true ) &&
( node_ptr->bmc_protocol_learned == false ))
if (( node_ptr->bm_ping_info.ok == true ) &&
(!node_ptr->bm_pw.empty()) &&
( node_ptr->bmc_protocol_learned == false ))
{
if ( node_ptr->bmc_protocol_learning == false )
{
@ -6673,20 +6698,12 @@ int nodeLinkClass::bmc_handler ( struct nodeLinkClass::node * node_ptr )
} /* end handling ipmi query, info, restart cause, power state */
} /* end main condition handling */
/*****************************************************************
* Run the ping monitor if BMC provisioned and ip address is valid
*****************************************************************/
if (( node_ptr->bmc_provisioned ) &&
( hostUtil_is_valid_ip_addr ( node_ptr->bm_ping_info.ip )))
{
pingUtil_acc_monitor ( node_ptr->bm_ping_info );
}
/* BMC Access Audit for Redfish.
* - used to refresh the host power state */
if (( node_ptr->bmc_protocol == BMC_PROTOCOL__REDFISHTOOL ) &&
( node_ptr->bmc_provisioned ) &&
( node_ptr->bmc_accessible ) &&
(!node_ptr->bm_pw.empty() ) &&
( mtcTimer_expired ( node_ptr->bmc_audit_timer ) == true ) &&
( mtcTimer_expired ( node_ptr->bm_timer ) == true ))
{