diff --git a/mtce-common/src/common/bmcUtil.cpp b/mtce-common/src/common/bmcUtil.cpp index 12a717a4..b1307599 100644 --- a/mtce-common/src/common/bmcUtil.cpp +++ b/mtce-common/src/common/bmcUtil.cpp @@ -14,10 +14,11 @@ using namespace std; -#include "nodeBase.h" /* for ... mtce-common node definitions */ -#include "hostUtil.h" /* for ... mtce-common host definitions */ -#include "bmcUtil.h" /* for ... mtce-common bmc utility header */ -#include "jsonUtil.h" /* for ... json_tokener_parse */ +#include "nodeBase.h" /* for ... mtce-common node definitions */ +#include "hostUtil.h" /* for ... mtce-common host definitions */ +#include "bmcUtil.h" /* for ... mtce-common bmc utility header */ +#include "nodeUtil.h" /* for ... tolowercase */ +#include "jsonUtil.h" /* for ... jsonUtil_get_key_value_string */ /********************************************************************** * @@ -133,8 +134,10 @@ string bmcUtil_chop_system_req ( string request ) int bmcUtil_init ( void ) { - daemon_make_dir(BMC_OUTPUT_DIR) ; - daemon_make_dir(BMC_HWMON_TMP_DIR) ; + if ( daemon_is_file_present ( BMC_OUTPUT_DIR ) == false ) + daemon_make_dir(BMC_OUTPUT_DIR) ; + if ( daemon_is_file_present ( BMC_HWMON_TMP_DIR ) == false ) + daemon_make_dir(BMC_HWMON_TMP_DIR) ; ipmiUtil_init (); redfishUtil_init (); @@ -194,6 +197,11 @@ void bmcUtil_info_init ( bmc_info_type & bmc_info ) bmc_info.power_on = false ; bmc_info.restart_cause.clear() ; + + /* clear the supported actions lists */ + bmc_info.reset_action_list.clear(); + bmc_info.power_on_action_list.clear(); + bmc_info.power_off_action_list.clear(); } /************************************************************************* @@ -232,9 +240,9 @@ void bmcUtil_hwmon_info ( string hostname, /* add the 'power' state key:val pair */ if ( power_on ) - info_str.append("\",\"power\":\"on\""); + info_str.append("\",\"power_state\":\"on\""); else - info_str.append("\",\"power\":\"off\""); + info_str.append("\",\"power_state\":\"off\""); /* add the extra data 
if it exists */ if ( ! extra.empty () ) @@ -287,7 +295,13 @@ bool bmcUtil_read_bmc_info( string hostname, protocol = BMC_PROTOCOL__IPMITOOL ; else protocol = BMC_PROTOCOL__REDFISHTOOL ; + json_object_put(json_obj); + + ilog ("%s power is %s with bmc communication using %s", + hostname.c_str(), + power_state.c_str(), + bmcUtil_getProtocol_str(protocol).c_str()); return (true); } else @@ -298,8 +312,9 @@ bool bmcUtil_read_bmc_info( string hostname, blog ("%s failed to parse bmc info! set to ipmitool by default!\n", hostname.c_str()); return (false); } -} + return (true); +} /***************************************************************************** * * Name : bmcUtil_read_hwmond_protocol @@ -431,3 +446,140 @@ string bmcUtil_create_data_fn ( string & hostname, return ( datafile ); } + +/************************************************************************* + * + * Name : bmcUtil_is_power_on + * + * Purpose : Get power state from query response data. + * + * Description: Parse a BMC protocol specific response for current + * power state. + * + * Assumptions: supplied power state is not changed on failure. 
+ * + * Parameters : hostname - string + * protocol - BMC_PROTOCOL__REDFISHTOOL | BMC_PROTOCOL__IPMITOOL + * response - protocol specific power query response data + * + * Updates : power_on - updated if response is queried ok + * set true if power is on + * set false if power is off + * + * Returns : PASS or + * FAIL_NO_DATA , FAIL_JSON_PARSE + * + *************************************************************************/ + +int bmcUtil_is_power_on ( string hostname, + bmc_protocol_enum protocol, + string & response, + bool & power_on) +{ + if ( response.empty() ) + { + wlog ("%s bmc power status query response empty", + hostname.c_str()); + return (FAIL_NO_DATA); + } + else if ( protocol == BMC_PROTOCOL__REDFISHTOOL ) + { + struct json_object *json_obj = json_tokener_parse((char*)response.data()); + if ( !json_obj ) + { + wlog ("%s failed to tokenize bmc info", hostname.c_str()); + return (FAIL_JSON_PARSE) ; + } + else if (tolowercase(jsonUtil_get_key_value_string(json_obj,REDFISH_LABEL__POWER_STATE)) == "on" ) + power_on = true ; + else + power_on = false ; + + /* free the json object */ + json_object_put(json_obj ); + } + else /* IPMI */ + { + if ( response.find (IPMITOOL_POWER_ON_STATUS) != std::string::npos ) + power_on = true ; + else + power_on = false ; + } + return (PASS); +} + + +/**************************************************************************** + * + * Name : bmcUtil_remove_files + * + * Purpose : cleanup temp files. + * + * Description: Called during de-provision to remove temporary files created + * by host provisioning and command output. + * + * Function detects which process is calling it and removes + * only the temp files that daemon created for a specific host. + * + * Assumptions: Keeps the temp dirs clean and current. 
+ * + ****************************************************************************/ + +extern char *program_invocation_short_name; +void bmcUtil_remove_files ( string hostname, bmc_protocol_enum protocol ) +{ + /* Read in the list of config files and their contents */ + + std::list filelist ; + std::list::iterator file_ptr ; + + string dir = BMC_OUTPUT_DIR ; + dir.append(bmcUtil_getProtocol_str(protocol)); + + int rc = load_filenames_in_dir ( dir.data(), filelist ) ; + if ( rc ) + { + ilog ("%s failed to load files (rc:%d)", hostname.c_str(), rc ); + return ; + } + + /* files exist as __ */ + if ( !strcmp(MTC_SERVICE_MTCAGENT_NAME, program_invocation_short_name )) + { + for ( file_ptr = filelist.begin(); + file_ptr != filelist.end() ; + file_ptr++ ) + { + if ( file_ptr->find (program_invocation_short_name) != string::npos ) + { + if ( file_ptr->find (hostname) != string::npos ) + { + daemon_remove_file ( file_ptr->data() ); + blog2 ("%s %s removed", hostname.c_str(), file_ptr->c_str()); + } + } + } + } + else if ( !strcmp(MTC_SERVICE_HWMOND_NAME, program_invocation_short_name )) + { + for ( file_ptr = filelist.begin(); + file_ptr != filelist.end() ; + file_ptr++ ) + { + if ( file_ptr->find (program_invocation_short_name) != string::npos ) + { + if ( file_ptr->find (hostname) != string::npos ) + { + daemon_remove_file ( file_ptr->data() ); + blog2 ("%s %s removed", hostname.c_str(), file_ptr->c_str()); + } + } + } + + /* remove the static file that specified the protocol that was used to create this host's sensor model */ + string hwmond_proto_filename = BMC_HWMON_TMP_DIR ; + hwmond_proto_filename.append("/") ; + hwmond_proto_filename.append(hostname); + daemon_remove_file ( hwmond_proto_filename.data() ); + } +} diff --git a/mtce-common/src/common/bmcUtil.h b/mtce-common/src/common/bmcUtil.h index 02390e63..a7b6b17d 100644 --- a/mtce-common/src/common/bmcUtil.h +++ b/mtce-common/src/common/bmcUtil.h @@ -56,7 +56,9 @@ typedef struct std::string sn ; /* actions */ - 
std::list allowable_reset_action_list ; + std::list reset_action_list ; + std::list power_on_action_list ; + std::list power_off_action_list ; /* state info */ std::string restart_cause ; @@ -140,6 +142,15 @@ void bmcUtil_hwmon_info ( string hostname, bool power_on, string extra ); +/* Get power state from query response data. */ +int bmcUtil_is_power_on ( string hostname, + bmc_protocol_enum protocol, + string & response, + bool & power_on); + +void bmcUtil_remove_files ( string hostname, + bmc_protocol_enum protocol ); + #include "ipmiUtil.h" /* for ... mtce-common ipmi utility header */ #include "redfishUtil.h" /* for ... mtce-common redfish utility header */ diff --git a/mtce-common/src/common/fitCodes.h b/mtce-common/src/common/fitCodes.h index 71ce8bb0..10abb498 100644 --- a/mtce-common/src/common/fitCodes.h +++ b/mtce-common/src/common/fitCodes.h @@ -53,6 +53,7 @@ #define MTC_CMD_FIT__LINKLIST ("/var/run/fit/linklist") /* hbsAgent */ #define MTC_CMD_FIT__HBSSILENT ("/var/run/fit/hbs_silent_fault") /* hbsAgent */ #define MTC_CMD_FIT__SENSOR_DATA ("/var/run/fit/sensor_data") /* hwmond */ +#define MTC_CMD_FIT__INLINE_CREDS ("/var/run/fit/inline_creds") /* mtcAgent */ #define MTC_CMD_FIT__POWER_CMD ("/var/run/fit/power_cmd_result") /* mtcAgent */ #define MTC_CMD_FIT__ROOT_QUERY ("/var/run/fit/root_query") /* mtcAgent */ #define MTC_CMD_FIT__MC_INFO ("/var/run/fit/mc_info") /* mtcAgent */ @@ -63,6 +64,9 @@ #define MTC_CMD_FIT__START_SVCS ("/var/run/fit/host_services") /* mtcClient */ #define MTC_CMD_FIT__NO_HS_ACK ("/var/run/fit/no_hs_ack") /* mtcClient */ #define MTC_CMD_FIT__GOENABLE_AUDIT ("/var/run/fit/goenable_audit") /* mtcAgent */ +#define MTC_CMD_FIT__JSON_LEAK_SOAK ("/var/run/fit/json_leak_soak") /* mtcAgent */ +#define MTC_CMD_FIT__BMC_ACC_FAIL ("/var/run/fit/bmc_access_fail")/* mtcAgent */ +#define MTC_CMD_FIT__MEM_LEAK_DEBUG ("/var/run/fit/mem_leak_debug")/* mtcAgent */ /***************************************************** * Fault Insertion Codes 
diff --git a/mtce-common/src/common/logMacros.h b/mtce-common/src/common/logMacros.h index c347b462..25d62eaa 100644 --- a/mtce-common/src/common/logMacros.h +++ b/mtce-common/src/common/logMacros.h @@ -239,6 +239,7 @@ extern char *program_invocation_short_name; /** Scheduling Latency */ #define NSEC_TO_MSEC (1000000) +#define NSEC_TO_SEC (1000000000) #define llog(format, args...) \ { syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Latncy: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \ diff --git a/mtce-common/src/common/nodeBase.h b/mtce-common/src/common/nodeBase.h index 03418e34..f56161ca 100755 --- a/mtce-common/src/common/nodeBase.h +++ b/mtce-common/src/common/nodeBase.h @@ -169,7 +169,12 @@ typedef enum #define CLUSTER_HOST_SUFFIX ((const char*)("-cluster-host")) -#define NONE (const char *)"none" +#define NONE (const char *)"none" + +#ifdef UNKNOWN +#undef UNKNOWN +#endif +#define UNKNOWN (const char *)"unknown" /** Largest heartbeat pulse (req/resp) message size */ #define MAX_API_LOG_LEN (0x1000) diff --git a/mtce-common/src/common/nodeTimers.h b/mtce-common/src/common/nodeTimers.h index 5d932c04..aac3e4ba 100755 --- a/mtce-common/src/common/nodeTimers.h +++ b/mtce-common/src/common/nodeTimers.h @@ -84,7 +84,7 @@ #define MTC_REINSTALL_TIMEOUT_MIN (MTC_MINS_1) #define MTC_REINSTALL_TIMEOUT_MAX (MTC_HRS_4) #define MTC_REINSTALL_WAIT_TIMER (10) -#define MTC_IPMITOOL_REQUEST_DELAY (10) /* consider making this shorter */ +#define MTC_BMC_REQUEST_DELAY (10) /* consider making this shorter */ #define LAZY_REBOOT_RETRY_DELAY_SECS (60) #define SM_NOTIFY_UNHEALTHY_DELAY_SECS (5) #define MTC_MIN_ONLINE_PERIOD_SECS (7) diff --git a/mtce-common/src/common/pingUtil.cpp b/mtce-common/src/common/pingUtil.cpp index 4d1a72d1..cb6a3697 100644 --- a/mtce-common/src/common/pingUtil.cpp +++ b/mtce-common/src/common/pingUtil.cpp @@ -49,6 +49,7 @@ typedef struct char msg[PING_MESSAGE_LEN]; } ping6_rx_message_type ; + 
/******************************************************************************* * * Name : pingUtil_init @@ -244,14 +245,16 @@ int pingUtil_send ( ping_info_type & ping_info ) wlog ("%s ping %s send failed (rc:%d) (%d:%m)\n", ping_info.hostname.c_str(), ping_info.ip.c_str(), bytes, errno ); return FAIL ; } - if ( ping_info.monitoring == false ) + if (( ping_info.monitoring == false ) && + ( ping_info.send_retries >= PING_MAX_SEND_RETRIES )) { - ilog ("%s ping send %s ok ; identity:%04x sequence:%04x (try %d)\n", + ilog ("%s ping send %s ok ; identity:%04x sequence:%04x (try %d of %d)\n", ping_info.hostname.c_str(), ping_info.ip.c_str(), ping_info.identity, ping_info.sequence, - ping_info.send_retries); + ping_info.send_retries, + PING_MAX_SEND_RETRIES); } else { diff --git a/mtce-common/src/common/redfishUtil.cpp b/mtce-common/src/common/redfishUtil.cpp index b94cd723..7cc2e775 100644 --- a/mtce-common/src/common/redfishUtil.cpp +++ b/mtce-common/src/common/redfishUtil.cpp @@ -11,6 +11,7 @@ #include #include #include +#include /* for ... json-c json string parsing */ using namespace std; @@ -20,6 +21,12 @@ using namespace std; #include "jsonUtil.h" /* for ... */ #include "redfishUtil.h" /* for ... this module header */ +/* static prioritized list of redfish actions. + * Higher priority action first. 
*/ +static std::list reset_actions ; +static std::list poweron_actions ; +static std::list poweroff_actions ; + /************************************************************************* * * Name : redfishUtil_init @@ -35,9 +42,203 @@ using namespace std; int redfishUtil_init ( void ) { daemon_make_dir(REDFISHTOOL_OUTPUT_DIR) ; + + /* Stock reset actions in order of priority */ + reset_actions.push_front(REDFISHTOOL_RESET__GRACEFUL_RESTART); /* P1 */ + reset_actions.push_back (REDFISHTOOL_RESET__FORCE_RESTART); /* P2 */ + + poweron_actions.push_front(REDFISHTOOL_POWER_ON__ON); + poweron_actions.push_back (REDFISHTOOL_POWER_ON__FORCE_ON); + + poweroff_actions.push_front(REDFISHTOOL_POWER_OFF__GRACEFUL_SHUTDOWN); + poweroff_actions.push_back (REDFISHTOOL_POWER_OFF__FORCE_OFF); + return (PASS); } +/************************************************************************* + * + * Name : _load_action_lists + * + * Purpose : Load supported host actions. + * + * Description: Filter stock actions through host actions. + * + * Parameters : hostname - this host's name + * host_action_list - what actions this host reports support for.
+ * + * Updates: bmc_info - reference that includes host action lists + * + *************************************************************************/ + +void _load_action_lists ( string & hostname, + bmc_info_type & bmc_info, + std::list & host_action_list) +{ + bmc_info.reset_action_list.clear(); + bmc_info.power_on_action_list.clear(); + bmc_info.power_off_action_list.clear(); + + /* Walk through the host action list looking for and updating + * this host's bmc_info supported actions lists */ + std::list::iterator _host_action_list_ptr ; + for ( _host_action_list_ptr = host_action_list.begin(); + _host_action_list_ptr != host_action_list.end() ; + _host_action_list_ptr++ ) + { + std::list::iterator _action_list_ptr ; + for ( _action_list_ptr = poweroff_actions.begin(); + _action_list_ptr != poweroff_actions.end() ; + _action_list_ptr++ ) + { + if ( (*_host_action_list_ptr) == (*_action_list_ptr) ) + { + bmc_info.power_off_action_list.push_back(*_action_list_ptr) ; + break ; + } + } + for ( _action_list_ptr = poweron_actions.begin(); + _action_list_ptr != poweron_actions.end() ; + _action_list_ptr++ ) + { + if ( (*_host_action_list_ptr) == (*_action_list_ptr) ) + { + bmc_info.power_on_action_list.push_back(*_action_list_ptr) ; + break ; + } + } + for ( _action_list_ptr = reset_actions.begin(); + _action_list_ptr != reset_actions.end() ; + _action_list_ptr++ ) + { + if ( (*_host_action_list_ptr) == (*_action_list_ptr) ) + { + bmc_info.reset_action_list.push_back(*_action_list_ptr) ; + break ; + } + } + } + string reset_tmp = "" ; + string poweron_tmp = "" ; + string poweroff_tmp = "" ; + std::list::iterator _ptr ; + for ( _ptr = bmc_info.reset_action_list.begin(); + _ptr != bmc_info.reset_action_list.end() ; + _ptr++ ) + { + if ( !reset_tmp.empty() ) + reset_tmp.append(","); + reset_tmp.append(*_ptr); + } + for ( _ptr = bmc_info.power_on_action_list.begin(); + _ptr != bmc_info.power_on_action_list.end() ; + _ptr++ ) + { + if ( !poweron_tmp.empty() ) + 
poweron_tmp.append(","); + poweron_tmp.append(*_ptr); + } + for ( _ptr = bmc_info.power_off_action_list.begin(); + _ptr != bmc_info.power_off_action_list.end() ; + _ptr++ ) + { + if ( !poweroff_tmp.empty() ) + poweroff_tmp.append(","); + poweroff_tmp.append(*_ptr); + } + ilog ("%s bmc actions ; reset:%s power-on:%s power-off:%s", + hostname.c_str(), + reset_tmp.empty() ? "none" : reset_tmp.c_str(), + poweron_tmp.empty() ? "none" : poweron_tmp.c_str(), + poweroff_tmp.empty() ? "none" : poweroff_tmp.c_str()); +} + +#ifdef SAVE_IMP +int _get_action_list ( string hostname, + redfish_action_enum action, + std::list host_action_list, + std::list & supp_action_list) +{ + int status = PASS ; + std::list * action_ptr = NULL ; + string action_str = "" ; + supp_action_list.clear(); + switch ( action ) + { + case REDFISH_ACTION__RESET: + { + action_ptr = &reset_actions ; + action_str = "reset" ; + break ; + } + case REDFISH_ACTION__POWER_ON: + { + action_ptr = &poweron_actions ; + action_str = "power-on" ; + break ; + } + case REDFISH_ACTION__POWER_OFF: + { + action_ptr = &poweroff_actions ; + action_str = "power-off" ; + break ; + } + default: + { + status = FAIL_BAD_CASE ; + } + } + + /* Filter */ + if (( status == PASS ) && (action_ptr)) + { + /* get the best supported action command + * for the specified action group. 
*/ + std::list::iterator _action_list_ptr ; + std::list::iterator _host_action_list_ptr ; + for ( _action_list_ptr = action_ptr->begin(); + _action_list_ptr != action_ptr->end() ; + _action_list_ptr++ ) + { + for ( _host_action_list_ptr = host_action_list.begin(); + _host_action_list_ptr != host_action_list.end() ; + _host_action_list_ptr++ ) + { + if ( (*_host_action_list_ptr) == (*_action_list_ptr) ) + { + supp_action_list.push_back(*_action_list_ptr) ; + break ; + } + } + } + } + if ( supp_action_list.empty() ) + { + elog ("%s has no %s actions", hostname.c_str(), action_str.c_str()); + if ( status == PASS ) + status = FAIL_STRING_EMPTY ; + } + else + { + string tmp = "" ; + std::list::iterator _ptr ; + for ( _ptr = supp_action_list.begin(); + _ptr != supp_action_list.end() ; + _ptr++ ) + { + if ( !tmp.empty() ) + tmp.append(", "); + tmp.append(*_ptr); + } + ilog ("%s redfish %s actions: %s", + hostname.c_str(), + action_str.c_str(), + tmp.c_str()); + } + return (status); +} +#endif + /************************************************************************* * * Name : redfishUtil_is_supported @@ -95,22 +296,33 @@ bool redfishUtil_is_supported (string & hostname, string & response) &major, &minor, &revision ); - - if (( fields ) && ( major >= REDFISH_MIN_MAJOR_VERSION )) + if ( fields ) { - ilog ("%s bmc redfish version %s (%d.%d.%d)", - hostname.c_str(), - redfish_version.c_str(), - major, minor, revision ); - return true ; + if (( major > REDFISH_MIN_MAJOR_VERSION ) || (( major == REDFISH_MIN_MAJOR_VERSION ) && ( minor >= REDFISH_MIN_MINOR_VERSION ))) /* ordered major.minor compare ; a newer major passes regardless of its minor */ + { + ilog ("%s bmc supports redfish version %s", + hostname.c_str(), + redfish_version.c_str()); + return true ; + } + else + { + ilog ("%s bmc redfish version '%s' is below minimum baseline %d.%d.x (%d:%d.%d.%d)", + hostname.c_str(), + redfish_version.c_str(), + REDFISH_MIN_MAJOR_VERSION, + REDFISH_MIN_MINOR_VERSION, + fields, major, minor, revision); + } } else { - ilog ("%s bmc has unsupported redfish version %s (%d:%d.%d.%d)", + wlog ("%s failed to
parse redfish version %s", hostname.c_str(), - redfish_version.c_str(), - fields, major, minor, revision ); - blog ("%s response: %s", hostname.c_str(), response.c_str()); + redfish_version.c_str()); + blog ("%s response: %s", + hostname.c_str(), + response.c_str()); } } else @@ -165,13 +377,39 @@ string redfishUtil_create_request ( string cmd, /* allow the BMC to redirect http to https */ command_request.append(" -S Always"); + /* redfishtool default timeout is 10 seconds. + * Seeing requests that are taking a little longer than that. + * defaulting to 30 sec timeout */ + command_request.append(" -T 30"); + /* specify the bmc ip address */ command_request.append(" -r "); command_request.append(ip); - /* add the config file option and config filename */ - command_request.append(" -c "); - command_request.append(config_file); +#ifdef WANT_INLINE_CREDS + if ( daemon_is_file_present ( MTC_CMD_FIT__INLINE_CREDS ) ) + { + string cfg_str = daemon_read_file (config_file.data()); + struct json_object *_obj = json_tokener_parse( cfg_str.data() ); + if ( _obj ) + { + command_request.append(" -u "); + command_request.append(jsonUtil_get_key_value_string(_obj,"username")); + command_request.append(" -p "); + command_request.append(jsonUtil_get_key_value_string(_obj,"password")); + } + else + { + slog("FIT: failed to get creds from config file"); + } + } + else +#endif + { + /* add the config file option and config filename */ + command_request.append(" -c "); + command_request.append(config_file); + } /* add the command */ command_request.append(" "); @@ -189,11 +427,66 @@ string redfishUtil_create_request ( string cmd, /************************************************************************* * - * Name : redfishUtil_get_bmc_info + * Name : redfishUtil_health_info * - * Purpose : + * Purpose : Parse the supplied object. * - * Description: + * Description: Update caller's health state, health and health_rollup + * variables with what is contained in the supplied object.
+ * + * "Status": { + * "HealthRollup": "OK", + * "State": "Enabled", + * "Health": "OK" + * }, + * + * Assumptions: Status label must be a first order label. + * This utility does not walk the object looking for status. + * + * Returns : PASS if successful + * FAIL_OPERATION if unsuccessful + * + ************************************************************************/ + +int redfishUtil_health_info ( string & hostname, + string entity, + struct json_object * info_obj, + redfish_entity_status & status ) +{ + if ( info_obj ) + { + struct json_object *status_obj = (struct json_object *)(NULL); + json_bool json_rc = json_object_object_get_ex( info_obj, + REDFISH_LABEL__STATUS, + &status_obj ); + if (( json_rc == TRUE ) && ( status_obj )) + { + status.state = jsonUtil_get_key_value_string( status_obj, + REDFISH_LABEL__STATE ); + status.health = jsonUtil_get_key_value_string( status_obj, + REDFISH_LABEL__HEALTH ); + status.health_rollup = jsonUtil_get_key_value_string( status_obj, + REDFISH_LABEL__HEALTHROLLUP ); + return (PASS); + } + } + wlog ("%s unable to get %s state and health info", + hostname.c_str(), entity.c_str()); + + status.state = UNKNOWN ; + status.health = UNKNOWN ; + status.health_rollup = UNKNOWN ; + return (FAIL_OPERATION); +} + +/************************************************************************* + * + * Name : redfishUtil_get_bmc_info + * + * Purpose : Parse the Systems get output + * + * Description: Log all important BMC server info such as processors, memory, + * model number, firmware version, hardware part number, etc.
* * Returns : PASS if succesful * FAIL_OPERATION if unsuccessful @@ -204,6 +497,11 @@ int redfishUtil_get_bmc_info ( string & hostname, string & bmc_info_filename, bmc_info_type & bmc_info ) { +#ifdef WANT_FIT_TESTING + if ( daemon_is_file_present ( MTC_CMD_FIT__MEM_LEAK_DEBUG )) + return (PASS) ; +#endif + if ( bmc_info_filename.empty() ) { wlog ("%s bmc info filename empty", hostname.c_str()); @@ -225,28 +523,6 @@ int redfishUtil_get_bmc_info ( string & hostname, } - bmc_info.manufacturer = jsonUtil_get_key_value_string( json_obj, REDFISH_LABEL__MANUFACTURER ); - bmc_info.sn = jsonUtil_get_key_value_string( json_obj, REDFISH_LABEL__SERIAL_NUMBER); - bmc_info.mn = jsonUtil_get_key_value_string( json_obj, REDFISH_LABEL__MODEL_NUMBER ); - bmc_info.pn = jsonUtil_get_key_value_string( json_obj, REDFISH_LABEL__PART_NUMBER ); - bmc_info.bmc_ver = jsonUtil_get_key_value_string( json_obj, REDFISH_LABEL__BMC_VERSION ); - bmc_info.bios_ver = jsonUtil_get_key_value_string( json_obj, REDFISH_LABEL__BIOS_VERSION ); - - ilog ("%s manufacturer is %s", hostname.c_str(), bmc_info.manufacturer.c_str()); - ilog ("%s model number:%s part number:%s serial number:%s", - hostname.c_str(), - bmc_info.mn.c_str(), - bmc_info.pn.c_str(), - bmc_info.sn.c_str()); - - ilog ("%s BIOS firmware version is %s", - hostname.c_str(), - bmc_info.bios_ver != NONE ? bmc_info.bios_ver.c_str() : "unavailable" ); - - ilog ("%s BMC firmware version is %s", - hostname.c_str(), - bmc_info.bmc_ver != NONE ? 
bmc_info.bmc_ver.c_str() : "unavailable" ); - /* load the power state */ string power_state = tolowercase(jsonUtil_get_key_value_string( json_obj, REDFISH_LABEL__POWER_STATE)); if ( power_state == "on" ) @@ -255,50 +531,104 @@ int redfishUtil_get_bmc_info ( string & hostname, bmc_info.power_on = false ; ilog ("%s power is %s", hostname.c_str(), power_state.c_str()); + bmc_info.manufacturer = jsonUtil_get_key_value_string( json_obj, REDFISH_LABEL__MANUFACTURER ); + bmc_info.sn = jsonUtil_get_key_value_string( json_obj, REDFISH_LABEL__SERIAL_NUMBER); + bmc_info.mn = jsonUtil_get_key_value_string( json_obj, REDFISH_LABEL__MODEL_NUMBER ); + bmc_info.pn = jsonUtil_get_key_value_string( json_obj, REDFISH_LABEL__PART_NUMBER ); + ilog ("%s manufacturer is %s ; model:%s part:%s serial:%s ", + hostname.c_str(), + bmc_info.manufacturer.c_str(), + bmc_info.mn.c_str(), + bmc_info.pn.c_str(), + bmc_info.sn.c_str()); - /* get number of processors */ - string processors = jsonUtil_get_key_value_string( json_obj, REDFISH_LABEL__PROCESSOR ); - if ( ! 
processors.empty() ) + bmc_info.bios_ver = jsonUtil_get_key_value_string( json_obj, REDFISH_LABEL__BIOS_VERSION ); + if (( !bmc_info.bios_ver.empty() ) && ( bmc_info.bios_ver != NONE )) { - struct json_object *proc_obj = json_tokener_parse((char*)processors.data()); - if ( proc_obj ) + ilog ("%s BIOS fw version %s", + hostname.c_str(), + bmc_info.bios_ver.c_str()); + } + + bmc_info.bmc_ver = jsonUtil_get_key_value_string( json_obj, REDFISH_LABEL__BMC_VERSION ); + if (( !bmc_info.bmc_ver.empty() ) && ( bmc_info.bmc_ver != NONE )) + { + ilog ("%s BMC fw version %s", + hostname.c_str(), + bmc_info.bmc_ver.c_str()); + } + + struct json_object *json_obj_actions; + if ( json_object_object_get_ex(json_obj, REDFISH_LABEL__ACTIONS, &json_obj_actions )) + { + std::list action_list ; + + /* get the first level reset action label content */ + string json_actions = + jsonUtil_get_key_value_string (json_obj_actions, + REDFISHTOOL_RESET_ACTIONS_LABEL); + + if ( jsonUtil_get_list ((char*)json_actions.data(), REDFISHTOOL_RESET_ACTIONS_ALLOWED_LABEL, action_list ) == PASS ) { - bmc_info.processors = jsonUtil_get_key_value_int ( proc_obj, REDFISH_LABEL__COUNT ); - ilog ("%s has %d processors", hostname.c_str(), bmc_info.processors); - json_object_put(proc_obj ); + _load_action_lists ( hostname, bmc_info, action_list); } else { - slog ("%s processor obj: %s", hostname.c_str(), processors.c_str()); + elog ("%s actions list get failed ; [%s]", hostname.c_str(), json_actions.c_str()); } } else { - slog ("%s processor count unavailable", hostname.c_str()); + elog ("%s action object get failed", hostname.c_str()); + } + + /* get number of processors */ + struct json_object *proc_obj = (struct json_object *)(NULL); + json_bool json_rc = json_object_object_get_ex( json_obj, + REDFISH_LABEL__PROCESSOR, + &proc_obj ); + if (( json_rc == TRUE ) && ( proc_obj )) + { + redfish_entity_status status ; + bmc_info.processors = jsonUtil_get_key_value_int ( proc_obj, REDFISH_LABEL__COUNT ); + 
redfishUtil_health_info ( hostname, REDFISH_LABEL__PROCESSOR, + proc_obj, status) ; + ilog ("%s has %2d Processors ; %s and %s:%s", + hostname.c_str(), + bmc_info.processors, + status.state.c_str(), + status.health.c_str(), + status.health_rollup.c_str()); + } + else + { + wlog ("%s processor object not found", hostname.c_str()); } /* get amount of memory */ - string memory = jsonUtil_get_key_value_string( json_obj, REDFISH_LABEL__MEMORY ); - if ( ! memory.empty() ) + struct json_object *mem_obj = (struct json_object *)(NULL); + json_rc = json_object_object_get_ex( json_obj, + REDFISH_LABEL__MEMORY, + &mem_obj ); + if (( json_rc == TRUE ) && ( mem_obj )) { - struct json_object *mem_obj = json_tokener_parse((char*)memory.data()); - if ( mem_obj ) - { - bmc_info.memory_in_gigs = jsonUtil_get_key_value_int ( mem_obj, REDFISH_LABEL__MEMORY_TOTAL ); - ilog ("%s has %d gigs of memory", hostname.c_str(), bmc_info.memory_in_gigs ); - json_object_put(mem_obj ); - } - else - { - slog ("%s memory obj: %s", hostname.c_str(), memory.c_str() ); - } + redfish_entity_status status ; + bmc_info.memory_in_gigs = jsonUtil_get_key_value_int ( mem_obj, REDFISH_LABEL__MEMORY_TOTAL ); + redfishUtil_health_info ( hostname, REDFISH_LABEL__MEMORY, + mem_obj, status) ; + ilog ("%s has %d GiB Memory ; %s and %s:%s", + hostname.c_str(), + bmc_info.memory_in_gigs, + status.state.c_str(), + status.health.c_str(), + status.health_rollup.c_str() ); } else { - slog ("%s memory size unavailable", hostname.c_str()); + wlog ("%s memory object not found", hostname.c_str()); } json_object_put(json_obj ); - return PASS ; + return (PASS) ; } diff --git a/mtce-common/src/common/redfishUtil.h b/mtce-common/src/common/redfishUtil.h index d6492ba8..d50e4d08 100644 --- a/mtce-common/src/common/redfishUtil.h +++ b/mtce-common/src/common/redfishUtil.h @@ -21,12 +21,38 @@ /* generic labels */ #define REDFISH_LABEL__STATUS ((const char *)("Status")) #define REDFISH_LABEL__STATE ((const char *)("State")) -#define 
REDFISH_LABEL__HEALTH ((const char *)("Health")) #define REDFISH_LABEL__COUNT ((const char *)("Count")) #define REDFISH_LABEL__MODEL ((const char *)("Model")) +#define REDFISH_LABEL__HEALTH ((const char *)("Health")) +#define REDFISH_LABEL__HEALTHROLLUP ((const char *)("HealthRollup")) -/* redfish version */ +typedef struct +{ + /* Enabled indicates the resource is available. + * Disabled indicates the resource has been intentionally made unavailable + * but it can be enabled. + * Offline indicates the resource is unavailable intentionally and requires + * action to be made available. + * InTest indicates that the component is undergoing testing. + * Starting indicates that the resource is on its way to becoming available. + * Absent indicates the resources is physically unavailable */ + string state ; + + /* Health State of the resource without dependents */ + string health ; + + /* Health State of the resource and dependents */ + string health_rollup ; + +} redfish_entity_status ; + +/* Redfish version format is #.#.# or major.minor.revision + * This feature does not care about revision. 
+ * The following are the minimum version numbers for major and minor + * for maintenance to accept it as a selectable option */ #define REDFISH_MIN_MAJOR_VERSION (1) +#define REDFISH_MIN_MINOR_VERSION (0) + #define REDFISH_LABEL__REDFISH_VERSION ((const char *)("RedfishVersion")) /* bmc info labels */ @@ -45,17 +71,69 @@ /* server processor info label */ #define REDFISH_LABEL__PROCESSOR ((const char *)("ProcessorSummary")) +/* maintenance administrative action commands */ +#define REDFISHTOOL_ROOT_QUERY_CMD ((const char *)("root")) +#define REDFISHTOOL_BMC_INFO_CMD ((const char *)("Systems get")) + + /* supported actions */ #define REDFISH_LABEL__ACTIONS ((const char *)("Actions")) #define REDFISH_LABEL__ACTION_RESET ((const char *)("#ComputerSystem.Reset")) #define REDFISH_LABEL__ACTION_RESET_ALLOWED ((const char *)("ResetType@Redfish.AllowableValues")) -/* maintenance administrative action commands */ -#define REDFISHTOOL_ROOT_QUERY_CMD ((const char *)("root")) -#define REDFISHTOOL_BMC_INFO_CMD ((const char *)("Systems get")) -#define REDFISHTOOL_POWER_RESET_CMD ((const char *)("Systems reset GracefulRestart")) -#define REDFISHTOOL_POWER_ON_CMD ((const char *)("Systems reset On")) -#define REDFISHTOOL_POWER_OFF_CMD ((const char *)("Systems reset ForceOff")) +/* Redfish Reset Types: + * + * https://www.dmtf.org/sites/default/files/standards/documents/DSP0268_2019.1a.pdf */ + +#define REDFISHTOOL_POWER_RESET_CMD ((const char *)("Systems reset ")) + +typedef enum +{ + REDFISH_ACTION__RESET, + REDFISH_ACTION__POWER_ON, + REDFISH_ACTION__POWER_OFF, +} redfish_action_enum ; + +/* Reset actions allows json block + + "Actions": { + "#ComputerSystem.Reset": { + "ResetType@Redfish.AllowableValues": [ + "On", + "ForceOff", + "GracefulRestart", + "PushPowerButton", + "Nmi" + ], + "target": "/redfish/v1/Systems/System.Embedded.1/Actions/ComputerSystem.Reset" + } + }, + +*/ +#define REDFISHTOOL_RESET_ACTIONS_LABEL ((const char *)("#ComputerSystem.Reset")) /* level 1 label */ 
+#define REDFISHTOOL_RESET_ACTIONS_ALLOWED_LABEL ((const char *)("ResetType@Redfish.AllowableValues")) /* level 2 label */ + + +/* Reset sub-commands */ +#define REDFISHTOOL_RESET__GRACEFUL_RESTART ((const char *)("GracefulRestart")) /* Perform a graceful shutdown followed by a restart of the system. */ +#define REDFISHTOOL_RESET__FORCE_RESTART ((const char *)("ForceRestart")) /* Perform an immediate (non-graceful) shutdown, followed by a restart */ + +/* Power off sub-commands */ +#define REDFISHTOOL_POWER_OFF__GRACEFUL_SHUTDOWN ((const char *)("GracefulShutdown")) /* Perform a graceful shutdown and power off. */ +#define REDFISHTOOL_POWER_OFF__FORCE_OFF ((const char *)("ForceOff")) /* Perform a Non-Graceful immediate power off */ + +/* Power On sub-commands */ +#define REDFISHTOOL_POWER_ON__ON ((const char *)("On")) /* Turn the unit on. */ +#define REDFISHTOOL_POWER_ON__FORCE_ON ((const char *)("ForceOn")) /* Turn the unit on immediately. */ + +/* Power Cycle sub-commands */ +#define REDFISHTOOL_POWER_CYCLE__POWER_CYCLE ((const char *)("PowerCycle")) /* Perform a power cycle of the unit. */ + +/* Diagnostic sub-commands */ +#define REDFISHTOOL_DIAG__NMI ((const char *)("Nmi")) /* Generate a Diagnostic Interrupt to halt the system.
*/ +#define REDFISHTOOL_RESET__PUSH_BUTTON ((const char *)("PushPowerButton")) /* Simulate the pressing of the physical power button on this unit */ + + #define REDFISHTOOL_BOOTDEV_PXE_CMD ((const char *)("Systems setBootOverride Once Pxe")) @@ -85,4 +163,6 @@ int redfishUtil_get_bmc_info ( string & hostname, string & response, bmc_info_type & bmc_info ); +string redfishUtil_get_cmd_option ( redfish_action_enum action, + std::list host_action_list ); #endif // __INCLUDE_REDFISHUTIL_H__ diff --git a/mtce-common/src/common/threadUtil.cpp b/mtce-common/src/common/threadUtil.cpp index aa95a657..96cc96d8 100644 --- a/mtce-common/src/common/threadUtil.cpp +++ b/mtce-common/src/common/threadUtil.cpp @@ -25,10 +25,10 @@ * ****************************************************************************/ -#include "daemon_common.h" /* for ... daemon_health_test */ -#include "nodeBase.h" /* for ... mtce node common definitions */ -#include "hostUtil.h" /* for ... mtce host common definitions */ -#include "threadUtil.h" /* for ... this module header */ +#include "daemon_common.h" /* for ... daemon_health_test */ +#include "nodeBase.h" /* for ... mtce node common definitions */ +#include "hostUtil.h" /* for ... mtce host common definitions */ +#include "threadUtil.h" /* for ... 
this module header */ /* Stores the parent process's timer handler */ static void (*thread_timer_handler)(int, siginfo_t*, void*) = NULL ; diff --git a/mtce/src/common/nodeClass.cpp b/mtce/src/common/nodeClass.cpp index 016eb394..3bda323a 100755 --- a/mtce/src/common/nodeClass.cpp +++ b/mtce/src/common/nodeClass.cpp @@ -584,6 +584,7 @@ nodeLinkClass::node* nodeLinkClass::addNode( string hostname ) mtcTimer_init ( ptr->bm_timer, hostname, "bm timer" ); /* Init node's bm timer */ mtcTimer_init ( ptr->bm_ping_info.timer,hostname,"ping timer" ); /* Init node's ping timer */ mtcTimer_init ( ptr->bmc_access_timer, hostname, "bmc acc timer" ); /* Init node's bm access timer */ + mtcTimer_init ( ptr->bmc_audit_timer, hostname, "bmc aud timer" ); /* Init node's bm audit timer */ mtcTimer_init ( ptr->host_services_timer, hostname, "host services timer" ); /* host services timer */ mtcTimer_init ( ptr->hwmon_powercycle.control_timer, hostname, "powercycle control timer"); @@ -633,11 +634,22 @@ nodeLinkClass::node* nodeLinkClass::addNode( string hostname ) /* initialize all board management variables for this host */ ptr->bmc_protocol = BMC_PROTOCOL__IPMITOOL ; ptr->bm_ip = NONE ; - ptr->bm_type = NONE ; ptr->bm_un = NONE ; ptr->bm_pw = NONE ; + ptr->bm_cmd= NONE ; + ptr->bm_type = NONE ; /* TODO: OBS */ - ptr->bmc_provisioned = false ; /* assume not provisioned until learned */ + /* restart command tht need to learned for Redfish. + * ipmi commands are hard coded fro legacy support. 
+ */ + ptr->bm_reset_cmd = NONE ; + ptr->bm_restart_cmd = NONE ; + ptr->bm_poweron_cmd = NONE ; + ptr->bm_poweroff_cmd = NONE ; + + ptr->bmc_provisioned = false ; /* assume not provisioned until learned */ + ptr->bmc_accessible = false ; /* assume not accessible until proven */ + ptr->bmc_access_method_changed = false ; if ( hostname == my_hostname ) ptr->power_on = true ; @@ -908,6 +920,7 @@ int nodeLinkClass::remNode( string hostname ) mtcTimer_fini ( ptr->bm_timer ); mtcTimer_fini ( ptr->bmc_access_timer ); + mtcTimer_fini ( ptr->bmc_audit_timer ); mtcTimer_fini ( ptr->bm_ping_info.timer ); #ifdef WANT_PULSE_LIST_SEARCH_ON_DELETE @@ -2745,7 +2758,7 @@ int nodeLinkClass::add_host ( node_inv_type & inv ) node_ptr->thread_extra_info.bm_ip = node_ptr->bm_ip = inv.bm_ip ; node_ptr->thread_extra_info.bm_un = node_ptr->bm_un = inv.bm_un ; - node_ptr->thread_extra_info.bm_type= node_ptr->bm_type = inv.bm_type ; + node_ptr->bm_type = inv.bm_type ; node_ptr->bm_ping_info.sock = 0 ; @@ -4068,9 +4081,35 @@ void nodeLinkClass::bmc_access_data_init ( struct nodeLinkClass::node * node_ptr node_ptr->reset_cause_query_done = false ; node_ptr->power_status_query_active = false ; node_ptr->power_status_query_done = false ; - node_ptr->bmc_protocol_learned = false ; - node_ptr->bmc_protocol_learning = false ; - node_ptr->bmc_protocol = BMC_PROTOCOL__IPMITOOL ; + + /* remove all the bmc related temporary files created + * for this host and process */ + bmcUtil_remove_files ( node_ptr->hostname, node_ptr->bmc_protocol ); + + if ( this->bmc_access_method == "ipmi" ) + { + blog2 ("%s BMC access method set to 'ipmi'", + node_ptr->hostname.c_str()); + node_ptr->bmc_protocol = BMC_PROTOCOL__IPMITOOL ; + node_ptr->bmc_protocol_learning = false ; + node_ptr->bmc_protocol_learned = true ; + } + else if ( this->bmc_access_method == "redfish" ) + { + blog2 ("%s BMC access method set to 'redfish'", + node_ptr->hostname.c_str()); + node_ptr->bmc_protocol = BMC_PROTOCOL__REDFISHTOOL ; + 
node_ptr->bmc_protocol_learning = false ; + node_ptr->bmc_protocol_learned = true ; + } + else + { + blog2 ("%s BMC access method will be learned", + node_ptr->hostname.c_str()); + node_ptr->bmc_protocol_learned = false ; + node_ptr->bmc_protocol_learning = false ; + node_ptr->bmc_protocol = BMC_PROTOCOL__IPMITOOL ; + } bmcUtil_info_init ( node_ptr->bmc_info ); } } @@ -4101,10 +4140,6 @@ int nodeLinkClass::set_bm_prov ( struct nodeLinkClass::node * node_ptr, bool sta /* Clear the alarm if we are starting fresh from an unprovisioned state */ if (( node_ptr->bmc_provisioned == false ) && ( state == true )) { - bmcUtil_hwmon_info ( node_ptr->hostname, - node_ptr->bmc_protocol, - node_ptr->power_on, "" ); - ilog ("%s starting BM ping monitor to address '%s'\n", node_ptr->hostname.c_str(), node_ptr->bm_ip.c_str()); @@ -4151,7 +4186,7 @@ int nodeLinkClass::set_bm_prov ( struct nodeLinkClass::node * node_ptr, bool sta pingUtil_fini ( node_ptr->bm_ping_info ); bmc_access_data_init ( node_ptr ); - + mtcTimer_reset ( node_ptr->bmc_audit_timer ); if ( !thread_idle( node_ptr->bmc_thread_ctrl ) ) { thread_kill ( node_ptr->bmc_thread_ctrl , node_ptr->bmc_thread_info); @@ -6889,6 +6924,24 @@ struct nodeLinkClass::node * nodeLinkClass::get_bmc_access_timer ( timer_t tid ) } +struct nodeLinkClass::node * nodeLinkClass::get_bmc_audit_timer ( timer_t tid ) +{ + /* check for empty list condition */ + if ( tid != NULL ) + { + for ( struct node * ptr = head ; ; ptr = ptr->next ) + { + if ( ptr->bmc_audit_timer.tid == tid ) + { + return ptr ; + } + if (( ptr->next == NULL ) || ( ptr == tail )) + break ; + } + } + return static_cast(NULL); +} + struct nodeLinkClass::node * nodeLinkClass::get_mtcConfig_timer ( timer_t tid ) { @@ -8617,14 +8670,20 @@ void nodeLinkClass::mem_log_general_mtce_hosts ( void ) void nodeLinkClass::mem_log_bm ( struct nodeLinkClass::node * node_ptr ) { char str[MAX_MEM_LOG_DATA] ; - snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tBMC %s %s:%s prov:%s learn:%s:%s\n", + 
snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tBMC %s %s:%s prov:%s acc:%s ping:%s learn:%s:%s Query:%s:%s Timer:%s:%s\n", node_ptr->hostname.c_str(), bmcUtil_getProtocol_str(node_ptr->bmc_protocol).c_str(), node_ptr->bm_un.c_str(), node_ptr->bm_ip.c_str(), - node_ptr->bmc_provisioned ? "Yes" : "No", - node_ptr->bmc_protocol_learned ? "Yes" : "No", - node_ptr->bmc_protocol_learning ? "Yes" : "No"); + node_ptr->bmc_provisioned ? "Y" : "N", + node_ptr->bmc_accessible ? "Y" : "N", + node_ptr->bm_ping_info.ok ? "Y" : "N", + node_ptr->bmc_protocol_learned ? "Y" : "N", + node_ptr->bmc_protocol_learning ? "Y" : "N", + node_ptr->bmc_info_query_active ? "Y" : "N", + node_ptr->bmc_info_query_done ? "Y" : "N", + node_ptr->bm_timer.active ? "Y" : "N", + node_ptr->bmc_access_timer.active ? "Y" : "N" ); mem_log (str); } @@ -8729,14 +8788,16 @@ void nodeLinkClass::mem_log_alarm1 ( struct nodeLinkClass::node * node_ptr ) void nodeLinkClass::mem_log_stage ( struct nodeLinkClass::node * node_ptr ) { char str[MAX_MEM_LOG_DATA] ; - snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tAdd:%d Offline:%d: Swact:%d Recovery:%d Enable:%d Disable:%d\n", + snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tAdd:%d Offline:%d: Swact:%d Recovery:%d Enable:%d Disable:%d Power:%d Cycle:%d\n", node_ptr->hostname.c_str(), node_ptr->addStage, node_ptr->offlineStage, node_ptr->swactStage, node_ptr->recoveryStage, node_ptr->enableStage, - node_ptr->disableStage); + node_ptr->disableStage, + node_ptr->powerStage, + node_ptr->powercycleStage); mem_log (str); } diff --git a/mtce/src/common/nodeClass.h b/mtce/src/common/nodeClass.h index daef8287..42198bad 100755 --- a/mtce/src/common/nodeClass.h +++ b/mtce/src/common/nodeClass.h @@ -583,18 +583,21 @@ private: /** A string label that represents the board management * controller type for this host */ - string bm_type ; + string bm_type ; /* TODO: OBS */ /** The operator provisioned board management hostname */ string bm_un ; - /* Indicates there is a board management test - * for 
this host in progress */ - bool bm_test_in_progress ; + /** the command to use in the bmc thread. + * introduced for redfish reset sub command ; reset type */ + string bm_cmd; - /* Indicates there is a board management operation - * in progress on this host */ - bool bm_oper_in_progress ; + /* restart commands that need to be learned for Redfish. + * ipmi commands are hard coded for legacy support. */ + string bm_reset_cmd ; + string bm_restart_cmd ; + string bm_poweron_cmd ; + string bm_poweroff_cmd ; /** * The BMC is 'accessible' once provisioning data is available @@ -602,6 +605,10 @@ private: **/ bool bmc_accessible; + /* tell the host level bmc_handler that this host's access + * method has changed */ + bool bmc_access_method_changed ; + /** @} private_boad_management_variables */ /** @@ -694,6 +701,9 @@ private: /* timer used to manage the bmc access alarm */ struct mtc_timer bmc_access_timer ; + /* timer used to audit bmc info */ + struct mtc_timer bmc_audit_timer ; + /***************************************************** * Maintenance Thread Structs *****************************************************/ @@ -1092,6 +1102,7 @@ private: struct nodeLinkClass::node * get_ping_timer ( timer_t tid ); struct nodeLinkClass::node * get_bm_timer ( timer_t tid ); struct nodeLinkClass::node * get_bmc_access_timer ( timer_t tid ); + struct nodeLinkClass::node * get_bmc_audit_timer ( timer_t tid ); struct nodeLinkClass::node * get_host_services_timer ( timer_t tid ); struct nodeLinkClass::node * get_powercycle_control_timer ( timer_t tid ); @@ -1363,6 +1374,8 @@ public: /* the main fsm entrypoint to service all hosts */ void fsm ( void ) ; + void bmc_access_method_change_notifier ( void ); + /** This controller's hostname set'er */ void set_my_hostname ( string hostname ); @@ -1486,6 +1499,13 @@ public: std::list mnfa_awol_list ; void mnfa_timeout_handler ( void ); + /* How to communicate with the BMCs in this lab. 
+ * Options are: ipmi, redfish, learn */ + string bmc_access_method ; + + /* handle bmc access method change by service parameter */ + bool bmc_access_method_changed ; + /** Return the number of inventoried hosts */ int num_hosts ( void ); @@ -1676,9 +1696,6 @@ public: /** Returns number of enabled controllers */ int num_controllers_enabled ( void ); - /** Run the FSM against the specified host */ - int run_fsm ( string hostname ); - /** Post a specific enable handler stage */ int set_enableStage ( string & hostname, mtc_enableStages_enum stage ); diff --git a/mtce/src/heartbeat/hbsStubs.cpp b/mtce/src/heartbeat/hbsStubs.cpp index b92db539..7a173f20 100644 --- a/mtce/src/heartbeat/hbsStubs.cpp +++ b/mtce/src/heartbeat/hbsStubs.cpp @@ -340,6 +340,11 @@ void bmcUtil_info_init ( bmc_info_type & bmc_info ) UNUSED(bmc_info); } +void bmcUtil_remove_files ( string hostname, bmc_protocol_enum protocol ) +{ + UNUSED(hostname); + UNUSED(protocol); +} int nodeLinkClass::bmc_command_send ( struct nodeLinkClass::node * node_ptr, int command ) { UNUSED(node_ptr); diff --git a/mtce/src/hwmon/hwmon.h b/mtce/src/hwmon/hwmon.h index cd2ce777..5703f66d 100644 --- a/mtce/src/hwmon/hwmon.h +++ b/mtce/src/hwmon/hwmon.h @@ -52,7 +52,6 @@ using namespace std; #define MAX_HOST_SENSORS (512) // (100) #define MAX_HOST_GROUPS (20) #define MIN_SENSOR_GROUPS (4) -#define HWMON_MAX_BMC_DATA_BUF_SIZE (4096*8) // Thermal sensor data need 20KiB at least #define HWMON_DEFAULT_LARGE_INTERVAL (MTC_MINS_15) #define HWMON_DEFAULT_AUDIT_INTERVAL (MTC_MINS_2) #define HWMON_MIN_AUDIT_INTERVAL (10) @@ -60,6 +59,10 @@ using namespace std; #define MAX_SENSORS_NOT_FOUND (5) #define START_DEBOUCE_COUNT (1) +// Power sensor data for Dell R740-emc-1 needs 45KiB +// Thermal sensor readout on wolfpass requires 20KiB +#define HWMON_MAX_BMC_DATA_BUF_SIZE (102400) + /* Daemon Sensor Config Directory - where profile files are stored */ #define CONFIG_DIR ((const char *)("/etc/hwmon.d")) diff --git 
a/mtce/src/hwmon/hwmonClass.cpp b/mtce/src/hwmon/hwmonClass.cpp index c47fe252..0ddc1f44 100644 --- a/mtce/src/hwmon/hwmonClass.cpp +++ b/mtce/src/hwmon/hwmonClass.cpp @@ -91,6 +91,10 @@ void hwmonHostClass::bmc_data_init ( struct hwmonHostClass::hwmon_host * host_pt host_ptr->addStage = HWMON_ADD__START; host_ptr->sensor_query_count = 0 ; + + /* remove all the bmc related temporary files created + * for this host and process */ + bmcUtil_remove_files ( host_ptr->hostname, host_ptr->protocol ); } /* @@ -611,14 +615,13 @@ int hwmonHostClass::mod_host ( node_inv_type & inv ) { /* if we have a credentials only change then disable the sensor * model only to get re-enabled if sensor monitoring is - * successful with the new credentils */ + * successful with the new credentials */ if (( hostUtil_is_valid_bm_type (host_ptr->bm_type) == true ) && ( host_ptr->bm_un.compare(NONE))) { bmc_set_group_state ( host_ptr, "disabled" ); bmc_disable_sensors ( host_ptr ); } - rc = set_bm_prov ( host_ptr, false ); } if (( hostUtil_is_valid_bm_type (host_ptr->bm_type) == true ) && @@ -713,6 +716,8 @@ int hwmonHostClass::add_host ( node_inv_type & inv ) host_ptr->accounting_ok = false ; host_ptr->accounting_bad_count = 0 ; + host_ptr->general_log_throttle = 0 ; + /* Additions for sensor monitoring using IPMI protocol */ host_ptr->want_degrade_audit = false ; host_ptr->degrade_audit_log_throttle = 0 ; @@ -758,9 +763,7 @@ int hwmonHostClass::add_host ( node_inv_type & inv ) host_ptr->group_index = 0 ; /* Set default BMC protocol */ - host_ptr->protocol = BMC_PROTOCOL__IPMITOOL ; - host_ptr->bmc_thread_info.proto = BMC_PROTOCOL__IPMITOOL ; - bmcUtil_write_hwmond_protocol ( host_ptr->hostname, BMC_PROTOCOL__IPMITOOL ) ; + host_ptr->protocol = bmcUtil_read_hwmond_protocol(host_ptr->hostname) ; /* Init sensor model relearn controls, state and status */ host_ptr->relearn = false ; diff --git a/mtce/src/hwmon/hwmonInit.cpp b/mtce/src/hwmon/hwmonInit.cpp index fa84a972..fb4db7d2 100644 --- 
a/mtce/src/hwmon/hwmonInit.cpp +++ b/mtce/src/hwmon/hwmonInit.cpp @@ -253,6 +253,7 @@ int daemon_init ( string iface, string nodetype ) hwmon_hdlr_init ( &hwmon_ctrl ); hwmon_stages_init (); httpUtil_init (); + bmcUtil_init(); /* init the control struct */ hwmon_ctrl.my_hostname = "" ; diff --git a/mtce/src/hwmon/hwmonModel.cpp b/mtce/src/hwmon/hwmonModel.cpp index c4b0552b..f8d4423c 100644 --- a/mtce/src/hwmon/hwmonModel.cpp +++ b/mtce/src/hwmon/hwmonModel.cpp @@ -58,7 +58,9 @@ int hwmonHostClass::bmc_create_sensor_model ( struct hwmonHostClass::hwmon_host * host_ptr ) { int rc = PASS ; - ilog ("%s creating sensor model\n", host_ptr->hostname.c_str()); + ilog ("%s creating sensor model using %s\n", + host_ptr->hostname.c_str(), + bmcUtil_getProtocol_str(host_ptr->protocol).c_str()); host_ptr->groups = 0 ; diff --git a/mtce/src/hwmon/hwmonThreads.h b/mtce/src/hwmon/hwmonThreads.h index c26e7ae2..f215cca8 100644 --- a/mtce/src/hwmon/hwmonThreads.h +++ b/mtce/src/hwmon/hwmonThreads.h @@ -160,7 +160,8 @@ using namespace std; #define BMC_JSON__SENSORS_LABEL ((const char *)("sensors")) #define IPMITOOL_SENSOR_QUERY_CMD ((const char *)(" sensor list")) -#define BMC_SENSOR_OUTPUT_FILE_SUFFIX ((const char *)("_sensor_data")) +#define BMC_POWER_SENSOR_OUTPUT_FILE_SUFFIX ((const char *)("_power_sensor_data")) +#define BMC_THERMAL_SENSOR_OUTPUT_FILE_SUFFIX ((const char *)("_thermal_sensor_data")) #define REDFISHTOOL_READ_POWER_SENSORS_CMD ((const char *)("Chassis Power")) #define REDFISHTOOL_READ_THERMAL_SENSORS_CMD ((const char *)("Chassis Thermal")) diff --git a/mtce/src/maintenance/mtcBmcUtil.cpp b/mtce/src/maintenance/mtcBmcUtil.cpp index a397d348..7ad40b8c 100644 --- a/mtce/src/maintenance/mtcBmcUtil.cpp +++ b/mtce/src/maintenance/mtcBmcUtil.cpp @@ -45,9 +45,8 @@ int nodeLinkClass::bmc_command_send ( struct nodeLinkClass::node * node_ptr, node_ptr->thread_extra_info.bm_ip = node_ptr->bm_ip ; node_ptr->thread_extra_info.bm_un = node_ptr->bm_un ; 
node_ptr->thread_extra_info.bm_pw = node_ptr->bm_pw ; - node_ptr->thread_extra_info.bm_type = node_ptr->bm_type ; - /* Special case handliong for Redfish Root (BMC) Query command. + /* Special case handling for Redfish Root (BMC) Query command. * Current protocol override for this command that only applies * to redfish and used for the bmc protocol learning process. */ if ( command == BMC_THREAD_CMD__BMC_QUERY ) @@ -55,6 +54,26 @@ int nodeLinkClass::bmc_command_send ( struct nodeLinkClass::node * node_ptr, else node_ptr->bmc_thread_info.proto = node_ptr->bmc_protocol ; + if ( node_ptr->bmc_thread_info.proto == BMC_PROTOCOL__REDFISHTOOL ) + { + /* build the reset/power control command */ + switch (command) + { + case BMC_THREAD_CMD__POWER_RESET: + node_ptr->bm_cmd = REDFISHTOOL_POWER_RESET_CMD ; + node_ptr->bm_cmd.append(node_ptr->bmc_info.reset_action_list.front()); + break ; + case BMC_THREAD_CMD__POWER_ON: + node_ptr->bm_cmd = REDFISHTOOL_POWER_RESET_CMD ; + node_ptr->bm_cmd.append(node_ptr->bmc_info.power_on_action_list.front()); + break ; + case BMC_THREAD_CMD__POWER_OFF: + node_ptr->bm_cmd = REDFISHTOOL_POWER_RESET_CMD ; + node_ptr->bm_cmd.append(node_ptr->bmc_info.power_off_action_list.front()); + break ; + } + node_ptr->thread_extra_info.bm_cmd = node_ptr->bm_cmd ; + } #ifdef WANT_FIT_TESTING { bool want_fit = false ; @@ -170,39 +189,38 @@ int nodeLinkClass::bmc_command_recv ( struct nodeLinkClass::node * node_ptr ) { if ( node_ptr->bmc_protocol == BMC_PROTOCOL__REDFISHTOOL ) { - /* handle the redfishtool root query as a special case because - * it is likely to fail and we don't want un-necessary error logs */ - if ( node_ptr->bmc_thread_info.command == BMC_THREAD_CMD__BMC_QUERY ) + if (( rc = node_ptr->bmc_thread_info.status ) != PASS ) { - if (( rc = node_ptr->bmc_thread_info.status ) != PASS ) + /* handle the redfishtool root query as a special case because + * it is likely to fail and we don't want un-necessary error logs */ + if (( 
node_ptr->bmc_thread_info.command == BMC_THREAD_CMD__BMC_QUERY ) && + (( rc == FAIL_SYSTEM_CALL ) || ( rc == FAIL_NOT_ACTIVE ))) { - blog2 ("%s %s command failed (%s) (data:%s) (rc:%d:%d:%s)\n", - node_ptr->hostname.c_str(), - bmcUtil_getCmd_str(node_ptr->bmc_thread_info.command).c_str(), - bmcUtil_getProtocol_str(node_ptr->bmc_protocol).c_str(), - node_ptr->bmc_thread_info.data.c_str(), - rc, - node_ptr->bmc_thread_info.status, - node_ptr->bmc_thread_info.status_string.c_str()); + blog ("%s bmc redfish %s failed", + node_ptr->hostname.c_str(), + bmcUtil_getCmd_str( + node_ptr->bmc_thread_info.command).c_str()); + } + else if (( node_ptr->bmc_thread_info.command == BMC_THREAD_CMD__BMC_INFO ) && + (( rc == FAIL_SYSTEM_CALL ) || ( rc == FAIL_NOT_ACTIVE ))) + { + wlog ("%s bmc redfish %s failed", + node_ptr->hostname.c_str(), + bmcUtil_getCmd_str( + node_ptr->bmc_thread_info.command).c_str()); } else { - ilog("%s Redfish Root Query:\n%s", - node_ptr->hostname.c_str(), - node_ptr->bmc_thread_info.data.c_str()); + elog ("%s bmc redfish %s command failed (%s) (data:%s) (rc:%d:%d:%s)\n", + node_ptr->hostname.c_str(), + bmcUtil_getCmd_str(node_ptr->bmc_thread_info.command).c_str(), + bmcUtil_getProtocol_str(node_ptr->bmc_protocol).c_str(), + node_ptr->bmc_thread_info.data.c_str(), + rc, + node_ptr->bmc_thread_info.status, + node_ptr->bmc_thread_info.status_string.c_str()); } } - else if (( rc = node_ptr->bmc_thread_info.status ) != PASS ) - { - elog ("%s %s command failed (%s) (data:%s) (rc:%d:%d:%s)\n", - node_ptr->hostname.c_str(), - bmcUtil_getCmd_str(node_ptr->bmc_thread_info.command).c_str(), - bmcUtil_getProtocol_str(node_ptr->bmc_protocol).c_str(), - node_ptr->bmc_thread_info.data.c_str(), - rc, - node_ptr->bmc_thread_info.status, - node_ptr->bmc_thread_info.status_string.c_str()); - } else { rc = PASS ; @@ -261,11 +279,11 @@ int nodeLinkClass::bmc_command_recv ( struct nodeLinkClass::node * node_ptr ) } else { - blog ("%s %s Response: %s\n", - 
node_ptr->hostname.c_str(), - bmcUtil_getCmd_str( - node_ptr->bmc_thread_info.command).c_str(), - node_ptr->bmc_thread_info.data.c_str()); + blog1 ("%s %s Response: %s\n", + node_ptr->hostname.c_str(), + bmcUtil_getCmd_str( + node_ptr->bmc_thread_info.command).c_str(), + node_ptr->bmc_thread_info.data.c_str()); } } } @@ -318,6 +336,10 @@ int nodeLinkClass::bmc_command_recv ( struct nodeLinkClass::node * node_ptr ) } /* handle max retries reached */ + if ( rc == PASS ) + { + ; + } else if ( node_ptr->bmc_thread_ctrl.retries++ >= BMC__MAX_RECV_RETRIES ) { wlog ("%s %s command timeout (%d of %d)\n", @@ -334,18 +356,28 @@ int nodeLinkClass::bmc_command_recv ( struct nodeLinkClass::node * node_ptr ) { if ( node_ptr->bmc_thread_ctrl.id == 0 ) { - slog ("%s %s command not-running\n", - node_ptr->hostname.c_str(), - bmcUtil_getCmd_str(node_ptr->bmc_thread_info.command).c_str()); + /* don't log a warning for redfish query failures. */ + if (( node_ptr->bmc_thread_info.command != BMC_THREAD_CMD__BMC_QUERY ) && + ( node_ptr->bmc_thread_info.command != BMC_THREAD_CMD__BMC_INFO )) + { + wlog ("%s %s command not-running\n", + node_ptr->hostname.c_str(), + bmcUtil_getCmd_str(node_ptr->bmc_thread_info.command).c_str()); + } rc = FAIL_NOT_ACTIVE ; } else { - ilog ("%s %s command in-progress (polling %d of %d)\n", - node_ptr->hostname.c_str(), - bmcUtil_getCmd_str(node_ptr->bmc_thread_info.command).c_str(), - node_ptr->bmc_thread_ctrl.retries, - BMC__MAX_RECV_RETRIES); + /* The BMC is sometimes slow, + * No need to log till we reach lalf of the retry threshold */ + if ( node_ptr->bmc_thread_ctrl.retries > (BMC__MAX_RECV_RETRIES/2) ) + { + ilog ("%s %s command in-progress (polling %d of %d)\n", + node_ptr->hostname.c_str(), + bmcUtil_getCmd_str(node_ptr->bmc_thread_info.command).c_str(), + node_ptr->bmc_thread_ctrl.retries, + BMC__MAX_RECV_RETRIES); + } rc = RETRY ; } } diff --git a/mtce/src/maintenance/mtcCmdHdlr.cpp b/mtce/src/maintenance/mtcCmdHdlr.cpp index e7be32e2..d89580bc 
100644 --- a/mtce/src/maintenance/mtcCmdHdlr.cpp +++ b/mtce/src/maintenance/mtcCmdHdlr.cpp @@ -486,7 +486,7 @@ int nodeLinkClass::cmd_handler ( struct nodeLinkClass::node * node_ptr ) { dlog ("%s Board Management Interface RESET Requested\n", node_ptr->hostname.c_str()); - mtcTimer_start ( node_ptr->mtcCmd_timer, mtcTimer_handler, MTC_IPMITOOL_REQUEST_DELAY ); + mtcTimer_start ( node_ptr->mtcCmd_timer, mtcTimer_handler, MTC_BMC_REQUEST_DELAY ); node_ptr->mtcCmd_work_fifo_ptr->stage = MTC_CMD_STAGE__RESET_ACK; break ; } @@ -522,7 +522,7 @@ int nodeLinkClass::cmd_handler ( struct nodeLinkClass::node * node_ptr ) rc = bmc_command_recv ( node_ptr ); if ( rc == RETRY ) { - mtcTimer_start ( node_ptr->mtcCmd_timer, mtcTimer_handler, MTC_IPMITOOL_REQUEST_DELAY ); + mtcTimer_start ( node_ptr->mtcCmd_timer, mtcTimer_handler, MTC_BMC_REQUEST_DELAY ); break ; } @@ -633,7 +633,7 @@ int nodeLinkClass::cmd_handler ( struct nodeLinkClass::node * node_ptr ) node_ptr->hostname.c_str(), bmcUtil_getCmd_str(node_ptr->cmdReq).c_str()); - mtcTimer_start ( node_ptr->mtcCmd_timer, mtcTimer_handler, MTC_IPMITOOL_REQUEST_DELAY ); + mtcTimer_start ( node_ptr->mtcCmd_timer, mtcTimer_handler, MTC_BMC_REQUEST_DELAY ); node_ptr->mtcCmd_work_fifo_ptr->stage = MTC_CMD_STAGE__IPMI_COMMAND_RECV ; } break ; diff --git a/mtce/src/maintenance/mtcHttpSvr.cpp b/mtce/src/maintenance/mtcHttpSvr.cpp index d7a513ca..660a3476 100644 --- a/mtce/src/maintenance/mtcHttpSvr.cpp +++ b/mtce/src/maintenance/mtcHttpSvr.cpp @@ -87,7 +87,7 @@ void mtcHttpSvr_fini ( event_type & mtce_event ) /************************************************************************************ * - * event_base_loopcontinue is not supported until version 2.1.2-alpha + * event_base_loopcontinue is not supported until version 2.1.2-alpha * It allows processing of events in main loop instead of in the handler. 
* Theoretically this would be nice to use in conjunction with * event_base_loopexit in the selected fd @@ -281,26 +281,26 @@ void _create_error_response ( int rc , string & resp_buffer , node_inv_type & in ******************************************************************************/ /* Test Commands: * - * Test 1: Select host, get uuid and make sure it is unlocked-enabled. + * Test 1: Select host, get uuid and make sure it is unlocked-enabled. * Verify: Host should fail, reset and auto re-enable. curl -i -X PATCH -H 'Content-Type: application/json' -H 'Accept: application/json' -H 'User-Agent: vim/1.0' http://localhost:2112/v1/hosts/8b216803-c47c-40b3-bf61-ed84ff83754e -d '{"uuid":"8b216803-c47c-40b3-bf61-ed84ff83754e", "hostname": "compute-1", "severity": "failed"}' - * Test 2: Lock Host and issue command with correct uuids and hostname. + * Test 2: Lock Host and issue command with correct uuids and hostname. * Verify: The host is rebooted/reset curl -i -X PATCH -H 'Content-Type: application/json' -H 'Accept: application/json' -H 'User-Agent: vim/1.0' http://localhost:2112/v1/hosts/8b216803-c47c-40b3-bf61-ed84ff83754e -d '{"uuid":"8b216803-c47c-40b3-bf61-ed84ff83754e", "hostname": "compute-1", "severity": "failed"}' - * Test 3: + * Test 3: curl -i -X PATCH -H 'Content-Type: application/json' -H 'Accept: application/json' -H 'User-Agent: vim/1.0' http://localhost:2112/v1/hosts/8b216803-c47c-40b3-bf61-ed84ff83754e -d '{"uuid":"8b216803-c47c-40b3-bf61-ed84ff83754e", "hostname": "compute-1", "severity": "degraded"}' - * Test 4: + * Test 4: curl -i -X PATCH -H 'Content-Type: application/json' -H 'Accept: application/json' -H 'User-Agent: vim/1.0' http://localhost:2112/v1/hosts/8b216803-c47c-40b3-bf61-ed84ff83754e -d '{"uuid":"8b216803-c47c-40b3-bf61-ed84ff83754e", "hostname": "compute-1", "severity": "cleared"}' * Test 5: Unsuppored VIM Command curl -i -X PATCH -H 'Content-Type: application/json' -H 'Accept: application/json' -H 'User-Agent: vim/1.0' 
http://localhost:2112/v1/hosts/8b216803-c47c-40b3-bf61-ed84ff83754e -d '{"uuid":"8b216803-c47c-40b3-bf61-ed84ff83754e", "hostname": "compute-1", "severity": "degradeded"}' */ -string mtcHttpSvr_vim_req ( char * buffer_ptr, - evhttp_cmd_type http_cmd, +string mtcHttpSvr_vim_req ( char * buffer_ptr, + evhttp_cmd_type http_cmd, int & http_status_code ) { nodeLinkClass * obj_ptr = get_mtcInv_ptr () ; @@ -311,7 +311,7 @@ string mtcHttpSvr_vim_req ( char * buffer_ptr, int rc1 = jsonUtil_get_key_val ( buffer_ptr, MTC_JSON_SEVERITY, severity ); int rc2 = jsonUtil_get_key_val ( buffer_ptr, MTC_JSON_INV_NAME, hostname ); - jlog ("%s '%s' request\n", hostname.c_str(), getHttpCmdType_str(http_cmd)); + jlog ("%s '%s' request\n", hostname.c_str(), getHttpCmdType_str(http_cmd)); if ( rc1 | rc2 ) { wlog ("Failed to parse command key values (%d:%d)\n", rc1, rc2); @@ -337,7 +337,7 @@ string mtcHttpSvr_vim_req ( char * buffer_ptr, { /* Test 1 */ ilog ("%s is now failed due to failed event (host is unlocked)\n", hostname.c_str()); - obj_ptr->mtcInvApi_update_states ( hostname, + obj_ptr->mtcInvApi_update_states ( hostname, get_adminState_str (MTC_ADMIN_STATE__UNLOCKED), get_operState_str (MTC_OPER_STATE__DISABLED ), get_availStatus_str(MTC_AVAIL_STATUS__FAILED)); @@ -356,7 +356,7 @@ string mtcHttpSvr_vim_req ( char * buffer_ptr, http_status_code = HTTP_BADMETHOD; } else if ( ! 
severity.compare("cleared")) - { + { /* Test 4 */ ilog ("%s severity 'cleared' request not supported\n", hostname.c_str() ); response.append ("{\"status\" : \"fail\""); @@ -393,8 +393,8 @@ string mtcHttpSvr_vim_req ( char * buffer_ptr, * ******************************************************************************/ -string mtcHttpSvr_inv_req ( char * request_ptr, - evhttp_cmd_type event_type, +string mtcHttpSvr_inv_req ( char * request_ptr, + evhttp_cmd_type event_type, int & http_status_code ) { int rc = PASS ; @@ -426,7 +426,7 @@ string mtcHttpSvr_inv_req ( char * request_ptr, hostname = inv.name ; } - snprintf (&log_str[0], MAX_API_LOG_LEN-1, "%s [%5d] http event seq: %d Payload:%s: %s", + snprintf (&log_str[0], MAX_API_LOG_LEN-1, "%s [%5d] http event seq: %d Payload:%s: %s", pt(), getpid(), sequence, hostname.data(), request_ptr); send_log_message ( mtclogd_ptr, obj_ptr->my_hostname.data(), &filename[0], &log_str[0] ); @@ -437,14 +437,14 @@ string mtcHttpSvr_inv_req ( char * request_ptr, if ( rc == PASS ) { ilog ("%s Add Operation\n", inv.name.c_str()); - + /* generate event=add alarm if the add_host returns a PASS */ mtcAlarm_log ( inv.name, MTC_LOG_ID__EVENT_ADD ); } - /* A RETRY return from add_host indicates that the node is - * already provisioned. At this point changes can only be - * implemented as modification so call mod_host + /* A RETRY return from add_host indicates that the node is + * already provisioned. At this point changes can only be + * implemented as modification so call mod_host */ if ( rc == RETRY ) { @@ -459,8 +459,8 @@ string mtcHttpSvr_inv_req ( char * request_ptr, } else { - elog ("%s Inventory Add failed (%s)\n", - inv.name.length() ? inv.name.c_str() : "none", + elog ("%s Inventory Add failed (%s)\n", + inv.name.length() ? 
inv.name.c_str() : "none", inv.uuid.c_str() ); _create_error_response ( rc , resp_buffer, inv ) ; } @@ -685,8 +685,8 @@ string mtcHttpSvr_sm_req ( char * request_ptr, * * Verify this request contains valid client info. * - * 1. the URL must have - * CLIENT_SYSINV_URL or + * 1. the URL must have + * CLIENT_SYSINV_URL or * CLIENT_VIM_HOSTS_URL or * CLIENT_VIM_SYSTEMS_URL * @@ -703,16 +703,16 @@ mtc_client_enum _get_client_id ( struct evhttp_request *req ) * correct User-Agent header and supported version */ struct evkeyvalq * headers_ptr = evhttp_request_get_input_headers (req); const char * header_value_ptr = evhttp_find_header (headers_ptr, CLIENT_HEADER); - if ( header_value_ptr ) + if ( header_value_ptr ) { const char * url_ptr = evhttp_request_get_uri (req); - + hlog2 ("URI: %s\n", url_ptr ); if ( ! strncmp ( header_value_ptr, CLIENT_SYSINV_1_0, 20 ) ) { hlog3 ("%s\n", header_value_ptr ); - + if ( strstr ( url_ptr, CLIENT_SYSINV_URL) ) { client = CLIENT_SYSINV ; @@ -721,7 +721,7 @@ mtc_client_enum _get_client_id ( struct evhttp_request *req ) else if ( ! 
strncmp ( header_value_ptr, CLIENT_VIM_1_0, 20 ) ) { hlog3 ("%s\n", header_value_ptr ); - + if ( strstr ( url_ptr, CLIENT_VIM_HOSTS_URL)) { client = CLIENT_VIM_HOSTS ; @@ -762,14 +762,14 @@ mtc_client_enum _get_client_id ( struct evhttp_request *req ) void mtcHttpSvr_handler (struct evhttp_request *req, void *arg) { struct evbuffer *resp_buf ; - mtc_client_enum client = CLIENT_NONE ; + mtc_client_enum client = CLIENT_NONE ; int http_status_code = HTTP_NOTFOUND ; string service = "" ; string uuid = "" ; string response = "" ; string hostname = "n/a" ; - UNUSED(arg); + UNUSED(arg); response = "{" ; response.append (" \"status\" : \"fail\""); @@ -792,7 +792,7 @@ void mtcHttpSvr_handler (struct evhttp_request *req, void *arg) } const char * url_ptr = evhttp_request_get_uri (req); - + /* Extract the operation */ evhttp_cmd_type http_cmd = evhttp_request_get_command (req); jlog ("%s request from '%s'\n", getHttpCmdType_str(http_cmd), host_ptr ); @@ -814,7 +814,7 @@ void mtcHttpSvr_handler (struct evhttp_request *req, void *arg) return ; } - if (( client == CLIENT_VIM_HOSTS ) || + if (( client == CLIENT_VIM_HOSTS ) || ( client == CLIENT_VIM_SYSTEMS )) { service = "vim" ; @@ -830,7 +830,7 @@ void mtcHttpSvr_handler (struct evhttp_request *req, void *arg) else service = "unknown" ; - snprintf (&log_str[0], MAX_API_LOG_LEN-1, "\n%s [%5d] http event seq: %d with %s %s request from %s:%s", + snprintf (&log_str[0], MAX_API_LOG_LEN-1, "\n%s [%5d] http event seq: %d with %s %s request from %s:%s", pt(), getpid(), ++sequence, service.c_str(), getHttpCmdType_str(http_cmd), host_ptr, url_ptr ); send_log_message ( mtclogd_ptr, obj_ptr->my_hostname.data(), &filename[0], &log_str[0] ); @@ -842,7 +842,11 @@ void mtcHttpSvr_handler (struct evhttp_request *req, void *arg) size_t len = strlen(CLIENT_SYSINV_URL) ; uuid = (url_ptr+len) ; hostname = obj_ptr->get_host(uuid) ; - if (( http_cmd == EVHTTP_REQ_GET ) && ( client == CLIENT_VIM_SYSTEMS )) + if ( hostname.empty() ) + { + wlog("uuid to 
host lookup failed ; '%s' not found ", uuid.c_str()); + } + else if (( http_cmd == EVHTTP_REQ_GET ) && ( client == CLIENT_VIM_SYSTEMS )) { http_status_code = obj_ptr->mtcVimApi_system_info ( response ); break ; @@ -852,7 +856,7 @@ void mtcHttpSvr_handler (struct evhttp_request *req, void *arg) http_status_code = HTTP_OK ; if ( uuid.length() != UUID_LEN ) { - wlog ("http '%s' request rejected, invalid uuid size (%ld:%s)\n", + wlog ("http '%s' request rejected, invalid uuid size (%ld:%s)\n", getHttpCmdType_str(http_cmd), uuid.length(), uuid.c_str()); response = "{" ; @@ -861,7 +865,7 @@ void mtcHttpSvr_handler (struct evhttp_request *req, void *arg) response.append (",\"action\" : \"Undetermined\""); response.append ("}"); http_status_code = HTTP_BADREQUEST ; - } + } if (( http_cmd == EVHTTP_REQ_DELETE ) && (( hostname.length() == 0 ) || ( !hostname.compare("none")))) { @@ -918,9 +922,9 @@ void mtcHttpSvr_handler (struct evhttp_request *req, void *arg) { ev_ssize_t bytes = 0 ; char * buffer_ptr = (char*)malloc(len+1); - memset ( buffer_ptr, 0, len+1 ); + memset ( buffer_ptr, 0, len+1 ); bytes = evbuffer_remove(in_buf, buffer_ptr, len ); - + if ( bytes <= 0 ) { http_status_code = HTTP_BADREQUEST ; @@ -976,16 +980,17 @@ void mtcHttpSvr_handler (struct evhttp_request *req, void *arg) resp_buf = evbuffer_new(); jlog ("Event Response: %s\n", response.c_str()); evbuffer_add_printf (resp_buf, "%s\n", response.data()); - evhttp_send_reply (event_ptr->req, http_status_code, "OK", resp_buf ); + evhttp_send_reply (event_ptr->req, http_status_code, "OK", resp_buf ); evbuffer_free ( resp_buf ); } else { - elog ("HTTP Event error:%d ; cmd:%s url:%s response:%s\n", - http_status_code, - getHttpCmdType_str(http_cmd), - url_ptr, - response.c_str()); + wlog ("%s sending %d response for %s:%s request ; response:%s\n", + hostname.c_str(), + http_status_code, + getHttpCmdType_str(http_cmd), + url_ptr, + response.c_str()); evhttp_send_error (event_ptr->req, http_status_code, 
response.data() ); } } @@ -1002,7 +1007,7 @@ int mtcHttpSvr_bind ( event_type & event ) int rc ; int flags ; int one = 1; - + event.fd = socket(AF_INET, SOCK_STREAM, 0); if (event.fd < 0) { @@ -1012,13 +1017,13 @@ int mtcHttpSvr_bind ( event_type & event ) /* make socket reusable */ rc = setsockopt(event.fd, SOL_SOCKET, SO_REUSEADDR, (char *)&one, sizeof(int)); - + memset(&event.addr, 0, sizeof(struct sockaddr_in)); event.addr.sin_family = AF_INET; event.addr.sin_addr.s_addr = inet_addr(LOOPBACK_IP) ; /* INADDR_ANY; TODO: Refine this if we can */ // event.addr.sin_addr.s_addr = INADDR_ANY; event.addr.sin_port = htons(event.port); - + /* bind port */ rc = bind ( event.fd, (struct sockaddr*)&event.addr, sizeof(struct sockaddr_in)); if (rc < 0) @@ -1039,7 +1044,7 @@ int mtcHttpSvr_bind ( event_type & event ) flags = fcntl ( event.fd, F_GETFL, 0) ; if ( flags < 0 || fcntl(event.fd, F_SETFL, flags | O_NONBLOCK) < 0) { - elog ("failed to set HTTP server socket to non-blocking (%d:%m)\n", errno ); + elog ("failed to set HTTP server socket to non-blocking (%d:%m)\n", errno ); return FAIL_SOCKET_OPTION; } @@ -1082,7 +1087,7 @@ int mtcHttpSvr_setup ( event_type & event ) return -1; } evhttp_set_gencb(event.httpd, mtcHttpSvr_handler, NULL); - + return PASS ; } @@ -1104,7 +1109,7 @@ int mtcHttpSvr_init ( event_type & mtce_event ) } else if ( mtce_event.fd > 0 ) { - ilog ("Listening On: 'http event server ' socket %s:%d\n", + ilog ("Listening On: 'http event server ' socket %s:%d\n", inet_ntoa(mtce_event.addr.sin_addr), mtce_event.port ); rc = PASS ; break ; diff --git a/mtce/src/maintenance/mtcNodeCtrl.cpp b/mtce/src/maintenance/mtcNodeCtrl.cpp index ad10640b..e8996423 100644 --- a/mtce/src/maintenance/mtcNodeCtrl.cpp +++ b/mtce/src/maintenance/mtcNodeCtrl.cpp @@ -327,6 +327,7 @@ static int mtc_config_handler ( void * user, else if (MATCH("agent", "ar_heartbeat_interval")) mtcInv.ar_interval[MTC_AR_DISABLE_CAUSE__HEARTBEAT] = atoi(value); + else { return (PASS); @@ -431,6 
+432,15 @@ static int mtc_ini_handler ( void * user, } } } + else if (MATCH("agent", "bmc_access_method")) + { + string bmc_access_method_current = mtcInv.bmc_access_method ; + mtcInv.bmc_access_method = value ; + if ( mtcInv.bmc_access_method != bmc_access_method_current ) + { + mtcInv.bmc_access_method_changed = true ; + } + } return (PASS); } @@ -672,6 +682,8 @@ int daemon_configure ( void ) ilog ("Controller : %s\n", mtc_config.active ? "Active" : "In-Active" ); + ilog ("BMC Access : %s", mtcInv.bmc_access_method.c_str()); + /* remove any existing fit */ daemon_init_fit (); @@ -984,6 +996,10 @@ int daemon_init ( string iface, string nodetype ) return ( FAIL_DAEMON_CONFIG ) ; } + /* bmc access method should not be considered changed if we + * are going through daemon_init ; i.e. process startup */ + mtcInv.bmc_access_method_changed = false ; + return (rc); } @@ -1185,6 +1201,23 @@ void nodeLinkClass::fsm ( void ) } } +/* handle BMC access method change */ +void nodeLinkClass::bmc_access_method_change_notifier ( void ) +{ + if ( head ) + { + struct node * node_ptr ; + for ( node_ptr = head ; + node_ptr != NULL ; + node_ptr = node_ptr->next ) + { + if ( node_ptr->bmc_provisioned ) + node_ptr->bmc_access_method_changed = true ; + } + } + mtcInv.bmc_access_method_changed = false ; +} + void daemon_service_run ( void ) { int rc ; @@ -1563,6 +1596,11 @@ void daemon_service_run ( void ) ilog ("DOR mode disable\n"); mtcInv.dor_mode_active = false ; } + + if ( mtcInv.bmc_access_method_changed == true ) + { + mtcInv.bmc_access_method_change_notifier(); + } } daemon_exit (); } diff --git a/mtce/src/maintenance/mtcNodeHdlrs.cpp b/mtce/src/maintenance/mtcNodeHdlrs.cpp index 1812849b..5ede7927 100755 --- a/mtce/src/maintenance/mtcNodeHdlrs.cpp +++ b/mtce/src/maintenance/mtcNodeHdlrs.cpp @@ -324,6 +324,19 @@ void nodeLinkClass::timer_handler ( int sig, siginfo_t *si, void *uc) } } + /* is the bmc audit timer ? 
*/ + node_ptr = get_bmc_audit_timer ( *tid_ptr ); + if ( node_ptr ) + { + /* is this the bm ping timer */ + if ( *tid_ptr == node_ptr->bmc_audit_timer.tid ) + { + mtcTimer_stop_int_safe ( node_ptr->bmc_audit_timer ); + node_ptr->bmc_audit_timer.ring = true ; + return ; + } + } + /* is the host services handler timer ? */ node_ptr = get_host_services_timer ( *tid_ptr ); if ( node_ptr ) @@ -2741,7 +2754,7 @@ int nodeLinkClass::disable_handler ( struct nodeLinkClass::node * node_ptr ) } else { - mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_IPMITOOL_REQUEST_DELAY ); + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_BMC_REQUEST_DELAY ); disableStageChange ( node_ptr, MTC_DISABLE__HANDLE_POWERON_RECV) ; } @@ -2879,7 +2892,7 @@ int nodeLinkClass::disable_handler ( struct nodeLinkClass::node * node_ptr ) } else { - mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_IPMITOOL_REQUEST_DELAY ); + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_BMC_REQUEST_DELAY ); disableStageChange ( node_ptr, MTC_DISABLE__HANDLE_POWERON_RECV) ; } } @@ -4779,8 +4792,11 @@ int nodeLinkClass::power_handler ( struct nodeLinkClass::node * node_ptr ) snprintf ( buffer, 255, MTC_TASK_POWEROFF_QUEUE, attempts, MTC_POWER_ACTION_RETRY_COUNT); mtcInvApi_update_task ( node_ptr, buffer); - /* check the thread error status if thetre is one */ - if ( node_ptr->bmc_thread_info.status ) + /* Check the thread error status if there is one. Skip the + * typical system call log which just floods the log file. + * The failure is reported in the update task log above. */ + if (( node_ptr->bmc_thread_info.status ) && + ( node_ptr->bmc_thread_info.status != FAIL_SYSTEM_CALL)) { wlog ("%s ... 
%s (rc:%d)\n", node_ptr->hostname.c_str(), node_ptr->bmc_thread_info.status_string.c_str(), @@ -4882,7 +4898,7 @@ int nodeLinkClass::power_handler ( struct nodeLinkClass::node * node_ptr ) powerStageChange ( node_ptr , MTC_POWERON__POWER_STATUS_WAIT ); } mtcTimer_reset ( node_ptr->mtcTimer ); - mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_IPMITOOL_REQUEST_DELAY ); + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_BMC_REQUEST_DELAY ); break ; } case MTC_POWERON__POWER_STATUS_WAIT: @@ -4896,21 +4912,38 @@ int nodeLinkClass::power_handler ( struct nodeLinkClass::node * node_ptr ) } else if ( rc == PASS ) { - if ( node_ptr->bmc_thread_info.data.find (IPMITOOL_POWER_ON_STATUS) != std::string::npos ) + rc = bmcUtil_is_power_on ( node_ptr->hostname, + node_ptr->bmc_protocol, + node_ptr->bmc_thread_info.data, + node_ptr->power_on); + + /* If there was an error in querying the power state, + * assume the power is off so that it will be powered on. */ + if ( rc ) + node_ptr->power_on = false ; + + if ( node_ptr->power_on ) { ilog ("%s power is already on ; no action required\n", node_ptr->hostname.c_str()); - node_ptr->power_on = true ; mtcInvApi_update_task ( node_ptr, "Power Already On" ); mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_TASK_UPDATE_DELAY ); powerStageChange ( node_ptr , MTC_POWERON__DONE ); } else { - node_ptr->power_on = false ; ilog ("%s power is off ; powering on ...\n", node_ptr->hostname.c_str() ); powerStageChange ( node_ptr , MTC_POWERON__REQ_SEND ); } } + /* failure path handling */ + else if ( node_ptr->power_action_retries <= 0 ) + { + wlog ("%s current power state query failed ; " + "proceeding with power-on", + node_ptr->hostname.c_str()); + powerStageChange ( node_ptr , MTC_POWERON__REQ_SEND ); + node_ptr->power_action_retries = MTC_POWER_ACTION_RETRY_COUNT ; + } else { powerStageChange ( node_ptr , MTC_POWERON__POWER_STATUS ); @@ -4961,7 +4994,7 @@ int nodeLinkClass::power_handler ( struct 
nodeLinkClass::node * node_ptr ) { blog ("%s Power-On requested\n", node_ptr->hostname.c_str()); - mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_IPMITOOL_REQUEST_DELAY ); + mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_BMC_REQUEST_DELAY ); powerStageChange ( node_ptr , MTC_POWERON__RESP_WAIT ); } @@ -5194,7 +5227,7 @@ int nodeLinkClass::powercycle_handler ( struct nodeLinkClass::node * node_ptr ) { if ( mtcTimer_expired ( node_ptr->hwmon_powercycle.control_timer ) ) { - int delay = MTC_IPMITOOL_REQUEST_DELAY ; + int delay = MTC_BMC_REQUEST_DELAY ; ilog ("%s querying current power state\n", node_ptr->hostname.c_str()); rc = bmc_command_send ( node_ptr, BMC_THREAD_CMD__POWER_STATUS ); @@ -5246,22 +5279,14 @@ int nodeLinkClass::powercycle_handler ( struct nodeLinkClass::node * node_ptr ) } else { - bool on = false ; - - ilog ("%s Power Status: %s\n", - node_ptr->hostname.c_str(), - node_ptr->bmc_thread_info.data.c_str()); - - if ( node_ptr->bmc_thread_info.data.find ( IPMITOOL_POWER_ON_STATUS ) != std::string::npos ) + int status = + bmcUtil_is_power_on ( node_ptr->hostname, + node_ptr->bmc_protocol, + node_ptr->bmc_thread_info.data, + node_ptr->power_on); + if ( status == PASS ) { - on = true ; - } - if ( rc == PASS ) - { - /* maintain current power state */ - node_ptr->power_on = on ; - - if ( on == true ) + if ( node_ptr->power_on ) { ilog ("%s invoking 'powerdown' phase\n", node_ptr->hostname.c_str()); @@ -5277,9 +5302,7 @@ int nodeLinkClass::powercycle_handler ( struct nodeLinkClass::node * node_ptr ) } else { - /* TODO: use FAIL handler */ node_ptr->hwmon_powercycle.retries = MAX_POWERCYCLE_STAGE_RETRIES+1 ; - // powercycleStageChange ( node_ptr, MTC_POWERCYCLE__FAIL ); elog ("%s failed to query power status ; aborting powercycle action\n", node_ptr->hostname.c_str()); } @@ -5303,7 +5326,7 @@ int nodeLinkClass::powercycle_handler ( struct nodeLinkClass::node * node_ptr ) case MTC_POWERCYCLE__POWEROFF: { - int delay = 
MTC_IPMITOOL_REQUEST_DELAY ; + int delay = MTC_BMC_REQUEST_DELAY ; /* Stop heartbeat if we are powering off the host */ send_hbs_command ( node_ptr->hostname, MTC_CMD_STOP_HOST ); @@ -5474,7 +5497,7 @@ int nodeLinkClass::powercycle_handler ( struct nodeLinkClass::node * node_ptr ) { if ( mtcTimer_expired ( node_ptr->hwmon_powercycle.control_timer ) ) { - int delay = MTC_IPMITOOL_REQUEST_DELAY ; + int delay = MTC_BMC_REQUEST_DELAY ; clog ("%s %s stage\n", node_ptr->hostname.c_str(), get_powercycleStages_str(node_ptr->powercycleStage).c_str()); @@ -5551,7 +5574,7 @@ int nodeLinkClass::powercycle_handler ( struct nodeLinkClass::node * node_ptr ) else { wlog ("%s power status query requested\n", node_ptr->hostname.c_str() ); - mtcTimer_start ( node_ptr->hwmon_powercycle.control_timer, mtcTimer_handler, MTC_IPMITOOL_REQUEST_DELAY ); + mtcTimer_start ( node_ptr->hwmon_powercycle.control_timer, mtcTimer_handler, MTC_BMC_REQUEST_DELAY ); powercycleStageChange ( node_ptr, MTC_POWERCYCLE__POWERON_VERIFY_WAIT ); } } @@ -5561,8 +5584,6 @@ int nodeLinkClass::powercycle_handler ( struct nodeLinkClass::node * node_ptr ) { if ( mtcTimer_expired ( node_ptr->hwmon_powercycle.control_timer ) ) { - bool on = false ; - rc = bmc_command_recv ( node_ptr ); if ( rc == RETRY ) { @@ -5571,32 +5592,26 @@ int nodeLinkClass::powercycle_handler ( struct nodeLinkClass::node * node_ptr ) } if ( rc == PASS ) { - if ( node_ptr->bmc_thread_info.data.find (IPMITOOL_POWER_ON_STATUS) != std::string::npos ) - { - on = true ; - } + rc = bmcUtil_is_power_on ( node_ptr->hostname, + node_ptr->bmc_protocol, + node_ptr->bmc_thread_info.data, + node_ptr->power_on); } - ilog ("%s power state query result: %s\n", - node_ptr->hostname.c_str(), - node_ptr->bmc_thread_info.data.c_str() ); - - if (( rc == PASS ) && ( on == true )) + if (( rc == PASS ) && ( node_ptr->power_on )) { - node_ptr->power_on = true ; ilog ("%s is Powered On - waiting for 'online' (%d sec timeout)\n", node_ptr->hostname.c_str(), 
MTC_POWERON_TO_ONLINE_TIMEOUT); mtcInvApi_update_task ( node_ptr, MTC_TASK_POWERCYCLE_BOOT, node_ptr->hwmon_powercycle.attempts ); - /* Set the online timeout */ mtcTimer_start ( node_ptr->hwmon_powercycle.control_timer, mtcTimer_handler, MTC_POWERON_TO_ONLINE_TIMEOUT ); powercycleStageChange ( node_ptr, MTC_POWERCYCLE__POWERON_WAIT ); } else { - wlog ("%s Power-On failed or did not occur ; retrying (rc:%d:%d)\n", node_ptr->hostname.c_str(), rc, on ); + wlog ("%s Power-On failed or did not occur ; retrying (rc:%d)\n", node_ptr->hostname.c_str(), rc ); node_ptr->power_on = false ; mtcInvApi_update_task ( node_ptr, MTC_TASK_POWERCYCLE_RETRY, node_ptr->hwmon_powercycle.attempts ); mtcTimer_start ( node_ptr->hwmon_powercycle.control_timer, mtcTimer_handler, MTC_BM_POWERON_TIMEOUT ); @@ -6177,13 +6192,46 @@ int nodeLinkClass::bmc_handler ( struct nodeLinkClass::node * node_ptr ) { if ( node_ptr->bmc_provisioned == true ) { - if (( node_ptr->bmc_accessible == true ) && ( node_ptr->bm_ping_info.ok == false )) +#ifdef WANT_FIT_TESTING + if (( node_ptr->bmc_accessible == true ) && + ( node_ptr->bm_ping_info.ok == true ) && + ( daemon_is_file_present ( MTC_CMD_FIT__JSON_LEAK_SOAK ) == true )) + { + node_ptr->bm_ping_info.stage = PINGUTIL_MONITOR_STAGE__FAIL ; + } +#endif + + /* Handle BMC access method changes */ + if ( node_ptr->bmc_access_method_changed ) + { + node_ptr->bmc_access_method_changed = false ; + + ilog ("%s bmc access method change ; force %s", + node_ptr->hostname.c_str(), + this->bmc_access_method.c_str()); + + thread_kill ( node_ptr->bmc_thread_ctrl, node_ptr->bmc_thread_info ); + + bmc_access_data_init ( node_ptr ); + pingUtil_fini ( node_ptr->bm_ping_info ); + node_ptr->bm_ping_info.stage = PINGUTIL_MONITOR_STAGE__OPEN ; + + /* start a timer that will raise the BM Access alarm + * if we are not accessible by the time it expires */ + mtcTimer_reset ( node_ptr->bm_timer ); + mtcTimer_reset ( node_ptr->bmc_audit_timer ); + mtcTimer_reset ( 
node_ptr->bmc_access_timer ); + mtcTimer_start ( node_ptr->bmc_access_timer, mtcTimer_handler, MTC_MINS_2 ); + } + + if (( node_ptr->bmc_accessible == true ) && + ( node_ptr->bm_ping_info.ok == false )) { - string bmc_info_filename = "" ; wlog ("%s bmc access lost\n", node_ptr->hostname.c_str()); /* Be sure the BMC info file is removed. * The 'hwmond' reads it and gets the bmc fw version from it. */ + string bmc_info_filename = "" ; if ( node_ptr->bmc_protocol == BMC_PROTOCOL__REDFISHTOOL ) { bmc_info_filename.append(REDFISHTOOL_OUTPUT_DIR) ; @@ -6207,18 +6255,23 @@ int nodeLinkClass::bmc_handler ( struct nodeLinkClass::node * node_ptr ) plog ("%s bmc access timer started (%d secs)\n", node_ptr->hostname.c_str(), MTC_MINS_2); mtcTimer_reset ( node_ptr->bmc_access_timer ); mtcTimer_start ( node_ptr->bmc_access_timer, mtcTimer_handler, MTC_MINS_2 ); + mtcTimer_reset ( node_ptr->bmc_audit_timer ); } + /* manage bmc creds refresh ; not expected but should be handled */ if ( node_ptr->bm_ping_info.ok == false ) { - /* Auto correct key ping information ; should ever occur but if it does ... */ - if (( node_ptr->bm_ping_info.hostname.empty()) || ( node_ptr->bm_ping_info.ip.empty())) + /* Auto correct key ping information ; + * should never occur but if it does ... */ + if (( node_ptr->bm_ping_info.hostname.empty()) || + ( node_ptr->bm_ping_info.ip.empty())) { node_ptr->bm_ping_info.hostname = node_ptr->hostname ; node_ptr->bm_ping_info.ip = node_ptr->bm_ip ; } } + /* manage getting the bm password */ if ( node_ptr->thread_extra_info.bm_pw.empty() ) { barbicanSecret_type * secret = secretUtil_manage_secret( node_ptr->secretEvent, @@ -6235,7 +6288,7 @@ int nodeLinkClass::bmc_handler ( struct nodeLinkClass::node * node_ptr ) /* If the BMC protocol has not yet been learned then do so. 
* Default is ipmi unless the target host responds to a * redfish root query with a minimum version number ; 1.0 */ - else if (( node_ptr->bm_ping_info.ok ) && + else if (( node_ptr->bm_ping_info.ok == true ) && ( node_ptr->bmc_protocol_learned == false )) { if ( node_ptr->bmc_protocol_learning == false ) @@ -6277,9 +6330,10 @@ int nodeLinkClass::bmc_handler ( struct nodeLinkClass::node * node_ptr ) else if ( rc != PASS ) { if (( node_ptr->bmc_thread_info.command == BMC_THREAD_CMD__BMC_QUERY ) && - ( rc == FAIL_SYSTEM_CALL )) + (( rc == FAIL_SYSTEM_CALL ) || ( rc == FAIL_NOT_ACTIVE ))) { - wlog ("%s bmc does not support Redfish ; " \ + /* TODO: may need retries */ + plog ("%s bmc does not support Redfish ; " \ "defaulting to ipmi", node_ptr->hostname.c_str()); } @@ -6292,9 +6346,9 @@ int nodeLinkClass::bmc_handler ( struct nodeLinkClass::node * node_ptr ) node_ptr->bmc_thread_info.status, node_ptr->bmc_thread_info.status_string.c_str()); } + node_ptr->bmc_protocol = BMC_PROTOCOL__IPMITOOL ; node_ptr->bmc_protocol_learning = false ; node_ptr->bmc_protocol_learned = true ; - node_ptr->bmc_protocol = BMC_PROTOCOL__IPMITOOL ; node_ptr->bmc_thread_ctrl.done = true ; } else @@ -6316,13 +6370,12 @@ int nodeLinkClass::bmc_handler ( struct nodeLinkClass::node * node_ptr ) { node_ptr->bmc_protocol = BMC_PROTOCOL__IPMITOOL ; } + node_ptr->bmc_protocol_learned = true ; - ilog ("%s bmc supports %s", + blog ("%s bmc supports %s", node_ptr->hostname.c_str(), bmcUtil_getProtocol_str(node_ptr->bmc_protocol).c_str()); - node_ptr->bmc_protocol_learning = false ; - node_ptr->bmc_protocol_learned = true ; node_ptr->bmc_thread_ctrl.done = true ; } } @@ -6339,10 +6392,10 @@ int nodeLinkClass::bmc_handler ( struct nodeLinkClass::node * node_ptr ) ( node_ptr->bmc_accessible == false ) && ( node_ptr->bm_ping_info.ok == true ) && ( node_ptr->bmc_info_query_done == false ) && + ( node_ptr->bmc_protocol_learned == true ) && ( mtcTimer_expired (node_ptr->bm_timer ) == true )) { - if (( 
node_ptr->bmc_info_query_active == false ) && - ( node_ptr->bmc_info_query_done == false )) + if ( node_ptr->bmc_info_query_active == false ) { if ( bmc_command_send ( node_ptr, BMC_THREAD_CMD__BMC_INFO ) != PASS ) { @@ -6355,14 +6408,13 @@ int nodeLinkClass::bmc_handler ( struct nodeLinkClass::node * node_ptr ) else { node_ptr->bmc_info_query_active = true ; - ilog ("%s bmc redfish '%s' in progress", /* ERIK: blog */ + blog ("%s bmc redfish '%s' in progress", node_ptr->hostname.c_str(), bmcUtil_getCmd_str(node_ptr->bmc_thread_info.command).c_str()); mtcTimer_start ( node_ptr->bm_timer, mtcTimer_handler, MTC_FIRST_WAIT ); } } - else if (( node_ptr->bmc_info_query_active == true ) && - ( node_ptr->bmc_info_query_done == false)) + else if ( node_ptr->bmc_info_query_active == true ) { int rc ; if ( ( rc = bmc_command_recv ( node_ptr ) ) == RETRY ) @@ -6397,15 +6449,18 @@ int nodeLinkClass::bmc_handler ( struct nodeLinkClass::node * node_ptr ) else { mtcTimer_reset ( node_ptr->bm_timer ); + mtcTimer_reset ( node_ptr->bmc_audit_timer ); + mtcTimer_start ( node_ptr->bmc_audit_timer, mtcTimer_handler, MTC_MINS_2 ); + plog ("%s bmc audit timer started (%d secs)\n", node_ptr->hostname.c_str(), MTC_MINS_2); -#ifdef REDFISH_INTEGRATION_DONE /* success path */ - node_ptr->bmc_info_query_active = false ; + node_ptr->bmc_accessible = true ; node_ptr->bmc_info_query_done = true ; - node_ptr->bmc_protocol = BMC_PROTOCOL__REDFISHTOOL ; + node_ptr->bmc_info_query_active = false ; + node_ptr->bmc_protocol_learning = false ; + node_ptr->bmc_protocol_learned = true ; mtcTimer_reset ( node_ptr->bmc_access_timer ); - node_ptr->bmc_accessible = true ; /* save the host's power state */ node_ptr->power_on = node_ptr->bmc_info.power_on ; @@ -6413,25 +6468,12 @@ int nodeLinkClass::bmc_handler ( struct nodeLinkClass::node * node_ptr ) plog ("%s bmc is accessible using redfish", node_ptr->hostname.c_str()); - /* tell the hardware monitor of the power state and protocol */ bmcUtil_hwmon_info ( 
node_ptr->hostname, - node_ptr->bmc_protocol, - node_ptr->power_on, "" ); + node_ptr->bmc_protocol, + node_ptr->power_on, "" ); send_hwmon_command ( node_ptr->hostname, MTC_CMD_MOD_HOST ); -#else - /* Redfish Power Control Commands not Implemented Yet - * Redfish not fully integrated. - * Need to continue to default to IPMI - * - * Start */ - node_ptr->bmc_accessible = false ; - node_ptr->bmc_info_query_active = false ; - node_ptr->bmc_info_query_done = false ; - node_ptr->bmc_protocol = BMC_PROTOCOL__IPMITOOL ; - /* End */ -#endif node_ptr->bmc_thread_ctrl.done = true ; node_ptr->bmc_thread_info.command = 0 ; @@ -6640,6 +6682,112 @@ int nodeLinkClass::bmc_handler ( struct nodeLinkClass::node * node_ptr ) pingUtil_acc_monitor ( node_ptr->bm_ping_info ); } + /* BMC Access Audit for Redfish. + * - used to refresh the host power state */ + if (( node_ptr->bmc_protocol == BMC_PROTOCOL__REDFISHTOOL ) && + ( node_ptr->bmc_provisioned ) && + ( node_ptr->bmc_accessible ) && + ( mtcTimer_expired ( node_ptr->bmc_audit_timer ) == true ) && + ( mtcTimer_expired ( node_ptr->bm_timer ) == true )) + { + if ( node_ptr->bmc_thread_ctrl.done ) + { + /* send the BMC Query command */ + if ( bmc_command_send ( node_ptr, BMC_THREAD_CMD__BMC_INFO ) != PASS ) + { + elog ("%s bmc redfish '%s' send failed\n", + node_ptr->hostname.c_str(), + bmcUtil_getCmd_str( + node_ptr->bmc_thread_info.command).c_str()); + node_ptr->bm_ping_info.ok = false ; + node_ptr->bm_ping_info.stage = PINGUTIL_MONITOR_STAGE__FAIL ; + } + else + { + blog1 ("%s bmc redfish '%s' audit in progress", + node_ptr->hostname.c_str(), + bmcUtil_getCmd_str(node_ptr->bmc_thread_info.command).c_str()); + mtcTimer_start ( node_ptr->bm_timer, mtcTimer_handler, MTC_RETRY_WAIT ); + } + } + else if ( node_ptr->bmc_thread_info.command == BMC_THREAD_CMD__BMC_INFO ) + { + int rc ; + if ( ( rc = bmc_command_recv ( node_ptr ) ) == RETRY ) + { + mtcTimer_start ( node_ptr->bm_timer, mtcTimer_handler, MTC_RETRY_WAIT ); + } + else if ( rc != 
PASS ) + { + wlog ("%s bmc audit failed receive (rc:%d)", + node_ptr->hostname.c_str(), rc ); + node_ptr->bm_ping_info.ok = false ; + node_ptr->bm_ping_info.stage = PINGUTIL_MONITOR_STAGE__FAIL ; + } + else if ( node_ptr->bmc_thread_info.data.empty()) + { + wlog ("%s bmc audit failed get bmc query response data", + node_ptr->hostname.c_str()); + node_ptr->bm_ping_info.ok = false ; + node_ptr->bm_ping_info.stage = PINGUTIL_MONITOR_STAGE__FAIL ; + } + else + { + string filedata = daemon_read_file (node_ptr->bmc_thread_info.data.data()) ; + struct json_object *json_obj = + json_tokener_parse((char*)filedata.data()); + if ( json_obj ) + { + /* load the power state */ + bool power_on ; + string power_state = + tolowercase(jsonUtil_get_key_value_string( json_obj, REDFISH_LABEL__POWER_STATE)); + if ( power_state == BMC_POWER_ON_STATUS ) + power_on = true ; + else if ( power_state == BMC_POWER_OFF_STATUS ) + power_on = false ; + else + { + wlog ("%s bmc audit failed to get power state", + node_ptr->hostname.c_str()); + node_ptr->bm_ping_info.ok = false ; + node_ptr->bm_ping_info.stage = PINGUTIL_MONITOR_STAGE__FAIL ; + rc = FAIL_JSON_PARSE ; + } + if ( rc == PASS ) + { + if ( power_on != node_ptr->power_on ) + { + ilog ("%s power state changed to %s", + node_ptr->hostname.c_str(), + power_state.c_str()); + + /* tell the hardware monitor of the power state and protocol */ + bmcUtil_hwmon_info ( node_ptr->hostname, + node_ptr->bmc_protocol, + power_on, "" ); + send_hwmon_command ( node_ptr->hostname, MTC_CMD_MOD_HOST ); + } + node_ptr->power_on = power_on ; + blog1 ("%s bmc audit timer re-started (%d secs)\n", + node_ptr->hostname.c_str(), MTC_MINS_2); + mtcTimer_start ( node_ptr->bmc_audit_timer, + mtcTimer_handler, + MTC_MINS_2 ); + } + json_object_put(json_obj); + } + else + { + node_ptr->bm_ping_info.ok = false ; + node_ptr->bm_ping_info.stage = PINGUTIL_MONITOR_STAGE__FAIL ; + wlog ("%s bmc audit failed parse bmc query response", + node_ptr->hostname.c_str()); + } + } + 
} + } + /****************************************************************** * Manage the Board Management Access Alarm ******************************************************************/ @@ -6652,11 +6800,11 @@ int nodeLinkClass::bmc_handler ( struct nodeLinkClass::node * node_ptr ) /* start a timer that will raise the BM Access alarm * if we are not accessible by the time it expires */ - plog ("%s bmc access timer started (%d secs)\n", node_ptr->hostname.c_str(), MTC_MINS_2); mtcTimer_start ( node_ptr->bmc_access_timer, mtcTimer_handler, MTC_MINS_2 ); if ( node_ptr->alarms[MTC_ALARM_ID__BM] == FM_ALARM_SEVERITY_CLEAR ) { + plog ("%s bmc access timer started (%d secs)\n", node_ptr->hostname.c_str(), MTC_MINS_2); mtcAlarm_warning ( node_ptr->hostname, MTC_ALARM_ID__BM ); node_ptr->alarms[MTC_ALARM_ID__BM] = FM_ALARM_SEVERITY_WARNING ; } @@ -6668,21 +6816,19 @@ int nodeLinkClass::bmc_handler ( struct nodeLinkClass::node * node_ptr ) * ... if BMs are accessible then see if we need to clear the * major BM Alarm. 
*****************************************************************/ - else if ( node_ptr->alarms[MTC_ALARM_ID__BM] != FM_ALARM_SEVERITY_CLEAR ) + if (( node_ptr->bmc_accessible == true ) && + ( node_ptr->alarms[MTC_ALARM_ID__BM] != FM_ALARM_SEVERITY_CLEAR ) && + ((( node_ptr->bmc_protocol == BMC_PROTOCOL__IPMITOOL ) && + ( node_ptr->bmc_info_query_done == true ) && + ( node_ptr->reset_cause_query_done == true ) && + ( node_ptr->power_status_query_done == true )) || + (( node_ptr->bmc_protocol == BMC_PROTOCOL__REDFISHTOOL ) && + ( node_ptr->bmc_protocol_learned == true )))) { - if ((( node_ptr->bmc_protocol == BMC_PROTOCOL__IPMITOOL ) && - ( node_ptr->bmc_info_query_done == true ) && - ( node_ptr->reset_cause_query_done == true ) && - ( node_ptr->power_status_query_done == true )) || - (( node_ptr->bmc_protocol == BMC_PROTOCOL__REDFISHTOOL ) && - ( node_ptr->bmc_protocol_learned == true ))) - { - mtcAlarm_clear ( node_ptr->hostname, MTC_ALARM_ID__BM ); - node_ptr->alarms[MTC_ALARM_ID__BM] = FM_ALARM_SEVERITY_CLEAR ; - } + mtcAlarm_clear ( node_ptr->hostname, MTC_ALARM_ID__BM ); + node_ptr->alarms[MTC_ALARM_ID__BM] = FM_ALARM_SEVERITY_CLEAR ; } /* else alarms already cleared */ } /* end if bmc_provisioned */ - else if ( node_ptr->alarms[MTC_ALARM_ID__BM] != FM_ALARM_SEVERITY_CLEAR ) { mtcAlarm_clear ( node_ptr->hostname, MTC_ALARM_ID__BM ); @@ -6885,6 +7031,15 @@ int nodeLinkClass::insv_test_handler ( struct nodeLinkClass::node * node_ptr ) } case MTC_INSV_TEST__RUN: { + if ( daemon_is_file_present ( MTC_CMD_FIT__BMC_ACC_FAIL )) + { + if ( node_ptr->bm_ping_info.ok ) + { + ilog ("%s FIT failing bmc ping monitor", node_ptr->hostname.c_str()); + node_ptr->bm_ping_info.stage = PINGUTIL_MONITOR_STAGE__FAIL ; + } + } + #ifdef WANT_FIT_TESTING daemon_load_fit (); @@ -6916,7 +7071,6 @@ int nodeLinkClass::insv_test_handler ( struct nodeLinkClass::node * node_ptr ) node_ptr->thread_extra_info.bm_ip = node_ptr->bm_ip ; node_ptr->thread_extra_info.bm_un = node_ptr->bm_un ; 
node_ptr->thread_extra_info.bm_pw = node_ptr->bm_pw ; - node_ptr->thread_extra_info.bm_type = node_ptr->bm_type ; node_ptr->bmc_thread_info.extra_info_ptr = &node_ptr->thread_extra_info ; if ( thread_launch_thread ( mtcThread_bmc, &node_ptr->bmc_thread_info ) == 0 ) { diff --git a/mtce/src/maintenance/mtcSubfHdlrs.cpp b/mtce/src/maintenance/mtcSubfHdlrs.cpp index 1b2d85c3..9f539d97 100644 --- a/mtce/src/maintenance/mtcSubfHdlrs.cpp +++ b/mtce/src/maintenance/mtcSubfHdlrs.cpp @@ -599,7 +599,7 @@ int nodeLinkClass::enable_subf_handler ( struct nodeLinkClass::node * node_ptr ) } else { - wlog ("%s is ENABLED-degraded\n", name.c_str()); + plog ("%s is ENABLED-degraded\n", name.c_str()); } enableStageChange ( node_ptr, MTC_ENABLE__DONE ); diff --git a/mtce/src/maintenance/mtcThreads.cpp b/mtce/src/maintenance/mtcThreads.cpp index ca7e85b5..f2acca31 100644 --- a/mtce/src/maintenance/mtcThreads.cpp +++ b/mtce/src/maintenance/mtcThreads.cpp @@ -38,7 +38,6 @@ using namespace std; #include "mtcThreads.h" /* for ... IPMITOOL_THREAD_CMD__RESET ... */ #include "bmcUtil.h" /* for ... 
mtce-common bmc utility header */ - /************************************************************************** * * Name : mtcThread_bmc @@ -111,6 +110,7 @@ void * mtcThread_bmc ( void * arg ) break ; } case BMC_THREAD_CMD__BMC_INFO: + case BMC_THREAD_CMD__POWER_STATUS: { command = REDFISHTOOL_BMC_INFO_CMD ; suffix = BMC_INFO_FILE_SUFFIX ; @@ -120,26 +120,25 @@ void * mtcThread_bmc ( void * arg ) /* control commands */ case BMC_THREAD_CMD__POWER_RESET: { - command = REDFISHTOOL_POWER_RESET_CMD ; + command = extra_ptr->bm_cmd ; suffix = BMC_POWER_CMD_FILE_SUFFIX ; break ; } case BMC_THREAD_CMD__POWER_ON: { - command = REDFISHTOOL_POWER_ON_CMD ; + command = extra_ptr->bm_cmd ; suffix = BMC_POWER_CMD_FILE_SUFFIX ; break ; } case BMC_THREAD_CMD__POWER_OFF: { - command = REDFISHTOOL_POWER_OFF_CMD ; + command = extra_ptr->bm_cmd ; suffix = BMC_POWER_CMD_FILE_SUFFIX ; break ; } case BMC_THREAD_CMD__BOOTDEV_PXE: { - /* json response */ - command = REDFISHTOOL_BOOTDEV_PXE_CMD ; + command = REDFISHTOOL_BOOTDEV_PXE_CMD ; suffix = BMC_BOOTDEV_CMD_FILE_SUFFIX ; break ; } @@ -153,6 +152,7 @@ void * mtcThread_bmc ( void * arg ) } }/* end redfishtool switch */ } /* end if */ + /* IPMI */ else { switch ( info_ptr->command ) @@ -308,17 +308,30 @@ void * mtcThread_bmc ( void * arg ) else #endif { + string chopped_request = bmcUtil_chop_system_req(request); daemon_remove_file ( datafile.data() ) ; + blog_t("%s %s", info_ptr->hostname.c_str(), chopped_request.c_str()); + + /****** Make the system call ******/ + rc = + threadUtil_bmcSystemCall (info_ptr->hostname, + request, + DEFAULT_SYSTEM_REQUEST_LATENCY_SECS); - nodeUtil_latency_log ( info_ptr->hostname, NODEUTIL_LATENCY_MON_START, 0 ); - rc = system ( request.data()) ; if ( rc != PASS ) { - if ( info_ptr->command != BMC_THREAD_CMD__BMC_QUERY ) + /* Log the command that failed unless ... 
+ * - its the root query during learning + * - its not the typical falure to reach the BMC whose + * error shows up as a ENOENT or + * 'No such file or directory' + */ + if (( info_ptr->command != BMC_THREAD_CMD__BMC_QUERY ) && + ( errno != ENOENT )) { - elog_t ("%s redfishtool system call failed (%s) (%d:%d:%m)\n", + elog_t ("%s system call failed [%s] (%d:%d:%m)\n", info_ptr->hostname.c_str(), - request.c_str(), + chopped_request.c_str(), rc, errno ); } info_ptr->status = FAIL_SYSTEM_CALL ; @@ -328,8 +341,6 @@ void * mtcThread_bmc ( void * arg ) info_ptr->status_string = daemon_read_file(datafile.data()); } } - /* produce latency log if command takes longer than 5 seconds */ - nodeUtil_latency_log ( info_ptr->hostname, "redfishtool system call", 5000 ); } #ifdef WANT_FIT_TESTING @@ -478,15 +489,29 @@ void * mtcThread_bmc ( void * arg ) else #endif { - daemon_remove_file ( datafile.data() ) ; + string chopped_request = bmcUtil_chop_system_req(request); + daemon_remove_file ( datafile.data() ); + blog_t("%s %s", info_ptr->hostname.c_str(), chopped_request.c_str()); + + /****** Make the system call ******/ + rc = + threadUtil_bmcSystemCall (info_ptr->hostname, + request, + DEFAULT_SYSTEM_REQUEST_LATENCY_SECS); - nodeUtil_latency_log ( info_ptr->hostname, NODEUTIL_LATENCY_MON_START, 0 ); - rc = system ( request.data()) ; if ( rc != PASS ) { - wlog_t ("%s ipmitool system call failed (%d:%d:%m)\n", info_ptr->hostname.c_str(), rc, errno ); + elog_t ("%s system call failed [%s] (%d:%d:%m)\n", + info_ptr->hostname.c_str(), + chopped_request.c_str(), + rc, errno ); + info_ptr->status = FAIL_SYSTEM_CALL ; + if ( daemon_is_file_present ( datafile.data() )) + { + /* load in the error. 
stdio is redirected to the datafile */ + info_ptr->status_string = daemon_read_file(datafile.data()); + } } - nodeUtil_latency_log ( info_ptr->hostname, "ipmitool system call", 1000 ); } #ifdef WANT_FIT_TESTING diff --git a/mtce/src/maintenance/mtcThreads.h b/mtce/src/maintenance/mtcThreads.h index c6b310c3..bb521a63 100644 --- a/mtce/src/maintenance/mtcThreads.h +++ b/mtce/src/maintenance/mtcThreads.h @@ -19,8 +19,7 @@ typedef struct string bm_ip ; string bm_un ; string bm_pw ; - string bm_type ; - + string bm_cmd ; } thread_extra_info_type ; void * mtcThread_bmc ( void * ); diff --git a/mtce/src/scripts/mtc.conf b/mtce/src/scripts/mtc.conf index 0a2ec050..1b431004 100644 --- a/mtce/src/scripts/mtc.conf +++ b/mtce/src/scripts/mtc.conf @@ -64,7 +64,7 @@ ar_goenable_interval = 30 ar_hostservices_interval = 30 ar_heartbeat_interval = 600 -api_retries = 10 ; number of API retries b4 failure +api_retries = 10 ; number of API retries b4 failure [client] ; Client Configuration scheduling_priority = 99 ; realtime scheduling; range of 1 .. 99 is acceptable