metal/mtce/src/hwmon/hwmonClass.cpp

2453 lines
84 KiB
C++

/*
* Copyright (c) 2015-2017 Wind River Systems, Inc.
*
* SPDX-License-Identifier: Apache-2.0
*
*/
#include "nodeBase.h"
#include "tokenUtil.h"
#include "secretUtil.h"
#include "hwmonClass.h"
#include "hwmonUtil.h"
#include "hwmonIpmi.h"
#include "hwmonHttp.h"
#include "hwmonAlarm.h"
#include "hwmonGroup.h"
#include "hwmonSensor.h"
#include "hwmonThreads.h"
#include "hwmon.h"
/**< constructor */
hwmonHostClass::hwmonHostClass()
{
    /* Start with an empty host list */
    hwmon_head = NULL ;
    hwmon_tail = NULL ;
    hosts      = 0 ;

    /* Nothing has been allocated yet */
    memory_allocs = 0 ;
    memory_used   = 0 ;

    /* Mark every slot of the host pointer table as unused */
    int slot = 0 ;
    while ( slot < MAX_HOSTS )
    {
        host_ptrs[slot] = static_cast<struct hwmon_host *>(NULL) ;
        ++slot ;
    }

    host_deleted  = false ;
    config_reload = false ;
}
/** Destructor ; performs no cleanup of its own */
hwmonHostClass::~hwmonHostClass()
{
}
/* Printable label for each add handler stage, indexed by stage value */
static std::string addStages_str [HWMON_ADD__STAGES +1] ;

/* Load the add handler stage label table.
 * Labels are assigned for the START, STATES, WAIT and DONE stages. */
void hwmon_stages_init ( void )
{
    static const struct
    {
        int          stage ;
        const char * label ;
    } stage_labels [] =
    {
        { HWMON_ADD__START , "Add-Start"  },
        { HWMON_ADD__STATES, "Add-States" },
        { HWMON_ADD__WAIT  , "Add-Wait"   },
        { HWMON_ADD__DONE  , "Add-Done"   },
    };

    const int label_count = sizeof(stage_labels) / sizeof(stage_labels[0]) ;
    for ( int n = 0 ; n < label_count ; n++ )
    {
        addStages_str [stage_labels[n].stage] = stage_labels[n].label ;
    }
}
/** Host add handler stage change member function.
 *
 * Moves the host to 'newStage' when both the host's current stage and
 * the requested stage are below HWMON_ADD__STAGES.
 *
 * @param ptr      host whose add stage is to be changed
 * @param newStage requested add stage
 * @return PASS when the stage was changed ; otherwise FAIL, in which
 *         case the host's add stage is forced to HWMON_ADD__DONE
 */
int hwmonHostClass::addStageChange ( struct hwmonHostClass::hwmon_host * ptr,
                                     hwmon_addStages_enum newStage )
{
    const bool stages_in_range = (( newStage      < HWMON_ADD__STAGES ) &&
                                  ( ptr->addStage < HWMON_ADD__STAGES )) ;

    if ( stages_in_range == false )
    {
        slog ("%s Invalid Stage (now:%d new:%d)\n",
                  ptr->hostname.c_str(),
                  ptr->addStage,
                  newStage );

        /* park the add handler rather than run an out of range stage */
        ptr->addStage = HWMON_ADD__DONE ;
        return (FAIL);
    }

    clog ("%s %s -> %s (%d->%d)\n",
              &ptr->hostname[0],
              addStages_str[ptr->addStage].c_str(),
              addStages_str[newStage].c_str(),
              ptr->addStage, newStage);

    ptr->addStage = newStage ;
    return (PASS);
}
/* Initialize bmc data for ipmi mode monitoring.
 *
 * Resets the host's accessible and degraded state, deletes its sensor
 * groups and sensors and puts the add handler back to its start stage. */
void hwmonHostClass::ipmi_bmc_data_init ( struct hwmonHostClass::hwmon_host * host_ptr )
{
    host_ptr->ping_info.timer_handler = &hwmonTimer_handler ;

    host_ptr->degraded   = false ;
    host_ptr->accessible = false ;

    /* drop the current sensor model ; groups first, then sensors */
    hwmon_del_groups  ( host_ptr );
    hwmon_del_sensors ( host_ptr );

    /* force the add handler to run */
    host_ptr->addStage           = HWMON_ADD__START ;
    host_ptr->sensor_query_count = 0 ;
}
/*
 * Allocate a new host, initialize it and link it onto the tail of the
 * host list.
 *
 * If a host named 'hostname' already exists it is removed first so that
 * the call behaves as a reprovision.
 *
 * Returns a pointer to the new host, or NULL if the existing instance
 * could not be removed or a new host could not be allocated.
 */
struct hwmonHostClass::hwmon_host* hwmonHostClass::addHost( string hostname )
{
    /* a host that is already provisioned gets removed and re-added */
    struct hwmon_host * host = hwmonHostClass::getHost ( hostname );
    if (( host != NULL ) && ( hwmonHostClass::remHost ( hostname ) != 0 ))
    {
        /* Should never get here but if we do then */
        /* something is seriously wrong            */
        elog ("Error: Unable to remove host during reprovision\n");
        return static_cast<struct hwmon_host *>(NULL);
    }

    /* get memory for the new host */
    host = hwmonHostClass::newHost ();
    if ( host == NULL )
    {
        elog ( "Error: Failed to allocate memory for new host\n" );
        return static_cast<struct hwmon_host *>(NULL);
    }

    /* basic host state */
    host->hostname    = hostname ;
    host->host_delete = false ;
    host->poweron     = false ;
    host->retries     = 0 ;
    host->delStage    = HWMON_DEL__START ;
    host->ping_info.timer_handler = &hwmonTimer_handler ;

    /* per host timers ; each is labelled with the hostname */
    mtcTimer_init ( host->hostTimer,          host->hostname, "host timer" );
    mtcTimer_init ( host->addTimer,           host->hostname, "add timer" );
    mtcTimer_init ( host->secretTimer,        host->hostname, "secret timer" );
    mtcTimer_init ( host->relearnTimer,       host->hostname, "relearn timer" );
    mtcTimer_init ( host->ping_info.timer,    host->hostname, "ping monitor timer" );
    mtcTimer_init ( host->monitor_ctrl.timer, host->hostname, "sensor monitor timer") ;

    /* no sensor model loaded yet */
    host->groups  = 0 ;
    host->sensors = 0 ;
    host->samples = 0 ;

    /* http event pre-init
     * PATCHBACK - consider patchback to REL3 and earlier */
    host->event.base = NULL ;
    host->event.conn = NULL ;
    host->event.req  = NULL ;
    host->event.buf  = NULL ;

    host->secretEvent.base = NULL ;
    host->secretEvent.conn = NULL ;
    host->secretEvent.req  = NULL ;
    host->secretEvent.buf  = NULL ;

    if ( hwmon_head != NULL )
    {
        /* append behind the current tail and make the
         * new host the last element of the list */
        hwmon_tail->next = host ;
        host->prev       = hwmon_tail ;
        host->next       = NULL ;
        hwmon_tail       = host ;
    }
    else
    {
        /* empty list ; the new host is both head and tail */
        hwmon_head = host ;
        hwmon_tail = host ;
        host->prev = NULL ;
        host->next = NULL ;
    }

    /* Default to not monitoring */
    host->monitor        = false ;
    host->bm_provisioned = false ;
    host->alarmed        = false ;
    host->alarmed_config = false ;
    host->degraded       = false ;

    hosts++ ;
    dlog2 ("Added hwmonHostClass host instance %d\n", hosts);
    return host ;
}
/* Run mtcTimer_fini against every timer held by the specified host */
void hwmonHostClass::free_host_timers ( struct hwmon_host * ptr )
{
    /* host level timers */
    mtcTimer_fini ( ptr->hostTimer    );
    mtcTimer_fini ( ptr->addTimer     );
    mtcTimer_fini ( ptr->secretTimer  );
    mtcTimer_fini ( ptr->relearnTimer );

    /* timers embedded in the ping, monitor and thread control structs */
    mtcTimer_fini ( ptr->ping_info.timer            );
    mtcTimer_fini ( ptr->monitor_ctrl.timer         );
    mtcTimer_fini ( ptr->ipmitool_thread_ctrl.timer );
}
/* Remove a host from the linked list of hosts - may require splice action.
 *
 * The host's timers are finalized, the host is unlinked from the list
 * and then handed to delHost to be freed.
 *
 * Returns PASS once the host has been unlinked (the delHost result is
 *                not propagated),
 *         -ENODEV if the hostname is empty,
 *         -ENXIO  if the host list is empty,
 *         -EFAULT if no host with that name is in the list.
 */
int hwmonHostClass::remHost( string hostname )
{
    /* c_str() never returns NULL so test for the empty string instead */
    if ( hostname.empty() )
        return -ENODEV ;

    if ( hwmon_head == NULL )
        return -ENXIO ;

    struct hwmon_host * ptr = hwmonHostClass::getHost ( hostname );
    if ( ptr == NULL )
        return -EFAULT ;

    free_host_timers ( ptr );

    /* If the host is the head host */
    if ( ptr == hwmon_head )
    {
        /* only one host in the list case */
        if ( hwmon_head == hwmon_tail )
        {
            dlog2 ("Single Host -> Head Case\n");
            hwmon_head = NULL ;
            hwmon_tail = NULL ;
        }
        else
        {
            dlog2 ("Multiple Hosts -> Head Case\n");
            hwmon_head = hwmon_head->next ;
            hwmon_head->prev = NULL ;
        }
    }
    /* if not head but tail then there must be more than one
     * host in the list so go ahead and chop the tail.
     */
    else if ( ptr == hwmon_tail )
    {
        dlog2 ("Multiple Host -> Tail Case\n");
        hwmon_tail = hwmon_tail->prev ;
        hwmon_tail->next = NULL ;
    }
    else
    {
        /* interior host ; join its neighbours to each other */
        dlog2 ("Multiple Host -> Full Splice Out\n");
        ptr->prev->next = ptr->next ;
        ptr->next->prev = ptr->prev ;
    }

    hwmonHostClass::delHost ( ptr );
    hosts-- ;
    return (PASS) ;
}
/* Search the host list, head to tail, for the host with the specified
 * hostname. Returns a pointer to that host or NULL if there is none. */
struct hwmonHostClass::hwmon_host* hwmonHostClass::getHost ( string hostname )
{
    struct hwmon_host * node = hwmon_head ;

    /* an empty list never enters the loop */
    while ( node != NULL )
    {
        if ( hostname.compare ( node->hostname ) == 0 )
        {
            return node ;
        }

        /* never walk past the tail */
        if ( node == hwmon_tail )
        {
            break ;
        }
        node = node->next ;
    }
    return static_cast<struct hwmon_host *>(NULL);
}
/*
* Allocates memory for a new host and stores its the address in host_ptrs
*
* @param void
* @return pointer to the newly allocted host memory
*/
struct hwmonHostClass::hwmon_host * hwmonHostClass::newHost ( void )
{
struct hwmonHostClass::hwmon_host * temp_host_ptr = NULL ;
if ( memory_allocs == 0 )
{
memset ( host_ptrs, 0 , sizeof(struct hwmon_host *)*MAX_HOSTS);
}
// find an empty spot
for ( int i = 0 ; i < MAX_HOSTS ; i++ )
{
if ( host_ptrs[i] == NULL )
{
host_ptrs[i] = temp_host_ptr = new hwmon_host ;
memory_allocs++ ;
memory_used += sizeof (struct hwmonHostClass::hwmon_host);
return temp_host_ptr ;
}
}
elog ( "Failed to save new host pointer address\n" );
return temp_host_ptr ;
}
/* Audit the degrade state of the specified host and send the matching
 * degrade raise or clear event.
 *
 *  - a sensor that is degraded and alarmed causes a degrade raise
 *    naming that sensor
 *  - a sensor that is degraded but not alarmed has its degrade removed
 *  - with no such sensor, a host level degrade causes a degrade raise
 *  - otherwise a degrade clear is sent
 *
 * When built with WANT_FIT_TESTING the audit also services a set of
 * fault insertion requests. */
void hwmonHostClass::degrade_state_audit ( struct hwmonHostClass::hwmon_host * host_ptr )
{
bool found ;
string sensorname ;
int s ;
/* manage degrade state */
/* the search stops at the first sensor that is both degraded and alarmed */
for ( s = 0 , sensorname.clear() , found = false ; s < host_ptr->sensors ; s++ )
{
if ( host_ptr->sensor[s].degraded == true )
{
/* remember the name of the most recently examined degraded sensor */
sensorname = host_ptr->sensor[s].sensorname ;
/* do some auto correction of degrade */
if ( is_alarmed ( &host_ptr->sensor[s] ) == false )
{
slog ("%s %s is degraded but not alarmed ; correcting by removing degrade\n",
host_ptr->hostname.c_str(),
host_ptr->sensor[s].sensorname.c_str());
host_ptr->sensor[s].degraded = false ;
}
else
{
found = true ;
break ;
}
}
}
if ( found == true )
{
/* a degraded and alarmed sensor was found ; raise degrade for it */
hwmon_send_event ( host_ptr->hostname, MTC_DEGRADE_RAISE , sensorname.data() );
wlog_throttled (host_ptr->degrade_audit_log_throttle, 20, "%s degraded ... due to '%s' sensor\n", host_ptr->hostname.c_str(), sensorname.c_str());
}
else if ( host_ptr->degraded == true )
{
/* no such sensor but the host itself is marked degraded.
 * NOTE(review): sensorname here is empty or names a sensor whose
 * degrade was just corrected above - confirm that is intended */
hwmon_send_event ( host_ptr->hostname, MTC_DEGRADE_RAISE , sensorname.data());
wlog_throttled (host_ptr->degrade_audit_log_throttle, 20, "%s degraded ... due to 'hwmon' config error\n", host_ptr->hostname.c_str());
}
else
{
/* nothing degraded ; send the clear and reset the log throttle */
dlog ("%s available\n", host_ptr->hostname.c_str());
hwmon_send_event ( host_ptr->hostname, MTC_DEGRADE_CLEAR, "sensors" );
host_ptr->degrade_audit_log_throttle = 0 ;
}
#ifdef WANT_FIT_TESTING
/* FIT Support for a corrupt token ; the sensor load request is only
 * issued when the host's http event is not already active */
if (daemon_want_fit(FIT_CODE__HWMON__CORRUPT_TOKEN))
{
tokenUtil_fail_token ();
if ( host_ptr->event.active == false )
{
hwmonHttp_load_sensors ( host_ptr->hostname, host_ptr->event );
}
else
{
slog ("%s FIT skipping hwmonHttp_load_sensors failure trigger due to in-progress event\n",
host_ptr->hostname.c_str());
daemon_hits_fit (1);
}
}
/* the remaining FIT cases only apply to a host with a provisioned bmc */
if ( host_ptr->bm_provisioned == true )
{
/* FIT Support for creating orphan sensor or group alarm */
if ( daemon_want_fit ( FIT_CODE__HWMON__CREATE_ORPHAN_GROUP_ALARM, host_ptr->hostname ))
{
string orphan = "orphan_group_" + itos((rand()%1000)) ;
hwmonAlarm_major ( host_ptr->hostname, HWMON_ALARM_ID__SENSORGROUP, orphan, REASON_DEGRADED );
}
if ( daemon_want_fit ( FIT_CODE__HWMON__CREATE_ORPHAN_SENSOR_ALARM, host_ptr->hostname ))
{
string orphan = "orphan_sensor_" + itos((rand()%1000)) ;
hwmonAlarm_major ( host_ptr->hostname, HWMON_ALARM_ID__SENSOR, orphan, REASON_DEGRADED );
}
/* FIT Support for forcing raise or clear of any Group or Sensor Alarm in FM */
/* FIT Support for forcing state or status of any Group or Sensor Alarm in the database */
/* at most one group FIT is serviced per audit ; each case breaks out of the loop */
for ( int g = 0 ; g < host_ptr->groups ; g++ )
{
string sev ;
if ( daemon_want_fit ( FIT_CODE__HWMON__RAISE_GROUP_ALARM, host_ptr->hostname, host_ptr->group[g].group_name, sev ))
{
hwmon_alarm_util ( host_ptr->hostname, HWMON_ALARM_ID__SENSORGROUP, FM_ALARM_STATE_SET, alarmUtil_getSev_enum(sev), host_ptr->group[g].group_name, REASON_DEGRADED );
break ;
}
if ( daemon_want_fit ( FIT_CODE__HWMON__CLEAR_GROUP_ALARM, host_ptr->hostname, host_ptr->group[g].group_name ))
{
hwmon_alarm_util ( host_ptr->hostname, HWMON_ALARM_ID__SENSORGROUP, FM_ALARM_STATE_CLEAR, FM_ALARM_SEVERITY_CLEAR, host_ptr->group[g].group_name, REASON_OK );
break ;
}
if ( daemon_want_fit ( FIT_CODE__HWMON__SET_DB_GROUP_STATE, host_ptr->hostname, host_ptr->group[g].group_name, sev ))
{
hwmonHttp_mod_group ( host_ptr->hostname, host_ptr->event , host_ptr->group[g].group_uuid, "state", sev );
break ;
}
if ( daemon_want_fit ( FIT_CODE__HWMON__SET_DB_GROUP_STATUS, host_ptr->hostname, host_ptr->group[g].group_name, sev ))
{
hwmonHttp_mod_group ( host_ptr->hostname, host_ptr->event , host_ptr->group[g].group_uuid, "status", sev );
break ;
}
}
/* same handling for sensors ; at most one sensor FIT is serviced per audit */
for ( int s = 0 ; s < host_ptr->sensors ; s++ )
{
string sev ;
if ( daemon_want_fit ( FIT_CODE__HWMON__RAISE_SENSOR_ALARM, host_ptr->hostname, host_ptr->sensor[s].sensorname, sev ))
{
hwmon_alarm_util ( host_ptr->hostname, HWMON_ALARM_ID__SENSOR, FM_ALARM_STATE_SET, alarmUtil_getSev_enum(sev), host_ptr->sensor[s].sensorname, REASON_DEGRADED );
break ;
}
if ( daemon_want_fit ( FIT_CODE__HWMON__CLEAR_SENSOR_ALARM, host_ptr->hostname, host_ptr->sensor[s].sensorname ))
{
hwmon_alarm_util ( host_ptr->hostname, HWMON_ALARM_ID__SENSOR, FM_ALARM_STATE_CLEAR, FM_ALARM_SEVERITY_CLEAR, host_ptr->sensor[s].sensorname, REASON_OK );
break ;
}
if ( daemon_want_fit ( FIT_CODE__HWMON__SET_DB_SENSOR_STATE, host_ptr->hostname, host_ptr->sensor[s].sensorname, sev ))
{
hwmonHttp_mod_sensor ( host_ptr->hostname, host_ptr->event , host_ptr->sensor[s].uuid, "state", sev );
break ;
}
if ( daemon_want_fit ( FIT_CODE__HWMON__SET_DB_SENSOR_STATUS, host_ptr->hostname, host_ptr->sensor[s].sensorname, sev ))
{
hwmonHttp_mod_sensor ( host_ptr->hostname, host_ptr->event , host_ptr->sensor[s].uuid, "status", sev );
break ;
}
}
}
#endif
}
/* Frees the memory of a pre-allocated host and removes
 * it from the host_ptrs list.
 *
 * The address is only freed if it is found in host_ptrs, which guards
 * against deleting a pointer this class did not allocate.
 *
 * @param host * pointer to the host memory address to be freed
 * @return int return code { PASS or -EINVAL }
 */
int hwmonHostClass::delHost ( struct hwmonHostClass::hwmon_host * host_ptr )
{
    if ( hwmonHostClass::memory_allocs > 0 )
    {
        /* host_ptrs holds MAX_HOSTS entries (see the constructor and
         * newHost) so that is the bound to search with */
        for ( int i = 0 ; i < MAX_HOSTS ; i++ )
        {
            if ( hwmonHostClass::host_ptrs[i] == host_ptr )
            {
                delete host_ptr ;
                hwmonHostClass::host_ptrs[i] = NULL ;
                hwmonHostClass::memory_allocs-- ;
                hwmonHostClass::memory_used -= sizeof (struct hwmonHostClass::hwmon_host);
                return PASS ;
            }
        }
        elog ( "Error: Unable to validate memory address being freed\n" );
    }
    else
    {
        elog ( "Error: Free memory called when there is no memory to free\n" );
    }
    return -EINVAL ;
}
/* Clear the alarm and degrade assertions held against a host's bmc.
 *
 *  - clears the alarm of every sensor that is marked alarmed
 *  - clears the group alarm of every sensor group
 *  - sends a degrade clear for the host
 *  - clears the sensor configuration alarm
 */
void hwmonHostClass::clear_bm_assertions ( struct hwmonHostClass::hwmon_host * host_ptr )
{
    /* sensors ; only those currently marked alarmed are touched */
    for ( int s = 0 ; s < host_ptr->sensors ; s++ )
    {
        if ( host_ptr->sensor[s].alarmed != true )
            continue ;

        hwmonAlarm_clear ( host_ptr->hostname,
                           HWMON_ALARM_ID__SENSOR,
                           host_ptr->sensor[s].sensorname,
                           REASON_DEPROVISIONED );
        host_ptr->sensor[s].alarmed  = false ;
        host_ptr->sensor[s].degraded = false ;
    }

    /* groups ; the clear is issued for every group */
    for ( int grp = 0 ; grp < host_ptr->groups ; grp++ )
    {
        hwmonAlarm_clear ( host_ptr->hostname,
                           HWMON_ALARM_ID__SENSORGROUP,
                           host_ptr->group[grp].group_name,
                           REASON_DEPROVISIONED );
    }

    /* send the degrade clear anyway , just to be safe */
    hwmon_send_event ( host_ptr->hostname, MTC_DEGRADE_CLEAR , "sensors" );

    /* Bug Fix: This was outside the if bm_provisioned clause causing it
     *          to be called even if the bmc was not already provisioned
     */
    hwmonAlarm_clear ( host_ptr->hostname,
                       HWMON_ALARM_ID__SENSORCFG,
                       "sensors",
                       REASON_DEPROVISIONED );
}
/* Set the board management provisioning state of a host.
 *
 * Going from not provisioned to provisioned sets up the ping monitor,
 * re-initializes the bmc data, resets any stored secret for the host
 * and starts the secret timer.
 * Going from provisioned to not provisioned clears the bmc assertions,
 * closes down the ping monitor and re-initializes the bmc data.
 *
 * Returns PASS, or FAIL_HOSTNAME_LOOKUP if host_ptr is NULL.
 */
int hwmonHostClass::set_bm_prov ( struct hwmonHostClass::hwmon_host * host_ptr, bool state )
{
    if ( host_ptr == NULL )
    {
        return (FAIL_HOSTNAME_LOOKUP);
    }

    const bool was_provisioned = host_ptr->bm_provisioned ;

    if (( was_provisioned == false ) && ( state == true ))
    {
        /* starting fresh from an unprovisioned state */
        ilog ("%s board management controller is being provisioned\n", host_ptr->hostname.c_str());
        ilog ("%s setting up ping socket\n", host_ptr->hostname.c_str() );

        /* ---------------------------------------
         * Init bmc data based on monitoring mode
         * ---------------------------------------*/
        mtcTimer_reset ( host_ptr->ping_info.timer ) ;
        host_ptr->ping_info.stage    = PINGUTIL_MONITOR_STAGE__OPEN ;
        host_ptr->ping_info.ip       = host_ptr->bm_ip ;
        host_ptr->ping_info.hostname = host_ptr->hostname ;
        ipmi_bmc_data_init ( host_ptr );

        /* reset a secret already stored against this host's uuid */
        string host_uuid = hostBase.get_uuid( host_ptr->hostname );
        barbicanSecret_type * secret = secretUtil_find_secret( host_uuid );
        if ( secret != NULL )
        {
            secret->reference.clear() ;
            secret->payload.clear() ;
            secret->stage = MTC_SECRET__START ;
        }
        mtcTimer_start( host_ptr->secretTimer, hwmonTimer_handler, SECRET_START_DELAY );

        host_ptr->thread_extra_info.bm_pw.clear() ;
        host_ptr->thread_extra_info.bm_ip = host_ptr->bm_ip ;
        host_ptr->thread_extra_info.bm_un = host_ptr->bm_un ;
    }
    else if (( was_provisioned == true ) && ( state == false ))
    {
        /* handle the case going from provisioned to not provisioned */
        ilog ("%s board management controller is being deprovisioned\n", host_ptr->hostname.c_str());
        clear_bm_assertions ( host_ptr );
        pingUtil_fini       ( host_ptr->ping_info );
        ipmi_bmc_data_init  ( host_ptr );
    }

    host_ptr->bm_provisioned = state ;
    return (PASS);
}
/* Modify the board management info (ip, username, type) of an existing host.
 *
 * If any of the three values differ from what the host currently holds
 * then the host is updated, deprovisioned if it was provisioned and then
 * reprovisioned if the new info is complete.
 *
 * Returns FAIL_INVALID_HOSTNAME if inv.name is empty or a 'none' name,
 *         FAIL_NULL_POINTER     if the host is not found,
 *         otherwise PASS or the result of the last set_bm_prov call.
 */
int hwmonHostClass::mod_host ( node_inv_type & inv )
{
    if (( inv.name.empty()) ||
        ( !inv.name.compare (NONE)) ||
        ( !inv.name.compare ("None")))
    {
        wlog ("Refusing to modify host with 'null' or 'invalid' hostname (%s)\n",
               inv.uuid.c_str());
        return (FAIL_INVALID_HOSTNAME) ;
    }

    struct hwmonHostClass::hwmon_host * host_ptr = hwmonHostClass::getHost(inv.name);
    if ( host_ptr == NULL )
    {
        elog ("%s board management info modify failed\n", inv.name.c_str());
        return (FAIL_NULL_POINTER) ;
    }

    int  rc        = PASS ;
    bool modify_bm = false ;

    if ( host_ptr->bm_ip.compare( inv.bm_ip ) )
    {
        ilog ("%s modify board management 'ip' from '%s' to '%s'\n",
                  inv.name.c_str(),
                  host_ptr->bm_ip.c_str(),
                  inv.bm_ip.c_str());
        host_ptr->bm_ip = inv.bm_ip ;
        modify_bm = true ;
    }
    if ( host_ptr->bm_un.compare( inv.bm_un ) )
    {
        ilog ("%s modify board management 'username' from '%s' to '%s'\n",
                  inv.name.c_str(),
                  host_ptr->bm_un.c_str(),
                  inv.bm_un.c_str());
        host_ptr->bm_un = inv.bm_un ;
        modify_bm = true ;
    }
    if ( host_ptr->bm_type.compare( inv.bm_type ) )
    {
        ilog ("%s modify board management 'type' from '%s' to '%s'\n",
                  inv.name.c_str(),
                  host_ptr->bm_type.c_str(),
                  inv.bm_type.c_str());
        host_ptr->bm_type = inv.bm_type ;
        modify_bm = true ;
    }

    if ( modify_bm == false )
    {
        /* Only reprovision if the provisioning data has changed */
        dlog ("%s bmc provisioning unchanged\n", host_ptr->hostname.c_str());
        return (rc);
    }

    ilog ("%s modify summary %s %s@%s ... provisioned = %s\n",
              inv.name.c_str(),
              host_ptr->bm_type.c_str(),
              host_ptr->bm_un.c_str(),
              host_ptr->bm_ip.c_str(),
              host_ptr->bm_provisioned ? "Yes" : "No" );

    if ( host_ptr->bm_provisioned == true )
    {
        /* if we have a credentials only change then disable the sensor
         * model only to get re-enabled if sensor monitoring is
         * successful with the new credentials */
        if (( hostUtil_is_valid_bm_type (host_ptr->bm_type) == true ) &&
            ( host_ptr->bm_un.compare(NONE)))
        {
            ipmi_set_group_state ( host_ptr, "disabled" );
            ipmi_disable_sensors ( host_ptr );
        }
        rc = set_bm_prov ( host_ptr, false );
    }

    /* Reprovision only with complete board management info.
     * The username test matches add_host: a username of NONE
     * means there is no username and must not provision the bmc. */
    if (( hostUtil_is_valid_bm_type (host_ptr->bm_type) == true ) &&
        ( hostUtil_is_valid_ip_addr (host_ptr->bm_ip)   == true ) &&
        ( !host_ptr->bm_un.empty() ) &&
        ( host_ptr->bm_un.compare(NONE)))
    {
        rc = set_bm_prov ( host_ptr, true );
    }
    return (rc);
}
/* Request a degrade audit for every host in the host list.
 *
 * The walk is bounded by both the host count and the end of the list,
 * so a NULL head with a non-zero host count cannot be dereferenced. */
void hwmonHostClass::set_degrade_audit ( void )
{
    struct hwmon_host * ptr = hwmon_head ;
    for ( int i = 0 ; ( ptr != NULL ) && ( i < hosts ) ; i++ )
    {
        ptr->want_degrade_audit = true ;
        ptr = ptr->next ;
    }
}
/* Add a host to hardware monitoring from its inventory record.
 *
 * Returns FAIL_INVALID_HOSTNAME if inv.name is empty or a 'none' name,
 *         FAIL_OPERATION        if the host exists and is being deleted,
 *         RETRY                 if the host already exists,
 *         FAIL_NULL_POINTER     if the host could not be created,
 *         PASS                  once the new host is fully initialized.
 */
int hwmonHostClass::add_host ( node_inv_type & inv )
{
    const bool name_invalid = (( inv.name.empty()) ||
                               ( !inv.name.compare (NONE)) ||
                               ( !inv.name.compare ("None"))) ;
    if ( name_invalid )
    {
        wlog ("Refusing to add host with 'null' or 'invalid' hostname (%s)\n",
               inv.uuid.c_str());
        return (FAIL_INVALID_HOSTNAME) ;
    }

    /* add to the host base first ; a failure there is only logged */
    int rc = hostBase.add_host ( inv );
    if ( rc > RETRY )
    {
        elog ("Error\n");
    }

    struct hwmonHostClass::hwmon_host * host_ptr = hwmonHostClass::getHost(inv.name);
    if ( host_ptr != NULL )
    {
        if ( host_ptr->host_delete == true )
        {
            ilog ("%s cannot be added while previous delete is still in progress\n", host_ptr->hostname.c_str());
            return (FAIL_OPERATION);
        }
        dlog ("%s already provisioned\n", host_ptr->hostname.c_str());

        /* Send back a retry in case the add needs to be converted to a modify */
        return (RETRY);
    }

    /* Otherwise add it as a new host */
    host_ptr = hwmonHostClass::addHost(inv.name);
    if ( host_ptr == NULL )
    {
        elog ("%s host service add failed\n", inv.name.c_str());
        return (FAIL_NULL_POINTER);
    }

    /* Add board management stuff */
    host_ptr->bm_ip   = inv.bm_ip   ;
    host_ptr->bm_un   = inv.bm_un   ;
    host_ptr->bm_type = inv.bm_type ;

    /* default the socket number to closed */
    host_ptr->ping_info.sock = 0 ;
    host_ptr->quanta_server  = false ;

    ipmi_bmc_data_init ( host_ptr );

    /* Default audit interval to zero - disable sensor monitoring by default */
    host_ptr->interval             = 0 ;
    host_ptr->interval_old         = 0 ;
    host_ptr->interval_changed     = false ;
    host_ptr->accounting_ok        = false ;
    host_ptr->accounting_bad_count = 0 ;

    /* Additions for sensor monitoring using IPMI protocol */
    host_ptr->want_degrade_audit         = false ;
    host_ptr->degrade_audit_log_throttle = 0 ;
    host_ptr->json_ipmi_sensors.clear();

    /* Sensor Monitoring Control Structure */
    host_ptr->monitor_ctrl.stage            = HWMON_SENSOR_MONITOR__START ;
    host_ptr->monitor_ctrl.last_sample_time = 0 ;
    host_ptr->monitor_ctrl.this_sample_time = 0 ;
    host_ptr->sensor_query_count            = 0 ;

    /* Sensor Monitoring Thread 'Extra Request Information' */
    host_ptr->empty_secret_log_throttle = 0 ;
    host_ptr->thread_extra_info.bm_ip   = host_ptr->bm_ip ;
    host_ptr->thread_extra_info.bm_un   = host_ptr->bm_un ;
    host_ptr->thread_extra_info.bm_pw.clear() ;
    host_ptr->thread_extra_info.sensor_query_request = IPMITOOL_PATH_AND_FILENAME ;

    /* Sensor Monitoring Thread Initialization */
    thread_init ( host_ptr->ipmitool_thread_ctrl,
                  host_ptr->ipmitool_thread_info,
                 &host_ptr->thread_extra_info,
                  hwmonThread_ipmitool,
                  DEFAULT_THREAD_TIMEOUT_SECS,
                  host_ptr->hostname,
                  THREAD_NAME__IPMITOOL);

    /* TODO: create a is_bm_info_valid */
    /* provision the bmc only when ip, type and username are all usable */
    const bool bm_info_valid =
        (( hostUtil_is_valid_ip_addr (host_ptr->bm_ip)   == true ) &&
         ( hostUtil_is_valid_bm_type (host_ptr->bm_type) == true ) &&
         ( !host_ptr->bm_un.empty() ) &&
         ( host_ptr->bm_un.compare(NONE)) ) ;
    set_bm_prov ( host_ptr, bm_info_valid );

    ilog ("%s BMC is %sprovisioned\n", host_ptr->hostname.c_str(), host_ptr->bm_provisioned ? "" : "not " );

    host_ptr->bmc_fw_version.clear();
    host_ptr->group_index = 0 ;

    /* Init sensor model relearn controls, state and status */
    host_ptr->relearn               = false ;
    host_ptr->relearn_request       = false ;
    host_ptr->relearn_retry_counter = 0 ;
    host_ptr->relearn_done_date.clear();
    init_model_attributes ( host_ptr->model_attributes_preserved );

    /* Add to the end of inventory */
    hostlist.push_back ( host_ptr->hostname );

    dlog ("%s running add FSM\n", inv.name.c_str());
    return (PASS);
}
/* Remove a host from the host base, the host list and the inventory
 * hostlist.
 *
 * Returns FAIL for an empty hostname, the host base removal result if
 * that failed, otherwise the result of removing the host from the list.
 */
int hwmonHostClass::rem_host ( string hostname )
{
    if ( hostname.empty() )
    {
        return ( FAIL );
    }

    /* Remove the hostBase */
    const int base_rc = hostBase.rem_host ( hostname );

    /* the host is removed from the host list whatever the base result */
    const int list_rc = hwmonHostClass::remHost ( hostname );

    int rc = base_rc ;
    if ( base_rc == PASS )
    {
        rc = list_rc ;
    }
    else
    {
        slog ("potential memory leak !\n");
    }

    /* Now remove the service specific component */
    hostlist.remove ( hostname );
    return ( rc );
}
/* Flag a host for deletion.
 *
 * The host is marked with host_delete and its delete stage is set to
 * HWMON_DEL__START, unless a delete is already in progress.
 *
 * Returns PASS for a known host, FAIL_DEL_UNKNOWN otherwise.
 */
int hwmonHostClass::request_del_host ( string hostname )
{
    hwmonHostClass::hwmon_host * host_ptr = hwmonHostClass::getHost( hostname );
    if ( host_ptr == NULL )
    {
        wlog ("Unknown hostname: %s\n", hostname.c_str());
        return (FAIL_DEL_UNKNOWN);
    }

    if ( host_ptr->host_delete != true )
    {
        host_ptr->delStage    = HWMON_DEL__START ;
        host_ptr->host_delete = true ;
    }
    else
    {
        ilog ("%s delete already in progress\n", hostname.c_str());
    }
    return (PASS);
}
/* Delete a host by name.
 *
 * Returns FAIL_DEL_UNKNOWN if the host is not known, otherwise the
 * result of rem_host.
 */
int hwmonHostClass::del_host ( string hostname )
{
    if ( hwmonHostClass::getHost( hostname ) == NULL )
    {
        wlog ("Unknown hostname: %s\n", hostname.c_str());
        return (FAIL_DEL_UNKNOWN);
    }

    const int rc = rem_host ( hostname );
    if ( rc != PASS )
    {
        elog ("%s delete host failed (rc:%d)\n", hostname.c_str(), rc );
    }
    else
    {
        ilog ("%s deleted\n", hostname.c_str());
        print_node_info();
    }
    return (rc);
}
/* Enable or disable sensor monitoring for a host.
 *
 * Requests for a host whose bmc is not provisioned, or that has a
 * delete pending, are ignored and return PASS.
 *
 * Returns FAIL_UNKNOWN_HOSTNAME if the host is not known,
 *         the result of ipmi_set_group_state if a group state change
 *         was issued, otherwise PASS.
 */
int hwmonHostClass::mon_host ( string hostname, bool monitor )
{
    hwmonHostClass::hwmon_host * hwmon_host_ptr = hwmonHostClass::getHost( hostname );
    if ( hwmon_host_ptr == NULL )
    {
        dlog ("Unknown hostname: %s\n", hostname.c_str());
        return (FAIL_UNKNOWN_HOSTNAME);
    }

    /* The host is known, so from here on the unknown hostname code must
     * not be returned ; default to PASS for the 'nothing to do' paths */
    int    rc         = PASS ;
    bool   change     = false ;
    string want_state = monitor ? "enabled" : "disabled" ;

    /* if not provisioned then just return */
    if ( hwmon_host_ptr->bm_provisioned == false )
    {
        dlog ("%s ignoring monitor '%s' request for unprovisioned bmc\n",
                  hostname.c_str(), want_state.c_str());
        return (PASS);
    }
    else if ( hwmon_host_ptr->host_delete == true )
    {
        dlog ("%s ignoring monitor '%s' request while delete is pending\n",
                  hostname.c_str(), want_state.c_str() );
        return (PASS);
    }

    /* clear assertions when monitoring is being turned off ;
     * the bmc is known to be provisioned at this point */
    if (( monitor == false ) && ( hwmon_host_ptr->monitor != monitor ))
    {
        clear_bm_assertions ( hwmon_host_ptr );
    }

    if ( hwmon_host_ptr->monitor == monitor )
    {
        dlog ("%s sensor monitoring already %s\n", hwmon_host_ptr->hostname.c_str(), monitor ? "enabled" : "disabled" );

        /* if any group is not in the correct enabled state then set change bool */
        for ( int g = 0 ; g < hwmon_host_ptr->groups ; ++g )
        {
            if ( hwmon_host_ptr->group[g].group_state.compare(want_state) )
            {
                change = true ;
            }
        }
    }
    else
    {
        ilog ("%s sensor monitoring set to %s\n", hwmon_host_ptr->hostname.c_str(), monitor ? "enabled" : "disabled" );
        change = true ;
        hwmon_host_ptr->monitor = monitor ;
    }

    if ( change == true )
    {
        if ( monitor == false )
        {
            /* sets all groups state to disable if monitor is false ; handle state change failure alarming internally */
            rc = ipmi_set_group_state ( hwmon_host_ptr, "disabled" );
        }
        else if ( hwmon_host_ptr->group[0].group_state.compare("disabled") == 0 )
        {
            /* or to enabled if presently disabled - don't change from failed to enabled over a monitor start */
            rc = ipmi_set_group_state ( hwmon_host_ptr, "enabled" );
        }
    }
    return (rc);
}
/****************************************************************************/
/** Host Class Setter / Getters */
/****************************************************************************/
/** Get this hosts board management provisioned state ;
 *  false is also returned when the host lookup fails */
bool hwmonHostClass::is_bm_provisioned ( string hostname )
{
    hwmonHostClass::hwmon_host * host = hwmonHostClass::getHost ( hostname );
    if ( host == NULL )
    {
        elog ("%s lookup failed\n", hostname.c_str() );
        return (false);
    }
    return (host->bm_provisioned);
}
/** Get this hosts board management IP address.
 *
 *  Returns NONE when the stored address is not a valid ip address and
 *  an empty string when the host lookup fails. */
string hwmonHostClass::get_bm_ip ( string hostname )
{
    hwmonHostClass::hwmon_host * host = hwmonHostClass::getHost ( hostname );
    if ( host == NULL )
    {
        elog ("%s bm ip lookup failed\n", hostname.c_str() );
        return ("");
    }

    if ( hostUtil_is_valid_ip_addr (host->bm_ip) == false )
    {
        return (NONE);
    }
    return (host->bm_ip);
}
/** Get this hosts board management TYPE ilo3/ilo4/quanta/etc.
 *
 *  Returns an empty string when the host lookup fails. */
string hwmonHostClass::get_bm_type ( string hostname )
{
    hwmonHostClass::hwmon_host * host = hwmonHostClass::getHost ( hostname );
    if ( host == NULL )
    {
        elog ("%s bm type lookup failed\n", hostname.c_str() );
        return ("");
    }
    return (host->bm_type);
}
/** Get this hosts board management user name.
 *
 *  Returns NONE when no user name is stored and an empty string when
 *  the host lookup fails. */
string hwmonHostClass::get_bm_un ( string hostname )
{
    hwmonHostClass::hwmon_host * host = hwmonHostClass::getHost ( hostname );
    if ( host == NULL )
    {
        elog ("%s bm username lookup failed\n", hostname.c_str() );
        return ("");
    }

    if ( host->bm_un.empty() )
    {
        return (NONE);
    }
    return (host->bm_un);
}
/** Get this hosts relearn done date.
 *
 *  When the host lookup fails, or no date is stored, an error is
 *  logged and the result of pt() is returned instead. */
string hwmonHostClass::get_relearn_done_date ( string hostname )
{
    hwmonHostClass::hwmon_host * host = hwmonHostClass::getHost ( hostname );
    if (( host != NULL ) && ( host->relearn_done_date.empty() == false ))
    {
        return (host->relearn_done_date);
    }

    elog ("%s relearn done date empty or hostname lookup failed\n", hostname.c_str());
    return (pt());
}
struct hwmonHostClass::hwmon_host * hwmonHostClass::getHost_timer ( timer_t tid )
{
/* check for empty list condition */
if (( hwmon_head ) && ( tid ))
{
for ( struct hwmon_host * host_ptr = hwmon_head ; ; host_ptr = host_ptr->next )
{
if ( host_ptr->ipmitool_thread_ctrl.timer.tid == tid )
{
return host_ptr ;
}
if ( host_ptr->hostTimer.tid == tid )
{
return host_ptr ;
}
if ( host_ptr->secretTimer.tid == tid )
{
return host_ptr ;
}
if ( host_ptr->ping_info.timer.tid == tid )
{
return host_ptr ;
}
if ( host_ptr->monitor_ctrl.timer.tid == tid )
{
return host_ptr ;
}
if ( host_ptr->addTimer.tid == tid )
{
return host_ptr ;
}
if ( host_ptr->relearnTimer.tid == tid )
{
return host_ptr ;
}
if (( host_ptr->next == NULL ) || ( host_ptr == hwmon_tail ))
break ;
}
}
return static_cast<struct hwmon_host *>(NULL);
}
/**********************************************************************************
 *
 * Name        : get_sensor
 *
 * Description : Look up the sensor of the specified host whose sensor
 *               name matches the supplied entity path string.
 *
 * Returns     : pointer to the matching sensor, otherwise NULL
 *
 **********************************************************************************/
sensor_type * hwmonHostClass::get_sensor ( string hostname, string entity_path )
{
    int rc = FAIL_NOT_FOUND ;

    if ( entity_path.empty() == false )
    {
        hwmonHostClass::hwmon_host * host_ptr = hwmonHostClass::getHost ( hostname );
        if ( host_ptr != NULL )
        {
            for ( int s = 0 ; s < host_ptr->sensors ; s++ )
            {
                sensor_type * candidate = &host_ptr->sensor[s] ;
                if ( entity_path.compare(candidate->sensorname) != 0 )
                    continue ;

                blog ("%s '%s' sensor found\n",
                          hostname.c_str(),
                          host_ptr->sensor[s].sensorname.c_str());
                return (candidate) ;
            }
        }
    }
    else
    {
        rc = FAIL_STRING_EMPTY ;
    }

    /* no match ; log according to the reason */
    if ( rc == FAIL_NOT_FOUND )
    {
        wlog ("%s '%s' entity path not found\n", hostname.c_str() , entity_path.c_str());
    }
    else if ( rc )
    {
        elog ("%s sensor entity path query failed\n", hostname.c_str() );
    }
    return (static_cast<sensor_type*>(NULL));
}
/* Add a sensor to a host, or refresh it if the host already holds it.
 *
 * The host's sensor table is searched for an existing entry ; when none
 * matches the sensor is stored in the next free slot and the host's
 * sensor count is incremented.
 *
 * Returns FAIL_STRING_EMPTY    if the sensor has no name,
 *         FAIL_HOSTNAME_LOOKUP if the host is not known,
 *         FAIL                 if the host's sensor table is full,
 *         PASS                 otherwise.
 */
int hwmonHostClass::add_sensor ( string hostname, sensor_type & sensor )
{
    if ( sensor.sensorname.empty() )
        return (FAIL_STRING_EMPTY);

    int rc = PASS ;
    hwmonHostClass::hwmon_host * host_ptr = hwmonHostClass::getHost ( hostname );
    if ( host_ptr == NULL )
    {
        /* an unknown host is a failure ; nothing was added */
        rc = FAIL_HOSTNAME_LOOKUP ;
    }
    else
    {
        /* NOTE(review): the match is made between the new sensor's
         * entity_path and the stored sensorname - confirm that these
         * are meant to be the same string */
        int  i ;
        bool found = false ;
        for ( i = 0 ; i < host_ptr->sensors ; i++ )
        {
            if ( !sensor.entity_path.compare(host_ptr->sensor[i].sensorname))
            {
                found = true ;
                break ;
            }
        }

        /* with no match 'i' is the next free slot, if there is one */
        if ( i >= MAX_HOST_SENSORS )
        {
            rc = FAIL ;
        }
        else
        {
            sensor_type & entry = host_ptr->sensor[i] ;

            /* PATCHBACK - to REL3 and earlier
             * This init should have been initialized here all along */
            hwmonSensor_init ( hostname, &entry );

            entry.sensorname = sensor.sensorname ; /* for fresh add case */
            entry.sensortype = sensor.sensortype ;
            entry.script     = sensor.script     ;
            entry.uuid       = sensor.uuid       ;
            entry.datatype   = sensor.datatype   ;
            entry.group_uuid = sensor.group_uuid ;
            entry.host_uuid  = sensor.host_uuid  ;
            entry.algorithm  = sensor.algorithm  ;
            entry.status     = sensor.status     ;
            entry.state      = sensor.state      ;
            entry.prot       = sensor.prot       ;
            entry.kind       = sensor.kind       ;
            entry.unit       = sensor.unit       ;
            entry.suppress   = sensor.suppress   ;
            entry.path       = sensor.path       ;

            /* entity path is '<path><delimiter><name>' or just the
             * name when the sensor has no path */
            if ( sensor.path.empty() )
            {
                entry.entity_path = sensor.sensorname ;
            }
            else
            {
                entry.entity_path = sensor.path ;
                entry.entity_path.append(ENTITY_DELIMITER);
                entry.entity_path.append(sensor.sensorname);
            }

            entry.unit_base     = sensor.unit_base     ;
            entry.unit_rate     = sensor.unit_rate     ;
            entry.unit_modifier = sensor.unit_modifier ;

            entry.actions_minor = sensor.actions_minor ;
            entry.actions_major = sensor.actions_major ;
            entry.actions_critl = sensor.actions_critl ;

            entry.t_critical_lower = sensor.t_critical_lower ;
            entry.t_major_lower    = sensor.t_major_lower    ;
            entry.t_minor_lower    = sensor.t_minor_lower    ;
            entry.t_minor_upper    = sensor.t_minor_upper    ;
            entry.t_major_upper    = sensor.t_major_upper    ;
            entry.t_critical_upper = sensor.t_critical_upper ;

            if ( found == false )
                host_ptr->sensors++ ;
        }
    }

    if ( rc )
    {
        elog ("%s '%s' sensor add failed\n", hostname.c_str(),
                                             sensor.sensorname.c_str());
    }
    return (rc);
}
/****************************************************************************
 *
 * Name:        hwmon_get_sensorgroup
 *
 * Description: Returns a pointer to the sensor group of the specified host
 *              that holds a sensor whose name matches the supplied entity
 *              path string, or NULL if there is no such group.
 *
 ****************************************************************************/
struct sensor_group_type * hwmonHostClass::hwmon_get_sensorgroup ( string hostname, string entity_path )
{
    int rc = FAIL_NOT_FOUND ;

    if ( ( entity_path.empty() ) || ( hostname.empty() ) )
    {
        rc = FAIL_STRING_EMPTY ;
        slog ("%s empty hostname or entity path '%s' string\n", hostname.c_str(), entity_path.c_str() );
    }
    else
    {
        hwmonHostClass::hwmon_host * host_ptr = hwmonHostClass::getHost ( hostname );
        if ( host_ptr == NULL )
        {
            rc = FAIL_HOSTNAME_LOOKUP ;
            elog ("%s hostname lookup failed\n", hostname.c_str() );
        }
        else
        {
            for ( int grp = 0 ; grp < host_ptr->groups ; grp++ )
            {
                struct sensor_group_type * group_ptr = &host_ptr->group[grp] ;

                /* look for the sensor in the group */
                for ( int idx = 0 ; idx < group_ptr->sensors ; idx++ )
                {
                    if ( group_ptr->sensor_ptr[idx]->sensorname.compare(entity_path) != 0 )
                        continue ;

                    blog ("%s '%s' sensor found in '%s' group\n",
                              hostname.c_str(),
                              host_ptr->group[grp].sensor_ptr[idx]->sensorname.c_str(),
                              host_ptr->group[grp].group_name.c_str());
                    return (group_ptr);
                }
            }
        }
    }

    /* the search ran but no group holds the sensor */
    if ( rc == FAIL_NOT_FOUND )
    {
        slog ("%s '%s' entity path not found in any group\n", hostname.c_str() , entity_path.c_str());
    }
    return (static_cast<struct sensor_group_type*>(NULL));
}
/**********************************************************************************
*
* Name : hwmon_get_group
*
* Description : Returns a pointer to the sensor group that matches the supplied
* group name.
*
**********************************************************************************/
struct sensor_group_type * hwmonHostClass::hwmon_get_group ( string hostname, string group_name )
{
    /* rc is only used to select the log emitted on a failed query ;
     * the function itself returns a group pointer or NULL */
    int rc = FAIL_NOT_FOUND ;

    if ( group_name.empty() || hostname.empty() )
    {
        /* nothing to search with */
        rc = FAIL_STRING_EMPTY ;
    }
    else
    {
        hwmonHostClass::hwmon_host * host_ptr ;
        host_ptr = hwmonHostClass::getHost ( hostname );
        if ( host_ptr == NULL )
        {
            /* distinguish an unknown host from a missing group so that
             * the error log below is reachable */
            rc = FAIL_HOSTNAME_LOOKUP ;
        }
        else
        {
            for ( int i = 0 ; i < host_ptr->groups ; i++ )
            {
                if ( !group_name.compare(host_ptr->group[i].group_name))
                {
                    blog ("%s '%s' sensor group found\n",
                              hostname.c_str(),
                              host_ptr->group[i].group_name.c_str());
                    return (&host_ptr->group[i]) ;
                }
            }
        }
    }

    if ( rc == FAIL_NOT_FOUND )
    {
        /* host exists but has no group by that name */
        wlog ("%s '%s' sensor group not found\n", hostname.c_str() , group_name.c_str());
    }
    else if ( rc )
    {
        /* empty argument or unknown host */
        elog ("%s sensor group query failed\n", hostname.c_str() );
    }
    return (static_cast<struct sensor_group_type*>(NULL));
}
/* Add a sensor group to a host */
int hwmonHostClass::hwmon_add_group ( string hostname, struct sensor_group_type & group )
{
    /* a group cannot be added or matched without a name */
    if ( group.group_name.empty() )
        return (FAIL_STRING_EMPTY);

    int rc = PASS ;

    hwmonHostClass::hwmon_host * host_ptr ;
    host_ptr = hwmonHostClass::getHost ( hostname );
    if ( host_ptr == NULL )
    {
        /* No such host means nothing was added ; report that as a
         * failure rather than returning PASS */
        rc = FAIL_HOSTNAME_LOOKUP ;
    }
    else
    {
        /* Look for an existing group of the same name. If one is found
         * its slot is overwritten, otherwise 'i' is left at the first
         * unused slot (i == host_ptr->groups). */
        int i ;
        bool found = false ;
        for ( i = 0 ; i < host_ptr->groups ; i++ )
        {
            if ( !group.group_name.compare(host_ptr->group[i].group_name))
            {
                found = true ;
                break ;
            }
        }

        if ( i >= MAX_HOST_GROUPS )
        {
            /* no free slot left for a new group */
            rc = FAIL ;
        }
        else
        {
            host_ptr->group[i].failed     = false ;
            host_ptr->group[i].host_uuid  = group.host_uuid ;
            host_ptr->group[i].group_name = group.group_name ; /* for fresh add case */
            host_ptr->group[i].group_uuid = group.group_uuid ;
            host_ptr->group[i].hostname   = hostname ;

            /* flag the host so the interval change is picked up elsewhere */
            host_ptr->interval_changed = true ;

            host_ptr->group[i].group_interval = group.group_interval ;
            host_ptr->group[i].sensortype     = group.sensortype ;
            host_ptr->group[i].datatype       = group.datatype ;
            host_ptr->group[i].algorithm      = group.algorithm ;
            host_ptr->group[i].group_state    = group.group_state ;
            host_ptr->group[i].suppress       = group.suppress ;
            host_ptr->group[i].path           = group.path ;

            host_ptr->group[i].unit_base_group     = group.unit_base_group ;
            host_ptr->group[i].unit_rate_group     = group.unit_rate_group ;
            host_ptr->group[i].unit_modifier_group = group.unit_modifier_group ;

            host_ptr->group[i].actions_minor_choices    = group.actions_minor_choices ;
            host_ptr->group[i].actions_major_choices    = group.actions_major_choices ;
            host_ptr->group[i].actions_critical_choices = group.actions_critical_choices ;

            host_ptr->group[i].actions_minor_group = group.actions_minor_group ;
            host_ptr->group[i].actions_major_group = group.actions_major_group ;
            host_ptr->group[i].actions_critl_group = group.actions_critl_group ;

            host_ptr->group[i].t_critical_lower_group = group.t_critical_lower_group ;
            host_ptr->group[i].t_critical_upper_group = group.t_critical_upper_group ;
            host_ptr->group[i].t_major_lower_group    = group.t_major_lower_group ;
            host_ptr->group[i].t_major_upper_group    = group.t_major_upper_group ;
            host_ptr->group[i].t_minor_lower_group    = group.t_minor_lower_group ;
            host_ptr->group[i].t_minor_upper_group    = group.t_minor_upper_group ;

            /* Default the read index to the first sensor in this group.
             * This member is only used when we are reading group sensors individually */
            host_ptr->group[i].sensor_read_index = 0 ;

            blog ("%s '%s' sensor group added\n", host_ptr->hostname.c_str(), host_ptr->group[i].group_name.c_str() );

            /* only a fresh add grows the group count */
            if ( found == false )
                host_ptr->groups++ ;
        }
    }

    if ( rc )
    {
        elog ("%s '%s' sensor group add failed\n", hostname.c_str(),
                  group.group_name.c_str());
    }
    return (rc);
}
/****************************************************************************
*
* Name: add_group_uuid
*
* Description: Adds the sysinv supplied group uuid to hwmon for
* the specified group/host.
*
****************************************************************************/
int hwmonHostClass::add_group_uuid ( string & hostname, string & group_name, string & uuid )
{
    hwmonHostClass::hwmon_host * host_ptr = NULL ;

    /* only query the host list when both keys are supplied */
    if ( !group_name.empty() && !hostname.empty() )
    {
        host_ptr = hwmonHostClass::getHost ( hostname );
    }

    if ( host_ptr != NULL )
    {
        for ( int grp = 0 ; grp < host_ptr->groups ; grp++ )
        {
            if ( host_ptr->group[grp].group_name != group_name )
                continue ;

            blog1 ("%s '%s' sensor group found\n",
                       hostname.c_str(),
                       host_ptr->group[grp].group_name.c_str());

            /* first group with a matching name gets the uuid */
            host_ptr->group[grp].group_uuid = uuid ;
            return (PASS);
        }
    }

    /* empty argument, unknown host or no such group on that host */
    wlog ("%s '%s' sensor group not found\n", hostname.c_str() , group_name.c_str());
    return (FAIL_NOT_FOUND);
}
/****************************************************************************
*
* Name: add_sensor_uuid
*
* Description: Adds the sysinv supplied sensor uuid to hwmon for
* the specified sensor/host.
*
****************************************************************************/
int hwmonHostClass::add_sensor_uuid ( string & hostname, string & sensorname, string & uuid )
{
    hwmonHostClass::hwmon_host * host_ptr = NULL ;

    /* only query the host list when both keys are supplied */
    if ( !sensorname.empty() && !hostname.empty() )
    {
        host_ptr = hwmonHostClass::getHost ( hostname );
    }

    if ( host_ptr != NULL )
    {
        for ( int idx = 0 ; idx < host_ptr->sensors ; idx++ )
        {
            if ( host_ptr->sensor[idx].sensorname != sensorname )
                continue ;

            blog1 ("%s '%s' sensor found\n",
                       hostname.c_str(),
                       host_ptr->sensor[idx].sensorname.c_str());

            /* first sensor with a matching name gets the uuid */
            host_ptr->sensor[idx].uuid = uuid ;
            return (PASS);
        }
    }

    /* empty argument, unknown host or no such sensor on that host */
    wlog ("%s '%s' sensor not found\n", hostname.c_str() , sensorname.c_str());
    return (FAIL_NOT_FOUND);
}
/*****************************************************************************
*
* Name : hwmon_del_groups
*
* Description: Delete all the groups from the specified host in hwmon
*
* Purpose : In support of group reprovisioning
*
*****************************************************************************/
int hwmonHostClass::hwmon_del_groups ( struct hwmonHostClass::hwmon_host * host_ptr )
{
    /* re-initialize every group slot that is currently in use */
    int slot = 0 ;
    while ( slot < host_ptr->groups )
    {
        hwmonGroup_init ( host_ptr->hostname , &host_ptr->group[slot] );
        ++slot ;
    }

    /* the host now has no groups */
    host_ptr->groups = 0 ;

    /* this operation always reports success */
    return (PASS);
}
/*****************************************************************************
*
* Name : hwmon_del_sensors
*
* Description: Delete all the sensors from the specified host in hwmon
*
* Purpose : In support of sensor reprovisioning
*
*****************************************************************************/
int hwmonHostClass::hwmon_del_sensors ( struct hwmonHostClass::hwmon_host * host_ptr )
{
    host_ptr->quanta_server = false ;

    /* re-initialize every sensor slot that is currently in use */
    int slot = 0 ;
    while ( slot < host_ptr->sensors )
    {
        hwmonSensor_init ( host_ptr->hostname, &host_ptr->sensor[slot] );
        ++slot ;
    }

    /* these are the sample data transient lists */
    /* NOTE(review): this stops one short of MAX_HOST_SENSORS ; confirm
     * against the declared size of the sample array */
    for ( int n = 0 ; n < (MAX_HOST_SENSORS-1) ; n++ )
    {
        sensor_data_init ( host_ptr->sample[n] );
    }

    /* zero the counts and the checksums */
    host_ptr->last_sample_sensor_checksum = 0 ;
    host_ptr->sample_sensor_checksum      = 0 ;
    host_ptr->profile_sensor_checksum     = 0 ;
    host_ptr->samples                     = 0 ;
    host_ptr->sensors                     = 0 ;

    /* this operation always reports success */
    return (PASS);
}
/* look up a host name from a host uuid */
string hwmonHostClass::get_hostname ( string uuid )
{
    string hostname ;

    /* only ask the host base for a name when a uuid was supplied */
    if ( uuid.empty() == false )
    {
        hostname = hostBase.get_hostname ( uuid ) ;
    }

    if ( hostname.empty() )
    {
        /* empty uuid or no name returned for it */
        wlog ("hostname not found (uuid:%s)\n", uuid.c_str());
        return ("");
    }

    dlog ("%s is hostname for uuid:%s\n", hostname.c_str(), uuid.c_str());
    return (hostname);
}
/*************************************************************************
*
* Sensor Model Attributes Saving and Restoring Support Utilities
*
*************************************************************************/
void init_model_attributes ( model_attr_type & attr )
{
    /* no saved groups and the default audit interval */
    attr.groups   = 0 ;
    attr.interval = HWMON_DEFAULT_AUDIT_INTERVAL ;

    /* every slot gets the null group name with the
     * ignore / log / alarm actions for minor / major / critical */
    int slot = 0 ;
    while ( slot < MAX_HOST_GROUPS )
    {
        attr.group_actions[slot].critl = HWMON_ACTION_ALARM ;
        attr.group_actions[slot].major = HWMON_ACTION_LOG ;
        attr.group_actions[slot].minor = HWMON_ACTION_IGNORE ;
        attr.group_actions[slot].name  = HWMON_GROUP_NAME__NULL ;
        ++slot ;
    }
}
/*****************************************************************************
*
* Name : save_model_attributes
*
* Description: Save key sensor group settings.
*
* - severity level group_actions
* - audit interval
*
*****************************************************************************/
void hwmonHostClass::save_model_attributes ( struct hwmonHostClass::hwmon_host * host_ptr )
{
    /* always start from the defaults so stale data is never kept */
    init_model_attributes ( host_ptr->model_attributes_preserved );

    /* with no groups there is nothing to preserve ; defaults stand */
    if ( host_ptr->groups == 0 )
        return ;

    /* copy the name and the three severity actions of each group */
    for ( int grp = 0 ; grp < host_ptr->groups ; grp++ )
    {
        host_ptr->model_attributes_preserved.group_actions[grp].critl = host_ptr->group[grp].actions_critl_group ;
        host_ptr->model_attributes_preserved.group_actions[grp].major = host_ptr->group[grp].actions_major_group ;
        host_ptr->model_attributes_preserved.group_actions[grp].minor = host_ptr->group[grp].actions_minor_group ;
        host_ptr->model_attributes_preserved.group_actions[grp].name  = host_ptr->group[grp].group_name ;
    }

    /* record how many groups were saved along with the audit interval */
    host_ptr->model_attributes_preserved.groups   = host_ptr->groups ;
    host_ptr->model_attributes_preserved.interval = host_ptr->interval ;
}
/******************************************************************************
*
* Name : restore_group_actions
*
* Description: Copy saved severity level group action into the matching
* sensor group (name).
*
*****************************************************************************/
void hwmonHostClass::restore_group_actions ( struct hwmonHostClass::hwmon_host * host_ptr,
                                             struct sensor_group_type           * group_ptr )
{
    /* nothing to do without a host, a group or any saved attributes */
    if ( !host_ptr || !group_ptr || !host_ptr->model_attributes_preserved.groups )
        return ;

    for ( int slot = 0 ; slot < host_ptr->model_attributes_preserved.groups ; slot++ )
    {
        /* skip saved entries that belong to a different group name */
        if ( !( group_ptr->group_name == host_ptr->model_attributes_preserved.group_actions[slot].name ))
            continue ;

        ilog ("%s %s group match\n", host_ptr->hostname.c_str(), group_ptr->group_name.c_str());

        /* each action is only rewritten, and logged, when it differs */
        if ( group_ptr->actions_minor_group != host_ptr->model_attributes_preserved.group_actions[slot].minor )
        {
            group_ptr->actions_minor_group = host_ptr->model_attributes_preserved.group_actions[slot].minor ;
            ilog ("%s %s group 'minor' action restored to '%s'\n",
                      host_ptr->hostname.c_str(),
                      group_ptr->group_name.c_str(),
                      group_ptr->actions_minor_group.c_str());
        }

        if ( group_ptr->actions_major_group != host_ptr->model_attributes_preserved.group_actions[slot].major )
        {
            group_ptr->actions_major_group = host_ptr->model_attributes_preserved.group_actions[slot].major ;
            ilog ("%s %s group 'major' action restored to '%s'\n",
                      host_ptr->hostname.c_str(),
                      group_ptr->group_name.c_str(),
                      group_ptr->actions_major_group.c_str());
        }

        if ( group_ptr->actions_critl_group != host_ptr->model_attributes_preserved.group_actions[slot].critl )
        {
            group_ptr->actions_critl_group = host_ptr->model_attributes_preserved.group_actions[slot].critl ;
            ilog ("%s %s group 'critical' action restored to '%s'\n",
                      host_ptr->hostname.c_str(),
                      group_ptr->group_name.c_str(),
                      group_ptr->actions_critl_group.c_str());
        }

        /* only the first matching saved entry is applied */
        return ;
    }
}
/*****************************************************************************
*
* Name : ipmi_learn_sensor_model
*
* Description: Setup hwmon for a sensor model relearn.
* Relearn is a background operation.
* Generates warning log if requested while already in progress.
*
*****************************************************************************/
int hwmonHostClass::ipmi_learn_sensor_model ( string uuid )
{
    /* check for empty list condition */
    if ( hwmon_head == NULL )
    {
        elog ("no provisioned hosts\n");
        return FAIL_HOSTNAME_LOOKUP ;
    }
    else if ( hostUtil_is_valid_uuid ( uuid ) == false )
    {
        elog ("invalid host uuid:%s\n",
               uuid.empty() ? "empty" : uuid.c_str());
        return FAIL_INVALID_UUID ;
    }

    /* The uuid to hostname translation does not depend on the list
     * position so it is done once, ahead of the list walk. */
    const string hostname = hostBase.get_hostname ( uuid ) ;

    for ( struct hwmon_host * ptr = hwmon_head ; ptr != NULL ; ptr = ptr->next )
    {
        if ( hostname == ptr->hostname )
        {
            int rc ;
            if ( ptr->relearn == true )
            {
                /* a relearn is already running ; ask the caller to retry */
                wlog ("%s sensor model relearn already in progress\n",
                          ptr->hostname.c_str());
                wlog ("%s ... projected completion time: %s\n",
                          ptr->hostname.c_str(),
                          ptr->relearn_done_date.c_str());
                rc = RETRY ;
            }
            else
            {
                /* only post the request here ; the relearn itself is
                 * described as a background operation */
                ilog ("%s sensor model relearn request accepted\n",
                          ptr->hostname.c_str());
                ptr->bmc_fw_version.clear();
                ptr->relearn_request = true ;
                ptr->relearn_retry_counter = 0 ;
                rc = PASS ;
            }
            return rc ;
        }

        /* never walk past the recorded tail of the list */
        if ( ptr == hwmon_tail )
            break ;
    }

    elog ("hostname lookup failed for uuid:%s\n", uuid.c_str());
    return FAIL_HOSTNAME_LOOKUP ;
}
/*********************************************************************************
*
* Name : manage_sensor_state
*
* Purpose : manage sensor state change events
*
* Description: Manages sensor failures in the following way
*
* 1. if the sensor is suppressed then check to see if it is already alarmed
* and if so clear that alarm. Send degrade clear message to mtce if this is
* the only sensor that is degraded.
*
* 2. if the sensor is already failed then
* - see if its severity level has changed
* - if the new level is to not alarm then clear the alarm.
* - if the new level is alarm then raise the correct alarm level
*
* 3. if the severity action is to alarm then raise the alarm
*
* Assumptions: sensor status in the database is managed by the caller
*
* Parameters:
*
* hostname - the host that is affected.
* sensor - the sensor that is affected
* severity - any of sensor_severity_enum types
*
**********************************************************************************/
int hwmonHostClass::manage_sensor_state ( string & hostname, sensor_type * sensor_ptr, sensor_severity_enum severity )
{
    /* Returns PASS once the sensor has been handled (or is 'offline'),
     * FAIL_BAD_STATE for an unsupported sensor status string and
     * FAIL_UNKNOWN_HOSTNAME when the host is not found. */
    int rc = FAIL_UNKNOWN_HOSTNAME ;
    hwmonHostClass::hwmon_host * host_ptr = hwmonHostClass::getHost ( hostname );
    if ( host_ptr )
    {
        string reason = REASON_OOT ;

        /* The first part of this function only computes the flags below ;
         * the logs, alarms and degrade changes are applied afterwards. */
        bool ignore_action  = false ;
        bool log_action     = false ;

        bool clear_alarm    = false ;
        bool clear_degrade  = false ;
        bool clear_log      = false ;

        bool assert_alarm   = false ;
        bool assert_degrade = false ;

        bool assert_log_minor    = false ;
        bool assert_log_major    = false ;
        bool assert_log_critical = false ;

        /* severity of the sensor's present status string, as opposed to
         * the 'severity' argument which is the one being applied */
        int current_severity = HWMON_SEVERITY_GOOD ;

        /* load up the severity level */
        if ( !sensor_ptr->status.compare("ok") )
            current_severity = HWMON_SEVERITY_GOOD ;
        else if ( !sensor_ptr->status.compare("critical") )
            current_severity = HWMON_SEVERITY_CRITICAL ;
        else if ( !sensor_ptr->status.compare("major") )
            current_severity = HWMON_SEVERITY_MAJOR ;
        else if ( !sensor_ptr->status.compare("minor") )
            current_severity = HWMON_SEVERITY_MINOR ;
        else if ( !sensor_ptr->status.compare("offline") )
        {
            /* offline sensors are not managed here */
            current_severity = HWMON_SEVERITY_GOOD ;
            return (PASS);
        }
        else
        {
            slog ("%s unsupported sensor status '%s'\n", hostname.c_str(), sensor_ptr->status.c_str());
            return (FAIL_BAD_STATE);
        }

        /* Check suppression */
        if ( sensor_ptr->suppress == true )
        {
            reason = REASON_SUPPRESSED ;
            blog ("%s '%s' sensor %s\n", hostname.c_str(), sensor_ptr->sensorname.c_str(), reason.c_str());

            if ( sensor_ptr->critl.logged || sensor_ptr->major.logged || sensor_ptr->minor.logged )
            {
                clear_log = true ;
            }

            if ( sensor_ptr->alarmed == true )
                clear_alarm = true ;

            if ( sensor_ptr->degraded == true )
                clear_degrade = true ;

            clear_ignored_state (sensor_ptr);
            clear_logged_state (sensor_ptr);
        }

        /* ignore these cases if suppress is true (else if) */
        else if ( severity == HWMON_SEVERITY_GOOD )
        {
            reason = REASON_OK ;

            if ( sensor_ptr->critl.logged || sensor_ptr->major.logged || sensor_ptr->minor.logged )
            {
                clear_log = true ;
            }

            if ( sensor_ptr->alarmed == true )
            {
                clear_alarm = true ;
            }

            if ( sensor_ptr->degraded == true )
            {
                clear_degrade = true ;
            }

            clear_ignored_state (sensor_ptr);
            clear_logged_state (sensor_ptr);
        }

        else if ( severity == HWMON_SEVERITY_MINOR )
        {
            if ( sensor_ptr->degraded == true )
                clear_degrade = true ;

            if ( sensor_ptr->minor.ignored == true )
            {
                reason = REASON_IGNORED ;
                if ( is_alarmed ( sensor_ptr ) == true )
                {
                    clear_alarm = true ;
                }
                ignore_action = true ;
            }
            else if ( ( log_action = is_log_action ( sensor_ptr->actions_minor )) == true )
            {
                if ( sensor_ptr->minor.logged == false)
                {
                    clear_logged_state ( sensor_ptr );
                    assert_log_minor = true ;
                }
                if ( sensor_ptr->alarmed == true )
                {
                    clear_alarm = true ;
                }
                clear_ignored_state ( sensor_ptr );
            }
            else if ( sensor_ptr->alarmed == true )
            {
                if (( ignore_action == true ) || ( log_action == true ))
                {
                    clear_alarm = true ;
                }
                else if ( current_severity != HWMON_SEVERITY_MINOR )
                {
                    /* already alarmed but at a different severity */
                    assert_alarm = true ;
                }
            }
            else
            {
                assert_alarm = true ;
            }

            /* Minor assertions should not degrade */
            if ( sensor_ptr->degraded == true )
            {
                clear_degraded_state ( sensor_ptr ) ;
            }
        }

        else if ( severity == HWMON_SEVERITY_MAJOR )
        {
            if ( sensor_ptr->major.ignored == true )
            {
                reason = REASON_IGNORED ;
                if ( is_alarmed ( sensor_ptr ) == true )
                {
                    clear_alarm = true ;
                }
                ignore_action = true ;

                if ( sensor_ptr->degraded == true )
                    clear_degrade = true ;
            }
            else if (( log_action = is_log_action ( sensor_ptr->actions_major )) == true )
            {
                if ( sensor_ptr->major.logged == false)
                {
                    clear_logged_state ( sensor_ptr );
                    assert_log_major = true ;
                }
                if ( sensor_ptr->alarmed == true )
                {
                    clear_alarm = true ;
                }
                clear_ignored_state ( sensor_ptr );
            }
            else if ( sensor_ptr->alarmed == true )
            {
                if (( ignore_action == true ) || ( log_action == true ))
                {
                    clear_alarm = true ;
                }
                else if ( current_severity != HWMON_SEVERITY_MAJOR )
                {
                    /* already alarmed but at a different severity */
                    assert_alarm = true ;
                }
            }
            else
            {
                assert_alarm = true ;
            }

            if ( sensor_ptr->degraded == false )
            {
                if (( ignore_action == true ) || ( log_action == true ))
                {
                    ; // clear_degrade = true ;
                }
                else
                {
                    assert_degrade = true ;
                }
            }
        }

        else if ( severity == HWMON_SEVERITY_CRITICAL )
        {
            if ( sensor_ptr->critl.ignored == true )
            {
                reason = REASON_IGNORED ;
                if ( is_alarmed ( sensor_ptr ) == true )
                {
                    clear_alarm = true ;
                }
                ignore_action = true ;

                if ( sensor_ptr->degraded == true )
                    clear_degrade = true ;
            }
            else if ( ( log_action = is_log_action ( sensor_ptr->actions_critl )) == true )
            {
                if ( sensor_ptr->critl.logged == false )
                {
                    clear_logged_state ( sensor_ptr );
                    assert_log_critical = true ;
                }
                if ( sensor_ptr->alarmed == true )
                {
                    clear_alarm = true ;
                }
                clear_ignored_state ( sensor_ptr );
            }
            else if ( sensor_ptr->alarmed == true )
            {
                if (( ignore_action == true ) || ( log_action == true ))
                {
                    clear_alarm = true ;
                }
                else if ( current_severity != HWMON_SEVERITY_CRITICAL )
                {
                    /* already alarmed but at a different severity */
                    assert_alarm = true ;
                }
            }
            else
            {
                assert_alarm = true ;
            }

            if ( sensor_ptr->degraded == false )
            {
                if (( ignore_action == true ) || ( log_action == true ))
                {
                    ; // clear_degrade = true ;
                }
                else
                {
                    assert_degrade = true ;
                }
            }
        }

        /* trace the computed alarm and degrade decisions */
        if ( assert_degrade || clear_degrade || clear_alarm || assert_alarm )
        {
            ilog ("%s %-20s assert_degrade = %d severity = %x %s\n", hostname.c_str(), sensor_ptr->sensorname.c_str(), assert_degrade, severity, sensor_ptr->suppress ? "suppressed" : " action " );
            ilog ("%s %-20s clear_degrade = %d status = %3s minor = %s\n", hostname.c_str(), sensor_ptr->sensorname.c_str(), clear_degrade , sensor_ptr->status.c_str(), sensor_ptr->actions_minor.c_str());
            ilog ("%s %-20s clear_alarm = %d degraded = %3s major = %s\n", hostname.c_str(), sensor_ptr->sensorname.c_str(), clear_alarm , sensor_ptr->degraded ? "Yes" : "No ", sensor_ptr->actions_major.c_str());
            ilog ("%s %-20s assert_alarm = %d alarmed = %3s critl = %s\n", hostname.c_str(), sensor_ptr->sensorname.c_str(), assert_alarm , sensor_ptr->alarmed ? "Yes" : "No ", sensor_ptr->actions_critl.c_str());
        }

        /* trace the computed customer log decisions */
        if ( assert_log_critical || assert_log_major || assert_log_minor || clear_log )
        {
            ilog ("%s %s assert log [%s%s%s] %s %s\n",
                      hostname.c_str(),
                      sensor_ptr->sensorname.c_str(),
                      assert_log_critical ? "crit" : "",
                      assert_log_major ? "major" : "",
                      assert_log_minor ? "minor" : "",
                      clear_log ? "clear log" : "",
                      ignore_action ? "ignore" : "" );
        }

        /* logic error check */
        if ((( assert_degrade == true ) && ( clear_degrade == true )) ||
            (( assert_alarm == true ) && ( clear_alarm == true )))
        {
            slog ("%s conflicting degrade state or alarming calculation - favoring clear\n", hostname.c_str() );
            if ( clear_alarm == true )
            {
                assert_alarm = false ;
            }
            if ( clear_degrade == true )
            {
                assert_degrade = false ;
            }
        }

        /***************************************************************************
         *
         * TAKE THE ACTIONS NOW
         *
         **************************************************************************/

        if ( clear_log == true )
        {
            hwmonLog_clear ( hostname, HWMON_ALARM_ID__SENSOR, sensor_ptr->sensorname, reason );
            clear_logged_state ( sensor_ptr );
        }

        if ( assert_log_critical )
        {
            clear_logged_state (sensor_ptr);
            sensor_ptr->critl.logged = true ;
            hwmonLog_critical ( hostname, HWMON_ALARM_ID__SENSOR, sensor_ptr->sensorname, reason );
        }

        if ( assert_log_major )
        {
            clear_logged_state (sensor_ptr);
            sensor_ptr->major.logged = true ;
            hwmonLog_major ( hostname, HWMON_ALARM_ID__SENSOR, sensor_ptr->sensorname, reason );
        }

        if ( assert_log_minor )
        {
            clear_logged_state (sensor_ptr);
            sensor_ptr->minor.logged = true ;
            hwmonLog_minor ( hostname, HWMON_ALARM_ID__SENSOR, sensor_ptr->sensorname, reason );
        }

        /* handle clearing the specified alarm */
        if ( clear_alarm == true )
        {
            hwmonAlarm_clear ( hostname, HWMON_ALARM_ID__SENSOR, sensor_ptr->sensorname, reason );
            clear_degraded_state ( sensor_ptr );
            clear_alarmed_state ( sensor_ptr );
        }

        /* handle asserting the specified alarm */
        else if ( assert_alarm == true )
        {
            clear_alarmed_state ( sensor_ptr);
            if ( severity == HWMON_SEVERITY_CRITICAL )
            {
                hwmonAlarm_critical ( hostname, HWMON_ALARM_ID__SENSOR, sensor_ptr->sensorname, reason );
                set_alarmed_severity ( sensor_ptr, FM_ALARM_SEVERITY_CRITICAL );
                if ( assert_degrade != true )
                    assert_degrade = true ;
            }
            else if ( severity == HWMON_SEVERITY_MAJOR )
            {
                hwmonAlarm_major ( hostname, HWMON_ALARM_ID__SENSOR, sensor_ptr->sensorname, reason );
                set_alarmed_severity ( sensor_ptr, FM_ALARM_SEVERITY_MAJOR );
                if ( assert_degrade != true )
                    assert_degrade = true ;
            }
            else if ( severity == HWMON_SEVERITY_MINOR )
            {
                hwmonAlarm_minor ( hostname, HWMON_ALARM_ID__SENSOR, sensor_ptr->sensorname, reason );
                set_alarmed_severity ( sensor_ptr, FM_ALARM_SEVERITY_MINOR );
            }

            /* NEW */
            clear_logged_state ( sensor_ptr );
            clear_ignored_state ( sensor_ptr );
        }

        /* handle sending a degrade clear request to mtcAgent */
        if ( clear_degrade == true )
        {
            clear_degraded_state ( sensor_ptr );
        }

        /* handle sending a degrade request to mtcAgent */
        else if ( assert_degrade == true )
        {
            set_degraded_state ( sensor_ptr );
        }

        /* The host was found and the sensor was handled ; without this
         * the function would report FAIL_UNKNOWN_HOSTNAME on success. */
        rc = PASS ;
    }
    else
    {
        wlog ("%s Unknown Host\n", hostname.c_str());
    }

    sensorState_print ( hostname, sensor_ptr );

    return (rc);
}
/*****************************************************************************
*
* Name : audit_interval_change
*
* Description: Set a host specific flag indicating that the sensor monitoring
* audit interval for this host has changed.
*
* The actual interval change is handled in the add handler.
*
* This API is used during group load from the database when the
* default host_ptr->interval is zero or groups have differing
* values.
*
*****************************************************************************/
void hwmonHostClass::audit_interval_change ( string hostname )
{
    /* an empty name cannot be looked up */
    if ( hostname.empty() )
        return ;

    hwmon_host * host_ptr = hwmonHostClass::getHost ( hostname );
    if ( host_ptr == NULL )
        return ;

    /* handle refreshing sysinv at base level to avoid deadlock */
    host_ptr->interval_changed = true ;
}
/*****************************************************************************
*
* Name : modify_audit_interval
*
* Description: Changes the host_ptr->interval to the specified value and
* sets the 'interval_changed' flag indicating that the sensor
* monitoring audit interval for this host has changed.
*
* The actual interval change is handled in the DELAY stage of the
* ipmi_sensor_monitor.
*
* This API is called by http group modify handler to trigger
* change of the sensor audit interval to a specific value.
*
*****************************************************************************/
void hwmonHostClass::modify_audit_interval ( string hostname , int interval )
{
    /* an empty name cannot be looked up */
    if ( hostname.empty() )
        return ;

    hwmonHostClass::hwmon_host * host_ptr = hwmonHostClass::getHost ( hostname );
    if ( host_ptr == NULL )
        return ;

    /* a request for the interval already in effect changes nothing */
    if ( host_ptr->interval == interval )
        return ;

    /* remember the outgoing value before installing the new one */
    host_ptr->interval_old = host_ptr->interval ;
    host_ptr->interval     = interval ;

    /* handle popping this new value to hwmon groups
     * and sysinv database at base level to avoid deadlock */
    host_ptr->interval_changed = true ;
}
/* log sensor data to a tmp file to assist debug of sensor read issues */
void hwmonHostClass::log_sensor_data ( struct hwmonHostClass::hwmon_host * host_ptr, string & sensorname, string from, string to )
{
    /* path of this host's ipmitool sensor output file */
    string sensor_datafile = IPMITOOL_OUTPUT_DIR ;
    sensor_datafile += host_ptr->hostname ;
    sensor_datafile += IPMITOOL_SENSOR_OUTPUT_FILE_SUFFIX ;

    /* path of the per host debug file that is written to */
    string debugfile = "/tmp/" ;
    debugfile += host_ptr->hostname ;
    debugfile += IPMITOOL_SENSOR_OUTPUT_FILE_SUFFIX ;
    debugfile += "_debug" ;

    /* header line : <pt()> - <sensor> from '<from>' to '<to>' */
    string source = pt() ;
    source += " - " ;
    source += sensorname ;
    source += " from '" ;
    source += from ;
    source += "' to '" ;
    source += to ;
    source += "'\n" ;

    /* header, thread info data, sensor data file content, separator */
    daemon_log ( debugfile.c_str(), source.c_str());
    daemon_log ( debugfile.c_str(), host_ptr->ipmitool_thread_info.data.data());
    daemon_log ( debugfile.c_str(), daemon_read_file ( sensor_datafile.c_str()).data());
    daemon_log ( debugfile.c_str(), "---------------------------------------------------------------------\n");
}
/* Flush the stdout and stderr streams ; nothing else is printed here. */
void hwmonHostClass::print_node_info ( void )
{
    FILE * const streams[] = { stdout, stderr } ;
    for ( int n = 0 ; n < 2 ; n++ )
    {
        fflush ( streams[n] );
    }
}
/* Add a one line sensor and sensor group count summary for the
 * specified host to the memory log. */
void hwmonHostClass::mem_log_info ( struct hwmonHostClass::hwmon_host * hwmon_host_ptr )
{
    char line[MAX_MEM_LOG_DATA] ;

    const int sensor_count = hwmon_host_ptr->sensors ;
    const int group_count  = hwmon_host_ptr->groups ;

    snprintf ( line, sizeof(line), "%s has %d sensor(s) across %d sensor group(s)\n",
               hwmon_host_ptr->hostname.c_str(),
               sensor_count,
               group_count );
    mem_log (line);
}
/* Add two lines to the memory log for the specified host ; the
 * monitor / provisioned / connected flags with the sensor query count,
 * followed by the group index, group count and sensor count. */
void hwmonHostClass::mem_log_options ( struct hwmonHostClass::hwmon_host * hwmon_host_ptr )
{
    char line[MAX_MEM_LOG_DATA] ;
    const char * const host = hwmon_host_ptr->hostname.c_str() ;

    /* line 1 : state flags */
    snprintf ( line, sizeof(line), "%s\tMonitoring: %s Provisioned: %s Connected: %s Count: %d\n",
               host,
               hwmon_host_ptr->monitor        ? "YES" : "no",
               hwmon_host_ptr->bm_provisioned ? "YES" : "no",
               hwmon_host_ptr->connected      ? "YES" : "no",
               hwmon_host_ptr->sensor_query_count );
    mem_log (line);

    /* line 2 : counters */
    snprintf ( line, sizeof(line), "%s\tMon Gates : GroupIndex:%d Groups:%d Sensors:%d\n",
               host,
               hwmon_host_ptr->group_index,
               hwmon_host_ptr->groups,
               hwmon_host_ptr->sensors );
    mem_log (line);
}
/* Add the bm_ip, bm_un and bm_type values of the specified host
 * to the memory log as a single line. */
void hwmonHostClass::mem_log_bm ( struct hwmonHostClass::hwmon_host * hwmon_host_ptr )
{
    char line[MAX_MEM_LOG_DATA] ;

    const char * const host    = hwmon_host_ptr->hostname.c_str() ;
    const char * const bm_ip   = hwmon_host_ptr->bm_ip.c_str() ;
    const char * const bm_un   = hwmon_host_ptr->bm_un.c_str() ;
    const char * const bm_type = hwmon_host_ptr->bm_type.c_str() ;

    snprintf ( line, sizeof(line), "%s\tbm_ip:%s bm_un:%s bm_type:%s\n",
               host, bm_ip, bm_un, bm_type );
    mem_log (line);
}
/* Add one line to the memory log with the ipmitool thread control
 * (stage, runcount, status) and thread info (progress, status) values
 * of the specified host. */
void hwmonHostClass::mem_log_threads ( struct hwmonHostClass::hwmon_host * hwmon_host_ptr)
{
    char line[MAX_MEM_LOG_DATA] ;

    snprintf ( line, sizeof(line), "%s\tThread Stage:%d Runs:%d Progress:%d Ctrl Status:%d Thread Status:%d\n",
               hwmon_host_ptr->hostname.c_str(),
               hwmon_host_ptr->ipmitool_thread_ctrl.stage,
               hwmon_host_ptr->ipmitool_thread_ctrl.runcount,
               hwmon_host_ptr->ipmitool_thread_info.progress,
               hwmon_host_ptr->ipmitool_thread_ctrl.status,
               hwmon_host_ptr->ipmitool_thread_info.status );
    mem_log (line);
}
/* Compare the host's sensor count with the sum of the per group sensor
 * counts, record the result in host_ptr->accounting_ok and add it to
 * the memory log. */
void hwmonHostClass::check_accounting ( struct hwmonHostClass::hwmon_host * host_ptr )
{
    char line[MAX_MEM_LOG_DATA] ;

    /* total up the sensors that the groups claim to hold ; a group
     * count that is not positive contributes nothing */
    int grouped = 0 ;
    for ( int grp = 0 ; grp < host_ptr->groups ; ++grp )
    {
        const int in_group = host_ptr->group[grp].sensors ;
        if ( in_group > 0 )
            grouped += in_group ;
    }

    host_ptr->accounting_ok = ( grouped == host_ptr->sensors ) ? true : false ;

    snprintf ( line, sizeof(line), "SENSOR: Accounting is %s (%d:%d)", host_ptr->accounting_ok ? "GOOD" : "BAD", host_ptr->sensors, grouped );
    mem_log (line);
}
/* Add the sensor groups of the specified host to the memory log.
 * Groups without sensors are skipped. For each remaining group the
 * group summary and its actions are logged, followed by either one
 * detailed line per sensor (when host_ptr->accounting_ok is true) or
 * just the list of sensor names. */
void hwmonHostClass::mem_log_groups ( struct hwmonHostClass::hwmon_host * host_ptr )
{
    char line[MAX_MEM_LOG_DATA] ;

    for ( int grp = 0 ; grp < host_ptr->groups ; grp++ )
    {
        struct sensor_group_type & group = host_ptr->group[grp] ;

        /* Don't dump sensor group info if there are no sensors in it */
        if ( group.sensors == 0 )
            continue ;

        /* spacer ahead of each group */
        snprintf ( line, sizeof(line), " ");
        mem_log (line);

        snprintf ( line, sizeof(line), "GROUP : %03d secs %s %s %s uuid:%s\n",
                   group.group_interval,
                   group.group_name.c_str(),
                   group.group_state.c_str(),
                   group.suppress ? "suppressed" : "",
                   group.group_uuid.c_str());
        mem_log (line);

        snprintf ( line, sizeof(line), " Actions: [minor:%s][%s] [major:%s][%s] [crit:%s][%s]\n\n",
                   group.actions_minor_group.c_str(),
                   group.actions_minor_choices.c_str(),
                   group.actions_major_group.c_str(),
                   group.actions_major_choices.c_str(),
                   group.actions_critl_group.c_str(),
                   group.actions_critical_choices.c_str());
        mem_log (line);

#ifdef WANT_UNIT_MEMLOG_INFO /* not used presently */
        snprintf ( line, sizeof(line), " > Info : algorithm: %s - unit [base:%s] [rate:%s] [modifier:%s]\n",
                   group.algorithm.c_str(),
                   group.unit_base_group.c_str(),
                   group.unit_rate_group.c_str(),
                   group.unit_modifier_group.c_str());
        mem_log (line);
#endif

#ifdef WANT_THRESHOLD_MEMLOG_INFO /* not used presently */
        snprintf ( line, sizeof(line), " > Threshold: Lcrit - Lmajor - Lminor | Uminor - Umajor - Ucrit\n");
        mem_log (line);
        snprintf ( line, sizeof(line), " > %5.3f - %6.3f - %6.3f | %6.3f - %6.3f - %6.3f\n",
                   group.t_critical_lower_group, group.t_major_lower_group ,
                   group.t_minor_lower_group, group.t_minor_upper_group ,
                   group.t_major_upper_group, group.t_critical_upper_group);
        mem_log (line);
#endif

        if ( host_ptr->accounting_ok == true )
        {
            /* one detailed line per sensor in this group */
            for ( int idx = 0 ; idx < group.sensors ; idx++ )
            {
                sensor_type * sensor_ptr = group.sensor_ptr[idx] ;
                snprintf ( line, sizeof(line), "SENSOR: %-20s %-20s %8s-%-8s sev:%-8s [minor:%-6s major:%-6s crit:%-6s] [alarmed:%c%c%c] [ignored:%c%c%c] [logged:%c%c%c] %s:%s %s%s%s\n",
                           group.group_name.c_str(),
                           sensor_ptr->sensorname.c_str(),
                           sensor_ptr->state.c_str(),
                           sensor_ptr->status.c_str(),
                           get_severity(sensor_ptr->severity).c_str(),
                           sensor_ptr->actions_minor.c_str(),
                           sensor_ptr->actions_major.c_str(),
                           sensor_ptr->actions_critl.c_str(),
                           sensor_ptr->minor.alarmed ? 'Y' : '.',
                           sensor_ptr->major.alarmed ? 'Y' : '.',
                           sensor_ptr->critl.alarmed ? 'Y' : '.',
                           sensor_ptr->minor.ignored ? 'Y' : '.',
                           sensor_ptr->major.ignored ? 'Y' : '.',
                           sensor_ptr->critl.ignored ? 'Y' : '.',
                           sensor_ptr->minor.logged ? 'Y' : '.',
                           sensor_ptr->major.logged ? 'Y' : '.',
                           sensor_ptr->critl.logged ? 'Y' : '.',
                           sensor_ptr->uuid.c_str(),
                           sensor_ptr->group_uuid.substr(0,8).c_str(),
                           sensor_ptr->degraded ? "degraded " : "",
                           sensor_ptr->alarmed ? "alarmed " : "",
                           sensor_ptr->suppress ? "suppressed " : "");
                mem_log (line);
            }
        }
        else
        {
            /* Names only. The accumulated list is written out whenever
             * the index is a non-zero multiple of 8 and again after the
             * final sensor ; only the first line carries the count. */
            string names = "" ;
            bool header_pending = true ;
            const int last = group.sensors - 1 ;

            for ( int idx = 0 ; idx < group.sensors ; idx++ )
            {
                const bool final_entry = ( idx == last ) ;

                names.append(group.sensor_ptr[idx]->sensorname);
                if ( idx < last )
                    names.append(", ");

                if ( final_entry || (( idx != 0 ) && (( idx % 8 ) == 0 )))
                {
                    if ( header_pending == true )
                    {
                        snprintf ( line, sizeof(line), " SENSORS:%02d: %s\n", group.sensors, names.c_str() );
                        header_pending = false ;
                    }
                    else
                    {
                        snprintf ( line, sizeof(line), " %s\n", names.c_str() );
                    }
                    mem_log (line);
                    names = " " ;
                }
            }
        }
    }
}
/* Add the hwmon state of the named host to the memory log ; a host
 * that is not found only gets a 'Not Found' entry. */
void hwmonHostClass::memDumpNodeState ( string hostname )
{
    hwmonHostClass::hwmon_host * const node_ptr = hwmonHostClass::getHost ( hostname );
    if ( node_ptr == NULL )
    {
        mem_log ( hostname, ": ", "Not Found in hwmonHostClass\n" );
        return ;
    }

    /* the order of these calls is the order of the log output */
    mem_log_options ( node_ptr );
    hwmonHostClass::hostBase.memDumpNodeState ( hostname );
    mem_log_info ( node_ptr );
    mem_log_bm ( node_ptr );
    mem_log_threads ( node_ptr );

    /* accounting is checked before the group dump because
     * mem_log_groups reads the accounting_ok result */
    check_accounting( node_ptr );
    mem_log_groups ( node_ptr );
}
void hwmonHostClass::memDumpAllState ( void )
{
struct hwmon_host * ptr = hwmon_head ;
if ( hwmon_head == NULL ) return ;
hwmonHostClass::hostBase.memLogDelimit ();
/* walk the node list looking for nodes that should be monitored */
for ( int i = 0 ; i < hosts ; i++ )
{
memDumpNodeState ( ptr->hostname );
hwmonHostClass::hostBase.memLogDelimit ();
ptr = ptr->next ;
if ( ptr == NULL )
break ;
}
}
/* Print the state of the named sensor of the specified host, preceded
 * by the caller supplied location (proc and line). Nothing is printed
 * when the host has no sensor by that name. */
void hwmonHostClass::sensorState_print_debug ( struct hwmonHostClass::hwmon_host * host_ptr, string sensorname, string proc, int line )
{
    for ( int idx = 0 ; idx < host_ptr->sensors ; idx++ )
    {
        sensor_type * candidate = &host_ptr->sensor[idx] ;

        /* keep searching until the name matches */
        if ( candidate->sensorname.compare(sensorname) != 0 )
            continue ;

        plog ("Location: %s %d\n", proc.c_str(), line );
        sensorState_print ( host_ptr->hostname, candidate );

        /* only the first match is printed */
        return ;
    }
}