Merge "Collectd+InfluxDb-RMON Replacement(ALL METRICS) P1"

This commit is contained in:
Zuul 2018-07-03 17:02:34 +00:00 committed by Gerrit Code Review
commit 4a4c540a3c
14 changed files with 192 additions and 219 deletions

View File

@ -421,11 +421,6 @@ install -m 700 -p -D %{_buildsubdir}/pmon/scripts/pmon-restart %{buildroot}/%{lo
install -m 700 -p -D %{_buildsubdir}/pmon/scripts/pmon-start %{buildroot}/%{local_sbindir}/pmon-start
install -m 700 -p -D %{_buildsubdir}/pmon/scripts/pmon-stop %{buildroot}/%{local_sbindir}/pmon-stop
# test tools
install -m 755 %{_buildsubdir}/hwmon/scripts/show_hp360 %{buildroot}/%{_sbindir}/show_hp360
install -m 755 %{_buildsubdir}/hwmon/scripts/show_hp380 %{buildroot}/%{_sbindir}/show_hp380
install -m 755 %{_buildsubdir}/hwmon/scripts/show_quanta %{buildroot}/%{_sbindir}/show_quanta
# init script files
install -m 755 -p -D %{_buildsubdir}/scripts/mtcClient %{buildroot}%{_sysconfdir}/init.d/mtcClient
install -m 755 -p -D %{_buildsubdir}/scripts/hbsClient %{buildroot}%{_sysconfdir}/init.d/hbsClient
@ -498,9 +493,6 @@ install -m 755 -d %{buildroot}%{_sysconfdir}/rmonapi.d
install -m 755 -d %{buildroot}%{_sysconfdir}/rmonfiles.d
install -m 755 -d %{buildroot}%{_sysconfdir}/rmon_interfaces.d
install -m 644 -p -D %{_buildsubdir}/rmon/scripts/remotelogging_resource.conf %{buildroot}%{local_etc_rmond}/remotelogging_resource.conf
install -m 644 -p -D %{_buildsubdir}/rmon/scripts/cpu_resource.conf %{buildroot}%{local_etc_rmond}/cpu_resource.conf
install -m 644 -p -D %{_buildsubdir}/rmon/scripts/memory_resource.conf %{buildroot}%{local_etc_rmond}/memory_resource.conf
install -m 644 -p -D %{_buildsubdir}/rmon/scripts/filesystem_resource.conf %{buildroot}%{local_etc_rmond}/filesystem_resource.conf
install -m 644 -p -D %{_buildsubdir}/rmon/scripts/cinder_virtual_resource.conf %{buildroot}%{local_etc_rmond}/cinder_virtual_resource.conf
install -m 644 -p -D %{_buildsubdir}/rmon/scripts/nova_virtual_resource.conf %{buildroot}%{local_etc_rmond}/nova_virtual_resource.conf
install -m 644 -p -D %{_buildsubdir}/rmon/scripts/oam_resource.conf %{buildroot}%{_sysconfdir}/rmon_interfaces.d/oam_resource.conf
@ -676,10 +668,7 @@ install -m 755 -d %{buildroot}/var/run
%{local_etc_logrotated}/rmon.logrotate
%{_unitdir}/rmon.service
%{local_etc_rmond}/filesystem_resource.conf
%{local_etc_rmond}/cpu_resource.conf
%{local_etc_rmond}/remotelogging_resource.conf
%{local_etc_rmond}/memory_resource.conf
%{local_etc_rmond}/cinder_virtual_resource.conf
%{local_etc_rmond}/nova_virtual_resource.conf
@ -713,10 +702,6 @@ install -m 755 -d %{buildroot}/var/run
%{local_etc_logrotated}/hwmon.logrotate
%{ocf_resourced}/platform/hwmon
%{_sbindir}/show_hp380
%{_sbindir}/show_hp360
%{_sbindir}/show_quanta
%{_sysconfdir}/init.d/hwmon
%{local_bindir}/hwmond

View File

@ -255,12 +255,38 @@ const char * get_mtcNodeCommand_str ( int cmd )
}
void print_mtc_message ( string hostname, int direction, mtc_message_type & msg , const char * iface, bool force )
void print_mtc_message ( string hostname,
int direction,
mtc_message_type & msg,
const char * iface,
bool force )
{
/* Handle raw json string messages differently.
* Those messages just have a json string that starts at the header */
if ( msg.hdr[0] == '{' )
{
if ( force )
{
ilog ("%s %s (%s network) - %s\n",
hostname.c_str(),
direction ? "rx <-" : "tx ->" ,
iface,
msg.hdr);
}
else
{
mlog1 ("%s %s (%s network) - %s\n",
hostname.c_str(),
direction ? "rx <-" : "tx ->" ,
iface,
msg.hdr);
}
return ;
}
string str = "-" ;
if ( msg.buf[0] )
str = msg.buf ;
if ( force )
{
ilog ("%s %s %s (%s network) %d.%d %x:%x:%x.%x.%x.%x [%s] %s\n",

View File

@ -92,6 +92,9 @@ void daemon_exit ( void );
#define NODE_HEALTHY (1)
#define NODE_UNHEALTHY (2)
#define AUTO_RECOVERY_FILE_SUFFIX ((const char *)"_ar_count")
#define TMP_DIR_PATH ((const char *)"/etc/mtc/tmp/")
#define HOST_IS_VIRTUAL ((const char *)"/var/run/virtual.host")
/** Configuration Pass/Fail Flag File */
@ -146,10 +149,6 @@ void daemon_exit ( void );
#define BM_DNSMASQ_FILENAME ((const char *)"dnsmasq.bmc_hosts")
/* Added for Centos */
#define CENTOS_RELEASE_FILE ((const char *)"/etc/centos-release")
#define SYSTEMD_SERVICE_FILE_DIR ((const char *)"/usr/lib/systemd/system")
#define THREAD_NAME__IPMITOOL ((const char *)("ipmitool"))
#define IPMITOOL_PATH_AND_FILENAME ((const char *)("/usr/bin/ipmitool"))
@ -970,7 +969,7 @@ string get_configStages_str ( mtc_configStages_enum stage );
#define DEGRADE_MASK_SUBF 0x00000100
#define DEGRADE_MASK_SM 0x00000200
#define DEGRADE_MASK_CONFIG 0x00000400
#define DEGRADE_MASK_RES2 0x00000800
#define DEGRADE_MASK_COLLECTD 0x00000800
#define DEGRADE_MASK_ENABLE 0x00001000
#define DEGRADE_MASK_RES4 0x00002000
#define DEGRADE_MASK_RES5 0x00004000

View File

@ -662,8 +662,8 @@ nodeLinkClass::node* nodeLinkClass::addNode( string hostname )
ptr->log_throttle = 0 ;
ptr->no_work_log_throttle = 0 ;
/* Clear the degrade control structs */
ptr->degrade_mask = DEGRADE_MASK_NONE ;
ptr->degrade_mask = ptr->degrade_mask_save = DEGRADE_MASK_NONE ;
ptr->degraded_resources_list.clear () ;
ptr->pmond_ready = false ;
ptr->rmond_ready = false ;
@ -4561,16 +4561,6 @@ void nodeLinkClass::manage_heartbeat_degrade ( string hostname, iface_enum iface
}
hbs_minor_clear ( node_ptr, iface );
/* Set the host available if the degrade mask is now
* cleared and we are degraded */
if ( node_ptr->degrade_mask == 0 )
{
if ( get_availStatus ( hostname ) == MTC_AVAIL_STATUS__DEGRADED )
{
set_availStatus ( hostname, MTC_AVAIL_STATUS__AVAILABLE );
}
}
}
else if ( this->mtcTimer_dor.tid )
{
@ -4602,12 +4592,6 @@ void nodeLinkClass::manage_heartbeat_degrade ( string hostname, iface_enum iface
node_ptr->degrade_mask |= DEGRADE_MASK_HEARTBEAT_INFRA ;
}
}
/* No point in changing if we are already degraded */
if ( nodeLinkClass::get_availStatus ( hostname ) == MTC_AVAIL_STATUS__AVAILABLE )
{
set_availStatus ( hostname, MTC_AVAIL_STATUS__DEGRADED );
}
}
}
}
@ -4621,7 +4605,7 @@ void nodeLinkClass::manage_heartbeat_minor ( string hostname, iface_enum iface,
wlog ("%s Unknown host\n", hostname.c_str());
return ;
}
/* is this a clear event ? */
if ( clear_event == true )
{
@ -4639,15 +4623,15 @@ void nodeLinkClass::manage_heartbeat_minor ( string hostname, iface_enum iface,
else if ( node_ptr->hbs_minor[iface] != true )
{
mnfa_add_host ( node_ptr, iface );
mnfa_add_host ( node_ptr, iface );
}
}
}
/** Interface to declare that a key service on the
/** Interface to declare that a key service on the
* specified host is up, running and ready */
int nodeLinkClass::declare_service_ready ( string & hostname,
int nodeLinkClass::declare_service_ready ( string & hostname,
unsigned int service )
{
nodeLinkClass::node * node_ptr = nodeLinkClass::getNode ( hostname );
@ -4661,18 +4645,11 @@ int nodeLinkClass::declare_service_ready ( string & hostname,
node_ptr->pmond_ready = true ;
plog ("%s got pmond ready event\n", hostname.c_str());
/* A ready event means that pmond pocess has started.
* Any previous history is gone. Cleanup mtce.
/* A ready event means that pmond process has started.
* Any previous history is gone. Cleanup mtce.
* If there are still process issues on this host then
* they will be reported again.*/
node_ptr->degrade_mask &= ~DEGRADE_MASK_PMON ;
if ( node_ptr->degrade_mask == DEGRADE_MASK_NONE )
{
if ( node_ptr->availStatus == MTC_AVAIL_STATUS__DEGRADED )
{
availStatusChange ( node_ptr, MTC_AVAIL_STATUS__AVAILABLE );
}
}
return (PASS);
}
else if ( service == MTC_SERVICE_HWMOND )
@ -4719,14 +4696,6 @@ int nodeLinkClass::degrade_pmond_clear ( string & hostname )
if ( node_ptr->degrade_mask )
{
node_ptr->degrade_mask &= ~DEGRADE_MASK_PMON ;
if ( !node_ptr->degrade_mask )
{
if ( node_ptr->operState == MTC_OPER_STATE__ENABLED )
{
availStatusChange ( node_ptr, MTC_AVAIL_STATUS__AVAILABLE );
}
}
}
/* The only detectable inservice failures are process failures */
@ -4735,15 +4704,65 @@ int nodeLinkClass::degrade_pmond_clear ( string & hostname )
return (PASS);
}
/* Handle a degrade notification message from collectd.
 *
 * Looks up 'hostname' and then, based on 'state':
 *   "clear"  - clears the DEGRADE_MASK_COLLECTD bit in the host's degrade mask
 *   "assert" - sets   the DEGRADE_MASK_COLLECTD bit in the host's degrade mask
 *
 * Only the degrade mask is touched here; 'resource' is used for logging only.
 * A request that matches the current mask state is logged at mlog3 level and
 * otherwise ignored.
 *
 * Returns PASS for a recognized state,
 *         FAIL_UNKNOWN_HOSTNAME when the host lookup fails,
 *         FAIL_OPERATION for any other 'state' string. */
int nodeLinkClass::collectd_notify_handler ( string & hostname,
                                             string & resource,
                                             string & state )
{
    nodeLinkClass::node * node_ptr = nodeLinkClass::getNode ( hostname );
    if ( node_ptr == NULL )
    {
        wlog ("%s Unknown Host\n", hostname.c_str());
        return (FAIL_UNKNOWN_HOSTNAME) ;
    }

    const bool clear_request  = ( state == "clear" );
    const bool assert_request = ( !clear_request && ( state == "assert" ));

    /* Reject anything that is neither a clear nor an assert */
    if ( !clear_request && !assert_request )
    {
        wlog ("%s collectd degrade state unknown (%s)\n",
        hostname.c_str(),
        state.c_str());
        return (FAIL_OPERATION);
    }

    /* Current state of the collectd degrade bit for this host */
    const bool collectd_degraded =
        (( node_ptr->degrade_mask & DEGRADE_MASK_COLLECTD ) != 0 );

    if ( clear_request )
    {
        if ( collectd_degraded )
        {
            ilog("%s collectd degrade state change ; assert -> clear (%s)",
            hostname.c_str(), resource.c_str());
            node_ptr->degrade_mask &= ~DEGRADE_MASK_COLLECTD ;
        }
        else
        {
            /* already clear ; nothing to change */
            mlog3("%s collectd degrade 'clear' request (%s)",
            hostname.c_str(), resource.c_str());
        }
        return (PASS);
    }

    /* assert request */
    if ( !collectd_degraded )
    {
        ilog("%s collectd degrade state change ; clear -> assert (due to %s)",
        hostname.c_str(), resource.c_str());
        node_ptr->degrade_mask |= DEGRADE_MASK_COLLECTD ;
    }
    else
    {
        /* already asserted ; nothing to change */
        mlog3("%s collectd degrade 'assert' request (%s)",
        hostname.c_str(), resource.c_str());
    }
    return (PASS);
}
/** Resource Monitor 'Clear' Event handler.
*
*
* The resource specified will be removed from the
* 'degraded_resources_list' for specified host.
* if there are no other degraded resources or other
* degraded services/reasons against that host then
* this handler will clear the degrade state for the
* specified host all together. */
int nodeLinkClass::degrade_resource_clear ( string & hostname,
int nodeLinkClass::degrade_resource_clear ( string & hostname,
string & resource )
{
/* lr - Log Prefix Rmon */
@ -4788,18 +4807,6 @@ int nodeLinkClass::degrade_resource_clear ( string & hostname,
if ( node_ptr->degraded_resources_list.empty() )
{
node_ptr->degrade_mask &= ~DEGRADE_MASK_RESMON ; ;
if ( node_ptr->degrade_mask == DEGRADE_MASK_NONE )
{
if ( node_ptr->availStatus == MTC_AVAIL_STATUS__DEGRADED )
{
availStatusChange ( node_ptr, MTC_AVAIL_STATUS__AVAILABLE );
}
}
else
{
wlog ("%s Remains Degraded - Reason Mask:0x%08x\n",
hostname.c_str(), node_ptr->degrade_mask );
}
}
else
{
@ -4874,30 +4881,6 @@ int nodeLinkClass::node_degrade_control ( string & hostname, int state, string s
/* clear the mask regardless of host state */
node_ptr->degrade_mask &= ~service_flag ;
/* only applies if host is unlocked-enabled-degraded and
* there are no other degrade flags in the degrade mask */
if (( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) &&
( node_ptr->operState == MTC_OPER_STATE__ENABLED ) &&
( node_ptr->availStatus == MTC_AVAIL_STATUS__DEGRADED ))
{
if ( node_ptr->degrade_mask == DEGRADE_MASK_NONE )
{
availStatusChange ( node_ptr, MTC_AVAIL_STATUS__AVAILABLE );
}
else
{
/* TODO: convert lask to a sring or services and print that string */
wlog ("%s remains degraded - degrade mask:0x%08x\n",
hostname.c_str(),
node_ptr->degrade_mask );
}
}
else
{
dlog ("%s unexpected degrade clear for '%s' service\n",
hostname.c_str(), service.c_str() );
}
rc = PASS ;
break ;
}
@ -4910,13 +4893,6 @@ int nodeLinkClass::node_degrade_control ( string & hostname, int state, string s
wlog ("%s degrade 'assert' from '%s'\n", hostname.c_str(), service.c_str() );
node_ptr->degrade_mask |= service_flag ;
}
if (( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) &&
( node_ptr->operState == MTC_OPER_STATE__ENABLED ) &&
( node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE ))
{
availStatusChange ( node_ptr, MTC_AVAIL_STATUS__DEGRADED );
}
rc = PASS ;
break ;
}
@ -5232,10 +5208,6 @@ int nodeLinkClass::degrade_process_raise ( string & hostname,
{
node_ptr->degrade_mask |= DEGRADE_MASK_PMON ;
wlog ("%s is degraded due to '%s' process failure\n", hostname.c_str(), process.c_str());
if ( node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE )
{
availStatusChange ( node_ptr, MTC_AVAIL_STATUS__DEGRADED );
}
}
}
return (PASS);
@ -5412,11 +5384,6 @@ int nodeLinkClass::degrade_resource_raise ( string & hostname,
{
dlog ("%s '%s' Degraded (again)\n", lr.c_str(), resource.c_str());
}
if ( node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE )
{
availStatusChange ( node_ptr, MTC_AVAIL_STATUS__DEGRADED );
}
}
return (PASS);
}
@ -7039,9 +7006,6 @@ struct nodeLinkClass::node * nodeLinkClass::get_insvTestTimer ( timer_t tid )
*
*****************************************************************************/
#define TMP_DIR_PATH ((const char *)"/etc/mtc/tmp/")
#define AUTO_RECOVERY_FILE_SUFFIX ((const char *)"_ar_count")
void autorecovery_clear ( string hostname )
{
string ar_file = TMP_DIR_PATH + hostname + AUTO_RECOVERY_FILE_SUFFIX ;

View File

@ -585,6 +585,7 @@ private:
/* Bit mask of degrade reasons */
unsigned int degrade_mask ;
unsigned int degrade_mask_save ;
/** Process Monitor Daemon Flag Missing count */
int pmon_missing_count ;
@ -785,6 +786,7 @@ private:
int insv_test_handler ( struct nodeLinkClass::node * node_ptr );
int stress_handler ( struct nodeLinkClass::node * node_ptr );
int bm_handler ( struct nodeLinkClass::node * node_ptr );
int degrade_handler ( struct nodeLinkClass::node * node_ptr );
int uptime_handler ( void );
int host_services_handler ( struct nodeLinkClass::node * node_ptr );
@ -1731,6 +1733,11 @@ public:
/** Calculates and returns the mnfa threshold based on enabled hosts */
int mnfa_calculate_threshold ( string hostname );
/* collectd event handler */
int collectd_notify_handler ( string & hostname,
string & resource,
string & state );
/*****************************************
** Process Monitor Event Utilities API **
*****************************************/

View File

@ -68,9 +68,9 @@ string daemon_read_file ( const char * filename );
void daemon_logfile_close ( void );
void daemon_logfile_open ( void );
void daemon_log ( const char * filename , const char * str );
void daemon_log_value ( const char * filename , int val );
void daemon_log_value ( const char * filename , const char * str, int val );
int daemon_log ( const char * filename , const char * str );
int daemon_log_value ( const char * filename , int val );
int daemon_log_value ( const char * filename , const char * str, int val );
/* reads the first line of a file and if it contains a string
* that represents an integer value then return it */

View File

@ -103,7 +103,7 @@ void daemon_healthcheck ( const char * sig )
#define BUFFER 1024
void daemon_log_value ( const char * filename , const char * str, int val )
int daemon_log_value ( const char * filename , const char * str, int val )
{
FILE * file_stream = fopen (filename, "a" ) ;
if ( file_stream != NULL )
@ -111,10 +111,12 @@ void daemon_log_value ( const char * filename , const char * str, int val )
fprintf ( file_stream,"%s %d\n", str, val );
fflush (file_stream);
fclose (file_stream);
return (PASS);
}
return (FAIL_FILE_OPEN);
}
void daemon_log_value ( const char * filename , int val )
int daemon_log_value ( const char * filename , int val )
{
FILE * file_stream = fopen (filename, "w" ) ;
if ( file_stream != NULL )
@ -122,10 +124,12 @@ void daemon_log_value ( const char * filename , int val )
fprintf ( file_stream,"%d\n", val );
fflush (file_stream);
fclose (file_stream);
return (PASS);
}
return (FAIL_FILE_OPEN);
}
void daemon_log ( const char * filename , const char * str )
int daemon_log ( const char * filename , const char * str )
{
FILE * file_stream = fopen (filename, "a" ) ;
if ( file_stream != NULL )
@ -133,7 +137,9 @@ void daemon_log ( const char * filename , const char * str )
fprintf ( file_stream,"%s\n", str );
fflush (file_stream);
fclose (file_stream);
return (PASS);
}
return (FAIL_FILE_OPEN);
}
/* reads the first line of a file and if it contains a string

View File

@ -191,8 +191,50 @@ int mtc_service_inbox ( nodeLinkClass * obj_ptr,
print_mtc_message ( hostname, MTC_CMD_RX, msg, get_iface_name_str(iface), false );
if ( msg.hdr[0] == '{' )
{
int rc1 ;
string service ;
mlog1 ("%s\n", &msg.hdr[0] );
rc1 = jsonUtil_get_key_val(&msg.hdr[0],"service", service );
if ( rc1 == PASS )
{
if ( service == "collectd_notifier" )
{
int rc1,rc2,rc3 ;
string hostname,resource,state ;
rc1 = jsonUtil_get_key_val(&msg.hdr[0],"hostname", hostname );
rc2 = jsonUtil_get_key_val(&msg.hdr[0],"resource", resource );
rc3 = jsonUtil_get_key_val(&msg.hdr[0],"degrade", state );
if ( rc1|rc2|rc3 )
{
elog ("failed to parse '%s' message\n", service.c_str());
wlog ("... %s\n", &msg.hdr[0] );
}
else
{
obj_ptr->collectd_notify_handler ( hostname,
resource,
state );
}
}
/* future service requests */
else
{
wlog ("Unexpected service request: '%s'\n", service.c_str());
}
}
else
{
wlog("Unexpected json message: %s\n", &msg.hdr[0] );
}
}
/* Check for response messages */
if ( strstr ( &msg.hdr[0], get_cmd_rsp_msg_header() ) )
else if ( strstr ( &msg.hdr[0], get_cmd_rsp_msg_header() ) )
{
obj_ptr->set_cmd_resp ( hostname , msg ) ;
}

View File

@ -74,6 +74,9 @@ int nodeLinkClass::fsm ( struct nodeLinkClass::node * node_ptr )
/* manage the host connected state and board management alarms */
nodeLinkClass::bm_handler ( node_ptr );
/* manage host's degrade state */
nodeLinkClass::degrade_handler ( node_ptr );
/*
* Always run the offline handler
*

View File

@ -5599,15 +5599,6 @@ int nodeLinkClass::add_handler ( struct nodeLinkClass::node * node_ptr )
send_hwmon_command ( node_ptr->hostname, MTC_CMD_START_HOST );
}
/* handle coming out of the ADD in a degraded state */
if (( node_ptr->degrade_mask != 0 ) &&
(( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) &&
( node_ptr->operState == MTC_OPER_STATE__ENABLED ) &&
( node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE )))
{
availStatusChange ( node_ptr, MTC_AVAIL_STATUS__DEGRADED );
}
node_ptr->mtcAlive_gate = false ;
node_ptr->addStage = MTC_ADD__DONE ;
break;
@ -6111,22 +6102,6 @@ int nodeLinkClass::insv_test_handler ( struct nodeLinkClass::node * node_ptr )
{
alarm_compute_clear ( node_ptr, false );
}
/************************************************************
* Manage host degrade based on degrade mask *
***********************************************************/
if (( node_ptr->degrade_mask == 0 ) &&
( node_ptr->availStatus == MTC_AVAIL_STATUS__DEGRADED ))
{
availStatusChange ( node_ptr, MTC_AVAIL_STATUS__AVAILABLE );
}
/* expected degrade audit */
else if (( node_ptr->degrade_mask ) &&
( node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE ))
{
availStatusChange ( node_ptr, MTC_AVAIL_STATUS__DEGRADED );
}
}
break ;
}
@ -6461,12 +6436,6 @@ int nodeLinkClass::insv_test_handler ( struct nodeLinkClass::node * node_ptr )
node_ptr->degrade_mask |= DEGRADE_MASK_SM ;
ilog ("%s sm degrade\n", node_ptr->hostname.c_str());
/* degrade the host if not already degraded */
if ( node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE )
{
availStatusChange ( node_ptr, MTC_AVAIL_STATUS__DEGRADED );
}
}
/* Manage de-asserting degrade due to Software Management */
@ -6477,16 +6446,6 @@ int nodeLinkClass::insv_test_handler ( struct nodeLinkClass::node * node_ptr )
node_ptr->degrade_mask &= ~DEGRADE_MASK_SM ;
ilog ("%s sm degrade clear\n", node_ptr->hostname.c_str());
/* if the degrade mask is now clear then consider clearing the degrade state */
if ( node_ptr->degrade_mask == 0 )
{
/* ... but only if we are degraded */
if ( node_ptr->availStatus == MTC_AVAIL_STATUS__DEGRADED )
{
availStatusChange ( node_ptr, MTC_AVAIL_STATUS__AVAILABLE );
}
}
}
if ( node_ptr->mtce_flags & MTC_FLAG__I_AM_NOT_HEALTHY)
@ -6502,10 +6461,6 @@ int nodeLinkClass::insv_test_handler ( struct nodeLinkClass::node * node_ptr )
if ( node_ptr->health_threshold_counter >= MTC_UNHEALTHY_THRESHOLD )
{
node_ptr->degrade_mask |= DEGRADE_MASK_CONFIG ;
if ( node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE )
{
availStatusChange ( node_ptr, MTC_AVAIL_STATUS__DEGRADED );
}
/* threshold is reached so raise the config alarm if it is not already raised */
if ( node_ptr->alarms[MTC_ALARM_ID__CONFIG] != FM_ALARM_SEVERITY_CRITICAL )
@ -6554,6 +6509,30 @@ int nodeLinkClass::insv_test_handler ( struct nodeLinkClass::node * node_ptr )
return (PASS);
}
/************************************************************
 * Degrade state manager.                                   *
 *                                                          *
 * Reconciles the host's availability status with its       *
 * degrade mask. Nothing is done unless the host is         *
 * unlocked-enabled.                                        *
 *                                                          *
 *  - mask clear and status degraded  -> set available      *
 *  - mask set   and status available -> set degraded       *
 *                                                          *
 * Always returns PASS.                                     *
 ***********************************************************/
int nodeLinkClass::degrade_handler ( struct nodeLinkClass::node * node_ptr )
{
    const bool unlocked_enabled =
        (( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) &&
         ( node_ptr->operState  == MTC_OPER_STATE__ENABLED   ));

    /* degrade only applies to unlocked-enabled hosts */
    if ( !unlocked_enabled )
    {
        return (PASS);
    }

    const bool should_recover =
        (( node_ptr->degrade_mask == DEGRADE_MASK_NONE ) &&
         ( node_ptr->availStatus  == MTC_AVAIL_STATUS__DEGRADED ));

    const bool should_degrade =
        (( node_ptr->degrade_mask ) &&
         ( node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE ));

    if ( should_recover )
    {
        availStatusChange ( node_ptr, MTC_AVAIL_STATUS__AVAILABLE );
    }
    else if ( should_degrade )
    {
        availStatusChange ( node_ptr, MTC_AVAIL_STATUS__DEGRADED );
    }
    return (PASS);
}
int nodeLinkClass::cfg_handler ( struct nodeLinkClass::node * node_ptr )
{

View File

@ -1127,6 +1127,7 @@ void read_fs_file ( vector<string> & dynamic_resources )
*****************************************************************************/
void add_dynamic_fs_resource ( bool send_response )
{
#ifdef WANT_FS_MONITORING
char resource[50];
char temp_resource[50];
char device [50];
@ -1206,10 +1207,14 @@ void add_dynamic_fs_resource ( bool send_response )
}
}
}
#endif
if (send_response)
{
#ifdef WANT_FS_MONITORING
ilog ("sending response to dynamic FS add, to the rmon client\n");
#else
ilog("dynamic filesystem monitoring moved to collectd\n");
#endif
/* let the rmon client know that we are done with the file */
rmon_resource_response(_rmon_ctrl_ptr->clients);
}
@ -4650,6 +4655,8 @@ void rmon_service (rmon_ctrl_type * ctrl_ptr)
ilog ("registered clients: %d\n", _rmon_ctrl_ptr->clients);
#ifdef WANT_FS_MONITORING
/* Initialize the resource specific configuration */
for (int j=0; j<_rmon_ctrl_ptr->resources; j++)
{
@ -4669,6 +4676,9 @@ void rmon_service (rmon_ctrl_type * ctrl_ptr)
/* add any dynamic resources from before */
add_dynamic_fs_resource(false);
#else
ilog("static filesystem monitoring moved to collectd\n");
#endif
/* Clear any stale dynamic alarms that can be caused by dynamic resources. */
/* An alarm becomes stale for example if it was raised against a local volume group (lvg) and */

View File

@ -1,16 +0,0 @@
[resource]
resource = Platform CPU Usage
debounce = 20 ; number of seconds to wait before degrade clear
severity = critical ; minor, major, critical
minor_threshold = 80 ; minor cpu utilization threshold percentage
major_threshold = 90 ; major cpu utilization threshold percentage
critical_threshold = 95 ; critical cpu utilization threshold percentage (use 101 if unused)
minor_threshold_abs_node0 = 512 ; absolute minor threshold value MiB processor node 0
major_threshold_abs_node0 = 307 ; absolute major threshold value MiB processor node 0
critical_threshold_abs_node0 = 102 ; absolute critical threshold value MiB processor node 0
minor_threshold_abs_node1 = 0 ; absolute minor threshold value MiB processor node 1
major_threshold_abs_node1 = 0 ; absolute major threshold value MiB processor node 1
critical_threshold_abs_node1 = 0 ; absolute critical threshold value MiB processor node 1
num_tries = 2 ; number of tries before the alarm is raised
alarm_on = 1 ; 1 for alarm on, 0 for alarm off
percent = 1 ; Always use 1 for this resource (thresholds by percentage)

View File

@ -1,16 +0,0 @@
[resource]
resource = Platform Filesystem Usage
debounce = 20 ; number of seconds to wait before degrade clear
severity = critical ; minor, major, critical
minor_threshold = 70 ; minor filesystem utilization threshold percentage
major_threshold = 80 ; major filesystem utilization threshold percentage
critical_threshold = 90 ; critical filesystem utilization threshold percentage (use 101 if unused)
minor_threshold_abs_node0 = 512 ; absolute minor threshold value MiB processor node 0
major_threshold_abs_node0 = 307 ; absolute major threshold value MiB processor node 0
critical_threshold_abs_node0 = 102 ; absolute critical threshold value MiB processor node 0 (use 0 if unused)
minor_threshold_abs_node1 = 0 ; absolute minor threshold value MiB processor node 1
major_threshold_abs_node1 = 0 ; absolute major threshold value MiB processor node 1
critical_threshold_abs_node1 = 0 ; absolute critical threshold value MiB processor node 1
num_tries = 2 ; number of tries before the alarm is raised
alarm_on = 1 ; 1 for alarm on, 0 for alarm off
percent = 1 ; 1 for percentage used, 0 for absolute value (file system available in MiB) (default is 1)

View File

@ -1,16 +0,0 @@
[resource]
resource = Platform Memory Usage
debounce = 20 ; number of seconds to wait before degrade clear
severity = critical ; minor, major, critical
minor_threshold = 70 ; minor memory utilization threshold percentage
major_threshold = 80 ; major memory utilization threshold percentage
critical_threshold = 90 ; critical memory utilization threshold percentage (use 101 if unused)
minor_threshold_abs_node0 = 512 ; absolute minor threshold value MiB processor node 0
major_threshold_abs_node0 = 307 ; absolute major threshold value MiB processor node 0
critical_threshold_abs_node0 = 102 ; absolute critical threshold value MiB processor node 0 (use 0 if unused)
minor_threshold_abs_node1 = 0 ; absolute minor threshold value MiB processor node 1
major_threshold_abs_node1 = 0 ; absolute major threshold value MiB processor node 1
critical_threshold_abs_node1 = 0 ; absolute critical threshold value MiB processor node 1
num_tries = 2 ; number of tries before the alarm is raised
alarm_on = 1 ; 1 for alarm on, 0 for alarm off
percent = 1 ; 1 for percentage used, 0 for absolute value (memory available in MiB) (default is 1)