Merge "Collectd+InfluxDb-RMON Replacement(ALL METRICS) P1"

This commit is contained in:
Zuul 2018-07-03 17:02:34 +00:00 committed by Gerrit Code Review
commit 4a4c540a3c
14 changed files with 192 additions and 219 deletions

View File

@ -421,11 +421,6 @@ install -m 700 -p -D %{_buildsubdir}/pmon/scripts/pmon-restart %{buildroot}/%{lo
install -m 700 -p -D %{_buildsubdir}/pmon/scripts/pmon-start %{buildroot}/%{local_sbindir}/pmon-start install -m 700 -p -D %{_buildsubdir}/pmon/scripts/pmon-start %{buildroot}/%{local_sbindir}/pmon-start
install -m 700 -p -D %{_buildsubdir}/pmon/scripts/pmon-stop %{buildroot}/%{local_sbindir}/pmon-stop install -m 700 -p -D %{_buildsubdir}/pmon/scripts/pmon-stop %{buildroot}/%{local_sbindir}/pmon-stop
# test tools
install -m 755 %{_buildsubdir}/hwmon/scripts/show_hp360 %{buildroot}/%{_sbindir}/show_hp360
install -m 755 %{_buildsubdir}/hwmon/scripts/show_hp380 %{buildroot}/%{_sbindir}/show_hp380
install -m 755 %{_buildsubdir}/hwmon/scripts/show_quanta %{buildroot}/%{_sbindir}/show_quanta
# init script files # init script files
install -m 755 -p -D %{_buildsubdir}/scripts/mtcClient %{buildroot}%{_sysconfdir}/init.d/mtcClient install -m 755 -p -D %{_buildsubdir}/scripts/mtcClient %{buildroot}%{_sysconfdir}/init.d/mtcClient
install -m 755 -p -D %{_buildsubdir}/scripts/hbsClient %{buildroot}%{_sysconfdir}/init.d/hbsClient install -m 755 -p -D %{_buildsubdir}/scripts/hbsClient %{buildroot}%{_sysconfdir}/init.d/hbsClient
@ -498,9 +493,6 @@ install -m 755 -d %{buildroot}%{_sysconfdir}/rmonapi.d
install -m 755 -d %{buildroot}%{_sysconfdir}/rmonfiles.d install -m 755 -d %{buildroot}%{_sysconfdir}/rmonfiles.d
install -m 755 -d %{buildroot}%{_sysconfdir}/rmon_interfaces.d install -m 755 -d %{buildroot}%{_sysconfdir}/rmon_interfaces.d
install -m 644 -p -D %{_buildsubdir}/rmon/scripts/remotelogging_resource.conf %{buildroot}%{local_etc_rmond}/remotelogging_resource.conf install -m 644 -p -D %{_buildsubdir}/rmon/scripts/remotelogging_resource.conf %{buildroot}%{local_etc_rmond}/remotelogging_resource.conf
install -m 644 -p -D %{_buildsubdir}/rmon/scripts/cpu_resource.conf %{buildroot}%{local_etc_rmond}/cpu_resource.conf
install -m 644 -p -D %{_buildsubdir}/rmon/scripts/memory_resource.conf %{buildroot}%{local_etc_rmond}/memory_resource.conf
install -m 644 -p -D %{_buildsubdir}/rmon/scripts/filesystem_resource.conf %{buildroot}%{local_etc_rmond}/filesystem_resource.conf
install -m 644 -p -D %{_buildsubdir}/rmon/scripts/cinder_virtual_resource.conf %{buildroot}%{local_etc_rmond}/cinder_virtual_resource.conf install -m 644 -p -D %{_buildsubdir}/rmon/scripts/cinder_virtual_resource.conf %{buildroot}%{local_etc_rmond}/cinder_virtual_resource.conf
install -m 644 -p -D %{_buildsubdir}/rmon/scripts/nova_virtual_resource.conf %{buildroot}%{local_etc_rmond}/nova_virtual_resource.conf install -m 644 -p -D %{_buildsubdir}/rmon/scripts/nova_virtual_resource.conf %{buildroot}%{local_etc_rmond}/nova_virtual_resource.conf
install -m 644 -p -D %{_buildsubdir}/rmon/scripts/oam_resource.conf %{buildroot}%{_sysconfdir}/rmon_interfaces.d/oam_resource.conf install -m 644 -p -D %{_buildsubdir}/rmon/scripts/oam_resource.conf %{buildroot}%{_sysconfdir}/rmon_interfaces.d/oam_resource.conf
@ -676,10 +668,7 @@ install -m 755 -d %{buildroot}/var/run
%{local_etc_logrotated}/rmon.logrotate %{local_etc_logrotated}/rmon.logrotate
%{_unitdir}/rmon.service %{_unitdir}/rmon.service
%{local_etc_rmond}/filesystem_resource.conf
%{local_etc_rmond}/cpu_resource.conf
%{local_etc_rmond}/remotelogging_resource.conf %{local_etc_rmond}/remotelogging_resource.conf
%{local_etc_rmond}/memory_resource.conf
%{local_etc_rmond}/cinder_virtual_resource.conf %{local_etc_rmond}/cinder_virtual_resource.conf
%{local_etc_rmond}/nova_virtual_resource.conf %{local_etc_rmond}/nova_virtual_resource.conf
@ -713,10 +702,6 @@ install -m 755 -d %{buildroot}/var/run
%{local_etc_logrotated}/hwmon.logrotate %{local_etc_logrotated}/hwmon.logrotate
%{ocf_resourced}/platform/hwmon %{ocf_resourced}/platform/hwmon
%{_sbindir}/show_hp380
%{_sbindir}/show_hp360
%{_sbindir}/show_quanta
%{_sysconfdir}/init.d/hwmon %{_sysconfdir}/init.d/hwmon
%{local_bindir}/hwmond %{local_bindir}/hwmond

View File

@ -255,12 +255,38 @@ const char * get_mtcNodeCommand_str ( int cmd )
} }
void print_mtc_message ( string hostname, int direction, mtc_message_type & msg , const char * iface, bool force ) void print_mtc_message ( string hostname,
int direction,
mtc_message_type & msg,
const char * iface,
bool force )
{ {
/* Handle raw json string messages differently.
* Those messages just have a json string that starts at the header */
if ( msg.hdr[0] == '{' )
{
if ( force )
{
ilog ("%s %s (%s network) - %s\n",
hostname.c_str(),
direction ? "rx <-" : "tx ->" ,
iface,
msg.hdr);
}
else
{
mlog1 ("%s %s (%s network) - %s\n",
hostname.c_str(),
direction ? "rx <-" : "tx ->" ,
iface,
msg.hdr);
}
return ;
}
string str = "-" ; string str = "-" ;
if ( msg.buf[0] ) if ( msg.buf[0] )
str = msg.buf ; str = msg.buf ;
if ( force ) if ( force )
{ {
ilog ("%s %s %s (%s network) %d.%d %x:%x:%x.%x.%x.%x [%s] %s\n", ilog ("%s %s %s (%s network) %d.%d %x:%x:%x.%x.%x.%x [%s] %s\n",

View File

@ -92,6 +92,9 @@ void daemon_exit ( void );
#define NODE_HEALTHY (1) #define NODE_HEALTHY (1)
#define NODE_UNHEALTHY (2) #define NODE_UNHEALTHY (2)
#define AUTO_RECOVERY_FILE_SUFFIX ((const char *)"_ar_count")
#define TMP_DIR_PATH ((const char *)"/etc/mtc/tmp/")
#define HOST_IS_VIRTUAL ((const char *)"/var/run/virtual.host") #define HOST_IS_VIRTUAL ((const char *)"/var/run/virtual.host")
/** Configuration Pass/Fail Flag File */ /** Configuration Pass/Fail Flag File */
@ -146,10 +149,6 @@ void daemon_exit ( void );
#define BM_DNSMASQ_FILENAME ((const char *)"dnsmasq.bmc_hosts") #define BM_DNSMASQ_FILENAME ((const char *)"dnsmasq.bmc_hosts")
/* Added for Centos */
#define CENTOS_RELEASE_FILE ((const char *)"/etc/centos-release")
#define SYSTEMD_SERVICE_FILE_DIR ((const char *)"/usr/lib/systemd/system")
#define THREAD_NAME__IPMITOOL ((const char *)("ipmitool")) #define THREAD_NAME__IPMITOOL ((const char *)("ipmitool"))
#define IPMITOOL_PATH_AND_FILENAME ((const char *)("/usr/bin/ipmitool")) #define IPMITOOL_PATH_AND_FILENAME ((const char *)("/usr/bin/ipmitool"))
@ -970,7 +969,7 @@ string get_configStages_str ( mtc_configStages_enum stage );
#define DEGRADE_MASK_SUBF 0x00000100 #define DEGRADE_MASK_SUBF 0x00000100
#define DEGRADE_MASK_SM 0x00000200 #define DEGRADE_MASK_SM 0x00000200
#define DEGRADE_MASK_CONFIG 0x00000400 #define DEGRADE_MASK_CONFIG 0x00000400
#define DEGRADE_MASK_RES2 0x00000800 #define DEGRADE_MASK_COLLECTD 0x00000800
#define DEGRADE_MASK_ENABLE 0x00001000 #define DEGRADE_MASK_ENABLE 0x00001000
#define DEGRADE_MASK_RES4 0x00002000 #define DEGRADE_MASK_RES4 0x00002000
#define DEGRADE_MASK_RES5 0x00004000 #define DEGRADE_MASK_RES5 0x00004000

View File

@ -662,8 +662,8 @@ nodeLinkClass::node* nodeLinkClass::addNode( string hostname )
ptr->log_throttle = 0 ; ptr->log_throttle = 0 ;
ptr->no_work_log_throttle = 0 ; ptr->no_work_log_throttle = 0 ;
/* Clear the degrade control structs */ ptr->degrade_mask = ptr->degrade_mask_save = DEGRADE_MASK_NONE ;
ptr->degrade_mask = DEGRADE_MASK_NONE ;
ptr->degraded_resources_list.clear () ; ptr->degraded_resources_list.clear () ;
ptr->pmond_ready = false ; ptr->pmond_ready = false ;
ptr->rmond_ready = false ; ptr->rmond_ready = false ;
@ -4561,16 +4561,6 @@ void nodeLinkClass::manage_heartbeat_degrade ( string hostname, iface_enum iface
} }
hbs_minor_clear ( node_ptr, iface ); hbs_minor_clear ( node_ptr, iface );
/* Set the host available if the degrade mask is now
* cleared and we are degraded */
if ( node_ptr->degrade_mask == 0 )
{
if ( get_availStatus ( hostname ) == MTC_AVAIL_STATUS__DEGRADED )
{
set_availStatus ( hostname, MTC_AVAIL_STATUS__AVAILABLE );
}
}
} }
else if ( this->mtcTimer_dor.tid ) else if ( this->mtcTimer_dor.tid )
{ {
@ -4602,12 +4592,6 @@ void nodeLinkClass::manage_heartbeat_degrade ( string hostname, iface_enum iface
node_ptr->degrade_mask |= DEGRADE_MASK_HEARTBEAT_INFRA ; node_ptr->degrade_mask |= DEGRADE_MASK_HEARTBEAT_INFRA ;
} }
} }
/* No point in changing if we are already degraded */
if ( nodeLinkClass::get_availStatus ( hostname ) == MTC_AVAIL_STATUS__AVAILABLE )
{
set_availStatus ( hostname, MTC_AVAIL_STATUS__DEGRADED );
}
} }
} }
} }
@ -4621,7 +4605,7 @@ void nodeLinkClass::manage_heartbeat_minor ( string hostname, iface_enum iface,
wlog ("%s Unknown host\n", hostname.c_str()); wlog ("%s Unknown host\n", hostname.c_str());
return ; return ;
} }
/* is this a clear event ? */ /* is this a clear event ? */
if ( clear_event == true ) if ( clear_event == true )
{ {
@ -4639,15 +4623,15 @@ void nodeLinkClass::manage_heartbeat_minor ( string hostname, iface_enum iface,
else if ( node_ptr->hbs_minor[iface] != true ) else if ( node_ptr->hbs_minor[iface] != true )
{ {
mnfa_add_host ( node_ptr, iface ); mnfa_add_host ( node_ptr, iface );
} }
} }
} }
/** Interface to declare that a key service on the /** Interface to declare that a key service on the
* specified host is up, running and ready */ * specified host is up, running and ready */
int nodeLinkClass::declare_service_ready ( string & hostname, int nodeLinkClass::declare_service_ready ( string & hostname,
unsigned int service ) unsigned int service )
{ {
nodeLinkClass::node * node_ptr = nodeLinkClass::getNode ( hostname ); nodeLinkClass::node * node_ptr = nodeLinkClass::getNode ( hostname );
@ -4661,18 +4645,11 @@ int nodeLinkClass::declare_service_ready ( string & hostname,
node_ptr->pmond_ready = true ; node_ptr->pmond_ready = true ;
plog ("%s got pmond ready event\n", hostname.c_str()); plog ("%s got pmond ready event\n", hostname.c_str());
/* A ready event means that pmond process has started. /* A ready event means that pmond process has started.
* Any previous history is gone. Cleanup mtce. * Any previous history is gone. Cleanup mtce.
* If there are still process issues on this host then * If there are still process issues on this host then
* they will be reported again.*/ * they will be reported again.*/
node_ptr->degrade_mask &= ~DEGRADE_MASK_PMON ; node_ptr->degrade_mask &= ~DEGRADE_MASK_PMON ;
if ( node_ptr->degrade_mask == DEGRADE_MASK_NONE )
{
if ( node_ptr->availStatus == MTC_AVAIL_STATUS__DEGRADED )
{
availStatusChange ( node_ptr, MTC_AVAIL_STATUS__AVAILABLE );
}
}
return (PASS); return (PASS);
} }
else if ( service == MTC_SERVICE_HWMOND ) else if ( service == MTC_SERVICE_HWMOND )
@ -4719,14 +4696,6 @@ int nodeLinkClass::degrade_pmond_clear ( string & hostname )
if ( node_ptr->degrade_mask ) if ( node_ptr->degrade_mask )
{ {
node_ptr->degrade_mask &= ~DEGRADE_MASK_PMON ; node_ptr->degrade_mask &= ~DEGRADE_MASK_PMON ;
if ( !node_ptr->degrade_mask )
{
if ( node_ptr->operState == MTC_OPER_STATE__ENABLED )
{
availStatusChange ( node_ptr, MTC_AVAIL_STATUS__AVAILABLE );
}
}
} }
/* The only detectable inservice failures are process failures */ /* The only detectable inservice failures are process failures */
@ -4735,15 +4704,65 @@ int nodeLinkClass::degrade_pmond_clear ( string & hostname )
return (PASS); return (PASS);
} }
/** Collectd degrade notification handler.
 *
 * Sets or clears the DEGRADE_MASK_COLLECTD bit in the specified host's
 * degrade mask according to the requested state. Only the degrade mask
 * is modified here ; this function does not itself change the host's
 * availability status.
 *
 * @param hostname - name of the host the notification applies to
 * @param resource - resource named in the notification ; used for logging only
 * @param state    - "assert" to set the collectd degrade bit,
 *                   "clear" to remove it
 *
 * @return PASS when the request was handled, including repeated requests
 *         that leave the mask unchanged,
 *         FAIL_UNKNOWN_HOSTNAME when the host cannot be found,
 *         FAIL_OPERATION when state is neither "assert" nor "clear".
 */
int nodeLinkClass::collectd_notify_handler ( string & hostname,
                                             string & resource,
                                             string & state )
{
    int rc = PASS ;
    nodeLinkClass::node * node_ptr = nodeLinkClass::getNode ( hostname );
    if ( node_ptr == NULL )
    {
        wlog ("%s Unknown Host\n", hostname.c_str());
        return (FAIL_UNKNOWN_HOSTNAME) ;
    }

    if ( state == "clear" )
    {
        if ( node_ptr->degrade_mask & DEGRADE_MASK_COLLECTD )
        {
            /* only log at info level on an actual state transition */
            ilog ("%s collectd degrade state change ; assert -> clear (%s)\n",
                      hostname.c_str(), resource.c_str());
            node_ptr->degrade_mask &= ~DEGRADE_MASK_COLLECTD ;
        }
        else
        {
            /* already clear ; nothing to change */
            mlog3 ("%s collectd degrade 'clear' request (%s)\n",
                       hostname.c_str(), resource.c_str());
        }
    }
    else if ( state == "assert" )
    {
        if ( (node_ptr->degrade_mask & DEGRADE_MASK_COLLECTD) == 0 )
        {
            /* only log at info level on an actual state transition */
            ilog ("%s collectd degrade state change ; clear -> assert (due to %s)\n",
                      hostname.c_str(), resource.c_str());
            node_ptr->degrade_mask |= DEGRADE_MASK_COLLECTD ;
        }
        else
        {
            /* already asserted ; nothing to change */
            mlog3 ("%s collectd degrade 'assert' request (%s)\n",
                       hostname.c_str(), resource.c_str());
        }
    }
    else
    {
        wlog ("%s collectd degrade state unknown (%s)\n",
                  hostname.c_str(),
                  state.c_str());
        rc = FAIL_OPERATION ;
    }
    return (rc);
}
/** Resource Monitor 'Clear' Event handler. /** Resource Monitor 'Clear' Event handler.
* *
* The resource specified will be removed from the * The resource specified will be removed from the
* 'degraded_resources_list' for specified host. * 'degraded_resources_list' for specified host.
* if there are no other degraded resources or other * if there are no other degraded resources or other
* degraded services/reasons against that host then * degraded services/reasons against that host then
* this handler will clear the degrade state for the * this handler will clear the degrade state for the
* specified host all together. */ * specified host all together. */
int nodeLinkClass::degrade_resource_clear ( string & hostname, int nodeLinkClass::degrade_resource_clear ( string & hostname,
string & resource ) string & resource )
{ {
/* lr - Log Prefix Rmon */ /* lr - Log Prefix Rmon */
@ -4788,18 +4807,6 @@ int nodeLinkClass::degrade_resource_clear ( string & hostname,
if ( node_ptr->degraded_resources_list.empty() ) if ( node_ptr->degraded_resources_list.empty() )
{ {
node_ptr->degrade_mask &= ~DEGRADE_MASK_RESMON ; ; node_ptr->degrade_mask &= ~DEGRADE_MASK_RESMON ; ;
if ( node_ptr->degrade_mask == DEGRADE_MASK_NONE )
{
if ( node_ptr->availStatus == MTC_AVAIL_STATUS__DEGRADED )
{
availStatusChange ( node_ptr, MTC_AVAIL_STATUS__AVAILABLE );
}
}
else
{
wlog ("%s Remains Degraded - Reason Mask:0x%08x\n",
hostname.c_str(), node_ptr->degrade_mask );
}
} }
else else
{ {
@ -4874,30 +4881,6 @@ int nodeLinkClass::node_degrade_control ( string & hostname, int state, string s
/* clear the mask regardless of host state */ /* clear the mask regardless of host state */
node_ptr->degrade_mask &= ~service_flag ; node_ptr->degrade_mask &= ~service_flag ;
/* only applies if host is unlocked-enabled-degraded and
* there are no other degrade flags in the degrade mask */
if (( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) &&
( node_ptr->operState == MTC_OPER_STATE__ENABLED ) &&
( node_ptr->availStatus == MTC_AVAIL_STATUS__DEGRADED ))
{
if ( node_ptr->degrade_mask == DEGRADE_MASK_NONE )
{
availStatusChange ( node_ptr, MTC_AVAIL_STATUS__AVAILABLE );
}
else
{
/* TODO: convert mask to a string of services and print that string */
wlog ("%s remains degraded - degrade mask:0x%08x\n",
hostname.c_str(),
node_ptr->degrade_mask );
}
}
else
{
dlog ("%s unexpected degrade clear for '%s' service\n",
hostname.c_str(), service.c_str() );
}
rc = PASS ; rc = PASS ;
break ; break ;
} }
@ -4910,13 +4893,6 @@ int nodeLinkClass::node_degrade_control ( string & hostname, int state, string s
wlog ("%s degrade 'assert' from '%s'\n", hostname.c_str(), service.c_str() ); wlog ("%s degrade 'assert' from '%s'\n", hostname.c_str(), service.c_str() );
node_ptr->degrade_mask |= service_flag ; node_ptr->degrade_mask |= service_flag ;
} }
if (( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) &&
( node_ptr->operState == MTC_OPER_STATE__ENABLED ) &&
( node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE ))
{
availStatusChange ( node_ptr, MTC_AVAIL_STATUS__DEGRADED );
}
rc = PASS ; rc = PASS ;
break ; break ;
} }
@ -5232,10 +5208,6 @@ int nodeLinkClass::degrade_process_raise ( string & hostname,
{ {
node_ptr->degrade_mask |= DEGRADE_MASK_PMON ; node_ptr->degrade_mask |= DEGRADE_MASK_PMON ;
wlog ("%s is degraded due to '%s' process failure\n", hostname.c_str(), process.c_str()); wlog ("%s is degraded due to '%s' process failure\n", hostname.c_str(), process.c_str());
if ( node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE )
{
availStatusChange ( node_ptr, MTC_AVAIL_STATUS__DEGRADED );
}
} }
} }
return (PASS); return (PASS);
@ -5412,11 +5384,6 @@ int nodeLinkClass::degrade_resource_raise ( string & hostname,
{ {
dlog ("%s '%s' Degraded (again)\n", lr.c_str(), resource.c_str()); dlog ("%s '%s' Degraded (again)\n", lr.c_str(), resource.c_str());
} }
if ( node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE )
{
availStatusChange ( node_ptr, MTC_AVAIL_STATUS__DEGRADED );
}
} }
return (PASS); return (PASS);
} }
@ -7039,9 +7006,6 @@ struct nodeLinkClass::node * nodeLinkClass::get_insvTestTimer ( timer_t tid )
* *
*****************************************************************************/ *****************************************************************************/
#define TMP_DIR_PATH ((const char *)"/etc/mtc/tmp/")
#define AUTO_RECOVERY_FILE_SUFFIX ((const char *)"_ar_count")
void autorecovery_clear ( string hostname ) void autorecovery_clear ( string hostname )
{ {
string ar_file = TMP_DIR_PATH + hostname + AUTO_RECOVERY_FILE_SUFFIX ; string ar_file = TMP_DIR_PATH + hostname + AUTO_RECOVERY_FILE_SUFFIX ;

View File

@ -585,6 +585,7 @@ private:
/* Bit mask of degrade reasons */ /* Bit mask of degrade reasons */
unsigned int degrade_mask ; unsigned int degrade_mask ;
unsigned int degrade_mask_save ;
/** Process Monitor Daemon Flag Missing count */ /** Process Monitor Daemon Flag Missing count */
int pmon_missing_count ; int pmon_missing_count ;
@ -785,6 +786,7 @@ private:
int insv_test_handler ( struct nodeLinkClass::node * node_ptr ); int insv_test_handler ( struct nodeLinkClass::node * node_ptr );
int stress_handler ( struct nodeLinkClass::node * node_ptr ); int stress_handler ( struct nodeLinkClass::node * node_ptr );
int bm_handler ( struct nodeLinkClass::node * node_ptr ); int bm_handler ( struct nodeLinkClass::node * node_ptr );
int degrade_handler ( struct nodeLinkClass::node * node_ptr );
int uptime_handler ( void ); int uptime_handler ( void );
int host_services_handler ( struct nodeLinkClass::node * node_ptr ); int host_services_handler ( struct nodeLinkClass::node * node_ptr );
@ -1731,6 +1733,11 @@ public:
/** Calculates and returns the mnfa threshold based on enabled hosts */ /** Calculates and returns the mnfa threshold based on enabled hosts */
int mnfa_calculate_threshold ( string hostname ); int mnfa_calculate_threshold ( string hostname );
/* collectd event handler */
int collectd_notify_handler ( string & hostname,
string & resource,
string & state );
/***************************************** /*****************************************
** Process Monitor Event Utilities API ** ** Process Monitor Event Utilities API **
*****************************************/ *****************************************/

View File

@ -68,9 +68,9 @@ string daemon_read_file ( const char * filename );
void daemon_logfile_close ( void ); void daemon_logfile_close ( void );
void daemon_logfile_open ( void ); void daemon_logfile_open ( void );
void daemon_log ( const char * filename , const char * str ); int daemon_log ( const char * filename , const char * str );
void daemon_log_value ( const char * filename , int val ); int daemon_log_value ( const char * filename , int val );
void daemon_log_value ( const char * filename , const char * str, int val ); int daemon_log_value ( const char * filename , const char * str, int val );
/* reads the first line of a file and if it contains a string /* reads the first line of a file and if it contains a string
* that represents an integer value then return it */ * that represents an integer value then return it */

View File

@ -103,7 +103,7 @@ void daemon_healthcheck ( const char * sig )
#define BUFFER 1024 #define BUFFER 1024
void daemon_log_value ( const char * filename , const char * str, int val ) int daemon_log_value ( const char * filename , const char * str, int val )
{ {
FILE * file_stream = fopen (filename, "a" ) ; FILE * file_stream = fopen (filename, "a" ) ;
if ( file_stream != NULL ) if ( file_stream != NULL )
@ -111,10 +111,12 @@ void daemon_log_value ( const char * filename , const char * str, int val )
fprintf ( file_stream,"%s %d\n", str, val ); fprintf ( file_stream,"%s %d\n", str, val );
fflush (file_stream); fflush (file_stream);
fclose (file_stream); fclose (file_stream);
return (PASS);
} }
return (FAIL_FILE_OPEN);
} }
void daemon_log_value ( const char * filename , int val ) int daemon_log_value ( const char * filename , int val )
{ {
FILE * file_stream = fopen (filename, "w" ) ; FILE * file_stream = fopen (filename, "w" ) ;
if ( file_stream != NULL ) if ( file_stream != NULL )
@ -122,10 +124,12 @@ void daemon_log_value ( const char * filename , int val )
fprintf ( file_stream,"%d\n", val ); fprintf ( file_stream,"%d\n", val );
fflush (file_stream); fflush (file_stream);
fclose (file_stream); fclose (file_stream);
return (PASS);
} }
return (FAIL_FILE_OPEN);
} }
void daemon_log ( const char * filename , const char * str ) int daemon_log ( const char * filename , const char * str )
{ {
FILE * file_stream = fopen (filename, "a" ) ; FILE * file_stream = fopen (filename, "a" ) ;
if ( file_stream != NULL ) if ( file_stream != NULL )
@ -133,7 +137,9 @@ void daemon_log ( const char * filename , const char * str )
fprintf ( file_stream,"%s\n", str ); fprintf ( file_stream,"%s\n", str );
fflush (file_stream); fflush (file_stream);
fclose (file_stream); fclose (file_stream);
return (PASS);
} }
return (FAIL_FILE_OPEN);
} }
/* reads the first line of a file and if it contains a string /* reads the first line of a file and if it contains a string

View File

@ -191,8 +191,50 @@ int mtc_service_inbox ( nodeLinkClass * obj_ptr,
print_mtc_message ( hostname, MTC_CMD_RX, msg, get_iface_name_str(iface), false ); print_mtc_message ( hostname, MTC_CMD_RX, msg, get_iface_name_str(iface), false );
if ( msg.hdr[0] == '{' )
{
int rc1 ;
string service ;
mlog1 ("%s\n", &msg.hdr[0] );
rc1 = jsonUtil_get_key_val(&msg.hdr[0],"service", service );
if ( rc1 == PASS )
{
if ( service == "collectd_notifier" )
{
int rc1,rc2,rc3 ;
string hostname,resource,state ;
rc1 = jsonUtil_get_key_val(&msg.hdr[0],"hostname", hostname );
rc2 = jsonUtil_get_key_val(&msg.hdr[0],"resource", resource );
rc3 = jsonUtil_get_key_val(&msg.hdr[0],"degrade", state );
if ( rc1|rc2|rc3 )
{
elog ("failed to parse '%s' message\n", service.c_str());
wlog ("... %s\n", &msg.hdr[0] );
}
else
{
obj_ptr->collectd_notify_handler ( hostname,
resource,
state );
}
}
/* future service requests */
else
{
wlog ("Unexpected service request: '%s'\n", service.c_str());
}
}
else
{
wlog("Unexpected json message: %s\n", &msg.hdr[0] );
}
}
/* Check for response messages */ /* Check for response messages */
if ( strstr ( &msg.hdr[0], get_cmd_rsp_msg_header() ) ) else if ( strstr ( &msg.hdr[0], get_cmd_rsp_msg_header() ) )
{ {
obj_ptr->set_cmd_resp ( hostname , msg ) ; obj_ptr->set_cmd_resp ( hostname , msg ) ;
} }

View File

@ -74,6 +74,9 @@ int nodeLinkClass::fsm ( struct nodeLinkClass::node * node_ptr )
/* manage the host connected state and board management alarms */ /* manage the host connected state and board management alarms */
nodeLinkClass::bm_handler ( node_ptr ); nodeLinkClass::bm_handler ( node_ptr );
/* manage host's degrade state */
nodeLinkClass::degrade_handler ( node_ptr );
/* /*
* Always run the offline handler * Always run the offline handler
* *

View File

@ -5599,15 +5599,6 @@ int nodeLinkClass::add_handler ( struct nodeLinkClass::node * node_ptr )
send_hwmon_command ( node_ptr->hostname, MTC_CMD_START_HOST ); send_hwmon_command ( node_ptr->hostname, MTC_CMD_START_HOST );
} }
/* handle coming out of the ADD in a degraded state */
if (( node_ptr->degrade_mask != 0 ) &&
(( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) &&
( node_ptr->operState == MTC_OPER_STATE__ENABLED ) &&
( node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE )))
{
availStatusChange ( node_ptr, MTC_AVAIL_STATUS__DEGRADED );
}
node_ptr->mtcAlive_gate = false ; node_ptr->mtcAlive_gate = false ;
node_ptr->addStage = MTC_ADD__DONE ; node_ptr->addStage = MTC_ADD__DONE ;
break; break;
@ -6111,22 +6102,6 @@ int nodeLinkClass::insv_test_handler ( struct nodeLinkClass::node * node_ptr )
{ {
alarm_compute_clear ( node_ptr, false ); alarm_compute_clear ( node_ptr, false );
} }
/************************************************************
* Manage host degrade based on degrade mask *
***********************************************************/
if (( node_ptr->degrade_mask == 0 ) &&
( node_ptr->availStatus == MTC_AVAIL_STATUS__DEGRADED ))
{
availStatusChange ( node_ptr, MTC_AVAIL_STATUS__AVAILABLE );
}
/* expected degrade audit */
else if (( node_ptr->degrade_mask ) &&
( node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE ))
{
availStatusChange ( node_ptr, MTC_AVAIL_STATUS__DEGRADED );
}
} }
break ; break ;
} }
@ -6461,12 +6436,6 @@ int nodeLinkClass::insv_test_handler ( struct nodeLinkClass::node * node_ptr )
node_ptr->degrade_mask |= DEGRADE_MASK_SM ; node_ptr->degrade_mask |= DEGRADE_MASK_SM ;
ilog ("%s sm degrade\n", node_ptr->hostname.c_str()); ilog ("%s sm degrade\n", node_ptr->hostname.c_str());
/* degrade the host if not already degraded */
if ( node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE )
{
availStatusChange ( node_ptr, MTC_AVAIL_STATUS__DEGRADED );
}
} }
/* Manage de-asserting degrade due to Software Management */ /* Manage de-asserting degrade due to Software Management */
@ -6477,16 +6446,6 @@ int nodeLinkClass::insv_test_handler ( struct nodeLinkClass::node * node_ptr )
node_ptr->degrade_mask &= ~DEGRADE_MASK_SM ; node_ptr->degrade_mask &= ~DEGRADE_MASK_SM ;
ilog ("%s sm degrade clear\n", node_ptr->hostname.c_str()); ilog ("%s sm degrade clear\n", node_ptr->hostname.c_str());
/* if the degrade mask is now clear then consider clearing the degrade state */
if ( node_ptr->degrade_mask == 0 )
{
/* ... but only if we are degraded */
if ( node_ptr->availStatus == MTC_AVAIL_STATUS__DEGRADED )
{
availStatusChange ( node_ptr, MTC_AVAIL_STATUS__AVAILABLE );
}
}
} }
if ( node_ptr->mtce_flags & MTC_FLAG__I_AM_NOT_HEALTHY) if ( node_ptr->mtce_flags & MTC_FLAG__I_AM_NOT_HEALTHY)
@ -6502,10 +6461,6 @@ int nodeLinkClass::insv_test_handler ( struct nodeLinkClass::node * node_ptr )
if ( node_ptr->health_threshold_counter >= MTC_UNHEALTHY_THRESHOLD ) if ( node_ptr->health_threshold_counter >= MTC_UNHEALTHY_THRESHOLD )
{ {
node_ptr->degrade_mask |= DEGRADE_MASK_CONFIG ; node_ptr->degrade_mask |= DEGRADE_MASK_CONFIG ;
if ( node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE )
{
availStatusChange ( node_ptr, MTC_AVAIL_STATUS__DEGRADED );
}
/* threshold is reached so raise the config alarm if it is not already raised */ /* threshold is reached so raise the config alarm if it is not already raised */
if ( node_ptr->alarms[MTC_ALARM_ID__CONFIG] != FM_ALARM_SEVERITY_CRITICAL ) if ( node_ptr->alarms[MTC_ALARM_ID__CONFIG] != FM_ALARM_SEVERITY_CRITICAL )
@ -6554,6 +6509,30 @@ int nodeLinkClass::insv_test_handler ( struct nodeLinkClass::node * node_ptr )
return (PASS); return (PASS);
} }
/************************************************************
 * Host degrade state handler                               *
 *                                                          *
 * Reconciles the host's availability status with its       *
 * degrade mask. Nothing is changed unless the host is      *
 * unlocked-enabled.                                        *
 *                                                          *
 *  - mask is DEGRADE_MASK_NONE and host is 'degraded'      *
 *      -> change availability to 'available'               *
 *  - mask is non-zero and host is 'available'              *
 *      -> change availability to 'degraded'                *
 *                                                          *
 * Always returns PASS.                                     *
 ***********************************************************/
int nodeLinkClass::degrade_handler ( struct nodeLinkClass::node * node_ptr )
{
    /* the degrade availability state only applies to unlocked-enabled hosts */
    if (( node_ptr->adminState != MTC_ADMIN_STATE__UNLOCKED ) ||
        ( node_ptr->operState  != MTC_OPER_STATE__ENABLED   ))
    {
        return (PASS);
    }

    const bool is_degraded  = ( node_ptr->availStatus == MTC_AVAIL_STATUS__DEGRADED  );
    const bool is_available = ( node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE );

    if ( is_degraded && ( node_ptr->degrade_mask == DEGRADE_MASK_NONE ))
    {
        /* no degrade reasons remain ; recover the host */
        availStatusChange ( node_ptr, MTC_AVAIL_STATUS__AVAILABLE );
    }
    else if ( is_available && ( node_ptr->degrade_mask ))
    {
        /* at least one degrade reason is set ; degrade the host */
        availStatusChange ( node_ptr, MTC_AVAIL_STATUS__DEGRADED );
    }
    return (PASS);
}
int nodeLinkClass::cfg_handler ( struct nodeLinkClass::node * node_ptr ) int nodeLinkClass::cfg_handler ( struct nodeLinkClass::node * node_ptr )
{ {

View File

@ -1127,6 +1127,7 @@ void read_fs_file ( vector<string> & dynamic_resources )
*****************************************************************************/ *****************************************************************************/
void add_dynamic_fs_resource ( bool send_response ) void add_dynamic_fs_resource ( bool send_response )
{ {
#ifdef WANT_FS_MONITORING
char resource[50]; char resource[50];
char temp_resource[50]; char temp_resource[50];
char device [50]; char device [50];
@ -1206,10 +1207,14 @@ void add_dynamic_fs_resource ( bool send_response )
} }
} }
} }
#endif
if (send_response) if (send_response)
{ {
#ifdef WANT_FS_MONITORING
ilog ("sending response to dynamic FS add, to the rmon client\n"); ilog ("sending response to dynamic FS add, to the rmon client\n");
#else
ilog("dynamic filesystem monitoring moved to collectd\n");
#endif
/* let the rmon client know that we are done with the file */ /* let the rmon client know that we are done with the file */
rmon_resource_response(_rmon_ctrl_ptr->clients); rmon_resource_response(_rmon_ctrl_ptr->clients);
} }
@ -4650,6 +4655,8 @@ void rmon_service (rmon_ctrl_type * ctrl_ptr)
ilog ("registered clients: %d\n", _rmon_ctrl_ptr->clients); ilog ("registered clients: %d\n", _rmon_ctrl_ptr->clients);
#ifdef WANT_FS_MONITORING
/* Initialize the resource specific configuration */ /* Initialize the resource specific configuration */
for (int j=0; j<_rmon_ctrl_ptr->resources; j++) for (int j=0; j<_rmon_ctrl_ptr->resources; j++)
{ {
@ -4669,6 +4676,9 @@ void rmon_service (rmon_ctrl_type * ctrl_ptr)
/* add any dynamic resources from before */ /* add any dynamic resources from before */
add_dynamic_fs_resource(false); add_dynamic_fs_resource(false);
#else
ilog("static filesystem monitoring moved to collectd\n");
#endif
/* Clear any stale dynamic alarms that can be caused by dynamic resources. */ /* Clear any stale dynamic alarms that can be caused by dynamic resources. */
/* An alarm becomes stale for example if it was raised against a local volume group (lvg) and */ /* An alarm becomes stale for example if it was raised against a local volume group (lvg) and */

View File

@ -1,16 +0,0 @@
[resource]
resource = Platform CPU Usage
debounce = 20 ; number of seconds to wait before degrade clear
severity = critical ; minor, major, critical
minor_threshold = 80 ; minor cpu utilization threshold percentage
major_threshold = 90 ; major cpu utilization threshold percentage
critical_threshold = 95 ; critical cpu utilization threshold percentage (use 101 if unused)
minor_threshold_abs_node0 = 512 ; absolute minor threshold value MiB processor node 0
major_threshold_abs_node0 = 307 ; absolute major threshold value MiB processor node 0
critical_threshold_abs_node0 = 102 ; absolute critical threshold value MiB processor node 0
minor_threshold_abs_node1 = 0 ; absolute minor threshold value MiB processor node 1
major_threshold_abs_node1 = 0 ; absolute major threshold value MiB processor node 1
critical_threshold_abs_node1 = 0 ; absolute critical threshold value MiB processor node 1
num_tries = 2 ; number of tries before the alarm is raised
alarm_on = 1 ; 1 for alarm on, 0 for alarm off
percent = 1 ; Always use 1 for this resource (thresholds by percentage)

View File

@ -1,16 +0,0 @@
[resource]
resource = Platform Filesystem Usage
debounce = 20 ; number of seconds to wait before degrade clear
severity = critical ; minor, major, critical
minor_threshold = 70 ; minor filesystem utilization threshold percentage
major_threshold = 80 ; major filesystem utilization threshold percentage
critical_threshold = 90 ; critical filesystem utilization threshold percentage (use 101 if unused)
minor_threshold_abs_node0 = 512 ; absolute minor threshold value MiB processor node 0
major_threshold_abs_node0 = 307 ; absolute major threshold value MiB processor node 0
critical_threshold_abs_node0 = 102 ; absolute critical threshold value MiB processor node 0 (use 0 if unused)
minor_threshold_abs_node1 = 0 ; absolute minor threshold value MiB processor node 1
major_threshold_abs_node1 = 0 ; absolute major threshold value MiB processor node 1
critical_threshold_abs_node1 = 0 ; absolute critical threshold value MiB processor node 1
num_tries = 2 ; number of tries before the alarm is raised
alarm_on = 1 ; 1 for alarm on, 0 for alarm off
percent = 1 ; 1 for percentage used, 0 for absolute value (file system available in MiB) (default is 1)

View File

@ -1,16 +0,0 @@
[resource]
resource = Platform Memory Usage
debounce = 20 ; number of seconds to wait before degrade clear
severity = critical ; minor, major, critical
minor_threshold = 70 ; minor memory utilization threshold percentage
major_threshold = 80 ; major memory utilization threshold percentage
critical_threshold = 90 ; critical memory utilization threshold percentage (use 101 if unused)
minor_threshold_abs_node0 = 512 ; absolute minor threshold value MiB processor node 0
major_threshold_abs_node0 = 307 ; absolute major threshold value MiB processor node 0
critical_threshold_abs_node0 = 102 ; absolute critical threshold value MiB processor node 0 (use 0 if unused)
minor_threshold_abs_node1 = 0 ; absolute minor threshold value MiB processor node 1
major_threshold_abs_node1 = 0 ; absolute major threshold value MiB processor node 1
critical_threshold_abs_node1 = 0 ; absolute critical threshold value MiB processor node 1
num_tries = 2 ; number of tries before the alarm is raised
alarm_on = 1 ; 1 for alarm on, 0 for alarm off
percent = 1 ; 1 for percentage used, 0 for absolute value (memory available in MiB) (default is 1)