diff --git a/mtce-common/centos/cgts-mtce-common.spec b/mtce-common/centos/cgts-mtce-common.spec index 126b677a..429cfa79 100644 --- a/mtce-common/centos/cgts-mtce-common.spec +++ b/mtce-common/centos/cgts-mtce-common.spec @@ -421,11 +421,6 @@ install -m 700 -p -D %{_buildsubdir}/pmon/scripts/pmon-restart %{buildroot}/%{lo install -m 700 -p -D %{_buildsubdir}/pmon/scripts/pmon-start %{buildroot}/%{local_sbindir}/pmon-start install -m 700 -p -D %{_buildsubdir}/pmon/scripts/pmon-stop %{buildroot}/%{local_sbindir}/pmon-stop -# test tools -install -m 755 %{_buildsubdir}/hwmon/scripts/show_hp360 %{buildroot}/%{_sbindir}/show_hp360 -install -m 755 %{_buildsubdir}/hwmon/scripts/show_hp380 %{buildroot}/%{_sbindir}/show_hp380 -install -m 755 %{_buildsubdir}/hwmon/scripts/show_quanta %{buildroot}/%{_sbindir}/show_quanta - # init script files install -m 755 -p -D %{_buildsubdir}/scripts/mtcClient %{buildroot}%{_sysconfdir}/init.d/mtcClient install -m 755 -p -D %{_buildsubdir}/scripts/hbsClient %{buildroot}%{_sysconfdir}/init.d/hbsClient @@ -498,9 +493,6 @@ install -m 755 -d %{buildroot}%{_sysconfdir}/rmonapi.d install -m 755 -d %{buildroot}%{_sysconfdir}/rmonfiles.d install -m 755 -d %{buildroot}%{_sysconfdir}/rmon_interfaces.d install -m 644 -p -D %{_buildsubdir}/rmon/scripts/remotelogging_resource.conf %{buildroot}%{local_etc_rmond}/remotelogging_resource.conf -install -m 644 -p -D %{_buildsubdir}/rmon/scripts/cpu_resource.conf %{buildroot}%{local_etc_rmond}/cpu_resource.conf -install -m 644 -p -D %{_buildsubdir}/rmon/scripts/memory_resource.conf %{buildroot}%{local_etc_rmond}/memory_resource.conf -install -m 644 -p -D %{_buildsubdir}/rmon/scripts/filesystem_resource.conf %{buildroot}%{local_etc_rmond}/filesystem_resource.conf install -m 644 -p -D %{_buildsubdir}/rmon/scripts/cinder_virtual_resource.conf %{buildroot}%{local_etc_rmond}/cinder_virtual_resource.conf install -m 644 -p -D %{_buildsubdir}/rmon/scripts/nova_virtual_resource.conf %{buildroot}%{local_etc_rmond}/nova_virtual_resource.conf install -m 644 -p -D %{_buildsubdir}/rmon/scripts/oam_resource.conf %{buildroot}%{_sysconfdir}/rmon_interfaces.d/oam_resource.conf @@ -676,10 +668,7 @@ install -m 755 -d %{buildroot}/var/run %{local_etc_logrotated}/rmon.logrotate %{_unitdir}/rmon.service -%{local_etc_rmond}/filesystem_resource.conf -%{local_etc_rmond}/cpu_resource.conf %{local_etc_rmond}/remotelogging_resource.conf -%{local_etc_rmond}/memory_resource.conf %{local_etc_rmond}/cinder_virtual_resource.conf %{local_etc_rmond}/nova_virtual_resource.conf @@ -713,10 +702,6 @@ install -m 755 -d %{buildroot}/var/run %{local_etc_logrotated}/hwmon.logrotate %{ocf_resourced}/platform/hwmon -%{_sbindir}/show_hp380 -%{_sbindir}/show_hp360 -%{_sbindir}/show_quanta - %{_sysconfdir}/init.d/hwmon %{local_bindir}/hwmond diff --git a/mtce-common/cgts-mtce-common-1.0/common/nodeBase.cpp b/mtce-common/cgts-mtce-common-1.0/common/nodeBase.cpp index c3480638..f7410580 100755 --- a/mtce-common/cgts-mtce-common-1.0/common/nodeBase.cpp +++ b/mtce-common/cgts-mtce-common-1.0/common/nodeBase.cpp @@ -255,12 +255,38 @@ const char * get_mtcNodeCommand_str ( int cmd ) } -void print_mtc_message ( string hostname, int direction, mtc_message_type & msg , const char * iface, bool force ) +void print_mtc_message ( string hostname, + int direction, + mtc_message_type & msg, + const char * iface, + bool force ) { + /* Handle raw json string messages differently. + * Those messages just have a json string that starts at the header */ + if ( msg.hdr[0] == '{' ) + { + if ( force ) + { + ilog ("%s %s (%s network) - %s\n", + hostname.c_str(), + direction ? "rx <-" : "tx ->" , + iface, + msg.hdr); + } + else + { + mlog1 ("%s %s (%s network) - %s\n", + hostname.c_str(), + direction ? "rx <-" : "tx ->" , + iface, + msg.hdr); + } + return ; + } + string str = "-" ; if ( msg.buf[0] ) str = msg.buf ; - if ( force ) { ilog ("%s %s %s (%s network) %d.%d %x:%x:%x.%x.%x.%x [%s] %s\n", diff --git a/mtce-common/cgts-mtce-common-1.0/common/nodeBase.h b/mtce-common/cgts-mtce-common-1.0/common/nodeBase.h index 390d5ca5..332e4041 100755 --- a/mtce-common/cgts-mtce-common-1.0/common/nodeBase.h +++ b/mtce-common/cgts-mtce-common-1.0/common/nodeBase.h @@ -92,6 +92,9 @@ void daemon_exit ( void ); #define NODE_HEALTHY (1) #define NODE_UNHEALTHY (2) +#define AUTO_RECOVERY_FILE_SUFFIX ((const char *)"_ar_count") +#define TMP_DIR_PATH ((const char *)"/etc/mtc/tmp/") + #define HOST_IS_VIRTUAL ((const char *)"/var/run/virtual.host") /** Configuration Pass/Fail Flag File */ @@ -146,10 +149,6 @@ void daemon_exit ( void ); #define BM_DNSMASQ_FILENAME ((const char *)"dnsmasq.bmc_hosts") -/* Added for Centos */ -#define CENTOS_RELEASE_FILE ((const char *)"/etc/centos-release") -#define SYSTEMD_SERVICE_FILE_DIR ((const char *)"/usr/lib/systemd/system") - #define THREAD_NAME__IPMITOOL ((const char *)("ipmitool")) #define IPMITOOL_PATH_AND_FILENAME ((const char *)("/usr/bin/ipmitool")) @@ -970,7 +969,7 @@ string get_configStages_str ( mtc_configStages_enum stage ); #define DEGRADE_MASK_SUBF 0x00000100 #define DEGRADE_MASK_SM 0x00000200 #define DEGRADE_MASK_CONFIG 0x00000400 -#define DEGRADE_MASK_RES2 0x00000800 +#define DEGRADE_MASK_COLLECTD 0x00000800 #define DEGRADE_MASK_ENABLE 0x00001000 #define DEGRADE_MASK_RES4 0x00002000 #define DEGRADE_MASK_RES5 0x00004000 diff --git a/mtce-common/cgts-mtce-common-1.0/common/nodeClass.cpp b/mtce-common/cgts-mtce-common-1.0/common/nodeClass.cpp index 1dfc57fc..e10f2528 100755 --- a/mtce-common/cgts-mtce-common-1.0/common/nodeClass.cpp +++ b/mtce-common/cgts-mtce-common-1.0/common/nodeClass.cpp @@ -662,8 +662,8 @@ nodeLinkClass::node* nodeLinkClass::addNode( string hostname ) ptr->log_throttle = 0 ; ptr->no_work_log_throttle = 0 ; - /* Clear the degrade control structs */ - ptr->degrade_mask = DEGRADE_MASK_NONE ; + ptr->degrade_mask = ptr->degrade_mask_save = DEGRADE_MASK_NONE ; + ptr->degraded_resources_list.clear () ; ptr->pmond_ready = false ; ptr->rmond_ready = false ; @@ -4561,16 +4561,6 @@ void nodeLinkClass::manage_heartbeat_degrade ( string hostname, iface_enum iface } hbs_minor_clear ( node_ptr, iface ); - - /* Set the host available if the degrade mask is now - * cleared and we are degraded */ - if ( node_ptr->degrade_mask == 0 ) - { - if ( get_availStatus ( hostname ) == MTC_AVAIL_STATUS__DEGRADED ) - { - set_availStatus ( hostname, MTC_AVAIL_STATUS__AVAILABLE ); - } - } } else if ( this->mtcTimer_dor.tid ) { @@ -4602,12 +4592,6 @@ void nodeLinkClass::manage_heartbeat_degrade ( string hostname, iface_enum iface node_ptr->degrade_mask |= DEGRADE_MASK_HEARTBEAT_INFRA ; } } - - /* No point in changing if we are already degraded */ - if ( nodeLinkClass::get_availStatus ( hostname ) == MTC_AVAIL_STATUS__AVAILABLE ) - { - set_availStatus ( hostname, MTC_AVAIL_STATUS__DEGRADED ); - } } } } @@ -4621,7 +4605,7 @@ void nodeLinkClass::manage_heartbeat_minor ( string hostname, iface_enum iface, wlog ("%s Unknown host\n", hostname.c_str()); return ; } - + /* is this a clear event ? */ if ( clear_event == true ) { @@ -4639,15 +4623,15 @@ void nodeLinkClass::manage_heartbeat_minor ( string hostname, iface_enum iface, else if ( node_ptr->hbs_minor[iface] != true ) { - mnfa_add_host ( node_ptr, iface ); + mnfa_add_host ( node_ptr, iface ); } } } -/** Interface to declare that a key service on the +/** Interface to declare that a key service on the * specified host is up, running and ready */ -int nodeLinkClass::declare_service_ready ( string & hostname, +int nodeLinkClass::declare_service_ready ( string & hostname, unsigned int service ) { nodeLinkClass::node * node_ptr = nodeLinkClass::getNode ( hostname ); @@ -4661,18 +4645,11 @@ int nodeLinkClass::declare_service_ready ( string & hostname, node_ptr->pmond_ready = true ; plog ("%s got pmond ready event\n", hostname.c_str()); - /* A ready event means that pmond pocess has started. - * Any previous history is gone. Cleanup mtce. + /* A ready event means that pmond pocess has started. + * Any previous history is gone. Cleanup mtce. * If there are still process issues on this host then * they will be reported again.*/ node_ptr->degrade_mask &= ~DEGRADE_MASK_PMON ; - if ( node_ptr->degrade_mask == DEGRADE_MASK_NONE ) - { - if ( node_ptr->availStatus == MTC_AVAIL_STATUS__DEGRADED ) - { - availStatusChange ( node_ptr, MTC_AVAIL_STATUS__AVAILABLE ); - } - } return (PASS); } else if ( service == MTC_SERVICE_HWMOND ) @@ -4719,14 +4696,6 @@ int nodeLinkClass::degrade_pmond_clear ( string & hostname ) if ( node_ptr->degrade_mask ) { node_ptr->degrade_mask &= ~DEGRADE_MASK_PMON ; - - if ( !node_ptr->degrade_mask ) - { - if ( node_ptr->operState == MTC_OPER_STATE__ENABLED ) - { - availStatusChange ( node_ptr, MTC_AVAIL_STATUS__AVAILABLE ); - } - } } /* The only detectable inservice failures are process failures */ @@ -4735,15 +4704,65 @@ int nodeLinkClass::degrade_pmond_clear ( string & hostname ) return (PASS); } +/* This private API handles event messages from collectd */ +int nodeLinkClass::collectd_notify_handler ( string & hostname, + string & resource, + string & state ) +{ + int rc = PASS ; + nodeLinkClass::node * node_ptr = nodeLinkClass::getNode ( hostname ); + if ( node_ptr == NULL ) + { + wlog ("%s Unknown Host\n", hostname.c_str()); + return (FAIL_UNKNOWN_HOSTNAME) ; + } + if ( state == "clear" ) + { + if ( node_ptr->degrade_mask & DEGRADE_MASK_COLLECTD ) + { + ilog("%s collectd degrade state change ; assert -> clear (%s)", + hostname.c_str(), resource.c_str()); + node_ptr->degrade_mask &= ~DEGRADE_MASK_COLLECTD ; + } + else + { + mlog3("%s collectd degrade 'clear' request (%s)", + hostname.c_str(), resource.c_str()); + } + } + else if ( state == "assert" ) + { + if ( (node_ptr->degrade_mask & DEGRADE_MASK_COLLECTD) == 0 ) + { + ilog("%s collectd degrade state change ; clear -> assert (due to %s)", + hostname.c_str(), resource.c_str()); + node_ptr->degrade_mask |= DEGRADE_MASK_COLLECTD ; + } + else + { + mlog3("%s collectd degrade 'assert' request (%s)", + hostname.c_str(), resource.c_str()); + } + } + else + { + wlog ("%s collectd degrade state unknown (%s)\n", + hostname.c_str(), + state.c_str()); + rc = FAIL_OPERATION ; + } + return (rc); +} + /** Resource Monitor 'Clear' Event handler. - * + * * The resource specified will be removed from the * 'degraded_resources_list' for specified host. * if there are no other degraded resources or other * degraded services/reasons against that host then * this handler will clear the degrade state for the * specified host all together. */ -int nodeLinkClass::degrade_resource_clear ( string & hostname, +int nodeLinkClass::degrade_resource_clear ( string & hostname, string & resource ) { /* lr - Log Prefix Rmon */ @@ -4788,18 +4807,6 @@ int nodeLinkClass::degrade_resource_clear ( string & hostname, if ( node_ptr->degraded_resources_list.empty() ) { node_ptr->degrade_mask &= ~DEGRADE_MASK_RESMON ; ; - if ( node_ptr->degrade_mask == DEGRADE_MASK_NONE ) - { - if ( node_ptr->availStatus == MTC_AVAIL_STATUS__DEGRADED ) - { - availStatusChange ( node_ptr, MTC_AVAIL_STATUS__AVAILABLE ); - } - } - else - { - wlog ("%s Remains Degraded - Reason Mask:0x%08x\n", - hostname.c_str(), node_ptr->degrade_mask ); - } } else { @@ -4874,30 +4881,6 @@ int nodeLinkClass::node_degrade_control ( string & hostname, int state, string s /* clear the mask regardless of host state */ node_ptr->degrade_mask &= ~service_flag ; - - /* only applies if host is unlocked-enabled-degraded and - * there are no other degrade flags in the degrade mask */ - if (( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) && - ( node_ptr->operState == MTC_OPER_STATE__ENABLED ) && - ( node_ptr->availStatus == MTC_AVAIL_STATUS__DEGRADED )) - { - if ( node_ptr->degrade_mask == DEGRADE_MASK_NONE ) - { - availStatusChange ( node_ptr, MTC_AVAIL_STATUS__AVAILABLE ); - } - else - { - /* TODO: convert lask to a sring or services and print that string */ - wlog ("%s remains degraded - degrade mask:0x%08x\n", - hostname.c_str(), - node_ptr->degrade_mask ); - } - } - else - { - dlog ("%s unexpected degrade clear for '%s' service\n", - hostname.c_str(), service.c_str() ); - } rc = PASS ; break ; } @@ -4910,13 +4893,6 @@ int nodeLinkClass::node_degrade_control ( string & hostname, int state, string s wlog ("%s degrade 'assert' from '%s'\n", hostname.c_str(), service.c_str() ); node_ptr->degrade_mask |= service_flag ; } - - if (( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) && - ( node_ptr->operState == MTC_OPER_STATE__ENABLED ) && - ( node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE )) - { - availStatusChange ( node_ptr, MTC_AVAIL_STATUS__DEGRADED ); - } rc = PASS ; break ; } @@ -5232,10 +5208,6 @@ int nodeLinkClass::degrade_process_raise ( string & hostname, { node_ptr->degrade_mask |= DEGRADE_MASK_PMON ; wlog ("%s is degraded due to '%s' process failure\n", hostname.c_str(), process.c_str()); - if ( node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE ) - { - availStatusChange ( node_ptr, MTC_AVAIL_STATUS__DEGRADED ); - } } } return (PASS); @@ -5412,11 +5384,6 @@ int nodeLinkClass::degrade_resource_raise ( string & hostname, { dlog ("%s '%s' Degraded (again)\n", lr.c_str(), resource.c_str()); } - if ( node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE ) - { - availStatusChange ( node_ptr, MTC_AVAIL_STATUS__DEGRADED ); - } - } return (PASS); } @@ -7039,9 +7006,6 @@ struct nodeLinkClass::node * nodeLinkClass::get_insvTestTimer ( timer_t tid ) * *****************************************************************************/ -#define TMP_DIR_PATH ((const char *)"/etc/mtc/tmp/") -#define AUTO_RECOVERY_FILE_SUFFIX ((const char *)"_ar_count") - void autorecovery_clear ( string hostname ) { string ar_file = TMP_DIR_PATH + hostname + AUTO_RECOVERY_FILE_SUFFIX ; diff --git a/mtce-common/cgts-mtce-common-1.0/common/nodeClass.h b/mtce-common/cgts-mtce-common-1.0/common/nodeClass.h index 2b9b72ad..444e618b 100755 --- a/mtce-common/cgts-mtce-common-1.0/common/nodeClass.h +++ b/mtce-common/cgts-mtce-common-1.0/common/nodeClass.h @@ -585,6 +585,7 @@ private: /* Bit mask of degrade reasons */ unsigned int degrade_mask ; + unsigned int degrade_mask_save ; /** Process Monitor Daemon Flag Missing count */ int pmon_missing_count ; @@ -785,6 +786,7 @@ private: int insv_test_handler ( struct nodeLinkClass::node * node_ptr ); int stress_handler ( struct nodeLinkClass::node * node_ptr ); int bm_handler ( struct nodeLinkClass::node * node_ptr ); + int degrade_handler ( struct nodeLinkClass::node * node_ptr ); int uptime_handler ( void ); int host_services_handler ( struct nodeLinkClass::node * node_ptr ); @@ -1731,6 +1733,11 @@ public: /** Calculates and returns the mnfa threshold based on enabled hosts */ int mnfa_calculate_threshold ( string hostname ); + /* collectd event handler */ + int collectd_notify_handler ( string & hostname, + string & resource, + string & state ); + /***************************************** ** Process Monitor Event Utilities API ** *****************************************/ diff --git a/mtce-common/cgts-mtce-common-1.0/daemon/daemon_common.h b/mtce-common/cgts-mtce-common-1.0/daemon/daemon_common.h index f804b849..e7e6c85d 100755 --- a/mtce-common/cgts-mtce-common-1.0/daemon/daemon_common.h +++ b/mtce-common/cgts-mtce-common-1.0/daemon/daemon_common.h @@ -68,9 +68,9 @@ string daemon_read_file ( const char * filename ); void daemon_logfile_close ( void ); void daemon_logfile_open ( void ); -void daemon_log ( const char * filename , const char * str ); -void daemon_log_value ( const char * filename , int val ); -void daemon_log_value ( const char * filename , const char * str, int val ); +int daemon_log ( const char * filename , const char * str ); +int daemon_log_value ( const char * filename , int val ); +int daemon_log_value ( const char * filename , const char * str, int val ); /* reads the first line of a file and if it contains a string * that represents an integer value then return it */ diff --git a/mtce-common/cgts-mtce-common-1.0/daemon/daemon_files.cpp b/mtce-common/cgts-mtce-common-1.0/daemon/daemon_files.cpp index 89408f46..002c4db4 100755 --- a/mtce-common/cgts-mtce-common-1.0/daemon/daemon_files.cpp +++ b/mtce-common/cgts-mtce-common-1.0/daemon/daemon_files.cpp @@ -103,7 +103,7 @@ void daemon_healthcheck ( const char * sig ) #define BUFFER 1024 -void daemon_log_value ( const char * filename , const char * str, int val ) +int daemon_log_value ( const char * filename , const char * str, int val ) { FILE * file_stream = fopen (filename, "a" ) ; if ( file_stream != NULL ) @@ -111,10 +111,12 @@ void daemon_log_value ( const char * filename , const char * str, int val ) fprintf ( file_stream,"%s %d\n", str, val ); fflush (file_stream); fclose (file_stream); + return (PASS); } + return (FAIL_FILE_OPEN); } -void daemon_log_value ( const char * filename , int val ) +int daemon_log_value ( const char * filename , int val ) { FILE * file_stream = fopen (filename, "w" ) ; if ( file_stream != NULL ) @@ -122,10 +124,12 @@ void daemon_log_value ( const char * filename , int val ) fprintf ( file_stream,"%d\n", val ); fflush (file_stream); fclose (file_stream); + return (PASS); } + return (FAIL_FILE_OPEN); } -void daemon_log ( const char * filename , const char * str ) +int daemon_log ( const char * filename , const char * str ) { FILE * file_stream = fopen (filename, "a" ) ; if ( file_stream != NULL ) @@ -133,7 +137,9 @@ void daemon_log ( const char * filename , const char * str ) fprintf ( file_stream,"%s\n", str ); fflush (file_stream); fclose (file_stream); + return (PASS); } + return (FAIL_FILE_OPEN); } /* reads the first line of a file and if it contains a string diff --git a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcCtrlMsg.cpp b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcCtrlMsg.cpp index 8a1f5291..bcc9b5f5 100755 --- a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcCtrlMsg.cpp +++ b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcCtrlMsg.cpp @@ -191,8 +191,50 @@ int mtc_service_inbox ( nodeLinkClass * obj_ptr, print_mtc_message ( hostname, MTC_CMD_RX, msg, get_iface_name_str(iface), false ); + if ( msg.hdr[0] == '{' ) + { + int rc1 ; + string service ; + + mlog1 ("%s\n", &msg.hdr[0] ); + + rc1 = jsonUtil_get_key_val(&msg.hdr[0],"service", service ); + if ( rc1 == PASS ) + { + if ( service == "collectd_notifier" ) + { + int rc1,rc2,rc3 ; + string hostname,resource,state ; + + rc1 = jsonUtil_get_key_val(&msg.hdr[0],"hostname", hostname ); + rc2 = jsonUtil_get_key_val(&msg.hdr[0],"resource", resource ); + rc3 = jsonUtil_get_key_val(&msg.hdr[0],"degrade", state ); + if ( rc1|rc2|rc3 ) + { + elog ("failed to parse '%s' message\n", service.c_str()); + wlog ("... %s\n", &msg.hdr[0] ); + } + else + { + obj_ptr->collectd_notify_handler ( hostname, + resource, + state ); + } + } + /* future service requests */ + else + { + wlog ("Unexpected service request: '%s'\n", service.c_str()); + } + } + else + { + wlog("Unexpected json message: %s\n", &msg.hdr[0] ); + } + } + /* Check for response messages */ - if ( strstr ( &msg.hdr[0], get_cmd_rsp_msg_header() ) ) + else if ( strstr ( &msg.hdr[0], get_cmd_rsp_msg_header() ) ) { obj_ptr->set_cmd_resp ( hostname , msg ) ; } diff --git a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeFsm.cpp b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeFsm.cpp index 8ee70593..20fad599 100755 --- a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeFsm.cpp +++ b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeFsm.cpp @@ -74,6 +74,9 @@ int nodeLinkClass::fsm ( struct nodeLinkClass::node * node_ptr ) /* manage the host connected state and board management alarms */ nodeLinkClass::bm_handler ( node_ptr ); + /* manage host's degrade state */ + nodeLinkClass::degrade_handler ( node_ptr ); + /* * Always run the offline handler * diff --git a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeHdlrs.cpp b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeHdlrs.cpp index fc13c5c1..6a38d5e7 100755 --- a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeHdlrs.cpp +++ b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeHdlrs.cpp @@ -5599,15 +5599,6 @@ int nodeLinkClass::add_handler ( struct nodeLinkClass::node * node_ptr ) send_hwmon_command ( node_ptr->hostname, MTC_CMD_START_HOST ); } - /* handle coming out of the ADD in a degraded state */ - if (( node_ptr->degrade_mask != 0 ) && - (( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) && - ( node_ptr->operState == MTC_OPER_STATE__ENABLED ) && - ( node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE ))) - { - availStatusChange ( node_ptr, MTC_AVAIL_STATUS__DEGRADED ); - } - node_ptr->mtcAlive_gate = false ; node_ptr->addStage = MTC_ADD__DONE ; break; @@ -6111,22 +6102,6 @@ int nodeLinkClass::insv_test_handler ( struct nodeLinkClass::node * node_ptr ) { alarm_compute_clear ( node_ptr, false ); } - - /************************************************************ - * Manage host degrade based on degrade mask * - ***********************************************************/ - if (( node_ptr->degrade_mask == 0 ) && - ( node_ptr->availStatus == MTC_AVAIL_STATUS__DEGRADED )) - { - availStatusChange ( node_ptr, MTC_AVAIL_STATUS__AVAILABLE ); - } - - /* expected degrade audit */ - else if (( node_ptr->degrade_mask ) && - ( node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE )) - { - availStatusChange ( node_ptr, MTC_AVAIL_STATUS__DEGRADED ); - } } break ; } @@ -6461,12 +6436,6 @@ int nodeLinkClass::insv_test_handler ( struct nodeLinkClass::node * node_ptr ) node_ptr->degrade_mask |= DEGRADE_MASK_SM ; ilog ("%s sm degrade\n", node_ptr->hostname.c_str()); - - /* degrade the host if not already degraded */ - if ( node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE ) - { - availStatusChange ( node_ptr, MTC_AVAIL_STATUS__DEGRADED ); - } } /* Manage de-asserting degrade due to Software Management */ @@ -6477,16 +6446,6 @@ int nodeLinkClass::insv_test_handler ( struct nodeLinkClass::node * node_ptr ) node_ptr->degrade_mask &= ~DEGRADE_MASK_SM ; ilog ("%s sm degrade clear\n", node_ptr->hostname.c_str()); - - /* if the degrade mask is now clear then consider clearing the degrade state */ - if ( node_ptr->degrade_mask == 0 ) - { - /* ... but only if we are degraded */ - if ( node_ptr->availStatus == MTC_AVAIL_STATUS__DEGRADED ) - { - availStatusChange ( node_ptr, MTC_AVAIL_STATUS__AVAILABLE ); - } - } } if ( node_ptr->mtce_flags & MTC_FLAG__I_AM_NOT_HEALTHY) @@ -6502,10 +6461,6 @@ int nodeLinkClass::insv_test_handler ( struct nodeLinkClass::node * node_ptr ) if ( node_ptr->health_threshold_counter >= MTC_UNHEALTHY_THRESHOLD ) { node_ptr->degrade_mask |= DEGRADE_MASK_CONFIG ; - if ( node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE ) - { - availStatusChange ( node_ptr, MTC_AVAIL_STATUS__DEGRADED ); - } /* threshold is reached so raise the config alarm if it is not already raised */ if ( node_ptr->alarms[MTC_ALARM_ID__CONFIG] != FM_ALARM_SEVERITY_CRITICAL ) @@ -6554,6 +6509,30 @@ int nodeLinkClass::insv_test_handler ( struct nodeLinkClass::node * node_ptr ) return (PASS); } +/************************************************************ + * Manage host degrade state based on degrade mask * + * The availability state of degrade only applies when the * + * host is unlocked-enabled. * + ***********************************************************/ +int nodeLinkClass::degrade_handler ( struct nodeLinkClass::node * node_ptr ) +{ + if (( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) && + ( node_ptr->operState == MTC_OPER_STATE__ENABLED )) + { + if (( node_ptr->degrade_mask == DEGRADE_MASK_NONE ) && + ( node_ptr->availStatus == MTC_AVAIL_STATUS__DEGRADED )) + { + availStatusChange ( node_ptr, MTC_AVAIL_STATUS__AVAILABLE ); + } + + else if (( node_ptr->degrade_mask ) && + ( node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE )) + { + availStatusChange ( node_ptr, MTC_AVAIL_STATUS__DEGRADED ); + } + } + return (PASS); +} int nodeLinkClass::cfg_handler ( struct nodeLinkClass::node * node_ptr ) { diff --git a/mtce-common/cgts-mtce-common-1.0/rmon/rmonHdlr.cpp b/mtce-common/cgts-mtce-common-1.0/rmon/rmonHdlr.cpp index 71f1bf16..c1357a99 100644 --- a/mtce-common/cgts-mtce-common-1.0/rmon/rmonHdlr.cpp +++ b/mtce-common/cgts-mtce-common-1.0/rmon/rmonHdlr.cpp @@ -1127,6 +1127,7 @@ void read_fs_file ( vector & dynamic_resources ) *****************************************************************************/ void add_dynamic_fs_resource ( bool send_response ) { +#ifdef WANT_FS_MONITORING char resource[50]; char temp_resource[50]; char device [50]; @@ -1206,10 +1207,14 @@ void add_dynamic_fs_resource ( bool send_response ) } } } - +#endif if (send_response) { +#ifdef WANT_FS_MONITORING ilog ("sending response to dynamic FS add, to the rmon client\n"); +#else + ilog("dynamic filesystem monitoring moved to collectd\n"); +#endif /* let the rmon client know that we are done with the file */ rmon_resource_response(_rmon_ctrl_ptr->clients); } @@ -4650,6 +4655,8 @@ void rmon_service (rmon_ctrl_type * ctrl_ptr) ilog ("registered clients: %d\n", _rmon_ctrl_ptr->clients); +#ifdef WANT_FS_MONITORING + /* Initialize the resource specific configuration */ for (int j=0; j<_rmon_ctrl_ptr->resources; j++) { @@ -4669,6 +4676,9 @@ void rmon_service (rmon_ctrl_type * ctrl_ptr) /* add any dynamic resources from before */ add_dynamic_fs_resource(false); +#else + ilog("static filesystem monitoring moved to collectd\n"); +#endif /* Clear any stale dynamic alarms that can be caused by dynamic resources. */ /* An alarm become stale for example if it was raised against a local volumn group (lvg) and */ diff --git a/mtce-common/cgts-mtce-common-1.0/rmon/scripts/cpu_resource.conf b/mtce-common/cgts-mtce-common-1.0/rmon/scripts/cpu_resource.conf deleted file mode 100644 index dc0ab8dd..00000000 --- a/mtce-common/cgts-mtce-common-1.0/rmon/scripts/cpu_resource.conf +++ /dev/null @@ -1,16 +0,0 @@ -[resource] -resource = Platform CPU Usage -debounce = 20 ; number of seconds to wait before degrade clear -severity = critical ; minor, major, critical -minor_threshold = 80 ; minor cpu utilization threshold percentage -major_threshold = 90 ; major cpu utilization threshold percentage -critical_threshold = 95 ; critical cpu utilization threshold percentage (use 101 if unused) -minor_threshold_abs_node0 = 512 ; absolute minor threshold value MiB processor node 0 -major_threshold_abs_node0 = 307 ; absolute major threshold value MiB processor node 0 -critical_threshold_abs_node0 = 102 ; absolute critical threshold value MiB processor node 0 -minor_threshold_abs_node1 = 0 ; absolute minor threshold value MiB processor node 1 -major_threshold_abs_node1 = 0 ; absolute major threshold value MiB processor node 1 -critical_threshold_abs_node1 = 0 ; absolute critical threshold value MiB processor node 1 -num_tries = 2 ; number of tries before the alarm is raised -alarm_on = 1 ; 1 for alarm on, 0 for alarm off -percent = 1 ; Always use 1 for this resource (thresholds by percentage) diff --git a/mtce-common/cgts-mtce-common-1.0/rmon/scripts/filesystem_resource.conf b/mtce-common/cgts-mtce-common-1.0/rmon/scripts/filesystem_resource.conf deleted file mode 100644 index e8496b54..00000000 --- a/mtce-common/cgts-mtce-common-1.0/rmon/scripts/filesystem_resource.conf +++ /dev/null @@ -1,16 +0,0 @@ -[resource] -resource = Platform Filesystem Usage -debounce = 20 ; number of seconds to wait before degrade clear -severity = critical ; minor, major, critical -minor_threshold = 70 ; minor filesystem utilization threshold percentage -major_threshold = 80 ; major filesystem utilization threshold percentage -critical_threshold = 90 ; critical filesystem utilization threshold percentage (use 101 if unused) -minor_threshold_abs_node0 = 512 ; absolute minor threshold value MiB processor node 0 -major_threshold_abs_node0 = 307 ; absolute major threshold value MiB processor node 0 -critical_threshold_abs_node0 = 102 ; absolute critical threshold value MiB processor node 0 (use 0 if unused) -minor_threshold_abs_node1 = 0 ; absolute minor threshold value MiB processor node 1 -major_threshold_abs_node1 = 0 ; absolute major threshold value MiB processor node 1 -critical_threshold_abs_node1 = 0 ; absolute critical threshold value MiB processor node 1 -num_tries = 2 ; number of tries before the alarm is raised -alarm_on = 1 ; 1 for alarm on, 0 for alarm off -percent = 1 ; 1 for percentage used, 0 for absolute value (file system available in MiB) (default is 1) diff --git a/mtce-common/cgts-mtce-common-1.0/rmon/scripts/memory_resource.conf b/mtce-common/cgts-mtce-common-1.0/rmon/scripts/memory_resource.conf deleted file mode 100644 index 926e28cd..00000000 --- a/mtce-common/cgts-mtce-common-1.0/rmon/scripts/memory_resource.conf +++ /dev/null @@ -1,16 +0,0 @@ -[resource] -resource = Platform Memory Usage -debounce = 20 ; number of seconds to wait before degrade clear -severity = critical ; minor, major, critical -minor_threshold = 70 ; minor memory utilization threshold percentage -major_threshold = 80 ; major memory utilization threshold percentage -critical_threshold = 90 ; critical memory utilization threshold percentage (use 101 if unsed) -minor_threshold_abs_node0 = 512 ; absolute minor threshold value MiB processor node 0 -major_threshold_abs_node0 = 307 ; absolute major threshold value MiB processor node 0 -critical_threshold_abs_node0 = 102 ; absolute critical threshold value MiB processor node 0 (use 0 if unused) -minor_threshold_abs_node1 = 0 ; absolute minor threshold value MiB processor node 1 -major_threshold_abs_node1 = 0 ; absolute major threshold value MiB processor node 1 -critical_threshold_abs_node1 = 0 ; absolute critical threshold value MiB processor node 1 -num_tries = 2 ; number of tries before the alarm is raised -alarm_on = 1 ; 1 for alarm on, 0 for alarm off -percent = 1 ; 1 for percentage used, 0 for absolute value (memory available in MiB) (default is 1)