Merge "Mtce: Add heartbeat cluster information for SM query"
This commit is contained in:
commit
0362090b73
|
@ -249,6 +249,44 @@ int jsonUtil_get_key_val ( char * json_str_ptr,
|
||||||
return (PASS);
|
return (PASS);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int jsonUtil_get_key_val_int ( char * json_str_ptr,
|
||||||
|
string key,
|
||||||
|
int & value )
|
||||||
|
{
|
||||||
|
/* init to null to avoid trap on early cleanup call with
|
||||||
|
* bad non-null default pointer value */
|
||||||
|
struct json_object *raw_obj = (struct json_object *)(NULL);
|
||||||
|
|
||||||
|
if ((json_str_ptr == NULL) || ( *json_str_ptr == '\0' ) || ( ! strncmp ( json_str_ptr, "(null)" , 6 )))
|
||||||
|
{
|
||||||
|
elog ("Cannot tokenize a null json string\n");
|
||||||
|
elog ("... json string: %s\n", json_str_ptr );
|
||||||
|
return (FAIL);
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t len_before = strlen (json_str_ptr);
|
||||||
|
|
||||||
|
jlog2 ("String: %s\n", json_str_ptr );
|
||||||
|
|
||||||
|
raw_obj = json_tokener_parse( json_str_ptr );
|
||||||
|
if ( raw_obj )
|
||||||
|
{
|
||||||
|
value = jsonUtil_get_key_value_int ( raw_obj, key.data() ) ;
|
||||||
|
jlog1 ("%s:%d\n", key.c_str(), value);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
size_t len_after = strlen (json_str_ptr);
|
||||||
|
|
||||||
|
elog ("Unable to tokenize string (before:%ld after:%ld);\n", len_before, len_after);
|
||||||
|
elog ("... json string: %s\n", json_str_ptr );
|
||||||
|
}
|
||||||
|
|
||||||
|
if (raw_obj)
|
||||||
|
json_object_put(raw_obj);
|
||||||
|
|
||||||
|
return (PASS);
|
||||||
|
}
|
||||||
|
|
||||||
/** This utility freads the passed in inventory GET request
|
/** This utility freads the passed in inventory GET request
|
||||||
* response json character string and performs the following
|
* response json character string and performs the following
|
||||||
|
|
|
@ -69,6 +69,10 @@ int jsonUtil_get_key_val ( char * json_str_ptr,
|
||||||
string key,
|
string key,
|
||||||
string & value );
|
string & value );
|
||||||
|
|
||||||
|
int jsonUtil_get_key_val_int ( char * json_str_ptr,
|
||||||
|
string key,
|
||||||
|
int & value );
|
||||||
|
|
||||||
/** Submit a request to get an authorization token and nova URL */
|
/** Submit a request to get an authorization token and nova URL */
|
||||||
int jsonApi_auth_request ( string & hostname, string & payload );
|
int jsonApi_auth_request ( string & hostname, string & payload );
|
||||||
|
|
||||||
|
|
|
@ -114,6 +114,8 @@ typedef struct
|
||||||
int event_port ; /**< daemon specific event tx port */
|
int event_port ; /**< daemon specific event tx port */
|
||||||
int cmd_port ; /**< daemon specific command rx port */
|
int cmd_port ; /**< daemon specific command rx port */
|
||||||
int sensor_port ; /**< sensor read value port */
|
int sensor_port ; /**< sensor read value port */
|
||||||
|
int sm_server_port ; /**< port mtce uses to receive data from SM */
|
||||||
|
int sm_client_port ; /**< port mtce uses to send SM data */
|
||||||
int start_delay ; /**< startup delay, added for pmon */
|
int start_delay ; /**< startup delay, added for pmon */
|
||||||
int api_retries ; /**< api retries before failure */
|
int api_retries ; /**< api retries before failure */
|
||||||
int hostwd_failure_threshold ; /**< allowed # of missed pmon/hostwd messages */
|
int hostwd_failure_threshold ; /**< allowed # of missed pmon/hostwd messages */
|
||||||
|
@ -243,6 +245,19 @@ extern char *program_invocation_short_name;
|
||||||
else { syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Error : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \
|
else { syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Error : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Error logger macro with throttling.
 *
 *  cnt    - caller owned counter lvalue ; incremented on every invocation
 *           and reset to 0 once it reaches 'max'
 *  max    - number of invocations that make up one throttle window
 *  format - printf style format string followed by its arguments
 *
 *  Only the invocation that moves 'cnt' from 0 to 1 produces a log, so one
 *  error log is emitted per window of 'max' invocations.
 *  'cnt' and 'max' are parenthesized so that expression arguments expand
 *  safely. */
#define elog_throttled(cnt,max,format,args...) { \
    if ( ++(cnt) == 1 ) \
    { \
        if (ltc()) { printf ( "%s [%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Error : " format, pt(), getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \
        else { syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Error : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \
    } \
    if ( (cnt) >= (max) ) \
    { \
        (cnt) = 0 ; \
    } \
}
|
||||||
|
|
||||||
/** Warning logger macro */
|
/** Warning logger macro */
|
||||||
#define wlog(format, args...) { \
|
#define wlog(format, args...) { \
|
||||||
if ( ltc() ) { printf ( "%s [%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Warn : " format, pt(), getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \
|
if ( ltc() ) { printf ( "%s [%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Warn : " format, pt(), getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \
|
||||||
|
@ -387,7 +402,9 @@ extern char *program_invocation_short_name;
|
||||||
|
|
||||||
#define flog(format, args...) { if(daemon_get_cfg_ptr()->debug_fsm) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: FSM : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
|
#define flog(format, args...) { if(daemon_get_cfg_ptr()->debug_fsm) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: FSM : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
|
||||||
#define tlog(format, args...) { if(daemon_get_cfg_ptr()->debug_timer) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Timer: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
|
#define tlog(format, args...) { if(daemon_get_cfg_ptr()->debug_timer) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Timer: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
|
||||||
|
|
||||||
#define clog(format, args...) { if(daemon_get_cfg_ptr()->debug_state) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Change: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
|
#define clog(format, args...) { if(daemon_get_cfg_ptr()->debug_state) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Change: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
|
||||||
|
#define clog1(format, args...) { if(daemon_get_cfg_ptr()->debug_state&2) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Chang2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
|
||||||
|
|
||||||
#define log_event(format, args...) { syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Event: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
|
#define log_event(format, args...) { syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Event: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
|
||||||
#define log_stress(format, args...) { syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Stress: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
|
#define log_stress(format, args...) { syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Stress: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
|
||||||
|
|
|
@ -23,7 +23,9 @@ using namespace std;
|
||||||
#include "returnCodes.h"
|
#include "returnCodes.h"
|
||||||
#include "nodeTimers.h"
|
#include "nodeTimers.h"
|
||||||
|
|
||||||
|
#ifndef ALIGN_PACK
|
||||||
#define ALIGN_PACK(x) __attribute__((packed)) x
|
#define ALIGN_PACK(x) __attribute__((packed)) x
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Out-Of-Service Stress tests */
|
/* Out-Of-Service Stress tests */
|
||||||
#define WANT_SYSINV_API_STRESS 0x00000001
|
#define WANT_SYSINV_API_STRESS 0x00000001
|
||||||
|
@ -359,8 +361,12 @@ void daemon_exit ( void );
|
||||||
|
|
||||||
#define CONTROLLER_0 ((const char *)"controller-0")
|
#define CONTROLLER_0 ((const char *)"controller-0")
|
||||||
#define CONTROLLER_1 ((const char *)"controller-1")
|
#define CONTROLLER_1 ((const char *)"controller-1")
|
||||||
|
#define CONTROLLER_2 ((const char *)"controller-2")
|
||||||
#define CONTROLLER ((const char *)"controller")
|
#define CONTROLLER ((const char *)"controller")
|
||||||
|
|
||||||
|
#define STORAGE_0 ((const char *)"storage-0")
|
||||||
|
#define STORAGE_1 ((const char *)"storage-1")
|
||||||
|
|
||||||
/* The infrastructure networking floating IP
|
/* The infrastructure networking floating IP
|
||||||
*
|
*
|
||||||
* Note: If there is no infra then this label will resolve
|
* Note: If there is no infra then this label will resolve
|
||||||
|
|
|
@ -267,7 +267,7 @@ bool is_goenabled ( int nodeType, bool pass )
|
||||||
return daemon_is_file_present ( file );
|
return daemon_is_file_present ( file );
|
||||||
}
|
}
|
||||||
|
|
||||||
#define LOG_MEMORY(buf) ilog ("%s", buf ); \
|
#define LOG_MEMORY(buf) syslog ( LOG_INFO, "%s", buf ); \
|
||||||
buf_ptr = &buf[0]; \
|
buf_ptr = &buf[0]; \
|
||||||
MEMSET_ZERO ( buf );
|
MEMSET_ZERO ( buf );
|
||||||
|
|
||||||
|
@ -279,7 +279,7 @@ void dump_memory ( void * raw_ptr , int format, size_t bytes )
|
||||||
char buf[0x1024] ;
|
char buf[0x1024] ;
|
||||||
char * buf_ptr = &buf[0];
|
char * buf_ptr = &buf[0];
|
||||||
MEMSET_ZERO ( buf );
|
MEMSET_ZERO ( buf );
|
||||||
ilog ("Dumping Memory:\n");
|
syslog ( LOG_INFO, "Dumping Memory: %ld bytes", bytes );
|
||||||
if ( format == 4 )
|
if ( format == 4 )
|
||||||
{
|
{
|
||||||
int loops = bytes/format ;
|
int loops = bytes/format ;
|
||||||
|
@ -294,7 +294,6 @@ void dump_memory ( void * raw_ptr , int format, size_t bytes )
|
||||||
buf_ptr += sprintf ( buf_ptr, "%c", *byte_ptr) ;
|
buf_ptr += sprintf ( buf_ptr, "%c", *byte_ptr) ;
|
||||||
else
|
else
|
||||||
buf_ptr += sprintf ( buf_ptr, "%c", '.');
|
buf_ptr += sprintf ( buf_ptr, "%c", '.');
|
||||||
|
|
||||||
byte_ptr++ ;
|
byte_ptr++ ;
|
||||||
}
|
}
|
||||||
LOG_MEMORY(buf);
|
LOG_MEMORY(buf);
|
||||||
|
@ -315,7 +314,6 @@ void dump_memory ( void * raw_ptr , int format, size_t bytes )
|
||||||
buf_ptr += sprintf ( buf_ptr , "%c", *byte_ptr) ;
|
buf_ptr += sprintf ( buf_ptr , "%c", *byte_ptr) ;
|
||||||
else
|
else
|
||||||
buf_ptr += sprintf ( buf_ptr , "%c", '.');
|
buf_ptr += sprintf ( buf_ptr , "%c", '.');
|
||||||
|
|
||||||
byte_ptr++ ;
|
byte_ptr++ ;
|
||||||
}
|
}
|
||||||
LOG_MEMORY(buf);
|
LOG_MEMORY(buf);
|
||||||
|
@ -336,21 +334,12 @@ void dump_memory ( void * raw_ptr , int format, size_t bytes )
|
||||||
buf_ptr += sprintf ( buf_ptr , "%c", *byte_ptr) ;
|
buf_ptr += sprintf ( buf_ptr , "%c", *byte_ptr) ;
|
||||||
else
|
else
|
||||||
buf_ptr += sprintf ( buf_ptr , "%c", '.');
|
buf_ptr += sprintf ( buf_ptr , "%c", '.');
|
||||||
|
|
||||||
byte_ptr++ ;
|
byte_ptr++ ;
|
||||||
}
|
}
|
||||||
LOG_MEMORY(buf);
|
LOG_MEMORY(buf);
|
||||||
word_ptr += 4 ;
|
word_ptr += 4 ;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
byte_ptr = (uint8_t*)raw_ptr ;
|
|
||||||
ilog ("Raw Hex Dump : %ld\n", bytes );
|
|
||||||
for ( unsigned int x = 0 ; x < bytes ; x++ )
|
|
||||||
{
|
|
||||||
buf_ptr += sprintf ( buf_ptr, " %02x", *byte_ptr );
|
|
||||||
byte_ptr++ ;
|
|
||||||
}
|
|
||||||
// printf ("\n\n");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -93,7 +93,7 @@
|
||||||
#define FAIL_INVALID_DATA (71)
|
#define FAIL_INVALID_DATA (71)
|
||||||
#define FAIL_BAD_STATE (72)
|
#define FAIL_BAD_STATE (72)
|
||||||
#define FAIL_KEY_VALUE_PARSE (73)
|
#define FAIL_KEY_VALUE_PARSE (73)
|
||||||
#define FAIL____UNUSED____74 (74)
|
#define FAIL_DATA_SIZE (74)
|
||||||
#define FAIL_NOT_FOUND (75)
|
#define FAIL_NOT_FOUND (75)
|
||||||
#define FAIL_WORKQ_TIMEOUT (76)
|
#define FAIL_WORKQ_TIMEOUT (76)
|
||||||
#define FAIL_HTTP_DELETE (77)
|
#define FAIL_HTTP_DELETE (77)
|
||||||
|
|
|
@ -207,7 +207,7 @@ int daemon_run_testhead ( void );
|
||||||
#define CONFIG_AGENT_INV_PORT 0x00000100 /**< Inventory Port Number */
|
#define CONFIG_AGENT_INV_PORT 0x00000100 /**< Inventory Port Number */
|
||||||
#define CONFIG_AGENT_HA_PORT 0x00000200 /**< HA Framework Port Number */
|
#define CONFIG_AGENT_HA_PORT 0x00000200 /**< HA Framework Port Number */
|
||||||
#define CONFIG_CLIENT_MTCALARM_PORT 0x00000400 /**< Send alarm requests to */
|
#define CONFIG_CLIENT_MTCALARM_PORT 0x00000400 /**< Send alarm requests to */
|
||||||
#define CONFIG_RESERVED_800 0x00000800 /**< */
|
#define CONFIG_AGENT_SM_CLIENT_PORT 0x00000800 /**< Port to Send SM data on */
|
||||||
#define CONFIG_MTC_TO_HWMON_CMD_PORT 0x00001000 /**< HWmon Port Number */
|
#define CONFIG_MTC_TO_HWMON_CMD_PORT 0x00001000 /**< HWmon Port Number */
|
||||||
#define CONFIG_AGENT_KEY_PORT 0x00002000 /**< Keystone HTTP port */
|
#define CONFIG_AGENT_KEY_PORT 0x00002000 /**< Keystone HTTP port */
|
||||||
#define CONFIG_AGENT_HBS_MTC_PORT 0x00004000 /**< Heartbeat Service Port */
|
#define CONFIG_AGENT_HBS_MTC_PORT 0x00004000 /**< Heartbeat Service Port */
|
||||||
|
@ -217,8 +217,8 @@ int daemon_run_testhead ( void );
|
||||||
#define CONFIG_AGENT_MTC_MGMNT_PORT 0x00040000 /**< Agent Infr network port */
|
#define CONFIG_AGENT_MTC_MGMNT_PORT 0x00040000 /**< Agent Infr network port */
|
||||||
#define CONFIG_AGENT_TOKEN_REFRESH 0x00080000 /**< Token refresh rate mask */
|
#define CONFIG_AGENT_TOKEN_REFRESH 0x00080000 /**< Token refresh rate mask */
|
||||||
#define CONFIG_CLIENT_MTC_INFRA_PORT 0x00100000 /**< Client Infra nwk mtc port */
|
#define CONFIG_CLIENT_MTC_INFRA_PORT 0x00100000 /**< Client Infra nwk mtc port */
|
||||||
#define CONFIG_CLIENT_MTC_MGMNT_PORT 0x00200000 /**< Client mgmnt nwk mtc port */
|
#define CONFIG_CLIENT_MTC_MGMNT_PORT 0x00200000 /**< Client mgmnt nwk mtc port */
|
||||||
#define CONFIG_AGENT_VIM_CMD_PORT 0x00400000 /**< VIM Command Port Mask */
|
#define CONFIG_AGENT_SM_SERVER_PORT 0x00400000 /**< Port to RX data from SM */
|
||||||
#define CONFIG_CLIENT_HBS_INFRA_PORT 0x00800000 /**< Infrastructure ntwk Port */
|
#define CONFIG_CLIENT_HBS_INFRA_PORT 0x00800000 /**< Infrastructure ntwk Port */
|
||||||
#define CONFIG_CLIENT_HBS_MGMNT_PORT 0x01000000 /**< Management network Port */
|
#define CONFIG_CLIENT_HBS_MGMNT_PORT 0x01000000 /**< Management network Port */
|
||||||
#define CONFIG_CLIENT_HBS_EVENT_PORT 0x02000000 /**< Heartbeat Event Messaging */
|
#define CONFIG_CLIENT_HBS_EVENT_PORT 0x02000000 /**< Heartbeat Event Messaging */
|
||||||
|
|
|
@ -90,6 +90,15 @@ of spec operating conditions that can reduce outage time through automated
|
||||||
notification and recovery thereby improving overall platform availability
|
notification and recovery thereby improving overall platform availability
|
||||||
for the customer.
|
for the customer.
|
||||||
|
|
||||||
|
%package -n mtce-dev
|
||||||
|
Summary: Titanium Server Maintenance Software Development Package
|
||||||
|
Group: base
|
||||||
|
Provides: mtce-dev = %{version}-%{release}
|
||||||
|
|
||||||
|
%description -n mtce-dev
|
||||||
|
Titanium Cloud Maintenance. This package contains header files,
|
||||||
|
and related items necessary for software development.
|
||||||
|
|
||||||
%package -n mtce-pmon
|
%package -n mtce-pmon
|
||||||
Summary: Titanium Server Maintenance Process Monitor Package
|
Summary: Titanium Server Maintenance Process Monitor Package
|
||||||
Group: base
|
Group: base
|
||||||
|
@ -424,6 +433,9 @@ install -m 644 -p -D %{_buildsubdir}/fsmon/scripts/fsmon.logrotate %{buildroot}%
|
||||||
install -m 644 -p -D %{_buildsubdir}/hwmon/scripts/hwmon.logrotate %{buildroot}%{local_etc_logrotated}/hwmon.logrotate
|
install -m 644 -p -D %{_buildsubdir}/hwmon/scripts/hwmon.logrotate %{buildroot}%{local_etc_logrotated}/hwmon.logrotate
|
||||||
install -m 644 -p -D %{_buildsubdir}/alarm/scripts/mtcalarm.logrotate %{buildroot}%{local_etc_logrotated}/mtcalarm.logrotate
|
install -m 644 -p -D %{_buildsubdir}/alarm/scripts/mtcalarm.logrotate %{buildroot}%{local_etc_logrotated}/mtcalarm.logrotate
|
||||||
|
|
||||||
|
# software development files
|
||||||
|
install -m 644 -p -D %{_buildsubdir}/heartbeat/mtceHbsCluster.h %{buildroot}/%{_includedir}/mtceHbsCluster.h
|
||||||
|
|
||||||
install -m 755 -p -D %{_buildsubdir}/public/libamon.so.$MAJOR %{buildroot}%{_libdir}/libamon.so.$MAJOR
|
install -m 755 -p -D %{_buildsubdir}/public/libamon.so.$MAJOR %{buildroot}%{_libdir}/libamon.so.$MAJOR
|
||||||
cd %{buildroot}%{_libdir} ; ln -s libamon.so.$MAJOR libamon.so.$MAJOR.$MINOR
|
cd %{buildroot}%{_libdir} ; ln -s libamon.so.$MAJOR libamon.so.$MAJOR.$MINOR
|
||||||
cd %{buildroot}%{_libdir} ; ln -s libamon.so.$MAJOR libamon.so
|
cd %{buildroot}%{_libdir} ; ln -s libamon.so.$MAJOR libamon.so
|
||||||
|
@ -621,3 +633,10 @@ install -m 755 -d %{buildroot}/var/run
|
||||||
%{_sysconfdir}/init.d/hostw
|
%{_sysconfdir}/init.d/hostw
|
||||||
%{local_bindir}/hostwd
|
%{local_bindir}/hostwd
|
||||||
|
|
||||||
|
###############################
|
||||||
|
# Maintenance Software Development RPM
|
||||||
|
###############################
|
||||||
|
%files -n mtce-dev
|
||||||
|
%defattr(-,root,root,-)
|
||||||
|
|
||||||
|
%{_includedir}/mtceHbsCluster.h
|
||||||
|
|
|
@ -269,7 +269,7 @@ nodeLinkClass::nodeLinkClass()
|
||||||
hbs_ready = false ;
|
hbs_ready = false ;
|
||||||
hbs_state_change = false ;
|
hbs_state_change = false ;
|
||||||
hbs_disabled = true ;
|
hbs_disabled = true ;
|
||||||
hbs_pulse_period = hbs_pulse_period_save = 200 ;
|
hbs_pulse_period = hbs_pulse_period_save = 0 ;
|
||||||
hbs_minor_threshold = HBS_MINOR_THRESHOLD ;
|
hbs_minor_threshold = HBS_MINOR_THRESHOLD ;
|
||||||
hbs_degrade_threshold = HBS_DEGRADE_THRESHOLD ;
|
hbs_degrade_threshold = HBS_DEGRADE_THRESHOLD ;
|
||||||
hbs_failure_threshold = HBS_FAILURE_THRESHOLD ;
|
hbs_failure_threshold = HBS_FAILURE_THRESHOLD ;
|
||||||
|
@ -7325,18 +7325,40 @@ int nodeLinkClass::launch_host_services_cmd ( struct nodeLinkClass::node * node_
|
||||||
|
|
||||||
int send_event ( string & hostname, unsigned int cmd, iface_enum iface );
|
int send_event ( string & hostname, unsigned int cmd, iface_enum iface );
|
||||||
|
|
||||||
int nodeLinkClass::mon_host ( const string & hostname, iface_enum iface, bool true_false, bool send_clear )
|
int nodeLinkClass::mon_host ( const string & hostname, bool true_false, bool send_clear )
|
||||||
{
|
{
|
||||||
int rc = FAIL ;
|
nodeLinkClass::node* node_ptr ;
|
||||||
if ( ! hostname.empty() )
|
node_ptr = nodeLinkClass::getNode ( hostname );
|
||||||
|
if ( node_ptr != NULL )
|
||||||
{
|
{
|
||||||
nodeLinkClass::node* node_ptr ;
|
bool want_log = true ;
|
||||||
node_ptr = nodeLinkClass::getNode ( hostname );
|
for ( int iface = 0 ; iface < MAX_IFACES ; iface++ )
|
||||||
if ( node_ptr != NULL )
|
|
||||||
{
|
{
|
||||||
node_ptr->monitor[iface] = true_false ;
|
if ( node_ptr->monitor[iface] == true_false )
|
||||||
|
continue ;
|
||||||
|
|
||||||
|
if ( iface == INFRA_IFACE )
|
||||||
|
{
|
||||||
|
if ( this->infra_network_provisioned == false )
|
||||||
|
continue ;
|
||||||
|
|
||||||
|
if ( node_ptr->monitor[MGMNT_IFACE] == true_false )
|
||||||
|
want_log = false ;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( send_clear == true )
|
||||||
|
{
|
||||||
|
send_event ( node_ptr->hostname, MTC_EVENT_HEARTBEAT_MINOR_CLR, (iface_enum)iface ) ;
|
||||||
|
send_event ( node_ptr->hostname, MTC_EVENT_HEARTBEAT_DEGRADE_CLR, (iface_enum)iface ) ;
|
||||||
|
}
|
||||||
|
|
||||||
if ( true_false == true )
|
if ( true_false == true )
|
||||||
{
|
{
|
||||||
|
if ( want_log )
|
||||||
|
{
|
||||||
|
ilog ("%s starting heartbeat service \n",
|
||||||
|
hostname.c_str());
|
||||||
|
}
|
||||||
node_ptr->no_work_log_throttle = 0 ;
|
node_ptr->no_work_log_throttle = 0 ;
|
||||||
node_ptr->b2b_misses_count[iface] = 0 ;
|
node_ptr->b2b_misses_count[iface] = 0 ;
|
||||||
node_ptr->hbs_misses_count[iface] = 0 ;
|
node_ptr->hbs_misses_count[iface] = 0 ;
|
||||||
|
@ -7345,16 +7367,20 @@ int nodeLinkClass::mon_host ( const string & hostname, iface_enum iface, bool tr
|
||||||
node_ptr->hbs_failure[iface] = false ;
|
node_ptr->hbs_failure[iface] = false ;
|
||||||
node_ptr->hbs_minor[iface] = false ;
|
node_ptr->hbs_minor[iface] = false ;
|
||||||
node_ptr->hbs_degrade[iface] = false ;
|
node_ptr->hbs_degrade[iface] = false ;
|
||||||
if ( send_clear == true )
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if ( want_log )
|
||||||
{
|
{
|
||||||
send_event ( node_ptr->hostname, MTC_EVENT_HEARTBEAT_MINOR_CLR, iface ) ;
|
ilog ("%s stopping heartbeat service\n",
|
||||||
send_event ( node_ptr->hostname, MTC_EVENT_HEARTBEAT_DEGRADE_CLR, iface ) ;
|
hostname.c_str());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return PASS ;
|
node_ptr->monitor[iface] = true_false ;
|
||||||
}
|
}
|
||||||
|
return PASS ;
|
||||||
}
|
}
|
||||||
return ( rc );
|
return ( FAIL );
|
||||||
}
|
}
|
||||||
|
|
||||||
/* store the current hardware monitor monitoring state */
|
/* store the current hardware monitor monitoring state */
|
||||||
|
@ -7887,11 +7913,11 @@ int nodeLinkClass::remPulse ( struct node * node_ptr, iface_enum iface, bool cle
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
pulse_list[iface].head_ptr = pulse_list[iface].head_ptr->pulse_link[iface].next_ptr ;
|
pulse_list[iface].head_ptr = pulse_list[iface].head_ptr->pulse_link[iface].next_ptr ;
|
||||||
pulse_list[iface].head_ptr->pulse_link[iface].prev_ptr = NULL ;
|
pulse_list[iface].head_ptr->pulse_link[iface].prev_ptr = NULL ;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
else if ( pulse_list[iface].tail_ptr == pulse_ptr )
|
else if ( pulse_list[iface].tail_ptr == pulse_ptr )
|
||||||
{
|
{
|
||||||
qlog2 ("%s Pulse: Multiple Node -> Tail Case : %d of %d\n", node_ptr->hostname.c_str(), pulse_ptr->linknum[iface], pulses[iface] );
|
qlog2 ("%s Pulse: Multiple Node -> Tail Case : %d of %d\n", node_ptr->hostname.c_str(), pulse_ptr->linknum[iface], pulses[iface] );
|
||||||
|
@ -7906,19 +7932,16 @@ int nodeLinkClass::remPulse ( struct node * node_ptr, iface_enum iface, bool cle
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
pulse_list[iface].tail_ptr = pulse_list[iface].tail_ptr->pulse_link[iface].prev_ptr ;
|
pulse_list[iface].tail_ptr = pulse_list[iface].tail_ptr->pulse_link[iface].prev_ptr ;
|
||||||
pulse_list[iface].tail_ptr->pulse_link[iface].next_ptr = NULL ;
|
pulse_list[iface].tail_ptr->pulse_link[iface].next_ptr = NULL ;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/* July 1 emacdona: Make failure path case more robust */
|
/* July 1 emacdona: Make failure path case more robust */
|
||||||
if ( pulse_ptr == NULL ) { slog ("Internal Err 1\n"); rc = FAIL; }
|
if ( pulse_ptr == NULL ) { slog ("Internal Err 1\n"); rc = FAIL; }
|
||||||
else if ( pulse_ptr->pulse_link[iface].prev_ptr == NULL ) { slog ("Internal Err 2\n"); rc = FAIL; }
|
else if ( pulse_ptr->pulse_link[iface].prev_ptr == NULL ) { slog ("Internal Err 2\n"); rc = FAIL; }
|
||||||
else if ( pulse_ptr->pulse_link[iface].prev_ptr->pulse_link[iface].next_ptr == NULL ) { slog ("Internal Err 3\n"); rc = FAIL; }
|
else if ( pulse_ptr->pulse_link[iface].next_ptr == NULL ) { slog ("Internal Err 3\n"); rc = FAIL; }
|
||||||
else if ( pulse_ptr->pulse_link[iface].next_ptr == NULL ) { slog ("Internal Err 4\n"); rc = FAIL; }
|
|
||||||
else if ( pulse_ptr->pulse_link[iface].next_ptr->pulse_link[iface].prev_ptr == NULL ) { slog ("Internal Err 5\n"); rc = FAIL; }
|
|
||||||
|
|
||||||
if ( rc == FAIL )
|
if ( rc == FAIL )
|
||||||
{
|
{
|
||||||
slog ("%s Null pointer error splicing %s out of pulse list with %d pulses remaining (Monitoring:%s)\n",
|
slog ("%s Null pointer error splicing %s out of pulse list with %d pulses remaining (Monitoring:%s)\n",
|
||||||
|
@ -7935,7 +7958,7 @@ int nodeLinkClass::remPulse ( struct node * node_ptr, iface_enum iface, bool cle
|
||||||
}
|
}
|
||||||
if ( rc == PASS )
|
if ( rc == PASS )
|
||||||
{
|
{
|
||||||
pulse_ptr->linknum[iface]-- ; // = 0 ;
|
pulse_ptr->linknum[iface]-- ;
|
||||||
}
|
}
|
||||||
pulses[iface]-- ;
|
pulses[iface]-- ;
|
||||||
}
|
}
|
||||||
|
@ -8082,14 +8105,26 @@ void nodeLinkClass::manage_heartbeat_alarm ( struct nodeLinkClass::node * node_p
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int nodeLinkClass::lost_pulses ( iface_enum iface )
|
int nodeLinkClass::lost_pulses ( iface_enum iface, bool & storage_0_responding )
|
||||||
{
|
{
|
||||||
int rc = PASS ;
|
int lost = 0 ;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Assume storage-0 is responding until proven otherwise.
|
||||||
|
* keep in mind that this interface counts nodes that have not responded ;
|
||||||
|
* not those that have.
|
||||||
|
*/
|
||||||
|
storage_0_responding = true ;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Loop over the pulse_list which now only contains a list of hosts
|
||||||
|
* that have not responded in this heartbeat period.
|
||||||
|
*/
|
||||||
for ( ; pulse_list[iface].head_ptr != NULL ; )
|
for ( ; pulse_list[iface].head_ptr != NULL ; )
|
||||||
{
|
{
|
||||||
daemon_signal_hdlr ();
|
daemon_signal_hdlr ();
|
||||||
pulse_ptr = pulse_list[iface].head_ptr ;
|
pulse_ptr = pulse_list[iface].head_ptr ;
|
||||||
|
lost++ ;
|
||||||
if ( active )
|
if ( active )
|
||||||
{
|
{
|
||||||
string flat = "Flat Line:" ;
|
string flat = "Flat Line:" ;
|
||||||
|
@ -8098,6 +8133,15 @@ int nodeLinkClass::lost_pulses ( iface_enum iface )
|
||||||
pulse_ptr->b2b_pulses_count[iface] = 0 ;
|
pulse_ptr->b2b_pulses_count[iface] = 0 ;
|
||||||
// pulse_ptr->max_count[iface]++ ;
|
// pulse_ptr->max_count[iface]++ ;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Update storage_0_responding reference to false if storage-0
|
||||||
|
* is found in the lost pulse list.
|
||||||
|
*/
|
||||||
|
if ( pulse_ptr->hostname == STORAGE_0 )
|
||||||
|
{
|
||||||
|
storage_0_responding = false ;
|
||||||
|
}
|
||||||
|
|
||||||
/* Don't log single misses unless in debug mode */
|
/* Don't log single misses unless in debug mode */
|
||||||
if ( pulse_ptr->b2b_misses_count[iface] > 1 )
|
if ( pulse_ptr->b2b_misses_count[iface] > 1 )
|
||||||
{
|
{
|
||||||
|
@ -8156,8 +8200,9 @@ int nodeLinkClass::lost_pulses ( iface_enum iface )
|
||||||
get_iface_name_str(iface),
|
get_iface_name_str(iface),
|
||||||
pulse_ptr->b2b_misses_count[iface] );
|
pulse_ptr->b2b_misses_count[iface] );
|
||||||
}
|
}
|
||||||
|
#ifdef WANT_HBS_MEM_LOGS
|
||||||
mem_log ( flat, pulse_ptr->b2b_misses_count[iface], pulse_ptr->hostname.c_str());
|
mem_log ( flat, pulse_ptr->b2b_misses_count[iface], pulse_ptr->hostname.c_str());
|
||||||
|
#endif
|
||||||
if ( iface == MGMNT_IFACE )
|
if ( iface == MGMNT_IFACE )
|
||||||
{
|
{
|
||||||
if ( pulse_ptr->b2b_misses_count[iface] == hbs_minor_threshold )
|
if ( pulse_ptr->b2b_misses_count[iface] == hbs_minor_threshold )
|
||||||
|
@ -8252,8 +8297,7 @@ int nodeLinkClass::lost_pulses ( iface_enum iface )
|
||||||
if ( pulse_ptr->b2b_misses_count[iface] > pulse_ptr->max_count[iface] )
|
if ( pulse_ptr->b2b_misses_count[iface] > pulse_ptr->max_count[iface] )
|
||||||
pulse_ptr->max_count[iface] = pulse_ptr->b2b_misses_count[iface] ;
|
pulse_ptr->max_count[iface] = pulse_ptr->b2b_misses_count[iface] ;
|
||||||
}
|
}
|
||||||
rc = remPulse_by_name ( pulse_ptr->hostname, iface, false, NULL_PULSE_FLAGS );
|
if ( remPulse_by_name ( pulse_ptr->hostname, iface, false, NULL_PULSE_FLAGS ))
|
||||||
if ( rc != PASS )
|
|
||||||
{
|
{
|
||||||
elog ("%s %s not in pulse list\n", pulse_ptr->hostname.c_str(),
|
elog ("%s %s not in pulse list\n", pulse_ptr->hostname.c_str(),
|
||||||
get_iface_name_str(iface));
|
get_iface_name_str(iface));
|
||||||
|
@ -8266,7 +8310,7 @@ int nodeLinkClass::lost_pulses ( iface_enum iface )
|
||||||
break ;
|
break ;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return (rc);
|
return (lost);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Return true if the specified interface is being monitored for this host */
|
/* Return true if the specified interface is being monitored for this host */
|
||||||
|
@ -8301,7 +8345,7 @@ void nodeLinkClass::print_pulse_list ( iface_enum iface )
|
||||||
|
|
||||||
if ( pulse_list[iface].head_ptr != NULL )
|
if ( pulse_list[iface].head_ptr != NULL )
|
||||||
{
|
{
|
||||||
for ( pulse_ptr = pulse_list[iface].head_ptr ;
|
for ( pulse_ptr = pulse_list[iface].head_ptr ;
|
||||||
pulse_ptr != NULL ;
|
pulse_ptr != NULL ;
|
||||||
pulse_ptr = pulse_ptr->pulse_link[iface].next_ptr )
|
pulse_ptr = pulse_ptr->pulse_link[iface].next_ptr )
|
||||||
{
|
{
|
||||||
|
@ -8310,12 +8354,15 @@ void nodeLinkClass::print_pulse_list ( iface_enum iface )
|
||||||
}
|
}
|
||||||
dlog ("Patients: %s\n", pulse_host_list.c_str());
|
dlog ("Patients: %s\n", pulse_host_list.c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef WANT_HBS_MEM_LOGS
|
||||||
if ( pulses[iface] && !pulse_host_list.empty() )
|
if ( pulses[iface] && !pulse_host_list.empty() )
|
||||||
{
|
{
|
||||||
string temp = get_iface_name_str(iface) ;
|
string temp = get_iface_name_str(iface) ;
|
||||||
temp.append(" Patients :") ;
|
temp.append(" Patients :") ;
|
||||||
mem_log ( temp, pulses[iface], pulse_host_list );
|
mem_log ( temp, pulses[iface], pulse_host_list );
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1940,7 +1940,7 @@ public:
|
||||||
void manage_pulse_flags ( string & hostname, unsigned int flags );
|
void manage_pulse_flags ( string & hostname, unsigned int flags );
|
||||||
|
|
||||||
/** Control the heartbeat monitoring state of a host */
|
/** Control the heartbeat monitoring state of a host */
|
||||||
int mon_host ( const string & hostname, iface_enum iface, bool true_false, bool send_clear );
|
int mon_host ( const string & hostname, bool true_false, bool send_clear );
|
||||||
|
|
||||||
/** Return true if the pulse list is empty */
|
/** Return true if the pulse list is empty */
|
||||||
bool pulse_list_empty ( iface_enum iface );
|
bool pulse_list_empty ( iface_enum iface );
|
||||||
|
@ -1956,7 +1956,7 @@ public:
|
||||||
* that exceed preset thresholds.
|
* that exceed preset thresholds.
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
int lost_pulses ( iface_enum iface );
|
int lost_pulses ( iface_enum iface, bool & storage_0_responding );
|
||||||
|
|
||||||
bool monitored_pulse ( string hostname , iface_enum iface );
|
bool monitored_pulse ( string hostname , iface_enum iface );
|
||||||
|
|
||||||
|
|
|
@ -4,10 +4,10 @@
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
#
|
#
|
||||||
|
|
||||||
SRCS = hbsAlarm.cpp hbsClient.cpp hbsAgent.cpp hbsPmon.cpp hbsStubs.cpp
|
SRCS = hbsAlarm.cpp hbsClient.cpp hbsAgent.cpp hbsPmon.cpp hbsUtil.cpp hbsCluster.cpp hbsStubs.cpp
|
||||||
OBJS = $(SRCS:.cpp=.o)
|
OBJS = $(SRCS:.cpp=.o)
|
||||||
|
|
||||||
LDLIBS = -lstdc++ -ldaemon -lcommon -lthreadUtil -lpthread -lfmcommon -lalarm -lrt -lamon -lcrypto -luuid
|
LDLIBS = -lstdc++ -ldaemon -lcommon -lthreadUtil -lpthread -lfmcommon -lalarm -lrt -lamon -lcrypto -luuid -ljson-c
|
||||||
INCLUDES = -I. -I/usr/include/mtce-daemon -I/usr/include/mtce-common
|
INCLUDES = -I. -I/usr/include/mtce-daemon -I/usr/include/mtce-common
|
||||||
INCLUDES += -I../common -I../alarm -I../maintenance -I../public
|
INCLUDES += -I../common -I../alarm -I../maintenance -I../public
|
||||||
|
|
||||||
|
@ -31,8 +31,8 @@ endif
|
||||||
all: static_analysis common agent client
|
all: static_analysis common agent client
|
||||||
|
|
||||||
build: static_analysis $(OBJS)
|
build: static_analysis $(OBJS)
|
||||||
$(CXX) $(CCFLAGS) hbsAlarm.o hbsAgent.o hbsStubs.o ../common/nodeClass.o -L../public -L../alarm $(LDLIBS) -o hbsAgent
|
$(CXX) $(CCFLAGS) hbsAlarm.o hbsAgent.o hbsUtil.o hbsCluster.o hbsStubs.o ../common/nodeClass.o -L../public -L../alarm $(LDLIBS) -o hbsAgent
|
||||||
$(CXX) $(CCFLAGS) hbsClient.o hbsPmon.o -L../public -L../alarm $(LDLIBS) -o hbsClient
|
$(CXX) $(CCFLAGS) hbsClient.o hbsPmon.o hbsUtil.o -L../public -L../alarm $(LDLIBS) -o hbsClient
|
||||||
|
|
||||||
common:
|
common:
|
||||||
( cd ../common ; make clean ; make lib VER=$(VER) VER_MJR=$(VER_MJR))
|
( cd ../common ; make clean ; make lib VER=$(VER) VER_MJR=$(VER_MJR))
|
||||||
|
|
|
@ -41,6 +41,7 @@ using namespace std;
|
||||||
#include "hbsBase.h" /* Heartbeat Base Header File */
|
#include "hbsBase.h" /* Heartbeat Base Header File */
|
||||||
#include "hbsAlarm.h" /* for ... hbsAlarm_clear_all */
|
#include "hbsAlarm.h" /* for ... hbsAlarm_clear_all */
|
||||||
#include "alarm.h" /* for ... alarm send message to mtcalarmd */
|
#include "alarm.h" /* for ... alarm send message to mtcalarmd */
|
||||||
|
#include "jsonUtil.h" /* for ... jsonUtil_get_key_val */
|
||||||
|
|
||||||
/**************************************************************
|
/**************************************************************
|
||||||
* Implementation Structure
|
* Implementation Structure
|
||||||
|
@ -68,6 +69,8 @@ using namespace std;
|
||||||
/* Number of back to back interface errors before the interface is re-initialized. */
|
/* Number of back to back interface errors before the interface is re-initialized. */
|
||||||
#define INTERFACE_ERRORS_FOR_REINIT (8)
|
#define INTERFACE_ERRORS_FOR_REINIT (8)
|
||||||
|
|
||||||
|
#define MAX_LEN 1000
|
||||||
|
|
||||||
/* Historical String data for mem_logs */
|
/* Historical String data for mem_logs */
|
||||||
static string unexpected_pulse_list[MAX_IFACES] = { "" , "" } ;
|
static string unexpected_pulse_list[MAX_IFACES] = { "" , "" } ;
|
||||||
static string arrival_histogram[MAX_IFACES] = { "" , "" } ;
|
static string arrival_histogram[MAX_IFACES] = { "" , "" } ;
|
||||||
|
@ -90,6 +93,8 @@ int module_init ( void )
|
||||||
return (PASS);
|
return (PASS);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static unsigned int controller_number = 0 ;
|
||||||
|
|
||||||
void daemon_sigchld_hdlr ( void )
|
void daemon_sigchld_hdlr ( void )
|
||||||
{
|
{
|
||||||
; /* dlog("Received SIGCHLD ... no action\n"); */
|
; /* dlog("Received SIGCHLD ... no action\n"); */
|
||||||
|
@ -184,14 +189,16 @@ void daemon_exit ( void )
|
||||||
CONFIG_AGENT_HBS_DEGRADE |\
|
CONFIG_AGENT_HBS_DEGRADE |\
|
||||||
CONFIG_AGENT_HBS_FAILURE |\
|
CONFIG_AGENT_HBS_FAILURE |\
|
||||||
CONFIG_AGENT_MULTICAST |\
|
CONFIG_AGENT_MULTICAST |\
|
||||||
CONFIG_SCHED_PRIORITY |\
|
CONFIG_SCHED_PRIORITY |\
|
||||||
CONFIG_MTC_TO_HBS_CMD_PORT |\
|
CONFIG_MTC_TO_HBS_CMD_PORT |\
|
||||||
CONFIG_HBS_TO_MTC_EVENT_PORT |\
|
CONFIG_HBS_TO_MTC_EVENT_PORT |\
|
||||||
CONFIG_AGENT_HBS_MGMNT_PORT |\
|
CONFIG_AGENT_HBS_MGMNT_PORT |\
|
||||||
CONFIG_AGENT_HBS_INFRA_PORT |\
|
CONFIG_AGENT_HBS_INFRA_PORT |\
|
||||||
CONFIG_CLIENT_HBS_MGMNT_PORT |\
|
CONFIG_CLIENT_HBS_MGMNT_PORT |\
|
||||||
CONFIG_CLIENT_MTCALARM_PORT |\
|
CONFIG_CLIENT_MTCALARM_PORT |\
|
||||||
CONFIG_CLIENT_HBS_INFRA_PORT )
|
CONFIG_CLIENT_HBS_INFRA_PORT |\
|
||||||
|
CONFIG_AGENT_SM_SERVER_PORT |\
|
||||||
|
CONFIG_AGENT_SM_CLIENT_PORT)
|
||||||
|
|
||||||
/* Startup config read */
|
/* Startup config read */
|
||||||
static int hbs_config_handler ( void * user,
|
static int hbs_config_handler ( void * user,
|
||||||
|
@ -203,6 +210,8 @@ static int hbs_config_handler ( void * user,
|
||||||
|
|
||||||
if (MATCH("agent", "heartbeat_period"))
|
if (MATCH("agent", "heartbeat_period"))
|
||||||
{
|
{
|
||||||
|
int curr_period = hbsInv.hbs_pulse_period ;
|
||||||
|
|
||||||
config_ptr->hbs_pulse_period = atoi(value);
|
config_ptr->hbs_pulse_period = atoi(value);
|
||||||
hbsInv.hbs_pulse_period = atoi(value);
|
hbsInv.hbs_pulse_period = atoi(value);
|
||||||
hbsInv.hbs_state_change = true ;
|
hbsInv.hbs_state_change = true ;
|
||||||
|
@ -227,10 +236,14 @@ static int hbs_config_handler ( void * user,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
hbsInv.hbs_pulse_period_save = hbsInv.hbs_pulse_period ;
|
||||||
|
if ( curr_period != hbsInv.hbs_pulse_period )
|
||||||
|
{
|
||||||
|
/* initialize cluster info */
|
||||||
|
hbs_cluster_init ( hbsInv.hbs_pulse_period );
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
hbsInv.hbs_pulse_period_save = hbsInv.hbs_pulse_period ;
|
|
||||||
|
|
||||||
if (MATCH("agent", "hbs_minor_threshold"))
|
if (MATCH("agent", "hbs_minor_threshold"))
|
||||||
{
|
{
|
||||||
config_ptr->hbs_minor_threshold =
|
config_ptr->hbs_minor_threshold =
|
||||||
|
@ -312,6 +325,16 @@ static int hbs_config_handler ( void * user,
|
||||||
config_ptr->hbs_agent_mgmnt_port = atoi(value);
|
config_ptr->hbs_agent_mgmnt_port = atoi(value);
|
||||||
config_ptr->mask |= CONFIG_AGENT_HBS_MGMNT_PORT ;
|
config_ptr->mask |= CONFIG_AGENT_HBS_MGMNT_PORT ;
|
||||||
}
|
}
|
||||||
|
else if (MATCH("agent", "sm_server_port"))
|
||||||
|
{
|
||||||
|
config_ptr->sm_server_port = atoi(value);
|
||||||
|
config_ptr->mask |= CONFIG_AGENT_SM_SERVER_PORT ;
|
||||||
|
}
|
||||||
|
else if (MATCH("agent", "sm_client_port"))
|
||||||
|
{
|
||||||
|
config_ptr->sm_client_port = atoi(value);
|
||||||
|
config_ptr->mask |= CONFIG_AGENT_SM_CLIENT_PORT ;
|
||||||
|
}
|
||||||
else if (MATCH("client", "hbs_client_mgmnt_port"))
|
else if (MATCH("client", "hbs_client_mgmnt_port"))
|
||||||
{
|
{
|
||||||
config_ptr->hbs_client_mgmnt_port = atoi(value);
|
config_ptr->hbs_client_mgmnt_port = atoi(value);
|
||||||
|
@ -617,6 +640,34 @@ int alarm_port_init ( void )
|
||||||
return ( hbs_sock.alarm_sock->return_status ) ;
|
return ( hbs_sock.alarm_sock->return_status ) ;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int hbs_sm_sockets_init ( void )
|
||||||
|
{
|
||||||
|
int rc = PASS ;
|
||||||
|
|
||||||
|
/* Create an UDP RX Message Socket for SM Requests; LO interface only */
|
||||||
|
hbs_sock.sm_server_sock = new msgClassRx(LOOPBACK_IP, hbs_config.sm_server_port, IPPROTO_UDP);
|
||||||
|
if ( ! hbs_sock.sm_server_sock )
|
||||||
|
{
|
||||||
|
elog ("Failed to setup SM receive socket");
|
||||||
|
rc = FAIL_SOCKET_CREATE ;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Create an UDP TX Message Socket for SM Requests; LO interface only */
|
||||||
|
hbs_sock.sm_client_sock = new msgClassTx(LOOPBACK_IP, hbs_config.sm_client_port,IPPROTO_UDP);
|
||||||
|
if ( ! hbs_sock.sm_client_sock )
|
||||||
|
{
|
||||||
|
elog ("Failed to setup SM transmit socket");
|
||||||
|
rc = FAIL_SOCKET_CREATE ;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( rc == PASS )
|
||||||
|
{
|
||||||
|
hbs_sock.sm_server_sock->sock_ok(true);
|
||||||
|
hbs_sock.sm_client_sock->sock_ok(true);
|
||||||
|
}
|
||||||
|
return (rc);
|
||||||
|
}
|
||||||
|
|
||||||
/* Init the internal/local sockets ; the ones that will no change.
|
/* Init the internal/local sockets ; the ones that will no change.
|
||||||
* This way we don't miss add and start commands from maintenance. */
|
* This way we don't miss add and start commands from maintenance. */
|
||||||
|
|
||||||
|
@ -654,6 +705,9 @@ int hbs_int_socket_init ( void )
|
||||||
{
|
{
|
||||||
elog ("Alarm port setup or registration failed (rc:%d)\n", rc );
|
elog ("Alarm port setup or registration failed (rc:%d)\n", rc );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
rc = hbs_sm_sockets_init () ;
|
||||||
|
|
||||||
return (rc);
|
return (rc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -697,26 +751,36 @@ int hbs_pulse_request ( iface_enum iface,
|
||||||
string hostname_clue,
|
string hostname_clue,
|
||||||
unsigned int lookup_clue)
|
unsigned int lookup_clue)
|
||||||
{
|
{
|
||||||
int rc = PASS ;
|
#ifdef WANT_HBS_MEM_LOGS
|
||||||
#define MAX_LEN 1000
|
|
||||||
char str[MAX_LEN] ;
|
char str[MAX_LEN] ;
|
||||||
|
#endif
|
||||||
/* Add the sequence number */
|
int bytes = 0 ;
|
||||||
hbs_sock.tx_mesg[iface].s = seq_num ;
|
|
||||||
memset ( &hbs_sock.tx_mesg[iface].m[HBS_HEADER_SIZE], 0, MAX_CHARS_HOSTNAME );
|
|
||||||
if (( lookup_clue ) &&
|
|
||||||
( hostname_clue.length() <= MAX_CHARS_HOSTNAME ))
|
|
||||||
{
|
|
||||||
hbs_sock.tx_mesg[iface].c = lookup_clue ;
|
|
||||||
memcpy ( &hbs_sock.tx_mesg[iface].m[HBS_HEADER_SIZE],
|
|
||||||
hostname_clue.data(),
|
|
||||||
hostname_clue.length());
|
|
||||||
}
|
|
||||||
/* Message length is the size of the sequence number, the clue and the buffer */
|
|
||||||
|
|
||||||
int msg_len = (HBS_MAX_MSG+(sizeof(unsigned int)*2)) ;
|
|
||||||
if ( hbs_sock.tx_sock[iface] )
|
if ( hbs_sock.tx_sock[iface] )
|
||||||
{
|
{
|
||||||
|
// int unused_networks = 0 ;
|
||||||
|
memset ( &hbs_sock.tx_mesg[iface].m[HBS_HEADER_SIZE], 0, MAX_CHARS_HOSTNAME );
|
||||||
|
|
||||||
|
/* Add message version - 0 -> 1 with the addition of cluster information */
|
||||||
|
hbs_sock.tx_mesg[iface].v = HBS_MESSAGE_VERSION ;
|
||||||
|
|
||||||
|
/* Add the sequence number */
|
||||||
|
hbs_sock.tx_mesg[iface].s = seq_num ;
|
||||||
|
|
||||||
|
if (( lookup_clue ) &&
|
||||||
|
( hostname_clue.length() <= MAX_CHARS_HOSTNAME ))
|
||||||
|
{
|
||||||
|
hbs_sock.tx_mesg[iface].c = lookup_clue ;
|
||||||
|
memcpy ( &hbs_sock.tx_mesg[iface].m[HBS_HEADER_SIZE],
|
||||||
|
hostname_clue.data(),
|
||||||
|
hostname_clue.length());
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Append the cluster info to the pulse request */
|
||||||
|
hbs_cluster_append(hbs_sock.tx_mesg[iface]) ;
|
||||||
|
|
||||||
|
/* Calculate the total message size */
|
||||||
|
bytes = sizeof(hbs_message_type)-hbs_cluster_unused_bytes();
|
||||||
|
|
||||||
#ifdef WANT_FIT_TESTING
|
#ifdef WANT_FIT_TESTING
|
||||||
if ( daemon_want_fit ( FIT_CODE__NO_PULSE_REQUEST, "any" , get_iface_name_str(iface) ) )
|
if ( daemon_want_fit ( FIT_CODE__NO_PULSE_REQUEST, "any" , get_iface_name_str(iface) ) )
|
||||||
{
|
{
|
||||||
|
@ -727,14 +791,15 @@ int hbs_pulse_request ( iface_enum iface,
|
||||||
goto hbs_pulse_request_out ;
|
goto hbs_pulse_request_out ;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
if ( (rc = hbs_sock.tx_sock[iface]->write((char*)&hbs_sock.tx_mesg[iface], msg_len)) < 0 )
|
|
||||||
|
if ( (bytes = hbs_sock.tx_sock[iface]->write((char*)&hbs_sock.tx_mesg[iface], bytes)) < 0 )
|
||||||
{
|
{
|
||||||
elog("Failed to send Pulse request: %d:%s to %s.%d (rc:%i ; %d:%s)\n",
|
elog("Failed to send Pulse request: %d:%s to %s.%d (rc:%i ; %d:%s)\n",
|
||||||
hbs_sock.tx_mesg[iface].s,
|
hbs_sock.tx_mesg[iface].s,
|
||||||
&hbs_sock.tx_mesg[iface].m[0],
|
&hbs_sock.tx_mesg[iface].m[0],
|
||||||
hbs_sock.tx_sock[iface]->get_dst_addr()->toString(),
|
hbs_sock.tx_sock[iface]->get_dst_addr()->toString(),
|
||||||
hbs_sock.tx_sock[iface]->get_dst_addr()->getPort(),
|
hbs_sock.tx_sock[iface]->get_dst_addr()->getPort(),
|
||||||
rc, errno, strerror(errno) );
|
bytes, errno, strerror(errno) );
|
||||||
return (FAIL_SOCKET_SENDTO);
|
return (FAIL_SOCKET_SENDTO);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -748,16 +813,17 @@ int hbs_pulse_request ( iface_enum iface,
|
||||||
hbs_pulse_request_out:
|
hbs_pulse_request_out:
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
mlog1("%s Pulse Req: (%5d): %17s:%5d: %d:%d:%x:%s\n",
|
mlog("%s Pulse Req: (%5d): %17s:%5d: %d:%d:%d:%x:%s\n",
|
||||||
get_iface_name_str(iface), rc,
|
get_iface_name_str(iface), bytes,
|
||||||
hbs_sock.tx_sock[iface]->get_dst_addr()->toString(),
|
hbs_sock.tx_sock[iface]->get_dst_addr()->toString(),
|
||||||
hbs_sock.tx_sock[iface]->get_dst_addr()->getPort(),
|
hbs_sock.tx_sock[iface]->get_dst_addr()->getPort(),
|
||||||
|
hbs_sock.tx_mesg[iface].v,
|
||||||
hbs_sock.tx_mesg[iface].s,
|
hbs_sock.tx_mesg[iface].s,
|
||||||
hbs_sock.tx_mesg[iface].c,
|
hbs_sock.tx_mesg[iface].c,
|
||||||
hbs_sock.tx_mesg[iface].f,
|
hbs_sock.tx_mesg[iface].f,
|
||||||
hbs_sock.tx_mesg[iface].m);
|
hbs_sock.tx_mesg[iface].m);
|
||||||
|
|
||||||
|
#ifdef WANT_HBS_MEM_LOGS
|
||||||
snprintf ( &str[0], MAX_LEN, "%s Pulse Req: %17s:%5d: %u:%u:%s\n",
|
snprintf ( &str[0], MAX_LEN, "%s Pulse Req: %17s:%5d: %u:%u:%s\n",
|
||||||
get_iface_name_str(iface),
|
get_iface_name_str(iface),
|
||||||
hbs_sock.tx_sock[iface]->get_dst_addr()->toString(),
|
hbs_sock.tx_sock[iface]->get_dst_addr()->toString(),
|
||||||
|
@ -766,6 +832,7 @@ hbs_pulse_request_out:
|
||||||
hbs_sock.tx_mesg[iface].c,
|
hbs_sock.tx_mesg[iface].c,
|
||||||
hbs_sock.tx_mesg[iface].m);
|
hbs_sock.tx_mesg[iface].m);
|
||||||
mem_log (&str[0]);
|
mem_log (&str[0]);
|
||||||
|
#endif
|
||||||
|
|
||||||
return (PASS);
|
return (PASS);
|
||||||
}
|
}
|
||||||
|
@ -785,7 +852,7 @@ string get_hostname_from_pulse ( char * msg_ptr )
|
||||||
|
|
||||||
int _pulse_receive ( iface_enum iface , unsigned int seq_num )
|
int _pulse_receive ( iface_enum iface , unsigned int seq_num )
|
||||||
{
|
{
|
||||||
int n = 0 ;
|
int bytes = 0 ;
|
||||||
|
|
||||||
int detected_pulses = 0 ;
|
int detected_pulses = 0 ;
|
||||||
|
|
||||||
|
@ -796,7 +863,7 @@ int _pulse_receive ( iface_enum iface , unsigned int seq_num )
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
/* Clean the receive buffer */
|
/* Clean the receive buffer */
|
||||||
memset ( hbs_sock.rx_mesg[iface].m, 0, HBS_MAX_MSG );
|
memset ( hbs_sock.rx_mesg[iface].m, 0, sizeof(hbs_message_type) );
|
||||||
hbs_sock.rx_mesg[iface].s = 0 ;
|
hbs_sock.rx_mesg[iface].s = 0 ;
|
||||||
hbs_sock.rx_mesg[iface].c = 0 ;
|
hbs_sock.rx_mesg[iface].c = 0 ;
|
||||||
if ( hbs_sock.rx_sock[iface] == NULL )
|
if ( hbs_sock.rx_sock[iface] == NULL )
|
||||||
|
@ -804,10 +871,10 @@ int _pulse_receive ( iface_enum iface , unsigned int seq_num )
|
||||||
elog ("%s cannot receive pulses - null object\n", get_iface_name_str(iface) );
|
elog ("%s cannot receive pulses - null object\n", get_iface_name_str(iface) );
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
if ( (n = hbs_sock.rx_sock[iface]->read((char*)&hbs_sock.rx_mesg[iface], sizeof(hbs_message_type))) != -1 )
|
if ( (bytes = hbs_sock.rx_sock[iface]->read((char*)&hbs_sock.rx_mesg[iface], sizeof(hbs_message_type))) != -1 )
|
||||||
{
|
{
|
||||||
mlog1 ("%s Pulse Rsp: (%5d): %17s:%5d: %d:%d:%x:%s\n",
|
mlog1 ("%s Pulse Rsp: (%5d): %17s:%5d: %d:%d:%x:%s\n",
|
||||||
get_iface_name_str(iface), n,
|
get_iface_name_str(iface), bytes,
|
||||||
hbs_sock.rx_sock[iface]->get_dst_addr()->toString(),
|
hbs_sock.rx_sock[iface]->get_dst_addr()->toString(),
|
||||||
hbs_sock.rx_sock[iface]->get_dst_addr()->getPort(),
|
hbs_sock.rx_sock[iface]->get_dst_addr()->getPort(),
|
||||||
hbs_sock.rx_mesg[iface].s,
|
hbs_sock.rx_mesg[iface].s,
|
||||||
|
@ -839,7 +906,7 @@ int _pulse_receive ( iface_enum iface , unsigned int seq_num )
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
mlog ("%s Pulse Rsp from (%s)\n", get_iface_name_str(iface), hostname.c_str());
|
// mlog ("%s Pulse Rsp from (%s)\n", get_iface_name_str(iface), hostname.c_str());
|
||||||
if ( !hostname.compare("localhost") )
|
if ( !hostname.compare("localhost") )
|
||||||
{
|
{
|
||||||
mlog3 ("%s Pulse Rsp (local): %17s:%5d: %d:%d:%x:%s\n",
|
mlog3 ("%s Pulse Rsp (local): %17s:%5d: %d:%d:%x:%s\n",
|
||||||
|
@ -868,7 +935,6 @@ int _pulse_receive ( iface_enum iface , unsigned int seq_num )
|
||||||
{
|
{
|
||||||
if ( hbsInv.monitored_pulse ( hostname , iface ) == true )
|
if ( hbsInv.monitored_pulse ( hostname , iface ) == true )
|
||||||
{
|
{
|
||||||
#define MAX_LEN 1000
|
|
||||||
char str[MAX_LEN] ;
|
char str[MAX_LEN] ;
|
||||||
string extra = "Rsp" ;
|
string extra = "Rsp" ;
|
||||||
|
|
||||||
|
@ -880,25 +946,42 @@ int _pulse_receive ( iface_enum iface , unsigned int seq_num )
|
||||||
{
|
{
|
||||||
rc = hbsInv.remove_pulse ( hostname, iface, hbs_sock.rx_mesg[iface].c, hbs_sock.rx_mesg[iface].f ) ;
|
rc = hbsInv.remove_pulse ( hostname, iface, hbs_sock.rx_mesg[iface].c, hbs_sock.rx_mesg[iface].f ) ;
|
||||||
}
|
}
|
||||||
snprintf (&str[0], MAX_LEN, "%s Pulse %s: (%5d): %17s:%5d: %u:%u:%x:%s\n",
|
snprintf (&str[0], MAX_LEN, "%s Pulse %s: (%5d): %s:%d: %u:%u:%x:%s\n",
|
||||||
get_iface_name_str(iface), extra.c_str(), n,
|
get_iface_name_str(iface), extra.c_str(), bytes,
|
||||||
hbs_sock.rx_sock[iface]->get_dst_addr()->toString(),
|
hbs_sock.rx_sock[iface]->get_dst_addr()->toString(),
|
||||||
hbs_sock.rx_sock[iface]->get_dst_addr()->getPort(),
|
hbs_sock.rx_sock[iface]->get_dst_addr()->getPort(),
|
||||||
hbs_sock.rx_mesg[iface].s,
|
hbs_sock.rx_mesg[iface].s,
|
||||||
hbs_sock.rx_mesg[iface].c,
|
hbs_sock.rx_mesg[iface].c,
|
||||||
hbs_sock.rx_mesg[iface].f,
|
hbs_sock.rx_mesg[iface].f,
|
||||||
hbs_sock.rx_mesg[iface].m);
|
hbs_sock.rx_mesg[iface].m);
|
||||||
mlog1 ("%s", &str[0]);
|
mlog ("%s", &str[0]);
|
||||||
|
#ifdef WANT_HBS_MEM_LOGS
|
||||||
mem_log (str);
|
mem_log (str);
|
||||||
|
#endif
|
||||||
if ( extra.empty())
|
if ( extra.empty())
|
||||||
{
|
{
|
||||||
detected_pulses++ ;
|
detected_pulses++ ;
|
||||||
}
|
}
|
||||||
|
/* don't save data from self */
|
||||||
|
if ( hostname != hbsInv.my_hostname )
|
||||||
|
{
|
||||||
|
if ( hbs_sock.rx_mesg[iface].v >= HBS_MESSAGE_VERSION )
|
||||||
|
{
|
||||||
|
if ( iface == MGMNT_IFACE )
|
||||||
|
hbs_cluster_save ( hostname, MTCE_HBS_NETWORK_MGMT , hbs_sock.rx_mesg[iface]);
|
||||||
|
else
|
||||||
|
hbs_cluster_save ( hostname, MTCE_HBS_NETWORK_INFRA , hbs_sock.rx_mesg[iface]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ilog ("skipping my hostname");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
mlog3 ("%s Pulse Dis: (%5d): %17s:%5d: %d:%d:%x:%s\n",
|
mlog3 ("%s Pulse Dis: (%5d): %17s:%5d: %d:%d:%x:%s\n",
|
||||||
get_iface_name_str(iface), n,
|
get_iface_name_str(iface), bytes,
|
||||||
hbs_sock.rx_sock[iface]->get_dst_addr()->toString(),
|
hbs_sock.rx_sock[iface]->get_dst_addr()->toString(),
|
||||||
hbs_sock.rx_sock[iface]->get_dst_addr()->getPort(),
|
hbs_sock.rx_sock[iface]->get_dst_addr()->getPort(),
|
||||||
hbs_sock.rx_mesg[iface].s,
|
hbs_sock.rx_mesg[iface].s,
|
||||||
|
@ -934,7 +1017,7 @@ int _pulse_receive ( iface_enum iface , unsigned int seq_num )
|
||||||
hbs_sock.rx_mesg[iface].m) ;
|
hbs_sock.rx_mesg[iface].m) ;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} while ( n > 0 ) ;
|
} while ( bytes > 0 ) ;
|
||||||
monitor_scheduling ( after_rx_time, before_rx_time, detected_pulses, SCHED_MONITOR__RECEIVER );
|
monitor_scheduling ( after_rx_time, before_rx_time, detected_pulses, SCHED_MONITOR__RECEIVER );
|
||||||
return (detected_pulses);
|
return (detected_pulses);
|
||||||
}
|
}
|
||||||
|
@ -951,6 +1034,8 @@ int send_event ( string & hostname, unsigned int event_cmd, iface_enum iface )
|
||||||
if ( event_cmd == MTC_EVENT_HEARTBEAT_LOSS )
|
if ( event_cmd == MTC_EVENT_HEARTBEAT_LOSS )
|
||||||
{
|
{
|
||||||
daemon_dump_membuf_banner ();
|
daemon_dump_membuf_banner ();
|
||||||
|
hbsInv.print_node_info ();
|
||||||
|
hbs_cluster_log( hbsInv.my_hostname, "event");
|
||||||
daemon_dump_membuf ();
|
daemon_dump_membuf ();
|
||||||
snprintf ( &event.hdr[0] , MSG_HEADER_SIZE, "%s", get_heartbeat_loss_header());
|
snprintf ( &event.hdr[0] , MSG_HEADER_SIZE, "%s", get_heartbeat_loss_header());
|
||||||
}
|
}
|
||||||
|
@ -1038,6 +1123,9 @@ int daemon_init ( string iface, string nodetype )
|
||||||
/* Initialize the hbs control struct */
|
/* Initialize the hbs control struct */
|
||||||
MEMSET_ZERO ( hbs_ctrl );
|
MEMSET_ZERO ( hbs_ctrl );
|
||||||
|
|
||||||
|
/* init the utility module */
|
||||||
|
hbs_utils_init ();
|
||||||
|
|
||||||
/* initialize the timer */
|
/* initialize the timer */
|
||||||
mtcTimer_init ( hbsTimer, "controller", "heartbeat" );
|
mtcTimer_init ( hbsTimer, "controller", "heartbeat" );
|
||||||
|
|
||||||
|
@ -1091,9 +1179,123 @@ int daemon_init ( string iface, string nodetype )
|
||||||
return (rc);
|
return (rc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*****************************************************************************
|
||||||
|
*
|
||||||
|
* Name : hbs_sm_handler
|
||||||
|
*
|
||||||
|
* Description: Try and receive a Service Management request from sm_server_sock
|
||||||
|
*
|
||||||
|
* Expecting request in the following form:
|
||||||
|
* ~66 bytes with moderate spacing
|
||||||
|
*
|
||||||
|
* {
|
||||||
|
* "origin" :"sm",
|
||||||
|
* "service":"heartbeat",
|
||||||
|
* "request":"cluster_info"
|
||||||
|
* "req_id" : number
|
||||||
|
* }
|
||||||
|
*
|
||||||
|
* Successfully parsed request results in a call to
|
||||||
|
* hbs_cluser_send which sends the latest snapshot of
|
||||||
|
* the heartbeat cluser info to SM.
|
||||||
|
*
|
||||||
|
* Assumptions: log flooding is avoided.
|
||||||
|
*
|
||||||
|
* Returns : Nothing
|
||||||
|
*
|
||||||
|
****************************************************************************/
|
||||||
|
static int _hbs_sm_handler_log_throttle = 0 ;
|
||||||
|
void hbs_sm_handler ( void )
|
||||||
|
{
|
||||||
|
#define _MAX_MSG_LEN (80)
|
||||||
|
#define _MAX_LOG_CNT (1000)
|
||||||
|
|
||||||
|
#define PRIMARY_LABEL "origin"
|
||||||
|
#define SERVICE_LABEL "service"
|
||||||
|
#define REQUEST_LABEL "request"
|
||||||
|
#define REQID_LABEL "reqid"
|
||||||
|
|
||||||
|
#define SUPPORTED_ORIGIN "sm"
|
||||||
|
#define SUPPERTED_SERVICE "heartbeat"
|
||||||
|
#define SUPPORTED_REQUEST "cluster_info"
|
||||||
|
|
||||||
|
char sm_mesg[_MAX_MSG_LEN] ;
|
||||||
|
MEMSET_ZERO(sm_mesg);
|
||||||
|
int bytes = hbs_sock.sm_server_sock->read((char*)&sm_mesg, _MAX_MSG_LEN);
|
||||||
|
if ( bytes )
|
||||||
|
{
|
||||||
|
/* Expecting request in the following form:
|
||||||
|
* { "origin":"sm" ... } */
|
||||||
|
if ( sm_mesg[0] == '{' )
|
||||||
|
{
|
||||||
|
int reqid = 0 ;
|
||||||
|
string origin = "" ;
|
||||||
|
string service = "" ;
|
||||||
|
string request = "" ;
|
||||||
|
if ( jsonUtil_get_key_val ( sm_mesg, PRIMARY_LABEL, origin ) != PASS )
|
||||||
|
{
|
||||||
|
wlog_throttled ( _hbs_sm_handler_log_throttle, _MAX_LOG_CNT,
|
||||||
|
"missing primary label 'origin' in request.");
|
||||||
|
}
|
||||||
|
else if (( origin == SUPPORTED_ORIGIN ) &&
|
||||||
|
( jsonUtil_get_key_val ( sm_mesg, SERVICE_LABEL, service ) == PASS ) &&
|
||||||
|
( jsonUtil_get_key_val ( sm_mesg, REQUEST_LABEL, request ) == PASS ) &&
|
||||||
|
( jsonUtil_get_key_val_int ( sm_mesg, REQID_LABEL, reqid ) == PASS ))
|
||||||
|
{
|
||||||
|
if (( service == SUPPERTED_SERVICE ) &&
|
||||||
|
( request == SUPPORTED_REQUEST ))
|
||||||
|
{
|
||||||
|
/* success path ... */
|
||||||
|
hbs_cluster_send( hbs_sock.sm_client_sock, reqid );
|
||||||
|
|
||||||
|
/* reset log throttle */
|
||||||
|
_hbs_sm_handler_log_throttle = 0 ;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
wlog_throttled ( _hbs_sm_handler_log_throttle, _MAX_LOG_CNT,
|
||||||
|
"missing service or request labels in request.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
wlog_throttled ( _hbs_sm_handler_log_throttle, _MAX_LOG_CNT,
|
||||||
|
"failed to parse one or more request labels.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
wlog_throttled ( _hbs_sm_handler_log_throttle, _MAX_LOG_CNT,
|
||||||
|
"improperly formatted json string request.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if ( bytes == -1 )
|
||||||
|
{
|
||||||
|
wlog_throttled ( _hbs_sm_handler_log_throttle, _MAX_LOG_CNT,
|
||||||
|
"message receive error (%d:%s)",
|
||||||
|
errno, strerror(errno));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
wlog_throttled ( _hbs_sm_handler_log_throttle, _MAX_LOG_CNT,
|
||||||
|
"unknown error Error (rc:%d)", bytes );
|
||||||
|
}
|
||||||
|
dlog ("... %s", sm_mesg );
|
||||||
|
}
|
||||||
|
|
||||||
|
/****************************************************************************
|
||||||
|
*
|
||||||
|
* Name : daemon_service_run
|
||||||
|
*
|
||||||
|
* Description: Daemon's main loop
|
||||||
|
*
|
||||||
|
***************************************************************************/
|
||||||
|
|
||||||
void daemon_service_run ( void )
|
void daemon_service_run ( void )
|
||||||
{
|
{
|
||||||
|
#ifdef WANT_HBS_MEM_LOGS
|
||||||
int exp_pulses[MAX_IFACES] ;
|
int exp_pulses[MAX_IFACES] ;
|
||||||
|
#endif
|
||||||
int rc = PASS ;
|
int rc = PASS ;
|
||||||
int counter = 0 ;
|
int counter = 0 ;
|
||||||
int goenabled_wait_log_throttle = 0 ;
|
int goenabled_wait_log_throttle = 0 ;
|
||||||
|
@ -1154,6 +1356,8 @@ void daemon_service_run ( void )
|
||||||
daemon_exit ();
|
daemon_exit ();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* set this controller as provisioned */
|
||||||
|
hbs_manage_controller_state ( hbsInv.my_hostname , true );
|
||||||
|
|
||||||
/* CGTS 4114: Small Footprint: Alarm 200.005 remains active after connectivity restored
|
/* CGTS 4114: Small Footprint: Alarm 200.005 remains active after connectivity restored
|
||||||
*
|
*
|
||||||
|
@ -1195,6 +1399,16 @@ void daemon_service_run ( void )
|
||||||
/* enable the base level signal handler latency monitor */
|
/* enable the base level signal handler latency monitor */
|
||||||
daemon_latency_monitor (true);
|
daemon_latency_monitor (true);
|
||||||
|
|
||||||
|
/* load this controller index number - used for cluster stuff */
|
||||||
|
if ( hbsInv.my_hostname == CONTROLLER_0 )
|
||||||
|
controller_number = 0 ;
|
||||||
|
else
|
||||||
|
controller_number = 1 ;
|
||||||
|
|
||||||
|
/* tell the cluster which controller this is and
|
||||||
|
* how many networks are being monitored */
|
||||||
|
hbs_cluster_nums (controller_number,hbsInv.infra_network_provisioned ?2:1);
|
||||||
|
|
||||||
/* Run heartbeat service forever or until stop condition */
|
/* Run heartbeat service forever or until stop condition */
|
||||||
for ( hbsTimer.ring = false ; ; )
|
for ( hbsTimer.ring = false ; ; )
|
||||||
{
|
{
|
||||||
|
@ -1315,6 +1529,14 @@ void daemon_service_run ( void )
|
||||||
FD_SET(hbs_sock.mtc_to_hbs_sock->getFD(), &hbs_sock.readfds);
|
FD_SET(hbs_sock.mtc_to_hbs_sock->getFD(), &hbs_sock.readfds);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Add the sm request receiver to the select list */
|
||||||
|
if (( hbs_sock.sm_server_sock ) &&
|
||||||
|
( hbs_sock.sm_server_sock->getFD()))
|
||||||
|
{
|
||||||
|
socks.push_front (hbs_sock.sm_server_sock->getFD());
|
||||||
|
FD_SET(hbs_sock.sm_server_sock->getFD(), &hbs_sock.readfds);
|
||||||
|
}
|
||||||
|
|
||||||
/* Add the netlink event listener to the select list */
|
/* Add the netlink event listener to the select list */
|
||||||
if ( hbs_sock.netlink_sock )
|
if ( hbs_sock.netlink_sock )
|
||||||
{
|
{
|
||||||
|
@ -1379,6 +1601,11 @@ void daemon_service_run ( void )
|
||||||
hbs_sock.fired[INFRA_INTERFACE] = true ;
|
hbs_sock.fired[INFRA_INTERFACE] = true ;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ((hbs_sock.sm_server_sock != NULL ) &&
|
||||||
|
( FD_ISSET(hbs_sock.sm_server_sock->getFD(), &hbs_sock.readfds)))
|
||||||
|
{
|
||||||
|
hbs_sm_handler();
|
||||||
|
}
|
||||||
if ((hbs_sock.mtc_to_hbs_sock != NULL ) &&
|
if ((hbs_sock.mtc_to_hbs_sock != NULL ) &&
|
||||||
( FD_ISSET(hbs_sock.mtc_to_hbs_sock->getFD(), &hbs_sock.readfds)))
|
( FD_ISSET(hbs_sock.mtc_to_hbs_sock->getFD(), &hbs_sock.readfds)))
|
||||||
{
|
{
|
||||||
|
@ -1404,7 +1631,7 @@ void daemon_service_run ( void )
|
||||||
inv.nodetype = msg.parm[0];
|
inv.nodetype = msg.parm[0];
|
||||||
hbsInv.add_heartbeat_host ( inv ) ;
|
hbsInv.add_heartbeat_host ( inv ) ;
|
||||||
hostname_inventory.push_back ( hostname );
|
hostname_inventory.push_back ( hostname );
|
||||||
ilog ("%s added to heartbeat service (%d)\n", hostname.c_str(), inv.nodetype );
|
ilog ("%s added to heartbeat service (%d)\n", hostname.c_str(), msg.parm[0] );
|
||||||
|
|
||||||
/* clear any outstanding alarms on the ADD */
|
/* clear any outstanding alarms on the ADD */
|
||||||
if ( hbsInv.hbs_failure_action != HBS_FAILURE_ACTION__NONE )
|
if ( hbsInv.hbs_failure_action != HBS_FAILURE_ACTION__NONE )
|
||||||
|
@ -1415,10 +1642,7 @@ void daemon_service_run ( void )
|
||||||
}
|
}
|
||||||
else if ( msg.cmd == MTC_CMD_DEL_HOST )
|
else if ( msg.cmd == MTC_CMD_DEL_HOST )
|
||||||
{
|
{
|
||||||
for ( int iface = 0 ; iface < MAX_IFACES ; iface++ )
|
hbsInv.mon_host ( hostname, false, false );
|
||||||
{
|
|
||||||
hbsInv.mon_host ( hostname, (iface_enum)iface, false, false );
|
|
||||||
}
|
|
||||||
hostname_inventory.remove ( hostname );
|
hostname_inventory.remove ( hostname );
|
||||||
hbsInv.del_host ( hostname );
|
hbsInv.del_host ( hostname );
|
||||||
ilog ("%s deleted from heartbeat service\n", hostname.c_str());
|
ilog ("%s deleted from heartbeat service\n", hostname.c_str());
|
||||||
|
@ -1432,27 +1656,24 @@ void daemon_service_run ( void )
|
||||||
}
|
}
|
||||||
else if ( msg.cmd == MTC_CMD_STOP_HOST )
|
else if ( msg.cmd == MTC_CMD_STOP_HOST )
|
||||||
{
|
{
|
||||||
for ( int iface = 0 ; iface < MAX_IFACES ; iface++ )
|
hbsInv.mon_host ( hostname, false, true );
|
||||||
{
|
hbs_cluster_del ( hostname );
|
||||||
hbsInv.mon_host ( hostname, (iface_enum)iface, false, true );
|
|
||||||
}
|
ilog ("%s stopping heartbeat service\n",
|
||||||
ilog ("%s stopping heartbeat service\n", hostname.c_str());
|
hostname.c_str());
|
||||||
}
|
}
|
||||||
else if ( msg.cmd == MTC_CMD_START_HOST )
|
else if ( msg.cmd == MTC_CMD_START_HOST )
|
||||||
{
|
{
|
||||||
for ( int iface = 0 ; iface < MAX_IFACES ; iface++ )
|
hbsInv.mon_host ( hostname, true, true );
|
||||||
{
|
hbs_cluster_add ( hostname );
|
||||||
hbsInv.mon_host ( hostname, (iface_enum)iface, true, true );
|
|
||||||
}
|
ilog ("%s starting heartbeat service\n",
|
||||||
ilog ("%s starting heartbeat service\n", hostname.c_str());
|
hostname.c_str());
|
||||||
}
|
}
|
||||||
else if ( msg.cmd == MTC_RESTART_HBS )
|
else if ( msg.cmd == MTC_RESTART_HBS )
|
||||||
{
|
{
|
||||||
for ( int iface = 0 ; iface < MAX_IFACES ; iface++ )
|
hbsInv.mon_host ( hostname, false, false );
|
||||||
{
|
hbsInv.mon_host ( hostname, true, false );
|
||||||
hbsInv.mon_host ( hostname, (iface_enum)iface, false, false );
|
|
||||||
hbsInv.mon_host ( hostname, (iface_enum)iface, true, false );
|
|
||||||
}
|
|
||||||
ilog ("%s restarting heartbeat service\n", hostname.c_str());
|
ilog ("%s restarting heartbeat service\n", hostname.c_str());
|
||||||
hbsInv.print_node_info();
|
hbsInv.print_node_info();
|
||||||
}
|
}
|
||||||
|
@ -1616,7 +1837,9 @@ void daemon_service_run ( void )
|
||||||
int rri = 0 ;
|
int rri = 0 ;
|
||||||
string lf = "\n" ;
|
string lf = "\n" ;
|
||||||
|
|
||||||
|
#ifdef WANT_HBS_MEM_LOGS
|
||||||
mem_log ((char*)lf.data());
|
mem_log ((char*)lf.data());
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Get the next Resource Reference Identifier
|
/* Get the next Resource Reference Identifier
|
||||||
* and its Resourvce Identifier. These values
|
* and its Resourvce Identifier. These values
|
||||||
|
@ -1630,7 +1853,9 @@ void daemon_service_run ( void )
|
||||||
if (( iface == INFRA_IFACE ) && ( hbsInv.infra_network_provisioned == false ))
|
if (( iface == INFRA_IFACE ) && ( hbsInv.infra_network_provisioned == false ))
|
||||||
continue ;
|
continue ;
|
||||||
|
|
||||||
|
#ifdef WANT_HBS_MEM_LOGS
|
||||||
exp_pulses[iface] =
|
exp_pulses[iface] =
|
||||||
|
#endif
|
||||||
hbsInv.hbs_expected_pulses[iface] =
|
hbsInv.hbs_expected_pulses[iface] =
|
||||||
hbsInv.create_pulse_list((iface_enum)iface);
|
hbsInv.create_pulse_list((iface_enum)iface);
|
||||||
|
|
||||||
|
@ -1759,28 +1984,33 @@ void daemon_service_run ( void )
|
||||||
if (( iface == INFRA_IFACE ) && ( hbsInv.infra_network_provisioned != true ))
|
if (( iface == INFRA_IFACE ) && ( hbsInv.infra_network_provisioned != true ))
|
||||||
continue ;
|
continue ;
|
||||||
|
|
||||||
#define MAX_LEN 1000
|
#ifdef WANT_HBS_MEM_LOGS
|
||||||
char str[MAX_LEN] ;
|
char str[MAX_LEN] ;
|
||||||
|
|
||||||
snprintf (&str[0], MAX_LEN, "%s Histogram: %d - %s\n",
|
snprintf (&str[0], MAX_LEN, "%s Histogram: %d - %s\n",
|
||||||
get_iface_name_str(iface),
|
get_iface_name_str(iface),
|
||||||
exp_pulses[iface],
|
exp_pulses[iface],
|
||||||
arrival_histogram[iface].c_str());
|
arrival_histogram[iface].c_str());
|
||||||
|
|
||||||
mem_log (str);
|
mem_log (str);
|
||||||
|
|
||||||
if ( !unexpected_pulse_list[iface].empty() )
|
if ( !unexpected_pulse_list[iface].empty() )
|
||||||
{
|
{
|
||||||
snprintf ( &str[0], MAX_LEN, "%s Others : %s\n",
|
snprintf ( &str[0], MAX_LEN, "%s Others : %s\n",
|
||||||
get_iface_name_str(iface),
|
get_iface_name_str(iface),
|
||||||
unexpected_pulse_list[iface].c_str());
|
unexpected_pulse_list[iface].c_str());
|
||||||
|
|
||||||
mem_log(str);
|
mem_log(str);
|
||||||
}
|
}
|
||||||
hbsInv.lost_pulses ( (iface_enum)iface );
|
#endif
|
||||||
|
/*
|
||||||
|
* Assume storage-0 is responding until otherwise proven
|
||||||
|
* it's not. Keep in mind that the 'lost_pulses' interface
|
||||||
|
* only counts nodes that have not responded.
|
||||||
|
*/
|
||||||
|
bool storage_0_responding = true ;
|
||||||
|
int lost = hbsInv.lost_pulses ((iface_enum)iface, storage_0_responding);
|
||||||
|
hbs_cluster_update ((iface_enum)iface, lost, storage_0_responding);
|
||||||
}
|
}
|
||||||
hbsTimer.ring = false ;
|
hbsTimer.ring = false ;
|
||||||
heartbeat_request = true ;
|
heartbeat_request = true ;
|
||||||
|
// hbs_cluster_log ( hbsInv.my_hostname, "->") ;
|
||||||
seq_num++ ;
|
seq_num++ ;
|
||||||
}
|
}
|
||||||
daemon_load_fit ();
|
daemon_load_fit ();
|
||||||
|
@ -1796,7 +2026,9 @@ void daemon_dump_info ( void )
|
||||||
hbsInv.print_node_info ();
|
hbsInv.print_node_info ();
|
||||||
hbsInv.memDumpAllState ();
|
hbsInv.memDumpAllState ();
|
||||||
|
|
||||||
|
#ifdef WANT_HBS_MEM_LOGS
|
||||||
daemon_dump_membuf (); /* write mem_logs to log file and clear log list */
|
daemon_dump_membuf (); /* write mem_logs to log file and clear log list */
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
const char MY_DATA [100] = { "eieio\n" } ;
|
const char MY_DATA [100] = { "eieio\n" } ;
|
||||||
|
|
|
@ -27,6 +27,8 @@
|
||||||
#include <signal.h>
|
#include <signal.h>
|
||||||
#include <list>
|
#include <list>
|
||||||
#include "msgClass.h"
|
#include "msgClass.h"
|
||||||
|
#include "mtceHbsCluster.h"
|
||||||
|
#include "hbsCluster.h"
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @addtogroup hbs_base
|
* @addtogroup hbs_base
|
||||||
|
@ -38,6 +40,8 @@
|
||||||
#endif
|
#endif
|
||||||
#define __AREA__ "hbs"
|
#define __AREA__ "hbs"
|
||||||
|
|
||||||
|
// #define WANT_CLUSTER_DEBUG
|
||||||
|
|
||||||
#define ALIGN_PACK(x) __attribute__((packed)) x
|
#define ALIGN_PACK(x) __attribute__((packed)) x
|
||||||
|
|
||||||
/** Maximum service fail count before action */
|
/** Maximum service fail count before action */
|
||||||
|
@ -56,15 +60,18 @@ const char rsp_msg_header [HBS_HEADER_SIZE+1] = {"cgts pulse rsp:"};
|
||||||
|
|
||||||
#define HBS_MAX_MSG (HBS_HEADER_SIZE+MAX_CHARS_HOSTNAME)
|
#define HBS_MAX_MSG (HBS_HEADER_SIZE+MAX_CHARS_HOSTNAME)
|
||||||
|
|
||||||
|
#define HBS_MESSAGE_VERSION (1) // 0 -> 1 with intro of cluster info
|
||||||
|
|
||||||
/* Heartbeat control structure */
|
/* Heartbeat control structure */
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
unsigned int nodetype ;
|
unsigned int nodetype ;
|
||||||
bool clear_alarms ;
|
bool clear_alarms ;
|
||||||
} hbs_ctrl_type ;
|
} hbs_ctrl_type ;
|
||||||
|
hbs_ctrl_type * get_hbs_ctrl_ptr ( void );
|
||||||
|
|
||||||
/* A heartbeat service message
|
/* A heartbeat service message
|
||||||
* if this structire is changed then
|
* if this structure is changed then
|
||||||
* hbs_pulse_request needs to be looked at
|
* hbs_pulse_request needs to be looked at
|
||||||
*/
|
*/
|
||||||
typedef struct
|
typedef struct
|
||||||
|
@ -76,7 +83,7 @@ typedef struct
|
||||||
unsigned int s ;
|
unsigned int s ;
|
||||||
|
|
||||||
/* Fast Lookup Clue Info */
|
/* Fast Lookup Clue Info */
|
||||||
unsigned int c ;
|
unsigned int c ;
|
||||||
|
|
||||||
/* Status Flags
|
/* Status Flags
|
||||||
* ------------
|
* ------------
|
||||||
|
@ -89,6 +96,16 @@ typedef struct
|
||||||
/** message version number */
|
/** message version number */
|
||||||
unsigned int v ;
|
unsigned int v ;
|
||||||
|
|
||||||
|
/** Heartbeat cluster information that is put into heartbeat messages.
|
||||||
|
*
|
||||||
|
* Pulse Request : To hbsClient: Only 1 controller with up to 2 network types history.
|
||||||
|
* Pulse Response: From hbsClient: Can include up to 2 controllers with 2 networks each.
|
||||||
|
*
|
||||||
|
* This addition requires message version increment.
|
||||||
|
*
|
||||||
|
**/
|
||||||
|
mtce_hbs_cluster_type cluster ;
|
||||||
|
|
||||||
} ALIGN_PACK(hbs_message_type) ;
|
} ALIGN_PACK(hbs_message_type) ;
|
||||||
|
|
||||||
|
|
||||||
|
@ -104,6 +121,12 @@ typedef struct
|
||||||
/** Heartbeat Service Event Transmit Interface - hbsClient -> mtcAgent */
|
/** Heartbeat Service Event Transmit Interface - hbsClient -> mtcAgent */
|
||||||
msgClassSock* hbs_ready_tx_sock;
|
msgClassSock* hbs_ready_tx_sock;
|
||||||
|
|
||||||
|
/** Heartbeat Service SM Transmit Interface - hbsAgent -> sm */
|
||||||
|
msgClassSock* sm_client_sock;
|
||||||
|
|
||||||
|
/** Heartbeat Service SM Receive Interface - sm -> hbsAgent */
|
||||||
|
msgClassSock* sm_server_sock;
|
||||||
|
|
||||||
/** PMON Pulse Receive Interface - pmond -> hbsClient */
|
/** PMON Pulse Receive Interface - pmond -> hbsClient */
|
||||||
msgClassSock* pmon_pulse_sock;
|
msgClassSock* pmon_pulse_sock;
|
||||||
|
|
||||||
|
@ -166,6 +189,9 @@ int hbs_refresh_pids ( std::list<procList> & proc_list );
|
||||||
int hbs_process_monitor ( std::list<procList> & pmon_list );
|
int hbs_process_monitor ( std::list<procList> & pmon_list );
|
||||||
int hbs_self_recovery ( unsigned int cmd );
|
int hbs_self_recovery ( unsigned int cmd );
|
||||||
|
|
||||||
|
/* returns this controller's number ; 0 or 1 */
|
||||||
|
unsigned int hbs_get_controller_number ( void );
|
||||||
|
|
||||||
/* Setup the pulse messaging interfaces
|
/* Setup the pulse messaging interfaces
|
||||||
* 'p' is a bool that indicates if the infrastructure network is provisioned
|
* 'p' is a bool that indicates if the infrastructure network is provisioned
|
||||||
* 'p' = true means it is provisioned */
|
* 'p' = true means it is provisioned */
|
||||||
|
@ -184,6 +210,93 @@ int hbs_self_recovery ( unsigned int cmd );
|
||||||
} \
|
} \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*********** Common Heartbeat Utilities in hbsUtil.cpp ***************/
|
||||||
|
|
||||||
|
/* module init */
|
||||||
|
void hbs_utils_init ( void );
|
||||||
|
|
||||||
|
/* network enum to name lookup */
|
||||||
|
string hbs_cluster_network_name ( mtce_hbs_network_enum network );
|
||||||
|
|
||||||
|
/* Produce formatted clog's that characterize current and changing cluster
|
||||||
|
* history for a given network. Each log is controller/network specific. */
|
||||||
|
void hbs_cluster_log ( string & hostname, mtce_hbs_cluster_type & cluster, string prefix );
|
||||||
|
|
||||||
|
/* Initialize the specified history array */
|
||||||
|
void hbs_cluster_history_init ( mtce_hbs_cluster_history_type & history );
|
||||||
|
|
||||||
|
/* Clear all history in the cluster vault */
|
||||||
|
void hbs_cluster_history_clear( mtce_hbs_cluster_type & cluster );
|
||||||
|
|
||||||
|
|
||||||
|
/******** Heartbeat Agent Cluster Functions in hbsCluster.cpp ********/
|
||||||
|
|
||||||
|
/* Set the cluster vault to default state.
|
||||||
|
* Called upon daemon init or heartbeat period change. */
|
||||||
|
void hbs_cluster_init ( unsigned short period );
|
||||||
|
|
||||||
|
/* Calculate number of bytes that is unused in the cluster data structure.
|
||||||
|
* Primarily to know how many history elements are missing. */
|
||||||
|
unsigned short hbs_cluster_unused_bytes ( void );
|
||||||
|
|
||||||
|
/* Add and delete hosts from the monitored list.
|
||||||
|
* Automatically adjusts the numbers in the cluster vault. */
|
||||||
|
void hbs_cluster_add ( string & hostname );
|
||||||
|
void hbs_cluster_del ( string & hostname );
|
||||||
|
|
||||||
|
/* Report status of storage-0 */
|
||||||
|
void hbs_cluster_storage0_status ( iface_enum iface , bool responding );
|
||||||
|
|
||||||
|
/* Look for and clog changes in cluster state */
|
||||||
|
int hbs_cluster_cmp ( hbs_message_type & msg );
|
||||||
|
|
||||||
|
/* Manage the enabled state of the controllers */
|
||||||
|
void hbs_manage_controller_state ( string & hostname, bool enabled );
|
||||||
|
|
||||||
|
/* Set the number of monitored hosts and this controller's
|
||||||
|
* number in the cluster vault. */
|
||||||
|
void hbs_cluster_nums ( unsigned short this_controller,
|
||||||
|
unsigned short monitored_networks );
|
||||||
|
|
||||||
|
/* Copy/Save the peer controller's cluster info from the hbsClient's
|
||||||
|
* pulse response into the cluster vault so it's there and ready for
|
||||||
|
* an SM cluster_info request. */
|
||||||
|
int hbs_cluster_save ( string & hostname,
|
||||||
|
mtce_hbs_network_enum network,
|
||||||
|
hbs_message_type & msg );
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Called by the hbsAgent pulse receiver to create a network specific
|
||||||
|
* history update entry consisting of
|
||||||
|
*
|
||||||
|
* 1. the number of monitored hosts
|
||||||
|
* 2. how many of those that responded in the last heartbeat period.
|
||||||
|
* 3. threshold storage-0 responding count and manage that state in that
|
||||||
|
* networks history header.
|
||||||
|
*/
|
||||||
|
void hbs_cluster_update ( iface_enum iface,
|
||||||
|
unsigned short not_responding_hosts,
|
||||||
|
bool storage_0_responding );
|
||||||
|
|
||||||
|
/* Called by the hbsAgent pulse transmitter to append this controllers
|
||||||
|
* running cluster view in the next multicast pulse request.
|
||||||
|
* The hbsClient is expected to loop this data and any other like data from
|
||||||
|
* the other controller back in its response. */
|
||||||
|
void hbs_cluster_append ( hbs_message_type & msg );
|
||||||
|
|
||||||
|
/* Produce formatted clog's that characterize current and changing cluster
|
||||||
|
* history for a given network. Each log is controller/network specific. */
|
||||||
|
void hbs_cluster_log ( string & hostname, string prefix );
|
||||||
|
|
||||||
|
/* Service SM cluster info request */
|
||||||
|
void hbs_sm_handler ( void );
|
||||||
|
|
||||||
|
/* send the cluster vault to SM */
|
||||||
|
void hbs_cluster_send ( msgClassSock * sm_client_sock, int reqid );
|
||||||
|
|
||||||
|
/* print the contents of the vault */
|
||||||
|
void hbs_cluster_dump ( mtce_hbs_cluster_type & vault );
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @} hbs_base
|
* @} hbs_base
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -20,7 +20,6 @@
|
||||||
* daemon_files_init
|
* daemon_files_init
|
||||||
* daemon_configure
|
* daemon_configure
|
||||||
* daemon_signal_init
|
* daemon_signal_init
|
||||||
* hbs_message_init
|
|
||||||
* hbs_socket_init
|
* hbs_socket_init
|
||||||
*
|
*
|
||||||
* daemon_service_run
|
* daemon_service_run
|
||||||
|
@ -59,7 +58,7 @@ using namespace std;
|
||||||
#include "daemon_option.h" /* Common options for daemons */
|
#include "daemon_option.h" /* Common options for daemons */
|
||||||
#include "nodeTimers.h" /* for ... maintenance timers */
|
#include "nodeTimers.h" /* for ... maintenance timers */
|
||||||
#include "nodeMacro.h" /* for ... CREATE_NONBLOCK_INET_UDP_RX_SOCKET */
|
#include "nodeMacro.h" /* for ... CREATE_NONBLOCK_INET_UDP_RX_SOCKET */
|
||||||
#include "nlEvent.h" /* for ... open_netlink_socket */
|
#include "nlEvent.h" /* for ... open_netlink_socket */
|
||||||
#include "hbsBase.h" /* Heartbeat Base Header File */
|
#include "hbsBase.h" /* Heartbeat Base Header File */
|
||||||
|
|
||||||
extern "C"
|
extern "C"
|
||||||
|
@ -95,8 +94,9 @@ typedef struct
|
||||||
std::list<procList>::iterator proc_ptr ;
|
std::list<procList>::iterator proc_ptr ;
|
||||||
} stallMon_type ;
|
} stallMon_type ;
|
||||||
|
|
||||||
|
static char pulse_resp_tx_hdr [HBS_MAX_MSG];
|
||||||
static char my_hostname [MAX_HOST_NAME_SIZE+1];
|
static char my_hostname [MAX_HOST_NAME_SIZE+1];
|
||||||
|
static char my_hostname_length ;
|
||||||
static string my_macaddr = "" ;
|
static string my_macaddr = "" ;
|
||||||
static string my_address = "" ;
|
static string my_address = "" ;
|
||||||
static unsigned int my_nodetype= CGTS_NODE_NULL ;
|
static unsigned int my_nodetype= CGTS_NODE_NULL ;
|
||||||
|
@ -360,6 +360,12 @@ static int hbs_config_handler ( void * user,
|
||||||
config_ptr->pmon_pulse_port = atoi(value);
|
config_ptr->pmon_pulse_port = atoi(value);
|
||||||
config_ptr->mask |= CONFIG_CLIENT_PULSE_PORT ;
|
config_ptr->mask |= CONFIG_CLIENT_PULSE_PORT ;
|
||||||
}
|
}
|
||||||
|
#ifdef WANT_CLUSTER_DEBUG
|
||||||
|
else if (MATCH("agent", "sm_client_port"))
|
||||||
|
{
|
||||||
|
config_ptr->sm_client_port = atoi(value);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
return (PASS);
|
return (PASS);
|
||||||
|
@ -446,20 +452,6 @@ int daemon_configure ( void )
|
||||||
/* Initialization Utilities */
|
/* Initialization Utilities */
|
||||||
/****************************/
|
/****************************/
|
||||||
|
|
||||||
/* Initialize the unicast pulse response message */
|
|
||||||
/* One time thing ; tx same message all the time. */
|
|
||||||
int hbs_message_init ( void )
|
|
||||||
{
|
|
||||||
/* Build the transmit pulse response message for each interface */
|
|
||||||
for ( int i = 0 ; i < MAX_IFACES ; i++ )
|
|
||||||
{
|
|
||||||
memset ( &hbs_sock.tx_mesg[i], 0, sizeof (hbs_message_type));
|
|
||||||
memcpy ( &hbs_sock.tx_mesg[i].m[0], &rsp_msg_header[0], HBS_HEADER_SIZE );
|
|
||||||
memcpy ( &hbs_sock.tx_mesg[i].m[HBS_HEADER_SIZE], my_hostname, strlen(my_hostname));
|
|
||||||
}
|
|
||||||
return (PASS);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Initialize pulse messaging for the specified interface
|
/* Initialize pulse messaging for the specified interface
|
||||||
* This is called by a macro defined in hbsBase.h */
|
* This is called by a macro defined in hbsBase.h */
|
||||||
int _setup_pulse_messaging ( iface_enum i, int rmem )
|
int _setup_pulse_messaging ( iface_enum i, int rmem )
|
||||||
|
@ -621,6 +613,11 @@ int hbs_socket_init ( void )
|
||||||
return (FAIL_SOCKET_NOBLOCK);
|
return (FAIL_SOCKET_NOBLOCK);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef WANT_CLUSTER_DEBUG
|
||||||
|
hbs_sock.sm_client_sock = new msgClassRx(LOOPBACK_IP,hbs_config.sm_client_port,IPPROTO_UDP);
|
||||||
|
if ( rc ) return (rc) ;
|
||||||
|
hbs_sock.sm_client_sock->sock_ok(true);
|
||||||
|
#endif
|
||||||
return (PASS);
|
return (PASS);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -648,7 +645,7 @@ int get_pmon_pulses ( void )
|
||||||
if ( !strncmp ( &msg.hdr[0] , get_pmond_pulse_header(), MSG_HEADER_SIZE ))
|
if ( !strncmp ( &msg.hdr[0] , get_pmond_pulse_header(), MSG_HEADER_SIZE ))
|
||||||
{
|
{
|
||||||
pulses++ ;
|
pulses++ ;
|
||||||
mlog ("Pmon Pulse (%s) (%d)\n", msg.hdr, pulses );
|
mlog1 ("Pmon Pulse (%s) (%d)\n", msg.hdr, pulses );
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -710,92 +707,87 @@ static unsigned int my_rri = 0 ;
|
||||||
static int rx_error_count[MAX_IFACES] = {0,0} ;
|
static int rx_error_count[MAX_IFACES] = {0,0} ;
|
||||||
static int tx_error_count[MAX_IFACES] = {0,0} ;
|
static int tx_error_count[MAX_IFACES] = {0,0} ;
|
||||||
|
|
||||||
|
#define ERROR_LOG_THRESHOLD (200)
|
||||||
|
|
||||||
int _service_pulse_request ( iface_enum iface , unsigned int flags )
|
int _service_pulse_request ( iface_enum iface , unsigned int flags )
|
||||||
{
|
{
|
||||||
unsigned int s = 0 ; /* Sequence number */
|
|
||||||
int n = 0 ; /* message size */
|
|
||||||
int rc = 0 ;
|
|
||||||
|
|
||||||
if (( iface != MGMNT_IFACE ) && ( iface != INFRA_IFACE ))
|
if (( iface != MGMNT_IFACE ) && ( iface != INFRA_IFACE ))
|
||||||
return (FAIL_BAD_CASE);
|
return (FAIL_BAD_CASE);
|
||||||
|
|
||||||
memset ( (char*) &hbs_sock.rx_mesg[iface], 0, sizeof(hbs_message_type));
|
|
||||||
if ( ! hbs_sock.rx_sock[iface] )
|
if ( ! hbs_sock.rx_sock[iface] )
|
||||||
{
|
{
|
||||||
elog ("cannot receive from null rx_mesg[%s] socket\n", get_iface_name_str(iface) );
|
elog_throttled ( rx_error_count[iface], ERROR_LOG_THRESHOLD,
|
||||||
|
"cannot receive from null rx_mesg[%s] socket\n",
|
||||||
|
get_iface_name_str(iface) );
|
||||||
return (FAIL_TO_RECEIVE);
|
return (FAIL_TO_RECEIVE);
|
||||||
}
|
}
|
||||||
else if ( hbs_sock.rx_sock[iface]->sock_ok() == false )
|
else if ( ! hbs_sock.tx_sock[iface] )
|
||||||
{
|
{
|
||||||
elog ("cannot receive from failed rx_mesg[%s] socket\n", get_iface_name_str(iface) );
|
elog_throttled ( tx_error_count[iface], ERROR_LOG_THRESHOLD,
|
||||||
|
"cannot send to null mesg[%s] socket\n",
|
||||||
|
get_iface_name_str(iface) );
|
||||||
|
return (FAIL_TO_TRANSMIT);
|
||||||
|
}
|
||||||
|
else if ( ! hbs_sock.rx_sock[iface]->sock_ok() )
|
||||||
|
{
|
||||||
|
elog_throttled ( rx_error_count[iface], ERROR_LOG_THRESHOLD,
|
||||||
|
"cannot receive from failed rx_mesg[%s] socket\n",
|
||||||
|
get_iface_name_str(iface) );
|
||||||
return (FAIL_TO_RECEIVE);
|
return (FAIL_TO_RECEIVE);
|
||||||
}
|
}
|
||||||
|
else if ( ! hbs_sock.tx_sock[iface]->sock_ok() )
|
||||||
n = hbs_sock.rx_sock[iface]->read((char*)&hbs_sock.rx_mesg[iface], sizeof(hbs_message_type));
|
|
||||||
|
|
||||||
if( n < HBS_HEADER_SIZE )
|
|
||||||
{
|
{
|
||||||
rx_error_count[iface]++ ;
|
elog_throttled ( tx_error_count[iface], ERROR_LOG_THRESHOLD,
|
||||||
|
"cannot send to failed mesg[%s] socket\n",
|
||||||
|
get_iface_name_str(iface) );
|
||||||
|
return (FAIL_TO_TRANSMIT);
|
||||||
|
}
|
||||||
|
|
||||||
/* throtle the log so that if they come back-to-back we avoid flooding */
|
// MEMSET_ZERO(hbs_sock.rx_mesg[iface]);
|
||||||
if ( n == -1 )
|
int rx_bytes = hbs_sock.rx_sock[iface]->read((char*)&hbs_sock.rx_mesg[iface], sizeof(hbs_message_type));
|
||||||
|
if ( rx_bytes < HBS_HEADER_SIZE )
|
||||||
|
{
|
||||||
|
if ( rx_bytes == -1 )
|
||||||
{
|
{
|
||||||
if ( rx_error_count[iface] > 1 )
|
wlog_throttled ( rx_error_count[iface], ERROR_LOG_THRESHOLD,
|
||||||
{
|
"%s receive error (%d:%m)\n",
|
||||||
wlog_throttled ( rx_error_count[iface], 500, "%s receive error (%d:%m)\n", get_iface_name_str(iface), errno );
|
get_iface_name_str(iface), errno );
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
wlog_throttled ( rx_error_count[iface], 500, "%s message underrun (expected %ld but got %d)\n",
|
wlog_throttled ( rx_error_count[iface], ERROR_LOG_THRESHOLD,
|
||||||
get_iface_name_str(iface), sizeof(hbs_message_type), n );
|
"%s message underrun (expected %ld but got %d)\n",
|
||||||
}
|
get_iface_name_str(iface),
|
||||||
if ( rx_error_count[iface] == 100 )
|
sizeof(hbs_message_type), rx_bytes );
|
||||||
{
|
|
||||||
wlog ( "%s is getting a lot of receive errors (%d:%m)\n", get_iface_name_str(iface), errno );
|
|
||||||
}
|
}
|
||||||
return (FAIL_TO_RECEIVE);
|
return (FAIL_TO_RECEIVE);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Clear the error count since we got a good receive */
|
daemon_config_type * cfg_ptr = daemon_get_cfg_ptr();
|
||||||
rx_error_count[iface] = 0 ;
|
if ( cfg_ptr->debug_msg )
|
||||||
|
|
||||||
#ifdef WANT_NO_SELF_HEARTBEAT_REPLY
|
|
||||||
/* Don't reply to the heartbeat if the request came from myself */
|
|
||||||
if ( ! strncmp ( my_address.data(),
|
|
||||||
hbs_sock.rx_sock[iface]->get_dst_addr()->toString(),
|
|
||||||
MAX_CHARS_IN_IP_ADDR ))
|
|
||||||
{
|
{
|
||||||
ilog ("%s Refusing to send heartbeat response to self\n", hbs_sock.rx_sock[iface]->get_dst_addr()->toString());
|
mlog ("\n");
|
||||||
return (PASS);
|
mlog ("%s Pulse Req: %s:%5d: %d:%s RRI:%d\n",
|
||||||
|
get_iface_name_str(iface),
|
||||||
|
hbs_sock.rx_sock[iface]->get_dst_addr()->toString(),
|
||||||
|
hbs_sock.rx_sock[iface]->get_dst_addr()->getPort(),
|
||||||
|
hbs_sock.rx_mesg[iface].s,
|
||||||
|
hbs_sock.rx_mesg[iface].m,
|
||||||
|
hbs_sock.rx_mesg[iface].c);
|
||||||
}
|
}
|
||||||
#else
|
|
||||||
/* We use this to monitor pmond on active controller */
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Save the sequence number */
|
|
||||||
s = hbs_sock.rx_mesg[iface].s ;
|
|
||||||
|
|
||||||
mlog ("\n");
|
|
||||||
mlog ("%s Pulse Req: %s:%5d: %d: :%s RRI:%d\n", get_iface_name_str(iface),
|
|
||||||
hbs_sock.rx_sock[iface]->get_dst_addr()->toString(),
|
|
||||||
hbs_sock.rx_sock[iface]->get_dst_addr()->getPort(),
|
|
||||||
hbs_sock.rx_mesg[iface].s,
|
|
||||||
hbs_sock.rx_mesg[iface].m,
|
|
||||||
hbs_sock.rx_mesg[iface].c);
|
|
||||||
|
|
||||||
|
/* verify the message header */
|
||||||
if ( strncmp ( (const char *)&hbs_sock.rx_mesg[iface].m, (const char *)&req_msg_header, HBS_HEADER_SIZE ))
|
if ( strncmp ( (const char *)&hbs_sock.rx_mesg[iface].m, (const char *)&req_msg_header, HBS_HEADER_SIZE ))
|
||||||
{
|
{
|
||||||
wlog_throttled ( rx_error_count[iface], 200, "%s Invalid header (%d:%s)\n",
|
wlog_throttled ( rx_error_count[iface], ERROR_LOG_THRESHOLD,
|
||||||
get_iface_name_str(iface),
|
"%s Invalid header (%d:%s)\n",
|
||||||
hbs_sock.rx_mesg[iface].s,
|
get_iface_name_str(iface),
|
||||||
hbs_sock.rx_mesg[iface].m );
|
hbs_sock.rx_mesg[iface].s,
|
||||||
|
hbs_sock.rx_mesg[iface].m );
|
||||||
mlog ("Detected: %d <%s>\n", HBS_HEADER_SIZE,hbs_sock.rx_mesg[iface].m);
|
|
||||||
mlog ("Expected: %d <%s>\n", HBS_HEADER_SIZE,req_msg_header);
|
|
||||||
return (FAIL_MSG_HEADER) ;
|
return (FAIL_MSG_HEADER) ;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Manage the Resource Reference Index (RRI) "lookup clue" */
|
/* Manage the Resource Reference Index (RRI) "lookup clue" */
|
||||||
if ( ! strncmp ( &hbs_sock.rx_mesg[iface].m[HBS_HEADER_SIZE], &my_hostname[0], MAX_CHARS_HOSTNAME ))
|
if ( ! strncmp ( &hbs_sock.rx_mesg[iface].m[HBS_HEADER_SIZE], &my_hostname[0], MAX_CHARS_HOSTNAME ))
|
||||||
{
|
{
|
||||||
|
@ -807,32 +799,31 @@ int _service_pulse_request ( iface_enum iface , unsigned int flags )
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Add my RRI to the response message */
|
/* Add my RRI to the response message */
|
||||||
hbs_sock.tx_mesg[iface].c = my_rri ;
|
hbs_sock.rx_mesg[iface].c = my_rri ;
|
||||||
|
|
||||||
/* Clear struct */
|
/* Manage OOB flags */
|
||||||
hbs_sock.tx_mesg[iface].s = s ;
|
hbs_sock.rx_mesg[iface].f = flags ;
|
||||||
hbs_sock.tx_mesg[iface].f = flags ;
|
|
||||||
if ( pmonPulse_counter )
|
if ( pmonPulse_counter )
|
||||||
{
|
{
|
||||||
hbs_sock.tx_mesg[iface].f |= ( PMOND_FLAG ) ;
|
hbs_sock.rx_mesg[iface].f |= ( PMOND_FLAG ) ;
|
||||||
}
|
}
|
||||||
if ( infra_network_provisioned == true )
|
if ( infra_network_provisioned == true )
|
||||||
{
|
{
|
||||||
hbs_sock.tx_mesg[iface].f |= INFRA_FLAG ;
|
hbs_sock.rx_mesg[iface].f |= INFRA_FLAG ;
|
||||||
}
|
}
|
||||||
|
|
||||||
n = (int)sizeof(hbs_message_type) ;
|
#define WANT_CLUSTER_INFO_LOG
|
||||||
|
#ifdef WANT_CLUSTER_INFO_LOG
|
||||||
if ( ! hbs_sock.tx_sock[iface] )
|
/* Log the received cluster info */
|
||||||
|
if ( hbs_sock.rx_mesg[iface].v >= HBS_MESSAGE_VERSION )
|
||||||
{
|
{
|
||||||
elog ("cannot send to null tx_mesg[%s] socket\n", get_iface_name_str(iface) );
|
char str[100] ;
|
||||||
return (FAIL_TO_TRANSMIT);
|
// hbs_cluster_log (hbs_sock.rx_mesg[iface].cluster, hbs_sock.rx_mesg[iface].s );
|
||||||
}
|
snprintf ( &str[0], 100, " seq %6d with %d bytes from %s ", hbs_sock.rx_mesg[iface].s, rx_bytes, get_iface_name_str(iface));
|
||||||
else if ( hbs_sock.tx_sock[iface]->sock_ok() == false )
|
string hostname = my_hostname ;
|
||||||
{
|
hbs_cluster_log ( hostname, hbs_sock.rx_mesg[iface].cluster, str );
|
||||||
elog ("cannot send to failed tx_mesg[%s] socket\n", get_iface_name_str(iface) );
|
|
||||||
return (FAIL_TO_TRANSMIT);
|
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef WANT_PULSE_RESPONSE_FIT
|
#ifdef WANT_PULSE_RESPONSE_FIT
|
||||||
if (( iface == INFRA_IFACE ) && ( daemon_is_file_present ( MTC_CMD_FIT__NO_INFRA_RSP )))
|
if (( iface == INFRA_IFACE ) && ( daemon_is_file_present ( MTC_CMD_FIT__NO_INFRA_RSP )))
|
||||||
|
@ -848,44 +839,69 @@ int _service_pulse_request ( iface_enum iface , unsigned int flags )
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Send pulse response message with sequence number, flags and resource referecen index */
|
int rc = PASS ;
|
||||||
rc = hbs_sock.tx_sock[iface]->reply(hbs_sock.rx_sock[iface],(char*)&hbs_sock.tx_mesg[iface], n);
|
|
||||||
if ( rc == -1 )
|
/* replace the request header with the response header */
|
||||||
|
memcpy ( &hbs_sock.rx_mesg[iface].m[0], &pulse_resp_tx_hdr[0], HBS_MAX_MSG );
|
||||||
|
|
||||||
|
/* Deal with the cluster info if it exists.
|
||||||
|
* ... Introduced in messaging version 1 */
|
||||||
|
if ( hbs_sock.rx_mesg[iface].v >= HBS_MESSAGE_VERSION )
|
||||||
{
|
{
|
||||||
elog ("Failed to sendto socket %d through %s:%d len:%d (%s) (%d:%s)\n",
|
if ( hbs_sock.rx_mesg[iface].cluster.version < MTCE_HBS_CLUSTER_VERSION )
|
||||||
hbs_sock.tx_sock[iface]->getFD(),
|
{
|
||||||
hbs_sock.tx_sock[iface]->get_dst_addr()->toString(),
|
ilog ("Bad cluster verison (%d)", hbs_sock.rx_mesg[iface].cluster.version);
|
||||||
hbs_sock.tx_sock[iface]->get_dst_addr()->getPort(),
|
}
|
||||||
hbs_sock.tx_sock[iface]->get_dst_addr()->getSockLen(),
|
// if ( hbs_sock.rx_mesg[iface].cluster.revision != MTCE_HBS_CLUSTER_REVISION )
|
||||||
get_iface_name_str(iface), errno, strerror(errno));
|
// {
|
||||||
|
// ilog ("Bad cluster revision (%d)", hbs_sock.rx_mesg[iface].cluster.revision);
|
||||||
|
// }
|
||||||
|
|
||||||
|
/* Add peer controller cluster data to this controller's response */
|
||||||
|
// hbs_cluster_loop(hbs_sock.rx_mesg[iface]);
|
||||||
}
|
}
|
||||||
else if ( rc != n)
|
|
||||||
|
/* send pulse response message */
|
||||||
|
int tx_bytes = hbs_sock.tx_sock[iface]->reply(hbs_sock.rx_sock[iface],(char*)&hbs_sock.rx_mesg[iface], rx_bytes);
|
||||||
|
if ( tx_bytes == -1 )
|
||||||
{
|
{
|
||||||
/* Avoid log flooding
|
elog_throttled ( tx_error_count[iface], ERROR_LOG_THRESHOLD,
|
||||||
elog ("unicast send failed. (%d)\n", rc); */
|
"pulse tx failed %d:%s:%d len:%d (%s) (%d:%s)\n",
|
||||||
wlog_throttled ( tx_error_count[iface], 200,
|
hbs_sock.tx_sock[iface]->getFD(),
|
||||||
"%s Pulse Rsp: %d:%d bytes < %d:%s > to <%s>\n",
|
hbs_sock.tx_sock[iface]->get_dst_addr()->toString(),
|
||||||
get_iface_name_str(iface), n, rc,
|
hbs_sock.tx_sock[iface]->get_dst_addr()->getPort(),
|
||||||
hbs_sock.tx_mesg[iface].s,
|
hbs_sock.tx_sock[iface]->get_dst_addr()->getSockLen(),
|
||||||
&hbs_sock.tx_mesg[iface].m[0],
|
get_iface_name_str(iface), errno, strerror(errno));
|
||||||
|
}
|
||||||
|
else if ( tx_bytes != rx_bytes)
|
||||||
|
{
|
||||||
|
wlog_throttled ( tx_error_count[iface], ERROR_LOG_THRESHOLD,
|
||||||
|
"%s Pulse Rsp: %d:%d bytes < %d:%s >",
|
||||||
|
get_iface_name_str(iface), rx_bytes, tx_bytes,
|
||||||
|
hbs_sock.rx_mesg[iface].s,
|
||||||
&hbs_sock.rx_mesg[iface].m[0]);
|
&hbs_sock.rx_mesg[iface].m[0]);
|
||||||
return (rc);
|
rc = FAIL_DATA_SIZE ;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
mlog ("%s Pulse Rsp: %s:%5d: %d:%d:%s RRI:%d (%d)\n",
|
mlog ("%s Pulse Rsp: %s:%5d: %d:%d:%s RRI:%d (%d:%d:%d)\n",
|
||||||
get_iface_name_str(iface),
|
get_iface_name_str(iface),
|
||||||
hbs_sock.tx_sock[iface]->get_dst_addr()->toString(),
|
hbs_sock.tx_sock[iface]->get_dst_addr()->toString(),
|
||||||
hbs_sock.tx_sock[iface]->get_dst_addr()->getPort(),
|
hbs_sock.tx_sock[iface]->get_dst_addr()->getPort(),
|
||||||
hbs_sock.tx_mesg[iface].s,
|
hbs_sock.rx_mesg[iface].s,
|
||||||
hbs_sock.tx_mesg[iface].f,
|
hbs_sock.rx_mesg[iface].f,
|
||||||
hbs_sock.tx_mesg[iface].m,
|
hbs_sock.rx_mesg[iface].m,
|
||||||
hbs_sock.tx_mesg[iface].c,
|
hbs_sock.rx_mesg[iface].c,
|
||||||
pmonPulse_counter);
|
pmonPulse_counter, rx_bytes, tx_bytes);
|
||||||
/* Clear the error count since we got a good transmit */
|
|
||||||
tx_error_count[iface] = 0 ;
|
|
||||||
}
|
}
|
||||||
return PASS;
|
|
||||||
|
/* Clear the error count since we got a good receive */
|
||||||
|
if ( rx_error_count[iface] )
|
||||||
|
rx_error_count[iface] = 0 ;
|
||||||
|
if ( tx_error_count[iface] )
|
||||||
|
tx_error_count[iface] = 0 ;
|
||||||
|
|
||||||
|
return rc ;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef WANT_FIT_TESTING
|
#ifdef WANT_FIT_TESTING
|
||||||
|
@ -968,6 +984,9 @@ int daemon_init ( string iface, string nodeType_str )
|
||||||
/* Initialize socket construct and pointer to it */
|
/* Initialize socket construct and pointer to it */
|
||||||
memset ( &hbs_sock, 0, sizeof(hbs_sock));
|
memset ( &hbs_sock, 0, sizeof(hbs_sock));
|
||||||
|
|
||||||
|
/* init the utility module */
|
||||||
|
hbs_utils_init ();
|
||||||
|
|
||||||
/* Defaults */
|
/* Defaults */
|
||||||
hbs_config.stall_pmon_thld = -1 ;
|
hbs_config.stall_pmon_thld = -1 ;
|
||||||
hbs_config.stall_mon_period = MTC_HRS_8 ;
|
hbs_config.stall_mon_period = MTC_HRS_8 ;
|
||||||
|
@ -1025,12 +1044,6 @@ int daemon_init ( string iface, string nodeType_str )
|
||||||
rc = FAIL_DAEMON_CONFIG ;
|
rc = FAIL_DAEMON_CONFIG ;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Init the heartbeat transmit pulse response message */
|
|
||||||
else if ( hbs_message_init () != PASS )
|
|
||||||
{
|
|
||||||
elog ("Failed to initialize pulse response message\n");
|
|
||||||
rc = FAIL_MESSAGE_INIT ;
|
|
||||||
}
|
|
||||||
/* Setup the heartbeat service messaging sockets */
|
/* Setup the heartbeat service messaging sockets */
|
||||||
else if ( hbs_socket_init () != PASS )
|
else if ( hbs_socket_init () != PASS )
|
||||||
{
|
{
|
||||||
|
@ -1119,6 +1132,11 @@ void daemon_service_run ( void )
|
||||||
ilog ("Sending Heartbeat Ready Event\n");
|
ilog ("Sending Heartbeat Ready Event\n");
|
||||||
hbs_send_event ( MTC_EVENT_MONITOR_READY );
|
hbs_send_event ( MTC_EVENT_MONITOR_READY );
|
||||||
|
|
||||||
|
my_hostname_length = strlen(my_hostname) ;
|
||||||
|
memset ( &pulse_resp_tx_hdr[0], 0, HBS_MAX_MSG );
|
||||||
|
memcpy ( &pulse_resp_tx_hdr[0], &rsp_msg_header[0], HBS_HEADER_SIZE );
|
||||||
|
memcpy ( &pulse_resp_tx_hdr[HBS_HEADER_SIZE], my_hostname, my_hostname_length );
|
||||||
|
|
||||||
/* Run heartbeat service forever or until stop condition */
|
/* Run heartbeat service forever or until stop condition */
|
||||||
for ( ; ; )
|
for ( ; ; )
|
||||||
{
|
{
|
||||||
|
@ -1153,7 +1171,9 @@ void daemon_service_run ( void )
|
||||||
FD_SET(hbs_sock.pmon_pulse_sock->getFD(),&hbs_sock.readfds);
|
FD_SET(hbs_sock.pmon_pulse_sock->getFD(),&hbs_sock.readfds);
|
||||||
FD_SET(hbs_sock.amon_socket, &hbs_sock.readfds);
|
FD_SET(hbs_sock.amon_socket, &hbs_sock.readfds);
|
||||||
FD_SET(hbs_sock.netlink_sock, &hbs_sock.readfds);
|
FD_SET(hbs_sock.netlink_sock, &hbs_sock.readfds);
|
||||||
|
#ifdef WANT_CLUSTER_DEBUG
|
||||||
|
FD_SET(hbs_sock.sm_client_sock->getFD(), &hbs_sock.readfds);
|
||||||
|
#endif
|
||||||
rc = select( socks.back()+1,
|
rc = select( socks.back()+1,
|
||||||
&hbs_sock.readfds, NULL, NULL,
|
&hbs_sock.readfds, NULL, NULL,
|
||||||
&hbs_sock.waitd);
|
&hbs_sock.waitd);
|
||||||
|
@ -1176,6 +1196,19 @@ void daemon_service_run ( void )
|
||||||
/* Only service sockets for the rc > 0 case */
|
/* Only service sockets for the rc > 0 case */
|
||||||
else if ( rc )
|
else if ( rc )
|
||||||
{
|
{
|
||||||
|
#ifdef WANT_CLUSTER_DEBUG
|
||||||
|
if ( hbs_sock.sm_client_sock && FD_ISSET(hbs_sock.sm_client_sock->getFD(), &hbs_sock.readfds ) )
|
||||||
|
{
|
||||||
|
mtce_hbs_cluster_type msg ;
|
||||||
|
/* Receive event messages */
|
||||||
|
memset ( &msg , 0, sizeof(mtce_hbs_cluster_type));
|
||||||
|
int bytes = hbs_sock.sm_client_sock->read((char*)&msg, sizeof(mtce_hbs_cluster_type));
|
||||||
|
if ( bytes )
|
||||||
|
{
|
||||||
|
hbs_cluster_dump (msg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
if (hbs_sock.rx_sock[MGMNT_IFACE]&&FD_ISSET(hbs_sock.rx_sock[MGMNT_IFACE]->getFD(), &hbs_sock.readfds))
|
if (hbs_sock.rx_sock[MGMNT_IFACE]&&FD_ISSET(hbs_sock.rx_sock[MGMNT_IFACE]->getFD(), &hbs_sock.readfds))
|
||||||
{
|
{
|
||||||
/* Receive pulse request and send a response */
|
/* Receive pulse request and send a response */
|
||||||
|
|
|
@ -0,0 +1,748 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2018 Wind River Systems, Inc.
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
|
*
|
||||||
|
* @file Maintenance Heartbeat Agent Cluster Manager Module
|
||||||
|
*
|
||||||
|
*************************************************************************
|
||||||
|
*
|
||||||
|
* This module provides the heartbeat cluster implementation member
|
||||||
|
* functions that the hbsAgent service calls to collect, store and
|
||||||
|
* send heartbeat cluster information to SM upon request.
|
||||||
|
*
|
||||||
|
* See mtceHbsCluster.h for formal API between SM and Mtce.
|
||||||
|
*
|
||||||
|
*************************************************************************/
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
#include "nodeBase.h" /* common maintenance constructs and definitions */
|
||||||
|
#include "daemon_common.h" /* common daemon constructs and definitions */
|
||||||
|
#include "hbsBase.h" /* mtce heartbeat constructs and definitions */
|
||||||
|
|
||||||
|
/* Error log throttle counter. */
|
||||||
|
#define THROTTLE_COUNT (500)
|
||||||
|
|
||||||
|
/* Private Heartbeat Cluster Control Structure. */
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
/* Contains the controller number (0 or 1) for this controller. */
|
||||||
|
unsigned short this_controller ;
|
||||||
|
|
||||||
|
/* Preserves which controllers are enabled. */
|
||||||
|
bool controller_0_enabled ;
|
||||||
|
bool controller_1_enabled ;
|
||||||
|
#ifdef THREE_CONTROLLER_SYSTEM
|
||||||
|
bool controller_2_enabled ;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Used to prevent log flooding in presence of back to back errors. */
|
||||||
|
unsigned int log_throttle ;
|
||||||
|
|
||||||
|
/* Used to threshold storage-0 not responding state */
|
||||||
|
unsigned int storage_0_not_responding_count[MTCE_HBS_NETWORKS];
|
||||||
|
|
||||||
|
/* Contains the number of monitored networks in the system.
|
||||||
|
* Management only = 1
|
||||||
|
* Management and Inrastructure = 2 */
|
||||||
|
unsigned short monitored_networks ;
|
||||||
|
|
||||||
|
/* This contains the current number of heartbeat enabled hosts.
|
||||||
|
*
|
||||||
|
* Used to improve performance.
|
||||||
|
*
|
||||||
|
* Performance: This value is included in each history entry so
|
||||||
|
* rather than do the size calculation of monitored_hostname_list
|
||||||
|
* each time, this variable is updated from monitored_hostname_list
|
||||||
|
* after each add/del operation. */
|
||||||
|
unsigned short monitored_hosts ;
|
||||||
|
|
||||||
|
/* List of host names being monitored. */
|
||||||
|
std::list<string>monitored_hostname_list ;
|
||||||
|
|
||||||
|
/* The working heartbeat cluster data vault. */
|
||||||
|
mtce_hbs_cluster_type cluster ;
|
||||||
|
|
||||||
|
} hbs_cluster_ctrl_type ;
|
||||||
|
|
||||||
|
/* Cluster control structure allocation. */
|
||||||
|
static hbs_cluster_ctrl_type ctrl ;
|
||||||
|
|
||||||
|
|
||||||
|
/****************************************************************************
|
||||||
|
*
|
||||||
|
* Name : hbs_cluster_init
|
||||||
|
*
|
||||||
|
* Description : Initialize the cluster structure to default values.
|
||||||
|
*
|
||||||
|
* Assumptions : Called by hbsAgent.cpp before entering the main loop.
|
||||||
|
*
|
||||||
|
***************************************************************************/
|
||||||
|
|
||||||
|
void hbs_cluster_init ( unsigned short period )
|
||||||
|
{
|
||||||
|
ctrl.monitored_hosts = 0;
|
||||||
|
ctrl.monitored_hostname_list.clear();
|
||||||
|
|
||||||
|
/* Init the cluster - header. */
|
||||||
|
ctrl.cluster.version = MTCE_HBS_CLUSTER_VERSION ;
|
||||||
|
ctrl.cluster.revision = MTCE_HBS_CLUSTER_REVISION ;
|
||||||
|
ctrl.cluster.magic_number = MTCE_HBS_MAGIC_NUMBER ;
|
||||||
|
|
||||||
|
/* Init the cluster - global / dynamic data. */
|
||||||
|
ctrl.cluster.reqid = 0 ;
|
||||||
|
ctrl.cluster.period_msec = period ;
|
||||||
|
ctrl.cluster.storage0_enabled = false ;
|
||||||
|
ctrl.cluster.histories = 0 ;
|
||||||
|
ctrl.cluster.bytes = BYTES_IN_CLUSTER_VAULT(ctrl.cluster.histories);
|
||||||
|
|
||||||
|
/* The storage-0 thresholding counter for each network. */
|
||||||
|
for ( int n = 0 ; n < MTCE_HBS_NETWORKS ; n++ )
|
||||||
|
ctrl.storage_0_not_responding_count[n] = 0 ;
|
||||||
|
|
||||||
|
for ( int h = 0 ; h < MTCE_HBS_MAX_HISTORY_ELEMENTS ; h++ )
|
||||||
|
hbs_cluster_history_init ( ctrl.cluster.history[h] );
|
||||||
|
|
||||||
|
ilog ("Cluster Info: v%d.%d sig:%x bytes:%d (%ld)",
|
||||||
|
ctrl.cluster.version,
|
||||||
|
ctrl.cluster.revision,
|
||||||
|
ctrl.cluster.magic_number,
|
||||||
|
ctrl.cluster.bytes,
|
||||||
|
sizeof(mtce_hbs_cluster_history_type));
|
||||||
|
|
||||||
|
ctrl.log_throttle = 0 ;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/****************************************************************************
|
||||||
|
*
|
||||||
|
* Name : hbs_cluster_nums
|
||||||
|
*
|
||||||
|
* Description : Set this controller number and the number of monitored
|
||||||
|
* networks in this system.
|
||||||
|
*
|
||||||
|
* These values do not change without a process restart.
|
||||||
|
*
|
||||||
|
* Assumptions : Called by hbsAgent.cpp before entering the main loop.
|
||||||
|
*
|
||||||
|
* Returns : None
|
||||||
|
*
|
||||||
|
***************************************************************************/
|
||||||
|
|
||||||
|
void hbs_cluster_nums ( unsigned short this_controller,
|
||||||
|
unsigned short monitored_networks )
|
||||||
|
{
|
||||||
|
ctrl.this_controller = this_controller ;
|
||||||
|
ctrl.monitored_networks = monitored_networks ;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/****************************************************************************
|
||||||
|
*
|
||||||
|
* Name : log_monitored_hosts_list
|
||||||
|
*
|
||||||
|
* Description : Log the list of monitored hosts.
|
||||||
|
* Typically done on a list change.
|
||||||
|
*
|
||||||
|
* Returns : None
|
||||||
|
*
|
||||||
|
***************************************************************************/
|
||||||
|
|
||||||
|
void log_monitored_hosts_list ( void )
|
||||||
|
{
|
||||||
|
std::list<string>::iterator iter_ptr ;
|
||||||
|
string list = "" ;
|
||||||
|
for ( iter_ptr = ctrl.monitored_hostname_list.begin() ;
|
||||||
|
iter_ptr != ctrl.monitored_hostname_list.end() ;
|
||||||
|
iter_ptr++ )
|
||||||
|
{
|
||||||
|
list.append (*(iter_ptr));
|
||||||
|
list.append (" ");
|
||||||
|
}
|
||||||
|
ilog ("cluster of %ld: %s",
|
||||||
|
ctrl.monitored_hostname_list.size(),
|
||||||
|
list.c_str());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/****************************************************************************
|
||||||
|
*
|
||||||
|
* Name : cluster_storage0_state
|
||||||
|
*
|
||||||
|
* Description : Record the heartbeat monitoring state of storage-0.
|
||||||
|
*
|
||||||
|
* Parameters : true if storage-0 heartbeating is in the 'started' state.
|
||||||
|
* false if storage-0 heartbeating is in the 'stopped' state.
|
||||||
|
*
|
||||||
|
* Returns : None
|
||||||
|
*
|
||||||
|
***************************************************************************/
|
||||||
|
|
||||||
|
void cluster_storage0_state ( bool enabled )
|
||||||
|
{
|
||||||
|
if ( ctrl.cluster.storage0_enabled != enabled )
|
||||||
|
{
|
||||||
|
ctrl.cluster.storage0_enabled = enabled ;
|
||||||
|
ilog ("storage-0 heartbeat state changed to %s",
|
||||||
|
enabled ? "enabled" : "disabled" );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/****************************************************************************
|
||||||
|
*
|
||||||
|
* Name : hbs_manage_controller_state
|
||||||
|
*
|
||||||
|
* Description : Track the monitored enabled state of the controllers.
|
||||||
|
*
|
||||||
|
***************************************************************************/
|
||||||
|
|
||||||
|
/* Store the monitored 'enabled' state for the named controller.
 *
 * hostname - name of the host whose state changed
 * enabled  - the new enabled state
 *
 * Hostnames that do not match one of the controller names are ignored.
 */
void hbs_manage_controller_state ( string & hostname, bool enabled )
{
    if ( hostname == CONTROLLER_0 )
    {
        ctrl.controller_0_enabled = enabled ;
        return ;
    }

    if ( hostname == CONTROLLER_1 )
    {
        ctrl.controller_1_enabled = enabled ;
        return ;
    }

#ifdef THREE_CONTROLLER_SYSTEM
    if ( hostname == CONTROLLER_2 )
    {
        ctrl.controller_2_enabled = enabled ;
        return ;
    }
#endif
}
|
||||||
|
|
||||||
|
|
||||||
|
/****************************************************************************
|
||||||
|
*
|
||||||
|
* Name : hbs_cluster_add
|
||||||
|
*
|
||||||
|
* Description : Add the specified hostname to the enabled hosts list.
|
||||||
|
*
|
||||||
|
* Updates : hostname is added to monitored_hostname_list
|
||||||
|
*
|
||||||
|
* If added host is storage-0 then update its enabled status.
|
||||||
|
* if added host is a controller then update controller state.
|
||||||
|
*
|
||||||
|
* Parameters : hostname string
|
||||||
|
*
|
||||||
|
* Updates : monitored_hostname_list
|
||||||
|
*
|
||||||
|
***************************************************************************/
|
||||||
|
|
||||||
|
/* Add the specified host to the monitored hosts list.
 *
 * hostname - name of the host to add
 *
 * Also refreshes the cached monitored host count, marks storage-0 as
 * enabled if that is the host being added and records the enabled state
 * if the host is a controller.
 */
void hbs_cluster_add ( string & hostname )
{
    /* Remove any existing entry before the push so that a hostname is
     * never listed more than once. */
    ctrl.monitored_hostname_list.remove(hostname) ;
    ctrl.monitored_hostname_list.push_back(hostname) ;
    ctrl.monitored_hosts = (unsigned short)ctrl.monitored_hostname_list.size();

    /* Note: the list holds at least the host just pushed, so there is
     * no 'down to 0 monitored hosts' case to handle in the add path ;
     * that re-init handling only applies to hbs_cluster_del. */

    /* Manage storage-0 state */
    if ( hostname == STORAGE_0 )
    {
        cluster_storage0_state ( true );
    }

    /* Manage controller state ; true means enabled in this case. */
    hbs_manage_controller_state ( hostname, true );

    ilog ("%s added to cluster", hostname.c_str());

    log_monitored_hosts_list ();
}
|
||||||
|
|
||||||
|
/****************************************************************************
|
||||||
|
*
|
||||||
|
* Name : hbs_cluster_del
|
||||||
|
*
|
||||||
|
* Description : Delete the specified hostname from the enabled hosts list.
|
||||||
|
*
|
||||||
|
* Updates : hostname is removed from monitored_hostname_list
|
||||||
|
*
|
||||||
|
* If deleted host is storage-0 then update its enabled status.
|
||||||
|
* If deleted host is a controller then update controller state.
|
||||||
|
*
|
||||||
|
* Parameters : hostname string
|
||||||
|
*
|
||||||
|
* Updates : monitored_hostname_list
|
||||||
|
*
|
||||||
|
***************************************************************************/
|
||||||
|
|
||||||
|
/* Remove the specified host from the monitored hosts list.
 *
 * hostname - name of the host to delete
 *
 * Also refreshes the cached monitored host count, marks storage-0 as
 * disabled if that is the host being deleted, re-initializes the cluster
 * once no monitored hosts remain and records the disabled state if the
 * host is a controller.
 */
void hbs_cluster_del ( string & hostname )
{
    std::list<string> & hosts = ctrl.monitored_hostname_list ;

    hosts.remove(hostname) ;
    ctrl.monitored_hosts = (unsigned short)hosts.size();

    /* Manage storage-0 state. */
    if ( hostname == STORAGE_0 )
        cluster_storage0_state ( false );

    /* Start fresh once the last monitored host is gone. */
    if ( ctrl.monitored_hosts == 0 )
        hbs_cluster_init ( ctrl.cluster.period_msec );

    /* Manage controller state ; false means not enabled in this case. */
    hbs_manage_controller_state ( hostname , false );

    ilog ("%s deleted from cluster", hostname.c_str());

    log_monitored_hosts_list ();
}
|
||||||
|
|
||||||
|
/****************************************************************************
|
||||||
|
*
|
||||||
|
* Name : hbs_cluster_update
|
||||||
|
*
|
||||||
|
* Description : Update this controller's cluster info for the specified
|
||||||
|
* network with
|
||||||
|
*
|
||||||
|
* 1. The number of enabled hosts.
|
||||||
|
* 2. The number of responding hosts.
|
||||||
|
* 3. The oldest history index in the rotational history fifo.
|
||||||
|
* 4. Maintain a back to back non-responding count for storage-0.
|
||||||
|
* Once the count reaches the minimum threshold of
|
||||||
|
* STORAGE_0_NR_THRESHOLD then the specific network history
|
||||||
|
* is updated to indicate storage-0 is not responding. Once
|
||||||
|
* storage-0 starts responding again with a single response
|
||||||
|
* then that network history is updated to indicate storage-0
|
||||||
|
* is responding.
|
||||||
|
*
|
||||||
|
* Assumptions : Converts heartbeat interface number to cluster network number.
|
||||||
|
*
|
||||||
|
* Parameters : heartbeat interface number ( iface_enum )
|
||||||
|
* network index
|
||||||
|
* number of not responding hosts for this interval
|
||||||
|
*
|
||||||
|
* Updates : This and last history as well as storage-0 not responding
|
||||||
|
* count.
|
||||||
|
*
|
||||||
|
***************************************************************************/
|
||||||
|
|
||||||
|
#define STORAGE_0_NR_THRESHOLD (4)
|
||||||
|
|
||||||
|
/* Update this controller's history for the network that corresponds to
 * the specified heartbeat interface.
 *
 * iface                - heartbeat interface the results were collected on
 * not_responding_hosts - number of hosts that did not respond this interval
 * storage_0_responding - true if storage-0 responded this interval
 *
 * Returns without making any change when there are no monitored hosts,
 * when the interface does not map to a cluster network or when a new
 * history slot is needed but the history array is already full.
 */
void hbs_cluster_update ( iface_enum iface,
                          unsigned short not_responding_hosts,
                          bool storage_0_responding )
{
    if ( ctrl.monitored_hosts == 0 )
        return ;

    /* convert heartbeat iface enum to cluster network enum. */
    mtce_hbs_network_enum n ;
    if ( iface == MGMNT_IFACE )
        n = MTCE_HBS_NETWORK_MGMT ;
    else if ( iface == INFRA_IFACE )
        n = MTCE_HBS_NETWORK_INFRA ;
#ifdef MONITORED_OAM_NETWORK
    else if ( iface == OAM_IFACE )
        n = MTCE_HBS_NETWORK_OAM ;
#endif
    else
        return ;

    if ( not_responding_hosts )
    {
        clog1 ("controller-%d %s enabled:%d not responding:%d",
                ctrl.this_controller,
                hbs_cluster_network_name(n).c_str(),
                ctrl.monitored_hosts,
                not_responding_hosts);
    }
    else
    {
        clog1 ("controller-%d %s has %d monitored hosts and all are responding",
                ctrl.this_controller,
                hbs_cluster_network_name(n).c_str(),
                ctrl.monitored_hosts);
    }

    /* Look-up active history array for this network combination.
     * The look-up macro assigns its result to the local 'history_ptr'. */
    mtce_hbs_cluster_history_type * history_ptr = NULL ;
    GET_CLUSTER_HISTORY_PTR(ctrl.cluster, ctrl.this_controller ,n);
    if ( history_ptr == NULL )
    {
        if ( ctrl.cluster.histories >= MTCE_HBS_MAX_HISTORY_ELEMENTS )
        {
            /* Should never happen but if it does then log without flooding */
            wlog_throttled ( ctrl.log_throttle, THROTTLE_COUNT,
                             "Unable to store history beyond %d ",
                             ctrl.cluster.histories );
            return ;
        }
        else
        {
            /* Adding a new history slot. */
            history_ptr = &ctrl.cluster.history[ctrl.cluster.histories] ;
            ctrl.cluster.histories++ ;
            ctrl.cluster.bytes = BYTES_IN_CLUSTER_VAULT(ctrl.cluster.histories);
            history_ptr->controller = ctrl.this_controller ;
            history_ptr->network = n ;

            /* Log new network history as its being started. */
            ilog ("controller-%d %s network history add",
                   ctrl.this_controller,
                   hbs_cluster_network_name(n).c_str());
        }
    }

    /* Manage storage-0 status. */
    if ( ctrl.cluster.storage0_enabled )
    {
        /* Handle storage-0 status change from not responding to responding. */
        if ( storage_0_responding == true )
        {
            if (history_ptr->storage0_responding == false)
            {
                history_ptr->storage0_responding = true ;
                ilog ("controller-%d %s heartbeat ; storage-0 is ok",
                       ctrl.this_controller,
                       hbs_cluster_network_name(n).c_str());
            }
            if (ctrl.storage_0_not_responding_count[n])
                ctrl.storage_0_not_responding_count[n] = 0 ;
        }
        /* Count the storage-0 not responding case for this network. */
        else
        {
            ctrl.storage_0_not_responding_count[n]++ ;
            if ( ctrl.storage_0_not_responding_count[n] == 2 )
            {
                ilog ("controller-%d %s heartbeat ; storage-0 has 2 misses",
                       ctrl.this_controller,
                       hbs_cluster_network_name(n).c_str() );
            }
        }

        /* Handle storage-0 status change from responding to not responding. */
        if (( history_ptr->storage0_responding == true ) &&
            ( ctrl.storage_0_not_responding_count[n] >= STORAGE_0_NR_THRESHOLD ))
        {
            history_ptr->storage0_responding = false ;
            ilog ("controller-%d %s heartbeat ; storage-0 is not responding",
                   ctrl.this_controller,
                   hbs_cluster_network_name(n).c_str() );
        }
    }
    else
    {
        /* Typical path for storage-0 disabled or normal non-storage system case */
        if ( history_ptr->storage0_responding == true )
            history_ptr->storage0_responding = false ;

        /* Handle clearing threshold count when storage-0 is not enabled. */
        if ( ctrl.storage_0_not_responding_count[n] )
            ctrl.storage_0_not_responding_count[n] = 0 ;
    }

    /*
     * Manage the history entry index.
     *
     * Get the previous entry index ...
     * ... which is the one before the oldest index.
     * ... which is the index for the next entry.
     */
    unsigned short last_entry_index ;
    if ( history_ptr->oldest_entry_index == 0 )
    {
        /* Go to the end of the array. */
        last_entry_index = MTCE_HBS_HISTORY_ENTRIES-1 ;
    }
    else
    {
        /* Otherwise, the previous index in the array */
        last_entry_index = history_ptr->oldest_entry_index - 1 ;
    }

    /* Update the history with this data.
     *
     * The responding count is clamped at 0 so that a not responding
     * count that is larger than the monitored host count cannot wrap
     * the unsigned subtraction into a huge bogus value. */
    history_ptr->entry[history_ptr->oldest_entry_index].hosts_enabled = ctrl.monitored_hosts ;
    history_ptr->entry[history_ptr->oldest_entry_index].hosts_responding =
        ( not_responding_hosts < ctrl.monitored_hosts ) ?
        ( ctrl.monitored_hosts - not_responding_hosts ) : 0 ;

    if (( history_ptr->entry[history_ptr->oldest_entry_index].hosts_enabled !=
          history_ptr->entry[                last_entry_index].hosts_enabled ) ||
        ( history_ptr->entry[history_ptr->oldest_entry_index].hosts_responding !=
          history_ptr->entry[                last_entry_index].hosts_responding))
    {
        /* Only log on change events. */
        if ( history_ptr->entry[history_ptr->oldest_entry_index].hosts_enabled ==
             history_ptr->entry[history_ptr->oldest_entry_index].hosts_responding )
        {
            ilog ("controller-%d %s cluster of %d is healthy",
                   ctrl.this_controller,
                   hbs_cluster_network_name(n).c_str(),
                   history_ptr->entry[history_ptr->oldest_entry_index].hosts_enabled);
        }
        else
        {
            ilog ("controller-%d %s cluster of %d with %d responding",
                   ctrl.this_controller,
                   hbs_cluster_network_name(n).c_str(),
                   history_ptr->entry[history_ptr->oldest_entry_index].hosts_enabled,
                   history_ptr->entry[history_ptr->oldest_entry_index].hosts_responding);
        }
    }

    /* Increment the entries count till it reaches the max. */
    if ( history_ptr->entries < MTCE_HBS_HISTORY_ENTRIES )
        history_ptr->entries++ ;

    /* Manage the next entry update index ; aka the oldest index. */
    if ( history_ptr->oldest_entry_index == (MTCE_HBS_HISTORY_ENTRIES-1))
        history_ptr->oldest_entry_index = 0 ;
    else
        history_ptr->oldest_entry_index++ ;

    /* clear the log throttle if we are updating history ok. */
    ctrl.log_throttle = 0 ;
}
|
||||||
|
|
||||||
|
/****************************************************************************
|
||||||
|
*
|
||||||
|
* Name : hbs_cluster_append
|
||||||
|
*
|
||||||
|
* Description : Add this controller's cluster info to this pulse
|
||||||
|
* request message.
|
||||||
|
*
|
||||||
|
***************************************************************************/
|
||||||
|
|
||||||
|
/* Copy this controller's cluster header fields and in-use history
 * elements into the specified heartbeat message.
 *
 * msg - heartbeat message whose 'cluster' member is filled in
 *
 * Returns early, through CHECK_CTRL_NTWK_PARMS, if the controller number
 * or the monitored network count fails that macro's range check.
 */
void hbs_cluster_append ( hbs_message_type & msg )
{
    unsigned short c = ctrl.this_controller ;

    CHECK_CTRL_NTWK_PARMS(c, ctrl.monitored_networks);

    msg.cluster.version          = ctrl.cluster.version ;
    msg.cluster.revision         = ctrl.cluster.revision ;
    msg.cluster.magic_number     = ctrl.cluster.magic_number ;
    msg.cluster.period_msec      = ctrl.cluster.period_msec ;
    msg.cluster.storage0_enabled = ctrl.cluster.storage0_enabled ;
    msg.cluster.histories        = ctrl.cluster.histories ;

    /* Histories are stored from history[0] up (see hbs_cluster_update),
     * so copy from slot 0, not from the slot indexed by the controller
     * number, and copy only the bytes of the in-use history elements ;
     * the vault header is not part of the history array.
     *
     * NOTE(review): assumes msg.cluster.history has the same element type
     * as ctrl.cluster.history - confirm against hbs_message_type. */
    int bytes = (int)(sizeof(ctrl.cluster.history[0]) * ctrl.cluster.histories) ;

    clog1 ("controller-%d appending cluster info to heartbeat message (%d:%d:%d)",
            c, ctrl.monitored_networks, ctrl.cluster.histories, bytes );

    /* Copy the cluster history into the message. */
    memcpy( &msg.cluster.history[0], &ctrl.cluster.history[0], bytes);
}
|
||||||
|
|
||||||
|
/****************************************************************************
|
||||||
|
*
|
||||||
|
* Name : hbs_cluster_unused_bytes
|
||||||
|
*
|
||||||
|
* Description : Used to set how much data to send in the heartbeat pulse
|
||||||
|
* requests.
|
||||||
|
*
|
||||||
|
* Returns : The number of bytes that are not used in the full
|
||||||
|
* history array cluster structure.
|
||||||
|
*
|
||||||
|
***************************************************************************/
|
||||||
|
|
||||||
|
unsigned short hbs_cluster_unused_bytes ( void )
|
||||||
|
{
|
||||||
|
if ( ctrl.cluster.histories <= MTCE_HBS_MAX_HISTORY_ELEMENTS )
|
||||||
|
{
|
||||||
|
unsigned short tmp = MTCE_HBS_MAX_HISTORY_ELEMENTS - ctrl.cluster.histories ;
|
||||||
|
return((unsigned short)(sizeof(mtce_hbs_cluster_history_type)*tmp)) ;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/****************************************************************************
|
||||||
|
*
|
||||||
|
* Name : hbs_cluster_send
|
||||||
|
*
|
||||||
|
* Description: Send the cluster vault to SM.
|
||||||
|
*
|
||||||
|
* Returns : Nothing
|
||||||
|
*
|
||||||
|
***************************************************************************/
|
||||||
|
|
||||||
|
/* NOTE: All code wrapped in this directive will be removed once
|
||||||
|
* active/active heartbeating is delivered in next update */
|
||||||
|
#define WANT_ACTIVE_ACTIVE_HEARTBEAT_RESULTS
|
||||||
|
|
||||||
|
/* Send the in-use portion of the cluster vault to SM.
 *
 * sm_client_sock - socket to write the vault to ; nothing is sent if
 *                  this is NULL or the socket reports not ok
 * reqid          - request id ; stored in the vault as an unsigned short
 *
 * While WANT_ACTIVE_ACTIVE_HEARTBEAT_RESULTS is defined, and the peer
 * controller is enabled, this controller's histories are temporarily
 * duplicated as peer controller histories for the duration of the send
 * and removed again afterwards.
 */
void hbs_cluster_send ( msgClassSock * sm_client_sock, int reqid )
{

#ifdef WANT_ACTIVE_ACTIVE_HEARTBEAT_RESULTS

    /* To assist SM with duplex integration ...
     *
     * This code emulates heartbeat redundancy by duplicating
     * controller history up to the number of provisioned
     * controllers until active-active heartbeat is delivered.
     */
    int peer_controller ;
    bool copy_cluster = false ;
    if ( ctrl.this_controller == 0 )
    {
        peer_controller = 1 ;
        if ( ctrl.controller_1_enabled )
        {
            copy_cluster = true ;
        }
    }
    else
    {
        peer_controller = 0 ;
        if ( ctrl.controller_0_enabled )
        {
            copy_cluster = true ;
        }
    }

    /* Number of emulated peer histories actually appended ; the
     * cleanup below removes exactly this many. */
    int emulated = 0 ;
    if ( copy_cluster )
    {
        int networks = ctrl.cluster.histories ;
        for ( int n = 0 ; n < networks ; n++ )
        {
            /* Never write past the end of the history array. */
            if ( ctrl.cluster.histories >= MTCE_HBS_MAX_HISTORY_ELEMENTS )
            {
                wlog_throttled ( ctrl.log_throttle, THROTTLE_COUNT,
                                 "Unable to store history beyond %d ",
                                 ctrl.cluster.histories );
                break ;
            }

            /* copy this controller history to create peer controller */
            ctrl.cluster.history[ctrl.cluster.histories] = ctrl.cluster.history[n] ;

            /* update the controller */
            ctrl.cluster.history[ctrl.cluster.histories].controller = peer_controller ;
            ctrl.cluster.bytes += sizeof(mtce_hbs_cluster_history_type) ;
            ctrl.cluster.histories++ ;
            emulated++ ;
        }
    }

#endif // WANT_ACTIVE_ACTIVE_HEARTBEAT_RESULTS

    ctrl.cluster.reqid = (unsigned short)reqid ;
    if (( sm_client_sock ) && ( sm_client_sock->sock_ok() == true ))
    {
        /* Only the header and the in-use histories are sent. */
        int len = sizeof(mtce_hbs_cluster_type)-hbs_cluster_unused_bytes();
        int bytes = sm_client_sock->write((char*)&ctrl.cluster, len);
        if ( bytes <= 0 )
        {
            elog ("failed to send cluster vault to SM (bytes=%d) (%d:%s)\n",
                   bytes , errno, strerror(errno));
        }
        else
        {
            ilog ("heartbeat cluster vault sent to SM (%d bytes)", len );
            hbs_cluster_dump ( ctrl.cluster );
        }
    }

#ifdef WANT_ACTIVE_ACTIVE_HEARTBEAT_RESULTS

    /* Remove the emulated peer controller histories again so that the
     * vault only holds this controller's data between sends. */
    for ( ; emulated > 0 ; emulated-- )
    {
        hbs_cluster_history_init(ctrl.cluster.history[ctrl.cluster.histories-1]);
        ctrl.cluster.bytes -= sizeof(mtce_hbs_cluster_history_type);
        ctrl.cluster.histories-- ;
    }

#endif // WANT_ACTIVE_ACTIVE_HEARTBEAT_RESULTS

}
|
||||||
|
|
||||||
|
/* Convenience overload ; forwards to the three argument hbs_cluster_log
 * using this module's own cluster vault.
 *
 * hostname - passed through unchanged
 * prefix   - passed through unchanged
 */
void hbs_cluster_log ( string & hostname, string prefix )
{
    mtce_hbs_cluster_type & vault = ctrl.cluster ;
    hbs_cluster_log ( hostname, vault, prefix );
}
|
||||||
|
|
||||||
|
/****************************************************************************
|
||||||
|
*
|
||||||
|
* Active Active Heartbeating and Debug Member Functions
|
||||||
|
*
|
||||||
|
***************************************************************************/
|
||||||
|
|
||||||
|
/****************************************************************************
|
||||||
|
*
|
||||||
|
* Name : hbs_cluster_cmp
|
||||||
|
*
|
||||||
|
* Description : Performs a sanity check over the cluster structure.
|
||||||
|
*
|
||||||
|
* Assumptions : Debug tool, not called at runtime.
|
||||||
|
*
|
||||||
|
* Returns : PASS or FAIL
|
||||||
|
*
|
||||||
|
***************************************************************************/
|
||||||
|
|
||||||
|
/* Compare the cluster header fields of the specified message against
 * this module's cluster vault.
 *
 * msg - heartbeat message containing the cluster info to check
 *
 * The checks are made in order ; the first mismatch is logged as a
 * warning and ends the comparison.
 *
 * Returns PASS if every check passes, otherwise FAIL.
 */
int hbs_cluster_cmp( hbs_message_type & msg )
{
    if ( msg.cluster.version < ctrl.cluster.version )
    {
        wlog ("Unexpected version (%d:%d)",
               msg.cluster.version, ctrl.cluster.version );
        return (FAIL);
    }

    if ( msg.cluster.revision != ctrl.cluster.revision )
    {
        wlog ("Unexpected revision (%d:%d)",
               msg.cluster.revision, ctrl.cluster.revision );
        return (FAIL);
    }

    if ( msg.cluster.magic_number != ctrl.cluster.magic_number )
    {
        wlog ("Unexpected magic number (%d:%d)",
               msg.cluster.magic_number, ctrl.cluster.magic_number );
        return (FAIL);
    }

    if ( msg.cluster.period_msec != ctrl.cluster.period_msec )
    {
        wlog ("Cluster Heartbeat period delta (%d:%d)",
               msg.cluster.period_msec, ctrl.cluster.period_msec );
        return (FAIL);
    }

    if ( msg.cluster.storage0_enabled != ctrl.cluster.storage0_enabled )
    {
        wlog ("Cluster storage0 enabled state delta (%d:%d)",
               msg.cluster.storage0_enabled, ctrl.cluster.storage0_enabled );
        return (FAIL);
    }

    return (PASS);
}
|
||||||
|
|
||||||
|
/****************************************************************************
|
||||||
|
*
|
||||||
|
* Name : hbs_cluster_save
|
||||||
|
*
|
||||||
|
* Description : Copies the other controllers information from msg into
|
||||||
|
* the cluster.
|
||||||
|
*
|
||||||
|
* NOTE: Does not do that right now.
|
||||||
|
*
|
||||||
|
* Assumptions : Place holder until active/active heartbeating is implemented.
|
||||||
|
*
|
||||||
|
* Returns : PASS or FAIL
|
||||||
|
*
|
||||||
|
***************************************************************************/
|
||||||
|
|
||||||
|
/* Place holder for saving the peer controller's cluster info.
 *
 * hostname - passed to hbs_cluster_log
 * network  - network whose name is used as the log prefix
 * msg      - currently unused
 *
 * For now this only logs this module's own cluster vault, and only
 * while there are monitored hosts.
 *
 * Returns PASS in all cases.
 */
int hbs_cluster_save ( string & hostname,
                       mtce_hbs_network_enum network,
                       hbs_message_type & msg )
{
    /* The peer's cluster info is neither compared nor stored yet. */
    UNUSED(msg);

    if ( ctrl.monitored_hosts == 0 )
        return (PASS);

    hbs_cluster_log( hostname, ctrl.cluster, hbs_cluster_network_name(network) );
    return (PASS);
}
|
|
@ -0,0 +1,86 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2018 Wind River Systems, Inc.
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
|
*
|
||||||
|
* @file StarlingX Maintenance Heartbeat Cluster Manager Module
|
||||||
|
*
|
||||||
|
*************************************************************************
|
||||||
|
*
|
||||||
|
* This module provides API for the hbsAgent service to call to
|
||||||
|
* collect, store and send heartbeat cluster information to SM
|
||||||
|
* upon request. See hbsCluster.h for formal API.
|
||||||
|
*
|
||||||
|
*************************************************************************/
|
||||||
|
|
||||||
|
#ifndef __HBSCLUSTER_H__
|
||||||
|
#define __HBSCLUSTER_H__
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
#include "mtceHbsCluster.h" /* for ... the public API */
|
||||||
|
|
||||||
|
/****************************************************************************
|
||||||
|
*
|
||||||
|
* Name : BYTES_IN_CLUSTER_VAULT
|
||||||
|
*
|
||||||
|
* Description : Calculates the number of bytes in the cluster vault based on
|
||||||
|
* the number of valid history array elements included.
|
||||||
|
*
|
||||||
|
* Parameters :
|
||||||
|
*
|
||||||
|
***************************************************************************/
|
||||||
|
|
||||||
|
/* Size of the cluster vault, in bytes, when only 'e' of its
 * MTCE_HBS_MAX_HISTORY_ELEMENTS history array elements are in use.
 * The argument is parenthesized so that an expression such as 'a+b'
 * is subtracted as a whole. */
#define BYTES_IN_CLUSTER_VAULT(e) \
    (sizeof(mtce_hbs_cluster_type)-(sizeof(mtce_hbs_cluster_history_type)*(MTCE_HBS_MAX_HISTORY_ELEMENTS-(e))))
|
||||||
|
|
||||||
|
/****************************************************************************
 *
 * Name        : CHECK_CTRL_NTWK_PARMS
 *
 * Description : Range check a controller / network pair. If either value
 *               is out of range an slog is produced and the calling
 *               function is exited with a plain 'return ;', so this macro
 *               can only be used inside functions that return void.
 *
 *               Both values are treated as zero based ids, so a value
 *               equal to the maximum count is rejected as well.
 *
 * Parameters  : c - controller id ; valid when < MTCE_HBS_MAX_CONTROLLERS
 *               n - network id    ; valid when < MTCE_HBS_NETWORKS
 *
 ***************************************************************************/

#define CHECK_CTRL_NTWK_PARMS(c,n)                    \
    if (( (c) >= MTCE_HBS_MAX_CONTROLLERS ) ||        \
        ( (n) >= MTCE_HBS_NETWORKS ))                 \
    {                                                 \
        slog ("Invalid parameter: %d:%d", (c), (n));  \
        return ;                                      \
    }
|
||||||
|
|
||||||
|
/****************************************************************************
 *
 * Name        : GET_CLUSTER_HISTORY_PTR
 *
 * Description : Search the populated history elements of a cluster for the
 *               one that matches the specified controller and network and
 *               store its address in 'history_ptr'.
 *
 *               'history_ptr' is not a parameter ; a variable of that name
 *               must already be declared in the scope that uses this macro.
 *               It is left untouched when nothing matches. The search does
 *               not stop at the first match, so if more than one element
 *               matches the last one is selected.
 *
 * Parameters  : cluster - the cluster (vault) object, not a pointer to it
 *               c       - controller id to match
 *               n       - network id to match
 *
 ***************************************************************************/

#define GET_CLUSTER_HISTORY_PTR(cluster, c,n)                      \
    for ( int h = 0 ; h < (cluster).histories ; h++ )              \
    {                                                              \
        if (( (cluster).history[h].controller == (c) ) &&          \
            ( (cluster).history[h].network    == (n) ))            \
        {                                                          \
            history_ptr = &(cluster).history[h] ;                  \
        }                                                          \
    }
|
||||||
|
|
||||||
|
|
||||||
|
/****************************************************************************
 *
 * Name        : SET_CONTROLLER_HOSTNAME
 *
 * Description : Translate a controller id into a controller hostname and
 *               assign it to 'controller'.
 *
 *               'controller' is not a parameter ; a variable of that name
 *               must already be declared in the scope that uses this macro.
 *               Ids other than 0, 1 and 2 yield "unknown".
 *
 *               The macro expands to a single statement and evaluates its
 *               argument exactly once. The caller supplies the trailing
 *               semicolon.
 *
 * Parameters  : c - controller id (any integer expression)
 *
 ***************************************************************************/

#define SET_CONTROLLER_HOSTNAME(c)                           \
    do                                                       \
    {                                                        \
        switch ((c))                                         \
        {                                                    \
            case 0:  controller = CONTROLLER_0 ; break ;     \
            case 1:  controller = CONTROLLER_1 ; break ;     \
            case 2:  controller = CONTROLLER_2 ; break ;     \
            default: controller = "unknown"    ; break ;     \
        }                                                    \
    } while (0)
|
||||||
|
|
||||||
|
#endif // __HBSCLUSTER_H__
|
|
@ -0,0 +1,346 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2018 Wind River Systems, Inc.
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
|
*
|
||||||
|
* @file Maintenance Heartbeat Utilities Module
|
||||||
|
*
|
||||||
|
*************************************************************************
|
||||||
|
*
|
||||||
|
* This module provides heartbeat utilities that are common to both
|
||||||
|
* hbsAgent and hbsClient.
|
||||||
|
*
|
||||||
|
*************************************************************************/
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
#include "daemon_common.h" /* common daemon constructs and definitions */
|
||||||
|
#include "hbsBase.h" /* mtce heartbeat constructs and definitions */
|
||||||
|
|
||||||
|
/* hbs_cluster_log utility support. log control array. */
|
||||||
|
bool first_log[MTCE_HBS_MAX_HISTORY_ELEMENTS]; /* has first history log out */
|
||||||
|
bool was_diff [MTCE_HBS_MAX_HISTORY_ELEMENTS]; /* was there a history diff */
|
||||||
|
|
||||||
|
|
||||||
|
/****************************************************************************
|
||||||
|
*
|
||||||
|
* Name : hbs_utils_init
|
||||||
|
*
|
||||||
|
* Description : Module Init function
|
||||||
|
*
|
||||||
|
***************************************************************************/
|
||||||
|
|
||||||
|
void hbs_utils_init ( void )
|
||||||
|
{
|
||||||
|
MEMSET_ZERO ( first_log );
|
||||||
|
MEMSET_ZERO ( was_diff );
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/****************************************************************************
|
||||||
|
*
|
||||||
|
* Name : hbs_cluster_history_init
|
||||||
|
*
|
||||||
|
* Description : Initialize a cluster history element.
|
||||||
|
*
|
||||||
|
* Parameters : Reference to a mtce_hbs_cluster_history_type (history element)
|
||||||
|
*
|
||||||
|
* Returns : Nothing
|
||||||
|
*
|
||||||
|
***************************************************************************/
|
||||||
|
|
||||||
|
void hbs_cluster_history_init ( mtce_hbs_cluster_history_type & history )
|
||||||
|
{
|
||||||
|
MEMSET_ZERO(history);
|
||||||
|
history.entries_max = MTCE_HBS_HISTORY_ENTRIES ;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/****************************************************************************
|
||||||
|
*
|
||||||
|
* Name : hbs_cluster_history_clear
|
||||||
|
*
|
||||||
|
* Description : Clear all history in the cluster vault.
|
||||||
|
*
|
||||||
|
* Parameters : mtce_hbs_cluster_type instance : the vault.
|
||||||
|
*
|
||||||
|
* Returns : Nothing
|
||||||
|
*
|
||||||
|
***************************************************************************/
|
||||||
|
|
||||||
|
void hbs_cluster_history_clear ( mtce_hbs_cluster_type & cluster )
|
||||||
|
{
|
||||||
|
if ( cluster.histories )
|
||||||
|
{
|
||||||
|
for ( int h = 0 ; h < cluster.histories ; h++ )
|
||||||
|
hbs_cluster_history_init ( cluster.history[h] ) ;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/****************************************************************************
|
||||||
|
*
|
||||||
|
* Name : cluster_network_name
|
||||||
|
*
|
||||||
|
* Description : converts what is a heartbeat cluster network id to
|
||||||
|
* network name.
|
||||||
|
*
|
||||||
|
* Parameters : network id
|
||||||
|
*
|
||||||
|
* Returns : network name as a string
|
||||||
|
*
|
||||||
|
***************************************************************************/
|
||||||
|
|
||||||
|
string hbs_cluster_network_name ( mtce_hbs_network_enum network )
|
||||||
|
{
|
||||||
|
switch ( network )
|
||||||
|
{
|
||||||
|
case MTCE_HBS_NETWORK_MGMT:
|
||||||
|
return ("Mgmnt");
|
||||||
|
case MTCE_HBS_NETWORK_INFRA:
|
||||||
|
return ("Infra");
|
||||||
|
|
||||||
|
#ifdef MONITORED_OAM_NETWORK
|
||||||
|
case MTCE_HBS_NETWORK_OAM:
|
||||||
|
return ("Oam");
|
||||||
|
#endif
|
||||||
|
|
||||||
|
default:
|
||||||
|
slog ("invalid network enum (%d)", network );
|
||||||
|
return ("unknown");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/****************************************************************************
|
||||||
|
*
|
||||||
|
* Name : hbs_cluster_log
|
||||||
|
*
|
||||||
|
* Description : logs changes to the heartbeat cluster
|
||||||
|
*
|
||||||
|
* Parameters : The heartbeat cluster structure
|
||||||
|
*
|
||||||
|
* Returns : Nothing
|
||||||
|
*
|
||||||
|
***************************************************************************/
|
||||||
|
|
||||||
|
void hbs_cluster_log ( string & hostname,
|
||||||
|
mtce_hbs_cluster_type & cluster,
|
||||||
|
string log_prefix )
|
||||||
|
{
|
||||||
|
// bool want_log = false ;
|
||||||
|
|
||||||
|
clog1 ("log %d histories", cluster.histories );
|
||||||
|
for ( int h = 0 ; h < cluster.histories ; h++ )
|
||||||
|
{
|
||||||
|
if ( cluster.history[h].entries == MTCE_HBS_HISTORY_ENTRIES )
|
||||||
|
{
|
||||||
|
#define MAX_CLUSTER_LINE_LEN 100
|
||||||
|
#define MAX_ENTRY_STR_LEN 10 /* "9999:9999 " */
|
||||||
|
mtce_hbs_cluster_entry_type e = { 0, 0 } ;
|
||||||
|
char str[MAX_CLUSTER_LINE_LEN] ;
|
||||||
|
string line = "";
|
||||||
|
int start = 0 ;
|
||||||
|
int stop = 0 ;
|
||||||
|
bool newline = false ;
|
||||||
|
bool logit = false ;
|
||||||
|
bool first = false ;
|
||||||
|
string controller = "" ;
|
||||||
|
|
||||||
|
mtce_hbs_cluster_history_type * history_ptr = &cluster.history[h] ;
|
||||||
|
|
||||||
|
clog1 ("%s %s has %d entries (controller-%d view from %s)", hostname.c_str(),
|
||||||
|
hbs_cluster_network_name((mtce_hbs_network_enum)history_ptr->network).c_str(),
|
||||||
|
history_ptr->entries,
|
||||||
|
history_ptr->controller,
|
||||||
|
log_prefix.c_str());
|
||||||
|
|
||||||
|
|
||||||
|
/* Manage local this_index for log display.
|
||||||
|
* Display oldest to newest ; left to right
|
||||||
|
*
|
||||||
|
* */
|
||||||
|
int this_index = history_ptr->oldest_entry_index ;
|
||||||
|
for ( int count = 0 ; count < history_ptr->entries ; count++ )
|
||||||
|
{
|
||||||
|
if (( line.length() + MAX_ENTRY_STR_LEN ) >=
|
||||||
|
MAX_CLUSTER_LINE_LEN )
|
||||||
|
{
|
||||||
|
newline = true ;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef WANT_MINIMAL_LOGS
|
||||||
|
/* TODO: enable in final update */
|
||||||
|
if (( first_log[h] == true ) && ( newline == false ) &&
|
||||||
|
( history_ptr->entry[this_index].hosts_enabled ==
|
||||||
|
history_ptr->entry[this_index].hosts_responding ))
|
||||||
|
{
|
||||||
|
line.append(". ");
|
||||||
|
continue ;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// want_log = true ;
|
||||||
|
|
||||||
|
if ( count == 0 )
|
||||||
|
{
|
||||||
|
snprintf (&str[0], MAX_ENTRY_STR_LEN , "%d:%d ", // -%d",
|
||||||
|
history_ptr->entry[this_index].hosts_enabled,
|
||||||
|
history_ptr->entry[this_index].hosts_responding ); // , this_index );
|
||||||
|
line.append (str);
|
||||||
|
str[0] = '\0' ;
|
||||||
|
}
|
||||||
|
//#ifdef WANT_DOTS
|
||||||
|
else if (( history_ptr->entry[this_index].hosts_enabled ==
|
||||||
|
e.hosts_enabled ) &&
|
||||||
|
( history_ptr->entry[this_index].hosts_responding ==
|
||||||
|
e.hosts_responding ))
|
||||||
|
{
|
||||||
|
line.append(". ");
|
||||||
|
}
|
||||||
|
//#endif
|
||||||
|
else
|
||||||
|
{
|
||||||
|
snprintf (&str[0], MAX_ENTRY_STR_LEN , "%d:%d ", // -%d",
|
||||||
|
history_ptr->entry[this_index].hosts_enabled,
|
||||||
|
history_ptr->entry[this_index].hosts_responding ); // , this_index );
|
||||||
|
line.append (str);
|
||||||
|
str[0] = '\0' ;
|
||||||
|
logit = true ;
|
||||||
|
was_diff[h] = true ;
|
||||||
|
}
|
||||||
|
if (( logit == false ) && ( first_log[h] == false ))
|
||||||
|
{
|
||||||
|
first_log[h] = true ;
|
||||||
|
logit = true ;
|
||||||
|
}
|
||||||
|
stop++ ;
|
||||||
|
if ( newline == true )
|
||||||
|
{
|
||||||
|
if ( logit )
|
||||||
|
{
|
||||||
|
SET_CONTROLLER_HOSTNAME(history_ptr->controller);
|
||||||
|
if ( hostname == controller )
|
||||||
|
{
|
||||||
|
clog ("%s view %s %s %02d..%02d: %s,",
|
||||||
|
hostname.c_str(),
|
||||||
|
log_prefix.c_str(),
|
||||||
|
hbs_cluster_network_name((mtce_hbs_network_enum)history_ptr->network).c_str(),
|
||||||
|
start, stop, line.c_str());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
clog ("%s view from %s %s %s %02d..%02d: %s,",
|
||||||
|
controller.c_str(),
|
||||||
|
hostname.c_str(),
|
||||||
|
log_prefix.c_str(),
|
||||||
|
hbs_cluster_network_name((mtce_hbs_network_enum)history_ptr->network).c_str(),
|
||||||
|
start, stop, line.c_str());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
start = stop + 1 ;
|
||||||
|
line.clear();
|
||||||
|
first = true ;
|
||||||
|
newline = false ;
|
||||||
|
}
|
||||||
|
e = history_ptr->entry[this_index] ;
|
||||||
|
|
||||||
|
/* manage index tracking */
|
||||||
|
if ( this_index == (MTCE_HBS_HISTORY_ENTRIES-1))
|
||||||
|
this_index = 0 ;
|
||||||
|
else
|
||||||
|
this_index++ ;
|
||||||
|
}
|
||||||
|
if (( newline == false ) && ( line.length() ))
|
||||||
|
{
|
||||||
|
// ERIC
|
||||||
|
if (( logit == false ) && ( was_diff[h] == true ))
|
||||||
|
{
|
||||||
|
logit = true ;
|
||||||
|
was_diff[h] = false ;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( logit )
|
||||||
|
{
|
||||||
|
if ( first )
|
||||||
|
{
|
||||||
|
clog ("............ %s %s %02d..%02d: %s",
|
||||||
|
log_prefix.c_str(),
|
||||||
|
hbs_cluster_network_name((mtce_hbs_network_enum)history_ptr->network).c_str(),
|
||||||
|
start, stop, line.c_str());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
SET_CONTROLLER_HOSTNAME(history_ptr->controller);
|
||||||
|
if ( hostname == controller )
|
||||||
|
{
|
||||||
|
clog ("%s view %s %s %02d..%02d: %s",
|
||||||
|
hostname.c_str(),
|
||||||
|
log_prefix.c_str(),
|
||||||
|
hbs_cluster_network_name((mtce_hbs_network_enum)history_ptr->network).c_str(),
|
||||||
|
start, stop, line.c_str());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
clog ("%s view from %s %s %s %02d..%02d: %s",
|
||||||
|
controller.c_str(),
|
||||||
|
hostname.c_str(),
|
||||||
|
log_prefix.c_str(), /* Infra <- */
|
||||||
|
hbs_cluster_network_name((mtce_hbs_network_enum)history_ptr->network).c_str(),
|
||||||
|
start, stop, line.c_str());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
was_diff[h] = false ;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/****************************************************************************
|
||||||
|
*
|
||||||
|
* name : hbs_cluster_dump
|
||||||
|
*
|
||||||
|
* Description: Formatted dump of the vault contents to the log file.
|
||||||
|
*
|
||||||
|
***************************************************************************/
|
||||||
|
void hbs_cluster_dump ( mtce_hbs_cluster_type & vault )
|
||||||
|
{
|
||||||
|
syslog ( LOG_INFO, "Cluster Vault Dump: --------------------------------------------------------------------------------------------");
|
||||||
|
syslog ( LOG_INFO, "Cluster Vault: v%d.%d %d msec period ; SM Reqid is %d with storage-0 %s and %d histories in %d bytes",
|
||||||
|
vault.version,
|
||||||
|
vault.revision,
|
||||||
|
vault.period_msec,
|
||||||
|
vault.reqid,
|
||||||
|
vault.storage0_enabled ? "enabled" : "disabled",
|
||||||
|
vault.histories,
|
||||||
|
vault.bytes );
|
||||||
|
for ( int h = 0 ; h < vault.histories ; h++ )
|
||||||
|
{
|
||||||
|
#define MAX_LINE_LEN (500)
|
||||||
|
char str[MAX_LINE_LEN] ;
|
||||||
|
int i = 0 ;
|
||||||
|
for ( int e = 0 ; e < vault.history[h].entries_max ; e++ )
|
||||||
|
{
|
||||||
|
snprintf ( &str[i], MAX_LINE_LEN, "%c[%d:%d]" ,
|
||||||
|
vault.history[h].oldest_entry_index==e ? '>' : ' ',
|
||||||
|
vault.history[h].entry[e].hosts_enabled,
|
||||||
|
vault.history[h].entry[e].hosts_responding);
|
||||||
|
i = strlen(str) ;
|
||||||
|
}
|
||||||
|
syslog ( LOG_INFO, "Cluster Vault: C%d %s S:%s:%s (%d:%d) %s",
|
||||||
|
vault.history[h].controller,
|
||||||
|
hbs_cluster_network_name((mtce_hbs_network_enum)vault.history[h].network).c_str(),
|
||||||
|
vault.storage0_enabled ? "y" : "n",
|
||||||
|
vault.history[h].storage0_responding ? "y" : "n",
|
||||||
|
vault.history[h].entries_max,
|
||||||
|
vault.history[h].entries,
|
||||||
|
str);
|
||||||
|
}
|
||||||
|
// dump_memory ( &vault, 16, vault.bytes );
|
||||||
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,109 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2018 Wind River Systems, Inc.
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
|
*
|
||||||
|
* @file StarlingX Maintenance Heartbeat Cluster Manager Module
|
||||||
|
*
|
||||||
|
*************************************************************************
|
||||||
|
*
|
||||||
|
* This module provides API for the hbsAgent service to call to
|
||||||
|
* collect, store and send heartbeat cluster information to SM
|
||||||
|
* upon request. See hbsCluster.h for formal API.
|
||||||
|
*
|
||||||
|
*************************************************************************/
|
||||||
|
|
||||||
|
#ifndef __MTCEHBSCLUSTER_H__
|
||||||
|
#define __MTCEHBSCLUSTER_H__
|
||||||
|
|
||||||
|
#include <sys/types.h>
|
||||||
|
|
||||||
|
/**************************************************************
|
||||||
|
* Implementation Structure
|
||||||
|
*************************************************************/
|
||||||
|
|
||||||
|
#define MTCE_HBS_CLUSTER_VERSION (1)
|
||||||
|
#define MTCE_HBS_CLUSTER_REVISION (0)
|
||||||
|
#define MTCE_HBS_MAGIC_NUMBER (0x5aa5)
|
||||||
|
|
||||||
|
/* Ids of the heartbeat monitored networks.
 * MTCE_HBS_NETWORKS is not a network ; being last it equals the number
 * of networks defined above it. */
typedef enum
{
    MTCE_HBS_NETWORK_MGMT  = 0,
    MTCE_HBS_NETWORK_INFRA = 1,

#ifdef MONITORED_OAM_NETWORK
    MTCE_HBS_NETWORK_OAM   = 2,
#endif

    MTCE_HBS_NETWORKS
} mtce_hbs_network_enum ;
|
||||||
|
|
||||||
|
#ifdef THREE_CONTROLLER_SYSTEM
|
||||||
|
#define MTCE_HBS_MAX_CONTROLLERS (3)
|
||||||
|
#else
|
||||||
|
#define MTCE_HBS_MAX_CONTROLLERS (2)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef MONITORED_OAM_NETWORK
|
||||||
|
#define MTCE_HBS_MAX_NETWORKS (3)
|
||||||
|
#else
|
||||||
|
#define MTCE_HBS_MAX_NETWORKS (2)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* value of 20 at 100 msec period is 2 seconds of history */
|
||||||
|
#define MTCE_HBS_HISTORY_ENTRIES (20)
|
||||||
|
|
||||||
|
/* maximum number of history elements permitted in a cluster history summary */
|
||||||
|
#define MTCE_HBS_MAX_HISTORY_ELEMENTS ((MTCE_HBS_MAX_CONTROLLERS)*(MTCE_HBS_NETWORKS))
|
||||||
|
|
||||||
|
#ifndef ALIGN_PACK
|
||||||
|
#define ALIGN_PACK(x) __attribute__((packed)) x
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* A single element of Heartbeat Cluster History for one heartbeat period */
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
unsigned short hosts_enabled ; /* # of hosts being hb monitored */
|
||||||
|
unsigned short hosts_responding ; /* # of hosts that responsed to hb*/
|
||||||
|
} ALIGN_PACK(mtce_hbs_cluster_entry_type);
|
||||||
|
|
||||||
|
|
||||||
|
/* Heartbeat Cluster History for all monitored networks of a Controller */
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
unsigned short controller :4 ; /* value 0 or 1 (and 2 in future) */
|
||||||
|
unsigned short network :4 ; /* see mtce_hbs_network_enum */
|
||||||
|
unsigned short reserved_bits :7 ; /* future - initted to 0 */
|
||||||
|
unsigned short storage0_responding:1 ; /* 1 = storage-0 is hb healthy */
|
||||||
|
unsigned short entries ; /* # of valid values in .entry */
|
||||||
|
unsigned short entries_max ; /* max size of the enry array */
|
||||||
|
unsigned short oldest_entry_index ; /* the oldest entry in the array */
|
||||||
|
|
||||||
|
/* historical array of entries for a specific network */
|
||||||
|
mtce_hbs_cluster_entry_type entry [MTCE_HBS_HISTORY_ENTRIES] ;
|
||||||
|
|
||||||
|
} ALIGN_PACK(mtce_hbs_cluster_history_type) ;
|
||||||
|
|
||||||
|
/* Heartbeat Cluster History for all monitored networks of all Controllers */
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
/* Header - Static Data - 4 bytes */
|
||||||
|
unsigned char version ; /* public API MTCE_HBS_CLUSTER_VERSION */
|
||||||
|
unsigned char revision ; /* public API MTCE_HBS_CLUSTER_REVISION */
|
||||||
|
unsigned short magic_number ; /* public API MTCE_HBS_MAGIC_NUMBER */
|
||||||
|
|
||||||
|
/* Control - Dynamic Data - 8 bytes */
|
||||||
|
unsigned short reqid ; /* added from SM cluster request */
|
||||||
|
unsigned short period_msec ; /* heartbeat period in milliseconds */
|
||||||
|
unsigned short bytes ; /* total struct size self check */
|
||||||
|
unsigned char storage0_enabled; /* bool containing true or false */
|
||||||
|
unsigned char histories ; /* How many hostory elements follow */
|
||||||
|
|
||||||
|
/* Array of Cluster History
|
||||||
|
*
|
||||||
|
* - histories above specifies how many
|
||||||
|
* elements of this array are populated.
|
||||||
|
*/
|
||||||
|
mtce_hbs_cluster_history_type history [MTCE_HBS_MAX_HISTORY_ELEMENTS] ;
|
||||||
|
|
||||||
|
} ALIGN_PACK(mtce_hbs_cluster_type) ;
|
||||||
|
|
||||||
|
#endif // __MTCEHBSCLUSTER_H__
|
|
@ -23,6 +23,7 @@ SRCS += mtcKeyApi.cpp
|
||||||
SRCS += mtcCmdHdlr.cpp
|
SRCS += mtcCmdHdlr.cpp
|
||||||
SRCS += mtcNodeMnfa.cpp
|
SRCS += mtcNodeMnfa.cpp
|
||||||
SRCS += mtcVimApi.cpp
|
SRCS += mtcVimApi.cpp
|
||||||
|
SRCS += mtcStubs.cpp
|
||||||
|
|
||||||
COMPUTE_OBJS = mtcNodeComp.o
|
COMPUTE_OBJS = mtcNodeComp.o
|
||||||
COMPUTE_OBJS += mtcCompMsg.o
|
COMPUTE_OBJS += mtcCompMsg.o
|
||||||
|
|
|
@ -1935,8 +1935,10 @@ int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr )
|
||||||
* the host has not reset yet we have disabled services
|
* the host has not reset yet we have disabled services
|
||||||
* then now we need to reset the host to prevet VM duplication
|
* then now we need to reset the host to prevet VM duplication
|
||||||
* by forcing a full enable */
|
* by forcing a full enable */
|
||||||
if (( node_ptr->uptime_save != 0 ) &&
|
if ((( node_ptr->uptime_save != 0 ) &&
|
||||||
( node_ptr->uptime >= node_ptr->uptime_save ))
|
( node_ptr->uptime >= node_ptr->uptime_save )) ||
|
||||||
|
(( node_ptr->uptime_save == 0 ) &&
|
||||||
|
( node_ptr->uptime > MTC_MINS_15 )))
|
||||||
{
|
{
|
||||||
ilog ("%s regained MTCALIVE from host that did not reboot (uptime:%d)\n",
|
ilog ("%s regained MTCALIVE from host that did not reboot (uptime:%d)\n",
|
||||||
node_ptr->hostname.c_str(), node_ptr->uptime );
|
node_ptr->hostname.c_str(), node_ptr->uptime );
|
||||||
|
|
|
@ -0,0 +1,17 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2013, 2016 Wind River Systems, Inc.
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @file
|
||||||
|
* Maintenance Agent Stubs
|
||||||
|
*/
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
#include "nodeClass.h" /* The main link class */
|
||||||
|
|
||||||
|
/* Stub ; takes no arguments and does nothing. */
void hbs_cluster_log ( void )
{
}
|
|
@ -0,0 +1,40 @@
|
||||||
|
#!/bin/bash

# Copyright (c) 2013-2016 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
#
# Test utility that issues a heartbeat 'cluster_info' request.
#
# This script sends a json string, with "sm" as the origin, containing
# the 'cluster_info' request for the 'heartbeat' service over the
# loopback interface to the UDP port that /etc/mtc.ini specifies as
# 'sm_server_port'.
#
# Usage: <this script> [reqid]
#
#    reqid - optional numeric request id to put in the request ;
#            defaults to 123
#
# Exits with the exit status of socat, or with one of the error codes
# below when the request could not be sent at all.
#
# Linux Standard Base (LSB) Error Codes
RETVAL=0
GENERIC_ERROR=1
INVALID_ARGS=2
UNSUPPORTED_FEATURE=3
NOT_INSTALLED=5
NOT_RUNNING=7

PROTOCOL="UDP4-DATAGRAM"
ADDRESS="127.0.0.1"
MTC_INI="/etc/mtc.ini"

socat_exec=$(command -v socat 2> /dev/null)

if [ -z "${socat_exec}" ] ; then
    logger "Error: $0 cannot find socat exec"
    exit ${NOT_INSTALLED}
fi

reqid=123

if [ "${1}" != "" ] ; then
    reqid=${1}
fi

# reqid is embedded unquoted in the json string so it has to be a number
case "${reqid}" in
    *[!0-9]*)
        logger "Error: $0 reqid must be numeric (${reqid})"
        exit ${INVALID_ARGS}
        ;;
esac

port=$(awk '{if ($1 == "sm_server_port") { print $3; }}' "${MTC_INI}")

if [ -z "${port}" ] ; then
    logger "Error: $0 cannot find sm_server_port in ${MTC_INI}"
    exit ${GENERIC_ERROR}
fi

echo "{\"origin\":\"sm\", \"service\":\"heartbeat\", \"request\":\"cluster_info\", \"reqid\": $reqid }" | socat - ${PROTOCOL}:${ADDRESS}:${port}
RETVAL=$?

exit ${RETVAL}
|
Loading…
Reference in New Issue