Merge "Implement Active-Active Heartbeat as HA Improvement"

This commit is contained in:
Zuul 2018-11-21 16:42:56 +00:00 committed by Gerrit Code Review
commit abf0ff3986
30 changed files with 1678 additions and 837 deletions

View File

@ -39,7 +39,6 @@ typedef struct
{ {
int scheduling_priority ; /**< Scheduling priority of this daemon */ int scheduling_priority ; /**< Scheduling priority of this daemon */
bool active ; /**< Maintenance activity state true|false */ bool active ; /**< Maintenance activity state true|false */
int hbs_pulse_period ; /**< time (msec) between heartbeat requests */
int token_refresh_rate ; /**< token refresh rate in seconds */ int token_refresh_rate ; /**< token refresh rate in seconds */
int hbs_minor_threshold ; /**< heartbeat miss minor threshold */ int hbs_minor_threshold ; /**< heartbeat miss minor threshold */
int hbs_degrade_threshold ; /**< heartbeat miss degrade threshold */ int hbs_degrade_threshold ; /**< heartbeat miss degrade threshold */
@ -351,7 +350,7 @@ extern char *program_invocation_short_name;
} }
#define blog(format, args...) { \ #define blog(format, args...) { \
if ( ltc() ) { if(daemon_get_cfg_ptr()->debug_bmgmt) printf ( "%s [%d.%05d] %s %s %-3s %-18s(%4d) %-24s: BMgt : " format, pt(), getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \ if ( ltc() ) { if(daemon_get_cfg_ptr()->debug_bmgmt&1) printf ( "%s [%d.%05d] %s %s %-3s %-18s(%4d) %-24s: BMgt : " format, pt(), getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \
else { if(daemon_get_cfg_ptr()->debug_bmgmt) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: BMgt : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \ else { if(daemon_get_cfg_ptr()->debug_bmgmt) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: BMgt : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \
} }
@ -380,22 +379,22 @@ extern char *program_invocation_short_name;
#define mlog2(format, args...) { if(daemon_get_cfg_ptr()->debug_msg&4 ) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Msg4 : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } #define mlog2(format, args...) { if(daemon_get_cfg_ptr()->debug_msg&4 ) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Msg4 : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define mlog3(format, args...) { if(daemon_get_cfg_ptr()->debug_msg&8 ) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Msg8 : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } #define mlog3(format, args...) { if(daemon_get_cfg_ptr()->debug_msg&8 ) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Msg8 : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define jlog(format, args...) { if(daemon_get_cfg_ptr()->debug_json ) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Json : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } #define jlog(format, args...) { if(daemon_get_cfg_ptr()->debug_json&1) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Json : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define jlog1(format, args...) { if(daemon_get_cfg_ptr()->debug_json&2) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Json2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } #define jlog1(format, args...) { if(daemon_get_cfg_ptr()->debug_json&2) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Json2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define jlog2(format, args...) { if(daemon_get_cfg_ptr()->debug_json&4) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Json4: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } #define jlog2(format, args...) { if(daemon_get_cfg_ptr()->debug_json&4) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Json4: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define jlog3(format, args...) { if(daemon_get_cfg_ptr()->debug_json&8) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Json8: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } #define jlog3(format, args...) { if(daemon_get_cfg_ptr()->debug_json&8) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Json8: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define hlog(format, args...) { if(daemon_get_cfg_ptr()->debug_http) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Http : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } #define hlog(format, args...) { if(daemon_get_cfg_ptr()->debug_http&1) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Http : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define hlog1(format, args...) { if(daemon_get_cfg_ptr()->debug_http&2) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Http2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } #define hlog1(format, args...) { if(daemon_get_cfg_ptr()->debug_http&2) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Http2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define hlog2(format, args...) { if(daemon_get_cfg_ptr()->debug_http&4) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Http4: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } #define hlog2(format, args...) { if(daemon_get_cfg_ptr()->debug_http&4) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Http4: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define hlog3(format, args...) { if(daemon_get_cfg_ptr()->debug_http&8) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Http8: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } #define hlog3(format, args...) { if(daemon_get_cfg_ptr()->debug_http&8) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Http8: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define alog(format, args...) { if(daemon_get_cfg_ptr()->debug_alive ) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Alive : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } #define alog(format, args...) { if(daemon_get_cfg_ptr()->debug_alive&1) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Alive : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define alog1(format, args...) { if(daemon_get_cfg_ptr()->debug_alive&2) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Alive2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } #define alog1(format, args...) { if(daemon_get_cfg_ptr()->debug_alive&2) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Alive2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define alog2(format, args...) { if(daemon_get_cfg_ptr()->debug_alive&4) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Alive4: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } #define alog2(format, args...) { if(daemon_get_cfg_ptr()->debug_alive&4) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Alive4: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define alog3(format, args...) { if(daemon_get_cfg_ptr()->debug_alive&8) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Alive8: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } #define alog3(format, args...) { if(daemon_get_cfg_ptr()->debug_alive&8) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Alive8: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define qlog(format, args...) { if(daemon_get_cfg_ptr()->debug_work) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Work : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } #define qlog(format, args...) { if(daemon_get_cfg_ptr()->debug_work&1) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Work : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define qlog1(format, args...) { if(daemon_get_cfg_ptr()->debug_work&2) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Work2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } #define qlog1(format, args...) { if(daemon_get_cfg_ptr()->debug_work&2) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Work2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define qlog2(format, args...) { if(daemon_get_cfg_ptr()->debug_work&4) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Work4: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } #define qlog2(format, args...) { if(daemon_get_cfg_ptr()->debug_work&4) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Work4: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define qlog3(format, args...) { if(daemon_get_cfg_ptr()->debug_work&8) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Work8: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } #define qlog3(format, args...) { if(daemon_get_cfg_ptr()->debug_work&8) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Work8: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
@ -403,8 +402,11 @@ extern char *program_invocation_short_name;
#define flog(format, args...) { if(daemon_get_cfg_ptr()->debug_fsm) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: FSM : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } #define flog(format, args...) { if(daemon_get_cfg_ptr()->debug_fsm) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: FSM : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define tlog(format, args...) { if(daemon_get_cfg_ptr()->debug_timer) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Timer: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } #define tlog(format, args...) { if(daemon_get_cfg_ptr()->debug_timer) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Timer: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define clog(format, args...) { if(daemon_get_cfg_ptr()->debug_state) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Change: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } #define clog(format, args...) { if(daemon_get_cfg_ptr()->debug_state&1) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Change: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define clog1(format, args...) { if(daemon_get_cfg_ptr()->debug_state&2) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Chang2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } #define clog1(format, args...) { if(daemon_get_cfg_ptr()->debug_state&2) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Chang2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define clog2(format, args...) { if(daemon_get_cfg_ptr()->debug_state&4) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Chang4: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define clog3(format, args...) { if(daemon_get_cfg_ptr()->debug_state&8) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Chang8: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define log_event(format, args...) { syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Event: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } #define log_event(format, args...) { syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Event: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define log_stress(format, args...) { syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Stress: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } #define log_stress(format, args...) { syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Stress: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }

View File

@ -233,6 +233,7 @@ const char * get_mtcNodeCommand_str ( int cmd )
case MTC_CMD_QRY_HOST: return("query host"); case MTC_CMD_QRY_HOST: return("query host");
case MTC_CMD_START_HOST: return("start host service"); case MTC_CMD_START_HOST: return("start host service");
case MTC_CMD_STOP_HOST: return("stop host service"); case MTC_CMD_STOP_HOST: return("stop host service");
case MTC_CMD_ACTIVE_CTRL: return("publish active controller");
/* VM Instance Commands */ /* VM Instance Commands */
case MTC_CMD_ADD_INST: return("add instance"); case MTC_CMD_ADD_INST: return("add instance");

View File

@ -359,6 +359,7 @@ void daemon_exit ( void );
* a power-off to online transition */ * a power-off to online transition */
#define MTC_MTCALIVE_HITS_TO_GO_ONLINE (5) #define MTC_MTCALIVE_HITS_TO_GO_ONLINE (5)
#define CONTROLLER_X ((const char *)"controller-x")
#define CONTROLLER_0 ((const char *)"controller-0") #define CONTROLLER_0 ((const char *)"controller-0")
#define CONTROLLER_1 ((const char *)"controller-1") #define CONTROLLER_1 ((const char *)"controller-1")
#define CONTROLLER_2 ((const char *)"controller-2") #define CONTROLLER_2 ((const char *)"controller-2")
@ -526,7 +527,8 @@ typedef struct
#define MTC_CMD_MOD_HOST (0x11110012) /* Modify Host */ #define MTC_CMD_MOD_HOST (0x11110012) /* Modify Host */
#define MTC_CMD_QRY_HOST (0x11110013) /* Query Host */ #define MTC_CMD_QRY_HOST (0x11110013) /* Query Host */
#define MTC_CMD_START_HOST (0x11110014) /* Start Monitoring Host */ #define MTC_CMD_START_HOST (0x11110014) /* Start Monitoring Host */
#define MTC_CMD_STOP_HOST (0x11110015) /* Stop Moniroting Host */ #define MTC_CMD_STOP_HOST (0x11110015) /* Stop Monitoring Host */
#define MTC_CMD_ACTIVE_CTRL (0x11110016) /* Active Controller */
#define MTC_CMD_ADD_INST (0x11110020) /* Add Inst */ #define MTC_CMD_ADD_INST (0x11110020) /* Add Inst */
#define MTC_CMD_DEL_INST (0x11110021) /* Delete Inst */ #define MTC_CMD_DEL_INST (0x11110021) /* Delete Inst */
@ -643,6 +645,9 @@ typedef struct
#define PMOND_FLAG (0x00000001) /**< Process Monitor O.K. Flag */ #define PMOND_FLAG (0x00000001) /**< Process Monitor O.K. Flag */
#define INFRA_FLAG (0x00000002) /**< Infrastructure iface provisioned Flag */ #define INFRA_FLAG (0x00000002) /**< Infrastructure iface provisioned Flag */
#define CTRLX_MASK (0x00000300) /**< From/To Controller-0/1/2/3 Number */
#define CTRLX_BIT ((unsigned int)8) /**< used to shift right mask into bit 0 */
#define STALL_MON_FLAG (0x00010000) /**< Flag indicating hang monitor running */ #define STALL_MON_FLAG (0x00010000) /**< Flag indicating hang monitor running */
#define STALL_REC_FLAG (0x00020000) /**< Flag indicating hbsClient took action */ #define STALL_REC_FLAG (0x00020000) /**< Flag indicating hbsClient took action */
#define STALL_ERR1_FLAG (0x00100000) /**< Error 1 Flag */ #define STALL_ERR1_FLAG (0x00100000) /**< Error 1 Flag */
@ -1217,15 +1222,15 @@ string get_availStatus_str ( mtc_nodeAvailStatus_enum availStatus );
string get_operState_str ( mtc_nodeOperState_enum operState ); string get_operState_str ( mtc_nodeOperState_enum operState );
string get_adminState_str ( mtc_nodeAdminState_enum adminState ); string get_adminState_str ( mtc_nodeAdminState_enum adminState );
void log_adminAction ( string hostname, void log_adminAction ( string hostname,
mtc_nodeAdminAction_enum currAction, mtc_nodeAdminAction_enum currAction,
mtc_nodeAdminAction_enum newAction ); mtc_nodeAdminAction_enum newAction );
int send_hbs_command ( string hostname, int command ); int send_hbs_command ( string hostname, int command, string controller=CONTROLLER );
int send_hwmon_command ( string hostname, int command ); int send_hwmon_command ( string hostname, int command );
int send_guest_command ( string hostname, int command ); int send_guest_command ( string hostname, int command );
int daemon_log_message ( const char * hostname, int daemon_log_message ( const char * hostname,
const char * filename, const char * filename,
const char * log_str ); const char * log_str );

View File

@ -48,6 +48,7 @@
#define MTC_MINS_20 (1200) #define MTC_MINS_20 (1200)
#define MTC_MINS_30 (1800) #define MTC_MINS_30 (1800)
#define MTC_MINS_40 (2400) #define MTC_MINS_40 (2400)
#define MTC_HRS_1 (3600)
#define MTC_HRS_4 (14400) #define MTC_HRS_4 (14400)
#define MTC_HRS_8 (28800) /* old token refresh rate */ #define MTC_HRS_8 (28800) /* old token refresh rate */

View File

@ -269,7 +269,7 @@ void daemon_dump_cfg ( void )
{ {
daemon_config_type * ptr = daemon_get_cfg_ptr(); daemon_config_type * ptr = daemon_get_cfg_ptr();
ilog ("Configuration Settings\n------------------------------\n"); ilog ("Configuration Settings ...\n");
if ( ptr->scheduling_priority ) { ilog ("scheduling_priority = %d\n", ptr->scheduling_priority ); } if ( ptr->scheduling_priority ) { ilog ("scheduling_priority = %d\n", ptr->scheduling_priority ); }
if ( ptr->infra_degrade_only ) { ilog ("infra_degrade_only = %s\n", ptr->infra_degrade_only ? "Yes" : "No" );} if ( ptr->infra_degrade_only ) { ilog ("infra_degrade_only = %s\n", ptr->infra_degrade_only ? "Yes" : "No" );}
@ -277,7 +277,6 @@ void daemon_dump_cfg ( void )
if ( ptr->active ) { ilog ("active = %s\n", ptr->active ? "Yes" : "No" );} if ( ptr->active ) { ilog ("active = %s\n", ptr->active ? "Yes" : "No" );}
/* hbsAgent */ /* hbsAgent */
if ( ptr->hbs_pulse_period ) { ilog ("hbs_pulse_period = %d\n", ptr->hbs_pulse_period );}
if ( ptr->token_refresh_rate ) { ilog ("token_refresh_rate = %d\n", ptr->token_refresh_rate );} if ( ptr->token_refresh_rate ) { ilog ("token_refresh_rate = %d\n", ptr->token_refresh_rate );}
if ( ptr->hbs_minor_threshold ) { ilog ("hbs_minor_threshold = %d\n", ptr->hbs_minor_threshold );} if ( ptr->hbs_minor_threshold ) { ilog ("hbs_minor_threshold = %d\n", ptr->hbs_minor_threshold );}
if ( ptr->hbs_degrade_threshold ) { ilog ("hbs_degrade_threshold = %d\n", ptr->hbs_degrade_threshold );} if ( ptr->hbs_degrade_threshold ) { ilog ("hbs_degrade_threshold = %d\n", ptr->hbs_degrade_threshold );}

View File

@ -78,6 +78,7 @@ void print_help ( void )
printf ("\t-l --log - Log to file ; /var/log/<daemon>.log\n"); printf ("\t-l --log - Log to file ; /var/log/<daemon>.log\n");
printf ("\t-p --passive - Passive mode ; do not act on failures\n"); printf ("\t-p --passive - Passive mode ; do not act on failures\n");
printf ("\t-v --verbose - Show command line arguments\n"); printf ("\t-v --verbose - Show command line arguments\n");
printf ("\t-V --Virtual - Running in virtual environment\n");
printf ("\t-t --test - Run Test Head\n"); printf ("\t-t --test - Run Test Head\n");
printf ("\t-g --gap - Gap in seconds\n"); printf ("\t-g --gap - Gap in seconds\n");
printf ("\t-m --mode - Word string representing a run mode\n"); printf ("\t-m --mode - Word string representing a run mode\n");
@ -106,6 +107,9 @@ int daemon_get_run_option ( const char * option )
} }
return (1); return (1);
} }
else if ( !strcmp ( option, "Virtual" ) )
return opts.Virtual ;
else if ( !strcmp ( option, "front" ) ) else if ( !strcmp ( option, "front" ) )
return opts.front ; return opts.front ;
@ -118,6 +122,7 @@ void opts_init ( void)
opts.log = false ; opts.log = false ;
opts.test = false ; opts.test = false ;
opts.verbose = false ; opts.verbose = false ;
opts.Virtual = false ;
opts.active = false ; opts.active = false ;
opts.front = false ; opts.front = false ;
opts.front = false ; opts.front = false ;
@ -152,8 +157,8 @@ int parseArg ( int argc, char * argv[], opts_type * opts_ptr )
int cmd_arg_count = 1 ; /* command args start at 1 */ int cmd_arg_count = 1 ; /* command args start at 1 */
/* A string listing of valid short options letters. */ /* A string listing of valid short options letters. */
const char* const short_options = "u:c:p:g:i:m:n:d:hlfpvta"; const char* const short_options = "u:c:p:g:i:m:n:d:hlfpvVta";
/* An array listing of valid long options. */ /* An array listing of valid long options. */
const struct option long_options[] = const struct option long_options[] =
{ {
@ -167,9 +172,10 @@ int parseArg ( int argc, char * argv[], opts_type * opts_ptr )
{ "username" , 1, NULL, 'u' }, { "username" , 1, NULL, 'u' },
{ "help" , 0, NULL, 'h' }, { "help" , 0, NULL, 'h' },
{ "active" , 0, NULL, 'a' }, { "active" , 0, NULL, 'a' },
{ "foreground", 0, NULL, 'f' }, { "foreground", 0, NULL, 'f' },
{ "log" , 0, NULL, 'l' }, { "log" , 0, NULL, 'l' },
{ "verbose" , 0, NULL, 'v' }, { "verbose" , 0, NULL, 'v' },
{ "Virtual" , 0, NULL, 'V' },
{ "test" , 0, NULL, 't' }, { "test" , 0, NULL, 't' },
{ NULL , 0, NULL, 0 } /* Required at end of array. */ { NULL , 0, NULL, 0 } /* Required at end of array. */
}; };
@ -254,19 +260,25 @@ int parseArg ( int argc, char * argv[], opts_type * opts_ptr )
case 't': /* -t or --test */ case 't': /* -t or --test */
{ {
opts_ptr->test = true ; opts_ptr->test = true ;
cmd_arg_count++ ; cmd_arg_count++ ;
break; break;
} }
case 'v': /* -t or --verbose */ case 'v': /* -v or --verbose */
{ {
opts_ptr->verbose = true ; opts_ptr->verbose = true ;
cmd_arg_count++ ; cmd_arg_count++ ;
break;
}
case 'V': /* -V or --Virtual */
{
opts_ptr->Virtual = true ;
cmd_arg_count++ ;
break; break;
} }
case 'a': /* -a or --active */ case 'a': /* -a or --active */
{ {
opts_ptr->active = true ; opts_ptr->active = true ;
cmd_arg_count++ ; cmd_arg_count++ ;
break; break;
} }
case '?': case '?':

View File

@ -33,6 +33,7 @@ typedef struct
int test ; /**< Enable test mode */ int test ; /**< Enable test mode */
int info ; /**< Dump data module info */ int info ; /**< Dump data module info */
int verbose ; /**< Dump command line options */ int verbose ; /**< Dump command line options */
int Virtual ; /**< Set to non-zero when in virtual env */
int active ; /**< Set daemon active */ int active ; /**< Set daemon active */
int debug ; /**< Set tracing debug mode "debug,"test","info","trace" */ int debug ; /**< Set tracing debug mode "debug,"test","info","trace" */
int front ; /**< run in the foreground ; do not daemonize */ int front ; /**< run in the foreground ; do not daemonize */
@ -43,7 +44,7 @@ typedef struct
string username ; string username ;
string command ; string command ;
string password ; string password ;
} opts_type ; } opts_type ;
opts_type * daemon_get_opts_ptr ( void ); opts_type * daemon_get_opts_ptr ( void );

View File

@ -1,3 +1,3 @@
SRC_DIR="$PKG_BASE/src" SRC_DIR="$PKG_BASE/src"
COPY_LIST="$SRC_DIR/*" COPY_LIST="$SRC_DIR/*"
TIS_PATCH_VER=6 TIS_PATCH_VER=7

View File

@ -34,6 +34,7 @@ make install buildroot=%{buildroot} _sysconfdir=%{_sysconfdir} _unitdir=%{_unitd
if [ $1 -eq 1 ] ; then if [ $1 -eq 1 ] ; then
/bin/systemctl enable lighttpd.service /bin/systemctl enable lighttpd.service
/bin/systemctl enable qemu_clean.service /bin/systemctl enable qemu_clean.service
/bin/systemctl enable hbsAgent.service
fi fi
exit 0 exit 0
@ -41,6 +42,9 @@ exit 0
%defattr(-,root,root,-) %defattr(-,root,root,-)
%{_sysconfdir}/init.d/goenabledControl %{_sysconfdir}/init.d/goenabledControl
%license %{_datarootdir}/licenses/mtce-control-1.0/LICENSE %license %{_datarootdir}/licenses/mtce-control-1.0/LICENSE
%{_sysconfdir}/pmon.d/hbsAgent.conf
%{_sysconfdir}/init.d/hbsAgent
%{_unitdir}/hbsAgent.service
%clean %clean
rm -rf $RPM_BUILD_ROOT rm -rf $RPM_BUILD_ROOT

View File

@ -1,19 +1,32 @@
SOURCE1 = goenabled SOURCE1 = LICENSE
SOURCE2 = LICENSE SOURCE2 = goenabled
SOURCE3 = hbsAgent
SOURCE4 = hbsAgent.conf
SOURCE5 = hbsAgent.service
local_etc_pmond = $(_sysconfdir)/pmond.d local_etc_pmond = $(_sysconfdir)/pmon.d
local_etc_goenabledd = $(_sysconfdir)/goenabled.d local_etc_goenabledd = $(_sysconfdir)/goenabled.d
.PHONY: default .PHONY: default
install: install:
# Controller-Only Init Scripts
install -m 755 -p -D scripts/$(SOURCE1) $(buildroot)/$(_sysconfdir)/init.d/goenabledControl
# Controller-Only Process Monitor Config files
install -m 755 -d $(buildroot)/$(local_etc_pmond)
# Controller-Only Go Enabled Test
install -m 755 -d $(buildroot)/$(local_etc_goenabledd)
# for license # for license
install -m 755 -d $(buildroot)/$(_datarootdir)/licenses/mtce-control-1.0 install -m 755 -d $(buildroot)/$(_datarootdir)/licenses/mtce-control-1.0
install -p -D -m 600 $(SOURCE2) $(buildroot)/$(_datarootdir)/licenses/mtce-control-1.0/LICENSE install -m 600 -p -D $(SOURCE1) $(buildroot)/$(_datarootdir)/licenses/mtce-control-1.0/LICENSE
# Controller-Only Init Scripts
install -m 755 -d $(buildroot)/$(_sysconfdir)/init.d
install -m 755 -p -D scripts/$(SOURCE2) $(buildroot)/$(_sysconfdir)/init.d/goenabledControl
install -m 755 -p -D scripts/$(SOURCE3) $(buildroot)/$(_sysconfdir)/init.d/hbsAgent
# Controller-Only Process Monitor Config files
install -m 755 -d $(buildroot)/$(local_etc_pmond)
install -m 644 -p -D scripts/$(SOURCE4) $(buildroot)/$(local_etc_pmond)/hbsAgent.conf
# Controller-Only Heartbeat Service file
install -m 644 -p -D scripts/$(SOURCE5) $(buildroot)/$(_unitdir)/hbsAgent.service
# Controller-Only Go Enabled Test
install -m 755 -d $(buildroot)/$(local_etc_goenabledd)

View File

@ -0,0 +1,117 @@
#! /bin/sh
#
# Copyright (c) 2018 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
#
# chkconfig: 2345 95 95
#
### BEGIN INIT INFO
# Provides: hbsAgent
# Default-Start: 3 5
# Default-Stop: 0 1 2 6
# Short-Description: Heartbeat Agent Daemon
### END INIT INFO

# Init script for the hbsAgent daemon.
#
# Usage: hbsAgent { start | stop | status | restart | condrestart }
#
# The exit status is one of the LSB codes defined below; 0 means success.

. /etc/init.d/functions

DAEMON_NAME="hbsAgent"
DAEMON="/usr/local/bin/${DAEMON_NAME}"
PIDFILE="/var/run/${DAEMON_NAME}.pid"
VIRT_TOOL='virt-what'

# controller-1:~$ sudo virt-what
# virtualbox ... in virtualbox
# kvm ... in qemu

# Linux Standard Base (LSB) Error Codes
RETVAL=0
GENERIC_ERROR=1
INVALID_ARGS=2
UNSUPPORTED_FEATURE=3
NOT_INSTALLED=5
NOT_RUNNING=7

PATH=/sbin:/usr/sbin:/bin:/usr/bin:/usr/local/bin
export PATH

# Nothing to manage if the daemon binary is not installed.
if [ ! -e "${DAEMON}" ] ; then
    logger "${DAEMON} is missing"
    exit ${NOT_INSTALLED}
fi

case "$1" in

    start)
        logger "Starting ${DAEMON_NAME}"
        echo -n "Starting ${DAEMON_NAME}: "
        if [ -n "`pidof ${DAEMON_NAME}`" ] ; then
            echo -n "is already running "
            RETVAL=0
        else
            # Query the virtualization tool only when it is on the PATH;
            # otherwise treat the host as not virtual.
            virtual=""
            if command -v "${VIRT_TOOL}" >/dev/null 2>&1 ; then
                virtual=`${VIRT_TOOL}`
            fi

            # Pass -V to the daemon only when the tool reports exactly
            # "virtualbox" or "kvm".
            case "${virtual}" in
                virtualbox|kvm) virt_opt="-V" ;;
                *)              virt_opt=""   ;;
            esac

            # virt_opt is intentionally unquoted so an empty value adds
            # no argument to the daemon command line.
            start-stop-daemon --start -b -x ${DAEMON} -- -l -a ${virt_opt}
            RETVAL=$?
        fi
        if [ ${RETVAL} -eq 0 ] ; then
            pid=`pidof ${DAEMON_NAME}`
            echo "OK"
            logger "${DAEMON} (${pid})"
        else
            echo "FAIL"
            RETVAL=${GENERIC_ERROR}
        fi
        ;;

    stop)
        logger "Stopping ${DAEMON_NAME}"
        echo -n "Stopping ${DAEMON_NAME}: "
        if [ -n "`pidof ${DAEMON_NAME}`" ] ; then
            killproc ${DAEMON_NAME}
        fi
        # Re-check after the kill attempt; a surviving process is a failure.
        if [ -n "`pidof ${DAEMON_NAME}`" ] ; then
            echo "FAIL"
            RETVAL=${NOT_RUNNING}
        else
            echo "OK"
        fi
        rm -f ${PIDFILE}
        ;;

    restart)
        $0 stop
        $0 start
        # Report the outcome of the start so a failed restart is visible
        # to the caller.
        RETVAL=$?
        ;;

    status)
        pid=`pidof ${DAEMON_NAME}`
        RETVAL=$?
        if [ ${RETVAL} -eq 0 ] ; then
            echo "${DAEMON_NAME} is running"
        else
            echo "${DAEMON_NAME} is NOT running"
            RETVAL=${NOT_RUNNING}
        fi
        ;;

    condrestart)
        # Restarts unconditionally, whether or not the daemon is running.
        $0 restart
        RETVAL=$?
        ;;

    *)
        echo "usage: $0 { start | stop | status | restart | condrestart }"
        RETVAL=${INVALID_ARGS}
        ;;
esac

exit ${RETVAL}

View File

@ -0,0 +1,25 @@
; Process monitor settings for the hbsAgent process.
[process]
process = hbsAgent
service = hbsAgent
pidfile = /var/run/hbsAgent.pid
style = lsb ; ocf or lsb
severity = major ; minor, major, critical
restarts = 1 ; restart retries before error assertion
interval = 10 ; number of seconds to wait between restarts
debounce = 10 ; number of seconds that a process needs to remain
; running before degrade is removed and retry count
; is cleared.
startuptime = 5 ; Seconds to wait after process start before starting the debounce monitor
mode = passive ; Monitoring mode: passive (default) or active
; passive: process death monitoring (default: always)
; active : heartbeat monitoring, i.e. request / response messaging
; ignore : do not monitor or stop monitoring
; NOTE(review): quorum is set to 0 below; presumably that means this process
; is NOT part of the host watchdog quorum - confirm against the pmon parser.
quorum = 0 ; process is in the host watchdog quorum
; Active Monitoring Options
; NOTE(review): mode is passive above, so these are presumably unused unless
; mode is changed to active - TODO confirm.
port = 2201
period = 5 ; monitor period in seconds
timeout = 4 ; Messaging timeout period in seconds, must be shorter than period
threshold = 5 ; Number of back to back heartbeat failures before action

View File

@ -0,0 +1,22 @@
# systemd unit for the heartbeat agent (hbsAgent).
# Start and stop are delegated to the init script.
[Unit]
Description=Titanium Cloud Maintenance Heartbeat Agent
After=network.target syslog.service config.service
Before=pmon.service

[Service]
Type=forking
ExecStart=/etc/rc.d/init.d/hbsAgent start
# Stopping the unit must invoke the script's stop action, not start.
ExecStop=/etc/rc.d/init.d/hbsAgent stop
PIDFile=/var/run/hbsAgent.pid
KillMode=process
SendSIGKILL=no

# Process recovery is handled by pmond if it's running.
# Delay 10 seconds to give pmond a chance to recover
# before systemd kicks in to do it as a backup plan.
Restart=always
RestartSec=10

[Install]
WantedBy=multi-user.target

View File

@ -1,3 +1,3 @@
SRC_DIR="src" SRC_DIR="src"
TIS_PATCH_VER=140 TIS_PATCH_VER=142
BUILD_IS_SLOW=5 BUILD_IS_SLOW=5

View File

@ -313,7 +313,6 @@ install -m 755 -d %{buildroot}/usr/lib/ocf
install -m 755 -d %{buildroot}/usr/lib/ocf/resource.d install -m 755 -d %{buildroot}/usr/lib/ocf/resource.d
install -m 755 -d %{buildroot}/usr/lib/ocf/resource.d/platform install -m 755 -d %{buildroot}/usr/lib/ocf/resource.d/platform
install -m 755 -p -D %{_buildsubdir}/scripts/mtcAgent %{buildroot}/usr/lib/ocf/resource.d/platform/mtcAgent install -m 755 -p -D %{_buildsubdir}/scripts/mtcAgent %{buildroot}/usr/lib/ocf/resource.d/platform/mtcAgent
install -m 755 -p -D %{_buildsubdir}/scripts/hbsAgent %{buildroot}/usr/lib/ocf/resource.d/platform/hbsAgent
install -m 755 -p -D %{_buildsubdir}/hwmon/scripts/ocf/hwmon %{buildroot}/usr/lib/ocf/resource.d/platform/hwmon install -m 755 -p -D %{_buildsubdir}/hwmon/scripts/ocf/hwmon %{buildroot}/usr/lib/ocf/resource.d/platform/hwmon
# config files # config files
@ -482,7 +481,6 @@ install -m 755 -d %{buildroot}/var/run
# SM OCF Start/Stop/Monitor Scripts # SM OCF Start/Stop/Monitor Scripts
%{ocf_resourced}/platform/mtcAgent %{ocf_resourced}/platform/mtcAgent
%{ocf_resourced}/platform/hbsAgent
# Config files # Config files
%config(noreplace)/etc/mtc.ini %config(noreplace)/etc/mtc.ini

View File

@ -47,6 +47,11 @@ int alarm_register_user ( msgClassSock * sock_ptr )
return (rc); return (rc);
} }
/* Clear this module's alarm socket pointer (user_sock_ptr).
 *
 * While the pointer is NULL, alarm_ logs "alarm socket is NULL" and
 * returns FAIL_NULL_POINTER instead of building a request. */
void alarm_unregister_user ( void )
{
    user_sock_ptr = NULL ;
}
/* Construct an alarm request json string in the following form /* Construct an alarm request json string in the following form
{\"mtcalarm\":[{\"alarmid\":\"200.009\",\"hostname\":\"compute-3\",\"operation\":\"set\",\"severity\":\"major\",\"entity\":\"Infrastructure\",\"prefix\":\"service=heartbeat\"}, {\"alarmid\":\"200.005\",\"hostname\":\"compute-3\",\"operation\":\"set\",\"severity\":\"major\",\"entity\":\"Management\",\"prefix\":\"service=heartbeat\"}]}" {\"mtcalarm\":[{\"alarmid\":\"200.009\",\"hostname\":\"compute-3\",\"operation\":\"set\",\"severity\":\"major\",\"entity\":\"Infrastructure\",\"prefix\":\"service=heartbeat\"}, {\"alarmid\":\"200.005\",\"hostname\":\"compute-3\",\"operation\":\"set\",\"severity\":\"major\",\"entity\":\"Management\",\"prefix\":\"service=heartbeat\"}]}"
@ -73,6 +78,17 @@ int alarm_ ( string hostname, const char * id, EFmAlarmStateT state, EFmAlarmSev
string msg_type ; string msg_type ;
string sev ; string sev ;
if ( user_sock_ptr == NULL )
{
slog ("alarm socket is NULL");
return (FAIL_NULL_POINTER );
}
else if ( ! user_sock_ptr->sock_ok() )
{
elog ("alarm socket is not ok");
return (FAIL_OPERATION);
}
if ( state == FM_ALARM_STATE_MSG ) if ( state == FM_ALARM_STATE_MSG )
msg_type = "msg" ; msg_type = "msg" ;
else if ( state == FM_ALARM_STATE_SET ) else if ( state == FM_ALARM_STATE_SET )
@ -127,7 +143,8 @@ int alarm_ ( string hostname, const char * id, EFmAlarmStateT state, EFmAlarmSev
} }
else else
{ {
ilog ("%s %s\n", hostname.c_str(), request); ilog ("%s %s %s %s %s", hostname.c_str(), entity, msg_type.c_str(), sev.c_str(), id);
mlog ("%s %s\n", hostname.c_str(), request);
return ( PASS ) ; return ( PASS ) ;
} }
daemon_signal_hdlr (); daemon_signal_hdlr ();

View File

@ -68,6 +68,7 @@ EFmAlarmSeverityT alarmUtil_getSev_enum ( string severity );
#ifndef __MODULE_PRIVATE__ #ifndef __MODULE_PRIVATE__
int alarm_register_user ( msgClassSock * sock_ptr ); int alarm_register_user ( msgClassSock * sock_ptr );
void alarm_unregister_user ( void );
/* Public API */ /* Public API */
int alarm_ ( string hostname, const char * id, EFmAlarmStateT state, EFmAlarmSeverityT severity, const char * entity, string prefix ); int alarm_ ( string hostname, const char * id, EFmAlarmStateT state, EFmAlarmSeverityT severity, const char * entity, string prefix );

View File

@ -36,6 +36,7 @@ using namespace std;
#include "mtcAlarm.h" #include "mtcAlarm.h"
#include "alarm.h" #include "alarm.h"
#include "hbsAlarm.h" #include "hbsAlarm.h"
#include "hbsBase.h"
/** Initialize the supplied command buffer */ /** Initialize the supplied command buffer */
void mtcCmd_init ( mtcCmd & cmd ) void mtcCmd_init ( mtcCmd & cmd )
@ -263,7 +264,8 @@ nodeLinkClass::nodeLinkClass()
/* Make no assumption on the service */ /* Make no assumption on the service */
maintenance = false ; maintenance = false ;
heartbeat = false ; heartbeat = false ;
active = false ; active = false ; /* run active */
active_controller = false ; /* true if this controller is active */
/* Set some defaults for the heartbeat service */ /* Set some defaults for the heartbeat service */
hbs_ready = false ; hbs_ready = false ;
@ -1156,26 +1158,26 @@ void nodeLinkClass::print_node_info ( void )
if (( i == INFRA_IFACE ) && ( infra_network_provisioned == false )) if (( i == INFRA_IFACE ) && ( infra_network_provisioned == false ))
continue ; continue ;
syslog ( LOG_INFO, "+--------------+-----+-----+------+-----+------+------------+---------+-----------------+\n"); syslog ( LOG_INFO, "+--------------+-----+-------+-------+-------+-------+------------+----------+-----------------+\n");
syslog ( LOG_INFO, "| %s: %3d | Mon | Mis | Max | Deg | Fail | Pulses Tot | Pulses | %s (%4d) |\n" , syslog ( LOG_INFO, "| %s: %3d | Mon | Mis | Max | Deg | Fail | Pulses Tot | Pulses | %s (%4d) |\n" ,
get_iface_name_str ((iface_enum)i), hosts, hbs_disabled ? "DISABLED" : "Enabled ", hbs_pulse_period ); get_iface_name_str ((iface_enum)i), hosts, hbs_disabled ? "DISABLED" : "Enabled ", hbs_pulse_period );
syslog ( LOG_INFO, "+--------------+-----+-----+------+-----+------+------------+---------+-----------------+\n"); syslog ( LOG_INFO, "+--------------+-----+-------+-------+-------+-------+------------+----------+-----------------+\n");
for ( struct node * ptr = head ; ptr != NULL ; ptr = ptr->next ) for ( struct node * ptr = head ; ptr != NULL ; ptr = ptr->next )
{ {
syslog ( LOG_INFO, "| %-12s | %c | %3i | %4i | %3i | %4i | %8x | %7x | %d msec\n", syslog ( LOG_INFO, "| %-12s | %c | %5i | %5i | %5i | %5i | %10x | %8x | %d msec\n",
ptr->hostname.c_str(), ptr->hostname.c_str(),
ptr->monitor[i] ? 'Y' : 'n', ptr->monitor[i] ? 'Y' : 'n',
ptr->hbs_misses_count[i], ptr->hbs_misses_count[i],
ptr->max_count[i], ptr->max_count[i],
ptr->hbs_degrade_count[i], ptr->hbs_degrade_count[i],
ptr->hbs_failure_count[i], ptr->hbs_failure_count[i],
ptr->hbs_count[i], ptr->hbs_count[i],
ptr->b2b_pulses_count[i], ptr->b2b_pulses_count[i],
hbs_pulse_period ); hbs_pulse_period );
} }
} }
syslog ( LOG_INFO, "+--------------+-----+-----+------+-----+------+------------+---------+-----------------+\n"); syslog ( LOG_INFO, "+--------------+-----+-------+-------+-------+-------+------------+----------+-----------------+\n");
} }
} }
@ -7778,7 +7780,7 @@ int nodeLinkClass::remPulse ( struct node * node_ptr, iface_enum iface, bool cle
{ {
/* This default RC allows the caller to filter out unexpected pulse responses */ /* This default RC allows the caller to filter out unexpected pulse responses */
int rc = ENXIO ; int rc = ENXIO ;
if ( head == NULL ) if ( head == NULL )
{ {
return -ENODEV ; return -ENODEV ;
@ -7962,6 +7964,16 @@ int nodeLinkClass::remPulse ( struct node * node_ptr, iface_enum iface, bool cle
} }
pulses[iface]-- ; pulses[iface]-- ;
} }
else if ( node_ptr )
{
dlog ("%s unexpected pulse response ; %s",
node_ptr->hostname.c_str(),
get_iface_name_str(iface));
}
else
{
slog ("null pointer");
}
return rc ; return rc ;
} }
@ -7972,6 +7984,13 @@ int nodeLinkClass::remPulse ( struct node * node_ptr, iface_enum iface, bool cle
* By index does not require a lookup whereas hostname does */ * By index does not require a lookup whereas hostname does */
int nodeLinkClass::remove_pulse ( string & hostname, iface_enum iface, int index, unsigned int flags ) int nodeLinkClass::remove_pulse ( string & hostname, iface_enum iface, int index, unsigned int flags )
{ {
/* TODO: consider removing this check */
if ( hostname == "localhost" )
{
/* localhost is not a supported hostname and indicates
* an unconfigured host response ; return the ignore response */
return(ENXIO);
}
if ( index ) if ( index )
{ {
int rc = remPulse_by_index ( hostname, index , iface, true , flags ); int rc = remPulse_by_index ( hostname, index , iface, true , flags );
@ -7984,16 +8003,6 @@ int nodeLinkClass::remove_pulse ( string & hostname, iface_enum iface, int index
} }
else else
{ {
if ( hostname.compare("localhost") )
{
get_hbs_monitor_state ( hostname , iface ) ;
}
else
{
/* localhost is not a supported hostname and indicates
* an unconfigured host response ; return the ignore response */
return(ENXIO);
}
} }
return ( remPulse_by_name ( hostname , iface, true, flags )); return ( remPulse_by_name ( hostname , iface, true, flags ));
} }
@ -8016,7 +8025,6 @@ void nodeLinkClass::clear_pulse_list ( iface_enum iface )
} }
} }
/** Runs in the hbsAgent to set or clear heartbeat alarms for all supported interfaces */ /** Runs in the hbsAgent to set or clear heartbeat alarms for all supported interfaces */
void nodeLinkClass::manage_heartbeat_alarm ( struct nodeLinkClass::node * node_ptr, EFmAlarmSeverityT sev, int iface ) void nodeLinkClass::manage_heartbeat_alarm ( struct nodeLinkClass::node * node_ptr, EFmAlarmSeverityT sev, int iface )
{ {
@ -8142,7 +8150,6 @@ int nodeLinkClass::lost_pulses ( iface_enum iface, bool & storage_0_responding )
storage_0_responding = false ; storage_0_responding = false ;
} }
/* Don't log single misses unless in debug mode */
if ( pulse_ptr->b2b_misses_count[iface] > 1 ) if ( pulse_ptr->b2b_misses_count[iface] > 1 )
{ {
if ( pulse_ptr->b2b_misses_count[iface] >= hbs_failure_threshold ) if ( pulse_ptr->b2b_misses_count[iface] >= hbs_failure_threshold )
@ -8207,7 +8214,10 @@ int nodeLinkClass::lost_pulses ( iface_enum iface, bool & storage_0_responding )
{ {
if ( pulse_ptr->b2b_misses_count[iface] == hbs_minor_threshold ) if ( pulse_ptr->b2b_misses_count[iface] == hbs_minor_threshold )
{ {
send_event ( pulse_ptr->hostname, MTC_EVENT_HEARTBEAT_MINOR_SET, iface ); if ( this->active_controller )
{
send_event ( pulse_ptr->hostname, MTC_EVENT_HEARTBEAT_MINOR_SET, iface );
}
pulse_ptr->hbs_minor[iface] = true ; pulse_ptr->hbs_minor[iface] = true ;
pulse_ptr->hbs_minor_count[iface]++ ; pulse_ptr->hbs_minor_count[iface]++ ;
wlog ("%s %s -> MINOR\n", pulse_ptr->hostname.c_str(), get_iface_name_str(iface)); wlog ("%s %s -> MINOR\n", pulse_ptr->hostname.c_str(), get_iface_name_str(iface));
@ -8215,10 +8225,17 @@ int nodeLinkClass::lost_pulses ( iface_enum iface, bool & storage_0_responding )
} }
if ( pulse_ptr->b2b_misses_count[iface] == hbs_degrade_threshold ) if ( pulse_ptr->b2b_misses_count[iface] == hbs_degrade_threshold )
{ {
manage_heartbeat_alarm ( pulse_ptr, FM_ALARM_SEVERITY_MAJOR, iface ); if ( this->active_controller )
{
manage_heartbeat_alarm ( pulse_ptr, FM_ALARM_SEVERITY_MAJOR, iface );
/* report this host as failed */ /* report this host as failed */
if ( send_event ( pulse_ptr->hostname, MTC_EVENT_HEARTBEAT_DEGRADE_SET, iface ) == PASS ) if ( send_event ( pulse_ptr->hostname, MTC_EVENT_HEARTBEAT_DEGRADE_SET, iface ) == PASS )
{
pulse_ptr->hbs_degrade[iface] = true ;
}
}
else
{ {
pulse_ptr->hbs_degrade[iface] = true ; pulse_ptr->hbs_degrade[iface] = true ;
} }
@ -8231,11 +8248,17 @@ int nodeLinkClass::lost_pulses ( iface_enum iface, bool & storage_0_responding )
( pulse_ptr->hbs_degrade[iface] == false )) ( pulse_ptr->hbs_degrade[iface] == false ))
{ {
wlog ("%s -> DEGRADED - Auto-Correction\n", pulse_ptr->hostname.c_str()); wlog ("%s -> DEGRADED - Auto-Correction\n", pulse_ptr->hostname.c_str());
if ( this->active_controller )
{
manage_heartbeat_alarm ( pulse_ptr, FM_ALARM_SEVERITY_MAJOR, iface );
manage_heartbeat_alarm ( pulse_ptr, FM_ALARM_SEVERITY_MAJOR, iface ); /* report this host as failed */
if ( send_event ( pulse_ptr->hostname, MTC_EVENT_HEARTBEAT_DEGRADE_SET, iface ) == PASS )
/* report this host as failed */ {
if ( send_event ( pulse_ptr->hostname, MTC_EVENT_HEARTBEAT_DEGRADE_SET, iface ) == PASS ) pulse_ptr->hbs_degrade[iface] = true ;
}
}
else
{ {
pulse_ptr->hbs_degrade[iface] = true ; pulse_ptr->hbs_degrade[iface] = true ;
} }
@ -8250,11 +8273,16 @@ int nodeLinkClass::lost_pulses ( iface_enum iface, bool & storage_0_responding )
/* Only print the log at the threshold boundary */ /* Only print the log at the threshold boundary */
if ( pulse_ptr->b2b_misses_count[iface] == hbs_failure_threshold ) if ( pulse_ptr->b2b_misses_count[iface] == hbs_failure_threshold )
{ {
manage_heartbeat_alarm ( pulse_ptr, FM_ALARM_SEVERITY_CRITICAL, iface ); if ( this->active_controller )
{
manage_heartbeat_alarm ( pulse_ptr, FM_ALARM_SEVERITY_CRITICAL, iface );
}
wlog_throttled ( pulse_ptr->no_work_log_throttle, 500, wlog_throttled ( pulse_ptr->no_work_log_throttle, 500,
"%s %s *** Heartbeat Loss *** (degrade only)\n", pulse_ptr->hostname.c_str(), "%s %s *** Heartbeat Loss *** (degrade only)\n", pulse_ptr->hostname.c_str(),
get_iface_name_str(iface) ); get_iface_name_str(iface) );
this->print_node_info ();
hbs_cluster_log ( this->my_hostname, "event", true );
} }
} }
@ -8268,35 +8296,46 @@ int nodeLinkClass::lost_pulses ( iface_enum iface, bool & storage_0_responding )
/* Only print the log at the threshold boundary */ /* Only print the log at the threshold boundary */
if ( pulse_ptr->b2b_misses_count[iface] == hbs_failure_threshold ) if ( pulse_ptr->b2b_misses_count[iface] == hbs_failure_threshold )
{ {
manage_heartbeat_alarm ( pulse_ptr, FM_ALARM_SEVERITY_CRITICAL, iface ); if ( this->active_controller )
{
manage_heartbeat_alarm ( pulse_ptr, FM_ALARM_SEVERITY_CRITICAL, iface );
}
wlog_throttled ( pulse_ptr->no_work_log_throttle, 500, wlog_throttled ( pulse_ptr->no_work_log_throttle, 500,
"%s %s *** Heartbeat Loss *** (degrade only)\n", pulse_ptr->hostname.c_str(), "%s %s *** Heartbeat Loss *** (degrade only)\n", pulse_ptr->hostname.c_str(),
get_iface_name_str(iface) ); get_iface_name_str(iface) );
this->print_node_info ();
hbs_cluster_log ( this->my_hostname, "event", true );
} }
} }
else if (( pulse_ptr->b2b_misses_count[iface] == hbs_failure_threshold ) && else if (( pulse_ptr->b2b_misses_count[iface] == hbs_failure_threshold ) &&
( pulse_ptr->hbs_failure[iface] == false )) ( pulse_ptr->hbs_failure[iface] == false ))
{ {
elog ("%s %s -> FAILED\n", pulse_ptr->hostname.c_str(),
get_iface_name_str(iface) );
elog ("%s %s *** Heartbeat Loss ***\n", pulse_ptr->hostname.c_str(), elog ("%s %s *** Heartbeat Loss ***\n", pulse_ptr->hostname.c_str(),
get_iface_name_str(iface) ); get_iface_name_str(iface) );
manage_heartbeat_alarm ( pulse_ptr, FM_ALARM_SEVERITY_CRITICAL, iface ); if ( this->active_controller )
{
manage_heartbeat_alarm ( pulse_ptr, FM_ALARM_SEVERITY_CRITICAL, iface );
/* report this host as failed */ /* report this host as failed */
if ( send_event ( pulse_ptr->hostname, MTC_EVENT_HEARTBEAT_LOSS , iface ) == PASS ) if ( send_event ( pulse_ptr->hostname, MTC_EVENT_HEARTBEAT_LOSS , iface ) == PASS )
{
pulse_ptr->hbs_failure[iface] = true ;
}
}
else
{ {
pulse_ptr->hbs_failure[iface] = true ; pulse_ptr->hbs_failure[iface] = true ;
this->print_node_info ();
hbs_cluster_log ( this->my_hostname, "event", true );
} }
pulse_ptr->hbs_failure_count[iface]++ ; pulse_ptr->hbs_failure_count[iface]++ ;
} }
if ( pulse_ptr->b2b_misses_count[iface] > pulse_ptr->max_count[iface] ) if ( pulse_ptr->b2b_misses_count[iface] > pulse_ptr->max_count[iface] )
pulse_ptr->max_count[iface] = pulse_ptr->b2b_misses_count[iface] ; pulse_ptr->max_count[iface] = pulse_ptr->b2b_misses_count[iface] ;
} }
if ( remPulse_by_name ( pulse_ptr->hostname, iface, false, NULL_PULSE_FLAGS )) if ( remPulse_by_name ( pulse_ptr->hostname, iface, false, NULL_PULSE_FLAGS ))
{ {
elog ("%s %s not in pulse list\n", pulse_ptr->hostname.c_str(), elog ("%s %s not in pulse list\n", pulse_ptr->hostname.c_str(),

View File

@ -1266,6 +1266,10 @@ public:
bool maintenance ; bool maintenance ;
bool heartbeat ; bool heartbeat ;
/* Set to true if this controller is active.
* Currently only used by heartbeat service. */
bool active_controller ;
/* offline_handler tuning controls */ /* offline_handler tuning controls */
int offline_threshold ; /* number of back to back mtcAlive misses before offline */ int offline_threshold ; /* number of back to back mtcAlive misses before offline */
int offline_period ; /* offline handler mtcAlive request period */ int offline_period ; /* offline handler mtcAlive request period */

File diff suppressed because it is too large Load Diff

View File

@ -47,6 +47,9 @@
/** Maximum service fail count before action */ /** Maximum service fail count before action */
#define MAX_FAIL_COUNT (1) #define MAX_FAIL_COUNT (1)
/** Audit Rate/Count */
#define AUDIT_RATE (9)
/** Heartbeat pulse request/response message header byte size */ /** Heartbeat pulse request/response message header byte size */
#define HBS_HEADER_SIZE (15) #define HBS_HEADER_SIZE (15)
@ -60,13 +63,16 @@ const char rsp_msg_header [HBS_HEADER_SIZE+1] = {"cgts pulse rsp:"};
#define HBS_MAX_MSG (HBS_HEADER_SIZE+MAX_CHARS_HOSTNAME) #define HBS_MAX_MSG (HBS_HEADER_SIZE+MAX_CHARS_HOSTNAME)
#define HBS_MESSAGE_VERSION (1) // 0 -> 1 with intro of cluster info #define HBS_MESSAGE_VERSION (1) // 0 -> 1 with intro of cluster info
/* Heartbeat control structure */ /* Heartbeat control structure */
typedef struct typedef struct
{ {
unsigned int controller ;
unsigned int audit ;
unsigned int nodetype ; unsigned int nodetype ;
bool clear_alarms ; bool clear_alarms ;
bool locked ;
} hbs_ctrl_type ; } hbs_ctrl_type ;
hbs_ctrl_type * get_hbs_ctrl_ptr ( void ); hbs_ctrl_type * get_hbs_ctrl_ptr ( void );
@ -218,22 +224,17 @@ void hbs_utils_init ( void );
/* network enum to name lookup */ /* network enum to name lookup */
string hbs_cluster_network_name ( mtce_hbs_network_enum network ); string hbs_cluster_network_name ( mtce_hbs_network_enum network );
/* Produce formatted clog's that characterize current and changing cluster
* history for a given network. Each log is controller/network specific. */
void hbs_cluster_log ( string & hostname, mtce_hbs_cluster_type & cluster, string prefix );
/* Initialize the specified history array */ /* Initialize the specified history array */
void hbs_cluster_history_init ( mtce_hbs_cluster_history_type & history ); void hbs_cluster_history_init ( mtce_hbs_cluster_history_type & history );
/* Clear all history in the cluster vault */ /* Clear all history in the cluster vault */
void hbs_cluster_history_clear( mtce_hbs_cluster_type & cluster ); void hbs_cluster_history_clear( mtce_hbs_cluster_type & cluster );
/******** Heartbeat Agent Cluster Functions in hbsCluster.cpp ********/ /******** Heartbeat Agent Cluster Functions in hbsCluster.cpp ********/
/* Set the cluster vault to default state. /* Set the cluster vault to default state.
* Called upon daemon init or heartbeat period change. */ * Called upon daemon init or heartbeat period change. */
void hbs_cluster_init ( unsigned short period ); void hbs_cluster_init ( unsigned short period , msgClassSock * sm_socket_ptr );
/* Calculate number of bytes that is unused in the cluster data structure. /* Calculate number of bytes that is unused in the cluster data structure.
* Primarily to know how many history elements are missing. */ * Primarily to know how many history elements are missing. */
@ -286,7 +287,9 @@ void hbs_cluster_append ( hbs_message_type & msg );
/* Produce formatted clog's that characterize current and changing cluster /* Produce formatted clog's that characterize current and changing cluster
* history for a given network. Each log is controller/network specific. */ * history for a given network. Each log is controller/network specific. */
void hbs_cluster_log ( string & hostname, string prefix ); void hbs_cluster_log ( string & hostname, string prefix, bool force=false );
void hbs_cluster_log ( string & hostname, mtce_hbs_cluster_type & cluster, string prefix, bool force=false );
/* Service SM cluster info request */ /* Service SM cluster info request */
void hbs_sm_handler ( void ); void hbs_sm_handler ( void );
@ -294,8 +297,14 @@ void hbs_sm_handler ( void );
/* send the cluster vault to SM */ /* send the cluster vault to SM */
void hbs_cluster_send ( msgClassSock * sm_client_sock, int reqid ); void hbs_cluster_send ( msgClassSock * sm_client_sock, int reqid );
/* copy cluster data from src to dst */
void hbs_cluster_copy ( mtce_hbs_cluster_type & src, mtce_hbs_cluster_type & dst );
/* print the contents of the vault */ /* print the contents of the vault */
void hbs_cluster_dump ( mtce_hbs_cluster_type & vault ); void hbs_cluster_dump ( mtce_hbs_cluster_type & vault, string log_prefix, bool force );
/* Heartbeat service state audit */
void hbs_state_audit ( void );
/** /**
* @} hbs_base * @} hbs_base

View File

@ -66,6 +66,8 @@ extern "C"
#include "amon.h" /* for ... active monitoring utilities */ #include "amon.h" /* for ... active monitoring utilities */
} }
#define MAX_LEN (300)
/* Where to send events */ /* Where to send events */
string mtcAgent_ip = "" ; string mtcAgent_ip = "" ;
@ -96,12 +98,17 @@ typedef struct
static char pulse_resp_tx_hdr [HBS_MAX_MSG]; static char pulse_resp_tx_hdr [HBS_MAX_MSG];
static char my_hostname [MAX_HOST_NAME_SIZE+1]; static char my_hostname [MAX_HOST_NAME_SIZE+1];
static string hostname = "" ;
static char my_hostname_length ; static char my_hostname_length ;
static string my_macaddr = "" ; static string my_macaddr = "" ;
static string my_address = "" ; static string my_address = "" ;
static unsigned int my_nodetype= CGTS_NODE_NULL ; static unsigned int my_nodetype= CGTS_NODE_NULL ;
static stallMon_type stallMon ; static stallMon_type stallMon ;
/* Cached Cluster view from controllers */
mtce_hbs_cluster_type controller_cluster_cache[MTCE_HBS_MAX_CONTROLLERS];
void daemon_sigchld_hdlr ( void ) void daemon_sigchld_hdlr ( void )
{ {
; /* dlog("Received SIGCHLD ... no action\n"); */ ; /* dlog("Received SIGCHLD ... no action\n"); */
@ -407,16 +414,17 @@ int daemon_configure ( void )
else else
{ {
ilog("Realtime Pri: FIFO/%i \n", hbs_config.scheduling_priority ); ilog("Realtime Pri: FIFO/%i \n", hbs_config.scheduling_priority );
ilog("Multicast: %s\n", hbs_config.multicast ); ilog("Multicast : %s\n", hbs_config.multicast );
hbs_config.mgmnt_iface = daemon_get_iface_master ( hbs_config.mgmnt_iface ); hbs_config.mgmnt_iface = daemon_get_iface_master ( hbs_config.mgmnt_iface );
ilog("Mgmnt iface : %s\n", hbs_config.mgmnt_iface ); ilog("Mgmnt Name : %s\n", hbs_config.mgmnt_iface );
ilog("Mgmnt RxPort: %d\n", hbs_config.hbs_client_mgmnt_port ); ilog("Mgmnt Port : %d (rx)", hbs_config.hbs_client_mgmnt_port );
ilog("Mgmnt TxPort: %d\n", hbs_config.hbs_agent_mgmnt_port ); ilog("Mgmnt Port : %d (tx)", hbs_config.hbs_agent_mgmnt_port );
get_iface_macaddr ( hbs_config.mgmnt_iface, my_macaddr ); get_iface_macaddr ( hbs_config.mgmnt_iface, my_macaddr );
get_iface_address ( hbs_config.mgmnt_iface, my_address, true ); get_iface_address ( hbs_config.mgmnt_iface, my_address, true );
get_hostname ( &my_hostname[0], MAX_HOST_NAME_SIZE ); get_hostname ( &my_hostname[0], MAX_HOST_NAME_SIZE );
hostname = my_hostname ;
/* Fetch the infrastructure interface name. /* Fetch the infrastructure interface name.
* calls daemon_get_iface_master inside so the * calls daemon_get_iface_master inside so the
@ -427,11 +435,14 @@ int daemon_configure ( void )
if (strcmp(hbs_config.infra_iface, hbs_config.mgmnt_iface)) if (strcmp(hbs_config.infra_iface, hbs_config.mgmnt_iface))
{ {
infra_network_provisioned = true ; infra_network_provisioned = true ;
ilog ("Infra iface : %s\n", hbs_config.infra_iface ); ilog ("Infra Name : %s\n", hbs_config.infra_iface );
} }
} }
ilog("Infra RxPort: %d\n", hbs_config.hbs_client_infra_port ); if ( infra_network_provisioned == true )
ilog("Infra TxPort: %d\n", hbs_config.hbs_agent_infra_port ); {
ilog("Infra Port : %d (rx)", hbs_config.hbs_client_infra_port );
ilog("Infra Port : %d (tx)", hbs_config.hbs_agent_infra_port );
}
/* initialize the stall detection monitor */ /* initialize the stall detection monitor */
stallMon_init (); stallMon_init ();
@ -663,7 +674,37 @@ int get_pmon_pulses ( void )
return (pulses); return (pulses);
} }
static unsigned int my_rri = 0 ; /*************************************************************
*
* Name : have_other_controller_history
*
* Description: returns true if there is cached history for any
* controller number other than the one supplied.
*
*************************************************************/
bool have_other_controller_history ( unsigned short controller )
{
    /* an out-of-range controller number never has peer history */
    if ( controller >= MTCE_HBS_MAX_CONTROLLERS )
        return false ;

    /* walk every cache slot except the caller's own */
    for ( int peer = 0 ; peer < MTCE_HBS_MAX_CONTROLLERS ; peer++ )
    {
        if ( peer == controller )
            continue ;

        /* non-zero histories means that peer has cached cluster history */
        if ( controller_cluster_cache[peer].histories )
            return true ;
    }
    return false ;
}
static unsigned int rri[MTCE_HBS_MAX_CONTROLLERS] = {0,0} ;
/************************************************************* /*************************************************************
* *
@ -766,12 +807,13 @@ int _service_pulse_request ( iface_enum iface , unsigned int flags )
daemon_config_type * cfg_ptr = daemon_get_cfg_ptr(); daemon_config_type * cfg_ptr = daemon_get_cfg_ptr();
if ( cfg_ptr->debug_msg ) if ( cfg_ptr->debug_msg )
{ {
mlog ("\n"); mlog (" ");
mlog ("%s Pulse Req: %s:%5d: %d:%s RRI:%d\n", mlog ("%s Pulse Req: %s:%d s:%d f:%x [%s] RRI:%d\n",
get_iface_name_str(iface), get_iface_name_str(iface),
hbs_sock.rx_sock[iface]->get_dst_addr()->toString(), hbs_sock.rx_sock[iface]->get_dst_addr()->toString(),
hbs_sock.rx_sock[iface]->get_dst_addr()->getPort(), hbs_sock.rx_sock[iface]->get_dst_addr()->getPort(),
hbs_sock.rx_mesg[iface].s, hbs_sock.rx_mesg[iface].s,
hbs_sock.rx_mesg[iface].f,
hbs_sock.rx_mesg[iface].m, hbs_sock.rx_mesg[iface].m,
hbs_sock.rx_mesg[iface].c); hbs_sock.rx_mesg[iface].c);
} }
@ -787,19 +829,9 @@ int _service_pulse_request ( iface_enum iface , unsigned int flags )
return (FAIL_MSG_HEADER) ; return (FAIL_MSG_HEADER) ;
} }
/* Update local copy for the controller this pulse came from */
/* Manage the Resource Reference Index (RRI) "lookup clue" */ /* ... before the flags are cleared and setup for the reply. */
if ( ! strncmp ( &hbs_sock.rx_mesg[iface].m[HBS_HEADER_SIZE], &my_hostname[0], MAX_CHARS_HOSTNAME )) unsigned int controller = (hbs_sock.rx_mesg[iface].f & CTRLX_MASK ) >> CTRLX_BIT ;
{
if( my_rri!= hbs_sock.rx_mesg[iface].c )
{
my_rri = hbs_sock.rx_mesg[iface].c ;
ilog ("%s Caching New RRI: %d\n", &my_hostname[0], my_rri );
}
}
/* Add my RRI to the response message */
hbs_sock.rx_mesg[iface].c = my_rri ;
/* Manage OOB flags */ /* Manage OOB flags */
hbs_sock.rx_mesg[iface].f = flags ; hbs_sock.rx_mesg[iface].f = flags ;
@ -807,23 +839,102 @@ int _service_pulse_request ( iface_enum iface , unsigned int flags )
{ {
hbs_sock.rx_mesg[iface].f |= ( PMOND_FLAG ) ; hbs_sock.rx_mesg[iface].f |= ( PMOND_FLAG ) ;
} }
if ( infra_network_provisioned == true ) if ( infra_network_provisioned == true )
{ {
hbs_sock.rx_mesg[iface].f |= INFRA_FLAG ; hbs_sock.rx_mesg[iface].f |= INFRA_FLAG ;
} }
#define WANT_CLUSTER_INFO_LOG /*************************************************************************
#ifdef WANT_CLUSTER_INFO_LOG ***** C L U S T E R D A T A M A N A G E M E N T ******
/* Log the received cluster info */ * *
if ( hbs_sock.rx_mesg[iface].v >= HBS_MESSAGE_VERSION ) * TODO: Add support for 3 controllers.
* Only 2 supported by some of this code.
***** ******/
if ( controller >= MTCE_HBS_MAX_CONTROLLERS )
{ {
char str[100] ; wlog ("invalid controller number: %d ; dropping message", controller );
// hbs_cluster_log (hbs_sock.rx_mesg[iface].cluster, hbs_sock.rx_mesg[iface].s ); return ( FAIL_INVALID_DATA );
snprintf ( &str[0], 100, " seq %6d with %d bytes from %s ", hbs_sock.rx_mesg[iface].s, rx_bytes, get_iface_name_str(iface));
string hostname = my_hostname ;
hbs_cluster_log ( hostname, hbs_sock.rx_mesg[iface].cluster, str );
} }
#endif
/* Manage the Resource Reference Index (RRI) "lookup clue"
* With the introduction of active-active heartbeating the hbsClient
* is responsible for servicing pulses from both controllers.
* This means that hbsClient needs to manage an rri for each controller. */
if ( ! strncmp ( &hbs_sock.rx_mesg[iface].m[HBS_HEADER_SIZE], &my_hostname[0], MAX_CHARS_HOSTNAME ))
{
if( rri[controller] != hbs_sock.rx_mesg[iface].c )
{
rri[controller] = hbs_sock.rx_mesg[iface].c ;
ilog ("Caching New RRI: %d (from controller-%d)\n", rri[controller], controller );
}
}
/* Log the received cluster info
* ... if the message version shows that it is supported */
if ( hbs_sock.rx_mesg[iface].v )
{
char str[MAX_LEN] ;
snprintf ( &str[0], MAX_LEN, " seq %6d with %d bytes from %s ", (int)hbs_sock.rx_mesg[iface].s, rx_bytes, get_iface_name_str(iface));
hbs_cluster_log ( hostname, hbs_sock.rx_mesg[iface].cluster, str );
/* add the controller back in */
hbs_sock.rx_mesg[iface].f |= ( controller << CTRLX_BIT );
/* Add my RRI to the response message */
hbs_sock.rx_mesg[iface].c = rri[controller] ;
if ( hbs_sock.rx_mesg[iface].cluster.histories > MTCE_HBS_MAX_NETWORKS )
{
slog ("controller-%d provided %d network histories ; max is %d per controller",
controller,
hbs_sock.rx_mesg[iface].cluster.histories,
MTCE_HBS_MAX_NETWORKS );
}
else if ( hbs_sock.rx_mesg[iface].cluster.bytes != ( BYTES_IN_CLUSTER_VAULT(hbs_sock.rx_mesg[iface].cluster.histories)))
{
slog ("controller-%d provided %d bytes of history ; expected %d",
controller,
hbs_sock.rx_mesg[iface].cluster.bytes,
(unsigned short)(BYTES_IN_CLUSTER_VAULT(hbs_sock.rx_mesg[iface].cluster.histories)));
}
else if ( hbs_sock.rx_mesg[iface].cluster.histories )
{
hbs_cluster_copy ( hbs_sock.rx_mesg[iface].cluster,
controller_cluster_cache[controller] );
clog1 ("controller-%d cluster info from %s pulse request saved to cache",
controller, get_iface_name_str(iface));
hbs_sock.rx_mesg[iface].cluster.histories = 0 ;
if ( have_other_controller_history ( controller ) == true )
{
/* Now copy the other controller's cached cluster info into
* this controller's response */
hbs_cluster_copy ( controller_cluster_cache[controller?0:1],
hbs_sock.rx_mesg[iface].cluster );
if ( daemon_get_cfg_ptr()->debug_state & 4 )
{
string dump_banner = "" ;
dump_banner.append("controller-") ;
dump_banner.append(itos(controller?0:1));
dump_banner.append(" cluster info from cache injected into controller-");
dump_banner.append(itos(controller));
dump_banner.append(":");
dump_banner.append(get_iface_name_str(iface));
dump_banner.append(" pulse response");
hbs_cluster_dump ( hbs_sock.rx_mesg[iface].cluster, dump_banner, true );
}
}
}
}
/* Cluster Data management end */
/* replace the request header with the response header */
memcpy ( &hbs_sock.rx_mesg[iface].m[0], &pulse_resp_tx_hdr[0], HBS_MAX_MSG );
#ifdef WANT_PULSE_RESPONSE_FIT #ifdef WANT_PULSE_RESPONSE_FIT
if (( iface == INFRA_IFACE ) && ( daemon_is_file_present ( MTC_CMD_FIT__NO_INFRA_RSP ))) if (( iface == INFRA_IFACE ) && ( daemon_is_file_present ( MTC_CMD_FIT__NO_INFRA_RSP )))
@ -839,29 +950,11 @@ int _service_pulse_request ( iface_enum iface , unsigned int flags )
} }
#endif #endif
int rc = PASS ; /* reuse the rx_bytes variable */
rx_bytes = sizeof(hbs_message_type)-sizeof(mtce_hbs_cluster_type)+BYTES_IN_CLUSTER_VAULT(hbs_sock.rx_mesg[iface].cluster.histories);
/* replace the request header with the response header */
memcpy ( &hbs_sock.rx_mesg[iface].m[0], &pulse_resp_tx_hdr[0], HBS_MAX_MSG );
/* Deal with the cluster info if it exists.
* ... Introduced in messaging version 1 */
if ( hbs_sock.rx_mesg[iface].v >= HBS_MESSAGE_VERSION )
{
if ( hbs_sock.rx_mesg[iface].cluster.version < MTCE_HBS_CLUSTER_VERSION )
{
ilog ("Bad cluster verison (%d)", hbs_sock.rx_mesg[iface].cluster.version);
}
// if ( hbs_sock.rx_mesg[iface].cluster.revision != MTCE_HBS_CLUSTER_REVISION )
// {
// ilog ("Bad cluster revision (%d)", hbs_sock.rx_mesg[iface].cluster.revision);
// }
/* Add peer controller cluster data to this controller's response */
// hbs_cluster_loop(hbs_sock.rx_mesg[iface]);
}
/* send pulse response message */ /* send pulse response message */
int rc = PASS ;
int tx_bytes = hbs_sock.tx_sock[iface]->reply(hbs_sock.rx_sock[iface],(char*)&hbs_sock.rx_mesg[iface], rx_bytes); int tx_bytes = hbs_sock.tx_sock[iface]->reply(hbs_sock.rx_sock[iface],(char*)&hbs_sock.rx_mesg[iface], rx_bytes);
if ( tx_bytes == -1 ) if ( tx_bytes == -1 )
{ {
@ -884,15 +977,15 @@ int _service_pulse_request ( iface_enum iface , unsigned int flags )
} }
else else
{ {
mlog ("%s Pulse Rsp: %s:%5d: %d:%d:%s RRI:%d (%d:%d:%d)\n", mlog ("%s Pulse Rsp: %s:%d: s:%d f:%x [%s] RRI:%d (%x:%d:%d)\n",
get_iface_name_str(iface), get_iface_name_str(iface),
hbs_sock.tx_sock[iface]->get_dst_addr()->toString(), hbs_sock.tx_sock[iface]->get_dst_addr()->toString(),
hbs_sock.tx_sock[iface]->get_dst_addr()->getPort(), hbs_sock.tx_sock[iface]->get_dst_addr()->getPort(),
hbs_sock.rx_mesg[iface].s, hbs_sock.rx_mesg[iface].s,
hbs_sock.rx_mesg[iface].f, hbs_sock.rx_mesg[iface].f,
hbs_sock.rx_mesg[iface].m, hbs_sock.rx_mesg[iface].m,
hbs_sock.rx_mesg[iface].c, hbs_sock.rx_mesg[iface].c,
pmonPulse_counter, rx_bytes, tx_bytes); pmonPulse_counter, rx_bytes, tx_bytes);
} }
/* Clear the error count since we got a good receive */ /* Clear the error count since we got a good receive */
@ -984,6 +1077,10 @@ int daemon_init ( string iface, string nodeType_str )
/* Initialize socket construct and pointer to it */ /* Initialize socket construct and pointer to it */
memset ( &hbs_sock, 0, sizeof(hbs_sock)); memset ( &hbs_sock, 0, sizeof(hbs_sock));
/* Initialize the controller cluster view data bounce structure */
for ( int c = 0 ; c < MTCE_HBS_MAX_CONTROLLERS ; c++ )
memset ( &controller_cluster_cache[c], 0, sizeof(mtce_hbs_cluster_type)) ;
/* init the utility module */ /* init the utility module */
hbs_utils_init (); hbs_utils_init ();
@ -1007,6 +1104,11 @@ int daemon_init ( string iface, string nodeType_str )
/* convert node type to integer */ /* convert node type to integer */
my_nodetype = get_host_function_mask ( nodeType_str ) ; my_nodetype = get_host_function_mask ( nodeType_str ) ;
if ( my_nodetype & CONTROLLER_TYPE )
{
/* is a controller but don't know which one yet. */
set_hn((char*)CONTROLLER_X);
}
ilog ("Node Type : %s (%d)\n", nodeType_str.c_str(), my_nodetype ); ilog ("Node Type : %s (%d)\n", nodeType_str.c_str(), my_nodetype );
/* Bind signal handlers */ /* Bind signal handlers */
@ -1058,7 +1160,6 @@ int daemon_init ( string iface, string nodeType_str )
int stall_threshold_log = 0 ; int stall_threshold_log = 0 ;
int stall_times_threshold_log = 0 ; int stall_times_threshold_log = 0 ;
#define MAX_LEN 300
void daemon_service_run ( void ) void daemon_service_run ( void )
{ {
#ifdef WANT_DAEMON_DEBUG #ifdef WANT_DAEMON_DEBUG
@ -1205,7 +1306,7 @@ void daemon_service_run ( void )
int bytes = hbs_sock.sm_client_sock->read((char*)&msg, sizeof(mtce_hbs_cluster_type)); int bytes = hbs_sock.sm_client_sock->read((char*)&msg, sizeof(mtce_hbs_cluster_type));
if ( bytes ) if ( bytes )
{ {
hbs_cluster_dump (msg); hbs_cluster_dump (msg, "Cluster info received", true );
} }
} }
#endif #endif

View File

@ -64,11 +64,19 @@ typedef struct
/* The working heartbeat cluster data vault. */ /* The working heartbeat cluster data vault. */
mtce_hbs_cluster_type cluster ; mtce_hbs_cluster_type cluster ;
bool cluster_change ;
int cluster_change_threshold_count ;
int cluster_change_difference_count ;
msgClassSock * sm_socket_ptr ;
} hbs_cluster_ctrl_type ; } hbs_cluster_ctrl_type ;
/* Cluster control structure construct allocation. */ /* Cluster control structure construct allocation. */
static hbs_cluster_ctrl_type ctrl ; static hbs_cluster_ctrl_type ctrl ;
#define STORAGE_0_NR_THRESHOLD (4)
#define CLUSTER_CHANGE_THRESHOLD (50000)
/**************************************************************************** /****************************************************************************
* *
@ -80,7 +88,7 @@ static hbs_cluster_ctrl_type ctrl ;
* *
***************************************************************************/ ***************************************************************************/
void hbs_cluster_init ( unsigned short period ) void hbs_cluster_init ( unsigned short period, msgClassSock * sm_socket_ptr )
{ {
ctrl.monitored_hosts = 0; ctrl.monitored_hosts = 0;
ctrl.monitored_hostname_list.clear(); ctrl.monitored_hostname_list.clear();
@ -104,13 +112,17 @@ void hbs_cluster_init ( unsigned short period )
for ( int h = 0 ; h < MTCE_HBS_MAX_HISTORY_ELEMENTS ; h++ ) for ( int h = 0 ; h < MTCE_HBS_MAX_HISTORY_ELEMENTS ; h++ )
hbs_cluster_history_init ( ctrl.cluster.history[h] ); hbs_cluster_history_init ( ctrl.cluster.history[h] );
ilog ("Cluster Info: v%d.%d sig:%x bytes:%d (%ld)", clog ("Cluster Info: v%d.%d sig:%x bytes:%d (%ld)",
ctrl.cluster.version, ctrl.cluster.version,
ctrl.cluster.revision, ctrl.cluster.revision,
ctrl.cluster.magic_number, ctrl.cluster.magic_number,
ctrl.cluster.bytes, ctrl.cluster.bytes,
sizeof(mtce_hbs_cluster_history_type)); sizeof(mtce_hbs_cluster_history_type));
if ( sm_socket_ptr )
{
ctrl.sm_socket_ptr = sm_socket_ptr ;
}
ctrl.log_throttle = 0 ; ctrl.log_throttle = 0 ;
} }
@ -140,7 +152,7 @@ void hbs_cluster_nums ( unsigned short this_controller,
/**************************************************************************** /****************************************************************************
* *
* Name : log_monitored_hosts_list * Name : cluster_list
* *
* Description : Log the list of monitored hosts. * Description : Log the list of monitored hosts.
* Typically done on a list change. * Typically done on a list change.
@ -149,7 +161,7 @@ void hbs_cluster_nums ( unsigned short this_controller,
* *
***************************************************************************/ ***************************************************************************/
void log_monitored_hosts_list ( void ) void cluster_list ( void )
{ {
std::list<string>::iterator iter_ptr ; std::list<string>::iterator iter_ptr ;
string list = "" ; string list = "" ;
@ -160,9 +172,7 @@ void log_monitored_hosts_list ( void )
list.append (*(iter_ptr)); list.append (*(iter_ptr));
list.append (" "); list.append (" ");
} }
ilog ("cluster of %ld: %s", ilog ("cluster: %s", list.c_str());
ctrl.monitored_hostname_list.size(),
list.c_str());
} }
@ -186,6 +196,7 @@ void cluster_storage0_state ( bool enabled )
ctrl.cluster.storage0_enabled = enabled ; ctrl.cluster.storage0_enabled = enabled ;
ilog ("storage-0 heartbeat state changed to %s", ilog ("storage-0 heartbeat state changed to %s",
enabled ? "enabled" : "disabled" ); enabled ? "enabled" : "disabled" );
ctrl.cluster_change = true ;
} }
} }
@ -237,13 +248,30 @@ void hbs_manage_controller_state ( string & hostname, bool enabled )
void hbs_cluster_add ( string & hostname ) void hbs_cluster_add ( string & hostname )
{ {
/* Consider using 'unique' after instead of remove before update. */ bool already_in_list = false ;
ctrl.monitored_hostname_list.remove(hostname) ; std::list<string>::iterator hostname_ptr ;
ctrl.monitored_hostname_list.push_back(hostname) ; for ( hostname_ptr = ctrl.monitored_hostname_list.begin();
ctrl.monitored_hosts = (unsigned short)ctrl.monitored_hostname_list.size(); hostname_ptr != ctrl.monitored_hostname_list.end() ;
hostname_ptr++ )
{
if ( hostname_ptr->compare(hostname) == 0 )
{
already_in_list = true ;
break ;
}
}
if ( already_in_list == false )
{
ctrl.monitored_hostname_list.push_back(hostname) ;
ctrl.monitored_hosts = (unsigned short)ctrl.monitored_hostname_list.size();
ilog ("%s added to cluster", hostname.c_str());
cluster_list ();
ctrl.cluster_change = true ;
}
/* Manage storage-0 state */ /* Manage storage-0 state */
if ( hostname == STORAGE_0 ) if ( hostname.compare(STORAGE_0) == 0 )
{ {
cluster_storage0_state ( true ); cluster_storage0_state ( true );
} }
@ -251,15 +279,18 @@ void hbs_cluster_add ( string & hostname )
/* If we get down to 0 monitored hosts then just start fresh */ /* If we get down to 0 monitored hosts then just start fresh */
if (( ctrl.monitored_hosts ) == 0 ) if (( ctrl.monitored_hosts ) == 0 )
{ {
hbs_cluster_init ( ctrl.cluster.period_msec ); hbs_cluster_init ( ctrl.cluster.period_msec, NULL );
} }
/* Manage controller state ; true means enabled in this case. */ /* Manage controller state ; true means enabled in this case. */
hbs_manage_controller_state ( hostname, true ); hbs_manage_controller_state ( hostname, true );
ilog ("%s added to cluster", hostname.c_str()); if (( ctrl.cluster_change ) && ( ctrl.sm_socket_ptr ))
{
hbs_cluster_send( ctrl.sm_socket_ptr, 0 );
ctrl.cluster_change = false ;
}
log_monitored_hosts_list ();
} }
/**************************************************************************** /****************************************************************************
@ -281,27 +312,46 @@ void hbs_cluster_add ( string & hostname )
void hbs_cluster_del ( string & hostname ) void hbs_cluster_del ( string & hostname )
{ {
ctrl.monitored_hostname_list.remove(hostname) ; std::list<string>::iterator hostname_ptr ;
ctrl.monitored_hosts = (unsigned short)ctrl.monitored_hostname_list.size(); for ( hostname_ptr = ctrl.monitored_hostname_list.begin();
hostname_ptr != ctrl.monitored_hostname_list.end() ;
/* Manage storage-0 state. */ hostname_ptr++ )
if ( hostname == STORAGE_0 )
{ {
cluster_storage0_state ( false ); if ( hostname_ptr->compare(hostname) == 0 )
{
ctrl.monitored_hostname_list.remove(hostname) ;
ctrl.monitored_hosts = (unsigned short)ctrl.monitored_hostname_list.size();
/* Manage storage-0 state. */
if ( hostname.compare(STORAGE_0) == 0 )
{
cluster_storage0_state ( false );
}
/* If we get down to 0 monitored hosts then just start fresh */
if (( ctrl.monitored_hosts ) == 0 )
{
hbs_cluster_init ( ctrl.cluster.period_msec, NULL );
}
/* Manage controller state ; false means not enabled in this case. */
hbs_manage_controller_state ( hostname , false );
ilog ("%s deleted from cluster", hostname.c_str());
cluster_list ();
ctrl.cluster_change = true ;
break ;
}
} }
/* If we get down to 0 monitored hosts then just start fresh */ if (( ctrl.cluster_change ) && ( ctrl.sm_socket_ptr ))
if (( ctrl.monitored_hosts ) == 0 )
{ {
hbs_cluster_init ( ctrl.cluster.period_msec ); hbs_cluster_send( ctrl.sm_socket_ptr, 0 );
ctrl.cluster_change = false ;
} }
/* Manage controller state ; false means not enabled in this case. */
hbs_manage_controller_state ( hostname , false );
ilog ("%s deleted from cluster", hostname.c_str());
log_monitored_hosts_list ();
} }
/**************************************************************************** /****************************************************************************
@ -309,7 +359,7 @@ void hbs_cluster_del ( string & hostname )
* Name : hbs_cluster_update * Name : hbs_cluster_update
* *
* Description : Update this controller's cluster info for the specified * Description : Update this controller's cluster info for the specified
* network with * network with ...
* *
* 1. The number of enabled hosts. * 1. The number of enabled hosts.
* 2. The number of responding hosts. * 2. The number of responding hosts.
@ -333,7 +383,6 @@ void hbs_cluster_del ( string & hostname )
* *
***************************************************************************/ ***************************************************************************/
#define STORAGE_0_NR_THRESHOLD (4)
void hbs_cluster_update ( iface_enum iface, void hbs_cluster_update ( iface_enum iface,
unsigned short not_responding_hosts, unsigned short not_responding_hosts,
@ -357,7 +406,7 @@ void hbs_cluster_update ( iface_enum iface,
if ( not_responding_hosts ) if ( not_responding_hosts )
{ {
clog1 ("controller-%d %s enabled:%d not responding:%d", clog ("controller-%d %s enabled:%d not responding:%d",
ctrl.this_controller, ctrl.this_controller,
hbs_cluster_network_name(n).c_str(), hbs_cluster_network_name(n).c_str(),
ctrl.monitored_hosts, ctrl.monitored_hosts,
@ -365,7 +414,7 @@ void hbs_cluster_update ( iface_enum iface,
} }
else else
{ {
clog1 ("controller-%d %s has %d monitored hosts and all are responding", clog ("controller-%d %s has %d monitored hosts and all are responding",
ctrl.this_controller, ctrl.this_controller,
hbs_cluster_network_name(n).c_str(), hbs_cluster_network_name(n).c_str(),
ctrl.monitored_hosts); ctrl.monitored_hosts);
@ -394,9 +443,11 @@ void hbs_cluster_update ( iface_enum iface,
history_ptr->network = n ; history_ptr->network = n ;
/* Log new network history as its being started. */ /* Log new network history as its being started. */
ilog ("controller-%d %s network history add", ilog ("controller-%d added new controller-%d:%s history to vault ; now have %d network views",
ctrl.this_controller, ctrl.this_controller,
hbs_cluster_network_name(n).c_str()); ctrl.this_controller,
hbs_cluster_network_name(n).c_str(),
ctrl.cluster.histories);
} }
} }
@ -457,7 +508,9 @@ void hbs_cluster_update ( iface_enum iface,
* ... which is the index for the next entry. * ... which is the index for the next entry.
*/ */
unsigned short last_entry_index ; unsigned short last_entry_index ;
if ( history_ptr->oldest_entry_index == 0 ) unsigned short oldest_entry_index = history_ptr->oldest_entry_index ;
if ( oldest_entry_index == 0 )
{ {
/* Go to the end of the array. */ /* Go to the end of the array. */
last_entry_index = MTCE_HBS_HISTORY_ENTRIES-1 ; last_entry_index = MTCE_HBS_HISTORY_ENTRIES-1 ;
@ -465,43 +518,88 @@ void hbs_cluster_update ( iface_enum iface,
else else
{ {
/* Otherwise, the previous index in the array */ /* Otherwise, the previous index in the array */
last_entry_index = history_ptr->oldest_entry_index - 1 ; last_entry_index = oldest_entry_index - 1 ;
} }
/* Update the history with this data. */ bool logit = false ;
history_ptr->entry[history_ptr->oldest_entry_index].hosts_enabled = ctrl.monitored_hosts ; string logit_reason = "" ;
history_ptr->entry[history_ptr->oldest_entry_index].hosts_responding = ctrl.monitored_hosts - not_responding_hosts ;
if (( history_ptr->entry[history_ptr->oldest_entry_index].hosts_enabled != /* Update the history with this data. */
history_ptr->entry[ last_entry_index].hosts_enabled ) || history_ptr->entry[oldest_entry_index].hosts_enabled = ctrl.monitored_hosts ;
( history_ptr->entry[history_ptr->oldest_entry_index].hosts_responding != history_ptr->entry[oldest_entry_index].hosts_responding = ctrl.monitored_hosts - not_responding_hosts ;
history_ptr->entry[ last_entry_index].hosts_responding))
if (( history_ptr->entry[oldest_entry_index].hosts_enabled !=
history_ptr->entry[ last_entry_index].hosts_enabled ) ||
( history_ptr->entry[oldest_entry_index].hosts_responding !=
history_ptr->entry[ last_entry_index].hosts_responding))
{ {
/* Only log on change events. */ /* Only log on change events. */
if ( history_ptr->entry[history_ptr->oldest_entry_index].hosts_enabled == if ( history_ptr->entry[oldest_entry_index].hosts_enabled ==
history_ptr->entry[history_ptr->oldest_entry_index].hosts_responding ) history_ptr->entry[oldest_entry_index].hosts_responding )
{ {
ilog ("controller-%d %s cluster of %d is healthy", ilog ("controller-%d %s cluster of %d is healthy",
ctrl.this_controller, ctrl.this_controller,
hbs_cluster_network_name(n).c_str(), hbs_cluster_network_name(n).c_str(),
history_ptr->entry[history_ptr->oldest_entry_index].hosts_enabled); history_ptr->entry[oldest_entry_index].hosts_enabled);
ctrl.cluster_change_threshold_count = 0 ;
ctrl.cluster_change_difference_count = 0 ;
} }
else else
{ {
ilog ("controller-%d %s cluster of %d with %d responding", ctrl.cluster_change_threshold_count++ ;
ctrl.this_controller, ctrl.cluster_change_difference_count =
hbs_cluster_network_name(n).c_str(), history_ptr->entry[oldest_entry_index].hosts_enabled -
history_ptr->entry[history_ptr->oldest_entry_index].hosts_enabled, history_ptr->entry[oldest_entry_index].hosts_responding ;
history_ptr->entry[history_ptr->oldest_entry_index].hosts_responding);
} }
} }
if ( daemon_get_cfg_ptr()->debug_state&4 )
{
logit = true ;
logit_reason = "(debug)" ;
}
// else if (( ctrl.cluster_change_threshold_count == 1 ) &&
// ( cluster_change == false ))
// {
// logit = true ;
// logit_reason = "" ;
// }
else if ( ctrl.cluster_change_threshold_count >= CLUSTER_CHANGE_THRESHOLD )
{
logit = true ;
ctrl.cluster_change_threshold_count = 0 ;
logit_reason = "(threshold)" ;
}
else
{
int delta =
history_ptr->entry[oldest_entry_index].hosts_enabled -
history_ptr->entry[oldest_entry_index].hosts_responding ;
if ( delta != ctrl.cluster_change_difference_count )
{
logit = true ;
ctrl.cluster_change_difference_count = delta ;
logit_reason = "(delta)" ;
}
}
if ( logit )
{
ilog ("controller-%d %s cluster of %d with %d responding (%d:%d) %s",
ctrl.this_controller,
hbs_cluster_network_name(n).c_str(),
history_ptr->entry[oldest_entry_index].hosts_enabled,
history_ptr->entry[oldest_entry_index].hosts_responding,
ctrl.cluster_change_difference_count,
not_responding_hosts,
logit_reason.c_str());
}
/* Increment the entries count till it reaches the max. */ /* Increment the entries count till it reaches the max. */
if ( history_ptr->entries < MTCE_HBS_HISTORY_ENTRIES ) if ( history_ptr->entries < MTCE_HBS_HISTORY_ENTRIES )
history_ptr->entries++ ; history_ptr->entries++ ;
/* Manage the next entry update index ; aka the oldest index. */ /* Manage the next entry update index ; aka the oldest index. */
if ( history_ptr->oldest_entry_index == (MTCE_HBS_HISTORY_ENTRIES-1)) if ( oldest_entry_index == (MTCE_HBS_HISTORY_ENTRIES-1))
history_ptr->oldest_entry_index = 0 ; history_ptr->oldest_entry_index = 0 ;
else else
history_ptr->oldest_entry_index++ ; history_ptr->oldest_entry_index++ ;
@ -521,24 +619,31 @@ void hbs_cluster_update ( iface_enum iface,
void hbs_cluster_append ( hbs_message_type & msg ) void hbs_cluster_append ( hbs_message_type & msg )
{ {
unsigned short c = ctrl.this_controller ; CHECK_CTRL_NTWK_PARMS(ctrl.this_controller, ctrl.monitored_networks);
CHECK_CTRL_NTWK_PARMS(c, ctrl.monitored_networks);
msg.cluster.version = ctrl.cluster.version ; msg.cluster.version = ctrl.cluster.version ;
msg.cluster.revision = ctrl.cluster.revision ; msg.cluster.revision = ctrl.cluster.revision ;
msg.cluster.magic_number = ctrl.cluster.magic_number ; msg.cluster.magic_number = ctrl.cluster.magic_number ;
msg.cluster.period_msec = ctrl.cluster.period_msec ; msg.cluster.period_msec = ctrl.cluster.period_msec ;
msg.cluster.storage0_enabled = ctrl.cluster.storage0_enabled ; msg.cluster.storage0_enabled = ctrl.cluster.storage0_enabled ;
msg.cluster.histories = ctrl.cluster.histories ; msg.cluster.histories = 0 ;
int bytes = BYTES_IN_CLUSTER_VAULT(ctrl.monitored_networks); /* Copy this controller's cluster history into the broadcast request. */
for ( int h = 0 ; h < ctrl.cluster.histories ; h++ )
{
if ( ctrl.cluster.history[h].controller == ctrl.this_controller )
{
memcpy( &msg.cluster.history[msg.cluster.histories],
&ctrl.cluster.history[h],
sizeof(mtce_hbs_cluster_history_type));
clog1 ("controller-%d appending cluster info to heartbeat message (%d:%d:%d)", msg.cluster.histories++ ;
c, ctrl.monitored_networks, ctrl.cluster.histories, bytes ); }
}
msg.cluster.bytes = BYTES_IN_CLUSTER_VAULT(msg.cluster.histories);
/* Copy the cluster into the message. */ clog2 ("controller-%d appending cluster info to heartbeat message (%d:%d:%d)",
memcpy( &msg.cluster.history[0], &ctrl.cluster.history[c], bytes); ctrl.this_controller, ctrl.monitored_networks, ctrl.cluster.histories, msg.cluster.bytes );
} }
/**************************************************************************** /****************************************************************************
@ -574,57 +679,8 @@ unsigned short hbs_cluster_unused_bytes ( void )
* *
***************************************************************************/ ***************************************************************************/
/* NOTE: All code wrapped in this directive will be removed once
* active/active heartbeating is delivered in next update */
#define WANT_ACTIVE_ACTIVE_HEARTBEAT_RESULTS
void hbs_cluster_send ( msgClassSock * sm_client_sock, int reqid ) void hbs_cluster_send ( msgClassSock * sm_client_sock, int reqid )
{ {
#ifdef WANT_ACTIVE_ACTIVE_HEARTBEAT_RESULTS
/* To assist SM with duplex integration ...
*
* This code emulates heartbeat redundancy by duplicating
* controller history up to the number of provisioned
* controllers until active-active heartbeat is delivered.
*/
int peer_controller ;
bool copy_cluster = false ;
if ( ctrl.this_controller == 0 )
{
peer_controller = 1 ;
if ( ctrl.controller_1_enabled )
{
copy_cluster = true ;
}
}
else
{
peer_controller = 0 ;
if ( ctrl.controller_0_enabled )
{
copy_cluster = true ;
}
}
int n, networks = ctrl.cluster.histories ;
if ( copy_cluster )
{
for ( n = 0 ; n < networks ; n++ )
{
/* copy this controller history to create peer controller */
ctrl.cluster.history[ctrl.cluster.histories] = ctrl.cluster.history[n] ;
/* update the controller */
ctrl.cluster.history[ctrl.cluster.histories].controller = peer_controller ;
ctrl.cluster.bytes += sizeof(mtce_hbs_cluster_history_type) ;
ctrl.cluster.histories++ ;
}
}
#endif // WANT_ACTIVE_ACTIVE_HEARTBEAT_RESULTS
ctrl.cluster.reqid = (unsigned short)reqid ; ctrl.cluster.reqid = (unsigned short)reqid ;
if (( sm_client_sock ) && ( sm_client_sock->sock_ok() == true )) if (( sm_client_sock ) && ( sm_client_sock->sock_ok() == true ))
{ {
@ -637,34 +693,82 @@ void hbs_cluster_send ( msgClassSock * sm_client_sock, int reqid )
} }
else else
{ {
ilog ("heartbeat cluster vault sent to SM (%d bytes)", len ); string reason = "" ;
hbs_cluster_dump ( ctrl.cluster ); // ilog ("heartbeat cluster vault sent to SM (%d bytes)", len );
if ( reqid )
reason = "cluster query" ;
else
reason = "cluster event" ;
hbs_cluster_dump ( ctrl.cluster, reason, true );
} }
} }
else
#ifdef WANT_ACTIVE_ACTIVE_HEARTBEAT_RESULTS
if ( copy_cluster )
{ {
/* Clear out the other controllers data. */ wlog ("cannot send cluster info due to socket error");
for ( n = networks ; n > 0 ; n-- ) }
}
/****************************************************************************
*
* Name : hbs_history_save
*
* Description : Copy the history sample to the vault.
*
* Returns : Nothing.
*
***************************************************************************/
void hbs_history_save ( string hostname, mtce_hbs_cluster_history_type & sample )
{
for ( int h = 0 ; h < ctrl.cluster.histories ; h++ )
{
if (( ctrl.cluster.history[h].controller == sample.controller ) &&
( ctrl.cluster.history[h].network == sample.network ))
{ {
/* copy c0 history to another controller */ memcpy( &ctrl.cluster.history[h], &sample,
hbs_cluster_history_init(ctrl.cluster.history[ctrl.cluster.histories-1]); sizeof(mtce_hbs_cluster_history_type));
ctrl.cluster.bytes -= sizeof(mtce_hbs_cluster_history_type);
ctrl.cluster.histories-- ; clog1 ("controller-%d updated vault with controller-%d:%s network history through %s (histories:%d)",
ctrl.this_controller,
sample.controller,
hbs_cluster_network_name((mtce_hbs_network_enum)sample.network).c_str(),
hostname.c_str(),
ctrl.cluster.histories);
return ;
} }
} }
#endif // WANT_ACTIVE_ACTIVE_HEARTBEAT_RESULTS /* not found ? Add a new one */
memcpy( &ctrl.cluster.history[ctrl.cluster.histories], &sample,
sizeof(mtce_hbs_cluster_history_type));
ctrl.cluster.histories++ ;
ctrl.cluster.bytes = BYTES_IN_CLUSTER_VAULT(ctrl.cluster.histories);
ilog ("controller-%d added new controller-%d:%s history to vault ; now have %d network views",
ctrl.this_controller,
sample.controller,
hbs_cluster_network_name((mtce_hbs_network_enum)sample.network).c_str(),
ctrl.cluster.histories);
} }
/* Audit hook ; requests a forced dump of this controller's cluster
 * vault (ctrl.cluster) using "Audit" as the log prefix. */
void hbs_state_audit ( void )
{
    const bool force = true ;
    hbs_cluster_dump ( ctrl.cluster, "Audit", force );
}
void hbs_cluster_log ( string & hostname, string prefix ) void hbs_cluster_log ( string & hostname, string prefix )
{ {
hbs_cluster_log ( hostname, ctrl.cluster, prefix ); hbs_cluster_log ( hostname, ctrl.cluster, prefix );
} }
/* Convenience overload ; logs this controller's own cluster vault
 * (ctrl.cluster) by forwarding hostname, log_prefix and force to the
 * hbs_cluster_log variant that takes the cluster explicitly. */
void hbs_cluster_log ( string & hostname,
                       string   log_prefix,
                       bool     force )
{
    mtce_hbs_cluster_type & vault = ctrl.cluster ;
    hbs_cluster_log ( hostname, vault, log_prefix, force );
}
/**************************************************************************** /****************************************************************************
* *
* Active Active Heartbeating and Debug Member Functions * Active Active Heartbeating and Debug Member Functions
@ -724,10 +828,6 @@ int hbs_cluster_cmp( hbs_message_type & msg )
* Description : Copies the other controllers information from msg into * Description : Copies the other controllers information from msg into
* the cluster. * the cluster.
* *
* NOTE: Does not do that right now.
*
* Assumptions : Place holder until active/active heartbeating is implemented.
*
* Returns : PASS or FAIL * Returns : PASS or FAIL
* *
***************************************************************************/ ***************************************************************************/
@ -736,12 +836,29 @@ int hbs_cluster_save ( string & hostname,
mtce_hbs_network_enum network, mtce_hbs_network_enum network,
hbs_message_type & msg ) hbs_message_type & msg )
{ {
// clog ("Add cluster info from peer controller"); /* cluster info is only supported in HBS_MESSAGE_VERSION 1 */
if ( ctrl.monitored_hosts ) if ( msg.v < HBS_MESSAGE_VERSION )
return FAIL_NOT_SUPPORTED ;
if ( ! ctrl.monitored_hosts )
return RETRY ;
if ( msg.cluster.histories == 0 )
return PASS ;
for ( int h = 0 ; h < msg.cluster.histories ; h++ )
{ {
/* compare cluster info and log deltas */ if ( msg.cluster.history[h].network >= MTCE_HBS_MAX_NETWORKS )
// hbs_cluster_cmp( msg ); {
UNUSED(msg); elog ("Invalid network id (%d:%d:%d)",
h,
msg.cluster.history[h].controller,
msg.cluster.history[h].network );
}
else if ( msg.cluster.history[h].controller != ctrl.this_controller )
{
hbs_history_save ( hostname, msg.cluster.history[h] );
}
hbs_cluster_log( hostname, ctrl.cluster, hbs_cluster_network_name(network) ); hbs_cluster_log( hostname, ctrl.cluster, hbs_cluster_network_name(network) );
} }
return (PASS); return (PASS);

View File

@ -241,10 +241,11 @@ int mtcSmgrApi_active_services ( string hostname , bool * yes_no_ptr )
return(PASS); return(PASS);
} }
int send_hbs_command ( string hostname, int command ) int send_hbs_command ( string hostname, int command, string controller )
{ {
UNUSED(hostname); UNUSED(hostname);
UNUSED(command); UNUSED(command);
UNUSED(controller);
return(PASS); return(PASS);
} }

View File

@ -111,6 +111,33 @@ string hbs_cluster_network_name ( mtce_hbs_network_enum network )
} }
} }
/****************************************************************************
 *
 * Name        : hbs_cluster_copy
 *
 * Description : Copies the cluster header fields (version, revision,
 *               magic_number, period_msec, histories, storage0_enabled)
 *               and the first 'histories' history elements from src to
 *               dst, then recomputes dst.bytes from the copied count.
 *
 *               Fields not assigned here (reqid for instance) are left
 *               as they were in dst.
 *
 * Parameters  : src - cluster to copy from
 *               dst - cluster to copy into
 *
 * Returns     : Nothing.
 *
 ***************************************************************************/
void hbs_cluster_copy ( mtce_hbs_cluster_type & src, mtce_hbs_cluster_type & dst )
{
    /* Header fields. */
    dst.version          = src.version ;
    dst.revision         = src.revision ;
    dst.magic_number     = src.magic_number ;
    dst.period_msec      = src.period_msec ;
    dst.histories        = src.histories ;
    dst.storage0_enabled = src.storage0_enabled ;

    /* History elements ; one raw copy per element in use. */
    int h = 0 ;
    while ( h < dst.histories )
    {
        memcpy( &dst.history[h],
                &src.history[h],
                sizeof(mtce_hbs_cluster_history_type));
        h++ ;
    }

    /* Size of the vault for the number of histories now held. */
    dst.bytes = BYTES_IN_CLUSTER_VAULT(dst.histories);
}
/**************************************************************************** /****************************************************************************
* *
@ -126,11 +153,9 @@ string hbs_cluster_network_name ( mtce_hbs_network_enum network )
void hbs_cluster_log ( string & hostname, void hbs_cluster_log ( string & hostname,
mtce_hbs_cluster_type & cluster, mtce_hbs_cluster_type & cluster,
string log_prefix ) string log_prefix,
bool force )
{ {
// bool want_log = false ;
clog1 ("log %d histories", cluster.histories );
for ( int h = 0 ; h < cluster.histories ; h++ ) for ( int h = 0 ; h < cluster.histories ; h++ )
{ {
if ( cluster.history[h].entries == MTCE_HBS_HISTORY_ENTRIES ) if ( cluster.history[h].entries == MTCE_HBS_HISTORY_ENTRIES )
@ -140,8 +165,6 @@ void hbs_cluster_log ( string & hostname,
mtce_hbs_cluster_entry_type e = { 0, 0 } ; mtce_hbs_cluster_entry_type e = { 0, 0 } ;
char str[MAX_CLUSTER_LINE_LEN] ; char str[MAX_CLUSTER_LINE_LEN] ;
string line = ""; string line = "";
int start = 0 ;
int stop = 0 ;
bool newline = false ; bool newline = false ;
bool logit = false ; bool logit = false ;
bool first = false ; bool first = false ;
@ -149,18 +172,13 @@ void hbs_cluster_log ( string & hostname,
mtce_hbs_cluster_history_type * history_ptr = &cluster.history[h] ; mtce_hbs_cluster_history_type * history_ptr = &cluster.history[h] ;
clog1 ("%s %s has %d entries (controller-%d view from %s)", hostname.c_str(),
hbs_cluster_network_name((mtce_hbs_network_enum)history_ptr->network).c_str(),
history_ptr->entries,
history_ptr->controller,
log_prefix.c_str());
/* Manage local this_index for log display. /* Manage local this_index for log display.
* Display oldest to newest ; left to right * Display oldest to newest ; left to right
* *
* */ * */
int this_index = history_ptr->oldest_entry_index ; int this_index = history_ptr->oldest_entry_index ;
int debug = daemon_get_cfg_ptr()->debug_state ;
for ( int count = 0 ; count < history_ptr->entries ; count++ ) for ( int count = 0 ; count < history_ptr->entries ; count++ )
{ {
if (( line.length() + MAX_ENTRY_STR_LEN ) >= if (( line.length() + MAX_ENTRY_STR_LEN ) >=
@ -180,13 +198,11 @@ void hbs_cluster_log ( string & hostname,
} }
#endif #endif
// want_log = true ;
if ( count == 0 ) if ( count == 0 )
{ {
snprintf (&str[0], MAX_ENTRY_STR_LEN , "%d:%d ", // -%d", snprintf (&str[0], MAX_ENTRY_STR_LEN , "%d:%d ", // -%d",
history_ptr->entry[this_index].hosts_enabled, history_ptr->entry[this_index].hosts_enabled,
history_ptr->entry[this_index].hosts_responding ); // , this_index ); history_ptr->entry[this_index].hosts_responding );
line.append (str); line.append (str);
str[0] = '\0' ; str[0] = '\0' ;
} }
@ -203,7 +219,7 @@ void hbs_cluster_log ( string & hostname,
{ {
snprintf (&str[0], MAX_ENTRY_STR_LEN , "%d:%d ", // -%d", snprintf (&str[0], MAX_ENTRY_STR_LEN , "%d:%d ", // -%d",
history_ptr->entry[this_index].hosts_enabled, history_ptr->entry[this_index].hosts_enabled,
history_ptr->entry[this_index].hosts_responding ); // , this_index ); history_ptr->entry[this_index].hosts_responding );
line.append (str); line.append (str);
str[0] = '\0' ; str[0] = '\0' ;
logit = true ; logit = true ;
@ -214,31 +230,21 @@ void hbs_cluster_log ( string & hostname,
first_log[h] = true ; first_log[h] = true ;
logit = true ; logit = true ;
} }
stop++ ;
if ( newline == true ) if ( newline == true )
{ {
if ( logit ) if ( logit )
{ {
SET_CONTROLLER_HOSTNAME(history_ptr->controller); SET_CONTROLLER_HOSTNAME(history_ptr->controller);
if ( hostname == controller ) if (( force ) || ( debug&2 ))
{ {
clog ("%s view %s %s %02d..%02d: %s,", syslog ( LOG_INFO, "%s view from %s %s %s: %s",
hostname.c_str(), controller.c_str(),
log_prefix.c_str(), hostname.c_str(),
hbs_cluster_network_name((mtce_hbs_network_enum)history_ptr->network).c_str(), log_prefix.c_str(),
start, stop, line.c_str()); hbs_cluster_network_name((mtce_hbs_network_enum)history_ptr->network).c_str(),
} line.c_str());
else
{
clog ("%s view from %s %s %s %02d..%02d: %s,",
controller.c_str(),
hostname.c_str(),
log_prefix.c_str(),
hbs_cluster_network_name((mtce_hbs_network_enum)history_ptr->network).c_str(),
start, stop, line.c_str());
} }
} }
start = stop + 1 ;
line.clear(); line.clear();
first = true ; first = true ;
newline = false ; newline = false ;
@ -253,7 +259,6 @@ void hbs_cluster_log ( string & hostname,
} }
if (( newline == false ) && ( line.length() )) if (( newline == false ) && ( line.length() ))
{ {
// ERIC
if (( logit == false ) && ( was_diff[h] == true )) if (( logit == false ) && ( was_diff[h] == true ))
{ {
logit = true ; logit = true ;
@ -264,30 +269,25 @@ void hbs_cluster_log ( string & hostname,
{ {
if ( first ) if ( first )
{ {
clog ("............ %s %s %02d..%02d: %s", if (( force ) || ( debug&2 ))
log_prefix.c_str(), {
hbs_cluster_network_name((mtce_hbs_network_enum)history_ptr->network).c_str(), syslog ( LOG_INFO, "............ %s %s: %s",
start, stop, line.c_str()); log_prefix.c_str(),
hbs_cluster_network_name((mtce_hbs_network_enum)history_ptr->network).c_str(),
line.c_str());
}
} }
else else
{ {
SET_CONTROLLER_HOSTNAME(history_ptr->controller); SET_CONTROLLER_HOSTNAME(history_ptr->controller);
if ( hostname == controller ) if (( force ) || ( debug&2 ))
{ {
clog ("%s view %s %s %02d..%02d: %s", syslog ( LOG_INFO, "%s view from %s %s %s: %s",
hostname.c_str(), controller.c_str(),
log_prefix.c_str(), hostname.c_str(),
hbs_cluster_network_name((mtce_hbs_network_enum)history_ptr->network).c_str(), log_prefix.c_str(), /* Infra <- */
start, stop, line.c_str()); hbs_cluster_network_name((mtce_hbs_network_enum)history_ptr->network).c_str(),
} line.c_str());
else
{
clog ("%s view from %s %s %s %02d..%02d: %s",
controller.c_str(),
hostname.c_str(),
log_prefix.c_str(), /* Infra <- */
hbs_cluster_network_name((mtce_hbs_network_enum)history_ptr->network).c_str(),
start, stop, line.c_str());
} }
} }
} }
@ -307,40 +307,62 @@ void hbs_cluster_log ( string & hostname,
* Description: Formatted dump of the vault contents to the log file. * Description: Formatted dump of the vault contents to the log file.
* *
***************************************************************************/ ***************************************************************************/
void hbs_cluster_dump ( mtce_hbs_cluster_type & vault ) void hbs_cluster_dump ( mtce_hbs_cluster_type & vault, string log_prefix, bool force )
{ {
syslog ( LOG_INFO, "Cluster Vault Dump: --------------------------------------------------------------------------------------------"); if ( vault.version == 0 )
syslog ( LOG_INFO, "Cluster Vault: v%d.%d %d msec period ; SM Reqid is %d with storage-0 %s and %d histories in %d bytes", return ;
vault.version,
vault.revision, int debug = daemon_get_cfg_ptr()->debug_state ;
vault.period_msec,
vault.reqid, if (( debug & 2 ) || ( force == true ))
vault.storage0_enabled ? "enabled" : "disabled",
vault.histories,
vault.bytes );
for ( int h = 0 ; h < vault.histories ; h++ )
{ {
#define MAX_LINE_LEN (500) ilog ("%s", log_prefix.c_str());
char str[MAX_LINE_LEN] ; syslog ( LOG_INFO, "Cluster Vault : v%d.%d %d msec heartbeat period %s;%d network heartbeat response histories (%d bytes)",
int i = 0 ; vault.version,
for ( int e = 0 ; e < vault.history[h].entries_max ; e++ ) vault.revision,
{ vault.period_msec,
snprintf ( &str[i], MAX_LINE_LEN, "%c[%d:%d]" , vault.storage0_enabled ? " with storage-0: enabled " : "",
vault.history[h].oldest_entry_index==e ? '>' : ' ', vault.histories,
vault.history[h].entry[e].hosts_enabled, vault.bytes );
vault.history[h].entry[e].hosts_responding); }
i = strlen(str) ;
} if (( debug & 4 ) || ( force == true ))
syslog ( LOG_INFO, "Cluster Vault: C%d %s S:%s:%s (%d:%d) %s", {
vault.history[h].controller, for ( int h = 0 ; h < vault.histories ; h++ )
hbs_cluster_network_name((mtce_hbs_network_enum)vault.history[h].network).c_str(), {
vault.storage0_enabled ? "y" : "n", #define MAX_LINE_LEN (500)
vault.history[h].storage0_responding ? "y" : "n", char str[MAX_LINE_LEN] ;
vault.history[h].entries_max, int i = 0 ;
vault.history[h].entries, for ( int e = 0 ; e < vault.history[h].entries_max ; e++ )
str); {
snprintf ( &str[i], MAX_LINE_LEN, "%c[%d:%d]" ,
vault.history[h].oldest_entry_index==e ? '>' : ' ',
vault.history[h].entry[e].hosts_enabled,
vault.history[h].entry[e].hosts_responding);
i = strlen(str) ;
}
if ( vault.storage0_enabled )
{
syslog ( LOG_INFO, "Cluster Vault : C%d %s S:%s %s",
vault.history[h].controller,
hbs_cluster_network_name((mtce_hbs_network_enum)vault.history[h].network).c_str(),
vault.history[h].storage0_responding ? "y" : "n",
str);
}
else
{
syslog ( LOG_INFO, "Cluster Vault : C%d %s %s",
vault.history[h].controller,
hbs_cluster_network_name((mtce_hbs_network_enum)vault.history[h].network).c_str(),
str);
}
}
}
if ( debug & 8 )
{
dump_memory ( &vault, 16, vault.bytes );
} }
// dump_memory ( &vault, 16, vault.bytes );
} }

View File

@ -46,6 +46,7 @@ CONTROL_OBJS += mtcHttpSvr.o
CONTROL_OBJS += mtcCmdHdlr.o CONTROL_OBJS += mtcCmdHdlr.o
CONTROL_OBJS += mtcNodeMnfa.o CONTROL_OBJS += mtcNodeMnfa.o
CONTROL_OBJS += mtcVimApi.o CONTROL_OBJS += mtcVimApi.o
CONTROL_OBJS += mtcStubs.o
CONTROL_OBJS += ../common/nodeClass.o CONTROL_OBJS += ../common/nodeClass.o
OBJS = $(SRCS:.cpp=.o) OBJS = $(SRCS:.cpp=.o)

View File

@ -48,6 +48,7 @@ using namespace std;
#include "mtcAlarm.h" /* for ... mtcAlarm... */ #include "mtcAlarm.h" /* for ... mtcAlarm... */
#include "nodeUtil.h" /* for ... get_event_str ... */ #include "nodeUtil.h" /* for ... get_event_str ... */
int service_events ( nodeLinkClass * obj_ptr, mtc_socket_type * sock_ptr );
/* Throttle logging of messages from unknown IP addresses */ /* Throttle logging of messages from unknown IP addresses */
std::list<string> unknown_ip_list ; std::list<string> unknown_ip_list ;
@ -766,7 +767,7 @@ int send_mtc_cmd ( string & hostname, int cmd , int interface )
return ( rc ); return ( rc );
} }
int send_hbs_command ( string hostname, int cmd ) int send_hbs_command ( string hostname, int cmd, string controller )
{ {
int bytes = 0 ; int bytes = 0 ;
int bytes_to_send = 0 ; int bytes_to_send = 0 ;
@ -776,18 +777,6 @@ int send_hbs_command ( string hostname, int cmd )
mtc_message_type event ; mtc_message_type event ;
mtc_socket_type * sock_ptr = get_sockPtr (); mtc_socket_type * sock_ptr = get_sockPtr ();
/* We don't heartbeat self */
if (( obj_ptr->is_active_controller (hostname) ) &&
(( cmd == MTC_CMD_ADD_HOST ) ||
( cmd == MTC_CMD_DEL_HOST ) ||
( cmd == MTC_CMD_START_HOST ) ||
( cmd == MTC_CMD_STOP_HOST )))
{
dlog ("%s refusing to '%s' self to heartbeat service\n",
hostname.c_str(), get_event_str(cmd).c_str());
return (PASS);
}
memset (&event, 0 , sizeof(mtc_message_type)); memset (&event, 0 , sizeof(mtc_message_type));
snprintf ( &event.hdr[0] , MSG_HEADER_SIZE, "%s", get_hbs_cmd_req_header() ); snprintf ( &event.hdr[0] , MSG_HEADER_SIZE, "%s", get_hbs_cmd_req_header() );
snprintf ( &event.hdr[MSG_HEADER_SIZE] , MAX_CHARS_HOSTNAME , "%s", hostname.data()); snprintf ( &event.hdr[MSG_HEADER_SIZE] , MAX_CHARS_HOSTNAME , "%s", hostname.data());
@ -795,48 +784,72 @@ int send_hbs_command ( string hostname, int cmd )
/* There is no buffer data in any of these messages */ /* There is no buffer data in any of these messages */
bytes_to_send = ((sizeof(mtc_message_type))-(BUF_SIZE)) ; bytes_to_send = ((sizeof(mtc_message_type))-(BUF_SIZE)) ;
switch ( cmd )
{
case MTC_CMD_STOP_HOST:
ilog ("%s sending 'stop' to heartbeat service\n", hostname.c_str());
break ;
case MTC_CMD_START_HOST:
obj_ptr->manage_heartbeat_clear ( hostname , MAX_IFACES );
ilog ("%s sending 'start' to heartbeat service\n", hostname.c_str());
break ;
case MTC_CMD_DEL_HOST:
ilog ("%s sending 'delete' to heartbeat service\n", hostname.c_str());
break ;
case MTC_CMD_ADD_HOST:
obj_ptr->manage_heartbeat_clear ( hostname, MAX_IFACES );
ilog ("%s sending 'add' to heartbeat service\n", hostname.c_str());
break ;
case MTC_RESTART_HBS:
ilog ("%s sending 'restart' to heartbeat service\n", hostname.c_str());
break ;
case MTC_BACKOFF_HBS:
ilog ("%s requesting heartbeat period backoff\n", hostname.c_str());
break ;
case MTC_RECOVER_HBS:
ilog ("%s requesting heartbeat period recovery\n", hostname.c_str());
break ;
default:
{
slog ("%s Unsupported command operation 0x%x\n", hostname.c_str(), cmd );
return (FAIL_BAD_PARM);
}
}
event.cmd = cmd ; event.cmd = cmd ;
event.num = 1 ; event.num = 1 ;
event.parm[0] = obj_ptr->get_nodetype(hostname); event.parm[0] = obj_ptr->get_nodetype(hostname);
/* send to hbsAgent daemon port */ /* send to hbsAgent daemon port */
bytes = sock_ptr->mtc_to_hbs_sock->write((char*) &event, bytes_to_send); std::list<string> controllers ;
if ( bytes <= 0 ) controllers.clear();
if ( controller == CONTROLLER )
{ {
wlog ("Cannot send to heartbeat service\n"); controllers.push_back(CONTROLLER_0);
rc = FAIL_TO_TRANSMIT ; controllers.push_back(CONTROLLER_1);
}
else
{
controllers.push_back(controller);
}
string ip = "" ;
std::list<string>::iterator unit ;
for ( unit = controllers.begin () ;
unit != controllers.end () ;
unit++ )
{
switch ( cmd )
{
case MTC_CMD_ACTIVE_CTRL:
mlog3 ("%s sending 'activity state' to %s heartbeat service\n", hostname.c_str(), unit->c_str());
break ;
case MTC_CMD_STOP_HOST:
ilog ("%s sending 'stop' to %s heartbeat service\n", hostname.c_str(), unit->c_str());
break ;
case MTC_CMD_START_HOST:
obj_ptr->manage_heartbeat_clear ( hostname , MAX_IFACES );
ilog ("%s sending 'start' to %s heartbeat service\n", hostname.c_str(), unit->c_str());
break ;
case MTC_CMD_DEL_HOST:
ilog ("%s sending 'delete' to %s heartbeat service\n", hostname.c_str(), unit->c_str());
break ;
case MTC_CMD_ADD_HOST:
obj_ptr->manage_heartbeat_clear ( hostname, MAX_IFACES );
ilog ("%s sending 'add' to %s heartbeat service\n", hostname.c_str(), unit->c_str());
break ;
case MTC_RESTART_HBS:
ilog ("%s sending 'restart' to %s heartbeat service\n", hostname.c_str(), unit->c_str());
break ;
case MTC_BACKOFF_HBS:
ilog ("%s requesting %s heartbeat period backoff\n", hostname.c_str(), unit->c_str());
break ;
case MTC_RECOVER_HBS:
ilog ("%s requesting %s heartbeat period recovery\n", hostname.c_str(), unit->c_str());
break ;
default:
{
slog ("%s Unsupported command operation 0x%x\n", hostname.c_str(), cmd );
rc = FAIL_BAD_PARM ;
continue ;
}
}
ip = get_mtcInv_ptr()->get_hostaddr(*unit) ;
bytes = sock_ptr->mtc_to_hbs_sock->write((char*) &event, bytes_to_send, ip.data());
if ( bytes <= 0 )
{
wlog ("%s failed to send command (0x%x) to heartbeat service at %s\n", unit->c_str(), cmd, ip.c_str() );
rc = FAIL_TO_TRANSMIT ;
}
} }
return rc ; return rc ;
} }
@ -954,6 +967,14 @@ int service_events ( nodeLinkClass * obj_ptr, mtc_socket_type * sock_ptr )
/* Assert the degrade condition with the 'false' (i.e. not clear)*/ /* Assert the degrade condition with the 'false' (i.e. not clear)*/
obj_ptr->manage_heartbeat_degrade ( hostname, iface, false ); obj_ptr->manage_heartbeat_degrade ( hostname, iface, false );
} }
/* Otherwise the action must be alarm only or none ; both of which
* are already handled by the hbsAgent, so do nothing */
else
{
ilog ("%s heartbeat degrade event dropped ; action is not fail or degrade (%s)\n",
hostname.c_str(),
get_iface_name_str(iface));
}
} }
else else
{ {
@ -1003,7 +1024,7 @@ int service_events ( nodeLinkClass * obj_ptr, mtc_socket_type * sock_ptr )
* are already handled by the hbsAgent, so do nothing */ * are already handled by the hbsAgent, so do nothing */
else else
{ {
dlog ("%s heartbeat loss event dropped (%s)\n", ilog ("%s heartbeat loss event dropped ; action is not fail or degrade (%s)\n",
hostname.c_str(), hostname.c_str(),
get_iface_name_str(iface)); get_iface_name_str(iface));
} }
@ -1070,6 +1091,7 @@ int service_events ( nodeLinkClass * obj_ptr, mtc_socket_type * sock_ptr )
else if ( msg.cmd == MTC_EVENT_HEARTBEAT_READY ) else if ( msg.cmd == MTC_EVENT_HEARTBEAT_READY )
{ {
string controller = CONTROLLER ;
std::list<string>::iterator temp ; std::list<string>::iterator temp ;
/* no heartbeating in simplex mode */ /* no heartbeating in simplex mode */
@ -1078,7 +1100,17 @@ int service_events ( nodeLinkClass * obj_ptr, mtc_socket_type * sock_ptr )
return (PASS); return (PASS);
} }
ilog ("Received 'Heartbeat Service Ready' Event\n"); /* get the controller that sent this ready event */
if (( msg.buf[0] != '\0' ) && ( strnlen( msg.buf, BUF_SIZE) <= MAX_CHARS_HOSTNAME ))
{
controller = msg.buf ;
ilog ("%s Heartbeat Service Ready Event (%s)\n",
msg.buf, sock_ptr->mtc_event_rx_sock->get_src_str());
}
else
{
ilog ("Heartbeat Service Ready Event\n");
}
obj_ptr->hbs_ready = true ; obj_ptr->hbs_ready = true ;
/* Run Maintenance on Inventory */ /* Run Maintenance on Inventory */
@ -1093,25 +1125,17 @@ int service_events ( nodeLinkClass * obj_ptr, mtc_socket_type * sock_ptr )
* the heartbeat service. This tell the heartbeat * the heartbeat service. This tell the heartbeat
* service about all the hosts so that it will * service about all the hosts so that it will
* send heartbeat oob flag events to mtce. */ * send heartbeat oob flag events to mtce. */
if ( send_hbs_command( hostname, MTC_CMD_ADD_HOST ) != PASS ) if ( send_hbs_command( hostname, MTC_CMD_ADD_HOST, controller ) != PASS )
{ {
elog ("%s Failed to send inventory to heartbeat service\n", hostname.c_str()); elog ("%s Failed to send inventory to heartbeat service\n", hostname.c_str());
} }
/* Send the start event to the heartbeat service for all enabled hosts except /* Send the start event to the heartbeat service for all enabled hosts */
* for the active controller which is not actively monitored */ if (( obj_ptr->get_adminState ( hostname ) == MTC_ADMIN_STATE__UNLOCKED ) &&
if ( obj_ptr->is_active_controller ( hostname ) == false ) ( obj_ptr->get_operState ( hostname ) == MTC_OPER_STATE__ENABLED ) &&
((obj_ptr->get_availStatus ( hostname ) == MTC_AVAIL_STATUS__AVAILABLE ) ||
(obj_ptr->get_availStatus ( hostname ) == MTC_AVAIL_STATUS__DEGRADED )))
{ {
if (( obj_ptr->get_adminState ( hostname ) == MTC_ADMIN_STATE__UNLOCKED ) && send_hbs_command ( hostname, MTC_CMD_START_HOST, controller );
( obj_ptr->get_operState ( hostname ) == MTC_OPER_STATE__ENABLED ) &&
((obj_ptr->get_availStatus ( hostname ) == MTC_AVAIL_STATUS__AVAILABLE ) ||
(obj_ptr->get_availStatus ( hostname ) == MTC_AVAIL_STATUS__DEGRADED )))
{
send_hbs_command ( hostname, MTC_CMD_START_HOST );
}
}
else
{
dlog ("%s Refusing to start heartbeat of self\n", hostname.c_str() );
} }
} }
} }

View File

@ -802,7 +802,11 @@ int mtc_socket_init ( void )
/***********************************************************/ /***********************************************************/
int port = daemon_get_cfg_ptr()->hbs_to_mtc_event_port ; int port = daemon_get_cfg_ptr()->hbs_to_mtc_event_port ;
mtc_sock.mtc_event_rx_sock = new msgClassRx(LOOPBACK_IP, port, IPPROTO_UDP);
/* listen to this port on any interface so that the hbsAgent running
* locally or on peer controller can get events into mtcAgent */
mtc_sock.mtc_event_rx_sock =
new msgClassRx(mtcInv.my_float_ip.data(), port, IPPROTO_UDP);
rc = mtc_sock.mtc_event_rx_sock->return_status; rc = mtc_sock.mtc_event_rx_sock->return_status;
if ( rc ) if ( rc )
{ {
@ -820,7 +824,7 @@ int mtc_socket_init ( void )
/***********************************************************/ /***********************************************************/
port = daemon_get_cfg_ptr()->mtc_to_hbs_cmd_port ; port = daemon_get_cfg_ptr()->mtc_to_hbs_cmd_port ;
sock_ptr->mtc_to_hbs_sock = new msgClassTx(LOOPBACK_IP, port, IPPROTO_UDP); sock_ptr->mtc_to_hbs_sock = new msgClassTx(CONTROLLER, port, IPPROTO_UDP, mtc_config.mgmnt_iface);
rc = sock_ptr->mtc_to_hbs_sock->return_status; rc = sock_ptr->mtc_to_hbs_sock->return_status;
if ( rc ) if ( rc )
{ {
@ -1281,11 +1285,14 @@ void daemon_service_run ( void )
mtcInv.inotify_shadow_file_fd , mtcInv.inotify_shadow_file_fd ,
mtcInv.inotify_shadow_file_wd ); mtcInv.inotify_shadow_file_wd );
/* Add this controller to the heartbeat service so that we /* inform the heartbeat service that this controller is active */
* receive the out-of-band heartbeat 'flags' even though send_hbs_command ( mtcInv.my_hostname, MTC_CMD_ACTIVE_CTRL );
* we don't self monitor the active controller specifically
* This add may be duplicate but covers the initial config case */ /* Add this controller to the heartbeat service so that
* the peer hbsAgent also gets this controllers inventory
* and this hbsAgent receives the out-of-band heartbeat 'flags' */
send_hbs_command ( mtcInv.my_hostname, MTC_CMD_ADD_HOST ); send_hbs_command ( mtcInv.my_hostname, MTC_CMD_ADD_HOST );
send_hbs_command ( mtcInv.my_hostname, MTC_CMD_START_HOST );
socks.clear(); socks.clear();
socks.push_front (mtc_sock.mtc_event_rx_sock->getFD()); // service_events socks.push_front (mtc_sock.mtc_event_rx_sock->getFD()); // service_events

View File

@ -6205,6 +6205,13 @@ int nodeLinkClass::insv_test_handler ( struct nodeLinkClass::node * node_ptr )
#endif #endif
/* Audits for this controller host only */
if ( node_ptr->hostname == this->my_hostname )
{
/* Remind the heartbeat service that this is the active ctrl */
send_hbs_command ( this->my_hostname, MTC_CMD_ACTIVE_CTRL );
}
/* Manage active controller auto recovery bool. /* Manage active controller auto recovery bool.
* If the inactive controller is inservice then disable * If the inactive controller is inservice then disable
* controller autorecovery. Otherwise enable it but in this case * controller autorecovery. Otherwise enable it but in this case

View File

@ -14,4 +14,10 @@ using namespace std;
#include "nodeClass.h" /* The main link class */ #include "nodeClass.h" /* The main link class */
void hbs_cluster_log ( void ) { } void hbs_cluster_log ( string & hostname, string prefix, bool force=false )
{
UNUSED(hostname);
UNUSED(prefix);
UNUSED(force);
}