Merge "Add pxeboot network mtcAlive messaging to Maintenance"

This commit is contained in:
Zuul 2024-04-03 16:39:29 +00:00 committed by Gerrit Code Review
commit 89766131af
24 changed files with 2948 additions and 619 deletions

View File

@ -1,7 +1,7 @@
#ifndef __INCLUDE_FITCODES_H__
#define __INCLUDE_FITCODES_H__
/*
* Copyright (c) 2013, 2016 Wind River Systems, Inc.
* Copyright (c) 2013, 2016, 2024 Wind River Systems, Inc.
*
* SPDX-License-Identifier: Apache-2.0
*
@ -43,6 +43,8 @@
#define MTC_CMD_FIT__NO_MGMNT_ACK ("/var/run/fit/no_mgmnt_ack") /* mtcClient */
#define MTC_CMD_FIT__NO_CLSTR_ACK ("/var/run/fit/no_clstr_ack") /* mtcClient */
#define MTC_CMD_FIT__NO_MTCALIVE ("/var/run/fit/no_mtcalive") /* mtcClient */
#define MTC_CMD_FIT__PXEBOOT_RXSOCK ("/var/run/fit/pxeboot_rxsock") /* mtcClient */
#define MTC_CMD_FIT__PXEBOOT_TXSOCK ("/var/run/fit/pxeboot_txsock") /* mtcClient */
#define MTC_CMD_FIT__MGMNT_RXSOCK ("/var/run/fit/mgmnt_rxsock") /* mtcClient */
#define MTC_CMD_FIT__MGMNT_TXSOCK ("/var/run/fit/mgmnt_txsock") /* mtcClient */
#define MTC_CMD_FIT__CLSTR_RXSOCK ("/var/run/fit/clstr_rxsock") /* mtcClient */
@ -183,4 +185,7 @@
#define FIT_CODE__HWMON__SET_DB_GROUP_STATUS (177)
#define FIT_CODE__HWMON__SET_DB_GROUP_STATE (178)
#define TESTMASK__MSG__MTCALIVE_STRESS (0x00000001)
#endif /* __INCLUDE_FITCODES_H__ */

View File

@ -48,6 +48,7 @@ typedef struct
char* mgmnt_iface ; /**< management interface name pointer */
char* clstr_iface ; /**< cluster-host interface name pointer */
char* pxeboot_iface ; /**< pxeboot interface name pointer */
char* multicast ; /**< Multicast address */
int ha_port ; /**< HA REST API Port Number */
int vim_cmd_port ; /**< Mtce -> VIM Command REST API Port */
@ -75,6 +76,8 @@ typedef struct
char* barbican_api_host ; /**< Barbican REST API host IP address */
int barbican_api_port ; /**< Barbican REST API port number */
int mtc_tx_pxeboot_port ; /**< mtcAgent/Client pxeboot nwk tx port */
int mtc_rx_pxeboot_port ; /**< mtcClient listens pxeboot nwk cmd reqs */
int mtc_rx_mgmnt_port ; /**< mtcClient listens mgmnt nwk cmd reqs */
int mtc_rx_clstr_port ; /**< mtcClient listens clstr nwk cmd reqs */
int mtc_tx_mgmnt_port ; /**< mtcClient sends mgmnt nwk cmds/resp's */
@ -258,6 +261,22 @@ extern char *program_invocation_short_name;
else { syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Error : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \
}
/**
 * mtcAlive alog logger macro with throttling.
 *
 * Nothing is logged unless the daemon's 'debug_alive' config setting is
 * non-zero. When it is, the caller owned counter 'cnt' is pre-incremented
 * on every invocation, a log is produced only when the incremented count
 * equals 1, and the counter is reset to 0 once it reaches 'max'.
 *
 * The log goes to printf when ltc() is true and to syslog otherwise ;
 * both paths carry the same "Alive" label.
 *
 * cnt    - modifiable integer lvalue used as the throttle counter
 * max    - count at which the throttle counter is restarted
 * format - printf style format string literal followed by its arguments
 */
#define alog_throttled(cnt,max,format,args...) { \
    if ( daemon_get_cfg_ptr()->debug_alive ) \
    { \
        if ( ++cnt == 1 ) \
        { \
            if (ltc()) { printf ("%s [%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Alive: " format, pt(), getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \
            else { syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Alive: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \
        } \
        if ( cnt >= (max) ) \
        { \
            cnt = 0 ; \
        } \
    } \
}
/** Error logger macro with throttling */
#define elog_throttled(cnt,max,format,args...) { \
if ( ++cnt == 1 ) \
@ -389,37 +408,37 @@ extern char *program_invocation_short_name;
#define plog(format, args...) { syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Info : " format, getpid(), lc(), _hn(), _pn, "|-|", __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define mlog(format, args...) { if(daemon_get_cfg_ptr()->debug_msg&1 ) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Msg : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define mlog1(format, args...) { if(daemon_get_cfg_ptr()->debug_msg&2 ) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Msg2 : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define mlog2(format, args...) { if(daemon_get_cfg_ptr()->debug_msg&4 ) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Msg4 : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define mlog3(format, args...) { if(daemon_get_cfg_ptr()->debug_msg&8 ) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Msg8 : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define mlog1(format, args...) { if(daemon_get_cfg_ptr()->debug_msg&2 ) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Msg1 : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define mlog2(format, args...) { if(daemon_get_cfg_ptr()->debug_msg&4 ) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Msg2 : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define mlog3(format, args...) { if(daemon_get_cfg_ptr()->debug_msg&8 ) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Msg3 : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define jlog(format, args...) { if(daemon_get_cfg_ptr()->debug_json&1) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Json : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define jlog1(format, args...) { if(daemon_get_cfg_ptr()->debug_json&2) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Json2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define jlog2(format, args...) { if(daemon_get_cfg_ptr()->debug_json&4) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Json4: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define jlog3(format, args...) { if(daemon_get_cfg_ptr()->debug_json&8) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Json8: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define jlog1(format, args...) { if(daemon_get_cfg_ptr()->debug_json&2) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Json1: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define jlog2(format, args...) { if(daemon_get_cfg_ptr()->debug_json&4) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Json2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define jlog3(format, args...) { if(daemon_get_cfg_ptr()->debug_json&8) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Json3: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define hlog(format, args...) { if(daemon_get_cfg_ptr()->debug_http&1) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Http : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define hlog1(format, args...) { if(daemon_get_cfg_ptr()->debug_http&2) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Http2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define hlog2(format, args...) { if(daemon_get_cfg_ptr()->debug_http&4) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Http4: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define hlog3(format, args...) { if(daemon_get_cfg_ptr()->debug_http&8) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Http8: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define hlog1(format, args...) { if(daemon_get_cfg_ptr()->debug_http&2) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Http1: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define hlog2(format, args...) { if(daemon_get_cfg_ptr()->debug_http&4) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Http2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define hlog3(format, args...) { if(daemon_get_cfg_ptr()->debug_http&8) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Http3: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define alog(format, args...) { if(daemon_get_cfg_ptr()->debug_alive&1) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Alive : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define alog1(format, args...) { if(daemon_get_cfg_ptr()->debug_alive&2) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Alive2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define alog2(format, args...) { if(daemon_get_cfg_ptr()->debug_alive&4) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Alive4: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define alog3(format, args...) { if(daemon_get_cfg_ptr()->debug_alive&8) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Alive8: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define alog1(format, args...) { if(daemon_get_cfg_ptr()->debug_alive&2) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Alive1: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define alog2(format, args...) { if(daemon_get_cfg_ptr()->debug_alive&4) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Alive2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define alog3(format, args...) { if(daemon_get_cfg_ptr()->debug_alive&8) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Alive3: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define qlog(format, args...) { if(daemon_get_cfg_ptr()->debug_work&1) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Work : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define qlog1(format, args...) { if(daemon_get_cfg_ptr()->debug_work&2) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Work2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define qlog2(format, args...) { if(daemon_get_cfg_ptr()->debug_work&4) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Work4: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define qlog3(format, args...) { if(daemon_get_cfg_ptr()->debug_work&8) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Work8: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define qlog1(format, args...) { if(daemon_get_cfg_ptr()->debug_work&2) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Work1: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define qlog2(format, args...) { if(daemon_get_cfg_ptr()->debug_work&4) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Work2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define qlog3(format, args...) { if(daemon_get_cfg_ptr()->debug_work&8) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Work3: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define flog(format, args...) { if(daemon_get_cfg_ptr()->debug_fsm) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: FSM : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define tlog(format, args...) { if(daemon_get_cfg_ptr()->debug_timer) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Timer: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define clog(format, args...) { if(daemon_get_cfg_ptr()->debug_state&1) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Change: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define clog1(format, args...) { if(daemon_get_cfg_ptr()->debug_state&2) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Chang2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define clog2(format, args...) { if(daemon_get_cfg_ptr()->debug_state&4) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Chang4: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define clog3(format, args...) { if(daemon_get_cfg_ptr()->debug_state&8) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Chang8: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define clog1(format, args...) { if(daemon_get_cfg_ptr()->debug_state&2) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Chang1: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define clog2(format, args...) { if(daemon_get_cfg_ptr()->debug_state&4) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Chang2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define clog3(format, args...) { if(daemon_get_cfg_ptr()->debug_state&8) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Chang3: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define log_event(format, args...) { syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Event: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }

View File

@ -257,49 +257,17 @@ void print_mtc_message ( string hostname,
const char * iface,
bool force )
{
/* Handle raw json string messages differently.
* Those messages just have a json string that starts at the header */
if ( msg.hdr[0] == '{' )
{
if ( force )
{
ilog ("%s %s (%s network) - %s\n",
hostname.c_str(),
direction ? "rx <-" : "tx ->" ,
iface,
msg.hdr);
}
else if (( daemon_get_cfg_ptr()->debug_alive&1) && ( msg.cmd == MTC_MSG_MTCALIVE ))
{
alog ("%s %s (%s network) - %s\n",
hostname.c_str(),
direction ? "rx <-" : "tx ->" ,
iface,
msg.hdr);
}
else
{
mlog1 ("%s %s (%s network) - %s\n",
hostname.c_str(),
direction ? "rx <-" : "tx ->" ,
iface,
msg.hdr);
}
return ;
}
string str = "" ;
if ( msg.buf[0] )
str = msg.buf ;
if ( force )
{
ilog ("%s %s %s (%s network) %d.%d %x:%x:%x.%x.%x.%x [%s] %s\n",
ilog ("%s%s %s %s %s network: %x:%x:%x.%x.%x.%x [%s] %s\n",
hostname.c_str(),
direction ? "rx <-" : "tx ->" ,
direction ? "" : " tx" ,
get_mtcNodeCommand_str (msg.cmd),
direction ? "from" : "to" ,
iface,
msg.ver,
msg.rev,
msg.cmd,
msg.num,
msg.parm[0],
@ -309,15 +277,31 @@ void print_mtc_message ( string hostname,
msg.hdr,
str.c_str());
}
else if ( msg.cmd == MTC_MSG_MTCALIVE || msg.cmd == MTC_REQ_MTCALIVE )
{
alog ("%s%s %s %s %s network: [%x:%x:%x:%x:%x:%x:%s] %s",
hostname.c_str(),
direction ? "" : " tx" ,
get_mtcNodeCommand_str (msg.cmd),
direction ? "from" : "to" ,
iface,
msg.cmd,
msg.num,
msg.parm[0],
msg.parm[1],
msg.parm[2],
msg.parm[3],
msg.hdr,
str.c_str());
}
else
{
mlog1 ("%s %s %s (%s network) %d.%d %x:%x:%x.%x.%x.%x [%s] %s\n",
hostname.c_str(),
direction ? "rx <-" : "tx ->" ,
mlog1 ("%s%s %s %s %s network: %x:%x:%x.%x.%x.%x [%s] %s",
hostname.c_str(),
direction ? "" : " tx" ,
get_mtcNodeCommand_str (msg.cmd),
direction ? "from" : "to" ,
iface,
msg.ver,
msg.rev,
msg.cmd,
msg.num,
msg.parm[0],
@ -344,6 +328,8 @@ static std::string configStages_str [MTC_CONFIG__STAGES +1] ;
static std::string addStages_str [MTC_ADD__STAGES +1] ;
static std::string delStages_str [MTC_DEL__STAGES +1] ;
static std::string subStages_str [MTC_SUBSTAGE__STAGES +1] ;
static std::string mtcAliveStages_str [MTC_MTCALIVE__STAGES +1] ;
void mtc_stages_init ( void )
{
@ -377,7 +363,7 @@ void mtc_stages_init ( void )
enableStages_str [MTC_ENABLE__FAILURE ] = "Failure";
enableStages_str [MTC_ENABLE__FAILURE_WAIT ] = "Failure-Wait";
enableStages_str [MTC_ENABLE__FAILURE_SWACT_WAIT ] = "Failure-Swact-Wait";
enableStages_str [MTC_ENABLE__STAGES ] = "unknown" ;
enableStages_str [MTC_ENABLE__STAGES ] = "Enable-Unknown" ;
recoveryStages_str[MTC_RECOVERY__START ] = "Handler-Start";
recoveryStages_str[MTC_RECOVERY__RETRY_WAIT ] = "Req-Retry-Wait";
@ -402,7 +388,7 @@ void mtc_stages_init ( void )
recoveryStages_str[MTC_RECOVERY__FAILURE ] = "Failure";
recoveryStages_str[MTC_RECOVERY__WORKQUEUE_WAIT ] = "WorkQ-Wait";
recoveryStages_str[MTC_RECOVERY__ENABLE ] = "Enable";
recoveryStages_str[MTC_RECOVERY__STAGES ] = "unknown";
recoveryStages_str[MTC_RECOVERY__STAGES ] = "Recovery-Unknown";
disableStages_str [MTC_DISABLE__START ] = "Disable-Start";
disableStages_str [MTC_DISABLE__HANDLE_POWERON_SEND ] = "Disable-PowerOn-Send";
@ -416,7 +402,7 @@ void mtc_stages_init ( void )
disableStages_str [MTC_DISABLE__TASK_STATE_UPDATE ] = "Disable-States-Update";
disableStages_str [MTC_DISABLE__WORKQUEUE_WAIT ] = "Disable-WorkQ-Wait";
disableStages_str [MTC_DISABLE__DISABLED ] = "Host-Disabled";
disableStages_str [MTC_DISABLE__STAGES ] = "Unknown";
disableStages_str [MTC_DISABLE__STAGES ] = "Disable-Unknown";
powerStages_str [MTC_POWERON__START ] = "Power-On-Start";
powerStages_str [MTC_POWERON__POWER_STATUS_WAIT ] = "Power-On-Status";
@ -445,17 +431,16 @@ void mtc_stages_init ( void )
powercycleStages_str [MTC_POWERCYCLE__POWEROFF_WAIT ] = "Power-Cycle-Off-Wait";
powercycleStages_str [MTC_POWERCYCLE__POWERON ] = "Power-Cycle-On";
powercycleStages_str [MTC_POWERCYCLE__POWERON_REQWAIT] = "Power-Cycle-On-Req-Wait";
powercycleStages_str [MTC_POWERCYCLE__POWERON_VERIFY] = "Power-Cycle-On-Verify";
powercycleStages_str [MTC_POWERCYCLE__POWERON_VERIFY ] = "Power-Cycle-On-Verify";
powercycleStages_str [MTC_POWERCYCLE__POWERON_WAIT ] = "Power-Cycle-On-Wait";
powercycleStages_str [MTC_POWERCYCLE__DONE ] = "Power-Cycle-Done";
powercycleStages_str [MTC_POWERCYCLE__FAIL ] = "Power-Cycle-Fail";
powercycleStages_str [MTC_POWERCYCLE__HOLDOFF ] = "Power-Cycle-Hold-Off";
powercycleStages_str [MTC_POWERCYCLE__COOLOFF ] = "Power-Cycle-Cool-Off";
powercycleStages_str [MTC_POWERCYCLE__POWEROFF_CMND_WAIT] = "Power-Cycle-Off-Cmnd-Wait";
powercycleStages_str [MTC_POWERCYCLE__POWERON_CMND_WAIT] = "Power-Cycle-On-Cmnd-Wait";
powercycleStages_str [MTC_POWERCYCLE__POWERON_VERIFY_WAIT]= "Power-Cycle-On-Verify-Wait";
powercycleStages_str [MTC_POWERCYCLE__STAGES ] = "Power-Cycle-Unknown";
resetStages_str [MTC_RESET__START ] = "Reset-Start";
resetStages_str [MTC_RESET__REQ_SEND ] = "Reset-Req-Send";
@ -529,6 +514,7 @@ void mtc_stages_init ( void )
delStages_str [MTC_DEL__START ] = "Del-Start";
delStages_str [MTC_DEL__WAIT ] = "Del-Wait";
delStages_str [MTC_DEL__DONE ] = "Del-Done";
delStages_str [MTC_DEL__STAGES ] = "Del-Unknown";
subStages_str [MTC_SUBSTAGE__START ] = "subStage-Start";
subStages_str [MTC_SUBSTAGE__SEND ] = "subStage-Send";
@ -536,6 +522,15 @@ void mtc_stages_init ( void )
subStages_str [MTC_SUBSTAGE__WAIT ] = "subStage-Wait";
subStages_str [MTC_SUBSTAGE__DONE ] = "subStage-Done";
subStages_str [MTC_SUBSTAGE__FAIL ] = "subStage-Fail";
subStages_str [MTC_SUBSTAGE__STAGES ] = "subStage-Unknown";
mtcAliveStages_str[MTC_MTCALIVE__START ] = "mtcAlive-Start";
mtcAliveStages_str[MTC_MTCALIVE__MONITOR ] = "mtcAlive-Monitor";
mtcAliveStages_str[MTC_MTCALIVE__WAIT ] = "mtcAlive-Wait";
mtcAliveStages_str[MTC_MTCALIVE__CHECK ] = "mtcAlive-Check";
mtcAliveStages_str[MTC_MTCALIVE__SEND ] = "mtcAlive-Send";
mtcAliveStages_str[MTC_MTCALIVE__FAIL ] = "mtcAlive-Fail";
mtcAliveStages_str[MTC_MTCALIVE__STAGES ] = "mtcAlive-Unknown";
}
string get_delStages_str ( mtc_delStages_enum stage )
@ -666,6 +661,15 @@ string get_subStages_str ( mtc_subStages_enum stage )
return (subStages_str[stage]);
}
/* Return the name string for the specified mtcAlive stage.
 * Any value at or beyond MTC_MTCALIVE__STAGES selects the string
 * stored in the MTC_MTCALIVE__STAGES slot of the table. */
string get_mtcAliveStages_str ( mtc_mtcAliveStages_enum stage )
{
    const int index = ( stage < MTC_MTCALIVE__STAGES ) ? stage
                                                       : MTC_MTCALIVE__STAGES ;
    return (mtcAliveStages_str[index]);
}
void log_adminAction ( string hostname,
mtc_nodeAdminAction_enum currAction,
mtc_nodeAdminAction_enum newAction )

View File

@ -67,10 +67,11 @@ void daemon_exit ( void );
#define FAIL_BM_PASSWORD (122*256)
#define MTC_PARM_LOCK_PERSIST_IDX (0) // node lock command
#define MTC_PARM_UPTIME_IDX (0)
#define MTC_PARM_HEALTH_IDX (1)
#define MTC_PARM_FLAGS_IDX (2)
#define MTC_PARM_MAX_IDX (3)
#define MTC_PARM_UPTIME_IDX (0) // mtcAlive message
#define MTC_PARM_HEALTH_IDX (1) // mtcAlive message
#define MTC_PARM_FLAGS_IDX (2) // mtcAlive message
#define MTC_PARM_SEQ_IDX (3) // mtcAlive message
#define MTC_PARM_MAX_IDX (4) // mtcAlive message
/** 'I Am <state>' flags for maintenance.
*
@ -111,6 +112,8 @@ void daemon_exit ( void );
#define SMGMT_UNHEALTHY_FILE ((const char *)"/var/run/.sm_node_unhealthy")
#define UNLOCK_READY_FILE ((const char *)"/etc/platform/.unlock_ready")
#define STILL_SIMPLEX_FILE ((const char *)"/etc/platform/simplex")
#define FIRST_CONTROLLER_FILE ((const char *)"/etc/platform/.first_controller")
#define INIT_CONFIG_COMPLETE ((const char *)"/etc/platform/.initial_config_complete")
/** path to and module init file name */
#define MTCE_CONF_FILE ((const char *)"/etc/mtc.conf")
@ -153,6 +156,8 @@ void daemon_exit ( void );
#define PMON_CONF_FILE_DIR ((const char *)"/etc/pmon.d")
#define BM_DNSMASQ_FILENAME ((const char *)"dnsmasq.bmc_hosts")
#define OPT_PLATFORM_CONFIG_DIR ((const char *)"/opt/platform/config")
#define DNSMASQ_HOSTS_FILE ((const char *)"dnsmasq.hosts")
/* supported BMC communication protocols ; access method */
typedef enum
@ -415,6 +420,7 @@ typedef enum
#define CONTROLLER_1 ((const char *)"controller-1")
#define CONTROLLER_2 ((const char *)"controller-2")
#define CONTROLLER ((const char *)"controller")
#define CONTROLLERS (2)
#define STORAGE_0 ((const char *)"storage-0")
#define STORAGE_1 ((const char *)"storage-1")
@ -461,7 +467,8 @@ typedef enum
/** Interface Codes **/
#define MGMNT_INTERFACE (0)
#define CLSTR_INTERFACE (1)
#define PXEBOOT_INTERFACE (2)
#define MTCALIVE_INTERFACES_MAX (3)
/** Maintenance Inventory struct */
typedef struct
@ -1205,6 +1212,19 @@ typedef enum
/** Return the string representing the specified 'sensor' stage */
string get_sensorStages_str ( mtc_sensorStages_enum stage );
typedef enum
{
MTC_MTCALIVE__START = 0,
MTC_MTCALIVE__MONITOR,
MTC_MTCALIVE__WAIT,
MTC_MTCALIVE__CHECK,
MTC_MTCALIVE__SEND,
MTC_MTCALIVE__FAIL,
MTC_MTCALIVE__STAGES
} mtc_mtcAliveStages_enum ;
string get_mtcAliveStages_str ( mtc_mtcAliveStages_enum stage );
typedef enum
{
MTC_OFFLINE__IDLE = 0,

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2013-2017 Wind River Systems, Inc.
* Copyright (c) 2013-2017, 2024 Wind River Systems, Inc.
*
* SPDX-License-Identifier: Apache-2.0
*
@ -29,6 +29,7 @@
#include <dirent.h>
#include <string>
#include <iostream>
#include <sstream>
#include <fstream>
#include <stdlib.h>
#include <stdio.h>
@ -1350,24 +1351,582 @@ int get_pid_by_name_proc ( string procname )
}
/* Display names for the maintenance network interfaces.
 * Each name is defined exactly once ; a second definition of the same
 * array is a redefinition error. */
const char pxeboot_iface_str[] = { "Pxeboot" } ;
const char mgmnt_iface_str[] = { "Mgmnt" } ;
const char clstr_iface_str[] = { "Clstr" } ;
const char null_iface_str[] = { "Null" } ;
/* Return the display name string for the specified interface code.
 * Codes that match none of the cases yield the null interface name.
 * NOTE(review): the cases mix the *_IFACE and *_INTERFACE constant
 * families ; confirm both families use the same numbering. */
const char * get_iface_name_str ( int iface )
{
    switch ( iface )
    {
        case MGMNT_IFACE:
            return mgmnt_iface_str;
        case CLSTR_IFACE:
            return clstr_iface_str;
        case PXEBOOT_INTERFACE:
            return pxeboot_iface_str;
        default:
            return null_iface_str ;
    }
}
/**********************************************************************
 * Name    : get_interface_name_str
 *
 * Purpose : Translate an mtcAgent/Client interface code into the
 *           matching interface name string.
 *
 * Return  : Pointer to the name string for MGMNT_INTERFACE,
 *           CLSTR_INTERFACE or PXEBOOT_INTERFACE ; the null interface
 *           name string for any other code.
 **********************************************************************/
const char * get_interface_name_str ( int iface )
{
    if ( iface == MGMNT_INTERFACE )
        return mgmnt_iface_str ;

    if ( iface == CLSTR_INTERFACE )
        return clstr_iface_str ;

    if ( iface == PXEBOOT_INTERFACE )
        return pxeboot_iface_str ;

    return null_iface_str ;
}
/**********************************************************************
 * Name    : get_iface_type_str
 *
 * Purpose : Translate an interface type enum into its type string.
 *
 * Return  : Pointer to "ethernet", "vlan" or "bond" for the matching
 *           enum value ; "unknown" for any other value.
 **********************************************************************/
const char ethernet_iface_type_str[] = { "ethernet" };
const char vlan_iface_type_str[] = { "vlan" };
const char bond_iface_type_str[] = { "bond" };
const char unknown_iface_type_str[] = { "unknown" };
const char * get_iface_type_str ( iface_type_enum type_enum )
{
    if ( type_enum == ethernet )
        return ethernet_iface_type_str ;

    if ( type_enum == vlan )
        return vlan_iface_type_str ;

    if ( type_enum == bond )
        return bond_iface_type_str ;

    return unknown_iface_type_str ;
}
/********************************************************************
 * Name       : get_iface_type
 *
 * Purpose    : Learn whether the specified interface is a physical
 *              ethernet, vlan or bond interface.
 *
 * Description: Reads the interface's 'uevent' file found under
 *              INTERFACES_DIR and classifies the interface from the
 *              first line that starts with DEVTYPE.
 *
 *                DEVTYPE line containing "=vlan" -> vlan
 *                DEVTYPE line containing "=bond" -> bond
 *                any other DEVTYPE line          -> ethernet
 *
 *              If the file has no DEVTYPE line then iface_type is
 *              left as the caller passed it in. Callers that want
 *              'ethernet' to be the default for a missing DEVTYPE
 *              must preset iface_type before the call.
 *
 * Example    :
 *
 *    sysadmin@controller-0:~$ cat /sys/class/net/vlan163/uevent
 *    DEVTYPE=vlan
 *    INTERFACE=vlan163
 *    IFINDEX=41
 *
 * Updates    : iface_type when a DEVTYPE line is found
 *
 * Returns    : PASS, or FAIL_FILE_OPEN if the uevent file cannot
 *              be opened.
 ********************************************************************/
int get_iface_type ( string iface,
                     iface_type_enum & iface_type )
{
    /* the interface type is learned through the uevent file */
    const string uevent_path = INTERFACES_DIR + iface + "/uevent";
    ifstream uevent_stream ( uevent_path.c_str() );
    if ( !uevent_stream )
    {
        wlog ("Failed to find file: %s", uevent_path.c_str());
        return (FAIL_FILE_OPEN);
    }

    /* scan for the DEVTYPE line ; only the first one is considered */
    for ( string entry ; getline ( uevent_stream, entry ) ; )
    {
        if ( entry.find ("DEVTYPE") != 0 )
            continue ;

        if ( entry.find ("=vlan") != string::npos )
            iface_type = vlan ;
        else if ( entry.find ("=bond") != string::npos )
            iface_type = bond ;
        else
            iface_type = ethernet ;
        break ;
    }
    return (PASS);
}
/*****************************************************************************
* Name : get_iface_parent
*
* Purpose : Gets the ifname of the linked parent interface
*
* Updates : parent interface name.
*
* Returns : Returns PASS, FAIL_FILE_OPEN or FAIL_NOT_FOUND
****************************************************************************/
int get_iface_parent ( int network, string & ifname, string & parent )
{
int rc = PASS ;
/* build the full file path */
string iflink_file = INTERFACES_DIR + ifname + "/iflink";
/* declare a file stream based on the full file path */
ifstream iflink_file_stream ( iflink_file.c_str() );
/* open the file stream */
if (iflink_file_stream.is_open())
{
int iflink = -1;
string iflink_line;
char * dummy_ptr ;
char iface_buffer [IF_NAMESIZE] = "";
/* start clean */
MEMSET_ZERO (iface_buffer[0]);
while ( getline (iflink_file_stream, iflink_line) )
{
iflink = strtol(iflink_line.c_str(), &dummy_ptr, 10);
}
iflink_file_stream.close();
/*
* load iface_buffer with the name of the network interface
* corresponding to iflink.
*/
if_indextoname (iflink, iface_buffer);
if (iface_buffer[0] != '\0')
{
parent = iface_buffer;
dlog ("%s network interface name: %s",
get_interface_name_str(network),
parent.c_str());
}
else
{
wlog ("%s network parent interface not found for ifname:%s",
get_interface_name_str(network), ifname.c_str() );
rc = FAIL_NOT_FOUND ;
}
}
else
{
wlog ("failed to open %s", iflink_file.c_str());
rc = FAIL_FILE_OPEN ;
}
return rc ;
}
/********************************************************************
* Name : get_bond_mode
*
* Purpose : Get the mode of a Linux bonding interface.
*
* Description: Returns the data in /sys/class/net/bonding/mode
* as update to 'bond_mode' string reference argument.
*
* Example : $ cat /sys/class/net/pxeboot0/bonding/mode
* 802.3ad 4
*
* Updates : bond_mode
*
* Returns : PASS or FAIL_FILE_OPEN if no bonding/mode file is found.
*
* ******************************************************************/
int get_bond_mode ( int network,
string bond_name,
string & bond_mode)
{
int rc = PASS ;
string bond_mode_file = INTERFACES_DIR + bond_name + "/bonding/mode";
ifstream bond_mode_data ( bond_mode_file.data() );
if (!bond_mode_data)
{
wlog ("Failed to find bonding mode file: %s",
bond_mode_file.c_str());
rc = FAIL_FILE_OPEN ;
}
else
{
getline ( bond_mode_data, bond_mode );
if ( ! bond_mode.empty() )
{
ilog ("%s network %s mode: %s",
get_interface_name_str(network),
bond_name.c_str(),
bond_mode.c_str());
}
}
return rc ;
}
/*********************************************************************
* Name : get_bond_slaves
*
* Purpose : Get a bonded interface slave names.
*
* Description: Returns the data in /sys/class/net/bonding/slaves
* as updates to reference arguments.
*
* Updates : slave1 and slave2
*
* Returns : PASS or FAIL_FILE_OPEN if no slaves file is found.
*
*********************************************************************/
int get_bond_slaves ( int network,
string bond_name,
string & slave1,
string & slave2 )
{
int rc = 0 ;
string bonded_interface_file = INTERFACES_DIR + \
bond_name + \
"/bonding/slaves";
ifstream slaves(bonded_interface_file.data());
if (!slaves)
{
wlog ("failed to open file: %s", bonded_interface_file.c_str());
rc = FAIL_FILE_OPEN ;
}
else
{
char *token ;
string bond_slaves ;
getline ( slaves, bond_slaves );
if ( ! bond_slaves.empty() )
{
dlog ("%s network %s slaves: %s",
get_interface_name_str(network),
bond_name.c_str(),
bond_slaves.c_str());
token = strtok((char *)bond_slaves.data(), " ");
if ( token != NULL )
slave1 = token ;
token = strtok(NULL, " ");
if ( token != NULL )
slave2 = token ;
}
}
return rc ;
}
/*****************************************************************************
* Name : get_iface_info
*
* Purpose : Update the iface_info with interface type details and hierarchy.
*
* Description: Lookup the interface type, bond, vlan or physical ethernet.
* Then for each case add interface info and create a 'chain'
* string that represents the hierarchy.
*
* - ethernet - enp0s8
* - vlan - vlan16 -> enp0s8
* - bond - pxeboot0 -> enp0s8 and enp0s9
* - bonded vlan - vlan16 -> pxeboot0 -> enp0s8 and enp0s9
*
* Updates : iface_info with learned interface type, parent, bond mode
* and slaves
* Returns : Returns PASS, FAIL_FILE_OPEN, FAIL_NOT_FOUND, FAIL_INVALID_DATA
*
*****************************************************************************/
int get_iface_info ( int network, string iface, iface_info_type & iface_info )
{
const char * network_str_ptr = get_interface_name_str (network) ;
iface_info.iface_name = iface ;
iface_info.iface_type = ethernet;
iface_info.chain = "" ;
int rc = get_iface_type ( iface_info.iface_name, iface_info.iface_type );
if ( rc )
{
wlog ("failed to get interface type from iface: %s", iface.c_str());
return rc ;
}
switch ( iface_info.iface_type )
{
case ethernet:
{
iface_info.parent = iface_info.iface_name ;
ilog ("%s network %s parent: %s", network_str_ptr, iface_info.iface_name.c_str(), iface_info.parent.c_str());
iface_info.chain.append (iface_info.parent);
break ;
}
case vlan:
{
if (( rc = get_iface_parent (MGMNT_INTERFACE, iface_info.iface_name, iface_info.parent )) == PASS )
{
ilog ("%s network %s parent: %s", network_str_ptr, iface_info.iface_name.c_str(), iface_info.parent.c_str());
if (( rc = get_iface_type ( iface_info.parent, iface_info.iface_type )) == PASS )
{
if ( iface_info.iface_type == bond )
{
get_bond_mode ( network, iface_info.parent, iface_info.bond_mode);
iface_info.chain.append( iface_info.iface_name + " -> " + iface_info.parent + " (" + iface_info.bond_mode + ")");
if (( rc = get_bond_slaves ( MGMNT_INTERFACE, iface_info.parent, iface_info.slave1, iface_info.slave2 )) == PASS )
{
iface_info.chain.append(" -> " + iface_info.slave1 + " and " + iface_info.slave2);
ilog ("%s network %s slaves: %s and %s",
network_str_ptr, iface_info.parent.c_str(),
iface_info.slave1.c_str(), iface_info.slave2.c_str());
}
else
{
wlog ("failed to get slaves from bond: %s ; rc:%d", iface_info.parent.c_str(), rc);
rc = FAIL_NOT_FOUND ;
}
}
else
{
wlog ("%s network iface: %s", network_str_ptr, iface_info.iface_name.c_str());
iface_info.chain.append( iface_info.iface_name + " -> " + iface_info.parent);
}
}
else
{
wlog ("failed to get %s network interface type from iface: %s ; rc:%d",
network_str_ptr, iface_info.parent.c_str(), rc);
rc = FAIL_NOT_FOUND ;
}
}
else
{
wlog ("failed to get parent interface from %s ; rc:%d", iface_info.iface_name.c_str(), rc );
}
break ;
}
case bond:
{
iface_info.parent = iface_info.iface_name ;
ilog ("%s network %s", network_str_ptr, iface_info.iface_name.c_str());
get_bond_mode (network, iface_info.parent, iface_info.bond_mode);
iface_info.chain.append(iface_info.parent + " (" + iface_info.bond_mode + ")");
if (( rc = get_bond_slaves ( network, iface_info.parent, iface_info.slave1, iface_info.slave2 )) == PASS )
{
iface_info.chain.append(" -> " + iface_info.slave1 + " and " + iface_info.slave2);
ilog ("%s network %s slaves: %s and %s",
network_str_ptr, iface_info.parent.c_str(),
iface_info.slave1.c_str(), iface_info.slave2.c_str());
}
else
{
wlog ("failed to get slaves from bond: %s ; rc:%d", iface_info.iface_name.c_str(), rc);
rc = FAIL_NOT_FOUND ;
}
break ;
}
default:
{
wlog ("failed: unknown interface type: %d", iface_info.iface_type);
rc = FAIL_INVALID_DATA ;
break ;
}
}
if ( !iface_info.chain.empty() )
{
ilog ("Interface Chain: %s", iface_info.chain.c_str());
}
return rc ;
}
/*****************************************************************************
 * Name       : get_pxeboot_dhcp_addr
 *
 * Purpose    : Learn the pxeboot address from a dhcp leases file.
 *
 * Description: Worker and storage nodes DHCP for their pxeboot IP address.
 *
 *   The /var/lib/dhcp directory is scanned for a file whose name contains
 *   'iface' ; when several names match, the last one returned by the
 *   directory scan is used. That file is read line by line and the
 *   'fixed-address' value of the last 'lease {' tuple is returned with
 *   any trailing ';' removed.
 *
 * Assumptions: If this lookup is for the pxeboot interface then the caller
 *              is expected to suffix the interface name with a ":2"
 *
 * Example:
 *
 *   sysadmin@worker-0:~$ cat /var/lib/dhcp/dhclient.enp0s3:2.leases
 *   lease {
 *     interface "enp0s3:2";
 *     fixed-address 169.254.202.159;  <-- non-controller pxeboot address
 *     option subnet-mask 255.255.255.0;
 *
 * Returns    : a string containing the unit's pxeboot address ; empty when
 *              the directory, the lease file or the address is not found.
 ******************************************************************************/
string get_pxeboot_dhcp_addr ( string iface )
{
    #define DHCP_LEASES_DIR ((const char *) "/var/lib/dhcp")

    mlog ("learning pxeboot address ...");

    DIR * leases_dir = opendir(DHCP_LEASES_DIR);
    if ( leases_dir == NULL )
    {
        ilog ( "no dhcp leases");
        return "" ;
    }

    // Remember the last directory entry whose name contains the iface name
    string match = "" ;
    for ( struct dirent * item = readdir(leases_dir) ;
          item != nullptr ;
          item = readdir(leases_dir) )
    {
        string candidate = item->d_name ;
        if ( candidate.find(iface) != string::npos )
            match = candidate ;
    }
    closedir(leases_dir);

    if ( match.empty() )
    {
        ilog ("dhcp lease file %s/%s not found", DHCP_LEASES_DIR, iface.c_str());
        return "" ;
    }

    string lease_path = string(DHCP_LEASES_DIR) + "/" + match ;
    mlog ("pxeboot dhcp lease file: %s", lease_path.c_str());

    // address of the lease tuple currently being read ; the value left
    // after the last line is that of the last tuple in the file.
    string address = "" ;

    ifstream leases(lease_path);
    if ( !leases.is_open() )
    {
        wlog ("unable to open dhcp lease file: %s", lease_path.c_str());
        return address ;
    }

    string text ;
    while ( getline(leases, text) )
    {
        // a new tuple starts ; forget what the previous one offered
        if ( text.find("lease {") != string::npos )
            address.clear();

        if ( text.find("fixed-address") != string::npos )
        {
            istringstream fields(text);
            string label ;                // the "fixed-address" keyword
            fields >> label >> address ;  // keep only the value
            if ( !address.empty() && address.back() == ';' )
                address.pop_back();       // drop the statement terminator
        }
    }
    leases.close();
    return address ;
}
/*****************************************************************************
 * Name       : get_pxeboot_static_addr
 *
 * Purpose    : Get pxeboot address from pxeboot network interface config file.
 *
 * Description: The controller nodes pxeboot addresses are static.
 *              Therefore, the pxeboot address for a controller node is
 *              taken from the 'address' label inside the
 *              <NETWORK_INTERFACES_DIR>/ifcfg-<iface> file. Only lines
 *              that begin with 'address' are considered and the value of
 *              the last such line wins.
 *
 * Assumptions: If this lookup is for the pxeboot interface then the caller
 *              is expected to suffix the interface name with a ":2"
 *
 * Example:
 *
 *   sysadmin@controller-1:/etc/network/interfaces.d$ cat ifcfg-enp0s8:2
 *   iface enp0s8:2 inet static
 *   address 169.254.202.3   <-- controller pxeboot address
 *   netmask 255.255.255.0
 *
 * Returns    : a string containing the host's pxeboot address ; empty when
 *              the file is missing, unreadable or has no address line.
 ****************************************************************************/
string get_pxeboot_static_addr ( string iface )
{
    string address  = "" ;
    string cfg_path = string(NETWORK_INTERFACES_DIR) + "/ifcfg-" + iface ;

    if ( !daemon_is_file_present (cfg_path.data()) )
    {
        // This is normal for a controller before it is unlocked.
        ilog ("no %s file present", cfg_path.c_str() );
        return address ;
    }

    ifstream cfg(cfg_path);
    if ( !cfg.is_open() )
    {
        wlog ("unable to open %s file for interface:%s",
               cfg_path.c_str(),
               iface.c_str());
        return address ;
    }

    for ( string text ; getline(cfg, text) ; )
    {
        // only lines that start with the 'address' keyword are of interest
        if ( text.rfind("address", 0) != 0 )
            continue ;

        istringstream fields(text);
        string label ;                // the "address" keyword
        fields >> label >> address ;  // keep only the value
        ilog ("found pxeboot address in %s", cfg_path.c_str());
    }
    cfg.close();
    return address ;
}
string get_event_str ( int event_code )
{
@ -1534,7 +2093,7 @@ int send_log_message ( msgSock_type * sock_ptr,
}
else
{
mlog2 ("%s:%s\n%s", &log.hostname[0], &log.filename[0], log_str );
mlog1 ("%s:%s\n%s", &log.hostname[0], &log.filename[0], log_str );
}
return rc ;
}

View File

@ -2,7 +2,7 @@
#define __INCLUDE_NODEUTIL_H__
/*
* Copyright (c) 2013-2014, 2016, 2019 Wind River Systems, Inc.
* Copyright (c) 2013-2014, 2016, 2019, 2024 Wind River Systems, Inc.
*
* SPDX-License-Identifier: Apache-2.0
*
@ -28,6 +28,8 @@ using namespace std;
#define NODEUTIL_LATENCY_MON_START ((const char *)"start")
void nodeUtil_latency_log ( string hostname, const char * label_ptr, int msecs );
// path to the Debian network interfaces directory
#define NETWORK_INTERFACES_DIR (const char *)("/etc/network/interfaces.d")
/* Common socket type struct */
typedef struct
@ -65,7 +67,68 @@ string get_iface_mac ( const char * iface_ptr );
void print_inv ( node_inv_type & info );
int get_iface_attrs ( const char * iface_ptr, int & index, int & speed , int & duplex , string & autoneg );
const char * get_iface_name_str ( int iface );
const char * get_interface_name_str ( int iface );
/* Used to learn the pxeboot address */
enum iface_type_enum { ethernet = 0, vlan = 1, bond = 2 };
/* Describes one network interface and what it is built on.
 * Filled in by get_iface_info. */
typedef struct
{
/* name of the interface that was looked up */
string iface_name ;
/* learned interface type ; defaults to ethernet.
 * Note: for a vlan, get_iface_info stores the type of the
 * vlan's parent (ethernet or bond) here. */
iface_type_enum iface_type = ethernet ;
/* vlan link ; physical or bond
*
* The parent interface is the physical network interface
* to which the VLAN is associated.
* get_iface_info sets this to the interface's own name for the
* ethernet and bond types. */
string parent = "" ;
/* bond links ; two physical interfaces
*
* A bond is a logical interface created by combining multiple
* physical network interfaces, known as "slaves". */
string slave1 = "" ;
string slave2 = "" ;
/* bonding mode ; active-backup, balanced-xor, 802.3ad, etc.
* A string that represents the bonding mode string and id
* Example: 802.3ad 4 */
string bond_mode = "" ;
/* string representing the iface hierarchy.
*
* ethernet
* bond -> slaves
* vlan -> bond -> slaves
*
* This interface chain string exists solely for the purpose
* of logging for the report tool system info. */
string chain = "" ;
} iface_info_type ;
#define INTERFACES_DIR ((const char *)"/sys/class/net/")
const char * get_iface_type_str ( iface_type_enum type_enum );
int get_iface_type ( string iface,
iface_type_enum & iface_type );
int get_iface_parent ( int network,
string & ifname,
string & parent );
int get_bond_slaves ( int network,
string bonded_iface,
string & slave1,
string & slave2 );
int get_bond_mode ( int network,
string bonded_iface,
string & bond_mode);
int get_iface_info ( int network,
string iface,
iface_info_type & iface_info);
// For the mtcClient pxeboot address learning.
string get_pxeboot_dhcp_addr ( string iface ); // worker/storage
string get_pxeboot_static_addr ( string iface ); // controllers
unsigned int get_host_function_mask ( string & nodeType_str );
bool is_combo_system (unsigned int nodetype_mask );

View File

@ -1,7 +1,7 @@
#ifndef __INCLUDE_RETURNCODES_H__
#define __INCLUDE_RETURNCODES_H__
/*
* Copyright (c) 2013, 2016 Wind River Systems, Inc.
* Copyright (c) 2013, 2016, 2024 Wind River Systems, Inc.
*
* SPDX-License-Identifier: Apache-2.0
*
@ -116,7 +116,7 @@
#define FAIL_DUP_HOSTNAME (92)
#define FAIL_DUP_IPADDR (93)
#define FAIL_DUP_MACADDR (94)
#define FAIL____UNUSED____95 (95)
#define FAIL_INVALID_IP (95)
#define FAIL_LOCATE_KEY_VALUE (96)
#define FAIL_JSON_OBJECT (97)
#define FAIL_EXTERNAL_API (98)

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2013-2014, 2016 Wind River Systems, Inc.
* Copyright (c) 2013-2014, 2016, 2024 Wind River Systems, Inc.
*
* SPDX-License-Identifier: Apache-2.0
*
@ -43,6 +43,7 @@ void daemon_config_default ( daemon_config_type* config_ptr )
config_ptr->sysinv_mtc_inv_label = strdup("none");
config_ptr->mgmnt_iface = strdup("none");
config_ptr->clstr_iface = strdup("none");
config_ptr->pxeboot_iface = strdup("none");
config_ptr->sysinv_api_bind_ip = strdup("none");
config_ptr->mode = strdup("none");
config_ptr->fit_host = strdup("none");
@ -354,8 +355,10 @@ void daemon_dump_cfg ( void )
if ( ptr->mtc_rx_mgmnt_port ) { ilog ("mtc_rx_mgmnt_port = %d\n", ptr->mtc_rx_mgmnt_port );}
if ( ptr->mtc_rx_clstr_port ) { ilog ("mtc_rx_clstr_port = %d\n", ptr->mtc_rx_clstr_port );}
if ( ptr->mtc_rx_pxeboot_port ) { ilog ("mtc_rx_pxeboot_port = %d\n", ptr->mtc_rx_pxeboot_port );}
if ( ptr->mtc_tx_mgmnt_port ) { ilog ("mtc_tx_mgmnt_port = %d\n", ptr->mtc_tx_mgmnt_port );}
if ( ptr->mtc_tx_clstr_port ) { ilog ("mtc_tx_clstr_port = %d\n", ptr->mtc_tx_clstr_port );}
if ( ptr->mtc_tx_pxeboot_port ) { ilog ("mtc_tx_pxeboot_port = %d\n", ptr->mtc_tx_pxeboot_port );}
if ( ptr->agent_rx_port ) { ilog ("agent_rx_port = %d\n", ptr->agent_rx_port );}
if ( ptr->client_rx_port ) { ilog ("client_rx_port = %d\n", ptr->client_rx_port );}
if ( ptr->mtc_to_hbs_cmd_port ) { ilog ("mtc_to_hbs_cmd_port = %d\n", ptr->mtc_to_hbs_cmd_port );}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2017,2019 Wind River Systems, Inc.
* Copyright (c) 2016-2017,2019, 2024 Wind River Systems, Inc.
*
* SPDX-License-Identifier: Apache-2.0
*
@ -83,7 +83,7 @@ void alarmMgr_queue_clear ( void )
************************************************************************/
void alarmMgr_queue_alarm ( queue_entry_type entry )
{
alog ("%s adding %s to alarm queue [size=%ld]\n",
dlog ("%s adding %s to alarm queue [size=%ld]\n",
entry.hostname.c_str(),
entry.alarmid.c_str(),
alarm_queue.size() );
@ -116,7 +116,7 @@ void alarmMgr_queue_alarm ( queue_entry_type entry )
void alarmMgr_service_queue ( void )
{
alog1 ("Elements: %ld\n", alarm_queue.size());
dlog1 ("Elements: %ld\n", alarm_queue.size());
if ( alarm_queue.empty() )
return ;
@ -138,7 +138,7 @@ void alarmMgr_service_queue ( void )
string action = entry.operation ;
action.append (" alarm");
alog ("%s %s operation:%s severity:%s entity:%s prefix:%s\n",
dlog ("%s %s operation:%s severity:%s entity:%s prefix:%s\n",
entry.hostname.c_str(),
entry.alarmid.c_str(),
entry.operation.c_str(),

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2013 Wind River Systems, Inc.
* Copyright (c) 2013, 2024 Wind River Systems, Inc.
*
* SPDX-License-Identifier: Apache-2.0
*
@ -153,7 +153,7 @@ EFmAlarmSeverityT alarmUtil_query ( string hostname,
ENTITY_PREFIX, hostname.data(), instance.data());
}
alog ("entity_instance:%s\n", alarm_filter.entity_instance_id );
dlog ("entity_instance:%s\n", alarm_filter.entity_instance_id );
if (( rc = fm_get_fault ( &alarm_filter, &alarm_query )) == FM_ERR_OK )
{
dlog ("Found with Severity: %d\n", alarm_query.severity );
@ -185,7 +185,7 @@ int alarmUtil_query_identity ( string identity, SFmAlarmDataT * alarm_list_ptr,
memset(&alarm_filter, 0, sizeof(alarm_filter));
snprintf ( alarm_filter.alarm_id, FM_MAX_BUFFER_LENGTH, "%s", identity.data());
rc = fm_get_faults_by_id ( &alarm_filter.alarm_id, alarm_list_ptr, &max_alarms );
alog ("%s fm_get_faults_by_id rc = %d\n", alarm_filter.alarm_id, rc );
dlog ("%s fm_get_faults_by_id rc = %d\n", alarm_filter.alarm_id, rc );
if ( rc == FM_ERR_OK )
{
return (PASS);
@ -261,18 +261,18 @@ int alarmUtil ( string & hostname,
{
if ( alarm.alarm_state == FM_ALARM_STATE_SET )
{
alog ("%s setting %s %s alarm\n", hostname.c_str(), alarm.alarm_id, alarm.entity_instance_id );
dlog ("%s setting %s %s alarm\n", hostname.c_str(), alarm.alarm_id, alarm.entity_instance_id );
}
else
{
alog ("%s creating %s %s log\n", hostname.c_str(), alarm.alarm_id, alarm.entity_instance_id );
dlog ("%s creating %s %s log\n", hostname.c_str(), alarm.alarm_id, alarm.entity_instance_id );
}
/* Debug Logs */
alog ("%s Alarm Reason: %s\n", hostname.c_str(), alarm.reason_text );
alog ("%s Alarm Action: %s\n", hostname.c_str(), alarm.proposed_repair_action );
alog ("%s Alarm Ident : %s : %s\n", hostname.c_str(), alarm.entity_type_id, alarm.entity_instance_id );
alog ("%s Alarm State : state:%d sev:%d type:%d cause:%d sa:%c supp:%c\n",
dlog ("%s Alarm Reason: %s\n", hostname.c_str(), alarm.reason_text );
dlog ("%s Alarm Action: %s\n", hostname.c_str(), alarm.proposed_repair_action );
dlog ("%s Alarm Ident : %s : %s\n", hostname.c_str(), alarm.entity_type_id, alarm.entity_instance_id );
dlog ("%s Alarm State : state:%d sev:%d type:%d cause:%d sa:%c supp:%c\n",
hostname.c_str(),
alarm.alarm_state,
alarm.severity,
@ -310,7 +310,7 @@ int alarmUtil ( string & hostname,
snprintf(filter.alarm_id, FM_MAX_BUFFER_LENGTH, "%s", alarm.alarm_id);
snprintf(filter.entity_instance_id, FM_MAX_BUFFER_LENGTH, "%s", alarm.entity_instance_id);
alog ( "fm_clear_fault: %s %s:%s", hostname.c_str(), alarm.entity_instance_id, alarm.alarm_id );
dlog ( "fm_clear_fault: %s %s:%s", hostname.c_str(), alarm.entity_instance_id, alarm.alarm_id );
#ifdef WANT_FIT_TESTING
if (( daemon_is_file_present ( MTC_CMD_FIT__FM_ERROR_CODE )) &&

View File

@ -13,6 +13,8 @@
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <errno.h> /* for ENODEV, EFAULT and ENXIO */
#include <unistd.h> /* for close and usleep */
@ -284,6 +286,9 @@ nodeLinkClass::nodeLinkClass()
my_local_ip.clear() ;
my_float_ip.clear() ;
my_clstr_ip.clear() ;
my_pxeboot_ip.clear();
my_pxeboot_if.clear();
active_controller_hostname.clear() ;
inactive_controller_hostname.clear() ;
@ -301,6 +306,7 @@ nodeLinkClass::nodeLinkClass()
mgmnt_link_up_and_running = false ;
clstr_link_up_and_running = false ;
clstr_network_provisioned = false ;
pxeboot_network_provisioned=false ;
clstr_degrade_only = false ;
dor_mode_active = false ;
@ -492,11 +498,13 @@ nodeLinkClass::node* nodeLinkClass::addNode( string hostname )
/* init the new node */
ptr->hostname = hostname ;
ptr->pxeboot_hostname = "";
ptr->ip = "" ;
ptr->mac = "" ;
ptr->clstr_ip = "" ;
ptr->clstr_mac = "" ;
ptr->pxeboot_ip= "" ;
/* key value dictionary */
ptr->mtce_info = "" ;
@ -551,18 +559,35 @@ nodeLinkClass::node* nodeLinkClass::addNode( string hostname )
ptr->mtcAlive_purge = 0 ;
ptr->offline_search_count = 0 ;
ptr->mtcAlive_mgmnt = false ;
ptr->mtcAlive_clstr = false ;
ptr->mtcAlive_pxeboot = false ;
/* These counts are incremented in the set_mtcAlive member
* function and cleared in the reset progression handler. */
ptr->mtcAlive_mgmnt_count = 0 ;
ptr->mtcAlive_clstr_count = 0 ;
ptr->mtcAlive_pxeboot_count = 0 ;
// Clear all the mtcAlive_sequence numbers and monitoring trackers
for (int i = 0 ; i < MTCALIVE_INTERFACES_MAX ; i++)
{
ptr->mtcAlive_sequence[i] =
ptr->mtcAlive_sequence_save[i] =
ptr->mtcAlive_sequence_miss[i] =
ptr->mtcAlive_log_throttle [i] = 0 ;
}
ptr->pxeboot_mtcAlive_not_seen_log_throttle = 0 ;
ptr->pxeboot_mtcAlive_loss_log_throttle = 0 ;
ptr->bmc_reset_pending_log_throttle = 0 ;
ptr->reboot_cmd_ack_mgmnt = false ;
ptr->reboot_cmd_ack_clstr = false ;
ptr->unlock_cmd_ack = false ;
ptr->reboot_cmd_ack_pxeboot = false ;
ptr->offline_log_throttle = 0 ;
ptr->offline_log_reported = true ;
ptr->online_log_reported = false ;
@ -585,6 +610,7 @@ nodeLinkClass::node* nodeLinkClass::addNode( string hostname )
mtcTimer_init ( ptr->mtcCmd_timer, hostname, "mtcCmd timer"); /* Init node's mtcCmd timer */
mtcTimer_init ( ptr->mtcConfig_timer, hostname, "mtcConfig timer"); /* Init node's mtcConfig timer */
mtcTimer_init ( ptr->mtcAlive_timer , hostname, "mtcAlive timer"); /* Init node's mtcAlive timer */
mtcTimer_init ( ptr->online_timer , hostname, "online timer"); /* Init node's online timer */
mtcTimer_init ( ptr->offline_timer, hostname, "offline timer"); /* Init node's FH offline timer */
mtcTimer_init ( ptr->http_timer, hostname, "http timer" ); /* Init node's http timer */
mtcTimer_init ( ptr->bm_timer, hostname, "bm timer" ); /* Init node's bm timer */
@ -620,6 +646,7 @@ nodeLinkClass::node* nodeLinkClass::addNode( string hostname )
ptr->resetStage = MTC_RESET__START ;
ptr->enableStage = MTC_ENABLE__START ;
ptr->disableStage = MTC_DISABLE__START ;
ptr->mtcAliveStage = MTC_MTCALIVE__START ;
ptr->oos_test_count = 0 ;
ptr->insv_test_count = 0 ;
@ -818,6 +845,11 @@ struct nodeLinkClass::node* nodeLinkClass::getNode ( string hostname )
{
return ptr ;
}
/* Node can be looked up by pxeboot ip */
if ( !hostname.compare ( ptr->pxeboot_ip ))
{
return ptr ;
}
if (( ptr->next == NULL ) || ( ptr == tail ))
break ;
@ -911,6 +943,7 @@ int nodeLinkClass::remNode( string hostname )
mtcTimer_fini ( ptr->mtcTimer );
mtcTimer_fini ( ptr->mtcSwact_timer );
mtcTimer_fini ( ptr->mtcAlive_timer );
mtcTimer_fini ( ptr->online_timer );
mtcTimer_fini ( ptr->offline_timer );
mtcTimer_fini ( ptr->mtcCmd_timer );
mtcTimer_fini ( ptr->http_timer );
@ -1559,12 +1592,12 @@ int nodeLinkClass::avail_status_change ( string hostname,
( avail != MTC_AVAIL_STATUS__ONLINE )))
{
/* Free the mtc timer if in use */
if ( node_ptr->mtcAlive_timer.tid )
if ( node_ptr->online_timer.tid )
{
tlog ("%s Stopping mtcAlive timer\n", node_ptr->hostname.c_str());
mtcTimer_stop ( node_ptr->mtcAlive_timer );
node_ptr->mtcAlive_timer.ring = false ;
node_ptr->mtcAlive_timer.tid = NULL ;
mtcTimer_stop ( node_ptr->online_timer );
node_ptr->online_timer.ring = false ;
node_ptr->online_timer.tid = NULL ;
}
node_ptr->onlineStage = MTC_ONLINE__START ;
}
@ -1641,6 +1674,7 @@ int nodeLinkClass::lazy_graceful_fs_reboot ( struct nodeLinkClass::node * node_p
/* Should never get there but if we do resend the reboot request
* but this time not Lazy */
send_mtc_cmd ( node_ptr->hostname, MTC_CMD_REBOOT, MGMNT_INTERFACE ) ;
send_mtc_cmd ( node_ptr->hostname, MTC_CMD_REBOOT, PXEBOOT_INTERFACE ) ;
}
return (FAIL);
}
@ -3448,6 +3482,137 @@ void nodeLinkClass::mtcInfo_handler ( void )
}
}
/**************************************************************************
 *
 * Name       : pxebootInfo_loader
 *
 * Purpose    : Load node pxeboot hostnames and ip addresses.
 *
 * Description: For each provisioned node, this function parses the
 *              /opt/platform/config/<sw_version>/dnsmasq.hosts file
 *              with each node's management network mac address as the
 *              primary search string and loads the pxeboot ip address
 *              and pxeboot hostname where matches are found.
 *
 *              Each line is read as comma separated 'mac,hostname,ip'
 *              fields. Lines that start with "pxecontroller" and lines
 *              with an empty mac field (blank lines for instance) are
 *              skipped.
 *
 * Parameters : Optional my_mac address for initial process startup
 *              to get just its own my_pxeboot_ip address before the
 *              nodeLinkClass host chain is created.
 *
 * Updates    : this->my_pxeboot_ip if my_mac is specified.
 *              node_ptr->pxeboot_hostname and node_ptr->pxeboot_ip for
 *              all matching hosts if my_mac is empty ; this->my_pxeboot_ip
 *              is also updated when the matching host is this host.
 *
 * Returns    : Nothing
 *
 **************************************************************************/
void nodeLinkClass::pxebootInfo_loader ( string my_mac )
{
    string dnsmasq_hosts_file = OPT_PLATFORM_CONFIG_DIR ;
    dnsmasq_hosts_file.append("/");
    dnsmasq_hosts_file.append(sw_version);
    dnsmasq_hosts_file.append("/");
    dnsmasq_hosts_file.append(DNSMASQ_HOSTS_FILE);

    if ( daemon_is_file_present ( dnsmasq_hosts_file.data()) == false )
    {
        ilog ("%s file not present", dnsmasq_hosts_file.c_str());
        return ;
    }

    // Open the dnsmasq_hosts_file for reading
    ifstream filestream ( dnsmasq_hosts_file.c_str() );

    // Check if the file is open
    if (!filestream.is_open())
    {
        elog ("failed to open seemingly present %s file", dnsmasq_hosts_file.c_str());
        return ;
    }

    // Read each line from the file
    string line;
    while (getline(filestream, line))
    {
        // Skip lines starting with "pxecontroller"
        if (line.compare(0, 13, "pxecontroller") == 0)
            continue;

        // Create a stringstream to parse the comma-delimited fields
        stringstream dnsmasq_hosts(line);
        string mac, hostname, ip ;

        // Extract fields
        getline(dnsmasq_hosts, mac, ',');
        getline(dnsmasq_hosts, hostname, ',');
        getline(dnsmasq_hosts, ip, ',');

        // Skip records without a mac address. An empty mac would
        // otherwise compare equal to the mac of any host whose
        // management mac has not been learned yet and overwrite
        // that host's pxeboot hostname.
        if ( mac.empty() )
            continue ;

        dlog ("pxebootInfo: %s %s %s", mac.c_str(), hostname.c_str(), ip.c_str());

        if ( my_mac.empty() )
        {
            if ( ! head )
            {
                elog ("cannot read inventory ; head is null");

                // Close the file stream
                filestream.close();
                return ;
            }

            // Search for the node that matches each mac address in inventory
            bool found = false ;
            for ( struct node * node_ptr = head ; ; node_ptr = node_ptr->next )
            {
                if ( !mac.compare(node_ptr->mac) )
                {
                    node_ptr->pxeboot_hostname = hostname ;
                    if ( !ip.empty() && ( ip != node_ptr->pxeboot_ip ))
                    {
                        // pxeboot ip address found and is different
                        if ( node_ptr->pxeboot_ip.empty() )
                        {
                            ilog ("%s pxeboot hostname: %s has pxeboot ip: %s",
                                      node_ptr->hostname.c_str(),
                                      node_ptr->pxeboot_hostname.c_str(),
                                      ip.c_str());
                        }
                        else
                        {
                            wlog ("%s pxeboot ip changed from %s to %s",
                                      node_ptr->hostname.c_str(),
                                      node_ptr->pxeboot_ip.c_str(),
                                      ip.c_str());
                        }
                        node_ptr->pxeboot_ip = ip ;

                        // Also load the my_pxeboot_ip at the process level for easy access
                        if (( node_ptr->hostname == this->my_hostname ) && ( this->my_pxeboot_ip != ip ))
                            this->my_pxeboot_ip = ip ;
                    }
                    found = true ;
                    break ;
                }
                if (( node_ptr->next == NULL ) || ( node_ptr == tail ))
                    break ;
            }
            if ( found == false )
            {
                wlog ("no host found matching mac address:%s", mac.c_str());
            }
        }
        else if ( !mac.compare( my_mac ) )
        {
            // Handle the process startup 'my mac' case
            if ( !ip.empty() )
                this->my_pxeboot_ip = ip ;
            else
            {
                wlog ("failed to lookup pxeboot ip from mac %s", my_mac.c_str());
            }
        }
    }

    // Close the file stream
    filestream.close();
}
/* Lock Rules
*
* 1. Cannot lock this controller
@ -3701,6 +3866,17 @@ string nodeLinkClass::get_clstr_hostaddr ( string & hostname )
return ( null_str );
}
/* Return the pxeboot ip address stored for the host that getNode
 * resolves 'hostname' to, or null_str when no such host is found. */
string nodeLinkClass::get_pxeboot_hostaddr ( string hostname )
{
    nodeLinkClass::node * host_ptr = nodeLinkClass::getNode ( hostname );
    if ( host_ptr == NULL )
    {
        return ( null_str );
    }
    return ( host_ptr->pxeboot_ip );
}
string nodeLinkClass::get_hostIfaceMac ( string & hostname, int iface )
{
nodeLinkClass::node* node_ptr ;
@ -3730,6 +3906,30 @@ int nodeLinkClass::set_hostaddr ( string & hostname, string & ip )
return ( rc );
}
/* Store 'ip' as the pxeboot address of the host 'hostname' resolves to.
 *
 * Returns FAIL_HOSTNAME_LOOKUP when the host is not found,
 *         FAIL_INVALID_IP      when the ip fails validation or is the
 *                              same as the address already stored,
 *         PASS                 when the stored address was updated.
 *
 * NOTE(review): a valid but unchanged ip is reported as FAIL_INVALID_IP ;
 * confirm callers expect that rather than PASS. */
int nodeLinkClass::set_pxeboot_hostaddr ( string hostname, string ip )
{
    nodeLinkClass::node * host_ptr = nodeLinkClass::getNode ( hostname );
    if ( host_ptr == NULL )
    {
        return ( FAIL_HOSTNAME_LOOKUP );
    }

    /* validation runs first ; the compare only happens for a valid ip */
    bool accept = ( hostUtil_is_valid_ip_addr(ip)) && ( host_ptr->pxeboot_ip != ip ) ;
    if ( !accept )
    {
        return ( FAIL_INVALID_IP );
    }

    host_ptr->pxeboot_ip = ip ;
    ilog ("%s pxeboot ip set to %s",
              host_ptr->hostname.c_str(),
              host_ptr->pxeboot_ip.c_str());
    return ( PASS );
}
int nodeLinkClass::set_clstr_hostaddr ( string & hostname, string & ip )
{
int rc = FAIL ;
@ -3759,7 +3959,8 @@ string nodeLinkClass::get_hostname ( string hostaddr )
( hostaddr == LOCALHOST ) ||
( hostaddr == my_local_ip ) ||
( hostaddr == my_float_ip ) ||
( hostaddr == my_clstr_ip ))
( hostaddr == my_clstr_ip ) ||
( hostaddr == my_pxeboot_ip ))
{
return(this->my_hostname);
}
@ -3889,6 +4090,8 @@ void nodeLinkClass::set_cmd_resp ( string & hostname, mtc_message_type & msg, in
if ( iface == MGMNT_INTERFACE )
node_ptr->reboot_cmd_ack_mgmnt = 1 ;
else if ( iface == PXEBOOT_INTERFACE )
node_ptr->reboot_cmd_ack_pxeboot = 1 ;
else if ( iface == CLSTR_INTERFACE )
node_ptr->reboot_cmd_ack_clstr = 1 ;
}
@ -3923,9 +4126,8 @@ unsigned int nodeLinkClass::get_cmd_resp ( string & hostname )
*
* Name : set_mtcAlive
*
* Description: Set the mgmnt or clust specific mtc alive received bool.
*
* Used in the offline handler to verify overall offline state.
* Description: Set mtcAlive driven controls and status for the
* pxeboot, mgmnt and cluster networks.
*
* Interfaces : Public with hostname.
* Private by node pointer.
@ -3933,52 +4135,108 @@ unsigned int nodeLinkClass::get_cmd_resp ( string & hostname )
* If mtcAlive is ungated then
*
* 1. manage the online/offline state bools
* 2. increment the mtcAlive count and
* 2. increment the mtcAlive count
* 3. set the mtcAlive received bool for the specified interface
*
*****************************************************************************/
void nodeLinkClass::set_mtcAlive ( string & hostname, int interface )
void nodeLinkClass::set_mtcAlive ( string & hostname, unsigned int sequence, int iface )
{
nodeLinkClass::node* node_ptr ;
node_ptr = nodeLinkClass::getNode ( hostname );
if ( node_ptr != NULL )
{
this->set_mtcAlive ( node_ptr, interface );
this->set_mtcAlive ( node_ptr, sequence, iface );
}
}
void nodeLinkClass::set_mtcAlive ( struct nodeLinkClass::node * node_ptr, int interface )
#define MTCALIVE_LOG_THROTTLE (1000)
void nodeLinkClass::set_mtcAlive ( struct nodeLinkClass::node * node_ptr, unsigned int sequence, int iface)
{
if ( node_ptr )
{
if ( node_ptr->mtcAlive_gate == false )
{
bool state_change = false ;
node_ptr->mtcAlive_online = true ;
node_ptr->mtcAlive_offline = false ;
node_ptr->mtcAlive_count++ ;
if ( interface == CLSTR_INTERFACE )
if ( iface == CLSTR_INTERFACE )
{
if ( node_ptr->mtcAlive_clstr == false )
{
alog ("%s %s mtcAlive received",
node_ptr->hostname.c_str(),
get_iface_name_str(interface));
node_ptr->mtcAlive_clstr_count++ ;
node_ptr->mtcAlive_clstr = true ;
state_change = true ;
}
node_ptr->mtcAlive_clstr_count++ ;
}
else if ( iface == MGMNT_INTERFACE )
{
if ( node_ptr->mtcAlive_mgmnt == false )
{
node_ptr->mtcAlive_mgmnt = true ;
state_change = true ;
}
node_ptr->mtcAlive_mgmnt_count++ ;
}
else if ( iface == PXEBOOT_INTERFACE )
{
if ( node_ptr->mtcAlive_pxeboot == false )
{
node_ptr->mtcAlive_pxeboot = true ;
state_change = true ;
}
node_ptr->mtcAlive_pxeboot_count++ ;
}
else
{
wlog("%s mtcAlive received from unknown network %d",
node_ptr->hostname.c_str(), iface);
return ;
}
if ( state_change )
{
ilog ("%s mtcAlive received from %s network with uptime:%d ; seq:%d",
node_ptr->hostname.c_str(),
get_iface_name_str(iface),
node_ptr->uptime,
sequence);
node_ptr->mtcAlive_log_throttle[iface] = 0 ;
}
else if ( node_ptr->mtcAlive_sequence[iface]+1 != sequence)
{
if ( sequence < node_ptr->mtcAlive_sequence[iface]+1 )
{
wlog ("%s mtcAlive received from %s network with uptime:%d ; out-of-sequence ; expect:%d detect:%d ; correcting",
node_ptr->hostname.c_str(),
get_iface_name_str(iface),
node_ptr->uptime,
node_ptr->mtcAlive_sequence[iface]+1,
sequence);
}
else
{
wlog ("%s mtcAlive received from %s network with uptime:%d ; missed %d mtcalive msgs ; expect:%d detect:%d ; correcting",
node_ptr->hostname.c_str(),
get_iface_name_str(iface),
node_ptr->uptime,
sequence-(node_ptr->mtcAlive_sequence[iface]+1),
node_ptr->mtcAlive_sequence[iface]+1,
sequence);
}
}
else
{
if ( node_ptr->mtcAlive_mgmnt == false )
{
alog ("%s %s mtcAlive received",
node_ptr->hostname.c_str(),
get_iface_name_str(interface));
node_ptr->mtcAlive_mgmnt_count++ ;
node_ptr->mtcAlive_mgmnt = true ;
}
alog_throttled (node_ptr->mtcAlive_log_throttle[iface], MTCALIVE_LOG_THROTTLE,
"%s mtcAlive received from %s network with uptime:%d ; seq:%d",
node_ptr->hostname.c_str(),
get_iface_name_str(iface),
node_ptr->uptime,
sequence);
}
// update running sequence number for this interface
node_ptr->mtcAlive_sequence[iface] = sequence ;
}
}
}
@ -4291,7 +4549,6 @@ void nodeLinkClass::set_mtce_flags ( string hostname, int flags, int iface )
((node_ptr->adminAction != MTC_ADMIN_ACTION__ENABLE ) &&
(node_ptr->adminAction != MTC_ADMIN_ACTION__UNLOCK )))
{
wlog ("%s mtcAlive reporting locked while unlocked ; correcting", node_ptr->hostname.c_str());
send_mtc_cmd ( node_ptr->hostname , MTC_MSG_UNLOCKED, MGMNT_INTERFACE );
send_mtc_cmd ( node_ptr->hostname , MTC_MSG_UNLOCKED, CLSTR_INTERFACE );
}
@ -4302,7 +4559,13 @@ void nodeLinkClass::set_mtce_flags ( string hostname, int flags, int iface )
if (( node_ptr->adminState == MTC_ADMIN_STATE__LOCKED ) &&
( node_ptr->adminAction != MTC_ADMIN_ACTION__LOCK ))
{
wlog ("%s mtcAlive reporting unlocked while locked ; correcting", node_ptr->hostname.c_str());
// Avoid printing this warning log in simplex mode.
// The locked flag is lost over a reboot in simplex mode.
if ( daemon_is_file_present ( STILL_SIMPLEX_FILE ) == false )
{
wlog ("%s mtcAlive reporting unlocked while locked ; correcting",
node_ptr->hostname.c_str());
}
send_mtc_cmd ( node_ptr->hostname , MTC_MSG_LOCKED, MGMNT_INTERFACE );
send_mtc_cmd ( node_ptr->hostname , MTC_MSG_LOCKED, CLSTR_INTERFACE );
}
@ -6243,6 +6506,10 @@ int nodeLinkClass::update_host_functions ( string hostname , string functions )
}
rc = PASS ;
}
else
{
wlog ("%s getNode lookup failed", hostname.c_str());
}
return (rc);
}
@ -6930,12 +7197,12 @@ int nodeLinkClass::availStatusChange ( struct nodeLinkClass::node * node_ptr,
( newAvailStatus != MTC_AVAIL_STATUS__ONLINE )))
{
/* Free the mtc timer if in use */
if ( node_ptr->mtcAlive_timer.tid )
if ( node_ptr->online_timer.tid )
{
tlog ("%s Stopping mtcAlive timer\n", node_ptr->hostname.c_str());
mtcTimer_stop ( node_ptr->mtcAlive_timer );
node_ptr->mtcAlive_timer.ring = false ;
node_ptr->mtcAlive_timer.tid = NULL ;
mtcTimer_stop ( node_ptr->online_timer );
node_ptr->online_timer.ring = false ;
node_ptr->online_timer.tid = NULL ;
}
node_ptr->onlineStage = MTC_ONLINE__START ;
}
@ -7265,6 +7532,28 @@ int nodeLinkClass::subStageChange ( struct nodeLinkClass::node * node_ptr,
}
}
/**
 * Host mtcAlive Stage Change member function.
 *
 * Moves the specified host's mtcAlive stage to 'newHdlrStage'.
 *
 * @param node_ptr     - the host whose mtcAliveStage is updated
 * @param newHdlrStage - the requested stage
 *
 * @return PASS when the requested stage is below MTC_MTCALIVE__STAGES
 *         and has been applied ; FAIL otherwise, in which case the
 *         host's stage is forced back to MTC_MTCALIVE__START.
 */
int nodeLinkClass::mtcAliveStageChange ( struct nodeLinkClass::node * node_ptr,
                                         mtc_mtcAliveStages_enum newHdlrStage )
{
    /* Handle the out-of-range request first ; log it and restart the stage */
    if ( !( newHdlrStage < MTC_MTCALIVE__STAGES ))
    {
        slog ("%s Invalid mtcAlive stage (%d)", node_ptr->hostname.c_str(), newHdlrStage );
        node_ptr->mtcAliveStage = MTC_MTCALIVE__START ;
        return (FAIL) ;
    }

    /* Valid request ; log the 'from -> to' transition before applying it */
    clog ("%s stage %s -> %s",
              node_ptr->hostname.c_str(),
              get_mtcAliveStages_str(node_ptr->mtcAliveStage).c_str(),
              get_mtcAliveStages_str(newHdlrStage).c_str());

    node_ptr->mtcAliveStage = newHdlrStage ;
    return (PASS) ;
}
struct nodeLinkClass::node * nodeLinkClass::get_mtcTimer_timer ( timer_t tid )
{
/* check for empty list condition */
@ -7537,6 +7826,23 @@ struct nodeLinkClass::node * nodeLinkClass::get_mtcAlive_timer ( timer_t tid )
return static_cast<struct node *>(NULL);
}
/**
 * Find the node whose online timer id matches 'tid'.
 *
 * Walks the node list from 'head', stopping at 'tail' or at the first
 * node whose 'next' is NULL, and compares each node's online_timer.tid
 * against the supplied timer id.
 *
 * @param tid - the timer id to search for
 *
 * @return pointer to the matching node ; NULL if 'tid' is NULL, the
 *         list is empty or no node owns that timer id.
 */
struct nodeLinkClass::node * nodeLinkClass::get_online_timer ( timer_t tid )
{
    /* a NULL timer id can never match a running timer */
    if ( tid == NULL )
    {
        return static_cast<struct node *>(NULL);
    }

    /* The 'ptr != NULL' loop bound covers the empty list condition
     * (head == NULL) as well as the end of a NULL terminated list,
     * so no node pointer is dereferenced before it is checked. */
    for ( struct node * ptr = head ; ptr != NULL ; ptr = ptr->next )
    {
        if ( ptr->online_timer.tid == tid )
        {
            return ptr ;
        }
        /* never walk past the tail of the list */
        if ( ptr == tail )
            break ;
    }
    return static_cast<struct node *>(NULL);
}
struct nodeLinkClass::node * nodeLinkClass::get_offline_timer ( timer_t tid )
{
@ -9231,17 +9537,53 @@ void nodeLinkClass::mem_log_state2 ( struct nodeLinkClass::node * node_ptr )
mem_log (str);
}
void nodeLinkClass::mem_log_mtcalive ( struct nodeLinkClass::node * node_ptr )
void nodeLinkClass::mem_log_mtcalive_state ( struct nodeLinkClass::node * node_ptr )
{
char str[MAX_MEM_LOG_DATA] ;
snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tmtcAlive: online:%c offline:%c Cnt:%d Gate:%s Misses:%d\n",
snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tmtcAlive: online:%c offline:%c Cnt:%d Gate:%s Misses:%d Net:%d:%d:%d",
node_ptr->hostname.c_str(),
node_ptr->mtcAlive_online ? 'Y' : 'N',
node_ptr->mtcAlive_offline ? 'Y' : 'N',
node_ptr->mtcAlive_count,
node_ptr->mtcAlive_gate ? "closed" : "open",
node_ptr->mtcAlive_misses);
node_ptr->mtcAlive_misses,
node_ptr->mtcAlive_mgmnt,
node_ptr->mtcAlive_clstr,
node_ptr->mtcAlive_pxeboot );
mem_log (str);
}
/**
 * Add a memory log line with the per network mtcAlive message counts
 * and last sequence numbers (pxeboot, mgmnt and clstr) for this host.
 *
 * @param node_ptr - the host whose mtcAlive data is logged
 */
void nodeLinkClass::mem_log_mtcalive_data ( struct nodeLinkClass::node * node_ptr )
{
    char str[MAX_MEM_LOG_DATA] ;

    /* The message counts are 'int' (%d) while the sequence numbers are
     * 'unsigned int' and therefore formatted with %u so that values
     * above INT_MAX are not printed as negative numbers. */
    snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tmtcAlive: Pxeboot:%d seq:%u Mgmt:%d seq:%u Clstr:%d seq:%u",
                node_ptr->hostname.c_str(),
                node_ptr->mtcAlive_pxeboot_count,
                node_ptr->mtcAlive_sequence[PXEBOOT_INTERFACE],
                node_ptr->mtcAlive_mgmnt_count,
                node_ptr->mtcAlive_sequence[MGMNT_INTERFACE],
                node_ptr->mtcAlive_clstr_count,
                node_ptr->mtcAlive_sequence[CLSTR_INTERFACE]);
    mem_log (str);
}
/**
 * Add a memory log line with the pxeboot network mtcAlive state
 * for this host.
 *
 *   Prov - pxeboot_network_provisioned (Y/N)
 *   Rxed - the host's mtcAlive_pxeboot flag (Y/N)
 *   ring - the host's mtcAlive_timer ring flag (Y/N)
 *   miss - mtcAlive_sequence_miss for the pxeboot interface
 *   seq  - mtcAlive_sequence      for the pxeboot interface
 *   save - mtcAlive_sequence_save for the pxeboot interface
 *
 * @param node_ptr - the host whose pxeboot mtcAlive state is logged
 */
void nodeLinkClass::mem_log_mtcalive_pxeboot ( struct nodeLinkClass::node * node_ptr )
{
    char str[MAX_MEM_LOG_DATA] ;

    /* miss, seq and save are 'unsigned int' values ; format them with
     * %u so that values above INT_MAX are not printed as negative. */
    snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tPxeboot mtcAlive: Prov:%c Rxed:%c ring:%c miss:%u seq:%u save:%u ",
                node_ptr->hostname.c_str(),
                this->pxeboot_network_provisioned ? 'Y' : 'N',
                node_ptr->mtcAlive_pxeboot ? 'Y' : 'N',
                node_ptr->mtcAlive_timer.ring ? 'Y' : 'N',
                node_ptr->mtcAlive_sequence_miss [PXEBOOT_INTERFACE],
                node_ptr->mtcAlive_sequence      [PXEBOOT_INTERFACE],
                node_ptr->mtcAlive_sequence_save [PXEBOOT_INTERFACE]);
    mem_log (str);
}
@ -9273,7 +9615,7 @@ void nodeLinkClass::mem_log_alarm2 ( struct nodeLinkClass::node * node_ptr )
void nodeLinkClass::mem_log_stage ( struct nodeLinkClass::node * node_ptr )
{
char str[MAX_MEM_LOG_DATA] ;
snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tAdd:%d Offline:%d: Swact:%d Recovery:%d Enable:%d Disable:%d Power:%d Cycle:%d\n",
snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tAdd:%d Offline:%d: Swact:%d Recovery:%d Enable:%d Disable:%d Power:%d Cycle:%d mtcAlive:%d\n",
node_ptr->hostname.c_str(),
node_ptr->addStage,
node_ptr->offlineStage,
@ -9282,7 +9624,8 @@ void nodeLinkClass::mem_log_stage ( struct nodeLinkClass::node * node_ptr )
node_ptr->enableStage,
node_ptr->disableStage,
node_ptr->powerStage,
node_ptr->powercycleStage);
node_ptr->powercycleStage,
node_ptr->mtcAliveStage);
mem_log (str);
}
@ -9319,11 +9662,13 @@ void nodeLinkClass::mem_log_reset_info ( struct nodeLinkClass::node * node_ptr )
void nodeLinkClass::mem_log_network ( struct nodeLinkClass::node * node_ptr )
{
char str[MAX_MEM_LOG_DATA] ;
snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\t%s %s cluster_host_ip: %s Uptime: %u\n",
snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\t mac:%s mgmt:%s clstr: %s pxeboot:%s:%s Uptime: %u\n",
node_ptr->hostname.c_str(),
node_ptr->mac.c_str(),
node_ptr->ip.c_str(),
node_ptr->clstr_ip.c_str(),
node_ptr->pxeboot_hostname.c_str(),
node_ptr->pxeboot_ip.c_str(),
node_ptr->uptime );
mem_log (str);
}
@ -9430,23 +9775,25 @@ void nodeLinkClass::memDumpNodeState ( string hostname )
{
if ( maintenance == true )
{
mem_log_dor ( node_ptr );
mem_log_identity ( node_ptr );
mem_log_type_info ( node_ptr );
mem_log_network ( node_ptr );
mem_log_state1 ( node_ptr );
mem_log_state2 ( node_ptr );
// mem_log_reset_info ( node_ptr );
mem_log_power_info ( node_ptr );
mem_log_alarm1 ( node_ptr );
mem_log_alarm2 ( node_ptr );
mem_log_mtcalive ( node_ptr );
mem_log_stage ( node_ptr );
mem_log_bm ( node_ptr );
mem_log_ping ( node_ptr );
mem_log_test_info ( node_ptr );
mem_log_thread_info( node_ptr );
workQueue_dump ( node_ptr );
mem_log_dor ( node_ptr );
mem_log_identity ( node_ptr );
mem_log_type_info ( node_ptr );
mem_log_network ( node_ptr );
mem_log_mtcalive_state ( node_ptr );
mem_log_mtcalive_data ( node_ptr );
mem_log_mtcalive_pxeboot ( node_ptr );
mem_log_state1 ( node_ptr );
mem_log_state2 ( node_ptr );
// mem_log_reset_info ( node_ptr );
mem_log_power_info ( node_ptr );
mem_log_alarm1 ( node_ptr );
mem_log_alarm2 ( node_ptr );
mem_log_stage ( node_ptr );
mem_log_bm ( node_ptr );
mem_log_ping ( node_ptr );
mem_log_test_info ( node_ptr );
mem_log_thread_info ( node_ptr );
workQueue_dump ( node_ptr );
}
if ( heartbeat == true )
{

View File

@ -121,6 +121,12 @@ private:
/** The Mac address of the host node */
std::string mac ;
/** The pxeboot network IP address of the host node */
std::string pxeboot_ip ;
/** The pxeboot network hostname of the host node */
std::string pxeboot_hostname ;
/** The cluster-host network IP address of the host node */
std::string clstr_ip ;
@ -279,6 +285,7 @@ private:
mtc_configStages_enum configStage ;
mtc_resetProgStages_enum resetProgStage ;
mtc_reinstallStages_enum reinstallStage ;
mtc_mtcAliveStages_enum mtcAliveStage ;
/** Board management specific FSM Stages */
mtc_powerStages_enum powerStage ;
@ -315,10 +322,25 @@ private:
int mtcAlive_hits ;
int mtcAlive_purge ;
int mtcAlive_mgmnt_count ; /* count the mgmnt network mtcAlive messages */
int mtcAlive_clstr_count ; /* count the clstr network mtcAlive messages */
bool mtcAlive_mgmnt ; /* set true when mtcAlive is rx'd from mgmnt network */
bool mtcAlive_clstr ; /* set true when mtcAlive is rx'd from clstr network */
/* TODO: (emacdona) make these an array of interfaces */
bool mtcAlive_mgmnt ; /* set true when mtcAlive is rx'd from mgmnt network */
bool mtcAlive_clstr ; /* set true when mtcAlive is rx'd from clstr network */
bool mtcAlive_pxeboot ; /* set true when mtcAlive is rx'd from pxeboot network */
/* TODO: (emacdona) make these an array of interfaces */
int mtcAlive_mgmnt_count ; /* count the mgmnt network mtcAlive messages */
int mtcAlive_clstr_count ; /* count the clstr network mtcAlive messages */
int mtcAlive_pxeboot_count ; /* count the pxeboot network mtcAlive messages */
/* tracks the sequence number of the last <iface> mtcAlive message */
unsigned int mtcAlive_sequence [MTCALIVE_INTERFACES_MAX] ;
unsigned int mtcAlive_sequence_save[MTCALIVE_INTERFACES_MAX] ;
unsigned int mtcAlive_sequence_miss[MTCALIVE_INTERFACES_MAX] ;
unsigned int mtcAlive_log_throttle [MTCALIVE_INTERFACES_MAX] ;
/* pxeboot mtcAlive monitor log throttles */
int pxeboot_mtcAlive_not_seen_log_throttle ;
int pxeboot_mtcAlive_loss_log_throttle ;
/* used to log time leading up to reset */
int bmc_reset_pending_log_throttle ;
@ -334,14 +356,12 @@ private:
bool online_log_reported ; /* availStatus switches between these states */
/* and failed */
/** Host's mtc timer struct. Use to time handler stages.
*
* reset -> reset command response
* reboot -> then wait for mtcalive message
* mtcalive -> then wait for go enabled message
*/
/* timer for pxeboot_mtcAlive_monitor fsm */
struct mtc_timer mtcAlive_timer ;
/* timer for online_handler fsm. */
struct mtc_timer online_timer ;
/* the fault handling offline handler timer */
struct mtc_timer offline_timer ;
@ -456,6 +476,7 @@ private:
bool unlock_cmd_ack ; /* set true when a unlocked command ack is rx'ed */
bool reboot_cmd_ack_mgmnt ;
bool reboot_cmd_ack_clstr ;
bool reboot_cmd_ack_pxeboot ;
/** Tracks back to back Fast Fault Recovery counts */
int graceful_recovery_counter;
@ -849,6 +870,9 @@ private:
/* Starts the specified 'reset or powercycle' recovery monitor */
int hwmon_recovery_monitor ( struct nodeLinkClass::node * node_ptr, int hwmon_event );
/* Monitors pxeboot mtcAlive messages and manages associated alarm */
int pxeboot_mtcAlive_monitor ( struct nodeLinkClass::node * node_ptr );
/* server specific power state query handler */
bool (*is_poweron_handler) (string hostname, string query_response );
@ -865,7 +889,7 @@ private:
bool get_mtcAlive_gate ( struct nodeLinkClass::node * node_ptr );
void ctl_mtcAlive_gate ( struct nodeLinkClass::node * node_ptr, bool gate_state );
void set_mtcAlive ( struct nodeLinkClass::node * node_ptr, int interface );
void set_mtcAlive ( struct nodeLinkClass::node * node_ptr, unsigned int sequence, int iface);
/********* mtcInfo in the database ************/
int mtcInfo_set ( struct nodeLinkClass::node * node_ptr, string key, string value );
@ -1087,6 +1111,10 @@ private:
int subStageChange ( struct nodeLinkClass::node * node_ptr,
mtc_subStages_enum newHdlrStage );
/** mtcAlive Stage Change member function */
int mtcAliveStageChange ( struct nodeLinkClass::node * node_ptr,
mtc_mtcAliveStages_enum newHdlrStage );
int failed_state_change ( struct nodeLinkClass::node * node_ptr );
/* issue a
@ -1125,6 +1153,7 @@ private:
struct nodeLinkClass::node * get_mtcTimer_timer ( timer_t tid );
struct nodeLinkClass::node * get_mtcConfig_timer ( timer_t tid );
struct nodeLinkClass::node * get_mtcAlive_timer ( timer_t tid );
struct nodeLinkClass::node * get_online_timer ( timer_t tid );
struct nodeLinkClass::node * get_offline_timer ( timer_t tid );
struct nodeLinkClass::node * get_mtcSwact_timer ( timer_t tid );
struct nodeLinkClass::node * get_mtcCmd_timer ( timer_t tid );
@ -1316,26 +1345,28 @@ private:
void mem_log_general_mtce_hosts ( void );
void mem_log_mnfa ( void );
void mem_log_dor ( struct nodeLinkClass::node * node_ptr );
void mem_log_identity ( struct nodeLinkClass::node * node_ptr );
void mem_log_network ( struct nodeLinkClass::node * node_ptr );
void mem_log_state1 ( struct nodeLinkClass::node * node_ptr );
void mem_log_state2 ( struct nodeLinkClass::node * node_ptr );
void mem_log_alarm1 ( struct nodeLinkClass::node * node_ptr );
void mem_log_alarm2 ( struct nodeLinkClass::node * node_ptr );
void mem_log_mtcalive ( struct nodeLinkClass::node * node_ptr );
void mem_log_stage ( struct nodeLinkClass::node * node_ptr );
void mem_log_test_info ( struct nodeLinkClass::node * node_ptr );
void mem_log_bm ( struct nodeLinkClass::node * node_ptr );
void mem_log_ping ( struct nodeLinkClass::node * node_ptr );
void mem_log_heartbeat ( struct nodeLinkClass::node * node_ptr );
void mem_log_hbs_cnts ( struct nodeLinkClass::node * node_ptr );
void mem_log_type_info ( struct nodeLinkClass::node * node_ptr );
void mem_log_reset_info( struct nodeLinkClass::node * node_ptr );
void mem_log_power_info( struct nodeLinkClass::node * node_ptr );
void mem_log_thread_info ( struct nodeLinkClass::node * node_ptr );
void mem_log_dor ( struct nodeLinkClass::node * node_ptr );
void mem_log_identity ( struct nodeLinkClass::node * node_ptr );
void mem_log_network ( struct nodeLinkClass::node * node_ptr );
void mem_log_state1 ( struct nodeLinkClass::node * node_ptr );
void mem_log_state2 ( struct nodeLinkClass::node * node_ptr );
void mem_log_alarm1 ( struct nodeLinkClass::node * node_ptr );
void mem_log_alarm2 ( struct nodeLinkClass::node * node_ptr );
void mem_log_mtcalive_state ( struct nodeLinkClass::node * node_ptr );
void mem_log_mtcalive_data ( struct nodeLinkClass::node * node_ptr );
void mem_log_mtcalive_pxeboot ( struct nodeLinkClass::node * node_ptr );
void mem_log_stage ( struct nodeLinkClass::node * node_ptr );
void mem_log_test_info ( struct nodeLinkClass::node * node_ptr );
void mem_log_bm ( struct nodeLinkClass::node * node_ptr );
void mem_log_ping ( struct nodeLinkClass::node * node_ptr );
void mem_log_heartbeat ( struct nodeLinkClass::node * node_ptr );
void mem_log_hbs_cnts ( struct nodeLinkClass::node * node_ptr );
void mem_log_type_info ( struct nodeLinkClass::node * node_ptr );
void mem_log_reset_info ( struct nodeLinkClass::node * node_ptr );
void mem_log_power_info ( struct nodeLinkClass::node * node_ptr );
void mem_log_thread_info ( struct nodeLinkClass::node * node_ptr );
void print_node_info ( struct nodeLinkClass::node * node_ptr );
void print_node_info ( struct nodeLinkClass::node * node_ptr );
// #endif
@ -1349,9 +1380,12 @@ public:
system_type_enum system_type ;
string functions ; /**< comma delimited string list of functions supported */
bool maintenance ;
bool heartbeat ;
string sw_version; /* fetched from /etc/build.info using daemon_sw_version */
string functions ; /* comma delimited string list of functions supported */
bool maintenance ; /* the mtcAgent */
bool heartbeat ; /* the hbsAgent */
/* Set to true if this controller is active.
* Currently only used by heartbeat service. */
@ -1403,10 +1437,12 @@ public:
{ active = state ; }
/** Store the hostname of this controller */
string my_hostname ; /**< */
string my_local_ip ; /**< Primary IP address */
string my_float_ip ; /**< Secondary (floating) IP address */
string my_clstr_ip ; /**< Cluster network IP address */
string my_hostname ; /** My Hostname */
string my_local_ip ; /** Primary IP address */
string my_float_ip ; /** Secondary (floating) IP address */
string my_clstr_ip ; /** Cluster network IP address */
string my_pxeboot_ip ; /** Pxeboot network IP address */
string my_pxeboot_if ; /** Pxeboot interface name */
/********* New Public Constructs for IPMI Command Handling ***********/
@ -1448,12 +1484,18 @@ public:
/** get cluster-host network ip address for any hostname */
string get_clstr_hostaddr ( string & hostname );
/** get the pxeboot network address for any hostname */
string get_pxeboot_hostaddr ( string hostname );
/** set a node's ip address */
int set_hostaddr ( string & hostname, string & ip );
/** set a node's cluster-host ip address */
int set_clstr_hostaddr ( string & hostname, string & ip );
/* set the pxeboot network address for any hostname */
int set_pxeboot_hostaddr ( string hostname, string ip );
/** get hostname for any hostname */
string get_hostname ( string hostaddr );
@ -1684,6 +1726,12 @@ public:
* network is provisioned and configured for this daemon to use */
bool clstr_network_provisioned ;
/** A boolean that is used to quickly determine if the pxeboot network
* is provisioned.
* The pxeboot network is considered unprovisioned while the management
* interface is on the 'lo' (localhost) interface. */
bool pxeboot_network_provisioned ;
/** A debug bool that allows cluster-host heartbeat failures to only
* cause host degrade rather than failure */
bool clstr_degrade_only ;
@ -1758,6 +1806,7 @@ public:
struct mtc_timer mtcTimer_mnfa ;
struct mtc_timer mtcTimer_token ;
struct mtc_timer mtcTimer_uptime ;
struct mtc_timer mtcTimer_loop ; // main loop timer
/* System Level DOR recovery timer
* Note: tid != NULL represents DOR Mode Active */
@ -1775,10 +1824,15 @@ public:
/** Returns true when a 'maintenance alive' message for that
* hostnamed node is received */
void set_mtcAlive ( string & hostname, int iface );
void set_mtcAlive ( string & hostname, unsigned int sequence, int iface );
bool get_mtcAlive_gate ( string & hostname );
void ctl_mtcAlive_gate ( string & hostname, bool gated );
/* Updates my_pxeboot_ip if my_mac is specified.
* Otherwise, tries to update the pxeboot ip and
* hostname for each provisioned node in the system. */
void pxebootInfo_loader ( string my_mac = "" );
/** Store the latest mtce flags for the specified host
* current flags are defined in nodebase.h
#define MTC_FLAG__I_AM_CONFIGURED (0x00000001)

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2019 Wind River Systems, Inc.
* Copyright (c) 2019, 2024 Wind River Systems, Inc.
*
* SPDX-License-Identifier: Apache-2.0
*
@ -23,14 +23,15 @@ using namespace std;
#endif
#define __AREA__ "mon"
#ifndef INTERFACES_DIR
#define INTERFACES_DIR ((const char *)"/sys/class/net/")
#endif
#define PLATFORM_DIR ((const char *)"/etc/platform/platform.conf")
#define LMON_DIR ((const char *)"/etc/lmon/lmon.conf")
#define INTERFACES_MAX (4) /* maximum number of interfaces to monitor */
enum interface_type { ethernet = 0, vlan = 1, bond = 2 };
string iface_type ( interface_type type_enum );
string iface_type ( iface_type_enum type_enum );
/* daemon only supports the GET request */
#define HTTP_SUPPORTED_METHODS (EVHTTP_REQ_GET)
@ -68,7 +69,7 @@ typedef struct
/* true if the interface is configured.
* i.e. the name label shown above is found in platform.conf */
bool used ;
interface_type type_enum ;
iface_type_enum type_enum ;
/* true if the link is up ; false otherwise */
bool interface_one_link_up ;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2019 Wind River Systems, Inc.
* Copyright (c) 2019, 2024 Wind River Systems, Inc.
*
* SPDX-License-Identifier: Apache-2.0
*
@ -35,7 +35,7 @@
*
****************************************************************************/
string iface_type ( interface_type type_enum )
string iface_type ( iface_type_enum type_enum )
{
switch(type_enum)
{
@ -187,7 +187,7 @@ int lmon_get_link_state ( int ioctl_socket,
* Name : lmon_interfaces_init
*
* Purpose : Map an interface (mgmt, oam or cluster-host) to a physical port.
* See interface_type enum in lmon.h
* See iface_type_enum enum in nodeUtil.h
*
*****************************************************************************/

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2013-2017, 2023 Wind River Systems, Inc.
* Copyright (c) 2013-2017, 2023-2024 Wind River Systems, Inc.
*
* SPDX-License-Identifier: Apache-2.0
*
@ -360,10 +360,40 @@ int nodeLinkClass::cmd_handler ( struct nodeLinkClass::node * node_ptr )
node_ptr->reboot_cmd_ack_mgmnt = false ;
node_ptr->reboot_cmd_ack_clstr = false ;
node_ptr->reboot_cmd_ack_pxeboot = false ;
/* send reboot command */
node_ptr->cmdReq = MTC_CMD_REBOOT ;
node_ptr->cmdRsp = MTC_CMD_NONE ;
// Send the reboot command on all provisioned networks
if ( this->pxeboot_network_provisioned == true )
{
if (( rc = send_mtc_cmd ( node_ptr->hostname,
MTC_CMD_REBOOT,
PXEBOOT_INTERFACE )) != PASS )
{
// Don't report a warning log if the far end pxeboot
// network address is not learned yet.
if ( rc != FAIL_HOSTADDR_LOOKUP )
{
wlog ("%s reboot request failed (%s) (rc:%d)\n",
node_ptr->hostname.c_str(),
get_iface_name_str(PXEBOOT_INTERFACE), rc);
}
else
{
ilog ("%s %s network address not learned yet ; can't reboot",
node_ptr->hostname.c_str(),
get_iface_name_str(PXEBOOT_INTERFACE));
}
}
else
{
send_reboot_ok = true ;
}
}
if (( rc = send_mtc_cmd ( node_ptr->hostname,
MTC_CMD_REBOOT,
MGMNT_INTERFACE )) != PASS )
@ -383,9 +413,20 @@ int nodeLinkClass::cmd_handler ( struct nodeLinkClass::node * node_ptr )
MTC_CMD_REBOOT,
CLSTR_INTERFACE )) != PASS )
{
wlog ("%s reboot request failed (%s) (rc:%d)\n",
node_ptr->hostname.c_str(),
get_iface_name_str(CLSTR_INTERFACE), rc);
// Don't report a warning log if the far end cluster
// network IP is not learned yet.
if ( rc != FAIL_HOSTADDR_LOOKUP )
{
wlog ("%s reboot request failed (%s) (rc:%d)",
node_ptr->hostname.c_str(),
get_iface_name_str(CLSTR_INTERFACE), rc);
}
else
{
ilog ("%s %s network address not learned yet ; can't reboot",
node_ptr->hostname.c_str(),
get_iface_name_str(CLSTR_INTERFACE));
}
}
else
{
@ -446,6 +487,7 @@ int nodeLinkClass::cmd_handler ( struct nodeLinkClass::node * node_ptr )
* messages from the remote host during the reset delay window */
node_ptr->mtcAlive_mgmnt_count = 0 ;
node_ptr->mtcAlive_clstr_count = 0 ;
node_ptr->mtcAlive_pxeboot_count = 0 ;
wlog ("%s ... bmc reset in %d secs", node_ptr->hostname.c_str(), reset_delay);
mtcTimer_start ( node_ptr->mtcCmd_timer, mtcTimer_handler, reset_delay );
@ -472,11 +514,25 @@ int nodeLinkClass::cmd_handler ( struct nodeLinkClass::node * node_ptr )
}
else
{
// log the acks
string nwk_ack = "" ;
if ( node_ptr->reboot_cmd_ack_pxeboot )
nwk_ack.append(get_iface_name_str(PXEBOOT_INTERFACE));
if ( node_ptr->reboot_cmd_ack_mgmnt )
{
if ( !nwk_ack.empty() )
nwk_ack.append(",");
nwk_ack.append(get_iface_name_str(MGMNT_INTERFACE));
}
if ( node_ptr->reboot_cmd_ack_clstr )
{
if ( !nwk_ack.empty() )
nwk_ack.append(",");
nwk_ack.append(get_iface_name_str(CLSTR_INTERFACE));
}
/* declare successful reboot */
plog ("%s reboot request succeeded (%s %s)",
node_ptr->hostname.c_str(),
node_ptr->reboot_cmd_ack_mgmnt ? get_iface_name_str(MGMNT_INTERFACE) : "",
node_ptr->reboot_cmd_ack_clstr ? get_iface_name_str(CLSTR_INTERFACE) : "");
plog ("%s reboot request succeeded (%s)", node_ptr->hostname.c_str(), nwk_ack.c_str());
if ( node_ptr->cmd.task == true )
{
@ -499,6 +555,7 @@ int nodeLinkClass::cmd_handler ( struct nodeLinkClass::node * node_ptr )
* messages from the remote host during the reset delay window */
node_ptr->mtcAlive_mgmnt_count = 0 ;
node_ptr->mtcAlive_clstr_count = 0 ;
node_ptr->mtcAlive_pxeboot_count = 0 ;
wlog ("%s max reboot retries reached ; still not offline ; reset in %3d secs",
node_ptr->hostname.c_str(), reset_delay);
@ -566,7 +623,8 @@ int nodeLinkClass::cmd_handler ( struct nodeLinkClass::node * node_ptr )
* or the failure of just one (mgmnt or clstr) networks to mistakenly
* cancel the reset. Prevent the cancel if
* - the node uptime is high and
* - not receiving mtcAlive both mgmnt and clstr networks.
* - not receiving mtcAlive on any mtcAlive networks ;
* mgmnt, clstr and pxeboot networks.
*
* Note: online does not mean both networks are receiving mtcAlive,
* Currently just mgmnt needs to see mtcAlive for the node to
@ -578,15 +636,17 @@ int nodeLinkClass::cmd_handler ( struct nodeLinkClass::node * node_ptr )
if (( node_ptr->availStatus == MTC_AVAIL_STATUS__ONLINE ) &&
( node_ptr->uptime < MTC_MINS_5 ) &&
( node_ptr->mtcAlive_mgmnt_count ) &&
( node_ptr->mtcAlive_clstr_count ))
( node_ptr->mtcAlive_clstr_count ) &&
( node_ptr->mtcAlive_pxeboot_count ))
{
mtcTimer_reset ( node_ptr->mtcCmd_timer );
ilog ("%s cancelling reset ; host is online ; delay:%d uptime:%d mtcAlive:%d:%d ",
ilog ("%s cancelling reset ; host is online ; delay:%d uptime:%d mtcAlive:%d:%d:%d ",
node_ptr->hostname.c_str(),
bmc_reset_delay,
node_ptr->uptime,
node_ptr->mtcAlive_mgmnt_count,
node_ptr->mtcAlive_clstr_count);
node_ptr->mtcAlive_clstr_count,
node_ptr->mtcAlive_pxeboot_count);
node_ptr->mtcCmd_work_fifo_ptr->status = PASS ;
node_ptr->mtcCmd_work_fifo_ptr->stage = MTC_CMD_STAGE__DONE ;
}
@ -602,13 +662,14 @@ int nodeLinkClass::cmd_handler ( struct nodeLinkClass::node * node_ptr )
#define BMC_RESET_PENDING_LOG_THROTTLE (1000)
wlog_throttled ( node_ptr->bmc_reset_pending_log_throttle,
BMC_RESET_PENDING_LOG_THROTTLE,
"%s reset in %3ld secs ; delay:%d uptime:%d mtcAlive:%d:%d",
"%s reset in %3ld secs ; delay:%d uptime:%d mtcAlive:%d:%d:%d",
node_ptr->hostname.c_str(),
reset_delay-diff_time.secs,
bmc_reset_delay,
node_ptr->uptime,
node_ptr->mtcAlive_mgmnt_count,
node_ptr->mtcAlive_clstr_count);
node_ptr->mtcAlive_clstr_count,
node_ptr->mtcAlive_pxeboot_count);
}
}
break ; /* waiting path */
@ -813,6 +874,8 @@ int nodeLinkClass::cmd_handler ( struct nodeLinkClass::node * node_ptr )
/* update the timer hostname */
node_ptr->mtcTimer.hostname = name ;
node_ptr->mtcAlive_timer.hostname = name ;
node_ptr->online_timer.hostname = name ;
node_ptr->offline_timer.hostname = name ;
node_ptr->mtcSwact_timer.hostname = name ;
node_ptr->mtcCmd_timer.hostname = name ;
node_ptr->oosTestTimer.hostname = name ;

View File

@ -50,12 +50,6 @@ extern "C"
#include "amon.h" /* for ... active monitoring utilities */
}
extern char *program_invocation_short_name;
int mtcAlive_mgmnt_sequence = 0 ;
int mtcAlive_clstr_sequence = 0 ;
/************************************************************************
*
* Name : stop pmon
@ -107,18 +101,18 @@ void stop_pmon( void )
/* Receive and process commands from controller maintenance */
int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
{
int bytes = 0 ;
mtc_message_type msg ;
int rc = FAIL ;
ssize_t bytes_received = 0 ;
ctrl_type * ctrl_ptr = get_ctrl_ptr() ;
bool log_ack = true ;
const char * iface_name_ptr = get_interface_name_str(interface) ;
if ( interface == CLSTR_INTERFACE )
{
if ( ! ctrl_ptr->clstr_iface_provisioned )
{
wlog ("cannot receive from unprovisioned %s interface\n",
get_iface_name_str(interface) );
wlog ("cannot receive from unprovisioned %s interface", iface_name_ptr);
return (rc);
}
}
@ -126,17 +120,57 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
/* clean the rx/tx buffer */
memset ((void*)&msg,0,sizeof(mtc_message_type));
string hostaddr = "" ;
if ( interface == MGMNT_INTERFACE )
if ( interface == PXEBOOT_INTERFACE )
{
if (( sock_ptr->mtc_client_rx_socket ) &&
( sock_ptr->mtc_client_rx_socket->sock_ok() == true ))
if ( sock_ptr->pxeboot_rx_socket )
{
rc = sock_ptr->mtc_client_rx_socket->read((char*)&msg.hdr[0], sizeof(mtc_message_type));
hostaddr = sock_ptr->mtc_client_rx_socket->get_src_str();
struct sockaddr_in client_addr;
socklen_t addr_len = sizeof(client_addr);
// Receive data
bytes_received = recvfrom(sock_ptr->pxeboot_rx_socket,
(char*)&msg.hdr[0],
sizeof(mtc_message_type), 0,
(struct sockaddr*)&client_addr, &addr_len);
// Terminate the buffer
msg.hdr[bytes_received] = '\0' ;
// Log with debug_msg lane 2
if ( daemon_get_cfg_ptr()->debug_msg&2 )
{
// log the message ; both header and buffer
string _buf = msg.buf[0] ? msg.buf : "empty";
ilog ("Received %ld bytes (%s) from %s:%d - %s:%s",
bytes_received,
iface_name_ptr,
inet_ntoa(client_addr.sin_addr),
ntohs(client_addr.sin_port),
&msg.hdr[0], _buf.c_str());
// dump_memory (&msg.hdr[0], 16, bytes_received);
}
hostaddr = inet_ntoa(client_addr.sin_addr);
}
}
else if ( interface == MGMNT_INTERFACE )
{
if (( sock_ptr->mtc_client_mgmt_rx_socket ) &&
( sock_ptr->mtc_client_mgmt_rx_socket->sock_ok() == true ))
{
rc = bytes_received = sock_ptr->mtc_client_mgmt_rx_socket->read((char*)&msg.hdr[0], sizeof(mtc_message_type));
hostaddr = sock_ptr->mtc_client_mgmt_rx_socket->get_src_str();
// Log with debug_msg lane 2
if ( daemon_get_cfg_ptr()->debug_msg&2 )
{
// Log the message ; both header and buffer
string _buf = msg.buf[0] ? msg.buf : "empty";
ilog ("Received %ld bytes (%s) from %s - %s:%s", bytes_received,
iface_name_ptr, hostaddr.c_str(), &msg.hdr[0], _buf.c_str());
}
}
else
{
elog ("cannot read from null or failed 'mtc_client_rx_socket'\n");
elog ("cannot read from null or failed 'mtc_client_mgmt_rx_socket'\n");
return (FAIL_TO_RECEIVE);
}
}
@ -145,8 +179,18 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
if (( sock_ptr->mtc_client_clstr_rx_socket ) &&
( sock_ptr->mtc_client_clstr_rx_socket->sock_ok() == true ))
{
rc = sock_ptr->mtc_client_clstr_rx_socket->read((char*)&msg.hdr[0], sizeof(mtc_message_type));
rc = bytes_received = sock_ptr->mtc_client_clstr_rx_socket->read((char*)&msg.hdr[0], sizeof(mtc_message_type));
hostaddr = sock_ptr->mtc_client_clstr_rx_socket->get_src_str();
// Log with debug_msg lane 2
if ( daemon_get_cfg_ptr()->debug_msg&2 )
{
// Log the message ; both header and buffer
string _buf = msg.buf[0] ? msg.buf : "empty";
ilog ("Received %ld bytes (%s) from %s: %s:%s",
bytes_received, iface_name_ptr,
hostaddr.c_str(), &msg.hdr[0], _buf.c_str());
}
}
else
{
@ -174,11 +218,9 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
{
self = true ;
}
string interface_name = get_iface_name_str (interface) ;
string interface_name = get_interface_name_str (interface) ;
string command_name = get_mtcNodeCommand_str(msg.cmd) ;
print_mtc_message ( get_hostname(), MTC_CMD_RX, msg, interface_name.data(), false );
/* Message version greater than zero have the hosts management
* mac address appended to the header string */
if (( !self ) && ( msg.ver >= MTC_CMD_FEATURE_VER__MACADDR_IN_CMD ))
@ -186,18 +228,18 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
/* the minus 1 is to back up from the null char that is accounted for in the header size */
if ( strncmp ( &msg.hdr[MSG_HEADER_SIZE-1], ctrl_ptr->macaddr.data(), MSG_HEADER_SIZE ))
{
wlog ("%s command not for this host (exp:%s det:%s) ; ignoring ...\n",
wlog ("%s req command from %s network not for this host (exp:%s det:%s) ; ignoring ...\n",
command_name.c_str(),
iface_name_ptr,
ctrl_ptr->macaddr.c_str(),
&msg.hdr[MSG_HEADER_SIZE-1]);
print_mtc_message ( get_hostname(), MTC_CMD_RX, msg, interface_name.data(), true );
print_mtc_message ( get_hostname(), MTC_CMD_RX, msg, iface_name_ptr, true );
return (FAIL_INVALID_DATA);
}
}
print_mtc_message ( hostaddr, MTC_CMD_RX, msg, get_iface_name_str(interface), rc );
if ( rc )
return rc;
if ( ! hostaddr.empty() )
print_mtc_message ( hostaddr, MTC_CMD_RX, msg, iface_name_ptr, false );
/* Check for response messages */
if ( strstr ( &msg.hdr[0], get_cmd_req_msg_header() ) )
@ -205,20 +247,25 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
rc = PASS ;
if ( msg.cmd == MTC_REQ_MTCALIVE )
{
mlog1 ("mtcAlive request received (%s network)\n", interface_name.c_str());
ilog ("mtcAlive request received from %s network", iface_name_ptr);
if ( interface == PXEBOOT_INTERFACE )
{
alog2 ("pxeboot mtcAlive buffer: %s", &msg.buf[0]);
load_pxebootInfo_msg(msg);
}
return ( send_mtcAlive_msg ( sock_ptr, get_who_i_am(), interface ));
}
else if ( msg.cmd == MTC_MSG_INFO )
{
mlog1("mtc 'info' message received (%s network)\n", interface_name.c_str());
alog2 ("mtc 'info' message received from %s network", iface_name_ptr);
load_mtcInfo_msg ( msg );
return ( PASS ); /* no ack for this message */
}
else if ( msg.cmd == MTC_CMD_SYNC )
{
ilog ("mtc '%s' message received (%s network)\n",
ilog ("mtc '%s' message received from %s network",
get_mtcNodeCommand_str(msg.cmd),
interface_name.c_str());
iface_name_ptr);
ilog ("Sync Start");
sync ();
@ -233,7 +280,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
/* Only recreate the file if its not already present */
if ( daemon_is_file_present ( NODE_LOCKED_FILE ) == false )
{
ilog ("%s locked (%s)", get_hostname().c_str(), interface_name.c_str() );
ilog ("%s locked (%s)", get_hostname().c_str(), iface_name_ptr);
daemon_log ( NODE_LOCKED_FILE, ADMIN_LOCKED_STR);
}
@ -254,7 +301,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
}
else if ( msg.cmd == MTC_MSG_UNLOCKED )
{
ilog ("%s unlocked (%s)", get_hostname().c_str(), interface_name.c_str() );
ilog ("%s unlocked received from %s network", get_hostname().c_str(), iface_name_ptr);
/* Only remove the file if it is present */
if ( daemon_is_file_present ( NODE_LOCKED_FILE ) == true )
@ -264,7 +311,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
if ( daemon_is_file_present ( NODE_LOCKED_FILE_BACKUP ) == true )
{
daemon_remove_file ( NODE_LOCKED_FILE_BACKUP );
ilog ("cleared node locked backup flag (%s)", interface_name.c_str() );
ilog ("cleared node locked backup flag (%s)", iface_name_ptr);
}
}
else if ( msg.cmd == MTC_MSG_SUBF_GOENABLED_FAILED )
@ -297,7 +344,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
}
else
{
ilog ("GoEnabled request posted (%s)\n", interface_name.c_str());
ilog ("GoEnabled request posted (%s)", iface_name_ptr);
ctrl_ptr->posted_script_set.push_back ( GOENABLED_MAIN_SCRIPTS );
ctrl_ptr->posted_script_set.unique();
}
@ -324,7 +371,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
}
else
{
ilog ("GoEnabled Subf request posted (%s)\n", interface_name.c_str());
ilog ("GoEnabled Subf request posted (%s)", iface_name_ptr);
/* Cleanup test result flag files */
if ( daemon_is_file_present ( GOENABLED_SUBF_PASS) )
@ -345,15 +392,15 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
}
else if ( msg.cmd == MTC_CMD_REBOOT )
{
ilog ("%s command received (%s)",
ilog ("%s command received from %s network",
command_name.c_str(),
interface_name.c_str());
iface_name_ptr);
}
else if ( msg.cmd == MTC_CMD_LAZY_REBOOT )
{
ilog ("%s command received (%s) ; delay:%d seconds\n",
ilog ("%s command received from %s network ; delay:%d seconds",
command_name.c_str(),
interface_name.c_str(),
iface_name_ptr,
msg.num ? msg.parm[0] : 0 );
}
else if ( is_host_services_cmd ( msg.cmd ) == true )
@ -378,9 +425,9 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
ctrl_ptr->posted_script_set.push_back ( HOSTSERVICES_SCRIPTS );
ctrl_ptr->posted_script_set.unique ();
ilog ("%s request posted (%s)\n",
ilog ("%s request posted from %s network",
command_name.c_str(),
interface_name.c_str());
iface_name_ptr);
ctrl_ptr->hostservices.posted = msg.cmd ;
ctrl_ptr->hostservices.monitor = MTC_CMD_NONE ;
@ -391,17 +438,17 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
if ( ( daemon_is_file_present ( MTC_CMD_FIT__START_SVCS )))
{
rc = FAIL_FIT ;
wlog ("%s Start Services - fit failure (%s)\n",
wlog ("%s Start Services - fit failure (%s)",
command_name.c_str(),
interface_name.c_str() );
iface_name_ptr);
}
/* Fault insertion - fail to send host services ACK */
if ( ( daemon_is_file_present ( MTC_CMD_FIT__NO_HS_ACK )))
{
wlog ("%s Start Services - fit no ACK (%s)\n",
wlog ("%s Start Services - fit no ACK (%s)",
command_name.c_str(),
interface_name.c_str() );
iface_name_ptr);
return (PASS);
}
@ -421,20 +468,21 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
}
else if ( msg.cmd == MTC_CMD_WIPEDISK )
{
ilog ("Reload command received (%s)\n", interface_name.c_str());
ilog ("Reload command received from %s network", iface_name_ptr);
}
else if ( msg.cmd == MTC_CMD_RESET )
{
ilog ("Reset command received (%s)\n", interface_name.c_str());
ilog ("Reset command received from %s network", iface_name_ptr);
}
else if ( msg.cmd == MTC_CMD_LOOPBACK )
{
ilog ("Loopback command received (%s)\n", interface_name.c_str());
ilog ("Loopback command received from %s network", iface_name_ptr);
}
else
{
rc = FAIL_BAD_CASE ;
elog ( "Unsupported maintenance command (%d)\n", msg.cmd );
wlog ( "Unsupported maintenance command (%d) with %ld bytes received from %s network",
msg.cmd, bytes_received, iface_name_ptr );
}
snprintf ( &msg.hdr[0], MSG_HEADER_SIZE, "%s", get_cmd_rsp_msg_header());
@ -443,12 +491,12 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
{
if ( msg.cmd == MTC_MSG_MAIN_GOENABLED )
{
ilog ("main function goEnabled results acknowledged (%s)\n", interface_name.c_str());
ilog ("main function goEnabled results acknowledged from %s network", iface_name_ptr);
return (PASS);
}
else if ( msg.cmd == MTC_MSG_SUBF_GOENABLED )
{
ilog ("sub-function goEnabled results acknowledged (%s)\n", interface_name.c_str());
ilog ("sub-function goEnabled results acknowledged from %s network", iface_name_ptr);
return (PASS);
}
else
@ -460,14 +508,25 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
else if ( strstr ( &msg.hdr[0], get_worker_msg_header()) )
{
elog ("unsupported worker message\n");
print_mtc_message ( &msg );
if ( msg.cmd == MTC_MSG_MTCALIVE )
{
wlog ("unexpected mtcAlive message from %s from %s network",
hostaddr.c_str(), iface_name_ptr);
}
else
{
wlog ("unsupported worker message from %s from %s network",
hostaddr.c_str(), iface_name_ptr);
}
wlog ("WARNING: mtcClient is receiving mtcAgent bound mtcAlive messages");
// dump_memory (&msg, 16, bytes_received);
return PASS ;
}
else
{
elog ("unsupported message\n");
print_mtc_message ( &msg );
wlog ("unsupported message from %s from %s network", hostaddr.c_str(), iface_name_ptr);
// dump_memory (&msg, 16, bytes_received);
return PASS ;
}
@ -481,73 +540,109 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
{
rc = PASS ;
bytes = sizeof(mtc_message_type)-BUF_SIZE;
int bytes = sizeof(mtc_message_type)-BUF_SIZE;
if ( interface == PXEBOOT_INTERFACE )
{
int flags = 0 ; // no tx flags
if ( sock_ptr->pxeboot_tx_socket <= 0 )
{
elog("pxeboot_tx_socket not ok (%d)", sock_ptr->pxeboot_tx_socket);
return (FAIL_SOCKET_SENDTO);
}
if ( log_ack )
{
ilog ("sending %s ack to %s over %s network",
command_name.c_str(),
hostaddr.c_str(),
iface_name_ptr);
}
struct sockaddr_in hostAddr;
memset(&hostAddr, 0, sizeof(hostAddr));
print_mtc_message ( hostaddr.data(), MTC_CMD_TX, msg, iface_name_ptr, false );
hostAddr.sin_addr.s_addr = inet_addr(hostaddr.data());
hostAddr.sin_family = AF_INET;
hostAddr.sin_port = htons(sock_ptr->mtc_tx_pxeboot_port);
ssize_t bytes_sent = sendto(sock_ptr->pxeboot_tx_socket, &msg.hdr[0], bytes, flags,
(const struct sockaddr*)&hostAddr, sizeof(hostAddr));
if (bytes_sent <= 0)
{
elog ("failed to send %s ack to %s:%d on %s network (rc:%ld) (%d:%m)",
command_name.c_str(),
hostaddr.c_str(),
hostAddr.sin_port,
iface_name_ptr,
bytes_sent, errno);
}
}
/* send the message back either over the mgmnt or clstr interface */
if ( interface == MGMNT_INTERFACE )
else if ( interface == MGMNT_INTERFACE )
{
if (( sock_ptr->mtc_client_tx_socket ) &&
( sock_ptr->mtc_client_tx_socket->sock_ok() == true ))
if (( sock_ptr->mtc_client_mgmt_tx_socket ) &&
( sock_ptr->mtc_client_mgmt_tx_socket->sock_ok() == true ))
{
rc = sock_ptr->mtc_client_tx_socket->write((char*)&msg.hdr[0], bytes);
rc = sock_ptr->mtc_client_mgmt_tx_socket->write((char*)&msg.hdr[0], bytes);
if ( rc <= 0 )
{
elog ("%s reply send (mtc_client_tx_socket) failed (%s) (rc:%d)",
elog ("%s reply send (mtc_client_mgmt_tx_socket) failed (%s) (rc:%d)",
command_name.c_str(),
interface_name.c_str(), rc);
iface_name_ptr, rc);
}
else if ( log_ack )
{
ilog ("%s reply send (%s)",
command_name.c_str(),
interface_name.c_str());
iface_name_ptr);
}
}
else
{
elog ("cannot send to null or failed socket (%s network)\n",
interface_name.c_str() );
elog ("cannot send to null or failed socket (%s)", iface_name_ptr);
}
}
else if ( interface == CLSTR_INTERFACE )
{
if (( sock_ptr->mtc_client_tx_socket_c0_clstr ) &&
( sock_ptr->mtc_client_tx_socket_c0_clstr->sock_ok() == true ))
if (( sock_ptr->mtc_client_clstr_tx_socket_c0 ) &&
( sock_ptr->mtc_client_clstr_tx_socket_c0->sock_ok() == true ))
{
rc = sock_ptr->mtc_client_tx_socket_c0_clstr->write((char*)&msg.hdr[0], bytes);
rc = sock_ptr->mtc_client_clstr_tx_socket_c0->write((char*)&msg.hdr[0], bytes);
if ( rc <= 0 )
{
elog ("%s reply send (mtc_client_tx_socket_c0_clstr) failed (%s) (rc:%d)",
elog ("%s reply send (mtc_client_clstr_tx_socket_c0) failed (%s) (rc:%d)",
command_name.c_str(),
interface_name.c_str(), rc);
iface_name_ptr, rc);
}
else if ( log_ack )
{
ilog ("%s reply send (%s)",
command_name.c_str(),
interface_name.c_str());
iface_name_ptr);
}
}
if (( sock_ptr->mtc_client_tx_socket_c1_clstr ) &&
( sock_ptr->mtc_client_tx_socket_c1_clstr->sock_ok() == true ))
if (( sock_ptr->mtc_client_clstr_tx_socket_c1 ) &&
( sock_ptr->mtc_client_clstr_tx_socket_c1->sock_ok() == true ))
{
rc = sock_ptr->mtc_client_tx_socket_c1_clstr->write((char*)&msg.hdr[0], bytes);
rc = sock_ptr->mtc_client_clstr_tx_socket_c1->write((char*)&msg.hdr[0], bytes);
if ( rc <= 0 )
{
elog ("%s reply send (mtc_client_tx_socket_c1_clstr) failed (%s) (rc:%d)",
elog ("%s reply send (mtc_client_clstr_tx_socket_c1) failed (%s) (rc:%d)",
command_name.c_str(),
interface_name.c_str(), rc);
iface_name_ptr, rc);
}
else if ( log_ack )
{
ilog ("%s reply send (%s)",
command_name.c_str(),
interface_name.c_str());
iface_name_ptr);
}
}
}
print_mtc_message ( get_hostname(), MTC_CMD_TX, msg, interface_name.data(), (rc != bytes) );
print_mtc_message ( get_hostname(), MTC_CMD_TX, msg, iface_name_ptr, (rc != bytes) );
/* get the shutdown delay config value */
int delay = daemon_get_cfg_ptr()->failsafe_shutdown_delay ;
@ -560,11 +655,11 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
{
if ( daemon_is_file_present ( MTC_CMD_FIT__NO_REBOOT ) )
{
ilog ("Reboot - fit bypass (%s)\n", interface_name.c_str());
ilog ("Reboot - fit bypass (%s)", iface_name_ptr);
return (PASS);
}
stop_pmon();
ilog ("Reboot (%s)\n", interface_name.c_str());
ilog ("Reboot (%s)", iface_name_ptr);
daemon_log ( NODE_RESET_FILE, "reboot command" );
fork_sysreq_reboot ( delay );
rc = system("/usr/bin/systemctl reboot");
@ -581,7 +676,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
{
do
{
ilog ("Lazy Reboot (%s) ; rebooting in %d seconds\n", interface_name.c_str(), msg.num ? msg.parm[0] : 1 );
ilog ("Lazy Reboot (%s) ; rebooting in %d seconds", iface_name_ptr, msg.num ? msg.parm[0] : 1 );
sleep (1);
if ( msg.parm[0] % 5 )
{
@ -592,7 +687,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
}
else
{
ilog ("Lazy Reboot (%s) ; now\n", interface_name.c_str() );
ilog ("Lazy Reboot (%s) ; now", iface_name_ptr);
}
fork_sysreq_reboot ( delay );
@ -602,11 +697,11 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
{
if ( daemon_is_file_present ( MTC_CMD_FIT__NO_RESET ) )
{
ilog ("Reset - fit bypass (%s)\n", interface_name.c_str());
ilog ("Reset - fit bypass (%s)", iface_name_ptr);
return (PASS);
}
stop_pmon();
ilog ("Reset 'reboot -f' (%s)\n", interface_name.c_str());
ilog ("Reset 'reboot -f' (%s)", iface_name_ptr);
daemon_log ( NODE_RESET_FILE, "reset command" );
fork_sysreq_reboot ( delay/2 );
rc = system("/usr/bin/systemctl reboot --force");
@ -617,7 +712,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
if ( daemon_is_file_present ( MTC_CMD_FIT__NO_WIPEDISK ) )
{
ilog ("Wipedisk - fit bypass (%s)\n", interface_name.c_str());
ilog ("Wipedisk - fit bypass (%s)", iface_name_ptr);
return (PASS);
}
/* We fork a reboot as a fail safe.
@ -636,7 +731,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
}
else if( 0 == parent ) /* we're the child */
{
ilog ("Disk wipe in progress (%s)\n", interface_name.c_str());
ilog ("Disk wipe in progress (%s)", iface_name_ptr);
daemon_log ( NODE_RESET_FILE, "wipedisk command" );
rc = system("/usr/local/bin/wipedisk --force");
ilog ("Disk wipe complete - Forcing Reboot ...\n");
@ -727,35 +822,35 @@ int mtce_send_event ( mtc_socket_type * sock_ptr, unsigned int cmd , const char
event.cmd = cmd ;
if (( sock_ptr->mtc_client_tx_socket ) &&
( sock_ptr->mtc_client_tx_socket->sock_ok() == true ))
if (( sock_ptr->mtc_client_mgmt_tx_socket ) &&
( sock_ptr->mtc_client_mgmt_tx_socket->sock_ok() == true ))
{
if ( bytes == 0 )
{
slog ("message send failed ; message size=0 for cmd:0x%x is 0\n", event.cmd );
rc = FAIL_NO_DATA ;
}
else if ((rc = sock_ptr->mtc_client_tx_socket->write((char*)&event.hdr[0], bytes))!= bytes )
else if ((rc = sock_ptr->mtc_client_mgmt_tx_socket->write((char*)&event.hdr[0], bytes))!= bytes )
{
elog ("message send failed. (%d) (%d:%s) \n", rc, errno, strerror(errno));
elog ("message: %d bytes to <%s:%d>\n", bytes,
sock_ptr->mtc_client_tx_socket->get_dst_str(),
sock_ptr->mtc_client_tx_socket->get_dst_addr()->getPort());
sock_ptr->mtc_client_mgmt_tx_socket->get_dst_str(),
sock_ptr->mtc_client_mgmt_tx_socket->get_dst_addr()->getPort());
rc = FAIL_TO_TRANSMIT ;
}
else
{
mlog2 ("Transmit: %x bytes to %s:%d\n", bytes,
sock_ptr->mtc_client_tx_socket->get_dst_str(),
sock_ptr->mtc_client_tx_socket->get_dst_addr()->getPort());
print_mtc_message ( get_hostname(), MTC_CMD_TX, event, get_iface_name_str(MGMNT_INTERFACE), false );
sock_ptr->mtc_client_mgmt_tx_socket->get_dst_str(),
sock_ptr->mtc_client_mgmt_tx_socket->get_dst_addr()->getPort());
print_mtc_message ( get_hostname(), MTC_CMD_TX, event, get_interface_name_str(MGMNT_INTERFACE), false );
rc = PASS ;
}
}
else
{
elog ("cannot send to null or failed socket (%s network)\n",
get_iface_name_str (MGMNT_INTERFACE) );
elog ("cannot send to null or failed socket (%s)",
get_interface_name_str (MGMNT_INTERFACE) );
rc = FAIL_SOCKET_SENDTO ;
}
return rc ;
@ -765,10 +860,23 @@ int mtce_send_event ( mtc_socket_type * sock_ptr, unsigned int cmd , const char
*
* Name : create_mtcAlive_msg
*
* Description: Creates a common mtcAlive message
* Description: Creates a common mtcAlive message that consists of the
* - out-of-band health/status flags
* - host uptime
* - json string of some of the host's info
* {
* "hostname":"controller-0",
* "personality":"controller,worker",
* "pxeboot_ip":"169.254.202.2",
* "mgmt_ip":"192.168.204.2",
* "cluster_host_ip":"192.168.206.2",
* "mgmt_mac":"08:00:27:9f:ef:57",
* "interface":"Mgmnt",
* "sequence":145
* }
*
****************************************************************************/
int create_mtcAlive_msg ( mtc_message_type & msg, int cmd, string identity, int interface )
int create_mtcAlive_msg ( ctrl_type * ctrl_ptr, mtc_message_type & msg, int cmd, string identity, int interface )
{
static int _sm_unhealthy_debounce_counter [MAX_IFACES] = {0,0} ;
@ -843,7 +951,7 @@ int create_mtcAlive_msg ( mtc_message_type & msg, int cmd, string identity, int
if ( ++_sm_unhealthy_debounce_counter[interface] > MAX_SM_UNHEALTHY_DEBOUNCE )
{
wlog("SM Unhealthy flag set (%s)",
get_iface_name_str(interface));
get_interface_name_str(interface));
msg.parm[MTC_PARM_FLAGS_IDX] |= MTC_FLAG__SM_UNHEALTHY ;
}
else
@ -851,7 +959,7 @@ int create_mtcAlive_msg ( mtc_message_type & msg, int cmd, string identity, int
wlog("SM Unhealthy debounce %d of %d (%s)",
_sm_unhealthy_debounce_counter[interface],
MAX_SM_UNHEALTHY_DEBOUNCE,
get_iface_name_str(interface));
get_interface_name_str(interface));
}
}
else
@ -859,19 +967,32 @@ int create_mtcAlive_msg ( mtc_message_type & msg, int cmd, string identity, int
_sm_unhealthy_debounce_counter[interface] = 0 ;
}
/* add the interface and sequence number to the mtcAlice message */
/* add the interface and sequence number to the mtcAlive message */
identity.append ( ",\"interface\":\"");
identity.append (get_iface_name_str(interface));
identity.append (get_interface_name_str(interface));
identity.append("\",\"sequence\":");
if ( interface == CLSTR_INTERFACE )
if ( interface == PXEBOOT_INTERFACE )
{
identity.append(itos(mtcAlive_clstr_sequence++));
ctrl_ptr->mtcAlive_pxeboot_sequence++ ;
identity.append(itos(ctrl_ptr->mtcAlive_pxeboot_sequence));
msg.parm[MTC_PARM_SEQ_IDX] = ctrl_ptr->mtcAlive_pxeboot_sequence ;
}
else if ( interface == MGMNT_INTERFACE )
{
ctrl_ptr->mtcAlive_mgmnt_sequence++ ;
identity.append(itos(ctrl_ptr->mtcAlive_mgmnt_sequence));
msg.parm[MTC_PARM_SEQ_IDX] = ctrl_ptr->mtcAlive_mgmnt_sequence ;
}
else if ( interface == CLSTR_INTERFACE )
{
ctrl_ptr->mtcAlive_clstr_sequence++ ;
identity.append(itos(ctrl_ptr->mtcAlive_clstr_sequence));
msg.parm[MTC_PARM_SEQ_IDX] = ctrl_ptr->mtcAlive_clstr_sequence ;
}
else
{
identity.append(itos(mtcAlive_mgmnt_sequence++));
}
identity.append(itos(0));
identity.append("}");
memcpy ( &msg.buf[0], identity.c_str(), identity.size() );
@ -896,40 +1017,40 @@ int send_mtc_msg ( mtc_socket_type * sock_ptr, int cmd , string identity )
{
int interface = MGMNT_INTERFACE ;
mtc_message_type msg ;
int bytes = create_mtcAlive_msg ( msg, cmd, identity, interface );
if (( sock_ptr->mtc_client_tx_socket ) &&
( sock_ptr->mtc_client_tx_socket->sock_ok() == true ))
int bytes = create_mtcAlive_msg ( get_ctrl_ptr(), msg, cmd, identity, interface );
if (( sock_ptr->mtc_client_mgmt_tx_socket ) &&
( sock_ptr->mtc_client_mgmt_tx_socket->sock_ok() == true ))
{
/* Send back to requester - TODO: consider sending back to both as multicast */
if ((rc = sock_ptr->mtc_client_tx_socket->write((char*)&msg.hdr[0], bytes)) != bytes )
if ((rc = sock_ptr->mtc_client_mgmt_tx_socket->write((char*)&msg.hdr[0], bytes)) != bytes )
{
if ( rc == -1 )
{
wlog_throttled (send_mtc_msg_failed, 100 ,
"failed to send <%s:%d> (%d:%m)\n",
sock_ptr->mtc_client_tx_socket->get_dst_str(),
sock_ptr->mtc_client_tx_socket->get_dst_addr()->getPort(), errno );
"failed to send <%s:%d> (%d:%m)",
sock_ptr->mtc_client_mgmt_tx_socket->get_dst_str(),
sock_ptr->mtc_client_mgmt_tx_socket->get_dst_addr()->getPort(), errno );
}
else
{
wlog_throttled ( send_mtc_msg_failed, 100 ,
"sent only %d of %d bytes to <%s:%d>\n",
rc, bytes,
sock_ptr->mtc_client_tx_socket->get_dst_str(),
sock_ptr->mtc_client_tx_socket->get_dst_addr()->getPort());
sock_ptr->mtc_client_mgmt_tx_socket->get_dst_str(),
sock_ptr->mtc_client_mgmt_tx_socket->get_dst_addr()->getPort());
}
}
else
{
send_mtc_msg_failed = 0 ;
print_mtc_message ( get_hostname(), MTC_CMD_TX, msg, get_iface_name_str(interface), false );
print_mtc_message ( get_hostname(), MTC_CMD_TX, msg, get_interface_name_str(interface), false );
rc = PASS ;
}
}
else
{
elog ("cannot send to null or failed socket (%s network)\n",
get_iface_name_str (MGMNT_INTERFACE) );
elog ("cannot send to null or failed socket (%s)",
get_interface_name_str (MGMNT_INTERFACE) );
}
}
else
@ -943,57 +1064,134 @@ int send_mtc_msg ( mtc_socket_type * sock_ptr, int cmd , string identity )
int send_mtcAlive_msg_failed = 0 ;
int send_mtcAlive_msg ( mtc_socket_type * sock_ptr, string identity, int interface )
{
int flags = 0 ; // no tx flags
/* get a pointer to the process control structure */
ctrl_type * ctrl_ptr = get_ctrl_ptr() ;
if (( interface == PXEBOOT_INTERFACE ) &&
( ctrl_ptr->pxeboot_iface_provisioned == false ))
return (PASS) ;
if (( interface == CLSTR_INTERFACE ) &&
( get_ctrl_ptr()->clstr_iface_provisioned != true ))
( ctrl_ptr->clstr_iface_provisioned != true ))
{
dlog2 ("cannot send to unprovisioned %s interface\n",
get_iface_name_str(interface) );
dlog2 ("cannot send to unprovisioned %s interface",
get_interface_name_str(interface) );
return (FAIL);
}
mtc_message_type msg ;
int bytes = create_mtcAlive_msg ( msg, MTC_MSG_MTCALIVE, identity, interface );
int bytes = create_mtcAlive_msg ( ctrl_ptr, msg, MTC_MSG_MTCALIVE, identity, interface );
if ( interface == MGMNT_INTERFACE )
if ( interface == PXEBOOT_INTERFACE )
{
/* Send to each controller's pxeboot address ; controller-1 is skipped on simplex systems */
if ( sock_ptr->pxeboot_tx_socket <= 0 )
{
elog("pxeboot_tx_socket not ok (%d)", sock_ptr->pxeboot_tx_socket);
return (FAIL_SOCKET_SENDTO);
}
// TODO: Consider adding controllers info to ctrl struct
string controllers[CONTROLLERS] = {CONTROLLER_0, CONTROLLER_1};
alog1 ("sending mtcAlive to both controllers");
for (int c = 0 ; c < CONTROLLERS ; c++)
{
string pxeboot_addr_cx ;
struct sockaddr_in hostAddr;
memset(&hostAddr, 0, sizeof(hostAddr));
if (controllers[c] == CONTROLLER_1)
{
if ( ctrl_ptr->system_type != SYSTEM_TYPE__AIO__SIMPLEX )
pxeboot_addr_cx = ctrl_ptr->pxeboot_addr_c1;
else
continue; // skip controller-1 for SX systems
}
else
pxeboot_addr_cx = ctrl_ptr->pxeboot_addr_c0;
if ( pxeboot_addr_cx.empty() )
{
if ( ctrl_ptr->pxeboot_address_learned[c] == true )
{
ctrl_ptr->pxeboot_address_learned[c] = false ;
wlog ( "%s pxeboot address not learned ; unable to send pxeboot mtcAlive",
controllers[c].c_str() );
}
continue ;
}
if ( ctrl_ptr->pxeboot_address_learned[c] == false )
{
// Only log this if the 'not learned' log was produced,
// which is most likely the case on process startup.
ilog ("sending pxeboot network mtcAlive msg on port %d to %s at %s",
sock_ptr->mtc_tx_pxeboot_port,
controllers[c].c_str(),
pxeboot_addr_cx.c_str());
ctrl_ptr->pxeboot_address_learned[c] = true ;
}
print_mtc_message ( controllers[c], MTC_CMD_TX, msg, get_interface_name_str(PXEBOOT_INTERFACE), false );
hostAddr.sin_addr.s_addr = inet_addr(pxeboot_addr_cx.data());
hostAddr.sin_family = AF_INET;
hostAddr.sin_port = htons(sock_ptr->mtc_tx_pxeboot_port); // 2102
alog1 ("sending pxeboot network mtcAlive msg to %s", controllers[c].c_str() );
ssize_t bytes_sent = sendto(sock_ptr->pxeboot_tx_socket, &msg.hdr[0], bytes, flags,
(const struct sockaddr*)&hostAddr, sizeof(hostAddr));
if (bytes_sent <= 0)
{
elog ("failed to send mtcAlive to %s using %s:%d (pxeboot) (rc:%ld) (%d:%m)",
controllers[c].c_str(), pxeboot_addr_cx.c_str(), hostAddr.sin_port, bytes_sent, errno);
}
} // for loop
}
else if ( interface == MGMNT_INTERFACE )
{
/* Send to controller floating address */
if (( sock_ptr->mtc_client_tx_socket ) &&
( sock_ptr->mtc_client_tx_socket->sock_ok() == true ))
if (( sock_ptr->mtc_client_mgmt_tx_socket ) &&
( sock_ptr->mtc_client_mgmt_tx_socket->sock_ok() == true ))
{
print_mtc_message ( CONTROLLER, MTC_CMD_TX, msg, get_iface_name_str(MGMNT_INTERFACE), false );
sock_ptr->mtc_client_tx_socket->write((char*)&msg.hdr[0], bytes) ;
alog1 ("sending mgmt network mtcAlive msg to %s", CONTROLLER);
print_mtc_message ( CONTROLLER, MTC_CMD_TX, msg, get_interface_name_str(MGMNT_INTERFACE), false );
sock_ptr->mtc_client_mgmt_tx_socket->write((char*)&msg.hdr[0], bytes) ;
}
else
{
elog("mtc_client_tx_socket not ok");
elog("mtc_client_mgmt_tx_socket not ok");
}
}
else if ( interface == CLSTR_INTERFACE )
{
/* Send to controller-0 cluster address */
if (( sock_ptr->mtc_client_tx_socket_c0_clstr ) &&
( sock_ptr->mtc_client_tx_socket_c0_clstr->sock_ok() == true ))
if (( sock_ptr->mtc_client_clstr_tx_socket_c0 ) &&
( sock_ptr->mtc_client_clstr_tx_socket_c0->sock_ok() == true ))
{
print_mtc_message ( CONTROLLER_0, MTC_CMD_TX, msg, get_iface_name_str(CLSTR_INTERFACE), false );
sock_ptr->mtc_client_tx_socket_c0_clstr->write((char*)&msg.hdr[0], bytes ) ;
alog1 ("sending clstr network mtcAlive msg to %s", CONTROLLER_0);
print_mtc_message ( CONTROLLER_0, MTC_CMD_TX, msg, get_interface_name_str(CLSTR_INTERFACE), false );
sock_ptr->mtc_client_clstr_tx_socket_c0->write((char*)&msg.hdr[0], bytes ) ;
}
else
{
elog("mtc_client_tx_socket_c0_clstr not ok");
elog("mtc_client_clstr_tx_socket_c0 not ok");
}
/* Send to controller-1 cluster address */
if ( get_ctrl_ptr()->system_type != SYSTEM_TYPE__AIO__SIMPLEX )
{
if (( sock_ptr->mtc_client_tx_socket_c1_clstr ) &&
( sock_ptr->mtc_client_tx_socket_c1_clstr->sock_ok() == true ))
if (( sock_ptr->mtc_client_clstr_tx_socket_c1 ) &&
( sock_ptr->mtc_client_clstr_tx_socket_c1->sock_ok() == true ))
{
print_mtc_message ( CONTROLLER_1, MTC_CMD_TX, msg, get_iface_name_str(CLSTR_INTERFACE), false );
sock_ptr->mtc_client_tx_socket_c1_clstr->write((char*)&msg.hdr[0], bytes ) ;
alog1 ("sending clstr mtcAlive msg to %s", CONTROLLER_1);
print_mtc_message ( CONTROLLER_1, MTC_CMD_TX, msg, get_interface_name_str(CLSTR_INTERFACE), false );
sock_ptr->mtc_client_clstr_tx_socket_c1->write((char*)&msg.hdr[0], bytes ) ;
}
else
{
elog("mtc_client_tx_socket_c1_clstr not ok");
elog("mtc_client_clstr_tx_socket_c1 not ok");
}
}
}
@ -1040,11 +1238,11 @@ int send_mtcClient_cmd ( mtc_socket_type * sock_ptr, int cmd, string hostname, s
int rc = FAIL ;
/* Send to controller floating address */
if (( sock_ptr->mtc_client_tx_socket ) &&
( sock_ptr->mtc_client_tx_socket->sock_ok() == true ))
if (( sock_ptr->mtc_client_mgmt_tx_socket ) &&
( sock_ptr->mtc_client_mgmt_tx_socket->sock_ok() == true ))
{
print_mtc_message ( hostname, MTC_CMD_TX, msg, get_iface_name_str(MGMNT_INTERFACE), false );
rc = sock_ptr->mtc_client_tx_socket->write((char*)&msg.hdr[0], bytes, address.data(), port ) ;
print_mtc_message ( hostname, MTC_CMD_TX, msg, get_interface_name_str(MGMNT_INTERFACE), false );
rc = sock_ptr->mtc_client_mgmt_tx_socket->write((char*)&msg.hdr[0], bytes, address.data(), port ) ;
if ( 0 >= rc )
{
elog("failed to send command to mtcClient (%d) (%d:%s)", rc, errno, strerror(errno));
@ -1055,7 +1253,7 @@ int send_mtcClient_cmd ( mtc_socket_type * sock_ptr, int cmd, string hostname, s
}
else
{
elog("mtc_client_tx_socket not ok");
elog("mtc_client_mgmt_tx_socket not ok");
rc = FAIL_BAD_STATE ;
}
return (rc) ;

View File

@ -125,13 +125,53 @@ int mtc_service_inbox ( nodeLinkClass * obj_ptr,
mtc_message_type msg ;
int bytes = 0 ;
int rc = PASS ;
if ( iface == CLSTR_INTERFACE )
string hostaddr = "" ;
string hostname = "" ;
const char * iface_name_ptr = get_iface_name_str(iface);
if ( iface == PXEBOOT_INTERFACE )
{
struct sockaddr_in client_addr;
socklen_t addr_len = sizeof(client_addr);
// Receive data
bytes = recvfrom(sock_ptr->pxeboot_rx_socket,
(char*)&msg.hdr[0],
sizeof(mtc_message_type), 0,
(struct sockaddr*)&client_addr, &addr_len);
// As a non-blocking socket this is normal to occur
// due to batch handling.
if ( bytes == -1 )
return RETRY ;
// Log with debug_msg lane 2
if ( daemon_get_cfg_ptr()->debug_msg&2 )
{
// log the message ; both header and buffer
string _buf = msg.buf[0] ? msg.buf : "empty";
mlog3 ("Received %d bytes (%s) from %s:%d - cmd:%d:%s hdr:%s buf:%s",
bytes,
iface_name_ptr,
inet_ntoa(client_addr.sin_addr),
ntohs(client_addr.sin_port),
msg.cmd,
get_mtcNodeCommand_str(msg.cmd),
&msg.hdr[0], _buf.c_str());
}
hostaddr = inet_ntoa(client_addr.sin_addr);
hostname = obj_ptr->get_hostname ( hostaddr ) ; // based on pxeboot ip
}
else if ( iface == CLSTR_INTERFACE )
{
if ( ( obj_ptr ) &&
( obj_ptr->clstr_network_provisioned == true ) &&
( sock_ptr->mtc_agent_clstr_rx_socket ))
{
mlog3 ("clstr network 'recvfrom' start");
bytes = sock_ptr->mtc_agent_clstr_rx_socket->read((char*)&msg, sizeof(msg));
mlog3 ("clstr network 'recvfrom' stop");
}
else
{
@ -140,7 +180,9 @@ int mtc_service_inbox ( nodeLinkClass * obj_ptr,
}
else
{
bytes = sock_ptr->mtc_agent_rx_socket->read((char*)&msg, sizeof(msg));
mlog3 ("mgmt network 'recvfrom' start");
bytes = sock_ptr->mtc_agent_mgmt_rx_socket->read((char*)&msg, sizeof(msg));
mlog3 ("mgmt network 'recvfrom' stop");
}
msg.buf[BUF_SIZE-1] = '\0';
@ -160,17 +202,14 @@ int mtc_service_inbox ( nodeLinkClass * obj_ptr,
zero_unused_msg_buf (msg, bytes);
/* get the sender's hostname */
string hostaddr = "" ;
string hostname = "" ;
if ( iface == CLSTR_INTERFACE )
{
hostaddr = sock_ptr->mtc_agent_clstr_rx_socket->get_src_str();
hostname = obj_ptr->get_hostname ( hostaddr ) ;
}
else
else if ( iface == MGMNT_INTERFACE )
{
hostaddr = sock_ptr->mtc_agent_rx_socket->get_src_str();
hostaddr = sock_ptr->mtc_agent_mgmt_rx_socket->get_src_str();
hostname = obj_ptr->get_hostname ( hostaddr ) ;
}
@ -181,17 +220,26 @@ int mtc_service_inbox ( nodeLinkClass * obj_ptr,
if (( msg.cmd == MTC_MSG_MTCALIVE ) &&
(( rc = jsonUtil_get_key_val ( &msg.buf[0], "hostname", hostname )) == PASS ))
{
ilog ("%s learned from mtcAlive", hostname.c_str());
string curr_hostaddr = obj_ptr->get_pxeboot_hostaddr ( hostname );
if ( curr_hostaddr != hostaddr )
{
ilog ("%s hostname learned from %s mtcAlive ; hostaddr:%s was:%s",
hostname.c_str(),
iface_name_ptr,
hostaddr.c_str(),
curr_hostaddr.c_str());
obj_ptr->set_pxeboot_hostaddr ( hostname, hostaddr );
}
}
else
{
wlog ("unknown hostname message ... dropping" ); /* make dlog */
print_mtc_message ( hostname, MTC_CMD_RX, msg, get_iface_name_str(iface), true );
print_mtc_message ( hostname, MTC_CMD_RX, msg, iface_name_ptr, true );
return (FAIL_GET_HOSTNAME);
}
}
print_mtc_message ( hostname, MTC_CMD_RX, msg, get_iface_name_str(iface), false );
print_mtc_message ( hostname, MTC_CMD_RX, msg, iface_name_ptr, false );
/* handle messages that are not mtc_message_type
* but rather are simply a json string */
@ -199,7 +247,7 @@ int mtc_service_inbox ( nodeLinkClass * obj_ptr,
{
string service ;
mlog1 ("%s\n", &msg.hdr[0] );
mlog3 ("%s\n", &msg.hdr[0] );
rc = jsonUtil_get_key_val(&msg.hdr[0],"service", service );
if ( rc == PASS )
@ -256,7 +304,7 @@ int mtc_service_inbox ( nodeLinkClass * obj_ptr,
hostname.c_str(),
get_mtcNodeCommand_str(msg.cmd),
msg.parm[0],
get_iface_name_str(iface));
iface_name_ptr);
}
else
{
@ -264,7 +312,7 @@ int mtc_service_inbox ( nodeLinkClass * obj_ptr,
hostname.c_str(),
get_mtcNodeCommand_str(msg.cmd),
msg.parm[0],
get_iface_name_str(iface));
iface_name_ptr);
}
}
}
@ -309,16 +357,16 @@ int mtc_service_inbox ( nodeLinkClass * obj_ptr,
obj_ptr->set_uptime ( hostname , msg.parm[MTC_PARM_UPTIME_IDX], false );
obj_ptr->set_health ( hostname , msg.parm[MTC_PARM_HEALTH_IDX] );
obj_ptr->set_mtce_flags ( hostname , msg.parm[MTC_PARM_FLAGS_IDX], iface );
obj_ptr->set_mtcAlive ( hostname, iface );
obj_ptr->set_mtcAlive ( hostname , msg.parm[MTC_PARM_SEQ_IDX], iface);
mlog1("%s Uptime:%d Health:%d Flags:0x%x mtcAlive:%s (%s)\n",
mlog2("%s Uptime:%d Health:%d Flags:0x%x Seq:%5d mtcAlive:%s (%s)\n",
hostname.c_str(),
msg.parm[MTC_PARM_UPTIME_IDX],
msg.parm[MTC_PARM_HEALTH_IDX],
msg.parm[MTC_PARM_FLAGS_IDX],
msg.parm[MTC_PARM_SEQ_IDX],
obj_ptr->get_mtcAlive_gate ( hostname ) ? "gated" : "open",
get_iface_name_str(iface));
iface_name_ptr);
}
else if ( msg.cmd == MTC_MSG_MAIN_GOENABLED )
{
@ -426,7 +474,7 @@ int mtc_service_inbox ( nodeLinkClass * obj_ptr,
if (( rc | rc1 ) != PASS )
{
elog ("received invalid event [rc:%d:%d]", rc, rc1);
print_mtc_message ( hostname, MTC_CMD_RX, msg, get_iface_name_str(iface), true );
print_mtc_message ( hostname, MTC_CMD_RX, msg, iface_name_ptr, true );
return ( FAIL_INVALID_OPERATION );
}
switch ( msg.cmd )
@ -613,6 +661,8 @@ int send_mtc_cmd ( string & hostname, int cmd , int interface, string json_dict
mtc_message_type mtc_cmd ;
string data = "" ;
mtc_socket_type * sock_ptr = get_sockPtr ();
nodeLinkClass * obj_ptr = get_mtcInv_ptr ();
const char * iface_name_ptr = get_iface_name_str(interface);
memset (&mtc_cmd,0,sizeof(mtc_message_type));
/* Add the command version to the message */
@ -627,7 +677,7 @@ int send_mtc_cmd ( string & hostname, int cmd , int interface, string json_dict
mtc_cmd.cmd = cmd ;
mtc_cmd.num = 0 ;
data = "{\"mtcInfo\":" + json_dict + "}";
ilog("%s mtc info update", hostname.c_str());
ilog("%s mtc info update: %s", hostname.c_str(), data.c_str());
rc = PASS ;
break ;
}
@ -636,6 +686,30 @@ int send_mtc_cmd ( string & hostname, int cmd , int interface, string json_dict
snprintf ( &mtc_cmd.hdr[0], MSG_HEADER_SIZE, "%s" , get_cmd_req_msg_header() );
mtc_cmd.cmd = cmd ;
mtc_cmd.num = 0 ;
if ( interface == PXEBOOT_INTERFACE )
{
if ( !obj_ptr->pxeboot_network_provisioned ) return PASS;
/* There is no pxeboot floating IP so the mtcClient cannot use
* a resolvable name label like 'CONTROLLER' as it does for
* management nwk.
* Therefore, the mtcClient on each node needs to be told the
* controller's pxeboot ip addresses so it knows where to send. */
obj_ptr->pxebootInfo_loader();
data = "{\"pxebootInfo\":{" ;
data.append ("\"address\":\"");
data.append (obj_ptr->my_pxeboot_ip);
data.append ("\",\"");
data.append (CONTROLLER_0);
data.append ("\":\"");
data.append (obj_ptr->get_pxeboot_hostaddr(CONTROLLER_0));
data.append ("\",\"");
data.append (CONTROLLER_1);
data.append ("\":\"");
data.append (obj_ptr->get_pxeboot_hostaddr(CONTROLLER_1));
data.append ("\"}}");
alog1("%s pxeboot info update:%s", hostname.c_str(), data.c_str());
}
rc = PASS ;
break ;
}
@ -668,7 +742,7 @@ int send_mtc_cmd ( string & hostname, int cmd , int interface, string json_dict
ilog ("%s sending '%s' request (%s)",
hostname.c_str(),
get_mtcNodeCommand_str(cmd),
get_iface_name_str(interface));
iface_name_ptr);
snprintf ( &mtc_cmd.hdr[0], MSG_HEADER_SIZE, "%s", get_cmd_req_msg_header() );
mtc_cmd.cmd = cmd ;
mtc_cmd.num = 0 ;
@ -688,7 +762,7 @@ int send_mtc_cmd ( string & hostname, int cmd , int interface, string json_dict
ilog ("%s sending '%s' request (%s)",
hostname.c_str(),
get_mtcNodeCommand_str(cmd),
get_iface_name_str(interface));
iface_name_ptr);
snprintf ( &mtc_cmd.hdr[0], MSG_HEADER_SIZE, "%s", get_cmd_req_msg_header() );
mtc_cmd.cmd = cmd ;
mtc_cmd.num = 0 ;
@ -713,7 +787,7 @@ int send_mtc_cmd ( string & hostname, int cmd , int interface, string json_dict
{
mlog ("%s sending 'Locked' notification (%s)",
hostname.c_str(),
get_iface_name_str(interface));
iface_name_ptr);
snprintf ( &mtc_cmd.hdr[0], MSG_HEADER_SIZE, "%s", get_cmd_req_msg_header() );
mtc_cmd.cmd = cmd ;
mtc_cmd.num = 0 ;
@ -738,7 +812,7 @@ int send_mtc_cmd ( string & hostname, int cmd , int interface, string json_dict
{
ilog ("%s sending 'UnLocked' notification (%s)",
hostname.c_str(),
get_iface_name_str(interface));
iface_name_ptr);
snprintf ( &mtc_cmd.hdr[0], MSG_HEADER_SIZE, "%s", get_cmd_req_msg_header() );
mtc_cmd.cmd = cmd ;
mtc_cmd.num = 0 ;
@ -754,20 +828,27 @@ int send_mtc_cmd ( string & hostname, int cmd , int interface, string json_dict
if ( rc == PASS )
{
int bytes = 0;
nodeLinkClass * obj_ptr = get_mtcInv_ptr ();
string iface_address ;
/* add the mac address of the target card to the header
* Note: the minus 1 is to overwrite the null */
snprintf ( &mtc_cmd.hdr[MSG_HEADER_SIZE-1], MSG_HEADER_SIZE, "%s", obj_ptr->get_hostIfaceMac(hostname, MGMNT_IFACE).data());
/* Update the sender's address */
if (interface == PXEBOOT_INTERFACE)
iface_address = obj_ptr->my_pxeboot_ip ;
else if (interface == CLSTR_INTERFACE)
iface_address = obj_ptr->my_clstr_ip ;
else
iface_address = obj_ptr->my_float_ip ;
/* If data is empty then at least add where the message came from */
if ( data.empty() )
{
data = "{\"address\":\"";
data.append(obj_ptr->my_float_ip) ;
data.append(iface_address) ;
data.append("\",\"interface\":\"");
data.append(get_iface_name_str(interface));
data.append(iface_name_ptr);
data.append("\"}");
}
else
@ -778,7 +859,7 @@ int send_mtc_cmd ( string & hostname, int cmd , int interface, string json_dict
snprintf ( &mtc_cmd.buf[0], data.length()+1, "%s", data.data());
bytes = (sizeof(mtc_message_type)-(BUF_SIZE-(data.length()+1)));
print_mtc_message ( hostname, MTC_CMD_TX, mtc_cmd, get_iface_name_str(interface), force ) ;
print_mtc_message ( hostname, MTC_CMD_TX, mtc_cmd, iface_name_ptr, force ) ;
if (interface == MGMNT_INTERFACE)
{
@ -791,13 +872,55 @@ int send_mtc_cmd ( string & hostname, int cmd , int interface, string json_dict
return (FAIL_HOSTADDR_LOOKUP);
}
mlog ("%s sending %s request to %s (%s)",
mlog ("%s sending %s request to %s:%d (%s)",
hostname.c_str(),
get_mtcNodeCommand_str(cmd),
hostaddr.c_str(),
get_iface_name_str(interface));
sock_ptr->mtc_mgmnt_cmd_port,
iface_name_ptr);
rc = sock_ptr->mtc_agent_tx_socket->write((char *)&mtc_cmd, bytes, hostaddr.c_str(), sock_ptr->mtc_mgmnt_cmd_port);
rc = sock_ptr->mtc_agent_mgmt_tx_socket->write((char *)&mtc_cmd, bytes, hostaddr.c_str(), sock_ptr->mtc_mgmnt_cmd_port);
}
else if ((interface == PXEBOOT_INTERFACE) && (sock_ptr->pxeboot_tx_socket))
{
string pxeboot_hostAddr = obj_ptr->get_pxeboot_hostaddr(hostname);
if (hostUtil_is_valid_ip_addr(pxeboot_hostAddr))
{
// Set up sockaddr_in with the host pxeboot address and its rx port number
int flags = 0 ;
struct sockaddr_in hostAddr;
memset(&hostAddr, 0, sizeof(hostAddr));
hostAddr.sin_family = AF_INET; // pxeboot network is IPV4 only
hostAddr.sin_port = htons(sock_ptr->mtc_tx_pxeboot_port);
hostAddr.sin_addr.s_addr = inet_addr(pxeboot_hostAddr.c_str());
mlog ("%s sending %s request to %s:%d (%s)",
hostname.c_str(),
get_mtcNodeCommand_str(cmd),
pxeboot_hostAddr.c_str(),
sock_ptr->mtc_rx_pxeboot_port,
iface_name_ptr);
ssize_t bytes_sent = sendto(sock_ptr->pxeboot_tx_socket,
(char *)&mtc_cmd,
bytes, flags,
(const struct sockaddr*)&hostAddr,
sizeof(hostAddr));
if (bytes_sent <= 0)
{
elog ("%s failed to send %d:%s command to %s:%d (%s) (%d:%m)",
hostname.c_str(), cmd,
get_mtcNodeCommand_str(cmd),
pxeboot_hostAddr.c_str(),
sock_ptr->mtc_rx_pxeboot_port,
iface_name_ptr,
errno);
}
}
else
{
return (FAIL_HOSTADDR_LOOKUP);
}
}
else if ((interface == CLSTR_INTERFACE) &&
( obj_ptr->clstr_network_provisioned == true ) &&
@ -805,17 +928,25 @@ int send_mtc_cmd ( string & hostname, int cmd , int interface, string json_dict
{
string clstr_hostaddr = obj_ptr->get_clstr_hostaddr(hostname);
if ( hostUtil_is_valid_ip_addr( clstr_hostaddr ) != true )
return (FAIL_NO_CLSTR_PROV);
return (FAIL_HOSTADDR_LOOKUP);
mlog ("%s sending %s request to %s (%s)",
hostname.c_str(),
get_mtcNodeCommand_str(cmd),
clstr_hostaddr.c_str(),
get_iface_name_str(interface));
iface_name_ptr);
rc = sock_ptr->mtc_agent_clstr_tx_socket->write((char *)&mtc_cmd, bytes, clstr_hostaddr.c_str(), sock_ptr->mtc_clstr_cmd_port);
}
else if ( interface == CLSTR_INTERFACE )
{
// This path can be taken if the cluster interface
mlog ("%s to %s network not sent", get_mtcNodeCommand_str(cmd), iface_name_ptr);
}
else
{
wlog ("%s to %s network not sent", get_mtcNodeCommand_str(cmd), iface_name_ptr);
}
if ( 0 > rc )
{
elog("%s Failed to send command (rc:%i)\n", hostname.c_str(), rc);
@ -944,7 +1075,7 @@ int send_hbs_command ( string hostname, int cmd, string controller )
{
if ( cmd == MTC_CMD_ACTIVE_CTRL )
{
mlog3 ("%s %s sent to %s %s",
mlog1 ("%s %s sent to %s %s",
hostname.c_str(),
get_mtcNodeCommand_str(cmd),
unit->c_str(),
@ -1162,7 +1293,7 @@ int service_events ( nodeLinkClass * obj_ptr, mtc_socket_type * sock_ptr )
else
{
/* The interface that the heartbeat loss occurred over is
* specified in parm[0 for this command
* specified in parm[0] for this command
* 0 = MGMNT_IFACE
* 1 = CLSTR_IFACE
* else default to 0 (MGMNT_IFACE) to be backwards compatible

File diff suppressed because it is too large Load Diff

View File

@ -1,10 +1,10 @@
#ifndef __INCLUDE_MTCNODECOMP_HH__
#define __INCLUDE_MTCNODECOMP_HH__
/*
* Copyright (c) 2015-2016 Wind River Systems, Inc.
*
* SPDX-License-Identifier: Apache-2.0
*
* Copyright (c) 2015-2016, 2024 Wind River Systems, Inc.
*
* SPDX-License-Identifier: Apache-2.0
*
*/
/**
@ -92,6 +92,30 @@ typedef struct
string mgmnt_iface ;
string clstr_iface ;
// Controller-0 USB installs lead to management interface,
// and therefore the pxeboot interface, being the localhost 'lo'.
// Trying to setup the pxeboot socket and do messaging over that
// socket is not possible so this bool tracks when the pxeboot
// interface is not correct.
bool pxeboot_iface_provisioned ;
string pxeboot_iface ;
string pxeboot_addr ;
string pxeboot_addr_c0 ;
string pxeboot_addr_c1 ;
// Assume address is learned to start even though it's likely not.
// This enables the first 'not learned' log followed by a 'learned'
// log once it is.
bool pxeboot_address_learned [CONTROLLERS] = { true, true };
// mtcAlive current running sequence number storage
unsigned int mtcAlive_pxeboot_sequence = 0 ;
unsigned int mtcAlive_mgmnt_sequence = 0 ;
unsigned int mtcAlive_clstr_sequence = 0 ;
/* Maintain pxeboot, management and cluster network interface information */
iface_info_type iface_info[MTCALIVE_INTERFACES_MAX];
unsigned int nodetype ;
unsigned int function ;
unsigned int subfunction ;
@ -131,5 +155,6 @@ bool is_subfunction_worker ( void );
int run_goenabled_scripts ( mtc_socket_type * sock_ptr , string requestor );
int run_hostservices_scripts ( unsigned int cmd );
void load_mtcInfo_msg ( mtc_message_type & msg );
void load_pxebootInfo_msg ( mtc_message_type & msg );
#endif
#endif // __INCLUDE_MTCNODECOMP_HH__

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2013, 2016, 2023 Wind River Systems, Inc.
* Copyright (c) 2013, 2016, 2023-2024 Wind River Systems, Inc.
*
* SPDX-License-Identifier: Apache-2.0
*
@ -134,21 +134,21 @@ msgSock_type * get_mtclogd_sockPtr ( void )
/******************************************************/
/* Socket Close functions */
/******************************************************/
static void mtc_agent_tx_socket_close ( void )
static void mtc_agent_mgmt_tx_socket_close ( void )
{
if (mtc_sock.mtc_agent_tx_socket)
if (mtc_sock.mtc_agent_mgmt_tx_socket)
{
delete mtc_sock.mtc_agent_tx_socket;
mtc_sock.mtc_agent_tx_socket = NULL;
delete mtc_sock.mtc_agent_mgmt_tx_socket;
mtc_sock.mtc_agent_mgmt_tx_socket = NULL;
}
}
static void mtc_agent_rx_socket_close ( void )
static void mtc_agent_mgmt_rx_socket_close ( void )
{
if (mtc_sock.mtc_agent_rx_socket)
if (mtc_sock.mtc_agent_mgmt_rx_socket)
{
delete (mtc_sock.mtc_agent_rx_socket);
mtc_sock.mtc_agent_rx_socket = NULL;
delete (mtc_sock.mtc_agent_mgmt_rx_socket);
mtc_sock.mtc_agent_mgmt_rx_socket = NULL;
}
}
@ -170,7 +170,7 @@ static void mtc_agent_clstr_rx_socket_close ( void )
}
}
static void mtc_event_rx_sock_close ( void )
static void event_rx_sock_close ( void )
{
if (mtc_sock.mtc_event_rx_sock)
{
@ -224,6 +224,24 @@ static void ioctl_sock_close ( void )
}
}
/* Close the pxeboot network receive socket, if one is open.
 *
 * The descriptor field uses 0 as its "no socket" value, so it is always
 * reset to 0 on the way out. Only a positive descriptor is handed to
 * close() ; a -1 left behind by a failed socket() call is simply cleared
 * rather than being passed to close(). */
static void pxeboot_rx_socket_close ( void )
{
    if ( mtc_sock.pxeboot_rx_socket > 0 )
    {
        close (mtc_sock.pxeboot_rx_socket);
    }
    mtc_sock.pxeboot_rx_socket = 0 ;
}
/* Close the pxeboot network transmit socket, if one is open.
 *
 * The descriptor field uses 0 as its "no socket" value, so it is always
 * reset to 0 on the way out. Only a positive descriptor is handed to
 * close() ; a -1 left behind by a failed socket() call is simply cleared
 * rather than being passed to close(). */
static void pxeboot_tx_socket_close ( void )
{
    if ( mtc_sock.pxeboot_tx_socket > 0 )
    {
        close (mtc_sock.pxeboot_tx_socket);
    }
    mtc_sock.pxeboot_tx_socket = 0 ;
}
/* close all the sockets */
static void mtc_socket_fini(void)
{
@ -231,17 +249,64 @@ static void mtc_socket_fini(void)
set_inotify_close(mtcInv.inotify_shadow_file_fd,
mtcInv.inotify_shadow_file_wd);
pxeboot_tx_socket_close();
pxeboot_rx_socket_close();
mtc_agent_clstr_tx_socket_close();
mtc_agent_clstr_rx_socket_close();
mtc_agent_tx_socket_close();
mtc_agent_rx_socket_close();
mtc_event_rx_sock_close();
mtc_agent_mgmt_tx_socket_close();
mtc_agent_mgmt_rx_socket_close();
event_rx_sock_close();
mtc_to_hbs_sock_close();
hwmon_cmd_sock_close();
mtclogd_socket_close();
mtcHttpSvr_fini(mtce_event);
}
/* Create the IPv4 UDP socket used to transmit over the pxeboot network.
 *
 * Does nothing when the pxeboot network is not provisioned. Any socket
 * that is already open is closed first, so this can be called again to
 * re-create the socket.
 *
 * On failure the descriptor is left at 0 (the "no socket" value). */
void setup_pxeboot_tx_socket ( void )
{
    if ( !mtcInv.pxeboot_network_provisioned ) return ;

    /* Drop any previously opened socket before creating a new one */
    pxeboot_tx_socket_close();

    ilog ("Creating pxeboot transmit socket");
    if ((mtc_sock.pxeboot_tx_socket = socket(AF_INET, SOCK_DGRAM, 0)) <= 0)
    {
        elog ("failed to create IPV4 pxeboot network transmit socket ; (%d:%m)", errno);

        /* The send path tests this descriptor for non-zero before using
         * it, so a failed create must not leave -1 behind. */
        mtc_sock.pxeboot_tx_socket = 0 ;
    }
}
/* Create and bind the IPv4 UDP socket used to receive from the pxeboot network.
 *
 * Does nothing when the pxeboot network is not provisioned. Any socket
 * that is already open is closed first, so this can be called again to
 * re-create the socket. The socket is non-blocking and is bound to this
 * host's pxeboot address (mtcInv.my_pxeboot_ip) and the configured
 * pxeboot receive port.
 *
 * On any failure the descriptor is left at 0 (the "no socket" value). */
void setup_pxeboot_rx_socket ( void )
{
    if ( !mtcInv.pxeboot_network_provisioned ) return ;

    /* Drop any previously opened socket before creating a new one */
    pxeboot_rx_socket_close ();

    ilog ("Creating pxeboot receive socket on %s:%d",
              mtcInv.my_pxeboot_ip.c_str(),
              mtc_sock.mtc_rx_pxeboot_port);

    // Create the socket
    mtc_sock.pxeboot_rx_socket = socket(AF_INET, SOCK_DGRAM | SOCK_NONBLOCK, 0);
    if ( mtc_sock.pxeboot_rx_socket == -1 )
    {
        elog ("failed to create IPV4 pxeboot network receive socket ; (%d:%m)", errno);

        /* Nothing to bind ; clear the -1 so the rest of the daemon sees
         * "no socket" and stop here rather than logging a second,
         * misleading bind failure. */
        mtc_sock.pxeboot_rx_socket = 0 ;
        return ;
    }

    // Initialize pxeboot address structure
    struct sockaddr_in pxeboot_addr ;
    memset(&pxeboot_addr, 0, sizeof(pxeboot_addr));
    pxeboot_addr.sin_family = AF_INET;
    pxeboot_addr.sin_port = htons(mtc_sock.mtc_rx_pxeboot_port);
    pxeboot_addr.sin_addr.s_addr = inet_addr(mtcInv.my_pxeboot_ip.data());

    // Bind the pxeboot unit address and messaging port to socket
    if (bind(mtc_sock.pxeboot_rx_socket, (const struct sockaddr*)&pxeboot_addr, sizeof(pxeboot_addr)) == -1)
    {
        elog ("failed to bind %s:%d to socket (%d:%m)",
               mtcInv.my_pxeboot_ip.c_str(),
               mtc_sock.mtc_rx_pxeboot_port,
               errno);
        pxeboot_rx_socket_close();
    }
}
void daemon_exit(void)
{
/* Cancel the uptime timer */
@ -360,6 +425,20 @@ static int mtc_config_handler ( void * user,
config_ptr->mtc_rx_clstr_port = atoi(value);
config_ptr->mask |= CONFIG_CLIENT_MTC_CLSTR_PORT ;
}
else if (MATCH("agent", "mtc_rx_pxeboot_port"))
{
config_ptr->mtc_rx_pxeboot_port = atoi(value);
mtc_sock.mtc_rx_pxeboot_port = mtc_config.mtc_rx_pxeboot_port ;
}
else if (MATCH("client", "mtc_rx_pxeboot_port"))
{
// Get the mtcClient's pxeboot network receive port number
// and use it as the mtcAgent's pxeboot network transmit port.
// So that the mtcAgent can send the mtcClient messages over the
// pxeboot network.
config_ptr->mtc_tx_pxeboot_port = atoi(value);
mtc_sock.mtc_tx_pxeboot_port = config_ptr->mtc_tx_pxeboot_port ;
}
else if (MATCH("agent", "token_refresh_rate"))
{
config_ptr->token_refresh_rate = atoi(value);
@ -791,34 +870,34 @@ int mtc_socket_init ( void )
int socket_size = 0 ;
char ip_address[INET6_ADDRSTRLEN];
/***********************************************************/
/* Setup UDP Maintenance Command Transmit Socket Mgmnt I/F */
/***********************************************************/
/**********************************************************************/
/* Setup UDP Maintenance Command Transmit Socket to the Mgmnt network */
/**********************************************************************/
mtc_sock.mtc_mgmnt_cmd_port = mtc_config.cmd_port;
msgClassAddr::getAddressFromInterface(mtc_config.mgmnt_iface, ip_address, INET6_ADDRSTRLEN);
mtc_sock.mtc_agent_tx_socket =
mtc_sock.mtc_agent_mgmt_tx_socket =
new msgClassTx(ip_address, mtc_sock.mtc_mgmnt_cmd_port, IPPROTO_UDP, mtc_config.mgmnt_iface);
#ifdef WANT_FIT_TESTING
if ( daemon_want_fit ( FIT_CODE__SOCKET_SETUP, mtcInv.my_hostname, "mtc_agent_tx_socket"))
mtc_sock.mtc_agent_tx_socket->return_status = FAIL ;
if ( daemon_want_fit ( FIT_CODE__SOCKET_SETUP, mtcInv.my_hostname, "mtc_agent_mgmt_tx_socket"))
mtc_sock.mtc_agent_mgmt_tx_socket->return_status = FAIL ;
#endif
if ((mtc_sock.mtc_agent_tx_socket == NULL) ||
(mtc_sock.mtc_agent_tx_socket->return_status))
if ((mtc_sock.mtc_agent_mgmt_tx_socket == NULL) ||
(mtc_sock.mtc_agent_mgmt_tx_socket->return_status))
{
elog("Failed to create mtcClient command socket on port %d for %s (%d:%s)\n",
mtc_sock.mtc_mgmnt_cmd_port,
mtc_config.mgmnt_iface,
errno,
strerror(errno));
mtc_agent_tx_socket_close();
mtc_agent_mgmt_tx_socket_close();
return (FAIL_SOCKET_CREATE) ;
}
/***********************************************************/
/* Setup UDP Maintenance Command Transmit Socket Clstr I/F */
/***********************************************************/
/**********************************************************************/
/* Setup UDP Maintenance Command Transmit Socket to the Clstr network */
/**********************************************************************/
if (strlen(mtc_config.clstr_iface))
{
mtc_sock.mtc_clstr_cmd_port = mtc_config.mtc_rx_clstr_port;
@ -845,48 +924,49 @@ int mtc_socket_init ( void )
/*********************************************************************
* Setup Maintenance Command Reply and Event Receiver Socket
* - management interface
* - management network
* - pxeboot network
*
* This socket is used to receive command replies over the management
* interface and asynchronous events from the mtcClient and other
* maintenance service daemons.
*********************************************************************/
mtc_sock.mtc_agent_port = mtc_config.mtc_agent_port;
mtc_sock.mtc_agent_rx_socket =
mtc_sock.mtc_agent_mgmt_rx_socket =
new msgClassRx(CONTROLLER, mtc_sock.mtc_agent_port, IPPROTO_UDP);
#ifdef WANT_FIT_TESTING
if ( daemon_want_fit ( FIT_CODE__SOCKET_SETUP, mtcInv.my_hostname, "mtc_agent_rx_socket"))
mtc_sock.mtc_agent_rx_socket = NULL ;
if ( daemon_want_fit ( FIT_CODE__SOCKET_SETUP, mtcInv.my_hostname, "mtc_agent_mgmt_rx_socket"))
mtc_sock.mtc_agent_mgmt_rx_socket = NULL ;
#endif
if ((mtc_sock.mtc_agent_rx_socket == NULL) ||
(mtc_sock.mtc_agent_rx_socket->return_status))
if ((mtc_sock.mtc_agent_mgmt_rx_socket == NULL) ||
(mtc_sock.mtc_agent_mgmt_rx_socket->return_status))
{
elog("Failed to create mtcClient receive socket on port %d for %s (%d:%m)\n",
mtc_sock.mtc_agent_port,
mtc_config.mgmnt_iface,
errno);
mtc_agent_rx_socket_close();
mtc_agent_mgmt_rx_socket_close();
return (FAIL_SOCKET_CREATE );
}
/* Set messaging buffer size */
/* if we need a bigger than default size we can use a sysctl to raise the max */
socket_size = MTC_AGENT_RX_BUFF_SIZE;
if ((rc = mtc_sock.mtc_agent_rx_socket->setSocketMemory(mtc_config.mgmnt_iface, "mtce command and event receiver (Mgmnt network)", socket_size)) != PASS)
if ((rc = mtc_sock.mtc_agent_mgmt_rx_socket->setSocketMemory(mtc_config.mgmnt_iface, "mtce command and event receiver (Mgmnt network)", socket_size)) != PASS)
{
elog("setsockopt failed for SO_RCVBUF (%d:%m)\n", errno);
mtc_agent_rx_socket_close();
mtc_agent_mgmt_rx_socket_close();
return (FAIL_SOCKET_OPTION);
}
socklen_t optlen = sizeof(mtc_sock.mtc_agent_rx_socket_size);
getsockopt(mtc_sock.mtc_agent_rx_socket->getFD(), SOL_SOCKET, SO_RCVBUF,
&mtc_sock.mtc_agent_rx_socket_size, &optlen);
socklen_t optlen = sizeof(mtc_sock.mtc_agent_mgmt_rx_socket_size);
getsockopt(mtc_sock.mtc_agent_mgmt_rx_socket->getFD(), SOL_SOCKET, SO_RCVBUF,
&mtc_sock.mtc_agent_mgmt_rx_socket_size, &optlen);
ilog("Listening On: 'mtc client receive' socket %d (%d rx bytes - req:%d) (%s)\n",
mtc_sock.mtc_agent_port,
mtc_sock.mtc_agent_rx_socket_size, MTC_AGENT_RX_BUFF_SIZE,
mtc_sock.mtc_agent_mgmt_rx_socket_size, MTC_AGENT_RX_BUFF_SIZE,
mtc_config.mgmnt_iface);
/*********************************************************************
@ -967,7 +1047,7 @@ int mtc_socket_init ( void )
mtc_config.hbs_to_mtc_event_port,
mtc_config.mgmnt_iface,
errno);
mtc_event_rx_sock_close();
event_rx_sock_close();
return ( FAIL_SOCKET_CREATE );
}
@ -1077,6 +1157,8 @@ int daemon_init ( string iface, string nodetype )
}
mtcInv.system_type = daemon_system_type ();
mtcInv.sw_version = daemon_sw_version();
ilog ("SW Version : %s", mtcInv.sw_version.c_str());
/* Get and store my hostname */
if ( mtc_hostname_read () != PASS )
@ -1370,9 +1452,6 @@ void daemon_service_run ( void )
/* Init board management stuff */
bmcUtil_init ();
/* log the currect software version */
ilog ("SW VERSION : %s\n", daemon_sw_version ().c_str());
/* Collect inventory in active state only */
if ( mtc_config.active == true )
{
@ -1383,6 +1462,32 @@ void daemon_service_run ( void )
daemon_exit ();
}
string my_mac = "" ;
get_iface_macaddr ( mtc_config.mgmnt_iface , my_mac );
dlog ("Mgmt IF mac: %s", my_mac.c_str());
mtcInv.my_pxeboot_if = daemon_mgmnt_iface() ;
if (( mtcInv.my_pxeboot_if != LOOPBACK_IF ) && ( !my_mac.empty() ))
{
mtcInv.pxeboot_network_provisioned = true ;
mtc_config.pxeboot_iface = daemon_get_iface_master ((char*)mtcInv.my_pxeboot_if.data());
{
string ifname = mtc_config.pxeboot_iface ;
if ( get_iface_parent ( PXEBOOT_INTERFACE, ifname, mtcInv.my_pxeboot_if ) == PASS )
{
ilog ("Pxeboot IF : %s", mtcInv.my_pxeboot_if.c_str() );
}
}
mtcInv.pxebootInfo_loader ( my_mac );
ilog ("Pxeboot IP : %s", mtcInv.my_pxeboot_ip.empty() ? "none" : mtcInv.my_pxeboot_ip.c_str());
/************************************************************************/
/* Setup UDP IPV4 Maintenance pxeboot network Transmit/Receive Sockets */
/************************************************************************/
setup_pxeboot_rx_socket ();
setup_pxeboot_tx_socket ();
}
/* The following are base object controller timers ; init them */
mtcTimer_init ( mtcInv.mtcTimer_token, mtcInv.my_hostname, "token timer" );
mtcTimer_init ( mtcInv.mtcTimer_uptime,mtcInv.my_hostname, "uptime timer" );
@ -1390,7 +1495,6 @@ void daemon_service_run ( void )
mtcTimer_init ( mtcInv.mtcTimer_dor, mtcInv.my_hostname, "DOR mode timer" );
if ( get_link_state ( mtc_sock.ioctl_sock, mtc_config.mgmnt_iface, &mtcInv.mgmnt_link_up_and_running ) )
{
mtcInv.mgmnt_link_up_and_running = false ;
wlog ("Failed to query %s operational state ; defaulting to down\n", mtc_config.mgmnt_iface );
@ -1483,12 +1587,17 @@ void daemon_service_run ( void )
send_hbs_command ( mtcInv.my_hostname, MTC_CMD_START_HOST );
socks.clear();
socks.push_front (mtc_sock.mtc_event_rx_sock->getFD()); // service_events
socks.push_front (mtc_sock.mtc_agent_rx_socket->getFD()); // mtc_service_inbox
// service_events
socks.push_front (mtc_sock.mtc_event_rx_sock->getFD());
// mtc_service_inbox - receive sockets from Pxeboot, Mgmt and Clstr network
if ( mtc_sock.pxeboot_rx_socket )
socks.push_front (mtc_sock.pxeboot_rx_socket);
socks.push_front (mtc_sock.mtc_agent_mgmt_rx_socket->getFD());
if ( mtcInv.clstr_network_provisioned == true )
{
socks.push_front (mtc_sock.mtc_agent_clstr_rx_socket->getFD()); // mtc_service_inbox
socks.push_front (mtc_sock.mtc_agent_clstr_rx_socket->getFD());
}
socks.push_front (mtc_sock.netlink_sock);
@ -1559,9 +1668,57 @@ void daemon_service_run ( void )
* where it had commanded the hbsAgent to heartbeat at a reduced rate. */
send_hbs_command ( mtcInv.my_hostname, MTC_RECOVER_HBS );
// Used to track mtcAgent incoming messaging rate
#define LOOP_TIMER_PERIOD_SECS (60)
#define MSGS_PER_SEC_THRESHOLD (20)
#define MSGS_CNT_IDX_INBOX (0)
#define MSGS_CNT_IDX_EVENT (1)
#define MSGS_CNT_IDX_PMOND (2)
#define MSGS_CNT_IDX_HTTP (3)
#define MSGS_CNT_IDX_NETLINK (4)
#define MSGS_CNT_IDX_INOTIFY (5)
#define MSGS_CNT_IDX_MAX (6)
static unsigned int messages_tally[MSGS_CNT_IDX_MAX] = {0,0,0,0,0,0} ;
static float messages_total = 0 ;
mtcTimer_init ( mtcInv.mtcTimer_loop, mtcInv.my_hostname, "loop timer" );
/* Run Maintenance service forever */
for ( ; ; )
for ( mtc_sock.msg_rate = 0 ; ; )
{
if ( mtcTimer_expired ( mtcInv.mtcTimer_loop ) )
{
// Maintain an incoming messaging rate.
for ( int m = MSGS_CNT_IDX_INBOX ; m < MSGS_CNT_IDX_MAX ; m++ )
messages_total += messages_tally[m] ;
float rate_per_sec = messages_total/LOOP_TIMER_PERIOD_SECS ;
// Only log the messaging rate log when
// - the rate is above basic MSGS_PER_SEC_THRESHOLD ; first log
// - the messaging rate changes by half of the threshold in either direction
if (( mtc_config.debug_msg ) ||
(( rate_per_sec > MSGS_PER_SEC_THRESHOLD ) &&
(( rate_per_sec > (mtc_sock.msg_rate+(MSGS_PER_SEC_THRESHOLD/2))) ||
( rate_per_sec < (mtc_sock.msg_rate-(MSGS_PER_SEC_THRESHOLD/2))))))
{
ilog ("%d messages processed ; rate: %.1f msgs/sec] [%d:%d:%d:%d:%d:%d]",
(int)messages_total, rate_per_sec,
messages_tally[MSGS_CNT_IDX_INBOX],
messages_tally[MSGS_CNT_IDX_EVENT],
messages_tally[MSGS_CNT_IDX_PMOND],
messages_tally[MSGS_CNT_IDX_HTTP],
messages_tally[MSGS_CNT_IDX_NETLINK],
messages_tally[MSGS_CNT_IDX_INOTIFY]);
// Save this message rate for next compare
mtc_sock.msg_rate = rate_per_sec ;
}
// clean the stats and restart the timer
messages_total = 0 ;
for ( int m = MSGS_CNT_IDX_INBOX ; m < MSGS_CNT_IDX_MAX ; m++ )
messages_tally[m] = 0 ;
mtcTimer_start ( mtcInv.mtcTimer_loop, mtcTimer_handler, LOOP_TIMER_PERIOD_SECS );
}
daemon_signal_hdlr ();
/**
* Can't just run 'mtcHttpSvr_look' off select as it is seen to miss events.
@ -1587,13 +1744,17 @@ void daemon_service_run ( void )
/* Initialize the master fd_set */
FD_ZERO(&mtc_sock.readfds);
FD_SET(mtc_sock.mtc_event_rx_sock->getFD(), &mtc_sock.readfds);
FD_SET(mtc_sock.mtc_agent_rx_socket->getFD(), &mtc_sock.readfds);
FD_SET(mtc_sock.mtc_event_rx_sock->getFD(), &mtc_sock.readfds);
FD_SET(mtc_sock.mtc_agent_mgmt_rx_socket->getFD(), &mtc_sock.readfds);
if ( mtcInv.clstr_network_provisioned == true )
{
FD_SET(mtc_sock.mtc_agent_clstr_rx_socket->getFD(),&mtc_sock.readfds);
}
// Listen to the pxeboot rx socket if it is setup
if ( mtc_sock.pxeboot_rx_socket > 0 )
{
FD_SET(mtc_sock.pxeboot_rx_socket, &mtc_sock.readfds);
}
if ( mtce_event.fd )
{
FD_SET(mtce_event.fd, &mtc_sock.readfds);
@ -1631,44 +1792,95 @@ void daemon_service_run ( void )
{
if ( FD_ISSET( mtce_event.fd , &mtc_sock.readfds))
{
mlog3 ("http socket fired");
messages_tally[MSGS_CNT_IDX_HTTP]++ ;
mtcHttpSvr_look ( mtce_event );
mlog3 ("http handling done");
}
if (FD_ISSET(mtc_sock.netlink_sock, &mtc_sock.readfds))
{
dlog ("netlink socket fired\n");
mlog3 ("netlink socket fired");
messages_tally[MSGS_CNT_IDX_NETLINK]++ ;
if ( mtcInv.service_netlink_events ( mtc_sock.netlink_sock, mtc_sock.ioctl_sock ) != PASS )
{
elog ("service_netlink_events failed (rc:%d)\n", rc );
}
mlog3 ("netlink handling done");
}
if (FD_ISSET(mtc_sock.mtc_event_rx_sock->getFD(), &mtc_sock.readfds))
{
mlog3 ("events socket fired");
messages_tally[MSGS_CNT_IDX_EVENT]++ ;
if ( (rc = service_events ( &mtcInv, &mtc_sock )) != PASS )
{
elog ("service_events failed (rc:%d)\n", rc );
}
mlog3 ("events handling done");
}
if ( FD_ISSET(mtc_sock.mtc_agent_rx_socket->getFD(), &mtc_sock.readfds))
if ( mtc_sock.pxeboot_rx_socket && FD_ISSET(mtc_sock.pxeboot_rx_socket, &mtc_sock.readfds))
{
int cnt = 0 ;
/* Service up to MAX_RX_MSG_BATCH of messages at once */
mlog3 ("pxeboot network socket fired");
for ( ; cnt < MAX_RX_MSG_BATCH ; cnt++ )
{
rc = mtc_service_inbox ( &mtcInv, &mtc_sock , MGMNT_INTERFACE) ;
if ( rc > RETRY )
mlog3 ("... service inbox ; message %d", cnt+1);
rc = mtc_service_inbox ( &mtcInv, &mtc_sock , PXEBOOT_INTERFACE) ;
if ( rc == RETRY )
{
mlog2 ("mtc_service_inbox failed (rc:%d) (Mgmnt)\n", rc );
mlog3 ("... service inbox done");
break ;
}
if ( rc == RETRY )
messages_tally[MSGS_CNT_IDX_INBOX]++ ;
if ( rc > RETRY )
{
wlog ("mtc_service_inbox failed (rc:%d) (pxeboot)", rc );
break ;
}
else
{
mlog3 ("......more messages to service");
}
}
if ( cnt > 1 )
if ( cnt > (MAX_RX_MSG_BATCH/2) )
{
mlog2 ("serviced %d messages in one batch (Mgmnt)\n", cnt );
ilog ("serviced %d messages in one batch (pxeboot)", cnt );
}
mlog3 ("pxeboot network message handling done");
}
if ( FD_ISSET(mtc_sock.mtc_agent_mgmt_rx_socket->getFD(), &mtc_sock.readfds))
{
int cnt = 0 ;
/* Service up to MAX_RX_MSG_BATCH of messages at once */
mlog3 ("mgmt network socket fired");
for ( ; cnt < MAX_RX_MSG_BATCH ; cnt++ )
{
mlog3 ("... service inbox ; message %d", cnt+1);
rc = mtc_service_inbox ( &mtcInv, &mtc_sock , MGMNT_INTERFACE) ;
if ( rc == RETRY )
{
mlog3 ("... service inbox done");
break ;
}
messages_tally[MSGS_CNT_IDX_INBOX]++ ;
if ( rc > RETRY )
{
wlog ("mtc_service_inbox failed (rc:%d) (Mgmnt)", rc );
break ;
}
else
{
mlog3 ("......more messages to service");
}
}
if ( cnt > (MAX_RX_MSG_BATCH/2) )
{
ilog ("serviced %d messages in one batch (Mgmnt)", cnt );
}
mlog3 ("mgmt network message handling done");
}
if (( mtcInv.clstr_network_provisioned == true ) &&
@ -1677,24 +1889,38 @@ void daemon_service_run ( void )
{
int cnt = 0 ;
/* Service up to MAX_RX_MSG_BATCH of messages at once */
mlog3 ("clstr network socket fired");
for ( ; cnt < MAX_RX_MSG_BATCH ; cnt++ )
{
mlog3 ("... service inbox ; message %d", cnt+1);
rc = mtc_service_inbox ( &mtcInv, &mtc_sock, CLSTR_INTERFACE ) ;
if ( rc > RETRY )
if ( rc == RETRY )
{
mlog2 ("mtc_service_inbox failed (rc:%d) (Clstr)\n", rc );
mlog3 ("... service inbox done");
break ;
}
if ( rc == RETRY )
messages_tally[MSGS_CNT_IDX_INBOX]++ ;
if ( rc > RETRY )
{
mlog ("mtc_service_inbox failed (rc:%d) (Clstr)\n", rc );
break ;
}
else
{
mlog3 ("......more messages to service");
}
}
if ( cnt > 1 )
if ( cnt > (MAX_RX_MSG_BATCH/2) )
{
mlog2 ("serviced %d messages in one batch (Clstr)\n", cnt ); // ERIC dlog
ilog ("serviced %d messages in one batch (Clstr)", cnt );
}
mlog3 ("mgmt network message handling done");
}
if (FD_ISSET(mtcInv.inotify_shadow_file_fd, &mtc_sock.readfds))
{
mlog3 ("inotify socket fired");
messages_tally[MSGS_CNT_IDX_INOTIFY]++ ;
rc = get_inotify_events ( mtcInv.inotify_shadow_file_fd, (IN_MODIFY | IN_CREATE | IN_IGNORED) );
if ( rc )
{
@ -1715,6 +1941,7 @@ void daemon_service_run ( void )
wlog ("Reselecting on %s change (Select:%d)\n", SHADOW_FILE, mtcInv.inotify_shadow_file_fd );
}
}
mlog3 ("inotify event handling done");
}
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2013, 2016 Wind River Systems, Inc.
* Copyright (c) 2013, 2016, 2024 Wind River Systems, Inc.
*
* SPDX-License-Identifier: Apache-2.0
*
@ -91,6 +91,14 @@ int nodeLinkClass::fsm ( struct nodeLinkClass::node * node_ptr )
*/
nodeLinkClass::online_handler ( node_ptr );
/*
* Always run the mtcAlive handler.
*
* - monitor host's mtcAlive messaging
* - manage host's mtcAlive missing alarm
*/
nodeLinkClass::pxeboot_mtcAlive_monitor ( node_ptr );
if ( node_ptr->adminAction == MTC_ADMIN_ACTION__DELETE )
{
flog ("%s -> Delete Action\n", node_ptr->hostname.c_str());

View File

@ -130,6 +130,15 @@ void nodeLinkClass::timer_handler ( int sig, siginfo_t *si, void *uc)
return ;
}
/* Is this TID an online timer TID ? */
node_ptr = get_online_timer ( *tid_ptr );
if ( node_ptr )
{
mtcTimer_stop_int_safe ( node_ptr->online_timer );
node_ptr->online_timer.ring = true ;
return ;
}
/* Is this TID a mtcAlive timer TID ? */
node_ptr = get_mtcAlive_timer ( *tid_ptr );
if ( node_ptr )
@ -247,6 +256,14 @@ void nodeLinkClass::timer_handler ( int sig, siginfo_t *si, void *uc)
return ;
}
/* daemon main loop timer */
if ( *tid_ptr == mtcTimer_loop.tid )
{
mtcTimer_stop_int_safe ( mtcTimer_loop );
mtcTimer_loop.ring = true ;
return ;
}
/* is the http request timer ? */
node_ptr = get_http_timer ( *tid_ptr );
if ( node_ptr )
@ -1968,7 +1985,9 @@ int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr )
node_ptr->hostname.c_str());
node_ptr->reboot_cmd_ack_mgmnt = false ;
node_ptr->reboot_cmd_ack_clstr = false ;
node_ptr->reboot_cmd_ack_pxeboot = false ;
send_mtc_cmd ( node_ptr->hostname, MTC_CMD_REBOOT, MGMNT_INTERFACE ) ;
send_mtc_cmd ( node_ptr->hostname, MTC_CMD_REBOOT, PXEBOOT_INTERFACE ) ;
/* If the cluster-host network is provisioned then try
* and issue a reset over it to expedite the recovery
@ -2038,6 +2057,12 @@ int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr )
ilog ("%s backup bmc reset aborted due to management network reboot request ACK",
node_ptr->hostname.c_str());
}
else if ( node_ptr->reboot_cmd_ack_pxeboot )
{
reset_aborted = true ;
ilog ("%s backup bmc reset aborted due to pxeboot network reboot request ACK",
node_ptr->hostname.c_str());
}
else if ( node_ptr->reboot_cmd_ack_clstr )
{
reset_aborted = true ;
@ -3331,6 +3356,7 @@ int nodeLinkClass::offline_handler ( struct nodeLinkClass::node * node_ptr )
node_ptr->mtcAlive_count = 0 ;
node_ptr->mtcAlive_mgmnt = false ;
node_ptr->mtcAlive_clstr = false ;
node_ptr->mtcAlive_pxeboot = false ;
node_ptr->offline_log_throttle = 0 ;
node_ptr->offline_search_count = 0 ;
@ -3362,22 +3388,25 @@ int nodeLinkClass::offline_handler ( struct nodeLinkClass::node * node_ptr )
* are cleared. Need to also clear the
* offline_search_count here as well.
**/
if (( node_ptr->mtcAlive_mgmnt || node_ptr->mtcAlive_clstr ) && node_ptr->offline_search_count )
if (( node_ptr->mtcAlive_mgmnt || node_ptr->mtcAlive_clstr || node_ptr->mtcAlive_pxeboot ) && node_ptr->offline_search_count )
{
node_ptr->mtcAlive_online = true ;
ilog ("%s still seeing mtcAlive (%d) (Mgmt:%c:%d Clstr:%c:%d) ; restart offline_search_count=%d of %d\n",
ilog ("%s still seeing mtcAlive (%d) (Mgmt:%c:%d Clstr:%c:%d Pxeboot:%c:%d) ; restart offline_search_count=%d of %d\n",
node_ptr->hostname.c_str(),
node_ptr->mtcAlive_count,
node_ptr->mtcAlive_mgmnt ? 'Y' : 'n',
node_ptr->mtcAlive_mgmnt_count,
node_ptr->mtcAlive_clstr ? 'Y' : 'n',
node_ptr->mtcAlive_clstr_count,
node_ptr->mtcAlive_pxeboot ? 'Y' : 'n',
node_ptr->mtcAlive_pxeboot_count,
node_ptr->offline_search_count,
offline_threshold );
node_ptr->offline_search_count = 0 ; /* reset the count */
}
node_ptr->mtcAlive_mgmnt = false ;
node_ptr->mtcAlive_clstr = false ;
node_ptr->mtcAlive_pxeboot = false ;
/* Request a mtcAlive from host from Mgmnt and Clstr (if provisioned) */
send_mtc_cmd ( node_ptr->hostname, MTC_REQ_MTCALIVE, MGMNT_INTERFACE );
@ -3410,6 +3439,10 @@ int nodeLinkClass::offline_handler ( struct nodeLinkClass::node * node_ptr )
{
node_ptr->mtcAlive_online = false ;
// Clear all the mtcAlive_sequence numbers
for (int i = 0 ; i < MTCALIVE_INTERFACES_MAX ; i++)
node_ptr->mtcAlive_sequence[i] = 0;
plog ("%s going offline ; (threshold (%d msec * %d)\n",
node_ptr->hostname.c_str(),
offline_period,
@ -3532,23 +3565,23 @@ int nodeLinkClass::online_handler ( struct nodeLinkClass::node * node_ptr )
node_ptr->mtcAlive_misses = 0 ;
/* Start mtcAlive message timer */
mtcTimer_start ( node_ptr->mtcAlive_timer, mtcTimer_handler, online_period );
mtcTimer_start ( node_ptr->online_timer, mtcTimer_handler, online_period );
node_ptr->onlineStage = MTC_ONLINE__WAITING ;
break ;
}
case MTC_ONLINE__RETRYING:
{
/* Start mtcAlive message timer */
mtcTimer_start ( node_ptr->mtcAlive_timer, mtcTimer_handler, online_period );
mtcTimer_start ( node_ptr->online_timer, mtcTimer_handler, online_period );
node_ptr->onlineStage = MTC_ONLINE__WAITING ;
break ;
}
case MTC_ONLINE__WAITING:
{
if ( node_ptr->mtcAlive_timer.ring == false )
if ( node_ptr->online_timer.ring == false )
break ;
alog ("%s mtcAlive [%s] [ misses:%d]\n",
alog2 ("%s mtcAlive [%s] [ misses:%d]\n",
node_ptr->hostname.c_str(),
node_ptr->mtcAlive_online ? "Yes" : "No",
node_ptr->mtcAlive_misses );
@ -3581,7 +3614,7 @@ int nodeLinkClass::online_handler ( struct nodeLinkClass::node * node_ptr )
else
{
/* handle retries < MTC_OFFLINE_MISSES */
node_ptr->mtcAlive_timer.ring = false ;
node_ptr->online_timer.ring = false ;
node_ptr->onlineStage = MTC_ONLINE__RETRYING ;
break ;
}
@ -3629,7 +3662,7 @@ int nodeLinkClass::online_handler ( struct nodeLinkClass::node * node_ptr )
}
/* Start over */
node_ptr->mtcAlive_timer.ring = false ;
node_ptr->online_timer.ring = false ;
node_ptr->onlineStage = MTC_ONLINE__START ;
break ;
}
@ -7523,7 +7556,148 @@ int nodeLinkClass::oos_test_handler ( struct nodeLinkClass::node * node_ptr )
return (PASS);
}
///////////////////////////////////////////////////////////////////////////////
//
// Name       : pxeboot_mtcAlive_monitor
//
// Purpose    : Monitor pxeboot network mtcAlive and manage associated alarm.
//
// Description: Monitor pxeboot mtcAlive messages.
//              Request mtcAlive when not receiving mtcAlive messages.
//              Debounce mtcAlive messaging and manage alarm accordingly.
//
//              Stage flow as implemented below:
//
//                START   - reset the audit timer and the pxeboot sequence
//                          number, then go to SEND
//                SEND    - request a pxeboot mtcAlive, baseline the saved
//                          sequence number, clear the miss count,
//                          then go to MONITOR
//                MONITOR - start the audit timer, then go to WAIT
//                WAIT    - stay here until the audit timer expires,
//                          then go to CHECK
//                CHECK   - compare the current sequence number against the
//                          one saved at the previous audit
//                FAIL    - log and restart ; no stage in this handler
//                          transitions to FAIL yet (see TODO in CHECK)
//
//              Does nothing, other than the fsm stage log, when the pxeboot
//              network is not provisioned.
//
// Assumptions: mtcAlive_sequence[PXEBOOT_INTERFACE] is expected to change
//              only when a pxeboot mtcAlive message is received or when it
//              is explicitly cleared - TODO confirm against the rx handler.
//
// Parameters : nodeLinkClass::node struct pointer - node_ptr
//
// Returns    : PASS
//
///////////////////////////////////////////////////////////////////////////////
#define PXEBOOT_MTCALIVE_MONITOR_RATE_SECS     (10)
#define PXEBOOT_MTCALIVE_LOSS_THRESHOLD         (6)
#define PXEBOOT_MTCALIVE_NOT_SEEN_LOG_THROTTLE  (6)
#define PXEBOOT_MTCALIVE_LOSS_LOG_THROTTLE      (6)
int nodeLinkClass::pxeboot_mtcAlive_monitor ( struct nodeLinkClass::node * node_ptr )
{
    // TODO: Comment out once verified
    flog ("%s pxeboot mtcAlive fsm stage: %s",
              node_ptr->hostname.c_str(),
              get_mtcAliveStages_str(node_ptr->mtcAliveStage).c_str());

    /* Nothing to monitor without a provisioned pxeboot network */
    if ( !this->pxeboot_network_provisioned ) return PASS ;

    switch (node_ptr->mtcAliveStage)
    {
        case MTC_MTCALIVE__START:
        {
            alog2 ("%s mtcAlive start", node_ptr->hostname.c_str());
            mtcTimer_reset ( node_ptr->mtcAlive_timer );
            node_ptr->mtcAlive_sequence[PXEBOOT_INTERFACE] = 0 ;
            mtcAliveStageChange (node_ptr, MTC_MTCALIVE__SEND);
            break ;
        }
        case MTC_MTCALIVE__SEND:
        {
            /* pxeboot info refresh audit */
            if ( node_ptr->hostname == my_hostname )
                pxebootInfo_loader ();

            alog2 ("%s mtcAlive send", node_ptr->hostname.c_str());
            send_mtc_cmd ( node_ptr->hostname, MTC_REQ_MTCALIVE, PXEBOOT_INTERFACE );

            // Baseline the saved sequence number to the current sequence
            // number rather than to 0.
            //
            // This stage is entered from START, where the sequence number
            // was just cleared, and also directly from the CHECK loss and
            // not-seen paths where it was not. Saving 0 in the loss case
            // would make the next CHECK see 'sequence > save' with a stale
            // sequence number ; reporting messages that never arrived and
            // resetting the loss log throttle on every loss cycle.
            node_ptr->mtcAlive_sequence_save[PXEBOOT_INTERFACE] =
                node_ptr->mtcAlive_sequence[PXEBOOT_INTERFACE] ;
            node_ptr->mtcAlive_sequence_miss[PXEBOOT_INTERFACE] = 0 ;
            mtcAliveStageChange (node_ptr, MTC_MTCALIVE__MONITOR);
            break ;
        }
        case MTC_MTCALIVE__MONITOR:
        {
            alog2 ("%s mtcAlive monitor", node_ptr->hostname.c_str());
            mtcTimer_start ( node_ptr->mtcAlive_timer, mtcTimer_handler,
                             PXEBOOT_MTCALIVE_MONITOR_RATE_SECS );
            mtcAliveStageChange (node_ptr, MTC_MTCALIVE__WAIT);
            break ;
        }
        case MTC_MTCALIVE__WAIT:
        {
            if ( mtcTimer_expired ( node_ptr->mtcAlive_timer ) )
                mtcAliveStageChange (node_ptr, MTC_MTCALIVE__CHECK);
            break ;
        }
        case MTC_MTCALIVE__CHECK:
        {
            if ( node_ptr->mtcAlive_sequence[PXEBOOT_INTERFACE] > node_ptr->mtcAlive_sequence_save[PXEBOOT_INTERFACE] )
            {
                // Typical success path
                alog2 ("%s pxeboot mtcAlive received %d messages since last audit ; this:%d last:%d",
                           node_ptr->hostname.c_str(),
                           node_ptr->mtcAlive_sequence[PXEBOOT_INTERFACE] - node_ptr->mtcAlive_sequence_save[PXEBOOT_INTERFACE],
                           node_ptr->mtcAlive_sequence[PXEBOOT_INTERFACE],
                           node_ptr->mtcAlive_sequence_save[PXEBOOT_INTERFACE]);

                // Now that we received a message we can dec the missed count
                if ( node_ptr->mtcAlive_sequence_miss[PXEBOOT_INTERFACE] )
                    node_ptr->mtcAlive_sequence_miss[PXEBOOT_INTERFACE]-- ;

                // Messaging is flowing again ; re-arm both throttled logs
                node_ptr->pxeboot_mtcAlive_not_seen_log_throttle = 0 ;
                node_ptr->pxeboot_mtcAlive_loss_log_throttle = 0 ;
                mtcAliveStageChange (node_ptr, MTC_MTCALIVE__MONITOR);
            }
            else if ( node_ptr->mtcAlive_sequence[PXEBOOT_INTERFACE] < node_ptr->mtcAlive_sequence_save[PXEBOOT_INTERFACE] )
            {
                // unexpected case
                wlog ("%s mtcAlive out-of-sequence ; this:%d last:%d",
                          node_ptr->hostname.c_str(),
                          node_ptr->mtcAlive_sequence[PXEBOOT_INTERFACE],
                          node_ptr->mtcAlive_sequence_save[PXEBOOT_INTERFACE]);
                node_ptr->mtcAlive_sequence_miss[PXEBOOT_INTERFACE]++ ;
                mtcAliveStageChange (node_ptr, MTC_MTCALIVE__START);
            }
            // Sequence unchanged since the last audit ; count the miss.
            // Note the pre-increment ; the miss count is bumped even when
            // this condition evaluates false and a later branch is taken.
            else if ( ++node_ptr->mtcAlive_sequence_miss[PXEBOOT_INTERFACE] < PXEBOOT_MTCALIVE_LOSS_THRESHOLD )
            {
                // Missing pxeboot mtcAlive
                alog ("%s pxeboot mtcAlive miss count %d ; sending request",
                          node_ptr->hostname.c_str(),
                          node_ptr->mtcAlive_sequence_miss[PXEBOOT_INTERFACE]);
                send_mtc_cmd ( node_ptr->hostname, MTC_REQ_MTCALIVE, PXEBOOT_INTERFACE );
                mtcAliveStageChange (node_ptr, MTC_MTCALIVE__MONITOR);
            }
            else if ( node_ptr->mtcAlive_pxeboot == true )
            {
                // Miss threshold reached with mtcAlive_pxeboot set
                wlog_throttled (node_ptr->pxeboot_mtcAlive_loss_log_throttle,
                                PXEBOOT_MTCALIVE_LOSS_LOG_THROTTLE,
                                "%s pxeboot mtcAlive loss ; missed: %d ; last: count:%d seq: %d ; sending request",
                                node_ptr->hostname.c_str(),
                                node_ptr->mtcAlive_sequence_miss[PXEBOOT_INTERFACE],
                                node_ptr->mtcAlive_pxeboot_count,
                                node_ptr->mtcAlive_sequence_save[PXEBOOT_INTERFACE]);
                mtcAliveStageChange (node_ptr, MTC_MTCALIVE__SEND);
            }
            else
            {
                // Miss threshold reached with mtcAlive_pxeboot not set
                ilog_throttled (node_ptr->pxeboot_mtcAlive_not_seen_log_throttle,
                                PXEBOOT_MTCALIVE_NOT_SEEN_LOG_THROTTLE,
                                "%s pxeboot mtcAlive not seen yet ; sending request",
                                node_ptr->hostname.c_str());
                mtcAliveStageChange (node_ptr, MTC_MTCALIVE__SEND);
            }

            // Remember this audit's sequence number for the next CHECK
            node_ptr->mtcAlive_sequence_save[PXEBOOT_INTERFACE] = node_ptr->mtcAlive_sequence[PXEBOOT_INTERFACE] ;

            // TODO (emacdona): Need to handle loss case that manages raising the alarm
            //                  Transition to MTC_MTCALIVE__FAIL
            break ;
        }
        case MTC_MTCALIVE__FAIL:
        {
            wlog ("%s mtcAlive fail", node_ptr->hostname.c_str());
            mtcAliveStageChange (node_ptr, MTC_MTCALIVE__START);
            break ;
        }
        default:
        {
            // Unknown stage ; log and restart the fsm
            slog ("%s mtcAlive fsm default", node_ptr->hostname.c_str());
            mtcAliveStageChange (node_ptr, MTC_MTCALIVE__START);
            break ;
        }
    }
    return (PASS);
}
int local_counter = 0 ;

View File

@ -1,7 +1,7 @@
#ifndef __INCLUDE_MTCNODEMSG_HH__
#define __INCLUDE_MTCNODEMSG_HH__
/*
* Copyright (c) 2013, 2016 Wind River Systems, Inc.
* Copyright (c) 2013, 2016, 2024 Wind River Systems, Inc.
*
* SPDX-License-Identifier: Apache-2.0
*
@ -53,7 +53,7 @@ using namespace std;
#define MTC_AGENT_RX_BUFF_SIZE (MAX_NODES*MAX_MSG)
#define MAX_RX_MSG_BATCH (20)
#define MAX_RX_MSG_BATCH (50)
/** Maintenance messaging socket control structure */
typedef struct
@ -63,25 +63,26 @@ typedef struct
/** UDP sockets used by the mtcAgent to transmit and receive
* maintenance commands to the client (compute) node and
* receive the compute node reply in the receive direction */
msgClassSock* mtc_agent_tx_socket ; /**< tx to mtc client mgmnt */
msgClassSock* mtc_agent_clstr_tx_socket; /**< tx to mtc client clstr */
msgClassSock* mtc_agent_rx_socket ; /**< rx from mtc client mgmnt */
msgClassSock* mtc_agent_clstr_rx_socket; /**< rx from mtc client clstr */
int mtc_agent_port ; /**< the agent rx port number */
msgClassSock* mtc_agent_mgmt_tx_socket ; /**< tx to mtc client mgmnt */
msgClassSock* mtc_agent_mgmt_rx_socket ; /**< rx from mtc client mgmnt */
msgClassSock* mtc_agent_clstr_tx_socket ; /**< tx to mtc client clstr */
msgClassSock* mtc_agent_clstr_rx_socket ; /**< rx from mtc client clstr */
int mtc_agent_port ; /**< the agent rx port number */
int mtc_rx_mgmnt_port ; /**< the agent rx port number */
struct sockaddr_in agent_addr; /**< socket attributes struct */
int mtc_agent_rx_socket_size ;
int mtc_agent_mgmt_rx_socket_size ;
int mtc_agent_clstr_rx_socket_size ;
/** UDP sockets used by the mtcClient to receive maintenance
* commands from and transmit replies to the mtcAgent */
msgClassSock* mtc_client_rx_socket ; /**< rx from controller */
msgClassSock* mtc_client_tx_socket ; /**< tx to controller mgmnt */
msgClassSock* mtc_client_tx_socket_c0_clstr ; /**< tx to controller-0 clstr i/f */
msgClassSock* mtc_client_tx_socket_c1_clstr ; /**< tx to controller-1 clstr i/f */
msgClassSock* mtc_client_clstr_rx_socket ; /**< rx from controller clstr */
int mtc_mgmnt_cmd_port ; /**< mtc command port mgmnt i/f */
int mtc_clstr_cmd_port ; /**< mtc command port clstr i/f */
msgClassSock* mtc_client_mgmt_rx_socket ; /**< rx from controller mgmt */
msgClassSock* mtc_client_mgmt_tx_socket ; /**< tx to controller mgmnt */
msgClassSock* mtc_client_clstr_tx_socket_c0 ; /**< tx to controller-0 clstr */
msgClassSock* mtc_client_clstr_tx_socket_c1 ; /**< tx to controller-1 clstr */
msgClassSock* mtc_client_clstr_rx_socket ; /**< rx from controller clstr */
int mtc_mgmnt_cmd_port ; /**< mtc command port mgmnt */
int mtc_clstr_cmd_port ; /**< mtc command port clstr */
struct sockaddr_in mtc_cmd_addr ; /**< socket attributes mgmnt */
/***************************************************************/
@ -106,6 +107,12 @@ typedef struct
struct timeval waitd ;
fd_set readfds;
/** IPV4 Pxeboot transmit and receive sockets and ports */
int pxeboot_tx_socket ;
int mtc_tx_pxeboot_port ;
int pxeboot_rx_socket ;
int mtc_rx_pxeboot_port ;
/** Active Monitor Socket */
int amon_socket ;
@ -115,7 +122,7 @@ typedef struct
int netlink_sock ; /* netlink socket */
int ioctl_sock ; /* general ioctl socket */
float msg_rate ;
} mtc_socket_type ;

View File

@ -16,11 +16,11 @@ inventory_port = 6385 ; The Inventory Port Number
keystone_port = 5000 ; The Keystone Port Number
ha_port = 7777 ; The HA REST API Port Number
mtc_agent_port = 2101 ; OBS: ........ Active Controller Maintenance Rx Port
mtc_rx_mgmnt_port = 2101 ; Active Controller Maintenance Mgmnt Network Rx Port
mtc_rx_clstr_port = 2111 ; Active Controller Maintenance Clstr Network Rx Port
mtc_rx_mgmnt_port = 2101 ; mtcAgent management network msg receive port
mtc_rx_pxeboot_port = 2102 ; mtcAgent pxeboot network msg receive port
mtc_rx_clstr_port = 2111 ; mtcAgent cluster-host network msg receive port
hbs_agent_mgmnt_port = 2103 ; Management Interface Heartbeat Pulse Response Rx Port
hbs_agent_clstr_port = 2113 ; Cluster-host Interface Heartbeat Pulse Response Rx Port
clstr_agent_port = 2110 ; Agent Command Response RX Port
mtc_to_hbs_cmd_port = 2104 ; Mtc to Hbs Command Port Number
mtc_to_guest_cmd_port = 2108 ; Mtc to guestAgent Command port
hbs_to_mtc_event_port = 2107 ; Hbs to Mtc Event Port Number
@ -87,6 +87,7 @@ scheduling_priority = 45 ; realtime scheduling; range of 1 .. 99
mtc_rx_mgmnt_port = 2118 ; Client Maintenance Management Command Rx Port
mtc_rx_clstr_port = 2115 ; Client Maintenance Cluster-host Command Rx Port
mtc_rx_pxeboot_port = 2119 ; Client Maintenance pxeboot Command RX Port
hbs_client_mgmnt_port = 2106 ; Management Interface Heartbeat Pulse Request Rx Port
hbs_client_clstr_port = 2116 ; Cluster-host Interface Heartbeat Pulse Request Rx Port
hwmon_cmd_port = 2114 ; hwmond Command Rx Port Number