diff --git a/mtce-common/src/common/fitCodes.h b/mtce-common/src/common/fitCodes.h index 023f3483..91009e42 100644 --- a/mtce-common/src/common/fitCodes.h +++ b/mtce-common/src/common/fitCodes.h @@ -1,7 +1,7 @@ #ifndef __INCLUDE_FITCODES_H__ #define __INCLUDE_FITCODES_H__ /* - * Copyright (c) 2013, 2016 Wind River Systems, Inc. + * Copyright (c) 2013, 2016, 2024 Wind River Systems, Inc. * * SPDX-License-Identifier: Apache-2.0 * @@ -43,6 +43,8 @@ #define MTC_CMD_FIT__NO_MGMNT_ACK ("/var/run/fit/no_mgmnt_ack") /* mtcClient */ #define MTC_CMD_FIT__NO_CLSTR_ACK ("/var/run/fit/no_clstr_ack") /* mtcClient */ #define MTC_CMD_FIT__NO_MTCALIVE ("/var/run/fit/no_mtcalive") /* mtcClient */ +#define MTC_CMD_FIT__PXEBOOT_RXSOCK ("/var/run/fit/pxeboot_rxsock") /* mtcClient */ +#define MTC_CMD_FIT__PXEBOOT_TXSOCK ("/var/run/fit/pxeboot_txsock") /* mtcClient */ #define MTC_CMD_FIT__MGMNT_RXSOCK ("/var/run/fit/mgmnt_rxsock") /* mtcClient */ #define MTC_CMD_FIT__MGMNT_TXSOCK ("/var/run/fit/mgmnt_txsock") /* mtcClient */ #define MTC_CMD_FIT__CLSTR_RXSOCK ("/var/run/fit/clstr_rxsock") /* mtcClient */ @@ -183,4 +185,7 @@ #define FIT_CODE__HWMON__SET_DB_GROUP_STATUS (177) #define FIT_CODE__HWMON__SET_DB_GROUP_STATE (178) + +#define TESTMASK__MSG__MTCALIVE_STRESS (0x00000001) + #endif /* __INCLUDE_FITCODES_H__ */ diff --git a/mtce-common/src/common/logMacros.h b/mtce-common/src/common/logMacros.h index 55f6d293..41de0aa7 100644 --- a/mtce-common/src/common/logMacros.h +++ b/mtce-common/src/common/logMacros.h @@ -48,6 +48,7 @@ typedef struct char* mgmnt_iface ; /**< management interface name pointer */ char* clstr_iface ; /**< cluster-host interface name pointer */ + char* pxeboot_iface ; /**< pxeboot interface name pointer */ char* multicast ; /**< Multicast address */ int ha_port ; /**< HA REST API Port Number */ int vim_cmd_port ; /**< Mtce -> VIM Command REST API Port */ @@ -75,6 +76,8 @@ typedef struct char* barbican_api_host ; /**< Barbican REST API host IP address */ int 
barbican_api_port ; /**< Barbican REST API port number */ + int mtc_tx_pxeboot_port ; /**< mtcAgent/Client pxeboot nwk tx port */ + int mtc_rx_pxeboot_port ; /**< mtcClient listens pxeboot nwk cmd reqs */ int mtc_rx_mgmnt_port ; /**< mtcClient listens mgmnt nwk cmd reqs */ int mtc_rx_clstr_port ; /**< mtcClient listens clstr nwk cmd reqs */ int mtc_tx_mgmnt_port ; /**< mtcClient sends mgmnt nwk cmds/resp's */ @@ -258,6 +261,22 @@ extern char *program_invocation_short_name; else { syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Error : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \ } +/** mtcAlive alog logger macro with throttling ; only active while debug_alive is set, logs on the 1st of every 'max' calls ; 'cnt' is the caller's modifiable counter, incremented here and reset to 0 once it reaches 'max' */ +#define alog_throttled(cnt,max,format,args...) { \ + if ( daemon_get_cfg_ptr()->debug_alive ) \ + { \ + if ( ++cnt == 1 ) \ + { \ + if (ltc()) { printf ("%s [%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Alive: " format, pt(), getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \ + else { syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Alive: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \ + } \ + if ( cnt >= max ) \ + { \ + cnt = 0 ; \ + } \ + } \ +} + /** Error logger macro with throttling */ #define elog_throttled(cnt,max,format,args...) { \ if ( ++cnt == 1 ) \ @@ -389,37 +408,37 @@ extern char *program_invocation_short_name; #define plog(format, args...) { syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Info : " format, getpid(), lc(), _hn(), _pn, "|-|", __FILE__, __LINE__, __FUNCTION__, ##args) ; } #define mlog(format, args...) { if(daemon_get_cfg_ptr()->debug_msg&1 ) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Msg : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } -#define mlog1(format, args...) 
{ if(daemon_get_cfg_ptr()->debug_msg&2 ) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Msg2 : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } -#define mlog2(format, args...) { if(daemon_get_cfg_ptr()->debug_msg&4 ) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Msg4 : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } -#define mlog3(format, args...) { if(daemon_get_cfg_ptr()->debug_msg&8 ) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Msg8 : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } +#define mlog1(format, args...) { if(daemon_get_cfg_ptr()->debug_msg&2 ) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Msg1 : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } +#define mlog2(format, args...) { if(daemon_get_cfg_ptr()->debug_msg&4 ) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Msg2 : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } +#define mlog3(format, args...) { if(daemon_get_cfg_ptr()->debug_msg&8 ) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Msg3 : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } #define jlog(format, args...) { if(daemon_get_cfg_ptr()->debug_json&1) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Json : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } -#define jlog1(format, args...) { if(daemon_get_cfg_ptr()->debug_json&2) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Json2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } -#define jlog2(format, args...) 
{ if(daemon_get_cfg_ptr()->debug_json&4) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Json4: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } -#define jlog3(format, args...) { if(daemon_get_cfg_ptr()->debug_json&8) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Json8: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } +#define jlog1(format, args...) { if(daemon_get_cfg_ptr()->debug_json&2) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Json1: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } +#define jlog2(format, args...) { if(daemon_get_cfg_ptr()->debug_json&4) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Json2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } +#define jlog3(format, args...) { if(daemon_get_cfg_ptr()->debug_json&8) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Json3: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } #define hlog(format, args...) { if(daemon_get_cfg_ptr()->debug_http&1) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Http : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } -#define hlog1(format, args...) { if(daemon_get_cfg_ptr()->debug_http&2) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Http2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } -#define hlog2(format, args...) { if(daemon_get_cfg_ptr()->debug_http&4) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Http4: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } -#define hlog3(format, args...) 
{ if(daemon_get_cfg_ptr()->debug_http&8) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Http8: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } +#define hlog1(format, args...) { if(daemon_get_cfg_ptr()->debug_http&2) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Http1: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } +#define hlog2(format, args...) { if(daemon_get_cfg_ptr()->debug_http&4) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Http2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } +#define hlog3(format, args...) { if(daemon_get_cfg_ptr()->debug_http&8) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Http3: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } #define alog(format, args...) { if(daemon_get_cfg_ptr()->debug_alive&1) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Alive : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } -#define alog1(format, args...) { if(daemon_get_cfg_ptr()->debug_alive&2) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Alive2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } -#define alog2(format, args...) { if(daemon_get_cfg_ptr()->debug_alive&4) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Alive4: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } -#define alog3(format, args...) { if(daemon_get_cfg_ptr()->debug_alive&8) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Alive8: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } +#define alog1(format, args...) 
{ if(daemon_get_cfg_ptr()->debug_alive&2) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Alive1: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } +#define alog2(format, args...) { if(daemon_get_cfg_ptr()->debug_alive&4) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Alive2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } +#define alog3(format, args...) { if(daemon_get_cfg_ptr()->debug_alive&8) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Alive3: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } #define qlog(format, args...) { if(daemon_get_cfg_ptr()->debug_work&1) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Work : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } -#define qlog1(format, args...) { if(daemon_get_cfg_ptr()->debug_work&2) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Work2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } -#define qlog2(format, args...) { if(daemon_get_cfg_ptr()->debug_work&4) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Work4: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } -#define qlog3(format, args...) { if(daemon_get_cfg_ptr()->debug_work&8) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Work8: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } +#define qlog1(format, args...) { if(daemon_get_cfg_ptr()->debug_work&2) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Work1: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } +#define qlog2(format, args...) 
{ if(daemon_get_cfg_ptr()->debug_work&4) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Work2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } +#define qlog3(format, args...) { if(daemon_get_cfg_ptr()->debug_work&8) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Work3: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } #define flog(format, args...) { if(daemon_get_cfg_ptr()->debug_fsm) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: FSM : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } #define tlog(format, args...) { if(daemon_get_cfg_ptr()->debug_timer) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Timer: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } #define clog(format, args...) { if(daemon_get_cfg_ptr()->debug_state&1) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Change: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } -#define clog1(format, args...) { if(daemon_get_cfg_ptr()->debug_state&2) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Chang2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } -#define clog2(format, args...) { if(daemon_get_cfg_ptr()->debug_state&4) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Chang4: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } -#define clog3(format, args...) { if(daemon_get_cfg_ptr()->debug_state&8) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Chang8: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } +#define clog1(format, args...) 
{ if(daemon_get_cfg_ptr()->debug_state&2) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Chang1: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } +#define clog2(format, args...) { if(daemon_get_cfg_ptr()->debug_state&4) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Chang2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } +#define clog3(format, args...) { if(daemon_get_cfg_ptr()->debug_state&8) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Chang3: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } #define log_event(format, args...) { syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Event: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } diff --git a/mtce-common/src/common/nodeBase.cpp b/mtce-common/src/common/nodeBase.cpp index 750515b3..2204a387 100755 --- a/mtce-common/src/common/nodeBase.cpp +++ b/mtce-common/src/common/nodeBase.cpp @@ -257,49 +257,17 @@ void print_mtc_message ( string hostname, const char * iface, bool force ) { - /* Handle raw json string messages differently. - * Those messages just have a json string that starts at the header */ - if ( msg.hdr[0] == '{' ) - { - if ( force ) - { - ilog ("%s %s (%s network) - %s\n", - hostname.c_str(), - direction ? "rx <-" : "tx ->" , - iface, - msg.hdr); - } - else if (( daemon_get_cfg_ptr()->debug_alive&1) && ( msg.cmd == MTC_MSG_MTCALIVE )) - { - alog ("%s %s (%s network) - %s\n", - hostname.c_str(), - direction ? "rx <-" : "tx ->" , - iface, - msg.hdr); - } - else - { - mlog1 ("%s %s (%s network) - %s\n", - hostname.c_str(), - direction ? 
"rx <-" : "tx ->" , - iface, - msg.hdr); - } - return ; - } - string str = "" ; if ( msg.buf[0] ) str = msg.buf ; if ( force ) { - ilog ("%s %s %s (%s network) %d.%d %x:%x:%x.%x.%x.%x [%s] %s\n", + ilog ("%s%s %s %s %s network: %x:%x:%x.%x.%x.%x [%s] %s\n", hostname.c_str(), - direction ? "rx <-" : "tx ->" , + direction ? "" : " tx" , get_mtcNodeCommand_str (msg.cmd), + direction ? "from" : "to" , iface, - msg.ver, - msg.rev, msg.cmd, msg.num, msg.parm[0], @@ -309,15 +277,31 @@ void print_mtc_message ( string hostname, msg.hdr, str.c_str()); } + else if ( msg.cmd == MTC_MSG_MTCALIVE || msg.cmd == MTC_REQ_MTCALIVE ) + { + alog ("%s%s %s %s %s network: [%x:%x:%x:%x:%x:%x:%s] %s", + hostname.c_str(), + direction ? "" : " tx" , + get_mtcNodeCommand_str (msg.cmd), + direction ? "from" : "to" , + iface, + msg.cmd, + msg.num, + msg.parm[0], + msg.parm[1], + msg.parm[2], + msg.parm[3], + msg.hdr, + str.c_str()); + } else { - mlog1 ("%s %s %s (%s network) %d.%d %x:%x:%x.%x.%x.%x [%s] %s\n", - hostname.c_str(), - direction ? "rx <-" : "tx ->" , + mlog1 ("%s%s %s %s %s network: %x:%x:%x.%x.%x.%x [%s] %s", + hostname.c_str(), + direction ? "" : " tx" , get_mtcNodeCommand_str (msg.cmd), + direction ? 
"from" : "to" , iface, - msg.ver, - msg.rev, msg.cmd, msg.num, msg.parm[0], @@ -344,6 +328,8 @@ static std::string configStages_str [MTC_CONFIG__STAGES +1] ; static std::string addStages_str [MTC_ADD__STAGES +1] ; static std::string delStages_str [MTC_DEL__STAGES +1] ; static std::string subStages_str [MTC_SUBSTAGE__STAGES +1] ; +static std::string mtcAliveStages_str [MTC_MTCALIVE__STAGES +1] ; + void mtc_stages_init ( void ) { @@ -377,7 +363,7 @@ void mtc_stages_init ( void ) enableStages_str [MTC_ENABLE__FAILURE ] = "Failure"; enableStages_str [MTC_ENABLE__FAILURE_WAIT ] = "Failure-Wait"; enableStages_str [MTC_ENABLE__FAILURE_SWACT_WAIT ] = "Failure-Swact-Wait"; - enableStages_str [MTC_ENABLE__STAGES ] = "unknown" ; + enableStages_str [MTC_ENABLE__STAGES ] = "Enable-Unknown" ; recoveryStages_str[MTC_RECOVERY__START ] = "Handler-Start"; recoveryStages_str[MTC_RECOVERY__RETRY_WAIT ] = "Req-Retry-Wait"; @@ -402,7 +388,7 @@ void mtc_stages_init ( void ) recoveryStages_str[MTC_RECOVERY__FAILURE ] = "Failure"; recoveryStages_str[MTC_RECOVERY__WORKQUEUE_WAIT ] = "WorkQ-Wait"; recoveryStages_str[MTC_RECOVERY__ENABLE ] = "Enable"; - recoveryStages_str[MTC_RECOVERY__STAGES ] = "unknown"; + recoveryStages_str[MTC_RECOVERY__STAGES ] = "Recovery-Unknown"; disableStages_str [MTC_DISABLE__START ] = "Disable-Start"; disableStages_str [MTC_DISABLE__HANDLE_POWERON_SEND ] = "Disable-PowerOn-Send"; @@ -416,7 +402,7 @@ void mtc_stages_init ( void ) disableStages_str [MTC_DISABLE__TASK_STATE_UPDATE ] = "Disable-States-Update"; disableStages_str [MTC_DISABLE__WORKQUEUE_WAIT ] = "Disable-WorkQ-Wait"; disableStages_str [MTC_DISABLE__DISABLED ] = "Host-Disabled"; - disableStages_str [MTC_DISABLE__STAGES ] = "Unknown"; + disableStages_str [MTC_DISABLE__STAGES ] = "Disable-Unknown"; powerStages_str [MTC_POWERON__START ] = "Power-On-Start"; powerStages_str [MTC_POWERON__POWER_STATUS_WAIT ] = "Power-On-Status"; @@ -445,17 +431,16 @@ void mtc_stages_init ( void ) powercycleStages_str 
[MTC_POWERCYCLE__POWEROFF_WAIT ] = "Power-Cycle-Off-Wait"; powercycleStages_str [MTC_POWERCYCLE__POWERON ] = "Power-Cycle-On"; powercycleStages_str [MTC_POWERCYCLE__POWERON_REQWAIT] = "Power-Cycle-On-Req-Wait"; - powercycleStages_str [MTC_POWERCYCLE__POWERON_VERIFY] = "Power-Cycle-On-Verify"; + powercycleStages_str [MTC_POWERCYCLE__POWERON_VERIFY ] = "Power-Cycle-On-Verify"; powercycleStages_str [MTC_POWERCYCLE__POWERON_WAIT ] = "Power-Cycle-On-Wait"; powercycleStages_str [MTC_POWERCYCLE__DONE ] = "Power-Cycle-Done"; powercycleStages_str [MTC_POWERCYCLE__FAIL ] = "Power-Cycle-Fail"; powercycleStages_str [MTC_POWERCYCLE__HOLDOFF ] = "Power-Cycle-Hold-Off"; powercycleStages_str [MTC_POWERCYCLE__COOLOFF ] = "Power-Cycle-Cool-Off"; - powercycleStages_str [MTC_POWERCYCLE__POWEROFF_CMND_WAIT] = "Power-Cycle-Off-Cmnd-Wait"; powercycleStages_str [MTC_POWERCYCLE__POWERON_CMND_WAIT] = "Power-Cycle-On-Cmnd-Wait"; powercycleStages_str [MTC_POWERCYCLE__POWERON_VERIFY_WAIT]= "Power-Cycle-On-Verify-Wait"; - + powercycleStages_str [MTC_POWERCYCLE__STAGES ] = "Power-Cycle-Unknown"; resetStages_str [MTC_RESET__START ] = "Reset-Start"; resetStages_str [MTC_RESET__REQ_SEND ] = "Reset-Req-Send"; @@ -529,6 +514,7 @@ void mtc_stages_init ( void ) delStages_str [MTC_DEL__START ] = "Del-Start"; delStages_str [MTC_DEL__WAIT ] = "Del-Wait"; delStages_str [MTC_DEL__DONE ] = "Del-Done"; + delStages_str [MTC_DEL__STAGES ] = "Del-Unknown"; subStages_str [MTC_SUBSTAGE__START ] = "subStage-Start"; subStages_str [MTC_SUBSTAGE__SEND ] = "subStage-Send"; @@ -536,6 +522,15 @@ void mtc_stages_init ( void ) subStages_str [MTC_SUBSTAGE__WAIT ] = "subStage-Wait"; subStages_str [MTC_SUBSTAGE__DONE ] = "subStage-Done"; subStages_str [MTC_SUBSTAGE__FAIL ] = "subStage-Fail"; + subStages_str [MTC_SUBSTAGE__STAGES ] = "subStage-Unknown"; + + mtcAliveStages_str[MTC_MTCALIVE__START ] = "mtcAlive-Start"; + mtcAliveStages_str[MTC_MTCALIVE__MONITOR ] = "mtcAlive-Monitor"; + mtcAliveStages_str[MTC_MTCALIVE__WAIT ] = 
"mtcAlive-Wait"; + mtcAliveStages_str[MTC_MTCALIVE__CHECK ] = "mtcAlive-Check"; + mtcAliveStages_str[MTC_MTCALIVE__SEND ] = "mtcAlive-Send"; + mtcAliveStages_str[MTC_MTCALIVE__FAIL ] = "mtcAlive-Fail"; + mtcAliveStages_str[MTC_MTCALIVE__STAGES ] = "mtcAlive-Unknown"; } string get_delStages_str ( mtc_delStages_enum stage ) @@ -666,6 +661,15 @@ string get_subStages_str ( mtc_subStages_enum stage ) return (subStages_str[stage]); } +string get_mtcAliveStages_str ( mtc_mtcAliveStages_enum stage ) +{ + if ( stage >= MTC_MTCALIVE__STAGES ) + { + return (mtcAliveStages_str[MTC_MTCALIVE__STAGES]); + } + return (mtcAliveStages_str[stage]); +} + void log_adminAction ( string hostname, mtc_nodeAdminAction_enum currAction, mtc_nodeAdminAction_enum newAction ) diff --git a/mtce-common/src/common/nodeBase.h b/mtce-common/src/common/nodeBase.h index 38a188d7..5a14eeb9 100755 --- a/mtce-common/src/common/nodeBase.h +++ b/mtce-common/src/common/nodeBase.h @@ -67,10 +67,11 @@ void daemon_exit ( void ); #define FAIL_BM_PASSWORD (122*256) #define MTC_PARM_LOCK_PERSIST_IDX (0) // node lock command -#define MTC_PARM_UPTIME_IDX (0) -#define MTC_PARM_HEALTH_IDX (1) -#define MTC_PARM_FLAGS_IDX (2) -#define MTC_PARM_MAX_IDX (3) +#define MTC_PARM_UPTIME_IDX (0) // mtcAlive message +#define MTC_PARM_HEALTH_IDX (1) // mtcAlive message +#define MTC_PARM_FLAGS_IDX (2) // mtcAlive message +#define MTC_PARM_SEQ_IDX (3) // mtcAlive message +#define MTC_PARM_MAX_IDX (4) // mtcAlive message /** 'I Am ' flags for maintenance. 
* @@ -111,6 +112,8 @@ void daemon_exit ( void ); #define SMGMT_UNHEALTHY_FILE ((const char *)"/var/run/.sm_node_unhealthy") #define UNLOCK_READY_FILE ((const char *)"/etc/platform/.unlock_ready") #define STILL_SIMPLEX_FILE ((const char *)"/etc/platform/simplex") +#define FIRST_CONTROLLER_FILE ((const char *)"/etc/platform/.first_controller") +#define INIT_CONFIG_COMPLETE ((const char *)"/etc/platform/.initial_config_complete") /** path to and module init file name */ #define MTCE_CONF_FILE ((const char *)"/etc/mtc.conf") @@ -153,6 +156,8 @@ void daemon_exit ( void ); #define PMON_CONF_FILE_DIR ((const char *)"/etc/pmon.d") #define BM_DNSMASQ_FILENAME ((const char *)"dnsmasq.bmc_hosts") +#define OPT_PLATFORM_CONFIG_DIR ((const char *)"/opt/platform/config") +#define DNSMASQ_HOSTS_FILE ((const char *)"dnsmasq.hosts") /* supported BMC communication protocols ; access method */ typedef enum @@ -415,6 +420,7 @@ typedef enum #define CONTROLLER_1 ((const char *)"controller-1") #define CONTROLLER_2 ((const char *)"controller-2") #define CONTROLLER ((const char *)"controller") +#define CONTROLLERS (2) #define STORAGE_0 ((const char *)"storage-0") #define STORAGE_1 ((const char *)"storage-1") @@ -461,7 +467,8 @@ typedef enum /** Interface Codes **/ #define MGMNT_INTERFACE (0) #define CLSTR_INTERFACE (1) - +#define PXEBOOT_INTERFACE (2) +#define MTCALIVE_INTERFACES_MAX (3) /** Maintenance Inventory struct */ typedef struct @@ -1205,6 +1212,19 @@ typedef enum /** Return the string representing the specified 'sensor' stage */ string get_sensorStages_str ( mtc_sensorStages_enum stage ); +typedef enum +{ + MTC_MTCALIVE__START = 0, + MTC_MTCALIVE__MONITOR, + MTC_MTCALIVE__WAIT, + MTC_MTCALIVE__CHECK, + MTC_MTCALIVE__SEND, + MTC_MTCALIVE__FAIL, + MTC_MTCALIVE__STAGES +} mtc_mtcAliveStages_enum ; + +string get_mtcAliveStages_str ( mtc_mtcAliveStages_enum stage ); + typedef enum { MTC_OFFLINE__IDLE = 0, diff --git a/mtce-common/src/common/nodeUtil.cpp 
b/mtce-common/src/common/nodeUtil.cpp index 566a99dd..86681213 100755 --- a/mtce-common/src/common/nodeUtil.cpp +++ b/mtce-common/src/common/nodeUtil.cpp @@ -1,5 +1,5 @@ /* -* Copyright (c) 2013-2017 Wind River Systems, Inc. +* Copyright (c) 2013-2017, 2024 Wind River Systems, Inc. * * SPDX-License-Identifier: Apache-2.0 * @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -1350,24 +1351,582 @@ int get_pid_by_name_proc ( string procname ) } - -const char mgmnt_iface_str[] = { "Mgmnt" } ; -const char clstr_iface_str[] = { "Clstr" } ; -const char null_iface_str[] = { "Null" } ; +const char pxeboot_iface_str[] = { "Pxeboot" } ; +const char mgmnt_iface_str[] = { "Mgmnt" } ; +const char clstr_iface_str[] = { "Clstr" } ; +const char null_iface_str[] = { "Null" } ; const char * get_iface_name_str ( int iface ) { switch ( iface ) { case MGMNT_IFACE: - return mgmnt_iface_str; + return mgmnt_iface_str; case CLSTR_IFACE: return clstr_iface_str; + case PXEBOOT_INTERFACE: + return pxeboot_iface_str; default: return null_iface_str ; } } +/********************************************************************** + * Name : get_interface_name_str + * + * Purpose : get mtcAgent/Client interface name strings + * + * Return : pointer to the interface name string + **********************************************************************/ +const char * get_interface_name_str ( int iface ) +{ + switch ( iface ) + { + case MGMNT_INTERFACE: + return mgmnt_iface_str; + case CLSTR_INTERFACE: + return clstr_iface_str; + case PXEBOOT_INTERFACE: + return pxeboot_iface_str; + default: + return null_iface_str ; + } +} + +/********************************************************************** + * Name : get_iface_type_str + * + * Purpose : get interface type string + * + * Return : pointer to the interface type string + **********************************************************************/ +const char ethernet_iface_type_str[] = { "ethernet" }; +const char 
vlan_iface_type_str[] = { "vlan" }; +const char bond_iface_type_str[] = { "bond" }; +const char unknown_iface_type_str[] = { "unknown" }; + +const char * get_iface_type_str ( iface_type_enum type_enum ) +{ + switch ( type_enum ) + { + case ethernet: return ethernet_iface_type_str; + case vlan: return vlan_iface_type_str; + case bond: return bond_iface_type_str; + } + return unknown_iface_type_str; +} + +/******************************************************************** + * Name : get_iface_type + * + * Purpose : Fetch the specified interface's type as + * physical ethernet, vlan or bond. + * + * Description: This function opens the uevent file in /sys/class/net + * for the specified interface and uses DEVTYPE, in that + * info, to determine the specified interface type. + * A missing DEVTYPE label implies that it's a standard + * physical 'ethernet' interface type ; iface_type is then left as preset by the caller. + * + * Example: + * + * sysadmin@controller-0:~$ cat /sys/class/net/vlan163/uevent + * DEVTYPE=vlan + * INTERFACE=vlan163 + * IFINDEX=41 + * + * Updates: iface_type_enum (ethernet, vlan or bond) when a DEVTYPE line is found + * + * Returns: PASS or FAIL_FILE_OPEN + * ******************************************************************/ +int get_iface_type ( string iface, + iface_type_enum & iface_type ) +{ + int rc = PASS ; + + /* determine the interface type through uevent */ + string uevent_iface_file = INTERFACES_DIR + iface + "/uevent"; + ifstream _uevent( uevent_iface_file.data() ); + if ( _uevent ) + { + string line; + while( getline( _uevent, line ) ) + { + if ( line.find ("DEVTYPE") == 0 ) + { + if ( line.find ("=vlan") != string::npos ) + iface_type = vlan; + else if ( line.find ("=bond") != string::npos ) + iface_type = bond; + else + iface_type = ethernet ; + break; + } + } + } + else + { + wlog ("Failed to find file: %s", uevent_iface_file.c_str()); + rc = FAIL_FILE_OPEN ; + } + return (rc); +} + +/***************************************************************************** + * Name : get_iface_parent + * + * 
Purpose : Gets the ifname of the linked parent interface + * + * Updates : parent interface name. + * + * Returns : Returns PASS, FAIL_FILE_OPEN or FAIL_NOT_FOUND + ****************************************************************************/ +int get_iface_parent ( int network, string & ifname, string & parent ) +{ + int rc = PASS ; + + /* build the full file path */ + string iflink_file = INTERFACES_DIR + ifname + "/iflink"; + + /* declare a file stream based on the full file path */ + ifstream iflink_file_stream ( iflink_file.c_str() ); + + /* open the file stream */ + if (iflink_file_stream.is_open()) + { + int iflink = -1; + string iflink_line; + char * dummy_ptr ; + char iface_buffer [IF_NAMESIZE] = ""; + + /* start clean */ + MEMSET_ZERO (iface_buffer[0]); + + while ( getline (iflink_file_stream, iflink_line) ) + { + iflink = strtol(iflink_line.c_str(), &dummy_ptr, 10); + } + iflink_file_stream.close(); + + /* + * load iface_buffer with the name of the network interface + * corresponding to iflink. + */ + if_indextoname (iflink, iface_buffer); + + if (iface_buffer[0] != '\0') + { + parent = iface_buffer; + dlog ("%s network interface name: %s", + get_interface_name_str(network), + parent.c_str()); + } + else + { + wlog ("%s network parent interface not found for ifname:%s", + get_interface_name_str(network), ifname.c_str() ); + rc = FAIL_NOT_FOUND ; + } + } + else + { + wlog ("failed to open %s", iflink_file.c_str()); + rc = FAIL_FILE_OPEN ; + } + return rc ; +} + +/******************************************************************** + * Name : get_bond_mode + * + * Purpose : Get the mode of a Linux bonding interface. + * + * Description: Returns the data in /sys/class/net/bonding/mode + * as update to 'bond_mode' string reference argument. + * + * Example : $ cat /sys/class/net/pxeboot0/bonding/mode + * 802.3ad 4 + * + * Updates : bond_mode + * + * Returns : PASS or FAIL_FILE_OPEN if no bonding/mode file is found. 
+ *
+ * ******************************************************************/
+int get_bond_mode ( int      network,
+                    string   bond_name,
+                    string & bond_mode)
+{
+    int rc = PASS ;
+
+    /* The bonding mode is read from <INTERFACES_DIR><bond_name>/bonding/mode */
+    string bond_mode_file = INTERFACES_DIR + bond_name + "/bonding/mode";
+
+    ifstream bond_mode_data ( bond_mode_file.data() );
+    if (!bond_mode_data)
+    {
+        wlog ("Failed to find bonding mode file: %s",
+               bond_mode_file.c_str());
+        rc = FAIL_FILE_OPEN ;
+    }
+    else
+    {
+        /* Only the first line of the mode file is consumed */
+        getline ( bond_mode_data, bond_mode );
+        if ( ! bond_mode.empty() )
+        {
+            ilog ("%s network %s mode: %s",
+                   get_interface_name_str(network),
+                   bond_name.c_str(),
+                   bond_mode.c_str());
+        }
+    }
+    return rc ;
+}
+
+/*********************************************************************
+ * Name       : get_bond_slaves
+ *
+ * Purpose    : Get a bonded interface's slave names.
+ *
+ * Description: Reads the first line of
+ *              <INTERFACES_DIR><bond_name>/bonding/slaves and returns
+ *              the first two whitespace separated names found there
+ *              as updates to the reference arguments.
+ *
+ * Parameters : network   - network id ; used for logging only
+ *              bond_name - name of the bonded interface
+ *
+ * Updates    : slave1 and slave2 ; each is left untouched if the
+ *              corresponding name is not present in the file.
+ *
+ * Returns    : PASS or FAIL_FILE_OPEN if no slaves file is found.
+ *
+ *********************************************************************/
+int get_bond_slaves ( int      network,
+                      string   bond_name,
+                      string & slave1,
+                      string & slave2 )
+{
+    int rc = PASS ;
+    string bonded_interface_file = INTERFACES_DIR + bond_name + "/bonding/slaves";
+
+    ifstream slaves(bonded_interface_file.data());
+    if (!slaves)
+    {
+        wlog ("failed to open file: %s", bonded_interface_file.c_str());
+        rc = FAIL_FILE_OPEN ;
+    }
+    else
+    {
+        string bond_slaves ;
+        getline ( slaves, bond_slaves );
+        if ( ! bond_slaves.empty() )
+        {
+            dlog ("%s network %s slaves: %s",
+                   get_interface_name_str(network),
+                   bond_name.c_str(),
+                   bond_slaves.c_str());
+
+            /* Tokenize through a stream rather than strtok so that the
+             * string's internal buffer is never written to and no
+             * non-reentrant tokenizer state is involved. */
+            istringstream slave_stream ( bond_slaves );
+            string token ;
+            if ( slave_stream >> token )
+                slave1 = token ;
+            if ( slave_stream >> token )
+                slave2 = token ;
+        }
+    }
+    return rc ;
+}
+
+/*****************************************************************************
+ * Name       : get_iface_info
+ *
+ * Purpose    : Update the iface_info with interface type details and hierarchy.
+ *
+ * Description: Lookup the interface type, bond, vlan or physical ethernet.
+ *              Then for each case add interface info and create a 'chain'
+ *              string that represents the hierarchy.
+ *
+ *              - ethernet    - enp0s8
+ *              - vlan        - vlan16 -> enp0s8
+ *              - bond        - pxeboot0 -> enp0s8 and enp0s9
+ *              - bonded vlan - vlan16 -> pxeboot0 -> enp0s8 and enp0s9
+ *
+ * Parameters : network - network id ; passed to the helper lookups and
+ *                        used to label the logs
+ *              iface   - name of the interface to learn about
+ *
+ * Updates    : iface_info with learned interface type, parent, bond mode
+ *              and slaves. Note that for a vlan the iface_type is
+ *              overwritten with the type of the vlan's parent.
+ *
+ * Returns    : Returns PASS, FAIL_FILE_OPEN, FAIL_NOT_FOUND, FAIL_INVALID_DATA
+ *
+ *****************************************************************************/
+int get_iface_info ( int network, string iface, iface_info_type & iface_info )
+{
+    const char * network_str_ptr = get_interface_name_str (network) ;
+
+    iface_info.iface_name = iface ;
+    iface_info.iface_type = ethernet;
+    iface_info.chain = "" ;
+
+    int rc = get_iface_type ( iface_info.iface_name, iface_info.iface_type );
+    if ( rc )
+    {
+        wlog ("failed to get interface type from iface: %s", iface.c_str());
+        return rc ;
+    }
+    switch ( iface_info.iface_type )
+    {
+        case ethernet:
+        {
+            /* a physical interface is its own parent */
+            iface_info.parent = iface_info.iface_name ;
+            ilog ("%s network %s parent: %s", network_str_ptr, iface_info.iface_name.c_str(), iface_info.parent.c_str());
+            iface_info.chain.append (iface_info.parent);
+            break ;
+        }
+        case vlan:
+        {
+            /* Use the caller's network for the helper lookups, same as
+             * the bond case below, rather than a fixed network id. */
+            if (( rc = get_iface_parent ( network, iface_info.iface_name, iface_info.parent )) == PASS )
+            {
+                ilog ("%s network %s parent: %s", network_str_ptr, iface_info.iface_name.c_str(), iface_info.parent.c_str());
+
+                /* The vlan parent is either a bond or a physical interface */
+                if (( rc = get_iface_type ( iface_info.parent, iface_info.iface_type )) == PASS )
+                {
+                    if ( iface_info.iface_type == bond )
+                    {
+                        get_bond_mode ( network, iface_info.parent, iface_info.bond_mode);
+                        iface_info.chain.append( iface_info.iface_name + " -> " + iface_info.parent + " (" + iface_info.bond_mode + ")");
+                        if (( rc = get_bond_slaves ( network, iface_info.parent, iface_info.slave1, iface_info.slave2 )) == PASS )
+                        {
+                            iface_info.chain.append(" -> " + iface_info.slave1 + " and " + iface_info.slave2);
+                            ilog ("%s network %s slaves: %s and %s",
+                                   network_str_ptr, iface_info.parent.c_str(),
+                                   iface_info.slave1.c_str(), iface_info.slave2.c_str());
+                        }
+                        else
+                        {
+                            wlog ("failed to get slaves from bond: %s ; rc:%d", iface_info.parent.c_str(), rc);
+                            rc = FAIL_NOT_FOUND ;
+                        }
+                    }
+                    else
+                    {
+                        wlog ("%s network iface: %s", network_str_ptr, iface_info.iface_name.c_str());
+                        iface_info.chain.append( iface_info.iface_name + " -> " + iface_info.parent);
+                    }
+                }
+                else
+                {
+                    wlog ("failed to get %s network interface type from iface: %s ; rc:%d",
+                           network_str_ptr, iface_info.parent.c_str(), rc);
+                    rc = FAIL_NOT_FOUND ;
+                }
+            }
+            else
+            {
+                wlog ("failed to get parent interface from %s ; rc:%d", iface_info.iface_name.c_str(), rc );
+            }
+            break ;
+        }
+        case bond:
+        {
+            /* a bond is its own parent ; its links are the slaves */
+            iface_info.parent = iface_info.iface_name ;
+            ilog ("%s network %s", network_str_ptr, iface_info.iface_name.c_str());
+            get_bond_mode (network, iface_info.parent, iface_info.bond_mode);
+
+            iface_info.chain.append(iface_info.parent + " (" + iface_info.bond_mode + ")");
+            if (( rc = get_bond_slaves ( network, iface_info.parent, iface_info.slave1, iface_info.slave2 )) == PASS )
+            {
+                iface_info.chain.append(" -> " + iface_info.slave1 + " and " + iface_info.slave2);
+                ilog ("%s network %s slaves: %s and %s",
+                       network_str_ptr, iface_info.parent.c_str(),
+                       iface_info.slave1.c_str(), iface_info.slave2.c_str());
+            }
+            else
+            {
+                wlog ("failed to get slaves from bond: %s ; rc:%d", iface_info.iface_name.c_str(), rc);
+                rc = FAIL_NOT_FOUND ;
+            }
+            break ;
+        }
+        default:
+        {
+            wlog ("failed: unknown interface type: %d", iface_info.iface_type);
+            rc = FAIL_INVALID_DATA ;
+            break ;
+        }
+    }
+    if ( !iface_info.chain.empty() )
+    {
+        ilog ("Interface Chain: %s", iface_info.chain.c_str());
+    }
+    return rc ;
+}
+
+/*****************************************************************************
+ * Name       : get_pxeboot_dhcp_addr
+ *
+ * Purpose    : get the pxeboot address from dhcp leases file.
+ *
+ * Description: Worker and storage nodes DHCP for their pxeboot IP address.
+ *
+ * Therefore, the pxeboot address for non-controller nodes is taken from
+ * the 'fixed-address' label of the last tuple of the management interface's
+ * /var/lib/dhcp leases file.
+ *
+ * The leases directory is scanned for a file whose name contains the
+ * specified interface name ; if several match, the last one returned by
+ * the directory scan is used.
+ *
+ * Assumptions: If this lookup is for the pxeboot interface then the caller
+ *              is expected to suffix the interface name with a ":2"
+ *
+ * Example:
+ *
+ * sysadmin@worker-0:~$ cat /var/lib/dhcp/dhclient.enp0s3:2.leases
+ * lease {
+ *    interface "enp0s3:2";
+ *    fixed-address 169.254.202.159;   <-- non-controller pxeboot address
+ *    option subnet-mask 255.255.255.0;
+ *
+ * Returns: a string containing the unit's pxeboot address ; empty if the
+ *          leases directory, the lease file or the address is not found.
+ ******************************************************************************/
+string get_pxeboot_dhcp_addr ( string iface )
+{
+    // Struct to hold the items extracted from the lease.
+    // ... currently only the fixed-address is needed.
+    struct Lease { string address; };
+    #define DHCP_LEASES_DIR ((const char *) "/var/lib/dhcp")
+    string pxeboot_address = "" ; // return value
+
+    mlog ("learning pxeboot address ...");
+
+    Lease last_lease; // defaults to null info
+    string lease_filename = "" ;
+    DIR* dhcp_dir = opendir(DHCP_LEASES_DIR);
+    if ( dhcp_dir != NULL)
+    {
+        struct dirent* entry;
+        while ((entry = readdir(dhcp_dir)) != nullptr)
+        {
+            string _filename = entry->d_name;
+            // Check if the entry contains the interface name
+            if (_filename.find(iface) != string::npos)
+                lease_filename = _filename ;
+        }
+        closedir(dhcp_dir);
+    }
+    else
+    {
+        ilog ( "no dhcp leases");
+        return pxeboot_address ; // is null
+    }
+
+    // Nothing to parse if no lease file matched the interface name
+    if ( lease_filename.empty() )
+    {
+        ilog ("dhcp lease file %s/%s not found", DHCP_LEASES_DIR, iface.c_str());
+        return pxeboot_address ; // is null
+    }
+
+    string full_path = DHCP_LEASES_DIR;
+    full_path.append("/");
+    full_path.append(lease_filename);
+
+    mlog ("pxeboot dhcp lease file: %s", full_path.c_str());
+
+    ifstream lease_file(full_path);
+    if (lease_file.is_open())
+    {
+        string line;
+
+        // Iterate through the file line by line
+        while (getline(lease_file, line))
+        {
+            // search for new 'lease' entries
+            if (line.find("lease {") != string::npos)
+            {
+                // start over with an empty lease so that only the
+                // last lease in the file contributes the address
+                last_lease = Lease();
+            }
+
+            // If 'fixed-address' is found, update the last_lease
+            if (line.find("fixed-address") != string::npos)
+            {
+                istringstream leaseStream(line);
+                string token;
+                leaseStream >> token; // ignore "fixed-address" label
+                leaseStream >> last_lease.address; // just want the address
+
+                // If there is a ';' at the end of the line, remove it.
+                if (!last_lease.address.empty() && last_lease.address.back() == ';')
+                    last_lease.address.pop_back();
+            }
+        }
+        // The 'last_lease' should now contain this host's pxeboot address.
+        lease_file.close();
+    }
+    else
+    {
+        wlog ("unable to open dhcp lease file: %s", full_path.c_str());
+    }
+    pxeboot_address = last_lease.address ;
+    return (pxeboot_address);
+}
+
+/*****************************************************************************
+ * Name       : get_pxeboot_static_addr
+ *
+ * Purpose    : Get pxeboot address from pxeboot network interface config file.
+ *
+ * Description: The controller nodes pxeboot addresses are static.
+ *              Therefore, the pxeboot address for a controller node is
+ *              taken from the 'address' label inside the pxeboot network
+ *              interface file. Only lines that start with 'address' are
+ *              considered ; if several are present the last one wins.
+ *
+ * Assumptions: If this lookup is for the pxeboot interface then the caller
+ *              is expected to suffix the interface name with a ":2"
+ *
+ * Example:
+ *
+ * sysadmin@controller-1:/etc/network/interfaces.d$ cat ifcfg-enp0s8:2
+ * iface enp0s8:2 inet static
+ * address 169.254.202.3         <-- controller pxeboot address
+ * netmask 255.255.255.0
+ *
+ * Returns: a string containing the host's pxeboot address ; empty if the
+ *          interface file or the address is not found.
+ ****************************************************************************/
+string get_pxeboot_static_addr ( string iface )
+{
+    string pxeboot_address = "" ; // return value
+    string interface_file = NETWORK_INTERFACES_DIR ;
+    interface_file.append("/ifcfg-");
+    interface_file.append(iface);
+
+    if ( daemon_is_file_present (interface_file.data()))
+    {
+        ifstream iface_file(interface_file);
+        if (iface_file.is_open())
+        {
+            string line;
+
+            // Iterate through the file line by line ...
+            while (getline(iface_file, line))
+            {
+                // search for new 'address' entry where
+                // address is the first word of the line.
+                size_t position = line.find("address");
+                if ( position == 0 )
+                {
+                    istringstream fileStream(line);
+                    string token;
+                    fileStream >> token; // ignore "address" label
+                    fileStream >> pxeboot_address; // just want the address
+                    ilog ("found pxeboot address in %s", interface_file.c_str());
+                }
+            }
+            // close the file and return the pxeboot address.
+            iface_file.close();
+        }
+        else
+        {
+            wlog ("unable to open %s file for interface:%s",
+                   interface_file.c_str(),
+                   iface.c_str());
+        }
+    }
+    else
+    {
+        // This is normal for a controller before it is unlocked.
+        ilog ("no %s file present", interface_file.c_str() );
+    }
+    return (pxeboot_address);
+}
 string get_event_str ( int event_code ) { @@ -1534,7 +2093,7 @@ int send_log_message ( msgSock_type * sock_ptr, } else { - mlog2 ("%s:%s\n%s", &log.hostname[0], &log.filename[0], log_str ); + mlog1 ("%s:%s\n%s", &log.hostname[0], &log.filename[0], log_str ); } return rc ; } diff --git a/mtce-common/src/common/nodeUtil.h b/mtce-common/src/common/nodeUtil.h index c56f3796..938319e1 100755 --- a/mtce-common/src/common/nodeUtil.h +++ b/mtce-common/src/common/nodeUtil.h @@ -2,7 +2,7 @@ #define __INCLUDE_NODEUTIL_H__ /* -* Copyright (c) 2013-2014, 2016, 2019 Wind River Systems, Inc. +* Copyright (c) 2013-2014, 2016, 2019, 2024 Wind River Systems, Inc. * * SPDX-License-Identifier: Apache-2.0 * @@ -28,6 +28,8 @@ using namespace std; #define NODEUTIL_LATENCY_MON_START ((const char *)"start") void nodeUtil_latency_log ( string hostname, const char * label_ptr, int msecs ); +// path to the Debian network interfaces directory +#define NETWORK_INTERFACES_DIR (const char *)("/etc/network/interfaces.d") /* Common socket type struct */ typedef struct @@ -65,7 +67,68 @@ string get_iface_mac ( const char * iface_ptr ); void print_inv ( node_inv_type & info ); int get_iface_attrs ( const char * iface_ptr, int & index, int & speed , int & duplex , string & autoneg ); + const char * get_iface_name_str ( int iface ); +const char * get_interface_name_str ( int iface ); + +/* Used to learn the pxeboot address */ +enum iface_type_enum { ethernet = 0, vlan = 1, bond = 2 }; +typedef struct +{ + string iface_name ; + iface_type_enum iface_type = ethernet ; + + /* vlan link ; physical or bond + * + * The parent interface is the physical network interface + * to which the VLAN is associated.*/ +
string parent = "" ; + + /* bond links ; two physical interfaces + * + * A bond is a logical interface created by combining multiple + * physical network interfaces, known as "slaves"*/ + string slave1 = "" ; + string slave2 = "" ; + + /* bonding mode ; active-backup, balanced-xor, 802.3ad, etc. + * A string that represents the bonding mode string and id + * Example: 802.3ad 4 */ + string bond_mode = "" ; + + /* string representing the iface hierarchy. + * + * ethernet + * bond -> slaves + * vlan -> bond -> slaves + * + * This interface chain string exists soley for the purpose + * of logging for the report tool system info. */ + string chain = "" ; +} iface_info_type ; + +#define INTERFACES_DIR ((const char *)"/sys/class/net/") + +const char * get_iface_type_str ( iface_type_enum type_enum ); +int get_iface_type ( string iface, + iface_type_enum & iface_type ); +int get_iface_parent ( int network, + string & ifname, + string & parent ); +int get_bond_slaves ( int network, + string bonded_iface, + string & slave1, + string & slave2 ); +int get_bond_mode ( int network, + string bonded_iface, + string & bond_mode); +int get_iface_info ( int network, + string iface, + iface_info_type & iface_info); + +// For the mtcClient pxeboot address learning. +string get_pxeboot_dhcp_addr ( string iface ); // worker/storage +string get_pxeboot_static_addr ( string iface ); // controllers unsigned int get_host_function_mask ( string & nodeType_str ); bool is_combo_system (unsigned int nodetype_mask ); diff --git a/mtce-common/src/common/returnCodes.h b/mtce-common/src/common/returnCodes.h index 2984a1e2..7201fcb6 100644 --- a/mtce-common/src/common/returnCodes.h +++ b/mtce-common/src/common/returnCodes.h @@ -1,7 +1,7 @@ #ifndef __INCLUDE_RETURNCODES_H__ #define __INCLUDE_RETURNCODES_H__ /* - * Copyright (c) 2013, 2016 Wind River Systems, Inc. + * Copyright (c) 2013, 2016, 2024 Wind River Systems, Inc. 
* * SPDX-License-Identifier: Apache-2.0 * @@ -116,7 +116,7 @@ #define FAIL_DUP_HOSTNAME (92) #define FAIL_DUP_IPADDR (93) #define FAIL_DUP_MACADDR (94) -#define FAIL____UNUSED____95 (95) +#define FAIL_INVALID_IP (95) #define FAIL_LOCATE_KEY_VALUE (96) #define FAIL_JSON_OBJECT (97) #define FAIL_EXTERNAL_API (98) diff --git a/mtce-common/src/daemon/daemon_config.cpp b/mtce-common/src/daemon/daemon_config.cpp index 713d3b65..3bd49b52 100644 --- a/mtce-common/src/daemon/daemon_config.cpp +++ b/mtce-common/src/daemon/daemon_config.cpp @@ -1,5 +1,5 @@ /* -* Copyright (c) 2013-2014, 2016 Wind River Systems, Inc. +* Copyright (c) 2013-2014, 2016, 2024 Wind River Systems, Inc. * * SPDX-License-Identifier: Apache-2.0 * @@ -43,6 +43,7 @@ void daemon_config_default ( daemon_config_type* config_ptr ) config_ptr->sysinv_mtc_inv_label = strdup("none"); config_ptr->mgmnt_iface = strdup("none"); config_ptr->clstr_iface = strdup("none"); + config_ptr->pxeboot_iface = strdup("none"); config_ptr->sysinv_api_bind_ip = strdup("none"); config_ptr->mode = strdup("none"); config_ptr->fit_host = strdup("none"); @@ -354,8 +355,10 @@ void daemon_dump_cfg ( void ) if ( ptr->mtc_rx_mgmnt_port ) { ilog ("mtc_rx_mgmnt_port = %d\n", ptr->mtc_rx_mgmnt_port );} if ( ptr->mtc_rx_clstr_port ) { ilog ("mtc_rx_clstr_port = %d\n", ptr->mtc_rx_clstr_port );} + if ( ptr->mtc_rx_pxeboot_port ) { ilog ("mtc_rx_pxeboot_port = %d\n", ptr->mtc_rx_pxeboot_port );} if ( ptr->mtc_tx_mgmnt_port ) { ilog ("mtc_tx_mgmnt_port = %d\n", ptr->mtc_tx_mgmnt_port );} if ( ptr->mtc_tx_clstr_port ) { ilog ("mtc_tx_clstr_port = %d\n", ptr->mtc_tx_clstr_port );} + if ( ptr->mtc_tx_pxeboot_port ) { ilog ("mtc_tx_pxeboot_port = %d\n", ptr->mtc_tx_pxeboot_port );} if ( ptr->agent_rx_port ) { ilog ("agent_rx_port = %d\n", ptr->agent_rx_port );} if ( ptr->client_rx_port ) { ilog ("client_rx_port = %d\n", ptr->client_rx_port );} if ( ptr->mtc_to_hbs_cmd_port ) { ilog ("mtc_to_hbs_cmd_port = %d\n", ptr->mtc_to_hbs_cmd_port );} diff 
--git a/mtce/src/alarm/alarmMgr.cpp b/mtce/src/alarm/alarmMgr.cpp index 2d196222..42f65af2 100644 --- a/mtce/src/alarm/alarmMgr.cpp +++ b/mtce/src/alarm/alarmMgr.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2017,2019 Wind River Systems, Inc. + * Copyright (c) 2016-2017,2019, 2024 Wind River Systems, Inc. * * SPDX-License-Identifier: Apache-2.0 * @@ -83,7 +83,7 @@ void alarmMgr_queue_clear ( void ) ************************************************************************/ void alarmMgr_queue_alarm ( queue_entry_type entry ) { - alog ("%s adding %s to alarm queue [size=%ld]\n", + dlog ("%s adding %s to alarm queue [size=%ld]\n", entry.hostname.c_str(), entry.alarmid.c_str(), alarm_queue.size() ); @@ -116,7 +116,7 @@ void alarmMgr_queue_alarm ( queue_entry_type entry ) void alarmMgr_service_queue ( void ) { - alog1 ("Elements: %ld\n", alarm_queue.size()); + dlog1 ("Elements: %ld\n", alarm_queue.size()); if ( alarm_queue.empty() ) return ; @@ -138,7 +138,7 @@ void alarmMgr_service_queue ( void ) string action = entry.operation ; action.append (" alarm"); - alog ("%s %s operation:%s severity:%s entity:%s prefix:%s\n", + dlog ("%s %s operation:%s severity:%s entity:%s prefix:%s\n", entry.hostname.c_str(), entry.alarmid.c_str(), entry.operation.c_str(), diff --git a/mtce/src/alarm/alarmUtil.cpp b/mtce/src/alarm/alarmUtil.cpp index 5c0c2c92..16ba6d18 100644 --- a/mtce/src/alarm/alarmUtil.cpp +++ b/mtce/src/alarm/alarmUtil.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 Wind River Systems, Inc. + * Copyright (c) 2013, 2024 Wind River Systems, Inc. 
* * SPDX-License-Identifier: Apache-2.0 * @@ -153,7 +153,7 @@ EFmAlarmSeverityT alarmUtil_query ( string hostname, ENTITY_PREFIX, hostname.data(), instance.data()); } - alog ("entity_instance:%s\n", alarm_filter.entity_instance_id ); + dlog ("entity_instance:%s\n", alarm_filter.entity_instance_id ); if (( rc = fm_get_fault ( &alarm_filter, &alarm_query )) == FM_ERR_OK ) { dlog ("Found with Severity: %d\n", alarm_query.severity ); @@ -185,7 +185,7 @@ int alarmUtil_query_identity ( string identity, SFmAlarmDataT * alarm_list_ptr, memset(&alarm_filter, 0, sizeof(alarm_filter)); snprintf ( alarm_filter.alarm_id, FM_MAX_BUFFER_LENGTH, "%s", identity.data()); rc = fm_get_faults_by_id ( &alarm_filter.alarm_id, alarm_list_ptr, &max_alarms ); - alog ("%s fm_get_faults_by_id rc = %d\n", alarm_filter.alarm_id, rc ); + dlog ("%s fm_get_faults_by_id rc = %d\n", alarm_filter.alarm_id, rc ); if ( rc == FM_ERR_OK ) { return (PASS); @@ -261,18 +261,18 @@ int alarmUtil ( string & hostname, { if ( alarm.alarm_state == FM_ALARM_STATE_SET ) { - alog ("%s setting %s %s alarm\n", hostname.c_str(), alarm.alarm_id, alarm.entity_instance_id ); + dlog ("%s setting %s %s alarm\n", hostname.c_str(), alarm.alarm_id, alarm.entity_instance_id ); } else { - alog ("%s creating %s %s log\n", hostname.c_str(), alarm.alarm_id, alarm.entity_instance_id ); + dlog ("%s creating %s %s log\n", hostname.c_str(), alarm.alarm_id, alarm.entity_instance_id ); } /* Debug Logs */ - alog ("%s Alarm Reason: %s\n", hostname.c_str(), alarm.reason_text ); - alog ("%s Alarm Action: %s\n", hostname.c_str(), alarm.proposed_repair_action ); - alog ("%s Alarm Ident : %s : %s\n", hostname.c_str(), alarm.entity_type_id, alarm.entity_instance_id ); - alog ("%s Alarm State : state:%d sev:%d type:%d cause:%d sa:%c supp:%c\n", + dlog ("%s Alarm Reason: %s\n", hostname.c_str(), alarm.reason_text ); + dlog ("%s Alarm Action: %s\n", hostname.c_str(), alarm.proposed_repair_action ); + dlog ("%s Alarm Ident : %s : %s\n", 
hostname.c_str(), alarm.entity_type_id, alarm.entity_instance_id ); + dlog ("%s Alarm State : state:%d sev:%d type:%d cause:%d sa:%c supp:%c\n", hostname.c_str(), alarm.alarm_state, alarm.severity, @@ -310,7 +310,7 @@ int alarmUtil ( string & hostname, snprintf(filter.alarm_id, FM_MAX_BUFFER_LENGTH, "%s", alarm.alarm_id); snprintf(filter.entity_instance_id, FM_MAX_BUFFER_LENGTH, "%s", alarm.entity_instance_id); - alog ( "fm_clear_fault: %s %s:%s", hostname.c_str(), alarm.entity_instance_id, alarm.alarm_id ); + dlog ( "fm_clear_fault: %s %s:%s", hostname.c_str(), alarm.entity_instance_id, alarm.alarm_id ); #ifdef WANT_FIT_TESTING if (( daemon_is_file_present ( MTC_CMD_FIT__FM_ERROR_CODE )) && diff --git a/mtce/src/common/nodeClass.cpp b/mtce/src/common/nodeClass.cpp index b7ef0ea4..4d5e7d47 100755 --- a/mtce/src/common/nodeClass.cpp +++ b/mtce/src/common/nodeClass.cpp @@ -13,6 +13,8 @@ #include #include #include +#include +#include #include #include /* for ENODEV, EFAULT and ENXIO */ #include /* for close and usleep */ @@ -284,6 +286,9 @@ nodeLinkClass::nodeLinkClass() my_local_ip.clear() ; my_float_ip.clear() ; my_clstr_ip.clear() ; + my_pxeboot_ip.clear(); + my_pxeboot_if.clear(); + active_controller_hostname.clear() ; inactive_controller_hostname.clear() ; @@ -301,6 +306,7 @@ nodeLinkClass::nodeLinkClass() mgmnt_link_up_and_running = false ; clstr_link_up_and_running = false ; clstr_network_provisioned = false ; + pxeboot_network_provisioned=false ; clstr_degrade_only = false ; dor_mode_active = false ; @@ -492,11 +498,13 @@ nodeLinkClass::node* nodeLinkClass::addNode( string hostname ) /* init the new node */ ptr->hostname = hostname ; + ptr->pxeboot_hostname = ""; ptr->ip = "" ; ptr->mac = "" ; ptr->clstr_ip = "" ; ptr->clstr_mac = "" ; + ptr->pxeboot_ip= "" ; /* key value dictionary */ ptr->mtce_info = "" ; @@ -551,18 +559,35 @@ nodeLinkClass::node* nodeLinkClass::addNode( string hostname ) ptr->mtcAlive_purge = 0 ; ptr->offline_search_count = 0 ; + 
ptr->mtcAlive_mgmnt = false ; ptr->mtcAlive_clstr = false ; + ptr->mtcAlive_pxeboot = false ; /* These counts are incremented in the set_mtcAlive member * function and cleared in the reset progression handler. */ ptr->mtcAlive_mgmnt_count = 0 ; ptr->mtcAlive_clstr_count = 0 ; + ptr->mtcAlive_pxeboot_count = 0 ; + + // Clear all the mtcAlive_sequence numbers and monitoring trackers + for (int i = 0 ; i < MTCALIVE_INTERFACES_MAX ; i++) + { + ptr->mtcAlive_sequence[i] = + ptr->mtcAlive_sequence_save[i] = + ptr->mtcAlive_sequence_miss[i] = + ptr->mtcAlive_log_throttle [i] = 0 ; + } + ptr->pxeboot_mtcAlive_not_seen_log_throttle = 0 ; + ptr->pxeboot_mtcAlive_loss_log_throttle = 0 ; + ptr->bmc_reset_pending_log_throttle = 0 ; ptr->reboot_cmd_ack_mgmnt = false ; ptr->reboot_cmd_ack_clstr = false ; ptr->unlock_cmd_ack = false ; + ptr->reboot_cmd_ack_pxeboot = false ; + ptr->offline_log_throttle = 0 ; ptr->offline_log_reported = true ; ptr->online_log_reported = false ; @@ -585,6 +610,7 @@ nodeLinkClass::node* nodeLinkClass::addNode( string hostname ) mtcTimer_init ( ptr->mtcCmd_timer, hostname, "mtcCmd timer"); /* Init node's mtcCmd timer */ mtcTimer_init ( ptr->mtcConfig_timer, hostname, "mtcConfig timer"); /* Init node's mtcConfig timer */ mtcTimer_init ( ptr->mtcAlive_timer , hostname, "mtcAlive timer"); /* Init node's mtcAlive timer */ + mtcTimer_init ( ptr->online_timer , hostname, "online timer"); /* Init node's online timer */ mtcTimer_init ( ptr->offline_timer, hostname, "offline timer"); /* Init node's FH offline timer */ mtcTimer_init ( ptr->http_timer, hostname, "http timer" ); /* Init node's http timer */ mtcTimer_init ( ptr->bm_timer, hostname, "bm timer" ); /* Init node's bm timer */ @@ -620,6 +646,7 @@ nodeLinkClass::node* nodeLinkClass::addNode( string hostname ) ptr->resetStage = MTC_RESET__START ; ptr->enableStage = MTC_ENABLE__START ; ptr->disableStage = MTC_DISABLE__START ; + ptr->mtcAliveStage = MTC_MTCALIVE__START ; ptr->oos_test_count = 0 ; 
ptr->insv_test_count = 0 ; @@ -818,6 +845,11 @@ struct nodeLinkClass::node* nodeLinkClass::getNode ( string hostname ) { return ptr ; } + /* Node can be looked up by pxeboot ip */ + if ( !hostname.compare ( ptr->pxeboot_ip )) + { + return ptr ; + } if (( ptr->next == NULL ) || ( ptr == tail )) break ; @@ -911,6 +943,7 @@ int nodeLinkClass::remNode( string hostname ) mtcTimer_fini ( ptr->mtcTimer ); mtcTimer_fini ( ptr->mtcSwact_timer ); mtcTimer_fini ( ptr->mtcAlive_timer ); + mtcTimer_fini ( ptr->online_timer ); mtcTimer_fini ( ptr->offline_timer ); mtcTimer_fini ( ptr->mtcCmd_timer ); mtcTimer_fini ( ptr->http_timer ); @@ -1559,12 +1592,12 @@ int nodeLinkClass::avail_status_change ( string hostname, ( avail != MTC_AVAIL_STATUS__ONLINE ))) { /* Free the mtc timer if in use */ - if ( node_ptr->mtcAlive_timer.tid ) + if ( node_ptr->online_timer.tid ) { tlog ("%s Stopping mtcAlive timer\n", node_ptr->hostname.c_str()); - mtcTimer_stop ( node_ptr->mtcAlive_timer ); - node_ptr->mtcAlive_timer.ring = false ; - node_ptr->mtcAlive_timer.tid = NULL ; + mtcTimer_stop ( node_ptr->online_timer ); + node_ptr->online_timer.ring = false ; + node_ptr->online_timer.tid = NULL ; } node_ptr->onlineStage = MTC_ONLINE__START ; } @@ -1641,6 +1674,7 @@ int nodeLinkClass::lazy_graceful_fs_reboot ( struct nodeLinkClass::node * node_p /* Should never get there but if we do resend the reboot request * but this time not Lazy */ send_mtc_cmd ( node_ptr->hostname, MTC_CMD_REBOOT, MGMNT_INTERFACE ) ; + send_mtc_cmd ( node_ptr->hostname, MTC_CMD_REBOOT, PXEBOOT_INTERFACE ) ; } return (FAIL); } @@ -3448,6 +3482,137 @@ void nodeLinkClass::mtcInfo_handler ( void ) } }
+/**************************************************************************
+ *
+ * Name       : pxebootInfo_loader
+ *
+ * Purpose    : Load node pxeboot hostnames and ip addresses.
+ *
+ * Description: For each provisioned node, this function parses the
+ *              <OPT_PLATFORM_CONFIG_DIR>/<sw_version>/<DNSMASQ_HOSTS_FILE>
+ *              file with each node's management network mac address as
+ *              the primary search string and loads the pxeboot ip address
+ *              and pxeboot hostname where matches are found.
+ *
+ *              Each line is expected to hold comma delimited
+ *              mac,hostname,ip fields. Lines that start with
+ *              "pxecontroller" and lines with an empty mac field
+ *              are ignored.
+ *
+ * Parameters : Optional my_mac address for initial process startup
+ *              to get just its own my_pxeboot_ip address before the
+ *              nodeLinkClass host chain is created.
+ *
+ * Updates    : this->my_pxeboot_ip if my_mac is specified.
+ *              node_ptr->pxeboot_ip for all hosts if my_mac is empty.
+ *
+ * Returns    : Nothing
+ *
+ **************************************************************************/
+void nodeLinkClass::pxebootInfo_loader ( string my_mac )
+{
+    string dnsmasq_hosts_file = OPT_PLATFORM_CONFIG_DIR ;
+    dnsmasq_hosts_file.append("/");
+    dnsmasq_hosts_file.append(sw_version);
+    dnsmasq_hosts_file.append("/");
+    dnsmasq_hosts_file.append(DNSMASQ_HOSTS_FILE);
+
+    if ( daemon_is_file_present ( dnsmasq_hosts_file.data()) == false )
+    {
+        ilog ("%s file not present", dnsmasq_hosts_file.c_str());
+        return ;
+    }
+
+    // Open the dnsmasq_hosts_file for reading
+    ifstream filestream ( dnsmasq_hosts_file.c_str() );
+
+    // Check if the file is open
+    if (!filestream.is_open())
+    {
+        elog ("failed to open seemingly present %s file", dnsmasq_hosts_file.c_str());
+        return ;
+    }
+
+    // Read each line from the file
+    string line;
+    while (getline(filestream, line))
+    {
+        // Skip lines starting with "pxecontroller"
+        if (line.compare(0, 13, "pxecontroller") == 0)
+            continue;
+
+        // Create a stringstream to parse the comma-delimited fields
+        stringstream dnsmasq_hosts(line);
+        string mac, hostname, ip ;
+
+        // Extract fields
+        getline(dnsmasq_hosts, mac, ',');
+        getline(dnsmasq_hosts, hostname, ',');
+        getline(dnsmasq_hosts, ip, ',');
+        dlog ("pxebootInfo: %s %s %s", mac.c_str(), hostname.c_str(), ip.c_str());
+
+        // A blank or malformed line has no mac to search with. Skip it
+        // so that it cannot match a host whose mac is not yet learned.
+        if ( mac.empty() )
+            continue ;
+
+        if ( my_mac.empty() )
+        {
+            if ( ! head )
+            {
+                elog ("cannot read inventory ; head is null");
+                // Close the file stream
+                filestream.close();
+                return ;
+            }
+
+            // Search for the node that matches each mac address in inventory
+            bool found = false ;
+            for ( struct node * node_ptr = head ;  ; node_ptr = node_ptr->next )
+            {
+                if ( !mac.compare(node_ptr->mac) )
+                {
+                    node_ptr->pxeboot_hostname = hostname ;
+                    if ( !ip.empty() && ( ip != node_ptr->pxeboot_ip ))
+                    {
+                        // pxeboot ip address found and is different
+                        if ( node_ptr->pxeboot_ip.empty() )
+                        {
+                            ilog ("%s pxeboot hostname: %s has pxeboot ip: %s",
+                                      node_ptr->hostname.c_str(),
+                                      node_ptr->pxeboot_hostname.c_str(),
+                                      ip.c_str());
+                        }
+                        else
+                        {
+                            wlog ("%s pxeboot ip changed from %s to %s",
+                                      node_ptr->hostname.c_str(),
+                                      node_ptr->pxeboot_ip.c_str(),
+                                      ip.c_str());
+                        }
+                        node_ptr->pxeboot_ip = ip ;
+
+                        // Also load the my_pxeboot_ip at the process level for easy access
+                        if (( node_ptr->hostname == this->my_hostname ) && ( this->my_pxeboot_ip != ip ))
+                            this->my_pxeboot_ip = ip ;
+                    }
+                    found = true ;
+                    break ;
+                }
+                if (( node_ptr->next == NULL ) || ( node_ptr == tail ))
+                    break ;
+            }
+            if ( found == false )
+            {
+                wlog ("no host found matching mac address:%s", mac.c_str());
+            }
+        }
+        else if ( !mac.compare( my_mac ) )
+        {
+            // Handle the process startup 'my mac' case
+            if ( !ip.empty() )
+                this->my_pxeboot_ip = ip ;
+            else
+            {
+                wlog ("failed to lookup pxeboot ip from mac %s", my_mac.c_str());
+            }
+        }
+    }
+
+    // Close the file stream
+    filestream.close();
+}
+
 /* Lock Rules * * 1.
Cannot lock this controller @@ -3701,6 +3866,17 @@ string nodeLinkClass::get_clstr_hostaddr ( string & hostname ) return ( null_str ); }
+/* Get the pxeboot ip address of the specified host.
+ * Returns null_str if the getNode lookup fails. */
+string nodeLinkClass::get_pxeboot_hostaddr ( string hostname )
+{
+    nodeLinkClass::node* node_ptr ;
+    node_ptr = nodeLinkClass::getNode ( hostname );
+    if ( node_ptr != NULL )
+    {
+        return ( node_ptr->pxeboot_ip );
+    }
+    return ( null_str );
+}
+
 string nodeLinkClass::get_hostIfaceMac ( string & hostname, int iface ) { nodeLinkClass::node* node_ptr ; @@ -3730,6 +3906,30 @@ int nodeLinkClass::set_hostaddr ( string & hostname, string & ip ) return ( rc ); }
+/* Set the pxeboot ip address of the specified host.
+ *
+ * Returns: FAIL_HOSTNAME_LOOKUP if the getNode lookup fails
+ *          FAIL_INVALID_IP      if hostUtil_is_valid_ip_addr rejects the ip
+ *          PASS                 if the ip is valid ; the stored address
+ *                               is only updated, and the update logged,
+ *                               when it differs from the current one. */
+int nodeLinkClass::set_pxeboot_hostaddr ( string hostname, string ip )
+{
+    int rc = FAIL_HOSTNAME_LOOKUP ;
+
+    nodeLinkClass::node* node_ptr ;
+    node_ptr = nodeLinkClass::getNode ( hostname );
+    if ( node_ptr != NULL )
+    {
+        if ( ! hostUtil_is_valid_ip_addr(ip) )
+        {
+            rc = FAIL_INVALID_IP ;
+        }
+        else
+        {
+            // Setting the address that is already stored is not an error
+            if ( node_ptr->pxeboot_ip != ip )
+            {
+                node_ptr->pxeboot_ip = ip ;
+                ilog ("%s pxeboot ip set to %s",
+                          node_ptr->hostname.c_str(),
+                          node_ptr->pxeboot_ip.c_str());
+            }
+            rc = PASS ;
+        }
+    }
+    return ( rc );
+}
+
 int nodeLinkClass::set_clstr_hostaddr ( string & hostname, string & ip ) { int rc = FAIL ; @@ -3759,7 +3959,8 @@ string nodeLinkClass::get_hostname ( string hostaddr ) ( hostaddr == LOCALHOST ) || ( hostaddr == my_local_ip ) || ( hostaddr == my_float_ip ) || - ( hostaddr == my_clstr_ip )) + ( hostaddr == my_clstr_ip ) || + ( hostaddr == my_pxeboot_ip )) { return(this->my_hostname); } @@ -3889,6 +4090,8 @@ void nodeLinkClass::set_cmd_resp ( string & hostname, mtc_message_type & msg, in if ( iface == MGMNT_INTERFACE ) node_ptr->reboot_cmd_ack_mgmnt = 1 ; + else if ( iface == PXEBOOT_INTERFACE ) + node_ptr->reboot_cmd_ack_pxeboot = 1 ; else if ( iface == CLSTR_INTERFACE ) node_ptr->reboot_cmd_ack_clstr = 1 ; } @@ -3923,9 +4126,8 @@ unsigned int nodeLinkClass::get_cmd_resp ( string & hostname ) * * Name : set_mtcAlive * - * Description: Set the mgmnt or clust specific mtc alive
received bool. - * - * Used in the offline handler to verify overall offline state. + * Description: Set mtcAlive driven controls and status for the + * pxeboot, mgmnt and cluster networks. * * Interfaces : Public with hostname. * Private by node pointer. @@ -3933,52 +4135,108 @@ unsigned int nodeLinkClass::get_cmd_resp ( string & hostname ) * If mtcAlive is ungated then * * 1. manage the online/offline state bools - * 2. increment the mtcAlive count and + * 2. increment the mtcAlive count * 3. set the mtcAlive received bool for the specified interface * *****************************************************************************/ -void nodeLinkClass::set_mtcAlive ( string & hostname, int interface ) +void nodeLinkClass::set_mtcAlive ( string & hostname, unsigned int sequence, int iface ) { nodeLinkClass::node* node_ptr ; node_ptr = nodeLinkClass::getNode ( hostname ); if ( node_ptr != NULL ) { - this->set_mtcAlive ( node_ptr, interface ); + this->set_mtcAlive ( node_ptr, sequence, iface ); } } -void nodeLinkClass::set_mtcAlive ( struct nodeLinkClass::node * node_ptr, int interface ) +#define MTCALIVE_LOG_THROTTLE (1000) +void nodeLinkClass::set_mtcAlive ( struct nodeLinkClass::node * node_ptr, unsigned int sequence, int iface) { if ( node_ptr ) { if ( node_ptr->mtcAlive_gate == false ) { + bool state_change = false ; + node_ptr->mtcAlive_online = true ; node_ptr->mtcAlive_offline = false ; node_ptr->mtcAlive_count++ ; - if ( interface == CLSTR_INTERFACE ) + if ( iface == CLSTR_INTERFACE ) { if ( node_ptr->mtcAlive_clstr == false ) { - alog ("%s %s mtcAlive received", - node_ptr->hostname.c_str(), - get_iface_name_str(interface)); - node_ptr->mtcAlive_clstr_count++ ; node_ptr->mtcAlive_clstr = true ; + state_change = true ; + } + node_ptr->mtcAlive_clstr_count++ ; + } + else if ( iface == MGMNT_INTERFACE ) + { + if ( node_ptr->mtcAlive_mgmnt == false ) + { + node_ptr->mtcAlive_mgmnt = true ; + state_change = true ; + } + node_ptr->mtcAlive_mgmnt_count++ ; + } + 
else if ( iface == PXEBOOT_INTERFACE ) + { + if ( node_ptr->mtcAlive_pxeboot == false ) + { + node_ptr->mtcAlive_pxeboot = true ; + state_change = true ; + } + node_ptr->mtcAlive_pxeboot_count++ ; + } + else + { + wlog("%s mtcAlive received from unknown network %d", + node_ptr->hostname.c_str(), iface); + return ; + } + if ( state_change ) + { + ilog ("%s mtcAlive received from %s network with uptime:%d ; seq:%d", + node_ptr->hostname.c_str(), + get_iface_name_str(iface), + node_ptr->uptime, + sequence); + node_ptr->mtcAlive_log_throttle[iface] = 0 ; + } + else if ( node_ptr->mtcAlive_sequence[iface]+1 != sequence) + { + if ( sequence < node_ptr->mtcAlive_sequence[iface]+1 ) + { + wlog ("%s mtcAlive received from %s network with uptime:%d ; out-of-sequence ; expect:%d detect:%d ; correcting", + node_ptr->hostname.c_str(), + get_iface_name_str(iface), + node_ptr->uptime, + node_ptr->mtcAlive_sequence[iface]+1, + sequence); + } + else + { + wlog ("%s mtcAlive received from %s network with uptime:%d ; missed %d mtcalive msgs ; expect:%d detect:%d ; correcting", + node_ptr->hostname.c_str(), + get_iface_name_str(iface), + node_ptr->uptime, + sequence-(node_ptr->mtcAlive_sequence[iface]+1), + node_ptr->mtcAlive_sequence[iface]+1, + sequence); } } else { - if ( node_ptr->mtcAlive_mgmnt == false ) - { - alog ("%s %s mtcAlive received", - node_ptr->hostname.c_str(), - get_iface_name_str(interface)); - node_ptr->mtcAlive_mgmnt_count++ ; - node_ptr->mtcAlive_mgmnt = true ; - } + alog_throttled (node_ptr->mtcAlive_log_throttle[iface], MTCALIVE_LOG_THROTTLE, + "%s mtcAlive received from %s network with uptime:%d ; seq:%d", + node_ptr->hostname.c_str(), + get_iface_name_str(iface), + node_ptr->uptime, + sequence); } + // update running sequence number for this interface + node_ptr->mtcAlive_sequence[iface] = sequence ; } } } @@ -4291,7 +4549,6 @@ void nodeLinkClass::set_mtce_flags ( string hostname, int flags, int iface ) ((node_ptr->adminAction != MTC_ADMIN_ACTION__ENABLE ) && 
(node_ptr->adminAction != MTC_ADMIN_ACTION__UNLOCK ))) { - wlog ("%s mtcAlive reporting locked while unlocked ; correcting", node_ptr->hostname.c_str()); send_mtc_cmd ( node_ptr->hostname , MTC_MSG_UNLOCKED, MGMNT_INTERFACE ); send_mtc_cmd ( node_ptr->hostname , MTC_MSG_UNLOCKED, CLSTR_INTERFACE ); } @@ -4302,7 +4559,13 @@ void nodeLinkClass::set_mtce_flags ( string hostname, int flags, int iface ) if (( node_ptr->adminState == MTC_ADMIN_STATE__LOCKED ) && ( node_ptr->adminAction != MTC_ADMIN_ACTION__LOCK )) { - wlog ("%s mtcAlive reporting unlocked while locked ; correcting", node_ptr->hostname.c_str()); + // Avoid printing this warning log in simplex mode. + // The locked flag is lost over a reboot in simplex mode. + if ( daemon_is_file_present ( STILL_SIMPLEX_FILE ) == false ) + { + wlog ("%s mtcAlive reporting unlocked while locked ; correcting", + node_ptr->hostname.c_str()); + } send_mtc_cmd ( node_ptr->hostname , MTC_MSG_LOCKED, MGMNT_INTERFACE ); send_mtc_cmd ( node_ptr->hostname , MTC_MSG_LOCKED, CLSTR_INTERFACE ); } @@ -6243,6 +6506,10 @@ int nodeLinkClass::update_host_functions ( string hostname , string functions ) } rc = PASS ; } + else + { + wlog ("%s getNode lookup failed", hostname.c_str()); + } return (rc); } @@ -6930,12 +7197,12 @@ int nodeLinkClass::availStatusChange ( struct nodeLinkClass::node * node_ptr, ( newAvailStatus != MTC_AVAIL_STATUS__ONLINE ))) { /* Free the mtc timer if in use */ - if ( node_ptr->mtcAlive_timer.tid ) + if ( node_ptr->online_timer.tid ) { tlog ("%s Stopping mtcAlive timer\n", node_ptr->hostname.c_str()); - mtcTimer_stop ( node_ptr->mtcAlive_timer ); - node_ptr->mtcAlive_timer.ring = false ; - node_ptr->mtcAlive_timer.tid = NULL ; + mtcTimer_stop ( node_ptr->online_timer ); + node_ptr->online_timer.ring = false ; + node_ptr->online_timer.tid = NULL ; } node_ptr->onlineStage = MTC_ONLINE__START ; } @@ -7265,6 +7532,28 @@ int nodeLinkClass::subStageChange ( struct nodeLinkClass::node * node_ptr, } } +/** Host mtcAlive 
Stage Change member function */ +int nodeLinkClass::mtcAliveStageChange ( struct nodeLinkClass::node * node_ptr, + mtc_mtcAliveStages_enum newHdlrStage ) +{ + if ( newHdlrStage < MTC_MTCALIVE__STAGES ) + { + clog ("%s stage %s -> %s", + node_ptr->hostname.c_str(), + get_mtcAliveStages_str(node_ptr->mtcAliveStage).c_str(), + get_mtcAliveStages_str(newHdlrStage).c_str()); + + node_ptr->mtcAliveStage = newHdlrStage ; + return (PASS) ; + } + else + { + slog ("%s Invalid mtcAlive stage (%d)", node_ptr->hostname.c_str(), newHdlrStage ); + node_ptr->mtcAliveStage = MTC_MTCALIVE__START ; + return (FAIL) ; + } +} + struct nodeLinkClass::node * nodeLinkClass::get_mtcTimer_timer ( timer_t tid ) { /* check for empty list condition */ @@ -7537,6 +7826,23 @@ struct nodeLinkClass::node * nodeLinkClass::get_mtcAlive_timer ( timer_t tid ) return static_cast(NULL); } +struct nodeLinkClass::node * nodeLinkClass::get_online_timer ( timer_t tid ) +{ + /* check for empty list condition */ + if ( tid != NULL ) + { + for ( struct node * ptr = head ; ; ptr = ptr->next ) + { + if ( ptr->online_timer.tid == tid ) + { + return ptr ; + } + if (( ptr->next == NULL ) || ( ptr == tail )) + break ; + } + } + return static_cast(NULL); +} struct nodeLinkClass::node * nodeLinkClass::get_offline_timer ( timer_t tid ) { @@ -9231,17 +9537,53 @@ void nodeLinkClass::mem_log_state2 ( struct nodeLinkClass::node * node_ptr ) mem_log (str); } -void nodeLinkClass::mem_log_mtcalive ( struct nodeLinkClass::node * node_ptr ) +void nodeLinkClass::mem_log_mtcalive_state ( struct nodeLinkClass::node * node_ptr ) { char str[MAX_MEM_LOG_DATA] ; - snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tmtcAlive: online:%c offline:%c Cnt:%d Gate:%s Misses:%d\n", + snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tmtcAlive: online:%c offline:%c Cnt:%d Gate:%s Misses:%d Net:%d:%d:%d", node_ptr->hostname.c_str(), node_ptr->mtcAlive_online ? 'Y' : 'N', node_ptr->mtcAlive_offline ? 'Y' : 'N', node_ptr->mtcAlive_count, node_ptr->mtcAlive_gate ? 
"closed" : "open", - node_ptr->mtcAlive_misses); + node_ptr->mtcAlive_misses, + node_ptr->mtcAlive_mgmnt, + node_ptr->mtcAlive_clstr, + node_ptr->mtcAlive_pxeboot ); + + mem_log (str); +} + +void nodeLinkClass::mem_log_mtcalive_data ( struct nodeLinkClass::node * node_ptr ) +{ + char str[MAX_MEM_LOG_DATA] ; + + snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tmtcAlive: Pxeboot:%d seq:%d Mgmt:%d seq:%d Clstr:%d seq:%d", + node_ptr->hostname.c_str(), + node_ptr->mtcAlive_pxeboot_count, + node_ptr->mtcAlive_sequence[PXEBOOT_INTERFACE], + node_ptr->mtcAlive_mgmnt_count, + node_ptr->mtcAlive_sequence[MGMNT_INTERFACE], + node_ptr->mtcAlive_clstr_count, + node_ptr->mtcAlive_sequence[CLSTR_INTERFACE]); + + mem_log (str); +} + +void nodeLinkClass::mem_log_mtcalive_pxeboot ( struct nodeLinkClass::node * node_ptr ) +{ + char str[MAX_MEM_LOG_DATA] ; + + snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tPxeboot mtcAlive: Prov:%c Rxed:%c ring:%c miss:%d seq:%d save:%d ", + node_ptr->hostname.c_str(), + this->pxeboot_network_provisioned ? 'Y' : 'N', + node_ptr->mtcAlive_pxeboot ? 'Y' : 'N', + node_ptr->mtcAlive_timer.ring ? 
'Y' : 'N', + node_ptr->mtcAlive_sequence_miss [PXEBOOT_INTERFACE], + node_ptr->mtcAlive_sequence [PXEBOOT_INTERFACE], + node_ptr->mtcAlive_sequence_save [PXEBOOT_INTERFACE]); + mem_log (str); } @@ -9273,7 +9615,7 @@ void nodeLinkClass::mem_log_alarm2 ( struct nodeLinkClass::node * node_ptr ) void nodeLinkClass::mem_log_stage ( struct nodeLinkClass::node * node_ptr ) { char str[MAX_MEM_LOG_DATA] ; - snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tAdd:%d Offline:%d: Swact:%d Recovery:%d Enable:%d Disable:%d Power:%d Cycle:%d\n", + snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tAdd:%d Offline:%d: Swact:%d Recovery:%d Enable:%d Disable:%d Power:%d Cycle:%d mtcAlive:%d\n", node_ptr->hostname.c_str(), node_ptr->addStage, node_ptr->offlineStage, @@ -9282,7 +9624,8 @@ void nodeLinkClass::mem_log_stage ( struct nodeLinkClass::node * node_ptr ) node_ptr->enableStage, node_ptr->disableStage, node_ptr->powerStage, - node_ptr->powercycleStage); + node_ptr->powercycleStage, + node_ptr->mtcAliveStage); mem_log (str); } @@ -9319,11 +9662,13 @@ void nodeLinkClass::mem_log_reset_info ( struct nodeLinkClass::node * node_ptr ) void nodeLinkClass::mem_log_network ( struct nodeLinkClass::node * node_ptr ) { char str[MAX_MEM_LOG_DATA] ; - snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\t%s %s cluster_host_ip: %s Uptime: %u\n", + snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\t mac:%s mgmt:%s clstr: %s pxeboot:%s:%s Uptime: %u\n", node_ptr->hostname.c_str(), node_ptr->mac.c_str(), node_ptr->ip.c_str(), node_ptr->clstr_ip.c_str(), + node_ptr->pxeboot_hostname.c_str(), + node_ptr->pxeboot_ip.c_str(), node_ptr->uptime ); mem_log (str); } @@ -9430,23 +9775,25 @@ void nodeLinkClass::memDumpNodeState ( string hostname ) { if ( maintenance == true ) { - mem_log_dor ( node_ptr ); - mem_log_identity ( node_ptr ); - mem_log_type_info ( node_ptr ); - mem_log_network ( node_ptr ); - mem_log_state1 ( node_ptr ); - mem_log_state2 ( node_ptr ); - // mem_log_reset_info ( node_ptr ); - mem_log_power_info ( node_ptr ); - 
mem_log_alarm1 ( node_ptr ); - mem_log_alarm2 ( node_ptr ); - mem_log_mtcalive ( node_ptr ); - mem_log_stage ( node_ptr ); - mem_log_bm ( node_ptr ); - mem_log_ping ( node_ptr ); - mem_log_test_info ( node_ptr ); - mem_log_thread_info( node_ptr ); - workQueue_dump ( node_ptr ); + mem_log_dor ( node_ptr ); + mem_log_identity ( node_ptr ); + mem_log_type_info ( node_ptr ); + mem_log_network ( node_ptr ); + mem_log_mtcalive_state ( node_ptr ); + mem_log_mtcalive_data ( node_ptr ); + mem_log_mtcalive_pxeboot ( node_ptr ); + mem_log_state1 ( node_ptr ); + mem_log_state2 ( node_ptr ); + // mem_log_reset_info ( node_ptr ); + mem_log_power_info ( node_ptr ); + mem_log_alarm1 ( node_ptr ); + mem_log_alarm2 ( node_ptr ); + mem_log_stage ( node_ptr ); + mem_log_bm ( node_ptr ); + mem_log_ping ( node_ptr ); + mem_log_test_info ( node_ptr ); + mem_log_thread_info ( node_ptr ); + workQueue_dump ( node_ptr ); } if ( heartbeat == true ) { diff --git a/mtce/src/common/nodeClass.h b/mtce/src/common/nodeClass.h index 16ad523a..9536f00f 100755 --- a/mtce/src/common/nodeClass.h +++ b/mtce/src/common/nodeClass.h @@ -121,6 +121,12 @@ private: /** The Mac address of the host node */ std::string mac ; + /** The pxeboot network IP address of the host node */ + std::string pxeboot_ip ; + + /** The pxeboot network hostname of the host node */ + std::string pxeboot_hostname ; + /** The cluster-host network IP address of the host node */ std::string clstr_ip ; @@ -279,6 +285,7 @@ private: mtc_configStages_enum configStage ; mtc_resetProgStages_enum resetProgStage ; mtc_reinstallStages_enum reinstallStage ; + mtc_mtcAliveStages_enum mtcAliveStage ; /** Board management specific FSM Stages */ mtc_powerStages_enum powerStage ; @@ -315,10 +322,25 @@ private: int mtcAlive_hits ; int mtcAlive_purge ; - int mtcAlive_mgmnt_count ; /* count the mgmnt network mtcAlive messages */ - int mtcAlive_clstr_count ; /* count the clstr network mtcAlive messages */ - bool mtcAlive_mgmnt ; /* set true when mtcAlive 
is rx'd from mgmnt network */ - bool mtcAlive_clstr ; /* set true when mtcAlive is rx'd from clstr network */ + /* TODO: (emacdona) make these an array of interfaces */ + bool mtcAlive_mgmnt ; /* set true when mtcAlive is rx'd from mgmnt network */ + bool mtcAlive_clstr ; /* set true when mtcAlive is rx'd from clstr network */ + bool mtcAlive_pxeboot ; /* set true when mtcAlive is rx'd from pxeboot network */ + + /* TODO: (emacdona) make these an array of interfaces */ + int mtcAlive_mgmnt_count ; /* count the mgmnt network mtcAlive messages */ + int mtcAlive_clstr_count ; /* count the clstr network mtcAlive messages */ + int mtcAlive_pxeboot_count ; /* count the pxeboot network mtcAlive messages */ + + /* tracks the sequence number of the last mtcAlive message */ + unsigned int mtcAlive_sequence [MTCALIVE_INTERFACES_MAX] ; + unsigned int mtcAlive_sequence_save[MTCALIVE_INTERFACES_MAX] ; + unsigned int mtcAlive_sequence_miss[MTCALIVE_INTERFACES_MAX] ; + unsigned int mtcAlive_log_throttle [MTCALIVE_INTERFACES_MAX] ; + + /* pxeboot mtcAlive monitor log throttles */ + int pxeboot_mtcAlive_not_seen_log_throttle ; + int pxeboot_mtcAlive_loss_log_throttle ; /* used to log time leading up to reset */ int bmc_reset_pending_log_throttle ; @@ -334,14 +356,12 @@ private: bool online_log_reported ; /* availStatus switches between these states */ /* and failed */ - /** Host's mtc timer struct. Use to time handler stages. - * - * reset -> reset command response - * reboot -> then wait for mtcalive message - * mtcalive -> then wait for go enabled message - */ + /* timer for pxeboot_mtcAlive_monitor fsm */ struct mtc_timer mtcAlive_timer ; + /* timer for online_handler fsm. 
*/ + struct mtc_timer online_timer ; + /* the fault handling offline handler timer */ struct mtc_timer offline_timer ; @@ -456,6 +476,7 @@ private: bool unlock_cmd_ack ; /* set true when a unlocked command ack is rx'ed */ bool reboot_cmd_ack_mgmnt ; bool reboot_cmd_ack_clstr ; + bool reboot_cmd_ack_pxeboot ; /** Tracks back to back Fast Fault Recovery counts */ int graceful_recovery_counter; @@ -849,6 +870,9 @@ private: /* Starts the specified 'reset or powercycle' recovery monitor */ int hwmon_recovery_monitor ( struct nodeLinkClass::node * node_ptr, int hwmon_event ); + /* Monitors pxeboot mtcAlive messages and manages associated alarm */ + int pxeboot_mtcAlive_monitor ( struct nodeLinkClass::node * node_ptr ); + /* server specific power state query handler */ bool (*is_poweron_handler) (string hostname, string query_response ); @@ -865,7 +889,7 @@ private: bool get_mtcAlive_gate ( struct nodeLinkClass::node * node_ptr ); void ctl_mtcAlive_gate ( struct nodeLinkClass::node * node_ptr, bool gate_state ); - void set_mtcAlive ( struct nodeLinkClass::node * node_ptr, int interface ); + void set_mtcAlive ( struct nodeLinkClass::node * node_ptr, unsigned int sequence, int iface); /********* mtcInfo in the database ************/ int mtcInfo_set ( struct nodeLinkClass::node * node_ptr, string key, string value ); @@ -1087,6 +1111,10 @@ private: int subStageChange ( struct nodeLinkClass::node * node_ptr, mtc_subStages_enum newHdlrStage ); + /** mtcAlive Stage Change member function */ + int mtcAliveStageChange ( struct nodeLinkClass::node * node_ptr, + mtc_mtcAliveStages_enum newHdlrStage ); + int failed_state_change ( struct nodeLinkClass::node * node_ptr ); /* issue a @@ -1125,6 +1153,7 @@ private: struct nodeLinkClass::node * get_mtcTimer_timer ( timer_t tid ); struct nodeLinkClass::node * get_mtcConfig_timer ( timer_t tid ); struct nodeLinkClass::node * get_mtcAlive_timer ( timer_t tid ); + struct nodeLinkClass::node * get_online_timer ( timer_t tid ); struct 
nodeLinkClass::node * get_offline_timer ( timer_t tid ); struct nodeLinkClass::node * get_mtcSwact_timer ( timer_t tid ); struct nodeLinkClass::node * get_mtcCmd_timer ( timer_t tid ); @@ -1316,26 +1345,28 @@ private: void mem_log_general_mtce_hosts ( void ); void mem_log_mnfa ( void ); - void mem_log_dor ( struct nodeLinkClass::node * node_ptr ); - void mem_log_identity ( struct nodeLinkClass::node * node_ptr ); - void mem_log_network ( struct nodeLinkClass::node * node_ptr ); - void mem_log_state1 ( struct nodeLinkClass::node * node_ptr ); - void mem_log_state2 ( struct nodeLinkClass::node * node_ptr ); - void mem_log_alarm1 ( struct nodeLinkClass::node * node_ptr ); - void mem_log_alarm2 ( struct nodeLinkClass::node * node_ptr ); - void mem_log_mtcalive ( struct nodeLinkClass::node * node_ptr ); - void mem_log_stage ( struct nodeLinkClass::node * node_ptr ); - void mem_log_test_info ( struct nodeLinkClass::node * node_ptr ); - void mem_log_bm ( struct nodeLinkClass::node * node_ptr ); - void mem_log_ping ( struct nodeLinkClass::node * node_ptr ); - void mem_log_heartbeat ( struct nodeLinkClass::node * node_ptr ); - void mem_log_hbs_cnts ( struct nodeLinkClass::node * node_ptr ); - void mem_log_type_info ( struct nodeLinkClass::node * node_ptr ); - void mem_log_reset_info( struct nodeLinkClass::node * node_ptr ); - void mem_log_power_info( struct nodeLinkClass::node * node_ptr ); - void mem_log_thread_info ( struct nodeLinkClass::node * node_ptr ); + void mem_log_dor ( struct nodeLinkClass::node * node_ptr ); + void mem_log_identity ( struct nodeLinkClass::node * node_ptr ); + void mem_log_network ( struct nodeLinkClass::node * node_ptr ); + void mem_log_state1 ( struct nodeLinkClass::node * node_ptr ); + void mem_log_state2 ( struct nodeLinkClass::node * node_ptr ); + void mem_log_alarm1 ( struct nodeLinkClass::node * node_ptr ); + void mem_log_alarm2 ( struct nodeLinkClass::node * node_ptr ); + void mem_log_mtcalive_state ( struct nodeLinkClass::node * node_ptr 
); + void mem_log_mtcalive_data ( struct nodeLinkClass::node * node_ptr ); + void mem_log_mtcalive_pxeboot ( struct nodeLinkClass::node * node_ptr ); + void mem_log_stage ( struct nodeLinkClass::node * node_ptr ); + void mem_log_test_info ( struct nodeLinkClass::node * node_ptr ); + void mem_log_bm ( struct nodeLinkClass::node * node_ptr ); + void mem_log_ping ( struct nodeLinkClass::node * node_ptr ); + void mem_log_heartbeat ( struct nodeLinkClass::node * node_ptr ); + void mem_log_hbs_cnts ( struct nodeLinkClass::node * node_ptr ); + void mem_log_type_info ( struct nodeLinkClass::node * node_ptr ); + void mem_log_reset_info ( struct nodeLinkClass::node * node_ptr ); + void mem_log_power_info ( struct nodeLinkClass::node * node_ptr ); + void mem_log_thread_info ( struct nodeLinkClass::node * node_ptr ); - void print_node_info ( struct nodeLinkClass::node * node_ptr ); + void print_node_info ( struct nodeLinkClass::node * node_ptr ); // #endif @@ -1349,9 +1380,12 @@ public: system_type_enum system_type ; - string functions ; /**< comma delimited string list of functions supported */ - bool maintenance ; - bool heartbeat ; + string sw_version; /* fetched from /etc/build.info using daemon_sw_version */ + string functions ; /* comma delimited string list of functions supported */ + + bool maintenance ; /* the mtcAgent */ + bool heartbeat ; /* the hbsAgent */ + /* Set to true if this controller is active. * Currently only used by heartbeat service. 
*/ @@ -1403,10 +1437,12 @@ public: { active = state ; } /** Store the hostname of this controller */ - string my_hostname ; /**< */ - string my_local_ip ; /**< Primary IP address */ - string my_float_ip ; /**< Secondary (floating) IP address */ - string my_clstr_ip ; /**< Cluster network IP address */ + string my_hostname ; /** My Hostname */ + string my_local_ip ; /** Primary IP address */ + string my_float_ip ; /** Secondary (floating) IP address */ + string my_clstr_ip ; /** Cluster network IP address */ + string my_pxeboot_ip ; /** Pxeboot network IP address */ + string my_pxeboot_if ; /** Pxeboot interface name */ /********* New Public Constructs for IPMI Comamnd Handling ***********/ @@ -1448,12 +1484,18 @@ public: /** get cluster-host network ip address for any hostname */ string get_clstr_hostaddr ( string & hostname ); + /** get the pxeboot network address for any hostname */ + string get_pxeboot_hostaddr ( string hostname ); + /** set a node's ip address */ int set_hostaddr ( string & hostname, string & ip ); /** set a node's cluster-host ip address */ int set_clstr_hostaddr ( string & hostname, string & ip ); + /* set the pxeboot network address for any hostname */ + int set_pxeboot_hostaddr ( string hostname, string ip ); + /** get hostname for any hostname */ string get_hostname ( string hostaddr ); @@ -1684,6 +1726,12 @@ public: * network is provisioned and configured for this daemon to use */ bool clstr_network_provisioned ; + /** A boolean that is used to quickly determine if the pxeboot network + * is provisioned. + * The pxeboot network is considered unprovisioned while the management + * interface is on the 'lo' (localhost) interface. 
*/ + bool pxeboot_network_provisioned ; + /** A debug bool hat allows cluster-host heartbeat failures to only * cause host degrade rather than failure */ bool clstr_degrade_only ; @@ -1758,6 +1806,7 @@ public: struct mtc_timer mtcTimer_mnfa ; struct mtc_timer mtcTimer_token ; struct mtc_timer mtcTimer_uptime ; + struct mtc_timer mtcTimer_loop ; // main loop timer /* System Level DOR recovery timer * Note: tid != NULL represents DOR Mode Active */ @@ -1775,10 +1824,15 @@ public: /** Returns true when a 'maintenance alive' message for that * hostnamed node is received */ - void set_mtcAlive ( string & hostname, int iface ); + void set_mtcAlive ( string & hostname, unsigned int sequence, int iface ); bool get_mtcAlive_gate ( string & hostname ); void ctl_mtcAlive_gate ( string & hostname, bool gated ); + /* Updates my_pxeboot_ip if my_mac is specified. + * Otherwise, tries to update the pxeboot ip and + * hostname for each provisioned node in the system. */ + void pxebootInfo_loader ( string my_mac = "" ); + /** Store the latest mtce flags for the specified host * current flags are defined in nodebase.h #define MTC_FLAG__I_AM_CONFIGURED (0x00000001) diff --git a/mtce/src/lmon/lmon.h b/mtce/src/lmon/lmon.h index 9ab62ae7..4e8ebe97 100644 --- a/mtce/src/lmon/lmon.h +++ b/mtce/src/lmon/lmon.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Wind River Systems, Inc. + * Copyright (c) 2019, 2024 Wind River Systems, Inc. 
* * SPDX-License-Identifier: Apache-2.0 * @@ -23,14 +23,15 @@ using namespace std; #endif #define __AREA__ "mon" +#ifndef INTERFACES_DIR #define INTERFACES_DIR ((const char *)"/sys/class/net/") +#endif #define PLATFORM_DIR ((const char *)"/etc/platform/platform.conf") #define LMON_DIR ((const char *)"/etc/lmon/lmon.conf") #define INTERFACES_MAX (4) /* maximum number of interfaces to monitor */ -enum interface_type { ethernet = 0, vlan = 1, bond = 2 }; -string iface_type ( interface_type type_enum ); +string iface_type ( iface_type_enum type_enum ); /* daemon only supports the GET request */ #define HTTP_SUPPORTED_METHODS (EVHTTP_REQ_GET) @@ -68,7 +69,7 @@ typedef struct /* true if the interface is configured. * i.e. the name label shown above is found in platform.conf */ bool used ; - interface_type type_enum ; + iface_type_enum type_enum ; /* true if the link is up ; false otherwise */ bool interface_one_link_up ; diff --git a/mtce/src/lmon/lmonUtil.cpp b/mtce/src/lmon/lmonUtil.cpp index d0225e8d..8800d33f 100644 --- a/mtce/src/lmon/lmonUtil.cpp +++ b/mtce/src/lmon/lmonUtil.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Wind River Systems, Inc. + * Copyright (c) 2019, 2024 Wind River Systems, Inc. * * SPDX-License-Identifier: Apache-2.0 * @@ -35,7 +35,7 @@ * ****************************************************************************/ -string iface_type ( interface_type type_enum ) +string iface_type ( iface_type_enum type_enum ) { switch(type_enum) { @@ -187,7 +187,7 @@ int lmon_get_link_state ( int ioctl_socket, * Name : lmon_interfaces_init * * Purpose : Map an interface (mgmt, oam or cluster-host) to a physical port. 
- * See interface_type enum in lmon.h + * See iface_type_enum enum in nodeUtil.h * *****************************************************************************/ diff --git a/mtce/src/maintenance/mtcCmdHdlr.cpp b/mtce/src/maintenance/mtcCmdHdlr.cpp index 9b12c97c..0da59f3c 100644 --- a/mtce/src/maintenance/mtcCmdHdlr.cpp +++ b/mtce/src/maintenance/mtcCmdHdlr.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2017, 2023 Wind River Systems, Inc. + * Copyright (c) 2013-2017, 2023-2024 Wind River Systems, Inc. * * SPDX-License-Identifier: Apache-2.0 * @@ -360,10 +360,40 @@ int nodeLinkClass::cmd_handler ( struct nodeLinkClass::node * node_ptr ) node_ptr->reboot_cmd_ack_mgmnt = false ; node_ptr->reboot_cmd_ack_clstr = false ; + node_ptr->reboot_cmd_ack_pxeboot = false ; /* send reboot command */ node_ptr->cmdReq = MTC_CMD_REBOOT ; node_ptr->cmdRsp = MTC_CMD_NONE ; + + // Send the reboot command on all provisioned networks + if ( this->pxeboot_network_provisioned == true ) + { + if (( rc = send_mtc_cmd ( node_ptr->hostname, + MTC_CMD_REBOOT, + PXEBOOT_INTERFACE )) != PASS ) + { + // Don't report a warning log if the far end pxeboot + // network address is not learned yet. 
+ if ( rc != FAIL_HOSTADDR_LOOKUP ) + { + wlog ("%s reboot request failed (%s) (rc:%d)\n", + node_ptr->hostname.c_str(), + get_iface_name_str(PXEBOOT_INTERFACE), rc); + } + else + { + ilog ("%s %s network address not learned yet ; can't reboot", + node_ptr->hostname.c_str(), + get_iface_name_str(PXEBOOT_INTERFACE)); + } + } + else + { + send_reboot_ok = true ; + } + } + if (( rc = send_mtc_cmd ( node_ptr->hostname, MTC_CMD_REBOOT, MGMNT_INTERFACE )) != PASS ) @@ -383,9 +413,20 @@ int nodeLinkClass::cmd_handler ( struct nodeLinkClass::node * node_ptr ) MTC_CMD_REBOOT, CLSTR_INTERFACE )) != PASS ) { - wlog ("%s reboot request failed (%s) (rc:%d)\n", - node_ptr->hostname.c_str(), - get_iface_name_str(CLSTR_INTERFACE), rc); + // Don't report a warning log if the far end cluster + // network IP is not learned yet. + if ( rc != FAIL_HOSTADDR_LOOKUP ) + { + wlog ("%s reboot request failed (%s) (rc:%d)", + node_ptr->hostname.c_str(), + get_iface_name_str(CLSTR_INTERFACE), rc); + } + else + { + ilog ("%s %s network address not learned yet ; can't reboot", + node_ptr->hostname.c_str(), + get_iface_name_str(CLSTR_INTERFACE)); + } } else { @@ -446,6 +487,7 @@ int nodeLinkClass::cmd_handler ( struct nodeLinkClass::node * node_ptr ) * messages from the remote host during the reset delay window */ node_ptr->mtcAlive_mgmnt_count = 0 ; node_ptr->mtcAlive_clstr_count = 0 ; + node_ptr->mtcAlive_pxeboot_count = 0 ; wlog ("%s ... 
bmc reset in %d secs", node_ptr->hostname.c_str(), reset_delay); mtcTimer_start ( node_ptr->mtcCmd_timer, mtcTimer_handler, reset_delay ); @@ -472,11 +514,25 @@ int nodeLinkClass::cmd_handler ( struct nodeLinkClass::node * node_ptr ) } else { + // log the acks + string nwk_ack = "" ; + if ( node_ptr->reboot_cmd_ack_pxeboot ) + nwk_ack.append(get_iface_name_str(PXEBOOT_INTERFACE)); + if ( node_ptr->reboot_cmd_ack_mgmnt ) + { + if ( !nwk_ack.empty() ) + nwk_ack.append(","); + nwk_ack.append(get_iface_name_str(MGMNT_INTERFACE)); + } + if ( node_ptr->reboot_cmd_ack_clstr ) + { + if ( !nwk_ack.empty() ) + nwk_ack.append(","); + nwk_ack.append(get_iface_name_str(CLSTR_INTERFACE)); + } + /* declare successful reboot */ - plog ("%s reboot request succeeded (%s %s)", - node_ptr->hostname.c_str(), - node_ptr->reboot_cmd_ack_mgmnt ? get_iface_name_str(MGMNT_INTERFACE) : "", - node_ptr->reboot_cmd_ack_clstr ? get_iface_name_str(CLSTR_INTERFACE) : ""); + plog ("%s reboot request succeeded (%s)", node_ptr->hostname.c_str(), nwk_ack.c_str()); if ( node_ptr->cmd.task == true ) { @@ -499,6 +555,7 @@ int nodeLinkClass::cmd_handler ( struct nodeLinkClass::node * node_ptr ) * messages from the remote host during the reset delay window */ node_ptr->mtcAlive_mgmnt_count = 0 ; node_ptr->mtcAlive_clstr_count = 0 ; + node_ptr->mtcAlive_pxeboot_count = 0 ; wlog ("%s max reboot retries reached ; still not offline ; reset in %3d secs", node_ptr->hostname.c_str(), reset_delay); @@ -566,7 +623,8 @@ int nodeLinkClass::cmd_handler ( struct nodeLinkClass::node * node_ptr ) * or the failure of just one (mgmnt or clstr) networks to mistakenly * cancel the reset. Prevent the cancel if * - the node uptime is high and - * - not receiving mtcAlive both mgmnt and clstr networks. + * - not receiving mtcAlive on any mtcAlive networks ; + * mgmnt, clstr and pxeboot networks. 
* * Note: online does not mean both networks are receiving mtcAlive, * Currently just mgmnt needs to see mtcAlive for the node to @@ -578,15 +636,17 @@ int nodeLinkClass::cmd_handler ( struct nodeLinkClass::node * node_ptr ) if (( node_ptr->availStatus == MTC_AVAIL_STATUS__ONLINE ) && ( node_ptr->uptime < MTC_MINS_5 ) && ( node_ptr->mtcAlive_mgmnt_count ) && - ( node_ptr->mtcAlive_clstr_count )) + ( node_ptr->mtcAlive_clstr_count ) && + ( node_ptr->mtcAlive_pxeboot_count )) { mtcTimer_reset ( node_ptr->mtcCmd_timer ); - ilog ("%s cancelling reset ; host is online ; delay:%d uptime:%d mtcAlive:%d:%d ", + ilog ("%s cancelling reset ; host is online ; delay:%d uptime:%d mtcAlive:%d:%d:%d ", node_ptr->hostname.c_str(), bmc_reset_delay, node_ptr->uptime, node_ptr->mtcAlive_mgmnt_count, - node_ptr->mtcAlive_clstr_count); + node_ptr->mtcAlive_clstr_count, + node_ptr->mtcAlive_pxeboot_count); node_ptr->mtcCmd_work_fifo_ptr->status = PASS ; node_ptr->mtcCmd_work_fifo_ptr->stage = MTC_CMD_STAGE__DONE ; } @@ -602,13 +662,14 @@ int nodeLinkClass::cmd_handler ( struct nodeLinkClass::node * node_ptr ) #define BMC_RESET_PENDING_LOG_THROTTLE (1000) wlog_throttled ( node_ptr->bmc_reset_pending_log_throttle, BMC_RESET_PENDING_LOG_THROTTLE, - "%s reset in %3ld secs ; delay:%d uptime:%d mtcAlive:%d:%d", + "%s reset in %3ld secs ; delay:%d uptime:%d mtcAlive:%d:%d:%d", node_ptr->hostname.c_str(), reset_delay-diff_time.secs, bmc_reset_delay, node_ptr->uptime, node_ptr->mtcAlive_mgmnt_count, - node_ptr->mtcAlive_clstr_count); + node_ptr->mtcAlive_clstr_count, + node_ptr->mtcAlive_pxeboot_count); } } break ; /* waiting path */ @@ -813,6 +874,8 @@ int nodeLinkClass::cmd_handler ( struct nodeLinkClass::node * node_ptr ) /* update the timer hostname */ node_ptr->mtcTimer.hostname = name ; node_ptr->mtcAlive_timer.hostname = name ; + node_ptr->online_timer.hostname = name ; + node_ptr->offline_timer.hostname = name ; node_ptr->mtcSwact_timer.hostname = name ; node_ptr->mtcCmd_timer.hostname = 
name ; node_ptr->oosTestTimer.hostname = name ; diff --git a/mtce/src/maintenance/mtcCompMsg.cpp b/mtce/src/maintenance/mtcCompMsg.cpp index 446f1f53..3ec8b93b 100755 --- a/mtce/src/maintenance/mtcCompMsg.cpp +++ b/mtce/src/maintenance/mtcCompMsg.cpp @@ -50,12 +50,6 @@ extern "C" #include "amon.h" /* for ... active monitoring utilities */ } -extern char *program_invocation_short_name; - -int mtcAlive_mgmnt_sequence = 0 ; -int mtcAlive_clstr_sequence = 0 ; - - /************************************************************************ * * Name : stop pmon @@ -107,18 +101,18 @@ void stop_pmon( void ) /* Receive and process commands from controller maintenance */ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) { - int bytes = 0 ; mtc_message_type msg ; int rc = FAIL ; + ssize_t bytes_received = 0 ; ctrl_type * ctrl_ptr = get_ctrl_ptr() ; bool log_ack = true ; + const char * iface_name_ptr = get_interface_name_str(interface) ; if ( interface == CLSTR_INTERFACE ) { if ( ! 
ctrl_ptr->clstr_iface_provisioned ) { - wlog ("cannot receive from unprovisioned %s interface\n", - get_iface_name_str(interface) ); + wlog ("cannot receive from unprovisioned %s interface", iface_name_ptr); return (rc); } } @@ -126,17 +120,57 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) /* clean the rx/tx buffer */ memset ((void*)&msg,0,sizeof(mtc_message_type)); string hostaddr = "" ; - if ( interface == MGMNT_INTERFACE ) + if ( interface == PXEBOOT_INTERFACE ) { - if (( sock_ptr->mtc_client_rx_socket ) && - ( sock_ptr->mtc_client_rx_socket->sock_ok() == true )) + if ( sock_ptr->pxeboot_rx_socket ) { - rc = sock_ptr->mtc_client_rx_socket->read((char*)&msg.hdr[0], sizeof(mtc_message_type)); - hostaddr = sock_ptr->mtc_client_rx_socket->get_src_str(); + struct sockaddr_in client_addr; + socklen_t addr_len = sizeof(client_addr); + + // Receive data + bytes_received = recvfrom(sock_ptr->pxeboot_rx_socket, + (char*)&msg.hdr[0], + sizeof(mtc_message_type), 0, + (struct sockaddr*)&client_addr, &addr_len); + // Terminate the buffer + msg.hdr[bytes_received] = '\0' ; + + // Log with debug_msg lane 2 + if ( daemon_get_cfg_ptr()->debug_msg&2 ) + { + // log the message ; both header and buffer + string _buf = msg.buf[0] ? 
msg.buf : "empty"; + ilog ("Received %ld bytes (%s) from %s:%d - %s:%s", + bytes_received, + iface_name_ptr, + inet_ntoa(client_addr.sin_addr), + ntohs(client_addr.sin_port), + &msg.hdr[0], _buf.c_str()); + // dump_memory (&msg.hdr[0], 16, bytes_received); + } + hostaddr = inet_ntoa(client_addr.sin_addr); + } + } + else if ( interface == MGMNT_INTERFACE ) + { + if (( sock_ptr->mtc_client_mgmt_rx_socket ) && + ( sock_ptr->mtc_client_mgmt_rx_socket->sock_ok() == true )) + { + rc = bytes_received = sock_ptr->mtc_client_mgmt_rx_socket->read((char*)&msg.hdr[0], sizeof(mtc_message_type)); + hostaddr = sock_ptr->mtc_client_mgmt_rx_socket->get_src_str(); + + // Log with debug_msg lane 2 + if ( daemon_get_cfg_ptr()->debug_msg&2 ) + { + // Log the message ; both header and buffer + string _buf = msg.buf[0] ? msg.buf : "empty"; + ilog ("Received %ld bytes (%s) from %s - %s:%s", bytes_received, + iface_name_ptr, hostaddr.c_str(), &msg.hdr[0], _buf.c_str()); + } } else { - elog ("cannot read from null or failed 'mtc_client_rx_socket'\n"); + elog ("cannot read from null or failed 'mtc_client_mgmt_rx_socket'\n"); return (FAIL_TO_RECEIVE); } } @@ -145,8 +179,18 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) if (( sock_ptr->mtc_client_clstr_rx_socket ) && ( sock_ptr->mtc_client_clstr_rx_socket->sock_ok() == true )) { - rc = sock_ptr->mtc_client_clstr_rx_socket->read((char*)&msg.hdr[0], sizeof(mtc_message_type)); + rc = bytes_received = sock_ptr->mtc_client_clstr_rx_socket->read((char*)&msg.hdr[0], sizeof(mtc_message_type)); hostaddr = sock_ptr->mtc_client_clstr_rx_socket->get_src_str(); + + // Log with debug_msg lane 2 + if ( daemon_get_cfg_ptr()->debug_msg&2 ) + { + // Log the message ; both header and buffer + string _buf = msg.buf[0] ? 
msg.buf : "empty"; + ilog ("Received %ld bytes (%s) from %s: %s:%s", + bytes_received, iface_name_ptr, + hostaddr.c_str(), &msg.hdr[0], _buf.c_str()); + } } else { @@ -174,11 +218,9 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) { self = true ; } - string interface_name = get_iface_name_str (interface) ; + string interface_name = get_interface_name_str (interface) ; string command_name = get_mtcNodeCommand_str(msg.cmd) ; - print_mtc_message ( get_hostname(), MTC_CMD_RX, msg, interface_name.data(), false ); - /* Message version greater than zero have the hosts management * mac address appended to the header string */ if (( !self ) && ( msg.ver >= MTC_CMD_FEATURE_VER__MACADDR_IN_CMD )) @@ -186,18 +228,18 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) /* the minus 1 is to back up from the null char that is accounted for in the hearder size */ if ( strncmp ( &msg.hdr[MSG_HEADER_SIZE-1], ctrl_ptr->macaddr.data(), MSG_HEADER_SIZE )) { - wlog ("%s command not for this host (exp:%s det:%s) ; ignoring ...\n", + wlog ("%s req command from %s network not for this host (exp:%s det:%s) ; ignoring ...\n", command_name.c_str(), + iface_name_ptr, ctrl_ptr->macaddr.c_str(), &msg.hdr[MSG_HEADER_SIZE-1]); - print_mtc_message ( get_hostname(), MTC_CMD_RX, msg, interface_name.data(), true ); + print_mtc_message ( get_hostname(), MTC_CMD_RX, msg, iface_name_ptr, true ); return (FAIL_INVALID_DATA); } } - print_mtc_message ( hostaddr, MTC_CMD_RX, msg, get_iface_name_str(interface), rc ); - if ( rc ) - return rc; + if ( ! 
hostaddr.empty() ) + print_mtc_message ( hostaddr, MTC_CMD_RX, msg, iface_name_ptr, false ); /* Check for response messages */ if ( strstr ( &msg.hdr[0], get_cmd_req_msg_header() ) ) @@ -205,20 +247,25 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) rc = PASS ; if ( msg.cmd == MTC_REQ_MTCALIVE ) { - mlog1 ("mtcAlive request received (%s network)\n", interface_name.c_str()); + ilog ("mtcAlive request received from %s network", iface_name_ptr); + if ( interface == PXEBOOT_INTERFACE ) + { + alog2 ("pxeboot mtcAlive buffer: %s", &msg.buf[0]); + load_pxebootInfo_msg(msg); + } return ( send_mtcAlive_msg ( sock_ptr, get_who_i_am(), interface )); } else if ( msg.cmd == MTC_MSG_INFO ) { - mlog1("mtc 'info' message received (%s network)\n", interface_name.c_str()); + alog2 ("mtc 'info' message received from %s network", iface_name_ptr); load_mtcInfo_msg ( msg ); return ( PASS ); /* no ack for this message */ } else if ( msg.cmd == MTC_CMD_SYNC ) { - ilog ("mtc '%s' message received (%s network)\n", + ilog ("mtc '%s' message received from %s network", get_mtcNodeCommand_str(msg.cmd), - interface_name.c_str()); + iface_name_ptr); ilog ("Sync Start"); sync (); @@ -233,7 +280,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) /* Only recreate the file if its not already present */ if ( daemon_is_file_present ( NODE_LOCKED_FILE ) == false ) { - ilog ("%s locked (%s)", get_hostname().c_str(), interface_name.c_str() ); + ilog ("%s locked (%s)", get_hostname().c_str(), iface_name_ptr); daemon_log ( NODE_LOCKED_FILE, ADMIN_LOCKED_STR); } @@ -254,7 +301,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) } else if ( msg.cmd == MTC_MSG_UNLOCKED ) { - ilog ("%s unlocked (%s)", get_hostname().c_str(), interface_name.c_str() ); + ilog ("%s unlocked received from %s network", get_hostname().c_str(), iface_name_ptr); /* Only remove the file if it is present */ if ( daemon_is_file_present ( NODE_LOCKED_FILE ) == true ) 
@@ -264,7 +311,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) if ( daemon_is_file_present ( NODE_LOCKED_FILE_BACKUP ) == true ) { daemon_remove_file ( NODE_LOCKED_FILE_BACKUP ); - ilog ("cleared node locked backup flag (%s)", interface_name.c_str() ); + ilog ("cleared node locked backup flag (%s)", iface_name_ptr); } } else if ( msg.cmd == MTC_MSG_SUBF_GOENABLED_FAILED ) @@ -297,7 +344,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) } else { - ilog ("GoEnabled request posted (%s)\n", interface_name.c_str()); + ilog ("GoEnabled request posted (%s)", iface_name_ptr); ctrl_ptr->posted_script_set.push_back ( GOENABLED_MAIN_SCRIPTS ); ctrl_ptr->posted_script_set.unique(); } @@ -324,7 +371,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) } else { - ilog ("GoEnabled Subf request posted (%s)\n", interface_name.c_str()); + ilog ("GoEnabled Subf request posted (%s)", iface_name_ptr); /* Cleanup test result flag files */ if ( daemon_is_file_present ( GOENABLED_SUBF_PASS) ) @@ -345,15 +392,15 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) } else if ( msg.cmd == MTC_CMD_REBOOT ) { - ilog ("%s command received (%s)", + ilog ("%s command received from %s network", command_name.c_str(), - interface_name.c_str()); + iface_name_ptr); } else if ( msg.cmd == MTC_CMD_LAZY_REBOOT ) { - ilog ("%s command received (%s) ; delay:%d seconds\n", + ilog ("%s command received from %s network ; delay:%d seconds", command_name.c_str(), - interface_name.c_str(), + iface_name_ptr, msg.num ? 
msg.parm[0] : 0 ); } else if ( is_host_services_cmd ( msg.cmd ) == true ) @@ -378,9 +425,9 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) ctrl_ptr->posted_script_set.push_back ( HOSTSERVICES_SCRIPTS ); ctrl_ptr->posted_script_set.unique (); - ilog ("%s request posted (%s)\n", + ilog ("%s request posted from %s network", command_name.c_str(), - interface_name.c_str()); + iface_name_ptr); ctrl_ptr->hostservices.posted = msg.cmd ; ctrl_ptr->hostservices.monitor = MTC_CMD_NONE ; @@ -391,17 +438,17 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) if ( ( daemon_is_file_present ( MTC_CMD_FIT__START_SVCS ))) { rc = FAIL_FIT ; - wlog ("%s Start Services - fit failure (%s)\n", + wlog ("%s Start Services - fit failure (%s)", command_name.c_str(), - interface_name.c_str() ); + iface_name_ptr); } /* Fault insertion - fail to send host services ACK */ if ( ( daemon_is_file_present ( MTC_CMD_FIT__NO_HS_ACK ))) { - wlog ("%s Start Services - fit no ACK (%s)\n", + wlog ("%s Start Services - fit no ACK (%s)", command_name.c_str(), - interface_name.c_str() ); + iface_name_ptr); return (PASS); } @@ -421,20 +468,21 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) } else if ( msg.cmd == MTC_CMD_WIPEDISK ) { - ilog ("Reload command received (%s)\n", interface_name.c_str()); + ilog ("Reload command received from %s network", iface_name_ptr); } else if ( msg.cmd == MTC_CMD_RESET ) { - ilog ("Reset command received (%s)\n", interface_name.c_str()); + ilog ("Reset command received from %s network", iface_name_ptr); } else if ( msg.cmd == MTC_CMD_LOOPBACK ) { - ilog ("Loopback command received (%s)\n", interface_name.c_str()); + ilog ("Loopback command received from %s network", iface_name_ptr); } else { rc = FAIL_BAD_CASE ; - elog ( "Unsupported maintenance command (%d)\n", msg.cmd ); + wlog ( "Unsupported maintenance command (%d) with %ld bytes received from %s network", + msg.cmd, bytes_received, iface_name_ptr ); } 
snprintf ( &msg.hdr[0], MSG_HEADER_SIZE, "%s", get_cmd_rsp_msg_header()); @@ -443,12 +491,12 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) { if ( msg.cmd == MTC_MSG_MAIN_GOENABLED ) { - ilog ("main function goEnabled results acknowledged (%s)\n", interface_name.c_str()); + ilog ("main function goEnabled results acknowledged from %s network", iface_name_ptr); return (PASS); } else if ( msg.cmd == MTC_MSG_SUBF_GOENABLED ) { - ilog ("sub-function goEnabled results acknowledged (%s)\n", interface_name.c_str()); + ilog ("sub-function goEnabled results acknowledged from %s network", iface_name_ptr); return (PASS); } else @@ -460,14 +508,25 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) else if ( strstr ( &msg.hdr[0], get_worker_msg_header()) ) { - elog ("unsupported worker message\n"); - print_mtc_message ( &msg ); + if ( msg.cmd == MTC_MSG_MTCALIVE ) + { + wlog ("unexpected mtcAlive message from %s from %s network", + hostaddr.c_str(), iface_name_ptr); + } + else + { + wlog ("unsupported worker message from %s from %s network", + hostaddr.c_str(), iface_name_ptr); + } + + wlog ("WARNING: mtcClient is receiving mtcAgent bound mtcAlive messages"); + // dump_memory (&msg, 16, bytes_received); return PASS ; } else { - elog ("unsupported message\n"); - print_mtc_message ( &msg ); + wlog ("unsupported message from %s from %s network", hostaddr.c_str(), iface_name_ptr); + // dump_memory (&msg, 16, bytes_received); return PASS ; } @@ -481,73 +540,109 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) { rc = PASS ; - bytes = sizeof(mtc_message_type)-BUF_SIZE; + int bytes = sizeof(mtc_message_type)-BUF_SIZE; + + if ( interface == PXEBOOT_INTERFACE ) + { + int flags = 0 ; // no tx flags + if ( sock_ptr->pxeboot_tx_socket <= 0 ) + { + elog("pxeboot_tx_socket not ok (%d)", sock_ptr->pxeboot_tx_socket); + return (FAIL_SOCKET_SENDTO); + } + + if ( log_ack ) + { + ilog ("sending %s ack to %s over %s network", + 
command_name.c_str(), + hostaddr.c_str(), + iface_name_ptr); + } + + struct sockaddr_in hostAddr; + memset(&hostAddr, 0, sizeof(hostAddr)); + print_mtc_message ( hostaddr.data(), MTC_CMD_TX, msg, iface_name_ptr, false ); + hostAddr.sin_addr.s_addr = inet_addr(hostaddr.data()); + hostAddr.sin_family = AF_INET; + hostAddr.sin_port = htons(sock_ptr->mtc_tx_pxeboot_port); + + ssize_t bytes_sent = sendto(sock_ptr->pxeboot_tx_socket, &msg.hdr[0], bytes, flags, + (const struct sockaddr*)&hostAddr, sizeof(hostAddr)); + if (bytes_sent <= 0) + { + elog ("failed to send %s ack to %s:%d on %s network (rc:%ld) (%d:%m)", + command_name.c_str(), + hostaddr.c_str(), + hostAddr.sin_port, + iface_name_ptr, + bytes_sent, errno); + } + } /* send the message back either over the mgmnt or clstr interface */ - if ( interface == MGMNT_INTERFACE ) + else if ( interface == MGMNT_INTERFACE ) { - if (( sock_ptr->mtc_client_tx_socket ) && - ( sock_ptr->mtc_client_tx_socket->sock_ok() == true )) + if (( sock_ptr->mtc_client_mgmt_tx_socket ) && + ( sock_ptr->mtc_client_mgmt_tx_socket->sock_ok() == true )) { - rc = sock_ptr->mtc_client_tx_socket->write((char*)&msg.hdr[0], bytes); + rc = sock_ptr->mtc_client_mgmt_tx_socket->write((char*)&msg.hdr[0], bytes); if ( rc <= 0 ) { - elog ("%s reply send (mtc_client_tx_socket) failed (%s) (rc:%d)", + elog ("%s reply send (mtc_client_mgmt_tx_socket) failed (%s) (rc:%d)", command_name.c_str(), - interface_name.c_str(), rc); + iface_name_ptr, rc); } else if ( log_ack ) { ilog ("%s reply send (%s)", command_name.c_str(), - interface_name.c_str()); + iface_name_ptr); } } else { - elog ("cannot send to null or failed socket (%s network)\n", - interface_name.c_str() ); + elog ("cannot send to null or failed socket (%s)", iface_name_ptr); } } else if ( interface == CLSTR_INTERFACE ) { - if (( sock_ptr->mtc_client_tx_socket_c0_clstr ) && - ( sock_ptr->mtc_client_tx_socket_c0_clstr->sock_ok() == true )) + if (( sock_ptr->mtc_client_clstr_tx_socket_c0 ) && + ( 
sock_ptr->mtc_client_clstr_tx_socket_c0->sock_ok() == true )) { - rc = sock_ptr->mtc_client_tx_socket_c0_clstr->write((char*)&msg.hdr[0], bytes); + rc = sock_ptr->mtc_client_clstr_tx_socket_c0->write((char*)&msg.hdr[0], bytes); if ( rc <= 0 ) { - elog ("%s reply send (mtc_client_tx_socket_c0_clstr) failed (%s) (rc:%d)", + elog ("%s reply send (mtc_client_clstr_tx_socket_c0) failed (%s) (rc:%d)", command_name.c_str(), - interface_name.c_str(), rc); + iface_name_ptr, rc); } else if ( log_ack ) { ilog ("%s reply send (%s)", command_name.c_str(), - interface_name.c_str()); + iface_name_ptr); } } - if (( sock_ptr->mtc_client_tx_socket_c1_clstr ) && - ( sock_ptr->mtc_client_tx_socket_c1_clstr->sock_ok() == true )) + if (( sock_ptr->mtc_client_clstr_tx_socket_c1 ) && + ( sock_ptr->mtc_client_clstr_tx_socket_c1->sock_ok() == true )) { - rc = sock_ptr->mtc_client_tx_socket_c1_clstr->write((char*)&msg.hdr[0], bytes); + rc = sock_ptr->mtc_client_clstr_tx_socket_c1->write((char*)&msg.hdr[0], bytes); if ( rc <= 0 ) { - elog ("%s reply send (mtc_client_tx_socket_c1_clstr) failed (%s) (rc:%d)", + elog ("%s reply send (mtc_client_clstr_tx_socket_c1) failed (%s) (rc:%d)", command_name.c_str(), - interface_name.c_str(), rc); + iface_name_ptr, rc); } else if ( log_ack ) { ilog ("%s reply send (%s)", command_name.c_str(), - interface_name.c_str()); + iface_name_ptr); } } } - print_mtc_message ( get_hostname(), MTC_CMD_TX, msg, interface_name.data(), (rc != bytes) ); + print_mtc_message ( get_hostname(), MTC_CMD_TX, msg, iface_name_ptr, (rc != bytes) ); /* get the shutdown delay config alue */ int delay = daemon_get_cfg_ptr()->failsafe_shutdown_delay ; @@ -560,11 +655,11 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) { if ( daemon_is_file_present ( MTC_CMD_FIT__NO_REBOOT ) ) { - ilog ("Reboot - fit bypass (%s)\n", interface_name.c_str()); + ilog ("Reboot - fit bypass (%s)", iface_name_ptr); return (PASS); } stop_pmon(); - ilog ("Reboot (%s)\n", 
interface_name.c_str()); + ilog ("Reboot (%s)", iface_name_ptr); daemon_log ( NODE_RESET_FILE, "reboot command" ); fork_sysreq_reboot ( delay ); rc = system("/usr/bin/systemctl reboot"); @@ -581,7 +676,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) { do { - ilog ("Lazy Reboot (%s) ; rebooting in %d seconds\n", interface_name.c_str(), msg.num ? msg.parm[0] : 1 ); + ilog ("Lazy Reboot (%s) ; rebooting in %d seconds", iface_name_ptr, msg.num ? msg.parm[0] : 1 ); sleep (1); if ( msg.parm[0] % 5 ) { @@ -592,7 +687,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) } else { - ilog ("Lazy Reboot (%s) ; now\n", interface_name.c_str() ); + ilog ("Lazy Reboot (%s) ; now", iface_name_ptr); } fork_sysreq_reboot ( delay ); @@ -602,11 +697,11 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) { if ( daemon_is_file_present ( MTC_CMD_FIT__NO_RESET ) ) { - ilog ("Reset - fit bypass (%s)\n", interface_name.c_str()); + ilog ("Reset - fit bypass (%s)", iface_name_ptr); return (PASS); } stop_pmon(); - ilog ("Reset 'reboot -f' (%s)\n", interface_name.c_str()); + ilog ("Reset 'reboot -f' (%s)", iface_name_ptr); daemon_log ( NODE_RESET_FILE, "reset command" ); fork_sysreq_reboot ( delay/2 ); rc = system("/usr/bin/systemctl reboot --force"); @@ -617,7 +712,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) if ( daemon_is_file_present ( MTC_CMD_FIT__NO_WIPEDISK ) ) { - ilog ("Wipedisk - fit bypass (%s)\n", interface_name.c_str()); + ilog ("Wipedisk - fit bypass (%s)", iface_name_ptr); return (PASS); } /* We fork a reboot as a fail safe. 
@@ -636,7 +731,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) } else if( 0 == parent ) /* we're the child */ { - ilog ("Disk wipe in progress (%s)\n", interface_name.c_str()); + ilog ("Disk wipe in progress (%s)", iface_name_ptr); daemon_log ( NODE_RESET_FILE, "wipedisk command" ); rc = system("/usr/local/bin/wipedisk --force"); ilog ("Disk wipe complete - Forcing Reboot ...\n"); @@ -727,35 +822,35 @@ int mtce_send_event ( mtc_socket_type * sock_ptr, unsigned int cmd , const char event.cmd = cmd ; - if (( sock_ptr->mtc_client_tx_socket ) && - ( sock_ptr->mtc_client_tx_socket->sock_ok() == true )) + if (( sock_ptr->mtc_client_mgmt_tx_socket ) && + ( sock_ptr->mtc_client_mgmt_tx_socket->sock_ok() == true )) { if ( bytes == 0 ) { slog ("message send failed ; message size=0 for cmd:0x%x is 0\n", event.cmd ); rc = FAIL_NO_DATA ; } - else if ((rc = sock_ptr->mtc_client_tx_socket->write((char*)&event.hdr[0], bytes))!= bytes ) + else if ((rc = sock_ptr->mtc_client_mgmt_tx_socket->write((char*)&event.hdr[0], bytes))!= bytes ) { elog ("message send failed. 
(%d) (%d:%s) \n", rc, errno, strerror(errno)); elog ("message: %d bytes to <%s:%d>\n", bytes, - sock_ptr->mtc_client_tx_socket->get_dst_str(), - sock_ptr->mtc_client_tx_socket->get_dst_addr()->getPort()); + sock_ptr->mtc_client_mgmt_tx_socket->get_dst_str(), + sock_ptr->mtc_client_mgmt_tx_socket->get_dst_addr()->getPort()); rc = FAIL_TO_TRANSMIT ; } else { mlog2 ("Transmit: %x bytes to %s:%d\n", bytes, - sock_ptr->mtc_client_tx_socket->get_dst_str(), - sock_ptr->mtc_client_tx_socket->get_dst_addr()->getPort()); - print_mtc_message ( get_hostname(), MTC_CMD_TX, event, get_iface_name_str(MGMNT_INTERFACE), false ); + sock_ptr->mtc_client_mgmt_tx_socket->get_dst_str(), + sock_ptr->mtc_client_mgmt_tx_socket->get_dst_addr()->getPort()); + print_mtc_message ( get_hostname(), MTC_CMD_TX, event, get_interface_name_str(MGMNT_INTERFACE), false ); rc = PASS ; } } else { - elog ("cannot send to null or failed socket (%s network)\n", - get_iface_name_str (MGMNT_INTERFACE) ); + elog ("cannot send to null or failed socket (%s)", + get_interface_name_str (MGMNT_INTERFACE) ); rc = FAIL_SOCKET_SENDTO ; } return rc ; @@ -765,10 +860,23 @@ int mtce_send_event ( mtc_socket_type * sock_ptr, unsigned int cmd , const char * * Name : create_mtcAlive_msg * - * Description: Creates a common mtcAlive message + * Description: Creates a common mtcAlive message that consists of the + * - out-of-band health/status flags + * - host uptime + * - json string of some of the host's info + * { + * "hostname":"controller-0", + * "personality":"controller,worker", + * "pxeboot_ip":"169.254.202.2", + * "mgmt_ip":"192.168.204.2", + * "cluster_host_ip":"192.168.206.2", + * "mgmt_mac":"08:00:27:9f:ef:57", + * "interface":"Mgmnt", + * "sequence":145 + * } * ****************************************************************************/ -int create_mtcAlive_msg ( mtc_message_type & msg, int cmd, string identity, int interface ) +int create_mtcAlive_msg ( ctrl_type * ctrl_ptr, mtc_message_type & msg, int cmd, 
string identity, int interface ) { static int _sm_unhealthy_debounce_counter [MAX_IFACES] = {0,0} ; @@ -843,7 +951,7 @@ int create_mtcAlive_msg ( mtc_message_type & msg, int cmd, string identity, int if ( ++_sm_unhealthy_debounce_counter[interface] > MAX_SM_UNHEALTHY_DEBOUNCE ) { wlog("SM Unhealthy flag set (%s)", - get_iface_name_str(interface)); + get_interface_name_str(interface)); msg.parm[MTC_PARM_FLAGS_IDX] |= MTC_FLAG__SM_UNHEALTHY ; } else @@ -851,7 +959,7 @@ int create_mtcAlive_msg ( mtc_message_type & msg, int cmd, string identity, int wlog("SM Unhealthy debounce %d of %d (%s)", _sm_unhealthy_debounce_counter[interface], MAX_SM_UNHEALTHY_DEBOUNCE, - get_iface_name_str(interface)); + get_interface_name_str(interface)); } } else @@ -859,19 +967,32 @@ int create_mtcAlive_msg ( mtc_message_type & msg, int cmd, string identity, int _sm_unhealthy_debounce_counter[interface] = 0 ; } - /* add the interface and sequence number to the mtcAlice message */ + /* add the interface and sequence number to the mtcAlive message */ identity.append ( ",\"interface\":\""); - identity.append (get_iface_name_str(interface)); + identity.append (get_interface_name_str(interface)); identity.append("\",\"sequence\":"); - if ( interface == CLSTR_INTERFACE ) + if ( interface == PXEBOOT_INTERFACE ) { - identity.append(itos(mtcAlive_clstr_sequence++)); + ctrl_ptr->mtcAlive_pxeboot_sequence++ ; + identity.append(itos(ctrl_ptr->mtcAlive_pxeboot_sequence)); + msg.parm[MTC_PARM_SEQ_IDX] = ctrl_ptr->mtcAlive_pxeboot_sequence ; + } + else if ( interface == MGMNT_INTERFACE ) + { + ctrl_ptr->mtcAlive_mgmnt_sequence++ ; + identity.append(itos(ctrl_ptr->mtcAlive_mgmnt_sequence)); + msg.parm[MTC_PARM_SEQ_IDX] = ctrl_ptr->mtcAlive_mgmnt_sequence ; + } + else if ( interface == CLSTR_INTERFACE ) + { + ctrl_ptr->mtcAlive_clstr_sequence++ ; + identity.append(itos(ctrl_ptr->mtcAlive_clstr_sequence)); + msg.parm[MTC_PARM_SEQ_IDX] = ctrl_ptr->mtcAlive_clstr_sequence ; } else - { - 
identity.append(itos(mtcAlive_mgmnt_sequence++)); - } + identity.append(itos(0)); + identity.append("}"); memcpy ( &msg.buf[0], identity.c_str(), identity.size() ); @@ -896,40 +1017,40 @@ int send_mtc_msg ( mtc_socket_type * sock_ptr, int cmd , string identity ) { int interface = MGMNT_INTERFACE ; mtc_message_type msg ; - int bytes = create_mtcAlive_msg ( msg, cmd, identity, interface ); - if (( sock_ptr->mtc_client_tx_socket ) && - ( sock_ptr->mtc_client_tx_socket->sock_ok() == true )) + int bytes = create_mtcAlive_msg ( get_ctrl_ptr(), msg, cmd, identity, interface ); + if (( sock_ptr->mtc_client_mgmt_tx_socket ) && + ( sock_ptr->mtc_client_mgmt_tx_socket->sock_ok() == true )) { /* Send back to requester - TODO: consider sending back to both as multicast */ - if ((rc = sock_ptr->mtc_client_tx_socket->write((char*)&msg.hdr[0], bytes)) != bytes ) + if ((rc = sock_ptr->mtc_client_mgmt_tx_socket->write((char*)&msg.hdr[0], bytes)) != bytes ) { if ( rc == -1 ) { wlog_throttled (send_mtc_msg_failed, 100 , - "failed to send <%s:%d> (%d:%m)\n", - sock_ptr->mtc_client_tx_socket->get_dst_str(), - sock_ptr->mtc_client_tx_socket->get_dst_addr()->getPort(), errno ); + "failed to send <%s:%d> (%d:%m)", + sock_ptr->mtc_client_mgmt_tx_socket->get_dst_str(), + sock_ptr->mtc_client_mgmt_tx_socket->get_dst_addr()->getPort(), errno ); } else { wlog_throttled ( send_mtc_msg_failed, 100 , "sent only %d of %d bytes to <%s:%d>\n", rc, bytes, - sock_ptr->mtc_client_tx_socket->get_dst_str(), - sock_ptr->mtc_client_tx_socket->get_dst_addr()->getPort()); + sock_ptr->mtc_client_mgmt_tx_socket->get_dst_str(), + sock_ptr->mtc_client_mgmt_tx_socket->get_dst_addr()->getPort()); } } else { send_mtc_msg_failed = 0 ; - print_mtc_message ( get_hostname(), MTC_CMD_TX, msg, get_iface_name_str(interface), false ); + print_mtc_message ( get_hostname(), MTC_CMD_TX, msg, get_interface_name_str(interface), false ); rc = PASS ; } } else { - elog ("cannot send to null or failed socket (%s network)\n", - 
get_iface_name_str (MGMNT_INTERFACE) ); + elog ("cannot send to null or failed socket (%s)", + get_interface_name_str (MGMNT_INTERFACE) ); } } else @@ -943,57 +1064,134 @@ int send_mtc_msg ( mtc_socket_type * sock_ptr, int cmd , string identity ) int send_mtcAlive_msg_failed = 0 ; int send_mtcAlive_msg ( mtc_socket_type * sock_ptr, string identity, int interface ) { + int flags = 0 ; // no tx flags + + /* get a pointer to the process control structure */ + ctrl_type * ctrl_ptr = get_ctrl_ptr() ; + + if (( interface == PXEBOOT_INTERFACE ) && + ( ctrl_ptr->pxeboot_iface_provisioned == false )) + return (PASS) ; + if (( interface == CLSTR_INTERFACE ) && - ( get_ctrl_ptr()->clstr_iface_provisioned != true )) + ( ctrl_ptr->clstr_iface_provisioned != true )) { - dlog2 ("cannot send to unprovisioned %s interface\n", - get_iface_name_str(interface) ); + dlog2 ("cannot send to unprovisioned %s interface", + get_interface_name_str(interface) ); return (FAIL); } mtc_message_type msg ; - int bytes = create_mtcAlive_msg ( msg, MTC_MSG_MTCALIVE, identity, interface ); + int bytes = create_mtcAlive_msg ( ctrl_ptr, msg, MTC_MSG_MTCALIVE, identity, interface ); - if ( interface == MGMNT_INTERFACE ) + if ( interface == PXEBOOT_INTERFACE ) + { + /* Send to controller-0 pxeboot address */ + if ( sock_ptr->pxeboot_tx_socket <= 0 ) + { + elog("pxeboot_tx_socket not ok (%d)", sock_ptr->pxeboot_tx_socket); + return (FAIL_SOCKET_SENDTO); + } + + // TODO: Consider adding controllers info to ctrl struct + string controllers[CONTROLLERS] = {CONTROLLER_0, CONTROLLER_1}; + alog1 ("sending mtcAlive to both controllers"); + for (int c = 0 ; c < CONTROLLERS ; c++) + { + string pxeboot_addr_cx ; + struct sockaddr_in hostAddr; + memset(&hostAddr, 0, sizeof(hostAddr)); + + if (controllers[c] == CONTROLLER_1) + { + if ( ctrl_ptr->system_type != SYSTEM_TYPE__AIO__SIMPLEX ) + pxeboot_addr_cx = ctrl_ptr->pxeboot_addr_c1; + else + continue; // skip controller-1 for SX systems + } + else + pxeboot_addr_cx 
= ctrl_ptr->pxeboot_addr_c0; + + if ( pxeboot_addr_cx.empty() ) + { + if ( ctrl_ptr->pxeboot_address_learned[c] == true ) + { + ctrl_ptr->pxeboot_address_learned[c] = false ; + wlog ( "%s pxeboot address not learned ; unable to send pxeboot mtcAlive", + controllers[c].c_str() ); + } + continue ; + } + + if ( ctrl_ptr->pxeboot_address_learned[c] == false ) + { + // Only log this if the not learned log was produced. + // Which is most likely case on process startup. + ilog ("sending pxeboot network mtcAlive msg on port %d to %s at %s", + sock_ptr->mtc_tx_pxeboot_port, + controllers[c].c_str(), + pxeboot_addr_cx.c_str()); + ctrl_ptr->pxeboot_address_learned[c] = true ; + } + + print_mtc_message ( controllers[c], MTC_CMD_TX, msg, get_interface_name_str(PXEBOOT_INTERFACE), false ); + hostAddr.sin_addr.s_addr = inet_addr(pxeboot_addr_cx.data()); + hostAddr.sin_family = AF_INET; + hostAddr.sin_port = htons(sock_ptr->mtc_tx_pxeboot_port); // 2102 + alog1 ("sending pxeboot network mtcAlive msg to %s", controllers[c].c_str() ); + ssize_t bytes_sent = sendto(sock_ptr->pxeboot_tx_socket, &msg.hdr[0], bytes, flags, + (const struct sockaddr*)&hostAddr, sizeof(hostAddr)); + if (bytes_sent <= 0) + { + elog ("failed to send mtcAlive to %s using %s:%d (pxeboot) (rc:%ld) (%d:%m)", + controllers[c].c_str(), pxeboot_addr_cx.c_str(), hostAddr.sin_port, bytes_sent, errno); + } + } // for loop + } + + else if ( interface == MGMNT_INTERFACE ) { /* Send to controller floating address */ - if (( sock_ptr->mtc_client_tx_socket ) && - ( sock_ptr->mtc_client_tx_socket->sock_ok() == true )) + if (( sock_ptr->mtc_client_mgmt_tx_socket ) && + ( sock_ptr->mtc_client_mgmt_tx_socket->sock_ok() == true )) { - print_mtc_message ( CONTROLLER, MTC_CMD_TX, msg, get_iface_name_str(MGMNT_INTERFACE), false ); - sock_ptr->mtc_client_tx_socket->write((char*)&msg.hdr[0], bytes) ; + alog1 ("sending mgmt network mtcAlive msg to %s", CONTROLLER); + print_mtc_message ( CONTROLLER, MTC_CMD_TX, msg, 
get_interface_name_str(MGMNT_INTERFACE), false ); + sock_ptr->mtc_client_mgmt_tx_socket->write((char*)&msg.hdr[0], bytes) ; } else { - elog("mtc_client_tx_socket not ok"); + elog("mtc_client_mgmt_tx_socket not ok"); } } else if ( interface == CLSTR_INTERFACE ) { /* Send to controller-0 cluster address */ - if (( sock_ptr->mtc_client_tx_socket_c0_clstr ) && - ( sock_ptr->mtc_client_tx_socket_c0_clstr->sock_ok() == true )) + if (( sock_ptr->mtc_client_clstr_tx_socket_c0 ) && + ( sock_ptr->mtc_client_clstr_tx_socket_c0->sock_ok() == true )) { - print_mtc_message ( CONTROLLER_0, MTC_CMD_TX, msg, get_iface_name_str(CLSTR_INTERFACE), false ); - sock_ptr->mtc_client_tx_socket_c0_clstr->write((char*)&msg.hdr[0], bytes ) ; + alog1 ("sending clstr network mtcAlive msg to %s", CONTROLLER_0); + print_mtc_message ( CONTROLLER_0, MTC_CMD_TX, msg, get_interface_name_str(CLSTR_INTERFACE), false ); + sock_ptr->mtc_client_clstr_tx_socket_c0->write((char*)&msg.hdr[0], bytes ) ; } else { - elog("mtc_client_tx_socket_c0_clstr not ok"); + elog("mtc_client_clstr_tx_socket_c0 not ok"); } /* Send to controller-1 cluster address */ if ( get_ctrl_ptr()->system_type != SYSTEM_TYPE__AIO__SIMPLEX ) { - if (( sock_ptr->mtc_client_tx_socket_c1_clstr ) && - ( sock_ptr->mtc_client_tx_socket_c1_clstr->sock_ok() == true )) + if (( sock_ptr->mtc_client_clstr_tx_socket_c1 ) && + ( sock_ptr->mtc_client_clstr_tx_socket_c1->sock_ok() == true )) { - print_mtc_message ( CONTROLLER_1, MTC_CMD_TX, msg, get_iface_name_str(CLSTR_INTERFACE), false ); - sock_ptr->mtc_client_tx_socket_c1_clstr->write((char*)&msg.hdr[0], bytes ) ; + alog1 ("sending clstr mtcAlive msg to %s", CONTROLLER_1); + print_mtc_message ( CONTROLLER_1, MTC_CMD_TX, msg, get_interface_name_str(CLSTR_INTERFACE), false ); + sock_ptr->mtc_client_clstr_tx_socket_c1->write((char*)&msg.hdr[0], bytes ) ; } else { - elog("mtc_client_tx_socket_c1_clstr not ok"); + elog("mtc_client_clstr_tx_socket_c1 not ok"); } } } @@ -1040,11 +1238,11 @@ int 
send_mtcClient_cmd ( mtc_socket_type * sock_ptr, int cmd, string hostname, s int rc = FAIL ; /* Send to controller floating address */ - if (( sock_ptr->mtc_client_tx_socket ) && - ( sock_ptr->mtc_client_tx_socket->sock_ok() == true )) + if (( sock_ptr->mtc_client_mgmt_tx_socket ) && + ( sock_ptr->mtc_client_mgmt_tx_socket->sock_ok() == true )) { - print_mtc_message ( hostname, MTC_CMD_TX, msg, get_iface_name_str(MGMNT_INTERFACE), false ); - rc = sock_ptr->mtc_client_tx_socket->write((char*)&msg.hdr[0], bytes, address.data(), port ) ; + print_mtc_message ( hostname, MTC_CMD_TX, msg, get_interface_name_str(MGMNT_INTERFACE), false ); + rc = sock_ptr->mtc_client_mgmt_tx_socket->write((char*)&msg.hdr[0], bytes, address.data(), port ) ; if ( 0 >= rc ) { elog("failed to send command to mtcClient (%d) (%d:%s)", rc, errno, strerror(errno)); @@ -1055,7 +1253,7 @@ int send_mtcClient_cmd ( mtc_socket_type * sock_ptr, int cmd, string hostname, s } else { - elog("mtc_client_tx_socket not ok"); + elog("mtc_client_mgmt_tx_socket not ok"); rc = FAIL_BAD_STATE ; } return (rc) ; diff --git a/mtce/src/maintenance/mtcCtrlMsg.cpp b/mtce/src/maintenance/mtcCtrlMsg.cpp index 1b1ccb5c..96f8ef8b 100755 --- a/mtce/src/maintenance/mtcCtrlMsg.cpp +++ b/mtce/src/maintenance/mtcCtrlMsg.cpp @@ -125,13 +125,53 @@ int mtc_service_inbox ( nodeLinkClass * obj_ptr, mtc_message_type msg ; int bytes = 0 ; int rc = PASS ; - if ( iface == CLSTR_INTERFACE ) + string hostaddr = "" ; + string hostname = "" ; + const char * iface_name_ptr = get_iface_name_str(iface); + + if ( iface == PXEBOOT_INTERFACE ) + { + + struct sockaddr_in client_addr; + socklen_t addr_len = sizeof(client_addr); + + // Receive data + bytes = recvfrom(sock_ptr->pxeboot_rx_socket, + (char*)&msg.hdr[0], + sizeof(mtc_message_type), 0, + (struct sockaddr*)&client_addr, &addr_len); + + // As a non-blocking socket this is normal to occur + // due to batch handling. 
+ if ( bytes == -1 ) + return RETRY ; + + // Log with debug_msg lane 2 + if ( daemon_get_cfg_ptr()->debug_msg&2 ) + { + // log the message ; both header and buffer + string _buf = msg.buf[0] ? msg.buf : "empty"; + mlog3 ("Received %d bytes (%s) from %s:%d - cmd:%d:%s hdr:%s buf:%s", + bytes, + iface_name_ptr, + inet_ntoa(client_addr.sin_addr), + ntohs(client_addr.sin_port), + msg.cmd, + get_mtcNodeCommand_str(msg.cmd), + &msg.hdr[0], _buf.c_str()); + } + hostaddr = inet_ntoa(client_addr.sin_addr); + hostname = obj_ptr->get_hostname ( hostaddr ) ; // based on pxeboot ip + } + else if ( iface == CLSTR_INTERFACE ) { if ( ( obj_ptr ) && ( obj_ptr->clstr_network_provisioned == true ) && ( sock_ptr->mtc_agent_clstr_rx_socket )) { + mlog3 ("clstr network 'recvfrom' start"); bytes = sock_ptr->mtc_agent_clstr_rx_socket->read((char*)&msg, sizeof(msg)); + mlog3 ("clstr network 'recvfrom' stop"); } else { @@ -140,7 +180,9 @@ int mtc_service_inbox ( nodeLinkClass * obj_ptr, } else { - bytes = sock_ptr->mtc_agent_rx_socket->read((char*)&msg, sizeof(msg)); + mlog3 ("mgmt network 'recvfrom' start"); + bytes = sock_ptr->mtc_agent_mgmt_rx_socket->read((char*)&msg, sizeof(msg)); + mlog3 ("mgmt network 'recvfrom' stop"); } msg.buf[BUF_SIZE-1] = '\0'; @@ -160,17 +202,14 @@ int mtc_service_inbox ( nodeLinkClass * obj_ptr, zero_unused_msg_buf (msg, bytes); - /* get the sender's hostname */ - string hostaddr = "" ; - string hostname = "" ; if ( iface == CLSTR_INTERFACE ) { hostaddr = sock_ptr->mtc_agent_clstr_rx_socket->get_src_str(); hostname = obj_ptr->get_hostname ( hostaddr ) ; } - else + else if ( iface == MGMNT_INTERFACE ) { - hostaddr = sock_ptr->mtc_agent_rx_socket->get_src_str(); + hostaddr = sock_ptr->mtc_agent_mgmt_rx_socket->get_src_str(); hostname = obj_ptr->get_hostname ( hostaddr ) ; } @@ -181,17 +220,26 @@ int mtc_service_inbox ( nodeLinkClass * obj_ptr, if (( msg.cmd == MTC_MSG_MTCALIVE ) && (( rc = jsonUtil_get_key_val ( &msg.buf[0], "hostname", hostname )) == PASS )) { 
- ilog ("%s learned from mtcAlive", hostname.c_str()); + string curr_hostaddr = obj_ptr->get_pxeboot_hostaddr ( hostname ); + if ( curr_hostaddr != hostaddr ) + { + ilog ("%s hostname learned from %s mtcAlive ; hostaddr:%s was:%s", + hostname.c_str(), + iface_name_ptr, + hostaddr.c_str(), + curr_hostaddr.c_str()); + obj_ptr->set_pxeboot_hostaddr ( hostname, hostaddr ); + } } else { wlog ("unknown hostname message ... dropping" ); /* make dlog */ - print_mtc_message ( hostname, MTC_CMD_RX, msg, get_iface_name_str(iface), true ); + print_mtc_message ( hostname, MTC_CMD_RX, msg, iface_name_ptr, true ); return (FAIL_GET_HOSTNAME); } } - print_mtc_message ( hostname, MTC_CMD_RX, msg, get_iface_name_str(iface), false ); + print_mtc_message ( hostname, MTC_CMD_RX, msg, iface_name_ptr, false ); /* handle messages that are not mtc_message_type * but rather are simply a json string */ @@ -199,7 +247,7 @@ int mtc_service_inbox ( nodeLinkClass * obj_ptr, { string service ; - mlog1 ("%s\n", &msg.hdr[0] ); + mlog3 ("%s\n", &msg.hdr[0] ); rc = jsonUtil_get_key_val(&msg.hdr[0],"service", service ); if ( rc == PASS ) @@ -256,7 +304,7 @@ int mtc_service_inbox ( nodeLinkClass * obj_ptr, hostname.c_str(), get_mtcNodeCommand_str(msg.cmd), msg.parm[0], - get_iface_name_str(iface)); + iface_name_ptr); } else { @@ -264,7 +312,7 @@ int mtc_service_inbox ( nodeLinkClass * obj_ptr, hostname.c_str(), get_mtcNodeCommand_str(msg.cmd), msg.parm[0], - get_iface_name_str(iface)); + iface_name_ptr); } } } @@ -309,16 +357,16 @@ int mtc_service_inbox ( nodeLinkClass * obj_ptr, obj_ptr->set_uptime ( hostname , msg.parm[MTC_PARM_UPTIME_IDX], false ); obj_ptr->set_health ( hostname , msg.parm[MTC_PARM_HEALTH_IDX] ); obj_ptr->set_mtce_flags ( hostname , msg.parm[MTC_PARM_FLAGS_IDX], iface ); - obj_ptr->set_mtcAlive ( hostname, iface ); + obj_ptr->set_mtcAlive ( hostname , msg.parm[MTC_PARM_SEQ_IDX], iface); - mlog1("%s Uptime:%d Health:%d Flags:0x%x mtcAlive:%s (%s)\n", + mlog2("%s Uptime:%d Health:%d 
Flags:0x%x Seq:%5d mtcAlive:%s (%s)\n", hostname.c_str(), msg.parm[MTC_PARM_UPTIME_IDX], msg.parm[MTC_PARM_HEALTH_IDX], msg.parm[MTC_PARM_FLAGS_IDX], + msg.parm[MTC_PARM_SEQ_IDX], obj_ptr->get_mtcAlive_gate ( hostname ) ? "gated" : "open", - get_iface_name_str(iface)); - + iface_name_ptr); } else if ( msg.cmd == MTC_MSG_MAIN_GOENABLED ) { @@ -426,7 +474,7 @@ int mtc_service_inbox ( nodeLinkClass * obj_ptr, if (( rc | rc1 ) != PASS ) { elog ("received invalid event [rc:%d:%d]", rc, rc1); - print_mtc_message ( hostname, MTC_CMD_RX, msg, get_iface_name_str(iface), true ); + print_mtc_message ( hostname, MTC_CMD_RX, msg, iface_name_ptr, true ); return ( FAIL_INVALID_OPERATION ); } switch ( msg.cmd ) @@ -613,6 +661,8 @@ int send_mtc_cmd ( string & hostname, int cmd , int interface, string json_dict mtc_message_type mtc_cmd ; string data = "" ; mtc_socket_type * sock_ptr = get_sockPtr (); + nodeLinkClass * obj_ptr = get_mtcInv_ptr (); + const char * iface_name_ptr = get_iface_name_str(interface); memset (&mtc_cmd,0,sizeof(mtc_message_type)); /* Add the command version to he message */ @@ -627,7 +677,7 @@ int send_mtc_cmd ( string & hostname, int cmd , int interface, string json_dict mtc_cmd.cmd = cmd ; mtc_cmd.num = 0 ; data = "{\"mtcInfo\":" + json_dict + "}"; - ilog("%s mtc info update", hostname.c_str()); + ilog("%s mtc info update: %s", hostname.c_str(), data.c_str()); rc = PASS ; break ; } @@ -636,6 +686,30 @@ int send_mtc_cmd ( string & hostname, int cmd , int interface, string json_dict snprintf ( &mtc_cmd.hdr[0], MSG_HEADER_SIZE, "%s" , get_cmd_req_msg_header() ); mtc_cmd.cmd = cmd ; mtc_cmd.num = 0 ; + if ( interface == PXEBOOT_INTERFACE ) + { + if ( !obj_ptr->pxeboot_network_provisioned ) return PASS; + + /* There is no pxeboot floating IP so the mtcClient cannot use + * a resolvable name label like 'CONTROLLER' as it does for + * management nwk. 
+ * Therefore, the mtcClient on each node needs to be told the + * controller's pxeboot ip addresses so it knows where to send. */ + obj_ptr->pxebootInfo_loader(); + data = "{\"pxebootInfo\":{" ; + data.append ("\"address\":\""); + data.append (obj_ptr->my_pxeboot_ip); + data.append ("\",\""); + data.append (CONTROLLER_0); + data.append ("\":\""); + data.append (obj_ptr->get_pxeboot_hostaddr(CONTROLLER_0)); + data.append ("\",\""); + data.append (CONTROLLER_1); + data.append ("\":\""); + data.append (obj_ptr->get_pxeboot_hostaddr(CONTROLLER_1)); + data.append ("\"}}"); + alog1("%s pxeboot info update:%s", hostname.c_str(), data.c_str()); + } rc = PASS ; break ; } @@ -668,7 +742,7 @@ int send_mtc_cmd ( string & hostname, int cmd , int interface, string json_dict ilog ("%s sending '%s' request (%s)", hostname.c_str(), get_mtcNodeCommand_str(cmd), - get_iface_name_str(interface)); + iface_name_ptr); snprintf ( &mtc_cmd.hdr[0], MSG_HEADER_SIZE, "%s", get_cmd_req_msg_header() ); mtc_cmd.cmd = cmd ; mtc_cmd.num = 0 ; @@ -688,7 +762,7 @@ int send_mtc_cmd ( string & hostname, int cmd , int interface, string json_dict ilog ("%s sending '%s' request (%s)", hostname.c_str(), get_mtcNodeCommand_str(cmd), - get_iface_name_str(interface)); + iface_name_ptr); snprintf ( &mtc_cmd.hdr[0], MSG_HEADER_SIZE, "%s", get_cmd_req_msg_header() ); mtc_cmd.cmd = cmd ; mtc_cmd.num = 0 ; @@ -713,7 +787,7 @@ int send_mtc_cmd ( string & hostname, int cmd , int interface, string json_dict { mlog ("%s sending 'Locked' notification (%s)", hostname.c_str(), - get_iface_name_str(interface)); + iface_name_ptr); snprintf ( &mtc_cmd.hdr[0], MSG_HEADER_SIZE, "%s", get_cmd_req_msg_header() ); mtc_cmd.cmd = cmd ; mtc_cmd.num = 0 ; @@ -738,7 +812,7 @@ int send_mtc_cmd ( string & hostname, int cmd , int interface, string json_dict { ilog ("%s sending 'UnLocked' notification (%s)", hostname.c_str(), - get_iface_name_str(interface)); + iface_name_ptr); snprintf ( &mtc_cmd.hdr[0], MSG_HEADER_SIZE, "%s", 
get_cmd_req_msg_header() ); mtc_cmd.cmd = cmd ; mtc_cmd.num = 0 ; @@ -754,20 +828,27 @@ int send_mtc_cmd ( string & hostname, int cmd , int interface, string json_dict if ( rc == PASS ) { int bytes = 0; - - nodeLinkClass * obj_ptr = get_mtcInv_ptr (); + string iface_address ; /* add the mac address of the target card to the header * Note: the minus 1 is to overwrite the null */ snprintf ( &mtc_cmd.hdr[MSG_HEADER_SIZE-1], MSG_HEADER_SIZE, "%s", obj_ptr->get_hostIfaceMac(hostname, MGMNT_IFACE).data()); + /* Update the sender's address */ + if (interface == PXEBOOT_INTERFACE) + iface_address = obj_ptr->my_pxeboot_ip ; + else if (interface == CLSTR_INTERFACE) + iface_address = obj_ptr->my_clstr_ip ; + else + iface_address = obj_ptr->my_float_ip ; + /* If data is empty then at least add where the message came from */ if ( data.empty() ) { data = "{\"address\":\""; - data.append(obj_ptr->my_float_ip) ; + data.append(iface_address) ; data.append("\",\"interface\":\""); - data.append(get_iface_name_str(interface)); + data.append(iface_name_ptr); data.append("\"}"); } else @@ -778,7 +859,7 @@ int send_mtc_cmd ( string & hostname, int cmd , int interface, string json_dict snprintf ( &mtc_cmd.buf[0], data.length()+1, "%s", data.data()); bytes = (sizeof(mtc_message_type)-(BUF_SIZE-(data.length()+1))); - print_mtc_message ( hostname, MTC_CMD_TX, mtc_cmd, get_iface_name_str(interface), force ) ; + print_mtc_message ( hostname, MTC_CMD_TX, mtc_cmd, iface_name_ptr, force ) ; if (interface == MGMNT_INTERFACE) { @@ -791,13 +872,55 @@ int send_mtc_cmd ( string & hostname, int cmd , int interface, string json_dict return (FAIL_HOSTADDR_LOOKUP); } - mlog ("%s sending %s request to %s (%s)", + mlog ("%s sending %s request to %s:%d (%s)", hostname.c_str(), get_mtcNodeCommand_str(cmd), hostaddr.c_str(), - get_iface_name_str(interface)); + sock_ptr->mtc_mgmnt_cmd_port, + iface_name_ptr); - rc = sock_ptr->mtc_agent_tx_socket->write((char *)&mtc_cmd, bytes, hostaddr.c_str(), 
sock_ptr->mtc_mgmnt_cmd_port); + rc = sock_ptr->mtc_agent_mgmt_tx_socket->write((char *)&mtc_cmd, bytes, hostaddr.c_str(), sock_ptr->mtc_mgmnt_cmd_port); + } + else if ((interface == PXEBOOT_INTERFACE) && (sock_ptr->pxeboot_tx_socket)) + { + string pxeboot_hostAddr = obj_ptr->get_pxeboot_hostaddr(hostname); + if (hostUtil_is_valid_ip_addr(pxeboot_hostAddr)) + { + // Set up sockaddr_in with the host pxeboot address and its rx port number + int flags = 0 ; + struct sockaddr_in hostAddr; + memset(&hostAddr, 0, sizeof(hostAddr)); + hostAddr.sin_family = AF_INET; // pxeboot network is IPV4 only + hostAddr.sin_port = htons(sock_ptr->mtc_tx_pxeboot_port); + hostAddr.sin_addr.s_addr = inet_addr(pxeboot_hostAddr.c_str()); + + mlog ("%s sending %s request to %s:%d (%s)", + hostname.c_str(), + get_mtcNodeCommand_str(cmd), + pxeboot_hostAddr.c_str(), + sock_ptr->mtc_rx_pxeboot_port, + iface_name_ptr); + + ssize_t bytes_sent = sendto(sock_ptr->pxeboot_tx_socket, + (char *)&mtc_cmd, + bytes, flags, + (const struct sockaddr*)&hostAddr, + sizeof(hostAddr)); + if (bytes_sent <= 0) + { + elog ("%s failed to send %d:%s command to %s:%d (%s) (%d:%m)", + hostname.c_str(), cmd, + get_mtcNodeCommand_str(cmd), + pxeboot_hostAddr.c_str(), + sock_ptr->mtc_rx_pxeboot_port, + iface_name_ptr, + errno); + } + } + else + { + return (FAIL_HOSTADDR_LOOKUP); + } } else if ((interface == CLSTR_INTERFACE) && ( obj_ptr->clstr_network_provisioned == true ) && @@ -805,17 +928,25 @@ int send_mtc_cmd ( string & hostname, int cmd , int interface, string json_dict { string clstr_hostaddr = obj_ptr->get_clstr_hostaddr(hostname); if ( hostUtil_is_valid_ip_addr( clstr_hostaddr ) != true ) - return (FAIL_NO_CLSTR_PROV); + return (FAIL_HOSTADDR_LOOKUP); mlog ("%s sending %s request to %s (%s)", hostname.c_str(), get_mtcNodeCommand_str(cmd), clstr_hostaddr.c_str(), - get_iface_name_str(interface)); + iface_name_ptr); rc = sock_ptr->mtc_agent_clstr_tx_socket->write((char *)&mtc_cmd, bytes, clstr_hostaddr.c_str(), 
sock_ptr->mtc_clstr_cmd_port); } - + else if ( interface == CLSTR_INTERFACE ) + { + // This path can be taken if the cluster interface + mlog ("%s to %s network not sent", get_mtcNodeCommand_str(cmd), iface_name_ptr); + } + else + { + wlog ("%s to %s network not sent", get_mtcNodeCommand_str(cmd), iface_name_ptr); + } if ( 0 > rc ) { elog("%s Failed to send command (rc:%i)\n", hostname.c_str(), rc); @@ -944,7 +1075,7 @@ int send_hbs_command ( string hostname, int cmd, string controller ) { if ( cmd == MTC_CMD_ACTIVE_CTRL ) { - mlog3 ("%s %s sent to %s %s", + mlog1 ("%s %s sent to %s %s", hostname.c_str(), get_mtcNodeCommand_str(cmd), unit->c_str(), @@ -1162,7 +1293,7 @@ int service_events ( nodeLinkClass * obj_ptr, mtc_socket_type * sock_ptr ) else { /* The interface that the heartbeat loss occurred over is - * specified in parm[0 for this command + * specified in parm[0] for this command * 0 = MGMNT_IFACE * 1 = CLSTR_IFACE * else default to 0 (MGMNT_IFACE) to be backwards compatible diff --git a/mtce/src/maintenance/mtcNodeComp.cpp b/mtce/src/maintenance/mtcNodeComp.cpp index 2cc822c7..3c25d760 100644 --- a/mtce/src/maintenance/mtcNodeComp.cpp +++ b/mtce/src/maintenance/mtcNodeComp.cpp @@ -160,12 +160,25 @@ void timer_handler ( int sig, siginfo_t *si, void *uc) } } -void _close_mgmnt_rx_socket ( void ) +/********************************************/ +/* Network receive socket 'close' functions */ +/********************************************/ + +void _close_pxeboot_rx_socket ( void ) { - if ( mtc_sock.mtc_client_rx_socket ) + if ( mtc_sock.pxeboot_rx_socket ) { - delete(mtc_sock.mtc_client_rx_socket); - mtc_sock.mtc_client_rx_socket = 0 ; + close (mtc_sock.pxeboot_rx_socket); + mtc_sock.pxeboot_rx_socket = 0 ; + } +} + +void _close_mgmt_rx_socket ( void ) +{ + if ( mtc_sock.mtc_client_mgmt_rx_socket ) + { + delete(mtc_sock.mtc_client_mgmt_rx_socket); + mtc_sock.mtc_client_mgmt_rx_socket = 0 ; } } @@ -178,26 +191,39 @@ void _close_clstr_rx_socket ( void ) } } 
-void _close_mgmnt_tx_socket ( void ) +/*********************************************/ +/* Network transmit socket 'close' functions */ +/*********************************************/ + +void _close_pxeboot_tx_socket ( void ) { - if (mtc_sock.mtc_client_tx_socket) + if ( mtc_sock.pxeboot_tx_socket ) { - delete (mtc_sock.mtc_client_tx_socket); - mtc_sock.mtc_client_tx_socket = 0 ; + close (mtc_sock.pxeboot_tx_socket); + mtc_sock.pxeboot_tx_socket = 0 ; + } +} + +void _close_mgmt_tx_socket ( void ) +{ + if (mtc_sock.mtc_client_mgmt_tx_socket) + { + delete (mtc_sock.mtc_client_mgmt_tx_socket); + mtc_sock.mtc_client_mgmt_tx_socket = 0 ; } } void _close_clstr_tx_sockets ( void ) { - if (mtc_sock.mtc_client_tx_socket_c0_clstr) + if (mtc_sock.mtc_client_clstr_tx_socket_c0) { - delete (mtc_sock.mtc_client_tx_socket_c0_clstr); - mtc_sock.mtc_client_tx_socket_c0_clstr = 0 ; + delete (mtc_sock.mtc_client_clstr_tx_socket_c0); + mtc_sock.mtc_client_clstr_tx_socket_c0 = 0 ; } - if (mtc_sock.mtc_client_tx_socket_c1_clstr) + if (mtc_sock.mtc_client_clstr_tx_socket_c1) { - delete (mtc_sock.mtc_client_tx_socket_c1_clstr); - mtc_sock.mtc_client_tx_socket_c1_clstr = 0 ; + delete (mtc_sock.mtc_client_clstr_tx_socket_c1); + mtc_sock.mtc_client_clstr_tx_socket_c1 = 0 ; } } @@ -214,9 +240,9 @@ void daemon_exit ( void ) { daemon_files_fini (); - _close_mgmnt_rx_socket (); + _close_mgmt_rx_socket (); _close_clstr_rx_socket (); - _close_mgmnt_tx_socket (); + _close_mgmt_tx_socket (); _close_clstr_tx_sockets(); _close_amon_sock (); @@ -246,6 +272,18 @@ static int mtc_config_handler ( void * user, config_ptr->mtc_rx_clstr_port = atoi(value); config_ptr->mask |= CONFIG_CLIENT_MTC_CLSTR_PORT ; } + else if (MATCH("agent", "mtc_rx_pxeboot_port")) + { + // The mtcClient fetches the mtcAgent's pxeboot receive + // port and uses it for the mtcClient's pxeboot transmitter. 
+ config_ptr->mtc_tx_pxeboot_port = atoi(value); + mtc_sock.mtc_tx_pxeboot_port = config_ptr->mtc_tx_pxeboot_port; + } + else if (MATCH("client", "mtc_rx_pxeboot_port")) + { + config_ptr->mtc_rx_pxeboot_port = atoi(value); + mtc_sock.mtc_rx_pxeboot_port = mtc_config.mtc_rx_pxeboot_port; + } else if (MATCH("timeouts", "failsafe_shutdown_delay")) { config_ptr->failsafe_shutdown_delay = atoi(value); @@ -277,11 +315,11 @@ int daemon_configure ( void ) get_debug_options ( MTCE_CONF_FILE, &mtc_config ); - /* Verify loaded config against an expected mask + /* Verify loaded config against an expected mask * as an ini file fault detection method */ if ( mtc_config.mask != CONFIG_CLIENT_MASK ) { - elog ("Failed Compute Mtc Configuration (%x)\n", + elog ("Failed Compute Mtc Configuration (%x)", (( -1 ^ mtc_config.mask ) & CONFIG_CLIENT_MASK) ); rc = FAIL_INI_CONFIG ; } @@ -306,9 +344,9 @@ int daemon_configure ( void ) /* Initialization Utilities */ /****************************/ -void setup_mgmnt_rx_socket ( void ) +void setup_mgmt_rx_socket ( void ) { - dlog ("setup of mgmnt RX\n"); + dlog ("setup of Mgmt receive socket"); ctrl.mgmnt_iface = daemon_mgmnt_iface() ; ctrl.mgmnt_iface = daemon_get_iface_master ((char*)ctrl.mgmnt_iface.data()); @@ -318,39 +356,102 @@ void setup_mgmnt_rx_socket ( void ) get_iface_macaddr ( ctrl.mgmnt_iface.data(), ctrl.macaddr ); get_iface_address ( ctrl.mgmnt_iface.data(), ctrl.address , true ); - _close_mgmnt_rx_socket (); - mtc_sock.mtc_client_rx_socket = new msgClassRx(ctrl.address.c_str(),mtc_sock.mtc_mgmnt_cmd_port, IPPROTO_UDP, ctrl.mgmnt_iface.data(), false ); + _close_mgmt_rx_socket (); + mtc_sock.mtc_client_mgmt_rx_socket = new msgClassRx(ctrl.address.c_str(),mtc_sock.mtc_mgmnt_cmd_port, IPPROTO_UDP, ctrl.mgmnt_iface.data(), false ); /* update health of socket */ - if ( mtc_sock.mtc_client_rx_socket ) + if ( mtc_sock.mtc_client_mgmt_rx_socket ) { /* look for fault insertion request */ if ( daemon_is_file_present ( 
MTC_CMD_FIT__MGMNT_RXSOCK ) ) - mtc_sock.mtc_client_rx_socket->return_status = FAIL ; + mtc_sock.mtc_client_mgmt_rx_socket->return_status = FAIL ; - if ( mtc_sock.mtc_client_rx_socket->return_status == PASS ) + if ( mtc_sock.mtc_client_mgmt_rx_socket->return_status == PASS ) { - mtc_sock.mtc_client_rx_socket->sock_ok (true); + mtc_sock.mtc_client_mgmt_rx_socket->sock_ok (true); } else { elog ("failed to init 'management rx' socket (rc:%d)\n", - mtc_sock.mtc_client_rx_socket->return_status ); - mtc_sock.mtc_client_rx_socket->sock_ok (false); + mtc_sock.mtc_client_mgmt_rx_socket->return_status ); + mtc_sock.mtc_client_mgmt_rx_socket->sock_ok (false); } } } } - -void setup_clstr_rx_socket ( void ) +void setup_pxeboot_rx_socket ( void ) { - if ( ctrl.clstr_iface_provisioned == false ) + if ( !ctrl.pxeboot_iface_provisioned ) return ; + string log_prefix = "setup pxeboot receive socket" ; + + /* The pxeboot interface is always the management interface */ + ctrl.pxeboot_iface = daemon_mgmnt_iface() ; + ctrl.pxeboot_iface = daemon_get_iface_master ((char*)ctrl.pxeboot_iface.data()); + + /* Use the learned parent if it exists and is not the same */ + if ( ! 
ctrl.iface_info[PXEBOOT_INTERFACE].parent.empty() ) + if ( ctrl.pxeboot_iface != ctrl.iface_info[PXEBOOT_INTERFACE].parent ) + ctrl.pxeboot_iface = ctrl.iface_info[PXEBOOT_INTERFACE].parent ; + + if ( ctrl.pxeboot_iface.empty() ) { + wlog ("cannot %s without a pxeboot iface: %s", + log_prefix.c_str(), + ctrl.pxeboot_iface.c_str()); + } + else if ( mtc_sock.mtc_rx_pxeboot_port <= 0 ) + { + wlog ("cannot %s without a valid ; port: %d", + log_prefix.c_str(), + mtc_sock.mtc_rx_pxeboot_port) + } + else if ( ctrl.pxeboot_addr.empty() ) + { + wlog ("cannot %s socket on %s port %d with no pxeboot address", + log_prefix.c_str(), + ctrl.pxeboot_iface.c_str(), + mtc_sock.mtc_rx_pxeboot_port) return ; } - dlog ("setup of cluster-host RX\n"); + ilog ("%s on %s:%s:%d", + log_prefix.c_str(), + ctrl.pxeboot_iface.c_str(), + ctrl.pxeboot_addr.c_str(), + mtc_sock.mtc_rx_pxeboot_port); + + _close_pxeboot_rx_socket (); + + struct sockaddr_in pxeboot_addr ; + + // Create the socket + if ((mtc_sock.pxeboot_rx_socket = socket(AF_INET, SOCK_DGRAM, 0)) == -1) + { + elog ("failed to create IPV4 pxeboot receive socket"); + } + + // Initialize pxeboot address structure + memset(&pxeboot_addr, 0, sizeof(pxeboot_addr)); + + pxeboot_addr.sin_family = AF_INET; + pxeboot_addr.sin_port = htons(mtc_sock.mtc_rx_pxeboot_port); + pxeboot_addr.sin_addr.s_addr = inet_addr(ctrl.pxeboot_addr.data()); + + // Bind the pxeboot unit address and messaging port to socket + if (bind(mtc_sock.pxeboot_rx_socket, (const struct sockaddr*)&pxeboot_addr, sizeof(pxeboot_addr)) == -1) + { + elog ("failed to bind %s:%d to socket", + ctrl.pxeboot_addr.c_str(), + mtc_sock.mtc_rx_pxeboot_port); + _close_pxeboot_rx_socket(); + } +} + +void setup_clstr_rx_socket ( void ) +{ + if ( !ctrl.clstr_iface_provisioned ) return ; + ilog ("setup of cluster-host receive socket"); /* Fetch the cluster-host interface name. 
* calls daemon_get_iface_master inside so the * aggrigated name is returned if it exists */ @@ -393,84 +494,92 @@ void setup_clstr_rx_socket ( void ) } } -void setup_mgmnt_tx_socket ( void ) +void setup_mgmt_tx_socket ( void ) { - dlog ("setup of mgmnt TX\n"); - _close_mgmnt_tx_socket (); - mtc_sock.mtc_client_tx_socket = new msgClassTx(CONTROLLER,mtc_sock.mtc_agent_port, IPPROTO_UDP, ctrl.mgmnt_iface.data()); + ilog ("setup of Mgmt network transmit socket"); + _close_mgmt_tx_socket (); + mtc_sock.mtc_client_mgmt_tx_socket = new msgClassTx(CONTROLLER,mtc_sock.mtc_agent_port, IPPROTO_UDP, ctrl.mgmnt_iface.data()); - if ( mtc_sock.mtc_client_tx_socket ) + if ( mtc_sock.mtc_client_mgmt_tx_socket ) { /* look for fault insertion request */ if ( daemon_is_file_present ( MTC_CMD_FIT__MGMNT_TXSOCK ) ) - mtc_sock.mtc_client_tx_socket->return_status = FAIL ; + mtc_sock.mtc_client_mgmt_tx_socket->return_status = FAIL ; - if ( mtc_sock.mtc_client_tx_socket->return_status == PASS ) + if ( mtc_sock.mtc_client_mgmt_tx_socket->return_status == PASS ) { - mtc_sock.mtc_client_tx_socket->sock_ok(true); + mtc_sock.mtc_client_mgmt_tx_socket->sock_ok(true); } else { elog ("failed to init 'management tx' socket (rc:%d)\n", - mtc_sock.mtc_client_tx_socket->return_status ); - mtc_sock.mtc_client_tx_socket->sock_ok(false); + mtc_sock.mtc_client_mgmt_tx_socket->return_status ); + mtc_sock.mtc_client_mgmt_tx_socket->sock_ok(false); } } } +// Send mtcAlive messages to the controllers +void setup_pxeboot_tx_socket ( void ) +{ + if ( !ctrl.pxeboot_iface_provisioned ) return ; + ilog ("setup of pxeboot transmit socket"); + _close_pxeboot_tx_socket (); + if ((mtc_sock.pxeboot_tx_socket = socket(AF_INET, SOCK_DGRAM, 0)) == -1) + { + elog ("failed to setup pxeboot network transmit socket ; (%d:%m)", errno); + } +} + void setup_clstr_tx_sockets ( void ) { - if ( ctrl.clstr_iface_provisioned == false ) - { - return ; - } - - dlog ("setup of %s TX\n", CONTROLLER_0_CLUSTER_HOST); + if ( 
!ctrl.clstr_iface_provisioned ) return ; + ilog ("setup of %s transmit sockets", CONTROLLER_0_CLUSTER_HOST); _close_clstr_tx_sockets (); - mtc_sock.mtc_client_tx_socket_c0_clstr = + mtc_sock.mtc_client_clstr_tx_socket_c0 = new msgClassTx(CONTROLLER_0_CLUSTER_HOST, mtc_sock.mtc_agent_port, IPPROTO_UDP, mtc_config.clstr_iface); - if ( mtc_sock.mtc_client_tx_socket_c0_clstr ) + if ( mtc_sock.mtc_client_clstr_tx_socket_c0 ) { - if ( mtc_sock.mtc_client_tx_socket_c0_clstr->return_status == PASS ) + if ( mtc_sock.mtc_client_clstr_tx_socket_c0->return_status == PASS ) { - mtc_sock.mtc_client_tx_socket_c0_clstr->sock_ok(true); + mtc_sock.mtc_client_clstr_tx_socket_c0->sock_ok(true); } else { elog ("failed to init '%s' tx socket (rc:%d)\n", CONTROLLER_0_CLUSTER_HOST, - mtc_sock.mtc_client_tx_socket_c0_clstr->return_status ); - mtc_sock.mtc_client_tx_socket_c0_clstr->sock_ok(false); + mtc_sock.mtc_client_clstr_tx_socket_c0->return_status ); + mtc_sock.mtc_client_clstr_tx_socket_c0->sock_ok(false); } } if ( ctrl.system_type != SYSTEM_TYPE__AIO__SIMPLEX ) { dlog ("setup of %s TX\n", CONTROLLER_1_CLUSTER_HOST); - mtc_sock.mtc_client_tx_socket_c1_clstr = + mtc_sock.mtc_client_clstr_tx_socket_c1 = new msgClassTx(CONTROLLER_1_CLUSTER_HOST, mtc_sock.mtc_agent_port, IPPROTO_UDP, mtc_config.clstr_iface); - if ( mtc_sock.mtc_client_tx_socket_c1_clstr ) + if ( mtc_sock.mtc_client_clstr_tx_socket_c1 ) { - if ( mtc_sock.mtc_client_tx_socket_c1_clstr->return_status == PASS ) + if ( mtc_sock.mtc_client_clstr_tx_socket_c1->return_status == PASS ) { - mtc_sock.mtc_client_tx_socket_c1_clstr->sock_ok(true); + mtc_sock.mtc_client_clstr_tx_socket_c1->sock_ok(true); } else { elog ("failed to init '%s' tx socket (rc:%d)\n", CONTROLLER_0_CLUSTER_HOST, - mtc_sock.mtc_client_tx_socket_c1_clstr->return_status ); - mtc_sock.mtc_client_tx_socket_c1_clstr->sock_ok(false); + mtc_sock.mtc_client_clstr_tx_socket_c1->return_status ); + mtc_sock.mtc_client_clstr_tx_socket_c1->sock_ok(false); } } } @@ -479,6 
+588,7 @@ void setup_clstr_tx_sockets ( void ) void setup_amon_socket ( void ) { + ilog ("setup of active monitoring socket"); char filename [MAX_FILENAME_LEN] ; string port_string ; @@ -496,13 +606,13 @@ void setup_amon_socket ( void ) if ( mtc_sock.amon_socket ) { int val = 1; - - /* Make the active monitor socket non-blocking */ + + /* Make the active monitor socket non-blocking */ if ( 0 > ioctl(mtc_sock.amon_socket, FIONBIO, (char *)&val) ) - { - elog ("Failed to set amon socket non-blocking\n"); + { + elog ("Failed to set amon socket non-blocking"); close (mtc_sock.amon_socket); - } + } else { ilog ("Active Monitor Socket %d\n", mtc_sock.amon_socket ); @@ -516,19 +626,22 @@ void setup_amon_socket ( void ) * * Construct the messaging sockets * - * 1. Unicast receive socket mgmnt (mtc_client_rx_socket) - * 2. Unicast receive socket clstr (mtc_client_clstr_rx_socket) - * 3. Unicast transmit socket mgmnt (mtc_client_tx_socket) - * 4. Unicast transmit socket clstr (mtc_client_tx_socket_c?_clstr) + * 1. Unicast UDP Mgmt network RX socket - mtc_client_mgmt_rx_socket (msgClass) + * 2. Unicast UDP Clstr network RX socket - mtc_client_clstr_rx_socket (msgClass) + * 3. Unicast UDP Pxeboot network RX socket - mtc_clinet_pxeboot_rx_socket (raw) * - * 5. socket for pmond acive monitoring + * 4. Unicast UDP Mgmt network TX socket - mtc_client_mgmt_tx_socket (msgClass) + * 5. Unicast UDP Clstr network TX socket - mtc_client_clstr_tx_socket_c? (msgClass) + * 6. Unicast UDP Pxeboot network TX socket - mtc_clinet_pxeboot_tx_socket (raw) + * + * 7. 
Unicase UDP lo network active monitor - amon_socket (raw) * *******************************************************************/ int mtc_socket_init ( void ) { /* Setup the Management Interface Recieve Socket */ /* Read the port config strings into the socket struct */ - mtc_sock.mtc_agent_port = mtc_config.mtc_agent_port; + mtc_sock.mtc_agent_port = mtc_config.mtc_agent_port; mtc_sock.mtc_mgmnt_cmd_port = mtc_config.mtc_rx_mgmnt_port; mtc_sock.mtc_clstr_cmd_port = mtc_config.mtc_rx_clstr_port; @@ -537,14 +650,16 @@ int mtc_socket_init ( void ) ilog ("Controller : %s\n", ctrl.mtcAgent_ip.c_str()); /************************************************************/ - /* Setup the Mgmnt Interface Receive Socket */ + /* Setup Mgmnt Network messaging sockets to/from mtcAgent */ /************************************************************/ - setup_mgmnt_rx_socket (); + setup_mgmt_rx_socket (); + setup_mgmt_tx_socket (); /************************************************************/ - /* Setup the Mgmnt Interface Transmit messaging to mtcAgent */ + /* Setup Pxeboot Network messaging sockets to/from mtcAgent */ /************************************************************/ - setup_mgmnt_tx_socket (); + setup_pxeboot_rx_socket (); + setup_pxeboot_tx_socket (); /* Manage Cluster-host network setup */ string mgmnt_iface_name = daemon_mgmnt_iface(); @@ -598,6 +713,10 @@ string _self_identify ( string nodetype ) ctrl.who_i_am.append( nodetype.data() ); ctrl.who_i_am.append( "\""); + ctrl.who_i_am.append( ",\"pxeboot_ip\":\""); + ctrl.who_i_am.append( ctrl.pxeboot_addr.data() ); + ctrl.who_i_am.append( "\""); + ctrl.who_i_am.append( ",\"mgmt_ip\":\""); ctrl.who_i_am.append( ctrl.address.data() ); ctrl.who_i_am.append( "\""); @@ -605,7 +724,7 @@ string _self_identify ( string nodetype ) ctrl.who_i_am.append( ",\"cluster_host_ip\":\""); ctrl.who_i_am.append( ctrl.address_clstr.data() ); ctrl.who_i_am.append( "\""); - + ctrl.who_i_am.append( ",\"mgmt_mac\":\""); ctrl.who_i_am.append( 
ctrl.macaddr.data() ); ctrl.who_i_am.append( "\""); @@ -1023,6 +1142,112 @@ int issue_reset_and_cleanup ( void ) return (rc); } +/***************************************************************************** + * Name : learn_my_pxeboot_address + * + * Purpose : Learn my pxeboot ip address. + * + * Description: + * + * worker and storage nodes' learn their DHCP pxeboot ip from a + * local /var/lib/dhcp/ file. + * + * controllers learn their STATIC pxeboot address based on + * their mac address from the dnsmasq.hosts file. + * + * However, the pxeboot address for a system node installed + * controller, before it is unlocked, is DHCP'ed from + * /etc/network/interfaces.d/ifcfg-pxeboot created by the + * kickstart. So until the controller is unlocked its pxeboot + * address must be learned like the worker and storage nodes. + * That being from the local dhcp file. + * + * Note: In cases where the pxeboot interface name is the same as the + * management interface name then the ifcfg file for the pxeboot + * interface is suffixed with ":2" so that ifupdown can handle + * each interface independently during networking.service start. + * This is true for ethernet type interfaces as well as the + * bond interface when there are no vlans. In these cases the + * pxeboot and management interface names are the same and need + * distinction. 
+ * + * Parameters : None + * + * Returns : PASS or failed return from get_iface_info + * + *****************************************************************************/ +int learn_my_pxeboot_address ( void ) +{ + int rc = PASS ; + if ( ctrl.pxeboot_iface_provisioned == false ) return rc ; + + if ( (rc = get_iface_info ( PXEBOOT_INTERFACE, ctrl.pxeboot_iface, ctrl.iface_info[PXEBOOT_INTERFACE] )) == PASS ) + { + string ifcfg_file_suffix = ":2" ; // Assume ifcfg file suffix ':2' for first boot after install case + iface_info_type * iface_info_ptr = &ctrl.iface_info[PXEBOOT_INTERFACE] ; + iface_info_ptr->iface_name = ctrl.pxeboot_iface ; + + ilog ("... Type: %s", get_iface_type_str(iface_info_ptr->iface_type)); + ilog ("... Parent: %s", iface_info_ptr->parent.empty() ? "none" : iface_info_ptr->parent.c_str()); + if ( iface_info_ptr->iface_type == bond ) + { + ilog ("... Bond Slaves: %s and %s", + iface_info_ptr->slave1.empty() ? "none" : iface_info_ptr->slave1.c_str(), + iface_info_ptr->slave2.empty() ? "none" : iface_info_ptr->slave2.c_str()); + ilog ("... Bond Mode: %s", + iface_info_ptr->bond_mode.empty() ? "unknown" : iface_info_ptr->bond_mode.c_str()); + } + ilog ("Pxeboot IF Name: %s", iface_info_ptr->parent.c_str()); + + // To handle the first reboot after install where the kickstart adds a ':2' + // to the boot interface we always try the dhcp search with the ':2' first. + ctrl.pxeboot_addr = get_pxeboot_dhcp_addr ( iface_info_ptr->parent + ifcfg_file_suffix); + if ( !ctrl.pxeboot_addr.empty() ) + { + ilog ("pxeboot dhcp lease address: %s ; initial", ctrl.pxeboot_addr.c_str()); + } + // If the pxeboot address is not found above then do the full search. + else + { + // If the pxeboot interface is not same as the management interface + // name then we need to remove the ":2" suffix. 
+ // The ':2' is something the kickstart and the networking management + // adds to the interface name to distinguish between mgmt and pxeboot + // interfaces when they are the same. + if ( iface_info_ptr->parent != std::string(ctrl.mgmnt_iface)) + ifcfg_file_suffix = "" ; + + ctrl.pxeboot_addr = get_pxeboot_dhcp_addr ( iface_info_ptr->parent + ifcfg_file_suffix); + if ( !ctrl.pxeboot_addr.empty() ) + { + ilog ("pxeboot dhcp lease address: %s", ctrl.pxeboot_addr.c_str()); + } + // Now, override that local address if its found in the controller leases file. + if ( ctrl.nodetype & CONTROLLER_TYPE ) + { + string temp_pxeboot_addr= get_pxeboot_static_addr ( iface_info_ptr->parent + ifcfg_file_suffix ); + if ( !temp_pxeboot_addr.empty() ) + { + ctrl.pxeboot_addr = temp_pxeboot_addr ; + ilog ("pxeboot static address: %s", ctrl.pxeboot_addr.c_str()); + } + } + } + if ( ctrl.pxeboot_addr.empty() ) + { + elog ("failed to get pxeboot address"); + } + else + { + ilog ("Pxeboot IP: %s", ctrl.pxeboot_addr.c_str()); + } + } + else + { + elog ("failed to get interface info ; rc:%d", rc); + } + return (rc); +} /* The main service loop */ int daemon_init ( string iface, string nodetype_str ) @@ -1040,6 +1265,7 @@ int daemon_init ( string iface, string nodetype_str ) ctrl.subfunction = 0 ; ctrl.system_type = daemon_system_type (); ctrl.clstr_iface_provisioned = false ; + ctrl.pxeboot_iface_provisioned = false ; ctrl.peer_ctrlr_reset.sync = false ; /* convert node type to integer */ @@ -1053,6 +1279,28 @@ int daemon_init ( string iface, string nodetype_str ) /* Assign interface to config */ ctrl.mgmnt_iface = iface ; + // Condition gates for pxeboot network provisioning. + // The pxeboot network is only provisioned while management is not on 'lo' + if ( iface != LOOPBACK_IF ) + { + // ... and while this is not the first unconfigured controller. 
+ if (( daemon_is_file_present ( FIRST_CONTROLLER_FILE ) == true ) && + ( daemon_is_file_present ( INIT_CONFIG_COMPLETE ) == false )) + { + // This check prevents trying to setup the pxeboot + // network on the oam interface immediately following + // initial controller-0 network install. + // All other cases get a provisioned pxeboot network. + dlog ("pxeboot network not provisionable yet"); + } + else + { + // Ready to do pxeboot messaging + ctrl.pxeboot_iface = ctrl.mgmnt_iface ; + ilog ("Pxeboot iface %s", ctrl.pxeboot_iface.c_str()); + ctrl.pxeboot_iface_provisioned = true ; + } + } if ( daemon_files_init () != PASS ) { printf ("Pid, log or other files could not be opened\n"); @@ -1081,6 +1329,11 @@ int daemon_init ( string iface, string nodetype_str ) rc = FAIL_NODETYPE; } + if (( rc = learn_my_pxeboot_address () ) != PASS ) + { + wlog ("failed to learn my pxeboot address ; rc:%d", rc ); + } + /* Setup the heartbeat service messaging sockets */ if ( (rc = mtc_socket_init ()) != PASS ) { @@ -1199,10 +1452,10 @@ void daemon_service_run ( void ) FD_ZERO(&mtc_sock.readfds); socks.clear(); - if ( mtc_sock.mtc_client_rx_socket && mtc_sock.mtc_client_rx_socket->return_status==PASS ) + if ( mtc_sock.mtc_client_mgmt_rx_socket && mtc_sock.mtc_client_mgmt_rx_socket->return_status==PASS ) { - socks.push_front (mtc_sock.mtc_client_rx_socket->getFD()); - FD_SET(mtc_sock.mtc_client_rx_socket->getFD(), &mtc_sock.readfds); + socks.push_front (mtc_sock.mtc_client_mgmt_rx_socket->getFD()); + FD_SET(mtc_sock.mtc_client_mgmt_rx_socket->getFD(), &mtc_sock.readfds); } if (( ctrl.clstr_iface_provisioned == true ) && @@ -1213,6 +1466,12 @@ void daemon_service_run ( void ) FD_SET(mtc_sock.mtc_client_clstr_rx_socket->getFD(), &mtc_sock.readfds); } + if ( mtc_sock.pxeboot_rx_socket ) + { + socks.push_front (mtc_sock.pxeboot_rx_socket); + FD_SET(mtc_sock.pxeboot_rx_socket, &mtc_sock.readfds); + } + mtc_sock.amon_socket = active_monitor_get_sel_obj (); if ( mtc_sock.amon_socket ) { @@ 
-1226,15 +1485,6 @@ void daemon_service_run ( void ) /* Call select() and wait only up to SOCKET_WAIT */ socks.sort(); - -#ifdef WANT_SELECTS - ilog_throttled ( select_log_count, 200 , "Selects: mgmnt:%d clstr:%d amon:%d - Size:%ld First:%d Last:%d\n", - mtc_sock.mtc_client_rx_socket, - mtc_sock.mtc_client_clstr_rx_socket, - mtc_sock.amon_socket, - socks.size(), socks.front(), socks.back()); -#endif - rc = select( socks.back()+1, &mtc_sock.readfds, NULL, NULL, &mtc_sock.waitd); @@ -1251,23 +1501,40 @@ void daemon_service_run ( void ) } else { - if ((mtc_sock.mtc_client_rx_socket && mtc_sock.mtc_client_rx_socket->return_status==PASS) && FD_ISSET(mtc_sock.mtc_client_rx_socket->getFD(), &mtc_sock.readfds)) - { - mtc_service_command ( sock_ptr, MGMNT_INTERFACE ); - } - if (( ctrl.clstr_iface_provisioned == true ) && - ( !ctrl.address_clstr.empty() ) && - ( mtc_sock.mtc_client_clstr_rx_socket ) && - ( mtc_sock.mtc_client_clstr_rx_socket->return_status==PASS) && - ( FD_ISSET(mtc_sock.mtc_client_clstr_rx_socket->getFD(), &mtc_sock.readfds))) - { - mtc_service_command ( sock_ptr, CLSTR_INTERFACE ); - } - if ( FD_ISSET(mtc_sock.amon_socket, &mtc_sock.readfds)) - { - dlog3 ("Active Monitor Select Fired\n"); - active_monitor_dispatch (); - } + // Is there a Pxeboot network message present ? + if (mtc_sock.pxeboot_rx_socket && + FD_ISSET(mtc_sock.pxeboot_rx_socket, &mtc_sock.readfds)) + { + mlog3 ("pxeboot rx socket fired"); + mtc_service_command ( sock_ptr, PXEBOOT_INTERFACE ); + } + + // Is there a Mgmt network message present ? + if ((mtc_sock.mtc_client_mgmt_rx_socket && + mtc_sock.mtc_client_mgmt_rx_socket->return_status==PASS) && + FD_ISSET(mtc_sock.mtc_client_mgmt_rx_socket->getFD(), &mtc_sock.readfds)) + { + mlog3 ("mgmt rx socket fired"); + mtc_service_command ( sock_ptr, MGMNT_INTERFACE ); + } + + // Is there a cluster host network message present ? 
+ if (( ctrl.clstr_iface_provisioned == true ) && + ( !ctrl.address_clstr.empty() ) && + ( mtc_sock.mtc_client_clstr_rx_socket ) && + ( mtc_sock.mtc_client_clstr_rx_socket->return_status==PASS) && + ( FD_ISSET(mtc_sock.mtc_client_clstr_rx_socket->getFD(), &mtc_sock.readfds))) + { + mlog3 ("clstr rx socket fired"); + mtc_service_command ( sock_ptr, CLSTR_INTERFACE ); + } + + // Is there a active monitor request pesent + if ( FD_ISSET(mtc_sock.amon_socket, &mtc_sock.readfds)) + { + mlog3 ("Active Monitor Select Fired\n"); + active_monitor_dispatch (); + } } if (( ctrl.active_script_set == GOENABLED_MAIN_SCRIPTS ) || @@ -1386,25 +1653,41 @@ void daemon_service_run ( void ) /** * Look for failing sockets and try to recover them, * but only one at a time if there are multiple failing. - * Priority is the command receiver, thehn transmitter, + * Priority is the command receiver, then transmitter, * followed by the cluster-host and others. **/ - /* Mgmnt Rx */ - if (( mtc_sock.mtc_client_rx_socket == NULL ) || - ( mtc_sock.mtc_client_rx_socket->sock_ok() == false )) + /* Mgmt Rx */ + if (( mtc_sock.mtc_client_mgmt_rx_socket == NULL ) || + ( mtc_sock.mtc_client_mgmt_rx_socket->sock_ok() == false )) { - wlog ("calling setup_mgmnt_rx_socket (auto-recovery)\n"); - setup_mgmnt_rx_socket(); + wlog ("calling setup_mgmt_rx_socket (auto-recovery)\n"); + setup_mgmt_rx_socket(); socket_reinit = true ; } - /* Mgmnt Tx */ - else if (( mtc_sock.mtc_client_tx_socket == NULL ) || - ( mtc_sock.mtc_client_tx_socket->sock_ok() == false )) + /* Mgmt Tx */ + else if (( mtc_sock.mtc_client_mgmt_tx_socket == NULL ) || + ( mtc_sock.mtc_client_mgmt_tx_socket->sock_ok() == false )) { - wlog ("calling setup_mgmnt_tx_socket\n"); - setup_mgmnt_tx_socket(); + wlog ("calling setup_mgmt_tx_socket (auto-recovery)"); + setup_mgmt_tx_socket(); + socket_reinit = true ; + } + + /* Pxeboot Rx */ + else if ((ctrl.pxeboot_iface_provisioned == true) && (mtc_sock.pxeboot_rx_socket <= 0)) + { + wlog ("calling 
setup_pxeboot_rx_socket (auto-recovery)"); + setup_pxeboot_rx_socket(); + socket_reinit = true ; + } + + /* Pxeboot Tx */ + else if ((ctrl.pxeboot_iface_provisioned == true) && (mtc_sock.pxeboot_tx_socket == 0)) + { + wlog ("calling setup_pxeboot_tx_socket (auto-recovery)"); + setup_pxeboot_tx_socket(); socket_reinit = true ; } @@ -1413,7 +1696,7 @@ void daemon_service_run ( void ) (( mtc_sock.mtc_client_clstr_rx_socket == NULL ) || ( mtc_sock.mtc_client_clstr_rx_socket->sock_ok() == false ))) { - wlog ("calling setup_clstr_rx_socket (auto-recovery)\n"); + wlog ("calling setup_clstr_rx_socket (auto-recovery)"); setup_clstr_rx_socket(); socket_reinit = true ; } @@ -1421,10 +1704,10 @@ void daemon_service_run ( void ) /* Clstr Tx ; AIO SX */ else if ((ctrl.system_type == SYSTEM_TYPE__AIO__SIMPLEX) && ( ctrl.clstr_iface_provisioned == true ) && - (( mtc_sock.mtc_client_tx_socket_c0_clstr == NULL ) || - ( mtc_sock.mtc_client_tx_socket_c0_clstr->sock_ok() == false ))) + (( mtc_sock.mtc_client_clstr_tx_socket_c0 == NULL ) || + ( mtc_sock.mtc_client_clstr_tx_socket_c0->sock_ok() == false ))) { - wlog ("calling setup_clstr_tx_sockets (auto-recovery)\n"); + wlog ("calling setup_clstr_tx_sockets (auto-recovery)"); setup_clstr_tx_sockets(); socket_reinit = true ; } @@ -1432,12 +1715,12 @@ void daemon_service_run ( void ) /* Clstr Tx ; not AIO SX */ else if ((ctrl.system_type != SYSTEM_TYPE__AIO__SIMPLEX) && ( ctrl.clstr_iface_provisioned == true ) && - (( mtc_sock.mtc_client_tx_socket_c0_clstr == NULL ) || - ( mtc_sock.mtc_client_tx_socket_c1_clstr == NULL ) || - ( mtc_sock.mtc_client_tx_socket_c0_clstr->sock_ok() == false ) || - ( mtc_sock.mtc_client_tx_socket_c1_clstr->sock_ok() == false ))) + (( mtc_sock.mtc_client_clstr_tx_socket_c0 == NULL ) || + ( mtc_sock.mtc_client_clstr_tx_socket_c1 == NULL ) || + ( mtc_sock.mtc_client_clstr_tx_socket_c0->sock_ok() == false ) || + ( mtc_sock.mtc_client_clstr_tx_socket_c1->sock_ok() == false ))) { - wlog ("calling 
setup_clstr_tx_sockets (auto-recovery)\n"); + wlog ("calling setup_clstr_tx_sockets (auto-recovery)"); setup_clstr_tx_sockets(); socket_reinit = true ; } @@ -1445,7 +1728,7 @@ void daemon_service_run ( void ) else if ( mtc_sock.amon_socket <= 0 ) { setup_amon_socket (); - wlog ("calling setup_amon_socket (auto-recovery)\n"); + wlog ("calling setup_amon_socket (auto-recovery)"); socket_reinit = true ; } else @@ -1455,10 +1738,14 @@ void daemon_service_run ( void ) if ( socket_reinit ) { + if (( mtc_sock.pxeboot_tx_socket <= 0 ) || ( mtc_sock.pxeboot_rx_socket <= 0 )) + learn_my_pxeboot_address (); + /* re-get identity if interfaces are re-initialized */ string who_i_am = _self_identify ( ctrl.nodetype_str ); } - + alog1 ("sending mtcAlive on all provisioned mtcAlive networks"); + send_mtcAlive_msg ( sock_ptr, ctrl.who_i_am, PXEBOOT_INTERFACE ); send_mtcAlive_msg ( sock_ptr, ctrl.who_i_am, MGMNT_INTERFACE ); if (( ctrl.clstr_iface_provisioned == true ) && ( mtc_sock.mtc_client_clstr_rx_socket != NULL ) && @@ -1474,21 +1761,27 @@ void daemon_service_run ( void ) if ( daemon_is_file_present ( MTC_CMD_FIT__DIR ) ) { + /* fault insertion testing */ + if ( daemon_is_file_present ( MTC_CMD_FIT__PXEBOOT_RXSOCK )) + _close_pxeboot_rx_socket(); + if ( daemon_is_file_present ( MTC_CMD_FIT__PXEBOOT_TXSOCK )) + _close_pxeboot_tx_socket (); + /* fault insertion testing */ if ( daemon_is_file_present ( MTC_CMD_FIT__MGMNT_RXSOCK )) { - if ( mtc_sock.mtc_client_rx_socket ) + if ( mtc_sock.mtc_client_mgmt_rx_socket ) { - mtc_sock.mtc_client_rx_socket->sock_ok (false); - _close_mgmnt_rx_socket(); + mtc_sock.mtc_client_mgmt_rx_socket->sock_ok (false); + _close_mgmt_rx_socket(); } } if ( daemon_is_file_present ( MTC_CMD_FIT__MGMNT_TXSOCK )) { - if ( mtc_sock.mtc_client_tx_socket ) + if ( mtc_sock.mtc_client_mgmt_tx_socket ) { - mtc_sock.mtc_client_tx_socket->sock_ok (false); - _close_mgmnt_tx_socket (); + mtc_sock.mtc_client_mgmt_tx_socket->sock_ok (false); + _close_mgmt_tx_socket (); } 
} if ( daemon_is_file_present ( MTC_CMD_FIT__CLSTR_RXSOCK )) @@ -1498,10 +1791,10 @@ void daemon_service_run ( void ) } if ( daemon_is_file_present ( MTC_CMD_FIT__CLSTR_TXSOCK )) { - if ( mtc_sock.mtc_client_tx_socket_c0_clstr ) - mtc_sock.mtc_client_tx_socket_c0_clstr->sock_ok (false); - if ( mtc_sock.mtc_client_tx_socket_c1_clstr ) - mtc_sock.mtc_client_tx_socket_c1_clstr->sock_ok (false); + if ( mtc_sock.mtc_client_clstr_tx_socket_c0 ) + mtc_sock.mtc_client_clstr_tx_socket_c0->sock_ok (false); + if ( mtc_sock.mtc_client_clstr_tx_socket_c1 ) + mtc_sock.mtc_client_clstr_tx_socket_c1->sock_ok (false); } if ( daemon_is_file_present ( MTC_CMD_FIT__AMON_SOCK )) { @@ -1509,6 +1802,7 @@ void daemon_service_run ( void ) } } } + /* service controller specific audits */ if ( ctrl.nodetype & CONTROLLER_TYPE ) { @@ -1554,6 +1848,24 @@ void daemon_service_run ( void ) ctrl.peer_ctrlr_reset.audit_timer.ring = false ; } } + + // mtcAlive Stress Test. Send the mtcAgent a lot of messages + #define MTCALIVE_STRESS_FILE ((const char*)"/var/run/mtcAlive_stress") + if (( daemon_get_cfg_ptr()->testmask & TESTMASK__MSG__MTCALIVE_STRESS ) && + ( daemon_is_file_present ( MTCALIVE_STRESS_FILE ))) + { + int loops = daemon_get_file_int ( MTCALIVE_STRESS_FILE ); + slog ("mtcAlive Stress Test: Sending %d mtcAlive on each network.", loops); + for ( int loop = 0 ; loop < loops ; loop++ ) + { + send_mtcAlive_msg ( sock_ptr, ctrl.who_i_am, PXEBOOT_INTERFACE ); + send_mtcAlive_msg ( sock_ptr, ctrl.who_i_am, MGMNT_INTERFACE ); + send_mtcAlive_msg ( sock_ptr, ctrl.who_i_am, CLSTR_INTERFACE ); + + // Service signal handler just in case the loaded loops number is big + daemon_signal_hdlr (); + } + } daemon_signal_hdlr (); } daemon_exit(); @@ -2065,6 +2377,114 @@ void load_mtcInfo_msg ( mtc_message_type & msg ) } } +/*************************************************************************** + * + * Name : load_pxebootInfo_msg + * + * Description: Extract the pxeboot info from the MTC_REQ_MTCALIVE 
message. + * + * Assumptions: Contains a json string with the controller pxeboot + * network unit IP addresses in the following form. + * Address can be empty of an unprovisioned controller. + * + * { "pxebootInfo":{ + * "controller-0":"169.254.202.2", + * "controller-1":"169.254.202.3" + * } + * } + * + * Returns : Nothing + * + ***************************************************************************/ +void load_pxebootInfo_msg ( mtc_message_type & msg ) +{ + struct json_object *_obj = json_tokener_parse( &msg.buf[0] ); + if ( _obj ) + { + const char dict_label [] = "pxebootInfo" ; + struct json_object *info_obj = (struct json_object *)(NULL); + json_bool json_rc = json_object_object_get_ex( _obj, + &dict_label[0], + &info_obj ); + + if ( ( json_rc == true ) && ( info_obj ) ) + { + struct json_object *ctrl_obj = (struct json_object *)(NULL); + string pxeboot_addr_cx[CONTROLLERS] = {CONTROLLER_0, CONTROLLER_1}; + for (int c = 0 ; c < CONTROLLERS ; c++) + { + // used to store the in-loop controller current pxeboot address + string cur_pxeboot_addr ; + // only updated if the address changes + string new_pxeboot_addr ; + // current loop controller hostname + string controller = pxeboot_addr_cx[c] ; + + // get the current pxeboot address for the in loop controller + cur_pxeboot_addr = (controller == CONTROLLER_0) ? ctrl.pxeboot_addr_c0 : ctrl.pxeboot_addr_c1; + + json_bool json_rc = + json_object_object_get_ex( info_obj, controller.data(), &ctrl_obj ); + if (( json_rc == true ) && (ctrl_obj)) + { + jlog ("controller-x obj data: %s", json_object_get_string(ctrl_obj)); + + // get the in-loop controller pxeboot address from the msg + string now_pxeboot_addr = json_object_get_string(ctrl_obj); + if ( now_pxeboot_addr != cur_pxeboot_addr ) + { + if ( now_pxeboot_addr.empty() ) + { + new_pxeboot_addr = now_pxeboot_addr ; + wlog ("%s pxeboot address now null ; was %s", controller.c_str(), + cur_pxeboot_addr.empty() ? 
"null" : cur_pxeboot_addr.c_str()); + } + else if ( cur_pxeboot_addr.empty() ) + { + new_pxeboot_addr = now_pxeboot_addr ; + ilog ("%s pxeboot ip: %s", controller.c_str(), new_pxeboot_addr.c_str()); + } + else + { + new_pxeboot_addr = now_pxeboot_addr ; + ilog ("%s pxeboot ip: %s ; change from %s", controller.c_str(), + new_pxeboot_addr.c_str(), cur_pxeboot_addr.c_str()); + } + } + else if ( !cur_pxeboot_addr.empty() ) + { + alog1 ("%s pxeboot ip %s ; unchanged", controller.c_str(), cur_pxeboot_addr.c_str()); + } + + // now manage the change + if ( !new_pxeboot_addr.empty() ) + { + if ( controller == CONTROLLER_0 ) + ctrl.pxeboot_addr_c0 = new_pxeboot_addr ; + else + ctrl.pxeboot_addr_c1 = new_pxeboot_addr ; + } + } + else + { + wlog ("Failed to parse %s pxeboot ip from '%s' : %s", + controller.c_str(), &dict_label[0], &msg.buf[0]); + } + } // for loop + } + else + { + elog("Failed to parse '%s' from mtcAlive request message: %s", + &dict_label[0], &msg.buf[0]); + } + json_object_put(_obj); + } + else + { + elog("Failed to tokenize mtcAlive request message data: %s", + &msg.buf[0]); + } +} /* Push daemon state to log file */ void daemon_dump_info ( void ) diff --git a/mtce/src/maintenance/mtcNodeComp.h b/mtce/src/maintenance/mtcNodeComp.h index 190500c6..a61a9317 100644 --- a/mtce/src/maintenance/mtcNodeComp.h +++ b/mtce/src/maintenance/mtcNodeComp.h @@ -1,10 +1,10 @@ #ifndef __INCLUDE_MTCNODECOMP_HH__ #define __INCLUDE_MTCNODECOMP_HH__ /* - * Copyright (c) 2015-2016 Wind River Systems, Inc. -* -* SPDX-License-Identifier: Apache-2.0 -* + * Copyright (c) 2015-2016, 2024 Wind River Systems, Inc. + * + * SPDX-License-Identifier: Apache-2.0 + * */ /** @@ -92,6 +92,30 @@ typedef struct string mgmnt_iface ; string clstr_iface ; + // Controller-0 USB installs lead to management interface, + // and therefore the pxeboot interface, being the localhost 'lo'. 
+ // Trying to setup the pxeboot socket and do messaging over that + // socket is not possible so this bool tracks when the pxeboot + // interface is not correct. + bool pxeboot_iface_provisioned ; + string pxeboot_iface ; + string pxeboot_addr ; + string pxeboot_addr_c0 ; + string pxeboot_addr_c1 ; + + // Assume address is learned to start even though it's likely not. + // This enabled the first not learned log followed by a learned + // log once it is. + bool pxeboot_address_learned [CONTROLLERS] = { true, true }; + + // mtcAlive current running sequence number storage + unsigned int mtcAlive_pxeboot_sequence = 0 ; + unsigned int mtcAlive_mgmnt_sequence = 0 ; + unsigned int mtcAlive_clstr_sequence = 0 ; + + /* Maintain pxeboot, management and cluser network interface information */ + iface_info_type iface_info[MTCALIVE_INTERFACES_MAX]; + unsigned int nodetype ; unsigned int function ; unsigned int subfunction ; @@ -131,5 +155,6 @@ bool is_subfunction_worker ( void ); int run_goenabled_scripts ( mtc_socket_type * sock_ptr , string requestor ); int run_hostservices_scripts ( unsigned int cmd ); void load_mtcInfo_msg ( mtc_message_type & msg ); +void load_pxebootInfo_msg ( mtc_message_type & msg ); -#endif +#endif // __INCLUDE_MTCNODECOMP_HH__ diff --git a/mtce/src/maintenance/mtcNodeCtrl.cpp b/mtce/src/maintenance/mtcNodeCtrl.cpp index e5bdf25e..d3beb195 100644 --- a/mtce/src/maintenance/mtcNodeCtrl.cpp +++ b/mtce/src/maintenance/mtcNodeCtrl.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, 2016, 2023 Wind River Systems, Inc. + * Copyright (c) 2013, 2016, 2023-2024 Wind River Systems, Inc. 
* * SPDX-License-Identifier: Apache-2.0 * @@ -134,21 +134,21 @@ msgSock_type * get_mtclogd_sockPtr ( void ) /******************************************************/ /* Socket Close functions */ /******************************************************/ -static void mtc_agent_tx_socket_close ( void ) +static void mtc_agent_mgmt_tx_socket_close ( void ) { - if (mtc_sock.mtc_agent_tx_socket) + if (mtc_sock.mtc_agent_mgmt_tx_socket) { - delete mtc_sock.mtc_agent_tx_socket; - mtc_sock.mtc_agent_tx_socket = NULL; + delete mtc_sock.mtc_agent_mgmt_tx_socket; + mtc_sock.mtc_agent_mgmt_tx_socket = NULL; } } -static void mtc_agent_rx_socket_close ( void ) +static void mtc_agent_mgmt_rx_socket_close ( void ) { - if (mtc_sock.mtc_agent_rx_socket) + if (mtc_sock.mtc_agent_mgmt_rx_socket) { - delete (mtc_sock.mtc_agent_rx_socket); - mtc_sock.mtc_agent_rx_socket = NULL; + delete (mtc_sock.mtc_agent_mgmt_rx_socket); + mtc_sock.mtc_agent_mgmt_rx_socket = NULL; } } @@ -170,7 +170,7 @@ static void mtc_agent_clstr_rx_socket_close ( void ) } } -static void mtc_event_rx_sock_close ( void ) +static void event_rx_sock_close ( void ) { if (mtc_sock.mtc_event_rx_sock) { @@ -224,6 +224,24 @@ static void ioctl_sock_close ( void ) } } +static void pxeboot_rx_socket_close ( void ) +{ + if ( mtc_sock.pxeboot_rx_socket ) + { + close (mtc_sock.pxeboot_rx_socket); + mtc_sock.pxeboot_rx_socket = 0 ; + } +} + +static void pxeboot_tx_socket_close ( void ) +{ + if ( mtc_sock.pxeboot_tx_socket ) + { + close (mtc_sock.pxeboot_tx_socket); + mtc_sock.pxeboot_tx_socket = 0 ; + } +} + /* close all the sockets */ static void mtc_socket_fini(void) { @@ -231,17 +249,64 @@ static void mtc_socket_fini(void) set_inotify_close(mtcInv.inotify_shadow_file_fd, mtcInv.inotify_shadow_file_wd); + pxeboot_tx_socket_close(); + pxeboot_rx_socket_close(); mtc_agent_clstr_tx_socket_close(); mtc_agent_clstr_rx_socket_close(); - mtc_agent_tx_socket_close(); - mtc_agent_rx_socket_close(); - mtc_event_rx_sock_close(); + 
mtc_agent_mgmt_tx_socket_close(); + mtc_agent_mgmt_rx_socket_close(); + event_rx_sock_close(); mtc_to_hbs_sock_close(); hwmon_cmd_sock_close(); mtclogd_socket_close(); mtcHttpSvr_fini(mtce_event); } +void setup_pxeboot_tx_socket ( void ) +{ + if ( !mtcInv.pxeboot_network_provisioned ) return ; + pxeboot_tx_socket_close(); + ilog ("Creating pxeboot transmit socket"); + if ((mtc_sock.pxeboot_tx_socket = socket(AF_INET, SOCK_DGRAM, 0)) <= 0) + { + elog ("failed to create IPV4 pxeboot network transmit socket ; (%d:%m)", errno); + } +} + +void setup_pxeboot_rx_socket ( void ) +{ + if ( !mtcInv.pxeboot_network_provisioned ) return ; + pxeboot_rx_socket_close (); + ilog ("Creating pxeboot receive socket on %s:%d", + mtcInv.my_pxeboot_ip.c_str(), + mtc_sock.mtc_rx_pxeboot_port); + + struct sockaddr_in pxeboot_addr ; + + // Create the socket + if ((mtc_sock.pxeboot_rx_socket = socket(AF_INET, SOCK_DGRAM | SOCK_NONBLOCK, 0)) == -1) + { + elog ("failed to create IPV4 pxeboot network receive socket ; (%d:%m)", errno); + } + + // Initialize pxeboot address structure + memset(&pxeboot_addr, 0, sizeof(pxeboot_addr)); + + pxeboot_addr.sin_family = AF_INET; + pxeboot_addr.sin_port = htons(mtc_sock.mtc_rx_pxeboot_port); + pxeboot_addr.sin_addr.s_addr = inet_addr(mtcInv.my_pxeboot_ip.data()); + + // Bind the pxeboot unit address and messaging port to socket + if (bind(mtc_sock.pxeboot_rx_socket, (const struct sockaddr*)&pxeboot_addr, sizeof(pxeboot_addr)) == -1) + { + elog ("failed to bind %s:%d to socket (%d:%m)", + mtcInv.my_pxeboot_ip.c_str(), + mtc_sock.mtc_rx_pxeboot_port, + errno); + pxeboot_rx_socket_close(); + } +} + void daemon_exit(void) { /* Cancel the uptime timer */ @@ -360,6 +425,20 @@ static int mtc_config_handler ( void * user, config_ptr->mtc_rx_clstr_port = atoi(value); config_ptr->mask |= CONFIG_CLIENT_MTC_CLSTR_PORT ; } + else if (MATCH("agent", "mtc_rx_pxeboot_port")) + { + config_ptr->mtc_rx_pxeboot_port = atoi(value); + mtc_sock.mtc_rx_pxeboot_port = 
mtc_config.mtc_rx_pxeboot_port ; + } + else if (MATCH("client", "mtc_rx_pxeboot_port")) + { + // Get the mtcClient's pxeboot network receive port number + // and use it as the mtcAgent's pxeboot network transmit port. + // So that the mtcAgent can send the mtcClient messages over the + // pxeboot network. + config_ptr->mtc_tx_pxeboot_port = atoi(value); + mtc_sock.mtc_tx_pxeboot_port = config_ptr->mtc_tx_pxeboot_port ; + } else if (MATCH("agent", "token_refresh_rate")) { config_ptr->token_refresh_rate = atoi(value); @@ -791,34 +870,34 @@ int mtc_socket_init ( void ) int socket_size = 0 ; char ip_address[INET6_ADDRSTRLEN]; - /***********************************************************/ - /* Setup UDP Maintenance Command Transmit Socket Mgmnt I/F */ - /***********************************************************/ + /**********************************************************************/ + /* Setup UDP Maintenance Command Transmit Socket to the Mgmnt network */ + /**********************************************************************/ mtc_sock.mtc_mgmnt_cmd_port = mtc_config.cmd_port; msgClassAddr::getAddressFromInterface(mtc_config.mgmnt_iface, ip_address, INET6_ADDRSTRLEN); - mtc_sock.mtc_agent_tx_socket = + mtc_sock.mtc_agent_mgmt_tx_socket = new msgClassTx(ip_address, mtc_sock.mtc_mgmnt_cmd_port, IPPROTO_UDP, mtc_config.mgmnt_iface); #ifdef WANT_FIT_TESTING - if ( daemon_want_fit ( FIT_CODE__SOCKET_SETUP, mtcInv.my_hostname, "mtc_agent_tx_socket")) - mtc_sock.mtc_agent_tx_socket->return_status = FAIL ; + if ( daemon_want_fit ( FIT_CODE__SOCKET_SETUP, mtcInv.my_hostname, "mtc_agent_mgmt_tx_socket")) + mtc_sock.mtc_agent_mgmt_tx_socket->return_status = FAIL ; #endif - if ((mtc_sock.mtc_agent_tx_socket == NULL) || - (mtc_sock.mtc_agent_tx_socket->return_status)) + if ((mtc_sock.mtc_agent_mgmt_tx_socket == NULL) || + (mtc_sock.mtc_agent_mgmt_tx_socket->return_status)) { elog("Failed to create mtcClient command socket on port %d for %s (%d:%s)\n", 
mtc_sock.mtc_mgmnt_cmd_port, mtc_config.mgmnt_iface, errno, strerror(errno)); - mtc_agent_tx_socket_close(); + mtc_agent_mgmt_tx_socket_close(); return (FAIL_SOCKET_CREATE) ; } - /***********************************************************/ - /* Setup UDP Maintenance Command Transmit Socket Clstr I/F */ - /***********************************************************/ + /**********************************************************************/ + /* Setup UDP Maintenance Command Transmit Socket to the Clstr network */ + /**********************************************************************/ if (strlen(mtc_config.clstr_iface)) { mtc_sock.mtc_clstr_cmd_port = mtc_config.mtc_rx_clstr_port; @@ -845,48 +924,49 @@ int mtc_socket_init ( void ) /********************************************************************* * Setup Maintenance Command Reply and Event Receiver Socket - * - management interface + * - management network + * - pxeboot network * * This socket is used to receive command replies over the management * interface and asynchronous events from the mtcClient and other * maintenance service daemons. 
*********************************************************************/ mtc_sock.mtc_agent_port = mtc_config.mtc_agent_port; - mtc_sock.mtc_agent_rx_socket = + mtc_sock.mtc_agent_mgmt_rx_socket = new msgClassRx(CONTROLLER, mtc_sock.mtc_agent_port, IPPROTO_UDP); #ifdef WANT_FIT_TESTING - if ( daemon_want_fit ( FIT_CODE__SOCKET_SETUP, mtcInv.my_hostname, "mtc_agent_rx_socket")) - mtc_sock.mtc_agent_rx_socket = NULL ; + if ( daemon_want_fit ( FIT_CODE__SOCKET_SETUP, mtcInv.my_hostname, "mtc_agent_mgmt_rx_socket")) + mtc_sock.mtc_agent_mgmt_rx_socket = NULL ; #endif - if ((mtc_sock.mtc_agent_rx_socket == NULL) || - (mtc_sock.mtc_agent_rx_socket->return_status)) + if ((mtc_sock.mtc_agent_mgmt_rx_socket == NULL) || + (mtc_sock.mtc_agent_mgmt_rx_socket->return_status)) { elog("Failed to create mtcClient receive socket on port %d for %s (%d:%m)\n", mtc_sock.mtc_agent_port, mtc_config.mgmnt_iface, errno); - mtc_agent_rx_socket_close(); + mtc_agent_mgmt_rx_socket_close(); return (FAIL_SOCKET_CREATE ); } /* Set messaging buffer size */ /* if we need a bigger then default we can use a sysctl to raise the max */ socket_size = MTC_AGENT_RX_BUFF_SIZE; - if ((rc = mtc_sock.mtc_agent_rx_socket->setSocketMemory(mtc_config.mgmnt_iface, "mtce command and event receiver (Mgmnt network)", socket_size)) != PASS) + if ((rc = mtc_sock.mtc_agent_mgmt_rx_socket->setSocketMemory(mtc_config.mgmnt_iface, "mtce command and event receiver (Mgmnt network)", socket_size)) != PASS) { elog("setsockopt failed for SO_RCVBUF (%d:%m)\n", errno); - mtc_agent_rx_socket_close(); + mtc_agent_mgmt_rx_socket_close(); return (FAIL_SOCKET_OPTION); } - socklen_t optlen = sizeof(mtc_sock.mtc_agent_rx_socket_size); - getsockopt(mtc_sock.mtc_agent_rx_socket->getFD(), SOL_SOCKET, SO_RCVBUF, - &mtc_sock.mtc_agent_rx_socket_size, &optlen); + socklen_t optlen = sizeof(mtc_sock.mtc_agent_mgmt_rx_socket_size); + getsockopt(mtc_sock.mtc_agent_mgmt_rx_socket->getFD(), SOL_SOCKET, SO_RCVBUF, + 
&mtc_sock.mtc_agent_mgmt_rx_socket_size, &optlen); ilog("Listening On: 'mtc client receive' socket %d (%d rx bytes - req:%d) (%s)\n", mtc_sock.mtc_agent_port, - mtc_sock.mtc_agent_rx_socket_size, MTC_AGENT_RX_BUFF_SIZE, + mtc_sock.mtc_agent_mgmt_rx_socket_size, MTC_AGENT_RX_BUFF_SIZE, mtc_config.mgmnt_iface); /********************************************************************* @@ -967,7 +1047,7 @@ int mtc_socket_init ( void ) mtc_config.hbs_to_mtc_event_port, mtc_config.mgmnt_iface, errno); - mtc_event_rx_sock_close(); + event_rx_sock_close(); return ( FAIL_SOCKET_CREATE ); } @@ -1077,6 +1157,8 @@ int daemon_init ( string iface, string nodetype ) } mtcInv.system_type = daemon_system_type (); + mtcInv.sw_version = daemon_sw_version(); + ilog ("SW Version : %s", mtcInv.sw_version.c_str()); /* Get and store my hostname */ if ( mtc_hostname_read () != PASS ) @@ -1370,9 +1452,6 @@ void daemon_service_run ( void ) /* Init board management stuff */ bmcUtil_init (); - /* log the currect software version */ - ilog ("SW VERSION : %s\n", daemon_sw_version ().c_str()); - /* Collect inventory in active state only */ if ( mtc_config.active == true ) { @@ -1383,6 +1462,32 @@ void daemon_service_run ( void ) daemon_exit (); } + + string my_mac = "" ; + get_iface_macaddr ( mtc_config.mgmnt_iface , my_mac ); + dlog ("Mgmt IF mac: %s", my_mac.c_str()); + mtcInv.my_pxeboot_if = daemon_mgmnt_iface() ; + if (( mtcInv.my_pxeboot_if != LOOPBACK_IF ) && ( !my_mac.empty() )) + { + mtcInv.pxeboot_network_provisioned = true ; + mtc_config.pxeboot_iface = daemon_get_iface_master ((char*)mtcInv.my_pxeboot_if.data()); + { + string ifname = mtc_config.pxeboot_iface ; + if ( get_iface_parent ( PXEBOOT_INTERFACE, ifname, mtcInv.my_pxeboot_if ) == PASS ) + { + ilog ("Pxeboot IF : %s", mtcInv.my_pxeboot_if.c_str() ); + } + } + mtcInv.pxebootInfo_loader ( my_mac ); + ilog ("Pxeboot IP : %s", mtcInv.my_pxeboot_ip.empty() ? 
"none" : mtcInv.my_pxeboot_ip.c_str()); + + /************************************************************************/ + /* Setup UDP IPV4 Maintenance pxeboot network Transmit/Receive Sockets */ + /************************************************************************/ + setup_pxeboot_rx_socket (); + setup_pxeboot_tx_socket (); + } + /* The following are base object controller timers ; init them */ mtcTimer_init ( mtcInv.mtcTimer_token, mtcInv.my_hostname, "token timer" ); mtcTimer_init ( mtcInv.mtcTimer_uptime,mtcInv.my_hostname, "uptime timer" ); @@ -1390,7 +1495,6 @@ void daemon_service_run ( void ) mtcTimer_init ( mtcInv.mtcTimer_dor, mtcInv.my_hostname, "DOR mode timer" ); if ( get_link_state ( mtc_sock.ioctl_sock, mtc_config.mgmnt_iface, &mtcInv.mgmnt_link_up_and_running ) ) - { mtcInv.mgmnt_link_up_and_running = false ; wlog ("Failed to query %s operational state ; defaulting to down\n", mtc_config.mgmnt_iface ); @@ -1483,12 +1587,17 @@ void daemon_service_run ( void ) send_hbs_command ( mtcInv.my_hostname, MTC_CMD_START_HOST ); socks.clear(); - socks.push_front (mtc_sock.mtc_event_rx_sock->getFD()); // service_events - socks.push_front (mtc_sock.mtc_agent_rx_socket->getFD()); // mtc_service_inbox + // service_events + socks.push_front (mtc_sock.mtc_event_rx_sock->getFD()); + + // mtc_service_inbox - receive sockets from Pxeboot, Mgmt and Clstr network + if ( mtc_sock.pxeboot_rx_socket ) + socks.push_front (mtc_sock.pxeboot_rx_socket); + socks.push_front (mtc_sock.mtc_agent_mgmt_rx_socket->getFD()); if ( mtcInv.clstr_network_provisioned == true ) { - socks.push_front (mtc_sock.mtc_agent_clstr_rx_socket->getFD()); // mtc_service_inbox + socks.push_front (mtc_sock.mtc_agent_clstr_rx_socket->getFD()); } socks.push_front (mtc_sock.netlink_sock); @@ -1559,9 +1668,57 @@ void daemon_service_run ( void ) * where it had commanded the hbsAgent to heartbeat at a reduced rate. 
*/ send_hbs_command ( mtcInv.my_hostname, MTC_RECOVER_HBS ); + // Used to track mtcAgent incoming messaging rate + #define LOOP_TIMER_PERIOD_SECS (60) + #define MSGS_PER_SEC_THRESHOLD (20) + #define MSGS_CNT_IDX_INBOX (0) + #define MSGS_CNT_IDX_EVENT (1) + #define MSGS_CNT_IDX_PMOND (2) + #define MSGS_CNT_IDX_HTTP (3) + #define MSGS_CNT_IDX_NETLINK (4) + #define MSGS_CNT_IDX_INOTIFY (5) + #define MSGS_CNT_IDX_MAX (6) + static unsigned int messages_tally[MSGS_CNT_IDX_MAX] = {0,0,0,0,0,0} ; + static float messages_total = 0 ; + mtcTimer_init ( mtcInv.mtcTimer_loop, mtcInv.my_hostname, "loop timer" ); + /* Run Maintenance service forever */ - for ( ; ; ) + for ( mtc_sock.msg_rate = 0 ; ; ) { + if ( mtcTimer_expired ( mtcInv.mtcTimer_loop ) ) + { + // Maintain an incoming messaging rate. + for ( int m = MSGS_CNT_IDX_INBOX ; m < MSGS_CNT_IDX_MAX ; m++ ) + messages_total += messages_tally[m] ; + float rate_per_sec = messages_total/LOOP_TIMER_PERIOD_SECS ; + + // Only log the messaging rate log when + // - the rate is above basic MSGS_PER_SEC_THRESHOLD ; first log + // - the messaging rate changes by half of the threshold in either direction + if (( mtc_config.debug_msg ) || + (( rate_per_sec > MSGS_PER_SEC_THRESHOLD ) && + (( rate_per_sec > (mtc_sock.msg_rate+(MSGS_PER_SEC_THRESHOLD/2))) || + ( rate_per_sec < (mtc_sock.msg_rate-(MSGS_PER_SEC_THRESHOLD/2)))))) + { + ilog ("%d messages processed ; rate: %.1f msgs/sec] [%d:%d:%d:%d:%d:%d]", + (int)messages_total, rate_per_sec, + messages_tally[MSGS_CNT_IDX_INBOX], + messages_tally[MSGS_CNT_IDX_EVENT], + messages_tally[MSGS_CNT_IDX_PMOND], + messages_tally[MSGS_CNT_IDX_HTTP], + messages_tally[MSGS_CNT_IDX_NETLINK], + messages_tally[MSGS_CNT_IDX_INOTIFY]); + + // Save this message rate for next compare + mtc_sock.msg_rate = rate_per_sec ; + } + + // clean the stats and restart the timer + messages_total = 0 ; + for ( int m = MSGS_CNT_IDX_INBOX ; m < MSGS_CNT_IDX_MAX ; m++ ) + messages_tally[m] = 0 ; + mtcTimer_start ( 
mtcInv.mtcTimer_loop, mtcTimer_handler, LOOP_TIMER_PERIOD_SECS ); + } daemon_signal_hdlr (); /** * Can't just run 'mtcHttpSvr_look' off select as it is seen to miss events. @@ -1587,13 +1744,17 @@ void daemon_service_run ( void ) /* Initialize the master fd_set */ FD_ZERO(&mtc_sock.readfds); - FD_SET(mtc_sock.mtc_event_rx_sock->getFD(), &mtc_sock.readfds); - FD_SET(mtc_sock.mtc_agent_rx_socket->getFD(), &mtc_sock.readfds); + FD_SET(mtc_sock.mtc_event_rx_sock->getFD(), &mtc_sock.readfds); + FD_SET(mtc_sock.mtc_agent_mgmt_rx_socket->getFD(), &mtc_sock.readfds); if ( mtcInv.clstr_network_provisioned == true ) { FD_SET(mtc_sock.mtc_agent_clstr_rx_socket->getFD(),&mtc_sock.readfds); } - + // Listen to the pxeboot rx socket if it is setup + if ( mtc_sock.pxeboot_rx_socket > 0 ) + { + FD_SET(mtc_sock.pxeboot_rx_socket, &mtc_sock.readfds); + } if ( mtce_event.fd ) { FD_SET(mtce_event.fd, &mtc_sock.readfds); @@ -1631,44 +1792,95 @@ void daemon_service_run ( void ) { if ( FD_ISSET( mtce_event.fd , &mtc_sock.readfds)) { + mlog3 ("http socket fired"); + messages_tally[MSGS_CNT_IDX_HTTP]++ ; mtcHttpSvr_look ( mtce_event ); + mlog3 ("http handling done"); } if (FD_ISSET(mtc_sock.netlink_sock, &mtc_sock.readfds)) { - dlog ("netlink socket fired\n"); + mlog3 ("netlink socket fired"); + messages_tally[MSGS_CNT_IDX_NETLINK]++ ; if ( mtcInv.service_netlink_events ( mtc_sock.netlink_sock, mtc_sock.ioctl_sock ) != PASS ) { elog ("service_netlink_events failed (rc:%d)\n", rc ); } + mlog3 ("netlink handling done"); } if (FD_ISSET(mtc_sock.mtc_event_rx_sock->getFD(), &mtc_sock.readfds)) { + mlog3 ("events socket fired"); + messages_tally[MSGS_CNT_IDX_EVENT]++ ; if ( (rc = service_events ( &mtcInv, &mtc_sock )) != PASS ) { elog ("service_events failed (rc:%d)\n", rc ); } + mlog3 ("events handling done"); } - if ( FD_ISSET(mtc_sock.mtc_agent_rx_socket->getFD(), &mtc_sock.readfds)) + if ( mtc_sock.pxeboot_rx_socket && FD_ISSET(mtc_sock.pxeboot_rx_socket, &mtc_sock.readfds)) { int cnt = 0 ; 
/* Service up to MAX_RX_MSG_BATCH of messages at once */ + mlog3 ("pxeboot network socket fired"); for ( ; cnt < MAX_RX_MSG_BATCH ; cnt++ ) { - rc = mtc_service_inbox ( &mtcInv, &mtc_sock , MGMNT_INTERFACE) ; - if ( rc > RETRY ) + mlog3 ("... service inbox ; message %d", cnt+1); + rc = mtc_service_inbox ( &mtcInv, &mtc_sock , PXEBOOT_INTERFACE) ; + if ( rc == RETRY ) { - mlog2 ("mtc_service_inbox failed (rc:%d) (Mgmnt)\n", rc ); + mlog3 ("... service inbox done"); break ; } - if ( rc == RETRY ) + messages_tally[MSGS_CNT_IDX_INBOX]++ ; + if ( rc > RETRY ) + { + wlog ("mtc_service_inbox failed (rc:%d) (pxeboot)", rc ); break ; + } + else + { + mlog3 ("......more messages to service"); + } } - if ( cnt > 1 ) + if ( cnt > (MAX_RX_MSG_BATCH/2) ) { - mlog2 ("serviced %d messages in one batch (Mgmnt)\n", cnt ); + ilog ("serviced %d messages in one batch (pxeboot)", cnt ); } + mlog3 ("pxeboot network message handling done"); + } + + if ( FD_ISSET(mtc_sock.mtc_agent_mgmt_rx_socket->getFD(), &mtc_sock.readfds)) + { + int cnt = 0 ; + /* Service up to MAX_RX_MSG_BATCH of messages at once */ + mlog3 ("mgmt network socket fired"); + for ( ; cnt < MAX_RX_MSG_BATCH ; cnt++ ) + { + mlog3 ("... service inbox ; message %d", cnt+1); + rc = mtc_service_inbox ( &mtcInv, &mtc_sock , MGMNT_INTERFACE) ; + if ( rc == RETRY ) + { + mlog3 ("... 
service inbox done"); + break ; + } + messages_tally[MSGS_CNT_IDX_INBOX]++ ; + if ( rc > RETRY ) + { + wlog ("mtc_service_inbox failed (rc:%d) (Mgmnt)", rc ); + break ; + } + else + { + mlog3 ("......more messages to service"); + } + } + if ( cnt > (MAX_RX_MSG_BATCH/2) ) + { + ilog ("serviced %d messages in one batch (Mgmnt)", cnt ); + } + mlog3 ("mgmt network message handling done"); } if (( mtcInv.clstr_network_provisioned == true ) && @@ -1677,24 +1889,38 @@ void daemon_service_run ( void ) { int cnt = 0 ; /* Service up to MAX_RX_MSG_BATCH of messages at once */ + mlog3 ("clstr network socket fired"); for ( ; cnt < MAX_RX_MSG_BATCH ; cnt++ ) { + mlog3 ("... service inbox ; message %d", cnt+1); + rc = mtc_service_inbox ( &mtcInv, &mtc_sock, CLSTR_INTERFACE ) ; - if ( rc > RETRY ) + if ( rc == RETRY ) { - mlog2 ("mtc_service_inbox failed (rc:%d) (Clstr)\n", rc ); + mlog3 ("... service inbox done"); break ; } - if ( rc == RETRY ) + messages_tally[MSGS_CNT_IDX_INBOX]++ ; + if ( rc > RETRY ) + { + mlog ("mtc_service_inbox failed (rc:%d) (Clstr)\n", rc ); break ; + } + else + { + mlog3 ("......more messages to service"); + } } - if ( cnt > 1 ) + if ( cnt > (MAX_RX_MSG_BATCH/2) ) { - mlog2 ("serviced %d messages in one batch (Clstr)\n", cnt ); // ERIC dlog + ilog ("serviced %d messages in one batch (Clstr)", cnt ); } + mlog3 ("mgmt network message handling done"); } if (FD_ISSET(mtcInv.inotify_shadow_file_fd, &mtc_sock.readfds)) { + mlog3 ("inotify socket fired"); + messages_tally[MSGS_CNT_IDX_INOTIFY]++ ; rc = get_inotify_events ( mtcInv.inotify_shadow_file_fd, (IN_MODIFY | IN_CREATE | IN_IGNORED) ); if ( rc ) { @@ -1715,6 +1941,7 @@ void daemon_service_run ( void ) wlog ("Reselecting on %s change (Select:%d)\n", SHADOW_FILE, mtcInv.inotify_shadow_file_fd ); } } + mlog3 ("inotify event handling done"); } } diff --git a/mtce/src/maintenance/mtcNodeFsm.cpp b/mtce/src/maintenance/mtcNodeFsm.cpp index 98c0e8a4..df9a8d54 100755 --- a/mtce/src/maintenance/mtcNodeFsm.cpp +++ 
b/mtce/src/maintenance/mtcNodeFsm.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, 2016 Wind River Systems, Inc. + * Copyright (c) 2013, 2016, 2024 Wind River Systems, Inc. * * SPDX-License-Identifier: Apache-2.0 * @@ -91,6 +91,14 @@ int nodeLinkClass::fsm ( struct nodeLinkClass::node * node_ptr ) */ nodeLinkClass::online_handler ( node_ptr ); + /* + * Always run the mtcAlive handler. + * + * - monitor host's mtcAlive messaging + * - manage host's mtcAlive missing alarm + */ + nodeLinkClass::pxeboot_mtcAlive_monitor ( node_ptr ); + if ( node_ptr->adminAction == MTC_ADMIN_ACTION__DELETE ) { flog ("%s -> Delete Action\n", node_ptr->hostname.c_str()); diff --git a/mtce/src/maintenance/mtcNodeHdlrs.cpp b/mtce/src/maintenance/mtcNodeHdlrs.cpp index ec90f667..465c3750 100755 --- a/mtce/src/maintenance/mtcNodeHdlrs.cpp +++ b/mtce/src/maintenance/mtcNodeHdlrs.cpp @@ -130,6 +130,15 @@ void nodeLinkClass::timer_handler ( int sig, siginfo_t *si, void *uc) return ; } + /* Is this TID a online timer TID ? */ + node_ptr = get_online_timer ( *tid_ptr ); + if ( node_ptr ) + { + mtcTimer_stop_int_safe ( node_ptr->online_timer ); + node_ptr->online_timer.ring = true ; + return ; + } + /* Is this TID a mtcAlive timer TID ? */ node_ptr = get_mtcAlive_timer ( *tid_ptr ); if ( node_ptr ) @@ -247,6 +256,14 @@ void nodeLinkClass::timer_handler ( int sig, siginfo_t *si, void *uc) return ; } + /* daemon main loop timer */ + if ( *tid_ptr == mtcTimer_loop.tid ) + { + mtcTimer_stop_int_safe ( mtcTimer_loop ); + mtcTimer_loop.ring = true ; + return ; + } + /* is the http request timer ? 
*/ node_ptr = get_http_timer ( *tid_ptr ); if ( node_ptr ) @@ -1968,7 +1985,9 @@ int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr ) node_ptr->hostname.c_str()); node_ptr->reboot_cmd_ack_mgmnt = false ; node_ptr->reboot_cmd_ack_clstr = false ; + node_ptr->reboot_cmd_ack_pxeboot = false ; send_mtc_cmd ( node_ptr->hostname, MTC_CMD_REBOOT, MGMNT_INTERFACE ) ; + send_mtc_cmd ( node_ptr->hostname, MTC_CMD_REBOOT, PXEBOOT_INTERFACE ) ; /* If the cluster-host network is provisioned then try * and issue a reset over it to expedite the recovery @@ -2038,6 +2057,12 @@ int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr ) ilog ("%s backup bmc reset aborted due to management network reboot request ACK", node_ptr->hostname.c_str()); } + else if ( node_ptr->reboot_cmd_ack_pxeboot ) + { + reset_aborted = true ; + ilog ("%s backup bmc reset aborted due to pxeboot network reboot request ACK", + node_ptr->hostname.c_str()); + } else if ( node_ptr->reboot_cmd_ack_clstr ) { reset_aborted = true ; @@ -3331,6 +3356,7 @@ int nodeLinkClass::offline_handler ( struct nodeLinkClass::node * node_ptr ) node_ptr->mtcAlive_count = 0 ; node_ptr->mtcAlive_mgmnt = false ; node_ptr->mtcAlive_clstr = false ; + node_ptr->mtcAlive_pxeboot = false ; node_ptr->offline_log_throttle = 0 ; node_ptr->offline_search_count = 0 ; @@ -3362,22 +3388,25 @@ int nodeLinkClass::offline_handler ( struct nodeLinkClass::node * node_ptr ) * are cleared. Need to also clear the * offline_search_count here as well. 
**/ - if (( node_ptr->mtcAlive_mgmnt || node_ptr->mtcAlive_clstr ) && node_ptr->offline_search_count ) + if (( node_ptr->mtcAlive_mgmnt || node_ptr->mtcAlive_clstr || node_ptr->mtcAlive_pxeboot ) && node_ptr->offline_search_count ) { node_ptr->mtcAlive_online = true ; - ilog ("%s still seeing mtcAlive (%d) (Mgmt:%c:%d Clstr:%c:%d) ; restart offline_search_count=%d of %d\n", + ilog ("%s still seeing mtcAlive (%d) (Mgmt:%c:%d Clstr:%c:%d Pxeboot:%c:%d) ; restart offline_search_count=%d of %d\n", node_ptr->hostname.c_str(), node_ptr->mtcAlive_count, node_ptr->mtcAlive_mgmnt ? 'Y' : 'n', node_ptr->mtcAlive_mgmnt_count, node_ptr->mtcAlive_clstr ? 'Y' : 'n', node_ptr->mtcAlive_clstr_count, + node_ptr->mtcAlive_pxeboot ? 'Y' : 'n', + node_ptr->mtcAlive_pxeboot_count, node_ptr->offline_search_count, offline_threshold ); node_ptr->offline_search_count = 0 ; /* reset the count */ } node_ptr->mtcAlive_mgmnt = false ; node_ptr->mtcAlive_clstr = false ; + node_ptr->mtcAlive_pxeboot = false ; /* Request a mtcAlive from host from Mgmnt and Clstr (if provisioned) */ send_mtc_cmd ( node_ptr->hostname, MTC_REQ_MTCALIVE, MGMNT_INTERFACE ); @@ -3410,6 +3439,10 @@ int nodeLinkClass::offline_handler ( struct nodeLinkClass::node * node_ptr ) { node_ptr->mtcAlive_online = false ; + // Clear all the mtcAlive_sequence numbers + for (int i = 0 ; i < MTCALIVE_INTERFACES_MAX ; i++) + node_ptr->mtcAlive_sequence[i] = 0; + plog ("%s going offline ; (threshold (%d msec * %d)\n", node_ptr->hostname.c_str(), offline_period, @@ -3532,23 +3565,23 @@ int nodeLinkClass::online_handler ( struct nodeLinkClass::node * node_ptr ) node_ptr->mtcAlive_misses = 0 ; /* Start mtcAlive message timer */ - mtcTimer_start ( node_ptr->mtcAlive_timer, mtcTimer_handler, online_period ); + mtcTimer_start ( node_ptr->online_timer, mtcTimer_handler, online_period ); node_ptr->onlineStage = MTC_ONLINE__WAITING ; break ; } case MTC_ONLINE__RETRYING: { /* Start mtcAlive message timer */ - mtcTimer_start ( 
node_ptr->mtcAlive_timer, mtcTimer_handler, online_period ); + mtcTimer_start ( node_ptr->online_timer, mtcTimer_handler, online_period ); node_ptr->onlineStage = MTC_ONLINE__WAITING ; break ; } case MTC_ONLINE__WAITING: { - if ( node_ptr->mtcAlive_timer.ring == false ) + if ( node_ptr->online_timer.ring == false ) break ; - alog ("%s mtcAlive [%s] [ misses:%d]\n", + alog2 ("%s mtcAlive [%s] [ misses:%d]\n", node_ptr->hostname.c_str(), node_ptr->mtcAlive_online ? "Yes" : "No", node_ptr->mtcAlive_misses ); @@ -3581,7 +3614,7 @@ int nodeLinkClass::online_handler ( struct nodeLinkClass::node * node_ptr ) else { /* handle retries < MTC_OFFLINE_MISSES */ - node_ptr->mtcAlive_timer.ring = false ; + node_ptr->online_timer.ring = false ; node_ptr->onlineStage = MTC_ONLINE__RETRYING ; break ; } @@ -3629,7 +3662,7 @@ int nodeLinkClass::online_handler ( struct nodeLinkClass::node * node_ptr ) } /* Start over */ - node_ptr->mtcAlive_timer.ring = false ; + node_ptr->online_timer.ring = false ; node_ptr->onlineStage = MTC_ONLINE__START ; break ; } @@ -7523,7 +7556,148 @@ int nodeLinkClass::oos_test_handler ( struct nodeLinkClass::node * node_ptr ) return (PASS); } +/////////////////////////////////////////////////////////////////////////////// +// +// Name : pxeboot_mtcAlive_monitor +// +// Purpose : Monitor pxeboot network mtcAlive and manage associated alarm. +// +// Description: Monitor pxeboot mtcAlive messages. +// Request mtcAlive when not receiving mtcAlive messages. +// Debounce mtcAlive messaging ; alarm management is still a TODO (see the CHECK stage).
+// +// Parameters : nodeLinkClass::node struct pointer - node_ptr +// +// Returns : PASS +// +/////////////////////////////////////////////////////////////////////////////// +#define PXEBOOT_MTCALIVE_MONITOR_RATE_SECS (10) +#define PXEBOOT_MTCALIVE_LOSS_THRESHOLD (6) +#define PXEBOOT_MTCALIVE_NOT_SEEN_LOG_THROTTLE (6) +#define PXEBOOT_MTCALIVE_LOSS_LOG_THROTTLE (6) +int nodeLinkClass::pxeboot_mtcAlive_monitor ( struct nodeLinkClass::node * node_ptr ) +{ + // ERIK: TODO: Comment out once verified + flog ("%s pxeboot mtcAlive fsm stage: %s", + node_ptr->hostname.c_str(), + get_mtcAliveStages_str(node_ptr->mtcAliveStage).c_str()); + if ( !this->pxeboot_network_provisioned ) return PASS ; + + switch (node_ptr->mtcAliveStage) + { + case MTC_MTCALIVE__START: + { + alog2 ("%s mtcAlive start", node_ptr->hostname.c_str()); + mtcTimer_reset ( node_ptr->mtcAlive_timer ); + node_ptr->mtcAlive_sequence[PXEBOOT_INTERFACE] = 0 ; + mtcAliveStageChange (node_ptr, MTC_MTCALIVE__SEND); + break ; + } + case MTC_MTCALIVE__SEND: + { + /* pxeboot info refresh audit */ + if ( node_ptr->hostname == my_hostname ) + pxebootInfo_loader (); + alog2 ("%s mtcAlive send", node_ptr->hostname.c_str()); + send_mtc_cmd ( node_ptr->hostname, MTC_REQ_MTCALIVE, PXEBOOT_INTERFACE ); + node_ptr->mtcAlive_sequence_save[PXEBOOT_INTERFACE] = node_ptr->mtcAlive_sequence[PXEBOOT_INTERFACE] ; /* baseline at the current sequence ; a zero baseline makes a stale non-zero sequence look like fresh messages at the next CHECK */ + node_ptr->mtcAlive_sequence_miss[PXEBOOT_INTERFACE] = 0 ; + mtcAliveStageChange (node_ptr, MTC_MTCALIVE__MONITOR); + break ; + } + case MTC_MTCALIVE__MONITOR: + { + alog2 ("%s mtcAlive monitor", node_ptr->hostname.c_str()); + mtcTimer_start ( node_ptr->mtcAlive_timer, mtcTimer_handler, + PXEBOOT_MTCALIVE_MONITOR_RATE_SECS ); + mtcAliveStageChange (node_ptr, MTC_MTCALIVE__WAIT); + break ; + } + case MTC_MTCALIVE__WAIT: + { + if ( mtcTimer_expired ( node_ptr->mtcAlive_timer ) ) + mtcAliveStageChange (node_ptr, MTC_MTCALIVE__CHECK); + break ; + } + case MTC_MTCALIVE__CHECK: + { + if ( node_ptr->mtcAlive_sequence[PXEBOOT_INTERFACE] >
node_ptr->mtcAlive_sequence_save[PXEBOOT_INTERFACE] ) + { + // Typical success path + alog2 ("%s pxeboot mtcAlive received %d messages since last audit ; this:%d last:%d", + node_ptr->hostname.c_str(), + node_ptr->mtcAlive_sequence[PXEBOOT_INTERFACE] - node_ptr->mtcAlive_sequence_save[PXEBOOT_INTERFACE], + node_ptr->mtcAlive_sequence[PXEBOOT_INTERFACE], + node_ptr->mtcAlive_sequence_save[PXEBOOT_INTERFACE]); + + // Now that we received a message we can dec the missed count + if ( node_ptr->mtcAlive_sequence_miss[PXEBOOT_INTERFACE] ) + node_ptr->mtcAlive_sequence_miss[PXEBOOT_INTERFACE]-- ; + node_ptr->pxeboot_mtcAlive_not_seen_log_throttle = 0 ; + node_ptr->pxeboot_mtcAlive_loss_log_throttle = 0 ; + mtcAliveStageChange (node_ptr, MTC_MTCALIVE__MONITOR); + } + else if ( node_ptr->mtcAlive_sequence[PXEBOOT_INTERFACE] < node_ptr->mtcAlive_sequence_save[PXEBOOT_INTERFACE] ) + { + // unexpected case + wlog ("%s mtcAlive out-of-sequence ; this:%d last:%d", + node_ptr->hostname.c_str(), + node_ptr->mtcAlive_sequence[PXEBOOT_INTERFACE], + node_ptr->mtcAlive_sequence_save[PXEBOOT_INTERFACE]); + node_ptr->mtcAlive_sequence_miss[PXEBOOT_INTERFACE]++ ; /* NOTE(review): the START -> SEND path zeroes this count again */ + mtcAliveStageChange (node_ptr, MTC_MTCALIVE__START); + } + else if ( ++node_ptr->mtcAlive_sequence_miss[PXEBOOT_INTERFACE] < PXEBOOT_MTCALIVE_LOSS_THRESHOLD ) + { + // Missing pxeboot mtcAlive + alog ("%s pxeboot mtcAlive miss count %d ; sending request", + node_ptr->hostname.c_str(), + node_ptr->mtcAlive_sequence_miss[PXEBOOT_INTERFACE]); + send_mtc_cmd ( node_ptr->hostname, MTC_REQ_MTCALIVE, PXEBOOT_INTERFACE ); + mtcAliveStageChange (node_ptr, MTC_MTCALIVE__MONITOR); + } + else if ( node_ptr->mtcAlive_pxeboot == true ) + { + wlog_throttled (node_ptr->pxeboot_mtcAlive_loss_log_throttle, + PXEBOOT_MTCALIVE_LOSS_LOG_THROTTLE, + "%s pxeboot mtcAlive loss ; missed: %d ; last: count:%d seq: %d ; sending request", + node_ptr->hostname.c_str(), + node_ptr->mtcAlive_sequence_miss[PXEBOOT_INTERFACE], +
node_ptr->mtcAlive_pxeboot_count, + node_ptr->mtcAlive_sequence_save[PXEBOOT_INTERFACE]); + mtcAliveStageChange (node_ptr, MTC_MTCALIVE__SEND); + } + else + { + ilog_throttled (node_ptr->pxeboot_mtcAlive_not_seen_log_throttle, + PXEBOOT_MTCALIVE_NOT_SEEN_LOG_THROTTLE, + "%s pxeboot mtcAlive not seen yet ; sending request", + node_ptr->hostname.c_str()); + mtcAliveStageChange (node_ptr, MTC_MTCALIVE__SEND); + } + node_ptr->mtcAlive_sequence_save[PXEBOOT_INTERFACE] = node_ptr->mtcAlive_sequence[PXEBOOT_INTERFACE] ; /* rebaseline for the next audit */ + + // TODO (emacdona): Need to handle loss case that manages raising the alarm + // Transition to MTC_MTCALIVE__FAIL + + break ; + } + case MTC_MTCALIVE__FAIL: + { + wlog ("%s mtcAlive fail", node_ptr->hostname.c_str()); + mtcAliveStageChange (node_ptr, MTC_MTCALIVE__START); + break ; + } + default: + { + slog ("%s mtcAlive fsm default", node_ptr->hostname.c_str()); + mtcAliveStageChange (node_ptr, MTC_MTCALIVE__START); + break ; + } + } + return (PASS); +} int local_counter = 0 ; diff --git a/mtce/src/maintenance/mtcNodeMsg.h b/mtce/src/maintenance/mtcNodeMsg.h index 11319c0f..12a7f7ff 100755 --- a/mtce/src/maintenance/mtcNodeMsg.h +++ b/mtce/src/maintenance/mtcNodeMsg.h @@ -1,7 +1,7 @@ #ifndef __INCLUDE_MTCNODEMSG_HH__ #define __INCLUDE_MTCNODEMSG_HH__ /* - * Copyright (c) 2013, 2016 Wind River Systems, Inc. + * Copyright (c) 2013, 2016, 2024 Wind River Systems, Inc.
* * SPDX-License-Identifier: Apache-2.0 * @@ -53,7 +53,7 @@ using namespace std; #define MTC_AGENT_RX_BUFF_SIZE (MAX_NODES*MAX_MSG) -#define MAX_RX_MSG_BATCH (20) +#define MAX_RX_MSG_BATCH (50) /** Maintenance messaging socket control structure */ typedef struct @@ -63,25 +63,26 @@ typedef struct /** UDP sockets used by the mtcAgent to transmit and receive * maintenance commands to the client (compute) node and * receive the compute node reply in the receive direction */ - msgClassSock* mtc_agent_tx_socket ; /**< tx to mtc client mgmnt */ - msgClassSock* mtc_agent_clstr_tx_socket; /**< tx to mtc client clstr */ - msgClassSock* mtc_agent_rx_socket ; /**< rx from mtc client mgmnt */ - msgClassSock* mtc_agent_clstr_rx_socket; /**< rx from mtc client clstr */ - int mtc_agent_port ; /**< the agent rx port number */ + msgClassSock* mtc_agent_mgmt_tx_socket ; /**< tx to mtc client mgmnt */ + msgClassSock* mtc_agent_mgmt_rx_socket ; /**< rx from mtc client mgmnt */ + msgClassSock* mtc_agent_clstr_tx_socket ; /**< tx to mtc client clstr */ + msgClassSock* mtc_agent_clstr_rx_socket ; /**< rx from mtc client clstr */ + int mtc_agent_port ; /**< the agent rx port number */ + int mtc_rx_mgmnt_port ; /**< the agent rx port number */ struct sockaddr_in agent_addr; /**< socket attributes struct */ - int mtc_agent_rx_socket_size ; + int mtc_agent_mgmt_rx_socket_size ; int mtc_agent_clstr_rx_socket_size ; /** UDP sockets used by the mtcClient to receive maintenance * commands from and transmit replies to the mtcAgent */ - msgClassSock* mtc_client_rx_socket ; /**< rx from controller */ - msgClassSock* mtc_client_tx_socket ; /**< tx to controller mgmnt */ - msgClassSock* mtc_client_tx_socket_c0_clstr ; /**< tx to controller-0 clstr i/f */ - msgClassSock* mtc_client_tx_socket_c1_clstr ; /**< tx to controller-1 clstr i/f */ - msgClassSock* mtc_client_clstr_rx_socket ; /**< rx from controller clstr */ - int mtc_mgmnt_cmd_port ; /**< mtc command port mgmnt i/f */ - int mtc_clstr_cmd_port 
; /**< mtc command port clstr i/f */ + msgClassSock* mtc_client_mgmt_rx_socket ; /**< rx from controller mgmt */ + msgClassSock* mtc_client_mgmt_tx_socket ; /**< tx to controller mgmnt */ + msgClassSock* mtc_client_clstr_tx_socket_c0 ; /**< tx to controller-0 clstr */ + msgClassSock* mtc_client_clstr_tx_socket_c1 ; /**< tx to controller-1 clstr */ + msgClassSock* mtc_client_clstr_rx_socket ; /**< rx from controller clstr */ + int mtc_mgmnt_cmd_port ; /**< mtc command port mgmnt */ + int mtc_clstr_cmd_port ; /**< mtc command port clstr */ struct sockaddr_in mtc_cmd_addr ; /**< socket attributes mgmnt */ /***************************************************************/ @@ -106,6 +107,12 @@ typedef struct struct timeval waitd ; fd_set readfds; + /** IPV4 Pxeboot transmit and receive sockets and ports */ + int pxeboot_tx_socket ; + int mtc_tx_pxeboot_port ; + int pxeboot_rx_socket ; + int mtc_rx_pxeboot_port ; + /** Active Monitor Socket */ int amon_socket ; @@ -115,7 +122,7 @@ typedef struct int netlink_sock ; /* netlink socket */ int ioctl_sock ; /* general ioctl socket */ - + float msg_rate ; } mtc_socket_type ; diff --git a/mtce/src/scripts/mtc.conf b/mtce/src/scripts/mtc.conf index 53f252b0..3bc57cba 100644 --- a/mtce/src/scripts/mtc.conf +++ b/mtce/src/scripts/mtc.conf @@ -16,11 +16,11 @@ inventory_port = 6385 ; The Inventory Port Number keystone_port = 5000 ; The Keystone Port Number ha_port = 7777 ; The Inventory Port Number mtc_agent_port = 2101 ; OBS: ........ 
Active Controller Maintenance Rx Port -mtc_rx_mgmnt_port = 2101 ; Active Controller Maintenance Mgmnt Network Rx Port -mtc_rx_clstr_port = 2111 ; Active Controller Maintenance Clstr Network Rx Port +mtc_rx_mgmnt_port = 2101 ; mtcAgent management network msg receive port +mtc_rx_pxeboot_port = 2102 ; mtcAgent pxeboot network msg receive port +mtc_rx_clstr_port = 2111 ; mtcAgent cluster-host network msg receive port hbs_agent_mgmnt_port = 2103 ; Management Interface Heartbeat Pulse Response Rx Port hbs_agent_clstr_port = 2113 ; Cluster-host Interface Heartbeat Pulse Response Rx Port -clstr_agent_port = 2110 ; Agent Command Response RX Port mtc_to_hbs_cmd_port = 2104 ; Mtc to Hbs Command Port Number mtc_to_guest_cmd_port = 2108 ; Mtc to guestAgent Command port hbs_to_mtc_event_port = 2107 ; Hbs to Mtc Event Port Number @@ -87,6 +87,7 @@ scheduling_priority = 45 ; realtime scheduling; range of 1 .. 99 mtc_rx_mgmnt_port = 2118 ; Client Maintenance Command Rx Port mtc_rx_clstr_port = 2115 ; Client Maintenance Command Rx Port +mtc_rx_pxeboot_port = 2119 ; Client Maintenance pxeboot Command Rx Port hbs_client_mgmnt_port = 2106 ; Management Interface Heartbeat Pulse Request Rx Port hbs_client_clstr_port = 2116 ; Cluster-host Interface Heartbeat Pulse Request Rx Port hwmon_cmd_port = 2114 ; hwmond Command Rx Port Number