Add pxeboot network mtcAlive messaging to Maintenance

The introduction of the new pxeboot network requires that maintenance
verify and report messaging failures over that network.

Towards that, this update introduces periodic mtcAlive messaging
between the mtcAgent and mtcClient.

Test Plan:

PASS: Verify install and provision each system type with a mix
             of networking modes ; ethernet, bond and vlan
             - AIO SX, AIO DX, AIO DX plus
             - Standard System 2+1
             - Storage System 2+1+1
PASS: Verify feature with physical on management interface
PASS: Verify feature with vlan on management interface
PASS: Verify feature with bonded management interface
PASS: Verify feature with bonded vlans on management interface
PASS: Verify in bonded cases handling with 2, 1 or no slaves found
PASS: Verify mgmt-combined or separate cluster-host network
PASS: Verify mtcClient pxeboot interface address learning
             - for worker and storage nodes       ; dhcp leases file
             - for controller nodes before unlock ; dhcp leases file
             - for controller nodes after unlock  ; static from ifcfg
             - from controller within 10 seconds of process restart
PASS: Verify mtcAgent pxeboot interface address learning from
             dnsmasq.hosts file
PASS: Verify pxeboot mtcAlive initiation, handling, loss detection
             and recovery
PASS: Verify success and failure handling of all new pxeboot ip
             address learning functions ;
             - dhcp - all system node installs.
             - dnsmasq.hosts - active controller for all hosts.
             - interfaces.d - controller's mtcClient pxeboot address.
             - pxeboot req mtcAlive - mtcAgent mtcAlive request message.
PASS: Verify mtcClient pxeboot network 'mtcAlive request' and 'reboot'
             command handling for ethernet, vlan and bond configs.
PASS: Verify mtcAlive sequence number monitoring, out-of-sequence
             detection, handling and logging.
PASS: Verify pxeboot rx socket binding and non-blocking attribute
PASS: Verify mtcAgent handling stress soaking of sustained incoming
             500+ msgs/sec ; batch handling and logging.
PASS: Verify mtcAgent and mtcClient pxeboot tx and rx socket messaging,
             failure recovery handling and logging.
PASS: Verify pxeboot receiver is not setup on the oam interface on
             controller-0 first install until after initial config
             complete.

Regression:

PASS: Verify mtcAgent/mtcClient online and offline state management
PASS: Verify mtcAgent/mtcClient command handling
      - over management network
      - over cluster-host network
PASS: Verify mtcClient interface chain log for all iface types
      - bond    : vlan123 -> pxeboot0 (802.3ad 4) -> enp0s8 and enp0s9
      - vlan    : vlan123 -> enp0s8
      - ethernet: enp0s8
PASS: Verify mtcAgent/mtcClient handling and logging including debug
      logging for standard operations
      - node install and unlock
      - node lock and unlock
      - node reinstall, reboot, reset
PASS: Verify graceful recovery handling of heartbeat loss failure.
      - node reboot
      - management interface down
PASS: Verify systemcontroller and subcloud install with dc-libvirt
PASS: Verify no log flooding, coredumps, memory leaks

Story: 2010940
Task: 49541
Change-Id: Ibc87b85e3e0e07c3b8c40b5291bd3372506fbdfb
Signed-off-by: Eric MacDonald <eric.macdonald@windriver.com>
This commit is contained in:
Eric MacDonald 2024-03-12 20:53:25 +00:00
parent cc679681e2
commit 14bb67789e
24 changed files with 2948 additions and 619 deletions

View File

@ -1,7 +1,7 @@
#ifndef __INCLUDE_FITCODES_H__
#define __INCLUDE_FITCODES_H__
/*
* Copyright (c) 2013, 2016 Wind River Systems, Inc.
* Copyright (c) 2013, 2016, 2024 Wind River Systems, Inc.
*
* SPDX-License-Identifier: Apache-2.0
*
@ -43,6 +43,8 @@
#define MTC_CMD_FIT__NO_MGMNT_ACK ("/var/run/fit/no_mgmnt_ack") /* mtcClient */
#define MTC_CMD_FIT__NO_CLSTR_ACK ("/var/run/fit/no_clstr_ack") /* mtcClient */
#define MTC_CMD_FIT__NO_MTCALIVE ("/var/run/fit/no_mtcalive") /* mtcClient */
#define MTC_CMD_FIT__PXEBOOT_RXSOCK ("/var/run/fit/pxeboot_rxsock") /* mtcClient */
#define MTC_CMD_FIT__PXEBOOT_TXSOCK ("/var/run/fit/pxeboot_txsock") /* mtcClient */
#define MTC_CMD_FIT__MGMNT_RXSOCK ("/var/run/fit/mgmnt_rxsock") /* mtcClient */
#define MTC_CMD_FIT__MGMNT_TXSOCK ("/var/run/fit/mgmnt_txsock") /* mtcClient */
#define MTC_CMD_FIT__CLSTR_RXSOCK ("/var/run/fit/clstr_rxsock") /* mtcClient */
@ -183,4 +185,7 @@
#define FIT_CODE__HWMON__SET_DB_GROUP_STATUS (177)
#define FIT_CODE__HWMON__SET_DB_GROUP_STATE (178)
#define TESTMASK__MSG__MTCALIVE_STRESS (0x00000001)
#endif /* __INCLUDE_FITCODES_H__ */

View File

@ -48,6 +48,7 @@ typedef struct
char* mgmnt_iface ; /**< management interface name pointer */
char* clstr_iface ; /**< cluster-host interface name pointer */
char* pxeboot_iface ; /**< pxeboot interface name pointer */
char* multicast ; /**< Multicast address */
int ha_port ; /**< HA REST API Port Number */
int vim_cmd_port ; /**< Mtce -> VIM Command REST API Port */
@ -75,6 +76,8 @@ typedef struct
char* barbican_api_host ; /**< Barbican REST API host IP address */
int barbican_api_port ; /**< Barbican REST API port number */
int mtc_tx_pxeboot_port ; /**< mtcAgent/Client pxeboot nwk tx port */
int mtc_rx_pxeboot_port ; /**< mtcClient listens pxeboot nwk cmd reqs */
int mtc_rx_mgmnt_port ; /**< mtcClient listens mgmnt nwk cmd reqs */
int mtc_rx_clstr_port ; /**< mtcClient listens clstr nwk cmd reqs */
int mtc_tx_mgmnt_port ; /**< mtcClient sends mgmnt nwk cmds/resp's */
@ -258,6 +261,22 @@ extern char *program_invocation_short_name;
else { syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Error : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \
}
/** mtcAlive (alog) logger macro with throttling.
 *
 * Only active while the 'debug_alive' config option is non-zero.
 * The message is logged when the incremented 'cnt' equals 1 ; further
 * calls are suppressed until 'cnt' reaches 'max', at which point 'cnt'
 * is reset to 0 so that the following call logs again.
 *
 * cnt    - caller owned counter lvalue ; modified by this macro
 * max    - number of calls per emitted log
 * format - printf style format string literal followed by its args
 *
 * Both the console (printf) and syslog outputs carry an 'Alive' label ;
 * the syslog label matches the one used by the alog macro.
 */
#define alog_throttled(cnt,max,format,args...) { \
    if ( daemon_get_cfg_ptr()->debug_alive ) \
    { \
        if ( ++(cnt) == 1 ) \
        { \
            if (ltc()) { printf ("%s [%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Alive: " format, pt(), getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \
            else { syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Alive : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; } \
        } \
        if ( (cnt) >= (max) ) \
        { \
            (cnt) = 0 ; \
        } \
    } \
}
/** Error logger macro with throttling */
#define elog_throttled(cnt,max,format,args...) { \
if ( ++cnt == 1 ) \
@ -389,37 +408,37 @@ extern char *program_invocation_short_name;
#define plog(format, args...) { syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Info : " format, getpid(), lc(), _hn(), _pn, "|-|", __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define mlog(format, args...) { if(daemon_get_cfg_ptr()->debug_msg&1 ) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Msg : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define mlog1(format, args...) { if(daemon_get_cfg_ptr()->debug_msg&2 ) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Msg2 : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define mlog2(format, args...) { if(daemon_get_cfg_ptr()->debug_msg&4 ) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Msg4 : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define mlog3(format, args...) { if(daemon_get_cfg_ptr()->debug_msg&8 ) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Msg8 : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define mlog1(format, args...) { if(daemon_get_cfg_ptr()->debug_msg&2 ) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Msg1 : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define mlog2(format, args...) { if(daemon_get_cfg_ptr()->debug_msg&4 ) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Msg2 : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define mlog3(format, args...) { if(daemon_get_cfg_ptr()->debug_msg&8 ) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Msg3 : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define jlog(format, args...) { if(daemon_get_cfg_ptr()->debug_json&1) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Json : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define jlog1(format, args...) { if(daemon_get_cfg_ptr()->debug_json&2) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Json2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define jlog2(format, args...) { if(daemon_get_cfg_ptr()->debug_json&4) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Json4: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define jlog3(format, args...) { if(daemon_get_cfg_ptr()->debug_json&8) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Json8: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define jlog1(format, args...) { if(daemon_get_cfg_ptr()->debug_json&2) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Json1: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define jlog2(format, args...) { if(daemon_get_cfg_ptr()->debug_json&4) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Json2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define jlog3(format, args...) { if(daemon_get_cfg_ptr()->debug_json&8) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Json3: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define hlog(format, args...) { if(daemon_get_cfg_ptr()->debug_http&1) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Http : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define hlog1(format, args...) { if(daemon_get_cfg_ptr()->debug_http&2) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Http2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define hlog2(format, args...) { if(daemon_get_cfg_ptr()->debug_http&4) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Http4: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define hlog3(format, args...) { if(daemon_get_cfg_ptr()->debug_http&8) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Http8: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define hlog1(format, args...) { if(daemon_get_cfg_ptr()->debug_http&2) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Http1: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define hlog2(format, args...) { if(daemon_get_cfg_ptr()->debug_http&4) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Http2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define hlog3(format, args...) { if(daemon_get_cfg_ptr()->debug_http&8) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Http3: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define alog(format, args...) { if(daemon_get_cfg_ptr()->debug_alive&1) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Alive : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define alog1(format, args...) { if(daemon_get_cfg_ptr()->debug_alive&2) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Alive2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define alog2(format, args...) { if(daemon_get_cfg_ptr()->debug_alive&4) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Alive4: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define alog3(format, args...) { if(daemon_get_cfg_ptr()->debug_alive&8) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Alive8: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define alog1(format, args...) { if(daemon_get_cfg_ptr()->debug_alive&2) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Alive1: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define alog2(format, args...) { if(daemon_get_cfg_ptr()->debug_alive&4) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Alive2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define alog3(format, args...) { if(daemon_get_cfg_ptr()->debug_alive&8) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Alive3: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define qlog(format, args...) { if(daemon_get_cfg_ptr()->debug_work&1) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Work : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define qlog1(format, args...) { if(daemon_get_cfg_ptr()->debug_work&2) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Work2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define qlog2(format, args...) { if(daemon_get_cfg_ptr()->debug_work&4) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Work4: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define qlog3(format, args...) { if(daemon_get_cfg_ptr()->debug_work&8) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Work8: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define qlog1(format, args...) { if(daemon_get_cfg_ptr()->debug_work&2) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Work1: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define qlog2(format, args...) { if(daemon_get_cfg_ptr()->debug_work&4) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Work2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define qlog3(format, args...) { if(daemon_get_cfg_ptr()->debug_work&8) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Work3: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define flog(format, args...) { if(daemon_get_cfg_ptr()->debug_fsm) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: FSM : " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define tlog(format, args...) { if(daemon_get_cfg_ptr()->debug_timer) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Timer: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define clog(format, args...) { if(daemon_get_cfg_ptr()->debug_state&1) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Change: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define clog1(format, args...) { if(daemon_get_cfg_ptr()->debug_state&2) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Chang2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define clog2(format, args...) { if(daemon_get_cfg_ptr()->debug_state&4) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Chang4: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define clog3(format, args...) { if(daemon_get_cfg_ptr()->debug_state&8) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Chang8: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define clog1(format, args...) { if(daemon_get_cfg_ptr()->debug_state&2) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Chang1: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define clog2(format, args...) { if(daemon_get_cfg_ptr()->debug_state&4) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Chang2: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define clog3(format, args...) { if(daemon_get_cfg_ptr()->debug_state&8) syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s:Chang3: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }
#define log_event(format, args...) { syslog(LOG_INFO, "[%d.%05d] %s %s %-3s %-18s(%4d) %-24s: Event: " format, getpid(), lc(), _hn(), _pn, __AREA__, __FILE__, __LINE__, __FUNCTION__, ##args) ; }

View File

@ -257,49 +257,17 @@ void print_mtc_message ( string hostname,
const char * iface,
bool force )
{
/* Handle raw json string messages differently.
* Those messages just have a json string that starts at the header */
if ( msg.hdr[0] == '{' )
{
if ( force )
{
ilog ("%s %s (%s network) - %s\n",
hostname.c_str(),
direction ? "rx <-" : "tx ->" ,
iface,
msg.hdr);
}
else if (( daemon_get_cfg_ptr()->debug_alive&1) && ( msg.cmd == MTC_MSG_MTCALIVE ))
{
alog ("%s %s (%s network) - %s\n",
hostname.c_str(),
direction ? "rx <-" : "tx ->" ,
iface,
msg.hdr);
}
else
{
mlog1 ("%s %s (%s network) - %s\n",
hostname.c_str(),
direction ? "rx <-" : "tx ->" ,
iface,
msg.hdr);
}
return ;
}
string str = "" ;
if ( msg.buf[0] )
str = msg.buf ;
if ( force )
{
ilog ("%s %s %s (%s network) %d.%d %x:%x:%x.%x.%x.%x [%s] %s\n",
ilog ("%s%s %s %s %s network: %x:%x:%x.%x.%x.%x [%s] %s\n",
hostname.c_str(),
direction ? "rx <-" : "tx ->" ,
direction ? "" : " tx" ,
get_mtcNodeCommand_str (msg.cmd),
direction ? "from" : "to" ,
iface,
msg.ver,
msg.rev,
msg.cmd,
msg.num,
msg.parm[0],
@ -309,15 +277,31 @@ void print_mtc_message ( string hostname,
msg.hdr,
str.c_str());
}
else if ( msg.cmd == MTC_MSG_MTCALIVE || msg.cmd == MTC_REQ_MTCALIVE )
{
alog ("%s%s %s %s %s network: [%x:%x:%x:%x:%x:%x:%s] %s",
hostname.c_str(),
direction ? "" : " tx" ,
get_mtcNodeCommand_str (msg.cmd),
direction ? "from" : "to" ,
iface,
msg.cmd,
msg.num,
msg.parm[0],
msg.parm[1],
msg.parm[2],
msg.parm[3],
msg.hdr,
str.c_str());
}
else
{
mlog1 ("%s %s %s (%s network) %d.%d %x:%x:%x.%x.%x.%x [%s] %s\n",
hostname.c_str(),
direction ? "rx <-" : "tx ->" ,
mlog1 ("%s%s %s %s %s network: %x:%x:%x.%x.%x.%x [%s] %s",
hostname.c_str(),
direction ? "" : " tx" ,
get_mtcNodeCommand_str (msg.cmd),
direction ? "from" : "to" ,
iface,
msg.ver,
msg.rev,
msg.cmd,
msg.num,
msg.parm[0],
@ -344,6 +328,8 @@ static std::string configStages_str [MTC_CONFIG__STAGES +1] ;
static std::string addStages_str [MTC_ADD__STAGES +1] ;
static std::string delStages_str [MTC_DEL__STAGES +1] ;
static std::string subStages_str [MTC_SUBSTAGE__STAGES +1] ;
static std::string mtcAliveStages_str [MTC_MTCALIVE__STAGES +1] ;
void mtc_stages_init ( void )
{
@ -377,7 +363,7 @@ void mtc_stages_init ( void )
enableStages_str [MTC_ENABLE__FAILURE ] = "Failure";
enableStages_str [MTC_ENABLE__FAILURE_WAIT ] = "Failure-Wait";
enableStages_str [MTC_ENABLE__FAILURE_SWACT_WAIT ] = "Failure-Swact-Wait";
enableStages_str [MTC_ENABLE__STAGES ] = "unknown" ;
enableStages_str [MTC_ENABLE__STAGES ] = "Enable-Unknown" ;
recoveryStages_str[MTC_RECOVERY__START ] = "Handler-Start";
recoveryStages_str[MTC_RECOVERY__RETRY_WAIT ] = "Req-Retry-Wait";
@ -402,7 +388,7 @@ void mtc_stages_init ( void )
recoveryStages_str[MTC_RECOVERY__FAILURE ] = "Failure";
recoveryStages_str[MTC_RECOVERY__WORKQUEUE_WAIT ] = "WorkQ-Wait";
recoveryStages_str[MTC_RECOVERY__ENABLE ] = "Enable";
recoveryStages_str[MTC_RECOVERY__STAGES ] = "unknown";
recoveryStages_str[MTC_RECOVERY__STAGES ] = "Recovery-Unknown";
disableStages_str [MTC_DISABLE__START ] = "Disable-Start";
disableStages_str [MTC_DISABLE__HANDLE_POWERON_SEND ] = "Disable-PowerOn-Send";
@ -416,7 +402,7 @@ void mtc_stages_init ( void )
disableStages_str [MTC_DISABLE__TASK_STATE_UPDATE ] = "Disable-States-Update";
disableStages_str [MTC_DISABLE__WORKQUEUE_WAIT ] = "Disable-WorkQ-Wait";
disableStages_str [MTC_DISABLE__DISABLED ] = "Host-Disabled";
disableStages_str [MTC_DISABLE__STAGES ] = "Unknown";
disableStages_str [MTC_DISABLE__STAGES ] = "Disable-Unknown";
powerStages_str [MTC_POWERON__START ] = "Power-On-Start";
powerStages_str [MTC_POWERON__POWER_STATUS_WAIT ] = "Power-On-Status";
@ -445,17 +431,16 @@ void mtc_stages_init ( void )
powercycleStages_str [MTC_POWERCYCLE__POWEROFF_WAIT ] = "Power-Cycle-Off-Wait";
powercycleStages_str [MTC_POWERCYCLE__POWERON ] = "Power-Cycle-On";
powercycleStages_str [MTC_POWERCYCLE__POWERON_REQWAIT] = "Power-Cycle-On-Req-Wait";
powercycleStages_str [MTC_POWERCYCLE__POWERON_VERIFY] = "Power-Cycle-On-Verify";
powercycleStages_str [MTC_POWERCYCLE__POWERON_VERIFY ] = "Power-Cycle-On-Verify";
powercycleStages_str [MTC_POWERCYCLE__POWERON_WAIT ] = "Power-Cycle-On-Wait";
powercycleStages_str [MTC_POWERCYCLE__DONE ] = "Power-Cycle-Done";
powercycleStages_str [MTC_POWERCYCLE__FAIL ] = "Power-Cycle-Fail";
powercycleStages_str [MTC_POWERCYCLE__HOLDOFF ] = "Power-Cycle-Hold-Off";
powercycleStages_str [MTC_POWERCYCLE__COOLOFF ] = "Power-Cycle-Cool-Off";
powercycleStages_str [MTC_POWERCYCLE__POWEROFF_CMND_WAIT] = "Power-Cycle-Off-Cmnd-Wait";
powercycleStages_str [MTC_POWERCYCLE__POWERON_CMND_WAIT] = "Power-Cycle-On-Cmnd-Wait";
powercycleStages_str [MTC_POWERCYCLE__POWERON_VERIFY_WAIT]= "Power-Cycle-On-Verify-Wait";
powercycleStages_str [MTC_POWERCYCLE__STAGES ] = "Power-Cycle-Unknown";
resetStages_str [MTC_RESET__START ] = "Reset-Start";
resetStages_str [MTC_RESET__REQ_SEND ] = "Reset-Req-Send";
@ -529,6 +514,7 @@ void mtc_stages_init ( void )
delStages_str [MTC_DEL__START ] = "Del-Start";
delStages_str [MTC_DEL__WAIT ] = "Del-Wait";
delStages_str [MTC_DEL__DONE ] = "Del-Done";
delStages_str [MTC_DEL__STAGES ] = "Del-Unknown";
subStages_str [MTC_SUBSTAGE__START ] = "subStage-Start";
subStages_str [MTC_SUBSTAGE__SEND ] = "subStage-Send";
@ -536,6 +522,15 @@ void mtc_stages_init ( void )
subStages_str [MTC_SUBSTAGE__WAIT ] = "subStage-Wait";
subStages_str [MTC_SUBSTAGE__DONE ] = "subStage-Done";
subStages_str [MTC_SUBSTAGE__FAIL ] = "subStage-Fail";
subStages_str [MTC_SUBSTAGE__STAGES ] = "subStage-Unknown";
mtcAliveStages_str[MTC_MTCALIVE__START ] = "mtcAlive-Start";
mtcAliveStages_str[MTC_MTCALIVE__MONITOR ] = "mtcAlive-Monitor";
mtcAliveStages_str[MTC_MTCALIVE__WAIT ] = "mtcAlive-Wait";
mtcAliveStages_str[MTC_MTCALIVE__CHECK ] = "mtcAlive-Check";
mtcAliveStages_str[MTC_MTCALIVE__SEND ] = "mtcAlive-Send";
mtcAliveStages_str[MTC_MTCALIVE__FAIL ] = "mtcAlive-Fail";
mtcAliveStages_str[MTC_MTCALIVE__STAGES ] = "mtcAlive-Unknown";
}
string get_delStages_str ( mtc_delStages_enum stage )
@ -666,6 +661,15 @@ string get_subStages_str ( mtc_subStages_enum stage )
return (subStages_str[stage]);
}
/** Return the string representing the specified 'mtcAlive' stage.
 *
 *  Any stage at or beyond MTC_MTCALIVE__STAGES is reported using the
 *  string stored at the MTC_MTCALIVE__STAGES index. */
string get_mtcAliveStages_str ( mtc_mtcAliveStages_enum stage )
{
    const mtc_mtcAliveStages_enum index =
        ( stage < MTC_MTCALIVE__STAGES ) ? stage : MTC_MTCALIVE__STAGES ;

    return (mtcAliveStages_str[index]);
}
void log_adminAction ( string hostname,
mtc_nodeAdminAction_enum currAction,
mtc_nodeAdminAction_enum newAction )

View File

@ -67,10 +67,11 @@ void daemon_exit ( void );
#define FAIL_BM_PASSWORD (122*256)
#define MTC_PARM_LOCK_PERSIST_IDX (0) // node lock command
#define MTC_PARM_UPTIME_IDX (0)
#define MTC_PARM_HEALTH_IDX (1)
#define MTC_PARM_FLAGS_IDX (2)
#define MTC_PARM_MAX_IDX (3)
#define MTC_PARM_UPTIME_IDX (0) // mtcAlive message
#define MTC_PARM_HEALTH_IDX (1) // mtcAlive message
#define MTC_PARM_FLAGS_IDX (2) // mtcAlive message
#define MTC_PARM_SEQ_IDX (3) // mtcAlive message
#define MTC_PARM_MAX_IDX (4) // mtcAlive message
/** 'I Am <state>' flags for maintenance.
*
@ -111,6 +112,8 @@ void daemon_exit ( void );
#define SMGMT_UNHEALTHY_FILE ((const char *)"/var/run/.sm_node_unhealthy")
#define UNLOCK_READY_FILE ((const char *)"/etc/platform/.unlock_ready")
#define STILL_SIMPLEX_FILE ((const char *)"/etc/platform/simplex")
#define FIRST_CONTROLLER_FILE ((const char *)"/etc/platform/.first_controller")
#define INIT_CONFIG_COMPLETE ((const char *)"/etc/platform/.initial_config_complete")
/** path to and module init file name */
#define MTCE_CONF_FILE ((const char *)"/etc/mtc.conf")
@ -153,6 +156,8 @@ void daemon_exit ( void );
#define PMON_CONF_FILE_DIR ((const char *)"/etc/pmon.d")
#define BM_DNSMASQ_FILENAME ((const char *)"dnsmasq.bmc_hosts")
#define OPT_PLATFORM_CONFIG_DIR ((const char *)"/opt/platform/config")
#define DNSMASQ_HOSTS_FILE ((const char *)"dnsmasq.hosts")
/* supported BMC communication protocols ; access method */
typedef enum
@ -415,6 +420,7 @@ typedef enum
#define CONTROLLER_1 ((const char *)"controller-1")
#define CONTROLLER_2 ((const char *)"controller-2")
#define CONTROLLER ((const char *)"controller")
#define CONTROLLERS (2)
#define STORAGE_0 ((const char *)"storage-0")
#define STORAGE_1 ((const char *)"storage-1")
@ -461,7 +467,8 @@ typedef enum
/** Interface Codes **/
#define MGMNT_INTERFACE (0)
#define CLSTR_INTERFACE (1)
#define PXEBOOT_INTERFACE (2)
#define MTCALIVE_INTERFACES_MAX (3)
/** Maintenance Inventory struct */
typedef struct
@ -1205,6 +1212,19 @@ typedef enum
/** Return the string representing the specified 'sensor' stage */
string get_sensorStages_str ( mtc_sensorStages_enum stage );
/** mtcAlive stages.
 *
 *  MTC_MTCALIVE__STAGES is the last enumerator ; get_mtcAliveStages_str
 *  treats any value at or beyond it as out of range and returns the
 *  string stored at that index. */
typedef enum
{
MTC_MTCALIVE__START = 0,
MTC_MTCALIVE__MONITOR,
MTC_MTCALIVE__WAIT,
MTC_MTCALIVE__CHECK,
MTC_MTCALIVE__SEND,
MTC_MTCALIVE__FAIL,
MTC_MTCALIVE__STAGES
} mtc_mtcAliveStages_enum ;
/** Return the string representing the specified 'mtcAlive' stage */
string get_mtcAliveStages_str ( mtc_mtcAliveStages_enum stage );
typedef enum
{
MTC_OFFLINE__IDLE = 0,

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2013-2017 Wind River Systems, Inc.
* Copyright (c) 2013-2017, 2024 Wind River Systems, Inc.
*
* SPDX-License-Identifier: Apache-2.0
*
@ -29,6 +29,7 @@
#include <dirent.h>
#include <string>
#include <iostream>
#include <sstream>
#include <fstream>
#include <stdlib.h>
#include <stdio.h>
@ -1350,24 +1351,582 @@ int get_pid_by_name_proc ( string procname )
}
const char mgmnt_iface_str[] = { "Mgmnt" } ;
const char clstr_iface_str[] = { "Clstr" } ;
const char null_iface_str[] = { "Null" } ;
const char pxeboot_iface_str[] = { "Pxeboot" } ;
const char mgmnt_iface_str[] = { "Mgmnt" } ;
const char clstr_iface_str[] = { "Clstr" } ;
const char null_iface_str[] = { "Null" } ;
/* Return a pointer to the display name string for the specified
 * interface code ; unrecognized codes return the "Null" string.
 *
 * NOTE(review): the first two cases use the MGMNT_IFACE / CLSTR_IFACE
 * constants while the pxeboot case uses PXEBOOT_INTERFACE, the constant
 * family used by get_interface_name_str - confirm the two families
 * are meant to be mixed here and that their values cannot collide. */
const char * get_iface_name_str ( int iface )
{
switch ( iface )
{
case MGMNT_IFACE:
return mgmnt_iface_str;
return mgmnt_iface_str;
case CLSTR_IFACE:
return clstr_iface_str;
case PXEBOOT_INTERFACE:
return pxeboot_iface_str;
default:
return null_iface_str ;
}
}
/**********************************************************************
 * Name    : get_interface_name_str
 *
 * Purpose : Translate a mtcAgent/mtcClient interface code
 *           (MGMNT_INTERFACE, CLSTR_INTERFACE or PXEBOOT_INTERFACE)
 *           into its display name string.
 *
 * Return  : pointer to the matching interface name string ;
 *           the "Null" string for any other code.
 **********************************************************************/
const char * get_interface_name_str ( int iface )
{
    if ( iface == MGMNT_INTERFACE )
    {
        return mgmnt_iface_str ;
    }
    if ( iface == CLSTR_INTERFACE )
    {
        return clstr_iface_str ;
    }
    if ( iface == PXEBOOT_INTERFACE )
    {
        return pxeboot_iface_str ;
    }
    return null_iface_str ;
}
/**********************************************************************
 * Name    : get_iface_type_str
 *
 * Purpose : Translate an interface type enum into its name string.
 *
 * Return  : pointer to "ethernet", "vlan" or "bond" for the matching
 *           enum value ; pointer to "unknown" for anything else.
 **********************************************************************/
const char ethernet_iface_type_str[] = "ethernet" ;
const char vlan_iface_type_str[]     = "vlan" ;
const char bond_iface_type_str[]     = "bond" ;
const char unknown_iface_type_str[]  = "unknown" ;
const char * get_iface_type_str ( iface_type_enum type_enum )
{
    if ( type_enum == ethernet )
    {
        return ethernet_iface_type_str ;
    }
    if ( type_enum == vlan )
    {
        return vlan_iface_type_str ;
    }
    if ( type_enum == bond )
    {
        return bond_iface_type_str ;
    }
    return unknown_iface_type_str ;
}
/********************************************************************
 * Name       : get_iface_type
 *
 * Purpose    : Fetch the specified interface's type as
 *              physical ethernet, vlan or bond.
 *
 * Description: This function opens the uevent file in /sys/class/net
 *              for the specified interface and uses DEVTYPE, in that
 *              info, to determine the specified interface type.
 *              A missing DEVTYPE label, or a DEVTYPE that is neither
 *              vlan nor bond, implies that it's a standard physical
 *              'ethernet' interface type.
 *
 * Example:
 *
 *     sysadmin@controller-0:~$ cat /sys/class/net/vlan163/uevent
 *     DEVTYPE=vlan
 *     INTERFACE=vlan163
 *     IFINDEX=41
 *
 * Updates: iface_type (ethernet, vlan or bond) on PASS.
 *          iface_type is left unmodified if the uevent file
 *          cannot be opened.
 *
 * Returns: PASS or FAIL_FILE_OPEN
 * ******************************************************************/
int get_iface_type ( string iface,
                     iface_type_enum & iface_type )
{
    int rc = PASS ;

    /* determine the interface type through uevent */
    string uevent_iface_file = INTERFACES_DIR + iface + "/uevent";
    ifstream _uevent( uevent_iface_file.data() );
    if ( _uevent )
    {
        string line;

        /* Physical ethernet interfaces publish no DEVTYPE label, so
         * start from 'ethernet' and only override it when a vlan or
         * bond DEVTYPE is found. Without this default the caller's
         * variable would be left unset for the ethernet case. */
        iface_type = ethernet ;

        while( getline( _uevent, line ) )
        {
            /* only a line that starts with the label is of interest */
            if ( line.find ("DEVTYPE") == 0 )
            {
                if ( line.find ("=vlan") != string::npos )
                {
                    iface_type = vlan;
                }
                else if ( line.find ("=bond") != string::npos )
                {
                    iface_type = bond;
                }
                break;
            }
        }
    }
    else
    {
        wlog ("Failed to find file: %s", uevent_iface_file.c_str());
        rc = FAIL_FILE_OPEN ;
    }
    return (rc);
}
/*****************************************************************************
* Name : get_iface_parent
*
* Purpose : Gets the ifname of the linked parent interface
*
* Updates : parent interface name.
*
* Returns : Returns PASS, FAIL_FILE_OPEN or FAIL_NOT_FOUND
****************************************************************************/
int get_iface_parent ( int network, string & ifname, string & parent )
{
int rc = PASS ;
/* build the full file path */
string iflink_file = INTERFACES_DIR + ifname + "/iflink";
/* declare a file stream based on the full file path */
ifstream iflink_file_stream ( iflink_file.c_str() );
/* open the file stream */
if (iflink_file_stream.is_open())
{
int iflink = -1;
string iflink_line;
char * dummy_ptr ;
char iface_buffer [IF_NAMESIZE] = "";
/* start clean */
MEMSET_ZERO (iface_buffer[0]);
while ( getline (iflink_file_stream, iflink_line) )
{
iflink = strtol(iflink_line.c_str(), &dummy_ptr, 10);
}
iflink_file_stream.close();
/*
* load iface_buffer with the name of the network interface
* corresponding to iflink.
*/
if_indextoname (iflink, iface_buffer);
if (iface_buffer[0] != '\0')
{
parent = iface_buffer;
dlog ("%s network interface name: %s",
get_interface_name_str(network),
parent.c_str());
}
else
{
wlog ("%s network parent interface not found for ifname:%s",
get_interface_name_str(network), ifname.c_str() );
rc = FAIL_NOT_FOUND ;
}
}
else
{
wlog ("failed to open %s", iflink_file.c_str());
rc = FAIL_FILE_OPEN ;
}
return rc ;
}
/********************************************************************
 * Name    : get_bond_mode
 *
 * Purpose : Get the mode of a Linux bonding interface.
 *
 * Description: Reads the first line of the
 *              <INTERFACES_DIR><bond_name>/bonding/mode file into the
 *              'bond_mode' string reference argument.
 *
 * Example : $ cat /sys/class/net/pxeboot0/bonding/mode
 *           802.3ad 4
 *
 * Parameters:
 *   network   - used only to label the log produced by this function.
 *   bond_name - name of the bonded interface.
 *   bond_mode - receives the mode line read from the file.
 *
 * Updates : bond_mode
 *
 * Returns : PASS or FAIL_FILE_OPEN if no bonding/mode file is found.
 *           PASS is also returned when the line that was read is empty.
 *
 ********************************************************************/
int get_bond_mode ( int      network,
                    string   bond_name,
                    string & bond_mode)
{
    string bond_mode_file = INTERFACES_DIR + bond_name + "/bonding/mode";

    ifstream bond_mode_data ( bond_mode_file.data() );
    if ( bond_mode_data.fail() )
    {
        wlog ("Failed to find bonding mode file: %s",
                  bond_mode_file.c_str());
        return (FAIL_FILE_OPEN) ;
    }

    /* only the first line of the mode file is of interest */
    getline ( bond_mode_data, bond_mode );
    if ( bond_mode.empty() == false )
    {
        ilog ("%s network %s mode: %s",
                  get_interface_name_str(network),
                  bond_name.c_str(),
                  bond_mode.c_str());
    }
    return (PASS) ;
}
/*********************************************************************
* Name : get_bond_slaves
*
* Purpose : Get a bonded interface slave names.
*
* Description: Returns the data in /sys/class/net/bonding/slaves
* as updates to reference arguments.
*
* Updates : slave1 and slave2
*
* Returns : PASS or FAIL_FILE_OPEN if no slaves file is found.
*
*********************************************************************/
int get_bond_slaves ( int network,
string bond_name,
string & slave1,
string & slave2 )
{
int rc = 0 ;
string bonded_interface_file = INTERFACES_DIR + \
bond_name + \
"/bonding/slaves";
ifstream slaves(bonded_interface_file.data());
if (!slaves)
{
wlog ("failed to open file: %s", bonded_interface_file.c_str());
rc = FAIL_FILE_OPEN ;
}
else
{
char *token ;
string bond_slaves ;
getline ( slaves, bond_slaves );
if ( ! bond_slaves.empty() )
{
dlog ("%s network %s slaves: %s",
get_interface_name_str(network),
bond_name.c_str(),
bond_slaves.c_str());
token = strtok((char *)bond_slaves.data(), " ");
if ( token != NULL )
slave1 = token ;
token = strtok(NULL, " ");
if ( token != NULL )
slave2 = token ;
}
}
return rc ;
}
/*****************************************************************************
* Name : get_iface_info
*
* Purpose : Update the iface_info with interface type details and heirarchy.
*
* Description: Lookup the interface type, bond, vlan or physical ethernet.
* Then for each case add interface info and create a 'chain'
* string that represents the heirarchy.
*
* - ethernet - enp0s8
* - vlan - vlan16 -> enp0s8
* - bond - pxeboot0 -> enp0s8 and enp0s9
* - bonded vlan - vlan16 -> pxeboot0 -> enp0s8 and enp0s9
*
* Updates : iface_info with learned interface type, parent, bond mode
* and slaves
* Returns : Returns PASS, FAIL_FILE_OPEN, FAIL_NOT_FOUND, FAIL_INVALID_DATA
*
*****************************************************************************/
int get_iface_info ( int network, string iface, iface_info_type & iface_info )
{
const char * network_str_ptr = get_interface_name_str (network) ;
iface_info.iface_name = iface ;
iface_info.iface_type = ethernet;
iface_info.chain = "" ;
int rc = get_iface_type ( iface_info.iface_name, iface_info.iface_type );
if ( rc )
{
wlog ("failed to get interface type from iface: %s", iface.c_str());
return rc ;
}
switch ( iface_info.iface_type )
{
case ethernet:
{
iface_info.parent = iface_info.iface_name ;
ilog ("%s network %s parent: %s", network_str_ptr, iface_info.iface_name.c_str(), iface_info.parent.c_str());
iface_info.chain.append (iface_info.parent);
break ;
}
case vlan:
{
if (( rc = get_iface_parent (MGMNT_INTERFACE, iface_info.iface_name, iface_info.parent )) == PASS )
{
ilog ("%s network %s parent: %s", network_str_ptr, iface_info.iface_name.c_str(), iface_info.parent.c_str());
if (( rc = get_iface_type ( iface_info.parent, iface_info.iface_type )) == PASS )
{
if ( iface_info.iface_type == bond )
{
get_bond_mode ( network, iface_info.parent, iface_info.bond_mode);
iface_info.chain.append( iface_info.iface_name + " -> " + iface_info.parent + " (" + iface_info.bond_mode + ")");
if (( rc = get_bond_slaves ( MGMNT_INTERFACE, iface_info.parent, iface_info.slave1, iface_info.slave2 )) == PASS )
{
iface_info.chain.append(" -> " + iface_info.slave1 + " and " + iface_info.slave2);
ilog ("%s network %s slaves: %s and %s",
network_str_ptr, iface_info.parent.c_str(),
iface_info.slave1.c_str(), iface_info.slave2.c_str());
}
else
{
wlog ("failed to get slaves from bond: %s ; rc:%d", iface_info.parent.c_str(), rc);
rc = FAIL_NOT_FOUND ;
}
}
else
{
wlog ("%s network iface: %s", network_str_ptr, iface_info.iface_name.c_str());
iface_info.chain.append( iface_info.iface_name + " -> " + iface_info.parent);
}
}
else
{
wlog ("failed to get %s network interface type from iface: %s ; rc:%d",
network_str_ptr, iface_info.parent.c_str(), rc);
rc = FAIL_NOT_FOUND ;
}
}
else
{
wlog ("failed to get parent interface from %s ; rc:%d", iface_info.iface_name.c_str(), rc );
}
break ;
}
case bond:
{
iface_info.parent = iface_info.iface_name ;
ilog ("%s network %s", network_str_ptr, iface_info.iface_name.c_str());
get_bond_mode (network, iface_info.parent, iface_info.bond_mode);
iface_info.chain.append(iface_info.parent + " (" + iface_info.bond_mode + ")");
if (( rc = get_bond_slaves ( network, iface_info.parent, iface_info.slave1, iface_info.slave2 )) == PASS )
{
iface_info.chain.append(" -> " + iface_info.slave1 + " and " + iface_info.slave2);
ilog ("%s network %s slaves: %s and %s",
network_str_ptr, iface_info.parent.c_str(),
iface_info.slave1.c_str(), iface_info.slave2.c_str());
}
else
{
wlog ("failed to get slaves from bond: %s ; rc:%d", iface_info.iface_name.c_str(), rc);
rc = FAIL_NOT_FOUND ;
}
break ;
}
default:
{
wlog ("failed: unknown interface type: %d", iface_info.iface_type);
rc = FAIL_INVALID_DATA ;
break ;
}
}
if ( !iface_info.chain.empty() )
{
ilog ("Interface Chain: %s", iface_info.chain.c_str());
}
return rc ;
}
/*****************************************************************************
 * Name    : get_pxeboot_dhcp_addr
 *
 * Purpose : get the pxeboot address from dhcp leases file.
 *
 * Description: Worker and storage nodes DHCP for their pxeboot IP address.
 *
 * Therefore, the pxeboot address for non-controller nodes is taken from
 * the 'fixed-address' label of the last lease entry in the interface's
 * /var/lib/dhcp leases file.
 *
 * The leases file is the last directory entry whose name contains the
 * 'iface' string.
 *
 * Assumptions: If this lookup is for the pxeboot interface then the caller
 *              is expected to suffix the interface name with a ":2"
 *
 * Example:
 *
 *   sysadmin@worker-0:~$ cat /var/lib/dhcp/dhclient.enp0s3:2.leases
 *   lease {
 *     interface "enp0s3:2";
 *     fixed-address 169.254.202.159;  <-- non-controller pxeboot address
 *     option subnet-mask 255.255.255.0;
 *
 * Returns: a string containing the unit's pxeboot address, or an empty
 *          string if the leases directory or file could not be read or
 *          the last lease entry has no 'fixed-address'.
 ******************************************************************************/
string get_pxeboot_dhcp_addr ( string iface )
{
    #define DHCP_LEASES_DIR ((const char *) "/var/lib/dhcp")

    mlog ("learning pxeboot address ...");

    DIR * dhcp_dir = opendir(DHCP_LEASES_DIR);
    if ( dhcp_dir == NULL )
    {
        ilog ( "no dhcp leases");
        return string() ;
    }

    // Walk the whole directory ; the last entry whose name
    // contains the interface name is the one that is used.
    string lease_filename ;
    for ( struct dirent * entry = readdir(dhcp_dir) ;
          entry != nullptr ;
          entry = readdir(dhcp_dir) )
    {
        string candidate = entry->d_name ;
        if ( candidate.find(iface) != string::npos )
            lease_filename = candidate ;
    }
    closedir(dhcp_dir);

    if ( lease_filename.empty() )
    {
        ilog ("dhcp lease file %s/%s not found", DHCP_LEASES_DIR, iface.c_str());
        return string() ;
    }

    string full_path = string(DHCP_LEASES_DIR) + "/" + lease_filename ;
    mlog ("pxeboot dhcp lease file: %s", full_path.c_str());

    ifstream lease_file(full_path);
    if ( !lease_file.is_open() )
    {
        wlog ("unable to open dhcp lease file: %s", full_path.c_str());
        return string() ;
    }

    // Only the fixed-address of the most recent lease entry is needed.
    string address ;
    string line ;
    while ( getline(lease_file, line) )
    {
        // a new 'lease' entry discards what the previous one provided
        if ( line.find("lease {") != string::npos )
            address.clear();

        if ( line.find("fixed-address") != string::npos )
        {
            istringstream leaseStream(line);
            string label ;
            leaseStream >> label ;    // ignore "fixed-address" label
            leaseStream >> address ;  // just want the address

            // If there is a ';' at the end of the address, remove it.
            if ( !address.empty() && address.back() == ';' )
                address.pop_back();
        }
    }
    lease_file.close();

    // 'address' should now contain this host's pxeboot address.
    return (address);
}
/*****************************************************************************
 * Name    : get_pxeboot_static_addr
 *
 * Purpose : Get pxeboot address from pxeboot network interface config file.
 *
 * Description: The controller nodes pxeboot addresses are static.
 *              Therefore, the pxeboot address for a controller node is
 *              taken from the 'address' label inside the
 *              <NETWORK_INTERFACES_DIR>/ifcfg-<iface> file. Only lines
 *              that begin with 'address' are considered and the last
 *              such line in the file is the one that is returned.
 *
 * Assumptions: If this lookup is for the pxeboot interface then the caller
 *              is expected to suffix the interface name with a ":2"
 *
 * Example:
 *
 *   sysadmin@controller-1:/etc/network/interfaces.d$ cat ifcfg-enp0s8:2
 *   iface enp0s8:2 inet static
 *   address 169.254.202.3      <-- controller pxeboot address
 *   netmask 255.255.255.0
 *
 * Returns: a string containing the host's pxeboot address, or an empty
 *          string if the file is missing, cannot be opened or has no
 *          'address' line.
 ****************************************************************************/
string get_pxeboot_static_addr ( string iface )
{
    string pxeboot_address ; // return value ; empty until learned

    string interface_file = string(NETWORK_INTERFACES_DIR) + "/ifcfg-" + iface ;

    if ( !daemon_is_file_present (interface_file.data()) )
    {
        // This is normal for a controller before it is unlocked.
        ilog ("no %s file present", interface_file.c_str() );
        return (pxeboot_address);
    }

    ifstream iface_file(interface_file);
    if ( !iface_file.is_open() )
    {
        wlog ("unable to open %s file for interface:%s",
                  interface_file.c_str(),
                  iface.c_str());
        return (pxeboot_address);
    }

    string line ;
    while ( getline(iface_file, line) )
    {
        // only interested in lines where
        // 'address' is the first word of the line.
        if ( line.compare(0, 7, "address") != 0 )
            continue ;

        istringstream fileStream(line);
        string label ;
        fileStream >> label ;           // ignore "address" label
        fileStream >> pxeboot_address ; // just want the address
        ilog ("found pxeboot address in %s", interface_file.c_str());
    }
    iface_file.close();

    return (pxeboot_address);
}
string get_event_str ( int event_code )
{
@ -1534,7 +2093,7 @@ int send_log_message ( msgSock_type * sock_ptr,
}
else
{
mlog2 ("%s:%s\n%s", &log.hostname[0], &log.filename[0], log_str );
mlog1 ("%s:%s\n%s", &log.hostname[0], &log.filename[0], log_str );
}
return rc ;
}

View File

@ -2,7 +2,7 @@
#define __INCLUDE_NODEUTIL_H__
/*
* Copyright (c) 2013-2014, 2016, 2019 Wind River Systems, Inc.
* Copyright (c) 2013-2014, 2016, 2019, 2024 Wind River Systems, Inc.
*
* SPDX-License-Identifier: Apache-2.0
*
@ -28,6 +28,8 @@ using namespace std;
#define NODEUTIL_LATENCY_MON_START ((const char *)"start")
void nodeUtil_latency_log ( string hostname, const char * label_ptr, int msecs );
// path to the Debian network interfaces directory
#define NETWORK_INTERFACES_DIR (const char *)("/etc/network/interfaces.d")
/* Common socket type struct */
typedef struct
@ -65,7 +67,68 @@ string get_iface_mac ( const char * iface_ptr );
void print_inv ( node_inv_type & info );
int get_iface_attrs ( const char * iface_ptr, int & index, int & speed , int & duplex , string & autoneg );
const char * get_iface_name_str ( int iface );
const char * get_interface_name_str ( int iface );
/* Used to learn the pxeboot address.
 *
 * iface_type_enum - the interface types that are distinguished.
 * iface_info_type - describes one interface and the interfaces it is
 *                   layered on ; filled in by get_iface_info. */
enum iface_type_enum { ethernet = 0, vlan = 1, bond = 2 };
typedef struct
{
/* name of the interface this record describes */
string iface_name ;
/* learned interface type ; defaults to ethernet */
iface_type_enum iface_type = ethernet ;
/* vlan link ; physical or bond
 *
 * The parent interface is the physical network interface
 * to which the VLAN is associated. */
string parent = "" ;
/* bond links ; two physical interfaces
 *
 * A bond is a logical interface created by combining multiple
 * physical network interfaces, known as "slaves". */
string slave1 = "" ;
string slave2 = "" ;
/* bonding mode ; active-backup, balanced-xor, 802.3ad, etc.
 * A string that represents the bonding mode string and id
 * Example: 802.3ad 4 */
string bond_mode = "" ;
/* string representing the iface hierarchy.
 *
 * ethernet
 * bond -> slaves
 * vlan -> bond -> slaves
 *
 * This interface chain string exists solely for the purpose
 * of logging for the report tool system info. */
string chain = "" ;
} iface_info_type ;
#define INTERFACES_DIR ((const char *)"/sys/class/net/")
const char * get_iface_type_str ( iface_type_enum type_enum );
int get_iface_type ( string iface,
iface_type_enum & iface_type );
int get_iface_parent ( int network,
string & ifname,
string & parent );
int get_bond_slaves ( int network,
string bonded_iface,
string & slave1,
string & slave2 );
int get_bond_mode ( int network,
string bonded_iface,
string & bond_mode);
int get_iface_info ( int network,
string iface,
iface_info_type & iface_info);
// For the mtcClient pxeboot address learning.
string get_pxeboot_dhcp_addr ( string iface ); // worker/storage
string get_pxeboot_static_addr ( string iface ); // controllers
unsigned int get_host_function_mask ( string & nodeType_str );
bool is_combo_system (unsigned int nodetype_mask );

View File

@ -1,7 +1,7 @@
#ifndef __INCLUDE_RETURNCODES_H__
#define __INCLUDE_RETURNCODES_H__
/*
* Copyright (c) 2013, 2016 Wind River Systems, Inc.
* Copyright (c) 2013, 2016, 2024 Wind River Systems, Inc.
*
* SPDX-License-Identifier: Apache-2.0
*
@ -116,7 +116,7 @@
#define FAIL_DUP_HOSTNAME (92)
#define FAIL_DUP_IPADDR (93)
#define FAIL_DUP_MACADDR (94)
#define FAIL____UNUSED____95 (95)
#define FAIL_INVALID_IP (95)
#define FAIL_LOCATE_KEY_VALUE (96)
#define FAIL_JSON_OBJECT (97)
#define FAIL_EXTERNAL_API (98)

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2013-2014, 2016 Wind River Systems, Inc.
* Copyright (c) 2013-2014, 2016, 2024 Wind River Systems, Inc.
*
* SPDX-License-Identifier: Apache-2.0
*
@ -43,6 +43,7 @@ void daemon_config_default ( daemon_config_type* config_ptr )
config_ptr->sysinv_mtc_inv_label = strdup("none");
config_ptr->mgmnt_iface = strdup("none");
config_ptr->clstr_iface = strdup("none");
config_ptr->pxeboot_iface = strdup("none");
config_ptr->sysinv_api_bind_ip = strdup("none");
config_ptr->mode = strdup("none");
config_ptr->fit_host = strdup("none");
@ -354,8 +355,10 @@ void daemon_dump_cfg ( void )
if ( ptr->mtc_rx_mgmnt_port ) { ilog ("mtc_rx_mgmnt_port = %d\n", ptr->mtc_rx_mgmnt_port );}
if ( ptr->mtc_rx_clstr_port ) { ilog ("mtc_rx_clstr_port = %d\n", ptr->mtc_rx_clstr_port );}
if ( ptr->mtc_rx_pxeboot_port ) { ilog ("mtc_rx_pxeboot_port = %d\n", ptr->mtc_rx_pxeboot_port );}
if ( ptr->mtc_tx_mgmnt_port ) { ilog ("mtc_tx_mgmnt_port = %d\n", ptr->mtc_tx_mgmnt_port );}
if ( ptr->mtc_tx_clstr_port ) { ilog ("mtc_tx_clstr_port = %d\n", ptr->mtc_tx_clstr_port );}
if ( ptr->mtc_tx_pxeboot_port ) { ilog ("mtc_tx_pxeboot_port = %d\n", ptr->mtc_tx_pxeboot_port );}
if ( ptr->agent_rx_port ) { ilog ("agent_rx_port = %d\n", ptr->agent_rx_port );}
if ( ptr->client_rx_port ) { ilog ("client_rx_port = %d\n", ptr->client_rx_port );}
if ( ptr->mtc_to_hbs_cmd_port ) { ilog ("mtc_to_hbs_cmd_port = %d\n", ptr->mtc_to_hbs_cmd_port );}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2017,2019 Wind River Systems, Inc.
* Copyright (c) 2016-2017,2019, 2024 Wind River Systems, Inc.
*
* SPDX-License-Identifier: Apache-2.0
*
@ -83,7 +83,7 @@ void alarmMgr_queue_clear ( void )
************************************************************************/
void alarmMgr_queue_alarm ( queue_entry_type entry )
{
alog ("%s adding %s to alarm queue [size=%ld]\n",
dlog ("%s adding %s to alarm queue [size=%ld]\n",
entry.hostname.c_str(),
entry.alarmid.c_str(),
alarm_queue.size() );
@ -116,7 +116,7 @@ void alarmMgr_queue_alarm ( queue_entry_type entry )
void alarmMgr_service_queue ( void )
{
alog1 ("Elements: %ld\n", alarm_queue.size());
dlog1 ("Elements: %ld\n", alarm_queue.size());
if ( alarm_queue.empty() )
return ;
@ -138,7 +138,7 @@ void alarmMgr_service_queue ( void )
string action = entry.operation ;
action.append (" alarm");
alog ("%s %s operation:%s severity:%s entity:%s prefix:%s\n",
dlog ("%s %s operation:%s severity:%s entity:%s prefix:%s\n",
entry.hostname.c_str(),
entry.alarmid.c_str(),
entry.operation.c_str(),

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2013 Wind River Systems, Inc.
* Copyright (c) 2013, 2024 Wind River Systems, Inc.
*
* SPDX-License-Identifier: Apache-2.0
*
@ -153,7 +153,7 @@ EFmAlarmSeverityT alarmUtil_query ( string hostname,
ENTITY_PREFIX, hostname.data(), instance.data());
}
alog ("entity_instance:%s\n", alarm_filter.entity_instance_id );
dlog ("entity_instance:%s\n", alarm_filter.entity_instance_id );
if (( rc = fm_get_fault ( &alarm_filter, &alarm_query )) == FM_ERR_OK )
{
dlog ("Found with Severity: %d\n", alarm_query.severity );
@ -185,7 +185,7 @@ int alarmUtil_query_identity ( string identity, SFmAlarmDataT * alarm_list_ptr,
memset(&alarm_filter, 0, sizeof(alarm_filter));
snprintf ( alarm_filter.alarm_id, FM_MAX_BUFFER_LENGTH, "%s", identity.data());
rc = fm_get_faults_by_id ( &alarm_filter.alarm_id, alarm_list_ptr, &max_alarms );
alog ("%s fm_get_faults_by_id rc = %d\n", alarm_filter.alarm_id, rc );
dlog ("%s fm_get_faults_by_id rc = %d\n", alarm_filter.alarm_id, rc );
if ( rc == FM_ERR_OK )
{
return (PASS);
@ -261,18 +261,18 @@ int alarmUtil ( string & hostname,
{
if ( alarm.alarm_state == FM_ALARM_STATE_SET )
{
alog ("%s setting %s %s alarm\n", hostname.c_str(), alarm.alarm_id, alarm.entity_instance_id );
dlog ("%s setting %s %s alarm\n", hostname.c_str(), alarm.alarm_id, alarm.entity_instance_id );
}
else
{
alog ("%s creating %s %s log\n", hostname.c_str(), alarm.alarm_id, alarm.entity_instance_id );
dlog ("%s creating %s %s log\n", hostname.c_str(), alarm.alarm_id, alarm.entity_instance_id );
}
/* Debug Logs */
alog ("%s Alarm Reason: %s\n", hostname.c_str(), alarm.reason_text );
alog ("%s Alarm Action: %s\n", hostname.c_str(), alarm.proposed_repair_action );
alog ("%s Alarm Ident : %s : %s\n", hostname.c_str(), alarm.entity_type_id, alarm.entity_instance_id );
alog ("%s Alarm State : state:%d sev:%d type:%d cause:%d sa:%c supp:%c\n",
dlog ("%s Alarm Reason: %s\n", hostname.c_str(), alarm.reason_text );
dlog ("%s Alarm Action: %s\n", hostname.c_str(), alarm.proposed_repair_action );
dlog ("%s Alarm Ident : %s : %s\n", hostname.c_str(), alarm.entity_type_id, alarm.entity_instance_id );
dlog ("%s Alarm State : state:%d sev:%d type:%d cause:%d sa:%c supp:%c\n",
hostname.c_str(),
alarm.alarm_state,
alarm.severity,
@ -310,7 +310,7 @@ int alarmUtil ( string & hostname,
snprintf(filter.alarm_id, FM_MAX_BUFFER_LENGTH, "%s", alarm.alarm_id);
snprintf(filter.entity_instance_id, FM_MAX_BUFFER_LENGTH, "%s", alarm.entity_instance_id);
alog ( "fm_clear_fault: %s %s:%s", hostname.c_str(), alarm.entity_instance_id, alarm.alarm_id );
dlog ( "fm_clear_fault: %s %s:%s", hostname.c_str(), alarm.entity_instance_id, alarm.alarm_id );
#ifdef WANT_FIT_TESTING
if (( daemon_is_file_present ( MTC_CMD_FIT__FM_ERROR_CODE )) &&

View File

@ -13,6 +13,8 @@
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <errno.h> /* for ENODEV, EFAULT and ENXIO */
#include <unistd.h> /* for close and usleep */
@ -284,6 +286,9 @@ nodeLinkClass::nodeLinkClass()
my_local_ip.clear() ;
my_float_ip.clear() ;
my_clstr_ip.clear() ;
my_pxeboot_ip.clear();
my_pxeboot_if.clear();
active_controller_hostname.clear() ;
inactive_controller_hostname.clear() ;
@ -301,6 +306,7 @@ nodeLinkClass::nodeLinkClass()
mgmnt_link_up_and_running = false ;
clstr_link_up_and_running = false ;
clstr_network_provisioned = false ;
pxeboot_network_provisioned=false ;
clstr_degrade_only = false ;
dor_mode_active = false ;
@ -492,11 +498,13 @@ nodeLinkClass::node* nodeLinkClass::addNode( string hostname )
/* init the new node */
ptr->hostname = hostname ;
ptr->pxeboot_hostname = "";
ptr->ip = "" ;
ptr->mac = "" ;
ptr->clstr_ip = "" ;
ptr->clstr_mac = "" ;
ptr->pxeboot_ip= "" ;
/* key value dictionary */
ptr->mtce_info = "" ;
@ -551,18 +559,35 @@ nodeLinkClass::node* nodeLinkClass::addNode( string hostname )
ptr->mtcAlive_purge = 0 ;
ptr->offline_search_count = 0 ;
ptr->mtcAlive_mgmnt = false ;
ptr->mtcAlive_clstr = false ;
ptr->mtcAlive_pxeboot = false ;
/* These counts are incremented in the set_mtcAlive member
* function and cleared in the reset progression handler. */
ptr->mtcAlive_mgmnt_count = 0 ;
ptr->mtcAlive_clstr_count = 0 ;
ptr->mtcAlive_pxeboot_count = 0 ;
// Clear all the mtcAlive_sequence numbers and monitoring trackers
for (int i = 0 ; i < MTCALIVE_INTERFACES_MAX ; i++)
{
ptr->mtcAlive_sequence[i] =
ptr->mtcAlive_sequence_save[i] =
ptr->mtcAlive_sequence_miss[i] =
ptr->mtcAlive_log_throttle [i] = 0 ;
}
ptr->pxeboot_mtcAlive_not_seen_log_throttle = 0 ;
ptr->pxeboot_mtcAlive_loss_log_throttle = 0 ;
ptr->bmc_reset_pending_log_throttle = 0 ;
ptr->reboot_cmd_ack_mgmnt = false ;
ptr->reboot_cmd_ack_clstr = false ;
ptr->unlock_cmd_ack = false ;
ptr->reboot_cmd_ack_pxeboot = false ;
ptr->offline_log_throttle = 0 ;
ptr->offline_log_reported = true ;
ptr->online_log_reported = false ;
@ -585,6 +610,7 @@ nodeLinkClass::node* nodeLinkClass::addNode( string hostname )
mtcTimer_init ( ptr->mtcCmd_timer, hostname, "mtcCmd timer"); /* Init node's mtcCmd timer */
mtcTimer_init ( ptr->mtcConfig_timer, hostname, "mtcConfig timer"); /* Init node's mtcConfig timer */
mtcTimer_init ( ptr->mtcAlive_timer , hostname, "mtcAlive timer"); /* Init node's mtcAlive timer */
mtcTimer_init ( ptr->online_timer , hostname, "online timer"); /* Init node's online timer */
mtcTimer_init ( ptr->offline_timer, hostname, "offline timer"); /* Init node's FH offline timer */
mtcTimer_init ( ptr->http_timer, hostname, "http timer" ); /* Init node's http timer */
mtcTimer_init ( ptr->bm_timer, hostname, "bm timer" ); /* Init node's bm timer */
@ -620,6 +646,7 @@ nodeLinkClass::node* nodeLinkClass::addNode( string hostname )
ptr->resetStage = MTC_RESET__START ;
ptr->enableStage = MTC_ENABLE__START ;
ptr->disableStage = MTC_DISABLE__START ;
ptr->mtcAliveStage = MTC_MTCALIVE__START ;
ptr->oos_test_count = 0 ;
ptr->insv_test_count = 0 ;
@ -818,6 +845,11 @@ struct nodeLinkClass::node* nodeLinkClass::getNode ( string hostname )
{
return ptr ;
}
/* Node can be looked up by pxeboot ip */
if ( !hostname.compare ( ptr->pxeboot_ip ))
{
return ptr ;
}
if (( ptr->next == NULL ) || ( ptr == tail ))
break ;
@ -911,6 +943,7 @@ int nodeLinkClass::remNode( string hostname )
mtcTimer_fini ( ptr->mtcTimer );
mtcTimer_fini ( ptr->mtcSwact_timer );
mtcTimer_fini ( ptr->mtcAlive_timer );
mtcTimer_fini ( ptr->online_timer );
mtcTimer_fini ( ptr->offline_timer );
mtcTimer_fini ( ptr->mtcCmd_timer );
mtcTimer_fini ( ptr->http_timer );
@ -1559,12 +1592,12 @@ int nodeLinkClass::avail_status_change ( string hostname,
( avail != MTC_AVAIL_STATUS__ONLINE )))
{
/* Free the mtc timer if in use */
if ( node_ptr->mtcAlive_timer.tid )
if ( node_ptr->online_timer.tid )
{
tlog ("%s Stopping mtcAlive timer\n", node_ptr->hostname.c_str());
mtcTimer_stop ( node_ptr->mtcAlive_timer );
node_ptr->mtcAlive_timer.ring = false ;
node_ptr->mtcAlive_timer.tid = NULL ;
mtcTimer_stop ( node_ptr->online_timer );
node_ptr->online_timer.ring = false ;
node_ptr->online_timer.tid = NULL ;
}
node_ptr->onlineStage = MTC_ONLINE__START ;
}
@ -1641,6 +1674,7 @@ int nodeLinkClass::lazy_graceful_fs_reboot ( struct nodeLinkClass::node * node_p
/* Should never get there but if we do resend the reboot request
* but this time not Lazy */
send_mtc_cmd ( node_ptr->hostname, MTC_CMD_REBOOT, MGMNT_INTERFACE ) ;
send_mtc_cmd ( node_ptr->hostname, MTC_CMD_REBOOT, PXEBOOT_INTERFACE ) ;
}
return (FAIL);
}
@ -3448,6 +3482,137 @@ void nodeLinkClass::mtcInfo_handler ( void )
}
}
/**************************************************************************
 *
 * Name       : pxebootInfo_loader
 *
 * Purpose    : Load node pxeboot hostnames and ip addresses.
 *
 * Description: For each provisioned node, this function parses the
 *              /opt/platform/config/<sw_version>/dnsmasq.hosts file
 *              with each node's management network mac address as the
 *              primary search string and loads the pxeboot ip address
 *              and pxeboot hostname where matches are found.
 *
 *              Each line is read as comma delimited 'mac,hostname,ip'
 *              fields. Lines that start with "pxecontroller" and lines
 *              with no mac address field are skipped.
 *
 * Parameters : Optional my_mac address for initial process startup
 *              to get just its own my_pxeboot_ip address before the
 *              nodeLinkClass host chain is created.
 *
 * Updates    : this->my_pxeboot_ip if my_mac is specified.
 *              node_ptr->pxeboot_hostname and node_ptr->pxeboot_ip for
 *              all matching hosts if my_mac is empty, along with
 *              this->my_pxeboot_ip when the match is for this host.
 *
 * Returns    : Nothing
 *
 **************************************************************************/
void nodeLinkClass::pxebootInfo_loader ( string my_mac )
{
    string dnsmasq_hosts_file = OPT_PLATFORM_CONFIG_DIR ;
    dnsmasq_hosts_file.append("/");
    dnsmasq_hosts_file.append(sw_version);
    dnsmasq_hosts_file.append("/");
    dnsmasq_hosts_file.append(DNSMASQ_HOSTS_FILE);

    if ( daemon_is_file_present ( dnsmasq_hosts_file.data()) == false )
    {
        ilog ("%s file not present", dnsmasq_hosts_file.c_str());
        return ;
    }

    // Open the dnsmasq_hosts_file for reading
    ifstream filestream ( dnsmasq_hosts_file.c_str() );
    if (!filestream.is_open())
    {
        elog ("failed to open seemingly present %s file", dnsmasq_hosts_file.c_str());
        return ;
    }

    // Read each line from the file
    string line;
    while (getline(filestream, line))
    {
        // Skip lines starting with "pxecontroller"
        if (line.compare(0, 13, "pxecontroller") == 0)
            continue;

        // Parse the comma-delimited fields
        stringstream dnsmasq_hosts(line);
        string mac, hostname, ip ;
        getline(dnsmasq_hosts, mac, ',');
        getline(dnsmasq_hosts, hostname, ',');
        getline(dnsmasq_hosts, ip, ',');

        // Skip blank or malformed lines that carry no mac address.
        // An empty mac must never be used as a search key because it
        // would match any host whose mac address is not yet learned.
        if ( mac.empty() )
            continue ;

        dlog ("pxebootInfo: %s %s %s", mac.c_str(), hostname.c_str(), ip.c_str());

        if ( !my_mac.empty() )
        {
            // Handle the process startup 'my mac' case
            if ( !mac.compare( my_mac ) )
            {
                if ( !ip.empty() )
                    this->my_pxeboot_ip = ip ;
                else
                {
                    wlog ("failed to lookup pxeboot ip from mac %s", my_mac.c_str());
                }
            }
            continue ;
        }

        if ( ! head )
        {
            elog ("cannot read inventory ; head is null");
            // Close the file stream
            filestream.close();
            return ;
        }

        // Search for the node that matches this mac address in inventory
        bool found = false ;
        for ( struct node * node_ptr = head ; ; node_ptr = node_ptr->next )
        {
            if ( !mac.compare(node_ptr->mac) )
            {
                node_ptr->pxeboot_hostname = hostname ;
                if ( !ip.empty() && ( ip != node_ptr->pxeboot_ip ))
                {
                    // pxeboot ip address found and is different
                    if ( node_ptr->pxeboot_ip.empty() )
                    {
                        ilog ("%s pxeboot hostname: %s has pxeboot ip: %s",
                                  node_ptr->hostname.c_str(),
                                  node_ptr->pxeboot_hostname.c_str(),
                                  ip.c_str());
                    }
                    else
                    {
                        wlog ("%s pxeboot ip changed from %s to %s",
                                  node_ptr->hostname.c_str(),
                                  node_ptr->pxeboot_ip.c_str(),
                                  ip.c_str());
                    }
                    node_ptr->pxeboot_ip = ip ;

                    // Also load the my_pxeboot_ip at the process level for easy access
                    if (( node_ptr->hostname == this->my_hostname ) && ( this->my_pxeboot_ip != ip ))
                        this->my_pxeboot_ip = ip ;
                }
                found = true ;
                break ;
            }
            // stop at the end of the host list
            if (( node_ptr->next == NULL ) || ( node_ptr == tail ))
                break ;
        }
        if ( found == false )
        {
            wlog ("no host found matching mac address:%s", mac.c_str());
        }
    }
    // Close the file stream
    filestream.close();
}
/* Lock Rules
*
* 1. Cannot lock this controller
@ -3701,6 +3866,17 @@ string nodeLinkClass::get_clstr_hostaddr ( string & hostname )
return ( null_str );
}
/* Get the pxeboot ip address of the host that getNode finds for the
 * specified 'hostname'. Returns null_str if no host is found. */
string nodeLinkClass::get_pxeboot_hostaddr ( string hostname )
{
    nodeLinkClass::node * node_ptr = nodeLinkClass::getNode ( hostname );
    if ( node_ptr == NULL )
    {
        return ( null_str );
    }
    return ( node_ptr->pxeboot_ip );
}
string nodeLinkClass::get_hostIfaceMac ( string & hostname, int iface )
{
nodeLinkClass::node* node_ptr ;
@ -3730,6 +3906,30 @@ int nodeLinkClass::set_hostaddr ( string & hostname, string & ip )
return ( rc );
}
/* Set the pxeboot ip address of the host that getNode finds for the
 * specified 'hostname'.
 *
 * Returns : PASS                 - the address was valid, different from
 *                                  the current one and has been stored.
 *           FAIL_HOSTNAME_LOOKUP - no host was found.
 *           FAIL_INVALID_IP      - the address failed validation or is the
 *                                  same as the one already stored.
 *
 * NOTE(review): an unchanged but valid address also yields
 * FAIL_INVALID_IP ; confirm callers expect that before relying on it. */
int nodeLinkClass::set_pxeboot_hostaddr ( string hostname, string ip )
{
    nodeLinkClass::node * node_ptr = nodeLinkClass::getNode ( hostname );
    if ( node_ptr == NULL )
    {
        return ( FAIL_HOSTNAME_LOOKUP );
    }

    /* nothing to store if the address is bad or has not changed */
    if (( !hostUtil_is_valid_ip_addr(ip) ) || ( node_ptr->pxeboot_ip == ip ))
    {
        return ( FAIL_INVALID_IP );
    }

    node_ptr->pxeboot_ip = ip ;
    ilog ("%s pxeboot ip set to %s",
              node_ptr->hostname.c_str(),
              node_ptr->pxeboot_ip.c_str());
    return ( PASS );
}
int nodeLinkClass::set_clstr_hostaddr ( string & hostname, string & ip )
{
int rc = FAIL ;
@ -3759,7 +3959,8 @@ string nodeLinkClass::get_hostname ( string hostaddr )
( hostaddr == LOCALHOST ) ||
( hostaddr == my_local_ip ) ||
( hostaddr == my_float_ip ) ||
( hostaddr == my_clstr_ip ))
( hostaddr == my_clstr_ip ) ||
( hostaddr == my_pxeboot_ip ))
{
return(this->my_hostname);
}
@ -3889,6 +4090,8 @@ void nodeLinkClass::set_cmd_resp ( string & hostname, mtc_message_type & msg, in
if ( iface == MGMNT_INTERFACE )
node_ptr->reboot_cmd_ack_mgmnt = 1 ;
else if ( iface == PXEBOOT_INTERFACE )
node_ptr->reboot_cmd_ack_pxeboot = 1 ;
else if ( iface == CLSTR_INTERFACE )
node_ptr->reboot_cmd_ack_clstr = 1 ;
}
@ -3923,9 +4126,8 @@ unsigned int nodeLinkClass::get_cmd_resp ( string & hostname )
*
* Name : set_mtcAlive
*
* Description: Set the mgmnt or clust specific mtc alive received bool.
*
* Used in the offline handler to verify overall offline state.
* Description: Set mtcAlive driven controls and status for the
* pxeboot, mgmnt and cluster networks.
*
* Interfaces : Public with hostname.
* Private by node pointer.
@ -3933,52 +4135,108 @@ unsigned int nodeLinkClass::get_cmd_resp ( string & hostname )
* If mtcAlive is ungated then
*
* 1. manage the online/offline state bools
* 2. increment the mtcAlive count and
* 2. increment the mtcAlive count
* 3. set the mtcAlive received bool for the specified interface
*
*****************************************************************************/
void nodeLinkClass::set_mtcAlive ( string & hostname, int interface )
void nodeLinkClass::set_mtcAlive ( string & hostname, unsigned int sequence, int iface )
{
nodeLinkClass::node* node_ptr ;
node_ptr = nodeLinkClass::getNode ( hostname );
if ( node_ptr != NULL )
{
this->set_mtcAlive ( node_ptr, interface );
this->set_mtcAlive ( node_ptr, sequence, iface );
}
}
void nodeLinkClass::set_mtcAlive ( struct nodeLinkClass::node * node_ptr, int interface )
#define MTCALIVE_LOG_THROTTLE (1000)
void nodeLinkClass::set_mtcAlive ( struct nodeLinkClass::node * node_ptr, unsigned int sequence, int iface)
{
if ( node_ptr )
{
if ( node_ptr->mtcAlive_gate == false )
{
bool state_change = false ;
node_ptr->mtcAlive_online = true ;
node_ptr->mtcAlive_offline = false ;
node_ptr->mtcAlive_count++ ;
if ( interface == CLSTR_INTERFACE )
if ( iface == CLSTR_INTERFACE )
{
if ( node_ptr->mtcAlive_clstr == false )
{
alog ("%s %s mtcAlive received",
node_ptr->hostname.c_str(),
get_iface_name_str(interface));
node_ptr->mtcAlive_clstr_count++ ;
node_ptr->mtcAlive_clstr = true ;
state_change = true ;
}
node_ptr->mtcAlive_clstr_count++ ;
}
else if ( iface == MGMNT_INTERFACE )
{
if ( node_ptr->mtcAlive_mgmnt == false )
{
node_ptr->mtcAlive_mgmnt = true ;
state_change = true ;
}
node_ptr->mtcAlive_mgmnt_count++ ;
}
else if ( iface == PXEBOOT_INTERFACE )
{
if ( node_ptr->mtcAlive_pxeboot == false )
{
node_ptr->mtcAlive_pxeboot = true ;
state_change = true ;
}
node_ptr->mtcAlive_pxeboot_count++ ;
}
else
{
wlog("%s mtcAlive received from unknown network %d",
node_ptr->hostname.c_str(), iface);
return ;
}
if ( state_change )
{
ilog ("%s mtcAlive received from %s network with uptime:%d ; seq:%d",
node_ptr->hostname.c_str(),
get_iface_name_str(iface),
node_ptr->uptime,
sequence);
node_ptr->mtcAlive_log_throttle[iface] = 0 ;
}
else if ( node_ptr->mtcAlive_sequence[iface]+1 != sequence)
{
if ( sequence < node_ptr->mtcAlive_sequence[iface]+1 )
{
wlog ("%s mtcAlive received from %s network with uptime:%d ; out-of-sequence ; expect:%d detect:%d ; correcting",
node_ptr->hostname.c_str(),
get_iface_name_str(iface),
node_ptr->uptime,
node_ptr->mtcAlive_sequence[iface]+1,
sequence);
}
else
{
wlog ("%s mtcAlive received from %s network with uptime:%d ; missed %d mtcalive msgs ; expect:%d detect:%d ; correcting",
node_ptr->hostname.c_str(),
get_iface_name_str(iface),
node_ptr->uptime,
sequence-(node_ptr->mtcAlive_sequence[iface]+1),
node_ptr->mtcAlive_sequence[iface]+1,
sequence);
}
}
else
{
if ( node_ptr->mtcAlive_mgmnt == false )
{
alog ("%s %s mtcAlive received",
node_ptr->hostname.c_str(),
get_iface_name_str(interface));
node_ptr->mtcAlive_mgmnt_count++ ;
node_ptr->mtcAlive_mgmnt = true ;
}
alog_throttled (node_ptr->mtcAlive_log_throttle[iface], MTCALIVE_LOG_THROTTLE,
"%s mtcAlive received from %s network with uptime:%d ; seq:%d",
node_ptr->hostname.c_str(),
get_iface_name_str(iface),
node_ptr->uptime,
sequence);
}
// update running sequence number for this interface
node_ptr->mtcAlive_sequence[iface] = sequence ;
}
}
}
@ -4291,7 +4549,6 @@ void nodeLinkClass::set_mtce_flags ( string hostname, int flags, int iface )
((node_ptr->adminAction != MTC_ADMIN_ACTION__ENABLE ) &&
(node_ptr->adminAction != MTC_ADMIN_ACTION__UNLOCK )))
{
wlog ("%s mtcAlive reporting locked while unlocked ; correcting", node_ptr->hostname.c_str());
send_mtc_cmd ( node_ptr->hostname , MTC_MSG_UNLOCKED, MGMNT_INTERFACE );
send_mtc_cmd ( node_ptr->hostname , MTC_MSG_UNLOCKED, CLSTR_INTERFACE );
}
@ -4302,7 +4559,13 @@ void nodeLinkClass::set_mtce_flags ( string hostname, int flags, int iface )
if (( node_ptr->adminState == MTC_ADMIN_STATE__LOCKED ) &&
( node_ptr->adminAction != MTC_ADMIN_ACTION__LOCK ))
{
wlog ("%s mtcAlive reporting unlocked while locked ; correcting", node_ptr->hostname.c_str());
// Avoid printing this warning log in simplex mode.
// The locked flag is lost over a reboot in simplex mode.
if ( daemon_is_file_present ( STILL_SIMPLEX_FILE ) == false )
{
wlog ("%s mtcAlive reporting unlocked while locked ; correcting",
node_ptr->hostname.c_str());
}
send_mtc_cmd ( node_ptr->hostname , MTC_MSG_LOCKED, MGMNT_INTERFACE );
send_mtc_cmd ( node_ptr->hostname , MTC_MSG_LOCKED, CLSTR_INTERFACE );
}
@ -6243,6 +6506,10 @@ int nodeLinkClass::update_host_functions ( string hostname , string functions )
}
rc = PASS ;
}
else
{
wlog ("%s getNode lookup failed", hostname.c_str());
}
return (rc);
}
@ -6930,12 +7197,12 @@ int nodeLinkClass::availStatusChange ( struct nodeLinkClass::node * node_ptr,
( newAvailStatus != MTC_AVAIL_STATUS__ONLINE )))
{
/* Free the mtc timer if in use */
if ( node_ptr->mtcAlive_timer.tid )
if ( node_ptr->online_timer.tid )
{
tlog ("%s Stopping mtcAlive timer\n", node_ptr->hostname.c_str());
mtcTimer_stop ( node_ptr->mtcAlive_timer );
node_ptr->mtcAlive_timer.ring = false ;
node_ptr->mtcAlive_timer.tid = NULL ;
mtcTimer_stop ( node_ptr->online_timer );
node_ptr->online_timer.ring = false ;
node_ptr->online_timer.tid = NULL ;
}
node_ptr->onlineStage = MTC_ONLINE__START ;
}
@ -7265,6 +7532,28 @@ int nodeLinkClass::subStageChange ( struct nodeLinkClass::node * node_ptr,
}
}
/**
 * Host mtcAlive Stage Change member function.
 *
 * Moves the host's mtcAlive stage to newHdlrStage.
 *
 * @param node_ptr     host whose mtcAliveStage is updated
 * @param newHdlrStage requested stage ; must be below MTC_MTCALIVE__STAGES
 *
 * @return PASS when the stage was applied ; FAIL when the requested stage
 *         is out of range, in which case the host's stage is forced back
 *         to MTC_MTCALIVE__START.
 */
int nodeLinkClass::mtcAliveStageChange ( struct nodeLinkClass::node * node_ptr,
                                         mtc_mtcAliveStages_enum newHdlrStage )
{
    /* Reject out-of-range requests up front and restart from the first stage */
    if ( newHdlrStage >= MTC_MTCALIVE__STAGES )
    {
        slog ("%s Invalid mtcAlive stage (%d)", node_ptr->hostname.c_str(), newHdlrStage );
        node_ptr->mtcAliveStage = MTC_MTCALIVE__START ;
        return (FAIL) ;
    }

    /* Log the transition before the stored stage is overwritten */
    clog ("%s stage %s -> %s",
              node_ptr->hostname.c_str(),
              get_mtcAliveStages_str(node_ptr->mtcAliveStage).c_str(),
              get_mtcAliveStages_str(newHdlrStage).c_str());

    node_ptr->mtcAliveStage = newHdlrStage ;
    return (PASS) ;
}
struct nodeLinkClass::node * nodeLinkClass::get_mtcTimer_timer ( timer_t tid )
{
/* check for empty list condition */
@ -7537,6 +7826,23 @@ struct nodeLinkClass::node * nodeLinkClass::get_mtcAlive_timer ( timer_t tid )
return static_cast<struct node *>(NULL);
}
/**
 * Find the host that owns the specified online timer.
 *
 * Walks the host list from head to tail comparing each host's
 * online_timer.tid against the supplied timer id.
 *
 * @param tid timer id to search for
 *
 * @return pointer to the matching host node ; NULL if tid is NULL,
 *         the host list is empty or no host owns that timer.
 */
struct nodeLinkClass::node * nodeLinkClass::get_online_timer ( timer_t tid )
{
    /* Nothing to match against a null timer id or an empty host list.
     * The head check prevents a null dereference on the first pass. */
    if (( tid == NULL ) || ( head == NULL ))
    {
        return static_cast<struct node *>(NULL);
    }

    for ( struct node * ptr = head ; ptr != NULL ; ptr = ptr->next )
    {
        if ( ptr->online_timer.tid == tid )
        {
            return ptr ;
        }
        /* stop at the list tail even if its next pointer is not NULL */
        if ( ptr == tail )
            break ;
    }
    return static_cast<struct node *>(NULL);
}
struct nodeLinkClass::node * nodeLinkClass::get_offline_timer ( timer_t tid )
{
@ -9231,17 +9537,53 @@ void nodeLinkClass::mem_log_state2 ( struct nodeLinkClass::node * node_ptr )
mem_log (str);
}
/* Format a one-line summary of the host's mtcAlive state and pass it to
 * mem_log. The line carries the hostname, the online and offline flags
 * (Y/N), the mtcAlive count, the gate state (closed/open) and the miss
 * count. The longer format string additionally reports the per-network
 * receive flags as Net:<mgmnt>:<clstr>:<pxeboot>. */
void nodeLinkClass::mem_log_mtcalive ( struct nodeLinkClass::node * node_ptr )
void nodeLinkClass::mem_log_mtcalive_state ( struct nodeLinkClass::node * node_ptr )
{
/* snprintf bounds the write to the MAX_MEM_LOG_DATA sized buffer */
char str[MAX_MEM_LOG_DATA] ;
snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tmtcAlive: online:%c offline:%c Cnt:%d Gate:%s Misses:%d\n",
snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tmtcAlive: online:%c offline:%c Cnt:%d Gate:%s Misses:%d Net:%d:%d:%d",
node_ptr->hostname.c_str(),
node_ptr->mtcAlive_online ? 'Y' : 'N',
node_ptr->mtcAlive_offline ? 'Y' : 'N',
node_ptr->mtcAlive_count,
node_ptr->mtcAlive_gate ? "closed" : "open",
node_ptr->mtcAlive_misses);
node_ptr->mtcAlive_misses,
/* bool receive flags ; printed as 0 or 1 by the %d conversions */
node_ptr->mtcAlive_mgmnt,
node_ptr->mtcAlive_clstr,
node_ptr->mtcAlive_pxeboot );
mem_log (str);
}
/**
 * Log the host's per-network mtcAlive counters to the memory log.
 *
 * For each of the pxeboot, management and cluster-host networks the
 * line reports the count of mtcAlive messages received and the last
 * recorded sequence number for that interface.
 *
 * @param node_ptr host whose mtcAlive counters are logged
 */
void nodeLinkClass::mem_log_mtcalive_data ( struct nodeLinkClass::node * node_ptr )
{
    char str[MAX_MEM_LOG_DATA] ;

    /* The message counts are int (%d) but the sequence numbers are
     * unsigned int and must be printed with %u ; %d would show large
     * sequence values as negative numbers. */
    snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tmtcAlive: Pxeboot:%d seq:%u Mgmt:%d seq:%u Clstr:%d seq:%u",
                node_ptr->hostname.c_str(),
                node_ptr->mtcAlive_pxeboot_count,
                node_ptr->mtcAlive_sequence[PXEBOOT_INTERFACE],
                node_ptr->mtcAlive_mgmnt_count,
                node_ptr->mtcAlive_sequence[MGMNT_INTERFACE],
                node_ptr->mtcAlive_clstr_count,
                node_ptr->mtcAlive_sequence[CLSTR_INTERFACE]);
    mem_log (str);
}
/**
 * Log the host's pxeboot network mtcAlive monitoring data to the
 * memory log.
 *
 * The line reports whether the pxeboot network is provisioned, whether
 * a pxeboot mtcAlive has been received, the mtcAlive timer ring flag
 * and the pxeboot interface's sequence miss, current and saved values.
 *
 * @param node_ptr host whose pxeboot mtcAlive data is logged
 */
void nodeLinkClass::mem_log_mtcalive_pxeboot ( struct nodeLinkClass::node * node_ptr )
{
    char str[MAX_MEM_LOG_DATA] ;

    /* The three sequence tracking arrays are unsigned int ; print them
     * with %u so large values are not rendered as negative numbers. */
    snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tPxeboot mtcAlive: Prov:%c Rxed:%c ring:%c miss:%u seq:%u save:%u ",
                node_ptr->hostname.c_str(),
                this->pxeboot_network_provisioned ? 'Y' : 'N',
                node_ptr->mtcAlive_pxeboot ? 'Y' : 'N',
                node_ptr->mtcAlive_timer.ring ? 'Y' : 'N',
                node_ptr->mtcAlive_sequence_miss [PXEBOOT_INTERFACE],
                node_ptr->mtcAlive_sequence      [PXEBOOT_INTERFACE],
                node_ptr->mtcAlive_sequence_save [PXEBOOT_INTERFACE]);
    mem_log (str);
}
@ -9273,7 +9615,7 @@ void nodeLinkClass::mem_log_alarm2 ( struct nodeLinkClass::node * node_ptr )
void nodeLinkClass::mem_log_stage ( struct nodeLinkClass::node * node_ptr )
{
char str[MAX_MEM_LOG_DATA] ;
snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tAdd:%d Offline:%d: Swact:%d Recovery:%d Enable:%d Disable:%d Power:%d Cycle:%d\n",
snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tAdd:%d Offline:%d: Swact:%d Recovery:%d Enable:%d Disable:%d Power:%d Cycle:%d mtcAlive:%d\n",
node_ptr->hostname.c_str(),
node_ptr->addStage,
node_ptr->offlineStage,
@ -9282,7 +9624,8 @@ void nodeLinkClass::mem_log_stage ( struct nodeLinkClass::node * node_ptr )
node_ptr->enableStage,
node_ptr->disableStage,
node_ptr->powerStage,
node_ptr->powercycleStage);
node_ptr->powercycleStage,
node_ptr->mtcAliveStage);
mem_log (str);
}
@ -9319,11 +9662,13 @@ void nodeLinkClass::mem_log_reset_info ( struct nodeLinkClass::node * node_ptr )
/* Format a one-line summary of the host's network identity and pass it
 * to mem_log. The line carries the hostname, mac address, management ip,
 * cluster-host ip and uptime. The longer format string labels each field
 * and adds the pxeboot network hostname and ip as pxeboot:<hostname>:<ip>. */
void nodeLinkClass::mem_log_network ( struct nodeLinkClass::node * node_ptr )
{
/* snprintf bounds the write to the MAX_MEM_LOG_DATA sized buffer */
char str[MAX_MEM_LOG_DATA] ;
snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\t%s %s cluster_host_ip: %s Uptime: %u\n",
snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\t mac:%s mgmt:%s clstr: %s pxeboot:%s:%s Uptime: %u\n",
node_ptr->hostname.c_str(),
node_ptr->mac.c_str(),
node_ptr->ip.c_str(),
node_ptr->clstr_ip.c_str(),
node_ptr->pxeboot_hostname.c_str(),
node_ptr->pxeboot_ip.c_str(),
node_ptr->uptime );
mem_log (str);
}
@ -9430,23 +9775,25 @@ void nodeLinkClass::memDumpNodeState ( string hostname )
{
if ( maintenance == true )
{
mem_log_dor ( node_ptr );
mem_log_identity ( node_ptr );
mem_log_type_info ( node_ptr );
mem_log_network ( node_ptr );
mem_log_state1 ( node_ptr );
mem_log_state2 ( node_ptr );
// mem_log_reset_info ( node_ptr );
mem_log_power_info ( node_ptr );
mem_log_alarm1 ( node_ptr );
mem_log_alarm2 ( node_ptr );
mem_log_mtcalive ( node_ptr );
mem_log_stage ( node_ptr );
mem_log_bm ( node_ptr );
mem_log_ping ( node_ptr );
mem_log_test_info ( node_ptr );
mem_log_thread_info( node_ptr );
workQueue_dump ( node_ptr );
mem_log_dor ( node_ptr );
mem_log_identity ( node_ptr );
mem_log_type_info ( node_ptr );
mem_log_network ( node_ptr );
mem_log_mtcalive_state ( node_ptr );
mem_log_mtcalive_data ( node_ptr );
mem_log_mtcalive_pxeboot ( node_ptr );
mem_log_state1 ( node_ptr );
mem_log_state2 ( node_ptr );
// mem_log_reset_info ( node_ptr );
mem_log_power_info ( node_ptr );
mem_log_alarm1 ( node_ptr );
mem_log_alarm2 ( node_ptr );
mem_log_stage ( node_ptr );
mem_log_bm ( node_ptr );
mem_log_ping ( node_ptr );
mem_log_test_info ( node_ptr );
mem_log_thread_info ( node_ptr );
workQueue_dump ( node_ptr );
}
if ( heartbeat == true )
{

View File

@ -121,6 +121,12 @@ private:
/** The Mac address of the host node */
std::string mac ;
/** The pxeboot network IP address of the host node */
std::string pxeboot_ip ;
/** The pxeboot network hostname of the host node */
std::string pxeboot_hostname ;
/** The cluster-host network IP address of the host node */
std::string clstr_ip ;
@ -279,6 +285,7 @@ private:
mtc_configStages_enum configStage ;
mtc_resetProgStages_enum resetProgStage ;
mtc_reinstallStages_enum reinstallStage ;
mtc_mtcAliveStages_enum mtcAliveStage ;
/** Board management specific FSM Stages */
mtc_powerStages_enum powerStage ;
@ -315,10 +322,25 @@ private:
int mtcAlive_hits ;
int mtcAlive_purge ;
int mtcAlive_mgmnt_count ; /* count the mgmnt network mtcAlive messages */
int mtcAlive_clstr_count ; /* count the clstr network mtcAlive messages */
bool mtcAlive_mgmnt ; /* set true when mtcAlive is rx'd from mgmnt network */
bool mtcAlive_clstr ; /* set true when mtcAlive is rx'd from clstr network */
/* TODO: (emacdona) make these an array of interfaces */
bool mtcAlive_mgmnt ; /* set true when mtcAlive is rx'd from mgmnt network */
bool mtcAlive_clstr ; /* set true when mtcAlive is rx'd from clstr network */
bool mtcAlive_pxeboot ; /* set true when mtcAlive is rx'd from pxeboot network */
/* TODO: (emacdona) make these an array of interfaces */
int mtcAlive_mgmnt_count ; /* count the mgmnt network mtcAlive messages */
int mtcAlive_clstr_count ; /* count the clstr network mtcAlive messages */
int mtcAlive_pxeboot_count ; /* count the pxeboot network mtcAlive messages */
/* tracks the sequence number of the last <iface> mtcAlive message */
unsigned int mtcAlive_sequence [MTCALIVE_INTERFACES_MAX] ;
unsigned int mtcAlive_sequence_save[MTCALIVE_INTERFACES_MAX] ;
unsigned int mtcAlive_sequence_miss[MTCALIVE_INTERFACES_MAX] ;
unsigned int mtcAlive_log_throttle [MTCALIVE_INTERFACES_MAX] ;
/* pxeboot mtcAlive monitor log throttles */
int pxeboot_mtcAlive_not_seen_log_throttle ;
int pxeboot_mtcAlive_loss_log_throttle ;
/* used to log time leading up to reset */
int bmc_reset_pending_log_throttle ;
@ -334,14 +356,12 @@ private:
bool online_log_reported ; /* availStatus switches between these states */
/* and failed */
/** Host's mtc timer struct. Use to time handler stages.
*
* reset -> reset command response
* reboot -> then wait for mtcalive message
* mtcalive -> then wait for go enabled message
*/
/* timer for pxeboot_mtcAlive_monitor fsm */
struct mtc_timer mtcAlive_timer ;
/* timer for online_handler fsm. */
struct mtc_timer online_timer ;
/* the fault handling offline handler timer */
struct mtc_timer offline_timer ;
@ -456,6 +476,7 @@ private:
bool unlock_cmd_ack ; /* set true when a unlocked command ack is rx'ed */
bool reboot_cmd_ack_mgmnt ;
bool reboot_cmd_ack_clstr ;
bool reboot_cmd_ack_pxeboot ;
/** Tracks back to back Fast Fault Recovery counts */
int graceful_recovery_counter;
@ -849,6 +870,9 @@ private:
/* Starts the specified 'reset or powercycle' recovery monitor */
int hwmon_recovery_monitor ( struct nodeLinkClass::node * node_ptr, int hwmon_event );
/* Monitors pxeboot mtcAlive messages and manages associated alarm */
int pxeboot_mtcAlive_monitor ( struct nodeLinkClass::node * node_ptr );
/* server specific power state query handler */
bool (*is_poweron_handler) (string hostname, string query_response );
@ -865,7 +889,7 @@ private:
bool get_mtcAlive_gate ( struct nodeLinkClass::node * node_ptr );
void ctl_mtcAlive_gate ( struct nodeLinkClass::node * node_ptr, bool gate_state );
void set_mtcAlive ( struct nodeLinkClass::node * node_ptr, int interface );
void set_mtcAlive ( struct nodeLinkClass::node * node_ptr, unsigned int sequence, int iface);
/********* mtcInfo in the database ************/
int mtcInfo_set ( struct nodeLinkClass::node * node_ptr, string key, string value );
@ -1087,6 +1111,10 @@ private:
int subStageChange ( struct nodeLinkClass::node * node_ptr,
mtc_subStages_enum newHdlrStage );
/** mtcAlive Stage Change member function */
int mtcAliveStageChange ( struct nodeLinkClass::node * node_ptr,
mtc_mtcAliveStages_enum newHdlrStage );
int failed_state_change ( struct nodeLinkClass::node * node_ptr );
/* issue a
@ -1125,6 +1153,7 @@ private:
struct nodeLinkClass::node * get_mtcTimer_timer ( timer_t tid );
struct nodeLinkClass::node * get_mtcConfig_timer ( timer_t tid );
struct nodeLinkClass::node * get_mtcAlive_timer ( timer_t tid );
struct nodeLinkClass::node * get_online_timer ( timer_t tid );
struct nodeLinkClass::node * get_offline_timer ( timer_t tid );
struct nodeLinkClass::node * get_mtcSwact_timer ( timer_t tid );
struct nodeLinkClass::node * get_mtcCmd_timer ( timer_t tid );
@ -1316,26 +1345,28 @@ private:
void mem_log_general_mtce_hosts ( void );
void mem_log_mnfa ( void );
void mem_log_dor ( struct nodeLinkClass::node * node_ptr );
void mem_log_identity ( struct nodeLinkClass::node * node_ptr );
void mem_log_network ( struct nodeLinkClass::node * node_ptr );
void mem_log_state1 ( struct nodeLinkClass::node * node_ptr );
void mem_log_state2 ( struct nodeLinkClass::node * node_ptr );
void mem_log_alarm1 ( struct nodeLinkClass::node * node_ptr );
void mem_log_alarm2 ( struct nodeLinkClass::node * node_ptr );
void mem_log_mtcalive ( struct nodeLinkClass::node * node_ptr );
void mem_log_stage ( struct nodeLinkClass::node * node_ptr );
void mem_log_test_info ( struct nodeLinkClass::node * node_ptr );
void mem_log_bm ( struct nodeLinkClass::node * node_ptr );
void mem_log_ping ( struct nodeLinkClass::node * node_ptr );
void mem_log_heartbeat ( struct nodeLinkClass::node * node_ptr );
void mem_log_hbs_cnts ( struct nodeLinkClass::node * node_ptr );
void mem_log_type_info ( struct nodeLinkClass::node * node_ptr );
void mem_log_reset_info( struct nodeLinkClass::node * node_ptr );
void mem_log_power_info( struct nodeLinkClass::node * node_ptr );
void mem_log_thread_info ( struct nodeLinkClass::node * node_ptr );
void mem_log_dor ( struct nodeLinkClass::node * node_ptr );
void mem_log_identity ( struct nodeLinkClass::node * node_ptr );
void mem_log_network ( struct nodeLinkClass::node * node_ptr );
void mem_log_state1 ( struct nodeLinkClass::node * node_ptr );
void mem_log_state2 ( struct nodeLinkClass::node * node_ptr );
void mem_log_alarm1 ( struct nodeLinkClass::node * node_ptr );
void mem_log_alarm2 ( struct nodeLinkClass::node * node_ptr );
void mem_log_mtcalive_state ( struct nodeLinkClass::node * node_ptr );
void mem_log_mtcalive_data ( struct nodeLinkClass::node * node_ptr );
void mem_log_mtcalive_pxeboot ( struct nodeLinkClass::node * node_ptr );
void mem_log_stage ( struct nodeLinkClass::node * node_ptr );
void mem_log_test_info ( struct nodeLinkClass::node * node_ptr );
void mem_log_bm ( struct nodeLinkClass::node * node_ptr );
void mem_log_ping ( struct nodeLinkClass::node * node_ptr );
void mem_log_heartbeat ( struct nodeLinkClass::node * node_ptr );
void mem_log_hbs_cnts ( struct nodeLinkClass::node * node_ptr );
void mem_log_type_info ( struct nodeLinkClass::node * node_ptr );
void mem_log_reset_info ( struct nodeLinkClass::node * node_ptr );
void mem_log_power_info ( struct nodeLinkClass::node * node_ptr );
void mem_log_thread_info ( struct nodeLinkClass::node * node_ptr );
void print_node_info ( struct nodeLinkClass::node * node_ptr );
void print_node_info ( struct nodeLinkClass::node * node_ptr );
// #endif
@ -1349,9 +1380,12 @@ public:
system_type_enum system_type ;
string functions ; /**< comma delimited string list of functions supported */
bool maintenance ;
bool heartbeat ;
string sw_version; /* fetched from /etc/build.info using daemon_sw_version */
string functions ; /* comma delimited string list of functions supported */
bool maintenance ; /* the mtcAgent */
bool heartbeat ; /* the hbsAgent */
/* Set to true if this controller is active.
* Currently only used by heartbeat service. */
@ -1403,10 +1437,12 @@ public:
{ active = state ; }
/** Store the hostname of this controller */
string my_hostname ; /**< */
string my_local_ip ; /**< Primary IP address */
string my_float_ip ; /**< Secondary (floating) IP address */
string my_clstr_ip ; /**< Cluster network IP address */
string my_hostname ; /** My Hostname */
string my_local_ip ; /** Primary IP address */
string my_float_ip ; /** Secondary (floating) IP address */
string my_clstr_ip ; /** Cluster network IP address */
string my_pxeboot_ip ; /** Pxeboot network IP address */
string my_pxeboot_if ; /** Pxeboot interface name */
/********* New Public Constructs for IPMI Command Handling ***********/
@ -1448,12 +1484,18 @@ public:
/** get cluster-host network ip address for any hostname */
string get_clstr_hostaddr ( string & hostname );
/** get the pxeboot network address for any hostname */
string get_pxeboot_hostaddr ( string hostname );
/** set a node's ip address */
int set_hostaddr ( string & hostname, string & ip );
/** set a node's cluster-host ip address */
int set_clstr_hostaddr ( string & hostname, string & ip );
/* set the pxeboot network address for any hostname */
int set_pxeboot_hostaddr ( string hostname, string ip );
/** get hostname for any hostname */
string get_hostname ( string hostaddr );
@ -1684,6 +1726,12 @@ public:
* network is provisioned and configured for this daemon to use */
bool clstr_network_provisioned ;
/** A boolean that is used to quickly determine if the pxeboot network
* is provisioned.
* The pxeboot network is considered unprovisioned while the management
* interface is on the 'lo' (localhost) interface. */
bool pxeboot_network_provisioned ;
/** A debug bool that allows cluster-host heartbeat failures to only
* cause host degrade rather than failure */
bool clstr_degrade_only ;
@ -1758,6 +1806,7 @@ public:
struct mtc_timer mtcTimer_mnfa ;
struct mtc_timer mtcTimer_token ;
struct mtc_timer mtcTimer_uptime ;
struct mtc_timer mtcTimer_loop ; // main loop timer
/* System Level DOR recovery timer
* Note: tid != NULL represents DOR Mode Active */
@ -1775,10 +1824,15 @@ public:
/** Records that a 'maintenance alive' message was received from the
* named host on the specified interface ; no value is returned */
void set_mtcAlive ( string & hostname, int iface );
void set_mtcAlive ( string & hostname, unsigned int sequence, int iface );
bool get_mtcAlive_gate ( string & hostname );
void ctl_mtcAlive_gate ( string & hostname, bool gated );
/* Updates my_pxeboot_ip if my_mac is specified.
* Otherwise, tries to update the pxeboot ip and
* hostname for each provisioned node in the system. */
void pxebootInfo_loader ( string my_mac = "" );
/** Store the latest mtce flags for the specified host
* current flags are defined in nodebase.h
#define MTC_FLAG__I_AM_CONFIGURED (0x00000001)

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2019 Wind River Systems, Inc.
* Copyright (c) 2019, 2024 Wind River Systems, Inc.
*
* SPDX-License-Identifier: Apache-2.0
*
@ -23,14 +23,15 @@ using namespace std;
#endif
#define __AREA__ "mon"
#ifndef INTERFACES_DIR
#define INTERFACES_DIR ((const char *)"/sys/class/net/")
#endif
#define PLATFORM_DIR ((const char *)"/etc/platform/platform.conf")
#define LMON_DIR ((const char *)"/etc/lmon/lmon.conf")
#define INTERFACES_MAX (4) /* maximum number of interfaces to monitor */
enum interface_type { ethernet = 0, vlan = 1, bond = 2 };
string iface_type ( interface_type type_enum );
string iface_type ( iface_type_enum type_enum );
/* daemon only supports the GET request */
#define HTTP_SUPPORTED_METHODS (EVHTTP_REQ_GET)
@ -68,7 +69,7 @@ typedef struct
/* true if the interface is configured.
* i.e. the name label shown above is found in platform.conf */
bool used ;
interface_type type_enum ;
iface_type_enum type_enum ;
/* true if the link is up ; false otherwise */
bool interface_one_link_up ;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2019 Wind River Systems, Inc.
* Copyright (c) 2019, 2024 Wind River Systems, Inc.
*
* SPDX-License-Identifier: Apache-2.0
*
@ -35,7 +35,7 @@
*
****************************************************************************/
string iface_type ( interface_type type_enum )
string iface_type ( iface_type_enum type_enum )
{
switch(type_enum)
{
@ -187,7 +187,7 @@ int lmon_get_link_state ( int ioctl_socket,
* Name : lmon_interfaces_init
*
* Purpose : Map an interface (mgmt, oam or cluster-host) to a physical port.
* See interface_type enum in lmon.h
* See iface_type_enum enum in nodeUtil.h
*
*****************************************************************************/

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2013-2017, 2023 Wind River Systems, Inc.
* Copyright (c) 2013-2017, 2023-2024 Wind River Systems, Inc.
*
* SPDX-License-Identifier: Apache-2.0
*
@ -360,10 +360,40 @@ int nodeLinkClass::cmd_handler ( struct nodeLinkClass::node * node_ptr )
node_ptr->reboot_cmd_ack_mgmnt = false ;
node_ptr->reboot_cmd_ack_clstr = false ;
node_ptr->reboot_cmd_ack_pxeboot = false ;
/* send reboot command */
node_ptr->cmdReq = MTC_CMD_REBOOT ;
node_ptr->cmdRsp = MTC_CMD_NONE ;
// Send the reboot command on all provisioned networks
if ( this->pxeboot_network_provisioned == true )
{
if (( rc = send_mtc_cmd ( node_ptr->hostname,
MTC_CMD_REBOOT,
PXEBOOT_INTERFACE )) != PASS )
{
// Don't report a warning log if the far end pxeboot
// network address is not learned yet.
if ( rc != FAIL_HOSTADDR_LOOKUP )
{
wlog ("%s reboot request failed (%s) (rc:%d)\n",
node_ptr->hostname.c_str(),
get_iface_name_str(PXEBOOT_INTERFACE), rc);
}
else
{
ilog ("%s %s network address not learned yet ; can't reboot",
node_ptr->hostname.c_str(),
get_iface_name_str(PXEBOOT_INTERFACE));
}
}
else
{
send_reboot_ok = true ;
}
}
if (( rc = send_mtc_cmd ( node_ptr->hostname,
MTC_CMD_REBOOT,
MGMNT_INTERFACE )) != PASS )
@ -383,9 +413,20 @@ int nodeLinkClass::cmd_handler ( struct nodeLinkClass::node * node_ptr )
MTC_CMD_REBOOT,
CLSTR_INTERFACE )) != PASS )
{
wlog ("%s reboot request failed (%s) (rc:%d)\n",
node_ptr->hostname.c_str(),
get_iface_name_str(CLSTR_INTERFACE), rc);
// Don't report a warning log if the far end cluster
// network IP is not learned yet.
if ( rc != FAIL_HOSTADDR_LOOKUP )
{
wlog ("%s reboot request failed (%s) (rc:%d)",
node_ptr->hostname.c_str(),
get_iface_name_str(CLSTR_INTERFACE), rc);
}
else
{
ilog ("%s %s network address not learned yet ; can't reboot",
node_ptr->hostname.c_str(),
get_iface_name_str(CLSTR_INTERFACE));
}
}
else
{
@ -446,6 +487,7 @@ int nodeLinkClass::cmd_handler ( struct nodeLinkClass::node * node_ptr )
* messages from the remote host during the reset delay window */
node_ptr->mtcAlive_mgmnt_count = 0 ;
node_ptr->mtcAlive_clstr_count = 0 ;
node_ptr->mtcAlive_pxeboot_count = 0 ;
wlog ("%s ... bmc reset in %d secs", node_ptr->hostname.c_str(), reset_delay);
mtcTimer_start ( node_ptr->mtcCmd_timer, mtcTimer_handler, reset_delay );
@ -472,11 +514,25 @@ int nodeLinkClass::cmd_handler ( struct nodeLinkClass::node * node_ptr )
}
else
{
// log the acks
string nwk_ack = "" ;
if ( node_ptr->reboot_cmd_ack_pxeboot )
nwk_ack.append(get_iface_name_str(PXEBOOT_INTERFACE));
if ( node_ptr->reboot_cmd_ack_mgmnt )
{
if ( !nwk_ack.empty() )
nwk_ack.append(",");
nwk_ack.append(get_iface_name_str(MGMNT_INTERFACE));
}
if ( node_ptr->reboot_cmd_ack_clstr )
{
if ( !nwk_ack.empty() )
nwk_ack.append(",");
nwk_ack.append(get_iface_name_str(CLSTR_INTERFACE));
}
/* declare successful reboot */
plog ("%s reboot request succeeded (%s %s)",
node_ptr->hostname.c_str(),
node_ptr->reboot_cmd_ack_mgmnt ? get_iface_name_str(MGMNT_INTERFACE) : "",
node_ptr->reboot_cmd_ack_clstr ? get_iface_name_str(CLSTR_INTERFACE) : "");
plog ("%s reboot request succeeded (%s)", node_ptr->hostname.c_str(), nwk_ack.c_str());
if ( node_ptr->cmd.task == true )
{
@ -499,6 +555,7 @@ int nodeLinkClass::cmd_handler ( struct nodeLinkClass::node * node_ptr )
* messages from the remote host during the reset delay window */
node_ptr->mtcAlive_mgmnt_count = 0 ;
node_ptr->mtcAlive_clstr_count = 0 ;
node_ptr->mtcAlive_pxeboot_count = 0 ;
wlog ("%s max reboot retries reached ; still not offline ; reset in %3d secs",
node_ptr->hostname.c_str(), reset_delay);
@ -566,7 +623,8 @@ int nodeLinkClass::cmd_handler ( struct nodeLinkClass::node * node_ptr )
* or the failure of just one (mgmnt or clstr) networks to mistakenly
* cancel the reset. Prevent the cancel if
* - the node uptime is high and
* - not receiving mtcAlive both mgmnt and clstr networks.
* - not receiving mtcAlive on any mtcAlive networks ;
* mgmnt, clstr and pxeboot networks.
*
* Note: online does not mean both networks are receiving mtcAlive,
* Currently just mgmnt needs to see mtcAlive for the node to
@ -578,15 +636,17 @@ int nodeLinkClass::cmd_handler ( struct nodeLinkClass::node * node_ptr )
if (( node_ptr->availStatus == MTC_AVAIL_STATUS__ONLINE ) &&
( node_ptr->uptime < MTC_MINS_5 ) &&
( node_ptr->mtcAlive_mgmnt_count ) &&
( node_ptr->mtcAlive_clstr_count ))
( node_ptr->mtcAlive_clstr_count ) &&
( node_ptr->mtcAlive_pxeboot_count ))
{
mtcTimer_reset ( node_ptr->mtcCmd_timer );
ilog ("%s cancelling reset ; host is online ; delay:%d uptime:%d mtcAlive:%d:%d ",
ilog ("%s cancelling reset ; host is online ; delay:%d uptime:%d mtcAlive:%d:%d:%d ",
node_ptr->hostname.c_str(),
bmc_reset_delay,
node_ptr->uptime,
node_ptr->mtcAlive_mgmnt_count,
node_ptr->mtcAlive_clstr_count);
node_ptr->mtcAlive_clstr_count,
node_ptr->mtcAlive_pxeboot_count);
node_ptr->mtcCmd_work_fifo_ptr->status = PASS ;
node_ptr->mtcCmd_work_fifo_ptr->stage = MTC_CMD_STAGE__DONE ;
}
@ -602,13 +662,14 @@ int nodeLinkClass::cmd_handler ( struct nodeLinkClass::node * node_ptr )
#define BMC_RESET_PENDING_LOG_THROTTLE (1000)
wlog_throttled ( node_ptr->bmc_reset_pending_log_throttle,
BMC_RESET_PENDING_LOG_THROTTLE,
"%s reset in %3ld secs ; delay:%d uptime:%d mtcAlive:%d:%d",
"%s reset in %3ld secs ; delay:%d uptime:%d mtcAlive:%d:%d:%d",
node_ptr->hostname.c_str(),
reset_delay-diff_time.secs,
bmc_reset_delay,
node_ptr->uptime,
node_ptr->mtcAlive_mgmnt_count,
node_ptr->mtcAlive_clstr_count);
node_ptr->mtcAlive_clstr_count,
node_ptr->mtcAlive_pxeboot_count);
}
}
break ; /* waiting path */
@ -813,6 +874,8 @@ int nodeLinkClass::cmd_handler ( struct nodeLinkClass::node * node_ptr )
/* update the timer hostname */
node_ptr->mtcTimer.hostname = name ;
node_ptr->mtcAlive_timer.hostname = name ;
node_ptr->online_timer.hostname = name ;
node_ptr->offline_timer.hostname = name ;
node_ptr->mtcSwact_timer.hostname = name ;
node_ptr->mtcCmd_timer.hostname = name ;
node_ptr->oosTestTimer.hostname = name ;

View File

@ -50,12 +50,6 @@ extern "C"
#include "amon.h" /* for ... active monitoring utilities */
}
extern char *program_invocation_short_name;
int mtcAlive_mgmnt_sequence = 0 ;
int mtcAlive_clstr_sequence = 0 ;
/************************************************************************
*
* Name : stop pmon
@ -107,18 +101,18 @@ void stop_pmon( void )
/* Receive and process commands from controller maintenance */
int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
{
int bytes = 0 ;
mtc_message_type msg ;
int rc = FAIL ;
ssize_t bytes_received = 0 ;
ctrl_type * ctrl_ptr = get_ctrl_ptr() ;
bool log_ack = true ;
const char * iface_name_ptr = get_interface_name_str(interface) ;
if ( interface == CLSTR_INTERFACE )
{
if ( ! ctrl_ptr->clstr_iface_provisioned )
{
wlog ("cannot receive from unprovisioned %s interface\n",
get_iface_name_str(interface) );
wlog ("cannot receive from unprovisioned %s interface", iface_name_ptr);
return (rc);
}
}
@ -126,17 +120,57 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
/* clean the rx/tx buffer */
memset ((void*)&msg,0,sizeof(mtc_message_type));
string hostaddr = "" ;
if ( interface == MGMNT_INTERFACE )
if ( interface == PXEBOOT_INTERFACE )
{
if (( sock_ptr->mtc_client_rx_socket ) &&
( sock_ptr->mtc_client_rx_socket->sock_ok() == true ))
if ( sock_ptr->pxeboot_rx_socket )
{
rc = sock_ptr->mtc_client_rx_socket->read((char*)&msg.hdr[0], sizeof(mtc_message_type));
hostaddr = sock_ptr->mtc_client_rx_socket->get_src_str();
struct sockaddr_in client_addr;
socklen_t addr_len = sizeof(client_addr);
// Receive data
bytes_received = recvfrom(sock_ptr->pxeboot_rx_socket,
(char*)&msg.hdr[0],
sizeof(mtc_message_type), 0,
(struct sockaddr*)&client_addr, &addr_len);
// Terminate the buffer
msg.hdr[bytes_received] = '\0' ;
// Log with debug_msg lane 2
if ( daemon_get_cfg_ptr()->debug_msg&2 )
{
// log the message ; both header and buffer
string _buf = msg.buf[0] ? msg.buf : "empty";
ilog ("Received %ld bytes (%s) from %s:%d - %s:%s",
bytes_received,
iface_name_ptr,
inet_ntoa(client_addr.sin_addr),
ntohs(client_addr.sin_port),
&msg.hdr[0], _buf.c_str());
// dump_memory (&msg.hdr[0], 16, bytes_received);
}
hostaddr = inet_ntoa(client_addr.sin_addr);
}
}
else if ( interface == MGMNT_INTERFACE )
{
if (( sock_ptr->mtc_client_mgmt_rx_socket ) &&
( sock_ptr->mtc_client_mgmt_rx_socket->sock_ok() == true ))
{
rc = bytes_received = sock_ptr->mtc_client_mgmt_rx_socket->read((char*)&msg.hdr[0], sizeof(mtc_message_type));
hostaddr = sock_ptr->mtc_client_mgmt_rx_socket->get_src_str();
// Log with debug_msg lane 2
if ( daemon_get_cfg_ptr()->debug_msg&2 )
{
// Log the message ; both header and buffer
string _buf = msg.buf[0] ? msg.buf : "empty";
ilog ("Received %ld bytes (%s) from %s - %s:%s", bytes_received,
iface_name_ptr, hostaddr.c_str(), &msg.hdr[0], _buf.c_str());
}
}
else
{
elog ("cannot read from null or failed 'mtc_client_rx_socket'\n");
elog ("cannot read from null or failed 'mtc_client_mgmt_rx_socket'\n");
return (FAIL_TO_RECEIVE);
}
}
@ -145,8 +179,18 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
if (( sock_ptr->mtc_client_clstr_rx_socket ) &&
( sock_ptr->mtc_client_clstr_rx_socket->sock_ok() == true ))
{
rc = sock_ptr->mtc_client_clstr_rx_socket->read((char*)&msg.hdr[0], sizeof(mtc_message_type));
rc = bytes_received = sock_ptr->mtc_client_clstr_rx_socket->read((char*)&msg.hdr[0], sizeof(mtc_message_type));
hostaddr = sock_ptr->mtc_client_clstr_rx_socket->get_src_str();
// Log with debug_msg lane 2
if ( daemon_get_cfg_ptr()->debug_msg&2 )
{
// Log the message ; both header and buffer
string _buf = msg.buf[0] ? msg.buf : "empty";
ilog ("Received %ld bytes (%s) from %s: %s:%s",
bytes_received, iface_name_ptr,
hostaddr.c_str(), &msg.hdr[0], _buf.c_str());
}
}
else
{
@ -174,11 +218,9 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
{
self = true ;
}
string interface_name = get_iface_name_str (interface) ;
string interface_name = get_interface_name_str (interface) ;
string command_name = get_mtcNodeCommand_str(msg.cmd) ;
print_mtc_message ( get_hostname(), MTC_CMD_RX, msg, interface_name.data(), false );
/* Message version greater than zero have the hosts management
* mac address appended to the header string */
if (( !self ) && ( msg.ver >= MTC_CMD_FEATURE_VER__MACADDR_IN_CMD ))
@ -186,18 +228,18 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
/* the minus 1 is to back up from the null char that is accounted for in the header size */
if ( strncmp ( &msg.hdr[MSG_HEADER_SIZE-1], ctrl_ptr->macaddr.data(), MSG_HEADER_SIZE ))
{
wlog ("%s command not for this host (exp:%s det:%s) ; ignoring ...\n",
wlog ("%s req command from %s network not for this host (exp:%s det:%s) ; ignoring ...\n",
command_name.c_str(),
iface_name_ptr,
ctrl_ptr->macaddr.c_str(),
&msg.hdr[MSG_HEADER_SIZE-1]);
print_mtc_message ( get_hostname(), MTC_CMD_RX, msg, interface_name.data(), true );
print_mtc_message ( get_hostname(), MTC_CMD_RX, msg, iface_name_ptr, true );
return (FAIL_INVALID_DATA);
}
}
print_mtc_message ( hostaddr, MTC_CMD_RX, msg, get_iface_name_str(interface), rc );
if ( rc )
return rc;
if ( ! hostaddr.empty() )
print_mtc_message ( hostaddr, MTC_CMD_RX, msg, iface_name_ptr, false );
/* Check for response messages */
if ( strstr ( &msg.hdr[0], get_cmd_req_msg_header() ) )
@ -205,20 +247,25 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
rc = PASS ;
if ( msg.cmd == MTC_REQ_MTCALIVE )
{
mlog1 ("mtcAlive request received (%s network)\n", interface_name.c_str());
ilog ("mtcAlive request received from %s network", iface_name_ptr);
if ( interface == PXEBOOT_INTERFACE )
{
alog2 ("pxeboot mtcAlive buffer: %s", &msg.buf[0]);
load_pxebootInfo_msg(msg);
}
return ( send_mtcAlive_msg ( sock_ptr, get_who_i_am(), interface ));
}
else if ( msg.cmd == MTC_MSG_INFO )
{
mlog1("mtc 'info' message received (%s network)\n", interface_name.c_str());
alog2 ("mtc 'info' message received from %s network", iface_name_ptr);
load_mtcInfo_msg ( msg );
return ( PASS ); /* no ack for this message */
}
else if ( msg.cmd == MTC_CMD_SYNC )
{
ilog ("mtc '%s' message received (%s network)\n",
ilog ("mtc '%s' message received from %s network",
get_mtcNodeCommand_str(msg.cmd),
interface_name.c_str());
iface_name_ptr);
ilog ("Sync Start");
sync ();
@ -233,7 +280,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
/* Only recreate the file if its not already present */
if ( daemon_is_file_present ( NODE_LOCKED_FILE ) == false )
{
ilog ("%s locked (%s)", get_hostname().c_str(), interface_name.c_str() );
ilog ("%s locked (%s)", get_hostname().c_str(), iface_name_ptr);
daemon_log ( NODE_LOCKED_FILE, ADMIN_LOCKED_STR);
}
@ -254,7 +301,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
}
else if ( msg.cmd == MTC_MSG_UNLOCKED )
{
ilog ("%s unlocked (%s)", get_hostname().c_str(), interface_name.c_str() );
ilog ("%s unlocked received from %s network", get_hostname().c_str(), iface_name_ptr);
/* Only remove the file if it is present */
if ( daemon_is_file_present ( NODE_LOCKED_FILE ) == true )
@ -264,7 +311,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
if ( daemon_is_file_present ( NODE_LOCKED_FILE_BACKUP ) == true )
{
daemon_remove_file ( NODE_LOCKED_FILE_BACKUP );
ilog ("cleared node locked backup flag (%s)", interface_name.c_str() );
ilog ("cleared node locked backup flag (%s)", iface_name_ptr);
}
}
else if ( msg.cmd == MTC_MSG_SUBF_GOENABLED_FAILED )
@ -297,7 +344,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
}
else
{
ilog ("GoEnabled request posted (%s)\n", interface_name.c_str());
ilog ("GoEnabled request posted (%s)", iface_name_ptr);
ctrl_ptr->posted_script_set.push_back ( GOENABLED_MAIN_SCRIPTS );
ctrl_ptr->posted_script_set.unique();
}
@ -324,7 +371,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
}
else
{
ilog ("GoEnabled Subf request posted (%s)\n", interface_name.c_str());
ilog ("GoEnabled Subf request posted (%s)", iface_name_ptr);
/* Cleanup test result flag files */
if ( daemon_is_file_present ( GOENABLED_SUBF_PASS) )
@ -345,15 +392,15 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
}
else if ( msg.cmd == MTC_CMD_REBOOT )
{
ilog ("%s command received (%s)",
ilog ("%s command received from %s network",
command_name.c_str(),
interface_name.c_str());
iface_name_ptr);
}
else if ( msg.cmd == MTC_CMD_LAZY_REBOOT )
{
ilog ("%s command received (%s) ; delay:%d seconds\n",
ilog ("%s command received from %s network ; delay:%d seconds",
command_name.c_str(),
interface_name.c_str(),
iface_name_ptr,
msg.num ? msg.parm[0] : 0 );
}
else if ( is_host_services_cmd ( msg.cmd ) == true )
@ -378,9 +425,9 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
ctrl_ptr->posted_script_set.push_back ( HOSTSERVICES_SCRIPTS );
ctrl_ptr->posted_script_set.unique ();
ilog ("%s request posted (%s)\n",
ilog ("%s request posted from %s network",
command_name.c_str(),
interface_name.c_str());
iface_name_ptr);
ctrl_ptr->hostservices.posted = msg.cmd ;
ctrl_ptr->hostservices.monitor = MTC_CMD_NONE ;
@ -391,17 +438,17 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
if ( ( daemon_is_file_present ( MTC_CMD_FIT__START_SVCS )))
{
rc = FAIL_FIT ;
wlog ("%s Start Services - fit failure (%s)\n",
wlog ("%s Start Services - fit failure (%s)",
command_name.c_str(),
interface_name.c_str() );
iface_name_ptr);
}
/* Fault insertion - fail to send host services ACK */
if ( ( daemon_is_file_present ( MTC_CMD_FIT__NO_HS_ACK )))
{
wlog ("%s Start Services - fit no ACK (%s)\n",
wlog ("%s Start Services - fit no ACK (%s)",
command_name.c_str(),
interface_name.c_str() );
iface_name_ptr);
return (PASS);
}
@ -421,20 +468,21 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
}
else if ( msg.cmd == MTC_CMD_WIPEDISK )
{
ilog ("Reload command received (%s)\n", interface_name.c_str());
ilog ("Reload command received from %s network", iface_name_ptr);
}
else if ( msg.cmd == MTC_CMD_RESET )
{
ilog ("Reset command received (%s)\n", interface_name.c_str());
ilog ("Reset command received from %s network", iface_name_ptr);
}
else if ( msg.cmd == MTC_CMD_LOOPBACK )
{
ilog ("Loopback command received (%s)\n", interface_name.c_str());
ilog ("Loopback command received from %s network", iface_name_ptr);
}
else
{
rc = FAIL_BAD_CASE ;
elog ( "Unsupported maintenance command (%d)\n", msg.cmd );
wlog ( "Unsupported maintenance command (%d) with %ld bytes received from %s network",
msg.cmd, bytes_received, iface_name_ptr );
}
snprintf ( &msg.hdr[0], MSG_HEADER_SIZE, "%s", get_cmd_rsp_msg_header());
@ -443,12 +491,12 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
{
if ( msg.cmd == MTC_MSG_MAIN_GOENABLED )
{
ilog ("main function goEnabled results acknowledged (%s)\n", interface_name.c_str());
ilog ("main function goEnabled results acknowledged from %s network", iface_name_ptr);
return (PASS);
}
else if ( msg.cmd == MTC_MSG_SUBF_GOENABLED )
{
ilog ("sub-function goEnabled results acknowledged (%s)\n", interface_name.c_str());
ilog ("sub-function goEnabled results acknowledged from %s network", iface_name_ptr);
return (PASS);
}
else
@ -460,14 +508,25 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
else if ( strstr ( &msg.hdr[0], get_worker_msg_header()) )
{
elog ("unsupported worker message\n");
print_mtc_message ( &msg );
if ( msg.cmd == MTC_MSG_MTCALIVE )
{
wlog ("unexpected mtcAlive message from %s from %s network",
hostaddr.c_str(), iface_name_ptr);
}
else
{
wlog ("unsupported worker message from %s from %s network",
hostaddr.c_str(), iface_name_ptr);
}
wlog ("WARNING: mtcClient is receiving mtcAgent bound mtcAlive messages");
// dump_memory (&msg, 16, bytes_received);
return PASS ;
}
else
{
elog ("unsupported message\n");
print_mtc_message ( &msg );
wlog ("unsupported message from %s from %s network", hostaddr.c_str(), iface_name_ptr);
// dump_memory (&msg, 16, bytes_received);
return PASS ;
}
@ -481,73 +540,109 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
{
rc = PASS ;
bytes = sizeof(mtc_message_type)-BUF_SIZE;
int bytes = sizeof(mtc_message_type)-BUF_SIZE;
if ( interface == PXEBOOT_INTERFACE )
{
int flags = 0 ; // no tx flags
if ( sock_ptr->pxeboot_tx_socket <= 0 )
{
elog("pxeboot_tx_socket not ok (%d)", sock_ptr->pxeboot_tx_socket);
return (FAIL_SOCKET_SENDTO);
}
if ( log_ack )
{
ilog ("sending %s ack to %s over %s network",
command_name.c_str(),
hostaddr.c_str(),
iface_name_ptr);
}
struct sockaddr_in hostAddr;
memset(&hostAddr, 0, sizeof(hostAddr));
print_mtc_message ( hostaddr.data(), MTC_CMD_TX, msg, iface_name_ptr, false );
hostAddr.sin_addr.s_addr = inet_addr(hostaddr.data());
hostAddr.sin_family = AF_INET;
hostAddr.sin_port = htons(sock_ptr->mtc_tx_pxeboot_port);
ssize_t bytes_sent = sendto(sock_ptr->pxeboot_tx_socket, &msg.hdr[0], bytes, flags,
(const struct sockaddr*)&hostAddr, sizeof(hostAddr));
if (bytes_sent <= 0)
{
elog ("failed to send %s ack to %s:%d on %s network (rc:%ld) (%d:%m)",
command_name.c_str(),
hostaddr.c_str(),
hostAddr.sin_port,
iface_name_ptr,
bytes_sent, errno);
}
}
/* send the message back either over the mgmnt or clstr interface */
if ( interface == MGMNT_INTERFACE )
else if ( interface == MGMNT_INTERFACE )
{
if (( sock_ptr->mtc_client_tx_socket ) &&
( sock_ptr->mtc_client_tx_socket->sock_ok() == true ))
if (( sock_ptr->mtc_client_mgmt_tx_socket ) &&
( sock_ptr->mtc_client_mgmt_tx_socket->sock_ok() == true ))
{
rc = sock_ptr->mtc_client_tx_socket->write((char*)&msg.hdr[0], bytes);
rc = sock_ptr->mtc_client_mgmt_tx_socket->write((char*)&msg.hdr[0], bytes);
if ( rc <= 0 )
{
elog ("%s reply send (mtc_client_tx_socket) failed (%s) (rc:%d)",
elog ("%s reply send (mtc_client_mgmt_tx_socket) failed (%s) (rc:%d)",
command_name.c_str(),
interface_name.c_str(), rc);
iface_name_ptr, rc);
}
else if ( log_ack )
{
ilog ("%s reply send (%s)",
command_name.c_str(),
interface_name.c_str());
iface_name_ptr);
}
}
else
{
elog ("cannot send to null or failed socket (%s network)\n",
interface_name.c_str() );
elog ("cannot send to null or failed socket (%s)", iface_name_ptr);
}
}
else if ( interface == CLSTR_INTERFACE )
{
if (( sock_ptr->mtc_client_tx_socket_c0_clstr ) &&
( sock_ptr->mtc_client_tx_socket_c0_clstr->sock_ok() == true ))
if (( sock_ptr->mtc_client_clstr_tx_socket_c0 ) &&
( sock_ptr->mtc_client_clstr_tx_socket_c0->sock_ok() == true ))
{
rc = sock_ptr->mtc_client_tx_socket_c0_clstr->write((char*)&msg.hdr[0], bytes);
rc = sock_ptr->mtc_client_clstr_tx_socket_c0->write((char*)&msg.hdr[0], bytes);
if ( rc <= 0 )
{
elog ("%s reply send (mtc_client_tx_socket_c0_clstr) failed (%s) (rc:%d)",
elog ("%s reply send (mtc_client_clstr_tx_socket_c0) failed (%s) (rc:%d)",
command_name.c_str(),
interface_name.c_str(), rc);
iface_name_ptr, rc);
}
else if ( log_ack )
{
ilog ("%s reply send (%s)",
command_name.c_str(),
interface_name.c_str());
iface_name_ptr);
}
}
if (( sock_ptr->mtc_client_tx_socket_c1_clstr ) &&
( sock_ptr->mtc_client_tx_socket_c1_clstr->sock_ok() == true ))
if (( sock_ptr->mtc_client_clstr_tx_socket_c1 ) &&
( sock_ptr->mtc_client_clstr_tx_socket_c1->sock_ok() == true ))
{
rc = sock_ptr->mtc_client_tx_socket_c1_clstr->write((char*)&msg.hdr[0], bytes);
rc = sock_ptr->mtc_client_clstr_tx_socket_c1->write((char*)&msg.hdr[0], bytes);
if ( rc <= 0 )
{
elog ("%s reply send (mtc_client_tx_socket_c1_clstr) failed (%s) (rc:%d)",
elog ("%s reply send (mtc_client_clstr_tx_socket_c1) failed (%s) (rc:%d)",
command_name.c_str(),
interface_name.c_str(), rc);
iface_name_ptr, rc);
}
else if ( log_ack )
{
ilog ("%s reply send (%s)",
command_name.c_str(),
interface_name.c_str());
iface_name_ptr);
}
}
}
print_mtc_message ( get_hostname(), MTC_CMD_TX, msg, interface_name.data(), (rc != bytes) );
print_mtc_message ( get_hostname(), MTC_CMD_TX, msg, iface_name_ptr, (rc != bytes) );
/* get the shutdown delay config value */
int delay = daemon_get_cfg_ptr()->failsafe_shutdown_delay ;
@ -560,11 +655,11 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
{
if ( daemon_is_file_present ( MTC_CMD_FIT__NO_REBOOT ) )
{
ilog ("Reboot - fit bypass (%s)\n", interface_name.c_str());
ilog ("Reboot - fit bypass (%s)", iface_name_ptr);
return (PASS);
}
stop_pmon();
ilog ("Reboot (%s)\n", interface_name.c_str());
ilog ("Reboot (%s)", iface_name_ptr);
daemon_log ( NODE_RESET_FILE, "reboot command" );
fork_sysreq_reboot ( delay );
rc = system("/usr/bin/systemctl reboot");
@ -581,7 +676,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
{
do
{
ilog ("Lazy Reboot (%s) ; rebooting in %d seconds\n", interface_name.c_str(), msg.num ? msg.parm[0] : 1 );
ilog ("Lazy Reboot (%s) ; rebooting in %d seconds", iface_name_ptr, msg.num ? msg.parm[0] : 1 );
sleep (1);
if ( msg.parm[0] % 5 )
{
@ -592,7 +687,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
}
else
{
ilog ("Lazy Reboot (%s) ; now\n", interface_name.c_str() );
ilog ("Lazy Reboot (%s) ; now", iface_name_ptr);
}
fork_sysreq_reboot ( delay );
@ -602,11 +697,11 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
{
if ( daemon_is_file_present ( MTC_CMD_FIT__NO_RESET ) )
{
ilog ("Reset - fit bypass (%s)\n", interface_name.c_str());
ilog ("Reset - fit bypass (%s)", iface_name_ptr);
return (PASS);
}
stop_pmon();
ilog ("Reset 'reboot -f' (%s)\n", interface_name.c_str());
ilog ("Reset 'reboot -f' (%s)", iface_name_ptr);
daemon_log ( NODE_RESET_FILE, "reset command" );
fork_sysreq_reboot ( delay/2 );
rc = system("/usr/bin/systemctl reboot --force");
@ -617,7 +712,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
if ( daemon_is_file_present ( MTC_CMD_FIT__NO_WIPEDISK ) )
{
ilog ("Wipedisk - fit bypass (%s)\n", interface_name.c_str());
ilog ("Wipedisk - fit bypass (%s)", iface_name_ptr);
return (PASS);
}
/* We fork a reboot as a fail safe.
@ -636,7 +731,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
}
else if( 0 == parent ) /* we're the child */
{
ilog ("Disk wipe in progress (%s)\n", interface_name.c_str());
ilog ("Disk wipe in progress (%s)", iface_name_ptr);
daemon_log ( NODE_RESET_FILE, "wipedisk command" );
rc = system("/usr/local/bin/wipedisk --force");
ilog ("Disk wipe complete - Forcing Reboot ...\n");
@ -727,35 +822,35 @@ int mtce_send_event ( mtc_socket_type * sock_ptr, unsigned int cmd , const char
event.cmd = cmd ;
if (( sock_ptr->mtc_client_tx_socket ) &&
( sock_ptr->mtc_client_tx_socket->sock_ok() == true ))
if (( sock_ptr->mtc_client_mgmt_tx_socket ) &&
( sock_ptr->mtc_client_mgmt_tx_socket->sock_ok() == true ))
{
if ( bytes == 0 )
{
slog ("message send failed ; message size=0 for cmd:0x%x is 0\n", event.cmd );
rc = FAIL_NO_DATA ;
}
else if ((rc = sock_ptr->mtc_client_tx_socket->write((char*)&event.hdr[0], bytes))!= bytes )
else if ((rc = sock_ptr->mtc_client_mgmt_tx_socket->write((char*)&event.hdr[0], bytes))!= bytes )
{
elog ("message send failed. (%d) (%d:%s) \n", rc, errno, strerror(errno));
elog ("message: %d bytes to <%s:%d>\n", bytes,
sock_ptr->mtc_client_tx_socket->get_dst_str(),
sock_ptr->mtc_client_tx_socket->get_dst_addr()->getPort());
sock_ptr->mtc_client_mgmt_tx_socket->get_dst_str(),
sock_ptr->mtc_client_mgmt_tx_socket->get_dst_addr()->getPort());
rc = FAIL_TO_TRANSMIT ;
}
else
{
mlog2 ("Transmit: %x bytes to %s:%d\n", bytes,
sock_ptr->mtc_client_tx_socket->get_dst_str(),
sock_ptr->mtc_client_tx_socket->get_dst_addr()->getPort());
print_mtc_message ( get_hostname(), MTC_CMD_TX, event, get_iface_name_str(MGMNT_INTERFACE), false );
sock_ptr->mtc_client_mgmt_tx_socket->get_dst_str(),
sock_ptr->mtc_client_mgmt_tx_socket->get_dst_addr()->getPort());
print_mtc_message ( get_hostname(), MTC_CMD_TX, event, get_interface_name_str(MGMNT_INTERFACE), false );
rc = PASS ;
}
}
else
{
elog ("cannot send to null or failed socket (%s network)\n",
get_iface_name_str (MGMNT_INTERFACE) );
elog ("cannot send to null or failed socket (%s)",
get_interface_name_str (MGMNT_INTERFACE) );
rc = FAIL_SOCKET_SENDTO ;
}
return rc ;
@ -765,10 +860,23 @@ int mtce_send_event ( mtc_socket_type * sock_ptr, unsigned int cmd , const char
*
* Name : create_mtcAlive_msg
*
* Description: Creates a common mtcAlive message
* Description: Creates a common mtcAlive message that consists of the
* - out-of-band health/status flags
* - host uptime
* - json string of some of the host's info
* {
* "hostname":"controller-0",
* "personality":"controller,worker",
* "pxeboot_ip":"169.254.202.2",
* "mgmt_ip":"192.168.204.2",
* "cluster_host_ip":"192.168.206.2",
* "mgmt_mac":"08:00:27:9f:ef:57",
* "interface":"Mgmnt",
* "sequence":145
* }
*
****************************************************************************/
int create_mtcAlive_msg ( mtc_message_type & msg, int cmd, string identity, int interface )
int create_mtcAlive_msg ( ctrl_type * ctrl_ptr, mtc_message_type & msg, int cmd, string identity, int interface )
{
static int _sm_unhealthy_debounce_counter [MAX_IFACES] = {0,0} ;
@ -843,7 +951,7 @@ int create_mtcAlive_msg ( mtc_message_type & msg, int cmd, string identity, int
if ( ++_sm_unhealthy_debounce_counter[interface] > MAX_SM_UNHEALTHY_DEBOUNCE )
{
wlog("SM Unhealthy flag set (%s)",
get_iface_name_str(interface));
get_interface_name_str(interface));
msg.parm[MTC_PARM_FLAGS_IDX] |= MTC_FLAG__SM_UNHEALTHY ;
}
else
@ -851,7 +959,7 @@ int create_mtcAlive_msg ( mtc_message_type & msg, int cmd, string identity, int
wlog("SM Unhealthy debounce %d of %d (%s)",
_sm_unhealthy_debounce_counter[interface],
MAX_SM_UNHEALTHY_DEBOUNCE,
get_iface_name_str(interface));
get_interface_name_str(interface));
}
}
else
@ -859,19 +967,32 @@ int create_mtcAlive_msg ( mtc_message_type & msg, int cmd, string identity, int
_sm_unhealthy_debounce_counter[interface] = 0 ;
}
/* add the interface and sequence number to the mtcAlice message */
/* add the interface and sequence number to the mtcAlive message */
identity.append ( ",\"interface\":\"");
identity.append (get_iface_name_str(interface));
identity.append (get_interface_name_str(interface));
identity.append("\",\"sequence\":");
if ( interface == CLSTR_INTERFACE )
if ( interface == PXEBOOT_INTERFACE )
{
identity.append(itos(mtcAlive_clstr_sequence++));
ctrl_ptr->mtcAlive_pxeboot_sequence++ ;
identity.append(itos(ctrl_ptr->mtcAlive_pxeboot_sequence));
msg.parm[MTC_PARM_SEQ_IDX] = ctrl_ptr->mtcAlive_pxeboot_sequence ;
}
else if ( interface == MGMNT_INTERFACE )
{
ctrl_ptr->mtcAlive_mgmnt_sequence++ ;
identity.append(itos(ctrl_ptr->mtcAlive_mgmnt_sequence));
msg.parm[MTC_PARM_SEQ_IDX] = ctrl_ptr->mtcAlive_mgmnt_sequence ;
}
else if ( interface == CLSTR_INTERFACE )
{
ctrl_ptr->mtcAlive_clstr_sequence++ ;
identity.append(itos(ctrl_ptr->mtcAlive_clstr_sequence));
msg.parm[MTC_PARM_SEQ_IDX] = ctrl_ptr->mtcAlive_clstr_sequence ;
}
else
{
identity.append(itos(mtcAlive_mgmnt_sequence++));
}
identity.append(itos(0));
identity.append("}");
memcpy ( &msg.buf[0], identity.c_str(), identity.size() );
@ -896,40 +1017,40 @@ int send_mtc_msg ( mtc_socket_type * sock_ptr, int cmd , string identity )
{
int interface = MGMNT_INTERFACE ;
mtc_message_type msg ;
int bytes = create_mtcAlive_msg ( msg, cmd, identity, interface );
if (( sock_ptr->mtc_client_tx_socket ) &&
( sock_ptr->mtc_client_tx_socket->sock_ok() == true ))
int bytes = create_mtcAlive_msg ( get_ctrl_ptr(), msg, cmd, identity, interface );
if (( sock_ptr->mtc_client_mgmt_tx_socket ) &&
( sock_ptr->mtc_client_mgmt_tx_socket->sock_ok() == true ))
{
/* Send back to requester - TODO: consider sending back to both as multicast */
if ((rc = sock_ptr->mtc_client_tx_socket->write((char*)&msg.hdr[0], bytes)) != bytes )
if ((rc = sock_ptr->mtc_client_mgmt_tx_socket->write((char*)&msg.hdr[0], bytes)) != bytes )
{
if ( rc == -1 )
{
wlog_throttled (send_mtc_msg_failed, 100 ,
"failed to send <%s:%d> (%d:%m)\n",
sock_ptr->mtc_client_tx_socket->get_dst_str(),
sock_ptr->mtc_client_tx_socket->get_dst_addr()->getPort(), errno );
"failed to send <%s:%d> (%d:%m)",
sock_ptr->mtc_client_mgmt_tx_socket->get_dst_str(),
sock_ptr->mtc_client_mgmt_tx_socket->get_dst_addr()->getPort(), errno );
}
else
{
wlog_throttled ( send_mtc_msg_failed, 100 ,
"sent only %d of %d bytes to <%s:%d>\n",
rc, bytes,
sock_ptr->mtc_client_tx_socket->get_dst_str(),
sock_ptr->mtc_client_tx_socket->get_dst_addr()->getPort());
sock_ptr->mtc_client_mgmt_tx_socket->get_dst_str(),
sock_ptr->mtc_client_mgmt_tx_socket->get_dst_addr()->getPort());
}
}
else
{
send_mtc_msg_failed = 0 ;
print_mtc_message ( get_hostname(), MTC_CMD_TX, msg, get_iface_name_str(interface), false );
print_mtc_message ( get_hostname(), MTC_CMD_TX, msg, get_interface_name_str(interface), false );
rc = PASS ;
}
}
else
{
elog ("cannot send to null or failed socket (%s network)\n",
get_iface_name_str (MGMNT_INTERFACE) );
elog ("cannot send to null or failed socket (%s)",
get_interface_name_str (MGMNT_INTERFACE) );
}
}
else
@ -943,57 +1064,134 @@ int send_mtc_msg ( mtc_socket_type * sock_ptr, int cmd , string identity )
int send_mtcAlive_msg_failed = 0 ;
int send_mtcAlive_msg ( mtc_socket_type * sock_ptr, string identity, int interface )
{
int flags = 0 ; // no tx flags
/* get a pointer to the process control structure */
ctrl_type * ctrl_ptr = get_ctrl_ptr() ;
if (( interface == PXEBOOT_INTERFACE ) &&
( ctrl_ptr->pxeboot_iface_provisioned == false ))
return (PASS) ;
if (( interface == CLSTR_INTERFACE ) &&
( get_ctrl_ptr()->clstr_iface_provisioned != true ))
( ctrl_ptr->clstr_iface_provisioned != true ))
{
dlog2 ("cannot send to unprovisioned %s interface\n",
get_iface_name_str(interface) );
dlog2 ("cannot send to unprovisioned %s interface",
get_interface_name_str(interface) );
return (FAIL);
}
mtc_message_type msg ;
int bytes = create_mtcAlive_msg ( msg, MTC_MSG_MTCALIVE, identity, interface );
int bytes = create_mtcAlive_msg ( ctrl_ptr, msg, MTC_MSG_MTCALIVE, identity, interface );
if ( interface == MGMNT_INTERFACE )
if ( interface == PXEBOOT_INTERFACE )
{
/* Send to each controller's pxeboot address */
if ( sock_ptr->pxeboot_tx_socket <= 0 )
{
elog("pxeboot_tx_socket not ok (%d)", sock_ptr->pxeboot_tx_socket);
return (FAIL_SOCKET_SENDTO);
}
// TODO: Consider adding controllers info to ctrl struct
string controllers[CONTROLLERS] = {CONTROLLER_0, CONTROLLER_1};
alog1 ("sending mtcAlive to both controllers");
for (int c = 0 ; c < CONTROLLERS ; c++)
{
string pxeboot_addr_cx ;
struct sockaddr_in hostAddr;
memset(&hostAddr, 0, sizeof(hostAddr));
if (controllers[c] == CONTROLLER_1)
{
if ( ctrl_ptr->system_type != SYSTEM_TYPE__AIO__SIMPLEX )
pxeboot_addr_cx = ctrl_ptr->pxeboot_addr_c1;
else
continue; // skip controller-1 for SX systems
}
else
pxeboot_addr_cx = ctrl_ptr->pxeboot_addr_c0;
if ( pxeboot_addr_cx.empty() )
{
if ( ctrl_ptr->pxeboot_address_learned[c] == true )
{
ctrl_ptr->pxeboot_address_learned[c] = false ;
wlog ( "%s pxeboot address not learned ; unable to send pxeboot mtcAlive",
controllers[c].c_str() );
}
continue ;
}
if ( ctrl_ptr->pxeboot_address_learned[c] == false )
{
// Only log this if the not learned log was produced.
// Which is most likely case on process startup.
ilog ("sending pxeboot network mtcAlive msg on port %d to %s at %s",
sock_ptr->mtc_tx_pxeboot_port,
controllers[c].c_str(),
pxeboot_addr_cx.c_str());
ctrl_ptr->pxeboot_address_learned[c] = true ;
}
print_mtc_message ( controllers[c], MTC_CMD_TX, msg, get_interface_name_str(PXEBOOT_INTERFACE), false );
hostAddr.sin_addr.s_addr = inet_addr(pxeboot_addr_cx.data());
hostAddr.sin_family = AF_INET;
hostAddr.sin_port = htons(sock_ptr->mtc_tx_pxeboot_port); // 2102
alog1 ("sending pxeboot network mtcAlive msg to %s", controllers[c].c_str() );
ssize_t bytes_sent = sendto(sock_ptr->pxeboot_tx_socket, &msg.hdr[0], bytes, flags,
(const struct sockaddr*)&hostAddr, sizeof(hostAddr));
if (bytes_sent <= 0)
{
elog ("failed to send mtcAlive to %s using %s:%d (pxeboot) (rc:%ld) (%d:%m)",
controllers[c].c_str(), pxeboot_addr_cx.c_str(), hostAddr.sin_port, bytes_sent, errno);
}
} // for loop
}
else if ( interface == MGMNT_INTERFACE )
{
/* Send to controller floating address */
if (( sock_ptr->mtc_client_tx_socket ) &&
( sock_ptr->mtc_client_tx_socket->sock_ok() == true ))
if (( sock_ptr->mtc_client_mgmt_tx_socket ) &&
( sock_ptr->mtc_client_mgmt_tx_socket->sock_ok() == true ))
{
print_mtc_message ( CONTROLLER, MTC_CMD_TX, msg, get_iface_name_str(MGMNT_INTERFACE), false );
sock_ptr->mtc_client_tx_socket->write((char*)&msg.hdr[0], bytes) ;
alog1 ("sending mgmt network mtcAlive msg to %s", CONTROLLER);
print_mtc_message ( CONTROLLER, MTC_CMD_TX, msg, get_interface_name_str(MGMNT_INTERFACE), false );
sock_ptr->mtc_client_mgmt_tx_socket->write((char*)&msg.hdr[0], bytes) ;
}
else
{
elog("mtc_client_tx_socket not ok");
elog("mtc_client_mgmt_tx_socket not ok");
}
}
else if ( interface == CLSTR_INTERFACE )
{
/* Send to controller-0 cluster address */
if (( sock_ptr->mtc_client_tx_socket_c0_clstr ) &&
( sock_ptr->mtc_client_tx_socket_c0_clstr->sock_ok() == true ))
if (( sock_ptr->mtc_client_clstr_tx_socket_c0 ) &&
( sock_ptr->mtc_client_clstr_tx_socket_c0->sock_ok() == true ))
{
print_mtc_message ( CONTROLLER_0, MTC_CMD_TX, msg, get_iface_name_str(CLSTR_INTERFACE), false );
sock_ptr->mtc_client_tx_socket_c0_clstr->write((char*)&msg.hdr[0], bytes ) ;
alog1 ("sending clstr network mtcAlive msg to %s", CONTROLLER_0);
print_mtc_message ( CONTROLLER_0, MTC_CMD_TX, msg, get_interface_name_str(CLSTR_INTERFACE), false );
sock_ptr->mtc_client_clstr_tx_socket_c0->write((char*)&msg.hdr[0], bytes ) ;
}
else
{
elog("mtc_client_tx_socket_c0_clstr not ok");
elog("mtc_client_clstr_tx_socket_c0 not ok");
}
/* Send to controller-1 cluster address */
if ( get_ctrl_ptr()->system_type != SYSTEM_TYPE__AIO__SIMPLEX )
{
if (( sock_ptr->mtc_client_tx_socket_c1_clstr ) &&
( sock_ptr->mtc_client_tx_socket_c1_clstr->sock_ok() == true ))
if (( sock_ptr->mtc_client_clstr_tx_socket_c1 ) &&
( sock_ptr->mtc_client_clstr_tx_socket_c1->sock_ok() == true ))
{
print_mtc_message ( CONTROLLER_1, MTC_CMD_TX, msg, get_iface_name_str(CLSTR_INTERFACE), false );
sock_ptr->mtc_client_tx_socket_c1_clstr->write((char*)&msg.hdr[0], bytes ) ;
alog1 ("sending clstr mtcAlive msg to %s", CONTROLLER_1);
print_mtc_message ( CONTROLLER_1, MTC_CMD_TX, msg, get_interface_name_str(CLSTR_INTERFACE), false );
sock_ptr->mtc_client_clstr_tx_socket_c1->write((char*)&msg.hdr[0], bytes ) ;
}
else
{
elog("mtc_client_tx_socket_c1_clstr not ok");
elog("mtc_client_clstr_tx_socket_c1 not ok");
}
}
}
@ -1040,11 +1238,11 @@ int send_mtcClient_cmd ( mtc_socket_type * sock_ptr, int cmd, string hostname, s
int rc = FAIL ;
/* Send to controller floating address */
if (( sock_ptr->mtc_client_tx_socket ) &&
( sock_ptr->mtc_client_tx_socket->sock_ok() == true ))
if (( sock_ptr->mtc_client_mgmt_tx_socket ) &&
( sock_ptr->mtc_client_mgmt_tx_socket->sock_ok() == true ))
{
print_mtc_message ( hostname, MTC_CMD_TX, msg, get_iface_name_str(MGMNT_INTERFACE), false );
rc = sock_ptr->mtc_client_tx_socket->write((char*)&msg.hdr[0], bytes, address.data(), port ) ;
print_mtc_message ( hostname, MTC_CMD_TX, msg, get_interface_name_str(MGMNT_INTERFACE), false );
rc = sock_ptr->mtc_client_mgmt_tx_socket->write((char*)&msg.hdr[0], bytes, address.data(), port ) ;
if ( 0 >= rc )
{
elog("failed to send command to mtcClient (%d) (%d:%s)", rc, errno, strerror(errno));
@ -1055,7 +1253,7 @@ int send_mtcClient_cmd ( mtc_socket_type * sock_ptr, int cmd, string hostname, s
}
else
{
elog("mtc_client_tx_socket not ok");
elog("mtc_client_mgmt_tx_socket not ok");
rc = FAIL_BAD_STATE ;
}
return (rc) ;

View File

@ -125,13 +125,53 @@ int mtc_service_inbox ( nodeLinkClass * obj_ptr,
mtc_message_type msg ;
int bytes = 0 ;
int rc = PASS ;
if ( iface == CLSTR_INTERFACE )
string hostaddr = "" ;
string hostname = "" ;
const char * iface_name_ptr = get_iface_name_str(iface);
if ( iface == PXEBOOT_INTERFACE )
{
struct sockaddr_in client_addr;
socklen_t addr_len = sizeof(client_addr);
// Receive data
bytes = recvfrom(sock_ptr->pxeboot_rx_socket,
(char*)&msg.hdr[0],
sizeof(mtc_message_type), 0,
(struct sockaddr*)&client_addr, &addr_len);
// As a non-blocking socket this is normal to occur
// due to batch handling.
if ( bytes == -1 )
return RETRY ;
// Log with debug_msg lane 2
if ( daemon_get_cfg_ptr()->debug_msg&2 )
{
// log the message ; both header and buffer
string _buf = msg.buf[0] ? msg.buf : "empty";
mlog3 ("Received %d bytes (%s) from %s:%d - cmd:%d:%s hdr:%s buf:%s",
bytes,
iface_name_ptr,
inet_ntoa(client_addr.sin_addr),
ntohs(client_addr.sin_port),
msg.cmd,
get_mtcNodeCommand_str(msg.cmd),
&msg.hdr[0], _buf.c_str());
}
hostaddr = inet_ntoa(client_addr.sin_addr);
hostname = obj_ptr->get_hostname ( hostaddr ) ; // based on pxeboot ip
}
else if ( iface == CLSTR_INTERFACE )
{
if ( ( obj_ptr ) &&
( obj_ptr->clstr_network_provisioned == true ) &&
( sock_ptr->mtc_agent_clstr_rx_socket ))
{
mlog3 ("clstr network 'recvfrom' start");
bytes = sock_ptr->mtc_agent_clstr_rx_socket->read((char*)&msg, sizeof(msg));
mlog3 ("clstr network 'recvfrom' stop");
}
else
{
@ -140,7 +180,9 @@ int mtc_service_inbox ( nodeLinkClass * obj_ptr,
}
else
{
bytes = sock_ptr->mtc_agent_rx_socket->read((char*)&msg, sizeof(msg));
mlog3 ("mgmt network 'recvfrom' start");
bytes = sock_ptr->mtc_agent_mgmt_rx_socket->read((char*)&msg, sizeof(msg));
mlog3 ("mgmt network 'recvfrom' stop");
}
msg.buf[BUF_SIZE-1] = '\0';
@ -160,17 +202,14 @@ int mtc_service_inbox ( nodeLinkClass * obj_ptr,
zero_unused_msg_buf (msg, bytes);
/* get the sender's hostname */
string hostaddr = "" ;
string hostname = "" ;
if ( iface == CLSTR_INTERFACE )
{
hostaddr = sock_ptr->mtc_agent_clstr_rx_socket->get_src_str();
hostname = obj_ptr->get_hostname ( hostaddr ) ;
}
else
else if ( iface == MGMNT_INTERFACE )
{
hostaddr = sock_ptr->mtc_agent_rx_socket->get_src_str();
hostaddr = sock_ptr->mtc_agent_mgmt_rx_socket->get_src_str();
hostname = obj_ptr->get_hostname ( hostaddr ) ;
}
@ -181,17 +220,26 @@ int mtc_service_inbox ( nodeLinkClass * obj_ptr,
if (( msg.cmd == MTC_MSG_MTCALIVE ) &&
(( rc = jsonUtil_get_key_val ( &msg.buf[0], "hostname", hostname )) == PASS ))
{
ilog ("%s learned from mtcAlive", hostname.c_str());
string curr_hostaddr = obj_ptr->get_pxeboot_hostaddr ( hostname );
if ( curr_hostaddr != hostaddr )
{
ilog ("%s hostname learned from %s mtcAlive ; hostaddr:%s was:%s",
hostname.c_str(),
iface_name_ptr,
hostaddr.c_str(),
curr_hostaddr.c_str());
obj_ptr->set_pxeboot_hostaddr ( hostname, hostaddr );
}
}
else
{
wlog ("unknown hostname message ... dropping" ); /* make dlog */
print_mtc_message ( hostname, MTC_CMD_RX, msg, get_iface_name_str(iface), true );
print_mtc_message ( hostname, MTC_CMD_RX, msg, iface_name_ptr, true );
return (FAIL_GET_HOSTNAME);
}
}
print_mtc_message ( hostname, MTC_CMD_RX, msg, get_iface_name_str(iface), false );
print_mtc_message ( hostname, MTC_CMD_RX, msg, iface_name_ptr, false );
/* handle messages that are not mtc_message_type
* but rather are simply a json string */
@ -199,7 +247,7 @@ int mtc_service_inbox ( nodeLinkClass * obj_ptr,
{
string service ;
mlog1 ("%s\n", &msg.hdr[0] );
mlog3 ("%s\n", &msg.hdr[0] );
rc = jsonUtil_get_key_val(&msg.hdr[0],"service", service );
if ( rc == PASS )
@ -256,7 +304,7 @@ int mtc_service_inbox ( nodeLinkClass * obj_ptr,
hostname.c_str(),
get_mtcNodeCommand_str(msg.cmd),
msg.parm[0],
get_iface_name_str(iface));
iface_name_ptr);
}
else
{
@ -264,7 +312,7 @@ int mtc_service_inbox ( nodeLinkClass * obj_ptr,
hostname.c_str(),
get_mtcNodeCommand_str(msg.cmd),
msg.parm[0],
get_iface_name_str(iface));
iface_name_ptr);
}
}
}
@ -309,16 +357,16 @@ int mtc_service_inbox ( nodeLinkClass * obj_ptr,
obj_ptr->set_uptime ( hostname , msg.parm[MTC_PARM_UPTIME_IDX], false );
obj_ptr->set_health ( hostname , msg.parm[MTC_PARM_HEALTH_IDX] );
obj_ptr->set_mtce_flags ( hostname , msg.parm[MTC_PARM_FLAGS_IDX], iface );
obj_ptr->set_mtcAlive ( hostname, iface );
obj_ptr->set_mtcAlive ( hostname , msg.parm[MTC_PARM_SEQ_IDX], iface);
mlog1("%s Uptime:%d Health:%d Flags:0x%x mtcAlive:%s (%s)\n",
mlog2("%s Uptime:%d Health:%d Flags:0x%x Seq:%5d mtcAlive:%s (%s)\n",
hostname.c_str(),
msg.parm[MTC_PARM_UPTIME_IDX],
msg.parm[MTC_PARM_HEALTH_IDX],
msg.parm[MTC_PARM_FLAGS_IDX],
msg.parm[MTC_PARM_SEQ_IDX],
obj_ptr->get_mtcAlive_gate ( hostname ) ? "gated" : "open",
get_iface_name_str(iface));
iface_name_ptr);
}
else if ( msg.cmd == MTC_MSG_MAIN_GOENABLED )
{
@ -426,7 +474,7 @@ int mtc_service_inbox ( nodeLinkClass * obj_ptr,
if (( rc | rc1 ) != PASS )
{
elog ("received invalid event [rc:%d:%d]", rc, rc1);
print_mtc_message ( hostname, MTC_CMD_RX, msg, get_iface_name_str(iface), true );
print_mtc_message ( hostname, MTC_CMD_RX, msg, iface_name_ptr, true );
return ( FAIL_INVALID_OPERATION );
}
switch ( msg.cmd )
@ -613,6 +661,8 @@ int send_mtc_cmd ( string & hostname, int cmd , int interface, string json_dict
mtc_message_type mtc_cmd ;
string data = "" ;
mtc_socket_type * sock_ptr = get_sockPtr ();
nodeLinkClass * obj_ptr = get_mtcInv_ptr ();
const char * iface_name_ptr = get_iface_name_str(interface);
memset (&mtc_cmd,0,sizeof(mtc_message_type));
/* Add the command version to the message */
@ -627,7 +677,7 @@ int send_mtc_cmd ( string & hostname, int cmd , int interface, string json_dict
mtc_cmd.cmd = cmd ;
mtc_cmd.num = 0 ;
data = "{\"mtcInfo\":" + json_dict + "}";
ilog("%s mtc info update", hostname.c_str());
ilog("%s mtc info update: %s", hostname.c_str(), data.c_str());
rc = PASS ;
break ;
}
@ -636,6 +686,30 @@ int send_mtc_cmd ( string & hostname, int cmd , int interface, string json_dict
snprintf ( &mtc_cmd.hdr[0], MSG_HEADER_SIZE, "%s" , get_cmd_req_msg_header() );
mtc_cmd.cmd = cmd ;
mtc_cmd.num = 0 ;
if ( interface == PXEBOOT_INTERFACE )
{
if ( !obj_ptr->pxeboot_network_provisioned ) return PASS;
/* There is no pxeboot floating IP so the mtcClient cannot use
* a resolvable name label like 'CONTROLLER' as it does for
* management nwk.
* Therefore, the mtcClient on each node needs to be told the
* controller's pxeboot ip addresses so it knows where to send. */
obj_ptr->pxebootInfo_loader();
data = "{\"pxebootInfo\":{" ;
data.append ("\"address\":\"");
data.append (obj_ptr->my_pxeboot_ip);
data.append ("\",\"");
data.append (CONTROLLER_0);
data.append ("\":\"");
data.append (obj_ptr->get_pxeboot_hostaddr(CONTROLLER_0));
data.append ("\",\"");
data.append (CONTROLLER_1);
data.append ("\":\"");
data.append (obj_ptr->get_pxeboot_hostaddr(CONTROLLER_1));
data.append ("\"}}");
alog1("%s pxeboot info update:%s", hostname.c_str(), data.c_str());
}
rc = PASS ;
break ;
}
@ -668,7 +742,7 @@ int send_mtc_cmd ( string & hostname, int cmd , int interface, string json_dict
ilog ("%s sending '%s' request (%s)",
hostname.c_str(),
get_mtcNodeCommand_str(cmd),
get_iface_name_str(interface));
iface_name_ptr);
snprintf ( &mtc_cmd.hdr[0], MSG_HEADER_SIZE, "%s", get_cmd_req_msg_header() );
mtc_cmd.cmd = cmd ;
mtc_cmd.num = 0 ;
@ -688,7 +762,7 @@ int send_mtc_cmd ( string & hostname, int cmd , int interface, string json_dict
ilog ("%s sending '%s' request (%s)",
hostname.c_str(),
get_mtcNodeCommand_str(cmd),
get_iface_name_str(interface));
iface_name_ptr);
snprintf ( &mtc_cmd.hdr[0], MSG_HEADER_SIZE, "%s", get_cmd_req_msg_header() );
mtc_cmd.cmd = cmd ;
mtc_cmd.num = 0 ;
@ -713,7 +787,7 @@ int send_mtc_cmd ( string & hostname, int cmd , int interface, string json_dict
{
mlog ("%s sending 'Locked' notification (%s)",
hostname.c_str(),
get_iface_name_str(interface));
iface_name_ptr);
snprintf ( &mtc_cmd.hdr[0], MSG_HEADER_SIZE, "%s", get_cmd_req_msg_header() );
mtc_cmd.cmd = cmd ;
mtc_cmd.num = 0 ;
@ -738,7 +812,7 @@ int send_mtc_cmd ( string & hostname, int cmd , int interface, string json_dict
{
ilog ("%s sending 'UnLocked' notification (%s)",
hostname.c_str(),
get_iface_name_str(interface));
iface_name_ptr);
snprintf ( &mtc_cmd.hdr[0], MSG_HEADER_SIZE, "%s", get_cmd_req_msg_header() );
mtc_cmd.cmd = cmd ;
mtc_cmd.num = 0 ;
@ -754,20 +828,27 @@ int send_mtc_cmd ( string & hostname, int cmd , int interface, string json_dict
if ( rc == PASS )
{
int bytes = 0;
nodeLinkClass * obj_ptr = get_mtcInv_ptr ();
string iface_address ;
/* add the mac address of the target card to the header
* Note: the minus 1 is to overwrite the null */
snprintf ( &mtc_cmd.hdr[MSG_HEADER_SIZE-1], MSG_HEADER_SIZE, "%s", obj_ptr->get_hostIfaceMac(hostname, MGMNT_IFACE).data());
/* Update the sender's address */
if (interface == PXEBOOT_INTERFACE)
iface_address = obj_ptr->my_pxeboot_ip ;
else if (interface == CLSTR_INTERFACE)
iface_address = obj_ptr->my_clstr_ip ;
else
iface_address = obj_ptr->my_float_ip ;
/* If data is empty then at least add where the message came from */
if ( data.empty() )
{
data = "{\"address\":\"";
data.append(obj_ptr->my_float_ip) ;
data.append(iface_address) ;
data.append("\",\"interface\":\"");
data.append(get_iface_name_str(interface));
data.append(iface_name_ptr);
data.append("\"}");
}
else
@ -778,7 +859,7 @@ int send_mtc_cmd ( string & hostname, int cmd , int interface, string json_dict
snprintf ( &mtc_cmd.buf[0], data.length()+1, "%s", data.data());
bytes = (sizeof(mtc_message_type)-(BUF_SIZE-(data.length()+1)));
print_mtc_message ( hostname, MTC_CMD_TX, mtc_cmd, get_iface_name_str(interface), force ) ;
print_mtc_message ( hostname, MTC_CMD_TX, mtc_cmd, iface_name_ptr, force ) ;
if (interface == MGMNT_INTERFACE)
{
@ -791,13 +872,55 @@ int send_mtc_cmd ( string & hostname, int cmd , int interface, string json_dict
return (FAIL_HOSTADDR_LOOKUP);
}
mlog ("%s sending %s request to %s (%s)",
mlog ("%s sending %s request to %s:%d (%s)",
hostname.c_str(),
get_mtcNodeCommand_str(cmd),
hostaddr.c_str(),
get_iface_name_str(interface));
sock_ptr->mtc_mgmnt_cmd_port,
iface_name_ptr);
rc = sock_ptr->mtc_agent_tx_socket->write((char *)&mtc_cmd, bytes, hostaddr.c_str(), sock_ptr->mtc_mgmnt_cmd_port);
rc = sock_ptr->mtc_agent_mgmt_tx_socket->write((char *)&mtc_cmd, bytes, hostaddr.c_str(), sock_ptr->mtc_mgmnt_cmd_port);
}
else if ((interface == PXEBOOT_INTERFACE) && (sock_ptr->pxeboot_tx_socket))
{
string pxeboot_hostAddr = obj_ptr->get_pxeboot_hostaddr(hostname);
if (hostUtil_is_valid_ip_addr(pxeboot_hostAddr))
{
// Set up sockaddr_in with the host pxeboot address and its rx port number
int flags = 0 ;
struct sockaddr_in hostAddr;
memset(&hostAddr, 0, sizeof(hostAddr));
hostAddr.sin_family = AF_INET; // pxeboot network is IPV4 only
hostAddr.sin_port = htons(sock_ptr->mtc_tx_pxeboot_port);
hostAddr.sin_addr.s_addr = inet_addr(pxeboot_hostAddr.c_str());
mlog ("%s sending %s request to %s:%d (%s)",
hostname.c_str(),
get_mtcNodeCommand_str(cmd),
pxeboot_hostAddr.c_str(),
sock_ptr->mtc_rx_pxeboot_port,
iface_name_ptr);
ssize_t bytes_sent = sendto(sock_ptr->pxeboot_tx_socket,
(char *)&mtc_cmd,
bytes, flags,
(const struct sockaddr*)&hostAddr,
sizeof(hostAddr));
if (bytes_sent <= 0)
{
elog ("%s failed to send %d:%s command to %s:%d (%s) (%d:%m)",
hostname.c_str(), cmd,
get_mtcNodeCommand_str(cmd),
pxeboot_hostAddr.c_str(),
sock_ptr->mtc_rx_pxeboot_port,
iface_name_ptr,
errno);
}
}
else
{
return (FAIL_HOSTADDR_LOOKUP);
}
}
else if ((interface == CLSTR_INTERFACE) &&
( obj_ptr->clstr_network_provisioned == true ) &&
@ -805,17 +928,25 @@ int send_mtc_cmd ( string & hostname, int cmd , int interface, string json_dict
{
string clstr_hostaddr = obj_ptr->get_clstr_hostaddr(hostname);
if ( hostUtil_is_valid_ip_addr( clstr_hostaddr ) != true )
return (FAIL_NO_CLSTR_PROV);
return (FAIL_HOSTADDR_LOOKUP);
mlog ("%s sending %s request to %s (%s)",
hostname.c_str(),
get_mtcNodeCommand_str(cmd),
clstr_hostaddr.c_str(),
get_iface_name_str(interface));
iface_name_ptr);
rc = sock_ptr->mtc_agent_clstr_tx_socket->write((char *)&mtc_cmd, bytes, clstr_hostaddr.c_str(), sock_ptr->mtc_clstr_cmd_port);
}
else if ( interface == CLSTR_INTERFACE )
{
// This path can be taken if the cluster interface is not provisioned
// or is otherwise not ready to send on.
mlog ("%s to %s network not sent", get_mtcNodeCommand_str(cmd), iface_name_ptr);
}
else
{
wlog ("%s to %s network not sent", get_mtcNodeCommand_str(cmd), iface_name_ptr);
}
if ( 0 > rc )
{
elog("%s Failed to send command (rc:%i)\n", hostname.c_str(), rc);
@ -944,7 +1075,7 @@ int send_hbs_command ( string hostname, int cmd, string controller )
{
if ( cmd == MTC_CMD_ACTIVE_CTRL )
{
mlog3 ("%s %s sent to %s %s",
mlog1 ("%s %s sent to %s %s",
hostname.c_str(),
get_mtcNodeCommand_str(cmd),
unit->c_str(),
@ -1162,7 +1293,7 @@ int service_events ( nodeLinkClass * obj_ptr, mtc_socket_type * sock_ptr )
else
{
/* The interface that the heartbeat loss occurred over is
* specified in parm[0 for this command
* specified in parm[0] for this command
* 0 = MGMNT_IFACE
* 1 = CLSTR_IFACE
* else default to 0 (MGMNT_IFACE) to be backwards compatible

File diff suppressed because it is too large Load Diff

View File

@ -1,10 +1,10 @@
#ifndef __INCLUDE_MTCNODECOMP_HH__
#define __INCLUDE_MTCNODECOMP_HH__
/*
* Copyright (c) 2015-2016 Wind River Systems, Inc.
*
* SPDX-License-Identifier: Apache-2.0
*
* Copyright (c) 2015-2016, 2024 Wind River Systems, Inc.
*
* SPDX-License-Identifier: Apache-2.0
*
*/
/**
@ -92,6 +92,30 @@ typedef struct
string mgmnt_iface ;
string clstr_iface ;
// Controller-0 USB installs lead to management interface,
// and therefore the pxeboot interface, being the localhost 'lo'.
// Trying to setup the pxeboot socket and do messaging over that
// socket is not possible so this bool tracks when the pxeboot
// interface is not correct.
bool pxeboot_iface_provisioned ;
string pxeboot_iface ;
string pxeboot_addr ;
string pxeboot_addr_c0 ;
string pxeboot_addr_c1 ;
// Assume the address is learned to start with, even though it likely is not.
// This enables the first 'not learned' log, followed by a 'learned'
// log once it is.
bool pxeboot_address_learned [CONTROLLERS] = { true, true };
// mtcAlive current running sequence number storage
unsigned int mtcAlive_pxeboot_sequence = 0 ;
unsigned int mtcAlive_mgmnt_sequence = 0 ;
unsigned int mtcAlive_clstr_sequence = 0 ;
/* Maintain pxeboot, management and cluster network interface information */
iface_info_type iface_info[MTCALIVE_INTERFACES_MAX];
unsigned int nodetype ;
unsigned int function ;
unsigned int subfunction ;
@ -131,5 +155,6 @@ bool is_subfunction_worker ( void );
int run_goenabled_scripts ( mtc_socket_type * sock_ptr , string requestor );
int run_hostservices_scripts ( unsigned int cmd );
void load_mtcInfo_msg ( mtc_message_type & msg );
void load_pxebootInfo_msg ( mtc_message_type & msg );
#endif
#endif // __INCLUDE_MTCNODECOMP_HH__

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2013, 2016, 2023 Wind River Systems, Inc.
* Copyright (c) 2013, 2016, 2023-2024 Wind River Systems, Inc.
*
* SPDX-License-Identifier: Apache-2.0
*
@ -134,21 +134,21 @@ msgSock_type * get_mtclogd_sockPtr ( void )
/******************************************************/
/* Socket Close functions */
/******************************************************/
static void mtc_agent_tx_socket_close ( void )
static void mtc_agent_mgmt_tx_socket_close ( void )
{
if (mtc_sock.mtc_agent_tx_socket)
if (mtc_sock.mtc_agent_mgmt_tx_socket)
{
delete mtc_sock.mtc_agent_tx_socket;
mtc_sock.mtc_agent_tx_socket = NULL;
delete mtc_sock.mtc_agent_mgmt_tx_socket;
mtc_sock.mtc_agent_mgmt_tx_socket = NULL;
}
}
static void mtc_agent_rx_socket_close ( void )
static void mtc_agent_mgmt_rx_socket_close ( void )
{
if (mtc_sock.mtc_agent_rx_socket)
if (mtc_sock.mtc_agent_mgmt_rx_socket)
{
delete (mtc_sock.mtc_agent_rx_socket);
mtc_sock.mtc_agent_rx_socket = NULL;
delete (mtc_sock.mtc_agent_mgmt_rx_socket);
mtc_sock.mtc_agent_mgmt_rx_socket = NULL;
}
}
@ -170,7 +170,7 @@ static void mtc_agent_clstr_rx_socket_close ( void )
}
}
static void mtc_event_rx_sock_close ( void )
static void event_rx_sock_close ( void )
{
if (mtc_sock.mtc_event_rx_sock)
{
@ -224,6 +224,24 @@ static void ioctl_sock_close ( void )
}
}
/* Close the pxeboot network receive socket, if one is recorded.
 *
 * A descriptor value of 0 is treated as 'no socket' ; in that case
 * this is a no-op. After closing, the descriptor is reset to 0. */
static void pxeboot_rx_socket_close ( void )
{
    /* nothing to do when no socket is recorded */
    if ( mtc_sock.pxeboot_rx_socket == 0 )
        return ;

    close (mtc_sock.pxeboot_rx_socket);
    mtc_sock.pxeboot_rx_socket = 0 ;
}
/* Close the pxeboot network transmit socket, if one is recorded.
 *
 * A descriptor value of 0 is treated as 'no socket' ; in that case
 * this is a no-op. After closing, the descriptor is reset to 0. */
static void pxeboot_tx_socket_close ( void )
{
    /* nothing to do when no socket is recorded */
    if ( mtc_sock.pxeboot_tx_socket == 0 )
        return ;

    close (mtc_sock.pxeboot_tx_socket);
    mtc_sock.pxeboot_tx_socket = 0 ;
}
/* close all the sockets */
static void mtc_socket_fini(void)
{
@ -231,17 +249,64 @@ static void mtc_socket_fini(void)
set_inotify_close(mtcInv.inotify_shadow_file_fd,
mtcInv.inotify_shadow_file_wd);
pxeboot_tx_socket_close();
pxeboot_rx_socket_close();
mtc_agent_clstr_tx_socket_close();
mtc_agent_clstr_rx_socket_close();
mtc_agent_tx_socket_close();
mtc_agent_rx_socket_close();
mtc_event_rx_sock_close();
mtc_agent_mgmt_tx_socket_close();
mtc_agent_mgmt_rx_socket_close();
event_rx_sock_close();
mtc_to_hbs_sock_close();
hwmon_cmd_sock_close();
mtclogd_socket_close();
mtcHttpSvr_fini(mtce_event);
}
/* Create the IPv4 UDP socket used to transmit over the pxeboot network.
 *
 * Does nothing when the pxeboot network is not provisioned.
 * Any previously opened transmit socket is closed first, so this
 * function can be called again to re-create the socket.
 *
 * On failure an error is logged and mtc_sock.pxeboot_tx_socket is
 * left at 0, the 'no socket' value that pxeboot_tx_socket_close()
 * tests for, so a failed create is never mistaken for an open socket. */
void setup_pxeboot_tx_socket ( void )
{
    if ( !mtcInv.pxeboot_network_provisioned ) return ;

    /* start clean ; release any previously opened transmit socket */
    pxeboot_tx_socket_close();

    ilog ("Creating pxeboot transmit socket");
    if ((mtc_sock.pxeboot_tx_socket = socket(AF_INET, SOCK_DGRAM, 0)) <= 0)
    {
        elog ("failed to create IPV4 pxeboot network transmit socket ; (%d:%m)", errno);

        /* Don't leave -1 behind ; non-zero reads as 'socket is open' */
        mtc_sock.pxeboot_tx_socket = 0 ;
    }
}
/* Create and bind the IPv4 UDP socket used to receive messages
 * over the pxeboot network.
 *
 * Does nothing when the pxeboot network is not provisioned.
 * Any previously opened receive socket is closed first, so this
 * function can be called again to re-create the socket.
 *
 * The socket is created non-blocking and bound to this unit's pxeboot
 * address (mtcInv.my_pxeboot_ip) and mtc_sock.mtc_rx_pxeboot_port.
 *
 * On any failure an error is logged and mtc_sock.pxeboot_rx_socket is
 * left at 0, the 'no socket' value that pxeboot_rx_socket_close()
 * tests for. */
void setup_pxeboot_rx_socket ( void )
{
    if ( !mtcInv.pxeboot_network_provisioned ) return ;

    /* start clean ; release any previously opened receive socket */
    pxeboot_rx_socket_close ();

    ilog ("Creating pxeboot receive socket on %s:%d",
           mtcInv.my_pxeboot_ip.c_str(),
           mtc_sock.mtc_rx_pxeboot_port);

    // Create the socket
    mtc_sock.pxeboot_rx_socket = socket(AF_INET, SOCK_DGRAM | SOCK_NONBLOCK, 0);
    if ( mtc_sock.pxeboot_rx_socket == -1 )
    {
        elog ("failed to create IPV4 pxeboot network receive socket ; (%d:%m)", errno);

        /* Nothing to bind ; record 'no socket' and stop here rather than
         * calling bind/close on an invalid descriptor, which would only
         * produce a second, misleading error log. */
        mtc_sock.pxeboot_rx_socket = 0 ;
        return ;
    }

    // Initialize pxeboot address structure
    struct sockaddr_in pxeboot_addr ;
    memset(&pxeboot_addr, 0, sizeof(pxeboot_addr));
    pxeboot_addr.sin_family = AF_INET;
    pxeboot_addr.sin_port = htons(mtc_sock.mtc_rx_pxeboot_port);
    pxeboot_addr.sin_addr.s_addr = inet_addr(mtcInv.my_pxeboot_ip.data());

    // Bind the pxeboot unit address and messaging port to socket
    if (bind(mtc_sock.pxeboot_rx_socket, (const struct sockaddr*)&pxeboot_addr, sizeof(pxeboot_addr)) == -1)
    {
        elog ("failed to bind %s:%d to socket (%d:%m)",
               mtcInv.my_pxeboot_ip.c_str(),
               mtc_sock.mtc_rx_pxeboot_port,
               errno);

        /* an unbound receive socket is of no use ; release it */
        pxeboot_rx_socket_close();
    }
}
void daemon_exit(void)
{
/* Cancel the uptime timer */
@ -360,6 +425,20 @@ static int mtc_config_handler ( void * user,
config_ptr->mtc_rx_clstr_port = atoi(value);
config_ptr->mask |= CONFIG_CLIENT_MTC_CLSTR_PORT ;
}
else if (MATCH("agent", "mtc_rx_pxeboot_port"))
{
config_ptr->mtc_rx_pxeboot_port = atoi(value);
mtc_sock.mtc_rx_pxeboot_port = mtc_config.mtc_rx_pxeboot_port ;
}
else if (MATCH("client", "mtc_rx_pxeboot_port"))
{
// Get the mtcClient's pxeboot network receive port number
// and use it as the mtcAgent's pxeboot network transmit port,
// so that the mtcAgent can send the mtcClient messages over the
// pxeboot network.
config_ptr->mtc_tx_pxeboot_port = atoi(value);
mtc_sock.mtc_tx_pxeboot_port = config_ptr->mtc_tx_pxeboot_port ;
}
else if (MATCH("agent", "token_refresh_rate"))
{
config_ptr->token_refresh_rate = atoi(value);
@ -791,34 +870,34 @@ int mtc_socket_init ( void )
int socket_size = 0 ;
char ip_address[INET6_ADDRSTRLEN];
/***********************************************************/
/* Setup UDP Maintenance Command Transmit Socket Mgmnt I/F */
/***********************************************************/
/**********************************************************************/
/* Setup UDP Maintenance Command Transmit Socket to the Mgmnt network */
/**********************************************************************/
mtc_sock.mtc_mgmnt_cmd_port = mtc_config.cmd_port;
msgClassAddr::getAddressFromInterface(mtc_config.mgmnt_iface, ip_address, INET6_ADDRSTRLEN);
mtc_sock.mtc_agent_tx_socket =
mtc_sock.mtc_agent_mgmt_tx_socket =
new msgClassTx(ip_address, mtc_sock.mtc_mgmnt_cmd_port, IPPROTO_UDP, mtc_config.mgmnt_iface);
#ifdef WANT_FIT_TESTING
if ( daemon_want_fit ( FIT_CODE__SOCKET_SETUP, mtcInv.my_hostname, "mtc_agent_tx_socket"))
mtc_sock.mtc_agent_tx_socket->return_status = FAIL ;
if ( daemon_want_fit ( FIT_CODE__SOCKET_SETUP, mtcInv.my_hostname, "mtc_agent_mgmt_tx_socket"))
mtc_sock.mtc_agent_mgmt_tx_socket->return_status = FAIL ;
#endif
if ((mtc_sock.mtc_agent_tx_socket == NULL) ||
(mtc_sock.mtc_agent_tx_socket->return_status))
if ((mtc_sock.mtc_agent_mgmt_tx_socket == NULL) ||
(mtc_sock.mtc_agent_mgmt_tx_socket->return_status))
{
elog("Failed to create mtcClient command socket on port %d for %s (%d:%s)\n",
mtc_sock.mtc_mgmnt_cmd_port,
mtc_config.mgmnt_iface,
errno,
strerror(errno));
mtc_agent_tx_socket_close();
mtc_agent_mgmt_tx_socket_close();
return (FAIL_SOCKET_CREATE) ;
}
/***********************************************************/
/* Setup UDP Maintenance Command Transmit Socket Clstr I/F */
/***********************************************************/
/**********************************************************************/
/* Setup UDP Maintenance Command Transmit Socket to the Clstr network */
/**********************************************************************/
if (strlen(mtc_config.clstr_iface))
{
mtc_sock.mtc_clstr_cmd_port = mtc_config.mtc_rx_clstr_port;
@ -845,48 +924,49 @@ int mtc_socket_init ( void )
/*********************************************************************
* Setup Maintenance Command Reply and Event Receiver Socket
* - management interface
* - management network
* - pxeboot network
*
* This socket is used to receive command replies over the management
* interface and asynchronous events from the mtcClient and other
* maintenance service daemons.
*********************************************************************/
mtc_sock.mtc_agent_port = mtc_config.mtc_agent_port;
mtc_sock.mtc_agent_rx_socket =
mtc_sock.mtc_agent_mgmt_rx_socket =
new msgClassRx(CONTROLLER, mtc_sock.mtc_agent_port, IPPROTO_UDP);
#ifdef WANT_FIT_TESTING
if ( daemon_want_fit ( FIT_CODE__SOCKET_SETUP, mtcInv.my_hostname, "mtc_agent_rx_socket"))
mtc_sock.mtc_agent_rx_socket = NULL ;
if ( daemon_want_fit ( FIT_CODE__SOCKET_SETUP, mtcInv.my_hostname, "mtc_agent_mgmt_rx_socket"))
mtc_sock.mtc_agent_mgmt_rx_socket = NULL ;
#endif
if ((mtc_sock.mtc_agent_rx_socket == NULL) ||
(mtc_sock.mtc_agent_rx_socket->return_status))
if ((mtc_sock.mtc_agent_mgmt_rx_socket == NULL) ||
(mtc_sock.mtc_agent_mgmt_rx_socket->return_status))
{
elog("Failed to create mtcClient receive socket on port %d for %s (%d:%m)\n",
mtc_sock.mtc_agent_port,
mtc_config.mgmnt_iface,
errno);
mtc_agent_rx_socket_close();
mtc_agent_mgmt_rx_socket_close();
return (FAIL_SOCKET_CREATE );
}
/* Set messaging buffer size */
/* if we need a bigger then default we can use a sysctl to raise the max */
socket_size = MTC_AGENT_RX_BUFF_SIZE;
if ((rc = mtc_sock.mtc_agent_rx_socket->setSocketMemory(mtc_config.mgmnt_iface, "mtce command and event receiver (Mgmnt network)", socket_size)) != PASS)
if ((rc = mtc_sock.mtc_agent_mgmt_rx_socket->setSocketMemory(mtc_config.mgmnt_iface, "mtce command and event receiver (Mgmnt network)", socket_size)) != PASS)
{
elog("setsockopt failed for SO_RCVBUF (%d:%m)\n", errno);
mtc_agent_rx_socket_close();
mtc_agent_mgmt_rx_socket_close();
return (FAIL_SOCKET_OPTION);
}
socklen_t optlen = sizeof(mtc_sock.mtc_agent_rx_socket_size);
getsockopt(mtc_sock.mtc_agent_rx_socket->getFD(), SOL_SOCKET, SO_RCVBUF,
&mtc_sock.mtc_agent_rx_socket_size, &optlen);
socklen_t optlen = sizeof(mtc_sock.mtc_agent_mgmt_rx_socket_size);
getsockopt(mtc_sock.mtc_agent_mgmt_rx_socket->getFD(), SOL_SOCKET, SO_RCVBUF,
&mtc_sock.mtc_agent_mgmt_rx_socket_size, &optlen);
ilog("Listening On: 'mtc client receive' socket %d (%d rx bytes - req:%d) (%s)\n",
mtc_sock.mtc_agent_port,
mtc_sock.mtc_agent_rx_socket_size, MTC_AGENT_RX_BUFF_SIZE,
mtc_sock.mtc_agent_mgmt_rx_socket_size, MTC_AGENT_RX_BUFF_SIZE,
mtc_config.mgmnt_iface);
/*********************************************************************
@ -967,7 +1047,7 @@ int mtc_socket_init ( void )
mtc_config.hbs_to_mtc_event_port,
mtc_config.mgmnt_iface,
errno);
mtc_event_rx_sock_close();
event_rx_sock_close();
return ( FAIL_SOCKET_CREATE );
}
@ -1077,6 +1157,8 @@ int daemon_init ( string iface, string nodetype )
}
mtcInv.system_type = daemon_system_type ();
mtcInv.sw_version = daemon_sw_version();
ilog ("SW Version : %s", mtcInv.sw_version.c_str());
/* Get and store my hostname */
if ( mtc_hostname_read () != PASS )
@ -1370,9 +1452,6 @@ void daemon_service_run ( void )
/* Init board management stuff */
bmcUtil_init ();
/* log the currect software version */
ilog ("SW VERSION : %s\n", daemon_sw_version ().c_str());
/* Collect inventory in active state only */
if ( mtc_config.active == true )
{
@ -1383,6 +1462,32 @@ void daemon_service_run ( void )
daemon_exit ();
}
string my_mac = "" ;
get_iface_macaddr ( mtc_config.mgmnt_iface , my_mac );
dlog ("Mgmt IF mac: %s", my_mac.c_str());
mtcInv.my_pxeboot_if = daemon_mgmnt_iface() ;
if (( mtcInv.my_pxeboot_if != LOOPBACK_IF ) && ( !my_mac.empty() ))
{
mtcInv.pxeboot_network_provisioned = true ;
mtc_config.pxeboot_iface = daemon_get_iface_master ((char*)mtcInv.my_pxeboot_if.data());
{
string ifname = mtc_config.pxeboot_iface ;
if ( get_iface_parent ( PXEBOOT_INTERFACE, ifname, mtcInv.my_pxeboot_if ) == PASS )
{
ilog ("Pxeboot IF : %s", mtcInv.my_pxeboot_if.c_str() );
}
}
mtcInv.pxebootInfo_loader ( my_mac );
ilog ("Pxeboot IP : %s", mtcInv.my_pxeboot_ip.empty() ? "none" : mtcInv.my_pxeboot_ip.c_str());
/************************************************************************/
/* Setup UDP IPV4 Maintenance pxeboot network Transmit/Receive Sockets */
/************************************************************************/
setup_pxeboot_rx_socket ();
setup_pxeboot_tx_socket ();
}
/* The following are base object controller timers ; init them */
mtcTimer_init ( mtcInv.mtcTimer_token, mtcInv.my_hostname, "token timer" );
mtcTimer_init ( mtcInv.mtcTimer_uptime,mtcInv.my_hostname, "uptime timer" );
@ -1390,7 +1495,6 @@ void daemon_service_run ( void )
mtcTimer_init ( mtcInv.mtcTimer_dor, mtcInv.my_hostname, "DOR mode timer" );
if ( get_link_state ( mtc_sock.ioctl_sock, mtc_config.mgmnt_iface, &mtcInv.mgmnt_link_up_and_running ) )
{
mtcInv.mgmnt_link_up_and_running = false ;
wlog ("Failed to query %s operational state ; defaulting to down\n", mtc_config.mgmnt_iface );
@ -1483,12 +1587,17 @@ void daemon_service_run ( void )
send_hbs_command ( mtcInv.my_hostname, MTC_CMD_START_HOST );
socks.clear();
socks.push_front (mtc_sock.mtc_event_rx_sock->getFD()); // service_events
socks.push_front (mtc_sock.mtc_agent_rx_socket->getFD()); // mtc_service_inbox
// service_events
socks.push_front (mtc_sock.mtc_event_rx_sock->getFD());
// mtc_service_inbox - receive sockets from Pxeboot, Mgmt and Clstr network
if ( mtc_sock.pxeboot_rx_socket )
socks.push_front (mtc_sock.pxeboot_rx_socket);
socks.push_front (mtc_sock.mtc_agent_mgmt_rx_socket->getFD());
if ( mtcInv.clstr_network_provisioned == true )
{
socks.push_front (mtc_sock.mtc_agent_clstr_rx_socket->getFD()); // mtc_service_inbox
socks.push_front (mtc_sock.mtc_agent_clstr_rx_socket->getFD());
}
socks.push_front (mtc_sock.netlink_sock);
@ -1559,9 +1668,57 @@ void daemon_service_run ( void )
* where it had commanded the hbsAgent to heartbeat at a reduced rate. */
send_hbs_command ( mtcInv.my_hostname, MTC_RECOVER_HBS );
// Used to track mtcAgent incoming messaging rate
#define LOOP_TIMER_PERIOD_SECS (60)
#define MSGS_PER_SEC_THRESHOLD (20)
#define MSGS_CNT_IDX_INBOX (0)
#define MSGS_CNT_IDX_EVENT (1)
#define MSGS_CNT_IDX_PMOND (2)
#define MSGS_CNT_IDX_HTTP (3)
#define MSGS_CNT_IDX_NETLINK (4)
#define MSGS_CNT_IDX_INOTIFY (5)
#define MSGS_CNT_IDX_MAX (6)
static unsigned int messages_tally[MSGS_CNT_IDX_MAX] = {0,0,0,0,0,0} ;
static float messages_total = 0 ;
mtcTimer_init ( mtcInv.mtcTimer_loop, mtcInv.my_hostname, "loop timer" );
/* Run Maintenance service forever */
for ( ; ; )
for ( mtc_sock.msg_rate = 0 ; ; )
{
if ( mtcTimer_expired ( mtcInv.mtcTimer_loop ) )
{
// Maintain an incoming messaging rate.
for ( int m = MSGS_CNT_IDX_INBOX ; m < MSGS_CNT_IDX_MAX ; m++ )
messages_total += messages_tally[m] ;
float rate_per_sec = messages_total/LOOP_TIMER_PERIOD_SECS ;
// Only log the messaging rate log when
// - the rate is above basic MSGS_PER_SEC_THRESHOLD ; first log
// - the messaging rate changes by half of the threshold in either direction
if (( mtc_config.debug_msg ) ||
(( rate_per_sec > MSGS_PER_SEC_THRESHOLD ) &&
(( rate_per_sec > (mtc_sock.msg_rate+(MSGS_PER_SEC_THRESHOLD/2))) ||
( rate_per_sec < (mtc_sock.msg_rate-(MSGS_PER_SEC_THRESHOLD/2))))))
{
ilog ("%d messages processed ; rate: %.1f msgs/sec] [%d:%d:%d:%d:%d:%d]",
(int)messages_total, rate_per_sec,
messages_tally[MSGS_CNT_IDX_INBOX],
messages_tally[MSGS_CNT_IDX_EVENT],
messages_tally[MSGS_CNT_IDX_PMOND],
messages_tally[MSGS_CNT_IDX_HTTP],
messages_tally[MSGS_CNT_IDX_NETLINK],
messages_tally[MSGS_CNT_IDX_INOTIFY]);
// Save this message rate for next compare
mtc_sock.msg_rate = rate_per_sec ;
}
// clean the stats and restart the timer
messages_total = 0 ;
for ( int m = MSGS_CNT_IDX_INBOX ; m < MSGS_CNT_IDX_MAX ; m++ )
messages_tally[m] = 0 ;
mtcTimer_start ( mtcInv.mtcTimer_loop, mtcTimer_handler, LOOP_TIMER_PERIOD_SECS );
}
daemon_signal_hdlr ();
/**
* Can't just run 'mtcHttpSvr_look' off select as it is seen to miss events.
@ -1587,13 +1744,17 @@ void daemon_service_run ( void )
/* Initialize the master fd_set */
FD_ZERO(&mtc_sock.readfds);
FD_SET(mtc_sock.mtc_event_rx_sock->getFD(), &mtc_sock.readfds);
FD_SET(mtc_sock.mtc_agent_rx_socket->getFD(), &mtc_sock.readfds);
FD_SET(mtc_sock.mtc_event_rx_sock->getFD(), &mtc_sock.readfds);
FD_SET(mtc_sock.mtc_agent_mgmt_rx_socket->getFD(), &mtc_sock.readfds);
if ( mtcInv.clstr_network_provisioned == true )
{
FD_SET(mtc_sock.mtc_agent_clstr_rx_socket->getFD(),&mtc_sock.readfds);
}
// Listen to the pxeboot rx socket if it is setup
if ( mtc_sock.pxeboot_rx_socket > 0 )
{
FD_SET(mtc_sock.pxeboot_rx_socket, &mtc_sock.readfds);
}
if ( mtce_event.fd )
{
FD_SET(mtce_event.fd, &mtc_sock.readfds);
@ -1631,44 +1792,95 @@ void daemon_service_run ( void )
{
if ( FD_ISSET( mtce_event.fd , &mtc_sock.readfds))
{
mlog3 ("http socket fired");
messages_tally[MSGS_CNT_IDX_HTTP]++ ;
mtcHttpSvr_look ( mtce_event );
mlog3 ("http handling done");
}
if (FD_ISSET(mtc_sock.netlink_sock, &mtc_sock.readfds))
{
dlog ("netlink socket fired\n");
mlog3 ("netlink socket fired");
messages_tally[MSGS_CNT_IDX_NETLINK]++ ;
if ( mtcInv.service_netlink_events ( mtc_sock.netlink_sock, mtc_sock.ioctl_sock ) != PASS )
{
elog ("service_netlink_events failed (rc:%d)\n", rc );
}
mlog3 ("netlink handling done");
}
if (FD_ISSET(mtc_sock.mtc_event_rx_sock->getFD(), &mtc_sock.readfds))
{
mlog3 ("events socket fired");
messages_tally[MSGS_CNT_IDX_EVENT]++ ;
if ( (rc = service_events ( &mtcInv, &mtc_sock )) != PASS )
{
elog ("service_events failed (rc:%d)\n", rc );
}
mlog3 ("events handling done");
}
if ( FD_ISSET(mtc_sock.mtc_agent_rx_socket->getFD(), &mtc_sock.readfds))
if ( mtc_sock.pxeboot_rx_socket && FD_ISSET(mtc_sock.pxeboot_rx_socket, &mtc_sock.readfds))
{
int cnt = 0 ;
/* Service up to MAX_RX_MSG_BATCH of messages at once */
mlog3 ("pxeboot network socket fired");
for ( ; cnt < MAX_RX_MSG_BATCH ; cnt++ )
{
rc = mtc_service_inbox ( &mtcInv, &mtc_sock , MGMNT_INTERFACE) ;
if ( rc > RETRY )
mlog3 ("... service inbox ; message %d", cnt+1);
rc = mtc_service_inbox ( &mtcInv, &mtc_sock , PXEBOOT_INTERFACE) ;
if ( rc == RETRY )
{
mlog2 ("mtc_service_inbox failed (rc:%d) (Mgmnt)\n", rc );
mlog3 ("... service inbox done");
break ;
}
if ( rc == RETRY )
messages_tally[MSGS_CNT_IDX_INBOX]++ ;
if ( rc > RETRY )
{
wlog ("mtc_service_inbox failed (rc:%d) (pxeboot)", rc );
break ;
}
else
{
mlog3 ("......more messages to service");
}
}
if ( cnt > 1 )
if ( cnt > (MAX_RX_MSG_BATCH/2) )
{
mlog2 ("serviced %d messages in one batch (Mgmnt)\n", cnt );
ilog ("serviced %d messages in one batch (pxeboot)", cnt );
}
mlog3 ("pxeboot network message handling done");
}
if ( FD_ISSET(mtc_sock.mtc_agent_mgmt_rx_socket->getFD(), &mtc_sock.readfds))
{
int cnt = 0 ;
/* Service up to MAX_RX_MSG_BATCH of messages at once */
mlog3 ("mgmt network socket fired");
for ( ; cnt < MAX_RX_MSG_BATCH ; cnt++ )
{
mlog3 ("... service inbox ; message %d", cnt+1);
rc = mtc_service_inbox ( &mtcInv, &mtc_sock , MGMNT_INTERFACE) ;
if ( rc == RETRY )
{
mlog3 ("... service inbox done");
break ;
}
messages_tally[MSGS_CNT_IDX_INBOX]++ ;
if ( rc > RETRY )
{
wlog ("mtc_service_inbox failed (rc:%d) (Mgmnt)", rc );
break ;
}
else
{
mlog3 ("......more messages to service");
}
}
if ( cnt > (MAX_RX_MSG_BATCH/2) )
{
ilog ("serviced %d messages in one batch (Mgmnt)", cnt );
}
mlog3 ("mgmt network message handling done");
}
if (( mtcInv.clstr_network_provisioned == true ) &&
@ -1677,24 +1889,38 @@ void daemon_service_run ( void )
{
int cnt = 0 ;
/* Service up to MAX_RX_MSG_BATCH of messages at once */
mlog3 ("clstr network socket fired");
for ( ; cnt < MAX_RX_MSG_BATCH ; cnt++ )
{
mlog3 ("... service inbox ; message %d", cnt+1);
rc = mtc_service_inbox ( &mtcInv, &mtc_sock, CLSTR_INTERFACE ) ;
if ( rc > RETRY )
if ( rc == RETRY )
{
mlog2 ("mtc_service_inbox failed (rc:%d) (Clstr)\n", rc );
mlog3 ("... service inbox done");
break ;
}
if ( rc == RETRY )
messages_tally[MSGS_CNT_IDX_INBOX]++ ;
if ( rc > RETRY )
{
mlog ("mtc_service_inbox failed (rc:%d) (Clstr)\n", rc );
break ;
}
else
{
mlog3 ("......more messages to service");
}
}
if ( cnt > 1 )
if ( cnt > (MAX_RX_MSG_BATCH/2) )
{
mlog2 ("serviced %d messages in one batch (Clstr)\n", cnt ); // ERIC dlog
ilog ("serviced %d messages in one batch (Clstr)", cnt );
}
mlog3 ("mgmt network message handling done");
}
if (FD_ISSET(mtcInv.inotify_shadow_file_fd, &mtc_sock.readfds))
{
mlog3 ("inotify socket fired");
messages_tally[MSGS_CNT_IDX_INOTIFY]++ ;
rc = get_inotify_events ( mtcInv.inotify_shadow_file_fd, (IN_MODIFY | IN_CREATE | IN_IGNORED) );
if ( rc )
{
@ -1715,6 +1941,7 @@ void daemon_service_run ( void )
wlog ("Reselecting on %s change (Select:%d)\n", SHADOW_FILE, mtcInv.inotify_shadow_file_fd );
}
}
mlog3 ("inotify event handling done");
}
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2013, 2016 Wind River Systems, Inc.
* Copyright (c) 2013, 2016, 2024 Wind River Systems, Inc.
*
* SPDX-License-Identifier: Apache-2.0
*
@ -91,6 +91,14 @@ int nodeLinkClass::fsm ( struct nodeLinkClass::node * node_ptr )
*/
nodeLinkClass::online_handler ( node_ptr );
/*
* Always run the mtcAlive handler.
*
* - monitor host's mtcAlive messaging
* - manage host's mtcAlive missing alarm
*/
nodeLinkClass::pxeboot_mtcAlive_monitor ( node_ptr );
if ( node_ptr->adminAction == MTC_ADMIN_ACTION__DELETE )
{
flog ("%s -> Delete Action\n", node_ptr->hostname.c_str());

View File

@ -130,6 +130,15 @@ void nodeLinkClass::timer_handler ( int sig, siginfo_t *si, void *uc)
return ;
}
/* Is this TID a online timer TID ? */
node_ptr = get_online_timer ( *tid_ptr );
if ( node_ptr )
{
mtcTimer_stop_int_safe ( node_ptr->online_timer );
node_ptr->online_timer.ring = true ;
return ;
}
/* Is this TID a mtcAlive timer TID ? */
node_ptr = get_mtcAlive_timer ( *tid_ptr );
if ( node_ptr )
@ -247,6 +256,14 @@ void nodeLinkClass::timer_handler ( int sig, siginfo_t *si, void *uc)
return ;
}
/* daemon main loop timer */
if ( *tid_ptr == mtcTimer_loop.tid )
{
mtcTimer_stop_int_safe ( mtcTimer_loop );
mtcTimer_loop.ring = true ;
return ;
}
/* is the http request timer ? */
node_ptr = get_http_timer ( *tid_ptr );
if ( node_ptr )
@ -1968,7 +1985,9 @@ int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr )
node_ptr->hostname.c_str());
node_ptr->reboot_cmd_ack_mgmnt = false ;
node_ptr->reboot_cmd_ack_clstr = false ;
node_ptr->reboot_cmd_ack_pxeboot = false ;
send_mtc_cmd ( node_ptr->hostname, MTC_CMD_REBOOT, MGMNT_INTERFACE ) ;
send_mtc_cmd ( node_ptr->hostname, MTC_CMD_REBOOT, PXEBOOT_INTERFACE ) ;
/* If the cluster-host network is provisioned then try
* and issue a reset over it to expedite the recovery
@ -2038,6 +2057,12 @@ int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr )
ilog ("%s backup bmc reset aborted due to management network reboot request ACK",
node_ptr->hostname.c_str());
}
else if ( node_ptr->reboot_cmd_ack_pxeboot )
{
reset_aborted = true ;
ilog ("%s backup bmc reset aborted due to pxeboot network reboot request ACK",
node_ptr->hostname.c_str());
}
else if ( node_ptr->reboot_cmd_ack_clstr )
{
reset_aborted = true ;
@ -3331,6 +3356,7 @@ int nodeLinkClass::offline_handler ( struct nodeLinkClass::node * node_ptr )
node_ptr->mtcAlive_count = 0 ;
node_ptr->mtcAlive_mgmnt = false ;
node_ptr->mtcAlive_clstr = false ;
node_ptr->mtcAlive_pxeboot = false ;
node_ptr->offline_log_throttle = 0 ;
node_ptr->offline_search_count = 0 ;
@ -3362,22 +3388,25 @@ int nodeLinkClass::offline_handler ( struct nodeLinkClass::node * node_ptr )
* are cleared. Need to also clear the
* offline_search_count here as well.
**/
if (( node_ptr->mtcAlive_mgmnt || node_ptr->mtcAlive_clstr ) && node_ptr->offline_search_count )
if (( node_ptr->mtcAlive_mgmnt || node_ptr->mtcAlive_clstr || node_ptr->mtcAlive_pxeboot ) && node_ptr->offline_search_count )
{
node_ptr->mtcAlive_online = true ;
ilog ("%s still seeing mtcAlive (%d) (Mgmt:%c:%d Clstr:%c:%d) ; restart offline_search_count=%d of %d\n",
ilog ("%s still seeing mtcAlive (%d) (Mgmt:%c:%d Clstr:%c:%d Pxeboot:%c:%d) ; restart offline_search_count=%d of %d\n",
node_ptr->hostname.c_str(),
node_ptr->mtcAlive_count,
node_ptr->mtcAlive_mgmnt ? 'Y' : 'n',
node_ptr->mtcAlive_mgmnt_count,
node_ptr->mtcAlive_clstr ? 'Y' : 'n',
node_ptr->mtcAlive_clstr_count,
node_ptr->mtcAlive_pxeboot ? 'Y' : 'n',
node_ptr->mtcAlive_pxeboot_count,
node_ptr->offline_search_count,
offline_threshold );
node_ptr->offline_search_count = 0 ; /* reset the count */
}
node_ptr->mtcAlive_mgmnt = false ;
node_ptr->mtcAlive_clstr = false ;
node_ptr->mtcAlive_pxeboot = false ;
/* Request a mtcAlive from host from Mgmnt and Clstr (if provisioned) */
send_mtc_cmd ( node_ptr->hostname, MTC_REQ_MTCALIVE, MGMNT_INTERFACE );
@ -3410,6 +3439,10 @@ int nodeLinkClass::offline_handler ( struct nodeLinkClass::node * node_ptr )
{
node_ptr->mtcAlive_online = false ;
// Clear all the mtcAlive_sequence numbers
for (int i = 0 ; i < MTCALIVE_INTERFACES_MAX ; i++)
node_ptr->mtcAlive_sequence[i] = 0;
plog ("%s going offline ; (threshold (%d msec * %d)\n",
node_ptr->hostname.c_str(),
offline_period,
@ -3532,23 +3565,23 @@ int nodeLinkClass::online_handler ( struct nodeLinkClass::node * node_ptr )
node_ptr->mtcAlive_misses = 0 ;
/* Start mtcAlive message timer */
mtcTimer_start ( node_ptr->mtcAlive_timer, mtcTimer_handler, online_period );
mtcTimer_start ( node_ptr->online_timer, mtcTimer_handler, online_period );
node_ptr->onlineStage = MTC_ONLINE__WAITING ;
break ;
}
case MTC_ONLINE__RETRYING:
{
/* Start mtcAlive message timer */
mtcTimer_start ( node_ptr->mtcAlive_timer, mtcTimer_handler, online_period );
mtcTimer_start ( node_ptr->online_timer, mtcTimer_handler, online_period );
node_ptr->onlineStage = MTC_ONLINE__WAITING ;
break ;
}
case MTC_ONLINE__WAITING:
{
if ( node_ptr->mtcAlive_timer.ring == false )
if ( node_ptr->online_timer.ring == false )
break ;
alog ("%s mtcAlive [%s] [ misses:%d]\n",
alog2 ("%s mtcAlive [%s] [ misses:%d]\n",
node_ptr->hostname.c_str(),
node_ptr->mtcAlive_online ? "Yes" : "No",
node_ptr->mtcAlive_misses );
@ -3581,7 +3614,7 @@ int nodeLinkClass::online_handler ( struct nodeLinkClass::node * node_ptr )
else
{
/* handle retries < MTC_OFFLINE_MISSES */
node_ptr->mtcAlive_timer.ring = false ;
node_ptr->online_timer.ring = false ;
node_ptr->onlineStage = MTC_ONLINE__RETRYING ;
break ;
}
@ -3629,7 +3662,7 @@ int nodeLinkClass::online_handler ( struct nodeLinkClass::node * node_ptr )
}
/* Start over */
node_ptr->mtcAlive_timer.ring = false ;
node_ptr->online_timer.ring = false ;
node_ptr->onlineStage = MTC_ONLINE__START ;
break ;
}
@ -7523,7 +7556,148 @@ int nodeLinkClass::oos_test_handler ( struct nodeLinkClass::node * node_ptr )
return (PASS);
}
///////////////////////////////////////////////////////////////////////////////
//
// Name       : pxeboot_mtcAlive_monitor
//
// Purpose    : Monitor pxeboot network mtcAlive messaging for one host.
//
// Description: Per-host FSM driven by node_ptr->mtcAliveStage.
//
//              START   - reset the audit timer and the received sequence
//                        number, then move to SEND.
//              SEND    - request a mtcAlive over the pxeboot network and
//                        re-baseline the saved sequence number and miss
//                        count, then move to MONITOR.
//              MONITOR - start the audit timer, then move to WAIT.
//              WAIT    - stay here until the audit timer expires.
//              CHECK   - compare the received sequence number against the
//                        one saved at the previous audit ;
//                         - greater : messages received ; decay miss count
//                         - less    : out-of-sequence ; restart from START
//                         - equal   : count a miss and re-request ; once the
//                                     miss count reaches the loss threshold
//                                     log (throttled) and go back to SEND
//              FAIL    - log and restart from START.
//
//              Nothing is done, other than the fsm stage debug log, when
//              the pxeboot network is not provisioned.
//
// Parameters : nodeLinkClass::node struct pointer - node_ptr
//
// Returns    : PASS
//
///////////////////////////////////////////////////////////////////////////////

/* Audit period ; the WAIT stage timer is started with this value */
#define PXEBOOT_MTCALIVE_MONITOR_RATE_SECS     (10)

/* Consecutive no-progress audits before the loss path is taken */
#define PXEBOOT_MTCALIVE_LOSS_THRESHOLD         (6)

/* Throttle values handed to ilog_throttled / wlog_throttled below */
#define PXEBOOT_MTCALIVE_NOT_SEEN_LOG_THROTTLE  (6)
#define PXEBOOT_MTCALIVE_LOSS_LOG_THROTTLE      (6)

int nodeLinkClass::pxeboot_mtcAlive_monitor ( struct nodeLinkClass::node * node_ptr )
{
    // ERIK: TODO: Comment out once verified
    flog ("%s pxeboot mtcAlive fsm stage: %s",
              node_ptr->hostname.c_str(),
              get_mtcAliveStages_str(node_ptr->mtcAliveStage).c_str());

    /* Nothing to monitor without a pxeboot network */
    if ( !this->pxeboot_network_provisioned )
        return PASS ;

    switch (node_ptr->mtcAliveStage)
    {
        case MTC_MTCALIVE__START:
        {
            alog2 ("%s mtcAlive start", node_ptr->hostname.c_str());
            mtcTimer_reset ( node_ptr->mtcAlive_timer );
            node_ptr->mtcAlive_sequence[PXEBOOT_INTERFACE] = 0 ;
            mtcAliveStageChange (node_ptr, MTC_MTCALIVE__SEND);
            break ;
        }
        case MTC_MTCALIVE__SEND:
        {
            /* pxeboot info refresh audit */
            if ( node_ptr->hostname == my_hostname )
                pxebootInfo_loader ();

            alog2 ("%s mtcAlive send", node_ptr->hostname.c_str());
            send_mtc_cmd ( node_ptr->hostname, MTC_REQ_MTCALIVE, PXEBOOT_INTERFACE );

            /* Baseline the audit against the most recently received
             * sequence number rather than zero.
             *
             * This stage is entered from START, where the received
             * sequence number was just cleared, and from the CHECK loss
             * paths, where it still holds the last value received.
             * A zero baseline in the latter case makes the next CHECK see
             * 'sequence > save' without any new message having arrived,
             * which reports a false success and clears the loss log
             * throttles. */
            node_ptr->mtcAlive_sequence_save[PXEBOOT_INTERFACE] =
                node_ptr->mtcAlive_sequence[PXEBOOT_INTERFACE] ;
            node_ptr->mtcAlive_sequence_miss[PXEBOOT_INTERFACE] = 0 ;
            mtcAliveStageChange (node_ptr, MTC_MTCALIVE__MONITOR);
            break ;
        }
        case MTC_MTCALIVE__MONITOR:
        {
            alog2 ("%s mtcAlive monitor", node_ptr->hostname.c_str());
            mtcTimer_start ( node_ptr->mtcAlive_timer, mtcTimer_handler,
                             PXEBOOT_MTCALIVE_MONITOR_RATE_SECS );
            mtcAliveStageChange (node_ptr, MTC_MTCALIVE__WAIT);
            break ;
        }
        case MTC_MTCALIVE__WAIT:
        {
            if ( mtcTimer_expired ( node_ptr->mtcAlive_timer ) )
            {
                mtcAliveStageChange (node_ptr, MTC_MTCALIVE__CHECK);
            }
            break ;
        }
        case MTC_MTCALIVE__CHECK:
        {
            if ( node_ptr->mtcAlive_sequence[PXEBOOT_INTERFACE] > node_ptr->mtcAlive_sequence_save[PXEBOOT_INTERFACE] )
            {
                // Typical success path
                alog2 ("%s pxeboot mtcAlive received %d messages since last audit ; this:%d last:%d",
                           node_ptr->hostname.c_str(),
                           node_ptr->mtcAlive_sequence[PXEBOOT_INTERFACE] - node_ptr->mtcAlive_sequence_save[PXEBOOT_INTERFACE],
                           node_ptr->mtcAlive_sequence[PXEBOOT_INTERFACE],
                           node_ptr->mtcAlive_sequence_save[PXEBOOT_INTERFACE]);

                // Now that we received a message we can dec the missed count
                if ( node_ptr->mtcAlive_sequence_miss[PXEBOOT_INTERFACE] )
                    node_ptr->mtcAlive_sequence_miss[PXEBOOT_INTERFACE]-- ;

                // Messaging is healthy ; re-arm both throttled logs
                node_ptr->pxeboot_mtcAlive_not_seen_log_throttle = 0 ;
                node_ptr->pxeboot_mtcAlive_loss_log_throttle = 0 ;
                mtcAliveStageChange (node_ptr, MTC_MTCALIVE__MONITOR);
            }
            else if ( node_ptr->mtcAlive_sequence[PXEBOOT_INTERFACE] < node_ptr->mtcAlive_sequence_save[PXEBOOT_INTERFACE] )
            {
                // unexpected case
                wlog ("%s mtcAlive out-of-sequence ; this:%d last:%d",
                          node_ptr->hostname.c_str(),
                          node_ptr->mtcAlive_sequence[PXEBOOT_INTERFACE],
                          node_ptr->mtcAlive_sequence_save[PXEBOOT_INTERFACE]);
                node_ptr->mtcAlive_sequence_miss[PXEBOOT_INTERFACE]++ ;
                mtcAliveStageChange (node_ptr, MTC_MTCALIVE__START);
            }
            else if ( ++node_ptr->mtcAlive_sequence_miss[PXEBOOT_INTERFACE] < PXEBOOT_MTCALIVE_LOSS_THRESHOLD )
            {
                // Missing pxeboot mtcAlive
                // Note: the miss count is incremented by the condition above
                //       whenever this or a later branch is evaluated.
                alog ("%s pxeboot mtcAlive miss count %d ; sending request",
                          node_ptr->hostname.c_str(),
                          node_ptr->mtcAlive_sequence_miss[PXEBOOT_INTERFACE]);
                send_mtc_cmd ( node_ptr->hostname, MTC_REQ_MTCALIVE, PXEBOOT_INTERFACE );
                mtcAliveStageChange (node_ptr, MTC_MTCALIVE__MONITOR);
            }
            else if ( node_ptr->mtcAlive_pxeboot == true )
            {
                // Loss threshold reached with the mtcAlive_pxeboot flag set
                wlog_throttled (node_ptr->pxeboot_mtcAlive_loss_log_throttle,
                                PXEBOOT_MTCALIVE_LOSS_LOG_THROTTLE,
                                "%s pxeboot mtcAlive loss ; missed: %d ; last: count:%d seq: %d ; sending request",
                                node_ptr->hostname.c_str(),
                                node_ptr->mtcAlive_sequence_miss[PXEBOOT_INTERFACE],
                                node_ptr->mtcAlive_pxeboot_count,
                                node_ptr->mtcAlive_sequence_save[PXEBOOT_INTERFACE]);
                mtcAliveStageChange (node_ptr, MTC_MTCALIVE__SEND);
            }
            else
            {
                // Loss threshold reached with the mtcAlive_pxeboot flag clear
                ilog_throttled (node_ptr->pxeboot_mtcAlive_not_seen_log_throttle,
                                PXEBOOT_MTCALIVE_NOT_SEEN_LOG_THROTTLE,
                                "%s pxeboot mtcAlive not seen yet ; sending request",
                                node_ptr->hostname.c_str());
                mtcAliveStageChange (node_ptr, MTC_MTCALIVE__SEND);
            }

            // Remember what was seen at this audit for the next comparison
            node_ptr->mtcAlive_sequence_save[PXEBOOT_INTERFACE] = node_ptr->mtcAlive_sequence[PXEBOOT_INTERFACE] ;

            // TODO (emacdona): Need to handle loss case that manages raising the alarm
            //                  Transition to MTC_MTCALIVE__FAIL
            break ;
        }
        case MTC_MTCALIVE__FAIL:
        {
            wlog ("%s mtcAlive fail", node_ptr->hostname.c_str());
            mtcAliveStageChange (node_ptr, MTC_MTCALIVE__START);
            break ;
        }
        default:
        {
            // Unknown stage ; recover by restarting the FSM
            slog ("%s mtcAlive fsm default", node_ptr->hostname.c_str());
            mtcAliveStageChange (node_ptr, MTC_MTCALIVE__START);
            break ;
        }
    }
    return (PASS);
}
int local_counter = 0 ;

View File

@ -1,7 +1,7 @@
#ifndef __INCLUDE_MTCNODEMSG_HH__
#define __INCLUDE_MTCNODEMSG_HH__
/*
* Copyright (c) 2013, 2016 Wind River Systems, Inc.
* Copyright (c) 2013, 2016, 2024 Wind River Systems, Inc.
*
* SPDX-License-Identifier: Apache-2.0
*
@ -53,7 +53,7 @@ using namespace std;
#define MTC_AGENT_RX_BUFF_SIZE (MAX_NODES*MAX_MSG)
#define MAX_RX_MSG_BATCH (20)
#define MAX_RX_MSG_BATCH (50)
/** Maintenance messaging socket control structure */
typedef struct
@ -63,25 +63,26 @@ typedef struct
/** UDP sockets used by the mtcAgent to transmit and receive
* maintenance commands to the client (compute) node and
* receive the compute node reply in the receive direction */
msgClassSock* mtc_agent_tx_socket ; /**< tx to mtc client mgmnt */
msgClassSock* mtc_agent_clstr_tx_socket; /**< tx to mtc client clstr */
msgClassSock* mtc_agent_rx_socket ; /**< rx from mtc client mgmnt */
msgClassSock* mtc_agent_clstr_rx_socket; /**< rx from mtc client clstr */
int mtc_agent_port ; /**< the agent rx port number */
msgClassSock* mtc_agent_mgmt_tx_socket ; /**< tx to mtc client mgmnt */
msgClassSock* mtc_agent_mgmt_rx_socket ; /**< rx from mtc client mgmnt */
msgClassSock* mtc_agent_clstr_tx_socket ; /**< tx to mtc client clstr */
msgClassSock* mtc_agent_clstr_rx_socket ; /**< rx from mtc client clstr */
int mtc_agent_port ; /**< the agent rx port number */
int mtc_rx_mgmnt_port ; /**< the agent rx port number */
struct sockaddr_in agent_addr; /**< socket attributes struct */
int mtc_agent_rx_socket_size ;
int mtc_agent_mgmt_rx_socket_size ;
int mtc_agent_clstr_rx_socket_size ;
/** UDP sockets used by the mtcClient to receive maintenance
* commands from and transmit replies to the mtcAgent */
msgClassSock* mtc_client_rx_socket ; /**< rx from controller */
msgClassSock* mtc_client_tx_socket ; /**< tx to controller mgmnt */
msgClassSock* mtc_client_tx_socket_c0_clstr ; /**< tx to controller-0 clstr i/f */
msgClassSock* mtc_client_tx_socket_c1_clstr ; /**< tx to controller-1 clstr i/f */
msgClassSock* mtc_client_clstr_rx_socket ; /**< rx from controller clstr */
int mtc_mgmnt_cmd_port ; /**< mtc command port mgmnt i/f */
int mtc_clstr_cmd_port ; /**< mtc command port clstr i/f */
msgClassSock* mtc_client_mgmt_rx_socket ; /**< rx from controller mgmt */
msgClassSock* mtc_client_mgmt_tx_socket ; /**< tx to controller mgmnt */
msgClassSock* mtc_client_clstr_tx_socket_c0 ; /**< tx to controller-0 clstr */
msgClassSock* mtc_client_clstr_tx_socket_c1 ; /**< tx to controller-1 clstr */
msgClassSock* mtc_client_clstr_rx_socket ; /**< rx from controller clstr */
int mtc_mgmnt_cmd_port ; /**< mtc command port mgmnt */
int mtc_clstr_cmd_port ; /**< mtc command port clstr */
struct sockaddr_in mtc_cmd_addr ; /**< socket attributes mgmnt */
/***************************************************************/
@ -106,6 +107,12 @@ typedef struct
struct timeval waitd ;
fd_set readfds;
/** IPV4 Pxeboot transmit and receive sockets and ports */
int pxeboot_tx_socket ;
int mtc_tx_pxeboot_port ;
int pxeboot_rx_socket ;
int mtc_rx_pxeboot_port ;
/** Active Monitor Socket */
int amon_socket ;
@ -115,7 +122,7 @@ typedef struct
int netlink_sock ; /* netlink socket */
int ioctl_sock ; /* general ioctl socket */
float msg_rate ;
} mtc_socket_type ;

View File

@ -16,11 +16,11 @@ inventory_port = 6385 ; The Inventory Port Number
keystone_port = 5000 ; The Keystone Port Number
ha_port = 7777 ; The Inventory Port Number
mtc_agent_port = 2101 ; OBS: ........ Active Controller Maintenance Rx Port
mtc_rx_mgmnt_port = 2101 ; Active Controller Maintenance Mgmnt Network Rx Port
mtc_rx_clstr_port = 2111 ; Active Controller Maintenance Clstr Network Rx Port
mtc_rx_mgmnt_port = 2101 ; mtcAgent management network msg receive port
mtc_rx_pxeboot_port = 2102 ; mtcAgent pxeboot network msg receive port
mtc_rx_clstr_port = 2111 ; mtcAgent cluster-host network msg receive port
hbs_agent_mgmnt_port = 2103 ; Management Interface Heartbeat Pulse Response Rx Port
hbs_agent_clstr_port = 2113 ; Cluster-host Interface Heartbeat Pulse Response Rx Port
clstr_agent_port = 2110 ; Agent Command Response RX Port
mtc_to_hbs_cmd_port = 2104 ; Mtc to Hbs Command Port Number
mtc_to_guest_cmd_port = 2108 ; Mtc to guestAgent Command port
hbs_to_mtc_event_port = 2107 ; Hbs to Mtc Event Port Number
@ -87,6 +87,7 @@ scheduling_priority = 45 ; realtime scheduling; range of 1 .. 99
mtc_rx_mgmnt_port = 2118 ; Client Maintenance Mgmnt Command Rx Port
mtc_rx_clstr_port = 2115 ; Client Maintenance Clstr Command Rx Port
mtc_rx_pxeboot_port = 2119 ; Client Maintenance pxeboot Command RX Port
hbs_client_mgmnt_port = 2106 ; Management Interface Heartbeat Pulse Request Rx Port
hbs_client_clstr_port = 2116 ; Cluster-host Interface Heartbeat Pulse Request Rx Port
hwmon_cmd_port = 2114 ; hwmond Command Rx Port Number