Fix maintenance cluster-host messaging

Maintenance's success path messaging does not depend on cluster
network messaging. However, a number of failure modes do depend
on cluster network messaging in order to be properly diagnosed
and to be handled in a way that offers higher availability.

For instance, when the management interface goes down, remote hosts
can become isolated unless cluster network messaging is available.
Being able to command a reboot of a host over the cluster-host network
offers higher availability.

Maintenance is designed to use the cluster network, if provisioned, as a
backup path for mtcAlive, node locked, reboot and several other commands
and acknowledgements.

Unfortunately, it was recently observed that maintenance is using
the 'controller-nfs' label to resolve cluster network addressing,
and that label resolves to management network IPs. As a result, all
messages intended to go over the cluster-host network are instead
just redundant management network messages.

During debug of this issue several additional cluster network
messaging related issues were observed and fixed.

This update implements the following fixes:

1. since there is no floating address for the cluster network the
   mtcClient was modified to send messages to both controllers where
   only the active controller will be listening and acting.
2. fixes the port number on which mtce listens for cluster-host network messages.
3. fixes the port number to which mtce sends cluster-host network messages.
4. mtcAlive messages are also sent on provisioned cluster network.
5. locked state notifications and acks sent on provisioned cluster network.
6. reboot request and acks sent on provisioned cluster network.
7. fixed command acknowledgement messaging.

This update also:

1. envelops the mtcAlive gate control in an accessor to allow debug
   tracing of all gate state changes.
2. moves graceful recovery handling heartbeat failure state clear to the
   end of the recovery handler, just before heartbeat start.
3. adds sm unhealthy support to fail and automatically recover the
   inactive controller from an SM UNHEALTHY state.

----------
Test Plan:
----------

Functional:

PASS: Verify management network messaging
PASS: Verify cluster-host network messaging
PASS: Verify cluster-host messages with tcpdump
PASS: Verify cluster-host network mtcAlive messaging
PASS: Verify reboot request and ack reply over management network
PASS: Verify reboot request and ack reply over cluster-host network
PASS: Verify lock state notification and ack reply over management network
PASS: Verify lock state notification and ack reply over cluster-host network
PASS: Verify acknowledgement messaging
PASS: Verify maintenance daemon logging
PASS: Verify maintenance socket initialization

System:

PASS: Verify compute system install
PASS: Verify AIO system install

Feature:

PASS: Verify sm node unhealthy handling (active:ignore, inactive:recover)

Change-Id: I092596d3e22438dd8a613a073614c188f6f5721d
Closes-Bug: #835268
Signed-off-by: Eric MacDonald <eric.macdonald@windriver.com>
This commit is contained in:
Eric MacDonald 2019-07-12 14:18:20 -04:00
parent 3aea82253e
commit 62532a7eac
14 changed files with 551 additions and 374 deletions

View File

@ -265,6 +265,14 @@ void print_mtc_message ( string hostname,
iface,
msg.hdr);
}
else if (( daemon_get_cfg_ptr()->debug_alive&1) && ( msg.cmd == MTC_MSG_MTCALIVE ))
{
alog ("%s %s (%s network) - %s\n",
hostname.c_str(),
direction ? "rx <-" : "tx ->" ,
iface,
msg.hdr);
}
else
{
mlog1 ("%s %s (%s network) - %s\n",
@ -276,7 +284,7 @@ void print_mtc_message ( string hostname,
return ;
}
string str = "-" ;
string str = "" ;
if ( msg.buf[0] )
str = msg.buf ;
if ( force )

View File

@ -393,6 +393,8 @@ void daemon_exit ( void );
/* This label will resolve to an IP on the management network */
#define CONTROLLER_NFS ((const char *)"controller-nfs")
#define CONTROLLER_0_CLUSTER_HOST ((const char *)"controller-0-cluster-host")
#define CONTROLLER_1_CLUSTER_HOST ((const char *)"controller-1-cluster-host")
/* Maintenance Daemon Services - actual names of the daemons */
/* ... controller only service / daemons */

View File

@ -699,7 +699,7 @@ int get_hostname ( char * hostname_ptr, int max_len )
rc = gethostname(hostname_ptr, max_len );
if ( rc == PASS )
{
ilog ("Hostname : %s\n", hostname_ptr);
ilog ("%s", hostname_ptr);
}
else
{
@ -751,7 +751,7 @@ int get_iface_address ( const char * iface_ptr, string & ip_addr , bool retry )
if ( rc == PASS )
{
ip_addr = ip_cstr;
dlog ("IP Address : %s\n", ip_addr.c_str() );
ilog ("%s %s\n", iface_ptr, ip_addr.c_str());
}
else
{

View File

@ -233,9 +233,6 @@ int daemon_run_testhead ( void );
#define CONFIG_AGENT_SECRET_PORT 0x20000000 /**< Barbican HTTP port */
#define CONFIG_AGENT_VIM_EVENT_PORT 0x40000000 /**< VIM Event Port Mask */
#define CONFIG_AGENT_PORT CONFIG_AGENT_MTC_MGMNT_PORT
#define CONFIG_CLIENT_PORT CONFIG_CLIENT_MTC_MGMNT_PORT
typedef struct {
struct timespec ts ;
struct tm t;

View File

@ -545,7 +545,7 @@ nodeLinkClass::node* nodeLinkClass::addNode( string hostname )
ptr->action = "none" ;
ptr->clear_task = false ;
ptr->mtcAlive_gate = true ;
ctl_mtcAlive_gate( ptr , true ) ;
ptr->mtcAlive_online = false ;
ptr->mtcAlive_offline = true ;
ptr->mtcAlive_misses = 0 ;
@ -1463,7 +1463,7 @@ int nodeLinkClass::avail_status_change ( string hostname,
{
node_ptr->mtcAlive_misses = 0 ;
node_ptr->mtcAlive_hits = 0 ;
node_ptr->mtcAlive_gate = false ;
this->ctl_mtcAlive_gate ( node_ptr, false ) ;
}
/* check for need to generate power on log */
@ -1696,16 +1696,10 @@ int nodeLinkClass::alarm_insv_failure ( struct nodeLinkClass::node * node_ptr )
/* Clear the enable alarm and degrade flag */
int nodeLinkClass::alarm_enabled_clear ( struct nodeLinkClass::node * node_ptr, bool force )
{
if ( node_ptr->degrade_mask & DEGRADE_MASK_ENABLE )
{
node_ptr->degrade_mask &= ~DEGRADE_MASK_ENABLE ;
}
unsigned int clear_mask = DEGRADE_MASK_ENABLE |
DEGRADE_MASK_INSV_TEST ;
/* The inservice test degrade flag needs to be cleared too. */
if ( node_ptr->degrade_mask & DEGRADE_MASK_INSV_TEST )
{
node_ptr->degrade_mask &= ~DEGRADE_MASK_INSV_TEST ;
}
node_ptr->degrade_mask &= ~clear_mask ;
if (( node_ptr->alarms[MTC_ALARM_ID__ENABLE] != FM_ALARM_SEVERITY_CLEAR ) ||
( force == true ))
@ -2350,18 +2344,19 @@ int nodeLinkClass::mod_host ( node_inv_type & inv )
modify = true ; /* we have a delta */
}
if ( node_ptr->clstr_ip.compare ( inv.clstr_ip ) )
{
if ( hostUtil_is_valid_ip_addr ( inv.clstr_ip ))
{
plog ("%s Modify 'clstr_ip' from %s -> %s\n",
node_ptr->hostname.c_str(),
node_ptr->clstr_ip.c_str(), inv.clstr_ip.c_str() );
modify = true ; /* we have a delta */
node_ptr->clstr_ip = inv.clstr_ip ;
}
if (( hostUtil_is_valid_ip_addr ( inv.clstr_ip )) &&
( node_ptr->clstr_ip != inv.clstr_ip ))
{
plog ("%s Modify 'clstr_ip' from %s -> %s\n",
node_ptr->hostname.c_str(),
node_ptr->clstr_ip.c_str(),
inv.clstr_ip.c_str() );
modify = true ; /* we have a delta */
node_ptr->clstr_ip = inv.clstr_ip ;
}
if ( (!inv.name.empty()) && (node_ptr->hostname.compare ( inv.name)) )
{
mtcCmd cmd ;
@ -3455,17 +3450,14 @@ void nodeLinkClass::set_cmd_resp ( string & hostname, mtc_message_type & msg )
}
else
{
node_ptr->cmdRsp = msg.cmd ;
if ( msg.num > 0 )
node_ptr->cmdRsp_status = msg.parm[0] ;
else
node_ptr->cmdRsp_status = -1 ;
dlog ("%s '%s' command response status [%u:%s]\n",
hostname.c_str(),
node_ptr->cmdName.c_str(),
msg.num ? node_ptr->cmdRsp_status : PASS,
node_ptr->cmdRsp_status_string.empty() ? "empty" : node_ptr->cmdRsp_status_string.c_str());
if ( node_ptr->cmdRsp != msg.cmd )
{
node_ptr->cmdRsp = msg.cmd ;
if ( msg.num > 0 )
node_ptr->cmdRsp_status = msg.parm[0] ;
else
node_ptr->cmdRsp_status = -1 ;
}
}
}
}
@ -3514,7 +3506,12 @@ int nodeLinkClass::set_activeClient ( string hostname, mtc_client_enum client )
*
* Name : set_mtcAlive
*
* Description:
* Description: Set the mgmnt or clust specific mtc alive received bool.
*
* Used in the offline handler to verify overall offline state.
*
* Interfaces : Public with hostname.
* Private by node pointer.
*
* If mtcAlive is ungated then
*
@ -3528,6 +3525,14 @@ void nodeLinkClass::set_mtcAlive ( string & hostname, int interface )
nodeLinkClass::node* node_ptr ;
node_ptr = nodeLinkClass::getNode ( hostname );
if ( node_ptr != NULL )
{
this->set_mtcAlive ( node_ptr, interface );
}
}
void nodeLinkClass::set_mtcAlive ( struct nodeLinkClass::node * node_ptr, int interface )
{
if ( node_ptr )
{
if ( node_ptr->mtcAlive_gate == false )
{
@ -3537,48 +3542,110 @@ void nodeLinkClass::set_mtcAlive ( string & hostname, int interface )
if ( interface == CLSTR_INTERFACE )
{
node_ptr->mtcAlive_clstr = true ;
if ( node_ptr->mtcAlive_clstr == false )
{
alog2 ("%s %s mtcAlive received",
node_ptr->hostname.c_str(),
get_iface_name_str(interface));
node_ptr->mtcAlive_clstr = true ;
}
}
else
{
node_ptr->mtcAlive_mgmnt = true ;
if ( node_ptr->mtcAlive_mgmnt == false )
{
alog2 ("%s %s mtcAlive received",
node_ptr->hostname.c_str(),
get_iface_name_str(interface));
node_ptr->mtcAlive_mgmnt = true ;
}
}
}
}
}
/*****************************************************************************
*
* Name : get_mtcAlive_gate
*
* Description: Return the current mtcAlive gate state.
*
* This hostname variant looks the node up by name and defers
* to the node pointer variant. If the node is not found it
* returns true, i.e. alive messages are assumed to be gated.
*
* Interfaces : Public with hostname.
* Private by node pointer.
*
****************************************************************************/
bool nodeLinkClass::get_mtcAlive_gate ( string & hostname )
{
nodeLinkClass::node* node_ptr ;
node_ptr = nodeLinkClass::getNode ( hostname );
if ( node_ptr != NULL )
{
/* node found ; defer to the node pointer variant */
return ( get_mtcAlive_gate (node_ptr)) ;
}
/* If we can't find the node then assume alive messages are gated */
return (true);
}
/* Node pointer variant of get_mtcAlive_gate.
 *
 * For a non-null node_ptr this produces an alog3 of the gate state
 * ("closed" when mtcAlive_gate is true, "open" otherwise) and returns
 * node_ptr->mtcAlive_gate. For a null node_ptr it returns true. */
bool nodeLinkClass::get_mtcAlive_gate ( struct nodeLinkClass::node * node_ptr )
{
if ( node_ptr )
{
/* log the gate state on every query ; true is reported as closed */
alog3 ("%s mtcAlive gate: %s",
node_ptr->hostname.c_str(),
node_ptr->mtcAlive_gate ? "closed" : "open" );
return ( node_ptr->mtcAlive_gate ) ;
}
/* If we can't find the node then gate off the alive messages */
return (true);
}
void nodeLinkClass::ctl_mtcAlive_gate ( string & hostname, bool gated )
/*****************************************************************************
*
* Name : ctl_mtcAlive_gate
*
* Description: Control the mtcAlive gate state.
* Produce an alog on state changes.
*
* Interfaces : Public with hostname.
* Private by node pointer.
*
****************************************************************************/
void nodeLinkClass::ctl_mtcAlive_gate ( string & hostname, bool gate_state )
{
nodeLinkClass::node* node_ptr ;
node_ptr = nodeLinkClass::getNode ( hostname );
if ( node_ptr != NULL )
{
node_ptr->mtcAlive_gate = gated ;
if ( gated == true )
ctl_mtcAlive_gate ( node_ptr, gate_state );
}
}
void nodeLinkClass::ctl_mtcAlive_gate ( struct nodeLinkClass::node * node_ptr,
bool gate_state )
{
if ( node_ptr )
{
if ( node_ptr->mtcAlive_gate != gate_state )
{
alog ("%s mtcAlive gated\n", node_ptr->hostname.c_str());
}
else
{
alog ("%s mtcAlive ungated\n", node_ptr->hostname.c_str());
node_ptr->mtcAlive_gate = gate_state ;
if ( node_ptr->mtcAlive_gate == true )
{
alog ("%s mtcAlive gate closed",
node_ptr->hostname.c_str());
}
else
{
alog ("%s mtcAlive gate open",
node_ptr->hostname.c_str());
}
}
}
}
/* Main-Function Go Enabled member Functions */
/* Main-Function Go Enabled member Functions */
void nodeLinkClass::set_goEnabled ( string & hostname )
{
nodeLinkClass::node* node_ptr ;
@ -3691,7 +3758,7 @@ void nodeLinkClass::set_uptime_refresh_ctr ( string & hostname, int value )
if ( node_ptr != NULL )
{
node_ptr->uptime_refresh_counter = value ;
}
}
}
@ -3706,7 +3773,7 @@ int nodeLinkClass::get_uptime_refresh_ctr ( string & hostname )
return (0);
}
void nodeLinkClass::set_mtce_flags ( string hostname, int flags )
void nodeLinkClass::set_mtce_flags ( string hostname, int flags, int iface )
{
nodeLinkClass::node* node_ptr = nodeLinkClass::getNode ( hostname );
if ( node_ptr != NULL )
@ -3718,6 +3785,35 @@ void nodeLinkClass::set_mtce_flags ( string hostname, int flags )
else
node_ptr->goEnabled = false ;
/*
* Fail the inactive controller if the sm unhealthy flag is set.
* Degrade for the active controller.
*/
if (( flags & MTC_FLAG__SM_UNHEALTHY ) &&
(( node_ptr->operState == MTC_OPER_STATE__ENABLED ) ||
( node_ptr->adminAction == MTC_ADMIN_ACTION__RECOVER )))
{
if (( hostname == CONTROLLER_0 ) || ( hostname == CONTROLLER_1 ))
{
elog ("%s reported unhealthy by SM (%s)",
hostname.c_str(),
get_iface_name_str(iface));
if ( hostname != this->my_hostname )
{
force_full_enable ( node_ptr );
}
/* no else cause because mtcAgent does nothing if this file
* is present on the active controller. */
}
else
{
slog ("%s reported unhealthy by SM ; compare error",
hostname.c_str());
}
}
/* Track host patching state by Out-Of-Band flag */
if ( flags & MTC_FLAG__PATCHING )
{
@ -6235,7 +6331,7 @@ int nodeLinkClass::availStatusChange ( struct nodeLinkClass::node * node_ptr,
{
node_ptr->mtcAlive_misses = 0 ;
node_ptr->mtcAlive_hits = 0 ;
node_ptr->mtcAlive_gate = false ;
this->ctl_mtcAlive_gate ( node_ptr, false ) ;
}
/* check for need to generate power on log */
@ -8175,7 +8271,7 @@ int nodeLinkClass::lost_pulses ( iface_enum iface, bool & storage_0_responding )
// pulse_ptr->max_count[iface]++ ;
/*
* Update storage_0_responding reference to false if storgate-0
* Update storage_0_responding reference to false if storage-0
* is found in the pulse lots list.
*/
if ( pulse_ptr->hostname == STORAGE_0 )
@ -8572,12 +8668,12 @@ void nodeLinkClass::mem_log_mtcalive ( struct nodeLinkClass::node * node_ptr )
{
char str[MAX_MEM_LOG_DATA] ;
snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tmtcAlive: on:%c off:%c Cnt:%d State:%s Misses:%d\n",
snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tmtcAlive: online:%c offline:%c Cnt:%d Gate:%s Misses:%d\n",
node_ptr->hostname.c_str(),
node_ptr->mtcAlive_online ? 'Y' : 'N',
node_ptr->mtcAlive_offline ? 'Y' : 'N',
node_ptr->mtcAlive_count,
node_ptr->mtcAlive_gate ? "gated" : "rxing",
node_ptr->mtcAlive_gate ? "closed" : "open",
node_ptr->mtcAlive_misses);
mem_log (str);
}

View File

@ -818,6 +818,10 @@ private:
void start_offline_handler ( struct nodeLinkClass::node * node_ptr );
void stop_offline_handler ( struct nodeLinkClass::node * node_ptr );
bool get_mtcAlive_gate ( struct nodeLinkClass::node * node_ptr );
void ctl_mtcAlive_gate ( struct nodeLinkClass::node * node_ptr, bool gate_state );
void set_mtcAlive ( struct nodeLinkClass::node * node_ptr, int interface );
/*****************************************************************************
*
* Name : ipmi_command_send
@ -1701,7 +1705,7 @@ public:
#define MTC_FLAG__I_AM_HEALTHY (0x00000004)
#define MTC_FLAG__I_AM_LOCKED (0x00000008)
*/
void set_mtce_flags ( string hostname, int flags );
void set_mtce_flags ( string hostname, int flags, int iface );
/** Updates the node's health code
* Codes are found in nodeBase.h

View File

@ -356,6 +356,7 @@ int nodeLinkClass::cmd_handler ( struct nodeLinkClass::node * node_ptr )
}
case MTC_CMD_STAGE__REBOOT:
{
int rc = PASS ;
bool send_reboot_ok = false ;
node_ptr->reboot_cmd_ack_mgmnt = false ;
@ -364,11 +365,13 @@ int nodeLinkClass::cmd_handler ( struct nodeLinkClass::node * node_ptr )
/* send reboot command */
node_ptr->cmdReq = MTC_CMD_REBOOT ;
node_ptr->cmdRsp = MTC_CMD_NONE ;
plog ("%s Performing REBOOT (mgmnt network)\n", node_ptr->hostname.c_str());
if ( send_mtc_cmd ( node_ptr->hostname, MTC_CMD_REBOOT, MGMNT_INTERFACE ) != PASS )
if (( rc = send_mtc_cmd ( node_ptr->hostname,
MTC_CMD_REBOOT,
MGMNT_INTERFACE )) != PASS )
{
wlog ("%s REBOOT Request Failed (mgmnt network)\n",
node_ptr->hostname.c_str());
wlog ("%s reboot request failed (%s) (rc:%d)\n",
node_ptr->hostname.c_str(),
get_iface_name_str(MGMNT_INTERFACE), rc);
}
else
{
@ -377,11 +380,13 @@ int nodeLinkClass::cmd_handler ( struct nodeLinkClass::node * node_ptr )
if ( clstr_network_provisioned == true )
{
plog ("%s Performing REBOOT (cluster-host network)\n", node_ptr->hostname.c_str());
if ( send_mtc_cmd ( node_ptr->hostname, MTC_CMD_REBOOT, CLSTR_INTERFACE ) != PASS )
if (( rc = send_mtc_cmd ( node_ptr->hostname,
MTC_CMD_REBOOT,
CLSTR_INTERFACE )) != PASS )
{
wlog ("%s REBOOT Request Failed (cluster-host network)\n",
node_ptr->hostname.c_str());
wlog ("%s 'reboot' request failed (%s) (rc:%d)\n",
node_ptr->hostname.c_str(),
get_iface_name_str(CLSTR_INTERFACE), rc);
}
else
{

View File

@ -62,6 +62,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
mtc_message_type msg ;
int rc = FAIL ;
ctrl_type * ctrl_ptr = get_ctrl_ptr() ;
bool log_ack = true ;
if ( interface == CLSTR_INTERFACE )
{
@ -124,6 +125,10 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
{
self = true ;
}
string interface_name = get_iface_name_str (interface) ;
string command_name = get_mtcNodeCommand_str(msg.cmd) ;
print_mtc_message ( get_hostname(), MTC_CMD_RX, msg, interface_name.data(), false );
/* Message version greater than zero have the hosts management
* mac address appended to the header string */
@ -133,10 +138,11 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
if ( strncmp ( &msg.hdr[MSG_HEADER_SIZE-1], ctrl_ptr->macaddr.data(), MSG_HEADER_SIZE ))
{
wlog ("%s command not for this host (exp:%s det:%s) ; ignoring ...\n",
get_mtcNodeCommand_str(msg.cmd),
command_name.c_str(),
ctrl_ptr->macaddr.c_str(),
&msg.hdr[MSG_HEADER_SIZE-1]);
rc = FAIL_INVALID_DATA ;
print_mtc_message ( get_hostname(), MTC_CMD_RX, msg, interface_name.data(), true );
return (FAIL_INVALID_DATA);
}
}
@ -150,7 +156,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
rc = PASS ;
if ( msg.cmd == MTC_REQ_MTCALIVE )
{
mlog1 ("mtcAlive request received (%s network)\n", get_iface_name_str (interface));
mlog1 ("mtcAlive request received (%s network)\n", interface_name.c_str());
return ( send_mtcAlive_msg ( sock_ptr, get_who_i_am(), interface ));
}
else if ( msg.cmd == MTC_MSG_LOCKED )
@ -158,10 +164,15 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
/* Only recreate the file if its not already present */
if ( daemon_is_file_present ( NODE_LOCKED_FILE ) == false )
{
log_ack = true ;
ilog ("%s locked (%s)", get_hostname().c_str(), interface_name.c_str() );
daemon_log ( NODE_LOCKED_FILE,
"This node is currently in the administratively locked state" );
}
return (PASS);
else
{
log_ack = false ;
}
}
else if ( msg.cmd == MTC_MSG_SUBF_GOENABLED_FAILED )
{
@ -193,7 +204,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
}
else
{
ilog ("GoEnabled request posted (%s)\n",get_iface_name_str (interface));
ilog ("GoEnabled request posted (%s)\n", interface_name.c_str());
ctrl_ptr->posted_script_set.push_back ( GOENABLED_MAIN_SCRIPTS );
ctrl_ptr->posted_script_set.unique();
}
@ -220,7 +231,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
}
else
{
ilog ("GoEnabled Subf request posted (%s)\n", get_iface_name_str (interface));
ilog ("GoEnabled Subf request posted (%s)\n", interface_name.c_str());
/* Cleanup test result flag files */
if ( daemon_is_file_present ( GOENABLED_SUBF_PASS) )
@ -241,11 +252,16 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
}
else if ( msg.cmd == MTC_CMD_REBOOT )
{
ilog ("Reboot command received (%s)\n", get_iface_name_str (interface));
ilog ("%s command received (%s)",
command_name.c_str(),
interface_name.c_str());
}
else if ( msg.cmd == MTC_CMD_LAZY_REBOOT )
{
ilog ("Lazy Reboot command received (%s) ; delay:%d seconds\n", get_iface_name_str (interface), msg.num ? msg.parm[0] : 0 );
ilog ("%s command received (%s) ; delay:%d seconds\n",
command_name.c_str(),
interface_name.c_str(),
msg.num ? msg.parm[0] : 0 );
}
else if ( is_host_services_cmd ( msg.cmd ) == true )
{
@ -258,7 +274,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
( ctrl_ptr->hostservices.monitor == msg.cmd ))
{
wlog ("%s already in progress (%d:%d)\n",
get_mtcNodeCommand_str(msg.cmd),
command_name.c_str(),
ctrl_ptr->hostservices.posted,
ctrl_ptr->hostservices.monitor );
@ -270,8 +286,8 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
ctrl_ptr->posted_script_set.unique ();
ilog ("%s request posted (%s)\n",
get_mtcNodeCommand_str(msg.cmd),
get_iface_name_str (interface));
command_name.c_str(),
interface_name.c_str());
ctrl_ptr->hostservices.posted = msg.cmd ;
ctrl_ptr->hostservices.monitor = MTC_CMD_NONE ;
@ -283,16 +299,16 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
{
rc = FAIL_FIT ;
wlog ("%s Start Services - fit failure (%s)\n",
get_mtcNodeCommand_str(msg.cmd),
get_iface_name_str (interface) );
command_name.c_str(),
interface_name.c_str() );
}
/* Fault insertion - fail to send host services ACK */
if ( ( daemon_is_file_present ( MTC_CMD_FIT__NO_HS_ACK )))
{
wlog ("%s Start Services - fit no ACK (%s)\n",
get_mtcNodeCommand_str(msg.cmd),
get_iface_name_str (interface) );
command_name.c_str(),
interface_name.c_str() );
return (PASS);
}
@ -312,15 +328,15 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
}
else if ( msg.cmd == MTC_CMD_WIPEDISK )
{
ilog ("Reload command received (%s)\n", get_iface_name_str (interface));
ilog ("Reload command received (%s)\n", interface_name.c_str());
}
else if ( msg.cmd == MTC_CMD_RESET )
{
ilog ("Reset command received (%s)\n", get_iface_name_str (interface));
ilog ("Reset command received (%s)\n", interface_name.c_str());
}
else if ( msg.cmd == MTC_CMD_LOOPBACK )
{
ilog ("Loopback command received (%s)\n", get_iface_name_str (interface));
ilog ("Loopback command received (%s)\n", interface_name.c_str());
}
else
{
@ -334,12 +350,12 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
{
if ( msg.cmd == MTC_MSG_MAIN_GOENABLED )
{
ilog ("main function goEnabled results acknowledged (%s)\n", get_iface_name_str (interface));
ilog ("main function goEnabled results acknowledged (%s)\n", interface_name.c_str());
return (PASS);
}
else if ( msg.cmd == MTC_MSG_SUBF_GOENABLED )
{
ilog ("sub-function goEnabled results acknowledged (%s)\n", get_iface_name_str (interface));
ilog ("sub-function goEnabled results acknowledged (%s)\n", interface_name.c_str());
return (PASS);
}
else
@ -351,7 +367,13 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
else if ( strstr ( &msg.hdr[0], get_worker_msg_header()) )
{
elog ("Unsupported Message\n");
elog ("unsupported worker message\n");
print_mtc_message ( &msg );
return PASS ;
}
else
{
elog ("unsupported message\n");
print_mtc_message ( &msg );
return PASS ;
}
@ -364,57 +386,75 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
* if ( rc == PASS )
**********************************************************/
{
rc = PASS ;
bytes = sizeof(mtc_message_type)-BUF_SIZE;
/* Fault insertion for no command ACK */
if (( interface == MGMNT_INTERFACE ) && ( daemon_is_file_present ( MTC_CMD_FIT__NO_MGMNT_ACK )))
{
wlog ("%s reply ack message - fit bypass (%s)\n",
get_mtcNodeCommand_str(msg.cmd),
get_iface_name_str (interface) );
}
else if (( interface == CLSTR_INTERFACE ) && ( daemon_is_file_present ( MTC_CMD_FIT__NO_CLSTR_ACK )))
{
wlog ("%s reply ack message - fit bypass (%s)\n",
get_mtcNodeCommand_str(msg.cmd),
get_iface_name_str (interface) );
}
/* Otherwise, send the message back either over the mgmnt or clstr interface */
else if ( interface == MGMNT_INTERFACE )
/* send the message back either over the mgmnt or clstr interface */
if ( interface == MGMNT_INTERFACE )
{
if (( sock_ptr->mtc_client_tx_socket ) &&
( sock_ptr->mtc_client_tx_socket->sock_ok() == true ))
{
rc=sock_ptr->mtc_client_tx_socket->write((char*)&msg.hdr[0], bytes);
rc = sock_ptr->mtc_client_tx_socket->write((char*)&msg.hdr[0], bytes);
if ( rc <= 0 )
{
elog ("%s reply send (mtc_client_tx_socket) failed (%s) (rc:%d)",
command_name.c_str(),
interface_name.c_str(), rc);
}
else if ( log_ack )
{
ilog ("%s reply send (%s)",
command_name.c_str(),
interface_name.c_str());
}
}
else
{
elog ("cannot send to null or failed socket (%s network)\n",
get_iface_name_str (interface) );
interface_name.c_str() );
}
}
else if ( interface == CLSTR_INTERFACE )
{
if (( sock_ptr->mtc_client_clstr_tx_socket ) &&
( sock_ptr->mtc_client_clstr_tx_socket->sock_ok() == true ))
if (( sock_ptr->mtc_client_tx_socket_c0_clstr ) &&
( sock_ptr->mtc_client_tx_socket_c0_clstr->sock_ok() == true ))
{
rc = sock_ptr->mtc_client_clstr_tx_socket->write((char*)&msg.hdr[0], bytes);
rc = sock_ptr->mtc_client_tx_socket_c0_clstr->write((char*)&msg.hdr[0], bytes);
if ( rc <= 0 )
{
elog ("%s reply send (mtc_client_tx_socket_c0_clstr) failed (%s) (rc:%d)",
command_name.c_str(),
interface_name.c_str(), rc);
}
else if ( log_ack )
{
ilog ("%s reply send (%s)",
command_name.c_str(),
interface_name.c_str());
}
}
else
if (( sock_ptr->mtc_client_tx_socket_c1_clstr ) &&
( sock_ptr->mtc_client_tx_socket_c1_clstr->sock_ok() == true ))
{
elog ("cannot send to null or failed socket (%s network)\n",
get_iface_name_str (interface) );
rc = sock_ptr->mtc_client_tx_socket_c1_clstr->write((char*)&msg.hdr[0], bytes);
if ( rc <= 0 )
{
elog ("%s reply send (mtc_client_tx_socket_c1_clstr) failed (%s) (rc:%d)",
command_name.c_str(),
interface_name.c_str(), rc);
}
else if ( log_ack )
{
ilog ("%s reply send (%s)",
command_name.c_str(),
interface_name.c_str());
}
}
}
if (rc != bytes )
{
elog ("failed to send reply message (%d)\n", rc);
}
else
{
print_mtc_message ( get_hostname(), MTC_CMD_TX, msg, get_iface_name_str(interface), false );
}
print_mtc_message ( get_hostname(), MTC_CMD_TX, msg, interface_name.data(), (rc != bytes) );
/* get the shutdown delay config alue */
int delay = daemon_get_cfg_ptr()->failsafe_shutdown_delay ;
@ -427,10 +467,10 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
{
if ( daemon_is_file_present ( MTC_CMD_FIT__NO_REBOOT ) )
{
ilog ("Reboot - fit bypass (%s)\n", get_iface_name_str (interface));
ilog ("Reboot - fit bypass (%s)\n", interface_name.c_str());
return (PASS);
}
ilog ("Reboot (%s)\n", get_iface_name_str (interface));
ilog ("Reboot (%s)\n", interface_name.c_str());
daemon_log ( NODE_RESET_FILE, "reboot command" );
fork_sysreq_reboot ( delay );
rc = system("/usr/bin/systemctl reboot");
@ -439,7 +479,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
{
if ( daemon_is_file_present ( MTC_CMD_FIT__NO_REBOOT ) )
{
ilog ("Lazy Reboot - fit bypass (%s)\n", get_iface_name_str (interface));
ilog ("Lazy Reboot - fit bypass (%s)\n", interface_name.c_str());
return (PASS);
}
daemon_log ( NODE_RESET_FILE, "lazy reboot command" );
@ -447,7 +487,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
{
do
{
ilog ("Lazy Reboot (%s) ; rebooting in %d seconds\n", get_iface_name_str (interface), msg.num ? msg.parm[0] : 1 );
ilog ("Lazy Reboot (%s) ; rebooting in %d seconds\n", interface_name.c_str(), msg.num ? msg.parm[0] : 1 );
sleep (1);
if ( msg.parm[0] % 5 )
{
@ -458,7 +498,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
}
else
{
ilog ("Lazy Reboot (%s) ; now\n", get_iface_name_str (interface) );
ilog ("Lazy Reboot (%s) ; now\n", interface_name.c_str() );
}
fork_sysreq_reboot ( delay );
rc = system("/usr/bin/systemctl reboot");
@ -467,10 +507,10 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
{
if ( daemon_is_file_present ( MTC_CMD_FIT__NO_RESET ) )
{
ilog ("Reset - fit bypass (%s)\n", get_iface_name_str (interface));
ilog ("Reset - fit bypass (%s)\n", interface_name.c_str());
return (PASS);
}
ilog ("Reset 'reboot -f' (%s)\n", get_iface_name_str (interface));
ilog ("Reset 'reboot -f' (%s)\n", interface_name.c_str());
daemon_log ( NODE_RESET_FILE, "reset command" );
fork_sysreq_reboot ( delay/2 );
rc = system("/usr/bin/systemctl reboot --force");
@ -481,7 +521,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
if ( daemon_is_file_present ( MTC_CMD_FIT__NO_WIPEDISK ) )
{
ilog ("Wipedisk - fit bypass (%s)\n", get_iface_name_str (interface));
ilog ("Wipedisk - fit bypass (%s)\n", interface_name.c_str());
return (PASS);
}
/* We fork a reboot as a fail safe.
@ -499,7 +539,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
}
else if( 0 == parent ) /* we're the child */
{
ilog ("Disk wipe in progress (%s)\n", get_iface_name_str (interface));
ilog ("Disk wipe in progress (%s)\n", interface_name.c_str());
daemon_log ( NODE_RESET_FILE, "wipedisk command" );
rc = system("/usr/local/bin/wipedisk --force");
ilog ("Disk wipe complete - Forcing Reboot ...\n");
@ -509,7 +549,6 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
}
rc = PASS ;
fflush(stdout);
}
return (rc);
}
@ -761,28 +800,56 @@ int send_mtc_msg ( mtc_socket_type * sock_ptr, int cmd , string identity )
int send_mtcAlive_msg_failed = 0 ;
int send_mtcAlive_msg ( mtc_socket_type * sock_ptr, string identity, int interface )
{
mtc_message_type msg ;
msgClassSock * mtcAlive_tx_sock_ptr = NULL ;
int rc = FAIL ;
if (( interface == CLSTR_INTERFACE ) &&
( get_ctrl_ptr()->clstr_iface_provisioned != true ))
{
dlog2 ("cannot send to unprovisioned %s interface\n",
get_iface_name_str(interface) );
return (rc);
return (FAIL);
}
mtc_message_type msg ;
int bytes = create_mtcAlive_msg ( msg, MTC_MSG_MTCALIVE, identity, interface );
if ( interface == MGMNT_INTERFACE )
{
/* management interface */
mtcAlive_tx_sock_ptr = sock_ptr->mtc_client_tx_socket ;
/* Send to controller floating address */
if (( sock_ptr->mtc_client_tx_socket ) &&
( sock_ptr->mtc_client_tx_socket->sock_ok() == true ))
{
print_mtc_message ( CONTROLLER, MTC_CMD_TX, msg, get_iface_name_str(MGMNT_INTERFACE), false );
sock_ptr->mtc_client_tx_socket->write((char*)&msg.hdr[0], bytes) ;
}
else
{
elog("mtc_client_tx_socket not ok");
}
}
else if ( interface == CLSTR_INTERFACE )
{
/* cluster-host interface */
mtcAlive_tx_sock_ptr = sock_ptr->mtc_client_clstr_tx_socket ;
/* Send to controller-0 cluster address */
if (( sock_ptr->mtc_client_tx_socket_c0_clstr ) &&
( sock_ptr->mtc_client_tx_socket_c0_clstr->sock_ok() == true ))
{
print_mtc_message ( CONTROLLER_0, MTC_CMD_TX, msg, get_iface_name_str(CLSTR_INTERFACE), false );
sock_ptr->mtc_client_tx_socket_c0_clstr->write((char*)&msg.hdr[0], bytes ) ;
}
else
{
elog("mtc_client_tx_socket_c0_clstr not ok");
}
/* Send to controller-1 cluster address */
if (( sock_ptr->mtc_client_tx_socket_c1_clstr ) &&
( sock_ptr->mtc_client_tx_socket_c1_clstr->sock_ok() == true ))
{
print_mtc_message ( CONTROLLER_1, MTC_CMD_TX, msg, get_iface_name_str(CLSTR_INTERFACE), false );
sock_ptr->mtc_client_tx_socket_c1_clstr->write((char*)&msg.hdr[0], bytes ) ;
}
else
{
elog("mtc_client_tx_socket_c1_clstr not ok");
}
}
else
{
@ -791,53 +858,7 @@ int send_mtcAlive_msg ( mtc_socket_type * sock_ptr, string identity, int interfa
return (FAIL_BAD_PARM);
}
if ( daemon_is_file_present ( MTC_CMD_FIT__NO_MTCALIVE ))
{
wlog ("mtcAlive - fit bypass\n");
return (PASS);
}
else
{
int bytes = create_mtcAlive_msg ( msg, MTC_MSG_MTCALIVE, identity, interface );
if (( mtcAlive_tx_sock_ptr ) &&
( mtcAlive_tx_sock_ptr->sock_ok() == true ))
{
if ((rc = mtcAlive_tx_sock_ptr->write((char*)&msg.hdr[0], bytes)) != bytes )
{
if ( rc == -1 )
{
wlog_throttled (send_mtcAlive_msg_failed, 100 ,
"failed to send <%s:%d> (%d:%m) (%s)\n",
mtcAlive_tx_sock_ptr->get_dst_str(),
mtcAlive_tx_sock_ptr->get_dst_addr()->getPort(),
errno, get_iface_name_str(interface) );
}
else
{
wlog_throttled ( send_mtcAlive_msg_failed, 100 ,
"sent only %d of %d bytes to <%s:%d> (%s)\n",
rc, bytes,
mtcAlive_tx_sock_ptr->get_dst_str(),
mtcAlive_tx_sock_ptr->get_dst_addr()->getPort(),
get_iface_name_str(interface) );
}
rc = FAIL_SOCKET_SENDTO ;
}
else
{
send_mtcAlive_msg_failed = 0 ;
print_mtc_message ( get_hostname(), MTC_CMD_TX, msg, get_iface_name_str(interface), false );
rc = PASS ;
}
}
else
{
elog ("cannot send to null or failed socket (%s network)\n",
get_iface_name_str(interface));
}
}
return (rc) ;
return (PASS) ;
}
/* Accelerated Virtual Switch 'events' socket

View File

@ -50,9 +50,6 @@ using namespace std;
int service_events ( nodeLinkClass * obj_ptr, mtc_socket_type * sock_ptr );
/* Throttle logging of messages from unknown IP addresses */
std::list<string> unknown_ip_list ;
/* Send specified command to the guestAgent daemon */
int send_guest_command ( string hostname, int command )
{
@ -163,6 +160,7 @@ int mtc_service_inbox ( nodeLinkClass * obj_ptr,
zero_unused_msg_buf (msg, bytes);
/* get the sender's hostname */
string hostaddr = "" ;
string hostname = "" ;
if ( iface == CLSTR_INTERFACE )
@ -175,20 +173,22 @@ int mtc_service_inbox ( nodeLinkClass * obj_ptr,
hostaddr = sock_ptr->mtc_agent_rx_socket->get_src_str();
hostname = obj_ptr->get_hostname ( hostaddr ) ;
}
/* lookup failed if hostname remains empty. */
if ( hostname.empty() )
{
std::list<string>::iterator iter ;
iter = std::find (unknown_ip_list.begin(), unknown_ip_list.end(), hostaddr );
if ( iter == unknown_ip_list.end() )
/* try and learn the cluster ip from a mtcAlive message. */
if (( msg.cmd == MTC_MSG_MTCALIVE ) &&
(( rc = jsonUtil_get_key_val ( &msg.buf[0], "hostname", hostname )) == PASS ))
{
mlog3 ( "Received message from unknown IP <%s>\n", hostaddr.c_str());
unknown_ip_list.push_front(hostaddr);
ilog ("%s learned from mtcAlive", hostname.c_str());
}
else
{
wlog ("unknown hostname message ... dropping" ); /* make dlog */
print_mtc_message ( hostname, MTC_CMD_RX, msg, get_iface_name_str(iface), true );
return (FAIL_GET_HOSTNAME);
}
return (FAIL_NOT_FOUND);
}
else if ( ! hostaddr.empty() )
{
unknown_ip_list.remove (hostaddr);
}
print_mtc_message ( hostname, MTC_CMD_RX, msg, get_iface_name_str(iface), false );
@ -244,6 +244,26 @@ int mtc_service_inbox ( nodeLinkClass * obj_ptr,
else if ( strstr ( &msg.hdr[0], get_cmd_rsp_msg_header() ) )
{
obj_ptr->set_cmd_resp ( hostname , msg ) ;
if ( msg.num > 0 )
{
if (( msg.cmd != MTC_MSG_LOCKED ) &&
( msg.cmd != MTC_CMD_HOST_SVCS_RESULT ))
{
ilog ("%s '%s' ACK (rc:%d) (%s)",
hostname.c_str(),
get_mtcNodeCommand_str(msg.cmd),
msg.parm[0],
get_iface_name_str(iface));
}
else
{
mlog ("%s '%s' ACK (rc:%d) (%s)",
hostname.c_str(),
get_mtcNodeCommand_str(msg.cmd),
msg.parm[0],
get_iface_name_str(iface));
}
}
}
/*
@ -267,30 +287,35 @@ int mtc_service_inbox ( nodeLinkClass * obj_ptr,
wlog ("%s failed to load functions from mtcAlive message\n", hostname.c_str());
return (FAIL_NODETYPE);
}
if ( obj_ptr->clstr_network_provisioned == true )
{
string cluster_host_ip = "";
/* Get the clstr ip address if it is provisioned */
rc = jsonUtil_get_key_val ( &msg.buf[0], "cluster_host_ip", cluster_host_ip );
if ( rc == PASS )
{
obj_ptr->set_clstr_hostaddr ( hostname, cluster_host_ip );
}
else
{
wlog ("%s missing 'cluster_host_ip' value (rc:%d)\n", hostname.c_str(), rc);
}
}
obj_ptr->set_uptime ( hostname , msg.parm[MTC_PARM_UPTIME_IDX], false );
obj_ptr->set_health ( hostname , msg.parm[MTC_PARM_HEALTH_IDX] );
obj_ptr->set_mtce_flags ( hostname , msg.parm[MTC_PARM_FLAGS_IDX] );
obj_ptr->set_mtce_flags ( hostname , msg.parm[MTC_PARM_FLAGS_IDX], iface );
obj_ptr->set_mtcAlive ( hostname, iface );
mlog1("%s Uptime:%d Health:%d Flags:0x%x mtcAlive:%s\n",
mlog1("%s Uptime:%d Health:%d Flags:0x%x mtcAlive:%s (%s)\n",
hostname.c_str(),
msg.parm[MTC_PARM_UPTIME_IDX],
msg.parm[MTC_PARM_HEALTH_IDX],
msg.parm[MTC_PARM_FLAGS_IDX],
obj_ptr->get_mtcAlive_gate ( hostname ) ? "gated" : "open");
obj_ptr->get_mtcAlive_gate ( hostname ) ? "gated" : "open",
get_iface_name_str(iface));
string cluster_host_ip = "";
/* Get the clstr ip address if it is provisioned */
rc = jsonUtil_get_key_val ( &msg.buf[0], "cluster_host_ip", cluster_host_ip );
if ( rc == PASS )
{
obj_ptr->set_clstr_hostaddr ( hostname, cluster_host_ip );
}
else
{
mlog ("%s null or missing 'cluster_host_ip' value (rc:%d)\n", hostname.c_str(), rc);
}
}
else if ( msg.cmd == MTC_MSG_MAIN_GOENABLED )
{
@ -546,19 +571,6 @@ int mtc_service_inbox ( nodeLinkClass * obj_ptr,
wlog ( "Received unsupported or badly formed message\n" );
}
/* Only do this if the debug level is appropriate */
if ( daemon_get_cfg_ptr()->debug_msg )
{
int count = 0 ;
std::list<string>::iterator iter ;
for ( iter = unknown_ip_list.begin () ;
iter != unknown_ip_list.end () ;
iter++ )
{
count++ ;
mlog3 ("Unknown IP [%d]:%s\n", count, iter->c_str());
}
}
return (rc);
}
@ -667,55 +679,56 @@ int send_mtc_cmd ( string & hostname, int cmd , int interface )
{
int bytes = 0;
/* Temporarily get IP from node inventory till dns is available */
nodeLinkClass * obj_ptr = get_mtcInv_ptr ();
/* add the mac address of the target card to the header
* Note: the minus 1 is to overwqrite the null */
* Note: the minus 1 is to overwrite the null */
snprintf ( &mtc_cmd.hdr[MSG_HEADER_SIZE-1], MSG_HEADER_SIZE, "%s", obj_ptr->get_hostIfaceMac(hostname, MGMNT_IFACE).data());
/* Let's add the controller's floating ip in the buffer so that the host knows where to reply */
snprintf ( &mtc_cmd.buf[0], obj_ptr->my_float_ip.length()+1, "%s", obj_ptr->my_float_ip.data());
/* only send the minimum amount of data */
bytes = (sizeof(mtc_message_type)-(BUF_SIZE-(obj_ptr->my_float_ip.length()+1))) ;
string data = "{\"address\":\"";
data.append(obj_ptr->my_float_ip) ;
data.append("\",\"interface\":\"");
data.append(get_iface_name_str(interface));
data.append("\"}");
snprintf ( &mtc_cmd.buf[0], data.length()+1, "%s", data.data());
bytes = (sizeof(mtc_message_type)-(BUF_SIZE-(data.length()+1)));
print_mtc_message ( hostname, MTC_CMD_TX, mtc_cmd, get_iface_name_str(interface), force ) ;
if (interface == MGMNT_INTERFACE)
{
string hostaddr = obj_ptr->get_hostaddr(hostname);
#ifdef WANT_FIT_TESTING
if ( daemon_want_fit ( FIT_CODE__INVALIDATE_MGMNT_IP, hostname ) )
hostaddr = "none" ;
#endif
if ( hostUtil_is_valid_ip_addr ( hostaddr ) != true )
{
wlog("%s has no management IP assigned\n", hostname.c_str());
wlog("%s has invalid management addr '%s'\n",
hostname.c_str(),
hostaddr.c_str());
return (FAIL_HOSTADDR_LOOKUP);
}
/* rc = message size */
rc = sock_ptr->mtc_agent_tx_socket->write((char *)&mtc_cmd, bytes, hostaddr.c_str(), sock_ptr->mtc_cmd_port);
mlog ("%s sending %s request to %s (%s)",
hostname.c_str(),
get_mtcNodeCommand_str(cmd),
hostaddr.c_str(),
get_iface_name_str(interface));
rc = sock_ptr->mtc_agent_tx_socket->write((char *)&mtc_cmd, bytes, hostaddr.c_str(), sock_ptr->mtc_mgmnt_cmd_port);
}
else if ((interface == CLSTR_INTERFACE) &&
( obj_ptr->clstr_network_provisioned == true ) &&
( sock_ptr->mtc_agent_clstr_tx_socket != NULL ))
{
/* SETUP TX -> COMPUTE SOCKET CLSTR INTERFACE */
string clstr_hostaddr = obj_ptr->get_clstr_hostaddr(hostname);
#ifdef WANT_FIT_TESTING
if ( daemon_want_fit ( FIT_CODE__INVALIDATE_CLSTR_IP, hostname ) )
clstr_hostaddr = "none" ;
#endif
if ( hostUtil_is_valid_ip_addr( clstr_hostaddr ) != true )
{
return (FAIL_NO_CLSTR_PROV);
}
rc = sock_ptr->mtc_agent_clstr_tx_socket->write((char *)&mtc_cmd, bytes, clstr_hostaddr.c_str(), sock_ptr->mtc_cmd_port);
mlog ("%s sending %s request to %s (%s)",
hostname.c_str(),
get_mtcNodeCommand_str(cmd),
clstr_hostaddr.c_str(),
get_iface_name_str(interface));
rc = sock_ptr->mtc_agent_clstr_tx_socket->write((char *)&mtc_cmd, bytes, clstr_hostaddr.c_str(), sock_ptr->mtc_clstr_cmd_port);
}
if ( 0 > rc )

View File

@ -171,12 +171,17 @@ void _close_mgmnt_tx_socket ( void )
}
}
void _close_clstr_tx_socket ( void )
void _close_clstr_tx_sockets ( void )
{
if (mtc_sock.mtc_client_clstr_tx_socket)
if (mtc_sock.mtc_client_tx_socket_c0_clstr)
{
delete (mtc_sock.mtc_client_clstr_tx_socket);
mtc_sock.mtc_client_clstr_tx_socket = 0 ;
delete (mtc_sock.mtc_client_tx_socket_c0_clstr);
mtc_sock.mtc_client_tx_socket_c0_clstr = 0 ;
}
if (mtc_sock.mtc_client_tx_socket_c1_clstr)
{
delete (mtc_sock.mtc_client_tx_socket_c1_clstr);
mtc_sock.mtc_client_tx_socket_c1_clstr = 0 ;
}
}
@ -196,7 +201,7 @@ void daemon_exit ( void )
_close_mgmnt_rx_socket ();
_close_clstr_rx_socket ();
_close_mgmnt_tx_socket ();
_close_clstr_tx_socket ();
_close_clstr_tx_sockets();
_close_amon_sock ();
exit (0) ;
@ -214,13 +219,18 @@ static int mtc_config_handler ( void * user,
if (MATCH("agent", "mtc_agent_port"))
{
config_ptr->mtc_agent_port = atoi(value);
config_ptr->mask |= CONFIG_AGENT_PORT ;
config_ptr->mask |= CONFIG_AGENT_MTC_MGMNT_PORT ;
}
else if (MATCH("client", "mtc_rx_mgmnt_port"))
{
config_ptr->mtc_rx_mgmnt_port = atoi(value);
config_ptr->mask |= CONFIG_CLIENT_MTC_MGMNT_PORT ;
}
else if (MATCH("client", "mtc_rx_clstr_port"))
{
config_ptr->mtc_rx_clstr_port = atoi(value);
config_ptr->mask |= CONFIG_CLIENT_MTC_CLSTR_PORT ;
}
else if (MATCH("timeouts", "failsafe_shutdown_delay"))
{
config_ptr->failsafe_shutdown_delay = atoi(value);
@ -289,10 +299,9 @@ void setup_mgmnt_rx_socket ( void )
ilog("Mgmnt iface : %s\n", ctrl.mgmnt_iface.c_str() );
get_iface_macaddr ( ctrl.mgmnt_iface.data(), ctrl.macaddr );
get_iface_address ( ctrl.mgmnt_iface.data(), ctrl.address , true );
get_hostname ( &ctrl.hostname[0], MAX_HOST_NAME_SIZE );
_close_mgmnt_rx_socket ();
mtc_sock.mtc_client_rx_socket = new msgClassRx(ctrl.address.c_str(),mtc_sock.mtc_cmd_port, IPPROTO_UDP, ctrl.mgmnt_iface.data(), false );
mtc_sock.mtc_client_rx_socket = new msgClassRx(ctrl.address.c_str(),mtc_sock.mtc_mgmnt_cmd_port, IPPROTO_UDP, ctrl.mgmnt_iface.data(), false );
/* update health of socket */
if ( mtc_sock.mtc_client_rx_socket )
@ -328,12 +337,13 @@ void setup_clstr_rx_socket ( void )
* calls daemon_get_iface_master inside so the
* aggregated name is returned if it exists */
get_clstr_iface (&mtc_config.clstr_iface );
if ( strlen(mtc_config.clstr_iface) )
ctrl.clstr_iface = mtc_config.clstr_iface ;
if ( !ctrl.clstr_iface.empty())
{
/* Only get the cluster-host network address if it is provisioned */
if ( get_iface_address ( mtc_config.clstr_iface, ctrl.address_clstr, false ) == PASS )
if ( get_iface_address ( ctrl.clstr_iface.data(), ctrl.address_clstr, false ) == PASS )
{
ilog ("Cluster-host iface : %s\n", mtc_config.clstr_iface );
ilog ("Cluster-host iface : %s\n", ctrl.clstr_iface.c_str());
ilog ("Cluster-host addr : %s\n", ctrl.address_clstr.c_str());
}
}
@ -342,7 +352,7 @@ void setup_clstr_rx_socket ( void )
_close_clstr_rx_socket ();
/* Only set up the socket if a cluster-host interface is provisioned */
mtc_sock.mtc_client_clstr_rx_socket = new msgClassRx(ctrl.address_clstr.c_str(),mtc_sock.mtc_cmd_port, IPPROTO_UDP, ctrl.clstr_iface.data(), false );
mtc_sock.mtc_client_clstr_rx_socket = new msgClassRx(ctrl.address_clstr.c_str(),mtc_sock.mtc_clstr_cmd_port, IPPROTO_UDP, ctrl.clstr_iface.data(), false );
/* update health of socket */
if ( mtc_sock.mtc_client_clstr_rx_socket )
@ -390,32 +400,60 @@ void setup_mgmnt_tx_socket ( void )
}
}
void setup_clstr_tx_socket ( void )
void setup_clstr_tx_sockets ( void )
{
if ( ctrl.clstr_iface_provisioned == false )
{
return ;
}
dlog ("setup of cluster-host TX\n");
_close_clstr_tx_socket ();
mtc_sock.mtc_client_clstr_tx_socket = new msgClassTx(CONTROLLER_NFS,mtc_sock.mtc_agent_port, IPPROTO_UDP, mtc_config.clstr_iface);
dlog ("setup of %s TX\n", CONTROLLER_0_CLUSTER_HOST);
if ( mtc_sock.mtc_client_clstr_tx_socket )
_close_clstr_tx_sockets ();
mtc_sock.mtc_client_tx_socket_c0_clstr =
new msgClassTx(CONTROLLER_0_CLUSTER_HOST,
mtc_sock.mtc_agent_port,
IPPROTO_UDP,
mtc_config.clstr_iface);
if ( mtc_sock.mtc_client_tx_socket_c0_clstr )
{
/* look for fault insertion request */
if ( daemon_is_file_present ( MTC_CMD_FIT__CLSTR_TXSOCK ) )
mtc_sock.mtc_client_clstr_tx_socket->return_status = FAIL ;
if ( mtc_sock.mtc_client_clstr_tx_socket->return_status == PASS )
if ( mtc_sock.mtc_client_tx_socket_c0_clstr->return_status == PASS )
{
mtc_sock.mtc_client_clstr_tx_socket->sock_ok(true);
mtc_sock.mtc_client_tx_socket_c0_clstr->sock_ok(true);
}
else
{
elog ("failed to init 'cluster-host tx' socket (rc:%d)\n",
mtc_sock.mtc_client_clstr_tx_socket->return_status );
mtc_sock.mtc_client_clstr_tx_socket->sock_ok(false);
elog ("failed to init '%s' tx socket (rc:%d)\n",
CONTROLLER_0_CLUSTER_HOST,
mtc_sock.mtc_client_tx_socket_c0_clstr->return_status );
mtc_sock.mtc_client_tx_socket_c0_clstr->sock_ok(false);
}
}
if ( ctrl.system_type != SYSTEM_TYPE__CPE_MODE__SIMPLEX )
{
dlog ("setup of %s TX\n", CONTROLLER_1_CLUSTER_HOST);
mtc_sock.mtc_client_tx_socket_c1_clstr =
new msgClassTx(CONTROLLER_1_CLUSTER_HOST,
mtc_sock.mtc_agent_port,
IPPROTO_UDP,
mtc_config.clstr_iface);
if ( mtc_sock.mtc_client_tx_socket_c1_clstr )
{
if ( mtc_sock.mtc_client_tx_socket_c1_clstr->return_status == PASS )
{
mtc_sock.mtc_client_tx_socket_c1_clstr->sock_ok(true);
}
else
{
elog ("failed to init '%s' tx socket (rc:%d)\n",
CONTROLLER_0_CLUSTER_HOST,
mtc_sock.mtc_client_tx_socket_c1_clstr->return_status );
mtc_sock.mtc_client_tx_socket_c1_clstr->sock_ok(false);
}
}
}
}
@ -463,7 +501,7 @@ void setup_amon_socket ( void )
* 1. Unicast receive socket mgmnt (mtc_client_rx_socket)
* 2. Unicast receive socket clstr (mtc_client_clstr_rx_socket)
* 3. Unicast transmit socket mgmnt (mtc_client_tx_socket)
* 4. Unicast transmit socket clstr (mtc_client_clstr_tx_socket)
* 4. Unicast transmit socket clstr (mtc_client_tx_socket_c?_clstr)
*
* 5. socket for pmond active monitoring
*
@ -473,8 +511,10 @@ int mtc_socket_init ( void )
/* Setup the Management Interface Receive Socket */
/* Read the port config strings into the socket struct */
mtc_sock.mtc_agent_port = mtc_config.mtc_agent_port;
mtc_sock.mtc_cmd_port = mtc_config.mtc_rx_mgmnt_port;
mtc_sock.mtc_mgmnt_cmd_port = mtc_config.mtc_rx_mgmnt_port;
mtc_sock.mtc_clstr_cmd_port = mtc_config.mtc_rx_clstr_port;
get_hostname ( &ctrl.hostname[0], MAX_HOST_NAME_SIZE );
ctrl.mtcAgent_ip = getipbyname ( CONTROLLER );
ilog ("Controller : %s\n", ctrl.mtcAgent_ip.c_str());
@ -489,8 +529,8 @@ int mtc_socket_init ( void )
setup_mgmnt_tx_socket ();
/* Manage Cluster-host network setup */
string clstr_iface_name = daemon_clstr_iface();
string mgmnt_iface_name = daemon_mgmnt_iface();
string clstr_iface_name = daemon_clstr_iface();
if ( !clstr_iface_name.empty() )
{
if ( clstr_iface_name != mgmnt_iface_name )
@ -504,7 +544,7 @@ int mtc_socket_init ( void )
/*************************************************************/
/* Setup the Clstr Interface Transmit Messaging to mtcAgent */
/*************************************************************/
setup_clstr_tx_socket () ;
setup_clstr_tx_sockets () ;
}
}
@ -1225,8 +1265,8 @@ void daemon_service_run ( void )
if (( mtc_sock.mtc_client_rx_socket == NULL ) ||
( mtc_sock.mtc_client_rx_socket->sock_ok() == false ))
{
setup_mgmnt_rx_socket();
wlog ("calling setup_mgmnt_rx_socket (auto-recovery)\n");
setup_mgmnt_rx_socket();
socket_reinit = true ;
}
@ -1234,8 +1274,8 @@ void daemon_service_run ( void )
else if (( mtc_sock.mtc_client_tx_socket == NULL ) ||
( mtc_sock.mtc_client_tx_socket->sock_ok() == false ))
{
setup_mgmnt_tx_socket();
wlog ("calling setup_mgmnt_tx_socket\n");
setup_mgmnt_tx_socket();
socket_reinit = true ;
}
@ -1244,18 +1284,20 @@ void daemon_service_run ( void )
(( mtc_sock.mtc_client_clstr_rx_socket == NULL ) ||
( mtc_sock.mtc_client_clstr_rx_socket->sock_ok() == false )))
{
setup_clstr_rx_socket();
wlog ("calling setup_clstr_rx_socket (auto-recovery)\n");
setup_clstr_rx_socket();
socket_reinit = true ;
}
/* Clstr Tx */
else if (( ctrl.clstr_iface_provisioned == true ) &&
(( mtc_sock.mtc_client_clstr_tx_socket == NULL ) ||
( mtc_sock.mtc_client_clstr_tx_socket->sock_ok() == false )))
(( mtc_sock.mtc_client_tx_socket_c0_clstr == NULL ) ||
( mtc_sock.mtc_client_tx_socket_c1_clstr == NULL ) ||
( mtc_sock.mtc_client_tx_socket_c0_clstr->sock_ok() == false ) ||
( mtc_sock.mtc_client_tx_socket_c1_clstr->sock_ok() == false )))
{
setup_clstr_tx_socket();
wlog ("calling setup_clstr_tx_socket (auto-recovery)\n");
wlog ("calling setup_clstr_tx_sockets (auto-recovery)\n");
setup_clstr_tx_sockets();
socket_reinit = true ;
}
@ -1311,18 +1353,14 @@ void daemon_service_run ( void )
if ( daemon_is_file_present ( MTC_CMD_FIT__CLSTR_RXSOCK ))
{
if ( mtc_sock.mtc_client_clstr_rx_socket )
{
mtc_sock.mtc_client_clstr_rx_socket->sock_ok (false);
_close_clstr_rx_socket ();
}
}
if ( daemon_is_file_present ( MTC_CMD_FIT__CLSTR_TXSOCK ))
{
if ( mtc_sock.mtc_client_clstr_tx_socket )
{
mtc_sock.mtc_client_clstr_tx_socket->sock_ok (false);
_close_clstr_tx_socket ();
}
if ( mtc_sock.mtc_client_tx_socket_c0_clstr )
mtc_sock.mtc_client_tx_socket_c0_clstr->sock_ok (false);
if ( mtc_sock.mtc_client_tx_socket_c1_clstr )
mtc_sock.mtc_client_tx_socket_c1_clstr->sock_ok (false);
}
if ( daemon_is_file_present ( MTC_CMD_FIT__AMON_SOCK ))
{

View File

@ -18,8 +18,9 @@
#include <unistd.h>
/** Compute Config mask */
/* Config bits included in the mtcClient config mask : the agent mtc
 * management port plus the client management and cluster-host network
 * command ports. */
#define CONFIG_CLIENT_MASK (CONFIG_AGENT_MTC_MGMNT_PORT  |\
                            CONFIG_CLIENT_MTC_MGMNT_PORT |\
                            CONFIG_CLIENT_MTC_CLSTR_PORT)
#define MAX_RUN_SCRIPTS (20)

View File

@ -155,18 +155,6 @@ void daemon_exit ( void )
if (mtc_sock.mtc_agent_tx_socket)
delete (mtc_sock.mtc_agent_tx_socket);
if (mtc_sock.mtc_client_rx_socket)
delete(mtc_sock.mtc_client_rx_socket);
if (mtc_sock.mtc_client_tx_socket)
delete (mtc_sock.mtc_client_tx_socket);
if (mtc_sock.mtc_client_clstr_rx_socket)
delete (mtc_sock.mtc_client_clstr_rx_socket);
if (mtc_sock.mtc_client_clstr_tx_socket)
delete (mtc_sock.mtc_client_clstr_tx_socket);
if (mtc_sock.mtc_event_rx_sock)
delete (mtc_sock.mtc_event_rx_sock);
@ -191,7 +179,8 @@ void daemon_exit ( void )
/** Control Config Mask */
#define CONFIG_AGENT_MASK (CONFIG_AGENT_PORT |\
#define CONFIG_AGENT_MASK (CONFIG_AGENT_MTC_MGMNT_PORT |\
CONFIG_CLIENT_MTC_CLSTR_PORT |\
CONFIG_MTC_TO_HBS_CMD_PORT |\
CONFIG_MTC_TO_HWMON_CMD_PORT |\
CONFIG_HBS_TO_MTC_EVENT_PORT |\
@ -201,7 +190,7 @@ void daemon_exit ( void )
CONFIG_AGENT_LOC_TIMEOUT |\
CONFIG_AGENT_INV_EVENT_PORT |\
CONFIG_AGENT_API_RETRIES |\
CONFIG_CLIENT_PORT)
CONFIG_CLIENT_MTC_MGMNT_PORT)
static int mtc_nfvi_handler ( void * user,
const char * section,
@ -250,7 +239,7 @@ static int mtc_config_handler ( void * user,
else if (MATCH("agent", "mtc_agent_port"))
{
config_ptr->mtc_agent_port = atoi(value);
config_ptr->mask |= CONFIG_AGENT_PORT ;
config_ptr->mask |= CONFIG_AGENT_MTC_MGMNT_PORT ;
}
else if (MATCH("agent", "mtc_to_hbs_cmd_port"))
{
@ -279,7 +268,12 @@ static int mtc_config_handler ( void * user,
else if (MATCH("client", "mtc_rx_mgmnt_port"))
{
config_ptr->cmd_port = atoi(value);
config_ptr->mask |= CONFIG_CLIENT_PORT ;
config_ptr->mask |= CONFIG_CLIENT_MTC_MGMNT_PORT ;
}
else if (MATCH("client", "mtc_rx_clstr_port"))
{
config_ptr->mtc_rx_clstr_port = atoi(value);
config_ptr->mask |= CONFIG_CLIENT_MTC_CLSTR_PORT ;
}
else if (MATCH("agent", "token_refresh_rate"))
{
@ -639,6 +633,7 @@ int daemon_configure ( void )
else
{
mtcInv.clstr_network_provisioned = true ;
ilog ("Cluster network is provisioned" );
}
}
@ -697,11 +692,11 @@ int mtc_socket_init ( void )
/* Read the port config strings into the socket struct */
mtc_sock.mtc_agent_port = mtc_config.mtc_agent_port;
mtc_sock.mtc_cmd_port = mtc_config.cmd_port;
mtc_sock.mtc_mgmnt_cmd_port = mtc_config.cmd_port;
/* create transmit socket */
msgClassAddr::getAddressFromInterface(mtc_config.mgmnt_iface, ip_address, INET6_ADDRSTRLEN);
sock_ptr->mtc_agent_tx_socket = new msgClassTx(ip_address, mtc_config.mtc_agent_port, IPPROTO_UDP, mtc_config.mgmnt_iface);
sock_ptr->mtc_agent_tx_socket = new msgClassTx(ip_address, mtc_sock.mtc_mgmnt_cmd_port, IPPROTO_UDP, mtc_config.mgmnt_iface);
rc = sock_ptr->mtc_agent_tx_socket->return_status;
if(rc != PASS)
{
@ -714,9 +709,12 @@ int mtc_socket_init ( void )
/***********************************************************/
if ( strlen( mtc_config.clstr_iface ) )
{
sock_ptr->mtc_clstr_cmd_port = mtc_config.mtc_rx_clstr_port;
/* create clstr transmit socket only if the interface is provisioned */
msgClassAddr::getAddressFromInterface(mtc_config.clstr_iface, ip_address, INET6_ADDRSTRLEN);
sock_ptr->mtc_agent_clstr_tx_socket = new msgClassTx(ip_address, mtc_config.mtc_agent_port, IPPROTO_UDP, mtc_config.clstr_iface);
sock_ptr->mtc_agent_clstr_tx_socket = new msgClassTx(ip_address, mtc_sock.mtc_clstr_cmd_port, IPPROTO_UDP, mtc_config.clstr_iface);
rc = sock_ptr->mtc_agent_clstr_tx_socket->return_status;
if(rc != PASS)
{
@ -778,8 +776,17 @@ int mtc_socket_init ( void )
if ( mtcInv.clstr_network_provisioned == true )
{
sock_ptr->mtc_agent_clstr_rx_socket =
new msgClassRx(CONTROLLER_NFS, sock_ptr->mtc_agent_port, IPPROTO_UDP );
if ( mtcInv.my_hostname == CONTROLLER_0 )
{
sock_ptr->mtc_agent_clstr_rx_socket =
new msgClassRx(CONTROLLER_0_CLUSTER_HOST, sock_ptr->mtc_agent_port, IPPROTO_UDP );
}
else
{
sock_ptr->mtc_agent_clstr_rx_socket =
new msgClassRx(CONTROLLER_1_CLUSTER_HOST, sock_ptr->mtc_agent_port, IPPROTO_UDP );
}
if (( sock_ptr->mtc_agent_clstr_rx_socket == NULL ) ||
( sock_ptr->mtc_agent_clstr_rx_socket->return_status ))
{

View File

@ -773,7 +773,7 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr )
node_ptr->mtce_flags = 0 ;
/* Assert the mtc alive gate */
node_ptr->mtcAlive_gate = true ;
this->ctl_mtcAlive_gate ( node_ptr, true ) ;
node_ptr->mtcAlive_online = false ;
node_ptr->mtcAlive_offline = true ;
@ -886,9 +886,9 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr )
* in the reboot recovery phase now. Look for the mtcAlive */
/* In self-enable we don't need to purge mtcAlive just need
* to wait for one more. Assum,e offline, not online and open
* to wait for one more. Assume offline, not online and open
* the mtcAlive gate. */
node_ptr->mtcAlive_gate = false ;
this->ctl_mtcAlive_gate ( node_ptr, false ) ;
node_ptr->mtcAlive_online = false ;
node_ptr->mtcAlive_offline = true ;
/* set mtcAlive timeout */
@ -1053,7 +1053,7 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr )
if ( node_ptr->mtcAlive_purge >= 20 )
{
/* open gate */
node_ptr->mtcAlive_gate = false ;
this->ctl_mtcAlive_gate ( node_ptr, false ) ;
node_ptr->mtcAlive_purge = 0 ;
/* timer is started ok so we can do the stage transition */
@ -1173,12 +1173,12 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr )
break ;
}
else if ( node_ptr->mtcAlive_gate == true )
else if ( this->get_mtcAlive_gate (node_ptr) == true )
{
slog ("%s mtcAlive gate unexpectedly set, correcting ...\n",
node_ptr->hostname.c_str());
node_ptr->mtcAlive_gate = false ;
this->ctl_mtcAlive_gate ( node_ptr, false ) ;
}
/* wait some more */
@ -1628,7 +1628,7 @@ int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr )
/* Purge this hosts work queues */
mtcCmd_workQ_purge ( node_ptr );
mtcCmd_doneQ_purge ( node_ptr );
node_ptr->mtcAlive_gate = false ;
this->ctl_mtcAlive_gate ( node_ptr, false );
node_ptr->http_retries_cur = 0 ;
node_ptr->unknown_health_reported = false ;
@ -1648,13 +1648,6 @@ int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr )
/* Disable the heartbeat service for Graceful Recovery */
send_hbs_command ( node_ptr->hostname, MTC_CMD_STOP_HOST );
/* Clear the minor and failure flags if it is set for this host */
for ( int iface = 0 ; iface < MAX_IFACES ; iface++ )
{
hbs_minor_clear ( node_ptr, (iface_enum)iface );
node_ptr->heartbeat_failed[iface] = false ;
}
/* Have we reached the maximum allowed fast recovery attempts.
*
* If we have then force the full enable by
@ -1664,10 +1657,10 @@ int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr )
*/
if ( ++node_ptr->graceful_recovery_counter > MTC_MAX_FAST_ENABLES )
{
/* gate off further mtcAlive messaging timme the offline
* handler runs. This prevents stale messages from making it
* in and prolong the offline detection time */
node_ptr->mtcAlive_gate = true ;
/* gate off further mtcAlive messaging until the offline
* handler runs. This prevents stale messages from making it
* in and prolonging the offline detection time */
this->ctl_mtcAlive_gate ( node_ptr, true ) ;
elog ("%s Graceful Recovery Failed (retries=%d)\n",
node_ptr->hostname.c_str(), node_ptr->graceful_recovery_counter );
@ -2114,12 +2107,12 @@ int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr )
availStatusChange ( node_ptr, MTC_AVAIL_STATUS__OFFLINE );
}
}
else if ( node_ptr->mtcAlive_gate == true )
else if ( this->get_mtcAlive_gate ( node_ptr ) == true )
{
slog ("%s mtcAlive gate unexpectedly set, auto-correcting ...\n",
node_ptr->hostname.c_str());
node_ptr->mtcAlive_gate = false ;
this->ctl_mtcAlive_gate ( node_ptr, false ) ;
}
/* wait some more */
@ -2454,6 +2447,12 @@ int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr )
mtcTimer_reset ( node_ptr->mtcTimer );
}
for ( int iface = 0 ; iface < MAX_IFACES ; iface++ )
{
hbs_minor_clear ( node_ptr, (iface_enum)iface );
node_ptr->heartbeat_failed[iface] = false ;
}
/* Enable the heartbeat service for Graceful Recovery */
send_hbs_command ( node_ptr->hostname, MTC_CMD_START_HOST );
@ -3097,7 +3096,7 @@ int nodeLinkClass::disable_handler ( struct nodeLinkClass::node * node_ptr )
}
/* open the mtcAlive gate while we are disabled */
node_ptr->mtcAlive_gate = false ;
this->ctl_mtcAlive_gate ( node_ptr, false ) ;
disableStageChange( node_ptr, MTC_DISABLE__START );
adminActionChange ( node_ptr , MTC_ADMIN_ACTION__NONE );
@ -3240,7 +3239,7 @@ int nodeLinkClass::offline_handler ( struct nodeLinkClass::node * node_ptr )
operState_enum_to_str(node_ptr->operState).c_str(),
availStatus_enum_to_str(node_ptr->availStatus).c_str());
node_ptr->mtcAlive_gate = false ;
this->ctl_mtcAlive_gate ( node_ptr, false ) ;
node_ptr->mtcAlive_mgmnt = false ;
node_ptr->mtcAlive_clstr = false ;
@ -3261,7 +3260,7 @@ int nodeLinkClass::offline_handler ( struct nodeLinkClass::node * node_ptr )
case MTC_OFFLINE__WAIT:
{
/* be sure the mtcAlive gate is open */
node_ptr->mtcAlive_gate = false ;
this->ctl_mtcAlive_gate (node_ptr, false ) ;
if ( mtcTimer_expired ( node_ptr->offline_timer ) == true )
{
if ( node_ptr->availStatus == MTC_AVAIL_STATUS__OFFLINE )
@ -3369,12 +3368,12 @@ int nodeLinkClass::online_handler ( struct nodeLinkClass::node * node_ptr )
node_ptr->hostname.c_str(),
node_ptr->onlineStage );
if ( node_ptr->mtcAlive_gate == true )
if ( this->get_mtcAlive_gate ( node_ptr ) == true )
{
alog ("%s mtcAlive gate unexpectedly set, correcting ...\n",
node_ptr->hostname.c_str());
node_ptr->mtcAlive_gate = false ;
this->ctl_mtcAlive_gate (node_ptr, false ) ;
}
/* Start with a zero count. This counter is incremented every
@ -3475,7 +3474,8 @@ int nodeLinkClass::online_handler ( struct nodeLinkClass::node * node_ptr )
/* ... keep the 'host locked' file on this host refreshed while in the locked state
* ... send it on both interfaces just in case */
send_mtc_cmd ( node_ptr->hostname , MTC_MSG_LOCKED, MGMNT_INTERFACE );
// send_mtc_cmd ( node_ptr->hostname , MTC_MSG_LOCKED, INFRA_INTERFACE );
if ( clstr_network_provisioned )
send_mtc_cmd ( node_ptr->hostname , MTC_MSG_LOCKED, CLSTR_INTERFACE );
}
/* Start over */
@ -6106,7 +6106,7 @@ int nodeLinkClass::add_handler ( struct nodeLinkClass::node * node_ptr )
send_hwmon_command ( node_ptr->hostname, MTC_CMD_START_HOST );
}
node_ptr->mtcAlive_gate = false ;
this->ctl_mtcAlive_gate(node_ptr, false) ;
node_ptr->addStage = MTC_ADD__DONE ;
break;
}
@ -6522,6 +6522,11 @@ int nodeLinkClass::oos_test_handler ( struct nodeLinkClass::node * node_ptr )
/* Tell the host that it is locked */
send_mtc_cmd ( node_ptr->hostname , MTC_MSG_LOCKED, MGMNT_INTERFACE);
if ( clstr_network_provisioned )
{
ilog ("%s Sending Lock Cluster", node_ptr->hostname.c_str() );
send_mtc_cmd ( node_ptr->hostname , MTC_MSG_LOCKED, CLSTR_INTERFACE );
}
}
break ;
@ -6668,26 +6673,6 @@ int nodeLinkClass::insv_test_handler ( struct nodeLinkClass::node * node_ptr )
send_hbs_command ( this->my_hostname, MTC_CMD_ACTIVE_CTRL );
}
/* Manage active controller auto recovery bool.
* If the inactive controller is inservice then disable
* controller autorecovery. Otherwise enable it but in this case
* don't change the disable bool as that is used to gate auto
* recovery once the threshoild is reached */
// if ( is_controller ( node_ptr ) && NOT_THIS_HOST )
// {
// if (( node_ptr->ar_disabled == false ) &&
// ( node_ptr->operState == MTC_OPER_STATE__ENABLED ))
// {
// autorecovery_clear ( CONTROLLER_0 );
// autorecovery_clear ( CONTROLLER_1 );
// }
//else if (( node_ptr->ar_disabled == true ) &&
// ( node_ptr->operState != MTC_OPER_STATE__ENABLED ))
//{
// node_ptr->ar_disabled = false ;
//}
// }
/* Monitor the health of the host - no pass file */
if (( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) &&
( node_ptr->operState == MTC_OPER_STATE__ENABLED ))

View File

@ -74,15 +74,15 @@ typedef struct
int mtc_agent_clstr_rx_socket_size ;
/** UDP sockets used by the mtcClient to receive maintenance
* commands from and transmit replies to the mtcAgent */
msgClassSock* mtc_client_rx_socket ; /**< rx from controller */
msgClassSock* mtc_client_tx_socket ; /**< tx to controller mgmnt */
msgClassSock* mtc_client_clstr_tx_socket ; /**< tx to controller clstr */
msgClassSock* mtc_client_clstr_rx_socket ; /**< rx from controller clstr */
int mtc_cmd_port ; /**< mtc command port number */
struct sockaddr_in mtc_cmd_addr ; /**< socket attributes mgmnt */
* commands from and transmit replies to the mtcAgent */
msgClassSock* mtc_client_rx_socket ; /**< rx from controller */
msgClassSock* mtc_client_tx_socket ; /**< tx to controller mgmnt */
msgClassSock* mtc_client_tx_socket_c0_clstr ; /**< tx to controller-0 clstr i/f */
msgClassSock* mtc_client_tx_socket_c1_clstr ; /**< tx to controller-1 clstr i/f */
msgClassSock* mtc_client_clstr_rx_socket ; /**< rx from controller clstr */
int mtc_mgmnt_cmd_port ; /**< mtc command port mgmnt i/f */
int mtc_clstr_cmd_port ; /**< mtc command port clstr i/f */
struct sockaddr_in mtc_cmd_addr ; /**< socket attributes mgmnt */
/***************************************************************/