diff --git a/mtce-common/src/common/nodeBase.cpp b/mtce-common/src/common/nodeBase.cpp index e8383abb..c333656b 100755 --- a/mtce-common/src/common/nodeBase.cpp +++ b/mtce-common/src/common/nodeBase.cpp @@ -265,6 +265,14 @@ void print_mtc_message ( string hostname, iface, msg.hdr); } + else if (( daemon_get_cfg_ptr()->debug_alive&1) && ( msg.cmd == MTC_MSG_MTCALIVE )) + { + alog ("%s %s (%s network) - %s\n", + hostname.c_str(), + direction ? "rx <-" : "tx ->" , + iface, + msg.hdr); + } else { mlog1 ("%s %s (%s network) - %s\n", @@ -276,7 +284,7 @@ void print_mtc_message ( string hostname, return ; } - string str = "-" ; + string str = "" ; if ( msg.buf[0] ) str = msg.buf ; if ( force ) diff --git a/mtce-common/src/common/nodeBase.h b/mtce-common/src/common/nodeBase.h index 94c80ca0..ff6398cd 100755 --- a/mtce-common/src/common/nodeBase.h +++ b/mtce-common/src/common/nodeBase.h @@ -393,6 +393,8 @@ void daemon_exit ( void ); /* This label will resolve to an IP on the management network */ #define CONTROLLER_NFS ((const char *)"controller-nfs") +#define CONTROLLER_0_CLUSTER_HOST ((const char *)"controller-0-cluster-host") +#define CONTROLLER_1_CLUSTER_HOST ((const char *)"controller-1-cluster-host") /* Maintenance Daemon Services - actual names of the daemons */ /* ... 
controller only service / daemons */ diff --git a/mtce-common/src/common/nodeUtil.cpp b/mtce-common/src/common/nodeUtil.cpp index e69dbe61..e0f3003a 100755 --- a/mtce-common/src/common/nodeUtil.cpp +++ b/mtce-common/src/common/nodeUtil.cpp @@ -699,7 +699,7 @@ int get_hostname ( char * hostname_ptr, int max_len ) rc = gethostname(hostname_ptr, max_len ); if ( rc == PASS ) { - ilog ("Hostname : %s\n", hostname_ptr); + ilog ("%s", hostname_ptr); } else { @@ -751,7 +751,7 @@ int get_iface_address ( const char * iface_ptr, string & ip_addr , bool retry ) if ( rc == PASS ) { ip_addr = ip_cstr; - dlog ("IP Address : %s\n", ip_addr.c_str() ); + ilog ("%s %s\n", iface_ptr, ip_addr.c_str()); } else { diff --git a/mtce-common/src/daemon/daemon_common.h b/mtce-common/src/daemon/daemon_common.h index 8d7d1044..9ea16339 100755 --- a/mtce-common/src/daemon/daemon_common.h +++ b/mtce-common/src/daemon/daemon_common.h @@ -233,9 +233,6 @@ int daemon_run_testhead ( void ); #define CONFIG_AGENT_SECRET_PORT 0x20000000 /**< Barbican HTTP port */ #define CONFIG_AGENT_VIM_EVENT_PORT 0x40000000 /**< VIM Event Port Mask */ -#define CONFIG_AGENT_PORT CONFIG_AGENT_MTC_MGMNT_PORT -#define CONFIG_CLIENT_PORT CONFIG_CLIENT_MTC_MGMNT_PORT - typedef struct { struct timespec ts ; struct tm t; diff --git a/mtce/src/common/nodeClass.cpp b/mtce/src/common/nodeClass.cpp index 96cd1d89..ec3c3096 100755 --- a/mtce/src/common/nodeClass.cpp +++ b/mtce/src/common/nodeClass.cpp @@ -545,7 +545,7 @@ nodeLinkClass::node* nodeLinkClass::addNode( string hostname ) ptr->action = "none" ; ptr->clear_task = false ; - ptr->mtcAlive_gate = true ; + ctl_mtcAlive_gate( ptr , true ) ; ptr->mtcAlive_online = false ; ptr->mtcAlive_offline = true ; ptr->mtcAlive_misses = 0 ; @@ -1463,7 +1463,7 @@ int nodeLinkClass::avail_status_change ( string hostname, { node_ptr->mtcAlive_misses = 0 ; node_ptr->mtcAlive_hits = 0 ; - node_ptr->mtcAlive_gate = false ; + this->ctl_mtcAlive_gate ( node_ptr, false ) ; } /* check for need to 
generate power on log */ @@ -1696,16 +1696,10 @@ int nodeLinkClass::alarm_insv_failure ( struct nodeLinkClass::node * node_ptr ) /* Clear the enable alarm and degrade flag */ int nodeLinkClass::alarm_enabled_clear ( struct nodeLinkClass::node * node_ptr, bool force ) { - if ( node_ptr->degrade_mask & DEGRADE_MASK_ENABLE ) - { - node_ptr->degrade_mask &= ~DEGRADE_MASK_ENABLE ; - } + unsigned int clear_mask = DEGRADE_MASK_ENABLE | + DEGRADE_MASK_INSV_TEST ; - /* The inservice test degrade flag needs to be cleared too. */ - if ( node_ptr->degrade_mask & DEGRADE_MASK_INSV_TEST ) - { - node_ptr->degrade_mask &= ~DEGRADE_MASK_INSV_TEST ; - } + node_ptr->degrade_mask &= ~clear_mask ; if (( node_ptr->alarms[MTC_ALARM_ID__ENABLE] != FM_ALARM_SEVERITY_CLEAR ) || ( force == true )) @@ -2350,18 +2344,19 @@ int nodeLinkClass::mod_host ( node_inv_type & inv ) modify = true ; /* we have a delta */ } - if ( node_ptr->clstr_ip.compare ( inv.clstr_ip ) ) - { - if ( hostUtil_is_valid_ip_addr ( inv.clstr_ip )) - { - plog ("%s Modify 'clstr_ip' from %s -> %s\n", - node_ptr->hostname.c_str(), - node_ptr->clstr_ip.c_str(), inv.clstr_ip.c_str() ); - modify = true ; /* we have a delta */ - node_ptr->clstr_ip = inv.clstr_ip ; - } + if (( hostUtil_is_valid_ip_addr ( inv.clstr_ip )) && + ( node_ptr->clstr_ip != inv.clstr_ip )) + { + plog ("%s Modify 'clstr_ip' from %s -> %s\n", + node_ptr->hostname.c_str(), + node_ptr->clstr_ip.c_str(), + inv.clstr_ip.c_str() ); + + modify = true ; /* we have a delta */ + node_ptr->clstr_ip = inv.clstr_ip ; } + if ( (!inv.name.empty()) && (node_ptr->hostname.compare ( inv.name)) ) { mtcCmd cmd ; @@ -3455,17 +3450,14 @@ void nodeLinkClass::set_cmd_resp ( string & hostname, mtc_message_type & msg ) } else { - node_ptr->cmdRsp = msg.cmd ; - if ( msg.num > 0 ) - node_ptr->cmdRsp_status = msg.parm[0] ; - else - node_ptr->cmdRsp_status = -1 ; - - dlog ("%s '%s' command response status [%u:%s]\n", - hostname.c_str(), - node_ptr->cmdName.c_str(), - msg.num ? 
node_ptr->cmdRsp_status : PASS, - node_ptr->cmdRsp_status_string.empty() ? "empty" : node_ptr->cmdRsp_status_string.c_str()); + if ( node_ptr->cmdRsp != msg.cmd ) + { + node_ptr->cmdRsp = msg.cmd ; + if ( msg.num > 0 ) + node_ptr->cmdRsp_status = msg.parm[0] ; + else + node_ptr->cmdRsp_status = -1 ; + } } } } @@ -3514,7 +3506,12 @@ int nodeLinkClass::set_activeClient ( string hostname, mtc_client_enum client ) * * Name : set_mtcAlive * - * Description: + * Description: Set the mgmnt or clust specific mtc alive received bool. + * + * Used in the offline handler to verify overall offline state. + * + * Interfaces : Public with hostname. + * Private by node pointer. * * If mtcAlive is ungated then * @@ -3528,6 +3525,14 @@ void nodeLinkClass::set_mtcAlive ( string & hostname, int interface ) nodeLinkClass::node* node_ptr ; node_ptr = nodeLinkClass::getNode ( hostname ); if ( node_ptr != NULL ) + { + this->set_mtcAlive ( node_ptr, interface ); + } +} + +void nodeLinkClass::set_mtcAlive ( struct nodeLinkClass::node * node_ptr, int interface ) +{ + if ( node_ptr ) { if ( node_ptr->mtcAlive_gate == false ) { @@ -3537,48 +3542,110 @@ void nodeLinkClass::set_mtcAlive ( string & hostname, int interface ) if ( interface == CLSTR_INTERFACE ) { - node_ptr->mtcAlive_clstr = true ; + if ( node_ptr->mtcAlive_clstr == false ) + { + alog2 ("%s %s mtcAlive received", + node_ptr->hostname.c_str(), + get_iface_name_str(interface)); + node_ptr->mtcAlive_clstr = true ; + } } else { - node_ptr->mtcAlive_mgmnt = true ; + if ( node_ptr->mtcAlive_mgmnt == false ) + { + alog2 ("%s %s mtcAlive received", + node_ptr->hostname.c_str(), + get_iface_name_str(interface)); + node_ptr->mtcAlive_mgmnt = true ; + } } } } } +/***************************************************************************** + * + * Name : get_mtcAlive_gate + * + * Description: Return the current mtcAlive gate state. + * + * Interfaces : Public with hostname. + * Private by node pointer. 
+ * + ****************************************************************************/ + bool nodeLinkClass::get_mtcAlive_gate ( string & hostname ) { nodeLinkClass::node* node_ptr ; node_ptr = nodeLinkClass::getNode ( hostname ); if ( node_ptr != NULL ) { + return ( get_mtcAlive_gate (node_ptr)) ; + } + /* If we can't find the node then assume alive messages are gated */ + return (true); +} + +bool nodeLinkClass::get_mtcAlive_gate ( struct nodeLinkClass::node * node_ptr ) +{ + if ( node_ptr ) + { + alog3 ("%s mtcAlive gate: %s", + node_ptr->hostname.c_str(), + node_ptr->mtcAlive_gate ? "closed" : "open" ); return ( node_ptr->mtcAlive_gate ) ; } /* If we can't find the node then gate off the alive messages */ return (true); } -void nodeLinkClass::ctl_mtcAlive_gate ( string & hostname, bool gated ) +/***************************************************************************** + * + * Name : ctl_mtcAlive_gate + * + * Description: Control the mtcAlive gate state. + * Produce an alog on state changes. + * + * Interfaces : Public with hostname. + * Private by node pointer. 
+ * + ****************************************************************************/ + +void nodeLinkClass::ctl_mtcAlive_gate ( string & hostname, bool gate_state ) { nodeLinkClass::node* node_ptr ; node_ptr = nodeLinkClass::getNode ( hostname ); if ( node_ptr != NULL ) { - node_ptr->mtcAlive_gate = gated ; - if ( gated == true ) + ctl_mtcAlive_gate ( node_ptr, gate_state ); + } +} + +void nodeLinkClass::ctl_mtcAlive_gate ( struct nodeLinkClass::node * node_ptr, + bool gate_state ) +{ + if ( node_ptr ) + { + if ( node_ptr->mtcAlive_gate != gate_state ) { - alog ("%s mtcAlive gated\n", node_ptr->hostname.c_str()); - } - else - { - alog ("%s mtcAlive ungated\n", node_ptr->hostname.c_str()); + node_ptr->mtcAlive_gate = gate_state ; + if ( node_ptr->mtcAlive_gate == true ) + { + alog ("%s mtcAlive gate closed", + node_ptr->hostname.c_str()); + } + else + { + alog ("%s mtcAlive gate open", + node_ptr->hostname.c_str()); + } } } } -/* Main-Function Go Enabled member Functions */ +/* Main-Function Go Enabled member Functions */ void nodeLinkClass::set_goEnabled ( string & hostname ) { nodeLinkClass::node* node_ptr ; @@ -3691,7 +3758,7 @@ void nodeLinkClass::set_uptime_refresh_ctr ( string & hostname, int value ) if ( node_ptr != NULL ) { node_ptr->uptime_refresh_counter = value ; - } + } } @@ -3706,7 +3773,7 @@ int nodeLinkClass::get_uptime_refresh_ctr ( string & hostname ) return (0); } -void nodeLinkClass::set_mtce_flags ( string hostname, int flags ) +void nodeLinkClass::set_mtce_flags ( string hostname, int flags, int iface ) { nodeLinkClass::node* node_ptr = nodeLinkClass::getNode ( hostname ); if ( node_ptr != NULL ) @@ -3718,6 +3785,35 @@ void nodeLinkClass::set_mtce_flags ( string hostname, int flags ) else node_ptr->goEnabled = false ; + /* + * Fail the inactive controller if the sm unhealthy flag is set. + * Degrade for the active controller. 
+ */ + if (( flags & MTC_FLAG__SM_UNHEALTHY ) && + (( node_ptr->operState == MTC_OPER_STATE__ENABLED ) || + ( node_ptr->adminAction == MTC_ADMIN_ACTION__RECOVER ))) + { + if (( hostname == CONTROLLER_0 ) || ( hostname == CONTROLLER_1 )) + { + elog ("%s reported unhealthy by SM (%s)", + hostname.c_str(), + get_iface_name_str(iface)); + + if ( hostname != this->my_hostname ) + { + force_full_enable ( node_ptr ); + } + + /* no else clause because mtcAgent does nothing if this file + * is present on the active controller. */ + } + else + { + slog ("%s reported unhealthy by SM ; compare error", + hostname.c_str()); + } + } + /* Track host patching state by Out-Of-Band flag */ if ( flags & MTC_FLAG__PATCHING ) { @@ -6235,7 +6331,7 @@ int nodeLinkClass::availStatusChange ( struct nodeLinkClass::node * node_ptr, { node_ptr->mtcAlive_misses = 0 ; node_ptr->mtcAlive_hits = 0 ; - node_ptr->mtcAlive_gate = false ; + this->ctl_mtcAlive_gate ( node_ptr, false ) ; } /* check for need to generate power on log */ @@ -8175,7 +8271,7 @@ int nodeLinkClass::lost_pulses ( iface_enum iface, bool & storage_0_responding ) // pulse_ptr->max_count[iface]++ ; /* - * Update storage_0_responding reference to false if storgate-0 + * Update storage_0_responding reference to false if storage-0 * is found in the pulse lots list. */ if ( pulse_ptr->hostname == STORAGE_0 ) @@ -8572,12 +8668,12 @@ void nodeLinkClass::mem_log_mtcalive ( struct nodeLinkClass::node * node_ptr ) { char str[MAX_MEM_LOG_DATA] ; - snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tmtcAlive: on:%c off:%c Cnt:%d State:%s Misses:%d\n", + snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tmtcAlive: online:%c offline:%c Cnt:%d Gate:%s Misses:%d\n", node_ptr->hostname.c_str(), node_ptr->mtcAlive_online ? 'Y' : 'N', node_ptr->mtcAlive_offline ? 'Y' : 'N', node_ptr->mtcAlive_count, node_ptr->mtcAlive_gate ? 
"closed" : "open", node_ptr->mtcAlive_misses); mem_log (str); } diff --git a/mtce/src/common/nodeClass.h b/mtce/src/common/nodeClass.h index c095f89a..756faebb 100755 --- a/mtce/src/common/nodeClass.h +++ b/mtce/src/common/nodeClass.h @@ -818,6 +818,10 @@ private: void start_offline_handler ( struct nodeLinkClass::node * node_ptr ); void stop_offline_handler ( struct nodeLinkClass::node * node_ptr ); + bool get_mtcAlive_gate ( struct nodeLinkClass::node * node_ptr ); + void ctl_mtcAlive_gate ( struct nodeLinkClass::node * node_ptr, bool gate_state ); + void set_mtcAlive ( struct nodeLinkClass::node * node_ptr, int interface ); + /***************************************************************************** * * Name : ipmi_command_send @@ -1701,7 +1705,7 @@ public: #define MTC_FLAG__I_AM_HEALTHY (0x00000004) #define MTC_FLAG__I_AM_LOCKED (0x00000008) */ - void set_mtce_flags ( string hostname, int flags ); + void set_mtce_flags ( string hostname, int flags, int iface ); /** Updates the node's health code * Codes are found in nodeBase.h diff --git a/mtce/src/maintenance/mtcCmdHdlr.cpp b/mtce/src/maintenance/mtcCmdHdlr.cpp index fcc81c6f..b222e28b 100644 --- a/mtce/src/maintenance/mtcCmdHdlr.cpp +++ b/mtce/src/maintenance/mtcCmdHdlr.cpp @@ -356,6 +356,7 @@ int nodeLinkClass::cmd_handler ( struct nodeLinkClass::node * node_ptr ) } case MTC_CMD_STAGE__REBOOT: { + int rc = PASS ; bool send_reboot_ok = false ; node_ptr->reboot_cmd_ack_mgmnt = false ; @@ -364,11 +365,13 @@ int nodeLinkClass::cmd_handler ( struct nodeLinkClass::node * node_ptr ) /* send reboot command */ node_ptr->cmdReq = MTC_CMD_REBOOT ; node_ptr->cmdRsp = MTC_CMD_NONE ; - plog ("%s Performing REBOOT (mgmnt network)\n", node_ptr->hostname.c_str()); - if ( send_mtc_cmd ( node_ptr->hostname, MTC_CMD_REBOOT, MGMNT_INTERFACE ) != PASS ) + if (( rc = send_mtc_cmd ( node_ptr->hostname, + MTC_CMD_REBOOT, + MGMNT_INTERFACE )) != PASS ) { - wlog ("%s REBOOT Request Failed (mgmnt network)\n", - 
node_ptr->hostname.c_str()); + wlog ("%s reboot request failed (%s) (rc:%d)\n", + node_ptr->hostname.c_str(), + get_iface_name_str(MGMNT_INTERFACE), rc); } else { @@ -377,11 +380,13 @@ int nodeLinkClass::cmd_handler ( struct nodeLinkClass::node * node_ptr ) if ( clstr_network_provisioned == true ) { - plog ("%s Performing REBOOT (cluster-host network)\n", node_ptr->hostname.c_str()); - if ( send_mtc_cmd ( node_ptr->hostname, MTC_CMD_REBOOT, CLSTR_INTERFACE ) != PASS ) + if (( rc = send_mtc_cmd ( node_ptr->hostname, + MTC_CMD_REBOOT, + CLSTR_INTERFACE )) != PASS ) { - wlog ("%s REBOOT Request Failed (cluster-host network)\n", - node_ptr->hostname.c_str()); + wlog ("%s 'reboot' request failed (%s) (rc:%d)\n", + node_ptr->hostname.c_str(), + get_iface_name_str(CLSTR_INTERFACE), rc); } else { diff --git a/mtce/src/maintenance/mtcCompMsg.cpp b/mtce/src/maintenance/mtcCompMsg.cpp index 87c8cde2..b5b221e3 100755 --- a/mtce/src/maintenance/mtcCompMsg.cpp +++ b/mtce/src/maintenance/mtcCompMsg.cpp @@ -62,6 +62,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) mtc_message_type msg ; int rc = FAIL ; ctrl_type * ctrl_ptr = get_ctrl_ptr() ; + bool log_ack = true ; if ( interface == CLSTR_INTERFACE ) { @@ -124,6 +125,10 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) { self = true ; } + string interface_name = get_iface_name_str (interface) ; + string command_name = get_mtcNodeCommand_str(msg.cmd) ; + + print_mtc_message ( get_hostname(), MTC_CMD_RX, msg, interface_name.data(), false ); /* Message version greater than zero have the hosts management * mac address appended to the header string */ @@ -133,10 +138,11 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) if ( strncmp ( &msg.hdr[MSG_HEADER_SIZE-1], ctrl_ptr->macaddr.data(), MSG_HEADER_SIZE )) { wlog ("%s command not for this host (exp:%s det:%s) ; ignoring ...\n", - get_mtcNodeCommand_str(msg.cmd), + command_name.c_str(), ctrl_ptr->macaddr.c_str(), 
&msg.hdr[MSG_HEADER_SIZE-1]); - rc = FAIL_INVALID_DATA ; + print_mtc_message ( get_hostname(), MTC_CMD_RX, msg, interface_name.data(), true ); + return (FAIL_INVALID_DATA); } } @@ -150,7 +156,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) rc = PASS ; if ( msg.cmd == MTC_REQ_MTCALIVE ) { - mlog1 ("mtcAlive request received (%s network)\n", get_iface_name_str (interface)); + mlog1 ("mtcAlive request received (%s network)\n", interface_name.c_str()); return ( send_mtcAlive_msg ( sock_ptr, get_who_i_am(), interface )); } else if ( msg.cmd == MTC_MSG_LOCKED ) @@ -158,10 +164,15 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) /* Only recreate the file if its not already present */ if ( daemon_is_file_present ( NODE_LOCKED_FILE ) == false ) { + log_ack = true ; + ilog ("%s locked (%s)", get_hostname().c_str(), interface_name.c_str() ); daemon_log ( NODE_LOCKED_FILE, "This node is currently in the administratively locked state" ); } - return (PASS); + else + { + log_ack = false ; + } } else if ( msg.cmd == MTC_MSG_SUBF_GOENABLED_FAILED ) { @@ -193,7 +204,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) } else { - ilog ("GoEnabled request posted (%s)\n",get_iface_name_str (interface)); + ilog ("GoEnabled request posted (%s)\n", interface_name.c_str()); ctrl_ptr->posted_script_set.push_back ( GOENABLED_MAIN_SCRIPTS ); ctrl_ptr->posted_script_set.unique(); } @@ -220,7 +231,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) } else { - ilog ("GoEnabled Subf request posted (%s)\n", get_iface_name_str (interface)); + ilog ("GoEnabled Subf request posted (%s)\n", interface_name.c_str()); /* Cleanup test result flag files */ if ( daemon_is_file_present ( GOENABLED_SUBF_PASS) ) @@ -241,11 +252,16 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) } else if ( msg.cmd == MTC_CMD_REBOOT ) { - ilog ("Reboot command received (%s)\n", get_iface_name_str 
(interface)); + ilog ("%s command received (%s)", + command_name.c_str(), + interface_name.c_str()); } else if ( msg.cmd == MTC_CMD_LAZY_REBOOT ) { - ilog ("Lazy Reboot command received (%s) ; delay:%d seconds\n", get_iface_name_str (interface), msg.num ? msg.parm[0] : 0 ); + ilog ("%s command received (%s) ; delay:%d seconds\n", + command_name.c_str(), + interface_name.c_str(), + msg.num ? msg.parm[0] : 0 ); } else if ( is_host_services_cmd ( msg.cmd ) == true ) { @@ -258,7 +274,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) ( ctrl_ptr->hostservices.monitor == msg.cmd )) { wlog ("%s already in progress (%d:%d)\n", - get_mtcNodeCommand_str(msg.cmd), + command_name.c_str(), ctrl_ptr->hostservices.posted, ctrl_ptr->hostservices.monitor ); @@ -270,8 +286,8 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) ctrl_ptr->posted_script_set.unique (); ilog ("%s request posted (%s)\n", - get_mtcNodeCommand_str(msg.cmd), - get_iface_name_str (interface)); + command_name.c_str(), + interface_name.c_str()); ctrl_ptr->hostservices.posted = msg.cmd ; ctrl_ptr->hostservices.monitor = MTC_CMD_NONE ; @@ -283,16 +299,16 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) { rc = FAIL_FIT ; wlog ("%s Start Services - fit failure (%s)\n", - get_mtcNodeCommand_str(msg.cmd), - get_iface_name_str (interface) ); + command_name.c_str(), + interface_name.c_str() ); } /* Fault insertion - fail to send host services ACK */ if ( ( daemon_is_file_present ( MTC_CMD_FIT__NO_HS_ACK ))) { wlog ("%s Start Services - fit no ACK (%s)\n", - get_mtcNodeCommand_str(msg.cmd), - get_iface_name_str (interface) ); + command_name.c_str(), + interface_name.c_str() ); return (PASS); } @@ -312,15 +328,15 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) } else if ( msg.cmd == MTC_CMD_WIPEDISK ) { - ilog ("Reload command received (%s)\n", get_iface_name_str (interface)); + ilog ("Reload command received (%s)\n", 
interface_name.c_str()); } else if ( msg.cmd == MTC_CMD_RESET ) { - ilog ("Reset command received (%s)\n", get_iface_name_str (interface)); + ilog ("Reset command received (%s)\n", interface_name.c_str()); } else if ( msg.cmd == MTC_CMD_LOOPBACK ) { - ilog ("Loopback command received (%s)\n", get_iface_name_str (interface)); + ilog ("Loopback command received (%s)\n", interface_name.c_str()); } else { @@ -334,12 +350,12 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) { if ( msg.cmd == MTC_MSG_MAIN_GOENABLED ) { - ilog ("main function goEnabled results acknowledged (%s)\n", get_iface_name_str (interface)); + ilog ("main function goEnabled results acknowledged (%s)\n", interface_name.c_str()); return (PASS); } else if ( msg.cmd == MTC_MSG_SUBF_GOENABLED ) { - ilog ("sub-function goEnabled results acknowledged (%s)\n", get_iface_name_str (interface)); + ilog ("sub-function goEnabled results acknowledged (%s)\n", interface_name.c_str()); return (PASS); } else @@ -351,7 +367,13 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) else if ( strstr ( &msg.hdr[0], get_worker_msg_header()) ) { - elog ("Unsupported Message\n"); + elog ("unsupported worker message\n"); + print_mtc_message ( &msg ); + return PASS ; + } + else + { + elog ("unsupported message\n"); print_mtc_message ( &msg ); return PASS ; } @@ -364,57 +386,75 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) * if ( rc == PASS ) **********************************************************/ { + rc = PASS ; + bytes = sizeof(mtc_message_type)-BUF_SIZE; - /* Fault insertion for no command ACK */ - if (( interface == MGMNT_INTERFACE ) && ( daemon_is_file_present ( MTC_CMD_FIT__NO_MGMNT_ACK ))) - { - wlog ("%s reply ack message - fit bypass (%s)\n", - get_mtcNodeCommand_str(msg.cmd), - get_iface_name_str (interface) ); - } - else if (( interface == CLSTR_INTERFACE ) && ( daemon_is_file_present ( MTC_CMD_FIT__NO_CLSTR_ACK ))) - { - wlog ("%s reply ack 
message - fit bypass (%s)\n", - get_mtcNodeCommand_str(msg.cmd), - get_iface_name_str (interface) ); - } - /* Otherwise, send the message back either over the mgmnt or clstr interface */ - else if ( interface == MGMNT_INTERFACE ) + /* send the message back either over the mgmnt or clstr interface */ + if ( interface == MGMNT_INTERFACE ) { if (( sock_ptr->mtc_client_tx_socket ) && ( sock_ptr->mtc_client_tx_socket->sock_ok() == true )) { - rc=sock_ptr->mtc_client_tx_socket->write((char*)&msg.hdr[0], bytes); + rc = sock_ptr->mtc_client_tx_socket->write((char*)&msg.hdr[0], bytes); + if ( rc <= 0 ) + { + elog ("%s reply send (mtc_client_tx_socket) failed (%s) (rc:%d)", + command_name.c_str(), + interface_name.c_str(), rc); + } + else if ( log_ack ) + { + ilog ("%s reply send (%s)", + command_name.c_str(), + interface_name.c_str()); + } } else { elog ("cannot send to null or failed socket (%s network)\n", - get_iface_name_str (interface) ); + interface_name.c_str() ); } } else if ( interface == CLSTR_INTERFACE ) { - if (( sock_ptr->mtc_client_clstr_tx_socket ) && - ( sock_ptr->mtc_client_clstr_tx_socket->sock_ok() == true )) + if (( sock_ptr->mtc_client_tx_socket_c0_clstr ) && + ( sock_ptr->mtc_client_tx_socket_c0_clstr->sock_ok() == true )) { - rc = sock_ptr->mtc_client_clstr_tx_socket->write((char*)&msg.hdr[0], bytes); + rc = sock_ptr->mtc_client_tx_socket_c0_clstr->write((char*)&msg.hdr[0], bytes); + if ( rc <= 0 ) + { + elog ("%s reply send (mtc_client_tx_socket_c0_clstr) failed (%s) (rc:%d)", + command_name.c_str(), + interface_name.c_str(), rc); + } + else if ( log_ack ) + { + ilog ("%s reply send (%s)", + command_name.c_str(), + interface_name.c_str()); + } } - else + if (( sock_ptr->mtc_client_tx_socket_c1_clstr ) && + ( sock_ptr->mtc_client_tx_socket_c1_clstr->sock_ok() == true )) { - elog ("cannot send to null or failed socket (%s network)\n", - get_iface_name_str (interface) ); + rc = sock_ptr->mtc_client_tx_socket_c1_clstr->write((char*)&msg.hdr[0], bytes); + 
if ( rc <= 0 ) + { + elog ("%s reply send (mtc_client_tx_socket_c1_clstr) failed (%s) (rc:%d)", + command_name.c_str(), + interface_name.c_str(), rc); + } + else if ( log_ack ) + { + ilog ("%s reply send (%s)", + command_name.c_str(), + interface_name.c_str()); + } } } - if (rc != bytes ) - { - elog ("failed to send reply message (%d)\n", rc); - } - else - { - print_mtc_message ( get_hostname(), MTC_CMD_TX, msg, get_iface_name_str(interface), false ); - } + print_mtc_message ( get_hostname(), MTC_CMD_TX, msg, interface_name.data(), (rc != bytes) ); /* get the shutdown delay config alue */ int delay = daemon_get_cfg_ptr()->failsafe_shutdown_delay ; @@ -427,10 +467,10 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) { if ( daemon_is_file_present ( MTC_CMD_FIT__NO_REBOOT ) ) { - ilog ("Reboot - fit bypass (%s)\n", get_iface_name_str (interface)); + ilog ("Reboot - fit bypass (%s)\n", interface_name.c_str()); return (PASS); } - ilog ("Reboot (%s)\n", get_iface_name_str (interface)); + ilog ("Reboot (%s)\n", interface_name.c_str()); daemon_log ( NODE_RESET_FILE, "reboot command" ); fork_sysreq_reboot ( delay ); rc = system("/usr/bin/systemctl reboot"); @@ -439,7 +479,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) { if ( daemon_is_file_present ( MTC_CMD_FIT__NO_REBOOT ) ) { - ilog ("Lazy Reboot - fit bypass (%s)\n", get_iface_name_str (interface)); + ilog ("Lazy Reboot - fit bypass (%s)\n", interface_name.c_str()); return (PASS); } daemon_log ( NODE_RESET_FILE, "lazy reboot command" ); @@ -447,7 +487,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) { do { - ilog ("Lazy Reboot (%s) ; rebooting in %d seconds\n", get_iface_name_str (interface), msg.num ? msg.parm[0] : 1 ); + ilog ("Lazy Reboot (%s) ; rebooting in %d seconds\n", interface_name.c_str(), msg.num ? 
msg.parm[0] : 1 ); sleep (1); if ( msg.parm[0] % 5 ) { @@ -458,7 +498,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) } else { - ilog ("Lazy Reboot (%s) ; now\n", get_iface_name_str (interface) ); + ilog ("Lazy Reboot (%s) ; now\n", interface_name.c_str() ); } fork_sysreq_reboot ( delay ); rc = system("/usr/bin/systemctl reboot"); @@ -467,10 +507,10 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) { if ( daemon_is_file_present ( MTC_CMD_FIT__NO_RESET ) ) { - ilog ("Reset - fit bypass (%s)\n", get_iface_name_str (interface)); + ilog ("Reset - fit bypass (%s)\n", interface_name.c_str()); return (PASS); } - ilog ("Reset 'reboot -f' (%s)\n", get_iface_name_str (interface)); + ilog ("Reset 'reboot -f' (%s)\n", interface_name.c_str()); daemon_log ( NODE_RESET_FILE, "reset command" ); fork_sysreq_reboot ( delay/2 ); rc = system("/usr/bin/systemctl reboot --force"); @@ -481,7 +521,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) if ( daemon_is_file_present ( MTC_CMD_FIT__NO_WIPEDISK ) ) { - ilog ("Wipedisk - fit bypass (%s)\n", get_iface_name_str (interface)); + ilog ("Wipedisk - fit bypass (%s)\n", interface_name.c_str()); return (PASS); } /* We fork a reboot as a fail safe. 
@@ -499,7 +539,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) } else if( 0 == parent ) /* we're the child */ { - ilog ("Disk wipe in progress (%s)\n", get_iface_name_str (interface)); + ilog ("Disk wipe in progress (%s)\n", interface_name.c_str()); daemon_log ( NODE_RESET_FILE, "wipedisk command" ); rc = system("/usr/local/bin/wipedisk --force"); ilog ("Disk wipe complete - Forcing Reboot ...\n"); @@ -509,7 +549,6 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) } rc = PASS ; - fflush(stdout); } return (rc); } @@ -761,28 +800,56 @@ int send_mtc_msg ( mtc_socket_type * sock_ptr, int cmd , string identity ) int send_mtcAlive_msg_failed = 0 ; int send_mtcAlive_msg ( mtc_socket_type * sock_ptr, string identity, int interface ) { - mtc_message_type msg ; - msgClassSock * mtcAlive_tx_sock_ptr = NULL ; - int rc = FAIL ; - if (( interface == CLSTR_INTERFACE ) && ( get_ctrl_ptr()->clstr_iface_provisioned != true )) { dlog2 ("cannot send to unprovisioned %s interface\n", get_iface_name_str(interface) ); - return (rc); + return (FAIL); } + mtc_message_type msg ; + int bytes = create_mtcAlive_msg ( msg, MTC_MSG_MTCALIVE, identity, interface ); if ( interface == MGMNT_INTERFACE ) { - /* management interface */ - mtcAlive_tx_sock_ptr = sock_ptr->mtc_client_tx_socket ; + /* Send to controller floating address */ + if (( sock_ptr->mtc_client_tx_socket ) && + ( sock_ptr->mtc_client_tx_socket->sock_ok() == true )) + { + print_mtc_message ( CONTROLLER, MTC_CMD_TX, msg, get_iface_name_str(MGMNT_INTERFACE), false ); + sock_ptr->mtc_client_tx_socket->write((char*)&msg.hdr[0], bytes) ; + } + else + { + elog("mtc_client_tx_socket not ok"); + } } else if ( interface == CLSTR_INTERFACE ) { - /* cluster-host interface */ - mtcAlive_tx_sock_ptr = sock_ptr->mtc_client_clstr_tx_socket ; + /* Send to controller-0 cluster address */ + if (( sock_ptr->mtc_client_tx_socket_c0_clstr ) && + ( sock_ptr->mtc_client_tx_socket_c0_clstr->sock_ok() == 
true )) + { + print_mtc_message ( CONTROLLER_0, MTC_CMD_TX, msg, get_iface_name_str(CLSTR_INTERFACE), false ); + sock_ptr->mtc_client_tx_socket_c0_clstr->write((char*)&msg.hdr[0], bytes ) ; + } + else + { + elog("mtc_client_tx_socket_c0_clstr not ok"); + } + + /* Send to controller-1 cluster address */ + if (( sock_ptr->mtc_client_tx_socket_c1_clstr ) && + ( sock_ptr->mtc_client_tx_socket_c1_clstr->sock_ok() == true )) + { + print_mtc_message ( CONTROLLER_1, MTC_CMD_TX, msg, get_iface_name_str(CLSTR_INTERFACE), false ); + sock_ptr->mtc_client_tx_socket_c1_clstr->write((char*)&msg.hdr[0], bytes ) ; + } + else + { + elog("mtc_client_tx_socket_c1_clstr not ok"); + } } else { @@ -791,53 +858,7 @@ int send_mtcAlive_msg ( mtc_socket_type * sock_ptr, string identity, int interfa return (FAIL_BAD_PARM); } - if ( daemon_is_file_present ( MTC_CMD_FIT__NO_MTCALIVE )) - { - wlog ("mtcAlive - fit bypass\n"); - return (PASS); - } - else - { - int bytes = create_mtcAlive_msg ( msg, MTC_MSG_MTCALIVE, identity, interface ); - - if (( mtcAlive_tx_sock_ptr ) && - ( mtcAlive_tx_sock_ptr->sock_ok() == true )) - { - if ((rc = mtcAlive_tx_sock_ptr->write((char*)&msg.hdr[0], bytes)) != bytes ) - { - if ( rc == -1 ) - { - wlog_throttled (send_mtcAlive_msg_failed, 100 , - "failed to send <%s:%d> (%d:%m) (%s)\n", - mtcAlive_tx_sock_ptr->get_dst_str(), - mtcAlive_tx_sock_ptr->get_dst_addr()->getPort(), - errno, get_iface_name_str(interface) ); - } - else - { - wlog_throttled ( send_mtcAlive_msg_failed, 100 , - "sent only %d of %d bytes to <%s:%d> (%s)\n", - rc, bytes, - mtcAlive_tx_sock_ptr->get_dst_str(), - mtcAlive_tx_sock_ptr->get_dst_addr()->getPort(), - get_iface_name_str(interface) ); - } - rc = FAIL_SOCKET_SENDTO ; - } - else - { - send_mtcAlive_msg_failed = 0 ; - print_mtc_message ( get_hostname(), MTC_CMD_TX, msg, get_iface_name_str(interface), false ); - rc = PASS ; - } - } - else - { - elog ("cannot send to null or failed socket (%s network)\n", - get_iface_name_str(interface)); - 
} - } - return (rc) ; + return (PASS) ; } /* Accelerated Virtual Switch 'events' socket diff --git a/mtce/src/maintenance/mtcCtrlMsg.cpp b/mtce/src/maintenance/mtcCtrlMsg.cpp index b2ba4bea..f5019a4d 100755 --- a/mtce/src/maintenance/mtcCtrlMsg.cpp +++ b/mtce/src/maintenance/mtcCtrlMsg.cpp @@ -50,9 +50,6 @@ using namespace std; int service_events ( nodeLinkClass * obj_ptr, mtc_socket_type * sock_ptr ); -/* Throttle logging of messages from unknown IP addresses */ -std::list unknown_ip_list ; - /* Send specified command to the guestAgent daemon */ int send_guest_command ( string hostname, int command ) { @@ -163,6 +160,7 @@ int mtc_service_inbox ( nodeLinkClass * obj_ptr, zero_unused_msg_buf (msg, bytes); + /* get the sender's hostname */ string hostaddr = "" ; string hostname = "" ; if ( iface == CLSTR_INTERFACE ) @@ -175,20 +173,22 @@ int mtc_service_inbox ( nodeLinkClass * obj_ptr, hostaddr = sock_ptr->mtc_agent_rx_socket->get_src_str(); hostname = obj_ptr->get_hostname ( hostaddr ) ; } + + /* lookup failed if hostname remains empty. */ if ( hostname.empty() ) { - std::list::iterator iter ; - iter = std::find (unknown_ip_list.begin(), unknown_ip_list.end(), hostaddr ); - if ( iter == unknown_ip_list.end() ) + /* try and learn the cluster ip from a mtcAlive message. */ + if (( msg.cmd == MTC_MSG_MTCALIVE ) && + (( rc = jsonUtil_get_key_val ( &msg.buf[0], "hostname", hostname )) == PASS )) { - mlog3 ( "Received message from unknown IP <%s>\n", hostaddr.c_str()); - unknown_ip_list.push_front(hostaddr); + ilog ("%s learned from mtcAlive", hostname.c_str()); + } + else + { + wlog ("unknown hostname message ... dropping" ); /* make dlog */ + print_mtc_message ( hostname, MTC_CMD_RX, msg, get_iface_name_str(iface), true ); + return (FAIL_GET_HOSTNAME); } - return (FAIL_NOT_FOUND); - } - else if ( ! 
hostaddr.empty() ) - { - unknown_ip_list.remove (hostaddr); } print_mtc_message ( hostname, MTC_CMD_RX, msg, get_iface_name_str(iface), false ); @@ -244,6 +244,26 @@ int mtc_service_inbox ( nodeLinkClass * obj_ptr, else if ( strstr ( &msg.hdr[0], get_cmd_rsp_msg_header() ) ) { obj_ptr->set_cmd_resp ( hostname , msg ) ; + if ( msg.num > 0 ) + { + if (( msg.cmd != MTC_MSG_LOCKED ) && + ( msg.cmd != MTC_CMD_HOST_SVCS_RESULT )) + { + ilog ("%s '%s' ACK (rc:%d) (%s)", + hostname.c_str(), + get_mtcNodeCommand_str(msg.cmd), + msg.parm[0], + get_iface_name_str(iface)); + } + else + { + mlog ("%s '%s' ACK (rc:%d) (%s)", + hostname.c_str(), + get_mtcNodeCommand_str(msg.cmd), + msg.parm[0], + get_iface_name_str(iface)); + } + } } /* @@ -267,30 +287,35 @@ int mtc_service_inbox ( nodeLinkClass * obj_ptr, wlog ("%s failed to load functions from mtcAlive message\n", hostname.c_str()); return (FAIL_NODETYPE); } + + if ( obj_ptr->clstr_network_provisioned == true ) + { + string cluster_host_ip = ""; + /* Get the clstr ip address if it is provisioned */ + rc = jsonUtil_get_key_val ( &msg.buf[0], "cluster_host_ip", cluster_host_ip ); + if ( rc == PASS ) + { + obj_ptr->set_clstr_hostaddr ( hostname, cluster_host_ip ); + } + else + { + wlog ("%s missing 'cluster_host_ip' value (rc:%d)\n", hostname.c_str(), rc); + } + } + obj_ptr->set_uptime ( hostname , msg.parm[MTC_PARM_UPTIME_IDX], false ); obj_ptr->set_health ( hostname , msg.parm[MTC_PARM_HEALTH_IDX] ); - obj_ptr->set_mtce_flags ( hostname , msg.parm[MTC_PARM_FLAGS_IDX] ); - + obj_ptr->set_mtce_flags ( hostname , msg.parm[MTC_PARM_FLAGS_IDX], iface ); obj_ptr->set_mtcAlive ( hostname, iface ); - mlog1("%s Uptime:%d Health:%d Flags:0x%x mtcAlive:%s\n", + mlog1("%s Uptime:%d Health:%d Flags:0x%x mtcAlive:%s (%s)\n", hostname.c_str(), msg.parm[MTC_PARM_UPTIME_IDX], msg.parm[MTC_PARM_HEALTH_IDX], msg.parm[MTC_PARM_FLAGS_IDX], - obj_ptr->get_mtcAlive_gate ( hostname ) ? "gated" : "open"); + obj_ptr->get_mtcAlive_gate ( hostname ) ? 
"gated" : "open", + get_iface_name_str(iface)); - string cluster_host_ip = ""; - /* Get the clstr ip address if it is provisioned */ - rc = jsonUtil_get_key_val ( &msg.buf[0], "cluster_host_ip", cluster_host_ip ); - if ( rc == PASS ) - { - obj_ptr->set_clstr_hostaddr ( hostname, cluster_host_ip ); - } - else - { - mlog ("%s null or missing 'cluster_host_ip' value (rc:%d)\n", hostname.c_str(), rc); - } } else if ( msg.cmd == MTC_MSG_MAIN_GOENABLED ) { @@ -546,19 +571,6 @@ int mtc_service_inbox ( nodeLinkClass * obj_ptr, wlog ( "Received unsupported or badly formed message\n" ); } - /* Only do this if the debug level is appropriate */ - if ( daemon_get_cfg_ptr()->debug_msg ) - { - int count = 0 ; - std::list::iterator iter ; - for ( iter = unknown_ip_list.begin () ; - iter != unknown_ip_list.end () ; - iter++ ) - { - count++ ; - mlog3 ("Unknown IP [%d]:%s\n", count, iter->c_str()); - } - } return (rc); } @@ -667,55 +679,56 @@ int send_mtc_cmd ( string & hostname, int cmd , int interface ) { int bytes = 0; - /* Temporarily get IP from node inventory till dns is available */ nodeLinkClass * obj_ptr = get_mtcInv_ptr (); /* add the mac address of the target card to the header - * Note: the minus 1 is to overwqrite the null */ + * Note: the minus 1 is to overwrite the null */ snprintf ( &mtc_cmd.hdr[MSG_HEADER_SIZE-1], MSG_HEADER_SIZE, "%s", obj_ptr->get_hostIfaceMac(hostname, MGMNT_IFACE).data()); - /* Lets add the controller's floating ip in the buffer so hat he host knowns where to reply */ - snprintf ( &mtc_cmd.buf[0], obj_ptr->my_float_ip.length()+1, "%s", obj_ptr->my_float_ip.data()); - - /* only send the minimum amount of data */ - bytes = (sizeof(mtc_message_type)-(BUF_SIZE-(obj_ptr->my_float_ip.length()+1))) ; + string data = "{\"address\":\""; + data.append(obj_ptr->my_float_ip) ; + data.append("\",\"interface\":\""); + data.append(get_iface_name_str(interface)); + data.append("\"}"); + snprintf ( &mtc_cmd.buf[0], data.length()+1, "%s", data.data()); + bytes = 
(sizeof(mtc_message_type)-(BUF_SIZE-(data.length()+1))); print_mtc_message ( hostname, MTC_CMD_TX, mtc_cmd, get_iface_name_str(interface), force ) ; if (interface == MGMNT_INTERFACE) { string hostaddr = obj_ptr->get_hostaddr(hostname); - -#ifdef WANT_FIT_TESTING - if ( daemon_want_fit ( FIT_CODE__INVALIDATE_MGMNT_IP, hostname ) ) - hostaddr = "none" ; -#endif - if ( hostUtil_is_valid_ip_addr ( hostaddr ) != true ) { - wlog("%s has no management IP assigned\n", hostname.c_str()); + wlog("%s has invalid management addr '%s'\n", + hostname.c_str(), + hostaddr.c_str()); return (FAIL_HOSTADDR_LOOKUP); } - /* rc = message size */ - rc = sock_ptr->mtc_agent_tx_socket->write((char *)&mtc_cmd, bytes, hostaddr.c_str(), sock_ptr->mtc_cmd_port); + + mlog ("%s sending %s request to %s (%s)", + hostname.c_str(), + get_mtcNodeCommand_str(cmd), + hostaddr.c_str(), + get_iface_name_str(interface)); + + rc = sock_ptr->mtc_agent_tx_socket->write((char *)&mtc_cmd, bytes, hostaddr.c_str(), sock_ptr->mtc_mgmnt_cmd_port); } else if ((interface == CLSTR_INTERFACE) && ( obj_ptr->clstr_network_provisioned == true ) && ( sock_ptr->mtc_agent_clstr_tx_socket != NULL )) { - /* SETUP TX -> COMPUTE SOCKET CLSTR INTERFACE */ string clstr_hostaddr = obj_ptr->get_clstr_hostaddr(hostname); - -#ifdef WANT_FIT_TESTING - if ( daemon_want_fit ( FIT_CODE__INVALIDATE_CLSTR_IP, hostname ) ) - clstr_hostaddr = "none" ; -#endif - if ( hostUtil_is_valid_ip_addr( clstr_hostaddr ) != true ) - { return (FAIL_NO_CLSTR_PROV); - } - rc = sock_ptr->mtc_agent_clstr_tx_socket->write((char *)&mtc_cmd, bytes, clstr_hostaddr.c_str(), sock_ptr->mtc_cmd_port); + + mlog ("%s sending %s request to %s (%s)", + hostname.c_str(), + get_mtcNodeCommand_str(cmd), + clstr_hostaddr.c_str(), + get_iface_name_str(interface)); + + rc = sock_ptr->mtc_agent_clstr_tx_socket->write((char *)&mtc_cmd, bytes, clstr_hostaddr.c_str(), sock_ptr->mtc_clstr_cmd_port); } if ( 0 > rc ) diff --git a/mtce/src/maintenance/mtcNodeComp.cpp 
b/mtce/src/maintenance/mtcNodeComp.cpp index 1f3e1514..1048d72b 100644 --- a/mtce/src/maintenance/mtcNodeComp.cpp +++ b/mtce/src/maintenance/mtcNodeComp.cpp @@ -171,12 +171,17 @@ void _close_mgmnt_tx_socket ( void ) } } -void _close_clstr_tx_socket ( void ) +void _close_clstr_tx_sockets ( void ) { - if (mtc_sock.mtc_client_clstr_tx_socket) + if (mtc_sock.mtc_client_tx_socket_c0_clstr) { - delete (mtc_sock.mtc_client_clstr_tx_socket); - mtc_sock.mtc_client_clstr_tx_socket = 0 ; + delete (mtc_sock.mtc_client_tx_socket_c0_clstr); + mtc_sock.mtc_client_tx_socket_c0_clstr = 0 ; + } + if (mtc_sock.mtc_client_tx_socket_c1_clstr) + { + delete (mtc_sock.mtc_client_tx_socket_c1_clstr); + mtc_sock.mtc_client_tx_socket_c1_clstr = 0 ; } } @@ -196,7 +201,7 @@ void daemon_exit ( void ) _close_mgmnt_rx_socket (); _close_clstr_rx_socket (); _close_mgmnt_tx_socket (); - _close_clstr_tx_socket (); + _close_clstr_tx_sockets(); _close_amon_sock (); exit (0) ; @@ -214,13 +219,18 @@ static int mtc_config_handler ( void * user, if (MATCH("agent", "mtc_agent_port")) { config_ptr->mtc_agent_port = atoi(value); - config_ptr->mask |= CONFIG_AGENT_PORT ; + config_ptr->mask |= CONFIG_AGENT_MTC_MGMNT_PORT ; } else if (MATCH("client", "mtc_rx_mgmnt_port")) { config_ptr->mtc_rx_mgmnt_port = atoi(value); config_ptr->mask |= CONFIG_CLIENT_MTC_MGMNT_PORT ; } + else if (MATCH("client", "mtc_rx_clstr_port")) + { + config_ptr->mtc_rx_clstr_port = atoi(value); + config_ptr->mask |= CONFIG_CLIENT_MTC_CLSTR_PORT ; + } else if (MATCH("timeouts", "failsafe_shutdown_delay")) { config_ptr->failsafe_shutdown_delay = atoi(value); @@ -289,10 +299,9 @@ void setup_mgmnt_rx_socket ( void ) ilog("Mgmnt iface : %s\n", ctrl.mgmnt_iface.c_str() ); get_iface_macaddr ( ctrl.mgmnt_iface.data(), ctrl.macaddr ); get_iface_address ( ctrl.mgmnt_iface.data(), ctrl.address , true ); - get_hostname ( &ctrl.hostname[0], MAX_HOST_NAME_SIZE ); _close_mgmnt_rx_socket (); - mtc_sock.mtc_client_rx_socket = new 
msgClassRx(ctrl.address.c_str(),mtc_sock.mtc_cmd_port, IPPROTO_UDP, ctrl.mgmnt_iface.data(), false ); + mtc_sock.mtc_client_rx_socket = new msgClassRx(ctrl.address.c_str(),mtc_sock.mtc_mgmnt_cmd_port, IPPROTO_UDP, ctrl.mgmnt_iface.data(), false ); /* update health of socket */ if ( mtc_sock.mtc_client_rx_socket ) @@ -328,12 +337,13 @@ void setup_clstr_rx_socket ( void ) * calls daemon_get_iface_master inside so the * aggrigated name is returned if it exists */ get_clstr_iface (&mtc_config.clstr_iface ); - if ( strlen(mtc_config.clstr_iface) ) + ctrl.clstr_iface = mtc_config.clstr_iface ; + if ( !ctrl.clstr_iface.empty()) { /* Only get the cluster-host network address if it is provisioned */ - if ( get_iface_address ( mtc_config.clstr_iface, ctrl.address_clstr, false ) == PASS ) + if ( get_iface_address ( ctrl.clstr_iface.data(), ctrl.address_clstr, false ) == PASS ) { - ilog ("Cluster-host iface : %s\n", mtc_config.clstr_iface ); + ilog ("Cluster-host iface : %s\n", ctrl.clstr_iface.c_str()); ilog ("Cluster-host addr : %s\n", ctrl.address_clstr.c_str()); } } @@ -342,7 +352,7 @@ void setup_clstr_rx_socket ( void ) _close_clstr_rx_socket (); /* Only set up the socket if an cluster-host interface is provisioned */ - mtc_sock.mtc_client_clstr_rx_socket = new msgClassRx(ctrl.address_clstr.c_str(),mtc_sock.mtc_cmd_port, IPPROTO_UDP, ctrl.clstr_iface.data(), false ); + mtc_sock.mtc_client_clstr_rx_socket = new msgClassRx(ctrl.address_clstr.c_str(),mtc_sock.mtc_clstr_cmd_port, IPPROTO_UDP, ctrl.clstr_iface.data(), false ); /* update health of socket */ if ( mtc_sock.mtc_client_clstr_rx_socket ) @@ -390,32 +400,60 @@ void setup_mgmnt_tx_socket ( void ) } } -void setup_clstr_tx_socket ( void ) +void setup_clstr_tx_sockets ( void ) { if ( ctrl.clstr_iface_provisioned == false ) { return ; } - dlog ("setup of cluster-host TX\n"); - _close_clstr_tx_socket (); - mtc_sock.mtc_client_clstr_tx_socket = new msgClassTx(CONTROLLER_NFS,mtc_sock.mtc_agent_port, IPPROTO_UDP, 
mtc_config.clstr_iface); + dlog ("setup of %s TX\n", CONTROLLER_0_CLUSTER_HOST); - if ( mtc_sock.mtc_client_clstr_tx_socket ) + _close_clstr_tx_sockets (); + + mtc_sock.mtc_client_tx_socket_c0_clstr = + new msgClassTx(CONTROLLER_0_CLUSTER_HOST, + mtc_sock.mtc_agent_port, + IPPROTO_UDP, + mtc_config.clstr_iface); + + if ( mtc_sock.mtc_client_tx_socket_c0_clstr ) { - /* look for fault insertion request */ - if ( daemon_is_file_present ( MTC_CMD_FIT__CLSTR_TXSOCK ) ) - mtc_sock.mtc_client_clstr_tx_socket->return_status = FAIL ; - - if ( mtc_sock.mtc_client_clstr_tx_socket->return_status == PASS ) + if ( mtc_sock.mtc_client_tx_socket_c0_clstr->return_status == PASS ) { - mtc_sock.mtc_client_clstr_tx_socket->sock_ok(true); + mtc_sock.mtc_client_tx_socket_c0_clstr->sock_ok(true); } else { - elog ("failed to init 'cluster-host tx' socket (rc:%d)\n", - mtc_sock.mtc_client_clstr_tx_socket->return_status ); - mtc_sock.mtc_client_clstr_tx_socket->sock_ok(false); + elog ("failed to init '%s' tx socket (rc:%d)\n", + CONTROLLER_0_CLUSTER_HOST, + mtc_sock.mtc_client_tx_socket_c0_clstr->return_status ); + mtc_sock.mtc_client_tx_socket_c0_clstr->sock_ok(false); + } + } + if ( ctrl.system_type != SYSTEM_TYPE__CPE_MODE__SIMPLEX ) + { + dlog ("setup of %s TX\n", CONTROLLER_1_CLUSTER_HOST); + + mtc_sock.mtc_client_tx_socket_c1_clstr = + new msgClassTx(CONTROLLER_1_CLUSTER_HOST, + mtc_sock.mtc_agent_port, + IPPROTO_UDP, + mtc_config.clstr_iface); + + if ( mtc_sock.mtc_client_tx_socket_c1_clstr ) + { + if ( mtc_sock.mtc_client_tx_socket_c1_clstr->return_status == PASS ) + { + mtc_sock.mtc_client_tx_socket_c1_clstr->sock_ok(true); + } + else + { + elog ("failed to init '%s' tx socket (rc:%d)\n", + CONTROLLER_0_CLUSTER_HOST, + mtc_sock.mtc_client_tx_socket_c1_clstr->return_status ); + mtc_sock.mtc_client_tx_socket_c1_clstr->sock_ok(false); + } } } } @@ -463,7 +501,7 @@ void setup_amon_socket ( void ) * 1. Unicast receive socket mgmnt (mtc_client_rx_socket) * 2. 
Unicast receive socket clstr (mtc_client_clstr_rx_socket) * 3. Unicast transmit socket mgmnt (mtc_client_tx_socket) - * 4. Unicast transmit socket clstr (mtc_client_clstr_tx_socket) + * 4. Unicast transmit socket clstr (mtc_client_tx_socket_c?_clstr) * * 5. socket for pmond acive monitoring * @@ -473,8 +511,10 @@ int mtc_socket_init ( void ) /* Setup the Management Interface Recieve Socket */ /* Read the port config strings into the socket struct */ mtc_sock.mtc_agent_port = mtc_config.mtc_agent_port; - mtc_sock.mtc_cmd_port = mtc_config.mtc_rx_mgmnt_port; + mtc_sock.mtc_mgmnt_cmd_port = mtc_config.mtc_rx_mgmnt_port; + mtc_sock.mtc_clstr_cmd_port = mtc_config.mtc_rx_clstr_port; + get_hostname ( &ctrl.hostname[0], MAX_HOST_NAME_SIZE ); ctrl.mtcAgent_ip = getipbyname ( CONTROLLER ); ilog ("Controller : %s\n", ctrl.mtcAgent_ip.c_str()); @@ -489,8 +529,8 @@ int mtc_socket_init ( void ) setup_mgmnt_tx_socket (); /* Manage Cluster-host network setup */ - string clstr_iface_name = daemon_clstr_iface(); string mgmnt_iface_name = daemon_mgmnt_iface(); + string clstr_iface_name = daemon_clstr_iface(); if ( !clstr_iface_name.empty() ) { if ( clstr_iface_name != mgmnt_iface_name ) @@ -504,7 +544,7 @@ int mtc_socket_init ( void ) /*************************************************************/ /* Setup the Clstr Interface Transmit Messaging to mtcAgent */ /*************************************************************/ - setup_clstr_tx_socket () ; + setup_clstr_tx_sockets () ; } } @@ -1225,8 +1265,8 @@ void daemon_service_run ( void ) if (( mtc_sock.mtc_client_rx_socket == NULL ) || ( mtc_sock.mtc_client_rx_socket->sock_ok() == false )) { - setup_mgmnt_rx_socket(); wlog ("calling setup_mgmnt_rx_socket (auto-recovery)\n"); + setup_mgmnt_rx_socket(); socket_reinit = true ; } @@ -1234,8 +1274,8 @@ void daemon_service_run ( void ) else if (( mtc_sock.mtc_client_tx_socket == NULL ) || ( mtc_sock.mtc_client_tx_socket->sock_ok() == false )) { - setup_mgmnt_tx_socket(); wlog ("calling 
setup_mgmnt_tx_socket\n"); + setup_mgmnt_tx_socket(); socket_reinit = true ; } @@ -1244,18 +1284,20 @@ void daemon_service_run ( void ) (( mtc_sock.mtc_client_clstr_rx_socket == NULL ) || ( mtc_sock.mtc_client_clstr_rx_socket->sock_ok() == false ))) { - setup_clstr_rx_socket(); wlog ("calling setup_clstr_rx_socket (auto-recovery)\n"); + setup_clstr_rx_socket(); socket_reinit = true ; } /* Clstr Tx */ else if (( ctrl.clstr_iface_provisioned == true ) && - (( mtc_sock.mtc_client_clstr_tx_socket == NULL ) || - ( mtc_sock.mtc_client_clstr_tx_socket->sock_ok() == false ))) + (( mtc_sock.mtc_client_tx_socket_c0_clstr == NULL ) || + ( mtc_sock.mtc_client_tx_socket_c1_clstr == NULL ) || + ( mtc_sock.mtc_client_tx_socket_c0_clstr->sock_ok() == false ) || + ( mtc_sock.mtc_client_tx_socket_c1_clstr->sock_ok() == false ))) { - setup_clstr_tx_socket(); - wlog ("calling setup_clstr_tx_socket (auto-recovery)\n"); + wlog ("calling setup_clstr_tx_sockets (auto-recovery)\n"); + setup_clstr_tx_sockets(); socket_reinit = true ; } @@ -1311,18 +1353,14 @@ void daemon_service_run ( void ) if ( daemon_is_file_present ( MTC_CMD_FIT__CLSTR_RXSOCK )) { if ( mtc_sock.mtc_client_clstr_rx_socket ) - { mtc_sock.mtc_client_clstr_rx_socket->sock_ok (false); - _close_clstr_rx_socket (); - } } if ( daemon_is_file_present ( MTC_CMD_FIT__CLSTR_TXSOCK )) { - if ( mtc_sock.mtc_client_clstr_tx_socket ) - { - mtc_sock.mtc_client_clstr_tx_socket->sock_ok (false); - _close_clstr_tx_socket (); - } + if ( mtc_sock.mtc_client_tx_socket_c0_clstr ) + mtc_sock.mtc_client_tx_socket_c0_clstr->sock_ok (false); + if ( mtc_sock.mtc_client_tx_socket_c1_clstr ) + mtc_sock.mtc_client_tx_socket_c1_clstr->sock_ok (false); } if ( daemon_is_file_present ( MTC_CMD_FIT__AMON_SOCK )) { diff --git a/mtce/src/maintenance/mtcNodeComp.h b/mtce/src/maintenance/mtcNodeComp.h index b0917bfc..612144f8 100644 --- a/mtce/src/maintenance/mtcNodeComp.h +++ b/mtce/src/maintenance/mtcNodeComp.h @@ -18,8 +18,9 @@ #include /** Compute Config 
mask */ -#define CONFIG_CLIENT_MASK (CONFIG_AGENT_PORT |\ - CONFIG_CLIENT_MTC_MGMNT_PORT) +#define CONFIG_CLIENT_MASK (CONFIG_AGENT_MTC_MGMNT_PORT |\ + CONFIG_CLIENT_MTC_MGMNT_PORT |\ + CONFIG_CLIENT_MTC_CLSTR_PORT) #define MAX_RUN_SCRIPTS (20) diff --git a/mtce/src/maintenance/mtcNodeCtrl.cpp b/mtce/src/maintenance/mtcNodeCtrl.cpp index d01de614..26fca0f0 100644 --- a/mtce/src/maintenance/mtcNodeCtrl.cpp +++ b/mtce/src/maintenance/mtcNodeCtrl.cpp @@ -155,18 +155,6 @@ void daemon_exit ( void ) if (mtc_sock.mtc_agent_tx_socket) delete (mtc_sock.mtc_agent_tx_socket); - if (mtc_sock.mtc_client_rx_socket) - delete(mtc_sock.mtc_client_rx_socket); - - if (mtc_sock.mtc_client_tx_socket) - delete (mtc_sock.mtc_client_tx_socket); - - if (mtc_sock.mtc_client_clstr_rx_socket) - delete (mtc_sock.mtc_client_clstr_rx_socket); - - if (mtc_sock.mtc_client_clstr_tx_socket) - delete (mtc_sock.mtc_client_clstr_tx_socket); - if (mtc_sock.mtc_event_rx_sock) delete (mtc_sock.mtc_event_rx_sock); @@ -191,7 +179,8 @@ void daemon_exit ( void ) /** Control Config Mask */ -#define CONFIG_AGENT_MASK (CONFIG_AGENT_PORT |\ +#define CONFIG_AGENT_MASK (CONFIG_AGENT_MTC_MGMNT_PORT |\ + CONFIG_CLIENT_MTC_CLSTR_PORT |\ CONFIG_MTC_TO_HBS_CMD_PORT |\ CONFIG_MTC_TO_HWMON_CMD_PORT |\ CONFIG_HBS_TO_MTC_EVENT_PORT |\ @@ -201,7 +190,7 @@ void daemon_exit ( void ) CONFIG_AGENT_LOC_TIMEOUT |\ CONFIG_AGENT_INV_EVENT_PORT |\ CONFIG_AGENT_API_RETRIES |\ - CONFIG_CLIENT_PORT) + CONFIG_CLIENT_MTC_MGMNT_PORT) static int mtc_nfvi_handler ( void * user, const char * section, @@ -250,7 +239,7 @@ static int mtc_config_handler ( void * user, else if (MATCH("agent", "mtc_agent_port")) { config_ptr->mtc_agent_port = atoi(value); - config_ptr->mask |= CONFIG_AGENT_PORT ; + config_ptr->mask |= CONFIG_AGENT_MTC_MGMNT_PORT ; } else if (MATCH("agent", "mtc_to_hbs_cmd_port")) { @@ -279,7 +268,12 @@ static int mtc_config_handler ( void * user, else if (MATCH("client", "mtc_rx_mgmnt_port")) { config_ptr->cmd_port = atoi(value); - 
config_ptr->mask |= CONFIG_CLIENT_PORT ; + config_ptr->mask |= CONFIG_CLIENT_MTC_MGMNT_PORT ; + } + else if (MATCH("client", "mtc_rx_clstr_port")) + { + config_ptr->mtc_rx_clstr_port = atoi(value); + config_ptr->mask |= CONFIG_CLIENT_MTC_CLSTR_PORT ; } else if (MATCH("agent", "token_refresh_rate")) { @@ -639,6 +633,7 @@ int daemon_configure ( void ) else { mtcInv.clstr_network_provisioned = true ; + ilog ("Cluster network is provisioned" ); } } @@ -697,11 +692,11 @@ int mtc_socket_init ( void ) /* Read the port config strings into the socket struct */ mtc_sock.mtc_agent_port = mtc_config.mtc_agent_port; - mtc_sock.mtc_cmd_port = mtc_config.cmd_port; + mtc_sock.mtc_mgmnt_cmd_port = mtc_config.cmd_port; /* create transmit socket */ msgClassAddr::getAddressFromInterface(mtc_config.mgmnt_iface, ip_address, INET6_ADDRSTRLEN); - sock_ptr->mtc_agent_tx_socket = new msgClassTx(ip_address, mtc_config.mtc_agent_port, IPPROTO_UDP, mtc_config.mgmnt_iface); + sock_ptr->mtc_agent_tx_socket = new msgClassTx(ip_address, mtc_sock.mtc_mgmnt_cmd_port, IPPROTO_UDP, mtc_config.mgmnt_iface); rc = sock_ptr->mtc_agent_tx_socket->return_status; if(rc != PASS) { @@ -714,9 +709,12 @@ int mtc_socket_init ( void ) /***********************************************************/ if ( strlen( mtc_config.clstr_iface ) ) { + sock_ptr->mtc_clstr_cmd_port = mtc_config.mtc_rx_clstr_port; + /* create clstr transmit socket only if the interface is provisioned */ msgClassAddr::getAddressFromInterface(mtc_config.clstr_iface, ip_address, INET6_ADDRSTRLEN); - sock_ptr->mtc_agent_clstr_tx_socket = new msgClassTx(ip_address, mtc_config.mtc_agent_port, IPPROTO_UDP, mtc_config.clstr_iface); + sock_ptr->mtc_agent_clstr_tx_socket = new msgClassTx(ip_address, mtc_sock.mtc_clstr_cmd_port, IPPROTO_UDP, mtc_config.clstr_iface); + rc = sock_ptr->mtc_agent_clstr_tx_socket->return_status; if(rc != PASS) { @@ -778,8 +776,17 @@ int mtc_socket_init ( void ) if ( mtcInv.clstr_network_provisioned == true ) { - 
sock_ptr->mtc_agent_clstr_rx_socket = - new msgClassRx(CONTROLLER_NFS, sock_ptr->mtc_agent_port, IPPROTO_UDP ); + if ( mtcInv.my_hostname == CONTROLLER_0 ) + { + sock_ptr->mtc_agent_clstr_rx_socket = + new msgClassRx(CONTROLLER_0_CLUSTER_HOST, sock_ptr->mtc_agent_port, IPPROTO_UDP ); + } + else + { + sock_ptr->mtc_agent_clstr_rx_socket = + new msgClassRx(CONTROLLER_1_CLUSTER_HOST, sock_ptr->mtc_agent_port, IPPROTO_UDP ); + } + if (( sock_ptr->mtc_agent_clstr_rx_socket == NULL ) || ( sock_ptr->mtc_agent_clstr_rx_socket->return_status )) { diff --git a/mtce/src/maintenance/mtcNodeHdlrs.cpp b/mtce/src/maintenance/mtcNodeHdlrs.cpp index 154861aa..5e7e05bf 100755 --- a/mtce/src/maintenance/mtcNodeHdlrs.cpp +++ b/mtce/src/maintenance/mtcNodeHdlrs.cpp @@ -773,7 +773,7 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr ) node_ptr->mtce_flags = 0 ; /* Assert the mtc alive gate */ - node_ptr->mtcAlive_gate = true ; + this->ctl_mtcAlive_gate ( node_ptr, true ) ; node_ptr->mtcAlive_online = false ; node_ptr->mtcAlive_offline = true ; @@ -886,9 +886,9 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr ) * in the reboot recovery phase now. Look for the mtcAlive */ /* In self-enable we don't need to purge mtcAlive just need - * to wait for one more. Assum,e offline, not online and open + * to wait for one more. Assume offline, not online and open * the mtcAlive gate. 
*/ - node_ptr->mtcAlive_gate = false ; + this->ctl_mtcAlive_gate ( node_ptr, false ) ; node_ptr->mtcAlive_online = false ; node_ptr->mtcAlive_offline = true ; /* set mtcAlive timeout */ @@ -1053,7 +1053,7 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr ) if ( node_ptr->mtcAlive_purge >= 20 ) { /* open gate */ - node_ptr->mtcAlive_gate = false ; + this->ctl_mtcAlive_gate ( node_ptr, false ) ; node_ptr->mtcAlive_purge = 0 ; /* timer is started ok so we can do the stage transition */ @@ -1173,12 +1173,12 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr ) break ; } - else if ( node_ptr->mtcAlive_gate == true ) + else if ( this->get_mtcAlive_gate (node_ptr) == true ) { slog ("%s mtcAlive gate unexpectedly set, correcting ...\n", node_ptr->hostname.c_str()); - node_ptr->mtcAlive_gate = false ; + this->ctl_mtcAlive_gate ( node_ptr, false ) ; } /* wait some more */ @@ -1628,7 +1628,7 @@ int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr ) /* Purge this hosts work queues */ mtcCmd_workQ_purge ( node_ptr ); mtcCmd_doneQ_purge ( node_ptr ); - node_ptr->mtcAlive_gate = false ; + this->ctl_mtcAlive_gate ( node_ptr, false ); node_ptr->http_retries_cur = 0 ; node_ptr->unknown_health_reported = false ; @@ -1648,13 +1648,6 @@ int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr ) /* Disable the heartbeat service for Graceful Recovery */ send_hbs_command ( node_ptr->hostname, MTC_CMD_STOP_HOST ); - /* Clear the minor and failure flags if it is set for this host */ - for ( int iface = 0 ; iface < MAX_IFACES ; iface++ ) - { - hbs_minor_clear ( node_ptr, (iface_enum)iface ); - node_ptr->heartbeat_failed[iface] = false ; - } - /* Have we reached the maximum allowed fast recovery attempts. 
* * If we have then force the full enable by @@ -1664,10 +1657,10 @@ int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr ) */ if ( ++node_ptr->graceful_recovery_counter > MTC_MAX_FAST_ENABLES ) { - /* gate off further mtcAlive messaging timme the offline - * handler runs. This prevents stale messages from making it - * in and prolong the offline detection time */ - node_ptr->mtcAlive_gate = true ; + /* gate off further mtcAlive messaging timme the offline + * handler runs. This prevents stale messages from making it + * in and prolong the offline detection time */ + this->ctl_mtcAlive_gate ( node_ptr, true ) ; elog ("%s Graceful Recovery Failed (retries=%d)\n", node_ptr->hostname.c_str(), node_ptr->graceful_recovery_counter ); @@ -2114,12 +2107,12 @@ int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr ) availStatusChange ( node_ptr, MTC_AVAIL_STATUS__OFFLINE ); } } - else if ( node_ptr->mtcAlive_gate == true ) + else if ( this->get_mtcAlive_gate ( node_ptr ) == true ) { slog ("%s mtcAlive gate unexpectedly set, auto-correcting ...\n", node_ptr->hostname.c_str()); - node_ptr->mtcAlive_gate = false ; + this->ctl_mtcAlive_gate ( node_ptr, false ) ; } /* wait some more */ @@ -2454,6 +2447,12 @@ int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr ) mtcTimer_reset ( node_ptr->mtcTimer ); } + for ( int iface = 0 ; iface < MAX_IFACES ; iface++ ) + { + hbs_minor_clear ( node_ptr, (iface_enum)iface ); + node_ptr->heartbeat_failed[iface] = false ; + } + /* Enable the heartbeat service for Graceful Recovery */ send_hbs_command ( node_ptr->hostname, MTC_CMD_START_HOST ); @@ -3097,7 +3096,7 @@ int nodeLinkClass::disable_handler ( struct nodeLinkClass::node * node_ptr ) } /* open the mtcAlive gate while we are disabled */ - node_ptr->mtcAlive_gate = false ; + this->ctl_mtcAlive_gate ( node_ptr, false ) ; disableStageChange( node_ptr, MTC_DISABLE__START ); adminActionChange ( node_ptr , MTC_ADMIN_ACTION__NONE 
); @@ -3240,7 +3239,7 @@ int nodeLinkClass::offline_handler ( struct nodeLinkClass::node * node_ptr ) operState_enum_to_str(node_ptr->operState).c_str(), availStatus_enum_to_str(node_ptr->availStatus).c_str()); - node_ptr->mtcAlive_gate = false ; + this->ctl_mtcAlive_gate ( node_ptr, false ) ; node_ptr->mtcAlive_mgmnt = false ; node_ptr->mtcAlive_clstr = false ; @@ -3261,7 +3260,7 @@ int nodeLinkClass::offline_handler ( struct nodeLinkClass::node * node_ptr ) case MTC_OFFLINE__WAIT: { /* be sure the mtcAlive gate is open */ - node_ptr->mtcAlive_gate = false ; + this->ctl_mtcAlive_gate (node_ptr, false ) ; if ( mtcTimer_expired ( node_ptr->offline_timer ) == true ) { if ( node_ptr->availStatus == MTC_AVAIL_STATUS__OFFLINE ) @@ -3369,12 +3368,12 @@ int nodeLinkClass::online_handler ( struct nodeLinkClass::node * node_ptr ) node_ptr->hostname.c_str(), node_ptr->onlineStage ); - if ( node_ptr->mtcAlive_gate == true ) + if ( this->get_mtcAlive_gate ( node_ptr ) == true ) { alog ("%s mtcAlive gate unexpectedly set, correcting ...\n", node_ptr->hostname.c_str()); - node_ptr->mtcAlive_gate = false ; + this->ctl_mtcAlive_gate (node_ptr, false ) ; } /* Start with a zero count. This counter is incremented every @@ -3475,7 +3474,8 @@ int nodeLinkClass::online_handler ( struct nodeLinkClass::node * node_ptr ) /* ... keep the 'host locked' file on this host refreshed while in the locked state * ... 
send it on both interfaces just in case */ send_mtc_cmd ( node_ptr->hostname , MTC_MSG_LOCKED, MGMNT_INTERFACE ); - // send_mtc_cmd ( node_ptr->hostname , MTC_MSG_LOCKED, INFRA_INTERFACE ); + if ( clstr_network_provisioned ) + send_mtc_cmd ( node_ptr->hostname , MTC_MSG_LOCKED, CLSTR_INTERFACE ); } /* Start over */ @@ -6106,7 +6106,7 @@ int nodeLinkClass::add_handler ( struct nodeLinkClass::node * node_ptr ) send_hwmon_command ( node_ptr->hostname, MTC_CMD_START_HOST ); } - node_ptr->mtcAlive_gate = false ; + this->ctl_mtcAlive_gate(node_ptr, false) ; node_ptr->addStage = MTC_ADD__DONE ; break; } @@ -6522,6 +6522,11 @@ int nodeLinkClass::oos_test_handler ( struct nodeLinkClass::node * node_ptr ) /* Tell the host that it is locked */ send_mtc_cmd ( node_ptr->hostname , MTC_MSG_LOCKED, MGMNT_INTERFACE); + if ( clstr_network_provisioned ) + { + ilog ("%s Sending Lock Cluster", node_ptr->hostname.c_str() ); + send_mtc_cmd ( node_ptr->hostname , MTC_MSG_LOCKED, CLSTR_INTERFACE ); + } } break ; @@ -6668,26 +6673,6 @@ int nodeLinkClass::insv_test_handler ( struct nodeLinkClass::node * node_ptr ) send_hbs_command ( this->my_hostname, MTC_CMD_ACTIVE_CTRL ); } - /* Manage active controller auto recovery bool. - * If the inactive controller is inservice then disable - * controller autorecovery. 
Otherwise enable it but in this case - * don't change the disable bool as that is used to gate auto - * recovery once the threshoild is reached */ -// if ( is_controller ( node_ptr ) && NOT_THIS_HOST ) -// { -// if (( node_ptr->ar_disabled == false ) && -// ( node_ptr->operState == MTC_OPER_STATE__ENABLED )) -// { -// autorecovery_clear ( CONTROLLER_0 ); -// autorecovery_clear ( CONTROLLER_1 ); -// } - //else if (( node_ptr->ar_disabled == true ) && - // ( node_ptr->operState != MTC_OPER_STATE__ENABLED )) - //{ - // node_ptr->ar_disabled = false ; - //} - // } - /* Monitor the health of the host - no pass file */ if (( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) && ( node_ptr->operState == MTC_OPER_STATE__ENABLED )) diff --git a/mtce/src/maintenance/mtcNodeMsg.h b/mtce/src/maintenance/mtcNodeMsg.h index 0eb9e628..6816354c 100755 --- a/mtce/src/maintenance/mtcNodeMsg.h +++ b/mtce/src/maintenance/mtcNodeMsg.h @@ -74,15 +74,15 @@ typedef struct int mtc_agent_clstr_rx_socket_size ; /** UDP sockets used by the mtcClient to receive maintenance - * commands from and transmit replies to the mtcAgent */ - msgClassSock* mtc_client_rx_socket ; /**< rx from controller */ - msgClassSock* mtc_client_tx_socket ; /**< tx to controller mgmnt */ - msgClassSock* mtc_client_clstr_tx_socket ; /**< tx to controller clstr */ - msgClassSock* mtc_client_clstr_rx_socket ; /**< rx from controller clstr */ - int mtc_cmd_port ; /**< mtc command port number */ - struct sockaddr_in mtc_cmd_addr ; /**< socket attributes mgmnt */ - - + * commands from and transmit replies to the mtcAgent */ + msgClassSock* mtc_client_rx_socket ; /**< rx from controller */ + msgClassSock* mtc_client_tx_socket ; /**< tx to controller mgmnt */ + msgClassSock* mtc_client_tx_socket_c0_clstr ; /**< tx to controller-0 clstr i/f */ + msgClassSock* mtc_client_tx_socket_c1_clstr ; /**< tx to controller-1 clstr i/f */ + msgClassSock* mtc_client_clstr_rx_socket ; /**< rx from controller clstr */ + int 
mtc_mgmnt_cmd_port ; /**< mtc command port mgmnt i/f */ + int mtc_clstr_cmd_port ; /**< mtc command port clstr i/f */ + struct sockaddr_in mtc_cmd_addr ; /**< socket attributes mgmnt */ /***************************************************************/