diff --git a/mtce-common/src/common/bmcUtil.cpp b/mtce-common/src/common/bmcUtil.cpp index 43bed414..964e4eca 100644 --- a/mtce-common/src/common/bmcUtil.cpp +++ b/mtce-common/src/common/bmcUtil.cpp @@ -274,9 +274,9 @@ void bmcUtil_create_pw_file ( thread_info_type * info_ptr, * *************************************************************************/ -string bmcUtil_create_data_fn ( string & hostname, - string file_suffix, - bmc_protocol_enum protocol ) +string bmcUtil_create_data_fn ( const string & hostname, + string file_suffix, + bmc_protocol_enum protocol ) { /* create the output filename */ string datafile ; diff --git a/mtce-common/src/common/bmcUtil.h b/mtce-common/src/common/bmcUtil.h index 0208b88c..8c2a351d 100644 --- a/mtce-common/src/common/bmcUtil.h +++ b/mtce-common/src/common/bmcUtil.h @@ -82,6 +82,14 @@ typedef struct } bmc_info_type ; +typedef struct +{ + string hostname; + string host_ip ; + string bm_ip ; + string bm_un ; + string bm_pw ; +} bmcUtil_accessInfo_type ; /* BMC commands */ typedef enum @@ -107,6 +115,7 @@ typedef enum #define BMC_QUERY_FILE_SUFFIX ((const char *)("_root_query")) #define BMC_INFO_FILE_SUFFIX ((const char *)("_bmc_info")) #define BMC_POWER_CMD_FILE_SUFFIX ((const char *)("_power_cmd_result")) +#define BMC_RESET_CMD_FILE_SUFFIX ((const char *)("_reset")) #define BMC_BOOTDEV_CMD_FILE_SUFFIX ((const char *)("_bootdev")) #define BMC_RESTART_CAUSE_FILE_SUFFIX ((const char *)("_restart_cause")) #define BMC_POWER_STATUS_FILE_SUFFIX ((const char *)("_power_status")) @@ -137,9 +146,9 @@ void bmcUtil_create_pw_file ( thread_info_type * info_ptr, bmc_protocol_enum protocol ); /* create the output filename */ -string bmcUtil_create_data_fn ( string & hostname, - string file_suffix, - bmc_protocol_enum protocol ); +string bmcUtil_create_data_fn ( const string & hostname, + string file_suffix, + bmc_protocol_enum protocol ); /* Get power state from query response data. 
*/ int bmcUtil_is_power_on ( string hostname, diff --git a/mtce-common/src/common/hostUtil.cpp b/mtce-common/src/common/hostUtil.cpp index 06b15c16..5e5c4a16 100644 --- a/mtce-common/src/common/hostUtil.cpp +++ b/mtce-common/src/common/hostUtil.cpp @@ -130,6 +130,14 @@ bool hostUtil_is_valid_username ( string un ) return (false); } +bool hostUtil_is_valid_pw ( string pw ) +{ + if ( !pw.empty() ) + if ( pw.compare(NONE) ) + return (true); + return (false); +} + bool hostUtil_is_valid_mac_addr ( string mac ) { if ( !mac.empty() ) diff --git a/mtce-common/src/common/hostUtil.h b/mtce-common/src/common/hostUtil.h index 09e19b53..d9a561a8 100644 --- a/mtce-common/src/common/hostUtil.h +++ b/mtce-common/src/common/hostUtil.h @@ -46,6 +46,7 @@ string hostUtil_getPrefixPath ( void ); bool hostUtil_is_valid_uuid ( string uuid ); bool hostUtil_is_valid_ip_addr ( string ip ); bool hostUtil_is_valid_username ( string un ); +bool hostUtil_is_valid_pw ( string pw ); bool hostUtil_is_valid_bm_type ( string bm_type ); int hostUtil_mktmpfile ( string hostname, string basename, string & filename, string data ); diff --git a/mtce-common/src/common/ipmiUtil.cpp b/mtce-common/src/common/ipmiUtil.cpp index c5e03193..0679df2e 100644 --- a/mtce-common/src/common/ipmiUtil.cpp +++ b/mtce-common/src/common/ipmiUtil.cpp @@ -202,3 +202,66 @@ int ipmiUtil_bmc_info_load ( string hostname, const char * filename, bmc_info_ty ipmiUtil_bmc_info_log ( hostname, bmc_info, rc ); return (rc); } +/* Build an IPMITOOL_POWER_RESET_CMD request for the BMC in accessInfo and run it with system() ; output_filename is handed to ipmiUtil_create_request. + * Returns 0 when system() returns 0, otherwise FAIL_SYSTEM_CALL ; hostname is only used to tag the latency log. */ +int ipmiUtil_reset_host_now ( string hostname, + bmcUtil_accessInfo_type accessInfo, + string output_filename) +{ + dlog("%s %s BMC [IP:%s UN:%s]", + accessInfo.hostname.c_str(), + accessInfo.host_ip.c_str(), + accessInfo.bm_ip.c_str(), + accessInfo.bm_un.c_str()); + + if (daemon_is_file_present ( BMC_OUTPUT_DIR ) == false ) + daemon_make_dir(BMC_OUTPUT_DIR) ; + if (daemon_is_file_present ( IPMITOOL_OUTPUT_DIR ) == false ) + daemon_make_dir(IPMITOOL_OUTPUT_DIR) ; + + /* create temp password file */ 
thread_info_type info ; + info.hostname = accessInfo.hostname ; + info.password_file = "" ; + info.pw_file_fd = 0 ; + + /* Use common utility to create a temp pw file */ + bmcUtil_create_pw_file ( &info, accessInfo.bm_pw, BMC_PROTOCOL__IPMITOOL ); + + /* create request */ + string request = + ipmiUtil_create_request ( IPMITOOL_POWER_RESET_CMD, + accessInfo.bm_ip, + accessInfo.bm_un, + info.password_file, + output_filename ); + + /* issue request + * + * Note: Could launch a thread to avoid any stall. + * However, mtcClient can withstand up to a 25 second stall + * before pmon will fail it due to active monitoring. + * UT showed that there is no stall at all. */ + unsigned long long latency_threshold_secs = DEFAULT_SYSTEM_REQUEST_LATENCY_SECS ; + unsigned long long before_time = gettime_monotonic_nsec () ; + int rc = system ( request.data()) ; + unsigned long long after_time = gettime_monotonic_nsec () ; + unsigned long long delta_time = after_time-before_time ; + if ( rc ) + { + wlog("system call failed ; rc:%d [%d:%s]", rc, errno, strerror(errno) ); + rc = FAIL_SYSTEM_CALL ; + } + if ( delta_time > (latency_threshold_secs*1000000000)) + { /* log as secs.nanosecs ; the remainder is zero padded to 9 digits so the fraction reads correctly */ + wlog ("%s bmc system call took %2llu.%09llu sec", hostname.c_str(), + (delta_time/NSEC_TO_SEC),
+ (delta_time%NSEC_TO_SEC)); + } + + /* Cleanup */ + if ( info.pw_file_fd > 0 ) + close(info.pw_file_fd); + daemon_remove_file ( info.password_file.data()); + return (rc); +} diff --git a/mtce-common/src/common/ipmiUtil.h b/mtce-common/src/common/ipmiUtil.h index 7cc9edbc..39e8b9fd 100644 --- a/mtce-common/src/common/ipmiUtil.h +++ b/mtce-common/src/common/ipmiUtil.h @@ -57,6 +57,8 @@ int ipmiUtil_init ( void ); int ipmiUtil_bmc_info_load ( string hostname, const char * filename, bmc_info_type & mc_info ); +int ipmiUtil_reset_host_now ( string hostname, bmcUtil_accessInfo_type accessInfo, string output_filename ); + /* Create the ipmi request */ string ipmiUtil_create_request ( string cmd, string & ip, string & un, string & pw, string & out ); diff --git a/mtce-common/src/common/nodeBase.cpp b/mtce-common/src/common/nodeBase.cpp index 3c7c482a..e1a0572a 100755 --- a/mtce-common/src/common/nodeBase.cpp +++ b/mtce-common/src/common/nodeBase.cpp @@ -149,6 +149,8 @@ const char * get_mtcNodeCommand_str ( int cmd ) case MTC_REQ_MTCALIVE: return ("mtcAlive req"); case MTC_MSG_LOCKED: return ("locked msg"); case MTC_CMD_LAZY_REBOOT: return ("lazy reboot"); + case MTC_MSG_INFO: return ("info msg"); + case MTC_CMD_SYNC: return ("sync"); /* goenabled commands and messages */ case MTC_MSG_MAIN_GOENABLED: return ("goEnabled main msg"); @@ -199,7 +201,8 @@ const char * get_mtcNodeCommand_str ( int cmd ) case MTC_EVENT_PMON_MAJOR: return("pmon major event"); case MTC_EVENT_PMON_MINOR: return("pmon minor event"); case MTC_EVENT_PMON_LOG: return("pmon log"); - case MTC_EVENT_PMOND_RAISE: return("pmon raise"); + case MTC_EVENT_PMOND_RAISE: return("pmond raise"); + case MTC_EVENT_PMOND_CLEAR: return("pmond clear"); /* data port events */ case MTC_EVENT_AVS_CLEAR: return("AVS clear"); diff --git a/mtce-common/src/common/nodeBase.h b/mtce-common/src/common/nodeBase.h index 0603c0ce..bf7abb8e 100755 --- a/mtce-common/src/common/nodeBase.h +++ b/mtce-common/src/common/nodeBase.h @@ 
-751,7 +751,9 @@ typedef struct #define MTC_CMD_START_STORAGE_SVCS 19 /* to host */ #define MTC_CMD_LAZY_REBOOT 20 /* to host */ #define MTC_CMD_HOST_SVCS_RESULT 21 /* to host */ -#define MTC_CMD_LAST 22 +#define MTC_MSG_INFO 22 /* to host */ +#define MTC_CMD_SYNC 23 /* to host */ +#define MTC_CMD_LAST 24 #define RESET_PROG_MAX_REBOOTS_B4_RESET (5) #define RESET_PROG_MAX_REBOOTS_B4_RETRY (RESET_PROG_MAX_REBOOTS_B4_RESET+2) @@ -1263,6 +1265,14 @@ typedef enum MTC_AR_DISABLE_CAUSE__NONE, } autorecovery_disable_cause_enum ; +/* code that represents a specific group of maintenance information + * ... typically for a specific feature */ +typedef enum +{ + MTC_INFO_CODE__PEER_CONTROLLER_KILL_INFO, + MTC_INFO_CODE__LAST +} mtcInfo_enum ; + /* Service Based Auto Recovery Control Structure */ typedef struct { diff --git a/mtce/src/common/nodeClass.cpp b/mtce/src/common/nodeClass.cpp index ae43fe64..f22a3539 100755 --- a/mtce/src/common/nodeClass.cpp +++ b/mtce/src/common/nodeClass.cpp @@ -3295,6 +3295,102 @@ void nodeLinkClass::mtcInfo_log ( struct nodeLinkClass::node * node_ptr ) } } +/*************************************************************************** + * + * Name : build_mtcInfo_dict + * + * Purpose : Build a json dictionary for the specified info code enum + * + * Assumptions : Only MTC_INFO_CODE__PEER_CONTROLLER_KILL_INFO is supported + * + * Returns : Returns a json dictionary of mtcInfo. 
+ * + * { + * "controller-0":{ + * "mgmt_ip":"192.168.204.2", + * "bm_ip":"xxx.xxx.xx.23", + * "bm_un":"root", + * "bm_pw":"root" + * }, + * "controller-1":{ + * "mgmt_ip":"192.168.204.3", + * "bm_ip":"xxx.xxx.xx.24", + * "bm_un":"root", + * "bm_pw":"root" + * } + * } + * + **************************************************************************/ + +string nodeLinkClass::build_mtcInfo_dict ( mtcInfo_enum mtcInfo_code ) +{ + string mtcInfo_dict = "" ; + + /* count of controller entries appended so far ; the loop exits after 2 */ + int temp = 0 ; + + /* should never happen but better to be safe */ + if ( head == NULL ) + return mtcInfo_dict ; + + /* force the update to be a dictionary */ + mtcInfo_dict = "{" ; + + for ( struct node * ptr = head ; ; ptr = ptr->next ) + { + if (( ptr->nodetype & CONTROLLER_TYPE ) && + ( mtcInfo_code == MTC_INFO_CODE__PEER_CONTROLLER_KILL_INFO )) + { + if ( temp ) + mtcInfo_dict.append(","); + mtcInfo_dict.append("\"" + ptr->hostname + "\":{"); + mtcInfo_dict.append("\"mgmt_ip\":\"" + ptr->ip + "\","); + mtcInfo_dict.append("\"bm_ip\":\"" + ptr->bm_ip + "\","); + mtcInfo_dict.append("\"bm_un\":\"" + ptr->bm_un + "\","); + mtcInfo_dict.append("\"bm_pw\":\"" + ptr->bm_pw + "\"}"); + if ( ++temp >= 2 ) + break ; + } + if (( ptr->next == NULL ) || ( ptr == tail )) + break ; + } + mtcInfo_dict.append("}"); + return mtcInfo_dict ; +} + +/************************************************************************** + * + * Name : mtcInfo_handler + * + * Purpose : Send mtcInfo update to provisioned controllers when + * the push flag is set. + * + **************************************************************************/ + +void nodeLinkClass::mtcInfo_handler ( void ) +{ + /* This is set in the bm_handler once access to the BMC using + * provisioned credentials has been verified. 
*/ + if ( this->want_mtcInfo_push ) + { + /* handler will enhance when more codes are introduced */ + mtcInfo_enum mtcInfo_code = MTC_INFO_CODE__PEER_CONTROLLER_KILL_INFO ; + + string mtcInfo_dict = build_mtcInfo_dict(mtcInfo_code); + if ( ! mtcInfo_dict.empty() ) + { + string temp = CONTROLLER_0 ; + send_mtc_cmd ( temp, MTC_MSG_INFO, MGMNT_INTERFACE, mtcInfo_dict); + if ( this->controllers > 1 ) + { + temp = CONTROLLER_1; + send_mtc_cmd ( temp, MTC_MSG_INFO, MGMNT_INTERFACE, mtcInfo_dict); + } + } + this->want_mtcInfo_push = false ; + } +} + /* Lock Rules * * 1. Cannot lock this controller @@ -4422,6 +4518,18 @@ string nodeLinkClass::get_bm_ip ( string hostname ) return (""); } +string nodeLinkClass::get_bm_pw ( string hostname ) +{ + nodeLinkClass::node* node_ptr ; + node_ptr = nodeLinkClass::getNode ( hostname ); + if ( node_ptr != NULL ) + { + return (node_ptr->bm_pw); + } + elog ("%s bm pw lookup failed\n", hostname.c_str() ); + return (""); +} + string nodeLinkClass::get_bm_un ( string hostname ) { nodeLinkClass::node* node_ptr ; diff --git a/mtce/src/common/nodeClass.h b/mtce/src/common/nodeClass.h index 42ca79d6..9b667316 100755 --- a/mtce/src/common/nodeClass.h +++ b/mtce/src/common/nodeClass.h @@ -828,10 +828,13 @@ private: int oos_test_handler ( struct nodeLinkClass::node * node_ptr ); int insv_test_handler ( struct nodeLinkClass::node * node_ptr ); int stress_handler ( struct nodeLinkClass::node * node_ptr ); - int bmc_handler ( struct nodeLinkClass::node * node_ptr ); + int bmc_handler ( struct nodeLinkClass::node * node_ptr ); int degrade_handler ( struct nodeLinkClass::node * node_ptr ); + int uptime_handler ( void ); + void mtcInfo_handler ( void ); + int host_services_handler ( struct nodeLinkClass::node * node_ptr ); /* Starts the specified 'reset or powercycle' recovery monitor */ @@ -851,13 +854,22 @@ private: void ctl_mtcAlive_gate ( struct nodeLinkClass::node * node_ptr, bool gate_state ); void set_mtcAlive ( struct nodeLinkClass::node * 
node_ptr, int interface ); + /********* mtcInfo in the database ************/ int mtcInfo_set ( struct nodeLinkClass::node * node_ptr, string key, string value ); string mtcInfo_get ( struct nodeLinkClass::node * node_ptr, string key ); void mtcInfo_clr ( struct nodeLinkClass::node * node_ptr, string key ); void mtcInfo_log ( struct nodeLinkClass::node * node_ptr ); - int set_mtcInfo ( struct nodeLinkClass::node * node_ptr, string & mtc_info ); + /********* mtcInfo that gets pushed out to daemons ***********/ + + + /* flag telling mtce when a mtcInfo push needs to be done */ + bool want_mtcInfo_push = false ; + + /* performs the mtcInfo push */ + void push_mtcInfo ( void ); + /***************************************************************************** * * Name : bmc_command_send @@ -1192,11 +1204,11 @@ private: * Set to true when the autorecovery threshold is reached * and we want to avoid taking further autorecovery action * even though it may be requested. */ - bool autorecovery_disabled ; + bool autorecovery_disabled = false ; /* Set to true by fault detection methods that are * autorecoverable when in simplex mode. */ - bool autorecovery_enabled ; + bool autorecovery_enabled = false ; /** Tracks the number of hosts that 'are currently' in service trouble * wrt heartbeat (above minor threshold). 
@@ -1464,11 +1476,14 @@ public: /***********************************************************/ + /** Number of provisioned controllers */ + int controllers = 0 ; + /** Number of provisioned hosts (nodes) */ - int hosts ; + int hosts = 0 ; /* Set to True while waiting for UNLOCK_READY_FILE in simplex mode */ - bool unlock_ready_wait ; + bool unlock_ready_wait = false ; /** Host has been deleted */ bool host_deleted ; @@ -1517,6 +1532,9 @@ public: /** Return the number of inventoried hosts */ int num_hosts ( void ); + /** Return the number of inventoried controllers */ + int num_controllers ( void ); + /** ********************************************************************** * * Name : nodeLinkClass::workQueue_enqueue @@ -1664,6 +1682,9 @@ public: /* Clear heartbeat failed flag for all interfaces */ void manage_heartbeat_clear ( string hostname, iface_enum iface ); + /* Build a json dictionary of containing code specified maintenance info */ + string build_mtcInfo_dict ( mtcInfo_enum mtcInfo_code ); + /** Test and Debug Members and Variables */ /** Print node info banner */ @@ -1789,6 +1810,7 @@ public: string get_bm_ip ( string hostname ); string get_bm_un ( string hostname ); + string get_bm_pw ( string hostname ); string get_bm_type ( string hostname ); string get_hostname_from_bm_ip ( string bm_ip ); diff --git a/mtce/src/heartbeat/Makefile b/mtce/src/heartbeat/Makefile index a625f20b..a0fdb8da 100755 --- a/mtce/src/heartbeat/Makefile +++ b/mtce/src/heartbeat/Makefile @@ -13,7 +13,7 @@ LDLIBS = -lstdc++ -ldaemon -lcommon -lthreadUtil -lpthread -lfmcommon -lalarm -l INCLUDES = -I. 
-I/usr/include/mtce-daemon -I/usr/include/mtce-common INCLUDES += -I../common -I../alarm -I../maintenance -I../public -CCFLAGS = -g -O2 -Wall -Wextra -Werror +CCFLAGS = -g -O2 -Wall -Wextra -Werror -std=c++11 STATIC_ANALYSIS_TOOL = cppcheck STATIC_ANALYSIS_TOOL_EXISTS = $(shell [[ -e `which $(STATIC_ANALYSIS_TOOL)` ]] && echo 1 || echo 0) diff --git a/mtce/src/heartbeat/hbsStubs.cpp b/mtce/src/heartbeat/hbsStubs.cpp index 81326d17..474a7221 100644 --- a/mtce/src/heartbeat/hbsStubs.cpp +++ b/mtce/src/heartbeat/hbsStubs.cpp @@ -279,8 +279,14 @@ void nodeLinkClass::mnfa_enter ( void ) void nodeLinkClass::mnfa_exit ( bool force ) { force = force ; } -int send_mtc_cmd ( string & hostname, int cmd, int interface ) -{ UNUSED(hostname); UNUSED(cmd); UNUSED(interface); return PASS ; } +int send_mtc_cmd ( string & hostname, int cmd, int interface, string json_dict) +{ + UNUSED(hostname); + UNUSED(cmd); + UNUSED(interface); + UNUSED(json_dict); + return PASS ; +} int nodeLinkClass::mtcInvApi_subf_states ( string hostname, string oper_subf, diff --git a/mtce/src/maintenance/Makefile b/mtce/src/maintenance/Makefile index 7d11f6ac..767ffefe 100755 --- a/mtce/src/maintenance/Makefile +++ b/mtce/src/maintenance/Makefile @@ -54,7 +54,7 @@ BINS = mtcAgent mtcClient LDLIBS += -lstdc++ -ldaemon -lcommon -lthreadUtil -lbmcUtils -lfmcommon -lalarm -lpthread -lrt -levent -ljson-c -lamon -lcrypto -luuid INCLUDES = -I. 
-I/usr/include/mtce-daemon -I/usr/include/mtce-common INCLUDES += -I../common -I../alarm -I../heartbeat -I../hwmon -I../public -CCFLAGS += -g -O2 -Wall -Wextra -Werror -Wno-missing-braces +CCFLAGS += -g -O2 -Wall -Wextra -Werror -Wno-missing-braces -std=c++11 STATIC_ANALYSIS_TOOL = cppcheck STATIC_ANALYSIS_TOOL_EXISTS = $(shell [[ -e `which $(STATIC_ANALYSIS_TOOL)` ]] && echo 1 || echo 0) diff --git a/mtce/src/maintenance/mtcCompMsg.cpp b/mtce/src/maintenance/mtcCompMsg.cpp index d3793553..a3076338 100755 --- a/mtce/src/maintenance/mtcCompMsg.cpp +++ b/mtce/src/maintenance/mtcCompMsg.cpp @@ -20,7 +20,7 @@ #include #include -#include /* for ... unix domain sockets */ +#include /* for ... unix domain sockets */ #include #include #include @@ -29,8 +29,8 @@ #include #include #include -#include /* for the list of conf file names */ - +#include /* for ... list of conf file names */ +#include /* for ... sync */ using namespace std; @@ -204,6 +204,24 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) mlog1 ("mtcAlive request received (%s network)\n", interface_name.c_str()); return ( send_mtcAlive_msg ( sock_ptr, get_who_i_am(), interface )); } + else if ( msg.cmd == MTC_MSG_INFO ) + { + mlog1("mtc 'info' message received (%s network)\n", interface_name.c_str()); + load_mtcInfo_msg ( msg ); + return ( PASS ); /* no ack for this message */ + } + else if ( msg.cmd == MTC_CMD_SYNC ) + { + ilog ("mtc '%s' message received (%s network)\n", + get_mtcNodeCommand_str(msg.cmd), + interface_name.c_str()); + + ilog ("Sync Start"); + sync (); + ilog ("Sync Done"); + + return ( PASS ); /* no ack for this message */ + } else if ( msg.cmd == MTC_MSG_LOCKED ) { /* Only recreate the file if its not already present */ @@ -603,7 +621,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) } /** Send an event to the mtcAgent **/ -int mtce_send_event ( mtc_socket_type * sock_ptr, int cmd , const char * mtce_name_ptr ) +int mtce_send_event ( 
mtc_socket_type * sock_ptr, unsigned int cmd , const char * mtce_name_ptr ) { mtc_message_type event ; @@ -619,6 +637,24 @@ int mtce_send_event ( mtc_socket_type * sock_ptr, int cmd , const char * mtce_na /* We don't use the buffer for mtce events to remove it from the size */ bytes = ((sizeof(mtc_message_type))-(BUF_SIZE)); } + else if ( cmd == MTC_EVENT_MONITOR_READY ) + { + string event_info = "{\"" ; + event_info.append(MTC_JSON_INV_NAME); + event_info.append("\":\""); + event_info.append(get_hostname()); + event_info.append("\",\""); + event_info.append(MTC_JSON_SERVICE); + event_info.append("\":\""); + event_info.append(MTC_SERVICE_MTCCLIENT_NAME ); + event_info.append("\"}"); + + size_t len = event_info.length()+1 ; + snprintf ( &event.hdr[0], MSG_HEADER_SIZE, "%s", get_mtce_event_header()); + snprintf ( &event.buf[0], len, "%s", event_info.data()); + bytes = ((sizeof(mtc_message_type))-(BUF_SIZE-len)); + ilog ("%s %s ready", get_hostname().c_str(), MTC_SERVICE_MTCCLIENT_NAME); + } else if (( cmd == MTC_EVENT_AVS_CLEAR ) || ( cmd == MTC_EVENT_AVS_MAJOR ) || ( cmd == MTC_EVENT_AVS_CRITICAL )) @@ -666,7 +702,7 @@ int mtce_send_event ( mtc_socket_type * sock_ptr, int cmd , const char * mtce_na { if ( bytes == 0 ) { - slog ("message send failed ; message size=0 for cmd:%d is 0\n", event.cmd ); + slog ("message send failed ; message size=0 for cmd:0x%x is 0\n", event.cmd ); rc = FAIL_NO_DATA ; } else if ((rc = sock_ptr->mtc_client_tx_socket->write((char*)&event.hdr[0], bytes))!= bytes ) @@ -933,32 +969,59 @@ int send_mtcAlive_msg ( mtc_socket_type * sock_ptr, string identity, int interfa return (PASS) ; } -/* Accelerated Virtual Switch 'events' socket - * - for receiving data port state change event - * Event strings are - * - * {"type":"port-state", "severity":"critical|major|clear"} - * - * type:port-state - the provider network data port status has changed to the supplied fault severity - * - * severity: - * critical - port has failed and is not part of an 
aggregate or is the last port in an aggregate (degrade, disable services) - * major - port has failed and is part of an aggregate with other inservice-ports (degrade only) - * clear - port has recovered from a failed state and is operational (clear degrade, enable services) - * - * NOTE: The port status can transition from any of the above states to any other state. - * - * The neutron agent monitors the vswitch ports at a 2 second interval. - * If a port changes link state during the polling period, it will - * raise/clear the alarm, but now also calculates the impact of that port - * failure on the provider network data interface. - * - * The overall aggregated state across all provider network interfaces will - * be reported to maintenance when ports enter a link down or up state. - * The agent will also periodically send the current provider network port - * status to maintenance every 30 seconds. - * - */ +int send_mtcClient_cmd ( mtc_socket_type * sock_ptr, int cmd, string hostname, string address, int port) +{ + mtc_message_type msg ; + int bytes = 0 ; + MEMSET_ZERO (msg); + snprintf ( &msg.hdr[0], MSG_HEADER_SIZE, "%s", get_cmd_req_msg_header()); + msg.cmd = cmd ; + + switch ( cmd ) + { + case MTC_CMD_SYNC: + { + ilog ("Sending '%s' command to %s:%s:%d", + get_mtcNodeCommand_str(cmd), + hostname.c_str(), + address.c_str(), port); + + msg.num = 0 ; + + /* buffer not used in this message */ + bytes = ((sizeof(mtc_message_type))-(BUF_SIZE)); + + break ; + } + default: + { + slog("Unsupported command ; %s:%d", get_mtcNodeCommand_str(cmd), cmd ); + return (FAIL_BAD_CASE); + } + } + int rc = FAIL ; + + /* Send to controller floating address */ + if (( sock_ptr->mtc_client_tx_socket ) && + ( sock_ptr->mtc_client_tx_socket->sock_ok() == true )) + { + print_mtc_message ( hostname, MTC_CMD_TX, msg, get_iface_name_str(MGMNT_INTERFACE), false ); + rc = sock_ptr->mtc_client_tx_socket->write((char*)&msg.hdr[0], bytes, address.data(), port ) ; + if ( 0 >= rc ) + { + 
elog("failed to send command to mtcClient (%d) (%d:%s)", rc, errno, strerror(errno)); + rc = FAIL_SOCKET_SENDTO ; + } + else + rc = PASS ; + } + else + { + elog("mtc_client_tx_socket not ok"); + rc = FAIL_BAD_STATE ; + } + return (rc) ; +} int mtcCompMsg_testhead ( void ) { diff --git a/mtce/src/maintenance/mtcCtrlMsg.cpp b/mtce/src/maintenance/mtcCtrlMsg.cpp index 6a820ed1..8f21aa6a 100755 --- a/mtce/src/maintenance/mtcCtrlMsg.cpp +++ b/mtce/src/maintenance/mtcCtrlMsg.cpp @@ -443,6 +443,34 @@ int mtc_service_inbox ( nodeLinkClass * obj_ptr, obj_ptr->declare_service_ready ( hostname, MTC_SERVICE_HEARTBEAT ); return (PASS); } + else if ( service == MTC_SERVICE_MTCCLIENT_NAME ) + { + ilog ("%s %s ready", hostname.c_str(), MTC_SERVICE_MTCCLIENT_NAME); + + /* if this ready event is from the mtcClient of a + * controller that has valid bmc access info then + * build the 'peer controller kill' mtcInfo and + * send it to that mtcClient ; an empty dict is never sent */ + if ( obj_ptr->get_nodetype ( hostname ) & CONTROLLER_TYPE ) + { + string bm_pw = obj_ptr->get_bm_pw ( hostname ) ; + if ( hostUtil_is_valid_pw ( bm_pw )) + { + string bm_un = obj_ptr->get_bm_un ( hostname ) ; + string bm_ip = obj_ptr->get_bm_ip ( hostname ) ; + if (( hostUtil_is_valid_username ( bm_un )) && + ( hostUtil_is_valid_ip_addr ( bm_ip ))) + { + string mtcInfo_dict = obj_ptr->build_mtcInfo_dict ( + MTC_INFO_CODE__PEER_CONTROLLER_KILL_INFO); + /* build_mtcInfo_dict returns "" when there is no inventory ; same guard as mtcInfo_handler */ + if ( !mtcInfo_dict.empty() ) + send_mtc_cmd ( hostname, MTC_MSG_INFO, MGMNT_INTERFACE, mtcInfo_dict ); + } + } + } + return (PASS); + } if ( service == MTC_SERVICE_HWMOND_NAME ) { std::list::iterator temp ; @@ -578,11 +606,12 @@ int mtc_service_inbox ( nodeLinkClass * obj_ptr, return (rc); } -int send_mtc_cmd ( string & hostname, int cmd , int interface ) +int send_mtc_cmd ( string & hostname, int cmd , int interface, string json_dict ) { int rc = FAIL ; bool force = false ; mtc_message_type mtc_cmd ; + string data = "" ; mtc_socket_type * sock_ptr = get_sockPtr (); memset (&mtc_cmd,0,sizeof(mtc_message_type)); @@ -592,6 +621,16 @@ int 
send_mtc_cmd ( string & hostname, int cmd , int interface ) switch ( cmd ) { + case MTC_MSG_INFO: + { + snprintf ( &mtc_cmd.hdr[0], MSG_HEADER_SIZE, "%s" , get_cmd_req_msg_header() ); + mtc_cmd.cmd = cmd ; + mtc_cmd.num = 0 ; + data = "{\"mtcInfo\":" + json_dict + "}"; + ilog("%s mtc info update", hostname.c_str()); + rc = PASS ; + break ; + } case MTC_REQ_MTCALIVE: { snprintf ( &mtc_cmd.hdr[0], MSG_HEADER_SIZE, "%s" , get_cmd_req_msg_header() ); @@ -689,11 +728,20 @@ int send_mtc_cmd ( string & hostname, int cmd , int interface ) * Note: the minus 1 is to overwrite the null */ snprintf ( &mtc_cmd.hdr[MSG_HEADER_SIZE-1], MSG_HEADER_SIZE, "%s", obj_ptr->get_hostIfaceMac(hostname, MGMNT_IFACE).data()); - string data = "{\"address\":\""; - data.append(obj_ptr->my_float_ip) ; - data.append("\",\"interface\":\""); - data.append(get_iface_name_str(interface)); - data.append("\"}"); + /* If data is empty then at least add where the message came from */ + if ( data.empty() ) + { + data = "{\"address\":\""; + data.append(obj_ptr->my_float_ip) ; + data.append("\",\"interface\":\""); + data.append(get_iface_name_str(interface)); + data.append("\"}"); + } + else + { + ; /* data is already pre loaded by the command case above */ + } + /* copy data into message buffer */ snprintf ( &mtc_cmd.buf[0], data.length()+1, "%s", data.data()); bytes = (sizeof(mtc_message_type)-(BUF_SIZE-(data.length()+1))); diff --git a/mtce/src/maintenance/mtcNodeComp.cpp b/mtce/src/maintenance/mtcNodeComp.cpp index 6e58c7fe..c9a65708 100644 --- a/mtce/src/maintenance/mtcNodeComp.cpp +++ b/mtce/src/maintenance/mtcNodeComp.cpp @@ -43,9 +43,9 @@ #include #include #include -//#include /* for ... syslog */ #include #include +#include /* for ... json_tokener_parse */ using namespace std; @@ -56,6 +56,10 @@ using namespace std; #include "nodeBase.h" /* for ... Common Definitions */ #include "nodeTimers.h" /* fpr ... Timer Service */ #include "nodeUtil.h" /* for ... 
Common Utilities */ +#include "hostUtil.h" /* for ... hostUtil_is_valid_... */ +#include "jsonUtil.h" /* for ... jsonUtil_get_key_value_string */ +#include "bmcUtil.h" /* for ... bmcUtil_accessInfo_type */ +#include "ipmiUtil.h" /* for ... ipmiUtil_reset_host_now */ #include "nodeMacro.h" /* for ... CREATE_NONBLOCK_INET_UDP_RX_SOCKET */ #include "mtcNodeMsg.h" /* for ... common maintenance messaging */ #include "mtcNodeComp.h" /* for ... this module header */ @@ -96,7 +100,7 @@ string get_hostname ( void ) * Daemon Configuration Structure - The allocated struct * @see daemon_common.h for daemon_config_type struct format. */ -static daemon_config_type mtc_config ; +static daemon_config_type mtc_config ; daemon_config_type * daemon_get_cfg_ptr () { return &mtc_config ; } /** @@ -106,6 +110,8 @@ daemon_config_type * daemon_get_cfg_ptr () { return &mtc_config ; } static mtc_socket_type mtc_sock ; static mtc_socket_type * sock_ptr ; +static bmcUtil_accessInfo_type peer_controller = {"none","none","none","none","none"}; +static bmcUtil_accessInfo_type this_controller = {"none","none","none","none","none"}; int run_goenabled_scripts ( string type ); @@ -138,6 +144,16 @@ void timer_handler ( int sig, siginfo_t *si, void *uc) mtcTimer_stop_int_safe ( ctrl.hostservices.timer ); ctrl.hostservices.timer.ring = true ; } + else if ( *tid_ptr == ctrl.peer_ctrlr_reset.sync_timer.tid ) + { + ctrl.peer_ctrlr_reset.sync_timer.ring = true ; + mtcTimer_stop_int_safe ( ctrl.peer_ctrlr_reset.sync_timer ); + } + else if ( *tid_ptr == ctrl.peer_ctrlr_reset.audit_timer.tid ) + { + /* use auto restart */ + ctrl.peer_ctrlr_reset.audit_timer.ring = true ; + } else { mtcTimer_stop_tid_int_safe ( tid_ptr ); @@ -207,9 +223,8 @@ void daemon_exit ( void ) exit (0) ; } - /* Startup config read */ -static int mtc_config_handler ( void * user, +static int mtc_config_handler ( void * user, const char * section, const char * name, const char * value) @@ -236,11 +251,14 @@ static int mtc_config_handler ( 
void * user, config_ptr->failsafe_shutdown_delay = atoi(value); ilog ("Shutdown TO : %d secs\n", config_ptr->failsafe_shutdown_delay ); } - else + if (( ctrl.nodetype & CONTROLLER_TYPE ) && + (MATCH("client", "sync_b4_peer_ctrlr_reset"))) { - return (PASS); + ctrl.peer_ctrlr_reset.sync = atoi(value); + ilog("SyncB4 Reset: %s", + ctrl.peer_ctrlr_reset.sync ? "Yes" : "No" ); } - return (FAIL); + return (PASS); } /* Read the mtc.ini file and load control */ @@ -946,6 +964,65 @@ void _manage_goenabled_tests ( void ) _scripts_cleanup (ctrl.active_script_set) ; } +int issue_reset_and_cleanup ( void ) +{ + int rc = FAIL ; + const char peer_ctrlr [] = "Peer controller reset" ; + + ilog("SM %s request", peer_ctrlr ); + /* check creds */ + if (( hostUtil_is_valid_ip_addr ( peer_controller.bm_ip ) == false ) || + ( hostUtil_is_valid_username ( peer_controller.bm_un ) == false ) || + ( hostUtil_is_valid_pw ( peer_controller.bm_pw ) == false )) + { + elog("%s cannot reset peer BMC host at %s due to invalid credentials", + ctrl.hostname, peer_controller.bm_ip.c_str()); + return (rc); + } + + /* create output filename - no need to delete after operation */ + string output_filename = bmcUtil_create_data_fn ( ctrl.hostname, + BMC_RESET_CMD_FILE_SUFFIX, + BMC_PROTOCOL__IPMITOOL ); + if ( output_filename.empty() ) + { + elog("%s ; failed to create output filename", peer_ctrlr); + rc = FAIL_STRING_EMPTY ; + } + else if ( ipmiUtil_reset_host_now ( ctrl.hostname, + peer_controller, + output_filename ) == PASS ) + { + string result = daemon_get_file_str ( output_filename.data() ); + ilog("%s succeeded", peer_ctrlr); + + /* don't fail the operation if the result is unexpected ; but log it */ + if ( result.compare( IPMITOOL_POWER_RESET_RESP ) ) + { + dlog("... 
but reset command output was unexpected ; %s", + result.c_str()); + } + rc = PASS ; + } + else + { + elog("%s failed", peer_ctrlr); + rc = FAIL_OPERATION ; + } + + if ( rc == PASS ) + { + /* give the host a chance to reset before + * telling SM the reset is done */ + sleep (2) ; + + /* Don't want to remove the file if the reset was not successful */ + dlog("removing %s", RESET_PEER_NOW ); + daemon_remove_file ( RESET_PEER_NOW ); + } + return (rc); +} + /* The main service loop */ int daemon_init ( string iface, string nodetype_str ) @@ -963,6 +1040,7 @@ int daemon_init ( string iface, string nodetype_str ) ctrl.subfunction = 0 ; ctrl.system_type = daemon_system_type (); ctrl.clstr_iface_provisioned = false ; + ctrl.peer_ctrlr_reset.sync = false ; /* convert node type to integer */ ctrl.nodetype = get_host_function_mask ( nodetype_str ) ; @@ -1018,6 +1096,13 @@ int daemon_init ( string iface, string nodetype_str ) mtcTimer_init ( ctrl.goenabled.timer, &ctrl.hostname[0], "goenable timer" ); mtcTimer_init ( ctrl.hostservices.timer, &ctrl.hostname[0], "host services timer" ); + /* initialize peer controller reset feature */ + mtcTimer_init ( ctrl.peer_ctrlr_reset.audit_timer, &ctrl.hostname[0], "peer ctrlr reset audit timer" ), + mtcTimer_init ( ctrl.peer_ctrlr_reset.sync_timer, &ctrl.hostname[0], "peer ctrlr reset sync timer" ), + ctrl.peer_ctrlr_reset.sync_timer.ring = false ; + ctrl.peer_ctrlr_reset.audit_timer.ring = false ; + ctrl.peer_ctrlr_reset.audit_period = PEER_CTRLR_AUDIT_PERIOD ; + /* initialize the script group control structures */ script_ctrl_init ( &ctrl.goenabled ); script_ctrl_init ( &ctrl.hostservices ); @@ -1073,6 +1158,17 @@ void daemon_service_run ( void ) /* Send first mtcAlive ASAP */ mtcTimer_start ( ctrl.timer, timer_handler, 1 ); + /* Monitor for peer controller reset requests when this + * daemon runs on a controller */ + if ( ctrl.nodetype & CONTROLLER_TYPE ) + { + mtcTimer_start ( ctrl.peer_ctrlr_reset.audit_timer, + timer_handler, + 
ctrl.peer_ctrlr_reset.audit_period ); + } + + mtce_send_event ( sock_ptr, MTC_EVENT_MONITOR_READY, NULL ); + /* lets go select so that the sock does not go crazy */ dlog ("%s running main loop with %d msecs socket timeout\n", &ctrl.hostname[0], (SOCKET_WAIT/1000) ); @@ -1384,7 +1480,51 @@ void daemon_service_run ( void ) } } } - + /* service controller specific audits */ + if ( ctrl.nodetype & CONTROLLER_TYPE ) + { + /* peer controller reset service audit */ + if ( ctrl.peer_ctrlr_reset.audit_timer.ring ) + { + if ( daemon_is_file_present ( RESET_PEER_NOW ) ) + { + if ( ctrl.peer_ctrlr_reset.sync ) + { + if ( ctrl.peer_ctrlr_reset.sync_timer.ring ) + { + issue_reset_and_cleanup (); + ctrl.peer_ctrlr_reset.sync_timer.ring = false ; + } + else if ( ctrl.peer_ctrlr_reset.sync_timer.tid == NULL ) + { + if ( send_mtcClient_cmd ( &mtc_sock, + MTC_CMD_SYNC, + peer_controller.hostname, + peer_controller.host_ip, + mtc_config.mtc_rx_mgmnt_port) == PASS ) + { + mtcTimer_start ( ctrl.peer_ctrlr_reset.sync_timer, timer_handler, MTC_SECS_10 ); + ilog("... waiting for peer controller to sync - %d secs", MTC_SECS_10); + } + else + { + elog("failed to send 'sync' command to peer controller mtcClient"); + ctrl.peer_ctrlr_reset.sync_timer.ring = true ; + } + } + else + { + ; /* wait longer */ + } + } + else + { + issue_reset_and_cleanup (); + } + } + ctrl.peer_ctrlr_reset.audit_timer.ring = false ; + } + } daemon_signal_hdlr (); } daemon_exit(); @@ -1750,7 +1890,6 @@ void daemon_sigchld_hdlr ( void ) } default: { - wlog ("child handler running with no active script set (%d)\n", ctrl.active_script_set ); return ; } } @@ -1820,6 +1959,84 @@ void daemon_sigchld_hdlr ( void ) } } +/*************************************************************************** + * + * Name : load_mtcInfo_msg + * + * Description: Extract the mtc info from the MTC_MSG_INFO message. + * + * Assumptions: So far only the peer controller reset feature uses this. 
+ * + * Returns : Nothing + * + ***************************************************************************/ + +void load_mtcInfo_msg ( mtc_message_type & msg ) +{ + if ( ctrl.nodetype & CONTROLLER_TYPE ) + { + mlog1("%s", &msg.buf[0]); + struct json_object *_obj = json_tokener_parse( &msg.buf[0] ); + if ( _obj ) + { + if ( strcmp(&ctrl.hostname[0], CONTROLLER_0 )) + peer_controller.hostname = CONTROLLER_0 ; + else + peer_controller.hostname = CONTROLLER_1 ; + + struct json_object *info_obj = (struct json_object *)(NULL); + json_bool json_rc = json_object_object_get_ex( _obj, + "mtcInfo", + &info_obj ); + if ( ( json_rc == TRUE ) && ( info_obj )) + { + struct json_object *ctrl_obj = (struct json_object *)(NULL); + json_bool json_rc = + json_object_object_get_ex( info_obj, + peer_controller.hostname.data(), + &ctrl_obj ); + + if (( json_rc == TRUE ) && ( ctrl_obj )) + { + peer_controller.host_ip = jsonUtil_get_key_value_string(ctrl_obj, MTC_JSON_INV_HOSTIP) ; + peer_controller.bm_ip = jsonUtil_get_key_value_string(ctrl_obj, MTC_JSON_INV_BMIP) ; + peer_controller.bm_un = jsonUtil_get_key_value_string(ctrl_obj, "bm_un"); + peer_controller.bm_pw = jsonUtil_get_key_value_string(ctrl_obj, "bm_pw"); + + /* log the mtc info but not the bmc password ; only + * indicate that it looks 'ok' or is 'none' */ + ilog ("%s is my peer [host:%s bmc:%s:%s:%s]", + peer_controller.hostname.c_str(), + peer_controller.host_ip.c_str(), + peer_controller.bm_ip.c_str(), + peer_controller.bm_un.c_str(), + hostUtil_is_valid_pw(peer_controller.bm_pw) ? 
"ok":"none"); + } + else + { + wlog("peer mtcInfo missing (rc:%d) ; %s", + json_rc, &msg.buf[0]); + } + } + else + { + wlog("mtcInfo label parse error (rc:%d) ; %s", + json_rc, &msg.buf[0]); + } + json_object_put(_obj); + } + else + { + wlog("message buffer tokenize error ; %s", &msg.buf[0]); + } + } + else + { + slog("%s got mtcInfo ; unexpected for this nodetype", ctrl.hostname); + } +} + + /* Push daemon state to log file */ void daemon_dump_info ( void ) { @@ -1853,13 +2070,13 @@ int daemon_run_testhead ( void ) * STAGE 1: some test ************************************************/ printf ( "| Test %d : Maintenance Service Test ............. ", stage ); - if ( rc != PASS ) + if ( rc != PASS ) { FAILED_STR ; rc = FAIL ; } else - PASSED ; + PASSED ; printf ("+---------------------------------------------------------+\n"); return PASS ; diff --git a/mtce/src/maintenance/mtcNodeComp.h b/mtce/src/maintenance/mtcNodeComp.h index 612144f8..190500c6 100644 --- a/mtce/src/maintenance/mtcNodeComp.h +++ b/mtce/src/maintenance/mtcNodeComp.h @@ -17,6 +17,10 @@ #include #include +using namespace std; + +#include "nodeTimers.h" /* for ... 
Timer Service */ + /** Compute Config mask */ #define CONFIG_CLIENT_MASK (CONFIG_AGENT_MTC_MGMNT_PORT |\ CONFIG_CLIENT_MTC_MGMNT_PORT |\ @@ -59,6 +63,22 @@ typedef struct } script_ctrl_type ; void script_ctrl_init ( script_ctrl_type * script_ctrl_ptr ); +/* peer controller reset control structure and associated definitions */ + +/* This is a flag file set by SM when SM wants maintenance to perform a + * BMC reset of the other (peer) controller */ +#define RESET_PEER_NOW "/var/run/.sm_reset_peer" + +#define PEER_CTRLR_AUDIT_PERIOD (2) +typedef struct +{ + struct + mtc_timer sync_timer ; + mtc_timer audit_timer ; + int audit_period ; + bool sync ; +} peer_ctrlr_reset_type ; + typedef struct { char hostname [MAX_HOST_NAME_SIZE+1]; @@ -76,7 +96,7 @@ typedef struct unsigned int function ; unsigned int subfunction ; - struct mtc_timer timer ; /* mtcAlive timer */ + struct mtc_timer timer ; /* mtcAlive timer */ bool clstr_iface_provisioned ; @@ -102,6 +122,7 @@ typedef struct /* Where to send events */ string mtcAgent_ip ; + peer_ctrlr_reset_type peer_ctrlr_reset; } ctrl_type ; ctrl_type * get_ctrl_ptr ( void ); @@ -109,5 +130,6 @@ ctrl_type * get_ctrl_ptr ( void ); bool is_subfunction_worker ( void ); int run_goenabled_scripts ( mtc_socket_type * sock_ptr , string requestor ); int run_hostservices_scripts ( unsigned int cmd ); +void load_mtcInfo_msg ( mtc_message_type & msg ); #endif diff --git a/mtce/src/maintenance/mtcNodeCtrl.cpp b/mtce/src/maintenance/mtcNodeCtrl.cpp index 6732ca88..152217c6 100644 --- a/mtce/src/maintenance/mtcNodeCtrl.cpp +++ b/mtce/src/maintenance/mtcNodeCtrl.cpp @@ -1326,6 +1326,7 @@ void nodeLinkClass::fsm ( void ) daemon_signal_hdlr (); mtcHttpSvr_look ( mtce_event ); } + mtcInv.mtcInfo_handler(); } } diff --git a/mtce/src/maintenance/mtcNodeHdlrs.cpp b/mtce/src/maintenance/mtcNodeHdlrs.cpp index de5ae2a4..b898c375 100755 --- a/mtce/src/maintenance/mtcNodeHdlrs.cpp +++ b/mtce/src/maintenance/mtcNodeHdlrs.cpp @@ -6166,6 +6166,8 @@ int 
nodeLinkClass::add_handler ( struct nodeLinkClass::node * node_ptr ) if ( is_controller(node_ptr) ) { + this->controllers++ ; + mtc_cmd_enum state = CONTROLLER_DISABLED ; if (( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) && @@ -6635,6 +6637,8 @@ int nodeLinkClass::bmc_handler ( struct nodeLinkClass::node * node_ptr ) mtcInfo_set ( node_ptr, MTCE_INFO_KEY__BMC_PROTOCOL, BMC_PROTOCOL__IPMI_STR ); node_ptr->bmc_protocol = BMC_PROTOCOL__IPMITOOL ; } + /* store mtcInfo, which specifies the selected BMC protocol, + * into the sysinv database */ mtcInvApi_update_mtcInfo ( node_ptr ); ilog ("%s bmc control using %s:%s", @@ -6751,8 +6755,15 @@ int nodeLinkClass::bmc_handler ( struct nodeLinkClass::node * node_ptr ) node_ptr->bmc_thread_ctrl.done = true ; node_ptr->bmc_thread_info.command = 0 ; } + /* store mtcInfo, which specifies the selected BMC protocol, + * into the sysinv database */ mtcInvApi_update_mtcInfo ( node_ptr ); + /* push the BMC access info out to the mtcClient when + * a controller's BMC connection is established/verified */ + if ( node_ptr->nodetype & CONTROLLER_TYPE ) + this->want_mtcInfo_push = true ; + send_hwmon_command ( node_ptr->hostname, MTC_CMD_ADD_HOST ); send_hwmon_command ( node_ptr->hostname, MTC_CMD_START_HOST ); } @@ -6942,6 +6953,11 @@ int nodeLinkClass::bmc_handler ( struct nodeLinkClass::node * node_ptr ) } } /* end power off detection handling */ + /* push the BMC access info out to the mtcClient when + * a controller's BMC connection is established/verified */ + if ( node_ptr->nodetype & CONTROLLER_TYPE ) + this->want_mtcInfo_push = true ; + send_hwmon_command ( node_ptr->hostname, MTC_CMD_ADD_HOST ); send_hwmon_command ( node_ptr->hostname, MTC_CMD_START_HOST ); diff --git a/mtce/src/maintenance/mtcNodeMsg.h b/mtce/src/maintenance/mtcNodeMsg.h index 6816354c..11319c0f 100755 --- a/mtce/src/maintenance/mtcNodeMsg.h +++ b/mtce/src/maintenance/mtcNodeMsg.h @@ -125,11 +125,13 @@ int send_mtcAlive_msg ( mtc_socket_type * sock_ptr, 
string identity, int interfa int recv_mtc_reply_noblock ( void ); -int send_mtc_cmd ( string & hostname, int cmd, int interface ); +int send_mtc_cmd ( string & hostname, int cmd, int interface , string json_dict="" ); int mtc_service_command ( mtc_socket_type * sock_ptr , int interface ); int mtc_set_availStatus ( string & hostname, mtc_nodeAvailStatus_enum status ); -int mtce_send_event ( mtc_socket_type * sock_ptr, int cmd , const char * mtce_name_ptr ); +int mtce_send_event ( mtc_socket_type * sock_ptr, unsigned int cmd , const char * mtce_name_ptr ); int mtc_clstr_init ( mtc_socket_type * sock_ptr , char * iface ); string get_who_i_am ( void ); +int send_mtcClient_cmd ( mtc_socket_type * sock_ptr, int cmd, string hostname, string address, int port); + #endif diff --git a/mtce/src/scripts/mtc.conf b/mtce/src/scripts/mtc.conf index edfd6c5d..461766b0 100644 --- a/mtce/src/scripts/mtc.conf +++ b/mtce/src/scripts/mtc.conf @@ -87,6 +87,10 @@ sched_delay_threshold = 300 ; scheduler delay time in msecs that will trigger daemon_log_port = 2121 ; daemon logger port mtcalarm_req_port = 2122 ; +sync_b4_peer_ctrlr_reset = 0 ; issue a sync command to peer controller mtcClient + ; before issuing BMC reset. + + [timeouts] ; configurable maintenance timeout values in seconds failsafe_shutdown_delay = 120;