/* * Copyright (c) 2013-2018, 2024 Wind River Systems, Inc. * * SPDX-License-Identifier: Apache-2.0 * */ /** * @file * Wind River CGTS Platform Node Maintenance "Compute Messaging" * Implementation */ /** * @detail * Detailed description ... * * */ #include #include #include /* for ... unix domain sockets */ #include #include #include #include /* for ... syslog */ #include /* for ... waitpid */ #include #include #include #include /* for ... list of conf file names */ #include /* for ... sync */ using namespace std; #define __AREA__ "msg" #include "nodeClass.h" /* for ... maintenance class nodeLinkClass */ #include "jsonUtil.h" /* for ... Json utilities */ #include /* for ... json-c json string parsing */ #include "mtcNodeMsg.h" /* for ... daemon socket structure */ #include "mtcNodeComp.h" /* for ... this module header */ #include "nodeUtil.h" /* for ... Utility Service Header */ #include "daemon_common.h" #include "regexUtil.h" /* for ... Regex and String utilities */ extern "C" { #include "amon.h" /* for ... active monitoring utilities */ } /************************************************************************ * * Name : stop pmon * * Purpose : Used before issuing the self reboot so that pmond * does not try and recover any processes, * most importantly this one which would * cancel the sysreq failsafe thread. * ************************************************************************/ void stop_pmon( void ) { /* max pipe command response length */ #define PIPE_COMMAND_RESPON_LEN (100) ilog("Stopping collectd."); int rc = system("/usr/local/sbin/pmon-stop collectd"); sleep (2); ilog("Stopping pmon to prevent process recovery during shutdown"); for ( int retry = 0 ; retry < 5 ; retry++ ) { char pipe_cmd_output [PIPE_COMMAND_RESPON_LEN] ; rc = system("/usr/bin/systemctl stop pmon"); sleep(2); /* confirm pmon is no longer active */ execute_pipe_cmd ( "/usr/bin/systemctl is-active pmon", &pipe_cmd_output[0], PIPE_COMMAND_RESPON_LEN ); if ( strnlen ( pipe_cmd_output, PIPE_COMMAND_RESPON_LEN ) > 0 ) { string temp = pipe_cmd_output ; if ( temp.find ("inactive") != string::npos ) { ilog("pmon is now inactive (%d:%d)", retry, rc); break ; } else { ilog("pmon is not inactive (%s) ; retrying (%d:%d)", temp.c_str(), retry, rc); } } else { elog("pmon status query failed ; retrying (%d:%d)", retry, rc); } } } /* Receive and process commands from controller maintenance */ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface ) { mtc_message_type msg ; int rc = FAIL ; ssize_t bytes_received = 0 ; ctrl_type * ctrl_ptr = get_ctrl_ptr() ; bool log_ack = true ; const char * iface_name_ptr = get_interface_name_str(interface) ; if ( interface == CLSTR_INTERFACE ) { if ( ! ctrl_ptr->clstr_iface_provisioned ) { wlog ("cannot receive from unprovisioned %s interface", iface_name_ptr); return (rc); } } /* clean the rx/tx buffer */ memset ((void*)&msg,0,sizeof(mtc_message_type)); string hostaddr = "" ; if ( interface == PXEBOOT_INTERFACE ) { if ( sock_ptr->pxeboot_rx_socket ) { struct sockaddr_in client_addr; socklen_t addr_len = sizeof(client_addr); // Receive data bytes_received = recvfrom(sock_ptr->pxeboot_rx_socket, (char*)&msg.hdr[0], sizeof(mtc_message_type), 0, (struct sockaddr*)&client_addr, &addr_len); // Terminate the buffer msg.hdr[bytes_received] = '\0' ; // Log with debug_msg lane 2 if ( daemon_get_cfg_ptr()->debug_msg&2 ) { // log the message ; both header and buffer string _buf = msg.buf[0] ? msg.buf : "empty"; ilog ("Received %ld bytes (%s) from %s:%d - %s:%s", bytes_received, iface_name_ptr, inet_ntoa(client_addr.sin_addr), ntohs(client_addr.sin_port), &msg.hdr[0], _buf.c_str()); // dump_memory (&msg.hdr[0], 16, bytes_received); } hostaddr = inet_ntoa(client_addr.sin_addr); } } else if ( interface == MGMNT_INTERFACE ) { if (( sock_ptr->mtc_client_mgmt_rx_socket ) && ( sock_ptr->mtc_client_mgmt_rx_socket->sock_ok() == true )) { rc = bytes_received = sock_ptr->mtc_client_mgmt_rx_socket->read((char*)&msg.hdr[0], sizeof(mtc_message_type)); hostaddr = sock_ptr->mtc_client_mgmt_rx_socket->get_src_str(); // Log with debug_msg lane 2 if ( daemon_get_cfg_ptr()->debug_msg&2 ) { // Log the message ; both header and buffer string _buf = msg.buf[0] ? msg.buf : "empty"; ilog ("Received %ld bytes (%s) from %s - %s:%s", bytes_received, iface_name_ptr, hostaddr.c_str(), &msg.hdr[0], _buf.c_str()); } } else { elog ("cannot read from null or failed 'mtc_client_mgmt_rx_socket'\n"); return (FAIL_TO_RECEIVE); } } else if ( interface == CLSTR_INTERFACE ) { if (( sock_ptr->mtc_client_clstr_rx_socket ) && ( sock_ptr->mtc_client_clstr_rx_socket->sock_ok() == true )) { rc = bytes_received = sock_ptr->mtc_client_clstr_rx_socket->read((char*)&msg.hdr[0], sizeof(mtc_message_type)); hostaddr = sock_ptr->mtc_client_clstr_rx_socket->get_src_str(); // Log with debug_msg lane 2 if ( daemon_get_cfg_ptr()->debug_msg&2 ) { // Log the message ; both header and buffer string _buf = msg.buf[0] ? msg.buf : "empty"; ilog ("Received %ld bytes (%s) from %s: %s:%s", bytes_received, iface_name_ptr, hostaddr.c_str(), &msg.hdr[0], _buf.c_str()); } } else { elog ("cannot read from null or failed 'mtc_client_clstr_rx_socket'\n"); return (FAIL_TO_RECEIVE); } } if( rc <= 0 ) { if ( ( errno == EINTR ) || ( errno == EAGAIN )) { return (RETRY); } else { return (FAIL_TO_RECEIVE); } } rc = PASS ; bool self = false ; if (( hostaddr == ctrl_ptr->address ) || ( hostaddr == ctrl_ptr->address_clstr )) { self = true ; } string interface_name = get_interface_name_str (interface) ; string command_name = get_mtcNodeCommand_str(msg.cmd) ; /* Message version greater than zero have the hosts management * mac address appended to the header string */ if (( !self ) && ( msg.ver >= MTC_CMD_FEATURE_VER__MACADDR_IN_CMD )) { /* the minus 1 is to back up from the null char that is accounted for in the hearder size */ if ( strncmp ( &msg.hdr[MSG_HEADER_SIZE-1], ctrl_ptr->macaddr.data(), MSG_HEADER_SIZE )) { wlog ("%s req command from %s network not for this host (exp:%s det:%s) ; ignoring ...\n", command_name.c_str(), iface_name_ptr, ctrl_ptr->macaddr.c_str(), &msg.hdr[MSG_HEADER_SIZE-1]); print_mtc_message ( get_hostname(), MTC_CMD_RX, msg, iface_name_ptr, true ); return (FAIL_INVALID_DATA); } } if ( ! hostaddr.empty() ) print_mtc_message ( hostaddr, MTC_CMD_RX, msg, iface_name_ptr, false ); /* Check for response messages */ if ( strstr ( &msg.hdr[0], get_cmd_req_msg_header() ) ) { rc = PASS ; if ( msg.cmd == MTC_REQ_MTCALIVE ) { alog1 ("mtcAlive request received from %s network", iface_name_ptr); if ( interface == PXEBOOT_INTERFACE ) { alog2 ("pxeboot mtcAlive buffer: %s", &msg.buf[0]); load_pxebootInfo_msg(msg); #ifdef WANT_FIT_TESTING if ( ! daemon_want_fit ( FIT_CODE__FAIL_PXEBOOT_MTCALIVE ) ) #endif { send_mtcAlive_msg ( sock_ptr, ctrl_ptr->who_i_am, interface ); } } return (rc); } else if ( msg.cmd == MTC_MSG_INFO ) { alog2 ("mtc 'info' message received from %s network", iface_name_ptr); load_mtcInfo_msg ( msg ); return ( PASS ); /* no ack for this message */ } else if ( msg.cmd == MTC_CMD_SYNC ) { ilog ("mtc '%s' message received from %s network", get_mtcNodeCommand_str(msg.cmd), iface_name_ptr); ilog ("Sync Start"); sync (); ilog ("Sync Done"); return ( PASS ); /* no ack for this message */ } else if ( msg.cmd == MTC_MSG_LOCKED ) { log_ack = false ; /* Only recreate the file if its not already present */ if ( daemon_is_file_present ( NODE_LOCKED_FILE ) == false ) { ilog ("%s locked (%s)", get_hostname().c_str(), iface_name_ptr); daemon_log ( NODE_LOCKED_FILE, ADMIN_LOCKED_STR); } /* Only create the non-volatile NODE_LOCKED_FILE_BACKUP file if the * LOCK_PERSIST flag is present. */ if ( msg.num && msg.parm[MTC_PARM_LOCK_PERSIST_IDX] ) { /* Preserve the node locked state in a non-volatile backup * file that persists over reboot. * Maintaining the legacy NODE_LOCKED_FILE as other sw looks at it. */ if ( daemon_is_file_present ( NODE_LOCKED_FILE_BACKUP ) == false ) daemon_log ( NODE_LOCKED_FILE_BACKUP, ADMIN_LOCKED_STR ); } /* Otherwise if we get a locked message without the LOCK_PERSIST flag * then remove the non-volatile NODE_LOCKED_FILE_BACKUP file if exists */ else if ( daemon_is_file_present ( NODE_LOCKED_FILE_BACKUP ) == true ) daemon_remove_file ( NODE_LOCKED_FILE_BACKUP ); } else if ( msg.cmd == MTC_MSG_UNLOCKED ) { ilog ("%s unlocked received from %s network", get_hostname().c_str(), iface_name_ptr); /* Only remove the file if it is present */ if ( daemon_is_file_present ( NODE_LOCKED_FILE ) == true ) { daemon_remove_file ( NODE_LOCKED_FILE ); } if ( daemon_is_file_present ( NODE_LOCKED_FILE_BACKUP ) == true ) { daemon_remove_file ( NODE_LOCKED_FILE_BACKUP ); ilog ("cleared node locked backup flag (%s)", iface_name_ptr); } } else if ( msg.cmd == MTC_MSG_SUBF_GOENABLED_FAILED ) { /* remove the GOENABLED_SUBF_PASS and create GOENABLED_SUBF_FAIL file */ daemon_remove_file ( GOENABLED_SUBF_PASS ); daemon_log ( GOENABLED_SUBF_FAIL, "host subfunction has failed as instructed by maintenance."); return (PASS); } else if ( msg.cmd == MTC_REQ_MAIN_GOENABLED ) { time_t time_now ; double goenabled_age ; ctrl_type * ctrl_ptr = get_ctrl_ptr () ; time (&time_now); // current time in seconds (UTC) goenabled_age = difftime ( time_now, ctrl_ptr->goenabled_main_time ); /* Check to see if we are already running the requested test */ if ( ctrl_ptr->active_script_set == GOENABLED_MAIN_SCRIPTS ) { ilog ("GoEnabled In-Progress\n"); } /* Report PASS immediately if there was a recent PASS already */ else if (( daemon_is_file_present ( GOENABLED_MAIN_PASS ) && ( goenabled_age < MTC_MINS_20 ))) { ilog ("GoEnabled Passed (%f seconds ago)\n", goenabled_age ); send_mtc_msg ( sock_ptr, MTC_MSG_MAIN_GOENABLED, "" ); } else { ilog ("GoEnabled request posted (%s)", iface_name_ptr); ctrl_ptr->posted_script_set.push_back ( GOENABLED_MAIN_SCRIPTS ); ctrl_ptr->posted_script_set.unique(); } rc = PASS ; } else if ( msg.cmd == MTC_REQ_SUBF_GOENABLED ) { time_t time_now ; double goenabled_age ; ctrl_type * ctrl_ptr = get_ctrl_ptr () ; time (&time_now); // current time in seconds (UTC) goenabled_age = difftime ( time_now, ctrl_ptr->goenabled_subf_time ); if ( ctrl_ptr->active_script_set == GOENABLED_SUBF_SCRIPTS ) { ilog ("GoEnabled SubF In-Progress\n"); } /* eport PASS immediately if there was a recent PASS already */ else if (( daemon_is_file_present ( GOENABLED_SUBF_PASS ) && ( goenabled_age < MTC_MINS_20 ))) { ilog ("GoEnabled SubF Passed (%f seconds ago)\n", goenabled_age); send_mtc_msg ( sock_ptr, MTC_MSG_SUBF_GOENABLED, "" ); } else { ilog ("GoEnabled Subf request posted (%s)", iface_name_ptr); /* Cleanup test result flag files */ if ( daemon_is_file_present ( GOENABLED_SUBF_PASS) ) { ilog ("clearing stale %s file\n", GOENABLED_SUBF_PASS ); daemon_remove_file (GOENABLED_SUBF_PASS) ; } if ( daemon_is_file_present ( GOENABLED_SUBF_FAIL) ) { ilog ("clearing stale %s file\n", GOENABLED_SUBF_FAIL ); daemon_remove_file (GOENABLED_SUBF_FAIL) ; } ctrl_ptr->posted_script_set.push_back ( GOENABLED_SUBF_SCRIPTS ); ctrl_ptr->posted_script_set.unique(); } rc = PASS ; } else if ( msg.cmd == MTC_CMD_REBOOT ) { ilog ("%s command received from %s network", command_name.c_str(), iface_name_ptr); } else if ( msg.cmd == MTC_CMD_LAZY_REBOOT ) { ilog ("%s command received from %s network ; delay:%d seconds", command_name.c_str(), iface_name_ptr, msg.num ? msg.parm[0] : 0 ); } else if ( is_host_services_cmd ( msg.cmd ) == true ) { ctrl_type * ctrl_ptr = get_ctrl_ptr () ; /* Check to see if this command is already running. * hostservices.posted is set to command on launch * hostservices.monitor is set to command while monitoring */ if (( ctrl_ptr->hostservices.posted == msg.cmd ) || ( ctrl_ptr->hostservices.monitor == msg.cmd )) { wlog ("%s already in progress (%d:%d)\n", command_name.c_str(), ctrl_ptr->hostservices.posted, ctrl_ptr->hostservices.monitor ); rc = PASS ; } else { ctrl_ptr->posted_script_set.push_back ( HOSTSERVICES_SCRIPTS ); ctrl_ptr->posted_script_set.unique (); ilog ("%s request posted from %s network", command_name.c_str(), iface_name_ptr); ctrl_ptr->hostservices.posted = msg.cmd ; ctrl_ptr->hostservices.monitor = MTC_CMD_NONE ; rc = PASS ; } /* Fault insertion - fail host services command */ if ( ( daemon_is_file_present ( MTC_CMD_FIT__START_SVCS ))) { rc = FAIL_FIT ; wlog ("%s Start Services - fit failure (%s)", command_name.c_str(), iface_name_ptr); } /* Fault insertion - fail to send host services ACK */ if ( ( daemon_is_file_present ( MTC_CMD_FIT__NO_HS_ACK ))) { wlog ("%s Start Services - fit no ACK (%s)", command_name.c_str(), iface_name_ptr); return (PASS); } /* inform mtcAgent of enhanced ost services support */ msg.parm[1] = MTC_ENHANCED_HOST_SERVICES ; msg.parm[0] = rc ; msg.num = 2 ; if ( rc ) { snprintf (msg.buf, BUF_SIZE, "host service launch failed (rc:%d)", rc ); } else { snprintf (msg.buf, BUF_SIZE, "host service launched"); } } else if ( msg.cmd == MTC_CMD_WIPEDISK ) { ilog ("Reload command received from %s network", iface_name_ptr); } else if ( msg.cmd == MTC_CMD_RESET ) { ilog ("Reset command received from %s network", iface_name_ptr); } else if ( msg.cmd == MTC_CMD_LOOPBACK ) { ilog ("Loopback command received from %s network", iface_name_ptr); } else { rc = FAIL_BAD_CASE ; wlog ( "Unsupported maintenance command (%d) with %ld bytes received from %s network", msg.cmd, bytes_received, iface_name_ptr ); } snprintf ( &msg.hdr[0], MSG_HEADER_SIZE, "%s", get_cmd_rsp_msg_header()); } else if ( strstr ( &msg.hdr[0], get_msg_rep_msg_header()) ) { if ( msg.cmd == MTC_MSG_MAIN_GOENABLED ) { ilog ("main function goEnabled results acknowledged from %s network", iface_name_ptr); return (PASS); } else if ( msg.cmd == MTC_MSG_SUBF_GOENABLED ) { ilog ("sub-function goEnabled results acknowledged from %s network", iface_name_ptr); return (PASS); } else { dlog2 ( "reply message for command %d\n", msg.cmd ); return (PASS); } } else if ( strstr ( &msg.hdr[0], get_worker_msg_header()) ) { if ( msg.cmd == MTC_MSG_MTCALIVE ) { wlog ("unexpected mtcAlive message from %s from %s network", hostaddr.c_str(), iface_name_ptr); } else { wlog ("unsupported worker message from %s from %s network", hostaddr.c_str(), iface_name_ptr); } wlog ("WARNING: mtcClient is receiving mtcAgent bound mtcAlive messages"); // dump_memory (&msg, 16, bytes_received); return PASS ; } else { wlog ("unsupported message from %s from %s network", hostaddr.c_str(), iface_name_ptr); // dump_memory (&msg, 16, bytes_received); return PASS ; } /*********************************************************** * * If we get here, the response should be sent * regardless of the execution status. * * if ( rc == PASS ) **********************************************************/ { rc = PASS ; int bytes = sizeof(mtc_message_type)-BUF_SIZE; if ( interface == PXEBOOT_INTERFACE ) { int flags = 0 ; // no tx flags if ( sock_ptr->pxeboot_tx_socket <= 0 ) { elog("pxeboot_tx_socket not ok (%d)", sock_ptr->pxeboot_tx_socket); return (FAIL_SOCKET_SENDTO); } if ( log_ack ) { ilog ("sending %s ack to %s over %s network", command_name.c_str(), hostaddr.c_str(), iface_name_ptr); } struct sockaddr_in hostAddr; memset(&hostAddr, 0, sizeof(hostAddr)); print_mtc_message ( hostaddr.data(), MTC_CMD_TX, msg, iface_name_ptr, false ); hostAddr.sin_addr.s_addr = inet_addr(hostaddr.data()); hostAddr.sin_family = AF_INET; hostAddr.sin_port = htons(sock_ptr->mtc_tx_pxeboot_port); ssize_t bytes_sent = sendto(sock_ptr->pxeboot_tx_socket, &msg.hdr[0], bytes, flags, (const struct sockaddr*)&hostAddr, sizeof(hostAddr)); if (bytes_sent <= 0) { elog ("failed to send %s ack to %s:%d on %s network (rc:%ld) (%d:%m)", command_name.c_str(), hostaddr.c_str(), hostAddr.sin_port, iface_name_ptr, bytes_sent, errno); } } /* send the message back either over the mgmnt or clstr interface */ else if ( interface == MGMNT_INTERFACE ) { if (( sock_ptr->mtc_client_mgmt_tx_socket ) && ( sock_ptr->mtc_client_mgmt_tx_socket->sock_ok() == true )) { rc = sock_ptr->mtc_client_mgmt_tx_socket->write((char*)&msg.hdr[0], bytes); if ( rc <= 0 ) { elog ("%s reply send (mtc_client_mgmt_tx_socket) failed (%s) (rc:%d)", command_name.c_str(), iface_name_ptr, rc); } else if ( log_ack ) { ilog ("%s reply send (%s)", command_name.c_str(), iface_name_ptr); } } else { elog ("cannot send to null or failed socket (%s)", iface_name_ptr); } } else if ( interface == CLSTR_INTERFACE ) { if (( sock_ptr->mtc_client_clstr_tx_socket_c0 ) && ( sock_ptr->mtc_client_clstr_tx_socket_c0->sock_ok() == true )) { rc = sock_ptr->mtc_client_clstr_tx_socket_c0->write((char*)&msg.hdr[0], bytes); if ( rc <= 0 ) { elog ("%s reply send (mtc_client_clstr_tx_socket_c0) failed (%s) (rc:%d)", command_name.c_str(), iface_name_ptr, rc); } else if ( log_ack ) { ilog ("%s reply send (%s)", command_name.c_str(), iface_name_ptr); } } if (( sock_ptr->mtc_client_clstr_tx_socket_c1 ) && ( sock_ptr->mtc_client_clstr_tx_socket_c1->sock_ok() == true )) { rc = sock_ptr->mtc_client_clstr_tx_socket_c1->write((char*)&msg.hdr[0], bytes); if ( rc <= 0 ) { elog ("%s reply send (mtc_client_clstr_tx_socket_c1) failed (%s) (rc:%d)", command_name.c_str(), iface_name_ptr, rc); } else if ( log_ack ) { ilog ("%s reply send (%s)", command_name.c_str(), iface_name_ptr); } } } print_mtc_message ( get_hostname(), MTC_CMD_TX, msg, iface_name_ptr, (rc != bytes) ); /* get the shutdown delay config alue */ int delay = daemon_get_cfg_ptr()->failsafe_shutdown_delay ; if ( delay < 1 ) delay = 2 ; daemon_dump_info (); if ( msg.cmd == MTC_CMD_REBOOT ) { if ( daemon_is_file_present ( MTC_CMD_FIT__NO_REBOOT ) ) { ilog ("Reboot - fit bypass (%s)", iface_name_ptr); return (PASS); } stop_pmon(); ilog ("Reboot (%s)", iface_name_ptr); daemon_log ( NODE_RESET_FILE, "reboot command" ); fork_sysreq_reboot ( delay ); rc = system("/usr/bin/systemctl reboot"); } if ( msg.cmd == MTC_CMD_LAZY_REBOOT ) { daemon_log ( NODE_RESET_FILE, "lazy reboot command" ); /* stop pmon before issuing the lazy reboot so that it does not * try and recover any processes, most importantly this one */ stop_pmon(); if ( msg.num >= 1 ) { do { ilog ("Lazy Reboot (%s) ; rebooting in %d seconds", iface_name_ptr, msg.num ? msg.parm[0] : 1 ); sleep (1); if ( msg.parm[0] % 5 ) { /* service the active monitoring every 5 seconds */ active_monitor_dispatch (); } } while ( msg.parm[0]-- > 0 ) ; } else { ilog ("Lazy Reboot (%s) ; now", iface_name_ptr); } fork_sysreq_reboot ( delay ); rc = system("/usr/bin/systemctl reboot"); } else if ( msg.cmd == MTC_CMD_RESET ) { if ( daemon_is_file_present ( MTC_CMD_FIT__NO_RESET ) ) { ilog ("Reset - fit bypass (%s)", iface_name_ptr); return (PASS); } stop_pmon(); ilog ("Reset 'reboot -f' (%s)", iface_name_ptr); daemon_log ( NODE_RESET_FILE, "reset command" ); fork_sysreq_reboot ( delay/2 ); rc = system("/usr/bin/systemctl reboot --force"); } else if ( msg.cmd == MTC_CMD_WIPEDISK ) { int parent = 0 ; if ( daemon_is_file_present ( MTC_CMD_FIT__NO_WIPEDISK ) ) { ilog ("Wipedisk - fit bypass (%s)", iface_name_ptr); return (PASS); } /* We fork a reboot as a fail safe. * If something goes wrong we should reboot anyway */ stop_pmon(); fork_sysreq_reboot ( delay/2 ); /* We fork the wipedisk command as it may take upwards of 30s * If we hold this thread for that long pmon will kill mtcClient * which will prevent the reboot command from being issued */ if ( 0 > ( parent = double_fork())) { elog ("failed to fork wipedisk command\n"); } else if( 0 == parent ) /* we're the child */ { ilog ("Disk wipe in progress (%s)", iface_name_ptr); daemon_log ( NODE_RESET_FILE, "wipedisk command" ); rc = system("/usr/local/bin/wipedisk --force"); ilog ("Disk wipe complete - Forcing Reboot ...\n"); rc = system("/usr/bin/systemctl reboot --force"); exit (0); } } rc = PASS ; } return (rc); } /** Send an event to the mtcAgent **/ int mtce_send_event ( mtc_socket_type * sock_ptr, unsigned int cmd , const char * mtce_name_ptr ) { mtc_message_type event ; ctrl_type *ctrl_ptr = get_ctrl_ptr(); int rc = PASS ; int bytes = 0 ; memset (&event, 0 , sizeof(mtc_message_type)); if ( cmd == MTC_EVENT_LOOPBACK ) { snprintf ( &event.hdr[0] , MSG_HEADER_SIZE, "%s", get_loopback_header() ); /* We don't use the buffer for mtce events to remove it from the size */ bytes = ((sizeof(mtc_message_type))-(BUF_SIZE)); } else if ( cmd == MTC_EVENT_MONITOR_READY ) { string event_info = "{\"" ; event_info.append(MTC_JSON_INV_NAME); event_info.append("\":\""); event_info.append(get_hostname()); event_info.append("\",\""); event_info.append(MTC_JSON_SERVICE); event_info.append("\":\""); event_info.append(MTC_SERVICE_MTCCLIENT_NAME ); event_info.append("\",\"active_controller_pxeboot_address\":\""); event_info.append(ctrl_ptr->pxeboot_addr_active_controller); event_info.append("\",\""); event_info.append(MTC_JSON_FEATURES); event_info.append("\":[\""); event_info.append(MTC_PXEBOOT_MTCALIVE); event_info.append("\"]}"); size_t len = event_info.length()+1 ; snprintf ( &event.hdr[0], MSG_HEADER_SIZE, "%s", get_mtce_event_header()); snprintf ( &event.buf[0], len, "%s", event_info.data()); bytes = ((sizeof(mtc_message_type))-(BUF_SIZE-len)); dlog ("%s %s ready", get_hostname().c_str(), MTC_SERVICE_MTCCLIENT_NAME); } else if (( cmd == MTC_EVENT_AVS_CLEAR ) || ( cmd == MTC_EVENT_AVS_MAJOR ) || ( cmd == MTC_EVENT_AVS_CRITICAL )) { snprintf ( &event.hdr[0] , MSG_HEADER_SIZE, "%s", get_mtce_event_header() ); /* We don't use the buffer for mtce events so remove it from the size */ bytes = ((sizeof(mtc_message_type))-(BUF_SIZE)); } else if ( is_host_services_cmd ( cmd ) == true ) { snprintf ( &event.hdr[0] , MSG_HEADER_SIZE, "%s", get_cmd_rsp_msg_header() ); if ( mtce_name_ptr ) { /* add the error message to the message buffer */ size_t len = strnlen ( mtce_name_ptr, MAX_MTCE_EVENT_NAME_LEN ); /* We don't use the buffer for mtce events to remove it from the size */ bytes = ((sizeof(mtc_message_type))-(BUF_SIZE-len)); snprintf ( &event.buf[0], MAX_MTCE_EVENT_NAME_LEN , "%s", mtce_name_ptr ); rc = FAIL_OPERATION ; } else { /* We don't use the buffer in the pass case */ bytes = ((sizeof(mtc_message_type))-(BUF_SIZE)); rc = PASS ; } event.cmd = cmd ; event.parm[0] = rc ; event.num = 1 ; } else { elog ("Unsupported mtce event (%d)\n", cmd ); return ( FAIL_BAD_CASE ); } event.cmd = cmd ; if (( sock_ptr->mtc_client_mgmt_tx_socket ) && ( sock_ptr->mtc_client_mgmt_tx_socket->sock_ok() == true )) { if ( bytes == 0 ) { slog ("message send failed ; message size=0 for cmd:0x%x is 0\n", event.cmd ); rc = FAIL_NO_DATA ; } else if ((rc = sock_ptr->mtc_client_mgmt_tx_socket->write((char*)&event.hdr[0], bytes))!= bytes ) { elog ("message send failed. (%d) (%d:%s) \n", rc, errno, strerror(errno)); elog ("message: %d bytes to <%s:%d>\n", bytes, sock_ptr->mtc_client_mgmt_tx_socket->get_dst_str(), sock_ptr->mtc_client_mgmt_tx_socket->get_dst_addr()->getPort()); rc = FAIL_TO_TRANSMIT ; } else { mlog2 ("Transmit: %x bytes to %s:%d\n", bytes, sock_ptr->mtc_client_mgmt_tx_socket->get_dst_str(), sock_ptr->mtc_client_mgmt_tx_socket->get_dst_addr()->getPort()); print_mtc_message ( get_hostname(), MTC_CMD_TX, event, get_interface_name_str(MGMNT_INTERFACE), false ); rc = PASS ; } } else { elog ("cannot send to null or failed management network socket (%s)", get_interface_name_str (MGMNT_INTERFACE) ); rc = FAIL_SOCKET_SENDTO ; } // Only the events sent on the pxeboot network are: // - ready event if (( cmd == MTC_EVENT_MONITOR_READY ) && ( sock_ptr->pxeboot_tx_socket > 0 ) && ( !ctrl_ptr->pxeboot_addr_active_controller.empty())) { int flags = 0 ; // no tx flags struct sockaddr_in hostAddr; memset(&hostAddr, 0, sizeof(hostAddr)); print_mtc_message ( ctrl_ptr->pxeboot_addr_active_controller.data(), MTC_CMD_TX, event, get_interface_name_str(PXEBOOT_INTERFACE), false); hostAddr.sin_addr.s_addr = inet_addr(ctrl_ptr->pxeboot_addr_active_controller.data()); hostAddr.sin_family = AF_INET; hostAddr.sin_port = htons(sock_ptr->mtc_tx_pxeboot_port); ssize_t bytes_sent = sendto(sock_ptr->pxeboot_tx_socket, &event.hdr[0], bytes, flags, (const struct sockaddr*)&hostAddr, sizeof(hostAddr)); if (bytes_sent <= 0) { elog ("failed to send %s to %s:%d on %s network (rc:%ld) (%d:%m)", get_mtcNodeCommand_str(event.cmd), ctrl_ptr->pxeboot_addr_active_controller.c_str(), hostAddr.sin_port, get_interface_name_str(PXEBOOT_INTERFACE), bytes_sent, errno); } } return rc ; } /**************************************************************************** * * Name : create_mtcAlive_msg * * Description: Creates a common mtcAlive message that consists of the * - out-of-band health/status flags * - host uptime * - json string of some of the host's info * { * "hostname":"controller-0", * "personality":"controller,worker", * "pxeboot_ip":"169.254.202.2", * "mgmt_ip":"192.168.204.2", * "cluster_host_ip":"192.168.206.2", * "mgmt_mac":"08:00:27:9f:ef:57", * "interface":"Mgmnt", * "sequence":145 * } * ****************************************************************************/ int create_mtcAlive_msg ( ctrl_type * ctrl_ptr, mtc_message_type & msg, int cmd, string identity, int interface ) { static int _sm_unhealthy_debounce_counter [MAX_IFACES] = {0,0} ; struct timespec ts ; clock_gettime (CLOCK_MONOTONIC, &ts ); /* Get health state of the host - presently limited to the following * * during boot = NODE_HEALTH_UNKNOWN * /var/run/.config_pass = NODE_HEALTHY * /var/run/.config_fail = NODE_UNHEALTHY * * */ /* Init the message buffer */ MEMSET_ZERO (msg); snprintf ( &msg.hdr[0], MSG_HEADER_SIZE, "%s", get_worker_msg_header()); msg.cmd = cmd ; msg.num = MTC_PARM_MAX_IDX ; /* Insert the host uptime */ msg.parm[MTC_PARM_UPTIME_IDX] = ts.tv_sec ; /* Insert the host health - TO BE OBSOLTETED */ msg.parm[MTC_PARM_HEALTH_IDX] = get_node_health( get_hostname() ) ; /* Insert the mtce flags */ msg.parm[MTC_PARM_FLAGS_IDX] = 0 ; //Check if LUKS FS manager service is active int exitstatus = system("cryptsetup status luks_encrypted_vault"); if ( 0 != exitstatus ) msg.parm[MTC_PARM_FLAGS_IDX] |= MTC_FLAG__LUKS_VOL_FAILED ; if ( daemon_is_file_present ( CONFIG_COMPLETE_FILE ) ) msg.parm[MTC_PARM_FLAGS_IDX] |= MTC_FLAG__I_AM_CONFIGURED ; if ( daemon_is_file_present ( CONFIG_FAIL_FILE ) ) msg.parm[MTC_PARM_FLAGS_IDX] |= MTC_FLAG__I_AM_NOT_HEALTHY ; if ( daemon_is_file_present ( CONFIG_PASS_FILE ) ) msg.parm[MTC_PARM_FLAGS_IDX] |= MTC_FLAG__I_AM_HEALTHY ; if ( daemon_is_file_present ( NODE_LOCKED_FILE ) ) msg.parm[MTC_PARM_FLAGS_IDX] |= MTC_FLAG__I_AM_LOCKED ; if ( daemon_is_file_present ( GOENABLED_MAIN_PASS ) ) msg.parm[MTC_PARM_FLAGS_IDX] |= MTC_FLAG__MAIN_GOENABLED ; if ( daemon_is_file_present ( PATCHING_IN_PROG_FILE ) ) msg.parm[MTC_PARM_FLAGS_IDX] |= MTC_FLAG__PATCHING ; if ( daemon_is_file_present ( NODE_IS_PATCHED_FILE ) ) msg.parm[MTC_PARM_FLAGS_IDX] |= MTC_FLAG__PATCHED ; /* manage the worker subfunction flag */ if ( is_subfunction_worker () == true ) { if ( daemon_is_file_present ( CONFIG_COMPLETE_WORKER ) ) { msg.parm[MTC_PARM_FLAGS_IDX] |= MTC_FLAG__SUBF_CONFIGURED ; /* Only set the go enabled subfunction flag if the pass file only exists */ if (( daemon_is_file_present ( GOENABLED_SUBF_PASS ) == true ) && ( daemon_is_file_present ( GOENABLED_SUBF_FAIL ) == false )) { msg.parm[MTC_PARM_FLAGS_IDX] |= MTC_FLAG__SUBF_GOENABLED ; } } } if ( daemon_is_file_present ( SMGMT_DEGRADED_FILE ) ) msg.parm[MTC_PARM_FLAGS_IDX] |= MTC_FLAG__SM_DEGRADED ; if ( daemon_is_file_present ( SMGMT_UNHEALTHY_FILE ) ) { /* debounce 6 mtcAlive messages = ~25-30 second debounce */ #define MAX_SM_UNHEALTHY_DEBOUNCE (6) if ( ++_sm_unhealthy_debounce_counter[interface] > MAX_SM_UNHEALTHY_DEBOUNCE ) { wlog("SM Unhealthy flag set (%s)", get_interface_name_str(interface)); msg.parm[MTC_PARM_FLAGS_IDX] |= MTC_FLAG__SM_UNHEALTHY ; } else { wlog("SM Unhealthy debounce %d of %d (%s)", _sm_unhealthy_debounce_counter[interface], MAX_SM_UNHEALTHY_DEBOUNCE, get_interface_name_str(interface)); } } else { _sm_unhealthy_debounce_counter[interface] = 0 ; } /* add the interface and sequence number to the mtcAlive message */ identity.append ( ",\"interface\":\""); identity.append (get_interface_name_str(interface)); identity.append("\",\"sequence\":"); if ( interface == PXEBOOT_INTERFACE ) { ctrl_ptr->mtcAlive_pxeboot_sequence++ ; identity.append(itos(ctrl_ptr->mtcAlive_pxeboot_sequence)); msg.parm[MTC_PARM_SEQ_IDX] = ctrl_ptr->mtcAlive_pxeboot_sequence ; } else if ( interface == MGMNT_INTERFACE ) { ctrl_ptr->mtcAlive_mgmnt_sequence++ ; identity.append(itos(ctrl_ptr->mtcAlive_mgmnt_sequence)); msg.parm[MTC_PARM_SEQ_IDX] = ctrl_ptr->mtcAlive_mgmnt_sequence ; } else if ( interface == CLSTR_INTERFACE ) { ctrl_ptr->mtcAlive_clstr_sequence++ ; identity.append(itos(ctrl_ptr->mtcAlive_clstr_sequence)); msg.parm[MTC_PARM_SEQ_IDX] = ctrl_ptr->mtcAlive_clstr_sequence ; } else identity.append(itos(0)); identity.append("}"); memcpy ( &msg.buf[0], identity.c_str(), identity.size() ); /* Send only the data we care about */ return (((sizeof(mtc_message_type))-(BUF_SIZE)+(identity.size())+1)); } /* Send GOENABLED messages to the controller */ int send_mtc_msg_failed = 0 ; int send_mtc_msg ( mtc_socket_type * sock_ptr, int cmd , string identity ) { int rc = FAIL ; if (( cmd == MTC_MSG_MAIN_GOENABLED ) || ( cmd == MTC_MSG_SUBF_GOENABLED ) || ( cmd == MTC_MSG_MAIN_GOENABLED_FAILED ) || ( cmd == MTC_MSG_SUBF_GOENABLED_FAILED )) { int interface = MGMNT_INTERFACE ; mtc_message_type msg ; int bytes = create_mtcAlive_msg ( get_ctrl_ptr(), msg, cmd, identity, interface ); if (( sock_ptr->mtc_client_mgmt_tx_socket ) && ( sock_ptr->mtc_client_mgmt_tx_socket->sock_ok() == true )) { /* Send back to requester - TODO: consider sending back to both as multicast */ if ((rc = sock_ptr->mtc_client_mgmt_tx_socket->write((char*)&msg.hdr[0], bytes)) != bytes ) { if ( rc == -1 ) { wlog_throttled (send_mtc_msg_failed, 100 , "failed to send <%s:%d> (%d:%m)", sock_ptr->mtc_client_mgmt_tx_socket->get_dst_str(), sock_ptr->mtc_client_mgmt_tx_socket->get_dst_addr()->getPort(), errno ); } else { wlog_throttled ( send_mtc_msg_failed, 100 , "sent only %d of %d bytes to <%s:%d>\n", rc, bytes, sock_ptr->mtc_client_mgmt_tx_socket->get_dst_str(), sock_ptr->mtc_client_mgmt_tx_socket->get_dst_addr()->getPort()); } } else { send_mtc_msg_failed = 0 ; print_mtc_message ( get_hostname(), MTC_CMD_TX, msg, get_interface_name_str(interface), false ); rc = PASS ; } } else { elog ("cannot send to null or failed socket (%s)", get_interface_name_str (MGMNT_INTERFACE) ); } } else { elog ( "Unsupported Mtc command (%d)\n", cmd ); } return (PASS) ; } int send_mtcAlive_msg_failed = 0 ; int send_mtcAlive_msg ( mtc_socket_type * sock_ptr, string identity, int interface ) { int flags = 0 ; // no tx flags /* get a pointer to the process control structure */ ctrl_type * ctrl_ptr = get_ctrl_ptr() ; if (( interface == PXEBOOT_INTERFACE ) && ( ctrl_ptr->pxeboot_iface_provisioned == false )) return (PASS) ; if (( interface == CLSTR_INTERFACE ) && ( ctrl_ptr->clstr_iface_provisioned != true )) { dlog2 ("cannot send to unprovisioned %s interface", get_interface_name_str(interface) ); return (FAIL); } mtc_message_type msg ; int bytes = create_mtcAlive_msg ( ctrl_ptr, msg, MTC_MSG_MTCALIVE, identity, interface ); if ( interface == PXEBOOT_INTERFACE ) { /* Send to controller-0 pxeboot address */ if ( sock_ptr->pxeboot_tx_socket <= 0 ) { elog("pxeboot_tx_socket not ok (%d)", sock_ptr->pxeboot_tx_socket); return (FAIL_SOCKET_SENDTO); } // TODO: Consider adding controllers info to ctrl struct string controllers[CONTROLLERS] = {CONTROLLER_0, CONTROLLER_1}; alog1 ("sending mtcAlive to both controllers"); for (int c = 0 ; c < CONTROLLERS ; c++) { string pxeboot_addr_cx ; struct sockaddr_in hostAddr; memset(&hostAddr, 0, sizeof(hostAddr)); if (controllers[c] == CONTROLLER_1) { if ( ctrl_ptr->system_type != SYSTEM_TYPE__AIO__SIMPLEX ) pxeboot_addr_cx = ctrl_ptr->pxeboot_addr_c1; else continue; // skip controller-1 for SX systems } else pxeboot_addr_cx = ctrl_ptr->pxeboot_addr_c0; if ( pxeboot_addr_cx.empty() ) { if ( ctrl_ptr->pxeboot_address_learned[c] == true ) { ctrl_ptr->pxeboot_address_learned[c] = false ; wlog ( "%s pxeboot address not learned ; unable to send pxeboot mtcAlive", controllers[c].c_str() ); } continue ; } if ( ctrl_ptr->pxeboot_address_learned[c] == false ) { // Only log this if the not learned log was produced. // Which is most likely case on process startup. ilog ("sending pxeboot network mtcAlive msg on port %d to %s at %s", sock_ptr->mtc_tx_pxeboot_port, controllers[c].c_str(), pxeboot_addr_cx.c_str()); ctrl_ptr->pxeboot_address_learned[c] = true ; } print_mtc_message ( controllers[c], MTC_CMD_TX, msg, get_interface_name_str(PXEBOOT_INTERFACE), false ); hostAddr.sin_addr.s_addr = inet_addr(pxeboot_addr_cx.data()); hostAddr.sin_family = AF_INET; hostAddr.sin_port = htons(sock_ptr->mtc_tx_pxeboot_port); // 2102 alog1 ("sending pxeboot network mtcAlive msg to %s", controllers[c].c_str() ); ssize_t bytes_sent = sendto(sock_ptr->pxeboot_tx_socket, &msg.hdr[0], bytes, flags, (const struct sockaddr*)&hostAddr, sizeof(hostAddr)); if (bytes_sent <= 0) { elog ("failed to send mtcAlive to %s using %s:%d (pxeboot) (rc:%ld) (%d:%m)", controllers[c].c_str(), pxeboot_addr_cx.c_str(), hostAddr.sin_port, bytes_sent, errno); } } // for loop } else if ( interface == MGMNT_INTERFACE ) { /* Send to controller floating address */ if (( sock_ptr->mtc_client_mgmt_tx_socket ) && ( sock_ptr->mtc_client_mgmt_tx_socket->sock_ok() == true )) { alog1 ("sending mgmt network mtcAlive msg to %s", CONTROLLER); print_mtc_message ( CONTROLLER, MTC_CMD_TX, msg, get_interface_name_str(MGMNT_INTERFACE), false ); sock_ptr->mtc_client_mgmt_tx_socket->write((char*)&msg.hdr[0], bytes) ; } else { elog("mtc_client_mgmt_tx_socket not ok"); } } else if ( interface == CLSTR_INTERFACE ) { /* Send to controller-0 cluster address */ if (( sock_ptr->mtc_client_clstr_tx_socket_c0 ) && ( sock_ptr->mtc_client_clstr_tx_socket_c0->sock_ok() == true )) { alog1 ("sending clstr network mtcAlive msg to %s", CONTROLLER_0); print_mtc_message ( CONTROLLER_0, MTC_CMD_TX, msg, get_interface_name_str(CLSTR_INTERFACE), false ); sock_ptr->mtc_client_clstr_tx_socket_c0->write((char*)&msg.hdr[0], bytes ) ; } else { elog("mtc_client_clstr_tx_socket_c0 not ok"); } /* Send to controller-1 cluster address */ if ( get_ctrl_ptr()->system_type != SYSTEM_TYPE__AIO__SIMPLEX ) { if (( sock_ptr->mtc_client_clstr_tx_socket_c1 ) && ( sock_ptr->mtc_client_clstr_tx_socket_c1->sock_ok() == true )) { alog1 ("sending clstr mtcAlive msg to %s", CONTROLLER_1); print_mtc_message ( CONTROLLER_1, MTC_CMD_TX, msg, get_interface_name_str(CLSTR_INTERFACE), false ); sock_ptr->mtc_client_clstr_tx_socket_c1->write((char*)&msg.hdr[0], bytes ) ; } else { elog("mtc_client_clstr_tx_socket_c1 not ok"); } } } else { wlog_throttled ( send_mtcAlive_msg_failed, 100, "Unsupported interface (%d)\n", interface ); return (FAIL_BAD_PARM); } return (PASS) ; } int send_mtcClient_cmd ( mtc_socket_type * sock_ptr, int cmd, string hostname, string address, int port) { mtc_message_type msg ; int bytes = 0 ; MEMSET_ZERO (msg); snprintf ( &msg.hdr[0], MSG_HEADER_SIZE, "%s", get_cmd_req_msg_header()); msg.cmd = cmd ; switch ( cmd ) { case MTC_CMD_SYNC: { ilog ("Sending '%s' command to %s:%s:%d", get_mtcNodeCommand_str(cmd), hostname.c_str(), address.c_str(), port); msg.num = 0 ; /* buffer not used in this message */ bytes = ((sizeof(mtc_message_type))-(BUF_SIZE)); break ; } default: { slog("Unsupported command ; %s:%d", get_mtcNodeCommand_str(cmd), cmd ); return (FAIL_BAD_CASE); } } int rc = FAIL ; /* Send to controller floating address */ if (( sock_ptr->mtc_client_mgmt_tx_socket ) && ( sock_ptr->mtc_client_mgmt_tx_socket->sock_ok() == true )) { print_mtc_message ( hostname, MTC_CMD_TX, msg, get_interface_name_str(MGMNT_INTERFACE), false ); rc = sock_ptr->mtc_client_mgmt_tx_socket->write((char*)&msg.hdr[0], bytes, address.data(), port ) ; if ( 0 >= rc ) { elog("failed to send command to mtcClient (%d) (%d:%s)", rc, errno, strerror(errno)); rc = FAIL_SOCKET_SENDTO ; } else rc = PASS ; } else { elog("mtc_client_mgmt_tx_socket not ok"); rc = FAIL_BAD_STATE ; } return (rc) ; } int mtcCompMsg_testhead ( void ) { return (PASS); }