diff --git a/service-mgmt/sm-1.0.0/centos/build_srpm.data b/service-mgmt/sm-1.0.0/centos/build_srpm.data
index 9ee81464..918657b2 100644
--- a/service-mgmt/sm-1.0.0/centos/build_srpm.data
+++ b/service-mgmt/sm-1.0.0/centos/build_srpm.data
@@ -2,4 +2,4 @@ SRC_DIR=$PKG_BASE
 COPY_LIST="$PKG_BASE/LICENSE"
 TAR_NAME=sm
 VERSION=1.0.0
-TIS_PATCH_VER=27
+TIS_PATCH_VER=28
diff --git a/service-mgmt/sm-1.0.0/src/sm_cluster_hbs_info_msg.cpp b/service-mgmt/sm-1.0.0/src/sm_cluster_hbs_info_msg.cpp
index c2158788..ca91c18a 100644
--- a/service-mgmt/sm-1.0.0/src/sm_cluster_hbs_info_msg.cpp
+++ b/service-mgmt/sm-1.0.0/src/sm_cluster_hbs_info_msg.cpp
@@ -21,7 +21,15 @@
 #include "sm_debug.h"
 #include "sm_limits.h"
 #include "sm_selobj.h"
-#include "sm_timer.h"
+#include "sm_worker_thread.h"
+
+// uncomment when debugging this module to enable DPRINTFD output to the log file
+// #define __DEBUG__MSG__
+
+#ifdef __DEBUG__MSG__
+#undef DPRINTFD
+#define DPRINTFD DPRINTFI
+#endif
 
 #define LOOPBACK_IP "127.0.0.1"
 #define SM_CLIENT_PORT_KEY "sm_client_port"
@@ -64,6 +72,45 @@ bool operator!=(const SmClusterHbsStateT& lhs, const SmClusterHbsStateT& rhs)
     return !(lhs == rhs);
 }
 
+void log_cluster_hbs_state(const SmClusterHbsStateT& state)
+{
+    if(0 == state.last_update)
+    {
+        DPRINTFI("Cluster hbs state not available");
+        return;
+    }
+
+    struct timespec ts;
+    clock_gettime(CLOCK_REALTIME, &ts);
+    int secs_since_update = ts.tv_sec - state.last_update;
+
+    if(state.storage0_enabled)
+    {
+        DPRINTFI("Cluster hbs last updated %d secs ago, storage-0 is provisioned, "
+                 "from controller-0: %d nodes enabled, %d nodes reachable, storage-0 %s responding "
+                 "from controller-1: %d nodes enabled, %d nodes reachable, storage-0 %s responding",
+                 secs_since_update,
+                 state.controllers[0].number_of_node_enabled,
+                 state.controllers[0].number_of_node_reachable,
+                 state.controllers[0].storage0_responding ? "is" : "is not",
+                 state.controllers[1].number_of_node_enabled,
+                 state.controllers[1].number_of_node_reachable,
+                 state.controllers[1].storage0_responding ? "is" : "is not"
+        );
+    }else
+    {
+        DPRINTFI("Cluster hbs last updated %d secs ago, storage-0 is not provisioned, "
+                 "from controller-0: %d nodes enabled, %d nodes reachable, "
+                 "from controller-1: %d nodes enabled, %d nodes reachable",
+                 secs_since_update,
+                 state.controllers[0].number_of_node_enabled,
+                 state.controllers[0].number_of_node_reachable,
+                 state.controllers[1].number_of_node_enabled,
+                 state.controllers[1].number_of_node_reachable
+        );
+    }
+}
+
 pthread_mutex_t SmClusterHbsInfoMsg::_mutex;
 const unsigned short Invalid_Req_Id = 0;
 int SmClusterHbsInfoMsg::_sock = -1;
@@ -103,8 +150,8 @@ bool SmClusterHbsInfoMsg::_process_cluster_hbs_history(mtce_hbs_cluster_history_
         DPRINTFE("Invalid oldest entry index %d", history.oldest_entry_index);
         return false;
     }
-    int newest_entry_index = (history.oldest_entry_index + history.entries) % MTCE_HBS_HISTORY_ENTRIES;
+    int newest_entry_index = (history.oldest_entry_index + history.entries - 1) % MTCE_HBS_HISTORY_ENTRIES;
 
     mtce_hbs_cluster_entry_type& entry = history.entry[newest_entry_index];
     SmClusterHbsInfoT& controller_state = state.controllers[history.controller];
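
The hunk above fixes an off-by-one when locating the newest entry in the heartbeat history ring buffer: with "entries" valid slots starting at oldest_entry_index, the newest occupied slot sits entries - 1 positions later, not entries. A standalone illustration of the arithmetic (the buffer size below is assumed for the example, not taken from the mtce headers):

    #include <assert.h>

    #define MTCE_HBS_HISTORY_ENTRIES 20  /* assumed size, for illustration only */

    int main(void)
    {
        /* Three valid entries starting at index 2 occupy slots 2, 3 and 4,
           so the newest entry lives at index 4. */
        int oldest_entry_index = 2;
        int entries = 3;

        /* old computation: lands one past the newest entry */
        int old_index = (oldest_entry_index + entries) % MTCE_HBS_HISTORY_ENTRIES;
        /* fixed computation from the patch */
        int new_index = (oldest_entry_index + entries - 1) % MTCE_HBS_HISTORY_ENTRIES;

        assert(old_index == 5);
        assert(new_index == 4);
        return 0;
    }
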
"is" : "is not" + ); + }else + { + DPRINTFI("Cluster hbs last updated %d secs ago, storage-0 is not provisioned, " + "from controller-0: %d nodes enabled, %d nodes reachable, " + "from controller-1: %d nodes enabled, %d nodes reachable", + secs_since_update, + state.controllers[0].number_of_node_enabled, + state.controllers[0].number_of_node_reachable, + state.controllers[1].number_of_node_enabled, + state.controllers[1].number_of_node_reachable + ); + } +} + pthread_mutex_t SmClusterHbsInfoMsg::_mutex; const unsigned short Invalid_Req_Id = 0; int SmClusterHbsInfoMsg::_sock = -1; @@ -103,8 +150,8 @@ bool SmClusterHbsInfoMsg::_process_cluster_hbs_history(mtce_hbs_cluster_history_ DPRINTFE("Invalid oldest entry index %d", history.oldest_entry_index); return false; } - int newest_entry_index = (history.oldest_entry_index + history.entries) % MTCE_HBS_HISTORY_ENTRIES; + int newest_entry_index = (history.oldest_entry_index + history.entries - 1) % MTCE_HBS_HISTORY_ENTRIES; mtce_hbs_cluster_entry_type& entry = history.entry[newest_entry_index]; SmClusterHbsInfoT& controller_state = state.controllers[history.controller]; @@ -112,7 +159,11 @@ bool SmClusterHbsInfoMsg::_process_cluster_hbs_history(mtce_hbs_cluster_history_ if(entry.hosts_responding > controller_state.number_of_node_reachable) { controller_state.number_of_node_reachable = entry.hosts_responding; + controller_state.number_of_node_enabled = entry.hosts_enabled; } + + DPRINTFD("Oldest index %d, entries %d, newest index %d, nodes %d", + history.oldest_entry_index, history.entries, newest_entry_index, entry.hosts_responding); return true; } @@ -123,7 +174,6 @@ void SmClusterHbsInfoMsg::_cluster_hbs_info_msg_received( int selobj, int64_t us while(true) { int bytes_read = recv( selobj, &msg, sizeof(msg), MSG_NOSIGNAL | MSG_DONTWAIT ); - DPRINTFD("msg received %d bytes. buffer size %d", bytes_read, sizeof(msg)); if(bytes_read < 0) { if(EAGAIN != errno) @@ -132,6 +182,7 @@ void SmClusterHbsInfoMsg::_cluster_hbs_info_msg_received( int selobj, int64_t us } return; } + DPRINTFD("msg received %d bytes. buffer size %d", bytes_read, sizeof(msg)); if(size_of_msg_header > (unsigned int)bytes_read) { DPRINTFE("size not right, msg size %d, expected not less than %d", @@ -173,6 +224,8 @@ void SmClusterHbsInfoMsg::_cluster_hbs_info_msg_received( int selobj, int64_t us { _cluster_hbs_state_previous = _cluster_hbs_state_current; _cluster_hbs_state_current = state; + DPRINTFD("cluster hbs state changed"); + log_cluster_hbs_state(_cluster_hbs_state_current); } else { @@ -220,6 +273,13 @@ SmErrorT SmClusterHbsInfoMsg::_get_address(const char* port_key, struct sockaddr return SM_OKAY; } +static void send_query(SmSimpleAction&) +{ + SmClusterHbsInfoMsg::cluster_hbs_info_query(); +} + +static SmSimpleAction _query_hbs_cluster_info_action("send hbs-cluster query", send_query); + // **************************************************************************** // SmClusterHbsInfoMsg::cluster_hbs_info_query - // trigger a query of cluster hbs info. 
@@ -386,6 +448,7 @@ SmErrorT SmClusterHbsInfoMsg::initialize()
         return SM_FAILED;
     }
 
+    SmWorkerThread::get_worker().add_action(&_query_hbs_cluster_info_action);
     return SM_OKAY;
 }
 
@@ -432,6 +495,7 @@ void SmClusterHbsInfoMsg::dump_hbs_record(FILE* fp)
 
     if(0 != _cluster_hbs_state_previous.last_update)
     {
+        t = ts.tv_sec - _cluster_hbs_state_previous.last_update;
         fprintf(fp, "\n Previous state, since %d seconds ago\n", (int)t);
         fprintf(fp, " storage-0 is %s configured\n",
                 _cluster_hbs_state_previous.storage0_enabled ? "" : "not");
"" : "not"); diff --git a/service-mgmt/sm-1.0.0/src/sm_cluster_hbs_info_msg.h b/service-mgmt/sm-1.0.0/src/sm_cluster_hbs_info_msg.h index 73a1b566..1cb304fd 100644 --- a/service-mgmt/sm-1.0.0/src/sm_cluster_hbs_info_msg.h +++ b/service-mgmt/sm-1.0.0/src/sm_cluster_hbs_info_msg.h @@ -21,7 +21,10 @@ struct _SmClusterHbsInfoT { bool storage0_responding; int number_of_node_reachable; - _SmClusterHbsInfoT() : storage0_responding(false), number_of_node_reachable(0) + int number_of_node_enabled; + _SmClusterHbsInfoT() : storage0_responding(false), + number_of_node_reachable(0), + number_of_node_enabled(0) { } }; @@ -47,6 +50,12 @@ typedef struct bool operator==(const SmClusterHbsStateT& lhs, const SmClusterHbsStateT& rhs); bool operator!=(const SmClusterHbsStateT& lhs, const SmClusterHbsStateT& rhs); +inline bool is_valid(const SmClusterHbsStateT& state) +{ + return state.last_update > 0; +} + +void log_cluster_hbs_state(const SmClusterHbsStateT& state); typedef void(*cluster_hbs_query_ready_callback)(); // **************************************************************************** diff --git a/service-mgmt/sm-1.0.0/src/sm_failover.c b/service-mgmt/sm-1.0.0/src/sm_failover.c index 3924e328..c55bc118 100644 --- a/service-mgmt/sm-1.0.0/src/sm_failover.c +++ b/service-mgmt/sm-1.0.0/src/sm_failover.c @@ -29,6 +29,7 @@ #include "sm_service_domain_neighbor_fsm.h" #include "sm_service_domain_member_table.h" #include "sm_service_domain_interface_fsm.h" +#include "sm_service_domain_fsm.h" #include "sm_heartbeat_msg.h" #include "sm_node_swact_monitor.h" #include "sm_util_types.h" @@ -602,36 +603,33 @@ SmFailoverActionResultT sm_failover_swact() // **************************************************************************** // Failover - fail self // ================== -SmFailoverActionResultT sm_failover_fail_self() +SmErrorT sm_failover_fail_self() { DPRINTFI("To disable %s", _host_name); SmErrorT error = sm_node_fsm_event_handler( - _host_name, SM_NODE_EVENT_DISABLED, NULL, "Host is isolated" ); + _host_name, SM_NODE_EVENT_DISABLED, NULL, "Host is failed" ); if( SM_OKAY != error ) { DPRINTFE("Failed to disable %s, error: %s", _host_name, sm_error_str(error)); - return SM_FAILOVER_ACTION_RESULT_FAILED; + return SM_FAILED; } sm_node_utils_set_unhealthy(); error = sm_node_api_fail_node( _host_name ); - if (SM_OKAY == error ) - { - return SM_FAILOVER_ACTION_RESULT_OK; - } - else + if (SM_OKAY != error ) { DPRINTFE("Failed to set %s failed, error %s.", _host_name, sm_error_str(error)); - return SM_FAILOVER_ACTION_RESULT_FAILED; + return SM_FAILED; } + return SM_OKAY; } // **************************************************************************** // **************************************************************************** // Failover - disable node // ================== -SmFailoverActionResultT sm_failover_disable_node(char* node_name) +SmErrorT sm_failover_disable_node(char* node_name) { DPRINTFI("To disable %s", node_name); @@ -645,9 +643,9 @@ SmFailoverActionResultT sm_failover_disable_node(char* node_name) { DPRINTFE( "Failed to disable node %s, error=%s.", node_name, sm_error_str( error ) ); - return SM_FAILOVER_ACTION_RESULT_FAILED; + return SM_FAILED; } - return SM_FAILOVER_ACTION_RESULT_OK; + return SM_OKAY; } // **************************************************************************** @@ -796,6 +794,21 @@ bool this_controller_unlocked() } // **************************************************************************** +static SmErrorT sm_ensure_leader_scheduler() +{ + char controller_domain[] 
= "controller"; + char reason_text[SM_LOG_REASON_TEXT_MAX_CHAR] = "Loss of heartbeat"; + + SmErrorT error = sm_service_domain_fsm_set_state( + controller_domain, + SM_SERVICE_DOMAIN_STATE_LEADER, + reason_text ); + if(SM_OKAY != error) + { + DPRINTFE("Failed to ensure leader scheduler. Error %s", sm_error_str(error)); + } + return error; +} // **************************************************************************** // Failover - set system to scheduled status // ================== @@ -808,6 +821,16 @@ SmErrorT sm_failover_set_system(const SmSystemFailoverStatus& failover_status) SmNodeScheduleStateT host_target_state, peer_target_state; host_target_state = failover_status.get_host_schedule_state(); peer_target_state = failover_status.get_peer_schedule_state(); + SmHeartbeatStateT heartbeat_state = failover_status.get_heartbeat_state(); + if(SM_HEARTBEAT_OK != heartbeat_state) + { + if(SM_OKAY != sm_ensure_leader_scheduler()) + { + DPRINTFE("Failed to set new leader scheduler to local"); + return SM_FAILED; + } + } + if(SM_NODE_STATE_ACTIVE == host_target_state) { if(SM_NODE_STATE_STANDBY == _host_state && @@ -818,8 +841,7 @@ SmErrorT sm_failover_set_system(const SmSystemFailoverStatus& failover_status) DPRINTFE("Failed to activate %s.", _host_name); return SM_FAILED; } - result = sm_failover_disable_node(_peer_name); - if(SM_FAILOVER_ACTION_RESULT_FAILED == result) + if(SM_OKAY != sm_failover_disable_node(_peer_name)) { DPRINTFE("Failed to disable node %s.", _peer_name); return SM_FAILED; @@ -839,8 +861,7 @@ SmErrorT sm_failover_set_system(const SmSystemFailoverStatus& failover_status) } else if(SM_NODE_STATE_FAILED == host_target_state) { - result = sm_failover_disable_node(_host_name); - if(SM_FAILOVER_ACTION_RESULT_FAILED == result) + if(SM_OKAY != sm_failover_fail_self()) { DPRINTFE("Failed disable host %s.", _host_name); return SM_FAILED; diff --git a/service-mgmt/sm-1.0.0/src/sm_failover_fail_pending_state.cpp b/service-mgmt/sm-1.0.0/src/sm_failover_fail_pending_state.cpp index 9e7f7711..b23a33d8 100644 --- a/service-mgmt/sm-1.0.0/src/sm_failover_fail_pending_state.cpp +++ b/service-mgmt/sm-1.0.0/src/sm_failover_fail_pending_state.cpp @@ -6,6 +6,7 @@ #include "sm_failover_fail_pending_state.h" #include #include +#include "sm_cluster_hbs_info_msg.h" #include "sm_types.h" #include "sm_limits.h" #include "sm_debug.h" @@ -17,7 +18,8 @@ #include "sm_node_api.h" #include "sm_worker_thread.h" -static const int FAIL_PENDING_TIMEOUT = 2000; //2000ms +static const int FAIL_PENDING_TIMEOUT = 2000; // 2seconds +static const int DELAY_QUERY_HBS_MS = FAIL_PENDING_TIMEOUT - 200; // give 200ms for hbs agent to respond static SmTimerIdT action_timer_id = SM_TIMER_ID_INVALID; static const int RESET_TIMEOUT = 10 * 1000; // 10 seconds for a reset command to reboot a node @@ -294,6 +296,20 @@ SmErrorT SmFailoverFailPendingState::enter_state() return error; } +void _cluster_hbs_response_callback() +{ + const SmClusterHbsStateT& cluster_hbs_state = SmClusterHbsInfoMsg::get_current_state(); + log_cluster_hbs_state(cluster_hbs_state); + SmSystemFailoverStatus::get_status().set_cluster_hbs_state(cluster_hbs_state); +} + +bool SmFailoverFailPendingState::_delay_query_hbs_timeout( + SmTimerIdT timer_id, int64_t user_data) +{ + SmClusterHbsInfoMsg::cluster_hbs_info_query(_cluster_hbs_response_callback); + return false; +} + SmErrorT SmFailoverFailPendingState::_register_timer() { SmErrorT error; @@ -303,31 +319,47 @@ SmErrorT SmFailoverFailPendingState::_register_timer() this->_deregister_timer(); } - error 
@@ -303,31 +319,47 @@ SmErrorT SmFailoverFailPendingState::_register_timer()
         this->_deregister_timer();
     }
 
-    error = sm_timer_register( timer_name, FAIL_PENDING_TIMEOUT,
-                               SmFailoverFailPendingState::_fail_pending_timeout,
-                               0, &this->_pending_timer_id);
+    error = sm_timer_register(timer_name, FAIL_PENDING_TIMEOUT,
+                              SmFailoverFailPendingState::_fail_pending_timeout,
+                              0, &this->_pending_timer_id);
+
+    const char* delay_query_hbs_timer_name = "DELAY QUERY HBS";
+
+    error = sm_timer_register(delay_query_hbs_timer_name, DELAY_QUERY_HBS_MS,
+                              SmFailoverFailPendingState::_delay_query_hbs_timeout,
+                              0, &this->_delay_query_hbs_timer_id);
     return error;
 }
 
 SmErrorT SmFailoverFailPendingState::_deregister_timer()
 {
-    SmErrorT error;
-    if(SM_TIMER_ID_INVALID == this->_pending_timer_id)
+    SmErrorT error = SM_OKAY;
+    if(SM_TIMER_ID_INVALID != this->_pending_timer_id)
     {
-        return SM_OKAY;
+        error = sm_timer_deregister(this->_pending_timer_id);
+        if( SM_OKAY != error )
+        {
+            DPRINTFE( "Failed to cancel fail pending timer, error=%s.",
+                      sm_error_str( error ) );
+        }else
+        {
+            this->_pending_timer_id = SM_TIMER_ID_INVALID;
+        }
     }
 
-    error = sm_timer_deregister(this->_pending_timer_id);
-    if( SM_OKAY != error )
+    if(SM_TIMER_ID_INVALID != this->_delay_query_hbs_timer_id)
     {
-        DPRINTFE( "Failed to cancel fail pending timer, error=%s.",
-                  sm_error_str( error ) );
-    }else
-    {
-        this->_pending_timer_id = SM_TIMER_ID_INVALID;
+        error = sm_timer_deregister(this->_delay_query_hbs_timer_id);
+        if( SM_OKAY != error )
+        {
+            DPRINTFE( "Failed to cancel query hbs info timer, error=%s.",
+                      sm_error_str( error ) );
+        }else
+        {
+            this->_delay_query_hbs_timer_id = SM_TIMER_ID_INVALID;
+        }
     }
-
     return error;
 }
diff --git a/service-mgmt/sm-1.0.0/src/sm_failover_fail_pending_state.h b/service-mgmt/sm-1.0.0/src/sm_failover_fail_pending_state.h
index 7b2c7fe4..58732d5c 100644
--- a/service-mgmt/sm-1.0.0/src/sm_failover_fail_pending_state.h
+++ b/service-mgmt/sm-1.0.0/src/sm_failover_fail_pending_state.h
@@ -22,8 +22,11 @@ class SmFailoverFailPendingState : public SmFSMState
 
     private:
         SmTimerIdT _pending_timer_id;
+        SmTimerIdT _delay_query_hbs_timer_id;
 
         static bool _fail_pending_timeout(SmTimerIdT timer_id, int64_t user_data);
+        static bool _delay_query_hbs_timeout(SmTimerIdT timer_id, int64_t user_data);
+        static void cluster_hbs_response_callback();
         SmErrorT _register_timer();
         SmErrorT _deregister_timer();
 };
diff --git a/service-mgmt/sm-1.0.0/src/sm_failover_failed_state.cpp b/service-mgmt/sm-1.0.0/src/sm_failover_failed_state.cpp
index 577a733a..d2ae6831 100644
--- a/service-mgmt/sm-1.0.0/src/sm_failover_failed_state.cpp
+++ b/service-mgmt/sm-1.0.0/src/sm_failover_failed_state.cpp
@@ -10,41 +10,15 @@
 #include "sm_failover_fsm.h"
 #include "sm_failover_ss.h"
 
-static void _audit_failover_state()
-{
-    SmSystemFailoverStatus& failover_status = SmSystemFailoverStatus::get_status();
-    SmErrorT error = sm_failover_ss_get_survivor(failover_status);
-    SmNodeScheduleStateT host_state = failover_status.get_host_schedule_state();
-    SmNodeScheduleStateT peer_state = failover_status.get_peer_schedule_state();
-    if(SM_OKAY != error)
-    {
-        DPRINTFE("Failed to get failover survivor. Error %s", sm_error_str(error));
-        return;
-    }
-
-    if(SM_NODE_STATE_FAILED == host_state)
-    {
-        // don't need to set to failed state, already here
-    }
-    else if(SM_NODE_STATE_STANDBY == host_state && SM_NODE_STATE_ACTIVE == peer_state)
-    {
-        // standby is the only possible state to be scheduled to from failed state
-        SmFailoverFSM::get_fsm().set_state(SM_FAILOVER_STATE_NORMAL);
-    }else
-    {
-        DPRINTFE("Runtime error: unexpected scheduling state: %s",
-                 sm_node_schedule_state_str(host_state));
-    }
-}
-
 SmErrorT SmFailoverFailedState::event_handler(SmFailoverEventT event, const ISmFSMEventData* event_data)
 {
+    // Currently the only supported way to recover from a failure is a
+    // reboot triggered by mtce. So once the failed state is entered,
+    // wait for the reboot to re-enter the normal state.
     switch (event)
     {
         case SM_FAILOVER_EVENT_IF_STATE_CHANGED:
-            DPRINTFI("sm_heartbeat_recover event received.");
-            _audit_failover_state();
-
+            // the event still fires, but it cannot bring the fsm state back to normal
             break;
 
         default:
Error %s", sm_error_str(error)); - return; - } - - if(SM_NODE_STATE_FAILED == host_state) - { - // don't need to set to failed state, already here - } - else if(SM_NODE_STATE_STANDBY == host_state && SM_NODE_STATE_ACTIVE == peer_state) - { - // standby is the only possible state to be scheduled to from failed state - SmFailoverFSM::get_fsm().set_state(SM_FAILOVER_STATE_NORMAL); - }else - { - DPRINTFE("Runtime error: unexpected scheduling state: %s", - sm_node_schedule_state_str(host_state)); - } -} - SmErrorT SmFailoverFailedState::event_handler(SmFailoverEventT event, const ISmFSMEventData* event_data) { + // Currently the only supported scenario to recover from failure is + // reboot triggered by mtce. + // So once entering failed state, wait for reboot to reenter the normal state. switch (event) { case SM_FAILOVER_EVENT_IF_STATE_CHANGED: - DPRINTFI("sm_heartbeat_recover event received."); - _audit_failover_state(); - + // event will be fired, but couldn't bring fsm state back to normal break; default: diff --git a/service-mgmt/sm-1.0.0/src/sm_failover_fsm.cpp b/service-mgmt/sm-1.0.0/src/sm_failover_fsm.cpp index d0f3cbaa..c719dda0 100644 --- a/service-mgmt/sm-1.0.0/src/sm_failover_fsm.cpp +++ b/service-mgmt/sm-1.0.0/src/sm_failover_fsm.cpp @@ -40,6 +40,7 @@ void SmIFStateChangedEventData::set_interface_state( DPRINTFE("Runtime error: invalid interface type %d", interface_type); } } + SmFailoverInterfaceStateT SmIFStateChangedEventData::get_interface_state(SmInterfaceTypeT interface_type) const { switch (interface_type) @@ -54,7 +55,6 @@ SmFailoverInterfaceStateT SmIFStateChangedEventData::get_interface_state(SmInter DPRINTFE("Runtime error: invalid interface type %d", interface_type); return SM_FAILOVER_INTERFACE_UNKNOWN; } - } SmErrorT SmFSMState::enter_state() diff --git a/service-mgmt/sm-1.0.0/src/sm_failover_normal_state.cpp b/service-mgmt/sm-1.0.0/src/sm_failover_normal_state.cpp index cdef009d..32affc8f 100644 --- a/service-mgmt/sm-1.0.0/src/sm_failover_normal_state.cpp +++ b/service-mgmt/sm-1.0.0/src/sm_failover_normal_state.cpp @@ -13,6 +13,7 @@ #include "sm_failover_utils.h" #include "sm_failover_fsm.h" #include "sm_failover_ss.h" +#include "sm_cluster_hbs_info_msg.h" SmErrorT SmFailoverNormalState::event_handler(SmFailoverEventT event, const ISmFSMEventData* event_data) { @@ -79,6 +80,29 @@ SmErrorT SmFailoverNormalState::exit_state() failover_status.set_peer_pre_failure_schedule_state(peer_state); } + const SmClusterHbsStateT& cluster_hbs_state_cur = SmClusterHbsInfoMsg::get_current_state(); + const SmClusterHbsStateT& cluster_hbs_state_pre = SmClusterHbsInfoMsg::get_previous_state(); + SmClusterHbsStateT pre_failure_cluster_hsb_state; + if(!is_valid(cluster_hbs_state_cur)) + { + DPRINTFE("No cluster hbs state available"); + }else + { + struct timespec ts; + clock_gettime(CLOCK_REALTIME, &ts); + if(ts.tv_sec - cluster_hbs_state_cur.last_update <= 1 && cluster_hbs_state_pre.last_update != 0) + { + // cluster hbs state changed within past 1 second, take the pre state as pre-failure state. 
diff --git a/service-mgmt/sm-1.0.0/src/sm_failover_ss.c b/service-mgmt/sm-1.0.0/src/sm_failover_ss.c
index 8ed1b41f..e1d50e85 100644
--- a/service-mgmt/sm-1.0.0/src/sm_failover_ss.c
+++ b/service-mgmt/sm-1.0.0/src/sm_failover_ss.c
@@ -5,6 +5,7 @@
 //
 #include "sm_failover_ss.h"
+#include
 #include
 #include "sm_debug.h"
 #include "sm_limits.h"
@@ -15,6 +16,14 @@
 #include "sm_node_api.h"
 #include "sm_failover.h"
 
+// uncomment when debugging this module to enable DPRINTFD output to the log file
+// #define __DEBUG__MSG__
+
+#ifdef __DEBUG__MSG__
+#undef DPRINTFD
+#define DPRINTFD DPRINTFI
+#endif
+
 //
 SmErrorT _get_survivor_dc(const SmSystemStatusT& system_status, SmSystemFailoverStatus& selection);
 
@@ -117,6 +126,26 @@ void SmSystemFailoverStatus::set_host_pre_failure_schedule_state(SmNodeScheduleS
     }
 }
 
+void SmSystemFailoverStatus::set_cluster_hbs_state(const SmClusterHbsStateT& state)
+{
+    if( !is_valid(state) )
+    {
+        DPRINTFE("Runtime error. Invalid cluster hbs state");
+        return;
+    }
+    _cluster_hbs_state = state;
+}
+
+void SmSystemFailoverStatus::set_pre_failure_cluster_hbs_state(const SmClusterHbsStateT& state)
+{
+    if( !is_valid(state) )
+    {
+        DPRINTFE("Runtime error. Invalid cluster hbs state");
+        return;
+    }
+    _pre_failure_cluster_hbs_state = state;
+}
+
 void SmSystemFailoverStatus::set_peer_schedule_state(SmNodeScheduleStateT state)
 {
     if(_is_valid_schedule_state(state))
@@ -250,6 +279,8 @@ SmErrorT _get_system_status(SmSystemStatusT& sys_status, char host_name[], char
         sys_status.heartbeat_state = SM_HEARTBEAT_LOSS;
     }
 
+    SmSystemFailoverStatus::get_status().set_heartbeat_state(sys_status.heartbeat_state);
+
     sys_status.host_status.node_name = host_name;
     sys_status.host_status.interface_state = sm_failover_if_state_get();
     sys_status.host_status.current_schedule_state = host_state;
is %s", expect_storage_0 ? "enabled":"not enabled"); + int this_controller_index, peer_controller_index; + + char host_name[SM_NODE_NAME_MAX_CHAR]; + SmErrorT error = sm_node_utils_get_hostname(host_name); + if( SM_OKAY != error ) + { + DPRINTFE( "Failed to get hostname, error=%s.", + sm_error_str( error ) ); + return SM_FAILED; + } + + if(0 == strncmp(SM_NODE_CONTROLLER_0_NAME, host_name, sizeof(SM_NODE_CONTROLLER_0_NAME))) + { + this_controller_index = 0; + peer_controller_index = 1; + }else + { + this_controller_index = 1; + peer_controller_index = 0; + } + + bool survivor_selected = false; + if(expect_storage_0) + { + if(current_cluster_hbs_state.controllers[this_controller_index].storage0_responding && + !current_cluster_hbs_state.controllers[peer_controller_index].storage0_responding) + { + DPRINTFI("peer cannot reach storage-0. host can. host will be survivor"); + selection.set_host_schedule_state(SM_NODE_STATE_ACTIVE); + selection.set_peer_schedule_state(SM_NODE_STATE_FAILED); + survivor_selected = true; + }else if(!current_cluster_hbs_state.controllers[this_controller_index].storage0_responding && + current_cluster_hbs_state.controllers[peer_controller_index].storage0_responding) + { + DPRINTFI("host cannot reach storage-0. peer can. peer will be survivor"); + selection.set_host_schedule_state(SM_NODE_STATE_FAILED); + selection.set_peer_schedule_state(SM_NODE_STATE_ACTIVE); + survivor_selected = true; + } + } + + if(!survivor_selected) + { + // so no storage-0 or storage-0 state same on both side + if(current_cluster_hbs_state.controllers[this_controller_index].number_of_node_reachable > + current_cluster_hbs_state.controllers[peer_controller_index].number_of_node_reachable) + { + DPRINTFI("host reaches %d nodes, peer reaches %d nodes, host will be survivor", + current_cluster_hbs_state.controllers[this_controller_index].number_of_node_reachable, + current_cluster_hbs_state.controllers[peer_controller_index].number_of_node_reachable + ); + selection.set_host_schedule_state(SM_NODE_STATE_ACTIVE); + selection.set_peer_schedule_state(SM_NODE_STATE_FAILED); + survivor_selected = true; + }else if (current_cluster_hbs_state.controllers[this_controller_index].number_of_node_reachable < + current_cluster_hbs_state.controllers[peer_controller_index].number_of_node_reachable) + { + DPRINTFI("host reaches %d nodes, peer reaches %d nodes, peer will be survivor", + current_cluster_hbs_state.controllers[this_controller_index].number_of_node_reachable, + current_cluster_hbs_state.controllers[peer_controller_index].number_of_node_reachable + ); + selection.set_host_schedule_state(SM_NODE_STATE_FAILED); + selection.set_peer_schedule_state(SM_NODE_STATE_ACTIVE); + survivor_selected = true; + }else + { + if(pre_failure_cluster_hbs_state != current_cluster_hbs_state) + { + if(0 == current_cluster_hbs_state.controllers[this_controller_index].number_of_node_reachable) + { + // Cannot reach any nodes, I am dead + DPRINTFI("host cannot reach any nodes, peer will be survivor", + current_cluster_hbs_state.controllers[this_controller_index].number_of_node_reachable, + current_cluster_hbs_state.controllers[peer_controller_index].number_of_node_reachable + ); + selection.set_host_schedule_state(SM_NODE_STATE_FAILED); + selection.set_peer_schedule_state(SM_NODE_STATE_ACTIVE); + }else + { + // equaly split, failed the standby + if(SM_NODE_STATE_ACTIVE == system_status.host_status.current_schedule_state) + { + DPRINTFI("host reaches %d nodes, peer reaches %d nodes, host will be survivor", + 
diff --git a/service-mgmt/sm-1.0.0/src/sm_failover_ss.h b/service-mgmt/sm-1.0.0/src/sm_failover_ss.h
index 5a4609cb..9fc2a85c 100644
--- a/service-mgmt/sm-1.0.0/src/sm_failover_ss.h
+++ b/service-mgmt/sm-1.0.0/src/sm_failover_ss.h
@@ -8,6 +8,7 @@
 #define __SM_FAILOVER_SS_H__
 #include
 #include "sm_types.h"
+#include "sm_cluster_hbs_info_msg.h"
 
 typedef struct
 {
@@ -30,13 +31,13 @@ typedef enum
     SM_HEARTBEAT_INDIRECT,
     //no heartbeat
     SM_HEARTBEAT_LOSS
-}SmHeartbeatStatusT;
+}SmHeartbeatStateT;
 
 typedef struct
 {
     SmNodeStatusT host_status;
     SmNodeStatusT peer_status;
-    SmHeartbeatStatusT heartbeat_state;
+    SmHeartbeatStateT heartbeat_state;
     SmSystemModeT system_mode;
 }SmSystemStatusT;
 
@@ -48,11 +49,30 @@ class SmSystemFailoverStatus
         inline SmNodeScheduleStateT get_host_schedule_state() const {
             return _host_schedule_state;
         }
+
         inline SmNodeScheduleStateT get_host_pre_failure_schedule_state() const {
             return _host_pre_failure_schedule_state;
         }
+
+        inline SmClusterHbsStateT get_cluster_hbs_state() const {
+            return _cluster_hbs_state;
+        }
+
+        inline SmClusterHbsStateT get_pre_failure_cluster_hbs_state() const {
+            return _pre_failure_cluster_hbs_state;
+        }
+
+        inline void set_heartbeat_state(SmHeartbeatStateT heartbeat_state)
+        {
+            _heartbeat_state = heartbeat_state;
+        }
+        inline SmHeartbeatStateT get_heartbeat_state() const {
+            return _heartbeat_state;
+        }
         void set_host_schedule_state(SmNodeScheduleStateT state);
         void set_host_pre_failure_schedule_state(SmNodeScheduleStateT state);
+        void set_cluster_hbs_state(const SmClusterHbsStateT& state);
+        void set_pre_failure_cluster_hbs_state(const SmClusterHbsStateT& state);
         inline SmNodeScheduleStateT get_peer_schedule_state() const {
             return _peer_schedule_state;
         }
@@ -68,8 +88,11 @@ class SmSystemFailoverStatus
         SmSystemFailoverStatus();
         SmNodeScheduleStateT _host_pre_failure_schedule_state;
         SmNodeScheduleStateT _peer_pre_failure_schedule_state;
+        SmClusterHbsStateT _pre_failure_cluster_hbs_state;
         SmNodeScheduleStateT _host_schedule_state;
         SmNodeScheduleStateT _peer_schedule_state;
+        SmClusterHbsStateT _cluster_hbs_state;
+        SmHeartbeatStateT _heartbeat_state;
 
         static const char filename[];
         static const char file_format[];