Merge "Introduce failover FSM"
This commit is contained in:
commit
24cd0141ae
|
@ -2,4 +2,4 @@ SRC_DIR=$PKG_BASE
|
||||||
COPY_LIST="$PKG_BASE/LICENSE"
|
COPY_LIST="$PKG_BASE/LICENSE"
|
||||||
TAR_NAME=sm
|
TAR_NAME=sm
|
||||||
VERSION=1.0.0
|
VERSION=1.0.0
|
||||||
TIS_PATCH_VER=24
|
TIS_PATCH_VER=26
|
||||||
|
|
|
@ -107,6 +107,11 @@ SRCS+=sm_swact_state.c
|
||||||
SRCS+=sm_worker_thread.cpp
|
SRCS+=sm_worker_thread.cpp
|
||||||
SRCS+=sm_task_affining_thread.c
|
SRCS+=sm_task_affining_thread.c
|
||||||
SRCS+=sm_node_swact_monitor.cpp
|
SRCS+=sm_node_swact_monitor.cpp
|
||||||
|
SRCS+=sm_failover_fsm.cpp
|
||||||
|
SRCS+=sm_failover_normal_state.cpp
|
||||||
|
SRCS+=sm_failover_fail_pending_state.cpp
|
||||||
|
SRCS+=sm_failover_failed_state.cpp
|
||||||
|
SRCS+=sm_failover_survived_state.cpp
|
||||||
SRCS+=sm_failover_ss.c
|
SRCS+=sm_failover_ss.c
|
||||||
SRCS+=sm_service_domain_interface_not_in_use_state.c
|
SRCS+=sm_service_domain_interface_not_in_use_state.c
|
||||||
SRCS+=sm_configuration_table.c
|
SRCS+=sm_configuration_table.c
|
||||||
|
|
|
@ -34,15 +34,8 @@
|
||||||
#include "sm_util_types.h"
|
#include "sm_util_types.h"
|
||||||
#include "sm_failover_ss.h"
|
#include "sm_failover_ss.h"
|
||||||
#include "sm_failover_utils.h"
|
#include "sm_failover_utils.h"
|
||||||
|
#include "sm_failover_fsm.h"
|
||||||
typedef enum
|
#include "sm_api.h"
|
||||||
{
|
|
||||||
SM_FAILOVER_INTERFACE_UNKNOWN,
|
|
||||||
SM_FAILOVER_INTERFACE_OK,
|
|
||||||
SM_FAILOVER_INTERFACE_MISSING_HEARTBEAT,
|
|
||||||
SM_FAILOVER_INTERFACE_DOWN,
|
|
||||||
SM_FAILOVER_INTERFACE_RECOVERING
|
|
||||||
}SmFailoverInterfaceStateT;
|
|
||||||
|
|
||||||
typedef enum
|
typedef enum
|
||||||
{
|
{
|
||||||
|
@ -57,7 +50,7 @@ typedef enum
|
||||||
//as part of the gradual delivery of enhancement with more
|
//as part of the gradual delivery of enhancement with more
|
||||||
//complex algorithm to determine the failover survivor routine
|
//complex algorithm to determine the failover survivor routine
|
||||||
//the SM_FAILOVER_ACTION_ROUTINE will redirect the lookup to
|
//the SM_FAILOVER_ACTION_ROUTINE will redirect the lookup to
|
||||||
//new logic. Until all actions are migrate to new logic, the
|
//new logic. Until all actions are migrated to new logic, the
|
||||||
//lookup tables will be eliminated.
|
//lookup tables will be eliminated.
|
||||||
SM_FAILOVER_ACTION_ROUTINE = 1 << 31,
|
SM_FAILOVER_ACTION_ROUTINE = 1 << 31,
|
||||||
}SmFailoverActionT;
|
}SmFailoverActionT;
|
||||||
|
@ -181,6 +174,79 @@ static bool _hello_msg_alive = true;
|
||||||
static SmSystemModeT _system_mode;
|
static SmSystemModeT _system_mode;
|
||||||
static time_t _last_report_ts = 0;
|
static time_t _last_report_ts = 0;
|
||||||
static int _heartbeat_count = 0;
|
static int _heartbeat_count = 0;
|
||||||
|
static bool _peer_ready = false;
|
||||||
|
|
||||||
|
// The class is to accommodate batching i/f state changes
|
||||||
|
// during the failover_audit interval.
|
||||||
|
class FailoverEventHolder
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
FailoverEventHolder();
|
||||||
|
void push_event(SmFailoverEventT, const ISmFSMEventData* event_data);
|
||||||
|
void send_event();
|
||||||
|
private:
|
||||||
|
bool _event_to_send;
|
||||||
|
SmFailoverEventT _failover_event;
|
||||||
|
const ISmFSMEventData* _event_data;
|
||||||
|
};
|
||||||
|
|
||||||
|
FailoverEventHolder::FailoverEventHolder()
|
||||||
|
{
|
||||||
|
_event_to_send = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
void FailoverEventHolder::push_event(SmFailoverEventT event, const ISmFSMEventData* event_data)
|
||||||
|
{
|
||||||
|
SmFailoverEventT list[] = {
|
||||||
|
SM_INTERFACE_DOWN,
|
||||||
|
SM_HEARTBEAT_LOST,
|
||||||
|
SM_INTERFACE_UP,
|
||||||
|
SM_HEARTBEAT_RECOVER
|
||||||
|
};
|
||||||
|
|
||||||
|
unsigned int i;
|
||||||
|
if(!_event_to_send)
|
||||||
|
{
|
||||||
|
_event_to_send = true;
|
||||||
|
for(i = 0; i < sizeof(list) / sizeof(SmFailoverEventT); i ++)
|
||||||
|
{
|
||||||
|
if(event == list[i])
|
||||||
|
{
|
||||||
|
_failover_event = event;
|
||||||
|
_event_data = event_data;
|
||||||
|
_event_to_send = true;
|
||||||
|
DPRINTFI("Push event %d", event);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
DPRINTFE("Runtime error: unsupported event %d", event);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for(i = 0; list[i] != _failover_event && i < sizeof(list) / sizeof(SmFailoverEventT); i ++)
|
||||||
|
{
|
||||||
|
if(list[i] == event)
|
||||||
|
{
|
||||||
|
_failover_event = event;
|
||||||
|
_event_data = event_data;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
DPRINTFE("Runtime error: unsupported event %d", event);
|
||||||
|
}
|
||||||
|
|
||||||
|
void FailoverEventHolder::send_event()
|
||||||
|
{
|
||||||
|
if(_event_to_send)
|
||||||
|
{
|
||||||
|
DPRINTFI("Send event %d", _failover_event);
|
||||||
|
SmFailoverFSM::get_fsm().send_event(_failover_event, _event_data);
|
||||||
|
_event_to_send = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static FailoverEventHolder _event_holder;
|
||||||
|
|
||||||
// std 2+2/2+2+x failover actions
|
// std 2+2/2+2+x failover actions
|
||||||
SmFailoverActionPairT action_map_std_infra[16] =
|
SmFailoverActionPairT action_map_std_infra[16] =
|
||||||
|
@ -435,6 +501,8 @@ void sm_failover_lost_heartbeat( SmFailoverInterfaceT* interface )
|
||||||
{
|
{
|
||||||
DPRINTFI("Interface %s lose heartbeat.", interface->interface_name);
|
DPRINTFI("Interface %s lose heartbeat.", interface->interface_name);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_event_holder.push_event(SM_HEARTBEAT_LOST, NULL);
|
||||||
}
|
}
|
||||||
// ****************************************************************************
|
// ****************************************************************************
|
||||||
|
|
||||||
|
@ -466,6 +534,8 @@ void sm_failover_heartbeat_restore( SmFailoverInterfaceT* interface )
|
||||||
{
|
{
|
||||||
DPRINTFI("Interface %s heartbeat is OK.", interface->interface_name);
|
DPRINTFI("Interface %s heartbeat is OK.", interface->interface_name);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_event_holder.push_event(SM_HEARTBEAT_RECOVER, NULL);
|
||||||
}
|
}
|
||||||
// ****************************************************************************
|
// ****************************************************************************
|
||||||
|
|
||||||
|
@ -501,6 +571,8 @@ void sm_failover_interface_down( const char* const interface_name )
|
||||||
{
|
{
|
||||||
DPRINTFI("%d domain interfaces are impacted as i/f %s is down.",
|
DPRINTFI("%d domain interfaces are impacted as i/f %s is down.",
|
||||||
impacted, interface_name);
|
impacted, interface_name);
|
||||||
|
|
||||||
|
_event_holder.push_event(SM_INTERFACE_DOWN, NULL);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// ****************************************************************************
|
// ****************************************************************************
|
||||||
|
@ -537,6 +609,8 @@ void sm_failover_interface_up( const char* const interface_name )
|
||||||
{
|
{
|
||||||
DPRINTFI("%d domain interfaces are impacted as i/f %s is up.",
|
DPRINTFI("%d domain interfaces are impacted as i/f %s is up.",
|
||||||
impacted, interface_name);
|
impacted, interface_name);
|
||||||
|
|
||||||
|
_event_holder.push_event(SM_INTERFACE_UP, NULL);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// ****************************************************************************
|
// ****************************************************************************
|
||||||
|
@ -656,13 +730,22 @@ int _failover_get_if_state()
|
||||||
// ****************************************************************************
|
// ****************************************************************************
|
||||||
// Failover - get interface state
|
// Failover - get interface state
|
||||||
// ==================
|
// ==================
|
||||||
SmErrorT sm_failover_if_state_get(SmHeartbeatMsgIfStateT *if_state)
|
SmHeartbeatMsgIfStateT sm_failover_if_state_get()
|
||||||
|
{
|
||||||
|
mutex_holder holder(&_mutex);
|
||||||
|
int if_state_flag = _failover_get_if_state();
|
||||||
|
return (if_state_flag & 0b0111); //the lower 3 bits i/f state flag
|
||||||
|
}
|
||||||
|
// ****************************************************************************
|
||||||
|
|
||||||
|
// ****************************************************************************
|
||||||
|
// Failover - get peer node interface state
|
||||||
|
// ==================
|
||||||
|
SmHeartbeatMsgIfStateT sm_failover_get_peer_if_state()
|
||||||
{
|
{
|
||||||
mutex_holder holder(&_mutex);
|
mutex_holder holder(&_mutex);
|
||||||
|
|
||||||
int if_state_flag = _failover_get_if_state();
|
return (_peer_if_state & 0b0111); //the lower 3 bits i/f state flag
|
||||||
*if_state = (if_state_flag & 0b0111); //the lower 3 bits i/f state flag
|
|
||||||
return SM_OKAY;
|
|
||||||
}
|
}
|
||||||
// ****************************************************************************
|
// ****************************************************************************
|
||||||
|
|
||||||
|
@ -923,84 +1006,66 @@ bool this_controller_unlocked()
|
||||||
}
|
}
|
||||||
// ****************************************************************************
|
// ****************************************************************************
|
||||||
|
|
||||||
|
|
||||||
// ****************************************************************************
|
// ****************************************************************************
|
||||||
// Failover - pack schedule state into 2 bit (active/standby/failed)
|
// Failover - set system to scheduled status
|
||||||
// SM starts managing failover after the node is scheduled, i.e, cannot be
|
// ==================
|
||||||
// in SM_NODE_STATE_UNKNOWN or SM_NODE_STATE_INIT state
|
SmErrorT sm_failover_set_system(const SmSystemFailoverStatus& failover_status)
|
||||||
// =======================
|
|
||||||
unsigned int sm_failover_pack_schedule_state(SmNodeScheduleStateT state)
|
|
||||||
{
|
{
|
||||||
static const unsigned int failed_state_bit_flag = 0;
|
//valid combinations of target system scheduling are:
|
||||||
static const unsigned int active_state_bit_flag = 1;
|
// active/standby
|
||||||
static const unsigned int standby_state_bit_flag = 1 << 1;
|
// active/failed
|
||||||
unsigned int res;
|
SmFailoverActionResultT result;
|
||||||
switch(state)
|
SmNodeScheduleStateT host_target_state, peer_target_state;
|
||||||
|
host_target_state = failover_status.get_host_schedule_state();
|
||||||
|
peer_target_state = failover_status.get_peer_schedule_state();
|
||||||
|
if(SM_NODE_STATE_ACTIVE == host_target_state)
|
||||||
{
|
{
|
||||||
case SM_NODE_STATE_ACTIVE:
|
if(SM_NODE_STATE_STANDBY == _host_state &&
|
||||||
res = active_state_bit_flag;
|
SM_NODE_STATE_FAILED == peer_target_state)
|
||||||
break;
|
{
|
||||||
case SM_NODE_STATE_STANDBY:
|
result = sm_failover_activate_self();
|
||||||
res = standby_state_bit_flag;
|
if(SM_FAILOVER_ACTION_RESULT_FAILED == result)
|
||||||
break;
|
{
|
||||||
case SM_NODE_STATE_FAILED:
|
DPRINTFE("Failed to activate %s.", _host_name);
|
||||||
res = failed_state_bit_flag;
|
return SM_FAILED;
|
||||||
break;
|
}
|
||||||
default:
|
result = sm_failover_disable_node(_peer_name);
|
||||||
res = failed_state_bit_flag;
|
if(SM_FAILOVER_ACTION_RESULT_FAILED == result)
|
||||||
|
{
|
||||||
|
DPRINTFE("Failed to disable node %s.", _peer_name);
|
||||||
|
return SM_FAILED;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}else if(SM_NODE_STATE_STANDBY == host_target_state)
|
||||||
|
{
|
||||||
|
if(SM_NODE_STATE_ACTIVE == _host_state)
|
||||||
|
{
|
||||||
|
result = sm_failover_swact();
|
||||||
|
if(SM_FAILOVER_ACTION_RESULT_FAILED == result)
|
||||||
|
{
|
||||||
|
DPRINTFE("Failed to swact.");
|
||||||
|
return SM_FAILED;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return res;
|
else if(SM_NODE_STATE_FAILED == host_target_state)
|
||||||
}
|
|
||||||
// ****************************************************************************
|
|
||||||
|
|
||||||
// ****************************************************************************
|
|
||||||
// Failover - convert target scheduling state to action
|
|
||||||
// =======================
|
|
||||||
int sm_failover_get_action(const SmSystemFailoverStatus& failover_status)
|
|
||||||
{
|
|
||||||
DPRINTFI("Host to %s, Peer to %s.",
|
|
||||||
sm_node_schedule_state_str(failover_status.host_schedule_state),
|
|
||||||
sm_node_schedule_state_str(failover_status.peer_schedule_state)
|
|
||||||
);
|
|
||||||
unsigned int host_flag, peer_flag;
|
|
||||||
host_flag = sm_failover_pack_schedule_state(failover_status.host_schedule_state);
|
|
||||||
peer_flag = sm_failover_pack_schedule_state(failover_status.peer_schedule_state);
|
|
||||||
|
|
||||||
unsigned int flag = (host_flag | (peer_flag << 2));
|
|
||||||
DPRINTFI("Failover scheduling flag %d", flag);
|
|
||||||
SmFailoverActionPairT* actions;
|
|
||||||
int action;
|
|
||||||
|
|
||||||
SmFailoverActionPairT action_map[16] = {
|
|
||||||
{SM_FAILOVER_ACTION_FAIL_NODE, SM_FAILOVER_ACTION_FAIL_NODE}, //00 00
|
|
||||||
{SM_FAILOVER_ACTION_DISABLE_STANDBY, SM_FAILOVER_ACTION_FAIL_NODE}, //01 00
|
|
||||||
{SM_FAILOVER_ACTION_UNDEFINED, SM_FAILOVER_ACTION_UNDEFINED}, //10 00
|
|
||||||
{SM_FAILOVER_ACTION_UNDEFINED, SM_FAILOVER_ACTION_UNDEFINED}, //11 00
|
|
||||||
{SM_FAILOVER_ACTION_FAIL_NODE, SM_FAILOVER_ACTION_ACTIVATE}, //00 01
|
|
||||||
{SM_FAILOVER_ACTION_UNDEFINED, SM_FAILOVER_ACTION_UNDEFINED}, //01 01
|
|
||||||
{SM_FAILOVER_ACTION_SWACT, SM_FAILOVER_ACTION_NO_ACTION}, //10 01
|
|
||||||
{SM_FAILOVER_ACTION_UNDEFINED, SM_FAILOVER_ACTION_UNDEFINED}, //11 01
|
|
||||||
{SM_FAILOVER_ACTION_UNDEFINED, SM_FAILOVER_ACTION_UNDEFINED}, //00 10
|
|
||||||
{SM_FAILOVER_ACTION_NO_ACTION, SM_FAILOVER_ACTION_NO_ACTION}, //01 10
|
|
||||||
{SM_FAILOVER_ACTION_UNDEFINED, SM_FAILOVER_ACTION_UNDEFINED}, //10 10
|
|
||||||
{SM_FAILOVER_ACTION_UNDEFINED, SM_FAILOVER_ACTION_UNDEFINED}, //11 10
|
|
||||||
{SM_FAILOVER_ACTION_UNDEFINED, SM_FAILOVER_ACTION_UNDEFINED}, //00 11
|
|
||||||
{SM_FAILOVER_ACTION_UNDEFINED, SM_FAILOVER_ACTION_UNDEFINED}, //01 11
|
|
||||||
{SM_FAILOVER_ACTION_UNDEFINED, SM_FAILOVER_ACTION_UNDEFINED}, //10 11
|
|
||||||
{SM_FAILOVER_ACTION_UNDEFINED, SM_FAILOVER_ACTION_UNDEFINED}, //11 11
|
|
||||||
};
|
|
||||||
|
|
||||||
bool is_active = is_active_controller();
|
|
||||||
actions = &(action_map[flag & 0xf]);
|
|
||||||
if(is_active)
|
|
||||||
{
|
{
|
||||||
action = actions->active_controller_action;
|
result = sm_failover_disable_node(_host_name);
|
||||||
|
if(SM_FAILOVER_ACTION_RESULT_FAILED == result)
|
||||||
|
{
|
||||||
|
DPRINTFE("Failed disable host %s.", _host_name);
|
||||||
|
return SM_FAILED;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
action = actions->standby_controller_action;
|
DPRINTFE("Runtime error. Unexpected scheduling state %s for host %s",
|
||||||
|
sm_node_schedule_state_str(host_target_state),
|
||||||
|
_host_name);
|
||||||
|
return SM_FAILED;
|
||||||
}
|
}
|
||||||
return action;
|
|
||||||
|
return SM_OKAY;
|
||||||
}
|
}
|
||||||
// ****************************************************************************
|
// ****************************************************************************
|
||||||
|
|
||||||
|
@ -1112,6 +1177,8 @@ void sm_failover_audit()
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_event_holder.send_event();
|
||||||
|
|
||||||
int64_t curr_node_state = if_state_flag;
|
int64_t curr_node_state = if_state_flag;
|
||||||
|
|
||||||
if( _hello_msg_alive )
|
if( _hello_msg_alive )
|
||||||
|
@ -1160,34 +1227,9 @@ void sm_failover_audit()
|
||||||
_log_nodes_state(action);
|
_log_nodes_state(action);
|
||||||
|
|
||||||
DPRINTFI("Action to take %d", action);
|
DPRINTFI("Action to take %d", action);
|
||||||
|
|
||||||
if (action & SM_FAILOVER_ACTION_ROUTINE)
|
if (action & SM_FAILOVER_ACTION_ROUTINE)
|
||||||
{
|
{
|
||||||
SmSystemStatusT sys_status;
|
action = SM_FAILOVER_ACTION_NO_ACTION;
|
||||||
SmSystemFailoverStatus failover_status;
|
|
||||||
sys_status.system_mode = _system_mode;
|
|
||||||
if(if_state_flag & SM_FAILOVER_HEARTBEAT_ALIVE)
|
|
||||||
{
|
|
||||||
sys_status.heartbeat_state = SM_HEARTBEAT_OK;
|
|
||||||
}else
|
|
||||||
{
|
|
||||||
sys_status.heartbeat_state = SM_HEARTBEAT_LOSS;
|
|
||||||
}
|
|
||||||
|
|
||||||
sys_status.host_status.node_name = _host_name;
|
|
||||||
sys_status.host_status.interface_state = if_state_flag & 0x7;
|
|
||||||
sys_status.host_status.current_schedule_state = _host_state;
|
|
||||||
sys_status.peer_status.node_name = _peer_name;
|
|
||||||
sys_status.peer_status.interface_state = _peer_if_state & 0x7;
|
|
||||||
sys_status.peer_status.current_schedule_state = sm_get_controller_state(_peer_name);
|
|
||||||
SmErrorT error = sm_failover_ss_get_survivor(sys_status, failover_status);
|
|
||||||
if(SM_OKAY != error)
|
|
||||||
{
|
|
||||||
DPRINTFE("Failed to determine failover state. ");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
action = sm_failover_get_action(failover_status);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (action & SM_FAILOVER_ACTION_ACTIVATE)
|
if (action & SM_FAILOVER_ACTION_ACTIVATE)
|
||||||
|
@ -1367,6 +1409,67 @@ SmErrorT sm_failover_action()
|
||||||
}
|
}
|
||||||
// ****************************************************************************
|
// ****************************************************************************
|
||||||
|
|
||||||
|
// ****************************************************************************
|
||||||
|
// Failover - get node interface info
|
||||||
|
// ==================
|
||||||
|
SmFailoverInterfaceStateT sm_failover_get_interface_info(SmInterfaceTypeT interface_type)
|
||||||
|
{
|
||||||
|
SmFailoverInterfaceInfo* res = NULL;
|
||||||
|
switch(interface_type)
|
||||||
|
{
|
||||||
|
case SM_INTERFACE_MGMT:
|
||||||
|
res = _mgmt_interface_info;
|
||||||
|
break;
|
||||||
|
case SM_INTERFACE_INFRA:
|
||||||
|
res = _infra_interface_info;
|
||||||
|
break;
|
||||||
|
case SM_INTERFACE_OAM:
|
||||||
|
res = _oam_interface_info;
|
||||||
|
break;
|
||||||
|
case SM_INTERFACE_UNKNOWN:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if(NULL != res)
|
||||||
|
{
|
||||||
|
return res->get_state();
|
||||||
|
}
|
||||||
|
return SM_FAILOVER_INTERFACE_UNKNOWN;
|
||||||
|
}
|
||||||
|
// ****************************************************************************
|
||||||
|
|
||||||
|
static void sm_failover_node_set_callback( char node_name[],
|
||||||
|
SmNodeSetActionT action, SmNodeAdminStateT admin_state,
|
||||||
|
SmNodeOperationalStateT oper_state, SmNodeAvailStatusT avail_status,
|
||||||
|
int seqno )
|
||||||
|
{
|
||||||
|
if(0 == strcmp(node_name, _host_name))
|
||||||
|
{
|
||||||
|
// care only the peer
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool peer_ready = false;
|
||||||
|
if(SM_NODE_SET_ACTION_UNLOCK == action)
|
||||||
|
{
|
||||||
|
peer_ready = SM_NODE_ADMIN_STATE_UNLOCKED == admin_state &&
|
||||||
|
SM_NODE_OPERATIONAL_STATE_ENABLED == oper_state &&
|
||||||
|
(SM_NODE_AVAIL_STATUS_AVAILABLE == avail_status ||
|
||||||
|
SM_NODE_AVAIL_STATUS_DEGRADED == avail_status);
|
||||||
|
}else if (SM_NODE_SET_ACTION_LOCK == action)
|
||||||
|
{
|
||||||
|
peer_ready = false;
|
||||||
|
}else
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if(peer_ready && !_peer_ready)
|
||||||
|
{
|
||||||
|
DPRINTFI("Peer %s is now online.", node_name);
|
||||||
|
}
|
||||||
|
_peer_ready = peer_ready;
|
||||||
|
}
|
||||||
|
|
||||||
|
static SmApiCallbacksT _api_callbacks = {0};
|
||||||
// ****************************************************************************
|
// ****************************************************************************
|
||||||
// Failover - Initialize
|
// Failover - Initialize
|
||||||
// ======================
|
// ======================
|
||||||
|
@ -1375,6 +1478,14 @@ SmErrorT sm_failover_initialize( void )
|
||||||
void* user_data[1] = {&_total_interfaces};
|
void* user_data[1] = {&_total_interfaces};
|
||||||
bool enabled;
|
bool enabled;
|
||||||
SmErrorT error;
|
SmErrorT error;
|
||||||
|
|
||||||
|
error = SmFailoverFSM::initialize();
|
||||||
|
if( SM_OKAY != error )
|
||||||
|
{
|
||||||
|
DPRINTFE( "Failed to initialize failover FSM, error %s.", sm_error_str( error ) );
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
_system_mode = sm_node_utils_get_system_mode();
|
_system_mode = sm_node_utils_get_system_mode();
|
||||||
DPRINTFI("System mode %s", sm_system_mode_str(_system_mode));
|
DPRINTFI("System mode %s", sm_system_mode_str(_system_mode));
|
||||||
|
|
||||||
|
@ -1404,6 +1515,17 @@ SmErrorT sm_failover_initialize( void )
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_api_callbacks.node_set = sm_failover_node_set_callback;
|
||||||
|
|
||||||
|
error = sm_api_register_callbacks( &_api_callbacks );
|
||||||
|
if( SM_OKAY != error )
|
||||||
|
{
|
||||||
|
DPRINTFE( "Failed to register api callbacks, error=%s.",
|
||||||
|
sm_error_str( error ) );
|
||||||
|
return( error );
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
if( 0 == strcmp( SM_NODE_CONTROLLER_0_NAME, _host_name ) )
|
if( 0 == strcmp( SM_NODE_CONTROLLER_0_NAME, _host_name ) )
|
||||||
{
|
{
|
||||||
snprintf( _peer_name, sizeof(_peer_name), "%s", SM_NODE_CONTROLLER_1_NAME );
|
snprintf( _peer_name, sizeof(_peer_name), "%s", SM_NODE_CONTROLLER_1_NAME );
|
||||||
|
@ -1491,6 +1613,13 @@ SmErrorT sm_failover_finalize( void )
|
||||||
_total_interfaces = 0;
|
_total_interfaces = 0;
|
||||||
|
|
||||||
SmErrorT error;
|
SmErrorT error;
|
||||||
|
error = sm_api_deregister_callbacks( &_api_callbacks );
|
||||||
|
if( SM_OKAY != error )
|
||||||
|
{
|
||||||
|
DPRINTFE( "Failed to deregister api callbacks, error=%s.",
|
||||||
|
sm_error_str( error ) );
|
||||||
|
}
|
||||||
|
|
||||||
sm_timer_deregister( failover_audit_timer_id );
|
sm_timer_deregister( failover_audit_timer_id );
|
||||||
if( NULL != _sm_db_handle )
|
if( NULL != _sm_db_handle )
|
||||||
{
|
{
|
||||||
|
@ -1504,6 +1633,13 @@ SmErrorT sm_failover_finalize( void )
|
||||||
_sm_db_handle = NULL;
|
_sm_db_handle = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
error = SmFailoverFSM::finalize();
|
||||||
|
if( SM_OKAY != error )
|
||||||
|
{
|
||||||
|
DPRINTFE( "Failed to finalize failover FSM, error %s.", sm_error_str( error ) );
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
return SM_OKAY;
|
return SM_OKAY;
|
||||||
}
|
}
|
||||||
// ****************************************************************************
|
// ****************************************************************************
|
||||||
|
@ -1575,6 +1711,11 @@ void dump_peer_if_state(FILE* fp)
|
||||||
fprintf(fp, " Peer Interface state: %d\n", _peer_if_state);
|
fprintf(fp, " Peer Interface state: %d\n", _peer_if_state);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void dump_failover_fsm_state(FILE* fp)
|
||||||
|
{
|
||||||
|
fprintf(fp, " Failover FSM state: %d\n", SmFailoverFSM::get_fsm().get_state());
|
||||||
|
}
|
||||||
|
|
||||||
// ****************************************************************************
|
// ****************************************************************************
|
||||||
// Failover - dump state
|
// Failover - dump state
|
||||||
// ======================
|
// ======================
|
||||||
|
@ -1582,6 +1723,7 @@ void sm_failover_dump_state(FILE* fp)
|
||||||
{
|
{
|
||||||
fprintf( fp, "Failover \n\n" );
|
fprintf( fp, "Failover \n\n" );
|
||||||
dump_host_state(fp);
|
dump_host_state(fp);
|
||||||
|
dump_failover_fsm_state(fp);
|
||||||
dump_interfaces_state(fp);
|
dump_interfaces_state(fp);
|
||||||
dump_peer_if_state(fp);
|
dump_peer_if_state(fp);
|
||||||
}
|
}
|
||||||
|
|
|
@ -9,6 +9,7 @@
|
||||||
#include "sm_types.h"
|
#include "sm_types.h"
|
||||||
#include "sm_service_domain_interface_table.h"
|
#include "sm_service_domain_interface_table.h"
|
||||||
#include "sm_db_nodes.h"
|
#include "sm_db_nodes.h"
|
||||||
|
#include "sm_failover_ss.h"
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
@ -88,7 +89,7 @@ extern void sm_failover_if_state_update(const char node_name[],
|
||||||
|
|
||||||
// ****************************************************************************
|
// ****************************************************************************
|
||||||
// Failover - get local interface state
|
// Failover - get local interface state
|
||||||
extern SmErrorT sm_failover_if_state_get(SmHeartbeatMsgIfStateT*);
|
extern SmHeartbeatMsgIfStateT sm_failover_if_state_get();
|
||||||
// ****************************************************************************
|
// ****************************************************************************
|
||||||
|
|
||||||
// ****************************************************************************
|
// ****************************************************************************
|
||||||
|
@ -115,6 +116,27 @@ SmErrorT sm_failover_degrade_clear(SmFailoverDegradeSourceT source);
|
||||||
SmErrorT sm_failover_get_node(char* node_name, SmDbNodeT& node);
|
SmErrorT sm_failover_get_node(char* node_name, SmDbNodeT& node);
|
||||||
// ****************************************************************************
|
// ****************************************************************************
|
||||||
|
|
||||||
|
// ****************************************************************************
|
||||||
|
// Failover - get node interface info
|
||||||
|
// ==================
|
||||||
|
SmFailoverInterfaceStateT sm_failover_get_interface_info(SmInterfaceTypeT interface_type);
|
||||||
|
// ****************************************************************************
|
||||||
|
|
||||||
|
|
||||||
|
// ****************************************************************************
|
||||||
|
// Failover - get peer node interface state
|
||||||
|
// ==================
|
||||||
|
SmHeartbeatMsgIfStateT sm_failover_get_peer_if_state();
|
||||||
|
// ****************************************************************************
|
||||||
|
|
||||||
|
|
||||||
|
// ****************************************************************************
|
||||||
|
// Failover - set system to scheduled status
|
||||||
|
// ==================
|
||||||
|
SmErrorT sm_failover_set_system(const SmSystemFailoverStatus& failover_status);
|
||||||
|
// ****************************************************************************
|
||||||
|
|
||||||
|
|
||||||
// ****************************************************************************
|
// ****************************************************************************
|
||||||
// Failover - dump state
|
// Failover - dump state
|
||||||
// ==================
|
// ==================
|
||||||
|
|
|
@ -0,0 +1,141 @@
|
||||||
|
//
|
||||||
|
// Copyright (c) 2018 Wind River Systems, Inc.
|
||||||
|
//
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
#include "sm_failover_fail_pending_state.h"
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "sm_types.h"
|
||||||
|
#include "sm_limits.h"
|
||||||
|
#include "sm_debug.h"
|
||||||
|
#include "sm_failover.h"
|
||||||
|
#include "sm_failover_fsm.h"
|
||||||
|
#include "sm_failover_ss.h"
|
||||||
|
#include "sm_failover_utils.h"
|
||||||
|
#include "sm_node_utils.h"
|
||||||
|
#include "sm_node_api.h"
|
||||||
|
|
||||||
|
static const int FAIL_PENDING_TIMEOUT = 2000; //2000ms
|
||||||
|
|
||||||
|
SmErrorT SmFailoverFailPendingState::event_handler(SmFailoverEventT event, const ISmFSMEventData* event_data)
|
||||||
|
{
|
||||||
|
//SmFSMEventDataTypeT event_data_type = event_data->get_event_data_type();
|
||||||
|
switch (event)
|
||||||
|
{
|
||||||
|
case SM_HEARTBEAT_LOST:
|
||||||
|
case SM_INTERFACE_DOWN:
|
||||||
|
//getting worse, extend wait time
|
||||||
|
case SM_INTERFACE_UP:
|
||||||
|
case SM_HEARTBEAT_RECOVER:
|
||||||
|
// Things are improving, extend wait time
|
||||||
|
this->_register_timer();
|
||||||
|
break;
|
||||||
|
case SM_FAIL_PENDING_TIMEOUT:
|
||||||
|
{
|
||||||
|
SmSystemFailoverStatus failover_status;
|
||||||
|
SmErrorT error = sm_failover_ss_get_survivor(failover_status);
|
||||||
|
SmNodeScheduleStateT host_state = failover_status.get_host_schedule_state();
|
||||||
|
SmNodeScheduleStateT peer_state = failover_status.get_peer_schedule_state();
|
||||||
|
if(SM_OKAY != error)
|
||||||
|
{
|
||||||
|
DPRINTFE("Failed to get failover survivor. Error %s", sm_error_str(error));
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
error = sm_failover_set_system(failover_status);
|
||||||
|
if(SM_NODE_STATE_FAILED == host_state)
|
||||||
|
{
|
||||||
|
this->fsm.set_state(SM_FAILOVER_STATE_FAILED);
|
||||||
|
}
|
||||||
|
else if(SM_NODE_STATE_ACTIVE == host_state)
|
||||||
|
{
|
||||||
|
if(SM_NODE_STATE_FAILED == peer_state)
|
||||||
|
{
|
||||||
|
this->fsm.set_state(SM_FAILOVER_STATE_SURVIVED);
|
||||||
|
}else
|
||||||
|
{
|
||||||
|
this->fsm.set_state(SM_FAILOVER_STATE_NORMAL);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if(SM_NODE_STATE_STANDBY == host_state)
|
||||||
|
{
|
||||||
|
this->fsm.set_state(SM_FAILOVER_STATE_NORMAL);
|
||||||
|
}else
|
||||||
|
{
|
||||||
|
DPRINTFE("Runtime error: unexpected scheduling state: %s",
|
||||||
|
sm_node_schedule_state_str(host_state));
|
||||||
|
}
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
DPRINTFE("Runtime error, unexpected event %d, at state %d", event, this->fsm.get_state());
|
||||||
|
}
|
||||||
|
return SM_OKAY;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool SmFailoverFailPendingState::_fail_pending_timeout(
|
||||||
|
SmTimerIdT timer_id, int64_t user_data)
|
||||||
|
{
|
||||||
|
SmFailoverFSM::get_fsm().send_event(SM_FAIL_PENDING_TIMEOUT, NULL);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
SmErrorT SmFailoverFailPendingState::enter_state()
|
||||||
|
{
|
||||||
|
DPRINTFI("Entering Pending state");
|
||||||
|
SmErrorT error = this->_register_timer();
|
||||||
|
if(SM_OKAY != error)
|
||||||
|
{
|
||||||
|
DPRINTFE("Failed to register fail pending timer. Error %s", sm_error_str(error));
|
||||||
|
}
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
SmErrorT SmFailoverFailPendingState::_register_timer()
|
||||||
|
{
|
||||||
|
SmErrorT error;
|
||||||
|
const char* timer_name = "FAIL PENDING TIMER";
|
||||||
|
if(SM_TIMER_ID_INVALID != this->_pending_timer_id)
|
||||||
|
{
|
||||||
|
this->_deregister_timer();
|
||||||
|
}
|
||||||
|
|
||||||
|
error = sm_timer_register( timer_name, FAIL_PENDING_TIMEOUT,
|
||||||
|
SmFailoverFailPendingState::_fail_pending_timeout,
|
||||||
|
0, &this->_pending_timer_id);
|
||||||
|
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
SmErrorT SmFailoverFailPendingState::_deregister_timer()
|
||||||
|
{
|
||||||
|
SmErrorT error;
|
||||||
|
if(SM_TIMER_ID_INVALID == this->_pending_timer_id)
|
||||||
|
{
|
||||||
|
return SM_OKAY;
|
||||||
|
}
|
||||||
|
|
||||||
|
error = sm_timer_deregister(this->_pending_timer_id);
|
||||||
|
if( SM_OKAY != error )
|
||||||
|
{
|
||||||
|
DPRINTFE( "Failed to cancel fail pending timer, error=%s.",
|
||||||
|
sm_error_str( error ) );
|
||||||
|
}else
|
||||||
|
{
|
||||||
|
this->_pending_timer_id = SM_TIMER_ID_INVALID;
|
||||||
|
}
|
||||||
|
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
SmErrorT SmFailoverFailPendingState::exit_state()
|
||||||
|
{
|
||||||
|
SmErrorT error = this->_deregister_timer();
|
||||||
|
if(SM_OKAY != error)
|
||||||
|
{
|
||||||
|
DPRINTFE("Failed to deregister fail pending timer. Error %s", sm_error_str(error));
|
||||||
|
}
|
||||||
|
DPRINTFI("Exiting Pending state");
|
||||||
|
return error;
|
||||||
|
}
|
|
@ -0,0 +1,31 @@
|
||||||
|
//
|
||||||
|
// Copyright (c) 2018 Wind River Systems, Inc.
|
||||||
|
//
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
#ifndef __SM_FAILOVER_FAIL_PENDING_STATE_H__
|
||||||
|
#define __SM_FAILOVER_FAIL_PENDING_STATE_H__
|
||||||
|
#include "sm_types.h"
|
||||||
|
#include "sm_failover_fsm.h"
|
||||||
|
#include "sm_timer.h"
|
||||||
|
|
||||||
|
class SmFailoverFailPendingState : public SmFSMState
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
SmFailoverFailPendingState(SmFailoverFSM& fsm) : SmFSMState(fsm){}
|
||||||
|
SmErrorT enter_state();
|
||||||
|
SmErrorT exit_state();
|
||||||
|
|
||||||
|
protected:
|
||||||
|
SmErrorT event_handler(SmFailoverEventT event, const ISmFSMEventData* event_data);
|
||||||
|
|
||||||
|
private:
|
||||||
|
SmTimerIdT _pending_timer_id;
|
||||||
|
|
||||||
|
static bool _fail_pending_timeout(SmTimerIdT timer_id, int64_t user_data);
|
||||||
|
SmErrorT _register_timer();
|
||||||
|
SmErrorT _deregister_timer();
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
#endif //__SM_FAILOVER_FAIL_PENDING_STATE_H__
|
|
@ -0,0 +1,66 @@
|
||||||
|
//
|
||||||
|
// Copyright (c) 2018 Wind River Systems, Inc.
|
||||||
|
//
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
#include "sm_failover_failed_state.h"
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "sm_types.h"
|
||||||
|
#include "sm_debug.h"
|
||||||
|
#include "sm_failover_fsm.h"
|
||||||
|
#include "sm_failover_ss.h"
|
||||||
|
|
||||||
|
SmErrorT SmFailoverFailedState::enter_state()
|
||||||
|
{
|
||||||
|
DPRINTFI("Entering Failed state");
|
||||||
|
return SM_OKAY;
|
||||||
|
}
|
||||||
|
|
||||||
|
SmErrorT SmFailoverFailedState::event_handler(SmFailoverEventT event, const ISmFSMEventData* event_data)
|
||||||
|
{
|
||||||
|
switch (event)
|
||||||
|
{
|
||||||
|
case SM_HEARTBEAT_LOST:
|
||||||
|
case SM_INTERFACE_DOWN:
|
||||||
|
case SM_INTERFACE_UP:
|
||||||
|
// ignore, recover is possible only when heartbeat is recovered
|
||||||
|
break;
|
||||||
|
case SM_HEARTBEAT_RECOVER:
|
||||||
|
{
|
||||||
|
SmSystemFailoverStatus failover_status;
|
||||||
|
SmErrorT error = sm_failover_ss_get_survivor(failover_status);
|
||||||
|
SmNodeScheduleStateT host_state = failover_status.get_host_schedule_state();
|
||||||
|
SmNodeScheduleStateT peer_state = failover_status.get_peer_schedule_state();
|
||||||
|
if(SM_OKAY != error)
|
||||||
|
{
|
||||||
|
DPRINTFE("Failed to get failover survivor. Error %s", sm_error_str(error));
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(SM_NODE_STATE_FAILED == host_state)
|
||||||
|
{
|
||||||
|
// don't need to set to failed state, already here
|
||||||
|
}
|
||||||
|
else if(SM_NODE_STATE_STANDBY == host_state && SM_NODE_STATE_ACTIVE == peer_state)
|
||||||
|
{
|
||||||
|
// standby is the only possible state to be scheduled to from failed state
|
||||||
|
this->fsm.set_state(SM_FAILOVER_STATE_NORMAL);
|
||||||
|
}else
|
||||||
|
{
|
||||||
|
DPRINTFE("Runtime error: unexpected scheduling state: %s",
|
||||||
|
sm_node_schedule_state_str(host_state));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
DPRINTFE("Runtime error, unexpected event %d, at state %d", event, this->fsm.get_state());
|
||||||
|
}
|
||||||
|
return SM_OKAY;
|
||||||
|
}
|
||||||
|
|
||||||
|
SmErrorT SmFailoverFailedState::exit_state()
|
||||||
|
{
|
||||||
|
DPRINTFI("Exiting Failed state");
|
||||||
|
return SM_OKAY;
|
||||||
|
}
|
|
@ -0,0 +1,25 @@
|
||||||
|
//
|
||||||
|
// Copyright (c) 2018 Wind River Systems, Inc.
|
||||||
|
//
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
#ifndef __SM_FAILOVER_FAILED_STATE_H__
|
||||||
|
#define __SM_FAILOVER_FAILED_STATE_H__
|
||||||
|
#include "sm_types.h"
|
||||||
|
#include "sm_failover_fsm.h"
|
||||||
|
#include "sm_timer.h"
|
||||||
|
|
||||||
|
class SmFailoverFailedState : public SmFSMState
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
SmFailoverFailedState(SmFailoverFSM& fsm) : SmFSMState(fsm){}
|
||||||
|
SmErrorT enter_state();
|
||||||
|
SmErrorT exit_state();
|
||||||
|
|
||||||
|
protected:
|
||||||
|
SmErrorT event_handler(SmFailoverEventT event, const ISmFSMEventData* event_data);
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
#endif //__SM_FAILOVER_FAILED_STATE_H__
|
|
@ -0,0 +1,164 @@
|
||||||
|
//
|
||||||
|
// Copyright (c) 2018 Wind River Systems, Inc.
|
||||||
|
//
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
#include "sm_failover_fsm.h"
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "sm_debug.h"
|
||||||
|
#include "sm_failover_normal_state.h"
|
||||||
|
#include "sm_failover_fail_pending_state.h"
|
||||||
|
#include "sm_failover_failed_state.h"
|
||||||
|
#include "sm_failover_survived_state.h"
|
||||||
|
|
||||||
|
SmErrorT SmFSMState::enter_state()
|
||||||
|
{
|
||||||
|
// default implementation, nothing to do
|
||||||
|
return SM_OKAY;
|
||||||
|
};
|
||||||
|
|
||||||
|
SmErrorT SmFSMState::exit_state()
|
||||||
|
{
|
||||||
|
// default implementation, nothing to do
|
||||||
|
return SM_OKAY;
|
||||||
|
};
|
||||||
|
|
||||||
|
SmFailoverFSM SmFailoverFSM::_the_fsm;
|
||||||
|
|
||||||
|
void SmFailoverFSM::deregister_states()
|
||||||
|
{
|
||||||
|
for(int i = 0; i < MaxState; i ++)
|
||||||
|
{
|
||||||
|
SmFSMState* state = _state_handlers[i];
|
||||||
|
if(NULL != state)
|
||||||
|
{
|
||||||
|
_state_handlers[i] = NULL;
|
||||||
|
delete state;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
SmFailoverFSM::SmFailoverFSM()
|
||||||
|
{
|
||||||
|
for(int i = 0; i < MaxState; i ++)
|
||||||
|
{
|
||||||
|
_state_handlers[i] = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
SmFailoverFSM::~SmFailoverFSM()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
SmErrorT SmFailoverFSM::send_event(SmFailoverEventT event, const ISmFSMEventData* event_data)
|
||||||
|
{
|
||||||
|
SmFSMState* state_handler = this->_state_handlers[this->get_state()];
|
||||||
|
if(NULL == state_handler)
|
||||||
|
{
|
||||||
|
DPRINTFE("Runtime error. No handler for state %d", this->get_state());
|
||||||
|
return SM_FAILED;
|
||||||
|
}
|
||||||
|
DPRINTFI("send_event %d\n", event);
|
||||||
|
state_handler->event_handler(event, event_data);
|
||||||
|
return SM_OKAY;
|
||||||
|
}
|
||||||
|
|
||||||
|
SmErrorT SmFailoverFSM::set_state(SmFailoverStateT state)
|
||||||
|
{
|
||||||
|
if(state >= MaxState)
|
||||||
|
{
|
||||||
|
DPRINTFE("Runtime error. Don't understand state %d", state);
|
||||||
|
return SM_FAILED;
|
||||||
|
}
|
||||||
|
if(state == this->get_state())
|
||||||
|
{
|
||||||
|
DPRINTFI("Already in %d state", state);
|
||||||
|
return SM_OKAY;
|
||||||
|
}
|
||||||
|
|
||||||
|
SmFSMState* state_handler = this->_state_handlers[this->get_state()];
|
||||||
|
if(NULL == state_handler)
|
||||||
|
{
|
||||||
|
DPRINTFE("State %d was not registered", this->get_state());
|
||||||
|
return SM_FAILED;
|
||||||
|
}
|
||||||
|
|
||||||
|
SmErrorT error = state_handler->exit_state();
|
||||||
|
if(SM_OKAY != error)
|
||||||
|
{
|
||||||
|
DPRINTFE("Exit state failed (%d), %d", this->get_state(), error);
|
||||||
|
return SM_FAILED;
|
||||||
|
}
|
||||||
|
|
||||||
|
SmFSMState* new_state_handler = this->_state_handlers[state];
|
||||||
|
if(NULL == new_state_handler)
|
||||||
|
{
|
||||||
|
DPRINTFE("State %d was not registered", state);
|
||||||
|
return SM_FAILED;
|
||||||
|
}
|
||||||
|
error = new_state_handler->enter_state();
|
||||||
|
if(SM_OKAY != error)
|
||||||
|
{
|
||||||
|
DPRINTFE("Enter state failed (%d), %d", this->get_state(), error);
|
||||||
|
return SM_FAILED;
|
||||||
|
}
|
||||||
|
|
||||||
|
this->_current_state = state;
|
||||||
|
return SM_OKAY;
|
||||||
|
}
|
||||||
|
|
||||||
|
SmErrorT SmFailoverFSM::register_fsm_state(SmFailoverStateT state, SmFSMState* state_handler)
|
||||||
|
{
|
||||||
|
if(state >= MaxState)
|
||||||
|
{
|
||||||
|
DPRINTFE("Runtime error. Invalid state %d", state);
|
||||||
|
return SM_FAILED;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(NULL != this->_state_handlers[state])
|
||||||
|
{
|
||||||
|
DPRINTFE("Runtime error. Duplicated state handle for state %d", state);
|
||||||
|
return SM_FAILED;
|
||||||
|
}
|
||||||
|
|
||||||
|
this->_state_handlers[state] = state_handler;
|
||||||
|
return SM_OKAY;
|
||||||
|
}
|
||||||
|
|
||||||
|
SmErrorT SmFailoverFSM::init_state()
|
||||||
|
{
|
||||||
|
SmErrorT error;
|
||||||
|
SmFailoverStateT state = SM_FAILOVER_STATE_INITIAL;
|
||||||
|
SmFSMState* new_state_handler = _state_handlers[state];
|
||||||
|
if(NULL == new_state_handler)
|
||||||
|
{
|
||||||
|
DPRINTFE("State %d was not registered", state);
|
||||||
|
return SM_FAILED;
|
||||||
|
}
|
||||||
|
error = new_state_handler->enter_state();
|
||||||
|
if(SM_OKAY != error)
|
||||||
|
{
|
||||||
|
DPRINTFE("Enter state failed (%d), %d", this->get_state(), error);
|
||||||
|
return SM_FAILED;
|
||||||
|
}
|
||||||
|
|
||||||
|
_current_state = state;
|
||||||
|
return SM_OKAY;
|
||||||
|
}
|
||||||
|
|
||||||
|
SmErrorT SmFailoverFSM::initialize()
|
||||||
|
{
|
||||||
|
SmFailoverFSM& fsm = SmFailoverFSM::get_fsm();
|
||||||
|
fsm.register_fsm_state(SM_FAILOVER_STATE_NORMAL, new SmFailoverNormalState(fsm));
|
||||||
|
fsm.register_fsm_state(SM_FAILOVER_STATE_FAIL_PENDING, new SmFailoverFailPendingState(fsm));
|
||||||
|
fsm.register_fsm_state(SM_FAILOVER_STATE_FAILED, new SmFailoverFailedState(fsm));
|
||||||
|
fsm.register_fsm_state(SM_FAILOVER_STATE_SURVIVED, new SmFailoverSurvivedState(fsm));
|
||||||
|
|
||||||
|
return fsm.init_state();
|
||||||
|
}
|
||||||
|
|
||||||
|
SmErrorT SmFailoverFSM::finalize()
|
||||||
|
{
|
||||||
|
SmFailoverFSM::get_fsm().deregister_states();
|
||||||
|
return SM_OKAY;
|
||||||
|
}
|
|
@ -0,0 +1,81 @@
|
||||||
|
//
|
||||||
|
// Copyright (c) 2018 Wind River Systems, Inc.
|
||||||
|
//
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
#ifndef __SM_FAILOVER_FSM_H__
|
||||||
|
#define __SM_FAILOVER_FSM_H__
|
||||||
|
#include "sm_types.h"
|
||||||
|
|
||||||
|
typedef enum{
|
||||||
|
SM_FAILOVER_STATE_NORMAL,
|
||||||
|
SM_FAILOVER_STATE_FAIL_PENDING,
|
||||||
|
SM_FAILOVER_STATE_FAILED,
|
||||||
|
SM_FAILOVER_STATE_SURVIVED,
|
||||||
|
SM_FAILOVER_STATE_MAX
|
||||||
|
}SmFailoverStateT;
|
||||||
|
|
||||||
|
#define SM_FAILOVER_STATE_INITIAL SM_FAILOVER_STATE_NORMAL
|
||||||
|
|
||||||
|
typedef enum{
|
||||||
|
SM_HEARTBEAT_LOST,
|
||||||
|
SM_INTERFACE_DOWN,
|
||||||
|
SM_INTERFACE_UP,
|
||||||
|
SM_HEARTBEAT_RECOVER,
|
||||||
|
SM_FAIL_PENDING_TIMEOUT,
|
||||||
|
}SmFailoverEventT;
|
||||||
|
|
||||||
|
typedef int SmFSMEventDataTypeT;
|
||||||
|
|
||||||
|
class ISmFSMEventData
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
virtual SmFSMEventDataTypeT get_event_data_type() const = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
class SmFailoverFSM;
|
||||||
|
|
||||||
|
class SmFSMState
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
SmFSMState(SmFailoverFSM& fsm) : fsm(fsm) {};
|
||||||
|
virtual ~SmFSMState(){};
|
||||||
|
virtual SmErrorT enter_state();
|
||||||
|
virtual SmErrorT exit_state();
|
||||||
|
protected:
|
||||||
|
SmFailoverFSM& fsm;
|
||||||
|
virtual SmErrorT event_handler(SmFailoverEventT event, const ISmFSMEventData* event_data)=0;
|
||||||
|
friend SmFailoverFSM;
|
||||||
|
};
|
||||||
|
|
||||||
|
class SmFailoverFSM
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
SmFailoverFSM();
|
||||||
|
virtual ~SmFailoverFSM();
|
||||||
|
|
||||||
|
SmErrorT send_event(SmFailoverEventT event, const ISmFSMEventData* event_data);
|
||||||
|
|
||||||
|
SmErrorT register_fsm_state(SmFailoverStateT state, SmFSMState* state_handler);
|
||||||
|
|
||||||
|
SmErrorT set_state(SmFailoverStateT state);
|
||||||
|
inline int get_state() const {return this->_current_state;}
|
||||||
|
|
||||||
|
static SmErrorT initialize();
|
||||||
|
static SmErrorT finalize();
|
||||||
|
inline static SmFailoverFSM& get_fsm() {
|
||||||
|
return _the_fsm;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
static const int MaxState = SM_FAILOVER_STATE_MAX;
|
||||||
|
SmFSMState* _state_handlers[MaxState];
|
||||||
|
int _current_state;
|
||||||
|
|
||||||
|
static SmFailoverFSM _the_fsm;
|
||||||
|
SmErrorT init_state();
|
||||||
|
void deregister_states();
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
#endif //__SM_FAILOVER_FSM_H__
|
|
@ -0,0 +1,42 @@
|
||||||
|
//
|
||||||
|
// Copyright (c) 2018 Wind River Systems, Inc.
|
||||||
|
//
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
#include "sm_failover_normal_state.h"
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "sm_types.h"
|
||||||
|
#include "sm_debug.h"
|
||||||
|
#include "sm_failover_fsm.h"
|
||||||
|
|
||||||
|
SmErrorT SmFailoverNormalState::enter_state()
|
||||||
|
{
|
||||||
|
DPRINTFI("Entering normal state");
|
||||||
|
return SM_OKAY;
|
||||||
|
}
|
||||||
|
|
||||||
|
SmErrorT SmFailoverNormalState::event_handler(SmFailoverEventT event, const ISmFSMEventData* event_data)
|
||||||
|
{
|
||||||
|
switch (event)
|
||||||
|
{
|
||||||
|
case SM_HEARTBEAT_LOST:
|
||||||
|
case SM_INTERFACE_DOWN:
|
||||||
|
this->fsm.set_state(SM_FAILOVER_STATE_FAIL_PENDING);
|
||||||
|
break;
|
||||||
|
case SM_INTERFACE_UP:
|
||||||
|
DPRINTFI("Interface is up");
|
||||||
|
break;
|
||||||
|
case SM_HEARTBEAT_RECOVER:
|
||||||
|
DPRINTFI("Heartbeat is recovered");
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
DPRINTFE("Runtime error, unexpected event %d, at state %d", event, this->fsm.get_state());
|
||||||
|
}
|
||||||
|
return SM_OKAY;
|
||||||
|
}
|
||||||
|
|
||||||
|
SmErrorT SmFailoverNormalState::exit_state()
|
||||||
|
{
|
||||||
|
DPRINTFI("Exiting normal state");
|
||||||
|
return SM_OKAY;
|
||||||
|
}
|
|
@ -0,0 +1,22 @@
|
||||||
|
//
|
||||||
|
// Copyright (c) 2018 Wind River Systems, Inc.
|
||||||
|
//
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
#ifndef __SM_FAILOVER_NORMAL_STATE_H__
|
||||||
|
#define __SM_FAILOVER_NORMAL_STATE_H__
|
||||||
|
#include "sm_types.h"
|
||||||
|
#include "sm_failover_fsm.h"
|
||||||
|
|
||||||
|
class SmFailoverNormalState : public SmFSMState
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
SmFailoverNormalState(SmFailoverFSM& fsm) : SmFSMState(fsm){}
|
||||||
|
SmErrorT enter_state();
|
||||||
|
SmErrorT exit_state();
|
||||||
|
protected:
|
||||||
|
SmErrorT event_handler(SmFailoverEventT event, const ISmFSMEventData* event_data);
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
#endif //__SM_FAILOVER_NORMAL_STATE_H__
|
|
@ -5,8 +5,77 @@
|
||||||
//
|
//
|
||||||
|
|
||||||
#include "sm_failover_ss.h"
|
#include "sm_failover_ss.h"
|
||||||
|
#include <time.h>
|
||||||
#include "sm_debug.h"
|
#include "sm_debug.h"
|
||||||
|
#include "sm_limits.h"
|
||||||
|
#include "sm_node_utils.h"
|
||||||
|
#include "sm_node_api.h"
|
||||||
|
#include "sm_failover_utils.h"
|
||||||
|
#include "sm_node_utils.h"
|
||||||
|
#include "sm_node_api.h"
|
||||||
|
#include "sm_failover.h"
|
||||||
|
|
||||||
|
SmSystemFailoverStatus::SmSystemFailoverStatus()
|
||||||
|
{
|
||||||
|
SmErrorT error;
|
||||||
|
char host_name[SM_NODE_NAME_MAX_CHAR];
|
||||||
|
char peer_name[SM_NODE_NAME_MAX_CHAR];
|
||||||
|
error = sm_node_utils_get_hostname(host_name);
|
||||||
|
if( SM_OKAY != error )
|
||||||
|
{
|
||||||
|
DPRINTFE( "Failed to get hostname, error=%s.",
|
||||||
|
sm_error_str( error ) );
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
error = sm_node_api_get_peername(peer_name);
|
||||||
|
if( SM_OKAY != error )
|
||||||
|
{
|
||||||
|
DPRINTFE( "Failed to get peername, error=%s.",
|
||||||
|
sm_error_str( error ) );
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
_host_schedule_state = sm_get_controller_state(host_name);
|
||||||
|
_peer_schedule_state = sm_get_controller_state(peer_name);
|
||||||
|
}
|
||||||
|
|
||||||
|
SmSystemFailoverStatus::~SmSystemFailoverStatus()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
bool SmSystemFailoverStatus::_is_valid_schedule_state(SmNodeScheduleStateT state)
|
||||||
|
{
|
||||||
|
return (SM_NODE_STATE_ACTIVE == state ||
|
||||||
|
SM_NODE_STATE_STANDBY == state ||
|
||||||
|
SM_NODE_STATE_FAILED == state);
|
||||||
|
}
|
||||||
|
|
||||||
|
void SmSystemFailoverStatus::set_host_schedule_state(SmNodeScheduleStateT state)
|
||||||
|
{
|
||||||
|
if(_is_valid_schedule_state(state))
|
||||||
|
{
|
||||||
|
if(_host_schedule_state != state)
|
||||||
|
{
|
||||||
|
_host_schedule_state=state;
|
||||||
|
}
|
||||||
|
}else
|
||||||
|
{
|
||||||
|
DPRINTFE("Runtime error, schedule state unknown %d", state);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void SmSystemFailoverStatus::set_peer_schedule_state(SmNodeScheduleStateT state)
|
||||||
|
{
|
||||||
|
if(_is_valid_schedule_state(state))
|
||||||
|
{
|
||||||
|
if(_peer_schedule_state != state)
|
||||||
|
{
|
||||||
|
_peer_schedule_state = state;
|
||||||
|
}
|
||||||
|
}else
|
||||||
|
{
|
||||||
|
DPRINTFE("Runtime error, schedule state unknown %d", state);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
typedef enum
|
typedef enum
|
||||||
{
|
{
|
||||||
|
@ -38,15 +107,76 @@ static int get_node_if_healthy_score(unsigned int interface_state)
|
||||||
}
|
}
|
||||||
// ****************************************************************************
|
// ****************************************************************************
|
||||||
|
|
||||||
|
SmErrorT _get_system_status(SmSystemStatusT& sys_status, char host_name[], char peer_name[])
|
||||||
|
{
|
||||||
|
SmErrorT error;
|
||||||
|
SmNodeScheduleStateT host_state;
|
||||||
|
|
||||||
|
error = sm_node_utils_get_hostname(host_name);
|
||||||
|
if( SM_OKAY != error )
|
||||||
|
{
|
||||||
|
DPRINTFE( "Failed to get hostname, error=%s.",
|
||||||
|
sm_error_str( error ) );
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
error = sm_node_api_get_peername(peer_name);
|
||||||
|
if( SM_OKAY != error )
|
||||||
|
{
|
||||||
|
DPRINTFE( "Failed to get peername, error=%s.",
|
||||||
|
sm_error_str( error ) );
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
host_state = sm_get_controller_state(host_name);
|
||||||
|
|
||||||
|
sys_status.system_mode = sm_node_utils_get_system_mode();
|
||||||
|
sys_status.host_status.mgmt_state = sm_failover_get_interface_info(SM_INTERFACE_MGMT);
|
||||||
|
sys_status.host_status.infra_state = sm_failover_get_interface_info(SM_INTERFACE_INFRA);
|
||||||
|
sys_status.host_status.oam_state = sm_failover_get_interface_info(SM_INTERFACE_OAM);
|
||||||
|
|
||||||
|
if(SM_FAILOVER_INTERFACE_OK == sys_status.host_status.mgmt_state ||
|
||||||
|
SM_FAILOVER_INTERFACE_OK == sys_status.host_status.oam_state ||
|
||||||
|
SM_FAILOVER_INTERFACE_OK == sys_status.host_status.infra_state)
|
||||||
|
{
|
||||||
|
sys_status.heartbeat_state = SM_HEARTBEAT_OK;
|
||||||
|
}else
|
||||||
|
{
|
||||||
|
sys_status.heartbeat_state = SM_HEARTBEAT_LOSS;
|
||||||
|
}
|
||||||
|
|
||||||
|
sys_status.host_status.node_name = host_name;
|
||||||
|
sys_status.host_status.interface_state = sm_failover_if_state_get();
|
||||||
|
sys_status.host_status.current_schedule_state = host_state;
|
||||||
|
sys_status.peer_status.node_name = peer_name;
|
||||||
|
|
||||||
|
sys_status.peer_status.interface_state = sm_failover_get_peer_if_state();
|
||||||
|
sys_status.peer_status.current_schedule_state = sm_get_controller_state(peer_name);
|
||||||
|
return SM_OKAY;
|
||||||
|
}
|
||||||
|
|
||||||
// ****************************************************************************
|
// ****************************************************************************
|
||||||
// sm_failover_ss_get_survivor - select the failover survivor
|
// sm_failover_ss_get_survivor - select the failover survivor
|
||||||
// This is the main entry/container for the failover logic to determine how
|
// This is the main entry/container for the failover logic to determine how
|
||||||
// to schedule the controllers, i.e, active/standby or active/failure.
|
// to schedule the controllers, i.e, active/standby or active/failure.
|
||||||
// ===================
|
// ===================
|
||||||
|
SmErrorT sm_failover_ss_get_survivor(SmSystemFailoverStatus& selection)
|
||||||
|
{
|
||||||
|
SmSystemStatusT sys_status;
|
||||||
|
char host_name[SM_NODE_NAME_MAX_CHAR];
|
||||||
|
char peer_name[SM_NODE_NAME_MAX_CHAR];
|
||||||
|
|
||||||
|
SmErrorT error = _get_system_status(sys_status, host_name, peer_name);
|
||||||
|
if(SM_OKAY != error)
|
||||||
|
{
|
||||||
|
DPRINTFE("Failed to retrieve system status. Error %s", sm_error_str(error));
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
return sm_failover_ss_get_survivor(sys_status, selection);
|
||||||
|
}
|
||||||
|
|
||||||
SmErrorT sm_failover_ss_get_survivor(const SmSystemStatusT& system_status, SmSystemFailoverStatus& selection)
|
SmErrorT sm_failover_ss_get_survivor(const SmSystemStatusT& system_status, SmSystemFailoverStatus& selection)
|
||||||
{
|
{
|
||||||
selection.host_schedule_state = system_status.host_status.current_schedule_state;
|
selection.set_host_schedule_state(system_status.host_status.current_schedule_state);
|
||||||
selection.peer_schedule_state = system_status.peer_status.current_schedule_state;
|
selection.set_peer_schedule_state(system_status.peer_status.current_schedule_state);
|
||||||
if(SM_HEARTBEAT_OK == system_status.heartbeat_state)
|
if(SM_HEARTBEAT_OK == system_status.heartbeat_state)
|
||||||
{
|
{
|
||||||
int host_healthy_score, peer_healthy_score;
|
int host_healthy_score, peer_healthy_score;
|
||||||
|
@ -55,26 +185,26 @@ SmErrorT sm_failover_ss_get_survivor(const SmSystemStatusT& system_status, SmSys
|
||||||
if( peer_healthy_score < host_healthy_score )
|
if( peer_healthy_score < host_healthy_score )
|
||||||
{
|
{
|
||||||
//host is more healthy
|
//host is more healthy
|
||||||
selection.host_schedule_state = SM_NODE_STATE_ACTIVE;
|
selection.set_host_schedule_state(SM_NODE_STATE_ACTIVE);
|
||||||
selection.peer_schedule_state = SM_NODE_STATE_STANDBY;
|
selection.set_peer_schedule_state(SM_NODE_STATE_STANDBY);
|
||||||
}else if(peer_healthy_score > host_healthy_score)
|
}else if(peer_healthy_score > host_healthy_score)
|
||||||
{
|
{
|
||||||
//peer is more healthy
|
//peer is more healthy
|
||||||
selection.host_schedule_state = SM_NODE_STATE_STANDBY;
|
selection.set_host_schedule_state(SM_NODE_STATE_STANDBY);
|
||||||
selection.peer_schedule_state = SM_NODE_STATE_ACTIVE;
|
selection.set_peer_schedule_state(SM_NODE_STATE_ACTIVE);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if(system_status.host_status.current_schedule_state != selection.host_schedule_state ||
|
if(system_status.host_status.current_schedule_state != selection.get_host_schedule_state() ||
|
||||||
system_status.peer_status.current_schedule_state != selection.peer_schedule_state )
|
system_status.peer_status.current_schedule_state != selection.get_peer_schedule_state() )
|
||||||
{
|
{
|
||||||
DPRINTFI("Uncontrolled swact starts. Host from %s to %s, Peer from %s to %s.",
|
DPRINTFI("Uncontrolled swact starts. Host from %s to %s, Peer from %s to %s.",
|
||||||
sm_node_schedule_state_str(system_status.host_status.current_schedule_state),
|
sm_node_schedule_state_str(system_status.host_status.current_schedule_state),
|
||||||
sm_node_schedule_state_str(selection.host_schedule_state),
|
sm_node_schedule_state_str(selection.get_host_schedule_state()),
|
||||||
sm_node_schedule_state_str(system_status.peer_status.current_schedule_state),
|
sm_node_schedule_state_str(system_status.peer_status.current_schedule_state),
|
||||||
sm_node_schedule_state_str(selection.peer_schedule_state)
|
sm_node_schedule_state_str(selection.get_peer_schedule_state())
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
return SM_OKAY;
|
return SM_OKAY;
|
||||||
}
|
}
|
||||||
// ****************************************************************************
|
// ****************************************************************************
|
||||||
|
|
|
@ -8,14 +8,18 @@
|
||||||
#define __SM_FAILOVER_SS_H__
|
#define __SM_FAILOVER_SS_H__
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include "sm_types.h"
|
#include "sm_types.h"
|
||||||
#ifdef __cplusplus
|
//#ifdef __cplusplus
|
||||||
extern "C" {
|
//extern "C" {
|
||||||
#endif
|
//#endif
|
||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
const char* node_name;
|
const char* node_name;
|
||||||
unsigned int interface_state;
|
unsigned int interface_state;
|
||||||
|
unsigned int heartbeat_state;
|
||||||
|
SmFailoverInterfaceStateT mgmt_state;
|
||||||
|
SmFailoverInterfaceStateT infra_state;
|
||||||
|
SmFailoverInterfaceStateT oam_state;
|
||||||
SmNodeScheduleStateT current_schedule_state;
|
SmNodeScheduleStateT current_schedule_state;
|
||||||
}SmNodeStatusT;
|
}SmNodeStatusT;
|
||||||
|
|
||||||
|
@ -40,18 +44,36 @@ typedef struct
|
||||||
}SmSystemStatusT;
|
}SmSystemStatusT;
|
||||||
|
|
||||||
|
|
||||||
typedef struct
|
class SmSystemFailoverStatus
|
||||||
{
|
{
|
||||||
SmNodeScheduleStateT host_schedule_state;
|
public:
|
||||||
SmNodeScheduleStateT peer_schedule_state;
|
SmSystemFailoverStatus();
|
||||||
}SmSystemFailoverStatus;
|
virtual ~SmSystemFailoverStatus();
|
||||||
|
inline SmNodeScheduleStateT get_host_schedule_state() const {
|
||||||
|
return _host_schedule_state;
|
||||||
|
}
|
||||||
|
void set_host_schedule_state(SmNodeScheduleStateT state);
|
||||||
|
inline SmNodeScheduleStateT get_peer_schedule_state() const {
|
||||||
|
return _peer_schedule_state;
|
||||||
|
}
|
||||||
|
void set_peer_schedule_state(SmNodeScheduleStateT state);
|
||||||
|
private:
|
||||||
|
SmNodeScheduleStateT _host_schedule_state;
|
||||||
|
SmNodeScheduleStateT _peer_schedule_state;
|
||||||
|
static const char filename[];
|
||||||
|
static const char file_format[];
|
||||||
|
|
||||||
|
bool _is_valid_schedule_state(SmNodeScheduleStateT state);
|
||||||
|
};
|
||||||
|
|
||||||
// ****************************************************************************
|
// ****************************************************************************
|
||||||
// sm_failover_ss_get_survivor - select the failover survivor
|
// sm_failover_ss_get_survivor - select the failover survivor
|
||||||
// ===================
|
// ===================
|
||||||
|
SmErrorT sm_failover_ss_get_survivor(SmSystemFailoverStatus& selection);
|
||||||
|
|
||||||
SmErrorT sm_failover_ss_get_survivor(const SmSystemStatusT& system_status, SmSystemFailoverStatus& selection);
|
SmErrorT sm_failover_ss_get_survivor(const SmSystemStatusT& system_status, SmSystemFailoverStatus& selection);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
//#ifdef __cplusplus
|
||||||
}
|
//}
|
||||||
#endif
|
//#endif
|
||||||
#endif // __SM_FAILOVER_SS_H__
|
#endif // __SM_FAILOVER_SS_H__
|
||||||
|
|
|
@ -0,0 +1,62 @@
|
||||||
|
//
|
||||||
|
// Copyright (c) 2018 Wind River Systems, Inc.
|
||||||
|
//
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
#include "sm_failover_survived_state.h"
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "sm_types.h"
|
||||||
|
#include "sm_debug.h"
|
||||||
|
#include "sm_failover_fsm.h"
|
||||||
|
#include "sm_failover_ss.h"
|
||||||
|
|
||||||
|
SmErrorT SmFailoverSurvivedState::enter_state()
|
||||||
|
{
|
||||||
|
DPRINTFI("Entering Survived state");
|
||||||
|
return SM_OKAY;
|
||||||
|
}
|
||||||
|
|
||||||
|
SmErrorT SmFailoverSurvivedState::event_handler(SmFailoverEventT event, const ISmFSMEventData* event_data)
|
||||||
|
{
|
||||||
|
switch (event)
|
||||||
|
{
|
||||||
|
case SM_HEARTBEAT_LOST:
|
||||||
|
case SM_INTERFACE_DOWN:
|
||||||
|
case SM_INTERFACE_UP:
|
||||||
|
// ignore, recover is possible only when heartbeat is recovered
|
||||||
|
break;
|
||||||
|
case SM_HEARTBEAT_RECOVER:
|
||||||
|
{
|
||||||
|
SmSystemFailoverStatus failover_status;
|
||||||
|
SmErrorT error = sm_failover_ss_get_survivor(failover_status);
|
||||||
|
SmNodeScheduleStateT host_state = failover_status.get_host_schedule_state();
|
||||||
|
SmNodeScheduleStateT peer_state = failover_status.get_peer_schedule_state();
|
||||||
|
if(SM_OKAY != error)
|
||||||
|
{
|
||||||
|
DPRINTFE("Failed to get failover survivor. Error %s", sm_error_str(error));
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(SM_NODE_STATE_ACTIVE == host_state && SM_NODE_STATE_STANDBY == peer_state)
|
||||||
|
{
|
||||||
|
// Active is the only possible state to be scheduled to from survived state
|
||||||
|
this->fsm.set_state(SM_FAILOVER_STATE_NORMAL);
|
||||||
|
}else
|
||||||
|
{
|
||||||
|
DPRINTFE("Runtime error: unexpected scheduling state: host %s, peer %s",
|
||||||
|
sm_node_schedule_state_str(host_state),
|
||||||
|
sm_node_schedule_state_str(peer_state));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
DPRINTFE("Runtime error, unexpected event %d, at state %d", event, this->fsm.get_state());
|
||||||
|
}
|
||||||
|
return SM_OKAY;
|
||||||
|
}
|
||||||
|
|
||||||
|
SmErrorT SmFailoverSurvivedState::exit_state()
|
||||||
|
{
|
||||||
|
DPRINTFI("Exiting Survived state");
|
||||||
|
return SM_OKAY;
|
||||||
|
}
|
|
@ -0,0 +1,23 @@
|
||||||
|
//
|
||||||
|
// Copyright (c) 2018 Wind River Systems, Inc.
|
||||||
|
//
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
#ifndef __SM_FAILOVER_SURVIVED_STATE_H__
|
||||||
|
#define __SM_FAILOVER_SURVIVED_STATE_H__
|
||||||
|
#include "sm_types.h"
|
||||||
|
#include "sm_failover_fsm.h"
|
||||||
|
|
||||||
|
class SmFailoverSurvivedState : public SmFSMState
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
SmFailoverSurvivedState(SmFailoverFSM& fsm) : SmFSMState(fsm){}
|
||||||
|
SmErrorT enter_state();
|
||||||
|
SmErrorT exit_state();
|
||||||
|
|
||||||
|
protected:
|
||||||
|
SmErrorT event_handler(SmFailoverEventT event, const ISmFSMEventData* event_data);
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
#endif //__SM_FAILOVER_SURVIVED_STATE_H__
|
|
@ -208,11 +208,7 @@ SmErrorT sm_heartbeat_msg_send_alive( SmNetworkTypeT network_type, char node_nam
|
||||||
|
|
||||||
heartbeat_msg.u.if_state_msg.msg_size = htons((uint16_t)sizeof(SmHeartbeatMsgAliveRev2T));
|
heartbeat_msg.u.if_state_msg.msg_size = htons((uint16_t)sizeof(SmHeartbeatMsgAliveRev2T));
|
||||||
SmHeartbeatMsgIfStateT if_state;
|
SmHeartbeatMsgIfStateT if_state;
|
||||||
SmErrorT error = sm_failover_if_state_get(&if_state);
|
if_state = sm_failover_if_state_get();
|
||||||
if(SM_OKAY != error)
|
|
||||||
{
|
|
||||||
return SM_FAILED;
|
|
||||||
}
|
|
||||||
heartbeat_msg.u.if_state_msg.if_state = htonl(if_state);
|
heartbeat_msg.u.if_state_msg.if_state = htonl(if_state);
|
||||||
|
|
||||||
if( SM_AUTH_TYPE_HMAC_SHA512 == auth_type )
|
if( SM_AUTH_TYPE_HMAC_SHA512 == auth_type )
|
||||||
|
|
|
@ -58,6 +58,36 @@ SmErrorT sm_node_api_get_hostname( char node_name[] )
|
||||||
}
|
}
|
||||||
// ****************************************************************************
|
// ****************************************************************************
|
||||||
|
|
||||||
|
// ****************************************************************************
|
||||||
|
// Node API - Get Peer Name
|
||||||
|
// ========================
|
||||||
|
SmErrorT sm_node_api_get_peername(char peer_name[SM_NODE_NAME_MAX_CHAR])
|
||||||
|
{
|
||||||
|
char node_name[SM_NODE_NAME_MAX_CHAR];
|
||||||
|
SmErrorT error = sm_node_api_get_hostname(node_name);
|
||||||
|
if(SM_OKAY != error)
|
||||||
|
{
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
char format[] = "name <> '%s'";
|
||||||
|
char query[SM_NODE_NAME_MAX_CHAR + sizeof(format)];
|
||||||
|
unsigned int cnt = snprintf(query, sizeof(query), format, node_name);
|
||||||
|
if(cnt < sizeof(query) - 1)
|
||||||
|
{
|
||||||
|
SmDbNodeT node;
|
||||||
|
error = sm_db_nodes_query(_sm_db_handle, query, &node);
|
||||||
|
if(SM_OKAY != error)
|
||||||
|
{
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
strncpy(peer_name, node.name, SM_NODE_NAME_MAX_CHAR);
|
||||||
|
return SM_OKAY;
|
||||||
|
}
|
||||||
|
return SM_FAILED;
|
||||||
|
}
|
||||||
|
// ****************************************************************************
|
||||||
|
|
||||||
// ****************************************************************************
|
// ****************************************************************************
|
||||||
// Node API - Configuration Complete
|
// Node API - Configuration Complete
|
||||||
// =================================
|
// =================================
|
||||||
|
|
|
@ -10,6 +10,7 @@
|
||||||
|
|
||||||
#include "sm_types.h"
|
#include "sm_types.h"
|
||||||
#include "sm_uuid.h"
|
#include "sm_uuid.h"
|
||||||
|
#include "sm_limits.h"
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
|
@ -21,6 +22,12 @@ extern "C" {
|
||||||
extern SmErrorT sm_node_api_get_hostname( char node_name[] );
|
extern SmErrorT sm_node_api_get_hostname( char node_name[] );
|
||||||
// ****************************************************************************
|
// ****************************************************************************
|
||||||
|
|
||||||
|
// ****************************************************************************
|
||||||
|
// Node API - Get Peer Name
|
||||||
|
// ========================
|
||||||
|
extern SmErrorT sm_node_api_get_peername(char peer_name[SM_NODE_NAME_MAX_CHAR]);
|
||||||
|
// ****************************************************************************
|
||||||
|
|
||||||
// ****************************************************************************
|
// ****************************************************************************
|
||||||
// Node API - Configuration Complete
|
// Node API - Configuration Complete
|
||||||
// =================================
|
// =================================
|
||||||
|
|
|
@ -706,6 +706,16 @@ typedef enum
|
||||||
SM_NODE_STATE_MAX
|
SM_NODE_STATE_MAX
|
||||||
}SmNodeScheduleStateT;
|
}SmNodeScheduleStateT;
|
||||||
|
|
||||||
|
|
||||||
|
typedef enum
|
||||||
|
{
|
||||||
|
SM_FAILOVER_INTERFACE_UNKNOWN,
|
||||||
|
SM_FAILOVER_INTERFACE_OK,
|
||||||
|
SM_FAILOVER_INTERFACE_MISSING_HEARTBEAT,
|
||||||
|
SM_FAILOVER_INTERFACE_DOWN,
|
||||||
|
SM_FAILOVER_INTERFACE_RECOVERING
|
||||||
|
}SmFailoverInterfaceStateT;
|
||||||
|
|
||||||
// ****************************************************************************
|
// ****************************************************************************
|
||||||
// Types - System Mode String
|
// Types - System Mode String
|
||||||
// =======================================
|
// =======================================
|
||||||
|
|
Loading…
Reference in New Issue