split-brain avoidance improvement
This change enables one way communication via BMC (if configured) through mtce. when 2 controllers lost all communications to each other. The algorithm is: when communications all lost, both active and standby controllers, verify its interfaces (mgmt, infra, and oam) if active controller is healthy, it will request a bmc reset thorugh mtce, against standby controller. if standby controller is healthy, it will active itself and wait a total 45 seconds before requesting a bmc reset through mtce, against the active controller. Changes also include: 1. adding new initial failover state. initial state is a state before the node is enabled 2. remove failover thread. using worker thread action to perform time consuming operations 3. remove entire failover action table Story: 2003577 Task: 24901 Change-Id: I7d294d40e84469df6b6a6f6dd490cf3c4557b711 Signed-off-by: Bin Qian <bin.qian@windriver.com>
This commit is contained in:
parent
d987f1bcdf
commit
133da10b08
|
@ -102,12 +102,12 @@ SRCS+=sm_service_heartbeat_thread.c
|
|||
SRCS+=sm_main_event_handler.c
|
||||
SRCS+=fm_api_wrapper.c
|
||||
SRCS+=sm_failover.c
|
||||
SRCS+=sm_failover_thread.c
|
||||
SRCS+=sm_swact_state.c
|
||||
SRCS+=sm_worker_thread.cpp
|
||||
SRCS+=sm_task_affining_thread.c
|
||||
SRCS+=sm_node_swact_monitor.cpp
|
||||
SRCS+=sm_failover_fsm.cpp
|
||||
SRCS+=sm_failover_initial_state.cpp
|
||||
SRCS+=sm_failover_normal_state.cpp
|
||||
SRCS+=sm_failover_fail_pending_state.cpp
|
||||
SRCS+=sm_failover_failed_state.cpp
|
||||
|
|
|
@ -37,24 +37,6 @@
|
|||
#include "sm_failover_fsm.h"
|
||||
#include "sm_api.h"
|
||||
|
||||
typedef enum
|
||||
{
|
||||
SM_FAILOVER_ACTION_NO_ACTION = 0,
|
||||
SM_FAILOVER_ACTION_DISABLE_STANDBY = 1,
|
||||
SM_FAILOVER_ACTION_DISABLE = 2,
|
||||
SM_FAILOVER_ACTION_SWACT = 4,
|
||||
SM_FAILOVER_ACTION_DEGRADE = 8,
|
||||
SM_FAILOVER_ACTION_ACTIVATE = 16,
|
||||
SM_FAILOVER_ACTION_FAIL_NODE = 32,
|
||||
SM_FAILOVER_ACTION_UNDEFINED = 64,
|
||||
//as part of the gradual delivery of enhancement with more
|
||||
//complex algorithm to determine the failover survivor routine
|
||||
//the SM_FAILOVER_ACTION_ROUTINE will redirect the lookup to
|
||||
//new logic. Until all actions are migrated to new logic, the
|
||||
//lookup tables will be eliminated.
|
||||
SM_FAILOVER_ACTION_ROUTINE = 1 << 31,
|
||||
}SmFailoverActionT;
|
||||
|
||||
#define SM_FAILOVER_STATE_TRANSITION_TIME_IN_MS 2000
|
||||
#define SM_FAILOVER_MULTI_FAILURE_WAIT_TIMER_IN_MS 2000
|
||||
#define SM_FAILOVER_RECOVERY_INTERVAL_IN_SEC 100
|
||||
|
@ -132,16 +114,6 @@ class SmFailoverInterfaceInfo
|
|||
}
|
||||
};
|
||||
|
||||
typedef enum
|
||||
{
|
||||
SM_FAILOVER_INFRA_DOWN = 1,
|
||||
SM_FAILOVER_MGMT_DOWN = 2,
|
||||
SM_FAILOVER_OAM_DOWN = 4,
|
||||
SM_FAILOVER_HEARTBEAT_ALIVE = 8,
|
||||
SM_FAILOVER_HELLO_MSG_ALIVE = 16,
|
||||
SM_FAILOVER_PEER_DISABLED = 0x4000,
|
||||
}SmFailoverCommFaultBitFlagT;
|
||||
|
||||
SmTimerIdT failover_audit_timer_id;
|
||||
static char _host_name[SM_NODE_NAME_MAX_CHAR];
|
||||
static char _peer_name[SM_NODE_NAME_MAX_CHAR];
|
||||
|
@ -159,13 +131,12 @@ static SmFailoverInterfaceInfo* _oam_interface_info = NULL;
|
|||
static SmFailoverInterfaceInfo* _mgmt_interface_info = NULL;
|
||||
static SmFailoverInterfaceInfo* _infra_interface_info = NULL;
|
||||
static SmFailoverInterfaceInfo _peer_if_list[SM_INTERFACE_MAX];
|
||||
static pthread_mutex_t _mutex = PTHREAD_MUTEX_INITIALIZER;
|
||||
static pthread_mutex_t _mutex;
|
||||
static SmDbHandleT* _sm_db_handle = NULL;
|
||||
|
||||
static SmNodeScheduleStateT _host_state;
|
||||
static SmNodeScheduleStateT _host_state_at_last_action; // host state when action was taken last time
|
||||
static int64_t _node_comm_state = -1;
|
||||
static bool _to_disable_peer = false; //set flag for failover thread to send http request to mtce to disable peer
|
||||
static int _prev_if_state_flag = -1;
|
||||
time_t _last_if_state_ms = 0;
|
||||
static SmNodeScheduleStateT _prev_host_state= SM_NODE_STATE_UNKNOWN;
|
||||
|
@ -175,194 +146,12 @@ static SmSystemModeT _system_mode;
|
|||
static time_t _last_report_ts = 0;
|
||||
static int _heartbeat_count = 0;
|
||||
|
||||
// The class is to accommodate batching i/f state changes
|
||||
// during the failover_audit interval.
|
||||
class FailoverEventHolder
|
||||
{
|
||||
public:
|
||||
FailoverEventHolder();
|
||||
void push_event(SmFailoverEventT, const ISmFSMEventData* event_data);
|
||||
void send_event();
|
||||
private:
|
||||
bool _event_to_send;
|
||||
SmFailoverEventT _failover_event;
|
||||
const ISmFSMEventData* _event_data;
|
||||
};
|
||||
|
||||
FailoverEventHolder::FailoverEventHolder()
|
||||
{
|
||||
_event_to_send = false;
|
||||
}
|
||||
|
||||
void FailoverEventHolder::push_event(SmFailoverEventT event, const ISmFSMEventData* event_data)
|
||||
{
|
||||
SmFailoverEventT list[] = {
|
||||
SM_INTERFACE_DOWN,
|
||||
SM_HEARTBEAT_LOST,
|
||||
SM_INTERFACE_UP,
|
||||
SM_HEARTBEAT_RECOVER
|
||||
};
|
||||
|
||||
unsigned int i;
|
||||
if(!_event_to_send)
|
||||
{
|
||||
_event_to_send = true;
|
||||
for(i = 0; i < sizeof(list) / sizeof(SmFailoverEventT); i ++)
|
||||
{
|
||||
if(event == list[i])
|
||||
{
|
||||
_failover_event = event;
|
||||
_event_data = event_data;
|
||||
_event_to_send = true;
|
||||
DPRINTFI("Push event %d", event);
|
||||
return;
|
||||
}
|
||||
}
|
||||
DPRINTFE("Runtime error: unsupported event %d", event);
|
||||
return;
|
||||
}
|
||||
|
||||
for(i = 0; list[i] != _failover_event && i < sizeof(list) / sizeof(SmFailoverEventT); i ++)
|
||||
{
|
||||
if(list[i] == event)
|
||||
{
|
||||
_failover_event = event;
|
||||
_event_data = event_data;
|
||||
return;
|
||||
}
|
||||
}
|
||||
DPRINTFE("Runtime error: unsupported event %d", event);
|
||||
}
|
||||
|
||||
void FailoverEventHolder::send_event()
|
||||
{
|
||||
if(_event_to_send)
|
||||
{
|
||||
DPRINTFI("Send event %d", _failover_event);
|
||||
SmFailoverFSM::get_fsm().send_event(_failover_event, _event_data);
|
||||
_event_to_send = false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static FailoverEventHolder _event_holder;
|
||||
|
||||
// std 2+2/2+2+x failover actions
|
||||
SmFailoverActionPairT action_map_std_infra[16] =
|
||||
{
|
||||
//Active action standby action if_state
|
||||
{SM_FAILOVER_ACTION_NO_ACTION, SM_FAILOVER_ACTION_ACTIVATE}, //0
|
||||
{SM_FAILOVER_ACTION_NO_ACTION, SM_FAILOVER_ACTION_NO_ACTION}, //1
|
||||
{SM_FAILOVER_ACTION_NO_ACTION, SM_FAILOVER_ACTION_NO_ACTION}, //2
|
||||
{SM_FAILOVER_ACTION_NO_ACTION, SM_FAILOVER_ACTION_NO_ACTION}, //3
|
||||
{SM_FAILOVER_ACTION_NO_ACTION, SM_FAILOVER_ACTION_ACTIVATE}, //4
|
||||
{SM_FAILOVER_ACTION_NO_ACTION, SM_FAILOVER_ACTION_NO_ACTION}, //5
|
||||
{SM_FAILOVER_ACTION_NO_ACTION, SM_FAILOVER_ACTION_NO_ACTION}, //6
|
||||
{SM_FAILOVER_ACTION_NO_ACTION, SM_FAILOVER_ACTION_NO_ACTION}, //7
|
||||
{SM_FAILOVER_ACTION_NO_ACTION, SM_FAILOVER_ACTION_NO_ACTION}, //8
|
||||
{SM_FAILOVER_ACTION_ROUTINE, SM_FAILOVER_ACTION_ROUTINE}, //9
|
||||
{SM_FAILOVER_ACTION_ROUTINE, SM_FAILOVER_ACTION_ROUTINE}, //10
|
||||
{SM_FAILOVER_ACTION_ROUTINE, SM_FAILOVER_ACTION_ROUTINE}, //11
|
||||
{SM_FAILOVER_ACTION_ROUTINE, SM_FAILOVER_ACTION_DEGRADE}, //12
|
||||
{SM_FAILOVER_ACTION_ROUTINE, SM_FAILOVER_ACTION_ROUTINE}, //13
|
||||
{SM_FAILOVER_ACTION_ROUTINE, SM_FAILOVER_ACTION_ROUTINE}, //14
|
||||
{SM_FAILOVER_ACTION_ROUTINE, SM_FAILOVER_ACTION_ROUTINE} //15
|
||||
};
|
||||
|
||||
SmFailoverActionPairT action_map_std_no_infra[16] =
|
||||
{
|
||||
//Active action standby action if_state
|
||||
{SM_FAILOVER_ACTION_NO_ACTION, SM_FAILOVER_ACTION_ACTIVATE}, //0
|
||||
{SM_FAILOVER_ACTION_UNDEFINED, SM_FAILOVER_ACTION_UNDEFINED}, //1
|
||||
{SM_FAILOVER_ACTION_NO_ACTION, SM_FAILOVER_ACTION_NO_ACTION}, //2
|
||||
{SM_FAILOVER_ACTION_UNDEFINED, SM_FAILOVER_ACTION_UNDEFINED}, //3
|
||||
{SM_FAILOVER_ACTION_NO_ACTION, SM_FAILOVER_ACTION_ACTIVATE}, //4
|
||||
{SM_FAILOVER_ACTION_UNDEFINED, SM_FAILOVER_ACTION_UNDEFINED}, //5
|
||||
{SM_FAILOVER_ACTION_NO_ACTION, SM_FAILOVER_ACTION_NO_ACTION}, //6
|
||||
{SM_FAILOVER_ACTION_UNDEFINED, SM_FAILOVER_ACTION_UNDEFINED}, //7
|
||||
{SM_FAILOVER_ACTION_NO_ACTION, SM_FAILOVER_ACTION_NO_ACTION}, //8
|
||||
{SM_FAILOVER_ACTION_UNDEFINED, SM_FAILOVER_ACTION_UNDEFINED}, //9
|
||||
{SM_FAILOVER_ACTION_NO_ACTION, SM_FAILOVER_ACTION_NO_ACTION}, //10
|
||||
{SM_FAILOVER_ACTION_UNDEFINED, SM_FAILOVER_ACTION_UNDEFINED}, //11
|
||||
{SM_FAILOVER_ACTION_SWACT, SM_FAILOVER_ACTION_NO_ACTION}, //12
|
||||
{SM_FAILOVER_ACTION_UNDEFINED, SM_FAILOVER_ACTION_UNDEFINED}, //13
|
||||
{SM_FAILOVER_ACTION_UNDEFINED, SM_FAILOVER_ACTION_UNDEFINED}, //14
|
||||
{SM_FAILOVER_ACTION_UNDEFINED, SM_FAILOVER_ACTION_UNDEFINED} //15
|
||||
};
|
||||
|
||||
// cpe duplex failover actions
|
||||
SmFailoverActionPairT *action_map_cpe_infra = action_map_std_infra;
|
||||
SmFailoverActionPairT *action_map_cpe_no_infra = action_map_std_no_infra;
|
||||
|
||||
// cpe duplex-direct failover actions
|
||||
SmFailoverActionPairT action_map_cpe_dc_infra[16] =
|
||||
{
|
||||
//Active action standby action if_state
|
||||
{SM_FAILOVER_ACTION_DISABLE_STANDBY, SM_FAILOVER_ACTION_ACTIVATE}, //0
|
||||
{SM_FAILOVER_ACTION_DISABLE_STANDBY, SM_FAILOVER_ACTION_ACTIVATE}, //1
|
||||
{SM_FAILOVER_ACTION_DISABLE_STANDBY, SM_FAILOVER_ACTION_ACTIVATE}, //2
|
||||
{SM_FAILOVER_ACTION_DISABLE_STANDBY, SM_FAILOVER_ACTION_ACTIVATE}, //3
|
||||
{SM_FAILOVER_ACTION_DISABLE_STANDBY, SM_FAILOVER_ACTION_ACTIVATE}, //4
|
||||
{SM_FAILOVER_ACTION_DISABLE_STANDBY, SM_FAILOVER_ACTION_ACTIVATE}, //5
|
||||
{SM_FAILOVER_ACTION_DISABLE_STANDBY, SM_FAILOVER_ACTION_ACTIVATE}, //6
|
||||
{SM_FAILOVER_ACTION_FAIL_NODE, SM_FAILOVER_ACTION_FAIL_NODE}, //7
|
||||
{SM_FAILOVER_ACTION_NO_ACTION, SM_FAILOVER_ACTION_NO_ACTION}, //8
|
||||
{SM_FAILOVER_ACTION_DISABLE_STANDBY, SM_FAILOVER_ACTION_NO_ACTION}, //9
|
||||
{SM_FAILOVER_ACTION_DISABLE_STANDBY, SM_FAILOVER_ACTION_NO_ACTION}, //10
|
||||
{SM_FAILOVER_ACTION_DISABLE_STANDBY, SM_FAILOVER_ACTION_NO_ACTION}, //11
|
||||
{SM_FAILOVER_ACTION_SWACT, SM_FAILOVER_ACTION_DEGRADE}, //12
|
||||
{SM_FAILOVER_ACTION_DISABLE_STANDBY, SM_FAILOVER_ACTION_NO_ACTION}, //13
|
||||
{SM_FAILOVER_ACTION_DISABLE_STANDBY, SM_FAILOVER_ACTION_NO_ACTION}, //14
|
||||
{SM_FAILOVER_ACTION_UNDEFINED, SM_FAILOVER_ACTION_UNDEFINED} //15
|
||||
};
|
||||
|
||||
SmFailoverActionPairT action_map_cpe_dc_no_infra[16] =
|
||||
{
|
||||
//Active action standby action if_state
|
||||
{SM_FAILOVER_ACTION_DISABLE_STANDBY, SM_FAILOVER_ACTION_ACTIVATE}, //0
|
||||
{SM_FAILOVER_ACTION_UNDEFINED, SM_FAILOVER_ACTION_UNDEFINED}, //1
|
||||
{SM_FAILOVER_ACTION_DISABLE_STANDBY, SM_FAILOVER_ACTION_ACTIVATE}, //2
|
||||
{SM_FAILOVER_ACTION_UNDEFINED, SM_FAILOVER_ACTION_UNDEFINED}, //3
|
||||
{SM_FAILOVER_ACTION_DISABLE_STANDBY, SM_FAILOVER_ACTION_ACTIVATE}, //4
|
||||
{SM_FAILOVER_ACTION_UNDEFINED, SM_FAILOVER_ACTION_UNDEFINED}, //5
|
||||
{SM_FAILOVER_ACTION_FAIL_NODE, SM_FAILOVER_ACTION_ACTIVATE}, //6
|
||||
{SM_FAILOVER_ACTION_UNDEFINED, SM_FAILOVER_ACTION_UNDEFINED}, //7
|
||||
{SM_FAILOVER_ACTION_NO_ACTION, SM_FAILOVER_ACTION_NO_ACTION}, //8
|
||||
{SM_FAILOVER_ACTION_UNDEFINED, SM_FAILOVER_ACTION_UNDEFINED}, //9
|
||||
{SM_FAILOVER_ACTION_DISABLE_STANDBY, SM_FAILOVER_ACTION_NO_ACTION}, //10
|
||||
{SM_FAILOVER_ACTION_UNDEFINED, SM_FAILOVER_ACTION_UNDEFINED}, //11
|
||||
{SM_FAILOVER_ACTION_SWACT, SM_FAILOVER_ACTION_DEGRADE}, //12
|
||||
{SM_FAILOVER_ACTION_UNDEFINED, SM_FAILOVER_ACTION_UNDEFINED}, //13
|
||||
{SM_FAILOVER_ACTION_UNDEFINED, SM_FAILOVER_ACTION_UNDEFINED}, //14
|
||||
{SM_FAILOVER_ACTION_UNDEFINED, SM_FAILOVER_ACTION_UNDEFINED} //15
|
||||
};
|
||||
|
||||
SmFailoverActionPairT action_map_simplex[16] =
|
||||
{
|
||||
//Active action standby action
|
||||
{SM_FAILOVER_ACTION_NO_ACTION, SM_FAILOVER_ACTION_NO_ACTION},
|
||||
{SM_FAILOVER_ACTION_NO_ACTION, SM_FAILOVER_ACTION_NO_ACTION},
|
||||
{SM_FAILOVER_ACTION_NO_ACTION, SM_FAILOVER_ACTION_NO_ACTION},
|
||||
{SM_FAILOVER_ACTION_NO_ACTION, SM_FAILOVER_ACTION_NO_ACTION},
|
||||
{SM_FAILOVER_ACTION_NO_ACTION, SM_FAILOVER_ACTION_NO_ACTION},
|
||||
{SM_FAILOVER_ACTION_NO_ACTION, SM_FAILOVER_ACTION_NO_ACTION},
|
||||
{SM_FAILOVER_ACTION_NO_ACTION, SM_FAILOVER_ACTION_NO_ACTION},
|
||||
{SM_FAILOVER_ACTION_NO_ACTION, SM_FAILOVER_ACTION_NO_ACTION},
|
||||
{SM_FAILOVER_ACTION_NO_ACTION, SM_FAILOVER_ACTION_NO_ACTION},
|
||||
{SM_FAILOVER_ACTION_NO_ACTION, SM_FAILOVER_ACTION_NO_ACTION},
|
||||
{SM_FAILOVER_ACTION_NO_ACTION, SM_FAILOVER_ACTION_NO_ACTION},
|
||||
{SM_FAILOVER_ACTION_NO_ACTION, SM_FAILOVER_ACTION_NO_ACTION},
|
||||
{SM_FAILOVER_ACTION_NO_ACTION, SM_FAILOVER_ACTION_NO_ACTION},
|
||||
{SM_FAILOVER_ACTION_NO_ACTION, SM_FAILOVER_ACTION_NO_ACTION},
|
||||
{SM_FAILOVER_ACTION_NO_ACTION, SM_FAILOVER_ACTION_NO_ACTION},
|
||||
{SM_FAILOVER_ACTION_NO_ACTION, SM_FAILOVER_ACTION_NO_ACTION}
|
||||
};
|
||||
SmFailoverActionPairT *action_maps_no_infra[SM_SYSTEM_MODE_MAX] = {0};
|
||||
SmFailoverActionPairT *action_maps_infra[SM_SYSTEM_MODE_MAX] = {0};
|
||||
static bool _if_state_changed = false;
|
||||
|
||||
SmErrorT sm_exec_json_command(const char* cmd, char result_buf[], int result_len);
|
||||
SmErrorT sm_failover_get_node_oper_state(char* node_name, SmNodeOperationalStateT *state);
|
||||
void _log_nodes_state(int action);
|
||||
void _log_nodes_state();
|
||||
|
||||
// ****************************************************************************
|
||||
// Failover - interface check
|
||||
|
@ -501,7 +290,7 @@ void sm_failover_lost_heartbeat( SmFailoverInterfaceT* interface )
|
|||
DPRINTFI("Interface %s lose heartbeat.", interface->interface_name);
|
||||
}
|
||||
|
||||
_event_holder.push_event(SM_HEARTBEAT_LOST, NULL);
|
||||
_if_state_changed = true;
|
||||
}
|
||||
// ****************************************************************************
|
||||
|
||||
|
@ -534,7 +323,7 @@ void sm_failover_heartbeat_restore( SmFailoverInterfaceT* interface )
|
|||
DPRINTFI("Interface %s heartbeat is OK.", interface->interface_name);
|
||||
}
|
||||
|
||||
_event_holder.push_event(SM_HEARTBEAT_RECOVER, NULL);
|
||||
_if_state_changed = true;
|
||||
}
|
||||
// ****************************************************************************
|
||||
|
||||
|
@ -571,7 +360,7 @@ void sm_failover_interface_down( const char* const interface_name )
|
|||
DPRINTFI("%d domain interfaces are impacted as i/f %s is down.",
|
||||
impacted, interface_name);
|
||||
|
||||
_event_holder.push_event(SM_INTERFACE_DOWN, NULL);
|
||||
_if_state_changed = true;
|
||||
}
|
||||
}
|
||||
// ****************************************************************************
|
||||
|
@ -609,7 +398,7 @@ void sm_failover_interface_up( const char* const interface_name )
|
|||
DPRINTFI("%d domain interfaces are impacted as i/f %s is up.",
|
||||
impacted, interface_name);
|
||||
|
||||
_event_holder.push_event(SM_INTERFACE_UP, NULL);
|
||||
_if_state_changed = true;
|
||||
}
|
||||
}
|
||||
// ****************************************************************************
|
||||
|
@ -685,7 +474,7 @@ bool is_infra_configured()
|
|||
// ****************************************************************************
|
||||
// Failover - get interface state
|
||||
// ==================
|
||||
int _failover_get_if_state()
|
||||
int sm_failover_get_if_state()
|
||||
{
|
||||
SmFailoverInterfaceStateT mgmt_state = _mgmt_interface_info->get_state();
|
||||
SmFailoverInterfaceStateT oam_state = _oam_interface_info->get_state();
|
||||
|
@ -732,7 +521,7 @@ int _failover_get_if_state()
|
|||
SmHeartbeatMsgIfStateT sm_failover_if_state_get()
|
||||
{
|
||||
mutex_holder holder(&_mutex);
|
||||
int if_state_flag = _failover_get_if_state();
|
||||
int if_state_flag = sm_failover_get_if_state();
|
||||
return (if_state_flag & 0b0111); //the lower 3 bits i/f state flag
|
||||
}
|
||||
// ****************************************************************************
|
||||
|
@ -760,7 +549,7 @@ SmNodeScheduleStateT get_controller_state()
|
|||
// ****************************************************************************
|
||||
// Failover - is active controller
|
||||
// ==================
|
||||
bool is_active_controller()
|
||||
bool sm_is_active_controller()
|
||||
{
|
||||
return SM_NODE_STATE_ACTIVE == _host_state;
|
||||
}
|
||||
|
@ -846,8 +635,10 @@ SmFailoverActionResultT sm_failover_disable_node(char* node_name)
|
|||
DPRINTFI("To disable %s", node_name);
|
||||
|
||||
SmErrorT error;
|
||||
char reason_text[SM_LOG_REASON_TEXT_MAX_CHAR];
|
||||
snprintf(reason_text, sizeof(reason_text), "%s has failed", node_name);
|
||||
error = sm_node_fsm_event_handler(
|
||||
node_name, SM_NODE_EVENT_DISABLED, NULL, "Host is isolated" );
|
||||
node_name, SM_NODE_EVENT_DISABLED, NULL, reason_text );
|
||||
|
||||
if( SM_OKAY != error )
|
||||
{
|
||||
|
@ -855,7 +646,6 @@ SmFailoverActionResultT sm_failover_disable_node(char* node_name)
|
|||
node_name, sm_error_str( error ) );
|
||||
return SM_FAILOVER_ACTION_RESULT_FAILED;
|
||||
}
|
||||
_to_disable_peer = true;
|
||||
return SM_FAILOVER_ACTION_RESULT_OK;
|
||||
}
|
||||
// ****************************************************************************
|
||||
|
@ -876,13 +666,13 @@ static void active_self_callback(void* user_data[], SmServiceDomainT* domain)
|
|||
// ****************************************************************************
|
||||
// Failover - active self
|
||||
// =======================
|
||||
SmFailoverActionResultT sm_failover_activate_self()
|
||||
SmErrorT sm_failover_activate_self()
|
||||
{
|
||||
DPRINTFI("Uncontrolled swact start (active local only)");
|
||||
SmNodeSwactMonitor::SwactStart(SM_NODE_STATE_ACTIVE);
|
||||
sm_service_domain_table_foreach( NULL, active_self_callback);
|
||||
|
||||
return SM_FAILOVER_ACTION_RESULT_OK;
|
||||
return SM_OKAY;
|
||||
}
|
||||
// ****************************************************************************
|
||||
|
||||
|
@ -1022,8 +812,7 @@ SmErrorT sm_failover_set_system(const SmSystemFailoverStatus& failover_status)
|
|||
if(SM_NODE_STATE_STANDBY == _host_state &&
|
||||
SM_NODE_STATE_FAILED == peer_target_state)
|
||||
{
|
||||
result = sm_failover_activate_self();
|
||||
if(SM_FAILOVER_ACTION_RESULT_FAILED == result)
|
||||
if(SM_OKAY != sm_failover_activate_self())
|
||||
{
|
||||
DPRINTFE("Failed to activate %s.", _host_name);
|
||||
return SM_FAILED;
|
||||
|
@ -1097,7 +886,6 @@ void sm_failover_audit()
|
|||
_prev_host_state = _host_state;
|
||||
}
|
||||
|
||||
bool is_active = is_active_controller();
|
||||
bool in_transition = false;
|
||||
bool infra_configured = is_infra_configured();
|
||||
in_transition = in_transition &&
|
||||
|
@ -1116,20 +904,7 @@ void sm_failover_audit()
|
|||
return;
|
||||
}
|
||||
|
||||
SmFailoverActionPairT* actions;
|
||||
int action;
|
||||
|
||||
SmFailoverActionPairT *action_map = NULL;
|
||||
|
||||
SmFailoverActionPairT **action_map_mode_list = action_maps_no_infra;
|
||||
if(infra_configured)
|
||||
{
|
||||
action_map_mode_list = action_maps_infra;
|
||||
}
|
||||
|
||||
action_map = action_map_mode_list[(int) _system_mode];
|
||||
|
||||
int if_state_flag = _failover_get_if_state();
|
||||
int if_state_flag = sm_failover_get_if_state();
|
||||
if(if_state_flag & SM_FAILOVER_HEARTBEAT_ALIVE)
|
||||
{
|
||||
_heartbeat_count ++;
|
||||
|
@ -1176,7 +951,21 @@ void sm_failover_audit()
|
|||
return;
|
||||
}
|
||||
|
||||
_event_holder.send_event();
|
||||
if(_if_state_changed)
|
||||
{
|
||||
SmIFStateChangedEventData event_data;
|
||||
event_data.set_interface_state(SM_INTERFACE_OAM, _oam_interface_info->get_state());
|
||||
event_data.set_interface_state(SM_INTERFACE_MGMT, _mgmt_interface_info->get_state());
|
||||
if( NULL != _infra_interface_info)
|
||||
{
|
||||
event_data.set_interface_state(SM_INTERFACE_INFRA, _infra_interface_info->get_state());
|
||||
}else
|
||||
{
|
||||
event_data.set_interface_state(SM_INTERFACE_INFRA, SM_FAILOVER_INTERFACE_UNKNOWN);
|
||||
}
|
||||
SmFailoverFSM::get_fsm().send_event(SM_FAILOVER_EVENT_IF_STATE_CHANGED, &event_data);
|
||||
_if_state_changed = false;
|
||||
}
|
||||
|
||||
int64_t curr_node_state = if_state_flag;
|
||||
|
||||
|
@ -1200,74 +989,7 @@ void sm_failover_audit()
|
|||
_host_state_at_last_action = _host_state;
|
||||
_peer_if_state_at_last_action = _peer_if_state;
|
||||
|
||||
actions = &(action_map[if_state_flag & 0xf]);
|
||||
if(is_active)
|
||||
{
|
||||
action = actions->active_controller_action;
|
||||
if(if_state_flag & SM_FAILOVER_OAM_DOWN)
|
||||
{
|
||||
if(_peer_if_state & SM_FAILOVER_OAM_DOWN)
|
||||
{
|
||||
DPRINTFI("No swact, oam down on both controllers");
|
||||
action &= (~SM_FAILOVER_ACTION_SWACT);
|
||||
}
|
||||
else
|
||||
{
|
||||
DPRINTFI("Swact to %s, it's oam is UP", _peer_name);
|
||||
action &= SM_FAILOVER_ACTION_SWACT;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
action = actions->standby_controller_action;
|
||||
}
|
||||
|
||||
_log_nodes_state(action);
|
||||
|
||||
DPRINTFI("Action to take %d", action);
|
||||
if (action & SM_FAILOVER_ACTION_ROUTINE)
|
||||
{
|
||||
action = SM_FAILOVER_ACTION_NO_ACTION;
|
||||
}
|
||||
|
||||
if (action & SM_FAILOVER_ACTION_ACTIVATE)
|
||||
{
|
||||
DPRINTFI("ACTIVE");
|
||||
_retry |= ( SM_FAILOVER_ACTION_RESULT_OK != sm_failover_activate_self());
|
||||
}
|
||||
|
||||
if(action & SM_FAILOVER_ACTION_DEGRADE) {
|
||||
DPRINTFI("DEGRADE");
|
||||
_retry |= ( SM_OKAY != sm_failover_degrade(SM_FAILOVER_DEGRADE_SOURCE_IF_DOWN) );
|
||||
}
|
||||
else
|
||||
{
|
||||
_retry |= ( SM_OKAY != sm_failover_degrade_clear(SM_FAILOVER_DEGRADE_SOURCE_IF_DOWN) );
|
||||
}
|
||||
|
||||
if(action & SM_FAILOVER_ACTION_SWACT)
|
||||
{
|
||||
DPRINTFI("SWACT");
|
||||
_retry |= ( SM_FAILOVER_ACTION_RESULT_OK != sm_failover_swact() );
|
||||
}
|
||||
|
||||
if(action & SM_FAILOVER_ACTION_DISABLE)
|
||||
{
|
||||
DPRINTFI("DISABLE");
|
||||
_retry |= (SM_FAILOVER_ACTION_RESULT_OK != sm_failover_disable_node(_host_name));
|
||||
}
|
||||
|
||||
if(action & SM_FAILOVER_ACTION_FAIL_NODE)
|
||||
{
|
||||
DPRINTFI("FAIL SELF");
|
||||
_retry |= (SM_FAILOVER_ACTION_RESULT_OK != sm_failover_fail_self());
|
||||
}
|
||||
|
||||
if(action & SM_FAILOVER_ACTION_DISABLE_STANDBY) {
|
||||
DPRINTFI("DISABLE STANDBY");
|
||||
_retry |= (SM_FAILOVER_ACTION_RESULT_OK != sm_failover_disable_node(_peer_name));
|
||||
}
|
||||
_log_nodes_state();
|
||||
}
|
||||
// ****************************************************************************
|
||||
|
||||
|
@ -1340,7 +1062,7 @@ SmErrorT sm_exec_mtce_command(const char cmd[], char result_buf[], int result_le
|
|||
// ****************************************************************************
|
||||
// Failover - log current state
|
||||
// =======================
|
||||
void _log_nodes_state(int action)
|
||||
void _log_nodes_state()
|
||||
{
|
||||
SmDbNodeT host, peer;
|
||||
SmErrorT error = sm_failover_get_node(_host_name, host);
|
||||
|
@ -1358,35 +1080,22 @@ void _log_nodes_state(int action)
|
|||
sm_node_avail_status_str(peer.avail_status)
|
||||
);
|
||||
}
|
||||
DPRINTFI("Host state %d, I/F state %d, peer I/F state %d, action %d",
|
||||
DPRINTFI("Host state %d, I/F state %d, peer I/F state %d",
|
||||
_node_comm_state,
|
||||
_peer_if_state,
|
||||
_host_state,
|
||||
action
|
||||
_host_state
|
||||
);
|
||||
}
|
||||
// ****************************************************************************
|
||||
|
||||
|
||||
// ****************************************************************************
|
||||
// Failover - failover action
|
||||
// Failover - disable peer node
|
||||
// =======================
|
||||
SmErrorT sm_failover_action()
|
||||
SmErrorT sm_failover_disable_peer()
|
||||
{
|
||||
//This runs in failover thread for long action
|
||||
// so main thread and hello msg are not blocked
|
||||
bool *flag = NULL;
|
||||
char result_buf[1024];
|
||||
char cmd_buf[1024];
|
||||
bool to_disable_peer;
|
||||
{
|
||||
mutex_holder holder(&_mutex);
|
||||
|
||||
to_disable_peer = _to_disable_peer;
|
||||
}
|
||||
|
||||
if ( to_disable_peer )
|
||||
{
|
||||
flag = &_to_disable_peer;
|
||||
snprintf(cmd_buf, sizeof(cmd_buf), "curl --header \"Content-Type:application/json\" "
|
||||
"--header \"Accept: application/json\" --header \"User-Agent: sm/1.0\" "
|
||||
"--data '{\"action\": \"event\", \"hostname\":\"%s\", \"operational\": \"disabled\", \"availability\": \"failed\"}' "
|
||||
|
@ -1395,15 +1104,9 @@ SmErrorT sm_failover_action()
|
|||
SmErrorT error = sm_exec_mtce_command(cmd_buf, result_buf, sizeof(result_buf));
|
||||
if (SM_OKAY != error )
|
||||
{
|
||||
DPRINTFE("Failed to disable peer %s", _peer_name);
|
||||
return SM_FAILED;
|
||||
}
|
||||
}
|
||||
|
||||
if( flag )
|
||||
{
|
||||
mutex_holder holder(&_mutex);
|
||||
* flag = false;
|
||||
}
|
||||
return SM_OKAY;
|
||||
}
|
||||
// ****************************************************************************
|
||||
|
@ -1436,6 +1139,26 @@ SmFailoverInterfaceStateT sm_failover_get_interface_info(SmInterfaceTypeT interf
|
|||
}
|
||||
// ****************************************************************************
|
||||
|
||||
static void sm_failover_interface_change_callback(
|
||||
SmHwInterfaceChangeDataT* if_change )
|
||||
{
|
||||
switch ( if_change->interface_state )
|
||||
{
|
||||
case SM_INTERFACE_STATE_DISABLED:
|
||||
DPRINTFI("Interface %s is down", if_change->interface_name);
|
||||
sm_failover_interface_down(if_change->interface_name);
|
||||
break;
|
||||
case SM_INTERFACE_STATE_ENABLED:
|
||||
DPRINTFI("Interface %s is up", if_change->interface_name);
|
||||
sm_failover_interface_up(if_change->interface_name);
|
||||
break;
|
||||
default:
|
||||
DPRINTFI("Interface %s state changed to %d",
|
||||
if_change->interface_name, if_change->interface_state);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// ****************************************************************************
|
||||
// Failover - Initialize
|
||||
// ======================
|
||||
|
@ -1445,6 +1168,28 @@ SmErrorT sm_failover_initialize( void )
|
|||
bool enabled;
|
||||
SmErrorT error;
|
||||
|
||||
pthread_mutexattr_t attr;
|
||||
|
||||
pthread_mutexattr_init(&attr);
|
||||
pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
|
||||
int res = pthread_mutex_init(&_mutex, &attr);
|
||||
if( 0 != res )
|
||||
{
|
||||
DPRINTFE("Failed to initialize mutex, error %d", res);
|
||||
return SM_FAILED;
|
||||
}
|
||||
|
||||
SmHwCallbacksT callbacks;
|
||||
memset( &callbacks, 0, sizeof(callbacks) );
|
||||
callbacks.interface_change = sm_failover_interface_change_callback;
|
||||
error = sm_hw_initialize( &callbacks );
|
||||
if( SM_OKAY != error )
|
||||
{
|
||||
DPRINTFE( "Failed to initialize hardware module, error=%s.",
|
||||
sm_error_str( error ) );
|
||||
return( error );
|
||||
}
|
||||
|
||||
error = SmFailoverFSM::initialize();
|
||||
if( SM_OKAY != error )
|
||||
{
|
||||
|
@ -1455,16 +1200,6 @@ SmErrorT sm_failover_initialize( void )
|
|||
_system_mode = sm_node_utils_get_system_mode();
|
||||
DPRINTFI("System mode %s", sm_system_mode_str(_system_mode));
|
||||
|
||||
action_maps_no_infra[SM_SYSTEM_MODE_STANDARD] = action_map_std_no_infra;
|
||||
action_maps_no_infra[SM_SYSTEM_MODE_CPE_DUPLEX] = action_map_cpe_no_infra;
|
||||
action_maps_no_infra[SM_SYSTEM_MODE_CPE_DUPLEX_DC] = action_map_cpe_dc_no_infra;
|
||||
action_maps_no_infra[SM_SYSTEM_MODE_CPE_SIMPLEX] = action_map_simplex;
|
||||
|
||||
action_maps_infra[SM_SYSTEM_MODE_STANDARD] = action_map_std_infra;
|
||||
action_maps_infra[SM_SYSTEM_MODE_CPE_DUPLEX] = action_map_cpe_infra;
|
||||
action_maps_infra[SM_SYSTEM_MODE_CPE_DUPLEX_DC] = action_map_cpe_dc_infra;
|
||||
action_maps_infra[SM_SYSTEM_MODE_CPE_SIMPLEX] = action_map_simplex;
|
||||
|
||||
error = sm_db_connect( SM_DATABASE_NAME, &_sm_db_handle );
|
||||
if( SM_OKAY != error )
|
||||
{
|
||||
|
@ -1552,7 +1287,7 @@ SmErrorT sm_failover_initialize( void )
|
|||
}
|
||||
}
|
||||
|
||||
error = sm_timer_register( "failover audit", 1000,
|
||||
error = sm_timer_register( "failover audit", 50,
|
||||
sm_failover_audit_timeout,
|
||||
0, &failover_audit_timer_id );
|
||||
|
||||
|
@ -1661,7 +1396,8 @@ void dump_peer_if_state(FILE* fp)
|
|||
|
||||
void dump_failover_fsm_state(FILE* fp)
|
||||
{
|
||||
fprintf(fp, " Failover FSM state: %d\n", SmFailoverFSM::get_fsm().get_state());
|
||||
SmFailoverStateT state = SmFailoverFSM::get_fsm().get_state();
|
||||
fprintf(fp, " Failover FSM state: %s\n", sm_failover_state_str(state));
|
||||
}
|
||||
|
||||
// ****************************************************************************
|
||||
|
|
|
@ -29,6 +29,16 @@ typedef enum
|
|||
SM_FAILOVER_DEGRADE_SOURCE_IF_DOWN = 2
|
||||
}SmFailoverDegradeSourceT;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
SM_FAILOVER_INFRA_DOWN = 1,
|
||||
SM_FAILOVER_MGMT_DOWN = 2,
|
||||
SM_FAILOVER_OAM_DOWN = 4,
|
||||
SM_FAILOVER_HEARTBEAT_ALIVE = 8,
|
||||
SM_FAILOVER_HELLO_MSG_ALIVE = 16,
|
||||
SM_FAILOVER_PEER_DISABLED = 0x4000,
|
||||
}SmFailoverCommFaultBitFlagT;
|
||||
|
||||
// ****************************************************************************
|
||||
// Failover - check
|
||||
// ==================
|
||||
|
@ -136,6 +146,29 @@ SmHeartbeatMsgIfStateT sm_failover_get_peer_if_state();
|
|||
SmErrorT sm_failover_set_system(const SmSystemFailoverStatus& failover_status);
|
||||
// ****************************************************************************
|
||||
|
||||
// ****************************************************************************
|
||||
// Failover - active self
|
||||
// =======================
|
||||
SmErrorT sm_failover_activate_self();
|
||||
// ****************************************************************************
|
||||
|
||||
// ****************************************************************************
|
||||
// Failover - disable peer node
|
||||
// =======================
|
||||
SmErrorT sm_failover_disable_peer();
|
||||
// ****************************************************************************
|
||||
|
||||
// ****************************************************************************
|
||||
// Failover - is active controller
|
||||
// ==================
|
||||
bool sm_is_active_controller();
|
||||
// ****************************************************************************
|
||||
|
||||
// ****************************************************************************
|
||||
// Failover - get interface state
|
||||
// ==================
|
||||
int sm_failover_get_if_state();
|
||||
// ****************************************************************************
|
||||
|
||||
// ****************************************************************************
|
||||
// Failover - dump state
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
//
|
||||
#include "sm_failover_fail_pending_state.h"
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include "sm_types.h"
|
||||
#include "sm_limits.h"
|
||||
#include "sm_debug.h"
|
||||
|
@ -14,9 +15,164 @@
|
|||
#include "sm_failover_utils.h"
|
||||
#include "sm_node_utils.h"
|
||||
#include "sm_node_api.h"
|
||||
#include "sm_worker_thread.h"
|
||||
|
||||
static const int FAIL_PENDING_TIMEOUT = 2000; //2000ms
|
||||
|
||||
static SmTimerIdT action_timer_id = SM_TIMER_ID_INVALID;
|
||||
static const int RESET_TIMEOUT = 10 * 1000; // 10 seconds for a reset command to reboot a node
|
||||
static const int GO_ACTIVE_TIMEOUT = 30 * 1000; // 30 seconds for standby node go active
|
||||
static const int WAIT_RESET_TIMEOUT = RESET_TIMEOUT + 5 * 1000; // extra 5 seconds for sending reset command
|
||||
|
||||
static void disable_peer(SmSimpleAction&)
|
||||
{
|
||||
SmErrorT error;
|
||||
int counter = 0;
|
||||
bool done = false;
|
||||
DPRINTFI("To send mtce api to disable peer");
|
||||
|
||||
while(!done && counter < 5)
|
||||
{
|
||||
error = sm_failover_disable_peer();
|
||||
counter ++;
|
||||
if(SM_OKAY != error)
|
||||
{
|
||||
int wait_interval_us = 1000000; // 1 second
|
||||
DPRINTFE("Failed to disable peer, reattempt in %d ms",
|
||||
wait_interval_us / 1000);
|
||||
usleep(wait_interval_us); // wait 1 second before retry
|
||||
}else
|
||||
{
|
||||
done = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
static SmSimpleAction _disable_peer_action("disable-peer", disable_peer);
|
||||
|
||||
static bool wait_for_reset_timeout(SmTimerIdT timer_id, int64_t user_data)
|
||||
{
|
||||
//timer never rearm, clear the timer id
|
||||
action_timer_id = SM_TIMER_ID_INVALID;
|
||||
sm_failover_utils_reset_stayfailed_flag();
|
||||
DPRINTFI("stayfailed flag file is removed");
|
||||
SmFailoverFSM::get_fsm().set_state(SM_FAILOVER_STATE_SURVIVED);
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool reset_standby_timeout(SmTimerIdT timer_id, int64_t user_data)
|
||||
{
|
||||
//timer never rearm, clear the timer id
|
||||
action_timer_id = SM_TIMER_ID_INVALID;
|
||||
sm_failover_utils_set_stayfailed_flag();
|
||||
DPRINTFI("stayfailed flag file is set");
|
||||
const char* timer_name = "confirm standby reset";
|
||||
SmErrorT error = sm_timer_register( timer_name, WAIT_RESET_TIMEOUT + WAIT_RESET_TIMEOUT,
|
||||
wait_for_reset_timeout,
|
||||
0, &action_timer_id);
|
||||
|
||||
if(SM_OKAY != error)
|
||||
{
|
||||
DPRINTFE("Failed to register confirm standby reset timer");
|
||||
}
|
||||
DPRINTFI("wait for %d secs...", (WAIT_RESET_TIMEOUT + WAIT_RESET_TIMEOUT) / 1000);
|
||||
return false;
|
||||
}
|
||||
|
||||
SmErrorT blind_guess_start_active()
|
||||
{
|
||||
SmErrorT error;
|
||||
SmWorkerThread::get_worker().add_priority_action(&_disable_peer_action);
|
||||
|
||||
const char* timer_name = "reset standby";
|
||||
error = sm_timer_register( timer_name, RESET_TIMEOUT,
|
||||
reset_standby_timeout,
|
||||
0, &action_timer_id);
|
||||
|
||||
if(SM_OKAY != error)
|
||||
{
|
||||
DPRINTFE("Failed to register reset standby timer");
|
||||
return SM_FAILED;
|
||||
}
|
||||
|
||||
DPRINTFI("wait for %d secs...", RESET_TIMEOUT / 1000);
|
||||
return SM_OKAY;
|
||||
}
|
||||
|
||||
static void standby_go_active_failed()
|
||||
{
|
||||
sm_failover_utils_set_stayfailed_flag();
|
||||
DPRINTFI("stayfailed flag file is set");
|
||||
SmFailoverFSM::get_fsm().set_state(SM_FAILOVER_STATE_FAILED);
|
||||
}
|
||||
|
||||
bool standby_wait_timeout(SmTimerIdT timer_id, int64_t user_data)
|
||||
{
|
||||
//timer never rearm, clear the timer id
|
||||
action_timer_id = SM_TIMER_ID_INVALID;
|
||||
sm_failover_utils_reset_stayfailed_flag();
|
||||
DPRINTFI("stayfailed flag file is removed");
|
||||
char host_name[SM_NODE_NAME_MAX_CHAR];
|
||||
SmErrorT error = sm_node_utils_get_hostname(host_name);
|
||||
if( SM_OKAY != error )
|
||||
{
|
||||
DPRINTFE( "Failed to get hostname, error=%s.",
|
||||
sm_error_str( error ) );
|
||||
|
||||
standby_go_active_failed();
|
||||
return false;
|
||||
}
|
||||
|
||||
SmNodeScheduleStateT host_state = sm_get_controller_state(host_name);
|
||||
if(SM_NODE_STATE_ACTIVE != host_state)
|
||||
{
|
||||
standby_go_active_failed();
|
||||
return false;
|
||||
}
|
||||
|
||||
SmWorkerThread::get_worker().add_priority_action(&_disable_peer_action);
|
||||
|
||||
SmFailoverFSM::get_fsm().set_state(SM_FAILOVER_STATE_SURVIVED);
|
||||
return false;
|
||||
}
|
||||
|
||||
static void blind_guess_start_standby()
|
||||
{
|
||||
SmErrorT error;
|
||||
sm_failover_utils_set_stayfailed_flag();
|
||||
DPRINTFI("set stayfailed flag file");
|
||||
|
||||
sm_failover_activate_self();
|
||||
const char* timer_name = "active controller action timeout";
|
||||
error = sm_timer_register( timer_name, WAIT_RESET_TIMEOUT + GO_ACTIVE_TIMEOUT,
|
||||
standby_wait_timeout,
|
||||
0, &action_timer_id);
|
||||
|
||||
if(SM_OKAY != error)
|
||||
{
|
||||
DPRINTFE("Failed to register blindguess action timer");
|
||||
}
|
||||
DPRINTFI("wait for %d secs", (WAIT_RESET_TIMEOUT + GO_ACTIVE_TIMEOUT) / 1000);
|
||||
}
|
||||
|
||||
void blind_guess_scenario_start()
|
||||
{
|
||||
SmSystemFailoverStatus& failover_status = SmSystemFailoverStatus::get_status();
|
||||
SmNodeScheduleStateT host_state = failover_status.get_host_pre_failure_schedule_state();
|
||||
SmNodeScheduleStateT peer_state = failover_status.get_peer_pre_failure_schedule_state();
|
||||
DPRINTFI("Pre failure host state %s, peer state %s",
|
||||
sm_node_schedule_state_str(host_state), sm_node_schedule_state_str(peer_state));
|
||||
if(SM_NODE_STATE_ACTIVE == host_state)
|
||||
{
|
||||
if(SM_NODE_STATE_STANDBY == peer_state)
|
||||
{
|
||||
blind_guess_start_active();
|
||||
}
|
||||
}else if(SM_NODE_STATE_STANDBY == host_state)
|
||||
{
|
||||
blind_guess_start_standby();
|
||||
}
|
||||
}
|
||||
|
||||
SmFailoverFailPendingState::SmFailoverFailPendingState(SmFailoverFSM& fsm) : SmFSMState(fsm)
|
||||
{
|
||||
this->_pending_timer_id = SM_TIMER_ID_INVALID;
|
||||
|
@ -30,19 +186,53 @@ SmFailoverFailPendingState::~SmFailoverFailPendingState()
|
|||
SmErrorT SmFailoverFailPendingState::event_handler(SmFailoverEventT event, const ISmFSMEventData* event_data)
|
||||
{
|
||||
//SmFSMEventDataTypeT event_data_type = event_data->get_event_data_type();
|
||||
bool duplex = false;
|
||||
switch (event)
|
||||
{
|
||||
case SM_HEARTBEAT_LOST:
|
||||
case SM_INTERFACE_DOWN:
|
||||
//getting worse, extend wait time
|
||||
case SM_INTERFACE_UP:
|
||||
case SM_HEARTBEAT_RECOVER:
|
||||
// Things are improving, extend wait time
|
||||
case SM_FAILOVER_EVENT_IF_STATE_CHANGED:
|
||||
// Situation are changing, extend wait time
|
||||
this->_register_timer();
|
||||
break;
|
||||
case SM_FAIL_PENDING_TIMEOUT:
|
||||
|
||||
case SM_FAILOVER_EVENT_FAIL_PENDING_TIMEOUT:
|
||||
sm_node_utils_is_aio_duplex(&duplex);
|
||||
if( duplex &&
|
||||
0 == (sm_failover_get_if_state() & SM_FAILOVER_HEARTBEAT_ALIVE))
|
||||
{
|
||||
SmSystemFailoverStatus failover_status;
|
||||
SmSystemModeT system_mode = sm_node_utils_get_system_mode();
|
||||
SmInterfaceTypeT interfaces_to_check[3] = {SM_INTERFACE_UNKNOWN};
|
||||
bool healthy = true;
|
||||
if(sm_failover_utils_is_stayfailed())
|
||||
{
|
||||
healthy = false;
|
||||
}
|
||||
|
||||
if(SM_SYSTEM_MODE_CPE_DUPLEX_DC == system_mode)
|
||||
{
|
||||
interfaces_to_check[0] = SM_INTERFACE_OAM;
|
||||
}else
|
||||
{
|
||||
interfaces_to_check[0] = SM_INTERFACE_MGMT;
|
||||
interfaces_to_check[1] = SM_INTERFACE_INFRA;
|
||||
}
|
||||
for(int i = 0; interfaces_to_check[i] != SM_INTERFACE_UNKNOWN; i ++)
|
||||
{
|
||||
SmFailoverInterfaceStateT if_state = sm_failover_get_interface_info(interfaces_to_check[i]);
|
||||
if(SM_FAILOVER_INTERFACE_DOWN == if_state)
|
||||
{
|
||||
healthy = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if(healthy)
|
||||
{
|
||||
blind_guess_scenario_start();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
SmSystemFailoverStatus& failover_status = SmSystemFailoverStatus::get_status();
|
||||
SmErrorT error = sm_failover_ss_get_survivor(failover_status);
|
||||
SmNodeScheduleStateT host_state = failover_status.get_host_schedule_state();
|
||||
SmNodeScheduleStateT peer_state = failover_status.get_peer_schedule_state();
|
||||
|
@ -79,7 +269,9 @@ SmErrorT SmFailoverFailPendingState::event_handler(SmFailoverEventT event, const
|
|||
}
|
||||
break;
|
||||
default:
|
||||
DPRINTFE("Runtime error, unexpected event %d, at state %d", event, this->fsm.get_state());
|
||||
DPRINTFE("Runtime error, unexpected event %s, at state %s",
|
||||
sm_failover_event_str(event),
|
||||
sm_failover_state_str(this->fsm.get_state()));
|
||||
}
|
||||
return SM_OKAY;
|
||||
}
|
||||
|
@ -87,13 +279,13 @@ SmErrorT SmFailoverFailPendingState::event_handler(SmFailoverEventT event, const
|
|||
bool SmFailoverFailPendingState::_fail_pending_timeout(
|
||||
SmTimerIdT timer_id, int64_t user_data)
|
||||
{
|
||||
SmFailoverFSM::get_fsm().send_event(SM_FAIL_PENDING_TIMEOUT, NULL);
|
||||
SmFailoverFSM::get_fsm().send_event(SM_FAILOVER_EVENT_FAIL_PENDING_TIMEOUT, NULL);
|
||||
return false;
|
||||
}
|
||||
|
||||
SmErrorT SmFailoverFailPendingState::enter_state()
|
||||
{
|
||||
DPRINTFI("Entering Pending state");
|
||||
SmFSMState::enter_state();
|
||||
SmErrorT error = this->_register_timer();
|
||||
if(SM_OKAY != error)
|
||||
{
|
||||
|
@ -146,6 +338,15 @@ SmErrorT SmFailoverFailPendingState::exit_state()
|
|||
{
|
||||
DPRINTFE("Failed to deregister fail pending timer. Error %s", sm_error_str(error));
|
||||
}
|
||||
DPRINTFI("Exiting Pending state");
|
||||
if(SM_TIMER_ID_INVALID != action_timer_id)
|
||||
{
|
||||
error = sm_timer_deregister(action_timer_id);
|
||||
action_timer_id = SM_TIMER_ID_INVALID;
|
||||
if( SM_OKAY != error)
|
||||
{
|
||||
DPRINTFE("Failed to deregister action timer. Error %s", sm_error_str(error));
|
||||
}
|
||||
}
|
||||
SmFSMState::exit_state();
|
||||
return error;
|
||||
}
|
||||
|
|
|
@ -10,31 +10,16 @@
|
|||
#include "sm_failover_fsm.h"
|
||||
#include "sm_failover_ss.h"
|
||||
|
||||
SmErrorT SmFailoverFailedState::enter_state()
|
||||
static void _audit_failover_state()
|
||||
{
|
||||
DPRINTFI("Entering Failed state");
|
||||
return SM_OKAY;
|
||||
}
|
||||
|
||||
SmErrorT SmFailoverFailedState::event_handler(SmFailoverEventT event, const ISmFSMEventData* event_data)
|
||||
{
|
||||
switch (event)
|
||||
{
|
||||
case SM_HEARTBEAT_LOST:
|
||||
case SM_INTERFACE_DOWN:
|
||||
case SM_INTERFACE_UP:
|
||||
// ignore, recover is possible only when heartbeat is recovered
|
||||
break;
|
||||
case SM_HEARTBEAT_RECOVER:
|
||||
{
|
||||
SmSystemFailoverStatus failover_status;
|
||||
SmSystemFailoverStatus& failover_status = SmSystemFailoverStatus::get_status();
|
||||
SmErrorT error = sm_failover_ss_get_survivor(failover_status);
|
||||
SmNodeScheduleStateT host_state = failover_status.get_host_schedule_state();
|
||||
SmNodeScheduleStateT peer_state = failover_status.get_peer_schedule_state();
|
||||
if(SM_OKAY != error)
|
||||
{
|
||||
DPRINTFE("Failed to get failover survivor. Error %s", sm_error_str(error));
|
||||
return error;
|
||||
return;
|
||||
}
|
||||
|
||||
if(SM_NODE_STATE_FAILED == host_state)
|
||||
|
@ -44,7 +29,7 @@ SmErrorT SmFailoverFailedState::event_handler(SmFailoverEventT event, const ISmF
|
|||
else if(SM_NODE_STATE_STANDBY == host_state && SM_NODE_STATE_ACTIVE == peer_state)
|
||||
{
|
||||
// standby is the only possible state to be scheduled to from failed state
|
||||
this->fsm.set_state(SM_FAILOVER_STATE_NORMAL);
|
||||
SmFailoverFSM::get_fsm().set_state(SM_FAILOVER_STATE_NORMAL);
|
||||
}else
|
||||
{
|
||||
DPRINTFE("Runtime error: unexpected scheduling state: %s",
|
||||
|
@ -52,15 +37,20 @@ SmErrorT SmFailoverFailedState::event_handler(SmFailoverEventT event, const ISmF
|
|||
}
|
||||
}
|
||||
|
||||
break;
|
||||
default:
|
||||
DPRINTFE("Runtime error, unexpected event %d, at state %d", event, this->fsm.get_state());
|
||||
}
|
||||
return SM_OKAY;
|
||||
}
|
||||
|
||||
SmErrorT SmFailoverFailedState::exit_state()
|
||||
SmErrorT SmFailoverFailedState::event_handler(SmFailoverEventT event, const ISmFSMEventData* event_data)
|
||||
{
|
||||
DPRINTFI("Exiting Failed state");
|
||||
switch (event)
|
||||
{
|
||||
case SM_FAILOVER_EVENT_IF_STATE_CHANGED:
|
||||
DPRINTFI("sm_heartbeat_recover event received.");
|
||||
_audit_failover_state();
|
||||
|
||||
break;
|
||||
|
||||
default:
|
||||
DPRINTFE("Runtime error, unexpected event %s, at state %s",
|
||||
sm_failover_event_str(event),
|
||||
sm_failover_state_str(this->fsm.get_state()));
|
||||
}
|
||||
return SM_OKAY;
|
||||
}
|
||||
|
|
|
@ -13,8 +13,6 @@ class SmFailoverFailedState : public SmFSMState
|
|||
{
|
||||
public:
|
||||
SmFailoverFailedState(SmFailoverFSM& fsm) : SmFSMState(fsm){}
|
||||
SmErrorT enter_state();
|
||||
SmErrorT exit_state();
|
||||
|
||||
protected:
|
||||
SmErrorT event_handler(SmFailoverEventT event, const ISmFSMEventData* event_data);
|
||||
|
|
|
@ -6,20 +6,66 @@
|
|||
#include "sm_failover_fsm.h"
|
||||
#include <stdlib.h>
|
||||
#include "sm_debug.h"
|
||||
#include "sm_failover_initial_state.h"
|
||||
#include "sm_failover_normal_state.h"
|
||||
#include "sm_failover_fail_pending_state.h"
|
||||
#include "sm_failover_failed_state.h"
|
||||
#include "sm_failover_survived_state.h"
|
||||
|
||||
SmFSMEventDataTypeT SmIFStateChangedEventData::get_event_data_type() const
|
||||
{
|
||||
return SmIFStateChangedEventDataType;
|
||||
}
|
||||
|
||||
|
||||
SmIFStateChangedEventData::SmIFStateChangedEventData()
|
||||
{
|
||||
}
|
||||
|
||||
void SmIFStateChangedEventData::set_interface_state(
|
||||
SmInterfaceTypeT interface_type, SmFailoverInterfaceStateT interface_state)
|
||||
{
|
||||
switch (interface_type)
|
||||
{
|
||||
case SM_INTERFACE_OAM:
|
||||
_oam_state = interface_state;
|
||||
break;
|
||||
case SM_INTERFACE_MGMT:
|
||||
_mgmt_state = interface_state;
|
||||
break;
|
||||
case SM_INTERFACE_INFRA:
|
||||
_infra_state = interface_state;
|
||||
break;
|
||||
default:
|
||||
DPRINTFE("Runtime error: invalid interface type %d", interface_type);
|
||||
}
|
||||
}
|
||||
SmFailoverInterfaceStateT SmIFStateChangedEventData::get_interface_state(SmInterfaceTypeT interface_type) const
|
||||
{
|
||||
switch (interface_type)
|
||||
{
|
||||
case SM_INTERFACE_OAM:
|
||||
return _oam_state;
|
||||
case SM_INTERFACE_MGMT:
|
||||
return _mgmt_state;
|
||||
case SM_INTERFACE_INFRA:
|
||||
return _infra_state;
|
||||
default:
|
||||
DPRINTFE("Runtime error: invalid interface type %d", interface_type);
|
||||
return SM_FAILOVER_INTERFACE_UNKNOWN;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
SmErrorT SmFSMState::enter_state()
|
||||
{
|
||||
// default implementation, nothing to do
|
||||
DPRINTFI("Entering %s state", sm_failover_state_str(fsm.get_state()));
|
||||
return SM_OKAY;
|
||||
};
|
||||
|
||||
SmErrorT SmFSMState::exit_state()
|
||||
{
|
||||
// default implementation, nothing to do
|
||||
DPRINTFI("Exiting %s state", sm_failover_state_str(fsm.get_state()));
|
||||
return SM_OKAY;
|
||||
};
|
||||
|
||||
|
@ -58,7 +104,7 @@ SmErrorT SmFailoverFSM::send_event(SmFailoverEventT event, const ISmFSMEventData
|
|||
DPRINTFE("Runtime error. No handler for state %d", this->get_state());
|
||||
return SM_FAILED;
|
||||
}
|
||||
DPRINTFI("send_event %d\n", event);
|
||||
DPRINTFI("send_event %s\n", sm_failover_event_str(event));
|
||||
state_handler->event_handler(event, event_data);
|
||||
return SM_OKAY;
|
||||
}
|
||||
|
@ -96,6 +142,9 @@ SmErrorT SmFailoverFSM::set_state(SmFailoverStateT state)
|
|||
DPRINTFE("State %d was not registered", state);
|
||||
return SM_FAILED;
|
||||
}
|
||||
|
||||
this->_current_state = state;
|
||||
|
||||
error = new_state_handler->enter_state();
|
||||
if(SM_OKAY != error)
|
||||
{
|
||||
|
@ -103,7 +152,6 @@ SmErrorT SmFailoverFSM::set_state(SmFailoverStateT state)
|
|||
return SM_FAILED;
|
||||
}
|
||||
|
||||
this->_current_state = state;
|
||||
return SM_OKAY;
|
||||
}
|
||||
|
||||
|
@ -149,6 +197,7 @@ SmErrorT SmFailoverFSM::init_state()
|
|||
SmErrorT SmFailoverFSM::initialize()
|
||||
{
|
||||
SmFailoverFSM& fsm = SmFailoverFSM::get_fsm();
|
||||
fsm.register_fsm_state(SM_FAILOVER_STATE_INITIAL, new SmFailoverInitialState(fsm));
|
||||
fsm.register_fsm_state(SM_FAILOVER_STATE_NORMAL, new SmFailoverNormalState(fsm));
|
||||
fsm.register_fsm_state(SM_FAILOVER_STATE_FAIL_PENDING, new SmFailoverFailPendingState(fsm));
|
||||
fsm.register_fsm_state(SM_FAILOVER_STATE_FAILED, new SmFailoverFailedState(fsm));
|
||||
|
|
|
@ -7,24 +7,6 @@
|
|||
#define __SM_FAILOVER_FSM_H__
|
||||
#include "sm_types.h"
|
||||
|
||||
typedef enum{
|
||||
SM_FAILOVER_STATE_NORMAL,
|
||||
SM_FAILOVER_STATE_FAIL_PENDING,
|
||||
SM_FAILOVER_STATE_FAILED,
|
||||
SM_FAILOVER_STATE_SURVIVED,
|
||||
SM_FAILOVER_STATE_MAX
|
||||
}SmFailoverStateT;
|
||||
|
||||
#define SM_FAILOVER_STATE_INITIAL SM_FAILOVER_STATE_NORMAL
|
||||
|
||||
typedef enum{
|
||||
SM_HEARTBEAT_LOST,
|
||||
SM_INTERFACE_DOWN,
|
||||
SM_INTERFACE_UP,
|
||||
SM_HEARTBEAT_RECOVER,
|
||||
SM_FAIL_PENDING_TIMEOUT,
|
||||
}SmFailoverEventT;
|
||||
|
||||
typedef int SmFSMEventDataTypeT;
|
||||
|
||||
class ISmFSMEventData
|
||||
|
@ -59,7 +41,7 @@ class SmFailoverFSM
|
|||
SmErrorT register_fsm_state(SmFailoverStateT state, SmFSMState* state_handler);
|
||||
|
||||
SmErrorT set_state(SmFailoverStateT state);
|
||||
inline int get_state() const {return this->_current_state;}
|
||||
inline SmFailoverStateT get_state() const {return this->_current_state;}
|
||||
|
||||
static SmErrorT initialize();
|
||||
static SmErrorT finalize();
|
||||
|
@ -70,12 +52,27 @@ class SmFailoverFSM
|
|||
private:
|
||||
static const int MaxState = SM_FAILOVER_STATE_MAX;
|
||||
SmFSMState* _state_handlers[MaxState];
|
||||
int _current_state;
|
||||
SmFailoverStateT _current_state;
|
||||
|
||||
static SmFailoverFSM _the_fsm;
|
||||
SmErrorT init_state();
|
||||
void deregister_states();
|
||||
};
|
||||
|
||||
class SmIFStateChangedEventData: public ISmFSMEventData
|
||||
{
|
||||
public:
|
||||
static const SmFSMEventDataTypeT SmIFStateChangedEventDataType = 2;
|
||||
SmFSMEventDataTypeT get_event_data_type() const;
|
||||
SmIFStateChangedEventData();
|
||||
void set_interface_state(
|
||||
SmInterfaceTypeT interface_type, SmFailoverInterfaceStateT interface_state);
|
||||
SmFailoverInterfaceStateT get_interface_state(SmInterfaceTypeT interface_type) const;
|
||||
|
||||
private:
|
||||
SmFailoverInterfaceStateT _mgmt_state;
|
||||
SmFailoverInterfaceStateT _infra_state;
|
||||
SmFailoverInterfaceStateT _oam_state;
|
||||
};
|
||||
|
||||
#endif //__SM_FAILOVER_FSM_H__
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
//
|
||||
// Copyright (c) 2018 Wind River Systems, Inc.
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
#include "sm_failover_initial_state.h"
|
||||
#include "sm_types.h"
|
||||
#include "sm_debug.h"
|
||||
#include "sm_failover_fsm.h"
|
||||
|
||||
SmErrorT SmFailoverInitialState::event_handler(SmFailoverEventT event, const ISmFSMEventData* event_data)
|
||||
{
|
||||
switch (event)
|
||||
{
|
||||
case SM_FAILOVER_EVENT_HEARTBEAT_ENABLED:
|
||||
this->fsm.set_state(SM_FAILOVER_STATE_NORMAL);
|
||||
break;
|
||||
case SM_FAILOVER_EVENT_IF_STATE_CHANGED:
|
||||
case SM_FAILOVER_EVENT_FAIL_PENDING_TIMEOUT:
|
||||
case SM_FAILOVER_EVENT_NODE_ENABLED:
|
||||
break;
|
||||
default:
|
||||
DPRINTFE("Runtime error, unexpected event %s, at state %s",
|
||||
sm_failover_event_str(event),
|
||||
sm_failover_state_str(this->fsm.get_state()));
|
||||
}
|
||||
return SM_OKAY;
|
||||
}
|
|
@ -0,0 +1,20 @@
|
|||
//
|
||||
// Copyright (c) 2018 Wind River Systems, Inc.
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
#ifndef __SM_FAILOVER_INITIAL_STATE_H__
|
||||
#define __SM_FAILOVER_INITIAL_STATE_H__
|
||||
#include "sm_types.h"
|
||||
#include "sm_failover_fsm.h"
|
||||
|
||||
class SmFailoverInitialState : public SmFSMState
|
||||
{
|
||||
public:
|
||||
SmFailoverInitialState(SmFailoverFSM& fsm) : SmFSMState(fsm){}
|
||||
protected:
|
||||
SmErrorT event_handler(SmFailoverEventT event, const ISmFSMEventData* event_data);
|
||||
};
|
||||
|
||||
|
||||
#endif //__SM_FAILOVER_INITIAL_STATE_H__
|
|
@ -5,38 +5,80 @@
|
|||
//
|
||||
#include "sm_failover_normal_state.h"
|
||||
#include <stdlib.h>
|
||||
#include "sm_limits.h"
|
||||
#include "sm_types.h"
|
||||
#include "sm_debug.h"
|
||||
#include "sm_node_api.h"
|
||||
#include "sm_node_utils.h"
|
||||
#include "sm_failover_utils.h"
|
||||
#include "sm_failover_fsm.h"
|
||||
|
||||
SmErrorT SmFailoverNormalState::enter_state()
|
||||
{
|
||||
DPRINTFI("Entering normal state");
|
||||
return SM_OKAY;
|
||||
}
|
||||
#include "sm_failover_ss.h"
|
||||
|
||||
SmErrorT SmFailoverNormalState::event_handler(SmFailoverEventT event, const ISmFSMEventData* event_data)
|
||||
{
|
||||
switch (event)
|
||||
{
|
||||
case SM_HEARTBEAT_LOST:
|
||||
case SM_INTERFACE_DOWN:
|
||||
case SM_FAILOVER_EVENT_IF_STATE_CHANGED:
|
||||
if(NULL != event_data &&
|
||||
SmIFStateChangedEventData::SmIFStateChangedEventDataType == event_data->get_event_data_type())
|
||||
{
|
||||
const SmIFStateChangedEventData* data = (const SmIFStateChangedEventData*) event_data;
|
||||
|
||||
SmFailoverInterfaceStateT oam_state, mgmt_state, infra_state;
|
||||
oam_state = data->get_interface_state(SM_INTERFACE_OAM);
|
||||
mgmt_state = data->get_interface_state(SM_INTERFACE_MGMT);
|
||||
infra_state = data->get_interface_state(SM_INTERFACE_INFRA);
|
||||
if(oam_state != SM_FAILOVER_INTERFACE_OK ||
|
||||
mgmt_state != SM_FAILOVER_INTERFACE_OK ||
|
||||
(infra_state != SM_FAILOVER_INTERFACE_OK && infra_state != SM_FAILOVER_INTERFACE_UNKNOWN))
|
||||
{
|
||||
this->fsm.set_state(SM_FAILOVER_STATE_FAIL_PENDING);
|
||||
}
|
||||
}else
|
||||
{
|
||||
DPRINTFE("Runtime error: receive invalid event data type");
|
||||
}
|
||||
|
||||
break;
|
||||
case SM_INTERFACE_UP:
|
||||
DPRINTFI("Interface is up");
|
||||
break;
|
||||
case SM_HEARTBEAT_RECOVER:
|
||||
DPRINTFI("Heartbeat is recovered");
|
||||
break;
|
||||
|
||||
default:
|
||||
DPRINTFE("Runtime error, unexpected event %d, at state %d", event, this->fsm.get_state());
|
||||
DPRINTFE("Runtime error, unexpected event %s, at state %s",
|
||||
sm_failover_event_str(event),
|
||||
sm_failover_state_str(this->fsm.get_state()));
|
||||
}
|
||||
return SM_OKAY;
|
||||
}
|
||||
|
||||
SmErrorT SmFailoverNormalState::exit_state()
|
||||
{
|
||||
DPRINTFI("Exiting normal state");
|
||||
SmSystemFailoverStatus& failover_status = SmSystemFailoverStatus::get_status();
|
||||
char host_name[SM_NODE_NAME_MAX_CHAR];
|
||||
SmErrorT error = sm_node_utils_get_hostname(host_name);
|
||||
if( SM_OKAY != error )
|
||||
{
|
||||
DPRINTFE( "Failed to get hostname, error=%s.",
|
||||
sm_error_str( error ) );
|
||||
failover_status.set_host_pre_failure_schedule_state(SM_NODE_STATE_UNKNOWN);
|
||||
}else
|
||||
{
|
||||
SmNodeScheduleStateT host_state = sm_get_controller_state(host_name);
|
||||
DPRINTFI("Blind guess pre failure host state %s", sm_node_schedule_state_str(host_state));
|
||||
failover_status.set_host_pre_failure_schedule_state(host_state);
|
||||
}
|
||||
|
||||
char peer_name[SM_NODE_NAME_MAX_CHAR];
|
||||
error = sm_node_api_get_peername(peer_name);
|
||||
if( SM_OKAY != error )
|
||||
{
|
||||
DPRINTFE( "Failed to get peername, error=%s.",
|
||||
sm_error_str( error ) );
|
||||
failover_status.set_peer_pre_failure_schedule_state(SM_NODE_STATE_UNKNOWN);
|
||||
}else
|
||||
{
|
||||
SmNodeScheduleStateT peer_state = sm_get_controller_state(peer_name);
|
||||
failover_status.set_peer_pre_failure_schedule_state(peer_state);
|
||||
}
|
||||
|
||||
SmFSMState::exit_state();
|
||||
return SM_OKAY;
|
||||
}
|
||||
|
|
|
@ -12,7 +12,6 @@ class SmFailoverNormalState : public SmFSMState
|
|||
{
|
||||
public:
|
||||
SmFailoverNormalState(SmFailoverFSM& fsm) : SmFSMState(fsm){}
|
||||
SmErrorT enter_state();
|
||||
SmErrorT exit_state();
|
||||
protected:
|
||||
SmErrorT event_handler(SmFailoverEventT event, const ISmFSMEventData* event_data);
|
||||
|
|
|
@ -15,6 +15,40 @@
|
|||
#include "sm_node_api.h"
|
||||
#include "sm_failover.h"
|
||||
|
||||
//
|
||||
SmErrorT _get_survivor_dc(const SmSystemStatusT& system_status, SmSystemFailoverStatus& selection);
|
||||
|
||||
// select standby as failed
|
||||
SmErrorT _fail_standby(const SmSystemStatusT& system_status, SmSystemFailoverStatus& selection)
|
||||
{
|
||||
if(SM_NODE_STATE_STANDBY == system_status.host_status.current_schedule_state)
|
||||
{
|
||||
selection.set_host_schedule_state(SM_NODE_STATE_FAILED);
|
||||
selection.set_peer_schedule_state(SM_NODE_STATE_ACTIVE);
|
||||
}else if(SM_NODE_STATE_STANDBY == system_status.peer_status.current_schedule_state)
|
||||
{
|
||||
selection.set_peer_schedule_state(SM_NODE_STATE_FAILED);
|
||||
selection.set_host_schedule_state(SM_NODE_STATE_ACTIVE);
|
||||
}else
|
||||
{
|
||||
DPRINTFE("Runtime error. Unexpected scheduling state: host %s, peer %s (no standby)",
|
||||
sm_node_schedule_state_str(system_status.host_status.current_schedule_state),
|
||||
sm_node_schedule_state_str(system_status.peer_status.current_schedule_state));
|
||||
return SM_FAILED;
|
||||
}
|
||||
return SM_OKAY;
|
||||
}
|
||||
|
||||
const char SmSystemFailoverStatus::filename[] = "/var/lib/sm/failover.status";
|
||||
const char SmSystemFailoverStatus::file_format[] =
|
||||
"This is a very important system file.\n"
|
||||
"Any modification is strictly forbidden and will cause serious consequence.\n"
|
||||
"host_schedule_state=%1d\n"
|
||||
"peer_schedule_state=%1d\n"
|
||||
"last_update=%19s\n";
|
||||
|
||||
SmSystemFailoverStatus SmSystemFailoverStatus::_failover_status;
|
||||
|
||||
SmSystemFailoverStatus::SmSystemFailoverStatus()
|
||||
{
|
||||
SmErrorT error;
|
||||
|
@ -42,11 +76,17 @@ SmSystemFailoverStatus::~SmSystemFailoverStatus()
|
|||
{
|
||||
}
|
||||
|
||||
SmSystemFailoverStatus& SmSystemFailoverStatus::get_status()
|
||||
{
|
||||
return _failover_status;
|
||||
}
|
||||
|
||||
bool SmSystemFailoverStatus::_is_valid_schedule_state(SmNodeScheduleStateT state)
|
||||
{
|
||||
return (SM_NODE_STATE_ACTIVE == state ||
|
||||
SM_NODE_STATE_STANDBY == state ||
|
||||
SM_NODE_STATE_FAILED == state);
|
||||
SM_NODE_STATE_FAILED == state ||
|
||||
SM_NODE_STATE_INIT == state);
|
||||
}
|
||||
|
||||
void SmSystemFailoverStatus::set_host_schedule_state(SmNodeScheduleStateT state)
|
||||
|
@ -63,6 +103,20 @@ void SmSystemFailoverStatus::set_host_schedule_state(SmNodeScheduleStateT state)
|
|||
}
|
||||
}
|
||||
|
||||
void SmSystemFailoverStatus::set_host_pre_failure_schedule_state(SmNodeScheduleStateT state)
|
||||
{
|
||||
if(_is_valid_schedule_state(state))
|
||||
{
|
||||
if(_host_pre_failure_schedule_state != state)
|
||||
{
|
||||
_host_pre_failure_schedule_state = state;
|
||||
}
|
||||
}else
|
||||
{
|
||||
DPRINTFE("Runtime error, schedule state unknown %d", state);
|
||||
}
|
||||
}
|
||||
|
||||
void SmSystemFailoverStatus::set_peer_schedule_state(SmNodeScheduleStateT state)
|
||||
{
|
||||
if(_is_valid_schedule_state(state))
|
||||
|
@ -77,12 +131,65 @@ void SmSystemFailoverStatus::set_peer_schedule_state(SmNodeScheduleStateT state)
|
|||
}
|
||||
}
|
||||
|
||||
typedef enum
|
||||
void SmSystemFailoverStatus::set_peer_pre_failure_schedule_state(SmNodeScheduleStateT state)
|
||||
{
|
||||
SM_FAILOVER_INFRA_DOWN = 1,
|
||||
SM_FAILOVER_MGMT_DOWN = 2,
|
||||
SM_FAILOVER_OAM_DOWN = 4,
|
||||
}SmFailoverCommFaultBitFlagT;
|
||||
if(_is_valid_schedule_state(state))
|
||||
{
|
||||
if(_peer_pre_failure_schedule_state != state)
|
||||
{
|
||||
_peer_pre_failure_schedule_state = state;
|
||||
}
|
||||
}else
|
||||
{
|
||||
DPRINTFE("Runtime error, schedule state unknown %d", state);
|
||||
}
|
||||
}
|
||||
|
||||
void SmSystemFailoverStatus::serialize()
|
||||
{
|
||||
FILE* f;
|
||||
time_t last_update;
|
||||
struct tm* local_time;
|
||||
time(&last_update);
|
||||
local_time = localtime(&last_update);
|
||||
char timestamp[20];
|
||||
|
||||
|
||||
sprintf(timestamp, "%04d-%02d-%02dT%02d:%02d:%02d", local_time->tm_year + 1900,
|
||||
local_time->tm_mon + 1, local_time->tm_mday, local_time->tm_hour, local_time->tm_min, local_time->tm_sec);
|
||||
|
||||
f = fopen(filename, "w");
|
||||
fprintf(f, file_format, _host_schedule_state, _peer_schedule_state, timestamp);
|
||||
for(int i = 0; i < 72; i ++)
|
||||
{
|
||||
fputs(".", f);
|
||||
}
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
void SmSystemFailoverStatus::deserialize()
|
||||
{
|
||||
// FILE* f;
|
||||
// char timestamp[20];
|
||||
//
|
||||
// _host_schedule_state = _peer_schedule_state = SM_NODE_STATE_UNKNOWN;
|
||||
// int host_state, peer_state;
|
||||
// f = fopen(filename, "r");
|
||||
// if(NULL != f)
|
||||
// {
|
||||
// DPRINTFI("Loading schedule state from %s", filename);
|
||||
// int cnt = fscanf(f, file_format, &host_state, &peer_state, timestamp);
|
||||
// fclose(f);
|
||||
// if(cnt != 3)
|
||||
// {
|
||||
// DPRINTFE("Runtime error, %s has been modified.", filename);
|
||||
// }else
|
||||
// {
|
||||
// set_host_schedule_state((SmNodeScheduleStateT)host_state);
|
||||
// set_peer_schedule_state((SmNodeScheduleStateT)peer_state);
|
||||
// }
|
||||
// }
|
||||
}
|
||||
|
||||
// ****************************************************************************
|
||||
// sm_failover_ss get_node_if_healthy_score - get node interface healthy score
|
||||
|
@ -98,7 +205,7 @@ static int get_node_if_healthy_score(unsigned int interface_state)
|
|||
{
|
||||
healthy_score -= 2;
|
||||
}
|
||||
if(interface_state & SM_FAILOVER_INFRA_DOWN)
|
||||
if(interface_state & SM_FAILOVER_MGMT_DOWN)
|
||||
{
|
||||
healthy_score -= 4;
|
||||
}
|
||||
|
@ -152,7 +259,6 @@ SmErrorT _get_system_status(SmSystemStatusT& sys_status, char host_name[], char
|
|||
sys_status.peer_status.current_schedule_state = sm_get_controller_state(peer_name);
|
||||
return SM_OKAY;
|
||||
}
|
||||
|
||||
// ****************************************************************************
|
||||
// sm_failover_ss_get_survivor - select the failover survivor
|
||||
// This is the main entry/container for the failover logic to determine how
|
||||
|
@ -175,10 +281,17 @@ SmErrorT sm_failover_ss_get_survivor(SmSystemFailoverStatus& selection)
|
|||
|
||||
SmErrorT sm_failover_ss_get_survivor(const SmSystemStatusT& system_status, SmSystemFailoverStatus& selection)
|
||||
{
|
||||
DPRINTFI("get survivor %s %s, %s %s",
|
||||
system_status.host_status.node_name,
|
||||
sm_node_schedule_state_str(system_status.host_status.current_schedule_state),
|
||||
system_status.peer_status.node_name,
|
||||
sm_node_schedule_state_str(system_status.peer_status.current_schedule_state));
|
||||
|
||||
selection.set_host_schedule_state(system_status.host_status.current_schedule_state);
|
||||
selection.set_peer_schedule_state(system_status.peer_status.current_schedule_state);
|
||||
if(SM_HEARTBEAT_OK == system_status.heartbeat_state)
|
||||
{
|
||||
DPRINTFI("Heartbeat alive");
|
||||
int host_healthy_score, peer_healthy_score;
|
||||
host_healthy_score = get_node_if_healthy_score(system_status.host_status.interface_state);
|
||||
peer_healthy_score = get_node_if_healthy_score(system_status.peer_status.interface_state);
|
||||
|
@ -187,16 +300,52 @@ SmErrorT sm_failover_ss_get_survivor(const SmSystemStatusT& system_status, SmSys
|
|||
//host is more healthy
|
||||
selection.set_host_schedule_state(SM_NODE_STATE_ACTIVE);
|
||||
selection.set_peer_schedule_state(SM_NODE_STATE_STANDBY);
|
||||
if(system_status.peer_status.interface_state & SM_FAILOVER_MGMT_DOWN)
|
||||
{
|
||||
DPRINTFI("Disable peer, host go active");
|
||||
selection.set_peer_schedule_state(SM_NODE_STATE_FAILED);
|
||||
}
|
||||
}else if(peer_healthy_score > host_healthy_score)
|
||||
{
|
||||
//peer is more healthy
|
||||
selection.set_host_schedule_state(SM_NODE_STATE_STANDBY);
|
||||
selection.set_peer_schedule_state(SM_NODE_STATE_ACTIVE);
|
||||
if(system_status.host_status.interface_state & SM_FAILOVER_MGMT_DOWN)
|
||||
{
|
||||
DPRINTFI("Disable host, peer go active");
|
||||
selection.set_host_schedule_state(SM_NODE_STATE_FAILED);
|
||||
}
|
||||
}
|
||||
}else
|
||||
{
|
||||
DPRINTFI("Loss of heartbeat ALL");
|
||||
selection.set_host_schedule_state(SM_NODE_STATE_ACTIVE);
|
||||
selection.set_peer_schedule_state(SM_NODE_STATE_FAILED);
|
||||
}
|
||||
|
||||
if(system_status.host_status.current_schedule_state != selection.get_host_schedule_state() ||
|
||||
system_status.peer_status.current_schedule_state != selection.get_peer_schedule_state() )
|
||||
if(SM_SYSTEM_MODE_CPE_DUPLEX == system_status.system_mode)
|
||||
{
|
||||
}
|
||||
|
||||
if(SM_SYSTEM_MODE_CPE_DUPLEX_DC == system_status.system_mode)
|
||||
{
|
||||
return _get_survivor_dc(system_status, selection);
|
||||
}
|
||||
|
||||
SmNodeScheduleStateT host_schedule_state, peer_schedule_state;
|
||||
host_schedule_state = selection.get_host_schedule_state();
|
||||
peer_schedule_state = selection.get_peer_schedule_state();
|
||||
DPRINTFI("Host from %s to %s, Peer from %s to %s.",
|
||||
sm_node_schedule_state_str(system_status.host_status.current_schedule_state),
|
||||
sm_node_schedule_state_str(host_schedule_state),
|
||||
sm_node_schedule_state_str(system_status.peer_status.current_schedule_state),
|
||||
sm_node_schedule_state_str(peer_schedule_state)
|
||||
);
|
||||
|
||||
if((system_status.host_status.current_schedule_state == SM_NODE_STATE_ACTIVE &&
|
||||
host_schedule_state != SM_NODE_STATE_ACTIVE) ||
|
||||
(system_status.peer_status.current_schedule_state == SM_NODE_STATE_ACTIVE &&
|
||||
peer_schedule_state != SM_NODE_STATE_ACTIVE))
|
||||
{
|
||||
DPRINTFI("Uncontrolled swact starts. Host from %s to %s, Peer from %s to %s.",
|
||||
sm_node_schedule_state_str(system_status.host_status.current_schedule_state),
|
||||
|
@ -205,6 +354,43 @@ SmErrorT sm_failover_ss_get_survivor(const SmSystemStatusT& system_status, SmSys
|
|||
sm_node_schedule_state_str(selection.get_peer_schedule_state())
|
||||
);
|
||||
}
|
||||
|
||||
selection.serialize();
|
||||
return SM_OKAY;
|
||||
}
|
||||
// ****************************************************************************
|
||||
|
||||
|
||||
// ****************************************************************************
|
||||
// Direct connected
|
||||
SmErrorT _get_survivor_dc(const SmSystemStatusT& system_status, SmSystemFailoverStatus& selection)
|
||||
{
|
||||
if(SM_SYSTEM_MODE_CPE_DUPLEX_DC != system_status.system_mode)
|
||||
{
|
||||
DPRINTFE("Runtime error, not the right system mode %d", system_status.system_mode);
|
||||
return SM_FAILED;
|
||||
}
|
||||
|
||||
if(SM_HEARTBEAT_LOSS == system_status.heartbeat_state)
|
||||
{
|
||||
if(system_status.host_status.mgmt_state == SM_FAILOVER_INTERFACE_DOWN &&
|
||||
(system_status.host_status.infra_state == SM_FAILOVER_INTERFACE_DOWN ||
|
||||
system_status.host_status.infra_state == SM_FAILOVER_INTERFACE_UNKNOWN))
|
||||
{
|
||||
if(SM_FAILOVER_INTERFACE_DOWN == system_status.host_status.oam_state)
|
||||
{
|
||||
selection.set_host_schedule_state(SM_NODE_STATE_FAILED);
|
||||
selection.set_peer_schedule_state(SM_NODE_STATE_ACTIVE);
|
||||
}else
|
||||
{
|
||||
selection.set_peer_schedule_state(SM_NODE_STATE_FAILED);
|
||||
selection.set_host_schedule_state(SM_NODE_STATE_ACTIVE);
|
||||
}
|
||||
}else
|
||||
{
|
||||
selection.set_peer_schedule_state(SM_NODE_STATE_FAILED);
|
||||
selection.set_host_schedule_state(SM_NODE_STATE_ACTIVE);
|
||||
}
|
||||
}
|
||||
return SM_OKAY;
|
||||
}
|
||||
|
|
|
@ -8,9 +8,6 @@
|
|||
#define __SM_FAILOVER_SS_H__
|
||||
#include <stdio.h>
|
||||
#include "sm_types.h"
|
||||
//#ifdef __cplusplus
|
||||
//extern "C" {
|
||||
//#endif
|
||||
|
||||
typedef struct
|
||||
{
|
||||
|
@ -47,23 +44,37 @@ typedef struct
|
|||
class SmSystemFailoverStatus
|
||||
{
|
||||
public:
|
||||
SmSystemFailoverStatus();
|
||||
virtual ~SmSystemFailoverStatus();
|
||||
inline SmNodeScheduleStateT get_host_schedule_state() const {
|
||||
return _host_schedule_state;
|
||||
}
|
||||
inline SmNodeScheduleStateT get_host_pre_failure_schedule_state() const {
|
||||
return _host_pre_failure_schedule_state;
|
||||
}
|
||||
void set_host_schedule_state(SmNodeScheduleStateT state);
|
||||
void set_host_pre_failure_schedule_state(SmNodeScheduleStateT state);
|
||||
inline SmNodeScheduleStateT get_peer_schedule_state() const {
|
||||
return _peer_schedule_state;
|
||||
}
|
||||
inline SmNodeScheduleStateT get_peer_pre_failure_schedule_state() const {
|
||||
return _peer_pre_failure_schedule_state;
|
||||
}
|
||||
void set_peer_schedule_state(SmNodeScheduleStateT state);
|
||||
void set_peer_pre_failure_schedule_state(SmNodeScheduleStateT state);
|
||||
void serialize();
|
||||
void deserialize();
|
||||
static SmSystemFailoverStatus& get_status();
|
||||
private:
|
||||
SmSystemFailoverStatus();
|
||||
SmNodeScheduleStateT _host_pre_failure_schedule_state;
|
||||
SmNodeScheduleStateT _peer_pre_failure_schedule_state;
|
||||
SmNodeScheduleStateT _host_schedule_state;
|
||||
SmNodeScheduleStateT _peer_schedule_state;
|
||||
static const char filename[];
|
||||
static const char file_format[];
|
||||
|
||||
bool _is_valid_schedule_state(SmNodeScheduleStateT state);
|
||||
static SmSystemFailoverStatus _failover_status;
|
||||
};
|
||||
|
||||
// ****************************************************************************
|
||||
|
@ -73,7 +84,4 @@ SmErrorT sm_failover_ss_get_survivor(SmSystemFailoverStatus& selection);
|
|||
|
||||
SmErrorT sm_failover_ss_get_survivor(const SmSystemStatusT& system_status, SmSystemFailoverStatus& selection);
|
||||
|
||||
//#ifdef __cplusplus
|
||||
//}
|
||||
//#endif
|
||||
#endif // __SM_FAILOVER_SS_H__
|
||||
|
|
|
@ -10,37 +10,26 @@
|
|||
#include "sm_failover_fsm.h"
|
||||
#include "sm_failover_ss.h"
|
||||
|
||||
SmErrorT SmFailoverSurvivedState::enter_state()
|
||||
static void _audit_failover_state()
|
||||
{
|
||||
DPRINTFI("Entering Survived state");
|
||||
return SM_OKAY;
|
||||
}
|
||||
|
||||
SmErrorT SmFailoverSurvivedState::event_handler(SmFailoverEventT event, const ISmFSMEventData* event_data)
|
||||
{
|
||||
switch (event)
|
||||
{
|
||||
case SM_HEARTBEAT_LOST:
|
||||
case SM_INTERFACE_DOWN:
|
||||
case SM_INTERFACE_UP:
|
||||
// ignore, recover is possible only when heartbeat is recovered
|
||||
break;
|
||||
case SM_HEARTBEAT_RECOVER:
|
||||
{
|
||||
SmSystemFailoverStatus failover_status;
|
||||
SmSystemFailoverStatus& failover_status = SmSystemFailoverStatus::get_status();
|
||||
SmErrorT error = sm_failover_ss_get_survivor(failover_status);
|
||||
SmNodeScheduleStateT host_state = failover_status.get_host_schedule_state();
|
||||
SmNodeScheduleStateT peer_state = failover_status.get_peer_schedule_state();
|
||||
if(SM_OKAY != error)
|
||||
{
|
||||
DPRINTFE("Failed to get failover survivor. Error %s", sm_error_str(error));
|
||||
return error;
|
||||
return;
|
||||
}
|
||||
|
||||
if(SM_NODE_STATE_ACTIVE == host_state && SM_NODE_STATE_STANDBY == peer_state)
|
||||
if(SM_NODE_STATE_ACTIVE == host_state && SM_NODE_STATE_FAILED == peer_state)
|
||||
{
|
||||
// nothing changed yet
|
||||
}
|
||||
else if(SM_NODE_STATE_ACTIVE == host_state && SM_NODE_STATE_STANDBY == peer_state)
|
||||
{
|
||||
// Active is the only possible state to be scheduled to from survived state
|
||||
this->fsm.set_state(SM_FAILOVER_STATE_NORMAL);
|
||||
SmFailoverFSM::get_fsm().set_state(SM_FAILOVER_STATE_NORMAL);
|
||||
}else
|
||||
{
|
||||
DPRINTFE("Runtime error: unexpected scheduling state: host %s, peer %s",
|
||||
|
@ -48,15 +37,21 @@ SmErrorT SmFailoverSurvivedState::event_handler(SmFailoverEventT event, const IS
|
|||
sm_node_schedule_state_str(peer_state));
|
||||
}
|
||||
}
|
||||
|
||||
SmErrorT SmFailoverSurvivedState::event_handler(SmFailoverEventT event, const ISmFSMEventData* event_data)
|
||||
{
|
||||
switch (event)
|
||||
{
|
||||
case SM_FAILOVER_EVENT_IF_STATE_CHANGED:
|
||||
break;
|
||||
case SM_FAILOVER_EVENT_NODE_ENABLED:
|
||||
_audit_failover_state();
|
||||
break;
|
||||
default:
|
||||
DPRINTFE("Runtime error, unexpected event %d, at state %d", event, this->fsm.get_state());
|
||||
DPRINTFE("Runtime error, unexpected event %s, at state %s",
|
||||
sm_failover_event_str(event),
|
||||
sm_failover_state_str(this->fsm.get_state()));
|
||||
}
|
||||
return SM_OKAY;
|
||||
}
|
||||
|
||||
SmErrorT SmFailoverSurvivedState::exit_state()
|
||||
{
|
||||
DPRINTFI("Exiting Survived state");
|
||||
return SM_OKAY;
|
||||
}
|
||||
|
|
|
@ -12,8 +12,6 @@ class SmFailoverSurvivedState : public SmFSMState
|
|||
{
|
||||
public:
|
||||
SmFailoverSurvivedState(SmFailoverFSM& fsm) : SmFSMState(fsm){}
|
||||
SmErrorT enter_state();
|
||||
SmErrorT exit_state();
|
||||
|
||||
protected:
|
||||
SmErrorT event_handler(SmFailoverEventT event, const ISmFSMEventData* event_data);
|
||||
|
|
|
@ -1,244 +0,0 @@
|
|||
//
|
||||
// Copyright (c) 2017 Wind River Systems, Inc.
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
#include "sm_failover_thread.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <errno.h>
|
||||
#include <pthread.h>
|
||||
#include <signal.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "sm_failover.h"
|
||||
#include "sm_types.h"
|
||||
#include "sm_time.h"
|
||||
#include "sm_debug.h"
|
||||
#include "sm_trap.h"
|
||||
#include "sm_selobj.h"
|
||||
#include "sm_thread_health.h"
|
||||
#include "sm_hw.h"
|
||||
#include "sm_timer.h"
|
||||
|
||||
#define SM_FAILOVER_THREAD_TICK_INTERVAL_IN_MS 1000
|
||||
|
||||
static const char sm_failover_thread_name[] = "sm_failover";
|
||||
static const int sm_failover_tick_interval_in_ms = 1000;
|
||||
|
||||
static sig_atomic_t _stay_on = 1;
|
||||
static bool _thread_created = false;
|
||||
static pthread_t _failover_thread;
|
||||
|
||||
static void sm_failover_interface_change_callback(
|
||||
SmHwInterfaceChangeDataT* if_change )
|
||||
{
|
||||
switch ( if_change->interface_state )
|
||||
{
|
||||
case SM_INTERFACE_STATE_DISABLED:
|
||||
DPRINTFI("Interface %s is down", if_change->interface_name);
|
||||
sm_failover_interface_down(if_change->interface_name);
|
||||
break;
|
||||
case SM_INTERFACE_STATE_ENABLED:
|
||||
DPRINTFI("Interface %s is up", if_change->interface_name);
|
||||
sm_failover_interface_up(if_change->interface_name);
|
||||
break;
|
||||
default:
|
||||
DPRINTFI("Interface %s state changed to %d",
|
||||
if_change->interface_name, if_change->interface_state);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// ****************************************************************************
|
||||
// Failover Thread - Initialize Thread
|
||||
// ============================================
|
||||
SmErrorT sm_failover_thread_initialize_thread( void )
|
||||
{
|
||||
SmErrorT error;
|
||||
SmHwCallbacksT callbacks;
|
||||
|
||||
error = sm_selobj_initialize();
|
||||
if( SM_OKAY != error )
|
||||
{
|
||||
DPRINTFE( "Failed to initialize selection object module, error=%s.",
|
||||
sm_error_str( error ) );
|
||||
return( SM_FAILED );
|
||||
}
|
||||
|
||||
error = sm_timer_initialize( SM_FAILOVER_THREAD_TICK_INTERVAL_IN_MS );
|
||||
if( SM_OKAY != error )
|
||||
{
|
||||
DPRINTFE( "Failed to initialize timer module, error=%s.",
|
||||
sm_error_str( error ) );
|
||||
return( SM_FAILED );
|
||||
}
|
||||
|
||||
memset( &callbacks, 0, sizeof(callbacks) );
|
||||
callbacks.interface_change = sm_failover_interface_change_callback;
|
||||
error = sm_hw_initialize( &callbacks );
|
||||
if( SM_OKAY != error )
|
||||
{
|
||||
DPRINTFE( "Failed to initialize hardware module, error=%s.",
|
||||
sm_error_str( error ) );
|
||||
return( error );
|
||||
}
|
||||
|
||||
return( SM_OKAY );
|
||||
}
|
||||
// ****************************************************************************
|
||||
|
||||
|
||||
// ****************************************************************************
|
||||
// Failover Thread - Finalize Thread
|
||||
// ==========================================
|
||||
SmErrorT sm_failover_thread_finalize_thread( void )
|
||||
{
|
||||
SmErrorT error;
|
||||
|
||||
error = sm_timer_finalize();
|
||||
if( SM_OKAY != error )
|
||||
{
|
||||
DPRINTFE( "Failed to finialize timer module, error=%s.",
|
||||
sm_error_str( error ) );
|
||||
}
|
||||
|
||||
error = sm_selobj_finalize();
|
||||
if( SM_OKAY != error )
|
||||
{
|
||||
DPRINTFE( "Failed to finialize selection object module, error=%s.",
|
||||
sm_error_str( error ) );
|
||||
}
|
||||
|
||||
return( SM_OKAY );
|
||||
}
|
||||
// ****************************************************************************
|
||||
|
||||
|
||||
// ****************************************************************************
|
||||
// Failover Thread - Main
|
||||
// ===============================
|
||||
static void* sm_failover_thread_main( void* arguments )
|
||||
{
|
||||
SmErrorT error;
|
||||
|
||||
pthread_setname_np( pthread_self(), sm_failover_thread_name );
|
||||
sm_debug_set_thread_info();
|
||||
sm_trap_set_thread_info();
|
||||
|
||||
DPRINTFI( "Starting" );
|
||||
|
||||
error = sm_failover_thread_initialize_thread();
|
||||
if( SM_OKAY != error )
|
||||
{
|
||||
DPRINTFE( "Failover thread initialize failed, error=%s.",
|
||||
sm_error_str( error ) );
|
||||
pthread_exit( NULL );
|
||||
}
|
||||
|
||||
DPRINTFI( "Started." );
|
||||
|
||||
while( _stay_on )
|
||||
{
|
||||
error = sm_selobj_dispatch( sm_failover_tick_interval_in_ms );
|
||||
if( SM_OKAY != error )
|
||||
{
|
||||
DPRINTFE( "Selection object dispatch failed, error=%s.",
|
||||
sm_error_str(error) );
|
||||
break;
|
||||
}
|
||||
|
||||
sm_failover_action();
|
||||
}
|
||||
|
||||
DPRINTFI( "Shutting down." );
|
||||
|
||||
error = sm_failover_thread_finalize_thread();
|
||||
if( SM_OKAY != error )
|
||||
{
|
||||
DPRINTFE( "Failover finalize thread failed, error=%s.",
|
||||
sm_error_str( error ) );
|
||||
}
|
||||
|
||||
DPRINTFI( "Shutdown complete." );
|
||||
|
||||
return( NULL );
|
||||
}
|
||||
// ****************************************************************************
|
||||
|
||||
// ****************************************************************************
|
||||
// Failover Thread - Start
|
||||
// ================================
|
||||
SmErrorT sm_failover_thread_start( void )
|
||||
{
|
||||
int result;
|
||||
|
||||
_stay_on = 1;
|
||||
_thread_created = false;
|
||||
|
||||
result = pthread_create( &_failover_thread, NULL,
|
||||
sm_failover_thread_main, NULL );
|
||||
if( 0 != result )
|
||||
{
|
||||
DPRINTFE( "Failed to start failover thread, error=%s.",
|
||||
strerror(result) );
|
||||
return( SM_FAILED );
|
||||
}
|
||||
|
||||
_thread_created = true;
|
||||
|
||||
return( SM_OKAY );
|
||||
}
|
||||
// ****************************************************************************
|
||||
|
||||
// ****************************************************************************
|
||||
// Failover Thread - Stop
|
||||
// ===============================
|
||||
SmErrorT sm_failover_thread_stop( void )
|
||||
{
|
||||
_stay_on = 0;
|
||||
|
||||
if( _thread_created )
|
||||
{
|
||||
long ms_expired;
|
||||
SmTimeT time_prev;
|
||||
int result;
|
||||
|
||||
sm_time_get( &time_prev );
|
||||
|
||||
while( true )
|
||||
{
|
||||
result = pthread_tryjoin_np( _failover_thread, NULL );
|
||||
if(( 0 != result )&&( EBUSY != result ))
|
||||
{
|
||||
if(( ESRCH != result )&&( EINVAL != result ))
|
||||
{
|
||||
DPRINTFE( "Failed to wait for failover thread "
|
||||
"exit, sending kill signal, error=%s.",
|
||||
strerror(result) );
|
||||
pthread_kill( _failover_thread, SIGKILL );
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
ms_expired = sm_time_get_elapsed_ms( &time_prev );
|
||||
if( 12000 <= ms_expired )
|
||||
{
|
||||
DPRINTFE( "Failed to stop failover thread, sending "
|
||||
"kill signal." );
|
||||
pthread_kill( _failover_thread, SIGKILL );
|
||||
break;
|
||||
}
|
||||
|
||||
usleep( 250000 ); // 250 milliseconds.
|
||||
}
|
||||
}
|
||||
|
||||
_thread_created = false;
|
||||
|
||||
return( SM_OKAY );
|
||||
}
|
||||
// ****************************************************************************
|
|
@ -1,33 +0,0 @@
|
|||
//
|
||||
// Copyright (c) 2017 Wind River Systems, Inc.
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
#ifndef __SM_FAILOVER_THREAD_H__
|
||||
#define __SM_FAILOVER_THREAD_H__
|
||||
|
||||
#include "sm_types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
// ****************************************************************************
|
||||
// Failover Thread - Start
|
||||
// ================================
|
||||
extern SmErrorT sm_failover_thread_start( void );
|
||||
// ****************************************************************************
|
||||
|
||||
// ****************************************************************************
|
||||
// Failover Thread - Stop
|
||||
// ===============================
|
||||
extern SmErrorT sm_failover_thread_stop( void );
|
||||
// ****************************************************************************
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -1,12 +1,16 @@
|
|||
#include "sm_failover_utils.h"
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
#include "sm_debug.h"
|
||||
#include "sm_service_domain_member_table.h"
|
||||
#include "sm_service_domain_assignment_table.h"
|
||||
#include "sm_service_domain_table.h"
|
||||
|
||||
#define SM_NODE_STAY_FAILED_FILE "/var/run/.sm_stay_fail"
|
||||
|
||||
// ****************************************************************************
|
||||
// Failover utilities - loop all service domain members
|
||||
// ==================
|
||||
|
@ -90,3 +94,48 @@ SmNodeScheduleStateT sm_get_controller_state(
|
|||
}
|
||||
// ****************************************************************************
|
||||
|
||||
|
||||
|
||||
// ****************************************************************************
|
||||
// Failover Utilities - Set StayFailed
|
||||
// ==============================
|
||||
SmErrorT sm_failover_utils_set_stayfailed_flag()
|
||||
{
|
||||
int fd = open( SM_NODE_STAY_FAILED_FILE,
|
||||
O_RDWR | O_CREAT, S_IRUSR | S_IRGRP | S_IROTH);
|
||||
if( 0 > fd )
|
||||
{
|
||||
DPRINTFE( "Failed to create file (%s), error=%s.",
|
||||
SM_NODE_STAY_FAILED_FILE, strerror(errno) );
|
||||
return( SM_FAILED );
|
||||
}
|
||||
|
||||
close(fd);
|
||||
return( SM_OKAY );
|
||||
|
||||
}
|
||||
// ****************************************************************************
|
||||
|
||||
// ****************************************************************************
|
||||
// Failover Utilities - Set StayFailed
|
||||
// ==============================
|
||||
SmErrorT sm_failover_utils_reset_stayfailed_flag()
|
||||
{
|
||||
unlink( SM_NODE_STAY_FAILED_FILE );
|
||||
return( SM_OKAY );
|
||||
}
|
||||
// ****************************************************************************
|
||||
|
||||
|
||||
// ****************************************************************************
|
||||
// Failover Utilities - check StayFailed
|
||||
// ==============================
|
||||
bool sm_failover_utils_is_stayfailed()
|
||||
{
|
||||
if( 0 == access( SM_NODE_STAY_FAILED_FILE, F_OK ) )
|
||||
{
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
// ****************************************************************************
|
||||
|
|
|
@ -15,7 +15,25 @@ extern "C" {
|
|||
// ****************************************************************************
|
||||
// Failover Utilities - get node controller state
|
||||
// ==============================
|
||||
SmNodeScheduleStateT sm_get_controller_state(const char node_name[]);
|
||||
extern SmNodeScheduleStateT sm_get_controller_state(const char node_name[]);
|
||||
// ****************************************************************************
|
||||
|
||||
// ****************************************************************************
|
||||
// failover Utilities - Set StayFailed
|
||||
// ==============================
|
||||
extern SmErrorT sm_failover_utils_set_stayfailed_flag();
|
||||
// ****************************************************************************
|
||||
|
||||
// ****************************************************************************
|
||||
// Node Utilities - Set StayFailed
|
||||
// ==============================
|
||||
extern SmErrorT sm_failover_utils_reset_stayfailed_flag();
|
||||
// ****************************************************************************
|
||||
|
||||
// ****************************************************************************
|
||||
// Node Utilities - is StayFailed set
|
||||
// ==============================
|
||||
extern bool sm_failover_utils_is_stayfailed();
|
||||
// ****************************************************************************
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#include "sm_service_api.h"
|
||||
#include "sm_failover.h"
|
||||
#include "sm_node_swact_monitor.h"
|
||||
#include "sm_failover_fsm.h"
|
||||
|
||||
#define SM_NODE_AUDIT_TIMER_IN_MS 1000
|
||||
#define SM_INTERFACE_AUDIT_TIMER_IN_MS 1000
|
||||
|
@ -177,6 +178,23 @@ static void sm_main_event_handler_api_node_set_callback( char node_name[],
|
|||
return;
|
||||
}
|
||||
|
||||
char peer_name[SM_NODE_NAME_MAX_CHAR] = {0};
|
||||
error = sm_node_api_get_peername(peer_name);
|
||||
if( SM_OKAY != error )
|
||||
{
|
||||
DPRINTFE( "Failed to get peername, error=%s.",
|
||||
sm_error_str( error ) );
|
||||
}
|
||||
if(SM_NODE_SET_ACTION_EVENT == action &&
|
||||
SM_NODE_ADMIN_STATE_UNLOCKED == admin_state &&
|
||||
SM_NODE_OPERATIONAL_STATE_ENABLED == oper_state &&
|
||||
(SM_NODE_AVAIL_STATUS_AVAILABLE == avail_status ||
|
||||
SM_NODE_AVAIL_STATUS_DEGRADED == avail_status ) &&
|
||||
0 == strcmp(node_name, peer_name))
|
||||
{
|
||||
SmFailoverFSM::get_fsm().send_event(SM_FAILOVER_EVENT_NODE_ENABLED, NULL);
|
||||
}
|
||||
|
||||
if( SM_NODE_SET_ACTION_SWACT == action )
|
||||
{
|
||||
SmNodeSwactMonitor::SwactStart(SM_NODE_STATE_STANDBY);
|
||||
|
|
|
@ -51,7 +51,6 @@
|
|||
#include "sm_service_action_table.h"
|
||||
#include "sm_heartbeat_thread.h"
|
||||
#include "sm_failover.h"
|
||||
#include "sm_failover_thread.h"
|
||||
#include "sm_task_affining_thread.h"
|
||||
#include "sm_worker_thread.h"
|
||||
|
||||
|
@ -328,6 +327,14 @@ static SmErrorT sm_process_initialize( void )
|
|||
return( SM_FAILED );
|
||||
}
|
||||
|
||||
error = sm_failover_initialize();
|
||||
if( SM_OKAY != error )
|
||||
{
|
||||
DPRINTFE( "Failed to initialize failover handler module, error=%s.",
|
||||
sm_error_str( error ) );
|
||||
return( SM_FAILED );
|
||||
}
|
||||
|
||||
error = sm_service_heartbeat_thread_start();
|
||||
if( SM_OKAY != error )
|
||||
{
|
||||
|
@ -344,22 +351,6 @@ static SmErrorT sm_process_initialize( void )
|
|||
return( SM_FAILED );
|
||||
}
|
||||
|
||||
error = sm_failover_thread_start();
|
||||
if( SM_OKAY != error )
|
||||
{
|
||||
DPRINTFE( "Failed to start the failover thread, error=%s.",
|
||||
sm_error_str( error ) );
|
||||
return( SM_FAILED );
|
||||
}
|
||||
|
||||
error = sm_failover_initialize();
|
||||
if( SM_OKAY != error )
|
||||
{
|
||||
DPRINTFE( "Failed to initialize failover handler module, error=%s.",
|
||||
sm_error_str( error ) );
|
||||
return( SM_FAILED );
|
||||
}
|
||||
|
||||
// Start a task affining thread for AIO duplex system
|
||||
if(_is_aio_duplex)
|
||||
{
|
||||
|
@ -394,20 +385,6 @@ static SmErrorT sm_process_finalize( void )
|
|||
}
|
||||
}
|
||||
|
||||
error = sm_failover_thread_stop();
|
||||
if( SM_OKAY != error )
|
||||
{
|
||||
DPRINTFE( "Failed to stop failover thread, error=%s.",
|
||||
sm_error_str( error ) );
|
||||
}
|
||||
|
||||
error = sm_failover_finalize();
|
||||
if( SM_OKAY != error )
|
||||
{
|
||||
DPRINTFE( "Failed to finalize failover handler module, error=%s.",
|
||||
sm_error_str( error ) );
|
||||
}
|
||||
|
||||
error = sm_main_event_handler_finalize();
|
||||
if( SM_OKAY != error )
|
||||
{
|
||||
|
@ -422,6 +399,13 @@ static SmErrorT sm_process_finalize( void )
|
|||
sm_error_str(error) );
|
||||
}
|
||||
|
||||
error = sm_failover_finalize();
|
||||
if( SM_OKAY != error )
|
||||
{
|
||||
DPRINTFE( "Failed to finalize failover handler module, error=%s.",
|
||||
sm_error_str( error ) );
|
||||
}
|
||||
|
||||
error = sm_service_heartbeat_api_finalize( true );
|
||||
if( SM_OKAY != error )
|
||||
{
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include "sm_msg.h"
|
||||
#include "sm_service_domain_utils.h"
|
||||
#include "sm_service_domain_fsm.h"
|
||||
#include "sm_failover_fsm.h"
|
||||
|
||||
// ****************************************************************************
|
||||
// Service Domain Initial State - Hello Timer
|
||||
|
@ -122,6 +123,7 @@ SmErrorT sm_service_domain_initial_state_exit( SmServiceDomainT* domain )
|
|||
|
||||
sm_heartbeat_enable();
|
||||
|
||||
SmFailoverFSM::get_fsm().send_event(SM_FAILOVER_EVENT_HEARTBEAT_ENABLED, NULL);
|
||||
return( SM_OKAY );
|
||||
}
|
||||
// ****************************************************************************
|
||||
|
|
|
@ -9,8 +9,33 @@
|
|||
#include <errno.h>
|
||||
#include <unistd.h>
|
||||
#include <time.h>
|
||||
#include <string.h>
|
||||
#include "sm_util_types.h"
|
||||
#include "sm_debug.h"
|
||||
#include "sm_trap.h"
|
||||
|
||||
SmSimpleAction::SmSimpleAction(const char* action_name, SmSimpleActionCallback callback)
|
||||
{
|
||||
_callback = callback;
|
||||
strncpy(_action_name, action_name, sizeof(_action_name));
|
||||
}
|
||||
|
||||
SmSimpleAction::~SmSimpleAction()
|
||||
{
|
||||
}
|
||||
|
||||
void SmSimpleAction::action()
|
||||
{
|
||||
if(NULL != _callback)
|
||||
{
|
||||
_callback(*this);
|
||||
}
|
||||
}
|
||||
|
||||
const char* SmSimpleAction::get_name() const
|
||||
{
|
||||
return _action_name;
|
||||
}
|
||||
|
||||
SmWorkerThread SmWorkerThread::_the_worker;
|
||||
|
||||
|
@ -68,6 +93,10 @@ SmWorkerThread::~SmWorkerThread()
|
|||
// ****************************************************************************
|
||||
void* SmWorkerThread::thread_helper(SmWorkerThread* workerThread)
|
||||
{
|
||||
pthread_setname_np( pthread_self(), "worker" );
|
||||
sm_debug_set_thread_info();
|
||||
sm_trap_set_thread_info();
|
||||
DPRINTFI("worker thread started");
|
||||
workerThread->thread_run();
|
||||
return 0;
|
||||
}
|
||||
|
@ -126,20 +155,33 @@ SmErrorT SmWorkerThread::stop()
|
|||
// ****************************************************************************
|
||||
// SmWorkerThread::add_action add a regular priority action
|
||||
// ****************************************************************************
|
||||
void SmWorkerThread::add_action(SmAction& action)
|
||||
void SmWorkerThread::add_action(SmAction* action)
|
||||
{
|
||||
mutex_holder(&this->_mutex);
|
||||
this->_regular_queue.push(&action);
|
||||
this->_regular_queue.push(action);
|
||||
int res = sem_post(&_sem);
|
||||
if(0 != res)
|
||||
{
|
||||
DPRINTFE("failed to signal semaphore. error %d", errno);
|
||||
}
|
||||
|
||||
DPRINTFI("Action %s is added", action->get_name());
|
||||
}
|
||||
// ****************************************************************************
|
||||
|
||||
// ****************************************************************************
|
||||
// SmWorkerThread::add_priority_action add a high priority action
|
||||
// ****************************************************************************
|
||||
void SmWorkerThread::add_priority_action(SmAction& action)
|
||||
void SmWorkerThread::add_priority_action(SmAction* action)
|
||||
{
|
||||
mutex_holder(&this->_mutex);
|
||||
this->_priority_queue.push(&action);
|
||||
this->_priority_queue.push(action);
|
||||
int res = sem_post(&_sem);
|
||||
if(0 != res)
|
||||
{
|
||||
DPRINTFE("failed to signal semaphore. error %d", errno);
|
||||
}
|
||||
DPRINTFI("Action %s is added to priority queue", action->get_name());
|
||||
}
|
||||
// ****************************************************************************
|
||||
|
||||
|
@ -177,7 +219,9 @@ void SmWorkerThread::thread_run()
|
|||
}
|
||||
if( NULL != action )
|
||||
{
|
||||
DPRINTFI("Executing action %s", action->get_name());
|
||||
action->action();
|
||||
DPRINTFI("Executing action %s completed", action->get_name());
|
||||
}
|
||||
}else if(ETIMEDOUT != errno)
|
||||
{
|
||||
|
|
|
@ -18,10 +18,23 @@ class SmAction
|
|||
{
|
||||
public:
|
||||
virtual void action() = 0;
|
||||
virtual const char* get_name() const = 0;
|
||||
};
|
||||
|
||||
class SmSimpleAction : public SmAction
|
||||
{
|
||||
public:
|
||||
typedef void(*SmSimpleActionCallback)(SmSimpleAction&);
|
||||
SmSimpleAction(const char* action_name, SmSimpleActionCallback callback);
|
||||
virtual ~SmSimpleAction();
|
||||
void action();
|
||||
const char* get_name() const;
|
||||
private:
|
||||
SmSimpleActionCallback _callback;
|
||||
char _action_name[32];
|
||||
};
|
||||
|
||||
typedef std::queue<SmAction*> SmActionQueueT;
|
||||
class SmWorkerThread;
|
||||
|
||||
// ****************************************************************************
|
||||
// SmWorkerThread work thread class
|
||||
|
@ -36,13 +49,13 @@ class SmWorkerThread
|
|||
A normal priority action will be scheduled after all
|
||||
high priority actions.
|
||||
*/
|
||||
void add_action(SmAction& action);
|
||||
void add_action(SmAction* action);
|
||||
/*
|
||||
Add an action to high priority FCFS queue.
|
||||
A high priority action is nonpreemptive. It will
|
||||
be scheduled after the current action.
|
||||
*/
|
||||
void add_priority_action(SmAction& action);
|
||||
void add_priority_action(SmAction* action);
|
||||
|
||||
|
||||
// retrieve singleton object
|
||||
|
|
|
@ -166,6 +166,25 @@ _sm_service_domain_interface_event_mappings[SM_SERVICE_DOMAIN_INTERFACE_EVENT_MA
|
|||
{ SM_SERVICE_DOMAIN_INTERFACE_EVENT_NOT_IN_USE, "not-in-use" }
|
||||
};
|
||||
|
||||
static SmValueStrMappingT
|
||||
_sm_failover_event_mappings[SM_FAILOVER_EVENT_MAX] =
|
||||
{
|
||||
{SM_FAILOVER_EVENT_HEARTBEAT_ENABLED, "heartbeat-enabled"},
|
||||
{SM_FAILOVER_EVENT_IF_STATE_CHANGED, "interface-state-changed"},
|
||||
{SM_FAILOVER_EVENT_FAIL_PENDING_TIMEOUT, "fail-pending-timeout"},
|
||||
{SM_FAILOVER_EVENT_NODE_ENABLED, "node-enabled"}
|
||||
};
|
||||
|
||||
static SmValueStrMappingT
|
||||
_sm_failover_state_mappings[SM_FAILOVER_STATE_MAX] =
|
||||
{
|
||||
{SM_FAILOVER_STATE_INITIAL, "initial"},
|
||||
{SM_FAILOVER_STATE_NORMAL, "normal"},
|
||||
{SM_FAILOVER_STATE_FAIL_PENDING, "fail-pending"},
|
||||
{SM_FAILOVER_STATE_FAILED, "failed"},
|
||||
{SM_FAILOVER_STATE_SURVIVED, "survived"}
|
||||
};
|
||||
|
||||
static SmValueStrMappingT
|
||||
_sm_service_domain_neighbor_state_mappings[SM_SERVICE_DOMAIN_NEIGHBOR_STATE_MAX] =
|
||||
{
|
||||
|
@ -953,6 +972,28 @@ const char* sm_service_domain_interface_event_str(
|
|||
}
|
||||
// ****************************************************************************
|
||||
|
||||
// ****************************************************************************
|
||||
// Types - Failover Event String
|
||||
// =============================================
|
||||
const char* sm_failover_event_str( SmFailoverEventT event )
|
||||
{
|
||||
return( sm_mapping_get_str( _sm_failover_event_mappings,
|
||||
SM_FAILOVER_EVENT_MAX,
|
||||
event ) );
|
||||
}
|
||||
// ****************************************************************************
|
||||
|
||||
// ****************************************************************************
|
||||
// Types - Failover State String
|
||||
// =============================================
|
||||
const char* sm_failover_state_str( SmFailoverStateT state )
|
||||
{
|
||||
return( sm_mapping_get_str( _sm_failover_state_mappings,
|
||||
SM_FAILOVER_STATE_MAX,
|
||||
state ) );
|
||||
}
|
||||
// ****************************************************************************
|
||||
|
||||
// ****************************************************************************
|
||||
// Types - Service Domain Neighbor State Value
|
||||
// ===========================================
|
||||
|
|
|
@ -279,6 +279,23 @@ typedef enum
|
|||
SM_SERVICE_DOMAIN_EVENT_DATA_MAX
|
||||
} SmServiceDomainEventDataT;
|
||||
|
||||
typedef enum{
|
||||
SM_FAILOVER_STATE_INITIAL,
|
||||
SM_FAILOVER_STATE_NORMAL,
|
||||
SM_FAILOVER_STATE_FAIL_PENDING,
|
||||
SM_FAILOVER_STATE_FAILED,
|
||||
SM_FAILOVER_STATE_SURVIVED,
|
||||
SM_FAILOVER_STATE_MAX
|
||||
}SmFailoverStateT;
|
||||
|
||||
typedef enum{
|
||||
SM_FAILOVER_EVENT_HEARTBEAT_ENABLED,
|
||||
SM_FAILOVER_EVENT_IF_STATE_CHANGED,
|
||||
SM_FAILOVER_EVENT_FAIL_PENDING_TIMEOUT,
|
||||
SM_FAILOVER_EVENT_NODE_ENABLED,
|
||||
SM_FAILOVER_EVENT_MAX
|
||||
}SmFailoverEventT;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
SM_SERVICE_DOMAIN_INTERFACE_EVENT_NIL,
|
||||
|
@ -931,6 +948,18 @@ extern const char* sm_service_domain_interface_event_str(
|
|||
SmServiceDomainInterfaceEventT event );
|
||||
// ****************************************************************************
|
||||
|
||||
// ****************************************************************************
|
||||
// Types - Failover Event String
|
||||
// =============================================
|
||||
extern const char* sm_failover_event_str( SmFailoverEventT event );
|
||||
// ****************************************************************************
|
||||
|
||||
// ****************************************************************************
|
||||
// Types - Failover State String
|
||||
// =============================================
|
||||
extern const char* sm_failover_state_str( SmFailoverStateT state );
|
||||
// ****************************************************************************
|
||||
|
||||
// ****************************************************************************
|
||||
// Types - Service Domain Neighbor State Value
|
||||
// ===========================================
|
||||
|
|
|
@ -3,7 +3,7 @@ BEGIN TRANSACTION;
|
|||
CREATE TABLE NODES ( ID INTEGER PRIMARY KEY AUTOINCREMENT, NAME CHAR(32), ADMINISTRATIVE_STATE CHAR(32), OPERATIONAL_STATE CHAR(32), AVAILABILITY_STATUS CHAR(32), READY_STATE CHAR(32), STATE_UUID CHAR(42));
|
||||
CREATE TABLE NODE_HISTORY ( ID INTEGER PRIMARY KEY AUTOINCREMENT, NAME CHAR(32), ADMINISTRATIVE_STATE CHAR(32), OPERATIONAL_STATE CHAR(32), AVAILABILITY_STATUS CHAR(32), READY_STATE CHAR(32), STATE_UUID CHAR(42));
|
||||
CREATE TABLE SERVICE_DOMAIN_INTERFACES ( ID INTEGER PRIMARY KEY AUTOINCREMENT, PROVISIONED CHAR(32), SERVICE_DOMAIN CHAR(32), SERVICE_DOMAIN_INTERFACE CHAR(32), PATH_TYPE CHAR(32), AUTH_TYPE CHAR(32), AUTH_KEY CHAR(128), INTERFACE_NAME CHAR(32), INTERFACE_STATE CHAR(32), NETWORK_TYPE CHAR(32), NETWORK_MULTICAST CHAR(256), NETWORK_ADDRESS CHAR(256), NETWORK_PORT INT, NETWORK_HEARTBEAT_PORT INT, NETWORK_PEER_ADDRESS CHAR(256), NETWORK_PEER_PORT INT, NETWORK_PEER_HEARTBEAT_PORT INT , INTERFACE_CONNECT_TYPE CHAR(32) DEFAULT 'tor');
|
||||
INSERT INTO "SERVICE_DOMAIN_INTERFACES" VALUES(1,'yes','controller','management-interface','primary','none','','','','','','','','','','','','tor');
|
||||
INSERT INTO "SERVICE_DOMAIN_INTERFACES" VALUES(1,'yes','controller','management-interface','secondary','none','','','','','','','','','','','','tor');
|
||||
INSERT INTO "SERVICE_DOMAIN_INTERFACES" VALUES(2,'yes','controller','oam-interface','secondary','hmac-sha512','titanium-server','','','','','','','','','','','tor');
|
||||
INSERT INTO "SERVICE_DOMAIN_INTERFACES" VALUES(3,'yes','controller','infrastructure-interface','secondary','none','','','','','','','','','','','','tor');
|
||||
CREATE TABLE SERVICE_DOMAINS ( ID INTEGER PRIMARY KEY AUTOINCREMENT, PROVISIONED CHAR(32), NAME CHAR(32), ORCHESTRATION CHAR(32), DESIGNATION CHAR(32), PREEMPT CHAR(32), PRIORITY INT, HELLO_INTERVAL INT, DEAD_INTERVAL INT, WAIT_INTERVAL INT, EXCHANGE_INTERVAL INT, STATE CHAR(32), SPLIT_BRAIN_RECOVERY CHAR(32), LEADER CHAR(32), GENERATION INT);
|
||||
|
|
Loading…
Reference in New Issue