Introduce failover FSM

Introduce failover FSM to handle communication failure between
controllers.
Failover FSM has 4 states:
Normal: when system running with full redundency
Fail Pending: communication failure occured
Failed: the controller is determined as failure. Its peer will
        assume service
Survived: the controller is determined as survivor. Its peer has
        failed

The controllers are in one of the below possible state pairs:
normal/normal, fail-pending/fail-pending, failed/survived

A failed controller will not resume responsbility before the
system restores its full redundency (normal/normal)

A survivor will not fail before the system restores its
full redundency (normal/normal)

Future implementation may allow an administrator to force
a failed controller become active, to manually recover
(with possiblity of losing data), should the survivor is
no longer capable to provide service.

Story: 2003577
Task: 26404

Change-Id: I51635e9e60b6fb6bad89e06c9f08d3f28e21db82
Signed-off-by: Bin Qian <bin.qian@windriver.com>
This commit is contained in:
Bin Qian 2018-09-04 11:20:23 -04:00
parent 68b5ce3835
commit edc8a56472
20 changed files with 1158 additions and 137 deletions

View File

@ -2,4 +2,4 @@ SRC_DIR=$PKG_BASE
COPY_LIST="$PKG_BASE/LICENSE"
TAR_NAME=sm
VERSION=1.0.0
TIS_PATCH_VER=24
TIS_PATCH_VER=26

View File

@ -107,6 +107,11 @@ SRCS+=sm_swact_state.c
SRCS+=sm_worker_thread.cpp
SRCS+=sm_task_affining_thread.c
SRCS+=sm_node_swact_monitor.cpp
SRCS+=sm_failover_fsm.cpp
SRCS+=sm_failover_normal_state.cpp
SRCS+=sm_failover_fail_pending_state.cpp
SRCS+=sm_failover_failed_state.cpp
SRCS+=sm_failover_survived_state.cpp
SRCS+=sm_failover_ss.c
SRCS+=sm_service_domain_interface_not_in_use_state.c
SRCS+=sm_configuration_table.c

View File

@ -34,15 +34,8 @@
#include "sm_util_types.h"
#include "sm_failover_ss.h"
#include "sm_failover_utils.h"
typedef enum
{
SM_FAILOVER_INTERFACE_UNKNOWN,
SM_FAILOVER_INTERFACE_OK,
SM_FAILOVER_INTERFACE_MISSING_HEARTBEAT,
SM_FAILOVER_INTERFACE_DOWN,
SM_FAILOVER_INTERFACE_RECOVERING
}SmFailoverInterfaceStateT;
#include "sm_failover_fsm.h"
#include "sm_api.h"
typedef enum
{
@ -57,7 +50,7 @@ typedef enum
//as part of the gradual delivery of enhancement with more
//complex algorithm to determine the failover survivor routine
//the SM_FAILOVER_ACTION_ROUTINE will redirect the lookup to
//new logic. Until all actions are migrate to new logic, the
//new logic. Until all actions are migrated to new logic, the
//lookup tables will be eliminated.
SM_FAILOVER_ACTION_ROUTINE = 1 << 31,
}SmFailoverActionT;
@ -181,6 +174,79 @@ static bool _hello_msg_alive = true;
static SmSystemModeT _system_mode;
static time_t _last_report_ts = 0;
static int _heartbeat_count = 0;
static bool _peer_ready = false;
// The class is to accommodate batching i/f state changes
// during the failover_audit interval.
class FailoverEventHolder
{
public:
FailoverEventHolder();
void push_event(SmFailoverEventT, const ISmFSMEventData* event_data);
void send_event();
private:
bool _event_to_send;
SmFailoverEventT _failover_event;
const ISmFSMEventData* _event_data;
};
FailoverEventHolder::FailoverEventHolder()
{
_event_to_send = false;
}
void FailoverEventHolder::push_event(SmFailoverEventT event, const ISmFSMEventData* event_data)
{
SmFailoverEventT list[] = {
SM_INTERFACE_DOWN,
SM_HEARTBEAT_LOST,
SM_INTERFACE_UP,
SM_HEARTBEAT_RECOVER
};
unsigned int i;
if(!_event_to_send)
{
_event_to_send = true;
for(i = 0; i < sizeof(list) / sizeof(SmFailoverEventT); i ++)
{
if(event == list[i])
{
_failover_event = event;
_event_data = event_data;
_event_to_send = true;
DPRINTFI("Push event %d", event);
return;
}
}
DPRINTFE("Runtime error: unsupported event %d", event);
return;
}
for(i = 0; list[i] != _failover_event && i < sizeof(list) / sizeof(SmFailoverEventT); i ++)
{
if(list[i] == event)
{
_failover_event = event;
_event_data = event_data;
return;
}
}
DPRINTFE("Runtime error: unsupported event %d", event);
}
void FailoverEventHolder::send_event()
{
if(_event_to_send)
{
DPRINTFI("Send event %d", _failover_event);
SmFailoverFSM::get_fsm().send_event(_failover_event, _event_data);
_event_to_send = false;
}
}
static FailoverEventHolder _event_holder;
// std 2+2/2+2+x failover actions
SmFailoverActionPairT action_map_std_infra[16] =
@ -435,6 +501,8 @@ void sm_failover_lost_heartbeat( SmFailoverInterfaceT* interface )
{
DPRINTFI("Interface %s lose heartbeat.", interface->interface_name);
}
_event_holder.push_event(SM_HEARTBEAT_LOST, NULL);
}
// ****************************************************************************
@ -466,6 +534,8 @@ void sm_failover_heartbeat_restore( SmFailoverInterfaceT* interface )
{
DPRINTFI("Interface %s heartbeat is OK.", interface->interface_name);
}
_event_holder.push_event(SM_HEARTBEAT_RECOVER, NULL);
}
// ****************************************************************************
@ -501,6 +571,8 @@ void sm_failover_interface_down( const char* const interface_name )
{
DPRINTFI("%d domain interfaces are impacted as i/f %s is down.",
impacted, interface_name);
_event_holder.push_event(SM_INTERFACE_DOWN, NULL);
}
}
// ****************************************************************************
@ -537,6 +609,8 @@ void sm_failover_interface_up( const char* const interface_name )
{
DPRINTFI("%d domain interfaces are impacted as i/f %s is up.",
impacted, interface_name);
_event_holder.push_event(SM_INTERFACE_UP, NULL);
}
}
// ****************************************************************************
@ -656,13 +730,22 @@ int _failover_get_if_state()
// ****************************************************************************
// Failover - get interface state
// ==================
SmErrorT sm_failover_if_state_get(SmHeartbeatMsgIfStateT *if_state)
SmHeartbeatMsgIfStateT sm_failover_if_state_get()
{
mutex_holder holder(&_mutex);
int if_state_flag = _failover_get_if_state();
return (if_state_flag & 0b0111); //the lower 3 bits i/f state flag
}
// ****************************************************************************
// ****************************************************************************
// Failover - get peer node interface state
// ==================
SmHeartbeatMsgIfStateT sm_failover_get_peer_if_state()
{
mutex_holder holder(&_mutex);
int if_state_flag = _failover_get_if_state();
*if_state = (if_state_flag & 0b0111); //the lower 3 bits i/f state flag
return SM_OKAY;
return (_peer_if_state & 0b0111); //the lower 3 bits i/f state flag
}
// ****************************************************************************
@ -923,84 +1006,66 @@ bool this_controller_unlocked()
}
// ****************************************************************************
// ****************************************************************************
// Failover - pack schedule state into 2 bit (active/standby/failed)
// SM starts managing failover after the node is scheduled, i.e, cannot be
// in SM_NODE_STATE_UNKNOWN or SM_NODE_STATE_INIT state
// =======================
unsigned int sm_failover_pack_schedule_state(SmNodeScheduleStateT state)
// Failover - set system to scheduled status
// ==================
SmErrorT sm_failover_set_system(const SmSystemFailoverStatus& failover_status)
{
static const unsigned int failed_state_bit_flag = 0;
static const unsigned int active_state_bit_flag = 1;
static const unsigned int standby_state_bit_flag = 1 << 1;
unsigned int res;
switch(state)
//valid combinations of target system scheduling are:
// active/standby
// active/failed
SmFailoverActionResultT result;
SmNodeScheduleStateT host_target_state, peer_target_state;
host_target_state = failover_status.get_host_schedule_state();
peer_target_state = failover_status.get_peer_schedule_state();
if(SM_NODE_STATE_ACTIVE == host_target_state)
{
case SM_NODE_STATE_ACTIVE:
res = active_state_bit_flag;
break;
case SM_NODE_STATE_STANDBY:
res = standby_state_bit_flag;
break;
case SM_NODE_STATE_FAILED:
res = failed_state_bit_flag;
break;
default:
res = failed_state_bit_flag;
if(SM_NODE_STATE_STANDBY == _host_state &&
SM_NODE_STATE_FAILED == peer_target_state)
{
result = sm_failover_activate_self();
if(SM_FAILOVER_ACTION_RESULT_FAILED == result)
{
DPRINTFE("Failed to activate %s.", _host_name);
return SM_FAILED;
}
result = sm_failover_disable_node(_peer_name);
if(SM_FAILOVER_ACTION_RESULT_FAILED == result)
{
DPRINTFE("Failed to disable node %s.", _peer_name);
return SM_FAILED;
}
}
}else if(SM_NODE_STATE_STANDBY == host_target_state)
{
if(SM_NODE_STATE_ACTIVE == _host_state)
{
result = sm_failover_swact();
if(SM_FAILOVER_ACTION_RESULT_FAILED == result)
{
DPRINTFE("Failed to swact.");
return SM_FAILED;
}
}
}
return res;
}
// ****************************************************************************
// ****************************************************************************
// Failover - convert target scheduling state to action
// =======================
int sm_failover_get_action(const SmSystemFailoverStatus& failover_status)
{
DPRINTFI("Host to %s, Peer to %s.",
sm_node_schedule_state_str(failover_status.host_schedule_state),
sm_node_schedule_state_str(failover_status.peer_schedule_state)
);
unsigned int host_flag, peer_flag;
host_flag = sm_failover_pack_schedule_state(failover_status.host_schedule_state);
peer_flag = sm_failover_pack_schedule_state(failover_status.peer_schedule_state);
unsigned int flag = (host_flag | (peer_flag << 2));
DPRINTFI("Failover scheduling flag %d", flag);
SmFailoverActionPairT* actions;
int action;
SmFailoverActionPairT action_map[16] = {
{SM_FAILOVER_ACTION_FAIL_NODE, SM_FAILOVER_ACTION_FAIL_NODE}, //00 00
{SM_FAILOVER_ACTION_DISABLE_STANDBY, SM_FAILOVER_ACTION_FAIL_NODE}, //01 00
{SM_FAILOVER_ACTION_UNDEFINED, SM_FAILOVER_ACTION_UNDEFINED}, //10 00
{SM_FAILOVER_ACTION_UNDEFINED, SM_FAILOVER_ACTION_UNDEFINED}, //11 00
{SM_FAILOVER_ACTION_FAIL_NODE, SM_FAILOVER_ACTION_ACTIVATE}, //00 01
{SM_FAILOVER_ACTION_UNDEFINED, SM_FAILOVER_ACTION_UNDEFINED}, //01 01
{SM_FAILOVER_ACTION_SWACT, SM_FAILOVER_ACTION_NO_ACTION}, //10 01
{SM_FAILOVER_ACTION_UNDEFINED, SM_FAILOVER_ACTION_UNDEFINED}, //11 01
{SM_FAILOVER_ACTION_UNDEFINED, SM_FAILOVER_ACTION_UNDEFINED}, //00 10
{SM_FAILOVER_ACTION_NO_ACTION, SM_FAILOVER_ACTION_NO_ACTION}, //01 10
{SM_FAILOVER_ACTION_UNDEFINED, SM_FAILOVER_ACTION_UNDEFINED}, //10 10
{SM_FAILOVER_ACTION_UNDEFINED, SM_FAILOVER_ACTION_UNDEFINED}, //11 10
{SM_FAILOVER_ACTION_UNDEFINED, SM_FAILOVER_ACTION_UNDEFINED}, //00 11
{SM_FAILOVER_ACTION_UNDEFINED, SM_FAILOVER_ACTION_UNDEFINED}, //01 11
{SM_FAILOVER_ACTION_UNDEFINED, SM_FAILOVER_ACTION_UNDEFINED}, //10 11
{SM_FAILOVER_ACTION_UNDEFINED, SM_FAILOVER_ACTION_UNDEFINED}, //11 11
};
bool is_active = is_active_controller();
actions = &(action_map[flag & 0xf]);
if(is_active)
else if(SM_NODE_STATE_FAILED == host_target_state)
{
action = actions->active_controller_action;
result = sm_failover_disable_node(_host_name);
if(SM_FAILOVER_ACTION_RESULT_FAILED == result)
{
DPRINTFE("Failed disable host %s.", _host_name);
return SM_FAILED;
}
}
else
{
action = actions->standby_controller_action;
DPRINTFE("Runtime error. Unexpected scheduling state %s for host %s",
sm_node_schedule_state_str(host_target_state),
_host_name);
return SM_FAILED;
}
return action;
return SM_OKAY;
}
// ****************************************************************************
@ -1112,6 +1177,8 @@ void sm_failover_audit()
return;
}
_event_holder.send_event();
int64_t curr_node_state = if_state_flag;
if( _hello_msg_alive )
@ -1160,34 +1227,9 @@ void sm_failover_audit()
_log_nodes_state(action);
DPRINTFI("Action to take %d", action);
if (action & SM_FAILOVER_ACTION_ROUTINE)
{
SmSystemStatusT sys_status;
SmSystemFailoverStatus failover_status;
sys_status.system_mode = _system_mode;
if(if_state_flag & SM_FAILOVER_HEARTBEAT_ALIVE)
{
sys_status.heartbeat_state = SM_HEARTBEAT_OK;
}else
{
sys_status.heartbeat_state = SM_HEARTBEAT_LOSS;
}
sys_status.host_status.node_name = _host_name;
sys_status.host_status.interface_state = if_state_flag & 0x7;
sys_status.host_status.current_schedule_state = _host_state;
sys_status.peer_status.node_name = _peer_name;
sys_status.peer_status.interface_state = _peer_if_state & 0x7;
sys_status.peer_status.current_schedule_state = sm_get_controller_state(_peer_name);
SmErrorT error = sm_failover_ss_get_survivor(sys_status, failover_status);
if(SM_OKAY != error)
{
DPRINTFE("Failed to determine failover state. ");
return;
}
action = sm_failover_get_action(failover_status);
action = SM_FAILOVER_ACTION_NO_ACTION;
}
if (action & SM_FAILOVER_ACTION_ACTIVATE)
@ -1367,6 +1409,67 @@ SmErrorT sm_failover_action()
}
// ****************************************************************************
// ****************************************************************************
// Failover - get node interface info
// ==================
SmFailoverInterfaceStateT sm_failover_get_interface_info(SmInterfaceTypeT interface_type)
{
SmFailoverInterfaceInfo* res = NULL;
switch(interface_type)
{
case SM_INTERFACE_MGMT:
res = _mgmt_interface_info;
break;
case SM_INTERFACE_INFRA:
res = _infra_interface_info;
break;
case SM_INTERFACE_OAM:
res = _oam_interface_info;
break;
case SM_INTERFACE_UNKNOWN:
break;
}
if(NULL != res)
{
return res->get_state();
}
return SM_FAILOVER_INTERFACE_UNKNOWN;
}
// ****************************************************************************
static void sm_failover_node_set_callback( char node_name[],
SmNodeSetActionT action, SmNodeAdminStateT admin_state,
SmNodeOperationalStateT oper_state, SmNodeAvailStatusT avail_status,
int seqno )
{
if(0 == strcmp(node_name, _host_name))
{
// care only the peer
return;
}
bool peer_ready = false;
if(SM_NODE_SET_ACTION_UNLOCK == action)
{
peer_ready = SM_NODE_ADMIN_STATE_UNLOCKED == admin_state &&
SM_NODE_OPERATIONAL_STATE_ENABLED == oper_state &&
(SM_NODE_AVAIL_STATUS_AVAILABLE == avail_status ||
SM_NODE_AVAIL_STATUS_DEGRADED == avail_status);
}else if (SM_NODE_SET_ACTION_LOCK == action)
{
peer_ready = false;
}else
{
return;
}
if(peer_ready && !_peer_ready)
{
DPRINTFI("Peer %s is now online.", node_name);
}
_peer_ready = peer_ready;
}
static SmApiCallbacksT _api_callbacks = {0};
// ****************************************************************************
// Failover - Initialize
// ======================
@ -1375,6 +1478,14 @@ SmErrorT sm_failover_initialize( void )
void* user_data[1] = {&_total_interfaces};
bool enabled;
SmErrorT error;
error = SmFailoverFSM::initialize();
if( SM_OKAY != error )
{
DPRINTFE( "Failed to initialize failover FSM, error %s.", sm_error_str( error ) );
return error;
}
_system_mode = sm_node_utils_get_system_mode();
DPRINTFI("System mode %s", sm_system_mode_str(_system_mode));
@ -1404,6 +1515,17 @@ SmErrorT sm_failover_initialize( void )
return error;
}
_api_callbacks.node_set = sm_failover_node_set_callback;
error = sm_api_register_callbacks( &_api_callbacks );
if( SM_OKAY != error )
{
DPRINTFE( "Failed to register api callbacks, error=%s.",
sm_error_str( error ) );
return( error );
}
if( 0 == strcmp( SM_NODE_CONTROLLER_0_NAME, _host_name ) )
{
snprintf( _peer_name, sizeof(_peer_name), "%s", SM_NODE_CONTROLLER_1_NAME );
@ -1491,6 +1613,13 @@ SmErrorT sm_failover_finalize( void )
_total_interfaces = 0;
SmErrorT error;
error = sm_api_deregister_callbacks( &_api_callbacks );
if( SM_OKAY != error )
{
DPRINTFE( "Failed to deregister api callbacks, error=%s.",
sm_error_str( error ) );
}
sm_timer_deregister( failover_audit_timer_id );
if( NULL != _sm_db_handle )
{
@ -1504,6 +1633,13 @@ SmErrorT sm_failover_finalize( void )
_sm_db_handle = NULL;
}
error = SmFailoverFSM::finalize();
if( SM_OKAY != error )
{
DPRINTFE( "Failed to finalize failover FSM, error %s.", sm_error_str( error ) );
return error;
}
return SM_OKAY;
}
// ****************************************************************************
@ -1575,6 +1711,11 @@ void dump_peer_if_state(FILE* fp)
fprintf(fp, " Peer Interface state: %d\n", _peer_if_state);
}
void dump_failover_fsm_state(FILE* fp)
{
fprintf(fp, " Failover FSM state: %d\n", SmFailoverFSM::get_fsm().get_state());
}
// ****************************************************************************
// Failover - dump state
// ======================
@ -1582,6 +1723,7 @@ void sm_failover_dump_state(FILE* fp)
{
fprintf( fp, "Failover \n\n" );
dump_host_state(fp);
dump_failover_fsm_state(fp);
dump_interfaces_state(fp);
dump_peer_if_state(fp);
}

View File

@ -9,6 +9,7 @@
#include "sm_types.h"
#include "sm_service_domain_interface_table.h"
#include "sm_db_nodes.h"
#include "sm_failover_ss.h"
#ifdef __cplusplus
extern "C" {
#endif
@ -88,7 +89,7 @@ extern void sm_failover_if_state_update(const char node_name[],
// ****************************************************************************
// Failover - get local interface state
extern SmErrorT sm_failover_if_state_get(SmHeartbeatMsgIfStateT*);
extern SmHeartbeatMsgIfStateT sm_failover_if_state_get();
// ****************************************************************************
// ****************************************************************************
@ -115,6 +116,27 @@ SmErrorT sm_failover_degrade_clear(SmFailoverDegradeSourceT source);
SmErrorT sm_failover_get_node(char* node_name, SmDbNodeT& node);
// ****************************************************************************
// ****************************************************************************
// Failover - get node interface info
// ==================
SmFailoverInterfaceStateT sm_failover_get_interface_info(SmInterfaceTypeT interface_type);
// ****************************************************************************
// ****************************************************************************
// Failover - get peer node interface state
// ==================
SmHeartbeatMsgIfStateT sm_failover_get_peer_if_state();
// ****************************************************************************
// ****************************************************************************
// Failover - set system to scheduled status
// ==================
SmErrorT sm_failover_set_system(const SmSystemFailoverStatus& failover_status);
// ****************************************************************************
// ****************************************************************************
// Failover - dump state
// ==================

View File

@ -0,0 +1,141 @@
//
// Copyright (c) 2018 Wind River Systems, Inc.
//
// SPDX-License-Identifier: Apache-2.0
//
#include "sm_failover_fail_pending_state.h"
#include <stdlib.h>
#include "sm_types.h"
#include "sm_limits.h"
#include "sm_debug.h"
#include "sm_failover.h"
#include "sm_failover_fsm.h"
#include "sm_failover_ss.h"
#include "sm_failover_utils.h"
#include "sm_node_utils.h"
#include "sm_node_api.h"
static const int FAIL_PENDING_TIMEOUT = 2000; //2000ms
SmErrorT SmFailoverFailPendingState::event_handler(SmFailoverEventT event, const ISmFSMEventData* event_data)
{
//SmFSMEventDataTypeT event_data_type = event_data->get_event_data_type();
switch (event)
{
case SM_HEARTBEAT_LOST:
case SM_INTERFACE_DOWN:
//getting worse, extend wait time
case SM_INTERFACE_UP:
case SM_HEARTBEAT_RECOVER:
// Things are improving, extend wait time
this->_register_timer();
break;
case SM_FAIL_PENDING_TIMEOUT:
{
SmSystemFailoverStatus failover_status;
SmErrorT error = sm_failover_ss_get_survivor(failover_status);
SmNodeScheduleStateT host_state = failover_status.get_host_schedule_state();
SmNodeScheduleStateT peer_state = failover_status.get_peer_schedule_state();
if(SM_OKAY != error)
{
DPRINTFE("Failed to get failover survivor. Error %s", sm_error_str(error));
return error;
}
error = sm_failover_set_system(failover_status);
if(SM_NODE_STATE_FAILED == host_state)
{
this->fsm.set_state(SM_FAILOVER_STATE_FAILED);
}
else if(SM_NODE_STATE_ACTIVE == host_state)
{
if(SM_NODE_STATE_FAILED == peer_state)
{
this->fsm.set_state(SM_FAILOVER_STATE_SURVIVED);
}else
{
this->fsm.set_state(SM_FAILOVER_STATE_NORMAL);
}
}
else if(SM_NODE_STATE_STANDBY == host_state)
{
this->fsm.set_state(SM_FAILOVER_STATE_NORMAL);
}else
{
DPRINTFE("Runtime error: unexpected scheduling state: %s",
sm_node_schedule_state_str(host_state));
}
return error;
}
break;
default:
DPRINTFE("Runtime error, unexpected event %d, at state %d", event, this->fsm.get_state());
}
return SM_OKAY;
}
bool SmFailoverFailPendingState::_fail_pending_timeout(
SmTimerIdT timer_id, int64_t user_data)
{
SmFailoverFSM::get_fsm().send_event(SM_FAIL_PENDING_TIMEOUT, NULL);
return false;
}
SmErrorT SmFailoverFailPendingState::enter_state()
{
DPRINTFI("Entering Pending state");
SmErrorT error = this->_register_timer();
if(SM_OKAY != error)
{
DPRINTFE("Failed to register fail pending timer. Error %s", sm_error_str(error));
}
return error;
}
SmErrorT SmFailoverFailPendingState::_register_timer()
{
SmErrorT error;
const char* timer_name = "FAIL PENDING TIMER";
if(SM_TIMER_ID_INVALID != this->_pending_timer_id)
{
this->_deregister_timer();
}
error = sm_timer_register( timer_name, FAIL_PENDING_TIMEOUT,
SmFailoverFailPendingState::_fail_pending_timeout,
0, &this->_pending_timer_id);
return error;
}
SmErrorT SmFailoverFailPendingState::_deregister_timer()
{
SmErrorT error;
if(SM_TIMER_ID_INVALID == this->_pending_timer_id)
{
return SM_OKAY;
}
error = sm_timer_deregister(this->_pending_timer_id);
if( SM_OKAY != error )
{
DPRINTFE( "Failed to cancel fail pending timer, error=%s.",
sm_error_str( error ) );
}else
{
this->_pending_timer_id = SM_TIMER_ID_INVALID;
}
return error;
}
SmErrorT SmFailoverFailPendingState::exit_state()
{
SmErrorT error = this->_deregister_timer();
if(SM_OKAY != error)
{
DPRINTFE("Failed to deregister fail pending timer. Error %s", sm_error_str(error));
}
DPRINTFI("Exiting Pending state");
return error;
}

View File

@ -0,0 +1,31 @@
//
// Copyright (c) 2018 Wind River Systems, Inc.
//
// SPDX-License-Identifier: Apache-2.0
//
#ifndef __SM_FAILOVER_FAIL_PENDING_STATE_H__
#define __SM_FAILOVER_FAIL_PENDING_STATE_H__
#include "sm_types.h"
#include "sm_failover_fsm.h"
#include "sm_timer.h"
class SmFailoverFailPendingState : public SmFSMState
{
public:
SmFailoverFailPendingState(SmFailoverFSM& fsm) : SmFSMState(fsm){}
SmErrorT enter_state();
SmErrorT exit_state();
protected:
SmErrorT event_handler(SmFailoverEventT event, const ISmFSMEventData* event_data);
private:
SmTimerIdT _pending_timer_id;
static bool _fail_pending_timeout(SmTimerIdT timer_id, int64_t user_data);
SmErrorT _register_timer();
SmErrorT _deregister_timer();
};
#endif //__SM_FAILOVER_FAIL_PENDING_STATE_H__

View File

@ -0,0 +1,66 @@
//
// Copyright (c) 2018 Wind River Systems, Inc.
//
// SPDX-License-Identifier: Apache-2.0
//
#include "sm_failover_failed_state.h"
#include <stdlib.h>
#include "sm_types.h"
#include "sm_debug.h"
#include "sm_failover_fsm.h"
#include "sm_failover_ss.h"
SmErrorT SmFailoverFailedState::enter_state()
{
DPRINTFI("Entering Failed state");
return SM_OKAY;
}
SmErrorT SmFailoverFailedState::event_handler(SmFailoverEventT event, const ISmFSMEventData* event_data)
{
switch (event)
{
case SM_HEARTBEAT_LOST:
case SM_INTERFACE_DOWN:
case SM_INTERFACE_UP:
// ignore, recover is possible only when heartbeat is recovered
break;
case SM_HEARTBEAT_RECOVER:
{
SmSystemFailoverStatus failover_status;
SmErrorT error = sm_failover_ss_get_survivor(failover_status);
SmNodeScheduleStateT host_state = failover_status.get_host_schedule_state();
SmNodeScheduleStateT peer_state = failover_status.get_peer_schedule_state();
if(SM_OKAY != error)
{
DPRINTFE("Failed to get failover survivor. Error %s", sm_error_str(error));
return error;
}
if(SM_NODE_STATE_FAILED == host_state)
{
// don't need to set to failed state, already here
}
else if(SM_NODE_STATE_STANDBY == host_state && SM_NODE_STATE_ACTIVE == peer_state)
{
// standby is the only possible state to be scheduled to from failed state
this->fsm.set_state(SM_FAILOVER_STATE_NORMAL);
}else
{
DPRINTFE("Runtime error: unexpected scheduling state: %s",
sm_node_schedule_state_str(host_state));
}
}
break;
default:
DPRINTFE("Runtime error, unexpected event %d, at state %d", event, this->fsm.get_state());
}
return SM_OKAY;
}
SmErrorT SmFailoverFailedState::exit_state()
{
DPRINTFI("Exiting Failed state");
return SM_OKAY;
}

View File

@ -0,0 +1,25 @@
//
// Copyright (c) 2018 Wind River Systems, Inc.
//
// SPDX-License-Identifier: Apache-2.0
//
#ifndef __SM_FAILOVER_FAILED_STATE_H__
#define __SM_FAILOVER_FAILED_STATE_H__
#include "sm_types.h"
#include "sm_failover_fsm.h"
#include "sm_timer.h"
class SmFailoverFailedState : public SmFSMState
{
public:
SmFailoverFailedState(SmFailoverFSM& fsm) : SmFSMState(fsm){}
SmErrorT enter_state();
SmErrorT exit_state();
protected:
SmErrorT event_handler(SmFailoverEventT event, const ISmFSMEventData* event_data);
};
#endif //__SM_FAILOVER_FAILED_STATE_H__

View File

@ -0,0 +1,164 @@
//
// Copyright (c) 2018 Wind River Systems, Inc.
//
// SPDX-License-Identifier: Apache-2.0
//
#include "sm_failover_fsm.h"
#include <stdlib.h>
#include "sm_debug.h"
#include "sm_failover_normal_state.h"
#include "sm_failover_fail_pending_state.h"
#include "sm_failover_failed_state.h"
#include "sm_failover_survived_state.h"
SmErrorT SmFSMState::enter_state()
{
// default implementation, nothing to do
return SM_OKAY;
};
SmErrorT SmFSMState::exit_state()
{
// default implementation, nothing to do
return SM_OKAY;
};
SmFailoverFSM SmFailoverFSM::_the_fsm;
void SmFailoverFSM::deregister_states()
{
for(int i = 0; i < MaxState; i ++)
{
SmFSMState* state = _state_handlers[i];
if(NULL != state)
{
_state_handlers[i] = NULL;
delete state;
}
}
}
SmFailoverFSM::SmFailoverFSM()
{
for(int i = 0; i < MaxState; i ++)
{
_state_handlers[i] = NULL;
}
}
SmFailoverFSM::~SmFailoverFSM()
{
}
SmErrorT SmFailoverFSM::send_event(SmFailoverEventT event, const ISmFSMEventData* event_data)
{
SmFSMState* state_handler = this->_state_handlers[this->get_state()];
if(NULL == state_handler)
{
DPRINTFE("Runtime error. No handler for state %d", this->get_state());
return SM_FAILED;
}
DPRINTFI("send_event %d\n", event);
state_handler->event_handler(event, event_data);
return SM_OKAY;
}
SmErrorT SmFailoverFSM::set_state(SmFailoverStateT state)
{
if(state >= MaxState)
{
DPRINTFE("Runtime error. Don't understand state %d", state);
return SM_FAILED;
}
if(state == this->get_state())
{
DPRINTFI("Already in %d state", state);
return SM_OKAY;
}
SmFSMState* state_handler = this->_state_handlers[this->get_state()];
if(NULL == state_handler)
{
DPRINTFE("State %d was not registered", this->get_state());
return SM_FAILED;
}
SmErrorT error = state_handler->exit_state();
if(SM_OKAY != error)
{
DPRINTFE("Exit state failed (%d), %d", this->get_state(), error);
return SM_FAILED;
}
SmFSMState* new_state_handler = this->_state_handlers[state];
if(NULL == new_state_handler)
{
DPRINTFE("State %d was not registered", state);
return SM_FAILED;
}
error = new_state_handler->enter_state();
if(SM_OKAY != error)
{
DPRINTFE("Enter state failed (%d), %d", this->get_state(), error);
return SM_FAILED;
}
this->_current_state = state;
return SM_OKAY;
}
SmErrorT SmFailoverFSM::register_fsm_state(SmFailoverStateT state, SmFSMState* state_handler)
{
if(state >= MaxState)
{
DPRINTFE("Runtime error. Invalid state %d", state);
return SM_FAILED;
}
if(NULL != this->_state_handlers[state])
{
DPRINTFE("Runtime error. Duplicated state handle for state %d", state);
return SM_FAILED;
}
this->_state_handlers[state] = state_handler;
return SM_OKAY;
}
SmErrorT SmFailoverFSM::init_state()
{
SmErrorT error;
SmFailoverStateT state = SM_FAILOVER_STATE_INITIAL;
SmFSMState* new_state_handler = _state_handlers[state];
if(NULL == new_state_handler)
{
DPRINTFE("State %d was not registered", state);
return SM_FAILED;
}
error = new_state_handler->enter_state();
if(SM_OKAY != error)
{
DPRINTFE("Enter state failed (%d), %d", this->get_state(), error);
return SM_FAILED;
}
_current_state = state;
return SM_OKAY;
}
SmErrorT SmFailoverFSM::initialize()
{
SmFailoverFSM& fsm = SmFailoverFSM::get_fsm();
fsm.register_fsm_state(SM_FAILOVER_STATE_NORMAL, new SmFailoverNormalState(fsm));
fsm.register_fsm_state(SM_FAILOVER_STATE_FAIL_PENDING, new SmFailoverFailPendingState(fsm));
fsm.register_fsm_state(SM_FAILOVER_STATE_FAILED, new SmFailoverFailedState(fsm));
fsm.register_fsm_state(SM_FAILOVER_STATE_SURVIVED, new SmFailoverSurvivedState(fsm));
return fsm.init_state();
}
SmErrorT SmFailoverFSM::finalize()
{
SmFailoverFSM::get_fsm().deregister_states();
return SM_OKAY;
}

View File

@ -0,0 +1,81 @@
//
// Copyright (c) 2018 Wind River Systems, Inc.
//
// SPDX-License-Identifier: Apache-2.0
//
#ifndef __SM_FAILOVER_FSM_H__
#define __SM_FAILOVER_FSM_H__
#include "sm_types.h"
typedef enum{
SM_FAILOVER_STATE_NORMAL,
SM_FAILOVER_STATE_FAIL_PENDING,
SM_FAILOVER_STATE_FAILED,
SM_FAILOVER_STATE_SURVIVED,
SM_FAILOVER_STATE_MAX
}SmFailoverStateT;
#define SM_FAILOVER_STATE_INITIAL SM_FAILOVER_STATE_NORMAL
typedef enum{
SM_HEARTBEAT_LOST,
SM_INTERFACE_DOWN,
SM_INTERFACE_UP,
SM_HEARTBEAT_RECOVER,
SM_FAIL_PENDING_TIMEOUT,
}SmFailoverEventT;
typedef int SmFSMEventDataTypeT;
class ISmFSMEventData
{
public:
virtual SmFSMEventDataTypeT get_event_data_type() const = 0;
};
class SmFailoverFSM;
class SmFSMState
{
public:
SmFSMState(SmFailoverFSM& fsm) : fsm(fsm) {};
virtual ~SmFSMState(){};
virtual SmErrorT enter_state();
virtual SmErrorT exit_state();
protected:
SmFailoverFSM& fsm;
virtual SmErrorT event_handler(SmFailoverEventT event, const ISmFSMEventData* event_data)=0;
friend SmFailoverFSM;
};
class SmFailoverFSM
{
public:
SmFailoverFSM();
virtual ~SmFailoverFSM();
SmErrorT send_event(SmFailoverEventT event, const ISmFSMEventData* event_data);
SmErrorT register_fsm_state(SmFailoverStateT state, SmFSMState* state_handler);
SmErrorT set_state(SmFailoverStateT state);
inline int get_state() const {return this->_current_state;}
static SmErrorT initialize();
static SmErrorT finalize();
inline static SmFailoverFSM& get_fsm() {
return _the_fsm;
}
private:
static const int MaxState = SM_FAILOVER_STATE_MAX;
SmFSMState* _state_handlers[MaxState];
int _current_state;
static SmFailoverFSM _the_fsm;
SmErrorT init_state();
void deregister_states();
};
#endif //__SM_FAILOVER_FSM_H__

View File

@ -0,0 +1,42 @@
//
// Copyright (c) 2018 Wind River Systems, Inc.
//
// SPDX-License-Identifier: Apache-2.0
//
#include "sm_failover_normal_state.h"
#include <stdlib.h>
#include "sm_types.h"
#include "sm_debug.h"
#include "sm_failover_fsm.h"
SmErrorT SmFailoverNormalState::enter_state()
{
DPRINTFI("Entering normal state");
return SM_OKAY;
}
SmErrorT SmFailoverNormalState::event_handler(SmFailoverEventT event, const ISmFSMEventData* event_data)
{
switch (event)
{
case SM_HEARTBEAT_LOST:
case SM_INTERFACE_DOWN:
this->fsm.set_state(SM_FAILOVER_STATE_FAIL_PENDING);
break;
case SM_INTERFACE_UP:
DPRINTFI("Interface is up");
break;
case SM_HEARTBEAT_RECOVER:
DPRINTFI("Heartbeat is recovered");
break;
default:
DPRINTFE("Runtime error, unexpected event %d, at state %d", event, this->fsm.get_state());
}
return SM_OKAY;
}
SmErrorT SmFailoverNormalState::exit_state()
{
DPRINTFI("Exiting normal state");
return SM_OKAY;
}

View File

@ -0,0 +1,22 @@
//
// Copyright (c) 2018 Wind River Systems, Inc.
//
// SPDX-License-Identifier: Apache-2.0
//
#ifndef __SM_FAILOVER_NORMAL_STATE_H__
#define __SM_FAILOVER_NORMAL_STATE_H__
#include "sm_types.h"
#include "sm_failover_fsm.h"
class SmFailoverNormalState : public SmFSMState
{
public:
SmFailoverNormalState(SmFailoverFSM& fsm) : SmFSMState(fsm){}
SmErrorT enter_state();
SmErrorT exit_state();
protected:
SmErrorT event_handler(SmFailoverEventT event, const ISmFSMEventData* event_data);
};
#endif //__SM_FAILOVER_NORMAL_STATE_H__

View File

@ -5,8 +5,77 @@
//
#include "sm_failover_ss.h"
#include <time.h>
#include "sm_debug.h"
#include "sm_limits.h"
#include "sm_node_utils.h"
#include "sm_node_api.h"
#include "sm_failover_utils.h"
#include "sm_node_utils.h"
#include "sm_node_api.h"
#include "sm_failover.h"
SmSystemFailoverStatus::SmSystemFailoverStatus()
{
SmErrorT error;
char host_name[SM_NODE_NAME_MAX_CHAR];
char peer_name[SM_NODE_NAME_MAX_CHAR];
error = sm_node_utils_get_hostname(host_name);
if( SM_OKAY != error )
{
DPRINTFE( "Failed to get hostname, error=%s.",
sm_error_str( error ) );
return;
}
error = sm_node_api_get_peername(peer_name);
if( SM_OKAY != error )
{
DPRINTFE( "Failed to get peername, error=%s.",
sm_error_str( error ) );
return;
}
_host_schedule_state = sm_get_controller_state(host_name);
_peer_schedule_state = sm_get_controller_state(peer_name);
}
SmSystemFailoverStatus::~SmSystemFailoverStatus()
{
}
bool SmSystemFailoverStatus::_is_valid_schedule_state(SmNodeScheduleStateT state)
{
return (SM_NODE_STATE_ACTIVE == state ||
SM_NODE_STATE_STANDBY == state ||
SM_NODE_STATE_FAILED == state);
}
void SmSystemFailoverStatus::set_host_schedule_state(SmNodeScheduleStateT state)
{
if(_is_valid_schedule_state(state))
{
if(_host_schedule_state != state)
{
_host_schedule_state=state;
}
}else
{
DPRINTFE("Runtime error, schedule state unknown %d", state);
}
}
void SmSystemFailoverStatus::set_peer_schedule_state(SmNodeScheduleStateT state)
{
if(_is_valid_schedule_state(state))
{
if(_peer_schedule_state != state)
{
_peer_schedule_state = state;
}
}else
{
DPRINTFE("Runtime error, schedule state unknown %d", state);
}
}
typedef enum
{
@ -38,15 +107,76 @@ static int get_node_if_healthy_score(unsigned int interface_state)
}
// ****************************************************************************
SmErrorT _get_system_status(SmSystemStatusT& sys_status, char host_name[], char peer_name[])
{
SmErrorT error;
SmNodeScheduleStateT host_state;
error = sm_node_utils_get_hostname(host_name);
if( SM_OKAY != error )
{
DPRINTFE( "Failed to get hostname, error=%s.",
sm_error_str( error ) );
return error;
}
error = sm_node_api_get_peername(peer_name);
if( SM_OKAY != error )
{
DPRINTFE( "Failed to get peername, error=%s.",
sm_error_str( error ) );
return error;
}
host_state = sm_get_controller_state(host_name);
sys_status.system_mode = sm_node_utils_get_system_mode();
sys_status.host_status.mgmt_state = sm_failover_get_interface_info(SM_INTERFACE_MGMT);
sys_status.host_status.infra_state = sm_failover_get_interface_info(SM_INTERFACE_INFRA);
sys_status.host_status.oam_state = sm_failover_get_interface_info(SM_INTERFACE_OAM);
if(SM_FAILOVER_INTERFACE_OK == sys_status.host_status.mgmt_state ||
SM_FAILOVER_INTERFACE_OK == sys_status.host_status.oam_state ||
SM_FAILOVER_INTERFACE_OK == sys_status.host_status.infra_state)
{
sys_status.heartbeat_state = SM_HEARTBEAT_OK;
}else
{
sys_status.heartbeat_state = SM_HEARTBEAT_LOSS;
}
sys_status.host_status.node_name = host_name;
sys_status.host_status.interface_state = sm_failover_if_state_get();
sys_status.host_status.current_schedule_state = host_state;
sys_status.peer_status.node_name = peer_name;
sys_status.peer_status.interface_state = sm_failover_get_peer_if_state();
sys_status.peer_status.current_schedule_state = sm_get_controller_state(peer_name);
return SM_OKAY;
}
// ****************************************************************************
// sm_failover_ss_get_survivor - select the failover survivor
// This is the main entry/container for the failover logic to determine how
// to schedule the controllers, i.e, active/standby or active/failure.
// ===================
SmErrorT sm_failover_ss_get_survivor(SmSystemFailoverStatus& selection)
{
SmSystemStatusT sys_status;
char host_name[SM_NODE_NAME_MAX_CHAR];
char peer_name[SM_NODE_NAME_MAX_CHAR];
SmErrorT error = _get_system_status(sys_status, host_name, peer_name);
if(SM_OKAY != error)
{
DPRINTFE("Failed to retrieve system status. Error %s", sm_error_str(error));
return error;
}
return sm_failover_ss_get_survivor(sys_status, selection);
}
SmErrorT sm_failover_ss_get_survivor(const SmSystemStatusT& system_status, SmSystemFailoverStatus& selection)
{
selection.host_schedule_state = system_status.host_status.current_schedule_state;
selection.peer_schedule_state = system_status.peer_status.current_schedule_state;
selection.set_host_schedule_state(system_status.host_status.current_schedule_state);
selection.set_peer_schedule_state(system_status.peer_status.current_schedule_state);
if(SM_HEARTBEAT_OK == system_status.heartbeat_state)
{
int host_healthy_score, peer_healthy_score;
@ -55,26 +185,26 @@ SmErrorT sm_failover_ss_get_survivor(const SmSystemStatusT& system_status, SmSys
if( peer_healthy_score < host_healthy_score )
{
//host is more healthy
selection.host_schedule_state = SM_NODE_STATE_ACTIVE;
selection.peer_schedule_state = SM_NODE_STATE_STANDBY;
selection.set_host_schedule_state(SM_NODE_STATE_ACTIVE);
selection.set_peer_schedule_state(SM_NODE_STATE_STANDBY);
}else if(peer_healthy_score > host_healthy_score)
{
//peer is more healthy
selection.host_schedule_state = SM_NODE_STATE_STANDBY;
selection.peer_schedule_state = SM_NODE_STATE_ACTIVE;
selection.set_host_schedule_state(SM_NODE_STATE_STANDBY);
selection.set_peer_schedule_state(SM_NODE_STATE_ACTIVE);
}
}
if(system_status.host_status.current_schedule_state != selection.host_schedule_state ||
system_status.peer_status.current_schedule_state != selection.peer_schedule_state )
if(system_status.host_status.current_schedule_state != selection.get_host_schedule_state() ||
system_status.peer_status.current_schedule_state != selection.get_peer_schedule_state() )
{
DPRINTFI("Uncontrolled swact starts. Host from %s to %s, Peer from %s to %s.",
sm_node_schedule_state_str(system_status.host_status.current_schedule_state),
sm_node_schedule_state_str(selection.host_schedule_state),
sm_node_schedule_state_str(selection.get_host_schedule_state()),
sm_node_schedule_state_str(system_status.peer_status.current_schedule_state),
sm_node_schedule_state_str(selection.peer_schedule_state)
sm_node_schedule_state_str(selection.get_peer_schedule_state())
);
}
return SM_OKAY;
}
// ****************************************************************************
// ****************************************************************************

View File

@ -8,14 +8,18 @@
#define __SM_FAILOVER_SS_H__
#include <stdio.h>
#include "sm_types.h"
#ifdef __cplusplus
extern "C" {
#endif
//#ifdef __cplusplus
//extern "C" {
//#endif
typedef struct
{
const char* node_name;
unsigned int interface_state;
unsigned int heartbeat_state;
SmFailoverInterfaceStateT mgmt_state;
SmFailoverInterfaceStateT infra_state;
SmFailoverInterfaceStateT oam_state;
SmNodeScheduleStateT current_schedule_state;
}SmNodeStatusT;
@ -40,18 +44,36 @@ typedef struct
}SmSystemStatusT;
typedef struct
class SmSystemFailoverStatus
{
SmNodeScheduleStateT host_schedule_state;
SmNodeScheduleStateT peer_schedule_state;
}SmSystemFailoverStatus;
public:
SmSystemFailoverStatus();
virtual ~SmSystemFailoverStatus();
inline SmNodeScheduleStateT get_host_schedule_state() const {
return _host_schedule_state;
}
void set_host_schedule_state(SmNodeScheduleStateT state);
inline SmNodeScheduleStateT get_peer_schedule_state() const {
return _peer_schedule_state;
}
void set_peer_schedule_state(SmNodeScheduleStateT state);
private:
SmNodeScheduleStateT _host_schedule_state;
SmNodeScheduleStateT _peer_schedule_state;
static const char filename[];
static const char file_format[];
bool _is_valid_schedule_state(SmNodeScheduleStateT state);
};
// ****************************************************************************
// sm_failover_ss_get_survivor - select the failover survivor
// ===================
SmErrorT sm_failover_ss_get_survivor(SmSystemFailoverStatus& selection);
SmErrorT sm_failover_ss_get_survivor(const SmSystemStatusT& system_status, SmSystemFailoverStatus& selection);
#ifdef __cplusplus
}
#endif
//#ifdef __cplusplus
//}
//#endif
#endif // __SM_FAILOVER_SS_H__

View File

@ -0,0 +1,62 @@
//
// Copyright (c) 2018 Wind River Systems, Inc.
//
// SPDX-License-Identifier: Apache-2.0
//
#include "sm_failover_survived_state.h"
#include <stdlib.h>
#include "sm_types.h"
#include "sm_debug.h"
#include "sm_failover_fsm.h"
#include "sm_failover_ss.h"
SmErrorT SmFailoverSurvivedState::enter_state()
{
DPRINTFI("Entering Survived state");
return SM_OKAY;
}
SmErrorT SmFailoverSurvivedState::event_handler(SmFailoverEventT event, const ISmFSMEventData* event_data)
{
switch (event)
{
case SM_HEARTBEAT_LOST:
case SM_INTERFACE_DOWN:
case SM_INTERFACE_UP:
// ignore, recover is possible only when heartbeat is recovered
break;
case SM_HEARTBEAT_RECOVER:
{
SmSystemFailoverStatus failover_status;
SmErrorT error = sm_failover_ss_get_survivor(failover_status);
SmNodeScheduleStateT host_state = failover_status.get_host_schedule_state();
SmNodeScheduleStateT peer_state = failover_status.get_peer_schedule_state();
if(SM_OKAY != error)
{
DPRINTFE("Failed to get failover survivor. Error %s", sm_error_str(error));
return error;
}
if(SM_NODE_STATE_ACTIVE == host_state && SM_NODE_STATE_STANDBY == peer_state)
{
// Active is the only possible state to be scheduled to from survived state
this->fsm.set_state(SM_FAILOVER_STATE_NORMAL);
}else
{
DPRINTFE("Runtime error: unexpected scheduling state: host %s, peer %s",
sm_node_schedule_state_str(host_state),
sm_node_schedule_state_str(peer_state));
}
}
break;
default:
DPRINTFE("Runtime error, unexpected event %d, at state %d", event, this->fsm.get_state());
}
return SM_OKAY;
}
SmErrorT SmFailoverSurvivedState::exit_state()
{
DPRINTFI("Exiting Survived state");
return SM_OKAY;
}

View File

@ -0,0 +1,23 @@
//
// Copyright (c) 2018 Wind River Systems, Inc.
//
// SPDX-License-Identifier: Apache-2.0
//
#ifndef __SM_FAILOVER_SURVIVED_STATE_H__
#define __SM_FAILOVER_SURVIVED_STATE_H__
#include "sm_types.h"
#include "sm_failover_fsm.h"
class SmFailoverSurvivedState : public SmFSMState
{
public:
SmFailoverSurvivedState(SmFailoverFSM& fsm) : SmFSMState(fsm){}
SmErrorT enter_state();
SmErrorT exit_state();
protected:
SmErrorT event_handler(SmFailoverEventT event, const ISmFSMEventData* event_data);
};
#endif //__SM_FAILOVER_SURVIVED_STATE_H__

View File

@ -208,11 +208,7 @@ SmErrorT sm_heartbeat_msg_send_alive( SmNetworkTypeT network_type, char node_nam
heartbeat_msg.u.if_state_msg.msg_size = htons((uint16_t)sizeof(SmHeartbeatMsgAliveRev2T));
SmHeartbeatMsgIfStateT if_state;
SmErrorT error = sm_failover_if_state_get(&if_state);
if(SM_OKAY != error)
{
return SM_FAILED;
}
if_state = sm_failover_if_state_get();
heartbeat_msg.u.if_state_msg.if_state = htonl(if_state);
if( SM_AUTH_TYPE_HMAC_SHA512 == auth_type )

View File

@ -58,6 +58,36 @@ SmErrorT sm_node_api_get_hostname( char node_name[] )
}
// ****************************************************************************
// ****************************************************************************
// Node API - Get Peer Name
// ========================
SmErrorT sm_node_api_get_peername(char peer_name[SM_NODE_NAME_MAX_CHAR])
{
char node_name[SM_NODE_NAME_MAX_CHAR];
SmErrorT error = sm_node_api_get_hostname(node_name);
if(SM_OKAY != error)
{
return error;
}
char format[] = "name <> '%s'";
char query[SM_NODE_NAME_MAX_CHAR + sizeof(format)];
unsigned int cnt = snprintf(query, sizeof(query), format, node_name);
if(cnt < sizeof(query) - 1)
{
SmDbNodeT node;
error = sm_db_nodes_query(_sm_db_handle, query, &node);
if(SM_OKAY != error)
{
return error;
}
strncpy(peer_name, node.name, SM_NODE_NAME_MAX_CHAR);
return SM_OKAY;
}
return SM_FAILED;
}
// ****************************************************************************
// ****************************************************************************
// Node API - Configuration Complete
// =================================

View File

@ -10,6 +10,7 @@
#include "sm_types.h"
#include "sm_uuid.h"
#include "sm_limits.h"
#ifdef __cplusplus
extern "C" {
@ -21,6 +22,12 @@ extern "C" {
extern SmErrorT sm_node_api_get_hostname( char node_name[] );
// ****************************************************************************
// ****************************************************************************
// Node API - Get Peer Name
// ========================
extern SmErrorT sm_node_api_get_peername(char peer_name[SM_NODE_NAME_MAX_CHAR]);
// ****************************************************************************
// ****************************************************************************
// Node API - Configuration Complete
// =================================

View File

@ -706,6 +706,16 @@ typedef enum
SM_NODE_STATE_MAX
}SmNodeScheduleStateT;
typedef enum
{
SM_FAILOVER_INTERFACE_UNKNOWN,
SM_FAILOVER_INTERFACE_OK,
SM_FAILOVER_INTERFACE_MISSING_HEARTBEAT,
SM_FAILOVER_INTERFACE_DOWN,
SM_FAILOVER_INTERFACE_RECOVERING
}SmFailoverInterfaceStateT;
// ****************************************************************************
// Types - System Mode String
// =======================================