Bug1845393 remove interface recovering state

In the case of a switch recycle, the connected NIC goes down and comes back
up, but communication is only restored after the switch is up and running
again. This can take a few seconds (much longer than anticipated).

This change holds off the interface (i/f) state update to the peer.

Also remove the batching of interface failover state changes. This is already
handled by the failover FSM's fail_pending state.

Change-Id: Ia810927dbbc4b3821f7915e6a42bceeac43d9e46
Closes-Bug: 1845393
Signed-off-by: Bin Qian <bin.qian@windriver.com>
This commit is contained in:
Bin Qian 2019-10-02 13:03:09 -04:00
parent c8735e882a
commit fc0828238f
2 changed files with 7 additions and 65 deletions

View File

@ -730,8 +730,7 @@ typedef enum
SM_FAILOVER_INTERFACE_UNKNOWN,
SM_FAILOVER_INTERFACE_OK,
SM_FAILOVER_INTERFACE_MISSING_HEARTBEAT,
SM_FAILOVER_INTERFACE_DOWN,
SM_FAILOVER_INTERFACE_RECOVERING
SM_FAILOVER_INTERFACE_DOWN
}SmFailoverInterfaceStateT;
// ****************************************************************************

View File

@ -145,7 +145,6 @@ static SmNodeScheduleStateT _prev_host_state= SM_NODE_STATE_UNKNOWN;
static bool _hello_msg_alive = true;
static SmSystemModeT _system_mode;
static time_t _last_report_ts = 0;
static int _heartbeat_count = 0;
@ -394,7 +393,9 @@ void sm_failover_interface_up( const char* const interface_name )
DPRINTFI("Interface %s is up", interface_name);
}
impacted ++;
if(iter->set_state(SM_FAILOVER_INTERFACE_RECOVERING))
SmFailoverInterfaceStateT state = iter->get_state();
if(SM_FAILOVER_INTERFACE_OK != state &&
iter->set_state(SM_FAILOVER_INTERFACE_MISSING_HEARTBEAT))
{
DPRINTFI("Domain interface %s is recovering, as i/f %s is now up.",
interface->service_domain_interface, interface_name);
@ -561,31 +562,6 @@ bool sm_is_active_controller()
}
// ****************************************************************************
// ****************************************************************************
// Failover - interface is in transit state
// ==================
// Checks whether the given interface is currently in the
// SM_FAILOVER_INTERFACE_RECOVERING state and still in transition.
//   - Returns true when the interface is RECOVERING and
//     if_info->state_in_transition() reports true.
//   - When the interface is RECOVERING but no longer in transition, the
//     state is set to SM_FAILOVER_INTERFACE_MISSING_HEARTBEAT and false is
//     returned.
//   - For any other state, returns false without changing the state.
bool sm_failover_if_transit_state(SmFailoverInterfaceInfo* if_info)
{
    // Only a recovering interface can be "in transit".
    if( SM_FAILOVER_INTERFACE_RECOVERING != if_info->get_state() )
    {
        return false;
    }

    const SmServiceDomainInterfaceT* domain_if = if_info->get_interface();
    if( !if_info->state_in_transition() )
    {
        // Transition window is over: downgrade to missing heartbeat.
        if_info->set_state(SM_FAILOVER_INTERFACE_MISSING_HEARTBEAT);
        DPRINTFI( "If %s missing heartbeat", domain_if->service_domain_interface);
        return false;
    }

    DPRINTFI( "If %s is reconvering, wait for either trun OK or missing heartbeat",
              domain_if->service_domain_interface);
    return true;
}
// ****************************************************************************
// ****************************************************************************
// Failover - swact controller
// ==================
@ -939,23 +915,6 @@ void sm_failover_audit()
_prev_host_state = _host_state;
}
bool in_transition = false;
in_transition = in_transition ||
sm_failover_if_transit_state(_mgmt_interface_info);
in_transition = in_transition ||
sm_failover_if_transit_state(_oam_interface_info);
if( is_cluster_host_interface_configured() )
{
in_transition = in_transition ||
sm_failover_if_transit_state(_cluster_host_interface_info);
}
if(in_transition)
{
//if state in transition, wait for next audit
return;
}
int if_state_flag = sm_failover_get_if_state();
if(if_state_flag & SM_FAILOVER_HEARTBEAT_ALIVE)
{
@ -967,25 +926,10 @@ void sm_failover_audit()
}
if( _prev_if_state_flag != if_state_flag)
{
_last_report_ts = now_ms;
DPRINTFI("Interface state flag %d", if_state_flag);
if( SM_FAILOVER_MULTI_FAILURE_WAIT_TIMER_IN_MS > now_ms - _last_if_state_ms )
{
DPRINTFD("interface state just changed. wait %d ms for concurrent changes",
SM_FAILOVER_MULTI_FAILURE_WAIT_TIMER_IN_MS);
return;
}else
{
_last_if_state_ms = now_ms;
_prev_if_state_flag = if_state_flag;
}
}
if(now_ms - _last_report_ts > SM_FAILOVER_INTERFACE_STATE_REPORT_INTERVAL_MS)
{
_last_report_ts = now_ms;
DPRINTFD("Interface state flag %d", if_state_flag);
_last_if_state_ms = now_ms;
_prev_if_state_flag = if_state_flag;
}
if(!peer_controller_enabled())
@ -1439,8 +1383,7 @@ static KeyStringMapT if_state_map[] = {
{SM_FAILOVER_INTERFACE_UNKNOWN, "Unknown"},
{SM_NODE_STATE_ACTIVE, "Active"},
{SM_FAILOVER_INTERFACE_MISSING_HEARTBEAT, "Missing heartbeat"},
{SM_FAILOVER_INTERFACE_DOWN, "Down"},
{SM_FAILOVER_INTERFACE_RECOVERING, "Recovering" }
{SM_FAILOVER_INTERFACE_DOWN, "Down"}
};
void dump_if_state(FILE* fp, SmFailoverInterfaceInfo* interface, const char* if_name )