Merge "Fix MNFA recovery race condition that leads to stuck degrade"

This commit is contained in:
Zuul 2019-10-07 14:43:27 +00:00 committed by Gerrit Code Review
commit 523983cd6f
2 changed files with 6 additions and 19 deletions

View File

@@ -7516,16 +7516,12 @@ int nodeLinkClass::mon_host ( const string & hostname, bool true_false, bool sen
node_ptr = nodeLinkClass::getNode ( hostname );
if ( node_ptr != NULL )
{
bool want_log = true ;
for ( int iface = 0 ; iface < MAX_IFACES ; iface++ )
{
if ( iface == CLSTR_IFACE )
{
if ( this->clstr_network_provisioned == false )
continue ;
if ( node_ptr->monitor[MGMNT_IFACE] == true_false )
want_log = false ;
}
if ( send_clear == true )
@@ -7536,11 +7532,7 @@ int nodeLinkClass::mon_host ( const string & hostname, bool true_false, bool sen
if ( true_false == true )
{
if ( want_log )
{
ilog ("%s starting heartbeat service \n",
hostname.c_str());
}
ilog ("%s heartbeat start", hostname.c_str());
node_ptr->no_work_log_throttle = 0 ;
node_ptr->b2b_misses_count[iface] = 0 ;
node_ptr->hbs_misses_count[iface] = 0 ;
@@ -7552,11 +7544,7 @@ int nodeLinkClass::mon_host ( const string & hostname, bool true_false, bool sen
}
else
{
if ( want_log )
{
ilog ("%s stopping heartbeat service\n",
hostname.c_str());
}
ilog ("%s heartbeat stop", hostname.c_str());
}
node_ptr->monitor[iface] = true_false ;
}

View File

@@ -1963,11 +1963,11 @@ void daemon_service_run ( void )
hbsInv.mon_host ( hostname, true, true );
}
}
else if ( msg.cmd == MTC_RESTART_HBS )
else if (( msg.cmd == MTC_RESTART_HBS ) &&
( hostname != hbsInv.my_hostname ))
{
hbsInv.mon_host ( hostname, false, false );
hbsInv.mon_host ( hostname, true, false );
ilog ("%s restarting heartbeat service\n", hostname.c_str());
hbsInv.mon_host ( hostname, true, true );
ilog ("%s heartbeat restart", hostname.c_str());
hbsInv.print_node_info();
}
else if ( msg.cmd == MTC_RECOVER_HBS )
@@ -1978,7 +1978,6 @@ void daemon_service_run ( void )
}
else if ( msg.cmd == MTC_BACKOFF_HBS )
{
hbsInv.hbs_pulse_period = (hbsInv.hbs_pulse_period_save * HBS_BACKOFF_FACTOR) ;
ilog ("%s starting heartbeat backoff (period:%d msecs)\n", hostname.c_str(), hbsInv.hbs_pulse_period );
hbs_cluster_change ( "backoff" );