remove incorrect logging when standby controller failed

Add a condition to the logging so that it logs only when an active controller
failure triggers an uncontrolled swact.
The following changes are made:
1. Move get_controller_state to a new sm_failover_utils.c and rename it
   to sm_get_controller_state.
2. Use the above function to ensure that logging happens only when the
   controller scheduling state is changing (swact).

Closes-Bug: 1788697

Change-Id: I145b579c2d31e8c9e184894774d3a1c06c9149d7
Signed-off-by: Bin Qian <bin.qian@windriver.com>
This commit is contained in:
Bin Qian 2018-08-23 14:06:48 -04:00
parent 677f0fb630
commit 53a055cb3a
7 changed files with 135 additions and 80 deletions

View File

@ -108,6 +108,7 @@ SRCS+=sm_task_affining_thread.c
SRCS+=sm_node_swact_monitor.cpp
SRCS+=sm_service_domain_interface_not_in_use_state.c
SRCS+=sm_configuration_table.c
SRCS+=sm_failover_utils.c
OBJS= $(SRCS:.c=.o)
CCFLAGS= -g -O2 -Wall -Werror -Wformat -std=c++11

View File

@ -32,6 +32,7 @@
#include "sm_heartbeat_msg.h"
#include "sm_node_swact_monitor.h"
#include "sm_util_types.h"
#include "sm_failover_utils.h"
typedef enum
{
@ -658,84 +659,12 @@ SmErrorT sm_failover_if_state_get(SmHeartbeatMsgIfStateT *if_state)
}
// ****************************************************************************
// ****************************************************************************
// Failover - get interface state
// ==================
// Visitor for one service domain member. For members whose service group
// aggregate is "controller-aggregate", looks up the assignment held by
// _host_name and records its desired state through the flag pointers in
// user_data:
//   user_data[0] - bool*, set when the desired state is active
//   user_data[1] - bool*, set when the desired state is standby
//   user_data[2] - bool*, set when the desired state is initial, or when
//                  no assignment is found
//   user_data[3] - bool*, set when the desired state is disabled
// Flags are only ever set to true here, never cleared.
void service_domain_member_foreach_cb(void* user_data[], SmServiceDomainMemberT* member)
{
// Members outside the controller aggregate are ignored.
if( 0 == strcmp(member->service_group_aggregate, "controller-aggregate"))
{
// Assignment of this member's service group on the local host (_host_name).
SmServiceDomainAssignmentT* assignment = sm_service_domain_assignment_table_read(
member->name,
_host_name,
member->service_group_name
);
bool* is_active = (bool*) user_data[0];
bool* is_standby = (bool*) user_data[1];
bool* is_init = (bool*) user_data[2];
bool* is_failed = (bool*) user_data[3];
// A missing assignment is reported as the init condition.
if( NULL == assignment )
{
*is_init = true;
DPRINTFD("Waiting for service assignments being scheduled.");
return;
}
// Map the desired state onto exactly one flag; any other desired state
// leaves all flags untouched.
if( SM_SERVICE_GROUP_STATE_ACTIVE == assignment->desired_state )
{
*is_active = true;
}else if (SM_SERVICE_GROUP_STATE_STANDBY == assignment->desired_state )
{
*is_standby = true;
}else if ( SM_SERVICE_GROUP_STATE_DISABLED == assignment->desired_state)
{
*is_failed = true;
}else if ( SM_SERVICE_GROUP_STATE_INITIAL == assignment->desired_state )
{
*is_init = true;
}
}
}
// ****************************************************************************
// ****************************************************************************
// Failover - callback for service domain table loop
// ==================
// Visitor for one service domain: walks the members of domain->name and
// hands each of them, together with the unchanged user_data array, to
// service_domain_member_foreach_cb.
static void service_domain_table_each_callback(void* user_data[], SmServiceDomainT* domain)
{
sm_service_domain_member_table_foreach(
domain->name,
user_data,
service_domain_member_foreach_cb);
}
// ****************************************************************************
// ****************************************************************************
// Failover - get controller state
// ==================
// Derives the controller schedule state by walking every service domain and
// collecting the active/standby/init/failed flags set by the callbacks.
// Returns SM_NODE_STATE_INIT, SM_NODE_STATE_STANDBY or SM_NODE_STATE_ACTIVE
// (checked in that order), or SM_NODE_STATE_UNKNOWN when none of those
// flags was set.
SmNodeScheduleStateT get_controller_state()
{
SmNodeScheduleStateT state = SM_NODE_STATE_UNKNOWN;
bool is_active = false;
bool is_standby = false;
bool is_init = false;
bool is_failed = false;
// Slot order must match the indices read by service_domain_member_foreach_cb.
void* user_data[] = {(void*) &is_active, (void*) &is_standby, (void*) &is_init, (void*) &is_failed};
sm_service_domain_table_foreach( user_data, service_domain_table_each_callback);
// Precedence: init wins over standby, standby wins over active.
// is_failed is collected but does not influence the returned state.
if( is_init )
{
state = SM_NODE_STATE_INIT;
}
else if ( is_standby )
{
state = SM_NODE_STATE_STANDBY;
}
else if ( is_active )
{
state = SM_NODE_STATE_ACTIVE;
}
return state;
// NOTE(review): the return below is unreachable as written. Presumably it is
// the intended replacement for the whole body above (delegating to
// sm_get_controller_state for _host_name) - confirm.
return sm_get_controller_state(_host_name);
}
// ****************************************************************************

View File

@ -133,8 +133,6 @@ extern SmErrorT sm_failover_initialize( void );
extern SmErrorT sm_failover_finalize( void );
// ****************************************************************************
SmNodeScheduleStateT get_controller_state();
#ifdef __cplusplus
}
#endif

View File

@ -0,0 +1,92 @@
#include "sm_failover_utils.h"
#include <stdlib.h>
#include <string.h>
#include "sm_debug.h"
#include "sm_service_domain_member_table.h"
#include "sm_service_domain_assignment_table.h"
#include "sm_service_domain_table.h"
// ****************************************************************************
// Failover utilities - loop all service domain members
// ==================
// Visitor for one service domain member. Records, through the flag pointers
// carried in user_data, the desired state of the controller-aggregate
// assignment that the queried node holds for this member.
//   user_data[0] - bool*, set when the desired state is active
//   user_data[1] - bool*, set when the desired state is standby
//   user_data[2] - bool*, set when the desired state is initial, or when
//                  no assignment is found
//   user_data[3] - bool*, set when the desired state is disabled
//   user_data[4] - char*, name of the node being queried
// Flags are only ever raised here, never cleared.
static void service_domain_member_foreach_cb(void* user_data[], SmServiceDomainMemberT* member)
{
    if( 0 != strcmp(member->service_group_aggregate, "controller-aggregate") )
    {
        // Not part of the controller aggregate, nothing to record.
        return;
    }

    bool* active_flag  = (bool*) user_data[0];
    bool* standby_flag = (bool*) user_data[1];
    bool* init_flag    = (bool*) user_data[2];
    bool* failed_flag  = (bool*) user_data[3];
    char* queried_node = (char*) user_data[4];

    SmServiceDomainAssignmentT* entry = sm_service_domain_assignment_table_read(
        member->name, queried_node, member->service_group_name );

    if( NULL == entry )
    {
        // No assignment for this node: report it as the init condition.
        *init_flag = true;
        DPRINTFD("Waiting for service assignments to be scheduled.");
        return;
    }

    // Raise exactly one flag; any other desired state is ignored.
    switch( entry->desired_state )
    {
        case SM_SERVICE_GROUP_STATE_ACTIVE:
            *active_flag = true;
            break;

        case SM_SERVICE_GROUP_STATE_STANDBY:
            *standby_flag = true;
            break;

        case SM_SERVICE_GROUP_STATE_DISABLED:
            *failed_flag = true;
            break;

        case SM_SERVICE_GROUP_STATE_INITIAL:
            *init_flag = true;
            break;

        default:
            break;
    }
}
// ****************************************************************************
// ****************************************************************************
// Failover utilities - callback for service domain table loop
// ==================
// Visitor for one service domain: iterates the members of domain->name and
// passes each one, along with the untouched user_data array, to
// service_domain_member_foreach_cb.
static void service_domain_table_each_callback(void* user_data[], SmServiceDomainT* domain)
{
    sm_service_domain_member_table_foreach( domain->name, user_data,
                                            service_domain_member_foreach_cb );
}
// ****************************************************************************
// ****************************************************************************
// Failover utilities - get controller state
// ==================
// Derives the controller schedule state of the node called node_name by
// walking every service domain and collecting the flags raised by the
// member callback.
// Returns SM_NODE_STATE_INIT, SM_NODE_STATE_STANDBY or SM_NODE_STATE_ACTIVE
// (checked in that order), or SM_NODE_STATE_UNKNOWN when none of those
// flags was raised.
SmNodeScheduleStateT sm_get_controller_state(
const char node_name[])
{
    bool saw_active = false;
    bool saw_standby = false;
    bool saw_init = false;
    bool saw_failed = false;

    // Slot order must match the indices read by the member callback.
    void* user_data[] = {
        (void*) &saw_active,
        (void*) &saw_standby,
        (void*) &saw_init,
        (void*) &saw_failed,
        (void*) node_name
    };

    sm_service_domain_table_foreach( user_data, service_domain_table_each_callback );

    // Precedence: init wins over standby, standby wins over active.
    // saw_failed is collected but does not influence the result.
    if( saw_init )
    {
        return SM_NODE_STATE_INIT;
    }

    if( saw_standby )
    {
        return SM_NODE_STATE_STANDBY;
    }

    if( saw_active )
    {
        return SM_NODE_STATE_ACTIVE;
    }

    return SM_NODE_STATE_UNKNOWN;
}
// ****************************************************************************

View File

@ -0,0 +1,25 @@
//
// Copyright (c) 2018 Wind River Systems, Inc.
//
// SPDX-License-Identifier: Apache-2.0
//
#ifndef __SM_FAILOVER_UTILS_H__
#define __SM_FAILOVER_UTILS_H__
#include "sm_types.h"
#ifdef __cplusplus
extern "C" {
#endif
// ****************************************************************************
// Failover Utilities - get node controller state
// ==============================
SmNodeScheduleStateT sm_get_controller_state(const char node_name[]);
// ****************************************************************************
#ifdef __cplusplus
}
#endif
#endif // __SM_FAILOVER_UTILS_H__

View File

@ -22,6 +22,7 @@
#include "sm_node_api.h"
#include "sm_node_swact_monitor.h"
#include "sm_node_utils.h"
#include "sm_failover_utils.h"
static SmDbHandleT* _sm_db_handle = NULL;
@ -321,14 +322,21 @@ static void sm_service_domain_filter_by_assignment( void* user_data[],
DPRINTFE( "Failed to get hostname, error=%s.",
sm_error_str( error ) );
hostname[0] = '\0';
return error;
}
DPRINTFI("Uncontrolled swact start");
SmNodeScheduleStateT current_schedule_state = sm_get_controller_state( hostname );
SmNodeScheduleStateT to_schedule_state;
if(0 == strcmp(hostname, assignment->node_name))
{
SmNodeSwactMonitor::SwactStart(SM_NODE_STATE_STANDBY);
to_schedule_state = SM_NODE_STATE_STANDBY;
}else
{
SmNodeSwactMonitor::SwactStart(SM_NODE_STATE_ACTIVE);
to_schedule_state = SM_NODE_STATE_ACTIVE;
}
if(current_schedule_state != to_schedule_state)
{
DPRINTFI("Uncontrolled swact start");
SmNodeSwactMonitor::SwactStart(to_schedule_state);
}
list = SM_SERVICE_DOMAIN_SCHEDULING_LIST_FAILED;

View File

@ -35,7 +35,7 @@
#include "sm_log.h"
#include "sm_node_utils.h"
#include "sm_node_swact_monitor.h"
#include "sm_failover.h"
#include "sm_failover_utils.h"
#include "sm_swact_state.h"
static SmListT* _callbacks = NULL;
@ -265,7 +265,6 @@ static SmErrorT sm_service_group_fsm_enter_state( SmServiceGroupT* service_group
sm_service_group_table_foreach( user_data, sm_service_group_state_check );
if( all_good )
{
SmNodeScheduleStateT controller_state = get_controller_state();
char hostname[SM_NODE_NAME_MAX_CHAR];
error = sm_node_utils_get_hostname( hostname );
if( SM_OKAY != error )
@ -273,7 +272,10 @@ static SmErrorT sm_service_group_fsm_enter_state( SmServiceGroupT* service_group
DPRINTFE( "Failed to get hostname, error=%s.",
sm_error_str( error ) );
hostname[0] = '\0';
return error;
}
SmNodeScheduleStateT controller_state = sm_get_controller_state(hostname);
if( SM_NODE_STATE_ACTIVE == controller_state )
{
SmNodeSwactMonitor::SwactUpdate(hostname, SM_NODE_STATE_ACTIVE );