Disable admin network failover behaviour

A requirement for a subcloud's admin network is that its
subnet information can be updated without a host
lock / unlock.

Accordingly, the service domain interface and admin-ip
service in SM must be provisioned / deprovisioned at
runtime.

In an AIO-DX system this can cause issues in certain
circumstances, as the disablement / enablement must be
done via puppet and can be affected by the order in which
a user performs each action as well as the timing of the
currently running manifests on each host.

This commit disables the failover behaviour for the admin
network, because link flapping and heartbeat losses are expected
while the service domain interface is provisioned/deprovisioned.

This commit also disables heartbeat messages when a service
domain interface is deprovisioned, to prevent log spamming,
and fixes a couple of other minor issues that were found
while testing.

Depends-On: https://review.opendev.org/c/starlingx/stx-puppet/+/889872

Test plan:

- No uncontrolled swacts while re-configuring admin subnets
  or reverting to the management subnet (deleting the admin
  address pool) dozens of times.

- Alarms still generated on interface down / heartbeat loss

- Switching back and forth between admin network / mgmt
  network via dcmanager.

Story: 2010319
Task: 47707

Change-Id: I761b5b20b6de198ef763b2d3480e6f7cd380f952
Signed-off-by: Steven Webster <steven.webster@windriver.com>
This commit is contained in:
Steven Webster 2023-07-27 09:43:56 -04:00
parent ac8f60b120
commit 4a96509146
4 changed files with 52 additions and 22 deletions

View File

@ -174,16 +174,6 @@ static bool sm_failover_failed_recovery_criteria_met( void )
{
criteria_met = true ;
}
if ( criteria_met && is_admin_interface_configured() )
{
criteria_met = false ;
admin_state = sm_failover_get_interface_info(SM_INTERFACE_ADMIN);
if (( admin_state == SM_FAILOVER_INTERFACE_OK ) || ( admin_state == SM_FAILOVER_INTERFACE_MISSING_HEARTBEAT ))
{
criteria_met = true;
}
}
}
DPRINTFI("Oam:%s ; Mgmt:%s ; Cluster:%s ; Admin:%s recovery criteria met: %s",

View File

@ -25,15 +25,13 @@ SmErrorT SmFailoverNormalState::event_handler(SmFailoverEventT event, const ISmF
{
const SmIFStateChangedEventData* data = (const SmIFStateChangedEventData*) event_data;
SmFailoverInterfaceStateT oam_state, mgmt_state, cluster_host_state, admin_state;
SmFailoverInterfaceStateT oam_state, mgmt_state, cluster_host_state;
oam_state = data->get_interface_state(SM_INTERFACE_OAM);
mgmt_state = data->get_interface_state(SM_INTERFACE_MGMT);
cluster_host_state = data->get_interface_state(SM_INTERFACE_CLUSTER_HOST);
admin_state = data->get_interface_state(SM_INTERFACE_ADMIN);
if(oam_state != SM_FAILOVER_INTERFACE_OK ||
mgmt_state != SM_FAILOVER_INTERFACE_OK ||
(cluster_host_state != SM_FAILOVER_INTERFACE_OK && cluster_host_state != SM_FAILOVER_INTERFACE_UNKNOWN) ||
(admin_state != SM_FAILOVER_INTERFACE_OK && admin_state != SM_FAILOVER_INTERFACE_UNKNOWN))
(cluster_host_state != SM_FAILOVER_INTERFACE_OK && cluster_host_state != SM_FAILOVER_INTERFACE_UNKNOWN))
{
this->fsm.set_state(SM_FAILOVER_STATE_FAIL_PENDING);
}

View File

@ -763,7 +763,8 @@ static int sm_send_msg(SmServiceDomainInterfaceT* interface, SmMsgT* msg )
dst_addr4.sin_family = AF_INET;
dst_addr4.sin_port = htons(interface->network_port);
if ( SM_INTERFACE_OAM != interface->interface_type )
if (( SM_INTERFACE_OAM != interface->interface_type ) &&
( SM_INTERFACE_ADMIN != interface->interface_type ))
{
ipv4_dst = &(interface->network_multicast.u.ipv4);
}
@ -795,7 +796,8 @@ static int sm_send_ipv6_msg(SmServiceDomainInterfaceT* interface, SmMsgT* msg )
dst_addr6.sin6_family = AF_INET6;
dst_addr6.sin6_port = htons(interface->network_port);
if ( SM_INTERFACE_OAM != interface->interface_type )
if (( SM_INTERFACE_OAM != interface->interface_type ) &&
( SM_INTERFACE_ADMIN != interface->interface_type ))
{
ipv6_dst = &(interface->network_multicast.u.ipv6);
}else

View File

@ -16,6 +16,7 @@
#include "sm_node_fsm.h"
#include "sm_service_domain_interface_table.h"
#include "sm_service_domain_interface_fsm.h"
#include "sm_heartbeat.h"
#include "sm_log.h"
static void sm_service_domain_interface_api_send_event( void* user_data[],
@ -250,17 +251,37 @@ SmErrorT sm_service_domain_interface_api_node_disabled( void )
// ****************************************************************************
// Service Domain Interface API - Interface Provisioned
// ============================================
SmErrorT sm_service_domain_interface_api_provisioned( SmServiceDomainInterfaceT* interface )
SmErrorT sm_service_domain_interface_api_provisioned(
SmServiceDomainInterfaceT* interface )
{
char reason_text[SM_LOG_REASON_TEXT_MAX_CHAR];
SmServiceDomainInterfaceEventT event;
void* user_data[] = { &event, reason_text };
SmErrorT error;
error = sm_heartbeat_delete_interface( interface );
if( SM_OKAY != error )
{
DPRINTFE( "Failed to delete interface from heartbeat thread: %s",
sm_error_str( error ) );
return( error );
}
sm_service_domain_interface_api_get_hw_interface(interface);
error = sm_heartbeat_add_interface( interface );
if( SM_OKAY != error )
{
DPRINTFE( "Failed to add messaging interface for service domain (%s), "
"error=%s.", interface->service_domain,
sm_error_str( error ) );
return( error );
}
/* The following will restart heartbeat services with the updated networking config via the FSM */
event = SM_SERVICE_DOMAIN_INTERFACE_EVENT_UNKNOWN;
snprintf( reason_text, sizeof(reason_text), "%s interface is enabled",
interface->service_domain_interface );
sm_service_domain_interface_api_send_event( user_data, interface );
return( SM_OKAY );
@ -270,19 +291,38 @@ SmErrorT sm_service_domain_interface_api_provisioned( SmServiceDomainInterfaceT*
// ****************************************************************************
// Service Domain Interface API - Interface Deprovisioned
// ============================================
SmErrorT sm_service_domain_interface_api_deprovisioned( SmServiceDomainInterfaceT* interface )
SmErrorT sm_service_domain_interface_api_deprovisioned(
SmServiceDomainInterfaceT* interface )
{
/*
 * Handle runtime de-provisioning of a service domain interface.
 *
 * Steps, in order:
 *   1. Send a NOT_IN_USE event for the interface, with a reason text of
 *      "<service_domain_interface> interface is disabled".
 *   2. Delete the interface from the heartbeat module.
 *   3. Delete the matching peer heartbeat interface.
 *
 * Returns SM_OKAY on success, otherwise the error returned by the first
 * heartbeat delete call that fails. A failure in step 2 returns
 * immediately, so step 3 is not attempted in that case.
 */
SmErrorT error;
char reason_text[SM_LOG_REASON_TEXT_MAX_CHAR];
SmServiceDomainInterfaceEventT event;
/* Event and reason are handed to the send-event helper as a pair;
 * NOTE(review): the slot order is presumably what that helper expects -
 * confirm against sm_service_domain_interface_api_send_event. */
void* user_data[] = { &event, reason_text };
event = SM_SERVICE_DOMAIN_INTERFACE_EVENT_NOT_IN_USE;
snprintf( reason_text, sizeof(reason_text), "%s interface is disabled",
interface->service_domain_interface );
/* The event is sent before any heartbeat teardown below. */
sm_service_domain_interface_api_send_event( user_data, interface );
/* Remove the interface from heartbeating; per the change description this
 * prevents heartbeat log spam once the interface is de-provisioned. */
error = sm_heartbeat_delete_interface( interface );
if( SM_OKAY != error )
{
DPRINTFE( "Failed to delete interface from heartbeat thread: %s",
sm_error_str( error ) );
return( error );
}
/* Remove the peer side, identified by interface name, peer address and
 * heartbeat port. */
error = sm_heartbeat_delete_peer_interface( interface->interface_name,
&(interface->network_peer_address),
interface->network_heartbeat_port );
if( SM_OKAY != error )
{
DPRINTFE( "Failed to delete peer messaging interface for service "
"domain (%s), error=%s.", interface->service_domain,
sm_error_str( error ) );
return( error );
}
return( SM_OKAY );
}
// ****************************************************************************