From 4a9650914639d02d45646ecf98a238121e5596c5 Mon Sep 17 00:00:00 2001 From: Steven Webster Date: Thu, 27 Jul 2023 09:43:56 -0400 Subject: [PATCH] Disable admin network failover behaviour A requirement for a subcloud's admin network is that its subnet information can be updated without a host lock / unlock. Accordingly, the service domain interface and admin-ip service in SM must be provisioned / deprovisioned at runtime. In an AIO-DX system this can cause issues in certain circumstances, as the disablement / enablement must be done via puppet and can be affected by the order in which a user performs each action as well as the timing of the currently running manifests on each host. This commit disables the failover behaviour for the admin network, as link flapping and heartbeat losses are expected while the service domain interface is provisioned/deprovisioned. Also in this commit is the disablement of heartbeat messages on service domain interface de-provision to prevent log spamming, as well as fixes for a couple of other minor issues that were found while testing. Depends-On: https://review.opendev.org/c/starlingx/stx-puppet/+/889872 Test plan: - No uncontrolled swacts while re-configuring admin subnets or reverting to the management subnet (deleting the admin address pool) dozens of times. - Alarms still generated on interface down / heartbeat loss - Switching back and forth between admin network / mgmt network via dcmanager. 
Story: 2010319 Task: 47707 Change-Id: I761b5b20b6de198ef763b2d3480e6f7cd380f952 Signed-off-by: Steven Webster --- .../sm/src/sm_failover_failed_state.cpp | 10 ---- .../sm/src/sm_failover_normal_state.cpp | 6 +-- service-mgmt/sm/src/sm_msg.c | 6 ++- .../sm/src/sm_service_domain_interface_api.c | 52 ++++++++++++++++--- 4 files changed, 52 insertions(+), 22 deletions(-) diff --git a/service-mgmt/sm/src/sm_failover_failed_state.cpp b/service-mgmt/sm/src/sm_failover_failed_state.cpp index dc3cb3d3..53892202 100644 --- a/service-mgmt/sm/src/sm_failover_failed_state.cpp +++ b/service-mgmt/sm/src/sm_failover_failed_state.cpp @@ -174,16 +174,6 @@ static bool sm_failover_failed_recovery_criteria_met( void ) { criteria_met = true ; } - - if ( criteria_met && is_admin_interface_configured() ) - { - criteria_met = false ; - admin_state = sm_failover_get_interface_info(SM_INTERFACE_ADMIN); - if (( admin_state == SM_FAILOVER_INTERFACE_OK ) || ( admin_state == SM_FAILOVER_INTERFACE_MISSING_HEARTBEAT )) - { - criteria_met = true; - } - } } DPRINTFI("Oam:%s ; Mgmt:%s ; Cluster:%s ; Admin:%s recovery criteria met: %s", diff --git a/service-mgmt/sm/src/sm_failover_normal_state.cpp b/service-mgmt/sm/src/sm_failover_normal_state.cpp index f6a98fdd..efa8587c 100644 --- a/service-mgmt/sm/src/sm_failover_normal_state.cpp +++ b/service-mgmt/sm/src/sm_failover_normal_state.cpp @@ -25,15 +25,13 @@ SmErrorT SmFailoverNormalState::event_handler(SmFailoverEventT event, const ISmF { const SmIFStateChangedEventData* data = (const SmIFStateChangedEventData*) event_data; - SmFailoverInterfaceStateT oam_state, mgmt_state, cluster_host_state, admin_state; + SmFailoverInterfaceStateT oam_state, mgmt_state, cluster_host_state; oam_state = data->get_interface_state(SM_INTERFACE_OAM); mgmt_state = data->get_interface_state(SM_INTERFACE_MGMT); cluster_host_state = data->get_interface_state(SM_INTERFACE_CLUSTER_HOST); - admin_state = data->get_interface_state(SM_INTERFACE_ADMIN); if(oam_state != 
SM_FAILOVER_INTERFACE_OK || mgmt_state != SM_FAILOVER_INTERFACE_OK || - (cluster_host_state != SM_FAILOVER_INTERFACE_OK && cluster_host_state != SM_FAILOVER_INTERFACE_UNKNOWN) || - (admin_state != SM_FAILOVER_INTERFACE_OK && admin_state != SM_FAILOVER_INTERFACE_UNKNOWN)) + (cluster_host_state != SM_FAILOVER_INTERFACE_OK && cluster_host_state != SM_FAILOVER_INTERFACE_UNKNOWN)) { this->fsm.set_state(SM_FAILOVER_STATE_FAIL_PENDING); } diff --git a/service-mgmt/sm/src/sm_msg.c b/service-mgmt/sm/src/sm_msg.c index e2c198e4..0258aa1b 100644 --- a/service-mgmt/sm/src/sm_msg.c +++ b/service-mgmt/sm/src/sm_msg.c @@ -763,7 +763,8 @@ static int sm_send_msg(SmServiceDomainInterfaceT* interface, SmMsgT* msg ) dst_addr4.sin_family = AF_INET; dst_addr4.sin_port = htons(interface->network_port); - if ( SM_INTERFACE_OAM != interface->interface_type ) + if (( SM_INTERFACE_OAM != interface->interface_type ) && + ( SM_INTERFACE_ADMIN != interface->interface_type )) { ipv4_dst = &(interface->network_multicast.u.ipv4); } @@ -795,7 +796,8 @@ static int sm_send_ipv6_msg(SmServiceDomainInterfaceT* interface, SmMsgT* msg ) dst_addr6.sin6_family = AF_INET6; dst_addr6.sin6_port = htons(interface->network_port); - if ( SM_INTERFACE_OAM != interface->interface_type ) + if (( SM_INTERFACE_OAM != interface->interface_type ) && + ( SM_INTERFACE_ADMIN != interface->interface_type )) { ipv6_dst = &(interface->network_multicast.u.ipv6); }else diff --git a/service-mgmt/sm/src/sm_service_domain_interface_api.c b/service-mgmt/sm/src/sm_service_domain_interface_api.c index e4cb7fb9..15e34eb7 100644 --- a/service-mgmt/sm/src/sm_service_domain_interface_api.c +++ b/service-mgmt/sm/src/sm_service_domain_interface_api.c @@ -16,6 +16,7 @@ #include "sm_node_fsm.h" #include "sm_service_domain_interface_table.h" #include "sm_service_domain_interface_fsm.h" +#include "sm_heartbeat.h" #include "sm_log.h" static void sm_service_domain_interface_api_send_event( void* user_data[], @@ -250,17 +251,37 @@ SmErrorT 
sm_service_domain_interface_api_node_disabled( void ) // **************************************************************************** // Service Domain Interface API - Interface Provisioned // ============================================ -SmErrorT sm_service_domain_interface_api_provisioned( SmServiceDomainInterfaceT* interface ) +SmErrorT sm_service_domain_interface_api_provisioned( + SmServiceDomainInterfaceT* interface ) { char reason_text[SM_LOG_REASON_TEXT_MAX_CHAR]; SmServiceDomainInterfaceEventT event; void* user_data[] = { &event, reason_text }; + SmErrorT error; + error = sm_heartbeat_delete_interface( interface ); + if( SM_OKAY != error ) + { + DPRINTFE( "Failed to delete interface from heartbeat thread: %s", + sm_error_str( error ) ); + return( error ); + } + + sm_service_domain_interface_api_get_hw_interface(interface); + + error = sm_heartbeat_add_interface( interface ); + if( SM_OKAY != error ) + { + DPRINTFE( "Failed to add messaging interface for service domain (%s), " + "error=%s.", interface->service_domain, + sm_error_str( error ) ); + return( error ); + } + + /* The following will restart heartbeat services with the updated networking config via the FSM */ event = SM_SERVICE_DOMAIN_INTERFACE_EVENT_UNKNOWN; - snprintf( reason_text, sizeof(reason_text), "%s interface is enabled", interface->service_domain_interface ); - sm_service_domain_interface_api_send_event( user_data, interface ); return( SM_OKAY ); @@ -270,19 +291,38 @@ SmErrorT sm_service_domain_interface_api_provisioned( SmServiceDomainInterfaceT* // **************************************************************************** // Service Domain Interface API - Interface Deprovisioned // ============================================ -SmErrorT sm_service_domain_interface_api_deprovisioned( SmServiceDomainInterfaceT* interface ) +SmErrorT sm_service_domain_interface_api_deprovisioned( + SmServiceDomainInterfaceT* interface ) { + SmErrorT error; char reason_text[SM_LOG_REASON_TEXT_MAX_CHAR]; 
SmServiceDomainInterfaceEventT event; void* user_data[] = { &event, reason_text }; event = SM_SERVICE_DOMAIN_INTERFACE_EVENT_NOT_IN_USE; - snprintf( reason_text, sizeof(reason_text), "%s interface is disabled", interface->service_domain_interface ); - sm_service_domain_interface_api_send_event( user_data, interface ); + error = sm_heartbeat_delete_interface( interface ); + if( SM_OKAY != error ) + { + DPRINTFE( "Failed to delete interface from heartbeat thread: %s", + sm_error_str( error ) ); + return( error ); + } + + error = sm_heartbeat_delete_peer_interface( interface->interface_name, + &(interface->network_peer_address), + interface->network_heartbeat_port ); + if( SM_OKAY != error ) + { + DPRINTFE( "Failed to delete peer messaging interface for service " + "domain (%s), error=%s.", interface->service_domain, + sm_error_str( error ) ); + return( error ); + } + return( SM_OKAY ); } // ****************************************************************************