From 91fa44188cd6fa24645e958550cf3c9c0ca3e654 Mon Sep 17 00:00:00 2001 From: Eric MacDonald Date: Fri, 2 Feb 2024 16:52:50 +0000 Subject: [PATCH] Add node locked gate to SM enable for DX systems Service Management (SM) sometimes selects and activates services on a locked controller following a Dead Office Recovery (DOR). This update adds a node locked check to SM's enable handler that blocks enable while the node locked file is present, much like the existing goenabled check in the same function blocks enable while the goenabled file is absent. The enable gate file is /etc/mtc/tmp/.node_locked on the local host. Maintenance manages the presence or absence of this file based on the node's administrative state. This update also cleans up some extra whitespace in the changed file. Test Plan: PASS: Verify system build. PASS: Verify AIO SX install. PASS: Verify AIO DX install. PASS: Verify Standard DX system install with worker and storage. For both 'AIO DX' and 'Standard DX with worker and storage': PASS: Verify SM does not activate on a locked DX controller. PASS: ... DOR case PASS: ... Uncontrolled Swact case PASS: Verify Standard DX behavior over DOR with one locked controller while the only unlocked controller does not recover. PASS: Verify behavior after above test case once the only unlocked controller does recover. PASS: Verify lock of the standby controller and its SM logs PASS: Verify manually creating the new Nv node locked file on the active controller will cause SM to go disabled and shut down all services on that controller. ... If there is another unlocked controller then verify it takes over as an uncontrolled swact. ... If there is no unlocked standby controller then verify SM remains shut down until the manually created Nv node locked file is removed, at which point SM proceeds to activate services on that controller again. PASS: Verify SM ignores the node locked flag file for AIO SX systems. PASS: Verify lock/unlock of AIO SX controller. PASS: Verify original reported issue is resolved for AIO DX systems. 
Regression: PASS: Verify controlled swact with unlocked enabled standby. PASS: Verify uncontrolled swact with unlocked enabled standby. PASS: Verify standby controller lock/unlock soak loop (10). PASS: Verify swact loop soak (10). PASS: Verify no crash or core dumps. PASS: Verify SM logging Closes-Bug: 2051578 Change-Id: If8e27ef30d62096fa77c3868f4d460b18e10ade2 (cherry picked from commit 23d0d8ab2f3225f10594547c5f8a67c409f815a0) --- service-mgmt/sm-common/src/sm_node_utils.c | 36 +++++++++++++++++----- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/service-mgmt/sm-common/src/sm_node_utils.c b/service-mgmt/sm-common/src/sm_node_utils.c index ade940c9..11edbd6e 100644 --- a/service-mgmt/sm-common/src/sm_node_utils.c +++ b/service-mgmt/sm-common/src/sm_node_utils.c @@ -1,5 +1,5 @@ // -// Copyright (c) 2014-2023 Wind River Systems, Inc. +// Copyright (c) 2014-2024 Wind River Systems, Inc. // // SPDX-License-Identifier: Apache-2.0 // @@ -18,6 +18,7 @@ #include "sm_types.h" #include "sm_debug.h" +#define SM_NODE_LOCKED_FILE "/etc/mtc/tmp/.node_locked" #define SM_NODE_GO_ENABLE_FILE "/var/run/goenabled" #define SM_NODE_GO_ENABLE_FILE_SIMPLEX "/var/run/.goenabled" #define SM_NODE_UNHEALTHY_FILE "/var/run/.sm_node_unhealthy" @@ -50,7 +51,7 @@ static SmErrorT sm_node_utils_read_platform_config( const char key[], char format[1024]; char line[1024]; char val[1024]; - + value[0] = '\0'; fp = fopen( SM_NODE_PLATFORM_CONFIG_FILE, "r" ); @@ -156,7 +157,7 @@ SmErrorT sm_node_utils_is_aio( bool* is_aio ) else { *is_aio = ( IsTrue == _is_aio ); - } + } return SM_OKAY; } @@ -346,7 +347,7 @@ SmErrorT sm_node_utils_get_hostname( char node_name[] ) // **************************************************************************** // **************************************************************************** -// Node Utilities - Configuration Complete +// Node Utilities - Configuration Complete // ======================================= SmErrorT 
sm_node_utils_config_complete( bool* complete ) { @@ -369,7 +370,7 @@ SmErrorT sm_node_utils_config_complete( bool* complete ) *complete = true; - return( SM_OKAY ); + return( SM_OKAY ); } // **************************************************************************** @@ -381,6 +382,7 @@ typedef enum NODE_UNHEALTHY_FILE_EXISTS, NODE_DISABLED_LICENSE_INVALID, NODE_DISABLED_FAILOVER, + NODE_IS_LOCKED, NODE_ENABLED }SmNodeEnabledBlockingStateT; static SmNodeEnabledBlockingStateT blocking_state = BLOCKING_STATE_INIT; @@ -394,6 +396,7 @@ SmErrorT sm_node_utils_enabled( bool* enabled, char reason_text[] ) *enabled = false; reason_text[0] = '\0'; const char* goenabled_file = SM_NODE_GO_ENABLE_FILE; + const char* node_locked_file = SM_NODE_LOCKED_FILE; bool is_aio_simplex = false; SmErrorT error = sm_node_utils_is_aio_simplex(&is_aio_simplex); @@ -409,6 +412,23 @@ SmErrorT sm_node_utils_enabled( bool* enabled, char reason_text[] ) goenabled_file = SM_NODE_GO_ENABLE_FILE_SIMPLEX; } + // AIO SX Case: Need to support SM + // 1. activating on the only locked controller + // 2. maintaining an active state after locking the only controller + if( (!is_aio_simplex) && ( 0 == access( node_locked_file, F_OK ))) + { + if(blocking_state != NODE_IS_LOCKED) + { + blocking_state = NODE_IS_LOCKED; + DPRINTFI("Node enable: blocked. 
Node is locked ; the %s file is present", node_locked_file); + } + + snprintf( reason_text, SM_LOG_REASON_TEXT_MAX_CHAR, + "node is locked" ); + + return( SM_OKAY ); + } + if(0 > access( goenabled_file, F_OK )) { if( ENOENT == errno ) @@ -518,7 +538,7 @@ bool sm_node_utils_set_failover( bool to_disable ) // **************************************************************************** // Node Utilities - Set Unhealthy // ============================== -SmErrorT sm_node_utils_set_unhealthy( void ) +SmErrorT sm_node_utils_set_unhealthy( void ) { int fd = open( SM_NODE_UNHEALTHY_FILE, O_RDWR | O_CREAT, S_IRUSR | S_IRGRP | S_IROTH); @@ -563,13 +583,13 @@ SmErrorT sm_node_utils_is_aio_duplex( bool* is_aio_duplex ) if( IsUnknown == _is_aio_duplex ) { SmErrorT error; - bool is_aio = false; + bool is_aio = false; error = sm_node_utils_is_aio( &is_aio ); if( SM_OKAY != error) { return error; } - + if ( !is_aio ) { *is_aio_duplex = false;