From 7fef84e36e0b0b1b56ec8c57fdd374e665824af8 Mon Sep 17 00:00:00 2001 From: Fabiano Correa Mercer Date: Fri, 5 Apr 2024 16:54:36 -0300 Subject: [PATCH] sw-patch-agent waits the new mgmt IP config During the management network reconfiguration, the system is restarted so that the controller_config script runs the puppet code and updates all services to use the new mgmt IP address. But the sw-patch services start before the controller_config. When they start they get the mgmt_ip using the python socket lib that uses the IP address from the /etc/hosts. But /etc/hosts at that time is not updated yet, so they get the old management network IP. To fix this issue, the sw-patch services will wait for the puppet code to be applied to make sure that /etc/hosts and the new management network IPs are installed in the system. Tests done: IPv4 AIO-SX fresh install IPv4 AIO-DX fresh install IPv4 DC with subcloud AIO-SX fresh install IPv4 AIO-SX mgmt reconfig and apply a non-reboot-required patch IPv4 AIO-SX mgmt reconfig and apply a reboot-required patch IPv4 subcloud AIO-SX mgmt reconfig and apply a non-reboot-required patch IPv4 subcloud AIO-SX mgmt reconfig and apply a reboot-required patch For this test the sw-patch was in failed state after the reboot; it happens even without the mgmt reconfig and this fix Partial-Bug: #2060066 Story: 2010722 Task: 49827 Depends-On: https://review.opendev.org/c/starlingx/config/+/914710 Change-Id: Ie544425513ef4fede73b4b55770ad6857cdf7eed Signed-off-by: Fabiano Correa Mercer --- software/software/config.py | 14 +++++++++++++- sw-patch/cgcs-patch/cgcs_patch/config.py | 14 +++++++++++++- sw-patch/cgcs-patch/cgcs_patch/constants.py | 3 +++ 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/software/software/config.py b/software/software/config.py index e7264621..3cabbb52 100644 --- a/software/software/config.py +++ b/software/software/config.py @@ -131,8 +131,20 @@ def read_config(): def get_mgmt_ip(): # Check if initial config is complete - if not 
os.path.exists('/etc/platform/.initial_config_complete'): + if not os.path.exists(tsc.INITIAL_CONFIG_COMPLETE_FLAG): return None + + # Due to https://storyboard.openstack.org/#!/story/2010722 + # the management IP for AIO-SX can be reconfigured during the startup. + # Check if /var/run/._config_complete exists to be sure that IP + # address will be the correct mgmt IP + try: + if tsc.system_mode == constants.SYSTEM_MODE_SIMPLEX and \ + not os.path.exists(tsc.VOLATILE_CONTROLLER_CONFIG_COMPLETE): + return None + except Exception: + logging.info("not able to get system_mode, continue sw-patch services") + mgmt_hostname = socket.gethostname() return utils.gethostbyname(mgmt_hostname) diff --git a/sw-patch/cgcs-patch/cgcs_patch/config.py b/sw-patch/cgcs-patch/cgcs_patch/config.py index 603d2fb8..d7f9dc4c 100644 --- a/sw-patch/cgcs-patch/cgcs_patch/config.py +++ b/sw-patch/cgcs-patch/cgcs_patch/config.py @@ -79,8 +79,20 @@ def read_config(): def get_mgmt_ip(): # Check if initial config is complete - if not os.path.exists('/etc/platform/.initial_config_complete'): + if not os.path.exists(tsc.INITIAL_CONFIG_COMPLETE_FLAG): return None + + # Due to https://storyboard.openstack.org/#!/story/2010722 + # the management IP for AIO-SX can be reconfigured during the startup. 
+ # Check if /var/run/._config_complete exists to be sure that IP + # address will be the correct mgmt IP + try: + if tsc.system_mode == constants.SYSTEM_MODE_SIMPLEX and \ + not os.path.exists(tsc.VOLATILE_CONTROLLER_CONFIG_COMPLETE): + return None + except Exception: + logging.info("not able to get system_mode, continue sw-patch services") + mgmt_hostname = socket.gethostname() return utils.gethostbyname(mgmt_hostname) diff --git a/sw-patch/cgcs-patch/cgcs_patch/constants.py b/sw-patch/cgcs-patch/cgcs_patch/constants.py index 06830c6e..491399d3 100644 --- a/sw-patch/cgcs-patch/cgcs_patch/constants.py +++ b/sw-patch/cgcs-patch/cgcs_patch/constants.py @@ -56,3 +56,6 @@ LOOPBACK_INTERFACE_NAME = "lo" SEMANTIC_PREAPPLY = 'pre-apply' SEMANTIC_PREREMOVE = 'pre-remove' SEMANTIC_ACTIONS = [SEMANTIC_PREAPPLY, SEMANTIC_PREREMOVE] + +SYSTEM_MODE_SIMPLEX = "simplex" +SYSTEM_MODE_DUPLEX = "duplex"