diff --git a/.zuul.yaml b/.zuul.yaml index e1ed9252..206ca5a1 100644 --- a/.zuul.yaml +++ b/.zuul.yaml @@ -53,6 +53,7 @@ nodeset: debian-bullseye required-projects: - starlingx/config + - starlingx/fault files: - software/* vars: @@ -66,6 +67,7 @@ nodeset: debian-bullseye required-projects: - starlingx/config + - starlingx/fault files: - software/* vars: diff --git a/software/debian/deb_folder/control b/software/debian/deb_folder/control index 334bc6e0..bc1d6302 100644 --- a/software/debian/deb_folder/control +++ b/software/debian/deb_folder/control @@ -9,6 +9,7 @@ Build-Depends: debhelper-compat (= 13), python3-wheel, build-info-dev Build-Depends-Indep: + python3-fm-api, python3-keystonemiddleware, python3-oslo.config Standards-Version: 4.5.1 @@ -20,7 +21,8 @@ Architecture: all Depends: ${python3:Depends}, ${misc:Depends}, gir1.2-ostree-1.0, - python3-argcomplete + python3-argcomplete, + python3-fm-api, Description: StarlingX unified software deployment and management StarlingX unified software deployment and management. diff --git a/software/software/constants.py b/software/software/constants.py index a7859563..f20e44ff 100644 --- a/software/software/constants.py +++ b/software/software/constants.py @@ -179,6 +179,12 @@ WORKER_SUMMARY_DIR = "%s/summary" % SOFTWARE_STORAGE_DIR WORKER_DATETIME_FORMAT = "%Y%m%dT%H%M%S%f" UNKNOWN_SOFTWARE_VERSION = "0.0.0" +LAST_IN_SYNC = "last_in_sync" + +SYSTEM_MODE_SIMPLEX = "simplex" +SYSTEM_MODE_DUPLEX = "duplex" + + class DEPLOY_STATES(Enum): ACTIVATE = 'activate' diff --git a/software/software/software_controller.py b/software/software/software_controller.py index 241adeed..7056e7e7 100644 --- a/software/software/software_controller.py +++ b/software/software/software_controller.py @@ -23,6 +23,10 @@ import threading import time from wsgiref import simple_server +from fm_api import fm_api +from fm_api import constants as fm_constants + + from oslo_config import cfg as oslo_cfg import software.apt_utils as apt_utils @@ -63,6 +67,8 @@ from software.software_functions import LOG from software.software_functions import audit_log_info from software.software_functions import repo_root_dir from software.software_functions import ReleaseData +from software.software_functions import is_deploy_state_in_sync +from software.software_functions import is_deployment_in_progress from software.release_verify import verify_files import software.config as cfg import software.utils as utils @@ -786,6 +792,11 @@ class PatchController(PatchService): self.check_patch_states() self.base_pkgdata = BasePackageData() + # This is for alarm cache. It will be used to store the last raising alarm id + self.usm_alarm = {constants.LAST_IN_SYNC: False} + self.hostname = socket.gethostname() + self.fm_api = fm_api.FaultAPIs() + self.allow_insvc_patching = True if os.path.exists(app_dependency_filename): @@ -802,6 +813,14 @@ class PatchController(PatchService): else: self.write_state_file() + system_mode = utils.get_platform_conf("system_mode") + if system_mode == constants.SYSTEM_MODE_SIMPLEX: + self.standby_controller = "controller-0" + elif system_mode == constants.SYSTEM_MODE_DUPLEX: + self.standby_controller = "controller-0" \ + if self.hostname == "controller-1" \ + else "controller-1" + @property def release_collection(self): # for this stage, the SWReleaseCollection behaves as a broker which @@ -3062,6 +3081,65 @@ class PatchController(PatchService): func(*args, **kwargs) self._update_state_to_peer() + def handle_deploy_state_sync(self, alarm_instance_id): + """ + Handle the deploy state sync. + If deploy state is in sync, clear the alarm. + If not, raise the alarm. + """ + is_in_sync = is_deploy_state_in_sync() + + # Deploy in sync state is not changed, no need to update the alarm + if is_in_sync == self.usm_alarm.get(constants.LAST_IN_SYNC): + return + + try: + out_of_sync_alarm_fault = sc.fm_api.get_fault( + fm_constants.FM_ALARM_ID_SW_UPGRADE_DEPLOY_STATE_OUT_OF_SYNC, alarm_instance_id) + + LOG.info("software.json in sync: %s", is_in_sync) + + if out_of_sync_alarm_fault and is_in_sync: + # There was an out of sync alarm raised, but local software.json is in sync, + # we clear the alarm + LOG.info("Clearing alarm: %s ", out_of_sync_alarm_fault.alarm_id) + self.fm_api.clear_fault( + fm_constants.FM_ALARM_ID_SW_UPGRADE_DEPLOY_STATE_OUT_OF_SYNC, + alarm_instance_id) + + # Deploy in sync state is changed, update the cache + self.usm_alarm[constants.LAST_IN_SYNC] = is_in_sync + + elif (not out_of_sync_alarm_fault) and (not is_in_sync): + # There was no out of sync alarm raised, but local software.json is not in sync, + # we raise the alarm + LOG.info("Raising alarm: %s ", + fm_constants.FM_ALARM_ID_SW_UPGRADE_DEPLOY_STATE_OUT_OF_SYNC) + out_of_sync_fault = fm_api.Fault( + alarm_id=fm_constants.FM_ALARM_ID_SW_UPGRADE_DEPLOY_STATE_OUT_OF_SYNC, + alarm_state=fm_constants.FM_ALARM_STATE_SET, + entity_type_id=fm_constants.FM_ENTITY_TYPE_HOST, + entity_instance_id=alarm_instance_id, + severity=fm_constants.FM_ALARM_SEVERITY_MAJOR, + reason_text="Software deployment in progress", + alarm_type=fm_constants.FM_ALARM_TYPE_11, + probable_cause=fm_constants.ALARM_PROBABLE_CAUSE_65, + proposed_repair_action="Wait for deployment to complete", + service_affecting=False + ) + + self.fm_api.set_fault(out_of_sync_fault) + + # Deploy in sync state is changed, update the cache + self.usm_alarm[constants.LAST_IN_SYNC] = is_in_sync + + else: + # Shouldn't come to here + LOG.error("Unexpected case in handling deploy state sync. ") + + except Exception as ex: + LOG.exception("Failed in handling deploy state sync. Error: %s" % str(ex)) + def _get_software_upgrade(self): """ Get the current software upgrade from/to versions and state @@ -3244,7 +3322,15 @@ class PatchControllerMainThread(threading.Thread): # We only can use one inverval SEND_MSG_INTERVAL_IN_SECONDS = 30.0 + alarm_instance_id = "%s=%s" % (fm_constants.FM_ENTITY_TYPE_HOST, + sc.standby_controller) + try: + # Update the out of sync alarm cache when the thread starts + out_of_sync_alarm_fault = sc.fm_api.get_fault( + fm_constants.FM_ALARM_ID_SW_UPGRADE_DEPLOY_STATE_OUT_OF_SYNC, alarm_instance_id) + sc.usm_alarm[constants.LAST_IN_SYNC] = not out_of_sync_alarm_fault + sock_in = sc.setup_socket() while sock_in is None: @@ -3445,11 +3531,12 @@ class PatchControllerMainThread(threading.Thread): SEND_MSG_INTERVAL_IN_SECONDS) # Only send the deploy state update from the active controller - if utils.is_active_controller(): + if is_deployment_in_progress(sc.release_data.metadata) and utils.is_active_controller(): try: sc.socket_lock.acquire() deploy_state_update = SoftwareMessageDeployStateUpdate() deploy_state_update.send(sc.sock_out) + sc.handle_deploy_state_sync(alarm_instance_id) except Exception as e: LOG.exception("Failed to send deploy state update. Error: %s", str(e)) finally: diff --git a/software/software/software_functions.py b/software/software/software_functions.py index 013c3993..0bff6986 100644 --- a/software/software/software_functions.py +++ b/software/software/software_functions.py @@ -1287,3 +1287,39 @@ def parse_release_metadata(filename): continue data[child.tag] = child.text return data + + +def is_deploy_state_in_sync(): + """ + Check if deploy state in sync + :return: bool true if in sync, false otherwise + """ + if os.path.isfile(constants.SOFTWARE_JSON_FILE) \ + and os.path.isfile(constants.SYNCED_SOFTWARE_JSON_FILE): + + working_data_deploy_state = utils.load_from_json_file( + constants.SOFTWARE_JSON_FILE) + + synced_data_deploy_state = utils.load_from_json_file( + constants.SYNCED_SOFTWARE_JSON_FILE) + + working_deploy_state = working_data_deploy_state.get("deploy", {}) + + synced_deploy_state = synced_data_deploy_state.get("deploy", {}) + + working_deploy_host_state = working_data_deploy_state.get("deploy_host", {}) + + synced_deploy_host_state = synced_data_deploy_state.get("deploy_host", {}) + + return working_deploy_state == synced_deploy_state \ + and working_deploy_host_state == synced_deploy_host_state + return False + + +def is_deployment_in_progress(release_metadata): + """ + Check if at least one deployment is in progress + :param release_metadata: dict of release metadata + :return: bool true if in progress, false otherwise + """ + return any(release['state'] == constants.DEPLOYING for release in release_metadata.values()) diff --git a/software/software/tests/base.py b/software/software/tests/base.py new file mode 100644 index 00000000..5d86a574 --- /dev/null +++ b/software/software/tests/base.py @@ -0,0 +1,4 @@ +import sys +from unittest import mock + +sys.modules['fm_core'] = mock.Mock() diff --git a/software/software/tests/test_software_controller.py b/software/software/tests/test_software_controller.py index 8ae947df..dd1a3729 100644 --- a/software/software/tests/test_software_controller.py +++ b/software/software/tests/test_software_controller.py @@ -3,6 +3,10 @@ # # Copyright (c) 2023-2024 Wind River Systems, Inc. # + +# This import has to be first +from software.tests import base # pylint: disable=unused-import + from software.software_controller import PatchController from software.software_controller import ReleaseValidationFailure import unittest @@ -144,8 +148,12 @@ class TestSoftwareController(unittest.TestCase): @patch('software.software_controller.os.path.isfile') @patch('software.software_controller.json.load') @patch('software.software_controller.open', new_callable=mock_open) + @patch('software.software_controller.utils.get_platform_conf', return_value='simplex') + @patch('software.software_controller.open', new_callable=mock_open) def test_in_sync_controller_api_files_identical(self, + mock_dummy_open_config, # pylint: disable=unused-argument mock_dummy, # pylint: disable=unused-argument + mock_dummy_open, # pylint: disable=unused-argument mock_json_load, mock_isfile): controller = PatchController() @@ -159,8 +167,12 @@ class TestSoftwareController(unittest.TestCase): @patch('software.software_controller.os.path.isfile') @patch('software.software_controller.json.load') @patch('software.software_controller.open', new_callable=mock_open) + @patch('software.software_controller.utils.get_platform_conf', return_value='simplex') + @patch('software.software_controller.open', new_callable=mock_open) def test_in_sync_controller_api_files_not_identical(self, + mock_dummy_open_config, # pylint: disable=unused-argument mock_dummy, # pylint: disable=unused-argument + mock_dummy_open, # pylint: disable=unused-argument mock_json_load, mock_isfile): controller = PatchController() @@ -174,8 +186,12 @@ class TestSoftwareController(unittest.TestCase): @patch('software.software_controller.os.path.isfile') @patch('software.software_controller.json.load') @patch('software.software_controller.open', new_callable=mock_open) + @patch('software.software_controller.utils.get_platform_conf', return_value='simplex') + @patch('software.software_controller.open', new_callable=mock_open) def test_in_sync_controller_api_files_not_exist(self, + mock_dummy_open_config, # pylint: disable=unused-argument mock_dummy, # pylint: disable=unused-argument + mock_dummy_open, # pylint: disable=unused-argument mock_json_load, # pylint: disable=unused-argument mock_isfile): controller = PatchController() @@ -188,8 +204,12 @@ class TestSoftwareController(unittest.TestCase): @patch('software.software_controller.os.path.isfile') @patch('software.software_controller.json.load') @patch('software.software_controller.open', new_callable=mock_open) + @patch('software.software_controller.utils.get_platform_conf', return_value='simplex') + @patch('software.software_controller.open', new_callable=mock_open) def test_in_sync_controller_api_one_file_exist(self, + mock_dummy_open_config, # pylint: disable=unused-argument mock_dummy, # pylint: disable=unused-argument + mock_dummy_open, # pylint: disable=unused-argument mock_json_load, # pylint: disable=unused-argument mock_isfile): controller = PatchController() @@ -204,8 +224,12 @@ class TestSoftwareController(unittest.TestCase): @patch('software.software_controller.json.load') @patch('software.software_controller.open', new_callable=mock_open) + @patch('software.software_controller.utils.get_platform_conf', return_value='simplex') + @patch('software.software_controller.open', new_callable=mock_open) def test_get_software_host_upgrade_deployed(self, + mock_dummy_open_config, # pylint: disable=unused-argument mock_dummy, # pylint: disable=unused-argument + mock_dummy_open, # pylint: disable=unused-argument mock_json_load, # pylint: disable=unused-argument ): controller = PatchController() @@ -229,8 +253,12 @@ class TestSoftwareController(unittest.TestCase): @patch('software.software_controller.json.load') @patch('software.software_controller.open', new_callable=mock_open) + @patch('software.software_controller.utils.get_platform_conf', return_value='simplex') + @patch('software.software_controller.open', new_callable=mock_open) def test_get_software_host_upgrade_deploying(self, + mock_dummy_open_config, # pylint: disable=unused-argument mock_dummy, # pylint: disable=unused-argument + mock_dummy_open, # pylint: disable=unused-argument mock_json_load, # pylint: disable=unused-argument ): controller = PatchController() @@ -254,8 +282,12 @@ class TestSoftwareController(unittest.TestCase): @patch('software.software_controller.json.load') @patch('software.software_controller.open', new_callable=mock_open) + @patch('software.software_controller.utils.get_platform_conf', return_value='simplex') + @patch('software.software_controller.open', new_callable=mock_open) def test_get_all_software_host_upgrade_deploying(self, + mock_dummy_open_config, # pylint: disable=unused-argument mock_dummy, # pylint: disable=unused-argument + mock_dummy_open, # pylint: disable=unused-argument mock_json_load, # pylint: disable=unused-argument ): controller = PatchController() @@ -284,22 +316,31 @@ class TestSoftwareController(unittest.TestCase): @patch('software.software_controller.json.load') @patch('software.software_controller.open', new_callable=mock_open) + @patch('software.software_controller.utils.get_platform_conf', return_value='simplex') + @patch('software.software_controller.open', new_callable=mock_open) def test_get_software_host_upgrade_none_state(self, + mock_dummy_open_config, # pylint: disable=unused-argument mock_dummy, # pylint: disable=unused-argument + mock_dummy_open, # pylint: disable=unused-argument mock_json_load, # pylint: disable=unused-argument ): controller = PatchController() # Test when the deploy or deploy_hosts is None - controller._get_software_upgrade = MagicMock(return_value=None) # pylint: disable=protected-access + controller._get_software_upgrade = MagicMock( # pylint: disable=protected-access + return_value=None) controller.db_api_instance.get_deploy_host.return_value = None result = controller.get_one_software_host_upgrade("host1") self.assertIsNone(result) @patch('software.software_controller.json.load') @patch('software.software_controller.open', new_callable=mock_open) + @patch('software.software_controller.utils.get_platform_conf', return_value='simplex') + @patch('software.software_controller.open', new_callable=mock_open) def test_get_software_upgrade_get_deploy_all(self, + mock_dummy_open_config, # pylint: disable=unused-argument mock_dummy, # pylint: disable=unused-argument + mock_dummy_open, # pylint: disable=unused-argument mock_json_load, # pylint: disable=unused-argument ): @@ -329,8 +370,12 @@ class TestSoftwareController(unittest.TestCase): @patch('software.software_controller.json.load') @patch('software.software_controller.open', new_callable=mock_open) + @patch('software.software_controller.utils.get_platform_conf', return_value='simplex') + @patch('software.software_controller.open', new_callable=mock_open) def test_get_software_upgrade_get_deploy_all_none(self, + mock_dummy_open_config, # pylint: disable=unused-argument mock_dummy, # pylint: disable=unused-argument + mock_dummy_open, # pylint: disable=unused-argument mock_json_load, # pylint: disable=unused-argument ): diff --git a/software/software/utils.py b/software/software/utils.py index b12c3320..618104fc 100644 --- a/software/software/utils.py +++ b/software/software/utils.py @@ -22,6 +22,7 @@ import webob import software.constants as constants from software.exceptions import StateValidationFailure from software.exceptions import SoftwareServiceError +from tsconfig.tsconfig import PLATFORM_CONF_FILE LOG = logging.getLogger('main_logger') @@ -439,3 +440,22 @@ def is_active_controller(): keyring_file = f"/opt/platform/.keyring/{constants.SW_VERSION}/.CREDENTIAL" return os.path.exists(keyring_file) + + +def get_platform_conf(key): + """ + Get the value of given key in platform.conf + :param key: key to get + :return: value + """ + value = None + + with open(PLATFORM_CONF_FILE) as fp: + lines = fp.readlines() + for line in lines: + if line.find(key) != -1: + value = line.split('=')[1] + value = value.replace('\n', '') + break + + return value diff --git a/software/tox.ini b/software/tox.ini index 15e67227..2c955e2b 100644 --- a/software/tox.ini +++ b/software/tox.ini @@ -16,10 +16,11 @@ allowlist_externals = find basepython = python3 deps = -r{toxinidir}/requirements.txt -r{toxinidir}/test-requirements.txt + -e{[tox]stxdir}/fault/fm-api/source -e{[tox]stxdir}/config/tsconfig/tsconfig -install_command = pip install \ - -c{env:UPPER_CONSTRAINTS_FILE:https://opendev.org/starlingx/root/raw/branch/master/build-tools/requirements/debian/upper-constraints.txt} \ +install_command = pip install -v -v -v \ + -c {env:UPPER_CONSTRAINTS_FILE:https://opendev.org/starlingx/root/raw/branch/master/build-tools/requirements/debian/upper-constraints.txt} \ {opts} {packages} passenv = XDG_CACHE_HOME