From ebe177d91858d470d140cab867faa7cdee547bab Mon Sep 17 00:00:00 2001 From: Bin Qian Date: Tue, 12 Mar 2024 22:27:25 +0000 Subject: [PATCH] USM deploy state This change introduced state machines for release state, deploy state and deploy host state. This change removed the direct reference to the software metadata from software-controller and other modules. Replaced with encapsulated release_data module. Also include changes: 1. removed required parameter for software deploy activate and software deploy complete RestAPI. 2. ensure reload metadata for each request 3. added feed_repo and commit-id to the deploy entity, to be subsequently passed to deploy operations. 4. fix issues TCs: passed: software upload major and patching releases passed: software deploy start major and patching releases passed: software deploy host (mock) major and patching release passed: software activate major and patching release passed: software complete major release and patching release passed: redeploy host after host deploy failed both major and patching release Story: 2010676 Task: 49849 Change-Id: I4b1894560eccb8ef4f613633a73bf3887b2b93fb Signed-off-by: Bin Qian --- software-client/debian/deb_folder/control | 1 + software-client/requirements.txt | 1 + .../software_client/common/utils.py | 58 +- software-client/software_client/constants.py | 27 +- software-client/software_client/v1/deploy.py | 138 +- .../software_client/v1/deploy_shell.py | 36 +- .../software_client/v1/release_shell.py | 9 +- software/scripts/deploy-precheck | 2 +- software/scripts/prep-data-migration | 2 + .../software/api/controllers/v1/software.py | 38 +- software/software/constants.py | 109 -- software/software/db/api.py | 13 +- software/software/deploy_host_state.py | 69 + software/software/deploy_state.py | 198 +++ software/software/exceptions.py | 85 +- software/software/parsable_error.py | 5 +- software/software/release_data.py | 215 ++- software/software/release_state.py | 94 ++ 
software/software/software_controller.py | 1257 ++++++++--------- software/software/software_entities.py | 84 +- software/software/software_functions.py | 256 ++-- software/software/states.py | 126 ++ .../tests/test_software_controller.py | 57 +- .../software/tests/test_software_function.py | 6 +- software/software/utils.py | 15 +- software/tox.ini | 4 +- 26 files changed, 1648 insertions(+), 1257 deletions(-) create mode 100644 software/software/deploy_host_state.py create mode 100644 software/software/deploy_state.py create mode 100644 software/software/release_state.py create mode 100644 software/software/states.py diff --git a/software-client/debian/deb_folder/control b/software-client/debian/deb_folder/control index 018da48c..5f194a8b 100644 --- a/software-client/debian/deb_folder/control +++ b/software-client/debian/deb_folder/control @@ -5,6 +5,7 @@ Maintainer: StarlingX Developers Build-Depends: debhelper-compat (= 13), dh-python, python3-all, + python3-tabulate, python3-setuptools, python3-wheel Build-Depends-Indep: diff --git a/software-client/requirements.txt b/software-client/requirements.txt index 4ba0a8f3..7f473965 100644 --- a/software-client/requirements.txt +++ b/software-client/requirements.txt @@ -7,4 +7,5 @@ oslo.serialization netaddr pecan requests_toolbelt +tabulate WebOb diff --git a/software-client/software_client/common/utils.py b/software-client/software_client/common/utils.py index f1b71a95..441115c5 100644 --- a/software-client/software_client/common/utils.py +++ b/software-client/software_client/common/utils.py @@ -21,15 +21,36 @@ import json import os import re import textwrap - +from tabulate import tabulate from oslo_utils import importutils from six.moves import zip from software_client.common.http_errors import HTTP_ERRORS +# TODO(bqian) remove below overrides when switching to +# system command style CLI display for USM CLI is ready +from tabulate import _table_formats +from tabulate import TableFormat +from tabulate import Line 
+from tabulate import DataRow + +simple = TableFormat( + lineabove=Line("", "-", " ", ""), + linebelowheader=Line("", "=", " ", ""), + linebetweenrows=None, + linebelow=Line("", "-", " ", ""), + headerrow=DataRow("", " ", ""), + datarow=DataRow("", " ", ""), + padding=0, + with_header_hide=["lineabove", "linebelow"], +) + +# _table_formats['pretty'] = simple +##################################################### TERM_WIDTH = 72 + class HelpFormatter(argparse.HelpFormatter): def start_section(self, heading): # Title-case the headings @@ -158,6 +179,41 @@ def display_info(resp): _display_info(text) +def display_result_list(header_data_list, data): + header = [h for h in header_data_list] + table = [] + for d in data: + row = [] + for _, k in header_data_list.items(): + row.append(d[k]) + table.append(row) + if len(table) == 0: + print("No data") + else: + print(tabulate(table, header, tablefmt='pretty', colalign=("left", "left"))) + + +def display_detail_result(data): + header = ["Property", "Value"] + table = [] + for k, v in data.items(): + if isinstance(v, list): + if len(v) > 0: + row = [k, v[0]] + v.pop(0) + else: + row = [k, ''] + table.append(row) + + for r in v: + row = ['', r] + table.append(row) + else: + row = [k, v] + table.append(row) + print(tabulate(table, header, tablefmt='pretty', colalign=("left", "left"))) + + def print_result_list(header_data_list, data_list, has_error, sort_key=0): """ Print a list of data in a simple table format diff --git a/software-client/software_client/constants.py b/software-client/software_client/constants.py index 0a54e095..782a1aad 100644 --- a/software-client/software_client/constants.py +++ b/software-client/software_client/constants.py @@ -10,36 +10,11 @@ CONTROLLER_FLOATING_HOSTNAME = "controller" SOFTWARE_STORAGE_DIR = "/opt/software" -AVAILABLE_DIR = "%s/metadata/available" % SOFTWARE_STORAGE_DIR -UNAVAILABLE_DIR = "%s/metadata/unavailable" % SOFTWARE_STORAGE_DIR -DEPLOYING_START_DIR = 
"%s/metadata/deploying_start" % SOFTWARE_STORAGE_DIR -DEPLOYING_HOST_DIR = "%s/metadata/deploying_host" % SOFTWARE_STORAGE_DIR -DEPLOYING_ACTIVATE_DIR = "%s/metadata/deploying_activate" % SOFTWARE_STORAGE_DIR -DEPLOYING_COMPLETE_DIR = "%s/metadata/deploying_complete" % SOFTWARE_STORAGE_DIR -DEPLOYED_DIR = "%s/metadata/deployed" % SOFTWARE_STORAGE_DIR -REMOVING_DIR = "%s/metadata/removing" % SOFTWARE_STORAGE_DIR -ABORTING_DIR = "%s/metadata/aborting" % SOFTWARE_STORAGE_DIR -COMMITTED_DIR = "%s/metadata/committed" % SOFTWARE_STORAGE_DIR -SEMANTICS_DIR = "%s/semantics" % SOFTWARE_STORAGE_DIR - PATCH_AGENT_STATE_IDLE = "idle" PATCH_AGENT_STATE_INSTALLING = "installing" PATCH_AGENT_STATE_INSTALL_FAILED = "install-failed" PATCH_AGENT_STATE_INSTALL_REJECTED = "install-rejected" -ABORTING = 'aborting' -AVAILABLE = 'available' -COMMITTED = 'committed' -DEPLOYED = 'deployed' -DEPLOYING_ACTIVATE = 'deploying-activate' -DEPLOYING_COMPLETE = 'deploying-complete' -DEPLOYING_HOST = 'deploying-host' -DEPLOYING_START = 'deploying-start' -REMOVING = 'removing' -UNAVAILABLE = 'unavailable' -UNKNOWN = 'n/a' - - STATUS_DEVELOPEMENT = 'DEV' STATUS_OBSOLETE = 'OBS' STATUS_RELEASED = 'REL' @@ -61,9 +36,11 @@ PATCH_EXTENSION = ".patch" SUPPORTED_UPLOAD_FILE_EXT = [ISO_EXTENSION, SIG_EXTENSION, PATCH_EXTENSION] SCRATCH_DIR = "/scratch" +# host deploy state DEPLOYING = 'deploying' FAILED = 'failed' PENDING = 'pending' +DEPLOYED = 'deployed' # Authorization modes of software cli KEYSTONE = 'keystone' diff --git a/software-client/software_client/v1/deploy.py b/software-client/software_client/v1/deploy.py index 4aa9d2e5..76f943ef 100644 --- a/software-client/software_client/v1/deploy.py +++ b/software-client/software_client/v1/deploy.py @@ -54,10 +54,10 @@ class DeployManager(base.Manager): def host(self, args): # args.deployment is a string - agent_ip = args.agent + hostname = args.host # Issue deploy_host request and poll for results - path = "/v1/software/deploy_host/%s" % (agent_ip) + path 
= "/v1/software/deploy_host/%s" % (hostname) if args.force: path += "/force" @@ -69,7 +69,8 @@ class DeployManager(base.Manager): print(data["error"]) rc = 1 else: - rc = self.wait_for_install_complete(agent_ip) + # NOTE(bqian) should consider return host_list instead. + rc = self.wait_for_install_complete(hostname) elif req.status_code == 500: print("An internal error has occurred. " "Please check /var/log/software.log for details") @@ -84,26 +85,20 @@ class DeployManager(base.Manager): return rc def activate(self, args): - # args.deployment is a string - deployment = args.deployment - # Ignore interrupts during this function signal.signal(signal.SIGINT, signal.SIG_IGN) # Issue deploy_start request - path = "/v1/software/deploy_activate/%s" % (deployment) + path = "/v1/software/deploy_activate" return self._create(path, body={}) def complete(self, args): - # args.deployment is a string - deployment = args.deployment - # Ignore interrupts during this function signal.signal(signal.SIGINT, signal.SIG_IGN) # Issue deploy_start request - path = "/v1/software/deploy_complete/%s" % (deployment) + path = "/v1/software/deploy_complete/" return self._create(path, body={}) @@ -113,40 +108,9 @@ class DeployManager(base.Manager): def show(self): path = '/v1/software/deploy' - req, data = self._list(path, "") + return self._list(path, "") - if req.status_code >= 500: - print("An internal error has occurred. Please check /var/log/software.log for details") - return 1 - elif req.status_code >= 400: - print("Respond code %d. 
Error: %s" % (req.status_code, req.reason)) - return 1 - - if not data: - print("No deploy in progress.") - else: - data = data[0] - data["reboot_required"] = "Yes" if data.get("reboot_required") else "No" - data_list = [[k, v] for k, v in data.items()] - transposed_data_list = list(zip(*data_list)) - - transposed_data_list[0] = [s.title().replace('_', ' ') for s in transposed_data_list[0]] - # Find the longest header string in each column - header_lengths = [len(str(x)) for x in transposed_data_list[0]] - # Find the longest content string in each column - content_lengths = [len(str(x)) for x in transposed_data_list[1]] - # Find the max of the two for each column - col_lengths = [(x if x > y else y) for x, y in zip(header_lengths, content_lengths)] - - print(' '.join(f"{x.center(col_lengths[i])}" for i, - x in enumerate(transposed_data_list[0]))) - print(' '.join('=' * length for length in col_lengths)) - print(' '.join(f"{x.center(col_lengths[i])}" for i, - x in enumerate(transposed_data_list[1]))) - - return 0 - - def wait_for_install_complete(self, agent_ip): + def wait_for_install_complete(self, hostname): url = "/v1/software/host_list" rc = 0 @@ -163,55 +127,49 @@ class DeployManager(base.Manager): except requests.exceptions.ConnectionError: # The local software-controller may have restarted. retriable_count += 1 - if retriable_count <= max_retries: - continue - else: + if retriable_count > max_retries: print("Lost communications with the software controller") rc = 1 - break - - if req.status_code == 200: - data = data.get("data", None) - if not data: - print("Invalid host-list data returned:") - utils.print_result_debug(req, data) - rc = 1 - break - - host_state = None - - for d in data: - if d['hostname'] == agent_ip: - host_state = d.get('host_state') - - if host_state == constants.DEPLOYING: - # Still deploying - sys.stdout.write(".") - sys.stdout.flush() - elif host_state == constants.FAILED: - print("\nDeployment failed. 
Please check logs for details.") - rc = 1 - break - elif host_state == constants.DEPLOYED: - print("\nDeployment was successful.") - rc = 0 - break - else: - print("\nReported unknown state: %s" % host_state) - rc = 1 - break - - elif req.status_code == 500: - print("An internal error has occurred. Please check /var/log/software.log for details") - rc = 1 - break + return rc else: - m = re.search("(Error message:.*)", req.text, re.MULTILINE) - if m: - print(m.group(0)) - else: - print(vars(req)) - rc = 1 break + if req.status_code == 200: + if not data: + print("Invalid host-list data returned:") + utils.print_result_debug(req, data) + rc = 1 + + host_state = None + + for d in data: + if d['hostname'] == hostname: + host_state = d.get('host_state') + + if host_state == constants.DEPLOYING: + print("\nDeployment started.") + rc = 0 + elif host_state == constants.FAILED: + print("\nDeployment failed. Please check logs for details.") + rc = 1 + elif host_state == constants.DEPLOYED: + print("\nDeployment was successful.") + rc = 0 + elif host_state == constants.PENDING: + print("\nDeployment pending.") + else: + print("\nReported unknown state: %s" % host_state) + rc = 1 + + elif req.status_code == 500: + print("An internal error has occurred. 
Please check /var/log/software.log for details") + rc = 1 + else: + m = re.search("(Error message:.*)", req.text, re.MULTILINE) + if m: + print(m.group(0)) + else: + print(vars(req)) + rc = 1 + return rc diff --git a/software-client/software_client/v1/deploy_shell.py b/software-client/software_client/v1/deploy_shell.py index 06f9e48f..95b234ec 100644 --- a/software-client/software_client/v1/deploy_shell.py +++ b/software-client/software_client/v1/deploy_shell.py @@ -21,20 +21,26 @@ from software_client.common import utils help="List all deployments that have this state") def do_show(cc, args): """Show the software deployments states""" - # TODO(bqian) modify the cli to display with generic tabulated output - return cc.deploy.show() + resp, data = cc.deploy.show() + if args.debug: + utils.print_result_debug(resp, data) + else: + header_data_list = {"From Release": "from_release", "To Release": "to_release", "RR": "reboot_required", "State": "state"} + utils.display_result_list(header_data_list, data) + + return utils.check_rc(resp, data) def do_host_list(cc, args): """List of hosts for software deployment """ - req, data = cc.deploy.host_list() - # TODO(bqian) modify display with generic tabulated output + resp, data = cc.deploy.host_list() if args.debug: - utils.print_result_debug(req, data) + utils.print_result_debug(resp, data) else: - utils.print_software_deploy_host_list_result(req, data) + header_data_list = {"Host": "hostname", "From Release": "software_release", "To Release": "target_release", "RR": "reboot_required", "State": "host_state"} + utils.display_result_list(header_data_list, data) - return utils.check_rc(req, data) + return utils.check_rc(resp, data) @utils.arg('deployment', @@ -68,17 +74,17 @@ def do_precheck(cc, args): help='Allow bypassing non-critical checks') def do_start(cc, args): """Start the software deployment""" - req, data = cc.deploy.start(args) + resp, data = cc.deploy.start(args) if args.debug: - utils.print_result_debug(req, data) 
+ utils.print_result_debug(resp, data) else: - utils.print_software_op_result(req, data) + utils.display_info(resp) - return utils.check_rc(req, data) + return utils.check_rc(resp, data) -@utils.arg('agent', - help="Agent on which host deploy is triggered") +@utils.arg('host', + help="Name of the host that the deploy is triggered") @utils.arg('-f', '--force', action='store_true', @@ -89,8 +95,6 @@ def do_host(cc, args): return cc.deploy.host(args) -@utils.arg('deployment', - help='Deployment ID to activate') def do_activate(cc, args): """Activate the software deployment""" req, data = cc.deploy.activate(args) @@ -101,8 +105,6 @@ def do_activate(cc, args): return utils.check_rc(req, data) -@utils.arg('deployment', - help='Deployment ID to complete') def do_complete(cc, args): """Complete the software deployment""" req, data = cc.deploy.complete(args) diff --git a/software-client/software_client/v1/release_shell.py b/software-client/software_client/v1/release_shell.py index ed76bce3..433885b9 100644 --- a/software-client/software_client/v1/release_shell.py +++ b/software-client/software_client/v1/release_shell.py @@ -22,10 +22,8 @@ def do_list(cc, args): if args.debug: utils.print_result_debug(req, data) else: - header_data_list = ["Release", "RR", "State"] - data_list = [(k, v["reboot_required"], v["state"]) for k, v in data["sd"].items()] - has_error = 'error' in data and data["error"] - utils.print_result_list(header_data_list, data_list, has_error) + header_data_list = {"Release": "release_id", "RR": "reboot_required", "State": "state"} + utils.display_result_list(header_data_list, data) return utils.check_rc(req, data) @@ -45,7 +43,8 @@ def do_show(cc, args): if args.debug: utils.print_result_debug(req, data) else: - utils.print_release_show_result(req, data, list_packages=list_packages) + for d in data: + utils.display_detail_result(d) return utils.check_rc(req, data) diff --git a/software/scripts/deploy-precheck b/software/scripts/deploy-precheck index 
c28a1838..cdb72548 100644 --- a/software/scripts/deploy-precheck +++ b/software/scripts/deploy-precheck @@ -100,7 +100,7 @@ class HealthCheck(object): print("Could not check required patches...") return False, required_patches - applied_patches = list(response.json()["sd"].keys()) + applied_patches = [release['release_id'] for release in response.json()] missing_patch = list(set(required_patches) - set(applied_patches)) if missing_patch: success = False diff --git a/software/scripts/prep-data-migration b/software/scripts/prep-data-migration index 4337b50d..2db25c19 100644 --- a/software/scripts/prep-data-migration +++ b/software/scripts/prep-data-migration @@ -240,6 +240,8 @@ class DataMigration(object): platform_config_dir = os.path.join(PLATFORM_PATH, "config") from_config_dir = os.path.join(platform_config_dir, self.from_release) to_config_dir = os.path.join(platform_config_dir, self.to_release) + if os.path.isdir(to_config_dir): + shutil.rmtree(to_config_dir) shutil.copytree(from_config_dir, to_config_dir) except Exception as e: LOG.exception("Failed to create platform config for release %s. 
" diff --git a/software/software/api/controllers/v1/software.py b/software/software/api/controllers/v1/software.py index 7379a324..59fc0f06 100644 --- a/software/software/api/controllers/v1/software.py +++ b/software/software/api/controllers/v1/software.py @@ -12,11 +12,12 @@ from pecan import expose from pecan import request import shutil +from software import constants from software.exceptions import SoftwareError from software.exceptions import SoftwareServiceError +from software.release_data import reload_release_data from software.software_controller import sc -import software.utils as utils -import software.constants as constants +from software import utils LOG = logging.getLogger('main_logger') @@ -26,6 +27,7 @@ class SoftwareAPIController(object): @expose('json') def commit_patch(self, *args): + reload_release_data() result = sc.patch_commit(list(args)) sc.software_sync() @@ -33,12 +35,14 @@ class SoftwareAPIController(object): @expose('json') def commit_dry_run(self, *args): + reload_release_data() result = sc.patch_commit(list(args), dry_run=True) return result @expose('json') @expose('query.xml', content_type='application/xml') def delete(self, *args): + reload_release_data() result = sc.software_release_delete_api(list(args)) sc.software_sync() @@ -46,28 +50,26 @@ class SoftwareAPIController(object): @expose('json') @expose('query.xml', content_type='application/xml') - def deploy_activate(self, *args): - if sc.any_patch_host_installing(): - raise SoftwareServiceError(error="Rejected: One or more nodes are installing a release.") + def deploy_activate(self): + reload_release_data() - result = sc.software_deploy_activate_api(list(args)[0]) + result = sc.software_deploy_activate_api() sc.software_sync() return result @expose('json') @expose('query.xml', content_type='application/xml') - def deploy_complete(self, *args): - if sc.any_patch_host_installing(): - raise SoftwareServiceError(error="Rejected: One or more nodes are installing a release.") - - 
result = sc.software_deploy_complete_api(list(args)[0]) + def deploy_complete(self): + reload_release_data() + result = sc.software_deploy_complete_api() sc.software_sync() return result @expose('json') @expose('query.xml', content_type='application/xml') def deploy_host(self, *args): + reload_release_data() if len(list(args)) == 0: return dict(error="Host must be specified for install") force = False @@ -81,6 +83,7 @@ class SoftwareAPIController(object): @expose('json') @expose('query.xml', content_type='application/xml') def deploy_precheck(self, *args, **kwargs): + reload_release_data() force = False if 'force' in list(args): force = True @@ -92,6 +95,7 @@ class SoftwareAPIController(object): @expose('json') @expose('query.xml', content_type='application/xml') def deploy_start(self, *args, **kwargs): + reload_release_data() # if --force is provided force = 'force' in list(args) @@ -107,6 +111,7 @@ class SoftwareAPIController(object): @expose('json', method="GET") def deploy(self): + reload_release_data() from_release = request.GET.get("from_release") to_release = request.GET.get("to_release") result = sc.software_deploy_show_api(from_release, to_release) @@ -115,25 +120,30 @@ class SoftwareAPIController(object): @expose('json') @expose('query.xml', content_type='application/xml') def install_local(self): + reload_release_data() result = sc.software_install_local_api() return result @expose('json') def is_available(self, *args): + reload_release_data() return sc.is_available(list(args)) @expose('json') def is_committed(self, *args): + reload_release_data() return sc.is_committed(list(args)) @expose('json') def is_deployed(self, *args): + reload_release_data() return sc.is_deployed(list(args)) @expose('json') @expose('show.xml', content_type='application/xml') def show(self, *args): + reload_release_data() result = sc.software_release_query_specific_cached(list(args)) return result @@ -141,6 +151,7 @@ class SoftwareAPIController(object): @expose('json') 
@expose('query.xml', content_type='application/xml') def upload(self): + reload_release_data() is_local = False temp_dir = None uploaded_files = [] @@ -186,12 +197,13 @@ class SoftwareAPIController(object): @expose('json') @expose('query.xml', content_type='application/xml') def query(self, **kwargs): + reload_release_data() sd = sc.software_release_query_cached(**kwargs) - - return dict(sd=sd) + return sd @expose('json', method="GET") def host_list(self): + reload_release_data() result = sc.deploy_host_list() return result diff --git a/software/software/constants.py b/software/software/constants.py index f20e44ff..8d9f953e 100644 --- a/software/software/constants.py +++ b/software/software/constants.py @@ -4,7 +4,6 @@ Copyright (c) 2023-2024 Wind River Systems, Inc. SPDX-License-Identifier: Apache-2.0 """ -from enum import Enum import os try: # The tsconfig module is only available at runtime @@ -34,91 +33,8 @@ RC_UNHEALTHY = 3 DEPLOY_PRECHECK_SCRIPT = "deploy-precheck" DEPLOY_START_SCRIPT = "software-deploy-start" -AVAILABLE_DIR = "%s/metadata/available" % SOFTWARE_STORAGE_DIR -UNAVAILABLE_DIR = "%s/metadata/unavailable" % SOFTWARE_STORAGE_DIR -DEPLOYING_DIR = "%s/metadata/deploying" % SOFTWARE_STORAGE_DIR -DEPLOYED_DIR = "%s/metadata/deployed" % SOFTWARE_STORAGE_DIR -REMOVING_DIR = "%s/metadata/removing" % SOFTWARE_STORAGE_DIR - -# TODO(bqian) states to be removed once current references are removed -DEPLOYING_START_DIR = "%s/metadata/deploying_start" % SOFTWARE_STORAGE_DIR -DEPLOYING_HOST_DIR = "%s/metadata/deploying_host" % SOFTWARE_STORAGE_DIR -DEPLOYING_ACTIVATE_DIR = "%s/metadata/deploying_activate" % SOFTWARE_STORAGE_DIR -DEPLOYING_COMPLETE_DIR = "%s/metadata/deploying_complete" % SOFTWARE_STORAGE_DIR -ABORTING_DIR = "%s/metadata/aborting" % SOFTWARE_STORAGE_DIR -COMMITTED_DIR = "%s/metadata/committed" % SOFTWARE_STORAGE_DIR SEMANTICS_DIR = "%s/semantics" % SOFTWARE_STORAGE_DIR -DEPLOY_STATE_METADATA_DIR = \ - [ - AVAILABLE_DIR, - UNAVAILABLE_DIR, - 
DEPLOYING_DIR, - DEPLOYED_DIR, - REMOVING_DIR, - # TODO(bqian) states to be removed once current references are removed - DEPLOYING_START_DIR, - DEPLOYING_HOST_DIR, - DEPLOYING_ACTIVATE_DIR, - DEPLOYING_COMPLETE_DIR, - ABORTING_DIR, - COMMITTED_DIR, - ] - -# new release state needs to be added to VALID_RELEASE_STATES list -AVAILABLE = 'available' -UNAVAILABLE = 'unavailable' -DEPLOYING = 'deploying' -DEPLOYED = 'deployed' -REMOVING = 'removing' - -DELETABLE_STATE = [AVAILABLE, UNAVAILABLE] - -# TODO(bqian) states to be removed once current references are removed -ABORTING = 'aborting' -COMMITTED = 'committed' -DEPLOYING_ACTIVATE = 'deploying-activate' -DEPLOYING_COMPLETE = 'deploying-complete' -DEPLOYING_HOST = 'deploying-host' -DEPLOYING_START = 'deploying-start' -UNAVAILABLE = 'unavailable' -UNKNOWN = 'n/a' - -VALID_DEPLOY_START_STATES = [ - AVAILABLE, - DEPLOYED, -] - -# host deploy substate -HOST_DEPLOY_PENDING = 'pending' -HOST_DEPLOY_STARTED = 'deploy-started' -HOST_DEPLOY_DONE = 'deploy-done' -HOST_DEPLOY_FAILED = 'deploy-failed' - -VALID_HOST_DEPLOY_STATE = [ - HOST_DEPLOY_PENDING, - HOST_DEPLOY_STARTED, - HOST_DEPLOY_DONE, - HOST_DEPLOY_FAILED -] - -VALID_RELEASE_STATES = [AVAILABLE, UNAVAILABLE, DEPLOYING, DEPLOYED, - REMOVING] - -RELEASE_STATE_TO_DIR_MAP = {AVAILABLE: AVAILABLE_DIR, - UNAVAILABLE: UNAVAILABLE_DIR, - DEPLOYING: DEPLOYING_DIR, - DEPLOYED: DEPLOYED_DIR, - REMOVING: REMOVING_DIR} - -# valid release state transition below could still be changed as -# development continue -RELEASE_STATE_VALID_TRANSITION = { - AVAILABLE: [DEPLOYING], - DEPLOYING: [DEPLOYED], - DEPLOYED: [REMOVING, UNAVAILABLE] -} - STATUS_DEVELOPEMENT = 'DEV' STATUS_OBSOLETE = 'OBS' STATUS_RELEASED = 'REL' @@ -147,11 +63,6 @@ SEMANTIC_ACTIONS = [SEMANTIC_PREAPPLY, SEMANTIC_PREREMOVE] CHECKOUT_FOLDER = "checked_out_commit" -DEPLOYMENT_STATE_ACTIVE = "Active" -DEPLOYMENT_STATE_INACTIVE = "Inactive" -DEPLOYMENT_STATE_PRESTAGING = "Prestaging" -DEPLOYMENT_STATE_PRESTAGED = 
"Prestaged" - FEED_DIR = "/var/www/pages/feed/" UPGRADE_FEED_DIR = FEED_DIR TMP_DIR = "/tmp" @@ -183,23 +94,3 @@ LAST_IN_SYNC = "last_in_sync" SYSTEM_MODE_SIMPLEX = "simplex" SYSTEM_MODE_DUPLEX = "duplex" - - - -class DEPLOY_STATES(Enum): - ACTIVATE = 'activate' - ACTIVATE_DONE = 'activate-done' - ACTIVATE_FAILED = 'activate-failed' - START = 'start' - START_DONE = 'start-done' - START_FAILED = 'start-failed' - HOST = 'host' - HOST_DONE = 'host-done' - HOST_FAILED = 'host-failed' - - -class DEPLOY_HOST_STATES(Enum): - DEPLOYED = 'deployed' - DEPLOYING = 'deploying' - FAILED = 'failed' - PENDING = 'pending' diff --git a/software/software/db/api.py b/software/software/db/api.py index 13f1397f..316f1fcb 100644 --- a/software/software/db/api.py +++ b/software/software/db/api.py @@ -9,7 +9,7 @@ import logging import threading from software.software_entities import DeployHandler from software.software_entities import DeployHostHandler -from software.constants import DEPLOY_STATES +from software.states import DEPLOY_STATES LOG = logging.getLogger('main_logger') @@ -32,9 +32,9 @@ class SoftwareAPI: self.deploy_handler = DeployHandler() self.deploy_host_handler = DeployHostHandler() - def create_deploy(self, from_release, to_release, reboot_required: bool): + def create_deploy(self, from_release, to_release, feed_repo, commit_id, reboot_required: bool): self.begin_update() - self.deploy_handler.create(from_release, to_release, reboot_required) + self.deploy_handler.create(from_release, to_release, feed_repo, commit_id, reboot_required) self.end_update() def get_deploy(self, from_release, to_release): @@ -79,6 +79,13 @@ class SoftwareAPI: finally: self.end_update() + def get_deploy_host_by_hostname(self, hostname): + self.begin_update() + try: + return self.deploy_host_handler.query(hostname) + finally: + self.end_update() + def update_deploy_host(self, hostname, state): self.begin_update() try: diff --git a/software/software/deploy_host_state.py 
b/software/software/deploy_host_state.py new file mode 100644 index 00000000..5d97c09d --- /dev/null +++ b/software/software/deploy_host_state.py @@ -0,0 +1,69 @@ +""" +Copyright (c) 2024 Wind River Systems, Inc. + +SPDX-License-Identifier: Apache-2.0 + +""" + +import logging + +from software.db.api import get_instance +from software.exceptions import InvalidOperation +from software.states import DEPLOY_HOST_STATES + +LOG = logging.getLogger('main_logger') + + +deploy_host_state_transition = { + DEPLOY_HOST_STATES.PENDING: [DEPLOY_HOST_STATES.DEPLOYING], + DEPLOY_HOST_STATES.DEPLOYING: [DEPLOY_HOST_STATES.DEPLOYED, DEPLOY_HOST_STATES.FAILED], + DEPLOY_HOST_STATES.FAILED: [DEPLOY_HOST_STATES.DEPLOYING], + DEPLOY_HOST_STATES.DEPLOYED: [] +} + + +class DeployHostState(object): + _callbacks = [] + + @staticmethod + def register_event_listener(callback): + if callback not in DeployHostState._callbacks: + LOG.info("Register event listener %s", callback.__qualname__) + DeployHostState._callbacks.append(callback) + + def __init__(self, hostname): + self._hostname = hostname + + def check_transition(self, target_state: DEPLOY_HOST_STATES): + db_api = get_instance() + deploy_host = db_api.get_deploy_host_by_hostname(self._hostname) + if deploy_host is not None: + cur_state = DEPLOY_HOST_STATES(deploy_host['state']) + if target_state in deploy_host_state_transition[cur_state]: + return True + else: + LOG.error('Host %s is not part of deployment' % self._hostname) + return False + + def transform(self, target_state: DEPLOY_HOST_STATES): + db_api = get_instance() + db_api.begin_update() + try: + if self.check_transition(target_state): + db_api.update_deploy_host(self._hostname, target_state) + for callback in DeployHostState._callbacks: + callback(self._hostname, target_state) + else: + msg = "Host can not transform to %s from current state" % target_state.value + raise InvalidOperation(msg) + finally: + db_api.end_update() + + def deploy_started(self): + 
self.transform(DEPLOY_HOST_STATES.DEPLOYING) + + def deployed(self): + self.transform(DEPLOY_HOST_STATES.DEPLOYED) + + def deploy_failed(self): + self.transform(DEPLOY_HOST_STATES.FAILED) diff --git a/software/software/deploy_state.py b/software/software/deploy_state.py new file mode 100644 index 00000000..6884ad10 --- /dev/null +++ b/software/software/deploy_state.py @@ -0,0 +1,198 @@ +""" +Copyright (c) 2024 Wind River Systems, Inc. + +SPDX-License-Identifier: Apache-2.0 + +""" +import logging + +from software.db.api import get_instance +from software.exceptions import InvalidOperation +from software.release_data import SWRelease +from software.states import DEPLOY_STATES +from software.states import DEPLOY_HOST_STATES + +LOG = logging.getLogger('main_logger') + + +deploy_state_transition = { + None: [DEPLOY_STATES.START], # Fake state for no deploy in progress + DEPLOY_STATES.START: [DEPLOY_STATES.START_DONE, DEPLOY_STATES.START_FAILED], + DEPLOY_STATES.START_FAILED: [DEPLOY_STATES.ABORT], + DEPLOY_STATES.ABORT: [DEPLOY_STATES.ABORT_DONE], + DEPLOY_STATES.START_DONE: [DEPLOY_STATES.ABORT, DEPLOY_STATES.HOST], + DEPLOY_STATES.HOST: [DEPLOY_STATES.HOST, + DEPLOY_STATES.ABORT, + DEPLOY_STATES.HOST_FAILED, + DEPLOY_STATES.HOST_DONE], + DEPLOY_STATES.HOST_FAILED: [DEPLOY_STATES.HOST, # deploy-host can reattempt + DEPLOY_STATES.ABORT, + DEPLOY_STATES.HOST_FAILED, + DEPLOY_STATES.HOST_DONE], + DEPLOY_STATES.HOST_DONE: [DEPLOY_STATES.ABORT, DEPLOY_STATES.ACTIVATE], + DEPLOY_STATES.ACTIVATE: [DEPLOY_STATES.ACTIVATE_DONE, DEPLOY_STATES.ACTIVATE_FAILED], + DEPLOY_STATES.ACTIVATE_DONE: [DEPLOY_STATES.ABORT, None], # abort after deploy-activated? 
+ DEPLOY_STATES.ACTIVATE_FAILED: [DEPLOY_STATES.ACTIVATE, DEPLOY_STATES.ABORT], + DEPLOY_STATES.ABORT_DONE: [] # waitng for being deleted +} + + +class DeployState(object): + _callbacks = [] + _instance = None + + @staticmethod + def register_event_listener(callback): + """register event listener to be triggered when a state transition is completed""" + if callback is not None: + if callback not in DeployState._callbacks: + LOG.debug("Register event listener %s", callback.__qualname__) + DeployState._callbacks.append(callback) + + @staticmethod + def get_deploy_state(): + db_api_instance = get_instance() + deploys = db_api_instance.get_deploy_all() + if not deploys: + state = None # No deploy in progress == None + else: + deploy = deploys[0] + state = DEPLOY_STATES(deploy['state']) + return state + + @staticmethod + def get_instance(): + if DeployState._instance is None: + DeployState._instance = DeployState() + return DeployState._instance + + @staticmethod + def host_deploy_updated(_hostname, _host_new_state): + db_api_instance = get_instance() + deploy_hosts = db_api_instance.get_deploy_host() + deploy_state = DeployState.get_instance() + all_states = [] + for deploy_host in deploy_hosts: + if deploy_host['state'] not in all_states: + all_states.append(deploy_host['state']) + + LOG.info("Host deploy state %s" % str(all_states)) + if DEPLOY_HOST_STATES.FAILED.value in all_states: + deploy_state.deploy_host_failed() + elif DEPLOY_HOST_STATES.PENDING.value in all_states or \ + DEPLOY_HOST_STATES.DEPLOYING.value in all_states: + deploy_state.deploy_host() + elif all_states == [DEPLOY_HOST_STATES.DEPLOYED.value]: + deploy_state.deploy_host_completed() + + def __init__(self): + self._from_release = None + self._to_release = None + self._reboot_required = None + + def check_transition(self, target_state: DEPLOY_STATES): + cur_state = DeployState.get_deploy_state() + if cur_state is not None: + cur_state = DEPLOY_STATES(cur_state) + if target_state in 
deploy_state_transition[cur_state]: + return True + # TODO(bqian) reverse lookup the operation that is not permitted, as feedback + msg = f"Deploy state transform not permitted from {str(cur_state)} to {str(target_state)}" + LOG.info(msg) + return False + + def transform(self, target_state: DEPLOY_STATES): + db_api = get_instance() + db_api.begin_update() + try: + if self.check_transition(target_state): + # None means not existing or deleting + if target_state is not None: + db_api.update_deploy(target_state) + else: + # TODO(bqian) check the current state, and provide guidance on what is + # the possible next operation + if target_state is None: + msg = "Deployment can not be deleted in current state." + else: + msg = "Deploy can not transform to %s from current state" % target_state.value + raise InvalidOperation(msg) + finally: + db_api.end_update() + + for callback in DeployState._callbacks: + LOG.debug("Calling event listener %s", callback.__qualname__) + callback(target_state) + + # below are list of events to drive the FSM + def start(self, from_release, to_release, feed_repo, commit_id, reboot_required): + # start is special, it needs to create the deploy entity + if isinstance(from_release, SWRelease): + from_release = from_release.sw_release + if isinstance(to_release, SWRelease): + to_release = to_release.sw_release + + msg = f"Start deploy {to_release}, current sw {from_release}" + LOG.info(msg) + db_api_instance = get_instance() + db_api_instance.create_deploy(from_release, to_release, feed_repo, commit_id, reboot_required) + + def start_failed(self): + self.transform(DEPLOY_STATES.START_FAILED) + + def start_done(self): + self.transform(DEPLOY_STATES.START_DONE) + + def deploy_host(self): + self.transform(DEPLOY_STATES.HOST) + + def abort(self): + self.transform(DEPLOY_STATES.ABORT) + + def deploy_host_completed(self): + # depends on the deploy state, the deploy can be transformed + # to HOST_DONE (from DEPLOY_HOST) or ABORT_DONE (ABORT) + state =
DeployState.get_deploy_state() + if state == DEPLOY_STATES.ABORT: + self.transform(DEPLOY_STATES.ABORT_DONE) + else: + self.transform(DEPLOY_STATES.HOST_DONE) + + def deploy_host_failed(self): + self.transform(DEPLOY_STATES.HOST_FAILED) + + def activate(self): + self.transform(DEPLOY_STATES.ACTIVATE) + + def activate_completed(self): + self.transform(DEPLOY_STATES.ACTIVATE_DONE) + + def activate_failed(self): + self.transform(DEPLOY_STATES.ACTIVATE_FAILED) + + def completed(self): + self.transform(None) + # delete the deploy and deploy host entities + db_api = get_instance() + db_api.begin_update() + try: + db_api.delete_deploy_host_all() + db_api.delete_deploy() + finally: + db_api.end_update() + + +def require_deploy_state(require_states, prompt): + def wrap(func): + def exec_op(*args, **kwargs): + state = DeployState.get_deploy_state() + if state in require_states: + res = func(*args, **kwargs) + return res + else: + msg = "" + if prompt: + msg = prompt.format(state=state, require_states=require_states) + raise InvalidOperation(msg) + return exec_op + return wrap diff --git a/software/software/exceptions.py b/software/software/exceptions.py index 3e49897d..aa8bdf1a 100644 --- a/software/software/exceptions.py +++ b/software/software/exceptions.py @@ -6,6 +6,57 @@ SPDX-License-Identifier: Apache-2.0 """ +class InternalError(Exception): + """This is an internal error aka bug""" + pass + + +class SoftwareServiceError(Exception): + """ + This is a service error, such as file system issue or configuration + issue, which is expected at design time for a valid reason. + This exception type will provide detail information to the user. 
+ see ExceptionHook for detail + """ + def __init__(self, info="", warn="", error=""): + self._info = info + self._warn = warn + self._error = error + + @property + def info(self): + return self._info if self._info is not None else "" + + @property + def warning(self): + return self._warn if self._warn is not None else "" + + @property + def error(self): + return self._error if self._error is not None else "" + + +class InvalidOperation(SoftwareServiceError): + """Invalid operation, such as deploy a host that is already deployed """ + def __init__(self, msg): + super().__init__(error=msg) + + +class ReleaseNotFound(SoftwareServiceError): + def __init__(self, release_ids): + if not isinstance(release_ids, list): + release_ids = [release_ids] + super().__init__(error="Release %s can not be found" % ', '.join(release_ids)) + + +class HostNotFound(SoftwareServiceError): + def __init__(self, hostname): + super().__init__(error="Host %s can not be found" % hostname) + + +# TODO(bqian) gradually convert SoftwareError based exception to +# either SoftwareServiceError for user visible exceptions, or +# InternalError for internal error (bug) class SoftwareError(Exception): """Base class for software exceptions.""" @@ -57,7 +108,7 @@ class SoftwareFail(SoftwareError): pass -class ReleaseValidationFailure(SoftwareError): +class ReleaseValidationFailure(SoftwareServiceError): """Release validation error.""" pass @@ -67,7 +118,7 @@ class UpgradeNotSupported(SoftwareError): pass -class ReleaseMismatchFailure(SoftwareError): +class ReleaseMismatchFailure(SoftwareServiceError): """Release mismatch error.""" pass @@ -128,33 +179,3 @@ class FileSystemError(SoftwareError): Likely fixable by a root user. """ pass - - -class InternalError(Exception): - """This is an internal error aka bug""" - pass - - -class SoftwareServiceError(Exception): - """ - This is a service error, such as file system issue or configuration - issue, which is expected at design time for a valid reason. 
- This exception type will provide detail information to the user. - see ExceptionHook for detail - """ - def __init__(self, info="", warn="", error=""): - self._info = info - self._warn = warn - self._error = error - - @property - def info(self): - return self._info if self._info is not None else "" - - @property - def warning(self): - return self._warn if self._warn is not None else "" - - @property - def error(self): - return self._error if self._error is not None else "" diff --git a/software/software/parsable_error.py b/software/software/parsable_error.py index b526fd37..706cc341 100644 --- a/software/software/parsable_error.py +++ b/software/software/parsable_error.py @@ -84,11 +84,12 @@ class ParsableErrorMiddleware(object): # simple check xml is valid body = [et.ElementTree.tostring( et.ElementTree.fromstring('' + - '\n'.join(app_iter) + ''))] + '\n'.join(app_iter) + + ''))] except et.ElementTree.ParseError as err: LOG.error('Error parsing HTTP response: %s' % err) body = ['%s' % state['status_code'] + - ''] + ''] state['headers'].append(('Content-Type', 'application/xml')) else: if six.PY3: diff --git a/software/software/release_data.py b/software/software/release_data.py index beeef3e1..26713204 100644 --- a/software/software/release_data.py +++ b/software/software/release_data.py @@ -7,11 +7,13 @@ import os from packaging import version import shutil -from software import constants +import threading +from software import states from software.exceptions import FileSystemError -from software.exceptions import InternalError +from software.exceptions import ReleaseNotFound from software.software_functions import LOG from software import utils +from software.software_functions import ReleaseData class SWRelease(object): @@ -22,6 +24,7 @@ class SWRelease(object): self._metadata = metadata self._contents = contents self._sw_version = None + self._release = None @property def metadata(self): @@ -40,21 +43,8 @@ class SWRelease(object): return 
self.metadata['state'] @staticmethod - def is_valid_state_transition(from_state, to_state): - if to_state not in constants.VALID_RELEASE_STATES: - msg = "Invalid state %s." % to_state - LOG.error(msg) - # this is a bug - raise InternalError(msg) - - if from_state in constants.RELEASE_STATE_VALID_TRANSITION: - if to_state in constants.RELEASE_STATE_VALID_TRANSITION[from_state]: - return True - return False - - @staticmethod - def ensure_state_transition(to_state): - to_dir = constants.RELEASE_STATE_TO_DIR_MAP[to_state] + def _ensure_state_transition(to_state): + to_dir = states.RELEASE_STATE_TO_DIR_MAP[to_state] if not os.path.isdir(to_dir): try: os.makedirs(to_dir, mode=0o755, exist_ok=True) @@ -63,27 +53,27 @@ class SWRelease(object): raise FileSystemError(error) def update_state(self, state): - if SWRelease.is_valid_state_transition(self.state, state): - LOG.info("%s state from %s to %s" % (self.id, self.state, state)) - SWRelease.ensure_state_transition(state) + LOG.info("%s state from %s to %s" % (self.id, self.state, state)) + SWRelease._ensure_state_transition(state) - to_dir = constants.RELEASE_STATE_TO_DIR_MAP[state] - from_dir = constants.RELEASE_STATE_TO_DIR_MAP[self.state] - try: - shutil.move("%s/%s-metadata.xml" % (from_dir, self.id), - "%s/%s-metadata.xml" % (to_dir, self.id)) - except shutil.Error: - msg = "Failed to move the metadata for %s" % self.id - LOG.exception(msg) - raise FileSystemError(msg) + to_dir = states.RELEASE_STATE_TO_DIR_MAP[state] + from_dir = states.RELEASE_STATE_TO_DIR_MAP[self.state] + try: + shutil.move("%s/%s-metadata.xml" % (from_dir, self.id), + "%s/%s-metadata.xml" % (to_dir, self.id)) + except shutil.Error: + msg = "Failed to move the metadata for %s" % self.id + LOG.exception(msg) + raise FileSystemError(msg) - self.metadata['state'] = state - else: - # this is a bug - error = "Invalid state transition %s, current is %s, target state is %s" % \ - (self.id, self.state, state) - LOG.info(error) - raise InternalError(error) 
+ self.metadata['state'] = state + + @property + def version_obj(self): + '''returns packaging.version object''' + if self._release is None: + self._release = version.parse(self.sw_release) + return self._release @property def sw_release(self): @@ -97,7 +87,14 @@ class SWRelease(object): self._sw_version = utils.get_major_release_version(self.sw_release) return self._sw_version + @property + def component(self): + return self._get_by_key('component') + def _get_latest_commit(self): + if 'number_of_commits' not in self.contents: + return None + num_commits = self.contents['number_of_commits'] if int(num_commits) > 0: commit_tag = "commit%s" % num_commits @@ -119,6 +116,14 @@ class SWRelease(object): # latest commit return None + @property + def base_commit_id(self): + commit = None + base = self.contents.get('base') + if base: + commit = base.get('commit') + return commit + def _get_by_key(self, key, default=None): if key in self._metadata: return self._metadata[key] @@ -147,16 +152,28 @@ class SWRelease(object): @property def unremovable(self): - return self._get_by_key('unremovable') + return self._get_by_key('unremovable') == "Y" @property def reboot_required(self): - return self._get_by_key('reboot_required') + return self._get_by_key('reboot_required') == "Y" + + @property + def requires_release_ids(self): + return self._get_by_key('requires') or [] + + @property + def packages(self): + return self._get_by_key('packages') @property def restart_script(self): return self._get_by_key('restart_script') + @property + def apply_active_release_only(self): + return self._get_by_key('apply_active_release_only') + @property def commit_checksum(self): commit = self._get_latest_commit() @@ -167,15 +184,76 @@ class SWRelease(object): # latest commit return None + def get_all_dependencies(self, filter_states=None): + """ + :return: sorted list of all direct and indirect required releases + raise ReleaseNotFound if one of the release is not uploaded. 
+ """ + def _get_all_deps(release_id, release_collection, deps): + release = release_collection[release_id] + if release is None: + raise ReleaseNotFound([release_id]) + + if filter_states and release.state not in filter_states: + return + + for id in release.requires_release_ids: + if id not in deps: + deps.append(id) + _get_all_deps(id, release_collection, deps) + + all_deps = [] + release_collection = get_SWReleaseCollection() + _get_all_deps(self.id, release_collection, all_deps) + releases = sorted([release_collection[id] for id in all_deps]) + return releases + + def __lt__(self, other): + return self.version_obj < other.version_obj + + def __le__(self, other): + return self.version_obj <= other.version_obj + + def __eq__(self, other): + return self.version_obj == other.version_obj + + def __ge__(self, other): + return self.version_obj >= other.version_obj + + def __gt__(self, other): + return self.version_obj > other.version_obj + + def __ne__(self, other): + return self.version_obj != other.version_obj + @property def is_ga_release(self): ver = version.parse(self.sw_release) - _, _, pp = ver.release + if len(ver.release) == 2: + pp = 0 + else: + _, _, pp = ver.release return pp == 0 @property def is_deletable(self): - return self.state in constants.DELETABLE_STATE + return self.state in states.DELETABLE_STATE + + def to_query_dict(self): + data = {"release_id": self.id, + "state": self.state, + "sw_version": self.sw_release, + "component": self.component, + "status": self.status, + "unremovable": self.unremovable, + "summary": self.summary, + "description": self.description, + "install_instructions": self.install_instructions, + "warnings": self.warnings, + "reboot_required": self.reboot_required, + "requires": self.requires_release_ids[:], + "packages": self.packages[:]} + return data class SWReleaseCollection(object): @@ -191,11 +269,23 @@ class SWReleaseCollection(object): sw_release = SWRelease(rel_id, rel_data, contents) self._sw_releases[rel_id] = 
sw_release + @property + def running_release(self): + latest = None + for rel in self.iterate_releases_by_state(states.DEPLOYED): + if latest is None or rel.version_obj > latest.version_obj: + latest = rel + + return latest + def get_release_by_id(self, rel_id): if rel_id in self._sw_releases: return self._sw_releases[rel_id] return None + def __getitem__(self, rel_id): + return self.get_release_by_id(rel_id) + def get_release_by_commit_id(self, commit_id): for _, sw_release in self._sw_releases: if sw_release.commit_id == commit_id: @@ -219,15 +309,44 @@ class SWReleaseCollection(object): yield self._sw_releases[rel_id] def update_state(self, list_of_releases, state): - for release_id in list_of_releases: - release = self.get_release_by_id(release_id) - if release is not None: - if SWRelease.is_valid_state_transition(release.state, state): - SWRelease.ensure_state_transition(state) - else: - LOG.error("release %s not found" % release_id) - for release_id in list_of_releases: release = self.get_release_by_id(release_id) if release is not None: release.update_state(state) + + +class LocalStorage(object): + def __init__(self): + self._storage = threading.local() + + def get_value(self, key): + if hasattr(self._storage, key): + return getattr(self._storage, key) + else: + return None + + def set_value(self, key, value): + setattr(self._storage, key, value) + + def void_value(self, key): + if hasattr(self._storage, key): + delattr(self._storage, key) + + +_local_storage = LocalStorage() + + +def get_SWReleaseCollection(): + release_data = _local_storage.get_value('release_data') + if release_data is None: + LOG.info("Load release_data") + release_data = ReleaseData() + release_data.load_all() + LOG.info("release_data loaded") + _local_storage.set_value('release_data', release_data) + + return SWReleaseCollection(release_data) + + +def reload_release_data(): + _local_storage.void_value('release_data') diff --git a/software/software/release_state.py 
b/software/software/release_state.py new file mode 100644 index 00000000..388e0624 --- /dev/null +++ b/software/software/release_state.py @@ -0,0 +1,94 @@ +# +# SPDX-License-Identifier: Apache-2.0 +# +# Copyright (c) 2024 Wind River Systems, Inc. +# +import logging + +from software import states +from software.exceptions import ReleaseNotFound +from software.release_data import get_SWReleaseCollection +from software.release_data import reload_release_data + + +LOG = logging.getLogger('main_logger') + +# valid release state transition below will still be changed as +# development continue +release_state_transition = { + states.AVAILABLE: [states.DEPLOYING], + states.DEPLOYING: [states.DEPLOYED, states.AVAILABLE], + states.DEPLOYED: [states.REMOVING, states.UNAVAILABLE, states.COMMITTED], + states.REMOVING: [states.AVAILABLE], + states.COMMITTED: [], + states.UNAVAILABLE: [], +} + + +class ReleaseState(object): + def __init__(self, release_ids=None, release_state=None): + not_found_list = [] + release_collection = get_SWReleaseCollection() + if release_ids: + self._release_ids = release_ids[:] + not_found_list = [rel_id for rel_id in release_ids if release_collection[rel_id] is None] + elif release_state: + self._release_ids = [rel.id for rel in release_collection.iterate_releases_by_state(release_state)] + + if len(not_found_list) > 0: + raise ReleaseNotFound(not_found_list) + + @staticmethod + def deploy_updated(target_state): + if target_state is None: # completed + deploying = ReleaseState(release_state=states.DEPLOYING) + + if deploying.is_major_release_deployment(): + deployed = ReleaseState(release_state=states.DEPLOYED) + deployed.replaced() + + deploying.deploy_completed() + + def check_transition(self, target_state): + """check ALL releases can transform to target state""" + release_collection = get_SWReleaseCollection() + for rel_id in self._release_ids: + state = release_collection[rel_id].state + if target_state not in release_state_transition[state]: + 
return False + return True + + def transform(self, target_state): + if self.check_transition(target_state): + release_collection = get_SWReleaseCollection() + release_collection.update_state(self._release_ids, target_state) + + reload_release_data() + + def is_major_release_deployment(self): + release_collection = get_SWReleaseCollection() + for rel_id in self._release_ids: + release = release_collection.get_release_by_id(rel_id) + if release.is_ga_release: + return True + return False + + def start_deploy(self): + self.transform(states.DEPLOYING) + + def deploy_completed(self): + self.transform(states.DEPLOYED) + + def committed(self): + self.transform(states.COMMITTED) + + def replaced(self): + """ + Current running release is replaced with a new deployed release + This indicates a major release deploy is completed and running + release become "unavailable" + """ + self.transform(states.UNAVAILABLE) + + def start_remove(self): + self.transform(states.REMOVING) diff --git a/software/software/software_controller.py b/software/software/software_controller.py index 6b8cc8cd..66975fbf 100644 --- a/software/software/software_controller.py +++ b/software/software/software_controller.py @@ -13,6 +13,7 @@ import configparser import gc import json import os +from packaging import version import select import sh import shutil @@ -33,10 +34,12 @@ import software.apt_utils as apt_utils import software.ostree_utils as ostree_utils from software.api import app from software.authapi import app as auth_app -from software.constants import DEPLOY_STATES +from software.states import DEPLOY_STATES from software.base import PatchService from software.dc_utils import get_subcloud_groupby_version +from software.deploy_state import require_deploy_state from software.exceptions import APTOSTreeCommandFail +from software.exceptions import HostNotFound from software.exceptions import InternalError from software.exceptions import MetadataFail from software.exceptions import 
UpgradeNotSupported @@ -46,10 +49,10 @@ from software.exceptions import SoftwareError from software.exceptions import SoftwareFail from software.exceptions import ReleaseInvalidRequest from software.exceptions import ReleaseValidationFailure -from software.exceptions import ReleaseMismatchFailure from software.exceptions import ReleaseIsoDeleteFailure from software.exceptions import SoftwareServiceError -from software.release_data import SWReleaseCollection +from software.release_data import reload_release_data +from software.release_data import get_SWReleaseCollection from software.software_functions import collect_current_load_for_hosts from software.software_functions import create_deploy_hosts from software.software_functions import parse_release_metadata @@ -67,9 +70,11 @@ from software.software_functions import SW_VERSION from software.software_functions import LOG from software.software_functions import audit_log_info from software.software_functions import repo_root_dir -from software.software_functions import ReleaseData from software.software_functions import is_deploy_state_in_sync from software.software_functions import is_deployment_in_progress +from software.release_state import ReleaseState +from software.deploy_host_state import DeployHostState +from software.deploy_state import DeployState from software.release_verify import verify_files import software.config as cfg import software.utils as utils @@ -80,6 +85,7 @@ from software.db.api import get_instance import software.messages as messages import software.constants as constants +from software import states from tsconfig.tsconfig import INITIAL_CONFIG_COMPLETE_FLAG from tsconfig.tsconfig import INITIAL_CONTROLLER_CONFIG_COMPLETE @@ -106,19 +112,6 @@ pending_queries = [] thread_death = None keep_running = True -DEPLOY_STATE_METADATA_DIR_DICT = \ - { - constants.AVAILABLE: constants.AVAILABLE_DIR, - constants.UNAVAILABLE: constants.UNAVAILABLE_DIR, - constants.DEPLOYING_START: 
constants.DEPLOYING_START_DIR, - constants.DEPLOYING_HOST: constants.DEPLOYING_HOST_DIR, - constants.DEPLOYING_ACTIVATE: constants.DEPLOYING_ACTIVATE_DIR, - constants.DEPLOYING_COMPLETE: constants.DEPLOYING_COMPLETE_DIR, - constants.DEPLOYED: constants.DEPLOYED_DIR, - constants.REMOVING: constants.REMOVING_DIR, - constants.ABORTING: constants.ABORTING_DIR, - constants.COMMITTED: constants.COMMITTED_DIR, - } # Limit socket blocking to 5 seconds to allow for thread to shutdown api_socket_timeout = 5.0 @@ -318,6 +311,8 @@ class PatchMessageSyncReq(messages.PatchMessage): # We may need to do this in a separate thread, so that we continue to process hellos LOG.info("Handling sync req") + # NOTE(bqian) sync_from_nbr returns "False" if sync operations failed. + # need to think of reattempt to deal w/ the potential failure. sc.sync_from_nbr(host) resp = PatchMessageSyncComplete() @@ -566,13 +561,34 @@ class PatchMessageAgentInstallResp(messages.PatchMessage): # LOG.info("Handling hello ack") sc.hosts_lock.acquire() - if not addr[0] in sc.hosts: - sc.hosts[addr[0]] = AgentNeighbour(addr[0]) + try: + # NOTE(bqian) seems like trying to tolerate a failure situation + # that a host is directed to install a patch but during the installation + # software-controller-daemon gets restarted + # should remove the sc.hosts which is in memory volatile storage and replaced with + # permanent deploy-host entity + ip = addr[0] + if ip not in sc.hosts: + sc.hosts[ip] = AgentNeighbour(ip) - sc.hosts[addr[0]].install_status = self.status - sc.hosts[addr[0]].install_pending = False - sc.hosts[addr[0]].install_reject_reason = self.reject_reason - sc.hosts_lock.release() + sc.hosts[ip].install_status = self.status + sc.hosts[ip].install_pending = False + sc.hosts[ip].install_reject_reason = self.reject_reason + hostname = sc.hosts[ip].hostname + finally: + sc.hosts_lock.release() + + deploy_host_state = DeployHostState(hostname) + # NOTE(bqian) apparently it uses 2 boolean to indicate 2
situations + # where there could be 4 combinations + if self.status: + deploy_host_state.deployed() + return + elif self.reject_reason: + deploy_host_state.deploy_failed() + return + + LOG.error("Bug: shouldn't reach here") def send(self, sock): # pylint: disable=unused-argument LOG.error("Should not get here") @@ -686,14 +702,14 @@ class SWMessageDeployStateChanged(messages.PatchMessage): valid_agents = ['deploy-start'] if 'agent' in data: - agent = data['agent'] + self.agent = data['agent'] else: - agent = 'unknown' + self.agent = 'unknown' - if agent not in valid_agents: + if self.agent not in valid_agents: # ignore msg from unknown senders LOG.info("%s received from unknown agent %s" % - (messages.PATCHMSG_DEPLOY_STATE_CHANGED, agent)) + (messages.PATCHMSG_DEPLOY_STATE_CHANGED, self.agent)) self.valid = False valid_state = { @@ -705,20 +721,20 @@ class SWMessageDeployStateChanged(messages.PatchMessage): if deploy_state in valid_state: self.deploy_state = valid_state[deploy_state] LOG.info("%s received from %s with deploy-state %s" % - (messages.PATCHMSG_DEPLOY_STATE_CHANGED, agent, deploy_state)) + (messages.PATCHMSG_DEPLOY_STATE_CHANGED, self.agent, deploy_state)) else: self.valid = False LOG.error("%s received from %s with invalid deploy-state %s" % - (messages.PATCHMSG_DEPLOY_STATE_CHANGED, agent, deploy_state)) + (messages.PATCHMSG_DEPLOY_STATE_CHANGED, self.agent, deploy_state)) if 'hostname' in data and data['hostname']: self.hostname = data['hostname'] if 'host-state' in data and data['host-state']: host_state = data['host-state'] - if host_state not in constants.VALID_HOST_DEPLOY_STATE: + if host_state not in states.VALID_HOST_DEPLOY_STATE: LOG.error("%s received from %s with invalid host-state %s" % - (messages.PATCHMSG_DEPLOY_STATE_CHANGED, agent, host_state)) + (messages.PATCHMSG_DEPLOY_STATE_CHANGED, self.agent, host_state)) self.valid = False else: self.host_state = host_state @@ -728,7 +744,7 @@ class 
SWMessageDeployStateChanged(messages.PatchMessage): if not self.valid: LOG.error("%s received from %s as invalid %s" % - (messages.PATCHMSG_DEPLOY_STATE_CHANGED, agent, data)) + (messages.PATCHMSG_DEPLOY_STATE_CHANGED, self.agent, data)) def handle(self, sock, addr): global sc @@ -763,7 +779,6 @@ class PatchController(PatchService): self.socket_lock = threading.RLock() self.controller_neighbours_lock = threading.RLock() self.hosts_lock = threading.RLock() - self.release_data_lock = threading.RLock() self.hosts = {} self.controller_neighbours = {} @@ -783,8 +798,7 @@ class PatchController(PatchService): self.controller_address = None self.agent_address = None self.patch_op_counter = 1 - self.release_data = ReleaseData() - self.release_data.load_all() + reload_release_data() try: self.latest_feed_commit = ostree_utils.get_feed_latest_commit(SW_VERSION) except OSTreeCommandFail: @@ -824,11 +838,12 @@ class PatchController(PatchService): if self.hostname == "controller-1" \ else "controller-1" + DeployHostState.register_event_listener(DeployState.host_deploy_updated) + DeployState.register_event_listener(ReleaseState.deploy_updated) + @property def release_collection(self): - # for this stage, the SWReleaseCollection behaves as a broker which - # does not hold any release data. it only last one request - swrc = SWReleaseCollection(self.release_data) + swrc = get_SWReleaseCollection() return swrc def update_config(self): @@ -886,6 +901,8 @@ class PatchController(PatchService): if self.patch_op_counter >= nbr_patch_op_counter: return + # NOTE(bqian) sync_from_nbr returns "False" if sync operations failed. + # need to think of reattempt to deal w/ the potential failure. 
self.sync_from_nbr(host) def sync_from_nbr(self, host): @@ -936,13 +953,13 @@ class PatchController(PatchService): list_of_dirs = dir_names.stdout.decode("utf-8").rstrip().split() for rel_dir in list_of_dirs: - feed_ostree = "%s/%s/ostree_repo/" % (constants.FEED_OSTREE_BASE_DIR, rel_dir) - if not os.path.isdir(feed_ostree): - LOG.info("Skipping feed dir %s", feed_ostree) + feed_repo = "%s/%s/ostree_repo/" % (constants.FEED_OSTREE_BASE_DIR, rel_dir) + if not os.path.isdir(feed_repo): + LOG.info("Skipping feed dir %s", feed_repo) continue - LOG.info("Syncing %s", feed_ostree) + LOG.info("Syncing %s", feed_repo) output = subprocess.check_output(["ostree", - "--repo=%s" % feed_ostree, + "--repo=%s" % feed_repo, "pull", "--depth=-1", "--mirror", @@ -951,7 +968,7 @@ class PatchController(PatchService): output = subprocess.check_output(["ostree", "summary", "--update", - "--repo=%s" % feed_ostree], + "--repo=%s" % feed_repo], stderr=subprocess.STDOUT) LOG.info("Synced to mate feed via ostree pull: %s", output) except subprocess.CalledProcessError: @@ -960,20 +977,18 @@ class PatchController(PatchService): self.read_state_file() - with self.release_data_lock: - with self.hosts_lock: - self.interim_state = {} - self.release_data.load_all() - self.check_patch_states() + self.interim_state = {} + reload_release_data() + self.check_patch_states() - if os.path.exists(app_dependency_filename): - try: - with open(app_dependency_filename, 'r') as f: - self.app_dependencies = json.loads(f.read()) - except Exception: - LOG.exception("Failed to read app dependencies: %s", app_dependency_filename) - else: - self.app_dependencies = {} + if os.path.exists(app_dependency_filename): + try: + with open(app_dependency_filename, 'r') as f: + self.app_dependencies = json.loads(f.read()) + except Exception: + LOG.exception("Failed to read app dependencies: %s", app_dependency_filename) + else: + self.app_dependencies = {} return True @@ -985,13 +1000,22 @@ class PatchController(PatchService): 
# Default to allowing in-service patching self.allow_insvc_patching = True + # NOTE(bqian) How is this loop relevant? + # all_insevc_patching equals not required_reboot in deploy entity + # see software_entity. for ip in (ip for ip in list(self.hosts) if self.hosts[ip].out_of_date): - for release_id in self.release_data.metadata: - if self.release_data.metadata[release_id].get("reboot_required") != "N" and \ - self.release_data.metadata[release_id]["state"] == constants.DEPLOYING_START: + for release in self.release_collection.iterate_releases(): + # NOTE(bqian) below consolidates DEPLOYING_START to DEPLOYING + # all_insevc_patching equals not required_reboot in deploy entity + # see software_entity. + # also apparently it is a bug to check release state as it will + # end up return default (true) when it is not DEPLOYING_START for + # example, checking during removal. + if release.reboot_required and release.state == states.DEPLOYING: self.allow_insvc_patching = False + # NOTE(bqian) this function looks very buggy, should probably be rewritten - def get_release_dependency_list(self, release): + def get_release_dependency_list(self, release_id): """ Returns a list of software releases that are required by this release. @@ -1000,34 +1024,44 @@ class PatchController(PatchService): input param patch_id='R3' :param release: The software release version """ - if not self.release_data.metadata[release]["requires"]: - return [] - else: - release_dependency_list = [] - for req_release in self.release_data.metadata[release]["requires"]: - release_dependency_list.append(req_release) - release_dependency_list = release_dependency_list + \ - self.get_release_dependency_list(req_release) - return release_dependency_list - def get_release_required_by_list(self, release): + # TODO(bqian): this algorithm will fail if dependency is not sequential. + # i.e, if R5 requires R4 and R1, R4 requires R3 and R1, R3 requires R1 + # this relation will bring R1 before R3. 
+ # change below is not fixing the algorithm, it converts directly using + # release_data to release_collection wrapper class. + release = self.release_collection.get_release_by_id(release_id) + if release is None: + error = f"Not all required releases are uploaded, missing {release_id}" + raise SoftwareServiceError(error=error) + + release_dependency_list = [] + for req_release in release.requires_release_ids: + release_dependency_list.append(req_release) + release_dependency_list = release_dependency_list + \ + self.get_release_dependency_list(req_release) + return release_dependency_list + + def get_release_required_by_list(self, release_id): """ Returns a list of software releases that require this release. Example: If R3 requires R2 and R2 requires R1, then this method will return ['R3', 'R2'] for input param patch_id='R1' - :param release: The software release version + :param release_id: The software release id """ - if release in self.release_data.metadata: - release_required_by_list = [] - for req_release in self.release_data.metadata: - if release in self.release_data.metadata[req_release]["requires"]: - release_required_by_list.append(req_release) + release_required_by_list = [] + # NOTE(bqian) not sure why the check is needed. release_id is always + # from the release_data collection. 
+ if self.release_collection.get_release_by_id(release_id): + for req_release in self.release_collection.iterate_releases(): + if release_id in req_release.requires_release_ids: + release_required_by_list.append(req_release.id) release_required_by_list = release_required_by_list + \ - self.get_release_required_by_list(req_release) - return release_required_by_list - return [] + self.get_release_required_by_list(req_release.id) + + return release_required_by_list def get_ostree_tar_filename(self, patch_sw_version, patch_id): ''' @@ -1044,10 +1078,12 @@ class PatchController(PatchService): Deletes the restart script (if any) associated with the patch :param patch_id: The patch ID ''' - if not self.release_data.metadata[patch_id].get("restart_script"): + release = self.release_collection.get_release_by_id(patch_id) + restart_script = release.restart_script + if not restart_script: return - restart_script_path = "%s/%s" % (root_scripts_dir, self.release_data.metadata[patch_id]["restart_script"]) + restart_script_path = "%s/%s" % (root_scripts_dir, restart_script) try: # Delete the metadata os.remove(restart_script_path) @@ -1063,8 +1099,8 @@ class PatchController(PatchService): # Pass the current patch state to the semantic check as a series of args patch_state_args = [] - for patch_id in list(self.release_data.metadata): - patch_state = '%s=%s' % (patch_id, self.release_data.metadata[patch_id]["state"]) + for release in self.release_collection.iterate_releases(): + patch_state = '%s=%s' % (release.id, release.state) patch_state_args += ['-p', patch_state] # Run semantic checks, if any @@ -1136,25 +1172,11 @@ class PatchController(PatchService): # Restore /etc/hosts os.rename(ETC_HOSTS_BACKUP_FILE_PATH, ETC_HOSTS_FILE_PATH) - for release in sorted(list(self.release_data.metadata)): - if self.release_data.metadata[release]["state"] == constants.DEPLOYING_START: - self.release_data.metadata[release]["state"] = constants.DEPLOYED - try: - shutil.move("%s/%s-metadata.xml" 
% (constants.DEPLOYING_START_DIR, release), - "%s/%s-metadata.xml" % (constants.DEPLOYED_DIR, release)) - except shutil.Error: - msg = "Failed to move the metadata for %s" % release - LOG.exception(msg) - raise MetadataFail(msg) - elif self.release_data.metadata[release]["state"] == constants.REMOVING: - self.release_data.metadata[release]["state"] = constants.AVAILABLE - try: - shutil.move("%s/%s-metadata.xml" % (constants.REMOVING_DIR, release), - "%s/%s-metadata.xml" % (constants.AVAILABLE_DIR, release)) - except shutil.Error: - msg = "Failed to move the metadata for %s" % release - LOG.exception(msg) - raise MetadataFail(msg) + for release in self.release_collection.iterate_releases(): + if release.state == states.DEPLOYING: + release.update_state(states.DEPLOYED) + elif release.state == states.REMOVING: + release.update_state(states.AVAILABLE) msg_info += "Software installation is complete.\n" msg_info += "Please reboot before continuing with configuration." @@ -1184,11 +1206,10 @@ class PatchController(PatchService): LOG.info(msg) raise SoftwareServiceError(error=msg) - def _process_upload_upgrade_files(self, upgrade_files, release_data): + def _process_upload_upgrade_files(self, upgrade_files): """ Process the uploaded upgrade files :param upgrade_files: dict of upgrade files - :param release_data: ReleaseData object :return: info, warning, error messages """ local_info = "" @@ -1201,15 +1222,16 @@ class PatchController(PatchService): to_release = None iso_mount_dir = None + all_good = True try: - if not verify_files([upgrade_files[constants.ISO_EXTENSION]], - upgrade_files[constants.SIG_EXTENSION]): - raise ReleaseValidationFailure("Invalid signature file") + iso = upgrade_files[constants.ISO_EXTENSION] + sig = upgrade_files[constants.SIG_EXTENSION] + if not verify_files([iso], sig): + msg = "Software %s:%s signature validation failed" % (iso, sig) + raise ReleaseValidationFailure(error=msg) - msg = ("iso and signature files upload completed\n" - "Importing 
iso is in progress\n") - LOG.info(msg) - local_info += msg + LOG.info("iso and signature files upload completed." + "Importing iso is in progress") iso_file = upgrade_files.get(constants.ISO_EXTENSION) @@ -1258,12 +1280,17 @@ class PatchController(PatchService): shutil.copyfile(metadata_file, to_file) # Update the release metadata - abs_stx_release_metadata_file = os.path.join( - iso_mount_dir, 'upgrades', f"{constants.RELEASE_GA_NAME % to_release}-metadata.xml") - release_data.parse_metadata(abs_stx_release_metadata_file, state=constants.AVAILABLE) + # metadata files have been copied over to the metadata/available directory + reload_release_data() LOG.info("Updated release metadata for %s", to_release) # Get release metadata + # NOTE(bqian) to_release is sw_version (MM.mm), the path isn't correct + # also prepatched iso needs to be handled. + # should go through the release_data to find the latest release of major release + # to_release + abs_stx_release_metadata_file = os.path.join( + iso_mount_dir, 'upgrades', f"{constants.RELEASE_GA_NAME % to_release}-metadata.xml") all_release_meta_info = parse_release_metadata(abs_stx_release_metadata_file) release_meta_info = { os.path.basename(upgrade_files[constants.ISO_EXTENSION]): { @@ -1275,25 +1302,20 @@ class PatchController(PatchService): "sw_version": None, } } - - except ReleaseValidationFailure: - msg = "Upgrade file signature verification failed" - LOG.exception(msg) - local_error += msg + "\n" - except Exception as e: - msg = "Failed to process upgrade files. 
Error: %s" % str(e) - LOG.exception(msg) - local_error += msg + "\n" - # delete versioned directory - if to_release: - to_release_dir = os.path.join(constants.SOFTWARE_STORAGE_DIR, "rel-%s" % to_release) - shutil.rmtree(to_release_dir, ignore_errors=True) + except Exception: + all_good = False + raise finally: # Unmount the iso file if iso_mount_dir: unmount_iso_load(iso_mount_dir) LOG.info("Unmounted iso file %s", iso_file) + # remove upload leftover in case of failure + if not all_good and to_release: + to_release_dir = os.path.join(constants.SOFTWARE_STORAGE_DIR, "rel-%s" % to_release) + shutil.rmtree(to_release_dir, ignore_errors=True) + return local_info, local_warning, local_error, release_meta_info def _process_upload_patch_files(self, patch_files): @@ -1309,7 +1331,7 @@ class PatchController(PatchService): upload_patch_info = [] try: # Create the directories - for state_dir in constants.DEPLOY_STATE_METADATA_DIR: + for state_dir in states.DEPLOY_STATE_METADATA_DIR: os.makedirs(state_dir, exist_ok=True) except os.error: msg = "Failed to create directories" @@ -1320,83 +1342,68 @@ class PatchController(PatchService): base_patch_filename = os.path.basename(patch_file) + # NOTE(bqian) does it make sense to link the release_id to name of the patch? # Get the release_id from the filename # and check to see if it's already uploaded # todo(abailey) We should not require the ID as part of the file (release_id, _) = os.path.splitext(base_patch_filename) - patch_metadata = self.release_data.metadata.get(release_id, None) + release = self.release_collection.get_release_by_id(release_id) - if patch_metadata: - if patch_metadata["state"] != constants.AVAILABLE: - msg = "%s is being or has already been deployed." % release_id + if release: + if release.state == states.COMMITTED: + msg = "%s is committed. Metadata not updated" % release_id LOG.info(msg) local_info += msg + "\n" - elif patch_metadata["state"] == constants.COMMITTED: - msg = "%s is committed. 
Metadata not updated" % release_id + elif release.state != states.AVAILABLE: + msg = "%s is not currently in available state to be deployed." % release_id LOG.info(msg) local_info += msg + "\n" else: try: # todo(abailey) PatchFile / extract_patch should be renamed - this_release = PatchFile.extract_patch(patch_file, - metadata_dir=constants.AVAILABLE_DIR, - metadata_only=True, - existing_content=self.release_data.contents[release_id], - base_pkgdata=self.base_pkgdata) + PatchFile.extract_patch(patch_file, + metadata_dir=states.AVAILABLE_DIR, + metadata_only=True, + existing_content=release.contents, + base_pkgdata=self.base_pkgdata) PatchFile.unpack_patch(patch_file) - self.release_data.update_release(this_release) + reload_release_data() msg = "%s is already uploaded. Updated metadata only" % release_id LOG.info(msg) local_info += msg + "\n" - except ReleaseMismatchFailure: - msg = "Contents of %s do not match re-uploaded release" % release_id - LOG.exception(msg) - local_error += msg + "\n" - except ReleaseValidationFailure as e: - msg = "Release validation failed for %s" % release_id - if str(e) is not None and str(e) != '': - msg += ":\n%s" % str(e) - LOG.exception(msg) - local_error += msg + "\n" except SoftwareFail: msg = "Failed to upload release %s" % release_id LOG.exception(msg) local_error += msg + "\n" else: try: - this_release = PatchFile.extract_patch(patch_file, - metadata_dir=constants.AVAILABLE_DIR, - base_pkgdata=self.base_pkgdata) + PatchFile.extract_patch(patch_file, + metadata_dir=states.AVAILABLE_DIR, + base_pkgdata=self.base_pkgdata) PatchFile.unpack_patch(patch_file) local_info += "%s is now uploaded\n" % release_id - self.release_data.add_release(this_release) + reload_release_data() - if not os.path.isfile(INITIAL_CONTROLLER_CONFIG_COMPLETE): - self.release_data.metadata[release_id]["state"] = constants.AVAILABLE - elif len(self.hosts) > 0: - self.release_data.metadata[release_id]["state"] = constants.AVAILABLE - else: - 
self.release_data.metadata[release_id]["state"] = constants.UNKNOWN - except ReleaseValidationFailure as e: - msg = "Release validation failed for %s" % release_id - if str(e) is not None and str(e) != '': - msg += ":\n%s" % str(e) - LOG.exception(msg) - local_error += msg + "\n" - continue + # NOTE(bqian) The check and exception raise below should be revisited; + # if applicable, it should be applied to the beginning of all requests. + if len(self.hosts) == 0: + msg = "service is running in incorrect state. No registered host" + raise InternalError(msg) except SoftwareFail: msg = "Failed to upload release %s" % release_id LOG.exception(msg) local_error += msg + "\n" continue - upload_patch_info.append({ - base_patch_filename: { - "id": release_id, - "sw_version": self.release_data.metadata[release_id].get("sw_version", None), - } - }) + release = self.release_collection.get_release_by_id(release_id) + if release: + upload_patch_info.append({ + base_patch_filename: { + "id": release_id, + "sw_release": release.sw_release, # MM.mm.pp release version + } + }) # create versioned precheck for uploaded patches for patch in upload_patch_info: @@ -1406,23 +1413,20 @@ class PatchController(PatchService): if filename in pf: patch_file = pf - sw_version = values.get("sw_version") - required_patches = self.release_data.metadata[values.get("id")].get("requires") + sw_release = values.get("sw_release") + + required_patches = [] + for dep_id in self.release_collection.get_release_by_id(values.get("id")).requires_release_ids: + required_patches.append(version.parse(dep_id)) # sort the required patches list and get the latest, if available - req_patch_id = None - req_patch_metadata = None req_patch_version = None - if required_patches: - req_patch_id = sorted(required_patches)[-1] - if req_patch_id: - req_patch_metadata = self.release_data.metadata.get(req_patch_id) - if req_patch_metadata: - req_patch_version = req_patch_metadata.get("sw_version") - if req_patch_id and not
req_patch_metadata: - LOG.warning("Required patch '%s' is not uploaded." % req_patch_id) + if len(required_patches) > 0: + req_patch_version = str(sorted(required_patches)[-1]) + if self.release_collection.get_release_by_id(req_patch_version) is None: + LOG.warning("Required patch '%s' is not uploaded." % req_patch_version) - PatchFile.create_versioned_precheck(patch_file, sw_version, req_patch_version=req_patch_version) + PatchFile.create_versioned_precheck(patch_file, sw_release, req_patch_version=req_patch_version) return local_info, local_warning, local_error, upload_patch_info @@ -1464,8 +1468,7 @@ class PatchController(PatchService): LOG.error(msg) msg_error += msg + "\n" elif len(upgrade_files) == 2: # Two upgrade files uploaded - tmp_info, tmp_warning, tmp_error, tmp_release_meta_info = self._process_upload_upgrade_files( - upgrade_files, self.release_data) + tmp_info, tmp_warning, tmp_error, tmp_release_meta_info = self._process_upload_upgrade_files(upgrade_files) msg_info += tmp_info msg_warning += tmp_warning msg_error += tmp_error @@ -1479,20 +1482,23 @@ class PatchController(PatchService): msg_error += tmp_error upload_info += tmp_patch_meta_info + reload_release_data() + return dict(info=msg_info, warning=msg_warning, error=msg_error, upload_info=upload_info) - def release_apply_remove_order(self, release, running_sw_version, reverse=False): + def release_apply_remove_order(self, release_id, running_sw_version, reverse=False): # If R4 requires R3, R3 requires R2 and R2 requires R1, # then release_order = ['R4', 'R3', 'R2', 'R1'] if reverse: - release_order = [release] + self.get_release_dependency_list(release) + release_order = [release_id] + self.get_release_dependency_list(release_id) # If release_order = ['R4', 'R3', 'R2', 'R1'] # and running_sw_version is the sw_version for R2 # After the operation below, release_order = ['R4', 'R3'] for i, rel in enumerate(release_order): - if self.release_data.metadata[rel]["sw_version"] == running_sw_version: 
+ release = self.release_collection.get_release_by_id(rel) + if release.sw_release == running_sw_version: val = i - len(release_order) + 1 while val >= 0: release_order.pop() @@ -1500,7 +1506,7 @@ class PatchController(PatchService): break else: - release_order = [release] + self.get_release_required_by_list(release) + release_order = [release_id] + self.get_release_required_by_list(release_id) # reverse = True is for apply operation # In this case, the release_order = ['R3', 'R4'] # reverse = False is for remove operation @@ -1508,7 +1514,9 @@ class PatchController(PatchService): if reverse: release_order.reverse() else: + # Note(bqian) this pop is questionable, specified release would not be removed? release_order.pop(0) + return release_order def software_release_delete_api(self, release_ids): @@ -1563,41 +1571,10 @@ class PatchController(PatchService): LOG.info(msg) audit_log_info(msg) - # Verify releases exist and are in proper state first - id_verification = all(release_id in self.release_data.metadata for release_id in release_list) - for release_id in release_list: - if release_id not in self.release_data.metadata: - msg = "Release %s does not exist" % release_id - LOG.error(msg) - msg_error += msg + "\n" - id_verification = False - continue - - deploystate = self.release_data.metadata[release_id]["state"] - ignore_states = [constants.AVAILABLE, - constants.DEPLOYING_START, - constants.DEPLOYING_ACTIVATE, - constants.DEPLOYING_COMPLETE, - constants.DEPLOYING_HOST, - constants.DEPLOYED] - - if deploystate not in ignore_states: - msg = f"Release {release_id} is {deploystate} and cannot be deleted." 
- LOG.error(msg) - msg_error += msg + "\n" - id_verification = False - continue - - if not id_verification: - return dict(info=msg_info, warning=msg_warning, error=msg_error) - # Handle operation for release_id in release_list: - release_sw_version = utils.get_major_release_version( - self.release_data.metadata[release_id]["sw_version"]) - - # Need to support delete of older centos patches (metadata) from upgrades. - # todo(abailey): do we need to be concerned about this since this component is new. + release = self.release_collection.get_release_by_id(release_id) + release_sw_version = release.sw_version # Delete ostree content if it exists. # RPM based patches (from upgrades) will not have ostree contents @@ -1611,7 +1588,7 @@ class PatchController(PatchService): raise OSTreeTarFail(msg) package_repo_dir = "%s/rel-%s" % (constants.PACKAGE_FEED_DIR, release_sw_version) - packages = [pkg.split("_")[0] for pkg in self.release_data.metadata[release_id].get("packages")] + packages = [pkg.split("_")[0] for pkg in release.packages] if packages: apt_utils.package_remove(package_repo_dir, packages) @@ -1636,12 +1613,12 @@ class PatchController(PatchService): msg_info += msg + "\n" # TODO(lbonatti): treat the upcoming versioning changes - PatchFile.delete_versioned_directory(self.release_data.metadata[release_id]["sw_version"]) + PatchFile.delete_versioned_directory(release.sw_release) try: # Delete the metadata - deploystate = self.release_data.metadata[release_id]["state"] - metadata_dir = DEPLOY_STATE_METADATA_DIR_DICT[deploystate] + deploystate = release.state + metadata_dir = states.RELEASE_STATE_TO_DIR_MAP[deploystate] os.remove("%s/%s" % (metadata_dir, metadata_file)) except OSError: msg = "Failed to remove metadata for %s" % release_id @@ -1649,7 +1626,7 @@ class PatchController(PatchService): raise MetadataFail(msg) self.delete_restart_script(release_id) - self.release_data.delete_release(release_id) + reload_release_data() msg = "%s has been deleted" % release_id 
LOG.info(msg) msg_info += msg + "\n" @@ -1688,21 +1665,21 @@ class PatchController(PatchService): return {"in_sync": is_in_sync} - def patch_init_release_api(self, release): + def patch_init_release_api(self, release_id): """ - Create an empty repo for a new release + Create an empty repo for a new release_id :return: dict of info, warning and error messages """ msg_info = "" msg_warning = "" msg_error = "" - msg = "Initializing repo for: %s" % release + msg = "Initializing repo for: %s" % release_id LOG.info(msg) audit_log_info(msg) - if release == SW_VERSION: - msg = "Rejected: Requested release %s is running release" % release + if release_id == SW_VERSION: + msg = "Rejected: Requested release %s is running release" % release_id msg_error += msg + "\n" LOG.info(msg) return dict(info=msg_info, warning=msg_warning, error=msg_error) @@ -1710,22 +1687,13 @@ class PatchController(PatchService): # Refresh data self.base_pkgdata.loaddirs() - self.release_data.load_all_metadata(constants.AVAILABLE_DIR, state=constants.AVAILABLE) - self.release_data.load_all_metadata(constants.UNAVAILABLE_DIR, state=constants.UNAVAILABLE) - self.release_data.load_all_metadata(constants.DEPLOYING_START_DIR, state=constants.DEPLOYING_START) - self.release_data.load_all_metadata(constants.DEPLOYING_HOST_DIR, state=constants.DEPLOYING_HOST) - self.release_data.load_all_metadata(constants.DEPLOYING_ACTIVATE_DIR, state=constants.DEPLOYING_ACTIVATE) - self.release_data.load_all_metadata(constants.DEPLOYING_COMPLETE_DIR, state=constants.DEPLOYING_COMPLETE) - self.release_data.load_all_metadata(constants.DEPLOYED_DIR, state=constants.DEPLOYED) - self.release_data.load_all_metadata(constants.REMOVING_DIR, state=constants.REMOVING) - self.release_data.load_all_metadata(constants.ABORTING_DIR, state=constants.ABORTING) - self.release_data.load_all_metadata(constants.COMMITTED_DIR, state=constants.COMMITTED) + reload_release_data() - repo_dir[release] = "%s/rel-%s" % (repo_root_dir, release) + 
repo_dir[release_id] = "%s/rel-%s" % (repo_root_dir, release_id) # Verify the release doesn't already exist - if os.path.exists(repo_dir[release]): - msg = "Patch repository for %s already exists" % release + if os.path.exists(repo_dir[release_id]): + msg = "Patch repository for %s already exists" % release_id msg_info += msg + "\n" LOG.info(msg) return dict(info=msg_info, warning=msg_warning, error=msg_error) @@ -1734,14 +1702,14 @@ class PatchController(PatchService): try: # todo(jcasteli) determine if ostree change needs a createrepo equivalent output = "UNDER CONSTRUCTION for OSTREE" - LOG.info("Repo[%s] updated:\n%s", release, output) + LOG.info("Repo[%s] updated:\n%s", release_id, output) except Exception: - msg = "Failed to update the repo for %s" % release + msg = "Failed to update the repo for %s" % release_id LOG.exception(msg) # Wipe out what was created - shutil.rmtree(repo_dir[release]) - del repo_dir[release] + shutil.rmtree(repo_dir[release_id]) + del repo_dir[release_id] raise SoftwareFail(msg) @@ -1763,7 +1731,8 @@ class PatchController(PatchService): # First, verify that all specified patches exist id_verification = True for patch_id in patch_ids: - if patch_id not in self.release_data.metadata: + release = self.release_collection.get_release_by_id(patch_id) + if release is None: msg = "Patch %s does not exist" % patch_id LOG.error(msg) msg_error += msg + "\n" @@ -1773,15 +1742,15 @@ class PatchController(PatchService): return dict(info=msg_info, warning=msg_warning, error=msg_error) required_patches = {} - for patch_iter in list(self.release_data.metadata): - for req_patch in self.release_data.metadata[patch_iter]["requires"]: + for release in self.release_collection.iterate_releases(): + for req_patch in release.requires_release_ids: if req_patch not in patch_ids: continue if req_patch not in required_patches: required_patches[req_patch] = [] - required_patches[req_patch].append(patch_iter) + required_patches[req_patch].append(release.id) for 
patch_id in patch_ids: if patch_id in required_patches: @@ -1811,10 +1780,7 @@ class PatchController(PatchService): # Increment the software_op_counter here self.inc_patch_op_counter() - self.release_data_lock.acquire() - # self.release_data.load_all() self.check_patch_states() - self.release_data_lock.release() if self.sock_out is None: return True @@ -1863,67 +1829,54 @@ class PatchController(PatchService): def software_release_query_cached(self, **kwargs): query_state = None if "show" in kwargs: - if kwargs["show"] == "available": - query_state = constants.AVAILABLE - if kwargs["show"] == "unavailable": - query_state = constants.UNAVAILABLE - elif kwargs["show"] == "deploying_start": - query_state = constants.DEPLOYING_START - elif kwargs["show"] == "deploying_host": - query_state = constants.DEPLOYING_HOST - elif kwargs["show"] == "deploying_activate": - query_state = constants.DEPLOYING_ACTIVATE - elif kwargs["show"] == "deploying_complete": - query_state = constants.DEPLOYING_COMPLETE - elif kwargs["show"] == "deployed": - query_state = constants.DEPLOYED - elif kwargs["show"] == "removing": - query_state = constants.REMOVING - elif kwargs["show"] == "aborting": - query_state = constants.ABORTING - elif kwargs["show"] == "committed": - query_state = constants.COMMITTED + valid_query_states = [ + states.AVAILABLE, + states.UNAVAILABLE, + states.DEPLOYED, + states.REMOVING, + states.COMMITTED, + states.DEPLOYING + ] + if kwargs["show"] in valid_query_states: + query_state = kwargs["show"] query_release = None if "release" in kwargs: query_release = kwargs["release"] - results = {} - self.release_data_lock.acquire() - if query_state is None and query_release is None: - # Return everything - results = self.release_data.metadata + results = [] + + def filter_by_version(): + for r in self.release_collection.iterate_releases(): + if r.sw_version in query_release: + yield r + + def filter_by_state(): + for rel in 
self.release_collection.iterate_releases_by_state(query_state): + yield rel + + if query_state is not None: + iterator = filter_by_state + elif query_release is not None: + iterator = filter_by_version else: - # Filter results - for release_id, data in self.release_data.metadata.items(): - if query_state is not None and data["state"] != query_state: - continue - if query_release is not None and data["sw_version"] != query_release: - continue - results[release_id] = data - self.release_data_lock.release() + iterator = self.release_collection.iterate_releases + + for i in iterator(): + data = i.to_query_dict() + results.append(data) return results def software_release_query_specific_cached(self, release_ids): - audit_log_info("software release show") + LOG.info("software release show") - results = {"metadata": {}, - "contents": {}, - "error": ""} + results = [] - with self.release_data_lock: - - for release_id in release_ids: - if release_id not in list(self.release_data.metadata): - results["error"] += "%s is unrecognized\n" % release_id - - for release_id, data in self.release_data.metadata.items(): - if release_id in release_ids: - results["metadata"][release_id] = data - for release_id, data in self.release_data.contents.items(): - if release_id in release_ids: - results["contents"][release_id] = data + for release_id in release_ids: + release = self.release_collection.get_release_by_id(release_id) + if release is not None: + results.append(release.to_query_dict()) return results @@ -1931,20 +1884,19 @@ class PatchController(PatchService): dependencies = set() patch_added = False - with self.release_data_lock: + # Add patches to workset + for patch_id in sorted(patch_ids): + dependencies.add(patch_id) + patch_added = True - # Add patches to workset - for patch_id in sorted(patch_ids): - dependencies.add(patch_id) - patch_added = True - - while patch_added: - patch_added = False - for patch_id in sorted(dependencies): - for req in 
self.release_data.metadata[patch_id]["requires"]: - if req not in dependencies: - dependencies.add(req) - patch_added = recursive + while patch_added: + patch_added = False + for patch_id in sorted(dependencies): + release = self.release_collection.get_release_by_id(patch_id) + for req in release.requires: + if req not in dependencies: + dependencies.add(req) + patch_added = recursive return sorted(dependencies) @@ -1962,15 +1914,14 @@ class PatchController(PatchService): if kwargs.get("recursive") == "yes": recursive = True - with self.release_data_lock: - - # Verify patch IDs - for patch_id in sorted(patch_ids): - if patch_id not in list(self.release_data.metadata): - errormsg = "%s is unrecognized\n" % patch_id - LOG.info("patch_query_dependencies: %s", errormsg) - results["error"] += errormsg - failure = True + # Verify patch IDs + for patch_id in sorted(patch_ids): + release = self.release_collection.get_release_by_id(patch_id) + if release is None: + errormsg = "%s is unrecognized\n" % patch_id + LOG.info("patch_query_dependencies: %s", errormsg) + results["error"] += errormsg + failure = True if failure: LOG.info("patch_query_dependencies failed") @@ -1986,10 +1937,10 @@ class PatchController(PatchService): audit_log_info(msg) try: - if not os.path.exists(constants.COMMITTED_DIR): - os.makedirs(constants.COMMITTED_DIR) + if not os.path.exists(states.COMMITTED_DIR): + os.makedirs(states.COMMITTED_DIR) except os.error: - msg = "Failed to create %s" % constants.COMMITTED_DIR + msg = "Failed to create %s" % states.COMMITTED_DIR LOG.exception(msg) raise SoftwareFail(msg) @@ -2001,10 +1952,9 @@ class PatchController(PatchService): # Ensure there are only REL patches non_rel_list = [] - with self.release_data_lock: - for patch_id in self.release_data.metadata: - if self.release_data.metadata[patch_id]['status'] != constants.STATUS_RELEASED: - non_rel_list.append(patch_id) + for release in self.release_collection.iterate_releases(): + if release.status != 
constants.STATUS_RELEASED: + non_rel_list.append(release.id) if len(non_rel_list) > 0: errormsg = "A commit cannot be performed with non-REL status patches in the system:\n" @@ -2015,13 +1965,13 @@ class PatchController(PatchService): return results # Verify Release IDs - with self.release_data_lock: - for patch_id in sorted(patch_ids): - if patch_id not in list(self.release_data.metadata): - errormsg = "%s is unrecognized\n" % patch_id - LOG.info("patch_commit: %s", errormsg) - results["error"] += errormsg - failure = True + for patch_id in sorted(patch_ids): + release = self.release_collection.get_release_by_id(patch_id) + if release is None: + errormsg = "%s is unrecognized\n" % patch_id + LOG.info("patch_commit: %s", errormsg) + results["error"] += errormsg + failure = True if failure: LOG.info("patch_commit: Failed patch ID check") @@ -2031,11 +1981,10 @@ class PatchController(PatchService): # Check patch states avail_list = [] - with self.release_data_lock: - for patch_id in commit_list: - if self.release_data.metadata[patch_id]['state'] != constants.DEPLOYED \ - and self.release_data.metadata[patch_id]['state'] != constants.COMMITTED: - avail_list.append(patch_id) + for patch_id in commit_list: + release = self.release_collection.get_release_by_id(patch_id) + if release.state not in [states.DEPLOYED, states.COMMITTED]: + avail_list.append(patch_id) if len(avail_list) > 0: errormsg = "The following patches are not applied and cannot be committed:\n" @@ -2045,22 +1994,21 @@ class PatchController(PatchService): results["error"] += errormsg return results - with self.release_data_lock: - for patch_id in commit_list: - # Fetch file paths that need to be cleaned up to - # free patch storage disk space - if self.release_data.metadata[patch_id].get("restart_script"): - restart_script_path = "%s/%s" % \ - (root_scripts_dir, - self.release_data.metadata[patch_id]["restart_script"]) - if os.path.exists(restart_script_path): - cleanup_files.add(restart_script_path) - 
patch_sw_version = utils.get_major_release_version( - self.release_data.metadata[patch_id]["sw_version"]) - abs_ostree_tar_dir = package_dir[patch_sw_version] - software_tar_path = "%s/%s-software.tar" % (abs_ostree_tar_dir, patch_id) - if os.path.exists(software_tar_path): - cleanup_files.add(software_tar_path) + for patch_id in commit_list: + release = self.release_collection.get_release_by_id(patch_id) + # Fetch file paths that need to be cleaned up to + # free patch storage disk space + if release.restart_script: + restart_script_path = "%s/%s" % \ + (root_scripts_dir, + release.restart_script) + if os.path.exists(restart_script_path): + cleanup_files.add(restart_script_path) + patch_sw_version = release.sw_release + abs_ostree_tar_dir = package_dir[patch_sw_version] + software_tar_path = "%s/%s-software.tar" % (abs_ostree_tar_dir, patch_id) + if os.path.exists(software_tar_path): + cleanup_files.add(software_tar_path) # Calculate disk space disk_space = 0 @@ -2077,8 +2025,8 @@ class PatchController(PatchService): # Move the metadata to the committed dir for patch_id in commit_list: metadata_fname = "%s-metadata.xml" % patch_id - deployed_fname = os.path.join(constants.DEPLOYED_DIR, metadata_fname) - committed_fname = os.path.join(constants.COMMITTED_DIR, metadata_fname) + deployed_fname = os.path.join(states.DEPLOYED_DIR, metadata_fname) + committed_fname = os.path.join(states.COMMITTED_DIR, metadata_fname) if os.path.exists(deployed_fname): try: shutil.move(deployed_fname, committed_fname) @@ -2096,7 +2044,7 @@ class PatchController(PatchService): LOG.exception(msg) raise MetadataFail(msg) - self.release_data.load_all() + reload_release_data() results["info"] = "The releases have been committed." 
return results @@ -2129,13 +2077,12 @@ class PatchController(PatchService): return rc def copy_restart_scripts(self): - with self.release_data_lock: - for patch_id in self.release_data.metadata: - if self.release_data.metadata[patch_id]["state"] in \ - [constants.DEPLOYING_START, constants.REMOVING] \ - and self.release_data.metadata[patch_id].get("restart_script"): + applying_states = [states.DEPLOYING, states.REMOVING] + for release in self.release_collection.iterate_releases(): + if release.restart_script: + if release.state in applying_states: try: - restart_script_name = self.release_data.metadata[patch_id]["restart_script"] + restart_script_name = release.restart_script restart_script_path = "%s/%s" \ % (root_scripts_dir, restart_script_name) dest_path = constants.PATCH_SCRIPTS_STAGING_DIR @@ -2145,23 +2092,23 @@ class PatchController(PatchService): os.makedirs(dest_path, 0o700) shutil.copyfile(restart_script_path, dest_script_file) os.chmod(dest_script_file, 0o700) - msg = "Creating restart script for %s" % patch_id + msg = "Creating restart script for %s" % release.id LOG.info(msg) except shutil.Error: - msg = "Failed to copy the restart script for %s" % patch_id + msg = "Failed to copy the restart script for %s" % release.id LOG.exception(msg) raise SoftwareError(msg) - elif self.release_data.metadata[patch_id].get("restart_script"): + else: try: - restart_script_name = self.release_data.metadata[patch_id]["restart_script"] + restart_script_name = release.restart_script restart_script_path = "%s/%s" \ % (constants.PATCH_SCRIPTS_STAGING_DIR, restart_script_name) if os.path.exists(restart_script_path): os.remove(restart_script_path) - msg = "Removing restart script for %s" % patch_id + msg = "Removing restart script for %s" % release.id LOG.info(msg) except shutil.Error: - msg = "Failed to delete the restart script for %s" % patch_id + msg = "Failed to delete the restart script for %s" % release.id LOG.exception(msg) def _update_state_to_peer(self): @@ 
-2176,32 +2123,21 @@ class PatchController(PatchService): """ Does basic sanity checks on the release data :param deployment: release to be checked - :return: release dict (if exists), + :return: release object (if exists), bool with success output, strings with info, warning and error messages """ - msg_info = "" - msg_warning = "" - msg_error = "" - success = True # We need to verify that the software release exists - release = self.release_data.metadata.get(deployment, None) + release = self.release_collection.get_release_by_id(deployment) if not release: msg = "Software release version corresponding to the specified release " \ "%s does not exist." % deployment LOG.error(msg) - msg_error += msg + " Try deleting and re-uploading the software for recovery." - success = False + msg = msg + " Try deleting and re-uploading the software for recovery." + raise SoftwareServiceError(error=msg) - # Check if release state is valid - elif release["state"] not in constants.VALID_DEPLOY_START_STATES: - msg = "Software release state is invalid: %s" % release["state"] - LOG.error(msg) - msg_error += msg - success = False - - return release, success, msg_info, msg_warning, msg_error + return release def _deploy_precheck(self, release_version: str, force: bool = False, region_name: str = "RegionOne", patch: bool = False) -> dict: @@ -2289,15 +2225,13 @@ class PatchController(PatchService): :param force: if True will ignore minor alarms during precheck :return: dict of info, warning and error messages """ - release, success, msg_info, msg_warning, msg_error = self._release_basic_checks(deployment) - if not success: - return dict(info=msg_info, warning=msg_warning, error=msg_error) + release = self._release_basic_checks(deployment) region_name = kwargs["region_name"] - release_version = release["sw_version"] + release_version = release.sw_release patch = not utils.is_upgrade_deploy(SW_VERSION, release_version) return self._deploy_precheck(release_version, force, region_name, 
patch) - def _deploy_upgrade_start(self, to_release): + def _deploy_upgrade_start(self, to_release, commit_id): LOG.info("start deploy upgrade to %s from %s" % (to_release, SW_VERSION)) deploy_script_name = constants.DEPLOY_START_SCRIPT cmd_path = utils.get_software_deploy_script(to_release, deploy_script_name) @@ -2312,7 +2246,6 @@ class PatchController(PatchService): postgresql_port = str(cfg.alt_postgresql_port) feed = os.path.join(constants.FEED_DIR, "rel-%s/ostree_repo" % major_to_release) - commit_id = None LOG.info("k8s version %s" % k8s_ver) upgrade_start_cmd = [cmd_path, SW_VERSION, major_to_release, k8s_ver, postgresql_port, @@ -2341,9 +2274,19 @@ class PatchController(PatchService): LOG.error("Failed to start command: %s. Error %s" % (' '.join(upgrade_start_cmd), e)) return False - def deploy_state_changed(self, deploy_state): + def deploy_state_changed(self, new_state): '''Handle 'deploy state change' event, invoked when operations complete. ''' - self.db_api_instance.update_deploy(deploy_state) + + deploy_state = DeployState.get_instance() + state_event = { + DEPLOY_STATES.START_DONE: deploy_state.start_done, + DEPLOY_STATES.START_FAILED: deploy_state.start_failed + } + if new_state in state_event: + state_event[new_state]() + else: + msg = f"Received invalid deploy state update {deploy_state}" + LOG.error(msg) def host_deploy_state_changed(self, hostname, host_deploy_state): '''Handle 'host deploy state change' event. ''' @@ -2354,21 +2297,40 @@ class PatchController(PatchService): tag.text = text return tag + @require_deploy_state([None], + "There is already a deployment is in progress ({state}). " + "Please complete the current deployment.") def software_deploy_start_api(self, deployment: str, force: bool, **kwargs) -> dict: """ - Start deployment by applying the changes to the feed ostree - return: dict of info, warning and error messages + to start deploy of a specified release. 
+ The operation implies deploying all undeployed dependency releases of + the specified release. i.e, to deploy release 24.09.1, it implies + deploying 24.09.0 and 24.09.1 when 24.09.0 has not been deployed. + The operation includes steps: + 1. find all undeployed dependency releases + 2. ensure all releases (dependency and specified release) are ready to deployed + 3. precheck + 4. transform all involved releases to deploying state + 5. start the deploy subprocess """ - release, success, msg_info, msg_warning, msg_error = self._release_basic_checks(deployment) + msg_info = "" + msg_warning = "" + msg_error = "" + deploy_release = self._release_basic_checks(deployment) - if not success: - return dict(info=msg_info, warning=msg_warning, error=msg_error) + running_release = self.release_collection.running_release + deploy_sw_version = deploy_release.sw_version # MM.mm - # TODO(heitormatsui) Enforce deploy-precheck for patch release + feed_repo = "%s/rel-%s/ostree_repo" % (constants.FEED_OSTREE_BASE_DIR, deploy_sw_version) + commit_id = deploy_release.commit_id patch_release = True - if utils.is_upgrade_deploy(SW_VERSION, release["sw_version"]): + if utils.is_upgrade_deploy(SW_VERSION, deploy_release.sw_release): + # TODO(bqian) remove default latest commit when a commit-id is built into GA metadata + if commit_id is None: + commit_id = ostree_utils.get_feed_latest_commit(deploy_sw_version) + patch_release = False - to_release = release["sw_version"] + to_release = deploy_release.sw_release ret = self._deploy_precheck(to_release, force, patch=patch_release) if ret["system_healthy"] is None: ret["error"] = "Fail to perform deploy precheck. 
Internal error has occurred.\n" + \ @@ -2380,33 +2342,21 @@ class PatchController(PatchService): "Please fix above issues then retry the deploy.\n" return ret - if self._deploy_upgrade_start(to_release): + if self._deploy_upgrade_start(to_release, commit_id): collect_current_load_for_hosts() create_deploy_hosts() - self.db_api_instance.begin_update() - try: - # TODO(bqian) replace SW_VERSION below to current running sw_release - # (MM.mm.pp) - self.update_and_sync_deploy_state(self.db_api_instance.create_deploy, - SW_VERSION, to_release, True) - self.update_and_sync_deploy_state(self.db_api_instance.update_deploy, - DEPLOY_STATES.START) - finally: - self.db_api_instance.end_update() - sw_rel = self.release_collection.get_release_by_id(deployment) - if sw_rel is None: - raise InternalError("%s cannot be found" % to_release) - sw_rel.update_state(constants.DEPLOYING) + release_state = ReleaseState(release_ids=[deploy_release.id]) + release_state.start_deploy() + deploy_state = DeployState.get_instance() + deploy_state.start(running_release, to_release, feed_repo, commit_id, deploy_release.reboot_required) + self._update_state_to_peer() + msg_info = "Deployment for %s started" % deployment else: msg_error = "Deployment for %s failed to start" % deployment return dict(info=msg_info, warning=msg_warning, error=msg_error) - # Identify if this is apply or remove operation - # todo(jcasteli) Remove once the logic to include major release version - # in release list is implemented - running_sw_version = "23.09.0" # todo(chuck) Remove once to determine how we are associating a patch # with a release. 
@@ -2416,17 +2366,23 @@ class PatchController(PatchService): # running_sw_version = self.release_data.metadata[release_id]["sw_version"] # LOG.info("Running software version: %s", running_sw_version) - higher = utils.compare_release_version(self.release_data.metadata[deployment]["sw_version"], - running_sw_version) + # TODO(bqian) update references of sw_release (string) to SWRelease object - if higher is None: + if deploy_release > running_release: + operation = "apply" + elif running_release > deploy_release: + operation = "remove" + else: + # NOTE(bqian) The error message doesn't seem right. software version format + # or any metadata semantic check should be done during upload. If data + # invalid found subsequently, data is considered damaged, should recommend + # delete and re-upload msg_error += "The software version format for this release is not correct.\n" return dict(info=msg_info, warning=msg_warning, error=msg_error) - elif higher: - operation = "apply" - else: - operation = "remove" + # NOTE(bqian) shouldn't that patch release deploy and remove are doing the same thing + # in terms of ostree commit, that it deploy to a commit specified by the commit-id that + # associated to the release from the deploy start command? # If releases are such that: # R2 requires R1, R3 requires R2, R4 requires R3 # If current running release is R2 and command issued is "software deploy start R4" @@ -2439,7 +2395,7 @@ class PatchController(PatchService): create_deploy_hosts() # reverse = True is used for apply operation - deployment_list = self.release_apply_remove_order(deployment, running_sw_version, reverse=True) + deployment_list = self.release_apply_remove_order(deployment, running_release.sw_release, reverse=True) msg = "Deploy start order for apply operation: %s" % ",".join(deployment_list) LOG.info(msg) @@ -2448,10 +2404,10 @@ class PatchController(PatchService): # todo(jcasteli) Do we need this block below? 
# Check for patches that can't be applied during an upgrade upgrade_check = True - for release in deployment_list: - if self.release_data.metadata[release]["sw_version"] != SW_VERSION \ - and self.release_data.metadata[release].get("apply_active_release_only") == "Y": - msg = "%s cannot be created during an upgrade" % release + for release_id in deployment_list: + release = self.release_collection.get_release_by_id(release_id) + if release.sw_version != SW_VERSION and release.apply_active_release_only == "Y": + msg = "%s cannot be created during an upgrade" % release_id LOG.error(msg) msg_error += msg + "\n" upgrade_check = False @@ -2463,54 +2419,49 @@ class PatchController(PatchService): self.run_semantic_check(constants.SEMANTIC_PREAPPLY, deployment_list) # Start applying the releases - for release in deployment_list: - msg = "Starting deployment for: %s" % release + for release_id in deployment_list: + release = self.release_collection.get_release_by_id(release_id) + msg = "Starting deployment for: %s" % release_id LOG.info(msg) audit_log_info(msg) - packages = [pkg.split("_")[0] for pkg in self.release_data.metadata[release].get("packages")] + packages = [pkg.split("_")[0] for pkg in release.packages] if packages is None: msg = "Unable to determine packages to install" LOG.error(msg) raise MetadataFail(msg) - if self.release_data.metadata[release]["state"] != constants.AVAILABLE \ - or self.release_data.metadata[release]["state"] == constants.COMMITTED: - msg = "%s is already being deployed" % release + if release.state not in (states.AVAILABLE, states.COMMITTED): + msg = "%s is already being deployed" % release_id LOG.info(msg) msg_info += msg + "\n" continue - release_sw_version = utils.get_major_release_version( - self.release_data.metadata[release]["sw_version"]) - latest_commit = "" try: - latest_commit = ostree_utils.get_feed_latest_commit(release_sw_version) + latest_commit = ostree_utils.get_feed_latest_commit(running_release.sw_version) 
LOG.info("Latest commit: %s" % latest_commit) except OSTreeCommandFail: - LOG.exception("Failure during commit consistency check for %s.", release) - - feed_ostree = "%s/rel-%s/ostree_repo" % (constants.FEED_OSTREE_BASE_DIR, release_sw_version) + LOG.exception("Failure during commit consistency check for %s.", release_id) try: - apt_utils.run_install(feed_ostree, packages) + apt_utils.run_install(feed_repo, packages) except APTOSTreeCommandFail: LOG.exception("Failed to intall Debian package.") raise APTOSTreeCommandFail(msg) # Update the feed ostree summary - ostree_utils.update_repo_summary_file(feed_ostree) + ostree_utils.update_repo_summary_file(feed_repo) # Get the latest commit after performing "apt-ostree install". self.latest_feed_commit = ostree_utils.get_feed_latest_commit(SW_VERSION) try: # Move the release metadata to deploying dir - deploystate = self.release_data.metadata[release]["state"] - metadata_dir = DEPLOY_STATE_METADATA_DIR_DICT[deploystate] + deploystate = release.state + metadata_dir = states.RELEASE_STATE_TO_DIR_MAP[deploystate] - metadata_file = "%s/%s-metadata.xml" % (metadata_dir, release) + metadata_file = "%s/%s-metadata.xml" % (metadata_dir, release_id) tree = ET.parse(metadata_file) root = tree.getroot() @@ -2525,33 +2476,44 @@ class PatchController(PatchService): outfile.write(tree) LOG.info("Latest feed commit: %s added to metadata file" % self.latest_feed_commit) - - shutil.move(metadata_file, - "%s/%s-metadata.xml" % (constants.DEPLOYING_START_DIR, release)) - - msg_info += "%s is now in the repo\n" % release + msg_info += "%s is now in the repo\n" % release_id except shutil.Error: - msg = "Failed to move the metadata for %s" % release + msg = "Failed to move the metadata for %s" % release_id LOG.exception(msg) raise MetadataFail(msg) - self.release_data.metadata[release]["commit"] = self.latest_feed_commit + reload_release_data() + # NOTE(bqian) Below check an exception raise should be revisit, if applicable, + # should be 
applied to the begining of all requests. + if len(self.hosts) == 0: + msg = "service is running in incorrect state. No registered host" + raise InternalError(msg) - if not os.path.isfile(INITIAL_CONTROLLER_CONFIG_COMPLETE): - self.release_data.metadata[release]["state"] = constants.DEPLOYING_START - elif len(self.hosts) > 0: - self.release_data.metadata[release]["state"] = constants.DEPLOYING_START - else: - self.release_data.metadata[release]["state"] = constants.UNKNOWN + # TODO(bqian) get the list of undeployed required release ids + # i.e, when deploying 24.03.3, which requires 24.03.2 and 24.03.1, all + # 3 release ids should be passed into to create new ReleaseState + collect_current_load_for_hosts() + create_deploy_hosts() + release_state = ReleaseState(release_ids=[release.id]) + release_state.start_deploy() + deploy_state = DeployState.get_instance() + to_release = deploy_release.sw_release + deploy_state.start(running_release, to_release, feed_repo, commit_id, deploy_release.reboot_required) + self._update_state_to_peer() with self.hosts_lock: - self.interim_state[release] = list(self.hosts) + self.interim_state[release_id] = list(self.hosts) + + # There is no defined behavior for deploy start for patching releases, so + # move the deploy state to start-done + deploy_state = DeployState.get_instance() + deploy_state.start_done() + self._update_state_to_peer() elif operation == "remove": collect_current_load_for_hosts() create_deploy_hosts() - removed = False - deployment_list = self.release_apply_remove_order(deployment, running_sw_version) + deployment_list = self.release_apply_remove_order(deployment, running_release.sw_version) msg = "Deploy start order for remove operation: %s" % ",".join(deployment_list) LOG.info(msg) audit_log_info(msg) @@ -2563,19 +2525,20 @@ class PatchController(PatchService): # See if any of the patches are marked as unremovable unremovable_verification = True - for release in deployment_list: - if 
self.release_data.metadata[release].get("unremovable") == "Y": + for release_id in deployment_list: + release = self.release_collection.get_release_by_id(release_id) + if release.unremovable: if remove_unremovable: - msg = "Unremovable release %s being removed" % release + msg = "Unremovable release %s being removed" % release_id LOG.warning(msg) - msg_warning += msg + "\n" + msg_warning = msg + "\n" else: - msg = "Release %s is not removable" % release + msg = "Release %s is not removable" % release_id LOG.error(msg) msg_error += msg + "\n" unremovable_verification = False - elif self.release_data.metadata[release]['state'] == constants.COMMITTED: - msg = "Release %s is committed and cannot be removed" % release + elif release.state == states.COMMITTED: + msg = "Release %s is committed and cannot be removed" % release_id LOG.error(msg) msg_error += msg + "\n" unremovable_verification = False @@ -2604,91 +2567,92 @@ class PatchController(PatchService): if kwargs.get("skip-semantic") != "yes": self.run_semantic_check(constants.SEMANTIC_PREREMOVE, deployment_list) - for release in deployment_list: - removed = True - msg = "Removing release: %s" % release + for release_id in deployment_list: + release = self.release_collection.get_release_by_id(release_id) + msg = "Removing release: %s" % release_id LOG.info(msg) audit_log_info(msg) - if self.release_data.metadata[release]["state"] == constants.AVAILABLE: - msg = "The deployment for %s has not been created" % release + if release.state == states.AVAILABLE: + msg = "The deployment for %s has not been created" % release_id LOG.info(msg) msg_info += msg + "\n" continue - major_release_sw_version = utils.get_major_release_version( - self.release_data.metadata[release]["sw_version"]) + major_release_sw_version = release.sw_version # this is an ostree patch - # Base commit is fetched from the patch metadata - base_commit = self.release_data.contents[release]["base"]["commit"] - feed_ostree = "%s/rel-%s/ostree_repo" % 
(constants.FEED_OSTREE_BASE_DIR, major_release_sw_version) + # Base commit is fetched from the patch metadata. + base_commit = release.base_commit_id + feed_repo = "%s/rel-%s/ostree_repo" % (constants.FEED_OSTREE_BASE_DIR, major_release_sw_version) try: # Reset the ostree HEAD - ostree_utils.reset_ostree_repo_head(base_commit, feed_ostree) + ostree_utils.reset_ostree_repo_head(base_commit, feed_repo) # Delete all commits that belong to this release - for i in range(int(self.release_data.contents[release]["number_of_commits"])): - commit_to_delete = self.release_data.contents[release]["commit%s" % (i + 1)]["commit"] - ostree_utils.delete_ostree_repo_commit(commit_to_delete, feed_ostree) + # NOTE(bqian) there should be just one commit per release. + commit_to_delete = release.commit_id + ostree_utils.delete_ostree_repo_commit(commit_to_delete, feed_repo) # Update the feed ostree summary - ostree_utils.update_repo_summary_file(feed_ostree) + ostree_utils.update_repo_summary_file(feed_repo) except OSTreeCommandFail: - LOG.exception("Failure while removing release %s.", release) + LOG.exception("Failure while removing release %s.", release_id) try: # Move the metadata to the deleted dir - deploystate = self.release_data.metadata[release]["state"] - metadata_dir = DEPLOY_STATE_METADATA_DIR_DICT[deploystate] - shutil.move("%s/%s-metadata.xml" % (metadata_dir, release), - "%s/%s-metadata.xml" % (constants.REMOVING_DIR, release)) - msg_info += "%s has been removed from the repo\n" % release + self.release_collection.update_state([release_id], states.REMOVING_DIR) + msg_info += "%s has been removed from the repo\n" % release_id except shutil.Error: - msg = "Failed to move the metadata for %s" % release - LOG.exception(msg) + msg = "Failed to move the metadata for %s" % release_id + LOG.Error(msg) raise MetadataFail(msg) - # update state - if not os.path.isfile(INITIAL_CONTROLLER_CONFIG_COMPLETE): - self.release_data.metadata[release]["state"] = constants.REMOVING - elif 
len(self.hosts) > 0: - self.release_data.metadata[release]["state"] = constants.REMOVING - else: - self.release_data.metadata[release]["state"] = constants.UNKNOWN + if len(self.hosts) == 0: + msg = "service is running in incorrect state. No registered host" + raise InternalError(msg) + + # TODO(bqian) get the list of undeployed required release ids + # i.e, when deploying 24.03.3, which requires 24.03.2 and 24.03.1, all + # 3 release ids should be passed into to create new ReleaseState + collect_current_load_for_hosts() + create_deploy_hosts() + release_state = ReleaseState(release_ids=[release.id]) + release_state.start_remove() + deploy_state = DeployState.get_instance() + to_release = deploy_release.sw_release + deploy_state.start(running_release, to_release, feed_repo, commit_id, deploy_release.reboot_required) + self._update_state_to_peer() # only update lastest_feed_commit if it is an ostree patch - if self.release_data.contents[release].get("base") is not None: + if release.base_commit_id is not None: # Base Commit in this release's metadata.xml file represents the latest commit # after this release has been removed from the feed repo - self.latest_feed_commit = self.release_data.contents[release]["base"]["commit"] + self.latest_feed_commit = release.base_commit_id with self.hosts_lock: - self.interim_state[release] = list(self.hosts) + self.interim_state[release_id] = list(self.hosts) - if removed: - self.latest_feed_commit = ostree_utils.get_feed_latest_commit(SW_VERSION) - self.release_data.metadata[release]["commit"] = self.latest_feed_commit - try: - metadata_dir = DEPLOY_STATE_METADATA_DIR_DICT[deploystate] - shutil.move("%s/%s-metadata.xml" % (metadata_dir, deployment), - "%s/%s-metadata.xml" % (constants.DEPLOYING_START_DIR, deployment)) - msg_info += "Deployment started for %s\n" % deployment - except shutil.Error: - msg = "Failed to move the metadata for %s" % deployment - LOG.exception(msg) - raise MetadataFail(msg) - - # update state - if not 
os.path.isfile(INITIAL_CONTROLLER_CONFIG_COMPLETE): - self.release_data.metadata[deployment]["state"] = constants.DEPLOYING_START - elif len(self.hosts) > 0: - self.release_data.metadata[deployment]["state"] = constants.DEPLOYING_START - else: - self.release_data.metadata[deployment]["state"] = constants.UNKNOWN + # There is no defined behavior for deploy start for patching releases, so + # move the deploy state to start-done + deploy_state = DeployState.get_instance() + deploy_state.start_done() + self._update_state_to_peer() return dict(info=msg_info, warning=msg_warning, error=msg_error) - def software_deploy_complete_api(self, release: str) -> dict: + def _deploy_complete(self): + # TODO(bqian) complete the deploy + # as deployment has been already activated, there is no return, + # deploy complete can only succeed. + # tasks for completion of deploy is to delete leftover data from + # previous release. If some data could not be deleted, need to + # automatically reattempt to delete it in later statge. 
(outside + # a deployment) + return True + + @require_deploy_state([DEPLOY_STATES.ACTIVATE_DONE], + "Must complete deploy activate before completing the deployment") + def software_deploy_complete_api(self) -> dict: """ Completes a deployment associated with the release :return: dict of info, warning and error messages @@ -2696,53 +2660,22 @@ class PatchController(PatchService): msg_info = "" msg_warning = "" msg_error = "" - if self.release_data.metadata[release]["state"] not in \ - [constants.DEPLOYING_ACTIVATE, constants.DEPLOYING_COMPLETE]: - msg = "%s is not activated yet" % release - LOG.info(msg) - msg_info += msg + "\n" - else: - # Set the state to deploying-complete - for release_id in sorted(list(self.release_data.metadata)): - if self.release_data.metadata[release_id]["state"] == constants.DEPLOYING_ACTIVATE: - self.release_data.metadata[release_id]["state"] = constants.DEPLOYING_COMPLETE - try: - shutil.move("%s/%s-metadata.xml" % (constants.DEPLOYING_ACTIVATE_DIR, release_id), - "%s/%s-metadata.xml" % (constants.DEPLOYING_COMPLETE_DIR, release_id)) - except shutil.Error: - msg = "Failed to move the metadata for %s" % release_id - LOG.exception(msg) - raise MetadataFail(msg) - # The code for deploy complete is going to execute - # Once deploy complete is successfully executed, we move the metadata to their - # respective folders - for release_id in sorted(list(self.release_data.metadata)): - if self.release_data.metadata[release_id]["state"] == constants.REMOVING: - self.release_data.metadata[release_id]["state"] = constants.AVAILABLE - try: - shutil.move("%s/%s-metadata.xml" % (constants.REMOVING_DIR, release_id), - "%s/%s-metadata.xml" % (constants.AVAILABLE_DIR, release_id)) - msg_info += "%s is available\n" % release_id - except shutil.Error: - msg = "Failed to move the metadata for %s" % release_id - LOG.exception(msg) - raise MetadataFail(msg) - elif self.release_data.metadata[release_id]["state"] == constants.DEPLOYING_COMPLETE: - 
self.release_data.metadata[release_id]["state"] = constants.DEPLOYED + deploy_state = DeployState.get_instance() - try: - shutil.move("%s/%s-metadata.xml" % (constants.DEPLOYING_COMPLETE_DIR, release_id), - "%s/%s-metadata.xml" % (constants.DEPLOYED_DIR, release_id)) - msg_info += "%s has been deployed\n" % release_id - except shutil.Error: - msg = "Failed to move the metadata for %s" % release_id - LOG.exception(msg) - raise MetadataFail(msg) + if self._deploy_complete(): + deploy_state.completed() + msg_info += "Deployment has been completed\n" return dict(info=msg_info, warning=msg_warning, error=msg_error) - def software_deploy_activate_api(self, release: str) -> dict: + def _activate(self): + # TODO(bqian) activate the deployment + return True + + @require_deploy_state([DEPLOY_STATES.HOST_DONE, DEPLOY_STATES.ACTIVATE_FAILED], + "Must complete deploying all hosts before activating the deployment") + def software_deploy_activate_api(self) -> dict: """ Activates the deployment associated with the release :return: dict of info, warning and error messages @@ -2750,21 +2683,16 @@ class PatchController(PatchService): msg_info = "" msg_warning = "" msg_error = "" - if self.release_data.metadata[release]["state"] != constants.DEPLOYING_HOST: - msg = "%s is not deployed on host" % release - LOG.info(msg) - msg_info += msg + "\n" - else: - try: - shutil.move("%s/%s-metadata.xml" % (constants.DEPLOYING_HOST_DIR, release), - "%s/%s-metadata.xml" % (constants.DEPLOYING_ACTIVATE_DIR, release)) - except shutil.Error: - msg = "Failed to move the metadata for %s" % release - LOG.exception(msg) - raise MetadataFail(msg) - msg_info += "Deployment for %s has been activated\n" % release - self.release_data.metadata[release]["state"] = constants.DEPLOYING_ACTIVATE + deploy_state = DeployState.get_instance() + deploy_state.activate() + + if self._activate(): + deploy_state.activate_completed() + msg_info += "Deployment has been activated\n" + else: + deploy_state.activate_failed() + 
msg_error += "Dployment activation has failed.\n" return dict(info=msg_info, warning=msg_warning, error=msg_error) @@ -2776,30 +2704,26 @@ class PatchController(PatchService): # Retrieve deploy state from db in list format return self.db_api_instance.get_deploy_all() - def software_deploy_host_api(self, host_ip, force, async_req=False): + @require_deploy_state([DEPLOY_STATES.START_DONE, DEPLOY_STATES.HOST, DEPLOY_STATES.HOST_FAILED], + "Current deployment ({state}) is not ready to deploy host") + def software_deploy_host_api(self, hostname, force, async_req=False): msg_info = "" msg_warning = "" msg_error = "" - ip = host_ip + deploy_host = self.db_api_instance.get_deploy_host_by_hostname(hostname) + if deploy_host is None: + raise HostNotFound(hostname) - self.hosts_lock.acquire() - # If not in hosts table, maybe a hostname was used instead - if host_ip not in self.hosts: - try: - ip = utils.gethostbyname(host_ip) - if ip not in self.hosts: - # Translated successfully, but IP isn't in the table. - # Raise an exception to drop out to the failure handling - raise SoftwareError("Host IP (%s) not in table" % ip) - except Exception: - self.hosts_lock.release() - msg = "Unknown host specified: %s" % host_ip - msg_error += msg + "\n" - LOG.error("Error in host-install: %s", msg) - return dict(info=msg_info, warning=msg_warning, error=msg_error) + deploy_state = DeployState.get_instance() + deploy_host_state = DeployHostState(hostname) + deploy_state.deploy_host() + deploy_host_state.deploy_started() - msg = "Running software deploy host for %s (%s), force=%s, async_req=%s" % (host_ip, ip, force, async_req) + # NOTE(bqian) Get IP address to fulfill the need of patching structure. 
+ # need to review the design + ip = socket.getaddrinfo(hostname, 0)[0][4][0] + msg = "Running software deploy host for %s (%s), force=%s, async_req=%s" % (hostname, ip, force, async_req) LOG.info(msg) audit_log_info(msg) @@ -2820,9 +2744,10 @@ class PatchController(PatchService): major_release, force, async_req) msg_info += msg + "\n" LOG.info(msg) - set_host_target_load(host_ip, major_release) + set_host_target_load(hostname, major_release) # TODO(heitormatsui) update host deploy status + self.hosts_lock.acquire() self.hosts[ip].install_pending = True self.hosts[ip].install_status = False self.hosts[ip].install_reject_reason = None @@ -2842,22 +2767,13 @@ class PatchController(PatchService): msg = "Host installation request sent to %s." % self.hosts[ip].hostname msg_info += msg + "\n" LOG.info("host-install async_req: %s", msg) - for release in sorted(list(self.release_data.metadata)): - if self.release_data.metadata[release]["state"] == constants.DEPLOYING_START: - try: - shutil.move("%s/%s-metadata.xml" % (constants.DEPLOYING_START_DIR, release), - "%s/%s-metadata.xml" % (constants.DEPLOYING_HOST_DIR, release)) - msg_info += "%s has been activated\n" % release - except shutil.Error: - msg = "Failed to move the metadata for %s" % release - LOG.exception(msg) - raise MetadataFail(msg) - self.release_data.metadata[release]["state"] = constants.DEPLOYING_HOST - return dict(info=msg_info, warning=msg_warning, error=msg_error) + # TODO(bqian) update deploy state to deploy-host # Now we wait, up to ten mins. future enhancement: Wait on a condition resp_rx = False max_time = time.time() + 600 + # NOTE(bqian) loop below blocks REST API service (slow thread) + # Consider remove. 
while time.time() < max_time: self.hosts_lock.acquire() if ip not in self.hosts: @@ -2898,17 +2814,6 @@ class PatchController(PatchService): msg_error += msg + "\n" LOG.error("Error in host-install: %s", msg) - for release in sorted(list(self.release_data.metadata)): - if self.release_data.metadata[release]["state"] == constants.DEPLOYING_START: - try: - shutil.move("%s/%s-metadata.xml" % (constants.DEPLOYING_START_DIR, release), - "%s/%s-metadata.xml" % (constants.DEPLOYING_HOST_DIR, release)) - msg_info += "%s has been activated\n" % release - except shutil.Error: - msg = "Failed to move the metadata for %s" % release - LOG.exception(msg) - raise MetadataFail(msg) - self.release_data.metadata[release]["state"] = constants.DEPLOYING_HOST return dict(info=msg_info, warning=msg_warning, error=msg_error) def drop_host(self, host_ip, sync_nbr=True): @@ -2961,56 +2866,33 @@ class PatchController(PatchService): return dict(info=msg_info, warning=msg_warning, error=msg_error) + def check_releases_state(self, release_ids, state): + """check all releases to be in the specified state""" + all_matched = True + + for release_id in release_ids: + release = self.release_collection.get_release_by_id(release_id) + if release is None: + all_matched = False + break + + if release.state != state: + all_matched = False + break + return all_matched + def is_available(self, release_ids): - all_available = True - - with self.release_data_lock: - - for release_id in release_ids: - if release_id not in self.release_data.metadata: - all_available = False - break - - if self.release_data.metadata[release_id]["state"] != \ - constants.AVAILABLE: - all_available = False - break - - return all_available + return self.check_releases_state(release_ids, states.AVAILABLE) def is_deployed(self, release_ids): - all_deployed = True - - with self.release_data_lock: - - for release_id in release_ids: - if release_id not in self.release_data.metadata: - all_deployed = False - break - - if 
self.release_data.metadata[release_id]["state"] != constants.DEPLOYED: - all_deployed = False - break - - return all_deployed + return self.check_releases_state(release_ids, states.DEPLOYED) def is_committed(self, release_ids): - all_committed = True - - with self.release_data_lock: - - for release_id in release_ids: - if release_id not in self.release_data.metadata: - all_committed = False - break - - if self.release_data.metadata[release_id]["state"] != \ - constants.COMMITTED: - all_committed = False - break - - return all_committed + return self.check_releases_state(release_ids, states.COMMITTED) + # NOTE(bqian) report_app_dependencies function not being called? + # which means self.app_dependencies will always be empty and file + # app_dependency_filename will never exist? def report_app_dependencies(self, patch_ids, **kwargs): """ Handle report of application dependencies @@ -3023,8 +2905,6 @@ class PatchController(PatchService): LOG.info("Handling app dependencies report: app=%s, patch_ids=%s", appname, ','.join(patch_ids)) - self.release_data_lock.acquire() - if len(patch_ids) == 0: if appname in self.app_dependencies: del self.app_dependencies[appname] @@ -3043,21 +2923,16 @@ class PatchController(PatchService): except Exception: LOG.exception("Failed in report_app_dependencies") raise SoftwareFail("Internal failure") - finally: - self.release_data_lock.release() return True + # NOTE(bqian) unused function query_app_dependencies def query_app_dependencies(self): """ Query application dependencies """ - self.release_data_lock.acquire() - data = self.app_dependencies - self.release_data_lock.release() - return dict(data) def deploy_host_list(self): @@ -3203,7 +3078,7 @@ class PatchController(PatchService): all_host_upgrades.append({ "hostname": deploy_host.get("hostname"), "current_sw_version": to_maj_min_release if deploy_host.get( - "state") == constants.DEPLOYED else from_maj_min_release, + "state") == states.DEPLOYED else from_maj_min_release, 
"target_sw_version": to_maj_min_release, "host_state": deploy_host.get("state") }) @@ -3550,7 +3425,7 @@ class PatchControllerMainThread(threading.Thread): SEND_MSG_INTERVAL_IN_SECONDS) # Only send the deploy state update from the active controller - if is_deployment_in_progress(sc.release_data.metadata) and utils.is_active_controller(): + if is_deployment_in_progress() and utils.is_active_controller(): try: sc.socket_lock.acquire() deploy_state_update = SoftwareMessageDeployStateUpdate() diff --git a/software/software/software_entities.py b/software/software/software_entities.py index d3346de9..cba8b8b1 100644 --- a/software/software/software_entities.py +++ b/software/software/software_entities.py @@ -20,8 +20,8 @@ from software.utils import save_to_json_file from software.utils import get_software_filesystem_data from software.utils import validate_versions -from software.constants import DEPLOY_HOST_STATES -from software.constants import DEPLOY_STATES +from software.states import DEPLOY_HOST_STATES +from software.states import DEPLOY_STATES LOG = logging.getLogger('main_logger') @@ -135,12 +135,15 @@ class Deploy(ABC): pass @abstractmethod - def create(self, from_release: str, to_release: str, reboot_required: bool, state: DEPLOY_STATES): + def create(self, from_release: str, to_release: str, feed_repo: str, + commit_id: str, reboot_required: bool, state: DEPLOY_STATES): """ Create a new deployment entry. :param from_release: The current release version. :param to_release: The target release version. + :param feed_repo: ostree repo feed path + :param commit_id: commit-id to deploy :param reboot_required: If is required to do host reboot. :param state: The state of the deployment. 
@@ -230,11 +233,7 @@ class DeployHosts(ABC): class DeployHandler(Deploy): - def __init__(self): - super().__init__() - self.data = get_software_filesystem_data() - - def create(self, from_release, to_release, reboot_required, state=DEPLOY_STATES.START): + def create(self, from_release, to_release, feed_repo, commit_id, reboot_required, state=DEPLOY_STATES.START): """ Create a new deploy with given from and to release version :param from_release: The current release version. @@ -242,30 +241,33 @@ class DeployHandler(Deploy): :param reboot_required: If is required to do host reboot. :param state: The state of the deployment. """ - super().create(from_release, to_release, reboot_required, state) + super().create(from_release, to_release, feed_repo, commit_id, reboot_required, state) deploy = self.query(from_release, to_release) if deploy: raise DeployAlreadyExist("Error to create. Deploy already exists.") new_deploy = { "from_release": from_release, "to_release": to_release, + "feed_repo": feed_repo, + "commit_id": commit_id, "reboot_required": reboot_required, "state": state.value } try: - deploy_data = self.data.get("deploy", []) + data = get_software_filesystem_data() + deploy_data = data.get("deploy", []) if not deploy_data: deploy_data = { "deploy": [] } deploy_data["deploy"].append(new_deploy) - self.data.update(deploy_data) + data.update(deploy_data) else: deploy_data.append(new_deploy) - save_to_json_file(constants.SOFTWARE_JSON_FILE, self.data) + save_to_json_file(constants.SOFTWARE_JSON_FILE, data) except Exception: - self.data["deploy"][0] = {} + LOG.exception() def query(self, from_release, to_release): """ @@ -275,7 +277,8 @@ class DeployHandler(Deploy): :return: A list of deploy dictionary """ super().query(from_release, to_release) - for deploy in self.data.get("deploy", []): + data = get_software_filesystem_data() + for deploy in data.get("deploy", []): if (deploy.get("from_release") == from_release and deploy.get("to_release") == to_release): return 
deploy @@ -286,7 +289,8 @@ class DeployHandler(Deploy): Query all deployments inside software.json file. :return: A list of deploy dictionary """ - return self.data.get("deploy", []) + data = get_software_filesystem_data() + return data.get("deploy", []) def update(self, new_state: DEPLOY_STATES): """ @@ -298,11 +302,12 @@ class DeployHandler(Deploy): if not deploy: raise DeployDoNotExist("Error to update deploy state. No deploy in progress.") + data = get_software_filesystem_data() try: - self.data["deploy"][0]["state"] = new_state.value - save_to_json_file(constants.SOFTWARE_JSON_FILE, self.data) + data["deploy"][0]["state"] = new_state.value + save_to_json_file(constants.SOFTWARE_JSON_FILE, data) except Exception: - self.data["deploy"][0] = deploy + LOG.exception() def delete(self): """ @@ -312,19 +317,16 @@ class DeployHandler(Deploy): deploy = self.query_all() if not deploy: raise DeployDoNotExist("Error to delete deploy state. No deploy in progress.") + + data = get_software_filesystem_data() try: - self.data["deploy"].clear() - save_to_json_file(constants.SOFTWARE_JSON_FILE, self.data) + data["deploy"].clear() + save_to_json_file(constants.SOFTWARE_JSON_FILE, data) except Exception: - self.data["deploy"][0] = deploy + LOG.exception() class DeployHostHandler(DeployHosts): - - def __init__(self): - super().__init__() - self.data = get_software_filesystem_data() - def create(self, hostname, state: DEPLOY_HOST_STATES = DEPLOY_HOST_STATES.PENDING): super().create(hostname, state) deploy = self.query(hostname) @@ -336,16 +338,17 @@ class DeployHostHandler(DeployHosts): "state": state.value if state else None } - deploy_data = self.data.get("deploy_host", []) + data = get_software_filesystem_data() + deploy_data = data.get("deploy_host", []) if not deploy_data: deploy_data = { "deploy_host": [] } deploy_data["deploy_host"].append(new_deploy_host) - self.data.update(deploy_data) + data.update(deploy_data) else: deploy_data.append(new_deploy_host) - 
save_to_json_file(constants.SOFTWARE_JSON_FILE, self.data) + save_to_json_file(constants.SOFTWARE_JSON_FILE, data) def query(self, hostname): """ @@ -354,13 +357,15 @@ class DeployHostHandler(DeployHosts): :return: A list of deploy dictionary """ super().query(hostname) - for deploy in self.data.get("deploy_host", []): + data = get_software_filesystem_data() + for deploy in data.get("deploy_host", []): if deploy.get("hostname") == hostname: return deploy return None def query_all(self): - return self.data.get("deploy_host", []) + data = get_software_filesystem_data() + return data.get("deploy_host", []) def update(self, hostname, state: DEPLOY_HOST_STATES): super().update(hostname, state) @@ -368,23 +373,26 @@ class DeployHostHandler(DeployHosts): if not deploy: raise Exception("Error to update. Deploy host do not exist.") - index = self.data.get("deploy_host", []).index(deploy) + data = get_software_filesystem_data() + index = data.get("deploy_host", []).index(deploy) updated_entity = { "hostname": hostname, "state": state.value } - self.data["deploy_host"][index].update(updated_entity) - save_to_json_file(constants.SOFTWARE_JSON_FILE, self.data) + data["deploy_host"][index].update(updated_entity) + save_to_json_file(constants.SOFTWARE_JSON_FILE, data) return updated_entity def delete_all(self): - self.data.get("deploy_host").clear() - save_to_json_file(constants.SOFTWARE_JSON_FILE, self.data) + data = get_software_filesystem_data() + data.get("deploy_host").clear() + save_to_json_file(constants.SOFTWARE_JSON_FILE, data) def delete(self, hostname): super().delete(hostname) deploy = self.query(hostname) if not deploy: raise DeployDoNotExist("Error to delete. 
Deploy host do not exist.") - self.data.get("deploy_host").remove(deploy) - save_to_json_file(constants.SOFTWARE_JSON_FILE, self.data) + data = get_software_filesystem_data() + data.get("deploy_host").remove(deploy) + save_to_json_file(constants.SOFTWARE_JSON_FILE, data) diff --git a/software/software/software_functions.py b/software/software/software_functions.py index e26adc6e..350c147e 100644 --- a/software/software/software_functions.py +++ b/software/software/software_functions.py @@ -32,11 +32,11 @@ from software.exceptions import OSTreeTarFail from software.exceptions import ReleaseUploadFailure from software.exceptions import ReleaseValidationFailure from software.exceptions import ReleaseMismatchFailure -from software.exceptions import SoftwareFail from software.exceptions import SoftwareServiceError from software.exceptions import VersionedDeployPrecheckFailure import software.constants as constants +from software import states import software.utils as utils from software.sysinv_utils import get_ihost_list @@ -81,7 +81,7 @@ def configure_logging(logtofile=True, level=logging.INFO): my_exec = os.path.basename(sys.argv[0]) log_format = '%(asctime)s: ' \ - + my_exec + '[%(process)s]: ' \ + + my_exec + '[%(process)s:%(thread)d]: ' \ + '%(filename)s(%(lineno)s): ' \ + '%(levelname)s: %(message)s' @@ -231,10 +231,13 @@ class ReleaseData(object): """ def __init__(self): + self._reset() + + def _reset(self): # # The metadata dict stores all metadata associated with a release. # This dict is keyed on release_id, with metadata for each release stored - # in a nested dict. (See parse_metadata method for more info) + # in a nested dict. 
(See parse_metadata_string method for more info) # self.metadata = {} @@ -253,8 +256,8 @@ class ReleaseData(object): for release_id in list(updated_release.metadata): # Update all fields except state cur_state = self.metadata[release_id]['state'] + updated_release.metadata[release_id]['state'] = cur_state self.metadata[release_id].update(updated_release.metadata[release_id]) - self.metadata[release_id]['state'] = cur_state def delete_release(self, release_id): del self.contents[release_id] @@ -294,22 +297,21 @@ class ReleaseData(object): outfile.close() os.rename(new_filename, filename) - def parse_metadata(self, - filename, - state=None): + def parse_metadata_file(self, + filename, + state=None): """ Parse an individual release metadata XML file :param filename: XML file :param state: Indicates Applied, Available, or Committed :return: Release ID """ - with open(filename, "r") as f: text = f.read() return self.parse_metadata_string(text, state) - def parse_metadata_string(self, text, state): + def parse_metadata_string(self, text, state=None): root = ElementTree.fromstring(text) # # @@ -391,31 +393,35 @@ class ReleaseData(object): return release_id - def load_all_metadata(self, - loaddir, - state=None): + def _read_all_metafile(self, path): """ - Parse all metadata files in the specified dir - :return: + Load metadata from all xml files in the specified path + :param path: path of directory that xml files is in """ - for fname in glob.glob("%s/*.xml" % loaddir): - self.parse_metadata(fname, state) + for filename in glob.glob("%s/*.xml" % path): + with open(filename, "r") as f: + text = f.read() + yield filename, text def load_all(self): # Reset the data self.__init__() - self.load_all_metadata(constants.AVAILABLE_DIR, state=constants.AVAILABLE) - self.load_all_metadata(constants.UNAVAILABLE_DIR, state=constants.UNAVAILABLE) - self.load_all_metadata(constants.DEPLOYING_START_DIR, state=constants.DEPLOYING_START) - 
self.load_all_metadata(constants.DEPLOYING_HOST_DIR, state=constants.DEPLOYING_HOST) - self.load_all_metadata(constants.DEPLOYING_ACTIVATE_DIR, state=constants.DEPLOYING_ACTIVATE) - self.load_all_metadata(constants.DEPLOYING_COMPLETE_DIR, state=constants.DEPLOYING_COMPLETE) - self.load_all_metadata(constants.DEPLOYED_DIR, state=constants.DEPLOYED) - self.load_all_metadata(constants.REMOVING_DIR, state=constants.REMOVING) - self.load_all_metadata(constants.ABORTING_DIR, state=constants.ABORTING) - self.load_all_metadata(constants.COMMITTED_DIR, state=constants.COMMITTED) - # load the release metadata from feed directory or filesystem db + state_map = { + states.AVAILABLE: states.AVAILABLE_DIR, + states.UNAVAILABLE: states.UNAVAILABLE_DIR, + states.DEPLOYING: states.DEPLOYING_DIR, + states.DEPLOYED: states.DEPLOYED_DIR, + states.REMOVING: states.REMOVING_DIR, + } + + for state, path in state_map.items(): + for filename, text in self._read_all_metafile(path): + try: + self.parse_metadata_string(text, state=state) + except Exception as e: + err_msg = f"Failed parsing {filename}, {e}" + LOG.exception(err_msg) def query_line(self, release_id, @@ -636,54 +642,56 @@ class PatchFile(object): raise SystemExit(e.returncode) @staticmethod - def read_patch(path, cert_type=None): + def read_patch(path, dest, cert_type=None): # We want to enable signature checking by default # Note: cert_type=None is required if we are to enforce 'no dev patches on a formal load' rule. 
# Open the patch file and extract the contents to the current dir tar = tarfile.open(path, "r:gz") - tar.extract("signature") + tar.extract("signature", path=dest) try: - tar.extract(detached_signature_file) + tar.extract(detached_signature_file, path=dest) except KeyError: msg = "Patch has not been signed" LOG.warning(msg) # Filelist used for signature validation and verification - sig_filelist = ["metadata.tar", "software.tar"] + filelist = ["metadata.tar", "software.tar"] # Check if conditional scripts are inside the patch # If yes then add them to signature checklist if "semantics.tar" in [f.name for f in tar.getmembers()]: - sig_filelist.append("semantics.tar") + filelist.append("semantics.tar") if "pre-install.sh" in [f.name for f in tar.getmembers()]: - sig_filelist.append("pre-install.sh") + filelist.append("pre-install.sh") if "post-install.sh" in [f.name for f in tar.getmembers()]: - sig_filelist.append("post-install.sh") + filelist.append("post-install.sh") - for f in sig_filelist: - tar.extract(f) + for f in filelist: + tar.extract(f, path=dest) # Verify the data integrity signature first - sigfile = open("signature", "r") + sigfile = open(os.path.join(dest, "signature"), "r") sig = int(sigfile.read(), 16) sigfile.close() expected_sig = 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF + sig_filelist = [os.path.join(dest, f) for f in filelist] for f in sig_filelist: sig ^= get_md5(f) if sig != expected_sig: - msg = "Patch failed verification" + msg = "Software failed signature verification." 
LOG.error(msg) - raise ReleaseValidationFailure(msg) + raise ReleaseValidationFailure(error=msg) # Verify detached signature - if os.path.exists(detached_signature_file): + sig_file = os.path.join(dest, detached_signature_file) + if os.path.exists(sig_file): sig_valid = verify_files( sig_filelist, - detached_signature_file, + sig_file, cert_type=cert_type) if sig_valid is True: msg = "Signature verified, patch has been signed" @@ -693,20 +701,21 @@ class PatchFile(object): msg = "Signature check failed" if cert_type is None: LOG.error(msg) - raise ReleaseValidationFailure(msg) + raise ReleaseValidationFailure(error=msg) else: - msg = "Patch has not been signed" + msg = "Software has not been signed." if cert_type is None: LOG.error(msg) - raise ReleaseValidationFailure(msg) + raise ReleaseValidationFailure(error=msg) # Restart script for f in tar.getmembers(): - if f.name not in sig_filelist: - tar.extract(f) + if f.name not in filelist: + tar.extract(f, path=dest) - tar = tarfile.open("metadata.tar") - tar.extractall() + metadata = os.path.join(dest, "metadata.tar") + tar = tarfile.open(metadata) + tar.extractall(path=dest) @staticmethod def query_patch(patch, field=None): @@ -716,12 +725,6 @@ class PatchFile(object): # Create a temporary working directory tmpdir = tempfile.mkdtemp(prefix="patch_") - # Save the current directory, so we can chdir back after - orig_wd = os.getcwd() - - # Change to the tmpdir - os.chdir(tmpdir) - r = {} try: @@ -729,7 +732,7 @@ class PatchFile(object): # Need to determine the cert_type for cert_type_str in cert_type_all: try: - PatchFile.read_patch(abs_patch, cert_type=[cert_type_str]) + PatchFile.read_patch(abs_patch, tmpdir, cert_type=[cert_type_str]) except ReleaseValidationFailure: pass else: @@ -738,15 +741,17 @@ class PatchFile(object): break if "cert" not in r: + # NOTE(bqian) below reads like a bug in certain cases. need to revisit. # If cert is unknown, then file is not yet open for reading. 
# Try to open it for reading now, using all available keys. # We can't omit cert_type, or pass None, because that will trigger the code # path used by installed product, in which dev keys are not accepted unless # a magic file exists. - PatchFile.read_patch(abs_patch, cert_type=cert_type_all) + PatchFile.read_patch(abs_patch, tmpdir, cert_type=cert_type_all) thispatch = ReleaseData() - patch_id = thispatch.parse_metadata("metadata.xml") + filename = os.path.join(tmpdir, "metadata.xml") + patch_id = thispatch.parse_metadata_file(filename) if field is None or field == "id": r["id"] = patch_id @@ -761,20 +766,14 @@ class PatchFile(object): r[field] = thispatch.query_line(patch_id, field) except ReleaseValidationFailure as e: - msg = "Patch validation failed during extraction" + msg = "Patch validation failed during extraction. %s" % str(e) LOG.exception(msg) raise e - except ReleaseMismatchFailure as e: - msg = "Patch Mismatch during extraction" + except tarfile.TarError as te: + msg = "Extract software failed %s" % str(te) LOG.exception(msg) - raise e - except tarfile.TarError: - msg = "Failed during patch extraction" - LOG.exception(msg) - raise ReleaseValidationFailure(msg) + raise ReleaseValidationFailure(error=msg) finally: - # Change back to original working dir - os.chdir(orig_wd) shutil.rmtree(tmpdir) return r @@ -790,45 +789,34 @@ class PatchFile(object): # Create a temporary working directory tmpdir = tempfile.mkdtemp(prefix="patch_") - # Save the current directory, so we can chdir back after - orig_wd = os.getcwd() - - # Change to the tmpdir - os.chdir(tmpdir) - try: cert_type = None meta_data = PatchFile.query_patch(abs_patch) if 'cert' in meta_data: cert_type = meta_data['cert'] - PatchFile.read_patch(abs_patch, cert_type=cert_type) - ReleaseData.modify_metadata_text("metadata.xml", key, value) + PatchFile.read_patch(abs_patch, tmpdir, cert_type=cert_type) + path = os.path.join(tmpdir, "metadata.xml") + ReleaseData.modify_metadata_text(path, key, value) 
PatchFile.write_patch(new_abs_patch, cert_type=cert_type) os.rename(new_abs_patch, abs_patch) rc = True - except ReleaseValidationFailure as e: - raise e - except ReleaseMismatchFailure as e: - raise e - except tarfile.TarError: - msg = "Failed during patch extraction" + except tarfile.TarError as te: + msg = "Extract software failed %s" % str(te) LOG.exception(msg) - raise ReleaseValidationFailure(msg) + raise ReleaseValidationFailure(error=msg) except Exception as e: template = "An exception of type {0} occurred. Arguments:\n{1!r}" message = template.format(type(e).__name__, e.args) - print(message) + LOG.exception(message) finally: - # Change back to original working dir - os.chdir(orig_wd) shutil.rmtree(tmpdir) return rc @staticmethod def extract_patch(patch, - metadata_dir=constants.AVAILABLE_DIR, + metadata_dir=states.AVAILABLE_DIR, metadata_only=False, existing_content=None, base_pkgdata=None): @@ -845,23 +833,18 @@ class PatchFile(object): # Create a temporary working directory tmpdir = tempfile.mkdtemp(prefix="patch_") - # Save the current directory, so we can chdir back after - orig_wd = os.getcwd() - - # Change to the tmpdir - os.chdir(tmpdir) - try: # Open the patch file and extract the contents to the tmpdir - PatchFile.read_patch(abs_patch) + PatchFile.read_patch(abs_patch, tmpdir) thispatch = ReleaseData() - patch_id = thispatch.parse_metadata("metadata.xml") + filename = os.path.join(tmpdir, "metadata.xml") + with open(filename, "r") as f: + text = f.read() + + patch_id = thispatch.parse_metadata_string(text) if patch_id is None: - print("Failed to import patch") - # Change back to original working dir - os.chdir(orig_wd) shutil.rmtree(tmpdir) return None @@ -872,15 +855,15 @@ class PatchFile(object): if not base_pkgdata.check_release(patch_sw_version): msg = "Software version %s for release %s is not installed" % (patch_sw_version, patch_id) LOG.exception(msg) - raise ReleaseValidationFailure(msg) + raise ReleaseValidationFailure(error=msg) if 
metadata_only: # This is a re-import. Ensure the content lines up if existing_content is None \ or existing_content != thispatch.contents[patch_id]: - msg = "Contents of re-imported patch do not match" - LOG.exception(msg) - raise ReleaseMismatchFailure(msg) + msg = f"Contents of {patch_id} do not match re-uploaded release" + LOG.error(msg) + raise ReleaseMismatchFailure(error=msg) patch_sw_version = utils.get_major_release_version( thispatch.metadata[patch_id]["sw_version"]) @@ -888,42 +871,41 @@ class PatchFile(object): if not os.path.exists(abs_ostree_tar_dir): os.makedirs(abs_ostree_tar_dir) - shutil.move("metadata.xml", + shutil.move(os.path.join(tmpdir, "metadata.xml"), "%s/%s-metadata.xml" % (abs_metadata_dir, patch_id)) - shutil.move("software.tar", + shutil.move(os.path.join(tmpdir, "software.tar"), "%s/%s-software.tar" % (abs_ostree_tar_dir, patch_id)) + v = "%s/%s-software.tar" % (abs_ostree_tar_dir, patch_id) + LOG.info("software.tar %s" % v) # restart_script may not exist in metadata. 
if thispatch.metadata[patch_id].get("restart_script"): if not os.path.exists(root_scripts_dir): os.makedirs(root_scripts_dir) - restart_script_name = thispatch.metadata[patch_id]["restart_script"] - shutil.move(restart_script_name, - "%s/%s" % (root_scripts_dir, restart_script_name)) + restart_script_name = os.path.join(tmpdir, thispatch.metadata[patch_id]["restart_script"]) + if os.path.isfile(restart_script_name): + shutil.move(restart_script_name, os.path.join(root_scripts_dir, restart_script_name)) - except ReleaseValidationFailure as e: - raise e - except ReleaseMismatchFailure as e: - raise e - except tarfile.TarError: - msg = "Failed during patch extraction" + except tarfile.TarError as te: + msg = "Extract software failed %s" % str(te) LOG.exception(msg) - raise ReleaseValidationFailure(msg) - except KeyError: - msg = "Failed during patch extraction" + raise ReleaseValidationFailure(error=msg) + except KeyError as ke: + # NOTE(bqian) assuming this is metadata missing key. + # this try except should be narrowed down to protect more specific + # routine accessing external data (metadata) only.
+ msg = "Software metadata missing required value for %s" % str(ke) LOG.exception(msg) - raise ReleaseValidationFailure(msg) - except OSError: - msg = "Failed during patch extraction" - LOG.exception(msg) - raise SoftwareFail(msg) - except IOError: # pylint: disable=duplicate-except - msg = "Failed during patch extraction" - LOG.exception(msg) - raise SoftwareFail(msg) + raise ReleaseValidationFailure(error=msg) + # except OSError: + # msg = "Failed during patch extraction" + # LOG.exception(msg) + # raise SoftwareFail(msg) + # except IOError: # pylint: disable=duplicate-except + # msg = "Failed during patch extraction" + # LOG.exception(msg) + # raise SoftwareFail(msg) finally: - # Change back to original working dir - os.chdir(orig_wd) shutil.rmtree(tmpdir) return thispatch @@ -939,17 +921,16 @@ class PatchFile(object): # Create a temporary working directory patch_tmpdir = tempfile.mkdtemp(prefix="patch_") - # Save the current directory, so we can chdir back after - orig_wd = os.getcwd() - - # Change to the tmpdir - os.chdir(patch_tmpdir) - # Load the patch abs_patch = os.path.abspath(patch) - PatchFile.read_patch(abs_patch) + PatchFile.read_patch(abs_patch, patch_tmpdir) thispatch = ReleaseData() - patch_id = thispatch.parse_metadata("metadata.xml") + + filename = os.path.join(patch_tmpdir, "metadata.xml") + with open(filename, "r") as f: + text = f.read() + + patch_id = thispatch.parse_metadata_string(text) patch_sw_version = utils.get_major_release_version( thispatch.metadata[patch_id]["sw_version"]) @@ -982,7 +963,6 @@ class PatchFile(object): raise OSTreeTarFail(msg) finally: shutil.rmtree(tmpdir, ignore_errors=True) - os.chdir(orig_wd) shutil.rmtree(patch_tmpdir) @staticmethod @@ -1316,13 +1296,15 @@ def is_deploy_state_in_sync(): return False -def is_deployment_in_progress(release_metadata): +def is_deployment_in_progress(): """ Check if at least one deployment is in progress :param release_metadata: dict of release metadata :return: bool true if in 
progress, false otherwise """ - return any(release['state'] == constants.DEPLOYING for release in release_metadata.values()) + dbapi = get_instance() + deploys = dbapi.get_deploy_all() + return len(deploys) > 0 def set_host_target_load(hostname, major_release): diff --git a/software/software/states.py b/software/software/states.py new file mode 100644 index 00000000..58bc334b --- /dev/null +++ b/software/software/states.py @@ -0,0 +1,126 @@ +""" +Copyright (c) 2023-2024 Wind River Systems, Inc. + +SPDX-License-Identifier: Apache-2.0 + +""" + +from enum import Enum +import os + +from software.constants import SOFTWARE_STORAGE_DIR + + +# software release life cycle +# (fresh install) -> deployed -> (upgrade to next version and deploy complete) -> unavailable -> (deleted) +# ^ +# |--------------------------------------------------------- +# ^ +# | +# (upload) -> available ->(deploy start) -> deploying -> (deploy complete) -> deployed +# \---> (deleted) +# +# deploy life cycle +# (deploy-start) +# | +# V +# deploy-start +# | +# V +# start-done -> deploy-host -> deploy-active -> deploy-active-done -> deploy-complete -> (delete) +# \ \ \ +# \--------------\------------\----> (deploy abort) -> deploy-abort --> deploy-abort-done -> (delete) +# +# deploy host life cycle +# /----(deploy abort/reverse deploy)--- +# / | +# / V +# (deploy-start) -> pending -> deploying -------------> deployed --------(deploy-complete) -> (deleted) +# ^ \---------> (deploy abort/reverse deploy) +# | / +# |-------------------------------------------/ + + +# Release states +AVAILABLE_DIR = os.path.join(SOFTWARE_STORAGE_DIR, "metadata/available") +UNAVAILABLE_DIR = os.path.join(SOFTWARE_STORAGE_DIR, "metadata/unavailable") +DEPLOYING_DIR = os.path.join(SOFTWARE_STORAGE_DIR, "metadata/deploying") +DEPLOYED_DIR = os.path.join(SOFTWARE_STORAGE_DIR, "metadata/deployed") +REMOVING_DIR = os.path.join(SOFTWARE_STORAGE_DIR, "metadata/removing") +COMMITTED_DIR = os.path.join(SOFTWARE_STORAGE_DIR,
"metadata/committed") + +DEPLOY_STATE_METADATA_DIR = [ + AVAILABLE_DIR, + UNAVAILABLE_DIR, + DEPLOYING_DIR, + DEPLOYED_DIR, + REMOVING_DIR, + COMMITTED_DIR, +] + +# new release state needs to be added to VALID_RELEASE_STATES list +AVAILABLE = 'available' +UNAVAILABLE = 'unavailable' +DEPLOYING = 'deploying' +DEPLOYED = 'deployed' +REMOVING = 'removing' +COMMITTED = 'committed' + +VALID_RELEASE_STATES = [AVAILABLE, UNAVAILABLE, DEPLOYING, DEPLOYED, + REMOVING, COMMITTED] + +RELEASE_STATE_TO_DIR_MAP = {AVAILABLE: AVAILABLE_DIR, + UNAVAILABLE: UNAVAILABLE_DIR, + DEPLOYING: DEPLOYING_DIR, + DEPLOYED: DEPLOYED_DIR, + REMOVING: REMOVING_DIR, + COMMITTED: COMMITTED_DIR} + +DELETABLE_STATE = [AVAILABLE, UNAVAILABLE] + +# valid release state transition below could still be changed as +# development continue +RELEASE_STATE_VALID_TRANSITION = { + AVAILABLE: [DEPLOYING], + DEPLOYING: [DEPLOYED, AVAILABLE], + DEPLOYED: [REMOVING, UNAVAILABLE] +} + +VALID_DEPLOY_START_STATES = [ + AVAILABLE, + DEPLOYED, +] + + +# deploy states +class DEPLOY_STATES(Enum): + START = 'start' + START_DONE = 'start-done' + START_FAILED = 'start-failed' + + HOST = 'host' + HOST_DONE = 'host-done' + HOST_FAILED = 'host-failed' + + ACTIVATE = 'activate' + ACTIVATE_DONE = 'activate-done' + ACTIVATE_FAILED = 'activate-failed' + + ABORT = 'abort' + ABORT_DONE = 'abort-done' + + +# deploy host state +class DEPLOY_HOST_STATES(Enum): + DEPLOYED = 'deployed' + DEPLOYING = 'deploying' + FAILED = 'failed' + PENDING = 'pending' + + +VALID_HOST_DEPLOY_STATE = [ + DEPLOY_HOST_STATES.DEPLOYED, + DEPLOY_HOST_STATES.DEPLOYING, + DEPLOY_HOST_STATES.FAILED, + DEPLOY_HOST_STATES.PENDING, +] diff --git a/software/software/tests/test_software_controller.py b/software/software/tests/test_software_controller.py index dd1a3729..01fe8475 100644 --- a/software/software/tests/test_software_controller.py +++ b/software/software/tests/test_software_controller.py @@ -5,15 +5,15 @@ # # This import has to be first -from 
software.tests import base # pylint: disable=unused-import - +from software.tests import base # pylint: disable=unused-import # noqa: F401 from software.software_controller import PatchController -from software.software_controller import ReleaseValidationFailure +from software.exceptions import ReleaseValidationFailure import unittest from unittest.mock import MagicMock from unittest.mock import mock_open from unittest.mock import patch from software import constants +from software import states class TestSoftwareController(unittest.TestCase): @@ -65,8 +65,7 @@ class TestSoftwareController(unittest.TestCase): # Call the function being tested with patch('software.software_controller.SW_VERSION', '1.0.0'): - info, warning, error, release_meta_info = controller._process_upload_upgrade_files(self.upgrade_files, # pylint: disable=protected-access - controller.release_data) + info, warning, error, release_meta_info = controller._process_upload_upgrade_files(self.upgrade_files) # pylint: disable=protected-access # Verify that the expected functions were called with the expected arguments mock_verify_files.assert_called_once_with([self.upgrade_files[constants.ISO_EXTENSION]], @@ -85,7 +84,7 @@ class TestSoftwareController(unittest.TestCase): # Verify that the expected messages were returned self.assertEqual( info, - 'iso and signature files upload completed\nImporting iso is in progress\nLoad import successful') + 'Load import successful') self.assertEqual(warning, '') self.assertEqual(error, '') self.assertEqual( @@ -114,17 +113,14 @@ class TestSoftwareController(unittest.TestCase): # Call the function being tested with patch('software.software_controller.SW_VERSION', '1.0'): - info, warning, error, _ = controller._process_upload_upgrade_files(self.upgrade_files, # pylint: disable=protected-access - controller.release_data) - - # Verify that the expected messages were returned - self.assertEqual(info, '') - self.assertEqual(warning, '') - self.assertEqual(error, 'Upgrade 
file signature verification failed\n') + try: + controller._process_upload_upgrade_files(self.upgrade_files) # pylint: disable=protected-access + except ReleaseValidationFailure as e: + self.assertEqual(e.error, 'Software test.iso:test.sig signature validation failed') @patch('software.software_controller.PatchController.__init__', return_value=None) @patch('software.software_controller.verify_files', - side_effect=ReleaseValidationFailure('Invalid signature file')) + side_effect=ReleaseValidationFailure(error='Invalid signature file')) @patch('software.software_controller.PatchController.major_release_upload_check') def test_process_upload_upgrade_files_validation_error(self, mock_major_release_upload_check, @@ -137,13 +133,10 @@ class TestSoftwareController(unittest.TestCase): mock_major_release_upload_check.return_value = True # Call the function being tested - info, warning, error, _ = controller._process_upload_upgrade_files(self.upgrade_files, # pylint: disable=protected-access - controller.release_data) - - # Verify that the expected messages were returned - self.assertEqual(info, '') - self.assertEqual(warning, '') - self.assertEqual(error, 'Upgrade file signature verification failed\n') + try: + controller._process_upload_upgrade_files(self.upgrade_files) # pylint: disable=protected-access + except ReleaseValidationFailure as e: + self.assertEqual(e.error, "Invalid signature file") @patch('software.software_controller.os.path.isfile') @patch('software.software_controller.json.load') @@ -238,8 +231,8 @@ class TestSoftwareController(unittest.TestCase): "to_release": "2.0.0" }) controller.db_api_instance.get_deploy_host = MagicMock(return_value=[ - {"hostname": "host1", "state": constants.DEPLOYED}, - {"hostname": "host2", "state": constants.DEPLOYING} + {"hostname": "host1", "state": states.DEPLOYED}, + {"hostname": "host2", "state": states.DEPLOYING} ]) # Test when the host is deployed @@ -248,7 +241,7 @@ class TestSoftwareController(unittest.TestCase): 
"hostname": "host1", "current_sw_version": "2.0.0", "target_sw_version": "2.0.0", - "host_state": constants.DEPLOYED + "host_state": states.DEPLOYED }]) @patch('software.software_controller.json.load') @@ -267,8 +260,8 @@ class TestSoftwareController(unittest.TestCase): "to_release": "2.0.0" }) controller.db_api_instance.get_deploy_host = MagicMock(return_value=[ - {"hostname": "host1", "state": constants.DEPLOYED}, - {"hostname": "host2", "state": constants.DEPLOYING} + {"hostname": "host1", "state": states.DEPLOYED}, + {"hostname": "host2", "state": states.DEPLOYING} ]) # Test when the host is deploying @@ -277,7 +270,7 @@ class TestSoftwareController(unittest.TestCase): "hostname": "host2", "current_sw_version": "1.0.0", "target_sw_version": "2.0.0", - "host_state": constants.DEPLOYING + "host_state": states.DEPLOYING }]) @patch('software.software_controller.json.load') @@ -296,8 +289,8 @@ class TestSoftwareController(unittest.TestCase): "to_release": "2.0.0" }) controller.db_api_instance.get_deploy_host = MagicMock(return_value=[ - {"hostname": "host1", "state": constants.DEPLOYED}, - {"hostname": "host2", "state": constants.DEPLOYING} + {"hostname": "host1", "state": states.DEPLOYED}, + {"hostname": "host2", "state": states.DEPLOYING} ]) # Test when the host is deploying @@ -306,12 +299,12 @@ class TestSoftwareController(unittest.TestCase): "hostname": "host1", "current_sw_version": "2.0.0", "target_sw_version": "2.0.0", - "host_state": constants.DEPLOYED + "host_state": states.DEPLOYED }, { "hostname": "host2", "current_sw_version": "1.0.0", "target_sw_version": "2.0.0", - "host_state": constants.DEPLOYING + "host_state": states.DEPLOYING }]) @patch('software.software_controller.json.load') @@ -394,4 +387,4 @@ class TestSoftwareController(unittest.TestCase): # Verify that the expected methods were called db_api_instance_mock.get_deploy_all.assert_called_once() - self.assertEqual(result, None) + self.assertIsNone(result) diff --git 
a/software/software/tests/test_software_function.py b/software/software/tests/test_software_function.py index a22014a9..a8660f4a 100644 --- a/software/software/tests/test_software_function.py +++ b/software/software/tests/test_software_function.py @@ -130,7 +130,7 @@ class TestSoftwareFunction(unittest.TestCase): self.assertEqual(val["install_instructions"], r.install_instructions) self.assertEqual(val["warnings"], r.warnings) self.assertEqual(val["status"], r.status) - self.assertEqual(val["unremovable"], r.unremovable) + self.assertEqual(val["unremovable"] == 'Y', r.unremovable) if val["restart_script"] is None: self.assertIsNone(r.restart_script) else: @@ -159,7 +159,7 @@ class TestSoftwareFunction(unittest.TestCase): self.assertEqual(val["install_instructions"], r.install_instructions) self.assertEqual(val["warnings"], r.warnings) self.assertEqual(val["status"], r.status) - self.assertEqual(val["unremovable"], r.unremovable) + self.assertEqual(val["unremovable"] == 'Y', r.unremovable) if val["restart_script"] is None: self.assertIsNone(r.restart_script) else: @@ -178,7 +178,7 @@ class TestSoftwareFunction(unittest.TestCase): self.assertEqual(val["install_instructions"], r.install_instructions) self.assertEqual(val["warnings"], r.warnings) self.assertEqual(val["status"], r.status) - self.assertEqual(val["unremovable"], r.unremovable) + self.assertEqual(val["unremovable"] == 'Y', r.unremovable) if val["restart_script"] is None: self.assertIsNone(r.restart_script) else: diff --git a/software/software/utils.py b/software/software/utils.py index cd1b431c..a12d68bb 100644 --- a/software/software/utils.py +++ b/software/software/utils.py @@ -43,20 +43,19 @@ class ExceptionHook(hooks.PecanHook): status = 500 if isinstance(e, SoftwareServiceError): - LOG.warning("An issue is detected. 
Signature [%s]" % signature) + # Only exceptions pre-categorized as "expected" (known operational + # or environmental issues) have their details (possibly with + # recovery/resolution instructions) displayed to the end + # user. + LOG.warning("%s. Signature [%s]" % (e.error, signature)) # TODO(bqian) remove the logging after it is stable LOG.exception(e) data = dict(info=e.info, warning=e.warning, error=e.error) else: + # An exception that is not pre-categorized as "expected" indicates a + # bug: either in the code, or in how the exception was categorized. err_msg = "Internal error occurred. Error signature [%s]" % signature - try: - # If exception contains error details, send that to user - if str(e): - err_msg = "Error \"%s\", Error signature [%s]" % (str(e), signature) - except Exception: - pass - LOG.error(err_msg) LOG.exception(e) data = dict(info="", warning="", error=err_msg) return webob.Response(json.dumps(data), status=status) diff --git a/software/tox.ini b/software/tox.ini index 2c955e2b..77283e99 100644 --- a/software/tox.ini +++ b/software/tox.ini @@ -61,9 +61,9 @@ commands = # H203: Use assertIs(Not)None to check for None (off by default). enable-extensions = H106,H203 exclude = .venv,.git,.tox,dist,doc,*lib/python*,*egg,build,release-tag-* -max-line-length = 80 +max-line-length = 120 show-source = True -ignore = E402,H306,H404,H405,W504,E501 +ignore = E402,H306,H404,H405,W504,E501,H105 [testenv:flake8] commands = flake8 {posargs}