From 1b462be12adc44fdcba9e16cc1e9240bf3c3cc74 Mon Sep 17 00:00:00 2001 From: Bin Qian Date: Mon, 15 Jan 2024 15:20:14 +0000 Subject: [PATCH] deploy state changed update After major release data migration completes, update deploy state to start-done for successful migration or start-failed if not. This deploy state update from the sub-process is sent to software_controller via a UDP message. Only updates from a valid agent will be accepted. story: 2010676 Task: 49590 TCs: pass: Update deploy state to start-done after data migration completes. pass: Update deploy state to start-failed after data migration fails. Change-Id: If4bfdfea168374bd11185243cffc20aa3af9c160 Signed-off-by: Bin Qian --- software/scripts/deploy-precheck | 2 +- software/scripts/software-deploy-start | 26 ++- software/scripts/usm_load_import | 106 +++++++----- software/setup.cfg | 1 + software/software/constants.py | 42 +++-- software/software/messages.py | 6 +- software/software/software_controller.py | 162 +++++++++++++++++- software/software/software_entities.py | 2 +- software/software/software_functions.py | 41 ++++- .../tests/test_software_controller.py | 9 + .../software/utilities/update_deploy_state.py | 112 ++++++++++++ software/software/utils.py | 22 +++ 12 files changed, 455 insertions(+), 76 deletions(-) create mode 100644 software/software/utilities/update_deploy_state.py diff --git a/software/scripts/deploy-precheck b/software/scripts/deploy-precheck index 86e09a9c..3744c59c 100644 --- a/software/scripts/deploy-precheck +++ b/software/scripts/deploy-precheck @@ -154,7 +154,7 @@ class UpgradeHealthCheck(HealthCheck): output = "" # get target release from script directory location - upgrade_release = re.match("^.*/rel-(\d\d.\d\d)/", __file__).group(1) + upgrade_release = re.match("^.*/rel-(\d\d.\d\d.\d*)/", __file__).group(1) # check installed license success = self._check_license(upgrade_release) diff --git a/software/scripts/software-deploy-start b/software/scripts/software-deploy-start index 2e28bf51..827bf11b 100755 --- a/software/scripts/software-deploy-start +++ b/software/scripts/software-deploy-start @@ -16,7 +16,7 @@ # TODO: centralize USM upgrade scripts output into one single log exec > /var/log/deploy_start.log 2>&1 - +exec_path=$(dirname $0) usage() { echo "usage: $0 from_ver to_ver k8s_ver postgresql_port feed [commit_id|latest_commit]" @@ -45,17 +45,30 @@ rootdir=${staging_dir}"/sysroot" repo=${staging_dir}"/ostree_repo" instbr="starlingx" +report_agent="deploy-start" + deploy_cleanup() { sudo ${rootdir}/usr/sbin/software-deploy/deploy-cleanup ${repo} ${rootdir} all } +deploy_update_state() { + local state="$1" + + # update deploy state via software-deploy-update + /usr/bin/software-deploy-update -s ${state} ${report_agent} +} + handle_error() { local exit_code="$1" local error_message="$2" + local state="start-failed" echo "Error: ${error_message}" >&2 echo "Please check the error details and take appropriate action for recovery." >&2 + echo "Update deploy state ${state}." >&2 + deploy_update_state ${state} + # cleanup before exiting deploy_cleanup @@ -69,13 +82,13 @@ for dir in $rootdir $repo; do fi done -# TODO(bqian) below ostree operations will be replaced by apt-ostree sudo mkdir ${repo} -p sudo ostree --repo=${repo} init --mode=archive || handle_error $? "Failed to init repo" sudo ostree --repo=${repo} remote add ${instbr} ${feed_url} --no-gpg-verify || handle_error $? "Failed to remote add repo" sudo ostree --repo=${repo} pull --depth=-1 --mirror ${instbr}:${instbr} || handle_error $?
"Failed to pull repo" +# TODO(bqian) make commit_id mandatory once the commit-id is built to metadata.xml for major releases if [ -z ${commit_id} ]; then # get commit id, only latest for now commit_id=$(ostree rev-parse --repo=${repo} ${instbr}) fi @@ -95,10 +108,7 @@ sudo ${rootdir}/usr/sbin/software-deploy/chroot_mounts.sh ${rootdir} || handle_e sudo mount --bind ${rootdir}/usr/local/kubernetes/${k8s_ver} ${rootdir}/usr/local/kubernetes/current sudo cp /etc/kubernetes/admin.conf ${rootdir}/etc/kubernetes/ -# TODO: need to switch back to /opt/software/${to_ver}/bin/prep-data-migration -# for running with apt-ostree in the future, when the script is copied to versioned directory -# at software upload, such as: DATA_PREP_SCRIPT="/opt/software/${to_ver}/bin/prep-data-migration" -DATA_PREP_SCRIPT="/usr/sbin/software-deploy/prep-data-migration" +DATA_PREP_SCRIPT="${exec_path}/prep-data-migration" # OS_AUTH_URL, OS_USERNAME, OS_PASSWORD, OS_PROJECT_NAME, OS_USER_DOMAIN_NAME, # OS_PROJECT_DOMAIN_NAME, OS_REGION_NAME are in env variables. cmd_line=" --rootdir=${rootdir} --from_release=${from_ver} --to_release=${to_ver}" @@ -116,5 +126,9 @@ SYNC_CONTROLLERS_SCRIPT="/usr/sbin/software-deploy/sync-controllers-feed" sync_controllers_cmd="${SYNC_CONTROLLERS_SCRIPT} ${cmd_line} --feed=${feed}" ${sync_controllers_cmd} || handle_error $? "Failed to sync feeds" +state="start-done" +deploy_update_state $state +echo "Update deploy state ${state}." + # cleanup after successful data migration deploy_cleanup diff --git a/software/scripts/usm_load_import b/software/scripts/usm_load_import index a9a604dd..fce6283d 100644 --- a/software/scripts/usm_load_import +++ b/software/scripts/usm_load_import @@ -24,78 +24,100 @@ import sys AVAILABLE_DIR = "/opt/software/metadata/available" FEED_OSTREE_BASE_DIR = "/var/www/pages/feed" -RELEASE_GA_NAME = "starlingx-%s.0" +RELEASE_GA_NAME = "starlingx-%s" SOFTWARE_STORAGE_DIR = "/opt/software" TMP_DIR = "/tmp" VAR_PXEBOOT_DIR = "/var/pxeboot" +#TODO(bqian) move the function to shareable utils. +def get_major_release_version(sw_release_version): + """Gets the major release for a given software version """ + if not sw_release_version: + return None + else: + try: + separator = '.' + separated_string = sw_release_version.split(separator) + major_version = separated_string[0] + separator + separated_string[1] + return major_version + except Exception: + return None + + def load_import(from_release, to_release, iso_mount_dir): """ Import the iso files to the feed and pxeboot directories - :param to_release: to release version - :param release_data: ReleaseData object + :param from_release: from release version (MM.mm/MM.mm.p) + :param to_release: to release version (MM.mm.p) + :param iso_mount_dir: iso mount dir """ + # for now the from_release is the same as from_major_rel. Until + # the sw_version is redefined to the major release version, there is a + # chance that from_release could be major.minor.patch.
+ from_major_rel = get_major_release_version(from_release) + to_major_rel = get_major_release_version(to_release) + try: # Copy the iso file to /var/www/pages/feed/rel- os.makedirs(FEED_OSTREE_BASE_DIR, exist_ok=True) - to_release_feed_dir = os.path.join(FEED_OSTREE_BASE_DIR, ("rel-%s" % to_release)) - if os.path.exists(to_release_feed_dir): - shutil.rmtree(to_release_feed_dir) - LOG.info("Removed existing %s", to_release_feed_dir) - os.makedirs(to_release_feed_dir, exist_ok=True) + to_feed_dir = os.path.join(FEED_OSTREE_BASE_DIR, ("rel-%s" % to_major_rel)) + if os.path.exists(to_feed_dir): + shutil.rmtree(to_feed_dir) + LOG.info("Removed existing %s", to_feed_dir) + os.makedirs(to_feed_dir, exist_ok=True) feed_contents = ["install_uuid", "efi.img", "kickstart", "ostree_repo", "pxeboot", "upgrades"] for content in feed_contents: src_abs_path = os.path.join(iso_mount_dir, content) if os.path.isfile(src_abs_path): - shutil.copyfile(src_abs_path, os.path.join(to_release_feed_dir, content)) - LOG.info("Copied %s to %s", src_abs_path, to_release_feed_dir) + shutil.copyfile(src_abs_path, os.path.join(to_feed_dir, content)) + LOG.info("Copied %s to %s", src_abs_path, to_feed_dir) elif os.path.isdir(src_abs_path): - shutil.copytree(src_abs_path, os.path.join(to_release_feed_dir, content)) - LOG.info("Copied %s to %s", src_abs_path, to_release_feed_dir) + shutil.copytree(src_abs_path, os.path.join(to_feed_dir, content)) + LOG.info("Copied %s to %s", src_abs_path, to_feed_dir) # Copy install_uuid to /var/www/pages/feed/rel- - from_release_feed_dir = os.path.join(FEED_OSTREE_BASE_DIR, ("rel-%s" % from_release)) - shutil.copyfile(os.path.join(from_release_feed_dir, "install_uuid"), - os.path.join(to_release_feed_dir, "install_uuid")) - LOG.info("Copied install_uuid to %s", to_release_feed_dir) + from_feed_dir = os.path.join(FEED_OSTREE_BASE_DIR, ("rel-%s" % from_major_rel)) + shutil.copyfile(os.path.join(from_feed_dir, "install_uuid"), + os.path.join(to_feed_dir, "install_uuid")) + LOG.info("Copied install_uuid to %s", to_feed_dir) - # Copy pxeboot-update-${from_release}.sh to from-release feed /upgrades - from_release_iso_upgrades_dir = os.path.join(from_release_feed_dir, "upgrades") - os.makedirs(from_release_iso_upgrades_dir, exist_ok=True) - shutil.copyfile(os.path.join("/etc", "pxeboot-update-%s.sh" % from_release), - os.path.join(from_release_iso_upgrades_dir, "pxeboot-update-%s.sh" % from_release)) - LOG.info("Copied pxeboot-update-%s.sh to %s", from_release, from_release_iso_upgrades_dir) + # Copy pxeboot-update-${from_major_release}.sh to from-release feed /upgrades + from_iso_upgrades_dir = os.path.join(from_feed_dir, "upgrades") + os.makedirs(from_iso_upgrades_dir, exist_ok=True) + shutil.copyfile(os.path.join("/etc", "pxeboot-update-%s.sh" % from_major_rel), + os.path.join(from_iso_upgrades_dir, "pxeboot-update-%s.sh" % from_major_rel)) + LOG.info("Copied pxeboot-update-%s.sh to %s", from_major_rel, from_iso_upgrades_dir) # Copy pxelinux.cfg.files to from-release feed /pxeboot - from_release_feed_pxeboot_dir = os.path.join(from_release_feed_dir, "pxeboot") - os.makedirs(from_release_feed_pxeboot_dir, exist_ok=True) + from_feed_pxeboot_dir = os.path.join(from_feed_dir, "pxeboot") + os.makedirs(from_feed_pxeboot_dir, exist_ok=True) # Find from-release pxelinux.cfg.files pxe_dir = os.path.join(VAR_PXEBOOT_DIR, "pxelinux.cfg.files") - from_release_pxe_files = glob.glob(os.path.join(pxe_dir, '*' + from_release)) - for from_release_pxe_file in from_release_pxe_files: - if 
os.path.isfile(from_release_pxe_file): - shutil.copyfile(from_release_pxe_file, os.path.join(from_release_feed_pxeboot_dir, - os.path.basename(from_release_pxe_file))) - LOG.info("Copied %s to %s", from_release_pxe_file, from_release_feed_pxeboot_dir) + from_pxe_files = glob.glob(os.path.join(pxe_dir, '*' + from_major_rel)) + for from_pxe_file in from_pxe_files: + if os.path.isfile(from_pxe_file): + shutil.copyfile(from_pxe_file, os.path.join(from_feed_pxeboot_dir, + os.path.basename(from_pxe_file))) + LOG.info("Copied %s to %s", from_pxe_file, from_feed_pxeboot_dir) # Converted from upgrade package extraction script - shutil.copyfile(os.path.join(to_release_feed_dir, "kickstart", "kickstart.cfg"), - os.path.join(to_release_feed_dir, "kickstart.cfg")) + shutil.copyfile(os.path.join(to_feed_dir, "kickstart", "kickstart.cfg"), + os.path.join(to_feed_dir, "kickstart.cfg")) # Copy bzImage and initrd - bzimage_files = glob.glob(os.path.join(to_release_feed_dir, 'pxeboot', 'bzImage*')) + bzimage_files = glob.glob(os.path.join(to_feed_dir, 'pxeboot', 'bzImage*')) for bzimage_file in bzimage_files: if os.path.isfile(bzimage_file): shutil.copyfile(bzimage_file, os.path.join(VAR_PXEBOOT_DIR, os.path.basename(bzimage_file))) LOG.info("Copied %s to %s", bzimage_file, VAR_PXEBOOT_DIR) - initrd_files = glob.glob(os.path.join(to_release_feed_dir, 'pxeboot', 'initrd*')) + initrd_files = glob.glob(os.path.join(to_feed_dir, 'pxeboot', 'initrd*')) for initrd_file in initrd_files: if os.path.isfile(initrd_file): shutil.copyfile(initrd_file, os.path.join(VAR_PXEBOOT_DIR, @@ -103,8 +125,8 @@ def load_import(from_release, to_release, iso_mount_dir): LOG.info("Copied %s to %s", initrd_file, VAR_PXEBOOT_DIR) # Copy to_release_feed/pxelinux.cfg.files to /var/pxeboot/pxelinux.cfg.files - pxeboot_cfg_files = glob.glob(os.path.join(to_release_feed_dir, 'pxeboot', 'pxelinux.cfg.files', - '*' + from_release)) + pxeboot_cfg_files = glob.glob(os.path.join(to_feed_dir, 'pxeboot', 'pxelinux.cfg.files', + '*' + from_major_rel)) for pxeboot_cfg_file in pxeboot_cfg_files: if os.path.isfile(pxeboot_cfg_file): shutil.copyfile(pxeboot_cfg_file, os.path.join(VAR_PXEBOOT_DIR, @@ -113,15 +135,15 @@ def load_import(from_release, to_release, iso_mount_dir): LOG.info("Copied %s to %s", pxeboot_cfg_file, VAR_PXEBOOT_DIR) # Copy pxeboot-update.sh to /etc - pxeboot_update_filename = "pxeboot-update-%s.sh" % to_release - shutil.copyfile(os.path.join(to_release_feed_dir, "upgrades", pxeboot_update_filename), + pxeboot_update_filename = "pxeboot-update-%s.sh" % to_major_rel + shutil.copyfile(os.path.join(to_feed_dir, "upgrades", pxeboot_update_filename), os.path.join("/etc", pxeboot_update_filename)) - LOG.info("Copied pxeboot-update-%s.sh to %s", to_release, "/etc") + LOG.info("Copied pxeboot-update-%s.sh to %s", to_major_rel, "/etc") except Exception as e: LOG.exception("Load import failed. 
Error: %s" % str(e)) - shutil.rmtree(to_release_feed_dir) - LOG.info("Removed %s", to_release_feed_dir) + shutil.rmtree(to_feed_dir) + LOG.info("Removed %s", to_feed_dir) raise try: @@ -157,7 +179,7 @@ def main(): parser.add_argument( "--to-release", required=True, - help="The to-release version.", + help="The to-release version, MM.mm.p", ) parser.add_argument( diff --git a/software/setup.cfg b/software/setup.cfg index 132c5e4a..255ebb5d 100644 --- a/software/setup.cfg +++ b/software/setup.cfg @@ -36,6 +36,7 @@ console_scripts = software-controller-daemon = software.software_controller:main software-agent = software.software_agent:main software-migrate = software.utilities.migrate:migrate + software-deploy-update = software.utilities.update_deploy_state:update_state [wheel] diff --git a/software/software/constants.py b/software/software/constants.py index fa33cb2f..db98cfb8 100644 --- a/software/software/constants.py +++ b/software/software/constants.py @@ -24,6 +24,9 @@ CONTROLLER_FLOATING_HOSTNAME = "controller" SOFTWARE_STORAGE_DIR = "/opt/software" SOFTWARE_CONFIG_FILE_LOCAL = "/etc/software/software.conf" +DEPLOY_PRECHECK_SCRIPT = "deploy-precheck" +DEPLOY_START_SCRIPT = "software-deploy-start" + AVAILABLE_DIR = "%s/metadata/available" % SOFTWARE_STORAGE_DIR UNAVAILABLE_DIR = "%s/metadata/unavailable" % SOFTWARE_STORAGE_DIR DEPLOYING_DIR = "%s/metadata/deploying" % SOFTWARE_STORAGE_DIR @@ -71,12 +74,26 @@ DEPLOYING_COMPLETE = 'deploying-complete' DEPLOYING_HOST = 'deploying-host' DEPLOYING_START = 'deploying-start' UNAVAILABLE = 'unavailable' +UNKNOWN = 'n/a' VALID_DEPLOY_START_STATES = [ AVAILABLE, DEPLOYED, ] +# host deploy substate +HOST_DEPLOY_PENDING = 'pending' +HOST_DEPLOY_STARTED = 'deploy-started' +HOST_DEPLOY_DONE = 'deploy-done' +HOST_DEPLOY_FAILED = 'deploy-failed' + +VALID_HOST_DEPLOY_STATE = [ + HOST_DEPLOY_PENDING, + HOST_DEPLOY_STARTED, + HOST_DEPLOY_DONE, + HOST_DEPLOY_FAILED +] + VALID_RELEASE_STATES = [AVAILABLE, UNAVAILABLE, DEPLOYING, DEPLOYED, REMOVING] @@ -137,7 +154,7 @@ SIG_EXTENSION = ".sig" PATCH_EXTENSION = ".patch" SUPPORTED_UPLOAD_FILE_EXT = [ISO_EXTENSION, SIG_EXTENSION, PATCH_EXTENSION] SCRATCH_DIR = "/scratch" -RELEASE_GA_NAME = "starlingx-%s.0" +RELEASE_GA_NAME = "starlingx-%s" LOCAL_LOAD_IMPORT_FILE = "/etc/software/usm_load_import" # Precheck constants @@ -156,17 +173,12 @@ UNKNOWN_SOFTWARE_VERSION = "0.0.0" class DEPLOY_STATES(Enum): - ACTIVATING = 'activating' - ACTIVATED = 'activated' - ACTIVATION_FAILED = 'activation-failed' - DATA_MIGRATION_FAILED = 'data-migration-failed' - DATA_MIGRATION = 'data-migration' - DEPLOYING = 'deploying' - DEPLOYED = 'deployed' - PRESTAGING = 'prestaging' - PRESTAGED = 'prestaged' - PRESTAGING_FAILED = 'prestaging-failed' - UPGRADE_CONTROLLERS = 'upgrade-controllers' - UPGRADE_CONTROLLER_FAILED = 'upgrade-controller-failed' - UPGRADE_HOSTS = 'upgrade-hosts' - UNKNOWN = 'unknown' + ACTIVATE = 'activate' + ACTIVATE_DONE = 'activate-done' + ACTIVATE_FAILED = 'activate-failed' + START = 'start' + START_DONE = 'start-done' + START_FAILED = 'start-failed' + HOST = 'host' + HOST_DONE = 'host-done' + HOST_FAILED = 'host-failed' diff --git a/software/software/messages.py b/software/software/messages.py index a096aa8f..4f908fe6 100644 --- a/software/software/messages.py +++ b/software/software/messages.py @@ -22,6 +22,8 @@ PATCHMSG_DROP_HOST_REQ = 11 PATCHMSG_SEND_LATEST_FEED_COMMIT = 12 PATCHMSG_DEPLOY_STATE_UPDATE = 13 PATCHMSG_DEPLOY_STATE_UPDATE_ACK = 14 +PATCHMSG_DEPLOY_STATE_CHANGED = 15 
+PATCHMSG_DEPLOY_STATE_CHANGED_ACK = 16 PATCHMSG_STR = { PATCHMSG_UNKNOWN: "unknown", @@ -38,7 +40,9 @@ PATCHMSG_STR = { PATCHMSG_DROP_HOST_REQ: "drop-host-req", PATCHMSG_SEND_LATEST_FEED_COMMIT: "send-latest-feed-commit", PATCHMSG_DEPLOY_STATE_UPDATE: "deploy-state-update", - PATCHMSG_DEPLOY_STATE_UPDATE_ACK: "deploy-state-update-ack" + PATCHMSG_DEPLOY_STATE_UPDATE_ACK: "deploy-state-update-ack", + PATCHMSG_DEPLOY_STATE_CHANGED: "deploy-state-changed", + PATCHMSG_DEPLOY_STATE_CHANGED_ACK: "deploy-state-changed-ack", } MSG_ACK_SUCCESS = 'success' diff --git a/software/software/software_controller.py b/software/software/software_controller.py index 57b74172..f52d0f13 100644 --- a/software/software/software_controller.py +++ b/software/software/software_controller.py @@ -44,6 +44,7 @@ from software.exceptions import ReleaseInvalidRequest from software.exceptions import ReleaseValidationFailure from software.exceptions import ReleaseMismatchFailure from software.exceptions import ReleaseIsoDeleteFailure +from software.exceptions import SoftwareServiceError from software.release_data import SWReleaseCollection from software.software_functions import collect_current_load_for_hosts from software.software_functions import parse_release_metadata @@ -643,6 +644,104 @@ class SoftwareMessageDeployStateUpdateAck(messages.PatchMessage): LOG.error("Peer controller deploy state has diverged.") +class SWMessageDeployStateChanged(messages.PatchMessage): + def __init__(self): + messages.PatchMessage.__init__(self, messages.PATCHMSG_DEPLOY_STATE_CHANGED) + self.valid = False + self.agent = None + self.deploy_state = None + self.hostname = None + self.host_state = None + + def decode(self, data): + """ + The message is a serialized json object: + { + "msgtype": "deploy-state-changed", + "msgversion": 1, + "agent": "", + "deploy-state": "", + "hostname": "", + "host-state": "" + } + """ + + messages.PatchMessage.decode(self, data) + + self.valid = True + self.agent = None + + valid_agents = ['deploy-start'] + if 'agent' in data: + agent = data['agent'] + else: + agent = 'unknown' + + if agent not in valid_agents: + # ignore msg from unknown senders + LOG.info("%s received from unknown agent %s" % + (messages.PATCHMSG_DEPLOY_STATE_CHANGED, agent)) + self.valid = False + + valid_state = { + DEPLOY_STATES.START_DONE.value: DEPLOY_STATES.START_DONE, + DEPLOY_STATES.START_FAILED.value: DEPLOY_STATES.START_FAILED + } + if 'deploy-state' in data and data['deploy-state']: + deploy_state = data['deploy-state'] + if deploy_state in valid_state: + self.deploy_state = valid_state[deploy_state] + LOG.info("%s received from %s with deploy-state %s" % + (messages.PATCHMSG_DEPLOY_STATE_CHANGED, agent, deploy_state)) + else: + self.valid = False + LOG.error("%s received from %s with invalid deploy-state %s" % + (messages.PATCHMSG_DEPLOY_STATE_CHANGED, agent, deploy_state)) + + if 'hostname' in data and data['hostname']: + self.hostname = data['hostname'] + + if 'host-state' in data and data['host-state']: + host_state = data['host-state'] + if host_state not in constants.VALID_HOST_DEPLOY_STATE: + LOG.error("%s received from %s with invalid host-state %s" % + (messages.PATCHMSG_DEPLOY_STATE_CHANGED, agent, host_state)) + self.valid = False + else: + self.host_state = host_state + + if self.valid: + self.valid = (bool(self.host_state and self.hostname) != bool(self.deploy_state)) + + if not self.valid: + LOG.error("%s received from %s as invalid %s" % + (messages.PATCHMSG_DEPLOY_STATE_CHANGED, agent, data)) + + def handle(self, 
sock, addr): + global sc + if not self.valid: + # nothing to do + return + + if self.deploy_state: + LOG.info("Received deploy state changed to %s, agent %s" % + (self.deploy_state, self.agent)) + sc.deploy_state_changed(self.deploy_state) + else: + LOG.info("Received %s deploy state changed to %s, agent %s" % + (self.hostname, self.host_state, self.agent)) + sc.host_deploy_state_changed(self.hostname, self.host_state) + + sock.sendto(str.encode("OK"), addr) + + def send(self, sock): + global sc + LOG.info("sending deploy state changed message") + self.encode() + message = json.dumps(self.message) + sock.sendto(str.encode(message), (sc.controller_address, cfg.controller_port)) + + class PatchController(PatchService): def __init__(self): PatchService.__init__(self) @@ -1036,6 +1135,29 @@ class PatchController(PatchService): return dict(info=msg_info, warning=msg_warning, error=msg_error) + def major_release_upload_check(self): + """ + major release upload semantic check + """ + valid_controllers = ['controller-0'] + if socket.gethostname() not in valid_controllers: + msg = f"Upload rejected, major release must be uploaded to {valid_controllers}" + LOG.info(msg) + raise SoftwareServiceError(error=msg) + + max_major_releases = 2 + major_releases = [] + for rel in self.release_collection.iterate_releases(): + major_rel = utils.get_major_release_version(rel.sw_version) + if major_rel not in major_releases: + major_releases.append(major_rel) + + if len(major_releases) >= max_major_releases: + msg = f"Major releases {major_releases} have already been uploaded. " + \ + f"Max major releases is {max_major_releases}" + LOG.info(msg) + raise SoftwareServiceError(error=msg) + def _process_upload_upgrade_files(self, upgrade_files, release_data): """ Process the uploaded upgrade files @@ -1048,6 +1170,9 @@ class PatchController(PatchService): local_error = "" release_meta_info = {} + # validate this major release upload + self.major_release_upload_check() + iso_mount_dir = None try: if not verify_files([upgrade_files[constants.ISO_EXTENSION]], @@ -1102,6 +1227,10 @@ class PatchController(PatchService): shutil.rmtree(to_release_bin_dir) shutil.copytree(os.path.join(iso_mount_dir, "upgrades", constants.SOFTWARE_DEPLOY_FOLDER), to_release_bin_dir) + # Copy metadata.xml to /opt/software/rel-/ + to_file = os.path.join(constants.SOFTWARE_STORAGE_DIR, ("rel-%s" % to_release), "metadata.xml") + metadata_file = os.path.join(iso_mount_dir, "upgrades", "metadata.xml") + shutil.copyfile(metadata_file, to_file) # Update the release metadata abs_stx_release_metadata_file = os.path.join( @@ -1363,7 +1492,7 @@ class PatchController(PatchService): constants.DEPLOYED] if deploystate not in ignore_states: - msg = "Release %s is active and cannot be deleted." % release_id + msg = f"Release {release_id} is {deploystate} and cannot be deleted."
LOG.error(msg) msg_error += msg + "\n" id_verification = False @@ -1962,13 +2091,11 @@ class PatchController(PatchService): msg_warning = "" msg_error = "" - # TODO(bqian) when the deploy-precheck script is moved to /opt/software/rel-/, - # change the code below to call the right script with patch number in rel_ver = utils.get_major_release_version(release_version) - rel_path = "rel-%s" % rel_ver + rel_path = "rel-%s" % release_version deployment_dir = os.path.join(constants.FEED_OSTREE_BASE_DIR, rel_path) - precheck_script = os.path.join(deployment_dir, "upgrades", - constants.SOFTWARE_DEPLOY_FOLDER, "deploy-precheck") + precheck_script = utils.get_precheck_script(release_version) + if not os.path.isdir(deployment_dir) or not os.path.isfile(precheck_script): msg = "Upgrade files for deployment %s are not present on the system, " \ "cannot proceed with the precheck." % rel_ver @@ -2041,7 +2168,14 @@ class PatchController(PatchService): def _deploy_upgrade_start(self, to_release): LOG.info("start deploy upgrade to %s from %s" % (to_release, SW_VERSION)) - cmd_path = "/usr/sbin/software-deploy/software-deploy-start" + deploy_script_name = constants.DEPLOY_START_SCRIPT + cmd_path = utils.get_software_deploy_script(to_release, deploy_script_name) + if not os.path.isfile(cmd_path): + msg = f"{deploy_script_name} was not found" + LOG.error(msg) + raise SoftwareServiceError(f"{deploy_script_name} was not found. " + "The uploaded software could have been damaged. " + "Please delete the software and re-upload it") major_to_release = utils.get_major_release_version(to_release) k8s_ver = get_k8s_ver() postgresql_port = str(cfg.alt_postgresql_port) @@ -2076,6 +2210,16 @@ class PatchController(PatchService): LOG.error("Failed to start command: %s. Error %s" % (' '.join(upgrade_start_cmd), e)) return False + def deploy_state_changed(self, deploy_state): + '''Handle 'deploy state change' event, invoked when operations complete. ''' + dbapi = db_api.get_instance() + dbapi.update_deploy(deploy_state) + + def host_deploy_state_changed(self, hostname, host_deploy_state): + '''Handle 'host deploy state change' event. 
''' + dbapi = db_api.get_instance() + dbapi.update_deploy_host(hostname, host_deploy_state) + def software_deploy_start_api(self, deployment: str, force: bool, **kwargs) -> dict: """ Start deployment by applying the changes to the feed ostree @@ -2100,7 +2244,7 @@ class PatchController(PatchService): collect_current_load_for_hosts() dbapi = db_api.get_instance() dbapi.create_deploy(SW_VERSION, to_release, True) - dbapi.update_deploy(DEPLOY_STATES.DATA_MIGRATION) + dbapi.update_deploy(DEPLOY_STATES.START) sw_rel = self.release_collection.get_release_by_id(deployment) if sw_rel is None: raise InternalError("%s cannot be found" % to_release) @@ -2997,6 +3141,8 @@ class PatchControllerMainThread(threading.Thread): msg = PatchMessageDropHostReq() elif msgdata['msgtype'] == messages.PATCHMSG_DEPLOY_STATE_UPDATE_ACK: msg = SoftwareMessageDeployStateUpdateAck() + elif msgdata['msgtype'] == messages.PATCHMSG_DEPLOY_STATE_CHANGED: + msg = SWMessageDeployStateChanged() if msg is None: msg = messages.PatchMessage() diff --git a/software/software/software_entities.py b/software/software/software_entities.py index 92b69110..62297781 100644 --- a/software/software/software_entities.py +++ b/software/software/software_entities.py @@ -233,7 +233,7 @@ class DeployHandler(Deploy): super().__init__() self.data = get_software_filesystem_data() - def create(self, from_release, to_release, reboot_required, state=DEPLOY_STATES.DEPLOYING): + def create(self, from_release, to_release, reboot_required, state=DEPLOY_STATES.START): """ Create a new deploy with given from and to release version :param from_release: The source release version. diff --git a/software/software/software_functions.py b/software/software/software_functions.py index f33c11c5..c4baafb4 100644 --- a/software/software/software_functions.py +++ b/software/software/software_functions.py @@ -18,6 +18,7 @@ import sys import tarfile import tempfile from oslo_config import cfg as oslo_cfg +from packaging import version from lxml import etree as ElementTree from xml.dom import minidom @@ -31,6 +32,7 @@ from software.exceptions import ReleaseUploadFailure from software.exceptions import ReleaseValidationFailure from software.exceptions import ReleaseMismatchFailure from software.exceptions import SoftwareFail +from software.exceptions import SoftwareServiceError import software.constants as constants import software.utils as utils @@ -1102,6 +1104,39 @@ def unmount_iso_load(iso_path): pass +def get_metadata_files(root_dir): + files = [] + for filename in os.listdir(root_dir): + fn, ext = os.path.splitext(filename) + if ext == '.xml' and fn.endswith('-metadata'): + fullname = os.path.join(root_dir, filename) + files.append(fullname) + return files + + +def get_sw_version(metadata_files): + # from a list of metadata files, find the latest sw_version (e.g 24.0.1) + unset_ver = "0.0.0" + rel_ver = unset_ver + for f in metadata_files: + try: + root = ElementTree.parse(f).getroot() + except Exception: + msg = f"Cannot parse {f}" + LOG.exception(msg) + continue + + sw_ver = root.findtext("sw_version") + if sw_ver and version.parse(sw_ver) > version.parse(rel_ver): + rel_ver = sw_ver + + if rel_ver == unset_ver: + err_msg = "Invalid metadata. Cannot identify the sw_version." 
+ raise SoftwareServiceError(err_msg) + + return rel_ver + + def read_upgrade_support_versions(mounted_dir): """ Read upgrade metadata file to get supported upgrades @@ -1112,9 +1147,11 @@ def read_upgrade_support_versions(mounted_dir): try: root = ElementTree.parse(mounted_dir + "/upgrades/metadata.xml").getroot() except IOError: - raise MetadataFail("Failed to read /upgrades/metadata.xml file") + raise SoftwareServiceError("Failed to read /upgrades/metadata.xml file") + + rel_metadata_files = get_metadata_files(os.path.join(mounted_dir, "upgrades")) + to_release = get_sw_version(rel_metadata_files) - to_release = root.findtext("version") supported_from_releases = [] supported_upgrades = root.find("supported_upgrades").findall("upgrade") for upgrade in supported_upgrades: diff --git a/software/software/tests/test_software_controller.py b/software/software/tests/test_software_controller.py index 5c6356df..3c18d762 100644 --- a/software/software/tests/test_software_controller.py +++ b/software/software/tests/test_software_controller.py @@ -32,7 +32,9 @@ class TestSoftwareController(unittest.TestCase): @patch('software.software_controller.shutil.copytree') @patch('software.software_controller.parse_release_metadata') @patch('software.software_controller.unmount_iso_load') + @patch('software.software_controller.PatchController.major_release_upload_check') def test_process_upload_upgrade_files(self, + mock_major_release_upload_check, mock_unmount_iso_load, mock_parse_release_metadata, mock_copytree, # pylint: disable=unused-argument @@ -47,6 +49,7 @@ class TestSoftwareController(unittest.TestCase): controller.release_data = MagicMock() # Mock the return values of the mocked functions + mock_major_release_upload_check.return_value = True mock_verify_files.return_value = True mock_mount_iso_load.return_value = '/test/iso' mock_read_upgrade_support_versions.return_value = ( @@ -86,7 +89,9 @@ class TestSoftwareController(unittest.TestCase): @patch('software.software_controller.verify_files') @patch('software.software_controller.mount_iso_load') @patch('software.software_controller.unmount_iso_load') + @patch('software.software_controller.PatchController.major_release_upload_check') def test_process_upload_upgrade_files_invalid_signature(self, + mock_major_release_upload_check, mock_unmount_iso_load, # pylint: disable=unused-argument mock_mount_iso_load, mock_verify_files, @@ -97,6 +102,7 @@ class TestSoftwareController(unittest.TestCase): # Mock the return values of the mocked functions mock_verify_files.return_value = False mock_mount_iso_load.return_value = '/test/iso' + mock_major_release_upload_check.return_value = True # Call the function being tested with patch('software.software_controller.SW_VERSION', '1.0'): @@ -111,13 +117,16 @@ class TestSoftwareController(unittest.TestCase): @patch('software.software_controller.PatchController.__init__', return_value=None) @patch('software.software_controller.verify_files', side_effect=ReleaseValidationFailure('Invalid signature file')) + @patch('software.software_controller.PatchController.major_release_upload_check') def test_process_upload_upgrade_files_validation_error(self, + mock_major_release_upload_check, mock_verify_files, mock_init): # pylint: disable=unused-argument controller = PatchController() controller.release_data = MagicMock() mock_verify_files.return_value = False + mock_major_release_upload_check.return_value = True # Call the function being tested info, warning, error, _ = 
controller._process_upload_upgrade_files(self.upgrade_files, # pylint: disable=protected-access diff --git a/software/software/utilities/update_deploy_state.py b/software/software/utilities/update_deploy_state.py new file mode 100644 index 00000000..13328d57 --- /dev/null +++ b/software/software/utilities/update_deploy_state.py @@ -0,0 +1,112 @@ +# +# Copyright (c) 2024 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +import argparse +import json +from oslo_log import log +import socket + +import software.config as cfg +from software.messages import PATCHMSG_DEPLOY_STATE_CHANGED + + +LOG = log.getLogger(__name__) +MAX_RETRY = 3 +RETRY_INTERVAL = 1 +ACK_OK = "OK" + + +def get_udp_socket(server_addr, server_port): + addr = socket.getaddrinfo(server_addr, server_port) + if len(addr) > 0: + addr_family = addr[0][0] + else: + err = "Invalid server address (%s) or port (%s)" % \ + (server_addr, server_port) + raise Exception(err) + + sock = socket.socket(addr_family, socket.SOCK_DGRAM) + return sock + + +def update_deploy_state(server_addr, server_port, agent, deploy_state=None, host=None, host_state=None, timeout=1): + """ + Send MessageDeployStateChanged message to software-controller via + udp packet, wait for ack or raise exception. + The message is a serialized json object: + { + "msgtype": PATCHMSG_DEPLOY_STATE_CHANGED, + "msgversion": 1, + "agent": "", + "deploy-state": "", + "hostname": "", + "host-state": "" + } + """ + + msg = { + "msgtype": PATCHMSG_DEPLOY_STATE_CHANGED, + "msgversion": 1, + "agent": agent, + "deploy-state": deploy_state, + "hostname": host, + "host-state": host_state + } + + msg_txt = json.dumps(msg) + + sock = get_udp_socket(server_addr, server_port) + + if timeout >= 0: + sock.settimeout(timeout) + + resp = "" + + for _ in range(MAX_RETRY): + sock.sendto(str.encode(msg_txt), (server_addr, server_port)) + + try: + resp = sock.recv(64).decode() + except socket.timeout: + LOG.warning("timeout %s sec expired for ack" % timeout) + else: + break + + if resp != ACK_OK: + err = "%s failed updating deploy state %s %s %s" % \ + (agent, deploy_state, host, host_state) + raise Exception(err) + + +def update_state(): + # this is the entry point to update deploy state + + parser = argparse.ArgumentParser(add_help=False) + + parser.add_argument("agent", + default=False, + help="service agent") + + parser.add_argument('-s', '--state', + default=False, + help="deploy state") + + parser.add_argument('-h', '--host', + default=False, + help="host name") + + parser.add_argument('-t', '--host_state', + default=False, + help="host state") + + args = parser.parse_args() + + server = "controller" + cfg.read_config() + server_port = cfg.controller_port + update_deploy_state(server, int(server_port), args.agent, + deploy_state=args.state, + host=args.host, host_state=args.host_state) diff --git a/software/software/utils.py b/software/software/utils.py index a0a50367..042b0d9f 100644 --- a/software/software/utils.py +++ b/software/software/utils.py @@ -77,6 +77,28 @@ def get_major_release_version(sw_release_version): return None +def get_feed_path(sw_version): + sw_ver = get_major_release_version(sw_version) + path = os.path.join(constants.UPGRADE_FEED_DIR, f"rel-{sw_ver}") + return path + + +def get_software_deploy_script(sw_version, script): + if script == constants.DEPLOY_PRECHECK_SCRIPT: + return get_precheck_script(sw_version) + + feed_dir = get_feed_path(sw_version) + script_path = os.path.join(feed_dir, "upgrades/software-deploy", script) + return script_path +
+ +def get_precheck_script(sw_version): + deploy_precheck = os.path.join("/opt/software/", + f"rel-{sw_version}", + "bin", constants.DEPLOY_PRECHECK_SCRIPT) + return deploy_precheck + + def compare_release_version(sw_release_version_1, sw_release_version_2): """Compares release versions and returns True if first is higher than second """ if not sw_release_version_1 or not sw_release_version_2: