From 378e2010fe0633a0489379c6f9a03147a94a71b8 Mon Sep 17 00:00:00 2001 From: Luis Eduardo Bonatti Date: Thu, 15 Feb 2024 23:26:38 -0300 Subject: [PATCH] Deploy host-list implementation This commit makes several changes to deploy host-list. It adds a function that queries the hostnames from sysinv to create the deploy-host entities during deploy start. It changes the endpoint to the GET verb. When no deployment is in progress, the endpoint returns an empty list and the CLI prints "No deploy in progress." When a deployment is in progress, the CLI behaves the same as before, but the endpoint returns data in the format below: [{'hostname': '', 'software_release': '', 'target_release': '', 'reboot_required': 'str', 'host_state': ''}] This commit also changes the wait_for_install_complete function to follow the new state logic. Note: Software deploy host is affected by the state-related changes in this commit and will need a follow-up commit covering state changes during deploy start and deploy host itself. Test Plan: PASS: Software deploy host-list with/without deployment in progress. PASS: Deploy_host creation/update/get/delete. PASS: Collect hostnames to deploy host entities during deploy start. 
Story: 2010676 Task: 49586 Change-Id: I7b03df30fd8e326637a3ffc031e0fdf543cb6356 Signed-off-by: Luis Eduardo Bonatti --- software-client/software_client/constants.py | 7 +- .../software_client/software_client.py | 86 +++++++------------ .../software/api/controllers/v1/software.py | 14 ++- software/software/constants.py | 6 ++ software/software/software_controller.py | 45 ++++------ software/software/software_entities.py | 22 ++--- software/software/software_functions.py | 24 ++++-- software/software/sysinv_utils.py | 9 ++ 8 files changed, 102 insertions(+), 111 deletions(-) diff --git a/software-client/software_client/constants.py b/software-client/software_client/constants.py index 0ec789b1..a0a32322 100644 --- a/software-client/software_client/constants.py +++ b/software-client/software_client/constants.py @@ -1,10 +1,9 @@ """ -Copyright (c) 2023 Wind River Systems, Inc. +Copyright (c) 2023-2024 Wind River Systems, Inc. SPDX-License-Identifier: Apache-2.0 """ - ADDRESS_VERSION_IPV4 = 4 ADDRESS_VERSION_IPV6 = 6 CONTROLLER_FLOATING_HOSTNAME = "controller" @@ -61,3 +60,7 @@ SIG_EXTENSION = ".sig" PATCH_EXTENSION = ".patch" SUPPORTED_UPLOAD_FILE_EXT = [ISO_EXTENSION, SIG_EXTENSION, PATCH_EXTENSION] SCRATCH_DIR = "/scratch" + +DEPLOYING = 'deploying' +FAILED = 'failed' +PENDING = 'pending' diff --git a/software-client/software_client/software_client.py b/software-client/software_client/software_client.py index f99c277a..bcd1fff2 100644 --- a/software-client/software_client/software_client.py +++ b/software-client/software_client/software_client.py @@ -612,14 +612,11 @@ def release_list_req(args): def print_software_deploy_host_list_result(req): if req.status_code == 200: - data = json.loads(req.text) - if 'data' not in data: - print("Invalid data returned:") - print_result_debug(req) + data = req.json().get("data", None) + if not data: + print("No deploy in progress.\n") return - agents = data['data'] - # Calculate column widths hdr_hn = "Hostname" hdr_rel = "Software 
Release" @@ -633,19 +630,19 @@ def print_software_deploy_host_list_result(req): width_rr = len(hdr_rr) width_state = len(hdr_state) - for agent in sorted(agents, key=lambda a: a["hostname"]): - if agent.get("deploy_host_state") is None: - agent["deploy_host_state"] = "No active deployment" - if agent.get("to_release") is None: - agent["to_release"] = "N/A" + for agent in sorted(data, key=lambda a: a["hostname"]): + if agent.get("host_state") is None: + agent["host_state"] = "No active deployment" + if agent.get("target_release") is None: + agent["target_release"] = "N/A" if len(agent["hostname"]) > width_hn: width_hn = len(agent["hostname"]) - if len(agent["sw_version"]) > width_rel: - width_rel = len(agent["sw_version"]) - if len(agent["to_release"]) > width_tg_rel: - width_tg_rel = len(agent["to_release"]) - if len(agent["deploy_host_state"]) > width_state: - width_state = len(agent["deploy_host_state"]) + if len(agent["software_release"]) > width_rel: + width_rel = len(agent["software_release"]) + if len(agent["target_release"]) > width_tg_rel: + width_tg_rel = len(agent["target_release"]) + if len(agent["host_state"]) > width_state: + width_state = len(agent["host_state"]) print("{0:^{width_hn}} {1:^{width_rel}} {2:^{width_tg_rel}} {3:^{width_rr}} {4:^{width_state}}".format( hdr_hn, hdr_rel, hdr_tg_rel, hdr_rr, hdr_state, @@ -654,13 +651,13 @@ def print_software_deploy_host_list_result(req): print("{0} {1} {2} {3} {4}".format( '=' * width_hn, '=' * width_rel, '=' * width_tg_rel, '=' * width_rr, '=' * width_state)) - for agent in sorted(agents, key=lambda a: a["hostname"]): + for agent in sorted(data, key=lambda a: a["hostname"]): print("{0:<{width_hn}} {1:^{width_rel}} {2:^{width_tg_rel}} {3:^{width_rr}} {4:^{width_state}}".format( agent["hostname"], - agent["sw_version"], - agent["to_release"], + agent["software_release"], + agent["target_release"], "Yes" if agent.get("reboot_required", None) else "No", - agent["deploy_host_state"], + agent["host_state"], 
width_hn=width_hn, width_rel=width_rel, width_tg_rel=width_tg_rel, width_rr=width_rr, width_state=width_state)) elif req.status_code == 500: @@ -723,54 +720,33 @@ def wait_for_install_complete(agent_ip): break if req.status_code == 200: - data = json.loads(req.text) - if 'data' not in data: + data = req.json().get("data", None) + if not data: print("Invalid host-list data returned:") print_result_debug(req) rc = 1 break - state = None - agents = data['data'] - interim_state = None + host_state = None - for agent in agents: - if agent['hostname'] == agent_ip \ - or agent['ip'] == agent_ip: - state = agent.get('state') - interim_state = agent.get('interim_state') + for d in data: + if d['hostname'] == agent_ip: + host_state = d.get('host_state') - if state is None: - # If the software daemons have restarted, there's a - # window after the software-controller restart that the - # hosts table will be empty. - retriable_count += 1 - if retriable_count <= max_retries: - continue - else: - print("%s agent has timed out." % agent_ip) - rc = 1 - break - - if state == constants.PATCH_AGENT_STATE_INSTALLING or \ - interim_state is True: - # Still installing + if host_state == constants.DEPLOYING: + # Still deploying sys.stdout.write(".") sys.stdout.flush() - elif state == constants.PATCH_AGENT_STATE_INSTALL_REJECTED: - print("\nInstallation rejected. Node must be locked") + elif host_state == constants.FAILED: + print("\nDeployment failed. Please check logs for details.") rc = 1 break - elif state == constants.PATCH_AGENT_STATE_INSTALL_FAILED: - print("\nInstallation failed. 
Please check logs for details.") - rc = 1 - break - elif state == constants.PATCH_AGENT_STATE_IDLE: - print("\nInstallation was successful.") + elif host_state == constants.DEPLOYED: + print("\nDeployment was successful.") rc = 0 break else: - print("\nPatch agent is reporting unknown state: %s" % state) + print("\nReported unknown state: %s" % host_state) rc = 1 break diff --git a/software/software/api/controllers/v1/software.py b/software/software/api/controllers/v1/software.py index 1861c8bf..1621410f 100644 --- a/software/software/api/controllers/v1/software.py +++ b/software/software/api/controllers/v1/software.py @@ -10,6 +10,7 @@ import os from oslo_log import log from pecan import expose from pecan import request +from pecan import Response import shutil from software.exceptions import SoftwareError @@ -231,14 +232,11 @@ class SoftwareAPIController(object): return dict(sd=sd) - @expose('json') - @expose('query_hosts.xml', content_type='application/xml') - def host_list(self, *args): # pylint: disable=unused-argument - try: - query_hosts = sc.deploy_host_list() - except Exception as e: - return dict(error=str(e)) - return dict(data=query_hosts) + @expose('json', method="GET") + def host_list(self): + query_hosts = dict(data=sc.deploy_host_list()) + response_data = json.dumps(query_hosts) + return Response(body=response_data, status_code=200) @expose(method='GET', template='json') def in_sync_controller(self): diff --git a/software/software/constants.py b/software/software/constants.py index 9bc5e2b7..73b945a3 100644 --- a/software/software/constants.py +++ b/software/software/constants.py @@ -181,3 +181,9 @@ class DEPLOY_STATES(Enum): HOST = 'host' HOST_DONE = 'host-done' HOST_FAILED = 'host-failed' + +class DEPLOY_HOST_STATES(Enum): + DEPLOYED = 'deployed' + DEPLOYING = 'deploying' + FAILED = 'failed' + PENDING = 'pending' diff --git a/software/software/software_controller.py b/software/software/software_controller.py index acf89cbf..b32bc1aa 100644 --- 
a/software/software/software_controller.py +++ b/software/software/software_controller.py @@ -46,6 +46,7 @@ from software.exceptions import ReleaseIsoDeleteFailure from software.exceptions import SoftwareServiceError from software.release_data import SWReleaseCollection from software.software_functions import collect_current_load_for_hosts +from software.software_functions import create_deploy_hosts from software.software_functions import parse_release_metadata from software.software_functions import configure_logging from software.software_functions import mount_iso_load @@ -2267,6 +2268,7 @@ class PatchController(PatchService): if self._deploy_upgrade_start(to_release): collect_current_load_for_hosts() + create_deploy_hosts() self.db_api_instance.begin_update() try: self.update_and_sync_deploy_state(self.db_api_instance.create_deploy, @@ -2319,6 +2321,7 @@ class PatchController(PatchService): if operation == "apply": collect_current_load_for_hosts() + create_deploy_hosts() # reverse = True is used for apply operation deployment_list = self.release_apply_remove_order(deployment, running_sw_version, reverse=True) @@ -2411,6 +2414,8 @@ class PatchController(PatchService): self.interim_state[release] = list(self.hosts) elif operation == "remove": + collect_current_load_for_hosts() + create_deploy_hosts() removed = False deployment_list = self.release_apply_remove_order(deployment, running_sw_version) msg = "Deploy start order for remove operation: %s" % ",".join(deployment_list) @@ -2900,40 +2905,20 @@ class PatchController(PatchService): return dict(data) def deploy_host_list(self): - query_hosts = self.query_host_cache() deploy_hosts = self.db_api_instance.get_deploy_host() deploy = self.db_api_instance.get_deploy() - # If there's a hostname missing, add it to query hosts. 
- hostnames = [] - for host in query_hosts: - hostnames.append(host["hostname"]) - for host in deploy_hosts: - if host["hostname"] not in hostnames: - query_hosts.append(host) - deploy_host_list = [] - # Merge dicts if hostname matches - for query_host in query_hosts: - query_host["reboot_required"] = query_host.pop("requires_reboot", None) - for host in deploy_hosts: - if query_host["hostname"] == host["hostname"]: - # New set of keys for the host list, some of previously dict keys - # is kept such as state, interim_state that is used for patch. - deploy_host = { - "hostname": host.get("hostname"), - "sw_version": deploy.get("from_release"), - "to_release": deploy.get("to_release"), - "reboot_required": deploy.get("reboot_required"), - "deploy_host_state": host.get("state"), - "state": query_host.get("state"), - "interim_state": query_host.get("interim_state"), - "ip": query_host.get("ip") - } - deploy_host_list.append(deploy_host) - break - if not deploy_host_list: - return query_hosts + for host in deploy_hosts: + state = host.get("state") + deploy_host = { + "hostname": host.get("hostname"), + "software_release": deploy.get("from_release"), + "target_release": deploy.get("to_release") if state else None, + "reboot_required": deploy.get("reboot_required") if state else None, + "host_state": state + } + deploy_host_list.append(deploy_host) return deploy_host_list def update_and_sync_deploy_state(self, func, *args, **kwargs): diff --git a/software/software/software_entities.py b/software/software/software_entities.py index 62297781..7b9a95d5 100644 --- a/software/software/software_entities.py +++ b/software/software/software_entities.py @@ -1,5 +1,5 @@ """ -Copyright (c) 2023 Wind River Systems, Inc. +Copyright (c) 2023-2024 Wind River Systems, Inc. 
SPDX-License-Identifier: Apache-2.0 @@ -20,6 +20,7 @@ from software.utils import save_to_json_file from software.utils import get_software_filesystem_data from software.utils import validate_versions +from software.constants import DEPLOY_HOST_STATES from software.constants import DEPLOY_STATES LOG = logging.getLogger('main_logger') @@ -176,10 +177,10 @@ class Deploy(ABC): class DeployHosts(ABC): def __init__(self): - self.states = Enum('States', 'aborted deployed deploying failed pending-deploy') + pass @abstractmethod - def create(self, hostname: str, state: str): + def create(self, hostname: str, state: DEPLOY_HOST_STATES): """ Create a new deploy-host entry @@ -189,8 +190,7 @@ class DeployHosts(ABC): """ instances = [hostname] if state: - check_state(state, self.states) - instances.append(state) + check_instances([state], DEPLOY_HOST_STATES) check_instances(instances, str) pass @@ -213,8 +213,8 @@ class DeployHosts(ABC): :param hostname: The name of the host. :param state: The state of the deploy-host entry. 
""" - check_instances([hostname, state], str) - check_state(state, self.states) + check_instances([hostname], str) + check_instances([state], DEPLOY_HOST_STATES) pass @abstractmethod @@ -303,7 +303,7 @@ class DeployHostHandler(DeployHosts): super().__init__() self.data = get_software_filesystem_data() - def create(self, hostname, state=None): + def create(self, hostname, state:DEPLOY_HOST_STATES=None): super().create(hostname, state) deploy = self.query(hostname) if deploy: @@ -311,7 +311,7 @@ class DeployHostHandler(DeployHosts): new_deploy_host = { "hostname": hostname, - "state": state + "state": state.value if state else None } deploy_data = self.data.get("deploy_host", []) @@ -335,7 +335,7 @@ class DeployHostHandler(DeployHosts): def query_all(self): return self.data.get("deploy_host", []) - def update(self, hostname, state): + def update(self, hostname, state: DEPLOY_HOST_STATES): super().update(hostname, state) deploy = self.query(hostname) if not deploy: @@ -344,7 +344,7 @@ class DeployHostHandler(DeployHosts): index = self.data.get("deploy_host", []).index(deploy) updated_entity = { "hostname": hostname, - "state": state + "state": state.value } self.data["deploy_host"][index].update(updated_entity) save_to_json_file(constants.SOFTWARE_JSON_FILE, self.data) diff --git a/software/software/software_functions.py b/software/software/software_functions.py index c4baafb4..0b26e132 100644 --- a/software/software/software_functions.py +++ b/software/software/software_functions.py @@ -23,6 +23,7 @@ from lxml import etree as ElementTree from xml.dom import minidom import software.apt_utils as apt_utils +from software.db.api import get_instance from software.release_verify import verify_files from software.release_verify import cert_type_all from software.release_signing import sign_files @@ -36,7 +37,7 @@ from software.exceptions import SoftwareServiceError import software.constants as constants import software.utils as utils -from software.sysinv_utils import 
get_sysinv_client +from software.sysinv_utils import get_ihost_list try: @@ -1162,15 +1163,28 @@ def read_upgrade_support_versions(mounted_dir): return to_release, supported_from_releases +def create_deploy_hosts(): + """ + Create deploy-hosts entities based on hostnames + from sysinv. + """ + try: + db_api_instance = get_instance() + for ihost in get_ihost_list(): + db_api_instance.create_deploy_host(ihost.hostname) + LOG.info("Deploy-hosts entities created successfully.") + except Exception as err: + LOG.exception("Error in deploy-hosts entities creation") + raise err + + + def collect_current_load_for_hosts(): load_data = { "current_loads": [] } try: - token, endpoint = utils.get_endpoints_token() - sysinv_client = get_sysinv_client(token=token, endpoint=endpoint) - host_list = sysinv_client.ihost.list() - for ihost in host_list: + for ihost in get_ihost_list(): software_load = ihost.software_load hostname = ihost.hostname load_data["current_loads"].append({ diff --git a/software/software/sysinv_utils.py b/software/software/sysinv_utils.py index 047b6df9..d72ffee6 100644 --- a/software/software/sysinv_utils.py +++ b/software/software/sysinv_utils.py @@ -40,3 +40,12 @@ def get_k8s_ver(): if k8s_ver.state == "active": return k8s_ver.version raise Exception("Failed to get current k8s version") + +def get_ihost_list(): + try: + token, endpoint = utils.get_endpoints_token() + sysinv_client = get_sysinv_client(token=token, endpoint=endpoint) + return sysinv_client.ihost.list() + except Exception as err: + LOG.error("Error getting ihost list: %s", err) + raise