Deploy host-list implementation

This commit makes several changes to deploy host-list.
It adds a function that queries the hostnames from sysinv
to create the deploy-host entities during deploy start.
It also changes the endpoint to the GET verb. When no deployment
is in progress, the endpoint returns an empty list and the CLI
prints "No deploy in progress." When a deployment is in
progress, the CLI behaves the same, but the endpoint
returns the data below:

[{'hostname': '<hostname>',
  'software_release': '<sw_version>',
  'target_release': '<sw_version>',
  'reboot_required': 'str<true/false>',
  'host_state': '<host_deploy_state>'}]

This commit also changes the wait_for_install_complete function
to follow the new state logic.

Note: Software deploy host is affected by the state-related changes
in this commit and will need a future commit to handle state changes
during deploy start and deploy host itself.

Test Plan:
PASS: Software deploy host-list with/without deployment in progress.
PASS: Deploy_host creation/update/get/delete.
PASS: Collect hostnames to deploy host entities during deploy start.

Story: 2010676
Task: 49586

Change-Id: I7b03df30fd8e326637a3ffc031e0fdf543cb6356
Signed-off-by: Luis Eduardo Bonatti <LuizEduardo.Bonatti@windriver.com>
This commit is contained in:
Luis Eduardo Bonatti 2024-02-15 23:26:38 -03:00
parent 88e95d5c1f
commit 378e2010fe
8 changed files with 102 additions and 111 deletions

View File

@ -1,10 +1,9 @@
"""
Copyright (c) 2023 Wind River Systems, Inc.
Copyright (c) 2023-2024 Wind River Systems, Inc.
SPDX-License-Identifier: Apache-2.0
"""
ADDRESS_VERSION_IPV4 = 4
ADDRESS_VERSION_IPV6 = 6
CONTROLLER_FLOATING_HOSTNAME = "controller"
@ -61,3 +60,7 @@ SIG_EXTENSION = ".sig"
PATCH_EXTENSION = ".patch"
SUPPORTED_UPLOAD_FILE_EXT = [ISO_EXTENSION, SIG_EXTENSION, PATCH_EXTENSION]
SCRATCH_DIR = "/scratch"
# Host deploy state strings, as carried in the "host_state" field of
# host-list entries.
# NOTE(review): values match the DEPLOYING/FAILED/PENDING members of
# DEPLOY_HOST_STATES -- presumably these must be kept in sync; confirm.
DEPLOYING = 'deploying'
FAILED = 'failed'
PENDING = 'pending'

View File

@ -612,14 +612,11 @@ def release_list_req(args):
def print_software_deploy_host_list_result(req):
if req.status_code == 200:
data = json.loads(req.text)
if 'data' not in data:
print("Invalid data returned:")
print_result_debug(req)
data = req.json().get("data", None)
if not data:
print("No deploy in progress.\n")
return
agents = data['data']
# Calculate column widths
hdr_hn = "Hostname"
hdr_rel = "Software Release"
@ -633,19 +630,19 @@ def print_software_deploy_host_list_result(req):
width_rr = len(hdr_rr)
width_state = len(hdr_state)
for agent in sorted(agents, key=lambda a: a["hostname"]):
if agent.get("deploy_host_state") is None:
agent["deploy_host_state"] = "No active deployment"
if agent.get("to_release") is None:
agent["to_release"] = "N/A"
for agent in sorted(data, key=lambda a: a["hostname"]):
if agent.get("host_state") is None:
agent["host_state"] = "No active deployment"
if agent.get("target_release") is None:
agent["target_release"] = "N/A"
if len(agent["hostname"]) > width_hn:
width_hn = len(agent["hostname"])
if len(agent["sw_version"]) > width_rel:
width_rel = len(agent["sw_version"])
if len(agent["to_release"]) > width_tg_rel:
width_tg_rel = len(agent["to_release"])
if len(agent["deploy_host_state"]) > width_state:
width_state = len(agent["deploy_host_state"])
if len(agent["software_release"]) > width_rel:
width_rel = len(agent["software_release"])
if len(agent["target_release"]) > width_tg_rel:
width_tg_rel = len(agent["target_release"])
if len(agent["host_state"]) > width_state:
width_state = len(agent["host_state"])
print("{0:^{width_hn}} {1:^{width_rel}} {2:^{width_tg_rel}} {3:^{width_rr}} {4:^{width_state}}".format(
hdr_hn, hdr_rel, hdr_tg_rel, hdr_rr, hdr_state,
@ -654,13 +651,13 @@ def print_software_deploy_host_list_result(req):
print("{0} {1} {2} {3} {4}".format(
'=' * width_hn, '=' * width_rel, '=' * width_tg_rel, '=' * width_rr, '=' * width_state))
for agent in sorted(agents, key=lambda a: a["hostname"]):
for agent in sorted(data, key=lambda a: a["hostname"]):
print("{0:<{width_hn}} {1:^{width_rel}} {2:^{width_tg_rel}} {3:^{width_rr}} {4:^{width_state}}".format(
agent["hostname"],
agent["sw_version"],
agent["to_release"],
agent["software_release"],
agent["target_release"],
"Yes" if agent.get("reboot_required", None) else "No",
agent["deploy_host_state"],
agent["host_state"],
width_hn=width_hn, width_rel=width_rel, width_tg_rel=width_tg_rel, width_rr=width_rr, width_state=width_state))
elif req.status_code == 500:
@ -723,54 +720,33 @@ def wait_for_install_complete(agent_ip):
break
if req.status_code == 200:
data = json.loads(req.text)
if 'data' not in data:
data = req.json().get("data", None)
if not data:
print("Invalid host-list data returned:")
print_result_debug(req)
rc = 1
break
state = None
agents = data['data']
interim_state = None
host_state = None
for agent in agents:
if agent['hostname'] == agent_ip \
or agent['ip'] == agent_ip:
state = agent.get('state')
interim_state = agent.get('interim_state')
for d in data:
if d['hostname'] == agent_ip:
host_state = d.get('host_state')
if state is None:
# If the software daemons have restarted, there's a
# window after the software-controller restart that the
# hosts table will be empty.
retriable_count += 1
if retriable_count <= max_retries:
continue
else:
print("%s agent has timed out." % agent_ip)
rc = 1
break
if state == constants.PATCH_AGENT_STATE_INSTALLING or \
interim_state is True:
# Still installing
if host_state == constants.DEPLOYING:
# Still deploying
sys.stdout.write(".")
sys.stdout.flush()
elif state == constants.PATCH_AGENT_STATE_INSTALL_REJECTED:
print("\nInstallation rejected. Node must be locked")
elif host_state == constants.FAILED:
print("\nDeployment failed. Please check logs for details.")
rc = 1
break
elif state == constants.PATCH_AGENT_STATE_INSTALL_FAILED:
print("\nInstallation failed. Please check logs for details.")
rc = 1
break
elif state == constants.PATCH_AGENT_STATE_IDLE:
print("\nInstallation was successful.")
elif host_state == constants.DEPLOYED:
print("\nDeployment was successful.")
rc = 0
break
else:
print("\nPatch agent is reporting unknown state: %s" % state)
print("\nReported unknown state: %s" % host_state)
rc = 1
break

View File

@ -10,6 +10,7 @@ import os
from oslo_log import log
from pecan import expose
from pecan import request
from pecan import Response
import shutil
from software.exceptions import SoftwareError
@ -231,14 +232,11 @@ class SoftwareAPIController(object):
return dict(sd=sd)
@expose('json')
@expose('query_hosts.xml', content_type='application/xml')
def host_list(self, *args): # pylint: disable=unused-argument
try:
query_hosts = sc.deploy_host_list()
except Exception as e:
return dict(error=str(e))
return dict(data=query_hosts)
@expose('json', method="GET")
def host_list(self):
query_hosts = dict(data=sc.deploy_host_list())
response_data = json.dumps(query_hosts)
return Response(body=response_data, status_code=200)
@expose(method='GET', template='json')
def in_sync_controller(self):

View File

@ -181,3 +181,9 @@ class DEPLOY_STATES(Enum):
HOST = 'host'
HOST_DONE = 'host-done'
HOST_FAILED = 'host-failed'
class DEPLOY_HOST_STATES(Enum):
    """States of a single host within a deployment.

    The member values are the strings persisted in a deploy-host
    entry's "state" field (handlers store ``state.value``).
    """
    DEPLOYED = 'deployed'
    DEPLOYING = 'deploying'
    FAILED = 'failed'
    PENDING = 'pending'

View File

@ -46,6 +46,7 @@ from software.exceptions import ReleaseIsoDeleteFailure
from software.exceptions import SoftwareServiceError
from software.release_data import SWReleaseCollection
from software.software_functions import collect_current_load_for_hosts
from software.software_functions import create_deploy_hosts
from software.software_functions import parse_release_metadata
from software.software_functions import configure_logging
from software.software_functions import mount_iso_load
@ -2267,6 +2268,7 @@ class PatchController(PatchService):
if self._deploy_upgrade_start(to_release):
collect_current_load_for_hosts()
create_deploy_hosts()
self.db_api_instance.begin_update()
try:
self.update_and_sync_deploy_state(self.db_api_instance.create_deploy,
@ -2319,6 +2321,7 @@ class PatchController(PatchService):
if operation == "apply":
collect_current_load_for_hosts()
create_deploy_hosts()
# reverse = True is used for apply operation
deployment_list = self.release_apply_remove_order(deployment, running_sw_version, reverse=True)
@ -2411,6 +2414,8 @@ class PatchController(PatchService):
self.interim_state[release] = list(self.hosts)
elif operation == "remove":
collect_current_load_for_hosts()
create_deploy_hosts()
removed = False
deployment_list = self.release_apply_remove_order(deployment, running_sw_version)
msg = "Deploy start order for remove operation: %s" % ",".join(deployment_list)
@ -2900,40 +2905,20 @@ class PatchController(PatchService):
return dict(data)
def deploy_host_list(self):
query_hosts = self.query_host_cache()
deploy_hosts = self.db_api_instance.get_deploy_host()
deploy = self.db_api_instance.get_deploy()
# If there's a hostname missing, add it to query hosts.
hostnames = []
for host in query_hosts:
hostnames.append(host["hostname"])
for host in deploy_hosts:
if host["hostname"] not in hostnames:
query_hosts.append(host)
deploy_host_list = []
# Merge dicts if hostname matches
for query_host in query_hosts:
query_host["reboot_required"] = query_host.pop("requires_reboot", None)
for host in deploy_hosts:
if query_host["hostname"] == host["hostname"]:
# New set of keys for the host list, some of previously dict keys
# is kept such as state, interim_state that is used for patch.
deploy_host = {
"hostname": host.get("hostname"),
"sw_version": deploy.get("from_release"),
"to_release": deploy.get("to_release"),
"reboot_required": deploy.get("reboot_required"),
"deploy_host_state": host.get("state"),
"state": query_host.get("state"),
"interim_state": query_host.get("interim_state"),
"ip": query_host.get("ip")
}
deploy_host_list.append(deploy_host)
break
if not deploy_host_list:
return query_hosts
for host in deploy_hosts:
state = host.get("state")
deploy_host = {
"hostname": host.get("hostname"),
"software_release": deploy.get("from_release"),
"target_release": deploy.get("to_release") if state else None,
"reboot_required": deploy.get("reboot_required") if state else None,
"host_state": state
}
deploy_host_list.append(deploy_host)
return deploy_host_list
def update_and_sync_deploy_state(self, func, *args, **kwargs):

View File

@ -1,5 +1,5 @@
"""
Copyright (c) 2023 Wind River Systems, Inc.
Copyright (c) 2023-2024 Wind River Systems, Inc.
SPDX-License-Identifier: Apache-2.0
@ -20,6 +20,7 @@ from software.utils import save_to_json_file
from software.utils import get_software_filesystem_data
from software.utils import validate_versions
from software.constants import DEPLOY_HOST_STATES
from software.constants import DEPLOY_STATES
LOG = logging.getLogger('main_logger')
@ -176,10 +177,10 @@ class Deploy(ABC):
class DeployHosts(ABC):
def __init__(self):
self.states = Enum('States', 'aborted deployed deploying failed pending-deploy')
pass
@abstractmethod
def create(self, hostname: str, state: str):
def create(self, hostname: str, state: DEPLOY_HOST_STATES):
"""
Create a new deploy-host entry
@ -189,8 +190,7 @@ class DeployHosts(ABC):
"""
instances = [hostname]
if state:
check_state(state, self.states)
instances.append(state)
check_instances([state], DEPLOY_HOST_STATES)
check_instances(instances, str)
pass
@ -213,8 +213,8 @@ class DeployHosts(ABC):
:param hostname: The name of the host.
:param state: The state of the deploy-host entry.
"""
check_instances([hostname, state], str)
check_state(state, self.states)
check_instances([hostname], str)
check_instances([state], DEPLOY_HOST_STATES)
pass
@abstractmethod
@ -303,7 +303,7 @@ class DeployHostHandler(DeployHosts):
super().__init__()
self.data = get_software_filesystem_data()
def create(self, hostname, state=None):
def create(self, hostname, state:DEPLOY_HOST_STATES=None):
super().create(hostname, state)
deploy = self.query(hostname)
if deploy:
@ -311,7 +311,7 @@ class DeployHostHandler(DeployHosts):
new_deploy_host = {
"hostname": hostname,
"state": state
"state": state.value if state else None
}
deploy_data = self.data.get("deploy_host", [])
@ -335,7 +335,7 @@ class DeployHostHandler(DeployHosts):
def query_all(self):
return self.data.get("deploy_host", [])
def update(self, hostname, state):
def update(self, hostname, state: DEPLOY_HOST_STATES):
super().update(hostname, state)
deploy = self.query(hostname)
if not deploy:
@ -344,7 +344,7 @@ class DeployHostHandler(DeployHosts):
index = self.data.get("deploy_host", []).index(deploy)
updated_entity = {
"hostname": hostname,
"state": state
"state": state.value
}
self.data["deploy_host"][index].update(updated_entity)
save_to_json_file(constants.SOFTWARE_JSON_FILE, self.data)

View File

@ -23,6 +23,7 @@ from lxml import etree as ElementTree
from xml.dom import minidom
import software.apt_utils as apt_utils
from software.db.api import get_instance
from software.release_verify import verify_files
from software.release_verify import cert_type_all
from software.release_signing import sign_files
@ -36,7 +37,7 @@ from software.exceptions import SoftwareServiceError
import software.constants as constants
import software.utils as utils
from software.sysinv_utils import get_sysinv_client
from software.sysinv_utils import get_ihost_list
try:
@ -1162,15 +1163,28 @@ def read_upgrade_support_versions(mounted_dir):
return to_release, supported_from_releases
def create_deploy_hosts():
    """
    Create one deploy-host entity per host reported by sysinv.

    Hosts are taken from get_ihost_list() and a deploy-host entry is
    created for each hostname through the database API instance.
    Any failure is logged with its traceback and re-raised to the
    caller.
    """
    try:
        db_api = get_instance()
        for host in get_ihost_list():
            db_api.create_deploy_host(host.hostname)
    except Exception as err:
        LOG.exception("Error in deploy-hosts entities creation")
        raise err
    else:
        # Only reached when every host entry was created without error.
        LOG.info("Deploy-hosts entities created successfully.")
def collect_current_load_for_hosts():
load_data = {
"current_loads": []
}
try:
token, endpoint = utils.get_endpoints_token()
sysinv_client = get_sysinv_client(token=token, endpoint=endpoint)
host_list = sysinv_client.ihost.list()
for ihost in host_list:
for ihost in get_ihost_list():
software_load = ihost.software_load
hostname = ihost.hostname
load_data["current_loads"].append({

View File

@ -40,3 +40,12 @@ def get_k8s_ver():
if k8s_ver.state == "active":
return k8s_ver.version
raise Exception("Failed to get current k8s version")
def get_ihost_list():
    """
    Return the list of hosts known to sysinv.

    A sysinv client is built from the token and endpoint returned by
    utils.get_endpoints_token(), and the result of its ihost.list()
    call is returned. Any error is logged and re-raised unchanged.
    """
    try:
        token, endpoint = utils.get_endpoints_token()
        client = get_sysinv_client(token=token, endpoint=endpoint)
        hosts = client.ihost.list()
    except Exception as err:
        LOG.error("Error getting ihost list: %s", err)
        raise
    return hosts