Merge "Deploy host-list implementation"

This commit is contained in:
Zuul 2024-03-08 13:04:36 +00:00 committed by Gerrit Code Review
commit 306ea5f631
8 changed files with 101 additions and 111 deletions

View File

@ -1,10 +1,9 @@
"""
Copyright (c) 2023 Wind River Systems, Inc.
Copyright (c) 2023-2024 Wind River Systems, Inc.
SPDX-License-Identifier: Apache-2.0
"""
ADDRESS_VERSION_IPV4 = 4
ADDRESS_VERSION_IPV6 = 6
CONTROLLER_FLOATING_HOSTNAME = "controller"
@ -61,3 +60,7 @@ SIG_EXTENSION = ".sig"
PATCH_EXTENSION = ".patch"
SUPPORTED_UPLOAD_FILE_EXT = [ISO_EXTENSION, SIG_EXTENSION, PATCH_EXTENSION]
SCRATCH_DIR = "/scratch"
DEPLOYING = 'deploying'
FAILED = 'failed'
PENDING = 'pending'

View File

@ -612,14 +612,11 @@ def release_list_req(args):
def print_software_deploy_host_list_result(req):
if req.status_code == 200:
data = json.loads(req.text)
if 'data' not in data:
print("Invalid data returned:")
print_result_debug(req)
data = req.json().get("data", None)
if not data:
print("No deploy in progress.\n")
return
agents = data['data']
# Calculate column widths
hdr_hn = "Hostname"
hdr_rel = "Software Release"
@ -633,19 +630,19 @@ def print_software_deploy_host_list_result(req):
width_rr = len(hdr_rr)
width_state = len(hdr_state)
for agent in sorted(agents, key=lambda a: a["hostname"]):
if agent.get("deploy_host_state") is None:
agent["deploy_host_state"] = "No active deployment"
if agent.get("to_release") is None:
agent["to_release"] = "N/A"
for agent in sorted(data, key=lambda a: a["hostname"]):
if agent.get("host_state") is None:
agent["host_state"] = "No active deployment"
if agent.get("target_release") is None:
agent["target_release"] = "N/A"
if len(agent["hostname"]) > width_hn:
width_hn = len(agent["hostname"])
if len(agent["sw_version"]) > width_rel:
width_rel = len(agent["sw_version"])
if len(agent["to_release"]) > width_tg_rel:
width_tg_rel = len(agent["to_release"])
if len(agent["deploy_host_state"]) > width_state:
width_state = len(agent["deploy_host_state"])
if len(agent["software_release"]) > width_rel:
width_rel = len(agent["software_release"])
if len(agent["target_release"]) > width_tg_rel:
width_tg_rel = len(agent["target_release"])
if len(agent["host_state"]) > width_state:
width_state = len(agent["host_state"])
print("{0:^{width_hn}} {1:^{width_rel}} {2:^{width_tg_rel}} {3:^{width_rr}} {4:^{width_state}}".format(
hdr_hn, hdr_rel, hdr_tg_rel, hdr_rr, hdr_state,
@ -654,13 +651,13 @@ def print_software_deploy_host_list_result(req):
print("{0} {1} {2} {3} {4}".format(
'=' * width_hn, '=' * width_rel, '=' * width_tg_rel, '=' * width_rr, '=' * width_state))
for agent in sorted(agents, key=lambda a: a["hostname"]):
for agent in sorted(data, key=lambda a: a["hostname"]):
print("{0:<{width_hn}} {1:^{width_rel}} {2:^{width_tg_rel}} {3:^{width_rr}} {4:^{width_state}}".format(
agent["hostname"],
agent["sw_version"],
agent["to_release"],
agent["software_release"],
agent["target_release"],
"Yes" if agent.get("reboot_required", None) else "No",
agent["deploy_host_state"],
agent["host_state"],
width_hn=width_hn, width_rel=width_rel, width_tg_rel=width_tg_rel, width_rr=width_rr, width_state=width_state))
elif req.status_code == 500:
@ -723,54 +720,33 @@ def wait_for_install_complete(agent_ip):
break
if req.status_code == 200:
data = json.loads(req.text)
if 'data' not in data:
data = req.json().get("data", None)
if not data:
print("Invalid host-list data returned:")
print_result_debug(req)
rc = 1
break
state = None
agents = data['data']
interim_state = None
host_state = None
for agent in agents:
if agent['hostname'] == agent_ip \
or agent['ip'] == agent_ip:
state = agent.get('state')
interim_state = agent.get('interim_state')
for d in data:
if d['hostname'] == agent_ip:
host_state = d.get('host_state')
if state is None:
# If the software daemons have restarted, there's a
# window after the software-controller restart that the
# hosts table will be empty.
retriable_count += 1
if retriable_count <= max_retries:
continue
else:
print("%s agent has timed out." % agent_ip)
rc = 1
break
if state == constants.PATCH_AGENT_STATE_INSTALLING or \
interim_state is True:
# Still installing
if host_state == constants.DEPLOYING:
# Still deploying
sys.stdout.write(".")
sys.stdout.flush()
elif state == constants.PATCH_AGENT_STATE_INSTALL_REJECTED:
print("\nInstallation rejected. Node must be locked")
elif host_state == constants.FAILED:
print("\nDeployment failed. Please check logs for details.")
rc = 1
break
elif state == constants.PATCH_AGENT_STATE_INSTALL_FAILED:
print("\nInstallation failed. Please check logs for details.")
rc = 1
break
elif state == constants.PATCH_AGENT_STATE_IDLE:
print("\nInstallation was successful.")
elif host_state == constants.DEPLOYED:
print("\nDeployment was successful.")
rc = 0
break
else:
print("\nPatch agent is reporting unknown state: %s" % state)
print("\nReported unknown state: %s" % host_state)
rc = 1
break

View File

@ -230,14 +230,11 @@ class SoftwareAPIController(object):
return dict(sd=sd)
@expose('json')
@expose('query_hosts.xml', content_type='application/xml')
def host_list(self, *args): # pylint: disable=unused-argument
try:
query_hosts = sc.deploy_host_list()
except Exception as e:
return dict(error=str(e))
return dict(data=query_hosts)
@expose('json', method="GET")
def host_list(self):
    """
    Return the deploy host list wrapped under a "data" key.

    The result of sc.deploy_host_list() is serialized to a JSON string
    and sent back as the response body with status code 200.
    """
    payload = json.dumps({"data": sc.deploy_host_list()})
    return Response(body=payload, status_code=200)
@expose(method='GET', template='json')
def in_sync_controller(self):

View File

@ -181,3 +181,9 @@ class DEPLOY_STATES(Enum):
HOST = 'host'
HOST_DONE = 'host-done'
HOST_FAILED = 'host-failed'
class DEPLOY_HOST_STATES(Enum):
    """Deploy-host states; each member's value is its lowercase string form."""

    DEPLOYED = "deployed"
    DEPLOYING = "deploying"
    FAILED = "failed"
    PENDING = "pending"

View File

@ -46,6 +46,7 @@ from software.exceptions import ReleaseIsoDeleteFailure
from software.exceptions import SoftwareServiceError
from software.release_data import SWReleaseCollection
from software.software_functions import collect_current_load_for_hosts
from software.software_functions import create_deploy_hosts
from software.software_functions import parse_release_metadata
from software.software_functions import configure_logging
from software.software_functions import mount_iso_load
@ -2267,6 +2268,7 @@ class PatchController(PatchService):
if self._deploy_upgrade_start(to_release):
collect_current_load_for_hosts()
create_deploy_hosts()
self.db_api_instance.begin_update()
try:
self.update_and_sync_deploy_state(self.db_api_instance.create_deploy,
@ -2319,6 +2321,7 @@ class PatchController(PatchService):
if operation == "apply":
collect_current_load_for_hosts()
create_deploy_hosts()
# reverse = True is used for apply operation
deployment_list = self.release_apply_remove_order(deployment, running_sw_version, reverse=True)
@ -2411,6 +2414,8 @@ class PatchController(PatchService):
self.interim_state[release] = list(self.hosts)
elif operation == "remove":
collect_current_load_for_hosts()
create_deploy_hosts()
removed = False
deployment_list = self.release_apply_remove_order(deployment, running_sw_version)
msg = "Deploy start order for remove operation: %s" % ",".join(deployment_list)
@ -2904,7 +2909,6 @@ class PatchController(PatchService):
return dict(data)
def deploy_host_list(self):
query_hosts = self.query_host_cache()
deploy_hosts = self.db_api_instance.get_deploy_host()
deploy = self.db_api_instance.get_deploy_all()
if not deploy:
@ -2912,36 +2916,17 @@ class PatchController(PatchService):
deploy = deploy[0]
# If there's a hostname missing, add it to query hosts.
hostnames = []
for host in query_hosts:
hostnames.append(host["hostname"])
for host in deploy_hosts:
if host["hostname"] not in hostnames:
query_hosts.append(host)
deploy_host_list = []
# Merge dicts if hostname matches
for query_host in query_hosts:
query_host["reboot_required"] = query_host.pop("requires_reboot", None)
for host in deploy_hosts:
if query_host["hostname"] == host["hostname"]:
# New set of keys for the host list, some of previously dict keys
# is kept such as state, interim_state that is used for patch.
deploy_host = {
"hostname": host.get("hostname"),
"sw_version": deploy.get("from_release"),
"to_release": deploy.get("to_release"),
"reboot_required": deploy.get("reboot_required"),
"deploy_host_state": host.get("state"),
"state": query_host.get("state"),
"interim_state": query_host.get("interim_state"),
"ip": query_host.get("ip")
}
deploy_host_list.append(deploy_host)
break
if not deploy_host_list:
return query_hosts
for host in deploy_hosts:
state = host.get("state")
deploy_host = {
"hostname": host.get("hostname"),
"software_release": deploy.get("from_release"),
"target_release": deploy.get("to_release") if state else None,
"reboot_required": deploy.get("reboot_required") if state else None,
"host_state": state
}
deploy_host_list.append(deploy_host)
return deploy_host_list
def update_and_sync_deploy_state(self, func, *args, **kwargs):

View File

@ -1,5 +1,5 @@
"""
Copyright (c) 2023 Wind River Systems, Inc.
Copyright (c) 2023-2024 Wind River Systems, Inc.
SPDX-License-Identifier: Apache-2.0
@ -20,6 +20,7 @@ from software.utils import save_to_json_file
from software.utils import get_software_filesystem_data
from software.utils import validate_versions
from software.constants import DEPLOY_HOST_STATES
from software.constants import DEPLOY_STATES
LOG = logging.getLogger('main_logger')
@ -177,10 +178,10 @@ class Deploy(ABC):
class DeployHosts(ABC):
def __init__(self):
self.states = Enum('States', 'aborted deployed deploying failed pending-deploy')
pass
@abstractmethod
def create(self, hostname: str, state: str):
def create(self, hostname: str, state: DEPLOY_HOST_STATES):
"""
Create a new deploy-host entry
@ -190,8 +191,7 @@ class DeployHosts(ABC):
"""
instances = [hostname]
if state:
check_state(state, self.states)
instances.append(state)
check_instances([state], DEPLOY_HOST_STATES)
check_instances(instances, str)
pass
@ -214,8 +214,8 @@ class DeployHosts(ABC):
:param hostname: The name of the host.
:param state: The state of the deploy-host entry.
"""
check_instances([hostname, state], str)
check_state(state, self.states)
check_instances([hostname], str)
check_instances([state], DEPLOY_HOST_STATES)
pass
@abstractmethod
@ -325,7 +325,7 @@ class DeployHostHandler(DeployHosts):
super().__init__()
self.data = get_software_filesystem_data()
def create(self, hostname, state=None):
def create(self, hostname, state:DEPLOY_HOST_STATES=None):
super().create(hostname, state)
deploy = self.query(hostname)
if deploy:
@ -333,7 +333,7 @@ class DeployHostHandler(DeployHosts):
new_deploy_host = {
"hostname": hostname,
"state": state
"state": state.value if state else None
}
deploy_data = self.data.get("deploy_host", [])
@ -362,7 +362,7 @@ class DeployHostHandler(DeployHosts):
def query_all(self):
return self.data.get("deploy_host", [])
def update(self, hostname, state):
def update(self, hostname, state: DEPLOY_HOST_STATES):
super().update(hostname, state)
deploy = self.query(hostname)
if not deploy:
@ -371,7 +371,7 @@ class DeployHostHandler(DeployHosts):
index = self.data.get("deploy_host", []).index(deploy)
updated_entity = {
"hostname": hostname,
"state": state
"state": state.value
}
self.data["deploy_host"][index].update(updated_entity)
save_to_json_file(constants.SOFTWARE_JSON_FILE, self.data)

View File

@ -23,6 +23,7 @@ from lxml import etree as ElementTree
from xml.dom import minidom
import software.apt_utils as apt_utils
from software.db.api import get_instance
from software.release_verify import verify_files
from software.release_verify import cert_type_all
from software.release_signing import sign_files
@ -36,7 +37,7 @@ from software.exceptions import SoftwareServiceError
import software.constants as constants
import software.utils as utils
from software.sysinv_utils import get_sysinv_client
from software.sysinv_utils import get_ihost_list
try:
@ -1162,15 +1163,28 @@ def read_upgrade_support_versions(mounted_dir):
return to_release, supported_from_releases
def create_deploy_hosts():
    """
    Create deploy-hosts entities based on hostnames
    from sysinv.

    One create_deploy_host() call is made on the database API instance
    for every host returned by get_ihost_list(), passing only the
    hostname.

    :raises Exception: any error raised while getting the database
        instance, listing the hosts or creating an entry is logged
        with its traceback and then re-raised unchanged.
    """
    try:
        db_api_instance = get_instance()
        for ihost in get_ihost_list():
            db_api_instance.create_deploy_host(ihost.hostname)
        LOG.info("Deploy-hosts entities created successfully.")
    except Exception:
        LOG.exception("Error in deploy-hosts entities creation")
        # Bare raise re-raises the active exception as-is, matching the
        # error-handling style used by get_ihost_list().
        raise
def collect_current_load_for_hosts():
load_data = {
"current_loads": []
}
try:
token, endpoint = utils.get_endpoints_token()
sysinv_client = get_sysinv_client(token=token, endpoint=endpoint)
host_list = sysinv_client.ihost.list()
for ihost in host_list:
for ihost in get_ihost_list():
software_load = ihost.software_load
hostname = ihost.hostname
load_data["current_loads"].append({

View File

@ -40,3 +40,12 @@ def get_k8s_ver():
if k8s_ver.state == "active":
return k8s_ver.version
raise Exception("Failed to get current k8s version")
def get_ihost_list():
    """
    Return the result of the sysinv client's ihost.list() call.

    A token and endpoint are obtained from utils.get_endpoints_token()
    and used to build the sysinv client. Any exception raised along the
    way is logged and re-raised.
    """
    try:
        auth_token, sysinv_endpoint = utils.get_endpoints_token()
        client = get_sysinv_client(token=auth_token, endpoint=sysinv_endpoint)
        hosts = client.ihost.list()
    except Exception as err:
        LOG.error("Error getting ihost list: %s", err)
        raise
    return hosts