From ebe177d91858d470d140cab867faa7cdee547bab Mon Sep 17 00:00:00 2001
From: Bin Qian <bin.qian@windriver.com>
Date: Tue, 12 Mar 2024 22:27:25 +0000
Subject: [PATCH] USM deploy state

This change introduced state machines for release state, deploy state
and deploy host state.

This change removed the direct references to the software metadata from
software-controller and other modules, replacing them with the
encapsulated release_data module.

Also included changes:
1. removed the required parameter from the software deploy activate and
   software deploy complete REST APIs.
2. ensure the metadata is reloaded for each request
3. added feed_repo and commit-id to the deploy entity, to be
   subsequently passed to deploy operations.
4. fix issues

TCs:
    passed: software upload major and patching releases
    passed: software deploy start major and patching releases
    passed: software deploy host (mock) major and patching release
    passed: software activate major and patching release
    passed: software complete major release and patching release
    passed: redeploy host after host deploy failed, for both major and
    patching releases

Story: 2010676
Task: 49849

Change-Id: I4b1894560eccb8ef4f613633a73bf3887b2b93fb
Signed-off-by: Bin Qian <bin.qian@windriver.com>
---
 software-client/debian/deb_folder/control     |    1 +
 software-client/requirements.txt              |    1 +
 .../software_client/common/utils.py           |   58 +-
 software-client/software_client/constants.py  |   27 +-
 software-client/software_client/v1/deploy.py  |  138 +-
 .../software_client/v1/deploy_shell.py        |   36 +-
 .../software_client/v1/release_shell.py       |    9 +-
 software/scripts/deploy-precheck              |    2 +-
 software/scripts/prep-data-migration          |    2 +
 .../software/api/controllers/v1/software.py   |   38 +-
 software/software/constants.py                |  109 --
 software/software/db/api.py                   |   13 +-
 software/software/deploy_host_state.py        |   69 +
 software/software/deploy_state.py             |  198 +++
 software/software/exceptions.py               |   85 +-
 software/software/parsable_error.py           |    5 +-
 software/software/release_data.py             |  215 ++-
 software/software/release_state.py            |   94 ++
 software/software/software_controller.py      | 1257 ++++++++---------
 software/software/software_entities.py        |   84 +-
 software/software/software_functions.py       |  256 ++--
 software/software/states.py                   |  126 ++
 .../tests/test_software_controller.py         |   57 +-
 .../software/tests/test_software_function.py  |    6 +-
 software/software/utils.py                    |   15 +-
 software/tox.ini                              |    4 +-
 26 files changed, 1648 insertions(+), 1257 deletions(-)
 create mode 100644 software/software/deploy_host_state.py
 create mode 100644 software/software/deploy_state.py
 create mode 100644 software/software/release_state.py
 create mode 100644 software/software/states.py

diff --git a/software-client/debian/deb_folder/control b/software-client/debian/deb_folder/control
index 018da48c..5f194a8b 100644
--- a/software-client/debian/deb_folder/control
+++ b/software-client/debian/deb_folder/control
@@ -5,6 +5,7 @@ Maintainer: StarlingX Developers <StarlingX-discuss@lists.StarlingX.io>
 Build-Depends: debhelper-compat (= 13),
  dh-python,
  python3-all,
+ python3-tabulate,
  python3-setuptools,
  python3-wheel
 Build-Depends-Indep:
diff --git a/software-client/requirements.txt b/software-client/requirements.txt
index 4ba0a8f3..7f473965 100644
--- a/software-client/requirements.txt
+++ b/software-client/requirements.txt
@@ -7,4 +7,5 @@ oslo.serialization
 netaddr
 pecan
 requests_toolbelt
+tabulate
 WebOb
diff --git a/software-client/software_client/common/utils.py b/software-client/software_client/common/utils.py
index f1b71a95..441115c5 100644
--- a/software-client/software_client/common/utils.py
+++ b/software-client/software_client/common/utils.py
@@ -21,15 +21,36 @@ import json
 import os
 import re
 import textwrap
-
+from tabulate import tabulate
 from oslo_utils import importutils
 from six.moves import zip
 
 from software_client.common.http_errors import HTTP_ERRORS
 
+# TODO(bqian) remove the overrides below once the switch to the
+# system command style CLI display for the USM CLI is ready
+from tabulate import _table_formats
+from tabulate import TableFormat
+from tabulate import Line
+from tabulate import DataRow
+
+simple = TableFormat(
+    lineabove=Line("", "-", "  ", ""),
+    linebelowheader=Line("", "=", "  ", ""),
+    linebetweenrows=None,
+    linebelow=Line("", "-", "  ", ""),
+    headerrow=DataRow("", "  ", ""),
+    datarow=DataRow("", "  ", ""),
+    padding=0,
+    with_header_hide=["lineabove", "linebelow"],
+)
+
+# _table_formats['pretty'] = simple
+#####################################################
 
 TERM_WIDTH = 72
 
+
 class HelpFormatter(argparse.HelpFormatter):
     def start_section(self, heading):
         # Title-case the headings
@@ -158,6 +179,41 @@ def display_info(resp):
         _display_info(text)
 
 
+def display_result_list(header_data_list, data):
+    header = [h for h in header_data_list]
+    table = []
+    for d in data:
+        row = []
+        for _, k in header_data_list.items():
+            row.append(d[k])
+        table.append(row)
+    if len(table) == 0:
+        print("No data")
+    else:
+        print(tabulate(table, header, tablefmt='pretty', colalign=("left", "left")))
+
+
+def display_detail_result(data):
+    header = ["Property", "Value"]
+    table = []
+    for k, v in data.items():
+        if isinstance(v, list):
+            if len(v) > 0:
+                row = [k, v[0]]
+                v.pop(0)
+            else:
+                row = [k, '']
+            table.append(row)
+
+            for r in v:
+                row = ['', r]
+                table.append(row)
+        else:
+            row = [k, v]
+            table.append(row)
+    print(tabulate(table, header, tablefmt='pretty', colalign=("left", "left")))
+
+
 def print_result_list(header_data_list, data_list, has_error, sort_key=0):
     """
     Print a list of data in a simple table format
diff --git a/software-client/software_client/constants.py b/software-client/software_client/constants.py
index 0a54e095..782a1aad 100644
--- a/software-client/software_client/constants.py
+++ b/software-client/software_client/constants.py
@@ -10,36 +10,11 @@ CONTROLLER_FLOATING_HOSTNAME = "controller"
 
 SOFTWARE_STORAGE_DIR = "/opt/software"
 
-AVAILABLE_DIR = "%s/metadata/available" % SOFTWARE_STORAGE_DIR
-UNAVAILABLE_DIR = "%s/metadata/unavailable" % SOFTWARE_STORAGE_DIR
-DEPLOYING_START_DIR = "%s/metadata/deploying_start" % SOFTWARE_STORAGE_DIR
-DEPLOYING_HOST_DIR = "%s/metadata/deploying_host" % SOFTWARE_STORAGE_DIR
-DEPLOYING_ACTIVATE_DIR = "%s/metadata/deploying_activate" % SOFTWARE_STORAGE_DIR
-DEPLOYING_COMPLETE_DIR = "%s/metadata/deploying_complete" % SOFTWARE_STORAGE_DIR
-DEPLOYED_DIR = "%s/metadata/deployed" % SOFTWARE_STORAGE_DIR
-REMOVING_DIR = "%s/metadata/removing" % SOFTWARE_STORAGE_DIR
-ABORTING_DIR = "%s/metadata/aborting" % SOFTWARE_STORAGE_DIR
-COMMITTED_DIR = "%s/metadata/committed" % SOFTWARE_STORAGE_DIR
-SEMANTICS_DIR = "%s/semantics" % SOFTWARE_STORAGE_DIR
-
 PATCH_AGENT_STATE_IDLE = "idle"
 PATCH_AGENT_STATE_INSTALLING = "installing"
 PATCH_AGENT_STATE_INSTALL_FAILED = "install-failed"
 PATCH_AGENT_STATE_INSTALL_REJECTED = "install-rejected"
 
-ABORTING = 'aborting'
-AVAILABLE = 'available'
-COMMITTED = 'committed'
-DEPLOYED = 'deployed'
-DEPLOYING_ACTIVATE = 'deploying-activate'
-DEPLOYING_COMPLETE = 'deploying-complete'
-DEPLOYING_HOST = 'deploying-host'
-DEPLOYING_START = 'deploying-start'
-REMOVING = 'removing'
-UNAVAILABLE = 'unavailable'
-UNKNOWN = 'n/a'
-
-
 STATUS_DEVELOPEMENT = 'DEV'
 STATUS_OBSOLETE = 'OBS'
 STATUS_RELEASED = 'REL'
@@ -61,9 +36,11 @@ PATCH_EXTENSION = ".patch"
 SUPPORTED_UPLOAD_FILE_EXT = [ISO_EXTENSION, SIG_EXTENSION, PATCH_EXTENSION]
 SCRATCH_DIR = "/scratch"
 
+# host deploy state
 DEPLOYING = 'deploying'
 FAILED = 'failed'
 PENDING = 'pending'
+DEPLOYED = 'deployed'
 
 # Authorization modes of software cli
 KEYSTONE = 'keystone'
diff --git a/software-client/software_client/v1/deploy.py b/software-client/software_client/v1/deploy.py
index 4aa9d2e5..76f943ef 100644
--- a/software-client/software_client/v1/deploy.py
+++ b/software-client/software_client/v1/deploy.py
@@ -54,10 +54,10 @@ class DeployManager(base.Manager):
 
     def host(self, args):
         # args.deployment is a string
-        agent_ip = args.agent
+        hostname = args.host
 
         # Issue deploy_host request and poll for results
-        path = "/v1/software/deploy_host/%s" % (agent_ip)
+        path = "/v1/software/deploy_host/%s" % (hostname)
 
         if args.force:
             path += "/force"
@@ -69,7 +69,8 @@ class DeployManager(base.Manager):
                 print(data["error"])
                 rc = 1
             else:
-                rc = self.wait_for_install_complete(agent_ip)
+                # NOTE(bqian) should consider returning host_list instead.
+                rc = self.wait_for_install_complete(hostname)
         elif req.status_code == 500:
             print("An internal error has occurred. "
                 "Please check /var/log/software.log for details")
@@ -84,26 +85,20 @@ class DeployManager(base.Manager):
         return rc
 
     def activate(self, args):
-        # args.deployment is a string
-        deployment = args.deployment
-
         # Ignore interrupts during this function
         signal.signal(signal.SIGINT, signal.SIG_IGN)
 
         # Issue deploy_start request
-        path = "/v1/software/deploy_activate/%s" % (deployment)
+        path = "/v1/software/deploy_activate"
 
         return self._create(path, body={})
 
     def complete(self, args):
-        # args.deployment is a string
-        deployment = args.deployment
-
         # Ignore interrupts during this function
         signal.signal(signal.SIGINT, signal.SIG_IGN)
 
         # Issue deploy_start request
-        path = "/v1/software/deploy_complete/%s" % (deployment)
+        path = "/v1/software/deploy_complete/"
 
         return self._create(path, body={})
 
@@ -113,40 +108,9 @@ class DeployManager(base.Manager):
 
     def show(self):
         path = '/v1/software/deploy'
-        req, data = self._list(path, "")
+        return self._list(path, "")
 
-        if req.status_code >= 500:
-            print("An internal error has occurred. Please check /var/log/software.log for details")
-            return 1
-        elif req.status_code >= 400:
-            print("Respond code %d. Error: %s" % (req.status_code, req.reason))
-            return 1
-
-        if not data:
-            print("No deploy in progress.")
-        else:
-            data = data[0]
-            data["reboot_required"] = "Yes" if data.get("reboot_required") else "No"
-            data_list = [[k, v] for k, v in data.items()]
-            transposed_data_list = list(zip(*data_list))
-
-            transposed_data_list[0] = [s.title().replace('_', ' ') for s in transposed_data_list[0]]
-            # Find the longest header string in each column
-            header_lengths = [len(str(x)) for x in transposed_data_list[0]]
-            # Find the longest content string in each column
-            content_lengths = [len(str(x)) for x in transposed_data_list[1]]
-            # Find the max of the two for each column
-            col_lengths = [(x if x > y else y) for x, y in zip(header_lengths, content_lengths)]
-
-            print('  '.join(f"{x.center(col_lengths[i])}" for i,
-                x in enumerate(transposed_data_list[0])))
-            print('  '.join('=' * length for length in col_lengths))
-            print('  '.join(f"{x.center(col_lengths[i])}" for i,
-                x in enumerate(transposed_data_list[1])))
-
-        return 0
-
-    def wait_for_install_complete(self, agent_ip):
+    def wait_for_install_complete(self, hostname):
         url = "/v1/software/host_list"
         rc = 0
 
@@ -163,55 +127,49 @@ class DeployManager(base.Manager):
             except requests.exceptions.ConnectionError:
                 # The local software-controller may have restarted.
                 retriable_count += 1
-                if retriable_count <= max_retries:
-                    continue
-                else:
+                if retriable_count > max_retries:
                     print("Lost communications with the software controller")
                     rc = 1
-                    break
-
-            if req.status_code == 200:
-                data = data.get("data", None)
-                if not data:
-                    print("Invalid host-list data returned:")
-                    utils.print_result_debug(req, data)
-                    rc = 1
-                    break
-
-                host_state = None
-
-                for d in data:
-                    if d['hostname'] == agent_ip:
-                        host_state = d.get('host_state')
-
-                if host_state == constants.DEPLOYING:
-                # Still deploying
-                    sys.stdout.write(".")
-                    sys.stdout.flush()
-                elif host_state == constants.FAILED:
-                    print("\nDeployment failed. Please check logs for details.")
-                    rc = 1
-                    break
-                elif host_state == constants.DEPLOYED:
-                    print("\nDeployment was successful.")
-                    rc = 0
-                    break
-                else:
-                    print("\nReported unknown state: %s" % host_state)
-                    rc = 1
-                    break
-
-            elif req.status_code == 500:
-                print("An internal error has occurred. Please check /var/log/software.log for details")
-                rc = 1
-                break
+                    return rc
             else:
-                m = re.search("(Error message:.*)", req.text, re.MULTILINE)
-                if m:
-                    print(m.group(0))
-                else:
-                    print(vars(req))
-                rc = 1
                 break
 
+        if req.status_code == 200:
+            if not data:
+                print("Invalid host-list data returned:")
+                utils.print_result_debug(req, data)
+                rc = 1
+
+            host_state = None
+
+            for d in data:
+                if d['hostname'] == hostname:
+                    host_state = d.get('host_state')
+
+            if host_state == constants.DEPLOYING:
+                print("\nDeployment started.")
+                rc = 0
+            elif host_state == constants.FAILED:
+                print("\nDeployment failed. Please check logs for details.")
+                rc = 1
+            elif host_state == constants.DEPLOYED:
+                print("\nDeployment was successful.")
+                rc = 0
+            elif host_state == constants.PENDING:
+                print("\nDeployment pending.")
+            else:
+                print("\nReported unknown state: %s" % host_state)
+                rc = 1
+
+        elif req.status_code == 500:
+            print("An internal error has occurred. Please check /var/log/software.log for details")
+            rc = 1
+        else:
+            m = re.search("(Error message:.*)", req.text, re.MULTILINE)
+            if m:
+                print(m.group(0))
+            else:
+                print(vars(req))
+            rc = 1
+
         return rc
diff --git a/software-client/software_client/v1/deploy_shell.py b/software-client/software_client/v1/deploy_shell.py
index 06f9e48f..95b234ec 100644
--- a/software-client/software_client/v1/deploy_shell.py
+++ b/software-client/software_client/v1/deploy_shell.py
@@ -21,20 +21,26 @@ from software_client.common import utils
            help="List all deployments that have this state")
 def do_show(cc, args):
     """Show the software deployments states"""
-    # TODO(bqian) modify the cli to display with generic tabulated output
-    return cc.deploy.show()
+    resp, data = cc.deploy.show()
+    if args.debug:
+        utils.print_result_debug(resp, data)
+    else:
+        header_data_list = {"From Release": "from_release", "To Release": "to_release", "RR": "reboot_required", "State": "state"}
+        utils.display_result_list(header_data_list, data)
+
+    return utils.check_rc(resp, data)
 
 
 def do_host_list(cc, args):
     """List of hosts for software deployment """
-    req, data = cc.deploy.host_list()
-    # TODO(bqian) modify display with generic tabulated output
+    resp, data = cc.deploy.host_list()
     if args.debug:
-        utils.print_result_debug(req, data)
+        utils.print_result_debug(resp, data)
     else:
-        utils.print_software_deploy_host_list_result(req, data)
+        header_data_list = {"Host": "hostname", "From Release": "software_release", "To Release": "target_release", "RR": "reboot_required", "State": "host_state"}
+        utils.display_result_list(header_data_list, data)
 
-    return utils.check_rc(req, data)
+    return utils.check_rc(resp, data)
 
 
 @utils.arg('deployment',
@@ -68,17 +74,17 @@ def do_precheck(cc, args):
            help='Allow bypassing non-critical checks')
 def do_start(cc, args):
     """Start the software deployment"""
-    req, data = cc.deploy.start(args)
+    resp, data = cc.deploy.start(args)
     if args.debug:
-        utils.print_result_debug(req, data)
+        utils.print_result_debug(resp, data)
     else:
-        utils.print_software_op_result(req, data)
+        utils.display_info(resp)
 
-    return utils.check_rc(req, data)
+    return utils.check_rc(resp, data)
 
 
-@utils.arg('agent',
-           help="Agent on which host deploy is triggered")
+@utils.arg('host',
+           help="Name of the host that the deploy is triggered")
 @utils.arg('-f',
            '--force',
            action='store_true',
@@ -89,8 +95,6 @@ def do_host(cc, args):
     return cc.deploy.host(args)
 
 
-@utils.arg('deployment',
-           help='Deployment ID to activate')
 def do_activate(cc, args):
     """Activate the software deployment"""
     req, data = cc.deploy.activate(args)
@@ -101,8 +105,6 @@ def do_activate(cc, args):
 
     return utils.check_rc(req, data)
 
-@utils.arg('deployment',
-           help='Deployment ID to complete')
 def do_complete(cc, args):
     """Complete the software deployment"""
     req, data = cc.deploy.complete(args)
diff --git a/software-client/software_client/v1/release_shell.py b/software-client/software_client/v1/release_shell.py
index ed76bce3..433885b9 100644
--- a/software-client/software_client/v1/release_shell.py
+++ b/software-client/software_client/v1/release_shell.py
@@ -22,10 +22,8 @@ def do_list(cc, args):
     if args.debug:
         utils.print_result_debug(req, data)
     else:
-        header_data_list = ["Release", "RR", "State"]
-        data_list = [(k, v["reboot_required"], v["state"]) for k, v in data["sd"].items()]
-        has_error = 'error' in data and data["error"]
-        utils.print_result_list(header_data_list, data_list, has_error)
+        header_data_list = {"Release": "release_id", "RR": "reboot_required", "State": "state"}
+        utils.display_result_list(header_data_list, data)
 
     return utils.check_rc(req, data)
 
@@ -45,7 +43,8 @@ def do_show(cc, args):
     if args.debug:
         utils.print_result_debug(req, data)
     else:
-        utils.print_release_show_result(req, data, list_packages=list_packages)
+        for d in data:
+            utils.display_detail_result(d)
 
     return utils.check_rc(req, data)
 
diff --git a/software/scripts/deploy-precheck b/software/scripts/deploy-precheck
index c28a1838..cdb72548 100644
--- a/software/scripts/deploy-precheck
+++ b/software/scripts/deploy-precheck
@@ -100,7 +100,7 @@ class HealthCheck(object):
             print("Could not check required patches...")
             return False, required_patches
 
-        applied_patches = list(response.json()["sd"].keys())
+        applied_patches = [release['release_id'] for release in response.json()]
         missing_patch = list(set(required_patches) - set(applied_patches))
         if missing_patch:
             success = False
diff --git a/software/scripts/prep-data-migration b/software/scripts/prep-data-migration
index 4337b50d..2db25c19 100644
--- a/software/scripts/prep-data-migration
+++ b/software/scripts/prep-data-migration
@@ -240,6 +240,8 @@ class DataMigration(object):
             platform_config_dir = os.path.join(PLATFORM_PATH, "config")
             from_config_dir = os.path.join(platform_config_dir, self.from_release)
             to_config_dir = os.path.join(platform_config_dir, self.to_release)
+            if os.path.isdir(to_config_dir):
+                shutil.rmtree(to_config_dir)
             shutil.copytree(from_config_dir, to_config_dir)
         except Exception as e:
             LOG.exception("Failed to create platform config for release %s. "
diff --git a/software/software/api/controllers/v1/software.py b/software/software/api/controllers/v1/software.py
index 7379a324..59fc0f06 100644
--- a/software/software/api/controllers/v1/software.py
+++ b/software/software/api/controllers/v1/software.py
@@ -12,11 +12,12 @@ from pecan import expose
 from pecan import request
 import shutil
 
+from software import constants
 from software.exceptions import SoftwareError
 from software.exceptions import SoftwareServiceError
+from software.release_data import reload_release_data
 from software.software_controller import sc
-import software.utils as utils
-import software.constants as constants
+from software import utils
 
 
 LOG = logging.getLogger('main_logger')
@@ -26,6 +27,7 @@ class SoftwareAPIController(object):
 
     @expose('json')
     def commit_patch(self, *args):
+        reload_release_data()
         result = sc.patch_commit(list(args))
         sc.software_sync()
 
@@ -33,12 +35,14 @@ class SoftwareAPIController(object):
 
     @expose('json')
     def commit_dry_run(self, *args):
+        reload_release_data()
         result = sc.patch_commit(list(args), dry_run=True)
         return result
 
     @expose('json')
     @expose('query.xml', content_type='application/xml')
     def delete(self, *args):
+        reload_release_data()
         result = sc.software_release_delete_api(list(args))
         sc.software_sync()
 
@@ -46,28 +50,26 @@ class SoftwareAPIController(object):
 
     @expose('json')
     @expose('query.xml', content_type='application/xml')
-    def deploy_activate(self, *args):
-        if sc.any_patch_host_installing():
-            raise SoftwareServiceError(error="Rejected: One or more nodes are installing a release.")
+    def deploy_activate(self):
+        reload_release_data()
 
-        result = sc.software_deploy_activate_api(list(args)[0])
+        result = sc.software_deploy_activate_api()
         sc.software_sync()
         return result
 
     @expose('json')
     @expose('query.xml', content_type='application/xml')
-    def deploy_complete(self, *args):
-        if sc.any_patch_host_installing():
-            raise SoftwareServiceError(error="Rejected: One or more nodes are installing a release.")
-
-        result = sc.software_deploy_complete_api(list(args)[0])
+    def deploy_complete(self):
+        reload_release_data()
 
+        result = sc.software_deploy_complete_api()
         sc.software_sync()
         return result
 
     @expose('json')
     @expose('query.xml', content_type='application/xml')
     def deploy_host(self, *args):
+        reload_release_data()
         if len(list(args)) == 0:
             return dict(error="Host must be specified for install")
         force = False
@@ -81,6 +83,7 @@ class SoftwareAPIController(object):
     @expose('json')
     @expose('query.xml', content_type='application/xml')
     def deploy_precheck(self, *args, **kwargs):
+        reload_release_data()
         force = False
         if 'force' in list(args):
             force = True
@@ -92,6 +95,7 @@ class SoftwareAPIController(object):
     @expose('json')
     @expose('query.xml', content_type='application/xml')
     def deploy_start(self, *args, **kwargs):
+        reload_release_data()
         # if --force is provided
         force = 'force' in list(args)
 
@@ -107,6 +111,7 @@ class SoftwareAPIController(object):
 
     @expose('json', method="GET")
     def deploy(self):
+        reload_release_data()
         from_release = request.GET.get("from_release")
         to_release = request.GET.get("to_release")
         result = sc.software_deploy_show_api(from_release, to_release)
@@ -115,25 +120,30 @@ class SoftwareAPIController(object):
     @expose('json')
     @expose('query.xml', content_type='application/xml')
     def install_local(self):
+        reload_release_data()
         result = sc.software_install_local_api()
 
         return result
 
     @expose('json')
     def is_available(self, *args):
+        reload_release_data()
         return sc.is_available(list(args))
 
     @expose('json')
     def is_committed(self, *args):
+        reload_release_data()
         return sc.is_committed(list(args))
 
     @expose('json')
     def is_deployed(self, *args):
+        reload_release_data()
         return sc.is_deployed(list(args))
 
     @expose('json')
     @expose('show.xml', content_type='application/xml')
     def show(self, *args):
+        reload_release_data()
         result = sc.software_release_query_specific_cached(list(args))
 
         return result
@@ -141,6 +151,7 @@ class SoftwareAPIController(object):
     @expose('json')
     @expose('query.xml', content_type='application/xml')
     def upload(self):
+        reload_release_data()
         is_local = False
         temp_dir = None
         uploaded_files = []
@@ -186,12 +197,13 @@ class SoftwareAPIController(object):
     @expose('json')
     @expose('query.xml', content_type='application/xml')
     def query(self, **kwargs):
+        reload_release_data()
         sd = sc.software_release_query_cached(**kwargs)
-
-        return dict(sd=sd)
+        return sd
 
     @expose('json', method="GET")
     def host_list(self):
+        reload_release_data()
         result = sc.deploy_host_list()
         return result
 
diff --git a/software/software/constants.py b/software/software/constants.py
index f20e44ff..8d9f953e 100644
--- a/software/software/constants.py
+++ b/software/software/constants.py
@@ -4,7 +4,6 @@ Copyright (c) 2023-2024 Wind River Systems, Inc.
 SPDX-License-Identifier: Apache-2.0
 
 """
-from enum import Enum
 import os
 try:
     # The tsconfig module is only available at runtime
@@ -34,91 +33,8 @@ RC_UNHEALTHY = 3
 DEPLOY_PRECHECK_SCRIPT = "deploy-precheck"
 DEPLOY_START_SCRIPT = "software-deploy-start"
 
-AVAILABLE_DIR = "%s/metadata/available" % SOFTWARE_STORAGE_DIR
-UNAVAILABLE_DIR = "%s/metadata/unavailable" % SOFTWARE_STORAGE_DIR
-DEPLOYING_DIR = "%s/metadata/deploying" % SOFTWARE_STORAGE_DIR
-DEPLOYED_DIR = "%s/metadata/deployed" % SOFTWARE_STORAGE_DIR
-REMOVING_DIR = "%s/metadata/removing" % SOFTWARE_STORAGE_DIR
-
-# TODO(bqian) states to be removed once current references are removed
-DEPLOYING_START_DIR = "%s/metadata/deploying_start" % SOFTWARE_STORAGE_DIR
-DEPLOYING_HOST_DIR = "%s/metadata/deploying_host" % SOFTWARE_STORAGE_DIR
-DEPLOYING_ACTIVATE_DIR = "%s/metadata/deploying_activate" % SOFTWARE_STORAGE_DIR
-DEPLOYING_COMPLETE_DIR = "%s/metadata/deploying_complete" % SOFTWARE_STORAGE_DIR
-ABORTING_DIR = "%s/metadata/aborting" % SOFTWARE_STORAGE_DIR
-COMMITTED_DIR = "%s/metadata/committed" % SOFTWARE_STORAGE_DIR
 SEMANTICS_DIR = "%s/semantics" % SOFTWARE_STORAGE_DIR
 
-DEPLOY_STATE_METADATA_DIR = \
-    [
-        AVAILABLE_DIR,
-        UNAVAILABLE_DIR,
-        DEPLOYING_DIR,
-        DEPLOYED_DIR,
-        REMOVING_DIR,
-        # TODO(bqian) states to be removed once current references are removed
-        DEPLOYING_START_DIR,
-        DEPLOYING_HOST_DIR,
-        DEPLOYING_ACTIVATE_DIR,
-        DEPLOYING_COMPLETE_DIR,
-        ABORTING_DIR,
-        COMMITTED_DIR,
-    ]
-
-# new release state needs to be added to VALID_RELEASE_STATES list
-AVAILABLE = 'available'
-UNAVAILABLE = 'unavailable'
-DEPLOYING = 'deploying'
-DEPLOYED = 'deployed'
-REMOVING = 'removing'
-
-DELETABLE_STATE = [AVAILABLE, UNAVAILABLE]
-
-# TODO(bqian) states to be removed once current references are removed
-ABORTING = 'aborting'
-COMMITTED = 'committed'
-DEPLOYING_ACTIVATE = 'deploying-activate'
-DEPLOYING_COMPLETE = 'deploying-complete'
-DEPLOYING_HOST = 'deploying-host'
-DEPLOYING_START = 'deploying-start'
-UNAVAILABLE = 'unavailable'
-UNKNOWN = 'n/a'
-
-VALID_DEPLOY_START_STATES = [
-    AVAILABLE,
-    DEPLOYED,
-]
-
-# host deploy substate
-HOST_DEPLOY_PENDING = 'pending'
-HOST_DEPLOY_STARTED = 'deploy-started'
-HOST_DEPLOY_DONE = 'deploy-done'
-HOST_DEPLOY_FAILED = 'deploy-failed'
-
-VALID_HOST_DEPLOY_STATE = [
-    HOST_DEPLOY_PENDING,
-    HOST_DEPLOY_STARTED,
-    HOST_DEPLOY_DONE,
-    HOST_DEPLOY_FAILED
-]
-
-VALID_RELEASE_STATES = [AVAILABLE, UNAVAILABLE, DEPLOYING, DEPLOYED,
-                        REMOVING]
-
-RELEASE_STATE_TO_DIR_MAP = {AVAILABLE: AVAILABLE_DIR,
-                            UNAVAILABLE: UNAVAILABLE_DIR,
-                            DEPLOYING: DEPLOYING_DIR,
-                            DEPLOYED: DEPLOYED_DIR,
-                            REMOVING: REMOVING_DIR}
-
-# valid release state transition below could still be changed as
-# development continue
-RELEASE_STATE_VALID_TRANSITION = {
-    AVAILABLE: [DEPLOYING],
-    DEPLOYING: [DEPLOYED],
-    DEPLOYED: [REMOVING, UNAVAILABLE]
-}
-
 STATUS_DEVELOPEMENT = 'DEV'
 STATUS_OBSOLETE = 'OBS'
 STATUS_RELEASED = 'REL'
@@ -147,11 +63,6 @@ SEMANTIC_ACTIONS = [SEMANTIC_PREAPPLY, SEMANTIC_PREREMOVE]
 
 CHECKOUT_FOLDER = "checked_out_commit"
 
-DEPLOYMENT_STATE_ACTIVE = "Active"
-DEPLOYMENT_STATE_INACTIVE = "Inactive"
-DEPLOYMENT_STATE_PRESTAGING = "Prestaging"
-DEPLOYMENT_STATE_PRESTAGED = "Prestaged"
-
 FEED_DIR = "/var/www/pages/feed/"
 UPGRADE_FEED_DIR = FEED_DIR
 TMP_DIR = "/tmp"
@@ -183,23 +94,3 @@ LAST_IN_SYNC = "last_in_sync"
 
 SYSTEM_MODE_SIMPLEX = "simplex"
 SYSTEM_MODE_DUPLEX = "duplex"
-
-
-
-class DEPLOY_STATES(Enum):
-    ACTIVATE = 'activate'
-    ACTIVATE_DONE = 'activate-done'
-    ACTIVATE_FAILED = 'activate-failed'
-    START = 'start'
-    START_DONE = 'start-done'
-    START_FAILED = 'start-failed'
-    HOST = 'host'
-    HOST_DONE = 'host-done'
-    HOST_FAILED = 'host-failed'
-
-
-class DEPLOY_HOST_STATES(Enum):
-    DEPLOYED = 'deployed'
-    DEPLOYING = 'deploying'
-    FAILED = 'failed'
-    PENDING = 'pending'
diff --git a/software/software/db/api.py b/software/software/db/api.py
index 13f1397f..316f1fcb 100644
--- a/software/software/db/api.py
+++ b/software/software/db/api.py
@@ -9,7 +9,7 @@ import logging
 import threading
 from software.software_entities import DeployHandler
 from software.software_entities import DeployHostHandler
-from software.constants import DEPLOY_STATES
+from software.states import DEPLOY_STATES
 
 LOG = logging.getLogger('main_logger')
 
@@ -32,9 +32,9 @@ class SoftwareAPI:
         self.deploy_handler = DeployHandler()
         self.deploy_host_handler = DeployHostHandler()
 
-    def create_deploy(self, from_release, to_release, reboot_required: bool):
+    def create_deploy(self, from_release, to_release, feed_repo, commit_id, reboot_required: bool):
         self.begin_update()
-        self.deploy_handler.create(from_release, to_release, reboot_required)
+        self.deploy_handler.create(from_release, to_release, feed_repo, commit_id, reboot_required)
         self.end_update()
 
     def get_deploy(self, from_release, to_release):
@@ -79,6 +79,13 @@ class SoftwareAPI:
         finally:
             self.end_update()
 
+    def get_deploy_host_by_hostname(self, hostname):
+        self.begin_update()
+        try:
+            return self.deploy_host_handler.query(hostname)  # handler result is returned unchanged
+        finally:
+            self.end_update()  # runs even if query() raises, keeping begin/end paired
+
     def update_deploy_host(self, hostname, state):
         self.begin_update()
         try:
diff --git a/software/software/deploy_host_state.py b/software/software/deploy_host_state.py
new file mode 100644
index 00000000..5d97c09d
--- /dev/null
+++ b/software/software/deploy_host_state.py
@@ -0,0 +1,69 @@
+"""
+Copyright (c) 2024 Wind River Systems, Inc.
+
+SPDX-License-Identifier: Apache-2.0
+
+"""
+
+import logging
+
+from software.db.api import get_instance
+from software.exceptions import InvalidOperation
+from software.states import DEPLOY_HOST_STATES
+
+LOG = logging.getLogger('main_logger')
+
+
+deploy_host_state_transition = {  # current host state -> states it is allowed to move to
+    DEPLOY_HOST_STATES.PENDING: [DEPLOY_HOST_STATES.DEPLOYING],
+    DEPLOY_HOST_STATES.DEPLOYING: [DEPLOY_HOST_STATES.DEPLOYED, DEPLOY_HOST_STATES.FAILED],
+    DEPLOY_HOST_STATES.FAILED: [DEPLOY_HOST_STATES.DEPLOYING],  # a failed host may be re-attempted
+    DEPLOY_HOST_STATES.DEPLOYED: []  # no outgoing transition is defined
+}
+
+
+class DeployHostState(object):
+    """Per-host deploy state machine driven by deploy_host_state_transition."""
+    _callbacks = []  # shared by all instances; each is called as callback(hostname, state)
+
+    @staticmethod
+    def register_event_listener(callback):
+        """Add callback to the listeners notified by transform(); duplicates are ignored."""
+        if callback not in DeployHostState._callbacks:
+            LOG.info("Register event listener %s", callback.__qualname__)
+            DeployHostState._callbacks.append(callback)
+
+    def __init__(self, hostname):
+        self._hostname = hostname
+
+    def check_transition(self, target_state: DEPLOY_HOST_STATES):
+        """Return True if this host has a deploy-host record and may move to target_state."""
+        deploy_host = get_instance().get_deploy_host_by_hostname(self._hostname)
+        if deploy_host is None:
+            LOG.error('Host %s is not part of deployment', self._hostname)
+            return False
+        cur_state = DEPLOY_HOST_STATES(deploy_host['state'])
+        return target_state in deploy_host_state_transition[cur_state]
+
+    def transform(self, target_state: DEPLOY_HOST_STATES):
+        """Move the host to target_state and notify listeners; raise InvalidOperation if not allowed."""
+        db_api = get_instance()
+        db_api.begin_update()
+        try:
+            if not self.check_transition(target_state):
+                msg = "Host can not transform to %s from current state" % target_state.value
+                raise InvalidOperation(msg)
+            db_api.update_deploy_host(self._hostname, target_state)
+            for callback in DeployHostState._callbacks:
+                callback(self._hostname, target_state)
+        finally:
+            db_api.end_update()
+
+    def deploy_started(self):
+        self.transform(DEPLOY_HOST_STATES.DEPLOYING)
+
+    def deployed(self):
+        self.transform(DEPLOY_HOST_STATES.DEPLOYED)
+
+    def deploy_failed(self):
+        self.transform(DEPLOY_HOST_STATES.FAILED)
diff --git a/software/software/deploy_state.py b/software/software/deploy_state.py
new file mode 100644
index 00000000..6884ad10
--- /dev/null
+++ b/software/software/deploy_state.py
@@ -0,0 +1,198 @@
+"""
+Copyright (c) 2024 Wind River Systems, Inc.
+
+SPDX-License-Identifier: Apache-2.0
+
+"""
+import logging
+
+from software.db.api import get_instance
+from software.exceptions import InvalidOperation
+from software.release_data import SWRelease
+from software.states import DEPLOY_STATES
+from software.states import DEPLOY_HOST_STATES
+
+LOG = logging.getLogger('main_logger')
+
+
+deploy_state_transition = {  # current deploy state -> states it is allowed to move to
+    None: [DEPLOY_STATES.START],  # Fake state for no deploy in progress
+    DEPLOY_STATES.START: [DEPLOY_STATES.START_DONE, DEPLOY_STATES.START_FAILED],
+    DEPLOY_STATES.START_FAILED: [DEPLOY_STATES.ABORT],
+    DEPLOY_STATES.ABORT: [DEPLOY_STATES.ABORT_DONE],
+    DEPLOY_STATES.START_DONE: [DEPLOY_STATES.ABORT, DEPLOY_STATES.HOST],
+    DEPLOY_STATES.HOST: [DEPLOY_STATES.HOST,
+                         DEPLOY_STATES.ABORT,
+                         DEPLOY_STATES.HOST_FAILED,
+                         DEPLOY_STATES.HOST_DONE],
+    DEPLOY_STATES.HOST_FAILED: [DEPLOY_STATES.HOST,  # deploy-host can reattempt
+                                DEPLOY_STATES.ABORT,
+                                DEPLOY_STATES.HOST_FAILED,
+                                DEPLOY_STATES.HOST_DONE],
+    DEPLOY_STATES.HOST_DONE: [DEPLOY_STATES.ABORT, DEPLOY_STATES.ACTIVATE],
+    DEPLOY_STATES.ACTIVATE: [DEPLOY_STATES.ACTIVATE_DONE, DEPLOY_STATES.ACTIVATE_FAILED],
+    DEPLOY_STATES.ACTIVATE_DONE: [DEPLOY_STATES.ABORT, None],  # None == deploy completed; abort after deploy-activated?
+    DEPLOY_STATES.ACTIVATE_FAILED: [DEPLOY_STATES.ACTIVATE, DEPLOY_STATES.ABORT],
+    DEPLOY_STATES.ABORT_DONE: []  # waiting to be deleted
+}
+
+
+class DeployState(object):
+    """Deploy-level state machine; transitions are governed by deploy_state_transition."""
+    _callbacks = []
+    _instance = None
+
+    @staticmethod
+    def register_event_listener(callback):
+        """register event listener to be triggered when a state transition is completed"""
+        if callback is not None:
+            if callback not in DeployState._callbacks:
+                LOG.debug("Register event listener %s", callback.__qualname__)
+                DeployState._callbacks.append(callback)
+
+    @staticmethod
+    def get_deploy_state():
+        """Return the state of the first deploy record as DEPLOY_STATES, or None if there is none."""
+        # get_instance below is the module-level db api accessor, not DeployState.get_instance
+        deploys = get_instance().get_deploy_all()
+        if not deploys:
+            return None  # No deploy in progress == None
+        return DEPLOY_STATES(deploys[0]['state'])
+
+    @staticmethod
+    def get_instance():
+        """Return the shared DeployState instance, creating it on first use."""
+        if DeployState._instance is None:
+            DeployState._instance = DeployState()
+        return DeployState._instance
+
+    @staticmethod
+    def host_deploy_updated(_hostname, _host_new_state):
+        """Re-evaluate the deploy state from the states of all deploy hosts (arguments are unused)."""
+        deploy_hosts = get_instance().get_deploy_host()
+        deploy_state = DeployState.get_instance()
+        all_states = []
+        for deploy_host in deploy_hosts:
+            if deploy_host['state'] not in all_states:
+                all_states.append(deploy_host['state'])
+
+        LOG.info("Host deploy state %s", all_states)
+        if DEPLOY_HOST_STATES.FAILED.value in all_states:
+            deploy_state.deploy_host_failed()
+        elif DEPLOY_HOST_STATES.PENDING.value in all_states or \
+                DEPLOY_HOST_STATES.DEPLOYING.value in all_states:
+            deploy_state.deploy_host()
+        elif all_states == [DEPLOY_HOST_STATES.DEPLOYED.value]:
+            deploy_state.deploy_host_completed()
+
+    def __init__(self):
+        self._from_release = None
+        self._to_release = None
+        self._reboot_required = None
+
+    def check_transition(self, target_state: DEPLOY_STATES):
+        """Return True if target_state is listed as reachable from the current deploy state."""
+        # get_deploy_state() already returns a DEPLOY_STATES member or None, no re-wrap needed
+        cur_state = DeployState.get_deploy_state()
+        if target_state in deploy_state_transition[cur_state]:
+            return True
+        # TODO(bqian) reverse lookup the operation that is not permitted, as feedback
+        msg = f"Deploy state transform not permitted from {str(cur_state)} to {str(target_state)}"
+        LOG.info(msg)
+        return False
+
+    def transform(self, target_state: DEPLOY_STATES):
+        """Move to target_state and notify listeners; raise InvalidOperation if not permitted."""
+        db_api = get_instance()
+        db_api.begin_update()
+        try:
+            if self.check_transition(target_state):
+                # None means not existing or deleting
+                if target_state is not None:
+                    db_api.update_deploy(target_state)
+            else:
+                # TODO(bqian) check the current state and suggest the possible next operation
+                if target_state is None:
+                    msg = "Deployment can not be deleted in current state."
+                else:
+                    msg = "Deploy can not transform to %s from current state" % target_state.value
+                raise InvalidOperation(msg)
+        finally:
+            db_api.end_update()
+
+        for callback in DeployState._callbacks:
+            LOG.debug("Calling event listener %s", callback.__qualname__)
+            callback(target_state)
+
+    # below are list of events to drive the FSM
+    def start(self, from_release, to_release, feed_repo, commit_id, reboot_required):
+        # start is special, it needs to create the deploy entity
+        if isinstance(from_release, SWRelease):
+            from_release = from_release.sw_release
+        if isinstance(to_release, SWRelease):
+            to_release = to_release.sw_release
+
+        msg = f"Start deploy {to_release}, current sw {from_release}"
+        LOG.info(msg)
+        db_api_instance = get_instance()
+        db_api_instance.create_deploy(from_release, to_release, feed_repo, commit_id, reboot_required)
+
+    def start_failed(self):
+        self.transform(DEPLOY_STATES.START_FAILED)
+
+    def start_done(self):
+        self.transform(DEPLOY_STATES.START_DONE)
+
+    def deploy_host(self):
+        self.transform(DEPLOY_STATES.HOST)
+
+    def abort(self):
+        self.transform(DEPLOY_STATES.ABORT)
+
+    def deploy_host_completed(self):
+        # depends on the deploy state, the deploy can be transformed
+        # to HOST_DONE (from DEPLOY_HOST) or ABORT_DONE (ABORT)
+        state = DeployState.get_deploy_state()
+        if state == DEPLOY_STATES.ABORT:
+            self.transform(DEPLOY_STATES.ABORT_DONE)
+        else:
+            self.transform(DEPLOY_STATES.HOST_DONE)
+
+    def deploy_host_failed(self):
+        self.transform(DEPLOY_STATES.HOST_FAILED)
+
+    def activate(self):
+        self.transform(DEPLOY_STATES.ACTIVATE)
+
+    def activate_completed(self):
+        self.transform(DEPLOY_STATES.ACTIVATE_DONE)
+
+    def activate_failed(self):
+        self.transform(DEPLOY_STATES.ACTIVATE_FAILED)
+
+    def completed(self):
+        """Finish the deploy: transform to None, then delete the deploy and deploy host entities."""
+        self.transform(None)
+        db_api = get_instance()
+        db_api.begin_update()
+        try:
+            db_api.delete_deploy_host_all()
+            db_api.delete_deploy()
+        finally:
+            db_api.end_update()
+
+
+def require_deploy_state(require_states, prompt):
+    """Build a decorator that only runs the wrapped callable while the current
+    deploy state is one of require_states; otherwise raise InvalidOperation."""
+    def wrap(func):
+        def exec_op(*args, **kwargs):
+            current = DeployState.get_deploy_state()
+            if current not in require_states:
+                # an empty/None prompt yields an empty error message
+                text = prompt.format(state=current, require_states=require_states) if prompt else ""
+                raise InvalidOperation(text)
+            # state is acceptable: delegate and hand back the result untouched
+            return func(*args, **kwargs)
+        return exec_op
+    return wrap
diff --git a/software/software/exceptions.py b/software/software/exceptions.py
index 3e49897d..aa8bdf1a 100644
--- a/software/software/exceptions.py
+++ b/software/software/exceptions.py
@@ -6,6 +6,57 @@ SPDX-License-Identifier: Apache-2.0
 """
 
 
+class InternalError(Exception):
+    """This is an internal error aka bug"""
+    pass
+
+
+class SoftwareServiceError(Exception):
+    """
+    This is a service error, such as file system issue or configuration
+    issue, which is expected at design time for a valid reason.
+    This exception type will provide detail information to the user.
+    see ExceptionHook for detail
+    """
+    def __init__(self, info="", warn="", error=""):
+        self._info = info
+        self._warn = warn
+        self._error = error
+
+    @property
+    def info(self):
+        return self._info if self._info is not None else ""
+
+    @property
+    def warning(self):
+        return self._warn if self._warn is not None else ""
+
+    @property
+    def error(self):
+        return self._error if self._error is not None else ""
+
+
+class InvalidOperation(SoftwareServiceError):
+    """Invalid operation, such as deploy a host that is already deployed """
+    def __init__(self, msg):
+        super().__init__(error=msg)  # msg is exposed through the inherited .error property
+
+
+class ReleaseNotFound(SoftwareServiceError):
+    """Raised when one or more release ids can not be found; accepts one id or a list of ids."""
+    def __init__(self, release_ids):
+        ids = release_ids if isinstance(release_ids, list) else [release_ids]
+        super().__init__(error="Release %s can not be found" % ', '.join(map(str, ids)))
+
+
+class HostNotFound(SoftwareServiceError):  # service error carrying "Host <hostname> can not be found"
+    def __init__(self, hostname):
+        super().__init__(error="Host %s can not be found" % hostname)
+
+
+# TODO(bqian) gradually convert SoftwareError based exception to
+# either SoftwareServiceError for user visible exceptions, or
+# InternalError for internal error (bug)
 class SoftwareError(Exception):
     """Base class for software exceptions."""
 
@@ -57,7 +108,7 @@ class SoftwareFail(SoftwareError):
     pass
 
 
-class ReleaseValidationFailure(SoftwareError):
+class ReleaseValidationFailure(SoftwareServiceError):
     """Release validation error."""
     pass
 
@@ -67,7 +118,7 @@ class UpgradeNotSupported(SoftwareError):
     pass
 
 
-class ReleaseMismatchFailure(SoftwareError):
+class ReleaseMismatchFailure(SoftwareServiceError):
     """Release mismatch error."""
     pass
 
@@ -128,33 +179,3 @@ class FileSystemError(SoftwareError):
     Likely fixable by a root user.
     """
     pass
-
-
-class InternalError(Exception):
-    """This is an internal error aka bug"""
-    pass
-
-
-class SoftwareServiceError(Exception):
-    """
-    This is a service error, such as file system issue or configuration
-    issue, which is expected at design time for a valid reason.
-    This exception type will provide detail information to the user.
-    see ExceptionHook for detail
-    """
-    def __init__(self, info="", warn="", error=""):
-        self._info = info
-        self._warn = warn
-        self._error = error
-
-    @property
-    def info(self):
-        return self._info if self._info is not None else ""
-
-    @property
-    def warning(self):
-        return self._warn if self._warn is not None else ""
-
-    @property
-    def error(self):
-        return self._error if self._error is not None else ""
diff --git a/software/software/parsable_error.py b/software/software/parsable_error.py
index b526fd37..706cc341 100644
--- a/software/software/parsable_error.py
+++ b/software/software/parsable_error.py
@@ -84,11 +84,12 @@ class ParsableErrorMiddleware(object):
                     # simple check xml is valid
                     body = [et.ElementTree.tostring(
                             et.ElementTree.fromstring('<error_message>' +
-                                '\n'.join(app_iter) + '</error_message>'))]
+                                                      '\n'.join(app_iter) +
+                                                      '</error_message>'))]
                 except et.ElementTree.ParseError as err:
                     LOG.error('Error parsing HTTP response: %s' % err)
                     body = ['<error_message>%s' % state['status_code'] +
-                        '</error_message>']
+                            '</error_message>']
                 state['headers'].append(('Content-Type', 'application/xml'))
             else:
                 if six.PY3:
diff --git a/software/software/release_data.py b/software/software/release_data.py
index beeef3e1..26713204 100644
--- a/software/software/release_data.py
+++ b/software/software/release_data.py
@@ -7,11 +7,13 @@
 import os
 from packaging import version
 import shutil
-from software import constants
+import threading
+from software import states
 from software.exceptions import FileSystemError
-from software.exceptions import InternalError
+from software.exceptions import ReleaseNotFound
 from software.software_functions import LOG
 from software import utils
+from software.software_functions import ReleaseData
 
 
 class SWRelease(object):
@@ -22,6 +24,7 @@ class SWRelease(object):
         self._metadata = metadata
         self._contents = contents
         self._sw_version = None
+        self._release = None
 
     @property
     def metadata(self):
@@ -40,21 +43,8 @@ class SWRelease(object):
         return self.metadata['state']
 
     @staticmethod
-    def is_valid_state_transition(from_state, to_state):
-        if to_state not in constants.VALID_RELEASE_STATES:
-            msg = "Invalid state %s." % to_state
-            LOG.error(msg)
-            # this is a bug
-            raise InternalError(msg)
-
-        if from_state in constants.RELEASE_STATE_VALID_TRANSITION:
-            if to_state in constants.RELEASE_STATE_VALID_TRANSITION[from_state]:
-                return True
-        return False
-
-    @staticmethod
-    def ensure_state_transition(to_state):
-        to_dir = constants.RELEASE_STATE_TO_DIR_MAP[to_state]
+    def _ensure_state_transition(to_state):
+        to_dir = states.RELEASE_STATE_TO_DIR_MAP[to_state]
         if not os.path.isdir(to_dir):
             try:
                 os.makedirs(to_dir, mode=0o755, exist_ok=True)
@@ -63,27 +53,27 @@ class SWRelease(object):
                 raise FileSystemError(error)
 
     def update_state(self, state):
-        if SWRelease.is_valid_state_transition(self.state, state):
-            LOG.info("%s state from %s to %s" % (self.id, self.state, state))
-            SWRelease.ensure_state_transition(state)
+        LOG.info("%s state from %s to %s" % (self.id, self.state, state))
+        SWRelease._ensure_state_transition(state)
 
-            to_dir = constants.RELEASE_STATE_TO_DIR_MAP[state]
-            from_dir = constants.RELEASE_STATE_TO_DIR_MAP[self.state]
-            try:
-                shutil.move("%s/%s-metadata.xml" % (from_dir, self.id),
-                            "%s/%s-metadata.xml" % (to_dir, self.id))
-            except shutil.Error:
-                msg = "Failed to move the metadata for %s" % self.id
-                LOG.exception(msg)
-                raise FileSystemError(msg)
+        to_dir = states.RELEASE_STATE_TO_DIR_MAP[state]
+        from_dir = states.RELEASE_STATE_TO_DIR_MAP[self.state]
+        try:
+            shutil.move("%s/%s-metadata.xml" % (from_dir, self.id),
+                        "%s/%s-metadata.xml" % (to_dir, self.id))
+        except shutil.Error:
+            msg = "Failed to move the metadata for %s" % self.id
+            LOG.exception(msg)
+            raise FileSystemError(msg)
 
-            self.metadata['state'] = state
-        else:
-            # this is a bug
-            error = "Invalid state transition %s, current is %s, target state is %s" % \
-                    (self.id, self.state, state)
-            LOG.info(error)
-            raise InternalError(error)
+        self.metadata['state'] = state
+
+    @property
+    def version_obj(self):
+        '''returns packaging.version object'''
+        if self._release is None:
+            self._release = version.parse(self.sw_release)
+        return self._release
 
     @property
     def sw_release(self):
@@ -97,7 +87,14 @@ class SWRelease(object):
             self._sw_version = utils.get_major_release_version(self.sw_release)
         return self._sw_version
 
+    @property
+    def component(self):
+        return self._get_by_key('component')
+
     def _get_latest_commit(self):
+        if 'number_of_commits' not in self.contents:
+            return None
+
         num_commits = self.contents['number_of_commits']
         if int(num_commits) > 0:
             commit_tag = "commit%s" % num_commits
@@ -119,6 +116,14 @@ class SWRelease(object):
             # latest commit
             return None
 
+    @property
+    def base_commit_id(self):
+        commit = None
+        base = self.contents.get('base')
+        if base:
+            commit = base.get('commit')
+        return commit
+
     def _get_by_key(self, key, default=None):
         if key in self._metadata:
             return self._metadata[key]
@@ -147,16 +152,28 @@ class SWRelease(object):
 
     @property
     def unremovable(self):
-        return self._get_by_key('unremovable')
+        return self._get_by_key('unremovable') == "Y"
 
     @property
     def reboot_required(self):
-        return self._get_by_key('reboot_required')
+        return self._get_by_key('reboot_required') == "Y"
+
+    @property
+    def requires_release_ids(self):
+        return self._get_by_key('requires') or []
+
+    @property
+    def packages(self):
+        return self._get_by_key('packages')
 
     @property
     def restart_script(self):
         return self._get_by_key('restart_script')
 
+    @property
+    def apply_active_release_only(self):
+        return self._get_by_key('apply_active_release_only')
+
     @property
     def commit_checksum(self):
         commit = self._get_latest_commit()
@@ -167,15 +184,76 @@ class SWRelease(object):
             # latest commit
             return None
 
+    def get_all_dependencies(self, filter_states=None):
+        """
+        Collect every release this one requires, directly or transitively.
+        :return: the required releases, sorted by their version ordering
+        :raise ReleaseNotFound: a visited release id is not in the collection
+        """
+        collection = get_SWReleaseCollection()
+        found = []
+
+        def _walk(rel_id):
+            rel = collection[rel_id]
+            if rel is None:
+                raise ReleaseNotFound([rel_id])
+            # traversal stops at a release whose state is filtered out
+            if filter_states and rel.state not in filter_states:
+                return
+            for dep_id in rel.requires_release_ids:
+                if dep_id not in found:
+                    found.append(dep_id)
+                    _walk(dep_id)
+
+        _walk(self.id)
+        return sorted([collection[dep_id] for dep_id in found])
+
+    def __lt__(self, other):
+        return self.version_obj < other.version_obj
+
+    def __le__(self, other):
+        return self.version_obj <= other.version_obj
+
+    def __eq__(self, other):
+        return self.version_obj == other.version_obj
+
+    def __ge__(self, other):
+        return self.version_obj >= other.version_obj
+
+    def __gt__(self, other):
+        return self.version_obj > other.version_obj
+
+    def __ne__(self, other):
+        return self.version_obj != other.version_obj
+
     @property
     def is_ga_release(self):
         ver = version.parse(self.sw_release)
-        _, _, pp = ver.release
+        if len(ver.release) == 2:
+            pp = 0
+        else:
+            _, _, pp = ver.release
         return pp == 0
 
     @property
     def is_deletable(self):
-        return self.state in constants.DELETABLE_STATE
+        return self.state in states.DELETABLE_STATE
+
+    def to_query_dict(self):
+        """Return a plain dict snapshot of this release's fields; list values are copied."""
+        return {"release_id": self.id,
+                "state": self.state,
+                "sw_version": self.sw_release,
+                "component": self.component,
+                "status": self.status,
+                "unremovable": self.unremovable,
+                "summary": self.summary,
+                "description": self.description,
+                "install_instructions": self.install_instructions,
+                "warnings": self.warnings,
+                "reboot_required": self.reboot_required,
+                "requires": self.requires_release_ids[:],
+                "packages": (self.packages or [])[:]}  # packages may be missing (None)
 
 
 class SWReleaseCollection(object):
@@ -191,11 +269,23 @@ class SWReleaseCollection(object):
             sw_release = SWRelease(rel_id, rel_data, contents)
             self._sw_releases[rel_id] = sw_release
 
+    @property
+    def running_release(self):
+        latest = None
+        for rel in self.iterate_releases_by_state(states.DEPLOYED):
+            if latest is None or rel.version_obj > latest.version_obj:
+                latest = rel
+
+        return latest
+
     def get_release_by_id(self, rel_id):
         if rel_id in self._sw_releases:
             return self._sw_releases[rel_id]
         return None
 
+    def __getitem__(self, rel_id):
+        return self.get_release_by_id(rel_id)
+
     def get_release_by_commit_id(self, commit_id):
         for _, sw_release in self._sw_releases:
             if sw_release.commit_id == commit_id:
@@ -219,15 +309,44 @@ class SWReleaseCollection(object):
             yield self._sw_releases[rel_id]
 
     def update_state(self, list_of_releases, state):
-        for release_id in list_of_releases:
-            release = self.get_release_by_id(release_id)
-            if release is not None:
-                if SWRelease.is_valid_state_transition(release.state, state):
-                    SWRelease.ensure_state_transition(state)
-            else:
-                LOG.error("release %s not found" % release_id)
-
         for release_id in list_of_releases:
             release = self.get_release_by_id(release_id)
             if release is not None:
                 release.update_state(state)
+
+
+class LocalStorage(object):
+    """Small key/value holder backed by a threading.local instance."""
+    def __init__(self):
+        self._storage = threading.local()
+
+    def get_value(self, key):
+        """Return the value stored under key, or None when it is not set."""
+        return getattr(self._storage, key, None)
+
+    def set_value(self, key, value):
+        setattr(self._storage, key, value)
+
+    def void_value(self, key):
+        """Drop key if present; a missing key is ignored."""
+        if hasattr(self._storage, key):
+            delattr(self._storage, key)
+
+
+_local_storage = LocalStorage()
+
+
+def get_SWReleaseCollection():
+    release_data = _local_storage.get_value('release_data')  # cached ReleaseData, if any
+    if release_data is None:
+        LOG.info("Load release_data")
+        release_data = ReleaseData()
+        release_data.load_all()
+        LOG.info("release_data loaded")
+        _local_storage.set_value('release_data', release_data)  # kept until reload_release_data()
+
+    return SWReleaseCollection(release_data)  # a new collection wrapper on every call
+
+
+def reload_release_data():
+    _local_storage.void_value('release_data')  # next get_SWReleaseCollection() call loads afresh
diff --git a/software/software/release_state.py b/software/software/release_state.py
new file mode 100644
index 00000000..388e0624
--- /dev/null
+++ b/software/software/release_state.py
@@ -0,0 +1,94 @@
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Copyright (c) 2024 Wind River Systems, Inc.
+#
+import logging
+
+from software import states
+from software.exceptions import ReleaseNotFound
+from software.release_data import get_SWReleaseCollection
+from software.release_data import reload_release_data
+
+
+LOG = logging.getLogger('main_logger')
+
+# valid release state transitions; the table below may still change as
+# development continues
+release_state_transition = {
+    states.AVAILABLE: [states.DEPLOYING],
+    states.DEPLOYING: [states.DEPLOYED, states.AVAILABLE],
+    states.DEPLOYED: [states.REMOVING, states.UNAVAILABLE, states.COMMITTED],
+    states.REMOVING: [states.AVAILABLE],
+    states.COMMITTED: [],
+    states.UNAVAILABLE: [],
+}
+
+
+class ReleaseState(object):
+    """State machine over a group of releases; see release_state_transition."""
+    def __init__(self, release_ids=None, release_state=None):
+        """Select releases by ids, else by state; raise ReleaseNotFound for unknown ids."""
+        self._release_ids = []  # defined even when neither selector is given
+        not_found_list = []
+        release_collection = get_SWReleaseCollection()
+        if release_ids:
+            self._release_ids = release_ids[:]
+            not_found_list = [rel_id for rel_id in release_ids if release_collection[rel_id] is None]
+        elif release_state:
+            self._release_ids = [rel.id for rel in release_collection.iterate_releases_by_state(release_state)]
+
+        if len(not_found_list) > 0:
+            raise ReleaseNotFound(not_found_list)
+
+    @staticmethod
+    def deploy_updated(target_state):
+        if target_state is None:  # completed
+            deploying = ReleaseState(release_state=states.DEPLOYING)
+            # for a major release, the currently deployed releases are marked as replaced first
+            if deploying.is_major_release_deployment():
+                deployed = ReleaseState(release_state=states.DEPLOYED)
+                deployed.replaced()
+
+            deploying.deploy_completed()
+
+    def check_transition(self, target_state):
+        """check ALL releases can transform to target state"""
+        release_collection = get_SWReleaseCollection()
+        for rel_id in self._release_ids:
+            state = release_collection[rel_id].state
+            if target_state not in release_state_transition[state]:
+                return False
+        return True
+
+    def transform(self, target_state):
+        if self.check_transition(target_state):
+            release_collection = get_SWReleaseCollection()
+            release_collection.update_state(self._release_ids, target_state)
+        # NOTE: a disallowed transition is skipped silently; cached data is dropped either way
+        reload_release_data()
+
+    def is_major_release_deployment(self):
+        release_collection = get_SWReleaseCollection()
+        return any(release_collection.get_release_by_id(rel_id).is_ga_release
+                   for rel_id in self._release_ids)
+
+    def start_deploy(self):
+        self.transform(states.DEPLOYING)
+
+    def deploy_completed(self):
+        self.transform(states.DEPLOYED)
+
+    def committed(self):
+        self.transform(states.COMMITTED)
+
+    def replaced(self):
+        """
+        Current running release is replaced with a new deployed release
+        This indicates a major release deploy is completed and running
+        release become "unavailable"
+        """
+        self.transform(states.UNAVAILABLE)
+
+    def start_remove(self):
+        self.transform(states.REMOVING)
diff --git a/software/software/software_controller.py b/software/software/software_controller.py
index 6b8cc8cd..66975fbf 100644
--- a/software/software/software_controller.py
+++ b/software/software/software_controller.py
@@ -13,6 +13,7 @@ import configparser
 import gc
 import json
 import os
+from packaging import version
 import select
 import sh
 import shutil
@@ -33,10 +34,12 @@ import software.apt_utils as apt_utils
 import software.ostree_utils as ostree_utils
 from software.api import app
 from software.authapi import app as auth_app
-from software.constants import DEPLOY_STATES
+from software.states import DEPLOY_STATES
 from software.base import PatchService
 from software.dc_utils import get_subcloud_groupby_version
+from software.deploy_state import require_deploy_state
 from software.exceptions import APTOSTreeCommandFail
+from software.exceptions import HostNotFound
 from software.exceptions import InternalError
 from software.exceptions import MetadataFail
 from software.exceptions import UpgradeNotSupported
@@ -46,10 +49,10 @@ from software.exceptions import SoftwareError
 from software.exceptions import SoftwareFail
 from software.exceptions import ReleaseInvalidRequest
 from software.exceptions import ReleaseValidationFailure
-from software.exceptions import ReleaseMismatchFailure
 from software.exceptions import ReleaseIsoDeleteFailure
 from software.exceptions import SoftwareServiceError
-from software.release_data import SWReleaseCollection
+from software.release_data import reload_release_data
+from software.release_data import get_SWReleaseCollection
 from software.software_functions import collect_current_load_for_hosts
 from software.software_functions import create_deploy_hosts
 from software.software_functions import parse_release_metadata
@@ -67,9 +70,11 @@ from software.software_functions import SW_VERSION
 from software.software_functions import LOG
 from software.software_functions import audit_log_info
 from software.software_functions import repo_root_dir
-from software.software_functions import ReleaseData
 from software.software_functions import is_deploy_state_in_sync
 from software.software_functions import is_deployment_in_progress
+from software.release_state import ReleaseState
+from software.deploy_host_state import DeployHostState
+from software.deploy_state import DeployState
 from software.release_verify import verify_files
 import software.config as cfg
 import software.utils as utils
@@ -80,6 +85,7 @@ from software.db.api import get_instance
 
 import software.messages as messages
 import software.constants as constants
+from software import states
 
 from tsconfig.tsconfig import INITIAL_CONFIG_COMPLETE_FLAG
 from tsconfig.tsconfig import INITIAL_CONTROLLER_CONFIG_COMPLETE
@@ -106,19 +112,6 @@ pending_queries = []
 thread_death = None
 keep_running = True
 
-DEPLOY_STATE_METADATA_DIR_DICT = \
-    {
-        constants.AVAILABLE: constants.AVAILABLE_DIR,
-        constants.UNAVAILABLE: constants.UNAVAILABLE_DIR,
-        constants.DEPLOYING_START: constants.DEPLOYING_START_DIR,
-        constants.DEPLOYING_HOST: constants.DEPLOYING_HOST_DIR,
-        constants.DEPLOYING_ACTIVATE: constants.DEPLOYING_ACTIVATE_DIR,
-        constants.DEPLOYING_COMPLETE: constants.DEPLOYING_COMPLETE_DIR,
-        constants.DEPLOYED: constants.DEPLOYED_DIR,
-        constants.REMOVING: constants.REMOVING_DIR,
-        constants.ABORTING: constants.ABORTING_DIR,
-        constants.COMMITTED: constants.COMMITTED_DIR,
-    }
 # Limit socket blocking to 5 seconds to allow for thread to shutdown
 api_socket_timeout = 5.0
 
@@ -318,6 +311,8 @@ class PatchMessageSyncReq(messages.PatchMessage):
         # We may need to do this in a separate thread, so that we continue to process hellos
         LOG.info("Handling sync req")
 
+        # NOTE(bqian) sync_from_nbr returns "False" if sync operations failed.
+        # A retry should be considered to deal with the potential failure.
         sc.sync_from_nbr(host)
 
         resp = PatchMessageSyncComplete()
@@ -566,13 +561,34 @@ class PatchMessageAgentInstallResp(messages.PatchMessage):
         # LOG.info("Handling hello ack")
 
         sc.hosts_lock.acquire()
-        if not addr[0] in sc.hosts:
-            sc.hosts[addr[0]] = AgentNeighbour(addr[0])
+        try:
+            # NOTE(bqian) this seems to be trying to tolerate a failure situation
+            # where a host is directed to install a patch but, during the installation,
+            # software-controller-daemon gets restarted.
+            # sc.hosts, which is volatile in-memory storage, should be removed and
+            # replaced with the permanent deploy-host entity.
+            ip = addr[0]
+            if ip not in sc.hosts:
+                sc.hosts[ip] = AgentNeighbour(ip)
 
-        sc.hosts[addr[0]].install_status = self.status
-        sc.hosts[addr[0]].install_pending = False
-        sc.hosts[addr[0]].install_reject_reason = self.reject_reason
-        sc.hosts_lock.release()
+            sc.hosts[ip].install_status = self.status
+            sc.hosts[ip].install_pending = False
+            sc.hosts[ip].install_reject_reason = self.reject_reason
+            hostname = sc.hosts[ip].hostname
+        finally:
+            sc.hosts_lock.release()
+
+        deploy_host_state = DeployHostState(hostname)
+        # NOTE(bqian) apparently it uses 2 booleans to indicate 2 situations,
+        # although there could be 4 combinations
+        if self.status:
+            deploy_host_state.deployed()
+            return
+        elif self.reject_reason:
+            deploy_host_state.deploy_failed()
+            return
+
+        LOG.error("Bug: shouldn't reach here")
 
     def send(self, sock):  # pylint: disable=unused-argument
         LOG.error("Should not get here")
@@ -686,14 +702,14 @@ class SWMessageDeployStateChanged(messages.PatchMessage):
 
         valid_agents = ['deploy-start']
         if 'agent' in data:
-            agent = data['agent']
+            self.agent = data['agent']
         else:
-            agent = 'unknown'
+            self.agent = 'unknown'
 
-        if agent not in valid_agents:
+        if self.agent not in valid_agents:
             # ignore msg from unknown senders
             LOG.info("%s received from unknown agent %s" %
-                     (messages.PATCHMSG_DEPLOY_STATE_CHANGED, agent))
+                     (messages.PATCHMSG_DEPLOY_STATE_CHANGED, self.agent))
             self.valid = False
 
         valid_state = {
@@ -705,20 +721,20 @@ class SWMessageDeployStateChanged(messages.PatchMessage):
             if deploy_state in valid_state:
                 self.deploy_state = valid_state[deploy_state]
                 LOG.info("%s received from %s with deploy-state %s" %
-                         (messages.PATCHMSG_DEPLOY_STATE_CHANGED, agent, deploy_state))
+                         (messages.PATCHMSG_DEPLOY_STATE_CHANGED, self.agent, deploy_state))
             else:
                 self.valid = False
                 LOG.error("%s received from %s with invalid deploy-state %s" %
-                          (messages.PATCHMSG_DEPLOY_STATE_CHANGED, agent, deploy_state))
+                          (messages.PATCHMSG_DEPLOY_STATE_CHANGED, self.agent, deploy_state))
 
         if 'hostname' in data and data['hostname']:
             self.hostname = data['hostname']
 
         if 'host-state' in data and data['host-state']:
             host_state = data['host-state']
-            if host_state not in constants.VALID_HOST_DEPLOY_STATE:
+            if host_state not in states.VALID_HOST_DEPLOY_STATE:
                 LOG.error("%s received from %s with invalid host-state %s" %
-                          (messages.PATCHMSG_DEPLOY_STATE_CHANGED, agent, host_state))
+                          (messages.PATCHMSG_DEPLOY_STATE_CHANGED, self.agent, host_state))
                 self.valid = False
             else:
                 self.host_state = host_state
@@ -728,7 +744,7 @@ class SWMessageDeployStateChanged(messages.PatchMessage):
 
         if not self.valid:
             LOG.error("%s received from %s as invalid %s" %
-                      (messages.PATCHMSG_DEPLOY_STATE_CHANGED, agent, data))
+                      (messages.PATCHMSG_DEPLOY_STATE_CHANGED, self.agent, data))
 
     def handle(self, sock, addr):
         global sc
@@ -763,7 +779,6 @@ class PatchController(PatchService):
         self.socket_lock = threading.RLock()
         self.controller_neighbours_lock = threading.RLock()
         self.hosts_lock = threading.RLock()
-        self.release_data_lock = threading.RLock()
 
         self.hosts = {}
         self.controller_neighbours = {}
@@ -783,8 +798,7 @@ class PatchController(PatchService):
         self.controller_address = None
         self.agent_address = None
         self.patch_op_counter = 1
-        self.release_data = ReleaseData()
-        self.release_data.load_all()
+        reload_release_data()
         try:
             self.latest_feed_commit = ostree_utils.get_feed_latest_commit(SW_VERSION)
         except OSTreeCommandFail:
@@ -824,11 +838,12 @@ class PatchController(PatchService):
                 if self.hostname == "controller-1" \
                 else "controller-1"
 
+        DeployHostState.register_event_listener(DeployState.host_deploy_updated)
+        DeployState.register_event_listener(ReleaseState.deploy_updated)
+
     @property
     def release_collection(self):
-        # for this stage, the SWReleaseCollection behaves as a broker which
-        # does not hold any release data. it only last one request
-        swrc = SWReleaseCollection(self.release_data)
+        swrc = get_SWReleaseCollection()
         return swrc
 
     def update_config(self):
@@ -886,6 +901,8 @@ class PatchController(PatchService):
         if self.patch_op_counter >= nbr_patch_op_counter:
             return
 
+        # NOTE(bqian) sync_from_nbr returns "False" if sync operations failed.
+        # A retry should be considered to deal with the potential failure.
         self.sync_from_nbr(host)
 
     def sync_from_nbr(self, host):
@@ -936,13 +953,13 @@ class PatchController(PatchService):
                     list_of_dirs = dir_names.stdout.decode("utf-8").rstrip().split()
 
                     for rel_dir in list_of_dirs:
-                        feed_ostree = "%s/%s/ostree_repo/" % (constants.FEED_OSTREE_BASE_DIR, rel_dir)
-                        if not os.path.isdir(feed_ostree):
-                            LOG.info("Skipping feed dir %s", feed_ostree)
+                        feed_repo = "%s/%s/ostree_repo/" % (constants.FEED_OSTREE_BASE_DIR, rel_dir)
+                        if not os.path.isdir(feed_repo):
+                            LOG.info("Skipping feed dir %s", feed_repo)
                             continue
-                        LOG.info("Syncing %s", feed_ostree)
+                        LOG.info("Syncing %s", feed_repo)
                         output = subprocess.check_output(["ostree",
-                                                          "--repo=%s" % feed_ostree,
+                                                          "--repo=%s" % feed_repo,
                                                           "pull",
                                                           "--depth=-1",
                                                           "--mirror",
@@ -951,7 +968,7 @@ class PatchController(PatchService):
                         output = subprocess.check_output(["ostree",
                                                           "summary",
                                                           "--update",
-                                                          "--repo=%s" % feed_ostree],
+                                                          "--repo=%s" % feed_repo],
                                                          stderr=subprocess.STDOUT)
             LOG.info("Synced to mate feed via ostree pull: %s", output)
         except subprocess.CalledProcessError:
@@ -960,20 +977,18 @@ class PatchController(PatchService):
 
         self.read_state_file()
 
-        with self.release_data_lock:
-            with self.hosts_lock:
-                self.interim_state = {}
-                self.release_data.load_all()
-                self.check_patch_states()
+        self.interim_state = {}
+        reload_release_data()
+        self.check_patch_states()
 
-            if os.path.exists(app_dependency_filename):
-                try:
-                    with open(app_dependency_filename, 'r') as f:
-                        self.app_dependencies = json.loads(f.read())
-                except Exception:
-                    LOG.exception("Failed to read app dependencies: %s", app_dependency_filename)
-            else:
-                self.app_dependencies = {}
+        if os.path.exists(app_dependency_filename):
+            try:
+                with open(app_dependency_filename, 'r') as f:
+                    self.app_dependencies = json.loads(f.read())
+            except Exception:
+                LOG.exception("Failed to read app dependencies: %s", app_dependency_filename)
+        else:
+            self.app_dependencies = {}
 
         return True
 
@@ -985,13 +1000,22 @@ class PatchController(PatchService):
         # Default to allowing in-service patching
         self.allow_insvc_patching = True
 
+        # NOTE(bqian) How is this loop relevant?
+        # allow_insvc_patching equals not required_reboot in deploy entity,
+        # see software_entity.
         for ip in (ip for ip in list(self.hosts) if self.hosts[ip].out_of_date):
-            for release_id in self.release_data.metadata:
-                if self.release_data.metadata[release_id].get("reboot_required") != "N" and \
-                   self.release_data.metadata[release_id]["state"] == constants.DEPLOYING_START:
+            for release in self.release_collection.iterate_releases():
+                # NOTE(bqian) below consolidates DEPLOYING_START to DEPLOYING.
+                # allow_insvc_patching equals not required_reboot in deploy entity,
+                # see software_entity.
+                # Also, it appears to be a bug to check the release state, as this
+                # ends up returning the default (True) when the state is not
+                # DEPLOYING_START, for example when checking during removal.
+                if release.reboot_required and release.state == states.DEPLOYING:
                     self.allow_insvc_patching = False
+        # NOTE(bqian) this function looks very buggy, should probably be rewritten
 
-    def get_release_dependency_list(self, release):
+    def get_release_dependency_list(self, release_id):
         """
         Returns a list of software releases that are required by this
         release.
@@ -1000,34 +1024,44 @@ class PatchController(PatchService):
                  input param patch_id='R3'
         :param release: The software release version
         """
-        if not self.release_data.metadata[release]["requires"]:
-            return []
-        else:
-            release_dependency_list = []
-            for req_release in self.release_data.metadata[release]["requires"]:
-                release_dependency_list.append(req_release)
-                release_dependency_list = release_dependency_list + \
-                    self.get_release_dependency_list(req_release)
-            return release_dependency_list
 
-    def get_release_required_by_list(self, release):
+        # TODO(bqian): this algorithm will fail if dependency is not sequential.
+        # e.g., if R5 requires R4 and R1, R4 requires R3 and R1, R3 requires R1
+        # this relation will bring R1 before R3.
+        # The change below does not fix the algorithm; it only converts direct use
+        # of release_data to the release_collection wrapper class.
+        release = self.release_collection.get_release_by_id(release_id)
+        if release is None:
+            error = f"Not all required releases are uploaded, missing {release_id}"
+            raise SoftwareServiceError(error=error)
+
+        release_dependency_list = []
+        for req_release in release.requires_release_ids:
+            release_dependency_list.append(req_release)
+            release_dependency_list = release_dependency_list + \
+                self.get_release_dependency_list(req_release)
+        return release_dependency_list
+
+    def get_release_required_by_list(self, release_id):
         """
         Returns a list of software releases that require this
         release.
         Example: If R3 requires R2 and R2 requires R1,
                  then this method will return ['R3', 'R2'] for
                  input param patch_id='R1'
-        :param release: The software release version
+        :param release_id: The software release id
         """
-        if release in self.release_data.metadata:
-            release_required_by_list = []
-            for req_release in self.release_data.metadata:
-                if release in self.release_data.metadata[req_release]["requires"]:
-                    release_required_by_list.append(req_release)
+        release_required_by_list = []
+        # NOTE(bqian) not sure why the check is needed. release_id is always
+        # from the release_data collection.
+        if self.release_collection.get_release_by_id(release_id):
+            for req_release in self.release_collection.iterate_releases():
+                if release_id in req_release.requires_release_ids:
+                    release_required_by_list.append(req_release.id)
                     release_required_by_list = release_required_by_list + \
-                        self.get_release_required_by_list(req_release)
-            return release_required_by_list
-        return []
+                        self.get_release_required_by_list(req_release.id)
+
+        return release_required_by_list
 
     def get_ostree_tar_filename(self, patch_sw_version, patch_id):
         '''
@@ -1044,10 +1078,12 @@ class PatchController(PatchService):
         Deletes the restart script (if any) associated with the patch
         :param patch_id: The patch ID
         '''
-        if not self.release_data.metadata[patch_id].get("restart_script"):
+        release = self.release_collection.get_release_by_id(patch_id)
+        restart_script = release.restart_script
+        if not restart_script:
             return
 
-        restart_script_path = "%s/%s" % (root_scripts_dir, self.release_data.metadata[patch_id]["restart_script"])
+        restart_script_path = "%s/%s" % (root_scripts_dir, restart_script)
         try:
             # Delete the metadata
             os.remove(restart_script_path)
@@ -1063,8 +1099,8 @@ class PatchController(PatchService):
 
         # Pass the current patch state to the semantic check as a series of args
         patch_state_args = []
-        for patch_id in list(self.release_data.metadata):
-            patch_state = '%s=%s' % (patch_id, self.release_data.metadata[patch_id]["state"])
+        for release in self.release_collection.iterate_releases():
+            patch_state = '%s=%s' % (release.id, release.state)
             patch_state_args += ['-p', patch_state]
 
         # Run semantic checks, if any
@@ -1136,25 +1172,11 @@ class PatchController(PatchService):
             # Restore /etc/hosts
             os.rename(ETC_HOSTS_BACKUP_FILE_PATH, ETC_HOSTS_FILE_PATH)
 
-        for release in sorted(list(self.release_data.metadata)):
-            if self.release_data.metadata[release]["state"] == constants.DEPLOYING_START:
-                self.release_data.metadata[release]["state"] = constants.DEPLOYED
-                try:
-                    shutil.move("%s/%s-metadata.xml" % (constants.DEPLOYING_START_DIR, release),
-                                "%s/%s-metadata.xml" % (constants.DEPLOYED_DIR, release))
-                except shutil.Error:
-                    msg = "Failed to move the metadata for %s" % release
-                    LOG.exception(msg)
-                    raise MetadataFail(msg)
-            elif self.release_data.metadata[release]["state"] == constants.REMOVING:
-                self.release_data.metadata[release]["state"] = constants.AVAILABLE
-                try:
-                    shutil.move("%s/%s-metadata.xml" % (constants.REMOVING_DIR, release),
-                                "%s/%s-metadata.xml" % (constants.AVAILABLE_DIR, release))
-                except shutil.Error:
-                    msg = "Failed to move the metadata for %s" % release
-                    LOG.exception(msg)
-                    raise MetadataFail(msg)
+        for release in self.release_collection.iterate_releases():
+            if release.state == states.DEPLOYING:
+                release.update_state(states.DEPLOYED)
+            elif release.state == states.REMOVING:
+                release.update_state(states.AVAILABLE)
 
         msg_info += "Software installation is complete.\n"
         msg_info += "Please reboot before continuing with configuration."
@@ -1184,11 +1206,10 @@ class PatchController(PatchService):
             LOG.info(msg)
             raise SoftwareServiceError(error=msg)
 
-    def _process_upload_upgrade_files(self, upgrade_files, release_data):
+    def _process_upload_upgrade_files(self, upgrade_files):
         """
         Process the uploaded upgrade files
         :param upgrade_files: dict of upgrade files
-        :param release_data: ReleaseData object
         :return: info, warning, error messages
         """
         local_info = ""
@@ -1201,15 +1222,16 @@ class PatchController(PatchService):
 
         to_release = None
         iso_mount_dir = None
+        all_good = True
         try:
-            if not verify_files([upgrade_files[constants.ISO_EXTENSION]],
-                                upgrade_files[constants.SIG_EXTENSION]):
-                raise ReleaseValidationFailure("Invalid signature file")
+            iso = upgrade_files[constants.ISO_EXTENSION]
+            sig = upgrade_files[constants.SIG_EXTENSION]
+            if not verify_files([iso], sig):
+                msg = "Software %s:%s signature validation failed" % (iso, sig)
+                raise ReleaseValidationFailure(error=msg)
 
-            msg = ("iso and signature files upload completed\n"
-                   "Importing iso is in progress\n")
-            LOG.info(msg)
-            local_info += msg
+            LOG.info("iso and signature files upload completed."
+                     "Importing iso is in progress")
 
             iso_file = upgrade_files.get(constants.ISO_EXTENSION)
 
@@ -1258,12 +1280,17 @@ class PatchController(PatchService):
             shutil.copyfile(metadata_file, to_file)
 
             # Update the release metadata
-            abs_stx_release_metadata_file = os.path.join(
-                iso_mount_dir, 'upgrades', f"{constants.RELEASE_GA_NAME % to_release}-metadata.xml")
-            release_data.parse_metadata(abs_stx_release_metadata_file, state=constants.AVAILABLE)
+            # metadata files have been copied over to the metadata/available directory
+            reload_release_data()
             LOG.info("Updated release metadata for %s", to_release)
 
             # Get release metadata
+            # NOTE(bqian) to_release is sw_version (MM.mm), the path isn't correct
+            # also prepatched iso needs to be handled.
+            # should go through the release_data to find the latest release of major release
+            # to_release
+            abs_stx_release_metadata_file = os.path.join(
+                iso_mount_dir, 'upgrades', f"{constants.RELEASE_GA_NAME % to_release}-metadata.xml")
             all_release_meta_info = parse_release_metadata(abs_stx_release_metadata_file)
             release_meta_info = {
                 os.path.basename(upgrade_files[constants.ISO_EXTENSION]): {
@@ -1275,25 +1302,20 @@ class PatchController(PatchService):
                     "sw_version": None,
                 }
             }
-
-        except ReleaseValidationFailure:
-            msg = "Upgrade file signature verification failed"
-            LOG.exception(msg)
-            local_error += msg + "\n"
-        except Exception as e:
-            msg = "Failed to process upgrade files. Error: %s" % str(e)
-            LOG.exception(msg)
-            local_error += msg + "\n"
-            # delete versioned directory
-            if to_release:
-                to_release_dir = os.path.join(constants.SOFTWARE_STORAGE_DIR, "rel-%s" % to_release)
-                shutil.rmtree(to_release_dir, ignore_errors=True)
+        except Exception:
+            all_good = False
+            raise
         finally:
             # Unmount the iso file
             if iso_mount_dir:
                 unmount_iso_load(iso_mount_dir)
                 LOG.info("Unmounted iso file %s", iso_file)
 
+            # remove upload leftover in case of failure
+            if not all_good and to_release:
+                to_release_dir = os.path.join(constants.SOFTWARE_STORAGE_DIR, "rel-%s" % to_release)
+                shutil.rmtree(to_release_dir, ignore_errors=True)
+
         return local_info, local_warning, local_error, release_meta_info
 
     def _process_upload_patch_files(self, patch_files):
@@ -1309,7 +1331,7 @@ class PatchController(PatchService):
         upload_patch_info = []
         try:
             # Create the directories
-            for state_dir in constants.DEPLOY_STATE_METADATA_DIR:
+            for state_dir in states.DEPLOY_STATE_METADATA_DIR:
                 os.makedirs(state_dir, exist_ok=True)
         except os.error:
             msg = "Failed to create directories"
@@ -1320,83 +1342,68 @@ class PatchController(PatchService):
 
             base_patch_filename = os.path.basename(patch_file)
 
+            # NOTE(bqian) does it make sense to link the release_id to name of the patch?
             # Get the release_id from the filename
             # and check to see if it's already uploaded
             # todo(abailey) We should not require the ID as part of the file
             (release_id, _) = os.path.splitext(base_patch_filename)
 
-            patch_metadata = self.release_data.metadata.get(release_id, None)
+            release = self.release_collection.get_release_by_id(release_id)
 
-            if patch_metadata:
-                if patch_metadata["state"] != constants.AVAILABLE:
-                    msg = "%s is being or has already been deployed." % release_id
+            if release:
+                if release.state == states.COMMITTED:
+                    msg = "%s is committed. Metadata not updated" % release_id
                     LOG.info(msg)
                     local_info += msg + "\n"
-                elif patch_metadata["state"] == constants.COMMITTED:
-                    msg = "%s is committed. Metadata not updated" % release_id
+                elif release.state != states.AVAILABLE:
+                    msg = "%s is not currently in available state to be deployed." % release_id
                     LOG.info(msg)
                     local_info += msg + "\n"
                 else:
                     try:
                         # todo(abailey) PatchFile / extract_patch should be renamed
-                        this_release = PatchFile.extract_patch(patch_file,
-                                                               metadata_dir=constants.AVAILABLE_DIR,
-                                                               metadata_only=True,
-                                                               existing_content=self.release_data.contents[release_id],
-                                                               base_pkgdata=self.base_pkgdata)
+                        PatchFile.extract_patch(patch_file,
+                                                metadata_dir=states.AVAILABLE_DIR,
+                                                metadata_only=True,
+                                                existing_content=release.contents,
+                                                base_pkgdata=self.base_pkgdata)
                         PatchFile.unpack_patch(patch_file)
-                        self.release_data.update_release(this_release)
+                        reload_release_data()
                         msg = "%s is already uploaded. Updated metadata only" % release_id
                         LOG.info(msg)
                         local_info += msg + "\n"
-                    except ReleaseMismatchFailure:
-                        msg = "Contents of %s do not match re-uploaded release" % release_id
-                        LOG.exception(msg)
-                        local_error += msg + "\n"
-                    except ReleaseValidationFailure as e:
-                        msg = "Release validation failed for %s" % release_id
-                        if str(e) is not None and str(e) != '':
-                            msg += ":\n%s" % str(e)
-                        LOG.exception(msg)
-                        local_error += msg + "\n"
                     except SoftwareFail:
                         msg = "Failed to upload release %s" % release_id
                         LOG.exception(msg)
                         local_error += msg + "\n"
             else:
                 try:
-                    this_release = PatchFile.extract_patch(patch_file,
-                                                           metadata_dir=constants.AVAILABLE_DIR,
-                                                           base_pkgdata=self.base_pkgdata)
+                    PatchFile.extract_patch(patch_file,
+                                            metadata_dir=states.AVAILABLE_DIR,
+                                            base_pkgdata=self.base_pkgdata)
                     PatchFile.unpack_patch(patch_file)
                     local_info += "%s is now uploaded\n" % release_id
-                    self.release_data.add_release(this_release)
+                    reload_release_data()
 
-                    if not os.path.isfile(INITIAL_CONTROLLER_CONFIG_COMPLETE):
-                        self.release_data.metadata[release_id]["state"] = constants.AVAILABLE
-                    elif len(self.hosts) > 0:
-                        self.release_data.metadata[release_id]["state"] = constants.AVAILABLE
-                    else:
-                        self.release_data.metadata[release_id]["state"] = constants.UNKNOWN
-                except ReleaseValidationFailure as e:
-                    msg = "Release validation failed for %s" % release_id
-                    if str(e) is not None and str(e) != '':
-                        msg += ":\n%s" % str(e)
-                    LOG.exception(msg)
-                    local_error += msg + "\n"
-                    continue
+                    # NOTE(bqian) The check and exception raise below should be revisited;
+                    # if applicable, it should be applied at the beginning of all requests.
+                    if len(self.hosts) == 0:
+                        msg = "service is running in incorrect state. No registered host"
+                        raise InternalError(msg)
                 except SoftwareFail:
                     msg = "Failed to upload release %s" % release_id
                     LOG.exception(msg)
                     local_error += msg + "\n"
                     continue
 
-            upload_patch_info.append({
-                base_patch_filename: {
-                    "id": release_id,
-                    "sw_version": self.release_data.metadata[release_id].get("sw_version", None),
-                }
-            })
+            release = self.release_collection.get_release_by_id(release_id)
+            if release:
+                upload_patch_info.append({
+                    base_patch_filename: {
+                        "id": release_id,
+                        "sw_release": release.sw_release,  # MM.mm.pp release version
+                    }
+                })
 
         # create versioned precheck for uploaded patches
         for patch in upload_patch_info:
@@ -1406,23 +1413,20 @@ class PatchController(PatchService):
                 if filename in pf:
                     patch_file = pf
 
-            sw_version = values.get("sw_version")
-            required_patches = self.release_data.metadata[values.get("id")].get("requires")
+            sw_release = values.get("sw_release")
+
+            required_patches = []
+            for dep_id in self.release_collection.get_release_by_id(values.get("id")).requires_release_ids:
+                required_patches.append(version.parse(dep_id))
 
             # sort the required patches list and get the latest, if available
-            req_patch_id = None
-            req_patch_metadata = None
             req_patch_version = None
-            if required_patches:
-                req_patch_id = sorted(required_patches)[-1]
-            if req_patch_id:
-                req_patch_metadata = self.release_data.metadata.get(req_patch_id)
-            if req_patch_metadata:
-                req_patch_version = req_patch_metadata.get("sw_version")
-            if req_patch_id and not req_patch_metadata:
-                LOG.warning("Required patch '%s' is not uploaded." % req_patch_id)
+            if len(required_patches) > 0:
+                req_patch_version = str(sorted(required_patches)[-1])
+                if self.release_collection.get_release_by_id(req_patch_version) is None:
+                    LOG.warning("Required patch '%s' is not uploaded." % req_patch_version)
 
-            PatchFile.create_versioned_precheck(patch_file, sw_version, req_patch_version=req_patch_version)
+            PatchFile.create_versioned_precheck(patch_file, sw_release, req_patch_version=req_patch_version)
 
         return local_info, local_warning, local_error, upload_patch_info
 
@@ -1464,8 +1468,7 @@ class PatchController(PatchService):
             LOG.error(msg)
             msg_error += msg + "\n"
         elif len(upgrade_files) == 2:  # Two upgrade files uploaded
-            tmp_info, tmp_warning, tmp_error, tmp_release_meta_info = self._process_upload_upgrade_files(
-                upgrade_files, self.release_data)
+            tmp_info, tmp_warning, tmp_error, tmp_release_meta_info = self._process_upload_upgrade_files(upgrade_files)
             msg_info += tmp_info
             msg_warning += tmp_warning
             msg_error += tmp_error
@@ -1479,20 +1482,23 @@ class PatchController(PatchService):
             msg_error += tmp_error
             upload_info += tmp_patch_meta_info
 
+        reload_release_data()
+
         return dict(info=msg_info, warning=msg_warning, error=msg_error, upload_info=upload_info)
 
-    def release_apply_remove_order(self, release, running_sw_version, reverse=False):
+    def release_apply_remove_order(self, release_id, running_sw_version, reverse=False):
 
         # If R4 requires R3, R3 requires R2 and R2 requires R1,
         # then release_order = ['R4', 'R3', 'R2', 'R1']
 
         if reverse:
-            release_order = [release] + self.get_release_dependency_list(release)
+            release_order = [release_id] + self.get_release_dependency_list(release_id)
             # If release_order = ['R4', 'R3', 'R2', 'R1']
             # and running_sw_version is the sw_version for R2
             # After the operation below, release_order = ['R4', 'R3']
             for i, rel in enumerate(release_order):
-                if self.release_data.metadata[rel]["sw_version"] == running_sw_version:
+                release = self.release_collection.get_release_by_id(rel)
+                if release.sw_release == running_sw_version:
                     val = i - len(release_order) + 1
                     while val >= 0:
                         release_order.pop()
@@ -1500,7 +1506,7 @@ class PatchController(PatchService):
                     break
 
         else:
-            release_order = [release] + self.get_release_required_by_list(release)
+            release_order = [release_id] + self.get_release_required_by_list(release_id)
         # reverse = True is for apply operation
         # In this case, the release_order = ['R3', 'R4']
         # reverse = False is for remove operation
@@ -1508,7 +1514,9 @@ class PatchController(PatchService):
         if reverse:
             release_order.reverse()
         else:
+            # Note(bqian) this pop is questionable, specified release would not be removed?
             release_order.pop(0)
+
         return release_order
 
     def software_release_delete_api(self, release_ids):
@@ -1563,41 +1571,10 @@ class PatchController(PatchService):
         LOG.info(msg)
         audit_log_info(msg)
 
-        # Verify releases exist and are in proper state first
-        id_verification = all(release_id in self.release_data.metadata for release_id in release_list)
-        for release_id in release_list:
-            if release_id not in self.release_data.metadata:
-                msg = "Release %s does not exist" % release_id
-                LOG.error(msg)
-                msg_error += msg + "\n"
-                id_verification = False
-                continue
-
-            deploystate = self.release_data.metadata[release_id]["state"]
-            ignore_states = [constants.AVAILABLE,
-                             constants.DEPLOYING_START,
-                             constants.DEPLOYING_ACTIVATE,
-                             constants.DEPLOYING_COMPLETE,
-                             constants.DEPLOYING_HOST,
-                             constants.DEPLOYED]
-
-            if deploystate not in ignore_states:
-                msg = f"Release {release_id} is {deploystate} and cannot be deleted."
-                LOG.error(msg)
-                msg_error += msg + "\n"
-                id_verification = False
-                continue
-
-        if not id_verification:
-            return dict(info=msg_info, warning=msg_warning, error=msg_error)
-
         # Handle operation
         for release_id in release_list:
-            release_sw_version = utils.get_major_release_version(
-                self.release_data.metadata[release_id]["sw_version"])
-
-            # Need to support delete of older centos patches (metadata) from upgrades.
-            # todo(abailey): do we need to be concerned about this since this component is new.
+            release = self.release_collection.get_release_by_id(release_id)
+            release_sw_version = release.sw_version
 
             # Delete ostree content if it exists.
             # RPM based patches (from upgrades) will not have ostree contents
@@ -1611,7 +1588,7 @@ class PatchController(PatchService):
                     raise OSTreeTarFail(msg)
 
             package_repo_dir = "%s/rel-%s" % (constants.PACKAGE_FEED_DIR, release_sw_version)
-            packages = [pkg.split("_")[0] for pkg in self.release_data.metadata[release_id].get("packages")]
+            packages = [pkg.split("_")[0] for pkg in release.packages]
             if packages:
                 apt_utils.package_remove(package_repo_dir, packages)
 
@@ -1636,12 +1613,12 @@ class PatchController(PatchService):
                     msg_info += msg + "\n"
 
             # TODO(lbonatti): treat the upcoming versioning changes
-            PatchFile.delete_versioned_directory(self.release_data.metadata[release_id]["sw_version"])
+            PatchFile.delete_versioned_directory(release.sw_release)
 
             try:
                 # Delete the metadata
-                deploystate = self.release_data.metadata[release_id]["state"]
-                metadata_dir = DEPLOY_STATE_METADATA_DIR_DICT[deploystate]
+                deploystate = release.state
+                metadata_dir = states.RELEASE_STATE_TO_DIR_MAP[deploystate]
                 os.remove("%s/%s" % (metadata_dir, metadata_file))
             except OSError:
                 msg = "Failed to remove metadata for %s" % release_id
@@ -1649,7 +1626,7 @@ class PatchController(PatchService):
                 raise MetadataFail(msg)
 
             self.delete_restart_script(release_id)
-            self.release_data.delete_release(release_id)
+            reload_release_data()
             msg = "%s has been deleted" % release_id
             LOG.info(msg)
             msg_info += msg + "\n"
@@ -1688,21 +1665,21 @@ class PatchController(PatchService):
 
         return {"in_sync": is_in_sync}
 
-    def patch_init_release_api(self, release):
+    def patch_init_release_api(self, release_id):
         """
-        Create an empty repo for a new release
+        Create an empty repo for a new release_id
         :return: dict of info, warning and error messages
         """
         msg_info = ""
         msg_warning = ""
         msg_error = ""
 
-        msg = "Initializing repo for: %s" % release
+        msg = "Initializing repo for: %s" % release_id
         LOG.info(msg)
         audit_log_info(msg)
 
-        if release == SW_VERSION:
-            msg = "Rejected: Requested release %s is running release" % release
+        if release_id == SW_VERSION:
+            msg = "Rejected: Requested release %s is running release" % release_id
             msg_error += msg + "\n"
             LOG.info(msg)
             return dict(info=msg_info, warning=msg_warning, error=msg_error)
@@ -1710,22 +1687,13 @@ class PatchController(PatchService):
         # Refresh data
         self.base_pkgdata.loaddirs()
 
-        self.release_data.load_all_metadata(constants.AVAILABLE_DIR, state=constants.AVAILABLE)
-        self.release_data.load_all_metadata(constants.UNAVAILABLE_DIR, state=constants.UNAVAILABLE)
-        self.release_data.load_all_metadata(constants.DEPLOYING_START_DIR, state=constants.DEPLOYING_START)
-        self.release_data.load_all_metadata(constants.DEPLOYING_HOST_DIR, state=constants.DEPLOYING_HOST)
-        self.release_data.load_all_metadata(constants.DEPLOYING_ACTIVATE_DIR, state=constants.DEPLOYING_ACTIVATE)
-        self.release_data.load_all_metadata(constants.DEPLOYING_COMPLETE_DIR, state=constants.DEPLOYING_COMPLETE)
-        self.release_data.load_all_metadata(constants.DEPLOYED_DIR, state=constants.DEPLOYED)
-        self.release_data.load_all_metadata(constants.REMOVING_DIR, state=constants.REMOVING)
-        self.release_data.load_all_metadata(constants.ABORTING_DIR, state=constants.ABORTING)
-        self.release_data.load_all_metadata(constants.COMMITTED_DIR, state=constants.COMMITTED)
+        reload_release_data()
 
-        repo_dir[release] = "%s/rel-%s" % (repo_root_dir, release)
+        repo_dir[release_id] = "%s/rel-%s" % (repo_root_dir, release_id)
 
         # Verify the release doesn't already exist
-        if os.path.exists(repo_dir[release]):
-            msg = "Patch repository for %s already exists" % release
+        if os.path.exists(repo_dir[release_id]):
+            msg = "Patch repository for %s already exists" % release_id
             msg_info += msg + "\n"
             LOG.info(msg)
             return dict(info=msg_info, warning=msg_warning, error=msg_error)
@@ -1734,14 +1702,14 @@ class PatchController(PatchService):
         try:
             # todo(jcasteli)  determine if ostree change needs a createrepo equivalent
             output = "UNDER CONSTRUCTION for OSTREE"
-            LOG.info("Repo[%s] updated:\n%s", release, output)
+            LOG.info("Repo[%s] updated:\n%s", release_id, output)
         except Exception:
-            msg = "Failed to update the repo for %s" % release
+            msg = "Failed to update the repo for %s" % release_id
             LOG.exception(msg)
 
             # Wipe out what was created
-            shutil.rmtree(repo_dir[release])
-            del repo_dir[release]
+            shutil.rmtree(repo_dir[release_id])
+            del repo_dir[release_id]
 
             raise SoftwareFail(msg)
 
@@ -1763,7 +1731,8 @@ class PatchController(PatchService):
         # First, verify that all specified patches exist
         id_verification = True
         for patch_id in patch_ids:
-            if patch_id not in self.release_data.metadata:
+            release = self.release_collection.get_release_by_id(patch_id)
+            if release is None:
                 msg = "Patch %s does not exist" % patch_id
                 LOG.error(msg)
                 msg_error += msg + "\n"
@@ -1773,15 +1742,15 @@ class PatchController(PatchService):
             return dict(info=msg_info, warning=msg_warning, error=msg_error)
 
         required_patches = {}
-        for patch_iter in list(self.release_data.metadata):
-            for req_patch in self.release_data.metadata[patch_iter]["requires"]:
+        for release in self.release_collection.iterate_releases():
+            for req_patch in release.requires_release_ids:
                 if req_patch not in patch_ids:
                     continue
 
                 if req_patch not in required_patches:
                     required_patches[req_patch] = []
 
-                required_patches[req_patch].append(patch_iter)
+                required_patches[req_patch].append(release.id)
 
         for patch_id in patch_ids:
             if patch_id in required_patches:
@@ -1811,10 +1780,7 @@ class PatchController(PatchService):
         # Increment the software_op_counter here
         self.inc_patch_op_counter()
 
-        self.release_data_lock.acquire()
-        # self.release_data.load_all()
         self.check_patch_states()
-        self.release_data_lock.release()
 
         if self.sock_out is None:
             return True
@@ -1863,67 +1829,54 @@ class PatchController(PatchService):
     def software_release_query_cached(self, **kwargs):
         query_state = None
         if "show" in kwargs:
-            if kwargs["show"] == "available":
-                query_state = constants.AVAILABLE
-            if kwargs["show"] == "unavailable":
-                query_state = constants.UNAVAILABLE
-            elif kwargs["show"] == "deploying_start":
-                query_state = constants.DEPLOYING_START
-            elif kwargs["show"] == "deploying_host":
-                query_state = constants.DEPLOYING_HOST
-            elif kwargs["show"] == "deploying_activate":
-                query_state = constants.DEPLOYING_ACTIVATE
-            elif kwargs["show"] == "deploying_complete":
-                query_state = constants.DEPLOYING_COMPLETE
-            elif kwargs["show"] == "deployed":
-                query_state = constants.DEPLOYED
-            elif kwargs["show"] == "removing":
-                query_state = constants.REMOVING
-            elif kwargs["show"] == "aborting":
-                query_state = constants.ABORTING
-            elif kwargs["show"] == "committed":
-                query_state = constants.COMMITTED
+            valid_query_states = [
+                states.AVAILABLE,
+                states.UNAVAILABLE,
+                states.DEPLOYED,
+                states.REMOVING,
+                states.COMMITTED,
+                states.DEPLOYING
+            ]
+            if kwargs["show"] in valid_query_states:
+                query_state = kwargs["show"]
 
         query_release = None
         if "release" in kwargs:
             query_release = kwargs["release"]
 
-        results = {}
-        self.release_data_lock.acquire()
-        if query_state is None and query_release is None:
-            # Return everything
-            results = self.release_data.metadata
+        results = []
+
+        def filter_by_version():
+            for r in self.release_collection.iterate_releases():
+                if r.sw_version in query_release:
+                    yield r
+
+        def filter_by_state():
+            for rel in self.release_collection.iterate_releases_by_state(query_state):
+                yield rel
+
+        if query_state is not None:
+            iterator = filter_by_state
+        elif query_release is not None:
+            iterator = filter_by_version
         else:
-            # Filter results
-            for release_id, data in self.release_data.metadata.items():
-                if query_state is not None and data["state"] != query_state:
-                    continue
-                if query_release is not None and data["sw_version"] != query_release:
-                    continue
-                results[release_id] = data
-        self.release_data_lock.release()
+            iterator = self.release_collection.iterate_releases
+
+        for i in iterator():
+            data = i.to_query_dict()
+            results.append(data)
 
         return results
 
     def software_release_query_specific_cached(self, release_ids):
-        audit_log_info("software release show")
+        LOG.info("software release show")
 
-        results = {"metadata": {},
-                   "contents": {},
-                   "error": ""}
+        results = []
 
-        with self.release_data_lock:
-
-            for release_id in release_ids:
-                if release_id not in list(self.release_data.metadata):
-                    results["error"] += "%s is unrecognized\n" % release_id
-
-            for release_id, data in self.release_data.metadata.items():
-                if release_id in release_ids:
-                    results["metadata"][release_id] = data
-            for release_id, data in self.release_data.contents.items():
-                if release_id in release_ids:
-                    results["contents"][release_id] = data
+        for release_id in release_ids:
+            release = self.release_collection.get_release_by_id(release_id)
+            if release is not None:
+                results.append(release.to_query_dict())
 
         return results
 
@@ -1931,20 +1884,19 @@ class PatchController(PatchService):
         dependencies = set()
         patch_added = False
 
-        with self.release_data_lock:
+        # Add patches to workset
+        for patch_id in sorted(patch_ids):
+            dependencies.add(patch_id)
+            patch_added = True
 
-            # Add patches to workset
-            for patch_id in sorted(patch_ids):
-                dependencies.add(patch_id)
-                patch_added = True
-
-            while patch_added:
-                patch_added = False
-                for patch_id in sorted(dependencies):
-                    for req in self.release_data.metadata[patch_id]["requires"]:
-                        if req not in dependencies:
-                            dependencies.add(req)
-                            patch_added = recursive
+        while patch_added:
+            patch_added = False
+            for patch_id in sorted(dependencies):
+                release = self.release_collection.get_release_by_id(patch_id)
+                for req in release.requires:
+                    if req not in dependencies:
+                        dependencies.add(req)
+                        patch_added = recursive
 
         return sorted(dependencies)
 
@@ -1962,15 +1914,14 @@ class PatchController(PatchService):
         if kwargs.get("recursive") == "yes":
             recursive = True
 
-        with self.release_data_lock:
-
-            # Verify patch IDs
-            for patch_id in sorted(patch_ids):
-                if patch_id not in list(self.release_data.metadata):
-                    errormsg = "%s is unrecognized\n" % patch_id
-                    LOG.info("patch_query_dependencies: %s", errormsg)
-                    results["error"] += errormsg
-                    failure = True
+        # Verify patch IDs
+        for patch_id in sorted(patch_ids):
+            release = self.release_collection.get_release_by_id(patch_id)
+            if release is None:
+                errormsg = "%s is unrecognized\n" % patch_id
+                LOG.info("patch_query_dependencies: %s", errormsg)
+                results["error"] += errormsg
+                failure = True
 
         if failure:
             LOG.info("patch_query_dependencies failed")
@@ -1986,10 +1937,10 @@ class PatchController(PatchService):
         audit_log_info(msg)
 
         try:
-            if not os.path.exists(constants.COMMITTED_DIR):
-                os.makedirs(constants.COMMITTED_DIR)
+            if not os.path.exists(states.COMMITTED_DIR):
+                os.makedirs(states.COMMITTED_DIR)
         except os.error:
-            msg = "Failed to create %s" % constants.COMMITTED_DIR
+            msg = "Failed to create %s" % states.COMMITTED_DIR
             LOG.exception(msg)
             raise SoftwareFail(msg)
 
@@ -2001,10 +1952,9 @@ class PatchController(PatchService):
 
         # Ensure there are only REL patches
         non_rel_list = []
-        with self.release_data_lock:
-            for patch_id in self.release_data.metadata:
-                if self.release_data.metadata[patch_id]['status'] != constants.STATUS_RELEASED:
-                    non_rel_list.append(patch_id)
+        for release in self.release_collection.iterate_releases():
+            if release.status != constants.STATUS_RELEASED:
+                non_rel_list.append(release.id)
 
         if len(non_rel_list) > 0:
             errormsg = "A commit cannot be performed with non-REL status patches in the system:\n"
@@ -2015,13 +1965,13 @@ class PatchController(PatchService):
             return results
 
         # Verify Release IDs
-        with self.release_data_lock:
-            for patch_id in sorted(patch_ids):
-                if patch_id not in list(self.release_data.metadata):
-                    errormsg = "%s is unrecognized\n" % patch_id
-                    LOG.info("patch_commit: %s", errormsg)
-                    results["error"] += errormsg
-                    failure = True
+        for patch_id in sorted(patch_ids):
+            release = self.release_collection.get_release_by_id(patch_id)
+            if release is None:
+                errormsg = "%s is unrecognized\n" % patch_id
+                LOG.info("patch_commit: %s", errormsg)
+                results["error"] += errormsg
+                failure = True
 
         if failure:
             LOG.info("patch_commit: Failed patch ID check")
@@ -2031,11 +1981,10 @@ class PatchController(PatchService):
 
         # Check patch states
         avail_list = []
-        with self.release_data_lock:
-            for patch_id in commit_list:
-                if self.release_data.metadata[patch_id]['state'] != constants.DEPLOYED \
-                        and self.release_data.metadata[patch_id]['state'] != constants.COMMITTED:
-                    avail_list.append(patch_id)
+        for patch_id in commit_list:
+            release = self.release_collection.get_release_by_id(patch_id)
+            if release.state not in [states.DEPLOYED, states.COMMITTED]:
+                avail_list.append(patch_id)
 
         if len(avail_list) > 0:
             errormsg = "The following patches are not applied and cannot be committed:\n"
@@ -2045,22 +1994,21 @@ class PatchController(PatchService):
             results["error"] += errormsg
             return results
 
-        with self.release_data_lock:
-            for patch_id in commit_list:
-                # Fetch file paths that need to be cleaned up to
-                # free patch storage disk space
-                if self.release_data.metadata[patch_id].get("restart_script"):
-                    restart_script_path = "%s/%s" % \
-                        (root_scripts_dir,
-                         self.release_data.metadata[patch_id]["restart_script"])
-                    if os.path.exists(restart_script_path):
-                        cleanup_files.add(restart_script_path)
-                patch_sw_version = utils.get_major_release_version(
-                    self.release_data.metadata[patch_id]["sw_version"])
-                abs_ostree_tar_dir = package_dir[patch_sw_version]
-                software_tar_path = "%s/%s-software.tar" % (abs_ostree_tar_dir, patch_id)
-                if os.path.exists(software_tar_path):
-                    cleanup_files.add(software_tar_path)
+        for patch_id in commit_list:
+            release = self.release_collection.get_release_by_id(patch_id)
+            # Fetch file paths that need to be cleaned up to
+            # free patch storage disk space
+            if release.restart_script:
+                restart_script_path = "%s/%s" % \
+                    (root_scripts_dir,
+                     release.restart_script)
+                if os.path.exists(restart_script_path):
+                    cleanup_files.add(restart_script_path)
+            patch_sw_version = release.sw_release
+            abs_ostree_tar_dir = package_dir[patch_sw_version]
+            software_tar_path = "%s/%s-software.tar" % (abs_ostree_tar_dir, patch_id)
+            if os.path.exists(software_tar_path):
+                cleanup_files.add(software_tar_path)
 
         # Calculate disk space
         disk_space = 0
@@ -2077,8 +2025,8 @@ class PatchController(PatchService):
         # Move the metadata to the committed dir
         for patch_id in commit_list:
             metadata_fname = "%s-metadata.xml" % patch_id
-            deployed_fname = os.path.join(constants.DEPLOYED_DIR, metadata_fname)
-            committed_fname = os.path.join(constants.COMMITTED_DIR, metadata_fname)
+            deployed_fname = os.path.join(states.DEPLOYED_DIR, metadata_fname)
+            committed_fname = os.path.join(states.COMMITTED_DIR, metadata_fname)
             if os.path.exists(deployed_fname):
                 try:
                     shutil.move(deployed_fname, committed_fname)
@@ -2096,7 +2044,7 @@ class PatchController(PatchService):
                 LOG.exception(msg)
                 raise MetadataFail(msg)
 
-        self.release_data.load_all()
+        reload_release_data()
 
         results["info"] = "The releases have been committed."
         return results
@@ -2129,13 +2077,12 @@ class PatchController(PatchService):
         return rc
 
     def copy_restart_scripts(self):
-        with self.release_data_lock:
-            for patch_id in self.release_data.metadata:
-                if self.release_data.metadata[patch_id]["state"] in \
-                   [constants.DEPLOYING_START, constants.REMOVING] \
-                   and self.release_data.metadata[patch_id].get("restart_script"):
+        applying_states = [states.DEPLOYING, states.REMOVING]
+        for release in self.release_collection.iterate_releases():
+            if release.restart_script:
+                if release.state in applying_states:
                     try:
-                        restart_script_name = self.release_data.metadata[patch_id]["restart_script"]
+                        restart_script_name = release.restart_script
                         restart_script_path = "%s/%s" \
                             % (root_scripts_dir, restart_script_name)
                         dest_path = constants.PATCH_SCRIPTS_STAGING_DIR
@@ -2145,23 +2092,23 @@ class PatchController(PatchService):
                             os.makedirs(dest_path, 0o700)
                         shutil.copyfile(restart_script_path, dest_script_file)
                         os.chmod(dest_script_file, 0o700)
-                        msg = "Creating restart script for %s" % patch_id
+                        msg = "Creating restart script for %s" % release.id
                         LOG.info(msg)
                     except shutil.Error:
-                        msg = "Failed to copy the restart script for %s" % patch_id
+                        msg = "Failed to copy the restart script for %s" % release.id
                         LOG.exception(msg)
                         raise SoftwareError(msg)
-                elif self.release_data.metadata[patch_id].get("restart_script"):
+                else:
                     try:
-                        restart_script_name = self.release_data.metadata[patch_id]["restart_script"]
+                        restart_script_name = release.restart_script
                         restart_script_path = "%s/%s" \
                             % (constants.PATCH_SCRIPTS_STAGING_DIR, restart_script_name)
                         if os.path.exists(restart_script_path):
                             os.remove(restart_script_path)
-                            msg = "Removing restart script for %s" % patch_id
+                            msg = "Removing restart script for %s" % release.id
                             LOG.info(msg)
                     except shutil.Error:
-                        msg = "Failed to delete the restart script for %s" % patch_id
+                        msg = "Failed to delete the restart script for %s" % release.id
                         LOG.exception(msg)
 
     def _update_state_to_peer(self):
@@ -2176,32 +2123,21 @@ class PatchController(PatchService):
         """
         Does basic sanity checks on the release data
         :param deployment: release to be checked
-        :return: release dict (if exists),
+        :return: release object (if exists),
                  bool with success output,
                  strings with info, warning and error messages
         """
-        msg_info = ""
-        msg_warning = ""
-        msg_error = ""
-        success = True
 
         # We need to verify that the software release exists
-        release = self.release_data.metadata.get(deployment, None)
+        release = self.release_collection.get_release_by_id(deployment)
         if not release:
             msg = "Software release version corresponding to the specified release " \
                   "%s does not exist." % deployment
             LOG.error(msg)
-            msg_error += msg + " Try deleting and re-uploading the software for recovery."
-            success = False
+            msg = msg + " Try deleting and re-uploading the software for recovery."
+            raise SoftwareServiceError(error=msg)
 
-        # Check if release state is valid
-        elif release["state"] not in constants.VALID_DEPLOY_START_STATES:
-            msg = "Software release state is invalid: %s" % release["state"]
-            LOG.error(msg)
-            msg_error += msg
-            success = False
-
-        return release, success, msg_info, msg_warning, msg_error
+        return release
 
     def _deploy_precheck(self, release_version: str, force: bool = False,
                          region_name: str = "RegionOne", patch: bool = False) -> dict:
@@ -2289,15 +2225,13 @@ class PatchController(PatchService):
         :param force: if True will ignore minor alarms during precheck
         :return: dict of info, warning and error messages
         """
-        release, success, msg_info, msg_warning, msg_error = self._release_basic_checks(deployment)
-        if not success:
-            return dict(info=msg_info, warning=msg_warning, error=msg_error)
+        release = self._release_basic_checks(deployment)
         region_name = kwargs["region_name"]
-        release_version = release["sw_version"]
+        release_version = release.sw_release
         patch = not utils.is_upgrade_deploy(SW_VERSION, release_version)
         return self._deploy_precheck(release_version, force, region_name, patch)
 
-    def _deploy_upgrade_start(self, to_release):
+    def _deploy_upgrade_start(self, to_release, commit_id):
         LOG.info("start deploy upgrade to %s from %s" % (to_release, SW_VERSION))
         deploy_script_name = constants.DEPLOY_START_SCRIPT
         cmd_path = utils.get_software_deploy_script(to_release, deploy_script_name)
@@ -2312,7 +2246,6 @@ class PatchController(PatchService):
         postgresql_port = str(cfg.alt_postgresql_port)
         feed = os.path.join(constants.FEED_DIR,
                             "rel-%s/ostree_repo" % major_to_release)
-        commit_id = None
 
         LOG.info("k8s version %s" % k8s_ver)
         upgrade_start_cmd = [cmd_path, SW_VERSION, major_to_release, k8s_ver, postgresql_port,
@@ -2341,9 +2274,19 @@ class PatchController(PatchService):
             LOG.error("Failed to start command: %s. Error %s" % (' '.join(upgrade_start_cmd), e))
             return False
 
-    def deploy_state_changed(self, deploy_state):
+    def deploy_state_changed(self, new_state):
         '''Handle 'deploy state change' event, invoked when operations complete. '''
-        self.db_api_instance.update_deploy(deploy_state)
+        # Only START_DONE / START_FAILED map to a transition; anything else is logged as an error.
+        deploy_state = DeployState.get_instance()
+        state_event = {
+            DEPLOY_STATES.START_DONE: deploy_state.start_done,
+            DEPLOY_STATES.START_FAILED: deploy_state.start_failed
+        }
+        if new_state in state_event:
+            state_event[new_state]()
+        else:
+            msg = f"Received invalid deploy state update {new_state}"
+            LOG.error(msg)
 
     def host_deploy_state_changed(self, hostname, host_deploy_state):
         '''Handle 'host deploy state change' event. '''
@@ -2354,21 +2297,40 @@ class PatchController(PatchService):
         tag.text = text
         return tag
 
+    @require_deploy_state([None],
+                          "There is already a deployment is in progress ({state}). "
+                          "Please complete the current deployment.")
     def software_deploy_start_api(self, deployment: str, force: bool, **kwargs) -> dict:
         """
-        Start deployment by applying the changes to the feed ostree
-        return: dict of info, warning and error messages
+        Start the deployment of a specified release.
+        The operation implies deploying all undeployed dependency releases of
+        the specified release, i.e. deploying release 24.09.1 implies
+        deploying both 24.09.0 and 24.09.1 when 24.09.0 has not been deployed.
+        The operation includes these steps:
+        1. find all undeployed dependency releases
+        2. ensure all releases (dependency and specified release) are ready to be deployed
+        3. precheck
+        4. transition all involved releases to the deploying state
+        5. start the deploy subprocess
         """
-        release, success, msg_info, msg_warning, msg_error = self._release_basic_checks(deployment)
+        msg_info = ""
+        msg_warning = ""
+        msg_error = ""
+        deploy_release = self._release_basic_checks(deployment)
 
-        if not success:
-            return dict(info=msg_info, warning=msg_warning, error=msg_error)
+        running_release = self.release_collection.running_release
+        deploy_sw_version = deploy_release.sw_version  # MM.mm
 
-        # TODO(heitormatsui) Enforce deploy-precheck for patch release
+        feed_repo = "%s/rel-%s/ostree_repo" % (constants.FEED_OSTREE_BASE_DIR, deploy_sw_version)
+        commit_id = deploy_release.commit_id
         patch_release = True
-        if utils.is_upgrade_deploy(SW_VERSION, release["sw_version"]):
+        if utils.is_upgrade_deploy(SW_VERSION, deploy_release.sw_release):
+            # TODO(bqian) remove default latest commit when a commit-id is built into GA metadata
+            if commit_id is None:
+                commit_id = ostree_utils.get_feed_latest_commit(deploy_sw_version)
+
             patch_release = False
-            to_release = release["sw_version"]
+            to_release = deploy_release.sw_release
             ret = self._deploy_precheck(to_release, force, patch=patch_release)
             if ret["system_healthy"] is None:
                 ret["error"] = "Fail to perform deploy precheck. Internal error has occurred.\n" + \
@@ -2380,33 +2342,21 @@ class PatchController(PatchService):
                               "Please fix above issues then retry the deploy.\n"
                 return ret
 
-            if self._deploy_upgrade_start(to_release):
+            if self._deploy_upgrade_start(to_release, commit_id):
                 collect_current_load_for_hosts()
                 create_deploy_hosts()
-                self.db_api_instance.begin_update()
-                try:
-                    # TODO(bqian) replace SW_VERSION below to current running sw_release
-                    # (MM.mm.pp)
-                    self.update_and_sync_deploy_state(self.db_api_instance.create_deploy,
-                                                      SW_VERSION, to_release, True)
-                    self.update_and_sync_deploy_state(self.db_api_instance.update_deploy,
-                                                      DEPLOY_STATES.START)
-                finally:
-                    self.db_api_instance.end_update()
 
-                sw_rel = self.release_collection.get_release_by_id(deployment)
-                if sw_rel is None:
-                    raise InternalError("%s cannot be found" % to_release)
-                sw_rel.update_state(constants.DEPLOYING)
+                release_state = ReleaseState(release_ids=[deploy_release.id])
+                release_state.start_deploy()
+                deploy_state = DeployState.get_instance()
+                deploy_state.start(running_release, to_release, feed_repo, commit_id, deploy_release.reboot_required)
+                self._update_state_to_peer()
+
                 msg_info = "Deployment for %s started" % deployment
             else:
                 msg_error = "Deployment for %s failed to start" % deployment
 
             return dict(info=msg_info, warning=msg_warning, error=msg_error)
-        # Identify if this is apply or remove operation
-        # todo(jcasteli) Remove once the logic to include major release version
-        # in release list is implemented
-        running_sw_version = "23.09.0"
 
         # todo(chuck) Remove once to determine how we are associating a patch
         # with a release.
@@ -2416,17 +2366,23 @@ class PatchController(PatchService):
         #        running_sw_version = self.release_data.metadata[release_id]["sw_version"]
         #        LOG.info("Running software version: %s", running_sw_version)
 
-        higher = utils.compare_release_version(self.release_data.metadata[deployment]["sw_version"],
-                                               running_sw_version)
+        # TODO(bqian) update references of sw_release (string) to SWRelease object
 
-        if higher is None:
+        if deploy_release > running_release:
+            operation = "apply"
+        elif running_release > deploy_release:
+            operation = "remove"
+        else:
+            # NOTE(bqian) The error message doesn't seem right. Software version format
+            # or any other metadata semantic check should be done during upload. If
+            # invalid data is found afterwards, the data is considered damaged and the
+            # message should recommend deleting and re-uploading the release.
             msg_error += "The software version format for this release is not correct.\n"
             return dict(info=msg_info, warning=msg_warning, error=msg_error)
-        elif higher:
-            operation = "apply"
-        else:
-            operation = "remove"
 
+        # NOTE(bqian) shouldn't patch release deploy and remove do the same thing in
+        # terms of ostree commit, i.e. deploy to the commit specified by the commit-id
+        # associated with the release from the deploy start command?
         # If releases are such that:
         # R2 requires R1, R3 requires R2, R4 requires R3
         # If current running release is R2 and command issued is "software deploy start R4"
@@ -2439,7 +2395,7 @@ class PatchController(PatchService):
             create_deploy_hosts()
 
             # reverse = True is used for apply operation
-            deployment_list = self.release_apply_remove_order(deployment, running_sw_version, reverse=True)
+            deployment_list = self.release_apply_remove_order(deployment, running_release.sw_release, reverse=True)
 
             msg = "Deploy start order for apply operation: %s" % ",".join(deployment_list)
             LOG.info(msg)
@@ -2448,10 +2404,10 @@ class PatchController(PatchService):
             # todo(jcasteli) Do we need this block below?
             # Check for patches that can't be applied during an upgrade
             upgrade_check = True
-            for release in deployment_list:
-                if self.release_data.metadata[release]["sw_version"] != SW_VERSION \
-                        and self.release_data.metadata[release].get("apply_active_release_only") == "Y":
-                    msg = "%s cannot be created during an upgrade" % release
+            for release_id in deployment_list:
+                release = self.release_collection.get_release_by_id(release_id)
+                if release.sw_version != SW_VERSION and release.apply_active_release_only == "Y":
+                    msg = "%s cannot be created during an upgrade" % release_id
                     LOG.error(msg)
                     msg_error += msg + "\n"
                     upgrade_check = False
@@ -2463,54 +2419,49 @@ class PatchController(PatchService):
                 self.run_semantic_check(constants.SEMANTIC_PREAPPLY, deployment_list)
 
             # Start applying the releases
-            for release in deployment_list:
-                msg = "Starting deployment for: %s" % release
+            for release_id in deployment_list:
+                release = self.release_collection.get_release_by_id(release_id)
+                msg = "Starting deployment for: %s" % release_id
                 LOG.info(msg)
                 audit_log_info(msg)
 
-                packages = [pkg.split("_")[0] for pkg in self.release_data.metadata[release].get("packages")]
+                packages = [pkg.split("_")[0] for pkg in release.packages]
                 if packages is None:
                     msg = "Unable to determine packages to install"
                     LOG.error(msg)
                     raise MetadataFail(msg)
 
-                if self.release_data.metadata[release]["state"] != constants.AVAILABLE \
-                   or self.release_data.metadata[release]["state"] == constants.COMMITTED:
-                    msg = "%s is already being deployed" % release
+                if release.state not in (states.AVAILABLE, states.COMMITTED):
+                    msg = "%s is already being deployed" % release_id
                     LOG.info(msg)
                     msg_info += msg + "\n"
                     continue
 
-                release_sw_version = utils.get_major_release_version(
-                    self.release_data.metadata[release]["sw_version"])
-
                 latest_commit = ""
                 try:
-                    latest_commit = ostree_utils.get_feed_latest_commit(release_sw_version)
+                    latest_commit = ostree_utils.get_feed_latest_commit(running_release.sw_version)
                     LOG.info("Latest commit: %s" % latest_commit)
                 except OSTreeCommandFail:
-                    LOG.exception("Failure during commit consistency check for %s.", release)
-
-                feed_ostree = "%s/rel-%s/ostree_repo" % (constants.FEED_OSTREE_BASE_DIR, release_sw_version)
+                    LOG.exception("Failure during commit consistency check for %s.", release_id)
 
                 try:
-                    apt_utils.run_install(feed_ostree, packages)
+                    apt_utils.run_install(feed_repo, packages)
                 except APTOSTreeCommandFail:
                     LOG.exception("Failed to intall Debian package.")
                     raise APTOSTreeCommandFail(msg)
 
                 # Update the feed ostree summary
-                ostree_utils.update_repo_summary_file(feed_ostree)
+                ostree_utils.update_repo_summary_file(feed_repo)
 
                 # Get the latest commit after performing "apt-ostree install".
                 self.latest_feed_commit = ostree_utils.get_feed_latest_commit(SW_VERSION)
 
                 try:
                     # Move the release metadata to deploying dir
-                    deploystate = self.release_data.metadata[release]["state"]
-                    metadata_dir = DEPLOY_STATE_METADATA_DIR_DICT[deploystate]
+                    deploystate = release.state
+                    metadata_dir = states.RELEASE_STATE_TO_DIR_MAP[deploystate]
 
-                    metadata_file = "%s/%s-metadata.xml" % (metadata_dir, release)
+                    metadata_file = "%s/%s-metadata.xml" % (metadata_dir, release_id)
                     tree = ET.parse(metadata_file)
                     root = tree.getroot()
 
@@ -2525,33 +2476,44 @@ class PatchController(PatchService):
                         outfile.write(tree)
 
                     LOG.info("Latest feed commit: %s added to metadata file" % self.latest_feed_commit)
-
-                    shutil.move(metadata_file,
-                                "%s/%s-metadata.xml" % (constants.DEPLOYING_START_DIR, release))
-
-                    msg_info += "%s is now in the repo\n" % release
+                    msg_info += "%s is now in the repo\n" % release_id
                 except shutil.Error:
-                    msg = "Failed to move the metadata for %s" % release
+                    msg = "Failed to move the metadata for %s" % release_id
                     LOG.exception(msg)
                     raise MetadataFail(msg)
 
-                self.release_data.metadata[release]["commit"] = self.latest_feed_commit
+                reload_release_data()
+                # NOTE(bqian) The check and exception raise below should be revisited; if
+                # applicable, it should be applied at the beginning of all requests.
+                if len(self.hosts) == 0:
+                    msg = "service is running in incorrect state. No registered host"
+                    raise InternalError(msg)
 
-                if not os.path.isfile(INITIAL_CONTROLLER_CONFIG_COMPLETE):
-                    self.release_data.metadata[release]["state"] = constants.DEPLOYING_START
-                elif len(self.hosts) > 0:
-                    self.release_data.metadata[release]["state"] = constants.DEPLOYING_START
-                else:
-                    self.release_data.metadata[release]["state"] = constants.UNKNOWN
+                # TODO(bqian) get the list of undeployed required release ids
+                # i.e, when deploying 24.03.3, which requires 24.03.2 and 24.03.1, all
+                # 3 release ids should be passed in to create the new ReleaseState
+                collect_current_load_for_hosts()
+                create_deploy_hosts()
+                release_state = ReleaseState(release_ids=[release.id])
+                release_state.start_deploy()
+                deploy_state = DeployState.get_instance()
+                to_release = deploy_release.sw_release
+                deploy_state.start(running_release, to_release, feed_repo, commit_id, deploy_release.reboot_required)
+                self._update_state_to_peer()
 
                 with self.hosts_lock:
-                    self.interim_state[release] = list(self.hosts)
+                    self.interim_state[release_id] = list(self.hosts)
+
+                # There is no defined behavior for deploy start for patching releases, so
+                # move the deploy state to start-done
+                deploy_state = DeployState.get_instance()
+                deploy_state.start_done()
+                self._update_state_to_peer()
 
         elif operation == "remove":
             collect_current_load_for_hosts()
             create_deploy_hosts()
-            removed = False
-            deployment_list = self.release_apply_remove_order(deployment, running_sw_version)
+            deployment_list = self.release_apply_remove_order(deployment, running_release.sw_version)
             msg = "Deploy start order for remove operation: %s" % ",".join(deployment_list)
             LOG.info(msg)
             audit_log_info(msg)
@@ -2563,19 +2525,20 @@ class PatchController(PatchService):
 
             # See if any of the patches are marked as unremovable
             unremovable_verification = True
-            for release in deployment_list:
-                if self.release_data.metadata[release].get("unremovable") == "Y":
+            for release_id in deployment_list:
+                release = self.release_collection.get_release_by_id(release_id)
+                if release.unremovable:
                     if remove_unremovable:
-                        msg = "Unremovable release %s being removed" % release
+                        msg = "Unremovable release %s being removed" % release_id
                         LOG.warning(msg)
-                        msg_warning += msg + "\n"
+                        msg_warning = msg + "\n"
                     else:
-                        msg = "Release %s is not removable" % release
+                        msg = "Release %s is not removable" % release_id
                         LOG.error(msg)
                         msg_error += msg + "\n"
                         unremovable_verification = False
-                elif self.release_data.metadata[release]['state'] == constants.COMMITTED:
-                    msg = "Release %s is committed and cannot be removed" % release
+                elif release.state == states.COMMITTED:
+                    msg = "Release %s is committed and cannot be removed" % release_id
                     LOG.error(msg)
                     msg_error += msg + "\n"
                     unremovable_verification = False
@@ -2604,91 +2567,92 @@ class PatchController(PatchService):
             if kwargs.get("skip-semantic") != "yes":
                 self.run_semantic_check(constants.SEMANTIC_PREREMOVE, deployment_list)
 
-            for release in deployment_list:
-                removed = True
-                msg = "Removing release: %s" % release
+            for release_id in deployment_list:
+                release = self.release_collection.get_release_by_id(release_id)
+                msg = "Removing release: %s" % release_id
                 LOG.info(msg)
                 audit_log_info(msg)
 
-                if self.release_data.metadata[release]["state"] == constants.AVAILABLE:
-                    msg = "The deployment for %s has not been created" % release
+                if release.state == states.AVAILABLE:
+                    msg = "The deployment for %s has not been created" % release_id
                     LOG.info(msg)
                     msg_info += msg + "\n"
                     continue
 
-                major_release_sw_version = utils.get_major_release_version(
-                    self.release_data.metadata[release]["sw_version"])
+                major_release_sw_version = release.sw_version
                 # this is an ostree patch
-                # Base commit is fetched from the patch metadata
-                base_commit = self.release_data.contents[release]["base"]["commit"]
-                feed_ostree = "%s/rel-%s/ostree_repo" % (constants.FEED_OSTREE_BASE_DIR, major_release_sw_version)
+                # Base commit is fetched from the patch metadata.
+                base_commit = release.base_commit_id
+                feed_repo = "%s/rel-%s/ostree_repo" % (constants.FEED_OSTREE_BASE_DIR, major_release_sw_version)
                 try:
                     # Reset the ostree HEAD
-                    ostree_utils.reset_ostree_repo_head(base_commit, feed_ostree)
+                    ostree_utils.reset_ostree_repo_head(base_commit, feed_repo)
 
                     # Delete all commits that belong to this release
-                    for i in range(int(self.release_data.contents[release]["number_of_commits"])):
-                        commit_to_delete = self.release_data.contents[release]["commit%s" % (i + 1)]["commit"]
-                        ostree_utils.delete_ostree_repo_commit(commit_to_delete, feed_ostree)
+                    # NOTE(bqian) there should be just one commit per release.
+                    commit_to_delete = release.commit_id
+                    ostree_utils.delete_ostree_repo_commit(commit_to_delete, feed_repo)
 
                     # Update the feed ostree summary
-                    ostree_utils.update_repo_summary_file(feed_ostree)
+                    ostree_utils.update_repo_summary_file(feed_repo)
 
                 except OSTreeCommandFail:
-                    LOG.exception("Failure while removing release %s.", release)
+                    LOG.exception("Failure while removing release %s.", release_id)
                 try:
                     # Move the metadata to the deleted dir
-                    deploystate = self.release_data.metadata[release]["state"]
-                    metadata_dir = DEPLOY_STATE_METADATA_DIR_DICT[deploystate]
-                    shutil.move("%s/%s-metadata.xml" % (metadata_dir, release),
-                                "%s/%s-metadata.xml" % (constants.REMOVING_DIR, release))
-                    msg_info += "%s has been removed from the repo\n" % release
+                    self.release_collection.update_state([release_id], states.REMOVING_DIR)
+                    msg_info += "%s has been removed from the repo\n" % release_id
                 except shutil.Error:
-                    msg = "Failed to move the metadata for %s" % release
-                    LOG.exception(msg)
+                    msg = "Failed to move the metadata for %s" % release_id
+                    LOG.Error(msg)
                     raise MetadataFail(msg)
 
-                # update state
-                if not os.path.isfile(INITIAL_CONTROLLER_CONFIG_COMPLETE):
-                    self.release_data.metadata[release]["state"] = constants.REMOVING
-                elif len(self.hosts) > 0:
-                    self.release_data.metadata[release]["state"] = constants.REMOVING
-                else:
-                    self.release_data.metadata[release]["state"] = constants.UNKNOWN
+                if len(self.hosts) == 0:
+                    msg = "service is running in incorrect state. No registered host"
+                    raise InternalError(msg)
+
+                # TODO(bqian) get the list of undeployed required release ids
+                # i.e, when deploying 24.03.3, which requires 24.03.2 and 24.03.1, all
+                # 3 release ids should be passed in to create the new ReleaseState
+                collect_current_load_for_hosts()
+                create_deploy_hosts()
+                release_state = ReleaseState(release_ids=[release.id])
+                release_state.start_remove()
+                deploy_state = DeployState.get_instance()
+                to_release = deploy_release.sw_release
+                deploy_state.start(running_release, to_release, feed_repo, commit_id, deploy_release.reboot_required)
+                self._update_state_to_peer()
 
                 # only update lastest_feed_commit if it is an ostree patch
-                if self.release_data.contents[release].get("base") is not None:
+                if release.base_commit_id is not None:
                     # Base Commit in this release's metadata.xml file represents the latest commit
                     # after this release has been removed from the feed repo
-                    self.latest_feed_commit = self.release_data.contents[release]["base"]["commit"]
+                    self.latest_feed_commit = release.base_commit_id
 
                 with self.hosts_lock:
-                    self.interim_state[release] = list(self.hosts)
+                    self.interim_state[release_id] = list(self.hosts)
 
-            if removed:
-                self.latest_feed_commit = ostree_utils.get_feed_latest_commit(SW_VERSION)
-                self.release_data.metadata[release]["commit"] = self.latest_feed_commit
-                try:
-                    metadata_dir = DEPLOY_STATE_METADATA_DIR_DICT[deploystate]
-                    shutil.move("%s/%s-metadata.xml" % (metadata_dir, deployment),
-                                "%s/%s-metadata.xml" % (constants.DEPLOYING_START_DIR, deployment))
-                    msg_info += "Deployment started for %s\n" % deployment
-                except shutil.Error:
-                    msg = "Failed to move the metadata for %s" % deployment
-                    LOG.exception(msg)
-                    raise MetadataFail(msg)
-
-                # update state
-                if not os.path.isfile(INITIAL_CONTROLLER_CONFIG_COMPLETE):
-                    self.release_data.metadata[deployment]["state"] = constants.DEPLOYING_START
-                elif len(self.hosts) > 0:
-                    self.release_data.metadata[deployment]["state"] = constants.DEPLOYING_START
-                else:
-                    self.release_data.metadata[deployment]["state"] = constants.UNKNOWN
+                # There is no defined behavior for deploy start for patching releases, so
+                # move the deploy state to start-done
+                deploy_state = DeployState.get_instance()
+                deploy_state.start_done()
+                self._update_state_to_peer()
 
         return dict(info=msg_info, warning=msg_warning, error=msg_error)
 
-    def software_deploy_complete_api(self, release: str) -> dict:
+    def _deploy_complete(self):
+        # TODO(bqian) complete the deploy
+        # As the deployment has already been activated, there is no way back;
+        # deploy complete can only succeed.
+        # The task for completing a deploy is to delete leftover data from the
+        # previous release. If some data cannot be deleted, the deletion needs
+        # to be automatically reattempted at a later stage (outside
+        # a deployment).
+        return True
+
+    @require_deploy_state([DEPLOY_STATES.ACTIVATE_DONE],
+                          "Must complete deploy activate before completing the deployment")
+    def software_deploy_complete_api(self) -> dict:
         """
         Completes a deployment associated with the release
         :return: dict of info, warning and error messages
@@ -2696,53 +2660,22 @@ class PatchController(PatchService):
         msg_info = ""
         msg_warning = ""
         msg_error = ""
-        if self.release_data.metadata[release]["state"] not in \
-                [constants.DEPLOYING_ACTIVATE, constants.DEPLOYING_COMPLETE]:
-            msg = "%s is not activated yet" % release
-            LOG.info(msg)
-            msg_info += msg + "\n"
-        else:
-            # Set the state to deploying-complete
-            for release_id in sorted(list(self.release_data.metadata)):
-                if self.release_data.metadata[release_id]["state"] == constants.DEPLOYING_ACTIVATE:
-                    self.release_data.metadata[release_id]["state"] = constants.DEPLOYING_COMPLETE
-                    try:
-                        shutil.move("%s/%s-metadata.xml" % (constants.DEPLOYING_ACTIVATE_DIR, release_id),
-                                    "%s/%s-metadata.xml" % (constants.DEPLOYING_COMPLETE_DIR, release_id))
-                    except shutil.Error:
-                        msg = "Failed to move the metadata for %s" % release_id
-                        LOG.exception(msg)
-                        raise MetadataFail(msg)
 
-            # The code for deploy complete is going to execute
-            # Once deploy complete is successfully executed, we move the metadata to their
-            # respective folders
-            for release_id in sorted(list(self.release_data.metadata)):
-                if self.release_data.metadata[release_id]["state"] == constants.REMOVING:
-                    self.release_data.metadata[release_id]["state"] = constants.AVAILABLE
-                    try:
-                        shutil.move("%s/%s-metadata.xml" % (constants.REMOVING_DIR, release_id),
-                                    "%s/%s-metadata.xml" % (constants.AVAILABLE_DIR, release_id))
-                        msg_info += "%s is available\n" % release_id
-                    except shutil.Error:
-                        msg = "Failed to move the metadata for %s" % release_id
-                        LOG.exception(msg)
-                        raise MetadataFail(msg)
-                elif self.release_data.metadata[release_id]["state"] == constants.DEPLOYING_COMPLETE:
-                    self.release_data.metadata[release_id]["state"] = constants.DEPLOYED
+        deploy_state = DeployState.get_instance()
 
-                    try:
-                        shutil.move("%s/%s-metadata.xml" % (constants.DEPLOYING_COMPLETE_DIR, release_id),
-                                    "%s/%s-metadata.xml" % (constants.DEPLOYED_DIR, release_id))
-                        msg_info += "%s has been deployed\n" % release_id
-                    except shutil.Error:
-                        msg = "Failed to move the metadata for %s" % release_id
-                        LOG.exception(msg)
-                        raise MetadataFail(msg)
+        if self._deploy_complete():
+            deploy_state.completed()
+            msg_info += "Deployment has been completed\n"
 
         return dict(info=msg_info, warning=msg_warning, error=msg_error)
 
-    def software_deploy_activate_api(self, release: str) -> dict:
+    def _activate(self):
+        # TODO(bqian) activate the deployment
+        return True
+
+    @require_deploy_state([DEPLOY_STATES.HOST_DONE, DEPLOY_STATES.ACTIVATE_FAILED],
+                          "Must complete deploying all hosts before activating the deployment")
+    def software_deploy_activate_api(self) -> dict:
         """
         Activates the deployment associated with the release
         :return: dict of info, warning and error messages
@@ -2750,21 +2683,16 @@ class PatchController(PatchService):
         msg_info = ""
         msg_warning = ""
         msg_error = ""
-        if self.release_data.metadata[release]["state"] != constants.DEPLOYING_HOST:
-            msg = "%s is not deployed on host" % release
-            LOG.info(msg)
-            msg_info += msg + "\n"
-        else:
-            try:
-                shutil.move("%s/%s-metadata.xml" % (constants.DEPLOYING_HOST_DIR, release),
-                            "%s/%s-metadata.xml" % (constants.DEPLOYING_ACTIVATE_DIR, release))
-            except shutil.Error:
-                msg = "Failed to move the metadata for %s" % release
-                LOG.exception(msg)
-                raise MetadataFail(msg)
 
-            msg_info += "Deployment for %s has been activated\n" % release
-            self.release_data.metadata[release]["state"] = constants.DEPLOYING_ACTIVATE
+        deploy_state = DeployState.get_instance()
+        deploy_state.activate()
+
+        if self._activate():
+            deploy_state.activate_completed()
+            msg_info += "Deployment has been activated\n"
+        else:
+            deploy_state.activate_failed()
+            msg_error += "Dployment activation has failed.\n"
 
         return dict(info=msg_info, warning=msg_warning, error=msg_error)
 
@@ -2776,30 +2704,26 @@ class PatchController(PatchService):
             # Retrieve deploy state from db in list format
             return self.db_api_instance.get_deploy_all()
 
-    def software_deploy_host_api(self, host_ip, force, async_req=False):
+    @require_deploy_state([DEPLOY_STATES.START_DONE, DEPLOY_STATES.HOST, DEPLOY_STATES.HOST_FAILED],
+                          "Current deployment ({state}) is not ready to deploy host")
+    def software_deploy_host_api(self, hostname, force, async_req=False):
         msg_info = ""
         msg_warning = ""
         msg_error = ""
 
-        ip = host_ip
+        deploy_host = self.db_api_instance.get_deploy_host_by_hostname(hostname)
+        if deploy_host is None:
+            raise HostNotFound(hostname)
 
-        self.hosts_lock.acquire()
-        # If not in hosts table, maybe a hostname was used instead
-        if host_ip not in self.hosts:
-            try:
-                ip = utils.gethostbyname(host_ip)
-                if ip not in self.hosts:
-                    # Translated successfully, but IP isn't in the table.
-                    # Raise an exception to drop out to the failure handling
-                    raise SoftwareError("Host IP (%s) not in table" % ip)
-            except Exception:
-                self.hosts_lock.release()
-                msg = "Unknown host specified: %s" % host_ip
-                msg_error += msg + "\n"
-                LOG.error("Error in host-install: %s", msg)
-                return dict(info=msg_info, warning=msg_warning, error=msg_error)
+        deploy_state = DeployState.get_instance()
+        deploy_host_state = DeployHostState(hostname)
+        deploy_state.deploy_host()
+        deploy_host_state.deploy_started()
 
-        msg = "Running software deploy host for %s (%s), force=%s, async_req=%s" % (host_ip, ip, force, async_req)
+        # NOTE(bqian) Get IP address to fulfill the need of patching structure.
+        # need to review the design
+        ip = socket.getaddrinfo(hostname, 0)[0][4][0]
+        msg = "Running software deploy host for %s (%s), force=%s, async_req=%s" % (hostname, ip, force, async_req)
         LOG.info(msg)
         audit_log_info(msg)
 
@@ -2820,9 +2744,10 @@ class PatchController(PatchService):
                 major_release, force, async_req)
             msg_info += msg + "\n"
             LOG.info(msg)
-            set_host_target_load(host_ip, major_release)
+            set_host_target_load(hostname, major_release)
             # TODO(heitormatsui) update host deploy status
 
+        self.hosts_lock.acquire()
         self.hosts[ip].install_pending = True
         self.hosts[ip].install_status = False
         self.hosts[ip].install_reject_reason = None
@@ -2842,22 +2767,13 @@ class PatchController(PatchService):
             msg = "Host installation request sent to %s." % self.hosts[ip].hostname
             msg_info += msg + "\n"
             LOG.info("host-install async_req: %s", msg)
-            for release in sorted(list(self.release_data.metadata)):
-                if self.release_data.metadata[release]["state"] == constants.DEPLOYING_START:
-                    try:
-                        shutil.move("%s/%s-metadata.xml" % (constants.DEPLOYING_START_DIR, release),
-                                    "%s/%s-metadata.xml" % (constants.DEPLOYING_HOST_DIR, release))
-                        msg_info += "%s has been activated\n" % release
-                    except shutil.Error:
-                        msg = "Failed to move the metadata for %s" % release
-                        LOG.exception(msg)
-                        raise MetadataFail(msg)
-                    self.release_data.metadata[release]["state"] = constants.DEPLOYING_HOST
-            return dict(info=msg_info, warning=msg_warning, error=msg_error)
+            # TODO(bqian) update deploy state to deploy-host
 
         # Now we wait, up to ten mins. future enhancement: Wait on a condition
         resp_rx = False
         max_time = time.time() + 600
+        # NOTE(bqian) the loop below blocks the REST API service (slow thread).
+        # Consider removing it.
         while time.time() < max_time:
             self.hosts_lock.acquire()
             if ip not in self.hosts:
@@ -2898,17 +2814,6 @@ class PatchController(PatchService):
             msg_error += msg + "\n"
             LOG.error("Error in host-install: %s", msg)
 
-        for release in sorted(list(self.release_data.metadata)):
-            if self.release_data.metadata[release]["state"] == constants.DEPLOYING_START:
-                try:
-                    shutil.move("%s/%s-metadata.xml" % (constants.DEPLOYING_START_DIR, release),
-                                "%s/%s-metadata.xml" % (constants.DEPLOYING_HOST_DIR, release))
-                    msg_info += "%s has been activated\n" % release
-                except shutil.Error:
-                    msg = "Failed to move the metadata for %s" % release
-                    LOG.exception(msg)
-                    raise MetadataFail(msg)
-                self.release_data.metadata[release]["state"] = constants.DEPLOYING_HOST
         return dict(info=msg_info, warning=msg_warning, error=msg_error)
 
     def drop_host(self, host_ip, sync_nbr=True):
@@ -2961,56 +2866,33 @@ class PatchController(PatchService):
 
         return dict(info=msg_info, warning=msg_warning, error=msg_error)
 
+    def check_releases_state(self, release_ids, state):
+        """Tell whether every listed release exists and is in the given state.
+
+        :param release_ids: ids of the releases to check
+        :param state: state each release is expected to be in
+        :return: True if all releases match, False otherwise
+        """
+        for release_id in release_ids:
+            release = self.release_collection.get_release_by_id(release_id)
+            # A release that cannot be found counts as a mismatch
+            if release is None or release.state != state:
+                return False
+
+        return True
+
     def is_available(self, release_ids):
-        all_available = True
-
-        with self.release_data_lock:
-
-            for release_id in release_ids:
-                if release_id not in self.release_data.metadata:
-                    all_available = False
-                    break
-
-                if self.release_data.metadata[release_id]["state"] != \
-                        constants.AVAILABLE:
-                    all_available = False
-                    break
-
-        return all_available
+        return self.check_releases_state(release_ids, states.AVAILABLE)
 
     def is_deployed(self, release_ids):
-        all_deployed = True
-
-        with self.release_data_lock:
-
-            for release_id in release_ids:
-                if release_id not in self.release_data.metadata:
-                    all_deployed = False
-                    break
-
-                if self.release_data.metadata[release_id]["state"] != constants.DEPLOYED:
-                    all_deployed = False
-                    break
-
-        return all_deployed
+        return self.check_releases_state(release_ids, states.DEPLOYED)
 
     def is_committed(self, release_ids):
-        all_committed = True
-
-        with self.release_data_lock:
-
-            for release_id in release_ids:
-                if release_id not in self.release_data.metadata:
-                    all_committed = False
-                    break
-
-                if self.release_data.metadata[release_id]["state"] != \
-                        constants.COMMITTED:
-                    all_committed = False
-                    break
-
-        return all_committed
+        return self.check_releases_state(release_ids, states.COMMITTED)
 
+    # NOTE(bqian) report_app_dependencies function not being called?
+    # which means self.app_dependencies will always be empty and file
+    # app_dependency_filename will never exist?
     def report_app_dependencies(self, patch_ids, **kwargs):
         """
         Handle report of application dependencies
@@ -3023,8 +2905,6 @@ class PatchController(PatchService):
         LOG.info("Handling app dependencies report: app=%s, patch_ids=%s",
                  appname, ','.join(patch_ids))
 
-        self.release_data_lock.acquire()
-
         if len(patch_ids) == 0:
             if appname in self.app_dependencies:
                 del self.app_dependencies[appname]
@@ -3043,21 +2923,16 @@ class PatchController(PatchService):
         except Exception:
             LOG.exception("Failed in report_app_dependencies")
             raise SoftwareFail("Internal failure")
-        finally:
-            self.release_data_lock.release()
 
         return True
 
+    # NOTE(bqian) unused function query_app_dependencies
     def query_app_dependencies(self):
         """
         Query application dependencies
         """
-        self.release_data_lock.acquire()
-
         data = self.app_dependencies
 
-        self.release_data_lock.release()
-
         return dict(data)
 
     def deploy_host_list(self):
@@ -3203,7 +3078,7 @@ class PatchController(PatchService):
             all_host_upgrades.append({
                 "hostname": deploy_host.get("hostname"),
                 "current_sw_version": to_maj_min_release if deploy_host.get(
-                    "state") == constants.DEPLOYED else from_maj_min_release,
+                    "state") == states.DEPLOYED else from_maj_min_release,
                 "target_sw_version": to_maj_min_release,
                 "host_state": deploy_host.get("state")
             })
@@ -3550,7 +3425,7 @@ class PatchControllerMainThread(threading.Thread):
                         SEND_MSG_INTERVAL_IN_SECONDS)
 
                     # Only send the deploy state update from the active controller
-                    if is_deployment_in_progress(sc.release_data.metadata) and utils.is_active_controller():
+                    if is_deployment_in_progress() and utils.is_active_controller():
                         try:
                             sc.socket_lock.acquire()
                             deploy_state_update = SoftwareMessageDeployStateUpdate()
diff --git a/software/software/software_entities.py b/software/software/software_entities.py
index d3346de9..cba8b8b1 100644
--- a/software/software/software_entities.py
+++ b/software/software/software_entities.py
@@ -20,8 +20,8 @@ from software.utils import save_to_json_file
 from software.utils import get_software_filesystem_data
 from software.utils import validate_versions
 
-from software.constants import DEPLOY_HOST_STATES
-from software.constants import DEPLOY_STATES
+from software.states import DEPLOY_HOST_STATES
+from software.states import DEPLOY_STATES
 
 LOG = logging.getLogger('main_logger')
 
@@ -135,12 +135,15 @@ class Deploy(ABC):
         pass
 
     @abstractmethod
-    def create(self, from_release: str, to_release: str, reboot_required: bool, state: DEPLOY_STATES):
+    def create(self, from_release: str, to_release: str, feed_repo: str,
+               commit_id: str, reboot_required: bool, state: DEPLOY_STATES):
         """
         Create a new deployment entry.
 
         :param from_release: The current release version.
         :param to_release: The target release version.
+        :param feed_repo: ostree repo feed path
+        :param commit_id: commit-id to deploy
         :param reboot_required: If is required to do host reboot.
         :param state: The state of the deployment.
 
@@ -230,11 +233,7 @@ class DeployHosts(ABC):
 
 
 class DeployHandler(Deploy):
-    def __init__(self):
-        super().__init__()
-        self.data = get_software_filesystem_data()
-
-    def create(self, from_release, to_release, reboot_required, state=DEPLOY_STATES.START):
+    def create(self, from_release, to_release, feed_repo, commit_id, reboot_required, state=DEPLOY_STATES.START):
         """
         Create a new deploy with given from and to release version
         :param from_release: The current release version.
@@ -242,30 +241,33 @@ class DeployHandler(Deploy):
         :param reboot_required: If is required to do host reboot.
         :param state: The state of the deployment.
         """
-        super().create(from_release, to_release, reboot_required, state)
+        super().create(from_release, to_release, feed_repo, commit_id, reboot_required, state)
         deploy = self.query(from_release, to_release)
         if deploy:
             raise DeployAlreadyExist("Error to create. Deploy already exists.")
         new_deploy = {
             "from_release": from_release,
             "to_release": to_release,
+            "feed_repo": feed_repo,
+            "commit_id": commit_id,
             "reboot_required": reboot_required,
             "state": state.value
         }
 
         try:
-            deploy_data = self.data.get("deploy", [])
+            data = get_software_filesystem_data()
+            deploy_data = data.get("deploy", [])
             if not deploy_data:
                 deploy_data = {
                     "deploy": []
                 }
                 deploy_data["deploy"].append(new_deploy)
-                self.data.update(deploy_data)
+                data.update(deploy_data)
             else:
                 deploy_data.append(new_deploy)
-            save_to_json_file(constants.SOFTWARE_JSON_FILE, self.data)
+            save_to_json_file(constants.SOFTWARE_JSON_FILE, data)
         except Exception:
-            self.data["deploy"][0] = {}
+            LOG.exception("Failed to create deploy from %s to %s", from_release, to_release)
 
     def query(self, from_release, to_release):
         """
@@ -275,7 +277,8 @@ class DeployHandler(Deploy):
         :return: A list of deploy dictionary
         """
         super().query(from_release, to_release)
-        for deploy in self.data.get("deploy", []):
+        data = get_software_filesystem_data()
+        for deploy in data.get("deploy", []):
             if (deploy.get("from_release") == from_release and
                     deploy.get("to_release") == to_release):
                 return deploy
@@ -286,7 +289,8 @@ class DeployHandler(Deploy):
         Query all deployments inside software.json file.
         :return: A list of deploy dictionary
         """
-        return self.data.get("deploy", [])
+        data = get_software_filesystem_data()
+        return data.get("deploy", [])
 
     def update(self, new_state: DEPLOY_STATES):
         """
@@ -298,11 +302,12 @@ class DeployHandler(Deploy):
         if not deploy:
             raise DeployDoNotExist("Error to update deploy state. No deploy in progress.")
 
+        data = get_software_filesystem_data()
         try:
-            self.data["deploy"][0]["state"] = new_state.value
-            save_to_json_file(constants.SOFTWARE_JSON_FILE, self.data)
+            data["deploy"][0]["state"] = new_state.value
+            save_to_json_file(constants.SOFTWARE_JSON_FILE, data)
         except Exception:
-            self.data["deploy"][0] = deploy
+            LOG.exception("Failed to update deploy state to %s", new_state)
 
     def delete(self):
         """
@@ -312,19 +317,16 @@ class DeployHandler(Deploy):
         deploy = self.query_all()
         if not deploy:
             raise DeployDoNotExist("Error to delete deploy state. No deploy in progress.")
+
+        data = get_software_filesystem_data()
         try:
-            self.data["deploy"].clear()
-            save_to_json_file(constants.SOFTWARE_JSON_FILE, self.data)
+            data["deploy"].clear()
+            save_to_json_file(constants.SOFTWARE_JSON_FILE, data)
         except Exception:
-            self.data["deploy"][0] = deploy
+            LOG.exception("Failed to delete deploy")
 
 
 class DeployHostHandler(DeployHosts):
-
-    def __init__(self):
-        super().__init__()
-        self.data = get_software_filesystem_data()
-
     def create(self, hostname, state: DEPLOY_HOST_STATES = DEPLOY_HOST_STATES.PENDING):
         super().create(hostname, state)
         deploy = self.query(hostname)
@@ -336,16 +338,17 @@ class DeployHostHandler(DeployHosts):
             "state": state.value if state else None
         }
 
-        deploy_data = self.data.get("deploy_host", [])
+        data = get_software_filesystem_data()
+        deploy_data = data.get("deploy_host", [])
         if not deploy_data:
             deploy_data = {
                 "deploy_host": []
             }
             deploy_data["deploy_host"].append(new_deploy_host)
-            self.data.update(deploy_data)
+            data.update(deploy_data)
         else:
             deploy_data.append(new_deploy_host)
-        save_to_json_file(constants.SOFTWARE_JSON_FILE, self.data)
+        save_to_json_file(constants.SOFTWARE_JSON_FILE, data)
 
     def query(self, hostname):
         """
@@ -354,13 +357,15 @@ class DeployHostHandler(DeployHosts):
         :return: A list of deploy dictionary
         """
         super().query(hostname)
-        for deploy in self.data.get("deploy_host", []):
+        data = get_software_filesystem_data()
+        for deploy in data.get("deploy_host", []):
             if deploy.get("hostname") == hostname:
                 return deploy
         return None
 
     def query_all(self):
-        return self.data.get("deploy_host", [])
+        data = get_software_filesystem_data()
+        return data.get("deploy_host", [])
 
     def update(self, hostname, state: DEPLOY_HOST_STATES):
         super().update(hostname, state)
@@ -368,23 +373,26 @@ class DeployHostHandler(DeployHosts):
         if not deploy:
             raise Exception("Error to update. Deploy host do not exist.")
 
-        index = self.data.get("deploy_host", []).index(deploy)
+        data = get_software_filesystem_data()
+        index = data.get("deploy_host", []).index(deploy)
         updated_entity = {
             "hostname": hostname,
             "state": state.value
         }
-        self.data["deploy_host"][index].update(updated_entity)
-        save_to_json_file(constants.SOFTWARE_JSON_FILE, self.data)
+        data["deploy_host"][index].update(updated_entity)
+        save_to_json_file(constants.SOFTWARE_JSON_FILE, data)
         return updated_entity
 
     def delete_all(self):
-        self.data.get("deploy_host").clear()
-        save_to_json_file(constants.SOFTWARE_JSON_FILE, self.data)
+        data = get_software_filesystem_data()
+        data.get("deploy_host").clear()
+        save_to_json_file(constants.SOFTWARE_JSON_FILE, data)
 
     def delete(self, hostname):
         super().delete(hostname)
         deploy = self.query(hostname)
         if not deploy:
             raise DeployDoNotExist("Error to delete. Deploy host do not exist.")
-        self.data.get("deploy_host").remove(deploy)
-        save_to_json_file(constants.SOFTWARE_JSON_FILE, self.data)
+        data = get_software_filesystem_data()
+        data.get("deploy_host").remove(deploy)
+        save_to_json_file(constants.SOFTWARE_JSON_FILE, data)
diff --git a/software/software/software_functions.py b/software/software/software_functions.py
index e26adc6e..350c147e 100644
--- a/software/software/software_functions.py
+++ b/software/software/software_functions.py
@@ -32,11 +32,11 @@ from software.exceptions import OSTreeTarFail
 from software.exceptions import ReleaseUploadFailure
 from software.exceptions import ReleaseValidationFailure
 from software.exceptions import ReleaseMismatchFailure
-from software.exceptions import SoftwareFail
 from software.exceptions import SoftwareServiceError
 from software.exceptions import VersionedDeployPrecheckFailure
 
 import software.constants as constants
+from software import states
 import software.utils as utils
 from software.sysinv_utils import get_ihost_list
 
@@ -81,7 +81,7 @@ def configure_logging(logtofile=True, level=logging.INFO):
         my_exec = os.path.basename(sys.argv[0])
 
         log_format = '%(asctime)s: ' \
-                     + my_exec + '[%(process)s]: ' \
+                     + my_exec + '[%(process)s:%(thread)d]: ' \
                      + '%(filename)s(%(lineno)s): ' \
                      + '%(levelname)s: %(message)s'
 
@@ -231,10 +231,13 @@ class ReleaseData(object):
     """
 
     def __init__(self):
+        self._reset()
+
+    def _reset(self):
         #
         # The metadata dict stores all metadata associated with a release.
         # This dict is keyed on release_id, with metadata for each release stored
-        # in a nested dict. (See parse_metadata method for more info)
+        # in a nested dict. (See parse_metadata_string method for more info)
         #
         self.metadata = {}
 
@@ -253,8 +256,8 @@ class ReleaseData(object):
         for release_id in list(updated_release.metadata):
             # Update all fields except state
             cur_state = self.metadata[release_id]['state']
+            updated_release.metadata[release_id]['state'] = cur_state
             self.metadata[release_id].update(updated_release.metadata[release_id])
-            self.metadata[release_id]['state'] = cur_state
 
     def delete_release(self, release_id):
         del self.contents[release_id]
@@ -294,22 +297,21 @@ class ReleaseData(object):
         outfile.close()
         os.rename(new_filename, filename)
 
-    def parse_metadata(self,
-                       filename,
-                       state=None):
+    def parse_metadata_file(self,
+                            filename,
+                            state=None):
         """
         Parse an individual release metadata XML file
         :param filename: XML file
         :param state: Indicates Applied, Available, or Committed
         :return: Release ID
         """
-
         with open(filename, "r") as f:
             text = f.read()
 
         return self.parse_metadata_string(text, state)
 
-    def parse_metadata_string(self, text, state):
+    def parse_metadata_string(self, text, state=None):
         root = ElementTree.fromstring(text)
         #
         #    <patch>
@@ -391,31 +393,35 @@ class ReleaseData(object):
 
         return release_id
 
-    def load_all_metadata(self,
-                          loaddir,
-                          state=None):
+    def _read_all_metafile(self, path):
         """
-        Parse all metadata files in the specified dir
-        :return:
+        Yield (filename, text) for every xml file in the specified path
+        :param path: path of the directory containing the xml files
         """
-        for fname in glob.glob("%s/*.xml" % loaddir):
-            self.parse_metadata(fname, state)
+        for filename in glob.glob("%s/*.xml" % path):
+            with open(filename, "r") as f:
+                text = f.read()
+            yield filename, text
 
     def load_all(self):
         # Reset the data
         self.__init__()
-        self.load_all_metadata(constants.AVAILABLE_DIR, state=constants.AVAILABLE)
-        self.load_all_metadata(constants.UNAVAILABLE_DIR, state=constants.UNAVAILABLE)
-        self.load_all_metadata(constants.DEPLOYING_START_DIR, state=constants.DEPLOYING_START)
-        self.load_all_metadata(constants.DEPLOYING_HOST_DIR, state=constants.DEPLOYING_HOST)
-        self.load_all_metadata(constants.DEPLOYING_ACTIVATE_DIR, state=constants.DEPLOYING_ACTIVATE)
-        self.load_all_metadata(constants.DEPLOYING_COMPLETE_DIR, state=constants.DEPLOYING_COMPLETE)
-        self.load_all_metadata(constants.DEPLOYED_DIR, state=constants.DEPLOYED)
-        self.load_all_metadata(constants.REMOVING_DIR, state=constants.REMOVING)
-        self.load_all_metadata(constants.ABORTING_DIR, state=constants.ABORTING)
-        self.load_all_metadata(constants.COMMITTED_DIR, state=constants.COMMITTED)
 
-        # load the release metadata from feed directory or filesystem db
+        state_map = {
+            states.AVAILABLE: states.AVAILABLE_DIR,
+            states.UNAVAILABLE: states.UNAVAILABLE_DIR,
+            states.DEPLOYING: states.DEPLOYING_DIR,
+            states.DEPLOYED: states.DEPLOYED_DIR,
+            states.REMOVING: states.REMOVING_DIR,
+        }
+
+        for state, path in state_map.items():
+            for filename, text in self._read_all_metafile(path):
+                try:
+                    self.parse_metadata_string(text, state=state)
+                except Exception as e:
+                    err_msg = f"Failed parsing {filename}, {e}"
+                    LOG.exception(err_msg)
 
     def query_line(self,
                    release_id,
@@ -636,54 +642,56 @@ class PatchFile(object):
                 raise SystemExit(e.returncode)
 
     @staticmethod
-    def read_patch(path, cert_type=None):
+    def read_patch(path, dest, cert_type=None):
         # We want to enable signature checking by default
         # Note: cert_type=None is required if we are to enforce 'no dev patches on a formal load' rule.
 
         # Open the patch file and extract the contents to the current dir
         tar = tarfile.open(path, "r:gz")
 
-        tar.extract("signature")
+        tar.extract("signature", path=dest)
         try:
-            tar.extract(detached_signature_file)
+            tar.extract(detached_signature_file, path=dest)
         except KeyError:
             msg = "Patch has not been signed"
             LOG.warning(msg)
 
         # Filelist used for signature validation and verification
-        sig_filelist = ["metadata.tar", "software.tar"]
+        filelist = ["metadata.tar", "software.tar"]
 
         # Check if conditional scripts are inside the patch
         # If yes then add them to signature checklist
         if "semantics.tar" in [f.name for f in tar.getmembers()]:
-            sig_filelist.append("semantics.tar")
+            filelist.append("semantics.tar")
         if "pre-install.sh" in [f.name for f in tar.getmembers()]:
-            sig_filelist.append("pre-install.sh")
+            filelist.append("pre-install.sh")
         if "post-install.sh" in [f.name for f in tar.getmembers()]:
-            sig_filelist.append("post-install.sh")
+            filelist.append("post-install.sh")
 
-        for f in sig_filelist:
-            tar.extract(f)
+        for f in filelist:
+            tar.extract(f, path=dest)
 
         # Verify the data integrity signature first
-        sigfile = open("signature", "r")
+        sigfile = open(os.path.join(dest, "signature"), "r")
         sig = int(sigfile.read(), 16)
         sigfile.close()
 
         expected_sig = 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
+        sig_filelist = [os.path.join(dest, f) for f in filelist]
         for f in sig_filelist:
             sig ^= get_md5(f)
 
         if sig != expected_sig:
-            msg = "Patch failed verification"
+            msg = "Software failed signature verification."
             LOG.error(msg)
-            raise ReleaseValidationFailure(msg)
+            raise ReleaseValidationFailure(error=msg)
 
         # Verify detached signature
-        if os.path.exists(detached_signature_file):
+        sig_file = os.path.join(dest, detached_signature_file)
+        if os.path.exists(sig_file):
             sig_valid = verify_files(
                 sig_filelist,
-                detached_signature_file,
+                sig_file,
                 cert_type=cert_type)
             if sig_valid is True:
                 msg = "Signature verified, patch has been signed"
@@ -693,20 +701,21 @@ class PatchFile(object):
                 msg = "Signature check failed"
                 if cert_type is None:
                     LOG.error(msg)
-                raise ReleaseValidationFailure(msg)
+                raise ReleaseValidationFailure(error=msg)
         else:
-            msg = "Patch has not been signed"
+            msg = "Software has not been signed."
             if cert_type is None:
                 LOG.error(msg)
-            raise ReleaseValidationFailure(msg)
+            raise ReleaseValidationFailure(error=msg)
 
         # Restart script
         for f in tar.getmembers():
-            if f.name not in sig_filelist:
-                tar.extract(f)
+            if f.name not in filelist:
+                tar.extract(f, path=dest)
 
-        tar = tarfile.open("metadata.tar")
-        tar.extractall()
+        metadata = os.path.join(dest, "metadata.tar")
+        tar = tarfile.open(metadata)
+        tar.extractall(path=dest)
 
     @staticmethod
     def query_patch(patch, field=None):
@@ -716,12 +725,6 @@ class PatchFile(object):
         # Create a temporary working directory
         tmpdir = tempfile.mkdtemp(prefix="patch_")
 
-        # Save the current directory, so we can chdir back after
-        orig_wd = os.getcwd()
-
-        # Change to the tmpdir
-        os.chdir(tmpdir)
-
         r = {}
 
         try:
@@ -729,7 +732,7 @@ class PatchFile(object):
                 # Need to determine the cert_type
                 for cert_type_str in cert_type_all:
                     try:
-                        PatchFile.read_patch(abs_patch, cert_type=[cert_type_str])
+                        PatchFile.read_patch(abs_patch, tmpdir, cert_type=[cert_type_str])
                     except ReleaseValidationFailure:
                         pass
                     else:
@@ -738,15 +741,17 @@ class PatchFile(object):
                         break
 
             if "cert" not in r:
+                # NOTE(bqian) below reads like a bug in certain cases. need to revisit.
                 # If cert is unknown, then file is not yet open for reading.
                 # Try to open it for reading now, using all available keys.
                 # We can't omit cert_type, or pass None, because that will trigger the code
                 # path used by installed product, in which dev keys are not accepted unless
                 # a magic file exists.
-                PatchFile.read_patch(abs_patch, cert_type=cert_type_all)
+                PatchFile.read_patch(abs_patch, tmpdir, cert_type=cert_type_all)
 
             thispatch = ReleaseData()
-            patch_id = thispatch.parse_metadata("metadata.xml")
+            filename = os.path.join(tmpdir, "metadata.xml")
+            patch_id = thispatch.parse_metadata_file(filename)
 
             if field is None or field == "id":
                 r["id"] = patch_id
@@ -761,20 +766,14 @@ class PatchFile(object):
                     r[field] = thispatch.query_line(patch_id, field)
 
         except ReleaseValidationFailure as e:
-            msg = "Patch validation failed during extraction"
+            msg = "Patch validation failed during extraction. %s" % str(e)
             LOG.exception(msg)
             raise e
-        except ReleaseMismatchFailure as e:
-            msg = "Patch Mismatch during extraction"
+        except tarfile.TarError as te:
+            msg = "Extract software failed %s" % str(te)
             LOG.exception(msg)
-            raise e
-        except tarfile.TarError:
-            msg = "Failed during patch extraction"
-            LOG.exception(msg)
-            raise ReleaseValidationFailure(msg)
+            raise ReleaseValidationFailure(error=msg)
         finally:
-            # Change back to original working dir
-            os.chdir(orig_wd)
             shutil.rmtree(tmpdir)
 
         return r
@@ -790,45 +789,34 @@ class PatchFile(object):
         # Create a temporary working directory
         tmpdir = tempfile.mkdtemp(prefix="patch_")
 
-        # Save the current directory, so we can chdir back after
-        orig_wd = os.getcwd()
-
-        # Change to the tmpdir
-        os.chdir(tmpdir)
-
         try:
             cert_type = None
             meta_data = PatchFile.query_patch(abs_patch)
             if 'cert' in meta_data:
                 cert_type = meta_data['cert']
-            PatchFile.read_patch(abs_patch, cert_type=cert_type)
-            ReleaseData.modify_metadata_text("metadata.xml", key, value)
+            PatchFile.read_patch(abs_patch, tmpdir, cert_type=cert_type)
+            path = os.path.join(tmpdir, "metadata.xml")
+            ReleaseData.modify_metadata_text(path, key, value)
             PatchFile.write_patch(new_abs_patch, cert_type=cert_type)
             os.rename(new_abs_patch, abs_patch)
             rc = True
 
-        except ReleaseValidationFailure as e:
-            raise e
-        except ReleaseMismatchFailure as e:
-            raise e
-        except tarfile.TarError:
-            msg = "Failed during patch extraction"
+        except tarfile.TarError as te:
+            msg = "Extract software failed %s" % str(te)
             LOG.exception(msg)
-            raise ReleaseValidationFailure(msg)
+            raise ReleaseValidationFailure(error=msg)
         except Exception as e:
             template = "An exception of type {0} occurred. Arguments:\n{1!r}"
             message = template.format(type(e).__name__, e.args)
-            print(message)
+            LOG.exception(message)
         finally:
-            # Change back to original working dir
-            os.chdir(orig_wd)
             shutil.rmtree(tmpdir)
 
         return rc
 
     @staticmethod
     def extract_patch(patch,
-                      metadata_dir=constants.AVAILABLE_DIR,
+                      metadata_dir=states.AVAILABLE_DIR,
                       metadata_only=False,
                       existing_content=None,
                       base_pkgdata=None):
@@ -845,23 +833,18 @@ class PatchFile(object):
         # Create a temporary working directory
         tmpdir = tempfile.mkdtemp(prefix="patch_")
 
-        # Save the current directory, so we can chdir back after
-        orig_wd = os.getcwd()
-
-        # Change to the tmpdir
-        os.chdir(tmpdir)
-
         try:
             # Open the patch file and extract the contents to the tmpdir
-            PatchFile.read_patch(abs_patch)
+            PatchFile.read_patch(abs_patch, tmpdir)
 
             thispatch = ReleaseData()
-            patch_id = thispatch.parse_metadata("metadata.xml")
+            filename = os.path.join(tmpdir, "metadata.xml")
+            with open(filename, "r") as f:
+                text = f.read()
+
+            patch_id = thispatch.parse_metadata_string(text)
 
             if patch_id is None:
-                print("Failed to import patch")
-                # Change back to original working dir
-                os.chdir(orig_wd)
                 shutil.rmtree(tmpdir)
                 return None
 
@@ -872,15 +855,15 @@ class PatchFile(object):
                 if not base_pkgdata.check_release(patch_sw_version):
                     msg = "Software version %s for release %s is not installed" % (patch_sw_version, patch_id)
                     LOG.exception(msg)
-                    raise ReleaseValidationFailure(msg)
+                    raise ReleaseValidationFailure(error=msg)
 
             if metadata_only:
                 # This is a re-import. Ensure the content lines up
                 if existing_content is None \
                         or existing_content != thispatch.contents[patch_id]:
-                    msg = "Contents of re-imported patch do not match"
-                    LOG.exception(msg)
-                    raise ReleaseMismatchFailure(msg)
+                    msg = f"Contents of {patch_id} do not match re-uploaded release"
+                    LOG.error(msg)
+                    raise ReleaseMismatchFailure(error=msg)
 
             patch_sw_version = utils.get_major_release_version(
                 thispatch.metadata[patch_id]["sw_version"])
@@ -888,42 +871,41 @@ class PatchFile(object):
             if not os.path.exists(abs_ostree_tar_dir):
                 os.makedirs(abs_ostree_tar_dir)
 
-            shutil.move("metadata.xml",
+            shutil.move(os.path.join(tmpdir, "metadata.xml"),
                         "%s/%s-metadata.xml" % (abs_metadata_dir, patch_id))
-            shutil.move("software.tar",
+            shutil.move(os.path.join(tmpdir, "software.tar"),
                         "%s/%s-software.tar" % (abs_ostree_tar_dir, patch_id))
+            v = "%s/%s-software.tar" % (abs_ostree_tar_dir, patch_id)
+            LOG.info("software.tar %s" % v)
 
             # restart_script may not exist in metadata.
             if thispatch.metadata[patch_id].get("restart_script"):
                 if not os.path.exists(root_scripts_dir):
                     os.makedirs(root_scripts_dir)
-                restart_script_name = thispatch.metadata[patch_id]["restart_script"]
-                shutil.move(restart_script_name,
-                            "%s/%s" % (root_scripts_dir, restart_script_name))
+                restart_script_name = os.path.join(tmpdir, thispatch.metadata[patch_id]["restart_script"])
+                if os.path.isfile(restart_script_name):
+                    shutil.move(restart_script_name, os.path.join(root_scripts_dir, restart_script_name))
 
-        except ReleaseValidationFailure as e:
-            raise e
-        except ReleaseMismatchFailure as e:
-            raise e
-        except tarfile.TarError:
-            msg = "Failed during patch extraction"
+        except tarfile.TarError as te:
+            msg = "Extract software failed %s" % str(te)
             LOG.exception(msg)
-            raise ReleaseValidationFailure(msg)
-        except KeyError:
-            msg = "Failed during patch extraction"
+            raise ReleaseValidationFailure(error=msg)
+        except KeyError as ke:
+            # NOTE(bqian) assuming this is a missing metadata key.
+            # This try/except should be narrowed down to protect only the
+            # specific routines that access external data (metadata).
+            msg = "Software metadata missing required value for %s" % str(ke)
             LOG.exception(msg)
-            raise ReleaseValidationFailure(msg)
-        except OSError:
-            msg = "Failed during patch extraction"
-            LOG.exception(msg)
-            raise SoftwareFail(msg)
-        except IOError:  # pylint: disable=duplicate-except
-            msg = "Failed during patch extraction"
-            LOG.exception(msg)
-            raise SoftwareFail(msg)
+            raise ReleaseValidationFailure(error=msg)
+            # except OSError:
+            #     msg = "Failed during patch extraction"
+            #     LOG.exception(msg)
+            #     raise SoftwareFail(msg)
+            # except IOError:  # pylint: disable=duplicate-except
+            #     msg = "Failed during patch extraction"
+            #     LOG.exception(msg)
+            #     raise SoftwareFail(msg)
         finally:
-            # Change back to original working dir
-            os.chdir(orig_wd)
             shutil.rmtree(tmpdir)
 
         return thispatch
@@ -939,17 +921,16 @@ class PatchFile(object):
         # Create a temporary working directory
         patch_tmpdir = tempfile.mkdtemp(prefix="patch_")
 
-        # Save the current directory, so we can chdir back after
-        orig_wd = os.getcwd()
-
-        # Change to the tmpdir
-        os.chdir(patch_tmpdir)
-
         # Load the patch
         abs_patch = os.path.abspath(patch)
-        PatchFile.read_patch(abs_patch)
+        PatchFile.read_patch(abs_patch, patch_tmpdir)
         thispatch = ReleaseData()
-        patch_id = thispatch.parse_metadata("metadata.xml")
+
+        filename = os.path.join(patch_tmpdir, "metadata.xml")
+        with open(filename, "r") as f:
+            text = f.read()
+
+        patch_id = thispatch.parse_metadata_string(text)
 
         patch_sw_version = utils.get_major_release_version(
             thispatch.metadata[patch_id]["sw_version"])
@@ -982,7 +963,6 @@ class PatchFile(object):
             raise OSTreeTarFail(msg)
         finally:
             shutil.rmtree(tmpdir, ignore_errors=True)
-            os.chdir(orig_wd)
             shutil.rmtree(patch_tmpdir)
 
     @staticmethod
@@ -1316,13 +1296,15 @@ def is_deploy_state_in_sync():
     return False
 
 
-def is_deployment_in_progress(release_metadata):
+def is_deployment_in_progress():
     """
     Check if at least one deployment is in progress
-    :param release_metadata: dict of release metadata
+    Takes no parameters; deploy records are read through the db api.
     :return: bool true if in progress, false otherwise
     """
-    return any(release['state'] == constants.DEPLOYING for release in release_metadata.values())
+    dbapi = get_instance()
+    deploys = dbapi.get_deploy_all()
+    return len(deploys) > 0
 
 
 def set_host_target_load(hostname, major_release):
diff --git a/software/software/states.py b/software/software/states.py
new file mode 100644
index 00000000..58bc334b
--- /dev/null
+++ b/software/software/states.py
@@ -0,0 +1,126 @@
+"""
+Copyright (c) 2023-2024 Wind River Systems, Inc.
+
+SPDX-License-Identifier: Apache-2.0
+
+"""
+
+from enum import Enum
+import os
+
+from software.constants import SOFTWARE_STORAGE_DIR
+
+
+# software release life cycle
+# (fresh install) -> deployed -> (upgrade to next version and deploy complete) -> unavailable -> (deleted)
+#                      ^
+#                      |---------------------------------------------------------
+#                                                                               ^
+#                                                                               |
+# (upload) -> available ->(deploy start) -> deploying -> (deploy complete) -> deployed
+#               \---> (deleted)
+#
+# deploy life cycle
+# (deploy-start)
+#     |
+#     V
+# deploy-start
+#     |
+#     V
+# start-done -> deploy-host -> deploy-active -> deploy-active-done -> deploy-complete -> (delete)
+#     \              \            \
+#      \--------------\------------\----> (deploy abort) -> deploy-abort --> deploy-abort-done -> (delete)
+#
+# deploy host life cycle
+#                      /----(deploy abort/reverse deploy)---
+#                     /                                     |
+#                    /                                      V
+# (deploy-start) -> pending -> deploying -------------> deployed --------(deploy-complete) -> (deleted)
+#                     ^                           \---------> (deploy abort/reverse deploy)
+#                     |                                            /
+#                     |-------------------------------------------/
+
+
+# Release states
+AVAILABLE_DIR = os.path.join(SOFTWARE_STORAGE_DIR, "metadata/available")
+UNAVAILABLE_DIR = os.path.join(SOFTWARE_STORAGE_DIR, "metadata/unavailable")
+DEPLOYING_DIR = os.path.join(SOFTWARE_STORAGE_DIR, "metadata/deploying")
+DEPLOYED_DIR = os.path.join(SOFTWARE_STORAGE_DIR, "metadata/deployed")
+REMOVING_DIR = os.path.join(SOFTWARE_STORAGE_DIR, "metadata/removing")
+COMMITTED_DIR = os.path.join(SOFTWARE_STORAGE_DIR, "metadata/committed")
+
+DEPLOY_STATE_METADATA_DIR = [
+    AVAILABLE_DIR,
+    UNAVAILABLE_DIR,
+    DEPLOYING_DIR,
+    DEPLOYED_DIR,
+    REMOVING_DIR,
+    COMMITTED_DIR,
+]
+
+# new release state needs to be added to VALID_RELEASE_STATES list
+AVAILABLE = 'available'
+UNAVAILABLE = 'unavailable'
+DEPLOYING = 'deploying'
+DEPLOYED = 'deployed'
+REMOVING = 'removing'
+COMMITTED = 'committed'
+
+VALID_RELEASE_STATES = [AVAILABLE, UNAVAILABLE, DEPLOYING, DEPLOYED,
+                        REMOVING, COMMITTED]
+
+RELEASE_STATE_TO_DIR_MAP = {AVAILABLE: AVAILABLE_DIR,
+                            UNAVAILABLE: UNAVAILABLE_DIR,
+                            DEPLOYING: DEPLOYING_DIR,
+                            DEPLOYED: DEPLOYED_DIR,
+                            REMOVING: REMOVING_DIR,
+                            COMMITTED: COMMITTED_DIR}
+
+DELETABLE_STATE = [AVAILABLE, UNAVAILABLE]
+
+# The valid release state transitions below may still change as
+# development continues.
+RELEASE_STATE_VALID_TRANSITION = {
+    AVAILABLE: [DEPLOYING],
+    DEPLOYING: [DEPLOYED, AVAILABLE],
+    DEPLOYED: [REMOVING, UNAVAILABLE]
+}
+
+VALID_DEPLOY_START_STATES = [
+    AVAILABLE,
+    DEPLOYED,
+]
+
+
+# deploy states
+class DEPLOY_STATES(Enum):
+    START = 'start'
+    START_DONE = 'start-done'
+    START_FAILED = 'start-failed'
+
+    HOST = 'host'
+    HOST_DONE = 'host-done'
+    HOST_FAILED = 'host-failed'
+
+    ACTIVATE = 'activate'
+    ACTIVATE_DONE = 'activate-done'
+    ACTIVATE_FAILED = 'activate-failed'
+
+    ABORT = 'abort'
+    ABORT_DONE = 'abort-done'
+
+
+# deploy host state
+class DEPLOY_HOST_STATES(Enum):
+    DEPLOYED = 'deployed'
+    DEPLOYING = 'deploying'
+    FAILED = 'failed'
+    PENDING = 'pending'
+
+
+VALID_HOST_DEPLOY_STATE = [
+    DEPLOY_HOST_STATES.DEPLOYED,
+    DEPLOY_HOST_STATES.DEPLOYING,
+    DEPLOY_HOST_STATES.FAILED,
+    DEPLOY_HOST_STATES.PENDING,
+]
diff --git a/software/software/tests/test_software_controller.py b/software/software/tests/test_software_controller.py
index dd1a3729..01fe8475 100644
--- a/software/software/tests/test_software_controller.py
+++ b/software/software/tests/test_software_controller.py
@@ -5,15 +5,15 @@
 #
 
 # This import has to be first
-from software.tests import base  # pylint: disable=unused-import
-
+from software.tests import base  # pylint: disable=unused-import # noqa: F401
 from software.software_controller import PatchController
-from software.software_controller import ReleaseValidationFailure
+from software.exceptions import ReleaseValidationFailure
 import unittest
 from unittest.mock import MagicMock
 from unittest.mock import mock_open
 from unittest.mock import patch
 from software import constants
+from software import states
 
 
 class TestSoftwareController(unittest.TestCase):
@@ -65,8 +65,7 @@ class TestSoftwareController(unittest.TestCase):
 
         # Call the function being tested
         with patch('software.software_controller.SW_VERSION', '1.0.0'):
-            info, warning, error, release_meta_info = controller._process_upload_upgrade_files(self.upgrade_files,   # pylint: disable=protected-access
-                                                                                               controller.release_data)
+            info, warning, error, release_meta_info = controller._process_upload_upgrade_files(self.upgrade_files)   # pylint: disable=protected-access
 
         # Verify that the expected functions were called with the expected arguments
         mock_verify_files.assert_called_once_with([self.upgrade_files[constants.ISO_EXTENSION]],
@@ -85,7 +84,7 @@ class TestSoftwareController(unittest.TestCase):
         # Verify that the expected messages were returned
         self.assertEqual(
             info,
-            'iso and signature files upload completed\nImporting iso is in progress\nLoad import successful')
+            'Load import successful')
         self.assertEqual(warning, '')
         self.assertEqual(error, '')
         self.assertEqual(
@@ -114,17 +113,14 @@ class TestSoftwareController(unittest.TestCase):
 
         # Call the function being tested
         with patch('software.software_controller.SW_VERSION', '1.0'):
-            info, warning, error, _ = controller._process_upload_upgrade_files(self.upgrade_files,  # pylint: disable=protected-access
-                                                                               controller.release_data)
-
-        # Verify that the expected messages were returned
-        self.assertEqual(info, '')
-        self.assertEqual(warning, '')
-        self.assertEqual(error, 'Upgrade file signature verification failed\n')
+            try:
+                controller._process_upload_upgrade_files(self.upgrade_files)  # pylint: disable=protected-access
+            except ReleaseValidationFailure as e:
+                self.assertEqual(e.error, 'Software test.iso:test.sig signature validation failed')
 
     @patch('software.software_controller.PatchController.__init__', return_value=None)
     @patch('software.software_controller.verify_files',
-           side_effect=ReleaseValidationFailure('Invalid signature file'))
+           side_effect=ReleaseValidationFailure(error='Invalid signature file'))
     @patch('software.software_controller.PatchController.major_release_upload_check')
     def test_process_upload_upgrade_files_validation_error(self,
                                                            mock_major_release_upload_check,
@@ -137,13 +133,10 @@ class TestSoftwareController(unittest.TestCase):
         mock_major_release_upload_check.return_value = True
 
         # Call the function being tested
-        info, warning, error, _ = controller._process_upload_upgrade_files(self.upgrade_files,  # pylint: disable=protected-access
-                                                                           controller.release_data)
-
-        # Verify that the expected messages were returned
-        self.assertEqual(info, '')
-        self.assertEqual(warning, '')
-        self.assertEqual(error, 'Upgrade file signature verification failed\n')
+        try:
+            controller._process_upload_upgrade_files(self.upgrade_files)  # pylint: disable=protected-access
+        except ReleaseValidationFailure as e:
+            self.assertEqual(e.error, "Invalid signature file")
 
     @patch('software.software_controller.os.path.isfile')
     @patch('software.software_controller.json.load')
@@ -238,8 +231,8 @@ class TestSoftwareController(unittest.TestCase):
             "to_release": "2.0.0"
         })
         controller.db_api_instance.get_deploy_host = MagicMock(return_value=[
-            {"hostname": "host1", "state": constants.DEPLOYED},
-            {"hostname": "host2", "state": constants.DEPLOYING}
+            {"hostname": "host1", "state": states.DEPLOYED},
+            {"hostname": "host2", "state": states.DEPLOYING}
         ])
 
         # Test when the host is deployed
@@ -248,7 +241,7 @@ class TestSoftwareController(unittest.TestCase):
             "hostname": "host1",
             "current_sw_version": "2.0.0",
             "target_sw_version": "2.0.0",
-            "host_state": constants.DEPLOYED
+            "host_state": states.DEPLOYED
         }])
 
     @patch('software.software_controller.json.load')
@@ -267,8 +260,8 @@ class TestSoftwareController(unittest.TestCase):
             "to_release": "2.0.0"
         })
         controller.db_api_instance.get_deploy_host = MagicMock(return_value=[
-            {"hostname": "host1", "state": constants.DEPLOYED},
-            {"hostname": "host2", "state": constants.DEPLOYING}
+            {"hostname": "host1", "state": states.DEPLOYED},
+            {"hostname": "host2", "state": states.DEPLOYING}
         ])
 
         # Test when the host is deploying
@@ -277,7 +270,7 @@ class TestSoftwareController(unittest.TestCase):
             "hostname": "host2",
             "current_sw_version": "1.0.0",
             "target_sw_version": "2.0.0",
-            "host_state": constants.DEPLOYING
+            "host_state": states.DEPLOYING
         }])
 
     @patch('software.software_controller.json.load')
@@ -296,8 +289,8 @@ class TestSoftwareController(unittest.TestCase):
             "to_release": "2.0.0"
         })
         controller.db_api_instance.get_deploy_host = MagicMock(return_value=[
-            {"hostname": "host1", "state": constants.DEPLOYED},
-            {"hostname": "host2", "state": constants.DEPLOYING}
+            {"hostname": "host1", "state": states.DEPLOYED},
+            {"hostname": "host2", "state": states.DEPLOYING}
         ])
 
         # Test when the host is deploying
@@ -306,12 +299,12 @@ class TestSoftwareController(unittest.TestCase):
             "hostname": "host1",
             "current_sw_version": "2.0.0",
             "target_sw_version": "2.0.0",
-            "host_state": constants.DEPLOYED
+            "host_state": states.DEPLOYED
         }, {
             "hostname": "host2",
             "current_sw_version": "1.0.0",
             "target_sw_version": "2.0.0",
-            "host_state": constants.DEPLOYING
+            "host_state": states.DEPLOYING
         }])
 
     @patch('software.software_controller.json.load')
@@ -394,4 +387,4 @@ class TestSoftwareController(unittest.TestCase):
         # Verify that the expected methods were called
         db_api_instance_mock.get_deploy_all.assert_called_once()
 
-        self.assertEqual(result, None)
+        self.assertIsNone(result)
diff --git a/software/software/tests/test_software_function.py b/software/software/tests/test_software_function.py
index a22014a9..a8660f4a 100644
--- a/software/software/tests/test_software_function.py
+++ b/software/software/tests/test_software_function.py
@@ -130,7 +130,7 @@ class TestSoftwareFunction(unittest.TestCase):
             self.assertEqual(val["install_instructions"], r.install_instructions)
             self.assertEqual(val["warnings"], r.warnings)
             self.assertEqual(val["status"], r.status)
-            self.assertEqual(val["unremovable"], r.unremovable)
+            self.assertEqual(val["unremovable"] == 'Y', r.unremovable)
             if val["restart_script"] is None:
                 self.assertIsNone(r.restart_script)
             else:
@@ -159,7 +159,7 @@ class TestSoftwareFunction(unittest.TestCase):
         self.assertEqual(val["install_instructions"], r.install_instructions)
         self.assertEqual(val["warnings"], r.warnings)
         self.assertEqual(val["status"], r.status)
-        self.assertEqual(val["unremovable"], r.unremovable)
+        self.assertEqual(val["unremovable"] == 'Y', r.unremovable)
         if val["restart_script"] is None:
             self.assertIsNone(r.restart_script)
         else:
@@ -178,7 +178,7 @@ class TestSoftwareFunction(unittest.TestCase):
             self.assertEqual(val["install_instructions"], r.install_instructions)
             self.assertEqual(val["warnings"], r.warnings)
             self.assertEqual(val["status"], r.status)
-            self.assertEqual(val["unremovable"], r.unremovable)
+            self.assertEqual(val["unremovable"] == 'Y', r.unremovable)
             if val["restart_script"] is None:
                 self.assertIsNone(r.restart_script)
             else:
diff --git a/software/software/utils.py b/software/software/utils.py
index cd1b431c..a12d68bb 100644
--- a/software/software/utils.py
+++ b/software/software/utils.py
@@ -43,20 +43,19 @@ class ExceptionHook(hooks.PecanHook):
         status = 500
 
         if isinstance(e, SoftwareServiceError):
-            LOG.warning("An issue is detected. Signature [%s]" % signature)
+            # Only exceptions that are pre-categorized as "expected" (known
+            # operational or environmental issues) have their details
+            # (possibly with recovery/resolution instructions) displayed to
+            # the end user.
+            LOG.warning("%s. Signature [%s]" % (e.error, signature))
             # TODO(bqian) remove the logging after it is stable
             LOG.exception(e)
 
             data = dict(info=e.info, warning=e.warning, error=e.error)
         else:
+            # An exception that is not pre-categorized as "expected" indicates a
+            # bug: either in the code itself or in how the exception is categorized.
             err_msg = "Internal error occurred. Error signature [%s]" % signature
-            try:
-                # If exception contains error details, send that to user
-                if str(e):
-                    err_msg = "Error \"%s\", Error signature [%s]" % (str(e), signature)
-            except Exception:
-                pass
-            LOG.error(err_msg)
             LOG.exception(e)
             data = dict(info="", warning="", error=err_msg)
         return webob.Response(json.dumps(data), status=status)
diff --git a/software/tox.ini b/software/tox.ini
index 2c955e2b..77283e99 100644
--- a/software/tox.ini
+++ b/software/tox.ini
@@ -61,9 +61,9 @@ commands =
 # H203: Use assertIs(Not)None to check for None (off by default).
 enable-extensions = H106,H203
 exclude = .venv,.git,.tox,dist,doc,*lib/python*,*egg,build,release-tag-*
-max-line-length = 80
+max-line-length = 120
 show-source = True
-ignore = E402,H306,H404,H405,W504,E501
+ignore = E402,H306,H404,H405,W504,E501,H105
 
 [testenv:flake8]
 commands = flake8 {posargs}