From d3f8af4f4c6df5eede6f83d67bff8970d4bd1ed7 Mon Sep 17 00:00:00 2001 From: Don Penney Date: Mon, 22 Jun 2020 21:16:35 -0400 Subject: [PATCH] Protect against stale repo data in DNF cache When the patch-agent is notified by the patch-controller of a new patching operation (patch_op_counter in HELLO is incremented), it will run a new software query consisting first of running "dnf makecache", then checking the software repositories for changes. In rare cases, the metadata returned to dnf when it makes the query could be stale, resulting in the patch-agent believing the current software is up-to-date. In order to protect against this, the patch-agent will now verify the repository revision id for the updates repo. If the revision id has not changed when it is reasonable to expect it could have (i.e. the patch_op_counter has increased, indicating the repository may have been updated), the patch-agent will retry once to allow for any potential caching to have cleared. Change-Id: I3a44ed86e16cd9fe67f0b0e763c95a5a7e126cf8 Closes-Bug: 1884094 Signed-off-by: Don Penney --- .../cgcs-patch/cgcs_patch/patch_agent.py | 33 ++++++++++++++++--- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/cgcs-patch/cgcs-patch/cgcs_patch/patch_agent.py b/cgcs-patch/cgcs-patch/cgcs_patch/patch_agent.py index eb128419..7c64a28a 100644 --- a/cgcs-patch/cgcs-patch/cgcs_patch/patch_agent.py +++ b/cgcs-patch/cgcs-patch/cgcs_patch/patch_agent.py @@ -348,6 +348,7 @@ class PatchAgent(PatchService): self.last_config_audit = 0 self.rejection_timestamp = 0 self.dnfb = None + self.last_repo_revision = None # Check state flags if os.path.exists(patch_installing_file): @@ -418,7 +419,7 @@ class PatchAgent(PatchService): # Read repo info self.dnfb.fill_sack() - def query(self): + def query(self, check_revision=False): """ Check current patch state """ if not check_install_uuid(): LOG.info("Failed install_uuid check. 
Skipping query") @@ -437,6 +438,32 @@ class PatchAgent(PatchService): # Set a state to "unknown"? return False + self.dnf_reset_client() + current_repo_revision = self.dnfb.repos['platform-updates']._repo.getRevision() # pylint: disable=protected-access + + if check_revision and self.last_repo_revision is not None: + # We're expecting the revision to be updated. + # If it's not, we ended up getting a cached repomd query. + if current_repo_revision == self.last_repo_revision: + LOG.info("makecache returned same revision as previous (%s). Retry after one second", + current_repo_revision) + time.sleep(1) + try: + subprocess.check_output(dnf_makecache, stderr=subprocess.STDOUT) + except subprocess.CalledProcessError as e: + LOG.error("Failed to run dnf makecache") + LOG.error("Command output: %s", e.output) + # Set a state to "unknown"? + return False + + self.dnf_reset_client() + current_repo_revision = self.dnfb.repos['platform-updates']._repo.getRevision() # pylint: disable=protected-access + if current_repo_revision != self.last_repo_revision: + LOG.info("Stale repo revision id corrected with retry. New id: %s", + current_repo_revision) + + self.last_repo_revision = current_repo_revision + # Generate a unique query id self.query_id = random.random() @@ -450,8 +477,6 @@ class PatchAgent(PatchService): self.missing_pkgs = [] self.missing_pkgs_dnf = [] - self.dnf_reset_client() - # Get the repo data pkgs_installed = dnf.sack._rpmdb_sack(self.dnfb).query().installed() # pylint: disable=protected-access avail = self.dnfb.sack.query().available().latest() @@ -723,7 +748,7 @@ class PatchAgent(PatchService): changed = True if changed: - rc = self.query() + rc = self.query(check_revision=True) if not rc: # Query failed. Reset the op counter self.patch_op_counter = 0