Protect against stale repo data in DNF cache

When the patch-agent is notified by the patch-controller of a new
patching operation (patch_op_counter in HELLO is incremented), it
runs a new software query, which first runs "dnf makecache" and then
checks the software repositories for changes. In rare cases, the
metadata returned to dnf for this query can be stale, causing the
patch-agent to incorrectly believe the current software is
up-to-date.

In order to protect against this, the patch-agent will now verify the
repository revision id for the updates repo. If the revision id has
not changed when it is reasonable to expect it could have (i.e., the
patch_op_counter has increased, indicating the repository may have
been updated), the patch-agent will wait one second and retry once to
allow any stale cached data to clear.

Change-Id: I3a44ed86e16cd9fe67f0b0e763c95a5a7e126cf8
Closes-Bug: 1884094
Signed-off-by: Don Penney <don.penney@windriver.com>
This commit is contained in:
Don Penney 2020-06-22 21:16:35 -04:00
parent 6abcfe6977
commit d3f8af4f4c
1 changed file with 29 additions and 4 deletions

View File

@ -348,6 +348,7 @@ class PatchAgent(PatchService):
self.last_config_audit = 0
self.rejection_timestamp = 0
self.dnfb = None
self.last_repo_revision = None
# Check state flags
if os.path.exists(patch_installing_file):
@ -418,7 +419,7 @@ class PatchAgent(PatchService):
# Read repo info
self.dnfb.fill_sack()
def query(self):
def query(self, check_revision=False):
""" Check current patch state """
if not check_install_uuid():
LOG.info("Failed install_uuid check. Skipping query")
@ -437,6 +438,32 @@ class PatchAgent(PatchService):
# Set a state to "unknown"?
return False
self.dnf_reset_client()
current_repo_revision = self.dnfb.repos['platform-updates']._repo.getRevision() # pylint: disable=protected-access
if check_revision and self.last_repo_revision is not None:
# We're expecting the revision to be updated.
# If it's not, we ended up getting a cached repomd query.
if current_repo_revision == self.last_repo_revision:
LOG.info("makecache returned same revision as previous (%s). Retry after one second",
current_repo_revision)
time.sleep(1)
try:
subprocess.check_output(dnf_makecache, stderr=subprocess.STDOUT)
except subprocess.CalledProcessError as e:
LOG.error("Failed to run dnf makecache")
LOG.error("Command output: %s", e.output)
# Set a state to "unknown"?
return False
self.dnf_reset_client()
current_repo_revision = self.dnfb.repos['platform-updates']._repo.getRevision() # pylint: disable=protected-access
if current_repo_revision != self.last_repo_revision:
LOG.info("Stale repo revision id corrected with retry. New id: %s",
current_repo_revision)
self.last_repo_revision = current_repo_revision
# Generate a unique query id
self.query_id = random.random()
@ -450,8 +477,6 @@ class PatchAgent(PatchService):
self.missing_pkgs = []
self.missing_pkgs_dnf = []
self.dnf_reset_client()
# Get the repo data
pkgs_installed = dnf.sack._rpmdb_sack(self.dnfb).query().installed() # pylint: disable=protected-access
avail = self.dnfb.sack.query().available().latest()
@ -723,7 +748,7 @@ class PatchAgent(PatchService):
changed = True
if changed:
rc = self.query()
rc = self.query(check_revision=True)
if not rc:
# Query failed. Reset the op counter
self.patch_op_counter = 0