Add protection against duplicate RPMs
If a cold reboot occurs in the middle of patch installation, the system can be left in a state where the patch-agent is unable to perform its operations properly. The RPM database can be left with duplicate RPMs due to the incomplete transaction, which can in turn lead to DNF update installation issues.

This update adds detection of duplicate RPMs to the patch-agent to avoid attempting installation until the system is recovered. Additionally, protection is added to the sw-patch init to treat multiple reboot patch installations as an error, to avoid boot loops.

Closes-Bug: 1904928
Change-Id: Ia06a6f669c45398d7956f2ac2caa76c447bc1b16
Signed-off-by: Don Penney <don.penney@windriver.com>
This commit is contained in:
parent
1c8d87d404
commit
62a66370ca
|
@ -1,12 +1,12 @@
|
|||
#!/bin/bash
|
||||
#
|
||||
# Copyright (c) 2014-2019 Wind River Systems, Inc.
|
||||
# Copyright (c) 2014-2020 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# StarlingX Patching
|
||||
# chkconfig: 345 20 23
|
||||
# description: CGCS Patching init script
|
||||
# description: StarlingX Patching init script
|
||||
|
||||
### BEGIN INIT INFO
|
||||
# Provides: sw-patch
|
||||
|
@ -25,6 +25,7 @@ NAME=$(basename $0)
|
|||
|
||||
logfile=/var/log/patching.log
|
||||
patch_failed_file=/var/run/patch_install_failed
|
||||
patched_during_init=/etc/patching/.patched_during_init
|
||||
|
||||
function LOG_TO_FILE {
|
||||
echo "`date "+%FT%T.%3N"`: $NAME: $*" >> $logfile
|
||||
|
@ -32,11 +33,24 @@ function LOG_TO_FILE {
|
|||
|
||||
# Handle a pending reboot-required patch at init time.
#
# When /var/run/node_is_patched_rr exists, the node is rebooted once; the
# ${patched_during_init} marker records that this has happened. If the flag is
# present again on the following init, the reboot is skipped, the
# ${patch_failed_file} flag is raised and the script exits with status 1, so
# that a failing install cannot cause a boot loop. Without the flag, the
# marker is simply cleared.
function check_for_rr_patch {
    local repeat_msg="Node has been patched during init a second consecutive time. Skipping reboot due to possible error"

    # Nothing pending: forget any reboot triggered by a previous init.
    if [ ! -f /var/run/node_is_patched_rr ]; then
        rm -f "${patched_during_init}"
        return
    fi

    # Marker already present: the previous init rebooted for the same reason.
    if [ -f "${patched_during_init}" ]; then
        echo
        echo "${repeat_msg}"
        echo
        LOG_TO_FILE "${repeat_msg}"
        touch "${patch_failed_file}"
        rm -f "${patched_during_init}"
        exit 1
    fi

    # First occurrence: leave the marker behind, then reboot.
    echo
    echo "Node has been patched and requires an immediate reboot."
    echo
    LOG_TO_FILE "Node has been patched, with reboot-required flag set. Rebooting"
    touch "${patched_during_init}"
    /sbin/reboot
}
|
||||
|
||||
|
|
|
@ -204,6 +204,7 @@ class PatchMessageQueryDetailedResp(messages.PatchMessage):
|
|||
self.message['installed'] = pa.installed
|
||||
self.message['to_remove'] = pa.to_remove
|
||||
self.message['missing_pkgs'] = pa.missing_pkgs
|
||||
self.message['duplicated_pkgs'] = pa.duplicated_pkgs
|
||||
self.message['nodetype'] = cfg.nodetype
|
||||
self.message['sw_version'] = SW_VERSION
|
||||
self.message['subfunctions'] = subfunctions
|
||||
|
@ -340,6 +341,7 @@ class PatchAgent(PatchService):
|
|||
self.to_remove_dnf = []
|
||||
self.missing_pkgs = []
|
||||
self.missing_pkgs_dnf = []
|
||||
self.duplicated_pkgs = {}
|
||||
self.patch_op_counter = 0
|
||||
self.node_is_patched = os.path.exists(node_is_patched_file)
|
||||
self.node_is_patched_timestamp = 0
|
||||
|
@ -384,15 +386,22 @@ class PatchAgent(PatchService):
|
|||
self.listener.bind(('', self.port))
|
||||
self.listener.listen(2) # Allow two connections, for two controllers
|
||||
|
||||
@staticmethod
def pkgobj_to_version_str(pkg):
    """Render a package object's version in the patch-controller format.

    The result is "version-release@arch", prefixed with "epoch:" only when
    pkg.epoch is not 0.
    """
    version_str = "%s-%s@%s" % (pkg.version, pkg.release, pkg.arch)
    if pkg.epoch != 0:
        # A non-zero epoch is part of the version identity, so keep it.
        return "%s:%s" % (pkg.epoch, version_str)
    return version_str
|
||||
|
||||
@staticmethod
def pkgobjs_to_list(pkgobjs):
    """Transform package objects to the format used by patch-controller.

    Despite the name, the result is a dict mapping each package name to its
    version string as produced by PatchAgent.pkgobj_to_version_str. If the
    same name occurs more than once, the last occurrence wins.
    """
    # Formatting is delegated to the shared helper; the inline epoch/no-epoch
    # formatting that used to precede this call was overwritten immediately
    # and has been dropped.
    return {pkg.name: PatchAgent.pkgobj_to_version_str(pkg) for pkg in pkgobjs}
|
||||
|
||||
|
@ -481,6 +490,18 @@ class PatchAgent(PatchService):
|
|||
pkgs_installed = dnf.sack._rpmdb_sack(self.dnfb).query().installed() # pylint: disable=protected-access
|
||||
avail = self.dnfb.sack.query().available().latest()
|
||||
|
||||
# Check for packages with multiple installed versions
|
||||
self.duplicated_pkgs = {}
|
||||
for pkg in pkgs_installed:
|
||||
pkglist = pkgs_installed.filter(name=pkg.name, arch=pkg.arch)
|
||||
if len(pkglist) > 1:
|
||||
if pkg.name not in self.duplicated_pkgs:
|
||||
self.duplicated_pkgs[pkg.name] = {}
|
||||
if pkg.arch not in self.duplicated_pkgs[pkg.name]:
|
||||
self.duplicated_pkgs[pkg.name][pkg.arch] = map(PatchAgent.pkgobj_to_version_str, pkglist)
|
||||
LOG.warn("Duplicate packages installed: %s %s",
|
||||
pkg.name, ", ".join(self.duplicated_pkgs[pkg.name][pkg.arch]))
|
||||
|
||||
# There are three possible actions:
|
||||
# 1. If installed pkg is not in a repo, remove it.
|
||||
# 2. If installed pkg version does not match newest repo version, update it.
|
||||
|
@ -538,6 +559,8 @@ class PatchAgent(PatchService):
|
|||
LOG.info("To install: %s", self.to_install)
|
||||
LOG.info("To remove: %s", self.to_remove)
|
||||
LOG.info("Missing: %s", self.missing_pkgs)
|
||||
if len(self.duplicated_pkgs) > 0:
|
||||
LOG.info("Duplicated: %s", self.duplicated_pkgs)
|
||||
|
||||
return True
|
||||
|
||||
|
@ -625,6 +648,10 @@ class PatchAgent(PatchService):
|
|||
changed = False
|
||||
rc = True
|
||||
|
||||
if len(self.duplicated_pkgs) > 0:
|
||||
LOG.error("Duplicate installed packages found. Manual recovery is required.")
|
||||
rc = False
|
||||
else:
|
||||
if len(self.to_install_dnf) > 0 or len(self.to_downgrade_dnf) > 0:
|
||||
LOG.info("Adding pkgs to installation set: %s", self.to_install)
|
||||
for pkg in self.to_install_dnf:
|
||||
|
|
|
@ -115,6 +115,7 @@ class AgentNeighbour(object):
|
|||
self.installed = {}
|
||||
self.to_remove = []
|
||||
self.missing_pkgs = []
|
||||
self.duplicated_pkgs = {}
|
||||
self.nodetype = None
|
||||
self.sw_version = "unknown"
|
||||
self.subfunctions = []
|
||||
|
@ -156,6 +157,7 @@ class AgentNeighbour(object):
|
|||
installed,
|
||||
to_remove,
|
||||
missing_pkgs,
|
||||
duplicated_pkgs,
|
||||
nodetype,
|
||||
sw_version,
|
||||
subfunctions,
|
||||
|
@ -163,6 +165,7 @@ class AgentNeighbour(object):
|
|||
self.installed = installed
|
||||
self.to_remove = to_remove
|
||||
self.missing_pkgs = missing_pkgs
|
||||
self.duplicated_pkgs = duplicated_pkgs
|
||||
self.nodetype = nodetype
|
||||
self.stale = False
|
||||
self.pending_query = False
|
||||
|
@ -186,6 +189,7 @@ class AgentNeighbour(object):
|
|||
"installed": self.installed,
|
||||
"to_remove": self.to_remove,
|
||||
"missing_pkgs": self.missing_pkgs,
|
||||
"duplicated_pkgs": self.duplicated_pkgs,
|
||||
"nodetype": self.nodetype,
|
||||
"subfunctions": self.subfunctions,
|
||||
"sw_version": self.sw_version,
|
||||
|
@ -421,6 +425,7 @@ class PatchMessageQueryDetailedResp(messages.PatchMessage):
|
|||
self.to_install = {}
|
||||
self.to_remove = []
|
||||
self.missing_pkgs = []
|
||||
self.duplicated_pkgs = {}
|
||||
self.subfunctions = []
|
||||
self.nodetype = "unknown"
|
||||
self.agent_sw_version = "unknown"
|
||||
|
@ -434,6 +439,8 @@ class PatchMessageQueryDetailedResp(messages.PatchMessage):
|
|||
self.to_remove = data['to_remove']
|
||||
if 'missing_pkgs' in data:
|
||||
self.missing_pkgs = data['missing_pkgs']
|
||||
if 'duplicated_pkgs' in data:
|
||||
self.duplicated_pkgs = data['duplicated_pkgs']
|
||||
if 'nodetype' in data:
|
||||
self.nodetype = data['nodetype']
|
||||
if 'sw_version' in data:
|
||||
|
@ -455,6 +462,7 @@ class PatchMessageQueryDetailedResp(messages.PatchMessage):
|
|||
pc.hosts[ip].handle_query_detailed_resp(self.installed,
|
||||
self.to_remove,
|
||||
self.missing_pkgs,
|
||||
self.duplicated_pkgs,
|
||||
self.nodetype,
|
||||
self.agent_sw_version,
|
||||
self.subfunctions,
|
||||
|
|
Loading…
Reference in New Issue