Add protection against duplicate RPMs
If a cold reboot occurs in the middle of patch installation, the system can be left in a state where the patch-agent is unable to perform its operations properly. The RPM database can be left with duplicate RPMs due to the incomplete transaction, which can in turn lead to DNF update installation issues.

This update adds detection of duplicate RPMs to the patch-agent to avoid attempting installation until the system is recovered. Additionally, protection is added to the sw-patch init script to treat a second consecutive reboot-required patch installation during init as an error, to avoid boot loops.

Closes-Bug: 1904928
Change-Id: Ia06a6f669c45398d7956f2ac2caa76c447bc1b16
Signed-off-by: Don Penney <don.penney@windriver.com>
This commit is contained in:
parent
1c8d87d404
commit
62a66370ca
|
@ -1,12 +1,12 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
#
|
#
|
||||||
# Copyright (c) 2014-2019 Wind River Systems, Inc.
|
# Copyright (c) 2014-2020 Wind River Systems, Inc.
|
||||||
#
|
#
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
#
|
#
|
||||||
# StarlingX Patching
|
# StarlingX Patching
|
||||||
# chkconfig: 345 20 23
|
# chkconfig: 345 20 23
|
||||||
# description: CGCS Patching init script
|
# description: StarlingX Patching init script
|
||||||
|
|
||||||
### BEGIN INIT INFO
|
### BEGIN INIT INFO
|
||||||
# Provides: sw-patch
|
# Provides: sw-patch
|
||||||
|
@ -25,6 +25,7 @@ NAME=$(basename $0)
|
||||||
|
|
||||||
logfile=/var/log/patching.log
|
logfile=/var/log/patching.log
|
||||||
patch_failed_file=/var/run/patch_install_failed
|
patch_failed_file=/var/run/patch_install_failed
|
||||||
|
patched_during_init=/etc/patching/.patched_during_init
|
||||||
|
|
||||||
function LOG_TO_FILE {
|
function LOG_TO_FILE {
|
||||||
echo "`date "+%FT%T.%3N"`: $NAME: $*" >> $logfile
|
echo "`date "+%FT%T.%3N"`: $NAME: $*" >> $logfile
|
||||||
|
@ -32,11 +33,24 @@ function LOG_TO_FILE {
|
||||||
|
|
||||||
function check_for_rr_patch {
|
function check_for_rr_patch {
|
||||||
if [ -f /var/run/node_is_patched_rr ]; then
|
if [ -f /var/run/node_is_patched_rr ]; then
|
||||||
echo
|
if [ ! -f ${patched_during_init} ]; then
|
||||||
echo "Node has been patched and requires an immediate reboot."
|
echo
|
||||||
echo
|
echo "Node has been patched and requires an immediate reboot."
|
||||||
LOG_TO_FILE "Node has been patched, with reboot-required flag set. Rebooting"
|
echo
|
||||||
/sbin/reboot
|
LOG_TO_FILE "Node has been patched, with reboot-required flag set. Rebooting"
|
||||||
|
touch ${patched_during_init}
|
||||||
|
/sbin/reboot
|
||||||
|
else
|
||||||
|
echo
|
||||||
|
echo "Node has been patched during init a second consecutive time. Skipping reboot due to possible error"
|
||||||
|
echo
|
||||||
|
LOG_TO_FILE "Node has been patched during init a second consecutive time. Skipping reboot due to possible error"
|
||||||
|
touch ${patch_failed_file}
|
||||||
|
rm -f ${patched_during_init}
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
rm -f ${patched_during_init}
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -204,6 +204,7 @@ class PatchMessageQueryDetailedResp(messages.PatchMessage):
|
||||||
self.message['installed'] = pa.installed
|
self.message['installed'] = pa.installed
|
||||||
self.message['to_remove'] = pa.to_remove
|
self.message['to_remove'] = pa.to_remove
|
||||||
self.message['missing_pkgs'] = pa.missing_pkgs
|
self.message['missing_pkgs'] = pa.missing_pkgs
|
||||||
|
self.message['duplicated_pkgs'] = pa.duplicated_pkgs
|
||||||
self.message['nodetype'] = cfg.nodetype
|
self.message['nodetype'] = cfg.nodetype
|
||||||
self.message['sw_version'] = SW_VERSION
|
self.message['sw_version'] = SW_VERSION
|
||||||
self.message['subfunctions'] = subfunctions
|
self.message['subfunctions'] = subfunctions
|
||||||
|
@ -340,6 +341,7 @@ class PatchAgent(PatchService):
|
||||||
self.to_remove_dnf = []
|
self.to_remove_dnf = []
|
||||||
self.missing_pkgs = []
|
self.missing_pkgs = []
|
||||||
self.missing_pkgs_dnf = []
|
self.missing_pkgs_dnf = []
|
||||||
|
self.duplicated_pkgs = {}
|
||||||
self.patch_op_counter = 0
|
self.patch_op_counter = 0
|
||||||
self.node_is_patched = os.path.exists(node_is_patched_file)
|
self.node_is_patched = os.path.exists(node_is_patched_file)
|
||||||
self.node_is_patched_timestamp = 0
|
self.node_is_patched_timestamp = 0
|
||||||
|
@ -384,15 +386,22 @@ class PatchAgent(PatchService):
|
||||||
self.listener.bind(('', self.port))
|
self.listener.bind(('', self.port))
|
||||||
self.listener.listen(2) # Allow two connections, for two controllers
|
self.listener.listen(2) # Allow two connections, for two controllers
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def pkgobj_to_version_str(pkg):
|
||||||
|
# Transform pkgobj version to format used by patch-controller
|
||||||
|
if pkg.epoch != 0:
|
||||||
|
output = "%s:%s-%s@%s" % (pkg.epoch, pkg.version, pkg.release, pkg.arch)
|
||||||
|
else:
|
||||||
|
output = "%s-%s@%s" % (pkg.version, pkg.release, pkg.arch)
|
||||||
|
|
||||||
|
return output
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def pkgobjs_to_list(pkgobjs):
|
def pkgobjs_to_list(pkgobjs):
|
||||||
# Transform pkgobj list to format used by patch-controller
|
# Transform pkgobj list to format used by patch-controller
|
||||||
output = {}
|
output = {}
|
||||||
for pkg in pkgobjs:
|
for pkg in pkgobjs:
|
||||||
if pkg.epoch != 0:
|
output[pkg.name] = PatchAgent.pkgobj_to_version_str(pkg)
|
||||||
output[pkg.name] = "%s:%s-%s@%s" % (pkg.epoch, pkg.version, pkg.release, pkg.arch)
|
|
||||||
else:
|
|
||||||
output[pkg.name] = "%s-%s@%s" % (pkg.version, pkg.release, pkg.arch)
|
|
||||||
|
|
||||||
return output
|
return output
|
||||||
|
|
||||||
|
@ -481,6 +490,18 @@ class PatchAgent(PatchService):
|
||||||
pkgs_installed = dnf.sack._rpmdb_sack(self.dnfb).query().installed() # pylint: disable=protected-access
|
pkgs_installed = dnf.sack._rpmdb_sack(self.dnfb).query().installed() # pylint: disable=protected-access
|
||||||
avail = self.dnfb.sack.query().available().latest()
|
avail = self.dnfb.sack.query().available().latest()
|
||||||
|
|
||||||
|
# Check for packages with multiple installed versions
|
||||||
|
self.duplicated_pkgs = {}
|
||||||
|
for pkg in pkgs_installed:
|
||||||
|
pkglist = pkgs_installed.filter(name=pkg.name, arch=pkg.arch)
|
||||||
|
if len(pkglist) > 1:
|
||||||
|
if pkg.name not in self.duplicated_pkgs:
|
||||||
|
self.duplicated_pkgs[pkg.name] = {}
|
||||||
|
if pkg.arch not in self.duplicated_pkgs[pkg.name]:
|
||||||
|
self.duplicated_pkgs[pkg.name][pkg.arch] = map(PatchAgent.pkgobj_to_version_str, pkglist)
|
||||||
|
LOG.warn("Duplicate packages installed: %s %s",
|
||||||
|
pkg.name, ", ".join(self.duplicated_pkgs[pkg.name][pkg.arch]))
|
||||||
|
|
||||||
# There are three possible actions:
|
# There are three possible actions:
|
||||||
# 1. If installed pkg is not in a repo, remove it.
|
# 1. If installed pkg is not in a repo, remove it.
|
||||||
# 2. If installed pkg version does not match newest repo version, update it.
|
# 2. If installed pkg version does not match newest repo version, update it.
|
||||||
|
@ -538,6 +559,8 @@ class PatchAgent(PatchService):
|
||||||
LOG.info("To install: %s", self.to_install)
|
LOG.info("To install: %s", self.to_install)
|
||||||
LOG.info("To remove: %s", self.to_remove)
|
LOG.info("To remove: %s", self.to_remove)
|
||||||
LOG.info("Missing: %s", self.missing_pkgs)
|
LOG.info("Missing: %s", self.missing_pkgs)
|
||||||
|
if len(self.duplicated_pkgs) > 0:
|
||||||
|
LOG.info("Duplicated: %s", self.duplicated_pkgs)
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
@ -625,78 +648,82 @@ class PatchAgent(PatchService):
|
||||||
changed = False
|
changed = False
|
||||||
rc = True
|
rc = True
|
||||||
|
|
||||||
if len(self.to_install_dnf) > 0 or len(self.to_downgrade_dnf) > 0:
|
if len(self.duplicated_pkgs) > 0:
|
||||||
LOG.info("Adding pkgs to installation set: %s", self.to_install)
|
LOG.error("Duplicate installed packages found. Manual recovery is required.")
|
||||||
for pkg in self.to_install_dnf:
|
rc = False
|
||||||
self.dnfb.package_install(pkg)
|
|
||||||
|
|
||||||
for pkg in self.to_downgrade_dnf:
|
|
||||||
self.dnfb.package_downgrade(pkg)
|
|
||||||
|
|
||||||
changed = True
|
|
||||||
|
|
||||||
if len(self.missing_pkgs_dnf) > 0:
|
|
||||||
LOG.info("Adding missing pkgs to installation set: %s", self.missing_pkgs)
|
|
||||||
for pkg in self.missing_pkgs_dnf:
|
|
||||||
self.dnfb.package_install(pkg)
|
|
||||||
changed = True
|
|
||||||
|
|
||||||
if len(self.to_remove_dnf) > 0:
|
|
||||||
LOG.info("Adding pkgs to be removed: %s", self.to_remove)
|
|
||||||
for pkg in self.to_remove_dnf:
|
|
||||||
self.dnfb.package_remove(pkg)
|
|
||||||
changed = True
|
|
||||||
|
|
||||||
if changed:
|
|
||||||
# Run the transaction set
|
|
||||||
transaction_rc = False
|
|
||||||
try:
|
|
||||||
transaction_rc = self.resolve_dnf_transaction()
|
|
||||||
except dnf.exceptions.DepsolveError:
|
|
||||||
LOG.exception("Failures resolving dependencies in transaction")
|
|
||||||
except dnf.exceptions.DownloadError:
|
|
||||||
LOG.exception("Failures downloading in transaction")
|
|
||||||
except dnf.exceptions.Error:
|
|
||||||
LOG.exception("Failure resolving transaction")
|
|
||||||
|
|
||||||
if not transaction_rc:
|
|
||||||
LOG.error("Failures occurred during transaction")
|
|
||||||
rc = False
|
|
||||||
if verbose_to_stdout:
|
|
||||||
print("WARNING: Software update failed.")
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
if verbose_to_stdout:
|
if len(self.to_install_dnf) > 0 or len(self.to_downgrade_dnf) > 0:
|
||||||
print("Nothing to install.")
|
LOG.info("Adding pkgs to installation set: %s", self.to_install)
|
||||||
LOG.info("Nothing to install")
|
for pkg in self.to_install_dnf:
|
||||||
|
self.dnfb.package_install(pkg)
|
||||||
|
|
||||||
if changed and rc:
|
for pkg in self.to_downgrade_dnf:
|
||||||
# Update the node_is_patched flag
|
self.dnfb.package_downgrade(pkg)
|
||||||
setflag(node_is_patched_file)
|
|
||||||
|
|
||||||
self.node_is_patched = True
|
changed = True
|
||||||
if verbose_to_stdout:
|
|
||||||
print("This node has been patched.")
|
|
||||||
|
|
||||||
if os.path.exists(node_is_patched_rr_file):
|
if len(self.missing_pkgs_dnf) > 0:
|
||||||
LOG.info("Reboot is required. Skipping patch-scripts")
|
LOG.info("Adding missing pkgs to installation set: %s", self.missing_pkgs)
|
||||||
elif disallow_insvc_patch:
|
for pkg in self.missing_pkgs_dnf:
|
||||||
LOG.info("Disallowing patch-scripts. Treating as reboot-required")
|
self.dnfb.package_install(pkg)
|
||||||
setflag(node_is_patched_rr_file)
|
changed = True
|
||||||
else:
|
|
||||||
LOG.info("Running in-service patch-scripts")
|
|
||||||
|
|
||||||
|
if len(self.to_remove_dnf) > 0:
|
||||||
|
LOG.info("Adding pkgs to be removed: %s", self.to_remove)
|
||||||
|
for pkg in self.to_remove_dnf:
|
||||||
|
self.dnfb.package_remove(pkg)
|
||||||
|
changed = True
|
||||||
|
|
||||||
|
if changed:
|
||||||
|
# Run the transaction set
|
||||||
|
transaction_rc = False
|
||||||
try:
|
try:
|
||||||
subprocess.check_output(run_insvc_patch_scripts_cmd, stderr=subprocess.STDOUT)
|
transaction_rc = self.resolve_dnf_transaction()
|
||||||
|
except dnf.exceptions.DepsolveError:
|
||||||
|
LOG.exception("Failures resolving dependencies in transaction")
|
||||||
|
except dnf.exceptions.DownloadError:
|
||||||
|
LOG.exception("Failures downloading in transaction")
|
||||||
|
except dnf.exceptions.Error:
|
||||||
|
LOG.exception("Failure resolving transaction")
|
||||||
|
|
||||||
# Clear the node_is_patched flag, since we've handled it in-service
|
if not transaction_rc:
|
||||||
clearflag(node_is_patched_file)
|
LOG.error("Failures occurred during transaction")
|
||||||
self.node_is_patched = False
|
|
||||||
except subprocess.CalledProcessError as e:
|
|
||||||
LOG.exception("In-Service patch scripts failed")
|
|
||||||
LOG.error("Command output: %s", e.output)
|
|
||||||
# Fail the patching operation
|
|
||||||
rc = False
|
rc = False
|
||||||
|
if verbose_to_stdout:
|
||||||
|
print("WARNING: Software update failed.")
|
||||||
|
|
||||||
|
else:
|
||||||
|
if verbose_to_stdout:
|
||||||
|
print("Nothing to install.")
|
||||||
|
LOG.info("Nothing to install")
|
||||||
|
|
||||||
|
if changed and rc:
|
||||||
|
# Update the node_is_patched flag
|
||||||
|
setflag(node_is_patched_file)
|
||||||
|
|
||||||
|
self.node_is_patched = True
|
||||||
|
if verbose_to_stdout:
|
||||||
|
print("This node has been patched.")
|
||||||
|
|
||||||
|
if os.path.exists(node_is_patched_rr_file):
|
||||||
|
LOG.info("Reboot is required. Skipping patch-scripts")
|
||||||
|
elif disallow_insvc_patch:
|
||||||
|
LOG.info("Disallowing patch-scripts. Treating as reboot-required")
|
||||||
|
setflag(node_is_patched_rr_file)
|
||||||
|
else:
|
||||||
|
LOG.info("Running in-service patch-scripts")
|
||||||
|
|
||||||
|
try:
|
||||||
|
subprocess.check_output(run_insvc_patch_scripts_cmd, stderr=subprocess.STDOUT)
|
||||||
|
|
||||||
|
# Clear the node_is_patched flag, since we've handled it in-service
|
||||||
|
clearflag(node_is_patched_file)
|
||||||
|
self.node_is_patched = False
|
||||||
|
except subprocess.CalledProcessError as e:
|
||||||
|
LOG.exception("In-Service patch scripts failed")
|
||||||
|
LOG.error("Command output: %s", e.output)
|
||||||
|
# Fail the patching operation
|
||||||
|
rc = False
|
||||||
|
|
||||||
# Clear the in-service patch dirs
|
# Clear the in-service patch dirs
|
||||||
if os.path.exists(insvc_patch_scripts):
|
if os.path.exists(insvc_patch_scripts):
|
||||||
|
|
|
@ -115,6 +115,7 @@ class AgentNeighbour(object):
|
||||||
self.installed = {}
|
self.installed = {}
|
||||||
self.to_remove = []
|
self.to_remove = []
|
||||||
self.missing_pkgs = []
|
self.missing_pkgs = []
|
||||||
|
self.duplicated_pkgs = {}
|
||||||
self.nodetype = None
|
self.nodetype = None
|
||||||
self.sw_version = "unknown"
|
self.sw_version = "unknown"
|
||||||
self.subfunctions = []
|
self.subfunctions = []
|
||||||
|
@ -156,6 +157,7 @@ class AgentNeighbour(object):
|
||||||
installed,
|
installed,
|
||||||
to_remove,
|
to_remove,
|
||||||
missing_pkgs,
|
missing_pkgs,
|
||||||
|
duplicated_pkgs,
|
||||||
nodetype,
|
nodetype,
|
||||||
sw_version,
|
sw_version,
|
||||||
subfunctions,
|
subfunctions,
|
||||||
|
@ -163,6 +165,7 @@ class AgentNeighbour(object):
|
||||||
self.installed = installed
|
self.installed = installed
|
||||||
self.to_remove = to_remove
|
self.to_remove = to_remove
|
||||||
self.missing_pkgs = missing_pkgs
|
self.missing_pkgs = missing_pkgs
|
||||||
|
self.duplicated_pkgs = duplicated_pkgs
|
||||||
self.nodetype = nodetype
|
self.nodetype = nodetype
|
||||||
self.stale = False
|
self.stale = False
|
||||||
self.pending_query = False
|
self.pending_query = False
|
||||||
|
@ -186,6 +189,7 @@ class AgentNeighbour(object):
|
||||||
"installed": self.installed,
|
"installed": self.installed,
|
||||||
"to_remove": self.to_remove,
|
"to_remove": self.to_remove,
|
||||||
"missing_pkgs": self.missing_pkgs,
|
"missing_pkgs": self.missing_pkgs,
|
||||||
|
"duplicated_pkgs": self.duplicated_pkgs,
|
||||||
"nodetype": self.nodetype,
|
"nodetype": self.nodetype,
|
||||||
"subfunctions": self.subfunctions,
|
"subfunctions": self.subfunctions,
|
||||||
"sw_version": self.sw_version,
|
"sw_version": self.sw_version,
|
||||||
|
@ -421,6 +425,7 @@ class PatchMessageQueryDetailedResp(messages.PatchMessage):
|
||||||
self.to_install = {}
|
self.to_install = {}
|
||||||
self.to_remove = []
|
self.to_remove = []
|
||||||
self.missing_pkgs = []
|
self.missing_pkgs = []
|
||||||
|
self.duplicated_pkgs = {}
|
||||||
self.subfunctions = []
|
self.subfunctions = []
|
||||||
self.nodetype = "unknown"
|
self.nodetype = "unknown"
|
||||||
self.agent_sw_version = "unknown"
|
self.agent_sw_version = "unknown"
|
||||||
|
@ -434,6 +439,8 @@ class PatchMessageQueryDetailedResp(messages.PatchMessage):
|
||||||
self.to_remove = data['to_remove']
|
self.to_remove = data['to_remove']
|
||||||
if 'missing_pkgs' in data:
|
if 'missing_pkgs' in data:
|
||||||
self.missing_pkgs = data['missing_pkgs']
|
self.missing_pkgs = data['missing_pkgs']
|
||||||
|
if 'duplicated_pkgs' in data:
|
||||||
|
self.duplicated_pkgs = data['duplicated_pkgs']
|
||||||
if 'nodetype' in data:
|
if 'nodetype' in data:
|
||||||
self.nodetype = data['nodetype']
|
self.nodetype = data['nodetype']
|
||||||
if 'sw_version' in data:
|
if 'sw_version' in data:
|
||||||
|
@ -455,6 +462,7 @@ class PatchMessageQueryDetailedResp(messages.PatchMessage):
|
||||||
pc.hosts[ip].handle_query_detailed_resp(self.installed,
|
pc.hosts[ip].handle_query_detailed_resp(self.installed,
|
||||||
self.to_remove,
|
self.to_remove,
|
||||||
self.missing_pkgs,
|
self.missing_pkgs,
|
||||||
|
self.duplicated_pkgs,
|
||||||
self.nodetype,
|
self.nodetype,
|
||||||
self.agent_sw_version,
|
self.agent_sw_version,
|
||||||
self.subfunctions,
|
self.subfunctions,
|
||||||
|
|
Loading…
Reference in New Issue