Merge "Enhance crashDumpMgr with oversized crash dump protection"

This commit is contained in:
Zuul 2020-10-28 18:05:28 +00:00 committed by Gerrit Code Review
commit fdf6ff8650
2 changed files with 95 additions and 40 deletions

View File

@ -5,28 +5,24 @@
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# #
# chkconfig: 2345 98 2
#
### BEGIN INIT INFO
# Provides: crashDumpMgr
# Required-Start: $null
# Required-Stop: $null
# Default-Start: 3 5
# Default-Stop: 0 1 2 6
# Short-Description: Maintenance 'Crash Dump' Manager script
### END INIT INFO
CRASHDUMPMGR_TAG=${CRASHDUMPMGR_TAG:-"crashDumpMgr"} CRASHDUMPMGR_TAG=${CRASHDUMPMGR_TAG:-"crashDumpMgr"}
RETVAL=0 RETVAL=0
max_size=3221225472 # "3GiB" in human readable
min_remainder=1073741824 # "1GiB" in human readable
# number format to/from human readable commands.
NUMFMT_TO_HR="/usr/bin/numfmt --to=iec"
NUMFMT_FROM_HR="/usr/bin/numfmt --from=auto"
############################################################################# #############################################################################
# Log message to syslog # Log message to syslog
############################################################################# #############################################################################
function log() function log()
{ {
logger -t ${CRASHDUMPMGR_TAG} $@ logger -t "${CRASHDUMPMGR_TAG}" "$@"
} }
############################################################################# #############################################################################
@ -50,10 +46,17 @@ function log()
# #
# Assumptions: log rotation is used to compress these bundles in the background # Assumptions: log rotation is used to compress these bundles in the background
# #
# Parameters : $1 = max_size ; maximum vmcore size to keep
#
############################################################################ ############################################################################
function manage_crash_dumps() function manage_crash_dumps()
{ {
if [ "${1}" != "" ] ; then
max_size=${1}
log "max_size=$max_size"
fi
CRASH_DIR="/var/crash" CRASH_DIR="/var/crash"
CRASH_BUNDLE_DIR="/var/log/crash" CRASH_BUNDLE_DIR="/var/log/crash"
OTHER_BUNDLE="${CRASH_BUNDLE_DIR}/vmcore.tar" OTHER_BUNDLE="${CRASH_BUNDLE_DIR}/vmcore.tar"
@ -76,13 +79,44 @@ function manage_crash_dumps()
for entry in ${CRASH_DIR}/* for entry in ${CRASH_DIR}/*
do do
if [ -d ${entry} ] ; then remove_entry=false
if [ -e ${entry}/vmcore ] ; then if [ -d "${entry}" ] ; then
if [ -e "${entry}/${CRASH_BUNDLE_SUMMARY}" ] ; then
log "saving summary: ${CRASH_DIR}/$(basename ${entry})_${CRASH_BUNDLE_SUMMARY}"
# save the crash dump vmcore summary for all crash dumps # save the crash dump vmcore summary for all crash dumps
cp -a ${entry}/${CRASH_BUNDLE_SUMMARY} ${CRASH_DIR}/$(basename ${entry})_${CRASH_BUNDLE_SUMMARY} cp -a ${entry}/${CRASH_BUNDLE_SUMMARY} ${CRASH_DIR}/$(basename ${entry})_${CRASH_BUNDLE_SUMMARY}
fi
if [ -e "${entry}/vmcore" ] ; then
if [ "${cleanup}" != true ] ; then # get the size of this vmcore file ; raw and human readable
vmcore_size=$(stat --format='%s' ${entry}/vmcore)
vmcore_size_hr=$(${NUMFMT_TO_HR} ${vmcore_size})
# get available ${CRASH_BUNDLE_DIR} fs space in 1k blocks and convert that to bytes
available=$(($(df -k ${CRASH_BUNDLE_DIR} | grep -v Available | awk '{ print $4 }')*1000))
available_hr=$(${NUMFMT_TO_HR} ${available})
log "new vmcore detected (size:${vmcore_size}:${vmcore_size_hr}) ; ${CRASH_BUNDLE_DIR} avail:${available}:${available_hr}"
# Don't save this crash dump if it would leave the
# ${CRASH_BUNDLE_DIR} filesystem with less than 1GiB.
if [ ${available} -gt ${vmcore_size} ]; then
remaining=$((available-vmcore_size))
else
remaining=0
fi
if [ "${cleanup}" = true ] ; then
log "... remove ${entry} ; cleanup"
remove_entry=true
# check for min required 'remaining' ${CRASH_BUNDLE_DIR} filesystem space
elif [ ${remaining} -lt ${min_remainder} ] ; then
log "insufficient space in ${CRASH_BUNDLE_DIR} for ${vmcore_size_hr} ${entry}; would leave only ${remaining} bytes"
remove_entry=true
# create a new crash bundle if the vmcore file isn't oversized
elif [ ${vmcore_size} -lt ${max_size} ] ; then
if [ -e ${FIRST_BUNDLE} -o -e ${FIRST_BUNDLE_ROTATED} ] ; then if [ -e ${FIRST_BUNDLE} -o -e ${FIRST_BUNDLE_ROTATED} ] ; then
if [ ! -e ${OTHER_BUNDLE} ] ; then if [ ! -e ${OTHER_BUNDLE} ] ; then
log "creating bundle from ${entry}" log "creating bundle from ${entry}"
@ -94,38 +128,60 @@ function manage_crash_dumps()
${IONICE_CMD} ${NICE_CMD} ${TAR_CMD} ${FIRST_BUNDLE} -C ${CRASH_DIR} $(basename ${entry}) ${IONICE_CMD} ${NICE_CMD} ${TAR_CMD} ${FIRST_BUNDLE} -C ${CRASH_DIR} $(basename ${entry})
cleanup=true cleanup=true
fi fi
remove_entry=true
else
log "deleting oversize (${vmcore_size_hr}) vmcore file $(basename ${entry})"
remove_entry=true
fi fi
log "removing ${entry}" elif [[ "$entry" == *"_vmcore-dmesg.txt"* ]] ; then
rm -rf "${entry}" log "saved old $entry summary"
elif [[ "$entry" != "$CRASH_DIR/*" ]] ; then
# removes vmcore files that are not named properly,
# e.g. vmcore.incomplete
remove_entry=true
fi fi
elif [[ "$entry" != *"_vmcore-dmesg.txt"* ]] ; then
# removes files in /var/crash that are not crash-dump related
remove_entry=true
fi
if [ "${remove_entry}" = true ] ; then
log "removing ${entry}"
rm -rf "${entry}"
fi fi
done done
} }
# service case
case "$1" in
start)
manage_crash_dumps
;;
stop) function print_help()
log "stop" {
;; echo "$(basename $0) { --max-size <human-readable-size> }"
}
restart) # Parse the command line
log "restart" while [[ ${#} -gt 0 ]] ; do
stop
start
;;
status) key="${1}"
log "status"
;;
*) case $key in
log "usage: $0 { start | stop | status | restart }"
RETVAL=1 -h|--help)
;; print_help
esac ;;
--max-size)
max_size=$(echo "$2" | ${NUMFMT_FROM_HR})
log "max crash dump vmcore size is ${2} (${max_size})"
shift
;;
*)
print_help
;;
esac
shift
done
manage_crash_dumps $max_size
exit $RETVAL exit $RETVAL

View File

@ -6,8 +6,7 @@ Before=sshd.service
[Service] [Service]
Type=oneshot Type=oneshot
RemainAfterExit=no RemainAfterExit=no
ExecStart=/etc/init.d/crashDumpMgr start ExecStart=/etc/init.d/crashDumpMgr --max-size 3Gi
ExecStop=/etc/init.d/crashDumpMgr stop
[Install] [Install]
WantedBy=multi-user.target WantedBy=multi-user.target