Merge "Enhance crashDumpMgr with oversized crash dump protection"
This commit is contained in:
commit
fdf6ff8650
|
@ -5,28 +5,24 @@
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
#
|
#
|
||||||
|
|
||||||
# chkconfig: 2345 98 2
|
|
||||||
#
|
|
||||||
### BEGIN INIT INFO
|
|
||||||
# Provides: crashDumpMgr
|
|
||||||
# Required-Start: $null
|
|
||||||
# Required-Stop: $null
|
|
||||||
# Default-Start: 3 5
|
|
||||||
# Default-Stop: 0 1 2 6
|
|
||||||
# Short-Description: Maintenance 'Crash Dump' Manager script
|
|
||||||
### END INIT INFO
|
|
||||||
|
|
||||||
CRASHDUMPMGR_TAG=${CRASHDUMPMGR_TAG:-"crashDumpMgr"}
|
CRASHDUMPMGR_TAG=${CRASHDUMPMGR_TAG:-"crashDumpMgr"}
|
||||||
|
|
||||||
RETVAL=0
|
RETVAL=0
|
||||||
|
|
||||||
|
max_size=3221225472 # "3GiB" in human readable
|
||||||
|
min_remainder=1073741824 # "1GiB" in human readable
|
||||||
|
|
||||||
|
# number format to/from human readable commands.
|
||||||
|
NUMFMT_TO_HR="/usr/bin/numfmt --to=iec"
|
||||||
|
NUMFMT_FROM_HR="/usr/bin/numfmt --from=auto"
|
||||||
|
|
||||||
#############################################################################
|
#############################################################################
|
||||||
# Log message to syslog
|
# Log message to syslog
|
||||||
#############################################################################
|
#############################################################################
|
||||||
|
|
||||||
function log()
|
function log()
|
||||||
{
|
{
|
||||||
logger -t ${CRASHDUMPMGR_TAG} $@
|
logger -t "${CRASHDUMPMGR_TAG}" "$@"
|
||||||
}
|
}
|
||||||
|
|
||||||
#############################################################################
|
#############################################################################
|
||||||
|
@ -50,10 +46,17 @@ function log()
|
||||||
#
|
#
|
||||||
# Assumptions: log rotation is used to compress these bundles in the background
|
# Assumptions: log rotation is used to compress these bundles in the background
|
||||||
#
|
#
|
||||||
|
# Parameters : $1 = max_size ; maximum vmcore size to keep
|
||||||
|
#
|
||||||
############################################################################
|
############################################################################
|
||||||
|
|
||||||
function manage_crash_dumps()
|
function manage_crash_dumps()
|
||||||
{
|
{
|
||||||
|
if [ "${1}" != "" ] ; then
|
||||||
|
max_size=${1}
|
||||||
|
log "max_size=$max_size"
|
||||||
|
fi
|
||||||
|
|
||||||
CRASH_DIR="/var/crash"
|
CRASH_DIR="/var/crash"
|
||||||
CRASH_BUNDLE_DIR="/var/log/crash"
|
CRASH_BUNDLE_DIR="/var/log/crash"
|
||||||
OTHER_BUNDLE="${CRASH_BUNDLE_DIR}/vmcore.tar"
|
OTHER_BUNDLE="${CRASH_BUNDLE_DIR}/vmcore.tar"
|
||||||
|
@ -76,13 +79,44 @@ function manage_crash_dumps()
|
||||||
|
|
||||||
for entry in ${CRASH_DIR}/*
|
for entry in ${CRASH_DIR}/*
|
||||||
do
|
do
|
||||||
if [ -d ${entry} ] ; then
|
remove_entry=false
|
||||||
if [ -e ${entry}/vmcore ] ; then
|
if [ -d "${entry}" ] ; then
|
||||||
|
if [ -e "${entry}/${CRASH_BUNDLE_SUMMARY}" ] ; then
|
||||||
|
log "saving summary: ${CRASH_DIR}/$(basename ${entry})_${CRASH_BUNDLE_SUMMARY}"
|
||||||
|
|
||||||
# save the crash dump vmcore summary for all crash dumps
|
# save the crash dump vmcore summary for all crash dumps
|
||||||
cp -a ${entry}/${CRASH_BUNDLE_SUMMARY} ${CRASH_DIR}/$(basename ${entry})_${CRASH_BUNDLE_SUMMARY}
|
cp -a ${entry}/${CRASH_BUNDLE_SUMMARY} ${CRASH_DIR}/$(basename ${entry})_${CRASH_BUNDLE_SUMMARY}
|
||||||
|
fi
|
||||||
|
if [ -e "${entry}/vmcore" ] ; then
|
||||||
|
|
||||||
if [ "${cleanup}" != true ] ; then
|
# get the size of this vmcore file ; raw and human readable
|
||||||
|
vmcore_size=$(stat --format='%s' ${entry}/vmcore)
|
||||||
|
vmcore_size_hr=$(${NUMFMT_TO_HR} ${vmcore_size})
|
||||||
|
|
||||||
|
# get available ${CRASH_BUNDLE_DIR} fs space in 1k blocks and convert that to bytes
|
||||||
|
available=$(($(df -k ${CRASH_BUNDLE_DIR} | grep -v Available | awk '{ print $4 }')*1000))
|
||||||
|
available_hr=$(${NUMFMT_TO_HR} ${available})
|
||||||
|
log "new vmcore detected (size:${vmcore_size}:${vmcore_size_hr}) ; ${CRASH_BUNDLE_DIR} avail:${available}:${available_hr}"
|
||||||
|
|
||||||
|
# Don't save this crash dump if it would leave the
|
||||||
|
# ${CRASH_BUNDLE_DIR} filesystem with less than 1GiB.
|
||||||
|
if [ ${available} -gt ${vmcore_size} ]; then
|
||||||
|
remaining=$((available-vmcore_size))
|
||||||
|
else
|
||||||
|
remaining=0
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ "${cleanup}" = true ] ; then
|
||||||
|
log "... remove ${entry} ; cleanup"
|
||||||
|
remove_entry=true
|
||||||
|
|
||||||
|
# check for min required 'remaining' ${CRASH_BUNDLE_DIR} filesystem space
|
||||||
|
elif [ ${remaining} -lt ${min_remainder} ] ; then
|
||||||
|
log "insufficient space in ${CRASH_BUNDLE_DIR} for ${vmcore_size_hr} ${entry}; would leave only ${remaining} bytes"
|
||||||
|
remove_entry=true
|
||||||
|
|
||||||
|
# create a new crash bundle if the vmcore file isn't oversized
|
||||||
|
elif [ ${vmcore_size} -lt ${max_size} ] ; then
|
||||||
if [ -e ${FIRST_BUNDLE} -o -e ${FIRST_BUNDLE_ROTATED} ] ; then
|
if [ -e ${FIRST_BUNDLE} -o -e ${FIRST_BUNDLE_ROTATED} ] ; then
|
||||||
if [ ! -e ${OTHER_BUNDLE} ] ; then
|
if [ ! -e ${OTHER_BUNDLE} ] ; then
|
||||||
log "creating bundle from ${entry}"
|
log "creating bundle from ${entry}"
|
||||||
|
@ -94,38 +128,60 @@ function manage_crash_dumps()
|
||||||
${IONICE_CMD} ${NICE_CMD} ${TAR_CMD} ${FIRST_BUNDLE} -C ${CRASH_DIR} $(basename ${entry})
|
${IONICE_CMD} ${NICE_CMD} ${TAR_CMD} ${FIRST_BUNDLE} -C ${CRASH_DIR} $(basename ${entry})
|
||||||
cleanup=true
|
cleanup=true
|
||||||
fi
|
fi
|
||||||
|
remove_entry=true
|
||||||
|
else
|
||||||
|
log "deleting oversize (${vmcore_size_hr}) vmcore file $(basename ${entry})"
|
||||||
|
remove_entry=true
|
||||||
fi
|
fi
|
||||||
log "removing ${entry}"
|
elif [[ "$entry" == *"_vmcore-dmesg.txt"* ]] ; then
|
||||||
rm -rf "${entry}"
|
log "saved old $entry summary"
|
||||||
|
elif [[ "$entry" != "$CRASH_DIR/*" ]] ; then
|
||||||
|
# removes vmcore files not named properly
|
||||||
|
# e.g. vmcore.incomplete
|
||||||
|
remove_entry=true
|
||||||
fi
|
fi
|
||||||
|
elif [[ "$entry" != *"_vmcore-dmesg.txt"* ]] ; then
|
||||||
|
# removes files in /var/crash that are not related to crash dumps
|
||||||
|
remove_entry=true
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ "${remove_entry}" = true ] ; then
|
||||||
|
log "removing ${entry}"
|
||||||
|
rm -rf "${entry}"
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
}
|
}
|
||||||
|
|
||||||
# service case
|
|
||||||
case "$1" in
|
|
||||||
start)
|
|
||||||
manage_crash_dumps
|
|
||||||
;;
|
|
||||||
|
|
||||||
stop)
|
function print_help()
|
||||||
log "stop"
|
{
|
||||||
;;
|
echo "$(basename $0) { --max-size <human-readable-size> }"
|
||||||
|
}
|
||||||
|
|
||||||
restart)
|
# Parse the command line
|
||||||
log "restart"
|
while [[ ${#} -gt 0 ]] ; do
|
||||||
stop
|
|
||||||
start
|
|
||||||
;;
|
|
||||||
|
|
||||||
status)
|
key="${1}"
|
||||||
log "status"
|
|
||||||
;;
|
|
||||||
|
|
||||||
*)
|
case $key in
|
||||||
log "usage: $0 { start | stop | status | restart }"
|
|
||||||
RETVAL=1
|
-h|--help)
|
||||||
;;
|
print_help
|
||||||
esac
|
;;
|
||||||
|
|
||||||
|
--max-size)
|
||||||
|
max_size=$(echo "$2" | ${NUMFMT_FROM_HR})
|
||||||
|
log "max crash dump vmcore size is ${2} (${max_size})"
|
||||||
|
shift
|
||||||
|
;;
|
||||||
|
|
||||||
|
*)
|
||||||
|
print_help
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
shift
|
||||||
|
done
|
||||||
|
|
||||||
|
manage_crash_dumps $max_size
|
||||||
|
|
||||||
exit $RETVAL
|
exit $RETVAL
|
||||||
|
|
|
@ -6,8 +6,7 @@ Before=sshd.service
|
||||||
[Service]
|
[Service]
|
||||||
Type=oneshot
|
Type=oneshot
|
||||||
RemainAfterExit=no
|
RemainAfterExit=no
|
||||||
ExecStart=/etc/init.d/crashDumpMgr start
|
ExecStart=/etc/init.d/crashDumpMgr --max-size 3Gi
|
||||||
ExecStop=/etc/init.d/crashDumpMgr stop
|
|
||||||
|
|
||||||
[Install]
|
[Install]
|
||||||
WantedBy=multi-user.target
|
WantedBy=multi-user.target
|
||||||
|
|
Loading…
Reference in New Issue