Add new configuration parameters to crashDumpMgr

This commit updates crashDumpMgr to add three new parameters and
enhance the existing one.

1. Maximum Files: Added 'max-files' parameter to specify the maximum
   number of saved crash dump files. The default value is 4.
2. Maximum Size: Updated the 'max-size' parameter to support
   the 'unlimited' value. The default value is 5GiB.
3. Maximum Used: Included 'max-used' parameter to limit the maximum
   storage used by saved crash dump files. It supports 'unlimited'
   and has a default value of unlimited.
4. Minimum Available: Implemented 'min-available' parameter, enabling
   the definition of a minimum available storage threshold on the
   crash dump file system. The value is restricted to a minimum of
   1GiB and defaults to 10% of the file system size.

These enhancements refine the crash dump management process and
offer more control over storage usage and crash dump file retention.

Story: 2010893
Task: 48676

Test Plan:
1) max-files parameter:
  PASS: don't set max-files param. Ensure the default value is used.
  Create 5 directories inside /var/crash. Each of them contains
  dmesg.<date> and dump.<date>. run the crashDumpMgr script.
  Verify:
    PASS: the vmcore_first.tar.1.gz is created when the first
          directory is read.
    PASS: 4 more vmcore_<date>.tar files are created.
    PASS: There will be 1 vmcore_first.tar.1.gz and 4
          vmcore_<date>.tar inside /var/log/crash.
    PASS: There will be one summary file for each directory:
          <date>_dmesg.<date> inside /var/crash
2) max-size parameter
  PASS: don't set max-size param. Ensure the default value is used
        (5GiB).
  PASS: Set a fixed max-size param. Create a dump.<date> file greater
        than the max-size param. Run the crashDumpMgr script. Verify
        that the crash dump file is not generated and a log
        message is displayed.
3) max-used parameter:
  PASS: don't set max-used param. Ensure the default value is used
        (unlimited).
  PASS: Set a fixed max-used param. Create a dump.<date> file that
        would push the used space above the max-used param. Run the
        crashDumpMgr script. Verify that the crash dump file is not
        generated, a log message is displayed and the directory is
        deleted.
4) min-available parameter:
  PASS: don't set min-available param. Ensure the default value is
        used (10% of /var/log/crash).
  PASS: Set a fixed 'min-available' param. Generate a 'dump.<date>'
        file to simulate a situation where the remaining space is
        less than the 'min-available' parameter. Run the crashDumpMgr
        script and ensure that it does not create the crashdump file,
        displays a log message, and deletes the entry.
5) PASS: Since the crashDumpMgr.service file is not being modified,
         verify that the script takes the default values.

Note: All tests have also been conducted by generating a kernel panic
and ensuring the crashDumpMgr script follows the correct workflow.

Change-Id: I8948593469dae01f190fd1ea21da3d0852bd7814
Signed-off-by: Enzo Candotti <enzo.candotti@windriver.com>
This commit is contained in:
Enzo Candotti 2023-08-25 17:52:55 -03:00
parent 005544b651
commit a120cc5fea
1 changed files with 154 additions and 50 deletions

View File

@ -1,18 +1,21 @@
#!/bin/bash
#
# Copyright (c) 2022 Wind River Systems, Inc.
# Copyright (c) 2020-2023 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# Modify it that is to support the debian coredump file.
# coredump files are dmesg.202206101633 and dump.202206101633 in Debian.
CRASHDUMPMGR_TAG=${CRASHDUMPMGR_TAG:-"crashDumpMgr"}
RETVAL=0
max_size=3221225472 # "3GiB"
min_remainder=1073741824 # "1GiB"
# Default values and constants
DEFAULT_MAX_SIZE=5368709120 # "5GiB"
DEFAULT_MAX_FILES=4
UNLIMITED="unlimited"
DEFAULT_MAX_USED="${UNLIMITED}" # Assign UNLIMITED to DEFAULT_MAX_USED
DEFAULT_MIN_REMAINDER_PERCENT=10
MIN_REMAINDER_MINIMUM=1073741824 # 1GiB in bytes
# number format to/from human readable commands.
NUMFMT_TO_HR="/usr/bin/numfmt --to=iec"
@ -38,51 +41,81 @@ function log()
# bundles are quite large and, if too many occur,
# can fill up its target filesystem.
#
# This function nicely tars a crash bundle found in /var/crash
# to /var/log/crash.
# This function manages the crash dump bundles, creating tar archives for
# storage, handling maximum file count and storage limits, and preserving
# summaries of the crash dumps.
#
# The first bundle is tar'ed as vmcore_first.tar and preserved.
# Subsequent crash bundles are nicely tar'ed as vmcore.tar
# Subsequent crash bundles are nicely tar'ed as vmcore_<date>.tar.
#
# Save the crash dump vmcore summary for all crash dumps.
#
# Assumptions: logration is used to compress these bundles in the background
# Assumptions: log rotation is used to compress these bundles in the background
#
# Parameters : $1 = max_size ; maximum vmcore size to keep
# Parameters:
# $1 = max_size ; maximum vmcore size to keep (human-readable size or "unlimited")
# $2 = max_files ; maximum number of crash dump files to keep
# $3 = max_used ; maximum used storage size (human-readable size or "unlimited")
# $4 = min_available ; minimum available storage size (human-readable size)
#
############################################################################
function manage_crash_dumps()
{
if [ "${1}" != "" ] ; then
max_size=${1}
log "max_size=$max_size"
fi
CRASH_DIR="/var/crash"
CRASH_BUNDLE_DIR="/var/log/crash"
OTHER_BUNDLE="${CRASH_BUNDLE_DIR}/vmcore.tar"
OTHER_BUNDLE="${CRASH_BUNDLE_DIR}/vmcore"
FIRST_BUNDLE="${CRASH_BUNDLE_DIR}/vmcore_first.tar"
FIRST_BUNDLE_ROTATED="${CRASH_BUNDLE_DIR}/vmcore_first.tar.1.gz"
CRASH_BUNDLE_SUMMARY="vmcore-dmesg.txt"
CRASH_BUNDLE_SUMMARY_DEB="dmesg."
if [ "${4}" = "" ] ; then
# Get the size of the filesystem assigned to /var/log/crash in bytes
fs_size=$(df -B1 ${CRASH_BUNDLE_DIR} | awk 'NR==2 {print $2}')
# Calculate min_available as the percentage of the filesystem size
min_available=$((${fs_size} * ${DEFAULT_MIN_REMAINDER_PERCENT} / 100))
else
min_available=${4}
fi
# Set a minimum value for min_available
if [ ${min_available} -lt ${MIN_REMAINDER_MINIMUM} ]; then
min_available=${MIN_REMAINDER_MINIMUM}
fi
max_size_hr=""
max_used_hr=""
if [ "${max_size}" != "${UNLIMITED}" ]; then
max_size_hr="($(${NUMFMT_TO_HR} ${max_size}))"
fi
if [ "${max_used}" != "${UNLIMITED}" ]; then
max_used_hr="($(${NUMFMT_TO_HR} ${max_used}))"
fi
min_available_hr=$(${NUMFMT_TO_HR} ${min_available})
log "max crash dump files set to ${max_files}"
log "max crash dump vmcore size is ${max_size} ${max_size_hr}"
log "max used storage size is ${max_used} ${max_used_hr}"
log "minimum available storage size is ${min_available} (${min_available_hr})"
# tar command and nice levels
TAR_CMD="tar -cf"
NICE_CMD="/usr/bin/nice -n19"
IONICE_CMD="/usr/bin/ionice -c2 -n7"
log "managing ${CRASH_DIR}"
cleanup=false
# create dir if it does not exist
if [ ! -d ${CRASH_BUNDLE_DIR} ] ; then
mkdir ${CRASH_BUNDLE_DIR}
fi
for entry in ${CRASH_DIR}/*
for entry in $(ls -rt ${CRASH_DIR}/);
do
entry=${CRASH_DIR}/${entry}
remove_entry=false
max_files_saved=false
if [ -d "${entry}" ] ; then
time=${entry##*/}
if [ -e "${entry}/${CRASH_BUNDLE_SUMMARY_DEB}${time}" ] ; then
@ -107,10 +140,28 @@ function manage_crash_dumps()
fi
vmcore_size_hr=$(${NUMFMT_TO_HR} ${vmcore_size})
# get available ${CRASH_BUNDLE_DIR} fs space in 1k blocks and convert that to bytes
available=$(($(df -k ${CRASH_BUNDLE_DIR} | grep -v Available | awk '{ print $4 }')*1000))
# Manage max number of files
if [ "$(ls -A ${CRASH_BUNDLE_DIR} | wc -l)" -ge "${max_files}" ]; then
oldest_vmcore=$(ls -t ${CRASH_BUNDLE_DIR} | tail -2 | head -1)
oldest_vmcore_size=$(stat --format='%s' ${oldest_vmcore})
max_files_saved=true
fi
# get available ${CRASH_BUNDLE_DIR} fs space in bytes
available=$(df -B1 ${CRASH_BUNDLE_DIR} | grep -v Available | awk '{ print $4 }')
# get the current used space in the ${CRASH_BUNDLE_DIR} fs
used_space=$(du -sb ${CRASH_BUNDLE_DIR} | awk '{print $1}')
# if the ${CRASH_BUNDLE_DIR} contains the maximum number of files, the available and used_space
# need to be updated to the value after deleting the oldest crash dump file.
if [ "${max_files_saved}" = true ] ; then
available=$((available+oldest_vmcore_size))
used_space=$((used_space-oldest_vmcore_size))
fi
available_hr=$(${NUMFMT_TO_HR} ${available})
log "new vmcore detected (size:${vmcore_size}:${vmcore_size_hr}) ; ${CRASH_BUNDLE_DIR} avail:${available}:${available_hr}"
log "new vmcore detected (size:${vmcore_size}:${vmcore_size_hr}) ;" \
"${CRASH_BUNDLE_DIR} avail:${available}:${available_hr}"
# Don't save this crash dump if it would leave the
# ${CRASH_BUNDLE_DIR} filesystem with less than 1GiB.
@ -120,27 +171,35 @@ function manage_crash_dumps()
remaining=0
fi
if [ "${cleanup}" = true ] ; then
log "... remove ${entry} ; cleanup"
# Check if adding the file would exceed the maximum used space limit
total_used_space=$((used_space + file_size))
if [ "${max_used}" != "$UNLIMITED" ] && [ ${total_used_space} -gt ${max_used} ]; then
log "The last crash dump is not saved because it would exceed the maximum" \
"used space limit specified in the max_used parameter (${max_used} bytes)."
remove_entry=true
# check for min required 'remaining' ${CRASH_BUNDLE_DIR} filesystem space
elif [ ${remaining} -lt ${min_remainder} ] ; then
log "insufficient space in ${CRASH_BUNDLE_DIR} for ${vmcore_size_hr} ${entry}; would leave only ${remaining} bytes"
elif [ ${remaining} -lt ${min_available} ] ; then
log "insufficient space in ${CRASH_BUNDLE_DIR} for ${vmcore_size_hr} ${entry};" \
"would leave only ${remaining} bytes"
remove_entry=true
# create a new crash bundle if the vmcore file isn't oversized
elif [ ${vmcore_size} -lt ${max_size} ] ; then
if [ -e ${FIRST_BUNDLE} -o -e ${FIRST_BUNDLE_ROTATED} ] ; then
if [ ! -e ${OTHER_BUNDLE} ] ; then
log "creating bundle from ${entry}"
${IONICE_CMD} ${NICE_CMD} ${TAR_CMD} ${OTHER_BUNDLE} -C ${CRASH_DIR} $(basename ${entry})
cleanup=true
fi
else
elif [ ${vmcore_size} -lt ${max_size} ] || [ "${max_size}" = "${UNLIMITED}" ] ; then
if [ ! -e "${FIRST_BUNDLE}" ] && [ ! -e "${FIRST_BUNDLE_ROTATED}" ]; then
log "creating first bundle from ${entry}"
${IONICE_CMD} ${NICE_CMD} ${TAR_CMD} ${FIRST_BUNDLE} -C ${CRASH_DIR} $(basename ${entry})
cleanup=true
else
if [ "${max_files_saved}" = true ] ; then
# delete oldest vmcore file
log "removing oldest vmcore file: ${oldest_vmcore}"
rm -rf "${CRASH_BUNDLE_DIR}/${oldest_vmcore}"
fi
log "creating bundle from ${entry}"
${IONICE_CMD} ${NICE_CMD} ${TAR_CMD} ${OTHER_BUNDLE}_${time}.tar -C ${CRASH_DIR} $(basename ${entry})
fi
remove_entry=true
else
@ -166,36 +225,81 @@ function manage_crash_dumps()
done
}
function print_help()
{
echo "$(basename $0) { --max-size <human-readable-size> }"
# Print the command-line usage summary to stdout.
#
# Globals:   UNLIMITED (read) - keyword shown as an accepted size value
# Arguments: none
# Outputs:   six lines: a usage line followed by the option list
function print_help() {
    local script_name
    script_name=$(basename "$0")

    # One printf call emits every argument on its own line.
    printf '%s\n' \
        "Usage: ${script_name} [OPTIONS]" \
        "Options:" \
        " --max-size <size> Set maximum vmcore size (human-readable size or \"${UNLIMITED}\")" \
        " --max-files <number> Set maximum number of crash dump files" \
        " --max-used <size> Set maximum used storage size (human-readable size or \"${UNLIMITED}\")" \
        " --min-available <size> Set minimum available storage size (human-readable size)"
}
# Normalize a size string by replacing every ',' with '.'
# (e.g. "1,5G" becomes "1.5G"). Callers pass the result on to the
# ${NUMFMT_FROM_HR} command.
#
# Arguments: $1 - size string as given on the command line
# Outputs:   the normalized string on stdout, followed by a newline
normalize_size_format() {
    # Use printf with parameter expansion rather than `echo "$1" | tr`:
    # echo would interpret values such as "-n" or "-e" as its own options
    # and silently drop them, and no pipeline is needed for a
    # single-string substitution.
    printf '%s\n' "${1//,/.}"
}
# Initialize default values
max_size="${DEFAULT_MAX_SIZE}"
max_files="${DEFAULT_MAX_FILES}"
max_used="${DEFAULT_MAX_USED}"
min_available=""
# Initialize a flag to indicate if help has been shown
help_shown=false
# Parse the command line
while [[ ${#} -gt 0 ]] ; do
key="${1}"
case $key in
while [[ $# -gt 0 ]]; do
case "${1}" in
-h|--help)
print_help
help_shown=true
shift
;;
--max-size)
max_size=$(echo "$2" | ${NUMFMT_FROM_HR})
log "max crash dump vmcore size is ${2} (${max_size})"
shift
if [ "${1}" = "${UNLIMITED}" ]; then
max_size=${UNLIMITED}
else
max_size=$(${NUMFMT_FROM_HR} "$(normalize_size_format "${1}")")
fi
shift
;;
--max-files)
shift
max_files="${1}"
shift
;;
--max-used)
shift
if [ "${1}" = "${UNLIMITED}" ]; then
max_used=${UNLIMITED}
else
max_used=$(${NUMFMT_FROM_HR} "$(normalize_size_format "${1}")")
fi
shift
;;
--min-available)
shift
min_available=$(${NUMFMT_FROM_HR} "$(normalize_size_format "${1}")")
shift
;;
*)
print_help
if [ "${help_shown}" = false ]; then
print_help
help_shown=true
else
# Handle unknown options or arguments
shift
fi
;;
esac
shift
done
manage_crash_dumps $max_size
manage_crash_dumps ${max_size} ${max_files} ${max_used} ${min_available}
exit $RETVAL