#!/bin/bash
#
# Copyright (c) 2020-2023 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#

CRASHDUMPMGR_TAG=${CRASHDUMPMGR_TAG:-"crash-dump-manager"}

RETVAL=0

# Default values and constants
DEFAULT_MAX_SIZE=5368709120         # "5GiB"
DEFAULT_MAX_FILES=4
UNLIMITED="unlimited"
DEFAULT="default"
DEFAULT_MAX_USED="${UNLIMITED}"     # Assign UNLIMITED to DEFAULT_MAX_USED
DEFAULT_MIN_REMAINDER_PERCENT=10
MIN_REMAINDER_MINIMUM=1073741824    # 1GiB in bytes

# number format to/from human readable commands.
# These are "command plus options" strings; they are expanded unquoted on
# purpose so that the shell splits them into a command and its arguments.
NUMFMT_TO_HR="/usr/bin/numfmt --to=iec"
NUMFMT_FROM_HR="/usr/bin/numfmt --from=auto"

#############################################################################
# Log message to syslog
#
# Parameters: $@ = message words ; passed to logger under ${CRASHDUMPMGR_TAG}
#############################################################################

function log() {
    logger -t "${CRASHDUMPMGR_TAG}" "$@"
}

#############################################################################
#
# Name   : create_bundle
#
# Purpose: Tar one crash dump directory at the lowest cpu and io priority.
#
# Parameters:
#   $1 = path of the tar archive to create
#   $2 = directory that contains the crash dump directory
#   $3 = name of the crash dump directory inside $2
#
# A tar failure is logged. Nothing is returned to the caller, which removes
# the source directory either way so that /var/crash cannot fill up.
#
#############################################################################

function create_bundle() {
    local archive="$1"
    local parent_dir="$2"
    local dump_name="$3"

    if ! /usr/bin/ionice -c2 -n7 /usr/bin/nice -n19 \
            tar -cf "${archive}" -C "${parent_dir}" "${dump_name}" ; then
        log "failed to create ${archive} from ${parent_dir}/${dump_name}"
    fi
}

#############################################################################
#
# Name   : manage_crash_dumps
#
# Purpose: Prevent crash dumps from filling up the root fs
#
# New crash dump bundles are expected as one directory per crash dump
# under /var/crash/. Crash dump bundles are quite large and, if too many
# occur, can fill up the target filesystem.
#
# This function manages the crash dump bundles, creating tar archives for
# storage, handling maximum file count and storage limits, and preserving
# summaries of the crash dumps.
#
# The first bundle is tar'ed as vmcore_first.tar and preserved.
# Subsequent crash bundles are nicely tar'ed as vmcore_<dir name>.tar.
#
# Save the crash dump vmcore summary for all crash dumps.
#
# Assumptions: log rotation is used to compress these bundles in the background
#
# Parameters:
#   $1 = max_size      ; maximum vmcore size to keep (bytes or "unlimited")
#   $2 = max_files     ; maximum number of crash dump files to keep
#   $3 = max_used      ; maximum used storage size (bytes or "unlimited")
#   $4 = min_available ; minimum available storage size (bytes) ; when empty
#                        it is derived from the size of the filesystem that
#                        holds /var/log/crash
#
# An omitted or empty $1..$3 falls back to the script level variable of the
# same name and then to the built-in default.
#
############################################################################

function manage_crash_dumps() {
    local size_limit="${1:-${max_size:-${DEFAULT_MAX_SIZE}}}"
    local files_limit="${2:-${max_files:-${DEFAULT_MAX_FILES}}}"
    local used_limit="${3:-${max_used:-${DEFAULT_MAX_USED}}}"
    local min_avail="${4:-}"

    local CRASH_DIR="/var/crash"
    local CRASH_BUNDLE_DIR="/var/log/crash"
    local OTHER_BUNDLE="${CRASH_BUNDLE_DIR}/vmcore"
    local FIRST_BUNDLE="${CRASH_BUNDLE_DIR}/vmcore_first.tar"
    local FIRST_BUNDLE_ROTATED="${CRASH_BUNDLE_DIR}/vmcore_first.tar.1.gz"
    local CRASH_BUNDLE_SUMMARY="vmcore-dmesg.txt"
    local CRASH_BUNDLE_SUMMARY_DEB="dmesg."

    local fs_size size_limit_hr="" used_limit_hr="" min_avail_hr
    local name entry stamp summary remove_entry max_files_saved
    local vmcore_file vmcore_size vmcore_size_hr
    local bundle_count num_files_to_remove files_to_remove_size
    local file file_size
    local available available_hr used_space remaining total_used_space
    local -a entries=()
    local -a files_to_remove=()

    # create the bundle dir first ; the df calls below need it to exist
    if [ ! -d "${CRASH_BUNDLE_DIR}" ] ; then
        mkdir -p "${CRASH_BUNDLE_DIR}" || log "unable to create ${CRASH_BUNDLE_DIR}"
    fi

    if [[ ! "${min_avail}" =~ ^[0-9]+$ ]] ; then
        # Get the size of the filesystem assigned to /var/log/crash in bytes
        fs_size=$(df -B1 "${CRASH_BUNDLE_DIR}" | awk 'NR==2 {print $2}')

        # Calculate min_available as the percentage of the filesystem size
        min_avail=$(( ${fs_size:-0} * DEFAULT_MIN_REMAINDER_PERCENT / 100 ))
    fi

    # Set a minimum value for min_available
    if [ "${min_avail}" -lt "${MIN_REMAINDER_MINIMUM}" ] ; then
        min_avail=${MIN_REMAINDER_MINIMUM}
    fi

    # shellcheck disable=SC2086  # NUMFMT_TO_HR is split into cmd + options
    if [ "${size_limit}" != "${UNLIMITED}" ] ; then
        size_limit_hr="($(${NUMFMT_TO_HR} "${size_limit}"))"
    fi
    # shellcheck disable=SC2086
    if [ "${used_limit}" != "${UNLIMITED}" ] ; then
        used_limit_hr="($(${NUMFMT_TO_HR} "${used_limit}"))"
    fi
    # shellcheck disable=SC2086
    min_avail_hr=$(${NUMFMT_TO_HR} "${min_avail}")

    log "max crash dump files set to ${files_limit}"
    log "max crash dump vmcore size is ${size_limit} ${size_limit_hr}"
    log "max used storage size is ${used_limit} ${used_limit_hr}"
    log "minimum available storage size is ${min_avail} (${min_avail_hr})"

    log "managing ${CRASH_DIR}"

    # Oldest entry first. The listing is read line by line so that a name
    # containing blanks or glob characters stays a single entry.
    mapfile -t entries < <(ls -rt "${CRASH_DIR}/")

    for name in "${entries[@]}" ; do
        entry="${CRASH_DIR}/${name}"
        remove_entry=false
        max_files_saved=false

        if [ -d "${entry}" ] ; then
            # the directory name is reused as the suffix of the files in it
            stamp="${name}"

            if [ -e "${entry}/${CRASH_BUNDLE_SUMMARY_DEB}${stamp}" ] ; then
                summary="${CRASH_DIR}/${stamp}_${CRASH_BUNDLE_SUMMARY_DEB}${stamp}"
                log "saving summary: ${summary}"

                # save the crash dump dmesg.<name> for debian for all crash dumps
                cp -a "${entry}/${CRASH_BUNDLE_SUMMARY_DEB}${stamp}" "${summary}"
            fi

            if [ -e "${entry}/${CRASH_BUNDLE_SUMMARY}" ] ; then
                summary="${CRASH_DIR}/${name}_${CRASH_BUNDLE_SUMMARY}"
                log "saving summary: ${summary}"

                # save the crash dump vmcore summary for all crash dumps
                cp -a "${entry}/${CRASH_BUNDLE_SUMMARY}" "${summary}"
            fi

            # locate the vmcore file ; dump.<name> is preferred over vmcore
            vmcore_file=""
            if [ -e "${entry}/dump.${stamp}" ] ; then
                vmcore_file="${entry}/dump.${stamp}"
            elif [ -e "${entry}/vmcore" ] ; then
                vmcore_file="${entry}/vmcore"
            fi

            if [ -n "${vmcore_file}" ] ; then

                # get the size of this vmcore file ; raw and human readable
                vmcore_size=$(stat --format='%s' "${vmcore_file}")
                vmcore_size=${vmcore_size:-0}
                # shellcheck disable=SC2086
                vmcore_size_hr=$(${NUMFMT_TO_HR} "${vmcore_size}")

                # Manage max number of files
                files_to_remove=()
                files_to_remove_size=0
                bundle_count=$(ls -A "${CRASH_BUNDLE_DIR}" | wc -l)
                num_files_to_remove=$(( bundle_count - files_limit + 1 ))
                if [ "${num_files_to_remove}" -ge 1 ] ; then
                    # Newest first listing: take the oldest N+1 names and
                    # drop the very oldest one, which is left in place.
                    mapfile -t files_to_remove < <(
                        ls -t "${CRASH_BUNDLE_DIR}" \
                            | tail -n "$(( num_files_to_remove + 1 ))" \
                            | head -n "${num_files_to_remove}"
                    )
                    for file in "${files_to_remove[@]}" ; do
                        # ls printed bare names ; stat needs the full path.
                        file_size=$(stat --format='%s' "${CRASH_BUNDLE_DIR}/${file}") || continue
                        # Numeric sum ; '+=' on a plain variable would append
                        # the digits as text.
                        files_to_remove_size=$(( files_to_remove_size + file_size ))
                    done
                    max_files_saved=true
                fi

                # get available ${CRASH_BUNDLE_DIR} fs space in bytes
                available=$(df -B1 "${CRASH_BUNDLE_DIR}" | awk 'NR==2 {print $4}')
                available=${available:-0}

                # get the current used space in the ${CRASH_BUNDLE_DIR} fs
                used_space=$(du -sb "${CRASH_BUNDLE_DIR}" | awk '{print $1}')
                used_space=${used_space:-0}

                # if the ${CRASH_BUNDLE_DIR} contains the maximum number of
                # files, the available and used_space need to be updated to
                # the value after deleting the oldest crash dump file.
                if [ "${max_files_saved}" = true ] ; then
                    available=$(( available + files_to_remove_size ))
                    used_space=$(( used_space - files_to_remove_size ))
                fi

                # shellcheck disable=SC2086
                available_hr=$(${NUMFMT_TO_HR} "${available}")
                log "new vmcore detected (size:${vmcore_size}:${vmcore_size_hr}) ;" \
                    "${CRASH_BUNDLE_DIR} avail:${available}:${available_hr}"

                # Space that would be left in the ${CRASH_BUNDLE_DIR}
                # filesystem after saving this crash dump.
                if [ "${available}" -gt "${vmcore_size}" ] ; then
                    remaining=$(( available - vmcore_size ))
                else
                    remaining=0
                fi

                # Check if adding the file would exceed the maximum used space limit
                total_used_space=$(( used_space + vmcore_size ))

                if [ "${used_limit}" != "${UNLIMITED}" ] && \
                   [ "${total_used_space}" -gt "${used_limit}" ] ; then
                    log "The last crash dump is not saved because it would exceed the maximum" \
                        "used space limit specified in the max_used parameter (${used_limit} bytes)."
                    remove_entry=true

                # check for min required 'remaining' ${CRASH_BUNDLE_DIR} filesystem space
                elif [ "${remaining}" -lt "${min_avail}" ] ; then
                    log "insufficient space in ${CRASH_BUNDLE_DIR} for ${vmcore_size_hr} ${entry};" \
                        "would leave only ${remaining} bytes"
                    remove_entry=true

                # create a new crash bundle if the vmcore file isn't oversized ;
                # "unlimited" is tested first so it never reaches the numeric test
                elif [ "${size_limit}" = "${UNLIMITED}" ] || \
                     [ "${vmcore_size}" -lt "${size_limit}" ] ; then
                    if [ ! -e "${FIRST_BUNDLE}" ] && [ ! -e "${FIRST_BUNDLE_ROTATED}" ] ; then
                        log "creating first bundle from ${entry}"
                        create_bundle "${FIRST_BUNDLE}" "${CRASH_DIR}" "${name}"
                    else
                        if [ "${max_files_saved}" = true ] ; then
                            for file in "${files_to_remove[@]}" ; do
                                # delete old vmcore file
                                log "removing old vmcore file: ${file}"
                                rm -rf -- "${CRASH_BUNDLE_DIR:?}/${file}"
                            done
                        fi
                        log "creating bundle from ${entry}"
                        create_bundle "${OTHER_BUNDLE}_${stamp}.tar" "${CRASH_DIR}" "${name}"
                    fi
                    remove_entry=true
                else
                    log "deleting oversize (${vmcore_size_hr}) vmcore file ${name}"
                    remove_entry=true
                fi
            elif [[ "${entry}" == *"_dmesg."* ]] || [[ "${entry}" == *"_vmcore-dmesg.txt"* ]] ; then
                log "saved old $entry summary"
            else
                # removes vmcore files not named properly
                # i.e vmcore.incomplete
                # (the former literal comparison against "$CRASH_DIR/*" was
                # always true, so this is a plain else)
                remove_entry=true
            fi
        elif [[ "${entry}" != *"_dmesg."* ]] && [[ "${entry}" != *"_vmcore-dmesg.txt"* ]] ; then
            # removes files in /var/crash that are not crash dumps related
            remove_entry=true
        fi

        if [ "${remove_entry}" = true ] ; then
            log "removing ${entry}"
            rm -rf -- "${entry:?}"
        fi
    done
}

#############################################################################
# Print the command line usage to stdout
#############################################################################

function print_help() {
    echo "Usage: $(basename "$0") [OPTIONS]"
    echo "Options:"
    echo " --max-size Set maximum vmcore size (human-readable size or \"$UNLIMITED\")"
    echo " --max-files Set maximum number of crash dump files"
    echo " --max-used Set maximum used storage size (human-readable size or \"$UNLIMITED\")"
    echo " --min-available Set minimum available storage size (human-readable size)"
}

#############################################################################
# Print $1 with every ',' replaced by '.' so that a value written with a
# comma as decimal separator can be handed to numfmt.
#############################################################################

normalize_size_format() {
    printf '%s\n' "${1//,/.}"
}

#############################################################################
#
# Name   : size_option_to_bytes
#
# Purpose: Convert a human-readable size option value to a byte count.
#
# Parameters:
#   $1 = option name ; only used in the log message
#   $2 = raw option value
#   $3 = value to print when $2 cannot be converted ; may be empty
#
# Prints the byte count, or $3 when numfmt does not produce a plain
# non-negative integer.
#
#############################################################################

function size_option_to_bytes() {
    local option="$1"
    local raw="$2"
    local fallback="$3"
    local bytes=""

    # shellcheck disable=SC2086  # NUMFMT_FROM_HR is split into cmd + options
    bytes=$(${NUMFMT_FROM_HR} "$(normalize_size_format "${raw}")")
    if [[ "${bytes}" =~ ^[0-9]+$ ]] ; then
        printf '%s\n' "${bytes}"
    else
        log "invalid ${option} value '${raw}' ; using ${fallback:-default}"
        printf '%s\n' "${fallback}"
    fi
}

# Initialize default values
max_size="${DEFAULT_MAX_SIZE}"
max_files="${DEFAULT_MAX_FILES}"
max_used="${DEFAULT_MAX_USED}"
min_available=""

# Initialize a flag to indicate if help has been shown
help_shown=false

# Parse the command line.
# Every option that takes a value consumes the option and, when present,
# the word after it. Help is printed at most once and parsing continues.
while [[ $# -gt 0 ]]; do
    case "${1}" in
        -h|--help)
            print_help
            help_shown=true
            shift
            ;;
        --max-size)
            if [ "${2:-}" = "${UNLIMITED}" ]; then
                max_size=${UNLIMITED}
            else
                max_size=$(size_option_to_bytes "${1}" "${2:-}" "${DEFAULT_MAX_SIZE}")
            fi
            shift "$(( $# > 1 ? 2 : 1 ))"
            ;;
        --max-files)
            if [[ "${2:-}" =~ ^[0-9]+$ ]]; then
                max_files="${2}"
            else
                log "invalid ${1} value '${2:-}' ; using ${max_files}"
            fi
            shift "$(( $# > 1 ? 2 : 1 ))"
            ;;
        --max-used)
            if [ "${2:-}" = "${UNLIMITED}" ]; then
                max_used=${UNLIMITED}
            else
                max_used=$(size_option_to_bytes "${1}" "${2:-}" "${DEFAULT_MAX_USED}")
            fi
            shift "$(( $# > 1 ? 2 : 1 ))"
            ;;
        --min-available)
            if [ "${2:-}" = "${DEFAULT}" ]; then
                min_available=""
            else
                min_available=$(size_option_to_bytes "${1}" "${2:-}" "")
            fi
            shift "$(( $# > 1 ? 2 : 1 ))"
            ;;
        *)
            # Unknown options or arguments: show the help once, then skip them
            if [ "${help_shown}" = false ]; then
                print_help
                help_shown=true
            fi
            shift
            ;;
    esac
done

# Quoted so that an empty value keeps its position in the argument list
manage_crash_dumps "${max_size}" "${max_files}" "${max_used}" "${min_available}"

exit $RETVAL