273 lines
8.7 KiB
Python
273 lines
8.7 KiB
Python
"""
|
|
Copyright (c) 2014 Wind River Systems, Inc.
|
|
|
|
SPDX-License-Identifier: Apache-2.0
|
|
|
|
"""
|
|
|
|
###################
|
|
# IMPORTS
|
|
###################
|
|
import logging
|
|
import logging.handlers
|
|
import time
|
|
import os
|
|
import subprocess
|
|
import glob
|
|
import re
|
|
import sys
|
|
|
|
from daemon import runner
|
|
|
|
import prepostrotate
|
|
|
|
###################
|
|
# CONSTANTS
|
|
###################
|
|
LOG_DIR = '/var/lib/logmgmt'
|
|
LOG_FILE = LOG_DIR + '/logmgmt.log'
|
|
PID_FILE = '/var/run/logmgmt.pid'
|
|
LOG_FILE_MAX_BYTES = 1024*1024
|
|
LOG_FILE_BACKUP_COUNT = 5
|
|
|
|
PERCENT_FREE_CRITICAL = 10
|
|
PERCENT_FREE_MAJOR = 20
|
|
|
|
LOGROTATE_PERIOD = 600 # Every ten minutes
|
|
|
|
###################
|
|
# METHODS
|
|
###################
|
|
def start_polling():
|
|
logmgmt_daemon = LogMgmtDaemon()
|
|
logmgmt_runner = runner.DaemonRunner(logmgmt_daemon)
|
|
logmgmt_runner.daemon_context.umask = 0o022
|
|
logmgmt_runner.do_action()
|
|
|
|
def handle_exception(exc_type, exc_value, exc_traceback):
|
|
"""
|
|
Exception handler to log any uncaught exceptions
|
|
"""
|
|
logging.error("Uncaught exception",
|
|
exc_info=(exc_type, exc_value, exc_traceback))
|
|
sys.__excepthook__(exc_type, exc_value, exc_traceback)
|
|
|
|
|
|
###################
|
|
# CLASSES
|
|
###################
|
|
class LogMgmtDaemon():
|
|
""" Daemon process representation of
|
|
the /var/log monitoring program
|
|
"""
|
|
def __init__(self):
|
|
# Daemon-specific init
|
|
self.stdin_path = '/dev/null'
|
|
self.stdout_path = '/dev/null'
|
|
self.stderr_path = '/dev/null'
|
|
self.pidfile_path = PID_FILE
|
|
self.pidfile_timeout = 5
|
|
|
|
self.monitored_files = []
|
|
self.unmonitored_files = []
|
|
|
|
self.last_logrotate = 0
|
|
self.last_check = 0
|
|
|
|
def configure_logging(self, level=logging.DEBUG):
|
|
my_exec = os.path.basename(sys.argv[0])
|
|
|
|
if not os.path.exists(LOG_DIR):
|
|
os.mkdir(LOG_DIR, 0755)
|
|
|
|
log_format = '%(asctime)s: ' \
|
|
+ my_exec + '[%(process)s]: ' \
|
|
+ '%(filename)s(%(lineno)s): ' \
|
|
+ '%(levelname)s: %(message)s'
|
|
|
|
fmt = logging.Formatter(fmt=log_format)
|
|
|
|
# Use python's log rotation, rather than logrotate
|
|
handler = logging.handlers.RotatingFileHandler(
|
|
LOG_FILE,
|
|
maxBytes=LOG_FILE_MAX_BYTES,
|
|
backupCount=LOG_FILE_BACKUP_COUNT)
|
|
|
|
my_logger = logging.getLogger()
|
|
my_logger.setLevel(level)
|
|
|
|
handler.setFormatter(fmt)
|
|
handler.setLevel(level)
|
|
my_logger.addHandler(handler)
|
|
|
|
# Log uncaught exceptions to file
|
|
sys.excepthook = handle_exception
|
|
|
|
def run(self):
|
|
self.configure_logging()
|
|
|
|
while True:
|
|
self.check_var_log()
|
|
|
|
# run/poll every 1 min
|
|
time.sleep(60)
|
|
|
|
def get_percent_free(self):
|
|
usage = os.statvfs('/var/log')
|
|
return ((usage.f_bavail * 100) / usage.f_blocks)
|
|
|
|
def get_monitored_files(self):
|
|
self.monitored_files = []
|
|
|
|
try:
|
|
output = subprocess.check_output(['/usr/sbin/logrotate', '-d', '/etc/logrotate.conf'],
|
|
stderr=subprocess.STDOUT)
|
|
|
|
for line in output.split('\n'):
|
|
fields = line.split()
|
|
if len(fields) > 0 and fields[0] == "considering":
|
|
self.monitored_files.extend(glob.glob(fields[2]))
|
|
self.monitored_files.extend(glob.glob(fields[2] + '.[0-9].gz'))
|
|
self.monitored_files.extend(glob.glob(fields[2] + '.[0-9][0-9].gz'))
|
|
self.monitored_files.extend(glob.glob(fields[2] + '.[0-9]'))
|
|
self.monitored_files.extend(glob.glob(fields[2] + '.[0-9][0-9]'))
|
|
except:
|
|
logging.error('Failed to determine monitored files')
|
|
raise
|
|
|
|
def get_unmonitored_files(self):
|
|
self.unmonitored_files = []
|
|
|
|
try:
|
|
output = subprocess.check_output(['find', '/var/log', '-type', 'f'])
|
|
|
|
for fname in output.split('\n'):
|
|
if fname in self.monitored_files:
|
|
continue
|
|
|
|
# Ignore some files
|
|
if '/var/log/puppet' in fname \
|
|
or '/var/log/dmesg' in fname \
|
|
or '/var/log/rabbitmq' in fname \
|
|
or '/var/log/lastlog' in fname:
|
|
continue
|
|
|
|
if os.path.exists(fname):
|
|
self.unmonitored_files.append(fname)
|
|
|
|
except:
|
|
logging.error('Failed to determine unmonitored files')
|
|
|
|
def purge_files(self, index):
|
|
pattern = re.compile('.*\.([0-9]*)\.gz')
|
|
for fname in sorted(self.monitored_files):
|
|
result = pattern.match(fname)
|
|
if result:
|
|
if int(result.group(1)) >= index:
|
|
logging.info("Purging file: %s" % fname)
|
|
try:
|
|
os.remove(fname)
|
|
except OSError as e:
|
|
logging.error('Failed to remove file: %s', e)
|
|
|
|
def run_logrotate(self):
|
|
self.last_logrotate = int(time.time())
|
|
try:
|
|
subprocess.check_call(['/usr/sbin/logrotate', '/etc/logrotate.conf'])
|
|
except:
|
|
logging.error('Failed logrotate')
|
|
|
|
def run_logrotate_forced(self):
|
|
self.last_logrotate = int(time.time())
|
|
try:
|
|
subprocess.check_call(['/usr/sbin/logrotate', '-f', '/etc/logrotate.conf'])
|
|
except:
|
|
logging.error('Failed logrotate -f')
|
|
|
|
def timecheck(self):
|
|
# If we're more than a couple of mins since the last timecheck,
|
|
# there could have been a large time correction, which would skew
|
|
# our timing. Reset the logrotate timestamp to ensure we don't miss anything
|
|
now = int(time.time())
|
|
|
|
if self.last_check > now or (now - self.last_check) > 120:
|
|
self.last_logrotate = 0
|
|
|
|
self.last_check = now
|
|
|
|
def check_var_log(self):
|
|
self.timecheck()
|
|
|
|
try:
|
|
prepostrotate.ensure_bash_log_locked_down()
|
|
except Exception as e:
|
|
logging.exception('Failed to ensure bash.log locked', e)
|
|
|
|
pf = self.get_percent_free()
|
|
|
|
if pf > PERCENT_FREE_CRITICAL:
|
|
# We've got more than 10% free space, so just run logrotate every ten minutes
|
|
now = int(time.time())
|
|
if self.last_logrotate > now or (now - self.last_logrotate) > LOGROTATE_PERIOD:
|
|
logging.info("Running logrotate")
|
|
self.run_logrotate()
|
|
|
|
return
|
|
|
|
logging.warning("Reached critical disk usage for /var/log: %d%% free" % pf)
|
|
|
|
# We're running out of disk space, so we need to start deleting files
|
|
try:
|
|
for index in range(20, 11, -1):
|
|
logging.info("/var/log is %d%% free. Purging rotated .%d.gz files to free space" % (pf, index))
|
|
self.get_monitored_files()
|
|
self.purge_files(index)
|
|
pf = self.get_percent_free()
|
|
|
|
if pf >= PERCENT_FREE_MAJOR:
|
|
# We've freed up enough space. Do a logrotate and leave
|
|
logging.info("/var/log is %d%% free. Running logrotate" % pf)
|
|
self.run_logrotate()
|
|
return
|
|
except Exception as e:
|
|
logging.exception('Failed purging rotated files', e)
|
|
|
|
# We still haven't freed up enough space, so try a logrotate
|
|
logging.info("/var/log is %d%% free. Running logrotate" % pf)
|
|
self.run_logrotate()
|
|
|
|
pf = self.get_percent_free()
|
|
if pf >= PERCENT_FREE_MAJOR:
|
|
return
|
|
|
|
# Try a forced rotate
|
|
logging.info("/var/log is %d%% free. Running forced logrotate" % pf)
|
|
self.run_logrotate_forced()
|
|
|
|
pf = self.get_percent_free()
|
|
if pf >= PERCENT_FREE_MAJOR:
|
|
return
|
|
|
|
# Start deleting unmonitored files
|
|
try:
|
|
self.get_monitored_files()
|
|
self.get_unmonitored_files()
|
|
logging.info("/var/log is %d%% free. Deleting unmonitored files to free space" % pf)
|
|
for fname in sorted(self.unmonitored_files, key=os.path.getsize, reverse=True):
|
|
logging.info("Deleting unmonitored file: %s" % fname)
|
|
try:
|
|
os.remove(fname)
|
|
except OSError as e:
|
|
logging.error('Failed to remove file: %s', e)
|
|
pf = self.get_percent_free()
|
|
if pf >= PERCENT_FREE_MAJOR:
|
|
logging.info("/var/log is %d%% free." % pf)
|
|
return
|
|
except Exception as e:
|
|
logging.exception('Failed checking unmonitored files', e)
|
|
|
|
# Nothing else to be done
|
|
logging.info("/var/log is %d%% free." % pf)
|
|
return
|
|
|