nfv/nfv/nfv-debug-tools/histogram_analysis/Histogram.py

#!/usr/bin/env python
################################################################################
#
# Copyright (c) 2017 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
################################################################################
#
# Description: Un-gzips nfv-vim log files within the same directory and generates
#              CSVs based on the processes found related to histogram data,
#              naming each CSV after the process found in the log files whose
#              log data it is storing. Each CSV file contains 3 columns of
#              data: TIMESTAMP, average execution time, hits per sample.
#              This script is meant to be used with the plotter.py data visualization
#              script.
#
# Behaviour   : The script runs without any input arguments, copies all nfv-vim
#               log files found, gzipped or not, and places them into a directory
#               called logs/ where it un-gzips any zipped files (this retains the
#               original gzipped files). After this it begins parsing each nfv-vim
#               log file found, starting from the highest numbered e.g. nfv-vim.log.20
#               down to the lowest nfv-vim.log.1 or nfv-vim.log. This script expects
#               the highest number of digits following .log. to be two, i.e. [0-9][0-9].
#               CSV files will be stored in a new directory called csv/
#               Timestamps for each sample are written to the CSV, followed by the
#               average execution time which is each execution time in the sample,
#               multiplied by its respective number of hits during that sample, and
#               then all of these values summed together, divided by the sum comprising
#               the total number of hits for that particular sample. The third column
#               is simply the total number of hits for that same sample (across all
#               execution times).
#
# Place this script in a directory containing the gzipped or ungzipped logs you would
# like to generate CSV files for.
#
# Run the script with ./Histogram.py
#
################################################################################

from collections import defaultdict
import glob
import os
from subprocess import call

# Directory containing this script; the nfv-vim logs to analyse are expected
# to sit next to it. It is an empty string when the script is started by bare
# file name from its own directory.
# NOTE: the name shadows the builtin dir(); it is kept so that the existing
# module-level names stay unchanged.
dir = os.path.dirname(__file__)
csvDir = os.path.join(dir, 'csv/')   # Output directory for the generated CSVs
logDir = os.path.join(dir, 'logs/')  # Working copies of the log files

if not os.path.exists(csvDir):
    os.makedirs(csvDir)

if not os.path.exists(logDir):
    os.makedirs(logDir)

# The shell is required here so that the [0-9] patterns are expanded. Both
# commands run with the script's directory as their working directory so that
# the relative "logs/" target is the very same directory as logDir, no matter
# where the script was launched from. (cwd=None keeps the current directory,
# which is already the script directory when dir is empty.)
# Patterns that match nothing only make cp/gunzip print an error; the return
# codes are deliberately ignored so the remaining files are still processed.
call("cp nfv-vim.log nfv-vim.log.[0-9] nfv-vim.log.[0-9][0-9] "
     "nfv-vim.log.[0-9].gz nfv-vim.log.[0-9][0-9].gz logs/",
     shell=True, cwd=dir or None)
# Un-gzip the copies only; the original archives beside the script are kept.
call("gunzip logs/nfv-vim.log.[0-9].gz logs/nfv-vim.log.[0-9][0-9].gz",
     shell=True, cwd=dir or None)


class Parser(object):
    """Collects histogram samples from nfv-vim logs and writes one CSV per process.

    A sample starts at a line containing "Histogram" (which names the process
    and carries the timestamp) and is followed by bucket lines containing
    "histogram.py". Every finished sample becomes one CSV row of the form
    "<timestamp>,<average execution time>,<total hits>,".
    """

    # Base name shared by the live log file and all of its rotations
    LOG_BASENAME = "nfv-vim.log"

    def __init__(self):
        self.proc = ""  # Name of process being read
        self.timestamp = ""  # Timestamp found on line stating process name
        self.write = False  # Flag indicating data has yet to be written
        self.stored = False  # Flag indicating that there is new data stored
        self.length = 0  # Time duration of process
        self.instanceCount = 0  # Number of hits for the particular duration
        self.rollingCount = 0  # Sum of the hits for each duration parsed within the sample
        self.total = 0  # Specific duration multiplied by number of hits for that duration
        self.avg = 0  # Average execution time of process
        self.unit = ""  # Unit execution time was recorded in
        self.csvs = defaultdict(list)  # Stores unique processes in a dict of lists

    def reset(self):
        """Clear the per-sample state before the next process is read.

        The timestamp and the rows already collected in csvs are kept.
        """
        self.length = 0
        self.avg = 0
        self.instanceCount = 0
        self.rollingCount = 0
        self.total = 0
        self.proc = ""
        self.unit = ""
        self.write = False
        self.stored = False

    def add(self, proc, total, timestamp, rollingCount):
        """Append one finished sample to the rows of proc and reset the state.

        proc         -- process name, used as the key in csvs
        total        -- sum of (duration * hits) over the sample
        timestamp    -- timestamp string written in the first column
        rollingCount -- total number of hits in the sample

        The average is total / rollingCount, or 0 when there were no hits.
        """
        if rollingCount != 0:
            avg = total / float(rollingCount)
        else:
            avg = 0
        # The trailing comma is part of the established row format.
        self.csvs[proc].append(timestamp + "," + str(avg) + "," + str(rollingCount) + ",")
        self.reset()

    @classmethod
    def _rotation_index(cls, path):
        """Return the rotation number encoded in a log file name.

        "nfv-vim.log" yields 0 and "nfv-vim.log.<N>" yields N. Any other
        suffix (for example a leftover ".gz") yields -1.
        """
        suffix = os.path.basename(path)[len(cls.LOG_BASENAME):]
        if not suffix:
            return 0
        number = suffix[1:]
        if suffix.startswith(".") and number.isdigit():
            return int(number)
        return -1

    def _consume(self, line):
        """Update the parser state from a single log line."""
        if "Histogram" in line:
            # A new sample begins: flush the one collected so far, if any.
            if self.write or self.stored:
                self.add(self.proc,
                         self.total,
                         self.timestamp,
                         self.rollingCount)
            self.write = True
            # The process name is everything after "Histogram: " with all
            # whitespace removed and any trailing colons stripped.
            name = line.partition("Histogram: ")[2]
            self.proc = "".join(name.split()).rstrip(':')
            self.timestamp = line.split()[0]
        elif "histogram.py" in line:
            fields = line.split()
            # Parse into locals first so that a malformed line (too few
            # fields or non-numeric values) is skipped without leaving the
            # state half-updated or aborting the whole run.
            try:
                length = int(fields[8])
                unit = fields[9]
                hits = int(fields[10])
            except (IndexError, ValueError):
                return
            # Scale the duration according to its unit; presumably this
            # normalises every bucket to milliseconds.
            if "decisecond" in unit:
                length *= 100
            elif "secs" in unit:
                length *= 1000
            self.length = length
            self.unit = unit
            self.instanceCount = hits
            self.total = self.total + hits * length
            self.rollingCount += hits
            self.stored = True

    def main(self, log_dir=None, csv_dir=None):
        """Parse every nfv-vim log file and write the per-process CSV files.

        log_dir -- directory holding the un-gzipped log files; defaults to
                   the module-level logDir
        csv_dir -- directory the CSV files are written to; defaults to the
                   module-level csvDir
        """
        if log_dir is None:
            log_dir = logDir
        if csv_dir is None:
            csv_dir = csvDir

        # Read the logs from the highest rotation number down to the live
        # log. The rotation number is compared numerically because a plain
        # string sort would place nfv-vim.log.9 ahead of nfv-vim.log.10.
        # Files with an unrecognised suffix are read last.
        log_files = glob.glob(os.path.join(log_dir, self.LOG_BASENAME + "*"))
        log_files.sort(key=lambda path: (self._rotation_index(path), path),
                       reverse=True)

        for log_file in log_files:
            # Read-only access is sufficient; the logs are never modified.
            with open(log_file, "r") as f:
                for line in f:
                    self._consume(line)
            # Flush whatever sample is still pending at the end of each file.
            if self.write or self.stored:
                self.add(self.proc, self.total, self.timestamp, self.rollingCount)

        for proc_name in self.csvs:
            with open(os.path.join(csv_dir, proc_name + ".csv"), 'w') as csv_out:
                for row in self.csvs[proc_name]:
                    csv_out.write(row + "\n")


# Script entry: build a parser, run it over the log files collected in logs/
# (this also writes the CSV files), then report completion on stdout.
process = Parser()
process.main()
print("\nComplete\n")