nfv/nfv/nfv-debug-tools/histogram_analysis/Histogram.py

#!/usr/bin/env python
################################################################################
#
# Copyright (c) 2017 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
################################################################################
#
# Description: Un-gzips nfv-vim log files within the same directory and
# generates one CSV per process found in the histogram data, naming each
# CSV after the process whose log data it stores. Each CSV file contains
# 3 columns of data: TIMESTAMP, average execution time, and hits per
# sample.
# This script is meant to be used with the plotter.py data visualization
# script.
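#
# Each row written to a CSV therefore has the form shown below (the trailing
# comma is produced by the script itself):
#
#     <timestamp>,<average execution time>,<total hits>,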
#
# Behaviour : The script runs without any input arguments. It copies all
# nfv-vim log files found, gzipped or not, into a directory called logs/
# where it un-gzips any zipped files (the original gzipped files are
# retained). It then parses each nfv-vim log file found, starting from the
# highest numbered, e.g. nfv-vim.log.20, down to the lowest, nfv-vim.log.1
# or nfv-vim.log. The script expects at most two digits after .log.,
# i.e. [0-9][0-9].
# CSV files are stored in a new directory called csv/.
# For each sample, the timestamp is written to the CSV, followed by the
# average execution time: each execution time in the sample is multiplied
# by its number of hits, these products are summed, and the sum is divided
# by the total number of hits for that sample. The third column is simply
# the total number of hits for that sample (across all execution times).
# A worked example is given below.
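#
# For example, with purely illustrative numbers: if one sample contains an
# execution time of 10 ms with 3 hits and an execution time of 20 ms with
# 1 hit, the row written for that sample records an average of
# (10*3 + 20*1) / (3 + 1) = 12.5 and a total hit count of 4.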
#
# Place this script in a directory containing the gzipped or ungzipped logs you would
# like to generate CSV files for.
#
# Run the script with ./Histogram.py
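#
# For example (the log directory path below is only a placeholder):
#
#     cp Histogram.py /path/to/nfv-vim-logs/
#     cd /path/to/nfv-vim-logs/
#     ./Histogram.py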
#
################################################################################
import os
import glob
from subprocess import call
from collections import defaultdict
dir = os.path.dirname(__file__)
csvDir = os.path.join(dir, 'csv/')
logDir = os.path.join(dir, 'logs/')

if not os.path.exists(csvDir):
    os.makedirs(csvDir)
if not os.path.exists(logDir):
    os.makedirs(logDir)
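
# Copy any nfv-vim logs (plain and gzipped) into logs/, then un-gzip the
# copies; gunzip runs on the copies only, so the original .gz files are kept.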
call("cp nfv-vim.log nfv-vim.log.[0-9] nfv-vim.log.[0-9][0-9] nfv-vim.log.[0-9].gz nfv-vim.log.[0-9][0-9].gz logs/", shell=True)
call("gunzip logs/nfv-vim.log.[0-9].gz logs/nfv-vim.log.[0-9][0-9].gz", shell=True)


class Parser:
    def __init__(self):
        self.proc = ""                  # Name of process being read
        self.timestamp = ""             # Timestamp found on line stating process name
        self.write = False              # Flag indicating data has yet to be written
        self.stored = False             # Flag indicating that there is new data stored
        self.length = 0                 # Time duration of process
        self.instanceCount = 0          # Number of hits for the particular duration
        self.rollingCount = 0           # Sum of the hits for each duration parsed within the sample
        self.total = 0                  # Specific duration multiplied by number of hits for that duration
        self.avg = 0                    # Average execution time of process
        self.unit = ""                  # Unit execution time was recorded in
        self.csvs = defaultdict(list)   # Stores unique processes in a dict of lists

    # Resets variables when a new process begins to be read in logs
    def reset(self):
        self.length = 0
        self.avg = 0
        self.instanceCount = 0
        self.rollingCount = 0
        self.total = 0
        self.proc = ""
        self.unit = ""
        self.write = False
        self.stored = False

    # Adds log data for a process to the csvs dictionary
    def add(self, proc, total, timestamp, rollingCount):
        if rollingCount != 0:
            avg = total / float(rollingCount)
        else:
            avg = 0
        self.csvs[proc].append(timestamp + "," + str(avg) + "," + str(rollingCount) + ",")
        self.reset()

    def main(self):
        # Sorts the log files to read them in descending order
        sorted_files = glob.glob(logDir + "nfv-vim.log*")
        sorted_files.sort(reverse=True)
        for logFile in sorted_files:
            with open(logFile, "r+") as f:
                cfgLines = f.read().splitlines()
                for line in cfgLines:
                    if "Histogram" in line:
                        # A "Histogram:" line names the process for the sample
                        # that follows; flush any data gathered for the
                        # previous sample before starting the new one.
                        if self.write or self.stored:
                            self.add(self.proc,
                                     self.total,
                                     self.timestamp,
                                     self.rollingCount)
                        self.write = True
                        self.proc = line.partition("Histogram: ")[2]
                        self.proc = ("".join(self.proc.split())).rstrip(':')
                        self.timestamp = line.split()[0]
                    elif "histogram.py" in line:
                        # Data lines carry the execution time, its unit and the
                        # hit count as whitespace-separated fields; decisecond
                        # and second values are scaled so every duration is
                        # accumulated in the same (millisecond) base.
                        line = line.split()
                        self.length = int(line[8])
                        self.unit = line[9]
                        self.instanceCount = int(line[10])
                        if "decisecond" in self.unit:
                            self.length *= 100
                        elif "secs" in self.unit:
                            self.length *= 1000
                        self.total = self.total + self.instanceCount * self.length
                        self.rollingCount += self.instanceCount
                        self.stored = True
        # Flush the last sample, then write one CSV per process
        if self.write or self.stored:
            self.add(self.proc, self.total, self.timestamp, self.rollingCount)
        for process in self.csvs:
            with open(os.path.join(csvDir, process + ".csv"), 'w+') as csvOut:
                for line in self.csvs[process]:
                    csvOut.write(line + "\n")


process = Parser()
process.main()
print("\nComplete\n")