nfv/nfv/nfv-debug-tools/histogram_analysis/Histogram.py

#!/usr/bin/env python
################################################################################
#
# Copyright (c) 2017 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
################################################################################
#
# Description: Un-gzips nfv-vim log files within the same directory and
# generates one CSV per process found in the histogram data, naming each
# CSV after the process whose log data it stores. Each CSV file contains
# 3 columns of data: TIMESTAMP, average execution time, and hits per
# sample.
# This script is meant to be used with the plotter.py data visualization
# script.
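#
# Each row written to a CSV therefore has the form shown below (the trailing
# comma is produced by the script itself):
#
#     <timestamp>,<average execution time>,<total hits>,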
#
# Behaviour : The script runs without any input arguments. It copies all
# nfv-vim log files found, gzipped or not, into a directory called logs/
# where it un-gzips any zipped files (the original gzipped files are
# retained). It then parses each nfv-vim log file found, starting from the
# highest numbered, e.g. nfv-vim.log.20, down to the lowest, nfv-vim.log.1
# or nfv-vim.log. The script expects at most two digits after .log.,
# i.e. [0-9][0-9].
# CSV files are stored in a new directory called csv/.
# For each sample, the timestamp is written to the CSV, followed by the
# average execution time: each execution time in the sample is multiplied
# by its number of hits, these products are summed, and the sum is divided
# by the total number of hits for that sample. The third column is simply
# the total number of hits for that sample (across all execution times).
# A worked example is given below.
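#
# For example, with purely illustrative numbers: if one sample contains an
# execution time of 10 ms with 3 hits and an execution time of 20 ms with
# 1 hit, the row written for that sample records an average of
# (10*3 + 20*1) / (3 + 1) = 12.5 and a total hit count of 4.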
#
# Place this script in a directory containing the gzipped or ungzipped logs you would
# like to generate CSV files for.
#
# Run the script with ./Histogram.py
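#
# For example (the log directory path below is only a placeholder):
#
#     cp Histogram.py /path/to/nfv-vim-logs/
#     cd /path/to/nfv-vim-logs/
#     ./Histogram.py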
#
################################################################################
import os
import glob
from subprocess import call
from collections import defaultdict
dir = os.path.dirname(__file__)
csvDir = os.path.join(dir, 'csv/')
logDir = os.path.join(dir, 'logs/')

if not os.path.exists(csvDir):
    os.makedirs(csvDir)
if not os.path.exists(logDir):
    os.makedirs(logDir)
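
# Copy any nfv-vim logs (plain and gzipped) into logs/, then un-gzip the
# copies; gunzip runs on the copies only, so the original .gz files are kept.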
call("cp nfv-vim.log nfv-vim.log.[0-9] nfv-vim.log.[0-9][0-9] nfv-vim.log.[0-9].gz nfv-vim.log.[0-9][0-9].gz logs/", shell=True)
call("gunzip logs/nfv-vim.log.[0-9].gz logs/nfv-vim.log.[0-9][0-9].gz", shell=True)


class Parser:
    def __init__(self):
        self.proc = ""                  # Name of process being read
        self.timestamp = ""             # Timestamp found on line stating process name
        self.write = False              # Flag indicating data has yet to be written
        self.stored = False             # Flag indicating that there is new data stored
        self.length = 0                 # Time duration of process
        self.instanceCount = 0          # Number of hits for the particular duration
        self.rollingCount = 0           # Sum of the hits for each duration parsed within the sample
        self.total = 0                  # Specific duration multiplied by number of hits for that duration
        self.avg = 0                    # Average execution time of process
        self.unit = ""                  # Unit execution time was recorded in
        self.csvs = defaultdict(list)   # Stores unique processes in a dict of lists

    # Resets variables when a new process begins to be read in logs
    def reset(self):
        self.length = 0
        self.avg = 0
        self.instanceCount = 0
        self.rollingCount = 0
        self.total = 0
        self.proc = ""
        self.unit = ""
        self.write = False
        self.stored = False

    # Adds log data for a process to the csvs dictionary
    def add(self, proc, total, timestamp, rollingCount):
        if rollingCount != 0:
            avg = total / float(rollingCount)
        else:
            avg = 0
        self.csvs[proc].append(timestamp + "," + str(avg) + "," + str(rollingCount) + ",")
        self.reset()

    def main(self):
        # Sorts the log files to read them in descending order
        sorted_files = glob.glob(logDir + "nfv-vim.log*")
        sorted_files.sort(reverse=True)
        for logFile in sorted_files:
            with open(logFile, "r+") as f:
                cfgLines = f.read().splitlines()
                for line in cfgLines:
                    if "Histogram" in line:
                        # A "Histogram:" line names the process for the sample
                        # that follows; flush any data gathered for the
                        # previous sample before starting the new one.
                        if self.write or self.stored:
                            self.add(self.proc,
                                     self.total,
                                     self.timestamp,
                                     self.rollingCount)
                        self.write = True
                        self.proc = line.partition("Histogram: ")[2]
                        self.proc = ("".join(self.proc.split())).rstrip(':')
                        self.timestamp = line.split()[0]
                    elif "histogram.py" in line:
                        # Data lines carry the execution time, its unit and the
                        # hit count as whitespace-separated fields; decisecond
                        # and second values are scaled so every duration is
                        # accumulated in the same (millisecond) base.
                        line = line.split()
                        self.length = int(line[8])
                        self.unit = line[9]
                        self.instanceCount = int(line[10])
                        if "decisecond" in self.unit:
                            self.length *= 100
                        elif "secs" in self.unit:
                            self.length *= 1000
                        self.total = self.total + self.instanceCount * self.length
                        self.rollingCount += self.instanceCount
                        self.stored = True
        # Flush the last sample, then write one CSV per process
        if self.write or self.stored:
            self.add(self.proc, self.total, self.timestamp, self.rollingCount)
        for process in self.csvs:
            with open(os.path.join(csvDir, process + ".csv"), 'w+') as csvOut:
                for line in self.csvs[process]:
                    csvOut.write(line + "\n")


process = Parser()
process.main()
print("\nComplete\n")