#!/usr/bin/perl #Copyright (c) 2016 Wind River Systems, Inc. # #SPDX-License-Identifier: Apache-2.0 # # Usage: # parse_memstats [--list | --all | --name ] file1 file2 file3.gz ... # [--pid ] ... # [--cmd ] ... # [--exact ...] # [--excl ] ... # [--detail] # [--thresh ] # [--transient] # [--dir ] # [--dur ] # [--report] # [--help] # # Purpose: Parse and summarize memory usage trends for total available memory, # and usage for each process. Display hirunners and usage rate of change. # # Modification history: # - 2014-Feb-27 - Jim Gauld, prototype created. # - 2016-Nov-23 - Craig Griffin, updated to ouptut additional data for analysis ############################################################################## use 5.10.0; use warnings; use strict; use Time::Local 'timelocal_nocheck'; # inverse time functions use File::Basename; use File::Spec (); use Data::Dumper; use constant Ki => 1024.0; use constant Mi => 1024.0*1024.0; use constant Gi => 1024.0*1024.0*1024.0; my $SCRIPT = basename($0); my $DEFAULT_PATH = "."; my $iter = 0; # Hash storage data-structures my (%data, %overall, %timestamps, %days, %matched, %stats) = (); my (%mem_current, %mem_stats, %mem_slope) = (); my (%minmaxdata) = (); # Timestamp variables my ($wday, $month, $day, $hh, $mm, $ss, $yy, $ns) = (); # Uptime data my ($uptime, $idle); # Meminfo data my ($MemTotal, $MemFree, $Buffers, $Cached, $CommitLimit, $Committed_AS, $Slab, $SReclaimable) = (); # Process data my ($LWP, $PID, $PPID, $NLWP, $RSS, $VSZ, $COMMAND) = (); # Free memory my ($is_strict) = (0); my ($avail_free_mem_MB, $unlinked_files_MB) = (); my ($fs_root_MB, $fs_root_p_use, $fs_tmp_MB, $fs_tmp_p_use) = (); # Argument list parameters our ($arg_all, $arg_match, $arg_name, @arg_pids, @arg_commands, @arg_exact, @arg_commands_excl, $arg_list, $arg_detail, $arg_thresh, $arg_transient, $arg_path, $arg_dur, $arg_report, @arg_files) = (); # CGRIFFIN update my ($filename, $fh); my ($cmdabbrev); my ($idx_pidcount, $idx_nlwp, $idx_minrss, $idx_minvsz, $idx_maxrss, $idx_maxvsz, $idx_sumrss, $idx_sumvsz, $idx_output) = (0, 1, 2, 3, 4, 5, 6, 7, 8); # Determine location of gunzip binary our $GUNZIP = which('gunzip'); if (!(defined $GUNZIP)) { die "*error* cannot find 'gunzip' binary. Cannot continue.\n"; } our $BUNZIP2 = which('bunzip2'); if (!(defined $BUNZIP2)) { die "*error* cannot find 'bunzip2' binary. Cannot continue.\n"; } # Parse input arguments and print tool usage if necessary # -- note: @arg_pids, and @arg_commands are only defined if they are set &get_parse_memstats_args(\$arg_all, \$arg_match, \$arg_name, \@arg_pids, \@arg_commands, \@arg_exact, \@arg_commands_excl, \$arg_list, \$arg_detail, \$arg_thresh, \$arg_transient, \$arg_path, \$arg_dur, \$arg_report, \@arg_files); # Print list of memory information if (defined $arg_list) { my @list = (); my %chrono = (); my ($host, $time) = (); opendir(DIR, $arg_path) || die "can't opendir $arg_path: ($!)"; @list = sort {$a cmp $b} grep { /_(\d{4}-\d{2}-\d{2}_\d{4})_memory?.?g?z\b/ && -f "$arg_path/$_" } readdir(DIR); closedir DIR; foreach my $file (@list) { $_ = $file; ($host, $time) = /(.*)_(\d{4}-\d{2}-\d{2}_\d{4})_memory/; $chrono{$host}{$time} = 1; } # Print out summary of hosts with oldests and newest files printf "%s: List of available 'memory' data:\n\n", $SCRIPT; printf "%-20s %15s %15s\n", "host", "oldest", "newest"; printf "%-20s %15s %15s\n", "-"x20, "-"x15, "-"x15; foreach $host (sort keys %chrono) { my @times = sort {$a cmp $b} keys %{$chrono{$host}}; printf "%-20s %15s %15s\n", $host, $times[0], $times[-1]; } exit 1; } # Print selected options (except for file list) if ((@arg_pids) || (@arg_commands)) { printf "selected pids/patterns: @arg_pids @arg_commands\n"; } printf "this may take a while...\n"; # Determine file list based on smart file GLOB if (!@arg_files) { if (defined $arg_name) { @arg_files = <$arg_path/*$arg_name*>; } else { @arg_files = <$arg_path/*memory*> } if (!@arg_files) { printf "no files selected.\n"; } } # Compile regular expressions command string patterns # -- store list of expressions to INCLUDE my @re_commands = (); foreach my $arg (@arg_commands) { push @re_commands, qr/\Q$arg\E/; } my @re_exact_commands = (); foreach my $arg (@arg_exact) { push @re_exact_commands, qr/\Q$arg\E/; } # -- store list of expressions to EXCLUDE my @nre_commands = (); push @arg_commands_excl, $SCRIPT; foreach my $arg (@arg_commands_excl) { push @nre_commands, qr/\Q$arg\E/; } # Determine list of files per matching hostname my %filenames = (); foreach my $file (sort @arg_files) { # BYPASS FILENAME CHECK FOR NOW if (1 == 0) { if ($file !~ /_\d{4}-\d{2}-\d{2}_\d{4}_mem/) { printf "ignoring: '$file', does not match '_--

-_memory' format.\n"; next; } } my $host = $file; $host =~ s/_\d{4}-\d{2}-\d{2}_\d{4}_.*$//; $host =~ s/^.*\///; printf "host = $host, file = $file\n"; push @{$filenames{$host}}, $file; } # Prune file list retain most recent number of --dur per host my $max_files = int($arg_dur*24); foreach my $host (keys %filenames) { if (scalar(@{$filenames{$host}}) > $max_files) { # prune to size of max_files, keeping end of list (most recent) @{$filenames{$host}} = splice(@{$filenames{$host}},-$max_files); } } my $seconds_in_day = 24*60*60; my $first_time = 0.0; my $last_time = 0.0; # PROCESS ALL MATCHING HOSTS # -- then process all files per host in chronological order foreach my $host (sort keys %filenames) { my $pass = 1; REPEAT_CALCULATION: $iter = 0; $first_time = 0.0; %matched = (); %data = (); %timestamps = (); %overall = (); %days = (); %stats = (); %mem_stats = (); # Evalutate first and last filename's time and convert time to days relative to first_time my $first_file = ${$filenames{$host}}[0]; my $last_file = ${$filenames{$host}}[-1]; $_ = $first_file; ($yy, $month, $day, $hh) = /_(\d{4})-(\d{2})-(\d{2})_(\d{2})\d{2}_mem/; $first_time = timelocal_nocheck(00, 00, $hh, $day, $month-1, $yy-1900)/$seconds_in_day; my $first_date = sprintf("%4d-%02d-%02d %02d:00", $yy, $month, $day, $hh); $_ = $last_file; ($yy, $month, $day, $hh) = /_(\d{4})-(\d{2})-(\d{2})_(\d{2})\d{2}_mem/; $last_time = timelocal_nocheck(59, 59, $hh, $day, $month-1, $yy-1900)/$seconds_in_day - $first_time; my $last_date = sprintf("%4d-%02d-%02d %02d:00", $yy, $month, $day, $hh); FILE_LIST: foreach my $file ( @{$filenames{$host}} ) { my $FOUND = 0; # handle files being decompressed while parser is running if ( -e $file ) { if ($file =~ /\.gz$/) { open(FILE, "$::GUNZIP -c $file |") || die "Cannot open file: $file ($!)\n"; } elsif ($file =~ /\.bz2$/) { open(FILE, "$::BUNZIP2 -c $file |") || die "Cannot open file: $file ($!)\n"; } else { open(FILE, $file) || die "Cannot open file: $file ($!)\n"; } $FOUND = 1; } else { if ($file =~ /\.gz$/) {$file =~ s/\.gz//;} else {$file .= '.gz';} if ($file =~ /\.bz2$/) {$file =~ s/\.bz2//;} else {$file .= '.bz2';} if ( -e $file ) { if ($file =~ /\.gz$/) { open(FILE, "$::GUNZIP -c $file |") || die "Cannot open file: $file ($!)\n"; } elsif ($file =~ /\.bz2$/) { open(FILE, "$::BUNZIP2 -c $file |") || die "Cannot open file: $file ($!)\n"; } else { open(FILE, $file) || die "Cannot open file: $file ($!)\n"; } $FOUND = 1; } } next if ($FOUND == 0); # Parse file line at a time READ_LOOP: while($_ = ) { s/[\0\e\f\r\a]//g; chomp; # strip control characters if any # START OF SAMPLE Time: Parse hires timestamp, ignore timezone if (/time:\s+(\w+)\s+(\d{4})-(\d{2})-(\d{2})\s+(\d{2}):(\d{2}):(\d{2})\.(\d{9})\s+\w+\s+\S+\s+uptime:\s+(\S+)\s+(\S+)/) { $wday = $1; $yy = $2; $month = $3; $day = $4; $hh = $5; $mm = $6; $ss = $7; $ns = $8; $uptime = $9; $idle = $10; $FOUND = -1 if (($FOUND == 1) || ($FOUND == -2)); $timestamps{$iter} = [($wday,$month,$day,$hh,$mm,$ss,$yy)]; $days{$iter} = timelocal_nocheck($ss, $mm, $hh, $day, $month-1, $yy-1900)/$seconds_in_day - $first_time; # Initialize variables (in case they not found later in file) ($avail_free_mem_MB, $unlinked_files_MB) = (0.0, 0.0); ($fs_root_MB, $fs_root_p_use, $fs_tmp_MB, $fs_tmp_p_use) = (0.0, 0.0, 0.0, 0.0); ($MemTotal, $MemFree, $Buffers, $Cached, $CommitLimit, $Committed_AS, $Slab, $SReclaimable) = (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0); %mem_current = (); next READ_LOOP; } if (/^MEMINFO:/ || /^# \/proc\/meminfo/) { # handle case where we detect the sample is incomplete, and delete if ($FOUND != -1) { close(FILE); delete $days{$iter} if (defined $days{$iter}); delete $timestamps{$iter} if (defined $timestamps{$iter}); delete $data{$iter} if (defined $data{$iter}); delete $overall{$iter} if (defined $overall{$iter}); next FILE_LIST; } # Process all entries of MEMINFO MEMINFO_LOOP: while($_ = ) { s/[\0\e\f\r\a]//g; chomp; # strip control characters if any last MEMINFO_LOOP if (/^\s*$/); # end at blank-line if (/\bMemTotal:\s+(\d+)\s+kB/) { $MemTotal = $1; next MEMINFO_LOOP; } if (/\bMemFree:\s+(\d+)\s+kB/) { $MemFree = $1; next MEMINFO_LOOP; } if (/\bBuffers:\s+(\d+)\s+kB/) { $Buffers = $1; next MEMINFO_LOOP; } if (/\bCached:\s+(\d+)\s+kB/) { $Cached = $1; next MEMINFO_LOOP; } if (/\bCommitLimit:\s+(\d+)\s+kB/) { $CommitLimit = $1; next MEMINFO_LOOP; } if (/\bCommitted_AS:\s+(\d+)\s+kB/) { $Committed_AS = $1; next MEMINFO_LOOP; } if (/\bSlab:\s+(\d+)\s+kB/) { $Slab = $1; next MEMINFO_LOOP; } if (/\bSReclaimable:\s+(\d+)\s+kB/) { $SReclaimable = $1; next MEMINFO_LOOP; } } # Determine whether this host is using strict memory accounting # [ DETERMINE BASED ON OUTPUT FROM /proc - JUST HARD CODE FOR NOW ] $is_strict = 0; # Determine available memory (MiB), and account for strict vs overcommit if ($is_strict) { # Strict memory accounting $avail_free_mem_MB = ($CommitLimit - $Committed_AS)/Ki; } else { # non-strict -- assume this blade overcommits memory # - KernelReserve is 20MiB / 1GiB physical + 1 MB engineering buffer # [JGAULD: KernelReserve NEEDS VALIDATION, DOES NOT WORK, IS WAY TOO BIG ] my $KernelReserve_MB = 0.0*20.0*$MemTotal/Mi + 1.0; my $reclaimable_MB = ($Cached + $Buffers + $SReclaimable)/Ki - $KernelReserve_MB; $reclaimable_MB = ($reclaimable_MB < 0.0) ? 0.0 : $reclaimable_MB; $avail_free_mem_MB = $MemFree/Ki + $reclaimable_MB; } } next if ! $_; # sometimes $_ can be null (at EOF) and causes subsequent warnings # EXPECTED RAW FORMAT ## ps -e -o ppid,pid,nlwp,rss:10,vsz:10,cmd --sort=-rss # PPID PID NLWP RSS VSZ CMD # 1 4941 22 59924 1802312 python /usr/bin/nova-compute if (/^PROCESS SUMMARY:/ || /^# ps -e/) { my $hcnt = 0; # find headings line HEADER_LOOP: while($_ = ) { s/[\0\e\f\r\a]//g; chomp; # strip control characters if any $hcnt++; last HEADER_LOOP if (/\bPID\b/); # end titles-line next READ_LOOP if (($hcnt == 1) && /^\s*$/); # end at blank-line (eg no 'ps' output) } next if ! $_; # sometimes $_ can be null (at EOF) and causes subsequent warnings # Parse file line at a time PROCESS_LOOP: while($_ = ) { my $found_pid = 0; s/[\0\e\f\r\a]//g; chomp; # strip control characters if any last PROCESS_LOOP if (/^\s*$/); # end at blank-line if (($PPID, $PID, $NLWP, $RSS, $VSZ, $COMMAND) = /(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(.*)/) { $found_pid = 1; } if ($found_pid == 1) { # Match on multiple regular expressions or multiple pids my $match = (defined $arg_all ? 1: 0); foreach my $pid (@arg_pids) {if ($pid == $PID) {$match = 1; goto FOUND_CMD;} } # inclusions foreach my $nre (@nre_commands) {if ($COMMAND =~ $nre) {$match = 0; goto FOUND_CMD;} } # exclusions foreach my $re (@re_commands) {if ($COMMAND =~ $re) {$match = 1; goto FOUND_CMD;} } # inclusions foreach my $re (@re_exact_commands) {if ($COMMAND =~ /^$re$/) {$match = 1; goto FOUND_CMD;} } # inclusions FOUND_CMD: if ($match == 1) { if ($arg_match) { $matched{'MATCH'}{$PID} = 1; $data{$iter}{'MATCH'}{$PID} = [($NLWP, $RSS, $VSZ)]; } else { $matched{$COMMAND}{$PID} = 1; $data{$iter}{$COMMAND}{$PID} = [($NLWP, $RSS, $VSZ)]; # CGRIFFIN update if (!(defined $minmaxdata{$COMMAND}{$PID})) { $minmaxdata{$COMMAND}{$PID} = [(1, $NLWP, $RSS, $VSZ, $RSS, $VSZ, $RSS, $VSZ, 0)]; } else { if ($RSS < $minmaxdata{$COMMAND}{$PID}[$idx_minrss]) { $minmaxdata{$COMMAND}{$PID}[$idx_minrss] = $RSS; } if ($VSZ < $minmaxdata{$COMMAND}{$PID}[$idx_minvsz]) { $minmaxdata{$COMMAND}{$PID}[$idx_minvsz] = $VSZ; } if ($RSS > $minmaxdata{$COMMAND}{$PID}[$idx_maxrss]) { $minmaxdata{$COMMAND}{$PID}[$idx_maxrss] = $RSS; } if ($VSZ > $minmaxdata{$COMMAND}{$PID}[$idx_maxvsz]) { $minmaxdata{$COMMAND}{$PID}[$idx_maxvsz] = $VSZ; } $minmaxdata{$COMMAND}{$PID}[$idx_sumrss] += $RSS; $minmaxdata{$COMMAND}{$PID}[$idx_sumvsz] += $VSZ; $minmaxdata{$COMMAND}{$PID}[$idx_pidcount] += 1; } } } # Store resident memory stats for all processes if ($is_strict) { if ($arg_match) { if ($match == 1) { $mem_current{'MATCH'} += $VSZ; } } else { $mem_current{$COMMAND} += $VSZ; } } else { if ($arg_match) { if ($match == 1) { $mem_current{'MATCH'} += $RSS; } } else { $mem_current{$COMMAND} += $RSS; } } } } } next if ! $_; # sometimes $_ can be null (at EOF) and causes subsequent warnings if (/^END OF SAMPLE:/ || /^----/ || / done$/) { next if !(defined $uptime); $FOUND = -2; # flag that we parsed complete memory sample # Handle incomplete sample in case there was no END OF SAMPLE if (!(defined $days{$iter}) || !(defined $timestamps{$iter})) { delete $days{$iter} if (defined $days{$iter}); delete $timestamps{$iter} if (defined $timestamps{$iter}); delete $data{$iter} if (defined $data{$iter}); delete $overall{$iter} if (defined $overall{$iter}); next; } $overall{$iter} = [($avail_free_mem_MB, $unlinked_files_MB, $fs_root_p_use, $uptime, $Slab, $SReclaimable)]; # Store data for hirunner stats for current day only my $day = $days{$iter}; if (!(defined $day)) { printf "iter = %s\n", $iter; printf "day=%s, last_time=%s\n", $day, $last_time; $Data::Dumper::Indent = 1; print Data::Dumper->Dump([\%overall], [qw(overall)]); #printf "overall:%s = %s\n", $iter, "@{$overall{$iter}}"; } if ($day >= ($last_time - 1.0)) { foreach my $cmd (keys %mem_current) { my ($count, $cur_mem, $max_mem, $sum_X, $sum_Y, $sum_XX, $sum_XY) = (0,0,0,0,0,0,0); if (defined $mem_stats{$cmd}) { ($count, $cur_mem, $max_mem, $sum_X, $sum_Y, $sum_XX, $sum_XY) = @{$mem_stats{$cmd}}; } $count++; $cur_mem = $mem_current{$cmd}; $max_mem = ($cur_mem > $max_mem) ? $cur_mem : $max_mem; $sum_X += $day; $sum_Y += $cur_mem; $sum_XX += ($day * $day); $sum_XY += ($cur_mem * $day); $mem_stats{$cmd} = [($count, $cur_mem, $max_mem, $sum_X, $sum_Y, $sum_XX, $sum_XY)]; } } $iter++; $uptime = (); next READ_LOOP; } } close(FILE); # Check that last sample was completed, else delete last hash key if defined # -- no use in showing a message to user, will just confuse if ($FOUND != -2 || !$overall{$iter}) { delete $days{$iter} if (defined $days{$iter}); delete $timestamps{$iter} if (defined $timestamps{$iter}); delete $data{$iter} if (defined $data{$iter}); delete $overall{$iter} if (defined $overall{$iter}); } } # PRINT SUMMARY FOR THIS HOSTNAME my ($idx_nlwp, $idx_rss, $idx_vsz) = (0, 1, 2); my ($idx_avail, $idx_unlinked, $idx_fs_root_p_use, $idx_uptime, $idx_slab) = (0, 1, 2, 3, 4); my ($idx_wday, $idx_month, $idx_day, $idx_hh, $idx_mm, $idx_ss, $idx_yy) = (0, 1, 2, 3, 4, 5, 6); my @iters = sort {$a <=> $b} keys %timestamps; if (scalar(@iters) == 0) { # do not continue processing for this host if no valid data print "\n", "="x80, "\n", "NO VALID DATA FOR: $host\n\n"; next; } $last_time = $days{ $iters[-1] }; # Calculate statistics (only on first pass) my $idx_mem = $is_strict ? $idx_vsz : $idx_rss; if ((defined $arg_report) && ($pass == 1)) { $pass++; my %num = (); foreach my $iter (@iters) { my $avail = ${ $overall{$iter} }[$idx_avail]; my $unlinked = ${ $overall{$iter} }[$idx_unlinked]; my $fs_root = ${ $overall{$iter} }[$idx_fs_root_p_use]; my $uptime = ${ $overall{$iter} }[$idx_uptime]; my $slab = ${ $overall{$iter} }[$idx_slab]; my $day = $days{$iter}; my @keys = (); # Cumulate stats for regression if ($days{$iter} >= ($last_time - 1.0)) { push @keys, '1day';} foreach my $key (@keys) { $stats{$key}{'DAY'}{'sum_X'} += $day; $stats{$key}{'DAY'}{'sum_XX'} += ($day * $day); $stats{$key}{'DAY'}{'sum_XY'} += ($day * $day); $num{$key}++; } foreach my $cmd (sort {$b cmp $a} keys %matched) { # Sum up key values for commands that match my $SUM_mem = 0.0; foreach my $pid (keys %{ $matched{$cmd} }) { if (defined $data{$iter}{$cmd}{$pid}) { $SUM_mem += ${ $data{$iter}{$cmd}{$pid} }[$idx_mem]; } } # Cumulate stats for regression foreach my $key (@keys) { $stats{$key}{'mem_' .$cmd}{'sum_X'} += $SUM_mem; $stats{$key}{'mem_' .$cmd}{'sum_XX'} += ($SUM_mem * $SUM_mem); $stats{$key}{'mem_' .$cmd}{'sum_XY'} += ($SUM_mem * $day); } } } # Perform simple linear regression on all variables foreach my $key (keys %stats) { foreach my $var (keys %{ $stats{$key} }) { ($stats{$key}{$var}{'int'}, $stats{$key}{$var}{'slope'}) = &linear_fit( $stats{$key}{'DAY'}{'sum_X'}, # 'DAY' is 'x' variable $stats{$key}{$var}{'sum_X'}, $stats{$key}{$var}{'sum_XY'}, $stats{$key}{'DAY'}{'sum_XX'}, $num{$key} ); } } %mem_slope = (); my $max_iter = $iters[-1]; # Compile regular expressions command string patterns # -- store list of expressions to INCLUDE @re_exact_commands = (); foreach my $arg (@arg_exact) { push @re_exact_commands, qr/\Q$arg\E/; } foreach my $cmd (keys %{mem_stats}) { my ($count, $cur_mem, $max_mem, $sum_X, $sum_Y, $sum_XX, $sum_XY) = @{$mem_stats{$cmd}}; my ($intercept, $slope) = &linear_fit($sum_X, $sum_Y, $sum_XY, $sum_XX, $count); $mem_slope{$cmd} = $slope/Ki; # slope (MiB/d) # sneaky check for ignoring transient processes # i.e. specific process exists less than half a day, # or is in less than half the samples if (($mem_slope{$cmd} >= $arg_thresh) && ((defined $arg_transient) || ((($max_iter >= 142) && ($count > 70)) || (($max_iter < 142) && ($max_iter/2 < $count)) ))) { push @re_exact_commands, qr/\Q$cmd\E/; } } goto REPEAT_CALCULATION; } print "\n", "="x80, "\n", "SUMMARY: host:$host ($first_date to $last_date)\n\n"; if (keys %matched) { # PRINT HEADING OF MATCHED COMMAND PATTERNS AND PIDS my %labels = (); my $ele = 0; foreach my $cmd (sort {$b cmp $a} keys %matched) { my @pids = keys %{$matched{$cmd}}; # Create short command name my $name = ""; $_ = $cmd; if (/^\[(.*)\]/) { $name = $1; } else { my @array = split(/\s+/, $cmd); $name = shift @array; $name =~ s/^.*\///; } $labels{$cmd} = sprintf("%d:%s", $ele, $name); printf "label: %s (%s)\n", $labels{$cmd}, $cmd; printf " pids:("; foreach my $pid (sort {$a <=> $b} keys %{ $matched{$cmd} }) { printf "%d,", $pid; } printf ")\n"; $ele++; } # PRINT COLUMN HEADINGS FOR EACH PATTERN printf "%10s %8s %8s %7s", "", "", "", ""; if (!(defined $arg_report)) { my $width = 22; foreach my $cmd (sort {$b cmp $a} keys %matched) { my @pids = keys %{$matched{$cmd}}; printf " | %22s", substr $labels{$cmd}, 0, $width; } } else { my $width = 11; foreach my $cmd (sort {$b cmp $a} keys %matched) { my @pids = keys %{$matched{$cmd}}; printf " %11s", substr $labels{$cmd}, 0, $width; } } print "\n"; } printf "%10s %8s %8s %7s", "DATE", "TIME", "AVAIL", "SLAB"; if (!(defined $arg_report)) { foreach my $cmd (sort {$b cmp $a} keys %matched) { printf " | %4s %8s %8s", "NLWP", "RSS", "VSZ"; } } else { foreach my $cmd (sort {$b cmp $a} keys %matched) { printf " %8s", ($is_strict == 1 ) ? "VSZ" : "RSS"; } } print "\n"; my %num = (); my $uptime_last = 0.0; my $num_reboots = 0; foreach my $iter (@iters) { my $avail = ${ $overall{$iter} }[$idx_avail]; my $unlinked = ${ $overall{$iter} }[$idx_unlinked]; my $fs_root = ${ $overall{$iter} }[$idx_fs_root_p_use]; my $uptime = ${ $overall{$iter} }[$idx_uptime]; my $slab = ${ $overall{$iter} }[$idx_slab]; my $day = $days{$iter}; my @keys = ('all'); if ($uptime < $uptime_last) { $num_reboots++; if (defined $arg_detail) { printf "--reboot detected----%28s", '-'x28; if (!(defined $arg_report)) { foreach (keys %matched) {printf "%25s", '-'x25;} } else { foreach (keys %matched) {printf "%12s", '-'x12;} } print "\n"; } } if ((defined $arg_detail) || ($iter == $iters[-1])) { printf "%04d-%02d-%02d %02d:%02d:%02d %8.2f %7.2f", ${ $timestamps{$iter} }[$idx_yy], ${ $timestamps{$iter} }[$idx_month], ${ $timestamps{$iter} }[$idx_day], ${ $timestamps{$iter} }[$idx_hh], ${ $timestamps{$iter} }[$idx_mm], ${ $timestamps{$iter} }[$idx_ss], ${ $overall{$iter} }[$idx_avail], ${ $overall{$iter} }[$idx_slab]/Ki; } # Cumulate stats for regression if ($days{$iter} >= ($last_time - 1.0)) { push @keys, '1day';} foreach my $key (@keys) { $stats{$key}{'DAY'}{'sum_X'} += $day; $stats{$key}{'AVAIL'}{'sum_X'} += $avail; $stats{$key}{'SLAB'}{'sum_X'} += $slab; $stats{$key}{'UNLINKED'}{'sum_X'} += $unlinked; $stats{$key}{'ROOT_USE'}{'sum_X'} += $fs_root; $stats{$key}{'DAY'}{'sum_XX'} += ($day * $day); $stats{$key}{'AVAIL'}{'sum_XX'} += ($avail * $avail); $stats{$key}{'SLAB'}{'sum_XX'} += ($slab * $slab); $stats{$key}{'UNLINKED'}{'sum_XX'} += ($unlinked * $unlinked); $stats{$key}{'ROOT_USE'}{'sum_XX'} += ($fs_root * $fs_root); $stats{$key}{'DAY'}{'sum_XY'} += ($day * $day); $stats{$key}{'AVAIL'}{'sum_XY'} += ($avail * $day); $stats{$key}{'SLAB'}{'sum_XY'} += ($slab * $day); $stats{$key}{'UNLINKED'}{'sum_XY'} += ($unlinked * $day); $stats{$key}{'ROOT_USE'}{'sum_XY'} += ($fs_root * $day); $num{$key}++; } foreach my $cmd (sort {$b cmp $a} keys %matched) { # Sum up key values for commands that match my ($SUM_nlwp, $SUM_rss, $SUM_vsz) = (0.0, 0.0, 0.0); foreach my $pid (keys %{ $matched{$cmd} }) { if (defined $data{$iter}{$cmd}{$pid}) { $SUM_nlwp += ${ $data{$iter}{$cmd}{$pid} }[$idx_nlwp]; $SUM_rss += ${ $data{$iter}{$cmd}{$pid} }[$idx_rss]; $SUM_vsz += ${ $data{$iter}{$cmd}{$pid} }[$idx_vsz]; # CGRIFFIN update $cmdabbrev = substr $cmd, 0, 15; $cmdabbrev =~ s/\s+//g; $cmdabbrev =~ s/[[:punct:]]//g; $filename = "pid-" . $pid . "-" . $cmdabbrev . ".csv"; open($fh, '>>', $filename) or die "Could not open file '$filename' $!"; printf $fh "%04d-%02d-%02d %02d:%02d:%02d,%d,%d,%d,%d,%s\n", ${ $timestamps{$iter} }[$idx_yy], ${ $timestamps{$iter} }[$idx_month], ${ $timestamps{$iter} }[$idx_day], ${ $timestamps{$iter} }[$idx_hh], ${ $timestamps{$iter} }[$idx_mm], ${ $timestamps{$iter} }[$idx_ss], $pid, ${ $data{$iter}{$cmd}{$pid} }[$idx_nlwp], ${ $data{$iter}{$cmd}{$pid} }[$idx_rss], ${ $data{$iter}{$cmd}{$pid} }[$idx_vsz], $cmd; close $fh; if (defined $minmaxdata{$cmd}{$pid} and ($minmaxdata{$cmd}{$pid}[$idx_output] == 0)) { $filename = "pidstats.csv"; open($fh, '>>', $filename) or die "Could not open file '$filename' $!"; printf $fh "%d,%d,%d,%d,%d,%d,%f,%f,%s\n", $pid, $minmaxdata{$cmd}{$pid}[$idx_nlwp], $minmaxdata{$cmd}{$pid}[$idx_minrss], $minmaxdata{$cmd}{$pid}[$idx_minvsz], $minmaxdata{$cmd}{$pid}[$idx_maxrss], $minmaxdata{$cmd}{$pid}[$idx_maxvsz], $minmaxdata{$cmd}{$pid}[$idx_sumrss] / $minmaxdata{$cmd}{$pid}[$idx_pidcount], $minmaxdata{$cmd}{$pid}[$idx_sumvsz] / $minmaxdata{$cmd}{$pid}[$idx_pidcount], $cmd; close $fh; } $minmaxdata{$cmd}{$pid}[$idx_output] = 1; } } if ((defined $arg_detail) || ($iter == $iters[-1])) { if (!(defined $arg_report)) { printf " | %4d %8d %8d", $SUM_nlwp, $SUM_rss, $SUM_vsz; } else { printf " %8d", ($is_strict == 1 ) ? $SUM_vsz : $SUM_rss; } } # Cumulate stats for regression foreach my $key (@keys) { $stats{$key}{'nlwp_'.$cmd}{'sum_X'} += $SUM_nlwp; $stats{$key}{'rss_' .$cmd}{'sum_X'} += $SUM_rss; $stats{$key}{'vsz_' .$cmd}{'sum_X'} += $SUM_vsz; $stats{$key}{'nlwp_'.$cmd}{'sum_XX'} += ($SUM_nlwp * $SUM_nlwp); $stats{$key}{'rss_' .$cmd}{'sum_XX'} += ($SUM_rss * $SUM_rss); $stats{$key}{'vsz_' .$cmd}{'sum_XX'} += ($SUM_vsz * $SUM_vsz); $stats{$key}{'nlwp_'.$cmd}{'sum_XY'} += ($SUM_nlwp * $day); $stats{$key}{'rss_' .$cmd}{'sum_XY'} += ($SUM_rss * $day); $stats{$key}{'vsz_' .$cmd}{'sum_XY'} += ($SUM_vsz * $day); } } if ((defined $arg_detail) || ($iter == $iters[-1])) { printf "\n"; } # save uptime for comparison $uptime_last = $uptime; } # Perform simple linear regression on all variables foreach my $key (keys %stats) { foreach my $var (keys %{ $stats{$key} }) { ($stats{$key}{$var}{'int'}, $stats{$key}{$var}{'slope'}) = &linear_fit( $stats{$key}{'DAY'}{'sum_X'}, # 'DAY' is 'x' variable $stats{$key}{$var}{'sum_X'}, $stats{$key}{$var}{'sum_XY'}, $stats{$key}{'DAY'}{'sum_XX'}, $num{$key} ); } } %mem_slope = (); foreach my $cmd (keys %{mem_stats}) { my ($count, $cur_mem, $max_mem, $sum_X, $sum_Y, $sum_XX, $sum_XY) = @{$mem_stats{$cmd}}; my ($intercept, $slope) = &linear_fit($sum_X, $sum_Y, $sum_XY, $sum_XX, $count); $mem_slope{$cmd} = $slope/Ki; # slope (MiB/d) } # Print out linear trends # [ OBSOLETE ] printf "%20s %8s %7s %6s %5s", '-'x20, '-'x8, '-'x7, '-'x6, '-'x5; printf "%20s %8s %7s", '-'x20, '-'x8, '-'x7; if (!(defined $arg_report)) { foreach my $cmd (sort {$b cmp $a} keys %matched) { printf " | %4s %8s %8s", '-'x4, '-'x8, '-'x8; } } else { foreach my $cmd (sort {$b cmp $a} keys %matched) { printf " %11s", '-'x11; } } print "\n"; foreach my $key (sort {$b cmp $a} keys %stats) { printf "%20s %8.1f %7.1f", ($key eq 'all') ? "LONG TREND: (MiB/d)" : "1 DAY TREND: (MiB/d)", $stats{$key}{'AVAIL'}{'slope'}, $stats{$key}{'SLAB'}{'slope'}/Ki; if (!(defined $arg_report)) { foreach my $cmd (sort {$b cmp $a} keys %matched) { printf " | %4.0f %8.3f %8.3f", $stats{$key}{'nlwp_'.$cmd}{'slope'}, $stats{$key}{'rss_' .$cmd}{'slope'}/Ki, $stats{$key}{'vsz_' .$cmd}{'slope'}/Ki; } } else { foreach my $cmd (sort {$b cmp $a} keys %matched) { printf " %8.3f", ($is_strict == 1 ) ? $stats{$key}{'vsz_' .$cmd}{'slope'}/Ki : $stats{$key}{'rss_' .$cmd}{'slope'}/Ki; } } if (($key eq 'all') && ($num_reboots > 0)) { printf " (%d reboots)", $num_reboots; } print "\n"; } my $n = 0; # Print out hirunner process growth printf "\nPROCESSES WITH HIGHEST GROWTH (1 DAY TREND: > %.3f MiB/day):\n", $arg_thresh; if ($is_strict) { printf "%9s %9s %9s %s\n", 'CurVSZ', 'HiVSZ', 'Rate', 'COMMAND'; } else { printf "%9s %9s %9s %s\n", 'CurRSS', 'HiRSS', 'Rate', 'COMMAND'; } printf "%9s %9s %9s %s\n", '(MiB)', '(MiB)', '(MiB/d)', '(name)'; printf "%9s %9s %9s %s\n", '-'x8, '-'x8, '-'x8, '-'x9; foreach my $cmd (sort {$mem_slope{$b} <=> $mem_slope{$a} } keys %mem_slope) { last if ($mem_slope{$cmd} < $arg_thresh); my $max_iter = $iters[-1]; my ($count, $cur_mem, $max_mem, $sum_X, $sum_Y, $sum_XX, $sum_XY) = @{$mem_stats{$cmd}}; if ((defined $arg_transient) || ((($max_iter >= 142) && ($count > 70)) || (($max_iter < 142) && ($max_iter/2 < $count)))) { # print only processes seen most of the time printf "%9.3f %9.3f %9.3f %s\n", $cur_mem/Ki, $max_mem/Ki, $mem_slope{$cmd}, $cmd; $n++; } } print "none\n" if ($n == 0); print "\n"; } exit 0; ####################################################################################################################### # Lightweight which(), derived from CPAN File::Which sub which { my ($exec) = @_; return undef unless $exec; my $all = wantarray; my @results = (); my @path = File::Spec->path; foreach my $file ( map { File::Spec->catfile($_, $exec) } @path ) { next if -d $file; if (-x _) { return $file unless $all; push @results, $file; } } $all ? return @results : return undef; } # Process "parse_memory" command line arguments and set defaults sub get_parse_memstats_args { # Returned parameters local (*::arg_all, *::arg_match, *::arg_name, *::arg_pids, *::arg_commands, *::arg_exact, *::arg_commands_excl, *::arg_list, *::arg_detail, *::arg_thresh, *::arg_transient, *::arg_path, *::arg_dur, *::arg_report, *::arg_files) = @_; # Local variables my ($fail, $arg_help) = (); my @tmp = (); # Use the Argument processing module use Getopt::Long; # Print usage if no arguments if (!@ARGV) { &Usage(); exit 0; } # Process input arguments $fail = 0; GetOptions( "all", \$::arg_all, # CURRENTLY UNUSED "match", \$::arg_match, "name=s", \$::arg_name, "pid=i", \@::arg_pids, "cmd=s", \@::arg_commands, "exact=s", \@::arg_exact, "excl=s", \@::arg_commands_excl, "list", \$::arg_list, "detail", \$::arg_detail, "thresh=f", \$::arg_thresh, "transient", \$::arg_transient, "dir=s", \$::arg_path, "dur=f", \$::arg_dur, "report", \$::arg_report, "help|?", \$arg_help ) || GetOptionsMessage(); # Print help documentation if user has selected -help &ListHelp() if (defined $arg_help); # Listify @::arg_pids @tmp = (); if (@::arg_pids) { @tmp = @::arg_pids; @::arg_pids = (); foreach my $pid (@tmp) { push @::arg_pids, (split /,/, $pid); } } # Listify @::arg_commands @tmp = (); if (@::arg_commands) { @tmp = @::arg_commands; @::arg_commands = (); foreach my $cmd (@tmp) { push @::arg_commands, (split /,/, $cmd); } } # Listify @::arg_exact @tmp = (); if (@::arg_exact) { @tmp = @::arg_exact; @::arg_exact = (); foreach my $cmd (@tmp) { push @::arg_exact, (split /,/, $cmd); } } # Listify @::arg_commands_excl @tmp = (); if (@::arg_commands_excl) { @tmp = @::arg_commands_excl; @::arg_commands_excl = (); foreach my $cmd (@tmp) { push @::arg_commands_excl, (split /,/, $cmd); } } # Give warning messages and usage when parameters are specified incorrectly. my $cnt = 0; $cnt++ if (defined $::arg_name); $cnt++ if (defined $::arg_list); ##$cnt++ if (defined $::arg_all); # [ JGAULD - maybe add $::arg_match] if ($cnt > 1) { warn "$SCRIPT: Input error: cannot specify more than one of {--list} or {--name } options.\n"; $fail = 1; } if ($fail == 1) { # touch variables here to make silly warning go away $::arg_all = ""; $::arg_match = ""; $::arg_name = ""; $::arg_list = ""; $::arg_detail = ""; $::arg_thresh = 0; $::arg_transient = ""; &Usage(); exit 1; } # Assume remaining options are filenames @::arg_files = @ARGV; # Set defaults for options requiring values if (!(defined $::arg_thresh)) { $::arg_thresh = 0.005; # Default to 0.005 MiB/d } if (!(defined $::arg_dur)) { $::arg_dur = 7.0; # Default to 7.0 days worth of data } else { $::arg_dur = 1.0 if ($::arg_dur < 1.0); # minimum 1 day worth of data } $::arg_path ||= $DEFAULT_PATH; $::arg_detail = 1 if (defined $::arg_report); # print details if 'report' option chosen } sub GetOptionsMessage { # Print out a warning message and then print program usage. warn "$SCRIPT: Error processing input arguments.\n"; &Usage(); exit 1; } sub Usage { # Print out program usage. printf "Usage: $SCRIPT file1 file2 file3.gz ...\n"; printf "\t[--list | --all | --name ]\n"; printf "\t[--pid ] ...\n"; printf "\t[--cmd ] ...\n"; printf "\t[--exact ] ...\n"; printf "\t[--excl ] ...\n"; printf "\t[--detail]\n"; printf "\t[--thresh ]\n"; printf "\t[--transient]\n"; printf "\t[--dir ]\n"; printf "\t[--dur ]\n"; printf "\t[--report]\n"; printf "\t[--help | -?]\n"; } sub ListHelp { # Print out tool help printf "$SCRIPT -- parses 'memstats' data and prints processes with hirunner memory growth\n"; &Usage(); printf "\nOptional input arguments:\n"; printf " --list : list all available 'memory' data\n"; printf " --all : summarize all blades\n"; printf " --name : match files with pattern (file globbing allowed if in \"quotes\")\n"; printf " --pid : match 'pid' (can specify multiple pids)\n"; printf " --cmd : match command name 'string' pattern (can specify multiple patterns)\n"; printf " --exact : exact match command name 'string' pattern (can specify multiple patterns)\n"; printf " --excl : match command name 'string' pattern to exclude (can specify multiple patterns)\n"; printf " --detail : time-series output\n"; printf " --thresh : set threshold for hirunner growth processes : default > 0.005 MiB/d\n"; printf " --transient : include transient processes (i.e., do not filter out short-lived processes)\n"; printf " --dir : path to memory files : default: $DEFAULT_PATH\n"; printf " --dur : number of days of data to process : default: 7.0\n"; printf " --report : summarize details for hi-runner memory growth\n"; printf " --help : this help information\n"; printf "\nmemory data storage: %s/%s\n", $DEFAULT_PATH, "_--

_00_memory{.gz}"; printf "(most data files are gzip compressed)\n"; printf "\nExamples:\n"; printf " $SCRIPT --all (i.e., summarize all hosts\n"; printf " $SCRIPT --name 2014-02-22 (i.e., match files containing 2014-02-22)\n"; printf " $SCRIPT --name compute-0 --cmd python (i.e., compute-0, specify process(es))\n"; printf " $SCRIPT --name compute-0 --pid 1 --pid 2 --detail (i.e., slot 1, specify PIDs 1 and 2 time-series)\n"; printf " $SCRIPT --name controller-0 --cmd python --excl blah --detail (i.e., time-series for 'python', but not 'blah')\n"; printf "\nReported Memory Headings:\n"; printf " AVAIL (MiB) - available free memory\n"; printf " Strict = MemTotal - Committed_AS\n"; printf " Non-strict = MemFree + Buffers + Cached - KernelReserved\n"; printf " NLWP (#) - number of lightweight processes (threads)\n"; printf " RSS (KiB) - resident set size of process (SUM of multiple matched processes)\n"; printf " VSZ (KiB) - virtual size of process (SUM of multiple matched processes)\n"; printf " TREND - reported in change of MiB per day"; printf "\n"; exit 0; } # Calculate linear regression coefficients for linear equation, y = a + b*x sub linear_fit { my ($sum_X, $sum_Y, $sum_XY, $sum_XX, $n) = @_; my ($a, $b, $s1_sq) = (); # Prevent doing regression with less than 2 points return (0,0) if ($n < 2); $s1_sq = ($sum_XX - $sum_X / $n * $sum_X) / ($n - 1); # Prevent divide by zero return (0,0) if ($s1_sq <= 0.0); $b = ($n * $sum_XY - $sum_X * $sum_Y) / $n / ($n - 1) / $s1_sq; $a = ($sum_Y - $b * $sum_X)/$n; return ($a, $b); } 1;