Relocate hostdata-collectors to

stx-integ/tools/engtools/hostdata-collectors

Move content from stx-utils into stx-integ or stx-update

Packages will be relocated to

stx-update:
    enable-dev-patch
    extras

stx-integ:
    config-files/
        io-scheduler

    filesystem/
        filesystem-scripts

    grub/
        grubby

    logging/
        logmgmt

    tools/
        collector
        monitor-tools

    tools/engtools/
        hostdata-collectors
        parsers

    utilities/
        build-info
        branding   (formerly wrs-branding)
        platform-util

Change-Id: I9796704d8ffc6590a971af9d41b626189e35ecc4
Story: 2002801
Task: 22687
Signed-off-by: Scott Little <scott.little@windriver.com>
Scott Little 2018-08-01 12:29:23 -04:00
parent 76cfa48701
commit 4366c6cfd8
32 changed files with 4870 additions and 0 deletions

View File

@@ -115,3 +115,4 @@ tools/collector
grub/grubby
utilities/platform-util
tools/monitor-tools
tools/engtools/hostdata-collectors

View File

@@ -0,0 +1,12 @@
The engineering tools are meant to be installed as a patch; the RPM is generated as part
of the build but is not included in the image. Assuming your development environment is fully set up,
simply run patch-engtools.sh to generate the patch:
In this directory ($MY_REPO/addons/wr-cgcs/layers/cgcs/middleware/util/recipes-common/engtools/hostdata-collectors),
enter the command:
>./patch-engtools.sh
This generates ENGTOOLS-X.patch (where X is the TiS release version), which can be applied via sw-patch.
The patch is built with the --all-nodes option by default. This can be changed to a combination of the following:
--controller, --compute, --storage, --controller-compute, and --compute-lowlatency.
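For illustration, an end-to-end session might look like the following (a sketch; the sw-patch upload/apply subcommands are standard, but the exact patch ID and any additional install steps depend on your release):
>./patch-engtools.sh --controller-compute
Then, on the target system as root:
>sw-patch upload ENGTOOLS-X.patch
>sw-patch apply ENGTOOLS-X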

View File

@@ -0,0 +1,2 @@
SRC_DIR="scripts"
TIS_PATCH_VER=1

View File

@@ -0,0 +1,101 @@
Summary: Host performance data collection tools package
Name: engtools
Version: 1.0
Release: %{tis_patch_ver}%{?_tis_dist}
License: Apache-2.0
Group: Tools
Packager: Wind River <info@windriver.com>
URL: http://www.windriver.com/
BuildArch: noarch
Source: %{name}-%{version}.tar.gz
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root
Requires: iperf3
%description
This package contains data collection tools to monitor host performance.
The tools are general-purpose engineering and debugging aids, covering
overall memory, CPU occupancy, per-task CPU, per-task scheduling, and
per-task I/O.
# Don't try fancy stuff like debuginfo, which is useless on binary-only
# packages. Don't strip binaries either.
# Be sure buildpolicy is set to do nothing
%define __spec_install_post %{nil}
%define debug_package %{nil}
%define __os_install_post %{_dbpath}/brp-compress
%define _binaries_in_noarch_packages_terminate_build 0
%define local_dir /usr/local
%define local_bindir %{local_dir}/bin/
%define local_initdir /etc/init.d/
%define local_confdir /etc/engtools/
%define local_systemddir /etc/systemd/system/
%prep
%setup -q
%build
# Empty section.
%install
mkdir -p %{buildroot}
install -d 755 %{buildroot}%{local_bindir}
# Installing additional tools, memtop, occtop and schedtop are already in the image
install -m 755 buddyinfo.py %{buildroot}%{local_bindir}
install -m 755 chewmem %{buildroot}%{local_bindir}
# Installing data collection scripts
install -m 755 ceph.sh %{buildroot}%{local_bindir}
install -m 755 cleanup-engtools.sh %{buildroot}%{local_bindir}
install -m 755 collect-engtools.sh %{buildroot}%{local_bindir}
install -m 755 diskstats.sh %{buildroot}%{local_bindir}
install -m 755 engtools_util.sh %{buildroot}%{local_bindir}
install -m 755 filestats.sh %{buildroot}%{local_bindir}
install -m 755 iostat.sh %{buildroot}%{local_bindir}
install -m 755 linux_benchmark.sh %{buildroot}%{local_bindir}
install -m 755 memstats.sh %{buildroot}%{local_bindir}
install -m 755 netstats.sh %{buildroot}%{local_bindir}
install -m 755 postgres.sh %{buildroot}%{local_bindir}
install -m 755 rabbitmq.sh %{buildroot}%{local_bindir}
install -m 755 remote/rbzip2-engtools.sh %{buildroot}%{local_bindir}
install -m 755 remote/rstart-engtools.sh %{buildroot}%{local_bindir}
install -m 755 remote/rstop-engtools.sh %{buildroot}%{local_bindir}
install -m 755 remote/rsync-engtools-data.sh %{buildroot}%{local_bindir}
install -m 755 slab.sh %{buildroot}%{local_bindir}
install -m 755 ticker.sh %{buildroot}%{local_bindir}
install -m 755 top.sh %{buildroot}%{local_bindir}
install -m 755 vswitch.sh %{buildroot}%{local_bindir}
install -m 755 live_stream.py %{buildroot}%{local_bindir}
# Installing conf file
install -d 755 %{buildroot}%{local_confdir}
install -m 644 -p -D cfg/engtools.conf %{buildroot}%{local_confdir}
# Installing init script
install -d 755 %{buildroot}%{local_initdir}
install -m 755 init.d/collect-engtools.sh %{buildroot}%{local_initdir}
# Installing service file
install -d 755 %{buildroot}%{local_systemddir}
install -m 644 -p -D collect-engtools.service %{buildroot}%{local_systemddir}
%clean
rm -rf $RPM_BUILD_ROOT
%files
%license LICENSE
%defattr(-,root,root,-)
%{local_bindir}/*
%{local_confdir}/*
%{local_initdir}/*
%{local_systemddir}/*
%post
/bin/systemctl enable collect-engtools.service > /dev/null 2>&1
/bin/systemctl start collect-engtools.service > /dev/null 2>&1
%preun
#/bin/systemctl --no-reload disable collect-engtools.sh.service > /dev/null 2>&1
#/bin/systemctl stop collect-engtools.sh.service > /dev/null 2>&1
%systemd_preun collect-engtools.service
%postun
%systemd_postun_with_restart collect-engtools.service
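As a quick sanity check of the packaging above, the built RPM can be inspected with standard rpm tooling before it is rolled into a patch (a sketch; the output path matches the glob used by patch-engtools.sh below):
# List the payload: everything should land under /usr/local/bin, /etc/engtools,
# /etc/init.d and /etc/systemd/system per the %files section.
rpm -qpl ${MY_WORKSPACE}/std/rpmbuild/RPMS/engtools-*.noarch.rpm
# Show the %post/%preun/%postun scriptlets that enable and start the service.
rpm -qp --scripts ${MY_WORKSPACE}/std/rpmbuild/RPMS/engtools-*.noarch.rpm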

View File

@@ -0,0 +1,33 @@
#!/bin/bash
# Designer patches:
# http://twiki.wrs.com/PBUeng/Patching
if [ -z "$MY_WORKSPACE" ] || [ -z "$MY_REPO" ]; then
echo "Some dev environment variables are not set."
echo "Refer to http://wiki.wrs.com/PBUeng/CentOSBuildProcess for instructions."
exit 1
fi
ENGTOOLS=$(ls ${MY_WORKSPACE}/std/rpmbuild/RPMS/engtools*noarch.rpm 2>/dev/null)
if [ $? -ne 0 ]; then
echo "Engtools RPM has not been built. Please run \"build-pkgs engtools\" first."
exit 1
fi
source ${MY_REPO}/addons/wr-cgcs/layers/cgcs/middleware/recipes-common/build-info/release-info.inc
#TiS_REL="16.10"
#PATCH_ID="ENGTOOLS-${TiS_REL}"
PATCH_ID="ENGTOOLS-${PLATFORM_RELEASE}"
ORIG_PWD=$(pwd)
# Create CGCS Patch
cd ${MY_WORKSPACE}
PATCH_BUILD=${MY_REPO}/addons/wr-cgcs/layers/cgcs/extras.ND/scripts/patch_build.sh
${PATCH_BUILD} --id ${PATCH_ID} --reboot-required=N \
--summary "System engineering data collection and analysis tools." \
--desc "System engineering data collection and analysis tools." \
--all-nodes ${ENGTOOLS} \
--warn "Intended for system engineering use only."
cd ${ORIG_PWD}
exit 0

View File

@@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@@ -0,0 +1,121 @@
#!/usr/bin/env python
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 textwidth=79 autoindent
"""
Python source code
Last modified: 15 Feb 2014 - 13:38
Last author: lmwangi at gmail com
Displays the available memory fragments
by querying /proc/buddyinfo
Example:
# python buddyinfo.py
"""
import optparse
import os
import re
from collections import defaultdict
import logging
class Logger:
def __init__(self, log_level):
self.log_level = log_level
def get_formatter(self):
return logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
def get_handler(self):
return logging.StreamHandler()
def get_logger(self):
"""Returns a Logger instance for the specified module_name"""
logger = logging.getLogger('main')
logger.setLevel(self.log_level)
log_handler = self.get_handler()
log_handler.setFormatter(self.get_formatter())
logger.addHandler(log_handler)
return logger
class BuddyInfo(object):
"""BuddyInfo DAO"""
def __init__(self, logger):
super(BuddyInfo, self).__init__()
self.log = logger
self.buddyinfo = self.load_buddyinfo()
def parse_line(self, line):
line = line.strip()
self.log.debug("Parsing line: %s" % line)
parsed_line = re.match("Node\s+(?P<numa_node>\d+).*zone\s+(?P<zone>\w+)\s+(?P<nr_free>.*)", line).groupdict()
self.log.debug("Parsed line: %s" % parsed_line)
return parsed_line
def read_buddyinfo(self):
buddyhash = defaultdict(list)
buddyinfo = open("/proc/buddyinfo").readlines()
for line in map(self.parse_line, buddyinfo):
numa_node = int(line["numa_node"])
zone = line["zone"]
free_fragments = map(int, line["nr_free"].split())
max_order = len(free_fragments)
fragment_sizes = self.get_order_sizes(max_order)
usage_in_bytes = [block[0] * block[1] for block in zip(free_fragments, fragment_sizes)]
buddyhash[numa_node].append({
"zone": zone,
"nr_free": free_fragments,
"sz_fragment": fragment_sizes,
"usage": usage_in_bytes })
return buddyhash
def load_buddyinfo(self):
buddyhash = self.read_buddyinfo()
self.log.info(buddyhash)
return buddyhash
def page_size(self):
return os.sysconf("SC_PAGE_SIZE")
def get_order_sizes(self, max_order):
return [self.page_size() * 2**order for order in range(0, max_order)]
def __str__(self):
ret_string = ""
width = 20
for node in self.buddyinfo:
ret_string += "Node: %s\n" % node
for zoneinfo in self.buddyinfo.get(node):
ret_string += " Zone: %s\n" % zoneinfo.get("zone")
ret_string += " Free KiB in zone: %.2f\n" % (sum(zoneinfo.get("usage")) / (1024.0))
ret_string += '\t{0:{align}{width}} {1:{align}{width}} {2:{align}{width}}\n'.format(
"Fragment size", "Free fragments", "Total available KiB",
width=width,
align="<")
for idx in range(len(zoneinfo.get("sz_fragment"))):
ret_string += '\t{order:{align}{width}} {nr:{align}{width}} {usage:{align}{width}}\n'.format(
width=width,
align="<",
order = zoneinfo.get("sz_fragment")[idx],
nr = zoneinfo.get("nr_free")[idx],
usage = zoneinfo.get("usage")[idx] / 1024.0)
return ret_string
def main():
"""Main function. Called when this file is a shell script"""
usage = "usage: %prog [options]"
parser = optparse.OptionParser(usage)
parser.add_option("-s", "--size", dest="size", choices=["B","K","M"],
action="store", type="choice", help="Return results in bytes, kib, mib")
(options, args) = parser.parse_args()
logger = Logger(logging.DEBUG).get_logger()
logger.info("Starting....")
logger.info("Parsed options: %s" % options)
buddy = BuddyInfo(logger)
print buddy
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,60 @@
#!/bin/bash
# Usage: ceph.sh [-p <period_mins>] [-i <interval_seconds>] [-c <cpulist>] [-h]
TOOLBIN=$(dirname $0)
# Initialize tools environment variables, and define common utility functions
. ${TOOLBIN}/engtools_util.sh
tools_init
if [ $? -ne 0 ]; then
echo "FATAL, tools_init - could not setup environment"
exit 1
fi
# Enable use of INTERVAL_SEC sample interval
OPT_USE_INTERVALS=1
# Print key ceph statistics
function print_ceph()
{
print_separator
TOOL_HIRES_TIME
cmd='ceph -s'
${ECHO} "# ${cmd}" ; ${cmd} ; ${ECHO}
cmd='ceph osd tree'
${ECHO} "# ${cmd}" ; ${cmd} ; ${ECHO}
cmd='ceph df detail'
${ECHO} "# ${cmd}" ; ${cmd} ; ${ECHO}
}
#-------------------------------------------------------------------------------
# MAIN Program:
#-------------------------------------------------------------------------------
# Parse input options
tools_parse_options "${@}"
# Set affinity of current script
CPULIST=""
set_affinity ${CPULIST}
LOG "collecting ${TOOLNAME} for ${PERIOD_MIN} minutes, with ${INTERVAL_SEC} second sample intervals."
# Print tools generic tools header
tools_header
# Calculate number of sample repeats based on overall interval and sampling interval
((REPEATS = PERIOD_MIN * 60 / INTERVAL_SEC))
for ((rep=1; rep <= REPEATS ; rep++))
do
print_ceph
sleep ${INTERVAL_SEC}
done
print_ceph
LOG "done"
# normal program exit
tools_cleanup 0
exit 0

View File

@@ -0,0 +1,77 @@
# engtools configuration
# You may comment out any unwanted fields under the Intervals section, but do not comment out any other configuration options, as the python parsing utility will complain. Please follow the comments
[LabConfiguration]
# Set this option to Y/N depending on the setup of your lab
CPE_LAB=N
[LiveStream]
# Set this option to Y/N before patch creation to enable/disable live stats collection
ENABLE_LIVE_STREAM=Y
# Set the duration of the live stream capture utility. Leave blank for continuous collection. Ex: 1s,1m,1h,1d
DURATION=
[StaticCollection]
# Set this option to Y/N before patch creation to enable/disable static stats collection
ENABLE_STATIC_COLLECTION=Y
[CollectInternal]
# Controller external OAM interface used to communicate with the remote server. If unset, the first interface from ifconfig will be used
CONTROLLER0_EXTERNAL_INTERFACE=
CONTROLLER1_EXTERNAL_INTERFACE=
[RemoteServer]
# Remote server InfluxDB and Grafana info
INFLUX_IP=128.224.186.61
INFLUX_PORT=8086
INFLUX_DB=
GRAFANA_PORT=3000
# This key is created through Grafana. If deleted, a new key (with admin privileges) must be created and copied here
GRAFANA_API_KEY=eyJrIjoiSkR1SXcxbkVVckd1dW9PMHFKS0EzQ2hQWTd1YUhtSkIiLCJuIjoiZGJfY3JlYXRvciIsImlkIjoxfQ==
[Intervals]
# Set the collection interval (in seconds) to be used in the live_stream.py script. If unset or commented out, that field will not be collected
memtop=10
memstats=10
occtop=10
schedtop=10
load_avg=3
cpu_count=60
diskstats=30
iostat=10
filestats=30
netstats=10
postgres=30
rabbitmq=3600
vswitch=30
[AdditionalOptions]
# Set this option to Y/N to enable/disable Openstack API GET/POST collection
API_REQUESTS=N
# Set this option to Y/N to enable/disable the collection of all services and not just the ones listed below. Note that this hasn't been tested thoroughly
ALL_SERVICES=N
# Set this option to Y/N to enable/disable fast postgres connections collection. By default, postgres connections use the same collection interval as postgres DB size (set above); this option sets the connections interval to 0 seconds without affecting the postgres collection interval above
FAST_POSTGRES_CONNECTIONS=N
# Set this option to Y/N to enable/disable automatic database deletion for InfluxDB and Grafana. As of now, this feature does not work with the engtools patch
AUTO_DELETE_DB=N
[ControllerServices]
CONTROLLER_SERVICE_LIST=aodh-api aodh-listener aodh-notifier aodh-evaluator beam.smp ceilometer-api ceilometer-collector ceilometer-agent-notification ceilometer-mem-db ceph-mon ceph-rest-api ceph-alarm-manager cinder-api cinder-volume cinder-scheduler glance-api glance-registry heat-api heat-engine heat-api-cfn heat-api-cloudwatch hbsAgent ironic-api ironic-conductor keystone-all magnum-api magnum-conductor neutron-server nova-api nova-api-proxy nova-compute nova-scheduler nova-conductor nova-console-auth nova-novncproxy nova-placement-api panko-api sysinv-api sysinv-conductor postgres fmManager rabbitmq-server gunicorn postgres snmpd patch-alarm-manager lighttpd sw-patch-controller-daemon nfv-vim nfv-vim-api nfv-vim-webserver slapd mtcAgent guestAgent
[ComputeServices]
COMPUTE_SERVICE_LIST=nova-compute neutron-dhcp-agent neutron-metadata-agent neutron-sriov-nic-agent kvm libvirtd guestServer host_agent
[StorageServices]
STORAGE_SERVICE_LIST=ceph-mon ceph-osd ceph-manager ceph-rest-api
[RabbitmqServices]
RABBITMQ_QUEUE_LIST=notifications.info versioned_notifications.info
[CommonServices]
COMMON_SERVICE_LIST=dnsmasq ceilometer-polling haproxy hwmond pmond rmond fsmond sw-patch-agent sysinv-agent syslog-ng hostwd iscsid io-monitor-manager acpid hbsClient logmgmt mtcClient mtcalarmd mtclogd sshd ntpd smartd sm sm-eru sm-watchdog sm-api ceilometer keyring cinder-rtstool
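As an aside, individual settings in this file are plain key=value pairs, so a one-off value can be pulled out from shell (an illustrative one-liner, not part of the tooling; live_stream.py does its own parsing):
awk -F= '$1 == "memtop" {print $2}' /etc/engtools/engtools.conf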

View File

@@ -0,0 +1,86 @@
#!/usr/bin/perl
# Usage:
# ./chewmem.pl <MiB>
# Description:
# This will create a character array requiring "MiB" actual memory.
# Summarize high-level memory usage.
# Ideally we can demonstrate creating larger and larger
# successful memory allocations until Avail is near 0.
# It is very likely to trigger OOM Killer or cause reset
# if we run completely out of memory.
use warnings;
use strict;
use POSIX qw(strftime);
sub show_memusage() {
our $count;
$::count++; $::count %= 15;
my $Ki = 1024.0;
my ($MemTotal, $MemFree, $Buffers, $Cached, $CommitLimit, $Committed_AS, $Slab, $SReclaimable);
# Process all entries of MEMINFO
my $file = '/proc/meminfo';
open(FILE, $file) || die "Cannot open file: $file ($!)";
MEMINFO_LOOP: while($_ = <FILE>) {
s/[\0\e\f\r\a]//g; chomp; # strip control characters if any
last MEMINFO_LOOP if (/^\s*$/); # end at blank-line
if (/\bMemTotal:\s+(\d+)\s+kB/) {
$MemTotal = $1; next MEMINFO_LOOP;
}
if (/\bMemFree:\s+(\d+)\s+kB/) {
$MemFree = $1; next MEMINFO_LOOP;
}
if (/\bBuffers:\s+(\d+)\s+kB/) {
$Buffers = $1; next MEMINFO_LOOP;
}
if (/\bCached:\s+(\d+)\s+kB/) {
$Cached = $1; next MEMINFO_LOOP;
}
if (/\bCommitLimit:\s+(\d+)\s+kB/) {
$CommitLimit = $1; next MEMINFO_LOOP;
}
if (/\bCommitted_AS:\s+(\d+)\s+kB/) {
$Committed_AS = $1; next MEMINFO_LOOP;
}
if (/\bSlab:\s+(\d+)\s+kB/) {
$Slab = $1; next MEMINFO_LOOP;
}
if (/\bSReclaimable:\s+(\d+)\s+kB/) {
$SReclaimable = $1; next MEMINFO_LOOP;
}
}
close(FILE);
my $Avail_MiB = ($MemFree + $Cached + $Buffers + $SReclaimable)/$Ki;
my $Strict_MiB = ($CommitLimit - $Committed_AS)/$Ki;
my $now = strftime "%Y-%m-%d %H:%M:%S", localtime();
if ($::count == 1) {
printf "%19s %6s %6s %6s %6s %6s %6s %6s %6s %6s\n",
'yyyy-mm-dd hh:mm:ss', 'Tot', 'Free', 'Ca', 'Buf', 'Slab', 'CAS', 'CLim', 'Avail', 'Strict';
}
printf "%19s %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f\n",
$now, $MemTotal/$Ki, $MemFree/$Ki, $Cached/$Ki, $Buffers/$Ki, $Slab/$Ki,
$Committed_AS/$Ki, $CommitLimit/$Ki, $Avail_MiB, $Strict_MiB;
}
#-------------------------------------------------------------------------------
# MAIN PROGRAM
# Autoflush output
select(STDERR);
$| = 1;
select(STDOUT); # default
$| = 1;
my $MiB = $ARGV[0] ||=0.0;
my $A = "A" x (1024*1024*$MiB/2);
print "Allocating $MiB MiB character array.\n";
while(1) {
sleep(1);
show_memusage();
}
exit 0;
1;

View File

@@ -0,0 +1,57 @@
#!/bin/bash
# Purpose:
# Clean up engtools processes and pid files; some of the engtools scripts
# are not shutting down gracefully.
# Define common utility functions
TOOLBIN=$(dirname $0)
. ${TOOLBIN}/engtools_util.sh
if [ $UID -ne 0 ]; then
ERRLOG "Require sudo/root access."
exit 1
fi
declare -a TOOLS
TOOLS=()
TOOLS+=('collect-engtools.sh')
TOOLS+=('ceph.sh')
TOOLS+=('diskstats.sh')
TOOLS+=('iostat.sh')
TOOLS+=('rabbitmq.sh')
TOOLS+=('ticker.sh')
TOOLS+=('top.sh')
TOOLS+=('memstats.sh')
TOOLS+=('netstats.sh')
TOOLS+=('postgres.sh')
TOOLS+=('vswitch.sh')
TOOLS+=('filestats.sh')
TOOLS+=('live_stream.py')
LOG "Cleanup engtools:"
# Brute force methods (assume trouble with: service collect-engtools.sh stop)
# ( be sure not to clobber /etc/init.d/collect-engtools.sh )
LOG "kill processes brute force"
pids=( $(pidof -x /usr/local/bin/collect-engtools.sh) )
if [ ${#pids[@]} -ne 0 ]
then
LOG "killing: ${pids[@]}"
for pid in ${pids[@]}
do
LOG "kill: [ ${pid} ] "
pkill -KILL -P ${pid}
kill -9 ${pid}
done
pkill -KILL iostat
pkill -KILL top
else
LOG "no pids found"
fi
LOG "remove pidfiles"
for TOOL in "${TOOLS[@]}"
do
rm -f -v /var/run/${TOOL}.pid
done
LOG "done"
exit 0

View File

@@ -0,0 +1,14 @@
[Unit]
Description=Engineering data collection tools to monitor host performance
After=network.service
[Service]
Type=forking
ExecStart=/etc/init.d/collect-engtools.sh start
ExecStop=/etc/init.d/collect-engtools.sh stop
ExecReload=/etc/init.d/collect-engtools.sh reload
PIDFile=/var/run/collect-engtools.sh.pid
Restart=always
[Install]
WantedBy=multi-user.target
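Once installed, the unit is managed like any other systemd service; the following is a usage sketch using standard systemctl/journalctl invocations (the %post scriptlet in the spec above already enables and starts it):
systemctl status collect-engtools.service
journalctl -u collect-engtools.service
systemctl stop collect-engtools.service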

View File

@@ -0,0 +1,334 @@
#!/bin/bash
# Usage:
# collect-engtools.sh [-f] [-p <period_mins>] [-i <interval_seconds>] [-c <cpulist>] [-h]
# Define common utility functions
TOOLBIN=$(dirname $0)
. ${TOOLBIN}/engtools_util.sh
# ENABLE DEBUG (0=disable, 1=enable)
OPT_DEBUG=0
# Set options for long soak (vs. shorter collection)
#OPT_SOAK=0 # long soak
OPT_SOAK=1 # few hour soak
#OPT_SOAK=2 # < hour soak
# Define command to set nice + ionice
CMD_IDLE=$( cmd_idle_priority )
# Purge configuration options
# - how much data may be created per cycle
PURGE_HEADROOM_MB=100
# - how much remaining space to leave
PURGE_HEADROOM_PERCENT=15
# - maximum size of data collection
PURGE_MAXUSAGE_MB=1000
# Affine to pinned cores
AFFINE_PINNED=1
# Line-buffer stream output (instead of buffered)
STDBUF="stdbuf -oL"
# Define some common durations
DUR_60MIN_IN_SEC=$[60*60]
DUR_30MIN_IN_SEC=$[30*60]
DUR_15MIN_IN_SEC=$[15*60]
DUR_10MIN_IN_SEC=$[10*60]
DUR_5MIN_IN_SEC=$[5*60]
DUR_1MIN_IN_SEC=$[1*60]
# Global variables
declare -a parallel_outfiles
declare df_size_bytes
declare df_avail_bytes
declare du_used_bytes
declare tgt_avail_bytes
declare tgt_used_bytes
# do_parallel_commands - launch parallel tools with separate output files
function do_parallel_commands()
{
parallel_outfiles=()
for elem in "${tlist[@]}"
do
tool=""; period=""; repeat=""; interval=""
my_hash="elem[*]"
local ${!my_hash}
if [ ! -z "${name}" ]; then
fname="${TOOL_DEST_DIR}/${HOSTNAME}_${timestamp}_${name}"
parallel_outfiles+=( $fname )
LOG "collecting ${tool}, ${interval} second intervals, to: ${fname}"
if [ ! -z "${period}" ]; then
${STDBUF} ${tool} -p ${period} -i ${interval} > ${fname} 2>/dev/null &
elif [ ! -z "${repeat}" ]; then
${STDBUF} ${tool} --repeat=${repeat} --delay=${interval} > ${fname} 2>/dev/null &
fi
else
# run without file output (e.g., ticker)
${STDBUF} ${tool} -p ${period} -i ${interval} 2>/dev/null &
fi
done
}
# get_current_avail_usage() - get output destination file-system usage and
# availability.
# - updates: df_size_bytes, df_avail_bytes, du_used_bytes
function get_current_avail_usage()
{
local -a df_arr_bytes=( $(df -P --block-size=1 ${TOOL_DEST_DIR} | awk 'NR==2 {print $2, $4}') )
df_size_bytes=${df_arr_bytes[0]}
df_avail_bytes=${df_arr_bytes[1]}
du_used_bytes=$(du --block-size=1 ${TOOL_DEST_DIR} | awk 'NR==1 {print $1}')
}
# purge_oldest_files() - remove oldest files based on file-system available space,
# and maximum collection size
function purge_oldest_files()
{
# get current file-system usage
get_current_avail_usage
msg=$(printf "avail %d MB, headroom %d MB; used %d MB, max %d MB" \
$[$df_avail_bytes/1024/1024] $[$tgt_avail_bytes/1024/1024] \
$[$du_used_bytes/1024/1024] $[$tgt_used_bytes/1024/1024])
LOG "usage: ${msg}"
if [[ $df_avail_bytes -lt $tgt_avail_bytes ]] || \
[[ $du_used_bytes -gt $tgt_used_bytes ]]; then
# wait for compression to complete
wait
get_current_avail_usage
if [[ $df_avail_bytes -lt $tgt_avail_bytes ]]; then
msg=$(printf "purge: avail %d MB < target %d MB" \
$[$df_avail_bytes/1024/1024] $[$tgt_avail_bytes/1024/1024] )
LOG "purge: ${msg}"
fi
if [[ $du_used_bytes -gt $tgt_used_bytes ]]; then
msg=$(printf "purge: used %d MB > target %d MB" \
$[$du_used_bytes/1024/1024] $[$tgt_used_bytes/1024/1024] )
LOG "purge: ${msg}"
fi
else
return
fi
# remove files in oldest time sorted order until we meet usage targets,
# incrementally updating usage as we remove files
for file in $( ls -rt ${TOOL_DEST_DIR}/${HOSTNAME}_* 2>/dev/null )
do
if [[ $df_avail_bytes -ge $tgt_avail_bytes ]] && \
[[ $du_used_bytes -le $tgt_used_bytes ]]; then
break
fi
if [ ${OPT_DEBUG} -eq 1 ]; then
msg="purge: file=$file"
if [[ $df_avail_bytes -lt $tgt_avail_bytes ]]; then
msg="${msg}, < AVAIL"
fi
if [[ $du_used_bytes -gt $tgt_used_bytes ]]; then
msg="${msg}, > MAXUSAGE"
fi
LOG "${msg}"
fi
sz_bytes=$(stat --printf="%s" $file)
((df_avail_bytes += sz_bytes))
((du_used_bytes -= sz_bytes))
rm -fv ${file}
done
}
#-------------------------------------------------------------------------------
# MAIN Program:
#-------------------------------------------------------------------------------
# Read configuration variable file if it is present
NAME=collect-engtools.sh
[ -r /etc/default/$NAME ] && . /etc/default/$NAME
# Initialize tool
tools_init
# Parse input options
tools_parse_options "${@}"
# Set affinity of current script
CPULIST=""
# Affine tools to NOVA pinned cores (i.e., non-cpu 0)
# - remove interference with cpu 0
if [ "${AFFINE_PINNED}" -eq 1 ]; then
NOVA_CONF=/etc/nova/compute_extend.conf
if [ -f "${NOVA_CONF}" ]; then
source "${NOVA_CONF}"
CPULIST=${compute_pinned_cpulist}
else
CPULIST=""
fi
fi
set_affinity ${CPULIST}
# Define output directory
if [[ "${HOSTNAME}" =~ "controller-" ]]; then
TOOL_DEST_DIR=/scratch/syseng_data/${HOSTNAME}
elif [[ "${HOSTNAME}" =~ "compute-" ]]; then
TOOL_DEST_DIR=/tmp/syseng_data/${HOSTNAME}
else
TOOL_DEST_DIR=/tmp/syseng_data/${HOSTNAME}
fi
mkdir -p ${TOOL_DEST_DIR}
# Define daemon log output
timestamp=$( date +"%Y-%0m-%0e_%H%M" )
DAEMON_OUT="${TOOL_DEST_DIR}/${HOSTNAME}_${timestamp}_${TOOLNAME}.log"
# Redirect stdout and append to log if not connected to TTY
if test ! -t 1 ; then
exec 1>> ${DAEMON_OUT}
fi
# Get current availability and usage
get_current_avail_usage
# Calculate disk usage and availability purge targets
df_offset_bytes=$[$PURGE_HEADROOM_MB*1024*1024]
tgt_used_bytes=$[$PURGE_MAXUSAGE_MB*1024*1024]
((tgt_avail_bytes = df_size_bytes/100*PURGE_HEADROOM_PERCENT + df_offset_bytes))
# Set granularity based on duration
if [ $PERIOD_MIN -le 30 ]; then
GRAN_MIN=5
else
GRAN_MIN=60
fi
# Adjust repeats and intervals based on GRAN_MIN granularity
PERIOD_MIN=$[($PERIOD_MIN+(GRAN_MIN-1))/GRAN_MIN*GRAN_MIN]
((REPEATS = PERIOD_MIN/GRAN_MIN))
GRAN_MIN_IN_SEC=$[$GRAN_MIN*60]
if [ ${INTERVAL_SEC} -gt ${GRAN_MIN_IN_SEC} ]; then
INTERVAL_SEC=${GRAN_MIN_IN_SEC}
fi
# Define tools and options
# [ JGAULD - need config file for customization; long soak vs specific tools ]
# [ Ideally sample < 5 second granularity, but files get big, and tool has cpu overhead ]
# [ Need < 5 second granularity to see cache pressure/flush issues ]
# [ Desire 60 sec interval for soak ]
if [ ${OPT_SOAK} -eq 1 ]; then
# Desire 60 second or greater interval for longer term data collections,
# otherwise collection files get too big.
schedtop_interval=20
occtop_interval=60
memtop_interval=60
netstats_interval=60
# JGAULD: temporarily increase frequency to 1 min
postgres_interval=${DUR_1MIN_IN_SEC}
#postgres_interval=${DUR_15MIN_IN_SEC}
rabbitmq_interval=${DUR_15MIN_IN_SEC}
ceph_interval=${DUR_15MIN_IN_SEC}
diskstats_interval=${DUR_15MIN_IN_SEC}
memstats_interval=${DUR_15MIN_IN_SEC}
filestats_interval=${DUR_15MIN_IN_SEC}
elif [ ${OPT_SOAK} -eq 2 ]; then
# Assume much shorter collection (eg, < hours)
schedtop_interval=2 # i.e., 2 second interval
occtop_interval=2 # i.e., 2 second interval
memtop_interval=1 # i.e., 1 second interval
netstats_interval=30 # i.e., 30 second interval
postgres_interval=${DUR_5MIN_IN_SEC}
rabbitmq_interval=${DUR_5MIN_IN_SEC}
ceph_interval=${DUR_5MIN_IN_SEC}
diskstats_interval=${DUR_5MIN_IN_SEC}
memstats_interval=${DUR_5MIN_IN_SEC}
filestats_interval=${DUR_5MIN_IN_SEC}
else
# Assume shorter collection (eg, < a few hours)
schedtop_interval=5 # i.e., 5 second interval
occtop_interval=5 # i.e., 5 second interval
memtop_interval=5 # i.e., 5 second interval
netstats_interval=30 # i.e., 30 second interval
postgres_interval=${DUR_5MIN_IN_SEC}
rabbitmq_interval=${DUR_5MIN_IN_SEC}
ceph_interval=${DUR_5MIN_IN_SEC}
diskstats_interval=${DUR_5MIN_IN_SEC}
memstats_interval=${DUR_5MIN_IN_SEC}
filestats_interval=${DUR_5MIN_IN_SEC}
fi
schedtop_repeat=$[ $PERIOD_MIN * 60 / $schedtop_interval ]
occtop_repeat=$[ $PERIOD_MIN * 60 / $occtop_interval ]
memtop_repeat=$[ $PERIOD_MIN * 60 / $memtop_interval ]
netstats_repeat=$[ $PERIOD_MIN * 60 / $netstats_interval ]
# Disable use of INTERVAL_SEC sample interval
OPT_USE_INTERVALS=0
# Define parallel engtools configuration
# - tool name, filename, and collection interval attributes
BINDIR=/usr/bin
LBINDIR=/usr/local/bin
while IFS='' read -r line || [[ -n "$line" ]]; do
if [[ $line =~ 'ENABLE_STATIC_COLLECTION'* ]]; then
static_collection=${line:25:1}
fi
done < /etc/engtools/engtools.conf
declare -a tlist
if [[ $static_collection == "Y" ]] || [[ $static_collection == "y" ]]; then
tlist+=( "tool=${LBINDIR}/top.sh name=top period=${PERIOD_MIN} interval=${DUR_1MIN_IN_SEC}" )
tlist+=( "tool=${LBINDIR}/iostat.sh name=iostat period=${PERIOD_MIN} interval=${DUR_1MIN_IN_SEC}" )
tlist+=( "tool=${LBINDIR}/netstats.sh name=netstats period=${PERIOD_MIN} interval=${netstats_interval}" )
tlist+=( "tool=${BINDIR}/occtop name=occtop repeat=${occtop_repeat} interval=${occtop_interval}" )
tlist+=( "tool=${BINDIR}/memtop name=memtop repeat=${memtop_repeat} interval=${memtop_interval}" )
tlist+=( "tool=${BINDIR}/schedtop name=schedtop repeat=${schedtop_repeat} interval=${schedtop_interval}" )
tlist+=( "tool=${LBINDIR}/diskstats.sh name=diskstats period=${PERIOD_MIN} interval=${diskstats_interval}" )
tlist+=( "tool=${LBINDIR}/memstats.sh name=memstats period=${PERIOD_MIN} interval=${memstats_interval}" )
tlist+=( "tool=${LBINDIR}/filestats.sh name=filestats period=${PERIOD_MIN} interval=${filestats_interval}" )
if [[ "${HOSTNAME}" =~ "controller-" ]]; then
tlist+=( "tool=${LBINDIR}/ceph.sh name=ceph period=${PERIOD_MIN} interval=${ceph_interval}" )
tlist+=( "tool=${LBINDIR}/postgres.sh name=postgres period=${PERIOD_MIN} interval=${postgres_interval}" )
# tlist+=( "tool=${LBINDIR}/rabbitmq.sh name=rabbitmq period=${PERIOD_MIN} interval=${rabbitmq_interval}" )
elif [[ "${HOSTNAME}" =~ "compute-" ]]; then
tlist+=( "tool=${LBINDIR}/vswitch.sh name=vswitch period=${PERIOD_MIN} interval=${DUR_1MIN_IN_SEC}" )
fi
fi
# ticker - shows progress on the screen
tlist+=( "tool=${LBINDIR}/ticker.sh name= period=${PERIOD_MIN} interval=${DUR_1MIN_IN_SEC}" )
#-------------------------------------------------------------------------------
# Main loop
#-------------------------------------------------------------------------------
OPT_DEBUG=0
REP=0
while [[ ${TOOL_USR1_SIGNAL} -eq 0 ]] &&
[[ ${OPT_FOREVER} -eq 1 || ${REP} -lt ${REPEATS} ]]
do
# increment loop counter
((REP++))
# purge oldest files
purge_oldest_files
# define filename timestamp
timestamp=$( date +"%Y-%0m-%0e_%H%M" )
# collect tools in parallel to separate output files
LOG "collecting ${TOOLNAME} at ${timestamp} for ${PERIOD_MIN} mins, repeat=${REP}"
do_parallel_commands
wait
# Compress latest increment
LOG "compressing: ${parallel_outfiles[@]}"
${CMD_IDLE} bzip2 -q -f ${parallel_outfiles[@]} 2>/dev/null &
done
# wait for compression to complete
wait
tools_cleanup 0
exit 0
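Note the TOOL_USR1_SIGNAL check in the loop condition above: the collector exits cleanly at the end of the current repeat when it receives USR1 (see tools_usr1_handler in engtools_util.sh). A graceful shutdown from the shell is therefore a sketch like:
kill -USR1 $(cat /var/run/collect-engtools.sh.pid)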

View File

@@ -0,0 +1,122 @@
#!/bin/bash
# Usage: diskstats.sh
TOOLBIN=$(dirname $0)
# Initialize tools environment variables, and define common utility functions
. ${TOOLBIN}/engtools_util.sh
tools_init
if [ $? -ne 0 ]; then
echo "FATAL, tools_init - could not setup environment"
exit 1
fi
# Enable use of INTERVAL_SEC sample interval
OPT_USE_INTERVALS=1
# Print disk summary
function print_disk()
{
print_separator
TOOL_HIRES_TIME
# NOTES:
# --total (grand-total) is a new option, but don't necessarily want to add tmpfs
# or dummy filesystems.
# - use -H to print in SI (eg, GB, vs GiB)
# - can use -a to print all filesystems including dummy filesystems, but then
# there can be double-counting:
print_separator
cmd='df -h -H -T --local -t ext2 -t ext3 -t ext4 -t xfs --total'
${ECHO} "Disk space usage ext2,ext3,ext4,xfs,tmpfs (SI):"
${ECHO} "# ${cmd}" ; ${cmd} ; ${ECHO}
print_separator
cmd='df -h -H -T --local -i -t ext2 -t ext3 -t ext4 -t xfs --total'
${ECHO} "Disk inodes usage ext2,ext3,ext4,xfs,tmpfs (SI):"
${ECHO} "# ${cmd}" ; ${cmd} ; ${ECHO}
print_separator
cmd='drbd-overview'
${ECHO} "drbd disk usage and status:"
${ECHO} "# ${cmd}" ; ${cmd} ; ${ECHO}
print_separator
cmd='lvs'
${ECHO} "logical volumes usage and status:"
${ECHO} "# ${cmd}" ; ${cmd} ; ${ECHO}
print_separator
cmd='pvs'
${ECHO} "physical volumes usage and status:"
${ECHO} "# ${cmd}" ; ${cmd} ; ${ECHO}
print_separator
cmd='vgs'
${ECHO} "volume groups usage and status:"
${ECHO} "# ${cmd}" ; ${cmd} ; ${ECHO}
}
# Print disk static summary
function print_disk_static()
{
print_separator
cmd='cat /proc/scsi/scsi'
${ECHO} "Attached devices: ${cmd}"
${cmd}
${ECHO}
# fdisk - requires sudo/root
print_separator
cmd='fdisk -l'
if [ $UID -eq 0 ]; then
${ECHO} "List disk devices: ${cmd}"
${cmd}
else
WARNLOG "Skipping cmd=${cmd}, root/sudo passwd required"
fi
${ECHO}
# parted - requires sudo/root
print_separator
cmd='parted -l'
if [ $UID -eq 0 ]; then
${ECHO} "List disk devices: ${cmd}"
${cmd}
else
WARNLOG "Skipping cmd=${cmd}, root/sudo passwd required"
fi
${ECHO}
}
#-------------------------------------------------------------------------------
# MAIN Program:
#-------------------------------------------------------------------------------
# Parse input options
tools_parse_options "${@}"
# Set affinity of current script
CPULIST=""
set_affinity ${CPULIST}
LOG "collecting ${TOOLNAME} for ${PERIOD_MIN} minutes, with ${INTERVAL_SEC} second sample intervals."
# Print tools generic tools header
tools_header
# Print static disk information
print_disk_static
# Calculate number of sample repeats based on overall interval and sampling interval
((REPEATS = PERIOD_MIN * 60 / INTERVAL_SEC))
for ((rep=1; rep <= REPEATS ; rep++))
do
print_disk
sleep ${INTERVAL_SEC}
done
print_disk
LOG "done"
# normal program exit
tools_cleanup 0
exit 0

View File

@@ -0,0 +1,478 @@
#!/bin/bash
TOOLNAME=$(basename $0)
PIDFILE=/var/run/${TOOLNAME}.pid
TOOL_DEBUG=1
TOOL_EXIT_SIGNAL=0
TOOL_USR1_SIGNAL=0
TOOL_USR2_SIGNAL=0
TOOL_TTY=0
if tty 1>/dev/null ; then
TOOL_TTY=1
fi
# [ JGAULD : SHOULD RENAME TO TOOL_X ]
OPT_USE_INTERVALS=0
OPT_FOREVER=0
PERIOD_MIN=5
INTERVAL_SEC=60
CPULIST=0
# Include lsb functions
if [ -d /lib/lsb ]; then
. /lib/lsb/init-functions
else
. /etc/init.d/functions
fi
# Lightweight replacement for pidofproc -p <pid>
function check_pidfile ()
{
local pidfile pid
OPTIND=1
while getopts p: opt ; do
case "$opt" in
p)
pidfile="$OPTARG"
;;
esac
done
shift $(($OPTIND - 1))
read pid < "${pidfile}"
if [ -n "${pid:-}" ]; then
if $(kill -0 "${pid:-}" 2> /dev/null); then
echo "$pid"
return 0
elif ps "${pid:-}" >/dev/null 2>&1; then
echo "$pid"
return 0 # program is running, but not owned by this user
else
return 1 # program is dead and /var/run pid file exists
fi
fi
}
# tools_init - initialize tool resources
function tools_init ()
{
local rc=0
local error=0
TOOLNAME=$(basename $0)
# Check for sufficient privileges
if [ $UID -ne 0 ]; then
ERRLOG "${TOOLNAME} requires sudo/root access."
return 1
fi
# Check for essential binaries
ECHO=$(which echo 2>/dev/null)
rc=$?
if [ $rc -ne 0 ]; then
ECHO=echo # use bash built-in echo
${ECHO} "FATAL, 'echo' not found, rc=$rc";
error=$rc
fi
DATE=$(which date 2>/dev/null)
rc=$?
if [ $rc -ne 0 ]; then
${ECHO} "FATAL, 'date' not found, rc=$rc";
error=$rc
fi
# Check for standard linux binaries, at least can use LOG functions now
# - these are used in tools_header
CAT=$(which cat 2>/dev/null)
rc=$?
if [ $rc -ne 0 ]; then
ERRLOG "'cat' not found, rc=$rc";
error=$rc
fi
ARCH=$(which arch 2>/dev/null)
rc=$?
if [ $rc -ne 0 ]; then
ERRLOG "'arch' not found, rc=$rc";
error=$rc
fi
SED=$(which sed 2>/dev/null)
rc=$?
if [ $rc -ne 0 ]; then
ERRLOG "'sed' not found, rc=$rc";
error=$rc
fi
GREP=$(which grep 2>/dev/null)
rc=$?
if [ $rc -ne 0 ]; then
ERRLOG "'grep' not found, rc=$rc";
error=$rc
fi
WC=$(which wc 2>/dev/null)
rc=$?
if [ $rc -ne 0 ]; then
ERRLOG "'wc' not found, rc=$rc";
error=$rc
fi
UNAME=$(which uname 2>/dev/null)
rc=$?
if [ $rc -ne 0 ]; then
ERRLOG "'uname' not found, rc=$rc";
error=$rc
fi
SORT=$(which sort 2>/dev/null)
rc=$?
if [ $rc -ne 0 ]; then
ERRLOG "'sort' not found, rc=$rc";
error=$rc
fi
TR=$(which tr 2>/dev/null)
rc=$?
if [ $rc -ne 0 ]; then
ERRLOG "'tr' not found, rc=$rc";
error=$rc
fi
AWK=$(which awk 2>/dev/null)
rc=$?
if [ $rc -ne 0 ]; then
ERRLOG "'awk' not found, rc=$rc";
error=$rc
fi
PKILL=$(which pkill 2>/dev/null)
rc=$?
if [ $rc -ne 0 ]; then
ERRLOG "'pkill' not found, rc=$rc";
error=$rc
fi
LS=$(which ls 2>/dev/null)
rc=$?
if [ $rc -ne 0 ]; then
ERRLOG "'ls' not found, rc=$rc";
error=$rc
fi
# The following block is needed for LSB systems such as Wind River Linux.
# The utility is not available on CentOS, so comment it out.
# Generic utility, but may not be available
# LSB=$(which lsb_release 2>/dev/null)
# rc=$?
# if [ $rc -ne 0 ]; then
# WARNLOG "'lsb_release' not found, rc=$rc";
# fi
# Let parent program decide what to do with the errors,
# give ominous warning
if [ $error -eq 1 ]; then
WARNLOG "possibly cannot continue, missing linux binaries"
fi
# Check if tool was previously running
if [ -e ${PIDFILE} ]; then
# [ JGAULD - remove pidofproc() / LSB compatibility issue ]
if check_pidfile -p "${PIDFILE}" >/dev/null; then
ERRLOG "${PIDFILE} exists and ${TOOLNAME} is running"
return 1
else
# remove pid file
WARNLOG "${PIDFILE} exists but ${TOOLNAME} is not running; cleaning up"
rm -f ${PIDFILE}
fi
fi
# Create pid file
echo $$ > ${PIDFILE}
# Setup trap handler - these signals trigger child shutdown and cleanup
trap tools_exit_handler INT HUP TERM EXIT
trap tools_usr1_handler USR1
trap tools_usr2_handler USR2
return ${rc}
}
# tools_cleanup() - terminate child processes
function tools_cleanup() {
# restore signal handling to default behaviour
trap - INT HUP TERM EXIT
trap - USR1 USR2
local VERBOSE_OPT=''
if [ "$1" -ne "0" ]; then
LOG "cleanup invoked with code: $1"
if [ ${TOOL_DEBUG} -ne 0 ]; then
VERBOSE_OPT='-v'
fi
fi
# stop all processes launched from this process
pkill -TERM -P $$
if [ "$1" -ne "0" ]; then
sleep 1
fi
# OK, if the above didn't work, use force
pkill -KILL -P $$
# remove pid file
if [ -e ${PIDFILE} ]; then
rm -f ${VERBOSE_OPT} ${PIDFILE}
fi
exit $1
}
# tools_exit_handler() - exit handler routine
function tools_exit_handler() {
TOOL_EXIT_SIGNAL=1
tools_cleanup 128
}
# tools_usr1_handler() - USR1 handler routine
function tools_usr1_handler() {
TOOL_USR1_SIGNAL=1
LOG "caught USR1"
}
# tools_usr2_handler() - USR2 handler routine
function tools_usr2_handler() {
TOOL_USR2_SIGNAL=1
LOG "caught USR1"
}
# LOG(), WARNLOG(), ERRLOG() - simple print log functions (not logger)
function LOG ()
{
local tstamp_H=$( date +"%Y-%0m-%0e %H:%M:%S" )
echo "${tstamp_H} ${HOSTNAME} $0($$): $@";
}
function LOG_NOCR ()
{
local tstamp_H=$( date +"%Y-%0m-%0e %H:%M:%S" )
echo -n "${tstamp_H} ${HOSTNAME} $0($$): $@";
}
function WARNLOG () { LOG "WARN $@"; }
function ERRLOG () { LOG "ERROR $@"; }
# TOOL_HIRES_TIME() - easily parsed date/timestamp and hi-resolution uptime
function TOOL_HIRES_TIME()
{
echo "time: " $( ${DATE} +"%a %F %H:%M:%S.%N %Z %z" ) "uptime: " $( cat /proc/uptime )
}
# set_affinity() - set affinity for current script if a CPULIST is defined
function set_affinity() {
local CPULIST=$1
if [ -z "${CPULIST}" ]; then
return
fi
# Set cpu affinity for current program
local TASKSET=$(which taskset 2>/dev/null)
if [ -x "${TASKSET}" ]; then
${TASKSET} -pc ${CPULIST} $$ 2>/dev/null
fi
}
# cmd_idle_priority() - command to set nice + ionice
function cmd_idle_priority() {
local NICE=""
local IONICE=""
NICE=$( which nice 2>/dev/null )
if [ $? -eq 0 ]; then
NICE="${NICE} -n 19"
else
NICE=""
fi
IONICE=$( which ionice 2>/dev/null )
if [ $? -eq 0 ]; then
IONICE="${IONICE} -c 3"
else
IONICE=""
fi
echo "${NICE} ${IONICE}"
}
# print_separator() - print a horizontal separator line ('\u002d' is '-')
function print_separator () {
printf '\u002d%.s' {1..80}
printf '\n'
}
# tools_header() - print out common GenWare tools header
function tools_header() {
local TOOLNAME=$(basename $0)
# Get timestamp
#local tstamp=$( date +"%Y-%0m-%0e %H:%M:%S" 2>/dev/null )
local tstamp=$( date --rfc-3339=ns | cut -c1-23 2>/dev/null )
# Linux Generic
local UPTIME=/proc/uptime
# Get number of online cpus
local CPUINFO=/proc/cpuinfo
local online_cpus=$( cat ${CPUINFO} | grep -i ^processor | wc -l 2>/dev/null )
# Get load average, run-queue size, and number of threads
local LOADAVG=/proc/loadavg
local LDAVG=( `cat ${LOADAVG} | sed -e 's#[/]# #g' 2>/dev/null` )
# Get current architecture
local arch=$( uname -m )
# Determine processor name (there are many different formats... *sigh* )
# - build up info from multiple lines
local processor='unk'
local NAME=$( cat ${CPUINFO} | grep \
-e '^cpu\W\W:' \
-e ^'cpu model' \
-e ^'model name' \
-e ^'system type' \
-e ^Processor \
-e ^[Mm]achine | \
sort -u | awk 'BEGIN{FS=":";} {print $2;}' | \
tr '\n' ' ' | tr -s [:blank:] 2>/dev/null )
if [ ! -z "${NAME}" ]; then
processor=${NAME}
fi
# Determine processor speed (abort grep after first match)
local speed='unk'
local BOGO=$( cat ${CPUINFO} | grep -m1 -e ^BogoMIPS -e ^bogomips | \
awk 'BEGIN{FS=":";} {printf "%.1f", $2;}' 2>/dev/null )
local MHZ=$( cat ${CPUINFO} | grep -m1 -e ^'cpu MHz' -e ^clock | \
awk 'BEGIN{FS=":";} {printf "%.1f", $2;}' 2>/dev/null )
local MHZ2=$( cat ${CPUINFO} | grep -m1 -e ^Cpu0ClkTck -e ^'cycle frequency' | \
awk 'BEGIN{FS=":";} {printf "%.1f", $2/1.0E6;}' 2>/dev/null )
if [ ! -z "${MHZ}" ]; then
speed=${MHZ}
elif [ ! -z "${MHZ2}" ]; then
speed=${MHZ2}
elif [ ! -z ${BOGO} ]; then
speed=${BOGO}
fi
# Determine OS and kernel version
local os_name=$( uname -s 2>/dev/null )
local os_release=$( uname -r 2>/dev/null )
declare -a arr
local dist_id=""
# Determine OS distribution ID
if [ lsb_pres == "yes" ]; then
arr=( $( lsb_release -i 2>/dev/null ) )
dist_id=${arr[2]}
else
local dist_id=$(cat /etc/centos-release | awk '{print $1}' 2>/dev/null)
fi
local dist_rel=""
if [ lsb_pres == "yes" ]; then
# Determine OS distribution release
arr=( $( cat /proc/version | awk '{print $3}' 2>/dev/null ) )
local dist_rel=${arr[1]}
else
local dist_rel=$(cat /etc/centos-release | awk '{print $4}' 2>/dev/null)
fi
# Print generic header
echo "${TOOLNAME} -- ${tstamp} load average:${LDAVG[0]}, ${LDAVG[1]}, ${LDAVG[2]} runq:${LDAVG[3]} nproc:${LDAVG[4]}"
echo " host:${HOSTNAME} Distribution:${dist_id} ${dist_rel} ${os_name} ${os_release}"
echo " arch:${arch} processor:${processor} speed:${speed} MHz CPUs:${online_cpus}"
}
# tools_usage() - show generic tools tool usage
function tools_usage() {
if [ ${OPT_USE_INTERVALS} -eq 1 ]; then
echo "usage: ${TOOLNAME} [-f] [-p <period_mins>] [-i <interval_seconds>] [-c <cpulist>] [-h]"
else
echo "Usage: ${TOOLNAME} [-f] [-p <period_mins>] [-c <cpulist>] [-h]"
fi
}
# tools_print_help() - print generic tool help
function tools_print_help() {
tools_usage
echo
echo "Options:";
echo " -f : collect forever : default: none"
echo " -p <period_minutes> : overall collection period (minutes) : default: ${DEFAULT_PERIOD_MIN}"
if [ ${OPT_USE_INTERVALS} -eq 1 ]; then
echo " -i <interval_seconds> : sample interval (seconds) : default: ${DEFAULT_INTERVAL_SEC}"
fi
echo " -c <cpulist> : cpu list where tool runs (e.g., 0-1,8) : default: none"
echo
if [ ${OPT_USE_INTERVALS} -eq 1 ]; then
echo "Example: collect 5 minute period, sample every 30 seconds interval"
echo " ${TOOLNAME} -p 5 -i 30"
else
echo "Example: collect 5 minute period"
echo " ${TOOLNAME} -p 5"
fi
}
# tools_parse_options() -- parse common options for tools scripts
function tools_parse_options() {
# check for no arguments, print usage
if [ $# -eq "0" ]; then
tools_usage
tools_cleanup 0
exit 0
fi
# parse the input arguments
while getopts "fp:i:c:h" Option
do
case $Option in
f)
OPT_FOREVER=1
PERIOD_MIN=60
;;
p) PERIOD_MIN=$OPTARG ;;
i)
OPT_USE_INTERVALS=1
INTERVAL_SEC=$OPTARG
;;
c) CPULIST=$OPTARG ;;
h)
tools_print_help
tools_cleanup 0
exit 0
;;
*)
tools_usage
tools_cleanup 0
exit 0
;;
esac
done
# validate input arguments
PERIOD_MAX=$[4*24*60]
INTERVAL_MAX=$[60*60]
error=0
if [[ ${PERIOD_MIN} -lt 1 || ${PERIOD_MIN} -gt ${PERIOD_MAX} ]]; then
echo "-p <period_mid> must be > 0 and <= ${PERIOD_MAX}."
error=1
fi
if [[ ${INTERVAL_SEC} -lt 1 || ${INTERVAL_SEC} -gt ${INTERVAL_MAX} ]]; then
echo "-i <interval> must be > 0 and <= ${INTERVAL_MAX}."
error=1
fi
if [ ${error} -eq 1 ]; then
tools_cleanup 0
exit 1
fi
}
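Every collector above follows the same skeleton provided by this utility file. For reference, a minimal new collector built on it would look roughly like this (a sketch; mytool.sh and the sampled command are hypothetical):
#!/bin/bash
# mytool.sh - hypothetical collector built on engtools_util.sh
TOOLBIN=$(dirname $0)
. ${TOOLBIN}/engtools_util.sh
tools_init
if [ $? -ne 0 ]; then
echo "FATAL, tools_init - could not setup environment"
exit 1
fi
# Enable use of INTERVAL_SEC sample interval
OPT_USE_INTERVALS=1
tools_parse_options "${@}"
CPULIST=""
set_affinity ${CPULIST}
tools_header
((REPEATS = PERIOD_MIN * 60 / INTERVAL_SEC))
for ((rep = 1; rep <= REPEATS; rep++))
do
print_separator
TOOL_HIRES_TIME
# ... run the hypothetical sampling command here ...
sleep ${INTERVAL_SEC}
done
tools_cleanup 0
exit 0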

View File

@@ -0,0 +1,98 @@
#!/bin/bash
# Usage: filestats.sh [-p <period_mins>] [-i <interval_seconds>] [-c <cpulist>] [-h]
TOOLBIN=$(dirname $0)
# Initialize tools environment variables, and define common utility functions
. ${TOOLBIN}/engtools_util.sh
tools_init
if [ $? -ne 0 ]; then
echo "FATAL, tools_init - could not setup environment"
exit 1
fi
PAGE_SIZE=$(getconf PAGE_SIZE)
# Enable use of INTERVAL_SEC sample interval
OPT_USE_INTERVALS=1
function print_files()
{
print_separator
TOOL_HIRES_TIME
${ECHO} "# ls -l /proc/*/fd"
sudo ls -l /proc/*/fd 2>/dev/null | awk \
'$11 ~ /socket/ {a += 1} ; \
$11 ~ /null/ {b += 1} ; \
{c += 1} \
END {\
{printf "%-10s %-10s %-10s %-10s\n", "TOTAL", "FILES", "SOCKETS", "NULL PIPES"} \
{printf "%-10s %-10s %-10s %-10s\n", c, c-(a+b) , a, b}}'
${ECHO}
${ECHO} "# lsof"
printf "%-7s %-7s %-6s %-6s %-6s %-6s %-6s %-6s %-6s %-6s %-6s %-6s %s\n" "PID" "TOTAL" "FD" "U" "W" "R" "CWD" "RTD" "TXT" "MEM" "DEL" "TCP" "CMD"
sudo lsof +c 15| awk '$3 !~ /^[0-9]+/{ {pids[$2]["COMMAND"]=$1}\
{pids[$2]["PID"]=$2}\
{pids[$2]["TOTAL"]+=1}\
{pids[$2]["TCP"]+=($8=="TCP")? 1 : 0}\
{($4 ~ /^[0-9][0-9]*[urw]/ )? \
pids[$2][substr($4, length($4),1)]+=1 : pids[$2][$4]+=1} }
END {
{ for (i in pids) \
if(pids[i]["PID"]!="PID") {
{printf "%-7s %-7s %-6s %-6s %-6s %-6s %-6s %-6s %-6s %-6s %-6s %-6s %s\n", \
pids[i]["PID"], \
pids[i]["TOTAL"],\
((pids[i]["u"]!="")? pids[i]["u"] : 0) + ((pids[i]["w"]!="")? pids[i]["w"] : 0 )+ ((pids[i]["r"]!="")? pids[i]["r"] : 0),\
(pids[i]["u"]!="")? pids[i]["u"] : 0,\
(pids[i]["w"]!="")? pids[i]["w"] : 0,\
(pids[i]["r"]!="")? pids[i]["r"] : 0,\
(pids[i]["cwd"]!="")? pids[i]["cwd"] : 0,\
(pids[i]["rtd"]!="")? pids[i]["rtd"] : 0,\
(pids[i]["txt"]!="")? pids[i]["txt"] : 0,\
(pids[i]["mem"]!="")? pids[i]["mem"] : 0,\
(pids[i]["DEL"]!="")? pids[i]["DEL"] : 0,\
(pids[i]["TCP"]!="")? pids[i]["TCP"] : 0,\
pids[i]["COMMAND"]} }}}' | sort -n -r -k3
${ECHO}
${ECHO} "# lsof -nP +L1"
sudo lsof -nP +L1
${ECHO}
}
#-------------------------------------------------------------------------------
# MAIN Program:
#-------------------------------------------------------------------------------
# Parse input options
tools_parse_options "${@}"
# Set affinity of current script
CPULIST=""
set_affinity ${CPULIST}
LOG "collecting ${TOOLNAME} for ${PERIOD_MIN} minutes, with ${INTERVAL_SEC} second sample intervals."
# Print tools generic tools header
tools_header
# Calculate number of sample repeats based on overall interval and sampling interval
((REPEATS = PERIOD_MIN * 60 / INTERVAL_SEC))
for ((rep=1; rep <= REPEATS ; rep++))
do
print_files
sleep ${INTERVAL_SEC}
done
print_files
LOG "done"
# normal program exit
tools_cleanup 0
exit 0

View File

@@ -0,0 +1,120 @@
#!/bin/bash
### BEGIN INIT INFO
# Provides: collect-engtools
# Required-Start: $local_fs $network $syslog postgresql
# Required-Stop: $local_fs $network $syslog postgresql
# Default-Start: 2 3 4 5
# Default-Stop: 0 1 6
# Short-Description: initscript to launch engineering tools data collection daemon
# Description: initscript to launch engineering tools data collection daemon
### END INIT INFO
PATH=/sbin:/usr/sbin:/bin:/usr/bin
DESC="collect engtools service"
NAME="collect-engtools.sh"
DAEMON=/usr/local/bin/${NAME}
DAEMON_ARGS="-f"
PIDFILE=/var/run/${NAME}.pid
SCRIPTNAME=/etc/init.d/${NAME}
DEFAULTFILE=/etc/default/${NAME}
# Exit if the package is not installed
[ -x "$DAEMON" ] || exit 0
. /etc/init.d/functions
# Read configuration variable file if it is present
[ -r $DEFAULTFILE ] && . $DEFAULTFILE
# Load the VERBOSE setting and other rcS variables
#. /lib/init/vars.sh
# Define lsb fallback versions of:
# log_daemon_msg(), log_end_msg()
log_daemon_msg() { echo -n "${1:-}: ${2:-}"; }
log_end_msg() { echo "."; }
# Use lsb functions to perform the operations.
if [ -f /lib/lsb/init-functions ]; then
. /lib/lsb/init-functions
fi
# Check for sufficient privileges
# [ JGAULD : possibly provide user = 'operator' option instead... ]
if [ $UID -ne 0 ]; then
log_daemon_msg "Starting ${NAME} requires sudo/root access."
exit 1
fi
case $1 in
start)
if [ -e ${PIDFILE} ]; then
pid=$(pidof -x ${NAME})
if test "${pid}" != ""
then
echo_success "${NAME} already running"
exit
fi
fi
log_daemon_msg "Starting ${NAME}"
if start-stop-daemon --start --background --quiet --oknodo --pidfile ${PIDFILE} \
--exec ${DAEMON} -- ${DAEMON_ARGS} ; then
/usr/local/bin/live_stream.py &
log_end_msg 0
else
log_end_msg 1
fi
;;
stop)
if [ -e ${PIDFILE} ]; then
pids=$(pidof -x ${NAME})
if [[ ! -z "${pids}" ]]
then
echo_success "Stopping ${NAME} [$pid]"
start-stop-daemon --stop --quiet --oknodo --pidfile ${PIDFILE} --retry=TERM/3/KILL/5
# [ JGAULD: none of the following should be necessary ]
/usr/local/bin/cleanup-engtools.sh
else
echo_failure "${NAME} is not running"
fi
else
echo_failure "${PIDFILE} does not exist"
fi
;;
restart)
$0 stop && sleep 2 && $0 start
;;
status)
if [ -e ${PIDFILE} ]; then
pid=$(pidof -x ${NAME})
if test "${pid}" != ""
then
echo_success "${NAME} is running"
else
echo_success "${NAME} is not running"
fi
else
echo_success "${NAME} is not running"
fi
;;
reload)
if [ -e ${PIDFILE} ]; then
start-stop-daemon --stop --signal USR1 --quiet --pidfile ${PIDFILE} --name ${NAME}
echo_success "${NAME} reloaded successfully"
else
echo_success "${PIDFILE} does not exist"
fi
;;
*)
echo "Usage: $0 {start|stop|restart|reload|status}"
exit 2
;;
esac
exit 0
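# Typical invocation once installed (illustrative; see also the remote
# start/stop helper scripts later in this change):
#   sudo service collect-engtools.sh start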

View File

@ -0,0 +1,49 @@
#!/bin/bash
# Usage: iostat.sh [-p <period_mins>] [-i <interval_seconds>] [-c <cpulist>] [-h]
TOOLBIN=$(dirname $0)
# Initialize tools environment variables, and define common utility functions
. ${TOOLBIN}/engtools_util.sh
tools_init
RC=$?
if [ ${RC} -ne 0 ]; then
echo "FATAL, tools_init - could not set up environment"
exit ${RC}
fi
# Enable use of INTERVAL_SEC sample interval
OPT_USE_INTERVALS=1
IOSTAT=$( which iostat 2>/dev/null )
if [ $? -ne 0 ]; then
print_separator
WARNLOG "iostat not available"
tools_cleanup 0
fi
#-------------------------------------------------------------------------------
# MAIN Program:
#-------------------------------------------------------------------------------
# Parse input options
tools_parse_options "${@}"
# Set affinity of current script
CPULIST=""
set_affinity ${CPULIST}
LOG "collecting ${TOOLNAME} for ${PERIOD_MIN} minutes, with ${INTERVAL_SEC} second sample intervals."
# Print the generic tools header
tools_header
# Calculate number of sample repeats based on overall interval and sampling interval
((REPEATS = PERIOD_MIN * 60 / INTERVAL_SEC))
((REP = REPEATS + 1))
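# one extra report (REP) since iostat's first sample reports averages since boot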
# Execute tool for specified duration
CMD="${IOSTAT} -k -x -t ${INTERVAL_SEC} ${REP}"
#LOG "CMD: ${CMD}"
${CMD}
LOG "done"
# normal program exit
tools_cleanup 0
exit 0

View File

@ -0,0 +1,547 @@
#!/bin/bash
username="wrsroot"
password="Li69nux*"
test_duration="30"
wait_duration="5"
udp_find_0_frameloss="1"
udp_max_iter="20"
udp_granularity="100000"
result_dir="/home/${username}/benchmark_results"
summary_file="${result_dir}/benchmark_summary.xls"
host=""
remote=""
controllers=()
computes=()
nodes=()
max_compute_node="10"
interfaces=("")
# udp header total length: Ethernet header ( 14 ) + CRC ( 4 ) + IPv4 header ( 20 ) + UDP header ( 8 )
udp_header_len="46"
# icmp header total length: ICMP header ( 8 ) + IPv4 header ( 20 )
icmp_header_len="28"
frame_sizes=(64 128 256 512 1024 1280 1518)
ssh_opt="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -q"
# Ports used for the different traffic types (except hiprio). These were chosen arbitrarily since they are otherwise unused:
# 8000 - storage; 8001 - migration; 8002 - default; 8003 - drbd
controller_ports=(8000 8001 8002 8003)
compute_ports=(8000 8001 8002)
traffic_types=(storage migration default drbd)
flow_ids=(1:20 1:30 1:40 1:50)
function exec_cmd ()
{
node="$1"
cmd="$2"
if [[ "${node}" == *"${host}"* ]]; then
echo "$(bash -c "${cmd}")"
else
echo "$(ssh ${ssh_opt} ${username}@${node} "${cmd}")"
fi
}
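# Illustrative usage (hypothetical node name): runs the command locally when
# the node is this host, over ssh otherwise:
#   up=$(exec_cmd "compute-0" "uptime")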
function iperf3_server_start ()
{
local server="$1"
local result="$2"
local port="$3"
local cmd="iperf3 -s"
if [ "${port}" ]; then
cmd="${cmd} -p ${port}"
fi
cmd="nohup ${cmd} > ${result} 2>&1 &"
$(exec_cmd "${server}" "${cmd}")
}
function iperf3_client_tcp_start ()
{
local result="${result_dir}/throughput"
local cmd=""
local client="$1"
local server="$2"
local port="$3"
cmd="iperf3 -t ${test_duration} -c $(get_ip_addr "${server}")"
if [ "${port}" ]; then
cmd="${cmd} -p ${port} -O ${wait_duration}"
result="${result}_parallel_${port}"
else
result="${result}_tcp"
if [[ "${server}" == *"infra"* ]]; then
result="${result}_infra"
fi
fi
$(exec_cmd "${client}" "${cmd} > ${result} 2>&1")
}
function iperf3_client_udp_start ()
{
local result="${result_dir}/throughput_udp"
local cmd=""
local client="$1"
local server="$2"
local frame_size="$3"
local bw="0"
if [ "${4}" ]; then
bw="${4}"
fi
cmd="iperf3 -u -t ${test_duration} -c $(get_ip_addr ${server})"
if [ ${frame_size} ]; then
cmd="${cmd} -l ${frame_size}"
result="${result}_$[${frame_size}+${udp_header_len}]"
fi
if [[ ${server} == *"infra"* ]]; then
result="${result}_infra"
fi
$(exec_cmd "${client}" "${cmd} -b ${bw} >> ${result} 2>&1" )
}
function iperf3_stop ()
{
local node="$1"
local cmd="pkill iperf3"
$(exec_cmd "${node}" "${cmd}")
}
function get_ip_addr ()
{
arp -a | grep -oP "(?<=$1 \()[^)]*" | head -n 1
}
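# Illustrative: "arp -a" prints lines such as
#   controller-1 (192.168.204.4) at <mac> [ether] on <dev>
# (hostname/address hypothetical); the grep captures the address in parentheses.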
function throughput_tcp_test()
{
for (( i = 0; i < ${#nodes[@]} ; i+=2 )); do
for interface in "${interfaces[@]}"; do
local interface_name="management"
local interface_suffix=""
local result_suffix=""
if [ "${interface}" == "infra" ]; then
interface_name="infrastructure"
interface_suffix="-infra"
result_suffix="_infra"
fi
local result_file="${result_dir}/throughput_tcp${result_suffix}"
printf "Running TCP throughput test between ${nodes[${i}]} and ${nodes[$[${i}+1]]}'s ${interface_name} network..."
iperf3_server_start ${nodes[$[${i}+1]]}${interface_suffix} ${result_file}
iperf3_client_tcp_start ${nodes[${i}]}${interface_suffix} ${nodes[$[${i}+1]]}${interface_suffix}
iperf3_stop ${nodes[$[${i}+1]]}${interface_suffix}
result=$(exec_cmd "${nodes[${i}]}" "awk '/sender/ {print \$7 \" \" \$8}' ${result_file}")
printf " Done (${result})\n"
done
done
}
function throughput_udp_test ()
{
for (( i = 0; i < ${#nodes[@]} ; i+=2 )); do
for interface in "${interfaces[@]}"; do
local interface_name="management"
local interface_suffix=""
local result_suffix=""
if [ "${interface}" == "infra" ]; then
interface_name="infrastructure"
interface_suffix="-infra"
result_suffix="_infra"
fi
echo "Running UDP throughput test between ${nodes[${i}]} and ${nodes[$[${i}+1]]}'s ${interface_name} network"
for frame_size in "${frame_sizes[@]}"; do
local max_bw="0"
local min_bw="0"
local cur_bw="0"
local old_bw="0"
local result=""
local result_unit=""
local frame_loss=""
local max_result=""
local max_result_unit=""
local max_frame_loss=""
local result_file="${result_dir}/throughput_udp_${frame_size}${result_suffix}"
local iter="0"
local diff=""
printf "\tFrame size = ${frame_size}..."
while true; do
iperf3_server_start ${nodes[$[${i}+1]]}${interface_suffix} ${result_file}
iperf3_client_udp_start ${nodes[${i}]}${interface_suffix} ${nodes[$[${i}+1]]}${interface_suffix} $[${frame_size}-${udp_header_len}] ${cur_bw}
iperf3_stop ${nodes[$[${i}+1]]}${interface_suffix}
result=$(exec_cmd "${nodes[${i}]}" "awk '/%/ {print \$7}' ${result_file} | tail -n1")
result_unit=$(exec_cmd "${nodes[${i}]}" "awk '/%/ {print \$8}' ${result_file} | tail -n1")
frame_loss=$(exec_cmd "${nodes[${i}]}" "awk '/%/ {print \$12}' ${result_file} | tail -n1 | tr -d '()%'")
if [ "${udp_find_0_frameloss}" == "1" ]; then
if [ "${iter}" -eq "0" ]; then
max_result="${result}"
max_result_unit="${result_unit}"
max_frame_loss="${frame_loss}"
fi
if [ $(echo ${frame_loss} | grep e) ]; then
frame_loss="$(echo ${frame_loss} | sed 's/e/*10^/g;s/ /*/' )"
fi
if [ "$(echo "${frame_loss} > 0" | bc -l)" -eq "1" ]; then
max_bw="${result}"
if [ "${result_unit}" == "Kbits/sec" ]; then
max_bw="$(echo "(${max_bw} * 1000) / 1" | bc)"
elif [ "${result_unit}" == "Mbits/sec" ]; then
max_bw="$(echo "(${max_bw} * 1000000) / 1" | bc)"
elif [ "${result_unit}" == "Gbits/sec" ]; then
max_bw="$(echo "(${max_bw} * 1000000000) / 1" | bc)"
fi
else
if [ "${iter}" -eq "0" ]; then
break
else
min_bw="${result}"
if [ "${result_unit}" == "Kbits/sec" ]; then
min_bw="$(echo "(${min_bw} * 1000) / 1" | bc)"
elif [ "${result_unit}" == "Mbits/sec" ]; then
min_bw="$(echo "(${min_bw} * 1000000) / 1" | bc)"
elif [ "${result_unit}" == "Gbits/sec" ]; then
min_bw="$(echo "(${min_bw} * 1000000000) / 1" | bc)"
fi
fi
fi
old_bw="${cur_bw}"
cur_bw="$[(${max_bw} + ${min_bw}) / 2]"
diff="$(echo "$[${cur_bw} - ${old_bw}]" | tr -d '-')"
#break
((iter++))
if [ "${diff}" -lt "${udp_granularity}" ]; then
break
fi
if [ "${udp_max_iter}" -ne "0" ] && [ "${iter}" -ge "${udp_max_iter}" ]; then
break
fi
else
break
fi
done
if [ "${udp_find_0_frameloss}" == "1" ]; then
printf " Done (%s %s @ %s%% & %s %s @ %s%%)\n" "${max_result}" "${max_result_unit}" "${max_frame_loss}" "${result}" "${result_unit}" "${frame_loss}"
else
printf " Done (%s %s @ %s%%)\n" "${result}" "${result_unit}" "${frame_loss}"
fi
done
done
done
}
function throughput_parallel_test ()
{
local dev=""
local ip_addr=""
local interface_name=""
local interface_suffix=""
local result_file="${result_dir}/throughput_parallel"
# get device name of the interface
if [ "${#interfaces[@]}" -gt "1" ]; then
interface_name="infrastructure"
interface_suffix="-infra"
ip_addr=$(ping -c1 ${host}-infra | awk -F'[()]' '/PING/{print $2}')
else
interface_name="management"
ip_addr=$(ping -c1 ${host} | awk -F'[()]' '/PING/{print $2}')
fi
dev=$(ifconfig | grep -B1 "inet ${ip_addr}" | awk '$1!="inet" && $1!="--" {print $1}')
# set all the filters
for node in ${nodes[@]}; do
local ports=("${controller_ports[@]}")
if [[ "${node}" == *"compute"* ]]; then
ports=("${compute_ports[@]}")
fi
for i in $(seq 0 $[${#ports[@]} - 1]); do
if [ ${traffic_types[i]} != "default" ]; then
tc_dport="tc filter add dev ${dev} protocol ip parent 1:0 prio 1 u32 match ip protocol 6 0xff match ip dport ${ports[i]} 0xffff flowid ${flow_ids[i]}"
tc_sport="tc filter add dev ${dev} protocol ip parent 1:0 prio 1 u32 match ip protocol 6 0xff match ip sport ${ports[i]} 0xffff flowid ${flow_ids[i]}"
$(exec_cmd "${node}" "echo ${password} | sudo -S bash -c '${tc_dport}; ${tc_sport}' > /dev/null 2>&1")
fi
done
done
# run the tests
for (( i = 0; i < ${#nodes[@]} ; i+=2 )); do
local ports=("${controller_ports[@]}")
if [[ "${nodes[${i}]}" == *"compute"* ]]; then
ports=("${compute_ports[@]}")
fi
printf "Running parallel throughput test between ${nodes[${i}]} and ${nodes[$[${i}+1]]}'s ${interface_name} network..."
# start the servers
for port in "${ports[@]}"; do
iperf3_server_start "${nodes[$[${i}+1]]}${interface_suffix}" "${result_file}_${port}" "${port}"
done
#start the clients
for port in "${controller_ports[@]}"; do
iperf3_client_tcp_start ${nodes[${i}]}${interface_suffix} ${nodes[$[${i}+1]]}${interface_suffix} ${port} &
done
sleep $[${test_duration} + ${wait_duration} + 1]
iperf3_stop ${nodes[$[${i}+1]]}${interface_suffix}
printf " Done\n"
# get results
for j in $(seq 0 $[${#ports[@]} - 1]); do
result=$(exec_cmd "${nodes[${i}]}" "awk '/sender/ {print \$7 \" \" \$8}' ${result_file}_${ports[${j}]}")
printf "\t${traffic_types[$j]} = ${result}\n"
done
done
# remove all the filters
for node in ${nodes[@]}; do
local handles=()
local ports=("${controller_ports[@]}")
if [[ "${node}" == *"compute"* ]]; then
ports=("${compute_ports[@]}")
fi
handles=($(exec_cmd "${node}" "/usr/sbin/tc filter show dev ${dev} | awk '/filter/ {print \$10}' | tail -n $[(${#ports[@]} - 1) * 2 ]"))
for handle in "${handles[@]}"; do
$(exec_cmd "${node}" "echo ${password} | sudo -S /usr/sbin/tc filter delete dev ${dev} parent 1: handle ${handle} prio 1 u32 > /dev/null 2>&1")
done
done
}
function latency_test ()
{
for (( i = 0; i < ${#nodes[@]} ; i+=2 )); do
for interface in "${interfaces[@]}"; do
local interface_name="management"
local interface_suffix=""
local result_suffix=""
if [ "${interface}" == "infra" ]; then
interface_name="infrastructure"
interface_suffix="-infra"
result_suffix="_infra"
fi
echo "Running latency test between ${nodes[${i}]} and ${nodes[$[${i}+1]]}'s ${interface_name} network"
for frame_size in "${frame_sizes[@]}"; do
local result_file="${result_dir}/latency_${frame_size}${result_suffix}"
printf "\tFrame size = ${frame_size}..."
$(exec_cmd "${nodes[${i}]}" "ping -s $[${frame_size}-8] -w ${test_duration} -i 0.2 ${nodes[$[${i}+1]]}${interface_suffix} > ${result_file} 2>&1")
result=$(exec_cmd "${nodes[${i}]}" "awk '/rtt/ {print \$2 \" = \" \$4 \" \" \$5}' ${result_file}")
printf " Done (%s)\n" "${result}"
done
done
done
}
function setup ()
{
for node in ${nodes[@]}; do
iperf3_stop "${node}"
$(exec_cmd "${node}" "rm -rf ${result_dir}; mkdir -p ${result_dir}")
done
}
function get_remote_results ()
{
for node in ${nodes[@]}; do
if [ "${node}" != "${host}" ]; then
mkdir ${result_dir}/${node}
scp ${ssh_opt} ${username}@${node}:${result_dir}/* ${result_dir}/${node} > /dev/null 2>&1
fi
done
}
function get_interface_info ()
{
local dev=""
local ip_addr=""
printf "Network interfaces info\n" >> ${summary_file}
for interface in "${interfaces[@]}"; do
local interface_suffix=""
local interface_name="management"
if [ "${interface}" == "infra" ]; then
interface_name="infrastructure"
interface_suffix="-infra"
fi
ip_addr=$(ping -c1 ${host}${interface_suffix} | awk -F'[()]' '/PING/{print $2}')
dev=$(ifconfig | grep -B1 "inet ${ip_addr}" | awk '$1!="inet" && $1!="--" {print $1}')
printf "%s network interface\n" "${interface_name}" >> ${summary_file}
echo ${password} | sudo -S ethtool ${dev} >> ${summary_file}
done
}
function generate_summary ()
{
local header=""
local result=""
local result_file=""
printf "Summary\n\n" > ${summary_file}
printf "Throughput TCP\n" >> ${summary_file}
for (( i = 0; i < ${#nodes[@]} ; i+=2 )); do
for interface in "${interfaces[@]}"; do
local node_type="controller"
local interface_type="mgmt"
local result_suffix=""
if [[ "${nodes[${i}]}" == *"compute"* ]]; then
node_type="compute"
fi
if [ "${interface}" == "infra" ]; then
interface_type="infra"
result_suffix="_infra"
fi
header="${header},${node_type}'s ${interface_type}"
result_file="${result_dir}"
if [ ${node_type} == "compute" ]; then
result_file="${result_file}/${nodes[${i}]}"
fi
result_file="${result_file}/throughput_tcp${result_suffix}"
result="${result},$(awk '/sender/ {print $7 " " $8}' ${result_file})"
done
done
printf "%s\n%s\n\n" "${header}" "${result}" >> ${summary_file}
printf "Throughput UDP\n" >> ${summary_file}
header=",frame,max throughput,max frameloss"
if [ "${udp_find_0_frameloss}" == "1" ]; then
header="${header},final throughput, final frameloss"
fi
for (( i = 0; i < ${#nodes[@]} ; i+=2 )); do
for interface in "${interfaces[@]}"; do
local node_type="controller"
local interface_type="mgmt"
local result_suffix=""
if [[ "${nodes[${i}]}" == *"compute"* ]]; then
node_type="compute"
fi
if [ "${interface}" == "infra" ]; then
interface_type="infra"
result_suffix="_infra"
fi
printf "%s's %s\n%s\n" "${node_type}" "${interface_type}" "${header}" >> ${summary_file}
result_file=${result_dir}
if [ ${node_type} == "compute" ]; then
result_file="${result_file}/${nodes[${i}]}"
fi
for frame in ${frame_sizes[@]}; do
result="${frame},$(awk '/%/ {print $7 " " $8}' ${result_file}/throughput_udp_${frame}${result_suffix} | head -n1),$(awk '/%/ {print $12}' ${result_file}/throughput_udp_${frame}${result_suffix} | head -n1 | tr -d '()')"
if [ "${udp_find_0_frameloss}" == "1" ]; then
result="${result},$(awk '/%/ {print $7 " " $8}' ${result_file}/throughput_udp_${frame}${result_suffix} | tail -n1),$(awk '/%/ {print $12}' ${result_file}/throughput_udp_${frame}${result_suffix} | tail -n1 | tr -d '()')"
fi
printf ",%s\n" "${result}" >> ${summary_file}
done
printf "\n" >> ${summary_file}
done
done
printf "Parallel throughput result\n" >> ${summary_file}
header=",Node type"
for traffic_type in "${traffic_types[@]}"; do
header="${header},${traffic_type}"
done
printf "%s\n" "${header}" >> ${summary_file}
for (( i = 0; i < ${#nodes[@]} ; i+=2 )); do
local node_type="controller"
local ports=("${controller_ports[@]}")
if [[ "${nodes[${i}]}" == *"compute"* ]]; then
node_type="compute"
fi
result_file=${result_dir}
if [ ${node_type} == "compute" ]; then
ports=("${compute_ports[@]}")
result_file="${result_file}/${nodes[${i}]}"
fi
result=",${node_type}"
for port in "${ports[@]}"; do
result="${result},$(awk '/sender/ {print $7 " " $8}' ${result_file}/throughput_parallel_${port})"
done
printf "%s\n" "${result}" >> ${summary_file}
done
printf "\nLatency result in ms\n" >> ${summary_file}
for (( i = 0; i < ${#nodes[@]} ; i+=2 )); do
for interface in "${interfaces[@]}"; do
local node_type="controller"
local interface_type="mgmt"
local result_suffix=""
if [[ "${nodes[${i}]}" == *"compute"* ]]; then
node_type="compute"
fi
if [ "${interface}" == "infra" ]; then
interface_type="infra"
result_suffix="_infra"
fi
printf "%s's %s network\n" "${node_type}" "${interface_type}" >> ${summary_file}
result_file=${result_dir}
if [ ${node_type} == "compute" ]; then
result_file="${result_file}/${nodes[${i}]}"
fi
result_file="${result_file}/latency"
printf ",frame size,%s\n" "$(awk '/rtt/ {print $2}' ${result_file}_${frame_sizes}${result_suffix} | tr '/' ',' )" >> ${summary_file}
for frame_size in "${frame_sizes[@]}"; do
printf ",%s,%s\n" "${frame_size}" "$(awk '/rtt/ {print $4}' ${result_file}_${frame_size}${result_suffix} | tr '/' ',' )" >> ${summary_file}
done
printf "latency distribution\n" >> ${summary_file}
printf ",frame size" >> ${summary_file}
for (( j = 1; j < "20" ; j+=1 )); do
printf ",%s" "$(echo "scale=3;${j}/100" | bc | awk '{printf "%.3f", $0}')" >> ${summary_file}
done
printf "\n" >> ${summary_file}
for frame_size in "${frame_sizes[@]}"; do
printf ",%s" "${frame_size}" >> ${summary_file}
for (( j = 1; j < "20" ; j+=1 )); do
printf ",%s" "$(grep -c "time=$(echo "scale=2;${j}/100" | bc | awk '{printf "%.2f", $0}')" ${result_file}_${frame_size}${result_suffix})" >> ${summary_file}
done
printf "\n" >> ${summary_file}
done
printf "\n" >> ${summary_file}
done
done
get_interface_info
}
echo "Starting linux interface benchmark test. ($(date))"
# find the nodes to test
host=${HOSTNAME}
if [ "${host}" == "controller-1" ]; then
remote="controller-0"
else
remote="controller-1"
fi
# the other controller needs to be reachable
ping -c1 ${remote} > /dev/null 2>&1
if [ $? -eq 0 ]; then
controllers=(${host} ${remote})
nodes+=("${controllers[@]}")
else
echo "Stopping test as ${remote} is not reachable"
exit 1
fi
# check if infrastructure interface is provisioned
ping -c1 "${remote}-infra" > /dev/null 2>&1
if [ $? -eq 0 ]; then
echo "Infrastructure network is provisioned"
interfaces+=("infra")
fi
# check if there are any compute nodes
for i in $(seq 0 $[${max_compute_node} - 1]); do
ping -c1 compute-${i} > /dev/null 2>&1
if [ $? -eq 0 ]; then
computes+=("compute-${i}")
if [ ${#computes[@]} -ge "2" ]; then
nodes+=("${computes[@]}")
break
fi
fi
done
setup
throughput_tcp_test
throughput_udp_test
throughput_parallel_test
latency_test
get_remote_results
generate_summary
echo "Linux interface benchmark test finished. ($(date))"

File diff suppressed because it is too large

View File

@ -0,0 +1,112 @@
#!/bin/bash
# Usage: memstats.sh [-p <period_mins>] [-i <interval_seconds>] [-c <cpulist>] [-h]
TOOLBIN=$(dirname $0)
# Initialize tools environment variables, and define common utility functions
. ${TOOLBIN}/engtools_util.sh
tools_init
RC=$?
if [ ${RC} -ne 0 ]; then
echo "FATAL, tools_init - could not set up environment"
exit ${RC}
fi
PAGE_SIZE=$(getconf PAGE_SIZE)
# Enable use of INTERVAL_SEC sample interval
OPT_USE_INTERVALS=1
# Print key memory statistics
function print_memory()
{
# Configuration for netcmds
MEMINFO=/proc/meminfo
NODEINFO=/sys/devices/system/node/node?/meminfo
BUDDYINFO=/proc/buddyinfo
SLABINFO=/proc/slabinfo
print_separator
TOOL_HIRES_TIME
${ECHO} "# ${MEMINFO}"
${CAT} ${MEMINFO}
${ECHO}
${ECHO} "# ${NODEINFO}"
${CAT} ${NODEINFO}
${ECHO}
${ECHO} "# ${BUDDYINFO}"
${CAT} ${BUDDYINFO}
${ECHO}
${ECHO} "# PSS"
cat /proc/*/smaps 2>/dev/null | \
awk '/^Pss:/ {a += $2;} END {printf "%d MiB\n", a/1024.0;}'
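# (Pss prorates shared pages across their users, so summing it over all
# processes approximates total userspace resident memory without double-counting.)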
${ECHO}
# use old slabinfo format (i.e. slub not enabled in kernel)
${ECHO} "# ${SLABINFO}"
${CAT} ${SLABINFO} | \
awk -v page_size_B=${PAGE_SIZE} '
BEGIN {page_KiB = page_size_B/1024; TOT_KiB = 0;}
(NF == 17) {
gsub(/[<>]/, "");
printf("%-22s %11s %8s %8s %10s %12s %1s %5s %10s %12s %1s %12s %9s %11s %8s\n",
$2, $3, $4, $5, $6, $7, $8, $10, $11, $12, $13, $15, $16, $17, "KiB");
}
(NF == 16) {
num_objs=$3; obj_per_slab=$5; pages_per_slab=$6;
KiB = (obj_per_slab > 0) ? page_KiB*num_objs/obj_per_slab*pages_per_slab : 0;
TOT_KiB += KiB;
printf("%-22s %11d %8d %8d %10d %12d %1s %5d %10d %12d %1s %12d %9d %11d %8d\n",
$1, $2, $3, $4, $5, $6, $7, $9, $10, $11, $12, $14, $15, $16, KiB);
}
END {
printf("%-22s %11s %8s %8s %10s %12s %1s %5s %10s %12s %1s %12s %9s %11s %8d\n",
"TOTAL", "-", "-", "-", "-", "-", ":", "-", "-", "-", ":", "-", "-", "-", TOT_KiB);
}
' 2>/dev/null
${ECHO}
${ECHO} "# disk usage: rootfs, tmpfs"
cmd='df -h -H -T --local -t rootfs -t tmpfs'
${ECHO} "Disk space usage rootfs,tmpfs (SI):"
${ECHO} "${cmd}"
${cmd}
${ECHO}
CMD='ps -e -o ppid,pid,nlwp,rss:10,vsz:10,cmd --sort=-rss'
${ECHO} "# ${CMD}"
${CMD}
${ECHO}
}
#-------------------------------------------------------------------------------
# MAIN Program:
#-------------------------------------------------------------------------------
# Parse input options
tools_parse_options "${@}"
# Set affinity of current script
CPULIST=""
set_affinity ${CPULIST}
LOG "collecting ${TOOLNAME} for ${PERIOD_MIN} minutes, with ${INTERVAL_SEC} second sample intervals."
# Print the generic tools header
tools_header
# Calculate number of sample repeats based on overall interval and sampling interval
((REPEATS = PERIOD_MIN * 60 / INTERVAL_SEC))
for ((rep=1; rep <= REPEATS ; rep++))
do
print_memory
sleep ${INTERVAL_SEC}
done
print_memory
LOG "done"
# normal program exit
tools_cleanup 0
exit 0

View File

@ -0,0 +1,66 @@
#!/bin/bash
# Usage: netstats.sh [-p <period_mins>] [-i <interval_seconds>] [-c <cpulist>] [-h]
TOOLBIN=$(dirname $0)
# Initialize tools environment variables, and define common utility functions
. ${TOOLBIN}/engtools_util.sh
tools_init
RC=$?
if [ ${RC} -ne 0 ]; then
echo "FATAL, tools_init - could not set up environment"
exit ${RC}
fi
# Enable use of INTERVAL_SEC sample interval
OPT_USE_INTERVALS=1
# Print key networking device statistics
function print_netcmds()
{
# Configuration for netcmds
DEV=/proc/net/dev
NETSTAT=/proc/net/netstat
print_separator
TOOL_HIRES_TIME
for net in \
${DEV} ${NETSTAT}
do
if [ -e "${net}" ]
then
${ECHO} "# ${net}"
${CAT} ${net}
${ECHO}
fi
done
}
#-------------------------------------------------------------------------------
# MAIN Program:
#-------------------------------------------------------------------------------
# Parse input options
tools_parse_options "${@}"
# Set affinity of current script
CPULIST=""
set_affinity ${CPULIST}
LOG "collecting ${TOOLNAME} for ${PERIOD_MIN} minutes, with ${INTERVAL_SEC} second sample intervals."
# Print the generic tools header
tools_header
# Calculate number of sample repeats based on overall interval and sampling interval
((REPEATS = PERIOD_MIN * 60 / INTERVAL_SEC))
for ((rep=1; rep <= REPEATS ; rep++))
do
print_netcmds
sleep ${INTERVAL_SEC}
done
print_netcmds
LOG "done"
# normal program exit
tools_cleanup 0
exit 0

View File

@ -0,0 +1,141 @@
#!/bin/bash
# Usage: postgres.sh [-p <period_mins>] [-i <interval_seconds>] [-c <cpulist>] [-h]
TOOLBIN=$(dirname $0)
# Initialize tools environment variables, and define common utility functions
. ${TOOLBIN}/engtools_util.sh
tools_init
RC=$?
if [ ${RC} -ne 0 ]; then
echo "FATAL, tools_init - could not set up environment"
exit ${RC}
fi
# Enable use of INTERVAL_SEC sample interval
OPT_USE_INTERVALS=1
# Print key postgres statistics
function print_postgres()
{
print_separator
TOOL_HIRES_TIME
# postgresql command: set user, disable pagination, and be quiet
PSQL="sudo -u postgres psql --pset pager=off -q"
# List postgres databases
db_list=( $(${PSQL} -t -c "SELECT datname FROM pg_database WHERE datistemplate = false;") )
${ECHO} "# postgres databases"
echo "db_list = ${db_list[@]}"
${ECHO}
# List sizes of all postgres databases (similar to "\l+")
${ECHO} "# postgres database sizes"
${PSQL} -c "
SELECT
pg_database.datname,
pg_database_size(pg_database.datname),
pg_size_pretty(pg_database_size(pg_database.datname))
FROM pg_database
ORDER BY pg_database_size DESC;
"
# For each database, list tables and their sizes (similar to "\dt+")
for db in "${db_list[@]}"
do
${ECHO} "# postgres database: ${db}"
${PSQL} -d ${db} -c "
SELECT
table_schema,
table_name,
pg_size_pretty(table_size) AS table_size,
pg_size_pretty(indexes_size) AS indexes_size,
pg_size_pretty(total_size) AS total_size,
live_tuples,
dead_tuples
FROM (
SELECT
table_schema,
table_name,
pg_table_size(table_name) AS table_size,
pg_indexes_size(table_name) AS indexes_size,
pg_total_relation_size(table_name) AS total_size,
pg_stat_get_live_tuples(table_name::regclass) AS live_tuples,
pg_stat_get_dead_tuples(table_name::regclass) AS dead_tuples
FROM (
SELECT
table_schema,
table_name
FROM information_schema.tables
WHERE table_schema='public'
AND table_type='BASE TABLE'
) AS all_tables
ORDER BY total_size DESC
) AS pretty_sizes;
"
${ECHO} "# postgres database vacuum: ${db}"
${PSQL} -d ${db} -c "
SELECT
relname,
n_live_tup,
n_dead_tup,
last_vacuum,
last_autovacuum,
last_analyze,
last_autoanalyze
FROM pg_stat_user_tables;
"
done
# Specific table counts (This is very SLOW, look at "live tuples" instead)
# Number of keystone tokens
#${ECHO} "# keystone token count"
# Number of postgres connections
${ECHO} "# postgres database connections"
CONN=$(ps -C postgres -o cmd= | wc -l)
CONN_T=$(ps -C postgres -o cmd= | awk '/postgres: / {print $3}' | awk '{for(i=1;i<=NF;i++) a[$i]++} END {for(k in a) print k, a[k]}' | sort -k 2 -nr )
${ECHO} "connections total = ${CONN}"
${ECHO}
${ECHO} "connections breakdown:"
${ECHO} "${CONN_T}"
${ECHO}
${ECHO} "connections breakdown (query):"
${PSQL} -c "SELECT datname,state,count(*) from pg_stat_activity group by datname,state;"
${ECHO}
${ECHO} "connections idle age:"
${PSQL} -c "SELECT datname,age(now(),state_change) from pg_stat_activity where state='idle';"
${ECHO}
}
#-------------------------------------------------------------------------------
# MAIN Program:
#-------------------------------------------------------------------------------
# Parse input options
tools_parse_options "${@}"
# Set affinity of current script
CPULIST=""
set_affinity ${CPULIST}
LOG "collecting ${TOOLNAME} for ${PERIOD_MIN} minutes, with ${INTERVAL_SEC} second sample intervals."
# Print the generic tools header
tools_header
# Calculate number of sample repeats based on overall interval and sampling interval
((REPEATS = PERIOD_MIN * 60 / INTERVAL_SEC))
for ((rep=1; rep <= REPEATS ; rep++))
do
print_postgres
sleep ${INTERVAL_SEC}
done
print_postgres
LOG "done"
# normal program exit
tools_cleanup 0
exit 0

View File

@ -0,0 +1,85 @@
#!/bin/bash
# Usage: rabbitmq.sh [-p <period_mins>] [-i <interval_seconds>] [-c <cpulist>] [-h]
TOOLBIN=$(dirname $0)
# Initialize tools environment variables, and define common utility functions
. ${TOOLBIN}/engtools_util.sh
tools_init
RC=$?
if [ ${RC} -ne 0 ]; then
echo "FATAL, tools_init - could not set up environment"
exit ${RC}
fi
# Enable use of INTERVAL_SEC sample interval
OPT_USE_INTERVALS=1
# Need this workaround
MQOPT="-n rabbit@localhost"
# Print key rabbitmq statistics
function print_rabbitmq()
{
print_separator
TOOL_HIRES_TIME
# IMPORTANT:
# - Difficulty getting rabbitmqctl to work from init.d script;
# apparently it requires a pseudo-TTY, which is something you don't have
# until post-init.
# - WORKAROUND: run command using 'sudo', even if you are 'root'
# Dump various rabbitmq related stats
MQ_STATUS="rabbitmqctl ${MQOPT} status"
${ECHO} "# ${MQ_STATUS}"
sudo ${MQ_STATUS} | grep -e '{memory' -A30
${ECHO}
# The following is useful in diagnosing rabbit memory leaks
# when end-users do not drain their queues (e.g., due to RPC timeout issues, etc.)
MQ_QUEUES="rabbitmqctl ${MQOPT} list_queues messages name pid messages_ready messages_unacknowledged memory consumers"
${ECHO} "# ${MQ_QUEUES}"
sudo ${MQ_QUEUES}
${ECHO}
num_queues=$(sudo rabbitmqctl ${MQOPT} list_queues | wc -l); ((num_queues-=2))
num_bindings=$(sudo rabbitmqctl ${MQOPT} list_bindings | wc -l); ((num_bindings-=2))
num_exchanges=$(sudo rabbitmqctl ${MQOPT} list_exchanges | wc -l); ((num_exchanges-=2))
num_connections=$(sudo rabbitmqctl ${MQOPT} list_connections | wc -l); ((num_connections-=2))
num_channels=$(sudo rabbitmqctl ${MQOPT} list_channels | wc -l); ((num_channels-=2))
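# (each listing includes header/footer lines, hence the -2 adjustments above)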
arr=($(sudo rabbitmqctl ${MQOPT} list_queues messages consumers memory | \
awk '/^[0-9]/ {a+=$1; b+=$2; c+=$3} END {print a, b, c}'))
messages=${arr[0]}; consumers=${arr[1]}; memory=${arr[2]}
printf "%6s %8s %9s %11s %8s %8s %9s %10s\n" \
"queues" "bindings" "exchanges" "connections" "channels" "messages" "consumers" "memory"
printf "%6d %8d %9d %11d %8d %8d %9d %10d\n" \
$num_queues $num_bindings $num_exchanges $num_connections $num_channels $messages $consumers $memory
${ECHO}
}
#-------------------------------------------------------------------------------
# MAIN Program:
#-------------------------------------------------------------------------------
# Parse input options
tools_parse_options "${@}"
# Set affinity of current script
CPULIST=""
set_affinity ${CPULIST}
LOG "collecting ${TOOLNAME} for ${PERIOD_MIN} minutes, with ${INTERVAL_SEC} second sample intervals."
# Print the generic tools header
tools_header
# Calculate number of sample repeats based on overall interval and sampling interval
((REPEATS = PERIOD_MIN * 60 / INTERVAL_SEC))
for ((rep=1; rep <= REPEATS ; rep++))
do
print_rabbitmq
sleep ${INTERVAL_SEC}
done
print_rabbitmq
LOG "done"
# normal program exit
tools_cleanup 0
exit 0

View File

@ -0,0 +1,46 @@
#!/bin/bash
# Purpose:
# bzip2 compress engtools data on all nodes.
# Define common utility functions
TOOLBIN=$(dirname $0)
. ${TOOLBIN}/engtools_util.sh
if [ $UID -eq 0 ]; then
ERRLOG "Do not start $0 using sudo/root access."
exit 1
fi
# environment for system commands
source /etc/nova/openrc
declare -a CONTROLLER
declare -a COMPUTE
declare -a STORAGE
CONTROLLER=( $(system host-list | awk '(/controller/) {print $4;}') )
COMPUTE=( $(system host-list | awk '(/compute/) {print $4;}') )
STORAGE=( $(system host-list | awk '(/storage/) {print $4;}') )
LOG "Remote bzip2 engtools data on all blades:"
for blade in ${CONTROLLER[@]}; do
ping -c1 ${blade} 1>/dev/null 2>/dev/null
if [ $? -eq 0 ]; then
LOG "bzip2 on $blade:"
ssh -q -t -o StrictHostKeyChecking=no \
${blade} sudo bzip2 /scratch/syseng_data/${blade}/*
else
WARNLOG "cannot ping: ${blade}"
fi
done
for blade in ${STORAGE[@]} ${COMPUTE[@]} ; do
ping -c1 ${blade} 1>/dev/null 2>/dev/null
if [ $? -eq 0 ]; then
LOG "bzip2 on $blade:"
ssh -q -t -o StrictHostKeyChecking=no \
${blade} sudo bzip2 /tmp/syseng_data/${blade}/*
else
WARNLOG "cannot ping: ${blade}"
fi
done
LOG "done"
exit 0

View File

@ -0,0 +1,37 @@
#!/bin/bash
# Purpose:
# Remote start engtools on all blades.
# Define common utility functions
TOOLBIN=$(dirname $0)
. ${TOOLBIN}/engtools_util.sh
if [ $UID -eq 0 ]; then
ERRLOG "Do not start $0 using sudo/root access."
exit 1
fi
# environment for system commands
source /etc/nova/openrc
declare -a BLADES
BLADES=( $(system host-list | awk '(/compute|controller|storage/) {print $4;}') )
LOG "Remote start engtools on all blades:"
for blade in ${BLADES[@]}; do
if [ "${blade}" == "${HOSTNAME}" ]; then
LOG "start on $blade:"
sudo service collect-engtools.sh start
else
ping -c1 ${blade} 1>/dev/null 2>/dev/null
if [ $? -eq 0 ]; then
LOG "start on $blade:"
ssh -q -t -o StrictHostKeyChecking=no \
${blade} sudo service collect-engtools.sh start
else
WARNLOG "cannot ping: ${blade}"
fi
fi
done
LOG "done"
exit 0

View File

@ -0,0 +1,37 @@
#!/bin/bash
# Purpose:
# Remote stop engtools on all blades.
# Define common utility functions
TOOLBIN=$(dirname $0)
. ${TOOLBIN}/engtools_util.sh
if [ $UID -eq 0 ]; then
ERRLOG "Do not start $0 using sudo/root access."
exit 1
fi
# environment for system commands
source /etc/nova/openrc
declare -a BLADES
BLADES=( $(system host-list | awk '(/compute|controller|storage/) {print $4;}') )
LOG "Remote stop engtools on all blades:"
for blade in ${BLADES[@]}; do
if [ "${blade}" == "${HOSTNAME}" ]; then
LOG "stop on $blade:"
sudo service collect-engtools.sh stop
else
ping -c1 ${blade} 1>/dev/null 2>/dev/null
if [ $? -eq 0 ]; then
LOG "stop on $blade:"
ssh -q -t -o StrictHostKeyChecking=no \
${blade} sudo service collect-engtools.sh stop
else
WARNLOG "cannot ping: ${blade}"
fi
fi
done
LOG "done"
exit 0

View File

@ -0,0 +1,70 @@
#!/bin/bash
# Purpose:
# rsync data from all nodes to backup location.
# Define common utility functions
TOOLBIN=$(dirname $0)
. ${TOOLBIN}/engtools_util.sh
if [ $UID -eq 0 ]; then
ERRLOG "Do not start $0 using sudo/root access."
exit 1
fi
# environment for system commands
source /etc/nova/openrc
declare -a BLADES
declare -a CONTROLLER
declare -a STORAGE
declare -a COMPUTE
BLADES=( $(system host-list | awk '(/compute|controller|storage/) {print $4;}') )
CONTROLLER=( $(system host-list | awk '(/controller/) {print $4;}') )
COMPUTE=( $(system host-list | awk '(/compute/) {print $4;}') )
STORAGE=( $(system host-list | awk '(/storage/) {print $4;}') )
DEST=/opt/backups/syseng_data/
if [[ "${HOSTNAME}" =~ "controller-" ]]; then
LOG "rsync DEST=${DEST}"
else
LOG "*ERROR* only run this on controller"
exit 1
fi
sudo mkdir -p ${DEST}
# rsync options
USER=wrsroot
RSYNC_OPT="-r -l --safe-links -h -P --stats --exclude=*.pyc"
# Rsync data from multiple locations
LOG "rsync engtools data from all blades:"
# controllers
SRC=/scratch/syseng_data/
DEST=/opt/backups/syseng_data/
for HOST in ${CONTROLLER[@]}
do
ping -c1 ${HOST} 1>/dev/null 2>/dev/null
if [ $? -eq 0 ]; then
LOG "rsync ${RSYNC_OPT} ${USER}@${HOST}:${SRC} ${DEST}"
sudo rsync ${RSYNC_OPT} ${USER}@${HOST}:${SRC} ${DEST}
else
WARNLOG "cannot ping: ${HOST}"
fi
done
# computes & storage
SRC=/tmp/syseng_data/
DEST=/opt/backups/syseng_data/
for HOST in ${STORAGE[@]} ${COMPUTE[@]}
do
ping -c1 ${HOST} 1>/dev/null 2>/dev/null
if [ $? -eq 0 ]; then
LOG "rsync ${RSYNC_OPT} ${USER}@${HOST}:${SRC} ${DEST}"
sudo rsync ${RSYNC_OPT} ${USER}@${HOST}:${SRC} ${DEST}
else
WARNLOG "cannot ping: ${HOST}"
fi
done
LOG 'done'
exit 0

View File

@ -0,0 +1,23 @@
#!/bin/bash
PAGE_SIZE=$(getconf PAGE_SIZE)
cat /proc/slabinfo | awk -v page_size_B=${PAGE_SIZE} '
BEGIN {page_KiB = page_size_B/1024; TOT_KiB = 0;}
(NF == 17) {
gsub(/[<>]/, "");
printf("%-22s %11s %8s %8s %10s %12s %1s %5s %10s %12s %1s %12s %9s %11s %8s\n",
$2, $3, $4, $5, $6, $7, $8, $10, $11, $12, $13, $15, $16, $17, "KiB");
}
(NF == 16) {
num_objs=$3; obj_per_slab=$5; pages_per_slab=$6;
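# cache footprint: (num_objs / obj_per_slab) slabs * pages_per_slab pages * page_KiB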
KiB = (obj_per_slab > 0) ? page_KiB*num_objs/obj_per_slab*pages_per_slab : 0;
TOT_KiB += KiB;
printf("%-22s %11d %8d %8d %10d %12d %1s %5d %10d %12d %1s %12d %9d %11d %8d\n",
$1, $2, $3, $4, $5, $6, $7, $9, $10, $11, $12, $14, $15, $16, KiB);
}
END {
printf("%-22s %11s %8s %8s %10s %12s %1s %5s %10s %12s %1s %12s %9s %11s %8d\n",
"TOTAL", "-", "-", "-", "-", "-", ":", "-", "-", "-", ":", "-", "-", "-", TOT_KiB);
}
' 2>/dev/null
exit 0

View File

@ -0,0 +1,50 @@
#!/bin/bash
# Usage: ticker.sh [-p <period_mins>] [-i <interval_seconds>] [-c <cpulist>] [-h]
TOOLBIN=$(dirname $0)
# Initialize tools environment variables, and define common utility functions
. ${TOOLBIN}/engtools_util.sh
tools_init
RC=$?
if [ ${RC} -ne 0 ]; then
echo "FATAL, tools_init - could not set up environment"
exit ${RC}
fi
# Enable use of INTERVAL_SEC sample interval
OPT_USE_INTERVALS=1
#-------------------------------------------------------------------------------
# MAIN Program:
#-------------------------------------------------------------------------------
# Parse input options
tools_parse_options "${@}"
# Set affinity of current script
CPULIST=""
set_affinity ${CPULIST}
# Calculate number of sample repeats based on overall interval and sampling interval
((REPEATS = PERIOD_MIN * 60 / INTERVAL_SEC))
((REP_LOG = 10 * 60 / INTERVAL_SEC))
LOG_NOCR "collecting "
t=0
for ((rep=1; rep <= REPEATS ; rep++))
do
((t++))
sleep ${INTERVAL_SEC}
if [ ${t} -ge ${REP_LOG} ]; then
t=0
echo "."
LOG_NOCR "collecting "
else
echo -n "."
fi
done
echo "."
LOG "done"
# normal program exit
tools_cleanup 0
exit 0

View File

@ -0,0 +1,43 @@
#!/bin/bash
# Usage: top.sh [-p <period_mins>] [-i <interval_seconds>] [-c <cpulist>] [-h]
TOOLBIN=$(dirname $0)
# Initialize tools environment variables, and define common utility functions
. ${TOOLBIN}/engtools_util.sh
tools_init
RC=$?
if [ ${RC} -ne 0 ]; then
echo "FATAL, tools_init - could not set up environment"
exit ${RC}
fi
# Enable use of INTERVAL_SEC sample interval
OPT_USE_INTERVALS=1
#-------------------------------------------------------------------------------
# MAIN Program:
#-------------------------------------------------------------------------------
# Parse input options
tools_parse_options "${@}"
# Set affinity of current script
CPULIST=""
set_affinity ${CPULIST}
LOG "collecting ${TOOLNAME} for ${PERIOD_MIN} minutes, with ${INTERVAL_SEC} second sample intervals."
# Print the generic tools header
tools_header
# Calculate number of sample repeats based on overall interval and sampling interval
((REPEATS = PERIOD_MIN * 60 / INTERVAL_SEC))
((REP = REPEATS + 1))
# Execute tool for specified duration
CMD="top -b -c -H -n ${REP} -d ${INTERVAL_SEC}"
#LOG "CMD: ${CMD}"
${CMD}
LOG "done"
# normal program exit
tools_cleanup 0
exit 0

View File

@ -0,0 +1,68 @@
#!/bin/bash
# Usage: vswitch.sh [-p <period_mins>] [-i <interval_seconds>] [-c <cpulist>] [-h]
TOOLBIN=$(dirname $0)
# Initialize tools environment variables, and define common utility functions
. ${TOOLBIN}/engtools_util.sh
tools_init
RC=$?
if [ ${RC} -ne 0 ]; then
echo "FATAL, tools_init - could not set up environment"
exit ${RC}
fi
# Enable use of INTERVAL_SEC sample interval
OPT_USE_INTERVALS=1
# Print key vswitch statistics
function print_vswitch()
{
print_separator
TOOL_HIRES_TIME
cmd='vshell engine-list'
${ECHO} "# ${cmd}" ; ${cmd} ; ${ECHO}
cmd='vshell engine-stats-list'
${ECHO} "# ${cmd}" ; ${cmd} ; ${ECHO}
cmd='vshell port-list'
${ECHO} "# ${cmd}" ; ${cmd} ; ${ECHO}
cmd='vshell port-stats-list'
${ECHO} "# ${cmd}" ; ${cmd} ; ${ECHO}
cmd='vshell network-list'
${ECHO} "# ${cmd}" ; ${cmd} ; ${ECHO}
cmd='vshell network-stats-list'
${ECHO} "# ${cmd}" ; ${cmd} ; ${ECHO}
cmd='vshell interface-list'
${ECHO} "# ${cmd}" ; ${cmd} ; ${ECHO}
cmd='vshell interface-stats-list'
${ECHO} "# ${cmd}" ; ${cmd} ; ${ECHO}
}
#-------------------------------------------------------------------------------
# MAIN Program:
#-------------------------------------------------------------------------------
# Parse input options
tools_parse_options "${@}"
# Set affinity of current script
CPULIST=""
set_affinity ${CPULIST}
LOG "collecting ${TOOLNAME} for ${PERIOD_MIN} minutes, with ${INTERVAL_SEC} second sample intervals."
# Print the generic tools header
tools_header
# Calculate number of sample repeats based on overall interval and sampling interval
((REPEATS = PERIOD_MIN * 60 / INTERVAL_SEC))
for ((rep=1; rep <= REPEATS ; rep++))
do
print_vswitch
sleep ${INTERVAL_SEC}
done
print_vswitch
LOG "done"
# normal program exit
tools_cleanup 0
exit 0