diff --git a/.zuul.yaml b/.zuul.yaml index 937b1619f..f9212f67c 100644 --- a/.zuul.yaml +++ b/.zuul.yaml @@ -27,6 +27,7 @@ timeout: 7800 required-projects: - openstack/stx-fault + - openstack/stx-update vars: tox_envlist: functional devstack_services: @@ -62,6 +63,7 @@ devstack_plugins: stx-fault: git://git.starlingx.io/stx-fault stx-integ: git://git.starlingx.io/stx-integ + stx-update: git://git.starlingx.io/stx-update devstack_localrc: LIBS_FROM_GIT: keystone files: diff --git a/centos_iso_image.inc b/centos_iso_image.inc index 63d9b3c95..12c3b587f 100644 --- a/centos_iso_image.inc +++ b/centos_iso_image.inc @@ -226,6 +226,7 @@ ntp-config syslog-ng-config rsync-config pam-config +docker-config # net-snmp net-snmp-utils diff --git a/centos_pkg_dirs b/centos_pkg_dirs index 7131b8a1a..3d94d6b46 100644 --- a/centos_pkg_dirs +++ b/centos_pkg_dirs @@ -115,6 +115,7 @@ config-files/syslog-ng-config config-files/rsync-config config-files/pam-config config-files/util-linux-config +config-files/docker-config tools/collector grub/grubby utilities/platform-util diff --git a/config-files/docker-config/centos/build_srpm.data b/config-files/docker-config/centos/build_srpm.data new file mode 100644 index 000000000..15dbe0dbc --- /dev/null +++ b/config-files/docker-config/centos/build_srpm.data @@ -0,0 +1,2 @@ +SRC_DIR="$PKG_BASE/files" +TIS_PATCH_VER=1 diff --git a/config-files/docker-config/centos/docker-config.spec b/config-files/docker-config/centos/docker-config.spec new file mode 100644 index 000000000..dc4145d3d --- /dev/null +++ b/config-files/docker-config/centos/docker-config.spec @@ -0,0 +1,31 @@ +Summary: StarlingX Docker Configuration File +Name: docker-config +Version: 1.0 +Release: %{tis_patch_ver}%{?_tis_dist} +License: Apache-2.0 +Group: config-files +Packager: StarlingX +URL: unknown + +Source0: %{name}-%{version}.tar.gz + +BuildArch: noarch +Requires: docker-ce + +%define debug_package %{nil} + +%description +StarlingX docker configuration file + +%prep +%setup + +%install +make DATADIR=%{buildroot}%{_datadir} SYSCONFDIR=%{buildroot}%{_sysconfdir} install + +%files +%defattr(-,root,root) +%license LICENSE +%dir %{_sysconfdir}/systemd/system/docker.service.d +%{_sysconfdir}/pmon.d/docker.conf +%{_sysconfdir}/systemd/system/docker.service.d/docker-stx-override.conf diff --git a/config-files/docker-config/files/LICENSE b/config-files/docker-config/files/LICENSE new file mode 100644 index 000000000..d64569567 --- /dev/null +++ b/config-files/docker-config/files/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/config-files/docker-config/files/Makefile b/config-files/docker-config/files/Makefile new file mode 100644 index 000000000..10662c559 --- /dev/null +++ b/config-files/docker-config/files/Makefile @@ -0,0 +1,11 @@ +# +# Copyright (c) 2019 Wind River Systems, Inc. 
+# +# SPDX-License-Identifier: Apache-2.0 +# + +install: + install -d -m 0755 $(SYSCONFDIR)/pmon.d + install -D -m 644 docker-pmond.conf $(SYSCONFDIR)/pmon.d/docker.conf + install -d -m 0755 $(SYSCONFDIR)/systemd/system/docker.service.d + install -D -m 644 docker-stx-override.conf $(SYSCONFDIR)/systemd/system/docker.service.d/docker-stx-override.conf diff --git a/config-files/docker-config/files/docker-pmond.conf b/config-files/docker-config/files/docker-pmond.conf new file mode 100644 index 000000000..e6c930e5b --- /dev/null +++ b/config-files/docker-config/files/docker-pmond.conf @@ -0,0 +1,15 @@ +; +; Copyright (c) 2019 Wind River Systems, Inc. +; +; SPDX-License-Identifier: Apache-2.0 +; +[process] +process = dockerd +service = docker +pidfile = /var/run/dockerd.pid +style = lsb ; lsb +severity = critical ; minor, major, critical +restarts = 3 ; restarts before error assertion +startuptime = 5 ; seconds to wait after process start +interval = 5 ; number of seconds to wait between restarts +debounce = 20 ; number of seconds to wait before degrade clear diff --git a/config-files/docker-config/files/docker-stx-override.conf b/config-files/docker-config/files/docker-stx-override.conf new file mode 100644 index 000000000..85a66b139 --- /dev/null +++ b/config-files/docker-config/files/docker-stx-override.conf @@ -0,0 +1,6 @@ +[Service] +ExecStartPost=/bin/bash -c 'echo $MAINPID > /var/run/dockerd.pid;' +ExecStopPost=/bin/rm -f /var/run/dockerd.pid + +# pmond monitors docker service +Restart=no diff --git a/devstack/lib/stx-integ b/devstack/lib/stx-integ index ad1590a4a..7683341ca 100644 --- a/devstack/lib/stx-integ +++ b/devstack/lib/stx-integ @@ -1,13 +1,7 @@ #!/bin/bash # # lib/stx-integ -# Functions to control the configuration and operation of stx-integ - -# Dependencies: # -# - ``functions`` file -# - ``DEST``, ``DATA_DIR``, ``STACK_USER`` must be defined - # ``plugin.sh`` calls the entry points in this order: # # - install_integ @@ -27,30 +21,11 @@ STXINTEG_DIR=${GITDIR[$STX_INTEG_NAME]} PLATFORM_UTIL_DIR=$STXINTEG_DIR/utilities/platform-util -STX_BIN_DIR=$(get_python_exec_prefix) +# STX_INST_DIR should be a non-root-writable place to install build artifacts +STX_INST_DIR=${STX_INST_DIR:-$DEST/usr} +STX_BIN_DIR=$STX_INST_DIR/bin PYTHON_SITE_DIR=$(python -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())") -function install_platform_util { - pushd $PLATFORM_UTIL_DIR/platform-util - sudo python setup.py install --root=/ --install-lib=$PYTHON_SITE_DIR --prefix=/usr --install-data=/usr/share --single-version-externally-managed - popd - - local stx_integ_sbindir=/usr/local/sbin/ - local systemddir=/etc/systemd - sudo install -m 755 -d ${stx_integ_sbindir} - sudo install -m 755 $PLATFORM_UTIL_DIR/scripts/patch-restart-mtce ${stx_integ_sbindir} - sudo install -m 755 $PLATFORM_UTIL_DIR/scripts/patch-restart-processes ${stx_integ_sbindir} - sudo install -m 755 $PLATFORM_UTIL_DIR/scripts/patch-restart-haproxy ${stx_integ_sbindir} - - sudo install -m 755 $PLATFORM_UTIL_DIR/scripts/cgcs_tc_setup.sh ${STX_BIN_DIR} - sudo install -m 755 $PLATFORM_UTIL_DIR/scripts/remotelogging_tc_setup.sh ${STX_BIN_DIR} - sudo install -m 755 $PLATFORM_UTIL_DIR/scripts/connectivity_test ${STX_BIN_DIR} - - # sudo install -m 755 $PLATFORM_UTIL_DIR/scripts/opt-platform.mount ${systemddir}/system - # sudo install -m 755 $PLATFORM_UTIL_DIR/scripts/opt-platform.service ${systemddir}/system - # sudo install -m 755 $PLATFORM_UTIL_DIR/scripts/memcached.service ${systemddir}/system -} - function 
cleanup_integ { # Cleanup the service stop_integ @@ -73,14 +48,40 @@ function install_integ { fi } -function sart_integ { - # Shut the service down - : +function install_platform_util { + pushd $PLATFORM_UTIL_DIR/platform-util + sudo python setup.py install \ + --root=/ \ + --install-lib=$PYTHON_SITE_DIR \ + --prefix=/usr \ + --install-data=/usr/share \ + --single-version-externally-managed + popd + + local stx_integ_sbindir=$STX_INST_DIR/sbin + local systemddir=/etc/systemd + sudo install -m 755 -d ${stx_integ_sbindir} + sudo install -m 755 $PLATFORM_UTIL_DIR/scripts/patch-restart-mtce ${stx_integ_sbindir} + sudo install -m 755 $PLATFORM_UTIL_DIR/scripts/patch-restart-processes ${stx_integ_sbindir} + sudo install -m 755 $PLATFORM_UTIL_DIR/scripts/patch-restart-haproxy ${stx_integ_sbindir} + + sudo install -m 755 $PLATFORM_UTIL_DIR/scripts/cgcs_tc_setup.sh ${STX_BIN_DIR} + sudo install -m 755 $PLATFORM_UTIL_DIR/scripts/remotelogging_tc_setup.sh ${STX_BIN_DIR} + sudo install -m 755 $PLATFORM_UTIL_DIR/scripts/connectivity_test ${STX_BIN_DIR} + + # sudo install -m 755 $PLATFORM_UTIL_DIR/scripts/opt-platform.mount ${systemddir}/system + # sudo install -m 755 $PLATFORM_UTIL_DIR/scripts/opt-platform.service ${systemddir}/system + # sudo install -m 755 $PLATFORM_UTIL_DIR/scripts/memcached.service ${systemddir}/system } -function stop_integ { +function start_integ { # Initialize and start the service : } +function stop_integ { + # Shut the service down + : +} + $_XTRACE_STX_INTEG diff --git a/devstack/plugin.sh b/devstack/plugin.sh index bbff62e80..538985f10 100755 --- a/devstack/plugin.sh +++ b/devstack/plugin.sh @@ -1,8 +1,9 @@ #!/bin/bash -# ``stack.sh`` calls the entry points in this order: -# -echo_summary "integ devstack plugin.sh called: $1/$2" +# devstack/plugin.sh +# Triggers specific functions to install and configure stx-integ + +echo_summary "stx-integ devstack plugin.sh called: $1/$2" # check for service enabled if is_service_enabled stx-integ; then @@ -18,6 +19,7 @@ if is_service_enabled stx-integ; then # Initialize and start the service echo_summary "Initialize and start stx-integ" init_integ + start_integ elif [[ "$1" == "stack" && "$2" == "test-config" ]]; then # do sanity test echo_summary "do test-config" diff --git a/devstack/settings b/devstack/settings index 93dbe22ec..1eb7532c5 100644 --- a/devstack/settings +++ b/devstack/settings @@ -1,6 +1,13 @@ #!/bin/bash # Devstack settings +# This plugin enables StarlingX stx-integ services and follows the +# DevStack plugin contract: +# https://docs.openstack.org/devstack/latest/plugins.html#plugin-sh-contract + +# Services +# platform-util + # Defaults # -------- @@ -9,7 +16,10 @@ STX_INTEG_NAME=stx-integ ######### Plugin Specific ########## enable_service $STX_INTEG_NAME -#platform_license +# This must not use any variables to work properly in OpenStack's DevStack playbook +define_plugin stx-integ +# This works for Zuul jobs using OpenStack's DevStack roles +plugin_requires stx-integ stx-update # Initial source of lib script source $DEST/stx-integ/devstack/lib/stx-integ diff --git a/monitoring/collectd-extensions/src/cpu.py b/monitoring/collectd-extensions/src/cpu.py index 79ffe97bf..09832556c 100755 --- a/monitoring/collectd-extensions/src/cpu.py +++ b/monitoring/collectd-extensions/src/cpu.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2018 Wind River Systems, Inc. +# Copyright (c) 2018-2019 Wind River Systems, Inc. 
# # SPDX-License-Identifier: Apache-2.0 # @@ -179,7 +179,7 @@ def read_func(): _schedstat)) else: collectd.error('%s unsupported schedstat version [%d]' % - (PLUGIN, c.version)) + (PLUGIN, c.version)) return 0 f.close() diff --git a/monitoring/collectd-extensions/src/fm_notifier.py b/monitoring/collectd-extensions/src/fm_notifier.py index 815fb07ac..88ce9280d 100755 --- a/monitoring/collectd-extensions/src/fm_notifier.py +++ b/monitoring/collectd-extensions/src/fm_notifier.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2018 Wind River Systems, Inc. +# Copyright (c) 2018-2019 Wind River Systems, Inc. # # SPDX-License-Identifier: Apache-2.0 # @@ -102,6 +102,7 @@ api = fm_api.FaultAPIs() debug = False debug_lists = False want_state_audit = False +want_vswitch = False # number of notifier loops before the state is object dumped DEBUG_AUDIT = 2 @@ -123,6 +124,8 @@ DATABASE_NAME = 'collectd samples' READING_TYPE__PERCENT_USAGE = '% usage' +# Default invalid threshold value +INVALID_THRESHOLD = float(-1) # collectd severity definitions ; # Note: can't seem to pull then in symbolically with a header @@ -231,8 +234,10 @@ class PluginObject: # [ 'float value string','float threshold string] self.values = [] - self.threshold = float(0) # float value of threshold - self.value = float(0) # float value of reading + self.value = float(0) # float value of reading + + # float value of threshold + self.threshold = float(INVALID_THRESHOLD) # Common static class members. self.reason_warning = "" @@ -333,7 +338,8 @@ class PluginObject: # Purpose : Manage sample value change. # # Handle no sample update case. - # Parse the notification log + # Parse the notification log. + # Handle base object instances. # Generate a log entry if the sample value changes more than # step value. # @@ -385,10 +391,22 @@ class PluginObject: # get the threshold if its there. if len(self.values) > 1: self.threshold = float(self.values[1]) + if nObject.plugin == PLUGIN__MEM: + if self.reading_type == READING_TYPE__PERCENT_USAGE: + # Note: add one to % usage reading types so that it + # matches how rmond did it. In collectd an + # overage is over the specified threshold + # whereas in rmon an overage is at threshold + # or above. + self.threshold = float(self.values[1]) + 1 + else: + self.threshold = float(self.values[1]) + else: + self.threshold = float(INVALID_THRESHOLD) # invalid value except ValueError as ex: collectd.error("%s %s value not integer or float (%s) (%s)" % - (PLUGIN, self.entity_id, self.value, str(ex))) + (PLUGIN, self.entity_id, self.value, str(ex))) return "done" except TypeError as ex: collectd.info("%s %s value has no type (%s)" % @@ -429,6 +447,11 @@ class PluginObject: if self.plugin == PLUGIN__DF: resource = self.instance + elif self.plugin == PLUGIN__MEM: + if self.instance_name: + if self.instance_name != 'platform': + resource += ' ' + self.instance_name + # setup resource name for vswitch process instance name elif self.plugin == PLUGIN__VSWITCH_MEM: resource += ' Processor ' @@ -697,7 +720,7 @@ class PluginObject: self.instance_objects[eid] = obj except: collectd.error("%s failed to add instance to %s object list" % - (PLUGIN, self.plugin)) + (PLUGIN, self.plugin)) finally: collectd.debug("%s %s Add UnLock ..." 
% (PLUGIN, self.plugin)) @@ -751,14 +774,14 @@ class PluginObject: self._add_instance_object(inst_obj, inst_obj.entity_id) collectd.debug("%s created %s instance (%s) object %s" % - (PLUGIN, inst_obj.resource_name, - inst_obj.entity_id, inst_obj)) + (PLUGIN, inst_obj.resource_name, + inst_obj.entity_id, inst_obj)) - collectd.debug("%s monitoring %s %s %s" % - (PLUGIN, - inst_obj.resource_name, - inst_obj.instance_name, - inst_obj.reading_type)) + collectd.info("%s monitoring %s %s %s" % + (PLUGIN, + inst_obj.resource_name, + inst_obj.instance_name, + inst_obj.reading_type)) return inst_obj @@ -891,7 +914,11 @@ def _build_entity_id(plugin, plugin_instance): entity_id = 'host=' entity_id += PluginObject.host - if plugin == PLUGIN__VSWITCH_MEM: + if plugin == PLUGIN__MEM: + if plugin_instance != 'platform': + entity_id += '.numa=' + plugin_instance + + elif plugin == PLUGIN__VSWITCH_MEM: # host=.processor= if plugin_instance: @@ -933,15 +960,6 @@ def _build_entity_id(plugin, plugin_instance): instance = instance.replace('-', '/') entity_id += instance - # Will be uncommented when the numa memory monitor is added - # to the platform memory plugin. - # - #elif plugin == PLUGIN__MEM: - # if plugin_instance is not 'platform': - # # host=controller-0.numa=node0 - # entity_id += '.numa=' - # entity_id += plugin_instance - if inst_error is True: collectd.error("%s eid build failed ; missing instance" % plugin) return None @@ -957,7 +975,7 @@ def _get_df_mountpoints(): if not os.path.exists(conf_file): collectd.error("%s cannot create filesystem " "instance objects ; missing : %s" % - (PLUGIN, conf_file)) + (PLUGIN, conf_file)) return FAIL mountpoints = [] @@ -1162,7 +1180,7 @@ def _clear_alarm_for_missing_filesystems(): df_base_obj._manage_alarm(obj.entity_id, "okay") else: collectd.debug("%s maintaining alarm for %s" % - (PLUGIN, path)) + (PLUGIN, path)) # Collectd calls this function on startup. @@ -1211,7 +1229,9 @@ def init_func(): obj._create_instance_objects() # ntp query is for controllers only - if tsc.nodetype == 'worker' or 'worker' in tsc.subfunctions: + if want_vswitch is False: + collectd.debug("%s vSwitch monitoring disabled" % PLUGIN) + elif tsc.nodetype == 'worker' or 'worker' in tsc.subfunctions: ####################################################################### @@ -1410,13 +1430,13 @@ def notifier_func(nObject): collectd.debug("%s %s lock" % (PLUGIN, nObject.plugin)) PluginObject.lock.acquire() - #collectd.info("%s Object Search eid: %s" % - # (nObject.plugin, eid)) + # collectd.info("%s Object Search eid: %s" % + # (nObject.plugin, eid)) - #for o in base_obj.instance_objects: - # collectd.error("%s %s inst object dict item %s : %s" % - # (PLUGIN, nObject.plugin, o, - # base_obj.instance_objects[o])) + # for o in base_obj.instance_objects: + # collectd.error("%s %s inst object dict item %s : %s" % + # (PLUGIN, nObject.plugin, o, + # base_obj.instance_objects[o])) # we will take an exception if this object is not in the list. 
# the exception handling code below will create and add this @@ -1438,14 +1458,14 @@ def notifier_func(nObject): inst_obj = base_obj._get_instance_object(eid) if inst_obj: collectd.debug("%s %s:%s inst object created" % - (PLUGIN, - inst_obj.plugin, - inst_obj.instance)) + (PLUGIN, + inst_obj.plugin, + inst_obj.instance)) else: collectd.error("%s %s:%s inst object create failed" % - (PLUGIN, - nObject.plugin, - nObject.plugin_instance)) + (PLUGIN, + nObject.plugin, + nObject.plugin_instance)) return 0 # re-assign the object @@ -1461,7 +1481,7 @@ def notifier_func(nObject): else: collectd.debug("%s notification for unknown plugin: %s %s" % - (PLUGIN, nObject.plugin, nObject.plugin_instance)) + (PLUGIN, nObject.plugin, nObject.plugin_instance)) return 0 # if obj.warnings or obj.failures: @@ -1507,11 +1527,11 @@ def notifier_func(nObject): # if this is a threshold alarm then build the reason text that # includes the threahold and the reading that caused the assertion. reason = obj.resource_name - reason += " threshold exceeded" - if obj.threshold: - reason += "; threshold {:2.0f} ".format(obj.threshold) + "%, " + reason += " threshold exceeded ;" + if obj.threshold != INVALID_THRESHOLD: + reason += " threshold {:2.0f}".format(obj.threshold) + "%," if obj.value: - reason += "actual {:2.0f}".format(obj.value) + "%" + reason += " actual {:2.0f}".format(obj.value) + "%" elif _severity_num == fm_constants.FM_ALARM_SEVERITY_CRITICAL: reason = obj.reason_failure @@ -1542,14 +1562,13 @@ def notifier_func(nObject): # update the lists now that base_obj._manage_alarm(obj.entity_id, severity_str) - collectd.info("%s %s alarm %s:%s %s:%s thld:%2.2f value:%2.2f" % ( + collectd.info("%s %s alarm %s:%s %s:%s value:%2.2f" % ( PLUGIN, _alarm_state, base_obj.id, severity_str, obj.instance, obj.entity_id, - obj.threshold, obj.value)) # Debug only: comment out for production code. diff --git a/monitoring/collectd-extensions/src/memory.py b/monitoring/collectd-extensions/src/memory.py index cf4d1e7cd..b9a8e1f8d 100755 --- a/monitoring/collectd-extensions/src/memory.py +++ b/monitoring/collectd-extensions/src/memory.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2018 Wind River Systems, Inc. +# Copyright (c) 2018-2019 Wind River Systems, Inc. 
# # SPDX-License-Identifier: Apache-2.0 # @@ -19,11 +19,9 @@ import collectd debug = False -# general return codes -PASS = 0 -FAIL = 1 - PLUGIN = 'platform memory usage' +PLUGIN_NUMA = 'numa memory usage' +PLUGIN_HUGE = 'hugepage memory usage' # CPU Control class @@ -41,8 +39,10 @@ class MEM: CommitLimit = 0 Committed_AS = 0 HugePages_Total = 0 + HugePages_Free = 0 Hugepagesize = 0 AnonPages = 0 + FilePages = 0 # derived values avail = 0 @@ -54,6 +54,27 @@ class MEM: obj = MEM() +def log_meminfo(plugin, name, meminfo): + """ Log the supplied meminfo """ + + if debug is False: + return + + collectd.info("%s %s" % (plugin, name)) + collectd.info("%s ---------------------------" % plugin) + collectd.info("%s memTotal_kB : %f" % (plugin, meminfo.memTotal_kB)) + collectd.info("%s memFree_kB : %f" % (plugin, meminfo.memFree_kB)) + collectd.info("%s Buffers : %f" % (plugin, meminfo.buffers)) + collectd.info("%s Cached : %f" % (plugin, meminfo.cached)) + collectd.info("%s SReclaimable : %f" % (plugin, meminfo.SReclaimable)) + collectd.info("%s CommitLimit : %f" % (plugin, meminfo.CommitLimit)) + collectd.info("%s Committed_AS : %f" % (plugin, meminfo.Committed_AS)) + collectd.info("%s HugePages_Total: %f" % (plugin, meminfo.HugePages_Total)) + collectd.info("%s HugePages_Free : %f" % (plugin, meminfo.HugePages_Free)) + collectd.info("%s Hugepagesize : %f" % (plugin, meminfo.Hugepagesize)) + collectd.info("%s AnonPages : %f" % (plugin, meminfo.AnonPages)) + + def config_func(config): """ Configure the memory usage plugin @@ -110,7 +131,12 @@ def read_func(): except EnvironmentError as e: collectd.error("%s unable to read from %s ; str(e)" % (PLUGIN, str(e))) - return FAIL + return 0 + + # setup the sample structure + val = collectd.Values(host=obj.hostname) + val.type = 'percent' + val.type_instance = 'used' # remove the 'unit' (kB) suffix that might be on some of the lines for line in meminfo: @@ -130,20 +156,11 @@ def read_func(): obj.CommitLimit = float(meminfo['CommitLimit']) obj.Committed_AS = float(meminfo['Committed_AS']) obj.HugePages_Total = float(meminfo['HugePages_Total']) + obj.HugePages_Free = float(meminfo['HugePages_Free']) obj.Hugepagesize = float(meminfo['Hugepagesize']) obj.AnonPages = float(meminfo['AnonPages']) - # collectd.info("%s /proc/meminfo: %s" % (PLUGIN, meminfo)) - # collectd.info("%s ---------------------------" % PLUGIN) - # collectd.info("%s memTotal_kB : %f" % (PLUGIN, obj.memTotal_kB)) - # collectd.info("%s memFree_kB : %f" % (PLUGIN, obj.memFree_kB)) - # collectd.info("%s Buffers : %f" % (PLUGIN, obj.buffers)) - # collectd.info("%s Cached : %f" % (PLUGIN, obj.cached)) - # collectd.info("%s SReclaimable : %f" % (PLUGIN, obj.SReclaimable)) - # collectd.info("%s CommitLimit : %f" % (PLUGIN, obj.CommitLimit)) - # collectd.info("%s Committed_AS : %f" % (PLUGIN, obj.Committed_AS)) - # collectd.info("%s HugePages_Total: %f" % (PLUGIN, obj.HugePages_Total)) - # collectd.info("%s AnonPages : %f" % (PLUGIN, obj.AnonPages)) + log_meminfo(PLUGIN, "/proc/meminfo", obj) obj.avail = float(float(obj.memFree_kB) + float(obj.buffers) + @@ -152,38 +169,93 @@ def read_func(): obj.total = float(float(obj.avail) + float(obj.AnonPages)) - # collectd.info("%s ---------------------------" % PLUGIN) - # collectd.info("%s memTotal: %d" % (PLUGIN, obj.avail)) - # collectd.info("%s memAvail: %d" % (PLUGIN, obj.total)) - if obj.strict == 1: obj.value = float(float(obj.Committed_AS) / float(obj.CommitLimit)) else: obj.value = float(float(obj.AnonPages) / float(obj.total)) - obj.value = 
float(float(obj.value) * 100) - # get numa node memory - # numa_node_files = [] - # fn = "/sys/devices/system/node/" - # files = os.listdir(fn) - # for file in files: - # if 'node' in file: - # numa_node_files.append(fn + file) - # collectd.info("%s numa node files: %s" % - # (PLUGIN, numa_node_files)) - - collectd.debug('%s reports %.2f %% usage' % - (PLUGIN, obj.value)) + if debug is True: + collectd.info("%s ---------------------------" % PLUGIN) + collectd.info("%s memAvail: %d" % (PLUGIN, obj.avail)) + collectd.info("%s memTotal: %d" % (PLUGIN, obj.total)) + collectd.info('%s reports %.2f %% usage' % (PLUGIN, obj.value)) # Dispatch usage value to collectd - val = collectd.Values(host=obj.hostname) val.plugin = 'memory' - val.type = 'percent' - val.type_instance = 'used' + val.plugin_instance = 'platform' val.dispatch(values=[obj.value]) - return PASS + ##################################################################### + # Now get the Numa Node Memory Usage + ##################################################################### + numa_node_files = [] + fn = "/sys/devices/system/node/" + files = os.listdir(fn) + for file in files: + if 'node' in file: + numa_node_files.append(fn + file + '/meminfo') + + for numa_node in numa_node_files: + meminfo = {} + try: + with open(numa_node) as fd: + for line in fd: + meminfo[line.split()[2][0:-1]] = line.split()[3].strip() + + obj.memFree_kB = float(meminfo['MemFree']) + obj.FilePages = float(meminfo['FilePages']) + obj.SReclaimable = float(meminfo['SReclaimable']) + obj.AnonPages = float(meminfo['AnonPages']) + obj.HugePages_Total = float(meminfo['HugePages_Total']) + obj.HugePages_Free = float(meminfo['HugePages_Free']) + + log_meminfo(PLUGIN, numa_node, obj) + + avail = float(float(obj.memFree_kB) + + float(obj.FilePages) + + float(obj.SReclaimable)) + total = float(float(avail) + + float(obj.AnonPages)) + obj.value = float(float(obj.AnonPages)) / float(total) + obj.value = float(float(obj.value) * 100) + + # Dispatch usage value to collectd for this numa node + val.plugin_instance = numa_node.split('/')[5] + val.dispatch(values=[obj.value]) + + collectd.debug('%s reports %s at %.2f %% usage (%s)' % + (PLUGIN_NUMA, + val.plugin, + obj.value, + val.plugin_instance)) + + # Numa Node Huge Page Memory Monitoring + # + # Only monitor if there is Huge Page Memory + if obj.HugePages_Total > 0: + obj.value = \ + float(float(obj.HugePages_Total - + obj.HugePages_Free)) / \ + float(obj.HugePages_Total) + obj.value = float(float(obj.value) * 100) + + # Dispatch huge page memory usage value + # to collectd for this numa node. + val.plugin_instance = numa_node.split('/')[5] + '_hugepages' + val.dispatch(values=[obj.value]) + + collectd.debug('%s reports %s at %.2f %% usage (%s)' % + (PLUGIN_HUGE, + val.plugin, + obj.value, + val.plugin_instance)) + + except EnvironmentError as e: + collectd.error("%s unable to read from %s ; str(e)" % + (PLUGIN_NUMA, str(e))) + + return 0 collectd.register_config(config_func) diff --git a/monitoring/collectd-extensions/src/mtce_notifier.py b/monitoring/collectd-extensions/src/mtce_notifier.py index 1f645e0d8..9b72b81a4 100755 --- a/monitoring/collectd-extensions/src/mtce_notifier.py +++ b/monitoring/collectd-extensions/src/mtce_notifier.py @@ -1,8 +1,10 @@ # -# Copyright (c) 2018 Wind River Systems, Inc. +# Copyright (c) 2018-2019 Wind River Systems, Inc. 
# # SPDX-License-Identifier: Apache-2.0 # +############################################################################# +# # This file is the collectd 'Maintenance' Notifier. # # Collects provides information about each event as an object passed to the @@ -50,10 +52,6 @@ NOTIF_FAILURE = 1 NOTIF_WARNING = 2 NOTIF_OKAY = 4 -# generic return codes -PASS = 0 -FAIL = 1 - # default mtce port. # ... with configuration override MTCE_CMD_RX_PORT = 2101 @@ -292,7 +290,7 @@ def notifier_func(nObject): else: collectd.info("%s unsupported severity %d" % (PLUGIN, nObject.severity)) - return FAIL + return 0 # running counter of notifications. obj.msg_throttle += 1 @@ -374,7 +372,7 @@ def notifier_func(nObject): mtce_socket.close() else: collectd.error("%s %s failed to open socket (%s)" % - (PLUGIN, resource, obj.addr)) + (PLUGIN, resource, obj.addr)) except socket.error as e: if e.args[0] == socket.EAI_ADDRFAMILY: # Handle IPV4 to IPV6 switchover: @@ -383,7 +381,7 @@ def notifier_func(nObject): (PLUGIN, resource, obj.addr)) else: collectd.error("%s %s socket error (%s) ; %s" % - (PLUGIN, resource, obj.addr, str(e))) + (PLUGIN, resource, obj.addr, str(e))) # try self correction obj.addr = None obj.protocol = socket.AF_INET diff --git a/monitoring/collectd-extensions/src/ntpq.py b/monitoring/collectd-extensions/src/ntpq.py index 3f7964656..b470d7c01 100755 --- a/monitoring/collectd-extensions/src/ntpq.py +++ b/monitoring/collectd-extensions/src/ntpq.py @@ -118,15 +118,15 @@ def _add_unreachable_server(ip=None): if ip: if ip not in obj.unreachable_servers: collectd.debug("%s adding '%s' to unreachable servers list: %s" % - (PLUGIN, ip, obj.unreachable_servers)) + (PLUGIN, ip, obj.unreachable_servers)) obj.unreachable_servers.append(ip) collectd.info("%s added '%s' to unreachable servers list: %s" % - (PLUGIN, ip, obj.unreachable_servers)) + (PLUGIN, ip, obj.unreachable_servers)) else: collectd.debug("%s ip '%s' already in unreachable_servers list" % - (PLUGIN, ip)) + (PLUGIN, ip)) else: collectd.error("%s _add_unreachable_server called with no IP" % PLUGIN) @@ -323,7 +323,7 @@ def _cleanup_stale_servers(): """ Cleanup the server IP tracking lists """ collectd.debug("%s CLEANUP REACHABLE: %s %s" % - (PLUGIN, obj.server_list_ntpq, obj.reachable_servers)) + (PLUGIN, obj.server_list_ntpq, obj.reachable_servers)) for ip in obj.reachable_servers: if ip not in obj.server_list_ntpq: collectd.info("%s removing missing '%s' server from reachable " @@ -506,7 +506,7 @@ def init_func(): obj.base_eid = 'host=' + obj.hostname + '.ntp' collectd.debug("%s on %s with entity id '%s'" % - (PLUGIN, obj.hostname, obj.base_eid)) + (PLUGIN, obj.hostname, obj.base_eid)) # get a list of provisioned ntp servers _get_ntp_servers() @@ -686,7 +686,7 @@ def read_func(): # update the selected server list obj.selected_server = ip collectd.debug("%s selected server is '%s'" % - (PLUGIN, obj.selected_server)) + (PLUGIN, obj.selected_server)) else: collectd.debug("%s local controller '%s' marked " "as selected server ; ignoring" %
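The docker-config package above wires docker into pmon-style process monitoring: docker-pmond.conf names dockerd with a pidfile, and the systemd drop-in docker-stx-override.conf records $MAINPID in /var/run/dockerd.pid and sets Restart=no so restarts are owned by the monitor rather than by systemd. Below is a minimal sketch of the kind of pidfile liveness check such monitoring depends on; it is illustrative only and is not pmond code or part of this patch.

# Illustrative pidfile liveness check; shows why the drop-in must write
# $MAINPID to /var/run/dockerd.pid for pidfile-driven monitoring to work.
import errno
import os


def process_alive(pidfile='/var/run/dockerd.pid'):
    """Return True if the pid recorded in pidfile refers to a live process."""
    try:
        with open(pidfile) as f:
            pid = int(f.read().strip())
    except (IOError, ValueError):
        return False
    try:
        os.kill(pid, 0)          # signal 0: existence/permission check only
    except OSError as e:
        return e.errno == errno.EPERM
    return True


if __name__ == '__main__':
    print('dockerd alive: %s' % process_alive())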
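The memory.py changes extend the platform memory plugin to dispatch per-NUMA-node memory usage and per-node hugepage usage read from /sys/devices/system/node/node*/meminfo. The standalone sketch below reproduces just that arithmetic so it can be checked outside collectd; it mirrors the hunk above but is not part of the patch.

# Standalone sketch of the per-NUMA-node memory and hugepage usage
# calculation dispatched by the updated memory.py; illustration only.
import os

NODE_DIR = "/sys/devices/system/node/"


def parse_node_meminfo(path):
    """Parse a per-node meminfo file into a {field: kB} dict.

    Per-node lines look like 'Node 0 MemFree:  123456 kB', so the field
    name is the third token (trailing ':' stripped) and the value the fourth.
    """
    fields = {}
    with open(path) as f:
        for line in f:
            tokens = line.split()
            fields[tokens[2].rstrip(':')] = float(tokens[3])
    return fields


def node_usage(fields):
    """Return (memory % usage, hugepage % usage or None) for one node."""
    avail = fields['MemFree'] + fields['FilePages'] + fields['SReclaimable']
    total = avail + fields['AnonPages']
    mem_pct = fields['AnonPages'] / total * 100

    huge_pct = None
    if fields['HugePages_Total'] > 0:
        huge_pct = ((fields['HugePages_Total'] - fields['HugePages_Free'])
                    / fields['HugePages_Total'] * 100)
    return mem_pct, huge_pct


if __name__ == '__main__':
    for entry in sorted(os.listdir(NODE_DIR)):
        meminfo_path = os.path.join(NODE_DIR, entry, 'meminfo')
        if not entry.startswith('node') or not os.path.isfile(meminfo_path):
            continue
        mem_pct, huge_pct = node_usage(parse_node_meminfo(meminfo_path))
        print('%s memory %.2f%% used' % (entry, mem_pct))
        if huge_pct is not None:
            print('%s_hugepages %.2f%% used' % (entry, huge_pct))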
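On the alarm side, the fm_notifier.py changes map the new readings to entity ids of the form host=<host>.numa=<node> and, for percent-usage memory thresholds, add one so that collectd's strictly-over-threshold assertion matches rmond's at-or-over behaviour; INVALID_THRESHOLD marks notifications that carry no threshold value. The sketch below captures just that mapping; function names here are local to the example, not the plugin's own.

# Illustrative sketch of the entity-id and threshold handling added to
# fm_notifier.py; names are local to this example.
INVALID_THRESHOLD = float(-1)


def build_mem_entity_id(host, plugin_instance):
    """host=<host> for platform memory, host=<host>.numa=<node> per node."""
    eid = 'host=' + host
    if plugin_instance and plugin_instance != 'platform':
        eid += '.numa=' + plugin_instance
    return eid


def effective_threshold(values, percent_usage_reading):
    """Return the alarm threshold carried by a memory notification.

    collectd asserts when the reading is strictly over the threshold,
    while rmond asserted at the threshold, so percent-usage memory
    thresholds are raised by one to preserve the old behaviour.
    """
    if len(values) < 2:
        return INVALID_THRESHOLD
    threshold = float(values[1])
    if percent_usage_reading:
        threshold += 1
    return threshold


if __name__ == '__main__':
    print(build_mem_entity_id('controller-0', 'node0'))   # host=controller-0.numa=node0
    print(effective_threshold(['81.2', '80'], True))      # 81.0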