diff --git a/monitoring/collectd-extensions/centos/build_srpm.data b/monitoring/collectd-extensions/centos/build_srpm.data index e7f74e208..8514ebc35 100644 --- a/monitoring/collectd-extensions/centos/build_srpm.data +++ b/monitoring/collectd-extensions/centos/build_srpm.data @@ -16,7 +16,9 @@ COPY_LIST="$PKG_BASE/src/LICENSE \ $PKG_BASE/src/ntpq.conf \ $PKG_BASE/src/interface.py \ $PKG_BASE/src/interface.conf \ + $PKG_BASE/src/remotels.py \ + $PKG_BASE/src/remotels.conf \ $PKG_BASE/src/example.py \ $PKG_BASE/src/example.conf" -TIS_PATCH_VER=7 +TIS_PATCH_VER=8 diff --git a/monitoring/collectd-extensions/centos/collectd-extensions.spec b/monitoring/collectd-extensions/centos/collectd-extensions.spec index 0665fb650..f8aa0936f 100644 --- a/monitoring/collectd-extensions/centos/collectd-extensions.spec +++ b/monitoring/collectd-extensions/centos/collectd-extensions.spec @@ -23,6 +23,7 @@ Source12: memory.py Source14: example.py Source15: ntpq.py Source16: interface.py +Source17: remotels.py # collectd plugin conf files into /etc/collectd.d Source100: python_plugins.conf @@ -32,6 +33,7 @@ Source103: df.conf Source104: example.conf Source105: ntpq.conf Source106: interface.conf +Source107: remotels.conf BuildRequires: systemd-devel @@ -75,6 +77,7 @@ install -m 700 %{SOURCE12} %{buildroot}%{local_python_extensions_dir} install -m 700 %{SOURCE14} %{buildroot}%{local_python_extensions_dir} install -m 700 %{SOURCE15} %{buildroot}%{local_python_extensions_dir} install -m 700 %{SOURCE16} %{buildroot}%{local_python_extensions_dir} +install -m 700 %{SOURCE17} %{buildroot}%{local_python_extensions_dir} # collectd plugin conf files into /etc/collectd.d @@ -85,6 +88,7 @@ install -m 600 %{SOURCE103} %{buildroot}%{local_plugin_dir} install -m 600 %{SOURCE104} %{buildroot}%{local_plugin_dir} install -m 600 %{SOURCE105} %{buildroot}%{local_plugin_dir} install -m 600 %{SOURCE106} %{buildroot}%{local_plugin_dir} +install -m 600 %{SOURCE107} %{buildroot}%{local_plugin_dir} %clean rm -rf 
$RPM_BUILD_ROOT diff --git a/monitoring/collectd-extensions/src/plugin_common.py b/monitoring/collectd-extensions/src/plugin_common.py index d6ba89894..6390024e0 100644 --- a/monitoring/collectd-extensions/src/plugin_common.py +++ b/monitoring/collectd-extensions/src/plugin_common.py @@ -33,6 +33,7 @@ class PluginObject(object): self.plugin = plugin # the name of this plugin self.hostname = '' # the name of this host self.port = 0 # the port number for this plugin + self.base_eid = '' # the base entity id host= # dynamic gate variables self.config_complete = False # set to True once config is complete @@ -42,6 +43,8 @@ class PluginObject(object): # dynamic variables set in read_func self.usage = float(0) # last usage value recorded as float self.audits = 0 # number of audit since init + self.enabled = False # tracks a plugin's enabled state + self.alarmed = False # tracks the current alarmed state # http and json specific variables self.url = url # target url diff --git a/monitoring/collectd-extensions/src/python_plugins.conf b/monitoring/collectd-extensions/src/python_plugins.conf index 85ba02377..e33de86a4 100644 --- a/monitoring/collectd-extensions/src/python_plugins.conf +++ b/monitoring/collectd-extensions/src/python_plugins.conf @@ -14,6 +14,7 @@ LoadPlugin python Port 2122 + Import "remotels" LogTraces = true Encoding "utf-8" diff --git a/monitoring/collectd-extensions/src/remotels.conf b/monitoring/collectd-extensions/src/remotels.conf new file mode 100644 index 000000000..f9e588992 --- /dev/null +++ b/monitoring/collectd-extensions/src/remotels.conf @@ -0,0 +1,13 @@ + + + + Instance "reachable" + Persist true + PersistOK true + WarningMin 1 + FailureMin 0 + Hits 2 + Invert false + + + diff --git a/monitoring/collectd-extensions/src/remotels.py b/monitoring/collectd-extensions/src/remotels.py new file mode 100755 index 000000000..9a766d9df --- /dev/null +++ b/monitoring/collectd-extensions/src/remotels.py @@ -0,0 +1,345 @@ +# +# Copyright (c) 2019 Wind 
# River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
############################################################################
#
# This is the Remote Logging Server plugin for collectd.
#
# The Remote Logging Server is enabled if /etc/syslog-ng/syslog-ng.conf
# contains the line '@include "remotelogging.conf"'
#
# There is no asynchronous notification of remote logging server
# configuration enable/disable state changes. Therefore, each audit
# interval needs to check whether it is enabled or not.
#
# every audit interval ...
#
# read_func:
#   check enabled:
#     if disabled and alarmed:
#       clear alarm
#     if enabled:
#       get ip and port
#       query status
#       if connected and alarmed:
#         clear alarm
#       if not connected and not alarmed:
#         raise alarm
#
# system remotelogging-modify --ip_address <ip address>
#                             --transport tcp
#                             --enabled True
#
############################################################################

import os
import collectd
import tsconfig.tsconfig as tsc
import plugin_common as pc
from fm_api import constants as fm_constants
from oslo_concurrency import processutils
from fm_api import fm_api

# Fault manager API Object
api = fm_api.FaultAPIs()

# name of the plugin
PLUGIN_NAME = 'remotels'

# all logs produced by this plugin are prefixed with this
PLUGIN = 'remote logging server'

# Remote logging server monitoring (audit) interval in seconds
PLUGIN_AUDIT_INTERVAL = 60

# Sample Data 'type' and 'instance' database field values.
PLUGIN_TYPE = 'absolute'
PLUGIN_TYPE_INSTANCE = 'reachable'

# Remote Logging Connectivity Alarm ID
PLUGIN_ALARMID = '100.118'

# The file where this plugin learns if remote logging is enabled
SYSLOG_CONF_FILE = '/etc/syslog-ng/syslog-ng.conf'

# Transport names that map onto a /proc/net/<name> socket table.
# The value is interpolated into a shell command, so it is restricted
# to this known set before use.
_SUPPORTED_PROTOCOLS = ('tcp', 'udp')

# Plugin Control Object
obj = pc.PluginObject(PLUGIN, "")


# Raise Remote Logging Server Alarm
def raise_alarm():
    """Raise the remote logging server connectivity alarm.

    Builds a minor, non service affecting host alarm against
    obj.base_eid and submits it through the fault manager API.
    obj.alarmed is set to True only when set_fault returns a uuid-like
    value. Failures are logged and never propagated to the caller.
    """

    repair = 'Ensure Remote Log Server IP is reachable from '
    repair += 'Controller through OAM interface; otherwise '
    repair += 'contact next level of support.'

    reason = 'Controller cannot establish connection with '
    reason += 'remote logging server.'

    try:
        fault = fm_api.Fault(
            alarm_id=PLUGIN_ALARMID,
            alarm_state=fm_constants.FM_ALARM_STATE_SET,
            entity_type_id=fm_constants.FM_ENTITY_TYPE_HOST,
            entity_instance_id=obj.base_eid,
            severity=fm_constants.FM_ALARM_SEVERITY_MINOR,
            reason_text=reason,
            alarm_type=fm_constants.FM_ALARM_TYPE_1,
            probable_cause=fm_constants.ALARM_PROBABLE_CAUSE_6,
            proposed_repair_action=repair,
            service_affecting=False,
            suppression=False)

        alarm_uuid = api.set_fault(fault)
        if pc.is_uuid_like(alarm_uuid) is False:
            collectd.error("%s %s:%s set_fault failed:%s" %
                           (PLUGIN, PLUGIN_ALARMID,
                            obj.base_eid, alarm_uuid))
        else:
            collectd.info("%s %s:%s alarm raised" %
                          (PLUGIN, PLUGIN_ALARMID, obj.base_eid))
            obj.alarmed = True

    except Exception:
        # Never let a fault manager failure escape into collectd ;
        # obj.alarmed stays False so the raise is retried by a later audit.
        collectd.error("%s %s:%s set_fault exception" %
                       (PLUGIN, PLUGIN_ALARMID, obj.base_eid))


# Clear remote logging server alarm
def clear_alarm():
    """Clear the remote logging server connectivity alarm.

    This is a blind clear ; the alarm is not queried first.

    Returns True when the clear request completed without raising, in
    which case obj.alarmed is reset to False. Returns False when the
    fault manager call raised, so that the caller can retry.
    """

    try:
        if api.clear_fault(PLUGIN_ALARMID, obj.base_eid) is True:
            collectd.info("%s alarm cleared" % PLUGIN)
        obj.alarmed = False
        return True

    except Exception:
        collectd.error("%s %s:%s clear failed ; will retry" %
                       (PLUGIN, PLUGIN_ALARMID, obj.base_eid))
        return False


# The config function - called once on collectd process startup
def config_func(config):
    """Configure the plugin.

    The supplied config is not used ; everything this plugin needs is
    learned from SYSLOG_CONF_FILE during normal monitoring.
    """

    # all configuration is learned during normal monitoring
    obj.config_done = True
    return 0


# The init function - called once on collectd process startup
def init_func():
    """Init the plugin.

    Returns 0 on non-controller hosts (nothing to do), False while the
    plugin object reports it is not yet ready to initialize, and True
    once the hostname and base entity id have been learned.
    """

    # remote logging server monitoring is for controllers only
    if tsc.nodetype != 'controller':
        return 0

    if obj.init_done is False:
        if obj.init_ready() is False:
            return False

    obj.hostname = obj.gethostname()
    obj.base_eid = 'host=' + obj.hostname
    obj.init_done = True
    collectd.info("%s initialization complete" % PLUGIN)

    return True


def _remote_logging_enabled():
    """Return True if SYSLOG_CONF_FILE includes "remotelogging.conf"."""

    if os.path.exists(SYSLOG_CONF_FILE) is False:
        return False

    with open(SYSLOG_CONF_FILE, 'r') as infile:
        for line in infile:
            if line.startswith('@include '):
                # a bare '@include' with no target yields a single
                # field ; guard the index rather than raise
                fields = line.split()
                if len(fields) > 1 and fields[1] == '"remotelogging.conf"':
                    return True
    return False


def _get_server_endpoint():
    """Parse the remote log server endpoint out of SYSLOG_CONF_FILE.

    Looks for a line that looks like this

              tag               proto     address      port
    ----------------------------- ---   --------------  ---
    destination remote_log_server {tcp("128.224.186.65" port(514));};

    Returns a (protocol, address, port) tuple of strings, or None if
    no such line could be found and parsed. Every failure is logged.
    """

    with open(SYSLOG_CONF_FILE, 'r') as infile:
        for line in infile:
            if not line.startswith('destination remote_log_server'):
                continue

            fields = line.split('{')
            if len(fields) < 2:
                collectd.error("%s remote log server line parse error"
                               " ; %s" % (PLUGIN, line))
                continue

            spec = fields[1]
            try:
                protocol = spec[0:3]
                address = spec.split('"')[1]
                port = spec.split('(')[2].split(')')[0]
            except IndexError:
                collectd.error("%s remote log server credentials "
                               "parse exception ; (%s)" % (PLUGIN, line))
                continue

            if not protocol or not address or not port:
                collectd.error("%s remote log server credentials "
                               "parse error ; (%s:%s:%s)" %
                               (PLUGIN, protocol, address, port))
                return None

            # line parsed ; move on ...
            return protocol, address, port

    collectd.error("%s remote log server destination not found in %s" %
                   (PLUGIN, SYSLOG_CONF_FILE))
    return None


def _proc_net_endpoint(address, port):
    """Build the hex 'address:port' string used by /proc/net/tcp(udp).

    Those files use upper case hex values with the ipv4 octets in
    reverse order, so the dotted quad is reversed and each octet is
    rendered as two hex digits, followed by ':' and the port as four
    hex digits.

    Example: ('128.224.186.65', '514') -> '41BAE080:0202'

    Returns None if address is not a dotted quad of 0..255 values or
    port is not an integer in 0..65535.
    """

    try:
        octets = [int(tup) for tup in address.split('.')]
        port_number = int(port)
    except ValueError:
        return None

    if len(octets) != 4 or any(tup < 0 or tup > 255 for tup in octets):
        return None
    if port_number < 0 or port_number > 65535:
        return None

    hex_ip = ''.join('%02X' % tup for tup in reversed(octets))
    return '%s:%04X' % (hex_ip, port_number)


# The sample read function - called on every audit interval
def read_func():
    """Remote logging server connectivity plugin read function.

    Each audit interval: learn whether remote logging is enabled, and
    if so look up the state of the socket to the configured server in
    /proc/net/<protocol>, then raise or clear the connectivity alarm
    and dispatch the reachable sample (1 connected, 0 not connected).

    Returns 1 when the server endpoint cannot be parsed, 0 otherwise.
    """

    # remote logging server monitoring is for controllers only
    if tsc.nodetype != 'controller':
        return 0

    if obj.init_done is False:
        init_func()
        return 0

    # remember the previous state so that a change can be logged
    previously_enabled = obj.enabled

    # check to see if remote logging is enabled
    obj.enabled = _remote_logging_enabled()

    logit = False
    if previously_enabled != obj.enabled:
        if obj.enabled is False:
            collectd.info("%s is disabled" % PLUGIN)
        else:
            collectd.info("%s is enabled" % PLUGIN)
        logit = True

    # Handle startup case by clearing existing alarm if it is raised.
    # It is runtime cheaper and simpler to issue a blind clear than query.
    if obj.audits == 0:
        if clear_alarm() is False:
            # if clear fails then retry next time
            return 0
        if obj.enabled is False:
            collectd.info("%s is disabled" % PLUGIN)
        obj.audits = 1

    if obj.enabled is False:
        if obj.alarmed is True:
            clear_alarm()
        return 0

    # If we get here then the server is enabled ...
    # Need to query it
    endpoint = _get_server_endpoint()
    if endpoint is None:
        return 1
    protocol, address, port = endpoint

    if ':' in address:
        # Monitoring of IPV6 is not currently supported.
        # (sockets for it would be listed in /proc/net/<protocol>6)
        return 0

    ipv = 4

    # the protocol ends up in a shell command ; only accept known values
    if protocol not in _SUPPORTED_PROTOCOLS:
        collectd.error("%s unsupported remote log server protocol ; (%s)" %
                       (PLUGIN, protocol))
        return 0

    # This plugin detects server connectivity through its socket status.
    # To get that construct the remote logging server IP string.
    UPPER_HEX_IP = _proc_net_endpoint(address, port)
    if UPPER_HEX_IP is None:
        collectd.error("%s remote log server address parse error ; (%s:%s)" %
                       (PLUGIN, address, port))
        return 1

    # log example   tcp:ipv4:128.224.186.65:514 : IP:41BAE080:0202
    collectd.debug("%s %s:ipv%d:%s:%s : IP:%s" %
                   (PLUGIN, protocol, ipv, address, port, UPPER_HEX_IP))

    # cmd example:
    #     cat /proc/net/tcp | awk '{print $3 " " $4}'
    #                       | grep 41BAE080:0202
    #                       | awk '{print $2}'
    cmd = "cat /proc/net/" + protocol
    cmd += " | awk '{print $3 \" \" $4}' | grep " + UPPER_HEX_IP
    cmd += " | awk '{print $2}'"
    res, err = processutils.execute(cmd, shell=True)
    if err:
        collectd.error("%s processutils error:%s" % (PLUGIN, err))
        collectd.debug("%s Cmd:%s" % (PLUGIN, cmd))
        return 0

    if res and res.rstrip() == '01':
        # connected state reads 01
        # Example log: Res:[01]

        # clear the alarm as soon as a connected reading is seen
        if obj.alarmed is True:
            clear_alarm()

        # Only log on state change
        if obj.usage != 1:
            logit = True

        obj.usage = 1
        conn = ''

    else:
        # res typically reads 02 when not connected
        # Example log: Res:[02]
        collectd.debug("%s Res:[%s] " % (PLUGIN, res.rstrip()))

        # raise alarm if
        #  - not already alarmed and
        #  - the previously recorded reading was also 'not connected'
        #    (obj.usage still 0), i.e. two failures in a row once a
        #    connected reading has been seen
        if obj.alarmed is False and obj.usage == 0:
            raise_alarm()

        # only log on state change
        if obj.usage == 1 or obj.audits == 1:
            logit = True

        obj.usage = 0
        conn = 'not '

    if logit is True:
        collectd.info("%s is %sconnected [%s ipv%d %s:%s]" %
                      (PLUGIN, conn, protocol, ipv, address, port))
    obj.audits += 1

    # Dispatch usage value to collectd
    val = collectd.Values(host=obj.hostname)
    val.plugin = PLUGIN_NAME
    val.type = PLUGIN_TYPE
    val.type_instance = PLUGIN_TYPE_INSTANCE
    val.dispatch(values=[obj.usage])
    return 0


# register the config, init and read functions
collectd.register_config(config_func)
collectd.register_init(init_func)
collectd.register_read(read_func, interval=PLUGIN_AUDIT_INTERVAL)