196 lines
5.4 KiB
Python
Executable File
196 lines
5.4 KiB
Python
Executable File
|
|
# Copyright (c) 2018 Wind River Systems, Inc.
|
|
#
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
#
|
|
#
|
|
|
|
import os
|
|
import subprocess
|
|
import uuid
|
|
import collectd
|
|
from fm_api import constants as fm_constants
|
|
from fm_api import fm_api
|
|
import tsconfig.tsconfig as tsc
|
|
|
|
api = fm_api.FaultAPIs()
|
|
|
|
PLUGIN = 'NTP query plugin'
|
|
|
|
PLUGIN_SCRIPT = '/etc/rmonfiles.d/query_ntp_servers.sh'
|
|
PLUGIN_RESULT = '/tmp/ntpq_server_info'
|
|
|
|
# static variables
|
|
ALARM_ID__NTPQ = "100.114"
|
|
|
|
|
|
# define a class here that will persist over read calls
|
|
class NtpqObject:
|
|
hostname = ''
|
|
base_eid = ''
|
|
severity = 'clear'
|
|
suppression = True
|
|
service_affecting = False
|
|
status = 0
|
|
last_result = ''
|
|
this_result = ''
|
|
id = ALARM_ID__NTPQ
|
|
name = "NTP"
|
|
alarm_type = fm_constants.FM_ALARM_TYPE_1
|
|
cause = fm_constants.ALARM_PROBABLE_CAUSE_UNKNOWN
|
|
repair = "Monitor and if condition persists, "
|
|
repair += "contact next level of support."
|
|
|
|
|
|
obj = NtpqObject()
|
|
|
|
|
|
def is_uuid_like(val):
|
|
"""Returns validation of a value as a UUID."""
|
|
try:
|
|
return str(uuid.UUID(val)) == val
|
|
except (TypeError, ValueError, AttributeError):
|
|
return False
|
|
|
|
|
|
# The config function - called once on collectd process startup
|
|
def config_func(config):
|
|
"""
|
|
Configure the plugin
|
|
"""
|
|
|
|
collectd.debug('%s config function' % PLUGIN)
|
|
return 0
|
|
|
|
|
|
# The init function - called once on collectd process startup
|
|
def init_func():
|
|
|
|
# ntp query is for controllers only
|
|
if tsc.nodetype != 'controller':
|
|
return 0
|
|
|
|
# get current hostname
|
|
obj.hostname = os.uname()[1]
|
|
obj.base_eid = 'host=' + obj.hostname + '.ntp'
|
|
collectd.info("%s on %s with entity id '%s'" % PLUGIN, obj.hostname, obj.base_eid)
|
|
return 0
|
|
|
|
|
|
# The sample read function - called on every audit interval
|
|
def read_func():
|
|
|
|
# ntp query is for controllers only
|
|
if tsc.nodetype != 'controller':
|
|
return 0
|
|
|
|
result = int(0)
|
|
# Query ntp
|
|
try:
|
|
result = os.system(PLUGIN_SCRIPT)
|
|
except Exception as e:
|
|
collectd.error("%s Could not run '%s' (%s)" %
|
|
(PLUGIN, e))
|
|
return 0
|
|
|
|
obj.status = int(result)/0x100
|
|
|
|
collectd.info("%s Query Result: %s" % (PLUGIN, obj.status))
|
|
|
|
if os.path.exists(PLUGIN_RESULT) is False:
|
|
collectd.error("%s produced no result file '%s'" %
|
|
(PLUGIN, PLUGIN_RESULT))
|
|
return 0
|
|
|
|
# read the query result file.
|
|
# format is in the PLUGIN_SCRIPT file.
|
|
# This code only wants the second line.
|
|
# It contains list of unreachable ntp servers that need alarm management.
|
|
count = 0
|
|
with open(PLUGIN_RESULT, 'r') as infile:
|
|
for line in infile:
|
|
count += 1
|
|
collectd.info("%s Query Result: %s" % (PLUGIN, line))
|
|
if count == 0:
|
|
collectd.error("%s produced empty result file '%s'" %
|
|
(PLUGIN, PLUGIN_RESULT))
|
|
return 0
|
|
|
|
sample = 1
|
|
|
|
# Dispatch usage value to collectd
|
|
val = collectd.Values(host=obj.hostname)
|
|
val.plugin = 'ntpq'
|
|
val.plugin_instance = 'some.ntp.server.ip'
|
|
val.type = 'absolute'
|
|
val.type_instance = 'state'
|
|
val.dispatch(values=[sample])
|
|
|
|
severity = 'clear'
|
|
obj.severity = 'clear'
|
|
|
|
# if there is no severity change then consider exiting
|
|
if obj.severity == severity:
|
|
|
|
# unless the current severity is 'minor'
|
|
if severity == 'minor':
|
|
# TODO: check to see if the failing IP address is changed
|
|
collectd.info("%s NEED TO CHECK IP ADDRESSES" % (PLUGIN))
|
|
else:
|
|
return 0
|
|
|
|
# if current severity is clear but previous severity is not then
|
|
# prepare to clear the alarms
|
|
if severity == 'clear':
|
|
_alarm_state = fm_constants.FM_ALARM_STATE_CLEAR
|
|
|
|
# TODO: loop over all raises alarms and clear them
|
|
collectd.info("%s NEED CLEAR ALL ALARMS" % (PLUGIN))
|
|
if api.clear_fault(obj.id, obj.base_eid) is False:
|
|
collectd.error("%s %s:%s clear_fault failed" %
|
|
(PLUGIN, obj.id, obj.base_eid))
|
|
return 0
|
|
|
|
elif severity == 'major':
|
|
reason = "NTP configuration does not contain any valid "
|
|
reason += "or reachable NTP servers."
|
|
eid = obj.base_eid
|
|
fm_severity = fm_constants.FM_ALARM_SEVERITY_MAJOR
|
|
else:
|
|
# TODO: There can be up to 3 inacessable servers
|
|
ip = 'some.server.ip.addr'
|
|
reason = "NTP address "
|
|
reason += ip
|
|
reason += " is not a valid or a reachable NTP server."
|
|
eid = obj.base_eid + '=' + ip
|
|
fm_severity = fm_constants.FM_ALARM_SEVERITY_MINOR
|
|
|
|
fault = fm_api.Fault(
|
|
alarm_id=obj.id,
|
|
alarm_state=fm_constants.FM_ALARM_STATE_SET,
|
|
entity_type_id=fm_constants.FM_ENTITY_TYPE_HOST,
|
|
entity_instance_id=eid,
|
|
severity=fm_severity,
|
|
reason_text=reason,
|
|
alarm_type=obj.alarm_type,
|
|
probable_cause=obj.cause,
|
|
proposed_repair_action=obj.repair,
|
|
service_affecting=obj.service_affecting,
|
|
suppression=obj.suppression)
|
|
|
|
alarm_uuid = api.set_fault(fault)
|
|
if is_uuid_like(alarm_uuid) is False:
|
|
collectd.error("%s %s:%s set_fault failed:%s" %
|
|
(PLUGIN, obj.id, eid, alarm_uuid))
|
|
return 0
|
|
|
|
# TODO: clear the object alarm state
|
|
|
|
return 0
|
|
|
|
|
|
# register the config, init and read functions
|
|
collectd.register_config(config_func)
|
|
collectd.register_init(init_func)
|
|
collectd.register_read(read_func)
|