1073 lines
41 KiB
C++
1073 lines
41 KiB
C++
/*
 * Copyright (c) 2015-2017 Wind River Systems, Inc.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 */
|
|
|
|
/**
 * @file
 * Wind River Titanium Cloud 'Maintenance Agent' Alarm Module
 */
|
|
|
|
#include <sys/types.h>
|
|
#include <iostream>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
using namespace std;
|
|
|
|
#ifdef __AREA__
|
|
#undef __AREA__
|
|
#endif
|
|
#define __AREA__ "alm"
|
|
|
|
#include "daemon_common.h" /* */
|
|
|
|
#include "nodeBase.h" /* */
|
|
#include "nodeClass.h" /* */
|
|
#include "nodeTimers.h" /* */
|
|
#include "nodeUtil.h" /* */
|
|
#include "mtcAlarm.h" /* for ... this module header */
|
|
#include "hbsAlarm.h" /* for ... hbsAlarm stubs */
|
|
|
|
/* Module-wide table of maintenance alarm and log definitions.
 * Indexed by mtc_alarm_id_enum ; the entries are populated by mtcAlarm_init
 * and their reason text is rewritten each time an alarm or log is issued. */
alarmUtil_type alarm_list[MTC_ALARM_ID__LAST] ;
|
|
|
|
void mtcAlarm_init ( void )
|
|
{
|
|
alarmUtil_type * ptr ;
|
|
|
|
/** Lock Alarm ************************************************************/
|
|
|
|
ptr = &alarm_list[MTC_ALARM_ID__LOCK];
|
|
memset (&ptr->alarm, 0, (sizeof(SFmAlarmDataT)));
|
|
snprintf(&ptr->alarm.alarm_id[0], FM_MAX_BUFFER_LENGTH, "%s", LOCK_ALARM_ID);
|
|
|
|
ptr->name = "Lock" ;
|
|
ptr->instc_prefix = "" ;
|
|
|
|
ptr->critl_reason =
|
|
ptr->major_reason =
|
|
ptr->minor_reason = "was administratively locked to take it out-of-service.";
|
|
ptr->clear_reason = "was administratively unlocked and is back in-service.";
|
|
|
|
ptr->alarm.alarm_type = FM_ALARM_OPERATIONAL;
|
|
ptr->alarm.probable_cause = FM_ALARM_OUT_OF_SERVICE ;
|
|
ptr->alarm.inhibit_alarms = FM_TRUE ;
|
|
ptr->alarm.service_affecting = FM_TRUE ;
|
|
ptr->alarm.suppression = FM_FALSE;
|
|
|
|
ptr->alarm.severity = FM_ALARM_SEVERITY_CLEAR ; /* Dynamic */
|
|
ptr->alarm.alarm_state = FM_ALARM_STATE_CLEAR ; /* Dynamic */
|
|
|
|
snprintf( ptr->alarm.proposed_repair_action, FM_MAX_BUFFER_LENGTH,
|
|
"Administratively unlock Host to bring it back in-service.");
|
|
|
|
/** Enable Alarm ************************************************************/
|
|
|
|
ptr = &alarm_list[MTC_ALARM_ID__ENABLE];
|
|
memset (&ptr->alarm, 0, (sizeof(SFmAlarmDataT)));
|
|
snprintf(&ptr->alarm.alarm_id[0], FM_MAX_BUFFER_LENGTH, "%s", ENABLE_ALARM_ID);
|
|
|
|
ptr->name = "In-Service" ;
|
|
ptr->instc_prefix = "" ;
|
|
|
|
/* this is for a log */
|
|
ptr->minor_reason = "has experienced a minor In-Service test event. "
|
|
"No action is required. " ;
|
|
|
|
/* this is for an alarm and degrade */
|
|
ptr->major_reason = "Host Services failed to start.";
|
|
|
|
ptr->critl_reason = "experienced a service-affecting failure. "
|
|
"Auto-recovery in progress. "
|
|
"Manual Lock and Unlock may be required if auto-recovery is unsuccessful.";
|
|
|
|
ptr->clear_reason = "was auto recovered through Reboot and is now in-service if 'unlocked-enabled' "
|
|
"or is otherwise 'locked-disabled' by administrative 'lock' action.";
|
|
|
|
ptr->alarm.alarm_type = FM_ALARM_OPERATIONAL;
|
|
ptr->alarm.probable_cause = FM_ALARM_APP_SUBSYS_FAILURE ;
|
|
ptr->alarm.inhibit_alarms = FM_FALSE ;
|
|
ptr->alarm.service_affecting = FM_TRUE ;
|
|
ptr->alarm.suppression = FM_TRUE ;
|
|
|
|
ptr->alarm.severity = FM_ALARM_SEVERITY_CLEAR ; /* Dynamic */
|
|
ptr->alarm.alarm_state = FM_ALARM_STATE_CLEAR ; /* Dynamic */
|
|
|
|
snprintf (ptr->alarm.proposed_repair_action, FM_MAX_BUFFER_LENGTH,
|
|
"If auto-recovery is consistently unable to recover host to the unlocked-enabled "
|
|
"state contact next level of support or lock and replace failing Host.");
|
|
|
|
|
|
/** Configuration Alarm ************************************************************/
|
|
|
|
ptr = &alarm_list[MTC_ALARM_ID__CONFIG];
|
|
memset (&ptr->alarm, 0, (sizeof(SFmAlarmDataT)));
|
|
snprintf(&ptr->alarm.alarm_id[0], FM_MAX_BUFFER_LENGTH, "%s", CONFIG_ALARM_ID);
|
|
|
|
ptr->name = "Configuration" ;
|
|
ptr->instc_prefix = "" ;
|
|
|
|
ptr->critl_reason =
|
|
ptr->major_reason =
|
|
ptr->minor_reason = "experienced a configuration failure. ";
|
|
ptr->clear_reason = "has been successfully configured and is now in-service if 'unlocked-enabled' "
|
|
"or is otherwise 'locked-disabled' by administrative 'lock' action.";
|
|
|
|
ptr->alarm.alarm_type = FM_ALARM_OPERATIONAL;
|
|
ptr->alarm.probable_cause = FM_ALARM_CONFIG_ERROR ;
|
|
ptr->alarm.inhibit_alarms = FM_FALSE;
|
|
ptr->alarm.service_affecting = FM_TRUE ;
|
|
ptr->alarm.suppression = FM_TRUE ;
|
|
|
|
ptr->alarm.severity = FM_ALARM_SEVERITY_CLEAR ; /* Dynamic */
|
|
ptr->alarm.alarm_state = FM_ALARM_STATE_CLEAR ; /* Dynamic */
|
|
|
|
snprintf (ptr->alarm.proposed_repair_action, FM_MAX_BUFFER_LENGTH,
|
|
"If manual or auto-recovery is consistently unable to recover host to the unlocked-enabled "
|
|
"state contact next level of support or lock and replace failing Host.");
|
|
|
|
/** Init Board Management Controller Access Alarm Entry ******************/
|
|
|
|
ptr = &alarm_list[MTC_ALARM_ID__BM];
|
|
memset (&ptr->alarm, 0, (sizeof(SFmAlarmDataT)));
|
|
snprintf(&ptr->alarm.alarm_id[0], FM_MAX_BUFFER_LENGTH, "%s", BM_ALARM_ID);
|
|
|
|
ptr->name = "Board Management Controller Access" ;
|
|
ptr->instc_prefix = "" ;
|
|
|
|
ptr->critl_reason = "board management controller is unresponsive." ;
|
|
ptr->major_reason = "board management controller is unresponsive." ;
|
|
ptr->minor_reason = "access to board management module has failed." ;
|
|
ptr->clear_reason = "access to board management module is established" ;
|
|
|
|
ptr->alarm.alarm_type = FM_ALARM_OPERATIONAL ;
|
|
ptr->alarm.probable_cause = FM_ALARM_COMM_SUBSYS_FAILURE ;
|
|
ptr->alarm.inhibit_alarms = FM_FALSE;
|
|
ptr->alarm.service_affecting = FM_FALSE;
|
|
ptr->alarm.suppression = FM_FALSE;
|
|
|
|
ptr->alarm.severity = FM_ALARM_SEVERITY_CLEAR ; /* Dynamic */
|
|
ptr->alarm.alarm_state = FM_ALARM_STATE_CLEAR ; /* Dynamic */
|
|
|
|
snprintf( ptr->alarm.proposed_repair_action, FM_MAX_BUFFER_LENGTH,
|
|
"Check Host's board management config and connectivity.");
|
|
|
|
/** Init Controller Failure Alarm Entry **********************************/
|
|
|
|
ptr = &alarm_list[MTC_ALARM_ID__CH_CONT];
|
|
memset (&ptr->alarm, 0, (sizeof(SFmAlarmDataT)));
|
|
snprintf(&ptr->alarm.alarm_id[0], FM_MAX_BUFFER_LENGTH, "%s", CH_CONT_ALARM_ID);
|
|
|
|
ptr->name = "Controller Function" ;
|
|
ptr->instc_prefix = "" ;
|
|
|
|
ptr->critl_reason =
|
|
ptr->major_reason =
|
|
ptr->minor_reason = "controller function has in-service failure while compute services "
|
|
"remain healthy.";
|
|
ptr->clear_reason = "controller function has recovered";
|
|
|
|
ptr->alarm.alarm_type = FM_ALARM_OPERATIONAL;
|
|
ptr->alarm.probable_cause = FM_ALARM_APP_SUBSYS_FAILURE ;
|
|
ptr->alarm.inhibit_alarms = FM_FALSE ;
|
|
ptr->alarm.service_affecting = FM_TRUE ;
|
|
ptr->alarm.suppression = FM_TRUE ;
|
|
|
|
ptr->alarm.severity = FM_ALARM_SEVERITY_CLEAR ; /* Dynamic */
|
|
ptr->alarm.alarm_state = FM_ALARM_STATE_CLEAR ; /* Dynamic */
|
|
|
|
snprintf (ptr->alarm.proposed_repair_action, FM_MAX_BUFFER_LENGTH,
|
|
"Lock and then Unlock host to recover. "
|
|
"Avoid using 'Force Lock' action as that will impact compute services "
|
|
"running on this host. If lock action fails then contact next level "
|
|
"of support to investigate and recover.");
|
|
|
|
/** Init Compute Failure Alarm Entry *************************************/
|
|
|
|
ptr = &alarm_list[MTC_ALARM_ID__CH_COMP];
|
|
memset (&ptr->alarm, 0, (sizeof(SFmAlarmDataT)));
|
|
snprintf(&ptr->alarm.alarm_id[0], FM_MAX_BUFFER_LENGTH, "%s", CH_COMP_ALARM_ID);
|
|
|
|
ptr->name = "Compute Function" ;
|
|
ptr->instc_prefix = "" ;
|
|
|
|
ptr->minor_reason =
|
|
ptr->major_reason = "Compute service is not fully operational. Auto recovery in progress." ;
|
|
ptr->critl_reason = "Compute service of the only available controller is not operational. "
|
|
"Auto-recovery disabled. Degrading host instead.";
|
|
ptr->clear_reason = "compute service has recovered";
|
|
|
|
ptr->alarm.alarm_type = FM_ALARM_OPERATIONAL;
|
|
ptr->alarm.probable_cause = FM_ALARM_APP_SUBSYS_FAILURE ;
|
|
ptr->alarm.inhibit_alarms = FM_FALSE ;
|
|
ptr->alarm.service_affecting = FM_TRUE ;
|
|
ptr->alarm.suppression = FM_TRUE ;
|
|
|
|
ptr->alarm.severity = FM_ALARM_SEVERITY_CLEAR ; /* Dynamic */
|
|
ptr->alarm.alarm_state = FM_ALARM_STATE_CLEAR ; /* Dynamic */
|
|
|
|
snprintf (ptr->alarm.proposed_repair_action, FM_MAX_BUFFER_LENGTH,
|
|
"If alarm is against the only active controller then Enable second controller "
|
|
"and Switch Activity (Swact) to it as soon as possible. If the alarm "
|
|
"persists then Lock/Unlock host to recover its local compute service.");
|
|
|
|
/** Init Event Log Entry *************************************************/
|
|
|
|
ptr = &alarm_list[MTC_LOG_ID__EVENT];
|
|
memset (&ptr->alarm, 0, (sizeof(SFmAlarmDataT)));
|
|
snprintf(&ptr->alarm.alarm_id[0], FM_MAX_BUFFER_LENGTH, "%s", EVENT_LOG_ID);
|
|
|
|
ptr->name = "Maintenance Event" ;
|
|
|
|
ptr->minor_reason =
|
|
ptr->major_reason =
|
|
ptr->critl_reason =
|
|
ptr->clear_reason = "";
|
|
|
|
ptr->alarm.alarm_type = FM_ALARM_TYPE_UNKNOWN ;
|
|
ptr->alarm.probable_cause = FM_ALARM_CAUSE_UNKNOWN ;
|
|
ptr->alarm.inhibit_alarms = FM_FALSE ;
|
|
ptr->alarm.service_affecting = FM_FALSE ;
|
|
ptr->alarm.suppression = FM_FALSE ;
|
|
|
|
ptr->alarm.severity = FM_ALARM_SEVERITY_CLEAR ; /* Dynamic */
|
|
ptr->alarm.alarm_state = FM_ALARM_STATE_MSG ; /* Dynamic */
|
|
|
|
snprintf ( ptr->alarm.proposed_repair_action, FM_MAX_BUFFER_LENGTH, "%s", "");
|
|
|
|
/** Init Command Log Entry ***********************************************/
|
|
|
|
ptr = &alarm_list[MTC_LOG_ID__COMMAND];
|
|
memset (&ptr->alarm, 0, (sizeof(SFmAlarmDataT)));
|
|
snprintf(&ptr->alarm.alarm_id[0], FM_MAX_BUFFER_LENGTH, "%s", COMMAND_LOG_ID);
|
|
|
|
ptr->name = "Maintenance Command" ;
|
|
|
|
ptr->minor_reason =
|
|
ptr->major_reason =
|
|
ptr->critl_reason =
|
|
ptr->clear_reason = "";
|
|
|
|
ptr->alarm.alarm_type = FM_ALARM_TYPE_UNKNOWN ;
|
|
ptr->alarm.probable_cause = FM_ALARM_CAUSE_UNKNOWN ;
|
|
ptr->alarm.inhibit_alarms = FM_FALSE ;
|
|
ptr->alarm.service_affecting = FM_FALSE ;
|
|
ptr->alarm.suppression = FM_FALSE ;
|
|
|
|
ptr->alarm.severity = FM_ALARM_SEVERITY_CLEAR ; /* Dynamic */
|
|
ptr->alarm.alarm_state = FM_ALARM_STATE_MSG ; /* Dynamic */
|
|
|
|
snprintf ( ptr->alarm.proposed_repair_action, FM_MAX_BUFFER_LENGTH, "%s", "");
|
|
|
|
/** Init Config Log Entry ***********************************************/
|
|
|
|
ptr = &alarm_list[MTC_LOG_ID__CONFIG];
|
|
memset (&ptr->alarm, 0, (sizeof(SFmAlarmDataT)));
|
|
snprintf(&ptr->alarm.alarm_id[0], FM_MAX_BUFFER_LENGTH, "%s", CONFIG_LOG_ID);
|
|
|
|
ptr->name = "Maintenance Config" ;
|
|
|
|
ptr->minor_reason =
|
|
ptr->major_reason =
|
|
ptr->critl_reason =
|
|
ptr->clear_reason = "";
|
|
|
|
ptr->alarm.alarm_type = FM_ALARM_TYPE_UNKNOWN ;
|
|
ptr->alarm.probable_cause = FM_ALARM_CAUSE_UNKNOWN ;
|
|
ptr->alarm.inhibit_alarms = FM_FALSE ;
|
|
ptr->alarm.service_affecting = FM_FALSE ;
|
|
ptr->alarm.suppression = FM_FALSE ;
|
|
|
|
ptr->alarm.severity = FM_ALARM_SEVERITY_CLEAR ; /* Dynamic */
|
|
ptr->alarm.alarm_state = FM_ALARM_STATE_MSG ; /* Dynamic */
|
|
|
|
snprintf ( ptr->alarm.proposed_repair_action, FM_MAX_BUFFER_LENGTH, "%s", "");
|
|
|
|
/** Init State Change Log Entry ******************************************/
|
|
|
|
ptr = &alarm_list[MTC_LOG_ID__STATECHANGE];
|
|
memset (&ptr->alarm, 0, (sizeof(SFmAlarmDataT)));
|
|
snprintf(&ptr->alarm.alarm_id[0], FM_MAX_BUFFER_LENGTH, "%s", STATECHANGE_LOG_ID);
|
|
|
|
ptr->name = "Maintenance State Change" ;
|
|
|
|
ptr->minor_reason =
|
|
ptr->major_reason =
|
|
ptr->critl_reason =
|
|
ptr->clear_reason = "";
|
|
|
|
ptr->alarm.alarm_type = FM_ALARM_TYPE_UNKNOWN ;
|
|
ptr->alarm.probable_cause = FM_ALARM_CAUSE_UNKNOWN ;
|
|
ptr->alarm.inhibit_alarms = FM_FALSE ;
|
|
ptr->alarm.service_affecting = FM_FALSE ;
|
|
ptr->alarm.suppression = FM_FALSE ;
|
|
|
|
ptr->alarm.severity = FM_ALARM_SEVERITY_CLEAR ; /* Dynamic */
|
|
ptr->alarm.alarm_state = FM_ALARM_STATE_MSG ; /* Dynamic */
|
|
|
|
snprintf ( ptr->alarm.proposed_repair_action, FM_MAX_BUFFER_LENGTH, "%s", "");
|
|
|
|
/** Init Service Status Log Entry ****************************************/
|
|
|
|
ptr = &alarm_list[MTC_LOG_ID__SERVICESTATUS];
|
|
memset (&ptr->alarm, 0, (sizeof(SFmAlarmDataT)));
|
|
snprintf(&ptr->alarm.alarm_id[0], FM_MAX_BUFFER_LENGTH, "%s", SERVICESTATUS_LOG_ID);
|
|
|
|
ptr->name = "Maintenance Service Status Change" ;
|
|
|
|
ptr->minor_reason =
|
|
ptr->major_reason =
|
|
ptr->critl_reason =
|
|
ptr->clear_reason = "";
|
|
|
|
ptr->alarm.alarm_type = FM_ALARM_TYPE_UNKNOWN ;
|
|
ptr->alarm.probable_cause = FM_ALARM_CAUSE_UNKNOWN ;
|
|
ptr->alarm.inhibit_alarms = FM_FALSE ;
|
|
ptr->alarm.service_affecting = FM_FALSE ;
|
|
ptr->alarm.suppression = FM_FALSE ;
|
|
|
|
ptr->alarm.severity = FM_ALARM_SEVERITY_CLEAR ; /* Dynamic */
|
|
ptr->alarm.alarm_state = FM_ALARM_STATE_MSG ; /* Dynamic */
|
|
|
|
snprintf ( ptr->alarm.proposed_repair_action, FM_MAX_BUFFER_LENGTH, "%s", "");
|
|
|
|
}
|
|
|
|
/*
 * Map a maintenance alarm / log table index to its FM identifier string.
 *
 *   id - alarm_list index of the alarm or log
 *
 * Returns the matching *_ALARM_ID / *_LOG_ID string, or "200.000" for any
 * id that has no mapping.
 *
 * MTC_LOG_ID__SERVICESTATUS is mapped explicitly ; its alarm_list entry is
 * initialized with SERVICESTATUS_LOG_ID so without this case the identity
 * reported here would disagree with the entry's own alarm_id.
 */
string _getIdentity ( mtc_alarm_id_enum id )
{
    switch ( id )
    {
        case MTC_ALARM_ID__LOCK:        return (LOCK_ALARM_ID);
        case MTC_ALARM_ID__CONFIG:      return (CONFIG_ALARM_ID);
        case MTC_ALARM_ID__ENABLE:      return (ENABLE_ALARM_ID);
        case MTC_ALARM_ID__BM:          return (BM_ALARM_ID);
        case MTC_ALARM_ID__CH_CONT:     return (CH_CONT_ALARM_ID);
        case MTC_ALARM_ID__CH_COMP:     return (CH_COMP_ALARM_ID);
        case MTC_LOG_ID__EVENT:         return (EVENT_LOG_ID);
        case MTC_LOG_ID__COMMAND:       return (COMMAND_LOG_ID);
        case MTC_LOG_ID__STATECHANGE:   return (STATECHANGE_LOG_ID);
        case MTC_LOG_ID__CONFIG:        return (CONFIG_LOG_ID);
        case MTC_LOG_ID__SERVICESTATUS: return (SERVICESTATUS_LOG_ID);
        default:                        return ("200.000");
    }
}
|
|
|
|
/*
 * Public accessor for the FM identifier string of a maintenance alarm / log.
 * Simply forwards to _getIdentity.
 */
string mtcAlarm_getId_str ( mtc_alarm_id_enum id )
{
    string identity = _getIdentity(id) ;
    return identity ;
}
|
|
|
|
/*
 * Return the base instance string for a maintenance alarm / log.
 * The id is currently not consulted ; every id yields an empty string.
 */
string _getInstance ( mtc_alarm_id_enum id )
{
    (void)id ; /* parameter intentionally unused */
    return ("");
}
|
|
|
|
/*
 * Query the severity of the specified maintenance alarm for a host.
 *
 *   hostname - host the alarm is asserted against
 *   id       - maintenance alarm to query
 *
 * Returns whatever alarmUtil_query reports for the alarm's identity and
 * instance strings.
 */
EFmAlarmSeverityT mtcAlarm_state ( string hostname, mtc_alarm_id_enum id )
{
    string alarm_identity = _getIdentity(id) ;
    string alarm_instance = _getInstance(id) ;

    EFmAlarmSeverityT current = alarmUtil_query ( hostname, alarm_identity, alarm_instance );
    return ( current );
}
|
|
|
|
/*
 * Issue a clear for every alarm_list entry against the specified host.
 * Walks all ids below MTC_ALARM_ID__LAST ; individual clear results are
 * not checked.
 */
void mtcAlarm_clear_all ( string hostname )
{
    int index = 0 ;
    while ( index < MTC_ALARM_ID__LAST )
    {
        mtcAlarm_clear ( hostname, (mtc_alarm_id_enum)index );
        ++index ;
    }
}
|
|
|
|
/****************************************************************************
|
|
*
|
|
* Name : mtcAlarm_audit
|
|
*
|
|
* Purpose : Monitor and Auto-Correct maintenance alarms
|
|
*
|
|
* Description: Query locked state alarm (raw)
|
|
* if successful
|
|
* - Query alarms
|
|
* - compare to running state
|
|
* - correct mismatches ; internal state takes precidence
|
|
* - log all alarm state changes
|
|
*
|
|
****************************************************************************/
|
|
|
|
void nodeLinkClass::mtcAlarm_audit ( struct nodeLinkClass::node * node_ptr )
|
|
{
|
|
/*
|
|
* Read locked state alarm directly to detect fm access failures.
|
|
* If successful further reads are done using a wrapper utility.
|
|
*/
|
|
SFmAlarmDataT alarm_query ;
|
|
AlarmFilter alarm_filter ;
|
|
EFmErrorT rc ;
|
|
|
|
memset(&alarm_query, 0, sizeof(alarm_query));
|
|
memset(&alarm_filter, 0, sizeof(alarm_filter));
|
|
snprintf ( &alarm_filter.alarm_id[0], FM_MAX_BUFFER_LENGTH, "%s",
|
|
LOCK_ALARM_ID);
|
|
snprintf ( &alarm_filter.entity_instance_id[0], FM_MAX_BUFFER_LENGTH, "%s%s",
|
|
ENTITY_PREFIX, node_ptr->hostname.data());
|
|
rc = fm_get_fault ( &alarm_filter, &alarm_query );
|
|
if (( rc != FM_ERR_OK ) && ( rc != FM_ERR_ENTITY_NOT_FOUND ))
|
|
{
|
|
wlog("%s alarm query failure ; code:%d",
|
|
node_ptr->hostname.c_str(),
|
|
rc );
|
|
return ;
|
|
}
|
|
|
|
/* With FM comms proven working lets check the other mtc alarms */
|
|
string active_alarms = "";
|
|
for ( int i = 0 ; i < MAX_ALARMS ; i++ )
|
|
{
|
|
mtc_alarm_id_enum id = (mtc_alarm_id_enum)i ;
|
|
if ( id == MTC_ALARM_ID__LOCK )
|
|
{
|
|
/* Unexpected severity case */
|
|
if ( node_ptr->adminState == MTC_ADMIN_STATE__LOCKED )
|
|
{
|
|
if ( alarm_query.severity != FM_ALARM_SEVERITY_WARNING )
|
|
{
|
|
node_ptr->alarms[id] = FM_ALARM_SEVERITY_WARNING ;
|
|
|
|
wlog("%s %s alarm mismatch ; %s -> %s",
|
|
node_ptr->hostname.c_str(),
|
|
_getIdentity(id).c_str(),
|
|
alarmUtil_getSev_str(alarm_query.severity).c_str(),
|
|
alarmUtil_getSev_str(node_ptr->alarms[id]).c_str());
|
|
|
|
mtcAlarm_warning ( node_ptr->hostname, MTC_ALARM_ID__LOCK );
|
|
|
|
}
|
|
if (!active_alarms.empty())
|
|
active_alarms.append(", ");
|
|
active_alarms.append(_getIdentity(id) + ":");
|
|
active_alarms.append(alarmUtil_getSev_str(node_ptr->alarms[id]));
|
|
}
|
|
/* Unexpected assertion case */
|
|
else if (( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) &&
|
|
( alarm_query.severity != FM_ALARM_SEVERITY_CLEAR ))
|
|
{
|
|
node_ptr->alarms[id] = FM_ALARM_SEVERITY_CLEAR ;
|
|
|
|
wlog("%s %s alarm mismatch ; %s -> %s",
|
|
node_ptr->hostname.c_str(),
|
|
_getIdentity(id).c_str(),
|
|
alarmUtil_getSev_str(alarm_query.severity).c_str(),
|
|
alarmUtil_getSev_str(node_ptr->alarms[id]).c_str());
|
|
|
|
mtcAlarm_clear ( node_ptr->hostname, id );
|
|
}
|
|
}
|
|
else if (( id == MTC_ALARM_ID__CONFIG ) ||
|
|
( id == MTC_ALARM_ID__ENABLE ) ||
|
|
( id == MTC_ALARM_ID__BM ) ||
|
|
( id == MTC_ALARM_ID__CH_CONT) ||
|
|
( id == MTC_ALARM_ID__CH_COMP))
|
|
{
|
|
EFmAlarmSeverityT severity = mtcAlarm_state ( node_ptr->hostname, id);
|
|
if ( severity != node_ptr->alarms[id] )
|
|
{
|
|
ilog ("%s %s alarm mismatch ; %s -> %s",
|
|
node_ptr->hostname.c_str(),
|
|
_getIdentity(id).c_str(),
|
|
alarmUtil_getSev_str(severity).c_str(),
|
|
alarmUtil_getSev_str(node_ptr->alarms[id]).c_str());
|
|
|
|
if ( node_ptr->alarms[id] == FM_ALARM_SEVERITY_CLEAR )
|
|
{
|
|
mtcAlarm_clear ( node_ptr->hostname, id );
|
|
}
|
|
else
|
|
{
|
|
mtcAlarm_raise ( node_ptr->hostname, id, node_ptr->alarms[id] );
|
|
}
|
|
}
|
|
if ( node_ptr->alarms[id] != FM_ALARM_SEVERITY_CLEAR )
|
|
{
|
|
if (!active_alarms.empty())
|
|
active_alarms.append(", ");
|
|
active_alarms.append(_getIdentity(id) + ":");
|
|
active_alarms.append(alarmUtil_getSev_str(node_ptr->alarms[id]));
|
|
}
|
|
}
|
|
/* else don't care about other alarm ids ; logs events etc */
|
|
}
|
|
|
|
/* manage logging of active alarms */
|
|
if ( !active_alarms.empty() )
|
|
{
|
|
if ( node_ptr->active_alarms != active_alarms )
|
|
{
|
|
ilog ("%s active alarms: %s",
|
|
node_ptr->hostname.c_str(),
|
|
active_alarms.c_str());
|
|
|
|
node_ptr->active_alarms = active_alarms ;
|
|
}
|
|
/* else
|
|
* do nothing because there are active alarms
|
|
* that have not changed since the last audit.
|
|
*/
|
|
}
|
|
else if ( ! node_ptr->active_alarms.empty() )
|
|
{
|
|
/* clear active alarm list since there 'were' active alarms
|
|
* but there are no longer active alarms */
|
|
node_ptr->active_alarms.clear();
|
|
ilog ("%s no active alarms", node_ptr->hostname.c_str());
|
|
}
|
|
/* else
|
|
* no active alarms ; don't log */
|
|
}
|
|
|
|
/************************* A L A R M I N G **************************/
|
|
|
|
/* Raise the specified maintenance alarm severity */
|
|
int mtcAlarm_raise ( string hostname, mtc_alarm_id_enum id, EFmAlarmSeverityT severity )
|
|
{
|
|
switch ( severity )
|
|
{
|
|
case FM_ALARM_SEVERITY_MINOR:
|
|
return (mtcAlarm_minor(hostname,id));
|
|
case FM_ALARM_SEVERITY_MAJOR:
|
|
return (mtcAlarm_major(hostname,id));
|
|
case FM_ALARM_SEVERITY_CRITICAL:
|
|
return (mtcAlarm_critical(hostname,id));
|
|
default:
|
|
return (FAIL_BAD_PARM);
|
|
}
|
|
}
|
|
|
|
/* Clear the specified hosts's maintenance alarm */
|
|
int mtcAlarm_clear ( string hostname, mtc_alarm_id_enum id )
|
|
{
|
|
if ( id < MTC_ALARM_ID__LAST )
|
|
{
|
|
string identity = _getIdentity(id);
|
|
string instance = _getInstance(id);
|
|
|
|
ilog ("%s clearing '%s' alarm (%s%s)\n",
|
|
hostname.c_str(),
|
|
alarm_list[id].name.c_str(),
|
|
identity.c_str(),
|
|
instance.c_str());
|
|
|
|
snprintf( alarm_list[id].alarm.reason_text, FM_MAX_BUFFER_LENGTH, "%s %s", hostname.data(), alarm_list[id].clear_reason.data());
|
|
|
|
return ( alarmUtil_clear ( hostname, identity, instance, alarm_list[id].alarm ));
|
|
}
|
|
return (FAIL_BAD_PARM);
|
|
}
|
|
|
|
/** Assert a specified hosts's mtce alarm with a CRITICAL severity level */
|
|
int mtcAlarm_critical ( string hostname, mtc_alarm_id_enum id )
|
|
{
|
|
if ( id < MTC_ALARM_ID__LAST )
|
|
{
|
|
string identity = _getIdentity(id);
|
|
string instance = _getInstance(id);
|
|
|
|
elog ("%s setting critical '%s' failure alarm (%s %s)\n",
|
|
hostname.c_str(),
|
|
alarm_list[id].name.c_str(),
|
|
identity.c_str(),
|
|
instance.c_str());
|
|
|
|
snprintf ( alarm_list[id].alarm.reason_text, FM_MAX_BUFFER_LENGTH, "%s %s", hostname.data(), alarm_list[id].critl_reason.data());
|
|
|
|
return ( alarmUtil_critical ( hostname, identity, instance, alarm_list[id].alarm ));
|
|
}
|
|
return (FAIL_BAD_PARM);
|
|
}
|
|
|
|
/** Assert a specified host's mtce alarm with a MAJOR severity level */
|
|
int mtcAlarm_major ( string hostname, mtc_alarm_id_enum id )
|
|
{
|
|
if ( id < MTC_ALARM_ID__LAST )
|
|
{
|
|
string identity = _getIdentity(id);
|
|
string instance = _getInstance(id);
|
|
|
|
wlog ("%s setting major '%s' failure alarm (%s %s)\n",
|
|
hostname.c_str(),
|
|
alarm_list[id].name.c_str(),
|
|
identity.c_str(),
|
|
instance.c_str());
|
|
|
|
if ( id == MTC_ALARM_ID__BM )
|
|
{
|
|
snprintf( alarm_list[id].alarm.proposed_repair_action, FM_MAX_BUFFER_LENGTH,
|
|
"board managment controller 'reset' or 'power-cycle' is recommended.");
|
|
}
|
|
|
|
else if ( id == MTC_ALARM_ID__ENABLE )
|
|
{
|
|
snprintf( alarm_list[id].alarm.proposed_repair_action, FM_MAX_BUFFER_LENGTH,
|
|
"If alarm persists, host may require lock/unlock to recover. See maintenance logs for more detail.");
|
|
}
|
|
|
|
snprintf ( alarm_list[id].alarm.reason_text, FM_MAX_BUFFER_LENGTH, "%s %s", hostname.data(), alarm_list[id].major_reason.data());
|
|
|
|
return ( alarmUtil_major ( hostname, identity, instance, alarm_list[id].alarm ));
|
|
}
|
|
return (FAIL_BAD_PARM);
|
|
}
|
|
|
|
/** Assert a specified host's mtce alarm with a MINOR severity level */
|
|
int mtcAlarm_minor ( string hostname, mtc_alarm_id_enum id )
|
|
{
|
|
if ( id < MTC_ALARM_ID__LAST )
|
|
{
|
|
string identity = _getIdentity(id);
|
|
string instance = _getInstance(id);
|
|
|
|
wlog ("%s setting minor '%s' failure alarm (%s %s)\n",
|
|
hostname.c_str(),
|
|
alarm_list[id].name.c_str(),
|
|
identity.c_str(),
|
|
instance.c_str());
|
|
|
|
snprintf ( alarm_list[id].alarm.reason_text, FM_MAX_BUFFER_LENGTH, "%s %s", hostname.data(), alarm_list[id].minor_reason.data());
|
|
|
|
return ( alarmUtil_minor ( hostname, identity, instance, alarm_list[id].alarm ));
|
|
}
|
|
return (FAIL_BAD_PARM);
|
|
}
|
|
|
|
/** Assert a specified host's mtce alarm with a WARNING severity level */
|
|
int mtcAlarm_warning ( string hostname, mtc_alarm_id_enum id )
|
|
{
|
|
if ( id < MTC_ALARM_ID__LAST )
|
|
{
|
|
string identity = _getIdentity(id);
|
|
string instance = _getInstance(id);
|
|
|
|
wlog ("%s setting warning '%s' alarm (%s %s)\n",
|
|
hostname.c_str(),
|
|
alarm_list[id].name.c_str(),
|
|
identity.c_str(),
|
|
instance.c_str());
|
|
|
|
if ( id == MTC_ALARM_ID__BM )
|
|
{
|
|
snprintf( alarm_list[id].alarm.proposed_repair_action, FM_MAX_BUFFER_LENGTH,
|
|
"Check Host's board management config and connectivity.");
|
|
}
|
|
|
|
snprintf ( alarm_list[id].alarm.reason_text, FM_MAX_BUFFER_LENGTH, "%s %s", hostname.data(), alarm_list[id].minor_reason.data());
|
|
|
|
return ( alarmUtil_warning ( hostname, identity, instance, alarm_list[id].alarm ));
|
|
}
|
|
return (FAIL_BAD_PARM);
|
|
}
|
|
|
|
/*************************** L O G G I N G **********************************/
|
|
|
|
/** Create a CRITICAL maintenance log */
|
|
int mtcAlarm_critical_log ( string hostname, mtc_alarm_id_enum id )
|
|
{
|
|
if ( id < MTC_ALARM_ID__LAST )
|
|
{
|
|
string identity = _getIdentity(id);
|
|
string instance = _getInstance(id);
|
|
|
|
elog ("%s creating critical '%s' log (%s %s)\n",
|
|
hostname.c_str(),
|
|
alarm_list[id].name.c_str(),
|
|
identity.c_str(),
|
|
instance.c_str());
|
|
|
|
snprintf ( alarm_list[id].alarm.reason_text, FM_MAX_BUFFER_LENGTH, "%s %s", hostname.data(), alarm_list[id].critl_reason.data());
|
|
|
|
return ( alarmUtil_critical_log ( hostname, identity, instance, alarm_list[id].alarm ));
|
|
}
|
|
return (FAIL_BAD_PARM);
|
|
}
|
|
|
|
/** Create a MAJOR maintenance log */
|
|
int mtcAlarm_major_log ( string hostname, mtc_alarm_id_enum id )
|
|
{
|
|
if ( id < MTC_ALARM_ID__LAST )
|
|
{
|
|
string identity = _getIdentity(id);
|
|
string instance = _getInstance(id);
|
|
|
|
wlog ("%s creating major '%s' log (%s %s)\n",
|
|
hostname.c_str(),
|
|
alarm_list[id].name.c_str(),
|
|
identity.c_str(),
|
|
instance.c_str());
|
|
|
|
snprintf ( alarm_list[id].alarm.reason_text, FM_MAX_BUFFER_LENGTH, "%s %s", hostname.data(), alarm_list[id].major_reason.data());
|
|
|
|
return ( alarmUtil_major_log ( hostname, identity, instance, alarm_list[id].alarm ));
|
|
}
|
|
return (FAIL_BAD_PARM);
|
|
}
|
|
|
|
/** Create a MINOR maintenance log */
|
|
int mtcAlarm_minor_log ( string hostname, mtc_alarm_id_enum id )
|
|
{
|
|
if ( id < MTC_ALARM_ID__LAST )
|
|
{
|
|
string identity = _getIdentity(id);
|
|
string instance = _getInstance(id);
|
|
|
|
wlog ("%s creating minor '%s' log (%s %s)\n",
|
|
hostname.c_str(),
|
|
alarm_list[id].name.c_str(),
|
|
identity.c_str(),
|
|
instance.c_str());
|
|
|
|
snprintf ( alarm_list[id].alarm.reason_text, FM_MAX_BUFFER_LENGTH, "%s", alarm_list[id].minor_reason.data());
|
|
|
|
return ( alarmUtil_minor_log ( hostname, identity, instance, alarm_list[id].alarm ));
|
|
}
|
|
return (FAIL_BAD_PARM);
|
|
}
|
|
|
|
/** Create a WARNING maintenance log */
|
|
int mtcAlarm_warning_log ( string hostname, mtc_alarm_id_enum id )
|
|
{
|
|
if ( id < MTC_ALARM_ID__LAST )
|
|
{
|
|
string identity = _getIdentity(id);
|
|
string instance = _getInstance(id);
|
|
|
|
wlog ("%s creating warning '%s' log (%s %s)\n",
|
|
hostname.c_str(),
|
|
alarm_list[id].name.c_str(),
|
|
identity.c_str(),
|
|
instance.c_str());
|
|
|
|
snprintf ( alarm_list[id].alarm.reason_text, FM_MAX_BUFFER_LENGTH, "%s", alarm_list[id].minor_reason.data());
|
|
|
|
return ( alarmUtil_warning_log ( hostname, identity, instance, alarm_list[id].alarm ));
|
|
}
|
|
return (FAIL_BAD_PARM);
|
|
}
|
|
|
|
/** Create a neutral customer log */
|
|
int mtcAlarm_log ( string hostname, mtc_alarm_id_enum id, string str )
|
|
{
|
|
if ( id < MTC_ALARM_ID__END )
|
|
{
|
|
/* default to command */
|
|
mtc_alarm_id_enum index = MTC_LOG_ID__COMMAND ;
|
|
bool found = false ;
|
|
|
|
if ( id == MTC_LOG_ID__EVENT_ADD )
|
|
{
|
|
index = MTC_LOG_ID__EVENT ;
|
|
alarm_list[index].instc_prefix = "event=add" ;
|
|
snprintf ( alarm_list[index].alarm.reason_text,
|
|
FM_MAX_BUFFER_LENGTH, "%s %s",
|
|
hostname.data(),
|
|
"has been 'added' to the system" );
|
|
found = true ;
|
|
|
|
}
|
|
else if ( id == MTC_LOG_ID__EVENT_MNFA_ENTER )
|
|
{
|
|
index = MTC_LOG_ID__EVENT ;
|
|
alarm_list[index].instc_prefix = "event=mnfa_enter" ;
|
|
snprintf ( alarm_list[index].alarm.reason_text,
|
|
FM_MAX_BUFFER_LENGTH, "%s %s",
|
|
hostname.data(),
|
|
"has 'entered' multi-node failure avoidance" );
|
|
found = true ;
|
|
|
|
}
|
|
else if ( id == MTC_LOG_ID__EVENT_MNFA_EXIT )
|
|
{
|
|
index = MTC_LOG_ID__EVENT ;
|
|
alarm_list[index].instc_prefix = "event=mnfa_exit" ;
|
|
snprintf ( alarm_list[index].alarm.reason_text,
|
|
FM_MAX_BUFFER_LENGTH, "%s %s",
|
|
hostname.data(),
|
|
"has 'exited' multi-node failure avoidance" );
|
|
found = true ;
|
|
}
|
|
else if ( id == MTC_LOG_ID__STATUSCHANGE_FAILED )
|
|
{
|
|
index = MTC_LOG_ID__STATECHANGE ;
|
|
alarm_list[index].instc_prefix = "status=failed" ;
|
|
snprintf ( alarm_list[index].alarm.reason_text,
|
|
FM_MAX_BUFFER_LENGTH, "%s %s",
|
|
hostname.data(),
|
|
"is 'disabled-failed' to the system" );
|
|
found = true ;
|
|
}
|
|
else if ( id == MTC_LOG_ID__STATUSCHANGE_ENABLED )
|
|
{
|
|
index = MTC_LOG_ID__STATECHANGE ;
|
|
alarm_list[index].instc_prefix = "state=enabled" ;
|
|
snprintf ( alarm_list[index].alarm.reason_text,
|
|
FM_MAX_BUFFER_LENGTH, "%s %s",
|
|
hostname.data(),
|
|
"is now 'enabled'" );
|
|
found = true ;
|
|
}
|
|
else if ( id == MTC_LOG_ID__STATUSCHANGE_DISABLED )
|
|
{
|
|
index = MTC_LOG_ID__STATECHANGE ;
|
|
alarm_list[index].instc_prefix = "state=disabled" ;
|
|
snprintf ( alarm_list[index].alarm.reason_text,
|
|
FM_MAX_BUFFER_LENGTH, "%s %s",
|
|
hostname.data(),
|
|
"is now 'disabled'" );
|
|
found = true ;
|
|
}
|
|
else if ( id == MTC_LOG_ID__STATUSCHANGE_OFFLINE )
|
|
{
|
|
index = MTC_LOG_ID__STATECHANGE ;
|
|
alarm_list[index].instc_prefix = "status=offline" ;
|
|
snprintf ( alarm_list[index].alarm.reason_text,
|
|
FM_MAX_BUFFER_LENGTH, "%s %s",
|
|
hostname.data(),
|
|
"is now 'offline'" );
|
|
found = true ;
|
|
}
|
|
else if ( id == MTC_LOG_ID__STATUSCHANGE_ONLINE )
|
|
{
|
|
index = MTC_LOG_ID__STATECHANGE ;
|
|
alarm_list[index].instc_prefix = "status=online" ;
|
|
snprintf ( alarm_list[index].alarm.reason_text,
|
|
FM_MAX_BUFFER_LENGTH, "%s %s",
|
|
hostname.data(),
|
|
"is now 'online'" );
|
|
found = true ;
|
|
}
|
|
|
|
else if ( id == MTC_LOG_ID__STATUSCHANGE_REINSTALL_FAILED )
|
|
{
|
|
index = MTC_LOG_ID__STATECHANGE ;
|
|
alarm_list[index].instc_prefix = "status=reinstall-failed" ;
|
|
snprintf ( alarm_list[index].alarm.reason_text,
|
|
FM_MAX_BUFFER_LENGTH, "%s %s",
|
|
hostname.data(),
|
|
"reinstall failed" );
|
|
found = true ;
|
|
}
|
|
|
|
else if ( id == MTC_LOG_ID__STATUSCHANGE_REINSTALL_COMPLETE )
|
|
{
|
|
index = MTC_LOG_ID__STATECHANGE ;
|
|
alarm_list[index].instc_prefix = "status=reinstall-complete" ;
|
|
snprintf ( alarm_list[index].alarm.reason_text,
|
|
FM_MAX_BUFFER_LENGTH, "%s %s",
|
|
hostname.data(),
|
|
"reinstall completed successfully" );
|
|
found = true ;
|
|
}
|
|
|
|
else if ( id == MTC_LOG_ID__COMMAND_UNLOCK )
|
|
{
|
|
alarm_list[index].instc_prefix = "command=unlock" ;
|
|
snprintf ( alarm_list[index].alarm.reason_text,
|
|
FM_MAX_BUFFER_LENGTH, "%s %s",
|
|
hostname.data(),
|
|
"manual 'unlock' request" );
|
|
found = true ;
|
|
}
|
|
else if ( id == MTC_LOG_ID__COMMAND_FORCE_LOCK )
|
|
{
|
|
alarm_list[index].instc_prefix = "command=force-lock" ;
|
|
snprintf ( alarm_list[index].alarm.reason_text,
|
|
FM_MAX_BUFFER_LENGTH, "%s %s",
|
|
hostname.data(),
|
|
"manual 'force-lock' request" );
|
|
found = true ;
|
|
}
|
|
else if ( id == MTC_LOG_ID__COMMAND_SWACT )
|
|
{
|
|
alarm_list[index].instc_prefix = "command=swact" ;
|
|
snprintf ( alarm_list[index].alarm.reason_text,
|
|
FM_MAX_BUFFER_LENGTH, "%s %s",
|
|
hostname.data(),
|
|
"manual 'controller switchover' request" );
|
|
found = true ;
|
|
}
|
|
else if ( id == MTC_LOG_ID__COMMAND_MANUAL_REBOOT )
|
|
{
|
|
alarm_list[index].instc_prefix = "command=reboot" ;
|
|
snprintf ( alarm_list[index].alarm.reason_text,
|
|
FM_MAX_BUFFER_LENGTH, "%s %s",
|
|
hostname.data(),
|
|
"manual 'reboot' request" );
|
|
found = true ;
|
|
}
|
|
else if ( id == MTC_LOG_ID__COMMAND_AUTO_REBOOT )
|
|
{
|
|
alarm_list[index].instc_prefix = "action=reboot" ;
|
|
snprintf ( alarm_list[index].alarm.reason_text,
|
|
FM_MAX_BUFFER_LENGTH, "%s %s",
|
|
hostname.data(),
|
|
"'reboot' action" );
|
|
found = true ;
|
|
}
|
|
else if ( id == MTC_LOG_ID__COMMAND_MANUAL_RESET )
|
|
{
|
|
alarm_list[index].instc_prefix = "command=reset" ;
|
|
snprintf ( alarm_list[index].alarm.reason_text,
|
|
FM_MAX_BUFFER_LENGTH, "%s %s",
|
|
hostname.data(),
|
|
"manual 'reset' request" );
|
|
found = true ;
|
|
}
|
|
else if ( id == MTC_LOG_ID__COMMAND_AUTO_RESET )
|
|
{
|
|
alarm_list[index].instc_prefix = "action=reset" ;
|
|
snprintf ( alarm_list[index].alarm.reason_text,
|
|
FM_MAX_BUFFER_LENGTH, "%s %s",
|
|
hostname.data(),
|
|
"'reset' action" );
|
|
found = true ;
|
|
}
|
|
else if ( id == MTC_LOG_ID__COMMAND_REINSTALL )
|
|
{
|
|
alarm_list[index].instc_prefix = "command=reinstall" ;
|
|
snprintf ( alarm_list[index].alarm.reason_text,
|
|
FM_MAX_BUFFER_LENGTH, "%s %s",
|
|
hostname.data(),
|
|
"manual 'reinstall' request" );
|
|
found = true ;
|
|
}
|
|
else if ( id == MTC_LOG_ID__COMMAND_MANUAL_POWER_ON )
|
|
{
|
|
alarm_list[index].instc_prefix = "command=power-on" ;
|
|
snprintf ( alarm_list[index].alarm.reason_text,
|
|
FM_MAX_BUFFER_LENGTH, "%s %s",
|
|
hostname.data(),
|
|
"manual 'power-on' request" );
|
|
found = true ;
|
|
}
|
|
else if ( id == MTC_LOG_ID__COMMAND_AUTO_POWER_ON )
|
|
{
|
|
alarm_list[index].instc_prefix = "action=power-on" ;
|
|
snprintf ( alarm_list[index].alarm.reason_text,
|
|
FM_MAX_BUFFER_LENGTH, "%s %s",
|
|
hostname.data(),
|
|
"'power-on' action" );
|
|
found = true ;
|
|
}
|
|
else if ( id == MTC_LOG_ID__COMMAND_MANUAL_POWER_OFF )
|
|
{
|
|
alarm_list[index].instc_prefix = "command=power-off" ;
|
|
snprintf ( alarm_list[index].alarm.reason_text,
|
|
FM_MAX_BUFFER_LENGTH, "%s %s",
|
|
hostname.data(),
|
|
"manual 'power-off' request" );
|
|
found = true ;
|
|
}
|
|
else if ( id == MTC_LOG_ID__COMMAND_AUTO_POWER_OFF )
|
|
{
|
|
alarm_list[index].instc_prefix = "action=power-off" ;
|
|
snprintf ( alarm_list[index].alarm.reason_text,
|
|
FM_MAX_BUFFER_LENGTH, "%s %s",
|
|
hostname.data(),
|
|
"'power-off' action" );
|
|
found = true ;
|
|
}
|
|
else if ( id == MTC_LOG_ID__COMMAND_DELETE )
|
|
{
|
|
alarm_list[index].instc_prefix = "command=delete" ;
|
|
snprintf ( alarm_list[index].alarm.reason_text,
|
|
FM_MAX_BUFFER_LENGTH, "%s %s",
|
|
hostname.data(),
|
|
"manual 'delete' request" );
|
|
found = true ;
|
|
}
|
|
else if ( id == MTC_LOG_ID__COMMAND_BM_PROVISIONED )
|
|
{
|
|
alarm_list[index].instc_prefix = "command=provision" ;
|
|
snprintf ( alarm_list[index].alarm.reason_text,
|
|
FM_MAX_BUFFER_LENGTH, "%s %s",
|
|
hostname.data(),
|
|
"board management controller has been 'provisioned'" );
|
|
found = true ;
|
|
}
|
|
else if ( id == MTC_LOG_ID__COMMAND_BM_DEPROVISIONED )
|
|
{
|
|
alarm_list[index].instc_prefix = "command=deprovision" ;
|
|
snprintf ( alarm_list[index].alarm.reason_text,
|
|
FM_MAX_BUFFER_LENGTH, "%s %s",
|
|
hostname.data(),
|
|
"board management controller has been 'de-provisioned'" );
|
|
found = true ;
|
|
}
|
|
else if ( id == MTC_LOG_ID__COMMAND_BM_REPROVISIONED )
|
|
{
|
|
alarm_list[index].instc_prefix = "command=reprovision" ;
|
|
snprintf ( alarm_list[index].alarm.reason_text,
|
|
FM_MAX_BUFFER_LENGTH, "%s %s",
|
|
hostname.data(),
|
|
"board management controller has been 're-provisioned'" );
|
|
found = true ;
|
|
}
|
|
else if (( id == MTC_LOG_ID__CONFIG_HB_ACTION_FAIL ) ||
|
|
( id == MTC_LOG_ID__CONFIG_HB_ACTION_DEGRADE ) ||
|
|
( id == MTC_LOG_ID__CONFIG_HB_ACTION_ALARM ) ||
|
|
( id == MTC_LOG_ID__CONFIG_HB_ACTION_NONE ))
|
|
{
|
|
alarm_list[index].instc_prefix = "config=heartbeat_failure_action" ;
|
|
snprintf ( alarm_list[index].alarm.reason_text,
|
|
FM_MAX_BUFFER_LENGTH, "%s %s %s",
|
|
hostname.data(),
|
|
"platform maintenance service parameter 'heartbeat failure action' changed from",
|
|
str.data());
|
|
found = true ;
|
|
}
|
|
else if ( id == MTC_LOG_ID__CONFIG_MNFA_TIMEOUT )
|
|
{
|
|
alarm_list[index].instc_prefix = "config=mnfa_timeout" ;
|
|
snprintf ( alarm_list[index].alarm.reason_text,
|
|
FM_MAX_BUFFER_LENGTH, "%s %s %s",
|
|
hostname.data(),
|
|
"platform maintenance service parameter 'mnfa_timeout' changed from",
|
|
str.data());
|
|
found = true ;
|
|
}
|
|
else if ( id == MTC_LOG_ID__CONFIG_MNFA_THRESHOLD )
|
|
{
|
|
alarm_list[index].instc_prefix = "config=mnfa_threshold" ;
|
|
snprintf ( alarm_list[index].alarm.reason_text,
|
|
FM_MAX_BUFFER_LENGTH, "%s %s %s",
|
|
hostname.data(),
|
|
"platform maintenance service parameter 'mnfa_threshold' changed from",
|
|
str.data());
|
|
found = true ;
|
|
}
|
|
|
|
if ( found == true )
|
|
{
|
|
int rc ;
|
|
|
|
string identity = _getIdentity(index);
|
|
string instance = _getInstance(index);
|
|
instance.append(alarm_list[index].instc_prefix);
|
|
|
|
/* Want to make this log a critical */
|
|
if ( id == MTC_LOG_ID__STATUSCHANGE_REINSTALL_FAILED )
|
|
{
|
|
alarm_list[index].alarm.severity = FM_ALARM_SEVERITY_CRITICAL ;
|
|
}
|
|
|
|
rc = alarmUtil_log ( hostname, identity, instance, alarm_list[index].alarm );
|
|
|
|
/* Revert the severity of the event log back to Clear ( shows up as N/A ) */
|
|
if ( id == MTC_LOG_ID__STATUSCHANGE_REINSTALL_FAILED )
|
|
{
|
|
alarm_list[MTC_LOG_ID__STATECHANGE].alarm.severity = FM_ALARM_SEVERITY_CLEAR ;
|
|
}
|
|
return (rc);
|
|
}
|
|
}
|
|
return (FAIL_BAD_PARM);
|
|
}
|