// // Copyright (c) 2014 -2016 Wind River Systems, Inc. // // SPDX-License-Identifier: Apache-2.0 // #include "sm_alarm_thread.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "fm_api_wrapper.h" #include "sm_limits.h" #include "sm_types.h" #include "sm_time.h" #include "sm_debug.h" #include "sm_trap.h" #include "sm_timer.h" #include "sm_selobj.h" #include "sm_node_utils.h" #include "sm_thread_health.h" #define SM_ALARM_THREAD_NAME "sm_alarm" #define SM_ALARM_THREAD_ALARM_REPORT_TIMER_IN_MS 5000 #define SM_ALARM_THREAD_ALARM_AUDIT_TIMER_IN_MS 60000 #define SM_ALARM_THREAD_TICK_INTERVAL_IN_MS 5000 #define SM_ALARM_CUSTOMER_LOG_FILE "/var/log/sm-customer.alarm" #define SM_ALARM_ENTRY_INUSE 0xFDFDFDFD #define SM_ALARM_DOMAIN_ENTRY_INUSE 0xFDFDFDFD typedef enum { SM_ALARM_STATE_UNKNOWN, SM_ALARM_STATE_INITIAL, SM_ALARM_STATE_CLEARING, SM_ALARM_STATE_CLEARED, SM_ALARM_STATE_RAISING, SM_ALARM_STATE_RAISED, SM_ALARM_STATE_MAX, } SmAlarmStateT; typedef struct { uint32_t inuse; SmAlarmT alarm; SmAlarmStateT alarm_state; SmAlarmNodeNameT alarm_node_name; SmAlarmDomainNameT alarm_domain_name; SmAlarmEntityNameT alarm_entity_name; SmAlarmDataT alarm_data; // Alarm Timestamps struct timespec alarm_raised_time; struct timespec alarm_changed_time; struct timespec alarm_cleared_time; // External FM Alarm Data fm_uuid_t fm_uuid; SFmAlarmDataT fm_alarm_data; } SmAlarmEntryT; typedef struct { uint32_t inuse; bool manage; SmAlarmDomainNameT alarm_domain_name; } SmAlarmDomainEntryT; static sig_atomic_t _stay_on; static bool _thread_created = false; static pthread_t _alarm_thread; static SmTimerIdT _alarm_report_timer_id = SM_TIMER_ID_INVALID; static SmTimerIdT _alarm_audit_timer_id = SM_TIMER_ID_INVALID; static int _server_fd = -1; static bool _server_fd_registered = false; static SmAlarmEntryT _alarms[SM_ALARMS_MAX]; static SmAlarmDomainEntryT _alarm_domains[SM_ALARM_DOMAINS_MAX]; static uint64_t _customer_alarm_log_id = 0; static FILE * _customer_alarm_log = NULL; // **************************************************************************** // Alarm Thread - Event Type String // ================================ static const char * sm_alarm_thread_event_type_str( SmAlarmEventTypeT event_type ) { switch( event_type ) { case SM_ALARM_EVENT_TYPE_UNKNOWN: return( "unknown" ); break; case SM_ALARM_EVENT_TYPE_COMMUNICATIONS_ALARM: return( "communications alarm" ); break; case SM_ALARM_EVENT_TYPE_PROCESSING_ERROR_ALARM: return( "processing error alarm" ); break; case SM_ALARM_EVENT_TYPE_ENVIRONMENTAL_ALARM: return( "environmental alarm" ); break; case SM_ALARM_EVENT_TYPE_QUALITY_OF_SERVICE_ALARM: return( "quality of service alarm" ); break; case SM_ALARM_EVENT_TYPE_EQUIPMENT_ALARM: return( "equipment alarm" ); break; case SM_ALARM_EVENT_TYPE_INTEGRITY_VIOLATION: return( "integrity violation" ); break; case SM_ALARM_EVENT_TYPE_OPERATIONAL_VIOLATION: return( "operational violation" ); break; case SM_ALARM_EVENT_TYPE_PHYSICAL_VIOLATION: return( "physical violation" ); break; case SM_ALARM_EVENT_TYPE_SECURITY_SERVICE_VIOLATION: return( "security service violation" ); break; case SM_ALARM_EVENT_TYPE_MECHANISM_VIOLATION: return( "mechanism violation" ); break; case SM_ALARM_EVENT_TYPE_TIME_DOMAIN_VIOLATION: return( "time domain violation" ); break; default: return ( "???" ); break; } return( "???" ); } // **************************************************************************** // **************************************************************************** // Alarm Thread - Probable Cause String // ==================================== static const char * sm_alarm_thread_probable_cause_str( SmAlarmProbableCauseT probable_cause ) { switch( probable_cause ) { case SM_ALARM_PROBABLE_CAUSE_UNKNOWN: return( "unknown" ); break; case SM_ALARM_PROBABLE_CAUSE_INDETERMINATE: return( "indeterminate" ); break; case SM_ALARM_PROBABLE_CAUSE_SOFTWARE_ERROR: return( "software error" ); break; case SM_ALARM_PROBABLE_CAUSE_SOFTWARE_PROGRAM_ERROR: return( "software program error" ); break; case SM_ALARM_PROBABLE_CAUSE_UNDERLYING_RESOURCE_UNAVAILABLE: return( "underlying resource unavailable" ); break; case SM_ALARM_PROBABLE_CAUSE_KEY_EXPIRED: return( "key expired" ); break; default: return ( "???" ); break; } return( "???" ); } // **************************************************************************** // **************************************************************************** // Alarm Thread - Severity String // ============================== static const char * sm_alarm_thread_severity_str( SmAlarmSeverityT severity ) { switch( severity ) { case SM_ALARM_SEVERITY_UNKNOWN: return( "unknown" ); break; case SM_ALARM_SEVERITY_CLEARED: return( "cleared" ); break; case SM_ALARM_SEVERITY_INDETERMINATE: return( "indeterminate" ); break; case SM_ALARM_SEVERITY_WARNING: return( "warning" ); break; case SM_ALARM_SEVERITY_MINOR: return( "minor" ); break; case SM_ALARM_SEVERITY_MAJOR: return( "major" ); break; case SM_ALARM_SEVERITY_CRITICAL: return( "critical" ); break; default: return ( "???" ); break; } return( "???" ); } // **************************************************************************** // **************************************************************************** // Alarm Thread - Trend String // =========================== static const char * sm_alarm_thread_trend_str( SmAlarmTrendIndicationT trend ) { switch( trend ) { case SM_ALARM_TREND_INDICATION_UNKNOWN: return( "unknown" ); break; case SM_ALARM_TREND_INDICATION_LESS_SEVERE: return( "less severe" ); break; case SM_ALARM_TREND_INDICATION_NO_CHANGE: return( "no change" ); break; case SM_ALARM_TREND_INDICATION_MORE_SEVERE: return( "more severe" ); break; default: return ( "???" ); break; } return( "???" ); } // **************************************************************************** // **************************************************************************** // Alarm Thread - Alarm String // =========================== static const char * sm_alarm_thread_alarm_str( SmAlarmT alarm ) { switch( alarm ) { case SM_ALARM_UNKNOWN: return( "unknown" ); break; case SM_ALARM_SERVICE_GROUP_STATE: return( "service-group-state" ); break; case SM_ALARM_SERVICE_GROUP_REDUNDANCY: return( "service-group-redundancy" ); break; case SM_ALARM_SOFTWARE_MODIFICATION: return( "software-modification" ); break; case SM_ALARM_COMMUNICATION_FAILURE: return( "communication-failure" ); break; default: return ( "???" ); break; } return( "???" ); } // **************************************************************************** // **************************************************************************** // Alarm Thread - Log // ================== static void sm_alarm_thread_log( SmAlarmEntryT* entry ) { char date_str[32]; struct tm t_real; fprintf( _customer_alarm_log, "alarm-log-id: %" PRIu64 "\n", ++_customer_alarm_log_id ); fprintf( _customer_alarm_log, "alarm-type: %s\n", sm_alarm_thread_alarm_str( entry->alarm ) ); if( '\0' != entry->alarm_node_name[0] ) { fprintf( _customer_alarm_log, "alarm-node: %s\n", entry->alarm_node_name ); } fprintf( _customer_alarm_log, "alarm-domain: %s\n", entry->alarm_domain_name ); fprintf( _customer_alarm_log, "alarm-entity: %s\n", entry->alarm_entity_name ); fprintf( _customer_alarm_log, "alarm-event-type: %s\n", sm_alarm_thread_event_type_str( entry->alarm_data.event_type ) ); fprintf( _customer_alarm_log, "alarm-probable-cause: %s\n", sm_alarm_thread_probable_cause_str( entry->alarm_data.probable_cause ) ); if( '\0' != entry->alarm_data.specific_problem_text[0] ) { fprintf( _customer_alarm_log, "alarm-specific-problem-text: %s\n", entry->alarm_data.specific_problem_text ); } fprintf( _customer_alarm_log, "alarm-perceived-severity: %s\n", sm_alarm_thread_severity_str( entry->alarm_data.perceived_severity ) ); if( SM_ALARM_SEVERITY_CLEARED != entry->alarm_data.perceived_severity ) { fprintf( _customer_alarm_log, "alarm-trend-indication: %s\n", sm_alarm_thread_trend_str( entry->alarm_data.trend_indication ) ); } if( entry->alarm_data.state_info.applicable ) { fprintf( _customer_alarm_log, "alarm-entity-state: %s\n", entry->alarm_data.state_info.state ); fprintf( _customer_alarm_log, "alarm-entity-status: %s\n", entry->alarm_data.state_info.status ); fprintf( _customer_alarm_log, "alarm-entity-condition: %s\n", entry->alarm_data.state_info.condition ); } if( entry->alarm_data.threshold_info.applicable ) { fprintf( _customer_alarm_log, "alarm-threshold-value: %i\n", entry->alarm_data.threshold_info.threshold_value ); fprintf( _customer_alarm_log, "alarm-observed_value: %i\n", entry->alarm_data.threshold_info.observed_value ); } if( '\0' != entry->alarm_data.proposed_repair_action[0] ) { fprintf( _customer_alarm_log, "alarm-proposed-repair-action: %s\n", entry->alarm_data.proposed_repair_action ); } if( '\0' != entry->alarm_data.additional_text[0] ) { fprintf( _customer_alarm_log, "alarm-additional-text: %s\n", entry->alarm_data.additional_text ); } if( SM_ALARM_SEVERITY_CLEARED != entry->alarm_data.perceived_severity ) { fprintf( _customer_alarm_log, "alarm-service-affecting: %s\n", entry->alarm_data.service_affecting ? "yes" : "no" ); fprintf( _customer_alarm_log, "alarm-suppression-allowed: %s\n", entry->alarm_data.suppression_allowed ? "yes" : "no" ); } if( 0 != entry->alarm_raised_time.tv_sec ) { if( NULL == localtime_r( &(entry->alarm_raised_time.tv_sec), &t_real ) ) { fprintf( _customer_alarm_log, "alarm-raised-time: " "YYYY:MM:DD HH:MM:SS.xxx\n" ); } else { strftime( date_str, sizeof(date_str), "%FT%T", &t_real ); fprintf( _customer_alarm_log, "alarm-raised-time: " "%s.%03ld\n", date_str, entry->alarm_raised_time.tv_nsec/1000000 ); } } if( 0 != entry->alarm_changed_time.tv_sec ) { if( NULL == localtime_r( &(entry->alarm_changed_time.tv_sec), &t_real ) ) { fprintf( _customer_alarm_log, "alarm-changed-time: " "YYYY:MM:DD HH:MM:SS.xxx\n" ); } else { strftime( date_str, sizeof(date_str), "%FT%T", &t_real ); fprintf( _customer_alarm_log, "alarm-changed-time: " "%s.%03ld\n", date_str, entry->alarm_changed_time.tv_nsec/1000000 ); } } if( 0 != entry->alarm_cleared_time.tv_sec ) { if( NULL == localtime_r( &(entry->alarm_cleared_time.tv_sec), &t_real ) ) { fprintf( _customer_alarm_log, "alarm-cleared-time: " "YYYY:MM:DD HH:MM:SS.xxx\n" ); } else { strftime( date_str, sizeof(date_str), "%FT%T", &t_real ); fprintf( _customer_alarm_log, "alarm-cleared-time: " "%s.%03ld\n", date_str, entry->alarm_cleared_time.tv_nsec/1000000 ); } } fprintf( _customer_alarm_log, "\n" ); fflush( _customer_alarm_log ); } // **************************************************************************** // **************************************************************************** // Alarm Thread - Find Empty Domain // ================================ static SmAlarmDomainEntryT* sm_alarm_thread_find_empty_domain( void ) { SmAlarmDomainEntryT* domain_entry; int domain_i; for( domain_i=0; SM_ALARM_DOMAINS_MAX > domain_i; ++domain_i ) { domain_entry = &(_alarm_domains[domain_i]); if( SM_ALARM_DOMAIN_ENTRY_INUSE != domain_entry->inuse ) { return( domain_entry ); } } return( NULL ); } // **************************************************************************** // **************************************************************************** // Alarm Thread - Find Domain // ========================== static SmAlarmDomainEntryT* sm_alarm_thread_find_domain( SmAlarmDomainNameT domain_name ) { SmAlarmDomainEntryT* domain_entry; int domain_i; for( domain_i=0; SM_ALARM_DOMAINS_MAX > domain_i; ++domain_i ) { domain_entry = &(_alarm_domains[domain_i]); if( SM_ALARM_DOMAIN_ENTRY_INUSE == domain_entry->inuse ) { if( 0 == strcmp( domain_name, domain_entry->alarm_domain_name ) ) { return( domain_entry ); } } } return( NULL ); } // **************************************************************************** // **************************************************************************** // Alarm Thread - Domain Managed // ============================= static bool sm_alarm_thread_domain_managed( SmAlarmT alarm ) { switch( alarm ) { case SM_ALARM_SERVICE_GROUP_STATE: return( true ); break; case SM_ALARM_SERVICE_GROUP_REDUNDANCY: return( true ); break; case SM_ALARM_SOFTWARE_MODIFICATION: return( false ); break; case SM_ALARM_COMMUNICATION_FAILURE: return( false ); break; default: DPRINTFE( "Unknown alarm (%i).", alarm ); return( false ); break; } return( false ); } // **************************************************************************** // **************************************************************************** // Alarm Thread - Find Empty // ========================= static SmAlarmEntryT* sm_alarm_thread_find_empty( void ) { SmAlarmEntryT* entry; unsigned int entry_i; for( entry_i=0; SM_ALARMS_MAX > entry_i; ++entry_i ) { entry = &(_alarms[entry_i]); if( SM_ALARM_ENTRY_INUSE != entry->inuse ) return( entry ); } return( NULL ); } // **************************************************************************** // **************************************************************************** // Alarm Thread - Find // =================== static SmAlarmEntryT* sm_alarm_thread_find( SmAlarmT alarm, SmAlarmNodeNameT node_name, SmAlarmDomainNameT domain_name, SmAlarmEntityNameT entity_name ) { SmAlarmEntryT* entry; unsigned int entry_i; for( entry_i=0; SM_ALARMS_MAX > entry_i; ++entry_i ) { entry = &(_alarms[entry_i]); if( SM_ALARM_ENTRY_INUSE != entry->inuse ) continue; if(( alarm == entry->alarm )&& ( 0 == strcmp( node_name, entry->alarm_node_name ) )&& ( 0 == strcmp( domain_name, entry->alarm_domain_name ) )&& ( 0 == strcmp( entity_name, entry->alarm_entity_name ) )) return( entry ); } return( NULL ); } // **************************************************************************** // **************************************************************************** // Alarm Thread - Add // ================== static void sm_alarm_thread_add( struct timespec* ts_real, SmAlarmT alarm, SmAlarmNodeNameT node_name, SmAlarmDomainNameT domain_name, SmAlarmEntityNameT entity_name, SmAlarmDataT* data ) { bool raise_alarm = false; SmAlarmEntryT* entry; entry = sm_alarm_thread_find( alarm, node_name, domain_name, entity_name ); if( NULL == entry ) { entry = sm_alarm_thread_find_empty(); if( NULL == entry ) { DPRINTFE( "Failed to store alarm, no more room." ); return; } else { // New alarm entry. memset( entry, 0, sizeof(SmAlarmEntryT) ); entry->inuse = SM_ALARM_ENTRY_INUSE; entry->alarm = alarm; entry->alarm_state = SM_ALARM_STATE_INITIAL; snprintf( entry->alarm_node_name, sizeof(entry->alarm_node_name), "%s", node_name ); snprintf( entry->alarm_domain_name, sizeof(entry->alarm_domain_name), "%s", domain_name ); snprintf( entry->alarm_entity_name, sizeof(entry->alarm_entity_name), "%s", entity_name ); } } switch( entry->alarm_state ) { case SM_ALARM_STATE_INITIAL: case SM_ALARM_STATE_CLEARING: case SM_ALARM_STATE_CLEARED: memcpy( &(entry->alarm_raised_time), ts_real, sizeof(struct timespec) ); memset( &(entry->alarm_changed_time), 0, sizeof(struct timespec) ); memset( &(entry->alarm_cleared_time), 0, sizeof(struct timespec) ); raise_alarm = true; break; case SM_ALARM_STATE_RAISING: case SM_ALARM_STATE_RAISED: if( 0 != memcmp( &(entry->alarm_data), data, sizeof(SmAlarmDataT) ) ) { memcpy( &(entry->alarm_changed_time), ts_real, sizeof(struct timespec) ); memset( &(entry->alarm_cleared_time), 0, sizeof(struct timespec) ); raise_alarm = true; } break; default: // Ignore. break; } if( raise_alarm ) { entry->alarm_state = SM_ALARM_STATE_RAISING; memcpy( &(entry->alarm_data), data, sizeof(entry->alarm_data) ); DPRINTFI( "Raising alarm (%s) for node (%s) domain (%s) entity (%s).", sm_alarm_thread_alarm_str( entry->alarm ), entry->alarm_node_name, entry->alarm_domain_name, entry->alarm_entity_name ); } } // **************************************************************************** // **************************************************************************** // Alarm Thread - Delete // ===================== static void sm_alarm_thread_delete( struct timespec* ts_real, SmAlarmT alarm, SmAlarmNodeNameT node_name, SmAlarmDomainNameT domain_name, SmAlarmEntityNameT entity_name ) { SmAlarmEntryT* entry; entry = sm_alarm_thread_find( alarm, node_name, domain_name, entity_name ); if( NULL == entry ) { DPRINTFD( "No alarm (%s) raised for node (%s) domain (%s) entity (%s) " "found.", sm_alarm_thread_alarm_str( alarm ), node_name, domain_name, entity_name ); return; } if( SM_ALARM_STATE_CLEARED != entry->alarm_state ) { memcpy( &(entry->alarm_cleared_time), ts_real, sizeof(struct timespec) ); entry->alarm_state = SM_ALARM_STATE_CLEARING; DPRINTFI( "Clearing alarm (%s) for node (%s) domain (%s) entity (%s).", sm_alarm_thread_alarm_str( entry->alarm ), entry->alarm_node_name, entry->alarm_domain_name, entry->alarm_entity_name ); }else { DPRINTFI( "Removed cleared alarm (%s) for node (%s) domain (%s) entity (%s).", sm_alarm_thread_alarm_str( entry->alarm ), entry->alarm_node_name, entry->alarm_domain_name, entry->alarm_entity_name ); entry->inuse = 0; //This alarm has reached the end of its lifecycle } } // **************************************************************************** // **************************************************************************** // Alarm Thread - Get Alarm Identifier // =================================== static void sm_alarm_thread_get_alarm_id( SFmAlarmDataT* fm_alarm, SmAlarmT* alarm, SmAlarmNodeNameT alarm_node_name, SmAlarmDomainNameT alarm_domain_name, SmAlarmEntityNameT alarm_entity_name ) { char fm_entity_instance_id[FM_MAX_BUFFER_LENGTH]; int len; *alarm = SM_ALARM_UNKNOWN; alarm_node_name[0] = '\0'; alarm_domain_name[0] = '\0'; alarm_entity_name[0] = '\0'; len = snprintf( fm_entity_instance_id, sizeof(fm_entity_instance_id), "%s", fm_alarm->entity_instance_id ); int char_i; for( char_i=0; len > char_i; ++char_i ) { if( '.' == fm_entity_instance_id[char_i] ) { fm_entity_instance_id[char_i] = ' '; } } if( 0 == strcmp( fm_alarm->alarm_id, SM_ALARM_SERVICE_GROUP_STATE_ALARM_ID ) ) { *alarm = SM_ALARM_SERVICE_GROUP_STATE; sscanf( fm_entity_instance_id, "service_domain=%s service_group=%s host=%s", alarm_domain_name, alarm_entity_name, alarm_node_name ); return; } if( 0 == strcmp( fm_alarm->alarm_id, SM_ALARM_SERVICE_GROUP_REDUNDANCY_ALARM_ID ) ) { *alarm = SM_ALARM_SERVICE_GROUP_REDUNDANCY; sscanf( fm_entity_instance_id, "service_domain=%s service_group=%s", alarm_domain_name, alarm_entity_name ); return; } if( 0 == strcmp( fm_alarm->alarm_id, SM_ALARM_SOFTWARE_MODIFICATION_ALARM_ID ) ) { *alarm = SM_ALARM_SOFTWARE_MODIFICATION; sscanf( fm_entity_instance_id, "host=%s", alarm_node_name ); return; } if( 0 == strcmp( fm_alarm->alarm_id, SM_ALARM_COMMUNICATION_FAILURE_ALARM_ID ) ) { *alarm = SM_ALARM_COMMUNICATION_FAILURE; sscanf( fm_entity_instance_id, "host=%s network=%s", alarm_node_name, alarm_entity_name ); return; } } // **************************************************************************** // **************************************************************************** // Alarm Thread - Get Fault Management Alarm Identifier // ==================================================== static SmErrorT sm_alarm_thread_get_fm_alarm_id( SmAlarmT alarm, const SmAlarmNodeNameT alarm_node_name, const SmAlarmDomainNameT alarm_domain_name, const SmAlarmEntityNameT alarm_entity_name, char fm_alarm_id[], char fm_entity_type_id[], char fm_entity_instance_id[] ) { switch( alarm ) { case SM_ALARM_SERVICE_GROUP_STATE: snprintf( fm_alarm_id, FM_MAX_BUFFER_LENGTH, SM_ALARM_SERVICE_GROUP_STATE_ALARM_ID ); snprintf( fm_entity_type_id, FM_MAX_BUFFER_LENGTH, "service_domain.service_group.host" ); snprintf( fm_entity_instance_id, FM_MAX_BUFFER_LENGTH, "service_domain=%s.service_group=%s.host=%s", alarm_domain_name, alarm_entity_name, alarm_node_name ); break; case SM_ALARM_SERVICE_GROUP_REDUNDANCY: snprintf( fm_alarm_id, FM_MAX_BUFFER_LENGTH, SM_ALARM_SERVICE_GROUP_REDUNDANCY_ALARM_ID ); snprintf( fm_entity_type_id, FM_MAX_BUFFER_LENGTH, "service_domain.service_group" ); snprintf( fm_entity_instance_id, FM_MAX_BUFFER_LENGTH, "service_domain=%s.service_group=%s", alarm_domain_name, alarm_entity_name ); break; case SM_ALARM_SOFTWARE_MODIFICATION: snprintf( fm_alarm_id, FM_MAX_BUFFER_LENGTH, SM_ALARM_SOFTWARE_MODIFICATION_ALARM_ID ); snprintf( fm_entity_type_id, FM_MAX_BUFFER_LENGTH, "host" ); snprintf( fm_entity_instance_id, FM_MAX_BUFFER_LENGTH, "host=%s", alarm_node_name ); break; case SM_ALARM_COMMUNICATION_FAILURE: snprintf( fm_alarm_id, FM_MAX_BUFFER_LENGTH, SM_ALARM_COMMUNICATION_FAILURE_ALARM_ID ); snprintf( fm_entity_type_id, FM_MAX_BUFFER_LENGTH, "host.network" ); snprintf( fm_entity_instance_id, FM_MAX_BUFFER_LENGTH, "host=%s.network=%s", alarm_node_name, alarm_entity_name ); break; default: DPRINTFE( "Unknown alarm (%i).", alarm ); return( SM_FAILED ); break; } return( SM_OKAY ); } // **************************************************************************** // **************************************************************************** // Alarm Thread - Get Fault Management Alarm Type // ============================================== static EFmAlarmTypeT sm_alarm_thread_get_fm_alarm_type( SmAlarmEntryT* entry ) { switch( entry->alarm_data.event_type ) { case SM_ALARM_EVENT_TYPE_UNKNOWN: return( FM_ALARM_TYPE_UNKNOWN ); break; case SM_ALARM_EVENT_TYPE_COMMUNICATIONS_ALARM: return( FM_ALARM_COMM ); break; case SM_ALARM_EVENT_TYPE_PROCESSING_ERROR_ALARM: return( FM_ALARM_PROCESSING_ERROR ); break; case SM_ALARM_EVENT_TYPE_ENVIRONMENTAL_ALARM: return( FM_ALARM_ENVIRONMENTAL ); break; case SM_ALARM_EVENT_TYPE_QUALITY_OF_SERVICE_ALARM: return( FM_ALARM_QOS ); break; case SM_ALARM_EVENT_TYPE_EQUIPMENT_ALARM: return( FM_ALARM_EQUIPMENT ); break; case SM_ALARM_EVENT_TYPE_INTEGRITY_VIOLATION: return( FM_ALARM_INTERGRITY ); break; case SM_ALARM_EVENT_TYPE_OPERATIONAL_VIOLATION: return( FM_ALARM_OPERATIONAL ); break; case SM_ALARM_EVENT_TYPE_PHYSICAL_VIOLATION: return( FM_ALARM_PHYSICAL ); break; case SM_ALARM_EVENT_TYPE_SECURITY_SERVICE_VIOLATION: return( FM_ALARM_SECURITY ); break; case SM_ALARM_EVENT_TYPE_MECHANISM_VIOLATION: return( FM_ALARM_SECURITY ); break; case SM_ALARM_EVENT_TYPE_TIME_DOMAIN_VIOLATION: return( FM_ALARM_TIME ); break; default: DPRINTFI( "Unknown event type (%i) for alarm (%s) " "entity (%s).", entry->alarm_data.event_type, sm_alarm_thread_alarm_str( entry->alarm ), entry->alarm_entity_name ); return( FM_ALARM_TYPE_UNKNOWN ); break; } } // **************************************************************************** // **************************************************************************** // Alarm Thread - Get Fault Management Severity // ============================================ static EFmAlarmSeverityT sm_alarm_thread_get_fm_severity( SmAlarmEntryT* entry ) { switch( entry->alarm_data.perceived_severity ) { case SM_ALARM_SEVERITY_CLEARED: return( FM_ALARM_SEVERITY_CLEAR ); break; case SM_ALARM_SEVERITY_INDETERMINATE: DPRINTFI( "Indeterminate severity for alarm (%s) entity (%s) " "mapped to critical.", sm_alarm_thread_alarm_str( entry->alarm ), entry->alarm_entity_name ); return( FM_ALARM_SEVERITY_CRITICAL ); break; case SM_ALARM_SEVERITY_WARNING: return( FM_ALARM_SEVERITY_WARNING ); break; case SM_ALARM_SEVERITY_MINOR: return( FM_ALARM_SEVERITY_MINOR ); break; case SM_ALARM_SEVERITY_MAJOR: return( FM_ALARM_SEVERITY_MAJOR ); break; case SM_ALARM_SEVERITY_CRITICAL: return( FM_ALARM_SEVERITY_CRITICAL ); break; default: DPRINTFI( "Unknown severity (%i) for alarm (%s) entity (%s) " "mapped to warning.", entry->alarm_data.perceived_severity, sm_alarm_thread_alarm_str( entry->alarm ), entry->alarm_entity_name ); return( FM_ALARM_SEVERITY_WARNING ); break; } } // **************************************************************************** // **************************************************************************** // Alarm - Get Fault Management Probable Cause // =========================================== static EFmAlarmProbableCauseT sm_alarm_thread_get_fm_probable_cause( SmAlarmEntryT* entry ) { switch( entry->alarm_data.probable_cause ) { case SM_ALARM_PROBABLE_CAUSE_UNKNOWN: return( FM_ALARM_CAUSE_UNKNOWN ); break; case SM_ALARM_PROBABLE_CAUSE_INDETERMINATE: return( FM_ALARM_CAUSE_UNKNOWN ); break; case SM_ALARM_PROBABLE_CAUSE_SOFTWARE_ERROR: return( FM_ALARM_SOFTWARE_ERROR ); break; case SM_ALARM_PROBABLE_CAUSE_SOFTWARE_PROGRAM_ERROR: return( FM_ALARM_PROGRAM_ERROR ); break; case SM_ALARM_PROBABLE_CAUSE_UNDERLYING_RESOURCE_UNAVAILABLE: return( FM_ALARM_UNDERLYING_RESOURCE_UNAVAILABLE ); break; case SM_ALARM_PROBABLE_CAUSE_KEY_EXPIRED: return( FM_ALARM_KEY_EXPIRED ); break; default: DPRINTFI( "Unknown probable cause (%i) for alarm (%s) " "entity (%s).", entry->alarm_data.probable_cause, sm_alarm_thread_alarm_str( entry->alarm ), entry->alarm_entity_name ); return( FM_ALARM_CAUSE_UNKNOWN ); break; } } // **************************************************************************** // **************************************************************************** // Alarm Thread - Build Fault Management Alarm Data // ================================================ static SmErrorT sm_alarm_thread_build_fm_alarm_data( SmAlarmEntryT* entry ) { SFmAlarmDataT* fm_alarm_data = &(entry->fm_alarm_data); SmErrorT error; error = sm_alarm_thread_get_fm_alarm_id( entry->alarm, entry->alarm_node_name, entry->alarm_domain_name, entry->alarm_entity_name, fm_alarm_data->alarm_id, fm_alarm_data->entity_type_id, fm_alarm_data->entity_instance_id ); if( SM_OKAY != error ) { DPRINTFE( "Failed to get alarm (%s) data for node (%s) domain (%s) " "entity (%s), error=%s.", sm_alarm_thread_alarm_str( entry->alarm ), entry->alarm_node_name, entry->alarm_domain_name, entry->alarm_entity_name, sm_error_str( error ) ); return( error ); } fm_alarm_data->alarm_state = FM_ALARM_STATE_SET; fm_alarm_data->severity = sm_alarm_thread_get_fm_severity( entry ); // FM only has a reason text field. Need to concatenate the // specific_problem_text and addition text fields. // Capitalize the first character. if( '\0' == entry->alarm_data.additional_text[0] ) { snprintf( fm_alarm_data->reason_text, sizeof(fm_alarm_data->reason_text), "%s", entry->alarm_data.specific_problem_text ); } else { snprintf( fm_alarm_data->reason_text, sizeof(fm_alarm_data->reason_text), "%s; %s", entry->alarm_data.specific_problem_text, entry->alarm_data.additional_text ); } if( isalpha( fm_alarm_data->reason_text[0] ) ) { fm_alarm_data->reason_text[0] = toupper( fm_alarm_data->reason_text[0] ); } fm_alarm_data->alarm_type = sm_alarm_thread_get_fm_alarm_type( entry ); fm_alarm_data->probable_cause = sm_alarm_thread_get_fm_probable_cause( entry ); // Capitalize the first character. snprintf( fm_alarm_data->proposed_repair_action, sizeof(fm_alarm_data->proposed_repair_action), "%s", entry->alarm_data.proposed_repair_action ); if( isalpha( fm_alarm_data->proposed_repair_action[0] ) ) { fm_alarm_data->proposed_repair_action[0] = toupper( fm_alarm_data->proposed_repair_action[0] ); } if( entry->alarm_data.service_affecting ) { fm_alarm_data->service_affecting = FM_TRUE; } else { fm_alarm_data->service_affecting = FM_FALSE; } if( entry->alarm_data.suppression_allowed ) { fm_alarm_data->suppression = FM_TRUE; } else { fm_alarm_data->suppression = FM_FALSE; } fm_alarm_data->inhibit_alarms = FM_FALSE; return( SM_OKAY ); } // **************************************************************************** // **************************************************************************** // Alarm Thread - Raise Alarm // ========================== static void sm_alarm_thread_raise_alarm( SmAlarmEntryT* entry ) { EFmErrorT fm_error; SmErrorT error; if( SM_ALARM_SEVERITY_CLEARED == entry->alarm_data.perceived_severity ) { DPRINTFE( "Attempting to raise a cleared alarm." ); return; } error = sm_alarm_thread_build_fm_alarm_data( entry ); if( SM_OKAY != error ) { DPRINTFE( "Failed to build alarm (%s) data for node (%s) domain (%s) " "entity (%s), error=%s.", sm_alarm_thread_alarm_str( entry->alarm ), entry->alarm_node_name, entry->alarm_domain_name, entry->alarm_entity_name, sm_error_str( error ) ); return; } fm_error = fm_set_fault_wrapper( &(entry->fm_alarm_data), &(entry->fm_uuid) ); if( FM_ERR_OK != fm_error ) { DPRINTFE( "Failed to set alarm (%s) for node (%s) domain (%s) " "entity (%s), error=%i", sm_alarm_thread_alarm_str( entry->alarm ), entry->alarm_node_name, entry->alarm_domain_name, entry->alarm_entity_name, fm_error ); return; } DPRINTFI( "Raised alarm (%s) for node (%s) domain (%s) entity (%s), " "fm_uuid=%s.", sm_alarm_thread_alarm_str( entry->alarm ), entry->alarm_node_name, entry->alarm_domain_name, entry->alarm_entity_name, entry->fm_uuid ); entry->alarm_state = SM_ALARM_STATE_RAISED; sm_alarm_thread_log( entry ); } // **************************************************************************** // **************************************************************************** // Alarm Thread - Clear Alarm // ========================== static void sm_alarm_thread_clear_alarm( SmAlarmEntryT* entry ) { AlarmFilter fm_filter; char fm_entity_type_id[FM_MAX_BUFFER_LENGTH]; EFmErrorT fm_error; SmErrorT error; error = sm_alarm_thread_get_fm_alarm_id( entry->alarm, entry->alarm_node_name, entry->alarm_domain_name, entry->alarm_entity_name, fm_filter.alarm_id, fm_entity_type_id, fm_filter.entity_instance_id ); if( SM_OKAY != error ) { DPRINTFE( "Failed to get alarm (%s) data for entity (%s), " "error=%s.", sm_alarm_thread_alarm_str( entry->alarm ), entry->alarm_entity_name, sm_error_str( error ) ); return; } fm_error = fm_clear_fault_wrapper( &fm_filter ); if(( FM_ERR_OK != fm_error )&&( FM_ERR_ENTITY_NOT_FOUND != fm_error )) { DPRINTFE( "Failed to clear alarm (%s) for node (%s) domain (%s) " "entity (%s), error=%i", sm_alarm_thread_alarm_str( entry->alarm ), entry->alarm_node_name, entry->alarm_domain_name, entry->alarm_entity_name, fm_error ); return; } DPRINTFI( "Cleared alarm (%s) for node (%s) domain (%s) entity (%s), " "fm_uuid=%s.", sm_alarm_thread_alarm_str( entry->alarm ), entry->alarm_node_name, entry->alarm_domain_name, entry->alarm_entity_name, entry->fm_uuid ); entry->alarm_state = SM_ALARM_STATE_CLEARED; entry->alarm_data.perceived_severity = SM_ALARM_SEVERITY_CLEARED; entry->alarm_data.state_info.applicable = false; entry->alarm_data.threshold_info.applicable = false; entry->alarm_data.proposed_repair_action[0] = '\0'; entry->alarm_data.additional_text[0] = '\0'; sm_alarm_thread_log( entry ); entry->inuse = 0; //This alarm has reached the end of its lifecycle } // **************************************************************************** // **************************************************************************** // Alarm Thread - Clear All // ======================== static void sm_alarm_thread_clear_all( struct timespec* ts_real, SmAlarmDomainNameT domain_name ) { char date_str[32]; struct tm t_real; char fm_entity_instance_id[FM_MAX_BUFFER_LENGTH]; EFmErrorT fm_error; SmAlarmEntryT* entry = NULL; snprintf( fm_entity_instance_id, FM_MAX_BUFFER_LENGTH, "service_domain=%s", domain_name ); fm_error = fm_clear_all_wrapper( &fm_entity_instance_id ); if(( FM_ERR_OK != fm_error )&&( FM_ERR_ENTITY_NOT_FOUND != fm_error )) { DPRINTFE( "Failed to clear service domain (%s) alarms, error=%i", domain_name, fm_error ); } DPRINTFI( "Cleared all alarms." ); unsigned int entry_i; for( entry_i=0; SM_ALARMS_MAX > entry_i; ++entry_i ) { entry = &(_alarms[entry_i]); if( SM_ALARM_ENTRY_INUSE != entry->inuse ) continue; if( 0 == strcmp( domain_name, entry->alarm_domain_name ) ) { memset( entry, 0, sizeof(SmAlarmEntryT) ); } } if( NULL == localtime_r( &(ts_real->tv_sec), &t_real ) ) { fprintf( _customer_alarm_log, "----- all alarms cleared for %s -----\n\n", domain_name ); } else { strftime( date_str, sizeof(date_str), "%FT%T", &t_real ); fprintf( _customer_alarm_log, "----- all alarms cleared for %s at " "%s.%03ld -----\n\n", domain_name, date_str, ts_real->tv_nsec/1000000 ); } } // **************************************************************************** // **************************************************************************** // Alarm Thread - Manage Domain Alarms // =================================== static void sm_alarm_thread_manage_domain_alarms( SmAlarmDomainNameT domain_name, bool manage ) { SmAlarmEntryT* entry; SmAlarmDomainEntryT* domain_entry; domain_entry = sm_alarm_thread_find_domain( domain_name ); if( NULL == domain_entry ) { domain_entry = sm_alarm_thread_find_empty_domain(); if( NULL == domain_entry ) { DPRINTFE( "Alarm domain allocation failure, no free alarm " "domain entries." ); return; } domain_entry->inuse = SM_ALARM_DOMAIN_ENTRY_INUSE; domain_entry->manage = manage; snprintf( domain_entry->alarm_domain_name, sizeof(domain_entry->alarm_domain_name), "%s", domain_name ); } else { domain_entry->manage = manage; } if( manage ) { DPRINTFI( "Managing alarms for domain (%s).", domain_name ); } else { DPRINTFI( "No longer managing alarms for domain (%s).", domain_name ); unsigned int entry_i; for( entry_i=0; SM_ALARMS_MAX > entry_i; ++entry_i ) { entry = &(_alarms[entry_i]); if( SM_ALARM_ENTRY_INUSE == entry->inuse ) { if( sm_alarm_thread_domain_managed( entry->alarm ) ) { memset( entry, 0, sizeof(SmAlarmEntryT) ); } } } } } // **************************************************************************** // **************************************************************************** // Alarm Thread - Report // ===================== static void sm_alarm_thread_report( void ) { SmAlarmEntryT* entry; unsigned int entry_i; for( entry_i=0; SM_ALARMS_MAX > entry_i; ++entry_i ) { entry = &(_alarms[entry_i]); if( SM_ALARM_ENTRY_INUSE != entry->inuse ) continue; // Call Fault Management. if( SM_ALARM_STATE_CLEARING == entry->alarm_state ) { sm_alarm_thread_clear_alarm( entry ); } else if( SM_ALARM_STATE_RAISING == entry->alarm_state ) { sm_alarm_thread_raise_alarm( entry ); } } } // **************************************************************************** // **************************************************************************** // Alarm Thread - Audit // ==================== static void sm_alarm_thread_audit( const char entity_instance[] ) { bool found; char fm_entity_instance_id[FM_MAX_BUFFER_LENGTH]; SFmAlarmDataT fm_alarm_data[SM_ALARMS_MAX]; unsigned fm_total_alarms = SM_ALARMS_MAX; SFmAlarmDataT* fm_alarm; EFmErrorT fm_error; SmAlarmT alarm; SmAlarmNodeNameT node_name; SmAlarmDomainNameT domain_name; SmAlarmEntityNameT entity_name; SmAlarmEntryT* entry; char hostname[SM_NODE_NAME_MAX_CHAR]; SmErrorT error; error = sm_node_utils_get_hostname( hostname ); if( SM_OKAY != error ) { DPRINTFE( "Failed to get hostname, error=%s.", sm_error_str( error ) ); return; } snprintf( fm_entity_instance_id, FM_MAX_BUFFER_LENGTH, "%s", entity_instance ); fm_error = fm_get_faults_wrapper( &fm_entity_instance_id, fm_alarm_data, &fm_total_alarms ); if(( FM_ERR_OK != fm_error )&&( FM_ERR_ENTITY_NOT_FOUND != fm_error )) { DPRINTFE( "Failed to get all alarms, error=%i", fm_error ); return; } unsigned int fm_alarm_i; for( fm_alarm_i=0; fm_total_alarms > fm_alarm_i; ++fm_alarm_i ) { found = false; fm_alarm = &(fm_alarm_data[fm_alarm_i]); sm_alarm_thread_get_alarm_id( fm_alarm, &alarm, node_name, domain_name, entity_name ); DPRINTFD( "Looking at fm_alarm_id=%s, fm_entity_instance_id=%s, " "alarm=%i, node_name=%s, domain_name=%s, entity_name=%s.", fm_alarm->alarm_id, fm_alarm->entity_instance_id, alarm, node_name, domain_name, entity_name ); if( SM_ALARM_UNKNOWN == alarm ) { continue; } if( sm_alarm_thread_domain_managed( alarm ) ) { SmAlarmDomainEntryT* domain_entry; domain_entry = sm_alarm_thread_find_domain( domain_name ); if( NULL == domain_entry ) { DPRINTFD( "Alarm domain (%s) not found.", domain_name ); continue; } if( !domain_entry->manage ) { DPRINTFD( "Not managing alarm domain (%s).", domain_name ); continue; } } else if( 0 != strcmp( hostname, node_name ) ) { DPRINTFD( "Not our alarm, node_name=%s.", node_name ); continue; } entry = sm_alarm_thread_find( alarm, node_name, domain_name, entity_name ); if( NULL != entry ) { if( SM_ALARM_STATE_RAISING == entry->alarm_state || SM_ALARM_STATE_RAISED == entry->alarm_state ) { // only when a matching record locally to be considered as "found" found = true; entry->alarm_state = SM_ALARM_STATE_RAISED; }else { // found a record, but not in raising/raised mode, alarm need to be cleared } } if( !found ) { AlarmFilter fm_filter; snprintf( fm_filter.alarm_id, sizeof(fm_filter.alarm_id), "%s", fm_alarm->alarm_id ); snprintf( fm_filter.entity_instance_id, sizeof(fm_filter.entity_instance_id), "%s", fm_alarm->entity_instance_id ); fm_error = fm_clear_fault_wrapper( &fm_filter ); if(( FM_ERR_OK != fm_error )&&( FM_ERR_ENTITY_NOT_FOUND != fm_error )) { DPRINTFE( "Failed to clear stale alarm (fm_alarm_id=%s, " "fm_entity_instance_id=%s), error=%i", fm_alarm->alarm_id, fm_alarm->entity_instance_id, fm_error ); } if ( NULL != entry ) { //alarm is cleared, retire the record entry->inuse = 0; DPRINTFI( "Removed cleared alarm, fm_alarm_id=%s, " "fm_entity_instance_id=%s, fm_uuid=%s.", fm_alarm->alarm_id, fm_alarm->entity_instance_id, fm_alarm->uuid ); }else { DPRINTFI( "Deleted stale alarm, fm_alarm_id=%s, " "fm_entity_instance_id=%s, fm_uuid=%s.", fm_alarm->alarm_id, fm_alarm->entity_instance_id, fm_alarm->uuid ); } } } } // **************************************************************************** // **************************************************************************** // Alarm Thread - Report Timer // =========================== static bool sm_alarm_thread_report_timer( SmTimerIdT timer_id, int64_t user_data ) { sm_alarm_thread_report(); return( true ); } // **************************************************************************** // **************************************************************************** // Alarm Thread - Audit Timer // ========================== static bool sm_alarm_thread_audit_timer( SmTimerIdT timer_id, int64_t user_data ) { SmAlarmEntryT* entry; // Set raised alarms to raising, the audit will set them // to raised, if FM really has them. unsigned int entry_i; for( entry_i=0; SM_ALARMS_MAX > entry_i; ++entry_i ) { entry = &(_alarms[entry_i]); if( SM_ALARM_ENTRY_INUSE != entry->inuse ) continue; if( SM_ALARM_STATE_RAISED == entry->alarm_state ) { entry->alarm_state = SM_ALARM_STATE_RAISING; } } sm_alarm_thread_audit( "service_domain" ); sm_alarm_thread_audit( "host" ); return( true ); } // **************************************************************************** // **************************************************************************** // Alarm Thread - Dispatch // ======================= static void sm_alarm_thread_dispatch( int selobj, int64_t user_data ) { int bytes_read; SmAlarmThreadMsgT msg; SmAlarmThreadMsgRaiseAlarmT* raise_alarm = &(msg.u.raise_alarm); SmAlarmThreadMsgClearAlarmT* clear_alarm = &(msg.u.clear_alarm); SmAlarmThreadMsgClearAllAlarmsT* clear_all_alarms = &(msg.u.clear_all_alarms); SmAlarmThreadMsgManageDomainAlarmsT* manage_alarms = &(msg.u.manage_alarms); bool report_alarm = false; memset( &msg, 0, sizeof(msg) ); int retry_count; for( retry_count = 5; retry_count != 0; --retry_count ) { bytes_read = recv( selobj, &msg, sizeof(msg), 0 ); if( 0 < bytes_read ) { break; } else if( 0 == bytes_read ) { // For connection oriented sockets, this indicates that the peer // has performed an orderly shutdown. return; } else if(( 0 > bytes_read )&&( EINTR != errno )) { DPRINTFE( "Failed to receive message, errno=%s.", strerror( errno ) ); return; } DPRINTFD( "Interrupted while receiving message, retry=%d, errno=%s.", retry_count, strerror( errno ) ); } switch( msg.type ) { case SM_ALARM_THREAD_MSG_RAISE_ALARM: sm_alarm_thread_add( &(raise_alarm->ts_real), raise_alarm->alarm, raise_alarm->node_name, raise_alarm->domain_name, raise_alarm->entity_name, &(raise_alarm->data) ); report_alarm = true; break; case SM_ALARM_THREAD_MSG_CLEAR_ALARM: sm_alarm_thread_delete( &(clear_alarm->ts_real), clear_alarm->alarm, clear_alarm->node_name, clear_alarm->domain_name, clear_alarm->entity_name ); report_alarm = true; break; case SM_ALARM_THREAD_MSG_CLEAR_ALL_ALARMS: sm_alarm_thread_clear_all( &(clear_alarm->ts_real), clear_all_alarms->domain_name ); break; case SM_ALARM_THREAD_MSG_MANAGE_DOMAIN_ALARMS: sm_alarm_thread_manage_domain_alarms( manage_alarms->domain_name, manage_alarms->manage ); break; default: DPRINTFE( "Unknown message (%i) received.", msg.type ); return; break; } if( report_alarm ) sm_alarm_thread_report(); } // **************************************************************************** // **************************************************************************** // Alarm Thread - Initialize Thread // ================================ static SmErrorT sm_alarm_thread_initialize_thread( void ) { SmErrorT error; _server_fd_registered = false; error = sm_selobj_initialize(); if( SM_OKAY != error ) { DPRINTFE( "Failed to initialize selection object module, error=%s.", sm_error_str( error ) ); return( SM_FAILED ); } if( -1 < _server_fd ) { error = sm_selobj_register( _server_fd, sm_alarm_thread_dispatch, 0 ); if( SM_OKAY != error ) { DPRINTFE( "Failed to register selection object, error=%s.", sm_error_str( error ) ); return( error ); } _server_fd_registered = true; } error = sm_timer_initialize( SM_ALARM_THREAD_TICK_INTERVAL_IN_MS ); if( SM_OKAY != error ) { DPRINTFE( "Failed to initialize timer module, error=%s.", sm_error_str( error ) ); return( SM_FAILED ); } error = sm_timer_register( "alarm report", SM_ALARM_THREAD_ALARM_REPORT_TIMER_IN_MS, sm_alarm_thread_report_timer, 0, &_alarm_report_timer_id ); if( SM_OKAY != error ) { DPRINTFE( "Failed to create alarm report timer, error=%s.", sm_error_str( error ) ); return( SM_FAILED ); } error = sm_timer_register( "alarm audit", SM_ALARM_THREAD_ALARM_AUDIT_TIMER_IN_MS, sm_alarm_thread_audit_timer, 0, &_alarm_audit_timer_id ); if( SM_OKAY != error ) { DPRINTFE( "Failed to create alarm audit timer, error=%s.", sm_error_str( error ) ); return( SM_FAILED ); } if( NULL != _customer_alarm_log ) { fflush( _customer_alarm_log ); fclose( _customer_alarm_log ); _customer_alarm_log = NULL; } _customer_alarm_log = fopen( SM_ALARM_CUSTOMER_LOG_FILE, "a" ); if( NULL == _customer_alarm_log ) { DPRINTFE( "Failed to open customer alarm log file (%s).", SM_ALARM_CUSTOMER_LOG_FILE ); return( SM_FAILED ); } return( SM_OKAY ); } // **************************************************************************** // **************************************************************************** // Alarm Thread - Finalize Thread // ============================== static SmErrorT sm_alarm_thread_finalize_thread( void ) { SmErrorT error; if( SM_TIMER_ID_INVALID != _alarm_report_timer_id ) { error = sm_timer_deregister( _alarm_report_timer_id ); if( SM_OKAY != error ) { DPRINTFE( "Failed to cancel alarm report timer, error=%s.", sm_error_str( error ) ); } _alarm_report_timer_id = SM_TIMER_ID_INVALID; } if( SM_TIMER_ID_INVALID != _alarm_audit_timer_id ) { error = sm_timer_deregister( _alarm_audit_timer_id ); if( SM_OKAY != error ) { DPRINTFE( "Failed to cancel alarm audit timer, error=%s.", sm_error_str( error ) ); } _alarm_audit_timer_id = SM_TIMER_ID_INVALID; } error = sm_timer_finalize(); if( SM_OKAY != error ) { DPRINTFE( "Failed to finialize timer module, error=%s.", sm_error_str( error ) ); } if( _server_fd_registered ) { error = sm_selobj_deregister( _server_fd ); if( SM_OKAY != error ) { DPRINTFE( "Failed to deregister selection object, error=%s.", sm_error_str( error ) ); } _server_fd_registered = false; } error = sm_selobj_finalize(); if( SM_OKAY != error ) { DPRINTFE( "Failed to finialize selection object module, error=%s.", sm_error_str( error ) ); } if( NULL != _customer_alarm_log ) { fflush( _customer_alarm_log ); fclose( _customer_alarm_log ); _customer_alarm_log = NULL; } return( SM_OKAY ); } // **************************************************************************** // **************************************************************************** // Alarm Thread - Main // =================== static void* sm_alarm_thread_main( void* arguments ) { SmErrorT error; pthread_setname_np( pthread_self(), SM_ALARM_THREAD_NAME ); sm_debug_set_thread_info(); sm_trap_set_thread_info(); DPRINTFI( "Starting" ); // Warn after 2 minutes, fail after 16 minutes. error = sm_thread_health_register( SM_ALARM_THREAD_NAME, 120000, 1000000 ); if( SM_OKAY != error ) { DPRINTFE( "Failed to register alarm thread, error=%s.", sm_error_str( error ) ); pthread_exit( NULL ); } error = sm_alarm_thread_initialize_thread(); if( SM_OKAY != error ) { DPRINTFE( "Failed to initialize alarm thread, error=%s.", sm_error_str( error ) ); pthread_exit( NULL ); } DPRINTFI( "Started." ); while( _stay_on ) { error = sm_selobj_dispatch( SM_ALARM_THREAD_TICK_INTERVAL_IN_MS ); if( SM_OKAY != error ) { DPRINTFE( "Selection object dispatch failed, error=%s.", sm_error_str(error) ); break; } error = sm_thread_health_update( SM_ALARM_THREAD_NAME ); if( SM_OKAY != error ) { DPRINTFE( "Failed to update alarm thread health, error=%s.", sm_error_str(error) ); } } DPRINTFI( "Shutting down." ); error = sm_alarm_thread_finalize_thread(); if( SM_OKAY != error ) { DPRINTFE( "Failed to finalize alarm thread, error=%s.", sm_error_str( error ) ); } error = sm_thread_health_deregister( SM_ALARM_THREAD_NAME ); if( SM_OKAY != error ) { DPRINTFE( "Failed to deregister alarm thread, error=%s.", sm_error_str( error ) ); } DPRINTFI( "Shutdown complete." ); return( NULL ); } // **************************************************************************** // **************************************************************************** // Alarm Thread - Start // ==================== SmErrorT sm_alarm_thread_start( int server_fd ) { int result; memset( _alarms, 0, sizeof(_alarms) ); memset( _alarm_domains, 0, sizeof(_alarm_domains) ); _stay_on = 1; _thread_created = false; _server_fd = server_fd; result = pthread_create( &_alarm_thread, NULL, sm_alarm_thread_main, NULL ); if( 0 != result ) { DPRINTFE( "Failed to start alarm thread, error=%s.", strerror(result) ); return( SM_FAILED ); } _thread_created = true; return( SM_OKAY ); } // **************************************************************************** // **************************************************************************** // Alarm Thread - Stop // =================== SmErrorT sm_alarm_thread_stop( void ) { _stay_on = 0; if( _thread_created ) { long ms_expired; SmTimeT time_prev; int result; sm_time_get( &time_prev ); while( true ) { result = pthread_tryjoin_np( _alarm_thread, NULL ); if(( 0 != result )&&( EBUSY != result )) { if(( ESRCH != result )&&( EINVAL != result )) { DPRINTFE( "Failed to wait for alarm thread exit, " "sending kill signal, error=%s.", strerror(result) ); pthread_kill( _alarm_thread, SIGKILL ); } break; } ms_expired = sm_time_get_elapsed_ms( &time_prev ); if( 5000 <= ms_expired ) { DPRINTFE( "Failed to stop alarm thread, sending " "kill signal." ); pthread_kill( _alarm_thread, SIGKILL ); break; } usleep( 250000 ); // 250 milliseconds. } } _server_fd = -1; _thread_created = false; memset( _alarms, 0, sizeof(_alarms) ); memset( _alarm_domains, 0, sizeof(_alarm_domains) ); return( SM_OKAY ); } // ****************************************************************************