// // Copyright (c) 2014 Wind River Systems, Inc. // // SPDX-License-Identifier: Apache-2.0 // #include "sm_process.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "sm_types.h" #include "sm_debug.h" #include "sm_time.h" #include "sm_utils.h" #include "sm_selobj.h" #include "sm_timer.h" #include "sm_heartbeat.h" #include "sm_log.h" #include "sm_alarm.h" #include "sm_thread_health.h" #include "sm_process_death.h" #include "sm_hw.h" #include "sm_msg.h" #include "sm_db.h" #include "sm_node_utils.h" #include "sm_node_stats.h" #include "sm_node_api.h" #include "sm_service_domain_api.h" #include "sm_service_domain_interface_api.h" #include "sm_service_group_api.h" #include "sm_service_api.h" #include "sm_service_action.h" #include "sm_service_heartbeat_api.h" #include "sm_service_heartbeat_thread.h" #include "sm_service_domain_scheduler.h" #include "sm_main_event_handler.h" #include "sm_troubleshoot.h" #include "sm_service_action_table.h" #include "sm_heartbeat_thread.h" #include "sm_failover.h" #include "sm_failover_thread.h" #include "sm_task_affining_thread.h" #define SM_PROCESS_DB_CHECKPOINT_INTERVAL_IN_MS 30000 #define SM_PROCESS_TICK_INTERVAL_IN_MS 200 #define SM_PROCESS_PAUSE_IN_MS 30000 static sig_atomic_t _stay_on = 1; static sig_atomic_t _reap_children = 0; static sig_atomic_t _do_reload_data = 0; static sig_atomic_t _do_dump_data = 0; static sig_atomic_t _about_to_patch = 0; static int _last_signum = 0; static bool _is_aio = false; static bool _is_aio_simplex = false; static bool _is_aio_duplex = false; // **************************************************************************** // Process - Reap Children // ======================= static void sm_process_reap_children( void ) { if( _reap_children ) { pid_t pid; int status; while( 0 < (pid = waitpid( -1, &status, WNOHANG | WUNTRACED )) ) { if( WIFEXITED( status ) ) { sm_process_death_save( pid, WEXITSTATUS( status ) ); } else { sm_process_death_save( pid, SM_PROCESS_FAILED ); } } _reap_children = 0; } } // **************************************************************************** // **************************************************************************** // Process - Signal Handler // ======================== static void sm_process_signal_handler( int signum ) { switch( signum ) { case SIGINT: case SIGTERM: case SIGQUIT: _stay_on = 0; break; case SIGCHLD: _reap_children = 1; break; case SIGHUP: _do_reload_data = 1; break; case SIGUSR1: _do_dump_data = 1; break; case SIGUSR2: _about_to_patch = 1; break; case SIGCONT: DPRINTFD( "Ignoring signal SIGCONT (%i).", signum ); break; case SIGPIPE: DPRINTFD( "Ignoring signal SIGPIPE (%i).", signum ); break; default: DPRINTFD( "Signal (%i) ignored.", signum ); break; } _last_signum = signum; } // **************************************************************************** // **************************************************************************** // Process - Setup Signal Handler // ============================== static void sm_process_setup_signal_handler( void ) { struct sigaction sa; memset( &sa, 0, sizeof(sa) ); sa.sa_handler = sm_process_signal_handler; sigaction( SIGINT, &sa, NULL ); sigaction( SIGTERM, &sa, NULL ); sigaction( SIGQUIT, &sa, NULL ); sigaction( SIGCHLD, &sa, NULL ); sigaction( SIGUSR1, &sa, NULL ); sigaction( SIGUSR2, &sa, NULL ); sigaction( SIGCONT, &sa, NULL ); sigaction( SIGPIPE, &sa, NULL ); sigaction( SIGHUP, &sa, NULL ); } // **************************************************************************** // **************************************************************************** // Process - Initialize // ==================== static SmErrorT sm_process_initialize( void ) { SmErrorT error; error = sm_selobj_initialize(); if( SM_OKAY != error ) { DPRINTFE( "Failed to initialize selection object module, error=%s.", sm_error_str( error ) ); return( SM_FAILED ); } error = sm_timer_initialize( SM_PROCESS_TICK_INTERVAL_IN_MS ); if( SM_OKAY != error ) { DPRINTFE( "Failed to initialize timer module, error=%s.", sm_error_str( error ) ); return( SM_FAILED ); } error = sm_hw_initialize( NULL ); if( SM_OKAY != error ) { DPRINTFE( "Failed to initialize hardware module, error=%s.", sm_error_str( error ) ); return( SM_FAILED ); } error = sm_msg_initialize(); if( SM_OKAY != error ) { DPRINTFE( "Failed to initialize messaging module, error=%s.", sm_error_str( error ) ); return( SM_FAILED ); } error = sm_node_stats_initialize(); if( SM_OKAY != error ) { DPRINTFE( "Failed to initialize node stats module, error=%s.", sm_error_str( error ) ); return( SM_FAILED ); } error = sm_thread_health_initialize(); if( SM_OKAY != error ) { DPRINTFE( "Failed to initialize thread health module, error=%s.", sm_error_str( error ) ); return( SM_FAILED ); } error = sm_alarm_initialize(); if( SM_OKAY != error ) { DPRINTFE( "Failed to initialize alarm module, error=%s.", sm_error_str( error ) ); return( SM_FAILED ); } error = sm_log_initialize(); if( SM_OKAY != error ) { DPRINTFE( "Failed to initialize log module, error=%s.", sm_error_str( error ) ); return( SM_FAILED ); } if (_is_aio_simplex) { sm_heartbeat_thread_disable_heartbeat(); } else { error = sm_heartbeat_initialize(); if( SM_OKAY != error ) { DPRINTFE( "Failed to initialize heartbeat module, error=%s.", sm_error_str( error ) ); return( SM_FAILED ); } } error = sm_process_death_initialize(); if( SM_OKAY != error ) { DPRINTFE( "Failed to initialize process death module, error=%s.", sm_error_str( error ) ); return( SM_FAILED ); } error = sm_db_initialize(); if( SM_OKAY != error ) { DPRINTFE( "Failed to initialize database module, error=%s.", sm_error_str( error ) ); return( SM_FAILED ); } error = sm_node_api_initialize(); if( SM_OKAY != error ) { DPRINTFE( "Failed to initialize node api module, error=%s.", sm_error_str( error ) ); return( SM_FAILED ); } error = sm_service_domain_api_initialize(); if( SM_OKAY != error ) { DPRINTFE( "Failed to initialize service domain api module, error=%s.", sm_error_str( error ) ); return( SM_FAILED ); } error = sm_service_domain_interface_api_initialize(); if( SM_OKAY != error ) { DPRINTFE( "Failed to initialize service domain interface api module, " "error=%s.", sm_error_str( error ) ); return( SM_FAILED ); } error = sm_service_group_api_initialize(); if( SM_OKAY != error ) { DPRINTFE( "Failed to initialize service group api module, error=%s.", sm_error_str( error ) ); return( SM_FAILED ); } error = sm_service_action_initialize(); if( SM_OKAY != error ) { DPRINTFE( "Failed to initialize service action module, error=%s.", sm_error_str( error ) ); return( SM_FAILED ); } error = sm_service_api_initialize(); if( SM_OKAY != error ) { DPRINTFE( "Failed to initialize service api module, error=%s.", sm_error_str( error ) ); return( SM_FAILED ); } error = sm_service_heartbeat_api_initialize( true ); if( SM_OKAY != error ) { DPRINTFE( "Failed to initialize service heartbeat api module, " "error=%s.", sm_error_str( error ) ); return( SM_FAILED ); } error = sm_service_heartbeat_thread_start(); if( SM_OKAY != error ) { DPRINTFE( "Failed start service heartbeat thread, error=%s.", sm_error_str(error) ); return( error ); } error = sm_main_event_handler_initialize(); if( SM_OKAY != error ) { DPRINTFE( "Failed to initialize main event handler module, error=%s.", sm_error_str( error ) ); return( SM_FAILED ); } error = sm_failover_thread_start(); if( SM_OKAY != error ) { DPRINTFE( "Failed to start the failover thread, error=%s.", sm_error_str( error ) ); return( SM_FAILED ); } error = sm_failover_initialize(); if( SM_OKAY != error ) { DPRINTFE( "Failed to initialize failover handler module, error=%s.", sm_error_str( error ) ); return( SM_FAILED ); } // US102803: Start a task affining thread for AIO duplex system if(_is_aio_duplex) { error = sm_task_affining_thread_start(); if( SM_OKAY != error ) { DPRINTFE( "Failed to start the task affining thread, error=%s.", sm_error_str( error ) ); return( SM_FAILED ); } } return( SM_OKAY ); } // **************************************************************************** // **************************************************************************** // Process - Finalize // ================== static SmErrorT sm_process_finalize( void ) { SmErrorT error; // US102803: Stop the task affining thread if it is AIO duplex if(_is_aio_duplex) { error = sm_task_affining_thread_stop(); if( SM_OKAY != error ) { DPRINTFE( "Failed to stop task affining thread, error=%s.", sm_error_str( error ) ); } } error = sm_failover_thread_stop(); if( SM_OKAY != error ) { DPRINTFE( "Failed to stop failover thread, error=%s.", sm_error_str( error ) ); } error = sm_failover_finalize(); if( SM_OKAY != error ) { DPRINTFE( "Failed to finalize failover handler module, error=%s.", sm_error_str( error ) ); } error = sm_main_event_handler_finalize(); if( SM_OKAY != error ) { DPRINTFE( "Failed to finalize main event handler module, error=%s.", sm_error_str( error ) ); } error = sm_service_heartbeat_thread_stop(); if( SM_OKAY != error ) { DPRINTFE( "Failed start service heartbeat thread, error=%s.", sm_error_str(error) ); } error = sm_service_heartbeat_api_finalize( true ); if( SM_OKAY != error ) { DPRINTFE( "Failed to finalize service heartbeat api module, " "error=%s.", sm_error_str( error ) ); } error = sm_service_api_finalize(); if( SM_OKAY != error ) { DPRINTFE( "Failed to finalize service api module, error=%s.", sm_error_str( error ) ); } error = sm_service_action_finalize(); if( SM_OKAY != error ) { DPRINTFE( "Failed to finalize service action module, error=%s.", sm_error_str( error ) ); } error = sm_service_group_api_finalize(); if( SM_OKAY != error ) { DPRINTFE( "Failed to finalize service group api module, error=%s.", sm_error_str( error ) ); } error = sm_service_domain_interface_api_finalize(); if( SM_OKAY != error ) { DPRINTFE( "Failed to finalize service domain interface api module, " "error=%s.", sm_error_str( error ) ); } error = sm_service_domain_api_finalize(); if( SM_OKAY != error ) { DPRINTFE( "Failed to finalize service domain api module, error=%s.", sm_error_str( error ) ); } error = sm_node_api_finalize(); if( SM_OKAY != error ) { DPRINTFE( "Failed to finalize interface api module, error=%s.", sm_error_str( error ) ); } error = sm_db_finalize(); if( SM_OKAY != error ) { DPRINTFE( "Failed to finalize database module, error=%s.", sm_error_str( error ) ); } error = sm_process_death_finalize(); if( SM_OKAY != error ) { DPRINTFE( "Failed to finalize process death module, error=%s.", sm_error_str( error ) ); } error = sm_heartbeat_finalize(); if( SM_OKAY != error ) { DPRINTFE( "Failed to finalize heartbeat module, error=%s.", sm_error_str( error ) ); } error = sm_alarm_finalize(); if( SM_OKAY != error ) { DPRINTFE( "Failed to finalize alarm module, error=%s.", sm_error_str( error ) ); } error = sm_thread_health_finalize(); if( SM_OKAY != error ) { DPRINTFE( "Failed to finalize thread health module, error=%s.", sm_error_str( error ) ); } error = sm_node_stats_finalize(); if( SM_OKAY != error ) { DPRINTFE( "Failed to finalize node stats module, error=%s.", sm_error_str( error ) ); } error = sm_msg_finalize(); if( SM_OKAY != error ) { DPRINTFE( "Failed to finalize messaging module, error=%s.", sm_error_str( error ) ); } error = sm_hw_finalize(); if( SM_OKAY != error ) { DPRINTFE( "Failed to finalize hardware module, error=%s.", sm_error_str( error ) ); } error = sm_timer_finalize(); if( SM_OKAY != error ) { DPRINTFE( "Failed to finalize timer module, error=%s.", sm_error_str( error ) ); } error = sm_selobj_finalize(); if( SM_OKAY != error ) { DPRINTFE( "Failed to finalize selection object module, error=%s.", sm_error_str( error ) ); } error = sm_log_finalize(); if( SM_OKAY != error ) { DPRINTFE( "Failed to finalize log module, error=%s.", sm_error_str( error ) ); } return( SM_OKAY ); } // **************************************************************************** // **************************************************************************** // Process - Wait For Node Configuration // ===================================== static SmErrorT sm_process_wait_node_configuration( void ) { bool config_complete; SmErrorT error; while( _stay_on ) { error = sm_node_utils_config_complete( &config_complete ); if( SM_OKAY != error ) { DPRINTFE( "Failed to determine if node configuration " "completed, error=%s.", sm_error_str(error) ); sleep( 10 ); continue; } if( config_complete ) { DPRINTFI( "Node configuration completed." ); break; } else { DPRINTFI( "Waiting for node configuration to complete." ); sleep( 10 ); } } if( _stay_on ) { error = SM_OKAY; } else { DPRINTFI( "Shutdown signalled, last-signal=%i.", _last_signum ); error = SM_FAILED; } return error; } // **************************************************************************** // **************************************************************************** // Process - Main // ============== SmErrorT sm_process_main( int argc, char *argv[], char *envp[] ) { int result; long ms_expired; bool thread_health; bool do_patch = false; SmTimeT db_checkpoint_time_prev; SmTimeT patch_time_prev; SmErrorT error; int opt = 0; static struct option long_options[] = { {"interval-extension", 1, 0, 'i'}, {"timeout-extension", 1, 0, 't'}, {0, 0, 0, 0} }; int long_index = 0; sm_process_setup_signal_handler(); DPRINTFI( "Starting" ); if( sm_utils_process_running( SM_PROCESS_PID_FILENAME ) ) { DPRINTFI( "Already running an instance of sm." ); return( SM_OKAY ); } if( !sm_utils_set_pid_file( SM_PROCESS_PID_FILENAME ) ) { DPRINTFE( "Failed to write pid file for sm, error=%s.", strerror(errno) ); return( SM_FAILED ); } result = setpriority( PRIO_PROCESS, getpid(), -2 ); if( 0 > result ) { DPRINTFE( "Failed to set priority of process, error=%s.", strerror( errno ) ); return( SM_FAILED ); } if( 0 > mkdir( SM_RUN_DIRECTORY, 0700 ) ) { if( EEXIST == errno ) { DPRINTFI( "Run directory (%s) exists.", SM_RUN_DIRECTORY ); } else { DPRINTFE( "Run directory (%s) creation failed, error=%s.", SM_RUN_DIRECTORY, strerror(errno) ); return( SM_FAILED ); } } // Check for cmdline args while ((opt = getopt_long(argc, argv, "i:t:", long_options, &long_index)) != -1) { switch (opt) { case 'i': { sm_service_action_table_set_interval_extension( atoi(optarg) ); break; } case 't': { sm_service_action_table_set_timeout_extension( atoi(optarg) ); break; } default: { DPRINTFE( "Failed to process cmdline arg." ); return( SM_FAILED ); } } } error = sm_process_wait_node_configuration(); if( SM_OKAY != error ) { DPRINTFE( "Failed to wait for node configuration, error=%s.", sm_error_str(error) ); return( error ); } DPRINTFI( "Configuring Databases" ); error = sm_db_configure( SM_DATABASE_NAME, SM_DB_TYPE_MAIN ); if( SM_OKAY != error ) { DPRINTFE( "Failed configuring database, error=%s.", sm_error_str(error) ); return( error ); } error = sm_db_configure( SM_HEARTBEAT_DATABASE_NAME, SM_DB_TYPE_HEARTBEAT ); if( SM_OKAY != error ) { DPRINTFE( "Failed configuring heartbeat database, error=%s.", sm_error_str(error) ); return( error ); } error = sm_node_utils_is_aio(&_is_aio); if( SM_OKAY != error ) { DPRINTFE( "Failed to check for AIO system, error=%s.", sm_error_str(error) ); return( error ); } error = sm_node_utils_is_aio_simplex(&_is_aio_simplex); if( SM_OKAY != error ) { DPRINTFE( "Failed to check for AIO simplex system, error=%s.", sm_error_str(error) ); return( error ); } error = sm_node_utils_is_aio_duplex(&_is_aio_duplex); if( SM_OKAY != error ) { DPRINTFE( "Failed to check for AIO duplex system, error=%s.", sm_error_str(error) ); return( error ); } error = sm_process_initialize(); if( SM_OKAY != error ) { DPRINTFE( "Failed initialize process, error=%s.", sm_error_str(error) ); return( error ); } error = sm_utils_set_boot_complete(); if( SM_OKAY != error ) { DPRINTFE( "Failed to set boot complete, error=%s.", sm_error_str(error) ); return( error ); } DPRINTFI( "Started." ); sm_time_get( &db_checkpoint_time_prev ); while( _stay_on ) { error = sm_selobj_dispatch( SM_PROCESS_TICK_INTERVAL_IN_MS ); if( SM_OKAY != error ) { DPRINTFE( "Selection object dispatch failed, error=%s.", sm_error_str(error) ); break; } sm_process_reap_children(); ms_expired = sm_time_get_elapsed_ms( &db_checkpoint_time_prev ); if( SM_PROCESS_DB_CHECKPOINT_INTERVAL_IN_MS <= ms_expired ) { error = sm_db_checkpoint( SM_DATABASE_NAME ); if( SM_OKAY != error ) { DPRINTFE( "Database (%s) checkpoint failed, error=%s.", SM_DATABASE_NAME, sm_error_str(error) ); } error = sm_db_checkpoint( SM_HEARTBEAT_DATABASE_NAME ); if( SM_OKAY != error ) { DPRINTFE( "Database (%s) checkpoint failed, error=%s.", SM_HEARTBEAT_DATABASE_NAME, sm_error_str(error) ); } sm_time_get( &db_checkpoint_time_prev ); } error = sm_thread_health_check( &thread_health ); if( SM_OKAY != error ) { DPRINTFE( "Failed to check thread health, error=%s.", sm_error_str(error) ); break; } if( !thread_health ) { DPRINTFE( "Thread health check failed." ); sm_troubleshoot_dump_data( "thread health check failed" ); break; } if( _do_reload_data ) { DPRINTFI( "Reload data signalled." ); sm_main_event_handler_reload_data(); _do_reload_data = 0; } if( _do_dump_data ) { DPRINTFI( "Dump data signalled." ); sm_troubleshoot_dump_data( "user request" ); _do_dump_data = 0; } if( _about_to_patch ) { do_patch = true; sm_time_get( &patch_time_prev ); _about_to_patch = 0; DPRINTFI( "About to patch signalled." ); } if( do_patch ) { ms_expired = sm_time_get_elapsed_ms( &patch_time_prev ); if( SM_PROCESS_PAUSE_IN_MS < ms_expired ) { do_patch = false; DPRINTFI( "Too much time elapsed between patch signal and " "shutdown, ms_expired=%li, max=%i.", ms_expired, SM_PROCESS_PAUSE_IN_MS ); } } } if( do_patch ) { ms_expired = sm_time_get_elapsed_ms( &patch_time_prev ); if( SM_PROCESS_PAUSE_IN_MS < ms_expired ) { DPRINTFI( "Too much time elapsed between patch signal and " "shutdown, ms_expired=%li, max=%i.", ms_expired, SM_PROCESS_PAUSE_IN_MS ); } else { DPRINTFI( "Sending pause signal." ); int retry_i; for( retry_i=0; 5 > retry_i; ++retry_i ) { sm_service_domain_api_pause_all( SM_PROCESS_PAUSE_IN_MS ); } } } DPRINTFI( "Shutting down." ); error = sm_process_finalize(); if( SM_OKAY != error ) { DPRINTFE( "Failed finalize process, error=%s.", sm_error_str(error) ); } DPRINTFI( "Shutdown complete." ); return( SM_OKAY ); } // ****************************************************************************