/*
 * Copyright (c) 2016-2017 Wind River Systems, Inc.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 */

/****************************************************************************
 *
 * @file
 * Wind River Titanium Cloud - Threading Base Implementation Module
 *
 * This module implements the common thread utility module that can be used
 * by any maintenance process to
 *
 *  - launch threads
 *  - monitor thread execution
 *  - detect thread completion
 *  - kill a thread if needed
 *
 *  - thread execution FSM
 *
 * For a more comprehensive description please refer
 * to the module header threadUtil.h
 *
 ****************************************************************************/

#include "daemon_common.h" /* for ... daemon_health_test */
#include "nodeBase.h" /* for ... mtce node common definitions */
#include "hostUtil.h" /* for ... mtce host common definitions */
#include "threadUtil.h" /* for ... this module header */

/* Stores the parent process's timer handler */
static void (*thread_timer_handler)(int, siginfo_t*, void*) = NULL ;

/* pthread attributes passed to every pthread_create call in this module */
static pthread_attr_t __attr;

/* signal masks used by thread_handler around the thread launch */
static sigset_t __enabled_mask ;
static sigset_t __disabled_mask;

/* set to THREAD_INIT_SIG by threadUtil_init ; checked by thread_launch_thread */
static unsigned int __thread_init_sig ;

/*****************************************************************************
 *
 * Name       : threadUtil_init
 *
 * Description: Module init with caller specified timer handler.
* ****************************************************************************/ static std::string threadStages_str[THREAD_STAGE__STAGES+1]; int threadUtil_init ( void (*handler)(int, siginfo_t*, void* )) { /* preserve parent process timer handler */ thread_timer_handler = handler ; /* setup the stage strings */ threadStages_str[THREAD_STAGE__IDLE] = "Idle" ; threadStages_str[THREAD_STAGE__IGNORE] = "Ignore" ; threadStages_str[THREAD_STAGE__LAUNCH] = "Launch" ; threadStages_str[THREAD_STAGE__MONITOR] = "Monitor" ; threadStages_str[THREAD_STAGE__DONE] = "Done" ; threadStages_str[THREAD_STAGE__KILL] = "Kill" ; threadStages_str[THREAD_STAGE__WAIT] = "Wait" ; /* setup to create a 'detached' threads */ pthread_attr_init(&__attr); pthread_attr_setdetachstate(&__attr, PTHREAD_CREATE_DETACHED); threadUtil_setstack_size (); __thread_init_sig = THREAD_INIT_SIG ; return (PASS); } #define MTCE_PTHREAD_MAX_STACK_SIZE (0x20000) /* 128K */ void threadUtil_setstack_size ( void ) { size_t stack_size_before = 0 ; size_t stack_size_after = 0 ; /* manage pthread stack size */ if ( pthread_attr_getstacksize (&__attr,&stack_size_before) == PASS ) { if ( stack_size_before > MTCE_PTHREAD_MAX_STACK_SIZE ) { if ( pthread_attr_setstacksize ( &__attr, MTCE_PTHREAD_MAX_STACK_SIZE ) == PASS ) { if ( pthread_attr_getstacksize (&__attr,&stack_size_after) == PASS ) { ilog ("thread Stack: %zu KB (was %zu)\n", stack_size_after/1024, stack_size_before/1024 ); } else { elog ("failed to set pthread stack size (%d:%m)\n", errno ); } } } else { ilog ("pthread stack size is %zu bytes\n", stack_size_before ); } } else { elog ("failed to get pthread stack size (%d:%m)\n", errno ); } } /***************************************************************************** * * Name : threadUtil_fini * * Description: Module cleanup * ****************************************************************************/ void threadUtil_fini ( void ) { ; // ilog ("called\n"); } 
/***************************************************************************** * * Name : _stage_change * * Description: Change thread FSM stage. * * See thread_stages_enum in threadUtil.h for a list of stage enums * ****************************************************************************/ void _stage_change ( thread_ctrl_type & ctrl, thread_stages_enum newStage ) { if ( newStage < THREAD_STAGE__STAGES ) { clog ("%s %s thread stage from %s -> %s\n", ctrl.hostname.c_str(), ctrl.name.c_str(), threadStages_str[ctrl.stage].c_str(), threadStages_str[newStage].c_str()); ctrl.stage = newStage ; } else { slog ("%s %s thread stage change to '%d' is invalid ; switching to KILL\n", ctrl.hostname.c_str(), ctrl.name.c_str(), newStage ); ctrl.stage = THREAD_STAGE__KILL ; } ctrl.stage_log_throttle = 0 ; } /***************************************************************************** * * Name : thread_stage * * Description: Returns a string representing the current thread stage. * *****************************************************************************/ string thread_stage ( thread_ctrl_type & ctrl ) { if ( ctrl.stage < THREAD_STAGE__STAGES ) return(threadStages_str[ctrl.stage]); return("out-of-range thread stage"); } /***************************************************************************** * * Name : thread_init * * Description: Default a thread 'ctrl' and 'info' structs. * * Assumptions: Called at init time once. * * Warning : Thread should be running when this is called. * Warning : Should not be called more than once or else might create * an orphan timer. * * Parameters: * * - reference to the ctrl and info structs for a specified thread * - pointer to thread specific extra data * - the thread function pointer itself * - thread execution timeout in seconds * - reference to the host and thread names. 
* * Returns : nothing * *****************************************************************************/ void thread_init ( thread_ctrl_type & thread_ctrl, thread_info_type & thread_info, void* extra_data_ptr, void* (*thread) (void*), int timeout, string & hostname, string threadname ) { /* default the ctrl struct */ thread_ctrl.stage = THREAD_STAGE__IDLE ; thread_ctrl.done = true ; thread_ctrl.idle = true ; thread_ctrl.id = 0 ; thread_ctrl.thread = thread ; thread_ctrl.hostname = hostname ; thread_ctrl.name = threadname ; thread_ctrl.timeout = timeout ; mtcTimer_init ( thread_ctrl.timer, hostname, threadname ); thread_ctrl.status = PASS ; thread_ctrl.runcount = 0 ; thread_ctrl.retries = 0 ; thread_ctrl.stage_log_throttle = 0 ; /* Init the thread's info struct - the only non-stack memory the * thread can look at or touch */ thread_info.hostname = hostname ; thread_info.name = threadname ; thread_info.id = 0 ; thread_info.command = 0 ; thread_info.runcount = 0 ; thread_info.progress = 0 ; thread_info.signal = 0 ; thread_info.data.clear() ; thread_info.extra_info_ptr = extra_data_ptr ; thread_info.pw_file_fd = 0 ; thread_info.password_file.clear() ; /* command execution status */ thread_info.status_string.clear(); thread_info.status = 0 ; snprintf ( thread_info.log_prefix, MAX_LOG_PREFIX_LEN, "%s %s thread", thread_ctrl.hostname.data(), thread_ctrl.name.data()); } /**************************************************************************** * * Name : thread_done * * Description: Return true if we are in the DONE stage. * ****************************************************************************/ bool thread_done ( thread_ctrl_type & ctrl ) { if ( ctrl.stage == THREAD_STAGE__DONE ) { return (true) ; } return (false); } /**************************************************************************** * * Name : thread_idle * * Description: Return true if we are in the IDLE stage. 
* ****************************************************************************/ bool thread_idle ( thread_ctrl_type & ctrl ) { if ( ctrl.stage == THREAD_STAGE__IDLE ) { return (true) ; } return (false); } /**************************************************************************** * * Name : thread_launch * * Description: Perform prechecks that verify the ctrl struct is ready for * thread launch and if so change stage to THREAD_STAGE__LAUNCH. * ****************************************************************************/ int thread_launch ( thread_ctrl_type & ctrl, thread_info_type & info ) { int rc = FAIL ; if ( ! thread_timer_handler ) { slog ("%s no thread timer handler bound in\n", ctrl.hostname.c_str()); rc = FAIL_NULL_POINTER ; } else if ( ctrl.thread == NULL ) { slog ("%s %s no thread bound in\n", ctrl.hostname.c_str(), ctrl.name.c_str()); rc = FAIL_NULL_POINTER ; } else if ( ctrl.stage != THREAD_STAGE__IDLE ) { wlog ("%s %s not in IDLE stage (in %s stage)\n", ctrl.hostname.c_str(), ctrl.name.c_str(), threadStages_str[ctrl.stage].c_str()); thread_kill ( ctrl, info ); rc = FAIL_BAD_STATE ; } else if ( ctrl.id ) { slog ("%s %s thread may be running ; id is not null and should be\n", ctrl.hostname.c_str(), ctrl.name.c_str()); thread_kill ( ctrl, info ); rc = FAIL_THREAD_RUNNING ; } else { _stage_change ( ctrl, THREAD_STAGE__LAUNCH ); rc = PASS ; } return (rc); } /**************************************************************************** * * Name : thread_kill * * Description: put the FSM in the kill state. 
* ****************************************************************************/ void thread_kill ( thread_ctrl_type & ctrl, thread_info_type & info ) { info.signal = SIGKILL ; /* only go to kill if not already handling kill */ if (( ctrl.stage != THREAD_STAGE__KILL ) && ( ctrl.stage != THREAD_STAGE__WAIT ) && ( ctrl.stage != THREAD_STAGE__IDLE )) { blog ("%s kill request\n", ctrl.hostname.c_str() ); _stage_change ( ctrl, THREAD_STAGE__KILL ); } } /***************************************************************************** * * Name : thread_handler * * Description: finite state machine to manage a pthread execution life cycle * * The parent must periodically run this thread_handler to service and make * forward progress in the FSM. * * Thread FSM life cycle and responsibilities: * * Parent calls thread_init once before any launch which sets up the ctrl * and info structs. Default state THREAD_STAGE__IDLE IDLE * * When there is a thread to be launched ... * * 1. Parent FSM calls thread_launch to launch the thread * - Thread FSM performs thread launch pre-checks * - check for timer handler binding * - rc = FAIL_NULL_POINTER * - check that there is a thread bound in * - rc = FAIL_NULL_POINTER * - verify we are in the correct stage for launch * - rc = FAIL_BAD_STATE * - verify the thread is not already running * - rc = FAIL_THREAD_RUNNING * * - if rc == PASS change state to THREAD_STAGE__LAUNCH * - if rc != PASS change state to THREAD_STAGE__IDLE * * - Parent FSM handles thread_launch return status * * if ( thread_launch == PASS ) * - start a parent timer ; a longer umbrella timer * else * - fail operation or retry * * 2. 
Thread FSM launches the thread in THREAD_STAGE__LAUNCH stage * - preserves parent signal mask * - clears signal mask so that thread does not inherit signal handling * - launch the thread * - restore signal mask * * if launch failed * - change ctrl.status = FAIL_THREAD_CREATE * - change ctrl.stage = THREAD_STAGE__DONE * * if launch passed * - start the thread timeout timer if timeout is !0 * - change ctrl.stage = THREAD_STAGE__MONITOR * * 3. Thread FSM monitors thread execution in THREAD_STAGE__MONITOR stage * - waits for done conditions or thread timeout * - ctrl.timer.ring or incremented info.runcount * Note: thread increments info.runcount on exit/done * * if ( thread timeout ) * - sets ctrl.status = FAIL_TIMEOUT * - sets ctrl.stage = THREAD_STAGE__KILL * * if ( info.runcount > ctrl.runcount ) * - stop thread timer * - change ctrl.stage = THREAD_STAGE__DONE * * 4. Parent FSM Monitors for thread done or parent timer timeout * - has started its own umbrella timeout timer that is a * few seconds longer than the actual thread timeout. * - thread_done returns true when ctrl.stage == THREAD_STAGE__DONE * * if ( parent timeout ) * - sets ctrl.status = FAIL_TIMEOUT * - sets trl.stage = THREAD_STAGE__KILL * * if ( thread_done ) * - interprets ctrl.status * - interprets info.status * - consumes info.data which contains thread execution result * - changes ctrl.done = true once data is consumed. * - Parent FSM is done with this thread * * 5. Thread FSM monitors for Parent FSM done in THREAD_STAGE__DONE * - Parent FSM changes ctrl.done to true once it has consumed the thread data * - Thread FSM polls ctrl.done * * if ( ctrl.done == true ) * - changes ctrl.stage = THREAD_STAGE__IDLE * - Thread FSM is done with this thread * * Note: The ctrl and info structs are intentionally kept separate for two * reasons ... * * 1. distinguish between parent process (ctrl) and thread (info) data. * 2. the parent might want them to occupying completely differnet memory * spaces in the future. 
* *****************************************************************************/ int thread_handler ( thread_ctrl_type & ctrl, thread_info_type & info ) { int rc = PASS ; switch ( ctrl.stage ) { case THREAD_STAGE__IGNORE: { break ; } case THREAD_STAGE__IDLE: { if ( ctrl.idle == false ) { ctrl.idle = true ; dlog ("%s IDLE\n", info.log_prefix); if (( ctrl.id ) || ( info.id ) || ( ctrl.done == false )) { slog ("%s bad thread state [%lu:%lu:%d]\n", info.log_prefix, ctrl.id, info.id, ctrl.done ); } } /******************** Garbage Collection *****************/ /* remove previous password file if it somehow did not get removed before */ if ( info.pw_file_fd ) { wlog ("%s closing pw fd (%d) ; garbage collected\n", info.hostname.c_str(), info.pw_file_fd ); close(info.pw_file_fd); info.pw_file_fd = 0 ; } if ( ! info.password_file.empty() ) { if ( daemon_is_file_present ( info.password_file.data() )) { wlog ("%s removing pw file (%s) ; garbage collected\n", info.hostname.c_str(), info.password_file.c_str()); unlink(info.password_file.data()); daemon_remove_file (info.password_file.data()); info.password_file.clear(); } } break ; } case THREAD_STAGE__WAIT: { if ( mtcTimer_expired ( ctrl.timer ) ) { ctrl.timer.ring = false ; ctrl.done = true ; ctrl.id = 0 ; info.id = 0 ; info.command = 0 ; _stage_change ( ctrl, THREAD_STAGE__IDLE ); } else if ( ctrl.done == true ) { /* force wait completed */ mtcTimer_reset ( ctrl.timer ); info.command = 0 ; _stage_change ( ctrl, THREAD_STAGE__IDLE ); } break ; } case THREAD_STAGE__DONE: { if ( ctrl.done == true ) { if (( info.signal_handling == 0 ) && ( info.status == PASS )) { wlog ("%s %s thread not servicing pthread_signal_handler\n", ctrl.hostname.c_str(), ctrl.name.c_str()); } dlog ("%s %s thread data was consumed by parent ; switching to IDLE\n", ctrl.hostname.c_str(), ctrl.name.c_str()); ctrl.id = 0 ; info.id = 0 ; dlog ("%s %s done\n", ctrl.hostname.c_str(), ctrl.name.c_str()); _stage_change ( ctrl, THREAD_STAGE__IDLE ); } else if ( 
info.signal == SIGKILL ) { wlog ("%s %s thread completed ; waiting on DONE but got SIGKILL ; forcing DONE\n", ctrl.hostname.c_str(), ctrl.name.c_str() ); ctrl.done = true ; } break ; } case THREAD_STAGE__LAUNCH: { /* * pre-check should never this this come in as non-null but just * to be sure a thread is actually created properly we set it to null */ if ( ctrl.id ) { slog ("%s %s thread id should be 0\n", ctrl.hostname.c_str(), ctrl.name.c_str()); ctrl.id = 0 ; } /* * Prepare thread complete criteria. * * When info.runcount > ctrl.runcount then the thread is done. */ ctrl.runcount = info.runcount ; /* thread updates this stuff */ info.status_string.clear() ; info.status = -1 ; info.progress = 0 ; info.signal = 0 ; info.id = 0 ; info.data.clear() ; info.signal_handling = 0 ; ctrl.idle = false ; /* not idle - for idle log throttle */ ctrl.done = false ; /* declare the thread as running */ daemon_signal_hdlr (); /* Block signals */ sigfillset(&__disabled_mask); // sigemptyset(&__enabled_mask); /* maybe not needed */ pthread_sigmask(SIG_SETMASK, &__disabled_mask, NULL ); pthread_sigmask(SIG_BLOCK, &__disabled_mask, &__enabled_mask); rc = pthread_create(&ctrl.id, &__attr, ctrl.thread, (void*)&info); if ( sigismember (&__enabled_mask, SIGINT ) == 0 ) { slog ("%s SIGINT signal was not enabled ; enabling\n", ctrl.hostname.c_str()); sigaddset(&__enabled_mask, SIGINT); } if ( sigismember (&__enabled_mask, SIGTERM ) == 0 ) { slog ("%s SIGTERM signal was not enabled ; enabling\n", ctrl.hostname.c_str()); sigaddset(&__enabled_mask, SIGTERM); } if ( sigismember (&__enabled_mask, SIGUSR1 ) == 0 ) { slog ("%s SIGUSR1 signal was not enabled ; enabling\n", ctrl.hostname.c_str()); sigaddset(&__enabled_mask, SIGUSR1); } /* restore signal mask */ pthread_sigmask(SIG_SETMASK, &__enabled_mask, NULL ); pthread_sigmask(SIG_UNBLOCK, &__enabled_mask, NULL ); /* The above disables signal handling for a short period while a * thread is started. 
In the meantime the only signal that is * crutial not to miss is USR1. * Work Around: run the USR1 signal handler immediately following * the launch just in case it was requested during the launch * while the signals were masked. */ daemon_health_test (); if (rc != PASS) { elog ("%s %s thread launch failed (%d:%d:%m]", ctrl.hostname.c_str(), ctrl.name.c_str(), rc, errno ); ctrl.status = info.status = FAIL_THREAD_CREATE ; _stage_change ( ctrl, THREAD_STAGE__DONE ); } else if ( ctrl.id == 0 ) { elog ("%s %s thread id is null\n", ctrl.hostname.c_str(), ctrl.name.c_str()); ctrl.status = info.status = FAIL_THREAD_CREATE ; _stage_change ( ctrl, THREAD_STAGE__DONE ); } else { dlog ("%s %s thread launched with command:%d\n", ctrl.hostname.c_str(), ctrl.name.c_str(), info.command ); ctrl.status = PASS ; if ( ctrl.timeout ) { mtcTimer_start ( ctrl.timer, thread_timer_handler, ctrl.timeout ); } /* start monitoring */ _stage_change ( ctrl, THREAD_STAGE__MONITOR ); } break ; } case THREAD_STAGE__MONITOR: { /* provide subtle indication that the thread ids don't match */ if (( ctrl.id != info.id ) && ( info.id != 0 )) { ilog_throttled (ctrl.stage_log_throttle, 50, "%s %s thread [%ld:%ld] monitoring (progress:%d)\n", ctrl.hostname.c_str(), ctrl.name.c_str(), ctrl.id, info.id, info.progress); } #ifdef WANT_THROTTLED_PROGRESS_LOG else { ilog_throttled (ctrl.stage_log_throttle, 50, "%s %s thread monitoring (progress:%d)\n", ctrl.hostname.c_str(), ctrl.name.c_str(), info.progress); } #endif if (( ctrl.timeout ) && ( mtcTimer_expired ( ctrl.timer ) )) { elog ("%s %s thread timeout\n", ctrl.hostname.c_str(), ctrl.name.c_str()); ctrl.status = FAIL_TIMEOUT ; _stage_change ( ctrl, THREAD_STAGE__KILL ); } else if ( info.runcount > ctrl.runcount ) { mtcTimer_reset ( ctrl.timer ); if ( info.runcount != (ctrl.runcount+1)) { wlog ("%s %s thread runcount jumped from %d to %d (rc:%d)\n", ctrl.hostname.c_str(), ctrl.name.c_str(), ctrl.runcount, info.runcount, info.status); } else { if ( 
info.status ) { wlog ("%s %s thread completed (rc:%d)\n", ctrl.hostname.c_str(), ctrl.name.c_str(), info.status); } } ctrl.id = 0 ; info.id = 0 ; _stage_change ( ctrl, THREAD_STAGE__DONE ); } break ; } case THREAD_STAGE__KILL: { info.signal = SIGKILL ; if ( info.id != 0 ) { wlog ("%s %s thread kill req (rc:%d)\n", ctrl.hostname.c_str(), ctrl.name.c_str(), info.status); } if ( ctrl.id != 0 ) { /* Tell the thread ; by way of cancellation points ; to exit * * WARNING: Cannot send a cancel to the thread because if the * thread is already gone then, although the Linux man page says * it will just return an error, in fact after testing with 0 and * invalid numbers, causes the calling process to segfault. * Too dangerous !! Need cooperative exit */ // pthread_cancel(ctrl.id); ctrl.id = 0 ; } mtcTimer_reset ( ctrl.timer ); mtcTimer_start ( ctrl.timer, thread_timer_handler, THREAD_POST_KILL_WAIT ); _stage_change ( ctrl, THREAD_STAGE__WAIT ); break ; } case THREAD_STAGE__STAGES: default: { slog ("%s %s has invalid stage ; changing to IDLE\n", ctrl.hostname.c_str(), ctrl.name.c_str() ); _stage_change ( ctrl , THREAD_STAGE__IDLE ); rc = FAIL ; break ; } } return (rc); } /* called by the thread */ void pthread_signal_handler ( thread_info_type * info_ptr ) { switch ( info_ptr->signal ) { case SIGKILL: ilog ("%s SIGKILL ; exiting ...\n", info_ptr->log_prefix ); /* avoid touching data after the sigkill is received */ // info_ptr->data = "thread SIGKILL" ; // info_ptr->status = FAIL_THREAD_EXIT ; // info_ptr->runcount++ ; pthread_exit(&info_ptr->status ); exit (FAIL_THREAD_EXIT); break ; default: info_ptr->signal_handling++ ; } /* check for a cancel request - handled internally */ /* Note: No pint using pthread_testcancel since we don't * use pthread_cancel because if the risk of crashing the * parent process * pthread_testcancel (); */ } pthread_t thread_launch_thread (void*(thread)(void*), void * arg) { pthread_t id ; int rc = FAIL ; if ( __thread_init_sig == THREAD_INIT_SIG ) { 
rc = pthread_create(&id, &__attr, thread, (void*)arg); } else { slog ("cannot launch thread ; threading not initialized yet\n"); } if ( rc ) return 0 ; return id ; } void * thread_test ( void * arg ) { UNUSED(arg); for ( ; ; ) sleep (1); return NULL ; }