From 537935bb0caa257df624a0b470a971c82d215152 Mon Sep 17 00:00:00 2001 From: Eric MacDonald Date: Mon, 9 Jul 2018 08:36:22 -0400 Subject: [PATCH] Reorder process restart operations to prevent pmond futex deadlock All compute hosts seen to self reboot by hostw during patching due to stuck pmond process Current method to kill the running process leads to a race condition that results in a user space futex dead lock that hangs pmond and results in a watchdog self-reset due to quorum master 'pmond' failure. The dead lock was traced to the ordering of the kill process. Current steps to kill: - kill process - remove pidfile - unregister pid with kernel Deadlock is avoided by reversing the kill steps to what is more logical. - unregister pid with kernel - remove pidfile - kill process Also introduced audit that registers manually restarted processes with the kernel. Failure Rate Before Fix: 1 every 25 process restarts. Mostly fails before 5. Failure Rate After Fix: No failures after 15000 process restarts across 8 hosts including all host types between 2 different labs 2 different loads 18.07 and 18.08. Test Method: Pmon restart regression test restarts all processes on a host. Total soak restart of 25 monitored processes for 50 loops over 12 hosts = 15000 restarts. Also regressed process kill / recovery handling. (5000 process recoveries) Change-Id: Icac64df52df9d8074fcd886567dda6e53641572d Signed-off-by: David Sullivan Story: 2002993 Task: 23007 --- mtce-common/cgts-mtce-common-1.0/pmon/pmon.h | 1 + .../cgts-mtce-common-1.0/pmon/pmonFsm.cpp | 29 ++------ .../cgts-mtce-common-1.0/pmon/pmonHdlr.cpp | 73 +++++++++++-------- .../cgts-mtce-common-1.0/pmon/pmonMsg.cpp | 2 +- 4 files changed, 52 insertions(+), 53 deletions(-) diff --git a/mtce-common/cgts-mtce-common-1.0/pmon/pmon.h b/mtce-common/cgts-mtce-common-1.0/pmon/pmon.h index c7a03a14..ec247532 100755 --- a/mtce-common/cgts-mtce-common-1.0/pmon/pmon.h +++ b/mtce-common/cgts-mtce-common-1.0/pmon/pmon.h @@ -445,6 +445,7 @@ typedef struct /** holds the alarm severity state of CLEAR, MINOR, MAJOR, CRITICAL */ EFmAlarmSeverityT alarm_severity ; bool restart ; + bool registered ; /**< true if pid is registered with kernel */ bool failed ; bool ignore ; /**< ignore this process ; debug purposes */ bool stopped ; /**< process was stopped by command */ diff --git a/mtce-common/cgts-mtce-common-1.0/pmon/pmonFsm.cpp b/mtce-common/cgts-mtce-common-1.0/pmon/pmonFsm.cpp index 2ef7a2ff..69a32839 100644 --- a/mtce-common/cgts-mtce-common-1.0/pmon/pmonFsm.cpp +++ b/mtce-common/cgts-mtce-common-1.0/pmon/pmonFsm.cpp @@ -194,10 +194,7 @@ int pmon_active_handler ( process_config_type * ptr ) } case ACTIVE_STAGE__GAP_SETUP: { - if ( ptr->pt_ptr->tid ) - { - mtcTimer_stop ( ptr->pt_ptr ); - } + mtcTimer_reset ( ptr->pt_ptr ); mtcTimer_start ( ptr->pt_ptr, pmon_timer_handler, ptr->period ); activeStageChange ( ptr, ACTIVE_STAGE__GAP_WAIT ); break ; @@ -216,8 +213,7 @@ int pmon_active_handler ( process_config_type * ptr ) ptr->active_failed = true ; ptr->afailed_count++ ; ptr->b2b_miss_count = 0 ; - if ( ptr->pt_ptr->tid ) - mtcTimer_stop ( ptr->pt_ptr ); + mtcTimer_reset ( ptr->pt_ptr ); manage_process_failure ( ptr ); @@ -569,16 +565,17 @@ int pmon_passive_handler ( process_config_type * ptr ) respawn_process ( ptr ) ; /* Start the monitor debounce timer. */ - if ( ptr->pt_ptr->tid ) mtcTimer_stop ( ptr->pt_ptr ); - mtcTimer_start ( ptr->pt_ptr, pmon_timer_handler, ptr->startuptime ); + mtcTimer_reset ( ptr->pt_ptr ); + mtcTimer_start ( ptr->pt_ptr, pmon_timer_handler, ptr->startuptime ); passiveStageChange ( ptr, PMON_STAGE__MONITOR_WAIT ) ; /* Don't wait for the debounce timer to take this process out of 'commanded restart' mode. * Do it now, otherwise tight patch loop stress testing might fail */ if ( ptr->restart == true ) { - ilog ("%s exit manual restart request mode\n", ptr->process ) + ilog ("%s Restarted\n", ptr->process ) ptr->restart = false ; + ptr->registered = false ; } break ; } @@ -972,12 +969,7 @@ int pmon_status_handler ( process_config_type * ptr ) // a ring when the command execute successfully or returns a failure if ( (ptr->pt_ptr->ring == true) || (ptr->status != PASS ) ) { - // Stop timer if we had one - if ( ptr->pt_ptr->tid ) - { - dlog ("%s stop the status command timer %p\n", ptr->process, ptr->pt_ptr->tid ); - mtcTimer_stop( ptr->pt_ptr); - } + mtcTimer_reset( ptr->pt_ptr); ptr->pt_ptr->ring = false; if (( !ptr->sigchld_rxed ) || ( !ptr->child_pid ) || (ptr->status != PASS)) @@ -1056,12 +1048,7 @@ int pmon_status_handler ( process_config_type * ptr ) // a ring when the command execute successfully or returns a failure if ( (ptr->pt_ptr->ring == true) || (ptr->status != PASS) ) { - // stop timer if we had one - if ( ptr->pt_ptr->tid ) - { - dlog ("%s stop the start command timer %p\n", ptr->process, ptr->pt_ptr->tid ); - mtcTimer_stop( ptr->pt_ptr); - } + mtcTimer_reset( ptr->pt_ptr); ptr->pt_ptr->ring = false; // If the status had failed then ptr->status_failed will be set to true. Status failure diff --git a/mtce-common/cgts-mtce-common-1.0/pmon/pmonHdlr.cpp b/mtce-common/cgts-mtce-common-1.0/pmon/pmonHdlr.cpp index daf31540..19305c10 100644 --- a/mtce-common/cgts-mtce-common-1.0/pmon/pmonHdlr.cpp +++ b/mtce-common/cgts-mtce-common-1.0/pmon/pmonHdlr.cpp @@ -379,10 +379,7 @@ void load_processes ( void ) */ for ( int i = 0 ; i < _pmon_ctrl_ptr->processes ; i++ ) { - if ( process_config[i].pt_ptr->tid ) - { - mtcTimer_stop ( process_config[i].pt_ptr ); - } + mtcTimer_reset ( process_config[i].pt_ptr ); close_process_socket ( &process_config[i] ); } @@ -925,13 +922,13 @@ bool kill_running_process ( int pid ) if ( result == 0 ) { char * proc_name_ptr = &unknown_process[0] ; - - result = kill ( pid, SIGKILL ); process_config_type * ptr = find_parent_process ( pid ) ; if ( ptr ) { + daemon_remove_file ( ptr->pidfile ); proc_name_ptr = (char*)ptr->process ; } + result = kill ( pid, SIGKILL ); if ( ptr && ( result == 0 ) ) { if ( daemon_is_file_present ( ptr->pidfile ) ) @@ -947,14 +944,10 @@ bool kill_running_process ( int pid ) } else { - wlog ("%s kill failed or process not running (%d)\n", proc_name_ptr, pid ); + ilog ("%s kill failed (%d)\n", proc_name_ptr, pid ); } } } - else - { - wlog ("%s cannot kill pid %d\n", unknown_process, pid); - } return (rc); } @@ -1095,13 +1088,21 @@ int unregister_process ( process_config_type * ptr ) info.events = PMON_EVENT_FLAGS ; if ( prctl (PR_DO_NOTIFY_TASK_STATE, &info )) { - wlog ("%s failed to unregister process %d\n", ptr->process, ptr->pid ); + if ( errno != ESRCH ) + { + wlog ("%s unregister pid:%d (%d:%s)\n", + ptr->process, + ptr->pid, + errno, + strerror(errno) ); + } } else { ilog ("%s unregistered (%d)\n", ptr->process, ptr->pid ); } } + ptr->registered = false ; return (PASS); } @@ -1120,7 +1121,7 @@ int register_process ( process_config_type * ptr ) info.events = PMON_EVENT_FLAGS; if ( prctl (PR_DO_NOTIFY_TASK_STATE, &info ) ) { - elog ("%s failed to register pid:%d (%d) (%s)\n", ptr->process, pid, errno, strerror(errno)); + elog ("%s failed to register pid:%d (%d:%s)\n", ptr->process, pid, errno, strerror(errno)); if ( errno == EINVAL ) { _pmon_ctrl_ptr->event_mode = false ; @@ -1135,6 +1136,7 @@ int register_process ( process_config_type * ptr ) { ilog ("%s Registered (%d)\n", ptr->process , pid ); ptr->failed = false ; + ptr->registered = true ; passiveStageChange ( ptr, PMON_STAGE__MANAGE ) ; if ( ptr->active_monitoring == false ) { @@ -1148,6 +1150,10 @@ int register_process ( process_config_type * ptr ) { wlog ("%s Registered (%d) in polling mode\n", ptr->process , pid); + + /* prevent infinite reg retry in polling mode */ + ptr->registered = true ; + if ( process_running ( ptr ) == false ) { ptr->failed = true ; @@ -1192,25 +1198,14 @@ int respawn_process ( process_config_type * ptr ) int rc = PASS ; bool restart = false ; + unregister_process ( ptr ); if ( process_running ( ptr ) == true ) { ilog ("%s restart of running process\n", ptr->process ); restart = true ; + kill_running_process ( ptr->pid ); } - /* Handle the case where the process is running but the known pid suggests its not. - * Do this by quering by processname and if it returns a valid PID then kill it before - * we start managing its death */ - pid = get_pid_by_name_pipe ( ptr->process ) ; - if ( pid ) - { - /* Note: We could just go with this new PID ; update the struct and such - * but that could be a bit risky ; instead we kill and restart. */ - kill_running_process ( pid ); - } - - unregister_process ( ptr ); - ptr->restarts_cnt++ ; /* default restart result and ponitoring controls */ @@ -1306,7 +1301,7 @@ int respawn_process ( process_config_type * ptr ) gettime ( ptr->time_start ); - ilog ("%s Spawn (%d) fork\n", ptr->process, ptr->child_pid ); + ilog ("%s Spawn (%d)\n", ptr->process, ptr->child_pid ); return (PASS); } @@ -1906,10 +1901,7 @@ void pmon_service ( pmon_ctrl_type * ctrl_ptr ) ilog ("Setting config reload flag\n"); /* Hijack the audit timer for the next period for config reload */ - if ( pmonTimer_degrade.tid ) - { - mtcTimer_stop (pmonTimer_degrade); - } + mtcTimer_reset (pmonTimer_degrade); if ( daemon_is_file_present ( PATCHING_IN_PROG_FILE ) == true ) { _pmon_ctrl_ptr->patching_in_progress = true ; @@ -2037,6 +2029,25 @@ void pmon_service ( pmon_ctrl_type * ctrl_ptr ) manage_process_failure ( &process_config[i]) ; } } + + /* Audit to ensure that running processes are + * registered with the kernel */ + if (( process_config[i].registered == false ) && + ( _pmon_ctrl_ptr->event_mode ) && + ( process_config[i].restart == false ) && + ( process_config[i].failed == false ) && + ( process_config[i].ignore == false )) + { + int pid = get_process_pid ( &process_config[i] ); + if ( pid ) + { + if ( kill (pid, 0 ) == 0 ) + { + process_config[i].pid = pid ; + register_process ( &process_config[i] ); + } + } + } } /* Debugging */ diff --git a/mtce-common/cgts-mtce-common-1.0/pmon/pmonMsg.cpp b/mtce-common/cgts-mtce-common-1.0/pmon/pmonMsg.cpp index 8e0daf6a..2b231e6f 100644 --- a/mtce-common/cgts-mtce-common-1.0/pmon/pmonMsg.cpp +++ b/mtce-common/cgts-mtce-common-1.0/pmon/pmonMsg.cpp @@ -717,7 +717,7 @@ void pmon_service_inbox ( void ) else { process_config_type * ptr = get_process_config_ptr ( process ); - ilog ("%s process 'restart' request\n", process.c_str()); + dlog ("%s process 'restart' request\n", process.c_str()); if ( ptr != NULL ) { if ( strcmp ( ptr->mode, "status" ) == 0 )