diff --git a/mtce-common/cgts-mtce-common-1.0/pmon/pmon.h b/mtce-common/cgts-mtce-common-1.0/pmon/pmon.h index 69fbc24e..c7a03a14 100755 --- a/mtce-common/cgts-mtce-common-1.0/pmon/pmon.h +++ b/mtce-common/cgts-mtce-common-1.0/pmon/pmon.h @@ -194,6 +194,9 @@ typedef enum PMOND_RECOVERY_METHOD__SYSTEMD = 1, } recovery_method_type ; +#define SYSTEMD_SERVICE_FILE_DIR1 ((const char *)"/etc/systemd/system") +#define SYSTEMD_SERVICE_FILE_DIR2 ((const char *)"/usr/lib/systemd/system") + /* * Used to mark a configured process * This aids in freeing duped memory over a process re-config @@ -267,7 +270,7 @@ int setup_signal_handler ( int rt_signal_num ); /* Monitored Process Config Bit Mask */ #define CONF_PROCESS (0x0001) -#define CONF_SCRIPT (0x0002) +#define CONF_RECOVERY (0x0002) #define CONF_STYLE (0x0004) #define CONF_PIDFILE (0x0008) #define CONF_RESTARTS (0x0010) @@ -286,8 +289,8 @@ int setup_signal_handler ( int rt_signal_num ); /* Monitored Passive Process Config Mask */ #define CONF_MASK (CONF_PROCESS | \ - CONF_SCRIPT | \ CONF_STYLE | \ + CONF_RECOVERY | \ CONF_PIDFILE | \ CONF_SEVERITY | \ CONF_RESTARTS | \ @@ -302,8 +305,8 @@ int setup_signal_handler ( int rt_signal_num ); /* Monitored Status Process Config Mask */ #define CONF_STATUS_MON_MASK (CONF_PROCESS | \ - CONF_SCRIPT | \ CONF_STYLE | \ + CONF_RECOVERY | \ CONF_SEVERITY | \ CONF_RESTARTS | \ CONF_INTERVAL | \ diff --git a/mtce-common/cgts-mtce-common-1.0/pmon/pmonHdlr.cpp b/mtce-common/cgts-mtce-common-1.0/pmon/pmonHdlr.cpp index eb1b56a1..daf31540 100644 --- a/mtce-common/cgts-mtce-common-1.0/pmon/pmonHdlr.cpp +++ b/mtce-common/cgts-mtce-common-1.0/pmon/pmonHdlr.cpp @@ -524,6 +524,43 @@ void pmon_timer_handler ( int sig, siginfo_t *si, void *uc) } } +/**************************************************************************** + * + * Name : service_file_exists + * + * Description: Look in some well known places for the specified service file. + * + * Returns : Return true if the specified service file is found. + * + * Updates : If the service file is found then update the supplied + * character string buffer with the full path/name of that + * service file. + * + ****************************************************************************/ +bool service_file_exists ( string service_filename, + char * path_n_name_ptr, + int max_len ) +{ + /* load the name of the service file */ + snprintf ( path_n_name_ptr, max_len, "%s/%s", + SYSTEMD_SERVICE_FILE_DIR1, + service_filename.data()); + if (( path_n_name_ptr ) && (strnlen ( path_n_name_ptr, max_len ))) + { + if ( daemon_is_file_present ( path_n_name_ptr ) == true ) + return true ; + } + snprintf ( path_n_name_ptr, max_len, "%s/%s", + SYSTEMD_SERVICE_FILE_DIR2, + service_filename.data()); + if (( path_n_name_ptr ) && ( strnlen ( path_n_name_ptr, max_len ))) + { + if ( daemon_is_file_present ( path_n_name_ptr ) == true ) + return true ; + } + return false ; +} + /***************************************************************************** * * Name : process_config_load @@ -533,8 +570,8 @@ void pmon_timer_handler ( int sig, siginfo_t *si, void *uc) *****************************************************************************/ int process_config_load (process_config_type * pc_ptr, const char * config_file_ptr ) { - char service_name_buf [_MAX_LEN_] ; - memset (service_name_buf,0, sizeof(service_name_buf)); + char recovery_method_buf [_MAX_LEN_] ; + memset (recovery_method_buf,0, sizeof(recovery_method_buf)); if ( _pmon_ctrl_ptr->processes >= MAX_PROCESSES ) { @@ -566,40 +603,59 @@ int process_config_load (process_config_type * pc_ptr, const char * config_file_ pc_ptr->startuptime = PMON_MIN_START_DELAY ; } - /* default recovery method to process init script */ - snprintf ( &service_name_buf[0], _MAX_LEN_, "%s", pc_ptr->script ); + /* Many process conf files came from a sysvinit origin and might not + * have a service file label. Account for that in the following + * load of recovery_method_buf. + * Accept a script name if the service name is missing. */ + bool recovery_method_found = false ; - /* Print error logs if there is no recovery method present for this service/process */ - if ( _pmon_ctrl_ptr->recovery_method == PMOND_RECOVERY_METHOD__SYSTEMD ) + /* look for the service file */ + if ( pc_ptr->service ) { - /* If the config file does not specify a service name - * then the service name defaults to the process name */ - if ( ! pc_ptr->service ) - { - snprintf ( &service_name_buf[0], _MAX_LEN_, "%s/%s.service", SYSTEMD_SERVICE_FILE_DIR, pc_ptr->process ); - if ( daemon_is_file_present ( service_name_buf ) == false ) - { - if ( daemon_is_file_present ( pc_ptr->script ) == false ) - { - /* print a log if we have no recovery method */ - wlog ("%s has no recovery method\n", pc_ptr->process ); - wlog ("... neither %s nor %s exist\n", service_name_buf, pc_ptr->script ); - } - } - } - else - { - snprintf ( &service_name_buf[0], _MAX_LEN_, "%s/%s.service", SYSTEMD_SERVICE_FILE_DIR, pc_ptr->service ); - if ( daemon_is_file_present ( service_name_buf ) == false ) - { - /* print a log if we have no recovery method */ - wlog ("%s service has no recovery method\n", pc_ptr->service ); - wlog ("... %s does not exist\n", service_name_buf ); - } - } + string service = pc_ptr->service ; + if ( service.find(".service") == string::npos ) + service.append(".service"); + if ( service_file_exists(service, &recovery_method_buf[0], _MAX_LEN_) == true ) + recovery_method_found = true ; + } + else if ( pc_ptr->script ) + { + string script = basename((char*)pc_ptr->script); + if ( script.find(".service") == string::npos ) + script.append(".service"); + if ( service_file_exists(script, &recovery_method_buf[0], _MAX_LEN_) == true ) + recovery_method_found = true ; + else + { + /* resort to the script file only */ + /* load the name of the process init script */ + snprintf ( &recovery_method_buf[0], _MAX_LEN_, "%s", pc_ptr->script ); + if ( daemon_is_file_present ( recovery_method_buf ) == true ) + { + recovery_method_found = true ; + } + else + { + wlog ("%s has script but not found (%s)\n", + pc_ptr->process, recovery_method_buf ); + } + } + } + else + { + /* print a log if we have no recovery method */ + wlog ("%s has no recovery method ; process not monitored\n", pc_ptr->process ); + wlog ("... conf file has no 'service' or 'script' recovery entry\n"); + return (FAIL_NOT_FOUND); } - update_config_option ( &pc_ptr->recovery_method , service_name_buf ); + if ( recovery_method_found == false ) + { + wlog ("%s has no recovery method found ; process not monitored\n", pc_ptr->process ); + return (FAIL_NOT_FOUND); + } + + update_config_option ( &pc_ptr->recovery_method , recovery_method_buf ); if ( !strcmp ( pc_ptr->mode, "status" ) ) { @@ -710,7 +766,7 @@ int process_config_load (process_config_type * pc_ptr, const char * config_file_ * that subfunction init is complete */ ilog ("%7s Def : %-30s %-8s - %s (%s)\n", pc_ptr->mode, pc_ptr->process, - pc_ptr->ignore ? "ignored" : pc_ptr->severity, service_name_buf, + pc_ptr->ignore ? "ignored" : pc_ptr->severity, recovery_method_buf, pc_ptr->subfunction); /* defer subfunction processes to the FSM to get enabled */ pc_ptr->stage = PMON_STAGE__POLLING ; @@ -724,7 +780,7 @@ int process_config_load (process_config_type * pc_ptr, const char * config_file_ ilog ("%7s Mon : %-30s %-8s - %s\n", pc_ptr->mode, pc_ptr->process, - pc_ptr->ignore ? "ignored" : pc_ptr->severity, service_name_buf); + pc_ptr->ignore ? "ignored" : pc_ptr->severity, recovery_method_buf); pc_ptr->stage = PMON_STAGE__MANAGE ; } // mem_log_process ( pc_ptr ); @@ -1870,6 +1926,11 @@ void pmon_service ( pmon_ctrl_type * ctrl_ptr ) if ( pmonTimer_hostwd.ring == true ) { + /* inservice recovery from hostw connection failures */ + if ( sock_ptr->hostwd_sock == 0 ) + { + hostwd_port_init(); + } if ( ctrl_ptr->event_mode == true ) { pmon_send_hostwd ( ); diff --git a/mtce-common/cgts-mtce-common-1.0/pmon/pmonInit.cpp b/mtce-common/cgts-mtce-common-1.0/pmon/pmonInit.cpp index 6837100b..dd87c9e7 100644 --- a/mtce-common/cgts-mtce-common-1.0/pmon/pmonInit.cpp +++ b/mtce-common/cgts-mtce-common-1.0/pmon/pmonInit.cpp @@ -116,14 +116,15 @@ int pmon_process_config ( void * user, } if (MATCH("process", "service")) { + ptr->mask |= CONF_RECOVERY ; ptr->service = strdup(value); dlog1 ("Service : %s\n", ptr->service ); rc = PASS ; } else if (MATCH("process", "script")) { - ptr->mask |= CONF_SCRIPT ; - ptr->status_mask |= CONF_SCRIPT ; + ptr->mask |= CONF_RECOVERY ; + ptr->status_mask |= CONF_RECOVERY ; ptr->script = strdup(value); dlog1 ("Script : %s\n", ptr->script ); } @@ -423,7 +424,7 @@ int socket_init ( void ) * host watchdog process */ if ( rc == PASS ) { - rc = hostwd_port_init ( ); + hostwd_port_init ( ); } pmon_inbox_init ( ); @@ -500,22 +501,8 @@ int daemon_init ( string iface, string nodetype_str ) pmon_timer_init (); } - /* - * Setup the recovery method based on the O/S - * - * WRL - SYSVINIT - * CENTOS - SYSTEMD - * - **/ - if ( daemon_is_file_present ( CENTOS_RELEASE_FILE ) ) - { - pmon_ctrl.recovery_method = PMOND_RECOVERY_METHOD__SYSTEMD ; - pmon_ctrl.system_state = get_system_state(); - } - else - { - pmon_ctrl.recovery_method = PMOND_RECOVERY_METHOD__SYSVINIT ; - } + pmon_ctrl.recovery_method = PMOND_RECOVERY_METHOD__SYSTEMD ; + pmon_ctrl.system_state = get_system_state(); ilog ("Recovery Method: %s\n", pmon_ctrl.recovery_method ? "systemd via systemctl" : "sysvinit via script" ); return (rc); } diff --git a/mtce-common/cgts-mtce-common-1.0/pmon/pmonMsg.cpp b/mtce-common/cgts-mtce-common-1.0/pmon/pmonMsg.cpp index 4b388b01..8e0daf6a 100644 --- a/mtce-common/cgts-mtce-common-1.0/pmon/pmonMsg.cpp +++ b/mtce-common/cgts-mtce-common-1.0/pmon/pmonMsg.cpp @@ -78,53 +78,42 @@ int pulse_port_init ( void ) } /* Setup the Unix Host Watchdog Socket */ -#define _THROTTLE_LEVEL (5) int hostwd_port_init ( void ) { - int rc = FAIL ; - int fail_count = 0 ; memset(&pmon_sock.hostwd_addr, 0, sizeof(pmon_sock.hostwd_addr)); - while (rc == FAIL) + pmon_sock.hostwd_sock = socket(AF_UNIX, SOCK_DGRAM, 0); + + if (pmon_sock.hostwd_sock <= 0) { - int len; - int connected; - pmon_sock.hostwd_sock = socket(AF_UNIX, SOCK_DGRAM, 0); - - if (pmon_sock.hostwd_sock <= 0) { - if ( fail_count++ > _THROTTLE_LEVEL ) { - wlog("Could not connect to create hostwd socket - will retry\n"); - } - sleep(1); - continue; - } - - /* Set up the socket address */ - memset (&pmon_sock.hostwd_addr, 0, sizeof(pmon_sock.hostwd_addr)); - pmon_sock.hostwd_addr.sun_family = AF_UNIX; - - /* Unix abstract namespace takes a string that starts with a NULL - * as the identifier. Thus, we need a pointer to byte[1] of the - * sockaddr_un.sun_path (a char array) - */ - strncpy( &(pmon_sock.hostwd_addr.sun_path[1]), - HOSTW_UNIX_SOCKNAME, - UNIX_PATH_MAX-1); - len = sizeof(pmon_sock.hostwd_addr); - - connected = connect( pmon_sock.hostwd_sock, (sockaddr*) &pmon_sock.hostwd_addr, - len); - if (connected == -1) { - if ( fail_count++ > _THROTTLE_LEVEL ) { - wlog("Could not connect to hostwd port - will retry\n"); - } - close(pmon_sock.hostwd_sock); - pmon_sock.hostwd_sock = 0; - sleep(1); - } else { - rc = PASS; - } + wlog("Could not connect to create hostwd socket - will retry\n"); + pmon_sock.hostwd_sock = 0 ; + return (FAIL_SOCKET_CREATE); } - return (rc); + + /* Set up the socket address */ + memset (&pmon_sock.hostwd_addr, 0, sizeof(pmon_sock.hostwd_addr)); + pmon_sock.hostwd_addr.sun_family = AF_UNIX; + + /* Unix abstract namespace takes a string that starts with a NULL + * as the identifier. Thus, we need a pointer to byte[1] of the + * sockaddr_un.sun_path (a char array) + */ + strncpy( &(pmon_sock.hostwd_addr.sun_path[1]), + HOSTW_UNIX_SOCKNAME, + UNIX_PATH_MAX-1); + int len = sizeof(pmon_sock.hostwd_addr); + int connected = connect( pmon_sock.hostwd_sock, (sockaddr*) &pmon_sock.hostwd_addr, + len); + if (connected == -1) + { + wlog("Could not connect to hostwd port - will retry\n"); + if ( pmon_sock.hostwd_sock ) + close(pmon_sock.hostwd_sock); + pmon_sock.hostwd_sock = 0; + return (FAIL_CONNECT); + } + ilog ("connected to host watchdog\n"); + return (PASS); } /* Build a message for host watchdog, and send it */ @@ -174,7 +163,13 @@ int pmon_send_hostwd ( void ) { elog("Error sending message to host watchdog -- error %d (%s)\n", errno, strerror(errno)); + if ( pmon_sock.hostwd_sock ) + { + close(pmon_sock.hostwd_sock); + pmon_sock.hostwd_sock = 0; + } return (FAIL); + } } return (FAIL); diff --git a/mtce-common/cgts-mtce-common-1.0/pmon/scripts/acpid.conf b/mtce-common/cgts-mtce-common-1.0/pmon/scripts/acpid.conf index 7d5740c4..e1c88cfc 100644 --- a/mtce-common/cgts-mtce-common-1.0/pmon/scripts/acpid.conf +++ b/mtce-common/cgts-mtce-common-1.0/pmon/scripts/acpid.conf @@ -2,7 +2,6 @@ process = acpid service = acpid pidfile = /var/run/acpid.pid -script = /etc/init.d/acpid style = lsb ; ocf or lsb severity = minor ; minor, major, critical restarts = 3 ; restart retries before error assertion diff --git a/mtce-common/cgts-mtce-common-1.0/pmon/scripts/nslcd.conf b/mtce-common/cgts-mtce-common-1.0/pmon/scripts/nslcd.conf index ad9dd838..63cc2f13 100644 --- a/mtce-common/cgts-mtce-common-1.0/pmon/scripts/nslcd.conf +++ b/mtce-common/cgts-mtce-common-1.0/pmon/scripts/nslcd.conf @@ -2,7 +2,6 @@ process = nslcd service = nslcd pidfile = /var/run/nslcd/nslcd.pid -script = /etc/init.d/openldap style = lsb ; ocf or lsb severity = major ; minor, major, critical restarts = 3 ; restart retries before error assertion diff --git a/mtce-common/cgts-mtce-common-1.0/pmon/scripts/ntpd.conf b/mtce-common/cgts-mtce-common-1.0/pmon/scripts/ntpd.conf index 568a89ee..524573b7 100644 --- a/mtce-common/cgts-mtce-common-1.0/pmon/scripts/ntpd.conf +++ b/mtce-common/cgts-mtce-common-1.0/pmon/scripts/ntpd.conf @@ -2,7 +2,6 @@ process = ntpd service = ntpd pidfile = /var/run/ntp.pid -script = /etc/init.d/ntpd style = lsb ; ocf or lsb severity = minor ; minor, major, critical restarts = 0 ; restart retries before error assertion diff --git a/mtce-common/cgts-mtce-common-1.0/pmon/scripts/sshd.conf b/mtce-common/cgts-mtce-common-1.0/pmon/scripts/sshd.conf index 80c507a7..dfa3a21c 100644 --- a/mtce-common/cgts-mtce-common-1.0/pmon/scripts/sshd.conf +++ b/mtce-common/cgts-mtce-common-1.0/pmon/scripts/sshd.conf @@ -2,7 +2,6 @@ process = sshd service = sshd pidfile = /var/run/sshd.pid -script = /etc/init.d/sshd style = lsb ; ocf or lsb severity = minor ; minor, major, critical restarts = 10 ; restart retries before error assertion diff --git a/mtce-common/cgts-mtce-common-1.0/pmon/scripts/syslog-ng.conf b/mtce-common/cgts-mtce-common-1.0/pmon/scripts/syslog-ng.conf index 5bb48129..9d78fcad 100644 --- a/mtce-common/cgts-mtce-common-1.0/pmon/scripts/syslog-ng.conf +++ b/mtce-common/cgts-mtce-common-1.0/pmon/scripts/syslog-ng.conf @@ -2,7 +2,6 @@ process = syslog-ng service = syslog-ng pidfile = /var/run/syslog-ng/syslog-ng.pid -script = /etc/init.d/syslog style = lsb ; ocf or lsb severity = minor ; minor, major, critical restarts = 2 ; restart retries before error assertion