Remove sm-watchdog service since NFS is now stable
sm-watchdog was introduced as a workaround for NFS hangs. A separate, clean fix has since been provided, but sm-watchdog was never removed. Test plan: [centos] build, install and unlock. [debian] build, install and unlock. Story: 2010087 Task: 46007 Signed-off-by: Davi Frossard <dbarrosf@windriver.com> Change-Id: I29fffff4e8982dc504f104f49c6586f7c74527fb
This commit is contained in:
parent
924c088f3a
commit
bd9e560d4b
|
@ -44,7 +44,6 @@
|
||||||
sm-tools: true
|
sm-tools: true
|
||||||
sm-api: true
|
sm-api: true
|
||||||
sm-eru: true
|
sm-eru: true
|
||||||
sm-watchdog: true
|
|
||||||
mysql: false
|
mysql: false
|
||||||
postgresql: true
|
postgresql: true
|
||||||
tls-proxy: false
|
tls-proxy: false
|
||||||
|
|
|
@ -156,14 +156,9 @@ function cleanup_sm_common {
|
||||||
$STX_INST_DIR/lib64/libsm_common.so.* \
|
$STX_INST_DIR/lib64/libsm_common.so.* \
|
||||||
$STX_BIN_DIR/sm-eru \
|
$STX_BIN_DIR/sm-eru \
|
||||||
$STX_BIN_DIR/sm-eru-dump \
|
$STX_BIN_DIR/sm-eru-dump \
|
||||||
$STX_BIN_DIR/sm-watchdog \
|
|
||||||
$STX_SM_VAR_DIR/watchdog/modules/libsm_watchdog_nfs.so.* \
|
|
||||||
$STX_SYSCONFDIR/systemd/system/sm-eru.service \
|
$STX_SYSCONFDIR/systemd/system/sm-eru.service \
|
||||||
$STX_SYSCONFDIR/systemd/system/sm-watchdog.service \
|
|
||||||
$STX_SYSCONFDIR/pmon.d/sm-eru.conf \
|
$STX_SYSCONFDIR/pmon.d/sm-eru.conf \
|
||||||
$STX_SYSCONFDIR/pmon.d/sm-watchdog.conf \
|
|
||||||
$STX_SYSCONFDIR/init.d/sm-eru \
|
$STX_SYSCONFDIR/init.d/sm-eru \
|
||||||
$STX_SYSCONFDIR/init.d/sm-watchdog \
|
|
||||||
/etc/ld.so.conf.d/stx-ha.conf
|
/etc/ld.so.conf.d/stx-ha.conf
|
||||||
|
|
||||||
popd
|
popd
|
||||||
|
@ -190,7 +185,6 @@ function configure_ha {
|
||||||
|
|
||||||
if is_service_enabled sm-common; then
|
if is_service_enabled sm-common; then
|
||||||
config_eru
|
config_eru
|
||||||
config_watchdog
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if is_service_enabled sm-daemon; then
|
if is_service_enabled sm-daemon; then
|
||||||
|
@ -215,12 +209,6 @@ function config_eru {
|
||||||
iniset -sudo ${STX_SYSCONFDIR}/systemd/system/devstack@sm-eru.service "Service" "PIDFile" "/var/run/sm-eru.pid"
|
iniset -sudo ${STX_SYSCONFDIR}/systemd/system/devstack@sm-eru.service "Service" "PIDFile" "/var/run/sm-eru.pid"
|
||||||
}
|
}
|
||||||
|
|
||||||
function config_watchdog {
|
|
||||||
sudo sed -i "s%SM_WATCHDOG=\"/usr/bin/\${SM_WATCHDOG_NAME}\"%SM_WATCHDOG=\"$STX_INST_DIR/bin/\${SM_WATCHDOG_NAME}\"%" $STX_SYSCONFDIR/init.d/sm-watchdog
|
|
||||||
iniset -sudo ${STX_SYSCONFDIR}/systemd/system/devstack@sm-watchdog.service "Service" "Type" "forking"
|
|
||||||
iniset -sudo ${STX_SYSCONFDIR}/systemd/system/devstack@sm-watchdog.service "Service" "PIDFile" "/var/run/sm-watchdog.pid"
|
|
||||||
}
|
|
||||||
|
|
||||||
function create_sm_accounts {
|
function create_sm_accounts {
|
||||||
create_service_user "smapi"
|
create_service_user "smapi"
|
||||||
get_or_create_service "smapi" "servicemanagement" "Service Management"
|
get_or_create_service "smapi" "servicemanagement" "Service Management"
|
||||||
|
@ -340,15 +328,11 @@ function install_sm_common {
|
||||||
|
|
||||||
install_sm_common_libs
|
install_sm_common_libs
|
||||||
|
|
||||||
sudo install -m 0755 -p -D -t $STX_SM_VAR_DIR/watchdog/modules src/libsm_watchdog_nfs.so.${STX_SM_COMMON_VERSION}
|
|
||||||
sudo cp -P src/libsm_watchdog_nfs.so src/libsm_watchdog_nfs.so.${STX_SM_COMMON_VERSION%%.*} $STX_SM_VAR_DIR/watchdog/modules
|
|
||||||
|
|
||||||
# scripts/
|
# scripts/
|
||||||
(cd scripts; sudo make DEST_DIR= UNIT_DIR=$STX_SYSCONFDIR/systemd/system install)
|
(cd scripts; sudo make DEST_DIR= UNIT_DIR=$STX_SYSCONFDIR/systemd/system install)
|
||||||
|
|
||||||
sudo install -m 750 -p -D src/sm_eru $STX_BIN_DIR/sm-eru
|
sudo install -m 750 -p -D src/sm_eru $STX_BIN_DIR/sm-eru
|
||||||
sudo install -m 750 -p -D src/sm_eru_dump $STX_BIN_DIR/sm-eru-dump
|
sudo install -m 750 -p -D src/sm_eru_dump $STX_BIN_DIR/sm-eru-dump
|
||||||
sudo install -m 750 -p -D src/sm_watchdog $STX_BIN_DIR/sm-watchdog
|
|
||||||
|
|
||||||
echo $STX_INST_DIR/lib64 | sudo tee /etc/ld.so.conf.d/stx-ha.conf
|
echo $STX_INST_DIR/lib64 | sudo tee /etc/ld.so.conf.d/stx-ha.conf
|
||||||
sudo ldconfig
|
sudo ldconfig
|
||||||
|
@ -411,10 +395,6 @@ function start_eru {
|
||||||
run_process sm-eru "${STX_SYSCONFDIR}/init.d/sm-eru start" root root
|
run_process sm-eru "${STX_SYSCONFDIR}/init.d/sm-eru start" root root
|
||||||
}
|
}
|
||||||
|
|
||||||
function start_watchdog {
|
|
||||||
run_process sm-watchdog "${STX_SYSCONFDIR}/init.d/sm-watchdog start" root root
|
|
||||||
}
|
|
||||||
|
|
||||||
function start_ha {
|
function start_ha {
|
||||||
if is_service_enabled sm-daemon; then
|
if is_service_enabled sm-daemon; then
|
||||||
start_sm
|
start_sm
|
||||||
|
@ -426,14 +406,12 @@ function start_ha {
|
||||||
|
|
||||||
if is_service_enabled sm-common; then
|
if is_service_enabled sm-common; then
|
||||||
start_eru
|
start_eru
|
||||||
start_watchdog
|
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
function stop_ha {
|
function stop_ha {
|
||||||
if is_service_enabled sm-common; then
|
if is_service_enabled sm-common; then
|
||||||
stop_process sm-eru
|
stop_process sm-eru
|
||||||
stop_process sm-watchdog
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if is_service_enabled sm-api; then
|
if is_service_enabled sm-api; then
|
||||||
|
|
|
@ -16,14 +16,10 @@ install:
|
||||||
install -m 750 -d $(DEST_DIR)/usr/bin
|
install -m 750 -d $(DEST_DIR)/usr/bin
|
||||||
install -m 750 -p -D $(BUILDSUBDIR)/src/sm_eru $(DEST_DIR)/$(BIN_DIR)/sm-eru
|
install -m 750 -p -D $(BUILDSUBDIR)/src/sm_eru $(DEST_DIR)/$(BIN_DIR)/sm-eru
|
||||||
install -m 750 -p -D $(BUILDSUBDIR)/src/sm_eru_dump $(DEST_DIR)/$(BIN_DIR)/sm-eru-dump
|
install -m 750 -p -D $(BUILDSUBDIR)/src/sm_eru_dump $(DEST_DIR)/$(BIN_DIR)/sm-eru-dump
|
||||||
install -m 750 -p -D $(BUILDSUBDIR)/src/sm_watchdog $(DEST_DIR)/$(BIN_DIR)/sm-watchdog
|
|
||||||
install -m 644 -p -D $(BUILDSUBDIR)/scripts/sm-eru.service $(DEST_DIR)/$(UNIT_DIR)/sm-eru.service
|
install -m 644 -p -D $(BUILDSUBDIR)/scripts/sm-eru.service $(DEST_DIR)/$(UNIT_DIR)/sm-eru.service
|
||||||
install -m 644 -p -D $(BUILDSUBDIR)/scripts/sm-watchdog.service $(DEST_DIR)/$(UNIT_DIR)/sm-watchdog.service
|
|
||||||
install -m 750 -d $(DEST_DIR)/$(ETC_DIR)/pmon.d
|
install -m 750 -d $(DEST_DIR)/$(ETC_DIR)/pmon.d
|
||||||
install -m 640 -p -D $(BUILDSUBDIR)/scripts/sm-eru.conf $(DEST_DIR)/$(ETC_DIR)/pmon.d/sm-eru.conf
|
install -m 640 -p -D $(BUILDSUBDIR)/scripts/sm-eru.conf $(DEST_DIR)/$(ETC_DIR)/pmon.d/sm-eru.conf
|
||||||
install -m 640 -p -D $(BUILDSUBDIR)/scripts/sm-watchdog.conf $(DEST_DIR)/$(ETC_DIR)/pmon.d/sm-watchdog.conf
|
|
||||||
install -m 750 -p -D $(BUILDSUBDIR)/scripts/sm-eru $(DEST_DIR)/$(ETC_DIR)/init.d/sm-eru
|
install -m 750 -p -D $(BUILDSUBDIR)/scripts/sm-eru $(DEST_DIR)/$(ETC_DIR)/init.d/sm-eru
|
||||||
install -m 750 -p -D $(BUILDSUBDIR)/scripts/sm-watchdog $(DEST_DIR)/$(ETC_DIR)/init.d/sm-watchdog
|
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
@( cd src; make clean )
|
@( cd src; make clean )
|
||||||
|
|
|
@ -91,9 +91,6 @@ MAJOR=`echo $VER | awk -F . '{print $1}'`
|
||||||
MINOR=`echo $VER | awk -F . '{print $2}'`
|
MINOR=`echo $VER | awk -F . '{print $2}'`
|
||||||
make DEST_DIR=%{buildroot} BIN_DIR=%{_bindir} UNIT_DIR=%{_unitdir} LIB_DIR=%{_libdir} INC_DIR=%{_includedir} BUILDSUBDIR=%{_buildsubdir} VER=$VER VER_MJR=$MAJOR install
|
make DEST_DIR=%{buildroot} BIN_DIR=%{_bindir} UNIT_DIR=%{_unitdir} LIB_DIR=%{_libdir} INC_DIR=%{_includedir} BUILDSUBDIR=%{_buildsubdir} VER=$VER VER_MJR=$MAJOR install
|
||||||
|
|
||||||
%post
|
|
||||||
/usr/bin/systemctl enable sm-watchdog.service >/dev/null 2>&1
|
|
||||||
|
|
||||||
%post -n sm-eru
|
%post -n sm-eru
|
||||||
/usr/bin/systemctl enable sm-eru.service >/dev/null 2>&1
|
/usr/bin/systemctl enable sm-eru.service >/dev/null 2>&1
|
||||||
|
|
||||||
|
@ -101,10 +98,6 @@ make DEST_DIR=%{buildroot} BIN_DIR=%{_bindir} UNIT_DIR=%{_unitdir} LIB_DIR=%{_li
|
||||||
%files
|
%files
|
||||||
%license LICENSE
|
%license LICENSE
|
||||||
%defattr(-,root,root,-)
|
%defattr(-,root,root,-)
|
||||||
/etc/init.d/sm-watchdog
|
|
||||||
/etc/pmon.d/sm-watchdog.conf
|
|
||||||
/usr/bin/sm-watchdog
|
|
||||||
/usr/lib/systemd/system/sm-watchdog.service
|
|
||||||
|
|
||||||
#%{_unitdir}/*
|
#%{_unitdir}/*
|
||||||
#%{_bindir}/*
|
#%{_bindir}/*
|
||||||
|
@ -113,10 +106,6 @@ make DEST_DIR=%{buildroot} BIN_DIR=%{_bindir} UNIT_DIR=%{_unitdir} LIB_DIR=%{_li
|
||||||
|
|
||||||
%files libs
|
%files libs
|
||||||
%{_libdir}/*.so.*
|
%{_libdir}/*.so.*
|
||||||
%dir "/var/lib/sm"
|
|
||||||
%dir "/var/lib/sm/watchdog"
|
|
||||||
%dir "/var/lib/sm/watchdog/modules"
|
|
||||||
/var/lib/sm/watchdog/modules/*.so.*
|
|
||||||
|
|
||||||
%files -n sm-eru
|
%files -n sm-eru
|
||||||
%defattr(-,root,root,-)
|
%defattr(-,root,root,-)
|
||||||
|
@ -135,18 +124,14 @@ make DEST_DIR=%{buildroot} BIN_DIR=%{_bindir} UNIT_DIR=%{_unitdir} LIB_DIR=%{_li
|
||||||
#"/usr/lib64/.debug/libsm_common.so.1.0.0"
|
#"/usr/lib64/.debug/libsm_common.so.1.0.0"
|
||||||
#%dir "/usr/bin/.debug"
|
#%dir "/usr/bin/.debug"
|
||||||
#"/usr/bin/.debug/sm-eru-dump"
|
#"/usr/bin/.debug/sm-eru-dump"
|
||||||
#"/usr/bin/.debug/sm-watchdog"
|
|
||||||
#"/usr/bin/.debug/sm-eru"
|
#"/usr/bin/.debug/sm-eru"
|
||||||
#%dir "/usr/src/debug/sm-common"
|
#%dir "/usr/src/debug/sm-common"
|
||||||
#%dir "/usr/src/debug/sm-common/1.0.0-r7"
|
#%dir "/usr/src/debug/sm-common/1.0.0-r7"
|
||||||
#%dir "/usr/src/debug/sm-common/1.0.0-r7/src"
|
#%dir "/usr/src/debug/sm-common/1.0.0-r7/src"
|
||||||
#/usr/src/debug/sm-common/1.0.0-r7/src/*.h
|
#/usr/src/debug/sm-common/1.0.0-r7/src/*.h
|
||||||
#/usr/src/debug/sm-common/1.0.0-r7/src/*.c
|
#/usr/src/debug/sm-common/1.0.0-r7/src/*.c
|
||||||
#%dir "/var/lib/sm/watchdog/modules/.debug"
|
|
||||||
#"/var/lib/sm/watchdog/modules/.debug/libsm_watchdog_nfs.so.1.0.0"
|
|
||||||
|
|
||||||
%files dev
|
%files dev
|
||||||
%defattr(-,root,root,-)
|
%defattr(-,root,root,-)
|
||||||
%{_includedir}/*
|
%{_includedir}/*
|
||||||
%{_libdir}/*.so
|
%{_libdir}/*.so
|
||||||
/var/lib/sm/watchdog/modules/libsm_watchdog_nfs.so
|
|
||||||
|
|
|
@ -23,11 +23,8 @@ override_dh_auto_install:
|
||||||
# Prevents dh_fixperms from changing the permissions defined in the makefiles
|
# Prevents dh_fixperms from changing the permissions defined in the makefiles
|
||||||
override_dh_fixperms:
|
override_dh_fixperms:
|
||||||
dh_fixperms \
|
dh_fixperms \
|
||||||
-Xsm-watchdog* \
|
|
||||||
-Xlibsm_common.so.* \
|
-Xlibsm_common.so.* \
|
||||||
-Xlibsm_watchdog_nfs.so.* \
|
|
||||||
-Xsm-eru*
|
-Xsm-eru*
|
||||||
|
|
||||||
override_dh_installsystemd:
|
override_dh_installsystemd:
|
||||||
dh_installsystemd -psm-common sm-watchdog.service
|
|
||||||
dh_installsystemd -psm-eru sm-eru.service
|
dh_installsystemd -psm-eru sm-eru.service
|
||||||
|
|
|
@ -1,3 +1,2 @@
|
||||||
usr/include/*
|
usr/include/*
|
||||||
usr/lib/*.so
|
usr/lib/*.so
|
||||||
var/lib/sm/watchdog/modules/libsm_watchdog_nfs.so
|
|
||||||
|
|
|
@ -1,3 +1 @@
|
||||||
/var/lib/sm
|
/var/lib/sm
|
||||||
/var/lib/sm/watchdog
|
|
||||||
/var/lib/sm/watchdog/modules
|
|
||||||
|
|
|
@ -1,2 +1 @@
|
||||||
usr/lib/*.so.*
|
usr/lib/*.so.*
|
||||||
var/lib/sm/watchdog/modules/*.so.*
|
|
||||||
|
|
|
@ -1,5 +1 @@
|
||||||
etc/init.d/sm-watchdog
|
|
||||||
etc/pmon.d/sm-watchdog.conf
|
|
||||||
usr/bin/sm-watchdog
|
|
||||||
lib/systemd/system/sm-watchdog.service
|
|
||||||
debian/systemd/00-sm-common.preset etc/systemd/system-preset
|
debian/systemd/00-sm-common.preset etc/systemd/system-preset
|
||||||
|
|
|
@ -1 +0,0 @@
|
||||||
enable sm-watchdog.service
|
|
|
@ -72,19 +72,6 @@ MAJOR=`echo $VER | awk -F . '{print $1}'`
|
||||||
MINOR=`echo $VER | awk -F . '{print $2}'`
|
MINOR=`echo $VER | awk -F . '{print $2}'`
|
||||||
make DEST_DIR=%{buildroot} BIN_DIR=%{_bindir} UNIT_DIR=%{_unitdir} LIB_DIR=%{_libdir} INC_DIR=%{_includedir} BUILDSUBDIR=%{_buildsubdir} VER=$VER VER_MJR=$MAJOR install
|
make DEST_DIR=%{buildroot} BIN_DIR=%{_bindir} UNIT_DIR=%{_unitdir} LIB_DIR=%{_libdir} INC_DIR=%{_includedir} BUILDSUBDIR=%{_buildsubdir} VER=$VER VER_MJR=$MAJOR install
|
||||||
|
|
||||||
%pre
|
|
||||||
%service_add_pre sm-watchdog.service sm-watchdog.target
|
|
||||||
|
|
||||||
%preun
|
|
||||||
%service_del_preun sm-watchdog.service sm-watchdog.target
|
|
||||||
|
|
||||||
%post
|
|
||||||
%service_add_post sm-watchdog.service sm-watchdog.target
|
|
||||||
/usr/bin/systemctl enable sm-watchdog.service
|
|
||||||
|
|
||||||
%postun
|
|
||||||
%service_del_postun sm-watchdog.service sm-watchdog.target
|
|
||||||
|
|
||||||
%pre -n sm-eru
|
%pre -n sm-eru
|
||||||
%service_add_pre sm-eru.service sm-eru.target
|
%service_add_pre sm-eru.service sm-eru.target
|
||||||
|
|
||||||
|
@ -108,17 +95,10 @@ make DEST_DIR=%{buildroot} BIN_DIR=%{_bindir} UNIT_DIR=%{_unitdir} LIB_DIR=%{_li
|
||||||
%files
|
%files
|
||||||
%license LICENSE
|
%license LICENSE
|
||||||
%defattr(-,root,root,-)
|
%defattr(-,root,root,-)
|
||||||
%{_sysconfdir}/init.d/sm-watchdog
|
|
||||||
%config %{_sysconfdir}/pmon.d/sm-watchdog.conf
|
|
||||||
%{_bindir}/sm-watchdog
|
|
||||||
%{_unitdir}/sm-watchdog.service
|
|
||||||
|
|
||||||
%files libs
|
%files libs
|
||||||
%{_libdir}/*.so.*
|
%{_libdir}/*.so.*
|
||||||
%dir %{_sharedstatedir}/sm
|
%dir %{_sharedstatedir}/sm
|
||||||
%dir %{_sharedstatedir}/sm/watchdog
|
|
||||||
%dir %{_sharedstatedir}/sm/watchdog/modules
|
|
||||||
%{_sharedstatedir}/sm/watchdog/modules/*.so.*
|
|
||||||
|
|
||||||
%files -n sm-eru
|
%files -n sm-eru
|
||||||
%defattr(-,root,root,-)
|
%defattr(-,root,root,-)
|
||||||
|
@ -134,6 +114,5 @@ make DEST_DIR=%{buildroot} BIN_DIR=%{_bindir} UNIT_DIR=%{_unitdir} LIB_DIR=%{_li
|
||||||
%defattr(-,root,root,-)
|
%defattr(-,root,root,-)
|
||||||
%{_includedir}/*
|
%{_includedir}/*
|
||||||
%{_libdir}/*.so
|
%{_libdir}/*.so
|
||||||
%{_sharedstatedir}/sm/watchdog/modules/libsm_watchdog_nfs.so
|
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
|
|
@ -6,7 +6,7 @@ install:
|
||||||
install -d $(DEST_DIR)$(UNIT_DIR)
|
install -d $(DEST_DIR)$(UNIT_DIR)
|
||||||
install -m 644 *.service $(DEST_DIR)$(UNIT_DIR)
|
install -m 644 *.service $(DEST_DIR)$(UNIT_DIR)
|
||||||
install -d $(DEST_DIR)/etc/init.d
|
install -d $(DEST_DIR)/etc/init.d
|
||||||
install sm-watchdog sm-eru $(DEST_DIR)/etc/init.d
|
install sm-eru $(DEST_DIR)/etc/init.d
|
||||||
install -d $(DEST_DIR)/etc/pmon.d
|
install -d $(DEST_DIR)/etc/pmon.d
|
||||||
install *.conf $(DEST_DIR)/etc/pmon.d
|
install *.conf $(DEST_DIR)/etc/pmon.d
|
||||||
|
|
||||||
|
|
|
@ -1,131 +0,0 @@
|
||||||
#! /bin/sh
|
|
||||||
#
|
|
||||||
# Copyright (c) 2014 Wind River Systems, Inc.
|
|
||||||
#
|
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
#
|
|
||||||
# chkconfig: - 87 87
|
|
||||||
# processname: sm-watchdog
|
|
||||||
# description: Service Management Watchdog
|
|
||||||
#
|
|
||||||
### BEGIN INIT INFO
|
|
||||||
# Description: sm-watchdog
|
|
||||||
#
|
|
||||||
# Short-Description: Service Management Watchdog
|
|
||||||
# Provides: sm-watchdog
|
|
||||||
# Required-Start: $network
|
|
||||||
# Should-Start: $syslog
|
|
||||||
# Required-Stop: $network
|
|
||||||
# Default-Start: 3 5
|
|
||||||
# Default-Stop: 0 6
|
|
||||||
### END INIT INFO
|
|
||||||
|
|
||||||
. /etc/init.d/functions
|
|
||||||
|
|
||||||
RETVAL=0
|
|
||||||
|
|
||||||
SM_WATCHDOG_NAME="sm-watchdog"
|
|
||||||
SM_WATCHDOG="/usr/bin/${SM_WATCHDOG_NAME}"
|
|
||||||
SM_WATCHDOG_PIDFILE="/var/run/${SM_WATCHDOG_NAME}.pid"
|
|
||||||
|
|
||||||
if [ ! -e "${SM_WATCHDOG}" ]
|
|
||||||
then
|
|
||||||
logger "${SM_WATCHDOG} is missing"
|
|
||||||
exit 5
|
|
||||||
fi
|
|
||||||
|
|
||||||
PATH=/sbin:/usr/sbin:/bin:/usr/bin:/usr/local/bin
|
|
||||||
|
|
||||||
case "$1" in
|
|
||||||
start)
|
|
||||||
echo -n "Starting ${SM_WATCHDOG_NAME}: "
|
|
||||||
if [ -n "`pidof ${SM_WATCHDOG}`" ]
|
|
||||||
then
|
|
||||||
# PMOND might have restarted SM-WATCHDOG already.
|
|
||||||
RETVAL=0
|
|
||||||
else
|
|
||||||
start-stop-daemon --start -b -x ${SM_WATCHDOG}
|
|
||||||
RETVAL=$?
|
|
||||||
fi
|
|
||||||
if [ ${RETVAL} -eq 0 ]
|
|
||||||
then
|
|
||||||
echo "OK"
|
|
||||||
else
|
|
||||||
echo "FAIL"
|
|
||||||
RETVAL=1
|
|
||||||
fi
|
|
||||||
;;
|
|
||||||
|
|
||||||
stop)
|
|
||||||
echo -n "Stopping ${SM_WATCHDOG_NAME}: "
|
|
||||||
if [ -n "`pidof ${SM_WATCHDOG}`" ]
|
|
||||||
then
|
|
||||||
killproc ${SM_WATCHDOG}
|
|
||||||
fi
|
|
||||||
|
|
||||||
SHUTDOWN_TIMEOUT=5
|
|
||||||
count=0
|
|
||||||
while [ ${count} -lt ${SHUTDOWN_TIMEOUT} ]
|
|
||||||
do
|
|
||||||
pidof ${SM_WATCHDOG} &> /dev/null
|
|
||||||
rc=$?
|
|
||||||
if [ ${rc} -eq 1 ]
|
|
||||||
then
|
|
||||||
echo "OK"
|
|
||||||
break
|
|
||||||
fi
|
|
||||||
count=`expr ${count} + 1`
|
|
||||||
sleep 1
|
|
||||||
done
|
|
||||||
|
|
||||||
pidof ${SM_WATCHDOG} &> /dev/null
|
|
||||||
rc=$?
|
|
||||||
if [ ${rc} -eq 0 ]
|
|
||||||
then
|
|
||||||
echo "FAIL"
|
|
||||||
RETVAL=7
|
|
||||||
fi
|
|
||||||
|
|
||||||
rm -f ${SM_WATCHDOG_PIDFILE}
|
|
||||||
;;
|
|
||||||
|
|
||||||
status)
|
|
||||||
pid=`cat ${SM_WATCHDOG_PIDFILE} 2>/dev/null`
|
|
||||||
if [ -n "${pid}" ]
|
|
||||||
then
|
|
||||||
if ps -p ${pid} &>/dev/null
|
|
||||||
then
|
|
||||||
echo "${SM_WATCHDOG_NAME} is running"
|
|
||||||
RETVAL=0
|
|
||||||
else
|
|
||||||
echo "${SM_WATCHDOG_NAME} is not running but has pid file"
|
|
||||||
RETVAL=1
|
|
||||||
fi
|
|
||||||
else
|
|
||||||
echo "${SM_WATCHDOG_NAME} is not running"
|
|
||||||
RETVAL=3
|
|
||||||
fi
|
|
||||||
;;
|
|
||||||
|
|
||||||
restart)
|
|
||||||
$0 stop
|
|
||||||
sleep 1
|
|
||||||
$0 start
|
|
||||||
;;
|
|
||||||
|
|
||||||
reload)
|
|
||||||
echo "${SM_WATCHDOG_NAME} reload"
|
|
||||||
$0 restart
|
|
||||||
;;
|
|
||||||
|
|
||||||
force-reload)
|
|
||||||
echo "${SM_WATCHDOG_NAME} force-reload"
|
|
||||||
$0 restart
|
|
||||||
;;
|
|
||||||
|
|
||||||
*)
|
|
||||||
echo "usage: $0 { start | stop | status | restart | reload | force-reload }"
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
|
|
||||||
exit ${RETVAL}
|
|
|
@ -1,15 +0,0 @@
|
||||||
;
|
|
||||||
; Copyright (c) 2014 Wind River Systems, Inc.
|
|
||||||
;
|
|
||||||
; SPDX-License-Identifier: Apache-2.0
|
|
||||||
;
|
|
||||||
[process]
|
|
||||||
process = sm-watchdog
|
|
||||||
pidfile = /var/run/sm-watchdog.pid
|
|
||||||
script = /etc/init.d/sm-watchdog
|
|
||||||
style = lsb ; lsb
|
|
||||||
severity = major ; minor, major, critical
|
|
||||||
restarts = 3 ; restarts before error assertion
|
|
||||||
startuptime = 5 ; seconds to wait after process start
|
|
||||||
interval = 5 ; number of seconds to wait between restarts
|
|
||||||
debounce = 20 ; number of seconds to wait before degrade clear
|
|
|
@ -1,15 +0,0 @@
|
||||||
[Unit]
|
|
||||||
Description=Service Management Watchdog
|
|
||||||
After=network-online.target syslog-ng.service config.service
|
|
||||||
Before=sm.service pmon.service
|
|
||||||
|
|
||||||
[Service]
|
|
||||||
Type=forking
|
|
||||||
RemainAfterExit=yes
|
|
||||||
User=root
|
|
||||||
ExecStart=/etc/init.d/sm-watchdog start
|
|
||||||
ExecStop=/etc/init.d/sm-watchdog stop
|
|
||||||
PIDFile=/var/run/sm-watchdog.pid
|
|
||||||
|
|
||||||
[Install]
|
|
||||||
WantedBy=multi-user.target
|
|
|
@ -34,7 +34,7 @@ EXTRACCFLAGS+= -Wformat -Wformat-security
|
||||||
LDLIBS= -lsqlite3 -lglib-2.0 -lgmodule-2.0 -luuid -lrt -lpthread
|
LDLIBS= -lsqlite3 -lglib-2.0 -lgmodule-2.0 -luuid -lrt -lpthread
|
||||||
LDFLAGS = -shared -rdynamic
|
LDFLAGS = -shared -rdynamic
|
||||||
|
|
||||||
build: libsm_common.so libsm_watchdog_nfs.so sm_watchdog sm_eru sm_eru_dump
|
build: libsm_common.so sm_eru sm_eru_dump
|
||||||
|
|
||||||
.c.o:
|
.c.o:
|
||||||
$(CXX) $(INCLUDES) $(CCFLAGS) $(EXTRACCFLAGS) -c $< -o $@
|
$(CXX) $(INCLUDES) $(CCFLAGS) $(EXTRACCFLAGS) -c $< -o $@
|
||||||
|
@ -48,18 +48,6 @@ libsm_common.so.$(VER_MJR): libsm_common.so.$(VER)
|
||||||
libsm_common.so.$(VER): ${OBJS}
|
libsm_common.so.$(VER): ${OBJS}
|
||||||
$(CXX) ${LDFLAGS} -Wl,--start-group $(LDLIBS) -Wl,-soname,libsm_common.so.$(VER_MJR) -o $@ $^
|
$(CXX) ${LDFLAGS} -Wl,--start-group $(LDLIBS) -Wl,-soname,libsm_common.so.$(VER_MJR) -o $@ $^
|
||||||
|
|
||||||
libsm_watchdog_nfs.so: libsm_watchdog_nfs.so.$(VER_MJR)
|
|
||||||
ln -sf $^ $@
|
|
||||||
|
|
||||||
libsm_watchdog_nfs.so.$(VER_MJR): libsm_watchdog_nfs.so.$(VER)
|
|
||||||
ln -sf $^ $@
|
|
||||||
|
|
||||||
libsm_watchdog_nfs.so.$(VER): libsm_common.so.$(VER) libsm_common.so
|
|
||||||
$(CXX) $(INCLUDES) $(CCFLAGS) $(EXTRACCFLAGS) sm_watchdog_nfs.c ${LDFLAGS} $(LDLIBS) -L./ -lsm_common -Wl,-soname,libsm_watchdog_nfs.so.$(VER_MJR) -o $@
|
|
||||||
|
|
||||||
sm_watchdog: libsm_common.so
|
|
||||||
$(CXX) $(INCLUDES) $(CCFLAGS) $(EXTRACCFLAGS) $(OBJS) sm_watchdog_module.c sm_watchdog_process.c sm_watchdog_main.c $(LDLIBS) -L./ -lsm_common -o sm_watchdog
|
|
||||||
|
|
||||||
sm_eru: libsm_common.so
|
sm_eru: libsm_common.so
|
||||||
$(CXX) $(INCLUDES) $(CCFLAGS) $(EXTRACCFLAGS) $(OBJS) sm_eru_process.c sm_eru_main.c $(LDLIBS) -L./ -lsm_common -o sm_eru
|
$(CXX) $(INCLUDES) $(CCFLAGS) $(EXTRACCFLAGS) $(OBJS) sm_eru_process.c sm_eru_main.c $(LDLIBS) -L./ -lsm_common -o sm_eru
|
||||||
|
|
||||||
|
@ -71,15 +59,12 @@ install:
|
||||||
# renamed with '-' like they are in the bitbake file.
|
# renamed with '-' like they are in the bitbake file.
|
||||||
#
|
#
|
||||||
# install -d $(DEST_DIR)$(BIN_DIR)
|
# install -d $(DEST_DIR)$(BIN_DIR)
|
||||||
# install sm_watchdog sm_eru sm_eru_dump $(DEST_DIR)$(BIN_DIR)
|
# install sm_eru sm_eru_dump $(DEST_DIR)$(BIN_DIR)
|
||||||
install -d $(DEST_DIR)$(LIB_DIR)
|
install -d $(DEST_DIR)$(LIB_DIR)
|
||||||
install libsm_common.so.${VER} $(DEST_DIR)$(LIB_DIR)
|
install libsm_common.so.${VER} $(DEST_DIR)$(LIB_DIR)
|
||||||
cp -P libsm_common.so libsm_common.so.$(VER_MJR) $(DEST_DIR)$(LIB_DIR)
|
cp -P libsm_common.so libsm_common.so.$(VER_MJR) $(DEST_DIR)$(LIB_DIR)
|
||||||
install -d $(DEST_DIR)$(INC_DIR)
|
install -d $(DEST_DIR)$(INC_DIR)
|
||||||
install -m 644 *.h $(DEST_DIR)$(INC_DIR)
|
install -m 644 *.h $(DEST_DIR)$(INC_DIR)
|
||||||
install -d $(DEST_DIR)/var/lib/sm/watchdog/modules
|
|
||||||
install libsm_watchdog_nfs.so.${VER} $(DEST_DIR)/var/lib/sm/watchdog/modules
|
|
||||||
cp -P libsm_watchdog_nfs.so libsm_watchdog_nfs.so.${VER_MJR} $(DEST_DIR)/var/lib/sm/watchdog/modules
|
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -f *.o *.so *.so.*
|
rm -f *.o *.so *.so.*
|
||||||
|
|
|
@ -77,15 +77,12 @@ extern "C" {
|
||||||
|
|
||||||
#define SM_PROCESS_PID_FILENAME "/var/run/sm.pid"
|
#define SM_PROCESS_PID_FILENAME "/var/run/sm.pid"
|
||||||
#define SM_TRAP_PROCESS_PID_FILENAME "/var/run/sm-trap.pid"
|
#define SM_TRAP_PROCESS_PID_FILENAME "/var/run/sm-trap.pid"
|
||||||
#define SM_WATCHDOG_PROCESS_PID_FILENAME "/var/run/sm-watchdog.pid"
|
|
||||||
#define SM_ERU_PROCESS_PID_FILENAME "/var/run/sm-eru.pid"
|
#define SM_ERU_PROCESS_PID_FILENAME "/var/run/sm-eru.pid"
|
||||||
|
|
||||||
#define SM_BOOT_COMPLETE_FILENAME "/var/run/sm_boot_complete"
|
#define SM_BOOT_COMPLETE_FILENAME "/var/run/sm_boot_complete"
|
||||||
|
|
||||||
#define SM_INDICATE_DEGRADED_FILENAME "/var/run/.sm_degraded"
|
#define SM_INDICATE_DEGRADED_FILENAME "/var/run/.sm_degraded"
|
||||||
|
|
||||||
#define SM_WATCHDOG_HEARTBEAT_FILENAME "/var/run/.sm_watchdog_heartbeat"
|
|
||||||
|
|
||||||
#define SM_DUMP_DATA_FILE "/tmp/sm_data_dump.txt"
|
#define SM_DUMP_DATA_FILE "/tmp/sm_data_dump.txt"
|
||||||
|
|
||||||
#define SM_TROUBLESHOOT_LOG_FILE "/var/log/sm-troubleshoot.log"
|
#define SM_TROUBLESHOOT_LOG_FILE "/var/log/sm-troubleshoot.log"
|
||||||
|
|
|
@ -15,9 +15,6 @@
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <signal.h>
|
#include <signal.h>
|
||||||
#include <time.h>
|
|
||||||
#include <utime.h>
|
|
||||||
#include <sys/types.h>
|
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
|
|
||||||
// ****************************************************************************
|
// ****************************************************************************
|
||||||
|
@ -127,77 +124,3 @@ SmErrorT sm_utils_clear_degraded( void )
|
||||||
return( SM_OKAY );
|
return( SM_OKAY );
|
||||||
}
|
}
|
||||||
// ****************************************************************************
|
// ****************************************************************************
|
||||||
|
|
||||||
// ****************************************************************************
|
|
||||||
// Utils - Watchdog Heartbeat
|
|
||||||
// ==========================
|
|
||||||
void sm_utils_watchdog_heartbeat( void )
|
|
||||||
{
|
|
||||||
struct utimbuf file_times;
|
|
||||||
struct timespec ts_mono;
|
|
||||||
|
|
||||||
clock_gettime( CLOCK_MONOTONIC_RAW, &ts_mono );
|
|
||||||
|
|
||||||
memset( &file_times, 0, sizeof(struct utimbuf) );
|
|
||||||
|
|
||||||
file_times.actime = ts_mono.tv_sec;
|
|
||||||
file_times.modtime = ts_mono.tv_sec;
|
|
||||||
|
|
||||||
if( 0 > access( SM_WATCHDOG_HEARTBEAT_FILENAME, F_OK ) )
|
|
||||||
{
|
|
||||||
int fd = open( SM_WATCHDOG_HEARTBEAT_FILENAME, O_RDWR | O_CREAT,
|
|
||||||
S_IRUSR | S_IRGRP | S_IROTH | O_CLOEXEC );
|
|
||||||
if( 0 > fd )
|
|
||||||
{
|
|
||||||
DPRINTFE( "Failed to create/open watchdog heartbeat, error=%s.",
|
|
||||||
strerror(errno) );
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
close( fd );
|
|
||||||
}
|
|
||||||
|
|
||||||
if( 0 > utime( SM_WATCHDOG_HEARTBEAT_FILENAME, &file_times ) )
|
|
||||||
{
|
|
||||||
DPRINTFE( "Failed to update watchdog heartbeat timings, error=%s.",
|
|
||||||
strerror(errno) );
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// ****************************************************************************
|
|
||||||
|
|
||||||
// ****************************************************************************
|
|
||||||
// Utils - Watchdog Delayed
|
|
||||||
// =========================
|
|
||||||
bool sm_utils_watchdog_delayed( int max_delay_secs )
|
|
||||||
{
|
|
||||||
struct stat stat_data;
|
|
||||||
|
|
||||||
if( 0 == access( SM_WATCHDOG_HEARTBEAT_FILENAME, F_OK ) )
|
|
||||||
{
|
|
||||||
int elapsed_secs;
|
|
||||||
struct timespec ts_mono;
|
|
||||||
|
|
||||||
clock_gettime( CLOCK_MONOTONIC_RAW, &ts_mono );
|
|
||||||
|
|
||||||
if( 0 > stat( SM_WATCHDOG_HEARTBEAT_FILENAME, &stat_data ) )
|
|
||||||
{
|
|
||||||
DPRINTFE( "Stat failed on file (%s), error=%s.",
|
|
||||||
SM_WATCHDOG_HEARTBEAT_FILENAME, strerror( errno ) );
|
|
||||||
return( false );
|
|
||||||
}
|
|
||||||
|
|
||||||
// Make sure that the elapsed seconds drift is in a valid range.
|
|
||||||
elapsed_secs = ts_mono.tv_sec - stat_data.st_mtime;
|
|
||||||
if(( max_delay_secs < elapsed_secs )&&( elapsed_secs <= 300 ))
|
|
||||||
{
|
|
||||||
DPRINTFI( "SM-Watchdog has been delayed by more than %d "
|
|
||||||
"seconds, elapsed_secs=%d", max_delay_secs,
|
|
||||||
elapsed_secs );
|
|
||||||
return( true );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return( false );
|
|
||||||
}
|
|
||||||
// ****************************************************************************
|
|
||||||
|
|
|
@ -50,18 +50,6 @@ extern SmErrorT sm_utils_indicate_degraded( void );
|
||||||
extern SmErrorT sm_utils_clear_degraded( void );
|
extern SmErrorT sm_utils_clear_degraded( void );
|
||||||
// ****************************************************************************
|
// ****************************************************************************
|
||||||
|
|
||||||
// ****************************************************************************
|
|
||||||
// Utils - Watchdog Heartbeat
|
|
||||||
// ==========================
|
|
||||||
extern void sm_utils_watchdog_heartbeat( void );
|
|
||||||
// ****************************************************************************
|
|
||||||
|
|
||||||
// ****************************************************************************
|
|
||||||
// Utils - Watchdog Delayed
|
|
||||||
// =========================
|
|
||||||
extern bool sm_utils_watchdog_delayed( int max_delay_secs );
|
|
||||||
// ****************************************************************************
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,49 +0,0 @@
|
||||||
//
|
|
||||||
// Copyright (c) 2014 Wind River Systems, Inc.
|
|
||||||
//
|
|
||||||
// SPDX-License-Identifier: Apache-2.0
|
|
||||||
//
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <libgen.h>
|
|
||||||
#include <errno.h>
|
|
||||||
#include <sys/stat.h>
|
|
||||||
#include <sys/types.h>
|
|
||||||
|
|
||||||
#include "sm_types.h"
|
|
||||||
#include "sm_debug.h"
|
|
||||||
#include "sm_watchdog_process.h"
|
|
||||||
|
|
||||||
// ****************************************************************************
|
|
||||||
// Main - Thread
|
|
||||||
// =============
|
|
||||||
int main( int argc, char *argv[], char *envp[] )
|
|
||||||
{
|
|
||||||
SmErrorT error;
|
|
||||||
|
|
||||||
error = sm_debug_initialize();
|
|
||||||
if( SM_OKAY != error )
|
|
||||||
{
|
|
||||||
printf( "Debug initialization failed, error=%s.\n",
|
|
||||||
sm_error_str( error ) );
|
|
||||||
return( EXIT_FAILURE );
|
|
||||||
}
|
|
||||||
|
|
||||||
error = sm_watchdog_process_main( argc, argv, envp );
|
|
||||||
if( SM_OKAY != error )
|
|
||||||
{
|
|
||||||
printf( "Process failure, error=%s.\n", sm_error_str( error ) );
|
|
||||||
return( EXIT_FAILURE );
|
|
||||||
}
|
|
||||||
|
|
||||||
error = sm_debug_finalize();
|
|
||||||
if( SM_OKAY != error )
|
|
||||||
{
|
|
||||||
printf( "Debug finalization failed, error=%s.\n",
|
|
||||||
sm_error_str( error ) );
|
|
||||||
}
|
|
||||||
|
|
||||||
return( EXIT_SUCCESS );
|
|
||||||
}
|
|
||||||
// ****************************************************************************
|
|
|
@ -1,247 +0,0 @@
|
||||||
//
|
|
||||||
// Copyright (c) 2014 Wind River Systems, Inc.
|
|
||||||
//
|
|
||||||
// SPDX-License-Identifier: Apache-2.0
|
|
||||||
//
|
|
||||||
#include "sm_watchdog_module.h"
|
|
||||||
|
|
||||||
#include <stdbool.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <glib.h>
|
|
||||||
#include <gmodule.h>
|
|
||||||
|
|
||||||
#include "sm_types.h"
|
|
||||||
#include "sm_list.h"
|
|
||||||
#include "sm_timer.h"
|
|
||||||
#include "sm_debug.h"
|
|
||||||
|
|
||||||
#define SM_WATCHDOG_MODULE_FILENAME_MAX_SIZE 128
|
|
||||||
#define SM_WATCHDOG_MODULE_PATH "/var/lib/sm/watchdog/modules"
|
|
||||||
#define SM_WATCHDOG_MODULE_DO_CHECK_FUNC "sm_watchdog_module_do_check"
|
|
||||||
#define SM_WATCHDOG_MODULE_INITIALIZE_FUNC "sm_watchdog_module_initialize"
|
|
||||||
#define SM_WATCHDOG_MODULE_FINALIZE_FUNC "sm_watchdog_module_finalize"
|
|
||||||
|
|
||||||
typedef void (*SmWatchdogModuleDoCheckT) (void);
|
|
||||||
typedef bool (*SmWatchdogModuleInitializeT) (int* do_check_in_ms);
|
|
||||||
typedef bool (*SmWatchdogModuleFinalizeT) (void);
|
|
||||||
|
|
||||||
typedef struct
|
|
||||||
{
|
|
||||||
gchar filename[SM_WATCHDOG_MODULE_FILENAME_MAX_SIZE];
|
|
||||||
GModule* glibmod;
|
|
||||||
int do_check_in_ms;
|
|
||||||
SmTimerIdT do_check_timer_id;
|
|
||||||
SmWatchdogModuleDoCheckT do_check;
|
|
||||||
SmWatchdogModuleInitializeT initialize;
|
|
||||||
SmWatchdogModuleFinalizeT finalize;
|
|
||||||
} SmWatchdogModuleT;
|
|
||||||
|
|
||||||
static SmListT* _modules = NULL;
|
|
||||||
|
|
||||||
// ****************************************************************************
|
|
||||||
// Watchdog Module - Do Check Timer
|
|
||||||
// ================================
|
|
||||||
// Timer callback shared by every module's do-check timer.
//
// Finds the module whose do_check_timer_id equals timer_id and calls its
// do-check entry point.  user_data is not used.
//
// Returns true when a do-check was run; false when no module owns the timer
// or the owning module exports no do-check function.
static bool sm_watchdog_module_do_check_timer( SmTimerIdT timer_id,
    int64_t user_data )
{
    SmListT* entry = NULL;
    SmListEntryDataPtrT entry_data;
    SmWatchdogModuleT* candidate;
    SmWatchdogModuleT* module = NULL;

    (void) user_data;

    SM_LIST_FOREACH( _modules, entry, entry_data )
    {
        candidate = (SmWatchdogModuleT*) entry_data;
        if( NULL == candidate )
        {
            continue;
        }

        if( timer_id == candidate->do_check_timer_id )
        {
            DPRINTFD( "Found do-check timer for module (%s).",
                      g_module_name(candidate->glibmod) );
            // Only record a module on a real match, so that falling off the
            // end of the list leaves "module" NULL instead of pointing at
            // whichever entry happened to be visited last.
            module = candidate;
            break;
        }
    }

    if( NULL == module )
    {
        DPRINTFE( "Module not found for do-check timer." );
        return( false );
    }

    if( NULL == module->do_check )
    {
        return( false );
    }

    DPRINTFD( "Calling do-check for module (%s).",
              g_module_name(module->glibmod) );
    module->do_check();
    return( true );
}
|
|
||||||
// ****************************************************************************
|
|
||||||
|
|
||||||
// ***************************************************************************
|
|
||||||
// Watchdog Module - Load
|
|
||||||
// ======================
|
|
||||||
static SmErrorT sm_watchdog_module_load( const gchar* filename )
|
|
||||||
{
|
|
||||||
gchar* filepath;
|
|
||||||
SmWatchdogModuleT* module;
|
|
||||||
|
|
||||||
module = (SmWatchdogModuleT*) malloc( sizeof(SmWatchdogModuleT) );
|
|
||||||
if( NULL == module )
|
|
||||||
{
|
|
||||||
DPRINTFE( "Failed to allocate watchdog module." );
|
|
||||||
return( SM_FAILED );
|
|
||||||
}
|
|
||||||
|
|
||||||
memset( module, 0, sizeof(SmWatchdogModuleT) );
|
|
||||||
|
|
||||||
g_snprintf(module->filename, SM_WATCHDOG_MODULE_FILENAME_MAX_SIZE,
|
|
||||||
"%s", filename);
|
|
||||||
|
|
||||||
filepath = g_module_build_path( SM_WATCHDOG_MODULE_PATH, filename );
|
|
||||||
|
|
||||||
module->glibmod = g_module_open( filepath, G_MODULE_BIND_LAZY );
|
|
||||||
if( NULL == module->glibmod )
|
|
||||||
{
|
|
||||||
DPRINTFE( "Failed to open module (%s).", filepath );
|
|
||||||
free( module );
|
|
||||||
g_free( filepath );
|
|
||||||
return( SM_FAILED );
|
|
||||||
}
|
|
||||||
|
|
||||||
g_free( filepath );
|
|
||||||
|
|
||||||
g_module_symbol( module->glibmod, SM_WATCHDOG_MODULE_INITIALIZE_FUNC,
|
|
||||||
(gpointer*) &(module->initialize) );
|
|
||||||
|
|
||||||
g_module_symbol( module->glibmod, SM_WATCHDOG_MODULE_FINALIZE_FUNC,
|
|
||||||
(gpointer*) &(module->finalize) );
|
|
||||||
|
|
||||||
g_module_symbol( module->glibmod, SM_WATCHDOG_MODULE_DO_CHECK_FUNC,
|
|
||||||
(gpointer*) &(module->do_check) );
|
|
||||||
|
|
||||||
SM_LIST_PREPEND( _modules, (SmListEntryDataPtrT) module );
|
|
||||||
|
|
||||||
return( SM_OKAY );
|
|
||||||
}
|
|
||||||
// ***************************************************************************
|
|
||||||
|
|
||||||
// ***************************************************************************
|
|
||||||
// Watchdog Module - Load All
|
|
||||||
// ==========================
|
|
||||||
SmErrorT sm_watchdog_module_load_all( void )
|
|
||||||
{
|
|
||||||
const gchar* file;
|
|
||||||
GDir* directory;
|
|
||||||
GError* g_error;
|
|
||||||
SmListT* entry = NULL;
|
|
||||||
SmListEntryDataPtrT entry_data;
|
|
||||||
SmWatchdogModuleT* module;
|
|
||||||
SmErrorT error;
|
|
||||||
|
|
||||||
directory = g_dir_open( SM_WATCHDOG_MODULE_PATH, 0, &g_error );
|
|
||||||
if( NULL == directory )
|
|
||||||
{
|
|
||||||
DPRINTFE( "Failed to open directory( %s), error=%s",
|
|
||||||
SM_WATCHDOG_MODULE_PATH, g_error->message );
|
|
||||||
g_error_free( g_error );
|
|
||||||
return( SM_FAILED );
|
|
||||||
}
|
|
||||||
|
|
||||||
file = g_dir_read_name( directory );
|
|
||||||
while( NULL != file )
|
|
||||||
{
|
|
||||||
DPRINTFI( "Loading module (%s).", file );
|
|
||||||
|
|
||||||
error = sm_watchdog_module_load( file );
|
|
||||||
if( SM_OKAY != error )
|
|
||||||
{
|
|
||||||
DPRINTFE( "Failed to load module (%s), error=%s.",
|
|
||||||
file, sm_error_str(error) );
|
|
||||||
}
|
|
||||||
|
|
||||||
file = g_dir_read_name( directory );
|
|
||||||
}
|
|
||||||
|
|
||||||
g_dir_close( directory );
|
|
||||||
|
|
||||||
SM_LIST_FOREACH( _modules, entry, entry_data )
|
|
||||||
{
|
|
||||||
module = (SmWatchdogModuleT*) entry_data;
|
|
||||||
if( NULL == module )
|
|
||||||
{
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if( NULL != module->initialize )
|
|
||||||
{
|
|
||||||
DPRINTFI( "Initializing module (%s).",
|
|
||||||
g_module_name(module->glibmod) );
|
|
||||||
|
|
||||||
if( !(module->initialize( &(module->do_check_in_ms) )) )
|
|
||||||
{
|
|
||||||
DPRINTFE( "Failed to initialize %s.",
|
|
||||||
g_module_name(module->glibmod) );
|
|
||||||
return( SM_FAILED );
|
|
||||||
}
|
|
||||||
|
|
||||||
error = sm_timer_register( module->filename,
|
|
||||||
module->do_check_in_ms,
|
|
||||||
sm_watchdog_module_do_check_timer,
|
|
||||||
0, &(module->do_check_timer_id) );
|
|
||||||
if( SM_OKAY != error )
|
|
||||||
{
|
|
||||||
DPRINTFE( "Failed to create module (%s) do-check timer, "
|
|
||||||
"error=%s.", g_module_name(module->glibmod),
|
|
||||||
sm_error_str( error ) );
|
|
||||||
return( error );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return( SM_OKAY );
|
|
||||||
}
|
|
||||||
// ***************************************************************************
|
|
||||||
|
|
||||||
// ***************************************************************************
|
|
||||||
// Watchdog Module - Unload All
|
|
||||||
// ============================
|
|
||||||
SmErrorT sm_watchdog_module_unload_all( void )
|
|
||||||
{
|
|
||||||
SmListT* entry = NULL;
|
|
||||||
SmListEntryDataPtrT entry_data;
|
|
||||||
SmWatchdogModuleT* module;
|
|
||||||
|
|
||||||
SM_LIST_FOREACH( _modules, entry, entry_data )
|
|
||||||
{
|
|
||||||
module = (SmWatchdogModuleT*) entry_data;
|
|
||||||
if( NULL == module )
|
|
||||||
{
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if( NULL != module->finalize )
|
|
||||||
{
|
|
||||||
DPRINTFI( "Finalizing module (%s).",
|
|
||||||
g_module_name(module->glibmod) );
|
|
||||||
|
|
||||||
if( !(module->finalize()) )
|
|
||||||
{
|
|
||||||
DPRINTFE( "Failed to finalize %s.",
|
|
||||||
g_module_name(module->glibmod) );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
g_module_close( module->glibmod );
|
|
||||||
}
|
|
||||||
|
|
||||||
SM_LIST_CLEANUP_ALL( _modules );
|
|
||||||
|
|
||||||
return( SM_OKAY );
|
|
||||||
}
|
|
||||||
// ***************************************************************************
|
|
|
@ -1,31 +0,0 @@
|
||||||
//
|
|
||||||
// Copyright (c) 2014 Wind River Systems, Inc.
|
|
||||||
//
|
|
||||||
// SPDX-License-Identifier: Apache-2.0
|
|
||||||
//
|
|
||||||
#ifndef __SM_WATCHDOG_MODULE_H__
|
|
||||||
#define __SM_WATCHDOG_MODULE_H__
|
|
||||||
|
|
||||||
#include "sm_types.h"
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// ****************************************************************************
|
|
||||||
// Watchdog Module - Load All
|
|
||||||
// ==========================
|
|
||||||
// Loads every module found in the watchdog module directory, initializes
// each one and registers its do-check timer.  Returns SM_OKAY on success or
// an SmErrorT failure code otherwise.
extern SmErrorT sm_watchdog_module_load_all( void );
|
|
||||||
// ****************************************************************************
|
|
||||||
|
|
||||||
// ****************************************************************************
|
|
||||||
// Watchdog Module - Unload All
|
|
||||||
// ============================
|
|
||||||
// Finalizes and closes every loaded watchdog module and clears the module
// list.  Returns SM_OKAY.
extern SmErrorT sm_watchdog_module_unload_all( void );
|
|
||||||
// ****************************************************************************
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif // __SM_WATCHDOG_MODULE_H__
|
|
|
@ -1,608 +0,0 @@
|
||||||
//
|
|
||||||
// Copyright (c) 2014 Wind River Systems, Inc.
|
|
||||||
//
|
|
||||||
// SPDX-License-Identifier: Apache-2.0
|
|
||||||
//
|
|
||||||
#include "sm_watchdog_nfs.h"
|
|
||||||
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdbool.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <time.h>
|
|
||||||
#include <unistd.h>
|
|
||||||
#include <fcntl.h>
|
|
||||||
#include <signal.h>
|
|
||||||
#include <errno.h>
|
|
||||||
#include <sched.h>
|
|
||||||
#include <pthread.h>
|
|
||||||
#include <dirent.h>
|
|
||||||
#include <limits.h>
|
|
||||||
#include <sys/types.h>
|
|
||||||
#include <sys/stat.h>
|
|
||||||
#include <sys/syscall.h>
|
|
||||||
#include <sys/time.h>
|
|
||||||
#include <sys/resource.h>
|
|
||||||
|
|
||||||
#include "sm_types.h"
|
|
||||||
#include "sm_time.h"
|
|
||||||
#include "sm_debug.h"
|
|
||||||
#include "sm_node_utils.h"
|
|
||||||
#include "sm_node_stats.h"
|
|
||||||
|
|
||||||
#define SM_WATCHDOG_NFS_THREAD_NAME "(nfsd)"
|
|
||||||
#define SM_WATCHDOG_NFS_REBOOT_INPROGRESS 0xA5A5A5A5
|
|
||||||
#define SM_WATCHDOG_NFS_MAX_BLOCKED_THREADS 32
|
|
||||||
#define SM_WATCHDOG_NFS_CHECK_IN_MS 10000
|
|
||||||
#define SM_WATCHDOG_NFS_MAX_UNINTERRUPTIBLE_SLEEP 60000
|
|
||||||
#define SM_WATCHDOG_NFS_DELAY_REBOOT_IN_MS 60000
|
|
||||||
#define SM_WATCHDOG_NFS_DELAY_REBOOT_FORCE_IN_MS 480000
|
|
||||||
#define SM_WATCHDOG_NFS_DEBUG_FILE "/var/log/nfs.debug"
|
|
||||||
|
|
||||||
// One slot of the blocked-NFS-thread table.
typedef struct
{
    // True while the slot holds a tracked thread.
    bool inuse;
    // Set on every in-use slot at the start of a check pass and cleared when
    // the thread is seen still blocked with the same block start; slots left
    // stale after the pass are wiped.
    bool stale;
    // Pid of the tracked thread.
    int pid;
    // Time at which the slot was filled; elapsed time is measured from here.
    SmTimeT timestamp;
    // Copy of the process status captured when the slot was filled.
    SmNodeProcessStatusT status;
} SmWatchDogNfsBlockedInfoT;
|
|
||||||
|
|
||||||
static uint32_t _nfs_reboot_inprogress;
|
|
||||||
|
|
||||||
static SmWatchDogNfsBlockedInfoT
|
|
||||||
_nfs_blocked_threads[SM_WATCHDOG_NFS_MAX_BLOCKED_THREADS];
|
|
||||||
|
|
||||||
// ****************************************************************************
|
|
||||||
// Watchdog NFS - Find Blocked Thread
|
|
||||||
// ==================================
|
|
||||||
static SmWatchDogNfsBlockedInfoT* sm_watchdog_nfs_find_blocked_thread( int pid )
|
|
||||||
{
|
|
||||||
SmWatchDogNfsBlockedInfoT* entry;
|
|
||||||
|
|
||||||
int thread_i;
|
|
||||||
for( thread_i=0; SM_WATCHDOG_NFS_MAX_BLOCKED_THREADS > thread_i;
|
|
||||||
++thread_i )
|
|
||||||
{
|
|
||||||
entry = &(_nfs_blocked_threads[thread_i]);
|
|
||||||
|
|
||||||
if( entry->inuse )
|
|
||||||
{
|
|
||||||
if( pid == entry->pid )
|
|
||||||
{
|
|
||||||
return( entry );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return( NULL );
|
|
||||||
}
|
|
||||||
// ****************************************************************************
|
|
||||||
|
|
||||||
// ****************************************************************************
|
|
||||||
// Watchdog NFS - Add Blocked Thread
|
|
||||||
// =================================
|
|
||||||
static void sm_watchdog_nfs_add_blocked_thread( int pid,
|
|
||||||
SmNodeProcessStatusT* status )
|
|
||||||
{
|
|
||||||
SmWatchDogNfsBlockedInfoT* entry;
|
|
||||||
|
|
||||||
int thread_i;
|
|
||||||
for( thread_i=0; SM_WATCHDOG_NFS_MAX_BLOCKED_THREADS > thread_i;
|
|
||||||
++thread_i )
|
|
||||||
{
|
|
||||||
entry = &(_nfs_blocked_threads[thread_i]);
|
|
||||||
|
|
||||||
if( !(entry->inuse) )
|
|
||||||
{
|
|
||||||
entry->inuse = true;
|
|
||||||
entry->stale = false;
|
|
||||||
entry->pid = pid;
|
|
||||||
sm_time_get( &(entry->timestamp) );
|
|
||||||
memcpy( &(entry->status), status, sizeof(SmNodeProcessStatusT) );
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
DPRINTFE( "Not enough room for all the NFS blocked threads." );
|
|
||||||
}
|
|
||||||
// ****************************************************************************
|
|
||||||
|
|
||||||
// ****************************************************************************
|
|
||||||
// Watchdog NFS - Delete Blocked Thread
|
|
||||||
// ====================================
|
|
||||||
static void sm_watchdog_nfs_delete_blocked_thread( int pid )
|
|
||||||
{
|
|
||||||
SmWatchDogNfsBlockedInfoT* entry;
|
|
||||||
|
|
||||||
entry = sm_watchdog_nfs_find_blocked_thread( pid );
|
|
||||||
if( NULL != entry )
|
|
||||||
{
|
|
||||||
memset( entry, 0, sizeof(SmWatchDogNfsBlockedInfoT) );
|
|
||||||
entry->inuse = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// ****************************************************************************
|
|
||||||
|
|
||||||
// ****************************************************************************
|
|
||||||
// Watchdog NFS - Do Reboot
|
|
||||||
// ========================
|
|
||||||
static void sm_watchdog_nfs_do_reboot( void )
|
|
||||||
{
|
|
||||||
char cmd[2048];
|
|
||||||
pid_t reboot_pid;
|
|
||||||
pid_t reboot_force_pid;
|
|
||||||
pid_t sm_troubleshoot_pid;
|
|
||||||
pid_t collect_pid;
|
|
||||||
SmWatchDogNfsBlockedInfoT* entry;
|
|
||||||
SmErrorT error;
|
|
||||||
|
|
||||||
if( SM_WATCHDOG_NFS_REBOOT_INPROGRESS == _nfs_reboot_inprogress )
|
|
||||||
{
|
|
||||||
DPRINTFD( "Reboot already inprogress." );
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fork child to do the reboot.
|
|
||||||
reboot_pid = fork();
|
|
||||||
if( 0 > reboot_pid )
|
|
||||||
{
|
|
||||||
DPRINTFE( "Failed to fork process for reboot, error=%s.",
|
|
||||||
strerror( errno ) );
|
|
||||||
return;
|
|
||||||
|
|
||||||
} else if( 0 == reboot_pid ) {
|
|
||||||
// Child process.
|
|
||||||
long ms_expired;
|
|
||||||
char reboot_cmd[] = "reboot";
|
|
||||||
char* reboot_argv[] = {reboot_cmd, NULL};
|
|
||||||
char* reboot_env[] = {NULL};
|
|
||||||
struct rlimit file_limits;
|
|
||||||
SmTimeT timestamp;
|
|
||||||
|
|
||||||
setpgid( 0, 0 );
|
|
||||||
|
|
||||||
if( 0 == getrlimit( RLIMIT_NOFILE, &file_limits ) )
|
|
||||||
{
|
|
||||||
unsigned int fd_i;
|
|
||||||
for( fd_i=0; fd_i < file_limits.rlim_cur; ++fd_i )
|
|
||||||
{
|
|
||||||
close( fd_i );
|
|
||||||
}
|
|
||||||
|
|
||||||
open( "/dev/null", O_RDONLY ); // stdin
|
|
||||||
open( "/dev/null", O_WRONLY ); // stdout
|
|
||||||
open( "/dev/null", O_WRONLY ); // stderr
|
|
||||||
}
|
|
||||||
|
|
||||||
sm_time_get( ×tamp );
|
|
||||||
|
|
||||||
while( true )
|
|
||||||
{
|
|
||||||
ms_expired = sm_time_get_elapsed_ms( ×tamp );
|
|
||||||
if( SM_WATCHDOG_NFS_DELAY_REBOOT_IN_MS < ms_expired )
|
|
||||||
{
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
sleep( 10 ); // 10 seconds
|
|
||||||
}
|
|
||||||
|
|
||||||
execve( "/sbin/reboot", reboot_argv, reboot_env );
|
|
||||||
|
|
||||||
// Shouldn't get this far, else there was an error.
|
|
||||||
exit(-1);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fork child to do reboot force.
|
|
||||||
reboot_force_pid = fork();
|
|
||||||
if( 0 > reboot_force_pid )
|
|
||||||
{
|
|
||||||
DPRINTFE( "Failed to fork process for reboot escalation, "
|
|
||||||
"error=%s.", strerror( errno ) );
|
|
||||||
return;
|
|
||||||
|
|
||||||
} else if( 0 == reboot_force_pid ) {
|
|
||||||
// Child process.
|
|
||||||
long ms_expired;
|
|
||||||
int sysrq_handler_fd;
|
|
||||||
int sysrq_tigger_fd;
|
|
||||||
struct rlimit file_limits;
|
|
||||||
SmTimeT timestamp;
|
|
||||||
|
|
||||||
setpgid( 0, 0 );
|
|
||||||
|
|
||||||
if( 0 == getrlimit( RLIMIT_NOFILE, &file_limits ) )
|
|
||||||
{
|
|
||||||
unsigned int fd_i;
|
|
||||||
for( fd_i=0; fd_i < file_limits.rlim_cur; ++fd_i )
|
|
||||||
{
|
|
||||||
close( fd_i );
|
|
||||||
}
|
|
||||||
|
|
||||||
open( "/dev/null", O_RDONLY ); // stdin
|
|
||||||
open( "/dev/null", O_WRONLY ); // stdout
|
|
||||||
open( "/dev/null", O_WRONLY ); // stderr
|
|
||||||
}
|
|
||||||
|
|
||||||
sm_time_get( ×tamp );
|
|
||||||
|
|
||||||
while( true )
|
|
||||||
{
|
|
||||||
ms_expired = sm_time_get_elapsed_ms( ×tamp );
|
|
||||||
if( SM_WATCHDOG_NFS_DELAY_REBOOT_FORCE_IN_MS < ms_expired )
|
|
||||||
{
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
sleep( 10 ); // 10 seconds
|
|
||||||
}
|
|
||||||
|
|
||||||
// Enable sysrq handling.
|
|
||||||
sysrq_handler_fd = open( "/proc/sys/kernel/sysrq", O_RDWR | O_CLOEXEC );
|
|
||||||
if( 0 > sysrq_handler_fd )
|
|
||||||
{
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
write( sysrq_handler_fd, "1", 1 );
|
|
||||||
close( sysrq_handler_fd );
|
|
||||||
|
|
||||||
// Trigger sysrq command.
|
|
||||||
sysrq_tigger_fd = open( "/proc/sysrq-trigger", O_RDWR | O_CLOEXEC );
|
|
||||||
if( 0 > sysrq_tigger_fd )
|
|
||||||
{
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
write( sysrq_tigger_fd, "b", 1 );
|
|
||||||
close( sysrq_tigger_fd );
|
|
||||||
|
|
||||||
exit( EXIT_SUCCESS );
|
|
||||||
}
|
|
||||||
|
|
||||||
_nfs_reboot_inprogress = SM_WATCHDOG_NFS_REBOOT_INPROGRESS;
|
|
||||||
|
|
||||||
// Fork child to do the sm-troubleshoot.
|
|
||||||
sm_troubleshoot_pid = fork();
|
|
||||||
if( 0 > sm_troubleshoot_pid )
|
|
||||||
{
|
|
||||||
DPRINTFE( "Failed to fork process for sm-trouble, error=%s.",
|
|
||||||
strerror( errno ) );
|
|
||||||
|
|
||||||
} else if( 0 == sm_troubleshoot_pid ) {
|
|
||||||
// Child process.
|
|
||||||
char cmd[] = "sm-troubleshoot";
|
|
||||||
char log_file[] = SM_TROUBLESHOOT_LOG_FILE;
|
|
||||||
char* argv[] = {cmd, log_file, NULL};
|
|
||||||
char* env[] = {NULL};
|
|
||||||
struct rlimit file_limits;
|
|
||||||
|
|
||||||
setpgid( 0, 0 );
|
|
||||||
|
|
||||||
if( 0 == getrlimit( RLIMIT_NOFILE, &file_limits ) )
|
|
||||||
{
|
|
||||||
unsigned int fd_i;
|
|
||||||
for( fd_i=0; fd_i < file_limits.rlim_cur; ++fd_i )
|
|
||||||
{
|
|
||||||
close( fd_i );
|
|
||||||
}
|
|
||||||
|
|
||||||
open( "/dev/null", O_RDONLY ); // stdin
|
|
||||||
open( "/dev/null", O_WRONLY ); // stdout
|
|
||||||
open( "/dev/null", O_WRONLY ); // stderr
|
|
||||||
}
|
|
||||||
|
|
||||||
execve( SM_TROUBLESHOOT_SCRIPT, argv, env );
|
|
||||||
|
|
||||||
// Shouldn't get this far, else there was an error.
|
|
||||||
exit(-1);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fork child to run collect.
|
|
||||||
collect_pid = fork();
|
|
||||||
if( 0 > collect_pid )
|
|
||||||
{
|
|
||||||
DPRINTFE( "Failed to fork process for collect, error=%s.",
|
|
||||||
strerror( errno ) );
|
|
||||||
|
|
||||||
} else if( 0 == collect_pid ) {
|
|
||||||
// Child process.
|
|
||||||
char cmd[] = "collect";
|
|
||||||
char* argv[] = {cmd, NULL};
|
|
||||||
char* env[] = {NULL};
|
|
||||||
struct rlimit file_limits;
|
|
||||||
|
|
||||||
setpgid( 0, 0 );
|
|
||||||
|
|
||||||
if( 0 == getrlimit( RLIMIT_NOFILE, &file_limits ) )
|
|
||||||
{
|
|
||||||
unsigned int fd_i;
|
|
||||||
for( fd_i=0; fd_i < file_limits.rlim_cur; ++fd_i )
|
|
||||||
{
|
|
||||||
close( fd_i );
|
|
||||||
}
|
|
||||||
|
|
||||||
open( "/dev/null", O_RDONLY ); // stdin
|
|
||||||
open( "/dev/null", O_WRONLY ); // stdout
|
|
||||||
open( "/dev/null", O_WRONLY ); // stderr
|
|
||||||
}
|
|
||||||
|
|
||||||
execve( "/usr/local/sbin/collect", argv, env );
|
|
||||||
|
|
||||||
// Shouldn't get this far, else there was an error.
|
|
||||||
exit(-1);
|
|
||||||
}
|
|
||||||
|
|
||||||
error = sm_node_utils_set_unhealthy();
|
|
||||||
if( SM_OKAY != error )
|
|
||||||
{
|
|
||||||
DPRINTFE( "Failed to set node unhealthy, error=%s.",
|
|
||||||
sm_error_str(error) );
|
|
||||||
}
|
|
||||||
|
|
||||||
DPRINTFI( "*******************************************************" );
|
|
||||||
DPRINTFI( "** Issuing a reboot of the system, NFS hang detected **" );
|
|
||||||
DPRINTFI( "*******************************************************" );
|
|
||||||
|
|
||||||
DPRINTFI( "Reboot (%i) process created.", (int) reboot_pid );
|
|
||||||
DPRINTFI( "Reboot force (%i) process created.", (int) reboot_force_pid );
|
|
||||||
DPRINTFI( "SM troubleshoot (%i) process created.", (int) sm_troubleshoot_pid );
|
|
||||||
DPRINTFI( "Collect (%i) process created.", (int) collect_pid );
|
|
||||||
|
|
||||||
snprintf( cmd, sizeof(cmd),
|
|
||||||
"date >> %s; "
|
|
||||||
"echo \"*******************************************\" >> %s; "
|
|
||||||
"echo \"NFS HANG DETECTED\" >> %s", SM_WATCHDOG_NFS_DEBUG_FILE,
|
|
||||||
SM_WATCHDOG_NFS_DEBUG_FILE, SM_WATCHDOG_NFS_DEBUG_FILE );
|
|
||||||
system( cmd );
|
|
||||||
|
|
||||||
int thread_i;
|
|
||||||
for( thread_i=0; SM_WATCHDOG_NFS_MAX_BLOCKED_THREADS > thread_i;
|
|
||||||
++thread_i )
|
|
||||||
{
|
|
||||||
entry = &(_nfs_blocked_threads[thread_i]);
|
|
||||||
|
|
||||||
if( entry->inuse )
|
|
||||||
{
|
|
||||||
snprintf( cmd, sizeof(cmd),
|
|
||||||
"date >> %s; "
|
|
||||||
"echo \"cat /proc/%i/sched\" >> %s; "
|
|
||||||
"cat /proc/%i/sched >> %s", SM_WATCHDOG_NFS_DEBUG_FILE,
|
|
||||||
entry->pid, SM_WATCHDOG_NFS_DEBUG_FILE, entry->pid,
|
|
||||||
SM_WATCHDOG_NFS_DEBUG_FILE );
|
|
||||||
system( cmd );
|
|
||||||
|
|
||||||
snprintf( cmd, sizeof(cmd),
|
|
||||||
"date >> %s; "
|
|
||||||
"echo \"cat /proc/%i/stack\" >> %s; "
|
|
||||||
"cat /proc/%i/stack >> %s", SM_WATCHDOG_NFS_DEBUG_FILE,
|
|
||||||
entry->pid, SM_WATCHDOG_NFS_DEBUG_FILE, entry->pid,
|
|
||||||
SM_WATCHDOG_NFS_DEBUG_FILE );
|
|
||||||
system( cmd );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
snprintf( cmd, sizeof(cmd),
|
|
||||||
"echo \"*******************************************\" >> %s",
|
|
||||||
SM_WATCHDOG_NFS_DEBUG_FILE );
|
|
||||||
system( cmd );
|
|
||||||
}
|
|
||||||
// ****************************************************************************
|
|
||||||
|
|
||||||
// ****************************************************************************
|
|
||||||
// Watchdog NFS - Search
|
|
||||||
// =====================
|
|
||||||
static void sm_watchdog_nfs_search( const char dir_name[] )
|
|
||||||
{
|
|
||||||
bool is_dir;
|
|
||||||
DIR* dir;
|
|
||||||
char path[PATH_MAX];
|
|
||||||
int path_len;
|
|
||||||
SmNodeProcessStatusT status;
|
|
||||||
SmErrorT error;
|
|
||||||
|
|
||||||
dir = opendir( dir_name );
|
|
||||||
if( NULL == dir )
|
|
||||||
{
|
|
||||||
DPRINTFE( "Failed to open directory (%s), error=%s.", dir_name,
|
|
||||||
strerror( errno ) );
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
struct dirent* entry;
|
|
||||||
for( entry = readdir( dir ); NULL != entry; entry = readdir( dir ) )
|
|
||||||
{
|
|
||||||
is_dir = false;
|
|
||||||
|
|
||||||
path_len = snprintf( path, sizeof(path), "%s/%s", dir_name,
|
|
||||||
entry->d_name );
|
|
||||||
if( PATH_MAX <= path_len )
|
|
||||||
{
|
|
||||||
DPRINTFE( "Path (%s/%s) is too long, max_len=%i.",
|
|
||||||
dir_name, entry->d_name, path_len );
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if( 0 != (DT_REG & entry->d_type) )
|
|
||||||
{
|
|
||||||
if( '.' != entry->d_name[0] )
|
|
||||||
{
|
|
||||||
struct stat stat_data;
|
|
||||||
|
|
||||||
if( 0 > lstat( path, &stat_data ) )
|
|
||||||
{
|
|
||||||
DPRINTFE( "Stat on (%s) failed, error=%s.", entry->d_name,
|
|
||||||
strerror( errno ) );
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
is_dir = S_ISDIR( stat_data.st_mode );
|
|
||||||
}
|
|
||||||
} else if( 0 != (DT_DIR & entry->d_type) ) {
|
|
||||||
if(( 0 != strcmp( ".", entry->d_name ) )&&
|
|
||||||
( 0 != strcmp( "..", entry->d_name ) ))
|
|
||||||
{
|
|
||||||
is_dir = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if( is_dir )
|
|
||||||
{
|
|
||||||
long val;
|
|
||||||
char* end;
|
|
||||||
|
|
||||||
val = strtol( entry->d_name, &end, 10 );
|
|
||||||
if(( ERANGE == errno )&&
|
|
||||||
(( LONG_MIN == val ) ||( LONG_MAX == val )))
|
|
||||||
{
|
|
||||||
DPRINTFD( "Directory (%s) name out of range.",
|
|
||||||
entry->d_name );
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if( end == entry->d_name )
|
|
||||||
{
|
|
||||||
DPRINTFD( "Directory (%s) is not a pid directory.",
|
|
||||||
entry->d_name );
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
error = sm_node_stats_get_process_status( val, &status );
|
|
||||||
if( SM_OKAY != error )
|
|
||||||
{
|
|
||||||
if( SM_NOT_FOUND == error )
|
|
||||||
{
|
|
||||||
DPRINTFD( "Failed to get %ld pid status, error=%s.",
|
|
||||||
val, sm_error_str(error) );
|
|
||||||
} else {
|
|
||||||
DPRINTFE( "Failed to get %ld pid status, error=%s.",
|
|
||||||
val, sm_error_str(error) );
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
DPRINTFD( "Looking at pid=%i, name=%s", status.pid, status.name );
|
|
||||||
|
|
||||||
if( 0 != strcmp( SM_WATCHDOG_NFS_THREAD_NAME, status.name ) )
|
|
||||||
{
|
|
||||||
DPRINTFD( "Process (%s) not an nfs thread, pid=%i.",
|
|
||||||
status.name, status.pid );
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
DPRINTFD( "NFS thread, pid=%i, state=%c, block_start_ns=%lld.",
|
|
||||||
status.pid, status.state, status.block_start_ns );
|
|
||||||
|
|
||||||
if(( 0 != status.block_start_ns )&&( 'D' == status.state ))
|
|
||||||
{
|
|
||||||
SmWatchDogNfsBlockedInfoT* entry;
|
|
||||||
|
|
||||||
entry = sm_watchdog_nfs_find_blocked_thread( (int) val );
|
|
||||||
if( NULL == entry )
|
|
||||||
{
|
|
||||||
sm_watchdog_nfs_add_blocked_thread( (int) val, &status );
|
|
||||||
|
|
||||||
} else if( status.block_start_ns == entry->status.block_start_ns ) {
|
|
||||||
long ms_expired;
|
|
||||||
|
|
||||||
entry->stale = false;
|
|
||||||
ms_expired = sm_time_get_elapsed_ms( &(entry->timestamp) );
|
|
||||||
if( SM_WATCHDOG_NFS_MAX_UNINTERRUPTIBLE_SLEEP < ms_expired )
|
|
||||||
{
|
|
||||||
sm_watchdog_nfs_do_reboot();
|
|
||||||
DPRINTFI( "Rebooting stuck nfs thread (%i).",
|
|
||||||
(int) val );
|
|
||||||
break;
|
|
||||||
} else {
|
|
||||||
if( (SM_WATCHDOG_NFS_MAX_UNINTERRUPTIBLE_SLEEP/2)
|
|
||||||
< ms_expired )
|
|
||||||
{
|
|
||||||
DPRINTFI( "WARNING: NFS thread, pid=%i, state=%c, "
|
|
||||||
"block_start_ns=%lld, elapsed_ms=%ld.",
|
|
||||||
status.pid, status.state,
|
|
||||||
status.block_start_ns, ms_expired );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
sm_watchdog_nfs_delete_blocked_thread( (int) val );
|
|
||||||
sm_watchdog_nfs_add_blocked_thread( (int) val, &status );
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
sm_watchdog_nfs_delete_blocked_thread( (int) val );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
closedir( dir );
|
|
||||||
}
|
|
||||||
// ****************************************************************************
|
|
||||||
|
|
||||||
// ****************************************************************************
|
|
||||||
// Watchdog NFS - Do Check
|
|
||||||
// =======================
|
|
||||||
void sm_watchdog_module_do_check( void )
|
|
||||||
{
|
|
||||||
DPRINTFD( "NFS do check called." );
|
|
||||||
|
|
||||||
if( SM_WATCHDOG_NFS_REBOOT_INPROGRESS != _nfs_reboot_inprogress )
|
|
||||||
{
|
|
||||||
int thread_i;
|
|
||||||
SmWatchDogNfsBlockedInfoT* entry;
|
|
||||||
|
|
||||||
// Mark entries as stale.
|
|
||||||
for( thread_i=0; SM_WATCHDOG_NFS_MAX_BLOCKED_THREADS > thread_i;
|
|
||||||
++thread_i )
|
|
||||||
{
|
|
||||||
entry = &(_nfs_blocked_threads[thread_i]);
|
|
||||||
|
|
||||||
if( entry->inuse )
|
|
||||||
{
|
|
||||||
entry->stale = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Audit NFS threads.
|
|
||||||
sm_watchdog_nfs_search( "/proc" );
|
|
||||||
|
|
||||||
// Cleanup stale entries.
|
|
||||||
for( thread_i=0; SM_WATCHDOG_NFS_MAX_BLOCKED_THREADS > thread_i;
|
|
||||||
++thread_i )
|
|
||||||
{
|
|
||||||
entry = &(_nfs_blocked_threads[thread_i]);
|
|
||||||
|
|
||||||
if(( entry->inuse )&&( entry->stale ))
|
|
||||||
{
|
|
||||||
memset( entry, 0, sizeof(SmWatchDogNfsBlockedInfoT) );
|
|
||||||
entry->inuse = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
DPRINTFD( "Reboot inprogress." );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// ****************************************************************************
|
|
||||||
|
|
||||||
// ****************************************************************************
|
|
||||||
// Watchdog NFS - Initialize
|
|
||||||
// =========================
|
|
||||||
bool sm_watchdog_module_initialize( int* do_check_in_ms )
|
|
||||||
{
|
|
||||||
*do_check_in_ms = SM_WATCHDOG_NFS_CHECK_IN_MS;
|
|
||||||
_nfs_reboot_inprogress = 0;
|
|
||||||
memset( &_nfs_blocked_threads, 0, sizeof(_nfs_blocked_threads) );
|
|
||||||
return( true );
|
|
||||||
}
|
|
||||||
// ****************************************************************************
|
|
||||||
|
|
||||||
// ****************************************************************************
|
|
||||||
// Watchdog NFS - Finalize
|
|
||||||
// =======================
|
|
||||||
bool sm_watchdog_module_finalize( void )
|
|
||||||
{
|
|
||||||
_nfs_reboot_inprogress = 0;
|
|
||||||
memset( &_nfs_blocked_threads, 0, sizeof(_nfs_blocked_threads) );
|
|
||||||
return( true );
|
|
||||||
}
|
|
||||||
// ****************************************************************************
|
|
|
@ -1,37 +0,0 @@
|
||||||
//
|
|
||||||
// Copyright (c) 2014 Wind River Systems, Inc.
|
|
||||||
//
|
|
||||||
// SPDX-License-Identifier: Apache-2.0
|
|
||||||
//
|
|
||||||
#ifndef __SM_WATCHDOG_NFS_H__
|
|
||||||
#define __SM_WATCHDOG_NFS_H__
|
|
||||||
|
|
||||||
#include <stdbool.h>
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// ****************************************************************************
|
|
||||||
// Watchdog NFS - Do Check
|
|
||||||
// =======================
|
|
||||||
// Runs one audit pass over the NFS daemon threads; does nothing once a
// reboot has been started.
extern void sm_watchdog_module_do_check( void );
|
|
||||||
// ****************************************************************************
|
|
||||||
|
|
||||||
// ****************************************************************************
|
|
||||||
// Watchdog NFS - Initialize
|
|
||||||
// =========================
|
|
||||||
// Resets the NFS watchdog state and writes the desired check interval, in
// milliseconds, to *do_check_in_ms.  Returns true.
extern bool sm_watchdog_module_initialize( int* do_check_in_ms );
|
|
||||||
// ****************************************************************************
|
|
||||||
|
|
||||||
// ****************************************************************************
|
|
||||||
// Watchdog NFS - Finalize
|
|
||||||
// =======================
|
|
||||||
// Clears the NFS watchdog state.  Returns true.
extern bool sm_watchdog_module_finalize( void );
|
|
||||||
// ****************************************************************************
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif // __SM_WATCHDOG_NFS_H__
|
|
|
@ -1,241 +0,0 @@
|
||||||
//
|
|
||||||
// Copyright (c) 2014 Wind River Systems, Inc.
|
|
||||||
//
|
|
||||||
// SPDX-License-Identifier: Apache-2.0
|
|
||||||
//
|
|
||||||
#include "sm_watchdog_process.h"
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <stdbool.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <signal.h>
|
|
||||||
#include <fcntl.h>
|
|
||||||
#include <unistd.h>
|
|
||||||
#include <time.h>
|
|
||||||
#include <sched.h>
|
|
||||||
#include <limits.h>
|
|
||||||
#include <errno.h>
|
|
||||||
#include <sys/types.h>
|
|
||||||
#include <sys/stat.h>
|
|
||||||
#include <sys/wait.h>
|
|
||||||
#include <sys/time.h>
|
|
||||||
#include <sys/syscall.h>
|
|
||||||
#include <sys/resource.h>
|
|
||||||
#include <sys/select.h>
|
|
||||||
#include <getopt.h>
|
|
||||||
|
|
||||||
#include "sm_limits.h"
|
|
||||||
#include "sm_types.h"
|
|
||||||
#include "sm_debug.h"
|
|
||||||
#include "sm_utils.h"
|
|
||||||
#include "sm_selobj.h"
|
|
||||||
#include "sm_time.h"
|
|
||||||
#include "sm_timer.h"
|
|
||||||
#include "sm_node_stats.h"
|
|
||||||
#include "sm_watchdog_module.h"
|
|
||||||
|
|
||||||
#define SM_WATCHDOG_PROCESS_TICK_INTERVAL_IN_MS 1000
|
|
||||||
|
|
||||||
// Run flag cleared by the signal handler on SIGINT, SIGTERM and SIGQUIT.
// It is volatile because it is written asynchronously from a signal handler.
static volatile sig_atomic_t _stay_on = 1;
|
|
||||||
|
|
||||||
// ****************************************************************************
|
|
||||||
// Watchdog Process - Signal Handler
|
|
||||||
// =================================
|
|
||||||
static void sm_watchdog_process_signal_handler( int signum )
|
|
||||||
{
|
|
||||||
switch( signum )
|
|
||||||
{
|
|
||||||
case SIGINT:
|
|
||||||
case SIGTERM:
|
|
||||||
case SIGQUIT:
|
|
||||||
_stay_on = 0;
|
|
||||||
break;
|
|
||||||
|
|
||||||
case SIGCONT:
|
|
||||||
DPRINTFD( "Ignoring signal SIGCONT (%i).", signum );
|
|
||||||
break;
|
|
||||||
|
|
||||||
default:
|
|
||||||
DPRINTFD( "Signal (%i) ignored.", signum );
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// ****************************************************************************
|
|
||||||
|
|
||||||
// ****************************************************************************
|
|
||||||
// Watchdog Process - Setup Signal Handler
|
|
||||||
// =======================================
|
|
||||||
static void sm_watchdog_process_setup_signal_handler( void )
|
|
||||||
{
|
|
||||||
struct sigaction sa;
|
|
||||||
|
|
||||||
memset( &sa, 0, sizeof(sa) );
|
|
||||||
sa.sa_handler = sm_watchdog_process_signal_handler;
|
|
||||||
|
|
||||||
sigaction( SIGINT, &sa, NULL );
|
|
||||||
sigaction( SIGTERM, &sa, NULL );
|
|
||||||
sigaction( SIGQUIT, &sa, NULL );
|
|
||||||
sigaction( SIGCONT, &sa, NULL );
|
|
||||||
|
|
||||||
signal( SIGCHLD, SIG_IGN );
|
|
||||||
}
|
|
||||||
// ****************************************************************************
|
|
||||||
|
|
||||||
// ****************************************************************************
|
|
||||||
// Watchdog Process - Initialize
|
|
||||||
// =============================
|
|
||||||
static SmErrorT sm_watchdog_process_initialize( void )
|
|
||||||
{
|
|
||||||
SmErrorT error;
|
|
||||||
|
|
||||||
error = sm_selobj_initialize();
|
|
||||||
if( SM_OKAY != error )
|
|
||||||
{
|
|
||||||
DPRINTFE( "Failed to initialize selection object module, error=%s.",
|
|
||||||
sm_error_str( error ) );
|
|
||||||
return( error );
|
|
||||||
}
|
|
||||||
|
|
||||||
error = sm_timer_initialize( SM_WATCHDOG_PROCESS_TICK_INTERVAL_IN_MS );
|
|
||||||
if( SM_OKAY != error )
|
|
||||||
{
|
|
||||||
DPRINTFE( "Failed to initialize timer module, error=%s.",
|
|
||||||
sm_error_str( error ) );
|
|
||||||
return( error );
|
|
||||||
}
|
|
||||||
|
|
||||||
error = sm_node_stats_initialize();
|
|
||||||
if( SM_OKAY != error )
|
|
||||||
{
|
|
||||||
DPRINTFE( "Failed to initialize node stats, error=%s.",
|
|
||||||
sm_error_str( error ) );
|
|
||||||
return( error );
|
|
||||||
}
|
|
||||||
|
|
||||||
return( SM_OKAY );
|
|
||||||
}
|
|
||||||
// ****************************************************************************
|
|
||||||
|
|
||||||
// ****************************************************************************
|
|
||||||
// Watchdog Process - Finalize
|
|
||||||
// ===========================
|
|
||||||
static SmErrorT sm_watchdog_process_finalize( void )
|
|
||||||
{
|
|
||||||
SmErrorT error;
|
|
||||||
|
|
||||||
error = sm_node_stats_finalize();
|
|
||||||
if( SM_OKAY != error )
|
|
||||||
{
|
|
||||||
DPRINTFE( "Failed to finialize node stats, error=%s.",
|
|
||||||
sm_error_str( error ) );
|
|
||||||
}
|
|
||||||
|
|
||||||
error = sm_timer_finalize();
|
|
||||||
if( SM_OKAY != error )
|
|
||||||
{
|
|
||||||
DPRINTFE( "Failed to finalize timer module, error=%s.",
|
|
||||||
sm_error_str( error ) );
|
|
||||||
}
|
|
||||||
|
|
||||||
error = sm_selobj_finalize();
|
|
||||||
if( SM_OKAY != error )
|
|
||||||
{
|
|
||||||
DPRINTFE( "Failed to finalize selection object module, error=%s.",
|
|
||||||
sm_error_str( error ) );
|
|
||||||
}
|
|
||||||
|
|
||||||
return( SM_OKAY );
|
|
||||||
}
|
|
||||||
// ****************************************************************************
|
|
||||||
|
|
||||||
// ****************************************************************************
|
|
||||||
// Watchdog Process - Main
|
|
||||||
// =======================
|
|
||||||
SmErrorT sm_watchdog_process_main( int argc, char *argv[], char *envp[] )
|
|
||||||
{
|
|
||||||
long ms_expired;
|
|
||||||
SmTimeT watchdog_heartbeat_time_prev;
|
|
||||||
SmErrorT error;
|
|
||||||
|
|
||||||
sm_watchdog_process_setup_signal_handler();
|
|
||||||
|
|
||||||
DPRINTFI( "Starting" );
|
|
||||||
|
|
||||||
if( sm_utils_process_running( SM_WATCHDOG_PROCESS_PID_FILENAME ) )
|
|
||||||
{
|
|
||||||
DPRINTFI( "Already running an instance of sm-watchdog." );
|
|
||||||
return( SM_OKAY );
|
|
||||||
}
|
|
||||||
|
|
||||||
if( !sm_utils_set_pid_file( SM_WATCHDOG_PROCESS_PID_FILENAME ) )
|
|
||||||
{
|
|
||||||
DPRINTFE( "Failed to write pid file for sm-watchdog, error=%s.",
|
|
||||||
strerror(errno) );
|
|
||||||
return( SM_FAILED );
|
|
||||||
}
|
|
||||||
|
|
||||||
error = sm_watchdog_process_initialize();
|
|
||||||
if( SM_OKAY != error )
|
|
||||||
{
|
|
||||||
DPRINTFE( "Failed initialize process, error=%s.",
|
|
||||||
sm_error_str(error) );
|
|
||||||
return( error );
|
|
||||||
}
|
|
||||||
|
|
||||||
error = sm_watchdog_module_load_all();
|
|
||||||
if( SM_OKAY != error )
|
|
||||||
{
|
|
||||||
DPRINTFE( "Failed load modules, error=%s.",
|
|
||||||
sm_error_str(error) );
|
|
||||||
return( error );
|
|
||||||
}
|
|
||||||
|
|
||||||
DPRINTFI( "Started." );
|
|
||||||
|
|
||||||
sm_time_get( &watchdog_heartbeat_time_prev );
|
|
||||||
sm_utils_watchdog_heartbeat();
|
|
||||||
|
|
||||||
while( _stay_on )
|
|
||||||
{
|
|
||||||
error = sm_selobj_dispatch( SM_WATCHDOG_PROCESS_TICK_INTERVAL_IN_MS );
|
|
||||||
if( SM_OKAY != error )
|
|
||||||
{
|
|
||||||
DPRINTFE( "Selection object dispatch failed, error=%s.",
|
|
||||||
sm_error_str(error) );
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
ms_expired = sm_time_get_elapsed_ms( &watchdog_heartbeat_time_prev );
|
|
||||||
if( SM_WATCHDOG_PROCESS_TICK_INTERVAL_IN_MS <= ms_expired )
|
|
||||||
{
|
|
||||||
if( sm_timer_scheduling_on_time() )
|
|
||||||
{
|
|
||||||
sm_utils_watchdog_heartbeat();
|
|
||||||
sm_time_get( &watchdog_heartbeat_time_prev );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
DPRINTFI( "Shutting down." );
|
|
||||||
|
|
||||||
error = sm_watchdog_module_unload_all();
|
|
||||||
if( SM_OKAY != error )
|
|
||||||
{
|
|
||||||
DPRINTFE( "Failed unload modules, error=%s.",
|
|
||||||
sm_error_str(error) );
|
|
||||||
}
|
|
||||||
|
|
||||||
error = sm_watchdog_process_finalize();
|
|
||||||
if( SM_OKAY != error )
|
|
||||||
{
|
|
||||||
DPRINTFE( "Failed to finalize process, error=%s.",
|
|
||||||
sm_error_str( error ) );
|
|
||||||
}
|
|
||||||
|
|
||||||
DPRINTFI( "Shutdown complete." );
|
|
||||||
|
|
||||||
return( SM_OKAY );
|
|
||||||
}
|
|
||||||
// ****************************************************************************
|
|
|
@ -1,25 +0,0 @@
|
||||||
//
|
|
||||||
// Copyright (c) 2014 Wind River Systems, Inc.
|
|
||||||
//
|
|
||||||
// SPDX-License-Identifier: Apache-2.0
|
|
||||||
//
|
|
||||||
#ifndef __SM_WATCHDOG_PROCESS_H__
|
|
||||||
#define __SM_WATCHDOG_PROCESS_H__
|
|
||||||
|
|
||||||
#include "sm_types.h"
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// ****************************************************************************
|
|
||||||
// Watchdog Process - Main
|
|
||||||
// =======================
|
|
||||||
// Runs the sm-watchdog process until it is asked to shut down.
// argc, argv and envp are accepted but not used by the implementation.
// Returns SM_OKAY after a normal shutdown or when another instance is already
// running, SM_FAILED when the pid file cannot be written, or the error
// reported by process initialization / watchdog module loading.
extern SmErrorT sm_watchdog_process_main( int argc, char *argv[], char *envp[] );
|
|
||||||
// ****************************************************************************
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif // __SM_WATCHDOG_PROCESS_H__
|
|
|
@ -1,6 +1,6 @@
|
||||||
[Unit]
|
[Unit]
|
||||||
Description=Service Management Unit
|
Description=Service Management Unit
|
||||||
After=network-online.target syslog-ng.service config.service sm-watchdog.service systemd-udev-settle.service drbd.service
|
After=network-online.target syslog-ng.service config.service systemd-udev-settle.service drbd.service
|
||||||
Before=sm-shutdown.service sm-api.service pmon.service
|
Before=sm-shutdown.service sm-api.service pmon.service
|
||||||
|
|
||||||
[Service]
|
[Service]
|
||||||
|
|
|
@ -67,9 +67,6 @@ timeout --signal KILL 5s pmap -x `cat /var/run/sm-trap.pid`
|
||||||
delimiter "pmap -x cat /var/run/sm-eru.pid"
|
delimiter "pmap -x cat /var/run/sm-eru.pid"
|
||||||
timeout --signal KILL 5s pmap -x `cat /var/run/sm-eru.pid`
|
timeout --signal KILL 5s pmap -x `cat /var/run/sm-eru.pid`
|
||||||
|
|
||||||
delimiter "pmap -x cat /var/run/sm-watchdog.pid"
|
|
||||||
timeout --signal KILL 5s pmap -x `cat /var/run/sm-watchdog.pid`
|
|
||||||
|
|
||||||
delimiter "top -b -n 1 -H -c"
|
delimiter "top -b -n 1 -H -c"
|
||||||
timeout --signal KILL 5s top -b -n 1 -H -c
|
timeout --signal KILL 5s top -b -n 1 -H -c
|
||||||
|
|
||||||
|
|
|
@ -19,14 +19,11 @@
|
||||||
#include <sys/resource.h>
|
#include <sys/resource.h>
|
||||||
|
|
||||||
#include "sm_types.h"
|
#include "sm_types.h"
|
||||||
#include "sm_utils.h"
|
|
||||||
#include "sm_debug.h"
|
#include "sm_debug.h"
|
||||||
#include "sm_sha512.h"
|
#include "sm_sha512.h"
|
||||||
#include "sm_service_action_table.h"
|
#include "sm_service_action_table.h"
|
||||||
#include "sm_service_action_result_table.h"
|
#include "sm_service_action_result_table.h"
|
||||||
|
|
||||||
#define SM_SERVICE_ACTION_MAX_DELAY_IN_SECS 4
|
|
||||||
#define SM_SERVICE_ACTION_TIMER_SKEW_IN_MS 60000
|
|
||||||
#define SM_SERVICE_ACTION_VALIDATE_TIMER_IN_MS 60000
|
#define SM_SERVICE_ACTION_VALIDATE_TIMER_IN_MS 60000
|
||||||
|
|
||||||
// ****************************************************************************
|
// ****************************************************************************
|
||||||
|
@ -839,15 +836,6 @@ SmErrorT sm_service_action_run( char service_name[], char instance_name[],
|
||||||
*process_id = (int) pid;
|
*process_id = (int) pid;
|
||||||
*timeout_in_ms = action_data->timeout_in_secs * 1000;
|
*timeout_in_ms = action_data->timeout_in_secs * 1000;
|
||||||
|
|
||||||
if( sm_utils_watchdog_delayed( SM_SERVICE_ACTION_MAX_DELAY_IN_SECS ) )
|
|
||||||
{
|
|
||||||
DPRINTFI( "Service (%s) timeout %d secs increased by %d ms, "
|
|
||||||
"sm-watchdog delayed.", action_data->service_name,
|
|
||||||
action_data->timeout_in_secs,
|
|
||||||
SM_SERVICE_ACTION_TIMER_SKEW_IN_MS );
|
|
||||||
*timeout_in_ms += SM_SERVICE_ACTION_TIMER_SKEW_IN_MS;
|
|
||||||
}
|
|
||||||
|
|
||||||
DPRINTFD( "Child process (%i) created for service (%s).", *process_id,
|
DPRINTFD( "Child process (%i) created for service (%s).", *process_id,
|
||||||
action_data->service_name );
|
action_data->service_name );
|
||||||
}
|
}
|
||||||
|
|
|
@ -40,9 +40,7 @@ typedef struct
|
||||||
SmServiceGroupNotificationT service_group_notification;
|
SmServiceGroupNotificationT service_group_notification;
|
||||||
} SmNotificationEnvT;
|
} SmNotificationEnvT;
|
||||||
|
|
||||||
#define SM_NOTIFICATION_SCRIPT_MAX_DELAY_IN_SECS 4
|
|
||||||
#define SM_NOTIFICATION_SCRIPT_TIMEOUT_IN_MS 30000
|
#define SM_NOTIFICATION_SCRIPT_TIMEOUT_IN_MS 30000
|
||||||
#define SM_NOTIFICATION_SCRIPT_TIMER_SKEW_IN_MS 60000
|
|
||||||
#define SM_NOTIFICATION_SCRIPT_SUCCESS 0
|
#define SM_NOTIFICATION_SCRIPT_SUCCESS 0
|
||||||
#define SM_NOTIFICATION_SCRIPT_TIMEOUT -65534
|
#define SM_NOTIFICATION_SCRIPT_TIMEOUT -65534
|
||||||
#define SM_NOTIFICATION_SCRIPT_FAILURE -65535
|
#define SM_NOTIFICATION_SCRIPT_FAILURE -65535
|
||||||
|
@ -712,14 +710,6 @@ SmErrorT sm_service_group_notification_notify( SmServiceGroupT* service_group,
|
||||||
snprintf( timer_name, sizeof(timer_name), "%s %s notification ",
|
snprintf( timer_name, sizeof(timer_name), "%s %s notification ",
|
||||||
service_group->name, notification_str );
|
service_group->name, notification_str );
|
||||||
|
|
||||||
if( sm_utils_watchdog_delayed( SM_NOTIFICATION_SCRIPT_MAX_DELAY_IN_SECS ) )
|
|
||||||
{
|
|
||||||
DPRINTFI( "Notification timeout %d secs increased by %d ms, "
|
|
||||||
"sm-watchdog delayed.", timeout_in_ms,
|
|
||||||
SM_NOTIFICATION_SCRIPT_TIMER_SKEW_IN_MS );
|
|
||||||
timeout_in_ms += SM_NOTIFICATION_SCRIPT_TIMER_SKEW_IN_MS;
|
|
||||||
}
|
|
||||||
|
|
||||||
error = sm_timer_register( timer_name, timeout_in_ms,
|
error = sm_timer_register( timer_name, timeout_in_ms,
|
||||||
sm_service_group_notification_timeout,
|
sm_service_group_notification_timeout,
|
||||||
service_group->id, &timer_id );
|
service_group->id, &timer_id );
|
||||||
|
|
Loading…
Reference in New Issue