From 083d38923a7db348bac5b44744576a63d7b212a9 Mon Sep 17 00:00:00 2001 From: Eric MacDonald Date: Fri, 8 Jun 2018 12:07:36 -0400 Subject: [PATCH] Mtce: Force enable failure of host that did not reboot during enable. If the first mtcAlive message from a host that was supposed to be rebooted reports uptime in excess of 40 minutes then that means it did not reboot as expected. This was seen to happen during an extended offline case where the host failed heartbeat, then was reported offline during Graceful Recovery which forced a full enable. When the host eventually came back online its reported uptime made it clear that it never rebooted but mtce allowed it to come into service anyway. This is a security issue that can lead to a host disappearing, being compromised and brought back into the system without a reboot. To fix that, this update requires that a host's uptime, reported in its first mtcAlive message, indicate that it has been up for less than twice the configured mtcAlive timeout or the enable will fail until it is proven to have reset. 
Story: 2002882 Task: 22845 Change-Id: I9b3ff0bc1ba5af2ca5b07a58db9da9f288b59576 Signed-off-by: Jack Ding --- .../maintenance/mtcNodeHdlrs.cpp | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeHdlrs.cpp b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeHdlrs.cpp index 01df6ddb..ba56776f 100755 --- a/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeHdlrs.cpp +++ b/mtce-common/cgts-mtce-common-1.0/maintenance/mtcNodeHdlrs.cpp @@ -1052,8 +1052,19 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr ) } else { - plog ("%s is MTCALIVE (uptime:%d)\n", node_ptr->hostname.c_str(), node_ptr->uptime ); - + plog ("%s is MTCALIVE (uptime:%d secs)\n", + node_ptr->hostname.c_str(), node_ptr->uptime ); + if ((NOT_THIS_HOST) && + ( node_ptr->uptime > ((unsigned int)(node_ptr->mtcalive_timeout*2)))) + { + elog ("%s uptime is more than %d seconds ; host did not reboot\n", + node_ptr->hostname.c_str(), + (node_ptr->mtcalive_timeout*2)); + elog ("%s ... enable failed ; host needs to reboot\n", + node_ptr->hostname.c_str()); + enableStageChange(node_ptr, MTC_ENABLE__FAILURE); + break ; + } /* Set the node mtcAlive timer to configured value. * This will revert bact to normal timeout after any first * unlock value that may be in effect. */