/* * Copyright (c) 2013, 2016 Wind River Systems, Inc. * * SPDX-License-Identifier: Apache-2.0 * */ /*************************************************************************** * * @file * Wind River CGTS Platform Node Maintenance "Finite State Machine" * * Description: This FSM follows the X.731 specification. * * The FSM manages nodes based on the following three perspectives * * Administrative: action taken on node (mtc_nodeAdministrative_action_type) * Operational : state of the node mtc_nodeOperational_state_type) * Availability : status of current node state (mtc_nodeAvailability_status_type) * */ using namespace std; #define __AREA__ "fsm" #include "nodeClass.h" #include "tokenUtil.h" #include "mtcNodeFsm.h" #include "mtcInvApi.h" #include "mtcNodeMsg.h" #include "mtcNodeHdlrs.h" /* for ... mtcTimer_handl */ int nodeLinkClass::fsm ( struct nodeLinkClass::node * node_ptr ) { int rc = PASS ; if ( node_ptr == NULL ) { slog ("Null Node Pointer\n"); return FAIL ; } /* if the multi-Node-Failure Avoidance timer rang then run its ecovery handler */ if ( mtcTimer_mnfa.ring == true ) { mtcTimer_mnfa.ring = false ; mnfa_exit ( true ); } /* handle clear task request */ if ( node_ptr->clear_task == true ) { mtcInvApi_update_task ( node_ptr, "" ); node_ptr->clear_task = false ; } /* Service the libEvent work queue */ workQueue_process ( node_ptr ) ; /* Service the maintenance command queue if there are commands waiting */ if ( node_ptr->mtcCmd_work_fifo.size()) { rc = nodeLinkClass::cmd_handler ( node_ptr ); if ( rc == RETRY ) { return (rc); } } /* Monitor and Manage active threads */ thread_handler ( node_ptr->ipmitool_thread_ctrl, node_ptr->ipmitool_thread_info ); /* manage the host connected state and board management alarms */ nodeLinkClass::bm_handler ( node_ptr ); /* manage host's degrade state */ nodeLinkClass::degrade_handler ( node_ptr ); /* * Always run the offline handler * * - does nothing unless in fault handling mode * - looks for offline state during fault handling */ nodeLinkClass::offline_handler ( node_ptr ); /* * Always run the online handler. * * - handles offline/online state transitions based on periodic audit * with mtcAlive debouncing */ nodeLinkClass::online_handler ( node_ptr ); if ( node_ptr->adminAction == MTC_ADMIN_ACTION__DELETE ) { flog ("%s -> Delete Action\n", node_ptr->hostname.c_str()); nodeLinkClass::delete_handler ( node_ptr ); return (PASS); } /* Run the config FSM if the configAction bool is set. * We keep this as a separate action unto itself so that * mtce can continue to service all other actions for the * same host while it handles configuration commands */ if (( node_ptr->configAction == MTC_CONFIG_ACTION__INSTALL_PASSWD ) || ( node_ptr->configAction == MTC_CONFIG_ACTION__CHANGE_PASSWD ) || ( node_ptr->configAction == MTC_CONFIG_ACTION__CHANGE_PASSWD_AGAIN )) { nodeLinkClass::cfg_handler ( node_ptr ); } /**************************************************************************** * No Op: Do nothing for this Healthy Enabled Running Host * This block of code was added to resolve an issue. With this change: * the insv_test_handler gets run as soon as a host's main function is enabled. **************************************************************************** */ if (( node_ptr->ar_disabled == false ) && ( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) && ( node_ptr->operState == MTC_OPER_STATE__ENABLED ) && ((node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE ) || (node_ptr->availStatus == MTC_AVAIL_STATUS__DEGRADED ))) { // flog ("%s -> insv_test_handler\n", node_ptr->hostname.c_str()); nodeLinkClass::insv_test_handler ( node_ptr ); } /**************************************************************************** * Add Host Services: **************************************************************************** */ if ( node_ptr->adminAction == MTC_ADMIN_ACTION__ADD ) { flog ("%s -> Add Action\n", node_ptr->hostname.c_str()); nodeLinkClass::add_handler ( node_ptr ); } /**************************************************************************** * No Op: Do nothing for this Healthy Enabled Running Host **************************************************************************** */ else if (( node_ptr->adminAction == MTC_ADMIN_ACTION__NONE ) && ( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) && ( node_ptr->operState == MTC_OPER_STATE__ENABLED ) && ((node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE ) || (node_ptr->availStatus == MTC_AVAIL_STATUS__DEGRADED ))) { // flog ("%s -> oos_test_handler\n", node_ptr->hostname.c_str()); nodeLinkClass::oos_test_handler ( node_ptr ); } else if ( node_ptr->adminAction == MTC_ADMIN_ACTION__POWERCYCLE ) { nodeLinkClass::powercycle_handler ( node_ptr ); } /**************************************************************************** * Reset Host: Run the Reset handler for this Reset Action on Locked Host **************************************************************************** */ else if ( node_ptr->adminAction == MTC_ADMIN_ACTION__RESET ) { flog ("%s -> Reset Action\n", node_ptr->hostname.c_str()); nodeLinkClass::reset_handler ( node_ptr ); nodeLinkClass::oos_test_handler ( node_ptr ); } /**************************************************************************** * Reboot Host: Run the Reboot handler for this Reboot Action on Locked Host **************************************************************************** */ else if ( node_ptr->adminAction == MTC_ADMIN_ACTION__REBOOT ) { flog ("%s -> Reboot Action\n", node_ptr->hostname.c_str()); nodeLinkClass::reboot_handler ( node_ptr ); } /**************************************************************************** * Recovering Host: Run Enable handler for failed or recovering host **************************************************************************** */ else if ((( node_ptr->adminAction == MTC_ADMIN_ACTION__NONE ) && ( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) && ( node_ptr->operState == MTC_OPER_STATE__ENABLED ) && ( node_ptr->availStatus == MTC_AVAIL_STATUS__FAILED )) || ( node_ptr->adminAction == MTC_ADMIN_ACTION__ENABLE)) { flog ("%s -> Run Enable Handler\n", node_ptr->hostname.c_str()); nodeLinkClass::enable_handler ( node_ptr ); } /* Do nothing with locked disabled offline state */ else if (( node_ptr->adminAction == MTC_ADMIN_ACTION__NONE ) && ( node_ptr->adminState == MTC_ADMIN_STATE__LOCKED ) && ( node_ptr->operState == MTC_OPER_STATE__DISABLED ) && (( node_ptr->availStatus == MTC_AVAIL_STATUS__OFFLINE ) || ( node_ptr->availStatus == MTC_AVAIL_STATUS__ONLINE ) || ( node_ptr->availStatus == MTC_AVAIL_STATUS__OFFDUTY ) || ( node_ptr->availStatus == MTC_AVAIL_STATUS__POWERED_OFF ))) { flog ("%s -> Run OOS Test Handler\n", node_ptr->hostname.c_str()); nodeLinkClass::oos_test_handler ( node_ptr ); } /**************************************************************************** * Recovering Host: Run Recovery handler for failed or recovering host **************************************************************************** */ else if (( node_ptr->adminAction == MTC_ADMIN_ACTION__RECOVER ) && ( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED )) { flog ("%s -> Run Recovery\n", node_ptr->hostname.c_str()); nodeLinkClass::recovery_handler ( node_ptr ); } /**************************************************************************** * Recovering Host: Run Enable handler for failed or recovering host **************************************************************************** */ else if ( ( node_ptr->adminAction == MTC_ADMIN_ACTION__NONE ) && ( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) && ( node_ptr->operState == MTC_OPER_STATE__DISABLED ) && (( node_ptr->availStatus == MTC_AVAIL_STATUS__FAILED ) || ( node_ptr->availStatus == MTC_AVAIL_STATUS__INTEST ) || ( node_ptr->availStatus == MTC_AVAIL_STATUS__OFFLINE ) || ( node_ptr->availStatus == MTC_AVAIL_STATUS__ONLINE ))) { flog ("%s -> Run Enable\n", node_ptr->hostname.c_str()); nodeLinkClass::enable_handler ( node_ptr ); } /* Try and recover an accidentally powered of host */ else if (( node_ptr->adminAction == MTC_ADMIN_ACTION__NONE ) && ( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) && ( node_ptr->availStatus == MTC_AVAIL_STATUS__POWERED_OFF ) && ( node_ptr->hwmon_powercycle.attempts == 0 ) && ( node_ptr->hwmon_powercycle.state == RECOVERY_STATE__INIT )) { ilog ("%s auto-poweron for unlocked host\n", node_ptr->hostname.c_str()); adminActionChange ( node_ptr, MTC_ADMIN_ACTION__POWERON ); /* FSM sanity check below will reject this operation, need exit now */ return (PASS); } /**************************************************************************** * Unlock Host: Run Enable handler for the Unlock Action ***************************************************************************/ else if ( node_ptr->adminAction == MTC_ADMIN_ACTION__UNLOCK ) { flog ("%s -> Unlock Action\n", node_ptr->hostname.c_str()); /* Proceed to unlock host */ nodeLinkClass::enable_handler ( node_ptr ); } /**************************************************************************** * Run the Subfunction FSM, usually after the ADD or at the end of the enable * in a small system. ****************************************************************************/ else if ( node_ptr->adminAction == MTC_ADMIN_ACTION__ENABLE_SUBF ) { flog ("%s -> Running SubFunction Enable handler (%d)\n", node_ptr->hostname.c_str(), node_ptr->enableStage ); nodeLinkClass::enable_subf_handler ( node_ptr ); } /**************************************************************************** * Lock Host: Run Disable handler for the Lock Action **************************************************************************** */ else if (( node_ptr->adminAction == MTC_ADMIN_ACTION__LOCK ) || ( node_ptr->adminAction == MTC_ADMIN_ACTION__FORCE_LOCK )) { // flog ("%s -> Lock Action\n", node_ptr->hostname.c_str()); nodeLinkClass::disable_handler ( node_ptr ); } /**************************************************************************** * Semantic Handling: Reject Recovery Actions Against In-Service Host **************************************************************************** */ else if (( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) && (( node_ptr->adminAction == MTC_ADMIN_ACTION__POWEROFF ) || ( node_ptr->adminAction == MTC_ADMIN_ACTION__RESET ) || ( node_ptr->adminAction == MTC_ADMIN_ACTION__REBOOT ) || ( node_ptr->adminAction == MTC_ADMIN_ACTION__REINSTALL ))) { flog ("%s -> OOS Action Check\n", node_ptr->hostname.c_str()); /* TEMPORARY: To allow reset of unlocked host for fault insertion. */ if ( node_ptr->adminAction == MTC_ADMIN_ACTION__RESET ) { wlog ("%s Allowing Reset of unlocked host for FIT\n", node_ptr->hostname.c_str()); if ( node_ptr->hostname.compare(nodeLinkClass::my_hostname)) { nodeLinkClass::reset_handler ( node_ptr ); } else { wlog ("%s Cowardly avoiding reset of self\n", node_ptr->hostname.c_str()); adminActionChange ( node_ptr , MTC_ADMIN_ACTION__NONE ); /* Clear the UI task since we are not really resetting */ mtcInvApi_update_task ( node_ptr, "" ); } } else { elog ("%s Administrative '%s' Operation Rejected\n", node_ptr->hostname.c_str(), get_adminAction_str (node_ptr->adminAction) ); elog ("%s Cannot perform out-of-service action against in-service host\n", node_ptr->hostname.c_str()); adminActionChange ( node_ptr , MTC_ADMIN_ACTION__NONE ); /* Clear the UI task since we are not really resetting */ mtcInvApi_update_task ( node_ptr, "" ); } } /**************************************************************************** * Reload Host: Run the Reload handler to Nuke the disk on Locked Host **************************************************************************** */ else if ( node_ptr->adminAction == MTC_ADMIN_ACTION__REINSTALL ) { flog ("%s -> Reload Action\n", node_ptr->hostname.c_str()); nodeLinkClass::reinstall_handler ( node_ptr ); } /**************************************************************************** * No Op: Do nothing for this Healthy Enabled Locked CPE Simplex Host **************************************************************************** */ else if (( this->system_type == SYSTEM_TYPE__CPE_MODE__SIMPLEX ) && ( node_ptr->adminAction == MTC_ADMIN_ACTION__NONE ) && ( node_ptr->adminState == MTC_ADMIN_STATE__LOCKED )) { nodeLinkClass::insv_test_handler ( node_ptr ); nodeLinkClass::oos_test_handler ( node_ptr ); } /**************************************************************************** * Power-Off Host: **************************************************************************** */ else if ( node_ptr->adminAction == MTC_ADMIN_ACTION__POWEROFF ) { flog ("%s -> Power-Off Action\n", node_ptr->hostname.c_str()); nodeLinkClass::power_handler ( node_ptr ); nodeLinkClass::oos_test_handler ( node_ptr ); } /**************************************************************************** * Power-On Host: **************************************************************************** */ else if ( node_ptr->adminAction == MTC_ADMIN_ACTION__POWERON ) { flog ("%s -> Power-On Action\n", node_ptr->hostname.c_str()); nodeLinkClass::power_handler ( node_ptr ); nodeLinkClass::oos_test_handler ( node_ptr ); } /**************************************************************************** * Swact Host Services: **************************************************************************** */ else if (( node_ptr->adminAction == MTC_ADMIN_ACTION__SWACT ) || ( node_ptr->adminAction == MTC_ADMIN_ACTION__FORCE_SWACT )) { flog ("%s -> Swact Action\n", node_ptr->hostname.c_str()); nodeLinkClass::swact_handler ( node_ptr ); } /***** DEGRADED Cases *******/ /* Handle the degrade action */ else if (( node_ptr->adminAction == MTC_ADMIN_ACTION__NONE ) && ( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) && ( node_ptr->operState == MTC_OPER_STATE__ENABLED ) && ( node_ptr->availStatus == MTC_AVAIL_STATUS__DEGRADED )) { /* We do nothing, the in service test catches this */ // flog ("%s -> Degrade Recovery\n", node_ptr->hostname.c_str()); ; // nodeLinkClass::degrade_handler ( node_ptr ); } else { if (( node_ptr->adminState >= MTC_ADMIN_STATES ) || ( node_ptr->operState >= MTC_OPER_STATES ) || ( node_ptr->availStatus >= MTC_AVAIL_STATUS )) { elog ("Unhandled FSM Case: %s %d-%d-%d\n", node_ptr->hostname.c_str(), node_ptr->adminState, node_ptr->operState, node_ptr->availStatus ); } else { wlog ("Unsupported FSM State: %s Action:%s %s-%s-%s ; auto-correcting ...\n", node_ptr->hostname.c_str(), get_adminAction_str ( node_ptr->adminAction ), adminState_enum_to_str (node_ptr->adminState).c_str(), operState_enum_to_str (node_ptr->operState).c_str(), availStatus_enum_to_str (node_ptr->availStatus).c_str()); } /* Unlocked state overrides unsupported oper-avail states * Try to recover the host */ if ( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) { /* Reset the state in the database for these error states */ node_ptr->adminState = MTC_ADMIN_STATE__UNLOCKED ; node_ptr->operState = MTC_OPER_STATE__DISABLED ; node_ptr->availStatus = MTC_AVAIL_STATUS__ONLINE ; mtcInvApi_update_states ( node_ptr, "unlocked", "disabled" , "online" ); /* Force the action */ adminActionChange ( node_ptr , MTC_ADMIN_ACTION__UNLOCK ); } else { /* Reset the state in the database for these error states */ node_ptr->adminState = MTC_ADMIN_STATE__LOCKED ; node_ptr->operState = MTC_OPER_STATE__DISABLED ; node_ptr->availStatus = MTC_AVAIL_STATUS__OFFLINE ; mtcInvApi_update_states ( node_ptr, "locked", "disabled" , "offline" ); /* Force the action */ adminActionChange ( node_ptr , MTC_ADMIN_ACTION__FORCE_LOCK ); } return (PASS); } return (rc) ; }