diff --git a/mtce-common/centos/build_srpm.data b/mtce-common/centos/build_srpm.data index a6783bd5..d872dde9 100644 --- a/mtce-common/centos/build_srpm.data +++ b/mtce-common/centos/build_srpm.data @@ -1,3 +1,3 @@ SRC_DIR="src" -TIS_PATCH_VER=140 +TIS_PATCH_VER=141 BUILD_IS_SLOW=5 diff --git a/mtce-common/src/common/logMacros.h b/mtce-common/src/common/logMacros.h index 8d73fe1c..55a72496 100644 --- a/mtce-common/src/common/logMacros.h +++ b/mtce-common/src/common/logMacros.h @@ -166,6 +166,18 @@ typedef struct int kernwd_update_period ; /**< expect kernel watchdog to be updated */ int autorecovery_threshold ; /**< AIO stop autorecovery threshold */ + /**< Auto Recovery Thresholds */ + int ar_config_threshold ; /**< Configuration Failure Threshold */ + int ar_goenable_threshold ; /**< GoEnable Failure Threshold */ + int ar_hostservices_threshold ; /**< Host Services Failure Threshold */ + int ar_heartbeat_threshold ; /**< Heartbeat Soak Failure Threshold*/ + + /**< Auto Recovery Retry Intervals */ + int ar_config_interval ; /**< Configuration Failure Interval */ + int ar_goenable_interval ; /**< GoEnable Failure Interval */ + int ar_hostservices_interval ; /**< Host Services Failure Interval */ + int ar_heartbeat_interval ; /**< Heartbeat Soak Failure Interval */ + int debug_all ; int debug_json ; /**< Enable jlog (json string ) output if not false */ int debug_timer ; /**< Enable tlog (timer logs ) output if not false */ diff --git a/mtce-common/src/common/nodeBase.h b/mtce-common/src/common/nodeBase.h index 9e8586cd..9d113768 100755 --- a/mtce-common/src/common/nodeBase.h +++ b/mtce-common/src/common/nodeBase.h @@ -229,28 +229,35 @@ void daemon_exit ( void ); #define MTC_TASK_INIT_FAIL "Initialization Failed, recovering" #define MTC_TASK_START_SERVICE_FAIL "Start Services Failed" #define MTC_TASK_START_SERVICE_TO "Start Services Timeout" -#define MTC_TASK_ENABLING "Enabling" -#define MTC_TASK_ENABLING_SUBF "Enabling Compute Service" -#define 
MTC_TASK_ENABLING_SUBF_FAIL "Enabling Compute Service Failed" -#define MTC_TASK_ENABLING_SUBF_TO "Enabling Compute Service Timeout" -#define MTC_TASK_ENABLE_WORK_FAIL "Enable Action Failed, re-enabling" -#define MTC_TASK_ENABLE_WORK_FAIL_ "Enable Action Failed" -#define MTC_TASK_ENABLE_WORK_TO "Enable Action Timeout, re-enabling" -#define MTC_TASK_ENABLE_WORK_TO_ "Enable Action Timeout" +#define MTC_TASK_ENABLE_WORK_FAIL "Enable Action Failed" +#define MTC_TASK_ENABLE_WORK_TO "Enable Action Timeout" #define MTC_TASK_ENABLE_FAIL_HB "Enable Heartbeat Failure, re-enabling" #define MTC_TASK_RECOVERY_FAIL "Graceful Recovery Failed, re-enabling" #define MTC_TASK_RECOVERY_WAIT "Graceful Recovery Wait" #define MTC_TASK_RECOVERED "Gracefully Recovered" + +#define MTC_TASK_ENABLING "Enabling" #define MTC_TASK_MAIN_CONFIG_FAIL "Configuration Failed, re-enabling" -#define MTC_TASK_SUBF_CONFIG_FAIL "Compute Configuration Failed, re-enabling" -#define MTC_TASK_SUBF_CONFIG_FAIL_ "Compute Configuration Failed" #define MTC_TASK_MAIN_CONFIG_TO "Configuration Timeout, re-enabling" +#define MTC_TASK_MAIN_INTEST_FAIL "In-Test Failed, re-enabling" +#define MTC_TASK_MAIN_INTEST_TO "In-Test Timeout, re-enabling" +#define MTC_TASK_MAIN_SERVICE_FAIL "Start Services Failed, re-enabling" +#define MTC_TASK_MAIN_SERVICE_TO "Start Services Timeout, re-enabling" + +#define MTC_TASK_ENABLING_SUBF "Enabling Compute Service" +#define MTC_TASK_SUBF_CONFIG_FAIL "Compute Configuration Failed, re-enabling" #define MTC_TASK_SUBF_CONFIG_TO "Compute Configuration Timeout, re-enabling" -#define MTC_TASK_SUBF_CONFIG_TO_ "Compute Configuration Timeout" -#define MTC_TASK_INTEST_FAIL "In-Test Failed, re-enabling" -#define MTC_TASK_INTEST_FAIL_ "In-Test Failed" -#define MTC_TASK_INTEST_FAIL_TO "In-Test Timeout, re-enabling" -#define MTC_TASK_INTEST_FAIL_TO_ "In-Test Timeout" +#define MTC_TASK_SUBF_INTEST_FAIL "Compute In-Test Failed, re-enabling" +#define MTC_TASK_SUBF_INTEST_TO "Compute In-Test Timeout, 
re-enabling" +#define MTC_TASK_SUBF_SERVICE_FAIL "Compute Start Services Failed, re-enabling" +#define MTC_TASK_SUBF_SERVICE_TO "Compute Start Services Timeout, re-enabling" + +#define MTC_TASK_AR_DISABLED_CONFIG "Configuration failure, threshold reached, Lock/Unlock to retry" +#define MTC_TASK_AR_DISABLED_GOENABLE "In-Test Failure, threshold reached, Lock/Unlock to retry" +#define MTC_TASK_AR_DISABLED_SERVICES "Service Failure, threshold reached, Lock/Unlock to retry" +#define MTC_TASK_AR_DISABLED_ENABLE "Enable Failure, threshold reached, Lock/Unlock to retry" +#define MTC_TASK_AR_DISABLED_HEARTBEAT "Heartbeat Failure, threshold reached, Lock/Unlock to retry" + #define MTC_TASK_RESET_FAIL "Reset Failed" #define MTC_TASK_RESET_QUEUE "Reset Failed, retrying (%d of %d)" #define MTC_TASK_POWERON_FAIL "Power-On Failed" @@ -275,8 +282,6 @@ void daemon_exit ( void ); #define MTC_TASK_RESETTING_HOST "Resetting Host, critical sensor" #define MTC_TASK_CPE_SX_UNLOCK_MSG "Unlocking, please stand-by while the system gracefully reboots" #define MTC_TASK_SELF_UNLOCK_MSG "Unlocking active controller, please stand-by while it reboots" -#define MTC_TASK_AUTO_RECOVERY "Critical failure. Auto-recovery enabled, re-enabling" -#define MTC_TASK_AUTO_RECOVERY_DISABLED "Critical failure. 
Auto-recovery disabled, threshold reached" #define MTC_TASK_FAILED_SWACT_REQ "Critical failure.Requesting SWACT to enabled standby controller" #define MTC_TASK_FAILED_NO_BACKUP "Critical failure.Please provision/enable standby controller" @@ -1176,13 +1181,6 @@ typedef enum MTC_STRESS_TEST__STAGES = 6, } mtc_stressStages_enum ; -typedef union -{ - mtc_enableStages_enum enable ; - mtc_disableStages_enum disable ; - int raw ; -} mtc_stages_union ; - typedef struct { mtc_nodeAdminAction_enum adminAction ; @@ -1210,6 +1208,24 @@ typedef enum MAX_IFACES = 2 } iface_enum ; +/* Auto recovery Disable Causes */ +typedef enum +{ + MTC_AR_DISABLE_CAUSE__CONFIG, + MTC_AR_DISABLE_CAUSE__GOENABLE, + MTC_AR_DISABLE_CAUSE__HOST_SERVICES, + MTC_AR_DISABLE_CAUSE__HEARTBEAT, + MTC_AR_DISABLE_CAUSE__LAST, + MTC_AR_DISABLE_CAUSE__NONE, +} autorecovery_disable_cause_enum ; + +/* Service Based Auto Recovery Control Structure */ +typedef struct +{ + unsigned int count ; /* running back-2-back failure count */ + bool disabled ; /* true if autorecovery is disabled */ +} autorecovery_cause_ctrl_type ; + /** Returns true if the specified admin state string is valid */ bool adminStateOk ( string admin ); diff --git a/mtce/src/common/nodeClass.cpp b/mtce/src/common/nodeClass.cpp index 04d96330..e88b84be 100755 --- a/mtce/src/common/nodeClass.cpp +++ b/mtce/src/common/nodeClass.cpp @@ -236,9 +236,6 @@ nodeLinkClass::nodeLinkClass() this->loc_recovery_timeout = 0; this->node_reinstall_timeout = 0; this->token_refresh_rate = 0; - this->autorecovery_enabled = false ; - this->autorecovery_disabled = false ; - head = tail = NULL; memory_allocs = 0 ; @@ -313,6 +310,11 @@ nodeLinkClass::nodeLinkClass() sysinv_noncrit_timeout = HTTP_SYSINV_NONC_TIMEOUT ; work_queue_timeout = MTC_WORKQUEUE_TIMEOUT ; + /* Init the auto recovery threshold and intervals to zero until + * modified by daemon config */ + memset (&ar_threshold, 0, sizeof(ar_threshold)); + memset (&ar_interval, 0, sizeof(ar_interval)); + /* 
Inservice test periods in seconds - 0 = disabled */ insv_test_period = 0 ; oos_test_period = 0 ; @@ -340,7 +342,6 @@ nodeLinkClass::nodeLinkClass() tokenEvent.buf = NULL ; unknown_host_throttle = 0 ; - invalid_arg_throttle = 0 ; testmode = 0 ; module_init( ); @@ -564,6 +565,11 @@ nodeLinkClass::node* nodeLinkClass::addNode( string hostname ) ptr->was_dor_recovery_mode= false ; ptr->dor_recovery_time = 0 ; + ptr->ar_disabled = false ; + ptr->ar_cause = MTC_AR_DISABLE_CAUSE__NONE ; + memset (&ptr->ar_count, 0, sizeof(ptr->ar_count)); + ptr->ar_log_throttle = 0 ; + mtcTimer_init ( ptr->mtcTimer, hostname, "mtc timer"); /* Init node's general mtc timer */ mtcTimer_init ( ptr->insvTestTimer, hostname, "insv test timer"); mtcTimer_init ( ptr->oosTestTimer, hostname, "oos test timer"); /* Init node's oos test timer */ @@ -603,7 +609,8 @@ nodeLinkClass::node* nodeLinkClass::addNode( string hostname ) ptr->subStage = MTC_SUBSTAGE__DONE ; ptr->reinstallStage = MTC_REINSTALL__DONE ; ptr->resetStage = MTC_RESET__START ; - ptr->handlerStage.enable = MTC_ENABLE__START ; /* Enable and Disable */ + ptr->enableStage = MTC_ENABLE__START ; + ptr->disableStage = MTC_DISABLE__START ; ptr->oos_test_count = 0 ; ptr->insv_test_count = 0 ; @@ -613,9 +620,8 @@ nodeLinkClass::node* nodeLinkClass::addNode( string hostname ) ptr->uptime_refresh_counter = 0 ; ptr->node_unlocked_counter = 0 ; - /* Default to a healthy config until mtcAlive messages prove otherwise */ - ptr->mtce_flags = ( MTC_FLAG__I_AM_CONFIGURED | - MTC_FLAG__I_AM_HEALTHY ) ; + /* Good health needs to be learned */ + ptr->mtce_flags = 0 ; ptr->graceful_recovery_counter = 0 ; ptr->health_threshold_counter = 0 ; @@ -746,8 +752,6 @@ nodeLinkClass::node* nodeLinkClass::addNode( string hostname ) ptr->adminAction = MTC_ADMIN_ACTION__NONE ; ptr->adminAction_todo_list.clear(); - ptr->handlerStage.enable = MTC_ENABLE__START; - hosts++ ; /* (re)build the Resource Reference Array */ @@ -4504,16 +4508,31 @@ void 
nodeLinkClass::manage_heartbeat_failure ( string hostname, iface_enum iface } else { - //bool want_degrade = true ; - //if ( this->hbs_failure_action == HBS_FAILURE_ACTION__ALARM ) - // want_degrade = false ; - // alarm_enabled_failure (node_ptr, want_degrade); + /* handle auto recovery for heartbeat failure during enable */ + if ( node_ptr->ar_cause == MTC_AR_DISABLE_CAUSE__HEARTBEAT ) + return ; + else if ( node_ptr->enableStage == MTC_ENABLE__HEARTBEAT_SOAK ) + { + elog ("%s %s *** Heartbeat Loss *** (during enable soak)\n", + hostname.c_str(), + get_iface_name_str(iface)); + + if ( ar_manage ( node_ptr, + MTC_AR_DISABLE_CAUSE__HEARTBEAT, + MTC_TASK_AR_DISABLED_HEARTBEAT ) == PASS ) + { + mtcInvApi_update_task ( node_ptr, MTC_TASK_ENABLE_FAIL_HB ); + enableStageChange ( node_ptr, MTC_ENABLE__FAILURE ); + adminActionChange ( node_ptr, MTC_ADMIN_ACTION__NONE ); + } + return ; + } mnfa_add_host ( node_ptr , iface ); if ( mnfa_active == false ) { - elog ("%s %s *** Heartbeat Loss ***\n", hostname.c_str(), get_iface_name_str(iface)); + elog ("%s %s *** Heartbeat Loss ***\n", hostname.c_str(), get_iface_name_str(iface)); if ( iface == INFRA_IFACE ) { node_ptr->heartbeat_failed[INFRA_IFACE] = true ; @@ -4546,6 +4565,7 @@ void nodeLinkClass::manage_heartbeat_failure ( string hostname, iface_enum iface { mtcInvApi_update_task ( node_ptr, MTC_TASK_ENABLE_FAIL_HB ); enableStageChange ( node_ptr, MTC_ENABLE__FAILURE ); + adminActionChange ( node_ptr, MTC_ADMIN_ACTION__NONE ); } } } @@ -5485,7 +5505,7 @@ int nodeLinkClass::critical_process_failed( string & hostname, /* Special critical process failure handling for AIO system */ if ( THIS_HOST && ( is_inactive_controller_main_insv() == false )) { - if ( this->autorecovery_disabled == true ) + if ( node_ptr->ar_disabled == true ) { dlog ("%s bypassing persistent critical process failure\n", node_ptr->hostname.c_str()); @@ -5510,7 +5530,7 @@ int nodeLinkClass::critical_process_failed( string & hostname, dlog ("%s adminState:%s 
EnableStage:%s\n", node_ptr->hostname.c_str(), adminAction_enum_to_str(node_ptr->adminAction).c_str(), - get_enableStages_str(node_ptr->handlerStage.enable).c_str()); + get_enableStages_str(node_ptr->enableStage).c_str()); } return (PASS); } @@ -5843,7 +5863,7 @@ int nodeLinkClass::set_enableStage ( string & hostname, nodeLinkClass::node * node_ptr = getNode ( hostname ) ; if ( node_ptr != NULL ) { - node_ptr->handlerStage.enable = stage ; + node_ptr->enableStage = stage ; return (PASS); } return (FAIL); @@ -5867,7 +5887,7 @@ mtc_enableStages_enum nodeLinkClass::get_enableStage ( string & hostname) nodeLinkClass::node * node_ptr = getNode ( hostname ) ; if ( node_ptr != NULL ) { - return ( node_ptr->handlerStage.enable ) ; + return ( node_ptr->enableStage ) ; } return (MTC_ENABLE__STAGES); } @@ -6124,16 +6144,15 @@ int nodeLinkClass::adminActionChange ( struct nodeLinkClass::node * node_ptr, node_ptr->node_unlocked_counter++ ; } - if ( is_controller ( node_ptr ) ) - autorecovery_clear (node_ptr->hostname); + ar_enable (node_ptr); - node_ptr->handlerStage.enable = MTC_ENABLE__START ; + node_ptr->enableStage = MTC_ENABLE__START ; break ; } case MTC_ADMIN_ACTION__LOCK: case MTC_ADMIN_ACTION__FORCE_LOCK: { - node_ptr->handlerStage.disable = MTC_DISABLE__START ; + node_ptr->disableStage = MTC_DISABLE__START ; break ; } case MTC_ADMIN_ACTION__RESET: @@ -6423,26 +6442,26 @@ int nodeLinkClass::enableStageChange ( struct nodeLinkClass::node * node_ptr, { /* TODO: Consider converting stage to strings ... */ if (( newHdlrStage >= MTC_ENABLE__STAGES ) || - ( node_ptr->handlerStage.enable >= MTC_ENABLE__STAGES )) + ( node_ptr->enableStage >= MTC_ENABLE__STAGES )) { slog ("%s has invalid Enable stage (%d:%d)\n", node_ptr->hostname.c_str(), - node_ptr->handlerStage.enable, + node_ptr->enableStage, newHdlrStage ); - node_ptr->handlerStage.enable = MTC_ENABLE__FAILURE ; + node_ptr->enableStage = MTC_ENABLE__FAILURE ; /* TODO: cause failed or degraded state ? 
*/ return (FAIL); } - else if ( node_ptr->handlerStage.enable != newHdlrStage ) + else if ( node_ptr->enableStage != newHdlrStage ) { clog ("%s %s -> %s\n", node_ptr->hostname.c_str(), - get_enableStages_str(node_ptr->handlerStage.enable).c_str(), + get_enableStages_str(node_ptr->enableStage).c_str(), get_enableStages_str(newHdlrStage).c_str()); - node_ptr->handlerStage.enable = newHdlrStage ; + node_ptr->enableStage = newHdlrStage ; return (PASS); } else @@ -6450,7 +6469,7 @@ int nodeLinkClass::enableStageChange ( struct nodeLinkClass::node * node_ptr, /* No state change */ dlog1 ("%s %s -> %s\n", node_ptr->hostname.c_str(), - get_enableStages_str(node_ptr->handlerStage.enable).c_str(), + get_enableStages_str(node_ptr->enableStage).c_str(), get_enableStages_str(newHdlrStage).c_str()); return (PASS); } @@ -6461,15 +6480,15 @@ int nodeLinkClass::disableStageChange ( struct nodeLinkClass::node * node_ptr, mtc_disableStages_enum newHdlrStage ) { /* TODO: Consider converting stage to strings ... */ - if (( newHdlrStage >= MTC_DISABLE__STAGES ) || - ( node_ptr->handlerStage.disable >= MTC_DISABLE__STAGES )) + if (( newHdlrStage >= MTC_DISABLE__STAGES ) || + ( node_ptr->disableStage >= MTC_DISABLE__STAGES )) { - slog ("%s has invalid disable stage (%d:%d)\n", + slog ("%s has invalid disable stage (%d:%d)\n", node_ptr->hostname.c_str(), - node_ptr->handlerStage.disable, + node_ptr->disableStage, newHdlrStage ); - node_ptr->handlerStage.disable = MTC_DISABLE__DISABLED ; + node_ptr->disableStage = MTC_DISABLE__DISABLED ; /* TODO: cause failed or degraded state ? 
*/ return (FAIL); @@ -6478,10 +6497,10 @@ int nodeLinkClass::disableStageChange ( struct nodeLinkClass::node * node_ptr, { clog ("%s %s -> %s\n", node_ptr->hostname.c_str(), - get_disableStages_str(node_ptr->handlerStage.disable).c_str(), + get_disableStages_str(node_ptr->disableStage).c_str(), get_disableStages_str(newHdlrStage).c_str()); - node_ptr->handlerStage.disable = newHdlrStage ; + node_ptr->disableStage = newHdlrStage ; return (PASS); } } @@ -7053,61 +7072,131 @@ struct nodeLinkClass::node * nodeLinkClass::get_insvTestTimer ( timer_t tid ) return static_cast(NULL); } - /***************************************************************************** * - * Name : autorecovery_clear + * Name : ar_enable * - * Assumptions: Applies when simplex. - * - * Description: Removes the auto recovery count file if it exists. - * - * Auto recovery count is tracked/preserved in a host named auto recovery - * counter file /tmp/hostname_ar_count. - * - *****************************************************************************/ - -void autorecovery_clear ( string hostname ) -{ - string ar_file = TMP_DIR_PATH + hostname + AUTO_RECOVERY_FILE_SUFFIX ; - if ( daemon_is_file_present (ar_file.data())) - { - wlog ("%s clearing autorecovery counter\n", hostname.c_str()); - daemon_remove_file (ar_file.data()); - } -} - -/***************************************************************************** - * - * Name : manage_autorecovery - * - * Assumptions: Applies to the active controller only while simplex. - * - * Description: Issues an immediate lazy reboot if the autorecovery threshold - * is reached. Otherwise it disables autorecovery and returns - * do we don't get a rolling boot loop. + * Description: Clears all auto recovery state for the specified host and + * removes the auto recovery count file if it exists. * * Auto recovery count is tracked/preserved in a host named auto recovery * counter file /etc/mtc/tmp/hostname_ar_count. 
* - * in the event of a persistent autorecovery failure that results in a - * disable then the active controller goes enabled-degraded with a horizon - * status that indicates the active controller has a critical failure but - * auto recovery is disabled. The enable alarm is raised. - * *****************************************************************************/ -void nodeLinkClass::manage_autorecovery ( struct nodeLinkClass::node * node_ptr ) +void nodeLinkClass::ar_enable ( struct nodeLinkClass::node * node_ptr ) { - /* manage removing the auto recovery threshold count file */ - if ( ( THIS_HOST ) && - ( this->autorecovery_enabled == true ) && - ( this->autorecovery_disabled == false ) && - ( is_inactive_controller_main_insv() == false )) + string ar_file = TMP_DIR_PATH + node_ptr->hostname + AUTO_RECOVERY_FILE_SUFFIX ; + if ( daemon_is_file_present (ar_file.data())) { - int value = 0 ; - string ar_file = TMP_DIR_PATH + node_ptr->hostname + AUTO_RECOVERY_FILE_SUFFIX ; - int threshold = daemon_get_cfg_ptr()->autorecovery_threshold ; + wlog ("%s clearing autorecovery file counter\n", node_ptr->hostname.c_str()); + daemon_remove_file (ar_file.data()); + } + + if (( node_ptr->ar_disabled ) || + ( node_ptr->ar_cause != MTC_AR_DISABLE_CAUSE__NONE )) + { + wlog ("%s re-enabling autorecovery\n", node_ptr->hostname.c_str()); + } + + node_ptr->ar_disabled = false ; + node_ptr->ar_cause = MTC_AR_DISABLE_CAUSE__NONE ; + memset (&node_ptr->ar_count, 0, sizeof(node_ptr->ar_count)); + + node_ptr->ar_log_throttle = 0 ; +} + +/***************************************************************************** + * + * Name : ar_manage + * + * Purpose : Manage Auto Recovery state. + * + * Description: the following checks and operations are performed ... + * + * Pre Checks: + * + * Validate auto recovery cause code + * Return if already in ar_disabled state. Unlikely but safe guard. 
+ * + * Manage Auto Recovery: + * + * Case 1: Failed active controller with no enabled inactive controller. + * + * Requires persistent count file and self reboot until threshold + * is reached. + * + * Issues an immediate lazy reboot if the autorecovery threshold + * is not reached. Otherwise it disables autorecovery and returns + * so we don't get a rolling boot loop. + * + * Auto recovery count is tracked/preserved in a host named auto + * recovery counter file /etc/mtc/tmp/hostname_ar_count. + * + * Case 2: All other cases + * + * Case 2a: No auto recovery thresholding of active controller in non AIO SX + * where the user can't lock and unlock the active controller. + * + * Maintain auto recovery count and set ar_disabled for the host when + * the threshold is reached. + * + * Parameters: + * + * node_ptr nodeLinkClass ptr of failing host. + * + * cause autorecovery_disable_cause_enum failure cause code. + * + * string host status string to display when auto recovery + * threshold is reached and autorecovery is disabled. + * + * Returns: + * + * FAIL tells the caller to break from its FSM at earliest opportunity + * because auto recovery threshold is reached and auto recovery + * is disabled. + * + * PASS tells the caller that the threshold is not reached and to + * continue handling the failure. 
+ * + ******************************************************************************/ + +int nodeLinkClass::ar_manage ( struct nodeLinkClass::node * node_ptr, + autorecovery_disable_cause_enum cause, + string ar_disable_banner ) +{ + int rc = FAIL ; + + /* Auto recovery only applies for hosts that are unlocked + * and not already in ar_disabled state */ + if (( node_ptr->adminState != MTC_ADMIN_STATE__UNLOCKED ) || + ( node_ptr->ar_disabled )) + { + return (rc); + } + + /* check for invalid call case */ + if ( cause >= MTC_AR_DISABLE_CAUSE__LAST ) + { + slog ("%s called with invalid auto recovery cause (%d)", + node_ptr->hostname.c_str(), cause ); + return (rc); + } + + /* update cause code */ + if ( node_ptr->ar_cause != cause ) + node_ptr->ar_cause = cause ; + + + /* Case 1 check */ + if ( ( THIS_HOST ) && ( is_inactive_controller_main_insv() == false )) + { + /* manage the auto recovery threshold count file */ + unsigned int value = 0 ; + + string ar_file = TMP_DIR_PATH + + node_ptr->hostname + + AUTO_RECOVERY_FILE_SUFFIX ; if ( daemon_is_file_present (ar_file.data())) { @@ -7119,48 +7208,75 @@ void nodeLinkClass::manage_autorecovery ( struct nodeLinkClass::node * node_ptr /* Save the new value in the file */ daemon_log_value ( ar_file.data(), value ); + value = daemon_get_file_int ( ar_file.data() ); + /* set rc to reflect what the caller should do */ - if ( value > threshold ) + if ( value > this->ar_threshold[node_ptr->ar_cause] ) { elog ("%s auto recovery threshold exceeded (%d)\n", - node_ptr->hostname.c_str(), threshold ); + node_ptr->hostname.c_str(), + this->ar_threshold[node_ptr->ar_cause] ); - this->autorecovery_disabled = true ; - - if ( this->system_type == SYSTEM_TYPE__CPE_MODE__SIMPLEX ) - { - alarm_compute_failure ( node_ptr , FM_ALARM_SEVERITY_CRITICAL ) ; - } - else - { - alarm_enabled_failure ( node_ptr , true ) ; - } + node_ptr->ar_disabled = true ; + adminActionChange ( node_ptr, MTC_ADMIN_ACTION__NONE ); allStateChange ( node_ptr, 
node_ptr->adminState, MTC_OPER_STATE__ENABLED, MTC_AVAIL_STATUS__DEGRADED ); - mtcInvApi_update_task ( node_ptr, - MTC_TASK_AUTO_RECOVERY_DISABLED ); + mtcInvApi_update_task ( node_ptr, ar_disable_banner ); - return ; + return (rc); } - wlog ("%s auto recovery (try %d of %d)\n", - node_ptr->hostname.c_str(), value , threshold ); + wlog ("%s auto recovery (try %d of %d) (%d)", + node_ptr->hostname.c_str(), + value, + this->ar_threshold[node_ptr->ar_cause], + node_ptr->ar_cause); - mtcInvApi_update_states_now ( node_ptr, - "unlocked", - "disabled", - "failed", - "disabled", - "failed" ); - - mtcInvApi_update_task_now ( node_ptr, - MTC_TASK_AUTO_RECOVERY ); + mtcInvApi_update_states_now ( node_ptr, "unlocked", + "disabled", "failed", + "disabled", "failed" ); lazy_graceful_fs_reboot ( node_ptr ); } + else /* Case 2 */ + { + send_hbs_command ( node_ptr->hostname, MTC_CMD_STOP_HOST ); + mtcInvApi_update_states ( node_ptr, "unlocked", "disabled", "failed" ); + + if (( NOT_THIS_HOST ) && + ( this->system_type != SYSTEM_TYPE__CPE_MODE__SIMPLEX )) + { + if ( ++node_ptr->ar_count[node_ptr->ar_cause] >= + this->ar_threshold [node_ptr->ar_cause] ) + { + elog ("%s auto recovery threshold exceeded (%d)\n", + node_ptr->hostname.c_str(), + this->ar_threshold[node_ptr->ar_cause] ); + node_ptr->ar_disabled = true ; + adminActionChange ( node_ptr, MTC_ADMIN_ACTION__NONE ); + mtcInvApi_update_task ( node_ptr, ar_disable_banner ); + rc = FAIL ; + } + else + { + wlog ("%s auto recovery (try %d of %d) (%d)", + node_ptr->hostname.c_str(), + node_ptr->ar_count[node_ptr->ar_cause], + this->ar_threshold[node_ptr->ar_cause], + node_ptr->ar_cause); + rc = PASS ; + } + } + else + { + wlog ("%s auto recovery\n", node_ptr->hostname.c_str()); + rc = PASS ; + } + } + return (rc); } /**************************************************************************** @@ -7198,13 +7314,8 @@ void nodeLinkClass::report_dor_recovery ( struct nodeLinkClass::node * node_ptr, void nodeLinkClass::force_full_enable 
( struct nodeLinkClass::node * node_ptr ) { - /* don't do a full enable if active controller in simplex mode */ - if ( THIS_HOST && SIMPLEX ) - { - wlog ("%s avoiding full enable of simplex system\n", node_ptr->hostname.c_str()); - wlog ("%s ... lock and unlock host to force recovery\n", node_ptr->hostname.c_str()); + if ( node_ptr->ar_disabled == true ) return ; - } if ( node_ptr->was_dor_recovery_mode ) { @@ -8599,13 +8710,14 @@ void nodeLinkClass::mem_log_alarm1 ( struct nodeLinkClass::node * node_ptr ) void nodeLinkClass::mem_log_stage ( struct nodeLinkClass::node * node_ptr ) { char str[MAX_MEM_LOG_DATA] ; - snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tAdd:%d Offline:%d: Swact:%d Recovery:%d Able:%d\n", + snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tAdd:%d Offline:%d: Swact:%d Recovery:%d Enable:%d Disable:%d\n", node_ptr->hostname.c_str(), - node_ptr->addStage, - node_ptr->offlineStage, - node_ptr->swactStage, + node_ptr->addStage, + node_ptr->offlineStage, + node_ptr->swactStage, node_ptr->recoveryStage, - node_ptr->handlerStage.raw); + node_ptr->enableStage, + node_ptr->disableStage); mem_log (str); } diff --git a/mtce/src/common/nodeClass.h b/mtce/src/common/nodeClass.h index e5c39172..da61e779 100755 --- a/mtce/src/common/nodeClass.h +++ b/mtce/src/common/nodeClass.h @@ -254,13 +254,9 @@ private: mtc_nodeOperState_enum operState_dport ; /**< Data Port Operational State */ mtc_nodeAvailStatus_enum availStatus_dport; /**< Data Port Availability Status */ - - /** Maintains the current handler stage. - * This is a union of all handler types such as enable, - * disable, degrade etc. 
See nodeBase.h for list of union members */ - mtc_stages_union handlerStage; - /* Individual FSM handler stages */ + mtc_enableStages_enum enableStage ; + mtc_disableStages_enum disableStage ; mtc_offlineStages_enum offlineStage ; mtc_onlineStages_enum onlineStage ; mtc_swactStages_enum swactStage ; @@ -380,6 +376,24 @@ private: /** when true requests the task for this host be cleared at first opportunity */ bool clear_task ; + /******* Auto Recovery Control Structure and member Functions ********/ + + /* reason/cause based host level enable failure counter */ + unsigned int ar_count[MTC_AR_DISABLE_CAUSE__LAST] ; + + /* The last enable failure reason/cause. + * Note: MTC_AR_DISABLE_CAUSE__NONE is no failure (default) */ + autorecovery_disable_cause_enum ar_cause ; + + /* when true indicates that a host has reached its enable failure + * threshold and is left in the unlocked-disabled state */ + bool ar_disabled ; + + /* throttles the ar_disabled log to periodically indicate auto + * recovery disabled state but avoid flooding that same message. */ + #define AR_LOG_THROTTLE_THRESHOLD (100000) + unsigned int ar_log_throttle ; + /** Host's mtc timer struct. Use to time handler stages. * * reset -> reset command response @@ -870,9 +884,10 @@ private: int update_dport_states ( struct nodeLinkClass::node * node_ptr, int event ); - /* manage deciding to return or issue an immediate reboot if the - * auto recovery threshold is exceeded. */ - void manage_autorecovery ( struct nodeLinkClass::node * node_ptr ); + /* manage auto recovery */ + int ar_manage ( struct nodeLinkClass::node * node_ptr, + autorecovery_disable_cause_enum cause, + string ar_disable_banner ); /** *********************************************************************** * @@ -1041,6 +1056,12 @@ private: void clear_main_failed_bools ( struct nodeLinkClass::node * node_ptr ); void clear_hostservices_ctls ( struct nodeLinkClass::node * node_ptr ); + /* Enables/Clears dynamic auto recovery state. start fresh ! 
+ * called in disabled_handler (lock) and in the DONE stages + * of the enable handler. */ + void ar_enable ( struct nodeLinkClass::node * node_ptr ); + + /** Find the node that has this timerID in its general mtc timer */ struct nodeLinkClass::node * get_mtcTimer_timer ( timer_t tid ); struct nodeLinkClass::node * get_mtcConfig_timer ( timer_t tid ); @@ -2005,22 +2026,36 @@ public: int compute_mtcalive_timeout; int controller_mtcalive_timeout ; int goenabled_timeout ; + + /** /etc/mtc.conf configurable audit intervals */ int swact_timeout ; int sysinv_timeout ; int sysinv_noncrit_timeout ; - int loc_recovery_timeout ; /**< Loss Of Communication Recovery Timeout */ - int work_queue_timeout ; + int loc_recovery_timeout ; /**< Loss Of Communication Recovery Timeout */ + int work_queue_timeout ; int node_reinstall_timeout ; - /** /etc/mtc.ini configurable audit intervals */ int insv_test_period ; int oos_test_period ; int uptime_period ; int online_period ; int token_refresh_rate; + /* Service specific max failures before autorecovery is disabled. + * + * ... values for each service are loaded from mtc config + * file at daemon startup + */ + unsigned int ar_threshold[MTC_AR_DISABLE_CAUSE__LAST] ; + + /* service specific secs between autorecovery retries. + * + * ... 
values for each service are loaded from mtc config + * file at daemon startup + */ + unsigned int ar_interval[MTC_AR_DISABLE_CAUSE__LAST] ; + int unknown_host_throttle ; - int invalid_arg_throttle ; }; /** @@ -2052,7 +2087,6 @@ const char * get_adminAction_str ( mtc_nodeAdminAction_enum action ); string bmc_get_ip ( string hostname, string mac , string & current_bm_ip ); void clear_host_degrade_causes ( unsigned int & degrade_mask ); bool sensor_monitoring_supported ( string hostname ); -void autorecovery_clear ( string hostname ); void log_mnfa_pool ( std::list & mnfa_awol_list ); #endif /* __INCLUDE_NODECLASS_H__ */ diff --git a/mtce/src/maintenance/mtcNodeCtrl.cpp b/mtce/src/maintenance/mtcNodeCtrl.cpp index b82ee661..75c26853 100644 --- a/mtce/src/maintenance/mtcNodeCtrl.cpp +++ b/mtce/src/maintenance/mtcNodeCtrl.cpp @@ -316,6 +316,25 @@ static int mtc_config_handler ( void * user, mtcInv.offline_threshold = atoi(value); ilog ("OfflineThrsh: %d\n", mtcInv.offline_threshold ); } + + else if (MATCH("agent", "ar_config_threshold")) + mtcInv.ar_threshold[MTC_AR_DISABLE_CAUSE__CONFIG] = atoi(value); + else if (MATCH("agent", "ar_goenable_threshold")) + mtcInv.ar_threshold[MTC_AR_DISABLE_CAUSE__GOENABLE] = atoi(value); + else if (MATCH("agent", "ar_hostservices_threshold")) + mtcInv.ar_threshold[MTC_AR_DISABLE_CAUSE__HOST_SERVICES] = atoi(value); + else if (MATCH("agent", "ar_heartbeat_threshold")) + mtcInv.ar_threshold[MTC_AR_DISABLE_CAUSE__HEARTBEAT] = atoi(value); + + else if (MATCH("agent", "ar_config_interval")) + mtcInv.ar_interval[MTC_AR_DISABLE_CAUSE__CONFIG] = atoi(value); + else if (MATCH("agent", "ar_goenable_interval")) + mtcInv.ar_interval[MTC_AR_DISABLE_CAUSE__GOENABLE] = atoi(value); + else if (MATCH("agent", "ar_hostservices_interval")) + mtcInv.ar_interval[MTC_AR_DISABLE_CAUSE__HOST_SERVICES] = atoi(value); + else if (MATCH("agent", "ar_heartbeat_interval")) + mtcInv.ar_interval[MTC_AR_DISABLE_CAUSE__HEARTBEAT] = atoi(value); + else { return (PASS); 
@@ -635,6 +654,20 @@ int daemon_configure ( void ) ilog("hwmond : %d (port)\n", mtc_config.hwmon_cmd_port ); ilog("auth_host : %s \n", mtc_config.keystone_auth_host ); + /* log system wide service based auto recovery control values */ + ilog("AR Config : %d (threshold) %d sec (retry interval)", + mtcInv.ar_threshold[MTC_AR_DISABLE_CAUSE__CONFIG], + mtcInv.ar_interval [MTC_AR_DISABLE_CAUSE__CONFIG]); + ilog("AR GoEnable : %d (threshold) %d sec (retry interval)", + mtcInv.ar_threshold[MTC_AR_DISABLE_CAUSE__GOENABLE], + mtcInv.ar_interval [MTC_AR_DISABLE_CAUSE__GOENABLE]); + ilog("AR Host Svcs: %d (threshold) %d sec (retry interval)", + mtcInv.ar_threshold[MTC_AR_DISABLE_CAUSE__HOST_SERVICES], + mtcInv.ar_interval [MTC_AR_DISABLE_CAUSE__HOST_SERVICES]); + ilog("AR Heartbeat: %d (threshold) %d sec (retry interval)", + mtcInv.ar_threshold[MTC_AR_DISABLE_CAUSE__HEARTBEAT], + mtcInv.ar_interval [MTC_AR_DISABLE_CAUSE__HEARTBEAT]); + /* Get this Controller Activity State */ mtc_config.active = daemon_get_run_option ("active") ; ilog ("Controller : %s\n", diff --git a/mtce/src/maintenance/mtcNodeFsm.cpp b/mtce/src/maintenance/mtcNodeFsm.cpp index 20fad599..19eba3ee 100755 --- a/mtce/src/maintenance/mtcNodeFsm.cpp +++ b/mtce/src/maintenance/mtcNodeFsm.cpp @@ -118,7 +118,8 @@ int nodeLinkClass::fsm ( struct nodeLinkClass::node * node_ptr ) * the insv_test_handler gets run as soon as a host's main function is enabled. 
**************************************************************************** */ - if (( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) && + if (( node_ptr->ar_disabled == false ) && + ( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) && ( node_ptr->operState == MTC_OPER_STATE__ENABLED ) && ((node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE ) || (node_ptr->availStatus == MTC_AVAIL_STATUS__DEGRADED ))) @@ -265,7 +266,7 @@ int nodeLinkClass::fsm ( struct nodeLinkClass::node * node_ptr ) { flog ("%s -> Running SubFunction Enable handler (%d)\n", node_ptr->hostname.c_str(), - node_ptr->handlerStage.enable ); + node_ptr->enableStage ); nodeLinkClass::enable_subf_handler ( node_ptr ); } diff --git a/mtce/src/maintenance/mtcNodeHdlrs.cpp b/mtce/src/maintenance/mtcNodeHdlrs.cpp index 73f85b1d..6bd4c42a 100755 --- a/mtce/src/maintenance/mtcNodeHdlrs.cpp +++ b/mtce/src/maintenance/mtcNodeHdlrs.cpp @@ -446,6 +446,15 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr ) { int rc = PASS ; + if ( node_ptr->ar_disabled == true ) + { + wlog_throttled ( node_ptr->ar_log_throttle, + AR_LOG_THROTTLE_THRESHOLD, + "%s auto recovery disabled cause:%d", + node_ptr->hostname.c_str(), node_ptr->ar_cause ); + return (RETRY); ; + } + if ( THIS_HOST ) { /****************************************************************** @@ -476,7 +485,7 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr ) } } - switch ( (int)node_ptr->handlerStage.enable ) + switch ( (int)node_ptr->enableStage ) { case MTC_ENABLE__FAILURE: { @@ -539,7 +548,8 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr ) if ( is_inactive_controller_main_insv() == true ) { wlog ("%s has critical failure\n", node_ptr->hostname.c_str()); - wlog ("%s ... requesting swact to in-service inactive controller\n", node_ptr->hostname.c_str()); + wlog ("%s ... 
requesting swact to peer controller", + node_ptr->hostname.c_str()); mtcInvApi_update_task_now ( node_ptr, MTC_TASK_FAILED_SWACT_REQ ); @@ -587,19 +597,6 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr ) } else { - this->autorecovery_enabled = true ; - - /* use thresholded auto recovery for simplext failure case */ - manage_autorecovery ( node_ptr ); - - if ( this->autorecovery_disabled == false ) - { - wlog ("%s has critical failure.\n", node_ptr->hostname.c_str()); - wlog ("%s ... downgrading to degrade with auto recovery disabled\n", node_ptr->hostname.c_str()); - wlog ("%s ... to avoid disabling only enabled controller\n", node_ptr->hostname.c_str()); - this->autorecovery_disabled = true ; - } - if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true )) { /* Raise Critical Compute Function Alarm */ @@ -620,7 +617,22 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr ) MTC_AVAIL_STATUS__FAILED ); } - if ( degrade_only == true ) + /* if we get here in controller simplex mode then go degraded + * if we are not already degraded. Otherwise, fail. */ + if ( THIS_HOST && ( is_inactive_controller_main_insv() == false )) + { + if (( node_ptr->adminState != MTC_ADMIN_STATE__UNLOCKED ) || + ( node_ptr->operState != MTC_OPER_STATE__ENABLED ) || + ( node_ptr->availStatus != MTC_AVAIL_STATUS__DEGRADED)) + { + allStateChange ( node_ptr, MTC_ADMIN_STATE__UNLOCKED, + MTC_OPER_STATE__ENABLED, + MTC_AVAIL_STATUS__DEGRADED ); + } + /* adminAction state is already changed to NONE. */ + } + + else if ( degrade_only == true ) { allStateChange ( node_ptr, MTC_ADMIN_STATE__UNLOCKED, MTC_OPER_STATE__ENABLED, @@ -636,25 +648,28 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr ) /* Inform the VIM of the failure */ mtcVimApi_state_change ( node_ptr, VIM_HOST_FAILED, 3 ); - /* if we get here in controller simplex mode then go degraded - * if we are not already degraded. Otherwise, fail. 
*/ - if ( THIS_HOST && ( is_inactive_controller_main_insv() == false )) + /* handle thresholded auto recovery retry delay interval */ + if ( node_ptr->ar_cause < MTC_AR_DISABLE_CAUSE__LAST ) { - /* autorecovery must be disabled */ - if (( node_ptr->adminState != MTC_ADMIN_STATE__UNLOCKED ) || - ( node_ptr->operState != MTC_OPER_STATE__ENABLED ) || - ( node_ptr->availStatus != MTC_AVAIL_STATUS__DEGRADED)) + unsigned int interval = this->ar_interval[node_ptr->ar_cause] ; + if ( interval ) { - allStateChange ( node_ptr, MTC_ADMIN_STATE__UNLOCKED, - MTC_OPER_STATE__ENABLED, - MTC_AVAIL_STATUS__DEGRADED ); + /* Wait this failure cause's retry delay */ + mtcTimer_start ( node_ptr->mtcTimer, + mtcTimer_handler, + interval ); + + wlog ("%s waiting %d secs before enable sequence retry (%d)", + node_ptr->hostname.c_str(), + interval, node_ptr->ar_cause ); } - /* adminAction state is already changed to NONE. */ + else + node_ptr->mtcTimer.ring = true ; } else - { - enableStageChange ( node_ptr, MTC_ENABLE__FAILURE_WAIT ); - } + node_ptr->mtcTimer.ring = true ; + + enableStageChange ( node_ptr, MTC_ENABLE__FAILURE_WAIT ); break; } @@ -717,6 +732,8 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr ) mtcCmd_workQ_purge ( node_ptr ); mtcCmd_doneQ_purge ( node_ptr ); + node_ptr->mtce_flags = 0 ; + /* Assert the mtc alive gate */ node_ptr->mtcAlive_gate = true ; @@ -739,8 +756,9 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr ) /* enable auto recovery if the inactive controller * is out of service */ - if (( is_controller (node_ptr) ) && ( NOT_THIS_HOST )) - this->autorecovery_enabled = true ; + //if (( is_controller (node_ptr) ) && ( NOT_THIS_HOST )) + // node_ptr->ar_disabled = false ; + // this->autorecovery_enabled = true ; /* fall through */ @@ -757,20 +775,6 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr ) get_availStatus_str(node_ptr->availStatus).c_str()); mtcInvApi_update_task ( node_ptr, 
"" ); - - /* Special case */ - // alarm_enabled_clear ( node_ptr, false ); - - //mtcAlarm_clear ( node_ptr->hostname, MTC_ALARM_ID__CONFIG ); - //node_ptr->alarms[MTC_ALARM_ID__CONFIG] = FM_ALARM_SEVERITY_CLEAR ; - - //allStateChange ( node_ptr, MTC_ADMIN_STATE__UNLOCKED, - // MTC_OPER_STATE__ENABLED, - // MTC_AVAIL_STATUS__DEGRADED ); - - // adminActionChange ( node_ptr, MTC_ADMIN_ACTION__NONE ); - - // return (PASS); } else { @@ -986,6 +990,7 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr ) node_ptr->mtcAlive_online = false ; node_ptr->mtcAlive_offline = true ; node_ptr->goEnabled = false ; + node_ptr->ar_cause = MTC_AR_DISABLE_CAUSE__NONE ; clear_service_readies ( node_ptr ); @@ -1038,17 +1043,24 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr ) mtcTimer_reset ( node_ptr->mtcTimer ); /* Check to see if the host is/got configured correctly */ - if ( (node_ptr->mtce_flags & MTC_FLAG__I_AM_CONFIGURED) == 0 ) + if ((( !node_ptr->mtce_flags & MTC_FLAG__I_AM_CONFIGURED )) || + (( node_ptr->mtce_flags & MTC_FLAG__I_AM_NOT_HEALTHY ))) { - elog ("%s configuration incomplete or failed (oob:%x:%x)\n", + elog ("%s configuration failed or incomplete (oob:%x)\n", node_ptr->hostname.c_str(), - node_ptr->mtce_flags, - MTC_FLAG__I_AM_CONFIGURED); + node_ptr->mtce_flags) /* raise an alarm for the failure of the config */ alarm_config_failure ( node_ptr ); + mtcInvApi_update_task ( node_ptr, MTC_TASK_MAIN_CONFIG_FAIL ); enableStageChange ( node_ptr, MTC_ENABLE__FAILURE ); + + /* handle auto recovery for this failure */ + if ( ar_manage ( node_ptr, + MTC_AR_DISABLE_CAUSE__CONFIG, + MTC_TASK_AR_DISABLED_CONFIG ) != PASS ) + break ; } else { @@ -1152,6 +1164,7 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr ) } case MTC_ENABLE__GOENABLED_WAIT: { + bool goenable_failed = false ; /* The healthy code comes from the host in the mtcAlive message. 
* This 'if' clause was introduced to detected failure of host * without having to wait for the GOENABLED phase to timeout. @@ -1162,27 +1175,22 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr ) * be gracefully recovered to enabled in that case. Instead * we want to recover the card through a reset as quickly as * possible. */ - if ( node_ptr->health == NODE_UNHEALTHY ) - { - elog ("%s is UNHEALTHY\n", node_ptr->hostname.c_str()); - mtcTimer_reset ( node_ptr->mtcTimer ); - this->force_full_enable ( node_ptr ); - } /* search for the Go Enable message */ - else if ( node_ptr->goEnabled_failed == true ) + if (( node_ptr->health == NODE_UNHEALTHY ) || + (( node_ptr->mtce_flags & MTC_FLAG__I_AM_NOT_HEALTHY)) || + ( node_ptr->goEnabled_failed == true )) { elog ("%s got GOENABLED Failed\n", node_ptr->hostname.c_str()); mtcTimer_reset ( node_ptr->mtcTimer ); - mtcInvApi_update_task ( node_ptr, MTC_TASK_INTEST_FAIL ); - enableStageChange ( node_ptr, MTC_ENABLE__FAILURE ); + goenable_failed = true ; + mtcInvApi_update_task ( node_ptr, MTC_TASK_MAIN_INTEST_FAIL ); } /* search for the Go Enable message */ else if ( node_ptr->goEnabled == true ) { mtcTimer_reset ( node_ptr->mtcTimer ); plog ("%s got GOENABLED\n", node_ptr->hostname.c_str()); - // plog ("%s main configured OK\n", node_ptr->hostname.c_str()); /* O.K. clearing the state now that we got it */ node_ptr->goEnabled = false ; @@ -1194,26 +1202,28 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr ) } else if ( mtcTimer_expired ( node_ptr->mtcTimer )) { - elog ("%s has GOENABLED Timeout\n", node_ptr->hostname.c_str()); - ilog ("%s ... 
the out-of-service tests took too long to complete\n", - node_ptr->hostname.c_str()); - - mtcInvApi_update_task ( node_ptr, MTC_TASK_INTEST_FAIL_TO_ ); + elog ("%s has GOENABLED Timeout", node_ptr->hostname.c_str()); node_ptr->mtcTimer.ring = false ; - - /* raise an alarm for the enable failure */ - alarm_enabled_failure ( node_ptr , true ); - - /* go back and issue reboot again */ - enableStageChange ( node_ptr, MTC_ENABLE__FAILURE ); - - /* no longer In-Test ; we are 'Failed' again" */ - availStatusChange ( node_ptr, MTC_AVAIL_STATUS__FAILED ); + goenable_failed = true ; + mtcInvApi_update_task ( node_ptr, MTC_TASK_MAIN_INTEST_TO ); } else { ; /* wait some more */ } + + if ( goenable_failed ) + { + alarm_enabled_failure ( node_ptr, true ); + + enableStageChange ( node_ptr, MTC_ENABLE__FAILURE ); + + /* handle auto recovery for this failure */ + if ( ar_manage ( node_ptr, + MTC_AR_DISABLE_CAUSE__GOENABLE, + MTC_TASK_AR_DISABLED_GOENABLE ) != PASS ) + break ; + } break ; } @@ -1224,14 +1234,20 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr ) plog ("%s Starting Host Services\n", node_ptr->hostname.c_str()); if ( this->launch_host_services_cmd ( node_ptr, start ) != PASS ) { - node_ptr->hostservices_failed = true ; - elog ("%s %s failed ; launch\n", node_ptr->hostname.c_str(), node_ptr->host_services_req.name.c_str()); - mtcInvApi_update_task ( node_ptr, MTC_TASK_START_SERVICE_FAIL ); + node_ptr->hostservices_failed = true ; + alarm_enabled_failure ( node_ptr, true ); enableStageChange ( node_ptr, MTC_ENABLE__FAILURE ); + mtcInvApi_update_task ( node_ptr, MTC_TASK_MAIN_SERVICE_FAIL ); + + /* handle auto recovery for this failure */ + if ( ar_manage ( node_ptr, + MTC_AR_DISABLE_CAUSE__HOST_SERVICES, + MTC_TASK_AR_DISABLED_SERVICES ) != PASS ) + break ; } else { @@ -1261,6 +1277,10 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr ) else if ( rc != PASS ) { node_ptr->hostservices_failed = true ; + 
alarm_enabled_failure ( node_ptr, true ); + enableStageChange ( node_ptr, MTC_ENABLE__FAILURE ); + + /* distinguish 'timeout' from other 'execution' failures */ if ( rc == FAIL_TIMEOUT ) { @@ -1269,7 +1289,7 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr ) node_ptr->host_services_req.name.c_str()); mtcInvApi_update_task ( node_ptr, - MTC_TASK_START_SERVICE_TO ); + MTC_TASK_MAIN_SERVICE_TO ); } else { @@ -1279,9 +1299,14 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr ) rc); mtcInvApi_update_task ( node_ptr, - MTC_TASK_START_SERVICE_FAIL ); + MTC_TASK_MAIN_SERVICE_FAIL ); } - enableStageChange ( node_ptr, MTC_ENABLE__FAILURE ); + + /* handle auto recovery for this failure */ + if ( ar_manage ( node_ptr, + MTC_AR_DISABLE_CAUSE__HOST_SERVICES, + MTC_TASK_AR_DISABLED_SERVICES ) != PASS ) + break ; } else /* success path */ { @@ -1321,8 +1346,6 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr ) mtcTimer_reset ( node_ptr->mtcTimer ); } - /* Start Monitoring Services - heartbeat, process and hardware */ - send_hbs_command ( node_ptr->hostname, MTC_CMD_START_HOST ); if ( this->hbs_failure_action == HBS_FAILURE_ACTION__NONE ) { @@ -1338,11 +1361,13 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr ) MTC_HEARTBEAT_SOAK_BEFORE_ENABLE, node_ptr->hbsClient_ready ? 
" ready event" : "out ready event" ); - /* allow heartbeat to run for MTC_HEARTBEAT_SOAK_BEFORE_ENABLE * seconds before we declare enable */ mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_HEARTBEAT_SOAK_BEFORE_ENABLE ); enableStageChange ( node_ptr, MTC_ENABLE__HEARTBEAT_SOAK ); + + /* Start Monitoring Services - heartbeat, process and hardware */ + send_hbs_command ( node_ptr->hostname, MTC_CMD_START_HOST ); } break ; } @@ -1351,6 +1376,11 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr ) if ( node_ptr->mtcTimer.ring == true ) { plog ("%s heartbeating\n", node_ptr->hostname.c_str() ); + + /* handle auto recovery ear for thsi potential cause */ + node_ptr->ar_cause = MTC_AR_DISABLE_CAUSE__NONE ; + node_ptr->ar_count[MTC_AR_DISABLE_CAUSE__HEARTBEAT] = 0 ; + /* if heartbeat is not working then we will * never get here and enable the host */ enableStageChange ( node_ptr, MTC_ENABLE__STATE_CHANGE ); @@ -1490,7 +1520,6 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr ) } else { - node_ptr->enabled_count++ ; /* Inform the VIM that this host is enabled */ @@ -1505,6 +1534,8 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr ) adminActionChange ( node_ptr, MTC_ADMIN_ACTION__NONE ); node_ptr->health_threshold_counter = 0 ; + + ar_enable ( node_ptr ); } break ; @@ -2103,7 +2134,7 @@ int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr ) { elog ("%s got GOENABLED Failed\n", node_ptr->hostname.c_str()); mtcTimer_reset ( node_ptr->mtcTimer ); - mtcInvApi_update_task ( node_ptr, MTC_TASK_INTEST_FAIL ); + mtcInvApi_update_task ( node_ptr, MTC_TASK_MAIN_INTEST_FAIL ); this->force_full_enable ( node_ptr ); } @@ -2121,6 +2152,7 @@ int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr ) else if ( node_ptr->mtcTimer.ring == true ) { elog ("%s has GOENABLED Timeout\n", node_ptr->hostname.c_str()); + mtcInvApi_update_task ( node_ptr, 
MTC_TASK_MAIN_INTEST_TO ); node_ptr->mtcTimer.ring = false ; @@ -2640,7 +2672,7 @@ int nodeLinkClass::disable_handler ( struct nodeLinkClass::node * node_ptr ) { int rc = PASS ; - switch ( (int)node_ptr->handlerStage.disable ) + switch ( (int)node_ptr->disableStage ) { case MTC_DISABLE__START: { @@ -2657,6 +2689,7 @@ int nodeLinkClass::disable_handler ( struct nodeLinkClass::node * node_ptr ) clear_subf_failed_bools ( node_ptr ); clear_hostservices_ctls ( node_ptr ); + enableStageChange ( node_ptr, MTC_ENABLE__START ) ; disableStageChange ( node_ptr, MTC_DISABLE__DIS_SERVICES_WAIT) ; stop_offline_handler ( node_ptr ); @@ -2758,7 +2791,7 @@ int nodeLinkClass::disable_handler ( struct nodeLinkClass::node * node_ptr ) /* If the stage is still MTC_DISABLE__DIS_SERVICES_WAIT then the * host should already be powered on so lets send the stop * services command */ - if ( node_ptr->handlerStage.disable == MTC_DISABLE__DIS_SERVICES_WAIT ) + if ( node_ptr->disableStage == MTC_DISABLE__DIS_SERVICES_WAIT ) { bool start = false ; if ( this->launch_host_services_cmd ( node_ptr, start ) != PASS ) @@ -3042,6 +3075,9 @@ int nodeLinkClass::disable_handler ( struct nodeLinkClass::node * node_ptr ) recovery_ctrl_init ( node_ptr->hwmon_reset ); recovery_ctrl_init ( node_ptr->hwmon_powercycle ); + /* re-enable auto recovery */ + ar_enable ( node_ptr ); + /* Load configured mtcAlive and goEnabled timers */ LOAD_NODETYPE_TIMERS ; @@ -3055,7 +3091,7 @@ int nodeLinkClass::disable_handler ( struct nodeLinkClass::node * node_ptr ) default: { elog ("%s Bad Case (%d)\n", node_ptr->hostname.c_str(), - node_ptr->handlerStage.disable ); + node_ptr->disableStage ); rc = FAIL_BAD_CASE ; } } @@ -3273,14 +3309,20 @@ int nodeLinkClass::online_handler ( struct nodeLinkClass::node * node_ptr ) int rc = PASS ; /* don't need to manage the offline or online state - * for the following availability states */ - if (( node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE ) || + * for the following states + * 
... auto recovery state + * ... enable stages + * ... availability states */ + if (( node_ptr->ar_disabled == true ) || + ( node_ptr->enableStage == MTC_ENABLE__FAILURE ) || + ( node_ptr->enableStage == MTC_ENABLE__FAILURE_WAIT ) || + ( node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE ) || ( node_ptr->availStatus == MTC_AVAIL_STATUS__DEGRADED ) || ( node_ptr->availStatus == MTC_AVAIL_STATUS__OFFDUTY ) || ( node_ptr->availStatus == MTC_AVAIL_STATUS__INTEST ) || ( node_ptr->availStatus == MTC_AVAIL_STATUS__NOT_INSTALLED )) { - return (PASS); + return (rc); } switch ( (int)node_ptr->onlineStage ) @@ -4648,6 +4690,8 @@ int nodeLinkClass::power_handler ( struct nodeLinkClass::node * node_ptr ) recovery_ctrl_init ( node_ptr->hwmon_reset ); recovery_ctrl_init ( node_ptr->hwmon_powercycle ); + ar_enable ( node_ptr ); + mtcInvApi_force_task ( node_ptr, "" ); break ; } @@ -5537,25 +5581,69 @@ int nodeLinkClass::add_handler ( struct nodeLinkClass::node * node_ptr ) case MTC_ADD__CLEAR_TASK: { - if ( is_controller(node_ptr) ) + /* Check for hosts that were in the auto recovery disabled state */ + if ( !node_ptr->task.empty () ) { - if ( node_ptr->mtcTimer.ring == true ) + if (( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) && + (( !node_ptr->task.compare(MTC_TASK_AR_DISABLED_CONFIG)) || + ( !node_ptr->task.compare(MTC_TASK_AR_DISABLED_GOENABLE))|| + ( !node_ptr->task.compare(MTC_TASK_AR_DISABLED_SERVICES))|| + ( !node_ptr->task.compare(MTC_TASK_AR_DISABLED_HEARTBEAT)))) { - if ( !node_ptr->task.empty () ) + if ( !node_ptr->task.compare(MTC_TASK_AR_DISABLED_CONFIG )) { - mtcInvApi_force_task ( node_ptr, "" ); + node_ptr->ar_cause = MTC_AR_DISABLE_CAUSE__CONFIG ; + alarm_config_failure ( node_ptr ); } + else if ( !node_ptr->task.compare(MTC_TASK_AR_DISABLED_GOENABLE )) + { + node_ptr->ar_cause = MTC_AR_DISABLE_CAUSE__GOENABLE ; + alarm_enabled_failure ( node_ptr, true ); + } + else if ( !node_ptr->task.compare(MTC_TASK_AR_DISABLED_SERVICES )) + { + node_ptr->ar_cause = 
MTC_AR_DISABLE_CAUSE__HOST_SERVICES ; + alarm_enabled_failure ( node_ptr, true ); + } + else if ( !node_ptr->task.compare(MTC_TASK_AR_DISABLED_HEARTBEAT )) + { + node_ptr->ar_cause = MTC_AR_DISABLE_CAUSE__HEARTBEAT ; + } + node_ptr->ar_disabled = true ; + + if ( THIS_HOST ) + mtcInvApi_update_states ( node_ptr, "unlocked", "enabled", "degraded" ); + else + mtcInvApi_update_states ( node_ptr, "unlocked", "disabled", "failed" ); + + node_ptr->addStage = MTC_ADD__START; + node_ptr->add_completed = true ; + + adminActionChange ( node_ptr, MTC_ADMIN_ACTION__NONE ); + plog ("%s Host Add Completed ; auto recovery disabled state (uptime:%d)\n", + node_ptr->hostname.c_str(), node_ptr->uptime ); + break ; } else { - break ; + if ( is_controller(node_ptr) ) + { + if ( node_ptr->mtcTimer.ring == true ) + { + mtcInvApi_force_task ( node_ptr, "" ); + } + else + { + break ; + } + } + else + { + /* do it immediately for all other server types */ + mtcInvApi_force_task ( node_ptr, "" ); + } } } - else - { - /* do it immediately for all otyher server types */ - mtcInvApi_force_task ( node_ptr, "" ); - } /* default retries counter to zero before START_SERVICES */ node_ptr->retries = 0 ; node_ptr->addStage = MTC_ADD__START_SERVICES ; @@ -6017,55 +6105,6 @@ int nodeLinkClass::oos_test_handler ( struct nodeLinkClass::node * node_ptr ) node_ptr->start_services_running_main); } } -#endif - - - /* Avoid forcing the states to the database when on the first & second pass. - * This is because it is likely we just read all the states and - * if coming out of a DOR or a SWACT we don't need to un-necessarily - * produce that extra sysinv traffic.
- * Also, no point forcing the states while there is an admin action - * or enable or graceful recovery going on as well because state changes - * are being done in the FSM already */ - if (( node_ptr->oos_test_count > 1 ) && - ( node_ptr->adminAction == MTC_ADMIN_ACTION__NONE ) && - ( !node_ptr->handlerStage.raw ) && - ( !node_ptr->recoveryStage )) - { - /* Change the oper and avail states in the database */ - allStateChange ( node_ptr, node_ptr->adminState, - node_ptr->operState, - node_ptr->availStatus ); - } - -#ifdef WANT_CLEAR_ALARM_AUDIT - - /* TODO: Obsolete with new Alarm Strategy */ - /* Self Correct Stuck Failure Alarms */ - if (( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) && - ( node_ptr->operState == MTC_OPER_STATE__ENABLED ) && - (( node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE ) || - ( node_ptr->availStatus == MTC_AVAIL_STATUS__DEGRADED ))) - { - if ( node_ptr->alarms[MTC_ALARM_ID__CONFIG] != FM_ALARM_SEVERITY_CLEAR ) - { - mtcAlarm_clear ( node_ptr->hostname, MTC_ALARM_ID__CONFIG ); - node_ptr->alarms[MTC_ALARM_ID__CONFIG] = FM_ALARM_SEVERITY_CLEAR ; - } - alarm_enabled_clear ( node_ptr , false); - } -#endif - /* Make sure the locked status on the host itself is set */ - if (( node_ptr->adminState == MTC_ADMIN_STATE__LOCKED ) && - ( node_ptr->operState == MTC_OPER_STATE__DISABLED ) && - ( node_ptr->availStatus == MTC_AVAIL_STATUS__ONLINE ) && - ( !(node_ptr->mtce_flags & MTC_FLAG__I_AM_LOCKED) )) - { - ilog ("%s setting 'locked' status\n", node_ptr->hostname.c_str()); - - /* Tell the host that it is locked */ - send_mtc_cmd ( node_ptr->hostname , MTC_MSG_LOCKED, MGMNT_INTERFACE); - } if (( daemon_is_file_present ( MTC_CMD_FIT__GOENABLE_AUDIT )) && ( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) && @@ -6078,6 +6117,43 @@ int nodeLinkClass::oos_test_handler ( struct nodeLinkClass::node * node_ptr ) send_mtc_cmd ( node_ptr->hostname, MTC_REQ_SUBF_GOENABLED, MGMNT_INTERFACE ); } } +#endif + + if ( node_ptr->ar_disabled == true ) + 
{ + elog ( "%s auto recovery disabled cause:%d", + node_ptr->hostname.c_str(), + node_ptr->ar_cause ); + } + + /* Avoid forcing the states to the database when on the first & second pass. + * This is because it is likely we just read all the states and + * if coming out of a DOR or a SWACT we don't need to un-necessarily + * produce that extra sysinv traffic. + * Also, no point forcing the states while there is an admin action + * or enable or graceful recovery going on as well because state changes + * are being done in the FSM already */ + if (( node_ptr->oos_test_count > 1 ) && + ( node_ptr->adminAction == MTC_ADMIN_ACTION__NONE ) && + ( !node_ptr->enableStage ) && ( !node_ptr->recoveryStage )) + { + /* Change the oper and avail states in the database */ + allStateChange ( node_ptr, node_ptr->adminState, + node_ptr->operState, + node_ptr->availStatus ); + } + + /* Make sure the locked status on the host itself is set */ + if (( node_ptr->adminState == MTC_ADMIN_STATE__LOCKED ) && + ( node_ptr->operState == MTC_OPER_STATE__DISABLED ) && + ( node_ptr->availStatus == MTC_AVAIL_STATUS__ONLINE ) && + ( !(node_ptr->mtce_flags & MTC_FLAG__I_AM_LOCKED) )) + { + ilog ("%s setting 'locked' status\n", node_ptr->hostname.c_str()); + + /* Tell the host that it is locked */ + send_mtc_cmd ( node_ptr->hostname , MTC_MSG_LOCKED, MGMNT_INTERFACE); + } break ; } @@ -6140,7 +6216,8 @@ int nodeLinkClass::insv_test_handler ( struct nodeLinkClass::node * node_ptr ) } /* manage degrade state and alarms */ if (( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) && - ( node_ptr->operState == MTC_OPER_STATE__ENABLED )) + ( node_ptr->operState == MTC_OPER_STATE__ENABLED ) && + ( node_ptr->ar_disabled == false )) { /************************************************************ * Manage In-Service Alarms * @@ -6170,7 +6247,6 @@ int nodeLinkClass::insv_test_handler ( struct nodeLinkClass::node * node_ptr ) } case MTC_INSV_TEST__RUN: { - #ifdef WANT_FIT_TESTING daemon_load_fit (); @@ 
-6229,22 +6305,20 @@ int nodeLinkClass::insv_test_handler ( struct nodeLinkClass::node * node_ptr ) * controller autorecovery. Otherwise enable it but in this case * don't change the disable bool as that is used to gate auto * recovery once the threshoild is reached */ - if ( is_controller ( node_ptr ) && NOT_THIS_HOST ) - { - if (( this->autorecovery_enabled == true ) && - ( node_ptr->operState == MTC_OPER_STATE__ENABLED )) - { - autorecovery_clear ( CONTROLLER_0 ); - autorecovery_clear ( CONTROLLER_1 ); - this->autorecovery_enabled = false ; - this->autorecovery_disabled = false ; - } - else if (( this->autorecovery_enabled == false ) && - ( node_ptr->operState != MTC_OPER_STATE__ENABLED )) - { - this->autorecovery_enabled = true ; - } - } +// if ( is_controller ( node_ptr ) && NOT_THIS_HOST ) +// { +// if (( node_ptr->ar_disabled == false ) && +// ( node_ptr->operState == MTC_OPER_STATE__ENABLED )) +// { +// autorecovery_clear ( CONTROLLER_0 ); +// autorecovery_clear ( CONTROLLER_1 ); +// } + //else if (( node_ptr->ar_disabled == true ) && + // ( node_ptr->operState != MTC_OPER_STATE__ENABLED )) + //{ + // node_ptr->ar_disabled = false ; + //} + // } /* Monitor the health of the host - no pass file */ if (( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) && @@ -6440,7 +6514,7 @@ int nodeLinkClass::insv_test_handler ( struct nodeLinkClass::node * node_ptr ) * **/ if (( node_ptr->operState_subf == MTC_OPER_STATE__DISABLED ) && - ( this->autorecovery_disabled == false ) && + ( node_ptr->ar_disabled == false ) && ( node_ptr->start_services_needed == false )) { if (( node_ptr->adminAction != MTC_ADMIN_ACTION__ENABLE_SUBF ) && @@ -6527,7 +6601,7 @@ int nodeLinkClass::insv_test_handler ( struct nodeLinkClass::node * node_ptr ) if (( daemon_is_file_present ( CONFIG_COMPLETE_FILE )) && ( daemon_is_file_present ( CONFIG_FAIL_FILE ))) { - wlog_throttled ( node_ptr->health_threshold_counter, (MTC_UNHEALTHY_THRESHOLD*3), "%s is UNHEALTHY\n", node_ptr->hostname.c_str()); 
+ wlog_throttled ( node_ptr->health_threshold_counter, (MTC_UNHEALTHY_THRESHOLD*10), "%s is UNHEALTHY\n", node_ptr->hostname.c_str()); if ( node_ptr->health_threshold_counter >= MTC_UNHEALTHY_THRESHOLD ) { node_ptr->degrade_mask |= DEGRADE_MASK_CONFIG ; diff --git a/mtce/src/maintenance/mtcSubfHdlrs.cpp b/mtce/src/maintenance/mtcSubfHdlrs.cpp index 20d42793..9a25c701 100644 --- a/mtce/src/maintenance/mtcSubfHdlrs.cpp +++ b/mtce/src/maintenance/mtcSubfHdlrs.cpp @@ -43,20 +43,17 @@ int nodeLinkClass::enable_subf_handler ( struct nodeLinkClass::node * node_ptr ) { int rc = PASS ; + if ( node_ptr->ar_disabled == true ) + { + enableStageChange ( node_ptr, MTC_ENABLE__START ); + return (rc); + } + /* Setup the log prefix */ string name = node_ptr->hostname ; name.append("-compute"); - bool simplex = false ; - if (( SIMPLEX ) || - (( THIS_HOST ) && - (( this->is_inactive_controller_main_insv() == false ) || - ( this->is_inactive_controller_subf_insv() == false )))) - { - simplex = true ; - } - - switch ( (int)node_ptr->handlerStage.enable ) + switch ( (int)node_ptr->enableStage ) { case MTC_ENABLE__FAILURE_WAIT: { @@ -77,29 +74,7 @@ int nodeLinkClass::enable_subf_handler ( struct nodeLinkClass::node * node_ptr ) workQueue_purge ( node_ptr ); doneQueue_purge ( node_ptr ); - enableStageChange ( node_ptr, MTC_ENABLE__START ); - - /* avoid failing this controller if there is no inactive to - * take over and avoid thrashing back and forth if the sub - * function on the inactive is disabled */ - if ( simplex ) - { - /* if autorecovery is enabled then handle it that way. 
*/ - if ( this->autorecovery_enabled == true ) - { - adminActionChange ( node_ptr, MTC_ADMIN_ACTION__NONE ); - enableStageChange ( node_ptr, MTC_ENABLE__START ); - - manage_autorecovery ( node_ptr ); - } - - wlog ("%s is ENABLED-degraded (failed subfunction)\n", name.c_str()); - } - else - { - /* if there is another controller enabled then just force a full enable of this one */ - force_full_enable ( node_ptr ) ; - } + force_full_enable ( node_ptr ) ; break ; } @@ -139,22 +114,26 @@ int nodeLinkClass::enable_subf_handler ( struct nodeLinkClass::node * node_ptr ) enableStageChange ( node_ptr, MTC_ENABLE__GOENABLED_TIMER ); alarm_config_clear ( node_ptr ); } - else if ( node_ptr->mtce_flags & MTC_FLAG__I_AM_NOT_HEALTHY ) + + if ((( !(node_ptr->mtce_flags & MTC_FLAG__I_AM_CONFIGURED) )) || + (( node_ptr->mtce_flags & MTC_FLAG__I_AM_NOT_HEALTHY ))) { mtcTimer_reset (node_ptr->mtcTimer); - elog ("%s configuration failed (oob:%x:%x)\n", - name.c_str(), - node_ptr->mtce_flags, - MTC_FLAG__I_AM_NOT_HEALTHY); + + elog ("%s configuration failed or incomplete (oob:%x)\n", + name.c_str(), node_ptr->mtce_flags); alarm_config_failure ( node_ptr ); - if ( simplex ) - mtcInvApi_update_task ( node_ptr, MTC_TASK_SUBF_CONFIG_FAIL_ ); - else - mtcInvApi_update_task ( node_ptr, MTC_TASK_SUBF_CONFIG_FAIL ); + mtcInvApi_update_task ( node_ptr, MTC_TASK_SUBF_CONFIG_FAIL ); enableStageChange ( node_ptr, MTC_ENABLE__SUBF_FAILED ); + + /* handle auto recovery for this failure */ + if ( ar_manage ( node_ptr, + MTC_AR_DISABLE_CAUSE__CONFIG, + MTC_TASK_AR_DISABLED_CONFIG ) != PASS ) + break ; } /* timeout handling */ @@ -166,12 +145,15 @@ int nodeLinkClass::enable_subf_handler ( struct nodeLinkClass::node * node_ptr ) alarm_config_failure ( node_ptr ); - if ( simplex ) - mtcInvApi_update_task ( node_ptr, MTC_TASK_SUBF_CONFIG_TO_ ); - else - mtcInvApi_update_task ( node_ptr, MTC_TASK_SUBF_CONFIG_TO ); + mtcInvApi_update_task ( node_ptr, MTC_TASK_SUBF_CONFIG_TO ); enableStageChange ( node_ptr,
MTC_ENABLE__SUBF_FAILED ); + + /* handle auto recovery for this failure */ + if ( ar_manage ( node_ptr, + MTC_AR_DISABLE_CAUSE__CONFIG, + MTC_TASK_AR_DISABLED_CONFIG ) != PASS ) + break ; } else { @@ -227,19 +209,19 @@ int nodeLinkClass::enable_subf_handler ( struct nodeLinkClass::node * node_ptr ) } case MTC_ENABLE__GOENABLED_WAIT: { + bool goenable_failed = false ; + /* search for the Go Enable message */ - if ( node_ptr->goEnabled_failed_subf == true ) + if (( node_ptr->health == NODE_UNHEALTHY ) || + ( node_ptr->mtce_flags & MTC_FLAG__I_AM_NOT_HEALTHY) || + ( node_ptr->goEnabled_failed_subf == true )) { mtcTimer_reset ( node_ptr->mtcTimer ); elog ("%s one or more out-of-service tests failed\n", name.c_str()); - mtcInvApi_update_task ( node_ptr, simplex ? MTC_TASK_INTEST_FAIL_ : MTC_TASK_INTEST_FAIL ); - - /* Need thresholded auto recovery for this failure mode */ - if ( this->system_type == SYSTEM_TYPE__CPE_MODE__SIMPLEX ) - this->autorecovery_enabled = true ; - + mtcInvApi_update_task ( node_ptr, MTC_TASK_SUBF_INTEST_FAIL ); enableStageChange ( node_ptr, MTC_ENABLE__SUBF_FAILED ); + goenable_failed = true ; } /* search for the Go Enable message */ @@ -248,6 +230,7 @@ int nodeLinkClass::enable_subf_handler ( struct nodeLinkClass::node * node_ptr ) mtcTimer_reset ( node_ptr->mtcTimer ); alarm_enabled_clear ( node_ptr, false ); + alarm_compute_clear ( node_ptr, true ); plog ("%s passed out-of-service tests\n", name.c_str()); @@ -275,18 +258,25 @@ int nodeLinkClass::enable_subf_handler ( struct nodeLinkClass::node * node_ptr ) { elog ("%s out-of-service test execution timeout\n", name.c_str()); - mtcInvApi_update_task ( node_ptr, simplex ? 
MTC_TASK_INTEST_FAIL_TO_ : MTC_TASK_INTEST_FAIL_TO ); - - /* Need thresholded auto recovery for this failure mode */ - if ( this->system_type == SYSTEM_TYPE__CPE_MODE__SIMPLEX ) - this->autorecovery_enabled = true ; - + mtcInvApi_update_task ( node_ptr, MTC_TASK_SUBF_INTEST_TO ); enableStageChange ( node_ptr, MTC_ENABLE__SUBF_FAILED ); + goenable_failed = true ; } else { ; /* wait some more */ } + + if ( goenable_failed == true ) + { + alarm_compute_failure ( node_ptr, FM_ALARM_SEVERITY_CRITICAL ); + + /* handle auto recovery for this failure */ + if ( ar_manage ( node_ptr, + MTC_AR_DISABLE_CAUSE__GOENABLE, + MTC_TASK_AR_DISABLED_GOENABLE ) != PASS ) + break ; + } break ; } case MTC_ENABLE__HOST_SERVICES_START: @@ -312,17 +302,20 @@ int nodeLinkClass::enable_subf_handler ( struct nodeLinkClass::node * node_ptr ) else if ( launch_host_services_cmd ( node_ptr, start, subf ) != PASS ) { - node_ptr->hostservices_failed_subf = true ; - wlog ("%s %s failed ; launch\n", name.c_str(), node_ptr->host_services_req.name.c_str()); - /* Need thresholded auto recovery for this failure mode */ - if ( this->system_type == SYSTEM_TYPE__CPE_MODE__SIMPLEX ) - this->autorecovery_enabled = true ; - + node_ptr->hostservices_failed_subf = true ; + alarm_compute_failure ( node_ptr, FM_ALARM_SEVERITY_CRITICAL ); enableStageChange ( node_ptr, MTC_ENABLE__SUBF_FAILED ); + mtcInvApi_update_task ( node_ptr, MTC_TASK_SUBF_SERVICE_FAIL ); + + /* handle auto recovery for this failure */ + if ( ar_manage ( node_ptr, + MTC_AR_DISABLE_CAUSE__HOST_SERVICES, + MTC_TASK_AR_DISABLED_SERVICES ) != PASS ) + break ; } else { @@ -343,11 +336,12 @@ int nodeLinkClass::enable_subf_handler ( struct nodeLinkClass::node * node_ptr ) } else if ( rc != PASS ) { - /* Need thresholded auto recovery for this failure mode */ - if ( this->system_type == SYSTEM_TYPE__CPE_MODE__SIMPLEX ) - this->autorecovery_enabled = true ; - node_ptr->hostservices_failed_subf = true ; + alarm_compute_failure ( node_ptr, 
FM_ALARM_SEVERITY_CRITICAL ); + + enableStageChange ( node_ptr, MTC_ENABLE__SUBF_FAILED ); + + if ( rc == FAIL_TIMEOUT ) { elog ("%s %s failed ; timeout\n", @@ -355,7 +349,7 @@ int nodeLinkClass::enable_subf_handler ( struct nodeLinkClass::node * node_ptr ) node_ptr->host_services_req.name.c_str()); /* Report "Enabling Compute Service Timeout" to sysinv/horizon */ - mtcInvApi_update_task ( node_ptr, MTC_TASK_ENABLING_SUBF_TO ); + mtcInvApi_update_task ( node_ptr, MTC_TASK_SUBF_SERVICE_TO ); } else { @@ -365,9 +359,14 @@ int nodeLinkClass::enable_subf_handler ( struct nodeLinkClass::node * node_ptr ) rc); /* Report "Enabling Compute Service Failed" to sysinv/horizon */ - mtcInvApi_update_task ( node_ptr, MTC_TASK_ENABLING_SUBF_FAIL ); + mtcInvApi_update_task ( node_ptr, MTC_TASK_SUBF_SERVICE_FAIL ); } - enableStageChange ( node_ptr, MTC_ENABLE__SUBF_FAILED ); + + /* handle auto recovery for this failure */ + if ( ar_manage ( node_ptr, + MTC_AR_DISABLE_CAUSE__HOST_SERVICES, + MTC_TASK_AR_DISABLED_SERVICES ) != PASS ) + break ; } else /* success path */ { @@ -409,8 +408,6 @@ int nodeLinkClass::enable_subf_handler ( struct nodeLinkClass::node * node_ptr ) mtcTimer_reset ( node_ptr->mtcTimer ); } - /* Start Monitoring heartbeat */ - send_hbs_command ( node_ptr->hostname, MTC_CMD_START_HOST ); if ( this->hbs_failure_action == HBS_FAILURE_ACTION__NONE ) { @@ -426,6 +423,9 @@ int nodeLinkClass::enable_subf_handler ( struct nodeLinkClass::node * node_ptr ) /* allow heartbeat to run for 10 seconds before we declare enable */ mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_HEARTBEAT_SOAK_BEFORE_ENABLE ); enableStageChange ( node_ptr, MTC_ENABLE__HEARTBEAT_SOAK ); + + /* Start Monitoring heartbeat */ + send_hbs_command ( node_ptr->hostname, MTC_CMD_START_HOST ); } break ; } @@ -434,6 +434,11 @@ int nodeLinkClass::enable_subf_handler ( struct nodeLinkClass::node * node_ptr ) if ( node_ptr->mtcTimer.ring == true ) { plog ("%s heartbeating\n", name.c_str() ); + + /* 
handle auto recovery clear for this potential cause */ + node_ptr->ar_cause = MTC_AR_DISABLE_CAUSE__NONE ; + node_ptr->ar_count[MTC_AR_DISABLE_CAUSE__HEARTBEAT] = 0 ; + /* if heartbeat is not working then we will * never get here and enable the host */ enableStageChange ( node_ptr, MTC_ENABLE__STATE_CHANGE ); @@ -472,7 +477,7 @@ int nodeLinkClass::enable_subf_handler ( struct nodeLinkClass::node * node_ptr ) { elog ("%s enable failed ; Enable workQueue timeout, purging ...\n", name.c_str()); - mtcInvApi_update_task ( node_ptr, simplex ? MTC_TASK_ENABLE_WORK_TO_ : MTC_TASK_ENABLE_WORK_TO ); + mtcInvApi_update_task ( node_ptr, MTC_TASK_ENABLE_WORK_TO ); fail = true ; } @@ -480,7 +485,7 @@ { elog ("%s enable failed ; Enable doneQueue has failed commands\n", name.c_str()); - mtcInvApi_update_task ( node_ptr, simplex ? MTC_TASK_ENABLE_WORK_FAIL_ : MTC_TASK_ENABLE_WORK_FAIL ); + mtcInvApi_update_task ( node_ptr, MTC_TASK_ENABLE_WORK_FAIL ); fail = true ; } @@ -656,6 +661,8 @@ int nodeLinkClass::enable_subf_handler ( struct nodeLinkClass::node * node_ptr ) node_ptr->dor_recovery_mode = false ; this->dor_mode_active = false ; + ar_enable ( node_ptr ); + mtcInvApi_force_task ( node_ptr, "" ); break ; } diff --git a/mtce/src/scripts/mtc.conf b/mtce/src/scripts/mtc.conf index 8c0730d9..898e8a3b 100644 --- a/mtce/src/scripts/mtc.conf +++ b/mtce/src/scripts/mtc.conf @@ -34,6 +34,35 @@ autorecovery_threshold = 3 ; The number of times maintenance will try to ; while there is no backup controllers to fail ; over to before giving up. + +; Service specific Auto Recovery failure thresholds. +; +; ar_<service>_threshold = <count> +; +; If a host fails to enable due to a particular service failure, for example +; configuration, goenabled etc., then the mtcAgent will stop retrying after +; the particular service's threshold is reached. While at threshold, auto +; recovery for the specified host is disabled. 
The host sits there in the +; unlocked-disabled-failed state with the WEBGUI and horizon host status +; showing that auto recovery is disabled. A lock/unlock is then required +; to trigger another enable attempt and a reset of the auto recovery counter. +ar_config_threshold = 2 +ar_goenable_threshold = 2 +ar_hostservices_threshold = 2 +ar_heartbeat_threshold = 2 + +; Service specific Auto Recovery retry interval. +; +; ar_<service>_interval = <seconds> +; +; When a host fails to enable due to a particular service reason, then +; the mtcAgent will use the service specific interval value specified +; to wait before it retries the enable sequence again. +ar_config_interval = 30 +ar_goenable_interval = 30 +ar_hostservices_interval = 30 +ar_heartbeat_interval = 600 + api_retries = 10 ; number of API retries b4 failure [client] ; Client Configuration