diff --git a/bsp-files/filter_out_from_controller b/bsp-files/filter_out_from_controller
index c07ec6d7..bd5c4604 100644
--- a/bsp-files/filter_out_from_controller
+++ b/bsp-files/filter_out_from_controller
@@ -25,6 +25,7 @@ mtce-guestServer
 nfscheck
 radvd
 config-gate-worker
+isolcpus-device-plugin
 kernel-rt
 kernel-module-igb-uio
 kernel-module-igb-uio-rt
@@ -33,6 +34,7 @@ kernel-rt-modules-extra
 kmod-e1000e-rt
 kmod-i40e-rt
 kmod-iavf-rt
+kmod-ice-rt
 kmod-ixgbe-rt
 kmod-ixgbevf-rt
 kmod-igb_uio-rt
@@ -53,3 +55,7 @@ openvswitch-config
 pci-irq-affinity-agent
 kvm-timer-advance
 sysinv-fpga-agent
+kernel-rt-headers
+kernel-rt-devel
+kernel-headers
+kernel-devel
diff --git a/bsp-files/filter_out_from_smallsystem b/bsp-files/filter_out_from_smallsystem
index 53d70456..5798bbac 100644
--- a/bsp-files/filter_out_from_smallsystem
+++ b/bsp-files/filter_out_from_smallsystem
@@ -13,6 +13,7 @@ kernel-rt-modules-extra
 kmod-e1000e-rt
 kmod-i40e-rt
 kmod-iavf-rt
+kmod-ice-rt
 kmod-ixgbe-rt
 kmod-ixgbevf-rt
 kmod-igb_uio-rt
@@ -26,3 +27,5 @@ qat17-rt
 kernel-rt-tools
 kernel-rt-tools-libs
 kmod-drbd-rt
+kernel-rt-headers
+kernel-rt-devel
diff --git a/bsp-files/filter_out_from_smallsystem_lowlatency b/bsp-files/filter_out_from_smallsystem_lowlatency
index 63952b03..a23573d0 100644
--- a/bsp-files/filter_out_from_smallsystem_lowlatency
+++ b/bsp-files/filter_out_from_smallsystem_lowlatency
@@ -11,6 +11,7 @@ kernel-module-igb-uio
 kmod-e1000e
 kmod-i40e
 kmod-iavf
+kmod-ice
 kmod-ixgbe
 kmod-ixgbevf
 kmod-igb_uio
@@ -23,3 +24,5 @@ kernel-tools
 kernel-tools-libs
 kmod-drbd
 kernel-modules-extra
+kernel-headers
+kernel-devel
diff --git a/bsp-files/filter_out_from_storage b/bsp-files/filter_out_from_storage
index 33122822..bb46fab1 100644
--- a/bsp-files/filter_out_from_storage
+++ b/bsp-files/filter_out_from_storage
@@ -69,6 +69,7 @@ influxdb
 influxdb-extensions
 io-monitor
 io-scheduler
+isolcpus-device-plugin
 isomd5sum
 ipxe-roms-qemu
 kernel-module-openvswitch
@@ -120,8 +121,6 @@ nova-tests
 nova-api-proxy
 nova-placement-api
 novnc
-net-snmp
-net-snmp-config
 openstack-aodh-api
 openstack-aodh-commmon
 openstack-aodh-compat
@@ -256,7 +255,6 @@ qemu-kvm-ev
 qemu-kvm-tools-ev
 radvd
 rubygem-rdoc
-snmp-ext
 task-cloud-compute
 task-cloud-controller
 tgt
@@ -290,6 +288,7 @@ kernel-rt-modules-extra
 kmod-e1000e-rt
 kmod-i40e-rt
 kmod-iavf-rt
+kmod-ice-rt
 kmod-ixgbe-rt
 kmod-ixgbevf-rt
 kmod-igb_uio-rt
@@ -304,7 +303,6 @@ kernel-rt-tools
 kernel-rt-tools-libs
 NaviCLI-Linux-64-x86-en_US
 kmod-drbd-rt
-snmp-audittrail
 wrs-ssl
 tpm2-tools
 tss2
@@ -340,6 +338,11 @@ stx-oidc-auth-helm
 stx-cert-manager-helm
 stx-nginx-ingress-controller-helm
 stx-portieris-helm
+stx-snmp-helm
 stx-vault-helm
 sysinv-fpga-agent
 k8s-pod-recovery
+kernel-rt-headers
+kernel-rt-devel
+kernel-headers
+kernel-devel
diff --git a/bsp-files/filter_out_from_worker b/bsp-files/filter_out_from_worker
index 83b3a9af..8590a982 100644
--- a/bsp-files/filter_out_from_worker
+++ b/bsp-files/filter_out_from_worker
@@ -81,8 +81,6 @@ nova-tests
 nova-api-proxy
 nova-placement-api
 novnc
-net-snmp
-net-snmp-config
 openldap-backend-bdb
 openldap-backend-dnssrv
 openldap-backend-hdb
@@ -138,7 +136,6 @@ python-swiftclient
 python-wsme
 fm-mgr
 fm-rest-api
-snmp-ext
 sm
 sm-api
 sm-client
@@ -258,6 +255,7 @@ kernel-rt-modules-extra
 kmod-e1000e-rt
 kmod-i40e-rt
 kmod-iavf-rt
+kmod-ice-rt
 kmod-ixgbe-rt
 kmod-ixgbevf-rt
 kmod-igb_uio-rt
@@ -272,7 +270,6 @@ kernel-rt-tools
 kernel-rt-tools-libs
 NaviCLI-Linux-64-x86-en_US
 kmod-drbd-rt
-snmp-audittrail
 wrs-ssl
 tpm2-tools
 tss2
@@ -301,5 +298,8 @@ stx-oidc-auth-helm
 stx-cert-manager-helm
 stx-nginx-ingress-controller-helm
 stx-portieris-helm
+stx-snmp-helm
 stx-vault-helm
 k8s-pod-recovery
+kernel-rt-headers
+kernel-rt-devel
diff --git a/bsp-files/filter_out_from_worker_lowlatency b/bsp-files/filter_out_from_worker_lowlatency
index ae4c8fe0..2b02e1df 100644
--- a/bsp-files/filter_out_from_worker_lowlatency
+++ b/bsp-files/filter_out_from_worker_lowlatency
@@ -81,8 +81,6 @@ nova-tests
 nova-api-proxy
 nova-placement-api
 novnc
-net-snmp
-net-snmp-config
 neutron-plugin-ml2
 neutron-server
 neutron-tests
@@ -141,7 +139,6 @@ python-swiftclient
 python-wsme
 fm-mgr
 fm-rest-api
-snmp-ext
 sm
 sm-api
 sm-client
@@ -261,6 +258,7 @@ kernel-module-igb-uio
 kmod-e1000e
 kmod-i40e
 kmod-iavf
+kmod-ice
 kmod-ixgbe
 kmod-ixgbevf
 kmod-igb_uio
@@ -274,7 +272,6 @@ kernel-tools-libs
 kernel-modules-extra
 NaviCLI-Linux-64-x86-en_US
 kmod-drbd-rt
-snmp-audittrail
 wrs-ssl
 tpm2-tools
 tss2
@@ -302,5 +299,8 @@ stx-oidc-auth-helm
 stx-cert-manager-helm
 stx-nginx-ingress-controller-helm
 stx-portieris-helm
+stx-snmp-helm
 stx-vault-helm
 k8s-pod-recovery
+kernel-headers
+kernel-devel
diff --git a/bsp-files/kickstarts/pre_disk_aio.cfg b/bsp-files/kickstarts/pre_disk_aio.cfg
index 68e1a3e9..09d831dc 100755
--- a/bsp-files/kickstarts/pre_disk_aio.cfg
+++ b/bsp-files/kickstarts/pre_disk_aio.cfg
@@ -29,11 +29,12 @@
 ## ETCD_STOR_SIZE = 5GiB
 ## CEPH_MON_SIZE = 20GiB
 ## KUBELET_STOR_SIZE = 10GiB
+## DC_VAULT_SIZE = 15GiB
 ## RESERVED_PE = 16MiB (based on pesize=32768)
 ##
-## CGCS_PV_SIZE = (10 + 2*10 + 25 + 8 + 16 + 2 + 1 + 30 + 16 + 5 + 20 + 10)GiB + 16MiB/1024 = 163.02GiB
+## CGCS_PV_SIZE = (10 + 2*10 + 25 + 8 + 16 + 2 + 1 + 30 + 16 + 5 + 20 + 10 + 15)GiB + 16MiB/1024 = 178.02GiB
 ##
-##***************************************************************************************************
+##**********************************************************************************************************
 ## Small disk install - (for disks below 240GB)
 ##  - DB size is doubled to allow for upgrades
 ##
@@ -50,11 +51,12 @@
 ## ETCD_STOR_SIZE = 5GiB
 ## CEPH_MON_SIZE = 20GiB
 ## KUBELET_STOR_SIZE = 10GiB
+## DC_VAULT_SIZE = 15GiB
 ## RESERVED_PE = 16MiB (based on pesize=32768)
 ##
-## CGCS_PV_SIZE = (10 + 2*5 + 20 + 8 + 16 + 2 + 1 + 30 + 16 + 5 + 20 + 10)GiB + 16MiB/1024 = 148.02GiB
+## CGCS_PV_SIZE = (10 + 2*5 + 20 + 8 + 16 + 2 + 1 + 30 + 16 + 5 + 20 + 10 + 15)GiB + 16MiB/1024 = 163.02GiB
 ##
-##***************************************************************************************************
+##*********************************************************************************************************
 ## Tiny disk install - (for disks below 154GB)
 ##
 ## NOTE: Tiny disk setup is mainly for StarlingX running in QEMU/KVM VM.
@@ -89,15 +91,15 @@ EFI_SIZE=300
 #   which are DEFAULT_SMALL_DISK_SIZE
 #             MINIMUM_SMALL_DISK_SIZE
 default_small_disk_size=240
-minimum_small_disk_size=181
+minimum_small_disk_size=196
 sz=$(blockdev --getsize64 $rootfs_device)
 # Round CGCS_PV_SIZE to the closest upper value that can be divided by 1024.
 if [ $sz -gt $(($default_small_disk_size*$gb)) ] ; then
-    # Large disk: CGCS_PV_SIZE=164GiB*1024=167936
-    CGCS_PV_SIZE=167936
+    # Large disk: CGCS_PV_SIZE=179GiB*1024=183296
+    CGCS_PV_SIZE=183296
 elif [ $sz -ge $(($minimum_small_disk_size*$gb)) ] ; then
-    # Small disk: CGCS_PV_SIZE=149GiB*1024=152576
-    CGCS_PV_SIZE=152576
+    # Small disk: CGCS_PV_SIZE=164GiB*1024=167936
+    CGCS_PV_SIZE=167936
 else
     # Tiny disk: CGCS_PV_SIZE=43GiB*1024=44032
     # Using a disk with a size under 60GiB will fail.
diff --git a/bsp-files/kickstarts/pre_disk_setup_common.cfg b/bsp-files/kickstarts/pre_disk_setup_common.cfg
index 0d885384..07b0304f 100644
--- a/bsp-files/kickstarts/pre_disk_setup_common.cfg
+++ b/bsp-files/kickstarts/pre_disk_setup_common.cfg
@@ -167,6 +167,13 @@ else
         # Avoid wiping ceph osds if sysinv tells us so
         if [ ${WIPE_CEPH_OSDS} == "false" ]; then
             wipe_dev="true"
+
+            pvs | grep -q "$dev *ceph"
+            if [ $? -eq 0 ]; then
+                wlog "skip rook provisoned disk $dev"
+                continue
+            fi
+
             part_numbers=( `parted -s $dev print | awk '$1 == "Number" {i=1; next}; i {print $1}'` )
             # Scanning the partitions looking for CEPH OSDs and
             # skipping any disk found with such partitions
@@ -178,7 +185,15 @@ else
                     wipe_dev="false"
                     break
                 fi
+
+                pvs | grep -q -e "${dev}${part_number} *ceph" -e "${dev}p${part_number} *ceph"
+                if [ $? -eq 0 ]; then
+                    wlog "Rook OSD found on $dev$part_number, skip wipe"
+                    wipe_dev="false"
+                    break
+                fi
             done
+
             if [ "$wipe_dev" == "false" ]; then
                 continue
             fi
diff --git a/installer/pxe-network-installer/centos/build_srpm.data b/installer/pxe-network-installer/centos/build_srpm.data
index 7be54e52..d59c4cec 100644
--- a/installer/pxe-network-installer/centos/build_srpm.data
+++ b/installer/pxe-network-installer/centos/build_srpm.data
@@ -6,6 +6,6 @@ COPY_LIST="pxe-network-installer/* \
            /import/mirrors/CentOS/stx-installer/vmlinuz \
 "
 
-TIS_PATCH_VER=28
+TIS_PATCH_VER=PKG_GITREVCOUNT+13
 BUILD_IS_BIG=4
 BUILD_IS_SLOW=4
diff --git a/installer/pxe-network-installer/centos/pxe-network-installer.spec b/installer/pxe-network-installer/centos/pxe-network-installer.spec
index bd767a0d..d6268e38 100644
--- a/installer/pxe-network-installer/centos/pxe-network-installer.spec
+++ b/installer/pxe-network-installer/centos/pxe-network-installer.spec
@@ -110,6 +110,7 @@ install -v -m 644 %{_sourcedir}/efi-centos-pxe-worker_lowlatency-install \
 install -v -m 644 %{_sourcedir}/efi-centos-pxe-smallsystem_lowlatency-install \
     %{buildroot}/pxeboot/pxelinux.cfg.files/efi-pxe-smallsystem_lowlatency-install-%{platform_release}
 
+ln -sf /pxeboot/EFI/grubx64.efi %{buildroot}/pxeboot/grubx64.efi
 
 sed -i "s/xxxSW_VERSIONxxx/%{platform_release}/g" \
     %{buildroot}/pxeboot/pxelinux.cfg.files/pxe-* \
diff --git a/mtce-common/src/common/bmcUtil.cpp b/mtce-common/src/common/bmcUtil.cpp
index 43bed414..964e4eca 100644
--- a/mtce-common/src/common/bmcUtil.cpp
+++ b/mtce-common/src/common/bmcUtil.cpp
@@ -274,9 +274,9 @@ void bmcUtil_create_pw_file ( thread_info_type * info_ptr,
  *
  *************************************************************************/
 
-string bmcUtil_create_data_fn ( string & hostname,
-                                string   file_suffix,
-                     bmc_protocol_enum   protocol )
+string bmcUtil_create_data_fn ( const string & hostname,
+                                string file_suffix,
+                     bmc_protocol_enum protocol )
 {
     /* create the output filename */
     string datafile ;
diff --git a/mtce-common/src/common/bmcUtil.h b/mtce-common/src/common/bmcUtil.h
index 0208b88c..8c2a351d 100644
--- a/mtce-common/src/common/bmcUtil.h
+++ b/mtce-common/src/common/bmcUtil.h
@@ -82,6 +82,14 @@ typedef struct
 
 } bmc_info_type ;
 
+typedef struct
+{
+    string hostname;
+    string host_ip ;
+    string   bm_ip ;
+    string   bm_un ;
+    string   bm_pw ;
+} bmcUtil_accessInfo_type ;
 
 /* BMC commands */
 typedef enum
@@ -107,6 +115,7 @@ typedef enum
 #define BMC_QUERY_FILE_SUFFIX          ((const char *)("_root_query"))
 #define BMC_INFO_FILE_SUFFIX           ((const char *)("_bmc_info"))
 #define BMC_POWER_CMD_FILE_SUFFIX      ((const char *)("_power_cmd_result"))
+#define BMC_RESET_CMD_FILE_SUFFIX      ((const char *)("_reset"))
 #define BMC_BOOTDEV_CMD_FILE_SUFFIX    ((const char *)("_bootdev"))
 #define BMC_RESTART_CAUSE_FILE_SUFFIX  ((const char *)("_restart_cause"))
 #define BMC_POWER_STATUS_FILE_SUFFIX   ((const char *)("_power_status"))
@@ -137,9 +146,9 @@ void bmcUtil_create_pw_file ( thread_info_type * info_ptr,
                              bmc_protocol_enum   protocol );
 
 /* create the output filename */
-string bmcUtil_create_data_fn ( string & hostname,
-                                string   file_suffix,
-                     bmc_protocol_enum   protocol );
+string bmcUtil_create_data_fn ( const string & hostname,
+                                string file_suffix,
+                     bmc_protocol_enum protocol );
 
 /*  Get power state from query response data. */
 int bmcUtil_is_power_on ( string              hostname,
diff --git a/mtce-common/src/common/hostUtil.cpp b/mtce-common/src/common/hostUtil.cpp
index 06b15c16..5e5c4a16 100644
--- a/mtce-common/src/common/hostUtil.cpp
+++ b/mtce-common/src/common/hostUtil.cpp
@@ -130,6 +130,14 @@ bool hostUtil_is_valid_username ( string un )
     return (false);
 }
 
+bool hostUtil_is_valid_pw ( string pw )
+{
+    if ( !pw.empty() )
+        if ( pw.compare(NONE) )
+            return (true);
+    return (false);
+}
+
 bool hostUtil_is_valid_mac_addr ( string mac )
 {
     if ( !mac.empty() )
diff --git a/mtce-common/src/common/hostUtil.h b/mtce-common/src/common/hostUtil.h
index 09e19b53..d9a561a8 100644
--- a/mtce-common/src/common/hostUtil.h
+++ b/mtce-common/src/common/hostUtil.h
@@ -46,6 +46,7 @@ string hostUtil_getPrefixPath  ( void );
 bool hostUtil_is_valid_uuid    ( string uuid );
 bool hostUtil_is_valid_ip_addr ( string ip );
 bool hostUtil_is_valid_username ( string un );
+bool hostUtil_is_valid_pw      ( string pw );
 bool hostUtil_is_valid_bm_type ( string bm_type );
 
 int  hostUtil_mktmpfile ( string hostname, string basename, string & filename, string data );
diff --git a/mtce-common/src/common/ipmiUtil.cpp b/mtce-common/src/common/ipmiUtil.cpp
index c5e03193..0679df2e 100644
--- a/mtce-common/src/common/ipmiUtil.cpp
+++ b/mtce-common/src/common/ipmiUtil.cpp
@@ -202,3 +202,66 @@ int ipmiUtil_bmc_info_load ( string hostname, const char * filename, bmc_info_ty
     ipmiUtil_bmc_info_log ( hostname, bmc_info, rc );
     return (rc);
 }
+
+
+int ipmiUtil_reset_host_now ( string hostname,
+                              bmcUtil_accessInfo_type accessInfo,
+                              string output_filename)
+{
+    dlog("%s %s BMC [IP:%s UN:%s]",
+          accessInfo.hostname.c_str(),
+          accessInfo.host_ip.c_str(),
+          accessInfo.bm_ip.c_str(),
+          accessInfo.bm_un.c_str());
+
+    if (daemon_is_file_present ( BMC_OUTPUT_DIR ) == false )
+        daemon_make_dir(BMC_OUTPUT_DIR) ;
+    if (daemon_is_file_present ( IPMITOOL_OUTPUT_DIR ) == false )
+        daemon_make_dir(IPMITOOL_OUTPUT_DIR) ;
+
+    /* create temp password file */
+    thread_info_type info ;
+    info.hostname = accessInfo.hostname ;
+    info.password_file = "" ;
+    info.pw_file_fd = 0 ;
+
+    /* Use common utility to create a temp pw file */
+    bmcUtil_create_pw_file ( &info, accessInfo.bm_pw, BMC_PROTOCOL__IPMITOOL );
+
+    /* create request */
+    string request =
+    ipmiUtil_create_request ( IPMITOOL_POWER_RESET_CMD,
+                              accessInfo.bm_ip,
+                              accessInfo.bm_un,
+                              info.password_file,
+                              output_filename );
+
+    /* issue request
+     *
+     * Note: Could launch a thread to avoid any stall.
+     *       However, mtcClient can withstand up to a 25 second stall
+     *       before pmon will fail it due to active monitoring.
+     *       UT showed that there is no stall at all. */
+    unsigned long long latency_threshold_secs = DEFAULT_SYSTEM_REQUEST_LATENCY_SECS ;
+    unsigned long long before_time = gettime_monotonic_nsec () ;
+    int rc = system ( request.data()) ;
+    unsigned long long after_time = gettime_monotonic_nsec () ;
+    unsigned long long delta_time = after_time-before_time ;
+    if ( rc )
+    {
+        wlog("system call failed ; rc:%d [%d:%s]", rc, errno, strerror(errno) );
+        rc = FAIL_SYSTEM_CALL ;
+    }
+    if ( delta_time > (latency_threshold_secs*1000000000))
+    {
+        wlog ("%s bmc system call took %2llu.%-8llu sec", hostname.c_str(),
+              (delta_time > NSEC_TO_SEC) ? (delta_time/NSEC_TO_SEC) : 0,
+              (delta_time > NSEC_TO_SEC) ? (delta_time%NSEC_TO_SEC) : 0);
+    }
+
+    /* Cleanup */
+    if ( info.pw_file_fd > 0 )
+        close(info.pw_file_fd);
+    daemon_remove_file ( info.password_file.data());
+    return (rc);
+}
diff --git a/mtce-common/src/common/ipmiUtil.h b/mtce-common/src/common/ipmiUtil.h
index 7cc9edbc..39e8b9fd 100644
--- a/mtce-common/src/common/ipmiUtil.h
+++ b/mtce-common/src/common/ipmiUtil.h
@@ -57,6 +57,8 @@ int ipmiUtil_init ( void );
 
 int  ipmiUtil_bmc_info_load ( string hostname, const char * filename, bmc_info_type & mc_info );
 
+int  ipmiUtil_reset_host_now ( string hostname, bmcUtil_accessInfo_type accessInfo, string output_filename );
+
 /* Create the ipmi request */
 string ipmiUtil_create_request ( string cmd, string & ip, string & un, string & pw, string & out );
 
diff --git a/mtce-common/src/common/nodeBase.cpp b/mtce-common/src/common/nodeBase.cpp
index 3c7c482a..046db72a 100755
--- a/mtce-common/src/common/nodeBase.cpp
+++ b/mtce-common/src/common/nodeBase.cpp
@@ -149,6 +149,8 @@ const char * get_mtcNodeCommand_str ( int cmd )
         case MTC_REQ_MTCALIVE:    return ("mtcAlive req");
         case MTC_MSG_LOCKED:      return ("locked msg");
         case MTC_CMD_LAZY_REBOOT: return ("lazy reboot");
+        case MTC_MSG_INFO:        return ("info msg");
+        case MTC_CMD_SYNC:        return ("sync");
 
         /* goenabled commands and messages */
         case MTC_MSG_MAIN_GOENABLED:         return ("goEnabled main msg");
@@ -199,7 +201,8 @@ const char * get_mtcNodeCommand_str ( int cmd )
         case MTC_EVENT_PMON_MAJOR: return("pmon major event");
         case MTC_EVENT_PMON_MINOR: return("pmon minor event");
         case MTC_EVENT_PMON_LOG:   return("pmon log");
-        case MTC_EVENT_PMOND_RAISE: return("pmon raise");
+        case MTC_EVENT_PMOND_RAISE: return("pmond raise");
+        case MTC_EVENT_PMOND_CLEAR: return("pmond clear");
 
         /* data port events */
         case MTC_EVENT_AVS_CLEAR:    return("AVS clear");
@@ -394,10 +397,9 @@ void mtc_stages_init ( void )
    recoveryStages_str[MTC_RECOVERY__HEARTBEAT_START    ] = "Heartbeat-Start";
    recoveryStages_str[MTC_RECOVERY__HEARTBEAT_SOAK     ] = "Heartbeat-Soak";
    recoveryStages_str[MTC_RECOVERY__STATE_CHANGE       ] = "State Change";
-   recoveryStages_str[MTC_RECOVERY__ENABLE_START       ] = "Enable-Start";
    recoveryStages_str[MTC_RECOVERY__FAILURE            ] = "Failure";
    recoveryStages_str[MTC_RECOVERY__WORKQUEUE_WAIT     ] = "WorkQ-Wait";
-   recoveryStages_str[MTC_RECOVERY__ENABLE_WAIT        ] = "Enable-Wait";
+   recoveryStages_str[MTC_RECOVERY__ENABLE             ] = "Enable";
    recoveryStages_str[MTC_RECOVERY__STAGES             ] = "unknown";
 
    disableStages_str [MTC_DISABLE__START               ] = "Disable-Start";
diff --git a/mtce-common/src/common/nodeBase.h b/mtce-common/src/common/nodeBase.h
index 0603c0ce..b98c34d4 100755
--- a/mtce-common/src/common/nodeBase.h
+++ b/mtce-common/src/common/nodeBase.h
@@ -185,7 +185,7 @@ typedef enum
 #define DEFAULT_MTCALIVE_TIMEOUT    (1200)
 #define DEFAULT_GOENABLE_TIMEOUT     (300)
 #define DEFAULT_DOR_MODE_TIMEOUT      (20)
-#define DEFAULT_DOR_MODE_CPE_TIMEOUT (600)
+#define DEFAULT_DOR_MODE_AIO_TIMEOUT (600)
 
 /** TODO: Convert names to omit JSON part */
 #define MTC_JSON_INV_LABEL     "ihosts"
@@ -263,6 +263,7 @@ typedef enum
 #define MTC_TASK_ENABLE_WORK_FAIL  "Enable Action Failed"
 #define MTC_TASK_ENABLE_WORK_TO    "Enable Action Timeout"
 #define MTC_TASK_ENABLE_FAIL_HB    "Enable Heartbeat Failure, re-enabling"
+#define MTC_TASK_RECOVERY_FAIL_HB  "Graceful Recovery Heartbeat Failure, re-enabling"
 #define MTC_TASK_RECOVERY_FAIL     "Graceful Recovery Failed, re-enabling"
 #define MTC_TASK_RECOVERY_WAIT     "Graceful Recovery Wait"
 #define MTC_TASK_RECOVERED         "Gracefully Recovered"
@@ -311,7 +312,7 @@ typedef enum
 #define MTC_TASK_POWERCYCLE_FAIL   "Critical Event Power-Cycle %d; failed"
 #define MTC_TASK_POWERCYCLE_DOWN   "Critical Event Power-Down ; due to persistent critical sensor"
 #define MTC_TASK_RESETTING_HOST    "Resetting Host, critical sensor"
-#define MTC_TASK_CPE_SX_UNLOCK_MSG "Unlocking, please stand-by while the system gracefully reboots"
+#define MTC_TASK_AIO_SX_UNLOCK_MSG "Unlocking, please stand-by while the system gracefully reboots"
 #define MTC_TASK_SELF_UNLOCK_MSG   "Unlocking active controller, please stand-by while it reboots"
 #define MTC_TASK_FAILED_SWACT_REQ  "Critical failure.Requesting SWACT to enabled standby controller"
 #define MTC_TASK_FAILED_NO_BACKUP  "Critical failure.Please provision/enable standby controller"
@@ -383,8 +384,8 @@ typedef enum
 /* 5 milliseconds */
 #define MTCAGENT_SELECT_TIMEOUT (5000)
 
-/* dedicate more idle time in CPE ; there is less maintenance to do */
-#define MTCAGENT_CPE_SELECT_TIMEOUT (10000)
+/* dedicate more idle time in AIO ; there is less maintenance to do */
+#define MTCAGENT_AIO_SELECT_TIMEOUT (10000)
 
 /** Number of retries maintenance will do when it experiences
  *  a REST API call failure ; any failure */
@@ -751,7 +752,9 @@ typedef struct
 #define MTC_CMD_START_STORAGE_SVCS    19  /*   to host */
 #define MTC_CMD_LAZY_REBOOT           20  /*   to host */
 #define MTC_CMD_HOST_SVCS_RESULT      21  /*   to host */
-#define MTC_CMD_LAST                  22
+#define MTC_MSG_INFO                  22  /*   to host */
+#define MTC_CMD_SYNC                  23  /*   to host */
+#define MTC_CMD_LAST                  24
 
 #define RESET_PROG_MAX_REBOOTS_B4_RESET (5)
 #define RESET_PROG_MAX_REBOOTS_B4_RETRY (RESET_PROG_MAX_REBOOTS_B4_RESET+2)
@@ -946,7 +949,7 @@ typedef enum
 string get_delStages_str ( mtc_delStages_enum stage );
 
 
-#define MTC_MAX_FAST_ENABLES (3)
+#define MTC_MAX_FAST_ENABLES (5)
 typedef enum
 {
     MTC_RECOVERY__START =  0,
@@ -972,10 +975,9 @@ typedef enum
     MTC_RECOVERY__HEARTBEAT_START,
     MTC_RECOVERY__HEARTBEAT_SOAK,
     MTC_RECOVERY__STATE_CHANGE,
-    MTC_RECOVERY__ENABLE_START,
     MTC_RECOVERY__FAILURE,
     MTC_RECOVERY__WORKQUEUE_WAIT,
-    MTC_RECOVERY__ENABLE_WAIT,
+    MTC_RECOVERY__ENABLE,
     MTC_RECOVERY__STAGES,
 } mtc_recoveryStages_enum ;
 
@@ -1263,6 +1265,14 @@ typedef enum
     MTC_AR_DISABLE_CAUSE__NONE,
 } autorecovery_disable_cause_enum ;
 
+/* code that represents a specific group of maintenance information
+ * ... typically for a specific feature */
+typedef enum
+{
+    MTC_INFO_CODE__PEER_CONTROLLER_KILL_INFO,
+    MTC_INFO_CODE__LAST
+} mtcInfo_enum ;
+
 /* Service Based Auto Recovery Control Structure */
 typedef struct
 {
diff --git a/mtce-common/src/common/threadUtil.cpp b/mtce-common/src/common/threadUtil.cpp
index 034647eb..46e650bb 100644
--- a/mtce-common/src/common/threadUtil.cpp
+++ b/mtce-common/src/common/threadUtil.cpp
@@ -309,6 +309,48 @@ bool thread_idle ( thread_ctrl_type & ctrl )
     return (false);
 }
 
+/****************************************************************************
+ *
+ * Name       : thread_done_consume
+ *
+ * Description: Return to IDLE stage.
+ *
+ ****************************************************************************/
+
+int thread_done_consume ( thread_ctrl_type & ctrl, thread_info_type & info )
+{
+    if ( ctrl.stage == THREAD_STAGE__IDLE )
+    {
+        return PASS ;
+    }
+    else if ( ctrl.done == false )
+    {
+        if ( info.runcount > ctrl.runcount )
+        {
+            ilog("%s thread cleanup ; cmd:%d ; cnt:%d:%d",
+                 info.hostname.c_str(),
+                 info.command,
+                 ctrl.runcount,
+                 info.runcount);
+            ctrl.done = true ;
+            ctrl.stage = THREAD_STAGE__DONE ;
+            thread_handler (ctrl, info);
+            return PASS ;
+        }
+        else
+        {
+            thread_kill(ctrl, info);
+            return RETRY ;
+        }
+    }
+    else
+    {
+        ctrl.stage = THREAD_STAGE__DONE ;
+        thread_handler( ctrl, info );
+        return PASS ;
+    }
+}
+
 /****************************************************************************
  *
  * Name       : thread_launch
@@ -381,7 +423,7 @@ void thread_kill ( thread_ctrl_type & ctrl, thread_info_type & info )
         ( ctrl.stage != THREAD_STAGE__WAIT ) &&
         ( ctrl.stage != THREAD_STAGE__IDLE ))
     {
-        blog ("%s kill request\n", ctrl.hostname.c_str() );
+        wlog ("%s kill request\n", ctrl.hostname.c_str() );
         _stage_change ( ctrl, THREAD_STAGE__KILL );
     }
 }
diff --git a/mtce-common/src/common/threadUtil.h b/mtce-common/src/common/threadUtil.h
index 552d47bb..2cbabe41 100644
--- a/mtce-common/src/common/threadUtil.h
+++ b/mtce-common/src/common/threadUtil.h
@@ -284,6 +284,7 @@ bool   thread_done   ( thread_ctrl_type & ctrl );
 bool   thread_idle   ( thread_ctrl_type & ctrl );
 void   thread_kill   ( thread_ctrl_type & ctrl , thread_info_type & info );
 string thread_stage  ( thread_ctrl_type & ctrl );
+int    thread_done_consume ( thread_ctrl_type & ctrl, thread_info_type & info );
 
 /* Cooperative service of cancel and exit requests from parent */
 void pthread_signal_handler ( thread_info_type * info_ptr );
diff --git a/mtce-common/src/daemon/daemon_common.h b/mtce-common/src/daemon/daemon_common.h
index 3f9ac031..0f9f5322 100755
--- a/mtce-common/src/daemon/daemon_common.h
+++ b/mtce-common/src/daemon/daemon_common.h
@@ -38,15 +38,15 @@ using namespace std ;
 /* List of different types */
 typedef enum
 {
-    SYSTEM_TYPE__NORMAL                  =0,
-    SYSTEM_TYPE__CPE_MODE__DUPLEX        =1,
-    SYSTEM_TYPE__CPE_MODE__DUPLEX_DIRECT =2,
-    SYSTEM_TYPE__CPE_MODE__SIMPLEX       =3,
+    SYSTEM_TYPE__NORMAL             =0,
+    SYSTEM_TYPE__AIO__DUPLEX        =1,
+    SYSTEM_TYPE__AIO__DUPLEX_DIRECT =2,
+    SYSTEM_TYPE__AIO__SIMPLEX       =3,
 } system_type_enum ;
 
 
 /** Called by signal handler on daemon exit
-  * Performs cleanup by closing open files 
+  * Performs cleanup by closing open files
   * and freeing used memory */
 void daemon_exit ( void );
 
diff --git a/mtce-common/src/daemon/daemon_files.cpp b/mtce-common/src/daemon/daemon_files.cpp
index 8272e7a8..0809b756 100755
--- a/mtce-common/src/daemon/daemon_files.cpp
+++ b/mtce-common/src/daemon/daemon_files.cpp
@@ -347,7 +347,7 @@ string daemon_mgmnt_iface ( void )
 system_type_enum daemon_system_type ( void )
 {
     char buffer  [BUFFER];
-    system_type_enum system_type = SYSTEM_TYPE__CPE_MODE__SIMPLEX ;
+    system_type_enum system_type = SYSTEM_TYPE__AIO__SIMPLEX ;
     FILE * cfg_file_stream = fopen ( PLATFORM_CONF_FILE, "r" );
     if ( cfg_file_stream != NULL )
     {
@@ -401,11 +401,11 @@ system_type_enum daemon_system_type ( void )
                         if ( !mode.empty() )
                         {
                             if ( mode.compare("duplex") == 0 )
-                                system_type = SYSTEM_TYPE__CPE_MODE__DUPLEX ;
+                                system_type = SYSTEM_TYPE__AIO__DUPLEX ;
                             else if ( mode.compare("duplex-direct") == 0 )
-                                system_type = SYSTEM_TYPE__CPE_MODE__DUPLEX_DIRECT ;
+                                system_type = SYSTEM_TYPE__AIO__DUPLEX_DIRECT ;
                             else if ( mode.compare("simplex") == 0 )
-                                system_type = SYSTEM_TYPE__CPE_MODE__SIMPLEX ;
+                                system_type = SYSTEM_TYPE__AIO__SIMPLEX ;
                             else
                             {
                                 elog ("%s All-In-One system type ; mode unknown\n", SYSTEM_TYPE_PREFIX );
@@ -438,21 +438,21 @@ system_type_enum daemon_system_type ( void )
             ilog("%s Standard System\n", SYSTEM_TYPE_PREFIX);
             break ;
         }
-        case SYSTEM_TYPE__CPE_MODE__DUPLEX_DIRECT:
+        case SYSTEM_TYPE__AIO__DUPLEX_DIRECT:
         {
             ilog ("%s All-in-one Duplex Direct Connect\n", SYSTEM_TYPE_PREFIX );
             break ;
         }
-        case SYSTEM_TYPE__CPE_MODE__DUPLEX:
+        case SYSTEM_TYPE__AIO__DUPLEX:
         {
             ilog ("%s All-in-one Duplex\n", SYSTEM_TYPE_PREFIX );
             break ;
         }
-        case SYSTEM_TYPE__CPE_MODE__SIMPLEX:
+        case SYSTEM_TYPE__AIO__SIMPLEX:
         default:
         {
             ilog ("%s All-in-one Simplex \n", SYSTEM_TYPE_PREFIX );
-            system_type = SYSTEM_TYPE__CPE_MODE__SIMPLEX ;
+            system_type = SYSTEM_TYPE__AIO__SIMPLEX ;
             break ;
         }
     }
diff --git a/mtce-control/src/scripts/hbsAgent.service b/mtce-control/src/scripts/hbsAgent.service
index 7e111707..bd4bcd63 100644
--- a/mtce-control/src/scripts/hbsAgent.service
+++ b/mtce-control/src/scripts/hbsAgent.service
@@ -1,22 +1,13 @@
 [Unit]
 Description=StarlingX Maintenance Heartbeat Agent
-After=network.target syslog.service config.service
+After=hbsClient.service
 Before=pmon.service
 
 [Service]
 Type=forking
 ExecStart=/etc/rc.d/init.d/hbsAgent start
-ExecStop=/etc/rc.d/init.d/hbsAgent start
+ExecStop=/etc/rc.d/init.d/hbsAgent stop
 PIDFile=/var/run/hbsAgent.pid
-KillMode=process
-SendSIGKILL=no
-
-# Process recovery is handled by pmond if its running.
-# Delay 10 seconds to give pmond a chance to recover
-# before systemd kicks in to do it as a backup plan.
-Restart=always
-RestartSec=10
 
 [Install]
 WantedBy=multi-user.target
-
diff --git a/mtce/src/alarm/scripts/mtcalarm.logrotate b/mtce/src/alarm/scripts/mtcalarm.logrotate
index c1b91aa2..8287c7e5 100644
--- a/mtce/src/alarm/scripts/mtcalarm.logrotate
+++ b/mtce/src/alarm/scripts/mtcalarm.logrotate
@@ -1,17 +1,19 @@
-#daily
-nodateext
-start 1
-compress
-copytruncate
-notifempty
-missingok
+#
+# Copyright (c) 2018-2021 Wind River Systems, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
 
 /var/log/mtcalarmd.log
 {
+    create 0640 root root
+    start 1
     size 10M
     rotate 20
-    sharedscripts
+    compress
+    notifempty
+    missingok
     postrotate
         systemctl reload syslog-ng > /dev/null 2>&1 || true
     endscript
+    delaycompress
 }
diff --git a/mtce/src/common/nodeClass.cpp b/mtce/src/common/nodeClass.cpp
index ae43fe64..e2320430 100755
--- a/mtce/src/common/nodeClass.cpp
+++ b/mtce/src/common/nodeClass.cpp
@@ -660,7 +660,7 @@ nodeLinkClass::node* nodeLinkClass::addNode( string hostname )
     {
         ptr->alarms[id] = FM_ALARM_SEVERITY_CLEAR ;
     }
-    ptr->alarms_loaded   = false ;
+    ptr->active_alarms = "" ; /* no active alarms */
 
     ptr->cfgEvent.base   = NULL ;
     ptr->sysinvEvent.base= NULL ;
@@ -778,6 +778,7 @@ nodeLinkClass::node* nodeLinkClass::addNode( string hostname )
     return ptr ;
 }
 
+
 struct nodeLinkClass::node* nodeLinkClass::getNode ( string hostname )
 {
    /* check for empty list condition */
@@ -2706,7 +2707,7 @@ int nodeLinkClass::add_host ( node_inv_type & inv )
         node_ptr->operState   = operState_str_to_enum   (inv.oper.data ());
         node_ptr->availStatus = availStatus_str_to_enum (inv.avail.data());
 
-        if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
+        if (( AIO_SYSTEM ) && ( is_controller(node_ptr) == true ))
         {
             node_ptr->operState_subf   = operState_str_to_enum (inv.oper_subf.data());
             node_ptr->availStatus_subf = availStatus_str_to_enum (inv.avail_subf.data());
@@ -2818,7 +2819,7 @@ int nodeLinkClass::add_host ( node_inv_type & inv )
                     node_ptr->operState   = operState_str_to_enum   (inv.oper.data ());
                     node_ptr->availStatus = availStatus_str_to_enum (inv.avail.data());
 
-                    if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
+                    if (( AIO_SYSTEM ) && ( is_controller(node_ptr) == true ))
                     {
                         node_ptr->operState_subf   = operState_str_to_enum (inv.oper_subf.data());
                         node_ptr->availStatus_subf = availStatus_str_to_enum (inv.avail_subf.data());
@@ -2835,7 +2836,7 @@ int nodeLinkClass::add_host ( node_inv_type & inv )
                     node_ptr->operState   = operState_str_to_enum   (inv.oper.data ());
                     node_ptr->availStatus = availStatus_str_to_enum (inv.avail.data());
 
-                    if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
+                    if (( AIO_SYSTEM ) && ( is_controller(node_ptr) == true ))
                     {
                         node_ptr->operState_subf   = operState_str_to_enum (inv.oper_subf.data());
                         node_ptr->availStatus_subf = availStatus_str_to_enum (inv.avail_subf.data());
@@ -2853,7 +2854,7 @@ int nodeLinkClass::add_host ( node_inv_type & inv )
                     node_ptr->operState   = operState_str_to_enum   (inv.oper.data ());
                     node_ptr->availStatus = availStatus_str_to_enum (inv.avail.data());
 
-                    if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
+                    if (( AIO_SYSTEM ) && ( is_controller(node_ptr) == true ))
                     {
                         node_ptr->operState_subf   = operState_str_to_enum (inv.oper_subf.data());
                         node_ptr->availStatus_subf = availStatus_str_to_enum (inv.avail_subf.data());
@@ -2871,7 +2872,7 @@ int nodeLinkClass::add_host ( node_inv_type & inv )
                     node_ptr->operState   = operState_str_to_enum   (inv.oper.data ());
                     node_ptr->availStatus = availStatus_str_to_enum (inv.avail.data());
 
-                    if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
+                    if (( AIO_SYSTEM ) && ( is_controller(node_ptr) == true ))
                     {
                         node_ptr->operState_subf   = operState_str_to_enum (inv.oper_subf.data());
                         node_ptr->availStatus_subf = availStatus_str_to_enum (inv.avail_subf.data());
@@ -2889,7 +2890,7 @@ int nodeLinkClass::add_host ( node_inv_type & inv )
                     node_ptr->operState   = operState_str_to_enum   (inv.oper.data ());
                     node_ptr->availStatus = availStatus_str_to_enum (inv.avail.data());
 
-                    if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
+                    if (( AIO_SYSTEM ) && ( is_controller(node_ptr) == true ))
                     {
                         node_ptr->operState_subf   = operState_str_to_enum (inv.oper_subf.data());
                         node_ptr->availStatus_subf = availStatus_str_to_enum (inv.avail_subf.data());
@@ -2940,7 +2941,7 @@ int nodeLinkClass::add_host ( node_inv_type & inv )
                     node_ptr->operState   = MTC_OPER_STATE__DISABLED ;
                     node_ptr->availStatus = MTC_AVAIL_STATUS__OFFLINE ;
 
-                    if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
+                    if (( AIO_SYSTEM ) && ( is_controller(node_ptr) == true ))
                     {
                         node_ptr->operState_subf   = MTC_OPER_STATE__DISABLED ;
                         node_ptr->availStatus_subf = MTC_AVAIL_STATUS__OFFLINE ;
@@ -2958,7 +2959,7 @@ int nodeLinkClass::add_host ( node_inv_type & inv )
                 node_ptr->operState   = operState_str_to_enum   (inv.oper.data ());
                 node_ptr->availStatus = availStatus_str_to_enum (inv.avail.data());
 
-                if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
+                if (( AIO_SYSTEM ) && ( is_controller(node_ptr) == true ))
                 {
                     node_ptr->operState_subf   = operState_str_to_enum (inv.oper_subf.data());
                     node_ptr->availStatus_subf = availStatus_str_to_enum (inv.avail_subf.data());
@@ -3295,6 +3296,102 @@ void nodeLinkClass::mtcInfo_log ( struct nodeLinkClass::node * node_ptr )
     }
 }
 
+/***************************************************************************
+ *
+ * Name        : build_mtcInfo_dict
+ *
+ * Purpose     : Build a json dictionary for the specified info code enum
+ *
+ * Assumptions : Only MTC_INFO_CODE__PEER_CONTROLLER_KILL_INFO is supported
+ *
+ * Returns     : Returns a json dictionary of mtcInfo.
+ *
+ *               {
+ *                  "controller-0":{
+ *                      "ip":"192.168.204.2",
+ *                      "bm_ip":"xxx.xxx.xx.23",
+ *                      "bm_un":"root",
+ *                      "bm_pw":"root"
+ *                   },
+ *                   "controller-1":{
+ *                      "ip":"192.168.204.3",
+ *                      "bm_ip":"xxx.xxx.xx.24",
+ *                      "bm_un":"root",
+ *                      "bm_pw":"root"
+ *                   }
+ *               }
+ *
+ **************************************************************************/
+
+string nodeLinkClass::build_mtcInfo_dict ( mtcInfo_enum mtcInfo_code )
+{
+    string mtcInfo_dict = "" ;
+
+    /* loop/exit control */
+    int temp = 0 ;
+
+    /* should never happen but better to be safe */
+    if ( head == NULL )
+        return mtcInfo_dict ;
+
+    /* force the update to be a dictionary */
+    mtcInfo_dict = "{" ;
+
+    for ( struct node * ptr = head ;  ; ptr = ptr->next )
+    {
+        if (( ptr->nodetype & CONTROLLER_TYPE ) &&
+            ( mtcInfo_code == MTC_INFO_CODE__PEER_CONTROLLER_KILL_INFO ))
+        {
+            if ( temp )
+                mtcInfo_dict.append(",");
+            mtcInfo_dict.append("\"" + ptr->hostname + "\":{");
+            mtcInfo_dict.append("\"mgmt_ip\":\"" + ptr->ip + "\",");
+            mtcInfo_dict.append("\"bm_ip\":\"" + ptr->bm_ip + "\",");
+            mtcInfo_dict.append("\"bm_un\":\"" + ptr->bm_un + "\",");
+            mtcInfo_dict.append("\"bm_pw\":\"" + ptr->bm_pw + "\"}");
+            if ( ++temp >= 2 )
+                break ;
+        }
+        if (( ptr->next == NULL ) || ( ptr == tail ))
+           break ;
+    }
+    mtcInfo_dict.append("}");
+    return mtcInfo_dict ;
+}
+
+/**************************************************************************
+ *
+ * Name          : mtcInfo_handler
+ *
+ * Purpose       : Send mtcInfo update to provisioned controllers when
+ *                 the push flag is set.
+ *
+ **************************************************************************/
+
+void nodeLinkClass::mtcInfo_handler ( void )
+{
+    /* This is set in the bm_handler once access to the BMC using
+     * provisioned credentials have been verified. */
+    if ( this->want_mtcInfo_push )
+    {
+        /* handler will enhance when more codes are introduced */
+        mtcInfo_enum mtcInfo_code = MTC_INFO_CODE__PEER_CONTROLLER_KILL_INFO ;
+
+        string mtcInfo_dict = build_mtcInfo_dict(mtcInfo_code);
+        if ( ! mtcInfo_dict.empty() )
+        {
+            string temp = CONTROLLER_0 ;
+            send_mtc_cmd ( temp, MTC_MSG_INFO, MGMNT_INTERFACE, mtcInfo_dict);
+            if ( this->controllers > 1 )
+            {
+                temp = CONTROLLER_1;
+                send_mtc_cmd ( temp, MTC_MSG_INFO, MGMNT_INTERFACE, mtcInfo_dict);
+            }
+        }
+        this->want_mtcInfo_push = false ;
+    }
+}
+
 /* Lock Rules
  *
  * 1. Cannot lock this controller
@@ -4034,6 +4131,18 @@ int  nodeLinkClass::get_uptime_refresh_ctr ( string & hostname )
     return (0);
 }
 
+
+int nodeLinkClass::get_mtce_flags ( string & hostname )
+{
+    nodeLinkClass::node* node_ptr ;
+    node_ptr = nodeLinkClass::getNode ( hostname );
+    if ( node_ptr != NULL )
+    {
+        return ( node_ptr->mtce_flags );
+    }
+    return (0);
+}
+
 void nodeLinkClass::set_mtce_flags ( string hostname, int flags, int iface )
 {
     nodeLinkClass::node* node_ptr = nodeLinkClass::getNode ( hostname );
@@ -4114,7 +4223,7 @@ void nodeLinkClass::set_mtce_flags ( string hostname, int flags, int iface )
 
 
         /* Deal with sub-function if AIO controller host */
-        if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
+        if (( AIO_SYSTEM ) && ( is_controller(node_ptr) == true ))
         {
             if ( flags & MTC_FLAG__SUBF_GOENABLED )
             {
@@ -4422,6 +4531,18 @@ string nodeLinkClass::get_bm_ip   ( string hostname )
     return ("");
 }
 
+string nodeLinkClass::get_bm_pw   ( string hostname )
+{
+    nodeLinkClass::node* node_ptr ;
+    node_ptr = nodeLinkClass::getNode ( hostname );
+    if ( node_ptr != NULL )
+    {
+         return (node_ptr->bm_pw);
+    }
+    elog ("%s bm pw lookup failed\n", hostname.c_str() );
+    return ("");
+}
+
 string nodeLinkClass::get_bm_un   ( string hostname )
 {
     nodeLinkClass::node* node_ptr ;
@@ -4774,7 +4895,10 @@ void nodeLinkClass::hbs_minor_clear ( struct nodeLinkClass::node * node_ptr, ifa
 
             /* Otherwise this is a single host that has recovered
              * possibly as part of a mnfa group or simply a lone wolf */
-            else
+            else if (( node_ptr->hbs_minor[MGMNT_IFACE] == false ) &&
+                     (( clstr_network_provisioned == false ) ||
+                      (( clstr_network_provisioned == true ) &&
+                       ( node_ptr->hbs_minor[CLSTR_IFACE] == false ))))
             {
                 if ( node_ptr->mnfa_graceful_recovery == true )
                 {
@@ -4782,6 +4906,8 @@ void nodeLinkClass::hbs_minor_clear ( struct nodeLinkClass::node * node_ptr, ifa
                     mnfa_awol_list.remove(node_ptr->hostname);
                 }
 
+                /* Don't recover until heartbeat is working over all
+                 * monitored interfaces */
                 mnfa_recover_host ( node_ptr );
 
                 if ( mnfa_active == true )
@@ -4819,17 +4945,17 @@ void nodeLinkClass::hbs_minor_clear ( struct nodeLinkClass::node * node_ptr, ifa
     }
 
      if ( temp_count != mnfa_host_count[iface] )
-     {    
+     {
          slog ("%s MNFA host tally (%s:%d incorrect - expected %d) ; correcting\n",
                    node_ptr->hostname.c_str(),
                    get_iface_name_str(iface),
                    mnfa_host_count[iface], temp_count );
                    mnfa_host_count[iface] = temp_count ;
          mnfa_host_count[iface] = temp_count ;
-     }    
+     }
      else
      {
-         wlog ("%s MNFA host tally (%s:%d)\n",
+         dlog ("%s MNFA host tally (%s:%d)\n",
                    node_ptr->hostname.c_str(),
                    get_iface_name_str(iface),
                    mnfa_host_count[iface] );
@@ -4935,11 +5061,28 @@ void nodeLinkClass::manage_heartbeat_failure ( string hostname, iface_enum iface
             }
             return ;
         }
+        else if ( node_ptr->recoveryStage == MTC_RECOVERY__HEARTBEAT_SOAK )
+        {
+            elog ("%s %s *** Heartbeat Loss *** (during recovery soak)\n",
+                      hostname.c_str(),
+                      get_iface_name_str(iface));
+            force_full_enable ( node_ptr );
+            return ;
+        }
 
         mnfa_add_host ( node_ptr , iface );
 
         if ( mnfa_active == false )
         {
+            /* if node is already in graceful recovery just ignore the event */
+            if ( node_ptr->graceful_recovery_counter != 0 )
+            {
+                dlog ("%s %s loss event ; already in graceful recovery try %d",
+                          hostname.c_str(),
+                          get_iface_name_str(iface),
+                          node_ptr->graceful_recovery_counter );
+                return ;
+            }
             elog ("%s %s *** Heartbeat Loss ***\n", hostname.c_str(), get_iface_name_str(iface));
             if ( iface == CLSTR_IFACE )
             {
@@ -4980,6 +5123,15 @@ void nodeLinkClass::manage_heartbeat_failure ( string hostname, iface_enum iface
     }
 }
 
+/****************************************************************************
+ *
+ * Name       : manage_heartbeat_clear
+ *
+ * Description: Manage clearing heartbeat failure status
+ *
+ * Assuptions : Called by Both hbsAgent and mtcAgent
+ *
+ ***************************************************************************/
 void nodeLinkClass::manage_heartbeat_clear ( string hostname, iface_enum iface )
 {
     nodeLinkClass::node * node_ptr = nodeLinkClass::getNode ( hostname );
@@ -4995,13 +5147,17 @@ void nodeLinkClass::manage_heartbeat_clear ( string hostname, iface_enum iface )
             node_ptr->heartbeat_failed[i] = false ;
             if ( i == MGMNT_IFACE )
             {
-                node_ptr->alarms[HBS_ALARM_ID__HB_MGMNT] = FM_ALARM_SEVERITY_CLEAR ;
-                node_ptr->degrade_mask &= ~DEGRADE_MASK_HEARTBEAT_MGMNT ;
+                if ( heartbeat )
+                    node_ptr->alarms[HBS_ALARM_ID__HB_MGMNT] = FM_ALARM_SEVERITY_CLEAR ;
+                if ( maintenance )
+                    node_ptr->degrade_mask &= ~DEGRADE_MASK_HEARTBEAT_MGMNT ;
             }
             if ( i == CLSTR_IFACE )
             {
-                node_ptr->alarms[HBS_ALARM_ID__HB_CLSTR] = FM_ALARM_SEVERITY_CLEAR ;
-                node_ptr->degrade_mask &= ~DEGRADE_MASK_HEARTBEAT_CLSTR ;
+                if ( heartbeat )
+                    node_ptr->alarms[HBS_ALARM_ID__HB_CLSTR] = FM_ALARM_SEVERITY_CLEAR ;
+                if ( maintenance )
+                    node_ptr->degrade_mask &= ~DEGRADE_MASK_HEARTBEAT_CLSTR ;
             }
         }
     }
@@ -5010,13 +5166,17 @@ void nodeLinkClass::manage_heartbeat_clear ( string hostname, iface_enum iface )
         node_ptr->heartbeat_failed[iface] = false ;
         if ( iface == MGMNT_IFACE )
         {
-            node_ptr->alarms[HBS_ALARM_ID__HB_MGMNT] = FM_ALARM_SEVERITY_CLEAR ;
-            node_ptr->degrade_mask &= ~DEGRADE_MASK_HEARTBEAT_MGMNT ;
+            if ( heartbeat )
+                node_ptr->alarms[HBS_ALARM_ID__HB_MGMNT] = FM_ALARM_SEVERITY_CLEAR ;
+            if ( maintenance )
+                node_ptr->degrade_mask &= ~DEGRADE_MASK_HEARTBEAT_MGMNT ;
         }
         else if ( iface == CLSTR_IFACE )
         {
-            node_ptr->alarms[HBS_ALARM_ID__HB_CLSTR] = FM_ALARM_SEVERITY_CLEAR ;
-            node_ptr->degrade_mask &= ~DEGRADE_MASK_HEARTBEAT_CLSTR ;
+            if ( heartbeat )
+                node_ptr->alarms[HBS_ALARM_ID__HB_CLSTR] = FM_ALARM_SEVERITY_CLEAR ;
+            if ( maintenance )
+                node_ptr->degrade_mask &= ~DEGRADE_MASK_HEARTBEAT_CLSTR ;
         }
     }
 }
@@ -5795,9 +5955,6 @@ int nodeLinkClass::critical_process_failed( string & hostname,
                       node_ptr->hostname.c_str()); /* dlog */
         }
 
-        /* Start fresh the next time we enter graceful recovery handler */
-        node_ptr->graceful_recovery_counter = 0 ;
-
         /* Set node as unlocked-disabled-failed */
         allStateChange ( node_ptr, MTC_ADMIN_STATE__UNLOCKED,
                                    MTC_OPER_STATE__DISABLED,
@@ -6755,7 +6912,7 @@ int nodeLinkClass::disableStageChange ( struct nodeLinkClass::node * node_ptr,
 }
 
 /** Validate and log Recovery stage changes */
-int nodeLinkClass::recoveryStageChange  ( struct nodeLinkClass::node * node_ptr, 
+int nodeLinkClass::recoveryStageChange  ( struct nodeLinkClass::node * node_ptr,
                                           mtc_recoveryStages_enum newHdlrStage )
 {
     int rc = PASS ;
@@ -6763,14 +6920,14 @@ int nodeLinkClass::recoveryStageChange  ( struct nodeLinkClass::node * node_ptr,
     if (( newHdlrStage >= MTC_RECOVERY__STAGES ) ||
         ( node_ptr->recoveryStage >= MTC_RECOVERY__STAGES ))
     {
-        slog ("%s Invalid recovery stage (%d:%d)\n", 
+        slog ("%s Invalid recovery stage (%d:%d)\n",
                   node_ptr->hostname.c_str(),
-                  node_ptr->recoveryStage, 
+                  node_ptr->recoveryStage,
                   newHdlrStage );
 
         if ( newHdlrStage < MTC_RECOVERY__STAGES )
         {
-            clog ("%s ? -> %s\n", 
+            clog ("%s ? -> %s\n",
                node_ptr->hostname.c_str(),
                get_recoveryStages_str(newHdlrStage).c_str());
 
@@ -6782,11 +6939,11 @@ int nodeLinkClass::recoveryStageChange  ( struct nodeLinkClass::node * node_ptr,
             rc = FAIL ;
         }
     }
-    else 
+    else
     {
-        clog ("%s %s -> %s\n", 
+        clog ("%s %s -> %s\n",
                node_ptr->hostname.c_str(),
-               get_recoveryStages_str(node_ptr->recoveryStage).c_str(), 
+               get_recoveryStages_str(node_ptr->recoveryStage).c_str(),
                get_recoveryStages_str(newHdlrStage).c_str());
 
         node_ptr->recoveryStage = newHdlrStage  ;
@@ -7514,7 +7671,7 @@ int nodeLinkClass::ar_manage ( struct nodeLinkClass::node * node_ptr,
         mtcInvApi_update_states ( node_ptr, "unlocked", "disabled", "failed" );
 
         if (( NOT_THIS_HOST ) &&
-            ( this->system_type != SYSTEM_TYPE__CPE_MODE__SIMPLEX ))
+            ( this->system_type != SYSTEM_TYPE__AIO__SIMPLEX ))
         {
             if ( ++node_ptr->ar_count[node_ptr->ar_cause] >=
                   this->ar_threshold [node_ptr->ar_cause] )
@@ -7746,7 +7903,11 @@ int nodeLinkClass::mon_host ( const string & hostname, bool true_false, bool sen
 
             if ( true_false == true )
             {
-                ilog ("%s heartbeat start", hostname.c_str());
+                ilog ("%s %s heartbeat %sstart",
+                          hostname.c_str(),
+                          get_iface_name_str(iface),
+                          node_ptr->monitor[iface] ? "re" : "");
+
                 node_ptr->no_work_log_throttle = 0 ;
                 node_ptr->b2b_misses_count[iface] = 0 ;
                 node_ptr->hbs_misses_count[iface] = 0 ;
@@ -7758,7 +7919,12 @@ int nodeLinkClass::mon_host ( const string & hostname, bool true_false, bool sen
             }
             else
             {
-                ilog ("%s heartbeat stop", hostname.c_str());
+                if (  node_ptr->monitor[iface] == true )
+                {
+                    ilog ("%s %s heartbeat stop",
+                              hostname.c_str(),
+                              get_iface_name_str(iface));
+                }
             }
             node_ptr->monitor[iface] = true_false ;
         }
@@ -7771,7 +7937,7 @@ int nodeLinkClass::mon_host ( const string & hostname, bool true_false, bool sen
 void nodeLinkClass::set_hwmond_monitor_state ( string & hostname, bool state )
 {
     if ( hostname.length() )
-    {  
+    {
         struct nodeLinkClass::node* node_ptr ;
         node_ptr = nodeLinkClass::getNode ( hostname );
         if ( node_ptr != NULL )
@@ -8511,7 +8677,7 @@ void nodeLinkClass::manage_heartbeat_alarm ( struct nodeLinkClass::node * node_p
 
 
 
-#define HBS_LOSS_REPORT_THROTTLE (100)
+#define HBS_LOSS_REPORT_THROTTLE (100000)
 int nodeLinkClass::lost_pulses ( iface_enum iface, bool & storage_0_responding )
 {
     int lost = 0  ;
@@ -8551,6 +8717,13 @@ int nodeLinkClass::lost_pulses ( iface_enum iface, bool & storage_0_responding )
 
             if ( pulse_ptr->b2b_misses_count[iface] > 1 )
             {
+                if ( pulse_ptr->b2b_misses_count[iface] < hbs_failure_threshold )
+                {
+                    hbs_cluster_change ( pulse_ptr->hostname + " " +
+                            get_iface_name_str(iface) +
+                            " heartbeat miss " +
+                            itos(pulse_ptr->b2b_misses_count[iface]));
+                }
                 if ( pulse_ptr->b2b_misses_count[iface] >= hbs_failure_threshold )
                 {
                     if ( pulse_ptr->b2b_misses_count[iface] == hbs_failure_threshold )
@@ -8657,57 +8830,43 @@ int nodeLinkClass::lost_pulses ( iface_enum iface, bool & storage_0_responding )
                 }
             }
 
-            /* Turn the cluster-host heartbeat loss into a degrade only
-             * condition if the clstr_degrade_only flag is set */
-            if (( iface == CLSTR_IFACE ) &&
-                ( pulse_ptr->b2b_misses_count[iface] >= hbs_failure_threshold ) &&
-                ( clstr_degrade_only == true ))
-            {
-                /* Only print the log at the threshold boundary */
-                if (( pulse_ptr->b2b_misses_count[iface]%HBS_LOSS_REPORT_THROTTLE) == hbs_failure_threshold )
-                {
-                    if ( this->active_controller )
-                    {
-                        manage_heartbeat_alarm ( pulse_ptr, FM_ALARM_SEVERITY_CRITICAL, iface );
-                    }
-
-                    wlog ( "%s %s *** Heartbeat Loss *** (degrade only)\n",
-                               pulse_ptr->hostname.c_str(),
-                               get_iface_name_str(iface) );
-                    hbs_cluster_change ( pulse_ptr->hostname + " heartbeat loss" );
-                }
-            }
-
             /* Turn the clstr heartbeat loss into a degrade only
              * condition for inactive controller on normal system. */
-            else if (( iface == CLSTR_IFACE ) &&
-                     ( pulse_ptr->b2b_misses_count[iface] >= hbs_failure_threshold ) &&
-                     ( this->system_type == SYSTEM_TYPE__NORMAL ) &&
-                     (( pulse_ptr->nodetype & CONTROLLER_TYPE) == CONTROLLER_TYPE ))
+            if (( iface == CLSTR_IFACE ) &&
+                ((( this->system_type == SYSTEM_TYPE__NORMAL ) &&
+                 (( pulse_ptr->nodetype & CONTROLLER_TYPE) == CONTROLLER_TYPE )) ||
+                 ( clstr_degrade_only == true )))
             {
                 /* Only print the log at the threshold boundary */
-                if ( (pulse_ptr->b2b_misses_count[iface]%HBS_LOSS_REPORT_THROTTLE) == hbs_failure_threshold )
+                if ( pulse_ptr->b2b_misses_count[iface]%HBS_LOSS_REPORT_THROTTLE == hbs_failure_threshold )
                 {
                     if ( this->active_controller )
                     {
                         manage_heartbeat_alarm ( pulse_ptr, FM_ALARM_SEVERITY_CRITICAL, iface );
                     }
-                    wlog ( "%s %s *** Heartbeat Loss *** (degrade only)\n",
+                    wlog ( "%s %s *** Heartbeat Loss *** (degrade only due to %s)\n",
                                pulse_ptr->hostname.c_str(),
-                               get_iface_name_str(iface));
-                    hbs_cluster_change ( pulse_ptr->hostname + " heartbeat loss" );
+                               get_iface_name_str(iface),
+                               clstr_degrade_only ? "config option" : "system type");
+                    hbs_cluster_change ( pulse_ptr->hostname + " " + get_iface_name_str(iface) + " heartbeat loss" );
                 }
             }
 
             else if ((pulse_ptr->b2b_misses_count[iface]%HBS_LOSS_REPORT_THROTTLE) == hbs_failure_threshold )
+            // else if ( pulse_ptr->hbs_failure[iface] == false )
             {
-                elog ("%s %s *** Heartbeat Loss ***\n", pulse_ptr->hostname.c_str(),
-                                                        get_iface_name_str(iface) );
+                elog ("%s %s *** Heartbeat Loss *** (b2b_misses:0x%x)\n",
+                          pulse_ptr->hostname.c_str(),
+                          get_iface_name_str(iface),
+                          pulse_ptr->b2b_misses_count[iface]);
+                hbs_cluster_change ( pulse_ptr->hostname + " " + get_iface_name_str(iface) + " heartbeat loss" );
 
                 if ( this->active_controller )
                 {
-                    manage_heartbeat_alarm ( pulse_ptr, FM_ALARM_SEVERITY_CRITICAL, iface );
-
+                    if ( pulse_ptr->hbs_failure[iface] == false )
+                    {
+                        manage_heartbeat_alarm ( pulse_ptr, FM_ALARM_SEVERITY_CRITICAL, iface );
+                    }
                     /* report this host as failed */
                     if ( send_event ( pulse_ptr->hostname, MTC_EVENT_HEARTBEAT_LOSS , iface ) == PASS )
                     {
@@ -8715,10 +8874,8 @@ int nodeLinkClass::lost_pulses ( iface_enum iface, bool & storage_0_responding )
                     }
                 }
                 else
-                {
                     pulse_ptr->hbs_failure[iface] = true ;
-                }
-                hbs_cluster_change ( pulse_ptr->hostname + " heartbeat loss" );
+
                 pulse_ptr->hbs_failure_count[iface]++ ;
             }
             if ( pulse_ptr->b2b_misses_count[iface] > pulse_ptr->max_count[iface] )
@@ -8963,21 +9120,21 @@ void nodeLinkClass::mem_log_mtcalive ( struct nodeLinkClass::node * node_ptr )
 {
     char str[MAX_MEM_LOG_DATA] ;
 
-    snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tmtcAlive: online:%c offline:%c Cnt:%d Gate:%s Misses:%d\n", 
-                node_ptr->hostname.c_str(), 
+    snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tmtcAlive: online:%c offline:%c Cnt:%d Gate:%s Misses:%d\n",
+                node_ptr->hostname.c_str(),
                 node_ptr->mtcAlive_online ? 'Y' : 'N',
                 node_ptr->mtcAlive_offline ? 'Y' : 'N',
                 node_ptr->mtcAlive_count,
                 node_ptr->mtcAlive_gate ? "closed" : "open",
-                node_ptr->mtcAlive_misses); 
+                node_ptr->mtcAlive_misses);
     mem_log (str);
 }
 
 void nodeLinkClass::mem_log_alarm1 ( struct nodeLinkClass::node * node_ptr )
 {
     char str[MAX_MEM_LOG_DATA] ;
-    snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tAlarm List:%s%s%s%s%s%s\n", 
-               node_ptr->hostname.c_str(), 
+    snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tAlarm List:%s%s%s%s%s%s\n",
+               node_ptr->hostname.c_str(),
                node_ptr->alarms[MTC_ALARM_ID__LOCK    ] ? " Locked"   : " .",
                node_ptr->alarms[MTC_ALARM_ID__CONFIG  ] ? " Config"   : " .",
                node_ptr->alarms[MTC_ALARM_ID__ENABLE  ] ? " Enable"   : " .",
@@ -8987,6 +9144,18 @@ void nodeLinkClass::mem_log_alarm1 ( struct nodeLinkClass::node * node_ptr )
     mem_log (str);
 }
 
+void nodeLinkClass::mem_log_alarm2 ( struct nodeLinkClass::node * node_ptr )
+{
+    if ( ! node_ptr->active_alarms.empty() )
+    {
+        char str[MAX_MEM_LOG_DATA] ;
+        snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tActive Alarms:%s\n",
+                   node_ptr->hostname.c_str(),
+                   node_ptr->active_alarms.c_str());
+        mem_log (str);
+    }
+}
+
 void nodeLinkClass::mem_log_stage ( struct nodeLinkClass::node * node_ptr )
 {
     char str[MAX_MEM_LOG_DATA] ;
@@ -9037,8 +9206,8 @@ void nodeLinkClass::mem_log_network ( struct nodeLinkClass::node * node_ptr )
 {
     char str[MAX_MEM_LOG_DATA] ;
     snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\t%s %s cluster_host_ip: %s Uptime: %u\n",
-                node_ptr->hostname.c_str(), 
-                node_ptr->mac.c_str(), 
+                node_ptr->hostname.c_str(),
+                node_ptr->mac.c_str(),
                 node_ptr->ip.c_str(),
                 node_ptr->clstr_ip.c_str(),
                 node_ptr->uptime );
@@ -9050,11 +9219,11 @@ void nodeLinkClass::mem_log_heartbeat ( struct nodeLinkClass::node * node_ptr )
     char str[MAX_MEM_LOG_DATA] ;
     for ( int iface = 0 ; iface < MAX_IFACES ; iface++ )
     {
-        snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\t%s Minor:%s Degrade:%s Failed:%s  Monitor:%s\n", 
-                   node_ptr->hostname.c_str(), 
+        snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\t%s Minor:%s Degrade:%s Failed:%s  Monitor:%s\n",
+                   node_ptr->hostname.c_str(),
                    get_iface_name_str (iface),
-                   node_ptr->hbs_minor[iface] ? "true " : "false", 
-                   node_ptr->hbs_degrade[iface] ? "true " : "false", 
+                   node_ptr->hbs_minor[iface] ? "true " : "false",
+                   node_ptr->hbs_degrade[iface] ? "true " : "false",
                    node_ptr->hbs_failure[iface] ? "true " : "false",
                    node_ptr->monitor[iface] ? "YES" : "no"  );
         mem_log (str);
@@ -9083,8 +9252,8 @@ void nodeLinkClass::mem_log_hbs_cnts ( struct nodeLinkClass::node * node_ptr )
 void nodeLinkClass::mem_log_test_info ( struct nodeLinkClass::node * node_ptr )
 {
     char str[MAX_MEM_LOG_DATA] ;
-    snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tOOS Stage:%s Runs:%d - INSV Stage:%s Runs:%d\n", 
-                node_ptr->hostname.c_str(), 
+    snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tOOS Stage:%s Runs:%d - INSV Stage:%s Runs:%d\n",
+                node_ptr->hostname.c_str(),
                 get_oosTestStages_str(node_ptr->oosTestStage).c_str(),
                 node_ptr->oos_test_count,
                 get_insvTestStages_str(node_ptr->insvTestStage).c_str(),
@@ -9117,7 +9286,7 @@ void nodeLinkClass::mem_log_type_info ( struct nodeLinkClass::node * node_ptr )
                 node_ptr->function);
     mem_log (str);
 
-    if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
+    if (( AIO_SYSTEM ) && ( is_controller(node_ptr) == true ))
     {
         snprintf (&str[0], MAX_MEM_LOG_DATA, "%s\tSub-Function: %s (%u) (SubFunc Enabled:%c)\n",
                 node_ptr->hostname.c_str(),
@@ -9156,6 +9325,7 @@ void nodeLinkClass::memDumpNodeState ( string hostname )
             // mem_log_reset_info ( node_ptr );
             mem_log_power_info ( node_ptr );
             mem_log_alarm1     ( node_ptr );
+            mem_log_alarm2     ( node_ptr );
             mem_log_mtcalive   ( node_ptr );
             mem_log_stage      ( node_ptr );
             mem_log_bm         ( node_ptr );
diff --git a/mtce/src/common/nodeClass.h b/mtce/src/common/nodeClass.h
index 42ca79d6..5df2ce56 100755
--- a/mtce/src/common/nodeClass.h
+++ b/mtce/src/common/nodeClass.h
@@ -76,11 +76,11 @@ using namespace std;
 #define LARGE_SYSTEM \
     ( this->system_type == SYSTEM_TYPE__NORMAL )
 
-#define CPE_SYSTEM \
+#define AIO_SYSTEM \
     ( this->system_type != SYSTEM_TYPE__NORMAL )
 
-#define SIMPLEX_CPE_SYSTEM \
-    ( this->system_type == SYSTEM_TYPE__CPE_MODE__SIMPLEX )
+#define SIMPLEX_AIO_SYSTEM \
+    ( this->system_type == SYSTEM_TYPE__AIO__SIMPLEX )
 
 /**
  * @addtogroup nodeLinkClass
@@ -652,12 +652,12 @@ private:
 
         /** @} private_monitoring_services_variables */
 
-        /* List of alarms and current severity */
-        #define MAX_ALARMS           (10)
+        /* List of alarms current severity */
         EFmAlarmSeverityT alarms[MAX_ALARMS];
 
-        /* tracks whether the alarms for this host have been loaded already or not */
-        bool alarms_loaded ;
+        /* string containing active alarms and their severity
+         * ... for logging purposes only */
+        string active_alarms ;
 
         /** true if this host has recovered before the mnfa timeout period.
          *  This bool flags the graceful recovery handler that this node
@@ -665,8 +665,6 @@ private:
          *  and uptime accordingly */
         bool mnfa_graceful_recovery ;
 
-        int stress_iteration ;
-
         /* BMC Protocol Learning Controls and State */
 
         /* specifies what BMC protocol is selected for this host
@@ -828,10 +826,13 @@ private:
     int oos_test_handler   ( struct nodeLinkClass::node * node_ptr );
     int insv_test_handler  ( struct nodeLinkClass::node * node_ptr );
     int stress_handler     ( struct nodeLinkClass::node * node_ptr );
-    int bmc_handler         ( struct nodeLinkClass::node * node_ptr );
+    int bmc_handler        ( struct nodeLinkClass::node * node_ptr );
     int degrade_handler    ( struct nodeLinkClass::node * node_ptr );
+
     int uptime_handler     ( void );
 
+    void mtcInfo_handler   ( void );
+
     int host_services_handler ( struct nodeLinkClass::node * node_ptr );
 
     /* Starts the specified 'reset or powercycle' recovery monitor */
@@ -840,6 +841,9 @@ private:
     /* server specific power state query handler */
     bool (*is_poweron_handler) (string hostname, string query_response );
 
+    /* Audit that monitors and auto corrects alarm state mismatches */
+    void mtcAlarm_audit ( struct nodeLinkClass::node * node_ptr );
+
     /* Calculate the overall reset progression timeout */
     int calc_reset_prog_timeout ( struct nodeLinkClass::node * node_ptr, int retries );
 
@@ -851,13 +855,22 @@ private:
     void ctl_mtcAlive_gate ( struct nodeLinkClass::node * node_ptr, bool gate_state );
     void set_mtcAlive      ( struct nodeLinkClass::node * node_ptr, int interface );
 
+    /*********               mtcInfo in the database              ************/
     int    mtcInfo_set ( struct nodeLinkClass::node * node_ptr, string key, string value );
     string mtcInfo_get ( struct nodeLinkClass::node * node_ptr, string key );
     void   mtcInfo_clr ( struct nodeLinkClass::node * node_ptr, string key );
     void   mtcInfo_log ( struct nodeLinkClass::node * node_ptr );
-
     int    set_mtcInfo ( struct nodeLinkClass::node * node_ptr, string & mtc_info );
 
+    /*********       mtcInfo that gets puished out to daemons      ***********/
+
+
+    /* flag telling mtce when a mtcInfo push needs to be done */
+    bool want_mtcInfo_push = false ;
+
+    /* performs the mtcInfo push */
+    void push_mtcInfo ( void );
+
     /*****************************************************************************
      *
      * Name       : bmc_command_send
@@ -1192,11 +1205,11 @@ private:
      * Set to true when the autorecovery threshold is reached
      * and we want to avoid taking further autorecovery action
      * even though it may be requested. */
-    bool autorecovery_disabled ;
+    bool autorecovery_disabled = false ;
 
     /* Set to true by fault detection methods that are
      * autorecoverable when in simplex mode. */
-    bool autorecovery_enabled ;
+    bool autorecovery_enabled = false ;
 
     /** Tracks the number of hosts that 'are currently' in service trouble
      *  wrt heartbeat (above minor threshold).
@@ -1292,6 +1305,7 @@ private:
     void mem_log_state1    ( struct nodeLinkClass::node * node_ptr );
     void mem_log_state2    ( struct nodeLinkClass::node * node_ptr );
     void mem_log_alarm1    ( struct nodeLinkClass::node * node_ptr );
+    void mem_log_alarm2    ( struct nodeLinkClass::node * node_ptr );
     void mem_log_mtcalive  ( struct nodeLinkClass::node * node_ptr );
     void mem_log_stage     ( struct nodeLinkClass::node * node_ptr );
     void mem_log_test_info ( struct nodeLinkClass::node * node_ptr );
@@ -1464,11 +1478,14 @@ public:
 
     /***********************************************************/
 
+    /** Number of provisioned controllers */
+    int controllers = 0 ;
+
     /** Number of provisioned hosts (nodes) */
-    int hosts  ;
+    int hosts = 0 ;
 
     /* Set to True while waiting for UNLOCK_READY_FILE in simplex mode */
-    bool unlock_ready_wait ;
+    bool unlock_ready_wait = false ;
 
     /** Host has been deleted */
     bool host_deleted ;
@@ -1517,6 +1534,9 @@ public:
     /** Return the number of inventoried hosts */
     int num_hosts ( void );
 
+    /** Return the number of inventoried controllers */
+    int num_controllers ( void );
+
     /** **********************************************************************
       *
       * Name       : nodeLinkClass::workQueue_enqueue
@@ -1664,6 +1684,9 @@ public:
     /* Clear heartbeat failed flag for all interfaces */
     void manage_heartbeat_clear   ( string hostname, iface_enum iface );
 
+    /* Build a json dictionary of containing code specified maintenance info */
+    string build_mtcInfo_dict ( mtcInfo_enum mtcInfo_code );
+
    /** Test and Debug Members and Variables */
 
     /** Print node info banner */
@@ -1752,6 +1775,7 @@ public:
         #define MTC_FLAG__I_AM_LOCKED      (0x00000008)
     */
     void set_mtce_flags ( string hostname, int flags, int iface );
+    int  get_mtce_flags ( string & hostname );
 
     /** Updates the node's health code
       * Codes are found in nodeBase.h
@@ -1789,6 +1813,7 @@ public:
 
     string get_bm_ip   ( string hostname );
     string get_bm_un   ( string hostname );
+    string get_bm_pw   ( string hostname );
     string get_bm_type ( string hostname );
 
     string get_hostname_from_bm_ip ( string bm_ip );
diff --git a/mtce/src/fsmon/scripts/fsmon.logrotate b/mtce/src/fsmon/scripts/fsmon.logrotate
index 0476a8b2..821391f8 100644
--- a/mtce/src/fsmon/scripts/fsmon.logrotate
+++ b/mtce/src/fsmon/scripts/fsmon.logrotate
@@ -1,15 +1,19 @@
-#daily
-nodateext
+#
+# Copyright (c) 2015-2021 Wind River Systems, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
 
 /var/log/fsmond.log
 {
-    size 10M
+    create 0640 root root
     start 1
-    missingok
+    size 10M
     rotate 20
     compress
-    sharedscripts
+    notifempty
+    missingok
     postrotate
         systemctl reload syslog-ng > /dev/null 2>&1 || true
     endscript
+    delaycompress
 }
diff --git a/mtce/src/heartbeat/Makefile b/mtce/src/heartbeat/Makefile
index a625f20b..a0fdb8da 100755
--- a/mtce/src/heartbeat/Makefile
+++ b/mtce/src/heartbeat/Makefile
@@ -13,7 +13,7 @@ LDLIBS = -lstdc++ -ldaemon -lcommon -lthreadUtil -lpthread -lfmcommon -lalarm -l
 INCLUDES = -I. -I/usr/include/mtce-daemon -I/usr/include/mtce-common
 INCLUDES += -I../common -I../alarm -I../maintenance -I../public
 
-CCFLAGS = -g -O2 -Wall -Wextra -Werror
+CCFLAGS = -g -O2 -Wall -Wextra -Werror -std=c++11
 
 STATIC_ANALYSIS_TOOL = cppcheck
 STATIC_ANALYSIS_TOOL_EXISTS = $(shell [[ -e `which $(STATIC_ANALYSIS_TOOL)` ]] && echo 1 || echo 0)
diff --git a/mtce/src/heartbeat/hbsAgent.cpp b/mtce/src/heartbeat/hbsAgent.cpp
index 4eec5a29..ecd6941a 100644
--- a/mtce/src/heartbeat/hbsAgent.cpp
+++ b/mtce/src/heartbeat/hbsAgent.cpp
@@ -1381,6 +1381,7 @@ int daemon_init ( string iface, string nodetype )
         hbs_ctrl.locked = true ;
     }
 
+
     daemon_init_fit();
     return (rc);
 }
@@ -1521,6 +1522,7 @@ void hbs_sm_handler ( void )
  *              False if time delta is greater
  *
  ***************************************************************************/
+#define HUGE_NUMBER_B2B_SM_HEARTBEAT_MISSES (10000)
 bool manage_sm_heartbeat ( void )
 {
     struct timespec ts ;
@@ -1532,8 +1534,9 @@ bool manage_sm_heartbeat ( void )
     if ( delta_in_ms > SM_HEARTBEAT_PULSE_PERIOD_MSECS )
     {
         sm_heartbeat_count = 0;
-        if (( ++sm_heartbeat_count_b2b_misses < 20 )||
-            (!( sm_heartbeat_count_b2b_misses % 100 )))
+        if ((( ++sm_heartbeat_count_b2b_misses < 20 ) ||
+            (!( sm_heartbeat_count_b2b_misses % 1000 ))) &&
+            ( sm_heartbeat_count_b2b_misses < HUGE_NUMBER_B2B_SM_HEARTBEAT_MISSES ))
         {
             wlog("SM Heartbeat missing since %ld.%03ld secs ago ; HBS Period Misses:%3d ; Running HB Count:%4d",
                   delta.secs, delta.msecs,
@@ -1817,6 +1820,10 @@ void daemon_service_run ( void )
                         inv.name = hbsInv.my_hostname ;
                         inv.nodetype = CONTROLLER_TYPE ;
                         hbsInv.add_heartbeat_host ( inv );
+
+                        /* add this host to local inventory */
+                        hostname_inventory.push_front(hbsInv.my_hostname);
+                        ilog ("%s added to inventory (self)", hbsInv.my_hostname.c_str());
                     }
 
                     /* enable the base level signal handler latency monitor */
@@ -1841,7 +1848,7 @@ void daemon_service_run ( void )
                     clock_gettime (CLOCK_MONOTONIC, &sm_heartbeat_timestamp_last );
 
                     /* no need for the heartbeat audit in a simplex system */
-                    if ( hbsInv.system_type != SYSTEM_TYPE__CPE_MODE__SIMPLEX )
+                    if ( hbsInv.system_type != SYSTEM_TYPE__AIO__SIMPLEX )
                     {
                         /* start the state audit */
                         /* run the first audit in 30 seconds */
@@ -2056,7 +2063,7 @@ void daemon_service_run ( void )
                                           hbsInv.active_controller ? "" : "in" );
 
                                 /* no need for the heartbeat audit in a simplex system */
-                                if ( hbsInv.system_type != SYSTEM_TYPE__CPE_MODE__SIMPLEX )
+                                if ( hbsInv.system_type != SYSTEM_TYPE__AIO__SIMPLEX )
                                 {
                                     /* Due to activity state change we will dump
                                      * the heartbeat cluster state at now time
@@ -2074,6 +2081,7 @@ void daemon_service_run ( void )
                             inv.nodetype = msg.parm[0];
                             hbsInv.add_heartbeat_host ( inv ) ;
                             hostname_inventory.push_back ( inv.name );
+                            hostname_inventory.unique(); // avoid duplicates
                             ilog ("%s added to heartbeat service (%d)\n",
                                       inv.name.c_str(),
                                       inv.nodetype);
@@ -2119,7 +2127,7 @@ void daemon_service_run ( void )
                         {
                             if ( hostname != hbsInv.my_hostname )
                             {
-                                hbsInv.mon_host ( hostname, false, true );
+                                hbsInv.mon_host ( hostname, false, false );
                                 hbs_cluster_del ( hostname );
                                 ilog ("%s heartbeat service disabled by stop command",
                                           hostname.c_str());
@@ -2366,6 +2374,7 @@ void daemon_service_run ( void )
                     arrival_histogram[iface] = "" ;
                     unexpected_pulse_list[iface] = "" ;
 
+
                     rc = hbs_pulse_request ( (iface_enum)iface, seq_num, ri, rri );
                     if ( rc != 0 )
                     {
@@ -2523,7 +2532,9 @@ void daemon_service_run ( void )
                 }
             }
             /* log cluster throttled */
-            if (( heartbeat_ok == false ) && ( !( sm_heartbeat_count_b2b_misses % 100 )))
+            if ((( heartbeat_ok == false ) &&
+                ( !( sm_heartbeat_count_b2b_misses % 1000 ))) &&
+                ( sm_heartbeat_count_b2b_misses < HUGE_NUMBER_B2B_SM_HEARTBEAT_MISSES ))
             {
                 hbs_state_audit ( );
             }
diff --git a/mtce/src/heartbeat/hbsBase.h b/mtce/src/heartbeat/hbsBase.h
index bfa8f1d1..b9f067f7 100755
--- a/mtce/src/heartbeat/hbsBase.h
+++ b/mtce/src/heartbeat/hbsBase.h
@@ -326,7 +326,7 @@ void hbs_cluster_log  ( string & hostname, mtce_hbs_cluster_type & cluster, stri
 void hbs_sm_handler ( void );
 
 /* send the cluster vault to SM */
-void hbs_cluster_send ( msgClassSock * sm_client_sock, int reqid , string reason );
+int hbs_cluster_send ( msgClassSock * sm_client_sock, int reqid , string reason );
 
 /* copy cluster data from src to dst */
 void hbs_cluster_copy ( mtce_hbs_cluster_type & src, mtce_hbs_cluster_type & dst );
@@ -338,6 +338,10 @@ void hbs_cluster_dump ( mtce_hbs_cluster_type & vault );
 /* Heartbeat service state audit */
 void hbs_state_audit ( void );
 
+/* Send state change message to SM if there has been a
+ * state change in the last period */
+void hbs_cluster_change_notifier ( void );
+
 /**
  * @} hbs_base
  */
diff --git a/mtce/src/heartbeat/hbsCluster.cpp b/mtce/src/heartbeat/hbsCluster.cpp
index 780fa8e3..1f82a4e3 100644
--- a/mtce/src/heartbeat/hbsCluster.cpp
+++ b/mtce/src/heartbeat/hbsCluster.cpp
@@ -69,6 +69,8 @@ typedef struct
 
     msgClassSock * sm_socket_ptr ;
 
+    string cluster_change_reason ;
+
 } hbs_cluster_ctrl_type ;
 
 /* Cluster control structire construct allocation. */
@@ -122,6 +124,8 @@ void hbs_cluster_init ( unsigned short period, msgClassSock * sm_socket_ptr )
     {
         ctrl.sm_socket_ptr = sm_socket_ptr ;
     }
+    ctrl.cluster_change_reason = "";
+
     ctrl.log_throttle = 0 ;
 }
 
@@ -173,7 +177,30 @@ void hbs_cluster_nums ( unsigned short this_controller,
 
 void hbs_cluster_change ( string cluster_change_reason )
 {
-    hbs_cluster_send( ctrl.sm_socket_ptr, 0, cluster_change_reason );
+    ilog ("reason: %s", cluster_change_reason.c_str());
+    if ( ctrl.cluster_change_reason.empty() )
+        ctrl.cluster_change_reason = cluster_change_reason ;
+    else
+        ctrl.cluster_change_reason.append("," + cluster_change_reason) ;
+}
+
+/****************************************************************************
+ *
+ * Name        : hbs_cluster_change_notifier
+ *
+ * Description : Send SM the cluster info if there has been a state change.
+ *
+ ***************************************************************************/
+void hbs_cluster_change_notifier ( void )
+{
+    if ( ! ctrl.cluster_change_reason.empty () )
+    {
+        if ( hbs_cluster_send( ctrl.sm_socket_ptr, 0,
+                               ctrl.cluster_change_reason ) == PASS )
+        {
+            ctrl.cluster_change_reason.clear();
+        }
+    }
 }
 
 /****************************************************************************
@@ -444,6 +471,7 @@ void hbs_cluster_update ( iface_enum iface,
             wlog_throttled ( ctrl.log_throttle, THROTTLE_COUNT,
                              "Unable to store history beyond %d ",
                              ctrl.cluster.histories );
+            hbs_cluster_change_notifier ();
             return ;
         }
         else
@@ -544,6 +572,8 @@ void hbs_cluster_update ( iface_enum iface,
     else
         history_ptr->oldest_entry_index++ ;
 
+    hbs_cluster_change_notifier ();
+
     /* clear the log throttle if we are updating history ok. */
     ctrl.log_throttle = 0 ;
 }
@@ -647,12 +677,12 @@ unsigned short hbs_cluster_unused_bytes ( void )
  *
  ***************************************************************************/
 
-void hbs_cluster_send ( msgClassSock * sm_client_sock, int reqid , string reason )
+int hbs_cluster_send ( msgClassSock * sm_client_sock, int reqid , string reason )
 {
+    int rc = FAIL_SOCKET_SENDTO ;
     ctrl.cluster.reqid = (unsigned short)reqid ;
     if (( sm_client_sock ) && ( sm_client_sock->sock_ok() == true ))
     {
-        ilog ("cluster state notification Reason: %s", reason.c_str());
         int len = sizeof(mtce_hbs_cluster_type)-hbs_cluster_unused_bytes();
         int bytes = sm_client_sock->write((char*)&ctrl.cluster, len);
         if ( bytes <= 0 )
@@ -660,12 +690,19 @@ void hbs_cluster_send ( msgClassSock * sm_client_sock, int reqid , string reason
              elog ("failed to send cluster vault to SM (bytes=%d) (%d:%s)\n",
                     bytes , errno, strerror(errno));
         }
-        hbs_cluster_dump ( ctrl.cluster );
+        else
+        {
+            /* limit the string length */
+            ilog ("reason: %s", reason.substr(0,80).c_str());
+            hbs_cluster_dump ( ctrl.cluster );
+            rc = PASS ;
+        }
     }
     else
     {
         wlog ("cannot send cluster info due to socket error");
     }
+    return(rc);
 }
 
 /****************************************************************************
@@ -689,7 +726,7 @@ void hbs_history_save ( string hostname,
         {
             if ( hbs_cluster_cmp( sample, ctrl.cluster.history[h] ) )
             {
-                 hbs_cluster_change ("peer controller cluster event " +
+                 hbs_cluster_change ("peer cluster delta " +
                  hbs_cluster_network_name((mtce_hbs_network_enum)sample.network));
             }
 
diff --git a/mtce/src/heartbeat/hbsStubs.cpp b/mtce/src/heartbeat/hbsStubs.cpp
index 81326d17..474a7221 100644
--- a/mtce/src/heartbeat/hbsStubs.cpp
+++ b/mtce/src/heartbeat/hbsStubs.cpp
@@ -279,8 +279,14 @@ void nodeLinkClass::mnfa_enter ( void )
 void nodeLinkClass::mnfa_exit  ( bool force )
 { force = force ; }
 
-int send_mtc_cmd ( string & hostname, int cmd, int interface )
-{ UNUSED(hostname); UNUSED(cmd); UNUSED(interface); return PASS ; }
+int send_mtc_cmd ( string & hostname, int cmd, int interface, string json_dict)
+{
+    UNUSED(hostname);
+    UNUSED(cmd);
+    UNUSED(interface);
+    UNUSED(json_dict);
+    return PASS ;
+}
 
 int nodeLinkClass::mtcInvApi_subf_states ( string hostname,
                                            string oper_subf,
diff --git a/mtce/src/hostw/scripts/hostw.logrotate b/mtce/src/hostw/scripts/hostw.logrotate
index 065ccc1f..cb2f6aa4 100644
--- a/mtce/src/hostw/scripts/hostw.logrotate
+++ b/mtce/src/hostw/scripts/hostw.logrotate
@@ -1,16 +1,19 @@
-#daily
-nodateext
+#
+# Copyright (c) 2020-2021 Wind River Systems, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
 
 /var/log/hostwd.log
 {
-    nodateext
-    size 10M
+    create 0640 root root
     start 1
-    missingok
+    size 10M
     rotate 20
     compress
-    sharedscripts
+    notifempty
+    missingok
     postrotate
         systemctl reload syslog-ng > /dev/null 2>&1 || true
     endscript
+    delaycompress
 }
diff --git a/mtce/src/hwmon/hwmonSensor.cpp b/mtce/src/hwmon/hwmonSensor.cpp
index 475c7cd5..07f3c392 100644
--- a/mtce/src/hwmon/hwmonSensor.cpp
+++ b/mtce/src/hwmon/hwmonSensor.cpp
@@ -254,7 +254,7 @@ void hwmonGroup_init ( string & hostname , struct sensor_group_type * group_ptr
         group_ptr->actions_critical_choices.append(HWMON_ACTION_ALARM);
 
         /* Don't support reset and power cycle in AIO simplex mode */
-        if ( obj_ptr->system_type != SYSTEM_TYPE__CPE_MODE__SIMPLEX )
+        if ( obj_ptr->system_type != SYSTEM_TYPE__AIO__SIMPLEX )
         {
             group_ptr->actions_critical_choices.append(",");
             group_ptr->actions_critical_choices.append(HWMON_ACTION_RESET);
diff --git a/mtce/src/hwmon/hwmonThreads.cpp b/mtce/src/hwmon/hwmonThreads.cpp
index f5305050..a572ce4c 100644
--- a/mtce/src/hwmon/hwmonThreads.cpp
+++ b/mtce/src/hwmon/hwmonThreads.cpp
@@ -964,6 +964,10 @@ static int _parse_redfish_sensor_data( char * json_str_ptr, thread_info_type * i
                         {
                             strcpy(_sample_list[samples].status, "cr");
                         }
+                        else if  (!strcmp (health.data(), REDFISH_SEVERITY__NONRECOVERABLE ))
+                        {
+                            strcpy(_sample_list[samples].status, "nr");
+                        }
                         else
                         {
                             strcpy(_sample_list[samples].status, "na");
diff --git a/mtce/src/hwmon/hwmonThreads.h b/mtce/src/hwmon/hwmonThreads.h
index f215cca8..2e5a0a3a 100644
--- a/mtce/src/hwmon/hwmonThreads.h
+++ b/mtce/src/hwmon/hwmonThreads.h
@@ -33,6 +33,7 @@
 #define REDFISH_SEVERITY__GOOD     "OK"
 #define REDFISH_SEVERITY__MAJOR    "Warning"
 #define REDFISH_SEVERITY__CRITICAL "Critical"
+#define REDFISH_SEVERITY__NONRECOVERABLE "NonRecoverable"
 
 #define BMC_SENSOR_DEFAULT_UNIT_TYPE_TEMP    "degrees"
 #define BMC_SENSOR_DEFAULT_UNIT_TYPE_VOLT    "Volts"
diff --git a/mtce/src/hwmon/scripts/hwmon.logrotate b/mtce/src/hwmon/scripts/hwmon.logrotate
index dd1eceee..e8ce8e66 100644
--- a/mtce/src/hwmon/scripts/hwmon.logrotate
+++ b/mtce/src/hwmon/scripts/hwmon.logrotate
@@ -1,28 +1,21 @@
-#daily
-nodateext
-start 1
-missingok
-notifempty
-compress
-sharedscripts
-postrotate
-    systemctl reload syslog-ng > /dev/null 2>&1 || true
-endscript
+#
+# Copyright (c) 2020-2021 Wind River Systems, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
 
 /var/log/hwmond.log
-{
-    size 50M
-    rotate 5
-}
-
 /var/log/hwmond_event.log
-{
-    size 50M
-    rotate 5
-}
-
 /var/log/hwmond_api.log
 {
+    create 0640 root root
+    start 1
     size 50M
     rotate 5
+    compress
+    notifempty
+    missingok
+    postrotate
+        systemctl reload syslog-ng > /dev/null 2>&1 || true
+    endscript
+    delaycompress
 }
diff --git a/mtce/src/lmon/scripts/lmon.logrotate b/mtce/src/lmon/scripts/lmon.logrotate
index b59fa9ff..e6fe3191 100644
--- a/mtce/src/lmon/scripts/lmon.logrotate
+++ b/mtce/src/lmon/scripts/lmon.logrotate
@@ -1,16 +1,19 @@
-#daily
-nodateext
+#
+# Copyright (c) 2020-2021 Wind River Systems, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
 
 /var/log/lmond.log
 {
-    nodateext
-    size 10M
+    create 0640 root root
     start 1
-    missingok
+    size 10M
     rotate 20
     compress
-    sharedscripts
+    notifempty
+    missingok
     postrotate
         systemctl reload syslog-ng > /dev/null 2>&1 || true
     endscript
+    delaycompress
 }
diff --git a/mtce/src/maintenance/Makefile b/mtce/src/maintenance/Makefile
index 7d11f6ac..767ffefe 100755
--- a/mtce/src/maintenance/Makefile
+++ b/mtce/src/maintenance/Makefile
@@ -54,7 +54,7 @@ BINS = mtcAgent mtcClient
 LDLIBS += -lstdc++ -ldaemon -lcommon -lthreadUtil -lbmcUtils -lfmcommon -lalarm -lpthread -lrt -levent -ljson-c -lamon -lcrypto -luuid
 INCLUDES = -I. -I/usr/include/mtce-daemon -I/usr/include/mtce-common
 INCLUDES += -I../common -I../alarm -I../heartbeat -I../hwmon -I../public
-CCFLAGS += -g -O2 -Wall -Wextra -Werror -Wno-missing-braces
+CCFLAGS += -g -O2 -Wall -Wextra -Werror -Wno-missing-braces -std=c++11
 
 STATIC_ANALYSIS_TOOL = cppcheck
 STATIC_ANALYSIS_TOOL_EXISTS = $(shell [[ -e `which $(STATIC_ANALYSIS_TOOL)` ]] && echo 1 || echo 0)
diff --git a/mtce/src/maintenance/mtcAlarm.cpp b/mtce/src/maintenance/mtcAlarm.cpp
index 8262da9f..28d1b6bc 100644
--- a/mtce/src/maintenance/mtcAlarm.cpp
+++ b/mtce/src/maintenance/mtcAlarm.cpp
@@ -26,6 +26,7 @@ using namespace std;
 #include "daemon_common.h" /*                                           */
 
 #include "nodeBase.h"      /*                                           */
+#include "nodeClass.h"     /*                                           */
 #include "nodeTimers.h"    /*                                           */
 #include "nodeUtil.h"      /*                                           */
 #include "mtcAlarm.h"      /* for ... this module header                */
@@ -379,8 +380,169 @@ void mtcAlarm_clear_all ( string hostname )
     }
 }
 
+/****************************************************************************
+ *
+ * Name       : mtcAlarm_audit
+ *
+ * Purpose    : Monitor and Auto-Correct maintenance alarms
+ *
+ * Description: Query locked state alarm (raw)
+ *              if successful
+ *                 - Query alarms
+ *                 - compare to running state
+ *                 - correct mismatches ; internal state takes precidence
+ *                 - log all alarm state changes
+ *
+ ****************************************************************************/
+
+void nodeLinkClass::mtcAlarm_audit ( struct nodeLinkClass::node * node_ptr )
+{
+   /*
+    * Read locked state alarm directly to detect fm access failures.
+    * If successful further reads are done using a wrapper utility.
+    */
+    SFmAlarmDataT alarm_query  ;
+    AlarmFilter   alarm_filter ;
+    EFmErrorT     rc           ;
+
+    memset(&alarm_query, 0, sizeof(alarm_query));
+    memset(&alarm_filter, 0, sizeof(alarm_filter));
+    snprintf ( &alarm_filter.alarm_id[0], FM_MAX_BUFFER_LENGTH, "%s",
+               LOCK_ALARM_ID);
+    snprintf ( &alarm_filter.entity_instance_id[0], FM_MAX_BUFFER_LENGTH, "%s%s",
+                    ENTITY_PREFIX, node_ptr->hostname.data());
+    rc = fm_get_fault ( &alarm_filter, &alarm_query );
+    if (( rc != FM_ERR_OK ) && ( rc != FM_ERR_ENTITY_NOT_FOUND ))
+    {
+        wlog("%s alarm query failure ; code:%d",
+                 node_ptr->hostname.c_str(),
+                 rc );
+        return ;
+    }
+
+    /* With FM comms proven working lets check the other mtc alarms */
+    string active_alarms = "";
+    for ( int i = 0 ; i < MAX_ALARMS ; i++ )
+    {
+        mtc_alarm_id_enum id = (mtc_alarm_id_enum)i ;
+        if ( id == MTC_ALARM_ID__LOCK )
+        {
+            /* Unexpected severity case */
+            if ( node_ptr->adminState == MTC_ADMIN_STATE__LOCKED )
+            {
+                if ( alarm_query.severity != FM_ALARM_SEVERITY_WARNING )
+                {
+                    node_ptr->alarms[id] = FM_ALARM_SEVERITY_WARNING ;
+
+                    wlog("%s %s alarm mismatch ; %s -> %s",
+                             node_ptr->hostname.c_str(),
+                             _getIdentity(id).c_str(),
+                             alarmUtil_getSev_str(alarm_query.severity).c_str(),
+                             alarmUtil_getSev_str(node_ptr->alarms[id]).c_str());
+
+                    mtcAlarm_warning ( node_ptr->hostname, MTC_ALARM_ID__LOCK );
+
+                }
+                if (!active_alarms.empty())
+                    active_alarms.append(", ");
+                active_alarms.append(_getIdentity(id) + ":");
+                active_alarms.append(alarmUtil_getSev_str(node_ptr->alarms[id]));
+            }
+            /* Unexpected assertion case */
+            else if (( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) &&
+                     (  alarm_query.severity != FM_ALARM_SEVERITY_CLEAR ))
+            {
+                node_ptr->alarms[id] = FM_ALARM_SEVERITY_CLEAR ;
+
+                wlog("%s %s alarm mismatch ; %s -> %s",
+                         node_ptr->hostname.c_str(),
+                         _getIdentity(id).c_str(),
+                         alarmUtil_getSev_str(alarm_query.severity).c_str(),
+                         alarmUtil_getSev_str(node_ptr->alarms[id]).c_str());
+
+                mtcAlarm_clear ( node_ptr->hostname, id );
+            }
+        }
+        else if (( id == MTC_ALARM_ID__CONFIG ) ||
+                 ( id == MTC_ALARM_ID__ENABLE ) ||
+                 ( id == MTC_ALARM_ID__BM     ) ||
+                 ( id == MTC_ALARM_ID__CH_CONT) ||
+                 ( id == MTC_ALARM_ID__CH_COMP))
+        {
+            EFmAlarmSeverityT severity = mtcAlarm_state ( node_ptr->hostname, id);
+            if ( severity != node_ptr->alarms[id] )
+            {
+                ilog ("%s %s alarm mismatch ; %s -> %s",
+                          node_ptr->hostname.c_str(),
+                          _getIdentity(id).c_str(),
+                           alarmUtil_getSev_str(severity).c_str(),
+                           alarmUtil_getSev_str(node_ptr->alarms[id]).c_str());
+
+                if ( node_ptr->alarms[id] == FM_ALARM_SEVERITY_CLEAR )
+                {
+                    mtcAlarm_clear ( node_ptr->hostname, id );
+                }
+                else
+                {
+                    mtcAlarm_raise ( node_ptr->hostname, id, node_ptr->alarms[id] );
+                }
+            }
+            if ( node_ptr->alarms[id] != FM_ALARM_SEVERITY_CLEAR )
+            {
+                if (!active_alarms.empty())
+                    active_alarms.append(", ");
+                active_alarms.append(_getIdentity(id) + ":");
+                active_alarms.append(alarmUtil_getSev_str(node_ptr->alarms[id]));
+            }
+        }
+        /* else don't care about other alarm ids ; logs events etc */
+    }
+
+    /* manage logging of active alarms */
+    if ( !active_alarms.empty() )
+    {
+        if ( node_ptr->active_alarms != active_alarms )
+        {
+            ilog ("%s active alarms: %s",
+                      node_ptr->hostname.c_str(),
+                      active_alarms.c_str());
+
+            node_ptr->active_alarms = active_alarms ;
+        }
+        /* else
+         *    do nothing because there are active alarms
+         *    that have not changed since the last audit.
+         */
+    }
+    else if ( ! node_ptr->active_alarms.empty() )
+    {
+        /* clear active alarm list since there 'were' active alarms
+         * but there are no longer active alarms */
+        node_ptr->active_alarms.clear();
+        ilog ("%s no active alarms", node_ptr->hostname.c_str());
+    }
+    /* else
+     *    no active alarms ; don't log */
+}
+
 /*************************   A L A R M I N G   **************************/
 
+/* Raise the specified maintenance alarm severity */
+int mtcAlarm_raise ( string hostname, mtc_alarm_id_enum id, EFmAlarmSeverityT severity )
+{
+    switch ( severity )
+    {
+        case FM_ALARM_SEVERITY_MINOR:
+            return (mtcAlarm_minor(hostname,id));
+        case FM_ALARM_SEVERITY_MAJOR:
+            return (mtcAlarm_major(hostname,id));
+        case FM_ALARM_SEVERITY_CRITICAL:
+            return (mtcAlarm_critical(hostname,id));
+        default:
+            return (FAIL_BAD_PARM);
+    }
+}
+
 /* Clear the specified hosts's maintenance alarm */
 int mtcAlarm_clear ( string hostname, mtc_alarm_id_enum id )
 {
diff --git a/mtce/src/maintenance/mtcAlarm.h b/mtce/src/maintenance/mtcAlarm.h
index 25565d4f..6e93f659 100644
--- a/mtce/src/maintenance/mtcAlarm.h
+++ b/mtce/src/maintenance/mtcAlarm.h
@@ -95,6 +95,9 @@ string mtcAlarm_getId_str ( mtc_alarm_id_enum id );
 /** Clear the specified maintenance alarm for specific host */
 int  mtcAlarm_clear    ( string hostname, mtc_alarm_id_enum id );
 
+/** Raise specified severity level alarm for the specified host */
+int mtcAlarm_raise ( string hostname, mtc_alarm_id_enum id, EFmAlarmSeverityT severity );
+
 /** Assert a specified mtce alarm against the specified host with a WARNING severity level */
 int  mtcAlarm_warning  ( string hostname, mtc_alarm_id_enum id );
 
diff --git a/mtce/src/maintenance/mtcBmcUtil.cpp b/mtce/src/maintenance/mtcBmcUtil.cpp
index 2c76a654..817db71b 100644
--- a/mtce/src/maintenance/mtcBmcUtil.cpp
+++ b/mtce/src/maintenance/mtcBmcUtil.cpp
@@ -39,6 +39,26 @@ int nodeLinkClass::bmc_command_send ( struct nodeLinkClass::node * node_ptr,
 {
     int rc = PASS ;
 
+    /* handle 'kill of in-progress' thread or 'done but not consumed' thread */
+    if ( ! thread_idle ( node_ptr->bmc_thread_ctrl ))
+    {
+        if ( ! thread_done ( node_ptr->bmc_thread_ctrl ))
+        {
+            thread_kill ( node_ptr->bmc_thread_ctrl,
+                          node_ptr->bmc_thread_info );
+            return (RETRY);
+        }
+        else
+        {
+             mtcTimer_reset ( node_ptr->bmc_thread_ctrl.timer );
+             if ( thread_done_consume ( node_ptr->bmc_thread_ctrl,
+                                        node_ptr->bmc_thread_info ) != PASS )
+             {
+                 return (RETRY);
+             }
+        }
+    }
+
     node_ptr->bmc_thread_info.command = command ;
 
     /* Update / Setup the BMC access credentials */
@@ -437,6 +457,13 @@ bmc_command_recv_cleanup:
 
     if ( rc != RETRY )
     {
+        ilog ("%s %s recv '%s' command (%s) (rc:%d)",
+                  node_ptr->hostname.c_str(),
+                  node_ptr->bmc_thread_ctrl.name.c_str(),
+                  bmcUtil_getCmd_str(node_ptr->bmc_thread_info.command).c_str(),
+                  bmcUtil_getProtocol_str(node_ptr->bmc_protocol).c_str(),
+                  rc);
+
         node_ptr->bmc_thread_ctrl.done = true ;
         node_ptr->bmc_thread_ctrl.retries = 0 ;
         node_ptr->bmc_thread_ctrl.id = 0 ;
diff --git a/mtce/src/maintenance/mtcCompMsg.cpp b/mtce/src/maintenance/mtcCompMsg.cpp
index d3793553..41e37213 100755
--- a/mtce/src/maintenance/mtcCompMsg.cpp
+++ b/mtce/src/maintenance/mtcCompMsg.cpp
@@ -20,7 +20,7 @@
 
 #include <stdio.h>
 #include <string.h>
-#include <sys/un.h>      /* for ... unix domain sockets     */
+#include <sys/un.h>    /* for ... unix domain sockets     */
 #include <arpa/inet.h>
 #include <sys/socket.h>
 #include <net/if.h>
@@ -29,8 +29,8 @@
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <errno.h>
-#include <list>        /* for the list of conf file names */
-
+#include <list>        /* for ... list of conf file names */
+#include <unistd.h>    /* for ... sync                    */
 
 using namespace std;
 
@@ -70,11 +70,15 @@ void stop_pmon( void )
 {
     /* max pipe command response length */
     #define PIPE_COMMAND_RESPON_LEN (100)
+
+    ilog("Stopping collectd.");
+    int rc = system("/usr/local/sbin/pmon-stop collectd");
+    sleep (2);
     ilog("Stopping pmon to prevent process recovery during shutdown");
     for ( int retry = 0 ; retry < 5 ; retry++ )
     {
         char pipe_cmd_output [PIPE_COMMAND_RESPON_LEN] ;
-        int rc = system("/usr/bin/systemctl stop pmon");
+        rc = system("/usr/bin/systemctl stop pmon");
         sleep(2);
 
         /* confirm pmon is no longer active */
@@ -204,6 +208,24 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
             mlog1 ("mtcAlive request received (%s network)\n", interface_name.c_str());
             return ( send_mtcAlive_msg ( sock_ptr, get_who_i_am(), interface ));
         }
+        else if ( msg.cmd == MTC_MSG_INFO )
+        {
+            mlog1("mtc 'info' message received (%s network)\n", interface_name.c_str());
+            load_mtcInfo_msg ( msg );
+            return ( PASS ); /* no ack for this message */
+        }
+        else if ( msg.cmd == MTC_CMD_SYNC )
+        {
+            ilog ("mtc '%s' message received (%s network)\n",
+                   get_mtcNodeCommand_str(msg.cmd),
+                   interface_name.c_str());
+
+            ilog ("Sync Start");
+            sync ();
+            ilog ("Sync Done");
+
+            return ( PASS ); /* no ack for this message */
+        }
         else if ( msg.cmd == MTC_MSG_LOCKED )
         {
             /* Only recreate the file if its not already present */
@@ -603,7 +625,7 @@ int mtc_service_command ( mtc_socket_type * sock_ptr, int interface )
 }
 
 /** Send an event to the mtcAgent **/
-int mtce_send_event ( mtc_socket_type * sock_ptr, int cmd , const char * mtce_name_ptr )
+int mtce_send_event ( mtc_socket_type * sock_ptr, unsigned int cmd , const char * mtce_name_ptr )
 {
     mtc_message_type event ;
 
@@ -619,6 +641,24 @@ int mtce_send_event ( mtc_socket_type * sock_ptr, int cmd , const char * mtce_na
         /* We don't use the buffer for mtce events to remove it from the size */
         bytes = ((sizeof(mtc_message_type))-(BUF_SIZE));
     }
+    else if ( cmd == MTC_EVENT_MONITOR_READY )
+    {
+        string event_info = "{\"" ;
+        event_info.append(MTC_JSON_INV_NAME);
+        event_info.append("\":\"");
+        event_info.append(get_hostname());
+        event_info.append("\",\"");
+        event_info.append(MTC_JSON_SERVICE);
+        event_info.append("\":\"");
+        event_info.append(MTC_SERVICE_MTCCLIENT_NAME );
+        event_info.append("\"}");
+
+        size_t len =  event_info.length()+1 ;
+        snprintf ( &event.hdr[0], MSG_HEADER_SIZE, "%s", get_mtce_event_header());
+        snprintf ( &event.buf[0], len, "%s", event_info.data());
+        bytes = ((sizeof(mtc_message_type))-(BUF_SIZE-len));
+        ilog ("%s %s ready", get_hostname().c_str(), MTC_SERVICE_MTCCLIENT_NAME);
+    }
     else if (( cmd == MTC_EVENT_AVS_CLEAR    ) ||
              ( cmd == MTC_EVENT_AVS_MAJOR    ) ||
              ( cmd == MTC_EVENT_AVS_CRITICAL ))
@@ -666,7 +706,7 @@ int mtce_send_event ( mtc_socket_type * sock_ptr, int cmd , const char * mtce_na
     {
         if ( bytes == 0 )
         {
-           slog ("message send failed ; message size=0 for cmd:%d is 0\n", event.cmd );
+           slog ("message send failed ; message size=0 for cmd:0x%x is 0\n", event.cmd );
            rc = FAIL_NO_DATA ;
         }
         else if ((rc = sock_ptr->mtc_client_tx_socket->write((char*)&event.hdr[0], bytes))!= bytes )
@@ -912,15 +952,18 @@ int send_mtcAlive_msg ( mtc_socket_type * sock_ptr, string identity, int interfa
         }
 
         /* Send to controller-1 cluster address */
-        if (( sock_ptr->mtc_client_tx_socket_c1_clstr ) &&
-            ( sock_ptr->mtc_client_tx_socket_c1_clstr->sock_ok() == true ))
+        if ( get_ctrl_ptr()->system_type != SYSTEM_TYPE__AIO__SIMPLEX )
         {
-            print_mtc_message ( CONTROLLER_1, MTC_CMD_TX, msg, get_iface_name_str(CLSTR_INTERFACE), false );
-            sock_ptr->mtc_client_tx_socket_c1_clstr->write((char*)&msg.hdr[0], bytes ) ;
-        }
-        else
-        {
-            elog("mtc_client_tx_socket_c1_clstr not ok");
+            if (( sock_ptr->mtc_client_tx_socket_c1_clstr ) &&
+                ( sock_ptr->mtc_client_tx_socket_c1_clstr->sock_ok() == true ))
+            {
+                print_mtc_message ( CONTROLLER_1, MTC_CMD_TX, msg, get_iface_name_str(CLSTR_INTERFACE), false );
+                sock_ptr->mtc_client_tx_socket_c1_clstr->write((char*)&msg.hdr[0], bytes ) ;
+            }
+            else
+            {
+                elog("mtc_client_tx_socket_c1_clstr not ok");
+            }
         }
     }
     else
@@ -933,32 +976,59 @@ int send_mtcAlive_msg ( mtc_socket_type * sock_ptr, string identity, int interfa
     return (PASS) ;
 }
 
-/* Accelerated Virtual Switch 'events' socket
- * - for receiving data port state change event
- * Event strings are
-  *
-  * {"type":"port-state", "severity":"critical|major|clear"}
-  *
-  * type:port-state - the provider network data port status has changed to the supplied fault severity
-  *
-  * severity:
-  *   critical - port has failed and is not part of an aggregate or is the last port in an aggregate (degrade, disable services)
-  *   major    - port has failed and is part of an aggregate with other inservice-ports (degrade only)
-  *   clear    - port has recovered from a failed state and is operational (clear degrade, enable services)
-  *
-  * NOTE: The port status can transition from any of the above states to any other state.
-  *
-  * The neutron agent monitors the vswitch ports at a 2 second interval.
-  * If a port changes link state during the polling period, it will
-  * raise/clear the alarm, but now also calculates the impact of that port
-  * failure on the provider network data interface.
-  *
-  * The overall aggregated state across all provider network interfaces will
-  * be reported to maintenance when ports enter a link down or up state.
-  * The agent will also periodically send the current provider network port
-  * status to maintenance every 30 seconds.
-  *
-  */
+int send_mtcClient_cmd ( mtc_socket_type * sock_ptr, int cmd, string hostname, string address, int port)
+{
+    mtc_message_type msg ;
+    int bytes = 0 ;
+    MEMSET_ZERO (msg);
+    snprintf ( &msg.hdr[0], MSG_HEADER_SIZE, "%s", get_cmd_req_msg_header());
+    msg.cmd = cmd ;
+
+    switch ( cmd )
+    {
+        case MTC_CMD_SYNC:
+        {
+            ilog ("Sending '%s' command to %s:%s:%d",
+                   get_mtcNodeCommand_str(cmd),
+                   hostname.c_str(),
+                   address.c_str(), port);
+
+            msg.num = 0   ;
+
+            /* buffer  not used in this message */
+            bytes = ((sizeof(mtc_message_type))-(BUF_SIZE));
+
+            break ;
+        }
+        default:
+        {
+            slog("Unsupported command ; %s:%d", get_mtcNodeCommand_str(cmd), cmd );
+            return (FAIL_BAD_CASE);
+        }
+    }
+    int rc = FAIL ;
+
+    /* Send to controller floating address */
+    if (( sock_ptr->mtc_client_tx_socket ) &&
+        ( sock_ptr->mtc_client_tx_socket->sock_ok() == true ))
+    {
+        print_mtc_message ( hostname, MTC_CMD_TX, msg, get_iface_name_str(MGMNT_INTERFACE), false );
+        rc = sock_ptr->mtc_client_tx_socket->write((char*)&msg.hdr[0], bytes, address.data(), port ) ;
+        if ( 0 >= rc )
+        {
+            elog("failed to send command to mtcClient (%d) (%d:%s)", rc, errno, strerror(errno));
+            rc = FAIL_SOCKET_SENDTO ;
+        }
+        else
+            rc = PASS ;
+    }
+    else
+    {
+        elog("mtc_client_tx_socket not ok");
+        rc = FAIL_BAD_STATE ;
+    }
+    return (rc) ;
+}
 
 int mtcCompMsg_testhead ( void )
 {
diff --git a/mtce/src/maintenance/mtcCtrlMsg.cpp b/mtce/src/maintenance/mtcCtrlMsg.cpp
index 6a820ed1..5a7be7e9 100755
--- a/mtce/src/maintenance/mtcCtrlMsg.cpp
+++ b/mtce/src/maintenance/mtcCtrlMsg.cpp
@@ -443,6 +443,34 @@ int mtc_service_inbox ( nodeLinkClass   *  obj_ptr,
                     obj_ptr->declare_service_ready ( hostname, MTC_SERVICE_HEARTBEAT );
                     return (PASS);
                 }
+                else if ( service == MTC_SERVICE_MTCCLIENT_NAME )
+                {
+                    ilog ("%s %s ready", hostname.c_str(), MTC_SERVICE_MTCCLIENT_NAME);
+
+                    /* if this ready event is from the mtcClient of a
+                     * controller that has valid bmc access info then
+                     * build the 'peer controller kill' mtcInfo and
+                     * send it to that mtcClient */
+                    if ( obj_ptr->get_nodetype ( hostname ) & CONTROLLER_TYPE )
+                    {
+                        string bm_pw = obj_ptr->get_bm_pw ( hostname ) ;
+                        if ( !bm_pw.empty() && ( bm_pw != NONE ))
+                        {
+                            string bm_un = obj_ptr->get_bm_un ( hostname ) ;
+                            string bm_ip = obj_ptr->get_bm_ip ( hostname ) ;
+                            if (( hostUtil_is_valid_username  ( bm_un )) &&
+                                ( hostUtil_is_valid_ip_addr   ( bm_ip )))
+                            {
+                                send_mtc_cmd ( hostname,
+                                               MTC_MSG_INFO,
+                                               MGMNT_INTERFACE,
+                                               obj_ptr->build_mtcInfo_dict (
+                                MTC_INFO_CODE__PEER_CONTROLLER_KILL_INFO));
+                            }
+                        }
+                    }
+                    return (PASS);
+                }
                 if (  service == MTC_SERVICE_HWMOND_NAME )
                 {
                     std::list<string>::iterator temp ;
@@ -578,11 +606,12 @@ int mtc_service_inbox ( nodeLinkClass   *  obj_ptr,
     return (rc);
 }
 
-int send_mtc_cmd ( string & hostname, int cmd , int interface )
+int send_mtc_cmd ( string & hostname, int cmd , int interface, string json_dict )
 {
     int rc = FAIL ;
     bool force = false ;
     mtc_message_type mtc_cmd ;
+    string data = "" ;
     mtc_socket_type * sock_ptr = get_sockPtr ();
     memset (&mtc_cmd,0,sizeof(mtc_message_type));
 
@@ -592,6 +621,16 @@ int send_mtc_cmd ( string & hostname, int cmd , int interface )
 
     switch ( cmd )
     {
+        case MTC_MSG_INFO:
+        {
+            snprintf ( &mtc_cmd.hdr[0], MSG_HEADER_SIZE, "%s" , get_cmd_req_msg_header() );
+            mtc_cmd.cmd = cmd ;
+            mtc_cmd.num = 0 ;
+            data = "{\"mtcInfo\":" + json_dict + "}";
+            ilog("%s mtc info update", hostname.c_str());
+            rc = PASS ;
+            break ;
+        }
         case MTC_REQ_MTCALIVE:
         {
             snprintf ( &mtc_cmd.hdr[0], MSG_HEADER_SIZE, "%s" , get_cmd_req_msg_header() );
@@ -689,11 +728,20 @@ int send_mtc_cmd ( string & hostname, int cmd , int interface )
          * Note: the minus 1 is to overwrite the null */
         snprintf ( &mtc_cmd.hdr[MSG_HEADER_SIZE-1], MSG_HEADER_SIZE, "%s", obj_ptr->get_hostIfaceMac(hostname, MGMNT_IFACE).data());
 
-        string data = "{\"address\":\"";
-        data.append(obj_ptr->my_float_ip) ;
-        data.append("\",\"interface\":\"");
-        data.append(get_iface_name_str(interface));
-        data.append("\"}");
+        /* If data is empty then at least add where the message came from */
+        if ( data.empty() )
+        {
+            data = "{\"address\":\"";
+            data.append(obj_ptr->my_float_ip) ;
+            data.append("\",\"interface\":\"");
+            data.append(get_iface_name_str(interface));
+            data.append("\"}");
+        }
+        else
+        {
+            ; /* data is already pre loaded by the command case above */
+        }
+        /* copy data into message buffer */
         snprintf ( &mtc_cmd.buf[0], data.length()+1, "%s", data.data());
         bytes = (sizeof(mtc_message_type)-(BUF_SIZE-(data.length()+1)));
 
@@ -1176,7 +1224,7 @@ int service_events ( nodeLinkClass * obj_ptr, mtc_socket_type * sock_ptr )
     else if ( msg.cmd == MTC_EVENT_HEARTBEAT_READY )
     {
         /* no heartbeating in simplex mode */
-        if ( obj_ptr->system_type == SYSTEM_TYPE__CPE_MODE__SIMPLEX )
+        if ( obj_ptr->system_type == SYSTEM_TYPE__AIO__SIMPLEX )
         {
             return (PASS);
         }
@@ -1214,13 +1262,68 @@ int service_events ( nodeLinkClass * obj_ptr, mtc_socket_type * sock_ptr )
             {
                 elog ("%s Failed to send inventory to heartbeat service\n", hostname.c_str());
             }
-            /* Send the start event to the heartbeat service for all enabled hosts */
+            /* Consider sending the 'start' request to the heartbeat service
+             * for all enabled hosts. */
             if (( obj_ptr->get_adminState  ( hostname ) == MTC_ADMIN_STATE__UNLOCKED ) &&
                 ( obj_ptr->get_operState   ( hostname ) == MTC_OPER_STATE__ENABLED ) &&
                 ((obj_ptr->get_availStatus ( hostname ) == MTC_AVAIL_STATUS__AVAILABLE ) ||
                  (obj_ptr->get_availStatus ( hostname ) == MTC_AVAIL_STATUS__DEGRADED )))
             {
-                send_hbs_command ( hostname, MTC_CMD_START_HOST, controller );
+                /* However, bypass sending heartbeat 'start' for nodes that
+                 * are not ready to heartbeat; enabling, configuring, testing.
+                 * Such cases are if a host is:
+                 *
+                 * 1. running the add_handler or
+                 * 2. running the enable_handler or
+                 * 3. running the enable_subf_handler or
+                 * 4. not configured or
+                 * 5. not tested (goenabled not complete)
+                 *
+                 */
+                mtc_nodeAdminAction_enum current_action =
+                    obj_ptr->get_adminAction (hostname);
+                if (( current_action != MTC_ADMIN_ACTION__ADD ) &&
+                    ( current_action != MTC_ADMIN_ACTION__ENABLE ) &&
+                    ( current_action != MTC_ADMIN_ACTION__ENABLE_SUBF ))
+                {
+                    int mtce_flags = obj_ptr->get_mtce_flags(hostname);
+                    if (( mtce_flags & MTC_FLAG__I_AM_CONFIGURED ) &&
+                        ( mtce_flags & MTC_FLAG__I_AM_HEALTHY  ) &&
+                        ( mtce_flags & MTC_FLAG__MAIN_GOENABLED ))
+                    {
+                        if (( obj_ptr->system_type != SYSTEM_TYPE__NORMAL ) &&
+                            ( obj_ptr->get_nodetype ( hostname ) & CONTROLLER_TYPE ))
+                        {
+                            /* If its an AIO then its worker subfunction
+                             * needs to have been be configured and tested. */
+                            if (( mtce_flags & MTC_FLAG__SUBF_CONFIGURED ) &&
+                                ( mtce_flags & MTC_FLAG__SUBF_GOENABLED ))
+                            {
+                                ilog("%s heartbeat start (AIO controller)",
+                                         hostname.c_str());
+                                send_hbs_command ( hostname, MTC_CMD_START_HOST, controller );
+                            }
+                            else
+                            {
+                                wlog ("%s not heartbeat ready (subf) (oob:%x)",
+                                          hostname.c_str(),
+                                          mtce_flags);
+                            }
+                        }
+                        else
+                        {
+                            ilog("%s heartbeat start (from ready event)",
+                                     hostname.c_str());
+                            send_hbs_command ( hostname, MTC_CMD_START_HOST, controller );
+                        }
+                    }
+                    else
+                    {
+                        wlog ("%s not heartbeat ready (main) (oob:%x)",
+                                  hostname.c_str(),
+                                  mtce_flags);
+                    }
+                }
             }
         }
         ilog ("%s %s inventory push ... done",
diff --git a/mtce/src/maintenance/mtcInvApi.cpp b/mtce/src/maintenance/mtcInvApi.cpp
index 0743455b..770c580f 100755
--- a/mtce/src/maintenance/mtcInvApi.cpp
+++ b/mtce/src/maintenance/mtcInvApi.cpp
@@ -974,7 +974,7 @@ int nodeLinkClass::mtcInvApi_update_states_now ( struct nodeLinkClass::node * no
     else
         avail = " " ;
 
-    if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
+    if (( AIO_SYSTEM ) && ( is_controller(node_ptr) == true ))
     {
         if ( ! oper_subf.empty() )
         {
@@ -1016,7 +1016,7 @@ int nodeLinkClass::mtcInvApi_update_states_now ( struct nodeLinkClass::node * no
         this->sysinvEvent.payload.erase(len-1,1);
         this->sysinvEvent.payload.append ( "]");
 
-        if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
+        if (( AIO_SYSTEM ) && ( is_controller(node_ptr) == true ))
         {
             ilog ("%s %s-%s-%s %s-%s\n",
                       node_ptr->hostname.c_str(),
diff --git a/mtce/src/maintenance/mtcNodeComp.cpp b/mtce/src/maintenance/mtcNodeComp.cpp
index 6e58c7fe..9db192a8 100644
--- a/mtce/src/maintenance/mtcNodeComp.cpp
+++ b/mtce/src/maintenance/mtcNodeComp.cpp
@@ -43,9 +43,9 @@
 #include <signal.h>
 #include <fcntl.h>
 #include <errno.h>
-//#include <syslog.h>    /* for ... syslog                  */
 #include <sys/stat.h>
 #include <list>
+#include <json-c/json.h> /* for ... json_tokener_parse                    */
 
 using namespace std;
 
@@ -56,6 +56,10 @@ using namespace std;
 #include "nodeBase.h"       /* for ... Common Definitions                 */
 #include "nodeTimers.h"     /* fpr ... Timer Service                      */
 #include "nodeUtil.h"       /* for ... Common Utilities                   */
+#include "hostUtil.h"       /* for ... hostUtil_is_valid_...              */
+#include "jsonUtil.h"       /* for ... jsonUtil_get_key_value_string      */
+#include "bmcUtil.h"        /* for ... bmcUtil_accessInfo_type            */
+#include "ipmiUtil.h"       /* for ... ipmiUtil_reset_host_now            */
 #include "nodeMacro.h"      /* for ... CREATE_NONBLOCK_INET_UDP_RX_SOCKET */
 #include "mtcNodeMsg.h"     /* for ... common maintenance messaging       */
 #include "mtcNodeComp.h"    /* for ... this module header                 */
@@ -96,7 +100,7 @@ string get_hostname ( void )
  * Daemon Configuration Structure - The allocated struct
  * @see daemon_common.h for daemon_config_type struct format.
  */
-static daemon_config_type mtc_config ; 
+static daemon_config_type mtc_config ;
 daemon_config_type * daemon_get_cfg_ptr () { return &mtc_config ; }
 
 /**
@@ -106,6 +110,8 @@ daemon_config_type * daemon_get_cfg_ptr () { return &mtc_config ; }
 static mtc_socket_type mtc_sock   ;
 static mtc_socket_type * sock_ptr ;
 
+static bmcUtil_accessInfo_type peer_controller = {"none","none","none","none","none"};
+static bmcUtil_accessInfo_type this_controller = {"none","none","none","none","none"};
 
 int run_goenabled_scripts ( string type );
 
@@ -138,6 +144,16 @@ void timer_handler ( int sig, siginfo_t *si, void *uc)
         mtcTimer_stop_int_safe ( ctrl.hostservices.timer );
         ctrl.hostservices.timer.ring = true ;
     }
+    else if ( *tid_ptr == ctrl.peer_ctrlr_reset.sync_timer.tid )
+    {
+        ctrl.peer_ctrlr_reset.sync_timer.ring = true ;
+        mtcTimer_stop_int_safe ( ctrl.peer_ctrlr_reset.sync_timer );
+    }
+    else if ( *tid_ptr == ctrl.peer_ctrlr_reset.audit_timer.tid )
+    {
+        /* use auto restart */
+        ctrl.peer_ctrlr_reset.audit_timer.ring = true ;
+    }
     else
     {
         mtcTimer_stop_tid_int_safe ( tid_ptr );
@@ -207,9 +223,8 @@ void daemon_exit ( void )
     exit (0) ;
 }
 
-                                 
 /* Startup config read */
-static int mtc_config_handler ( void * user, 
+static int mtc_config_handler ( void * user,
                           const char * section,
                           const char * name,
                           const char * value)
@@ -236,11 +251,14 @@ static int mtc_config_handler ( void * user,
         config_ptr->failsafe_shutdown_delay = atoi(value);
         ilog ("Shutdown TO : %d secs\n", config_ptr->failsafe_shutdown_delay );
     }
-    else
+    if (( ctrl.nodetype & CONTROLLER_TYPE ) &&
+        (MATCH("client", "sync_b4_peer_ctrlr_reset")))
     {
-        return (PASS);
+        ctrl.peer_ctrlr_reset.sync = atoi(value);
+        ilog("SyncB4 Reset: %s",
+              ctrl.peer_ctrlr_reset.sync ? "Yes" : "No" );
     }
-    return (FAIL);
+    return (PASS);
 }
 
 /* Read the mtc.ini file and load control    */
@@ -431,7 +449,7 @@ void setup_clstr_tx_sockets ( void )
             mtc_sock.mtc_client_tx_socket_c0_clstr->sock_ok(false);
         }
     }
-    if ( ctrl.system_type != SYSTEM_TYPE__CPE_MODE__SIMPLEX )
+    if ( ctrl.system_type != SYSTEM_TYPE__AIO__SIMPLEX )
     {
         dlog ("setup of %s TX\n", CONTROLLER_1_CLUSTER_HOST);
 
@@ -946,6 +964,65 @@ void _manage_goenabled_tests ( void )
     _scripts_cleanup (ctrl.active_script_set) ;
 }
 
+int issue_reset_and_cleanup ( void )
+{
+    int rc = FAIL ;
+    const char peer_ctrlr [] = "Peer controller reset" ;
+
+    ilog("SM %s request", peer_ctrlr );
+    /* check creds */
+    if (( hostUtil_is_valid_ip_addr  ( peer_controller.bm_ip ) == false ) ||
+        ( hostUtil_is_valid_username ( peer_controller.bm_un ) == false ) ||
+        ( hostUtil_is_valid_pw       ( peer_controller.bm_pw ) == false ))
+    {
+        elog("%s cannot reset peer BMC host at %s due to invalid credentials",
+                 ctrl.hostname, peer_controller.bm_ip.c_str());
+        return (rc);
+    }
+
+    /* create output filename - no need to delete after operation */
+    string output_filename = bmcUtil_create_data_fn ( ctrl.hostname,
+                             BMC_RESET_CMD_FILE_SUFFIX,
+                             BMC_PROTOCOL__IPMITOOL );
+    if ( output_filename.empty() )
+    {
+        elog("%s ; failed to create output filename", peer_ctrlr);
+        rc = FAIL_STRING_EMPTY ;
+    }
+    else if ( ipmiUtil_reset_host_now ( ctrl.hostname,
+                                        peer_controller,
+                                        output_filename ) == PASS )
+    {
+        string result = daemon_get_file_str ( output_filename.data() );
+        ilog("%s succeeded", peer_ctrlr);
+
+        /* don't fail the operation if the result is unexpected ; but log it */
+        if ( result.compare( IPMITOOL_POWER_RESET_RESP ) )
+        {
+            dlog("... but reset command output was unexpected ; %s",
+                      result.c_str());
+        }
+        rc = PASS ;
+    }
+    else
+    {
+        elog("%s failed", peer_ctrlr);
+        rc = FAIL_OPERATION ;
+    }
+
+    if ( rc == PASS )
+    {
+        /* give the host a chance to reset before
+         * telling SM the reset is done */
+        sleep (2) ;
+
+        /* Don't want to remove the file if the reset was not successful */
+        dlog("removing %s", RESET_PEER_NOW );
+        daemon_remove_file ( RESET_PEER_NOW );
+    }
+    return (rc);
+}
+
 
 /* The main service loop */
 int daemon_init ( string iface, string nodetype_str )
@@ -963,6 +1040,7 @@ int daemon_init ( string iface, string nodetype_str )
     ctrl.subfunction = 0 ;
     ctrl.system_type = daemon_system_type ();
     ctrl.clstr_iface_provisioned = false ;
+    ctrl.peer_ctrlr_reset.sync = false ;
 
     /* convert node type to integer */
     ctrl.nodetype = get_host_function_mask ( nodetype_str ) ;
@@ -1018,6 +1096,13 @@ int daemon_init ( string iface, string nodetype_str )
     mtcTimer_init ( ctrl.goenabled.timer, &ctrl.hostname[0], "goenable timer" );
     mtcTimer_init ( ctrl.hostservices.timer, &ctrl.hostname[0], "host services timer" );
 
+    /* initialize peer controller reset feature */
+    mtcTimer_init ( ctrl.peer_ctrlr_reset.audit_timer, &ctrl.hostname[0], "peer ctrlr reset audit timer" ),
+    mtcTimer_init ( ctrl.peer_ctrlr_reset.sync_timer, &ctrl.hostname[0], "peer ctrlr reset sync timer" ),
+    ctrl.peer_ctrlr_reset.sync_timer.ring = false ;
+    ctrl.peer_ctrlr_reset.audit_timer.ring = false ;
+    ctrl.peer_ctrlr_reset.audit_period = PEER_CTRLR_AUDIT_PERIOD ;
+
     /* initialize the script group control structures */
     script_ctrl_init ( &ctrl.goenabled    );
     script_ctrl_init ( &ctrl.hostservices );
@@ -1073,6 +1158,17 @@ void daemon_service_run ( void )
     /* Send first mtcAlive ASAP */
     mtcTimer_start ( ctrl.timer, timer_handler, 1 );
 
+    /* Monitor for peer controller reset requests when this
+     * daemon runs on a controller */
+    if ( ctrl.nodetype & CONTROLLER_TYPE )
+    {
+        mtcTimer_start ( ctrl.peer_ctrlr_reset.audit_timer,
+                         timer_handler,
+                         ctrl.peer_ctrlr_reset.audit_period );
+    }
+
+    mtce_send_event ( sock_ptr, MTC_EVENT_MONITOR_READY, NULL );
+
     /* lets go select so that the sock does not go crazy */
     dlog ("%s running main loop with %d msecs socket timeout\n",
                        &ctrl.hostname[0], (SOCKET_WAIT/1000) );
@@ -1305,8 +1401,20 @@ void daemon_service_run ( void )
                 socket_reinit = true ;
             }
 
-            /* Clstr Tx */
-            else if (( ctrl.clstr_iface_provisioned == true ) &&
+            /* Clstr Tx ; AIO SX */
+            else if ((ctrl.system_type == SYSTEM_TYPE__AIO__SIMPLEX) &&
+                     ( ctrl.clstr_iface_provisioned == true ) &&
+                     (( mtc_sock.mtc_client_tx_socket_c0_clstr == NULL ) ||
+                      ( mtc_sock.mtc_client_tx_socket_c0_clstr->sock_ok() == false )))
+            {
+                wlog ("calling setup_clstr_tx_sockets (auto-recovery)\n");
+                setup_clstr_tx_sockets();
+                socket_reinit = true ;
+            }
+
+            /* Clstr Tx ; not AIO SX */
+            else if ((ctrl.system_type != SYSTEM_TYPE__AIO__SIMPLEX) &&
+                     ( ctrl.clstr_iface_provisioned == true ) &&
                      (( mtc_sock.mtc_client_tx_socket_c0_clstr == NULL ) ||
                       ( mtc_sock.mtc_client_tx_socket_c1_clstr == NULL ) ||
                       ( mtc_sock.mtc_client_tx_socket_c0_clstr->sock_ok() == false ) ||
@@ -1384,7 +1492,51 @@ void daemon_service_run ( void )
                 }
             }
         }
-
+        /* service controller specific audits */
+        if ( ctrl.nodetype & CONTROLLER_TYPE )
+        {
+            /* peer controller reset service audit */
+            if ( ctrl.peer_ctrlr_reset.audit_timer.ring )
+            {
+                if ( daemon_is_file_present ( RESET_PEER_NOW ) )
+                {
+                    if ( ctrl.peer_ctrlr_reset.sync )
+                    {
+                        if ( ctrl.peer_ctrlr_reset.sync_timer.ring )
+                        {
+                            issue_reset_and_cleanup ();
+                            ctrl.peer_ctrlr_reset.sync_timer.ring = false ;
+                        }
+                        else if ( ctrl.peer_ctrlr_reset.sync_timer.tid == NULL )
+                        {
+                            if ( send_mtcClient_cmd ( &mtc_sock,
+                                                       MTC_CMD_SYNC,
+                                                       peer_controller.hostname,
+                                                       peer_controller.host_ip,
+                                                       mtc_config.mtc_rx_mgmnt_port) == PASS )
+                            {
+                                mtcTimer_start ( ctrl.peer_ctrlr_reset.sync_timer, timer_handler, MTC_SECS_10 );
+                                ilog("... waiting for peer controller to sync - %d secs", MTC_SECS_10);
+                            }
+                            else
+                            {
+                                elog("failed to send 'sync' command to peer controller mtcClient");
+                                ctrl.peer_ctrlr_reset.sync_timer.ring = true ;
+                            }
+                        }
+                        else
+                        {
+                            ; /* wait longer */
+                        }
+                    }
+                    else
+                    {
+                        issue_reset_and_cleanup ();
+                    }
+                }
+                ctrl.peer_ctrlr_reset.audit_timer.ring = false ;
+            }
+        }
         daemon_signal_hdlr ();
     }
     daemon_exit();
@@ -1573,7 +1725,7 @@ int run_hostservices_scripts ( unsigned int cmd )
 
 
     /* For the stop command we need the mtcClient to run both controller and
-     * worker stop services if we are on a CPE system.
+     * worker stop services if we are on a AIO system.
      * This saves the mtcAgent from having to issue and manage 2 commands,
      * one for controller and 1 for worker */
     if ( ctrl.system_type != SYSTEM_TYPE__NORMAL )
@@ -1750,7 +1902,6 @@ void daemon_sigchld_hdlr ( void )
         }
         default:
         {
-            wlog ("child handler running with no active script set (%d)\n", ctrl.active_script_set );
             return ;
         }
     }
@@ -1820,6 +1971,84 @@ void daemon_sigchld_hdlr ( void )
     }
 }
 
+/***************************************************************************
+ *
+ * Name       : load_mtcInfo_msg
+ *
+ * Description: Extract the mtc info from the MTC_MSG_INFO message.
+ *
+ * Assumptions: So far only the peer controller reset feature uses this.
+ *
+ * Returns    : Nothing
+ *
+ ***************************************************************************/
+
+void load_mtcInfo_msg ( mtc_message_type & msg )
+{
+    if ( ctrl.nodetype & CONTROLLER_TYPE )
+    {
+        mlog1("%s", &msg.buf[0]);
+        struct json_object *_obj = json_tokener_parse( &msg.buf[0] );
+        if ( _obj )
+        {
+            if ( strcmp(&ctrl.hostname[0], CONTROLLER_0 ))
+                peer_controller.hostname = CONTROLLER_0 ;
+            else
+                peer_controller.hostname = CONTROLLER_1 ;
+
+            struct json_object *info_obj = (struct json_object *)(NULL);
+            json_bool json_rc = json_object_object_get_ex( _obj,
+                                                          "mtcInfo",
+                                                          &info_obj );
+            if ( ( json_rc == TRUE ) && ( info_obj ))
+            {
+                struct json_object *ctrl_obj = (struct json_object *)(NULL);
+                json_bool json_rc =
+                json_object_object_get_ex( info_obj,
+                                           peer_controller.hostname.data(),
+                                          &ctrl_obj );
+
+                if (( json_rc == TRUE ) && ( ctrl_obj ))
+                {
+                    peer_controller.host_ip = jsonUtil_get_key_value_string(ctrl_obj, MTC_JSON_INV_HOSTIP) ;
+                    peer_controller.bm_ip = jsonUtil_get_key_value_string(ctrl_obj, MTC_JSON_INV_BMIP) ;
+                    peer_controller.bm_un = jsonUtil_get_key_value_string(ctrl_obj, "bm_un");
+                    peer_controller.bm_pw = jsonUtil_get_key_value_string(ctrl_obj, "bm_pw");
+
+                    /* log the mc info but not the bmc password ; only
+                     * indicate that it looks 'ok' or 'is 'none' */
+                    ilog ("%s is my peer [host:%s bmc:%s:%s:%s]",
+                           peer_controller.hostname.c_str(),
+                           peer_controller.host_ip.c_str(),
+                           peer_controller.bm_ip.c_str(),
+                           peer_controller.bm_un.c_str(),
+                           hostUtil_is_valid_pw(peer_controller.bm_pw) ? "ok":"none");
+                }
+                else
+                {
+                    wlog("peer mtcInfo missing (rc:%d) ; %s",
+                          json_rc, &msg.buf[0]);
+                }
+            }
+            else
+            {
+                wlog("mtcInfo label parse error (rc:%d) ; %s",
+                      json_rc, &msg.buf[0]);
+            }
+            json_object_put(_obj);
+        }
+        else
+        {
+            wlog("message buffer tokenize error ; %s", &msg.buf[0]);
+        }
+    }
+    else
+    {
+        slog("%s got mtcInfo ; unexpected for this nodetype", ctrl.hostname);
+    }
+}
+
+
 /* Push daemon state to log file */
 void daemon_dump_info ( void )
 {
@@ -1853,13 +2082,13 @@ int daemon_run_testhead ( void )
     * STAGE 1: some test
     ************************************************/
     printf ( "| Test  %d : Maintenance Service Test ............. ", stage );
-    if ( rc != PASS )    
+    if ( rc != PASS )
     {
        FAILED_STR ;
        rc = FAIL ;
     }
     else
-       PASSED ; 
+       PASSED ;
 
     printf  ("+---------------------------------------------------------+\n");
     return PASS ;
diff --git a/mtce/src/maintenance/mtcNodeComp.h b/mtce/src/maintenance/mtcNodeComp.h
index 612144f8..190500c6 100644
--- a/mtce/src/maintenance/mtcNodeComp.h
+++ b/mtce/src/maintenance/mtcNodeComp.h
@@ -17,6 +17,10 @@
 #include <string.h>
 #include <unistd.h>
 
+using namespace std;
+
+#include "nodeTimers.h"     /* for ... Timer Service  */
+
 /** Compute Config mask */
 #define CONFIG_CLIENT_MASK  (CONFIG_AGENT_MTC_MGMNT_PORT  |\
                              CONFIG_CLIENT_MTC_MGMNT_PORT |\
@@ -59,6 +63,22 @@ typedef struct
 } script_ctrl_type ;
 void script_ctrl_init ( script_ctrl_type * script_ctrl_ptr );
 
+/* peer controller reset control structure and associated definitions */
+
+/* This is a flag file set by SM when SM wants maintanence to perform a
+ * BMC reset of the other (peer) controller */
+#define RESET_PEER_NOW "/var/run/.sm_reset_peer"
+
+#define PEER_CTRLR_AUDIT_PERIOD (2)
+typedef struct
+{
+    struct
+    mtc_timer  sync_timer  ;
+    mtc_timer audit_timer  ;
+    int       audit_period ;
+    bool      sync   ;
+} peer_ctrlr_reset_type ;
+
 typedef struct
 {
     char             hostname [MAX_HOST_NAME_SIZE+1];
@@ -76,7 +96,7 @@ typedef struct
     unsigned int     function ;
     unsigned int  subfunction ;
 
-    struct mtc_timer timer ; /* mtcAlive timer */
+    struct mtc_timer timer       ; /* mtcAlive timer */
 
     bool             clstr_iface_provisioned ;
 
@@ -102,6 +122,7 @@ typedef struct
     /* Where to send events */
     string mtcAgent_ip ;
 
+    peer_ctrlr_reset_type peer_ctrlr_reset;
 } ctrl_type ;
 
 ctrl_type * get_ctrl_ptr ( void );
@@ -109,5 +130,6 @@ ctrl_type * get_ctrl_ptr ( void );
 bool is_subfunction_worker ( void );
 int run_goenabled_scripts ( mtc_socket_type * sock_ptr , string requestor );
 int run_hostservices_scripts ( unsigned int cmd );
+void load_mtcInfo_msg ( mtc_message_type & msg );
 
 #endif
diff --git a/mtce/src/maintenance/mtcNodeCtrl.cpp b/mtce/src/maintenance/mtcNodeCtrl.cpp
index 6732ca88..a52b67e3 100644
--- a/mtce/src/maintenance/mtcNodeCtrl.cpp
+++ b/mtce/src/maintenance/mtcNodeCtrl.cpp
@@ -1187,15 +1187,6 @@ int _self_provision ( void )
 
             if ( my_identity.name == record_info.name )
             {
-                /* If the active controller was 'locked' and is being auto-corrected
-                 * to 'unlocked' then ensure that there is no locked alarm set for it */
-                if ( record_info.admin != "locked" )
-                {
-                        mtcAlarm_clear ( my_identity.name, MTC_ALARM_ID__LOCK );
-                        /* this is not required because its already inited to clear */
-                        // node_ptr->alarms[MTC_ALARM_ID__LOCK] = FM_ALARM_SEVERITY_CLEAR
-                }
-
                 if ( my_identity.mac != record_info.mac )
                 {
                     wlog ("%s mac address mismatch (%s - %s)\n",
@@ -1326,6 +1317,7 @@ void nodeLinkClass::fsm ( void )
             daemon_signal_hdlr ();
             mtcHttpSvr_look ( mtce_event );
         }
+        mtcInv.mtcInfo_handler();
     }
 }
 
@@ -1515,9 +1507,9 @@ void daemon_service_run ( void )
 
     if ( ts.tv_sec < MTC_MINS_15 )
     {
-        /* CPE DOR window is much greater in CPE since heartbeat
-         * cannot start until the inactive CPE has run both manifests */
-        int timeout = DEFAULT_DOR_MODE_CPE_TIMEOUT ;
+        /* AIO DOR window is much greater in AIO since heartbeat
+         * cannot start until the inactive AIO has run both manifests */
+        int timeout = DEFAULT_DOR_MODE_AIO_TIMEOUT ;
 
         /* override the timeout to a smaller value for normal system */
         if ( mtcInv.system_type == SYSTEM_TYPE__NORMAL )
@@ -1601,7 +1593,7 @@ void daemon_service_run ( void )
         if ( mtcInv.system_type == SYSTEM_TYPE__NORMAL )
             mtc_sock.waitd.tv_usec = MTCAGENT_SELECT_TIMEOUT ;
         else
-            mtc_sock.waitd.tv_usec = MTCAGENT_CPE_SELECT_TIMEOUT ;
+            mtc_sock.waitd.tv_usec = MTCAGENT_AIO_SELECT_TIMEOUT ;
 
         /* This is used as a delay up to select_timeout */
         rc = select( socks.back()+1, &mtc_sock.readfds, NULL, NULL, &mtc_sock.waitd);
diff --git a/mtce/src/maintenance/mtcNodeFsm.cpp b/mtce/src/maintenance/mtcNodeFsm.cpp
index af5e9a26..98c0e8a4 100755
--- a/mtce/src/maintenance/mtcNodeFsm.cpp
+++ b/mtce/src/maintenance/mtcNodeFsm.cpp
@@ -63,6 +63,11 @@ int nodeLinkClass::fsm ( struct nodeLinkClass::node * node_ptr )
 
     /* Monitor and Manage active threads */
     thread_handler ( node_ptr->bmc_thread_ctrl, node_ptr->bmc_thread_info );
+    if ( node_ptr->bmc_thread_ctrl.stage == THREAD_STAGE__KILL )
+    {
+        /* do nothing while thread is being killed */
+        return RETRY ;
+    }
 
     /* manage the host connected state and board management alarms */
     nodeLinkClass::bmc_handler ( node_ptr );
@@ -310,10 +315,10 @@ int nodeLinkClass::fsm ( struct nodeLinkClass::node * node_ptr )
     }
 
     /****************************************************************************
-     * No Op: Do nothing for this Healthy Enabled Locked CPE Simplex Host
+     * No Op: Do nothing for this Healthy Enabled Locked AIO Simplex Host
      ****************************************************************************
      */
-    else if (( this->system_type == SYSTEM_TYPE__CPE_MODE__SIMPLEX ) &&
+    else if (( this->system_type == SYSTEM_TYPE__AIO__SIMPLEX ) &&
              ( node_ptr->adminAction == MTC_ADMIN_ACTION__NONE ) &&
              ( node_ptr->adminState  == MTC_ADMIN_STATE__LOCKED ))
     {
diff --git a/mtce/src/maintenance/mtcNodeHdlrs.cpp b/mtce/src/maintenance/mtcNodeHdlrs.cpp
index de5ae2a4..49ac2684 100755
--- a/mtce/src/maintenance/mtcNodeHdlrs.cpp
+++ b/mtce/src/maintenance/mtcNodeHdlrs.cpp
@@ -481,7 +481,7 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr )
         if ( node_ptr->adminAction == MTC_ADMIN_ACTION__UNLOCK )
         {
             bool aio = false ;
-            if ( SIMPLEX_CPE_SYSTEM )
+            if ( SIMPLEX_AIO_SYSTEM )
                 aio = true ;
             else
                 aio = false ;
@@ -525,7 +525,7 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr )
                 }
             }
             mtcInvApi_update_states_now ( node_ptr, "unlocked", "disabled" , "offline", "disabled", "offline" );
-            mtcInvApi_update_task_now   ( node_ptr, aio ? MTC_TASK_CPE_SX_UNLOCK_MSG : MTC_TASK_SELF_UNLOCK_MSG );
+            mtcInvApi_update_task_now   ( node_ptr, aio ? MTC_TASK_AIO_SX_UNLOCK_MSG : MTC_TASK_SELF_UNLOCK_MSG );
 
             wlog ("%s unlocking %s with reboot\n",
                       my_hostname.c_str(),
@@ -546,7 +546,7 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr )
              * Condition 1: While there is no in-service backup controller
              *              to swact to. In this case the ctive controller
              *              - is only degraded to avoid a system outage.
-             *              - the CPE subfunction is failed
+             *              - the AIO subfunction is failed
              *              - worker SubFunction Alarm is raised
              *              - Enable alarm is raised
              *              - A process monitor alarm may also be raised if
@@ -648,7 +648,7 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr )
                 }
                 else
                 {
-                    if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
+                    if (( AIO_SYSTEM ) && ( is_controller(node_ptr) == true ))
                     {
                         /* Raise Critical Compute Function Alarm */
                         alarm_compute_failure ( node_ptr , FM_ALARM_SEVERITY_CRITICAL );
@@ -661,7 +661,7 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr )
             node_ptr->graceful_recovery_counter = 0 ;
             node_ptr->health_threshold_counter  = 0 ;
 
-            if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
+            if (( AIO_SYSTEM ) && ( is_controller(node_ptr) == true ))
             {
                 node_ptr->inservice_failed_subf = true ;
                 subfStateChange ( node_ptr, MTC_OPER_STATE__DISABLED,
@@ -1358,7 +1358,7 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr )
                  * have a worker function and the heartbeat for those hosts
                  * are started at the end of the subfunction handler. */
                 if (( THIS_HOST ) ||
-                   (( CPE_SYSTEM ) && ( is_controller(node_ptr)) ))
+                   (( AIO_SYSTEM ) && ( is_controller(node_ptr)) ))
                 {
                     enableStageChange ( node_ptr, MTC_ENABLE__STATE_CHANGE );
                 }
@@ -1523,8 +1523,8 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr )
             if ( is_controller(node_ptr) )
             {
                 /* Defer telling SM the controller state if
-                 * this is a CPE and this is the only controller */
-                if ( CPE_SYSTEM && ( num_controllers_enabled() > 0 ))
+                 * this is a AIO and this is the only controller */
+                if ( AIO_SYSTEM && ( num_controllers_enabled() > 0 ))
                 {
                     wlog ("%s deferring SM enable notification till subfunction-enable complete\n",
                               node_ptr->hostname.c_str());
@@ -1555,7 +1555,7 @@ int nodeLinkClass::enable_handler ( struct nodeLinkClass::node * node_ptr )
 
             enableStageChange ( node_ptr, MTC_ENABLE__START );
 
-            if (( CPE_SYSTEM ) && ( is_controller(node_ptr)))
+            if (( AIO_SYSTEM ) && ( is_controller(node_ptr)))
             {
                 ilog ("%s running worker sub-function enable handler\n", node_ptr->hostname.c_str());
                 mtcInvApi_update_task ( node_ptr, MTC_TASK_ENABLING_SUBF );
@@ -1637,9 +1637,10 @@ int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr )
             node_ptr->http_retries_cur = 0 ;
             node_ptr->unknown_health_reported = false ;
 
-            plog ("%s %sGraceful Recovery (uptime was %d)\n",
+            plog ("%s %sGraceful Recovery (%d) (uptime was %d)\n",
                       node_ptr->hostname.c_str(),
                       node_ptr->mnfa_graceful_recovery ? "MNFA " : "",
+                      node_ptr->graceful_recovery_counter,
                       node_ptr->uptime );
 
             /* Cancel any outstanding timers */
@@ -1660,7 +1661,8 @@ int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr )
              *   2. Setting the node operational state to Disabled
              *   3. Setting the Enable action
              */
-            if ( ++node_ptr->graceful_recovery_counter > MTC_MAX_FAST_ENABLES )
+            node_ptr->graceful_recovery_counter++ ;
+            if ( node_ptr->graceful_recovery_counter > MTC_MAX_FAST_ENABLES )
             {
                 /* gate off further mtcAlive messaging timme the offline
                 * handler runs. This prevents stale messages from making it
@@ -1772,10 +1774,11 @@ int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr )
 
                 else if ( node_ptr->mnfa_graceful_recovery == true )
                 {
-                    if ( node_ptr->uptime > MTC_MINS_10 )
+                    if ( node_ptr->uptime > MTC_MINS_15 )
                     {
                         /* did not reboot case */
-                        wlog ("%s Connectivity Recovered ; host did not reset\n", node_ptr->hostname.c_str());
+                        wlog ("%s Connectivity Recovered ; host did not reset (uptime:%d)\n",
+                                  node_ptr->hostname.c_str(), node_ptr->uptime);
                         wlog ("%s ... continuing with MNFA graceful recovery\n", node_ptr->hostname.c_str());
                         wlog ("%s ... with no affect to host services\n", node_ptr->hostname.c_str());
 
@@ -1788,7 +1791,8 @@ int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr )
                     else
                     {
                         /* did reboot case */
-                        wlog ("%s Connectivity Recovered ; host has reset\n", node_ptr->hostname.c_str());
+                        wlog ("%s Connectivity Recovered ; host has reset (uptime:%d)\n",
+                                  node_ptr->hostname.c_str(),  node_ptr->uptime);
                         ilog ("%s ... continuing with MNFA graceful recovery\n", node_ptr->hostname.c_str());
                         ilog ("%s ... without additional reboot %s\n",
                                   node_ptr->hostname.c_str(), node_ptr->bm_ip.empty() ? "or reset" : "" );
@@ -1806,12 +1810,13 @@ int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr )
                         break ;
                     }
                 }
-                else if (( node_ptr->uptime_save ) && ( node_ptr->uptime >= node_ptr->uptime_save ))
+                else if ( node_ptr->uptime > MTC_MINS_15 )
                 {
                     /* did not reboot case */
-                    wlog ("%s Connectivity Recovered ; host did not reset%s\n",
+                    wlog ("%s Connectivity Recovered ; host did not reset%s (uptime:%d)",
                               node_ptr->hostname.c_str(),
-                              node_ptr->was_dor_recovery_mode ? " (DOR)" : "" );
+                              node_ptr->was_dor_recovery_mode ? " (DOR)" : "",
+                              node_ptr->uptime);
 
                     wlog ("%s ... continuing with graceful recovery\n", node_ptr->hostname.c_str());
                     wlog ("%s ... with no affect to host services\n", node_ptr->hostname.c_str());
@@ -1875,7 +1880,7 @@ int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr )
                                            MTC_OPER_STATE__DISABLED,
                                            MTC_AVAIL_STATUS__FAILED );
 
-                if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
+                if (( AIO_SYSTEM ) && ( is_controller(node_ptr) == true ))
                 {
                     subfStateChange ( node_ptr, MTC_OPER_STATE__DISABLED,
                                                MTC_AVAIL_STATUS__FAILED );
@@ -1905,7 +1910,7 @@ int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr )
         {
             int timeout = 0 ;
 
-            /* Set the FSM task state to booting */
+            /* Set the FSM task state to 'Graceful Recovery Wait' */
             node_ptr->uptime = 0 ;
             mtcInvApi_update_task ( node_ptr, MTC_TASK_RECOVERY_WAIT );
 
@@ -2266,7 +2271,7 @@ int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr )
             {
                 /* The active controller would never get/be here but
                  * if it did then just fall through to change state. */
-                if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
+                if (( AIO_SYSTEM ) && ( is_controller(node_ptr) == true ))
                 {
                     /* Here we need to run the sub-fnction goenable and start
                      * host services if this is the other controller in a AIO
@@ -2442,10 +2447,10 @@ int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr )
             }
             else /* success path */
             {
-                /* allow the fsm to wait for up to 1 minute for the
-                 * hbsClient's ready event before starting heartbeat
+                /* allow the fsm to wait for up to 'worker config timeout'
+                 * for the hbsClient's ready event before starting heartbeat
                  * test. */
-                mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_MINS_1 );
+                mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_WORKER_CONFIG_TIMEOUT );
                 recoveryStageChange ( node_ptr, MTC_RECOVERY__HEARTBEAT_START );
             }
             break ;
@@ -2502,6 +2507,7 @@ int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr )
         {
             if ( node_ptr->mtcTimer.ring == true )
             {
+                ilog ("%s heartbeating", node_ptr->hostname.c_str());
                 /* if heartbeat is not working then we will
                  * never get here and enable the host */
                 recoveryStageChange ( node_ptr, MTC_RECOVERY__STATE_CHANGE );
@@ -2510,7 +2516,7 @@ int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr )
         }
         case MTC_RECOVERY__STATE_CHANGE:
         {
-            if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
+            if (( AIO_SYSTEM ) && ( is_controller(node_ptr) == true ))
             {
                 /* Set node as unlocked-enabled */
                 subfStateChange ( node_ptr, MTC_OPER_STATE__ENABLED,
@@ -2555,7 +2561,7 @@ int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr )
             else if ( rc == PASS )
             {
                 /* Start Graceful Recovery */
-                recoveryStageChange ( node_ptr, MTC_RECOVERY__ENABLE_START ) ;
+                recoveryStageChange ( node_ptr, MTC_RECOVERY__ENABLE ) ;
                 break ;
             }
             else if ( rc == FAIL_WORKQ_TIMEOUT )
@@ -2571,51 +2577,37 @@ int nodeLinkClass::recovery_handler ( struct nodeLinkClass::node * node_ptr )
             nodeLinkClass::force_full_enable ( node_ptr );
             break ;
         }
-        case MTC_RECOVERY__ENABLE_START:
+        case MTC_RECOVERY__ENABLE:
         {
-            /* Create the recovery enable timer. This timer is short.
-             * A node need to stay enabled with the hartbeat service
-             * running for a period of time before declaring it enabled */
-            mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler, MTC_HEARTBEAT_SOAK_BEFORE_ENABLE );
-
-            recoveryStageChange ( node_ptr, MTC_RECOVERY__ENABLE_WAIT ) ;
-            break;
-        }
-        case MTC_RECOVERY__ENABLE_WAIT:
-        {
-            /* When this timer fires the host has been up for enough time */
-            if ( node_ptr->mtcTimer.ring == true )
+            if ( is_controller(node_ptr) )
             {
-                if ( is_controller(node_ptr) )
+                if ( mtcSmgrApi_request ( node_ptr,
+                                          CONTROLLER_ENABLED,
+                                          SMGR_MAX_RETRIES ) != PASS )
                 {
-                    if ( mtcSmgrApi_request ( node_ptr,
-                                              CONTROLLER_ENABLED,
-                                              SMGR_MAX_RETRIES ) != PASS )
-                    {
-                        wlog ("%s Failed to send 'unlocked-disabled' to HA Service Manager ; allowing enable\n",
-                              node_ptr->hostname.c_str());
-                    }
+                    wlog ("%s Failed to send 'unlocked-enabled' to HA Service Manager ; allowing enable\n",
+                          node_ptr->hostname.c_str());
                 }
-                /* Node Has Recovered */
-                node_ptr->graceful_recovery_counter = 0 ;
-                recoveryStageChange ( node_ptr, MTC_RECOVERY__START );
-                adminActionChange   ( node_ptr, MTC_ADMIN_ACTION__NONE );
-                node_ptr->health_threshold_counter = 0 ;
-                node_ptr->enabled_count++ ;
-                node_ptr->http_retries_cur = 0 ;
-
-                doneQueue_purge ( node_ptr );
-                if ( node_ptr->was_dor_recovery_mode )
-                {
-                    report_dor_recovery (  node_ptr , "is ENABLED" );
-                }
-                else
-                {
-                    plog ("%s is ENABLED (Gracefully Recovered)\n",
-                              node_ptr->hostname.c_str());
-                }
-                alarm_enabled_clear ( node_ptr, false );
             }
+            /* Node Has Recovered */
+            node_ptr->graceful_recovery_counter = 0 ;
+            recoveryStageChange ( node_ptr, MTC_RECOVERY__START );
+            adminActionChange   ( node_ptr, MTC_ADMIN_ACTION__NONE );
+            node_ptr->health_threshold_counter = 0 ;
+            node_ptr->enabled_count++ ;
+            node_ptr->http_retries_cur = 0 ;
+
+            doneQueue_purge ( node_ptr );
+            if ( node_ptr->was_dor_recovery_mode )
+            {
+                report_dor_recovery (  node_ptr , "is ENABLED" );
+            }
+            else
+            {
+                plog ("%s is ENABLED (Gracefully Recovered)\n",
+                          node_ptr->hostname.c_str());
+            }
+            alarm_enabled_clear ( node_ptr, false );
             break ;
         }
         default:
@@ -2783,7 +2775,7 @@ int nodeLinkClass::disable_handler  ( struct nodeLinkClass::node * node_ptr )
                                            MTC_OPER_STATE__DISABLED,
                                            locked_status );
 
-                if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
+                if (( AIO_SYSTEM ) && ( is_controller(node_ptr) == true ))
                 {
                     subfStateChange ( node_ptr, MTC_OPER_STATE__DISABLED,
                                                 locked_status );
@@ -3432,7 +3424,7 @@ int nodeLinkClass::online_handler ( struct nodeLinkClass::node * node_ptr )
 
                         /* otherwise change state */
                         mtcInvApi_update_state(node_ptr, MTC_JSON_INV_AVAIL,"offline" );
-                        if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
+                        if (( AIO_SYSTEM ) && ( is_controller(node_ptr) == true ))
                         {
                             mtcInvApi_update_state(node_ptr, MTC_JSON_INV_AVAIL_SUBF,"offline" );
                         }
@@ -3473,7 +3465,7 @@ int nodeLinkClass::online_handler ( struct nodeLinkClass::node * node_ptr )
                                   node_ptr->hostname.c_str());
 
                         mtcInvApi_update_state ( node_ptr, MTC_JSON_INV_AVAIL, "online" );
-                        if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
+                        if (( AIO_SYSTEM ) && ( is_controller(node_ptr) == true ))
                         {
                             mtcInvApi_update_state ( node_ptr, MTC_JSON_INV_AVAIL_SUBF, "online" );
                         }
@@ -6093,7 +6085,7 @@ int nodeLinkClass::add_handler ( struct nodeLinkClass::node * node_ptr )
 
             mtcInfo_log(node_ptr);
 
-            if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
+            if (( AIO_SYSTEM ) && ( is_controller(node_ptr) == true ))
             {
                 if ( daemon_is_file_present ( CONFIG_COMPLETE_WORKER ) == false )
                 {
@@ -6120,52 +6112,38 @@ int nodeLinkClass::add_handler ( struct nodeLinkClass::node * node_ptr )
                 mtcInvApi_update_state ( node_ptr, "availability", "available" );
             }
 
-            /* handle other cases */
-            EFmAlarmSeverityT sev = mtcAlarm_state ( node_ptr->hostname,
-                                                     MTC_ALARM_ID__ENABLE);
+            /* Query FM for existing Enable and Config alarm status */
+            EFmAlarmSeverityT enable_alarm_severity =
+                mtcAlarm_state ( node_ptr->hostname, MTC_ALARM_ID__ENABLE);
+            EFmAlarmSeverityT config_alarm_severity =
+                mtcAlarm_state ( node_ptr->hostname, MTC_ALARM_ID__CONFIG);
 
-            if ( node_ptr->adminState == MTC_ADMIN_STATE__LOCKED )
+            /* Clear generic enable alarm over process restart.
+             * Will get reasserted if the cause condition still exists */
+            if ( enable_alarm_severity != FM_ALARM_SEVERITY_CLEAR )
             {
-                node_ptr->alarms[MTC_ALARM_ID__LOCK] = FM_ALARM_SEVERITY_WARNING ;
-
-                /* If the node is locked then the Enable alarm
-                 * should not be present */
-                if ( sev != FM_ALARM_SEVERITY_CLEAR )
-                {
-                    mtcAlarm_clear ( node_ptr->hostname, MTC_ALARM_ID__ENABLE );
-                    sev = FM_ALARM_SEVERITY_CLEAR ;
-                }
+                ilog ("%s found enable alarm ; clearing %s",
+                          node_ptr->hostname.c_str(),
+                          alarmUtil_getSev_str(enable_alarm_severity).c_str());
+                mtcAlarm_clear ( node_ptr->hostname, MTC_ALARM_ID__ENABLE );
             }
 
-            /* Manage enable alarm over process restart.
-             *
-             * - clear the alarm in the active controller case
-             * - maintain the alarm, set degrade state in MAJOR and CRIT cases
-             * - clear alarm for all other severities.
-             */
-            if ( THIS_HOST )
+            /* The config alarm is maintained if it exists.
+             * The in-service test handler will clear the alarm
+             * if the config failure is gone */
+            if ( config_alarm_severity != FM_ALARM_SEVERITY_CLEAR )
             {
-                if ( sev != FM_ALARM_SEVERITY_CLEAR )
-                {
-                    mtcAlarm_clear ( node_ptr->hostname, MTC_ALARM_ID__ENABLE );
-                }
-            }
-            else
-            {
-                if (( sev == FM_ALARM_SEVERITY_CRITICAL ) ||
-                    ( sev == FM_ALARM_SEVERITY_MAJOR ))
-                {
-                    node_ptr->alarms[MTC_ALARM_ID__ENABLE] = sev ;
-                    node_ptr->degrade_mask |= DEGRADE_MASK_ENABLE ;
-                }
-                else if ( sev != FM_ALARM_SEVERITY_CLEAR )
-                {
-                    mtcAlarm_clear ( node_ptr->hostname, MTC_ALARM_ID__ENABLE );
-                }
+                node_ptr->degrade_mask |= DEGRADE_MASK_CONFIG ;
+                node_ptr->alarms[MTC_ALARM_ID__CONFIG] = config_alarm_severity ;
+                ilog ("%s found config alarm ; loaded %s",
+                          node_ptr->hostname.c_str(),
+                          alarmUtil_getSev_str(config_alarm_severity).c_str());
             }
 
             if ( is_controller(node_ptr) )
             {
+                this->controllers++ ;
+
                 mtc_cmd_enum state = CONTROLLER_DISABLED ;
 
                 if (( node_ptr->adminState   == MTC_ADMIN_STATE__UNLOCKED ) &&
@@ -6199,7 +6177,6 @@ int nodeLinkClass::add_handler ( struct nodeLinkClass::node * node_ptr )
                     {
                         ilog ("%s %s\n",node_ptr->hostname.c_str(), MTC_TASK_SWACT_COMPLETE );
 
-                        /* Work Around for issue: */
                         mtcInvApi_update_uptime ( node_ptr, node_ptr->uptime );
 
                         mtcInvApi_update_task ( node_ptr, MTC_TASK_SWACT_COMPLETE );
@@ -6233,7 +6210,6 @@ int nodeLinkClass::add_handler ( struct nodeLinkClass::node * node_ptr )
                     mtcSmgrApi_request ( node_ptr, state , SWACT_FAIL_THRESHOLD );
                 }
             }
-
             if ( daemon_get_cfg_ptr()->debug_level & 1 )
                 nodeLinkClass::host_print (node_ptr);
 
@@ -6290,6 +6266,40 @@ int nodeLinkClass::add_handler ( struct nodeLinkClass::node * node_ptr )
                               node_ptr->hostname.c_str(), node_ptr->uptime );
                     break ;
                 }
+                /* Handle catching and recovering/restoring hosts that might
+                 * have been in the Graceful Recovery Wait state.
+                 *
+                 * Prevents an extra reboot for hosts that might be in
+                 * Graceful Recovery over a maintenance process restart. */
+                else if (( NOT_THIS_HOST ) &&
+                         ( !node_ptr->task.compare(MTC_TASK_RECOVERY_WAIT)))
+                {
+                    ilog ("%s is in %s ; restoring state",
+                              node_ptr->hostname.c_str(),
+                              MTC_TASK_RECOVERY_WAIT);
+
+                    /* Complete necessary add operations before switching
+                     * to Recovery */
+                    LOAD_NODETYPE_TIMERS ;
+                    workQueue_purge ( node_ptr );
+                    if (( hostUtil_is_valid_bm_type  ( node_ptr->bm_type )) &&
+                        ( hostUtil_is_valid_ip_addr  ( node_ptr->bm_ip )) &&
+                        ( hostUtil_is_valid_username ( node_ptr->bm_un )))
+                    {
+                        set_bm_prov ( node_ptr, true ) ;
+                    }
+                    mtcTimer_reset ( node_ptr->mtcTimer );
+                    adminActionChange ( node_ptr, MTC_ADMIN_ACTION__NONE );
+                    node_ptr->addStage = MTC_ADD__START;
+
+                    /* Switch into recovery_handler's Graceful Recovery Wait
+                     * state with the Graceful Recovery Wait timeout */
+                    adminActionChange ( node_ptr, MTC_ADMIN_ACTION__RECOVER );
+                    mtcTimer_start ( node_ptr->mtcTimer, mtcTimer_handler,
+                                     node_ptr->mtcalive_timeout );
+                    recoveryStageChange ( node_ptr, MTC_RECOVERY__MTCALIVE_WAIT );
+                    break ;
+                }
                 else
                 {
                     if ( is_controller(node_ptr) )
@@ -6354,7 +6364,7 @@ int nodeLinkClass::add_handler ( struct nodeLinkClass::node * node_ptr )
 
             send_hbs_command   ( node_ptr->hostname, MTC_CMD_ADD_HOST );
 
-            if ( ( CPE_SYSTEM ) || ( is_worker (node_ptr) == true ))
+            if ( ( AIO_SYSTEM ) || ( is_worker (node_ptr) == true ))
             {
                 send_guest_command ( node_ptr->hostname, MTC_CMD_ADD_HOST );
             }
@@ -6368,6 +6378,7 @@ int nodeLinkClass::add_handler ( struct nodeLinkClass::node * node_ptr )
         }
         case MTC_ADD__WORKQUEUE_WAIT:
         {
+
             rc = workQueue_done ( node_ptr );
             if ( rc == RETRY )
             {
@@ -6393,11 +6404,11 @@ int nodeLinkClass::add_handler ( struct nodeLinkClass::node * node_ptr )
                 ( node_ptr->operState  == MTC_OPER_STATE__ENABLED ))
             {
                 /* start the heartbeat service in all cases except for
-                 * THIS host and CPE controller hosts */
+                 * THIS host and AIO controller hosts */
                 if ( NOT_THIS_HOST )
                 {
                     if (( LARGE_SYSTEM ) ||
-                        (( CPE_SYSTEM ) && ( this->dor_mode_active == false )))
+                        (( AIO_SYSTEM ) && ( this->dor_mode_active == false )))
                     {
                         send_hbs_command ( node_ptr->hostname, MTC_CMD_START_HOST );
                     }
@@ -6430,7 +6441,7 @@ int nodeLinkClass::add_handler ( struct nodeLinkClass::node * node_ptr )
                 node_ptr->configAction = MTC_CONFIG_ACTION__INSTALL_PASSWD ;
             }
 
-            if (( ! SIMPLEX_CPE_SYSTEM ) &&
+            if (( ! SIMPLEX_AIO_SYSTEM ) &&
                 ( node_ptr->bmc_provisioned == true ))
             {
                 mtcAlarm_clear ( node_ptr->hostname, MTC_ALARM_ID__BM );
@@ -6438,7 +6449,7 @@ int nodeLinkClass::add_handler ( struct nodeLinkClass::node * node_ptr )
             }
 
             /* Special Add handling for the AIO system */
-            if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
+            if (( AIO_SYSTEM ) && ( is_controller(node_ptr) == true ))
             {
                 if (( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) &&
                     ( node_ptr->operState  == MTC_OPER_STATE__ENABLED ))
@@ -6455,6 +6466,7 @@ int nodeLinkClass::add_handler ( struct nodeLinkClass::node * node_ptr )
             }
 
             node_ptr->addStage = MTC_ADD__START;
+
             plog ("%s Host Add Completed (uptime:%d)\n", node_ptr->hostname.c_str(), node_ptr->uptime );
             node_ptr->add_completed = true ;
             break ;
@@ -6635,6 +6647,8 @@ int nodeLinkClass::bmc_handler ( struct nodeLinkClass::node * node_ptr )
                         mtcInfo_set ( node_ptr, MTCE_INFO_KEY__BMC_PROTOCOL, BMC_PROTOCOL__IPMI_STR );
                         node_ptr->bmc_protocol = BMC_PROTOCOL__IPMITOOL ;
                     }
+                    /* store mtcInfo, which specifies the selected BMC protocol,
+                     * into the sysinv database */
                     mtcInvApi_update_mtcInfo ( node_ptr );
 
                     ilog ("%s bmc control using %s:%s",
@@ -6751,8 +6765,15 @@ int nodeLinkClass::bmc_handler ( struct nodeLinkClass::node * node_ptr )
                         node_ptr->bmc_thread_ctrl.done = true  ;
                         node_ptr->bmc_thread_info.command = 0  ;
                     }
+                    /* store mtcInfo, which specifies the selected BMC protocol,
+                     * into the sysinv database */
                     mtcInvApi_update_mtcInfo ( node_ptr );
 
+                    /* push the BMC access info out to the mtcClient when
+                     * a controller's BMC connection is established/verified */
+                    if ( node_ptr->nodetype & CONTROLLER_TYPE )
+                        this->want_mtcInfo_push = true ;
+
                     send_hwmon_command ( node_ptr->hostname, MTC_CMD_ADD_HOST );
                     send_hwmon_command ( node_ptr->hostname, MTC_CMD_START_HOST );
                 }
@@ -6942,6 +6963,11 @@ int nodeLinkClass::bmc_handler ( struct nodeLinkClass::node * node_ptr )
                                 }
                             } /* end power off detection handling     */
 
+                            /* push the BMC access info out to the mtcClient when
+                             * a controller's BMC connection is established/verified */
+                            if ( node_ptr->nodetype & CONTROLLER_TYPE )
+                                this->want_mtcInfo_push = true ;
+
                             send_hwmon_command ( node_ptr->hostname, MTC_CMD_ADD_HOST );
                             send_hwmon_command ( node_ptr->hostname, MTC_CMD_START_HOST );
 
@@ -7199,6 +7225,9 @@ int nodeLinkClass::oos_test_handler ( struct nodeLinkClass::node * node_ptr )
                 }
             }
 
+            /* audit alarms */
+            mtcAlarm_audit (node_ptr );
+
             break ;
         }
         case MTC_OOS_TEST__WAIT:
@@ -7494,7 +7523,7 @@ int nodeLinkClass::insv_test_handler ( struct nodeLinkClass::node * node_ptr )
              *  In the restart case the subfunction fsm enable handler is not run so
              *  we try to detect the missing goenabled_subf flag as an inservice test.
              *
-             *  Only in CPE type
+             *  Only in AIO type
              *   - clear the alarm if the issue goes away -
              *     i.e. the goenabled tests eventually pass. Today
              *     hey are not re-run in the background but someday they may be
@@ -7502,7 +7531,7 @@ int nodeLinkClass::insv_test_handler ( struct nodeLinkClass::node * node_ptr )
              *     and we have only a single enabled controller (which must be this one)
              *     and the alarm is not already raised.
              **/
-            if (( CPE_SYSTEM ) && ( is_controller(node_ptr) == true ))
+            if (( AIO_SYSTEM ) && ( is_controller(node_ptr) == true ))
             {
                 if (( node_ptr->adminState == MTC_ADMIN_STATE__UNLOCKED ) &&
                     ( node_ptr->operState == MTC_OPER_STATE__ENABLED ) &&
@@ -7597,7 +7626,7 @@ int nodeLinkClass::insv_test_handler ( struct nodeLinkClass::node * node_ptr )
                 }
             }
 
-            /* Monitor the health of the host - no pass file */
+            /* Monitor the health of the host */
             if ((  node_ptr->adminState  == MTC_ADMIN_STATE__UNLOCKED ) &&
                 (  node_ptr->operState   == MTC_OPER_STATE__ENABLED   ) &&
                 (( node_ptr->availStatus == MTC_AVAIL_STATUS__AVAILABLE ) ||
@@ -7623,6 +7652,11 @@ int nodeLinkClass::insv_test_handler ( struct nodeLinkClass::node * node_ptr )
                     ilog ("%s sm degrade clear\n", node_ptr->hostname.c_str());
                 }
 
+                /*
+                 * In-service Config Failure/Alarm handling
+                 */
+
+                /* Detect new config failure condition */
                 if ( node_ptr->mtce_flags & MTC_FLAG__I_AM_NOT_HEALTHY)
                 {
                     /* not healthy .... */
@@ -7634,16 +7668,7 @@ int nodeLinkClass::insv_test_handler ( struct nodeLinkClass::node * node_ptr )
                         {
                             wlog_throttled ( node_ptr->health_threshold_counter, (MTC_UNHEALTHY_THRESHOLD*10), "%s is UNHEALTHY\n", node_ptr->hostname.c_str());
                             if ( node_ptr->health_threshold_counter >= MTC_UNHEALTHY_THRESHOLD )
-                            {
-                                node_ptr->degrade_mask |= DEGRADE_MASK_CONFIG ;
-
-                                /* threshold is reached so raise the config alarm if it is not already raised */
-                                if ( node_ptr->alarms[MTC_ALARM_ID__CONFIG] != FM_ALARM_SEVERITY_CRITICAL )
-                                {
-                                    mtcAlarm_critical ( node_ptr->hostname, MTC_ALARM_ID__CONFIG );
-                                    node_ptr->alarms[MTC_ALARM_ID__CONFIG] = FM_ALARM_SEVERITY_CRITICAL ;
-                                }
-                            }
+                                alarm_config_failure ( node_ptr );
                         }
                     }
                     else
@@ -7663,6 +7688,12 @@ int nodeLinkClass::insv_test_handler ( struct nodeLinkClass::node * node_ptr )
                         }
                     }
                 }
+                /* or correct an alarmed config failure that has cleared */
+                else if ( node_ptr->degrade_mask & DEGRADE_MASK_CONFIG )
+                {
+                    if ( node_ptr->mtce_flags & MTC_FLAG__I_AM_HEALTHY )
+                        alarm_config_clear ( node_ptr );
+                }
                 else
                 {
                     node_ptr->health_threshold_counter = 0 ;
diff --git a/mtce/src/maintenance/mtcNodeMnfa.cpp b/mtce/src/maintenance/mtcNodeMnfa.cpp
index af2493b1..8ebbc15c 100644
--- a/mtce/src/maintenance/mtcNodeMnfa.cpp
+++ b/mtce/src/maintenance/mtcNodeMnfa.cpp
@@ -159,19 +159,20 @@ void nodeLinkClass::mnfa_recover_host ( struct nodeLinkClass::node * node_ptr )
 
     if ( node_ptr->mnfa_graceful_recovery == true )
     {
-        /* Restart the heartbeat for this recovered host */
-        // send_hbs_command ( node_ptr->hostname, MTC_RESTART_HBS );
-
         if ( node_ptr->adminAction != MTC_ADMIN_ACTION__RECOVER )
         {
-            ilog ("%s graceful recovery from MNFA\n", node_ptr->hostname.c_str());
-            recoveryStageChange ( node_ptr, MTC_RECOVERY__START );
-            adminActionChange   ( node_ptr, MTC_ADMIN_ACTION__RECOVER );
+            ilog ("%s graceful recovery (graceful recover count:%d)",
+                      node_ptr->hostname.c_str(),
+                      node_ptr->graceful_recovery_counter);
         }
         else
         {
-            wlog ("%s already gracefully recovering\n", node_ptr->hostname.c_str() );
+            wlog ("%s graceful recovery restart (graceful recover count:%d)",
+                      node_ptr->hostname.c_str(),
+                      node_ptr->graceful_recovery_counter );
         }
+        recoveryStageChange ( node_ptr, MTC_RECOVERY__START );
+        adminActionChange   ( node_ptr, MTC_ADMIN_ACTION__RECOVER );
     }
 }
 
@@ -298,43 +299,38 @@ void nodeLinkClass::mnfa_exit ( bool force )
          * Clear heartbeat degrades */
         for ( struct node * ptr = head ;  ; ptr = ptr->next )
         {
-            if ((( ptr->hbs_minor[CLSTR_IFACE] == true ) ||
-                 ( ptr->hbs_minor[MGMNT_IFACE] == true )) &&
-                 ( ptr->operState == MTC_OPER_STATE__ENABLED ))
+            std::list<string>::iterator mnfa_awol_ptr  ;
+            for ( mnfa_awol_ptr = mnfa_awol_list.begin() ;
+                  mnfa_awol_ptr != mnfa_awol_list.end() ;
+                  mnfa_awol_ptr++ )
             {
-                ptr->hbs_minor[MGMNT_IFACE] = false ;
-                ptr->hbs_minor[CLSTR_IFACE] = false ;
+                /* skip host if not in the mnfa pool */
+                if ( ptr->hostname.compare(*(mnfa_awol_ptr)) )
+                   continue ;
 
-                if ( force == true )
+                if ((( ptr->hbs_minor[CLSTR_IFACE] == true ) ||
+                     ( ptr->hbs_minor[MGMNT_IFACE] == true )) &&
+                     ( ptr->operState == MTC_OPER_STATE__ENABLED ))
                 {
-                    elog ("... %s failed ; auto-recovering\n",
-                               ptr->hostname.c_str());
+                    ptr->hbs_minor[MGMNT_IFACE] = false ;
+                    ptr->hbs_minor[CLSTR_IFACE] = false ;
 
-                    /* Set node as failed */
-                    availStatusChange ( ptr, MTC_AVAIL_STATUS__FAILED );
-                    enableStageChange ( ptr, MTC_ENABLE__START );
-                    adminActionChange ( ptr, MTC_ADMIN_ACTION__NONE );
-                }
-                else
-                {
-                    if ( ptr->availStatus == MTC_AVAIL_STATUS__DEGRADED )
+                    if ( force == true )
                     {
-                        if ( ptr->degrade_mask == 0 )
-                        {
-                            availStatusChange ( ptr, MTC_AVAIL_STATUS__AVAILABLE );
-                        }
-                    }
+                        elog ("... %s failed ; auto-recovering\n",
+                                   ptr->hostname.c_str());
 
-                    if ( ptr->adminAction != MTC_ADMIN_ACTION__RECOVER )
-                    {
-                        recoveryStageChange ( ptr, MTC_RECOVERY__START );
-                        adminActionChange   ( ptr, MTC_ADMIN_ACTION__RECOVER );
+                        /* Set node as failed */
+                        availStatusChange ( ptr, MTC_AVAIL_STATUS__FAILED );
+                        enableStageChange ( ptr, MTC_ENABLE__START );
+                        adminActionChange ( ptr, MTC_ADMIN_ACTION__NONE );
                     }
                     else
                     {
-                        wlog ("%s already gracefully recovering\n", ptr->hostname.c_str() );
+                        mnfa_recover_host ( ptr );
                     }
                 }
+                break ;
             }
             if (( ptr->next == NULL ) || ( ptr == tail ))
                 break ;
diff --git a/mtce/src/maintenance/mtcNodeMsg.h b/mtce/src/maintenance/mtcNodeMsg.h
index 6816354c..11319c0f 100755
--- a/mtce/src/maintenance/mtcNodeMsg.h
+++ b/mtce/src/maintenance/mtcNodeMsg.h
@@ -125,11 +125,13 @@ int send_mtcAlive_msg ( mtc_socket_type * sock_ptr, string identity, int interfa
 
 int recv_mtc_reply_noblock ( void );
 
-int send_mtc_cmd ( string & hostname, int cmd, int interface );
+int send_mtc_cmd ( string & hostname, int cmd, int interface , string json_dict="" );
 int mtc_service_command ( mtc_socket_type * sock_ptr , int interface );
 int mtc_set_availStatus ( string & hostname, mtc_nodeAvailStatus_enum status );
-int mtce_send_event    ( mtc_socket_type * sock_ptr, int cmd , const char * mtce_name_ptr );
+int mtce_send_event    ( mtc_socket_type * sock_ptr, unsigned int cmd , const char * mtce_name_ptr );
 int mtc_clstr_init     ( mtc_socket_type * sock_ptr , char * iface );
 string get_who_i_am ( void );
 
+int send_mtcClient_cmd ( mtc_socket_type * sock_ptr, int cmd, string hostname, string address, int port);
+
 #endif
diff --git a/mtce/src/maintenance/mtcSmgrApi.cpp b/mtce/src/maintenance/mtcSmgrApi.cpp
index e511228c..9fd4ddc7 100644
--- a/mtce/src/maintenance/mtcSmgrApi.cpp
+++ b/mtce/src/maintenance/mtcSmgrApi.cpp
@@ -96,7 +96,7 @@ int nodeLinkClass::mtcSmgrApi_request ( struct nodeLinkClass::node * node_ptr, m
     int rc = PASS ;
     string operation_string = "unknown" ;
 
-    if ( system_type == SYSTEM_TYPE__CPE_MODE__SIMPLEX )
+    if ( system_type == SYSTEM_TYPE__AIO__SIMPLEX )
     {
         dlog ("%s simpex mode ; SM '%d' request not sent\n", node_ptr->hostname.c_str(), operation );
         return ( PASS );
diff --git a/mtce/src/maintenance/mtcSubfHdlrs.cpp b/mtce/src/maintenance/mtcSubfHdlrs.cpp
index e22aaa2c..5c994f4a 100644
--- a/mtce/src/maintenance/mtcSubfHdlrs.cpp
+++ b/mtce/src/maintenance/mtcSubfHdlrs.cpp
@@ -110,14 +110,16 @@ int nodeLinkClass::enable_subf_handler ( struct nodeLinkClass::node * node_ptr )
             if ( node_ptr->mtce_flags & MTC_FLAG__SUBF_CONFIGURED )
             {
                 mtcTimer_reset (node_ptr->mtcTimer);
-                plog ("%s Subf Configured OK\n", name.c_str());
+                plog ("%s Subf Configured OK (oob:%x)\n",
+                          name.c_str(), node_ptr->mtce_flags);
                 enableStageChange ( node_ptr, MTC_ENABLE__GOENABLED_TIMER );
                 alarm_config_clear ( node_ptr );
                 break ;
             }
 
-            if ((( !node_ptr->mtce_flags & MTC_FLAG__I_AM_CONFIGURED )) ||
-                ((  node_ptr->mtce_flags & MTC_FLAG__I_AM_NOT_HEALTHY )))
+            if (( node_ptr->mtce_flags ) &&
+                (( !node_ptr->mtce_flags & MTC_FLAG__I_AM_CONFIGURED ) ||
+                 (  node_ptr->mtce_flags & MTC_FLAG__I_AM_NOT_HEALTHY )))
             {
                 mtcTimer_reset (node_ptr->mtcTimer);
 
@@ -140,9 +142,10 @@ int nodeLinkClass::enable_subf_handler ( struct nodeLinkClass::node * node_ptr )
             /* timeout handling */
             else if ( node_ptr->mtcTimer.ring == true )
             {
-                elog ("%s configuration timeout (%d secs)\n",
+                elog ("%s configuration timeout (%d secs) (oob:%x)\n",
                           name.c_str(),
-                          MTC_WORKER_CONFIG_TIMEOUT );
+                          MTC_WORKER_CONFIG_TIMEOUT,
+                          node_ptr->mtce_flags);
 
                 alarm_config_failure ( node_ptr );
 
@@ -169,7 +172,7 @@ int nodeLinkClass::enable_subf_handler ( struct nodeLinkClass::node * node_ptr )
              *
              * issue: subfunction go-enable patching script fails and
              * maintenance reboots the active controller when no-reboot
-             * patching maintenance in CPE.
+             * patching maintenance in AIO.
              *
              * The fix is to avoid running the subfunction go-enabled tests
              * on self while patching.
@@ -490,7 +493,7 @@ int nodeLinkClass::enable_subf_handler ( struct nodeLinkClass::node * node_ptr )
 
                 fail = true ;
             }
-            else if ( this->system_type != SYSTEM_TYPE__CPE_MODE__SIMPLEX )
+            else if ( this->system_type != SYSTEM_TYPE__AIO__SIMPLEX )
             {
                 /* Loop over the heartbeat interfaces and fail the Enable if any of them are failing */
                 for ( int i = 0 ; i < MAX_IFACES ; i++ )
diff --git a/mtce/src/pmon/pmon.h b/mtce/src/pmon/pmon.h
index 11ed9714..158553da 100755
--- a/mtce/src/pmon/pmon.h
+++ b/mtce/src/pmon/pmon.h
@@ -231,6 +231,7 @@ typedef struct
     recovery_method_type recovery_method ; /**< How processes are recovered */
     bool reload_config ;
     bool patching_in_progress ;
+    bool last_alarm_query_pass;
 
 } pmon_ctrl_type ;
 void pmon_set_ctrl_ptr ( pmon_ctrl_type * ctrl_ptr );
diff --git a/mtce/src/pmon/pmonAlarm.cpp b/mtce/src/pmon/pmonAlarm.cpp
index 2a491642..86e0a319 100644
--- a/mtce/src/pmon/pmonAlarm.cpp
+++ b/mtce/src/pmon/pmonAlarm.cpp
@@ -38,14 +38,14 @@ void pmonAlarm_init ( void )
     alarmUtil_type * ptr ;
 
     /** Process Failure Alarm ****************************************************/
-    
+
     ptr = &alarm_list[PMON_ALARM_ID__PMOND];
     memset  (&ptr->alarm, 0, (sizeof(SFmAlarmDataT)));
     snprintf(&ptr->alarm.alarm_id[0], FM_MAX_BUFFER_LENGTH, "%s", PMOND_ALARM_ID);
 
     ptr->name = "process failure" ;
     ptr->instc_prefix = "process=" ;
-     
+
     ptr->critl_reason = "";
     ptr->minor_reason = "";
     ptr->major_reason = "";
@@ -56,12 +56,12 @@ void pmonAlarm_init ( void )
     ptr->alarm.inhibit_alarms    = FM_FALSE;
     ptr->alarm.service_affecting = FM_TRUE ;
     ptr->alarm.suppression       = FM_TRUE ;
-            
+
     ptr->alarm.severity          = FM_ALARM_SEVERITY_CLEAR ; /* Dynamic */
     ptr->alarm.alarm_state       = FM_ALARM_STATE_CLEAR    ; /* Dynamic */
 
-    snprintf (ptr->alarm.proposed_repair_action, FM_MAX_BUFFER_LENGTH, 
-              "If problem consistently occurs after Host is locked and unlocked then " 
+    snprintf (ptr->alarm.proposed_repair_action, FM_MAX_BUFFER_LENGTH,
+              "If problem consistently occurs after Host is locked and unlocked then "
               "contact next level of support for root cause analysis and recovery.");
 }
 
@@ -97,38 +97,46 @@ EFmAlarmSeverityT pmonAlarm_state ( string hostname, pmon_alarm_id_enum id )
 
 /******************************************************************************
  *
- * Name       : manage_queried_alarms
+ * Name       : query_alarms
  *
  * Description: query FM for all the existing process monitor alarms and build
  *              up the callers 'saved_alarm_list' with those process names and
  *              corresponding severity.
  *
- * Assumptions: If the hostname is passed in as not empty then assume the clear
- *              is requested.
- *
  * Updates    : callers saved_alarm_list
  *
+ * Returns    : PASS if FM returns no error
+ *              FAIL_REQUEST      ... alarmUtil_query_identity failed
+ *              FAIL_OPERATION    ... fm_get_fault failed
+ *              FAIL_NULL_POINTER ... failed to get memory
+ *
  ******************************************************************************/
 
-void manage_queried_alarms (  list<active_process_alarms_type> & saved_alarm_list, string hostname )
+int query_alarms (  list<active_process_alarms_type> & saved_alarm_list, string hostname )
 {
+    static const char HOSTNAME_LABEL [] = "host=" ;
+    static const char PROCNAME_LABEL [] = ".process=" ;
+
+    int rc = FAIL ;
     saved_alarm_list.clear();
 
-    /**
-     *  Query all the pmon alarms and if there is an alarm for a
-     *  process that is functioing properly then clear the alarm.
-     **/
     SFmAlarmDataT * alarm_list_ptr = (SFmAlarmDataT*) malloc ((sizeof(SFmAlarmDataT)*PMON_MAX_ALARMS));
     if ( alarm_list_ptr )
     {
-        if ( alarmUtil_query_identity ( pmonAlarm_getId_str(PMON_ALARM_ID__PMOND), alarm_list_ptr, PMON_MAX_ALARMS ) == PASS )
+        /* Query all the pmon alarms  */
+        rc = alarmUtil_query_identity ( pmonAlarm_getId_str(PMON_ALARM_ID__PMOND), alarm_list_ptr, PMON_MAX_ALARMS );
+        if ( rc == RETRY )
+        {
+            dlog ("no %s alarms found",  pmonAlarm_getId_str(PMON_ALARM_ID__PMOND).c_str());
+            rc = PASS ;
+        }
+        else if ( rc == PASS )
         {
             for ( int i = 0 ; i < PMON_MAX_ALARMS ; ++i )
             {
                 /* loop over each active alarm and maintain its activity state */
                 if ( strnlen ((alarm_list_ptr+i)->entity_instance_id , MAX_FILENAME_LEN ) )
                 {
-                    int rc ;
                     AlarmFilter   alarm_filter ;
                     SFmAlarmDataT alarm_query  ;
                     memset(&alarm_query, 0, sizeof(alarm_query));
@@ -139,34 +147,49 @@ void manage_queried_alarms (  list<active_process_alarms_type> & saved_alarm_lis
 
                     if (( rc = fm_get_fault ( &alarm_filter, &alarm_query )) == FM_ERR_OK )
                     {
-                        string entity = alarm_filter.entity_instance_id ;
-                        size_t pos = entity.find("process=");
-                        if ( pos != std::string::npos )
-                        {
-                            string pn = entity.substr(pos+strlen("process="));
-                            ilog ("%s alarm is %s (process:%s)\n", alarm_filter.entity_instance_id,
-                                 alarmUtil_getSev_str(alarm_query.severity).c_str(), pn.c_str());
+                        rc = PASS ;
 
-                            /* filter out 'process=pmond' as that alarm is handled by hbsAgent */
-                            if ( pn.compare("pmond") )
+                        string entity = alarm_filter.entity_instance_id ;
+                        size_t pos_hn = entity.find(HOSTNAME_LABEL);
+                        size_t pos_pn = entity.find(PROCNAME_LABEL);
+
+                        if (( pos_hn != std::string::npos ) &&
+                            ( pos_pn != std::string::npos ))
+                        {
+                            string hn = entity.substr(pos_hn+strlen(HOSTNAME_LABEL), pos_pn-strlen(HOSTNAME_LABEL));
+                            string pn = entity.substr(pos_pn+strlen(PROCNAME_LABEL));
+
+                            /* verify hostname */
+                            if ( ( hn.length() == 0 ) || ( hn != hostname ) )
                             {
-                                if ( !hostname.empty() )
-                                {
-                                    pmonAlarm_clear ( hostname, PMON_ALARM_ID__PMOND, pn );
-                                }
-                                else
-                                {
-                                     active_process_alarms_type this_alarm ;
-                                     this_alarm.process  = pn ;
-                                     this_alarm.severity = alarm_query.severity ;
-                                     saved_alarm_list.push_front ( this_alarm  );
-                                }
+                                /* ignore alarms not for this host */
+                                dlog ("%s %s %s alarm not for this host",
+                                          entity.c_str(),
+                                          hn.c_str(),
+                                          pn.c_str());
+                                continue ;
+                            }
+                            dlog ("%s alarm is %s (process:%s)\n",
+                                      alarm_filter.entity_instance_id,
+                                      alarmUtil_getSev_str(alarm_query.severity).c_str(),
+                                      pn.c_str());
+
+                            /* filter out 'process=pmond'
+                             * ... that alarm is handled by hbsAgent */
+                            if ( pn != MTC_SERVICE_PMOND_NAME )
+                            {
+                                 active_process_alarms_type this_alarm ;
+                                 this_alarm.process  = pn ;
+                                 this_alarm.severity = alarm_query.severity ;
+                                 saved_alarm_list.push_front ( this_alarm  );
                             }
                         }
                     }
                     else
                     {
-                        ilog ("fm_get_fault failed (rc:%d)\n", rc );
+                        wlog ("fm_get_fault failed (rc:%d)\n", rc );
+                        rc = FAIL_OPERATION ;
+                        break ;
                     }
                 }
                 else
@@ -174,10 +197,21 @@ void manage_queried_alarms (  list<active_process_alarms_type> & saved_alarm_lis
                     dlog2 ("last entry %d\n", i);
                     break ;
                 }
-            }
+            } /* for loop */
+        }
+        else
+        {
+            wlog("failed to query alarms from fm ; rc:%d", rc);
+            rc = FAIL_REQUEST ;
         }
         free(alarm_list_ptr);
     }
+    else
+    {
+        elog ("unable to allocate memory for alarm list");
+        rc = FAIL_NULL_POINTER ;
+    }
+    return (rc);
 }
 
 /*************************   A L A R M I N G   **************************/
diff --git a/mtce/src/pmon/pmonAlarm.h b/mtce/src/pmon/pmonAlarm.h
index 79414e1c..392fea82 100644
--- a/mtce/src/pmon/pmonAlarm.h
+++ b/mtce/src/pmon/pmonAlarm.h
@@ -37,8 +37,10 @@ typedef struct
     EFmAlarmSeverityT severity ;
 } active_process_alarms_type   ;
 
-/* Clear any pending alarms if the specified hostname is valid */
-void manage_queried_alarms (  list<active_process_alarms_type> & alarm_list, string hostname="" );
+/* Query FM for a list of Process Monitor (200.006) alarms */
+int query_alarms (  list<active_process_alarms_type> & alarm_list, string hostname="" );
+
+void alarmed_process_audit ( void );
 
 void pmonAlarm_init ( void );
 
diff --git a/mtce/src/pmon/pmonHdlr.cpp b/mtce/src/pmon/pmonHdlr.cpp
index 7ab0a8ee..2abe1255 100644
--- a/mtce/src/pmon/pmonHdlr.cpp
+++ b/mtce/src/pmon/pmonHdlr.cpp
@@ -41,15 +41,6 @@ static struct mtc_timer ptimer[MAX_PROCESSES] ;
 std::list<string> config_files ;
 std::list<string>::iterator string_iter_ptr ;
 
-/* If there is an alarm in the list that matches one in the process list
- * then update that process with its severity and failed state.
- * If there is a process in the saved list that is not in the process list
- * then clear its alarm as it is no longer valid.
- */
-void manage_process_alarms (  list<active_process_alarms_type> & _list,
-                              process_config_type * const ptr,
-                              int const processes );
-
 static process_config_type process_config[MAX_PROCESSES] ;
 
 /* lookup process control by index  and return its pointer if found.
@@ -216,6 +207,7 @@ void pmon_timer_init ( void )
         /* Init the timer for this process */
         mtcTimer_init ( process_config[i].pt_ptr, _pmon_ctrl_ptr->my_hostname, "process" ) ;
     }
+    _pmon_ctrl_ptr->last_alarm_query_pass = false ;
 }
 
 void _process_death_hdlr ( int sig_num, siginfo_t * info_ptr, void * context_ptr );
@@ -371,7 +363,7 @@ void init_process_config_memory ( void )
  * all the process config files from /etc/pmon.d */
 void load_processes ( void )
 {
-    list<active_process_alarms_type> saved_alarm_list ;
+    list<active_process_alarms_type> queried_alarm_list ;
 
     int rc = PASS ;
 
@@ -385,10 +377,6 @@ void load_processes ( void )
         close_process_socket ( &process_config[i] );
     }
 
-    /* Query fm for existing pmon process alarms and
-     * for each that is found store their 'name' and
-     * 'severity' in the passed in saved list */
-    manage_queried_alarms ( saved_alarm_list );
 
     /* init the process config memory */
     init_process_config_memory ();
@@ -454,13 +442,8 @@ void load_processes ( void )
     }
     _pmon_ctrl_ptr->reload_config = false ;
 
-    /* If there were process alarms that existed over the reload
-     * then ensure that those processes are updated with that information. */
-    if ( saved_alarm_list.size () )
-    {
-        ilog ("there are %ld active alarms over reload\n", saved_alarm_list.size());
-        manage_process_alarms ( saved_alarm_list, &process_config[0], _pmon_ctrl_ptr->processes );
-    }
+    /* use the audit to clear pre-existing alarms at process startup */
+    alarmed_process_audit ();
 }
 
 
@@ -1702,65 +1685,124 @@ void _process_death_hdlr ( int sig_num, siginfo_t * info_ptr, void * context_ptr
     }
 }
 
-/************************************************************************
+/***************************************************************************
  *
- * Name :       manage_process_alarms
+ * Name       : alarmed_process_audit
  *
- * Description: This interface manages process alarms over a process
- *              configuration reload
+ * Purpose    : Verify the process state matches the queried alarm state
  *
- * Steps:
+ * Description: To correct process alarm state mismatches.
  *
- * 1. Loop over each item in the list and mark the process as failed
- *    with the specified severity level.
- *
- * 2. If the process is not found then clear its alarm as it is no
- *    longer a valid process in the new profile and we don't want a
- *    lingering stuck alarm.
- *
- *************************************************************************/
+ ***************************************************************************/
 
-void manage_process_alarms (  list<active_process_alarms_type> & _list,
-                              process_config_type * const ptr,
-                              int const processes )
+void alarmed_process_audit ( void )
 {
-    /* get out if the list is empty ; should not have been called if
-     * empty but ... just in case */
-    if ( ! _list.empty() )
+    /* Don't audit FM in service after the last query was successful.
+     * There is a blocking issue that needs to be dealt with */
+    if ( _pmon_ctrl_ptr->last_alarm_query_pass == true )
+        return ;
+
+    /*
+     * Query fm for existing pmon process alarms and
+     * for each that is found store their 'name' and
+     * 'severity' in the passed in queried_alarm_list.
+     */
+    list<active_process_alarms_type> queried_alarm_list ;
+    int rc = query_alarms ( queried_alarm_list, get_ctrl_ptr()->my_hostname );
+    _pmon_ctrl_ptr->last_alarm_query_pass = (rc == PASS);
+
+    /* just return if query failed */
+    if ( _pmon_ctrl_ptr->last_alarm_query_pass == false )
+        return ;
+
+    if ( queried_alarm_list.size () )
     {
         list<active_process_alarms_type>::iterator _iter_ptr ;
 
+        alog ("audit found %ld active alarms", queried_alarm_list.size());
+
         /* loop over the list ... */
-        for ( _iter_ptr=_list.begin(); _iter_ptr!=_list.end(); ++_iter_ptr )
+        for (   _iter_ptr=queried_alarm_list.begin();
+                _iter_ptr!=queried_alarm_list.end();
+              ++_iter_ptr )
         {
-            /* for each item assum it is not found */
             bool found = false ;
+            alog ("%s audit", _iter_ptr->process.c_str());
 
-            /* try and find this process in the new process profile */
-            for ( int i = 0 ; i < processes ; i++ )
+            /* find this process*/
+            for ( int i = 0 ; (i < _pmon_ctrl_ptr->processes) && !found ; i++ )
             {
-                if ( ! _iter_ptr->process.compare((ptr+i)->process) )
-                {
-                    /* If the process is found then mark it as failed and update its severity.
-                     * At this point we then assume that there is an alarm raised for this process. */
-                    found = true ;
+                process_config_type * ptr = &process_config[i];
 
-                   (ptr+i)->failed = false ;
-                    wlog ("%s process was failed critical ; clearing existing alarm\n", _iter_ptr->process.c_str() );
-                    pmonAlarm_clear ( get_ctrl_ptr()->my_hostname, PMON_ALARM_ID__PMOND, _iter_ptr->process );
+                if ( ! _iter_ptr->process.compare(ptr->process) )
+                {
+                    found = true ;
+                    if ( ptr->failed == false )
+                    {
+                        ilog ("%s stale alarm ; clearing",
+                                  _iter_ptr->process.c_str() );
+
+                        pmonAlarm_clear ( get_ctrl_ptr()->my_hostname,
+                                          PMON_ALARM_ID__PMOND,
+                                          _iter_ptr->process );
+                    }
+                    else if ( _iter_ptr->severity != ptr->alarm_severity )
+                    {
+                        wlog ("%s alarm severity mismatch ; %s -> %s ; correcting",
+                                  ptr->process,
+                                  alarmUtil_getSev_str(_iter_ptr->severity).c_str(),
+                                  alarmUtil_getSev_str(ptr->alarm_severity).c_str());
+                        if ( ptr->alarm_severity == FM_ALARM_SEVERITY_MINOR )
+                        {
+                            pmonAlarm_minor(get_ctrl_ptr()->my_hostname,
+                                            PMON_ALARM_ID__PMOND,
+                                            ptr->process, 0);
+                        }
+                        else if (ptr->alarm_severity == FM_ALARM_SEVERITY_MAJOR )
+                        {
+                            pmonAlarm_major(get_ctrl_ptr()->my_hostname,
+                                            PMON_ALARM_ID__PMOND,
+                                            ptr->process);
+                        }
+                        else if (ptr->alarm_severity == FM_ALARM_SEVERITY_CRITICAL )
+                        {
+                             pmonAlarm_critical(get_ctrl_ptr()->my_hostname,
+                                                PMON_ALARM_ID__PMOND,
+                                                ptr->process);
+                        }
+                        else
+                        {
+                            wlog ("%s unexpected severity '%s' ; clearing alarm",
+                                      ptr->process,
+                                      ptr->severity);
+
+                            pmonAlarm_clear ( get_ctrl_ptr()->my_hostname,
+                                              PMON_ALARM_ID__PMOND,
+                                              ptr->process );
+                        }
+                    }
+                    else
+                    {
+                        alog ("%s is alarmed '%s' ; audit",
+                                  ptr->process,
+                                  ptr->severity);
+                    }
                 }
             }
-
             /* if not found then just clear the alarm */
             if ( found == false)
             {
-                wlog ("%s process alarm clear ; not in current process profile\n", _iter_ptr->process.c_str() );
-                pmonAlarm_clear ( get_ctrl_ptr()->my_hostname, PMON_ALARM_ID__PMOND, _iter_ptr->process );
+                wlog ("%s is not a monitored process ; clearing alarm",
+                          _iter_ptr->process.c_str());
+                pmonAlarm_clear ( get_ctrl_ptr()->my_hostname,
+                                  PMON_ALARM_ID__PMOND,
+                                  _iter_ptr->process );
             }
         }
     }
 }
 
+
 void pmon_service ( pmon_ctrl_type * ctrl_ptr )
 {
     std::list<int> socks ;
@@ -1931,6 +1973,8 @@ void pmon_service ( pmon_ctrl_type * ctrl_ptr )
         {
             _get_events ();
             mtcTimer_start ( pmonTimer_audit, pmon_timer_handler, audit_period );
+
+            alarmed_process_audit ();
         }
 
         /* Run the degrade set/clear by audit */
diff --git a/mtce/src/pmon/scripts/pmon.logrotate b/mtce/src/pmon/scripts/pmon.logrotate
old mode 100755
new mode 100644
index ea151b26..08416fb9
--- a/mtce/src/pmon/scripts/pmon.logrotate
+++ b/mtce/src/pmon/scripts/pmon.logrotate
@@ -1,16 +1,19 @@
-#daily
-nodateext
+#
+# Copyright (c) 2015-2021 Wind River Systems, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
 
 /var/log/pmond.log
 {
-    nodateext
-    size 10M
+    create 0640 root root
     start 1
-    missingok
+    size 10M
     rotate 20
     compress
-    sharedscripts
+    notifempty
+    missingok
     postrotate
         systemctl reload syslog-ng > /dev/null 2>&1 || true
     endscript
+    delaycompress
 }
diff --git a/mtce/src/pmon/scripts/pmond.conf b/mtce/src/pmon/scripts/pmond.conf
old mode 100755
new mode 100644
diff --git a/mtce/src/scripts/crashdump.logrotate b/mtce/src/scripts/crashdump.logrotate
index a16bcb7c..ca8e84c6 100644
--- a/mtce/src/scripts/crashdump.logrotate
+++ b/mtce/src/scripts/crashdump.logrotate
@@ -1,7 +1,11 @@
+#
+# Copyright (c) 2020-2021 Wind River Systems, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
+
 /var/log/crash/vmcore.tar
 /var/log/crash/vmcore_first.tar
 {
-    nodateext
     size 1K
     start 1
     rotate 1
diff --git a/mtce/src/scripts/mtc.conf b/mtce/src/scripts/mtc.conf
index edfd6c5d..461766b0 100644
--- a/mtce/src/scripts/mtc.conf
+++ b/mtce/src/scripts/mtc.conf
@@ -87,6 +87,10 @@ sched_delay_threshold = 300  ; scheduler delay time in msecs that will trigger
 daemon_log_port = 2121       ; daemon logger port
 mtcalarm_req_port = 2122     ;
 
+sync_b4_peer_ctrlr_reset = 0 ; issue a sync command to peer controller mtcClient
+                             ;   before issuing BMC reset.
+
+
 [timeouts]                   ; configurable maintenance timeout values in seconds
 
 failsafe_shutdown_delay = 120;
diff --git a/mtce/src/scripts/mtce.logrotate b/mtce/src/scripts/mtce.logrotate
index 17842c8a..8095f311 100644
--- a/mtce/src/scripts/mtce.logrotate
+++ b/mtce/src/scripts/mtce.logrotate
@@ -1,59 +1,67 @@
-#daily
-
-# Apply all these options to all the logs
-nodateext
-start 1
-compress
-notifempty
-missingok
-sharedscripts
-postrotate
-    systemctl reload syslog-ng > /dev/null 2>&1 || true
-endscript
-
+#
+# Copyright (c) 2015-2021 Wind River Systems, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
 /var/log/mtcAgent.log
 {
-    size 100M
+    create 0640 root root
+    start 1
     rotate 10
+    size 100M
+    compress
+    notifempty
+    missingok
+    postrotate
+        systemctl reload syslog-ng > /dev/null 2>&1 || true
+    endscript
+    delaycompress
 }
 
 /var/log/hbsAgent.log
-{
-    size 20M
-    rotate 5
-}
-
 /var/log/mtcClient.log
-{
-    size 20M
-    rotate 5
-}
-
 /var/log/hbsClient.log
 {
-    size 20M
+    create 0640 root root
+    start 1
     rotate 5
+    size 20M
+    compress
+    notifempty
+    missingok
+    postrotate
+        systemctl reload syslog-ng > /dev/null 2>&1 || true
+    endscript
+    delaycompress
 }
 
 /var/log/mtclogd.log
 {
-    size 10M
+    create 0640 root root
+    start 1
     rotate 5
+    size 10M
+    compress
+    notifempty
+    missingok
+    postrotate
+        systemctl reload syslog-ng > /dev/null 2>&1 || true
+    endscript
+    delaycompress
 }
 
+# The mtclogd opens and closes these log files on every log addition.
+# Therefore does not require a notification over log rotation.
+/var/log/mtcAgent_event.log
+/var/log/mtcAgent_alarm.log
 /var/log/mtcAgent_api.log
 {
-    size 20M
+    create 0640 root root
+    start 1
     rotate 5
-}
-
-/var/log/mtcAgent_event.log
-{
-    size 20M
-    rotate 5
-}
-/var/log/mtcAgent_alarm.log
-{
     size 10M
-    rotate 5
+    compress
+    notifempty
+    missingok
+    delaycompress
 }
diff --git a/mtce/src/scripts/wipedisk b/mtce/src/scripts/wipedisk
index 636ced60..2dc9bc34 100755
--- a/mtce/src/scripts/wipedisk
+++ b/mtce/src/scripts/wipedisk
@@ -18,6 +18,28 @@ usage ()
     exit 1
 }
 
+# Systemd automatically remounts all the mounted filesystems at shutdown
+# When we are deleting a partition, we have to unmount its corresponding filesystem
+# because remounting deleted filesystems at shutdown will throw errors
+unmount_fs()
+{
+  local fs=$1
+  local ret_code=0
+  echo "Trying to unmount $fs"
+  if findmnt $fs > /dev/null 2>&1 ; then
+      if umount -f $fs ; then
+         echo "$fs has been successfully unmounted"
+      else
+         echo "Error! Failed to unmount $fs"
+         ret_code=1
+      fi
+  else
+      echo "Warning! $fs is not mounted"
+      ret_code=2
+  fi
+  return $ret_code
+}
+
 OPTS=`getopt -o h -l force -- "$@"`
 if [ $? != 0 ]
 then
@@ -100,11 +122,14 @@ fi
 BACKUP_PART_GUID="BA5EBA11-0000-1111-2222-000000000002"
 part_type_guid_str="Partition GUID code"
 
+# get the nodetype variable to check later if this node is a controller
+. /etc/platform/platform.conf
+
 for dev in $WIPE_HDD
 do
     if [[ -e $dev ]]
     then
-        if [ "$dev" == "$rootfs" ]
+        if [[ "$dev" == "$rootfs" && "${nodetype}" == "controller" ]]
         then
             part_numbers=( $(parted -s $dev print | awk '$1 == "Number" {i=1; next}; i {print $1}') )
             for part_number in "${part_numbers[@]}"; do
@@ -128,6 +153,7 @@ do
                 # Skip / or we will lose access to the tools on the system.
                 if [[ $part != $rootfs_part ]]
                 then
+                    unmount_fs $part
                     dd if=/dev/zero of=$part bs=512 count=34
                     dd if=/dev/zero of=$part bs=512 count=34 seek=$((`blockdev --getsz $part` - 34))
                 fi
@@ -141,6 +167,7 @@ do
         else
             echo "Wiping $dev..."
             wipefs -f -a $dev
+            unmount_fs $dev
 
             # Clearing previous GPT tables or LVM data
             # Delete the first few bytes at the start and end of the partition. This is required with