metal/mtce/src/heartbeat/hbsCluster.cpp

/*
 * Copyright (c) 2018 Wind River Systems, Inc.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * @file Maintenance Heartbeat Agent Cluster Manager Module
 *
 *************************************************************************
 *
 * This module provides the heartbeat cluster implementation member
 * functions that the hbsAgent service calls to collect, store and
 * send heartbeat cluster information to SM upon request.
 *
 * See mtceHbsCluster.h for formal API between SM and Mtce.
 *
 *************************************************************************/

using namespace std;

#include "nodeBase.h"      /* common maintenance constructs and definitions */
#include "daemon_common.h" /* common daemon constructs and definitions      */
#include "hbsBase.h"       /* mtce heartbeat constructs and definitions     */

/* Error log throttle limit ; passed to wlog_throttled as the throttle
 * maximum when hbs_cluster_update cannot add a new history to the vault. */
#define THROTTLE_COUNT (500)

/* Private Heartbeat Cluster Control Structure.
 *
 * Holds all of this module's working state ; a single file scope
 * instance of it is used by every function in this module. */
typedef struct
{
    /* Contains the controller number (0 or 1) for this controller. */
    unsigned short this_controller ;

    /* Preserves which controllers are enabled.
     * Maintained by hbs_manage_controller_state. */
    bool controller_0_enabled ;
    bool controller_1_enabled ;
#ifdef THREE_CONTROLLER_SYSTEM
    bool controller_2_enabled ;
#endif

    /* Used to prevent log flooding in presence of back to back errors. */
    unsigned int log_throttle ;

    /* Used to threshold storage-0 not responding state.
     * One back to back miss counter per network ; compared against
     * STORAGE_0_NR_THRESHOLD in hbs_cluster_update. */
    unsigned int storage_0_not_responding_count[MTCE_HBS_NETWORKS];

    /* Contains the number of monitored networks in the system.
     * Management only = 1
     * Management and Infrastructure = 2 */
    unsigned short monitored_networks ;

    /* This contains the current number of heartbeat enabled hosts.
     *
     * Used to improve performance.
     *
     * Performance: This value is included in each history entry so
     * rather than do the size calculation of monitored_hostname_list
     * each time, this variable is updated from monitored_hostname_list
     * after each add/del operation. */
    unsigned short monitored_hosts ;

    /* List of host names being monitored. */
    std::list<string>monitored_hostname_list ;

    /* The working heartbeat cluster data vault. */
    mtce_hbs_cluster_type cluster ;

    /* Set when the monitored host list or the storage-0 enabled state
     * changes ; cleared by hbs_cluster_add / hbs_cluster_del once the
     * updated cluster has been sent to SM. */
    bool cluster_change ;

    /* Cluster state log management used by hbs_cluster_update.
     *
     * threshold_count  - counts history change events where not all
     *                    enabled hosts are responding ; cleared when the
     *                    cluster is healthy or when the count reaches
     *                    CLUSTER_CHANGE_THRESHOLD.
     * difference_count - the most recently recorded difference between
     *                    the enabled and responding host counts. */
    int  cluster_change_threshold_count ;
    int  cluster_change_difference_count ;

    /* Socket used to send the cluster to SM ; stored by hbs_cluster_init
     * when it is called with a non-NULL socket pointer. */
    msgClassSock * sm_socket_ptr ;

} hbs_cluster_ctrl_type ;

/* Cluster control structure construct allocation. */
static hbs_cluster_ctrl_type ctrl ;

/* Number of back to back storage-0 misses on a network before that
 * network's history is marked as storage-0 not responding. */
#define STORAGE_0_NR_THRESHOLD (4)

/* Number of counted cluster change events after which hbs_cluster_update
 * forces a '(threshold)' cluster state log and restarts the count. */
#define CLUSTER_CHANGE_THRESHOLD (50000)

/****************************************************************************
 *
 * Name        : hbs_cluster_init
 *
 * Description : Reset the monitored host list, the storage-0 miss
 *               counters, the log throttle and the cluster vault
 *               (header, dynamic data and every history) to defaults.
 *
 * Parameters  : period        - stored in the vault as period_msec.
 *               sm_socket_ptr - socket used to send the cluster to SM ;
 *                               a NULL pointer leaves the currently
 *                               stored socket pointer untouched.
 *
 * Assumptions : Called by hbsAgent.cpp before entering the main loop.
 *               Also called by hbs_cluster_add / hbs_cluster_del with a
 *               NULL socket when the monitored host count is 0.
 *
 * Returns     : None
 *
 ***************************************************************************/

void hbs_cluster_init ( unsigned short period, msgClassSock * sm_socket_ptr )
{
    /* Start with an empty monitored host list. */
    ctrl.monitored_hostname_list.clear();
    ctrl.monitored_hosts = 0;

    /* Cluster vault header. */
    ctrl.cluster.magic_number = MTCE_HBS_MAGIC_NUMBER ;
    ctrl.cluster.version      = MTCE_HBS_CLUSTER_VERSION  ;
    ctrl.cluster.revision     = MTCE_HBS_CLUSTER_REVISION ;

    /* Cluster vault global / dynamic data ; the byte count is derived
     * from the number of histories so histories is set first. */
    ctrl.cluster.storage0_enabled = false ;
    ctrl.cluster.period_msec = period ;
    ctrl.cluster.reqid = 0 ;
    ctrl.cluster.histories = 0 ;
    ctrl.cluster.bytes = BYTES_IN_CLUSTER_VAULT(ctrl.cluster.histories);

    /* Clear the per network storage-0 miss counters. */
    for ( int network = 0 ; network < MTCE_HBS_NETWORKS ; ++network )
    {
        ctrl.storage_0_not_responding_count[network] = 0 ;
    }

    /* Default every history slot in the vault. */
    for ( int slot = 0 ; slot < MTCE_HBS_MAX_HISTORY_ELEMENTS ; ++slot )
    {
        hbs_cluster_history_init ( ctrl.cluster.history[slot] );
    }

    clog ("Cluster Info: v%d.%d sig:%x bytes:%d (%ld)",
             ctrl.cluster.version,
             ctrl.cluster.revision,
             ctrl.cluster.magic_number,
             ctrl.cluster.bytes,
             sizeof(mtce_hbs_cluster_history_type));

    /* Only take the SM socket when the caller supplied one. */
    if ( sm_socket_ptr != NULL )
        ctrl.sm_socket_ptr = sm_socket_ptr ;

    ctrl.log_throttle = 0 ;
}


/****************************************************************************
 *
 * Name        : hbs_cluster_nums
 *
 * Description : Record this controller's number and how many networks
 *               are monitored in this system.
 *
 *               These values do not change without a process restart.
 *
 * Parameters  : this_controller    - this controller's number
 *               monitored_networks - number of monitored networks
 *
 * Assumptions : Called by hbsAgent.cpp before entering the main loop.
 *
 * Returns     : None
 *
 ***************************************************************************/

void hbs_cluster_nums ( unsigned short this_controller,
                        unsigned short monitored_networks )
{
    ctrl.monitored_networks = monitored_networks ;
    ctrl.this_controller    = this_controller ;
}


/****************************************************************************
 *
 * Name        : cluster_list
 *
 * Description : Log the list of monitored hosts.
 *               Typically done on a list change.
 *
 * Returns     : None
 *
 ***************************************************************************/

void cluster_list ( void )
{
    std::list<string>::iterator iter_ptr  ;
    string list = "" ;
    for ( iter_ptr = ctrl.monitored_hostname_list.begin() ;
          iter_ptr != ctrl.monitored_hostname_list.end() ;
          iter_ptr++ )
    {
        list.append (*(iter_ptr));
        list.append (" ");
    }
    ilog ("cluster: %s", list.c_str());
}


/****************************************************************************
 *
 * Name        : cluster_storage0_state
 *
 * Description : Record the heartbeat monitoring state of storage-0.
 *               A state change is logged and flagged as a cluster change.
 *
 * Parameters  : true  if storage-0 heartbeating is in the 'started' state.
 *               false if storage-0 heartbeating is in the 'stopped' state.
 *
 * Returns     : None
 *
 ***************************************************************************/

void cluster_storage0_state ( bool enabled )
{
    /* Nothing to record if the state has not changed. */
    if ( ctrl.cluster.storage0_enabled == enabled )
        return ;

    const char * state_str = "disabled" ;
    if ( enabled )
        state_str = "enabled" ;

    ctrl.cluster.storage0_enabled = enabled ;
    ilog ("storage-0 heartbeat state changed to %s", state_str );

    /* Flag the change so that it gets reported to SM. */
    ctrl.cluster_change = true ;
}


/****************************************************************************
 *
 * Name        : hbs_manage_controller_state
 *
 * Description : Track the monitored enabled state of the controllers.
 *
 * Parameters  : hostname - name of the host whose state changed
 *               enabled  - the new monitored state for that host
 *
 * Updates     : The matching controller_N_enabled flag. Host names that
 *               are not a controller name are ignored.
 *
 * Returns     : None
 *
 ***************************************************************************/

void hbs_manage_controller_state ( string & hostname, bool enabled )
{
    /* Select the state flag that belongs to this host ; if any. */
    bool * state_ptr = NULL ;

    if ( hostname == CONTROLLER_0 )
        state_ptr = &ctrl.controller_0_enabled ;
    else if ( hostname == CONTROLLER_1 )
        state_ptr = &ctrl.controller_1_enabled ;
#ifdef THREE_CONTROLLER_SYSTEM
    else if ( hostname == CONTROLLER_2 )
        state_ptr = &ctrl.controller_2_enabled ;
#endif

    if ( state_ptr != NULL )
        *state_ptr = enabled ;
}


/****************************************************************************
 *
 * Name        : hbs_cluster_add
 *
 * Description : Add the specified hostname to the enabled hosts list.
 *
 * Parameters  : hostname string
 *
 * Updates     : hostname is added to monitored_hostname_list if it is
 *               not already there and monitored_hosts is refreshed.
 *
 *               If added host is storage-0 then update its enabled status.
 *               If added host is a controller then update controller state.
 *
 *               A pending cluster change is sent to SM when an SM socket
 *               has been stored.
 *
 * Returns     : None
 *
 ***************************************************************************/

void hbs_cluster_add ( string & hostname )
{
    /* Walk the list until this host is found or the end is reached. */
    std::list<string>::iterator iter = ctrl.monitored_hostname_list.begin();
    while (( iter != ctrl.monitored_hostname_list.end() ) &&
           ( iter->compare(hostname) != 0 ))
    {
        ++iter ;
    }

    /* Reaching the end means the host is not yet monitored. */
    if ( iter == ctrl.monitored_hostname_list.end() )
    {
        ctrl.monitored_hostname_list.push_back(hostname) ;
        ctrl.monitored_hosts = (unsigned short)ctrl.monitored_hostname_list.size();
        ilog ("%s added to cluster", hostname.c_str());
        cluster_list ();
        ctrl.cluster_change = true ;
    }

    /* Manage storage-0 state */
    if ( !hostname.compare(STORAGE_0) )
        cluster_storage0_state ( true );

    /* If we get down to 0 monitored hosts then just start fresh */
    if ( ctrl.monitored_hosts == 0 )
        hbs_cluster_init ( ctrl.cluster.period_msec, NULL );

    /* Manage controller state ; true means enabled in this case. */
    hbs_manage_controller_state ( hostname, true );

    /* Report a pending change to SM ; the change flag is left set when
     * there is no SM socket to send on. */
    if ( ctrl.cluster_change && ctrl.sm_socket_ptr )
    {
        hbs_cluster_send( ctrl.sm_socket_ptr, 0 );
        ctrl.cluster_change = false ;
    }
}

/****************************************************************************
 *
 * Name        : hbs_cluster_del
 *
 * Description : Delete the specified hostname from the enabled hosts list.
 *
 * Parameters  : hostname string
 *
 * Updates     : hostname is removed from monitored_hostname_list and
 *               monitored_hosts is refreshed.
 *
 *               If deleted host is storage-0 then update its enabled status.
 *               If deleted host is a controller then update controller state.
 *
 *               A pending cluster change is sent to SM when an SM socket
 *               has been stored.
 *
 * Returns     : None
 *
 ***************************************************************************/

void hbs_cluster_del ( string & hostname )
{
    /* First determine whether this host is being monitored at all. */
    bool monitored = false ;
    std::list<string>::iterator iter = ctrl.monitored_hostname_list.begin();
    while ( iter != ctrl.monitored_hostname_list.end() )
    {
        if ( iter->compare(hostname) == 0 )
        {
            monitored = true ;
            break ;
        }
        ++iter ;
    }

    if ( monitored == true )
    {
        ctrl.monitored_hostname_list.remove(hostname) ;
        ctrl.monitored_hosts = (unsigned short)ctrl.monitored_hostname_list.size();

        /* Manage storage-0 state. */
        if ( !hostname.compare(STORAGE_0) )
            cluster_storage0_state ( false );

        /* If we get down to 0 monitored hosts then just start fresh */
        if ( ctrl.monitored_hosts == 0 )
            hbs_cluster_init ( ctrl.cluster.period_msec, NULL );

        /* Manage controller state ; false means not enabled in this case. */
        hbs_manage_controller_state ( hostname , false );

        ilog ("%s deleted from cluster", hostname.c_str());

        cluster_list ();

        ctrl.cluster_change = true ;
    }

    /* Report a pending change to SM ; the change flag is left set when
     * there is no SM socket to send on. */
    if ( ctrl.cluster_change && ctrl.sm_socket_ptr )
    {
        hbs_cluster_send( ctrl.sm_socket_ptr, 0 );
        ctrl.cluster_change = false ;
    }
}

/****************************************************************************
 *
 * Name        : hbs_cluster_update
 *
 * Description : Update this controller's cluster info for the specified
 *               network with ...
 *
 *               1. The number of enabled hosts.
 *               2. The number of responding hosts.
 *               3. The oldest history index in the rotational history fifo.
 *               4. Maintain a back to back non-responding count for storage-0.
 *                  Once the count reaches the minimum threshold of
 *                  STORAGE_0_NR_THRESHOLD then the specific network history
 *                  is updated to indicate storage-0 is not responding. Once
 *                  storage-0 starts responding again with a single response
 *                  then that network history is updated to indicate storage-0
 *                  is responding.
 *
 *               Does nothing if there are no monitored hosts or if the
 *               interface does not map to a monitored cluster network.
 *
 * Assumptions : Converts heartbeat interface number to cluster network number.
 *
 * Parameters  : iface                - heartbeat interface number ( iface_enum )
 *               not_responding_hosts - number of not responding hosts for
 *                                      this interval
 *               storage_0_responding - true if storage-0 responded in this
 *                                      interval
 *
 * Updates     : This and last history as well as storage-0 not responding
 *               count.
 *
 * Returns     : None
 *
 ***************************************************************************/


void hbs_cluster_update ( iface_enum iface,
                      unsigned short not_responding_hosts,
                                bool storage_0_responding )
{
    /* Nothing to record while no hosts are being monitored. */
    if ( ctrl.monitored_hosts == 0 )
        return ;

    /* convert heartbeat iface enum to cluster network enum. */
    mtce_hbs_network_enum n ;
    if ( iface == MGMNT_IFACE )
        n = MTCE_HBS_NETWORK_MGMT ;
    else if ( iface == INFRA_IFACE )
        n = MTCE_HBS_NETWORK_INFRA ;
#ifdef MONITORED_OAM_NETWORK
    else if ( iface == OAM_IFACE )
        n = MTCE_HBS_NETWORK_OAM ;
#endif
    else
        return ;

    if ( not_responding_hosts )
    {
        clog ("controller-%d %s enabled:%d not responding:%d",
               ctrl.this_controller,
               hbs_cluster_network_name(n).c_str(),
               ctrl.monitored_hosts,
               not_responding_hosts);
    }
    else
    {
        clog ("controller-%d %s has %d monitored hosts and all are responding",
               ctrl.this_controller,
               hbs_cluster_network_name(n).c_str(),
               ctrl.monitored_hosts);
    }

    /* Look-up active history array for this network combination.
     *
     * NOTE(review): GET_CLUSTER_HISTORY_PTR is defined elsewhere ; it
     * presumably assigns the local 'history_ptr' by name when a history
     * for this controller:network already exists - confirm. The NULL
     * check below depends on that. */
    mtce_hbs_cluster_history_type * history_ptr = NULL ;
    GET_CLUSTER_HISTORY_PTR(ctrl.cluster, ctrl.this_controller ,n);
    if ( history_ptr == NULL )
    {
        if ( ctrl.cluster.histories >= MTCE_HBS_MAX_HISTORY_ELEMENTS )
        {
            /* Should never happen but if it does then log without flooding */
            wlog_throttled ( ctrl.log_throttle, THROTTLE_COUNT,
                             "Unable to store history beyond %d ",
                             ctrl.cluster.histories );
            return ;
        }
        else
        {
            /* Adding a new history slot ; claim the next free slot in the
             * vault and tag it with this controller and network. */
            history_ptr = &ctrl.cluster.history[ctrl.cluster.histories] ;
            ctrl.cluster.histories++ ;
            ctrl.cluster.bytes = BYTES_IN_CLUSTER_VAULT(ctrl.cluster.histories);
            history_ptr->controller = ctrl.this_controller ;
            history_ptr->network = n ;

            /* Log new network history as its being started. */
            ilog ("controller-%d added new controller-%d:%s history to vault ; now have %d network views",
                   ctrl.this_controller,
                   ctrl.this_controller,
                   hbs_cluster_network_name(n).c_str(),
                   ctrl.cluster.histories);
        }
    }

    /* Manage storage-0 status. */
    if ( ctrl.cluster.storage0_enabled )
    {
        /* Handle storage-0 status change from not responding to responding.
         * A single response is enough to recover and clear the miss count. */
        if ( storage_0_responding == true )
        {
            if (history_ptr->storage0_responding == false)
            {
                history_ptr->storage0_responding = true ;
                ilog ("controller-%d %s heartbeat ; storage-0 is ok",
                   ctrl.this_controller,
                   hbs_cluster_network_name(n).c_str());
            }
            if (ctrl.storage_0_not_responding_count[n])
                ctrl.storage_0_not_responding_count[n] = 0 ;
        }
        /* Count the storage-0 not responding case for this network. */
        else
        {
            ctrl.storage_0_not_responding_count[n]++ ;
            if ( ctrl.storage_0_not_responding_count[n] == 2 )
            {
                ilog ("controller-%d %s heartbeat ; storage-0 has 2 misses",
                       ctrl.this_controller,
                       hbs_cluster_network_name(n).c_str() );
            }
        }

        /* Handle storage-0 status change from responding to not responding.
         * Only declared once the back to back miss count reaches the
         * threshold. */
        if (( history_ptr->storage0_responding == true ) &&
            ( ctrl.storage_0_not_responding_count[n] >= STORAGE_0_NR_THRESHOLD ))
        {
            history_ptr->storage0_responding = false ;
            ilog ("controller-%d %s heartbeat ; storage-0 is not responding",
                   ctrl.this_controller,
                   hbs_cluster_network_name(n).c_str() );
        }
    }
    else
    {
        /* Typical path for storage-0 disabled or normal non-storage system case */
        if ( history_ptr->storage0_responding == true )
            history_ptr->storage0_responding = false ;

        /* Handle clearing threshold count when storage-0 is not enabled. */
        if ( ctrl.storage_0_not_responding_count[n] )
            ctrl.storage_0_not_responding_count[n] = 0 ;
    }

    /*
     * Manage the history entry index.
     *
     * Get the previous entry index ...
     * ... which is the one before the oldest index.
     * ... which is the index for the next entry.
     */
    unsigned short last_entry_index ;
    unsigned short oldest_entry_index = history_ptr->oldest_entry_index ;

    if ( oldest_entry_index == 0 )
    {
        /* Go to the end of the array. */
        last_entry_index = MTCE_HBS_HISTORY_ENTRIES-1 ;
    }
    else
    {
        /* Otherwise, the previous index in the array */
        last_entry_index = oldest_entry_index - 1 ;
    }

    /* Decision on whether the cluster state gets logged this interval. */
    bool   logit = false ;
    string logit_reason = "" ;

    /* Update the history with this data ; the oldest entry is overwritten.
     *
     * NOTE(review): there is no guard here against not_responding_hosts
     * being larger than monitored_hosts - confirm the caller bounds it. */
    history_ptr->entry[oldest_entry_index].hosts_enabled = ctrl.monitored_hosts ;
    history_ptr->entry[oldest_entry_index].hosts_responding = ctrl.monitored_hosts - not_responding_hosts ;

    /* Compare the new entry against the previous one to detect a change. */
    if (( history_ptr->entry[oldest_entry_index].hosts_enabled !=
          history_ptr->entry[  last_entry_index].hosts_enabled ) ||
        ( history_ptr->entry[oldest_entry_index].hosts_responding !=
          history_ptr->entry[  last_entry_index].hosts_responding))
    {
        /* Only log on change events. */
        if ( history_ptr->entry[oldest_entry_index].hosts_enabled ==
             history_ptr->entry[oldest_entry_index].hosts_responding )
        {
            ilog ("controller-%d %s cluster of %d is healthy",
                   ctrl.this_controller,
                   hbs_cluster_network_name(n).c_str(),
                   history_ptr->entry[oldest_entry_index].hosts_enabled);
            ctrl.cluster_change_threshold_count = 0 ;
            ctrl.cluster_change_difference_count = 0 ;
        }
        else
        {
            /* Changed but not healthy ; count the event and record the
             * current enabled vs responding difference. */
            ctrl.cluster_change_threshold_count++ ;
            ctrl.cluster_change_difference_count =
            history_ptr->entry[oldest_entry_index].hosts_enabled -
            history_ptr->entry[oldest_entry_index].hosts_responding ;
        }
    }

    /* Select the reason, if any, for logging the cluster state ...
     *   (debug)     - debug_state bit 4 is set in the daemon config.
     *   (threshold) - the change count reached CLUSTER_CHANGE_THRESHOLD.
     *   (delta)     - the enabled vs responding difference is not the
     *                 same as the last recorded difference. */
    if ( daemon_get_cfg_ptr()->debug_state&4 )
    {
        logit = true ;
        logit_reason = "(debug)" ;
    }
//    else if (( ctrl.cluster_change_threshold_count == 1 ) &&
//             ( cluster_change == false ))
//    {
//        logit = true ;
//        logit_reason = "" ;
//    }
    else if ( ctrl.cluster_change_threshold_count >= CLUSTER_CHANGE_THRESHOLD )
    {
        logit = true ;
        ctrl.cluster_change_threshold_count = 0 ;
        logit_reason = "(threshold)" ;
    }
    else
    {
        int delta =
        history_ptr->entry[oldest_entry_index].hosts_enabled -
        history_ptr->entry[oldest_entry_index].hosts_responding ;
        if ( delta != ctrl.cluster_change_difference_count )
        {
            logit = true ;
            ctrl.cluster_change_difference_count = delta ;
            logit_reason = "(delta)" ;
        }
    }

    if ( logit )
    {
        ilog ("controller-%d %s cluster of %d with %d responding (%d:%d) %s",
               ctrl.this_controller,
               hbs_cluster_network_name(n).c_str(),
               history_ptr->entry[oldest_entry_index].hosts_enabled,
               history_ptr->entry[oldest_entry_index].hosts_responding,
               ctrl.cluster_change_difference_count,
               not_responding_hosts,
               logit_reason.c_str());
    }

    /* Increment the entries count till it reaches the max. */
    if ( history_ptr->entries < MTCE_HBS_HISTORY_ENTRIES )
        history_ptr->entries++ ;

    /* Manage the next entry update index ; aka the oldest index.
     * Wraps back to the start once the end of the array is reached. */
    if ( oldest_entry_index == (MTCE_HBS_HISTORY_ENTRIES-1))
        history_ptr->oldest_entry_index = 0 ;
    else
        history_ptr->oldest_entry_index++ ;

    /* clear the log throttle if we are updating history ok. */
    ctrl.log_throttle = 0 ;
}

/****************************************************************************
 *
 * Name        : hbs_cluster_append
 *
 * Description : Add this controller's cluster info to this pulse
 *               request message.
 *
 * Parameters  : msg - message whose cluster section is filled in with
 *                     the vault header and only those histories that
 *                     belong to this controller.
 *
 * Returns     : None
 *
 ***************************************************************************/

void hbs_cluster_append ( hbs_message_type & msg )
{
    /* NOTE(review): macro is defined elsewhere ; presumably validates the
     * controller and network numbers before proceeding - confirm. */
    CHECK_CTRL_NTWK_PARMS(ctrl.this_controller, ctrl.monitored_networks);

    /* Start with no histories and then copy over the vault header. */
    msg.cluster.histories        = 0 ;
    msg.cluster.magic_number     = ctrl.cluster.magic_number ;
    msg.cluster.version          = ctrl.cluster.version ;
    msg.cluster.revision         = ctrl.cluster.revision ;
    msg.cluster.storage0_enabled = ctrl.cluster.storage0_enabled ;
    msg.cluster.period_msec      = ctrl.cluster.period_msec ;

    /* Pack this controller's histories back to back into the message. */
    for ( int slot = 0 ; slot < ctrl.cluster.histories ; ++slot )
    {
        /* Skip histories that belong to other controllers. */
        if ( ctrl.cluster.history[slot].controller != ctrl.this_controller )
            continue ;

        memcpy( &msg.cluster.history[msg.cluster.histories],
                &ctrl.cluster.history[slot],
                sizeof(mtce_hbs_cluster_history_type));

        msg.cluster.histories++ ;
    }

    /* The byte count reflects only the histories that were copied. */
    msg.cluster.bytes = BYTES_IN_CLUSTER_VAULT(msg.cluster.histories);

    clog2 ("controller-%d appending cluster info to heartbeat message (%d:%d:%d)",
            ctrl.this_controller, ctrl.monitored_networks, ctrl.cluster.histories, msg.cluster.bytes );
}

/****************************************************************************
 *
 * Name        : hbs_cluster_unused_bytes
 *
 * Description : Used to set how much data to send in the heartbeat pulse
 *               requests.
 *
 * Returns     : The number of bytes that are not used in the full
 *               history array cluster structure ; 0 if the history count
 *               exceeds the maximum number of history elements.
 *
 ***************************************************************************/

unsigned short hbs_cluster_unused_bytes ( void )
{
    /* An out of range history count leaves nothing unused. */
    if ( ctrl.cluster.histories > MTCE_HBS_MAX_HISTORY_ELEMENTS )
        return 0;

    /* One history element's worth of bytes for every free slot. */
    unsigned short free_slots = MTCE_HBS_MAX_HISTORY_ELEMENTS - ctrl.cluster.histories ;
    return((unsigned short)(sizeof(mtce_hbs_cluster_history_type)*free_slots)) ;
}


/****************************************************************************
 *
 * Name       : hbs_cluster_send
 *
 * Description: Send the cluster vault to SM.
 *
 *              Only the used portion of the vault is written ; the
 *              unused history slots are left off the end.
 *
 * Parameters : sm_client_sock - socket to write the vault to
 *              reqid          - stored in the vault before sending ;
 *                               non-zero is logged as a 'cluster query'
 *                               and zero as a 'cluster event'.
 *
 * Returns    : Nothing
 *
 ***************************************************************************/

void hbs_cluster_send ( msgClassSock * sm_client_sock, int reqid )
{
    /* The request id is stored even if the send cannot be attempted. */
    ctrl.cluster.reqid = (unsigned short)reqid ;

    /* Cannot send without a usable socket. */
    if (( sm_client_sock == NULL ) || ( sm_client_sock->sock_ok() != true ))
    {
        wlog ("cannot send cluster info due to socket error");
        return ;
    }

    /* Trim the unused history slots from the message length. */
    int len = sizeof(mtce_hbs_cluster_type)-hbs_cluster_unused_bytes();
    int bytes = sm_client_sock->write((char*)&ctrl.cluster, len);
    if ( bytes > 0 )
    {
        /* Sent ok ; dump what was sent along with why it was sent. */
        string reason = "" ;
        if ( reqid )
            reason = "cluster query" ;
        else
            reason = "cluster event" ;
        hbs_cluster_dump ( ctrl.cluster, reason, true );
    }
    else
    {
         elog ("failed to send cluster vault to SM (bytes=%d) (%d:%s)\n",
                bytes , errno, strerror(errno));
    }
}

/****************************************************************************
 *
 * Name        : hbs_history_save
 *
 * Description : Copy the history sample to the vault.
 *
 *               If the vault already holds a history for the sample's
 *               controller:network then that history is overwritten.
 *               Otherwise the sample is added as a new history, provided
 *               the vault still has a free history slot.
 *
 * Parameters  : hostname - name of the host the sample arrived through ;
 *                          only used for logging.
 *               sample   - the history to store.
 *
 * Updates     : ctrl.cluster history array, histories and bytes.
 *
 * Returns     : Nothing.
 *
 ***************************************************************************/

void hbs_history_save ( string hostname, mtce_hbs_cluster_history_type & sample )
{
    /* Overwrite the existing history for this controller:network if any. */
    for ( int h = 0 ; h < ctrl.cluster.histories ; h++ )
    {
        if (( ctrl.cluster.history[h].controller ==  sample.controller ) &&
            ( ctrl.cluster.history[h].network == sample.network ))
        {
            memcpy( &ctrl.cluster.history[h], &sample,
                    sizeof(mtce_hbs_cluster_history_type));

            clog1 ("controller-%d updated vault with controller-%d:%s network history through %s (histories:%d)",
                   ctrl.this_controller,
                   sample.controller,
                   hbs_cluster_network_name((mtce_hbs_network_enum)sample.network).c_str(),
                   hostname.c_str(),
                   ctrl.cluster.histories);
            return ;
        }
    }

    /* Not found ; a new history slot is needed.
     *
     * Never write past the end of the vault's history array. This is the
     * same guard hbs_cluster_update applies before adding a history. */
    if ( ctrl.cluster.histories >= MTCE_HBS_MAX_HISTORY_ELEMENTS )
    {
        /* A private throttle is used because ctrl.log_throttle is cleared
         * by hbs_cluster_update on every successful history update. */
        static unsigned int vault_full_log_throttle = 0 ;
        wlog_throttled ( vault_full_log_throttle, THROTTLE_COUNT,
                         "Unable to store history beyond %d ",
                         ctrl.cluster.histories );
        return ;
    }

    /* Add the sample as a new history at the next free slot. */
    memcpy( &ctrl.cluster.history[ctrl.cluster.histories], &sample,
            sizeof(mtce_hbs_cluster_history_type));
    ctrl.cluster.histories++ ;
    ctrl.cluster.bytes = BYTES_IN_CLUSTER_VAULT(ctrl.cluster.histories);

    ilog ("controller-%d added new controller-%d:%s history to vault ; now have %d network views",
              ctrl.this_controller,
              sample.controller,
              hbs_cluster_network_name((mtce_hbs_network_enum)sample.network).c_str(),
              ctrl.cluster.histories);
}

void hbs_state_audit ( void )
{
   hbs_cluster_dump ( ctrl.cluster, "Audit", true );
}


/* Log this module's working cluster vault for the specified host.
 *
 * Forwards to the hbs_cluster_log overload that takes the cluster as
 * an explicit argument, supplying ctrl.cluster. */
void hbs_cluster_log ( string & hostname, string prefix )
{
    hbs_cluster_log ( hostname, ctrl.cluster, prefix );
}

/* Same as the two argument hbs_cluster_log but also passes the caller's
 * 'force' flag through to the overload that takes the cluster as an
 * explicit argument. */
void hbs_cluster_log ( string & hostname,
                       string log_prefix,
                       bool force )
{
    hbs_cluster_log (hostname,  ctrl.cluster, log_prefix, force );
}

/****************************************************************************
 *
 * Active Active Heartbeating and Debug Member Functions
 *
 ***************************************************************************/

/****************************************************************************
 *
 * Name        : hbs_cluster_cmp
 *
 * Description : Performs a sanity check of the message's cluster header
 *               against this controller's cluster vault header.
 *
 *               Checks, in order, that the message version is not lower
 *               than the vault version and that the revision, magic
 *               number, period and storage-0 enabled state all match.
 *               Only the first mismatch found is logged.
 *
 * Assumptions : Debug tool, not called at runtime.
 *
 * Returns     : PASS or FAIL
 *
 ***************************************************************************/

int hbs_cluster_cmp( hbs_message_type & msg )
{
    if ( msg.cluster.version < ctrl.cluster.version )
    {
        wlog ("Unexpected version (%d:%d)",
               msg.cluster.version, ctrl.cluster.version );
        return (FAIL);
    }

    if ( msg.cluster.revision != ctrl.cluster.revision )
    {
        wlog ("Unexpected revision (%d:%d)",
               msg.cluster.revision, ctrl.cluster.revision );
        return (FAIL);
    }

    if ( msg.cluster.magic_number != ctrl.cluster.magic_number )
    {
        wlog ("Unexpected magic number (%d:%d)",
               msg.cluster.magic_number, ctrl.cluster.magic_number );
        return (FAIL);
    }

    if ( msg.cluster.period_msec != ctrl.cluster.period_msec )
    {
        wlog ("Cluster Heartbeat period delta (%d:%d)",
               msg.cluster.period_msec, ctrl.cluster.period_msec );
        return (FAIL);
    }

    if ( msg.cluster.storage0_enabled != ctrl.cluster.storage0_enabled )
    {
        wlog ("Cluster storage0 enabled state delta (%d:%d)",
               msg.cluster.storage0_enabled, ctrl.cluster.storage0_enabled );
        return (FAIL);
    }

    /* Every header field checked out. */
    return (PASS);
}

/****************************************************************************
 *
 * Name        : hbs_cluster_save
 *
 * Description : Copies the other controllers information from msg into
 *               the cluster.
 *
 *               Histories that carry an invalid network id are logged
 *               and skipped. Histories that belong to this controller
 *               are not copied.
 *
 * Parameters  : hostname - name of the host the message came from
 *               network  - network the message arrived on ; used for
 *                          the cluster log prefix
 *               msg      - the received message
 *
 * Returns     : FAIL_NOT_SUPPORTED if the message version is too old
 *               RETRY if there are no monitored hosts
 *               FAIL  if the message's history count exceeds the
 *                     maximum number of history elements
 *               PASS  otherwise
 *
 ***************************************************************************/

int hbs_cluster_save ( string & hostname,
                       mtce_hbs_network_enum network,
                       hbs_message_type & msg )
{
    /* cluster info is only supported in HBS_MESSAGE_VERSION 1 */
    if ( msg.v < HBS_MESSAGE_VERSION )
        return FAIL_NOT_SUPPORTED ;

    if ( ! ctrl.monitored_hosts )
        return RETRY ;

    if ( msg.cluster.histories == 0 )
        return PASS ;

    /* The history count comes from the received message and bounds the
     * loop below so it must not be trusted beyond the array capacity.
     * Assumes msg.cluster.history has MTCE_HBS_MAX_HISTORY_ELEMENTS slots
     * like ctrl.cluster.history does - TODO confirm against hbsBase.h. */
    if ( msg.cluster.histories > MTCE_HBS_MAX_HISTORY_ELEMENTS )
    {
        /* A private throttle is used because ctrl.log_throttle is cleared
         * by hbs_cluster_update on every successful history update. */
        static unsigned int bad_count_log_throttle = 0 ;
        wlog_throttled ( bad_count_log_throttle, THROTTLE_COUNT,
                         "%s Invalid history count (%d:%d)",
                         hostname.c_str(),
                         msg.cluster.histories,
                         MTCE_HBS_MAX_HISTORY_ELEMENTS );
        return (FAIL);
    }

    for ( int h = 0 ; h < msg.cluster.histories ; h++ )
    {
        if ( msg.cluster.history[h].network >= MTCE_HBS_MAX_NETWORKS )
        {
            elog ("Invalid network id (%d:%d:%d)",
                   h,
                   msg.cluster.history[h].controller,
                   msg.cluster.history[h].network );
        }
        else if ( msg.cluster.history[h].controller != ctrl.this_controller )
        {
            /* Only the other controllers' histories are saved ; this
             * controller maintains its own through hbs_cluster_update. */
            hbs_history_save ( hostname, msg.cluster.history[h] );
        }
        hbs_cluster_log( hostname, ctrl.cluster, hbs_cluster_network_name(network) );
    }
    return (PASS);
}