---
 scripts/drbd     |    1
 scripts/drbd.ocf |  259 ++++++++++++++++++++++---------------------------------
 2 files changed, 109 insertions(+), 151 deletions(-)

--- a/scripts/drbd.ocf
+++ b/scripts/drbd.ocf
@@ -5,6 +5,8 @@
 #
 # Copyright (c) 2009 LINBIT HA-Solutions GmbH,
 # Copyright (c) 2009 Florian Haas, Lars Ellenberg
+# Copyright (c) 2014 Wind River Systems, Inc. All rights reserved.
+#
 # Based on the Heartbeat drbd OCF Resource Agent by Lars Marowsky-Bree
 # (though it turned out to be an almost complete rewrite)
 #
@@ -216,20 +218,6 @@ do_drbdadm() {
 	return $ret
 }
 
-set_master_score() {
-	if [ -x ${HA_SBIN_DIR}/crm_master ]; then
-		# Use quiet mode (-Q) to quench logging. Actual score updates
-		# will get logged by attrd anyway
-		do_cmd ${HA_SBIN_DIR}/crm_master -Q -l reboot -v $1
-	fi
-}
-
-remove_master_score() {
-	if [ -x ${HA_SBIN_DIR}/crm_master ]; then
-		do_cmd ${HA_SBIN_DIR}/crm_master -l reboot -D
-	fi
-}
-
 _sh_status_process() {
 	# _volume not present should not happen,
 	# but may help make this agent work even if it talks to drbd 8.3.
@@ -242,6 +230,7 @@ _sh_status_process() {
 	DRBD_DSTATE_LOCAL[$_volume]=${_disk:-Unconfigured}
 	DRBD_DSTATE_REMOTE[$_volume]=${_pdsk:-DUnknown}
 }
+
 drbd_set_status_variables() {
 	# drbdsetup sh-status prints these values to stdout,
 	# and then prints _sh_status_process.
@@ -322,119 +311,9 @@ maybe_outdate_self()
 	ocf_log notice "outdating $DRBD_RESOURCE: according to OCF_RESKEY_CRM_meta_notify_master_uname, '$host' is still master"
 	do_drbdadm outdate $DRBD_RESOURCE
 
-	# on some pacemaker versions, -INFINITY may cause resource instance stop/start.
-	# But in this case that is ok, it may even clear the replication link
-	# problem.
-	set_master_score -INFINITY
-
 	return 0
 }
 
-drbd_update_master_score() {
-	# NOTE
-	# there may be constraint scores from rules on role=Master,
-	# that in some ways can add to the node attribute based master score we
-	# specify below. If you think you want to add personal preferences,
-	# in case the scores given by this RA do not suffice, this is the
-	# value space you can work with:
-	# -INFINITY: Do not promote. Really. Won't work anyways.
-	# Too bad, at least with current (Oktober 2009) Pacemaker,
-	# negative master scores cause instance stop; restart cycle :(
-	# missing, zero: Do not promote.
-	# I think my data is not good enough.
-	# Though, of course, you may try, and it might even work.
-	# 5: please, do not promote, unless this is your only option.
-	# 10: promotion is probably a bad idea, our local data is no good,
-	# you'd probably run into severe performance problems, and risk
-	# application crashes or blocking IO in case you lose the
-	# replication connection.
-	# 1000: Ok to be promoted, we have good data locally (though we don't
-	# know about the peer, so possibly it has even better data?).
-	# You sould use the crm-fence-peer.sh handler or similar
-	# mechanism to avoid data divergence.
-	# 10000: Please promote me/keep me Primary.
-	# I'm confident that my data is as good as it gets.
-	#
-	# For multi volume, we need to compare who is "better" a bit more sophisticated.
-	# The ${XXX[*]//UpToDate}, without being in double quotes, results in a single space,
-	# if all are UpToDate.
-	: == DEBUG == ${DRBD_ROLE_LOCAL[*]}/${DRBD_DSTATE_LOCAL[*]//UpToDate/ }/${DRBD_DSTATE_REMOTE[*]//UpToDate/ }/ ==
-	case ${DRBD_ROLE_LOCAL[*]}/${DRBD_DSTATE_LOCAL[*]//UpToDate/ }/${DRBD_DSTATE_REMOTE[*]//UpToDate/ }/ in
-	*Primary*/\ /*/)
-		# I am Primary, all local disks are UpToDate
-		set_master_score 10000
-		;;
-	*/\ /*DUnknown*/)
-		# all local disks are UpToDate,
-		# but I'm not Primary,
-		# and I'm not sure about the peer's disk state(s).
-		# We may need to outdate ourselves?
-		# But if we outdate in a MONITOR, and are disconnected
-		# secondary because of a hard primary crash, before CRM noticed
-		# that there is no more master, we'd make us utterly useless!
-		# Trust that the primary will also notice the disconnect,
-		# and will place an appropriate fencing constraint via
-		# its fence-peer handler callback.
-		set_master_score 1000
-		;;
-	*/\ /*/)
-		# We know something about our peer, which means that either the
-		# replication link is established, or it was not even
-		# consistent last time we talked to each other.
-		# Also all our local disks are UpToDate, which means even if we are
-		# currently synchronizing, we do so as SyncSource.
-		set_master_score 10000
-		;;
-
-	*/*/\ /)
-		# At least one of our local disks is not up to date.
-		# But our peer is ALL OK.
-		# We can expect to have access to useful
-		# data, but must expect degraded performance.
-		set_master_score 10
-		;;
-	*/*Attaching*/*/|\
-	*/*Negotiating*/*/)
-		# some transitional state.
-		# just don't do anything
-		: ;;
-
-	Unconfigured*|\
-	*/*Diskless*/*/|\
-	*/*Failed*/*/|\
-	*/*Inconsistent*/*/|\
-	*/*Outdated*/*/)
-		# ALWAYS put the cluster in MAINTENANCE MODE
-		# if you add a volume to a live replication group,
-		# because the new volume will typically come up as Inconsistent
-		# the first time, which would cause a monitor to revoke the
-		# master score!
-		#
-		# At least some of our local disks are not really useable.
-		# Our peer is not all good either (or some previous case block
-		# would have matched). We have no access to useful data.
-		# DRBD would refuse to be promoted, anyways.
-		#
-		# set_master_score -INFINITY
-		# Too bad, at least with current (Oktober 2009) Pacemaker,
-		# negative master scores cause instance stop; restart cycle :(
-		# Hope that this will suffice.
-		remove_master_score
-		;;
-	*)
-		# All local disks seem to be Consistent.
-		# They _may_ be up to date, or not.
-		# We hope that fencing mechanisms have put constraints in
-		# place, so we won't be promoted with stale data.
-		# But in case this was a cluster crash,
-		# at least allow _someone_ to be promoted.
-		set_master_score 5
-		;;
-	esac
-
-	return $OCF_SUCCESS
-}
-
 is_drbd_enabled() {
 	test -f /proc/drbd
 }
@@ -488,7 +367,103 @@ drbd_status() {
 	return $rc
 }
 
-# I'm sorry, but there is no $OCF_DEGRADED_MASTER or similar yet.
+# drbd_condition STATUS
+#
+# Refine STATUS (a drbd_status result) into a data condition return code.
+# STATUS is returned unchanged unless it is $OCF_SUCCESS or
+# $OCF_RUNNING_MASTER.  Otherwise the local disk state and the connection
+# state select one of the $OCF_DATA_* codes, which is mapped to the matching
+# $OCF_RUNNING_MASTER_DATA_* code when STATUS is $OCF_RUNNING_MASTER.
+# STATUS is also what is returned for an UpToDate local disk whose connection
+# is neither StandAlone nor in one of the sync target states.
+# Side effect: a StandAlone resource with an UpToDate or Consistent local
+# disk is told to reconnect (drbdadm connect).
+# NOTE: ${DRBD_DSTATE_LOCAL} without an index expands to element 0 only, so
+# for a multi volume resource only that one volume is evaluated.
+drbd_condition() {
+	# A missing STATUS is treated as a generic error, not as "running".
+	local status=${1:-$OCF_ERR_GENERIC}
+	local rc=$status
+
+	# Not running, or failed: there is no data condition to add.
+	if [ "$status" -ne "$OCF_SUCCESS" ] && [ "$status" -ne "$OCF_RUNNING_MASTER" ]; then
+		return "$rc"
+	fi
+
+	drbd_set_status_variables
+
+	ocf_log info "${OCF_RESKEY_drbd_resource} ${DRBD_ROLE_LOCAL}/${DRBD_DSTATE_LOCAL}/${DRBD_DSTATE_REMOTE} ${DRBD_CSTATE}"
+
+	case "${DRBD_DSTATE_LOCAL}" in
+	UpToDate)
+		# Local data is UpToDate: only the connection state matters.
+		case "${DRBD_CSTATE}" in
+		StandAlone)
+			rc=$OCF_DATA_STANDALONE
+			ocf_log info "${OCF_RESKEY_drbd_resource} standalone, attempting to reconnect."
+			do_drbdadm connect "${OCF_RESKEY_drbd_resource}"
+			;;
+		StartingSyncT|WFBitMapT|WFSyncUUID|SyncTarget|PausedSyncT)
+			rc=$OCF_DATA_SYNC
+			ocf_log info "${OCF_RESKEY_drbd_resource} syncing"
+			;;
+		*)
+			# any other connection state: rc stays STATUS
+			;;
+		esac
+		;;
+	Consistent)
+		# Reported as consistent unless the resource is StandAlone.
+		case "${DRBD_CSTATE}" in
+		StandAlone)
+			rc=$OCF_DATA_STANDALONE
+			ocf_log info "${OCF_RESKEY_drbd_resource} standalone, attempting to reconnect"
+			do_drbdadm connect "${OCF_RESKEY_drbd_resource}"
+			;;
+		*)
+			rc=$OCF_DATA_CONSISTENT
+			ocf_log info "${OCF_RESKEY_drbd_resource} consistent"
+			;;
+		esac
+		;;
+	Outdated)
+		rc=$OCF_DATA_OUTDATED
+		ocf_log info "${OCF_RESKEY_drbd_resource} outdated"
+		;;
+	*)
+		# Any other local disk state: report only, no reconnect attempt.
+		case "${DRBD_CSTATE}" in
+		StandAlone)
+			rc=$OCF_DATA_STANDALONE
+			ocf_log info "${OCF_RESKEY_drbd_resource} standalone"
+			;;
+		StartingSyncT|WFBitMapT|WFSyncUUID|SyncTarget|PausedSyncT)
+			rc=$OCF_DATA_SYNC
+			ocf_log info "${OCF_RESKEY_drbd_resource} sync"
+			;;
+		*)
+			rc=$OCF_DATA_INCONSISTENT
+			ocf_log info "${OCF_RESKEY_drbd_resource} inconsistent"
+			;;
+		esac
+		;;
+	esac
+
+	# A master reports the master flavour of the same data condition;
+	# any rc not listed below (STATUS itself included) is kept.
+	if [ "$status" -eq "$OCF_RUNNING_MASTER" ]; then
+		case "$rc" in
+		"$OCF_DATA_INCONSISTENT") rc=$OCF_RUNNING_MASTER_DATA_INCONSISTENT ;;
+		"$OCF_DATA_OUTDATED")     rc=$OCF_RUNNING_MASTER_DATA_OUTDATED ;;
+		"$OCF_DATA_CONSISTENT")   rc=$OCF_RUNNING_MASTER_DATA_CONSISTENT ;;
+		"$OCF_DATA_SYNC")         rc=$OCF_RUNNING_MASTER_DATA_SYNC ;;
+		"$OCF_DATA_STANDALONE")   rc=$OCF_RUNNING_MASTER_DATA_STANDALONE ;;
+		esac
+	fi
+
+	return "$rc"
+}
+
 drbd_monitor() {
 	local status
 
@@ -501,7 +476,8 @@ drbd_monitor() {
 	drbd_status
 	status=$?
 
-	drbd_update_master_score
+	drbd_condition $status
+	status=$?
 
 	return $status
 }
@@ -578,7 +554,8 @@ drbd_start() {
 			# "running" already, anyways, right?
 			figure_out_drbd_peer_uname
 			do_drbdadm $DRBD_TO_PEER adjust $DRBD_RESOURCE
-			rc=$OCF_SUCCESS
+			drbd_condition $OCF_SUCCESS
+			rc=$?
 			break
 			;;
 		$OCF_NOT_RUNNING)
@@ -606,9 +583,6 @@ drbd_start() {
 		$first_try || sleep 1
 		first_try=false
 	done
-	# in case someone does not configure monitor,
-	# we must at least call it once after start.
-	drbd_update_master_score
 
 	return $rc
 }
@@ -642,7 +616,8 @@ drbd_promote() {
 			break
 			;;
 		$OCF_RUNNING_MASTER)
-			rc=$OCF_SUCCESS
+			drbd_condition $OCF_SUCCESS
+			rc=$?
 			break
 		esac
 		$first_try || sleep 1
@@ -666,7 +641,8 @@ drbd_demote() {
 		status=$?
 		case "$status" in
 		$OCF_SUCCESS)
-			rc=$OCF_SUCCESS
+			drbd_condition $OCF_SUCCESS
+			rc=$?
 			break
 			;;
 		$OCF_NOT_RUNNING)
@@ -718,14 +694,9 @@ drbd_stop() {
 	# outdate myself in drbd on-disk meta data.
 	maybe_outdate_self
 
-	# do not let old master scores laying around.
-	# they may confuse crm if this node was set to standby.
-	remove_master_score
-
 	return $rc
 }
 
-
 drbd_notify() {
 	local n_type=$OCF_RESKEY_CRM_meta_notify_type
 	local n_op=$OCF_RESKEY_CRM_meta_notify_operation
@@ -760,7 +731,6 @@ drbd_notify() {
 		# After something has been done is a good time to
 		# recheck our status:
 		drbd_set_status_variables
-		drbd_update_master_score
 
 		: == DEBUG == ${DRBD_DSTATE_REMOTE[*]} ==
 		case ${DRBD_DSTATE_REMOTE[*]} in
@@ -793,17 +763,6 @@ ls_stat_is_block_maj_147() {
 	[[ $1 = b* ]] && [[ $5 == 147,* ]]
 }
 
-check_crm_feature_set()
-{
-	set -- ${OCF_RESKEY_crm_feature_set//[!0-9]/ }
-	local a=${1:-0} b=${2:-0} c=${3:-0}
-
-	(( a > 3 )) ||
-	(( a == 3 && b > 0 )) ||
-	(( a == 3 && b == 0 && c > 0 )) ||
-	ocf_log warn "You may be disappointed: This RA is intended for pacemaker 1.0 or better!"
-}
-
 drbd_validate_all () {
 	DRBDADM="drbdadm"
 	DRBDSETUP="drbdsetup"
@@ -821,7 +780,6 @@ drbd_validate_all () {
 	if (( $DRBDADM_VERSION_CODE >= 0x080400 )); then
 		DRBD_HAS_MULTI_VOLUME=true
 	fi
-	check_crm_feature_set
 
 	# Check clone and M/S options.
 	meta_expect clone-max -le 2
@@ -890,7 +848,6 @@ drbd_validate_all () {
 			# hm. probably misconfigured constraint somewhere.
 			# sorry. don't retry anywhere.
 			ocf_log err "DRBD resource ${DRBD_RESOURCE} not found in configuration file ${OCF_RESKEY_drbdconf}."
-			remove_master_score
 			return $OCF_ERR_INSTALLED
 		fi
 	fi
--- a/scripts/drbd
+++ b/scripts/drbd
@@ -4,6 +4,7 @@
 # description: Loads and unloads the drbd module
 #
 # Copyright 2001-2010 LINBIT
+# Copyright (c) 2014 Wind River Systems, Inc. All rights reserved.
 #
 # Philipp Reisner, Lars Ellenberg
 #