gplv2/drbd-tools/centos/patches/0003-drbd_report_condition....

388 lines
12 KiB
Diff

---
scripts/drbd | 1
scripts/drbd.ocf | 259 ++++++++++++++++++++++---------------------------------
2 files changed, 109 insertions(+), 151 deletions(-)
--- a/scripts/drbd.ocf
+++ b/scripts/drbd.ocf
@@ -5,6 +5,8 @@
#
# Copyright (c) 2009 LINBIT HA-Solutions GmbH,
# Copyright (c) 2009 Florian Haas, Lars Ellenberg
+# Copyright (c) 2014 Wind River Systems, Inc. All rights reserved.
+#
# Based on the Heartbeat drbd OCF Resource Agent by Lars Marowsky-Bree
# (though it turned out to be an almost complete rewrite)
#
@@ -216,20 +218,6 @@ do_drbdadm() {
return $ret
}
-set_master_score() {
- if [ -x ${HA_SBIN_DIR}/crm_master ]; then
- # Use quiet mode (-Q) to quench logging. Actual score updates
- # will get logged by attrd anyway
- do_cmd ${HA_SBIN_DIR}/crm_master -Q -l reboot -v $1
- fi
-}
-
-remove_master_score() {
- if [ -x ${HA_SBIN_DIR}/crm_master ]; then
- do_cmd ${HA_SBIN_DIR}/crm_master -l reboot -D
- fi
-}
-
_sh_status_process() {
# _volume not present should not happen,
# but may help make this agent work even if it talks to drbd 8.3.
@@ -242,6 +230,7 @@ _sh_status_process() {
DRBD_DSTATE_LOCAL[$_volume]=${_disk:-Unconfigured}
DRBD_DSTATE_REMOTE[$_volume]=${_pdsk:-DUnknown}
}
+
drbd_set_status_variables() {
# drbdsetup sh-status prints these values to stdout,
# and then prints _sh_status_process.
@@ -322,119 +311,9 @@ maybe_outdate_self()
ocf_log notice "outdating $DRBD_RESOURCE: according to OCF_RESKEY_CRM_meta_notify_master_uname, '$host' is still master"
do_drbdadm outdate $DRBD_RESOURCE
- # on some pacemaker versions, -INFINITY may cause resource instance stop/start.
- # But in this case that is ok, it may even clear the replication link
- # problem.
- set_master_score -INFINITY
-
return 0
}
-drbd_update_master_score() {
- # NOTE
- # there may be constraint scores from rules on role=Master,
- # that in some ways can add to the node attribute based master score we
- # specify below. If you think you want to add personal preferences,
- # in case the scores given by this RA do not suffice, this is the
- # value space you can work with:
- # -INFINITY: Do not promote. Really. Won't work anyways.
- # Too bad, at least with current (Oktober 2009) Pacemaker,
- # negative master scores cause instance stop; restart cycle :(
- # missing, zero: Do not promote.
- # I think my data is not good enough.
- # Though, of course, you may try, and it might even work.
- # 5: please, do not promote, unless this is your only option.
- # 10: promotion is probably a bad idea, our local data is no good,
- # you'd probably run into severe performance problems, and risk
- # application crashes or blocking IO in case you lose the
- # replication connection.
- # 1000: Ok to be promoted, we have good data locally (though we don't
- # know about the peer, so possibly it has even better data?).
- # You sould use the crm-fence-peer.sh handler or similar
- # mechanism to avoid data divergence.
- # 10000: Please promote me/keep me Primary.
- # I'm confident that my data is as good as it gets.
- #
- # For multi volume, we need to compare who is "better" a bit more sophisticated.
- # The ${XXX[*]//UpToDate}, without being in double quotes, results in a single space,
- # if all are UpToDate.
- : == DEBUG == ${DRBD_ROLE_LOCAL[*]}/${DRBD_DSTATE_LOCAL[*]//UpToDate/ }/${DRBD_DSTATE_REMOTE[*]//UpToDate/ }/ ==
- case ${DRBD_ROLE_LOCAL[*]}/${DRBD_DSTATE_LOCAL[*]//UpToDate/ }/${DRBD_DSTATE_REMOTE[*]//UpToDate/ }/ in
- *Primary*/\ /*/)
- # I am Primary, all local disks are UpToDate
- set_master_score 10000
- ;;
- */\ /*DUnknown*/)
- # all local disks are UpToDate,
- # but I'm not Primary,
- # and I'm not sure about the peer's disk state(s).
- # We may need to outdate ourselves?
- # But if we outdate in a MONITOR, and are disconnected
- # secondary because of a hard primary crash, before CRM noticed
- # that there is no more master, we'd make us utterly useless!
- # Trust that the primary will also notice the disconnect,
- # and will place an appropriate fencing constraint via
- # its fence-peer handler callback.
- set_master_score 1000
- ;;
- */\ /*/)
- # We know something about our peer, which means that either the
- # replication link is established, or it was not even
- # consistent last time we talked to each other.
- # Also all our local disks are UpToDate, which means even if we are
- # currently synchronizing, we do so as SyncSource.
- set_master_score 10000
- ;;
-
- */*/\ /)
- # At least one of our local disks is not up to date.
- # But our peer is ALL OK.
- # We can expect to have access to useful
- # data, but must expect degraded performance.
- set_master_score 10
- ;;
- */*Attaching*/*/|\
- */*Negotiating*/*/)
- # some transitional state.
- # just don't do anything
- : ;;
-
- Unconfigured*|\
- */*Diskless*/*/|\
- */*Failed*/*/|\
- */*Inconsistent*/*/|\
- */*Outdated*/*/)
- # ALWAYS put the cluster in MAINTENANCE MODE
- # if you add a volume to a live replication group,
- # because the new volume will typically come up as Inconsistent
- # the first time, which would cause a monitor to revoke the
- # master score!
- #
- # At least some of our local disks are not really useable.
- # Our peer is not all good either (or some previous case block
- # would have matched). We have no access to useful data.
- # DRBD would refuse to be promoted, anyways.
- #
- # set_master_score -INFINITY
- # Too bad, at least with current (Oktober 2009) Pacemaker,
- # negative master scores cause instance stop; restart cycle :(
- # Hope that this will suffice.
- remove_master_score
- ;;
- *)
- # All local disks seem to be Consistent.
- # They _may_ be up to date, or not.
- # We hope that fencing mechanisms have put constraints in
- # place, so we won't be promoted with stale data.
- # But in case this was a cluster crash,
- # at least allow _someone_ to be promoted.
- set_master_score 5
- ;;
- esac
-
- return $OCF_SUCCESS
-}
-
is_drbd_enabled() {
test -f /proc/drbd
}
@@ -488,7 +367,103 @@ drbd_status() {
return $rc
}
-# I'm sorry, but there is no $OCF_DEGRADED_MASTER or similar yet.
+drbd_condition() {
+ local status # $1: OCF code from drbd_status, or OCF_SUCCESS chosen by the caller
+ local rc # code to return; starts as $status and may be replaced below
+ # Refine the OCF code in $1 into a data-condition code, based on the local disk and connection state.
+ status=$1
+ rc=$status
+ # Only OCF_SUCCESS and OCF_RUNNING_MASTER are refined; any other code is returned unchanged.
+ if [ $status -ne $OCF_SUCCESS -a $status -ne $OCF_RUNNING_MASTER ]
+ then
+ return $rc
+ fi
+ # NOTE(review): the OCF_DATA_* and OCF_RUNNING_MASTER_DATA_* codes are not defined in this patch; presumably they come from the sourced OCF shell functions - confirm.
+ drbd_set_status_variables # refresh the DRBD_* state variables read below
+ # NOTE(review): _sh_status_process fills DRBD_DSTATE_LOCAL per volume; the unindexed expansions below read element 0 only - confirm that checking the first volume alone is intended.
+ ocf_log info "${OCF_RESKEY_drbd_resource} ${DRBD_ROLE_LOCAL}/${DRBD_DSTATE_LOCAL}/${DRBD_DSTATE_REMOTE} ${DRBD_CSTATE}"
+ # Outer case: local disk state. Inner cases: connection state.
+ case "${DRBD_DSTATE_LOCAL}" in
+ UpToDate) # rc changes only when StandAlone or in one of the listed sync states
+ case "${DRBD_CSTATE}" in
+ StandAlone)
+ rc=$OCF_DATA_STANDALONE
+ ocf_log info "${OCF_RESKEY_drbd_resource} standalone, attempting to reconnect."
+ do_drbdadm connect ${OCF_RESKEY_drbd_resource}
+ ;;
+ StartingSyncT | WFBitMapT | WFSyncUUID | SyncTarget | \
+ PausedSyncT)
+ rc=$OCF_DATA_SYNC
+ #drbd-overview | grep -A 1 drbd-cgcs | grep sync\'ed | cut -f2,3 -d' '
+ ocf_log info "${OCF_RESKEY_drbd_resource} syncing"
+ ;;
+ *) # any other connection state: keep rc as passed in
+ ;;
+ esac
+ ;;
+ Consistent) # StandAlone triggers a reconnect attempt; everything else reports "consistent"
+ case "${DRBD_CSTATE}" in
+ StandAlone)
+ rc=$OCF_DATA_STANDALONE
+ ocf_log info "${OCF_RESKEY_drbd_resource} standalone, attempting to reconnect"
+ do_drbdadm connect ${OCF_RESKEY_drbd_resource}
+ ;;
+ *)
+ rc=$OCF_DATA_CONSISTENT
+ ocf_log info "${OCF_RESKEY_drbd_resource} consistent"
+ ;;
+ esac
+ ;;
+ Outdated) # reported regardless of the connection state
+ rc=$OCF_DATA_OUTDATED
+ ocf_log info "${OCF_RESKEY_drbd_resource} outdated"
+ ;;
+ *) # every other local disk state
+ case "${DRBD_CSTATE}" in
+ StandAlone) # unlike the branches above, no reconnect is attempted here
+ rc=$OCF_DATA_STANDALONE
+ ocf_log info "${OCF_RESKEY_drbd_resource} standalone"
+ ;;
+ StartingSyncT | WFBitMapT | WFSyncUUID | SyncTarget | \
+ PausedSyncT)
+ rc=$OCF_DATA_SYNC
+ ocf_log info "${OCF_RESKEY_drbd_resource} sync"
+ ;;
+ *)
+ rc=$OCF_DATA_INCONSISTENT
+ ocf_log info "${OCF_RESKEY_drbd_resource} inconsistent"
+ ;;
+ esac
+ ;;
+ esac
+ # When the input was OCF_RUNNING_MASTER, replace each OCF_DATA_* code with its OCF_RUNNING_MASTER_DATA_* counterpart.
+ if [ $status -eq $OCF_RUNNING_MASTER ]
+ then
+ if [ $rc -eq $OCF_DATA_INCONSISTENT ]
+ then
+ rc=$OCF_RUNNING_MASTER_DATA_INCONSISTENT
+
+ elif [ $rc -eq $OCF_DATA_OUTDATED ]
+ then
+ rc=$OCF_RUNNING_MASTER_DATA_OUTDATED
+
+ elif [ $rc -eq $OCF_DATA_CONSISTENT ]
+ then
+ rc=$OCF_RUNNING_MASTER_DATA_CONSISTENT
+
+ elif [ $rc -eq $OCF_DATA_SYNC ]
+ then
+ rc=$OCF_RUNNING_MASTER_DATA_SYNC
+
+ elif [ $rc -eq $OCF_DATA_STANDALONE ]
+ then
+ rc=$OCF_RUNNING_MASTER_DATA_STANDALONE
+ fi
+ fi
+ # rc is still $1 if no branch above replaced it.
+ return $rc
+}
+
drbd_monitor() {
local status
@@ -501,7 +476,8 @@ drbd_monitor() {
drbd_status
status=$?
- drbd_update_master_score
+ drbd_condition $status
+ status=$?
return $status
}
@@ -578,7 +554,8 @@ drbd_start() {
# "running" already, anyways, right?
figure_out_drbd_peer_uname
do_drbdadm $DRBD_TO_PEER adjust $DRBD_RESOURCE
- rc=$OCF_SUCCESS
+ drbd_condition $OCF_SUCCESS
+ rc=$?
break
;;
$OCF_NOT_RUNNING)
@@ -606,9 +583,6 @@ drbd_start() {
$first_try || sleep 1
first_try=false
done
- # in case someone does not configure monitor,
- # we must at least call it once after start.
- drbd_update_master_score
return $rc
}
@@ -642,7 +616,8 @@ drbd_promote() {
break
;;
$OCF_RUNNING_MASTER)
- rc=$OCF_SUCCESS
+ drbd_condition $OCF_SUCCESS
+ rc=$?
break
esac
$first_try || sleep 1
@@ -666,7 +641,8 @@ drbd_demote() {
status=$?
case "$status" in
$OCF_SUCCESS)
- rc=$OCF_SUCCESS
+ drbd_condition $OCF_SUCCESS
+ rc=$?
break
;;
$OCF_NOT_RUNNING)
@@ -718,14 +694,9 @@ drbd_stop() {
# outdate myself in drbd on-disk meta data.
maybe_outdate_self
- # do not let old master scores laying around.
- # they may confuse crm if this node was set to standby.
- remove_master_score
-
return $rc
}
-
drbd_notify() {
local n_type=$OCF_RESKEY_CRM_meta_notify_type
local n_op=$OCF_RESKEY_CRM_meta_notify_operation
@@ -760,7 +731,6 @@ drbd_notify() {
# After something has been done is a good time to
# recheck our status:
drbd_set_status_variables
- drbd_update_master_score
: == DEBUG == ${DRBD_DSTATE_REMOTE[*]} ==
case ${DRBD_DSTATE_REMOTE[*]} in
@@ -793,17 +763,6 @@ ls_stat_is_block_maj_147() {
[[ $1 = b* ]] && [[ $5 == 147,* ]]
}
-check_crm_feature_set()
-{
- set -- ${OCF_RESKEY_crm_feature_set//[!0-9]/ }
- local a=${1:-0} b=${2:-0} c=${3:-0}
-
- (( a > 3 )) ||
- (( a == 3 && b > 0 )) ||
- (( a == 3 && b == 0 && c > 0 )) ||
- ocf_log warn "You may be disappointed: This RA is intended for pacemaker 1.0 or better!"
-}
-
drbd_validate_all () {
DRBDADM="drbdadm"
DRBDSETUP="drbdsetup"
@@ -821,7 +780,6 @@ drbd_validate_all () {
if (( $DRBDADM_VERSION_CODE >= 0x080400 )); then
DRBD_HAS_MULTI_VOLUME=true
fi
- check_crm_feature_set
# Check clone and M/S options.
meta_expect clone-max -le 2
@@ -890,7 +848,6 @@ drbd_validate_all () {
# hm. probably misconfigured constraint somewhere.
# sorry. don't retry anywhere.
ocf_log err "DRBD resource ${DRBD_RESOURCE} not found in configuration file ${OCF_RESKEY_drbdconf}."
- remove_master_score
return $OCF_ERR_INSTALLED
fi
fi
--- a/scripts/drbd
+++ b/scripts/drbd
@@ -4,6 +4,7 @@
# description: Loads and unloads the drbd module
#
# Copyright 2001-2010 LINBIT
+# Copyright (c) 2014 Wind River Systems, Inc. All rights reserved.
#
# Philipp Reisner, Lars Ellenberg
#