gplv2/cluster-resource-agents/cluster-resource-agents/lvm_vg_activation.patch

156 lines
4.6 KiB
Diff

commit 69217b67c0d018f129c7cbf526aebf0b236be701
Author: Chris Friesen <chris.friesen@windriver.com>
Date: Thu Sep 17 15:26:16 2015 -0400
CGCS-2553/CGTS-2534: tweak LVM success criteria
It turns out that activating an LVM LV which has a snapshot (or activating
the snapshot) will take an amount of time that is proportional to the
delta between the snapshot and the original volume.
Because of this it's possible that running "vgchange" could take a long
time, since it also activates the LVs.
If this happens, rather than timeout the whole script we want to log which
LVs/snapshots havn't yet been activated, and then just continue on.
Accordingly, we want to set the internal timeout in the "start" operation
to something less than the timeout for the "start" action.
There will be corresponding changes in cinder to properly handle this case.
diff --git a/heartbeat/LVM b/heartbeat/LVM
index bd1a47a..24b0244 100755
--- a/heartbeat/LVM
+++ b/heartbeat/LVM
@@ -186,6 +186,81 @@ LVM_monitor() {
}
#
+# Activate one volume explicitly.
+#
+activate_volume() {
+ ocf_run lvchange $1 /dev/${2}/$3
+ if [ $? -eq 0 ] ; then
+ ocf_log info "Succesfully activated $LV."
+ else
+ ocf_log err "Problem activating $LV."
+ fi
+}
+
+#
+# Kick off parallel activation of all volumes
+#
+activate_all_volumes() {
+ VG=$1
+ shift
+ lvchange_args="$*"
+
+ # Get the list of volumes, without the first line which is column headings.
+ VOLS=`lvs $VG |tail -n +2`
+
+ while read -r LINE; do
+ # Convert the line into an array.
+ LINE_ARRAY=($LINE)
+
+ # First array element is the volume/snapshot name.
+ LV=${LINE_ARRAY[0]}
+
+ # Third array element is the attributes.
+ ATTR=${LINE_ARRAY[2]}
+
+ # Fifth character in the attributes is "a" if it's active.
+ ACTIVE=${ATTR:4:1}
+ if [ "$ACTIVE" == "a" ]; then
+ ocf_log info "$LV is already active."
+ continue
+ fi
+
+ SNAPSHOT_ORIGIN=${LINE_ARRAY[4]}
+ if [ "$SNAPSHOT_ORIGIN" != "" ] ; then
+ # If this is a snapshot, don't activate it.
+ continue
+ fi
+
+ ( activate_volume "$*" $VG $LV ) &
+ done <<< "$VOLS"
+}
+
+#
+# Scan for inactive volumes and log any that are found.
+#
+log_inactive_volumes() {
+ # Get the list of volumes, without the first line which is column headings.
+ VOLS=`lvs $1 |tail -n +2`
+
+ while read -r LINE; do
+ # Convert the line into an array.
+ LINE_ARRAY=($LINE)
+
+ # First array element is the volume/snapshot name.
+ LV=${LINE_ARRAY[0]}
+
+ # Third array element is the attributes.
+ ATTR=${LINE_ARRAY[2]}
+
+ # Fifth character in the attributes is "a" if it's active.
+ ACTIVE=${ATTR:4:1}
+ if [ "$ACTIVE" != "a" ]; then
+ ocf_log err "Volume $LV is not active after expiry of timeout."
+ fi
+ done <<< "$VOLS"
+}
+
+#
# Enable LVM volume
#
LVM_start() {
@@ -218,7 +293,47 @@ LVM_start() {
vgchange_options="$vgchange_options --monitor y"
fi
- ocf_run vgchange $vgchange_options $1 || return $OCF_ERR_GENERIC
+ # Kick off activation of all volumes. If it doesn't complete within
+ # the timeout period, then we'll log the not-yet-activated volumes and
+ # continue on.
+ (ocf_run vgchange $vgchange_options $1) & PID=$!
+
+ # Check every second for up to TIMEOUT seconds whether the vgchange has
+ # completed.
+ TIMEOUT=300
+ TIMED_OUT=true
+ SECONDS=0;
+ PARALLEL_ACTIVATE_DELAY=10
+ PARALLEL_ACTIVATE_DONE=false
+ while [ $SECONDS -lt $TIMEOUT ] ; do
+ kill -0 $PID &> /dev/null
+ if [ $? -eq 1 ] ; then
+ # process with pid of $PID doesn't exist, vgchange command completed
+ TIMED_OUT=false
+ break
+ fi
+ if [ $SECONDS -ge $PARALLEL_ACTIVATE_DELAY ] && \
+ [ "$PARALLEL_ACTIVATE_DONE" != true ] && \
+ [ "$1" == "cinder-volumes" ] ; then
+ # This will kick off parallel activation of all LVs in the VG.
+ # The delay is to ensure the VG is activated first.
+ PARALLEL_ACTIVATE_DONE=true
+ ocf_log info Explicitly activating all volumes in $1 with: $vgchange_options
+ activate_all_volumes $1 $vgchange_options
+ fi
+ sleep 1
+ done
+
+ if [ "$TIMED_OUT" = true ] ; then
+ ocf_log err "Timed out running ocf_run vgchange $vgchange_options $1"
+ log_inactive_volumes $1
+ else
+ # Child process completed, get its status.
+ wait $PID
+ if [ $? -ne 0 ] ; then
+ return $OCF_ERR_GENERIC
+ fi
+ fi
if LVM_status $1; then
: OK Volume $1 activated just fine!