upstream/openstack/openstack-ras/openstack-ras/cgts-4061-cinder-volume-ser...

142 lines
4.5 KiB
Diff

--- a/ocf/cinder-volume
+++ b/ocf/cinder-volume
@@ -221,10 +221,91 @@ cinder_volume_status() {
fi
}
+# Ask the Cinder API whether the cinder-volume service is registered and up.
+#
+# Sources /etc/nova/openrc, then runs the inline Python script below, which
+# prints two shell-style assignments on stdout:
+#     exists=True|False
+#     is_up=True|False
+# The exit status is that of the script: 1 when the query raised (the
+# exception text is written to stderr).
+cinder_volume_get_service_status() {
+    source /etc/nova/openrc
+    python - <<'EOF'
+from __future__ import print_function
+
+from cinderclient import client as cinder_client
+import keyring
+from keystoneclient import session as keystone_session
+from keystoneclient.auth.identity import v3
+import os
+import sys
+
+DEFAULT_OS_VOLUME_API_VERSION = 2
+CINDER_CLIENT_TIMEOUT_SEC = 3
+
+def create_cinder_client():
+    """Build a Cinder client from the OS_* environment variables.
+
+    The password comes from the 'CGCS' keyring entry for OS_USERNAME.
+    """
+    password = keyring.get_password('CGCS', os.environ['OS_USERNAME'])
+    auth = v3.Password(
+        user_domain_name=os.environ['OS_USER_DOMAIN_NAME'],
+        username=os.environ['OS_USERNAME'],
+        password=password,
+        project_domain_name=os.environ['OS_PROJECT_DOMAIN_NAME'],
+        project_name=os.environ['OS_PROJECT_NAME'],
+        auth_url=os.environ['OS_AUTH_URL'])
+    session = keystone_session.Session(auth=auth)
+    return cinder_client.Client(
+        DEFAULT_OS_VOLUME_API_VERSION,
+        username=os.environ['OS_USERNAME'],
+        auth_url=os.environ['OS_AUTH_URL'],
+        region_name=os.environ['OS_REGION_NAME'],
+        session=session, timeout=CINDER_CLIENT_TIMEOUT_SEC)
+
+def service_is_up(s):
+    """Return True when the service record reports state 'up'."""
+    return s.state == 'up'
+
+def cinder_volume_service_status(cc):
+    """Return (exists, is_up) for cinder-volume on host 'controller'.
+
+    Returns (False, False) when no matching service is listed and raises
+    Exception when several are listed but disagree on their state.
+    """
+    services = cc.services.list(
+        host='controller',
+        binary='cinder-volume')
+    if not services:
+        return (False, False)
+    is_up = service_is_up(services[0])
+    # More than one entry is tolerated only if they all agree.
+    if any(service_is_up(svc) != is_up for svc in services[1:]):
+        raise Exception(('Found multiple cinder-volume '
+                         'services with different '
+                         'statuses: {}').format(
+                             [svc.to_dict() for svc in services]))
+    return (True, is_up)
+
+# Any failure is reported on stderr with a non-zero exit status.
+try:
+    status = cinder_volume_service_status(
+        create_cinder_client())
+    print(('exists={0[0]}\n'
+           'is_up={0[1]}').format(status))
+except Exception as e:
+    print(str(e), file=sys.stderr)
+    sys.exit(1)
+EOF
+}
+
cinder_volume_monitor() {
local rc
local pid
local volume_amqp_check
+ local check_service_status=$1; shift
cinder_volume_status
rc=$?
@@ -279,6 +342,53 @@ cinder_volume_monitor() {
touch $VOLUME_FAIL_ON_AMQP_CHECK_FILE >> /dev/null 2>&1
+    # Optionally confirm through the Cinder API that the service is up.
+    # The variable is quoted so the test stays valid when it is empty.
+    if [ "$check_service_status" = "check-service-status" ]; then
+        local retries_left=3
+        local retry_interval=3
+        local status
+        local exists
+        local is_up
+
+        while [ $retries_left -gt 0 ]; do
+            retries_left=$((retries_left - 1))
+            # Plain assignment (no 'local' here) so that $? is the exit
+            # status of the query itself.
+            status=$(cinder_volume_get_service_status)
+            rc=$?
+            if [ $rc -ne 0 ]; then
+                ocf_log err "Unable to get Cinder Volume status"
+                if [ $retries_left -gt 0 ]; then
+                    sleep $retry_interval
+                    continue
+                else
+                    return $OCF_ERR_GENERIC
+                fi
+            fi
+
+            # $status carries the exists=/is_up= assignments; reset both
+            # first so a value from a previous attempt is never reused.
+            exists=""
+            is_up=""
+            eval "$status"
+
+            if [ "$exists" = "True" ] && [ "$is_up" = "False" ]; then
+                ocf_log err "Cinder Volume service status is down"
+                if [ $retries_left -gt 0 ]; then
+                    sleep $retry_interval
+                    continue
+                else
+                    ocf_log info "Trigger Cinder Volume guru meditation report"
+                    ocf_run kill -s USR2 $pid
+                    return $OCF_ERR_GENERIC
+                fi
+            fi
+
+            break
+        done
+    fi
+
ocf_log debug "OpenStack Cinder Volume (cinder-volume) monitor succeeded"
return $OCF_SUCCESS
}
@@ -386,7 +489,7 @@ cinder_volume_stop() {
# SIGTERM didn't help either, try SIGKILL
ocf_log info "OpenStack Cinder Volume (cinder-volume) failed to stop after ${shutdown_timeout}s \
using SIGTERM. Trying SIGKILL ..."
- ocf_run kill -s KILL $pid
+ ocf_run kill -s KILL -$pid
fi
cinder_volume_confirm_stop
@@ -414,7 +517,7 @@ case "$1" in
start) cinder_volume_start;;
stop) cinder_volume_stop;;
status) cinder_volume_status;;
- monitor) cinder_volume_monitor;;
+ monitor) cinder_volume_monitor "check-service-status";;
validate-all) ;;
*) usage
exit $OCF_ERR_UNIMPLEMENTED;;