--- a/ocf/cinder-volume
+++ b/ocf/cinder-volume
@@ -221,10 +221,86 @@ cinder_volume_status() {
     fi
 }
 
+# Query the Cinder API for the state of the cinder-volume service.
+#
+# Sources the OpenStack credentials from /etc/nova/openrc and runs an
+# inline Python helper. On success the helper prints two shell
+# assignments on stdout (exists=True|False and is_up=True|False) and
+# exits 0; on any error it prints the message on stderr and exits 1.
+cinder_volume_get_service_status() {
+    source /etc/nova/openrc
+    python - <<'EOF'
+from __future__ import print_function
+
+from cinderclient import client as cinder_client
+import keyring
+from keystoneclient import session as keystone_session
+from keystoneclient.auth.identity import v3
+import os
+import sys
+
+DEFAULT_OS_VOLUME_API_VERSION = 2
+CINDER_CLIENT_TIMEOUT_SEC = 3
+
+def create_cinder_client():
+    """Build a Cinder client from the OS_* environment variables."""
+    # The password comes from the 'CGCS' keyring entry for OS_USERNAME.
+    password = keyring.get_password('CGCS', os.environ['OS_USERNAME'])
+    auth = v3.Password(
+        user_domain_name=os.environ['OS_USER_DOMAIN_NAME'],
+        username=os.environ['OS_USERNAME'],
+        password=password,
+        project_domain_name=os.environ['OS_PROJECT_DOMAIN_NAME'],
+        project_name=os.environ['OS_PROJECT_NAME'],
+        auth_url=os.environ['OS_AUTH_URL'])
+    session = keystone_session.Session(auth=auth)
+    return cinder_client.Client(
+        DEFAULT_OS_VOLUME_API_VERSION,
+        username=os.environ['OS_USERNAME'],
+        auth_url=os.environ['OS_AUTH_URL'],
+        region_name=os.environ['OS_REGION_NAME'],
+        session=session,
+        timeout=CINDER_CLIENT_TIMEOUT_SEC)
+
+def service_is_up(s):
+    """Return True when the service record reports state 'up'."""
+    return s.state == 'up'
+
+def cinder_volume_service_status(cc):
+    """Return (exists, is_up) for cinder-volume on host 'controller'.
+
+    Raises Exception when several matching services disagree on state.
+    """
+    services = cc.services.list(
+        host='controller',
+        binary='cinder-volume')
+    if not services:
+        return (False, False)
+    is_up = service_is_up(services[0])
+    # All matching services must agree before one status is reported.
+    if any(service_is_up(s) != is_up for s in services[1:]):
+        raise Exception(('Found multiple cinder-volume '
+                         'services with different '
+                         'statuses: {}').format(
+                             [s.to_dict() for s in services]))
+    return (True, is_up)
+
+try:
+    status = cinder_volume_service_status(
+        create_cinder_client())
+    print(('exists={0[0]}\n'
+           'is_up={0[1]}').format(status))
+except Exception as e:
+    print(str(e), file=sys.stderr)
+    sys.exit(1)
+EOF
+}
+
 cinder_volume_monitor() {
     local rc
     local pid
     local volume_amqp_check
+    local check_service_status="${1:-}"; [ $# -gt 0 ] && shift
 
     cinder_volume_status
     rc=$?
@@ -279,6 +355,49 @@ cinder_volume_monitor() {
 
     touch $VOLUME_FAIL_ON_AMQP_CHECK_FILE >> /dev/null 2>&1
 
+    # Optional deep check: ask the Cinder API whether the service is up,
+    # retrying before the monitor is reported as failed.
+    if [ "$check_service_status" = "check-service-status" ]; then
+        local retries_left=3
+        local retry_interval=3
+        local status
+        local exists
+        local is_up
+
+        while [ "$retries_left" -gt 0 ]; do
+            retries_left=$((retries_left - 1))
+            # Assigned separately from "local" so that $? is the helper's
+            # exit code rather than the exit code of "local".
+            status=$(cinder_volume_get_service_status)
+            rc=$?
+            if [ $rc -ne 0 ]; then
+                ocf_log err "Unable to get Cinder Volume status"
+                if [ "$retries_left" -gt 0 ]; then
+                    sleep $retry_interval
+                    continue
+                fi
+                return $OCF_ERR_GENERIC
+            fi
+
+            # $status holds the helper's "exists=..." and "is_up=..."
+            # assignments; quoted so each line is evaluated verbatim.
+            eval "$status"
+
+            if [ "$exists" = "True" ] && [ "$is_up" = "False" ]; then
+                ocf_log err "Cinder Volume service status is down"
+                if [ "$retries_left" -gt 0 ]; then
+                    sleep $retry_interval
+                    continue
+                fi
+                ocf_log info "Trigger Cinder Volume guru meditation report"
+                ocf_run kill -s USR2 $pid
+                return $OCF_ERR_GENERIC
+            fi
+
+            break
+        done
+    fi
+
     ocf_log debug "OpenStack Cinder Volume (cinder-volume) monitor succeeded"
     return $OCF_SUCCESS
 }
@@ -386,7 +505,7 @@ cinder_volume_stop() {
         # SIGTERM didn't help either, try SIGKILL
         ocf_log info "OpenStack Cinder Volume (cinder-volume) failed to stop after ${shutdown_timeout}s \
           using SIGTERM. Trying SIGKILL ..."
-        ocf_run kill -s KILL $pid
+        ocf_run kill -s KILL -$pid
     fi
 
     cinder_volume_confirm_stop
@@ -414,7 +533,7 @@ case "$1" in
   start)        cinder_volume_start;;
   stop)         cinder_volume_stop;;
   status)       cinder_volume_status;;
-  monitor)      cinder_volume_monitor;;
+  monitor)      cinder_volume_monitor "check-service-status";;
   validate-all) ;;
   *)            usage
                 exit $OCF_ERR_UNIMPLEMENTED;;