Move STX-specific files from stx-ceph to stx-integ

By moving STX-specific files from stx-ceph to stx-integ, we
decouple STX code from the upstream Ceph repo. Changes to these
STX files no longer require a pull request against the stx-ceph
repo.

Change-Id: Ifaaae452798561ddfa7557cf59b072535bec7687
Story: 2002844
Task: 28993
Signed-off-by: Wei Zhou <wei.zhou@windriver.com>
Wei Zhou 2019-01-21 16:39:07 -05:00
parent e12b3a436f
commit ed8655fa77
13 changed files with 2718 additions and 4 deletions


@@ -1,4 +1,5 @@
 SRC_DIR="$CGCS_BASE/git/ceph"
+COPY_LIST="files/*"
 TIS_BASE_SRCREV=3f07f7ff1a5c7bfa8d0de12c966594d5fb7cf4ec
 TIS_PATCH_VER=GITREVCOUNT
 BUILD_IS_BIG=40
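
A note on the version fields: TIS_PATCH_VER=GITREVCOUNT derives the package
patch version from the git history rather than a hard-coded number. A minimal
sketch of the idea, assuming the build tooling counts commits on top of
TIS_BASE_SRCREV (the actual build-system implementation may differ):

    # hypothetical illustration of GITREVCOUNT, not the build system's code
    cd "$CGCS_BASE/git/ceph"
    git rev-list --count "${TIS_BASE_SRCREV}..HEAD"   # -> patch version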


@@ -1 +0,0 @@
-../../../../git/ceph/ceph.spec

ceph/ceph/centos/ceph.spec Normal file, 1884 lines

File diff suppressed because it is too large


@@ -0,0 +1,282 @@
#!/bin/bash
#
# Copyright (c) 2019 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# This script is a helper wrapper for pmon monitoring of ceph
# processes. The "/etc/init.d/ceph" script does not know if ceph is
# running on the node. For example when the node is locked, ceph
# processes are not running. In that case we do not want pmond to
# monitor these processes.
#
# The script "/etc/services.d/<node>/ceph.sh" will create the file
# "/var/run/.ceph_started" when ceph is running and remove it when
# is not.
#
# The script also extracts one or more ceph process names that are
# reported as 'not running' or 'dead' or 'failed' by '/etc/init.d/ceph status'
# and writes the names to a text file: /tmp/ceph_status_failure.txt for
# pmond to access. pmond adds the text to logs and alarms. Examples of text
# written to the file by this script:
# 'osd.1'
# 'osd.1, osd.2'
# 'mon.storage-0'
# 'mon.storage-0, osd.2'
#
# Moreover, for processes that are reported as 'hung' by '/etc/init.d/ceph status'
# the script will try to increase their logging to 'debug' for a configurable
# interval. With logging increased it captures a few stack traces; then, at the
# end of this interval, it triggers a core dump and kills the process.
#
# Return values:
# zero     - /etc/init.d/ceph returned success or ceph is not running on the node
# non-zero - /etc/init.d/ceph returned a failure or invalid syntax
#
source /usr/bin/tsconfig
source /etc/platform/platform.conf
CEPH_SCRIPT="/etc/init.d/ceph"
CEPH_FILE="$VOLATILE_PATH/.ceph_started"
CEPH_RESTARTING_FILE="$VOLATILE_PATH/.ceph_restarting"
CEPH_GET_STATUS_FILE="$VOLATILE_PATH/.ceph_getting_status"
CEPH_STATUS_FAILURE_TEXT_FILE="/tmp/ceph_status_failure.txt"
BINDIR=/usr/bin
SBINDIR=/usr/sbin
LIBDIR=/usr/lib64/ceph
ETCDIR=/etc/ceph
source $LIBDIR/ceph_common.sh
LOG_PATH=/var/log/ceph
LOG_FILE=$LOG_PATH/ceph-process-states.log
LOG_LEVEL=NORMAL # DEBUG
verbose=0
DATA_PATH=$VOLATILE_PATH/ceph_hang # folder where we keep state information
mkdir -p $DATA_PATH # make sure folder exists
MONITORING_INTERVAL=15
TRACE_LOOP_INTERVAL=5
GET_STATUS_TIMEOUT=120
CEPH_STATUS_TIMEOUT=20
WAIT_FOR_CMD=1
RC=0
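# Extra arguments may be supplied through the ARGS environment variable as a
# ';'-separated list; they are appended to the command-line arguments below.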
args=("$@")
if [ ! -z "$ARGS" ]; then
IFS=";" read -r -a new_args <<< "$ARGS"
args+=("${new_args[@]}")
fi
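# Block while a concurrent 'status' run (marked by the CEPH_GET_STATUS_FILE
# checkpoint) is in progress, up to GET_STATUS_TIMEOUT seconds.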
wait_for_status ()
{
timeout=$GET_STATUS_TIMEOUT # wait for status no more than $timeout seconds
while [ -f ${CEPH_GET_STATUS_FILE} ] && [ $timeout -gt 0 ]; do
sleep 1
let timeout-=1
done
if [ $timeout -eq 0 ]; then
wlog "-" "WARN" "Getting status takes more than ${GET_STATUS_TIMEOUT}s, continuing"
rm -f $CEPH_GET_STATUS_FILE
fi
}
start ()
{
if [ -f ${CEPH_FILE} ]; then
wait_for_status
${CEPH_SCRIPT} start $1
RC=$?
else
# Ceph is not running on this node, return success
exit 0
fi
}
stop ()
{
wait_for_status
${CEPH_SCRIPT} stop $1
}
restart ()
{
if [ -f ${CEPH_FILE} ]; then
wait_for_status
touch $CEPH_RESTARTING_FILE
${CEPH_SCRIPT} restart $1
rm -f $CEPH_RESTARTING_FILE
else
# Ceph is not running on this node, return success
exit 0
fi
}
log_and_restart_blocked_osds ()
{
# Log info about the blocked osd daemons and then restart them
local names=$1
for name in $names; do
wlog $name "INFO" "Restarting OSD with blocked operations"
${CEPH_SCRIPT} restart $name
done
}
log_and_kill_hung_procs ()
{
# Log info about the hung processes and then kill them; later on pmon will restart them
local names=$1
for name in $names; do
type=`echo $name | cut -c 1-3` # e.g. 'mon', if $name is 'mon.1'
id=`echo $name | cut -c 4- | sed 's/^\\.//'`
get_conf run_dir "/var/run/ceph" "run dir"
get_conf pid_file "$run_dir/$type.$id.pid" "pid file"
pid=$(cat $pid_file)
wlog $name "INFO" "Dealing with hung process (pid:$pid)"
# monitoring interval
wlog $name "INFO" "Increasing log level"
execute_ceph_cmd ret $name "ceph daemon $name config set debug_$type 20/20"
monitoring=$MONITORING_INTERVAL
while [ $monitoring -gt 0 ]; do
if [ $(($monitoring % $TRACE_LOOP_INTERVAL)) -eq 0 ]; then
date=$(date "+%Y-%m-%d_%H-%M-%S")
log_file="$LOG_PATH/hang_trace_${name}_${pid}_${date}.log"
wlog $name "INFO" "Dumping stack trace to: $log_file"
pstack $pid > $log_file &
fi
let monitoring-=1
sleep 1
done
wlog $name "INFO" "Trigger core dump"
kill -ABRT $pid &>/dev/null
rm -f $pid_file # process is dead, core dump is being archived, prepare for restart
# Wait for pending systemd core dumps
sleep 2 # hope systemd_coredump has started meanwhile
deadline=$(( $(date '+%s') + 300 ))
while [[ $(date '+%s') -lt "${deadline}" ]]; do
systemd_coredump_pid=$(pgrep -f "systemd-coredump.*${pid}.*ceph-${type}")
[[ -z "${systemd_coredump_pid}" ]] && break
wlog $name "INFO" "systemd-coredump ceph-${type} in progress: pid ${systemd_coredump_pid}"
sleep 2
done
kill -KILL $pid &>/dev/null
done
}
status ()
{
if [[ "$system_type" == "All-in-one" ]] && [[ "$system_mode" != "simplex" ]] && [[ "$1" == "osd" ]]; then
timeout $CEPH_STATUS_TIMEOUT ceph -s
if [ "$?" -ne 0 ]; then
# Ceph cluster is not accessible. Don't panic, controller swact
# may be in progress.
wlog "-" INFO "Ceph is down, ignoring OSD status."
exit 0
fi
fi
if [ -f ${CEPH_RESTARTING_FILE} ]; then
# Ceph is restarting, we don't report state changes on the first pass
rm -f ${CEPH_RESTARTING_FILE}
exit 0
fi
if [ -f ${CEPH_FILE} ]; then
# Make sure the script does not 'exit' between here and the 'rm -f' below
# or the checkpoint file will be left behind
touch -f ${CEPH_GET_STATUS_FILE}
result=`${CEPH_SCRIPT} status $1`
RC=$?
if [ "$RC" -ne 0 ]; then
erred_procs=`echo "$result" | sort | uniq | awk ' /not running|dead|failed/ {printf "%s ", $1}' | sed 's/://g' | sed 's/, $//g'`
hung_procs=`echo "$result" | sort | uniq | awk ' /hung/ {printf "%s ", $1}' | sed 's/://g' | sed 's/, $//g'`
blocked_ops_procs=`echo "$result" | sort | uniq | awk ' /blocked ops/ {printf "%s ", $1}' | sed 's/://g' | sed 's/, $//g'`
invalid=0
host=`hostname`
if [[ "$system_type" == "All-in-one" ]] && [[ "$system_mode" != "simplex" ]]; then
# On two-node configurations we have a floating monitor
host="controller"
fi
for i in $(echo $erred_procs $hung_procs); do
if [[ "$i" =~ osd.?[0-9]?[0-9]|mon.$host ]]; then
continue
else
invalid=1
fi
done
log_and_restart_blocked_osds $blocked_ops_procs
log_and_kill_hung_procs $hung_procs
hung_procs_text=""
for i in $(echo $hung_procs); do
hung_procs_text+="$i(process hung) "
done
rm -f $CEPH_STATUS_FAILURE_TEXT_FILE
if [ $invalid -eq 0 ]; then
text=""
for i in $erred_procs; do
text+="$i, "
done
for i in $hung_procs; do
text+="$i (process hang), "
done
echo "$text" | tr -d '\n' > $CEPH_STATUS_FAILURE_TEXT_FILE
else
echo "$host: '${CEPH_SCRIPT} status $1' result contains invalid process names: $erred_procs"
echo "Undetermined osd or monitor id" > $CEPH_STATUS_FAILURE_TEXT_FILE
fi
fi
rm -f ${CEPH_GET_STATUS_FILE}
if [[ $RC == 0 ]] && [[ "$1" == "mon" ]] && [[ "$system_type" == "All-in-one" ]] && [[ "$system_mode" != "simplex" ]]; then
# SM needs exit code != 0 from 'status mon' argument of the init script on
# standby controller otherwise it thinks that the monitor is running and
# tries to stop it.
# '/etc/init.d/ceph status mon' checks the status of monitors configured in
# /etc/ceph/ceph.conf and if it should be running on current host.
# If it should not be running it just exits with code 0. This is what
# happens on the standby controller.
# When the floating monitor is running on the active controller, /var/lib/ceph/mon
# of the standby is not mounted (the Ceph monitor partition is DRBD-synced).
test -e "/var/lib/ceph/mon/ceph-controller"
if [ "$?" -ne 0 ]; then
exit 3
fi
fi
else
# Ceph is not running on this node, return success
exit 0
fi
}
case "${args[0]}" in
start)
start ${args[1]}
;;
stop)
stop ${args[1]}
;;
restart)
restart ${args[1]}
;;
status)
status ${args[1]}
;;
*)
echo "Usage: $0 {start|stop|restart|status} [{mon|osd|osd.<number>|mon.<hostname>}]"
exit 1
;;
esac
exit $RC
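
A quick way to exercise the wrapper by hand (a hypothetical session; the pmon
configuration later in this commit installs it as /etc/init.d/ceph-init-wrapper):

    /etc/init.d/ceph-init-wrapper status osd ; echo "rc=$?"
    # on failure, this holds the text pmond attaches to logs and alarms:
    cat /tmp/ceph_status_failure.txt 2>/dev/null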


@@ -1,6 +1,6 @@
 #!/usr/bin/python
 #
-# Copyright (c) 2016 Wind River Systems, Inc.
+# Copyright (c) 2019 Wind River Systems, Inc.
 #
 # SPDX-License-Identifier: Apache-2.0
 #
@@ -12,6 +12,7 @@ import re
 import subprocess
 import sys
+DEVICE_NAME_NVME = "nvme"
 
 #########
 # Utils #
@@ -85,7 +86,11 @@ def is_partitioning_correct(disk_path, partition_sizes):
     partition_index = 1
     for size in partition_sizes:
         # Check that each partition size matches the one in input
-        partition_node = disk_node + str(partition_index)
+        if DEVICE_NAME_NVME in disk_node:
+            partition_node = '{}p{}'.format(disk_node, str(partition_index))
+        else:
+            partition_node = '{}{}'.format(disk_node, str(partition_index))
         output, _, _ = command(["udevadm", "settle", "-E", partition_node])
         cmd = ["parted", "-s", partition_node, "unit", "MiB", "print"]
         output, _, _ = command(cmd)
@@ -118,7 +123,7 @@ def create_partitions(disk_path, partition_sizes):
     # GPT partitions on the storage node so nothing to remove in this case
     links = []
     if os.path.isdir(DISK_BY_PARTUUID):
-        links = [ os.path.join(DISK_BY_PARTUUID,l) for l in os.listdir(DISK_BY_PARTUUID) 
+        links = [ os.path.join(DISK_BY_PARTUUID,l) for l in os.listdir(DISK_BY_PARTUUID)
                   if os.path.islink(os.path.join(DISK_BY_PARTUUID, l)) ]
 
     # Erase all partitions on current node by creating a new GPT table
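
The branch above exists because the kernel names partitions differently when
the parent device name ends in a digit: NVMe namespaces get a 'p' separator
before the partition number. A small shell illustration of the same rule
(assumed naming, mirroring the Python logic above):

    # /dev/sda     -> /dev/sda1
    # /dev/nvme0n1 -> /dev/nvme0n1p1
    disk=/dev/nvme0n1
    case "$disk" in
        *nvme*) part="${disk}p1" ;;
        *)      part="${disk}1"  ;;
    esac
    echo "$part"   # /dev/nvme0n1p1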


@@ -0,0 +1,18 @@
[Unit]
Description=radosgw RESTful rados gateway
After=network.target
#After=remote-fs.target nss-lookup.target network-online.target time-sync.target
#Wants=network-online.target
[Service]
Type=forking
Restart=no
KillMode=process
RemainAfterExit=yes
ExecStart=/etc/rc.d/init.d/ceph-radosgw start
ExecStop=/etc/rc.d/init.d/ceph-radosgw stop
ExecReload=/etc/rc.d/init.d/ceph-radosgw reload
[Install]
WantedBy=multi-user.target
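
Assuming this unit is installed as ceph-radosgw.service (the target file name
is not shown in this view), it would be enabled the usual way:

    systemctl enable ceph-radosgw.service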


@@ -0,0 +1,92 @@
#!/bin/sh
### BEGIN INIT INFO
# Provides: ceph-rest-api
# Required-Start: $ceph
# Required-Stop: $ceph
# Default-Start: 2 3 4 5
# Default-Stop: 0 1 6
# Short-Description: Ceph REST API daemon
# Description: Ceph REST API daemon
### END INIT INFO
DESC="ceph-rest-api"
DAEMON="/usr/bin/ceph-rest-api"
RUNDIR="/var/run/ceph"
PIDFILE="${RUNDIR}/ceph-rest-api.pid"
start()
{
if [ -e $PIDFILE ]; then
PIDDIR=/proc/$(cat $PIDFILE)
if [ -d ${PIDDIR} ]; then
echo "$DESC already running."
exit 0
else
echo "Removing stale PID file $PIDFILE"
rm -f $PIDFILE
fi
fi
echo -n "Starting $DESC..."
mkdir -p $RUNDIR
start-stop-daemon --start --quiet --background \
--pidfile ${PIDFILE} --make-pidfile --exec ${DAEMON}
if [ $? -eq 0 ]; then
echo "done."
else
echo "failed."
exit 1
fi
}
stop()
{
echo -n "Stopping $DESC..."
start-stop-daemon --stop --quiet --pidfile $PIDFILE
if [ $? -eq 0 ]; then
echo "done."
else
echo "failed."
fi
rm -f $PIDFILE
}
status()
{
pid=`cat $PIDFILE 2>/dev/null`
if [ -n "$pid" ]; then
if ps -p $pid >/dev/null 2>&1 ; then
echo "$DESC is running"
exit 0
else
echo "$DESC is not running but has pid file"
exit 1
fi
fi
echo "$DESC is not running"
exit 3
}
case "$1" in
start)
start
;;
stop)
stop
;;
restart|force-reload|reload)
stop
start
;;
status)
status
;;
*)
echo "Usage: $0 {start|stop|force-reload|restart|reload|status}"
exit 1
;;
esac
exit 0
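
The status() exit codes follow LSB conventions: 0 when running, 1 when dead
but a pid file exists, 3 when not running. A quick check (hypothetical
session; the path matches the systemd unit below):

    /etc/rc.d/init.d/ceph-rest-api status ; echo "rc=$?"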


@@ -0,0 +1,16 @@
[Unit]
Description=Ceph REST API
After=network.target ceph.target
[Service]
Type=forking
Restart=no
KillMode=process
RemainAfterExit=yes
ExecStart=/etc/rc.d/init.d/ceph-rest-api start
ExecStop=/etc/rc.d/init.d/ceph-rest-api stop
ExecReload=/etc/rc.d/init.d/ceph-rest-api reload
[Install]
WantedBy=multi-user.target

ceph/ceph/files/ceph.conf Normal file, 50 lines

@@ -0,0 +1,50 @@
[global]
# Unique ID for the cluster.
fsid = %CLUSTER_UUID%
# Public network where the monitor is connected to, e.g., 128.224.0.0/16
#public network = 127.0.0.1/24
# For version 0.55 and beyond, you must explicitly enable
# or disable authentication with "auth" entries in [global].
auth_cluster_required = cephx
auth_service_required = cephx
auth_client_required = cephx
osd_journal_size = 1024
# Uncomment the following line if you are mounting with ext4
# filestore xattr use omap = true
# Number of replicas of objects. Write an object 2 times.
# Cluster cannot reach an active + clean state until there's enough OSDs
# to handle the number of copies of an object. In this case, it requires
# at least 2 OSDs
osd_pool_default_size = 2
# Allow writing one copy in a degraded state.
osd_pool_default_min_size = 1
# Ensure you have a realistic number of placement groups. We recommend
# approximately 100 per OSD. E.g., total number of OSDs multiplied by 100
# divided by the number of replicas (i.e., osd pool default size). So for
# 2 OSDs and osd pool default size = 2, we'd recommend approximately
# (100 * 2) / 2 = 100.
osd_pool_default_pg_num = 64
osd_pool_default_pgp_num = 64
osd_crush_chooseleaf_type = 1
setuser match path = /var/lib/ceph/$type/$cluster-$id
# Override Jewel default of 2 reporters. StarlingX has replication factor 2
mon_osd_min_down_reporters = 1
# Use Hammer's report interval default value
osd_mon_report_interval_max = 120
[osd]
osd_mkfs_type = xfs
osd_mkfs_options_xfs = "-f"
osd_mount_options_xfs = "rw,noatime,inode64,logbufs=8,logbsize=256k"
[mon]
mon warn on legacy crush tunables = false
# Quiet new warnings on move to Hammer
mon pg warn max per osd = 2048
mon pg warn max object skew = 0
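
The placement-group guidance in the comments works out as a simple calculation;
a quick sanity check with this file's values (2 OSDs, replication factor 2):

    osds=2 replicas=2
    echo $(( osds * 100 / replicas ))   # 100, then rounded to a power of two;
                                        # this file picks 64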


@@ -0,0 +1,26 @@
[process]
process = ceph
script = /etc/init.d/ceph-init-wrapper
style = lsb
severity = major ; minor, major, critical
restarts = 3 ; restart retries before error assertion
interval = 30 ; number of seconds to wait between restarts
mode = status ; Monitoring mode: passive (default) or active
; passive: process death monitoring (default: always)
; active : heartbeat monitoring, i.e. request / response messaging
; status : determine process health by executing the "status" command
; "start" is used to start the process(es) again
; ignore : do not monitor or stop monitoring
; Status and Active Monitoring Options
period = 30 ; monitor period in seconds
timeout = 120 ; for active mode, messaging timeout period in seconds, must be shorter than period
; for status mode, max amount of time for a command to execute
; Status Monitoring Options
start_arg = start ; start argument for the script
status_arg = status ; status argument for the script
status_failure_text = /tmp/ceph_status_failure.txt ; text to be added to alarms or logs, this is optional


@@ -0,0 +1,16 @@
[Unit]
Description=StarlingX Ceph Startup
After=network.target
[Service]
Type=forking
Restart=no
KillMode=process
RemainAfterExit=yes
ExecStart=/etc/rc.d/init.d/ceph start
ExecStop=/etc/rc.d/init.d/ceph stop
PIDFile=/var/run/ceph/ceph.pid
[Install]
WantedBy=multi-user.target

ceph/ceph/files/ceph.sh Executable file, 77 lines

@@ -0,0 +1,77 @@
#!/bin/bash
INITDIR=/etc/init.d
LOGFILE=/var/log/ceph/ceph-init.log
CEPH_FILE=/var/run/.ceph_started
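# Installed as /etc/services.d/<node>/ceph.sh (see ceph-init-wrapper in this
# commit); the CEPH_FILE flag tells pmon whether ceph should be monitored.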
# Get our nodetype
. /etc/platform/platform.conf
# Exit immediately if ceph not configured (i.e. no mon in the config file)
if ! grep -q "mon\." /etc/ceph/ceph.conf
then
exit 0
fi
logecho ()
{
echo $1
date >> ${LOGFILE}
echo $1 >> ${LOGFILE}
}
start ()
{
if [[ "$nodetype" == "controller" ]] || [[ "$nodetype" == "storage" ]]; then
logecho "Starting ceph services..."
${INITDIR}/ceph start >> ${LOGFILE} 2>&1
RC=$?
if [ ! -f ${CEPH_FILE} ]; then
touch ${CEPH_FILE}
fi
else
logecho "No ceph services on ${nodetype} node"
exit 0
fi
}
stop ()
{
if [[ "$nodetype" == "controller" ]] || [[ "$nodetype" == "storage" ]]; then
if [[ "$system_type" == "All-in-one" ]] && [[ "$system_mode" == "simplex" ]]; then
logecho "Ceph services will continue to run on node"
exit 0
fi
logecho "Stopping ceph services..."
if [ -f ${CEPH_FILE} ]; then
rm -f ${CEPH_FILE}
fi
${INITDIR}/ceph stop >> ${LOGFILE} 2>&1
RC=$?
else
logecho "No ceph services on ${nodetype} node"
exit 0
fi
}
RC=0
case "$1" in
start)
start
;;
stop)
stop
;;
*)
echo "Usage: $0 {start|stop}"
exit 1
;;
esac
logecho "RC was: $RC"
exit $RC


@@ -0,0 +1,246 @@
#!/usr/bin/python
#
# Copyright (c) 2019 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
#
# Wait for one or a group of OSDs to match one or a group of statuses
# as reported by "ceph osd tree".
#
# Examples:
# - wait for osd 0 to be up:
# osd-wait-status -o 0 -s up
#
# - wait for osd 0 and osd 1 to be up:
# osd-wait-status -o 0 1 -s up
#
# The amount of time spent waiting for OSDs to match a status can
# be limited by specifying:
#
# - the maximum retry count; the script gives up if the status doesn't
# match the desired one after more than retry-count attempts.
# The interval between attempts is controlled by the "-i" flag.
# Example:
# osd-wait-status -o 0 -s up -c 2 -i 3
# will call "ceph osd tree" once to get the status of osd 0 and if
# it's not "up" then it will try one more time after 3 seconds.
#
# - a deadline as the maximum interval of time the script will loop
# waiting for OSDs to match the status. The interval between attempts
# is controlled by the "-i" flag.
# Example:
# osd-wait-status -o 0 -s up -d 10 -i 3
# will call "ceph osd tree" until either osd 0 status is "up" or
# no more than 10 seconds have passed, that's 3-4 attempts depending
# on how much time it takes to run "ceph osd tree"
#
# Status match can be reversed by using "-n" flag.
# Example:
# osd-wait-status -o 0 -n -s up
# waits until osd 0 status is NOT up.
#
# osd-wait-status does not allow matching arbitrary combinations of
# OSDs and statuses. For example: "osd 0 up and osd 1 down" is not
# supported.
#
# Return code is 0 if OSDs match expected status before the
# retry count*interval / deadline limits are reached.
import argparse
import json
import logging
import retrying
import subprocess
import sys
import time
logging.basicConfig(level=logging.DEBUG)
LOG = logging.getLogger('osd-wait-status')
CEPH_BINARY_PATH = '/usr/bin/ceph'
RETRY_INTERVAL_SEC = 1
RETRY_FOREVER = 0
NO_DEADLINE = 0
class OsdException(Exception):
def __init__(self, message, restartable=False):
super(OsdException, self).__init__(message)
self.restartable = restartable
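# A 'restartable' OsdException marks a transient failure (e.g. the ceph CLI
# exiting non-zero); osd_wait_status retries only exceptions with this flag.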
def get_osd_tree():
command = [CEPH_BINARY_PATH,
'osd', 'tree', '--format', 'json']
try:
p = subprocess.Popen(command,
stdout = subprocess.PIPE,
stderr = subprocess.PIPE)
output, error = p.communicate()
if p.returncode != 0:
raise OsdException(
('Command failed: command="{}", '
'returncode={}, output="{}", stderr="{}"').format(
' '.join(command),
p.returncode,
output, error),
restartable=True)
except OSError as e:
raise OsdException(
('Command failed: command="{}", '
'reason="{}"').format(command, str(e)))
try:
return json.loads(output)
except ValueError as e:
raise OsdException(
('JSON decode failed: '
'data="{}", error="{}"').format(
output, e))
def osd_match_status(target_osd, target_status,
reverse_logic):
LOG.info(('Match status: '
'target_osd={}, '
'target status={}, '
'reverse_logic={}').format(
target_osd, target_status, reverse_logic))
tree = get_osd_tree()
osd_status = {}
for node in tree.get('nodes'):
name = node.get('name')
if name in target_osd:
osd_status[name] = node.get('status')
if len(osd_status) == len(target_osd):
break
LOG.info('Current OSD(s) status: {}'.format(osd_status))
for name in target_osd:
if name not in osd_status:
raise OsdException(
('Unable to retrieve status '
'for "{}"').format(
name))
if reverse_logic:
if osd_status[name] not in target_status:
del osd_status[name]
else:
if osd_status[name] in target_status:
del osd_status[name]
if len(osd_status) == 0:
LOG.info('OSD(s) status target reached.')
return True
else:
LOG.info('OSD(s) {}matching status {}: {}'.format(
'' if reverse_logic else 'not ',
target_status,
osd_status.keys()))
return False
def osd_wait_status(target_osd, target_status,
reverse_logic,
retry_count, retry_interval,
deadline):
def retry_if_false(result):
return (result is False)
def retry_if_restartable(exception):
return (isinstance(exception, OsdException)
and exception.restartable)
LOG.info(('Wait options: '
'target_osd={}, '
'target_status={}, '
'reverse_logic={}, '
'retry_count={}, '
'retry_interval={}, '
'deadline={}').format(
target_osd, target_status, reverse_logic,
retry_count, retry_interval, deadline))
kwargs = {
'retry_on_result': retry_if_false,
'retry_on_exception': retry_if_restartable}
if retry_count != RETRY_FOREVER:
kwargs['stop_max_attempt_number'] = retry_count
if deadline != NO_DEADLINE:
kwargs['stop_max_delay'] = deadline * 1000
if retry_interval != 0:
kwargs['wait_fixed'] = retry_interval * 1000
if not len(target_osd):
return
retrying.Retrying(**kwargs).call(
osd_match_status,
target_osd, target_status,
reverse_logic)
def non_negative_integer(value):
value = int(value)
if value < 0:
raise argparse.ArgumentTypeError(
'{} is a negative integer value'.format(value))
return value
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description='Wait for OSD status match')
parser.add_argument(
'-o', '--osd',
nargs='*',
help='osd id',
type=non_negative_integer,
required=True)
parser.add_argument(
'-n', '--not',
dest='reverse_logic',
help='reverse logic: wait for status NOT to match',
action='store_true',
default=False)
parser.add_argument(
'-s', '--status',
nargs='+',
help='status',
type=str,
required=True)
parser.add_argument(
'-c', '--retry-count',
help='retry count',
type=non_negative_integer,
default=RETRY_FOREVER)
parser.add_argument(
'-i', '--retry-interval',
help='retry interval (seconds)',
type=non_negative_integer,
default=RETRY_INTERVAL_SEC)
parser.add_argument(
'-d', '--deadline',
help='deadline (seconds)',
type=non_negative_integer,
default=NO_DEADLINE)
args = parser.parse_args()
start = time.time()
try:
osd_wait_status(
['osd.{}'.format(o) for o in args.osd],
args.status,
args.reverse_logic,
args.retry_count,
args.retry_interval,
args.deadline)
LOG.info('Elapsed time: {:.02f} seconds'.format(
time.time() - start))
sys.exit(0)
except retrying.RetryError as e:
LOG.warning(
('Retry error: {}. '
'Elapsed time: {:.02f} seconds').format(
e, time.time() - start))
except OsdException as e:
LOG.warning(
('OSD wait error: {}. '
'Elapsed time: {:.02f} seconds').format(
e, time.time() - start))
sys.exit(1)
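
Example shell invocations, mirroring the header comments (assuming the script
is installed on PATH as osd-wait-status):

    # wait until osd 0 and osd 1 are up, polling every 3s, for at most 30s
    osd-wait-status -o 0 1 -s up -d 30 -i 3 || echo "OSDs not up in time"
    # wait until osd 0 is NOT up
    osd-wait-status -o 0 -n -s up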


@@ -0,0 +1,2 @@
656b5b63ed7c43bd014bcafd81b001959d5f089f
v10.2.6