247 lines
7.5 KiB
Python
247 lines
7.5 KiB
Python
#!/usr/bin/python
|
|
#
|
|
# Copyright (c) 2019 Wind River Systems, Inc.
|
|
#
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
#
|
|
#
|
|
# Wait for one or a group of OSDs to match one or a group of statuses
|
|
# as reported by "ceph osd tree".
|
|
#
|
|
# Examples:
|
|
# - wait for osd 0 to be up:
|
|
# osd-wait-status -o 0 -s up
|
|
#
|
|
# - wait for osd 0 and osd 1 to be up:
|
|
# osd-wait-status -o 0 1 -s up
|
|
#
|
|
# The amount of time spent waiting for OSDs to match a status can
|
|
# be limited by specifying:
|
|
#
|
|
# - the maximum retry count; the script will fail if the status doesn't
|
|
# match the desired one after more than retry count attempts.
|
|
# The interval between attempts is controlled by the "-i" flag.
|
|
# Example:
|
|
# osd-wait-status -o 0 -s up -c 2 -i 3
|
|
# will call "ceph osd tree" once to get the status of osd 0 and if
|
|
# it's not "up" then it will try one more time after 3 seconds.
|
|
#
|
|
# - a deadline as the maximum interval of time the script is looping
|
|
# waiting for OSDs to match status. The interval between attempts
|
|
# is controlled by the "-i" flag.
|
|
# Example:
|
|
# osd-wait-status -o 0 -s up -d 10 -i 3
|
|
# will call "ceph osd tree" until either osd 0 status is "up" or
|
|
# 10 seconds have passed, that's 3-4 attempts depending
|
|
# on how much time it takes to run "ceph osd tree"
|
|
#
|
|
# Status match can be reversed by using "-n" flag.
|
|
# Example:
|
|
# osd-wait-status -o 0 -n -s up
|
|
# waits until osd 0 status is NOT up.
|
|
#
|
|
# osd-wait-status does not allow matching arbitrary combinations of
|
|
# OSDs and statuses. For example: "osd 0 up and osd 1 down" is not
|
|
# supported.
|
|
#
|
|
# Return code is 0 if OSDs match expected status before the
|
|
# retry count*interval / deadline limits are reached, and 1 otherwise.
|
|
|
|
import argparse
|
|
import json
|
|
import logging
|
|
import retrying
|
|
import subprocess
|
|
import sys
|
|
import time
|
|
|
|
# Location of the ceph command line tool queried for the OSD tree.
CEPH_BINARY_PATH = '/usr/bin/ceph'

# Default pause between attempts, in seconds (see the "-i" flag).
RETRY_INTERVAL_SEC = 1

# Sentinel values meaning "no limit" for the retry count ("-c") and
# for the deadline ("-d") respectively.
RETRY_FOREVER = 0
NO_DEADLINE = 0

# Log everything (DEBUG and above) through a script-specific logger.
logging.basicConfig(level=logging.DEBUG)
LOG = logging.getLogger('osd-wait-status')
|
|
|
|
|
|
class OsdException(Exception):
    """Error raised while querying or matching OSD status.

    Attributes:
        restartable: True when the failed operation may be attempted
            again; False (the default) when it should not be retried.
    """

    def __init__(self, message, restartable=False):
        # Record the retry hint first, then let the base class store
        # the message.
        self.restartable = restartable
        Exception.__init__(self, message)
|
|
|
|
|
|
def get_osd_tree():
    """Run "ceph osd tree --format json" and return the decoded JSON.

    Returns:
        The object produced by json.loads() on the command's stdout.

    Raises:
        OsdException: with restartable=True when the command exits with
            a non-zero return code; with the default restartable=False
            when the command cannot be started (OSError) or when its
            output is not valid JSON.
    """
    command = [CEPH_BINARY_PATH,
               'osd', 'tree', '--format', 'json']
    command_line = ' '.join(command)
    try:
        p = subprocess.Popen(command,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
        output, error = p.communicate()
    except OSError as e:
        raise OsdException(
            ('Command failed: command="{}", '
             'reason="{}"').format(command_line, str(e)))
    if p.returncode != 0:
        # Report stderr as well: it usually carries the actual reason
        # for the failure. A failed command is worth retrying.
        raise OsdException(
            ('Command failed: command="{}", '
             'returncode={}, output="{}", error="{}"').format(
                command_line,
                p.returncode,
                output, error),
            restartable=True)
    try:
        return json.loads(output)
    except ValueError as e:
        raise OsdException(
            ('JSON decode failed: '
             'data="{}", error="{}"').format(
                output, e))
|
|
|
|
|
|
def osd_match_status(target_osd, target_status,
                     reverse_logic):
    """Check once whether all target OSDs satisfy the status condition.

    Args:
        target_osd: names of the OSDs to check, compared against the
            'name' field of the nodes in the OSD tree. Duplicates are
            tolerated.
        target_status: collection of accepted status values, compared
            against the 'status' field of each matching node.
        reverse_logic: when true, the condition is met for an OSD whose
            status is NOT in target_status.

    Returns:
        True when every target OSD satisfies the condition, otherwise
        False.

    Raises:
        OsdException: when a target OSD is not present in the OSD tree
            (or the tree has no nodes), or when get_osd_tree() fails.
    """
    LOG.info(('Match status: '
              'target_osd={}, '
              'target status={}, '
              'reverse_logic={}').format(
        target_osd, target_status, reverse_logic))
    tree = get_osd_tree()
    # Use the distinct names for lookups so that a repeated OSD does
    # not defeat the early exit below.
    wanted = set(target_osd)
    osd_status = {}
    # A tree without a 'nodes' entry is treated as having no nodes.
    for node in tree.get('nodes') or []:
        name = node.get('name')
        if name in wanted:
            osd_status[name] = node.get('status')
            if len(osd_status) == len(wanted):
                break
    LOG.info('Current OSD(s) status: {}'.format(osd_status))
    # Collect the OSDs that do not satisfy the condition yet, without
    # mutating osd_status: deleting entries made a duplicated target
    # look like a missing OSD on its second visit.
    pending = []
    for name in target_osd:
        if name not in osd_status:
            raise OsdException(
                ('Unable to retrieve status '
                 'for "{}"').format(
                    name))
        status_matches = osd_status[name] in target_status
        # Normal mode waits for a match, reversed mode for a mismatch.
        if status_matches == bool(reverse_logic) and name not in pending:
            pending.append(name)
    if not pending:
        LOG.info('OSD(s) status target reached.')
        return True
    LOG.info('OSD(s) {}matching status {}: {}'.format(
        '' if reverse_logic else 'not ',
        target_status,
        pending))
    return False
|
|
|
|
|
|
def osd_wait_status(target_osd, target_status,
                    reverse_logic,
                    retry_count, retry_interval,
                    deadline):
    """Repeatedly call osd_match_status() until it reports success.

    Args:
        target_osd: names of the OSDs to wait for; nothing is done when
            it is empty.
        target_status: accepted status values, passed through to
            osd_match_status().
        reverse_logic: passed through to osd_match_status().
        retry_count: maximum number of attempts; RETRY_FOREVER means no
            attempt limit is configured.
        retry_interval: pause between attempts; 0 means no fixed wait is
            configured. Multiplied by 1000 before being handed to the
            retrying library.
        deadline: overall time limit; NO_DEADLINE means no limit is
            configured. Multiplied by 1000 before being handed to the
            retrying library.

    Returns:
        None. Errors are reported through exceptions raised by the
        retrying call.
    """

    def _result_is_false(outcome):
        # Only an explicit False triggers another attempt.
        return outcome is False

    def _error_is_restartable(error):
        # Only OsdException instances flagged as restartable are retried.
        if not isinstance(error, OsdException):
            return False
        return error.restartable

    LOG.info(('Wait options: '
              'target_osd={}, '
              'target_status={}, '
              'reverse_logic={}, '
              'retry_count={}, '
              'retry_interval={}, '
              'deadline={}').format(
        target_osd, target_status, reverse_logic,
        retry_count, retry_interval, deadline))

    # Nothing to wait for.
    if len(target_osd) == 0:
        return

    retry_options = dict(
        retry_on_result=_result_is_false,
        retry_on_exception=_error_is_restartable)
    if retry_count != RETRY_FOREVER:
        retry_options['stop_max_attempt_number'] = retry_count
    if deadline != NO_DEADLINE:
        retry_options['stop_max_delay'] = deadline * 1000
    if retry_interval != 0:
        retry_options['wait_fixed'] = retry_interval * 1000

    retrier = retrying.Retrying(**retry_options)
    retrier.call(osd_match_status,
                 target_osd, target_status,
                 reverse_logic)
|
|
|
|
|
|
def non_negative_interger(value):
    """Convert a command line argument to a non-negative integer.

    Intended for use as an argparse ``type=`` callable.

    Args:
        value: the raw argument, anything accepted by int().

    Returns:
        The converted integer, guaranteed to be >= 0.

    Raises:
        argparse.ArgumentTypeError: when the converted value is negative.
        ValueError: propagated from int() when value is not an integer.
    """
    value = int(value)
    if value < 0:
        # The exception class is ArgumentTypeError; the lowercase
        # spelling does not exist and raised AttributeError instead.
        raise argparse.ArgumentTypeError(
            '{} is a negative integer value'.format(value))
    return value
|
|
|
|
|
|
if __name__ == "__main__":
    # Command line entry point: parse the options, wait for the OSD(s)
    # to reach the requested status, and exit with 0 on success or 1 when
    # the wait failed (retry limits reached or an OSD error occurred).
    parser = argparse.ArgumentParser(
        description='Wait for OSD status match')
    parser.add_argument(
        '-o', '--osd',
        nargs='*',
        help='osd id',
        type=non_negative_interger,
        required=True)
    parser.add_argument(
        '-n', '--not',
        dest='reverse_logic',
        help='reverse logic: wait for status NOT to match',
        action='store_true',
        default=False)
    parser.add_argument(
        '-s', '--status',
        nargs='+',
        help='status',
        type=str,
        required=True)
    parser.add_argument(
        '-c', '--retry-count',
        help='retry count',
        type=non_negative_interger,
        default=RETRY_FOREVER)
    parser.add_argument(
        '-i', '--retry-interval',
        help='retry interval (seconds)',
        type=non_negative_interger,
        default=RETRY_INTERVAL_SEC)
    parser.add_argument(
        '-d', '--deadline',
        help='deadline (seconds)',
        type=non_negative_interger,
        default=NO_DEADLINE)
    args = parser.parse_args()
    start = time.time()
    try:
        osd_wait_status(
            # Numeric ids from the command line become "osd.<id>" names.
            ['osd.{}'.format(o) for o in args.osd],
            args.status,
            args.reverse_logic,
            args.retry_count,
            args.retry_interval,
            args.deadline)
        LOG.info('Elapsed time: {:.02f} seconds'.format(
            time.time() - start))
        sys.exit(0)
    except retrying.RetryError as e:
        # LOG.warning replaces the deprecated LOG.warn alias.
        LOG.warning(
            ('Retry error: {}. '
             'Elapsed time: {:.02f} seconds').format(
                e, time.time() - start))
    except OsdException as e:
        LOG.warning(
            ('OSD wait error: {}. '
             'Elapsed time: {:.02f} seconds').format(
                e, time.time() - start))
    # Reached only after one of the handled failures above.
    sys.exit(1)
|