test/automated-pytest-suite/testcases/performance/test_detect_failed_controll...

91 lines
3.8 KiB
Python
Executable File

###
#
# Copyright (c) 2020 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
#
# Performance test to check the time until detection of failed controller
###
import datetime
from pytest import mark
from consts.timeout import HostTimeout
from consts.stx import HostAvailState
from keywords import system_helper, host_helper, kube_helper
from utils.clients import ssh
from utils.tis_log import LOG
@mark.robotperformance
def test_detect_failed_controller(no_simplex):
    """Measure how long a failed controller takes to be detected and to recover.

    The test runs 20 iterations. Each iteration:

    1. Opens an SSH session to the second controller returned by
       ``system_helper.get_active_standby_controllers()``, reads a clock
       correction value from ``ntpq -p``, enables SysRq and records the
       current time on that host.
    2. Writes ``b`` to ``/proc/sysrq-trigger`` on that host.
    3. Waits for the host to report the ``AVAILABLE`` availability state and
       for the pods to be healthy.
    4. Reads, from ``/var/log/mtcAgent.log`` on the active controller, the
       timestamp of the last ``MNFA new candidate`` entry and of the last
       ``unlocked-enabled-available`` entry for that host.
    5. Logs and stores ``detection - start`` and ``recovery - start``.

    After the loop the per-iteration values and their averages are logged.

    Args:
        no_simplex: pytest fixture; it is not referenced in the body.
            NOTE(review): presumably skips the test on simplex systems --
            confirm against the fixture definition.

    Raises:
        AssertionError: if ``kube_helper.wait_for_pods_healthy`` does not
            return ``True`` in any iteration.
    """
    iterations = 20

    con_ssh = ssh.ControllerClient.get_active_controller()
    active_controller, controller_host = system_helper.get_active_standby_controllers()

    # Prompt expected once the session on the controller has switched to root.
    controller_su_prompt = r'.*controller\-([0-9]){1,}\:/home/sysadmin#'

    # Takes the line following the active controller's entry in `ntpq -p` and
    # prints its 8th whitespace-separated field.
    # NOTE(review): presumably this is the NTP offset in milliseconds (the
    # value is divided by 1000 below) -- confirm against real ntpq output.
    cmd_get_offset = ("ntpq -p | grep {} -A1 | "
                      "tail -1 | awk '{{print$8}}'".format(active_controller))
    cmd_magic_keys_enable = "echo 1 > /proc/sys/kernel/sysrq"
    # NOTE(review): `print str(...)` is Python 2 print-statement syntax, so the
    # `python` executable on the target host must be Python 2 -- confirm.
    cmd_get_start_date = ("python -c \"import datetime; "
                          "print str(datetime.datetime.now())[:-3]\"")
    cmd_get_end_date = ("cat /var/log/mtcAgent.log | "
                        "grep --color=never \"{} MNFA new candidate\" | "
                        "tail -1 | awk '{{print$1}}'".format(controller_host))
    cmd_get_recovered_date = ("cat /var/log/mtcAgent.log | "
                              "grep --color=never '{} unlocked-enabled-available' | "
                              "tail -1 | awk '{{print$1}}'".format(controller_host))
    # SysRq 'b': immediate reboot of the host the command is executed on.
    cmd_trigger_reboot = "echo b > /proc/sysrq-trigger"

    res = []
    rec_res = []
    for i in range(iterations):
        LOG.tc_step("Start of iter {}".format(i))

        with host_helper.ssh_to_host(controller_host) as node_ssh:
            offset = float(node_ssh.exec_cmd(cmd=cmd_get_offset, get_exit_code=False)[1]) / 1000
            # Switch to a root shell so the redirections into /proc succeed.
            node_ssh.send_sudo(cmd="su")
            node_ssh.expect(controller_su_prompt)
            node_ssh.send_sudo(cmd=cmd_magic_keys_enable)
            node_ssh.expect(controller_su_prompt)
            # Start time is sampled right before the reboot is triggered.
            st = node_ssh.exec_cmd(cmd=cmd_get_start_date, get_exit_code=False,
                                   blob=controller_su_prompt)[1]
            node_ssh.exec_sudo_cmd(cmd_trigger_reboot, get_exit_code=False)

        system_helper.wait_for_hosts_states(controller_host, check_interval=20,
                                            availability=HostAvailState.AVAILABLE)
        pods_health = kube_helper.wait_for_pods_healthy(check_interval=20,
                                                        timeout=HostTimeout.REBOOT)
        assert pods_health is True, "Check PODs health has failed"

        # Shift the start time by the value read from ntpq; presumably this
        # aligns the rebooted host's clock with the active controller's clock,
        # whose log provides the timestamps below.
        st_date = datetime.datetime.fromtimestamp(
            datetime.datetime.strptime(st, '%Y-%m-%d %H:%M:%S.%f').timestamp() - offset)
        et = con_ssh.exec_cmd(cmd=cmd_get_end_date, get_exit_code=False)[1]
        et_date = datetime.datetime.strptime(et, '%Y-%m-%dT%H:%M:%S.%f')
        er = con_ssh.exec_cmd(cmd=cmd_get_recovered_date, get_exit_code=False)[1]
        er_date = datetime.datetime.strptime(er, '%Y-%m-%dT%H:%M:%S.%f')

        diff = et_date - st_date
        rec_diff = er_date - st_date
        LOG.info(("\noffset = {}\n"
                  "start time = {}\n"
                  "end time = {}\n"
                  "recover time = {}".format(offset, st, et, er)))
        LOG.info("\ndiff = {}".format(diff))
        LOG.info("\nrecover diff = {}".format(rec_diff))
        res.append(diff)
        rec_res.append(rec_diff)

    def calc_avg(lst):
        """Log every timedelta in *lst* with its position and return the mean."""
        rtrn_sum = datetime.timedelta()
        # enumerate() yields the real position; list.index() would report the
        # first occurrence for any repeated value.
        for idx, delta in enumerate(lst):
            LOG.info("Iter {}: {}".format(idx, delta))
            rtrn_sum += delta
        return rtrn_sum / len(lst)

    final_res = calc_avg(res)
    final_rec_res = calc_avg(rec_res)
    LOG.info("Avg time is : {}".format(final_res))
    LOG.info("Avg rec time is : {}".format(final_rec_res))