288 lines
13 KiB
Python
288 lines
13 KiB
Python
#
|
||
# Copyright (c) 2020 Wind River Systems, Inc.
|
||
#
|
||
# SPDX-License-Identifier: Apache-2.0
|
||
#
|
||
|
||
import time
|
||
|
||
from pytest import fixture
|
||
|
||
from utils import cli
|
||
from utils.tis_log import LOG
|
||
from utils.clients.ssh import ControllerClient
|
||
from utils import table_parser
|
||
from consts.proj_vars import ProjVar
|
||
from consts.auth import Tenant
|
||
from consts.stx import SubcloudStatus, EventLogID
|
||
from consts.timeout import DCTimeout
|
||
from keywords import dc_helper, system_helper
|
||
|
||
# Set the level of stress you want to test
|
||
ALARMS_NO = 500
|
||
|
||
|
||
@fixture(scope="module")
|
||
def subcloud_to_test():
|
||
check_alarm_summary_match_subcloud(ProjVar.get_var('PRIMARY_SUBCLOUD'))
|
||
return ProjVar.get_var('PRIMARY_SUBCLOUD')
|
||
|
||
|
||
def check_alarm_summary_match_subcloud(subcloud, timeout=400):
|
||
LOG.info("Ensure alarm summary on SystemController with subcloud {}".format(subcloud))
|
||
subcloud_auth = Tenant.get('admin_platform', dc_region=subcloud)
|
||
central_auth = Tenant.get('admin_platform', dc_region='RegionOne')
|
||
|
||
severities = ["critical_alarms", "major_alarms", "minor_alarms", "warnings"]
|
||
central_alarms = subcloud_alarms = None
|
||
end_time = time.time() + timeout
|
||
while time.time() < end_time:
|
||
output_central = cli.dcmanager('alarm summary', auth_info=central_auth, fail_ok=False)[1]
|
||
output_sub = cli.fm("alarm-summary", auth_info=subcloud_auth, fail_ok=False)[1]
|
||
|
||
central_alarms = table_parser.get_multi_values(table_parser.table(output_central),
|
||
fields=severities, **{"NAME": subcloud})
|
||
subcloud_alarms = table_parser.get_multi_values(table_parser.table(output_sub), severities)
|
||
|
||
if central_alarms == subcloud_alarms:
|
||
LOG.info("'dcmanager alarm summary' output for {} matches 'fm alarm-summary' on "
|
||
"{}".format(subcloud, subcloud))
|
||
return
|
||
|
||
time.sleep(30)
|
||
|
||
assert central_alarms == subcloud_alarms, \
|
||
"'dcmanager alarm summary did not match 'fm alarm-summary' on {} " \
|
||
"within {}s".format(subcloud, timeout)
|
||
|
||
|
||
def alarm_summary_add_and_del(subcloud):
|
||
try:
|
||
# Test adding alarm on subcloud
|
||
ssh_client = ControllerClient.get_active_controller(name=subcloud)
|
||
LOG.info("Wait for alarm raised on subcloud {}".format(subcloud))
|
||
system_helper.wait_for_alarm(alarm_id=EventLogID.PROVIDER_NETWORK_FAILURE,
|
||
con_ssh=ssh_client)
|
||
LOG.tc_step("Ensure alarm summary match nn Central with subcloud: {}".format(subcloud))
|
||
check_alarm_summary_match_subcloud(subcloud)
|
||
|
||
# Test clearing alarm on subcloud
|
||
LOG.tc_step("Clear alarm on subcloud: {}".format(subcloud))
|
||
ssh_client.exec_cmd('fmClientCli -D host=testhost-0', fail_ok=False)
|
||
LOG.info("Wait for alarm clear on subcloud {}".format(subcloud))
|
||
system_helper.wait_for_alarm_gone(alarm_id=EventLogID.PROVIDER_NETWORK_FAILURE,
|
||
con_ssh=ssh_client)
|
||
check_alarm_summary_match_subcloud(subcloud)
|
||
finally:
|
||
ssh_client = ControllerClient.get_active_controller(name=subcloud)
|
||
LOG.info("Clear alarm on subcloud: {}".format(subcloud))
|
||
ssh_client.exec_cmd('fmClientCli -D host=testhost-0')
|
||
|
||
|
||
def add_routes_to_subcloud(subcloud, subcloud_table, fail_ok=False):
|
||
LOG.debug("Add routes back to subcloud: {}".format(subcloud))
|
||
ssh_client = ControllerClient.get_active_controller(name=subcloud)
|
||
for host_id in subcloud_table:
|
||
comm_args = table_parser.get_multi_values(subcloud_table[host_id],
|
||
["ifname", "network", "prefix", "gateway"])
|
||
command = "host-route-add {} {} {} {} {}".format(host_id, comm_args[0][0],
|
||
comm_args[1][0], comm_args[2][0],
|
||
comm_args[3][0])
|
||
code, output = cli.system("host-route-list {}".format(host_id))
|
||
uuid_list = table_parser.get_values(table_parser.table(output), "uuid")
|
||
if table_parser.get_values(subcloud_table[host_id], "uuid")[0] not in uuid_list:
|
||
cli.system(command, ssh_client=ssh_client, fail_ok=fail_ok)
|
||
|
||
|
||
def test_dc_alarm_aggregation_managed(subcloud_to_test):
|
||
"""
|
||
Test Alarm Aggregation on Distributed Cloud
|
||
Args:
|
||
subcloud_to_test (str): module fixture
|
||
|
||
Setups:
|
||
- Make sure there is consistency between alarm summary on
|
||
Central Cloud and on subclouds
|
||
|
||
Test Steps:
|
||
- Raise an alarm at subcloud;
|
||
- Ensure relative alarm raised on subcloud
|
||
- Ensure system alarm-summary on subcloud matches dcmanager alarm summary on system
|
||
- Clean alarm at subcloud
|
||
- Ensure relative alarm cleared on subcloud
|
||
- Ensure system alarm-summary on subcloud matches dcmanager alarm summary on system
|
||
"""
|
||
|
||
ssh_client = ControllerClient.get_active_controller(name=subcloud_to_test)
|
||
LOG.tc_step("Raise alarm on subcloud: {}".format(subcloud_to_test))
|
||
ssh_client.exec_cmd(
|
||
"fmClientCli -c \"### ###300.005###clear###system.vm###host=testhost-0"
|
||
"### ###critical### ###processing-error###cpu-cycles-limit-exceeded### ###"
|
||
"True###True###'\"", fail_ok=False)
|
||
|
||
alarm_summary_add_and_del(subcloud_to_test)
|
||
|
||
|
||
def test_dc_fault_scenario(subcloud_to_test):
|
||
"""
|
||
Test Fault Scenario on Distributed Cloud
|
||
Args:
|
||
subcloud_to_test (str): module fixture
|
||
|
||
Setup:
|
||
- Make sure there is consistency between alarm summary on
|
||
Central Cloud and on subclouds
|
||
|
||
Test Steps:
|
||
- Make subcloud offline (e. g. delete route)
|
||
Step1:
|
||
- Ensure suncloud shows offline
|
||
Step2:
|
||
- Raise alarm on subcloud
|
||
- Ensure relative alarm raised on subcloud,
|
||
- Ensure system alarm-summary on subcloud has changed
|
||
- Ensure dcmanager alarm summary on system controller has no change
|
||
Step3:
|
||
- Resume connectivity to subcloud (e. g. add route back)
|
||
- Ensure suncloud shows online and in-sync
|
||
- Ensure system alarm-summary on subcloud matches dcmanager alarm summary on system
|
||
controller
|
||
Step4:
|
||
- Clean alarm on subcloud
|
||
- Ensure relative alarm cleared on subcloud
|
||
- Ensure system alarm-summary on subcloud matches dcmanager alarm summary on system
|
||
controller
|
||
"""
|
||
ssh_central = ControllerClient.get_active_controller(name="RegionOne")
|
||
ssh_subcloud = ControllerClient.get_active_controller(name=subcloud_to_test)
|
||
subcloud_table = {}
|
||
try:
|
||
code, output = cli.dcmanager("subcloud show {}".format(subcloud_to_test),
|
||
ssh_client=ssh_central)
|
||
gateway = table_parser.get_value_two_col_table(table_parser.table(output),
|
||
"management_gateway_ip")
|
||
code, hosts_raw = cli.system("host-list", ssh_client=ssh_subcloud)
|
||
hosts_id = table_parser.get_values(table_parser.table(hosts_raw), 'id')
|
||
for host_id in hosts_id:
|
||
code, route_raw = cli.system("host-route-list {}".format(host_id),
|
||
ssh_client=ssh_subcloud)
|
||
route_table = table_parser.filter_table(table_parser.table(route_raw),
|
||
**{'gateway': gateway})
|
||
subcloud_table[host_id] = route_table
|
||
|
||
LOG.tc_step("Delete route for subcloud: {} and wait for it to go offline.".format(
|
||
subcloud_to_test))
|
||
ssh_subcloud = ControllerClient.get_active_controller(name=subcloud_to_test)
|
||
for host_id in subcloud_table:
|
||
command = "host-route-delete {}".format(table_parser.get_values(
|
||
subcloud_table[host_id], "uuid")[0])
|
||
cli.system(command, ssh_client=ssh_subcloud)
|
||
|
||
dc_helper.wait_for_subcloud_status(subcloud_to_test,
|
||
avail=SubcloudStatus.AVAIL_OFFLINE,
|
||
timeout=DCTimeout.SYNC, con_ssh=ssh_central)
|
||
|
||
LOG.tc_step("Raise alarm on subcloud: {}".format(subcloud_to_test))
|
||
ssh_subcloud = ControllerClient.get_active_controller(name=subcloud_to_test)
|
||
code_sub_before, output_sub_before = cli.fm("alarm-summary", ssh_client=ssh_subcloud)
|
||
code_central_before, output_central_before = cli.dcmanager('alarm summary')
|
||
ssh_subcloud.exec_cmd(
|
||
"fmClientCli -c \"### ###300.005###clear###system.vm###host="
|
||
"testhost-0### ###critical### ###processing-error###cpu-cycles-limit-exceeded"
|
||
"### ###True###True###'\"", fail_ok=False)
|
||
LOG.info("Ensure relative alarm was raised at subcloud: {}".format(subcloud_to_test))
|
||
system_helper.wait_for_alarm(alarm_id=EventLogID.PROVIDER_NETWORK_FAILURE,
|
||
con_ssh=ssh_subcloud)
|
||
code_sub_after, output_sub_after = cli.fm("alarm-summary", ssh_client=ssh_subcloud)
|
||
code_central_after, output_central_after = cli.dcmanager('alarm summary')
|
||
LOG.info("Ensure fm alarm summary on subcloud: {} has changed but dcmanager alarm"
|
||
"summary has not changed".format(subcloud_to_test))
|
||
assert output_central_before == output_central_after and output_sub_before != \
|
||
output_sub_after
|
||
|
||
add_routes_to_subcloud(subcloud_to_test, subcloud_table)
|
||
|
||
dc_helper.wait_for_subcloud_status(subcloud_to_test, avail=SubcloudStatus.AVAIL_ONLINE,
|
||
sync=SubcloudStatus.SYNCED, timeout=DCTimeout.SYNC,
|
||
con_ssh=ssh_central)
|
||
alarm_summary_add_and_del(subcloud_to_test)
|
||
|
||
finally:
|
||
cli.dcmanager("subcloud show {}".format(subcloud_to_test),
|
||
ssh_client=ssh_central, fail_ok=True)
|
||
add_routes_to_subcloud(subcloud_to_test, subcloud_table, fail_ok=True)
|
||
LOG.info("Clear alarm on subcloud: {}".format(subcloud_to_test))
|
||
ssh_subcloud.exec_cmd('fmClientCli -D host=testhost-0')
|
||
check_alarm_summary_match_subcloud(subcloud=subcloud_to_test)
|
||
|
||
|
||
def test_dc_stress_alarm(subcloud_to_test):
|
||
"""
|
||
Test Stress Scenario on Distributed Cloud
|
||
Args:
|
||
subcloud_to_test (str): module fixture
|
||
|
||
Setup:
|
||
- Make sure there is consistency between alarm summary on
|
||
Central Cloud and on subclouds
|
||
|
||
Test Steps:
|
||
Step1:
|
||
- Trigger large amount of alarms, quickly on one subcloud
|
||
- ensure system alarm-summary on subcloud matches dcmanager alarm summary on system
|
||
controller
|
||
Step2:
|
||
- Trigger large amount of alarms quickly for a long time on all subclouds
|
||
- Each alarm summary updates once every 30 seconds until the event is over
|
||
- Ensure system alarm-summary on subcloud matches dcmanager alarm summary on system
|
||
controller
|
||
Step3:
|
||
- Clear all alarms
|
||
- Ensure system alarm-summary on subcloud matches dcmanager alarm summary on system
|
||
controller
|
||
"""
|
||
ssh_client = ControllerClient.get_active_controller(name=subcloud_to_test)
|
||
|
||
# Step 1
|
||
LOG.tc_step("Trigger large amount of alarms, quickly on one subcloud")
|
||
try:
|
||
for i in range(1, ALARMS_NO + 1):
|
||
ssh_client.exec_cmd(
|
||
"fmClientCli -c \"### ###300.005###clear###system.vm###host="
|
||
"testhost-{}### ###critical### ###processing-error###cpu-cycles-limit-exceeded"
|
||
"### ###True###True###'\"".format(i), fail_ok=False)
|
||
finally:
|
||
for i in range(1, ALARMS_NO + 1):
|
||
ssh_client.exec_cmd('fmClientCli -D host=testhost-{}'.format(i))
|
||
|
||
check_alarm_summary_match_subcloud(subcloud_to_test)
|
||
|
||
# Step 2
|
||
ssh_client_list = {}
|
||
for subcloud in dc_helper.get_subclouds(mgmt='managed'):
|
||
ssh_client_list[subcloud] = ControllerClient.get_active_controller(name=subcloud_to_test)
|
||
|
||
try:
|
||
LOG.tc_step("Trigger large amount of alarms quickly for a long time on all subclouds")
|
||
for subcloud in ssh_client_list:
|
||
subcloud_ssh = ssh_client_list[subcloud]
|
||
for i in range(1, ALARMS_NO + 1):
|
||
subcloud_ssh.exec_cmd(
|
||
"fmClientCli -c \"### ###300.005###clear###"
|
||
"system.vm###host=testhost-{}### ###critical### ###processing-error###"
|
||
"cpu-cycles-limit-exceeded### ###True###True###'\"".format(i),
|
||
fail_ok=False)
|
||
|
||
for subcloud in ssh_client_list:
|
||
check_alarm_summary_match_subcloud(subcloud)
|
||
finally:
|
||
# Step 3
|
||
LOG.tc_step("Clear all alarms on all subclouds")
|
||
for subcloud in ssh_client_list:
|
||
subcloud_ssh = ssh_client_list[subcloud]
|
||
for i in range(1, ALARMS_NO + 1):
|
||
subcloud_ssh.exec_cmd('fmClientCli -D host=testhost-{}'.format(i))
|
||
|
||
for subcloud in ssh_client_list:
|
||
check_alarm_summary_match_subcloud(subcloud)
|