Pytest: Add rook-ceph tests

Add rook-ceph deployment tests:

- verify ceph status and the status of the openstack and ceph pods
- remove and reapply rook-ceph; reboot hosts and check status
- create an instance and check its status

Signed-off-by: Yong Fu <fuyong@neusoft.com>
Change-Id: I33e50cf347f75e6e9dfc1da767bffa0852242244
Author: Yong Fu <fuyong@neusoft.com>
Date:   2021-06-23 15:48:23 +08:00
Commit: 811cfd5abe (parent: a68778f64d)
6 changed files with 355 additions and 6 deletions
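
For reference, a minimal sketch of running only the new tests by their
'robotrook' marker (illustrative only; assumes a standard pytest setup for
this suite, invoked from the test directory):

# illustrative only, not part of this change
import pytest

if __name__ == "__main__":
    # Select and run only the tests tagged with @mark.robotrook.
    pytest.main(["-m", "robotrook", "-v"])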


@@ -2,4 +2,4 @@
host=review.opendev.org
port=29418
project=starlingx/test.git
-defaultbranch=r/stx.4.0
+defaultbranch=devel


@@ -359,6 +359,29 @@ def apply_app(app_name, check_first=False, fail_ok=False, applied_timeout=300,
return 0, msg
def abort_app(app_name, fail_ok=False, applied_timeout=300,
con_ssh=None, auth_info=Tenant.get('admin_platform')):
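    """
    Abort an in-progress apply of the given application via
    'system application-abort' and wait for the app to reach the
    apply-failed status.
    Returns (0, msg) on success, (1, output) if the abort command is
    rejected, or (2, msg) if the expected status is not reached in time.
    """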
code, output = cli.system('application-abort', app_name,
ssh_client=con_ssh, fail_ok=fail_ok,
auth_info=auth_info)
if code > 0:
return 1, output
res = wait_for_apps_status(apps=app_name, status=AppStatus.APPLY_FAILED,
timeout=applied_timeout, con_ssh=con_ssh,
auth_info=auth_info, fail_ok=fail_ok)[0]
if not res:
return 2, "{} failed to abort".format(app_name)
msg = '{} abort successfully'.format(app_name)
LOG.info(msg)
return 0, msg
def delete_app(app_name, check_first=True, fail_ok=False, applied_timeout=300,
con_ssh=None,
auth_info=Tenant.get('admin_platform')):


@@ -315,7 +315,7 @@ def reboot_hosts(hostnames, timeout=HostTimeout.REBOOT, con_ssh=None,
if check_hypervisor_up and computes:
res, hosts_hypervisordown = wait_for_hypervisors_up(
computes, fail_ok=fail_ok, con_ssh=con_ssh,
-            timeout=HostTimeout.HYPERVISOR_UP, auth_info=auth_info)
+            timeout=HostTimeout.HYPERVISOR_UP)
if not res:
err_msg = "Hosts not up in nova hypervisor-list: " \
"{}".format(hosts_hypervisordown)


@@ -61,6 +61,43 @@ def get_ip_address_str(ip=None):
return None
def create_segmentation_range(name=None, shared=None, project=None, minimum=None, maximum=None,
network_type=None, physical_network=None, fail_ok=False,
auth_info=None, con_ssh=None):
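    """
    Create a network segment range via
    'openstack network segment range create' with the given options.
    Returns (0, <range id>) on success, or (1, <output>) if the command
    fails and fail_ok is True.
    """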
if name is None:
name = common.get_unique_name(name_str='net')
args = name
if project is not None:
tenant_id = keystone_helper.get_projects(field='ID', name=project,
con_ssh=con_ssh)[0]
args += ' --project ' + tenant_id
if shared is not None:
args += ' --share' if shared else ' --private'
if network_type:
args += ' --network-type ' + network_type
if physical_network:
args += ' --physical-network ' + physical_network
    if minimum:
        args += ' --minimum ' + str(minimum)
    if maximum:
        args += ' --maximum ' + str(maximum)
    LOG.info("Creating network segmentation range: Args: {}".format(args))
    code, output = cli.openstack('network segment range create', args, ssh_client=con_ssh,
                                 fail_ok=fail_ok, auth_info=auth_info)
    if code == 1:
        return 1, output
    table_ = table_parser.table(output)
    net_id = table_parser.get_value_two_col_table(table_, 'id')
    succ_msg = "Network segmentation range {} is successfully created".format(net_id)
    LOG.info(succ_msg)
    return 0, net_id
def create_network(name=None, shared=None, project=None, network_type=None,
segmentation_id=None, qos=None,
physical_network=None, vlan_transparent=None,
@@ -129,11 +166,11 @@ def create_network(name=None, shared=None, project=None, network_type=None,
args += ' --tag ' + tag
if segmentation_id:
-        args += ' --provider:segmentation_id ' + segmentation_id
+        args += ' --provider-segment ' + segmentation_id
    if network_type:
-        args += ' --provider:network_type ' + network_type
+        args += ' --provider-network-type ' + network_type
    if physical_network:
-        args += ' --provider:physical_network ' + physical_network
+        args += ' --provider-physical-network ' + physical_network
if avail_zone:
args += ' --availability-zone-hint ' + avail_zone
if qos:
@@ -4200,7 +4237,7 @@ def set_sfc_port_pair_group(group, port_pairs=None, name=None, description=None,
"{}".format(val, actual_val)
assert len(set(actual_val)) == len(
actual_val), "Duplicated item found in Port pairs field: " \
"{}". format(actual_val)
"{}".format(actual_val)
else:
assert not actual_val, "Port pair still exist in group {} " \
"after setting to no: {}". \


@@ -0,0 +1,289 @@
import os
import re
import time
from pytest import mark, fixture, skip
from consts.auth import HostLinuxUser, Tenant
from consts.reasons import SkipSysType
from consts.stx import GuestImages, VMStatus, AppStatus
from keywords import nova_helper, glance_helper, cinder_helper, system_helper, vm_helper, \
host_helper, container_helper, network_helper, kube_helper, storage_helper
from testfixtures.recover_hosts import HostsToRecover
from utils import cli, table_parser
from utils.clients.ssh import ControllerClient
from utils.tis_log import LOG
ROOK_CEPH_APP_NAME = 'rook-ceph-apps'
STX_OPENSTACK = 'stx-openstack'
NETWORK_NAME = "network-rook"
SUBNET_NAME = "subnet-rook"
SUBNET_RANGE = "192.168.0.0/24"
IP_VERSION = 4
# Flavor, Image, Volume info
cirros_params = {
"flavor_name": "f1.tinny",
"flavor_vcpus": 1,
"flavor_ram": 512,
"flavor_disk": 4,
"volume_name": "vol-rook",
"image_name": "img-rook",
"image_file": os.path.join(GuestImages.DEFAULT["image_dir"], "cirros-0.4.0-x86_64-disk.img"),
"disk_format": "qcow2"
}
@fixture(scope="module")
def create_network_rook():
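    """
    Module fixture: create vlan segment ranges on physnet0/physnet1 if none
    exist yet, then create the test network and subnet used by these tests.
    """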
con_ssh = ControllerClient.get_active_controller()
auth_info = Tenant.get('admin')
table_ = table_parser.table(
cli.openstack('network segment range list', ssh_client=con_ssh, auth_info=auth_info)[1])
val = table_parser.get_values(table_, "Name")
if not val:
network_helper.create_segmentation_range(name='physnet0-a', shared=False, project='admin',
minimum=400, maximum=499, network_type='vlan',
physical_network='physnet0')
network_helper.create_segmentation_range(name='physnet0-b', shared=True, minimum=10,
maximum=10, network_type='vlan',
physical_network='physnet0')
network_helper.create_segmentation_range(name='physnet1-a', shared=False, project='admin',
minimum=500, maximum=599, network_type='vlan',
physical_network='physnet1')
net_id = network_helper.create_network(name=NETWORK_NAME, cleanup="module")[1]
subnet_id = network_helper.create_subnet(name=SUBNET_NAME, network=NETWORK_NAME,
subnet_range=SUBNET_RANGE, dhcp=True,
ip_version=IP_VERSION, cleanup="module")[1]
return net_id, subnet_id
# Create flavor and image for the test instance
@fixture(scope="module")
def create_flavor_and_image():
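    """Module fixture: create a public flavor and a cirros image for the tests."""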
fl_id = nova_helper.create_flavor(name=cirros_params['flavor_name'],
vcpus=cirros_params['flavor_vcpus'],
ram=cirros_params['flavor_ram'],
root_disk=cirros_params['flavor_disk'],
is_public=True, add_default_specs=False,
cleanup="module")[1]
im_id = glance_helper.create_image(name=cirros_params['image_name'],
source_image_file=cirros_params['image_file'],
disk_format=cirros_params['disk_format'],
cleanup="module")[1]
return {
"flavor": fl_id,
"image": im_id
}
# Create a bootable volume from the image
@fixture(scope="module")
def volume_from_instance(create_flavor_and_image):
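    """Module fixture: create a bootable volume from the cirros image."""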
vol_id = cinder_helper.create_volume(name=cirros_params['volume_name'], source_type='image',
source_id=create_flavor_and_image['image'],
size=cirros_params['flavor_disk'], cleanup="module")[1]
return vol_id
# Creating Instance
@fixture(scope="module")
def launch_instance(create_flavor_and_image, create_network_rook, volume_from_instance):
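    """Module fixture: boot a test VM from the volume on the active controller."""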
net_id_list = [{"net-id": create_network_rook[0]}]
host = system_helper.get_active_controller_name()
vm_id = vm_helper.boot_vm(name='vm-rook', flavor=create_flavor_and_image["flavor"],
nics=net_id_list, source="volume", source_id=volume_from_instance,
meta={'foo': 'bar'}, vm_host=host, cleanup="module")[1]
return vm_id
# After removing rook-ceph, the backing disks must be wiped before reapplying
def clean_ceph_disk():
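    """
    Wipe the rook-ceph OSD disks after the app is removed: remove the ceph
    device-mapper entry, clear the LVM physical volume and zap the partition
    table of /dev/sdb on each controller.
    """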
con_ssh = ControllerClient.get_active_controller()
cmd = "dmsetup info -C | grep ^ceph| awk '{{print $1}}'"
target_ = con_ssh.exec_sudo_cmd(cmd)[1]
LOG.info("ceph device name is {}".format(target_))
con_ssh.exec_sudo_cmd('dmsetup remove ' + target_)
con_ssh.exec_sudo_cmd('pvremove /dev/sdb --force --force -y')
con_ssh.exec_sudo_cmd('sgdisk -Z /dev/sdb')
if not system_helper.is_aio_simplex():
with host_helper.ssh_to_host('controller-1') as node_ssh:
cmd = "dmsetup info -C | grep ^ceph| awk '{{print $1}}'"
target_ = node_ssh.exec_sudo_cmd(cmd)[1]
LOG.info("ceph device name is {}".format(target_))
node_ssh.exec_sudo_cmd('dmsetup remove ' + target_)
node_ssh.exec_sudo_cmd('pvremove /dev/sdb --force --force -y')
node_ssh.exec_sudo_cmd('sgdisk -Z /dev/sdb')
# Reboot a host and run the post-reboot checks
def reboot_host(vm_id, host):
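    """
    Reboot the given host, wait for it to recover and for platform services
    to be enabled, then verify the test VM is ACTIVE again.
    """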
HostsToRecover.add(host)
host_helper.reboot_hosts(host)
host_helper.wait_for_hosts_ready(host)
system_helper.wait_for_services_enable()
vm_helper.wait_for_vm_status(vm_id, status=VMStatus.ACTIVE, fail_ok=False)
def apply_openstack():
"""
Upload helm_chart
Apply stx-openstack
"""
# Do application upload stx-openstack.
app_dir = HostLinuxUser.get_home()
con_ssh = ControllerClient.get_active_controller()
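    # The stx-openstack helm-charts tarball is expected in the sysadmin home
    # directory; pick up whichever stx-openstack*.tgz is found there.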
    tar_file = os.path.join(app_dir, re.search(r"(stx-openstack.*?\.tgz)",
                                               con_ssh.exec_cmd("ls ~")[1]).group(1))
LOG.info("tar_file is %s" % tar_file)
container_helper.upload_app(tar_file=tar_file, app_name=STX_OPENSTACK, con_ssh=con_ssh,
uploaded_timeout=600)
# Do application apply stx-openstack.
LOG.info("Apply %s" % STX_OPENSTACK)
container_helper.apply_app(app_name=STX_OPENSTACK, applied_timeout=3600,
check_interval=30, con_ssh=con_ssh)
@mark.robotrook
def test_remove_reapply_rook_ceph():
"""
    Verify rook-ceph-apps can be removed and reapplied.
    stx-openstack must not be deployed while rook-ceph is removed, so it is
    removed first if present and reapplied afterwards.
"""
LOG.info("stx-openstack must be not deployed")
flag = False
if container_helper.is_stx_openstack_deployed():
LOG.info("stx-openstack is deployed, need remove it")
container_helper.remove_app(app_name=STX_OPENSTACK)
flag = True
LOG.info("Remove application {}".format(ROOK_CEPH_APP_NAME))
container_helper.remove_app(app_name=ROOK_CEPH_APP_NAME)
clean_ceph_disk()
LOG.info("Reapply application {}".format(ROOK_CEPH_APP_NAME))
container_helper.apply_app(app_name=ROOK_CEPH_APP_NAME, applied_timeout=1200,
check_interval=30)
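    # Give the application time to settle, then confirm it stays applied.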
time.sleep(120)
container_helper.wait_for_apps_status(apps=ROOK_CEPH_APP_NAME, status=AppStatus.APPLIED,
timeout=600, check_interval=30)
if flag:
apply_openstack()
@mark.robotrook
def test_apply_abort_reapply_rook_ceph():
"""
    Verify an in-progress rook-ceph-apps apply can be aborted, then the app
    removed and reapplied. stx-openstack must not be deployed during the
    operation, so it is removed first if present and reapplied afterwards.
"""
LOG.info("stx-openstack must be not deployed")
if container_helper.is_stx_openstack_deployed():
LOG.info("stx-openstack is deployed, need remove it")
container_helper.remove_app(app_name=STX_OPENSTACK)
LOG.info("Apply application {}".format(ROOK_CEPH_APP_NAME))
cli.system('application-apply', ROOK_CEPH_APP_NAME)
container_helper.abort_app(ROOK_CEPH_APP_NAME)
code = container_helper.remove_app(app_name=ROOK_CEPH_APP_NAME, fail_ok=True)[0]
if code != 0:
        # If the remove failed, try again
        LOG.info("remove %s failed, trying again" % ROOK_CEPH_APP_NAME)
container_helper.remove_app(app_name=ROOK_CEPH_APP_NAME)
clean_ceph_disk()
LOG.info("Reapply application {}".format(ROOK_CEPH_APP_NAME))
container_helper.apply_app(app_name=ROOK_CEPH_APP_NAME, applied_timeout=1200,
check_interval=30)
time.sleep(120)
container_helper.wait_for_apps_status(apps=ROOK_CEPH_APP_NAME, status=AppStatus.APPLIED,
timeout=600, check_interval=30)
apply_openstack()
@mark.robotrook
def test_rook_ceph_health_and_openstack_status(ceph_precheck):
"""
    Verify ceph is healthy (ceph_precheck fixture) and that basic openstack
    resources and platform services can be listed
"""
cinder_helper.get_volumes()
network_helper.get_networks()
nova_helper.get_flavors()
nova_helper.get_keypairs()
system_helper.get_services()
@mark.robotrook
def test_pod_status():
"""
    Verify the openstack and kube-system pods are healthy (running or completed)
"""
    application_status = container_helper.get_apps(application=STX_OPENSTACK)[0]
    assert application_status == "applied", "stx-openstack is not in applied state"
    command_health = kube_helper.wait_for_pods_healthy(namespace='openstack')
    assert command_health, "openstack pods health check failed"
    application_status = container_helper.get_apps(application=ROOK_CEPH_APP_NAME)[0]
    assert application_status == "applied", "rook-ceph-apps is not in applied state"
    command_health = kube_helper.wait_for_pods_healthy(namespace="kube-system")
    assert command_health, "kube-system pods health check failed"
@mark.robotrook
def test_ceph_osd_status():
"""
Verify the OSD status is up
"""
con_ssh = ControllerClient.get_active_controller()
tuple_ = con_ssh.exec_sudo_cmd('ceph osd ls')
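    # 'ceph osd ls' prints one OSD id per line; verify each OSD is up.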
for osd_id in tuple_[1].split('\n'):
assert storage_helper.is_osd_up(osd_id, con_ssh), "OSD status is down"
@mark.robotrook
def test_reboot_active_controller_system_status(launch_instance):
"""
    Reboot the active controller and verify the system and the test VM recover
"""
active_controller = system_helper.get_active_controller_name()
LOG.info('active_controller name is {}'.format(active_controller))
reboot_host(launch_instance, active_controller)
con_ssh = ControllerClient.get_active_controller()
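    # After the reboot, no systemd units should be in the failed state.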
out = con_ssh.exec_sudo_cmd('systemctl --failed')[1]
assert re.search('0 loaded units listed', out), out
@mark.robotrook
def test_swact_controllers(wait_for_con_drbd_sync_complete):
"""
    Swact the active controller, verify both controllers' nodes stay ready,
    and swact back to controller-0 afterwards
"""
if system_helper.is_aio_simplex():
skip("Simplex system detected")
if not wait_for_con_drbd_sync_complete:
skip(SkipSysType.LESS_THAN_TWO_CONTROLLERS)
active_controller, standby_controller = \
system_helper.get_active_standby_controllers()
assert standby_controller, "No standby controller available"
LOG.info("Start swact action")
try:
        host_helper.swact_host(active_controller)
kube_helper.wait_for_nodes_ready(hosts=(active_controller, standby_controller))
finally:
active_controller = system_helper.get_active_controller_name()
if active_controller != "controller-0":
LOG.info("Start swact action")
host_helper.swact_host('controller-1')
@mark.robotrook
def test_reboot_standby_controller_system_status(no_simplex, launch_instance):
"""
    Reboot the standby controller and verify the system and the test VM recover
"""
standby_controller = system_helper.get_standby_controller_name()
LOG.info('standby_controller name is {}'.format(standby_controller))
reboot_host(launch_instance, standby_controller)
con_ssh = ControllerClient.get_active_controller()
out = con_ssh.exec_sudo_cmd('systemctl --failed')[1]
assert re.search('0 loaded units listed', out), out