diff --git a/automated-pytest-suite/keywords/container_helper.py b/automated-pytest-suite/keywords/container_helper.py
index 85ca581..a72556a 100644
--- a/automated-pytest-suite/keywords/container_helper.py
+++ b/automated-pytest-suite/keywords/container_helper.py
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2019 Wind River Systems, Inc.
+# Copyright (c) 2019, 2020 Wind River Systems, Inc.
 #
 # SPDX-License-Identifier: Apache-2.0
 #
@@ -630,6 +630,32 @@ def tag_docker_image(source_image, target_name, source_tag=None,
     return 0, target_args


+def remove_docker_images_with_pattern(pattern, con_ssh=None, timeout=300):
+    """
+    Remove docker image(s) matching 'pattern' via docker image rm
+    Args:
+        pattern (str): grep pattern used to select images from 'docker images'
+        con_ssh (SSHClient|None): ssh client of the host to clean up
+        timeout (int): timeout in seconds for the docker commands
+
+    Returns (tuple):
+        (0, <output>) - matching images removed, or no matching images found
+        (1, <std_err>) - 'docker images' command failed
+
+    """
+
+    LOG.info("Remove docker images matching pattern: {}".format(pattern))
+
+    args = " | grep " + pattern + " | awk '{print $3}' "
+    code, out = exec_docker_cmd("images", args, timeout=timeout, fail_ok=True, con_ssh=con_ssh)
+
+    if out:
+        image_list = out.splitlines()
+        code, out = remove_docker_images(image_list, force=True, con_ssh=con_ssh)
+
+    return code, out
+
+
 def remove_docker_images(images, force=False, con_ssh=None, timeout=300,
                          fail_ok=False):
     """
diff --git a/automated-pytest-suite/keywords/system_helper.py b/automated-pytest-suite/keywords/system_helper.py
index 7200e8d..57602b6 100644
--- a/automated-pytest-suite/keywords/system_helper.py
+++ b/automated-pytest-suite/keywords/system_helper.py
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2019 Wind River Systems, Inc.
+# Copyright (c) 2019, 2020 Wind River Systems, Inc.
 #
 # SPDX-License-Identifier: Apache-2.0
 #
@@ -9,6 +9,7 @@ import ipaddress
 import re
 import os
 import time
+import yaml

 from pytest import skip

@@ -276,6 +277,44 @@ def get_hosts(personality=None, administrative=None, operational=None,
     return hostnames


+def get_host_list_data(columns=None, con_ssh=None,
+                       auth_info=Tenant.get('admin_platform'), source_rc=False):
+    """
+    Args:
+        columns (list|None): columns to request from 'system host-list', e.g. ['hostname', 'personality']
+        con_ssh (SSHClient|None): active controller ssh client
+        auth_info (dict): auth info used to run the cli command
+        source_rc (bool): whether to source the platform openrc first
+
+    Returns (list of dict of hosts):
+        e.g., [{'administrative': 'unlocked', 'availability': 'available', 'hostname': 'controller-0',
+                'id': 1, 'operational': 'enabled', 'personality': 'controller'},
+               {'administrative': 'unlocked', 'availability': 'available', 'hostname': 'compute-1',
+                'id': 2, 'operational': 'enabled', 'personality': 'worker'},
+               {'administrative': 'unlocked', 'availability': 'available', 'hostname': 'compute-0',
+                'id': 3, 'operational': 'enabled', 'personality': 'worker'},
+               {'administrative': 'unlocked', 'availability': 'available', 'hostname': 'controller-1',
+                'id': 4, 'operational': 'enabled', 'personality': 'controller'},
+               ]
+
+    """
+
+    args = ""
+    if columns:
+        for col in columns:
+            args += ' --column {}'.format(col)
+    args += " --format yaml"
+
+    code, output = cli.system('host-list', args, ssh_client=con_ssh,
+                              auth_info=auth_info, source_openrc=source_rc)
+
+    if code == 0:
+        return yaml.safe_load(output)
+    else:
+        LOG.error("system host-list cli failed with: {}".format(output))
+        return output
+
+
 def get_hosts_per_personality(availability=None, administrative=None,
                               operational=None, con_ssh=None,
                               auth_info=Tenant.get('admin_platform'),
diff --git a/automated-pytest-suite/testcases/functional/stx_monitor/test_stx_monitor.py b/automated-pytest-suite/testcases/functional/stx_monitor/test_stx_monitor.py
new file mode 100644
index 0000000..0f632f9
--- /dev/null
+++ b/automated-pytest-suite/testcases/functional/stx_monitor/test_stx_monitor.py
@@ -0,0 +1,473 @@
+#
+# Copyright (c) 2020 Wind River Systems, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+
+import os
+import json
+
+from pytest import fixture
+from pytest import mark
+
+from utils.tis_log import LOG
+from utils.clients.ssh import ControllerClient
+
+from keywords import container_helper
+from keywords import host_helper
+from keywords import kube_helper
+from keywords import system_helper
+from consts.stx import SysType
+from consts.auth import HostLinuxUser
+
+from consts.auth import Tenant
+
+STX_MONITOR_TAR = 'stx-monitor.tgz'
+STX_MONITOR_APP_NAME = 'stx-monitor'
+
+MONITOR_PORT = 31001
+
+POD_NAME = 0
+POD_NODE = 1
+
+MONITORING_HOSTS = ["controller", "compute"]
+
+STX_MONITOR_LABELS = ['elastic-client', 'elastic-controller', 'elastic-data', 'elastic-master']
+
+CONTROLLER_LABELS = STX_MONITOR_LABELS
+COMPUTE_LABELS = ['elastic-master']
+SUBCLOUD_CONTROLLER_LABELS = ['elastic-controller']
+
+POD_RUNNING_ALL_HOSTS = 'all_hosts'
+POD_RUNNING_ONE_INSTANCE = 'one_instance'
+
+POD_READY_STATE_ARGS = '--namespace=monitor --for=condition=Ready pods --timeout=30s --all ' \
+                       '--selector=app!=elasticsearch-curator'
+
+MON_METRICBEAT_DS = 'mon-metricbeat-YYYYY'
+MON_METRICBEAT_LABEL = 'mon-metricbeat-LABEL'
+MON_METRICBEAT_PATIAL_NAME = 'mon-metricbeat-'
+
+# This is a dictionary of labels and their corresponding pod names. Each pod
+# either runs on all labeled hosts or as a single instance on one labeled host.
+# Daemon set pods run on all hosts and do not correspond to a label.
+PODS_LABEL_MATCHING_DICT = {
+    # 'daemon_set' is a custom label for automation only
+    'daemon_set': {
+        'mon-filebeat-': POD_RUNNING_ALL_HOSTS,
+        MON_METRICBEAT_DS: POD_RUNNING_ALL_HOSTS
+    },
+    'elastic-client': {
+        'mon-elasticsearch-client-': POD_RUNNING_ALL_HOSTS,
+    },
+    'elastic-controller': {
+        # the curator is a transient pod so we will skip checking for it
+        # 'mon-elasticsearch-curator-': POD_RUNNING_ONE_INSTANCE,
+        'mon-kibana-': POD_RUNNING_ONE_INSTANCE,
+        'mon-kube-state-metrics-': POD_RUNNING_ONE_INSTANCE,
+        'mon-logstash-': POD_RUNNING_ALL_HOSTS,
+        MON_METRICBEAT_LABEL: POD_RUNNING_ONE_INSTANCE,
+        'mon-nginx-ingress-controller-': POD_RUNNING_ALL_HOSTS,
+        'mon-nginx-ingress-default-backend-': POD_RUNNING_ONE_INSTANCE
+    },
+    'elastic-data': {
+        'mon-elasticsearch-data-': POD_RUNNING_ALL_HOSTS
+    },
+    'elastic-master': {
+        'mon-elasticsearch-master-': POD_RUNNING_ALL_HOSTS
+    }
+}
+
+PODS_LABEL_MATCHING_SUBCLOUD_DICT = {
+    # 'daemon_set' is a custom label for automation only
+    'daemon_set': {
+        'mon-filebeat-': POD_RUNNING_ALL_HOSTS,
+        MON_METRICBEAT_DS: POD_RUNNING_ALL_HOSTS
+    },
+    'elastic-controller': {
+        # the curator is a transient pod so we will skip checking for it
+        # 'mon-elasticsearch-curator-': POD_RUNNING_ONE_INSTANCE,
+        'mon-kube-state-metrics-': POD_RUNNING_ONE_INSTANCE,
+        'mon-logstash-': POD_RUNNING_ALL_HOSTS,
+        MON_METRICBEAT_LABEL: POD_RUNNING_ONE_INSTANCE
+    }
+}
+
+
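+# Note: the skipif marker on test_stx_monitor below evaluates
+# stx_monitor_file_exist() at module import/collection time, before any of
+# the fixtures in this file run.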
+def stx_monitor_file_exist():
+    con_ssh = ControllerClient.get_active_controller()
+    home_dir = HostLinuxUser.get_home()
+    stx_mon_file = '{}/{}'.format(home_dir, STX_MONITOR_TAR)
+
+    LOG.info("Check if file %s is present" % stx_mon_file)
+
+    return con_ssh.file_exists(stx_mon_file)
+
+
+@fixture()
+def setup_app(request):
+    LOG.fixture_step("Setup: Clean up any pre-existing stx-monitor resources")
+    cleanup_app()
+
+    def cleanup_after_test():
+        LOG.fixture_step("Tear down: clean up any stx-monitor resources")
+        cleanup_app()
+    request.addfinalizer(cleanup_after_test)
+
+
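+# For each host this roughly amounts to the shell pipeline below (see
+# container_helper.remove_docker_images_with_pattern for the actual calls):
+#     docker images | grep elastic | awk '{print $3}' | xargs docker image rm --force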
+def delete_images_from_host_registries(con_ssh=None, auth_info=Tenant.get('admin_platform')):
+    hosts = system_helper.get_hosts(con_ssh=con_ssh, auth_info=auth_info)
+    for host in hosts:
+        with host_helper.ssh_to_host(hostname=host, con_ssh=con_ssh) as host_ssh:
+            LOG.info("Delete {} images for host: {}".format(STX_MONITOR_APP_NAME, host))
+            container_helper.remove_docker_images_with_pattern(pattern="elastic", con_ssh=host_ssh,
+                                                               timeout=120)
+
+
+def cleanup_app(con_ssh=None, auth_info=Tenant.get('admin_platform')):
+    """
+    Remove application stx-monitor
+    Delete application stx-monitor
+    Remove stx-monitor images from the docker registries on all hosts
+    Remove stx-monitor labels from all hosts
+    """
+
+    LOG.info("Remove application {}".format(STX_MONITOR_APP_NAME))
+    container_helper.remove_app(app_name=STX_MONITOR_APP_NAME, con_ssh=con_ssh, auth_info=auth_info)
+
+    LOG.info("Delete application {}".format(STX_MONITOR_APP_NAME))
+    container_helper.delete_app(app_name=STX_MONITOR_APP_NAME, con_ssh=con_ssh, auth_info=auth_info)
+
+    delete_images_from_host_registries(con_ssh=con_ssh, auth_info=auth_info)
+
+    LOG.info("Delete labels for {}".format(STX_MONITOR_APP_NAME))
+    delete_all_monitor_labels(con_ssh=con_ssh, auth_info=auth_info)
+
+    LOG.info("Cleanup completed")
+
+
+def assign_labels(system_type, con_ssh=None, auth_info=Tenant.get('admin_platform')):
+    """
+    The following labels are required on all controllers:
+        elastic-controller=enabled
+        elastic-master=enabled
+        elastic-data=enabled
+        elastic-client=enabled
+
+    The following label is required on one compute:
+        elastic-master=enabled
+    """
+    LOG.info("Assign stx-monitor labels to controller-0")
+    host_list = system_helper.get_hosts(con_ssh=con_ssh, auth_info=auth_info)
+    host_helper.assign_host_labels("controller-0", CONTROLLER_LABELS, lock=False, unlock=False,
+                                   con_ssh=con_ssh, auth_info=auth_info)
+
+    if system_type != SysType.AIO_SX and "controller-1" in host_list:
+        LOG.info("Assign stx-monitor labels to controller-1")
+        host_helper.assign_host_labels("controller-1", CONTROLLER_LABELS, lock=False, unlock=False,
+                                       con_ssh=con_ssh, auth_info=auth_info)
+
+    if "compute-0" in host_list:
+        LOG.info("Assign stx-monitor labels to compute-0")
+        host_helper.assign_host_labels("compute-0", COMPUTE_LABELS, lock=False, unlock=False,
+                                       con_ssh=con_ssh, auth_info=auth_info)
+
+
+def assign_subcloud_labels(system_type, con_ssh=None, auth_info=Tenant.get('admin_platform')):
+    """
+    The following label is required on all subcloud controllers:
+        elastic-controller=enabled
+    """
+    LOG.info("Assign stx-monitor labels to controller-0")
+    host_list = system_helper.get_hosts(con_ssh=con_ssh, auth_info=auth_info)
+    host_helper.assign_host_labels("controller-0", SUBCLOUD_CONTROLLER_LABELS, lock=False,
+                                   unlock=False, con_ssh=con_ssh, auth_info=auth_info)
+
+    if system_type != SysType.AIO_SX and "controller-1" in host_list:
+        LOG.info("Assign stx-monitor labels to controller-1")
+        host_helper.assign_host_labels("controller-1", SUBCLOUD_CONTROLLER_LABELS, lock=False,
+                                       unlock=False, con_ssh=con_ssh, auth_info=auth_info)
+
+
+def delete_all_monitor_labels(con_ssh=None, auth_info=Tenant.get('admin_platform')):
+    LOG.info("Delete monitor labels from hosts")
+
+    host_list = system_helper.get_hosts(con_ssh=con_ssh, auth_info=auth_info)
+    for host in host_list:
+        # Remove all monitor labels from all hosts on the system
+        host_helper.remove_host_labels(host, STX_MONITOR_LABELS, lock=False, unlock=False,
+                                       con_ssh=con_ssh, auth_info=auth_info)
+
+
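+# Illustrative (assumed, not asserted by this test) CLI equivalents of the
+# helper calls below:
+#     system application-upload /home/sysadmin/stx-monitor.tgz
+#     system application-apply stx-monitor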
+def app_upload_apply(con_ssh=None, auth_info=Tenant.get('admin_platform')):
+    """
+    Upload stx-monitor
+    Apply stx-monitor
+    """
+
+    # Do application upload stx-monitor.
+    app_dir = HostLinuxUser.get_home()
+    tar_file = os.path.join(app_dir, STX_MONITOR_TAR)
+    LOG.info("Upload %s" % tar_file)
+    container_helper.upload_app(tar_file=tar_file, app_name=STX_MONITOR_APP_NAME, con_ssh=con_ssh,
+                                auth_info=auth_info, uploaded_timeout=3600)
+
+    # Do application apply stx-monitor.
+    LOG.info("Apply %s" % STX_MONITOR_APP_NAME)
+    container_helper.apply_app(app_name=STX_MONITOR_APP_NAME, applied_timeout=3600,
+                               check_interval=60, con_ssh=con_ssh, auth_info=auth_info)
+
+
+def get_oam_floating_ip():
+    """
+    Get the OAM floating IP address
+    """
+    if system_helper.is_aio_simplex():
+        fields = 'oam_ip'
+    else:
+        fields = ('oam_c0_ip', 'oam_c1_ip', 'oam_floating_ip')
+    oam_info = system_helper.get_oam_values(fields=fields)
+
+    oam_floating_ip = None
+    for key, value in oam_info.items():
+        if value is not None:
+            oam_floating_ip = value
+
+    return oam_floating_ip
+
+
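+# check_cluster_health() below parses the JSON returned by the Elasticsearch
+# _cluster/health API. An illustrative (not captured from a live system)
+# response containing the fields consumed here:
+#     {"status": "green", "active_shards": 30, "unassigned_shards": 0, ...}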
+def check_cluster_health(system_type):
+    # Check the cluster health (cluster health status will be yellow for
+    # AIO-SX as there will be no replicated shards)
+    LOG.info("Check the cluster health")
+    hosts = system_helper.get_hosts()
+    LOG.info("System has hosts: {}".format(hosts))
+    prefix = 'http'
+    oam_ip = get_oam_floating_ip()
+
+    for host in hosts:
+        with host_helper.ssh_to_host(hostname=host) as host_ssh:
+            code, output = host_ssh.exec_cmd(
+                'curl {}://{}:31001/mon-elasticsearch-client/_cluster/health?pretty'.format(
+                    prefix, oam_ip), fail_ok=False)
+
+            if output:
+                data_dict = json.loads(output)
+
+                # check that 'status' is green (yellow is acceptable for AIO-SX)
+                if not (data_dict['status'] == 'green' or
+                        (system_type == SysType.AIO_SX and data_dict['status'] == 'yellow')):
+                    raise AssertionError("status not green (or yellow in case of AIO-SX)")
+
+                # check that 'unassigned_shards' is 0
+                if system_type != SysType.AIO_SX and data_dict['unassigned_shards'] != 0:
+                    raise AssertionError("unassigned_shards not 0")
+
+                # check that 'active_shards' is not 0
+                if data_dict['active_shards'] == 0:
+                    raise AssertionError("active_shards is 0")
+            else:
+                raise AssertionError("curl command failed")
+
+
+def is_pod_running_on_host(pods, host, partial_pod_name):
+
+    for pod in (_pod for _pod in pods if host == _pod[POD_NODE]):
+
+        # Special case for 'mon-metricbeat-'. Two kinds of pods carry that partial name:
+        # - The daemon set pod 'mon-metricbeat-YYYYY'
+        # - The labeled pod 'mon-metricbeat-YYYYYYYYYY-YYYYY'. Note that the middle Ys are of
+        #   variable length, e.g. mon-metricbeat-557fb9cb7-pbbzs vs
+        #   mon-kube-state-metrics-77db855d59-5s566 was seen in different labs.
+        if partial_pod_name == MON_METRICBEAT_DS:
+            if MON_METRICBEAT_PATIAL_NAME in pod[POD_NAME] and \
+                    len(pod[POD_NAME]) == len(MON_METRICBEAT_DS):
+                LOG.info('Found pod matching name {} for host {}. POD: {}'.format(
+                    partial_pod_name, host, pod[POD_NAME]))
+                return True
+
+        elif partial_pod_name == MON_METRICBEAT_LABEL:
+            if MON_METRICBEAT_PATIAL_NAME in pod[POD_NAME] and \
+                    len(pod[POD_NAME]) >= len(MON_METRICBEAT_DS) + 2:
+                LOG.info('Found pod matching name {} for host {}. POD: {}'.format(
+                    partial_pod_name, host, pod[POD_NAME]))
+                return True
+
+        elif partial_pod_name in pod[POD_NAME]:
+            LOG.info('Found pod matching name {} for host {}. POD: {}'.format(
+                partial_pod_name, host, pod[POD_NAME]))
+            return True
+
+    LOG.info('Missing pod matching name {} for host {}'.format(partial_pod_name, host))
+    return False
+
+
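+# are_monitor_pods_running() below consumes (NAME, NODE) pairs from
+# kube_helper.get_pods(), indexed via the POD_NAME/POD_NODE constants above;
+# an illustrative entry would be ('mon-elasticsearch-master-0', 'controller-0').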
+def are_monitor_pods_running(system_type, con_ssh=None, auth_info=Tenant.get('admin_platform'),
+                             matching_dict=PODS_LABEL_MATCHING_DICT):
+    # Get all the pods for stx-monitor
+    monitor_pods = kube_helper.get_pods(field=('NAME', 'NODE'), namespace="monitor", strict=False,
+                                        con_ssh=con_ssh)
+
+    LOG.info("Running pods for stx-monitor: %s" % monitor_pods)
+
+    # Make a dictionary of which hosts are assigned to which stx-monitor
+    # labels, e.g.
+    #
+    # {
+    #     'daemon_set': ['controller-0', 'controller-1'],
+    #     'elastic-client': ['controller-0', 'controller-1'],
+    #     'elastic-controller': ['controller-0', 'controller-1'],
+    #     ...
+    # }
+    #
+    host_list = system_helper.get_host_list_data(columns=["hostname", "personality"],
+                                                 con_ssh=con_ssh, auth_info=auth_info)
+    labels_to_host_dict = {}
+    for host in (_host for _host in host_list if _host.get('hostname')):
+        hostname = host.get('hostname')
+        personality = host.get('personality')
+        if personality and personality in str(MONITORING_HOSTS):
+
+            # Add the daemon set custom label; this is a special label used
+            # only in this labels_to_host_dict
+            hosts_for_label = labels_to_host_dict.get('daemon_set', [])
+            hosts_for_label.append(hostname)
+            labels_to_host_dict.update({'daemon_set': hosts_for_label})
+
+            # Add the host's assigned labels
+            labels = host_helper.get_host_labels_info(hostname, con_ssh=con_ssh,
+                                                      auth_info=auth_info)
+            for label_name, label_status in labels.items():
+                if label_status == 'enabled':
+                    hosts_for_label = labels_to_host_dict.get(label_name, [])
+                    hosts_for_label.append(hostname)
+                    labels_to_host_dict.update({label_name: hosts_for_label})
+
+    LOG.info('labels_running_hosts:{}'.format(labels_to_host_dict))
+
+    # For each label currently assigned on the system, get the matching
+    # pod names from matching_dict
+    for label, hosts_for_label in labels_to_host_dict.items():
+        LOG.debug('----------')
+        LOG.debug('label:{} hosts:{}'.format(label, hosts_for_label))
+
+        pod_details = matching_dict.get(label)
+        if pod_details is None:
+            # This label has no entry in the matching dict; nothing to verify for it
+            continue
+
+        # Get the list of pod names we need to search for; a label can have
+        # more than one pod.
+        for partial_pod_name, running_type in pod_details.items():
+            LOG.info('-----')
+            LOG.info('partial_pod_name:{} running_type:{}'.format(partial_pod_name, running_type))
+
+            inst_found_count = 0
+            for host in hosts_for_label:
+                if is_pod_running_on_host(monitor_pods, host, partial_pod_name):
+                    # The pod was found; count the instances running across
+                    # all hosts for this pod
+                    inst_found_count += 1
+
+            # Special case for AIO-DX and mon-elasticsearch-master-x
+            if partial_pod_name == 'mon-elasticsearch-master-' and system_type == SysType.AIO_DX \
+                    and inst_found_count == 1:
+                LOG.info('Pod {} only needs to run one instance for AIO-DX'.format(
+                    partial_pod_name))
+            # Some pods only run one instance even if the label is on multiple hosts
+            elif inst_found_count == 1 and running_type == POD_RUNNING_ONE_INSTANCE:
+                LOG.info('Pod {} only needs to run one instance'.format(partial_pod_name))
+            # Pod did not match the number of hosts it is supposed to run on
+            elif inst_found_count != len(hosts_for_label):
+                LOG.error('Pod check for {} failed, missing instances'.format(partial_pod_name))
+                return False
+
+            LOG.info('Check for pod {} SUCCESS'.format(partial_pod_name))
+
+    return True
+
+
+@mark.skipif(not stx_monitor_file_exist(), reason="Missing stx-monitor tar file from system")
+@mark.platform_sanity
+def test_stx_monitor(setup_app):
+    """
+    Test stx-monitor application
+
+    Assumptions: /home/sysadmin/stx-monitor.tgz is present on controller-0
+
+    Args:
+        setup_app: fixture
+
+    Setups:
+        - application remove and delete stx-monitor:
+            application-remove stx-monitor
+            application-delete stx-monitor
+        - delete images from all registries on all hosts:
+            docker images | grep elastic | awk '{print $3}'
+            docker image rm --force
+        - remove all stx-monitor labels from all hosts
+            e.g. host-label-remove
+
+    Test Steps:
+        - Assign labels (varies depending on type of system and hosts).
+            e.g. host-label-assign