From acefd544f0f02aa348e29a46be925436349e542d Mon Sep 17 00:00:00 2001
From: Jim Gauld
Date: Thu, 14 Feb 2019 15:42:07 -0500
Subject: [PATCH 1/8] Mitigate memory leak of sessions by disabling sudo for sriov agent

The sriov agent was polling devices via 'sudo ip link show', and this resulted in a severe memory leak. Each invocation of 'sudo' engages the host 'dbus-daemon', and somewhere along that path the host fails to clean up login sessions.

Symptoms:
- gradual memory exhaustion until the system becomes unstable; spontaneous host reboots due to delays or OOM
- huge growth of kernel slab
- thousands of /sys/fs/cgroup/systemd/user.slice/user-0.slice session-x*.scope files with empty 'tasks', i.e., sessions that should have been deleted
- huge latency seen with ssh and various systemd commands

The problem is mitigated by disabling 'sudo' for the sriov agent, using a helm override that configures [agent]/root_helper=''.

Testing:
- Verified that we could launch a VM with an SR-IOV interface; VFs were able to set MAC and VLAN attributes.

Closes-Bug: 1815106
Change-Id: I0c57629c01b7407c99cc7f38b409019ab87af859
Signed-off-by: Jim Gauld
---
sysinv/sysinv/sysinv/sysinv/helm/neutron.py | 8 ++++++++ 1 file changed, 8 insertions(+)
diff --git a/sysinv/sysinv/sysinv/sysinv/helm/neutron.py b/sysinv/sysinv/sysinv/sysinv/helm/neutron.py
index 169e7173e0..4f7bc3d1a5 100644
--- a/sysinv/sysinv/sysinv/sysinv/helm/neutron.py
+++ b/sysinv/sysinv/sysinv/sysinv/helm/neutron.py
@@ -246,6 +246,14 @@ class NeutronHelm(openstack.OpenstackBaseHelm): 'securitygroup': { 'firewall_driver': 'noop', }, + # Mitigate host OS memory leak of cgroup session-*scope files + # and kernel slab resources. The leak is triggered using 'sudo' + # which utilizes the host dbus-daemon. The sriov agent frequently + # polls devices via 'ip link show' using run_as_root=True, but + # does not actually require 'sudo'. + 'agent': { + 'root_helper': '', + }, 'sriov_nic': sriov_nic, }

From cf23446094d52851e4bd2ade516ab724b65844f0 Mon Sep 17 00:00:00 2001
From: Dean Troyer
Date: Tue, 12 Feb 2019 17:06:53 -0600
Subject: [PATCH 2/8] Fix configutilities and controllerconfig installs in DevStack

Use the DevStack-provided functions to do the Python installations for configutilities and controllerconfig. Prepare the plugin settings for declaring DevStack prereqs, which are supported in master's DevStack playbook.

Also do not enable all services by default. sysinv-api is disabled in the devstack job as it does not start properly under Bionic. We will address this separately.
Change-Id: Ib57863526d285049b5964828e1b60bf215d25a23 Signed-off-by: Dean Troyer --- .zuul.yaml | 3 ++- devstack/lib/stx-config | 14 ++++++++------ devstack/settings | 9 +++++++-- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/.zuul.yaml b/.zuul.yaml index 2d02a9db55..c17ef2659e 100644 --- a/.zuul.yaml +++ b/.zuul.yaml @@ -180,7 +180,8 @@ # fm-rest-api: true # fm-mgr: true sysinv-agent: true - sysinv-api: true + # Skip sysinv-api for now, needs more attention + # sysinv-api: true sysinv-cond: true mysql: false postgresql: true diff --git a/devstack/lib/stx-config b/devstack/lib/stx-config index e8d261913a..9bb9854ca8 100644 --- a/devstack/lib/stx-config +++ b/devstack/lib/stx-config @@ -162,15 +162,17 @@ function install_cgtsclient { } function install_configutilities { - pushd $STXCONFIG_CONFUTILS - sudo python setup.py install --root=/ --install-lib=$PYTHON_SITE_DIR --prefix=/usr --install-data=/usr/share --single-version-externally-managed - popd + # We can't use setup_develop as there is no setup.cfg file present for configutilities + setup_package $STXCONFIG_CONFUTILS -e } function install_controllerconfig { - pushd $STXCONFIG_CONTROL - sudo python setup.py install --root=/ --install-lib=$PYTHON_SITE_DIR --prefix=/usr --install-data=/usr/share --single-version-externally-managed - popd + # This is a hack to work around the lack of proper global-requirements + # setup in these packages + pip_install pycrypto + + # We can't use setup_develop as there is no setup.cfg file present for controllerconfig + setup_package $STXCONFIG_CONTROL -e } function install_sysinv { diff --git a/devstack/settings b/devstack/settings index 74795743ff..87cfd07319 100644 --- a/devstack/settings +++ b/devstack/settings @@ -7,8 +7,13 @@ STX_CONFIG_NAME=stx-config ######### Plugin Specific ########## -enable_service $STX_CONFIG_NAME sysinv sysinv-api sysinv-cond -#define_plugin sysinv +enable_service $STX_CONFIG_NAME + +# This must not use any variables to work properly in OpenStack's DevStack playbook +define_plugin stx-config +# This works for Zuul jobs using OpenStack's DevStack roles +plugin_requires stx-config stx-integ +plugin_requires stx-config stx-update # Handle STX pre-reqs # stx-integ From d5db10f6b7df537924efef684395bee3c608d23a Mon Sep 17 00:00:00 2001 From: Kristine Bujold Date: Tue, 12 Feb 2019 10:03:48 -0500 Subject: [PATCH 3/8] Move neutron static configs to Armada manifest Move all neutron static configurations from the overrides to the Armada manifest. This is being done so we have a consistent way of managing containerized openstack configurations. Static configurations will be located in the Armada manifest and dynamic configuration will be located in the overrides files. 
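To illustrate the intended layering, the sketch below shows dynamic overrides being merged over static manifest values; the deep_merge helper and the sample values are illustrative assumptions, not code from this change:

    # Illustrative sketch only: deep_merge and the sample values are
    # assumptions, not code from this change.
    def deep_merge(static, dynamic):
        # Recursively merge dynamic overrides on top of static values.
        merged = dict(static)
        for key, value in dynamic.items():
            if isinstance(value, dict) and isinstance(merged.get(key), dict):
                merged[key] = deep_merge(merged[key], value)
            else:
                merged[key] = value
        return merged

    # Static values ship in the Armada manifest ...
    static_conf = {'neutron': {'DEFAULT': {'l3_ha': False,
                                           'service_plugins': 'router'}}}
    # ... while system-dependent values come from the helm plugin overrides.
    dynamic_conf = {'neutron': {'DEFAULT': {'service_plugins': 'router,qos'}}}

    assert deep_merge(static_conf, dynamic_conf) == {
        'neutron': {'DEFAULT': {'l3_ha': False,
                                'service_plugins': 'router,qos'}}}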
Story: 2003909 Task: 29433 Change-Id: I5baf0bbc15912e0303955456151e69856bba0385 Signed-off-by: Kristine Bujold --- .../stx-openstack-helm/centos/build_srpm.data | 2 +- .../manifests/manifest-no-tests.yaml | 68 ++++++- .../manifests/manifest.yaml | 68 ++++++- sysinv/sysinv/centos/build_srpm.data | 2 +- sysinv/sysinv/sysinv/sysinv/helm/neutron.py | 172 ++++++++---------- 5 files changed, 193 insertions(+), 119 deletions(-) diff --git a/kubernetes/applications/stx-openstack/stx-openstack-helm/centos/build_srpm.data b/kubernetes/applications/stx-openstack/stx-openstack-helm/centos/build_srpm.data index 7ad6604ed5..7037828a98 100644 --- a/kubernetes/applications/stx-openstack/stx-openstack-helm/centos/build_srpm.data +++ b/kubernetes/applications/stx-openstack/stx-openstack-helm/centos/build_srpm.data @@ -1,3 +1,3 @@ SRC_DIR="stx-openstack-helm" COPY_LIST_TO_TAR="$PKG_BASE/../../../helm-charts/rbd-provisioner $PKG_BASE/../../../helm-charts/garbd" -TIS_PATCH_VER=3 \ No newline at end of file +TIS_PATCH_VER=4 diff --git a/kubernetes/applications/stx-openstack/stx-openstack-helm/stx-openstack-helm/manifests/manifest-no-tests.yaml b/kubernetes/applications/stx-openstack/stx-openstack-helm/stx-openstack-helm/manifests/manifest-no-tests.yaml index b010e9b33c..ab5d0c98e9 100644 --- a/kubernetes/applications/stx-openstack/stx-openstack-helm/stx-openstack-helm/manifests/manifest-no-tests.yaml +++ b/kubernetes/applications/stx-openstack/stx-openstack-helm/stx-openstack-helm/manifests/manifest-no-tests.yaml @@ -762,6 +762,9 @@ data: pod: replicas: server: 2 + user: + neutron: + uid: 0 affinity: anti: type: @@ -769,13 +772,13 @@ data: labels: agent: dhcp: - node_selector_key: openstack-control-plane + node_selector_key: openstack-compute-node node_selector_value: enabled l3: - node_selector_key: openstack-control-plane + node_selector_key: openstack-compute-node node_selector_value: enabled metadata: - node_selector_key: openstack-control-plane + node_selector_key: openstack-compute-node node_selector_value: enabled job: node_selector_key: openstack-control-plane @@ -798,19 +801,68 @@ data: node_selector_value: enabled network: interface: - tunnel: enp0s3 + tunnel: docker0 + backend: + - openvswitch + - sriov conf: neutron: DEFAULT: - l3_ha: true - min_l3_agents_per_router: 2 - max_l3_agents_per_router: 5 + l3_ha: false + min_l3_agents_per_router: 1 + max_l3_agents_per_router: 1 l3_ha_network_type: vxlan - dhcp_agents_per_network: 2 + dhcp_agents_per_network: 1 + max_overflow: 64 + max_pool_size: 1 + idle_timeout: 60 + router_status_managed: true + vlan_transparent: true + wsgi_default_pool_size: 100 + notify_nova_on_port_data_changes: true + notify_nova_on_port_status_changes: true + control_exchange: neutron + core_plugin: neutron.plugins.ml2.plugin.Ml2Plugin + state_path: /var/run/neutron + syslog_log_facility: local2 + use_syslog: true + pnet_audit_enabled: false + driver: messagingv2 + enable_proxy_headers_parsing: true + lock_path: /var/run/neutron/lock + log_format: '[%(name)s] %(message)s' + policy_file: /etc/neutron/policy.json + service_plugins: router + dns_domain: openstacklocal + enable_new_agents: false + allow_automatic_dhcp_failover: true + allow_automatic_l3agent_failover: true + agent: + root_helper: sudo + vhost: + vhost_user_enabled: false + dhcp_agent: + DEFAULT: + enable_isolated_metadata: true + enable_metadata_network: false + interface_driver: openvswitch + resync_interval: 30 + l3_agent: + DEFAULT: + agent_mode: dvr_snat + interface_driver: openvswitch + metadata_port: 80 plugins: 
ml2_conf: ml2_type_flat: flat_networks: public + ml2: + mechanism_drivers: openvswitch,sriovnicswitch,l2population + path_mtu: 0 + tenant_network_types: vlan,vxlan + type_drivers: managed_flat,managed_vlan,managed_vxlan + securitygroup: + firewall_driver: noop openvswitch_agent: agent: tunnel_types: vxlan diff --git a/kubernetes/applications/stx-openstack/stx-openstack-helm/stx-openstack-helm/manifests/manifest.yaml b/kubernetes/applications/stx-openstack/stx-openstack-helm/stx-openstack-helm/manifests/manifest.yaml index 2b772de5e0..4562fe9762 100644 --- a/kubernetes/applications/stx-openstack/stx-openstack-helm/stx-openstack-helm/manifests/manifest.yaml +++ b/kubernetes/applications/stx-openstack/stx-openstack-helm/stx-openstack-helm/manifests/manifest.yaml @@ -762,6 +762,9 @@ data: pod: replicas: server: 2 + user: + neutron: + uid: 0 affinity: anti: type: @@ -769,13 +772,13 @@ data: labels: agent: dhcp: - node_selector_key: openstack-control-plane + node_selector_key: openstack-compute-node node_selector_value: enabled l3: - node_selector_key: openstack-control-plane + node_selector_key: openstack-compute-node node_selector_value: enabled metadata: - node_selector_key: openstack-control-plane + node_selector_key: openstack-compute-node node_selector_value: enabled job: node_selector_key: openstack-control-plane @@ -798,19 +801,68 @@ data: node_selector_value: enabled network: interface: - tunnel: enp0s3 + tunnel: docker0 + backend: + - openvswitch + - sriov conf: neutron: DEFAULT: - l3_ha: true - min_l3_agents_per_router: 2 - max_l3_agents_per_router: 5 + l3_ha: false + min_l3_agents_per_router: 1 + max_l3_agents_per_router: 1 l3_ha_network_type: vxlan - dhcp_agents_per_network: 2 + dhcp_agents_per_network: 1 + max_overflow: 64 + max_pool_size: 1 + idle_timeout: 60 + router_status_managed: true + vlan_transparent: true + wsgi_default_pool_size: 100 + notify_nova_on_port_data_changes: true + notify_nova_on_port_status_changes: true + control_exchange: neutron + core_plugin: neutron.plugins.ml2.plugin.Ml2Plugin + state_path: /var/run/neutron + syslog_log_facility: local2 + use_syslog: true + pnet_audit_enabled: false + driver: messagingv2 + enable_proxy_headers_parsing: true + lock_path: /var/run/neutron/lock + log_format: '[%(name)s] %(message)s' + policy_file: /etc/neutron/policy.json + service_plugins: router + dns_domain: openstacklocal + enable_new_agents: false + allow_automatic_dhcp_failover: true + allow_automatic_l3agent_failover: true + agent: + root_helper: sudo + vhost: + vhost_user_enabled: false + dhcp_agent: + DEFAULT: + enable_isolated_metadata: true + enable_metadata_network: false + interface_driver: openvswitch + resync_interval: 30 + l3_agent: + DEFAULT: + agent_mode: dvr_snat + interface_driver: openvswitch + metadata_port: 80 plugins: ml2_conf: ml2_type_flat: flat_networks: public + ml2: + mechanism_drivers: openvswitch,sriovnicswitch,l2population + path_mtu: 0 + tenant_network_types: vlan,vxlan + type_drivers: managed_flat,managed_vlan,managed_vxlan + securitygroup: + firewall_driver: noop openvswitch_agent: agent: tunnel_types: vxlan diff --git a/sysinv/sysinv/centos/build_srpm.data b/sysinv/sysinv/centos/build_srpm.data index 2d590250af..6164e638b9 100644 --- a/sysinv/sysinv/centos/build_srpm.data +++ b/sysinv/sysinv/centos/build_srpm.data @@ -1,2 +1,2 @@ SRC_DIR="sysinv" -TIS_PATCH_VER=300 +TIS_PATCH_VER=301 diff --git a/sysinv/sysinv/sysinv/sysinv/helm/neutron.py b/sysinv/sysinv/sysinv/sysinv/helm/neutron.py index c900d70472..d3db207784 100644 --- 
a/sysinv/sysinv/sysinv/sysinv/helm/neutron.py +++ b/sysinv/sysinv/sysinv/sysinv/helm/neutron.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2018 Wind River Systems, Inc. +# Copyright (c) 2018-2019 Wind River Systems, Inc. # # SPDX-License-Identifier: Apache-2.0 # @@ -32,40 +32,13 @@ class NeutronHelm(openstack.OpenstackBaseHelm): overrides = { common.HELM_NS_OPENSTACK: { 'pod': { - 'user': { - 'neutron': { - 'uid': 0 - } - }, 'replicas': { 'server': self._num_controllers() }, }, - 'network': { - 'interface': { - 'tunnel': 'docker0' - }, - 'backend': ['openvswitch', 'sriov'], - }, 'conf': { - 'neutron': self._get_neutron_config(), 'plugins': { - 'ml2_conf': self._get_neutron_ml2_config(), - }, - 'dhcp_agent': { - 'DEFAULT': { - 'resync_interval': 30, - 'enable_isolated_metadata': True, - 'enable_metadata_network': False, - 'interface_driver': 'openvswitch', - }, - }, - 'l3_agent': { - 'DEFAULT': { - 'interface_driver': 'openvswitch', - 'agent_mode': 'dvr_snat', - 'metadata_port': 80, - }, + 'ml2_conf': self._get_neutron_ml2_config() }, 'overrides': { 'neutron_ovs-agent': { @@ -85,7 +58,6 @@ class NeutronHelm(openstack.OpenstackBaseHelm): }, } }, - 'labels': self._get_labels_overrides(), 'endpoints': self._get_endpoints_overrides(), 'images': self._get_images_overrides(), } @@ -116,30 +88,90 @@ class NeutronHelm(openstack.OpenstackBaseHelm): def update_dynamic_options(self, overrides): if utils.is_virtual(): - overrides['neutron']['vhost']['vhost_user_enabled'] = False + overrides.update({ + 'neutron': { + 'vhost': { + 'vhost_user_enabled': False + } + } + }) def update_from_service_parameters(self, overrides): service_parameters = self._get_service_parameters(service=constants.SERVICE_TYPE_NETWORK) for param in service_parameters: if param.section == constants.SERVICE_PARAM_SECTION_NETWORK_DEFAULT: if param.name == constants.SERVICE_PARAM_NAME_DEFAULT_SERVICE_PLUGINS: - overrides['neutron']['DEFAULT']['service_plugins'] = str(param.value) + overrides.update({ + 'neutron': { + 'DEFAULT': { + 'service_plugins': str(param.value) + } + } + }) if param.name == constants.SERVICE_PARAM_NAME_DEFAULT_DNS_DOMAIN: - overrides['neutron']['DEFAULT']['dns_domain'] = str(param.value) + overrides.update({ + 'neutron': { + 'DEFAULT': { + 'dns_domain': str(param.value) + } + } + }) if param.name == constants.SERVICE_PARAM_NAME_BASE_MAC: - overrides['neutron']['DEFAULT']['base_mac'] = str(param.value) + overrides.update({ + 'neutron': { + 'DEFAULT': { + 'base_mac': str(param.value) + } + } + }) if param.name == constants.SERVICE_PARAM_NAME_DVR_BASE_MAC: - overrides['neutron']['DEFAULT']['dvr_base_mac'] = str(param.value) + overrides.update({ + 'neutron': { + 'DEFAULT': { + 'dvr_base_mac': str(param.value) + } + } + }) elif param.section == constants.SERVICE_PARAM_SECTION_NETWORK_ML2: if param.name == constants.SERVICE_PARAM_NAME_ML2_MECHANISM_DRIVERS: - overrides['plugins']['ml2_conf']['ml2']['mechanism_drivers'] = str(param.value) + overrides.update({ + 'plugins': { + 'ml2_conf': { + 'ml2': { + 'mechanism_drivers': str(param.value) + } + } + } + }) if param.name == constants.SERVICE_PARAM_NAME_ML2_EXTENSION_DRIVERS: - overrides['plugins']['ml2_conf']['ml2']['extension_drivers'] = str(param.value) + overrides.update({ + 'plugins': { + 'ml2_conf': { + 'ml2': { + 'extension_drivers': str(param.value) + } + } + } + }) if param.name == constants.SERVICE_PARAM_NAME_ML2_TENANT_NETWORK_TYPES: - overrides['plugins']['ml2_conf']['ml2']['tenant_network_types'] = str(param.value) + overrides.update({ + 'plugins': { + 'ml2_conf': 
{ + 'ml2': { + 'tenant_network_types': str(param.value) + } + } + } + })
elif param.section == constants.SERVICE_PARAM_SECTION_NETWORK_DHCP: if param.name == constants.SERVICE_PARAM_NAME_DHCP_FORCE_METADATA: - overrides['dhcp_agent']['DEFAULT']['force_metadata'] = str(param.value) + overrides.update({ + 'dhcp_agent': { + 'DEFAULT': { + 'force_metadata': str(param.value) + } + } + })
def _get_per_host_overrides(self): host_list = []
@@ -249,49 +281,6 @@ class NeutronHelm(openstack.OpenstackBaseHelm): 'sriov_nic': sriov_nic, } - def _get_neutron_config(self): - neutron_config = { - 'DEFAULT': { - 'l3_ha': False, - 'min_l3_agents_per_router': 1, - 'max_l3_agents_per_router': 1, - 'l3_ha_network_type': 'vxlan', - 'dhcp_agents_per_network': 1, - 'max_overflow': 64, - 'max_pool_size': 1, - 'idle_timeout': 60, - 'router_status_managed': True, - 'vlan_transparent': True, - 'wsgi_default_pool_size': 100, - 'notify_nova_on_port_data_changes': True, - 'notify_nova_on_port_status_changes': True, - 'control_exchange': 'neutron', - 'core_plugin': 'neutron.plugins.ml2.plugin.Ml2Plugin', - 'state_path': '/var/run/neutron', - 'syslog_log_facility': 'local2', - 'use_syslog': True, - 'pnet_audit_enabled': False, - 'driver': 'messagingv2', - 'enable_proxy_headers_parsing': True, - 'lock_path': '/var/run/neutron/lock', - 'log_format': '[%(name)s] %(message)s', - 'policy_file': '/etc/neutron/policy.json', - 'service_plugins': 'router', - 'dns_domain': 'openstacklocal', - 'enable_new_agents': False, - 'allow_automatic_dhcp_failover': True, - 'allow_automatic_l3agent_failover': True, - }, - 'vhost': { - 'vhost_user_enabled': True, - }, - 'agent': { - 'root_helper': 'sudo', - }, - } - - return neutron_config - def _get_ml2_physical_network_mtus(self): ml2_physical_network_mtus = [] datanetworks = self.dbapi.datanetworks_get_all()
@@ -304,15 +293,7 @@ class NeutronHelm(openstack.OpenstackBaseHelm): def _get_neutron_ml2_config(self): ml2_config = { 'ml2': { - 'type_drivers': 'managed_flat,managed_vlan,managed_vxlan', - 'tenant_network_types': 'vlan,vxlan', - 'mechanism_drivers': 'openvswitch,sriovnicswitch,l2population', - 'path_mtu': 0, 'physical_network_mtus': self._get_ml2_physical_network_mtus() - - }, - 'securitygroup': { - 'firewall_driver': 'noop', }, } LOG.info("_get_neutron_ml2_config=%s" % ml2_config)
@@ -415,16 +396,5 @@ class NeutronHelm(openstack.OpenstackBaseHelm): return overrides - def _get_labels_overrides(self): - overrides = { - 'agent': { - 'dhcp': {'node_selector_key': 'openvswitch'}, - 'l3': {'node_selector_key': 'openvswitch'}, - 'metadata': {'node_selector_key': 'openvswitch'}, - }, - } - - return overrides - def get_region_name(self): return self._get_service_region_name(self.SERVICE_NAME)

From 5b94294002617b18bc0f98b206a24cec38a5b929 Mon Sep 17 00:00:00 2001
From: Angie Wang
Date: Thu, 7 Feb 2019 23:42:25 -0500
Subject: [PATCH 4/8] Support stx-openstack app install with the authed local registry

The functionality of local docker registry authentication will be enabled in commit https://review.openstack.org/#/c/626355/. However, the local docker registry is currently used to pull/push images during application apply without authentication, and no credentials are passed to kubernetes when it pulls images on nodes other than the active controller. In order to install the stx-openstack app with a local docker registry that has authentication turned on, this commit updates the following:

1. Pass the user credentials when pulling/pushing images from the local registry during application apply.

2.
Create a well-known registry secret "default-registry-key" that holds the authorization token during stx-openstack app apply, and delete the secret during removal. The helm-toolkit is updated to refer to this secret in the k8s openstack service account template so kubelet can pull images from the local registry. This secret is also added to the rbd-provisioner service account, since that chart does not use helm-toolkit to create its service account.

Note: #2 is a short-term solution. The long-term solution is to implement the BP https://blueprints.launchpad.net/openstack-helm/+spec/support-docker-registry-with-authentication-turned-on.

Story: 2002840
Task: 28945
Depends-On: https://review.openstack.org/636181
Change-Id: I015dccd12c5c7fa7a4bea74eef8d172f03b5d60e
Signed-off-by: Angie Wang
---
.../templates/serviceaccount.yaml | 2 + sysinv/sysinv/centos/build_srpm.data | 2 +- .../sysinv/sysinv/sysinv/common/exception.py | 9 + .../sysinv/sysinv/sysinv/common/kubernetes.py | 110 ++++++++ .../sysinv/sysinv/conductor/kube_app.py | 252 +++++++++++++----- 5 files changed, 301 insertions(+), 74 deletions(-)
diff --git a/kubernetes/helm-charts/rbd-provisioner/templates/serviceaccount.yaml b/kubernetes/helm-charts/rbd-provisioner/templates/serviceaccount.yaml
index 14aacd984d..a839643ebb 100644
--- a/kubernetes/helm-charts/rbd-provisioner/templates/serviceaccount.yaml
+++ b/kubernetes/helm-charts/rbd-provisioner/templates/serviceaccount.yaml
@@ -12,4 +12,6 @@ kind: ServiceAccount metadata: name: {{ .Values.rbac.serviceAccount }} namespace: {{ .Release.Namespace }} +imagePullSecrets: + - name: default-registry-key {{- end }}
diff --git a/sysinv/sysinv/centos/build_srpm.data b/sysinv/sysinv/centos/build_srpm.data
index 6164e638b9..293e65bcfe 100644
--- a/sysinv/sysinv/centos/build_srpm.data
+++ b/sysinv/sysinv/centos/build_srpm.data
@@ -1,2 +1,2 @@ SRC_DIR="sysinv" -TIS_PATCH_VER=301 +TIS_PATCH_VER=302
diff --git a/sysinv/sysinv/sysinv/sysinv/common/exception.py b/sysinv/sysinv/sysinv/sysinv/common/exception.py
index 30077ae331..3b96cb78b6 100644
--- a/sysinv/sysinv/sysinv/sysinv/common/exception.py
+++ b/sysinv/sysinv/sysinv/sysinv/common/exception.py
@@ -923,6 +923,11 @@ class KubeAppNotFound(NotFound): message = _("No application with name %(name)s.") +class DockerRegistryCredentialNotFound(NotFound): + message = _("Credentials to access local docker registry " + "for user %(name)s could not be found.") + + class SDNNotEnabled(SysinvException): message = _("SDN configuration is not enabled.")
@@ -1055,6 +1060,10 @@ class KubeAppProgressMonitorTimeout(SysinvException): message = "Armada execution progress monitor timed out." +class K8sNamespaceDeleteTimeout(SysinvException): + message = "Namespace %(name)s deletion timeout." + + class InvalidEndpoint(SysinvException): message = "The provided endpoint is invalid"
diff --git a/sysinv/sysinv/sysinv/sysinv/common/kubernetes.py b/sysinv/sysinv/sysinv/sysinv/common/kubernetes.py
index 7d5abffc54..461adc4fb5 100644
--- a/sysinv/sysinv/sysinv/sysinv/common/kubernetes.py
+++ b/sysinv/sysinv/sysinv/sysinv/common/kubernetes.py
@@ -67,3 +67,113 @@ class KubeOperator(object): except Exception as e: LOG.error("Kubernetes exception in kube_get_nodes: %s" % e) raise + + def kube_create_namespace(self, namespace): + body = {'metadata': {'name': namespace}} + + c = self._get_kubernetesclient() + try: + c.create_namespace(body) + except ApiException as e: + if e.status == httplib.CONFLICT: + # Already exists + LOG.warn("Namespace %s already exist."
% namespace) + else: + LOG.error("Failed to create Namespace %s: %s" % (namespace, e.body)) + raise + except Exception as e: + LOG.error("Kubernetes exception in " + "_kube_create_namespace %s: %s" % (namespace, e)) + raise + + def kube_get_namespace(self, namespace): + c = self._get_kubernetesclient() + try: + c.read_namespace(namespace) + return True + except ApiException as e: + if e.status == httplib.NOT_FOUND: + return False + else: + LOG.error("Failed to get Namespace %s: %s" % (namespace, e.body)) + raise + except Exception as e: + LOG.error("Kubernetes exception in " + "kube_get_namespace %s: %s" % (namespace, e)) + raise + + def kube_get_secret(self, name, namespace): + c = self._get_kubernetesclient() + try: + c.read_namespaced_secret(name, namespace) + return True + except ApiException as e: + if e.status == httplib.NOT_FOUND: + return False + else: + LOG.error("Failed to get Secret %s under " + "Namespace %s: %s" % (name, namespace, e.body)) + raise + except Exception as e: + LOG.error("Kubernetes exception in kube_get_secret: %s" % e) + raise + + def kube_create_secret(self, namespace, body): + c = self._get_kubernetesclient() + try: + c.create_namespaced_secret(namespace, body) + except Exception as e: + LOG.error("Failed to create Secret %s under Namespace %s: " + "%s" % (body['metadata']['name'], namespace, e)) + raise + + def kube_delete_persistent_volume_claim(self, namespace, **kwargs): + c = self._get_kubernetesclient() + try: + c.delete_collection_namespaced_persistent_volume_claim( + namespace, **kwargs) + except Exception as e: + LOG.error("Failed to delete Persistent Volume Claim " + "under Namespace %s: %s" % (namespace, e)) + raise + + def kube_delete_secret(self, name, namespace, **kwargs): + body = {} + + if kwargs: + body.update(kwargs) + + c = self._get_kubernetesclient() + try: + c.delete_namespaced_secret(name, namespace, body) + except ApiException as e: + if e.status == httplib.NOT_FOUND: + LOG.warn("Secret %s under Namespace %s " + "not found." % (name, namespace)) + else: + LOG.error("Failed to clean up Secret %s under " + "Namespace %s: %s" % (name, namespace, e.body)) + raise + except Exception as e: + LOG.error("Kubernetes exception in kube_delete_secret: %s" % e) + raise + + def kube_delete_namespace(self, namespace, **kwargs): + body = {} + + if kwargs: + body.update(kwargs) + + c = self._get_kubernetesclient() + try: + c.delete_namespace(namespace, body) + except ApiException as e: + if e.status == httplib.NOT_FOUND: + LOG.warn("Namespace %s not found." 
% namespace) + else: + LOG.error("Failed to clean up Namespace %s: " + "%s" % (namespace, e.body)) + raise + except Exception as e: + LOG.error("Kubernetes exception in kube_delete_namespace: %s" % e) + raise
diff --git a/sysinv/sysinv/sysinv/sysinv/conductor/kube_app.py b/sysinv/sysinv/sysinv/sysinv/conductor/kube_app.py
index 01867ea4a3..9c5d7e2ea9 100644
--- a/sysinv/sysinv/sysinv/sysinv/conductor/kube_app.py
+++ b/sysinv/sysinv/sysinv/sysinv/conductor/kube_app.py
@@ -9,8 +9,10 @@ """ System Inventory Kubernetes Application Operator.""" +import base64 import docker import grp +import keyring import os import pwd import re
@@ -57,6 +59,9 @@ INSTALLATION_TIMEOUT = 3600 MAX_DOWNLOAD_THREAD = 20 TARFILE_DOWNLOAD_CONNECTION_TIMEOUT = 60 TARFILE_TRANSFER_CHUNK_SIZE = 1024 * 512 +DOCKER_REGISTRY_USER = 'admin' +DOCKER_REGISTRY_SERVICE = 'CGCS' +DOCKER_REGISTRY_SECRET = 'default-registry-key' # Helper functions
@@ -97,6 +102,17 @@ def get_app_install_root_path_ownership(): return (uid, gid) +def get_local_docker_registry_auth(): + registry_password = keyring.get_password( + DOCKER_REGISTRY_SERVICE, DOCKER_REGISTRY_USER) + if not registry_password: + raise exception.DockerRegistryCredentialNotFound( + name=DOCKER_REGISTRY_USER) + + return dict(username=DOCKER_REGISTRY_USER, + password=registry_password) + + Chart = namedtuple('Chart', 'name namespace')
@@ -105,7 +121,7 @@ class AppOperator(object): def __init__(self, dbapi): self._dbapi = dbapi - self._docker = DockerHelper() + self._docker = DockerHelper(self._dbapi) self._helm = helm.HelmOperator(self._dbapi) self._kube = kubernetes.KubeOperator(self._dbapi) self._lock = threading.Lock()
@@ -653,6 +669,111 @@ class AppOperator(object): self._remove_host_labels(controller_hosts, controller_labels_set) self._remove_host_labels(compute_hosts, compute_labels_set) + def _create_local_registry_secrets(self, app_name): + # Temporary function to create default registry secret + # which would be used by kubernetes to pull images from + # the local registry. + # This should be removed after OSH supports the deployment + # with a registry that has authentication turned on. + # https://blueprints.launchpad.net/openstack-helm/+spec/ + # support-docker-registry-with-authentication-turned-on + body = { + 'type': 'kubernetes.io/dockerconfigjson', + 'metadata': {}, + 'data': {} + } + + app_ns = self._helm.get_helm_application_namespaces(app_name) + namespaces = \ + list(set([ns for ns_list in app_ns.values() for ns in ns_list])) + for ns in namespaces: + if (ns == common.HELM_NS_HELM_TOOLKIT or + self._kube.kube_get_secret(DOCKER_REGISTRY_SECRET, ns)): + # Secret already exists + continue + + try: + local_registry_server = self._docker.get_local_docker_registry_server() + local_registry_auth = get_local_docker_registry_auth() + + auth = '{0}:{1}'.format(local_registry_auth['username'], + local_registry_auth['password']) + token = '{{\"auths\": {{\"{0}\": {{\"auth\": \"{1}\"}}}}}}'.format( + local_registry_server, base64.b64encode(auth)) + + body['data'].update({'.dockerconfigjson': base64.b64encode(token)}) + body['metadata'].update({'name': DOCKER_REGISTRY_SECRET, + 'namespace': ns}) + + if not self._kube.kube_get_namespace(ns): + self._kube.kube_create_namespace(ns) + self._kube.kube_create_secret(ns, body) + LOG.info("Secret %s created under Namespace %s."
% (DOCKER_REGISTRY_SECRET, ns)) + except Exception as e: + LOG.error(e) + raise + + def _delete_local_registry_secrets(self, app_name): + # Temporary function to delete default registry secrets + # which were created during stx-openstack app apply. + # This should be removed after OSH supports the deployment + # with a registry that has authentication turned on. + # https://blueprints.launchpad.net/openstack-helm/+spec/ + # support-docker-registry-with-authentication-turned-on + + app_ns = self._helm.get_helm_application_namespaces(app_name) + namespaces = \ + list(set([ns for ns_list in app_ns.values() for ns in ns_list])) + + for ns in namespaces: + if ns == common.HELM_NS_HELM_TOOLKIT: + continue + + try: + LOG.info("Deleting Secret %s under Namespace " + "%s ..." % (DOCKER_REGISTRY_SECRET, ns)) + self._kube.kube_delete_secret( + DOCKER_REGISTRY_SECRET, ns, grace_period_seconds=0) + LOG.info("Secret %s under Namespace %s delete " + "completed." % (DOCKER_REGISTRY_SECRET, ns)) + except Exception as e: + LOG.error(e) + raise + + def _delete_namespace(self, namespace): + loop_timeout = 1 + timeout = 300 + try: + LOG.info("Deleting Namespace %s ..." % namespace) + self._kube.kube_delete_namespace(namespace, + grace_period_seconds=0) + + # Namespace termination timeout 5mins + while(loop_timeout <= timeout): + if not self._kube.kube_get_namespace(namespace): + # Namespace has been terminated + break + loop_timeout += 1 + time.sleep(1) + + if loop_timeout > timeout: + raise exception.K8sNamespaceDeleteTimeout(name=namespace) + LOG.info("Namespace %s delete completed." % namespace) + except Exception as e: + LOG.error(e) + raise + + def _delete_persistent_volume_claim(self, namespace): + try: + LOG.info("Deleting Persistent Volume Claim " + "under Namespace %s ..."
% namespace) + self._kube.kube_delete_persistent_volume_claim(namespace, + timeout_seconds=10) + LOG.info("Persistent Volume Claim delete completed.") + except Exception as e: + LOG.error(e) + raise + def _get_list_of_charts(self, manifest_file): charts = [] with open(manifest_file, 'r') as f: @@ -893,6 +1014,7 @@ class AppOperator(object): try: app.charts = self._get_list_of_charts(app.armada_mfile_abs) if app.system_app: + self._create_local_registry_secrets(app.name) self._update_app_status( app, new_progress=constants.APP_PROGRESS_GENERATE_OVERRIDES) LOG.info("Generating application overrides...") @@ -956,59 +1078,14 @@ class AppOperator(object): if self._make_armada_request_with_monitor(app, constants.APP_DELETE_OP): if app.system_app: - # TODO convert these kubectl commands to use the k8s api - p1 = subprocess.Popen( - ['kubectl', '--kubeconfig=/etc/kubernetes/admin.conf', - 'get', 'pvc', '--no-headers', '-n', 'openstack'], - stdout=subprocess.PIPE) - p2 = subprocess.Popen(['awk', '{print $3}'], - stdin=p1.stdout, - stdout=subprocess.PIPE) - p3 = subprocess.Popen( - ['xargs', '-i', 'kubectl', - '--kubeconfig=/etc/kubernetes/admin.conf', 'delete', - 'pv', '{}', '--wait=false'], - stdin=p2.stdout, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - timer = threading.Timer(10, p3.kill) try: - timer.start() - p1.stdout.close() - p2.stdout.close() - out, err = p3.communicate() - if out and not err: - LOG.info("Persistent Volumes marked for deletion.") - else: - self._abort_operation(app, constants.APP_REMOVE_OP) - LOG.error("Failed to clean up PVs after app removal.") + self._delete_local_registry_secrets(app.name) + self._delete_persistent_volume_claim(common.HELM_NS_OPENSTACK) + self._delete_namespace(common.HELM_NS_OPENSTACK) except Exception as e: self._abort_operation(app, constants.APP_REMOVE_OP) - LOG.exception("Failed to clean up PVs after app " - "removal: %s" % e) - finally: - timer.cancel() - - p4 = subprocess.Popen( - ['kubectl', '--kubeconfig=/etc/kubernetes/admin.conf', - 'delete', 'namespace', 'openstack'], - stdout=subprocess.PIPE) - timer2 = threading.Timer(10, p4.kill) - try: - timer2.start() - out, err = p4.communicate() - if out and not err: - LOG.info("Openstack namespace delete completed.") - else: - self._abort_operation(app, constants.APP_REMOVE_OP) - LOG.error("Failed to clean up openstack namespace" - " after app removal.") - except Exception as e: - self._abort_operation(app, constants.APP_REMOVE_OP) - LOG.exception("Failed to clean up openstack namespace " - "after app removal: %s" % e) - finally: - timer2.cancel() + LOG.exception(e) + return False self._update_app_status(app, constants.APP_UPLOAD_SUCCESS) LOG.info("Application (%s) remove completed." 
% app.name) @@ -1104,6 +1181,9 @@ class AppOperator(object): class DockerHelper(object): """ Utility class to encapsulate Docker related operations """ + def __init__(self, dbapi): + self._dbapi = dbapi + def _start_armada_service(self, client): try: container = client.containers.get(ARMADA_CONTAINER_NAME) @@ -1229,34 +1309,60 @@ class DockerHelper(object): (request, manifest_file, e)) return rc - def download_an_image(self, loc_img_tag): + def get_local_docker_registry_server(self): + registry_ip = self._dbapi.address_get_by_name( + cutils.format_address_name(constants.CONTROLLER_HOSTNAME, + constants.NETWORK_TYPE_MGMT) + ).address + registry_server = '{}:{}'.format(registry_ip, common.REGISTRY_PORT) + return registry_server + + def download_an_image(self, img_tag): rc = True + local_registry_server = self.get_local_docker_registry_server() + start = time.time() - try: - # Pull image from local docker registry - LOG.info("Image %s download started from local registry" % loc_img_tag) - client = docker.APIClient(timeout=INSTALLATION_TIMEOUT) - client.pull(loc_img_tag) - except docker.errors.NotFound: + if img_tag.startswith(local_registry_server): try: - # Image is not available in local docker registry, get the image - # from the public registry and push to the local registry - LOG.info("Image %s is not available in local registry, " - "download started from public registry" % loc_img_tag) - pub_img_tag = loc_img_tag[1 + loc_img_tag.find('/'):] - client.pull(pub_img_tag) - client.tag(pub_img_tag, loc_img_tag) - client.push(loc_img_tag) + LOG.info("Image %s download started from local registry" % img_tag) + local_registry_auth = get_local_docker_registry_auth() + client = docker.APIClient(timeout=INSTALLATION_TIMEOUT) + client.pull(img_tag, auth_config=local_registry_auth) + except docker.errors.NotFound: + try: + # Pull the image from the public registry + LOG.info("Image %s is not available in local registry, " + "download started from public registry" % img_tag) + pub_img_tag = img_tag.replace(local_registry_server + "/", "") + client.pull(pub_img_tag) + except Exception as e: + rc = False + LOG.error("Image %s download failed from public registry: %s" % (pub_img_tag, e)) + return img_tag, rc + + try: + # Tag and push the image to the local registry + client.tag(pub_img_tag, img_tag) + client.push(img_tag, auth_config=local_registry_auth) + except Exception as e: + rc = False + LOG.error("Image %s push failed to local registry: %s" % (img_tag, e)) except Exception as e: rc = False - LOG.error("Image %s download failed from public registry: %s" % (pub_img_tag, e)) - except Exception as e: - rc = False - LOG.error("Image %s download failed from local registry: %s" % (loc_img_tag, e)) - elapsed_time = time.time() - start + LOG.error("Image %s download failed from local registry: %s" % (img_tag, e)) + else: + try: + LOG.info("Image %s download started from public registry" % img_tag) + client = docker.APIClient(timeout=INSTALLATION_TIMEOUT) + client.pull(img_tag) + except Exception as e: + rc = False + LOG.error("Image %s download failed from public registry: %s" % (img_tag, e)) + + elapsed_time = time.time() - start if rc: LOG.info("Image %s download succeeded in %d seconds" % - (loc_img_tag, elapsed_time)) - return loc_img_tag, rc + (img_tag, elapsed_time)) + return img_tag, rc From 0dd4b86526609b86d8c7395a7c9af13e7f769596 Mon Sep 17 00:00:00 2001 From: David Sullivan Date: Tue, 12 Feb 2019 14:09:10 -0500 Subject: [PATCH 5/8] Add replica and anti-affinity settings Add anti-affinity 
settings to openstack pods. Add replication to novncproxy, aodh, panko and rbd_provisioner services. Change-Id: I8091a54cab98ff295eba6e7dd6fa76827d149b5f Story: 2004520 Task: 29418 Signed-off-by: David Sullivan --- .../manifests/manifest-no-tests.yaml | 41 +++++++++++++++++++ .../manifests/manifest.yaml | 41 +++++++++++++++++++ sysinv/sysinv/centos/build_srpm.data | 2 +- sysinv/sysinv/sysinv/sysinv/helm/aodh.py | 12 ++++++ sysinv/sysinv/sysinv/sysinv/helm/nova.py | 2 +- sysinv/sysinv/sysinv/sysinv/helm/panko.py | 9 ++++ .../sysinv/sysinv/helm/rbd_provisioner.py | 9 ++++ 7 files changed, 114 insertions(+), 2 deletions(-) diff --git a/kubernetes/applications/stx-openstack/stx-openstack-helm/stx-openstack-helm/manifests/manifest-no-tests.yaml b/kubernetes/applications/stx-openstack/stx-openstack-helm/stx-openstack-helm/manifests/manifest-no-tests.yaml index ab5d0c98e9..784efab68b 100644 --- a/kubernetes/applications/stx-openstack/stx-openstack-helm/stx-openstack-helm/manifests/manifest-no-tests.yaml +++ b/kubernetes/applications/stx-openstack/stx-openstack-helm/stx-openstack-helm/manifests/manifest-no-tests.yaml @@ -48,6 +48,10 @@ data: replicas: error_page: 2 ingress: 2 + affinity: + anti: + type: + default: requiredDuringSchedulingIgnoredDuringExecution source: type: tar location: http://172.17.0.1/helm_charts/ingress-0.1.0.tgz @@ -89,6 +93,10 @@ data: replicas: error_page: 2 ingress: 2 + affinity: + anti: + type: + default: requiredDuringSchedulingIgnoredDuringExecution source: type: tar location: http://172.17.0.1/helm_charts/ingress-0.1.0.tgz @@ -118,6 +126,12 @@ data: - type: job labels: app: rbd-provisioner + values: + pod: + affinity: + anti: + type: + default: requiredDuringSchedulingIgnoredDuringExecution source: type: tar location: http://172.17.0.1/helm_charts/rbd-provisioner-0.1.0.tgz @@ -158,6 +172,11 @@ data: prometheus_mysql_exporter: node_selector_key: openstack-control-plane node_selector_value: enabled + pod: + affinity: + anti: + type: + default: requiredDuringSchedulingIgnoredDuringExecution source: type: tar location: http://172.17.0.1/helm_charts/mariadb-0.1.0.tgz @@ -331,6 +350,10 @@ data: pod: replicas: api: 2 + affinity: + anti: + type: + default: requiredDuringSchedulingIgnoredDuringExecution source: type: tar location: http://172.17.0.1/helm_charts/keystone-0.1.0.tgz @@ -970,6 +993,10 @@ data: user: aodh: uid: 0 + affinity: + anti: + type: + default: requiredDuringSchedulingIgnoredDuringExecution jobs: alarms_cleaner: # daily at the 35 minute mark @@ -1070,6 +1097,11 @@ data: app:healthcheck: use: egg:oslo.middleware#healthcheck oslo_config_project: gnocchi + pod: + affinity: + anti: + type: + default: requiredDuringSchedulingIgnoredDuringExecution source: type: tar location: http://172.17.0.1/helm_charts/gnocchi-0.1.0.tgz @@ -1110,6 +1142,10 @@ data: user: panko: uid: 0 + affinity: + anti: + type: + default: requiredDuringSchedulingIgnoredDuringExecution jobs: events_cleaner: # hourly at the 10 minute mark @@ -1756,6 +1792,11 @@ data: attributes: controller: resource_metadata.controller switch: resource_metadata.switch + pod: + affinity: + anti: + type: + default: requiredDuringSchedulingIgnoredDuringExecution source: type: tar location: http://172.17.0.1/helm_charts/ceilometer-0.1.0.tgz diff --git a/kubernetes/applications/stx-openstack/stx-openstack-helm/stx-openstack-helm/manifests/manifest.yaml b/kubernetes/applications/stx-openstack/stx-openstack-helm/stx-openstack-helm/manifests/manifest.yaml index 4562fe9762..8f3f47eab8 100644 --- 
a/kubernetes/applications/stx-openstack/stx-openstack-helm/stx-openstack-helm/manifests/manifest.yaml +++ b/kubernetes/applications/stx-openstack/stx-openstack-helm/stx-openstack-helm/manifests/manifest.yaml @@ -48,6 +48,10 @@ data: replicas: error_page: 2 ingress: 2 + affinity: + anti: + type: + default: requiredDuringSchedulingIgnoredDuringExecution source: type: tar location: http://172.17.0.1/helm_charts/ingress-0.1.0.tgz @@ -89,6 +93,10 @@ data: replicas: error_page: 2 ingress: 2 + affinity: + anti: + type: + default: requiredDuringSchedulingIgnoredDuringExecution source: type: tar location: http://172.17.0.1/helm_charts/ingress-0.1.0.tgz @@ -118,6 +126,12 @@ data: - type: job labels: app: rbd-provisioner + values: + pod: + affinity: + anti: + type: + default: requiredDuringSchedulingIgnoredDuringExecution source: type: tar location: http://172.17.0.1/helm_charts/rbd-provisioner-0.1.0.tgz @@ -158,6 +172,11 @@ data: prometheus_mysql_exporter: node_selector_key: openstack-control-plane node_selector_value: enabled + pod: + affinity: + anti: + type: + default: requiredDuringSchedulingIgnoredDuringExecution source: type: tar location: http://172.17.0.1/helm_charts/mariadb-0.1.0.tgz @@ -331,6 +350,10 @@ data: pod: replicas: api: 2 + affinity: + anti: + type: + default: requiredDuringSchedulingIgnoredDuringExecution source: type: tar location: http://172.17.0.1/helm_charts/keystone-0.1.0.tgz @@ -970,6 +993,10 @@ data: user: aodh: uid: 0 + affinity: + anti: + type: + default: requiredDuringSchedulingIgnoredDuringExecution jobs: alarms_cleaner: # daily at the 35 minute mark @@ -1070,6 +1097,11 @@ data: app:healthcheck: use: egg:oslo.middleware#healthcheck oslo_config_project: gnocchi + pod: + affinity: + anti: + type: + default: requiredDuringSchedulingIgnoredDuringExecution source: type: tar location: http://172.17.0.1/helm_charts/gnocchi-0.1.0.tgz @@ -1110,6 +1142,10 @@ data: user: panko: uid: 0 + affinity: + anti: + type: + default: requiredDuringSchedulingIgnoredDuringExecution jobs: events_cleaner: # hourly at the 10 minute mark @@ -1756,6 +1792,11 @@ data: attributes: controller: resource_metadata.controller switch: resource_metadata.switch + pod: + affinity: + anti: + type: + default: requiredDuringSchedulingIgnoredDuringExecution source: type: tar location: http://172.17.0.1/helm_charts/ceilometer-0.1.0.tgz diff --git a/sysinv/sysinv/centos/build_srpm.data b/sysinv/sysinv/centos/build_srpm.data index 293e65bcfe..be4508a9e5 100644 --- a/sysinv/sysinv/centos/build_srpm.data +++ b/sysinv/sysinv/centos/build_srpm.data @@ -1,2 +1,2 @@ SRC_DIR="sysinv" -TIS_PATCH_VER=302 +TIS_PATCH_VER=303 diff --git a/sysinv/sysinv/sysinv/sysinv/helm/aodh.py b/sysinv/sysinv/sysinv/sysinv/helm/aodh.py index f726de5ab0..9d8d370643 100644 --- a/sysinv/sysinv/sysinv/sysinv/helm/aodh.py +++ b/sysinv/sysinv/sysinv/sysinv/helm/aodh.py @@ -24,6 +24,7 @@ class AodhHelm(openstack.OpenstackBaseHelm): def get_overrides(self, namespace=None): overrides = { common.HELM_NS_OPENSTACK: { + 'pod': self._get_pod_overrides(), 'images': self._get_images_overrides(), 'conf': self._get_conf_overrides(), 'endpoints': self._get_endpoints_overrides() @@ -38,6 +39,17 @@ class AodhHelm(openstack.OpenstackBaseHelm): else: return overrides + def _get_pod_overrides(self): + overrides = { + 'replicas': { + 'api': self._num_controllers(), + 'evaluator': self._num_controllers(), + 'listener': self._num_controllers(), + 'notifier': self._num_controllers() + } + } + return overrides + def _get_images_overrides(self): heat_image = 
self._operator.chart_operators[ constants.HELM_CHART_HEAT].docker_image
diff --git a/sysinv/sysinv/sysinv/sysinv/helm/nova.py b/sysinv/sysinv/sysinv/sysinv/helm/nova.py
index b756e6b50a..b69f0d1196 100644
--- a/sysinv/sysinv/sysinv/sysinv/helm/nova.py
+++ b/sysinv/sysinv/sysinv/sysinv/helm/nova.py
@@ -69,7 +69,7 @@ class NovaHelm(openstack.OpenstackBaseHelm): 'conductor': self._num_controllers(), 'consoleauth': self._num_controllers(), 'scheduler': self._num_controllers(), - # set replicas for novncproxy once it's validated. + 'novncproxy': self._num_controllers() } }, 'conf': {
diff --git a/sysinv/sysinv/sysinv/sysinv/helm/panko.py b/sysinv/sysinv/sysinv/sysinv/helm/panko.py
index 0e0254eddd..dbbafa54ab 100644
--- a/sysinv/sysinv/sysinv/sysinv/helm/panko.py
+++ b/sysinv/sysinv/sysinv/sysinv/helm/panko.py
@@ -24,6 +24,7 @@ class PankoHelm(openstack.OpenstackBaseHelm): def get_overrides(self, namespace=None): overrides = { common.HELM_NS_OPENSTACK: { + 'pod': self._get_pod_overrides(), 'images': self._get_images_overrides(), 'endpoints': self._get_endpoints_overrides() } @@ -37,6 +38,14 @@ class PankoHelm(openstack.OpenstackBaseHelm): else: return overrides + def _get_pod_overrides(self): + overrides = { + 'replicas': { + 'api': self._num_controllers() + } + } + return overrides + def _get_images_overrides(self): heat_image = self._operator.chart_operators[ constants.HELM_CHART_HEAT].docker_image
diff --git a/sysinv/sysinv/sysinv/sysinv/helm/rbd_provisioner.py b/sysinv/sysinv/sysinv/sysinv/helm/rbd_provisioner.py
index 1b911de167..08453348fc 100644
--- a/sysinv/sysinv/sysinv/sysinv/helm/rbd_provisioner.py
+++ b/sysinv/sysinv/sysinv/sysinv/helm/rbd_provisioner.py
@@ -104,6 +104,7 @@ class RbdProvisionerHelm(base.BaseHelm): "classes": classes, "ephemeral_pools": ephemeral_pools, "images": self._get_images_overrides(), + "pods": self._get_pod_overrides() } } @@ -115,6 +116,14 @@ class RbdProvisionerHelm(base.BaseHelm): else: return overrides + def _get_pod_overrides(self): + overrides = { + 'replicas': { + 'rbd-provisioner': self._num_controllers() + } + } + return overrides + def _get_images_overrides(self): # TODO: Remove after ceph upgrade # Format the name of the stx specific ceph config helper

From 70ed5b099496c98b37a94b061610d48c9263f554 Mon Sep 17 00:00:00 2001
From: Alex Kozyrev
Date: Fri, 15 Feb 2019 15:46:32 -0500
Subject: [PATCH 6/8] Enable Barbican provisioning in SM in kubernetes environment

Since Barbican is now in charge of storing BMC passwords for MTCE, we need it to run as a bare-metal service alongside kubernetes. This patch enables SM provisioning for barbican in this case.
Change-Id: Id51f679738d429e78f388b6dc42e7606ef0c41ab Story: 2003108 Task: 27700 Signed-off-by: Alex Kozyrev --- .../src/modules/platform/manifests/sm.pp | 116 ++++++++++-------- 1 file changed, 68 insertions(+), 48 deletions(-) diff --git a/puppet-manifests/src/modules/platform/manifests/sm.pp b/puppet-manifests/src/modules/platform/manifests/sm.pp index f8b2cc6edb..b038b14f08 100644 --- a/puppet-manifests/src/modules/platform/manifests/sm.pp +++ b/puppet-manifests/src/modules/platform/manifests/sm.pp @@ -192,6 +192,10 @@ class platform::sm $os_region_name = $keystone_region } + # Barbican + include ::openstack::barbican::params + $barbican_enabled = $::openstack::barbican::params::service_enabled + $ost_cl_ctrl_host = $::platform::network::mgmt::params::controller_address_url include ::platform::client::params @@ -266,9 +270,6 @@ class platform::sm # Panko include ::openstack::panko::params - # Barbican - include ::openstack::barbican::params - if $system_mode == 'simplex' { $hostunit = '0' $management_my_unit_ip = $::platform::network::mgmt::params::controller0_address @@ -338,7 +339,6 @@ class platform::sm $gnocchi_enabled = false $aodh_enabled = false $panko_enabled = false - $barbican_enabled = false } else { $heat_service_enabled = $::openstack::heat::params::service_enabled $murano_configured = $::openstack::murano::params::service_enabled @@ -347,7 +347,6 @@ class platform::sm $gnocchi_enabled = $::openstack::gnocchi::params::service_enabled $aodh_enabled = $::openstack::aodh::params::service_enabled $panko_enabled = $::openstack::panko::params::service_enabled - $barbican_enabled = $::openstack::barbican::params::service_enabled } # lint:ignore:140chars @@ -625,6 +624,21 @@ class platform::sm } } + # Barbican + if $barbican_enabled { + exec { 'Configure OpenStack - Barbican API': + command => "sm-configure service_instance barbican-api barbican-api \"config=/etc/barbican/barbican.conf\"", + } + + exec { 'Configure OpenStack - Barbican Keystone Listener': + command => "sm-configure service_instance barbican-keystone-listener barbican-keystone-listener \"config=/etc/barbican/barbican.conf\"", + } + + exec { 'Configure OpenStack - Barbican Worker': + command => "sm-configure service_instance barbican-worker barbican-worker \"config=/etc/barbican/barbican.conf\"", + } + } + if $configure_glance { if !$glance_cached { exec { 'Configure OpenStack - Glance Registry': @@ -1123,49 +1137,6 @@ class platform::sm command => "sm-configure service_instance ironic-conductor ironic-conductor \"config=/etc/ironic/ironic.conf,tftproot=${ironic_tftproot}\"", } - # Barbican - if $barbican_enabled { - - exec { 'Configure OpenStack - Barbican API': - command => "sm-configure service_instance barbican-api barbican-api \"config=/etc/barbican/barbican.conf\"", - } - - exec { 'Configure OpenStack - Barbican Keystone Listener': - command => "sm-configure service_instance barbican-keystone-listener barbican-keystone-listener \"config=/etc/barbican/barbican.conf\"", - } - - exec { 'Configure OpenStack - Barbican Worker': - command => "sm-configure service_instance barbican-worker barbican-worker \"config=/etc/barbican/barbican.conf\"", - } - } else { - exec { 'Deprovision OpenStack - Barbican API (service-group-member)': - path => [ '/usr/bin', '/usr/sbin', '/usr/local/bin', '/etc', '/sbin', '/bin' ], - command => 'sm-deprovision service-group-member cloud-services barbican-api', - } - -> exec { 'Deprovision OpenStack - Barbican API (service)': - path => [ '/usr/bin', '/usr/sbin', '/usr/local/bin', '/etc', 
'/sbin', '/bin' ], - command => 'sm-deprovision service barbican-api', - } - - exec { 'Deprovision OpenStack - Barbican Keystone Listener (service-group-member)': - path => [ '/usr/bin', '/usr/sbin', '/usr/local/bin', '/etc', '/sbin', '/bin' ], - command => 'sm-deprovision service-group-member cloud-services barbican-keystone-listener', - } - -> exec { 'Deprovision OpenStack - Barbican Keystone Listener (service)': - path => [ '/usr/bin', '/usr/sbin', '/usr/local/bin', '/etc', '/sbin', '/bin' ], - command => 'sm-deprovision service barbican-keystone-listener', - } - - exec { 'Deprovision OpenStack - Barbican Worker (service-group-member)': - path => [ '/usr/bin', '/usr/sbin', '/usr/local/bin', '/etc', '/sbin', '/bin' ], - command => 'sm-deprovision service-group-member cloud-services barbican-worker', - } - -> exec { 'Deprovision OpenStack - Barbican Worker (service)': - path => [ '/usr/bin', '/usr/sbin', '/usr/local/bin', '/etc', '/sbin', '/bin' ], - command => 'sm-deprovision service barbican-worker', - } - } - exec { 'Configure OpenStack - Nova Compute': command => "sm-configure service_instance nova-compute nova-compute \"config=/etc/nova/nova-ironic.conf\"", } @@ -1362,6 +1333,55 @@ class platform::sm } } + # Barbican + if $barbican_enabled { + exec { 'Provision OpenStack - Barbican API (service-group-member)': + command => 'sm-provision service-group-member cloud-services barbican-api', + } + -> exec { 'Provision OpenStack - Barbican API (service)': + command => 'sm-provision service barbican-api', + } + -> exec { 'Provision OpenStack - Barbican Keystone Listener (service-group-member)': + command => 'sm-provision service-group-member cloud-services barbican-keystone-listener', + } + -> exec { 'Provision OpenStack - Barbican Keystone Listener (service)': + command => 'sm-provision service barbican-keystone-listener', + } + -> exec { 'Provision OpenStack - Barbican Worker (service-group-member)': + command => 'sm-provision service-group-member cloud-services barbican-worker', + } + -> exec { 'Provision OpenStack - Barbican Worker (service)': + command => 'sm-provision service barbican-worker', + } + } else { + exec { 'Deprovision OpenStack - Barbican API (service-group-member)': + path => [ '/usr/bin', '/usr/sbin', '/usr/local/bin', '/etc', '/sbin', '/bin' ], + command => 'sm-deprovision service-group-member cloud-services barbican-api', + } + -> exec { 'Deprovision OpenStack - Barbican API (service)': + path => [ '/usr/bin', '/usr/sbin', '/usr/local/bin', '/etc', '/sbin', '/bin' ], + command => 'sm-deprovision service barbican-api', + } + + exec { 'Deprovision OpenStack - Barbican Keystone Listener (service-group-member)': + path => [ '/usr/bin', '/usr/sbin', '/usr/local/bin', '/etc', '/sbin', '/bin' ], + command => 'sm-deprovision service-group-member cloud-services barbican-keystone-listener', + } + -> exec { 'Deprovision OpenStack - Barbican Keystone Listener (service)': + path => [ '/usr/bin', '/usr/sbin', '/usr/local/bin', '/etc', '/sbin', '/bin' ], + command => 'sm-deprovision service barbican-keystone-listener', + } + + exec { 'Deprovision OpenStack - Barbican Worker (service-group-member)': + path => [ '/usr/bin', '/usr/sbin', '/usr/local/bin', '/etc', '/sbin', '/bin' ], + command => 'sm-deprovision service-group-member cloud-services barbican-worker', + } + -> exec { 'Deprovision OpenStack - Barbican Worker (service)': + path => [ '/usr/bin', '/usr/sbin', '/usr/local/bin', '/etc', '/sbin', '/bin' ], + command => 'sm-deprovision service barbican-worker', + } + } + exec { 
'Configure Murano Rabbit': command => "sm-configure service_instance murano-rabbit murano-rabbit \"server=${rabbitmq_server},ctl=${rabbitmqctl},nodename=${murano_rabbit_node_name},mnesia_base=${murano_rabbit_mnesia_base},ip=${oam_ip_param_ip},config_file=${murano_rabbit_config_file},env_config_file=${murano_rabbit_env_config_file},pid_file=${murano_rabbit_pid},dist_port=${murano_rabbit_dist_port}\"", }

From ed3c63a06da2cb04b7415cb1b5ba6340c3fa229a Mon Sep 17 00:00:00 2001
From: Erich Cordoba
Date: Tue, 19 Feb 2019 12:09:42 -0600
Subject: [PATCH 7/8] Add DNS requirement for kubernetes and helm.

`helm init` is being executed before networking and DNS are properly configured on the controller. A dependency was added to kubernetes to set up DNS, and the helm manifest was updated to depend on kubernetes.

Also, the `--skip-refresh` flag was added to helm init on the second controller to avoid timeout scenarios in proxy environments.

Closes-Bug: 1814968
Change-Id: I65759314b3a861e7fdb428889aa5f5c1c7037661
Suggested-by: Mingyuan Qi
Signed-off-by: Erich Cordoba
---
puppet-manifests/src/modules/platform/manifests/helm.pp | 7 +++++-- .../src/modules/platform/manifests/kubernetes.pp | 6 +++++- 2 files changed, 10 insertions(+), 3 deletions(-)
diff --git a/puppet-manifests/src/modules/platform/manifests/helm.pp b/puppet-manifests/src/modules/platform/manifests/helm.pp
index 47b5effe14..31396dae90 100644
--- a/puppet-manifests/src/modules/platform/manifests/helm.pp
+++ b/puppet-manifests/src/modules/platform/manifests/helm.pp
@@ -76,9 +76,12 @@ class platform::helm } } else { - exec { 'initialize helm': + + Class['::platform::kubernetes::master'] + + -> exec { 'initialize helm': environment => [ 'KUBECONFIG=/etc/kubernetes/admin.conf', 'HOME=/home/wrsroot' ], - command => 'helm init --client-only', + command => 'helm init --skip-refresh --client-only', logoutput => true, user => 'wrsroot', group => 'wrs',
diff --git a/puppet-manifests/src/modules/platform/manifests/kubernetes.pp b/puppet-manifests/src/modules/platform/manifests/kubernetes.pp
index 2045f2791d..97bc539674 100644
--- a/puppet-manifests/src/modules/platform/manifests/kubernetes.pp
+++ b/puppet-manifests/src/modules/platform/manifests/kubernetes.pp
@@ -16,10 +16,14 @@ class platform::kubernetes::kubeadm { $iptables_file = "net.bridge.bridge-nf-call-ip6tables = 1 net.bridge.bridge-nf-call-iptables = 1" + # Ensure DNS is configured as name resolution is required when + # kubeadm init is run. + Class['::platform::dns'] + # Update iptables config. This is required based on: # https://kubernetes.io/docs/tasks/tools/install-kubeadm # This probably belongs somewhere else - initscripts package? - file { '/etc/sysctl.d/k8s.conf': + -> file { '/etc/sysctl.d/k8s.conf': ensure => file, content => $iptables_file, owner => 'root',

From 1c467789c43827321e4319d50065fdbab1be35a2 Mon Sep 17 00:00:00 2001
From: David Sullivan
Date: Wed, 20 Feb 2019 00:49:17 -0500
Subject: [PATCH 8/8] Add replica settings for mariadb ingress pod

There was no mariadb replica override for the ingress pod. On AIO-SX this caused two pods to be scheduled. When anti-affinity was added to mariadb, this broke application-apply on AIO-SX. The mariadb ingress pod replication will be set to the number of controllers.
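To make the scheduling constraint concrete, a small illustrative sketch follows; the helper below is an assumption used only to model the rule, not code from this change:

    # Illustrative sketch, not code from this change. With
    # requiredDuringSchedulingIgnoredDuringExecution anti-affinity,
    # at most one pod fits per node, so the replica count must not
    # exceed the number of nodes carrying the selector label.
    def schedulable(replicas, labelled_nodes):
        return replicas <= labelled_nodes

    # Before this change on AIO-SX: 2 ingress pods, 1 controller node.
    assert not schedulable(replicas=2, labelled_nodes=1)
    # After: replicas track the controller count, so AIO-SX gets 1.
    assert schedulable(replicas=1, labelled_nodes=1)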
Change-Id: Icf3f1979720629904ca9ddcabf59e8ecfab709e5 Story: 2004520 Task: 29570 Signed-off-by: David Sullivan --- sysinv/sysinv/centos/build_srpm.data | 2 +- sysinv/sysinv/sysinv/sysinv/helm/mariadb.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/sysinv/sysinv/centos/build_srpm.data b/sysinv/sysinv/centos/build_srpm.data index be4508a9e5..97482ad914 100644 --- a/sysinv/sysinv/centos/build_srpm.data +++ b/sysinv/sysinv/centos/build_srpm.data @@ -1,2 +1,2 @@ SRC_DIR="sysinv" -TIS_PATCH_VER=303 +TIS_PATCH_VER=304 diff --git a/sysinv/sysinv/sysinv/sysinv/helm/mariadb.py b/sysinv/sysinv/sysinv/sysinv/helm/mariadb.py index 8b093334db..6d56ca2c5f 100644 --- a/sysinv/sysinv/sysinv/sysinv/helm/mariadb.py +++ b/sysinv/sysinv/sysinv/sysinv/helm/mariadb.py @@ -32,7 +32,8 @@ class MariadbHelm(openstack.OpenstackBaseHelm): common.HELM_NS_OPENSTACK: { 'pod': { 'replicas': { - 'server': self._num_server_replicas() + 'server': self._num_server_replicas(), + 'ingress': self._num_controllers() } }, 'images': self._get_images_overrides(),