From e4ff1ef2c36a368796be67108ddd428df9c0c1be Mon Sep 17 00:00:00 2001 From: Irina Mihai Date: Wed, 28 Nov 2018 03:41:08 +0000 Subject: [PATCH] Decouple Ceph pools creation from sysinv For a containerized deployment: - the following Ceph pools are no longer created by sysinv: cinder-volumes, images, ephemeral, kube-rbd; the creation has been left/moved to the helm charts as follows: -> the cinder chart: cinder-volumes, cinder.backup -> the rbd-provisioner chart: kube-rbd pools and the ephemeral pool for nova (temporary) -> glance: images - sysinv no longer supports updating the pool quotas - sysinv no longer supports updating the replication for Ceph pools: the replication is updated in the DB through the 'system storage-backend-modify' command, but the chart is applying the change through the helm overrides when the application is (re)applied - sysinv no longer audits the Ceph pools and adjusts the PG num - sysinv no longer generates the Ceph keys for the Ceph pools and the k8s secrets, as these have been moved to the rbd-provisioner chart - upon storage node lock, we determine which are the existing data Ceph pools and deny lock if they are not empty NOTE: There are still parts of the pool management code that will have to be removed once we switch to a containerized deployment only. I've marked that with "TODO(CephPoolsDecouple)" to easily track it. Validation: - install AIO-SX & Storage setups with --kubernetes and -> add Ceph (primary and secondary tier) -> lock/unlock host/storage hosts -> check pools are not created by sysinv -> generate the stx-openstack application tarball -> system application-upload stx-openstack helm-charts-manifest-no-tests.tgz -> system application-apply stx-openstack - install AIO-SX without --kubernetes -> check lock/unlock - install Storage setup without --kubernetes -> check lock/unlock of storage nodes -> check Ceph pools are created -> test quotas can be changed Story: 2002844 Task: 28190 Change-Id: Ic2190e488917bffebcd16daf895dcddd92c6d9c5 Signed-off-by: Irina Mihai --- .../templates/clusterrole.yaml | 3 + .../rbd-provisioner/templates/deployment.yaml | 8 +- .../templates/pre-install-check-ceph.yaml | 167 +++++++++------ .../rbd-provisioner/templates/role.yaml | 2 +- .../templates/storageclass.yaml | 2 +- .../helm-charts/rbd-provisioner/values.yaml | 47 ++++- .../sysinv/sysinv/api/controllers/v1/host.py | 37 ++-- .../sysinv/api/controllers/v1/storage_ceph.py | 77 ++++--- .../sysinv/sysinv/api/controllers/v1/utils.py | 7 + sysinv/sysinv/sysinv/sysinv/common/ceph.py | 35 ++++ .../sysinv/sysinv/sysinv/common/constants.py | 3 +- .../sysinv/common/storage_backend_conf.py | 101 ---------- sysinv/sysinv/sysinv/sysinv/conductor/ceph.py | 190 +++--------------- .../sysinv/sysinv/sysinv/conductor/manager.py | 44 ++-- .../sysinv/sysinv/sysinv/conductor/rpcapi.py | 22 +- sysinv/sysinv/sysinv/sysinv/helm/cinder.py | 6 +- .../sysinv/sysinv/helm/rbd_provisioner.py | 95 ++++++++- 17 files changed, 393 insertions(+), 453 deletions(-) diff --git a/kubernetes/helm-charts/rbd-provisioner/templates/clusterrole.yaml b/kubernetes/helm-charts/rbd-provisioner/templates/clusterrole.yaml index 55ee65496c..9fc0c4bc43 100644 --- a/kubernetes/helm-charts/rbd-provisioner/templates/clusterrole.yaml +++ b/kubernetes/helm-charts/rbd-provisioner/templates/clusterrole.yaml @@ -31,4 +31,7 @@ rules: resources: ["services"] resourceNames: ["kube-dns"] verbs: ["list", "get"] + - apiGroups: [""] + resources: ["secrets"] + verbs: ["get", "create", "list", "update"] {{- end}} diff --git 
a/kubernetes/helm-charts/rbd-provisioner/templates/deployment.yaml b/kubernetes/helm-charts/rbd-provisioner/templates/deployment.yaml index 574f85c344..c52887d742 100644 --- a/kubernetes/helm-charts/rbd-provisioner/templates/deployment.yaml +++ b/kubernetes/helm-charts/rbd-provisioner/templates/deployment.yaml @@ -21,15 +21,15 @@ spec: labels: app: {{ .Values.global.name }} spec: + {{- if (.Values.global.rbac) or (.Values.global.reuseRbac)}} + serviceAccount: {{ .Values.rbac.serviceAccount }} + {{- end }} containers: - name: {{ .Values.global.name }} - image: {{ .Values.global.image | quote }} + image: {{ .Values.images.tags.rbd_provisioner | quote }} env: - name: PROVISIONER_NAME value: ceph.com/rbd -{{- if (.Values.global.rbac) or (.Values.global.reuseRbac)}} - serviceAccount: {{ .Values.rbac.serviceAccount }} -{{- end }} {{- if .Values.global.nodeSelector }} nodeSelector: {{ .Values.global.nodeSelector | toYaml | trim | indent 8 }} diff --git a/kubernetes/helm-charts/rbd-provisioner/templates/pre-install-check-ceph.yaml b/kubernetes/helm-charts/rbd-provisioner/templates/pre-install-check-ceph.yaml index b2352303f0..32f25129ab 100644 --- a/kubernetes/helm-charts/rbd-provisioner/templates/pre-install-check-ceph.yaml +++ b/kubernetes/helm-charts/rbd-provisioner/templates/pre-install-check-ceph.yaml @@ -6,47 +6,47 @@ # */}} -{{- if .Values.global.doPreInstallVerification }} +{{- if .Values.global.job_storage_init }} {{ $root := . }} {{ $defaults := .Values.classdefaults}} {{ $mount := "/tmp/mount" }} -{{- range $classConfig := .Values.classes }} -kind: ConfigMap +--- apiVersion: v1 +kind: ConfigMap metadata: creationTimestamp: 2016-02-18T19:14:38Z - name: config-{{- $root.Values.global.name -}}-{{- $classConfig.name }} + name: config-{{- $root.Values.global.name }} namespace: {{ $root.Values.global.namespace }} - annotations: - "helm.sh/hook": pre-install - "helm.sh/hook-weight": "-6" - "helm.sh/hook-delete-policy": before-hook-creation, hook-succeeded, hook-failed data: ceph.conf: | - {{ $monitors := or $classConfig.monitors $defaults.monitors }}{{ range $index, $element := $monitors}} + {{ $monitors := $defaults.monitors }}{{ range $index, $element := $monitors}} [mon.{{- $index }}] mon_addr = {{ $element }} {{- end }} check_ceph.sh: |- #!/bin/bash + # Copy from read only mount to Ceph config folder cp {{ $mount -}}/ceph.conf /etc/ceph/ - # Set up admin key in Ceph format - CEPH_ADMIN_KEY="/etc/ceph/ceph.client.admin.keyring" - if [ ! -z "$CEPH_ADMIN_SECRET" ]; then - cat < $CEPH_ADMIN_KEY - [client.admin] - key = $CEPH_ADMIN_SECRET - EOF - else - touch $CEPH_ADMIN_KEY + if [ ! -z $CEPH_ADMIN_SECRET ]; then + kubectl get secret -n kube-system | grep $CEPH_ADMIN_SECRET + ret=$? + if [ $ret -ne 0 ]; then + msg="Create $CEPH_ADMIN_SECRET secret" + echo "$msg" + kubectl create secret generic $CEPH_ADMIN_SECRET --type="kubernetes.io/rbd" --from-literal=key= --namespace=$NAMESPACE + ret=$? + if [ $ret -ne 0 ]; then + msg="Error creating secret $CEPH_ADMIN_SECRET, exit" + echo "$msg" + exit $ret + fi + fi fi - # Set up pool key in Ceph format - CEPH_USER_KEY=/etc/ceph/ceph.client.{{- $classConfig.userId -}}.keyring - echo $CEPH_USER_SECRET > $CEPH_USER_KEY + touch /etc/ceph/ceph.client.admin.keyring # Check if ceph is accessible echo "====================================" @@ -55,52 +55,67 @@ data: if [ $ret -ne 0 ]; then msg="Error: Ceph cluster is not accessible, check Pod logs for details." 
echo "$msg" - echo "$msg" > /dev/termination-log exit $ret fi - # Check if pool exists - echo "====================================" - ceph osd lspools | grep {{ $classConfig.pool }} - ret=$? - if [ $ret -ne 0 ]; then - msg="Error: Ceph pool {{ $classConfig.pool }} is not accessible, check Pod logs for details." + set -ex + + # Get the ruleset from the rule name. + ruleset=$(ceph osd crush rule dump $POOL_CRUSH_RULE_NAME | grep "\"ruleset\":" | grep -Eo '[0-9]*') + # Make sure the pool exists. + ceph osd pool stats $POOL_NAME || ceph osd pool create $POOL_NAME $POOL_CHUNK_SIZE + # Set pool configuration. + ceph osd pool set $POOL_NAME size $POOL_REPLICATION + ceph osd pool set $POOL_NAME crush_rule $ruleset + + if [[ -z $USER_ID && -z $CEPH_USER_SECRET ]]; then + msg="No need to create secrets for pool $POOL_NAME" echo "$msg" - echo "$msg" > /dev/termination-log - exit $ret + exit 0 fi + KEYRING=$(ceph auth get-or-create client.$USER_ID mon "allow r" osd "allow rwx pool=${POOL_NAME}" | sed -n 's/^[[:blank:]]*key[[:blank:]]\+=[[:blank:]]\(.*\)/\1/p') + # Set up pool key in Ceph format + CEPH_USER_KEYRING=/etc/ceph/ceph.client.$USER_ID.keyring + echo $KEYRING > $CEPH_USER_KEYRING + IFS=',' read -a POOL_SECRET_NAMESPACES_ARR <<< "${POOL_SECRET_NAMESPACES}" + + for pool_secret_namespace in "${POOL_SECRET_NAMESPACES_ARR[@]}" + do + kubectl create secret generic $CEPH_USER_SECRET --type="kubernetes.io/rbd" --from-literal=key=$KEYRING --namespace=$pool_secret_namespace + done + + set +ex + # Check if pool is accessible using provided credentials - echo "====================================" - rbd -p {{ $classConfig.pool }} --user {{ $classConfig.userId }} ls -K $CEPH_USER_KEY + echo "=====================================" + rbd -p $POOL_NAME --user $USER_ID ls -K $CEPH_USER_KEYRING ret=$? if [ $ret -ne 0 ]; then - msg="Error: Ceph pool {{ $classConfig.pool }} is not accessible using \ - credentials for user {{ $classConfig.userId }}, check Pod logs for details." + msg="Error: Ceph pool $POOL_NAME is not accessible using \ + credentials for user $USER_ID, check Pod logs for details." 
echo "$msg" - echo "$msg" > /dev/termination-log exit $ret + else + msg="Pool $POOL_NAME accessible" + echo "$msg" fi + ceph -s + --- apiVersion: batch/v1 kind: Job metadata: - name: check-{{- $root.Values.global.name -}}-{{- $classConfig.name }} + name: rbd-provisioner-storage-init namespace: {{ $root.Values.global.namespace }} labels: heritage: {{$root.Release.Service | quote }} release: {{$root.Release.Name | quote }} chart: "{{$root.Chart.Name}}-{{$root.Chart.Version}}" - annotations: - "helm.sh/hook": pre-install - "helm.sh/hook-weight": "-5" - "helm.sh/hook-delete-policy": before-hook-creation, hook-succeeded spec: - # Note due to https://github.com/kubernetes/kubernetes/issues/62382 - # backoffLimit doesn't work in 1.10.x - backoffLimit: 1 # Limit the number of job restart in case of failure - activeDeadlineSeconds: 60 + backoffLimit: 3 # Limit the number of job restart in case of failure + activeDeadlineSeconds: 180 template: metadata: name: "{{$root.Release.Name}}" @@ -110,29 +125,57 @@ spec: release: {{$root.Release.Name | quote }} chart: "{{$root.Chart.Name}}-{{$root.Chart.Version}}" spec: - restartPolicy: Never + serviceAccountName: {{ $root.Values.rbac.serviceAccount }} + restartPolicy: OnFailure volumes: - - name: config-volume-{{- $root.Values.global.name -}}-{{- $classConfig.name }} + - name: config-volume-{{- $root.Values.global.name }} configMap: - name: config-{{- $root.Values.global.name -}}-{{- $classConfig.name }} + name: config-{{- $root.Values.global.name }} containers: - - name: pre-install-job-{{- $root.Values.global.name -}}-{{- $classConfig.name }} - image: {{ $root.Values.global.image | quote }} + {{- range $classConfig := $root.Values.classes }} + - name: storage-init-{{- $classConfig.name }} + image: {{ $root.Values.images.tags.rbd_provisioner_storage_init | quote }} command: [ "/bin/bash", "{{ $mount }}/check_ceph.sh" ] - volumeMounts: - - name: config-volume-{{- $root.Values.global.name -}}-{{- $classConfig.name }} - mountPath: {{ $mount }} env: + - name: NAMESPACE + value: {{ $root.Values.global.namespace }} + - name: POOL_SECRET_NAMESPACES + value: {{ $classConfig.pool_secrets_namespaces }} - name: CEPH_ADMIN_SECRET - valueFrom: - secretKeyRef: - name: {{ or $classConfig.adminSecretName $defaults.adminSecretName }} - key: key + value: {{ $defaults.adminSecretName }} - name: CEPH_USER_SECRET - valueFrom: - secretKeyRef: - name: {{ or $classConfig.userSecretName }} - key: key ---- -{{- end }} + value: {{ $classConfig.userSecretName }} + - name: USER_ID + value: {{ $classConfig.userId }} + - name: POOL_NAME + value: {{ $classConfig.pool_name }} + - name: POOL_REPLICATION + value: {{ $classConfig.replication | quote }} + - name: POOL_CRUSH_RULE_NAME + value: {{ $classConfig.crush_rule_name | quote }} + - name: POOL_CHUNK_SIZE + value: {{ $classConfig.chunk_size | quote }} + volumeMounts: + - name: config-volume-{{- $root.Values.global.name }} + mountPath: {{ $mount }} + {{- end }} + {{- range $ephemeralPool := $root.Values.ephemeral_pools }} + - name: storage-init-{{- $ephemeralPool.pool_name }} + image: {{ $root.Values.images.tags.rbd_provisioner_storage_init | quote }} + command: [ "/bin/bash", "{{ $mount }}/check_ceph.sh" ] + env: + - name: NAMESPACE + value: {{ $root.Values.global.namespace }} + - name: POOL_NAME + value: {{ $ephemeralPool.pool_name }} + - name: POOL_REPLICATION + value: {{ $ephemeralPool.replication | quote }} + - name: POOL_CRUSH_RULE_NAME + value: {{ $ephemeralPool.crush_rule_name | quote }} + - name: POOL_CHUNK_SIZE + value: {{ 
$ephemeralPool.chunk_size | quote }} + volumeMounts: + - name: config-volume-{{- $root.Values.global.name }} + mountPath: {{ $mount }} + {{- end }} {{- end }} \ No newline at end of file diff --git a/kubernetes/helm-charts/rbd-provisioner/templates/role.yaml b/kubernetes/helm-charts/rbd-provisioner/templates/role.yaml index 2c1acaa1cb..89c1b84e93 100644 --- a/kubernetes/helm-charts/rbd-provisioner/templates/role.yaml +++ b/kubernetes/helm-charts/rbd-provisioner/templates/role.yaml @@ -15,5 +15,5 @@ metadata: rules: - apiGroups: [""] resources: ["secrets"] - verbs: ["get"] + verbs: ["get", "create", "list", "update"] {{- end}} \ No newline at end of file diff --git a/kubernetes/helm-charts/rbd-provisioner/templates/storageclass.yaml b/kubernetes/helm-charts/rbd-provisioner/templates/storageclass.yaml index 412d55b602..4b2e2bd808 100644 --- a/kubernetes/helm-charts/rbd-provisioner/templates/storageclass.yaml +++ b/kubernetes/helm-charts/rbd-provisioner/templates/storageclass.yaml @@ -20,7 +20,7 @@ parameters: adminId: {{ or $classConfig.adminId $defaults.adminId}} adminSecretName: {{ or $classConfig.adminSecretName $defaults.adminSecretName }} adminSecretNamespace: {{ $namespace }} - pool: {{ or $classConfig.pool $defaults.pool }} + pool: {{ or $classConfig.pool_name $defaults.pool_name }} userId: {{ or $classConfig.userId $defaults.userId }} userSecretName: {{ $classConfig.userSecretName }} imageFormat: {{ or $classConfig.imageFormat $defaults.imageFormat | quote }} diff --git a/kubernetes/helm-charts/rbd-provisioner/values.yaml b/kubernetes/helm-charts/rbd-provisioner/values.yaml index c872b9ae24..b3d65f68c7 100644 --- a/kubernetes/helm-charts/rbd-provisioner/values.yaml +++ b/kubernetes/helm-charts/rbd-provisioner/values.yaml @@ -17,14 +17,12 @@ global: # namespace: kube-system # - # Run pre-install verifications or skip them. - # Skipping them is not recommended + # Execute initialization job to verify external Ceph cluster access + # and setup additional dependencies assumed by dependent helm charts + # (i.e. configmap and secrets). + # Skipping is not recommended. # - doPreInstallVerification: True - # - # Defines Provisioner's image name including container registry. - # - image: quay.io/external_storage/rbd-provisioner:latest + job_storage_init: true # # Defines whether to reuse an already defined RBAC policy. # Make sure that the serviceAccount defined in the RBAC section matches the one @@ -132,11 +130,40 @@ classdefaults: classes: - name: fast-rbd # Name of storage class. # Ceph pool name - pool: kube + pool_name: kube # Ceph user name to access this pool userId: kube # K8 secret name with key for accessing the Ceph pool userSecretName: ceph-secret-kube + # Namespaces for creating the k8s secrets for accessing the Ceph pools + pool_secrets_namespaces: kube-system + # Name of pool to configure + pool_name: kube-rbd + # Pool replication + replication: 1 + # Pool crush rule name + crush_rule_name: storage_tier_ruleset + # Pool chunk size / PG_NUM + chunk_size: 8 +# Configuration data for the ephemeral pool(s) +ephemeral_pools: +- chunk_size: 8 + crush_rule_name: storage_tier_ruleset + pool_name: ephemeral + replication: 1 - - +# +# Defines: +# - Provisioner's image name including container registry. 
+# - CEPH helper image +# +images: + tags: + rbd_provisioner: quay.io/external_storage/rbd-provisioner:latest + rbd_provisioner_storage_init: docker.io/port/ceph-config-helper:v1.10.3 + pull_policy: "IfNotPresent" + local_registry: + active: false + exclude: + - dep_check + - image_repo_sync diff --git a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py index 0b6f77ee7c..ce803dc4b7 100644 --- a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py +++ b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py @@ -2292,12 +2292,16 @@ class HostController(rest.RestController): storage_nodes = pecan.request.dbapi.ihost_get_by_personality( constants.STORAGE) if len(storage_nodes) == 1: + # TODO(CephPoolsDecouple): rework # delete osd pools # It would be nice if we have a ceph API that can delete # all osd pools at once. - pools = pecan.request.rpcapi.list_osd_pools(pecan.request.context) - for ceph_pool in pools: - pecan.request.rpcapi.delete_osd_pool(pecan.request.context, ceph_pool) + if not utils.is_kubernetes_config(): + pools = pecan.request.rpcapi.list_osd_pools( + pecan.request.context) + for ceph_pool in pools: + pecan.request.rpcapi.delete_osd_pool( + pecan.request.context, ceph_pool) # update tier status tier_list = pecan.request.dbapi.storage_tier_get_list() @@ -5232,7 +5236,7 @@ class HostController(rest.RestController): StorageBackendConfig.get_ceph_pool_replication(pecan.request.dbapi, bk) stors = pecan.request.dbapi.istor_get_by_tier(tier.id) if len(stors) < replication: - word = 'is' if len(replication) == 1 else 'are' + word = 'is' if replication == 1 else 'are' msg = ("Can not unlock node until at least %(replication)s osd stor %(word)s " "configured for tier '%(tier)s'." % {'replication': str(replication), 'word': word, 'tier': tier['name']}) @@ -5568,7 +5572,7 @@ class HostController(rest.RestController): ) if not backend: raise wsme.exc.ClientSideError( - _("Ceph must be configured as a backend.")) + _("Ceph must be configured as a backend.")) if (backend.task == constants.SB_TASK_RESTORE and force): LOG.info("%s Allow force-locking as ceph backend is in " @@ -5623,19 +5627,18 @@ class HostController(rest.RestController): if not pools_usage: raise wsme.exc.ClientSideError( _("Cannot lock a storage node when ceph pool usage is undetermined.")) - for ceph_pool in pools_usage: - # We only need to check data pools - if ([pool for pool in constants.ALL_CEPH_POOLS - if ceph_pool['name'].startswith(pool)] and - int(ceph_pool['stats']['bytes_used']) > 0): - # Ceph pool is not empty and no other enabled storage - # in set, so locking this storage node is not allowed. - msg = _("Cannot lock a storage node when ceph pools are" - " not empty and replication is lost. This may" - " result in data loss. ") - raise wsme.exc.ClientSideError(msg) - ceph_pools_empty = True + ceph_pools_empty = self._ceph.ceph_pools_empty( + pecan.request.dbapi, pools_usage) + + if not ceph_pools_empty: + msg = _( + "Cannot lock a storage node when ceph pools are" + " not empty and replication is lost. This may" + " result in data loss. ") + # Ceph pool is not empty and no other enabled storage + # in set, so locking this storage node is not allowed. + raise wsme.exc.ClientSideError(msg) # Perform checks on storage regardless of operational state # as a minimum number of monitor is required. 
diff --git a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/storage_ceph.py b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/storage_ceph.py index e37751195c..e725bc65b3 100644 --- a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/storage_ceph.py +++ b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/storage_ceph.py @@ -413,11 +413,9 @@ def _discover_and_validate_backend_config_data(caps_dict, confirmed): elif k == constants.CEPH_BACKEND_MIN_REPLICATION_CAP: rep = int(caps_dict[constants.CEPH_BACKEND_REPLICATION_CAP]) v_supported = constants.CEPH_REPLICATION_MAP_SUPPORTED[rep] - msg = _("Missing or invalid value for " - "backend parameter \'%s\', when " - "replication is set as \'%s\'. " - "Supported values are %s." % - (k, rep, str(v_supported))) + msg = _("Missing or invalid value for backend parameter \'%s\', " + "when replication is set as \'%s\'. Supported values are " + "%s." % (k, rep, str(v_supported))) try: v = int(v) except ValueError: @@ -691,16 +689,6 @@ def _check_and_update_rbd_provisioner(new_storceph, remove=False): LOG.info("Ceph not configured, delaying rbd-provisioner configuration.") return new_storceph - # Cluster is configured, run live. - try: - new_storceph = \ - pecan.request.rpcapi.check_and_update_rbd_provisioner(pecan.request.context, - new_storceph) - except Exception as e: - msg = _("Error configuring rbd-provisioner service. Please " - "investigate and try again: %s." % str(e)) - raise wsme.exc.ClientSideError(msg) - return new_storceph @@ -1015,6 +1003,7 @@ def _check_replication_number(new_cap, orig_cap): new_cap[constants.CEPH_BACKEND_REPLICATION_CAP]))) +# TODO(CephPoolsDecouple): remove def _is_quotaconfig_changed(ostorceph, storceph): if storceph and ostorceph: if (storceph.cinder_pool_gib != ostorceph.cinder_pool_gib or @@ -1026,6 +1015,7 @@ def _is_quotaconfig_changed(ostorceph, storceph): return False +# TODO(CephPoolsDecouple): remove def _check_pool_quotas_data(ostorceph, storceph): # Only relevant for ceph backend if not StorageBackendConfig.has_backend_configured( @@ -1161,6 +1151,7 @@ def _check_pool_quotas_data(ostorceph, storceph): % (total_quota_gib, int(tier_size))) +# TODO(CephPoolsDecouple): remove def _update_pool_quotas(storceph): # In R4, the object data pool name could be either # CEPH_POOL_OBJECT_GATEWAY_NAME_HAMMER or CEPH_POOL_OBJECT_GATEWAY_NAME_JEWEL @@ -1241,6 +1232,7 @@ def _patch(storceph_uuid, patch): # Obtain the fields that have changed. delta = rpc_storceph.obj_what_changed() + # TODO(CephPoolsDecouple): remove quota values allowed_attributes = ['services', 'capabilities', 'task', 'cinder_pool_gib', 'glance_pool_gib', @@ -1248,6 +1240,7 @@ def _patch(storceph_uuid, patch): 'object_pool_gib', 'kube_pool_gib', 'object_gateway'] + # TODO(CephPoolsDecouple): remove variable quota_attributes = ['cinder_pool_gib', 'glance_pool_gib', 'ephemeral_pool_gib', 'object_pool_gib', 'kube_pool_gib'] @@ -1296,6 +1289,7 @@ def _patch(storceph_uuid, patch): # We only have changes to fast configurable services and/or to their capabilities fast_config = True + # TODO(CephPoolsDecouple): remove variable quota_only_update = True replication_only_update = False for d in delta: @@ -1303,12 +1297,13 @@ def _patch(storceph_uuid, patch): raise wsme.exc.ClientSideError( _("Can not modify '%s' with this operation." % d)) + # TODO(CephPoolsDecouple): remove condition if d not in quota_attributes: quota_only_update = False # TODO (rchurch): In R6, refactor and remove object_gateway attribute - # and DB column. 
This should be driven by if the service is added to the - # services list + # and DB column. This should be driven by if the service is added to + # the services list if d == 'object_gateway': if ostorceph[d]: raise wsme.exc.ClientSideError( @@ -1364,15 +1359,20 @@ def _patch(storceph_uuid, patch): LOG.info("SYS_I orig storage_ceph: %s " % ostorceph.as_dict()) LOG.info("SYS_I patched storage_ceph: %s " % storceph_config.as_dict()) - if _is_quotaconfig_changed(ostorceph, storceph_config): - _check_pool_quotas_data(ostorceph, storceph_config.as_dict()) - _update_pool_quotas(storceph_config.as_dict()) - # check again after update - _check_pool_quotas_data(ostorceph, storceph_config.as_dict()) + # TODO(CephPoolsDecouple): remove block + if not utils.is_kubernetes_config(): + if _is_quotaconfig_changed(ostorceph, storceph_config): + _check_pool_quotas_data(ostorceph, storceph_config.as_dict()) + _update_pool_quotas(storceph_config.as_dict()) + # check again after update + _check_pool_quotas_data(ostorceph, storceph_config.as_dict()) + else: + LOG.info("Don't check quotas") - if not quota_only_update: - # Execute the common semantic checks for all backends, if backend is - # not present this will not return. + # TODO(CephPoolsDecouple): remove condition + if not quota_only_update or utils.is_kubernetes_config(): + # Execute the common semantic checks for all backends, if backend + # is not present this will not return. api_helper.common_checks(constants.SB_API_OP_MODIFY, rpc_storceph.as_dict()) @@ -1392,20 +1392,18 @@ def _patch(storceph_uuid, patch): rpc_storceph[field] != storceph_config.as_dict()[field]): rpc_storceph[field] = storceph_config.as_dict()[field] + # TODO(CephPoolsDecouple): remove - on a containerized deployment, + # replication is updated through the helm charts. # Update replication on the fly on a single node install. - if (replication_only_update and - utils.is_aio_simplex_system(pecan.request.dbapi)): - # For single node setups update replication number on the fly. - min_replication = new_cap.get(constants.CEPH_BACKEND_MIN_REPLICATION_CAP, None) - replication = new_cap.get(constants.CEPH_BACKEND_REPLICATION_CAP, None) - pecan.request.rpcapi.configure_osd_pools(pecan.request.context, - rpc_storceph, - replication, min_replication) - - # Perform changes to the RBD Provisioner service - remove_rbd_provisioner = constants.SB_SVC_RBD_PROVISIONER in services_removed - ret = _check_and_update_rbd_provisioner(rpc_storceph.as_dict(), remove_rbd_provisioner) - rpc_storceph['capabilities'] = ret['capabilities'] + if not utils.is_kubernetes_config(): + if (replication_only_update and + utils.is_aio_simplex_system(pecan.request.dbapi)): + # For single node setups update replication number on the fly. 
+ min_replication = new_cap.get(constants.CEPH_BACKEND_MIN_REPLICATION_CAP, None) + replication = new_cap.get(constants.CEPH_BACKEND_REPLICATION_CAP, None) + pecan.request.rpcapi.configure_osd_pools( + pecan.request.context, rpc_storceph, replication, + min_replication) LOG.info("SYS_I new storage_ceph: %s " % rpc_storceph.as_dict()) try: @@ -1413,10 +1411,11 @@ def _patch(storceph_uuid, patch): rpc_storceph.save() + # TODO(CephPoolsDecouple): rework - remove quota_only_update if ((not quota_only_update and not fast_config and not replication_only_update) or - (storceph_config.state == constants.SB_STATE_CONFIG_ERR)): + (storceph_config.state == constants.SB_STATE_CONFIG_ERR)): # Enable the backend changes: _apply_backend_changes(constants.SB_API_OP_MODIFY, rpc_storceph) diff --git a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/utils.py b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/utils.py index a697177d4e..dc062eeaa3 100644 --- a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/utils.py +++ b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/utils.py @@ -395,6 +395,13 @@ def is_aio_duplex_system(): SystemHelper.get_product_build() == constants.TIS_AIO_BUILD +def is_aio_kubernetes(dbapi=None): + if not dbapi: + dbapi = pecan.request.dbapi + return SystemHelper.get_product_build() == constants.TIS_AIO_BUILD and \ + is_kubernetes_config(dbapi) + + def get_compute_count(dbapi=None): if not dbapi: dbapi = pecan.request.dbapi diff --git a/sysinv/sysinv/sysinv/sysinv/common/ceph.py b/sysinv/sysinv/sysinv/sysinv/common/ceph.py index c0638f93ba..99179e3616 100644 --- a/sysinv/sysinv/sysinv/sysinv/common/ceph.py +++ b/sysinv/sysinv/sysinv/sysinv/common/ceph.py @@ -605,6 +605,41 @@ class CephApiOperator(object): osd_list.append(osd_id) return constants.CEPH_HEALTH_OK + def ceph_pools_empty(self, db_api, pools_usage): + """ Determine if data CEPH pools are empty. + :return True if the data CEPH pools are empty + :return False if the data CEPH pools are not empty + """ + # TODO(CephPoolsDecouple): rework + if utils.is_kubernetes_config(db_api): + for ceph_pool in pools_usage: + # We only need to check data pools. + if (constants.CEPH_POOL_OBJECT_GATEWAY_NAME_PART in + ceph_pool['name']): + if not ( + ceph_pool['name'].startswith( + constants.CEPH_POOL_OBJECT_GATEWAY_NAME_JEWEL) or + ceph_pool['name'].startswith( + constants.CEPH_POOL_OBJECT_GATEWAY_NAME_HAMMER)): + continue + + # Ceph pool is not empty. + if int(ceph_pool['stats']['bytes_used']) > 0: + return False + + return True + + # TODO(CephPoolsDecouple): remove iteration below + for ceph_pool in pools_usage: + # We only need to check data pools. + if ([pool for pool in constants.ALL_CEPH_POOLS + if ceph_pool['name'].startswith(pool)] and + int(ceph_pool['stats']['bytes_used']) > 0): + # Ceph pool is not empty. + return False + + return True + def get_monitors_status(self, db_api): # first check that the monitors are available in sysinv num_active_monitors = 0 diff --git a/sysinv/sysinv/sysinv/sysinv/common/constants.py b/sysinv/sysinv/sysinv/sysinv/common/constants.py index 2b9fac847f..3fd5aa6079 100644 --- a/sysinv/sysinv/sysinv/sysinv/common/constants.py +++ b/sysinv/sysinv/sysinv/sysinv/common/constants.py @@ -718,6 +718,7 @@ CEPH_POOL_KUBE_QUOTA_GIB = 20 # Ceph RADOS Gateway default data pool # Hammer version pool name will be kept if upgrade from R3 and # Swift/Radosgw was configured/enabled in R3. 
+CEPH_POOL_OBJECT_GATEWAY_NAME_PART = 'rgw' CEPH_POOL_OBJECT_GATEWAY_NAME_JEWEL = 'default.rgw.buckets.data' CEPH_POOL_OBJECT_GATEWAY_NAME_HAMMER = '.rgw.buckets' CEPH_POOL_OBJECT_GATEWAY_ROOT_NAME = '.rgw.root' @@ -1475,8 +1476,6 @@ SUPPORTED_HELM_APP_CHARTS = { K8S_RBD_PROV_STORAGECLASS_NAME = 'rbd_storageclass_name' # Customer K8S_RBD_PROV_NAMESPACES = 'rbd_provisioner_namespaces' # Customer K8S_RBD_PROV_NAMESPACES_READY = '.rbd_provisioner_namespaces_ready' # Hidden -K8S_RBD_PROV_ADMIN_SECRET_READY = '.k8s_admin_secret_ready' # Hidden -K8S_RBD_PROV_CEPH_POOL_KEY_READY = '.k8s_pool_secret_ready' # Hidden # RBD Provisioner defaults and constants K8S_RBD_PROV_NAMESPACE_DEFAULT = "kube-system" diff --git a/sysinv/sysinv/sysinv/sysinv/common/storage_backend_conf.py b/sysinv/sysinv/sysinv/sysinv/common/storage_backend_conf.py index e2a20d0c76..97d9678344 100644 --- a/sysinv/sysinv/sysinv/sysinv/common/storage_backend_conf.py +++ b/sysinv/sysinv/sysinv/sysinv/common/storage_backend_conf.py @@ -554,104 +554,3 @@ class K8RbdProvisioner(object): "Details: %s" % (secret_name, namespace, str(e))) return stdout - - @staticmethod - def create_k8s_pool_secret(bk, key=None, namespace=None, force=False): - user_secret_name = K8RbdProvisioner.get_user_secret_name(bk) - - if K8RbdProvisioner.get_k8s_secret(user_secret_name, - namespace=namespace): - if not force: - return - # Key already exists - LOG.warning("K8S Secret for backend: %s and namespace: %s exists and " - "should not be present! Removing existing and creating " - "a new one." % (bk['name'], namespace)) - K8RbdProvisioner.remove_k8s_pool_secret(bk, namespace) - - LOG.info("Creating Kubernetes RBD Provisioner Ceph pool secret " - "for namespace: %s." % namespace) - try: - # Create the k8s secret for the given Ceph pool and namespace. - cmd = ['kubectl', '--kubeconfig=/etc/kubernetes/admin.conf', - 'create', 'secret', 'generic', - user_secret_name, - '--type=kubernetes.io/rbd'] - if key: - cmd.append('--from-literal=key=%s' % key) - if namespace: - cmd.append('--namespace=%s' % namespace) - _, _ = cutils.execute(*cmd, run_as_root=False) - except exception.ProcessExecutionError as e: - raise exception.SysinvException( - "Could not create Kubernetes secret: %s for backend: %s, " - "namespace: %s, Details: %s." % - (user_secret_name, bk['name'], namespace, str(e))) - - @staticmethod - def remove_k8s_pool_secret(bk, namespace): - user_secret_name = K8RbdProvisioner.get_user_secret_name(bk) - if not K8RbdProvisioner.get_k8s_secret(user_secret_name, - namespace=namespace): - LOG.warning("K8S secret for backend: %s and namespace: %s " - "does not exists. Skipping removal." % (bk['name'], namespace)) - return - try: - # Remove the k8s secret from given namepsace. - cmd = ['kubectl', '--kubeconfig=/etc/kubernetes/admin.conf', - 'delete', 'secret', user_secret_name, - '--namespace=%s' % namespace] - _, _ = cutils.execute(*cmd, run_as_root=False) - except exception.ProcessExecutionError as e: - raise exception.SysinvException( - "Could not remove Kubernetes secret: %s for backend: %s, " - "namespace: %s, Details: %s." 
% - (user_secret_name, bk['name'], namespace, str(e))) - - @staticmethod - def create_k8s_admin_secret(): - admin_secret_name = constants.K8S_RBD_PROV_ADMIN_SECRET_NAME - namespace = constants.K8S_RBD_PROV_NAMESPACE_DEFAULT - - if K8RbdProvisioner.get_k8s_secret( - admin_secret_name, namespace=namespace): - # Key already exists - return - - LOG.info("Creating Kubernetes RBD Provisioner Ceph admin secret.") - try: - # TODO(oponcea): Get admin key on Ceph clusters with - # enabled authentication. For now feed an empty key - # to satisfy RBD Provisioner requirements. - cmd = ['kubectl', '--kubeconfig=/etc/kubernetes/admin.conf', - 'create', 'secret', 'generic', - admin_secret_name, - '--type=kubernetes.io/rbd', - '--from-literal=key='] - cmd.append('--namespace=%s' % namespace) - _, _ = cutils.execute(*cmd, run_as_root=False) - except exception.ProcessExecutionError as e: - raise exception.SysinvException( - "Could not create Kubernetes secret: %s, namespace: %s," - "Details: %s" % (admin_secret_name, namespace, str(e))) - - @staticmethod - def remove_k8s_admin_secret(): - admin_secret_name = constants.K8S_RBD_PROV_ADMIN_SECRET_NAME - namespace = constants.K8S_RBD_PROV_NAMESPACE_DEFAULT - - if not K8RbdProvisioner.get_k8s_secret( - admin_secret_name, namespace=namespace): - # Secret does not exist. - return - - LOG.info("Removing Kubernetes RBD Provisioner Ceph admin secret.") - try: - cmd = ['kubectl', '--kubeconfig=/etc/kubernetes/admin.conf', - 'delete', 'secret', admin_secret_name, - '--namespace=%s' % namespace] - _, _ = cutils.execute(*cmd, run_as_root=False) - except exception.ProcessExecutionError as e: - raise exception.SysinvException( - "Could not delete Kubernetes secret: %s, namespace: %s," - "Details: %s." % (admin_secret_name, namespace, str(e))) diff --git a/sysinv/sysinv/sysinv/sysinv/conductor/ceph.py b/sysinv/sysinv/sysinv/sysinv/conductor/ceph.py index b2a58ea8ab..f87f51f5ad 100644 --- a/sysinv/sysinv/sysinv/sysinv/conductor/ceph.py +++ b/sysinv/sysinv/sysinv/sysinv/conductor/ceph.py @@ -15,6 +15,7 @@ from __future__ import absolute_import import os import uuid import copy +import tsconfig.tsconfig as tsc from requests.exceptions import RequestException, ReadTimeout from cephclient import wrapper as ceph @@ -25,7 +26,6 @@ from sysinv.common import utils as cutils from sysinv.openstack.common import log as logging from sysinv.openstack.common import uuidutils from sysinv.common.storage_backend_conf import StorageBackendConfig -from sysinv.common.storage_backend_conf import K8RbdProvisioner from sysinv.api.controllers.v1 import utils @@ -273,6 +273,7 @@ class CephOperator(object): reason=response.reason) return response, body + # TODO(CephPoolsDecouple): remove def osd_set_pool_param(self, pool_name, param, value): response, body = self._ceph_api.osd_set_pool_param( pool_name, param, value, @@ -391,6 +392,7 @@ class CephOperator(object): LOG.info("osdmap is empty, restoring Ceph config...") return self.rebuild_osdmap() + # TODO(CephPoolsDecouple): remove def _pool_create(self, name, pg_num, pgp_num, ruleset, size, min_size): """Create Ceph pool and ruleset. @@ -447,7 +449,7 @@ class CephOperator(object): "pool_name={}, ruleset={}").format( name, ruleset)) else: - msg = _("Failed to to complete parameter assignment on OSD pool" + msg = _("Failed to complete parameter assignment on OSD pool" ": {0}. 
reason: {1}").format(name, response.reason) e = exception.CephFailure(reason=msg) LOG.error(e) @@ -472,13 +474,14 @@ class CephOperator(object): "pool_name={}, min_size={}").format(name, min_size)) else: - msg = _("Failed to to complete parameter assignment on existing" + msg = _("Failed to complete parameter assignment on existing " "OSD pool: {0}. reason: {1}").format(name, response.reason) e = exception.CephFailure(reason=msg) LOG.error(e) raise e + # TODO(CephPoolsDecouple): remove def create_or_resize_osd_pool(self, pool_name, pg_num, pgp_num, size, min_size): """Create or resize an osd pool as needed @@ -588,6 +591,7 @@ class CephOperator(object): return {"max_objects": quota["output"]["quota_max_objects"], "max_bytes": quota["output"]["quota_max_bytes"]} + # TODO(CephPoolsDecouple): remove def set_osd_pool_quota(self, pool, max_bytes=0, max_objects=0): """Set the quota for an OSD pool Setting max_bytes or max_objects to 0 will disable that quota param @@ -622,6 +626,8 @@ class CephOperator(object): LOG.error(e) raise e + # TODO(CephPoolsDecouple): rework if needed: we determine the existing + # pools on the spot def get_pools_values(self): """Create or resize all of the osd pools as needed """ @@ -652,6 +658,7 @@ class CephOperator(object): LOG.debug("Pool Quotas: %s" % quotas) return tuple(quotas) + # TODO(CephPoolsDecouple): remove def set_quota_gib(self, pool_name): quota_gib_value = None cinder_pool_gib, glance_pool_gib, ephemeral_pool_gib, \ @@ -754,6 +761,7 @@ class CephOperator(object): return rc + # TODO(CephPoolsDecouple): remove def _configure_primary_tier_pool(self, pool, size, min_size): """Configure the default Ceph tier pools.""" @@ -769,6 +777,7 @@ class CephOperator(object): except exception.CephFailure: pass + # TODO(CephPoolsDecouple): remove def _configure_secondary_tier_pools(self, tier_obj, size, min_size): """Configure the service pools that are allowed for additional ceph tiers. """ @@ -821,6 +830,7 @@ class CephOperator(object): name=rule_name, reason=body['status']) raise e + # TODO(CephPoolsDecouple): remove def configure_osd_pools(self, ceph_backend=None, new_pool_size=None, new_pool_min_size=None): """Create or resize all of the osd pools as needed ceph backend could be 2nd backend which is in configuring state @@ -881,74 +891,6 @@ class CephOperator(object): except exception.CephFailure as e: LOG.info("Cannot add pools: %s" % e) - def _update_k8s_ceph_pool_secrets(self, ceph_backend): - """Create CEPH pool secrets for k8s namespaces. - :param ceph_backend input/output storage backend data - """ - - pool_name = K8RbdProvisioner.get_pool(ceph_backend) - - namespaces_to_add, namespaces_to_rm = \ - K8RbdProvisioner.getNamespacesDelta(ceph_backend) - - # Get or create Ceph pool key. One per pool. - # This key will be used by the K8S secrets from the rbd-provisioner. - if namespaces_to_add: - key = self._configure_pool_key(pool_name) - - # Get the capabilities of the backend directly from DB to avoid - # committing changes unrelated to ceph pool keys. - try: - orig_ceph_backend = self._db_api.storage_backend_get(ceph_backend['id']) - orig_capab = orig_ceph_backend['capabilities'] - except exception.InvalidParameterValue: - # This is a new backend, not yet stored in DB. 
- orig_ceph_backend = None - - configured_namespaces = \ - K8RbdProvisioner.getListFromNamespaces(orig_ceph_backend, - get_configured=True) - - # Adding secrets to namespaces - for namespace in namespaces_to_add: - K8RbdProvisioner.create_k8s_pool_secret( - ceph_backend, key=key, - namespace=namespace, force=(True if not ceph_backend else False)) - - # Update the backend's capabilities to reflect that a secret - # has been created for the k8s pool in the given namespace. - # Update DB for each item to reflect reality in case of error. - configured_namespaces.append(namespace) - if orig_ceph_backend: - orig_capab[constants.K8S_RBD_PROV_NAMESPACES_READY] = \ - ','.join(configured_namespaces) - self._db_api.storage_backend_update(ceph_backend['id'], - {'capabilities': orig_capab}) - - # Removing secrets from namespaces - for namespace in namespaces_to_rm: - K8RbdProvisioner.remove_k8s_pool_secret(ceph_backend, - namespace) - configured_namespaces.remove(namespace) - if orig_ceph_backend: - if configured_namespaces: - orig_capab[constants.K8S_RBD_PROV_NAMESPACES_READY] = \ - ','.join(configured_namespaces) - elif constants.K8S_RBD_PROV_NAMESPACES_READY in orig_capab: - # No RBD Provisioner configured, cleanup - del orig_capab[constants.K8S_RBD_PROV_NAMESPACES_READY] - self._db_api.storage_backend_update(ceph_backend['id'], - {'capabilities': orig_capab}) - - # Done, store the updated capabilities in the ceph_backend reference - capab = ceph_backend['capabilities'] - if configured_namespaces: - capab[constants.K8S_RBD_PROV_NAMESPACES_READY] = \ - ','.join(configured_namespaces) - elif constants.K8S_RBD_PROV_NAMESPACES_READY in capab: - # No RBD Provisioner configured, cleanup - del capab[constants.K8S_RBD_PROV_NAMESPACES_READY] - def _update_db_capabilities(self, bk, new_storceph): # Avoid updating DB for all capabilities in new_storceph as we # don't manage them. Leave the callers deal with it. @@ -959,96 +901,6 @@ class CephOperator(object): {'capabilities': bk['capabilities']} ) - def check_and_update_rbd_provisioner(self, new_storceph=None): - """ Check and/or update RBD Provisioner configuration for all Ceph - internal backends. - - This function should be called when: - 1. Making any changes to rbd-provisioner service - (adding a new, removing or updating an existing provisioner) - 2. Synchronizing changes with the DB. - - To speed up synchronization, DB entries are used to determine when - changes are needed and only then proceed with more time consuming - operations. - - Note: This function assumes a functional Ceph cluster - - :param new_storceph a storage backend object as_dict() with updated - data. This is required as database updates can happen later. - :returns an updated version of new_storceph or None - """ - # Get an updated list of backends - if new_storceph: - ceph_backends = [b.as_dict() for b in - self._db_api.storage_backend_get_list() - if b['backend'] == constants.SB_TYPE_CEPH and - b['name'] != new_storceph['name']] - ceph_backends.append(new_storceph) - else: - ceph_backends = [b.as_dict() for b in - self._db_api.storage_backend_get_list() - if b['backend'] == constants.SB_TYPE_CEPH] - - # Nothing to do if rbd-provisioner is not configured and was never - # configured on any backend. 
- for bk in ceph_backends: - svcs = utils.SBApiHelper.getListFromServices(bk) - if (constants.SB_SVC_RBD_PROVISIONER in svcs or - bk['capabilities'].get(constants.K8S_RBD_PROV_NAMESPACES_READY) or - bk['capabilities'].get(constants.K8S_RBD_PROV_ADMIN_SECRET_READY)): - break - else: - return new_storceph - - # In order for an RBD provisioner to work we need: - # - A couple of Ceph keys: - # 1. A cluster wide admin key (e.g. the one in - # /etc/ceph/ceph.client.admin.keyring) - # 2. A key for accessing the pool (e.g. client.kube-rbd) - # - The Ceph keys above passed into Kubernetes secrets: - # 1. An admin secret in the RBD Provisioner POD namespace with the - # Ceph cluster wide admin key. - # 2. One or more K8S keys with the Ceph pool key for each namespace - # we allow RBD PV and PVC creations. - - LOG.info("Updating rbd-provisioner configuration.") - # Manage Ceph cluster wide admin key and associated secret - we create - # it if needed or remove it if no longer needed. - admin_secret_exists = False - remove_admin_secret = True - for bk in ceph_backends: - svcs = utils.SBApiHelper.getListFromServices(bk) - - # Create secret - # Check to see if we need the admin Ceph key. This key is created - # once per cluster and references to it are kept in all Ceph tiers - # of that cluster. So make sure they are up to date. - if constants.SB_SVC_RBD_PROVISIONER in svcs: - remove_admin_secret = False - if bk['capabilities'].get(constants.K8S_RBD_PROV_ADMIN_SECRET_READY): - admin_secret_exists = True - else: - if not admin_secret_exists: - K8RbdProvisioner.create_k8s_admin_secret() - admin_secret_exists = True - bk['capabilities'][constants.K8S_RBD_PROV_ADMIN_SECRET_READY] = True - self._update_db_capabilities(bk, new_storceph) - # Remove admin secret and any references to it if RBD Provisioner is - # unconfigured. 
- if remove_admin_secret: - K8RbdProvisioner.remove_k8s_admin_secret() - for bk in ceph_backends: - if bk['capabilities'].get(constants.K8S_RBD_PROV_ADMIN_SECRET_READY): - del bk['capabilities'][constants.K8S_RBD_PROV_ADMIN_SECRET_READY] - self._update_db_capabilities(bk, new_storceph) - - for bk in ceph_backends: - self._update_k8s_ceph_pool_secrets(bk) - - # Return updated new_storceph reference - return new_storceph - def get_osd_tree(self): """Get OSD tree info return: list of nodes and a list of stray osds e.g.: @@ -1174,13 +1026,24 @@ class CephOperator(object): return body["output"] def get_ceph_cluster_info_availability(self): + # TODO(CephPoolsDecouple): rework # Check if the ceph cluster is ready to return statistics storage_hosts = self._db_api.ihost_get_by_personality( constants.STORAGE) + + is_aio_kubernetes = ( + tsc.system_type == constants.TIS_AIO_BUILD and + utils.is_kubernetes_config(self._db_api)) + + if not storage_hosts and is_aio_kubernetes: + storage_hosts = self._db_api.ihost_get_by_personality( + constants.CONTROLLER) + # If there is no storage node present, ceph usage # information is not relevant if not storage_hosts: return False + # At least one storage node must be in available state for host in storage_hosts: if host['availability'] == constants.AVAILABILITY_AVAILABLE: @@ -1190,6 +1053,7 @@ class CephOperator(object): return False return True + # TODO(CephPoolsDecouple): rework - determine the existing pools def get_pools_config(self): for pool in CEPH_POOLS: # Here it is okay for object pool name is either @@ -1249,6 +1113,7 @@ class CephOperator(object): return storage_hosts_upgraded + # TODO(CephPoolsDecouple): remove # TIER SUPPORT def _calculate_target_pg_num_for_tier_pool(self, tiers_obj, pool_name, storage_hosts): @@ -1364,6 +1229,7 @@ class CephOperator(object): return target_pg_num, osds_raw + # TODO(CephPoolsDecouple): remove def audit_osd_pool_on_tier(self, tier_obj, storage_hosts, pool_name): """ Audit an osd pool and update pg_num, pgp_num accordingly. storage_hosts; list of known storage host objects @@ -1453,6 +1319,7 @@ class CephOperator(object): # we attempt to increase the pgp number. We will wait for the # audit to call us and increase the pgp number at that point. + # TODO(CephPoolsDecouple): remove def audit_osd_quotas_for_tier(self, tier_obj): # TODO(rchurch): Make this smarter.Just look at the OSD for the tier to @@ -1639,6 +1506,7 @@ class CephOperator(object): # Special case: For now with one pool allow no quota self.executed_default_quota_check_by_tier[tier_obj.name] = True + # TODO(CephPoolsDecouple): remove def audit_osd_pools_by_tier(self): """ Check osd pool pg_num vs calculated target pg_num. Set pool quotas default values dynamically depending diff --git a/sysinv/sysinv/sysinv/sysinv/conductor/manager.py b/sysinv/sysinv/sysinv/sysinv/conductor/manager.py index fe4d0c4dde..de83c5f457 100644 --- a/sysinv/sysinv/sysinv/sysinv/conductor/manager.py +++ b/sysinv/sysinv/sysinv/sysinv/conductor/manager.py @@ -1512,13 +1512,12 @@ class ConductorManager(service.PeriodicService): host.action == constants.FORCE_UNLOCK_ACTION or host.action == constants.UNLOCK_ACTION): + # TODO(CephPoolsDecouple): remove # Ensure the OSD pools exists. In the case of a system restore, # the pools must be re-created when the first storage node is # unlocked. - self._ceph.configure_osd_pools() - - # Generate CEPH keys for k8s pools. 
- self.check_and_update_rbd_provisioner(context) + if not utils.is_kubernetes_config(self.dbapi): + self._ceph.configure_osd_pools() # Generate host configuration files self._puppet.update_host_config(host) @@ -1530,6 +1529,7 @@ class ConductorManager(service.PeriodicService): # Set up the PXE config file for this host so it can run the installer self._update_pxe_config(host) + # TODO(CephPoolsDecouple): remove def configure_osd_pools(self, context, ceph_backend=None, new_pool_size=None, new_pool_min_size=None): """Configure or update configuration of the OSD pools. If none of the optionals are provided then all pools are updated based on DB configuration. @@ -4941,8 +4941,13 @@ class ConductorManager(service.PeriodicService): continue self._audit_ihost_action(host) + # TODO(CephPoolsDecouple): remove @periodic_task.periodic_task(spacing=60) def _osd_pool_audit(self, context): + if utils.is_kubernetes_config(self.dbapi): + LOG.debug("_osd_pool_audit skip") + return + # Only do the audit if ceph is configured. if not StorageBackendConfig.has_backend( self.dbapi, @@ -4950,6 +4955,8 @@ class ConductorManager(service.PeriodicService): ): return + LOG.debug("_osd_pool_audit") + # Only run the pool audit task if we have at least one storage node # available. Pools are created with initial PG num values and quotas # when the first OSD is added. This is done with only controller-0 @@ -5002,32 +5009,13 @@ class ConductorManager(service.PeriodicService): cmd = ['kubectl', '--kubeconfig=/etc/kubernetes/admin.conf', 'get', 'namespaces', '-o', 'go-template=\'{{range .items}}{{.metadata.name}}\'{{end}}\''] - stdout, _ = cutils.execute(*cmd, run_as_root=False) + stdout, stderr = cutils.execute(*cmd, run_as_root=False) namespaces = [n for n in stdout.split("\'") if n] return namespaces except exception.ProcessExecutionError as e: raise exception.SysinvException( _("Error getting Kubernetes list of namespaces, " - "Details: %s" % str(e))) - - def check_and_update_rbd_provisioner(self, context, new_storceph=None): - """ Check and/or update RBD Provisioner configuration for all Ceph - internal backends. - - This function should be called in two cases: - 1. When making any changes to the rbd-provisioner service. - 2. When delaying changes due to Ceph not being up. - - To allow delayed executions we check DB entries for changes and only - then proceed with time consuming modifications. - - Note: This function assumes a functional Ceph cluster - - :param new_storceph a storage backend object as_dict() with updated - data. This is required as database updates can happen later. - :returns an updated version of new_storceph or None - """ - return self._ceph.check_and_update_rbd_provisioner(new_storceph) + "Details: %s") % str(e)) def configure_isystemname(self, context, systemname): """Configure the systemname with the supplied data. @@ -5146,7 +5134,9 @@ class ConductorManager(service.PeriodicService): # Update the osdid in the stor object istor_obj['osdid'] = body['output']['osdid'] - self._ceph.configure_osd_pools() + # TODO(CephPoolsDecouple): remove + if not utils.is_kubernetes_config(self.dbapi): + self._ceph.configure_osd_pools() return istor_obj @@ -6336,8 +6326,6 @@ class ConductorManager(service.PeriodicService): if utils.is_aio_simplex_system(self.dbapi): task = None cceph.fix_crushmap(self.dbapi) - # Ceph is up, update rbd-provisioner config if needed. 
- self.check_and_update_rbd_provisioner(context) else: task = constants.SB_TASK_PROVISION_STORAGE values = {'state': state, diff --git a/sysinv/sysinv/sysinv/sysinv/conductor/rpcapi.py b/sysinv/sysinv/sysinv/sysinv/conductor/rpcapi.py index a8bb1c505f..044c3566b2 100644 --- a/sysinv/sysinv/sysinv/sysinv/conductor/rpcapi.py +++ b/sysinv/sysinv/sysinv/sysinv/conductor/rpcapi.py @@ -117,6 +117,7 @@ class ConductorAPI(sysinv.openstack.common.rpc.proxy.RpcProxy): host=host, do_compute_apply=do_compute_apply)) + # TODO(CephPoolsDecouple): remove def configure_osd_pools(self, context, ceph_backend=None, new_pool_size=None, new_pool_min_size=None): """Configure or update configuration of the OSD pools. If none of the optionals are provided then all pools are updated based on DB configuration. @@ -912,27 +913,6 @@ class ConductorAPI(sysinv.openstack.common.rpc.proxy.RpcProxy): return self.call(context, self.make_msg('get_k8s_namespaces')) - def check_and_update_rbd_provisioner(self, context, new_storceph=None): - """ Check and/or update RBD Provisioner is correctly configured - for all Ceph internal backends. - - This function should be called in two cases: - 1. When making any change to rbd-provisioner - 2. When delaying changes due to Ceph not being up - - To allow delayed executions we check DB entries for changes and only - then proceed with time consuming modifications. - - Note: This function assumes a fully functional Ceph cluster - - :param new_storceph a storage backend object as_dict() with updated - data. This is needed as database updates can happen later. - :returns an updated version of new_storceph - """ - return self.call(context, - self.make_msg('check_and_update_rbd_provisioner', - new_storceph=new_storceph)) - def report_config_status(self, context, iconfig, status, error=None): """ Callback from Sysinv Agent on manifest apply success or failure diff --git a/sysinv/sysinv/sysinv/sysinv/helm/cinder.py b/sysinv/sysinv/sysinv/sysinv/helm/cinder.py index 785281bd6b..053b583494 100644 --- a/sysinv/sysinv/sysinv/sysinv/helm/cinder.py +++ b/sysinv/sysinv/sysinv/sysinv/helm/cinder.py @@ -90,14 +90,12 @@ class CinderHelm(openstack.OpenstackBaseHelm): # tier pools. 'replication': replication, 'crush_rule': ruleset, - 'chunk_size': 8 }, 'volume': { - # We don't use the chart to configure the cinder-volumes - # pool, so these values don't have any impact right now. + # The cinder chart doesn't currently support specifying + # the config for multiple volume/backup pools. 'replication': replication, 'crush_rule': ruleset, - 'chunk_size': 8 } } } diff --git a/sysinv/sysinv/sysinv/sysinv/helm/rbd_provisioner.py b/sysinv/sysinv/sysinv/sysinv/helm/rbd_provisioner.py index fd881b6f99..a98e568603 100644 --- a/sysinv/sysinv/sysinv/sysinv/helm/rbd_provisioner.py +++ b/sysinv/sysinv/sysinv/sysinv/helm/rbd_provisioner.py @@ -23,8 +23,17 @@ class RbdProvisionerHelm(base.BaseHelm): common.HELM_NS_KUBE_SYSTEM ] + SERVICE_NAME = 'rbd-provisioner' SERVICE_PORT_MON = 6789 + @property + def docker_repo_source(self): + return common.DOCKER_SRC_STX + + @property + def docker_repo_tag(self): + return common.DOCKER_SRCS[self.docker_repo_source][common.IMG_TAG_KEY] + def get_namespaces(self): return self.SUPPORTED_NAMESPACES @@ -52,20 +61,84 @@ class RbdProvisionerHelm(base.BaseHelm): "adminSecretName": constants.K8S_RBD_PROV_ADMIN_SECRET_NAME } + # Get tier info. 
+ tiers = self.dbapi.storage_tier_get_list() + primary_tier_name = \ + constants.SB_TIER_DEFAULT_NAMES[constants.SB_TIER_TYPE_CEPH] + classes = [] for bk in rbd_provisioner_bks: + # Get the ruleset for the new kube-rbd pool. + tier = next((t for t in tiers if t.forbackendid == bk.id), None) + if not tier: + raise Exception("No tier present for backend %s" % bk.name) + + rule_name = "{0}{1}{2}".format( + tier.name, + constants.CEPH_CRUSH_TIER_SUFFIX, + "-ruleset").replace('-', '_') + + # Check namespaces. We need to know on what namespaces to create + # the secrets for the kube-rbd pools. + pool_secrets_namespaces = bk.capabilities.get( + constants.K8S_RBD_PROV_NAMESPACES) + if not pool_secrets_namespaces: + raise Exception("Please specify the rbd_provisioner_namespaces" + " for the %s backend." % bk.name) + cls = { "name": K8RbdProvisioner.get_storage_class_name(bk), - "pool": K8RbdProvisioner.get_pool(bk), + "pool_name": K8RbdProvisioner.get_pool(bk), + "pool_secrets_namespaces": pool_secrets_namespaces.encode( + 'utf8', 'strict'), + "replication": int(bk.capabilities.get("replication")), + "crush_rule_name": rule_name, + "chunk_size": 64, "userId": K8RbdProvisioner.get_user_id(bk), "userSecretName": K8RbdProvisioner.get_user_secret_name(bk) } classes.append(cls) + # Get all the info for creating the ephemeral pool. + ephemeral_pools = [] + # Right now the ephemeral pool will only use the primary tier. + rule_name = "{0}{1}{2}".format( + primary_tier_name, + constants.CEPH_CRUSH_TIER_SUFFIX, + "-ruleset").replace('-', '_') + + sb_list_ext = self.dbapi.storage_backend_get_list_by_type( + backend_type=constants.SB_TYPE_CEPH_EXTERNAL) + sb_list = self.dbapi.storage_backend_get_list_by_type( + backend_type=constants.SB_TYPE_CEPH) + + if sb_list_ext: + for sb in sb_list_ext: + if constants.SB_SVC_NOVA in sb.services: + rbd_pool = sb.capabilities.get('ephemeral_pool') + ephemeral_pool = { + "pool_name": rbd_pool, + "replication": int(sb.capabilities.get("replication")), + "crush_rule_name": rule_name, + "chunk_size": 64, + } + ephemeral_pools.append(ephemeral_pool) + # Treat internal CEPH. + if sb_list: + ephemeral_pool = { + "pool_name": constants.CEPH_POOL_EPHEMERAL_NAME, + "replication": int(sb_list[0].capabilities.get("replication")), + "crush_rule_name": rule_name, + "chunk_size": 64, + } + ephemeral_pools.append(ephemeral_pool) + overrides = { common.HELM_NS_KUBE_SYSTEM: { "classdefaults": classdefaults, - "classes": classes + "classes": classes, + "ephemeral_pools": ephemeral_pools, + "images": self._get_images_overrides(), } } @@ -76,3 +149,21 @@ class RbdProvisionerHelm(base.BaseHelm): namespace=namespace) else: return overrides + + def _get_images_overrides(self): + # TODO: Remove after ceph upgrade + # Format the name of the stx specific ceph config helper + ceph_config_helper_image = "{}/{}{}:{}".format( + common.DOCKER_SRCS[self.docker_repo_source][common.IMG_BASE_KEY], + common.DOCKER_SRCS[self.docker_repo_source][common.IMG_PREFIX_KEY], + 'ceph-config-helper', self.docker_repo_tag) + + rbd_provisioner_image = \ + 'quay.io/external_storage/rbd-provisioner:latest' + + return { + 'tags': { + 'rbd_provisioner': rbd_provisioner_image, + 'rbd_provisioner_storage_init': ceph_config_helper_image, + } + }
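
For context on how this helm plugin and the updated values.yaml fit together: RbdProvisionerHelm.get_overrides() for the kube-system namespace assembles a structure along the lines of the sketch below. All concrete values (monitor addresses, class/secret/rule names, replication, image tags) are placeholders here; at runtime they are derived from the configured Ceph backend, its tier ruleset and capabilities, and the selected docker image source.

    # Illustrative shape of the rbd-provisioner overrides built above; every
    # concrete value below is a placeholder, not fixed by the chart or sysinv.
    example_overrides = {
        'classdefaults': {
            'monitors': ['192.168.204.3:6789'],   # Ceph monitor endpoint(s)
            'adminId': 'admin',
            'adminSecretName': 'ceph-admin',
        },
        'classes': [{
            'name': 'general',                    # storage class name
            'pool_name': 'kube-rbd',
            'pool_secrets_namespaces': 'kube-system',
            'replication': 2,
            'crush_rule_name': 'storage_tier_ruleset',
            'chunk_size': 64,
            'userId': 'ceph-pool-kube-rbd',
            'userSecretName': 'ceph-pool-kube-rbd',
        }],
        'ephemeral_pools': [{
            'pool_name': 'ephemeral',
            'replication': 2,
            'crush_rule_name': 'storage_tier_ruleset',
            'chunk_size': 64,
        }],
        'images': {
            'tags': {
                'rbd_provisioner':
                    'quay.io/external_storage/rbd-provisioner:latest',
                'rbd_provisioner_storage_init':
                    'docker.io/port/ceph-config-helper:v1.10.3',
            },
        },
    }

The rbd-provisioner-storage-init job defined in pre-install-check-ceph.yaml then consumes each entry in classes and ephemeral_pools to create or resize the pool, apply its crush rule and replication, and, for the classes entries, generate the per-namespace kubernetes secrets that sysinv no longer manages.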