diff --git a/sysinv/sysinv/sysinv/sysinv/common/exception.py b/sysinv/sysinv/sysinv/sysinv/common/exception.py index e731715ef4..64371546bc 100644 --- a/sysinv/sysinv/sysinv/sysinv/common/exception.py +++ b/sysinv/sysinv/sysinv/sysinv/common/exception.py @@ -1537,7 +1537,7 @@ class KubeVersionUnavailable(NotFound): message = "Getting kubeadm and kubelet versions failed" -class HelmTillerFailure(SysinvException): +class HelmFailure(SysinvException): message = _("Helm operation failure: %(reason)s") diff --git a/sysinv/sysinv/sysinv/sysinv/conductor/kube_app.py b/sysinv/sysinv/sysinv/sysinv/conductor/kube_app.py index 466e5ed6c2..b3984ecbb4 100644 --- a/sysinv/sysinv/sysinv/sysinv/conductor/kube_app.py +++ b/sysinv/sysinv/sysinv/sysinv/conductor/kube_app.py @@ -717,9 +717,10 @@ class AppOperator(object): if chart_images: images_by_charts.update({chart.name: chart_images}) - with open(app.sync_imgfile, 'w') as f: - yaml.safe_dump(images_by_charts, f, explicit_start=True, - default_flow_style=False) + if images_by_charts: + with open(app.sync_imgfile, 'w') as f: + yaml.safe_dump(images_by_charts, f, explicit_start=True, + default_flow_style=False) def _retrieve_images_list(self, app_images_file): with io.open(app_images_file, 'r', encoding='utf-8') as f: @@ -1737,11 +1738,21 @@ class AppOperator(object): old_app_charts = [c.release for c in old_app.charts] deployed_releases = helm_utils.retrieve_helm_releases() for new_chart in new_app.charts: + # Cleanup the releases in the new application version + # but are not in the old application version if (new_chart.release not in old_app_charts and new_chart.release in deployed_releases): - # Cleanup the releases in the new application version - # but are not in the old application version - helm_utils.delete_helm_v3_release(new_chart.release) + # Send delete request in FluxCD so it doesn't + # recreate the helm release + self._kube.delete_custom_resource( + constants.FLUXCD_CRD_HELM_REL_GROUP, + constants.FLUXCD_CRD_HELM_REL_VERSION, + new_chart.namespace, + constants.FLUXCD_CRD_HELM_REL_PLURAL, + new_chart.metadata_name) + # Use helm to immediately remove the release + helm_utils.delete_helm_release(new_chart.release, + new_chart.namespace) else: rc = False @@ -2810,13 +2821,24 @@ class AppOperator(object): to_app_charts = [c.release for c in to_app.charts] deployed_releases = helm_utils.retrieve_helm_releases() for from_chart in from_app.charts: + # Cleanup the releases in the old application version + # but are not in the new application version if (from_chart.release not in to_app_charts and from_chart.release in deployed_releases): - # Cleanup the releases in the old application version - # but are not in the new application version - helm_utils.delete_helm_v3_release(from_chart.release) + # Send delete request in FluxCD so it doesn't + # recreate the helm release + self._kube.delete_custom_resource( + constants.FLUXCD_CRD_HELM_REL_GROUP, + constants.FLUXCD_CRD_HELM_REL_VERSION, + from_chart.namespace, + constants.FLUXCD_CRD_HELM_REL_PLURAL, + from_chart.metadata_name) + # Use helm to immediately remove the release + helm_utils.delete_helm_release(from_chart.release, + from_chart.namespace) LOG.info("Helm release %s for Application %s (%s) deleted" - % (from_chart.release, from_app.name, from_app.version)) + % (from_chart.release, from_app.name, + from_app.version)) self._cleanup(from_app, app_dir=False) self._utils._patch_report_app_dependencies( @@ -2925,8 +2947,10 @@ class AppOperator(object): helm_release_status, _ = self._fluxcd.get_helm_release_status(helm_release_dict) if helm_release_status == self._fluxcd.HELM_RELEASE_STATUS_UNKNOWN: - LOG.info("Removing helm release which has an operation in progress: {} - {}".format(namespace, release)) - # Send delete request in FluxCD so it doesn't recreate the helm release + LOG.info("Removing helm release which has an operation in " + "progress: {} - {}".format(namespace, release)) + # Send delete request in FluxCD so it doesn't recreate the helm + # release self._kube.delete_custom_resource( constants.FLUXCD_CRD_HELM_REL_GROUP, constants.FLUXCD_CRD_HELM_REL_VERSION, @@ -2934,7 +2958,9 @@ class AppOperator(object): constants.FLUXCD_CRD_HELM_REL_PLURAL, release) # Remove resource in Helm - helm_utils.delete_helm_v3_release(helm_release_dict['spec']['releaseName'], namespace=namespace) + helm_utils.delete_helm_release( + helm_release_dict['spec']['releaseName'], + namespace=namespace) if self._make_app_request(app, constants.APP_REMOVE_OP): # After fluxcd delete, the data for the releases are purged from diff --git a/sysinv/sysinv/sysinv/sysinv/helm/utils.py b/sysinv/sysinv/sysinv/sysinv/helm/utils.py index 55ef9c1341..b72ba28489 100644 --- a/sysinv/sysinv/sysinv/sysinv/helm/utils.py +++ b/sysinv/sysinv/sysinv/sysinv/helm/utils.py @@ -69,14 +69,14 @@ def refresh_helm_repo_information(): rpcapi.refresh_helm_repo_information(context.get_admin_context()) -def _retry_on_HelmTillerFailure(ex): +def _retry_on_HelmFailure(ex): LOG.info('Caught exception retrieving helm releases. Retrying... Exception: {}'.format(ex)) - return isinstance(ex, exception.HelmTillerFailure) + return isinstance(ex, exception.HelmFailure) @retry(stop_max_attempt_number=6, wait_fixed=20 * 1000, - retry_on_exception=_retry_on_HelmTillerFailure) -def retrieve_helm_v3_releases(): + retry_on_exception=_retry_on_HelmFailure) +def retrieve_helm_releases(): helm_list = subprocess.Popen( ['helm', '--kubeconfig', kubernetes.KUBERNETES_ADMIN_CONF, 'list', '--all-namespaces', '--output', 'yaml'], @@ -89,15 +89,15 @@ def retrieve_helm_v3_releases(): out, err = helm_list.communicate() if helm_list.returncode != 0: if err: - raise exception.HelmTillerFailure(reason=err) + raise exception.HelmFailure(reason=err) # killing the subprocesses with +kill() when timer expires returns EBADF # because the pipe is closed, but no error string on stderr. if helm_list.returncode == -9: - raise exception.HelmTillerFailure( + raise exception.HelmFailure( reason="helm list operation timed out after " "20 seconds. Terminated by threading timer.") - raise exception.HelmTillerFailure( + raise exception.HelmFailure( reason="helm list operation failed without error " "message, errno=%s" % helm_list.returncode) @@ -114,28 +114,14 @@ def retrieve_helm_v3_releases(): return deployed_releases except Exception as e: - raise exception.HelmTillerFailure( - reason="Failed to retrieve helmv3 releases: %s" % e) + raise exception.HelmFailure( + reason="Failed to retrieve helm releases: %s" % e) finally: timer.cancel() -def retrieve_helm_releases(): - """Retrieve the deployed helm releases - - Get the name, namespace and version for the deployed releases - by querying helm tiller - :return: a dict of deployed helm releases - """ - deployed_releases = {} - - deployed_releases.update(retrieve_helm_v3_releases()) - - return deployed_releases - - -def delete_helm_v3_release(release, namespace="default", flags=None): - """Delete helm v3 release +def delete_helm_release(release, namespace="default", flags=None): + """Delete helm release via callout to helm command :param release: Helm release name :param namespace: Helm release namespace @@ -161,19 +147,19 @@ def delete_helm_v3_release(release, namespace="default", flags=None): out, err = process.communicate() if err: if "not found" in err: - LOG.debug("Release %s not found or deleted already" % release) + LOG.error("Release %s/%s not found or deleted already" % (namespace, release)) return out, err - raise exception.HelmTillerFailure( + raise exception.HelmFailure( reason="Failed to delete release: %s" % err) elif not out: - err_msg = "Failed to execute helm v3 command. " \ + err_msg = "Failed to execute helm command. " \ "Helm response timeout." - raise exception.HelmTillerFailure(reason=err_msg) + raise exception.HelmFailure(reason=err_msg) return out, err except Exception as e: - LOG.error("Failed to execute helm v3 command: %s" % e) - raise exception.HelmTillerFailure( - reason="Failed to execute helm v3 command: %s" % e) + LOG.error("Failed to execute helm command: %s" % e) + raise exception.HelmFailure( + reason="Failed to execute helm command: %s" % e) finally: timer.cancel() @@ -214,12 +200,12 @@ def install_helm_chart_with_dry_run(args=None): if helm_install.returncode == 0: return out elif err: - raise exception.HelmTillerFailure(reason=err) + raise exception.HelmFailure(reason=err) else: err_msg = "Helm install --dry-run operation timeout." - raise exception.HelmTillerFailure(reason=err_msg) + raise exception.HelmFailure(reason=err_msg) except Exception as e: - raise exception.HelmTillerFailure( + raise exception.HelmFailure( reason="Failed to render helm chart: %s" % e) finally: if timer: