Merge "AppFrmwk: Cleanup unique helm releases over update"

This commit is contained in:
Zuul 2023-05-15 17:18:32 +00:00 committed by Gerrit Code Review
commit a27d0212f3
3 changed files with 61 additions and 49 deletions

View File

@ -1537,7 +1537,7 @@ class KubeVersionUnavailable(NotFound):
message = "Getting kubeadm and kubelet versions failed"
class HelmTillerFailure(SysinvException):
class HelmFailure(SysinvException):
message = _("Helm operation failure: %(reason)s")

View File

@ -717,9 +717,10 @@ class AppOperator(object):
if chart_images:
images_by_charts.update({chart.name: chart_images})
with open(app.sync_imgfile, 'w') as f:
yaml.safe_dump(images_by_charts, f, explicit_start=True,
default_flow_style=False)
if images_by_charts:
with open(app.sync_imgfile, 'w') as f:
yaml.safe_dump(images_by_charts, f, explicit_start=True,
default_flow_style=False)
def _retrieve_images_list(self, app_images_file):
with io.open(app_images_file, 'r', encoding='utf-8') as f:
@ -1737,11 +1738,21 @@ class AppOperator(object):
old_app_charts = [c.release for c in old_app.charts]
deployed_releases = helm_utils.retrieve_helm_releases()
for new_chart in new_app.charts:
# Clean up the releases that are in the new application version
# but not in the old application version
if (new_chart.release not in old_app_charts and
new_chart.release in deployed_releases):
# Clean up the releases that are in the new application version
# but not in the old application version
helm_utils.delete_helm_v3_release(new_chart.release)
# Send delete request in FluxCD so it doesn't
# recreate the helm release
self._kube.delete_custom_resource(
constants.FLUXCD_CRD_HELM_REL_GROUP,
constants.FLUXCD_CRD_HELM_REL_VERSION,
new_chart.namespace,
constants.FLUXCD_CRD_HELM_REL_PLURAL,
new_chart.metadata_name)
# Use helm to immediately remove the release
helm_utils.delete_helm_release(new_chart.release,
new_chart.namespace)
else:
rc = False
@ -2810,13 +2821,24 @@ class AppOperator(object):
to_app_charts = [c.release for c in to_app.charts]
deployed_releases = helm_utils.retrieve_helm_releases()
for from_chart in from_app.charts:
# Clean up the releases that are in the old application version
# but not in the new application version
if (from_chart.release not in to_app_charts and
from_chart.release in deployed_releases):
# Clean up the releases that are in the old application version
# but not in the new application version
helm_utils.delete_helm_v3_release(from_chart.release)
# Send delete request in FluxCD so it doesn't
# recreate the helm release
self._kube.delete_custom_resource(
constants.FLUXCD_CRD_HELM_REL_GROUP,
constants.FLUXCD_CRD_HELM_REL_VERSION,
from_chart.namespace,
constants.FLUXCD_CRD_HELM_REL_PLURAL,
from_chart.metadata_name)
# Use helm to immediately remove the release
helm_utils.delete_helm_release(from_chart.release,
from_chart.namespace)
LOG.info("Helm release %s for Application %s (%s) deleted"
% (from_chart.release, from_app.name, from_app.version))
% (from_chart.release, from_app.name,
from_app.version))
self._cleanup(from_app, app_dir=False)
self._utils._patch_report_app_dependencies(
@ -2925,8 +2947,10 @@ class AppOperator(object):
helm_release_status, _ = self._fluxcd.get_helm_release_status(helm_release_dict)
if helm_release_status == self._fluxcd.HELM_RELEASE_STATUS_UNKNOWN:
LOG.info("Removing helm release which has an operation in progress: {} - {}".format(namespace, release))
# Send delete request in FluxCD so it doesn't recreate the helm release
LOG.info("Removing helm release which has an operation in "
"progress: {} - {}".format(namespace, release))
# Send delete request in FluxCD so it doesn't recreate the helm
# release
self._kube.delete_custom_resource(
constants.FLUXCD_CRD_HELM_REL_GROUP,
constants.FLUXCD_CRD_HELM_REL_VERSION,
@ -2934,7 +2958,9 @@ class AppOperator(object):
constants.FLUXCD_CRD_HELM_REL_PLURAL,
release)
# Remove resource in Helm
helm_utils.delete_helm_v3_release(helm_release_dict['spec']['releaseName'], namespace=namespace)
helm_utils.delete_helm_release(
helm_release_dict['spec']['releaseName'],
namespace=namespace)
if self._make_app_request(app, constants.APP_REMOVE_OP):
# After fluxcd delete, the data for the releases are purged from

View File

@ -69,14 +69,14 @@ def refresh_helm_repo_information():
rpcapi.refresh_helm_repo_information(context.get_admin_context())
def _retry_on_HelmTillerFailure(ex):
def _retry_on_HelmFailure(ex):
LOG.info('Caught exception retrieving helm releases. Retrying... Exception: {}'.format(ex))
return isinstance(ex, exception.HelmTillerFailure)
return isinstance(ex, exception.HelmFailure)
@retry(stop_max_attempt_number=6, wait_fixed=20 * 1000,
retry_on_exception=_retry_on_HelmTillerFailure)
def retrieve_helm_v3_releases():
retry_on_exception=_retry_on_HelmFailure)
def retrieve_helm_releases():
helm_list = subprocess.Popen(
['helm', '--kubeconfig', kubernetes.KUBERNETES_ADMIN_CONF,
'list', '--all-namespaces', '--output', 'yaml'],
@ -89,15 +89,15 @@ def retrieve_helm_v3_releases():
out, err = helm_list.communicate()
if helm_list.returncode != 0:
if err:
raise exception.HelmTillerFailure(reason=err)
raise exception.HelmFailure(reason=err)
# killing the subprocesses with +kill() when timer expires returns EBADF
# because the pipe is closed, but no error string on stderr.
if helm_list.returncode == -9:
raise exception.HelmTillerFailure(
raise exception.HelmFailure(
reason="helm list operation timed out after "
"20 seconds. Terminated by threading timer.")
raise exception.HelmTillerFailure(
raise exception.HelmFailure(
reason="helm list operation failed without error "
"message, errno=%s" % helm_list.returncode)
@ -114,28 +114,14 @@ def retrieve_helm_v3_releases():
return deployed_releases
except Exception as e:
raise exception.HelmTillerFailure(
reason="Failed to retrieve helmv3 releases: %s" % e)
raise exception.HelmFailure(
reason="Failed to retrieve helm releases: %s" % e)
finally:
timer.cancel()
def retrieve_helm_releases():
"""Retrieve the deployed helm releases
Get the name, namespace and version for the deployed releases
by querying helm tiller
:return: a dict of deployed helm releases
"""
deployed_releases = {}
deployed_releases.update(retrieve_helm_v3_releases())
return deployed_releases
def delete_helm_v3_release(release, namespace="default", flags=None):
"""Delete helm v3 release
def delete_helm_release(release, namespace="default", flags=None):
"""Delete helm release via callout to helm command
:param release: Helm release name
:param namespace: Helm release namespace
@ -161,19 +147,19 @@ def delete_helm_v3_release(release, namespace="default", flags=None):
out, err = process.communicate()
if err:
if "not found" in err:
LOG.debug("Release %s not found or deleted already" % release)
LOG.error("Release %s/%s not found or deleted already" % (namespace, release))
return out, err
raise exception.HelmTillerFailure(
raise exception.HelmFailure(
reason="Failed to delete release: %s" % err)
elif not out:
err_msg = "Failed to execute helm v3 command. " \
err_msg = "Failed to execute helm command. " \
"Helm response timeout."
raise exception.HelmTillerFailure(reason=err_msg)
raise exception.HelmFailure(reason=err_msg)
return out, err
except Exception as e:
LOG.error("Failed to execute helm v3 command: %s" % e)
raise exception.HelmTillerFailure(
reason="Failed to execute helm v3 command: %s" % e)
LOG.error("Failed to execute helm command: %s" % e)
raise exception.HelmFailure(
reason="Failed to execute helm command: %s" % e)
finally:
timer.cancel()
@ -214,12 +200,12 @@ def install_helm_chart_with_dry_run(args=None):
if helm_install.returncode == 0:
return out
elif err:
raise exception.HelmTillerFailure(reason=err)
raise exception.HelmFailure(reason=err)
else:
err_msg = "Helm install --dry-run operation timeout."
raise exception.HelmTillerFailure(reason=err_msg)
raise exception.HelmFailure(reason=err_msg)
except Exception as e:
raise exception.HelmTillerFailure(
raise exception.HelmFailure(
reason="Failed to render helm chart: %s" % e)
finally:
if timer: