Merge "appfwk: fix app remove stuck after apply-failed"
This commit is contained in:
commit
b1715439ec
|
@ -3400,7 +3400,34 @@ class AppOperator(object):
|
|||
self._update_app_status(
|
||||
app, new_progress=constants.APP_PROGRESS_DELETE_MANIFEST)
|
||||
|
||||
if self._make_app_request(app, constants.APP_DELETE_OP):
|
||||
# Delete helm releases which have a helm operation running.
|
||||
# eg.: pending-install, pending-upgrade, etc.
|
||||
flux_helm_releases = [(c.metadata_name, c.namespace) for c in self._get_list_of_charts(app)]
|
||||
for release, namespace in flux_helm_releases:
|
||||
helm_release_dict = self._kube.get_custom_resource(
|
||||
constants.FLUXCD_CRD_HELM_REL_GROUP,
|
||||
constants.FLUXCD_CRD_HELM_REL_VERSION,
|
||||
namespace,
|
||||
constants.FLUXCD_CRD_HELM_REL_PLURAL,
|
||||
release)
|
||||
if not helm_release_dict:
|
||||
LOG.warning("FluxCD Helm release info for {} is not available".format(release))
|
||||
continue
|
||||
|
||||
helm_release_status, _ = self._fluxcd.get_helm_release_status(helm_release_dict)
|
||||
if helm_release_status == self._fluxcd.HELM_RELEASE_STATUS_UNKNOWN:
|
||||
LOG.info("Removing helm release which has an operation in progress: {} - {}".format(namespace, release))
|
||||
# Send delete request in FluxCD so it doesn't recreate the helm release
|
||||
self._kube.delete_custom_resource(
|
||||
constants.FLUXCD_CRD_HELM_REL_GROUP,
|
||||
constants.FLUXCD_CRD_HELM_REL_VERSION,
|
||||
namespace,
|
||||
constants.FLUXCD_CRD_HELM_REL_PLURAL,
|
||||
release)
|
||||
# Remove resource in Helm
|
||||
helm_utils.delete_helm_v3_release(helm_release_dict['spec']['releaseName'], namespace=namespace)
|
||||
|
||||
if self._make_app_request(app, constants.APP_REMOVE_OP):
|
||||
# After armada delete, the data for the releases are purged from
|
||||
# tiller/etcd, the releases info for the active app stored in sysinv
|
||||
# db should be set back to 0 and the inactive apps require to be
|
||||
|
@ -4794,7 +4821,7 @@ class FluxCDHelper(object):
|
|||
else:
|
||||
LOG.error("Applying %s failed. Skipping helm release "
|
||||
"cleanup...")
|
||||
elif operation == constants.APP_DELETE_OP:
|
||||
elif operation in [constants.APP_DELETE_OP, constants.APP_REMOVE_OP]:
|
||||
rc = self._delete(manifest_dir)
|
||||
elif operation == constants.APP_ROLLBACK_OP:
|
||||
pass
|
||||
|
@ -4823,6 +4850,7 @@ class FluxCDHelper(object):
|
|||
def _delete(self, manifest_dir):
|
||||
cmd = ['kubectl', '--kubeconfig', kubernetes.KUBERNETES_ADMIN_CONF,
|
||||
'delete', '-k', manifest_dir, '--ignore-not-found=true']
|
||||
|
||||
_, stderr = cutils.trycmd(*cmd)
|
||||
|
||||
if stderr:
|
||||
|
@ -4920,6 +4948,10 @@ class FluxCDHelper(object):
|
|||
else:
|
||||
return ''
|
||||
|
||||
# TODO (lfagunde):
|
||||
# Some methods in this class receive helm_chart_dict as a parameter.
|
||||
# Can move the call to _kube.get_custom_resource() into these functions
|
||||
# or create a helper function inside the class for it.
|
||||
def get_helm_release_status(self, helm_release_dict):
|
||||
"""helm_release_dict is of the form returned by _kube.get_custom_resource().
|
||||
Returns: 'status' of the release (Unknown,True,False) and 'message'
|
||||
|
|
|
@ -38,6 +38,15 @@ LOG = logging.getLogger(__name__)
|
|||
# When python3 migration is finished, the explicit timer should
|
||||
# be removed.
|
||||
|
||||
# TODO(lfagunde):
|
||||
# Some of the logic in here is outdated and assumes the default helm used is v2,
|
||||
# such as the delete_helm_release() function.
|
||||
# Also, this module would benefit from refactoring to add more
|
||||
# functionality and make the current functions more flexible.
|
||||
# Could create a generic "execute_helm_cmd" style function and derive the
|
||||
# specific ones (list, delete, etc) from there. If that's done, remember
|
||||
# to update function calls done to this module from elsewhere in the code.
|
||||
|
||||
def kill_process_and_descendants(proc):
|
||||
# function to kill a process and its children processes
|
||||
for child in psutil.Process(proc.pid).children(recursive=True):
|
||||
|
@ -169,9 +178,9 @@ def retrieve_helm_releases():
|
|||
|
||||
|
||||
def delete_helm_release(release):
|
||||
"""Delete helm release
|
||||
"""Delete helm v2 release
|
||||
|
||||
This method deletes a helm release without --purge which removes
|
||||
This method deletes a helm v2 release without --purge which removes
|
||||
all associated resources from kubernetes but not from the store(ETCD)
|
||||
|
||||
In the scenario of updating application, the method is needed to clean
|
||||
|
@ -215,6 +224,50 @@ def delete_helm_release(release):
|
|||
timer.cancel()
|
||||
|
||||
|
||||
def delete_helm_v3_release(release, namespace="default", flags=None):
    """Delete helm v3 release

    Runs ``helm uninstall -n <namespace> <release>`` using the Kubernetes
    admin kubeconfig. A 20 second timer kills the helm process (and its
    descendants) if it has not finished by then.

    :param release: Helm release name
    :param namespace: Helm release namespace
    :param flags: List with any other flags required to add to the command
    :returns: tuple (stdout, stderr) of the helm command. A release that
              helm reports as "not found" is treated as already deleted
              and is returned the same way instead of raising.
    :raises HelmTillerFailure: if communicating with the helm process
              fails, if helm writes anything else to stderr, or if helm
              produces no output at all (treated as a timeout).
    """

    env = os.environ.copy()
    env['PATH'] = '/usr/local/sbin:' + env['PATH']
    env['KUBECONFIG'] = kubernetes.KUBERNETES_ADMIN_CONF

    helm_cmd = ['helm', 'uninstall', '-n', namespace, release]
    if flags:
        helm_cmd.extend(flags)

    process = subprocess.Popen(
        helm_cmd,
        env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
        universal_newlines=True)
    timer = threading.Timer(20, kill_process_and_descendants, [process])

    # Only the interaction with the subprocess is guarded here. The
    # result is classified after this block so that the specific
    # HelmTillerFailure reasons raised below are not caught and
    # re-wrapped as a generic "Failed to execute helm v3 command" error.
    try:
        timer.start()
        out, err = process.communicate()
    except Exception as e:
        LOG.error("Failed to execute helm v3 command: %s" % e)
        raise exception.HelmTillerFailure(
            reason="Failed to execute helm v3 command: %s" % e)
    finally:
        timer.cancel()

    if err:
        if "not found" in err:
            # Nothing left to delete: report it and hand back the output.
            LOG.debug("Release %s not found or deleted already" % release)
            return out, err
        reason = "Failed to delete release: %s" % err
        LOG.error(reason)
        raise exception.HelmTillerFailure(reason=reason)

    if not out:
        # No stdout and no stderr: the process was most likely killed by
        # the timer before helm could answer.
        err_msg = "Failed to execute helm v3 command. " \
                  "Helm response timeout."
        LOG.error(err_msg)
        raise exception.HelmTillerFailure(reason=err_msg)

    return out, err
|
||||
|
||||
|
||||
def _retry_on_HelmTillerFailure(ex):
|
||||
LOG.info('Caught HelmTillerFailure exception. Resetting tiller and retrying... '
|
||||
'Exception: {}'.format(ex))
|
||||
|
|
Loading…
Reference in New Issue