Merge "appfwk: fix app remove stuck after apply-failed"
This commit is contained in:
commit
b1715439ec
|
@ -3400,7 +3400,34 @@ class AppOperator(object):
|
||||||
self._update_app_status(
|
self._update_app_status(
|
||||||
app, new_progress=constants.APP_PROGRESS_DELETE_MANIFEST)
|
app, new_progress=constants.APP_PROGRESS_DELETE_MANIFEST)
|
||||||
|
|
||||||
if self._make_app_request(app, constants.APP_DELETE_OP):
|
# Delete helm releases which have a helm operation running.
|
||||||
|
# eg.: pending-install, pending-upgrade, etc.
|
||||||
|
flux_helm_releases = [(c.metadata_name, c.namespace) for c in self._get_list_of_charts(app)]
|
||||||
|
for release, namespace in flux_helm_releases:
|
||||||
|
helm_release_dict = self._kube.get_custom_resource(
|
||||||
|
constants.FLUXCD_CRD_HELM_REL_GROUP,
|
||||||
|
constants.FLUXCD_CRD_HELM_REL_VERSION,
|
||||||
|
namespace,
|
||||||
|
constants.FLUXCD_CRD_HELM_REL_PLURAL,
|
||||||
|
release)
|
||||||
|
if not helm_release_dict:
|
||||||
|
LOG.warning("FluxCD Helm release info for {} is not available".format(release))
|
||||||
|
continue
|
||||||
|
|
||||||
|
helm_release_status, _ = self._fluxcd.get_helm_release_status(helm_release_dict)
|
||||||
|
if helm_release_status == self._fluxcd.HELM_RELEASE_STATUS_UNKNOWN:
|
||||||
|
LOG.info("Removing helm release which has an operation in progress: {} - {}".format(namespace, release))
|
||||||
|
# Send delete request in FluxCD so it doesn't recreate the helm release
|
||||||
|
self._kube.delete_custom_resource(
|
||||||
|
constants.FLUXCD_CRD_HELM_REL_GROUP,
|
||||||
|
constants.FLUXCD_CRD_HELM_REL_VERSION,
|
||||||
|
namespace,
|
||||||
|
constants.FLUXCD_CRD_HELM_REL_PLURAL,
|
||||||
|
release)
|
||||||
|
# Remove resource in Helm
|
||||||
|
helm_utils.delete_helm_v3_release(helm_release_dict['spec']['releaseName'], namespace=namespace)
|
||||||
|
|
||||||
|
if self._make_app_request(app, constants.APP_REMOVE_OP):
|
||||||
# After armada delete, the data for the releases are purged from
|
# After armada delete, the data for the releases are purged from
|
||||||
# tiller/etcd, the releases info for the active app stored in sysinv
|
# tiller/etcd, the releases info for the active app stored in sysinv
|
||||||
# db should be set back to 0 and the inactive apps require to be
|
# db should be set back to 0 and the inactive apps require to be
|
||||||
|
@ -4794,7 +4821,7 @@ class FluxCDHelper(object):
|
||||||
else:
|
else:
|
||||||
LOG.error("Applying %s failed. Skipping helm release "
|
LOG.error("Applying %s failed. Skipping helm release "
|
||||||
"cleanup...")
|
"cleanup...")
|
||||||
elif operation == constants.APP_DELETE_OP:
|
elif operation in [constants.APP_DELETE_OP, constants.APP_REMOVE_OP]:
|
||||||
rc = self._delete(manifest_dir)
|
rc = self._delete(manifest_dir)
|
||||||
elif operation == constants.APP_ROLLBACK_OP:
|
elif operation == constants.APP_ROLLBACK_OP:
|
||||||
pass
|
pass
|
||||||
|
@ -4823,6 +4850,7 @@ class FluxCDHelper(object):
|
||||||
def _delete(self, manifest_dir):
|
def _delete(self, manifest_dir):
|
||||||
cmd = ['kubectl', '--kubeconfig', kubernetes.KUBERNETES_ADMIN_CONF,
|
cmd = ['kubectl', '--kubeconfig', kubernetes.KUBERNETES_ADMIN_CONF,
|
||||||
'delete', '-k', manifest_dir, '--ignore-not-found=true']
|
'delete', '-k', manifest_dir, '--ignore-not-found=true']
|
||||||
|
|
||||||
_, stderr = cutils.trycmd(*cmd)
|
_, stderr = cutils.trycmd(*cmd)
|
||||||
|
|
||||||
if stderr:
|
if stderr:
|
||||||
|
@ -4920,6 +4948,10 @@ class FluxCDHelper(object):
|
||||||
else:
|
else:
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
|
# TODO (lfagunde):
|
||||||
|
# Some methods in this class receive helm_chart_dict as a parameter.
|
||||||
|
# Can move the call to _kube.get_custom_resource() into these functions
|
||||||
|
# or create a helper function inside the class for it.
|
||||||
def get_helm_release_status(self, helm_release_dict):
|
def get_helm_release_status(self, helm_release_dict):
|
||||||
"""helm_release_dict is of the form returned by _kube.get_custom_resource().
|
"""helm_release_dict is of the form returned by _kube.get_custom_resource().
|
||||||
Returns: 'status' of the release (Unlnown,True,False) and 'message'
|
Returns: 'status' of the release (Unlnown,True,False) and 'message'
|
||||||
|
|
|
@ -38,6 +38,15 @@ LOG = logging.getLogger(__name__)
|
||||||
# When python3 migration is finished, the explicit timer should
|
# When python3 migration is finished, the explicit timer should
|
||||||
# be removed.
|
# be removed.
|
||||||
|
|
||||||
|
# TODO(lfagunde):
|
||||||
|
# Some of the logic in here is outdated and assumes the default helm used is v2,
|
||||||
|
# such as the delete_helm_release() function.
|
||||||
|
# Also, this module would benefit from refactoring to add more
|
||||||
|
# functionality and make the current functions more flexible.
|
||||||
|
# Could create a generic "execute_helm_cmd" style function and derive the
|
||||||
|
# specific ones (list, delete, etc) from there. If that's done, remember
|
||||||
|
# to update function calls done to this module from elsewhere in the code.
|
||||||
|
|
||||||
def kill_process_and_descendants(proc):
|
def kill_process_and_descendants(proc):
|
||||||
# function to kill a process and its children processes
|
# function to kill a process and its children processes
|
||||||
for child in psutil.Process(proc.pid).children(recursive=True):
|
for child in psutil.Process(proc.pid).children(recursive=True):
|
||||||
|
@ -169,9 +178,9 @@ def retrieve_helm_releases():
|
||||||
|
|
||||||
|
|
||||||
def delete_helm_release(release):
|
def delete_helm_release(release):
|
||||||
"""Delete helm release
|
"""Delete helm v2 release
|
||||||
|
|
||||||
This method deletes a helm release without --purge which removes
|
This method deletes a helm v2 release without --purge which removes
|
||||||
all associated resources from kubernetes but not from the store(ETCD)
|
all associated resources from kubernetes but not from the store(ETCD)
|
||||||
|
|
||||||
In the scenario of updating application, the method is needed to clean
|
In the scenario of updating application, the method is needed to clean
|
||||||
|
@ -215,6 +224,50 @@ def delete_helm_release(release):
|
||||||
timer.cancel()
|
timer.cancel()
|
||||||
|
|
||||||
|
|
||||||
|
def delete_helm_v3_release(release, namespace="default", flags=None):
    """Delete helm v3 release

    Runs ``helm uninstall -n <namespace> <release>`` with KUBECONFIG set to
    the kubernetes admin configuration. A 20 second timer kills the helm
    process (via kill_process_and_descendants) if it does not finish.

    :param release: Helm release name
    :param namespace: Helm release namespace
    :param flags: List with any other flags required to add to the command
    :returns: tuple (stdout, stderr) of the helm command. Also returned
              when helm reports that the release was "not found", since
              that means there is nothing left to delete.
    :raises exception.HelmTillerFailure: if helm exits with an error other
              than "not found", if it produced no error output at all
              (treated as a timeout), or if waiting on it failed.
    """

    env = os.environ.copy()
    # Fall back to the platform default search path when PATH is unset or
    # empty, instead of failing with KeyError or leaving an empty entry
    # (which would resolve to the current directory).
    env['PATH'] = '/usr/local/sbin:' + (env.get('PATH') or os.defpath)
    env['KUBECONFIG'] = kubernetes.KUBERNETES_ADMIN_CONF

    helm_cmd = ['helm', 'uninstall', '-n', namespace, release]
    if flags:
        helm_cmd.extend(flags)

    process = subprocess.Popen(
        helm_cmd,
        env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
        universal_newlines=True)
    timer = threading.Timer(20, kill_process_and_descendants, [process])

    # Only the wait on the subprocess is guarded here, so that the
    # HelmTillerFailure raised further down for a failed delete is not
    # caught and wrapped a second time.
    try:
        timer.start()
        out, err = process.communicate()
    except Exception as e:
        LOG.error("Failed to execute helm v3 command: %s", e)
        raise exception.HelmTillerFailure(
            reason="Failed to execute helm v3 command: %s" % e)
    finally:
        timer.cancel()

    # Decide on the exit status rather than on the mere presence of stderr
    # text: helm can write warnings to stderr while still succeeding.
    if process.returncode == 0:
        return out, err

    if err:
        if "not found" in err:
            LOG.debug("Release %s not found or deleted already", release)
            return out, err
        reason = "Failed to delete release: %s" % err
    else:
        # Non-zero exit with nothing on stderr: presumably the process was
        # killed by the timer above before it could report anything.
        reason = "Failed to execute helm v3 command. " \
                 "Helm response timeout."

    LOG.error("Failed to execute helm v3 command: %s", reason)
    raise exception.HelmTillerFailure(reason=reason)
|
||||||
|
|
||||||
|
|
||||||
def _retry_on_HelmTillerFailure(ex):
|
def _retry_on_HelmTillerFailure(ex):
|
||||||
LOG.info('Caught HelmTillerFailure exception. Resetting tiller and retrying... '
|
LOG.info('Caught HelmTillerFailure exception. Resetting tiller and retrying... '
|
||||||
'Exception: {}'.format(ex))
|
'Exception: {}'.format(ex))
|
||||||
|
|
Loading…
Reference in New Issue