Fix application rollback strategy for Flux

Update application rollback strategy for proper compatibility with
Flux.

This fixes a bug where applications fail to downgrade on DX and Standard
systems because the pre-existing rollback approach used for Armada is
incompatible with Flux-based apps.

The deprecated code used for rolling back Armada-based applications was
removed. That code was still being called and caused an exception to be
raised because Armada-related chart attributes are no longer available.

The rollback to a previous version is now done by applying that
version using "kubectl apply -k <manifest dir>". That way Flux is able
to detect the version we are rolling back to and properly apply it.

Test Plan:
PASS: build-pkgs -a
PASS: build-image
PASS: full AIO-DX install
PASS: update cert-manager-1.0-64.tgz to cert-manager-1.0-65.tgz then
      check with "helm release -A -a" if chart and app versions were
      properly updated.
PASS: downgrade cert-manager-1.0-65.tgz to cert-manager-1.0-64.tgz then
      check with "helm release -A -a" if chart and app versions were
      properly downgraded.
PASS: full AIO-SX install

Closes-Bug: 2019259
Signed-off-by: Igor Soares <Igor.PiresSoares@windriver.com>
Change-Id: Ice1e4d58ff228aea1d4d530e4679ee07263d83f9
This commit is contained in:
Igor Soares 2023-05-11 17:04:28 -04:00
parent c423cd939c
commit fb8fdfe9ab
2 changed files with 5 additions and 103 deletions

View File

@ -491,26 +491,14 @@ class KubeAppController(rest.RestController):
applied_app.progress = None
applied_app.save()
# TODO revise comment below regarding armada
# If the version has ever applied before(inactive app found),
# use ----- rollback to apply application later, otherwise,
# use ----- apply.
# On the AIO-SX, always use ----- apply even it was applied
# before, issue on AIO-SX(replicas is 1) to leverage rollback,
# -----/helm rollback --wait does not wait for pods to be
# ready before it returns.
# related to helm issue,
# https://github.com/helm/helm/issues/4210
# https://github.com/helm/helm/issues/2006
# If the version has been applied before (inactive app found)
# then use the already existing app information,
# otherwise create the necessary target structure for updating
try:
target_app = objects.kube_app.get_inactive_app_by_name_version(
pecan.request.context, name, version)
target_app.status = constants.APP_UPDATE_IN_PROGRESS
target_app.save()
if cutils.is_aio_simplex_system(pecan.request.dbapi):
operation = constants.APP_APPLY_OP
else:
operation = constants.APP_ROLLBACK_OP
except exception.KubeAppInactiveNotFound:
target_app_data = {
'name': name,
@ -520,7 +508,6 @@ class KubeAppController(rest.RestController):
'status': constants.APP_UPDATE_IN_PROGRESS,
'active': True
}
operation = constants.APP_APPLY_OP
try:
target_app = pecan.request.dbapi.kube_app_create(target_app_data)
@ -538,7 +525,8 @@ class KubeAppController(rest.RestController):
pecan.request.rpcapi.perform_app_update(pecan.request.context,
applied_app, target_app,
tarfile, operation,
tarfile,
constants.APP_APPLY_OP,
lifecycle_hook_info,
reuse_overrides,
reuse_attributes)

View File

@ -117,7 +117,6 @@ def get_app_install_root_path_ownership():
return (uid, gid)
Chart = namedtuple('Chart', 'metadata_name name namespace location release labels sequenced')
FluxCDChart = namedtuple('FluxCDChart', 'metadata_name name namespace location '
'release chart_os_path chart_label '
'helm_repo_name')
@ -1782,82 +1781,6 @@ class AppOperator(object):
LOG.error("Application %s recover to version %s aborted!"
% (old_app.name, old_app.version))
def _perform_app_rollback(self, from_app, to_app):
"""Perform application rollback request
This method invokes fluxcd to rollback the application releases to
previous installed versions. The jobs for the current installed
releases require to be cleaned up before starting fluxcd rollback.
:param from_app: application object that application updating from
:param to_app: application object that application updating to
:return boolean: whether application rollback was successful
"""
LOG.info("Application %s (%s) rollback started." % (to_app.name, to_app.version))
try:
if AppOperator.is_app_aborted(to_app.name):
raise exception.KubeAppAbort()
to_db_app = self._dbapi.kube_app_get(to_app.name)
to_app_releases = \
self._dbapi.kube_app_chart_release_get_all(to_db_app.id)
from_db_app = self._dbapi.kube_app_get_inactive_by_name_version(
from_app.name, version=from_app.version)
from_app_releases = \
self._dbapi.kube_app_chart_release_get_all(from_db_app.id)
from_app_r_dict = {r.release: r.version for r in from_app_releases}
self._update_app_status(
to_app, new_progress=constants.APP_PROGRESS_ROLLBACK_RELEASES)
if AppOperator.is_app_aborted(to_app.name):
raise exception.KubeAppAbort()
charts_sequence = {c.release: c.sequenced for c in to_app.charts}
charts_labels = {c.release: c.labels for c in to_app.charts}
for to_app_r in to_app_releases:
if to_app_r.version != 0:
if (to_app_r.release not in from_app_r_dict or
(to_app_r.release in from_app_r_dict and
to_app_r.version != from_app_r_dict[to_app_r.release])):
# Append the release which needs to be rolled back
to_app.releases.append(
{'release': to_app_r.release,
'version': to_app_r.version,
'sequenced': charts_sequence[to_app_r.release]})
# Cleanup the jobs for the current installed release
if to_app_r.release in charts_labels:
for label in charts_labels[to_app_r.release]:
self._kube.kube_delete_collection_namespaced_job(
to_app_r.namespace, label)
LOG.info("Jobs deleted for release %s" % to_app_r.release)
if AppOperator.is_app_aborted(to_app.name):
raise exception.KubeAppAbort()
if self._make_app_request(to_app, constants.APP_ROLLBACK_OP):
self._update_app_status(to_app, constants.APP_APPLY_SUCCESS,
constants.APP_PROGRESS_COMPLETED)
LOG.info("Application %s (%s) rollback completed."
% (to_app.name, to_app.version))
return True
except exception.KubeAppAbort:
# If the update operation is aborted before fluxcd request is made,
# we don't want to return False which would trigger the recovery
# routine with an fluxcd request.
raise
except Exception as e:
# unexpected KubeAppNotFound, KubeAppInactiveNotFound, KeyError
# k8s exception:fail to cleanup release jobs
LOG.exception(e)
LOG.error("Application rollback aborted!")
return False
def perform_app_upload(self, rpc_app, tarfile, lifecycle_hook_info_app_upload, images=False):
"""Process application upload request
@ -2770,15 +2693,6 @@ class AppOperator(object):
lifecycle_hook_info_app_apply=lifecycle_hook_info_app_update,
caller='update')
lifecycle_hook_info_app_update.operation = constants.APP_UPDATE_OP
elif operation == constants.APP_ROLLBACK_OP:
# The app_rollback will use the previous helm releases known to
# the k8s cluster. Overrides are not generated from any plugins
# in the case. Make sure that the enabled plugins correspond to
# the version expected to be activated
self._plugins.activate_plugins(to_app)
# lifecycle hooks not used in perform_app_rollback
result = self._perform_app_rollback(from_app, to_app)
operation_successful = result