Skip app recovery if lifecycle fails during update

Skip recovery for applications that have update_failure_no_rollback
set to 'true' and that eventually fail to pass lifecycle semantic
checks during updates.

Triggering the recovery of an app that does not support rollbacks can
result in a broken state. This aims to standardize the behavior of the
application update process by equalizing how we handle lifecycle
semantic checks failures to other update errors such as apply failures.

Test Plan:
PASS: build-pkgs && build-image
PASS: AIO-SX fresh install
PASS: Create a modified version of cert-manager setting the
      'update_failure_no_rollback' option to 'true'.
      Update cert-manager to the modified version.
      Confirm that the update succeeded.
PASS: Create a modified version of cert-manager setting the
      'update_failure_no_rollback' option to 'true'.
      Force an exception when running lifecycle semantic checks.
      Update cert-manager to the modified version.
      Confirm that the update failed with a descriptive error message
      informing that the skip recovery feature is enabled.
      Fix the code and reapply the app.
      Confirm that the app was successfully applied.

Closes-Bug: 2064737

Change-Id: Ie90c5c3c3a79d8502eb9cc1aa11222963ba13621
Signed-off-by: Igor Soares <Igor.PiresSoares@windriver.com>
This commit is contained in:
Igor Soares 2024-05-01 16:07:16 -03:00
parent 159039de4c
commit 40fbc39522
1 changed files with 63 additions and 53 deletions

View File

@ -2871,7 +2871,13 @@ class AppOperator(object):
lifecycle_hook_info_app_upload=lifecycle_hook_info_app_update)
lifecycle_hook_info_app_update.operation = constants.APP_UPDATE_OP
# Get the skip_recovery flag from app metadata
keys = [constants.APP_METADATA_UPGRADES,
constants.APP_METADATA_UPDATE_FAILURE_SKIP_RECOVERY]
skip_recovery = bool(strtobool(str(self._get_metadata_value(to_app, keys, False))))
# Semantic checking for N+1 app
semantic_check_result = False
try:
lifecycle_hook_info = copy.deepcopy(lifecycle_hook_info_app_update)
lifecycle_hook_info.relative_timing = constants.APP_LIFECYCLE_TIMING_PRE
@ -2879,74 +2885,78 @@ class AppOperator(object):
lifecycle_hook_info[LifecycleConstants.EXTRA][LifecycleConstants.TO_APP] = True
self.app_lifecycle_actions(None, None, to_rpc_app, lifecycle_hook_info)
semantic_check_result = True
except exception.LifecycleSemanticCheckException as e:
LOG.info("App {} rejected operation {} for reason: {}"
"".format(to_app.name, constants.APP_UPDATE_OP, str(e)))
self._perform_app_recover(to_rpc_app, from_app, to_app,
lifecycle_hook_info_app_update,
fluxcd_process_required=False)
return False
if not skip_recovery:
self._perform_app_recover(to_rpc_app, from_app, to_app,
lifecycle_hook_info_app_update,
fluxcd_process_required=False)
return False
except Exception as e:
LOG.error("App {} operation {} semantic check error: {}"
"".format(to_app.name, constants.APP_UPDATE_OP, str(e)))
self._perform_app_recover(to_rpc_app, from_app, to_app,
lifecycle_hook_info_app_update,
fluxcd_process_required=False)
return False
if not skip_recovery:
self._perform_app_recover(to_rpc_app, from_app, to_app,
lifecycle_hook_info_app_update,
fluxcd_process_required=False)
return False
self.load_application_metadata_from_file(to_rpc_app)
if semantic_check_result:
self.load_application_metadata_from_file(to_rpc_app)
# Check whether the new application is compatible with the given k8s version.
# If k8s_version is none the check is performed against the active version.
self._utils._check_app_compatibility(to_app.name,
to_app.version,
k8s_version)
# Check whether the new application is compatible with the given k8s version.
# If k8s_version is none the check is performed against the active version.
self._utils._check_app_compatibility(to_app.name,
to_app.version,
k8s_version)
self._update_app_status(to_app, constants.APP_UPDATE_IN_PROGRESS)
self._update_app_status(to_app, constants.APP_UPDATE_IN_PROGRESS)
# Get the skip_recovery flag from app metadata
keys = [constants.APP_METADATA_UPGRADES,
constants.APP_METADATA_UPDATE_FAILURE_SKIP_RECOVERY]
skip_recovery = bool(strtobool(str(self._get_metadata_value(to_app, keys, False))))
if operation == constants.APP_APPLY_OP:
reuse_overrides = \
self._get_metadata_value(to_app,
constants.APP_METADATA_MAINTAIN_USER_OVERRIDES,
False)
if reuse_user_overrides is not None:
reuse_overrides = reuse_user_overrides
result = False
if operation == constants.APP_APPLY_OP:
reuse_overrides = \
self._get_metadata_value(to_app,
constants.APP_METADATA_MAINTAIN_USER_OVERRIDES,
False)
if reuse_user_overrides is not None:
reuse_overrides = reuse_user_overrides
# Preserve user overrides for the new app
if reuse_overrides:
self._preserve_user_overrides(from_app, to_app)
# Preserve user overrides for the new app
if reuse_overrides:
self._preserve_user_overrides(from_app, to_app)
reuse_app_attributes = \
self._get_metadata_value(to_app,
constants.APP_METADATA_MAINTAIN_ATTRIBUTES,
False)
if reuse_attributes is not None:
reuse_app_attributes = reuse_attributes
reuse_app_attributes = \
self._get_metadata_value(to_app,
constants.APP_METADATA_MAINTAIN_ATTRIBUTES,
False)
if reuse_attributes is not None:
reuse_app_attributes = reuse_attributes
# Preserve attributes for the new app
if reuse_app_attributes:
self._preserve_attributes(from_app, to_app)
# Preserve attributes for the new app
if reuse_app_attributes:
self._preserve_attributes(from_app, to_app)
# The app_apply will generate new versioned overrides for the
# app upgrade and will enable the new plugins for that version.
# The app_apply will generate new versioned overrides for the
# app upgrade and will enable the new plugins for that version.
# Note: this will not trigger the apply hooks present in
# conductor/manager:perform_app_apply
# Note: here we lose the information that this is an apply
# triggered by an update
# TODO(dvoicule): we may want to also trigger the apply hooks
# TODO(dvoicule): we may want to track the fact that this is
# called during an update
lifecycle_hook_info_app_update.operation = constants.APP_APPLY_OP
result = self.perform_app_apply(
to_rpc_app, mode=None,
lifecycle_hook_info_app_apply=lifecycle_hook_info_app_update,
caller='update')
lifecycle_hook_info_app_update.operation = constants.APP_UPDATE_OP
# Note: this will not trigger the apply hooks present in conductor/manager:perform_app_apply
# Note: here we lose the information that this is an apply triggered by an update
# TODO(dvoicule): we may want to also trigger the apply hooks
# TODO(dvoicule): we may want to track the fact that this is called during an update
lifecycle_hook_info_app_update.operation = constants.APP_APPLY_OP
result = self.perform_app_apply(to_rpc_app, mode=None,
lifecycle_hook_info_app_apply=lifecycle_hook_info_app_update,
caller='update')
lifecycle_hook_info_app_update.operation = constants.APP_UPDATE_OP
operation_successful = result
operation_successful = result
else:
operation_successful = semantic_check_result
# If operation failed consider doing the app recovery
do_recovery = not operation_successful
@ -3058,7 +3068,7 @@ class AppOperator(object):
self._deregister_app_abort(to_app.name)
self._clear_app_alarm(to_app.name)
return True
return operation_successful
def perform_app_remove(self, rpc_app, lifecycle_hook_info_app_remove, force=False):
"""Process application remove request