diff --git a/nfv/nfv-tests/nfv_unit_tests/tests/test_sw_patch_strategy.py b/nfv/nfv-tests/nfv_unit_tests/tests/test_sw_patch_strategy.py index 244b4f4c..db36ebc6 100755 --- a/nfv/nfv-tests/nfv_unit_tests/tests/test_sw_patch_strategy.py +++ b/nfv/nfv-tests/nfv_unit_tests/tests/test_sw_patch_strategy.py @@ -2990,7 +2990,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase): {'name': 'unlock-hosts', 'entity_names': ['controller-1']}, {'name': 'wait-alarms-clear', - 'timeout': 1800}, + 'timeout': 2400}, ] }, {'name': 'sw-patch-controllers', @@ -3008,7 +3008,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase): {'name': 'unlock-hosts', 'entity_names': ['controller-0']}, {'name': 'wait-alarms-clear', - 'timeout': 1800}, + 'timeout': 2400}, ] }, ] @@ -3100,7 +3100,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase): {'name': 'unlock-hosts', 'entity_names': ['controller-1']}, {'name': 'wait-alarms-clear', - 'timeout': 1800}, + 'timeout': 2400}, ] }, {'name': 'sw-patch-controllers', @@ -3118,7 +3118,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase): {'name': 'unlock-hosts', 'entity_names': ['controller-0']}, {'name': 'wait-alarms-clear', - 'timeout': 1800}, + 'timeout': 2400}, ] }, ] @@ -3224,7 +3224,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase): {'name': 'start-instances', 'entity_names': ['test_instance_0']}, {'name': 'wait-alarms-clear', - 'timeout': 1800}, + 'timeout': 2400}, ] }, {'name': 'sw-patch-worker-hosts', @@ -3246,7 +3246,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase): {'name': 'start-instances', 'entity_names': ['test_instance_1']}, {'name': 'wait-alarms-clear', - 'timeout': 1800} + 'timeout': 2400} ] }, ] @@ -3349,7 +3349,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase): {'name': 'start-instances', 'entity_names': ['test_instance_0']}, {'name': 'wait-alarms-clear', - 'timeout': 1800} + 'timeout': 2400} ] }, {'name': 'sw-patch-worker-hosts', @@ -3371,7 +3371,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase): {'name': 'start-instances', 'entity_names': ['test_instance_1']}, {'name': 'wait-alarms-clear', - 'timeout': 1800} + 'timeout': 2400} ] }, ] @@ -3426,7 +3426,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase): {'name': 'unlock-hosts', 'entity_names': ['controller-0']}, {'name': 'wait-alarms-clear', - 'timeout': 1800} + 'timeout': 2400} ] }, {'name': 'sw-patch-worker-hosts', @@ -3444,7 +3444,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase): {'name': 'unlock-hosts', 'entity_names': ['controller-1']}, {'name': 'wait-alarms-clear', - 'timeout': 1800} + 'timeout': 2400} ] }, ] @@ -3499,7 +3499,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase): {'name': 'unlock-hosts', 'entity_names': ['controller-0']}, {'name': 'wait-alarms-clear', - 'timeout': 1800} + 'timeout': 2400} ] }, {'name': 'sw-patch-worker-hosts', @@ -3517,7 +3517,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase): {'name': 'unlock-hosts', 'entity_names': ['controller-1']}, {'name': 'wait-alarms-clear', - 'timeout': 1800} + 'timeout': 2400} ] }, ] @@ -3594,7 +3594,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase): {'name': 'start-instances', 'entity_names': ['test_instance_0']}, {'name': 'wait-alarms-clear', - 'timeout': 1800}, + 'timeout': 2400}, ] }, {'name': 'sw-patch-worker-hosts', @@ -3616,7 +3616,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase): {'name': 'start-instances', 'entity_names': ['test_instance_1']}, {'name': 'wait-alarms-clear', - 'timeout': 1800} + 'timeout': 2400} ] }, {'name': 'sw-patch-worker-hosts', @@ -3758,7 +3758,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase): {'name': 'start-instances', 'entity_names': ['test_instance_0']}, {'name': 'wait-alarms-clear', - 'timeout': 1800} + 'timeout': 2400} ] }, {'name': 'sw-patch-worker-hosts', @@ -3780,7 +3780,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase): {'name': 'start-instances', 'entity_names': ['test_instance_1']}, {'name': 'wait-alarms-clear', - 'timeout': 1800} + 'timeout': 2400} ] }, {'name': 'sw-patch-worker-hosts', @@ -3878,7 +3878,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase): {'name': 'unlock-hosts', 'entity_names': ['controller-0']}, {'name': 'wait-alarms-clear', - 'timeout': 1800} + 'timeout': 2400} ] }, {'name': 'sw-patch-worker-hosts', @@ -3896,7 +3896,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase): {'name': 'unlock-hosts', 'entity_names': ['controller-1']}, {'name': 'wait-alarms-clear', - 'timeout': 1800} + 'timeout': 2400} ] }, {'name': 'sw-patch-worker-hosts', @@ -4014,7 +4014,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase): {'name': 'unlock-hosts', 'entity_names': ['controller-0']}, {'name': 'wait-alarms-clear', - 'timeout': 1800}, + 'timeout': 2400}, ] }, ] @@ -4072,7 +4072,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase): {'name': 'start-instances', 'entity_names': ['test_instance_0']}, {'name': 'wait-alarms-clear', - 'timeout': 1800}, + 'timeout': 2400}, ] }, ] @@ -4123,7 +4123,7 @@ class TestSwPatchStrategy(sw_update_testcase.SwUpdateStrategyTestCase): {'name': 'unlock-hosts', 'entity_names': ['controller-0']}, {'name': 'wait-alarms-clear', - 'timeout': 1800} + 'timeout': 2400} ] }, ] diff --git a/nfv/nfv-tests/nfv_unit_tests/tests/test_system_config_update_strategy.py b/nfv/nfv-tests/nfv_unit_tests/tests/test_system_config_update_strategy.py index 8abdae61..c1ba084d 100644 --- a/nfv/nfv-tests/nfv_unit_tests/tests/test_system_config_update_strategy.py +++ b/nfv/nfv-tests/nfv_unit_tests/tests/test_system_config_update_strategy.py @@ -136,7 +136,7 @@ class TestSystemConfigUpdateStrategy(sw_update_testcase.SwUpdateStrategyTestCase {'name': 'unlock-hosts', 'entity_names': ['controller-0']}, {'name': 'wait-alarms-clear', - 'timeout': 1800}, + 'timeout': 2400}, ] }, ] @@ -188,7 +188,7 @@ class TestSystemConfigUpdateStrategy(sw_update_testcase.SwUpdateStrategyTestCase {'name': 'unlock-hosts', 'entity_names': ['controller-0']}, {'name': 'wait-alarms-clear', - 'timeout': 1800}, + 'timeout': 2400}, ] }, { @@ -207,7 +207,7 @@ class TestSystemConfigUpdateStrategy(sw_update_testcase.SwUpdateStrategyTestCase {'name': 'unlock-hosts', 'entity_names': ['controller-1']}, {'name': 'wait-alarms-clear', - 'timeout': 1800}, + 'timeout': 2400}, ] }, ] @@ -276,7 +276,7 @@ class TestSystemConfigUpdateStrategy(sw_update_testcase.SwUpdateStrategyTestCase {'name': 'unlock-hosts', 'entity_names': ['controller-0']}, {'name': 'wait-alarms-clear', - 'timeout': 1800}, + 'timeout': 2400}, ] }, { @@ -295,7 +295,7 @@ class TestSystemConfigUpdateStrategy(sw_update_testcase.SwUpdateStrategyTestCase {'name': 'unlock-hosts', 'entity_names': ['controller-1']}, {'name': 'wait-alarms-clear', - 'timeout': 1800}, + 'timeout': 2400}, ] }, { @@ -433,7 +433,7 @@ class TestSystemConfigUpdateStrategy(sw_update_testcase.SwUpdateStrategyTestCase {'name': 'unlock-hosts', 'entity_names': ['controller-0']}, {'name': 'wait-alarms-clear', - 'timeout': 1800}, + 'timeout': 2400}, ] }, { @@ -452,7 +452,7 @@ class TestSystemConfigUpdateStrategy(sw_update_testcase.SwUpdateStrategyTestCase {'name': 'unlock-hosts', 'entity_names': ['controller-1']}, {'name': 'wait-alarms-clear', - 'timeout': 1800}, + 'timeout': 2400}, ] }, { @@ -573,7 +573,7 @@ class TestSystemConfigUpdateStrategy(sw_update_testcase.SwUpdateStrategyTestCase {'name': 'unlock-hosts', 'entity_names': ['controller-0']}, {'name': 'wait-alarms-clear', - 'timeout': 1800}, + 'timeout': 2400}, ] }, { @@ -763,7 +763,7 @@ class TestSystemConfigUpdateStrategy(sw_update_testcase.SwUpdateStrategyTestCase {'name': 'unlock-hosts', 'entity_names': ['controller-0']}, {'name': 'wait-alarms-clear', - 'timeout': 1800}, + 'timeout': 2400}, ] }, { diff --git a/nfv/nfv-vim/nfv_vim/strategy/_strategy.py b/nfv/nfv-vim/nfv_vim/strategy/_strategy.py index 2517e385..93aff2f8 100755 --- a/nfv/nfv-vim/nfv_vim/strategy/_strategy.py +++ b/nfv/nfv-vim/nfv_vim/strategy/_strategy.py @@ -52,6 +52,7 @@ NO_REBOOT_DELAY = 30 # constants used by the patching API for state and repo state PATCH_REPO_STATE_APPLIED = 'Applied' PATCH_STATE_APPLIED = 'Applied' +WAIT_ALARM_TIMEOUT = 2400 ################################################################### @@ -971,7 +972,8 @@ class UpdateControllerHostsMixin(object): host_list = [host] stage = strategy.StrategyStage(strategy_stage_name) stage.add_step(strategy.QueryAlarmsStep( - True, ignore_alarms=self._ignore_alarms)) + True, ignore_alarms=self._ignore_alarms, + ignore_alarms_conditional=self._ignore_alarms_conditional)) if reboot: stage.add_step(strategy.SwactHostsStep(host_list)) stage.add_step(strategy.LockHostsStep(host_list)) @@ -992,8 +994,9 @@ class UpdateControllerHostsMixin(object): # OSDs configured, but the alarms should clear quickly in # that case so this will not delay the update strategy. stage.add_step(strategy.WaitAlarmsClearStep( - timeout_in_secs=30 * 60, - ignore_alarms=self._ignore_alarms)) + timeout_in_secs=WAIT_ALARM_TIMEOUT, + ignore_alarms=self._ignore_alarms, + ignore_alarms_conditional=self._ignore_alarms_conditional)) else: # Less time required if host is not rebooting stage.add_step(strategy.SystemStabilizeStep( @@ -1004,7 +1007,8 @@ class UpdateControllerHostsMixin(object): host_list = [local_host] stage = strategy.StrategyStage(strategy_stage_name) stage.add_step(strategy.QueryAlarmsStep( - True, ignore_alarms=self._ignore_alarms)) + True, ignore_alarms=self._ignore_alarms, + ignore_alarms_conditional=self._ignore_alarms_conditional)) if reboot: stage.add_step(strategy.SwactHostsStep(host_list)) stage.add_step(strategy.LockHostsStep(host_list)) @@ -1025,8 +1029,9 @@ class UpdateControllerHostsMixin(object): # OSDs configured, but the alarms should clear quickly in # that case so this will not delay the update strategy. stage.add_step(strategy.WaitAlarmsClearStep( - timeout_in_secs=30 * 60, - ignore_alarms=self._ignore_alarms)) + timeout_in_secs=WAIT_ALARM_TIMEOUT, + ignore_alarms=self._ignore_alarms, + ignore_alarms_conditional=self._ignore_alarms_conditional)) else: # Less time required if host is not rebooting stage.add_step(strategy.SystemStabilizeStep( @@ -1105,7 +1110,8 @@ class UpdateStorageHostsMixin(object): for host_list in host_lists: stage = strategy.StrategyStage(strategy_stage_name) stage.add_step(strategy.QueryAlarmsStep( - True, ignore_alarms=self._ignore_alarms)) + True, ignore_alarms=self._ignore_alarms, + ignore_alarms_conditional=self._ignore_alarms_conditional)) if reboot: stage.add_step(strategy.LockHostsStep(host_list)) # Add the action step for these hosts (patch, etc..) @@ -1227,7 +1233,8 @@ class UpdateWorkerHostsMixin(object): stage = strategy.StrategyStage(strategy_stage_name) stage.add_step(strategy.QueryAlarmsStep( - True, ignore_alarms=self._ignore_alarms)) + True, ignore_alarms=self._ignore_alarms, + ignore_alarms_conditional=self._ignore_alarms_conditional)) if reboot: if 1 == len(host_list): @@ -1297,8 +1304,9 @@ class UpdateWorkerHostsMixin(object): for host in hosts_to_lock + hosts_to_reboot]): # Multiple personality nodes that need to wait for OSDs to sync: stage.add_step(strategy.WaitAlarmsClearStep( - timeout_in_secs=30 * 60, - ignore_alarms=self._ignore_alarms)) + timeout_in_secs=WAIT_ALARM_TIMEOUT, + ignore_alarms=self._ignore_alarms, + ignore_alarms_conditional=self._ignore_alarms_conditional)) else: if any([host.openstack_control or host.openstack_compute for host in hosts_to_lock + hosts_to_reboot]): @@ -1393,9 +1401,13 @@ class SwPatchStrategy(SwUpdateStrategy, '100.119', # PTP alarm for SyncE '900.701', # Node tainted ] + IGNORE_ALARMS_CONDITIONAL = {'750.006': 1800} self._ignore_alarms += IGNORE_ALARMS self._single_controller = single_controller + # This is to ignore the stale alarm(currently 750.006 is ignored). + self._ignore_alarms_conditional = IGNORE_ALARMS_CONDITIONAL + # initialize the variables required by the mixins # ie: self._nfvi_sw_patches, self._nfvi_sw_patch_hosts self.initialize_mixin() @@ -1409,7 +1421,8 @@ class SwPatchStrategy(SwUpdateStrategy, stage = strategy.StrategyStage( strategy.STRATEGY_STAGE_NAME.SW_PATCH_QUERY) stage.add_step( - strategy.QueryAlarmsStep(ignore_alarms=self._ignore_alarms)) + strategy.QueryAlarmsStep(ignore_alarms=self._ignore_alarms, + ignore_alarms_conditional=self._ignore_alarms_conditional)) stage.add_step(strategy.QuerySwPatchesStep()) stage.add_step(strategy.QuerySwPatchHostsStep()) self.build_phase.add_stage(stage) @@ -2370,7 +2383,7 @@ class SystemConfigUpdateStrategy(SwUpdateStrategy, ] self._ignore_alarms += IGNORE_ALARMS self._single_controller = single_controller - + self._ignore_alarms_conditional = None # initialize the variables required by the mixins self.initialize_mixin() @@ -3325,7 +3338,7 @@ class KubeUpgradeStrategy(SwUpdateStrategy, ] # self._ignore_alarms is declared in parent class self._ignore_alarms += IGNORE_ALARMS - + self._ignore_alarms_conditional = None # to_version and single_controller MUST be serialized self._to_version = to_version self._single_controller = single_controller diff --git a/nfv/nfv-vim/nfv_vim/strategy/_strategy_steps.py b/nfv/nfv-vim/nfv_vim/strategy/_strategy_steps.py index a55df5ac..9a54a90e 100755 --- a/nfv/nfv-vim/nfv_vim/strategy/_strategy_steps.py +++ b/nfv/nfv-vim/nfv_vim/strategy/_strategy_steps.py @@ -1912,13 +1912,18 @@ class QueryAlarmsStep(strategy.StrategyStep): """ Query Alarms - Strategy Step """ - def __init__(self, fail_on_alarms=False, ignore_alarms=None): + def __init__(self, fail_on_alarms=False, ignore_alarms=None, ignore_alarms_conditional=None): super(QueryAlarmsStep, self).__init__( STRATEGY_STEP_NAME.QUERY_ALARMS, timeout_in_secs=60) if ignore_alarms is None: ignore_alarms = [] self._fail_on_alarms = fail_on_alarms self._ignore_alarms = ignore_alarms + # For ignoring stale alarm for the specified amount of time. + # Currently we are ignoring 750.006 alarm for patch strategy. + if ignore_alarms_conditional is None: + ignore_alarms_conditional = {} + self._ignore_alarms_conditional = ignore_alarms_conditional @coroutine def _query_alarms_callback(self, fm_service): @@ -1940,7 +1945,8 @@ class QueryAlarmsStep(strategy.StrategyStep): "%s - uuid %s due to relaxed alarm " "strictness" % (nfvi_alarm.alarm_id, nfvi_alarm.alarm_uuid)) - elif nfvi_alarm.alarm_id not in self._ignore_alarms: + elif (nfvi_alarm.alarm_id not in self._ignore_alarms and + nfvi_alarm.alarm_id not in self._ignore_alarms_conditional): DLOG.warn("Alarm: %s" % nfvi_alarm.alarm_id) nfvi_alarms.append(nfvi_alarm) else: @@ -1982,6 +1988,7 @@ class QueryAlarmsStep(strategy.StrategyStep): super(QueryAlarmsStep, self).from_dict(data) self._fail_on_alarms = data['fail_on_alarms'] self._ignore_alarms = data['ignore_alarms'] + self._ignore_alarms_conditional = data['ignore_alarms_conditional'] return self def as_dict(self): @@ -1994,6 +2001,7 @@ class QueryAlarmsStep(strategy.StrategyStep): data['entity_uuids'] = list() data['fail_on_alarms'] = self._fail_on_alarms data['ignore_alarms'] = self._ignore_alarms + data['ignore_alarms_conditional'] = self._ignore_alarms_conditional return data @@ -2106,7 +2114,8 @@ class WaitAlarmsClearStep(strategy.StrategyStep): """ Alarm Wait - Strategy Step """ - def __init__(self, timeout_in_secs=300, first_query_delay_in_secs=60, ignore_alarms=None): + def __init__(self, timeout_in_secs=300, first_query_delay_in_secs=60, ignore_alarms=None, + ignore_alarms_conditional=None): super(WaitAlarmsClearStep, self).__init__( STRATEGY_STEP_NAME.WAIT_ALARMS_CLEAR, timeout_in_secs=timeout_in_secs) self._first_query_delay_in_secs = first_query_delay_in_secs @@ -2115,12 +2124,17 @@ class WaitAlarmsClearStep(strategy.StrategyStep): self._ignore_alarms = ignore_alarms self._wait_time = 0 self._query_inprogress = False + if ignore_alarms_conditional is None: + ignore_alarms_conditional = {} + self._ignore_alarms_conditional = ignore_alarms_conditional @coroutine def _query_alarms_callback(self): """ Query Alarms Callback """ + from datetime import datetime + response = (yield) DLOG.debug("Query-Alarms callback response=%s." % response) @@ -2138,6 +2152,27 @@ class WaitAlarmsClearStep(strategy.StrategyStep): "strictness" % (nfvi_alarm.alarm_id, nfvi_alarm.alarm_uuid)) elif nfvi_alarm.alarm_id not in self._ignore_alarms: + # For ignoring stale alarm(currently 750.006) + if nfvi_alarm.alarm_id in self._ignore_alarms_conditional: + format_string = "%Y-%m-%dT%H:%M:%S.%f" + alarm_timestamp = nfvi_alarm.timestamp + alarm_timestamp_obj = datetime.strptime( + alarm_timestamp, format_string) + current_time = datetime.now() + time_in_sec = ( + current_time - alarm_timestamp_obj).total_seconds() + # Ignoring stale alarm if present after specified amount of time + if self._ignore_alarms_conditional[nfvi_alarm.alarm_id] < int(time_in_sec): + # Appends stale alarm to list _ignore_alarms + # if specified timeout is reached. + self._ignore_alarms.append(nfvi_alarm.alarm_id) + else: + # Appends alarm to nfvi_alarms if, the specified + # timeout is not reached. + nfvi_alarms.append(nfvi_alarm) + else: + nfvi_alarms.append(nfvi_alarm) + nfvi_alarms.append(nfvi_alarm) else: DLOG.debug("Ignoring alarm %s - uuid %s" % @@ -2145,6 +2180,13 @@ class WaitAlarmsClearStep(strategy.StrategyStep): self.strategy.nfvi_alarms = nfvi_alarms if self.strategy.nfvi_alarms: + ignore_alarm_list = list(self._ignore_alarms_conditional.keys()) + for alarm in self.strategy.nfvi_alarms: + for remove_alarm in ignore_alarm_list: + if alarm['alarm_id'] == remove_alarm: + # Removes only the alarm which has + # not yet reached specified timeout. + self.strategy.nfvi_alarms.remove(alarm) # Keep waiting for alarms to clear pass else: @@ -2193,6 +2235,7 @@ class WaitAlarmsClearStep(strategy.StrategyStep): super(WaitAlarmsClearStep, self).from_dict(data) self._first_query_delay_in_secs = data['first_query_delay_in_secs'] self._ignore_alarms = data['ignore_alarms'] + self._ignore_alarms_conditional = data['ignore_alarms_conditional'] self._wait_time = 0 self._query_inprogress = False return self @@ -2207,6 +2250,7 @@ class WaitAlarmsClearStep(strategy.StrategyStep): data['entity_uuids'] = list() data['first_query_delay_in_secs'] = self._first_query_delay_in_secs data['ignore_alarms'] = self._ignore_alarms + data['ignore_alarms_conditional'] = self._ignore_alarms_conditional return data