diff --git a/puppet-manifests/centos/build_srpm.data b/puppet-manifests/centos/build_srpm.data index 65999a7567..44049d7bb8 100644 --- a/puppet-manifests/centos/build_srpm.data +++ b/puppet-manifests/centos/build_srpm.data @@ -1,2 +1,2 @@ SRC_DIR="src" -TIS_PATCH_VER=62 +TIS_PATCH_VER=63 diff --git a/puppet-manifests/src/hieradata/controller.yaml b/puppet-manifests/src/hieradata/controller.yaml index fc1c7d3c11..52c7a0adac 100644 --- a/puppet-manifests/src/hieradata/controller.yaml +++ b/puppet-manifests/src/hieradata/controller.yaml @@ -49,6 +49,7 @@ CONFIG_ADMIN_PROJECT_DOMAIN_NAME: Default platform::mtce::agent::params::compute_boot_timeout: 720 platform::mtce::agent::params::controller_boot_timeout: 1200 platform::mtce::agent::params::heartbeat_period: 100 +platform::mtce::agent::params::heartbeat_failure_action: 'fail' platform::mtce::agent::params::heartbeat_failure_threshold: 10 platform::mtce::agent::params::heartbeat_degrade_threshold: 6 platform::mtce::agent::params::mnfa_threshold: 2 diff --git a/puppet-manifests/src/modules/platform/manifests/mtce.pp b/puppet-manifests/src/modules/platform/manifests/mtce.pp index a9e0f1f276..b146abf56a 100644 --- a/puppet-manifests/src/modules/platform/manifests/mtce.pp +++ b/puppet-manifests/src/modules/platform/manifests/mtce.pp @@ -12,6 +12,7 @@ class platform::mtce::params ( $controller_boot_timeout = undef, $heartbeat_degrade_threshold = undef, $heartbeat_failure_threshold = undef, + $heartbeat_failure_action = undef, $heartbeat_period = undef, $mtce_multicast = undef, $mnfa_threshold = undef, diff --git a/puppet-modules-wrs/puppet-mtce/centos/build_srpm.data b/puppet-modules-wrs/puppet-mtce/centos/build_srpm.data index 38be379ea2..f0a35f7823 100644 --- a/puppet-modules-wrs/puppet-mtce/centos/build_srpm.data +++ b/puppet-modules-wrs/puppet-mtce/centos/build_srpm.data @@ -1,3 +1,3 @@ SRC_DIR="src" COPY_LIST="$SRC_DIR/LICENSE" -TIS_PATCH_VER=7 +TIS_PATCH_VER=8 diff --git 
a/puppet-modules-wrs/puppet-mtce/src/mtce/templates/mtc_ini.erb b/puppet-modules-wrs/puppet-mtce/src/mtce/templates/mtc_ini.erb index bac8185700..54fd506444 100644 --- a/puppet-modules-wrs/puppet-mtce/src/mtce/templates/mtc_ini.erb +++ b/puppet-modules-wrs/puppet-mtce/src/mtce/templates/mtc_ini.erb @@ -16,6 +16,19 @@ heartbeat_period = <%= @heartbeat_period %> ; Heartbeat period in milliseconds heartbeat_failure_threshold = <%= @heartbeat_failure_threshold %> ; Heartbeat failure threshold count. heartbeat_degrade_threshold = <%= @heartbeat_degrade_threshold %> ; Heartbeat degrade threshold count. +; Heartbeat Loss / Failure Action Selection. +; The action to take on host heartbeat failure. +; Supported actions are +; fail = fail host and raise network specific heartbeat alarms +; degrade = degrade host and raise network specific heartbeat alarms +; alarm = raise network specific heartbeat alarms only +; none = no action and no alarms +; Selected action applies to all hosts in the system +; Default is fail +; To modify execute: +; system service-parameter-modify platform maintenance heartbeat_failure_action=<action> +heartbeat_failure_action = <%= @heartbeat_failure_action %> + ; Multi-Node Failure Avoidance (MNFA) Activation and Deactivation threshold. ; The minimum number of hosts that fail heartbeat within the ; heartbeat_failure_threshold upon which Maintenance activates MNFA Mode. 
diff --git a/sysinv/sysinv/centos/build_srpm.data b/sysinv/sysinv/centos/build_srpm.data index a7d006a129..e6a2d4a1d7 100644 --- a/sysinv/sysinv/centos/build_srpm.data +++ b/sysinv/sysinv/centos/build_srpm.data @@ -1,2 +1,2 @@ SRC_DIR="sysinv" -TIS_PATCH_VER=280 +TIS_PATCH_VER=281 diff --git a/sysinv/sysinv/sysinv/sysinv/common/constants.py b/sysinv/sysinv/sysinv/sysinv/common/constants.py index e3d49b7a4f..692b8b9bb5 100644 --- a/sysinv/sysinv/sysinv/sysinv/common/constants.py +++ b/sysinv/sysinv/sysinv/sysinv/common/constants.py @@ -942,6 +942,7 @@ SERVICE_PARAM_NAME_SYSINV_FIREWALL_RULES_ID = 'firewall_rules_id' SERVICE_PARAM_PLAT_MTCE_COMPUTE_BOOT_TIMEOUT = 'compute_boot_timeout' SERVICE_PARAM_PLAT_MTCE_CONTROLLER_BOOT_TIMEOUT = 'controller_boot_timeout' SERVICE_PARAM_PLAT_MTCE_HBS_PERIOD = 'heartbeat_period' +SERVICE_PARAM_PLAT_MTCE_HBS_FAILURE_ACTION = 'heartbeat_failure_action' SERVICE_PARAM_PLAT_MTCE_HBS_FAILURE_THRESHOLD = 'heartbeat_failure_threshold' SERVICE_PARAM_PLAT_MTCE_HBS_DEGRADE_THRESHOLD = 'heartbeat_degrade_threshold' SERVICE_PARAM_PLAT_MTCE_MNFA_THRESHOLD = 'mnfa_threshold' @@ -950,6 +951,7 @@ SERVICE_PARAM_PLAT_MTCE_MNFA_TIMEOUT = 'mnfa_timeout' SERVICE_PARAM_PLAT_MTCE_COMPUTE_BOOT_TIMEOUT_DEFAULT = 720 SERVICE_PARAM_PLAT_MTCE_CONTROLLER_BOOT_TIMEOUT_DEFAULT = 1200 SERVICE_PARAM_PLAT_MTCE_HBS_PERIOD_DEFAULT = 100 +SERVICE_PARAM_PLAT_MTCE_HBS_FAILURE_ACTION_DEFAULT = 'fail' SERVICE_PARAM_PLAT_MTCE_HBS_FAILURE_THRESHOLD_DEFAULT = 10 SERVICE_PARAM_PLAT_MTCE_HBS_DEGRADE_THRESHOLD_DEFAULT = 6 SERVICE_PARAM_PLAT_MTCE_MNFA_THRESHOLD_DEFAULT = 2 diff --git a/sysinv/sysinv/sysinv/sysinv/common/service_parameter.py b/sysinv/sysinv/sysinv/sysinv/common/service_parameter.py index 1c7968df35..153a5da3f4 100644 --- a/sysinv/sysinv/sysinv/sysinv/common/service_parameter.py +++ b/sysinv/sysinv/sysinv/sysinv/common/service_parameter.py @@ -545,6 +545,27 @@ def _validate_hbs_period(name, value): SERVICE_PARAM_PLAT_MTCE_HBS_PERIOD_MAX) +def 
_validate_hbs_failure_action(name, value): + error = False + try: + if str(value) != SERVICE_PARAM_PLAT_MTCE_HBS_FAILURE_ACTION_FAIL and \ + str(value) != SERVICE_PARAM_PLAT_MTCE_HBS_FAILURE_ACTION_DEGRADE and \ + str(value) != SERVICE_PARAM_PLAT_MTCE_HBS_FAILURE_ACTION_ALARM and \ + str(value) != SERVICE_PARAM_PLAT_MTCE_HBS_FAILURE_ACTION_NONE: + error = True + + except ValueError: + error = True + + if error is True: + raise wsme.exc.ClientSideError(_( + "Action must be one of '%s', '%s', '%s' or '%s'" % + (SERVICE_PARAM_PLAT_MTCE_HBS_FAILURE_ACTION_FAIL, + SERVICE_PARAM_PLAT_MTCE_HBS_FAILURE_ACTION_DEGRADE, + SERVICE_PARAM_PLAT_MTCE_HBS_FAILURE_ACTION_ALARM, + SERVICE_PARAM_PLAT_MTCE_HBS_FAILURE_ACTION_NONE))) + + def _validate_hbs_failure_threshold(name, value): _validate_range(name, value, SERVICE_PARAM_PLAT_MTCE_HBS_FAILURE_THRESHOLD_MIN, @@ -1331,6 +1352,7 @@ PLATFORM_MTCE_PARAMETER_MANDATORY = [ constants.SERVICE_PARAM_PLAT_MTCE_COMPUTE_BOOT_TIMEOUT, constants.SERVICE_PARAM_PLAT_MTCE_CONTROLLER_BOOT_TIMEOUT, constants.SERVICE_PARAM_PLAT_MTCE_HBS_PERIOD, + constants.SERVICE_PARAM_PLAT_MTCE_HBS_FAILURE_ACTION, constants.SERVICE_PARAM_PLAT_MTCE_HBS_FAILURE_THRESHOLD, constants.SERVICE_PARAM_PLAT_MTCE_HBS_DEGRADE_THRESHOLD, constants.SERVICE_PARAM_PLAT_MTCE_MNFA_THRESHOLD, @@ -1349,6 +1371,10 @@ SERVICE_PARAM_PLAT_MTCE_HBS_FAILURE_THRESHOLD_MIN = 10 SERVICE_PARAM_PLAT_MTCE_HBS_FAILURE_THRESHOLD_MAX = 100 SERVICE_PARAM_PLAT_MTCE_HBS_DEGRADE_THRESHOLD_MIN = 4 SERVICE_PARAM_PLAT_MTCE_HBS_DEGRADE_THRESHOLD_MAX = 100 +SERVICE_PARAM_PLAT_MTCE_HBS_FAILURE_ACTION_FAIL = 'fail' +SERVICE_PARAM_PLAT_MTCE_HBS_FAILURE_ACTION_DEGRADE = 'degrade' +SERVICE_PARAM_PLAT_MTCE_HBS_FAILURE_ACTION_ALARM = 'alarm' +SERVICE_PARAM_PLAT_MTCE_HBS_FAILURE_ACTION_NONE = 'none' SERVICE_PARAM_PLAT_MTCE_MNFA_THRESHOLD_MIN = 2 SERVICE_PARAM_PLAT_MTCE_MNFA_THRESHOLD_MAX = 100 SERVICE_PARAM_PLAT_MTCE_MNFA_TIMEOUT_MIN = 100 @@ -1361,6 +1387,8 @@ PLATFORM_MTCE_PARAMETER_VALIDATOR = { 
_validate_controller_boot_timeout, constants.SERVICE_PARAM_PLAT_MTCE_HBS_PERIOD: _validate_hbs_period, + constants.SERVICE_PARAM_PLAT_MTCE_HBS_FAILURE_ACTION: + _validate_hbs_failure_action, constants.SERVICE_PARAM_PLAT_MTCE_HBS_FAILURE_THRESHOLD: _validate_hbs_failure_threshold, constants.SERVICE_PARAM_PLAT_MTCE_HBS_DEGRADE_THRESHOLD: @@ -1375,6 +1403,7 @@ PLATFORM_MTCE_PARAMETER_RESOURCE = { constants.SERVICE_PARAM_PLAT_MTCE_COMPUTE_BOOT_TIMEOUT: 'platform::mtce::params::compute_boot_timeout', constants.SERVICE_PARAM_PLAT_MTCE_CONTROLLER_BOOT_TIMEOUT: 'platform::mtce::params::controller_boot_timeout', constants.SERVICE_PARAM_PLAT_MTCE_HBS_PERIOD: 'platform::mtce::params::heartbeat_period', + constants.SERVICE_PARAM_PLAT_MTCE_HBS_FAILURE_ACTION: 'platform::mtce::params::heartbeat_failure_action', constants.SERVICE_PARAM_PLAT_MTCE_HBS_FAILURE_THRESHOLD: 'platform::mtce::params::heartbeat_failure_threshold', constants.SERVICE_PARAM_PLAT_MTCE_HBS_DEGRADE_THRESHOLD: 'platform::mtce::params::heartbeat_degrade_threshold', constants.SERVICE_PARAM_PLAT_MTCE_MNFA_THRESHOLD: 'platform::mtce::params::mnfa_threshold', diff --git a/sysinv/sysinv/sysinv/sysinv/conductor/manager.py b/sysinv/sysinv/sysinv/sysinv/conductor/manager.py index adbfacd04e..61eab01206 100644 --- a/sysinv/sysinv/sysinv/sysinv/conductor/manager.py +++ b/sysinv/sysinv/sysinv/sysinv/conductor/manager.py @@ -476,6 +476,11 @@ class ConductorManager(service.PeriodicService): 'name': constants.SERVICE_PARAM_PLAT_MTCE_HBS_PERIOD, 'value': constants.SERVICE_PARAM_PLAT_MTCE_HBS_PERIOD_DEFAULT, }, + {'service': constants.SERVICE_TYPE_PLATFORM, + 'section': constants.SERVICE_PARAM_SECTION_PLATFORM_MAINTENANCE, + 'name': constants.SERVICE_PARAM_PLAT_MTCE_HBS_FAILURE_ACTION, + 'value': constants.SERVICE_PARAM_PLAT_MTCE_HBS_FAILURE_ACTION_DEFAULT, + }, {'service': constants.SERVICE_TYPE_PLATFORM, 'section': constants.SERVICE_PARAM_SECTION_PLATFORM_MAINTENANCE, 'name': 
constants.SERVICE_PARAM_PLAT_MTCE_HBS_FAILURE_THRESHOLD,