Apply NoExecute taint to locked nodes
This change applies the NoExecute taint to AIO-DX and worker nodes when they are locked. This causes pods to be evicted from the node and prevents new pods from being scheduled on that node. When the node is unlocked and has rebooted, the taint is removed. Change-Id: I2a7c8843a68661e15224260c53fd171920473161 Story: 2002843 Task: 29359 Signed-off-by: Bart Wensley <barton.wensley@windriver.com>
This commit is contained in:
parent
a6732cbe47
commit
3e9d13ec85
|
@ -884,19 +884,6 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI):
|
|||
future.set_timeouts(config.CONF.get('nfvi-timeouts', None))
|
||||
|
||||
if self._host_supports_kubernetes(host_personality):
|
||||
if True:
|
||||
# For now, we do not want to apply the NoExecute taint.
|
||||
# When the VIM detects that a service is failed on a host,
|
||||
# it goes through a disable/enable cycle. This would cause
|
||||
# the NoExecute taint to be applied/removed which causes
|
||||
# most pods to be stopped/started. If the pods don't come
|
||||
# back quickly enough the VIM will attempt another
|
||||
# disable/enable, which can go on forever. For now,
|
||||
# we will just avoid tainting hosts.
|
||||
# TODO(bwensley): Rework when support for pure k8s hosts is
|
||||
# added.
|
||||
pass
|
||||
else:
|
||||
response['reason'] = 'failed to disable kubernetes services'
|
||||
|
||||
# To disable kubernetes we add the NoExecute taint to the
|
||||
|
|
|
@ -669,11 +669,12 @@ def query_network_agents(token, host_name, check_fully_up):
|
|||
Input parameter check_fully_up set to True will check for
|
||||
both alive and admin_state_up, otherwise only alive is checked.
|
||||
"""
|
||||
try:
|
||||
url, api_cmd, api_cmd_headers, result_data = get_network_agents(
|
||||
token, host_name)
|
||||
|
||||
agent_state = 'up'
|
||||
alive = False
|
||||
admin_state_up = False
|
||||
supported_agents = [AGENT_TYPE.L3, AGENT_TYPE.DHCP]
|
||||
for supported_agent in supported_agents:
|
||||
found = False
|
||||
|
@ -709,9 +710,4 @@ def query_network_agents(token, host_name, check_fully_up):
|
|||
agent_state = 'down'
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
DLOG.exception("Caught exception trying to query host %s "
|
||||
"agent states: %s" % (host_name, e))
|
||||
agent_state = 'down'
|
||||
|
||||
return agent_state
|
||||
|
|
|
@ -38,6 +38,13 @@ class SwMgmtDirector(object):
|
|||
"""
|
||||
return self._sw_update
|
||||
|
||||
@property
|
||||
def single_controller(self):
|
||||
"""
|
||||
Returns whether this is a single controller configuration
|
||||
"""
|
||||
return self._single_controller
|
||||
|
||||
def create_sw_patch_strategy(self, controller_apply_type, storage_apply_type,
|
||||
swift_apply_type, worker_apply_type,
|
||||
max_parallel_worker_hosts,
|
||||
|
|
|
@ -234,9 +234,6 @@ class DisableHostTask(state_machine.StateTask):
|
|||
if host.host_service_configured(objects.HOST_SERVICES.GUEST):
|
||||
task_work_list.append(DisableHostServicesTaskWork(
|
||||
self, host, objects.HOST_SERVICES.GUEST))
|
||||
if host.host_service_configured(objects.HOST_SERVICES.CONTAINER):
|
||||
task_work_list.append(DisableHostServicesTaskWork(
|
||||
self, host, objects.HOST_SERVICES.CONTAINER))
|
||||
if host.host_service_configured(objects.HOST_SERVICES.COMPUTE):
|
||||
task_work_list.append(QueryHypervisorTaskWork(
|
||||
self, host, force_pass=True))
|
||||
|
@ -248,6 +245,17 @@ class DisableHostTask(state_machine.StateTask):
|
|||
task_work_list.append(NotifyHostDisabledTaskWork(
|
||||
self, host, objects.HOST_SERVICES.NETWORK))
|
||||
task_work_list.append(NotifyInstancesHostDisabledTaskWork(self, host))
|
||||
if host.host_service_configured(objects.HOST_SERVICES.CONTAINER):
|
||||
# Only disable the container services if the host is being locked
|
||||
# and we are not running in a single controller configuration. In
|
||||
# a single controller configuration we keep the container services
|
||||
# running.
|
||||
if self._host.is_locking():
|
||||
from nfv_vim import directors
|
||||
sw_mgmt_director = directors.get_sw_mgmt_director()
|
||||
if not sw_mgmt_director.single_controller:
|
||||
task_work_list.append(DisableHostServicesTaskWork(
|
||||
self, host, objects.HOST_SERVICES.CONTAINER))
|
||||
task_work_list.append(notify_host_services_task(
|
||||
self, host, force_pass=True))
|
||||
if host.host_service_configured(objects.HOST_SERVICES.COMPUTE):
|
||||
|
@ -443,8 +451,21 @@ class NotifyDisabledHostTask(state_machine.StateTask):
|
|||
Notify Disabled Host Task
|
||||
"""
|
||||
def __init__(self, host):
|
||||
from nfv_vim import objects
|
||||
|
||||
self._host_reference = weakref.ref(host)
|
||||
task_work_list = list()
|
||||
if host.host_service_configured(objects.HOST_SERVICES.CONTAINER):
|
||||
# Only disable the container services if the host is being locked
|
||||
# and we are not running in a single controller configuration. In
|
||||
# a single controller configuration we keep the container services
|
||||
# running.
|
||||
if self._host.is_locking():
|
||||
from nfv_vim import directors
|
||||
sw_mgmt_director = directors.get_sw_mgmt_director()
|
||||
if not sw_mgmt_director.single_controller:
|
||||
task_work_list.append(DisableHostServicesTaskWork(
|
||||
self, host, objects.HOST_SERVICES.CONTAINER))
|
||||
task_work_list.append(NotifyHostServicesDisabledTaskWork(
|
||||
self, host, force_pass=True))
|
||||
super(NotifyDisabledHostTask, self).__init__(
|
||||
|
|
Loading…
Reference in New Issue