From 78a022103aa5ecef751a08b797141d55833f36ad Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Wed, 30 Jan 2019 14:47:08 -0500 Subject: [PATCH 1/2] VIM: allow cold migrate and evac with remote_storage label Store the remote_storage label against the host object and always allow cold migration and evacuation for VMs on hosts with the label enabled. Change-Id: Iaf64474893f1426f1c24db9f535ac4c99faa0559 Story: 2004447 Task: 29190 Signed-off-by: Kevin Smith --- .../nfvi_plugins/nfvi_infrastructure_api.py | 35 ++++++++++++++----- .../nfv_unit_tests/tests/test_instance.py | 1 + .../tests/test_sw_patch_strategy.py | 1 + .../tests/test_sw_upgrade_strategy.py | 1 + .../_database_infrastructure_module.py | 1 + .../nfv_vim/database/_database_migrate.py | 1 + nfv/nfv-vim/nfv_vim/nfvi/objects/v1/_host.py | 5 ++- nfv/nfv-vim/nfv_vim/objects/_host.py | 7 ++++ nfv/nfv-vim/nfv_vim/objects/_instance.py | 26 ++++++++++---- 9 files changed, 63 insertions(+), 15 deletions(-) diff --git a/nfv/nfv-plugins/nfv_plugins/nfvi_plugins/nfvi_infrastructure_api.py b/nfv/nfv-plugins/nfv_plugins/nfvi_plugins/nfvi_infrastructure_api.py index 7b627d2d..19be9827 100755 --- a/nfv/nfv-plugins/nfv_plugins/nfvi_plugins/nfvi_infrastructure_api.py +++ b/nfv/nfv-plugins/nfv_plugins/nfvi_plugins/nfvi_infrastructure_api.py @@ -134,9 +134,11 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI): openstack_compute = False openstack_control = False + remote_storage = False OS_COMPUTE = nfvi.objects.v1.HOST_LABEL_KEYS.OS_COMPUTE_NODE OS_CONTROL = nfvi.objects.v1.HOST_LABEL_KEYS.OS_CONTROL_PLANE + REMOTE_STORAGE = nfvi.objects.v1.HOST_LABEL_KEYS.REMOTE_STORAGE LABEL_ENABLED = nfvi.objects.v1.HOST_LABEL_VALUES.ENABLED for host_label in host_label_list: @@ -147,8 +149,11 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI): elif host_label['label_key'] == OS_CONTROL: if host_label['label_value'] == LABEL_ENABLED: openstack_control = True + elif host_label['label_key'] == REMOTE_STORAGE: + if host_label['label_value'] == LABEL_ENABLED: + remote_storage = True - return (openstack_compute, openstack_control) + return (openstack_compute, openstack_control, remote_storage) def __init__(self): super(NFVIInfrastructureAPI, self).__init__() @@ -378,7 +383,8 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI): host_label_list = future.result.data['labels'] - openstack_compute, openstack_control = self._get_host_labels(host_label_list) + openstack_compute, openstack_control, remote_storage = \ + self._get_host_labels(host_label_list) admin_state, oper_state, avail_status, nfvi_data \ = host_state(host_uuid, host_name, host_personality, @@ -400,6 +406,7 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI): target_load, openstack_compute, openstack_control, + remote_storage, nfvi_data) host_objs.append(host_obj) @@ -520,7 +527,8 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI): host_label_list = future.result.data['labels'] - openstack_compute, openstack_control = self._get_host_labels(host_label_list) + openstack_compute, openstack_control, remote_storage = \ + self._get_host_labels(host_label_list) host_obj = nfvi.objects.v1.Host(host_uuid, host_name, host_sub_functions, @@ -532,6 +540,7 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI): target_load, openstack_compute, openstack_control, + remote_storage, nfvi_data) response['result-data'] = host_obj @@ -1006,7 +1015,8 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI): host_label_list = future.result.data['labels'] - openstack_compute, openstack_control = self._get_host_labels(host_label_list) + openstack_compute, openstack_control, remote_storage = \ + self._get_host_labels(host_label_list) host_obj = nfvi.objects.v1.Host(host_uuid, host_name, host_sub_functions, @@ -1018,6 +1028,7 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI): target_load, openstack_compute, openstack_control, + remote_storage, nfvi_data) response['result-data'] = host_obj @@ -1122,7 +1133,8 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI): host_label_list = future.result.data['labels'] - openstack_compute, openstack_control = self._get_host_labels(host_label_list) + openstack_compute, openstack_control, remote_storage = \ + self._get_host_labels(host_label_list) host_obj = nfvi.objects.v1.Host(host_uuid, host_name, host_sub_functions, @@ -1134,6 +1146,7 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI): target_load, openstack_compute, openstack_control, + remote_storage, nfvi_data) response['result-data'] = host_obj @@ -1237,7 +1250,8 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI): host_label_list = future.result.data['labels'] - openstack_compute, openstack_control = self._get_host_labels(host_label_list) + openstack_compute, openstack_control, remote_storage = \ + self._get_host_labels(host_label_list) host_obj = nfvi.objects.v1.Host(host_uuid, host_name, host_sub_functions, @@ -1249,6 +1263,7 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI): target_load, openstack_compute, openstack_control, + remote_storage, nfvi_data) response['result-data'] = host_obj @@ -1351,7 +1366,8 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI): host_label_list = future.result.data['labels'] - openstack_compute, openstack_control = self._get_host_labels(host_label_list) + openstack_compute, openstack_control, remote_storage = \ + self._get_host_labels(host_label_list) host_obj = nfvi.objects.v1.Host(host_uuid, host_name, host_sub_functions, @@ -1363,6 +1379,7 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI): target_load, openstack_compute, openstack_control, + remote_storage, nfvi_data) response['result-data'] = host_obj @@ -1518,7 +1535,8 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI): host_label_list = future.result.data['labels'] - openstack_compute, openstack_control = self._get_host_labels(host_label_list) + openstack_compute, openstack_control, remote_storage = \ + self._get_host_labels(host_label_list) host_obj = nfvi.objects.v1.Host(host_uuid, host_name, host_sub_functions, @@ -1530,6 +1548,7 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI): target_load, openstack_compute, openstack_control, + remote_storage, nfvi_data) response['result-data'] = host_obj diff --git a/nfv/nfv-tests/nfv_unit_tests/tests/test_instance.py b/nfv/nfv-tests/nfv_unit_tests/tests/test_instance.py index 21425307..19f72f35 100755 --- a/nfv/nfv-tests/nfv_unit_tests/tests/test_instance.py +++ b/nfv/nfv-tests/nfv_unit_tests/tests/test_instance.py @@ -217,6 +217,7 @@ class TestInstance(testcase.NFVTestCase): target_load=target_load, openstack_compute=False, openstack_control=False, + remote_storage=False, uptime='1000' ) diff --git a/nfv/nfv-tests/nfv_unit_tests/tests/test_sw_patch_strategy.py b/nfv/nfv-tests/nfv_unit_tests/tests/test_sw_patch_strategy.py index b7013f91..9f3e60cf 100755 --- a/nfv/nfv-tests/nfv_unit_tests/tests/test_sw_patch_strategy.py +++ b/nfv/nfv-tests/nfv_unit_tests/tests/test_sw_patch_strategy.py @@ -296,6 +296,7 @@ class TestSwPatchStrategy(testcase.NFVTestCase): target_load='12.01', openstack_compute=False, openstack_control=False, + remote_storage=False, uptime='1000' ) diff --git a/nfv/nfv-tests/nfv_unit_tests/tests/test_sw_upgrade_strategy.py b/nfv/nfv-tests/nfv_unit_tests/tests/test_sw_upgrade_strategy.py index 84a10c98..7c02f4ec 100755 --- a/nfv/nfv-tests/nfv_unit_tests/tests/test_sw_upgrade_strategy.py +++ b/nfv/nfv-tests/nfv_unit_tests/tests/test_sw_upgrade_strategy.py @@ -274,6 +274,7 @@ class TestSwUpgradeStrategy(testcase.NFVTestCase): target_load=target_load, openstack_compute=False, openstack_control=False, + remote_storage=False, uptime='1000' ) diff --git a/nfv/nfv-vim/nfv_vim/database/_database_infrastructure_module.py b/nfv/nfv-vim/nfv_vim/database/_database_infrastructure_module.py index cf74ba11..08c08a8b 100755 --- a/nfv/nfv-vim/nfv_vim/database/_database_infrastructure_module.py +++ b/nfv/nfv-vim/nfv_vim/database/_database_infrastructure_module.py @@ -126,6 +126,7 @@ def database_host_get_list(): nfvi_host_data['target_load'], nfvi_host_data['openstack_compute'], nfvi_host_data['openstack_control'], + nfvi_host_data['remote_storage'], nfvi_host_data['nfvi_data']) host_obj = objects.Host(nfvi_host, host.state, host.action, diff --git a/nfv/nfv-vim/nfv_vim/database/_database_migrate.py b/nfv/nfv-vim/nfv_vim/database/_database_migrate.py index 4d654b56..350764af 100755 --- a/nfv/nfv-vim/nfv_vim/database/_database_migrate.py +++ b/nfv/nfv-vim/nfv_vim/database/_database_migrate.py @@ -23,6 +23,7 @@ def _migrate_hosts_v5_to_v6(session, hosts_v5, hosts_v6): nfvi_host_data = json.loads(host_v5.nfvi_host_data) nfvi_host_data['openstack_compute'] = False nfvi_host_data['openstack_control'] = False + nfvi_host_data['remote_storage'] = False host_v6.nfvi_host_data = json.dumps(nfvi_host_data) session.add(host_v6) diff --git a/nfv/nfv-vim/nfv_vim/nfvi/objects/v1/_host.py b/nfv/nfv-vim/nfv_vim/nfvi/objects/v1/_host.py index 15dce4a7..d3f314be 100755 --- a/nfv/nfv-vim/nfv_vim/nfvi/objects/v1/_host.py +++ b/nfv/nfv-vim/nfv_vim/nfvi/objects/v1/_host.py @@ -88,6 +88,7 @@ class KubernetesLabelKeys(Constants): """ OS_COMPUTE_NODE = Constant('openstack-compute-node') OS_CONTROL_PLANE = Constant('openstack-control-plane') + REMOTE_STORAGE = Constant('remote-storage') # Host Constant Instantiation @@ -108,6 +109,7 @@ class Host(ObjectData): avail_status, action, uptime, software_load, target_load, openstack_compute=False, openstack_control=False, + remote_storage=False, nfvi_data=None): super(Host, self).__init__('1.0.0') self.update(dict(uuid=uuid, name=name, personality=personality, @@ -119,6 +121,7 @@ class Host(ObjectData): software_load=software_load, target_load=target_load, openstack_compute=openstack_compute, - openstack_control=openstack_control)) + openstack_control=openstack_control, + remote_storage=remote_storage)) self.nfvi_data = nfvi_data diff --git a/nfv/nfv-vim/nfv_vim/objects/_host.py b/nfv/nfv-vim/nfv_vim/objects/_host.py index 4df54f9e..9cfec8f0 100755 --- a/nfv/nfv-vim/nfv_vim/objects/_host.py +++ b/nfv/nfv-vim/nfv_vim/objects/_host.py @@ -317,6 +317,13 @@ class Host(ObjectData): """ return self._nfvi_host.openstack_control + @property + def remote_storage(self): + """ + Returns remote_storage for this host + """ + return self._nfvi_host.remote_storage + @property def recover_instances(self): """ diff --git a/nfv/nfv-vim/nfv_vim/objects/_instance.py b/nfv/nfv-vim/nfv_vim/objects/_instance.py index 02e86327..d3889c7a 100755 --- a/nfv/nfv-vim/nfv_vim/objects/_instance.py +++ b/nfv/nfv-vim/nfv_vim/objects/_instance.py @@ -1456,6 +1456,8 @@ class Instance(ObjectData): """ Returns true if the instance can be cold-migrated """ + from nfv_vim import tables + if not system_initiated: # Always allow user initiated cold migration return True @@ -1464,9 +1466,14 @@ class Instance(ObjectData): # Always allow cold migration when booted from a volume return True - # TODO(bwensley): Always allow cold migration for instances using - # remote storage. There is currently no way to determine this, but we - # should eventually be able to check for a label on the compute host. + host_table = tables.tables_get_host_table() + host = host_table.get(self.host_name, None) + + if host is not None: + if host.remote_storage: + # Always allow cold migration for instances using + # remote storage + return True config_option = 'max_cold_migrate_local_image_disk_gb' @@ -1487,6 +1494,8 @@ class Instance(ObjectData): """ Returns true if the instance can be evacuated """ + from nfv_vim import tables + if not system_initiated: # Always allow user initiated evacuate return True @@ -1495,9 +1504,14 @@ class Instance(ObjectData): # Always allow evacuate when booted from a volume return True - # TODO(bwensley): Always allow evacuate for instances using remote - # storage. There is currently no way to determine this, but we should - # eventually be able to check for a label on the compute host. + host_table = tables.tables_get_host_table() + host = host_table.get(self.host_name, None) + + if host is not None: + if host.remote_storage: + # Always allow evacuation for instances using + # remote storage + return True config_option = 'max_evacuate_local_image_disk_gb' From 3e9d13ec85424a234f815d5db7e5357a68a4c5f9 Mon Sep 17 00:00:00 2001 From: Bart Wensley Date: Tue, 5 Feb 2019 14:09:24 -0600 Subject: [PATCH 2/2] Apply NoExecute taint to locked nodes This change applies the NoExecute taint to AIO-DX and worker nodes when they are locked. This causes pods to be evicted from the node and prevents new pods from being scheduled on that node. When the node is unlocked and has rebooted, the taint is removed. Change-Id: I2a7c8843a68661e15224260c53fd171920473161 story: 2002843 task: 29359 Signed-off-by: Bart Wensley --- .../nfvi_plugins/nfvi_infrastructure_api.py | 37 +++------ .../nfvi_plugins/openstack/neutron.py | 80 +++++++++---------- .../nfv_vim/directors/_sw_mgmt_director.py | 7 ++ nfv/nfv-vim/nfv_vim/host_fsm/_host_tasks.py | 27 ++++++- 4 files changed, 81 insertions(+), 70 deletions(-) diff --git a/nfv/nfv-plugins/nfv_plugins/nfvi_plugins/nfvi_infrastructure_api.py b/nfv/nfv-plugins/nfv_plugins/nfvi_plugins/nfvi_infrastructure_api.py index 7b627d2d..7b834e9c 100755 --- a/nfv/nfv-plugins/nfv_plugins/nfvi_plugins/nfvi_infrastructure_api.py +++ b/nfv/nfv-plugins/nfv_plugins/nfvi_plugins/nfvi_infrastructure_api.py @@ -884,34 +884,21 @@ class NFVIInfrastructureAPI(nfvi.api.v1.NFVIInfrastructureAPI): future.set_timeouts(config.CONF.get('nfvi-timeouts', None)) if self._host_supports_kubernetes(host_personality): - if True: - # For now, we do not want to apply the NoExecute taint. - # When the VIM detects that a service is failed on a host, - # it goes through a disable/enable cycle. This would cause - # the NoExecute taint to be applied/removed which causes - # most pods to be stopped/started. If the pods don't come - # back quickly enough the VIM will attempt another - # disable/enable, which can go on forever. For now, - # we will just avoid tainting hosts. - # TODO(bwensley): Rework when support for pure k8s hosts is - # added. - pass - else: - response['reason'] = 'failed to disable kubernetes services' + response['reason'] = 'failed to disable kubernetes services' - # To disable kubernetes we add the NoExecute taint to the - # node. This removes pods that can be scheduled elsewhere - # and prevents new pods from scheduling on the node. - future.work(kubernetes_client.taint_node, - host_name, "NoExecute", "services", "disabled") + # To disable kubernetes we add the NoExecute taint to the + # node. This removes pods that can be scheduled elsewhere + # and prevents new pods from scheduling on the node. + future.work(kubernetes_client.taint_node, + host_name, "NoExecute", "services", "disabled") - future.result = (yield) + future.result = (yield) - if not future.result.is_complete(): - DLOG.error("Kubernetes taint_node failed, operation " - "did not complete, host_uuid=%s, host_name=%s." - % (host_uuid, host_name)) - return + if not future.result.is_complete(): + DLOG.error("Kubernetes taint_node failed, operation " + "did not complete, host_uuid=%s, host_name=%s." + % (host_uuid, host_name)) + return response['completed'] = True response['reason'] = '' diff --git a/nfv/nfv-plugins/nfv_plugins/nfvi_plugins/openstack/neutron.py b/nfv/nfv-plugins/nfv_plugins/nfvi_plugins/openstack/neutron.py index 5efeba68..61d3b7de 100755 --- a/nfv/nfv-plugins/nfv_plugins/nfvi_plugins/openstack/neutron.py +++ b/nfv/nfv-plugins/nfv_plugins/nfvi_plugins/openstack/neutron.py @@ -669,49 +669,45 @@ def query_network_agents(token, host_name, check_fully_up): Input parameter check_fully_up set to True will check for both alive and admin_state_up, otherwise only alive is checked. """ - try: - url, api_cmd, api_cmd_headers, result_data = get_network_agents( - token, host_name) + url, api_cmd, api_cmd_headers, result_data = get_network_agents( + token, host_name) - agent_state = 'up' - supported_agents = [AGENT_TYPE.L3, AGENT_TYPE.DHCP] - for supported_agent in supported_agents: - found = False - for agent in result_data: - agent_type = agent.get('agent_type', '') - host = agent.get('host', '') - if (agent_type == supported_agent) and (host == host_name): - DLOG.verbose("found agent %s for host %s" % - (supported_agent, host_name)) - alive = agent.get('alive', False) - admin_state_up = agent.get('admin_state_up', False) - # found the agent of interest. - found = True - break - if found: - if check_fully_up: - if not (alive and admin_state_up): - DLOG.verbose("host %s agent %s not fully up. alive: %s," - " admin_state_up: %s" % - (host_name, supported_agent, - alive, admin_state_up)) - agent_state = 'down' - break - else: - if not alive: - DLOG.verbose("host %s agent %s not alive" % - (host_name, supported_agent)) - agent_state = 'down' - break - else: - DLOG.error("host %s agent %s not present" % - (host_name, supported_agent)) - agent_state = 'down' + agent_state = 'up' + alive = False + admin_state_up = False + supported_agents = [AGENT_TYPE.L3, AGENT_TYPE.DHCP] + for supported_agent in supported_agents: + found = False + for agent in result_data: + agent_type = agent.get('agent_type', '') + host = agent.get('host', '') + if (agent_type == supported_agent) and (host == host_name): + DLOG.verbose("found agent %s for host %s" % + (supported_agent, host_name)) + alive = agent.get('alive', False) + admin_state_up = agent.get('admin_state_up', False) + # found the agent of interest. + found = True break - - except Exception as e: - DLOG.exception("Caught exception trying to query host %s " - "agent states: %s" % (host_name, e)) - agent_state = 'down' + if found: + if check_fully_up: + if not (alive and admin_state_up): + DLOG.verbose("host %s agent %s not fully up. alive: %s," + " admin_state_up: %s" % + (host_name, supported_agent, + alive, admin_state_up)) + agent_state = 'down' + break + else: + if not alive: + DLOG.verbose("host %s agent %s not alive" % + (host_name, supported_agent)) + agent_state = 'down' + break + else: + DLOG.error("host %s agent %s not present" % + (host_name, supported_agent)) + agent_state = 'down' + break return agent_state diff --git a/nfv/nfv-vim/nfv_vim/directors/_sw_mgmt_director.py b/nfv/nfv-vim/nfv_vim/directors/_sw_mgmt_director.py index 1e822ac2..f14f5444 100755 --- a/nfv/nfv-vim/nfv_vim/directors/_sw_mgmt_director.py +++ b/nfv/nfv-vim/nfv_vim/directors/_sw_mgmt_director.py @@ -38,6 +38,13 @@ class SwMgmtDirector(object): """ return self._sw_update + @property + def single_controller(self): + """ + Returns whether this is a single controller configuration + """ + return self._single_controller + def create_sw_patch_strategy(self, controller_apply_type, storage_apply_type, swift_apply_type, worker_apply_type, max_parallel_worker_hosts, diff --git a/nfv/nfv-vim/nfv_vim/host_fsm/_host_tasks.py b/nfv/nfv-vim/nfv_vim/host_fsm/_host_tasks.py index 159bd802..82e1fa79 100755 --- a/nfv/nfv-vim/nfv_vim/host_fsm/_host_tasks.py +++ b/nfv/nfv-vim/nfv_vim/host_fsm/_host_tasks.py @@ -234,9 +234,6 @@ class DisableHostTask(state_machine.StateTask): if host.host_service_configured(objects.HOST_SERVICES.GUEST): task_work_list.append(DisableHostServicesTaskWork( self, host, objects.HOST_SERVICES.GUEST)) - if host.host_service_configured(objects.HOST_SERVICES.CONTAINER): - task_work_list.append(DisableHostServicesTaskWork( - self, host, objects.HOST_SERVICES.CONTAINER)) if host.host_service_configured(objects.HOST_SERVICES.COMPUTE): task_work_list.append(QueryHypervisorTaskWork( self, host, force_pass=True)) @@ -248,6 +245,17 @@ class DisableHostTask(state_machine.StateTask): task_work_list.append(NotifyHostDisabledTaskWork( self, host, objects.HOST_SERVICES.NETWORK)) task_work_list.append(NotifyInstancesHostDisabledTaskWork(self, host)) + if host.host_service_configured(objects.HOST_SERVICES.CONTAINER): + # Only disable the container services if the host is being locked + # and we are not running in a single controller configuration. In + # a single controller configuration we keep the container services + # running. + if self._host.is_locking(): + from nfv_vim import directors + sw_mgmt_director = directors.get_sw_mgmt_director() + if not sw_mgmt_director.single_controller: + task_work_list.append(DisableHostServicesTaskWork( + self, host, objects.HOST_SERVICES.CONTAINER)) task_work_list.append(notify_host_services_task( self, host, force_pass=True)) if host.host_service_configured(objects.HOST_SERVICES.COMPUTE): @@ -443,8 +451,21 @@ class NotifyDisabledHostTask(state_machine.StateTask): Notify Disabled Host Task """ def __init__(self, host): + from nfv_vim import objects + self._host_reference = weakref.ref(host) task_work_list = list() + if host.host_service_configured(objects.HOST_SERVICES.CONTAINER): + # Only disable the container services if the host is being locked + # and we are not running in a single controller configuration. In + # a single controller configuration we keep the container services + # running. + if self._host.is_locking(): + from nfv_vim import directors + sw_mgmt_director = directors.get_sw_mgmt_director() + if not sw_mgmt_director.single_controller: + task_work_list.append(DisableHostServicesTaskWork( + self, host, objects.HOST_SERVICES.CONTAINER)) task_work_list.append(NotifyHostServicesDisabledTaskWork( self, host, force_pass=True)) super(NotifyDisabledHostTask, self).__init__(