nfv/nfv/nfv-vim/nfv_vim/directors/_instance_director.py

#
# Copyright (c) 2015-2016 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import os
import six

from nfv_common import config
from nfv_common import debug
from nfv_common import timers
from nfv_common.helpers import coroutine
from nfv_common.helpers import Singleton

from nfv_vim import dor
from nfv_vim import nfvi
from nfv_vim import objects
from nfv_vim import tables

from nfv_vim.directors._directors_defs import Operation
from nfv_vim.directors._directors_defs import OPERATION_STATE
from nfv_vim.directors._directors_defs import OPERATION_TYPE

DLOG = debug.debug_get_logger('nfv_vim.instance_director')

_instance_director = None

NFV_VIM_UNLOCK_COMPLETE_FILE = '/var/run/.nfv-vim.unlock_complete'


@six.add_metaclass(Singleton)
class InstanceDirector(object):
    """
    Instance Director
    """
    def __init__(self, max_concurrent_recovering_instances,
                 max_concurrent_migrates_per_host,
                 max_concurrent_evacuates_per_host, recovery_audit_interval,
                 recovery_audit_cooldown, recovery_audit_batch_interval,
                 recovery_cooldown, rebuild_timeout, reboot_timeout,
                 migrate_timeout, single_hypervisor,
                 recovery_threshold, max_throttled_recovering_instances):
        self._max_concurrent_recovering_instances \
            = max_concurrent_recovering_instances
        self._max_concurrent_migrates_per_host \
            = max_concurrent_migrates_per_host
        self._max_concurrent_evacuates_per_host \
            = max_concurrent_evacuates_per_host
        self._recovery_audit_interval = recovery_audit_interval
        self._recovery_audit_cooldown = recovery_audit_cooldown
        self._recovery_audit_batch_interval = recovery_audit_batch_interval
        self._recovery_cooldown = recovery_cooldown
        self._rebuild_timeout = rebuild_timeout
        self._reboot_timeout = reboot_timeout
        self._migrate_timeout = migrate_timeout
        self._single_hypervisor = single_hypervisor
        self._recovery_threshold = recovery_threshold
        self._max_throttled_recovering_instances \
            = max_throttled_recovering_instances
        self._host_operations = dict()
        self._reboot_count = dict()
        self._instance_recovery_list = list()
        self._instance_failed_list = list()
        self._instance_rebuilding_list = list()
        self._instance_migrating_list = list()
        self._instance_rebooting_list = list()
        self._instance_cleanup_list = list()
        self._next_audit_interval = recovery_audit_interval

        if not nfvi.nfvi_compute_plugin_disabled():
            # Do not launch audit if compute plugin not enabled.
            self._timer_audit_instances = timers.timers_create_timer(
                "audit-instances", recovery_audit_cooldown,
                recovery_audit_interval, self.audit_instances)
        else:
            self._timer_audit_instances = None

        self._timer_cleanup_instances = None

    @staticmethod
    def _is_host_enabled(host_name):
        """
        Returns true if the hypervisor is enabled
        """
        host_table = tables.tables_get_host_table()
        host = host_table.get(host_name, None)
        if host is not None:
            if host.nfvi_host_is_enabled():
                return True
        return False

    @staticmethod
    def _is_hypervisor_enabled(host_name):
        """
        Returns true if the hypervisor is enabled
        """
        hypervisor_table = tables.tables_get_hypervisor_table()
        hypervisor = hypervisor_table.get_by_host_name(host_name)
        if hypervisor is not None:
            if hypervisor.is_enabled():
                return True
        return False

    @staticmethod
    def _hypervisors_available(min_count=1, excluded_hosts=None):
        """
        Returns true if at least given count of hosts and hypervisors are enabled
        """
        if excluded_hosts is None:
            excluded_hosts = list()

        available_count = 0
        host_table = tables.tables_get_host_table()
        hypervisor_table = tables.tables_get_hypervisor_table()

        for host_name in host_table.keys():
            if host_name in excluded_hosts:
                continue

            host = host_table.get(host_name, None)
            if host.nfvi_host_is_enabled():
                hypervisor = hypervisor_table.get_by_host_name(host_name)
                if hypervisor is not None:
                    if hypervisor.is_enabled():
                        available_count += 1

        return available_count >= min_count

    @staticmethod
    def upgrade_inprogress():
        """
        Returns true if the system is going through an upgrade
        """
        host_table = tables.tables_get_host_table()

        for host_name in host_table.keys():
            host = host_table.get(host_name, None)
            if host is not None:
                if host.upgrade_inprogress and not host.recover_instances:
                    return True

        return False

    @staticmethod
    def instance_action_allowed(instance, action_type):
        """
        Returns true if instance action is allowed
        """
        DLOG.info("Instance action allowed for %s, action_type=%s"
                  % (instance.name, action_type))
        return not InstanceDirector.upgrade_inprogress()

    def _instance_recovery_allowed(self, instance):
        """
        Returns true if instance recovery is allowed
        """
        recovery_allowed = False

        if instance.is_rebuilding():
            if instance.elapsed_time_in_state >= self._rebuild_timeout:
                recovery_allowed = True

        # We only recover failed live migrations - not failed cold migrations
        # or failed resize operations (instance.is_resizing).
        elif instance.is_migrating():
            if instance.elapsed_time_in_state >= self._migrate_timeout:
                recovery_allowed = True

        elif instance.is_rebooting():
            if instance.elapsed_time_in_state >= self._reboot_timeout:
                recovery_allowed = True

        else:
            if instance.elapsed_time_in_state >= self._recovery_cooldown:
                recovery_allowed = True

        return recovery_allowed

    def _get_instance_recovery_list(self):
        """
        Get instance recovery list after the previous list is exhausted
        """
        next_audit_interval = self._recovery_audit_interval

        # Get all instances that are to be considered for recovery.
        instances_recover = list()
        instances_failed = list()
        instances_rebuilding = list()
        instances_migrating = list()
        instances_rebooting = list()
        instance_tracking_uuids = list()
        instance_table = tables.tables_get_instance_table()

        # Check for failed instances; exclude instances that are part of a
        # host operation or have recently failed. Also check for failed
        # instances stuck recovering.
        for instance_uuid in instance_table:
            instance = instance_table[instance_uuid]

            host_operation = self._host_operations.get(instance.host_name, None)
            if host_operation is not None:
                if host_operation.is_inprogress():
                    DLOG.debug("Skip recovery of instance %s, host %s operation "
                               "inprogress." % (instance.name, instance.host_name))
                    next_audit_interval = self._recovery_audit_cooldown
                    continue

            if instance.host_name is None:
                DLOG.info("Can't recover instance %s, host is not valid."
                          % instance.name)
                continue

            if not (instance.is_deleting() or instance.is_deleted() or
                    instance.is_locked()) and instance.is_failed():
                next_audit_interval = self._recovery_audit_cooldown
                instance_tracking_uuids.append(instance.uuid)

                if self._instance_recovery_allowed(instance):
                    instances_recover.append(instance)
                else:
                    if instance.is_rebuilding():
                        instances_rebuilding.append(instance)
                    elif instance.is_migrating():
                        instances_migrating.append(instance)
                    elif instance.is_rebooting():
                        instances_rebooting.append(instance)
                    else:
                        instances_failed.append(instance)

        # Remove reboot counts for instances that recovered
        reboot_tracking_instance_uuids = self._reboot_count.keys()

        for instance_uuid in reboot_tracking_instance_uuids:
            if instance_uuid not in instance_tracking_uuids:
                del self._reboot_count[instance_uuid]

        # Initialize reboot counts for new instances
        for instance_uuid in instance_tracking_uuids:
            if instance_uuid not in self._reboot_count:
                self._reboot_count[instance_uuid] = 0

        # Order instances based on recovery priority
        instances_recover.sort(key=objects.Instance.recovery_sort_key,
                               reverse=True)

        return (next_audit_interval, instances_recover, instances_failed,
                instances_rebuilding, instances_migrating, instances_rebooting)

    def _host_migrate_instances(self, host, host_operation):
        """
        Host Migrate Instances
        """
        if host_operation.operation_type not in [OPERATION_TYPE.HOST_LOCK_FORCE,
                                                 OPERATION_TYPE.HOST_LOCK]:
            if not dor.dor_is_complete():
                DLOG.info("DOR is not complete, can't migrate instances off of "
                          "host %s." % host.name)
                self.reschedule_audit_instances(self._recovery_audit_cooldown)
                return

        if self.upgrade_inprogress():
            DLOG.info("Upgrade inprogress, can't migrate instances off of "
                      "host %s." % host.name)
            self.reschedule_audit_instances(self._recovery_audit_cooldown)
            return

        if not self._hypervisors_available(min_count=1):
            DLOG.info("No hypervisors available, can't migrate instances "
                      "off of host %s." % host.name)
            self.reschedule_audit_instances(self._recovery_audit_cooldown)
            return

        if OPERATION_TYPE.HOST_LOCK_FORCE == host_operation.operation_type:
            DLOG.info("Force-Lock issued, can't migrate instances off of "
                      "host %s." % host.name)
            self.reschedule_audit_instances(self._recovery_audit_cooldown)
            return

        initiated_by = objects.INSTANCE_ACTION_INITIATED_BY.DIRECTOR
        if OPERATION_TYPE.HOST_LOCK_FORCE == host_operation.operation_type:
            reason = "host force lock command issued"
        elif OPERATION_TYPE.HOST_LOCK == host_operation.operation_type:
            reason = "host lock command issued"
        elif OPERATION_TYPE.HOST_DISABLE == host_operation.operation_type:
            reason = "host disabled"
        elif OPERATION_TYPE.HOST_FAILED == host_operation.operation_type:
            if host.is_component_failure():
                reason = "host component failure"
            else:
                reason = "host failed"
        elif OPERATION_TYPE.MIGRATE_INSTANCES == host_operation.operation_type:
            reason = "migrate instances requested"
        else:
            reason = None

        migrates_inprogress = host_operation.total_inprogress()

        instance_table = tables.tables_get_instance_table()
        for instance in instance_table.on_host(host.name):
            if host_operation.instance_exists(instance.uuid):
                continue

            if instance.is_deleting() or instance.is_deleted() or \
                    instance.is_locked() or instance.is_failed():
                continue

            method = objects.INSTANCE_ACTION_TYPE.COLD_MIGRATE

            if instance.is_enabled() or instance.is_paused():
                if instance.supports_live_migration():
                    method = objects.INSTANCE_ACTION_TYPE.LIVE_MIGRATE

            if host_operation.operation_type in [OPERATION_TYPE.HOST_LOCK,
                                                 OPERATION_TYPE.MIGRATE_INSTANCES]:
                if OPERATION_TYPE.HOST_LOCK == host_operation.operation_type:
                    preamble = "Lock of host"
                else:
                    preamble = "Migrate instances from host"

                if instance.is_paused() and \
                        objects.INSTANCE_ACTION_TYPE.COLD_MIGRATE == method:
                    reason = ("%s %s failed because instance %s is paused."
                              % (preamble, host.name, instance.name))
                    DLOG.info(reason)
                    host_operation.add_instance(instance.uuid,
                                                OPERATION_STATE.FAILED)
                    host_operation.update_failure_reason(reason)
                    return
                elif instance.is_suspended():
                    reason = ("%s %s failed because instance %s is suspended."
                              % (preamble, host.name, instance.name))
                    DLOG.info(reason)
                    host_operation.add_instance(instance.uuid,
                                                OPERATION_STATE.FAILED)
                    host_operation.update_failure_reason(reason)
                    return
                elif instance.is_migrating() or instance.is_cold_migrating():
                    reason = ("%s %s failed because instance %s is migrating."
                              % (preamble, host.name, instance.name))
                    DLOG.info(reason)
                    host_operation.add_instance(instance.uuid,
                                                OPERATION_STATE.FAILED)
                    host_operation.update_failure_reason(reason)
                    return
                elif instance.is_rebuilding():
                    reason = ("%s %s failed because instance %s is rebuilding."
                              % (preamble, host.name, instance.name))
                    DLOG.info(reason)
                    host_operation.add_instance(instance.uuid,
                                                OPERATION_STATE.FAILED)
                    host_operation.update_failure_reason(reason)
                    return
                elif instance.is_action_running():
                    # Nova will not migrate an instance if an action is already
                    # running.
                    reason = (
                        "%s %s failed because instance %s action in progress."
                        % (preamble, host.name, instance.name))
                    DLOG.info(reason)
                    host_operation.add_instance(instance.uuid,
                                                OPERATION_STATE.FAILED)
                    host_operation.update_failure_reason(reason)
                    return
                elif instance.is_resized():
                    # Nova will not migrate an instance if is resized and
                    # waiting for confirmation.
                    reason = (
                        "%s %s failed because instance %s is resizing."
                        % (preamble, host.name, instance.name))
                    DLOG.info(reason)
                    host_operation.add_instance(instance.uuid,
                                                OPERATION_STATE.FAILED)
                    host_operation.update_failure_reason(reason)
                    return
                elif not self._hypervisors_available(min_count=1,
                                                     excluded_hosts=[host.name]):
                    reason = ("%s %s failed because there are no other "
                              "hypervisors available." % (preamble, host.name))
                    DLOG.info(reason)
                    host_operation.add_instance(instance.uuid,
                                                OPERATION_STATE.FAILED)
                    host_operation.update_failure_reason(reason)
                    return
                else:
                    if objects.INSTANCE_ACTION_TYPE.LIVE_MIGRATE == method:
                        if not instance.can_live_migrate(system_initiated=True):
                            reason = ("%s %s failed because instance %s "
                                      "can't be live-migrated by the system.  "
                                      "Manually move the instance off of host %s."
                                      % (preamble, host.name, instance.name,
                                         host.name))
                            DLOG.info(reason)
                            host_operation.add_instance(instance.uuid,
                                                        OPERATION_STATE.FAILED)
                            host_operation.update_failure_reason(reason)
                            return
                    else:
                        if not instance.can_cold_migrate(system_initiated=True):
                            reason = ("%s %s failed because instance %s "
                                      "can't be cold-migrated by the system.  "
                                      "Manually move the instance off of host %s."
                                      % (preamble, host.name, instance.name,
                                         host.name))
                            DLOG.info(reason)
                            host_operation.add_instance(instance.uuid,
                                                        OPERATION_STATE.FAILED)
                            host_operation.update_failure_reason(reason)
                            return
            else:
                if instance.is_paused() and \
                        objects.INSTANCE_ACTION_TYPE.COLD_MIGRATE == method:
                    DLOG.info("Instance %s set as failed on host %s "
                              "because it is paused." % (instance.name,
                                                         host.name))
                    instance.fail(reason + " and instance is paused")
                    continue
                elif instance.is_suspended():
                    DLOG.info("Instance %s set as failed on host %s because "
                              "it is suspended." % (instance.name, host.name))
                    instance.fail(reason + " and instance is suspended")
                    continue
                elif instance.is_migrating() or instance.is_cold_migrating():
                    # Allow current migrations to continue
                    DLOG.info("Instance %s on host %s is already migrating."
                              % (instance.name, host.name))
                elif instance.is_rebuilding():
                    DLOG.info("Instance %s set as failed on host %s because "
                              "it is rebuilding." % (instance.name, host.name))
                    instance.fail(reason + " and instance is rebuilding")
                    continue
                elif instance.is_action_running():
                    DLOG.info("Instance %s set as failed on host %s because "
                              "an action is in progress." % (instance.name,
                                                             host.name))
                    instance.fail(reason +
                                  " and instance has action in progress")
                    continue
                elif instance.is_resized():
                    DLOG.info("Instance %s set as failed on host %s because "
                              "it is resized." % (instance.name, host.name))
                    instance.fail(reason + " and instance is resized")
                    continue
                else:
                    if objects.INSTANCE_ACTION_TYPE.LIVE_MIGRATE == method:
                        if not instance.can_live_migrate(system_initiated=True):
                            DLOG.info("Instance %s set as failed on host %s "
                                      "because the system can't live-migrate "
                                      "instance." % (instance.name, host.name))
                            instance.fail(reason + " and the system can't "
                                          "live-migrate instance")
                            continue
                    else:
                        if not instance.can_cold_migrate(system_initiated=True):
                            DLOG.info("Instance %s set as failed on host %s "
                                      "because the system can't cold-migrate "
                                      "instance." % (instance.name, host.name))
                            instance.fail(reason + " and the system can't "
                                          "cold-migrate instance")
                            continue

            host_operation.add_instance(instance.uuid, OPERATION_STATE.INPROGRESS)

            if not (instance.is_migrating() or instance.is_cold_migrating()):
                instance.do_action(method, initiated_by=initiated_by, reason=reason)

            migrates_inprogress += 1
            if migrates_inprogress >= self._max_concurrent_migrates_per_host:
                break

    def _host_evacuate_instances(self, host, host_operation):
        """
        Host Evacuate Instances
        """
        do_evacuates = True

        if host_operation.operation_type not in [OPERATION_TYPE.HOST_LOCK_FORCE,
                                                 OPERATION_TYPE.HOST_LOCK]:
            if not dor.dor_is_complete():
                DLOG.info("DOR is not complete, can't evacuate instances off of "
                          "host %s." % host.name)
                self.reschedule_audit_instances(self._recovery_audit_cooldown)
                do_evacuates = False

        if do_evacuates and self.upgrade_inprogress():
            DLOG.info("Upgrade inprogress, can't evacuate instances off of "
                      "host %s." % host.name)
            self.reschedule_audit_instances(self._recovery_audit_cooldown)
            do_evacuates = False

        if do_evacuates and not self._hypervisors_available(min_count=1):
            DLOG.info("No hypervisors available, can't evacuate instances "
                      "off of host %s." % host.name)
            self.reschedule_audit_instances(self._recovery_audit_cooldown)
            do_evacuates = False

        if do_evacuates and \
                OPERATION_TYPE.HOST_LOCK_FORCE == host_operation.operation_type:
            DLOG.info("Force-Lock issued, can't evacuate instances off of "
                      "host %s until it is rebooted." % host.name)
            self.reschedule_audit_instances(self._recovery_audit_cooldown)
            do_evacuates = False

        initiated_by = objects.INSTANCE_ACTION_INITIATED_BY.DIRECTOR
        if OPERATION_TYPE.HOST_LOCK_FORCE == host_operation.operation_type:
            reason = "host force lock command issued"
        elif OPERATION_TYPE.HOST_LOCK == host_operation.operation_type:
            reason = "host lock command issued"
        elif OPERATION_TYPE.HOST_DISABLE == host_operation.operation_type:
            reason = "host disable action"
        elif OPERATION_TYPE.HOST_FAILED == host_operation.operation_type:
            if host.is_component_failure():
                reason = "host component failure"
            else:
                reason = "host failed"
        else:
            reason = None

        evacuates_inprogress = host_operation.total_inprogress()

        instance_table = tables.tables_get_instance_table()

        # Sort the instances on this host based on their recovery priority
        evacuate_priority_list = list()
        for instance in instance_table.on_host(host.name):
            evacuate_priority_list.append(instance)
        evacuate_priority_list.sort(
            key=objects.Instance.recovery_sort_key, reverse=True)

        for instance in evacuate_priority_list:
            if host_operation.instance_exists(instance.uuid):
                continue

            if instance.is_deleting() or instance.is_deleted() or \
                    instance.is_locked():
                continue

            if OPERATION_TYPE.HOST_LOCK == host_operation.operation_type:
                if instance.is_paused():
                    reason = ("Lock of host %s failed because instance %s "
                              "is paused." % (host.name, instance.name))
                    DLOG.info(reason)
                    host_operation.add_instance(instance.uuid,
                                                OPERATION_STATE.FAILED)
                    host_operation.update_failure_reason(reason)
                    return
                elif instance.is_suspended():
                    reason = ("Lock of host %s failed because instance %s "
                              "is suspended." % (host.name, instance.name))
                    DLOG.info(reason)
                    host_operation.add_instance(instance.uuid,
                                                OPERATION_STATE.FAILED)
                    host_operation.update_failure_reason(reason)
                    return
                elif instance.is_migrating() or instance.is_cold_migrating():
                    reason = ("Lock of host %s failed because instance %s "
                              "is migrating." % (host.name, instance.name))
                    DLOG.info(reason)
                    host_operation.add_instance(instance.uuid,
                                                OPERATION_STATE.FAILED)
                    host_operation.update_failure_reason(reason)
                    return
                elif instance.is_rebuilding():
                    reason = ("Lock of host %s failed because instance %s "
                              "is rebuilding." % (host.name, instance.name))
                    DLOG.info(reason)
                    host_operation.add_instance(instance.uuid,
                                                OPERATION_STATE.FAILED)
                    host_operation.update_failure_reason(reason)
                    return
                elif instance.is_action_running():
                    # Nova will not evacuate an instance if an action is already
                    # running.
                    reason = ("Lock of host %s failed because instance %s "
                              "action in progress." % (host.name, instance.name))
                    DLOG.info(reason)
                    host_operation.add_instance(instance.uuid,
                                                OPERATION_STATE.FAILED)
                    host_operation.update_failure_reason(reason)
                    return
                elif instance.is_resized():
                    # Nova will not migrate an instance if is resized and
                    # waiting for confirmation.
                    reason = ("Lock of host %s failed because instance %s "
                              "is resizing." % (host.name, instance.name))
                    DLOG.info(reason)
                    host_operation.add_instance(instance.uuid,
                                                OPERATION_STATE.FAILED)
                    host_operation.update_failure_reason(reason)
                    return
                elif not self._hypervisors_available(min_count=1,
                                                     excluded_hosts=[host.name]):
                    reason = ("Lock of host %s failed because there are no "
                              "other hypervisors available." % host.name)
                    DLOG.info(reason)
                    host_operation.add_instance(instance.uuid,
                                                OPERATION_STATE.FAILED)
                    host_operation.update_failure_reason(reason)
                    return
                elif not instance.can_evacuate(system_initiated=True):
                    reason = ("Lock of host %s failed because instance %s "
                              "can't be evacuated by the system.  Manually "
                              "move the instance off of host %s."
                              % (host.name, instance.name, host.name))
                    DLOG.info(reason)
                    host_operation.add_instance(instance.uuid,
                                                OPERATION_STATE.FAILED)
                    host_operation.update_failure_reason(reason)
                    return
            else:
                if not instance.is_failed():
                    instance.fail(reason)

            if do_evacuates:
                if evacuates_inprogress < self._max_concurrent_evacuates_per_host:
                    if instance.auto_recovery and instance.recoverable and \
                            instance.can_evacuate(system_initiated=True):
                        host_operation.add_instance(instance.uuid,
                                                    OPERATION_STATE.INPROGRESS)

                        instance.do_action(objects.INSTANCE_ACTION_TYPE.EVACUATE,
                                           initiated_by=initiated_by, reason=reason)
                        evacuates_inprogress += 1

    @staticmethod
    def _host_stop_instances(host, host_operation, instance_uuids):
        """
        Host Stop Instances
        """
        initiated_by = objects.INSTANCE_ACTION_INITIATED_BY.DIRECTOR
        if OPERATION_TYPE.STOP_INSTANCES == host_operation.operation_type:
            reason = "stop instances issued"
        elif OPERATION_TYPE.HOST_LOCK_FORCE == host_operation.operation_type:
            reason = "host force lock command issued"
        elif OPERATION_TYPE.HOST_LOCK == host_operation.operation_type:
            reason = "host lock command issued"
        else:
            reason = ("Unsupported operation (%s) against host %s."
                      % (host_operation.operation_type, host.name))
            DLOG.info(reason)
            host_operation.set_failed(reason)
            return

        instance_table = tables.tables_get_instance_table()
        for instance in instance_table.on_host(host.name):
            if instance.uuid not in instance_uuids:
                # We were not asked to stop this instance
                DLOG.info("Ignoring instance %s while stopping instances on "
                          "host %s" % (instance.name, host.name))
                continue

            if host_operation.instance_exists(instance.uuid):
                continue

            if instance.is_deleting() or instance.is_deleted() or \
                    instance.is_locked():
                continue

            if host_operation.operation_type in [OPERATION_TYPE.HOST_LOCK,
                                                 OPERATION_TYPE.STOP_INSTANCES]:
                # Fail the operation if an instance cannot be stopped
                if OPERATION_TYPE.HOST_LOCK == host_operation.operation_type:
                    preamble = "Lock of host"
                else:
                    preamble = "Stop instances on host"

                if instance.is_paused():
                    reason = ("%s %s failed because instance %s is paused."
                              % (preamble, host.name, instance.name))
                    DLOG.info(reason)
                    host_operation.add_instance(instance.uuid,
                                                OPERATION_STATE.FAILED)
                    host_operation.update_failure_reason(reason)
                    return
                elif instance.is_suspended():
                    reason = ("%s %s failed because instance %s is suspended."
                              % (preamble, host.name, instance.name))
                    DLOG.info(reason)
                    host_operation.add_instance(instance.uuid,
                                                OPERATION_STATE.FAILED)
                    host_operation.update_failure_reason(reason)
                    return
                elif instance.is_migrating() or instance.is_cold_migrating():
                    reason = ("%s %s failed because instance %s is migrating."
                              % (preamble, host.name, instance.name))
                    DLOG.info(reason)
                    host_operation.add_instance(instance.uuid,
                                                OPERATION_STATE.FAILED)
                    host_operation.update_failure_reason(reason)
                    return
                elif instance.is_rebuilding():
                    reason = ("%s %s failed because instance %s is rebuilding."
                              % (preamble, host.name, instance.name))
                    DLOG.info(reason)
                    host_operation.add_instance(instance.uuid,
                                                OPERATION_STATE.FAILED)
                    host_operation.update_failure_reason(reason)
                    return
                elif instance.is_action_running():
                    # Nova will not stop an instance if an action is already
                    # running.
                    reason = (
                        "%s %s failed because instance %s action in progress."
                        % (preamble, host.name, instance.name))
                    DLOG.info(reason)
                    host_operation.add_instance(instance.uuid,
                                                OPERATION_STATE.FAILED)
                    host_operation.update_failure_reason(reason)
                    return
                elif instance.is_resized():
                    # Nova will not stop an instance if is resized and
                    # waiting for confirmation.
                    reason = (
                        "%s %s failed because instance %s is resizing."
                        % (preamble, host.name, instance.name))
                    DLOG.info(reason)
                    host_operation.add_instance(instance.uuid,
                                                OPERATION_STATE.FAILED)
                    host_operation.update_failure_reason(reason)
                    return
            else:
                # Force lock - fail instances that cannot be stopped.
                if instance.is_paused():
                    DLOG.info("Instance %s set as failed on host %s "
                              "because it is paused." % (instance.name,
                                                         host.name))
                    instance.fail(reason + " and instance is paused")
                    continue
                elif instance.is_suspended():
                    DLOG.info("Instance %s set as failed on host %s because "
                              "it is suspended." % (instance.name, host.name))
                    instance.fail(reason + " and instance is suspended")
                    continue
                elif instance.is_migrating() or instance.is_cold_migrating():
                    DLOG.info("Instance %s set as failed on host %s because "
                              "it is migrating." % (instance.name, host.name))
                    instance.fail(reason + " and instance is migrating")
                    continue
                elif instance.is_rebuilding():
                    DLOG.info("Instance %s set as failed on host %s because "
                              "it is rebuilding." % (instance.name, host.name))
                    instance.fail(reason + " and instance is rebuilding")
                    continue
                elif instance.is_action_running():
                    DLOG.info("Instance %s set as failed on host %s because "
                              "an action is in progress." % (instance.name,
                                                             host.name))
                    instance.fail(reason +
                                  " and instance has action in progress")
                    continue
                elif instance.is_resized():
                    DLOG.info("Instance %s set as failed on host %s because "
                              "it is resized." % (instance.name, host.name))
                    instance.fail(reason + " and instance is resized")
                    continue

            host_operation.add_instance(instance.uuid, OPERATION_STATE.INPROGRESS)

            instance.do_action(objects.INSTANCE_ACTION_TYPE.STOP,
                               initiated_by=initiated_by, reason=reason)

    @staticmethod
    def _host_start_instances(host, host_operation, instance_uuids):
        """
        Host Start Instances
        """
        if host_operation.operation_type not in [
                OPERATION_TYPE.START_INSTANCES,
                OPERATION_TYPE.START_INSTANCES_SERIAL]:
            reason = ("Unsupported operation (%s) against host %s."
                      % (host_operation.operation_type, host.name))
            DLOG.info(reason)
            host_operation.set_failed(reason)
            return

        initiated_by = objects.INSTANCE_ACTION_INITIATED_BY.DIRECTOR
        if OPERATION_TYPE.START_INSTANCES == host_operation.operation_type:
            reason = "start instances issued"
        elif OPERATION_TYPE.START_INSTANCES_SERIAL == \
                host_operation.operation_type:
            reason = "start instances serial issued"
        else:
            reason = None

        starts_inprogress = 0

        instance_table = tables.tables_get_instance_table()
        for instance in instance_table.on_host(host.name):
            if instance.uuid not in instance_uuids:
                # We were not asked to start this instance
                DLOG.info("Ignoring instance %s while starting instances on "
                          "host %s" % (instance.name, host.name))
                continue

            if host_operation.instance_exists(instance.uuid):
                continue

            if instance.is_deleting() or instance.is_deleted() or \
                    instance.is_failed():
                continue

            if instance.is_paused():
                reason = ("Start instances on host %s failed because instance %s "
                          "is paused." % (host.name, instance.name))
                DLOG.info(reason)
                host_operation.add_instance(instance.uuid, OPERATION_STATE.FAILED)
                host_operation.update_failure_reason(reason)
                return
            elif instance.is_suspended():
                reason = ("Start instances on host %s failed because instance %s "
                          "is suspended." % (host.name, instance.name))
                DLOG.info(reason)
                host_operation.add_instance(instance.uuid, OPERATION_STATE.FAILED)
                host_operation.update_failure_reason(reason)
                return
            elif instance.is_migrating() or instance.is_cold_migrating():
                reason = ("Start instances on host %s failed because instance %s "
                          "is migrating." % (host.name, instance.name))
                DLOG.info(reason)
                host_operation.add_instance(instance.uuid, OPERATION_STATE.FAILED)
                host_operation.update_failure_reason(reason)
                return
            elif instance.is_rebuilding():
                reason = ("Start instances on host %s failed because instance %s "
                          "is rebuilding." % (host.name, instance.name))
                DLOG.info(reason)
                host_operation.add_instance(instance.uuid, OPERATION_STATE.FAILED)
                host_operation.update_failure_reason(reason)
                return
            elif instance.is_action_running():
                # Nova will not start an instance if an action is already
                # running.
                reason = (
                    "Start instances on host %s failed because instance %s "
                    "action in progress." % (host.name, instance.name))
                DLOG.info(reason)
                host_operation.add_instance(instance.uuid,
                                            OPERATION_STATE.FAILED)
                host_operation.update_failure_reason(reason)
                return
            elif instance.is_resized():
                # Nova will not start an instance if is resized and
                # waiting for confirmation.
                reason = (
                    "Start instances on host %s failed because instance %s "
                    "is resizing." % (host.name, instance.name))
                DLOG.info(reason)
                host_operation.add_instance(instance.uuid,
                                            OPERATION_STATE.FAILED)
                host_operation.update_failure_reason(reason)
                return
            elif not instance.is_locked():
                reason = ("Start instances on host %s failed because instance %s "
                          "is not locked." % (host.name, instance.name))
                DLOG.info(reason)
                host_operation.add_instance(instance.uuid, OPERATION_STATE.FAILED)
                host_operation.update_failure_reason(reason)
                return

            if OPERATION_TYPE.START_INSTANCES_SERIAL == \
                    host_operation.operation_type and starts_inprogress >= 1:
                # When starting instances in serial, the first instance is
                # started and the rest are set to the READY state, to be
                # started later.
                host_operation.add_instance(instance.uuid,
                                            OPERATION_STATE.READY)
            else:
                host_operation.add_instance(instance.uuid,
                                            OPERATION_STATE.INPROGRESS)
                instance.do_action(objects.INSTANCE_ACTION_TYPE.START,
                                   initiated_by=initiated_by, reason=reason)
                starts_inprogress += 1

    def instance_migrate_complete(self, instance, from_host_name, failed=False,
                                  timed_out=False, cancelled=False):
        """
        Instance Migrate Complete
        """
        from nfv_vim import directors

        host_director = directors.get_host_director()
        host_operation = self._host_operations.get(from_host_name, None)
        if host_operation is None:
            DLOG.verbose("No host %s operation inprogress." % from_host_name)
            return

        if host_operation.operation_type not in [OPERATION_TYPE.HOST_LOCK_FORCE,
                                                 OPERATION_TYPE.HOST_LOCK,
                                                 OPERATION_TYPE.HOST_DISABLE,
                                                 OPERATION_TYPE.HOST_FAILED,
                                                 OPERATION_TYPE.MIGRATE_INSTANCES]:
            DLOG.verbose("Unexpected host %s operation %s, ignoring."
                         % (from_host_name, host_operation.operation_type))
            return

        host_table = tables.tables_get_host_table()
        from_host = host_table.get(from_host_name, None)
        if from_host is None:
            DLOG.verbose("Host %s does not exist." % from_host_name)
            return

        if failed:
            reason = ("Migrate of instance %s from host %s failed."
                      % (instance.name, from_host_name))
            host_operation_state = OPERATION_STATE.FAILED
            DLOG.info(reason)

        elif timed_out:
            reason = ("Migrate of instance %s from host %s timed out."
                      % (instance.name, from_host_name))
            host_operation_state = OPERATION_STATE.TIMED_OUT
            DLOG.info(reason)

        elif cancelled:
            reason = ("Migrate of instance %s on host %s cancelled."
                      % (instance.name, from_host_name))
            host_operation_state = OPERATION_STATE.CANCELLED
            DLOG.info(reason)

        else:
            reason = ("Migrate of instance %s from host %s succeeded."
                      % (instance.name, from_host_name))
            host_operation_state = OPERATION_STATE.COMPLETED
            DLOG.info(reason)

        host_operation.update_instance(instance.uuid, host_operation_state)

        if host_operation.operation_type in [OPERATION_TYPE.HOST_LOCK,
                                             OPERATION_TYPE.MIGRATE_INSTANCES]:
            if OPERATION_STATE.COMPLETED != host_operation_state:
                host_operation.update_failure_reason(reason)
                host_director.host_instances_moved(from_host, host_operation)
                if OPERATION_TYPE.MIGRATE_INSTANCES == \
                        host_operation.operation_type:
                    sw_mgmt_director = directors.get_sw_mgmt_director()
                    sw_mgmt_director.migrate_instances_failed(reason)
                host_operation = self._host_operations.get(from_host.name, None)
                if host_operation is not None:
                    del self._host_operations[from_host.name]
                return
        else:
            if OPERATION_STATE.COMPLETED != host_operation_state:
                if not instance.is_failed():
                    instance.fail()

        # Continue with the next batch of instances
        if self._is_hypervisor_enabled(from_host_name):
            self._host_migrate_instances(from_host, host_operation)
        else:
            self._host_evacuate_instances(from_host, host_operation)

        # Check if host operation is complete
        if host_operation.is_inprogress():
            from_host.notify_instance_moved()
        else:
            host_director.host_instances_moved(from_host, host_operation)
            host_operation = self._host_operations.get(from_host.name, None)
            if host_operation is not None:
                del self._host_operations[from_host.name]

    def instance_evacuate_complete(self, instance, from_host_name, failed=False,
                                   timed_out=False, cancelled=False):
        """
        Instance Evacuate Complete
        """
        from nfv_vim import directors

        host_director = directors.get_host_director()
        host_operation = self._host_operations.get(from_host_name, None)
        if host_operation is None:
            DLOG.verbose("No host %s operation inprogress." % from_host_name)
            return

        if host_operation.operation_type not in [OPERATION_TYPE.HOST_LOCK_FORCE,
                                                 OPERATION_TYPE.HOST_LOCK,
                                                 OPERATION_TYPE.HOST_DISABLE,
                                                 OPERATION_TYPE.HOST_FAILED]:
            DLOG.verbose("Unexpected host %s operation %s, ignoring."
                         % (from_host_name, host_operation.operation_type))
            return

        host_table = tables.tables_get_host_table()
        from_host = host_table.get(from_host_name, None)
        if from_host is None:
            DLOG.verbose("Host %s does not exist." % from_host_name)
            return

        if failed:
            reason = ("Evacuate of instance %s from host %s failed."
                      % (instance.name, from_host_name))
            host_operation_state = OPERATION_STATE.FAILED
            DLOG.info(reason)

        elif timed_out:
            reason = ("Evacuate of instance %s from host %s timed out."
                      % (instance.name, from_host_name))
            host_operation_state = OPERATION_STATE.TIMED_OUT
            DLOG.info(reason)

        elif cancelled:
            reason = ("Evacuate of instance %s on host %s cancelled."
                      % (instance.name, from_host_name))
            host_operation_state = OPERATION_STATE.CANCELLED
            DLOG.info(reason)

        else:
            reason = ("Evacuate of instance %s from host %s succeeded."
                      % (instance.name, from_host_name))
            host_operation_state = OPERATION_STATE.COMPLETED
            DLOG.info(reason)

        host_operation.update_instance(instance.uuid, host_operation_state)

        if OPERATION_TYPE.HOST_LOCK == host_operation.operation_type:
            if OPERATION_STATE.COMPLETED != host_operation_state:
                host_operation.update_failure_reason(reason)
                host_director.host_instances_moved(from_host, host_operation)
                host_operation = self._host_operations.get(from_host.name, None)
                if host_operation is not None:
                    del self._host_operations[from_host.name]
                return
        else:
            if OPERATION_STATE.COMPLETED != host_operation_state:
                if not instance.is_failed():
                    instance.fail()

        # Continue with the next batch of instances
        self._host_evacuate_instances(from_host, host_operation)

        # Check if host operation is complete
        if host_operation.is_inprogress():
            from_host.notify_instance_moved()
        else:
            host_director.host_instances_moved(from_host, host_operation)
            host_operation = self._host_operations.get(from_host.name, None)
            if host_operation is not None:
                del self._host_operations[from_host.name]

    def instance_stop_complete(self, instance, on_host_name, failed=False,
                               timed_out=False, cancelled=False):
        """
        Instance Stop Complete
        """
        from nfv_vim import directors

        host_director = directors.get_host_director()
        host_operation = self._host_operations.get(on_host_name, None)
        if host_operation is None:
            DLOG.verbose("No host %s operation inprogress." % on_host_name)
            return

        if host_operation.operation_type not in [OPERATION_TYPE.STOP_INSTANCES,
                                                 OPERATION_TYPE.HOST_LOCK_FORCE,
                                                 OPERATION_TYPE.HOST_LOCK]:
            DLOG.verbose("Unexpected host %s operation %s, ignoring."
                         % (on_host_name, host_operation.operation_type))
            return

        host_table = tables.tables_get_host_table()
        host = host_table.get(on_host_name, None)
        if host is None:
            DLOG.verbose("Host %s does not exist." % on_host_name)
            return

        if failed:
            reason = ("Stop of instance %s on host %s failed."
                      % (instance.name, on_host_name))
            host_operation_state = OPERATION_STATE.FAILED
            DLOG.info(reason)

        elif timed_out:
            reason = ("Stop of instance %s on host %s timed out."
                      % (instance.name, on_host_name))
            host_operation_state = OPERATION_STATE.TIMED_OUT
            DLOG.info(reason)

        elif cancelled:
            reason = ("Stop of instance %s on host %s cancelled."
                      % (instance.name, on_host_name))
            host_operation_state = OPERATION_STATE.CANCELLED
            DLOG.info(reason)

        else:
            reason = ("Stop of instance %s on host %s succeeded."
                      % (instance.name, on_host_name))
            host_operation_state = OPERATION_STATE.COMPLETED
            DLOG.info(reason)

        host_operation.update_instance(instance.uuid, host_operation_state)

        if host_operation.operation_type in [OPERATION_TYPE.STOP_INSTANCES,
                                             OPERATION_TYPE.HOST_LOCK]:
            if OPERATION_STATE.COMPLETED != host_operation_state:
                host_operation.update_failure_reason(reason)
                host_director.host_instances_stopped(host, host_operation)
                host_operation = self._host_operations.get(host.name, None)
                if host_operation is not None:
                    del self._host_operations[host.name]
                return
        else:
            if OPERATION_STATE.COMPLETED != host_operation_state:
                # Fail the instance because we are going to proceed with the
                # operation regardless. Don't fail the instance if the stop
                # operation was cancelled - this is a force lock and we
                # fail instances that have operations in progress. Doing
                # another fail here will cause a loop because the
                # instance.is_failed() will not be true until nova has
                # reported the updated state to us.
                if not instance.is_failed() and not cancelled:
                    instance.fail()

        # Check if host operation is complete
        if host_operation.is_inprogress():
            host.notify_instance_stopped()
        else:
            host_director.host_instances_stopped(host, host_operation)
            host_operation = self._host_operations.get(host.name, None)
            if host_operation is not None:
                del self._host_operations[host.name]

    def instance_start_complete(self, instance, on_host_name, failed=False,
                                timed_out=False, cancelled=False):
        """
        Instance Start Complete
        """
        host_operation = self._host_operations.get(on_host_name, None)
        if host_operation is None:
            DLOG.verbose("No host %s operation inprogress." % on_host_name)
            return

        if host_operation.operation_type not in [
                OPERATION_TYPE.START_INSTANCES,
                OPERATION_TYPE.START_INSTANCES_SERIAL]:
            DLOG.verbose("Unexpected host %s operation %s, ignoring."
                         % (on_host_name, host_operation.operation_type))
            return

        host_table = tables.tables_get_host_table()
        host = host_table.get(on_host_name, None)
        if host is None:
            DLOG.verbose("Host %s does not exist." % on_host_name)
            return

        if failed:
            reason = ("Start of instance %s on host %s failed."
                      % (instance.name, on_host_name))
            host_operation_state = OPERATION_STATE.FAILED
            DLOG.info(reason)

        elif timed_out:
            reason = ("Start of instance %s on host %s timed out."
                      % (instance.name, on_host_name))
            host_operation_state = OPERATION_STATE.TIMED_OUT
            DLOG.info(reason)

        elif cancelled:
            reason = ("Start of instance %s on host %s cancelled."
                      % (instance.name, on_host_name))
            host_operation_state = OPERATION_STATE.CANCELLED
            DLOG.info(reason)

        else:
            reason = ("Start of instance %s on host %s succeeded."
                      % (instance.name, on_host_name))
            host_operation_state = OPERATION_STATE.COMPLETED
            DLOG.info(reason)

        host_operation.update_instance(instance.uuid, host_operation_state)

        if OPERATION_STATE.COMPLETED != host_operation_state:
            host_operation.update_failure_reason(reason)

        if OPERATION_TYPE.START_INSTANCES_SERIAL == \
                host_operation.operation_type:
            # Check if there is another instance on this host ready to start.
            # We continue starting instances even if the previous instance
            # failed to start.
            instance_table = tables.tables_get_instance_table()
            for instance in instance_table.on_host(host.name):
                if host_operation.instance_ready(instance.uuid):
                    host_operation.update_instance(instance.uuid,
                                                   OPERATION_STATE.INPROGRESS)
                    instance.do_action(
                        objects.INSTANCE_ACTION_TYPE.START,
                        initiated_by=objects.INSTANCE_ACTION_INITIATED_BY.DIRECTOR,
                        reason="start instances serial issued")
                    return

        # Check if host operation is complete
        if not host_operation.is_inprogress():
            host_operation = self._host_operations.get(host.name, None)
            if host_operation is not None:
                del self._host_operations[host.name]

    def _host_disabling_okay(self, host, host_operation):
        """
        Host Disabling Semantic checks
        """
        if OPERATION_TYPE.HOST_LOCK != host_operation.operation_type:
            return True

        instance_table = tables.tables_get_instance_table()
        for instance in instance_table.on_host(host.name):
            if instance.is_deleting() or instance.is_deleted() or \
                    instance.is_locked() or instance.is_failed():
                continue

            if self._is_hypervisor_enabled(host.name):
                if self._single_hypervisor:
                    # Only one hypervisor so instances will be stopped
                    operation = objects.INSTANCE_ACTION_TYPE.STOP
                else:
                    # Default behaviour is to cold migrate instance
                    operation = objects.INSTANCE_ACTION_TYPE.COLD_MIGRATE

                    # Unless live migration is supported...
                    if instance.is_enabled() or instance.is_paused():
                        if instance.supports_live_migration():
                            operation = objects.INSTANCE_ACTION_TYPE.LIVE_MIGRATE
            else:
                operation = objects.INSTANCE_ACTION_TYPE.EVACUATE

            if instance.is_paused() and \
                    objects.INSTANCE_ACTION_TYPE.LIVE_MIGRATE != operation:
                reason = ("Lock of host %s rejected because instance %s "
                          "is paused." % (host.name, instance.name))
                DLOG.info(reason)
                host_operation.add_instance(instance.uuid,
                                            OPERATION_STATE.FAILED)
                host_operation.update_failure_reason(reason)
                return False
            elif instance.is_suspended():
                reason = ("Lock of host %s rejected because instance %s "
                          "is suspended." % (host.name, instance.name))
                DLOG.info(reason)
                host_operation.add_instance(instance.uuid,
                                            OPERATION_STATE.FAILED)
                host_operation.update_failure_reason(reason)
                return False
            elif instance.is_migrating() or instance.is_cold_migrating():
                reason = ("Lock of host %s rejected because instance %s "
                          "is migrating." % (host.name, instance.name))
                DLOG.info(reason)
                host_operation.add_instance(instance.uuid,
                                            OPERATION_STATE.FAILED)
                host_operation.update_failure_reason(reason)
                return False
            elif instance.is_rebuilding():
                reason = ("Lock of host %s rejected because instance %s "
                          "is rebuilding." % (host.name, instance.name))
                DLOG.info(reason)
                host_operation.add_instance(instance.uuid,
                                            OPERATION_STATE.FAILED)
                host_operation.update_failure_reason(reason)
                return False
            elif instance.is_action_running():
                # Nova will not migrate or evacuate an instance if an action
                # is already running.
                reason = ("Lock of host %s rejected because instance %s "
                          "action in progress." % (host.name, instance.name))
                DLOG.info(reason)
                host_operation.add_instance(instance.uuid,
                                            OPERATION_STATE.FAILED)
                host_operation.update_failure_reason(reason)
                return False
            elif instance.is_resized():
                # Nova will not migrate or evacuate an instance if is resized
                # and waiting for confirmation.
                reason = ("Lock of host %s rejected because instance %s "
                          "is resizing." % (host.name, instance.name))
                DLOG.info(reason)
                host_operation.add_instance(instance.uuid,
                                            OPERATION_STATE.FAILED)
                host_operation.update_failure_reason(reason)
                return False
            elif not self._single_hypervisor and \
                    not self._hypervisors_available(min_count=1,
                                                    excluded_hosts=[host.name]):
                reason = ("Lock of host %s rejected because there are no "
                          "other hypervisors available." % host.name)
                DLOG.info(reason)
                host_operation.add_instance(instance.uuid,
                                            OPERATION_STATE.FAILED)
                host_operation.update_failure_reason(reason)
                return False
            else:
                if objects.INSTANCE_ACTION_TYPE.LIVE_MIGRATE == operation:
                    if not instance.can_live_migrate(system_initiated=True):
                        reason = ("Lock of host %s rejected because instance %s "
                                  "can't be live-migrated by the system.  "
                                  "Manually move the instance off of host %s."
                                  % (host.name, instance.name, host.name))
                        DLOG.info(reason)
                        host_operation.add_instance(instance.uuid,
                                                    OPERATION_STATE.FAILED)
                        host_operation.update_failure_reason(reason)
                        return False
                elif objects.INSTANCE_ACTION_TYPE.COLD_MIGRATE == operation:
                    if not instance.can_cold_migrate(system_initiated=True):
                        reason = ("Lock of host %s rejected because instance %s "
                                  "can't be cold-migrated by the system.  "
                                  "Manually move the instance off of host %s."
                                  % (host.name, instance.name, host.name))
                        DLOG.info(reason)
                        host_operation.add_instance(instance.uuid,
                                                    OPERATION_STATE.FAILED)
                        host_operation.update_failure_reason(reason)
                        return False
                elif objects.INSTANCE_ACTION_TYPE.EVACUATE == operation:
                    if not instance.can_evacuate(system_initiated=True):
                        reason = ("Lock of host %s rejected because instance %s "
                                  "can't be evacuated by the system.  Manually "
                                  "move the instance off of host %s."
                                  % (host.name, instance.name, host.name))
                        DLOG.info(reason)
                        host_operation.add_instance(instance.uuid,
                                                    OPERATION_STATE.FAILED)
                        host_operation.update_failure_reason(reason)
                        return False

        return True

    def host_operation_cancel(self, host_name):
        """
        Host Operation Cancel
        """
        host_operation = self._host_operations.get(host_name, None)
        if host_operation is not None:
            DLOG.info("Canceling host operation %s for host %s."
                      % (host_operation.operation_type, host_name))
            del self._host_operations[host_name]

    def host_services_disabling(self, host):
        """
        Host Services Disabling
        """
        DLOG.info("Host %s services disabling." % host.name)

        if host.is_force_lock():
            host_operation_type = OPERATION_TYPE.HOST_LOCK_FORCE
        elif host.is_locking():
            host_operation_type = OPERATION_TYPE.HOST_LOCK
        elif host.is_failed():
            host_operation_type = OPERATION_TYPE.HOST_FAILED
        else:
            host_operation_type = OPERATION_TYPE.HOST_DISABLE

        host_operation = self._host_operations.get(host.name, None)
        if host_operation is not None:
            DLOG.debug("Canceling previous host operation %s, before "
                       "continuing with host operation %s for %s."
                       % (host_operation.operation_type, host_operation_type,
                          host.name))
            del self._host_operations[host.name]

        host_operation = Operation(host_operation_type)

        DLOG.verbose("Host %s operation %s inprogress."
                     % (host.name, host_operation_type))

        if self._host_disabling_okay(host, host_operation):
            self._host_operations[host.name] = host_operation
            if self._single_hypervisor:
                # In single hypervisor configurations, we stop the instances
                # before disabling the host services.
                instance_table = tables.tables_get_instance_table()
                instance_uuids = list()

                for instance in instance_table.on_host(host.name):
                    # Stop any running instances.
                    if not instance.is_locked():
                        instance_uuids.append(instance.uuid)
                        # Instance should be unlocked when hypervisor recovers
                        instance.unlock_to_recover = True
                if instance_uuids:
                    self._host_stop_instances(host, host_operation, instance_uuids)
            elif self._is_hypervisor_enabled(host.name):
                # Migrate instances from this host before stopping host services.
                self._host_migrate_instances(host, host_operation)
            else:
                # Evacuate instances from this host before stopping host services.
                self._host_evacuate_instances(host, host_operation)

        return host_operation

    def host_services_disabled(self, host):
        """
        Host Services Disabled
        """
        DLOG.info("Host %s services disabled." % host.name)

        if host.is_force_lock():
            host_operation_type = OPERATION_TYPE.HOST_LOCK_FORCE
        elif host.is_locking():
            host_operation_type = OPERATION_TYPE.HOST_LOCK
        elif host.is_failed():
            host_operation_type = OPERATION_TYPE.HOST_FAILED
        else:
            host_operation_type = OPERATION_TYPE.HOST_DISABLE

        host_operation = self._host_operations.get(host.name, None)
        if host_operation is not None:
            DLOG.debug("Canceling previous host operation %s, before "
                       "continuing with host operation %s for %s."
                       % (host_operation.operation_type, host_operation_type,
                          host.name))
            del self._host_operations[host.name]

        host_operation = Operation(host_operation_type)

        DLOG.verbose("Host %s operation %s inprogress."
                     % (host.name, host_operation_type))

        self._host_operations[host.name] = host_operation
        # Do not evacuate instances from this host if we are in a single
        # hypervisor configuration.
        if not self._single_hypervisor:
            self._host_evacuate_instances(host, host_operation)
        return host_operation

    def host_disabled(self, host):
        """
        Host Disabled
        """
        DLOG.info("Host %s disabled." % host.name)

        host_operation = self._host_operations.get(host.name, None)
        if host_operation is not None:
            DLOG.debug("Canceling host operation %s for %s."
                       % (host_operation.operation_type, host.name))
            del self._host_operations[host.name]

    @staticmethod
    def host_offline(host):
        """
        Host Offline
        """
        instance_table = tables.tables_get_instance_table()
        for instance in instance_table.on_host(host.name):
            if instance.is_deleting() or instance.is_deleted():
                continue

            DLOG.info("Host %s is offline, notifying instance %s."
                      % (host.name, instance.name))
            instance.host_offline()

    def host_audit(self, host):
        """
        Host Audit
        """
        if not dor.system_is_stabilized():
            DLOG.info("DOR system stabilization is not complete, can't audit "
                      "instances on host %s." % host.name)
            return

        if self.upgrade_inprogress():
            DLOG.info("Upgrade inprogress, can't audit instances on host %s."
                      % host.name)
            return

        host_operation = self._host_operations.get(host.name, None)
        if host_operation is not None:
            DLOG.debug("Host operation %s for %s inprogress, can't audit "
                       "instances." % (host_operation.operation_type,
                                       host.name))
            return

        if not host.nfvi_host_is_enabled() or host.is_failed() or host.is_offline():
            instance_table = tables.tables_get_instance_table()
            for instance in instance_table.on_host(host.name):
                if instance.is_deleting() or instance.is_deleted() or \
                        instance.is_locked() or instance.is_failed():
                    continue

                DLOG.info("Host %s is failed or offline, setting instance %s "
                          "to failed, host audit." % (host.name, instance.name))
                instance.fail()

    @staticmethod
    def host_has_instances(host, skip_stopped=False):
        """
        Returns true if the given host has instances
        """
        count = 0
        instance_table = tables.tables_get_instance_table()
        for instance in instance_table.on_host(host.name):
            if instance.is_deleting() or instance.is_deleted():
                continue

            if skip_stopped and instance.is_locked():
                continue

            count += 1

        DLOG.info("Host %s has %s instances." % (host.name, count))
        return 0 < count

    @coroutine
    def _instance_create_callback(self, instance_name, callback):
        """
        Instance Create Callback
        """
        response = (yield)
        DLOG.verbose("Instance-Create callback response=%s." % response)
        if response['completed']:
            nfvi_instance = response['result-data']
            instance_table = tables.tables_get_instance_table()
            instance = instance_table.get(nfvi_instance.uuid, None)
            if instance is None:
                instance = objects.Instance(nfvi_instance)
                instance_table[instance.uuid] = instance
            instance.nfvi_instance_update(nfvi_instance)
            callback(response['completed'], instance_name, instance.uuid)
        else:
            callback(response['completed'], instance_name, None)

    @coroutine
    def _instance_type_create_callback(self, instance_name, instance_type_uuid,
                                       image_uuid, block_devices, networks,
                                       callback):
        """
        Instance-Type Create Callback
        """
        response = (yield)
        DLOG.verbose("Instance-Type-Create callback response=%s." % response)
        if response['completed']:
            nfvi_instance_type = response['result-data']
            instance_type_table = tables.tables_get_instance_type_table()
            instance_type_table[nfvi_instance_type.uuid] = nfvi_instance_type

            instance_create_callback = self._instance_create_callback(
                instance_name, callback)

            nfvi.nfvi_create_instance(instance_name, instance_type_uuid,
                                      image_uuid, block_devices, networks,
                                      instance_create_callback)
        else:
            callback(response['completed'], instance_name, None)

    def create_instance(self, instance_name, instance_type_uuid, vcpus,
                        mem_mb, disk_gb, ephemeral_gb, swap_gb, image_uuid,
                        block_devices, networks, auto_recovery,
                        live_migration_timeout, live_migration_max_downtime,
                        callback):
        """
        Create an instance
        """
        instance_type_create_callback = self._instance_type_create_callback(
            instance_name, instance_type_uuid, image_uuid, block_devices,
            networks, callback)

        instance_type_name = "%s-type" % instance_name
        instance_type_attributes = \
            nfvi.objects.v1.InstanceTypeAttributes(
                vcpus, mem_mb, disk_gb, ephemeral_gb, swap_gb, None, auto_recovery,
                live_migration_timeout, live_migration_max_downtime,
                nfvi.objects.v1.STORAGE_TYPE.LOCAL_LVM_BACKED)
        nfvi.nfvi_create_instance_type(instance_type_uuid, instance_type_name,
                                       instance_type_attributes,
                                       instance_type_create_callback)

    @staticmethod
    def delete_instance(instance):
        """
        Delete an instance
        """
        DLOG.info("Instance %s delete requested." % instance.uuid)
        instance.do_action(objects.INSTANCE_ACTION_TYPE.DELETE)

    @staticmethod
    def instance_audit(instance):
        """
        Notifies the instance director that an instance audit is inprogress
        """
        from nfv_vim import directors

        DLOG.verbose("Notify other directors that an instance %s audit is "
                     "inprogress." % instance.name)

        sw_mgmt_director = directors.get_sw_mgmt_director()
        sw_mgmt_director.host_audit(instance)

    @staticmethod
    def instance_state_change_notify(instance):
        """
        Notifies the instance director that a instance has changed state
        """
        from nfv_vim import directors

        DLOG.info("Instance %s state change notification." % instance.name)

        sw_mgmt_director = directors.get_sw_mgmt_director()
        sw_mgmt_director.instance_state_change(instance)

    def instance_recovered(self, instance):
        """
        Instance has signalled that it has recovered
        """
        DLOG.info("Instance %s has recovered on host %s."
                  % (instance.name, instance.host_name))
        self._reboot_count[instance.uuid] = 0

    def recover_instance(self, instance, recovery_method=None, force_fail=False,
                         fail_reason=None):
        """
        Recover an instance
        """
        if not dor.system_is_stabilized():
            DLOG.info("DOR system stabilization is not complete, can't "
                      "recover instance %s." % instance.name)
            self.reschedule_audit_instances(self._recovery_audit_cooldown)
            return False

        if self.upgrade_inprogress():
            DLOG.info("Upgrade inprogress, can't recover instance %s."
                      % instance.name)
            self.reschedule_audit_instances(self._recovery_audit_cooldown)
            return False

        if instance.uuid not in self._reboot_count:
            self._reboot_count[instance.uuid] = 0

        if not dor.dor_is_complete():
            self._reboot_count[instance.uuid] = 0

        method = recovery_method

        if method is None:
            if self._is_host_enabled(instance.host_name) and \
                    self._is_hypervisor_enabled(instance.host_name):
                # Evacuates are indicated by the instance is rebuilding state
                if instance.is_rebuilding():
                    force_fail = True
                    instance.cancel_action(objects.INSTANCE_ACTION_TYPE.REBUILD)
                    if instance.image_uuid is not None:
                        method = objects.INSTANCE_ACTION_TYPE.REBUILD
                    else:
                        method = objects.INSTANCE_ACTION_TYPE.REBOOT

                elif instance.is_migrating():
                    force_fail = True
                    instance.cancel_action(
                        objects.INSTANCE_ACTION_TYPE.LIVE_MIGRATE)
                    if instance.image_uuid is not None:
                        method = objects.INSTANCE_ACTION_TYPE.REBUILD
                    else:
                        method = objects.INSTANCE_ACTION_TYPE.REBOOT
                else:
                    if instance.is_rebooting():
                        force_fail = True
                        instance.cancel_action(objects.INSTANCE_ACTION_TYPE.REBOOT)

                    # Escalate to rebuild if last reboot didn't work
                    if self._reboot_count[instance.uuid] > 0:
                        if instance.image_uuid is not None:
                            method = objects.INSTANCE_ACTION_TYPE.REBUILD
                        else:
                            method = objects.INSTANCE_ACTION_TYPE.REBOOT
                    else:
                        method = objects.INSTANCE_ACTION_TYPE.REBOOT

            elif instance.can_evacuate(system_initiated=True):
                if dor.dor_is_complete():
                    method = objects.INSTANCE_ACTION_TYPE.EVACUATE
                    if instance.is_rebooting():
                        force_fail = True
                        instance.cancel_action(objects.INSTANCE_ACTION_TYPE.REBOOT)
                else:
                    host_table = tables.tables_get_host_table()
                    host = host_table.get(instance.host_name, None)
                    if host is None:
                        method = objects.INSTANCE_ACTION_TYPE.EVACUATE
                        if instance.is_rebooting():
                            force_fail = True
                            instance.cancel_action(
                                objects.INSTANCE_ACTION_TYPE.REBOOT)
            else:
                DLOG.info("Instance %s can't be evacuated by the system."
                          % instance.name)
                if not instance.is_failed() or force_fail:
                    instance.fail(fail_reason)

        if method is not None:
            DLOG.info("Attempt recovery of instance %s by %s, "
                      "uuid=%s, host_name=%s, admin_state=%s, "
                      "oper_state=%s, avail_status=%s, action=%s, "
                      "elapse_time_in_state=%s secs."
                      % (instance.name, method, instance.uuid,
                         instance.host_name, instance.admin_state,
                         instance.oper_state, instance.avail_status,
                         instance.action, instance.elapsed_time_in_state))

            if not instance.is_failed() or force_fail:
                instance.fail(fail_reason)

            if not instance.auto_recovery:
                DLOG.info("Recovery of instance %s by %s is skipped since "
                          "auto-recovery is turned off." % (instance.name, method))
                return False

            if not instance.recoverable:
                DLOG.info("Instance %s by %s is skipped since instance is not "
                          "recoverable." % (instance.name, method))
                return False

            initiated_by = objects.INSTANCE_ACTION_INITIATED_BY.DIRECTOR

            if objects.INSTANCE_ACTION_TYPE.REBOOT == method:
                self._reboot_count[instance.uuid] += 1
                instance.do_action(method, initiated_by=initiated_by)

            elif objects.INSTANCE_ACTION_TYPE.REBUILD == method:
                self._reboot_count[instance.uuid] = 0
                instance.do_action(method, initiated_by=initiated_by)

            elif objects.INSTANCE_ACTION_TYPE.EVACUATE == method:
                self._reboot_count[instance.uuid] = 0
                instance.do_action(method, initiated_by=initiated_by)

            elif objects.INSTANCE_ACTION_TYPE.STOP == method:
                self._reboot_count[instance.uuid] = 0
                instance.do_action(method, initiated_by=initiated_by)

        return method is not None

    def reschedule_audit_instances(self, interval=None):
        """
        Reschedule audit instances
        """
        if interval is None:
            interval = self._next_audit_interval

        if self._timer_audit_instances is not None:
            timers.timers_reschedule_timer(self._timer_audit_instances,
                                           interval)
            DLOG.verbose("Recovery audit is rescheduled to %s second "
                         "intervals." % interval)

    def recover_instances(self, audit=False):
        """
        Recover instances that were previously launched but are currently
        failed or executing an action for a very long time
        """
        if not dor.system_is_stabilized():
            DLOG.info("DOR system stabilization is not complete, can't recover "
                      "instances.")
            self.reschedule_audit_instances(self._recovery_audit_cooldown)
            return

        if self.upgrade_inprogress():
            DLOG.info("Upgrade inprogress, can't recover instances.")
            self.reschedule_audit_instances(self._recovery_audit_cooldown)
            return

        if not self._hypervisors_available(min_count=1):
            DLOG.info("No hypervisors available, can't recover instances.")
            self.reschedule_audit_instances(self._recovery_audit_cooldown)
            return

        if not audit:
            interval = self._recovery_audit_cooldown
            if 0 == len(self._instance_recovery_list):
                if self._next_audit_interval == self._recovery_audit_interval:
                    self.reschedule_audit_instances(interval)
            self._next_audit_interval = interval
            return

        if 0 == len(self._instance_recovery_list):
            (self._next_audit_interval, self._instance_recovery_list,
             self._instance_failed_list, self._instance_rebuilding_list,
             self._instance_migrating_list, self._instance_rebooting_list) \
                = self._get_instance_recovery_list()
            DLOG.info("Running recovery audit, instances_to_recover=%s, "
                      "instances_failed=%s, instances_rebuilding=%s, "
                      "instances_migrating=%s, instances_rebooting=%s."
                      % (len(self._instance_recovery_list),
                         len(self._instance_failed_list),
                         len(self._instance_rebuilding_list),
                         len(self._instance_migrating_list),
                         len(self._instance_rebooting_list)))
        else:
            DLOG.info("Running recovery audit, instances_remaining=%s."
                      % len(self._instance_recovery_list))

        # Attempt to recover instances, whether resources are available or not
        instance_table = tables.tables_get_instance_table()

        count = 0
        # Use a lower cutoff when there are a large number of instances to recover
        cutoff = self._max_concurrent_recovering_instances
        if len(self._instance_recovery_list) > self._recovery_threshold:
            cutoff = self._max_throttled_recovering_instances
        for instance_recover in list(self._instance_recovery_list):
            instance = instance_table.get(instance_recover.uuid, None)
            if instance is not None:
                host_operation_inprogress = False
                host_operation = self._host_operations.get(instance.host_name, None)
                if host_operation is not None:
                    if host_operation.is_inprogress():
                        DLOG.debug("Skip recovery of instance %s, host %s "
                                   "operation inprogress." % (instance.name,
                                                              instance.host_name))
                        host_operation_inprogress = True

                if not host_operation_inprogress:
                    if not (instance.is_deleting() or instance.is_deleted() or
                            instance.is_locked()) and instance.is_failed():
                        if self._instance_recovery_allowed(instance):
                            if self.recover_instance(instance):
                                count += 1

            self._instance_recovery_list.remove(instance_recover)
            if count >= cutoff:
                break

        if 0 == len(self._instance_recovery_list):
            self.unlock_instances()
            self.reschedule_audit_instances()
            DLOG.info("Completed recovery audit cycle.")
        else:
            self.reschedule_audit_instances(self._recovery_audit_batch_interval)
            DLOG.info("Completed recovery audit batch.")

    def unlock_instances(self):
        """
        Unlock (start) instances that were locked (stopped) when a single
        hypervisor configuration had its hypervisor disabled. This should only
        be done after all unlocked instances have been recovered.
        """
        if not os.path.exists(NFV_VIM_UNLOCK_COMPLETE_FILE):
            if self._single_hypervisor:
                DLOG.info("Unlocking instances after hypervisor enabled")
                instance_table = tables.tables_get_instance_table()
                instance_uuids = list()

                for instance in instance_table.itervalues():
                    if instance.unlock_to_recover and instance.is_locked():
                        instance_uuids.append(instance.uuid)
                    instance.unlock_to_recover = False

                if instance_uuids:
                    self.start_instances(instance_uuids, serial=True)

            # Do not attempt to do the unlock again.
            open(NFV_VIM_UNLOCK_COMPLETE_FILE, 'w').close()

    @coroutine
    def audit_instances(self):
        """
        Audit Instances
        """
        while True:
            (yield)
            self.recover_instances(audit=True)

    def cleanup_instance(self, instance_uuid):
        """
        Cleanup an instance
        """
        if instance_uuid not in self._instance_cleanup_list:
            self._instance_cleanup_list.append(instance_uuid)

        if self._timer_cleanup_instances is None:
            self._timer_cleanup_instances = timers.timers_create_timer(
                "cleanup-instances", 1, 1, self.cleanup_instances)

    @coroutine
    def cleanup_instances(self):
        """
        Cleanup Instances
        """
        (yield)
        trigger_recovery = False
        instance_table = tables.tables_get_instance_table()

        for instance_uuid in self._instance_cleanup_list:
            instance = instance_table.get(instance_uuid, None)
            if instance is not None:
                if instance.is_deleted():
                    DLOG.info("Cleaned up instance %s" % instance.name)
                    del instance_table[instance_uuid]
                    trigger_recovery = True

        self._instance_cleanup_list[:] = list()
        self._timer_cleanup_instances = None

        if trigger_recovery:
            DLOG.info("Recover-Instances-Audit triggered by instance deletion.")
            self.recover_instances()

    def migrate_instances(self, instance_uuids):
        """
        Migrate Instances
        """
        DLOG.info("Migrate instances uuids=%s." % instance_uuids)

        host_table = tables.tables_get_host_table()
        instance_table = tables.tables_get_instance_table()

        overall_operation = Operation(OPERATION_TYPE.MIGRATE_INSTANCES)

        host_operations = dict()
        for instance_uuid in instance_uuids:
            instance = instance_table.get(instance_uuid, None)
            if instance is None:
                reason = "Instance %s does not exist." % instance_uuid
                DLOG.info(reason)
                overall_operation.set_failed(reason)
                return overall_operation

            host = host_table.get(instance.host_name, None)
            if host is None:
                reason = "Host %s does not exist." % instance.host_name
                DLOG.info(reason)
                overall_operation.set_failed(reason)
                return overall_operation

            host_operation = self._host_operations.get(instance.host_name, None)
            if host_operation is not None:
                if host_operation.is_inprogress():
                    reason = ("Another host operation %s is already inprogress "
                              "for host %s." % (host_operation.operation_type,
                                                instance.host_name))
                    DLOG.info(reason)
                    overall_operation.set_failed(reason)
                    return overall_operation
                else:
                    del self._host_operations[instance.host_name]

            host_operation = host_operations.get(instance.host_name, None)
            if host_operation is None:
                host_operation = Operation(OPERATION_TYPE.MIGRATE_INSTANCES)
                host_operations[instance.host_name] = host_operation

        for host_name, host_operation in host_operations.iteritems():
            self._host_operations[host_name] = host_operation
            self._host_migrate_instances(host_table[host_name], host_operation)
            if host_operation.is_inprogress():
                overall_operation.add_host(host_name, OPERATION_STATE.INPROGRESS)
            elif host_operation.is_failed():
                overall_operation.add_host(host_name, OPERATION_STATE.FAILED)
                overall_operation.update_failure_reason(host_operation.reason)
                break
            elif host_operation.is_timed_out():
                overall_operation.add_host(host_name, OPERATION_STATE.TIMED_OUT)
                overall_operation.update_failure_reason(host_operation.reason)
                break
            else:
                overall_operation.add_host(host_name, OPERATION_STATE.COMPLETED)

        return overall_operation

    def stop_instances(self, instance_uuids):
        """
        Stop Instances
        """
        DLOG.info("Stop instances uuids=%s." % instance_uuids)

        host_table = tables.tables_get_host_table()
        instance_table = tables.tables_get_instance_table()

        overall_operation = Operation(OPERATION_TYPE.STOP_INSTANCES)

        host_operations = dict()
        for instance_uuid in instance_uuids:
            instance = instance_table.get(instance_uuid, None)
            if instance is None:
                reason = "Instance %s does not exist." % instance_uuid
                DLOG.info(reason)
                overall_operation.set_failed(reason)
                return overall_operation

            host = host_table.get(instance.host_name, None)
            if host is None:
                reason = "Host %s does not exist." % instance.host_name
                DLOG.info(reason)
                overall_operation.set_failed(reason)
                return overall_operation

            host_operation = self._host_operations.get(instance.host_name, None)
            if host_operation is not None:
                if host_operation.is_inprogress():
                    reason = ("Another host operation %s is already inprogress "
                              "for host %s." % (host_operation.operation_type,
                                                instance.host_name))
                    DLOG.info(reason)
                    overall_operation.set_failed(reason)
                    return overall_operation
                else:
                    del self._host_operations[instance.host_name]

            host_operation = host_operations.get(instance.host_name, None)
            if host_operation is None:
                host_operation = Operation(OPERATION_TYPE.STOP_INSTANCES)
                host_operations[instance.host_name] = host_operation

        for host_name, host_operation in host_operations.iteritems():
            self._host_operations[host_name] = host_operation
            self._host_stop_instances(host_table[host_name], host_operation,
                                      instance_uuids)
            if host_operation.is_inprogress():
                overall_operation.add_host(host_name, OPERATION_STATE.INPROGRESS)
            elif host_operation.is_failed():
                overall_operation.add_host(host_name, OPERATION_STATE.FAILED)
                overall_operation.update_failure_reason(host_operation.reason)
                break
            elif host_operation.is_timed_out():
                overall_operation.add_host(host_name, OPERATION_STATE.TIMED_OUT)
                overall_operation.update_failure_reason(host_operation.reason)
                break
            else:
                overall_operation.add_host(host_name, OPERATION_STATE.COMPLETED)

        return overall_operation

    def start_instances(self, instance_uuids, serial=False):
        """
        Start Instances
        """
        DLOG.info("Start instances uuids=%s." % instance_uuids)

        host_table = tables.tables_get_host_table()
        instance_table = tables.tables_get_instance_table()

        if serial:
            operation_type = OPERATION_TYPE.START_INSTANCES_SERIAL
        else:
            operation_type = OPERATION_TYPE.START_INSTANCES

        overall_operation = Operation(operation_type)

        host_operations = dict()
        for instance_uuid in instance_uuids:
            instance = instance_table.get(instance_uuid, None)
            if instance is None:
                reason = "Instance %s does not exist." % instance_uuid
                DLOG.info(reason)
                overall_operation.set_failed(reason)
                return overall_operation

            host = host_table.get(instance.host_name, None)
            if host is None:
                reason = "Host %s does not exist." % instance.host_name
                DLOG.info(reason)
                overall_operation.set_failed(reason)
                return overall_operation

            host_operation = self._host_operations.get(instance.host_name, None)
            if host_operation is not None:
                if host_operation.is_inprogress():
                    reason = ("Another host operation %s is already inprogress "
                              "for host %s." % (host_operation.operation_type,
                                                instance.host_name))
                    DLOG.info(reason)
                    overall_operation.set_failed(reason)
                    return overall_operation
                else:
                    del self._host_operations[instance.host_name]

            host_operation = host_operations.get(instance.host_name, None)
            if host_operation is None:
                host_operation = Operation(operation_type)
                host_operations[instance.host_name] = host_operation

        for host_name, host_operation in host_operations.iteritems():
            self._host_operations[host_name] = host_operation
            self._host_start_instances(host_table[host_name], host_operation,
                                       instance_uuids)
            if host_operation.is_inprogress():
                overall_operation.add_host(host_name, OPERATION_STATE.INPROGRESS)
            elif host_operation.is_failed():
                overall_operation.add_host(host_name, OPERATION_STATE.FAILED)
                overall_operation.update_failure_reason(host_operation.reason)
                break
            elif host_operation.is_timed_out():
                overall_operation.add_host(host_name, OPERATION_STATE.TIMED_OUT)
                overall_operation.update_failure_reason(host_operation.reason)
                break
            else:
                overall_operation.add_host(host_name, OPERATION_STATE.COMPLETED)

        return overall_operation


def get_instance_director():
    """
    Returns the Instance Director
    """
    return _instance_director


def instance_director_initialize():
    """
    Initialize Instance Director
    """
    global _instance_director

    if config.section_exists('instance-configuration'):
        section = config.CONF['instance-configuration']
        max_concurrent_recovering_instances \
            = int(section.get('max_concurrent_recovering_instances', 4))
        max_concurrent_migrates_per_host \
            = int(section.get('max_concurrent_migrates_per_host', 1))
        max_concurrent_evacuates_per_host \
            = int(section.get('max_concurrent_evacuates_per_host', 1))
        recovery_audit_interval \
            = int(section.get('recovery_audit_interval', 330))
        recovery_audit_cooldown \
            = int(section.get('recovery_audit_cooldown', 30))
        recovery_audit_batch_interval \
            = int(section.get('recovery_audit_batch_interval', 2))
        recovery_cooldown \
            = int(section.get('recovery_cooldown', 30))
        rebuild_timeout \
            = int(section.get('rebuild_timeout', 900))
        reboot_timeout \
            = int(section.get('reboot_timeout', 300))
        migrate_timeout \
            = int(section.get('migrate_timeout', 960))
        single_hypervisor \
            = (section.get('single_hypervisor', 'false').lower() == 'true')
        recovery_threshold = int(section.get('recovery_threshold', 250))
        max_throttled_recovering_instances \
            = int(section.get('max_throttled_recovering_instances', 2))

    else:
        max_concurrent_recovering_instances = 4
        max_concurrent_migrates_per_host = 1
        max_concurrent_evacuates_per_host = 1
        recovery_audit_interval = 330
        recovery_audit_cooldown = 30
        recovery_audit_batch_interval = 2
        recovery_cooldown = 30
        rebuild_timeout = 900
        reboot_timeout = 300
        migrate_timeout = 960
        single_hypervisor = False
        recovery_threshold = 250
        max_throttled_recovering_instances = 2

    _instance_director = InstanceDirector(
        max_concurrent_recovering_instances,
        max_concurrent_migrates_per_host,
        max_concurrent_evacuates_per_host,
        recovery_audit_interval,
        recovery_audit_cooldown,
        recovery_audit_batch_interval,
        recovery_cooldown,
        rebuild_timeout,
        reboot_timeout,
        migrate_timeout,
        single_hypervisor,
        recovery_threshold,
        max_throttled_recovering_instances)


def instance_director_finalize():
    """
    Finalize Instance Director
    """
    pass