distcloud/dcmanager/manager/subcloud_manager.py

591 lines
25 KiB
Python

# Copyright 2017 Ericsson AB.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Copyright (c) 2017-2018 Wind River Systems, Inc.
#
# The right to copy, distribute, modify, or otherwise make use
# of this software may be licensed only pursuant to the terms
# of an applicable Wind River license agreement.
#
import filecmp
import netaddr
import os
from oslo_log import log as logging
from oslo_messaging import RemoteError
from tsconfig.tsconfig import CONFIG_PATH
from tsconfig.tsconfig import SW_VERSION
from dcorch.common import consts as dcorch_consts
from dcorch.drivers.openstack.keystone_v3 import KeystoneClient
from dcorch.rpc import client as dcorch_rpc_client
from dcmanager.common import consts
from dcmanager.common import context
from dcmanager.common import exceptions
from dcmanager.common.i18n import _
from dcmanager.common import manager
from dcmanager.db import api as db_api
from dcmanager.drivers.openstack.sysinv_v1 import SysinvClient
from fm_api import constants as fm_const
from fm_api import fm_api
# Module-level logger shared by everything in this file.
LOG = logging.getLogger(__name__)
# Name of our distributed cloud addn_hosts file for dnsmasq
# to read. This file is referenced in dnsmasq.conf. The full path is
# built by joining this name onto CONFIG_PATH in _create_addn_hosts_dc().
ADDN_HOSTS_DC = 'dnsmasq.addn_hosts_dc'
class SubcloudManager(manager.Manager):
    """Manager that adds, deletes and updates subclouds."""

    def __init__(self, *args, **kwargs):
        """Initialise the manager and the clients it talks to.

        All positional and keyword arguments are handed to the parent
        manager unchanged, together with the fixed service name
        "subcloud_manager". The instance then keeps an admin request
        context, an RPC client for dcorch and a fault management API handle.
        """
        LOG.debug(_('SubcloudManager initialization...'))
        parent = super(SubcloudManager, self)
        parent.__init__(*args, service_name="subcloud_manager", **kwargs)

        self.context = context.get_admin_context()
        self.dcorch_rpc_client = dcorch_rpc_client.EngineClient()
        self.fm_api = fm_api.FaultAPIs()
def add_subcloud(self, context, payload):
    """Add subcloud and notify orchestrators.

    Creates the subcloud database record (using the system controller's
    software version) and one status row per endpoint type, adds a route
    to the subcloud management subnet on the management interface of every
    controller, creates the subcloud's identity endpoints in keystone,
    informs dcorch and regenerates the addn_hosts_dc file.

    If anything fails after the database record exists, the partially
    created subcloud is cleaned up on a best-effort basis and the original
    exception is re-raised.

    :param context: request context object
    :param payload: subcloud configuration. Keys read here: 'name',
        'description' (optional), 'location' (optional),
        'management-subnet', 'management-gateway-ip',
        'management-start-ip', 'management-end-ip' and
        'systemcontroller-gateway-ip'
    :returns: dict representation of the newly created subcloud
    :raises exceptions.BadRequest: if a subcloud with that name already
        exists or no identity service is found on the SystemController
    """
    LOG.info("Adding subcloud %s.", payload['name'])

    # The lookup is expected to fail; a hit means the name is taken.
    try:
        db_api.subcloud_get_by_name(context, payload['name'])
    except exceptions.SubcloudNameNotFound:
        pass
    else:
        raise exceptions.BadRequest(
            resource='subcloud',
            msg='Subcloud with that name already exists')

    # Subcloud is added with software version that matches system
    # controller.
    software_version = SW_VERSION
    try:
        subcloud = db_api.subcloud_create(
            context,
            payload['name'],
            payload.get('description'),
            payload.get('location'),
            software_version,
            payload['management-subnet'],
            payload['management-gateway-ip'],
            payload['management-start-ip'],
            payload['management-end-ip'],
            payload['systemcontroller-gateway-ip'])
    except Exception as e:
        LOG.exception(e)
        raise

    try:
        # Populate the subcloud status table with all endpoints. This is
        # done inside the protected region so that a failure here does not
        # leave an orphaned subcloud record behind.
        for endpoint in dcorch_consts.ENDPOINT_TYPES_LIST:
            db_api.subcloud_status_create(context,
                                          subcloud.id,
                                          endpoint)

        # Create a new route to this subcloud on the management interface
        # on both controllers.
        m_ks_client = KeystoneClient()
        subcloud_subnet = netaddr.IPNetwork(payload['management-subnet'])
        session = m_ks_client.endpoint_cache.get_session_from_token(
            context.auth_token, context.project)
        sysinv_client = SysinvClient(consts.DEFAULT_REGION_NAME, session)
        controllers = sysinv_client.get_controller_hosts()
        for controller in controllers:
            management_interface = sysinv_client.get_management_interface(
                controller.hostname)
            if management_interface is not None:
                sysinv_client.create_route(
                    management_interface.uuid,
                    str(subcloud_subnet.ip),
                    subcloud_subnet.prefixlen,
                    payload['systemcontroller-gateway-ip'],
                    1)

        # Create identity endpoints to this subcloud on the
        # management-start-ip of the subcloud which will be allocated
        # as the floating Management IP of the Subcloud if the
        # Address Pool is not shared. In case the endpoint entry
        # is incorrect, or the management IP of the subcloud is changed
        # in the future, it will not go managed or will show up as
        # out of sync. To fix this use Openstack endpoint commands
        # on the SystemController to change the subcloud endpoint
        ks_service_id = None
        for service in m_ks_client.services_list:
            if service.type == dcorch_consts.ENDPOINT_TYPE_IDENTITY:
                ks_service_id = service.id
                break
        else:
            raise exceptions.BadRequest(
                resource='subcloud',
                msg='No Identity service found on SystemController')

        identity_endpoint_ip = payload['management-start-ip']
        # An IPv6 literal has to be wrapped in brackets inside a URL.
        if netaddr.IPAddress(identity_endpoint_ip).version == 6:
            url_host = "[{}]".format(identity_endpoint_ip)
        else:
            url_host = identity_endpoint_ip
        identity_endpoint_url = "http://{}:5000/v3".format(url_host)

        for iface in ['internal', 'admin']:
            m_ks_client.keystone_client.endpoints.create(
                ks_service_id,
                identity_endpoint_url,
                interface=iface,
                region=subcloud.name)

        # Inform orchestrator that subcloud has been added
        self.dcorch_rpc_client.add_subcloud(
            context, subcloud.name, subcloud.software_version)

        # Regenerate the addn_hosts_dc file
        self._create_addn_hosts_dc(context)

        return db_api.subcloud_db_model_to_dict(subcloud)
    except Exception as e:
        LOG.exception(e)
        # If we failed to create the subcloud, clean up anything we may
        # have done. The cleanup never raises, so the error that reaches
        # the caller is always the one that caused the failure.
        self._cleanup_failed_subcloud(context, subcloud)
        raise


def _cleanup_failed_subcloud(self, context, subcloud):
    """Best-effort removal of a partially created subcloud.

    Each step is attempted independently: a failure to remove routes or
    endpoints (for instance because they were never created) is logged and
    does not prevent the database record from being destroyed.

    :param context: request context object
    :param subcloud: subcloud database object to clean up
    """
    try:
        self._delete_subcloud_routes(context, subcloud)
    except Exception as cleanup_error:
        LOG.exception(cleanup_error)

    try:
        db_api.subcloud_destroy(context, subcloud.id)
    except Exception as cleanup_error:
        LOG.exception(cleanup_error)
def _create_addn_hosts_dc(self, context):
    """Generate the addn_hosts_dc file for hostname/ip translation.

    One "management_start_ip name" line per subcloud is written to a
    temporary file next to the real one. The temporary file replaces the
    real file, and dnsmasq is sent SIGHUP, only when the real file is
    missing or its content differs; otherwise the temporary file is
    discarded.

    :param context: request context object
    """
    addn_hosts_dc = os.path.join(CONFIG_PATH, ADDN_HOSTS_DC)
    addn_hosts_dc_temp = addn_hosts_dc + '.temp'

    subclouds = db_api.subcloud_get_all(context)
    with open(addn_hosts_dc_temp, 'w') as f_out_addn_dc_temp:
        for subcloud in subclouds:
            addn_dc_line = subcloud.management_start_ip + ' ' + \
                subcloud.name + '\n'
            f_out_addn_dc_temp.write(addn_dc_line)

        # if no more subclouds, create empty file so dnsmasq does not
        # emit an error log.
        if not subclouds:
            f_out_addn_dc_temp.write(' ')

    # filecmp.cmp() raises if either file is missing, so a target that does
    # not exist yet is simply treated as "changed". shallow=False forces a
    # content comparison instead of trusting matching stat signatures.
    unchanged = (os.path.exists(addn_hosts_dc) and
                 filecmp.cmp(addn_hosts_dc_temp, addn_hosts_dc,
                             shallow=False))
    if unchanged:
        # Nothing to publish; do not leave the temporary file lying around.
        os.remove(addn_hosts_dc_temp)
        return

    os.rename(addn_hosts_dc_temp, addn_hosts_dc)
    # restart dnsmasq so it can re-read our addn_hosts file.
    os.system("pkill -HUP dnsmasq")
def _delete_subcloud_routes(self, context, subcloud):
    """Remove a subcloud's identity endpoints and management routes.

    The keystone endpoints registered under the subcloud's name are deleted
    first. Then, for every controller host that has a management
    interface, the route to the subcloud management subnet via the
    subcloud's systemcontroller gateway is deleted.

    :param context: request context object (supplies the auth token and
        project used to build the sysinv session)
    :param subcloud: subcloud database object
    """
    ks_client = KeystoneClient()

    # Identity endpoints go first.
    ks_client.delete_endpoints(subcloud.name)

    # Then the route on the management interface of each controller.
    subnet = netaddr.IPNetwork(subcloud.management_subnet)
    sysinv_session = ks_client.endpoint_cache.get_session_from_token(
        context.auth_token, context.project)
    sysinv = SysinvClient(consts.DEFAULT_REGION_NAME, sysinv_session)

    for host in sysinv.get_controller_hosts():
        mgmt_iface = sysinv.get_management_interface(host.hostname)
        if mgmt_iface is None:
            # No management interface on this host, nothing to delete.
            continue
        sysinv.delete_route(
            mgmt_iface.uuid,
            str(subnet.ip),
            subnet.prefixlen,
            str(netaddr.IPAddress(subcloud.systemcontroller_gateway_ip)),
            1)
def delete_subcloud(self, context, subcloud_id):
    """Delete subcloud and notify orchestrators.

    The subcloud must be unmanaged and must not be online. dcorch is
    informed, the subcloud's keystone endpoints and region and its
    management routes are removed, the database record is destroyed, any
    subcloud-offline alarm is cleared and the addn_hosts_dc file is
    regenerated.

    :param context: request context object.
    :param subcloud_id: id of subcloud to delete
    :raises exceptions.SubcloudNotUnmanaged: if the subcloud is not
        unmanaged
    :raises exceptions.SubcloudNotOffline: if the subcloud is online
    """
    LOG.info("Deleting subcloud %s.", subcloud_id)

    # Retrieve the subcloud details from the database
    subcloud = db_api.subcloud_get(context, subcloud_id)

    # Semantic checking
    if subcloud.management_state != consts.MANAGEMENT_UNMANAGED:
        raise exceptions.SubcloudNotUnmanaged()

    if subcloud.availability_status == consts.AVAILABILITY_ONLINE:
        raise exceptions.SubcloudNotOffline()

    # Inform orchestrators that subcloud has been deleted
    try:
        self.dcorch_rpc_client.del_subcloud(context, subcloud.name)
    except RemoteError as e:
        # The remote exception type is only available as text, so match on
        # the string form (an exception object does not support "in").
        # dcorch not knowing the subcloud is fine. Any other remote failure
        # is reported but, as before, does not block the deletion.
        if "SubcloudNotFound" not in str(e):
            LOG.warning("Problem informing dcorch of subcloud deletion, "
                        "subcloud: %s, error: %s", subcloud.name, e)

    # We only delete subcloud endpoints, region and user information
    # in the Central Region. The subcloud is already unmanaged and powered
    # down so is not accessible. Therefore set up a session with the
    # Central Region Keystone ONLY.
    keystone_client = KeystoneClient()

    # Delete keystone endpoints for subcloud
    # NOTE(review): _delete_subcloud_routes() below calls delete_endpoints
    # for this subcloud again; presumably harmless, confirm before removing
    # either call.
    keystone_client.delete_endpoints(subcloud.name)
    keystone_client.delete_region(subcloud.name)

    # Delete the routes to this subcloud
    self._delete_subcloud_routes(context, subcloud)

    # Remove the subcloud from the database
    try:
        db_api.subcloud_destroy(context, subcloud_id)
    except Exception as e:
        LOG.exception(e)
        raise

    # Clear the offline fault associated with this subcloud as we
    # are deleting it. Note that endpoint out-of-sync alarms should
    # have been cleared when the subcloud was unmanaged and the endpoint
    # sync statuses were set to unknown.
    entity_instance_id = "subcloud=%s" % subcloud.name

    try:
        subcloud_offline = fm_const.FM_ALARM_ID_DC_SUBCLOUD_OFFLINE
        fault = self.fm_api.get_fault(subcloud_offline,
                                      entity_instance_id)
        if fault:
            self.fm_api.clear_fault(subcloud_offline,
                                    entity_instance_id)
    except Exception as e:
        # Failing to clear the alarm must not fail the deletion.
        LOG.info("Problem clearing offline fault for "
                 "subcloud %s", subcloud.name)
        LOG.exception(e)

    # Regenerate the addn_hosts_dc file
    self._create_addn_hosts_dc(context)
def update_subcloud(self, context, subcloud_id, management_state=None,
                    description=None, location=None):
    """Update subcloud and notify orchestrators.

    When a management state is supplied it is validated against the
    subcloud's current state before the record is updated. After the
    update dcorch is told about the new state (a failure to do so is only
    logged), and when the subcloud became unmanaged all of its endpoint
    sync statuses are set to unknown.

    :param context: request context object
    :param subcloud_id: id of subcloud to update
    :param management_state: new management state
    :param description: new description
    :param location: new location
    :returns: dict representation of the updated subcloud
    :raises exceptions.BadRequest: if the subcloud is already in the
        requested management state
    :raises exceptions.SubcloudNotOnline: if asked to manage a subcloud
        that is not online
    :raises exceptions.InternalError: if management_state is not a
        recognised value
    """
    LOG.info("Updating subcloud %s.", subcloud_id)

    # Get the subcloud details from the database
    subcloud = db_api.subcloud_get(context, subcloud_id)

    # Semantic checking
    if management_state:
        if management_state == consts.MANAGEMENT_UNMANAGED:
            if subcloud.management_state == consts.MANAGEMENT_UNMANAGED:
                LOG.warning("Subcloud %s already unmanaged", subcloud_id)
                raise exceptions.BadRequest(
                    resource='subcloud',
                    msg='Subcloud is already unmanaged')
        elif management_state == consts.MANAGEMENT_MANAGED:
            if subcloud.management_state == consts.MANAGEMENT_MANAGED:
                LOG.warning("Subcloud %s already managed", subcloud_id)
                raise exceptions.BadRequest(
                    resource='subcloud',
                    msg='Subcloud is already managed')
            if subcloud.availability_status != consts.AVAILABILITY_ONLINE:
                LOG.warning("Subcloud %s is not online", subcloud_id)
                raise exceptions.SubcloudNotOnline()
        else:
            LOG.error("Invalid management_state %s", management_state)
            raise exceptions.InternalError()

    subcloud = db_api.subcloud_update(context, subcloud_id,
                                      management_state=management_state,
                                      description=description,
                                      location=location)

    # Inform orchestrators that subcloud has been updated
    if management_state:
        try:
            # Inform orchestrator of state change
            self.dcorch_rpc_client.update_subcloud_states(
                context,
                subcloud.name,
                management_state,
                subcloud.availability_status)

            LOG.info('Notifying dcorch, subcloud:%s management: %s, '
                     'availability:%s',
                     subcloud.name,
                     management_state,
                     subcloud.availability_status)
        except Exception as e:
            # The database is already updated; a notification failure is
            # reported but does not fail the request.
            LOG.exception(e)
            LOG.warning('Problem informing dcorch of subcloud '
                        'state change, subcloud: %s', subcloud.name)

        if management_state == consts.MANAGEMENT_UNMANAGED:
            # set all endpoint statuses to unknown
            self.update_subcloud_endpoint_status(
                context,
                subcloud_name=subcloud.name,
                endpoint_type=None,
                sync_status=consts.SYNC_STATUS_UNKNOWN)

    return db_api.subcloud_db_model_to_dict(subcloud)
def _update_endpoint_status_for_subcloud(self, context, subcloud_id,
                                         endpoint_type, sync_status,
                                         alarmable):
    """Update subcloud endpoint status

    With an endpoint_type only that endpoint is updated, and nothing is
    done when its stored sync status already equals sync_status. Without
    an endpoint_type every endpoint that has a status row for the subcloud
    is updated. If no subcloud is found for subcloud_id an error is logged
    and nothing else happens.

    :param context: request context object
    :param subcloud_id: id of subcloud to update
    :param endpoint_type: endpoint type to update, or a false value to
        update all endpoints of the subcloud
    :param sync_status: sync status to set
    :param alarmable: whether an out-of-sync alarm may be raised when the
        status is set to out-of-sync
    :raises exceptions.BadRequest: if endpoint_type is given but the
        subcloud has no status row for it
    """
    subcloud_status_list = []
    subcloud = None
    # retrieve the info from the db for this subcloud.
    # subcloud_id should not be None
    try:
        for subcloud, subcloud_status in \
                db_api.subcloud_get_with_status(context, subcloud_id):
            if subcloud_status:
                subcloud_status_list.append(
                    db_api.subcloud_endpoint_status_db_model_to_dict(
                        subcloud_status))
    except Exception as e:
        LOG.exception(e)
        raise

    if not subcloud:
        LOG.error("Subcloud not found:%s", subcloud_id)
        return

    if endpoint_type:
        # updating a single endpoint on a single subcloud
        for subcloud_status in subcloud_status_list:
            if subcloud_status['endpoint_type'] == endpoint_type:
                if subcloud_status['sync_status'] == sync_status:
                    # No change in the sync_status
                    LOG.debug("Sync status (%s) for subcloud %s did "
                              "not change - ignore update",
                              sync_status, subcloud.name)
                    return
                # We found the endpoint
                break
        else:
            # We did not find the endpoint
            raise exceptions.BadRequest(
                resource='subcloud',
                msg='Endpoint %s not found for subcloud' %
                    endpoint_type)

        LOG.info("Updating subcloud:%s endpoint:%s sync:%s",
                 subcloud.name, endpoint_type, sync_status)
        self._set_endpoint_sync_status(context, subcloud_id, subcloud.name,
                                       endpoint_type, sync_status,
                                       alarmable)
    else:
        # update all endpoints on this subcloud
        LOG.info("Updating all subclouds, endpoint: %s sync: %s",
                 endpoint_type, sync_status)
        for entry in subcloud_status_list:
            self._set_endpoint_sync_status(context, subcloud_id,
                                           subcloud.name,
                                           entry[consts.ENDPOINT_TYPE],
                                           sync_status, alarmable)


def _set_endpoint_sync_status(self, context, subcloud_id, subcloud_name,
                              endpoint, sync_status, alarmable):
    """Store one endpoint's sync status and reconcile its out-of-sync alarm.

    After the database update, an existing out-of-sync alarm for the
    endpoint is cleared when the new status is anything other than
    out-of-sync. When the new status is out-of-sync, no alarm exists yet
    and alarmable is true, a new alarm is raised. Failures to clear or
    raise the alarm are logged and otherwise ignored.

    :param context: request context object
    :param subcloud_id: id of the subcloud owning the endpoint
    :param subcloud_name: name of that subcloud (used in the alarm entity)
    :param endpoint: endpoint type whose status is being set
    :param sync_status: sync status to set
    :param alarmable: whether an out-of-sync alarm may be raised
    """
    db_api.subcloud_status_update(context,
                                  subcloud_id,
                                  endpoint,
                                  sync_status)

    alarm_id = fm_const.FM_ALARM_ID_DC_SUBCLOUD_RESOURCE_OUT_OF_SYNC
    entity_instance_id = "subcloud=%s.resource=%s" % \
        (subcloud_name, endpoint)
    fault = self.fm_api.get_fault(alarm_id, entity_instance_id)
    out_of_sync = (sync_status == consts.SYNC_STATUS_OUT_OF_SYNC)

    if fault and not out_of_sync:
        try:
            self.fm_api.clear_fault(alarm_id, entity_instance_id)
        except Exception as e:
            LOG.exception(e)
    elif not fault and alarmable and out_of_sync:
        entity_type_id = fm_const.FM_ENTITY_TYPE_SUBCLOUD
        try:
            fault = fm_api.Fault(
                alarm_id=alarm_id,
                alarm_state=fm_const.FM_ALARM_STATE_SET,
                entity_type_id=entity_type_id,
                entity_instance_id=entity_instance_id,
                severity=fm_const.FM_ALARM_SEVERITY_MAJOR,
                reason_text=("%s %s sync_status is "
                             "out-of-sync" %
                             (subcloud_name, endpoint)),
                alarm_type=fm_const.FM_ALARM_TYPE_0,
                probable_cause=fm_const.ALARM_PROBABLE_CAUSE_2,
                proposed_repair_action="If problem persists "
                                       "contact next level "
                                       "of support",
                service_affecting=False)
            self.fm_api.set_fault(fault)
        except Exception as e:
            LOG.exception(e)
def update_subcloud_endpoint_status(
        self, context,
        subcloud_name=None,
        endpoint_type=None,
        sync_status=consts.SYNC_STATUS_OUT_OF_SYNC,
        alarmable=True):
    """Update subcloud endpoint status

    Applies the status to the named subcloud, or to every subcloud when no
    name is given. An in-sync status is only accepted for a subcloud that
    is both online and managed; for any other subcloud it is ignored with
    an info log. All other statuses are always applied.

    :param context: request context object
    :param subcloud_name: name of subcloud to update, or a false value to
        update all subclouds
    :param endpoint_type: endpoint type to update, or a false value to
        update all endpoints
    :param sync_status: sync status to set
    :param alarmable: whether an out-of-sync alarm may be raised for the
        updated endpoints
    """
    if subcloud_name:
        try:
            subclouds = [db_api.subcloud_get_by_name(context,
                                                     subcloud_name)]
        except Exception as e:
            LOG.exception(e)
            raise
    else:
        # update all subclouds
        subclouds = db_api.subcloud_get_all(context)

    for subcloud in subclouds:
        # Only allow setting in-sync if managed and online.
        # This means if a subcloud is going offline or unmanaged, then
        # the sync status update must be done first.
        online_and_managed = (
            subcloud.availability_status == consts.AVAILABILITY_ONLINE and
            subcloud.management_state == consts.MANAGEMENT_MANAGED)

        if not online_and_managed and \
                sync_status == consts.SYNC_STATUS_IN_SYNC:
            LOG.info("Ignoring unmanaged/offline subcloud sync_status "
                     "update for subcloud:%s endpoint:%s sync:%s",
                     subcloud.name, endpoint_type, sync_status)
            continue

        try:
            self._update_endpoint_status_for_subcloud(
                context,
                subcloud.id,
                endpoint_type,
                sync_status,
                alarmable)
        except Exception as e:
            LOG.exception(e)
            raise