distcloud/distributedcloud/dcmanager/manager/subcloud_manager.py

# Copyright 2017 Ericsson AB.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Copyright (c) 2017-2021 Wind River Systems, Inc.
#
# The right to copy, distribute, modify, or otherwise make use
# of this software may be licensed only pursuant to the terms
# of an applicable Wind River license agreement.
#
import filecmp
import json
import keyring
import netaddr
import os
import threading
import time
from oslo_log import log as logging
from oslo_messaging import RemoteError
from tsconfig.tsconfig import CONFIG_PATH
from tsconfig.tsconfig import SW_VERSION
from dccommon import consts as dccommon_consts
from dccommon.drivers.openstack.sdk_platform import OpenStackDriver
from dccommon.drivers.openstack.sysinv_v1 import SysinvClient
from dccommon.exceptions import PlaybookExecutionFailed
from dccommon import kubeoperator
from dccommon.subcloud_install import SubcloudInstall
from dccommon.utils import run_playbook
from dcorch.common import consts as dcorch_consts
from dcorch.rpc import client as dcorch_rpc_client
from dcmanager.audit import rpcapi as dcmanager_audit_rpc_client
from dcmanager.common import consts
from dcmanager.common.consts import INVENTORY_FILE_POSTFIX
from dcmanager.common import context
from dcmanager.common import exceptions
from dcmanager.common.i18n import _
from dcmanager.common import manager
from dcmanager.common import utils
from dcmanager.rpc import client as rpc_client
from dcmanager.db import api as db_api
from fm_api import constants as fm_const
from fm_api import fm_api
LOG = logging.getLogger(__name__)
# Name of our distributed cloud addn_hosts file for dnsmasq
# to read. This file is referenced in dnsmasq.conf
ADDN_HOSTS_DC = 'dnsmasq.addn_hosts_dc'
# Subcloud configuration paths
ANSIBLE_SUBCLOUD_PLAYBOOK = \
'/usr/share/ansible/stx-ansible/playbooks/bootstrap.yml'
ANSIBLE_SUBCLOUD_INSTALL_PLAYBOOK = \
'/usr/share/ansible/stx-ansible/playbooks/install.yml'
ANSIBLE_SUBCLOUD_RESTORE_PLAYBOOK = \
'/usr/share/ansible/stx-ansible/playbooks/restore_platform.yml'
ANSIBLE_HOST_VALIDATION_PLAYBOOK = \
'/usr/share/ansible/stx-ansible/playbooks/validate_host.yml'
ANSIBLE_SUBCLOUD_REHOME_PLAYBOOK = \
'/usr/share/ansible/stx-ansible/playbooks/rehome_subcloud.yml'
USERS_TO_REPLICATE = [
'sysinv',
'patching',
'vim',
'mtce',
'fm',
'barbican',
'dcmanager']
# The timeout of the rehome playbook is set to 180 seconds as it takes a
# long time for privilege escalation before resetting the host route and
# LDAP server address in a subcloud.
REHOME_PLAYBOOK_TIMEOUT = "180" # 180 seconds
SC_INTERMEDIATE_CERT_DURATION = "8760h" # 1 year = 24 hours x 365
SC_INTERMEDIATE_CERT_RENEW_BEFORE = "720h" # 30 days
CERT_NAMESPACE = "dc-cert"
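# Map of in-flight deploy states to their corresponding failure states. Used
# by handle_subcloud_operations_in_progress() to fail subclouds that were
# left in a transitory state.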
TRANSITORY_STATES = {consts.DEPLOY_STATE_NONE: consts.DEPLOY_STATE_DEPLOY_PREP_FAILED,
consts.DEPLOY_STATE_PRE_DEPLOY: consts.DEPLOY_STATE_DEPLOY_PREP_FAILED,
consts.DEPLOY_STATE_PRE_INSTALL: consts.DEPLOY_STATE_PRE_INSTALL_FAILED,
consts.DEPLOY_STATE_INSTALLING: consts.DEPLOY_STATE_INSTALL_FAILED,
consts.DEPLOY_STATE_BOOTSTRAPPING: consts.DEPLOY_STATE_BOOTSTRAP_FAILED,
consts.DEPLOY_STATE_DEPLOYING: consts.DEPLOY_STATE_DEPLOY_FAILED,
consts.DEPLOY_STATE_MIGRATING_DATA: consts.DEPLOY_STATE_DATA_MIGRATION_FAILED,
consts.DEPLOY_STATE_PRE_RESTORE: consts.DEPLOY_STATE_RESTORE_PREP_FAILED,
consts.DEPLOY_STATE_RESTORING: consts.DEPLOY_STATE_RESTORE_FAILED,
}
def sync_update_subcloud_endpoint_status(func):
"""Synchronized lock decorator for _update_subcloud_endpoint_status. """
def _get_lock_and_call(*args, **kwargs):
"""Get a single fair lock per subcloud based on subcloud name. """
# subcloud name is the 3rd argument to
# _update_subcloud_endpoint_status()
@utils.synchronized(args[2], external=False, fair=True)
def _call_func(*args, **kwargs):
return func(*args, **kwargs)
return _call_func(*args, **kwargs)
return _get_lock_and_call
class SubcloudManager(manager.Manager):
"""Manages tasks related to subclouds."""
def __init__(self, *args, **kwargs):
LOG.debug(_('SubcloudManager initialization...'))
super(SubcloudManager, self).__init__(service_name="subcloud_manager",
*args, **kwargs)
self.context = context.get_admin_context()
self.dcorch_rpc_client = dcorch_rpc_client.EngineClient()
self.fm_api = fm_api.FaultAPIs()
self.audit_rpc_client = dcmanager_audit_rpc_client.ManagerAuditClient()
@staticmethod
def _get_subcloud_cert_name(subcloud_name):
cert_name = "%s-adminep-ca-certificate" % subcloud_name
return cert_name
@staticmethod
def _get_subcloud_cert_secret_name(subcloud_name):
secret_name = "%s-adminep-ca-certificate" % subcloud_name
return secret_name
@staticmethod
def _create_intermediate_ca_cert(payload):
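        """Create an intermediate CA certificate for the subcloud.

        Applies a cert-manager Certificate in the dc-cert namespace, waits
        for the backing secret to be populated, then stores the root CA
        certificate and the intermediate certificate/key pair in the payload.
        """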
subcloud_name = payload["name"]
cert_name = SubcloudManager._get_subcloud_cert_name(subcloud_name)
secret_name = SubcloudManager._get_subcloud_cert_secret_name(
subcloud_name)
cert = {
"apiVersion": "cert-manager.io/v1alpha2",
"kind": "Certificate",
"metadata": {
"namespace": CERT_NAMESPACE,
"name": cert_name
},
"spec": {
"secretName": secret_name,
"duration": SC_INTERMEDIATE_CERT_DURATION,
"renewBefore": SC_INTERMEDIATE_CERT_RENEW_BEFORE,
"issuerRef": {
"kind": "Issuer",
"name": "dc-adminep-root-ca-issuer"
},
"commonName": cert_name,
"isCA": True,
},
}
kube = kubeoperator.KubeOperator()
kube.apply_cert_manager_certificate(CERT_NAMESPACE, cert_name, cert)
for count in range(1, 20):
secret = kube.kube_get_secret(secret_name, CERT_NAMESPACE)
if not hasattr(secret, 'data'):
time.sleep(1)
LOG.debug('Wait for %s ... %s' % (secret_name, count))
continue
data = secret.data
if ('ca.crt' not in data or
'tls.crt' not in data or 'tls.key' not in data) or \
not (data['ca.crt'] and data['tls.crt'] and data['tls.key']):
                # The CA certificate and the certificate/key pair must all
                # exist before an intermediate CA can be created. If any is
                # missing, the certificate is not ready yet.
time.sleep(1)
LOG.debug('Wait for %s ... %s' % (secret_name, count))
continue
payload['dc_root_ca_cert'] = data['ca.crt']
payload['sc_ca_cert'] = data['tls.crt']
payload['sc_ca_key'] = data['tls.key']
return
raise Exception("Secret for certificate %s is not ready." % cert_name)
def _get_ansible_filename(self, subcloud_name, postfix='.yml'):
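        """Build the ansible inventory/overrides filename for a subcloud."""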
ansible_filename = os.path.join(
consts.ANSIBLE_OVERRIDES_PATH,
subcloud_name + postfix)
return ansible_filename
def compose_install_command(self, subcloud_name, ansible_subcloud_inventory_file):
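        """Assemble the ansible-playbook command used to install a subcloud."""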
install_command = [
"ansible-playbook", ANSIBLE_SUBCLOUD_INSTALL_PLAYBOOK,
"-i", ansible_subcloud_inventory_file,
"--limit", subcloud_name,
"-e", "@%s" % consts.ANSIBLE_OVERRIDES_PATH + "/" +
subcloud_name + '/' + "install_values.yml"]
return install_command
def compose_apply_command(self, subcloud_name, ansible_subcloud_inventory_file):
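        """Assemble the ansible-playbook command used to bootstrap a subcloud."""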
apply_command = [
"ansible-playbook", ANSIBLE_SUBCLOUD_PLAYBOOK, "-i",
ansible_subcloud_inventory_file,
"--limit", subcloud_name
]
# Add the overrides dir and region_name so the playbook knows
# which overrides to load
apply_command += [
"-e", str("override_files_dir='%s' region_name=%s") % (
consts.ANSIBLE_OVERRIDES_PATH, subcloud_name)]
return apply_command
def compose_deploy_command(self, subcloud_name, ansible_subcloud_inventory_file, payload):
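        """Assemble the ansible-playbook command for the custom deploy playbook."""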
deploy_command = [
"ansible-playbook", payload[consts.DEPLOY_PLAYBOOK],
"-e", "@%s" % consts.ANSIBLE_OVERRIDES_PATH + "/" +
subcloud_name + '_deploy_values.yml',
"-i", ansible_subcloud_inventory_file,
"--limit", subcloud_name
]
return deploy_command
def compose_check_target_command(self, subcloud_name,
ansible_subcloud_inventory_file, payload):
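        """Assemble the ansible-playbook command that validates the target host."""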
check_target_command = [
"ansible-playbook", ANSIBLE_HOST_VALIDATION_PLAYBOOK,
"-i", ansible_subcloud_inventory_file,
"--limit", subcloud_name,
"-e", "@%s" % consts.ANSIBLE_OVERRIDES_PATH + "/" +
subcloud_name + "_check_target_values.yml"]
return check_target_command
def compose_restore_command(self, subcloud_name,
ansible_subcloud_inventory_file, payload):
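        """Assemble the ansible-playbook command that restores a subcloud."""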
restore_command = [
"ansible-playbook", ANSIBLE_SUBCLOUD_RESTORE_PLAYBOOK,
"-i", ansible_subcloud_inventory_file,
"--limit", subcloud_name,
"-e", "@%s" % consts.ANSIBLE_OVERRIDES_PATH + "/" +
subcloud_name + "_restore_values.yml"]
return restore_command
def compose_rehome_command(self, subcloud_name, ansible_subcloud_inventory_file):
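        """Assemble the ansible-playbook command that rehomes a subcloud."""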
rehome_command = [
"ansible-playbook", ANSIBLE_SUBCLOUD_REHOME_PLAYBOOK,
"-i", ansible_subcloud_inventory_file,
"--limit", subcloud_name,
"--timeout", REHOME_PLAYBOOK_TIMEOUT,
"-e", str("override_files_dir='%s' region_name=%s") % (
consts.ANSIBLE_OVERRIDES_PATH, subcloud_name)]
return rehome_command
def add_subcloud(self, context, payload):
"""Add subcloud and notify orchestrators.
:param context: request context object
:param payload: subcloud configuration
"""
LOG.info("Adding subcloud %s." % payload['name'])
subcloud_id = db_api.subcloud_get_by_name(context, payload['name']).id
# Check the migrate option from payload
migrate_str = payload.get('migrate', '')
migrate_flag = (migrate_str.lower() == 'true')
if migrate_flag:
subcloud = db_api.subcloud_update(
context, subcloud_id,
deploy_status=consts.DEPLOY_STATE_PRE_REHOME)
else:
subcloud = db_api.subcloud_update(
context, subcloud_id,
deploy_status=consts.DEPLOY_STATE_PRE_DEPLOY)
# Populate the subcloud status table with all endpoints
for endpoint in dcorch_consts.ENDPOINT_TYPES_LIST:
db_api.subcloud_status_create(context,
subcloud.id,
endpoint)
try:
# Ansible inventory filename for the specified subcloud
ansible_subcloud_inventory_file = self._get_ansible_filename(
subcloud.name, INVENTORY_FILE_POSTFIX)
# Create a new route to this subcloud on the management interface
# on both controllers.
m_ks_client = OpenStackDriver(
region_name=consts.DEFAULT_REGION_NAME,
region_clients=None).keystone_client
subcloud_subnet = netaddr.IPNetwork(payload['management_subnet'])
sysinv_client = SysinvClient(consts.DEFAULT_REGION_NAME,
m_ks_client.session)
controllers = sysinv_client.get_controller_hosts()
for controller in controllers:
management_interface = sysinv_client.get_management_interface(
controller.hostname)
if management_interface is not None:
sysinv_client.create_route(
management_interface.uuid,
str(subcloud_subnet.ip),
subcloud_subnet.prefixlen,
payload['systemcontroller_gateway_address'],
1)
            # Create endpoints to this subcloud on the
            # management-start-ip of the subcloud, which will be allocated
            # as the floating management IP of the subcloud if the
            # address pool is not shared. If the endpoint entries are
            # incorrect, or the management IP of the subcloud is changed
            # in the future, the subcloud will not go managed or will show
            # up as out of sync. To fix this, use OpenStack endpoint
            # commands on the SystemController to change the subcloud
            # endpoints. The non-identity endpoints are added to facilitate
            # Horizon access from the System Controller to the subcloud.
endpoint_config = []
endpoint_ip = payload['management_start_address']
if netaddr.IPAddress(endpoint_ip).version == 6:
endpoint_ip = '[' + endpoint_ip + ']'
for service in m_ks_client.services_list:
if service.type == dcorch_consts.ENDPOINT_TYPE_PLATFORM:
admin_endpoint_url = "https://{}:6386/v1".format(endpoint_ip)
endpoint_config.append({"id": service.id,
"admin_endpoint_url": admin_endpoint_url})
elif service.type == dcorch_consts.ENDPOINT_TYPE_IDENTITY:
admin_endpoint_url = "https://{}:5001/v3".format(endpoint_ip)
endpoint_config.append({"id": service.id,
"admin_endpoint_url": admin_endpoint_url})
elif service.type == dcorch_consts.ENDPOINT_TYPE_PATCHING:
admin_endpoint_url = "https://{}:5492".format(endpoint_ip)
endpoint_config.append({"id": service.id,
"admin_endpoint_url": admin_endpoint_url})
elif service.type == dcorch_consts.ENDPOINT_TYPE_FM:
admin_endpoint_url = "https://{}:18003".format(endpoint_ip)
endpoint_config.append({"id": service.id,
"admin_endpoint_url": admin_endpoint_url})
elif service.type == dcorch_consts.ENDPOINT_TYPE_NFV:
admin_endpoint_url = "https://{}:4546".format(endpoint_ip)
endpoint_config.append({"id": service.id,
"admin_endpoint_url": admin_endpoint_url})
if len(endpoint_config) < 5:
raise exceptions.BadRequest(
resource='subcloud',
msg='Missing service in SystemController')
for endpoint in endpoint_config:
m_ks_client.keystone_client.endpoints.create(
endpoint["id"],
endpoint['admin_endpoint_url'],
interface=dccommon_consts.KS_ENDPOINT_ADMIN,
region=subcloud.name)
# Inform orchestrator that subcloud has been added
self.dcorch_rpc_client.add_subcloud(
context, subcloud.name, subcloud.software_version)
# create entry into alarm summary table, will get real values later
alarm_updates = {'critical_alarms': -1,
'major_alarms': -1,
'minor_alarms': -1,
'warnings': -1,
'cloud_status': consts.ALARMS_DISABLED}
db_api.subcloud_alarms_create(context, subcloud.name,
alarm_updates)
# Regenerate the addn_hosts_dc file
self._create_addn_hosts_dc(context)
# Query system controller keystone admin user/project IDs,
# services project id, sysinv and dcmanager user id and store in
# payload so they get copied to the override file
self._get_keystone_ids(m_ks_client, payload)
# Add the admin and service user passwords to the payload so they
# get copied to the override file
payload['ansible_become_pass'] = payload['sysadmin_password']
payload['ansible_ssh_pass'] = payload['sysadmin_password']
payload['admin_password'] = str(keyring.get_password('CGCS',
'admin'))
if "install_values" in payload:
payload['install_values']['ansible_ssh_pass'] = \
payload['sysadmin_password']
if 'image' not in payload['install_values']:
matching_iso, matching_sig = utils.get_vault_load_files(
SW_VERSION)
payload['install_values'].update({'image': matching_iso})
deploy_command = None
if "deploy_playbook" in payload:
self._prepare_for_deployment(payload, subcloud.name)
deploy_command = self.compose_deploy_command(
subcloud.name,
ansible_subcloud_inventory_file,
payload)
del payload['sysadmin_password']
payload['users'] = dict()
for user in USERS_TO_REPLICATE:
payload['users'][user] = \
str(keyring.get_password(
user, dccommon_consts.SERVICES_USER_NAME))
            # The smapi user's password is expected to be aligned during
            # rehoming of a subcloud, so add smapi to the list of users to
            # replicate.
if migrate_flag:
payload['users']['smapi'] = \
str(keyring.get_password(
'smapi', dccommon_consts.SERVICES_USER_NAME))
# Create the ansible inventory for the new subcloud
utils.create_subcloud_inventory(payload,
ansible_subcloud_inventory_file)
# create subcloud intermediate certificate and pass in keys
self._create_intermediate_ca_cert(payload)
# Write this subclouds overrides to file
# NOTE: This file should not be deleted if subcloud add fails
# as it is used for debugging
self._write_subcloud_ansible_config(context, payload)
if migrate_flag:
rehome_command = self.compose_rehome_command(
subcloud.name,
ansible_subcloud_inventory_file)
apply_thread = threading.Thread(
target=self.run_deploy,
args=(subcloud, payload, context,
None, None, None, None, None, rehome_command))
else:
install_command = None
if "install_values" in payload:
install_command = self.compose_install_command(
subcloud.name,
ansible_subcloud_inventory_file)
apply_command = self.compose_apply_command(
subcloud.name,
ansible_subcloud_inventory_file)
apply_thread = threading.Thread(
target=self.run_deploy,
args=(subcloud, payload, context,
install_command, apply_command, deploy_command))
apply_thread.start()
return db_api.subcloud_db_model_to_dict(subcloud)
except Exception:
LOG.exception("Failed to create subcloud %s" % payload['name'])
# If we failed to create the subcloud, update the
# deployment status
if migrate_flag:
db_api.subcloud_update(
context, subcloud.id,
deploy_status=consts.DEPLOY_STATE_REHOME_PREP_FAILED)
else:
db_api.subcloud_update(
context, subcloud.id,
deploy_status=consts.DEPLOY_STATE_DEPLOY_PREP_FAILED)
def reconfigure_subcloud(self, context, subcloud_id, payload):
"""Reconfigure subcloud
:param context: request context object
:param payload: subcloud configuration
"""
LOG.info("Reconfiguring subcloud %s." % subcloud_id)
subcloud = db_api.subcloud_update(
context, subcloud_id,
deploy_status=consts.DEPLOY_STATE_PRE_DEPLOY)
try:
# Ansible inventory filename for the specified subcloud
ansible_subcloud_inventory_file = self._get_ansible_filename(
subcloud.name, INVENTORY_FILE_POSTFIX)
deploy_command = None
if "deploy_playbook" in payload:
self._prepare_for_deployment(payload, subcloud.name)
deploy_command = self.compose_deploy_command(
subcloud.name,
ansible_subcloud_inventory_file,
payload)
del payload['sysadmin_password']
apply_thread = threading.Thread(
target=self.run_deploy,
args=(subcloud, payload, context, None, None, deploy_command))
apply_thread.start()
return db_api.subcloud_db_model_to_dict(subcloud)
except Exception:
LOG.exception("Failed to create subcloud %s" % subcloud.name)
# If we failed to create the subcloud, update the
# deployment status
db_api.subcloud_update(
context, subcloud_id,
deploy_status=consts.DEPLOY_STATE_DEPLOY_PREP_FAILED)
def _get_keystone_ids(self, keystone_client, payload):
"""Get keystone user_id and project_id
:param keystone_client: keystone client
:param payload: subcloud configuration
"""
admin_user_id = None
sysinv_user_id = None
dcmanager_user_id = None
admin_project_id = None
services_project_id = None
user_list = keystone_client.get_enabled_users(id_only=False)
for user in user_list:
if user.name == dccommon_consts.ADMIN_USER_NAME:
admin_user_id = user.id
elif user.name == dccommon_consts.SYSINV_USER_NAME:
sysinv_user_id = user.id
elif user.name == dccommon_consts.DCMANAGER_USER_NAME:
dcmanager_user_id = user.id
project_list = keystone_client.get_enabled_projects(id_only=False)
for project in project_list:
if project.name == dccommon_consts.ADMIN_PROJECT_NAME:
admin_project_id = project.id
elif project.name == dccommon_consts.SERVICES_USER_NAME:
services_project_id = project.id
payload['system_controller_keystone_admin_user_id'] = \
admin_user_id
payload['system_controller_keystone_admin_project_id'] = \
admin_project_id
payload['system_controller_keystone_services_project_id'] = \
services_project_id
payload['system_controller_keystone_sysinv_user_id'] = \
sysinv_user_id
payload['system_controller_keystone_dcmanager_user_id'] = \
dcmanager_user_id
def reinstall_subcloud(self, context, subcloud_id, payload):
"""Reinstall subcloud
:param context: request context object
:param subcloud_id: subcloud id from db
:param payload: subcloud reinstall
"""
# Retrieve the subcloud details from the database
subcloud = db_api.subcloud_get(context, subcloud_id)
LOG.info("Reinstalling subcloud %s." % subcloud_id)
try:
ansible_subcloud_inventory_file = self._get_ansible_filename(
subcloud.name, INVENTORY_FILE_POSTFIX)
m_ks_client = OpenStackDriver(
region_name=consts.DEFAULT_REGION_NAME,
region_clients=None).keystone_client
self._get_keystone_ids(m_ks_client, payload)
payload['admin_password'] = str(
keyring.get_password('CGCS', 'admin'))
payload['ansible_become_pass'] = payload['sysadmin_password']
payload['ansible_ssh_pass'] = payload['sysadmin_password']
payload['install_values']['ansible_ssh_pass'] = \
payload['sysadmin_password']
payload['install_values']['ansible_become_pass'] = \
payload['sysadmin_password']
payload['bootstrap-address'] = \
payload['install_values']['bootstrap_address']
deploy_command = None
if "deploy_playbook" in payload:
self._prepare_for_deployment(payload, subcloud.name)
deploy_command = self.compose_deploy_command(
subcloud.name,
ansible_subcloud_inventory_file,
payload)
del payload['sysadmin_password']
payload['users'] = dict()
for user in USERS_TO_REPLICATE:
payload['users'][user] = \
str(keyring.get_password(
user, dccommon_consts.SERVICES_USER_NAME))
utils.create_subcloud_inventory(payload,
ansible_subcloud_inventory_file)
self._create_intermediate_ca_cert(payload)
self._write_subcloud_ansible_config(context, payload)
install_command = self.compose_install_command(
subcloud.name,
ansible_subcloud_inventory_file)
apply_command = self.compose_apply_command(
subcloud.name,
ansible_subcloud_inventory_file)
apply_thread = threading.Thread(
target=self.run_deploy,
args=(subcloud, payload, context,
install_command, apply_command, deploy_command))
apply_thread.start()
return db_api.subcloud_db_model_to_dict(subcloud)
except Exception:
LOG.exception("Failed to reinstall subcloud %s" % subcloud.name)
# If we failed to reinstall the subcloud, update the
# deployment status
db_api.subcloud_update(
context, subcloud_id,
deploy_status=consts.DEPLOY_STATE_PRE_INSTALL_FAILED)
def _create_check_target_override_file(self, payload, subcloud_name):
check_target_override_file = os.path.join(
consts.ANSIBLE_OVERRIDES_PATH, subcloud_name +
'_check_target_values.yml')
with open(check_target_override_file, 'w') as f_out:
f_out.write(
'---\n'
)
for k, v in payload['check_target_values'].items():
f_out.write("%s: %s\n" % (k, json.dumps(v)))
def _create_restore_override_file(self, payload, subcloud_name):
restore_override_file = os.path.join(
consts.ANSIBLE_OVERRIDES_PATH, subcloud_name +
'_restore_values.yml')
with open(restore_override_file, 'w') as f_out:
f_out.write(
'---\n'
)
for k, v in payload['restore_values'].items():
f_out.write("%s: %s\n" % (k, json.dumps(v)))
def _prepare_for_restore(self, payload, subcloud_name):
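        """Build the check-target and restore values and write the override files."""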
payload['check_target_values'] = dict()
payload['check_target_values']['ansible_ssh_pass'] = \
payload['sysadmin_password']
payload['check_target_values']['software_version'] = SW_VERSION
payload['check_target_values']['bootstrap_address'] = \
payload['bootstrap-address']
payload['check_target_values']['check_bootstrap_address'] = 'true'
payload['check_target_values']['check_patches'] = 'false'
self._create_check_target_override_file(payload, subcloud_name)
payload['restore_values']['ansible_ssh_pass'] = \
payload['sysadmin_password']
payload['restore_values']['ansible_become_pass'] = \
payload['sysadmin_password']
payload['restore_values']['admin_password'] = \
str(keyring.get_password('CGCS', 'admin'))
payload['restore_values']['skip_patches_restore'] = 'true'
self._create_restore_override_file(payload, subcloud_name)
def restore_subcloud(self, context, subcloud_id, payload):
"""Restore subcloud
:param context: request context object
:param subcloud_id: subcloud id from db
:param payload: subcloud restore detail
"""
# Retrieve the subcloud details from the database
subcloud = db_api.subcloud_get(context, subcloud_id)
if subcloud.management_state != consts.MANAGEMENT_UNMANAGED:
raise exceptions.SubcloudNotUnmanaged()
db_api.subcloud_update(context, subcloud_id,
deploy_status=consts.DEPLOY_STATE_PRE_RESTORE)
try:
# Ansible inventory filename for the specified subcloud
ansible_subcloud_inventory_file = self._get_ansible_filename(
subcloud.name, INVENTORY_FILE_POSTFIX)
# Add parameters used to generate inventory
payload['name'] = subcloud.name
payload['bootstrap-address'] = \
payload['install_values']['bootstrap_address']
payload['software_version'] = SW_VERSION
install_command = None
if payload['with_install']:
# Redfish capable subclouds
LOG.info("Reinstalling subcloud %s." % subcloud.name)
                # Disregard the current 'image' config. Always reinstall with
                # the system controller's active image in the dc-vault.
matching_iso, matching_sig = utils.get_vault_load_files(SW_VERSION)
payload['install_values'].update({'image': matching_iso})
payload['install_values']['ansible_ssh_pass'] = \
payload['sysadmin_password']
utils.create_subcloud_inventory(payload,
ansible_subcloud_inventory_file)
install_command = self.compose_install_command(
subcloud.name, ansible_subcloud_inventory_file)
else:
                # Non-Redfish capable subcloud.
                # Should not get here as the API has already rejected the request.
return
# Prepare for restore
self._prepare_for_restore(payload, subcloud.name)
check_target_command = self.compose_check_target_command(
subcloud.name, ansible_subcloud_inventory_file, payload)
restore_command = self.compose_restore_command(
subcloud.name, ansible_subcloud_inventory_file, payload)
apply_thread = threading.Thread(
target=self.run_deploy,
args=(subcloud, payload, context,
install_command, None, None, check_target_command, restore_command))
apply_thread.start()
return db_api.subcloud_db_model_to_dict(subcloud)
except Exception:
LOG.exception("Failed to restore subcloud %s" % subcloud.name)
db_api.subcloud_update(
context, subcloud_id,
deploy_status=consts.DEPLOY_STATE_RESTORE_PREP_FAILED)
@staticmethod
def run_deploy(subcloud, payload, context,
install_command=None, apply_command=None,
deploy_command=None, check_target_command=None,
restore_command=None, rehome_command=None):
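        """Run the subcloud deploy phases.

        Each phase (install, host validation, bootstrap, deploy, restore,
        rehome) runs only when its corresponding command is provided, and
        the subcloud deploy_status is updated as the phases progress.
        """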
log_file = os.path.join(consts.DC_ANSIBLE_LOG_DIR, subcloud.name) + \
'_playbook_output.log'
if install_command:
db_api.subcloud_update(
context, subcloud.id,
deploy_status=consts.DEPLOY_STATE_PRE_INSTALL)
try:
install = SubcloudInstall(context, subcloud.name)
install.prep(consts.ANSIBLE_OVERRIDES_PATH,
payload['install_values'])
except Exception as e:
LOG.exception(e)
db_api.subcloud_update(
context, subcloud.id,
deploy_status=consts.DEPLOY_STATE_PRE_INSTALL_FAILED)
LOG.error(str(e))
install.cleanup()
return
# Run the remote install playbook
db_api.subcloud_update(
context, subcloud.id,
deploy_status=consts.DEPLOY_STATE_INSTALLING)
try:
install.install(consts.DC_ANSIBLE_LOG_DIR, install_command)
except Exception as e:
db_api.subcloud_update(
context, subcloud.id,
deploy_status=consts.DEPLOY_STATE_INSTALL_FAILED)
LOG.error(str(e))
install.cleanup()
return
install.cleanup()
LOG.info("Successfully installed subcloud %s" % subcloud.name)
        # Leave the following block here in case there is another use
        # case besides subcloud restore where validating the host after
        # a fresh install is necessary.
if check_target_command:
try:
run_playbook(log_file, check_target_command)
except PlaybookExecutionFailed:
msg = "Failed to run the validate host playbook" \
" for subcloud %s, check individual log at " \
"%s for detailed output." % (
subcloud.name,
log_file)
LOG.error(msg)
if restore_command:
db_api.subcloud_update(
context, subcloud.id,
deploy_status=consts.DEPLOY_STATE_RESTORE_PREP_FAILED)
return
LOG.info("Successfully checked subcloud %s" % subcloud.name)
if apply_command:
try:
# Update the subcloud to bootstrapping
db_api.subcloud_update(
context, subcloud.id,
deploy_status=consts.DEPLOY_STATE_BOOTSTRAPPING)
except Exception as e:
LOG.exception(e)
raise e
            # Run the ansible bootstrap playbook
try:
run_playbook(log_file, apply_command)
except PlaybookExecutionFailed:
msg = "Failed to run the subcloud bootstrap playbook" \
" for subcloud %s, check individual log at " \
"%s for detailed output." % (
subcloud.name,
log_file)
LOG.error(msg)
db_api.subcloud_update(
context, subcloud.id,
deploy_status=consts.DEPLOY_STATE_BOOTSTRAP_FAILED)
return
LOG.info("Successfully bootstrapped subcloud %s" %
subcloud.name)
if deploy_command:
# Run the custom deploy playbook
db_api.subcloud_update(
context, subcloud.id,
deploy_status=consts.DEPLOY_STATE_DEPLOYING)
try:
run_playbook(log_file, deploy_command)
except PlaybookExecutionFailed:
msg = "Failed to run the subcloud deploy playbook" \
" for subcloud %s, check individual log at " \
"%s for detailed output." % (
subcloud.name,
log_file)
LOG.error(msg)
db_api.subcloud_update(
context, subcloud.id,
deploy_status=consts.DEPLOY_STATE_DEPLOY_FAILED)
return
LOG.info("Successfully deployed subcloud %s" %
subcloud.name)
elif restore_command:
db_api.subcloud_update(
context, subcloud.id,
deploy_status=consts.DEPLOY_STATE_RESTORING)
# Run the restore platform playbook
try:
run_playbook(log_file, restore_command)
except PlaybookExecutionFailed:
msg = "Failed to run the subcloud restore playbook" \
" for subcloud %s, check individual log at " \
"%s for detailed output." % (
subcloud.name,
log_file)
LOG.error(msg)
db_api.subcloud_update(
context, subcloud.id,
deploy_status=consts.DEPLOY_STATE_RESTORE_FAILED)
return
LOG.info("Successfully restored controller-0 of subcloud %s" %
subcloud.name)
if rehome_command:
# Update the deploy status to rehoming
db_api.subcloud_update(
context, subcloud.id,
deploy_status=consts.DEPLOY_STATE_REHOMING)
# Run the rehome-subcloud playbook
try:
run_playbook(log_file, rehome_command)
except PlaybookExecutionFailed:
msg = "Failed to run the subcloud rehome playbook" \
" for subcloud %s, check individual log at " \
"%s for detailed output." % (
subcloud.name,
log_file)
LOG.error(msg)
db_api.subcloud_update(
context, subcloud.id,
deploy_status=consts.DEPLOY_STATE_REHOME_FAILED)
return
LOG.info("Successfully rehomed subcloud %s" %
subcloud.name)
db_api.subcloud_update(
context, subcloud.id,
deploy_status=consts.DEPLOY_STATE_DONE)
def _create_addn_hosts_dc(self, context):
"""Generate the addn_hosts_dc file for hostname/ip translation"""
addn_hosts_dc = os.path.join(CONFIG_PATH, ADDN_HOSTS_DC)
addn_hosts_dc_temp = addn_hosts_dc + '.temp'
subclouds = db_api.subcloud_get_all(context)
with open(addn_hosts_dc_temp, 'w') as f_out_addn_dc_temp:
for subcloud in subclouds:
addn_dc_line = subcloud.management_start_ip + ' ' + \
subcloud.name + '\n'
f_out_addn_dc_temp.write(addn_dc_line)
# if no more subclouds, create empty file so dnsmasq does not
# emit an error log.
if not subclouds:
f_out_addn_dc_temp.write(' ')
if not filecmp.cmp(addn_hosts_dc_temp, addn_hosts_dc):
os.rename(addn_hosts_dc_temp, addn_hosts_dc)
# restart dnsmasq so it can re-read our addn_hosts file.
os.system("pkill -HUP dnsmasq")
def _write_subcloud_ansible_config(self, context, payload):
"""Create the override file for usage with the specified subcloud"""
overrides_file = os.path.join(consts.ANSIBLE_OVERRIDES_PATH,
payload['name'] + '.yml')
m_ks_client = OpenStackDriver(
region_name=consts.DEFAULT_REGION_NAME,
region_clients=None).keystone_client
sysinv_client = SysinvClient(consts.DEFAULT_REGION_NAME, m_ks_client.session)
mgmt_pool = sysinv_client.get_management_address_pool()
mgmt_floating_ip = mgmt_pool.floating_address
mgmt_subnet = "%s/%d" % (mgmt_pool.network, mgmt_pool.prefix)
oam_addresses = sysinv_client.get_oam_addresses()
oam_floating_ip = oam_addresses.oam_floating_ip
oam_subnet = oam_addresses.oam_subnet
with open(overrides_file, 'w') as f_out_overrides_file:
f_out_overrides_file.write(
'---'
'\nregion_config: yes'
'\ndistributed_cloud_role: subcloud'
'\nsystem_controller_subnet: ' + mgmt_subnet +
'\nsystem_controller_floating_address: ' + mgmt_floating_ip +
'\nsystem_controller_oam_subnet: ' + oam_subnet +
'\nsystem_controller_oam_floating_address: ' + oam_floating_ip
+ '\n'
)
for k, v in payload.items():
if k not in ['deploy_playbook', 'deploy_values',
'deploy_config', 'deploy_chart',
'deploy_overrides', 'install_values']:
f_out_overrides_file.write("%s: %s\n" % (k, json.dumps(v)))
def _write_deploy_files(self, payload, subcloud_name):
"""Create the deploy value files for the subcloud"""
deploy_values_file = os.path.join(
consts.ANSIBLE_OVERRIDES_PATH, subcloud_name +
'_deploy_values.yml')
with open(deploy_values_file, 'w') as f_out_deploy_values_file:
json.dump(payload['deploy_values'], f_out_deploy_values_file)
def _prepare_for_deployment(self, payload, subcloud_name):
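        """Build the deploy values for the subcloud and write them to file."""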
payload['deploy_values'] = dict()
payload['deploy_values']['ansible_become_pass'] = \
payload['sysadmin_password']
payload['deploy_values']['ansible_ssh_pass'] = \
payload['sysadmin_password']
payload['deploy_values']['admin_password'] = \
str(keyring.get_password('CGCS', 'admin'))
payload['deploy_values']['deployment_config'] = \
payload[consts.DEPLOY_CONFIG]
payload['deploy_values']['deployment_manager_chart'] = \
payload[consts.DEPLOY_CHART]
payload['deploy_values']['deployment_manager_overrides'] = \
payload[consts.DEPLOY_OVERRIDES]
self._write_deploy_files(payload, subcloud_name)
def _delete_subcloud_routes(self, context, subcloud):
"""Delete the routes to this subcloud"""
keystone_client = OpenStackDriver(
region_name=consts.DEFAULT_REGION_NAME,
region_clients=None).keystone_client
# Delete the route to this subcloud on the management interface on
# both controllers.
management_subnet = netaddr.IPNetwork(subcloud.management_subnet)
sysinv_client = SysinvClient(consts.DEFAULT_REGION_NAME, keystone_client.session)
controllers = sysinv_client.get_controller_hosts()
for controller in controllers:
management_interface = sysinv_client.get_management_interface(
controller.hostname)
if management_interface is not None:
sysinv_client.delete_route(
management_interface.uuid,
str(management_subnet.ip),
management_subnet.prefixlen,
str(netaddr.IPAddress(
subcloud.systemcontroller_gateway_ip)),
1)
@staticmethod
def _delete_subcloud_cert(subcloud_name):
cert_name = SubcloudManager._get_subcloud_cert_name(subcloud_name)
secret_name = SubcloudManager._get_subcloud_cert_secret_name(
subcloud_name)
kube = kubeoperator.KubeOperator()
kube.delete_cert_manager_certificate(CERT_NAMESPACE, cert_name)
kube.kube_delete_secret(secret_name, CERT_NAMESPACE)
LOG.info("cert %s and secret %s are deleted" % (cert_name, secret_name))
def _remove_subcloud_details(self, context,
subcloud,
ansible_subcloud_inventory_file):
"""Remove subcloud details from database and inform orchestrators"""
# Inform orchestrators that subcloud has been deleted
try:
self.dcorch_rpc_client.del_subcloud(context, subcloud.name)
except RemoteError as e:
if "SubcloudNotFound" in e:
pass
# delete the associated alarm entry
try:
db_api.subcloud_alarms_delete(context, subcloud.name)
except RemoteError as e:
if "SubcloudNotFound" in e:
pass
        # We only delete the subcloud endpoints, region and user information
        # in the Central Region. The subcloud is already unmanaged and
        # powered down, so it is not accessible. Therefore set up a session
        # with the Central Region Keystone ONLY.
keystone_client = OpenStackDriver(
region_name=consts.DEFAULT_REGION_NAME,
region_clients=None).keystone_client
# Delete keystone endpoints for subcloud
keystone_client.delete_endpoints(subcloud.name)
keystone_client.delete_region(subcloud.name)
# Delete the routes to this subcloud
self._delete_subcloud_routes(context, subcloud)
# Remove the subcloud from the database
try:
db_api.subcloud_destroy(context, subcloud.id)
except Exception as e:
LOG.exception(e)
raise e
        # Delete the ansible inventory for this subcloud
utils.delete_subcloud_inventory(ansible_subcloud_inventory_file)
# Delete the subcloud intermediate certificate
SubcloudManager._delete_subcloud_cert(subcloud.name)
# Regenerate the addn_hosts_dc file
self._create_addn_hosts_dc(context)
def delete_subcloud(self, context, subcloud_id):
"""Delete subcloud and notify orchestrators.
:param context: request context object.
:param subcloud_id: id of subcloud to delete
"""
LOG.info("Deleting subcloud %s." % subcloud_id)
# Retrieve the subcloud details from the database
subcloud = db_api.subcloud_get(context, subcloud_id)
# Semantic checking
if subcloud.management_state != consts.MANAGEMENT_UNMANAGED:
raise exceptions.SubcloudNotUnmanaged()
if subcloud.availability_status == \
consts.AVAILABILITY_ONLINE:
raise exceptions.SubcloudNotOffline()
# Ansible inventory filename for the specified subcloud
ansible_subcloud_inventory_file = self._get_ansible_filename(
subcloud.name, INVENTORY_FILE_POSTFIX)
self._remove_subcloud_details(context,
subcloud,
ansible_subcloud_inventory_file)
# Clear the offline fault associated with this subcloud as we
# are deleting it. Note that endpoint out-of-sync alarms should
# have been cleared when the subcloud was unmanaged and the endpoint
# sync statuses were set to unknown.
entity_instance_id = "subcloud=%s" % subcloud.name
try:
subcloud_offline = fm_const.FM_ALARM_ID_DC_SUBCLOUD_OFFLINE
fault = self.fm_api.get_fault(subcloud_offline,
entity_instance_id)
if fault:
self.fm_api.clear_fault(subcloud_offline,
entity_instance_id)
except Exception as e:
LOG.info("Problem clearing offline fault for "
"subcloud %s" % subcloud.name)
LOG.exception(e)
def update_subcloud(self,
context,
subcloud_id,
management_state=None,
description=None,
location=None,
group_id=None,
data_install=None,
force=None):
"""Update subcloud and notify orchestrators.
:param context: request context object
:param subcloud_id: id of subcloud to update
:param management_state: new management state
:param description: new description
:param location: new location
:param group_id: new subcloud group id
:param data_install: subcloud install values
:param force: force flag
"""
LOG.info("Updating subcloud %s." % subcloud_id)
# Get the subcloud details from the database
subcloud = db_api.subcloud_get(context, subcloud_id)
original_management_state = subcloud.management_state
# Semantic checking
if management_state:
if management_state == consts.MANAGEMENT_UNMANAGED:
if subcloud.management_state == consts.MANAGEMENT_UNMANAGED:
LOG.warning("Subcloud %s already unmanaged" % subcloud_id)
raise exceptions.BadRequest(
resource='subcloud',
msg='Subcloud is already unmanaged')
elif management_state == consts.MANAGEMENT_MANAGED:
if subcloud.management_state == consts.MANAGEMENT_MANAGED:
LOG.warning("Subcloud %s already managed" % subcloud_id)
raise exceptions.BadRequest(
resource='subcloud',
msg='Subcloud is already managed')
elif not force:
if subcloud.deploy_status != consts.DEPLOY_STATE_DONE:
LOG.warning("Subcloud %s can be managed only when"
"deploy_status is complete" % subcloud_id)
raise exceptions.BadRequest(
resource='subcloud',
msg='Subcloud can be managed only if deploy status is complete')
if subcloud.availability_status != \
consts.AVAILABILITY_ONLINE:
LOG.warning("Subcloud %s is not online" % subcloud_id)
raise exceptions.SubcloudNotOnline()
else:
LOG.error("Invalid management_state %s" % management_state)
raise exceptions.InternalError()
subcloud = db_api.subcloud_update(context,
subcloud_id,
management_state=management_state,
description=description,
location=location,
group_id=group_id,
data_install=data_install)
# Inform orchestrators that subcloud has been updated
if management_state:
try:
# Inform orchestrator of state change
self.dcorch_rpc_client.update_subcloud_states(
context,
subcloud.name,
management_state,
subcloud.availability_status)
LOG.info('Notifying dcorch, subcloud:%s management: %s, '
'availability:%s' % (subcloud.name,
management_state,
subcloud.availability_status))
except Exception as e:
LOG.exception(e)
                LOG.warn('Problem informing dcorch of subcloud '
                         'state change, reverting to original state, '
                         'subcloud: %s' % subcloud.name)
management_state = original_management_state
subcloud = \
db_api.subcloud_update(context, subcloud_id,
management_state=management_state,
description=description,
location=location)
if management_state == consts.MANAGEMENT_UNMANAGED:
# set all endpoint statuses to unknown, except the dc-cert
# endpoint which continues to be audited for unmanaged
# subclouds
self.update_subcloud_endpoint_status(
context,
subcloud_name=subcloud.name,
endpoint_type=None,
sync_status=consts.SYNC_STATUS_UNKNOWN,
ignore_endpoints=[dcorch_consts.ENDPOINT_TYPE_DC_CERT])
elif management_state == consts.MANAGEMENT_MANAGED:
# Subcloud is managed
# Tell cert-mon to audit endpoint certificate
LOG.info('Request for managed audit for %s' % subcloud.name)
dc_notification = rpc_client.DCManagerNotifications()
dc_notification.subcloud_managed(context, subcloud.name)
# Trigger all the audits for the subcloud so it can update the
# sync status ASAP.
self.audit_rpc_client.trigger_subcloud_audits(context, subcloud_id)
return db_api.subcloud_db_model_to_dict(subcloud)
def _update_online_managed_subcloud(self, context, subcloud_id,
endpoint_type, sync_status,
alarmable, ignore_endpoints=None):
"""Update online/managed subcloud endpoint status
:param context: request context object
:param subcloud_id: id of subcloud to update
:param endpoint_type: endpoint type to update
:param sync_status: sync status to set
:param alarmable: controls raising an alarm if applicable
:param ignore_endpoints: list of endpoints to ignore (only used if
endpoint_type is None)
"""
if ignore_endpoints is None:
ignore_endpoints = []
subcloud_status_list = []
subcloud = None
# retrieve the info from the db for this subcloud.
# subcloud_id should not be None
try:
for subcloud, subcloud_status in db_api. \
subcloud_get_with_status(context, subcloud_id):
if subcloud_status:
subcloud_status_list.append(
db_api.subcloud_endpoint_status_db_model_to_dict(
subcloud_status))
except Exception as e:
LOG.exception(e)
raise e
if subcloud:
if endpoint_type:
# updating a single endpoint on a single subcloud
for subcloud_status in subcloud_status_list:
if subcloud_status['endpoint_type'] == endpoint_type:
if subcloud_status['sync_status'] == sync_status:
# No change in the sync_status
LOG.debug("Sync status (%s) for subcloud %s did "
"not change - ignore update" %
(sync_status, subcloud.name))
return
# We found the endpoint
break
else:
# We did not find the endpoint
raise exceptions.BadRequest(
resource='subcloud',
msg='Endpoint %s not found for subcloud' %
endpoint_type)
LOG.info("Updating subcloud:%s endpoint:%s sync:%s" %
(subcloud.name, endpoint_type, sync_status))
db_api.subcloud_status_update(context,
subcloud_id,
endpoint_type,
sync_status)
entity_instance_id = "subcloud=%s.resource=%s" % \
(subcloud.name, endpoint_type)
fault = self.fm_api.get_fault(
fm_const.FM_ALARM_ID_DC_SUBCLOUD_RESOURCE_OUT_OF_SYNC,
entity_instance_id)
if (sync_status != consts.SYNC_STATUS_OUT_OF_SYNC) \
and fault:
try:
self.fm_api.clear_fault(
fm_const.FM_ALARM_ID_DC_SUBCLOUD_RESOURCE_OUT_OF_SYNC, # noqa
entity_instance_id)
except Exception as e:
LOG.exception(e)
elif not fault and alarmable and \
(sync_status == consts.SYNC_STATUS_OUT_OF_SYNC):
entity_type_id = fm_const.FM_ENTITY_TYPE_SUBCLOUD
try:
fault = fm_api.Fault(
alarm_id=fm_const.FM_ALARM_ID_DC_SUBCLOUD_RESOURCE_OUT_OF_SYNC, # noqa
alarm_state=fm_const.FM_ALARM_STATE_SET,
entity_type_id=entity_type_id,
entity_instance_id=entity_instance_id,
severity=fm_const.FM_ALARM_SEVERITY_MAJOR,
reason_text=("%s %s sync_status is "
"out-of-sync" %
(subcloud.name, endpoint_type)),
alarm_type=fm_const.FM_ALARM_TYPE_0,
probable_cause=fm_const.ALARM_PROBABLE_CAUSE_2,
proposed_repair_action="If problem persists "
"contact next level "
"of support",
service_affecting=False)
self.fm_api.set_fault(fault)
except Exception as e:
LOG.exception(e)
else:
# update all endpoints on this subcloud
LOG.info("Updating all endpoints on subcloud: %s sync: %s "
"ignore_endpoints: %s" %
(subcloud.name, sync_status, ignore_endpoints))
for entry in subcloud_status_list:
endpoint = entry[consts.ENDPOINT_TYPE]
if endpoint in ignore_endpoints:
# Do not update this endpoint
continue
db_api.subcloud_status_update(context,
subcloud_id,
endpoint,
sync_status)
entity_instance_id = "subcloud=%s.resource=%s" % \
(subcloud.name, endpoint)
fault = self.fm_api.get_fault(
fm_const.FM_ALARM_ID_DC_SUBCLOUD_RESOURCE_OUT_OF_SYNC,
entity_instance_id)
if (sync_status != consts.SYNC_STATUS_OUT_OF_SYNC) \
and fault:
try:
self.fm_api.clear_fault(
fm_const.FM_ALARM_ID_DC_SUBCLOUD_RESOURCE_OUT_OF_SYNC, # noqa
entity_instance_id)
except Exception as e:
LOG.exception(e)
elif not fault and alarmable and \
(sync_status == consts.SYNC_STATUS_OUT_OF_SYNC):
entity_type_id = fm_const.FM_ENTITY_TYPE_SUBCLOUD
try:
fault = fm_api.Fault(
alarm_id=fm_const.FM_ALARM_ID_DC_SUBCLOUD_RESOURCE_OUT_OF_SYNC, # noqa
alarm_state=fm_const.FM_ALARM_STATE_SET,
entity_type_id=entity_type_id,
entity_instance_id=entity_instance_id,
severity=fm_const.FM_ALARM_SEVERITY_MAJOR,
reason_text=("%s %s sync_status is "
"out-of-sync" %
(subcloud.name, endpoint)),
alarm_type=fm_const.FM_ALARM_TYPE_0,
probable_cause=fm_const.ALARM_PROBABLE_CAUSE_2,
proposed_repair_action="If problem persists "
"contact next level "
"of support",
service_affecting=False)
self.fm_api.set_fault(fault)
except Exception as e:
LOG.exception(e)
else:
LOG.error("Subcloud not found:%s" % subcloud_id)
@sync_update_subcloud_endpoint_status
def _update_subcloud_endpoint_status(
self, context,
subcloud_name,
endpoint_type=None,
sync_status=consts.SYNC_STATUS_OUT_OF_SYNC,
alarmable=True,
ignore_endpoints=None):
"""Update subcloud endpoint status
:param context: request context object
:param subcloud_name: name of subcloud to update
:param endpoint_type: endpoint type to update
:param sync_status: sync status to set
:param alarmable: controls raising an alarm if applicable
:param ignore_endpoints: list of endpoints to ignore (only used if
endpoint_type is None)
"""
if ignore_endpoints is None:
ignore_endpoints = []
if not subcloud_name:
raise exceptions.BadRequest(
resource='subcloud',
msg='Subcloud name not provided')
try:
subcloud = db_api.subcloud_get_by_name(context, subcloud_name)
except Exception as e:
LOG.exception(e)
raise e
        # Only allow updating the sync status if the subcloud is managed
        # and online. The exception is the dc-cert endpoint, which is
        # audited whenever the subcloud is online, which can happen before
        # the subcloud is managed.
        # This means if a subcloud is going offline or unmanaged, the
        # sync status update must be done first.
if (((subcloud.availability_status ==
consts.AVAILABILITY_ONLINE)
and (subcloud.management_state ==
consts.MANAGEMENT_MANAGED or
endpoint_type == dcorch_consts.ENDPOINT_TYPE_DC_CERT))
or (sync_status != consts.SYNC_STATUS_IN_SYNC)):
# update a single subcloud
try:
self._update_online_managed_subcloud(context,
subcloud.id,
endpoint_type,
sync_status,
alarmable,
ignore_endpoints)
except Exception as e:
LOG.exception(e)
raise e
else:
LOG.info("Ignoring subcloud sync_status update for subcloud:%s "
"availability:%s management:%s endpoint:%s sync:%s" %
(subcloud_name, subcloud.availability_status,
subcloud.management_state, endpoint_type, sync_status))
def update_subcloud_endpoint_status(
self, context,
subcloud_name=None,
endpoint_type=None,
sync_status=consts.SYNC_STATUS_OUT_OF_SYNC,
alarmable=True,
ignore_endpoints=None):
"""Update subcloud endpoint status
:param context: request context object
:param subcloud_name: name of subcloud to update
:param endpoint_type: endpoint type to update
:param sync_status: sync status to set
:param alarmable: controls raising an alarm if applicable
:param ignore_endpoints: list of endpoints to ignore (only used if
endpoint_type is None)
"""
if ignore_endpoints is None:
ignore_endpoints = []
if subcloud_name:
self._update_subcloud_endpoint_status(
context, subcloud_name, endpoint_type, sync_status, alarmable,
ignore_endpoints)
else:
# update all subclouds
for subcloud in db_api.subcloud_get_all(context):
self._update_subcloud_endpoint_status(
context, subcloud.name, endpoint_type, sync_status,
alarmable, ignore_endpoints)
def _update_subcloud_state(self, context, subcloud_name,
management_state, availability_status):
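        """Notify dcorch of a subcloud management/availability state change."""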
try:
self.dcorch_rpc_client.update_subcloud_states(
context, subcloud_name, management_state, availability_status)
LOG.info('Notifying dcorch, subcloud:%s management: %s, '
'availability:%s' %
(subcloud_name,
management_state,
availability_status))
except Exception:
            LOG.exception('Problem informing dcorch of subcloud state change, '
                          'subcloud: %s' % subcloud_name)
def _raise_or_clear_subcloud_status_alarm(self, subcloud_name,
availability_status):
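        """Raise or clear the subcloud offline alarm based on availability."""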
entity_instance_id = "subcloud=%s" % subcloud_name
fault = self.fm_api.get_fault(
fm_const.FM_ALARM_ID_DC_SUBCLOUD_OFFLINE,
entity_instance_id)
if fault and (availability_status == consts.AVAILABILITY_ONLINE):
try:
self.fm_api.clear_fault(
fm_const.FM_ALARM_ID_DC_SUBCLOUD_OFFLINE,
entity_instance_id)
except Exception:
LOG.exception("Failed to clear offline alarm for subcloud: %s",
subcloud_name)
elif not fault and \
(availability_status == consts.AVAILABILITY_OFFLINE):
try:
fault = fm_api.Fault(
alarm_id=fm_const.FM_ALARM_ID_DC_SUBCLOUD_OFFLINE,
alarm_state=fm_const.FM_ALARM_STATE_SET,
entity_type_id=fm_const.FM_ENTITY_TYPE_SUBCLOUD,
entity_instance_id=entity_instance_id,
severity=fm_const.FM_ALARM_SEVERITY_CRITICAL,
reason_text=('%s is offline' % subcloud_name),
alarm_type=fm_const.FM_ALARM_TYPE_0,
probable_cause=fm_const.ALARM_PROBABLE_CAUSE_29,
proposed_repair_action="Wait for subcloud to "
"become online; if "
"problem persists contact "
"next level of support.",
service_affecting=True)
self.fm_api.set_fault(fault)
except Exception:
LOG.exception("Failed to raise offline alarm for subcloud: %s",
subcloud_name)
def update_subcloud_availability(self, context, subcloud_name,
availability_status,
update_state_only=False,
audit_fail_count=None):
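        """Update subcloud availability status and notify dcorch.

        Raises or clears the subcloud offline alarm, sets endpoint sync
        statuses to unknown when the subcloud goes offline, and triggers
        audits when it comes online.
        """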
try:
subcloud = db_api.subcloud_get_by_name(context, subcloud_name)
except Exception:
LOG.exception("Failed to get subcloud by name: %s" % subcloud_name)
raise
if update_state_only:
# Nothing has changed, but we want to send a state update for this
# subcloud as an audit. Get the most up-to-date data.
self._update_subcloud_state(context, subcloud_name,
subcloud.management_state,
availability_status)
elif availability_status is None:
# only update the audit fail count
try:
db_api.subcloud_update(self.context, subcloud.id,
audit_fail_count=audit_fail_count)
except exceptions.SubcloudNotFound:
# slim possibility subcloud could have been deleted since
# we found it in db, ignore this benign error.
LOG.info('Ignoring SubcloudNotFound when attempting '
'audit_fail_count update: %s' % subcloud_name)
return
else:
self._raise_or_clear_subcloud_status_alarm(subcloud_name,
availability_status)
if availability_status == consts.AVAILABILITY_OFFLINE:
# Subcloud is going offline, set all endpoint statuses to
# unknown.
self._update_subcloud_endpoint_status(
context, subcloud_name, endpoint_type=None,
sync_status=consts.SYNC_STATUS_UNKNOWN)
try:
updated_subcloud = db_api.subcloud_update(
context,
subcloud.id,
availability_status=availability_status,
audit_fail_count=audit_fail_count)
except exceptions.SubcloudNotFound:
# slim possibility subcloud could have been deleted since
# we found it in db, ignore this benign error.
LOG.info('Ignoring SubcloudNotFound when attempting state'
' update: %s' % subcloud_name)
return
if availability_status == consts.AVAILABILITY_ONLINE:
# Subcloud is going online
# Tell cert-mon to audit endpoint certificate.
LOG.info('Request for online audit for %s' % subcloud_name)
dc_notification = rpc_client.DCManagerNotifications()
dc_notification.subcloud_online(context, subcloud_name)
# Trigger all the audits for the subcloud so it can update the
# sync status ASAP.
self.audit_rpc_client.trigger_subcloud_audits(context,
subcloud.id)
# Send dcorch a state update
self._update_subcloud_state(context, subcloud_name,
updated_subcloud.management_state,
availability_status)
def update_subcloud_sync_endpoint_type(self, context,
subcloud_name,
endpoint_type_list,
openstack_installed):
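        """Add or remove the openstack sync endpoint types of a subcloud."""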
operation = 'add' if openstack_installed else 'remove'
func_switcher = {
'add': (
self.dcorch_rpc_client.add_subcloud_sync_endpoint_type,
db_api.subcloud_status_create
),
'remove': (
self.dcorch_rpc_client.remove_subcloud_sync_endpoint_type,
db_api.subcloud_status_delete
)
}
try:
subcloud = db_api.subcloud_get_by_name(context, subcloud_name)
except Exception:
LOG.exception("Failed to get subcloud by name: %s" % subcloud_name)
raise
try:
# Notify dcorch to add/remove sync endpoint type list
func_switcher[operation][0](self.context, subcloud_name,
endpoint_type_list)
LOG.info('Notifying dcorch, subcloud: %s new sync endpoint: %s' %
(subcloud_name, endpoint_type_list))
# Update subcloud status table by adding/removing openstack sync
# endpoint types
for endpoint_type in endpoint_type_list:
func_switcher[operation][1](self.context, subcloud.id,
endpoint_type)
# Update openstack_installed of subcloud table
db_api.subcloud_update(self.context, subcloud.id,
openstack_installed=openstack_installed)
except Exception:
LOG.exception('Problem informing dcorch of subcloud sync endpoint'
' type change, subcloud: %s' % subcloud_name)
def handle_subcloud_operations_in_progress(self):
"""Identify subclouds in transitory stages and update subcloud deploy
state to failure.
"""
LOG.info('Identifying subclouds in transitory stages.')
# identify subclouds in transitory states
subclouds_in_transitory_states = [subcloud for subcloud in
db_api.subcloud_get_all(self.context)
if subcloud.deploy_status in TRANSITORY_STATES.keys()]
# update the deploy_state to the corresponding failure state
for subcloud in subclouds_in_transitory_states:
new_deploy_status = TRANSITORY_STATES[subcloud.deploy_status]
LOG.info("Changing subcloud %s deploy status from %s to %s."
% (subcloud.name, subcloud.deploy_status, new_deploy_status))
db_api.subcloud_update(
self.context,
subcloud.id,
deploy_status=new_deploy_status)