Refactor portion of dcmanager into dcmanager-state process

This change breaks down the monolithic 'dcmanager' process into a
separate state-handling multi-process architecture. Subcloud state
management is handled by the new state processes. This is a performance
optimization, increasing the throughput of subcloud state change
operations during dcmanager audits. It also optimizes performance
handling of state updates from dcorch.

Refactoring. These top-level methods are moved from dcmanager
SubcloudManager to SubcloudStateManager (along with associated
RPC/service APIs above):
- update_subcloud_endpoint_status
- update_subcloud_availability
- plus internal methods, including the subcloud-level synchronization
  block

New service: DCManagerStateService()
- distributedcloud/dcmanager/state/service.py
    - overriding max_pool_size, max_overflow values from
      /etc/dcmanager/dcmanager.conf

New state manager: SubcloudStateManager()
- distributedcloud/dcmanager/state/subcloud_state_manager.py

New RPC client: SubcloudStateClient()
- Added to existing distributedcloud/dcmanager/rpc/client.py

New RPC topic:
- TOPIC_DC_MANAGER_STATE = "dcmanager-state"

SM service changes:
- distributedcloud/dcmanager/cmd/state.py
- distributedcloud/ocf/dcmanager-state

Other notable changes (from testing):
- Cleanup: unused ThreadGroupManager removed from dcmanager service.py
- generic_sync_manager: Add an eventlet yield while processing subclouds
  during initialization
- dcorch: fix exceptions on shutdown due to race on threadgroup shutdown
- dcorch: log service startup with worker config

Test cases:
- update test cases where necessary to use the newly refactored code
- since there is no new logic, no additional tests are required

Test Plan:
PASS:
- Test various audit scenarios in small virtualized environment
- Test audit scenarios in lab setting with a large number of subclouds
    - subclouds going offline/online (including flooding)
    - dead office recovery
    - swact
    - system soak
- Validate dcmanager-state service lifecycle and dependencies

Story: 2009725
Task: 44317

Change-Id: I2c9a0f84e8cf638632ca319545e9e93e6f43f263
Signed-off-by: Kyle MacLeod <kyle.macleod@windriver.com>
This commit is contained in:
Kyle MacLeod 2022-01-17 17:10:36 -05:00
parent d50ad87fe9
commit fc5afdacf2
36 changed files with 1368 additions and 688 deletions

View File

@ -134,6 +134,7 @@ install -d -m 755 %{buildroot}%{_tmpfilesdir}
install -d -m 755 %{buildroot}/var/log/dcmanager
install -d -m 755 %{buildroot}/var/cache/dcmanager
install -d -m 755 %{buildroot}%{_sysconfdir}/dcmanager/
# TODO(kmacleod) Remove systemd unit files, they are not used:
# install systemd unit files
install -p -D -m 644 %{SOURCE1} %{buildroot}%{_unitdir}/dcmanager-api.service
install -p -D -m 644 %{SOURCE2} %{buildroot}%{_unitdir}/dcmanager-manager.service
@ -151,6 +152,7 @@ install -p -D -m 644 %{SOURCE16} %{buildroot}%{_sysconfdir}/logrotate.d/distclou
install -d -m 755 %{buildroot}/var/log/dcorch
install -d -m 755 %{buildroot}/var/cache/dcorch
install -d -m 755 %{buildroot}%{_sysconfdir}/dcorch/
# TODO(kmacleod) Remove systemd unit files, they are not used:
# install systemd unit files
install -p -D -m 644 %{SOURCE3} %{buildroot}%{_unitdir}/dcorch-api.service
install -p -D -m 644 %{SOURCE4} %{buildroot}%{_unitdir}/dcorch-engine.service
@ -208,6 +210,7 @@ install -m 755 -D -p %{SOURCE12} %{buildroot}/%{_bindir}/clean-dcorch
%{_bindir}/dcmanager-manager
%{_unitdir}/dcmanager-manager.service
%{_bindir}/dcmanager-manage
%{_bindir}/dcmanager-state
%{_tmpfilesdir}/dcmanager.conf
%dir %attr(0755,root,root) %{_localstatedir}/log/dcmanager
%dir %attr(0755,root,root) %{_localstatedir}/cache/dcmanager

View File

@ -1,6 +1,7 @@
# Distributed Cloud Log destination
destination d_dcmanager { file("/var/log/dcmanager/dcmanager.log" template(t_preformatted)); };
destination d_dcmanager_audit { file("/var/log/dcmanager/audit.log" template(t_preformatted)); };
destination d_dcmanager_state { file("/var/log/dcmanager/state.log" template(t_preformatted)); };
destination d_dcmanager_orch { file("/var/log/dcmanager/orchestrator.log" template(t_preformatted)); };
destination d_dcorch { file("/var/log/dcorch/dcorch.log" template(t_preformatted)); };
destination d_dcdbsync { file("/var/log/dcdbsync/dcdbsync.log" template(t_preformatted)); };
@ -9,6 +10,7 @@ destination d_dcdbsync_openstack { file("/var/log/dcdbsync/dcdbsync_openstack.lo
# Distributed Cloud Log Filters
filter f_dcmanagermanager { facility(local4) and program(dcmanager-manager); };
filter f_dcmanageraudit { facility(local4) and program(dcmanager-audit); };
filter f_dcmanagerstate { facility(local4) and program(dcmanager-state); };
filter f_dcmanagerorchestrator { facility(local4) and program(dcmanager-orchestrator); };
filter f_dcmanagerapi { facility(local4) and program(dcmanager-api); };
@ -21,6 +23,7 @@ filter f_dcdbsyncopenstackapi { facility(local4) and program(dcdbsync-api); }
# Distributed Cloud Log Path
log {source(s_src); filter(f_dcmanagermanager); destination(d_dcmanager); };
log {source(s_src); filter(f_dcmanageraudit); destination(d_dcmanager_audit); };
log {source(s_src); filter(f_dcmanagerstate); destination(d_dcmanager_state); };
log {source(s_src); filter(f_dcmanagerorchestrator); destination(d_dcmanager_orch); };
log {source(s_src); filter(f_dcmanagerapi); destination(d_dcmanager); };
log {source(s_src); filter(f_dcorchengine); destination(d_dcorch); };

View File

@ -115,7 +115,8 @@ class SubcloudsController(object):
def __init__(self):
super(SubcloudsController, self).__init__()
self.rpc_client = rpc_client.ManagerClient()
self.dcmanager_rpc_client = rpc_client.ManagerClient()
self.dcmanager_state_rpc_client = rpc_client.SubcloudStateClient()
# to do the version compatibility for future purpose
def _determine_version_cap(self, target):
@ -988,7 +989,7 @@ class SubcloudsController(object):
subcloud = self._add_subcloud_to_database(context, payload)
# Ask dcmanager-manager to add the subcloud.
# It will do all the real work...
self.rpc_client.add_subcloud(context, payload)
self.dcmanager_rpc_client.add_subcloud(context, payload)
return db_api.subcloud_db_model_to_dict(subcloud)
except RemoteError as e:
pecan.abort(422, e.value)
@ -1077,9 +1078,9 @@ class SubcloudsController(object):
data_install = json.dumps(payload[INSTALL_VALUES])
try:
# Inform dcmanager-manager that subcloud has been updated.
# Inform dcmanager that subcloud has been updated.
# It will do all the real work...
subcloud = self.rpc_client.update_subcloud(
subcloud = self.dcmanager_rpc_client.update_subcloud(
context, subcloud_id, management_state=management_state,
description=description, location=location, group_id=group_id,
data_install=data_install, force=force_flag)
@ -1115,8 +1116,8 @@ class SubcloudsController(object):
pecan.abort(400, msg)
try:
subcloud = self.rpc_client.reconfigure_subcloud(context, subcloud_id,
payload)
subcloud = self.dcmanager_rpc_client.reconfigure_subcloud(
context, subcloud_id, payload)
return subcloud
except RemoteError as e:
pecan.abort(422, e.value)
@ -1269,7 +1270,7 @@ class SubcloudsController(object):
deploy_status=consts.DEPLOY_STATE_PRE_INSTALL,
data_install=data_install)
self.rpc_client.reinstall_subcloud(
self.dcmanager_rpc_client.reinstall_subcloud(
context, subcloud_id, payload)
return db_api.subcloud_db_model_to_dict(subcloud)
@ -1348,8 +1349,9 @@ class SubcloudsController(object):
pecan.abort(400, msg)
try:
self.rpc_client.restore_subcloud(context, subcloud_id,
payload)
self.dcmanager_rpc_client.restore_subcloud(context,
subcloud_id,
payload)
# Return deploy_status as pre-restore
subcloud.deploy_status = consts.DEPLOY_STATE_PRE_RESTORE
return db_api.subcloud_db_model_to_dict(subcloud)
@ -1391,7 +1393,8 @@ class SubcloudsController(object):
try:
# Ask dcmanager-manager to delete the subcloud.
# It will do all the real work...
return self.rpc_client.delete_subcloud(context, subcloud_id)
return self.dcmanager_rpc_client.delete_subcloud(context,
subcloud_id)
except RemoteError as e:
pecan.abort(422, e.value)
except Exception as e:
@ -1429,7 +1432,7 @@ class SubcloudsController(object):
LOG.info('update %s set %s=%s' % (subcloud_name, endpoint, status))
context = restcomm.extract_context_from_environ()
self.rpc_client.update_subcloud_endpoint_status(
self.dcmanager_state_rpc_client.update_subcloud_endpoint_status(
context, subcloud_name, endpoint, status)
result = {'result': 'OK'}

View File

@ -15,14 +15,14 @@ class Auditor(object):
# todo(abailey): determine if add_metaclass is still required
six.add_metaclass(abc.ABCMeta)
def __init__(self, context, dcmanager_rpc_client, endpoint_type):
def __init__(self, context, dcmanager_state_rpc_client, endpoint_type):
self.context = context
self.dcmanager_rpc_client = dcmanager_rpc_client
self.state_rpc_client = dcmanager_state_rpc_client
self.endpoint_type = endpoint_type
def _set_subcloud_sync_status(self, sc_name, sc_sync_status):
"""Update the sync status for endpoint."""
self.dcmanager_rpc_client.update_subcloud_endpoint_status(
self.state_rpc_client.update_subcloud_endpoint_status(
self.context,
subcloud_name=sc_name,
endpoint_type=self.endpoint_type,

View File

@ -70,15 +70,15 @@ class FirmwareAuditData(object):
class FirmwareAudit(object):
"""Manages tasks related to firmware audits."""
def __init__(self, context, dcmanager_rpc_client):
def __init__(self, context, dcmanager_state_rpc_client):
LOG.debug('FirmwareAudit initialization...')
self.context = context
self.dcmanager_rpc_client = dcmanager_rpc_client
self.state_rpc_client = dcmanager_state_rpc_client
self.audit_count = 0
def _update_subcloud_sync_status(self, sc_name, sc_endpoint_type,
sc_status):
self.dcmanager_rpc_client.update_subcloud_endpoint_status(
self.state_rpc_client.update_subcloud_endpoint_status(
self.context,
subcloud_name=sc_name,
endpoint_type=sc_endpoint_type,

View File

@ -27,10 +27,10 @@ MONITORED_ALARM_ENTITIES = ['system.certificate.kubernetes-root-ca', ]
class KubeRootcaUpdateAudit(Auditor):
"""Manages tasks related to kube rootca update audits."""
def __init__(self, context, dcmanager_rpc_client):
def __init__(self, context, dcmanager_state_rpc_client):
super(KubeRootcaUpdateAudit, self).__init__(
context,
dcmanager_rpc_client,
dcmanager_state_rpc_client,
dcorch_consts.ENDPOINT_TYPE_KUBE_ROOTCA
)
self.audit_type = "kube rootca update"

View File

@ -52,15 +52,15 @@ class KubernetesAuditData(object):
class KubernetesAudit(object):
"""Manages tasks related to kubernetes audits."""
def __init__(self, context, dcmanager_rpc_client):
def __init__(self, context, dcmanager_state_rpc_client):
LOG.debug('KubernetesAudit initialization...')
self.context = context
self.dcmanager_rpc_client = dcmanager_rpc_client
self.state_rpc_client = dcmanager_state_rpc_client
self.audit_count = 0
def _update_subcloud_sync_status(self, sc_name, sc_endpoint_type,
sc_status):
self.dcmanager_rpc_client.update_subcloud_endpoint_status(
self.state_rpc_client.update_subcloud_endpoint_status(
self.context,
subcloud_name=sc_name,
endpoint_type=sc_endpoint_type,

View File

@ -58,15 +58,15 @@ class PatchAuditData(object):
class PatchAudit(object):
"""Manages tasks related to patch audits."""
def __init__(self, context, dcmanager_rpc_client):
def __init__(self, context, dcmanager_state_rpc_client):
LOG.debug('PatchAudit initialization...')
self.context = context
self.dcmanager_rpc_client = dcmanager_rpc_client
self.state_rpc_client = dcmanager_state_rpc_client
self.audit_count = 0
def _update_subcloud_sync_status(self, sc_name, sc_endpoint_type,
sc_status):
self.dcmanager_rpc_client.update_subcloud_endpoint_status(
self.state_rpc_client.update_subcloud_endpoint_status(
self.context,
subcloud_name=sc_name,
endpoint_type=sc_endpoint_type,

View File

@ -66,14 +66,15 @@ class DCManagerAuditService(service.Service):
self.subcloud_audit_manager = None
def start(self):
self.init_tgm()
self.init_audit_managers()
target = oslo_messaging.Target(version=self.rpc_api_version,
server=self.host,
topic=self.topic)
self.target = target
self._rpc_server = rpc_messaging.get_rpc_server(self.target, self)
self._rpc_server.start()
self.init_tgm()
self.init_audit_managers()
super(DCManagerAuditService, self).start()
def init_tgm(self):
@ -99,7 +100,8 @@ class DCManagerAuditService(service.Service):
def stop(self):
self._stop_rpc_server()
self.TG.stop()
if self.TG:
self.TG.stop()
# Terminate the engine process
LOG.info("All threads were gone, terminating engine")
@ -228,7 +230,8 @@ class DCManagerAuditWorkerService(service.Service):
def stop(self):
self._stop_rpc_server()
self.TG.stop()
if self.TG:
self.TG.stop()
# Terminate the engine process
LOG.info("All threads were gone, terminating audit-worker engine")

View File

@ -58,6 +58,7 @@ class SubcloudAuditWorkerManager(manager.Manager):
service_name="subcloud_audit_worker_manager")
self.context = context.get_admin_context()
self.dcmanager_rpc_client = dcmanager_rpc_client.ManagerClient()
self.state_rpc_client = dcmanager_rpc_client.SubcloudStateClient()
# Keeps track of greenthreads we create to do work.
self.thread_group_manager = scheduler.ThreadGroupManager(
thread_pool_size=100)
@ -66,15 +67,15 @@ class SubcloudAuditWorkerManager(manager.Manager):
self.alarm_aggr = alarm_aggregation.AlarmAggregation(self.context)
# todo(abailey): refactor the design pattern for adding new audits
self.patch_audit = patch_audit.PatchAudit(
self.context, self.dcmanager_rpc_client)
self.context, self.state_rpc_client)
self.firmware_audit = firmware_audit.FirmwareAudit(
self.context, self.dcmanager_rpc_client)
self.context, self.state_rpc_client)
self.kubernetes_audit = kubernetes_audit.KubernetesAudit(
self.context, self.dcmanager_rpc_client)
self.context, self.state_rpc_client)
self.kube_rootca_update_audit = \
kube_rootca_update_audit.KubeRootcaUpdateAudit(
self.context,
self.dcmanager_rpc_client)
self.state_rpc_client)
self.pid = os.getpid()
def audit_subclouds(self,
@ -184,14 +185,14 @@ class SubcloudAuditWorkerManager(manager.Manager):
update_state_only=False,
audit_fail_count=None):
try:
self.dcmanager_rpc_client.update_subcloud_availability(
self.state_rpc_client.update_subcloud_availability(
self.context, subcloud_name, availability_status,
update_state_only, audit_fail_count)
LOG.info('Notifying dcmanager, subcloud:%s, availability:%s' %
LOG.info('Notifying dcmanager-state, subcloud:%s, availability:%s' %
(subcloud_name,
availability_status))
except Exception:
LOG.exception('Problem informing dcmanager of subcloud '
LOG.exception('Problem informing dcmanager-state of subcloud '
'availability state change, subcloud: %s'
% subcloud_name)

View File

@ -0,0 +1,69 @@
#!/usr/bin/env python
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#
# Copyright (c) 2022 Wind River Systems, Inc.
#
# The right to copy, distribute, modify, or otherwise make use
# of this software may be licensed only pursuant to the terms
# of an applicable Wind River license agreement.
#
"""
DC Manager State Engine Server.
"""
import eventlet
eventlet.monkey_patch()
from oslo_config import cfg
from oslo_i18n import _lazy
from oslo_log import log as logging
from oslo_service import service
from dcmanager.common import config
from dcmanager.common import messaging
from dcorch.common import messaging as dcorch_messaging
_lazy.enable_lazy()
config.register_options()
config.register_keystone_options()
LOG = logging.getLogger('dcmanager.state')
def main():
logging.register_options(cfg.CONF)
cfg.CONF(project='dcmanager', prog='dcmanager-state')
logging.setup(cfg.CONF, 'dcmanager-state')
logging.set_defaults()
messaging.setup()
dcorch_messaging.setup()
from dcmanager.state import service as state
# Override values from /etc/dcmanager/dcmanager.conf specific
# to dcmanager-state:
cfg.CONF.set_override('max_pool_size', 10, group='database')
cfg.CONF.set_override('max_overflow', 100, group='database')
LOG.info("Starting...")
LOG.debug("Configuration:")
cfg.CONF.log_opt_values(LOG, logging.DEBUG)
LOG.info("Launching service, host=%s, state_workers=%s ...",
cfg.CONF.host, cfg.CONF.state_workers)
srv = state.DCManagerStateService(cfg.CONF.host)
launcher = service.launch(cfg.CONF, srv, workers=cfg.CONF.state_workers)
launcher.wait()
if __name__ == '__main__':
main()

View File

@ -147,10 +147,15 @@ scheduler_opts = [
]
common_opts = [
# TODO(kmacleod): these worker parameters should be added to puppet
# in order that they make it into /etc/dcmanager/dcmanager.conf
# for system engineering purposes
cfg.IntOpt('workers', default=1,
help='number of workers'),
cfg.IntOpt('orch_workers', default=1,
help='number of orchestrator workers'),
cfg.IntOpt('state_workers', default=4,
help='number of state workers'),
cfg.IntOpt('audit_workers', default=1,
help='number of audit workers'),
cfg.IntOpt('audit_worker_workers', default=4,

View File

@ -16,11 +16,9 @@
RPC_API_VERSION = "1.0"
TOPIC_DC_MANAGER = "dcmanager"
TOPIC_DC_MANAGER_STATE = "dcmanager-state"
TOPIC_DC_MANAGER_AUDIT = "dcmanager-audit"
TOPIC_DC_MANAGER_AUDIT_WORKER = "dcmanager-audit-worker"
TOPIC_DC_MANAGER_ORCHESTRATOR = "dcmanager-orchestrator"
CERTS_VAULT_DIR = "/opt/dc-vault/certs"

View File

@ -22,15 +22,12 @@ import oslo_messaging
from oslo_service import service
from oslo_utils import uuidutils
from dcorch.common import consts as dcorch_consts
from dcmanager.audit import rpcapi as dcmanager_audit_rpc_client
from dcmanager.common import consts
from dcmanager.common import context
from dcmanager.common import exceptions
from dcmanager.common.i18n import _
from dcmanager.common import messaging as rpc_messaging
from dcmanager.common import scheduler
from dcmanager.manager.subcloud_manager import SubcloudManager
CONF = cfg.CONF
@ -69,21 +66,16 @@ class DCManagerService(service.Service):
# The following are initialized here, but assigned in start() which
# happens after the fork when spawning multiple worker processes
self.engine_id = None
self.TG = None
self.target = None
self._rpc_server = None
self.subcloud_manager = None
self.audit_rpc_client = None
def init_tgm(self):
self.TG = scheduler.ThreadGroupManager()
def init_managers(self):
self.subcloud_manager = SubcloudManager()
def start(self):
self.dcmanager_id = uuidutils.generate_uuid()
self.init_tgm()
self.init_managers()
target = oslo_messaging.Target(version=self.rpc_api_version,
server=self.host,
@ -151,60 +143,6 @@ class DCManagerService(service.Service):
subcloud_id,
payload)
@request_context
def update_subcloud_endpoint_status(self, context, subcloud_name=None,
endpoint_type=None,
sync_status=consts.
SYNC_STATUS_OUT_OF_SYNC,
alarmable=True):
# Updates subcloud endpoint sync status
LOG.info("Handling update_subcloud_endpoint_status request for "
"subcloud: (%s) endpoint: (%s) status:(%s) "
% (subcloud_name, endpoint_type, sync_status))
self.subcloud_manager. \
update_subcloud_endpoint_status(context,
subcloud_name,
endpoint_type,
sync_status,
alarmable)
# If the patching sync status is being set to unknown, trigger the
# patching audit so it can update the sync status ASAP.
if endpoint_type == dcorch_consts.ENDPOINT_TYPE_PATCHING and \
sync_status == consts.SYNC_STATUS_UNKNOWN:
self.audit_rpc_client.trigger_patch_audit(context)
# If the firmware sync status is being set to unknown, trigger the
# firmware audit so it can update the sync status ASAP.
if endpoint_type == dcorch_consts.ENDPOINT_TYPE_FIRMWARE and \
sync_status == consts.SYNC_STATUS_UNKNOWN:
self.audit_rpc_client.trigger_firmware_audit(context)
# If the kubernetes sync status is being set to unknown, trigger the
# kubernetes audit so it can update the sync status ASAP.
if endpoint_type == dcorch_consts.ENDPOINT_TYPE_KUBERNETES and \
sync_status == consts.SYNC_STATUS_UNKNOWN:
self.audit_rpc_client.trigger_kubernetes_audit(context)
return
@request_context
def update_subcloud_availability(self, context,
subcloud_name,
availability_status,
update_state_only=False,
audit_fail_count=None):
# Updates subcloud availability
LOG.info("Handling update_subcloud_availability request for: %s" %
subcloud_name)
self.subcloud_manager.update_subcloud_availability(
context,
subcloud_name,
availability_status,
update_state_only,
audit_fail_count)
@request_context
def update_subcloud_sync_endpoint_type(self, context, subcloud_name,
endpoint_type_list,
@ -228,9 +166,6 @@ class DCManagerService(service.Service):
def stop(self):
self._stop_rpc_server()
self.TG.stop()
# Terminate the engine process
LOG.info("All threads were gone, terminating engine")
super(DCManagerService, self).stop()

View File

@ -48,9 +48,8 @@ from dcmanager.common import exceptions
from dcmanager.common.i18n import _
from dcmanager.common import manager
from dcmanager.common import utils
from dcmanager.rpc import client as rpc_client
from dcmanager.db import api as db_api
from dcmanager.rpc import client as dcmanager_rpc_client
from fm_api import constants as fm_const
from fm_api import fm_api
@ -102,23 +101,6 @@ TRANSITORY_STATES = {consts.DEPLOY_STATE_NONE: consts.DEPLOY_STATE_DEPLOY_PREP_F
}
def sync_update_subcloud_endpoint_status(func):
"""Synchronized lock decorator for _update_subcloud_endpoint_status. """
def _get_lock_and_call(*args, **kwargs):
"""Get a single fair lock per subcloud based on subcloud name. """
# subcloud name is the 3rd argument to
# _update_subcloud_endpoint_status()
@utils.synchronized(args[2], external=False, fair=True)
def _call_func(*args, **kwargs):
return func(*args, **kwargs)
return _call_func(*args, **kwargs)
return _get_lock_and_call
class SubcloudManager(manager.Manager):
"""Manages tasks related to subclouds."""
@ -131,6 +113,7 @@ class SubcloudManager(manager.Manager):
self.dcorch_rpc_client = dcorch_rpc_client.EngineClient()
self.fm_api = fm_api.FaultAPIs()
self.audit_rpc_client = dcmanager_audit_rpc_client.ManagerAuditClient()
self.state_rpc_client = dcmanager_rpc_client.SubcloudStateClient()
@staticmethod
def _get_subcloud_cert_name(subcloud_name):
@ -1240,11 +1223,10 @@ class SubcloudManager(manager.Manager):
location=location)
if management_state == consts.MANAGEMENT_UNMANAGED:
# set all endpoint statuses to unknown, except the dc-cert
# endpoint which continues to be audited for unmanaged
# subclouds
self.update_subcloud_endpoint_status(
self.state_rpc_client.update_subcloud_endpoint_status_sync(
context,
subcloud_name=subcloud.name,
endpoint_type=None,
@ -1254,7 +1236,7 @@ class SubcloudManager(manager.Manager):
# Subcloud is managed
# Tell cert-mon to audit endpoint certificate
LOG.info('Request for managed audit for %s' % subcloud.name)
dc_notification = rpc_client.DCManagerNotifications()
dc_notification = dcmanager_rpc_client.DCManagerNotifications()
dc_notification.subcloud_managed(context, subcloud.name)
# Since sysinv user is sync'ed during bootstrap, trigger the
# related audits. Patch and load audits are delayed until the
@ -1266,417 +1248,6 @@ class SubcloudManager(manager.Manager):
return db_api.subcloud_db_model_to_dict(subcloud)
def _do_update_subcloud_endpoint_status(self, context, subcloud_id,
endpoint_type, sync_status,
alarmable, ignore_endpoints=None):
"""Update online/managed subcloud endpoint status
:param context: request context object
:param subcloud_id: id of subcloud to update
:param endpoint_type: endpoint type to update
:param sync_status: sync status to set
:param alarmable: controls raising an alarm if applicable
:param ignore_endpoints: list of endpoints to ignore (only used if
endpoint_type is None)
"""
if ignore_endpoints is None:
ignore_endpoints = []
subcloud_status_list = []
subcloud = None
original_identity_status = None
# retrieve the info from the db for this subcloud.
# subcloud_id should not be None
try:
for subcloud, subcloud_status in db_api. \
subcloud_get_with_status(context, subcloud_id):
if subcloud_status:
subcloud_status_list.append(
db_api.subcloud_endpoint_status_db_model_to_dict(
subcloud_status))
if subcloud_status.endpoint_type == \
dcorch_consts.ENDPOINT_TYPE_IDENTITY:
original_identity_status = subcloud_status.sync_status
except Exception as e:
LOG.exception(e)
raise e
if subcloud:
if endpoint_type:
# updating a single endpoint on a single subcloud
for subcloud_status in subcloud_status_list:
if subcloud_status['endpoint_type'] == endpoint_type:
if subcloud_status['sync_status'] == sync_status:
# No change in the sync_status
LOG.debug("Sync status (%s) for subcloud %s did "
"not change - ignore update" %
(sync_status, subcloud.name))
return
# We found the endpoint
break
else:
# We did not find the endpoint
raise exceptions.BadRequest(
resource='subcloud',
msg='Endpoint %s not found for subcloud' %
endpoint_type)
LOG.info("Updating subcloud:%s endpoint:%s sync:%s" %
(subcloud.name, endpoint_type, sync_status))
db_api.subcloud_status_update(context,
subcloud_id,
endpoint_type,
sync_status)
# Trigger subcloud patch and load audits for the subcloud after
# its identity endpoint turns to other status from unknown
if endpoint_type == dcorch_consts.ENDPOINT_TYPE_IDENTITY \
and sync_status != consts.SYNC_STATUS_UNKNOWN \
and original_identity_status == consts.SYNC_STATUS_UNKNOWN:
LOG.debug('Request for patch and load audit for %s after updating '
'identity out of unknown' % subcloud.name)
self.audit_rpc_client.trigger_subcloud_patch_load_audits(
context, subcloud_id)
entity_instance_id = "subcloud=%s.resource=%s" % \
(subcloud.name, endpoint_type)
fault = self.fm_api.get_fault(
fm_const.FM_ALARM_ID_DC_SUBCLOUD_RESOURCE_OUT_OF_SYNC,
entity_instance_id)
if (sync_status != consts.SYNC_STATUS_OUT_OF_SYNC) \
and fault:
try:
self.fm_api.clear_fault(
fm_const.FM_ALARM_ID_DC_SUBCLOUD_RESOURCE_OUT_OF_SYNC, # noqa
entity_instance_id)
except Exception as e:
LOG.exception(e)
elif not fault and alarmable and \
(sync_status == consts.SYNC_STATUS_OUT_OF_SYNC):
entity_type_id = fm_const.FM_ENTITY_TYPE_SUBCLOUD
try:
fault = fm_api.Fault(
alarm_id=fm_const.FM_ALARM_ID_DC_SUBCLOUD_RESOURCE_OUT_OF_SYNC, # noqa
alarm_state=fm_const.FM_ALARM_STATE_SET,
entity_type_id=entity_type_id,
entity_instance_id=entity_instance_id,
severity=fm_const.FM_ALARM_SEVERITY_MAJOR,
reason_text=("%s %s sync_status is "
"out-of-sync" %
(subcloud.name, endpoint_type)),
alarm_type=fm_const.FM_ALARM_TYPE_0,
probable_cause=fm_const.ALARM_PROBABLE_CAUSE_2,
proposed_repair_action="If problem persists "
"contact next level "
"of support",
service_affecting=False)
self.fm_api.set_fault(fault)
except Exception as e:
LOG.exception(e)
else:
# update all endpoints on this subcloud
LOG.info("Updating all endpoints on subcloud: %s sync: %s "
"ignore_endpoints: %s" %
(subcloud.name, sync_status, ignore_endpoints))
# TODO(yuxing): The following code can be further optimized when
# batch alarm clearance APIs are available, so we don't need to
# loop over all the endpoints of a given subcloud, e.g.
# if not ignore_endpoints:
# db_api.subcloud_status_update_endpoints_all(...)
# else:
# db_api.subcloud_status_update_endpoints(...)
endpoint_to_update_list = []
for entry in subcloud_status_list:
endpoint = entry[consts.ENDPOINT_TYPE]
if endpoint in ignore_endpoints:
# Do not update this endpoint
continue
endpoint_to_update_list.append(endpoint)
entity_instance_id = "subcloud=%s.resource=%s" % \
(subcloud.name, endpoint)
fault = self.fm_api.get_fault(
fm_const.FM_ALARM_ID_DC_SUBCLOUD_RESOURCE_OUT_OF_SYNC,
entity_instance_id)
# TODO(yuxing): batch clear all the out-of-sync alarms of a
# given subcloud if fm_api support it. Be careful with the
# dc-cert endpoint when adding the above; the endpoint
# alarm must remain for offline subclouds.
if (sync_status != consts.SYNC_STATUS_OUT_OF_SYNC) \
and fault:
try:
self.fm_api.clear_fault(
fm_const.FM_ALARM_ID_DC_SUBCLOUD_RESOURCE_OUT_OF_SYNC, # noqa
entity_instance_id)
except Exception as e:
LOG.exception(e)
elif not fault and alarmable and \
(sync_status == consts.SYNC_STATUS_OUT_OF_SYNC):
entity_type_id = fm_const.FM_ENTITY_TYPE_SUBCLOUD
try:
fault = fm_api.Fault(
alarm_id=fm_const.FM_ALARM_ID_DC_SUBCLOUD_RESOURCE_OUT_OF_SYNC, # noqa
alarm_state=fm_const.FM_ALARM_STATE_SET,
entity_type_id=entity_type_id,
entity_instance_id=entity_instance_id,
severity=fm_const.FM_ALARM_SEVERITY_MAJOR,
reason_text=("%s %s sync_status is "
"out-of-sync" %
(subcloud.name, endpoint)),
alarm_type=fm_const.FM_ALARM_TYPE_0,
probable_cause=fm_const.ALARM_PROBABLE_CAUSE_2,
proposed_repair_action="If problem persists "
"contact next level "
"of support",
service_affecting=False)
self.fm_api.set_fault(fault)
except Exception as e:
LOG.exception(e)
if endpoint_to_update_list:
try:
db_api.subcloud_status_update_endpoints(context, subcloud_id,
endpoint_to_update_list,
sync_status)
except Exception as e:
LOG.exception(e)
else:
LOG.error("Subcloud not found:%s" % subcloud_id)
@sync_update_subcloud_endpoint_status
def _update_subcloud_endpoint_status(
self, context,
subcloud_name,
endpoint_type=None,
sync_status=consts.SYNC_STATUS_OUT_OF_SYNC,
alarmable=True,
ignore_endpoints=None):
"""Update subcloud endpoint status
:param context: request context object
:param subcloud_name: name of subcloud to update
:param endpoint_type: endpoint type to update
:param sync_status: sync status to set
:param alarmable: controls raising an alarm if applicable
:param ignore_endpoints: list of endpoints to ignore (only used if
endpoint_type is None)
"""
if ignore_endpoints is None:
ignore_endpoints = []
if not subcloud_name:
raise exceptions.BadRequest(
resource='subcloud',
msg='Subcloud name not provided')
try:
subcloud = db_api.subcloud_get_by_name(context, subcloud_name)
except Exception as e:
LOG.exception(e)
raise e
# Rules for updating sync status:
#
# Always update if not in-sync.
#
# Otherwise, only update the sync status if managed and online
# (unless dc-cert).
#
# Most endpoints are audited only when the subcloud is managed and
# online. An exception is the dc-cert endpoint, which is audited
# whenever the subcloud is online (managed or unmanaged).
#
# This means if a subcloud is going offline or unmanaged, then
# the sync status update must be done first.
#
if (sync_status != consts.SYNC_STATUS_IN_SYNC or
((subcloud.availability_status == consts.AVAILABILITY_ONLINE) and
(subcloud.management_state == consts.MANAGEMENT_MANAGED
or endpoint_type == dcorch_consts.ENDPOINT_TYPE_DC_CERT))):
# update a single subcloud
try:
self._do_update_subcloud_endpoint_status(context,
subcloud.id,
endpoint_type,
sync_status,
alarmable,
ignore_endpoints)
except Exception as e:
LOG.exception(e)
raise e
else:
LOG.info("Ignoring subcloud sync_status update for subcloud:%s "
"availability:%s management:%s endpoint:%s sync:%s" %
(subcloud_name, subcloud.availability_status,
subcloud.management_state, endpoint_type, sync_status))
def update_subcloud_endpoint_status(
self, context,
subcloud_name=None,
endpoint_type=None,
sync_status=consts.SYNC_STATUS_OUT_OF_SYNC,
alarmable=True,
ignore_endpoints=None):
"""Update subcloud endpoint status
:param context: request context object
:param subcloud_name: name of subcloud to update
:param endpoint_type: endpoint type to update
:param sync_status: sync status to set
:param alarmable: controls raising an alarm if applicable
:param ignore_endpoints: list of endpoints to ignore (only used if
endpoint_type is None)
"""
if ignore_endpoints is None:
ignore_endpoints = []
if subcloud_name:
self._update_subcloud_endpoint_status(
context, subcloud_name, endpoint_type, sync_status, alarmable,
ignore_endpoints)
else:
# update all subclouds
for subcloud in db_api.subcloud_get_all(context):
self._update_subcloud_endpoint_status(
context, subcloud.name, endpoint_type, sync_status,
alarmable, ignore_endpoints)
def _update_subcloud_state(self, context, subcloud_name,
management_state, availability_status):
try:
self.dcorch_rpc_client.update_subcloud_states(
context, subcloud_name, management_state, availability_status)
LOG.info('Notifying dcorch, subcloud:%s management: %s, '
'availability:%s' %
(subcloud_name,
management_state,
availability_status))
except Exception:
LOG.exception('Problem informing dcorch of subcloud state change,'
'subcloud: %s' % subcloud_name)
def _raise_or_clear_subcloud_status_alarm(self, subcloud_name,
availability_status):
entity_instance_id = "subcloud=%s" % subcloud_name
fault = self.fm_api.get_fault(
fm_const.FM_ALARM_ID_DC_SUBCLOUD_OFFLINE,
entity_instance_id)
if fault and (availability_status == consts.AVAILABILITY_ONLINE):
try:
self.fm_api.clear_fault(
fm_const.FM_ALARM_ID_DC_SUBCLOUD_OFFLINE,
entity_instance_id)
except Exception:
LOG.exception("Failed to clear offline alarm for subcloud: %s",
subcloud_name)
elif not fault and \
(availability_status == consts.AVAILABILITY_OFFLINE):
try:
fault = fm_api.Fault(
alarm_id=fm_const.FM_ALARM_ID_DC_SUBCLOUD_OFFLINE,
alarm_state=fm_const.FM_ALARM_STATE_SET,
entity_type_id=fm_const.FM_ENTITY_TYPE_SUBCLOUD,
entity_instance_id=entity_instance_id,
severity=fm_const.FM_ALARM_SEVERITY_CRITICAL,
reason_text=('%s is offline' % subcloud_name),
alarm_type=fm_const.FM_ALARM_TYPE_0,
probable_cause=fm_const.ALARM_PROBABLE_CAUSE_29,
proposed_repair_action="Wait for subcloud to "
"become online; if "
"problem persists contact "
"next level of support.",
service_affecting=True)
self.fm_api.set_fault(fault)
except Exception:
LOG.exception("Failed to raise offline alarm for subcloud: %s",
subcloud_name)
def update_subcloud_availability(self, context, subcloud_name,
availability_status,
update_state_only=False,
audit_fail_count=None):
try:
subcloud = db_api.subcloud_get_by_name(context, subcloud_name)
except Exception:
LOG.exception("Failed to get subcloud by name: %s" % subcloud_name)
raise
if update_state_only:
# Nothing has changed, but we want to send a state update for this
# subcloud as an audit. Get the most up-to-date data.
self._update_subcloud_state(context, subcloud_name,
subcloud.management_state,
availability_status)
elif availability_status is None:
# only update the audit fail count
try:
db_api.subcloud_update(self.context, subcloud.id,
audit_fail_count=audit_fail_count)
except exceptions.SubcloudNotFound:
# slim possibility subcloud could have been deleted since
# we found it in db, ignore this benign error.
LOG.info('Ignoring SubcloudNotFound when attempting '
'audit_fail_count update: %s' % subcloud_name)
return
else:
self._raise_or_clear_subcloud_status_alarm(subcloud_name,
availability_status)
if availability_status == consts.AVAILABILITY_OFFLINE:
# Subcloud is going offline, set all endpoint statuses to
# unknown.
self._update_subcloud_endpoint_status(
context, subcloud_name, endpoint_type=None,
sync_status=consts.SYNC_STATUS_UNKNOWN)
try:
updated_subcloud = db_api.subcloud_update(
context,
subcloud.id,
availability_status=availability_status,
audit_fail_count=audit_fail_count)
except exceptions.SubcloudNotFound:
# slim possibility subcloud could have been deleted since
# we found it in db, ignore this benign error.
LOG.info('Ignoring SubcloudNotFound when attempting state'
' update: %s' % subcloud_name)
return
if availability_status == consts.AVAILABILITY_ONLINE:
# Subcloud is going online
# Tell cert-mon to audit endpoint certificate.
LOG.info('Request for online audit for %s' % subcloud_name)
dc_notification = rpc_client.DCManagerNotifications()
dc_notification.subcloud_online(context, subcloud_name)
# Trigger all the audits for the subcloud so it can update the
# sync status ASAP.
self.audit_rpc_client.trigger_subcloud_audits(context,
subcloud.id)
# Send dcorch a state update
self._update_subcloud_state(context, subcloud_name,
updated_subcloud.management_state,
availability_status)
def update_subcloud_sync_endpoint_type(self, context,
subcloud_name,
endpoint_type_list,
@ -1719,10 +1290,7 @@ class SubcloudManager(manager.Manager):
' type change, subcloud: %s' % subcloud_name)
def handle_subcloud_operations_in_progress(self):
"""Identify subclouds in transitory stages and update subcloud deploy
state to failure.
"""
"""Identify subclouds in transitory stages and update subcloud deploy state to failure."""
LOG.info('Identifying subclouds in transitory stages.')

View File

@ -32,9 +32,9 @@ class FinishingFwUpdateState(BaseState):
"Setting endpoint status of %s to %s"
% (dcorch_consts.ENDPOINT_TYPE_FIRMWARE,
consts.SYNC_STATUS_IN_SYNC))
rpc_client = dcmanager_rpc_client.ManagerClient()
dcmanager_state_rpc_client = dcmanager_rpc_client.SubcloudStateClient()
# The subcloud name is the same as the region in the strategy_step
rpc_client.update_subcloud_endpoint_status(
dcmanager_state_rpc_client.update_subcloud_endpoint_status(
self.context,
subcloud_name=self.get_region_name(strategy_step),
endpoint_type=dcorch_consts.ENDPOINT_TYPE_FIRMWARE,

View File

@ -56,6 +56,56 @@ class RPCClient(object):
return client.cast(ctxt, method, **kwargs)
class SubcloudStateClient(RPCClient):
    """Client to update subcloud availability."""

    BASE_RPC_API_VERSION = '1.0'

    def __init__(self):
        super(SubcloudStateClient, self).__init__(
            consts.TOPIC_DC_MANAGER_STATE,
            self.BASE_RPC_API_VERSION)

    def update_subcloud_availability(self, ctxt,
                                     subcloud_name,
                                     availability_status,
                                     update_state_only=False,
                                     audit_fail_count=None):
        # Note: synchronous
        msg = self.make_msg('update_subcloud_availability',
                            subcloud_name=subcloud_name,
                            availability_status=availability_status,
                            update_state_only=update_state_only,
                            audit_fail_count=audit_fail_count)
        return self.call(ctxt, msg)

    def update_subcloud_endpoint_status(
            self, ctxt, subcloud_name=None, endpoint_type=None,
            sync_status=consts.SYNC_STATUS_OUT_OF_SYNC,
            ignore_endpoints=None):
        # Note: This is an asynchronous operation.
        # See below for synchronous method call
        msg = self.make_msg('update_subcloud_endpoint_status',
                            subcloud_name=subcloud_name,
                            endpoint_type=endpoint_type,
                            sync_status=sync_status,
                            ignore_endpoints=ignore_endpoints)
        return self.cast(ctxt, msg)

    def update_subcloud_endpoint_status_sync(
            self, ctxt, subcloud_name=None, endpoint_type=None,
            sync_status=consts.SYNC_STATUS_OUT_OF_SYNC,
            ignore_endpoints=None):
        # Note: synchronous
        msg = self.make_msg('update_subcloud_endpoint_status',
                            subcloud_name=subcloud_name,
                            endpoint_type=endpoint_type,
                            sync_status=sync_status,
                            ignore_endpoints=ignore_endpoints)
        return self.call(ctxt, msg)
class ManagerClient(RPCClient):
"""Client side of the DC Manager rpc API.
@ -105,28 +155,6 @@ class ManagerClient(RPCClient):
subcloud_id=subcloud_id,
payload=payload))
def update_subcloud_endpoint_status(self, ctxt, subcloud_name=None,
endpoint_type=None,
sync_status=consts.
SYNC_STATUS_OUT_OF_SYNC):
return self.cast(ctxt, self.make_msg('update_subcloud_endpoint_status',
subcloud_name=subcloud_name,
endpoint_type=endpoint_type,
sync_status=sync_status))
def update_subcloud_availability(self, ctxt,
subcloud_name,
availability_status,
update_state_only=False,
audit_fail_count=None):
return self.call(
ctxt,
self.make_msg('update_subcloud_availability',
subcloud_name=subcloud_name,
availability_status=availability_status,
update_state_only=update_state_only,
audit_fail_count=audit_fail_count))
def update_subcloud_sync_endpoint_type(self, ctxt,
subcloud_name,
endpoint_type_list,

View File

@ -0,0 +1,12 @@
===============================
Service
===============================
The DC Manager State Service is responsible for:

- Subcloud state updates coming from the dcmanager-manager service.

service.py:
    Runs the DC Manager State Service in multi-worker mode and establishes
    the RPC server.

subcloud_state_manager.py:
    Provides subcloud state updates.

View File

@ -0,0 +1,166 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#
# Copyright (c) 2017-2022 Wind River Systems, Inc.
#
# The right to copy, distribute, modify, or otherwise make use
# of this software may be licensed only pursuant to the terms
# of an applicable Wind River license agreement.
#
import functools
import six
from oslo_config import cfg
from oslo_log import log as logging
import oslo_messaging
from oslo_service import service
from dcorch.common import consts as dcorch_consts
from dcmanager.audit import rpcapi as dcmanager_audit_rpc_client
from dcmanager.common import consts
from dcmanager.common import context
from dcmanager.common import exceptions
from dcmanager.common.i18n import _
from dcmanager.common import messaging as rpc_messaging
from dcmanager.state.subcloud_state_manager import SubcloudStateManager
LOG = logging.getLogger(__name__)
def request_context(func):
    """Decorator for RPC endpoint methods.

    Coerces an incoming serialized context into a RequestContext and
    converts DCManagerException into an oslo.messaging ExpectedException
    so the dispatcher does not log it as an unexpected failure.
    """
    @functools.wraps(func)
    def wrapped(self, ctx, *args, **kwargs):
        needs_rebuild = (ctx is not None and
                         not isinstance(ctx, context.RequestContext))
        if needs_rebuild:
            ctx = context.RequestContext.from_dict(ctx.to_dict())
        try:
            return func(self, ctx, *args, **kwargs)
        except exceptions.DCManagerException:
            raise oslo_messaging.rpc.dispatcher.ExpectedException()

    return wrapped
class DCManagerStateService(service.Service):
    """Lifecycle manager for a running service.

    - All the methods in here are called from the RPC client.
    - If a RPC call does not have a corresponding method here, an exception
      will be thrown.
    - Arguments to these calls are added dynamically and will be treated as
      keyword arguments by the RPC client.
    """

    def __init__(self, host):
        super(DCManagerStateService, self).__init__()
        # NOTE(review): the 'host' argument is ignored in favour of the
        # configured host value -- confirm this is intentional.
        self.host = cfg.CONF.host
        self.rpc_api_version = consts.RPC_API_VERSION
        self.topic = consts.TOPIC_DC_MANAGER_STATE
        # The following are initialized here, but assigned in start() which
        # happens after the fork when spawning multiple worker processes
        self.engine_id = None
        self.target = None
        self._rpc_server = None
        self.subcloud_state_manager = None
        self.audit_rpc_client = None

    def _init_managers(self):
        # Created post-fork so each worker owns its own manager instance.
        self.subcloud_state_manager = SubcloudStateManager()

    def start(self):
        """Initialize managers and start the RPC server (runs post-fork)."""
        LOG.info("Starting %s", self.__class__.__name__)
        self._init_managers()
        target = oslo_messaging.Target(version=self.rpc_api_version,
                                       server=self.host,
                                       topic=self.topic)
        self.target = target
        self._rpc_server = rpc_messaging.get_rpc_server(self.target, self)
        self._rpc_server.start()
        # Used to notify dcmanager-audit
        self.audit_rpc_client = dcmanager_audit_rpc_client.ManagerAuditClient()
        super(DCManagerStateService, self).start()

    def _stop_rpc_server(self):
        # Stop RPC connection to prevent new requests
        LOG.debug(_("Attempting to stop engine service..."))
        try:
            self._rpc_server.stop()
            self._rpc_server.wait()
            LOG.info('Engine service stopped successfully')
        except Exception as ex:
            LOG.error('Failed to stop engine service: %s',
                      six.text_type(ex))

    def stop(self):
        """Stop the RPC server and shut the service down."""
        LOG.info("Stopping %s", self.__class__.__name__)
        self._stop_rpc_server()
        # Terminate the engine process
        LOG.info("All threads were gone, terminating engine")
        super(DCManagerStateService, self).stop()

    @request_context
    def update_subcloud_endpoint_status(self, context, subcloud_name=None,
                                        endpoint_type=None,
                                        sync_status=consts.
                                        SYNC_STATUS_OUT_OF_SYNC,
                                        alarmable=True,
                                        ignore_endpoints=None):
        """Handle an RPC request to update a subcloud endpoint sync status.

        When a sync status transitions to unknown the matching audit is
        triggered so the status can be refreshed ASAP.
        """
        LOG.info("Handling update_subcloud_endpoint_status request for "
                 "subcloud: (%s) endpoint: (%s) status:(%s) "
                 % (subcloud_name, endpoint_type, sync_status))

        self.subcloud_state_manager. \
            update_subcloud_endpoint_status(context,
                                            subcloud_name,
                                            endpoint_type,
                                            sync_status,
                                            alarmable,
                                            ignore_endpoints)

        if sync_status == consts.SYNC_STATUS_UNKNOWN:
            # An unknown sync status invalidates the endpoint's last audit
            # result, so trigger the relevant audit to refresh it ASAP.
            # (Dispatch table replaces three copy-pasted if-blocks.)
            audit_triggers = {
                dcorch_consts.ENDPOINT_TYPE_PATCHING:
                    self.audit_rpc_client.trigger_patch_audit,
                dcorch_consts.ENDPOINT_TYPE_FIRMWARE:
                    self.audit_rpc_client.trigger_firmware_audit,
                dcorch_consts.ENDPOINT_TYPE_KUBERNETES:
                    self.audit_rpc_client.trigger_kubernetes_audit,
            }
            trigger_audit = audit_triggers.get(endpoint_type)
            if trigger_audit:
                trigger_audit(context)
        return

    @request_context
    def update_subcloud_availability(self, context,
                                     subcloud_name,
                                     availability_status,
                                     update_state_only=False,
                                     audit_fail_count=None):
        """Handle an RPC request to update subcloud availability."""
        LOG.info("Handling update_subcloud_availability request for: %s" %
                 subcloud_name)
        self.subcloud_state_manager.update_subcloud_availability(
            context,
            subcloud_name,
            availability_status,
            update_state_only,
            audit_fail_count)

View File

@ -0,0 +1,522 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#
# Copyright (c) 2017-2022 Wind River Systems, Inc.
#
# The right to copy, distribute, modify, or otherwise make use
# of this software may be licensed only pursuant to the terms
# of an applicable Wind River license agreement.
#
import functools

from oslo_log import log as logging

from dcmanager.audit import rpcapi as dcmanager_audit_rpc_client
from dcmanager.common import consts
from dcmanager.common import context
from dcmanager.common import exceptions
from dcmanager.common import manager
from dcmanager.common import utils
from dcmanager.db import api as db_api
from dcmanager.rpc import client as rpc_client
from dcorch.common import consts as dcorch_consts
from dcorch.rpc import client as dcorch_rpc_client
from fm_api import constants as fm_const
from fm_api import fm_api
LOG = logging.getLogger(__name__)
def sync_update_subcloud_endpoint_status(func):
    """Synchronized lock decorator for _update_subcloud_endpoint_status.

    Serializes endpoint status updates per subcloud using an external fair
    lock keyed on the subcloud name.
    """
    # functools.wraps preserves the wrapped function's name/docstring
    # (consistent with the request_context decorator in service.py).
    @functools.wraps(func)
    def _get_lock_and_call(*args, **kwargs):
        """Get a single fair lock per subcloud based on subcloud name. """

        # subcloud name is the 3rd argument to
        # _update_subcloud_endpoint_status()
        @utils.synchronized(args[2], external=True, fair=True)
        def _call_func(*args, **kwargs):
            return func(*args, **kwargs)

        return _call_func(*args, **kwargs)

    return _get_lock_and_call
class SubcloudStateManager(manager.Manager):
    """Manages tasks related to subcloud state.

    Handles subcloud availability and endpoint sync-status updates,
    raising/clearing the corresponding FM alarms and notifying dcorch
    and the audit service as required.
    """

    def __init__(self, *args, **kwargs):
        LOG.debug('SubcloudStateManager initialization...')

        # NOTE(review): service_name is kept as "subcloud_manager" from the
        # code this class was refactored out of -- confirm whether it should
        # be renamed to "subcloud_state_manager".
        super(SubcloudStateManager,
              self).__init__(service_name="subcloud_manager", *args, **kwargs)
        self.context = context.get_admin_context()
        self.dcorch_rpc_client = dcorch_rpc_client.EngineClient()
        self.fm_api = fm_api.FaultAPIs()
        self.audit_rpc_client = dcmanager_audit_rpc_client.ManagerAuditClient()

    def _clear_or_raise_out_of_sync_alarm(self, subcloud_name, endpoint,
                                          sync_status, alarmable):
        """Clear or raise the out-of-sync alarm for a single endpoint.

        Clears the alarm when the endpoint is no longer out-of-sync; raises
        it when alarmable and newly out-of-sync. FM errors are logged only.
        """
        entity_instance_id = "subcloud=%s.resource=%s" % \
                             (subcloud_name, endpoint)
        fault = self.fm_api.get_fault(
            fm_const.FM_ALARM_ID_DC_SUBCLOUD_RESOURCE_OUT_OF_SYNC,
            entity_instance_id)

        if (sync_status != consts.SYNC_STATUS_OUT_OF_SYNC) \
                and fault:
            try:
                self.fm_api.clear_fault(
                    fm_const.FM_ALARM_ID_DC_SUBCLOUD_RESOURCE_OUT_OF_SYNC,  # noqa
                    entity_instance_id)
            except Exception as e:
                LOG.exception(e)
        elif not fault and alarmable and \
                (sync_status == consts.SYNC_STATUS_OUT_OF_SYNC):
            entity_type_id = fm_const.FM_ENTITY_TYPE_SUBCLOUD
            try:
                fault = fm_api.Fault(
                    alarm_id=fm_const.FM_ALARM_ID_DC_SUBCLOUD_RESOURCE_OUT_OF_SYNC,  # noqa
                    alarm_state=fm_const.FM_ALARM_STATE_SET,
                    entity_type_id=entity_type_id,
                    entity_instance_id=entity_instance_id,
                    severity=fm_const.FM_ALARM_SEVERITY_MAJOR,
                    reason_text=("%s %s sync_status is "
                                 "out-of-sync" %
                                 (subcloud_name, endpoint)),
                    alarm_type=fm_const.FM_ALARM_TYPE_0,
                    probable_cause=fm_const.ALARM_PROBABLE_CAUSE_2,
                    proposed_repair_action="If problem persists "
                                           "contact next level "
                                           "of support",
                    service_affecting=False)
                self.fm_api.set_fault(fault)
            except Exception as e:
                LOG.exception(e)

    def _do_update_subcloud_endpoint_status(self, context, subcloud_id,
                                            endpoint_type, sync_status,
                                            alarmable, ignore_endpoints=None):
        """Update online/managed subcloud endpoint status

        :param context: request context object
        :param subcloud_id: id of subcloud to update
        :param endpoint_type: endpoint type to update; when None, all
            endpoints not in ignore_endpoints are updated
        :param sync_status: sync status to set
        :param alarmable: controls raising an alarm if applicable
        :param ignore_endpoints: list of endpoints to ignore (only used if
            endpoint_type is None)
        """
        if ignore_endpoints is None:
            ignore_endpoints = []

        subcloud_status_list = []
        subcloud = None
        original_identity_status = None
        # retrieve the info from the db for this subcloud.
        # subcloud_id should not be None
        try:
            for subcloud, subcloud_status in db_api. \
                    subcloud_get_with_status(context, subcloud_id):
                if subcloud_status:
                    subcloud_status_list.append(
                        db_api.subcloud_endpoint_status_db_model_to_dict(
                            subcloud_status))
                    if subcloud_status.endpoint_type == \
                            dcorch_consts.ENDPOINT_TYPE_IDENTITY:
                        original_identity_status = subcloud_status.sync_status
        except Exception as e:
            LOG.exception(e)
            raise e

        if not subcloud:
            LOG.error("Subcloud not found:%s" % subcloud_id)
            return

        if endpoint_type:
            # updating a single endpoint on a single subcloud
            for subcloud_status in subcloud_status_list:
                if subcloud_status['endpoint_type'] == endpoint_type:
                    if subcloud_status['sync_status'] == sync_status:
                        # No change in the sync_status
                        LOG.debug("Sync status (%s) for subcloud %s did "
                                  "not change - ignore update" %
                                  (sync_status, subcloud.name))
                        return
                    # We found the endpoint
                    break
            else:
                # We did not find the endpoint
                raise exceptions.BadRequest(
                    resource='subcloud',
                    msg='Endpoint %s not found for subcloud' %
                        endpoint_type)

            LOG.info("Updating subcloud:%s endpoint:%s sync:%s" %
                     (subcloud.name, endpoint_type, sync_status))
            db_api.subcloud_status_update(context,
                                          subcloud_id,
                                          endpoint_type,
                                          sync_status)

            # Trigger subcloud patch and load audits for the subcloud after
            # its identity endpoint turns to other status from unknown
            if endpoint_type == dcorch_consts.ENDPOINT_TYPE_IDENTITY \
                    and sync_status != consts.SYNC_STATUS_UNKNOWN \
                    and original_identity_status == \
                    consts.SYNC_STATUS_UNKNOWN:
                LOG.debug('Request for patch and load audit for %s after '
                          'updating identity out of unknown' % subcloud.name)
                self.audit_rpc_client.trigger_subcloud_patch_load_audits(
                    context, subcloud_id)

            self._clear_or_raise_out_of_sync_alarm(
                subcloud.name, endpoint_type, sync_status, alarmable)
        else:
            # update all endpoints on this subcloud
            LOG.info("Updating all endpoints on subcloud: %s sync: %s "
                     "ignore_endpoints: %s" %
                     (subcloud.name, sync_status, ignore_endpoints))

            # TODO(yuxing): The following code can be further optimized when
            # batch alarm clearance APIs are available, so we don't need to
            # loop over all the endpoints of a given subcloud, e.g.
            # if not ignore_endpoints:
            #    db_api.subcloud_status_update_endpoints_all(...)
            # else:
            #    db_api.subcloud_status_update_endpoints(...)
            endpoint_to_update_list = []
            for entry in subcloud_status_list:
                endpoint = entry[consts.ENDPOINT_TYPE]
                if endpoint in ignore_endpoints:
                    # Do not update this endpoint
                    continue
                endpoint_to_update_list.append(endpoint)

                # TODO(yuxing): batch clear all the out-of-sync alarms of a
                # given subcloud if fm_api support it. Be careful with the
                # dc-cert endpoint when adding the above; the endpoint
                # alarm must remain for offline subclouds.
                self._clear_or_raise_out_of_sync_alarm(
                    subcloud.name, endpoint, sync_status, alarmable)

            if endpoint_to_update_list:
                try:
                    db_api.subcloud_status_update_endpoints(
                        context, subcloud_id,
                        endpoint_to_update_list, sync_status)
                except Exception as e:
                    LOG.exception(e)

    @sync_update_subcloud_endpoint_status
    def _update_subcloud_endpoint_status(
            self, context,
            subcloud_name,
            endpoint_type=None,
            sync_status=consts.SYNC_STATUS_OUT_OF_SYNC,
            alarmable=True,
            ignore_endpoints=None):
        """Update subcloud endpoint status

        Holds the per-subcloud fair lock (see decorator) for the duration
        of the update.

        :param context: request context object
        :param subcloud_name: name of subcloud to update
        :param endpoint_type: endpoint type to update
        :param sync_status: sync status to set
        :param alarmable: controls raising an alarm if applicable
        :param ignore_endpoints: list of endpoints to ignore (only used if
            endpoint_type is None)
        """
        if ignore_endpoints is None:
            ignore_endpoints = []

        if not subcloud_name:
            raise exceptions.BadRequest(
                resource='subcloud',
                msg='Subcloud name not provided')

        try:
            subcloud = db_api.subcloud_get_by_name(context, subcloud_name)
        except Exception as e:
            LOG.exception(e)
            raise e

        # Rules for updating sync status:
        #
        # Always update if not in-sync.
        #
        # Otherwise, only update the sync status if managed and online
        # (unless dc-cert).
        #
        # Most endpoints are audited only when the subcloud is managed and
        # online. An exception is the dc-cert endpoint, which is audited
        # whenever the subcloud is online (managed or unmanaged).
        #
        # This means if a subcloud is going offline or unmanaged, then
        # the sync status update must be done first.
        #
        if (sync_status != consts.SYNC_STATUS_IN_SYNC or
            ((subcloud.availability_status == consts.AVAILABILITY_ONLINE) and
             (subcloud.management_state == consts.MANAGEMENT_MANAGED
              or endpoint_type == dcorch_consts.ENDPOINT_TYPE_DC_CERT))):
            # update a single subcloud
            try:
                self._do_update_subcloud_endpoint_status(context,
                                                         subcloud.id,
                                                         endpoint_type,
                                                         sync_status,
                                                         alarmable,
                                                         ignore_endpoints)
            except Exception as e:
                LOG.exception(e)
                raise e
        else:
            LOG.info("Ignoring subcloud sync_status update for subcloud:%s "
                     "availability:%s management:%s endpoint:%s sync:%s" %
                     (subcloud_name, subcloud.availability_status,
                      subcloud.management_state, endpoint_type, sync_status))

    def update_subcloud_endpoint_status(
            self, context,
            subcloud_name=None,
            endpoint_type=None,
            sync_status=consts.SYNC_STATUS_OUT_OF_SYNC,
            alarmable=True,
            ignore_endpoints=None):
        """Update subcloud endpoint status

        :param context: request context object
        :param subcloud_name: name of subcloud to update; when None the
            update is applied to every subcloud
        :param endpoint_type: endpoint type to update
        :param sync_status: sync status to set
        :param alarmable: controls raising an alarm if applicable
        :param ignore_endpoints: list of endpoints to ignore (only used if
            endpoint_type is None)
        """
        if ignore_endpoints is None:
            ignore_endpoints = []

        if subcloud_name:
            self._update_subcloud_endpoint_status(
                context, subcloud_name, endpoint_type, sync_status, alarmable,
                ignore_endpoints)
        else:
            # update all subclouds
            for subcloud in db_api.subcloud_get_all(context):
                self._update_subcloud_endpoint_status(
                    context, subcloud.name, endpoint_type, sync_status,
                    alarmable, ignore_endpoints)

    def _update_subcloud_state(self, context, subcloud_name,
                               management_state, availability_status):
        """Notify dcorch of a subcloud state change (best-effort)."""
        try:
            LOG.info('Notifying dcorch, subcloud:%s management: %s, '
                     'availability:%s' %
                     (subcloud_name,
                      management_state,
                      availability_status))
            self.dcorch_rpc_client.update_subcloud_states(
                context, subcloud_name, management_state, availability_status)
        except Exception:
            LOG.exception('Problem informing dcorch of subcloud state change,'
                          'subcloud: %s' % subcloud_name)

    def _raise_or_clear_subcloud_status_alarm(self, subcloud_name,
                                              availability_status):
        """Keep the subcloud-offline FM alarm in step with availability."""
        entity_instance_id = "subcloud=%s" % subcloud_name
        fault = self.fm_api.get_fault(
            fm_const.FM_ALARM_ID_DC_SUBCLOUD_OFFLINE,
            entity_instance_id)

        if fault and (availability_status == consts.AVAILABILITY_ONLINE):
            try:
                self.fm_api.clear_fault(
                    fm_const.FM_ALARM_ID_DC_SUBCLOUD_OFFLINE,
                    entity_instance_id)
            except Exception:
                LOG.exception("Failed to clear offline alarm for subcloud: %s",
                              subcloud_name)
        elif not fault and \
                (availability_status == consts.AVAILABILITY_OFFLINE):
            try:
                fault = fm_api.Fault(
                    alarm_id=fm_const.FM_ALARM_ID_DC_SUBCLOUD_OFFLINE,
                    alarm_state=fm_const.FM_ALARM_STATE_SET,
                    entity_type_id=fm_const.FM_ENTITY_TYPE_SUBCLOUD,
                    entity_instance_id=entity_instance_id,
                    severity=fm_const.FM_ALARM_SEVERITY_CRITICAL,
                    reason_text=('%s is offline' % subcloud_name),
                    alarm_type=fm_const.FM_ALARM_TYPE_0,
                    probable_cause=fm_const.ALARM_PROBABLE_CAUSE_29,
                    proposed_repair_action="Wait for subcloud to "
                                           "become online; if "
                                           "problem persists contact "
                                           "next level of support.",
                    service_affecting=True)
                self.fm_api.set_fault(fault)
            except Exception:
                LOG.exception("Failed to raise offline alarm for subcloud: %s",
                              subcloud_name)

    def update_subcloud_availability(self, context, subcloud_name,
                                     availability_status,
                                     update_state_only=False,
                                     audit_fail_count=None):
        """Update subcloud availability status and notify interested services.

        :param context: request context object
        :param subcloud_name: name of subcloud to update
        :param availability_status: new availability status, or None to only
            update the audit fail count
        :param update_state_only: when True nothing has changed; re-send the
            current state to dcorch (used by the audit)
        :param audit_fail_count: consecutive audit failure count to persist
        """
        try:
            subcloud = db_api.subcloud_get_by_name(context, subcloud_name)
        except Exception:
            LOG.exception("Failed to get subcloud by name: %s" % subcloud_name)
            raise

        if update_state_only:
            # Nothing has changed, but we want to send a state update for this
            # subcloud as an audit. Get the most up-to-date data.
            self._update_subcloud_state(context, subcloud_name,
                                        subcloud.management_state,
                                        availability_status)
        elif availability_status is None:
            # only update the audit fail count
            try:
                # Fix: use the request context, consistent with the other
                # db_api calls in this method (was self.context).
                db_api.subcloud_update(context, subcloud.id,
                                       audit_fail_count=audit_fail_count)
            except exceptions.SubcloudNotFound:
                # slim possibility subcloud could have been deleted since
                # we found it in db, ignore this benign error.
                LOG.info('Ignoring SubcloudNotFound when attempting '
                         'audit_fail_count update: %s' % subcloud_name)
                return
        else:
            self._raise_or_clear_subcloud_status_alarm(subcloud_name,
                                                       availability_status)

            if availability_status == consts.AVAILABILITY_OFFLINE:
                # Subcloud is going offline, set all endpoint statuses to
                # unknown.
                self._update_subcloud_endpoint_status(
                    context, subcloud_name, endpoint_type=None,
                    sync_status=consts.SYNC_STATUS_UNKNOWN)

            try:
                updated_subcloud = db_api.subcloud_update(
                    context,
                    subcloud.id,
                    availability_status=availability_status,
                    audit_fail_count=audit_fail_count)
            except exceptions.SubcloudNotFound:
                # slim possibility subcloud could have been deleted since
                # we found it in db, ignore this benign error.
                LOG.info('Ignoring SubcloudNotFound when attempting state'
                         ' update: %s' % subcloud_name)
                return

            if availability_status == consts.AVAILABILITY_ONLINE:
                # Subcloud is going online
                # Tell cert-mon to audit endpoint certificate.
                LOG.info('Request for online audit for %s' % subcloud_name)
                dc_notification = rpc_client.DCManagerNotifications()
                dc_notification.subcloud_online(context, subcloud_name)
                # Trigger all the audits for the subcloud so it can update the
                # sync status ASAP.
                self.audit_rpc_client.trigger_subcloud_audits(context,
                                                              subcloud.id)

            # Send dcorch a state update
            self._update_subcloud_state(context, subcloud_name,
                                        updated_subcloud.management_state,
                                        availability_status)

    def update_subcloud_sync_endpoint_type(self, context,
                                           subcloud_name,
                                           endpoint_type_list,
                                           openstack_installed):
        """Add or remove openstack sync endpoint types for a subcloud.

        :param context: request context object
        :param subcloud_name: name of subcloud to update
        :param endpoint_type_list: endpoint types to add or remove
        :param openstack_installed: True to add the endpoints, False to remove
        """
        operation = 'add' if openstack_installed else 'remove'
        func_switcher = {
            'add': (
                self.dcorch_rpc_client.add_subcloud_sync_endpoint_type,
                db_api.subcloud_status_create
            ),
            'remove': (
                self.dcorch_rpc_client.remove_subcloud_sync_endpoint_type,
                db_api.subcloud_status_delete
            )
        }

        try:
            subcloud = db_api.subcloud_get_by_name(context, subcloud_name)
        except Exception:
            LOG.exception("Failed to get subcloud by name: %s" % subcloud_name)
            raise

        try:
            # NOTE(review): the remaining calls use the admin context
            # (self.context) rather than the request context -- confirm
            # whether this is intentional.
            # Notify dcorch to add/remove sync endpoint type list
            func_switcher[operation][0](self.context, subcloud_name,
                                        endpoint_type_list)
            LOG.info('Notifying dcorch, subcloud: %s new sync endpoint: %s' %
                     (subcloud_name, endpoint_type_list))

            # Update subcloud status table by adding/removing openstack sync
            # endpoint types
            for endpoint_type in endpoint_type_list:
                func_switcher[operation][1](self.context, subcloud.id,
                                            endpoint_type)
            # Update openstack_installed of subcloud table
            db_api.subcloud_update(self.context, subcloud.id,
                                   openstack_installed=openstack_installed)
        except Exception:
            LOG.exception('Problem informing dcorch of subcloud sync endpoint'
                          ' type change, subcloud: %s' % subcloud_name)

View File

@ -240,6 +240,10 @@ class TestSubcloudPost(testroot.DCManagerApiTest,
self.mock_rpc_client = p.start()
self.addCleanup(p.stop)
p = mock.patch.object(rpc_client, 'SubcloudStateClient')
self.mock_rpc_state_client = p.start()
self.addCleanup(p.stop)
def _verify_post_failure(self, response, param, value):
self.assertEqual(http_client.BAD_REQUEST,
response.status_code,
@ -773,6 +777,10 @@ class TestSubcloudAPIOther(testroot.DCManagerApiTest):
super(TestSubcloudAPIOther, self).setUp()
self.ctx = utils.dummy_context()
p = mock.patch.object(rpc_client, 'SubcloudStateClient')
self.mock_rpc_state_client = p.start()
self.addCleanup(p.stop)
@mock.patch.object(rpc_client, 'ManagerClient')
def test_delete_subcloud(self, mock_rpc_client):
subcloud = fake_subcloud.create_fake_subcloud(self.ctx)
@ -1162,19 +1170,20 @@ class TestSubcloudAPIOther(testroot.DCManagerApiTest):
headers=FAKE_HEADERS, params=data)
@mock.patch.object(rpc_client, 'ManagerClient')
@mock.patch.object(rpc_client, 'SubcloudStateClient')
@mock.patch.object(subclouds.SubcloudsController, '_get_updatestatus_payload')
def test_subcloud_updatestatus(self, mock_get_updatestatus_payload,
mock_rpc_client):
mock_rpc_state_client, _):
subcloud = fake_subcloud.create_fake_subcloud(self.ctx)
data = {'endpoint': 'dc-cert', 'status': 'in-sync'}
mock_get_updatestatus_payload.return_value = data
mock_rpc_client().update_subcloud_endpoint_status.return_value = True
mock_rpc_state_client().update_subcloud_endpoint_status.return_value = True
response = self.app.patch_json(
FAKE_URL + '/' + str(subcloud.id) + '/update_status',
data, headers=FAKE_HEADERS)
mock_rpc_client().update_subcloud_endpoint_status.assert_called_once_with(
mock_rpc_state_client().update_subcloud_endpoint_status.assert_called_once_with(
mock.ANY,
subcloud.name,
'dc-cert',

View File

@ -32,15 +32,13 @@ from dcorch.common import consts as dcorch_consts
CONF = cfg.CONF
class FakeDCManagerAPI(object):
class FakeDCManagerStateAPI(object):
def __init__(self):
self.update_subcloud_availability = mock.MagicMock()
self.update_subcloud_endpoint_status = mock.MagicMock()
class FakeAuditWorkerAPI(object):
def __init__(self):
self.audit_subclouds = mock.MagicMock()
@ -405,11 +403,12 @@ class TestFirmwareAudit(base.DCManagerTestCase):
super(TestFirmwareAudit, self).setUp()
self.ctxt = utils.dummy_context()
# Mock the DCManager API
self.fake_dcmanager_api = FakeDCManagerAPI()
p = mock.patch('dcmanager.rpc.client.ManagerClient')
self.mock_dcmanager_api = p.start()
self.mock_dcmanager_api.return_value = self.fake_dcmanager_api
# Mock the DCManager subcloud state API
self.fake_dcmanager_state_api = FakeDCManagerStateAPI()
p = mock.patch('dcmanager.rpc.client.SubcloudStateClient')
self.mock_dcmanager_state_api = p.start()
self.mock_dcmanager_state_api.return_value = \
self.fake_dcmanager_state_api
self.addCleanup(p.stop)
# Mock the Audit Worker API
@ -436,10 +435,10 @@ class TestFirmwareAudit(base.DCManagerTestCase):
def test_init(self):
fm = firmware_audit.FirmwareAudit(self.ctxt,
self.fake_dcmanager_api)
self.fake_dcmanager_state_api)
self.assertIsNotNone(fm)
self.assertEqual(self.ctxt, fm.context)
self.assertEqual(self.fake_dcmanager_api, fm.dcmanager_rpc_client)
self.assertEqual(self.fake_dcmanager_state_api, fm.state_rpc_client)
@mock.patch.object(patch_audit, 'SysinvClient')
@mock.patch.object(patch_audit, 'PatchingClient')
@ -458,7 +457,7 @@ class TestFirmwareAudit(base.DCManagerTestCase):
mock_fw_sysinv_client.side_effect = FakeSysinvClientNoAuditData
fm = firmware_audit.FirmwareAudit(self.ctxt,
self.fake_dcmanager_api)
self.fake_dcmanager_state_api)
am = subcloud_audit_manager.SubcloudAuditManager()
am.firmware_audit = fm
firmware_audit_data = self.get_fw_audit_data(am)
@ -470,7 +469,7 @@ class TestFirmwareAudit(base.DCManagerTestCase):
subcloud_name=name,
endpoint_type=dcorch_consts.ENDPOINT_TYPE_FIRMWARE,
sync_status=consts.SYNC_STATUS_IN_SYNC)]
self.fake_dcmanager_api.update_subcloud_endpoint_status. \
self.fake_dcmanager_state_api.update_subcloud_endpoint_status. \
assert_has_calls(expected_calls)
@mock.patch.object(patch_audit, 'SysinvClient')
@ -490,7 +489,7 @@ class TestFirmwareAudit(base.DCManagerTestCase):
mock_fw_sysinv_client.side_effect = FakeSysinvClientNoEnabledDevices
fm = firmware_audit.FirmwareAudit(self.ctxt,
self.fake_dcmanager_api)
self.fake_dcmanager_state_api)
am = subcloud_audit_manager.SubcloudAuditManager()
am.firmware_audit = fm
firmware_audit_data = self.get_fw_audit_data(am)
@ -502,7 +501,7 @@ class TestFirmwareAudit(base.DCManagerTestCase):
subcloud_name=name,
endpoint_type=dcorch_consts.ENDPOINT_TYPE_FIRMWARE,
sync_status=consts.SYNC_STATUS_IN_SYNC)]
self.fake_dcmanager_api.update_subcloud_endpoint_status. \
self.fake_dcmanager_state_api.update_subcloud_endpoint_status. \
assert_has_calls(expected_calls)
@mock.patch.object(patch_audit, 'SysinvClient')
@ -521,7 +520,7 @@ class TestFirmwareAudit(base.DCManagerTestCase):
mock_fw_sysinv_client.side_effect = FakeSysinvClientImageWithoutLabels
fm = firmware_audit.FirmwareAudit(self.ctxt,
self.fake_dcmanager_api)
self.fake_dcmanager_state_api)
am = subcloud_audit_manager.SubcloudAuditManager()
am.firmware_audit = fm
firmware_audit_data = self.get_fw_audit_data(am)
@ -533,7 +532,7 @@ class TestFirmwareAudit(base.DCManagerTestCase):
subcloud_name=name,
endpoint_type=dcorch_consts.ENDPOINT_TYPE_FIRMWARE,
sync_status=consts.SYNC_STATUS_IN_SYNC)]
self.fake_dcmanager_api.update_subcloud_endpoint_status. \
self.fake_dcmanager_state_api.update_subcloud_endpoint_status. \
assert_has_calls(expected_calls)
@mock.patch.object(patch_audit, 'SysinvClient')
@ -552,7 +551,7 @@ class TestFirmwareAudit(base.DCManagerTestCase):
mock_fw_sysinv_client.side_effect = FakeSysinvClientImageNotApplied
fm = firmware_audit.FirmwareAudit(self.ctxt,
self.fake_dcmanager_api)
self.fake_dcmanager_state_api)
am = subcloud_audit_manager.SubcloudAuditManager()
am.firmware_audit = fm
firmware_audit_data = self.get_fw_audit_data(am)
@ -564,7 +563,7 @@ class TestFirmwareAudit(base.DCManagerTestCase):
subcloud_name=name,
endpoint_type=dcorch_consts.ENDPOINT_TYPE_FIRMWARE,
sync_status=consts.SYNC_STATUS_OUT_OF_SYNC)]
self.fake_dcmanager_api.update_subcloud_endpoint_status. \
self.fake_dcmanager_state_api.update_subcloud_endpoint_status. \
assert_has_calls(expected_calls)
@mock.patch.object(patch_audit, 'SysinvClient')
@ -583,7 +582,7 @@ class TestFirmwareAudit(base.DCManagerTestCase):
mock_fw_sysinv_client.side_effect = FakeSysinvClientImageNotWritten
fm = firmware_audit.FirmwareAudit(self.ctxt,
self.fake_dcmanager_api)
self.fake_dcmanager_state_api)
am = subcloud_audit_manager.SubcloudAuditManager()
am.firmware_audit = fm
firmware_audit_data = self.get_fw_audit_data(am)
@ -595,7 +594,7 @@ class TestFirmwareAudit(base.DCManagerTestCase):
subcloud_name=name,
endpoint_type=dcorch_consts.ENDPOINT_TYPE_FIRMWARE,
sync_status=consts.SYNC_STATUS_OUT_OF_SYNC)]
self.fake_dcmanager_api.update_subcloud_endpoint_status. \
self.fake_dcmanager_state_api.update_subcloud_endpoint_status. \
assert_has_calls(expected_calls)
@mock.patch.object(patch_audit, 'SysinvClient')
@ -614,7 +613,7 @@ class TestFirmwareAudit(base.DCManagerTestCase):
mock_fw_sysinv_client.side_effect = FakeSysinvClientImageWithLabels
fm = firmware_audit.FirmwareAudit(self.ctxt,
self.fake_dcmanager_api)
self.fake_dcmanager_state_api)
am = subcloud_audit_manager.SubcloudAuditManager()
am.firmware_audit = fm
firmware_audit_data = self.get_fw_audit_data(am)
@ -626,7 +625,7 @@ class TestFirmwareAudit(base.DCManagerTestCase):
subcloud_name=name,
endpoint_type=dcorch_consts.ENDPOINT_TYPE_FIRMWARE,
sync_status=consts.SYNC_STATUS_IN_SYNC)]
self.fake_dcmanager_api.update_subcloud_endpoint_status. \
self.fake_dcmanager_state_api.update_subcloud_endpoint_status. \
assert_has_calls(expected_calls)
@mock.patch.object(patch_audit, 'SysinvClient')
@ -645,7 +644,7 @@ class TestFirmwareAudit(base.DCManagerTestCase):
mock_fw_sysinv_client.side_effect = FakeSysinvClientNoMatchingDeviceLabel
fm = firmware_audit.FirmwareAudit(self.ctxt,
self.fake_dcmanager_api)
self.fake_dcmanager_state_api)
am = subcloud_audit_manager.SubcloudAuditManager()
am.firmware_audit = fm
firmware_audit_data = self.get_fw_audit_data(am)
@ -657,7 +656,7 @@ class TestFirmwareAudit(base.DCManagerTestCase):
subcloud_name=name,
endpoint_type=dcorch_consts.ENDPOINT_TYPE_FIRMWARE,
sync_status=consts.SYNC_STATUS_IN_SYNC)]
self.fake_dcmanager_api.update_subcloud_endpoint_status. \
self.fake_dcmanager_state_api.update_subcloud_endpoint_status. \
assert_has_calls(expected_calls)
@mock.patch.object(patch_audit, 'SysinvClient')
@ -676,7 +675,7 @@ class TestFirmwareAudit(base.DCManagerTestCase):
mock_fw_sysinv_client.side_effect = FakeSysinvClientNoMatchingDeviceId
fm = firmware_audit.FirmwareAudit(self.ctxt,
self.fake_dcmanager_api)
self.fake_dcmanager_state_api)
am = subcloud_audit_manager.SubcloudAuditManager()
am.firmware_audit = fm
firmware_audit_data = self.get_fw_audit_data(am)
@ -688,5 +687,5 @@ class TestFirmwareAudit(base.DCManagerTestCase):
subcloud_name=name,
endpoint_type=dcorch_consts.ENDPOINT_TYPE_FIRMWARE,
sync_status=consts.SYNC_STATUS_IN_SYNC)]
self.fake_dcmanager_api.update_subcloud_endpoint_status. \
self.fake_dcmanager_state_api.update_subcloud_endpoint_status. \
assert_has_calls(expected_calls)

View File

@ -30,8 +30,7 @@ PREVIOUS_KUBE_VERSION = 'v1.2.3'
UPGRADED_KUBE_VERSION = 'v1.2.3-a'
class FakeDCManagerAPI(object):
class FakeDCManagerStateAPI(object):
def __init__(self):
self.update_subcloud_availability = mock.MagicMock()
self.update_subcloud_endpoint_status = mock.MagicMock()
@ -80,11 +79,12 @@ class TestKubernetesAudit(base.DCManagerTestCase):
super(TestKubernetesAudit, self).setUp()
self.ctxt = utils.dummy_context()
# Mock the DCManager API
self.fake_dcmanager_api = FakeDCManagerAPI()
p = mock.patch('dcmanager.rpc.client.ManagerClient')
self.mock_dcmanager_api = p.start()
self.mock_dcmanager_api.return_value = self.fake_dcmanager_api
# Mock the DCManager subcloud state API
self.fake_dcmanager_state_api = FakeDCManagerStateAPI()
p = mock.patch('dcmanager.rpc.client.SubcloudStateClient')
self.mock_dcmanager_state_api = p.start()
self.mock_dcmanager_state_api.return_value = \
self.fake_dcmanager_state_api
self.addCleanup(p.stop)
# Mock the Audit Worker API
@ -151,17 +151,18 @@ class TestKubernetesAudit(base.DCManagerTestCase):
def test_init(self):
audit = kubernetes_audit.KubernetesAudit(self.ctxt,
self.fake_dcmanager_api)
self.fake_dcmanager_state_api)
self.assertIsNotNone(audit)
self.assertEqual(self.ctxt, audit.context)
self.assertEqual(self.fake_dcmanager_api, audit.dcmanager_rpc_client)
self.assertEqual(self.fake_dcmanager_state_api,
audit.state_rpc_client)
@mock.patch.object(subcloud_audit_manager, 'context')
def test_no_kubernetes_audit_data_to_sync(self, mock_context):
mock_context.get_admin_context.return_value = self.ctxt
audit = kubernetes_audit.KubernetesAudit(self.ctxt,
self.fake_dcmanager_api)
self.fake_dcmanager_state_api)
am = subcloud_audit_manager.SubcloudAuditManager()
am.kubernetes_audit = audit
kubernetes_audit_data = self.get_kube_audit_data(am)
@ -173,14 +174,14 @@ class TestKubernetesAudit(base.DCManagerTestCase):
subcloud_name=name,
endpoint_type=dcorch_consts.ENDPOINT_TYPE_KUBERNETES,
sync_status=consts.SYNC_STATUS_IN_SYNC)]
self.fake_dcmanager_api.update_subcloud_endpoint_status. \
self.fake_dcmanager_state_api.update_subcloud_endpoint_status. \
assert_has_calls(expected_calls)
@mock.patch.object(subcloud_audit_manager, 'context')
def test_kubernetes_audit_data_out_of_sync_older(self, mock_context):
mock_context.get_admin_context.return_value = self.ctxt
audit = kubernetes_audit.KubernetesAudit(self.ctxt,
self.fake_dcmanager_api)
self.fake_dcmanager_state_api)
am = subcloud_audit_manager.SubcloudAuditManager()
am.kubernetes_audit = audit
@ -201,14 +202,14 @@ class TestKubernetesAudit(base.DCManagerTestCase):
subcloud_name=name,
endpoint_type=dcorch_consts.ENDPOINT_TYPE_KUBERNETES,
sync_status=consts.SYNC_STATUS_OUT_OF_SYNC)]
self.fake_dcmanager_api.update_subcloud_endpoint_status. \
self.fake_dcmanager_state_api.update_subcloud_endpoint_status. \
assert_has_calls(expected_calls)
@mock.patch.object(subcloud_audit_manager, 'context')
def test_kubernetes_audit_data_out_of_sync_newer(self, mock_context):
mock_context.get_admin_context.return_value = self.ctxt
audit = kubernetes_audit.KubernetesAudit(self.ctxt,
self.fake_dcmanager_api)
self.fake_dcmanager_state_api)
am = subcloud_audit_manager.SubcloudAuditManager()
am.kubernetes_audit = audit
@ -229,7 +230,7 @@ class TestKubernetesAudit(base.DCManagerTestCase):
subcloud_name=name,
endpoint_type=dcorch_consts.ENDPOINT_TYPE_KUBERNETES,
sync_status=consts.SYNC_STATUS_OUT_OF_SYNC)]
self.fake_dcmanager_api.update_subcloud_endpoint_status. \
self.fake_dcmanager_state_api.update_subcloud_endpoint_status. \
assert_has_calls(expected_calls)
@mock.patch.object(subcloud_audit_manager, 'context')
@ -237,7 +238,7 @@ class TestKubernetesAudit(base.DCManagerTestCase):
mock_context):
mock_context.get_admin_context.return_value = self.ctxt
audit = kubernetes_audit.KubernetesAudit(self.ctxt,
self.fake_dcmanager_api)
self.fake_dcmanager_state_api)
am = subcloud_audit_manager.SubcloudAuditManager()
am.kubernetes_audit = audit
@ -258,7 +259,7 @@ class TestKubernetesAudit(base.DCManagerTestCase):
subcloud_name=name,
endpoint_type=dcorch_consts.ENDPOINT_TYPE_KUBERNETES,
sync_status=consts.SYNC_STATUS_IN_SYNC)]
self.fake_dcmanager_api.update_subcloud_endpoint_status. \
self.fake_dcmanager_state_api.update_subcloud_endpoint_status. \
assert_has_calls(expected_calls)
@mock.patch.object(subcloud_audit_manager, 'context')
@ -268,7 +269,7 @@ class TestKubernetesAudit(base.DCManagerTestCase):
# even if the kube versions match
mock_context.get_admin_context.return_value = self.ctxt
audit = kubernetes_audit.KubernetesAudit(self.ctxt,
self.fake_dcmanager_api)
self.fake_dcmanager_state_api)
am = subcloud_audit_manager.SubcloudAuditManager()
am.kubernetes_audit = audit
@ -293,5 +294,5 @@ class TestKubernetesAudit(base.DCManagerTestCase):
subcloud_name=name,
endpoint_type=dcorch_consts.ENDPOINT_TYPE_KUBERNETES,
sync_status=consts.SYNC_STATUS_OUT_OF_SYNC)]
self.fake_dcmanager_api.update_subcloud_endpoint_status. \
self.fake_dcmanager_state_api.update_subcloud_endpoint_status. \
assert_has_calls(expected_calls)

View File

@ -31,8 +31,7 @@ from dcorch.common import consts as dcorch_consts
CONF = cfg.CONF
class FakeDCManagerAPI(object):
class FakeDCManagerStateAPI(object):
def __init__(self):
self.update_subcloud_availability = mock.MagicMock()
self.update_subcloud_endpoint_status = mock.MagicMock()
@ -253,11 +252,12 @@ class TestPatchAudit(base.DCManagerTestCase):
super(TestPatchAudit, self).setUp()
self.ctxt = utils.dummy_context()
# Mock the DCManager API
self.fake_dcmanager_api = FakeDCManagerAPI()
p = mock.patch('dcmanager.rpc.client.ManagerClient')
self.mock_dcmanager_api = p.start()
self.mock_dcmanager_api.return_value = self.fake_dcmanager_api
# Mock the DCManager subcloud state API
self.fake_dcmanager_state_api = FakeDCManagerStateAPI()
p = mock.patch('dcmanager.rpc.client.SubcloudStateClient')
self.mock_dcmanager_state_api = p.start()
self.mock_dcmanager_state_api.return_value = \
self.fake_dcmanager_state_api
self.addCleanup(p.stop)
# Mock the Audit Worker API
@ -277,10 +277,10 @@ class TestPatchAudit(base.DCManagerTestCase):
def test_init(self):
pm = patch_audit.PatchAudit(self.ctxt,
self.fake_dcmanager_api)
self.fake_dcmanager_state_api)
self.assertIsNotNone(pm)
self.assertEqual(self.ctxt, pm.context)
self.assertEqual(self.fake_dcmanager_api, pm.dcmanager_rpc_client)
self.assertEqual(self.fake_dcmanager_state_api, pm.state_rpc_client)
@mock.patch.object(patch_audit, 'SysinvClient')
@mock.patch.object(patch_audit, 'PatchingClient')
@ -295,7 +295,7 @@ class TestPatchAudit(base.DCManagerTestCase):
mock_sysinv_client.side_effect = FakeSysinvClientOneLoad
pm = patch_audit.PatchAudit(self.ctxt,
self.fake_dcmanager_api)
self.fake_dcmanager_state_api)
am = subcloud_audit_manager.SubcloudAuditManager()
am.patch_audit = pm
@ -313,7 +313,7 @@ class TestPatchAudit(base.DCManagerTestCase):
subcloud_name=name,
endpoint_type=dcorch_consts.ENDPOINT_TYPE_LOAD,
sync_status=consts.SYNC_STATUS_IN_SYNC)]
self.fake_dcmanager_api.update_subcloud_endpoint_status. \
self.fake_dcmanager_state_api.update_subcloud_endpoint_status. \
assert_has_calls(expected_calls)
@mock.patch.object(patch_audit, 'SysinvClient')
@ -326,7 +326,7 @@ class TestPatchAudit(base.DCManagerTestCase):
mock_sysinv_client):
mock_context.get_admin_context.return_value = self.ctxt
pm = patch_audit.PatchAudit(self.ctxt,
self.fake_dcmanager_api)
self.fake_dcmanager_state_api)
am = subcloud_audit_manager.SubcloudAuditManager()
am.patch_audit = pm
@ -374,7 +374,7 @@ class TestPatchAudit(base.DCManagerTestCase):
sync_status=consts.SYNC_STATUS_IN_SYNC),
]
self.fake_dcmanager_api.update_subcloud_endpoint_status.\
self.fake_dcmanager_state_api.update_subcloud_endpoint_status.\
assert_has_calls(expected_calls)
@mock.patch.object(patch_audit, 'SysinvClient')
@ -387,7 +387,7 @@ class TestPatchAudit(base.DCManagerTestCase):
mock_sysinv_client):
mock_context.get_admin_context.return_value = self.ctxt
pm = patch_audit.PatchAudit(self.ctxt,
self.fake_dcmanager_api)
self.fake_dcmanager_state_api)
am = subcloud_audit_manager.SubcloudAuditManager()
am.patch_audit = pm
@ -408,7 +408,7 @@ class TestPatchAudit(base.DCManagerTestCase):
subcloud_name=name,
endpoint_type=dcorch_consts.ENDPOINT_TYPE_LOAD,
sync_status=consts.SYNC_STATUS_IN_SYNC)]
self.fake_dcmanager_api.update_subcloud_endpoint_status.\
self.fake_dcmanager_state_api.update_subcloud_endpoint_status.\
assert_has_calls(expected_calls)
@mock.patch.object(patch_audit, 'SysinvClient')
@ -422,7 +422,7 @@ class TestPatchAudit(base.DCManagerTestCase):
mock_sysinv_client):
mock_context.get_admin_context.return_value = self.ctxt
pm = patch_audit.PatchAudit(self.ctxt,
self.fake_dcmanager_api)
self.fake_dcmanager_state_api)
am = subcloud_audit_manager.SubcloudAuditManager()
am.patch_audit = pm
mock_patching_client.side_effect = FakePatchingClientInSync
@ -452,7 +452,7 @@ class TestPatchAudit(base.DCManagerTestCase):
endpoint_type=dcorch_consts.ENDPOINT_TYPE_LOAD,
sync_status=consts.SYNC_STATUS_OUT_OF_SYNC),
]
self.fake_dcmanager_api.update_subcloud_endpoint_status.\
self.fake_dcmanager_state_api.update_subcloud_endpoint_status.\
assert_has_calls(expected_calls)
@mock.patch.object(patch_audit, 'SysinvClient')
@ -466,7 +466,7 @@ class TestPatchAudit(base.DCManagerTestCase):
mock_sysinv_client):
mock_context.get_admin_context.return_value = self.ctxt
pm = patch_audit.PatchAudit(self.ctxt,
self.fake_dcmanager_api)
self.fake_dcmanager_state_api)
am = subcloud_audit_manager.SubcloudAuditManager()
am.patch_audit = pm
mock_patching_client.side_effect = FakePatchingClientInSync
@ -496,5 +496,5 @@ class TestPatchAudit(base.DCManagerTestCase):
endpoint_type=dcorch_consts.ENDPOINT_TYPE_LOAD,
sync_status=consts.SYNC_STATUS_OUT_OF_SYNC),
]
self.fake_dcmanager_api.update_subcloud_endpoint_status.\
self.fake_dcmanager_state_api.update_subcloud_endpoint_status.\
assert_has_calls(expected_calls)

View File

@ -31,8 +31,12 @@ from dcmanager.tests import base
class FakeDCManagerAPI(object):
def __init__(self):
self.update_subcloud_availability = mock.MagicMock()
self.update_subcloud_sync_endpoint_type = mock.MagicMock()
class FakeDCManagerStateAPI(object):
def __init__(self):
self.update_subcloud_availability = mock.MagicMock()
self.update_subcloud_endpoint_status = mock.MagicMock()
@ -239,6 +243,14 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
self.mock_dcmanager_api.return_value = self.fake_dcmanager_api
self.addCleanup(p.stop)
# Mock the DCManager subcloud state API
self.fake_dcmanager_state_api = FakeDCManagerStateAPI()
p = mock.patch('dcmanager.rpc.client.SubcloudStateClient')
self.mock_dcmanager_state_api = p.start()
self.mock_dcmanager_state_api.return_value = \
self.fake_dcmanager_state_api
self.addCleanup(p.stop)
# Mock the Audit Worker API
self.fake_audit_worker_api = FakeAuditWorkerAPI()
p = mock.patch('dcmanager.audit.rpcapi.ManagerAuditWorkerClient')
@ -413,7 +425,7 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
do_kube_rootca_update_audit)
# Verify the subcloud was set to online
self.fake_dcmanager_api.update_subcloud_availability.assert_called_with(
self.fake_dcmanager_state_api.update_subcloud_availability.assert_called_with(
mock.ANY, subcloud.name, consts.AVAILABILITY_ONLINE,
False, 0)
@ -485,7 +497,7 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
do_kube_rootca_update_audit)
# Verify the subcloud was set to online
self.fake_dcmanager_api.update_subcloud_availability.assert_called_with(
self.fake_dcmanager_state_api.update_subcloud_availability.assert_called_with(
mock.ANY, subcloud.name, consts.AVAILABILITY_ONLINE,
False, 0)
@ -538,7 +550,7 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
do_kube_rootca_update_audit=False)
# Verify the subcloud state was not updated
self.fake_dcmanager_api.update_subcloud_availability.\
self.fake_dcmanager_state_api.update_subcloud_availability.\
assert_not_called()
# Verify the _update_subcloud_audit_fail_count is not called
@ -581,7 +593,7 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
do_kube_rootca_update_audit=False)
# Verify the subcloud state was updated even though no change
self.fake_dcmanager_api.update_subcloud_availability.assert_called_with(
self.fake_dcmanager_state_api.update_subcloud_availability.assert_called_with(
mock.ANY, subcloud.name, consts.AVAILABILITY_ONLINE,
True, None)
@ -661,7 +673,7 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
self.assertEqual(subcloud.audit_fail_count, audit_fail_count)
# Verify the update_subcloud_availability was not called
self.fake_dcmanager_api.update_subcloud_availability.assert_not_called()
self.fake_dcmanager_state_api.update_subcloud_availability.assert_not_called()
# Update the DB like dcmanager would do.
subcloud = db_api.subcloud_update(
@ -689,7 +701,7 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
self.assertEqual(subcloud.audit_fail_count, audit_fail_count)
# Verify the update_subcloud_availability was not called
self.fake_dcmanager_api.update_subcloud_availability.assert_not_called()
self.fake_dcmanager_state_api.update_subcloud_availability.assert_not_called()
# Verify alarm update is called only once
self.fake_alarm_aggr.update_alarm_summary.assert_called_once_with(
@ -754,7 +766,7 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
do_kube_rootca_update_audit=do_kube_rootca_update_audit)
# Verify the subcloud state was not updated
self.fake_dcmanager_api.update_subcloud_availability.\
self.fake_dcmanager_state_api.update_subcloud_availability.\
assert_not_called()
# Verify the _update_subcloud_audit_fail_count is not called
@ -831,7 +843,7 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
subcloud.audit_fail_count, audit_fail_count + 1)
# Verify the subcloud state was not updated
self.fake_dcmanager_api.update_subcloud_availability.\
self.fake_dcmanager_state_api.update_subcloud_availability.\
assert_not_called()
# Verify the openstack endpoints were not updated
@ -898,7 +910,7 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
False) # do_kube_rootca_audit
# Verify the subcloud state was not updated
self.fake_dcmanager_api.update_subcloud_availability.\
self.fake_dcmanager_state_api.update_subcloud_availability.\
assert_not_called()
# Verify the _update_subcloud_audit_fail_count is not called
@ -960,7 +972,7 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
False) # do_kube_rootca_update_audit
# Verify the subcloud state was not updated
self.fake_dcmanager_api.update_subcloud_availability.\
self.fake_dcmanager_state_api.update_subcloud_availability.\
assert_not_called()
# Verify the _update_subcloud_audit_fail_count is not called
@ -1021,7 +1033,7 @@ class TestAuditWorkerManager(base.DCManagerTestCase):
False) # do_kube_rootca_update_audit
# Verify the subcloud state was not updated
self.fake_dcmanager_api.update_subcloud_availability.\
self.fake_dcmanager_state_api.update_subcloud_availability.\
assert_not_called()
# Verify the _update_subcloud_audit_fail_count is not called

View File

@ -61,10 +61,6 @@ class TestDCManagerService(base.DCManagerTestCase):
self.assertEqual(self.service_obj.host, 'localhost')
self.assertEqual(self.service_obj.topic, 'dcmanager')
def test_init_tgm(self):
self.service_obj.init_tgm()
self.assertIsNotNone(self.service_obj.TG)
@mock.patch.object(service, 'SubcloudManager')
def test_init_managers(self, mock_subcloud_manager):
self.service_obj.init_managers()
@ -80,7 +76,6 @@ class TestDCManagerService(base.DCManagerTestCase):
@mock.patch.object(service, 'SubcloudManager')
def test_add_subcloud(self, mock_subcloud_manager):
self.service_obj.init_tgm()
self.service_obj.init_managers()
self.service_obj.add_subcloud(
self.context, payload={'name': 'testname'})
@ -89,7 +84,6 @@ class TestDCManagerService(base.DCManagerTestCase):
@mock.patch.object(service, 'SubcloudManager')
def test_delete_subcloud(self, mock_subcloud_manager):
self.service_obj.init_tgm()
self.service_obj.init_managers()
self.service_obj.delete_subcloud(
self.context, subcloud_id=1)
@ -98,7 +92,6 @@ class TestDCManagerService(base.DCManagerTestCase):
@mock.patch.object(service, 'SubcloudManager')
def test_update_subcloud(self, mock_subcloud_manager):
self.service_obj.init_tgm()
self.service_obj.init_managers()
self.service_obj.update_subcloud(
self.context, subcloud_id=1, management_state='testmgmtstatus')

View File

@ -29,6 +29,7 @@ from dcmanager.common import exceptions
from dcmanager.common import utils as cutils
from dcmanager.db.sqlalchemy import api as db_api
from dcmanager.manager import subcloud_manager
from dcmanager.state import subcloud_state_manager
from dcmanager.tests import base
from dcmanager.tests.unit.common import fake_subcloud
from dcmanager.tests import utils
@ -37,12 +38,17 @@ from tsconfig.tsconfig import SW_VERSION
class FakeDCManagerAuditAPI(object):
def __init__(self):
self.trigger_subcloud_audits = mock.MagicMock()
self.trigger_subcloud_patch_load_audits = mock.MagicMock()
class FakeDCManagerStateAPI(object):
def __init__(self):
self.update_subcloud_availability = mock.MagicMock()
self.update_subcloud_endpoint_status = mock.MagicMock()
class FakeDCOrchAPI(object):
def __init__(self):
self.update_subcloud_states = mock.MagicMock()
@ -233,6 +239,14 @@ class TestSubcloudManager(base.DCManagerTestCase):
self.fake_dcmanager_audit_api
self.addCleanup(p.stop)
# Mock the DCManager subcloud state API
self.fake_dcmanager_state_api = FakeDCManagerStateAPI()
p = mock.patch('dcmanager.rpc.client.SubcloudStateClient')
self.mock_dcmanager_state_api = p.start()
self.mock_dcmanager_state_api.return_value = \
self.fake_dcmanager_state_api
self.addCleanup(p.stop)
# Mock the DCOrch API
self.fake_dcorch_api = FakeDCOrchAPI()
p = mock.patch('dcorch.rpc.client.EngineClient')
@ -693,7 +707,7 @@ class TestSubcloudManager(base.DCManagerTestCase):
self.assertEqual(status.sync_status, consts.SYNC_STATUS_UNKNOWN)
# Update/verify each status with the default sync state: out-of-sync
sm = subcloud_manager.SubcloudManager()
ssm = subcloud_state_manager.SubcloudStateManager()
for endpoint in [dcorch_consts.ENDPOINT_TYPE_PLATFORM,
dcorch_consts.ENDPOINT_TYPE_IDENTITY,
dcorch_consts.ENDPOINT_TYPE_PATCHING,
@ -701,7 +715,7 @@ class TestSubcloudManager(base.DCManagerTestCase):
dcorch_consts.ENDPOINT_TYPE_NFV,
dcorch_consts.ENDPOINT_TYPE_DC_CERT]:
# Update
sm.update_subcloud_endpoint_status(
ssm.update_subcloud_endpoint_status(
self.ctx, subcloud_name=subcloud.name,
endpoint_type=endpoint)
@ -720,7 +734,7 @@ class TestSubcloudManager(base.DCManagerTestCase):
dcorch_consts.ENDPOINT_TYPE_FM,
dcorch_consts.ENDPOINT_TYPE_NFV,
dcorch_consts.ENDPOINT_TYPE_DC_CERT]:
sm.update_subcloud_endpoint_status(
ssm.update_subcloud_endpoint_status(
self.ctx, subcloud_name=subcloud.name,
endpoint_type=endpoint,
sync_status=consts.SYNC_STATUS_IN_SYNC)
@ -734,7 +748,7 @@ class TestSubcloudManager(base.DCManagerTestCase):
# Attempt to update each status to be unknown for an offline/unmanaged
# subcloud. This is allowed.
sm.update_subcloud_endpoint_status(
ssm.update_subcloud_endpoint_status(
self.ctx, subcloud_name=subcloud.name,
endpoint_type=None,
sync_status=consts.SYNC_STATUS_UNKNOWN)
@ -753,7 +767,7 @@ class TestSubcloudManager(base.DCManagerTestCase):
# Attempt to update each status to be out-of-sync for an
# offline/unmanaged subcloud. Exclude one endpoint. This is allowed.
sm.update_subcloud_endpoint_status(
ssm.update_subcloud_endpoint_status(
self.ctx, subcloud_name=subcloud.name,
endpoint_type=None,
sync_status=consts.SYNC_STATUS_OUT_OF_SYNC,
@ -795,7 +809,7 @@ class TestSubcloudManager(base.DCManagerTestCase):
dcorch_consts.ENDPOINT_TYPE_PATCHING,
dcorch_consts.ENDPOINT_TYPE_FM,
dcorch_consts.ENDPOINT_TYPE_NFV]:
sm.update_subcloud_endpoint_status(
ssm.update_subcloud_endpoint_status(
self.ctx, subcloud_name=subcloud.name,
endpoint_type=endpoint,
sync_status=consts.SYNC_STATUS_IN_SYNC)
@ -810,7 +824,7 @@ class TestSubcloudManager(base.DCManagerTestCase):
# Attempt to update dc-cert status to be in-sync for an
# online/unmanaged subcloud. This is allowed. Verify the change.
endpoint = dcorch_consts.ENDPOINT_TYPE_DC_CERT
sm.update_subcloud_endpoint_status(
ssm.update_subcloud_endpoint_status(
self.ctx, subcloud_name=subcloud.name,
endpoint_type=endpoint,
sync_status=consts.SYNC_STATUS_IN_SYNC)
@ -840,7 +854,7 @@ class TestSubcloudManager(base.DCManagerTestCase):
dcorch_consts.ENDPOINT_TYPE_FM,
dcorch_consts.ENDPOINT_TYPE_NFV,
dcorch_consts.ENDPOINT_TYPE_DC_CERT]:
sm.update_subcloud_endpoint_status(
ssm.update_subcloud_endpoint_status(
self.ctx, subcloud_name=subcloud.name,
endpoint_type=endpoint,
sync_status=consts.SYNC_STATUS_IN_SYNC)
@ -860,7 +874,7 @@ class TestSubcloudManager(base.DCManagerTestCase):
dcorch_consts.ENDPOINT_TYPE_FM,
dcorch_consts.ENDPOINT_TYPE_NFV,
dcorch_consts.ENDPOINT_TYPE_DC_CERT]:
sm.update_subcloud_endpoint_status(
ssm.update_subcloud_endpoint_status(
self.ctx, subcloud_name=subcloud.name,
endpoint_type=endpoint,
sync_status=consts.SYNC_STATUS_OUT_OF_SYNC)
@ -888,7 +902,7 @@ class TestSubcloudManager(base.DCManagerTestCase):
mock_dcmanager_api = p.start()
mock_dcmanager_api.return_value = fake_dcmanager_cermon_api
sm = subcloud_manager.SubcloudManager()
ssm = subcloud_state_manager.SubcloudStateManager()
db_api.subcloud_update(self.ctx, subcloud.id,
management_state=consts.MANAGEMENT_MANAGED)
@ -904,8 +918,8 @@ class TestSubcloudManager(base.DCManagerTestCase):
self.assertIsNotNone(status)
self.assertEqual(status.sync_status, consts.SYNC_STATUS_UNKNOWN)
sm.update_subcloud_availability(self.ctx, subcloud.name,
consts.AVAILABILITY_ONLINE)
ssm.update_subcloud_availability(self.ctx, subcloud.name,
consts.AVAILABILITY_ONLINE)
updated_subcloud = db_api.subcloud_get_by_name(self.ctx, 'subcloud1')
# Verify the subcloud was set to online
@ -935,7 +949,7 @@ class TestSubcloudManager(base.DCManagerTestCase):
mock_dcmanager_api = p.start()
mock_dcmanager_api.return_value = fake_dcmanager_cermon_api
sm = subcloud_manager.SubcloudManager()
ssm = subcloud_state_manager.SubcloudStateManager()
# Note that we have intentionally left the subcloud as "unmanaged"
@ -951,8 +965,8 @@ class TestSubcloudManager(base.DCManagerTestCase):
self.assertIsNotNone(status)
self.assertEqual(status.sync_status, consts.SYNC_STATUS_UNKNOWN)
sm.update_subcloud_availability(self.ctx, subcloud.name,
consts.AVAILABILITY_ONLINE)
ssm.update_subcloud_availability(self.ctx, subcloud.name,
consts.AVAILABILITY_ONLINE)
updated_subcloud = db_api.subcloud_get_by_name(self.ctx, 'subcloud1')
# Verify the subcloud was set to online
@ -977,7 +991,7 @@ class TestSubcloudManager(base.DCManagerTestCase):
management_state=consts.MANAGEMENT_MANAGED,
availability_status=consts.AVAILABILITY_ONLINE)
sm = subcloud_manager.SubcloudManager()
ssm = subcloud_state_manager.SubcloudStateManager()
# create sync statuses for endpoints and set them to in-sync
for endpoint in [dcorch_consts.ENDPOINT_TYPE_PLATFORM,
@ -987,7 +1001,7 @@ class TestSubcloudManager(base.DCManagerTestCase):
dcorch_consts.ENDPOINT_TYPE_NFV]:
db_api.subcloud_status_create(
self.ctx, subcloud.id, endpoint)
sm.update_subcloud_endpoint_status(
ssm.update_subcloud_endpoint_status(
self.ctx, subcloud_name=subcloud.name,
endpoint_type=endpoint,
sync_status=consts.SYNC_STATUS_IN_SYNC)
@ -999,9 +1013,9 @@ class TestSubcloudManager(base.DCManagerTestCase):
# Audit fails once
audit_fail_count = 1
sm.update_subcloud_availability(self.ctx, subcloud.name,
availability_status=None,
audit_fail_count=audit_fail_count)
ssm.update_subcloud_availability(self.ctx, subcloud.name,
availability_status=None,
audit_fail_count=audit_fail_count)
# Verify the subclcoud availability was not updated
updated_subcloud = db_api.subcloud_get_by_name(self.ctx, 'subcloud1')
self.assertEqual(updated_subcloud.availability_status,
@ -1014,9 +1028,9 @@ class TestSubcloudManager(base.DCManagerTestCase):
# Audit fails again
audit_fail_count = audit_fail_count + 1
sm.update_subcloud_availability(self.ctx, subcloud.name,
consts.AVAILABILITY_OFFLINE,
audit_fail_count=audit_fail_count)
ssm.update_subcloud_availability(self.ctx, subcloud.name,
consts.AVAILABILITY_OFFLINE,
audit_fail_count=audit_fail_count)
# Verify the subclcoud availability was updated
updated_subcloud = db_api.subcloud_get_by_name(self.ctx, 'subcloud1')
@ -1039,7 +1053,7 @@ class TestSubcloudManager(base.DCManagerTestCase):
subcloud = self.create_subcloud_static(self.ctx, name='subcloud1')
self.assertIsNotNone(subcloud)
sm = subcloud_manager.SubcloudManager()
ssm = subcloud_state_manager.SubcloudStateManager()
# Set the subcloud to online/managed
db_api.subcloud_update(self.ctx, subcloud.id,
@ -1060,7 +1074,7 @@ class TestSubcloudManager(base.DCManagerTestCase):
consts.SYNC_STATUS_UNKNOWN]:
# Update identity to the original status
sm.update_subcloud_endpoint_status(
ssm.update_subcloud_endpoint_status(
self.ctx, subcloud_name=subcloud.name,
endpoint_type=endpoint,
sync_status=original_sync_status)
@ -1070,7 +1084,7 @@ class TestSubcloudManager(base.DCManagerTestCase):
self.fake_dcmanager_audit_api.trigger_subcloud_patch_load_audits.call_count
# Update identity to new status and get the count of the trigger again
sm.update_subcloud_endpoint_status(
ssm.update_subcloud_endpoint_status(
self.ctx, subcloud_name=subcloud.name,
endpoint_type=endpoint,
sync_status=new_sync_status)

View File

@ -389,7 +389,7 @@ class SysinvAPIController(APIController):
def __init__(self, app, conf):
super(SysinvAPIController, self).__init__(app, conf)
self.dcmanager_rpc_client = dcmanager_rpc_client.ManagerClient()
self.dcmanager_state_rpc_client = dcmanager_rpc_client.SubcloudStateClient()
self.response_hander_map = {
self.ENDPOINT_TYPE: self._process_response
}
@ -423,7 +423,7 @@ class SysinvAPIController(APIController):
# Send a RPC to dcmanager
LOG.info("Send RPC to dcmanager to set: %s sync status to: %s"
% (endpoint_type, sync_status))
self.dcmanager_rpc_client.update_subcloud_endpoint_status(
self.dcmanager_state_rpc_client.update_subcloud_endpoint_status(
self.ctxt,
endpoint_type=endpoint_type,
sync_status=sync_status)

View File

@ -64,7 +64,7 @@ class PatchAPIController(Middleware):
super(PatchAPIController, self).__init__(app)
self.ctxt = context.get_admin_context()
self._default_dispatcher = APIDispatcher(app)
self.rpc_client = dcmanager_rpc_client.ManagerClient()
self.dcmanager_state_rpc_client = dcmanager_rpc_client.SubcloudStateClient()
self.response_hander_map = {
proxy_consts.PATCH_ACTION_UPLOAD: self.patch_upload_req,
proxy_consts.PATCH_ACTION_UPLOAD_DIR: self.patch_upload_dir_req,
@ -186,7 +186,7 @@ class PatchAPIController(Middleware):
# Send a RPC to dcmanager
LOG.info("Send RPC to dcmanager to set patching sync status to "
"unknown")
self.rpc_client.update_subcloud_endpoint_status(
self.dcmanager_state_rpc_client.update_subcloud_endpoint_status(
self.ctxt,
endpoint_type=self.ENDPOINT_TYPE,
sync_status=dcmanager_consts.SYNC_STATUS_UNKNOWN)

View File

@ -43,6 +43,9 @@ def main():
messaging.setup()
dmanager_messaging.setup()
LOG.info("Launching dcorch-engine, host=%s, workers=%s ...",
cfg.CONF.host, cfg.CONF.workers)
srv = engine.EngineService(cfg.CONF.host,
consts.TOPIC_ORCH_ENGINE)
launcher = service.launch(cfg.CONF,

View File

@ -75,6 +75,7 @@ class GenericSyncManager(object):
self.create_sync_objects(sc.region_name, sc.capabilities)
LOG.info('Engine id:(%s) create_sync_objects for'
'subcloud:%s.' % (self.engine_id, sc.region_name))
eventlet.sleep(0) # cooperative yield
def create_sync_objects(self, subcloud_name, capabilities):
"""Create sync object objects for the subcloud

View File

@ -108,6 +108,7 @@ class EngineService(service.Service):
self.TG.start(self.ism.initial_sync_thread, self.engine_id)
def start(self):
LOG.info("Starting %s", self.__class__.__name__)
self.engine_id = uuidutils.generate_uuid()
target = oslo_messaging.Target(version=self.rpc_api_version,
server=self.host,
@ -303,9 +304,10 @@ class EngineService(service.Service):
# Stop RPC connection to prevent new requests
LOG.debug(_("Attempting to stop engine service..."))
try:
self._rpc_server.stop()
self._rpc_server.wait()
LOG.info('Engine service stopped successfully')
if self._rpc_server:
self._rpc_server.stop()
self._rpc_server.wait()
LOG.info('Engine service stopped successfully')
except Exception as ex:
LOG.error('Failed to stop engine service: %s',
six.text_type(ex))
@ -313,7 +315,9 @@ class EngineService(service.Service):
def stop(self):
self._stop_rpc_server()
self.TG.stop()
if self.TG:
self.TG.stop()
# Terminate the engine process
LOG.info("All threads were gone, terminating engine")
super(EngineService, self).stop()

View File

@ -76,7 +76,7 @@ class SyncThread(object):
self.log_extra = {
"instance": self.subcloud_name + ": "}
self.dcmanager_rpc_client = dcmanager_rpc_client.ManagerClient()
self.dcmanager_state_rpc_client = dcmanager_rpc_client.SubcloudStateClient()
self.sc_admin_session = None
self.admin_session = None
@ -282,7 +282,7 @@ class SyncThread(object):
LOG.info("{}: set_sync_status {}".format(self.subcloud_name, sync_status),
extra=self.log_extra)
self.dcmanager_rpc_client.update_subcloud_endpoint_status(
self.dcmanager_state_rpc_client.update_subcloud_endpoint_status(
self.ctxt, self.subcloud_name,
self.endpoint_type, sync_status)

View File

@ -0,0 +1,327 @@
#!/bin/sh
# OpenStack DC Manager State Service (dcmanager-state)
#
# Description:
# Manages an OpenStack DC Manager State Service (dcmanager-state)
# process as an HA resource
#
# Copyright (c) 2022 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
#
# See usage() function below for more details ...
#
# OCF instance parameters:
# OCF_RESKEY_binary
# OCF_RESKEY_config
# OCF_RESKEY_user
# OCF_RESKEY_pid
# OCF_RESKEY_additional_parameters
#######################################################################
# Initialization:
# Locate and source the standard OCF shell helper library (ocf_log, ocf_run,
# check_binary, ocf_is_probe, $OCF_SUCCESS et al.). OCF_ROOT is exported by
# the cluster resource manager before this agent is invoked.
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs

#######################################################################

# Fill in some defaults if no values are specified
OCF_RESKEY_binary_default="/usr/bin/dcmanager-state"
OCF_RESKEY_config_default="/etc/dcmanager/dcmanager.conf"
OCF_RESKEY_user_default="root"
OCF_RESKEY_pid_default="$HA_RSCTMP/$OCF_RESOURCE_INSTANCE.pid"

# Apply the defaults only when the resource manager did not pass a value.
: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}}
: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}}
: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}

# Keep the daemon's temp files on the dcmanager runtime directory.
export TMPDIR=/var/run/dcmanager
#######################################################################
# Print a short usage summary of the actions this resource agent supports.
usage() {
    cat <<UEND
usage: $0 (start|stop|validate-all|meta-data|status|monitor)
$0 manages an OpenStack DC Manager State service (dcmanager-state) process as an HA resource
The 'start' operation starts the dcmanager-state service.
The 'stop' operation stops the dcmanager-state service.
The 'validate-all' operation reports whether the parameters are valid
The 'meta-data' operation reports this RA's meta-data information
The 'status' operation reports whether the dcmanager-state service is running
The 'monitor' operation reports whether the dcmanager-state service seems to be working
UEND
}
# Emit the OCF resource-agent meta-data XML describing this agent's
# parameters and supported actions. The cluster manager parses this output,
# so the heredoc body must remain valid XML.
meta_data() {
    cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="dcmanager-state">
<version>1.0</version>

<longdesc lang="en">
Resource agent for the DC Manager State Service (dcmanager-state)
</longdesc>
<shortdesc lang="en">Manages the OpenStack DC Manager State Service (dcmanager-state)</shortdesc>
<parameters>

<parameter name="binary" unique="0" required="0">
<longdesc lang="en">
Location of the DC Manager State Service binary (dcmanager-state)
</longdesc>
<shortdesc lang="en">DC Manager State Service binary (dcmanager-state)</shortdesc>
<content type="string" default="${OCF_RESKEY_binary_default}" />
</parameter>

<parameter name="config" unique="0" required="0">
<longdesc lang="en">
Location of the DC Manager State Service (dcmanager-state) configuration file
</longdesc>
<shortdesc lang="en">DC Manager State Service (dcmanager-state registry) config file</shortdesc>
<content type="string" default="${OCF_RESKEY_config_default}" />
</parameter>

<parameter name="user" unique="0" required="0">
<longdesc lang="en">
User running DC Manager State Service (dcmanager-state)
</longdesc>
<shortdesc lang="en">DC Manager State Service (dcmanager-state) user</shortdesc>
<content type="string" default="${OCF_RESKEY_user_default}" />
</parameter>

<parameter name="pid" unique="0" required="0">
<longdesc lang="en">
The pid file to use for this DC Manager State Service (dcmanager-state) instance
</longdesc>
<shortdesc lang="en">DC Manager State Service (dcmanager-state) pid file</shortdesc>
<content type="string" default="${OCF_RESKEY_pid_default}" />
</parameter>

<parameter name="additional_parameters" unique="0" required="0">
<longdesc lang="en">
Additional parameters to pass on to the DC Manager State Service (dcmanager-state)
</longdesc>
<shortdesc lang="en">Additional parameters for dcmanager-state</shortdesc>
<content type="string" />
</parameter>

</parameters>

<actions>
<action name="start" timeout="20" />
<action name="stop" timeout="20" />
<action name="status" timeout="20" />
<action name="monitor" timeout="10" interval="5" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}
#######################################################################
# Functions invoked by resource manager actions
# Sanity-check the environment before any action other than meta-data/help:
# required tools exist, the config file is present (outside probes), and the
# configured user account exists. Returns 0 on success, OCF_ERR_INSTALLED
# otherwise.
dcmanager_state_validate() {
    local tool

    # Every external command this agent shells out to must be available.
    for tool in $OCF_RESKEY_binary curl tr grep cut head; do
        check_binary $tool
    done

    # A config file on shared storage that is not available
    # during probes is OK.
    if [ ! -f $OCF_RESKEY_config ]; then
        if ! ocf_is_probe; then
            ocf_log err "Config $OCF_RESKEY_config doesn't exist"
            return $OCF_ERR_INSTALLED
        fi
        ocf_log_warn "Config $OCF_RESKEY_config not available during a probe"
    fi

    # The service account must exist on this node.
    if ! getent passwd $OCF_RESKEY_user >/dev/null 2>&1; then
        ocf_log err "User $OCF_RESKEY_user doesn't exist"
        return $OCF_ERR_INSTALLED
    fi

    true
}
# Report whether dcmanager-state is running, based on the pid file plus a
# signal-0 liveness probe. A stale pid file is removed. Returns OCF_SUCCESS
# when the process is alive, OCF_NOT_RUNNING otherwise.
dcmanager_state_status() {
    local pid

    # No pid file means the service was never started (or stopped cleanly).
    if [ ! -f $OCF_RESKEY_pid ]; then
        ocf_log info "DC Manager State Service (dcmanager-state) is not running"
        return $OCF_NOT_RUNNING
    fi
    pid=`cat $OCF_RESKEY_pid`

    # kill -0 only checks process existence; it delivers no signal.
    if ocf_run -warn kill -s 0 $pid; then
        return $OCF_SUCCESS
    fi

    # The pid file survived but the process is gone: clean up the stale file.
    ocf_log info "Old PID file found, but DC Manager State Service (dcmanager-state) is not running"
    rm -f $OCF_RESKEY_pid
    return $OCF_NOT_RUNNING
}
# Monitor action: confirm the dcmanager-state service appears healthy.
# Returns the status-check result unchanged on failure, OCF_SUCCESS otherwise.
dcmanager_state_monitor() {
    local rc

    dcmanager_state_status
    rc=$?

    # If status returned anything but success, return that immediately
    if [ $rc -ne $OCF_SUCCESS ]; then
        return $rc
    fi

    # Further verify the service availability.
    # NOTE(review): no deeper health probe is implemented here — monitor is
    # currently equivalent to the pid-based status check above.
    ocf_log debug "DC Manager State Service (dcmanager-state) monitor succeeded"
    return $OCF_SUCCESS
}
# Start action: launch the dcmanager-state daemon as the configured user,
# record its pid, then poll until monitor reports it up. A monitor result
# other than "not running yet" aborts the start with OCF_ERR_GENERIC.
dcmanager_state_start() {
    local rc

    # Idempotent start: nothing to do if the service is already up.
    dcmanager_state_status
    rc=$?
    if [ $rc -eq $OCF_SUCCESS ]; then
        ocf_log info "DC Manager State Service (dcmanager-state) already running"
        return $OCF_SUCCESS
    fi

    # Change the working dir to /, to be sure it's accessible
    cd /

    # run the actual dcmanager-state daemon. Don't use ocf_run as we're sending the tool's output
    # straight to /dev/null anyway and using ocf_run would break stdout-redirection here.
    su ${OCF_RESKEY_user} -s /bin/sh -c "${OCF_RESKEY_binary} --config-file=$OCF_RESKEY_config \
$OCF_RESKEY_additional_parameters"' >> /dev/null 2>&1 & echo $!' > $OCF_RESKEY_pid

    # Spin waiting for the server to come up.
    # Let the CRM/LRM time us out if required
    while :; do
        dcmanager_state_monitor
        rc=$?
        if [ $rc -eq $OCF_SUCCESS ]; then
            break
        fi
        if [ $rc -ne $OCF_NOT_RUNNING ]; then
            ocf_log err "DC Manager State Service (dcmanager-state) start failed"
            exit $OCF_ERR_GENERIC
        fi
        sleep 1
    done

    ocf_log info "DC Manager State Service (dcmanager-state) started"
    return $OCF_SUCCESS
}
# Force-kill any stray dcmanager-state python processes that survived the
# normal stop sequence, so a subsequent start begins from a clean state.
dcmanager_state_confirm_stop() {
    # BUGFIX: the original declared `local my_bin` but then assigned and used
    # `my_binary`, so `my_binary` silently leaked into the global scope and
    # `my_bin` was dead. Declare the variable that is actually used.
    local my_binary
    local my_processes

    my_binary=`which ${OCF_RESKEY_binary}`
    # Match only python interpreters executing our binary (word boundary at
    # the end avoids matching sibling binaries with a common prefix).
    my_processes=`pgrep -l -f "^(python|/usr/bin/python|/usr/bin/python2) ${my_binary}([^\w-]|$)"`

    if [ -n "${my_processes}" ]
    then
        ocf_log info "About to SIGKILL the following: ${my_processes}"
        pkill -KILL -f "^(python|/usr/bin/python|/usr/bin/python2) ${my_binary}([^\w-]|$)"
    fi
}
# Stop action: send SIGTERM, wait up to the (CRM-derived) shutdown timeout
# for the process to exit, escalate to SIGKILL if needed, then sweep up any
# stragglers and remove the pid file.
dcmanager_state_stop() {
    local rc
    local pid
    local waited

    # Already stopped? Still sweep for strays before reporting success.
    dcmanager_state_status
    rc=$?
    if [ $rc -eq $OCF_NOT_RUNNING ]; then
        ocf_log info "DC Manager State Service (dcmanager-state) already stopped"
        dcmanager_state_confirm_stop
        return $OCF_SUCCESS
    fi

    # Try SIGTERM
    pid=`cat $OCF_RESKEY_pid`
    if ! ocf_run kill -s TERM $pid; then
        ocf_log err "DC Manager State Service (dcmanager-state) couldn't be stopped"
        dcmanager_state_confirm_stop
        exit $OCF_ERR_GENERIC
    fi

    # stop waiting: default 15s, or the CRM operation timeout minus a 5s margin.
    shutdown_timeout=15
    if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
        shutdown_timeout=$((($OCF_RESKEY_CRM_meta_timeout/1000)-5))
    fi

    waited=0
    while [ $waited -lt $shutdown_timeout ]; do
        dcmanager_state_status
        if [ $? -eq $OCF_NOT_RUNNING ]; then
            break
        fi
        waited=`expr $waited + 1`
        sleep 1
        ocf_log debug "DC Manager State Service (dcmanager-state) still hasn't stopped yet. Waiting ..."
    done

    dcmanager_state_status
    if [ $? -ne $OCF_NOT_RUNNING ]; then
        # SIGTERM didn't help either, try SIGKILL
        ocf_log info "DC Manager State Service (dcmanager-state) failed to stop after ${shutdown_timeout}s \
using SIGTERM. Trying SIGKILL ..."
        ocf_run kill -s KILL $pid
    fi

    dcmanager_state_confirm_stop
    ocf_log info "DC Manager State Service (dcmanager-state) stopped"
    rm -f $OCF_RESKEY_pid
    return $OCF_SUCCESS
}
#######################################################################
# meta-data and usage/help must succeed even on nodes where the software is
# not installed (cluster probes), so dispatch them before validation.
case "$1" in
    meta-data)  meta_data
                exit $OCF_SUCCESS;;
    usage|help) usage
                exit $OCF_SUCCESS;;
esac

# Anything except meta-data and help must pass validation
dcmanager_state_validate || exit $?

# What kind of method was invoked?
case "$1" in
    start)        dcmanager_state_start;;
    stop)         dcmanager_state_stop;;
    status)       dcmanager_state_status;;
    monitor)      dcmanager_state_monitor;;
    # validate-all already ran above; nothing more to do.
    validate-all) ;;
    *)            usage
                  exit $OCF_ERR_UNIMPLEMENTED;;
esac

View File

@ -34,6 +34,7 @@ console_scripts =
dcmanager-orchestrator = dcmanager.cmd.orchestrator:main
dcmanager-manager = dcmanager.cmd.manager:main
dcmanager-manage = dcmanager.cmd.manage:main
dcmanager-state = dcmanager.cmd.state:main
dcorch-api = dcorch.cmd.api:main
dcorch-engine = dcorch.cmd.engine:main
dcorch-manage = dcorch.cmd.manage:main