Controller Services swact/failover time reduction

Add full support for the Active/Active redundancy model
1. services could have enable dependency to services in other service groups
   (standby group)
2. An active/active service failure will degrade the service group it is in
3. A failure of active/active service would not prevent a swact
4. Locking a controller that is the sole active/active service
provider will be rejected, but a lock with the force option will still
proceed to lock the node.
5. sm-api now binds to port 7777 on the mgmt interface (was localhost:7777).

Change-Id: I6f0354e1e8fc606c6f3b8b33e3ab862b47824232
Signed-off-by: Jack Ding <jack.ding@windriver.com>
This commit is contained in:
Bin Qian 2018-06-01 13:45:08 -04:00 committed by Jack Ding
parent b62c18debf
commit 7de45afb19
18 changed files with 507 additions and 60 deletions

View File

@ -16,7 +16,7 @@
# License for the specific language governing permissions and limitations
# under the License.
#
# Copyright (c) 2013-2014 Wind River Systems, Inc.
# Copyright (c) 2013-2018 Wind River Systems, Inc.
#
@ -25,10 +25,14 @@ from pecan import rest
import wsme
from wsme import types as wtypes
import wsmeext.pecan as wsme_pecan
import socket
import urllib2
import json
from sm_api.api.controllers.v1 import base
from sm_api.api.controllers.v1 import smc_api
from sm_api.openstack.common import log
from sm_api.api.controllers.v1 import services
LOG = log.getLogger(__name__)
@ -36,6 +40,7 @@ ERR_CODE_SUCCESS = "0"
ERR_CODE_HOST_NOT_FOUND = "-1000"
ERR_CODE_ACTION_FAILED = "-1001"
ERR_CODE_NO_HOST_TO_SWACT_TO = "-1002"
ERR_CODE_LOCK_SOLE_SERVICE_PROVIDER = "-1003"
SM_NODE_STATE_UNKNOWN = "unknown"
SM_NODE_ADMIN_LOCKED = "locked"
@ -55,6 +60,13 @@ SM_SERVICE_DOMAIN_MEMBER_REDUNDANCY_MODEL_N_PLUS_M = "N + M"
SM_SERVICE_DOMAIN_MEMBER_REDUNDANCY_MODEL_N_TO_1 = "N to 1"
SM_SERVICE_DOMAIN_MEMBER_REDUNDANCY_MODEL_N_TO_N = "N to N"
SM_SERVICE_SEVERITY_NIL = "nil"
SM_SERVICE_SEVERITY_UNKNOWN = "unknown"
SM_SERVICE_SEVERITY_NONE = "none"
SM_SERVICE_SEVERITY_MINOR = "minor"
SM_SERVICE_SEVERITY_MAJOR = "major"
SM_SERVICE_SEVERITY_CRITICAL = "critical"
# sm_types.c
SM_SERVICE_GROUP_STATE_NIL = "nil"
SM_SERVICE_GROUP_STATE_NA = "not-applicable"
@ -87,6 +99,77 @@ SM_SERVICE_GROUP_CONDITION_RECOVERY_FAILURE = "recovery-failure"
SM_SERVICE_GROUP_CONDITION_ACTION_FAILURE = "action-failure"
SM_SERVICE_GROUP_CONDITION_FATAL_FAILURE = "fatal-failure"
SM_SERVICE_STATE_NIL = "nil"
SM_SERVICE_STATE_NA = "not-applicable"
SM_SERVICE_STATE_INITIAL = "initial"
SM_SERVICE_STATE_UNKNOWN = "unknown"
SM_SERVICE_STATE_ENABLED_STANDBY = "enabled-standby"
SM_SERVICE_STATE_ENABLED_GO_STANDBY = "enabled-go-standby"
SM_SERVICE_STATE_ENABLED_GO_ACTIVE = "enabled-go-active"
SM_SERVICE_STATE_ENABLED_ACTIVE = "enabled-active"
SM_SERVICE_STATE_ENABLING = "enabling"
SM_SERVICE_STATE_ENABLING_THROTTLE = "enabling-throttle"
SM_SERVICE_STATE_DISABLING = "disabling"
SM_SERVICE_STATE_DISABLED = "disabled"
SM_SERVICE_STATE_SHUTDOWN = "shutdown"
LOCAL_HOST_NAME = socket.gethostname()
def rest_api_request(token, method, api_cmd, api_cmd_headers=None,
                     api_cmd_payload=None, timeout=10):
    """Make a REST API request and return the decoded JSON response.

    :param token: optional token object; when truthy its ``get_id()`` value
        is sent as ``X-Auth-Token`` and ``set_expired()`` is called on it
        if the server answers with HTTP 401.
    :param method: HTTP method name, e.g. "GET".
    :param api_cmd: full URL of the request.
    :param api_cmd_headers: optional dict of extra request headers.
    :param api_cmd_payload: optional request body.
    :param timeout: timeout passed to ``urllib2.urlopen``.
    :returns: the JSON-decoded response body, ``{}`` for an empty body or
        for an HTTP error whose message is not JSON, or ``None`` when the
        request failed with a ``urllib2.URLError``.
    :raises ValueError: if a successful response body is not valid JSON.
    """
    LOG.info("%s cmd:%s hdr:%s payload:%s" % (
        method, api_cmd, api_cmd_headers, api_cmd_payload))

    response = None
    try:
        request_info = urllib2.Request(api_cmd)
        # urllib2 only knows GET/POST natively; force the requested method.
        request_info.get_method = lambda: method
        if token:
            request_info.add_header("X-Auth-Token", token.get_id())
        request_info.add_header("Accept", "application/json")

        if api_cmd_headers is not None:
            for header_type, header_value in api_cmd_headers.items():
                request_info.add_header(header_type, header_value)

        if api_cmd_payload is not None:
            request_info.add_data(api_cmd_payload)

        request = urllib2.urlopen(request_info, timeout=timeout)
        try:
            body = request.read()
        finally:
            # Release the connection even if reading the body fails.
            request.close()

        if body == "":
            response = {}
        else:
            response = json.loads(body)
        LOG.info("Response=%s" % response)

    except urllib2.HTTPError as e:
        if 401 == e.code:
            if token:
                token.set_expired()
        LOG.warn("HTTP Error e.code=%s e=%s" % (e.code, e))
        response = {}
        if hasattr(e, 'msg') and e.msg:
            try:
                response = json.loads(e.msg)
            except (TypeError, ValueError):
                # The message is frequently a plain reason phrase such as
                # "Not Found" rather than JSON; report it as an empty
                # response instead of letting the decode error escape.
                response = {}
        LOG.info("HTTPError response=%s" % (response))

    except urllib2.URLError as urle:
        # Not necessarily a refused connection (DNS failure, timeout, ...),
        # so log the actual reason.
        LOG.debug("Connection failed: %s" % urle)

    return response
class ServiceNodeCommand(base.APIBase):
origin = wtypes.text
@ -108,6 +191,7 @@ class ServiceNodeCommandResult(base.APIBase):
# Result
error_code = wtypes.text
error_details = wtypes.text
impact_service_list = [wtypes.text]
class ServiceNode(base.APIBase):
@ -220,6 +304,12 @@ class ServiceNodeController(rest.RestController):
LOG.debug("sm-api have_active_sm_services: False")
return swactable_sm_services
def _is_aa_service_group(self, sdm):
    """Tell whether a service domain member is an active/active group.

    A member is treated as active/active when it uses the "N" redundancy
    model with more than one active instance. This is the same test used
    by ``_lock_pre_check`` and by the daemon-side
    ``sm_is_aa_service_group`` (``n_active > 1``).

    :param sdm: service domain member record providing
        ``redundancy_model`` and ``n_active``.
    :returns: True for an active/active service group, False otherwise.
    """
    return (SM_SERVICE_DOMAIN_MEMBER_REDUNDANCY_MODEL_N ==
            sdm.redundancy_model and 1 < sdm.n_active)
def _swact_pre_check(self, hostname):
# run pre-swact checks, verify that services are in the right state
# to accept service
@ -250,15 +340,22 @@ class ServiceNodeController(rest.RestController):
% (sm_sda.node_name, sm_sda.node_name))
break
# Verify that
# all the services are in the standby or active
# state on the other host
# degraded or failure of A/A service on the target host
# would not stop swact
sdm = self._sm_sdm_get(sm_sda.name,
sm_sda.service_group_name)
if (self._is_aa_service_group(sdm)):
continue
# Verify that
# all the services are in the standby state on the
# other host
# or service only provisioned in the other host
# or service state are the same on both hosts
if SM_SERVICE_GROUP_STATE_ACTIVE != sm_sda.state \
and SM_SERVICE_GROUP_STATE_STANDBY != sm_sda.state \
and origin_state.has_key(sm_sda.service_group_name) \
and origin_state[sm_sda.service_group_name] != sm_sda.state:
and SM_SERVICE_GROUP_STATE_STANDBY != sm_sda.state \
and origin_state.has_key(sm_sda.service_group_name) \
and origin_state[sm_sda.service_group_name] != sm_sda.state:
check_result = (
"%s on %s is not ready to take service, "
"service not in the active or standby "
@ -329,6 +426,91 @@ class ServiceNodeController(rest.RestController):
LOG.info("%s" % sm_state_ht)
return sm_state_ht
def get_remote_svc(self, hostname, service_name):
    """Query the sm-api on ``hostname`` for one service.

    Issues ``GET http://<hostname>:7777/v1/services/<service_name>``
    through ``rest_api_request`` without an auth token.

    :param hostname: host whose sm-api instance is queried.
    :param service_name: name of the service to look up.
    :returns: whatever ``rest_api_request`` returns for that request.
    """
    base_url = "http://{host}:{port}".format(host=hostname, port=7777)
    url = base_url + "/v1/services/%s" % service_name

    headers = {
        'Content-type': "application/json",
        'Accept': "application/json",
        'User-Agent': "sm/1.0",
    }

    return rest_api_request(None, "GET", url, headers, None)
def _lock_pre_check(self, hostname):
    """Find critical active/active services that only ``hostname`` provides.

    Steps:
      1. Collect the services in the database whose desired state is
         enabled-active but whose actual state is not.
      2. Keep those that are critical-impact members of an "N" redundancy
         service group with more than one active instance.
      3. Drop every service that a node other than this one and
         ``hostname`` reports as enabled-active.
      4. Of what is left, return the services that ``hostname`` reports as
         enabled-active.

    :param hostname: name of the node about to be locked.
    :returns: list of service names solely provided by ``hostname``, or
        None when there is no such service.
    """
    def _enabled_active(svc):
        # A remote query returns None when the peer cannot be reached and
        # may return a dict without state keys on an HTTP error; neither
        # counts as a running service.
        return (isinstance(svc, dict) and
                SM_SERVICE_STATE_ENABLED_ACTIVE == svc.get('desired_state') and
                SM_SERVICE_STATE_ENABLED_ACTIVE == svc.get('state'))

    # Services that should be enabled-active but are not.
    ill_services = set()
    for service in pecan.request.dbapi.sm_service_get_list():
        if (SM_SERVICE_STATE_ENABLED_ACTIVE == service.desired_state and
                SM_SERVICE_STATE_ENABLED_ACTIVE != service.state):
            ill_services.add(service.name)

    # service group name -> ill critical member services of that group
    chk_list = {}
    for service_group in pecan.request.dbapi.iservicegroup_get_list():
        sdm = pecan.request.dbapi.sm_sdm_get(
            "controller", service_group.name)
        if not (SM_SERVICE_DOMAIN_MEMBER_REDUNDANCY_MODEL_N ==
                sdm.redundancy_model and 1 < sdm.n_active):
            continue

        sgms = pecan.request.dbapi.sm_service_group_members_get_list(
            service_group.name)
        for sgm in sgms:
            if (SM_SERVICE_SEVERITY_CRITICAL ==
                    sgm.service_failure_impact and
                    sgm.service_name in ill_services):
                chk_list.setdefault(service_group.name, []).append(
                    sgm.service_name)

    if not chk_list:
        return None

    sdas = pecan.request.dbapi.sm_sda_get_list()
    for sda in sdas:
        if (sda.node_name not in [LOCAL_HOST_NAME, hostname] and
                sda.service_group_name in chk_list):
            pending = chk_list[sda.service_group_name]
            # Iterate over a snapshot: entries are removed from the live
            # list as soon as another node is found to provide them.
            for service_name in list(pending):
                rsvc = self.get_remote_svc(sda.node_name, service_name)
                if _enabled_active(rsvc):
                    pending.remove(service_name)

    # Every candidate is provided by some other node: nothing blocks.
    if not any(chk_list.values()):
        return None

    target_services = []
    for sda in sdas:
        if (sda.node_name == hostname and
                sda.service_group_name in chk_list):
            for service_name in chk_list[sda.service_group_name]:
                LOG.info("checking %s on %s" % (service_name, hostname))
                rsvc = self.get_remote_svc(sda.node_name, service_name)
                if _enabled_active(rsvc):
                    LOG.info("which is %s %s" % (rsvc['desired_state'],
                                                 rsvc['state']))
                    target_services.append(service_name)

    LOG.info("services %s solely running on %s" % (
        ','.join(target_services), hostname))
    if target_services:
        return target_services
    return None
def _do_modify_command(self, hostname, command):
if command.action == smc_api.SM_NODE_ACTION_SWACT_PRE_CHECK or \
@ -352,6 +534,31 @@ class ServiceNodeController(rest.RestController):
avail=command.avail, error_code=ERR_CODE_SUCCESS,
error_details=check_result)
return wsme.api.Response(result, status_code=200)
elif command.action in [smc_api.SM_NODE_ACTION_LOCK,
smc_api.SM_NODE_ACTION_LOCK_PRE_CHECK]:
impact_services = self._lock_pre_check(hostname)
if impact_services is not None:
result = ServiceNodeCommandResult(
origin="sm", hostname=hostname, action=command.action,
admin=command.admin, oper=command.oper,
avail=command.avail,
error_code=ERR_CODE_LOCK_SOLE_SERVICE_PROVIDER,
impact_service_list=impact_services,
error_details="%s is the sole provider of some services."
% hostname)
if command.action == smc_api.SM_NODE_ACTION_LOCK_PRE_CHECK:
return wsme.api.Response(result, status_code=200)
return wsme.api.Response(result, status_code=400)
elif smc_api.SM_NODE_ACTION_LOCK_PRE_CHECK == command.action:
result = ServiceNodeCommandResult(
origin="sm", hostname=hostname, action=command.action,
admin=command.admin, oper=command.oper,
avail=command.avail, error_code=ERR_CODE_SUCCESS,
impact_service_list=impact_services,
error_details=None)
return wsme.api.Response(result, status_code=200)
if command.action == smc_api.SM_NODE_ACTION_UNLOCK or \
command.action == smc_api.SM_NODE_ACTION_LOCK or \

View File

@ -1,5 +1,5 @@
#
# Copyright (c) 2014 Wind River Systems, Inc.
# Copyright (c) 2014-2018 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -113,8 +113,12 @@ class ServicesController(rest.RestController):
@wsme_pecan.wsexpose(Services, unicode)
def get_one(self, uuid):
rpc_sg = objects.service.get_by_uuid(pecan.request.context, uuid)
return Services.convert_with_links(rpc_sg)
return Services.convert_with_links(rpc_sg)
@wsme_pecan.wsexpose(Services, unicode)
def get_service(self, name):
    """Look a service up by name and return it as a ``Services`` object.

    :param name: name of the service.
    """
    service_obj = objects.service.get_by_name(pecan.request.context, name)
    return Services.convert_with_links(service_obj)
@wsme_pecan.wsexpose(ServicesCollection, unicode, int,
unicode, unicode)

View File

@ -1,5 +1,5 @@
#
# Copyright (c) 2014 Wind River Systems, Inc.
# Copyright (c) 2014-2018 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -38,6 +38,8 @@ SM_API_MAX_MSG_SIZE = 2048
SM_NODE_ACTION_UNLOCK = "unlock"
SM_NODE_ACTION_LOCK = "lock"
SM_NODE_ACTION_LOCK_FORCE = "lock-force"
SM_NODE_ACTION_LOCK_PRE_CHECK = "lock-pre-check"
SM_NODE_ACTION_SWACT_PRE_CHECK = "swact-pre-check"
SM_NODE_ACTION_SWACT = "swact"
SM_NODE_ACTION_SWACT_FORCE = "swact-force"

View File

@ -18,7 +18,7 @@
# License for the specific language governing permissions and limitations
# under the License.
#
# Copyright (c) 2013-2014 Wind River Systems, Inc.
# Copyright (c) 2013-2018 Wind River Systems, Inc.
#
@ -28,6 +28,7 @@ import logging
import os.path
import sys
import time
import socket
from oslo_config import cfg
@ -62,9 +63,7 @@ def main():
sm_api_service.prepare_service(sys.argv)
# Build and start the WSGI app
# host = CONF.sm_api_api_bind_ip
# port = CONF.sm_api_api_port
host = 'localhost'
host = socket.gethostname()
port = 7777
wsgi = simple_server.make_server(host, port,
app.VersionSelectorApplication(),

View File

@ -399,3 +399,7 @@ class Unauthorized(SmApiException):
class HTTPNotFound(NotFound):
    """``NotFound`` subclass that adds no behaviour of its own."""
    pass
class ServiceNotFound(NotFound):
    """``NotFound`` error for a service; formatted with ``service=<name>``."""
    message = _("service %(service)s could not be found.")

View File

@ -170,6 +170,13 @@ class Connection(object):
@abc.abstractmethod
def sm_service_get_by_name(self, name):
"""Return a list of services by name.
"""Return a service by name.
:param name: The name of the services.
"""
@abc.abstractmethod
def sm_service_group_members_get_list(self, service_group_name):
    """Return the members of a service group.

    :param service_group_name: name of the service group whose members
        are requested.
    """

View File

@ -15,7 +15,7 @@
# License for the specific language governing permissions and limitations
# under the License.
#
# Copyright (c) 2013-2014 Wind River Systems, Inc.
# Copyright (c) 2013-2018 Wind River Systems, Inc.
#
@ -259,9 +259,18 @@ class Connection(api.Connection):
def sm_service_get_by_name(self, name):
result = model_query(models.service, read_deleted="no").\
filter_by(name=name)
# first() since want a list
if not result:
raise exception.NodeNotFound(node=name)
raise exception.ServiceNotFound(service=name)
return result
@objects.objectify(objects.service_group_member)
def sm_service_group_members_get_list(self, service_group_name):
    """Query the provisioned members of one service group.

    :param service_group_name: value matched against the ``name`` column
        of the service group member table.
    :returns: the query restricted to rows with ``provisioned == 'yes'``
        and the given group name (converted by ``objects.objectify``).
    """
    members = model_query(models.sm_service_group_member,
                          read_deleted="no")
    provisioned_members = members.filter_by(provisioned='yes')
    return provisioned_members.filter_by(name=service_group_name)

View File

@ -15,7 +15,7 @@
# License for the specific language governing permissions and limitations
# under the License.
#
# Copyright (c) 2013-2014 Wind River Systems, Inc.
# Copyright (c) 2013-2018 Wind River Systems, Inc.
#
@ -126,6 +126,8 @@ class sm_sdm(Base):
name = Column(String(255))
service_group_name = Column(String(255))
redundancy_model = Column(String(255)) # sm_types.h
n_active = Column(Integer)
m_standby = Column(Integer)
# sm_service_domain_assignments
@ -152,3 +154,13 @@ class sm_node(Base):
operational_state = Column(String(255))
availability_status = Column(String(255))
ready_state = Column(String(255))
class sm_service_group_member(Base):
    """ORM mapping of the ``service_group_members`` table."""
    __tablename__ = 'service_group_members'

    id = Column(Integer, primary_key=True)
    # Queried with the value 'yes' by sm_service_group_members_get_list.
    provisioned = Column(String(255))
    # Filtered by service group name in sm_service_group_members_get_list.
    name = Column(String(255))
    service_name = Column(String(255))
    # Compared against the SM_SERVICE_SEVERITY_* strings by the API layer.
    service_failure_impact = Column(String(255))

View File

@ -12,7 +12,7 @@
# License for the specific language governing permissions and limitations
# under the License.
#
# Copyright (c) 2013-2014 Wind River Systems, Inc.
# Copyright (c) 2013-2018 Wind River Systems, Inc.
#
@ -23,6 +23,7 @@ from sm_api.objects import smo_service
from sm_api.objects import smo_sdm
from sm_api.objects import smo_sda
from sm_api.objects import smo_node
from sm_api.objects import smo_sgm
def objectify(klass):
@ -46,9 +47,11 @@ service = smo_service.service
sm_sdm = smo_sdm.sm_sdm
sm_sda = smo_sda.sm_sda
sm_node = smo_node.sm_node
service_group_member = smo_sgm.service_group_member
__all__ = (
service_groups,
service_group_member,
service,
sm_sdm,
sm_sda,

View File

@ -1,5 +1,5 @@
#
# Copyright (c) 2013-2014 Wind River Systems, Inc.
# Copyright (c) 2013-2018 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -22,6 +22,8 @@ class sm_sdm(base.Sm_apiObject):
'name': utils.str_or_none,
'service_group_name': utils.str_or_none,
'redundancy_model': utils.str_or_none,
'n_active': int,
'm_standby': int,
}
@staticmethod

View File

@ -1,5 +1,5 @@
#
# Copyright (c) 2013-2014 Wind River Systems, Inc.
# Copyright (c) 2013-2018 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -36,7 +36,7 @@ class service(base.Sm_apiObject):
@base.remotable_classmethod
def get_by_uuid(cls, context, uuid):
"""Find a server based on uuid and return a Node object.
"""Find a service based on uuid and return a service object.
:param uuid: the uuid of a server.
:returns: a :class:`Node` object.
@ -45,6 +45,16 @@ class service(base.Sm_apiObject):
db_server = cls.dbapi.sm_service_get(uuid)
return service._from_db_object(cls(), db_server)
@base.remotable_classmethod
def get_by_name(cls, context, name):
    """Find a service by its name and return it as a service object.

    :param context: request context; not used by this method's body.
    :param name: the name of a service.
    :returns: a :class:`service` object.
    """
    # Keep the db result under its own name: binding it to ``service``
    # would shadow the class inside this function, so the conversion
    # below would be looked up on the db result instead of the class.
    db_service = cls.dbapi.sm_service_get_by_name(name)
    return service._from_db_object(cls(), db_service)
@base.remotable
def save(self, context):
"""Save updates to this Node.

View File

@ -0,0 +1,59 @@
#
# Copyright (c) 2018 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# vim: tabstop=4 shiftwidth=4 softtabstop=4
# coding=utf-8
#
from sm_api.db import api as db_api
from sm_api.objects import base
from sm_api.objects import utils
class service_group_member(base.Sm_apiObject):
    """Object wrapper for one row of the service group member table."""

    dbapi = db_api.get_instance()

    # Field name -> coercion function applied to the stored value.
    fields = {
        'id': utils.int_or_none,
        'name': utils.str_or_none,
        'service_name': utils.str_or_none,
        'service_failure_impact': utils.str_or_none
    }

    @staticmethod
    def _from_db_object(server, db_server):
        """Converts a database entity to a formal object.

        :param server: object to populate; every name in its ``fields``
            is copied.
        :param db_server: database entity indexable by field name.
        :returns: ``server`` with its change tracking reset.
        """
        for field in server.fields:
            server[field] = db_server[field]

        server.obj_reset_changes()
        return server

    @base.remotable_classmethod
    def get_by_service_group(cls, context, service_group_name):
        """Find a service group member by the name of its service group.

        :param context: request context; not used by this method's body.
        :param service_group_name: name of the service group.
        :returns: a :class:`service_group_member` object.
        """
        # NOTE(review): ``iservicegroup_member_get`` is not among the db
        # API methods visible in this change
        # (``sm_service_group_members_get_list`` is) - confirm it exists.
        db_server = cls.dbapi.iservicegroup_member_get(service_group_name)
        return service_group_member._from_db_object(cls(), db_server)

    @base.remotable
    def save(self, context):
        """Saving a service group member is intentionally unsupported.

        :param context: Security context
        :raises NotImplementedError: always.
        """
        # ``NotImplemented`` is a comparison sentinel, not an exception;
        # calling it fails with a TypeError instead of signalling the
        # intended error.
        raise NotImplementedError(
            "This method is intentially not implemented")

    @base.remotable
    def refresh(self, context):
        """Reload this object and copy over every field that changed.

        :param context: Security context
        """
        # NOTE(review): this class defines neither ``get_by_uuid`` nor a
        # ``uuid`` field, so this lookup presumably fails unless the base
        # class provides them - confirm before relying on refresh().
        current = self.__class__.get_by_uuid(context, uuid=self.uuid)
        for field in self.fields:
            if (hasattr(self, base.get_attrname(field)) and
                    self[field] != current[field]):
                self[field] = current[field]

View File

@ -1,5 +1,5 @@
//
// Copyright (c) 2014 Wind River Systems, Inc.
// Copyright (c) 2014-2018 Wind River Systems, Inc.
//
// SPDX-License-Identifier: Apache-2.0
//
@ -13,6 +13,7 @@
#include "sm_time.h"
#include "sm_service_table.h"
#include "sm_service_dependency_table.h"
#include "sm_service_domain_member_table.h"
// ****************************************************************************
// Service Dependency - Dependent State Compare
@ -147,55 +148,99 @@ SmErrorT sm_service_dependency_go_standby_met( SmServiceT* service, bool* met )
}
// ****************************************************************************
// ****************************************************************************
// Service Dependency - Enable Met, per dependent
// ================================
// Per-dependency callback for the enable check.
// user_data[0] points to a bool that is cleared when the dependency is not
// met; it is never set to true here, so the caller must initialise it.
// The dependency is met when the dependent service is enabled-active, or
// when its desired state is enabled-standby.
static void _sm_service_enable_dependency_met(
    void* user_data[], SmServiceDependencyT* service_dependency )
{
    bool *dependency_met = (bool*)user_data[0];

    if( '\0' == service_dependency->dependent[0] )
    {
        DPRINTFD( "Service (%s) has no dependencies.",
                  service_dependency->service_name );
        return;
    }

    SmServiceT* dependent_service =
        sm_service_table_read( service_dependency->dependent );
    if( NULL == dependent_service )
    {
        // Report the service that could not be read (the dependent), not
        // the service that owns the dependency. The flag is left untouched.
        DPRINTFE( "Failed to read service (%s), error=%s.",
                  service_dependency->dependent,
                  sm_error_str(SM_NOT_FOUND) );
        return;
    }

    if( SM_SERVICE_STATE_ENABLED_ACTIVE != dependent_service->state &&
        SM_SERVICE_STATE_ENABLED_STANDBY != dependent_service->desired_state )
    {
        *dependency_met = false;
    }
}
// ****************************************************************************
// ****************************************************************************
// Service Dependency - Enable Met
// ===============================
SmErrorT sm_service_dependency_enable_met( SmServiceT* service, bool* met )
{
bool at_least_one = false;
bool dependency_met = true;
SmCompareOperatorT compare_operator = SM_COMPARE_OPERATOR_LE;
void* user_data[] = {service, &dependency_met, &compare_operator,
&at_least_one};
void* user_data[] = {&dependency_met};
*met = false;
sm_service_dependency_table_foreach( SM_SERVICE_DEPENDENCY_TYPE_ACTION,
service->name, SM_SERVICE_STATE_NA, SM_SERVICE_ACTION_ENABLE,
user_data, sm_service_dependency_dependent_state_compare );
user_data, _sm_service_enable_dependency_met );
if( at_least_one )
{
*met = dependency_met;
} else {
*met = true;
}
*met = dependency_met;
return( SM_OKAY );
}
// ****************************************************************************
// ****************************************************************************
// Service Dependency - Disable Met per dependent
// ================================
// Per-dependency callback for the disable check.
// user_data[0] points to a bool that is cleared when the dependency is not
// met; it is never set to true here, so the caller must initialise it.
// The dependency is met when the dependent service is disabled, or when
// its desired state is enabled-active.
static void _sm_service_disable_dependency_met(
    void* user_data[], SmServiceDependencyT* service_dependency )
{
    bool *dependency_met = (bool*)user_data[0];

    if( '\0' == service_dependency->dependent[0] )
    {
        DPRINTFD( "Service (%s) has no dependencies.",
                  service_dependency->service_name );
        return;
    }

    SmServiceT* dependent_service =
        sm_service_table_read( service_dependency->dependent );
    if( NULL == dependent_service )
    {
        // Report the service that could not be read (the dependent), not
        // the service that owns the dependency. The flag is left untouched.
        DPRINTFE( "Failed to read service (%s), error=%s.",
                  service_dependency->dependent,
                  sm_error_str(SM_NOT_FOUND) );
        return;
    }

    if( SM_SERVICE_STATE_DISABLED != dependent_service->state &&
        SM_SERVICE_STATE_ENABLED_ACTIVE != dependent_service->desired_state )
    {
        *dependency_met = false;
    }
}
// ****************************************************************************
// ****************************************************************************
// Service Dependency - Disable Met
// ================================
SmErrorT sm_service_dependency_disable_met( SmServiceT* service, bool* met )
{
bool at_least_one = false;
bool dependency_met = true;
SmCompareOperatorT compare_operator = SM_COMPARE_OPERATOR_GE;
void* user_data[] = {service, &dependency_met, &compare_operator,
&at_least_one};
void* user_data[] = {&dependency_met};
*met = false;
sm_service_dependency_table_foreach( SM_SERVICE_DEPENDENCY_TYPE_ACTION,
service->name, SM_SERVICE_STATE_NA, SM_SERVICE_ACTION_DISABLE,
user_data, sm_service_dependency_dependent_state_compare );
user_data, _sm_service_disable_dependency_met );
if( at_least_one )
{
*met = dependency_met;
} else {
*met = true;
}
*met = dependency_met;
return( SM_OKAY );
}

View File

@ -1060,6 +1060,25 @@ SmErrorT sm_service_domain_utils_service_domain_neighbor_cleanup(
}
// ****************************************************************************
// ****************************************************************************
// Service Domain Utilities - Is Active/Active Service Group
// =====================================
// Returns true when the named service group has a service domain member
// entry that uses the N redundancy model with more than one active
// instance; returns false otherwise, including when no entry is found.
bool sm_is_aa_service_group(char* service_group_name)
{
    SmServiceDomainMemberT* member =
        sm_service_domain_member_table_read_service_group( service_group_name );

    if( NULL == member )
    {
        return false;
    }

    if( SM_SERVICE_DOMAIN_MEMBER_REDUNDANCY_MODEL_N != member->redundancy_model )
    {
        return false;
    }

    return ( member->n_active > 1 );
}
// ****************************************************************************
// ****************************************************************************
// Service Domain Utilities - Initialize
// =====================================

View File

@ -1,5 +1,5 @@
//
// Copyright (c) 2014 Wind River Systems, Inc.
// Copyright (c) 2014-2018 Wind River Systems, Inc.
//
// SPDX-License-Identifier: Apache-2.0
//
@ -144,6 +144,12 @@ extern SmErrorT sm_service_domain_utils_service_domain_neighbor_cleanup(
char name[], char node_name[] );
// ****************************************************************************
// ****************************************************************************
// Service Domain Utilities - Is Active/Active Service Group
// ==============================
extern bool sm_is_aa_service_group(char* service_group_name);
// ****************************************************************************
// ****************************************************************************
// Service Domain Utilities - Initialize
// =====================================

View File

@ -1,5 +1,5 @@
//
// Copyright (c) 2014 Wind River Systems, Inc.
// Copyright (c) 2014-2018 Wind River Systems, Inc.
//
// SPDX-License-Identifier: Apache-2.0
//
@ -16,6 +16,7 @@
#include "sm_service_group_member_table.h"
#include "sm_service_api.h"
#include "sm_service_group_health.h"
#include "sm_service_domain_utils.h"
// ****************************************************************************
// Service Group Audit - Set Service Reason Text
@ -146,15 +147,18 @@ static void sm_service_group_audit_service_for_status( void* user_data[],
int* failed = (int*) user_data[3];
int* degraded = (int*) user_data[4];
int* warn = (int*) user_data[5];
bool* reason_text_writable = (bool*) user_data[6];
char* reason_text = (char*) user_data[7];
int reason_text_size = *(int*) user_data[8];
int* healthy = (int*) user_data[6];
bool* reason_text_writable = (bool*) user_data[7];
char* reason_text = (char*) user_data[8];
int reason_text_size = *(int*) user_data[9];
SmServiceGroupStatusT prev_status = *status;
SmServiceGroupConditionT prev_condition = *condition;
SmServiceStatusT sgm_imply_status;
SmServiceGroupStatusT mapped_status;
SmServiceGroupConditionT mapped_condition;
char service_reason_text[SM_SERVICE_GROUP_REASON_TEXT_MAX_CHAR] = "";
sgm_imply_status = service_group_member->service_status;
if( 0 != service_group_member->service_failure_timestamp )
{
elapsed_ms = sm_time_get_elapsed_ms( NULL );
@ -162,27 +166,34 @@ static void sm_service_group_audit_service_for_status( void* user_data[],
if( service_group->failure_debounce_in_ms >= delta_ms )
{
DPRINTFD( "Service group (%s) member (%s) failure debounce "
"still in effect, indicating member as unhealthy, "
DPRINTFD( "Service group (%s) member (%s) failure debounce (%d) "
"still in effect since (%li), indicating member as unhealthy, "
"delta_ms=%li.", service_group->name,
service_group_member->service_name, delta_ms );
service_group_member->service_name,
service_group->failure_debounce_in_ms,
service_group_member->service_failure_timestamp,
delta_ms );
do_increment = false;
switch( service_group_member->service_failure_impact )
{
case SM_SERVICE_SEVERITY_NONE:
sgm_imply_status = SM_SERVICE_STATUS_NONE;
break;
case SM_SERVICE_SEVERITY_MINOR:
sgm_imply_status = SM_SERVICE_STATUS_NONE;
++(*warn);
break;
case SM_SERVICE_SEVERITY_MAJOR:
sgm_imply_status = SM_SERVICE_STATUS_DEGRADED;
++(*degraded);
break;
case SM_SERVICE_SEVERITY_CRITICAL:
sgm_imply_status = SM_SERVICE_STATUS_FAILED;
++(*failed);
break;
@ -190,13 +201,17 @@ static void sm_service_group_audit_service_for_status( void* user_data[],
break;
}
} else {
++(*healthy);
DPRINTFD( "Service group (%s) member (%s) failure debounce "
"no longer in effect, delta_ms=%li.", service_group->name,
service_group_member->service_name, delta_ms );
}
}else
{
++(*healthy);
}
switch( service_group_member->service_status )
switch( sgm_imply_status )
{
case SM_SERVICE_STATUS_NONE:
mapped_status = SM_SERVICE_GROUP_STATUS_NONE;
@ -452,7 +467,7 @@ static void sm_service_group_audit_service_for_status( void* user_data[],
// ============================
SmErrorT sm_service_group_audit_status( SmServiceGroupT* service_group )
{
int failed=0, degraded=0, warn=0;
int failed=0, degraded=0, warn=0, healthy=0;
SmServiceGroupStatusT audit_status = SM_SERVICE_GROUP_STATUS_NONE;
SmServiceGroupConditionT audit_condition = SM_SERVICE_GROUP_CONDITION_NONE;
int64_t audit_health = 0;
@ -460,7 +475,7 @@ SmErrorT sm_service_group_audit_status( SmServiceGroupT* service_group )
char reason_text[SM_SERVICE_GROUP_REASON_TEXT_MAX_CHAR] = "";
int reason_text_size = SM_SERVICE_GROUP_REASON_TEXT_MAX_CHAR;
void* user_data[] = { service_group, &audit_status, &audit_condition,
&failed, &degraded, &warn, &reason_text_writable,
&failed, &degraded, &warn, &healthy, &reason_text_writable,
reason_text, &reason_text_size };
sm_service_group_member_table_foreach_member( service_group->name,
@ -468,7 +483,14 @@ SmErrorT sm_service_group_audit_status( SmServiceGroupT* service_group )
audit_health = sm_service_group_health_calculate( failed, degraded, warn );
service_group->status = audit_status;
if(SM_SERVICE_GROUP_STATUS_FAILED == audit_status && 0 < healthy &&
sm_is_aa_service_group(service_group->name))
{
service_group->status = SM_SERVICE_GROUP_STATUS_DEGRADED;
}else
{
service_group->status = audit_status;
}
service_group->condition = audit_condition;
service_group->health = audit_health;

View File

@ -1,5 +1,5 @@
//
// Copyright (c) 2014 Wind River Systems, Inc.
// Copyright (c) 2014-2018 Wind River Systems, Inc.
//
// SPDX-License-Identifier: Apache-2.0
//
@ -22,6 +22,8 @@
#include "sm_service_go_active.h"
#include "sm_service_go_standby.h"
#include "sm_service_audit.h"
#include "sm_service_group_table.h"
#include "sm_service_group_member_table.h"
static SmListT* _services = NULL;
static SmDbHandleT* _sm_db_handle = NULL;
@ -351,6 +353,38 @@ SmErrorT sm_service_table_persist( SmServiceT* service )
}
// ****************************************************************************
// ****************************************************************************
// Service - Loop service members
// ===============================
// Per-member callback: copies the service group name of the member into
// the group_name field of the matching entry in the service table.
// user_data is not used. A member without a service entry is logged and
// skipped.
static void _sm_loop_service_group_members( void* user_data[],
    SmServiceGroupMemberT* service_group_member )
{
    SmServiceT* member_service =
        sm_service_table_read( service_group_member->service_name );

    if( NULL != member_service )
    {
        snprintf( member_service->group_name,
                  sizeof(member_service->group_name),
                  "%s", service_group_member->name );
        return;
    }

    DPRINTFE( "Could not find service (%s) of "
              "service group (%s).",
              service_group_member->service_name,
              service_group_member->name);
}
// ****************************************************************************
// ****************************************************************************
// Service Table - Loop service groups
// =================================================
// Per-group callback: visits every member of the given service group with
// _sm_loop_service_group_members. user_data is not used; NULL is passed on
// as the member callback's user data.
static void _sm_loop_service_groups(
    void* user_data[], SmServiceGroupT* service_group )
{
    sm_service_group_member_table_foreach_member( service_group->name,
        NULL, _sm_loop_service_group_members );
}
// ****************************************************************************
// ****************************************************************************
// Service Table - Initialize
// ==========================
@ -376,6 +410,8 @@ SmErrorT sm_service_table_initialize( void )
return( error );
}
sm_service_group_table_foreach( NULL, _sm_loop_service_groups );
return( SM_OKAY );
}
// ****************************************************************************

View File

@ -1,5 +1,5 @@
//
// Copyright (c) 2014 Wind River Systems, Inc.
// Copyright (c) 2014-2018 Wind River Systems, Inc.
//
// SPDX-License-Identifier: Apache-2.0
//
@ -56,6 +56,7 @@ typedef struct
bool disable_check_dependency;
//flag to indicate disable a service without disabling its dependency
bool disable_skip_dependent;
char group_name[SM_SERVICE_GROUP_NAME_MAX_CHAR];
} SmServiceT;
typedef void (*SmServiceTableForEachCallbackT)