From 7de45afb190104a042985771ecce997ef872b73b Mon Sep 17 00:00:00 2001 From: Bin Qian Date: Fri, 1 Jun 2018 13:45:08 -0400 Subject: [PATCH] Controller Services swact/failover time reduction Add full support for Active/Active redundancy model 1. services could have enable dependency to services in other service groups (standby group) 2. An active/active service failure will degrade the service group it is in 3. A failure of active/active service would not prevent a swact 4. Locking a controller that is the sole active/active service provider will be rejected. But lock with force option will still proceed to lock the node. 5. sm-api binds to port 7777 on mgmt interface. (was localhost:7777) Change-Id: I6f0354e1e8fc606c6f3b8b33e3ab862b47824232 Signed-off-by: Jack Ding --- .../sm_api/api/controllers/v1/servicenode.py | 221 +++++++++++++++++- .../sm_api/api/controllers/v1/services.py | 8 +- .../sm_api/api/controllers/v1/smc_api.py | 4 +- service-mgmt-api/sm-api/sm_api/cmd/api.py | 7 +- .../sm-api/sm_api/common/exception.py | 4 + service-mgmt-api/sm-api/sm_api/db/api.py | 9 +- .../sm-api/sm_api/db/sqlalchemy/api.py | 15 +- .../sm-api/sm_api/db/sqlalchemy/models.py | 14 +- .../sm-api/sm_api/objects/__init__.py | 5 +- .../sm-api/sm_api/objects/smo_sdm.py | 4 +- .../sm-api/sm_api/objects/smo_service.py | 14 +- .../sm-api/sm_api/objects/smo_sgm.py | 59 +++++ .../sm-1.0.0/src/sm_service_dependency.c | 91 ++++++-- .../sm-1.0.0/src/sm_service_domain_utils.c | 19 ++ .../sm-1.0.0/src/sm_service_domain_utils.h | 8 +- .../sm-1.0.0/src/sm_service_group_audit.c | 44 +++- service-mgmt/sm-1.0.0/src/sm_service_table.c | 38 ++- service-mgmt/sm-1.0.0/src/sm_service_table.h | 3 +- 18 files changed, 507 insertions(+), 60 deletions(-) create mode 100644 service-mgmt-api/sm-api/sm_api/objects/smo_sgm.py diff --git a/service-mgmt-api/sm-api/sm_api/api/controllers/v1/servicenode.py b/service-mgmt-api/sm-api/sm_api/api/controllers/v1/servicenode.py index 67ccaa48..85f97832 100755 --- 
a/service-mgmt-api/sm-api/sm_api/api/controllers/v1/servicenode.py +++ b/service-mgmt-api/sm-api/sm_api/api/controllers/v1/servicenode.py @@ -16,7 +16,7 @@ # License for the specific language governing permissions and limitations # under the License. # -# Copyright (c) 2013-2014 Wind River Systems, Inc. +# Copyright (c) 2013-2018 Wind River Systems, Inc. # @@ -25,10 +25,14 @@ from pecan import rest import wsme from wsme import types as wtypes import wsmeext.pecan as wsme_pecan +import socket +import urllib2 +import json from sm_api.api.controllers.v1 import base from sm_api.api.controllers.v1 import smc_api from sm_api.openstack.common import log +from sm_api.api.controllers.v1 import services LOG = log.getLogger(__name__) @@ -36,6 +40,7 @@ ERR_CODE_SUCCESS = "0" ERR_CODE_HOST_NOT_FOUND = "-1000" ERR_CODE_ACTION_FAILED = "-1001" ERR_CODE_NO_HOST_TO_SWACT_TO = "-1002" +ERR_CODE_LOCK_SOLE_SERVICE_PROVIDER = "-1003" SM_NODE_STATE_UNKNOWN = "unknown" SM_NODE_ADMIN_LOCKED = "locked" @@ -55,6 +60,13 @@ SM_SERVICE_DOMAIN_MEMBER_REDUNDANCY_MODEL_N_PLUS_M = "N + M" SM_SERVICE_DOMAIN_MEMBER_REDUNDANCY_MODEL_N_TO_1 = "N to 1" SM_SERVICE_DOMAIN_MEMBER_REDUNDANCY_MODEL_N_TO_N = "N to N" +SM_SERVICE_SEVERITY_NIL = "nil" +SM_SERVICE_SEVERITY_UNKNOWN = "unknown" +SM_SERVICE_SEVERITY_NONE = "none" +SM_SERVICE_SEVERITY_MINOR = "minor" +SM_SERVICE_SEVERITY_MAJOR = "major" +SM_SERVICE_SEVERITY_CRITICAL = "critical" + # sm_types.c SM_SERVICE_GROUP_STATE_NIL = "nil" SM_SERVICE_GROUP_STATE_NA = "not-applicable" @@ -87,6 +99,77 @@ SM_SERVICE_GROUP_CONDITION_RECOVERY_FAILURE = "recovery-failure" SM_SERVICE_GROUP_CONDITION_ACTION_FAILURE = "action-failure" SM_SERVICE_GROUP_CONDITION_FATAL_FAILURE = "fatal-failure" +SM_SERVICE_STATE_NIL = "nil" +SM_SERVICE_STATE_NA = "not-applicable" +SM_SERVICE_STATE_INITIAL = "initial" +SM_SERVICE_STATE_UNKNOWN = "unknown" +SM_SERVICE_STATE_ENABLED_STANDBY = "enabled-standby" +SM_SERVICE_STATE_ENABLED_GO_STANDBY = "enabled-go-standby" 
+SM_SERVICE_STATE_ENABLED_GO_ACTIVE = "enabled-go-active" +SM_SERVICE_STATE_ENABLED_ACTIVE = "enabled-active" +SM_SERVICE_STATE_ENABLING = "enabling" +SM_SERVICE_STATE_ENABLING_THROTTLE = "enabling-throttle" +SM_SERVICE_STATE_DISABLING = "disabling" +SM_SERVICE_STATE_DISABLED = "disabled" +SM_SERVICE_STATE_SHUTDOWN = "shutdown" + +LOCAL_HOST_NAME = socket.gethostname() + + +def rest_api_request(token, method, api_cmd, api_cmd_headers=None, + api_cmd_payload=None, timeout=10): + + + """ + Make a rest-api request + Returns: response as a dictionary + """ + + LOG.info("%s cmd:%s hdr:%s payload:%s" % ( + method, api_cmd, api_cmd_headers, api_cmd_payload)) + + response = None + try: + request_info = urllib2.Request(api_cmd) + request_info.get_method = lambda: method + if token: + request_info.add_header("X-Auth-Token", token.get_id()) + request_info.add_header("Accept", "application/json") + + if api_cmd_headers is not None: + for header_type, header_value in api_cmd_headers.items(): + request_info.add_header(header_type, header_value) + + if api_cmd_payload is not None: + request_info.add_data(api_cmd_payload) + + request = urllib2.urlopen(request_info, timeout=timeout) + response = request.read() + + if response == "": + response = {} + else: + response = json.loads(response) + request.close() + + LOG.info("Response=%s" % response) + + except urllib2.HTTPError as e: + if 401 == e.code: + if token: + token.set_expired() + LOG.warn("HTTP Error e.code=%s e=%s" % (e.code, e)) + if hasattr(e, 'msg') and e.msg: + response = json.loads(e.msg) + else: + response = {} + + LOG.info("HTTPError response=%s" % (response)) + except urllib2.URLError as urle: + LOG.debug("Connection refused") + + return response + class ServiceNodeCommand(base.APIBase): origin = wtypes.text @@ -108,6 +191,7 @@ class ServiceNodeCommandResult(base.APIBase): # Result error_code = wtypes.text error_details = wtypes.text + impact_service_list = [wtypes.text] class ServiceNode(base.APIBase): @@ -220,6 
+304,12 @@ class ServiceNodeController(rest.RestController): LOG.debug("sm-api have_active_sm_services: False") return swactable_sm_services + def _is_aa_service_group(self, sdm): + if SM_SERVICE_DOMAIN_MEMBER_REDUNDANCY_MODEL_N == \ + sdm.redundancy_model and 2 == sdm.n_active: + return True + return False + def _swact_pre_check(self, hostname): # run pre-swact checks, verify that services are in the right state # to accept service @@ -250,15 +340,22 @@ class ServiceNodeController(rest.RestController): % (sm_sda.node_name, sm_sda.node_name)) break - # Verify that - # all the services are in the standby or active - # state on the other host + # degraded or failure of A/A service on the target host + # would not stop swact + sdm = self._sm_sdm_get(sm_sda.name, + sm_sda.service_group_name) + if (self._is_aa_service_group(sdm)): + continue + + # Verify that + # all the services are in the standby state on the + # other host # or service only provisioned in the other host # or service state are the same on both hosts if SM_SERVICE_GROUP_STATE_ACTIVE != sm_sda.state \ - and SM_SERVICE_GROUP_STATE_STANDBY != sm_sda.state \ - and origin_state.has_key(sm_sda.service_group_name) \ - and origin_state[sm_sda.service_group_name] != sm_sda.state: + and SM_SERVICE_GROUP_STATE_STANDBY != sm_sda.state \ + and origin_state.has_key(sm_sda.service_group_name) \ + and origin_state[sm_sda.service_group_name] != sm_sda.state: check_result = ( "%s on %s is not ready to take service, " "service not in the active or standby " @@ -329,6 +426,91 @@ class ServiceNodeController(rest.RestController): LOG.info("%s" % sm_state_ht) return sm_state_ht + def get_remote_svc(self, hostname, service_name): + sm_api_port = 7777 + sm_api_path = "http://{host}:{port}". 
\ + format(host=hostname, port=sm_api_port) + + api_cmd = sm_api_path + api_cmd += "/v1/services/%s" % service_name + + api_cmd_headers = dict() + api_cmd_headers['Content-type'] = "application/json" + api_cmd_headers['Accept'] = "application/json" + api_cmd_headers['User-Agent'] = "sm/1.0" + + response = rest_api_request(None, "GET", api_cmd, api_cmd_headers, None) + + return response + + def _lock_pre_check(self, hostname): + services = pecan.request.dbapi.sm_service_get_list() + ill_services = [] + for service in services: + if (SM_SERVICE_STATE_ENABLED_ACTIVE == service.desired_state and + SM_SERVICE_STATE_ENABLED_ACTIVE != service.state): + ill_services.append(service.name) + + chk_list = {} + service_groups = pecan.request.dbapi.iservicegroup_get_list() + for service_group in service_groups: + sdm = pecan.request.dbapi.sm_sdm_get( + "controller", service_group.name) + if (SM_SERVICE_DOMAIN_MEMBER_REDUNDANCY_MODEL_N == + sdm.redundancy_model and 1 < sdm.n_active): + sgms = pecan.request.dbapi.sm_service_group_members_get_list( + service_group.name) + for sgm in sgms: + if (SM_SERVICE_SEVERITY_CRITICAL == + sgm.service_failure_impact and + sgm.service_name in ill_services): + if service_group.name in chk_list: + chk_list[service_group.name].\ + append(sgm.service_name) + else: + chk_list[service_group.name] = [sgm.service_name] + + if len(chk_list) == 0: + return None + + sdas = pecan.request.dbapi.sm_sda_get_list() + for sda in sdas: + if (sda.node_name not in [LOCAL_HOST_NAME, hostname] and + sda.service_group_name in chk_list): + for service_name in chk_list[sda.service_group_name]: + rsvc = self.get_remote_svc(sda.node_name, service_name) + if (SM_SERVICE_STATE_ENABLED_ACTIVE == + rsvc['desired_state'] and + SM_SERVICE_STATE_ENABLED_ACTIVE == rsvc['state']): + chk_list[sda.service_group_name].remove(service_name) + + all_good = True + for svcs in chk_list.values(): + if len(svcs) > 0: + all_good = False + break + if all_good: + return None + + 
target_services = [] + for sda in sdas: + if (sda.node_name == hostname and + sda.service_group_name in chk_list): + for service_name in chk_list[sda.service_group_name]: + LOG.info("checking %s on %s" % (service_name, hostname)) + rsvc = self.get_remote_svc(sda.node_name, service_name) + if rsvc is None: + continue + if (SM_SERVICE_STATE_ENABLED_ACTIVE == + rsvc['desired_state'] and + SM_SERVICE_STATE_ENABLED_ACTIVE == rsvc['state']): + LOG.info("which is %s %s" % (rsvc['desired_state'], rsvc['state'])) + target_services.append(service_name) + LOG.info("services %s solely running on %s" % (','.join(target_services), hostname)) + if len(target_services) > 0: + return target_services + return None + def _do_modify_command(self, hostname, command): if command.action == smc_api.SM_NODE_ACTION_SWACT_PRE_CHECK or \ @@ -352,6 +534,31 @@ class ServiceNodeController(rest.RestController): avail=command.avail, error_code=ERR_CODE_SUCCESS, error_details=check_result) return wsme.api.Response(result, status_code=200) + elif command.action in [smc_api.SM_NODE_ACTION_LOCK, + smc_api.SM_NODE_ACTION_LOCK_PRE_CHECK]: + impact_services = self._lock_pre_check(hostname) + if impact_services is not None: + result = ServiceNodeCommandResult( + origin="sm", hostname=hostname, action=command.action, + admin=command.admin, oper=command.oper, + avail=command.avail, + error_code=ERR_CODE_LOCK_SOLE_SERVICE_PROVIDER, + impact_service_list=impact_services, + error_details="%s is the sole provider of some services." 
+ % hostname) + + if command.action == smc_api.SM_NODE_ACTION_LOCK_PRE_CHECK: + return wsme.api.Response(result, status_code=200) + + return wsme.api.Response(result, status_code=400) + elif smc_api.SM_NODE_ACTION_LOCK_PRE_CHECK == command.action: + result = ServiceNodeCommandResult( + origin="sm", hostname=hostname, action=command.action, + admin=command.admin, oper=command.oper, + avail=command.avail, error_code=ERR_CODE_SUCCESS, + impact_service_list=impact_services, + error_details=None) + return wsme.api.Response(result, status_code=200) if command.action == smc_api.SM_NODE_ACTION_UNLOCK or \ command.action == smc_api.SM_NODE_ACTION_LOCK or \ diff --git a/service-mgmt-api/sm-api/sm_api/api/controllers/v1/services.py b/service-mgmt-api/sm-api/sm_api/api/controllers/v1/services.py index e38393cc..6b47f1b0 100644 --- a/service-mgmt-api/sm-api/sm_api/api/controllers/v1/services.py +++ b/service-mgmt-api/sm-api/sm_api/api/controllers/v1/services.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2014 Wind River Systems, Inc. +# Copyright (c) 2014-2018 Wind River Systems, Inc. 
# # SPDX-License-Identifier: Apache-2.0 # @@ -113,8 +113,12 @@ class ServicesController(rest.RestController): @wsme_pecan.wsexpose(Services, unicode) def get_one(self, uuid): rpc_sg = objects.service.get_by_uuid(pecan.request.context, uuid) + return Services.convert_with_links(rpc_sg) - return Services.convert_with_links(rpc_sg) + @wsme_pecan.wsexpose(Services, unicode) + def get_service(self, name): + rpc_sg = objects.service.get_by_name(pecan.request.context, name) + return Services.convert_with_links(rpc_sg) @wsme_pecan.wsexpose(ServicesCollection, unicode, int, unicode, unicode) diff --git a/service-mgmt-api/sm-api/sm_api/api/controllers/v1/smc_api.py b/service-mgmt-api/sm-api/sm_api/api/controllers/v1/smc_api.py index 42853085..d406bf8d 100755 --- a/service-mgmt-api/sm-api/sm_api/api/controllers/v1/smc_api.py +++ b/service-mgmt-api/sm-api/sm_api/api/controllers/v1/smc_api.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2014 Wind River Systems, Inc. +# Copyright (c) 2014-2018 Wind River Systems, Inc. # # SPDX-License-Identifier: Apache-2.0 # @@ -38,6 +38,8 @@ SM_API_MAX_MSG_SIZE = 2048 SM_NODE_ACTION_UNLOCK = "unlock" SM_NODE_ACTION_LOCK = "lock" +SM_NODE_ACTION_LOCK_FORCE = "lock-force" +SM_NODE_ACTION_LOCK_PRE_CHECK = "lock-pre-check" SM_NODE_ACTION_SWACT_PRE_CHECK = "swact-pre-check" SM_NODE_ACTION_SWACT = "swact" SM_NODE_ACTION_SWACT_FORCE = "swact-force" diff --git a/service-mgmt-api/sm-api/sm_api/cmd/api.py b/service-mgmt-api/sm-api/sm_api/cmd/api.py index bac1b924..8f581ea7 100644 --- a/service-mgmt-api/sm-api/sm_api/cmd/api.py +++ b/service-mgmt-api/sm-api/sm_api/cmd/api.py @@ -18,7 +18,7 @@ # License for the specific language governing permissions and limitations # under the License. # -# Copyright (c) 2013-2014 Wind River Systems, Inc. +# Copyright (c) 2013-2018 Wind River Systems, Inc. 
# @@ -28,6 +28,7 @@ import logging import os.path import sys import time +import socket from oslo_config import cfg @@ -62,9 +63,7 @@ def main(): sm_api_service.prepare_service(sys.argv) # Build and start the WSGI app - # host = CONF.sm_api_api_bind_ip - # port = CONF.sm_api_api_port - host = 'localhost' + host = socket.gethostname() port = 7777 wsgi = simple_server.make_server(host, port, app.VersionSelectorApplication(), diff --git a/service-mgmt-api/sm-api/sm_api/common/exception.py b/service-mgmt-api/sm-api/sm_api/common/exception.py index 269305d0..eff1befa 100644 --- a/service-mgmt-api/sm-api/sm_api/common/exception.py +++ b/service-mgmt-api/sm-api/sm_api/common/exception.py @@ -399,3 +399,7 @@ class Unauthorized(SmApiException): class HTTPNotFound(NotFound): pass + + +class ServiceNotFound(NotFound): + message = _("service %(service)s could not be found.") \ No newline at end of file diff --git a/service-mgmt-api/sm-api/sm_api/db/api.py b/service-mgmt-api/sm-api/sm_api/db/api.py index 975f95be..035114db 100644 --- a/service-mgmt-api/sm-api/sm_api/db/api.py +++ b/service-mgmt-api/sm-api/sm_api/db/api.py @@ -170,6 +170,13 @@ class Connection(object): @abc.abstractmethod def sm_service_get_by_name(self, name): - """Return a list of services by name. + """Return a service by name. :param name: The name of the services. """ + + + @abc.abstractmethod + def sm_service_group_members_get_list(self, service_group_name): + """Return service group members in a service group + :param service_group_name: service group name + """ diff --git a/service-mgmt-api/sm-api/sm_api/db/sqlalchemy/api.py b/service-mgmt-api/sm-api/sm_api/db/sqlalchemy/api.py index 0b550353..11f2e783 100755 --- a/service-mgmt-api/sm-api/sm_api/db/sqlalchemy/api.py +++ b/service-mgmt-api/sm-api/sm_api/db/sqlalchemy/api.py @@ -15,7 +15,7 @@ # License for the specific language governing permissions and limitations # under the License. # -# Copyright (c) 2013-2014 Wind River Systems, Inc. 
+# Copyright (c) 2013-2018 Wind River Systems, Inc. # @@ -259,9 +259,18 @@ class Connection(api.Connection): def sm_service_get_by_name(self, name): result = model_query(models.service, read_deleted="no").\ filter_by(name=name) - # first() since want a list if not result: - raise exception.NodeNotFound(node=name) + raise exception.ServiceNotFound(service=name) + + return result + + + @objects.objectify(objects.service_group_member) + def sm_service_group_members_get_list(self, service_group_name): + result = model_query(models.sm_service_group_member, + read_deleted="no").\ + filter_by(provisioned='yes').\ + filter_by(name=service_group_name) return result diff --git a/service-mgmt-api/sm-api/sm_api/db/sqlalchemy/models.py b/service-mgmt-api/sm-api/sm_api/db/sqlalchemy/models.py index 024da109..d41335d5 100755 --- a/service-mgmt-api/sm-api/sm_api/db/sqlalchemy/models.py +++ b/service-mgmt-api/sm-api/sm_api/db/sqlalchemy/models.py @@ -15,7 +15,7 @@ # License for the specific language governing permissions and limitations # under the License. # -# Copyright (c) 2013-2014 Wind River Systems, Inc. +# Copyright (c) 2013-2018 Wind River Systems, Inc. 
# @@ -126,6 +126,8 @@ class sm_sdm(Base): name = Column(String(255)) service_group_name = Column(String(255)) redundancy_model = Column(String(255)) # sm_types.h + n_active = Column(Integer) + m_standby = Column(Integer) # sm_service_domain_assignments @@ -152,3 +154,13 @@ class sm_node(Base): operational_state = Column(String(255)) availability_status = Column(String(255)) ready_state = Column(String(255)) + + +class sm_service_group_member(Base): + __tablename__ = 'service_group_members' + + id = Column(Integer, primary_key=True) + provisioned = Column(String(255)) + name = Column(String(255)) + service_name = Column(String(255)) + service_failure_impact = Column(String(255)) diff --git a/service-mgmt-api/sm-api/sm_api/objects/__init__.py b/service-mgmt-api/sm-api/sm_api/objects/__init__.py index 9869fd5c..7b6115c2 100644 --- a/service-mgmt-api/sm-api/sm_api/objects/__init__.py +++ b/service-mgmt-api/sm-api/sm_api/objects/__init__.py @@ -12,7 +12,7 @@ # License for the specific language governing permissions and limitations # under the License. # -# Copyright (c) 2013-2014 Wind River Systems, Inc. +# Copyright (c) 2013-2018 Wind River Systems, Inc. # @@ -23,6 +23,7 @@ from sm_api.objects import smo_service from sm_api.objects import smo_sdm from sm_api.objects import smo_sda from sm_api.objects import smo_node +from sm_api.objects import smo_sgm def objectify(klass): @@ -46,9 +47,11 @@ service = smo_service.service sm_sdm = smo_sdm.sm_sdm sm_sda = smo_sda.sm_sda sm_node = smo_node.sm_node +service_group_member = smo_sgm.service_group_member __all__ = ( service_groups, + service_group_member, service, sm_sdm, sm_sda, diff --git a/service-mgmt-api/sm-api/sm_api/objects/smo_sdm.py b/service-mgmt-api/sm-api/sm_api/objects/smo_sdm.py index 03c616bb..cdee8fcf 100644 --- a/service-mgmt-api/sm-api/sm_api/objects/smo_sdm.py +++ b/service-mgmt-api/sm-api/sm_api/objects/smo_sdm.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2013-2014 Wind River Systems, Inc. 
+# Copyright (c) 2013-2018 Wind River Systems, Inc. # # SPDX-License-Identifier: Apache-2.0 # @@ -22,6 +22,8 @@ class sm_sdm(base.Sm_apiObject): 'name': utils.str_or_none, 'service_group_name': utils.str_or_none, 'redundancy_model': utils.str_or_none, + 'n_active': int, + 'm_standby': int, } @staticmethod diff --git a/service-mgmt-api/sm-api/sm_api/objects/smo_service.py b/service-mgmt-api/sm-api/sm_api/objects/smo_service.py index 73cc53b7..d9fb7122 100644 --- a/service-mgmt-api/sm-api/sm_api/objects/smo_service.py +++ b/service-mgmt-api/sm-api/sm_api/objects/smo_service.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2013-2014 Wind River Systems, Inc. +# Copyright (c) 2013-2018 Wind River Systems, Inc. # # SPDX-License-Identifier: Apache-2.0 # @@ -36,7 +36,7 @@ class service(base.Sm_apiObject): @base.remotable_classmethod def get_by_uuid(cls, context, uuid): - """Find a server based on uuid and return a Node object. + """Find a service based on uuid and return a service object. :param uuid: the uuid of a server. :returns: a :class:`Node` object. @@ -45,6 +45,16 @@ class service(base.Sm_apiObject): db_server = cls.dbapi.sm_service_get(uuid) return service._from_db_object(cls(), db_server) + @base.remotable_classmethod + def get_by_name(cls, context, name): + """Find a service based on service name . + + :param name: the name of a service. + :returns: a :class:`service` object. + """ + service = cls.dbapi.sm_service_get_by_name(name) + return service._from_db_object(cls(), service) + @base.remotable def save(self, context): """Save updates to this Node. diff --git a/service-mgmt-api/sm-api/sm_api/objects/smo_sgm.py b/service-mgmt-api/sm-api/sm_api/objects/smo_sgm.py new file mode 100644 index 00000000..0257a5ec --- /dev/null +++ b/service-mgmt-api/sm-api/sm_api/objects/smo_sgm.py @@ -0,0 +1,59 @@ +# +# Copyright (c) 2018 Wind River Systems, Inc. 
+# +# SPDX-License-Identifier: Apache-2.0 +# + +# vim: tabstop=4 shiftwidth=4 softtabstop=4 +# coding=utf-8 +# + +from sm_api.db import api as db_api +from sm_api.objects import base +from sm_api.objects import utils + + +class service_group_member(base.Sm_apiObject): + + dbapi = db_api.get_instance() + + fields = { + 'id': utils.int_or_none, + 'name': utils.str_or_none, + 'service_name': utils.str_or_none, + 'service_failure_impact': utils.str_or_none + } + + @staticmethod + def _from_db_object(server, db_server): + """Converts a database entity to a formal object.""" + for field in server.fields: + server[field] = db_server[field] + + server.obj_reset_changes() + return server + + @base.remotable_classmethod + def get_by_service_group(cls, context, service_group_name): + """Find a server based on uuid and return a Node object. + + :param uuid: the uuid of a server. + :returns: a :class:`Node` object. + """ + db_server = cls.dbapi.iservicegroup_member_get(service_group_name) + return service_group_member._from_db_object(cls(), db_server) + + @base.remotable + def save(self, context): + """Save service group member to this Node. + :param context: Security context + """ + raise NotImplemented("This method is intentially not implemented") + + @base.remotable + def refresh(self, context): + current = self.__class__.get_by_uuid(context, uuid=self.uuid) + for field in self.fields: + if (hasattr(self, base.get_attrname(field)) and + self[field] != current[field]): + self[field] = current[field] diff --git a/service-mgmt/sm-1.0.0/src/sm_service_dependency.c b/service-mgmt/sm-1.0.0/src/sm_service_dependency.c index 698e5474..7c33a977 100644 --- a/service-mgmt/sm-1.0.0/src/sm_service_dependency.c +++ b/service-mgmt/sm-1.0.0/src/sm_service_dependency.c @@ -1,5 +1,5 @@ // -// Copyright (c) 2014 Wind River Systems, Inc. +// Copyright (c) 2014-2018 Wind River Systems, Inc. 
// // SPDX-License-Identifier: Apache-2.0 // @@ -13,6 +13,7 @@ #include "sm_time.h" #include "sm_service_table.h" #include "sm_service_dependency_table.h" +#include "sm_service_domain_member_table.h" // **************************************************************************** // Service Dependency - Dependent State Compare @@ -147,55 +148,99 @@ SmErrorT sm_service_dependency_go_standby_met( SmServiceT* service, bool* met ) } // **************************************************************************** +// **************************************************************************** +// Service Dependency - Enable Met, per dependent +// ================================ +static void _sm_service_enable_dependency_met( + void* user_data[], SmServiceDependencyT* service_dependency ) +{ + bool *dependency_met = (bool*)user_data[0]; + if( '\0' == service_dependency->dependent[0] ) + { + DPRINTFD( "Service (%s) has no dependencies.", service_dependency->service_name ); + return; + } + + SmServiceT* dependent_service = sm_service_table_read( service_dependency->dependent ); + if( NULL == dependent_service ) + { + DPRINTFE( "Failed to read service (%s), error=%s.", + service_dependency->service_name, + sm_error_str(SM_NOT_FOUND) ); + return; + } + + if( SM_SERVICE_STATE_ENABLED_ACTIVE != dependent_service->state && + SM_SERVICE_STATE_ENABLED_STANDBY != dependent_service->desired_state) + { + *dependency_met = false; + } +} +// **************************************************************************** + // **************************************************************************** // Service Dependency - Enable Met // =============================== SmErrorT sm_service_dependency_enable_met( SmServiceT* service, bool* met ) { - bool at_least_one = false; bool dependency_met = true; - SmCompareOperatorT compare_operator = SM_COMPARE_OPERATOR_LE; - void* user_data[] = {service, &dependency_met, &compare_operator, - &at_least_one}; + void* user_data[] = {&dependency_met}; 
*met = false; sm_service_dependency_table_foreach( SM_SERVICE_DEPENDENCY_TYPE_ACTION, service->name, SM_SERVICE_STATE_NA, SM_SERVICE_ACTION_ENABLE, - user_data, sm_service_dependency_dependent_state_compare ); + user_data, _sm_service_enable_dependency_met ); - if( at_least_one ) - { - *met = dependency_met; - } else { - *met = true; - } + *met = dependency_met; return( SM_OKAY ); } // **************************************************************************** +// **************************************************************************** +// Service Dependency - Disable Met per dependent +// ================================ +static void _sm_service_disable_dependency_met( + void* user_data[], SmServiceDependencyT* service_dependency ) +{ + bool *dependency_met = (bool*)user_data[0]; + if( '\0' == service_dependency->dependent[0] ) + { + DPRINTFD( "Service (%s) has no dependencies.", service_dependency->service_name ); + return; + } + + SmServiceT* dependent_service = sm_service_table_read( service_dependency->dependent ); + if( NULL == dependent_service ) + { + DPRINTFE( "Failed to read service (%s), error=%s.", + service_dependency->service_name, + sm_error_str(SM_NOT_FOUND) ); + return; + } + + if( SM_SERVICE_STATE_DISABLED != dependent_service->state && + SM_SERVICE_STATE_ENABLED_ACTIVE != dependent_service->desired_state) + { + *dependency_met = false; + } +} +// **************************************************************************** + // **************************************************************************** // Service Dependency - Disable Met // ================================ SmErrorT sm_service_dependency_disable_met( SmServiceT* service, bool* met ) { - bool at_least_one = false; bool dependency_met = true; - SmCompareOperatorT compare_operator = SM_COMPARE_OPERATOR_GE; - void* user_data[] = {service, &dependency_met, &compare_operator, - &at_least_one}; + void* user_data[] = {&dependency_met}; *met = false; 
sm_service_dependency_table_foreach( SM_SERVICE_DEPENDENCY_TYPE_ACTION, service->name, SM_SERVICE_STATE_NA, SM_SERVICE_ACTION_DISABLE, - user_data, sm_service_dependency_dependent_state_compare ); + user_data, _sm_service_disable_dependency_met ); - if( at_least_one ) - { - *met = dependency_met; - } else { - *met = true; - } + *met = dependency_met; return( SM_OKAY ); } diff --git a/service-mgmt/sm-1.0.0/src/sm_service_domain_utils.c b/service-mgmt/sm-1.0.0/src/sm_service_domain_utils.c index e4fbfeed..39ba5589 100644 --- a/service-mgmt/sm-1.0.0/src/sm_service_domain_utils.c +++ b/service-mgmt/sm-1.0.0/src/sm_service_domain_utils.c @@ -1060,6 +1060,25 @@ SmErrorT sm_service_domain_utils_service_domain_neighbor_cleanup( } // **************************************************************************** +// **************************************************************************** +// Service Domain Utilities is aa service group +// ===================================== +bool sm_is_aa_service_group(char* service_group_name) +{ + SmServiceDomainMemberT* service_domain_member; + service_domain_member = sm_service_domain_member_table_read_service_group( service_group_name ); + if( NULL != service_domain_member ) + { + if( SM_SERVICE_DOMAIN_MEMBER_REDUNDANCY_MODEL_N == service_domain_member->redundancy_model && + service_domain_member->n_active > 1 ) + { + return true; + } + } + return false; +} +// **************************************************************************** + // **************************************************************************** // Service Domain Utilities - Initialize // ===================================== diff --git a/service-mgmt/sm-1.0.0/src/sm_service_domain_utils.h b/service-mgmt/sm-1.0.0/src/sm_service_domain_utils.h index 250db35d..209f6166 100644 --- a/service-mgmt/sm-1.0.0/src/sm_service_domain_utils.h +++ b/service-mgmt/sm-1.0.0/src/sm_service_domain_utils.h @@ -1,5 +1,5 @@ // -// Copyright (c) 2014 Wind River Systems, Inc. 
+// Copyright (c) 2014-2018 Wind River Systems, Inc. // // SPDX-License-Identifier: Apache-2.0 // @@ -144,6 +144,12 @@ extern SmErrorT sm_service_domain_utils_service_domain_neighbor_cleanup( char name[], char node_name[] ); // **************************************************************************** +// **************************************************************************** +// Service Domain Utilities is aa service group +// ============================== +extern bool sm_is_aa_service_group(char* service_group_name); +// **************************************************************************** + // **************************************************************************** // Service Domain Utilities - Initialize // ===================================== diff --git a/service-mgmt/sm-1.0.0/src/sm_service_group_audit.c b/service-mgmt/sm-1.0.0/src/sm_service_group_audit.c index 73d96064..c07a572a 100644 --- a/service-mgmt/sm-1.0.0/src/sm_service_group_audit.c +++ b/service-mgmt/sm-1.0.0/src/sm_service_group_audit.c @@ -1,5 +1,5 @@ // -// Copyright (c) 2014 Wind River Systems, Inc. +// Copyright (c) 2014-2018 Wind River Systems, Inc. 
// // SPDX-License-Identifier: Apache-2.0 // @@ -16,6 +16,7 @@ #include "sm_service_group_member_table.h" #include "sm_service_api.h" #include "sm_service_group_health.h" +#include "sm_service_domain_utils.h" // **************************************************************************** // Service Group Audit - Set Service Reason Text @@ -146,15 +147,18 @@ static void sm_service_group_audit_service_for_status( void* user_data[], int* failed = (int*) user_data[3]; int* degraded = (int*) user_data[4]; int* warn = (int*) user_data[5]; - bool* reason_text_writable = (bool*) user_data[6]; - char* reason_text = (char*) user_data[7]; - int reason_text_size = *(int*) user_data[8]; + int* healthy = (int*) user_data[6]; + bool* reason_text_writable = (bool*) user_data[7]; + char* reason_text = (char*) user_data[8]; + int reason_text_size = *(int*) user_data[9]; SmServiceGroupStatusT prev_status = *status; SmServiceGroupConditionT prev_condition = *condition; + SmServiceStatusT sgm_imply_status; SmServiceGroupStatusT mapped_status; SmServiceGroupConditionT mapped_condition; char service_reason_text[SM_SERVICE_GROUP_REASON_TEXT_MAX_CHAR] = ""; + sgm_imply_status = service_group_member->service_status; if( 0 != service_group_member->service_failure_timestamp ) { elapsed_ms = sm_time_get_elapsed_ms( NULL ); @@ -162,27 +166,34 @@ static void sm_service_group_audit_service_for_status( void* user_data[], if( service_group->failure_debounce_in_ms >= delta_ms ) { - DPRINTFD( "Service group (%s) member (%s) failure debounce " - "still in effect, indicating member as unhealthy, " + DPRINTFD( "Service group (%s) member (%s) failure debounce (%d) " + "still in effect since (%li), indicating member as unhealthy, " "delta_ms=%li.", service_group->name, - service_group_member->service_name, delta_ms ); + service_group_member->service_name, + service_group->failure_debounce_in_ms, + service_group_member->service_failure_timestamp, + delta_ms ); do_increment = false; switch( 
service_group_member->service_failure_impact ) { case SM_SERVICE_SEVERITY_NONE: + sgm_imply_status = SM_SERVICE_STATUS_NONE; break; case SM_SERVICE_SEVERITY_MINOR: + sgm_imply_status = SM_SERVICE_STATUS_NONE; ++(*warn); break; case SM_SERVICE_SEVERITY_MAJOR: + sgm_imply_status = SM_SERVICE_STATUS_DEGRADED; ++(*degraded); break; case SM_SERVICE_SEVERITY_CRITICAL: + sgm_imply_status = SM_SERVICE_STATUS_FAILED; ++(*failed); break; @@ -190,13 +201,17 @@ static void sm_service_group_audit_service_for_status( void* user_data[], break; } } else { + ++(*healthy); DPRINTFD( "Service group (%s) member (%s) failure debounce " "no longer in effect, delta_ms=%li.", service_group->name, service_group_member->service_name, delta_ms ); } + }else + { + ++(*healthy); } - switch( service_group_member->service_status ) + switch( sgm_imply_status ) { case SM_SERVICE_STATUS_NONE: mapped_status = SM_SERVICE_GROUP_STATUS_NONE; @@ -452,7 +467,7 @@ static void sm_service_group_audit_service_for_status( void* user_data[], // ============================ SmErrorT sm_service_group_audit_status( SmServiceGroupT* service_group ) { - int failed=0, degraded=0, warn=0; + int failed=0, degraded=0, warn=0, healthy=0; SmServiceGroupStatusT audit_status = SM_SERVICE_GROUP_STATUS_NONE; SmServiceGroupConditionT audit_condition = SM_SERVICE_GROUP_CONDITION_NONE; int64_t audit_health = 0; @@ -460,7 +475,7 @@ SmErrorT sm_service_group_audit_status( SmServiceGroupT* service_group ) char reason_text[SM_SERVICE_GROUP_REASON_TEXT_MAX_CHAR] = ""; int reason_text_size = SM_SERVICE_GROUP_REASON_TEXT_MAX_CHAR; void* user_data[] = { service_group, &audit_status, &audit_condition, - &failed, &degraded, &warn, &reason_text_writable, + &failed, &degraded, &warn, &healthy, &reason_text_writable, reason_text, &reason_text_size }; sm_service_group_member_table_foreach_member( service_group->name, @@ -468,7 +483,14 @@ SmErrorT sm_service_group_audit_status( SmServiceGroupT* service_group ) audit_health = 
sm_service_group_health_calculate( failed, degraded, warn ); - service_group->status = audit_status; + if(SM_SERVICE_GROUP_STATUS_FAILED == audit_status && 0 < healthy && + sm_is_aa_service_group(service_group->name)) + { + service_group->status = SM_SERVICE_GROUP_STATUS_DEGRADED; + }else + { + service_group->status = audit_status; + } service_group->condition = audit_condition; service_group->health = audit_health; diff --git a/service-mgmt/sm-1.0.0/src/sm_service_table.c b/service-mgmt/sm-1.0.0/src/sm_service_table.c index cb23dbd6..62893944 100644 --- a/service-mgmt/sm-1.0.0/src/sm_service_table.c +++ b/service-mgmt/sm-1.0.0/src/sm_service_table.c @@ -1,5 +1,5 @@ // -// Copyright (c) 2014 Wind River Systems, Inc. +// Copyright (c) 2014-2018 Wind River Systems, Inc. // // SPDX-License-Identifier: Apache-2.0 // @@ -22,6 +22,8 @@ #include "sm_service_go_active.h" #include "sm_service_go_standby.h" #include "sm_service_audit.h" +#include "sm_service_group_table.h" +#include "sm_service_group_member_table.h" static SmListT* _services = NULL; static SmDbHandleT* _sm_db_handle = NULL; @@ -351,6 +353,38 @@ SmErrorT sm_service_table_persist( SmServiceT* service ) } // **************************************************************************** +// **************************************************************************** +// Service - Loop service members +// =============================== +static void _sm_loop_service_group_members( void* user_data[], + SmServiceGroupMemberT* service_group_member ) +{ + SmServiceT* service; + service = sm_service_table_read( service_group_member->service_name ); + if( NULL == service ) + { + DPRINTFE( "Could not find service (%s) of " + "service group (%s).", + service_group_member->service_name, + service_group_member->name); + return; + } + + snprintf(service->group_name, sizeof(service->group_name), "%s", service_group_member->name); +} +// **************************************************************************** + +// 
**************************************************************************** +// Service Table - Loop service groups +// ================================================= +static void _sm_loop_service_groups( + void* user_data[], SmServiceGroupT* service_group ) +{ + sm_service_group_member_table_foreach_member( service_group->name, + NULL, _sm_loop_service_group_members ); +} +// **************************************************************************** + // **************************************************************************** // Service Table - Initialize // ========================== @@ -376,6 +410,8 @@ SmErrorT sm_service_table_initialize( void ) return( error ); } + sm_service_group_table_foreach( NULL, _sm_loop_service_groups ); + return( SM_OKAY ); } // **************************************************************************** diff --git a/service-mgmt/sm-1.0.0/src/sm_service_table.h b/service-mgmt/sm-1.0.0/src/sm_service_table.h index 3049f264..a3c138a8 100644 --- a/service-mgmt/sm-1.0.0/src/sm_service_table.h +++ b/service-mgmt/sm-1.0.0/src/sm_service_table.h @@ -1,5 +1,5 @@ // -// Copyright (c) 2014 Wind River Systems, Inc. +// Copyright (c) 2014-2018 Wind River Systems, Inc. // // SPDX-License-Identifier: Apache-2.0 // @@ -56,6 +56,7 @@ typedef struct bool disable_check_dependency; //flag to indicate disable a service without disabling its dependency bool disable_skip_dependent; + char group_name[SM_SERVICE_GROUP_NAME_MAX_CHAR]; } SmServiceT; typedef void (*SmServiceTableForEachCallbackT)