Change heartbeat-status to availability-state of system peer
Fix the missing availability-state field of the system peer. Test Plan: - PASS: Create/update the system peer and check that the availability-state exists. - PASS: Get details of the system peer and check that the availability-state exists. - PASS: Peer monitor can change the availability-state when the peer site is available. - PASS: Raise an alarm when the peer is unavailable. Closes-Bug: 2045717 Change-Id: I497e3d21355c08d000a77965e6a6f36c6f483936 Signed-off-by: Zhang Rong(Jon) <rong.zhang@windriver.com>
This commit is contained in:
parent
c35aa8f566
commit
7af5caf83c
|
@ -457,9 +457,9 @@ DCMANAGER_V1_HTTP_AGENT = 'dcmanager/1.0'
|
|||
# batch rehome manage state wait timeout
|
||||
BATCH_REHOME_MGMT_STATES_TIMEOUT = 900
|
||||
|
||||
# System peer heartbeat status
|
||||
SYSTEM_PEER_HEARTBEAT_STATUS_ALIVE = 'alive'
|
||||
SYSTEM_PEER_HEARTBEAT_STATUS_FAILURE = 'failure'
|
||||
# System peer availability state
|
||||
SYSTEM_PEER_AVAILABILITY_STATE_AVAILABLE = 'available'
|
||||
SYSTEM_PEER_AVAILABILITY_STATE_UNAVAILABLE = 'unavailable'
|
||||
|
||||
# Peer group migration status
|
||||
PEER_GROUP_MIGRATING = 'migrating'
|
||||
|
|
|
@ -382,7 +382,7 @@ def system_peer_db_model_to_dict(system_peer):
|
|||
"heartbeat-failure-policy": system_peer.heartbeat_failure_policy,
|
||||
"heartbeat-maintenance-timeout": system_peer.
|
||||
heartbeat_maintenance_timeout,
|
||||
"heartbeat-status": system_peer.heartbeat_status,
|
||||
"availability-state": system_peer.availability_state,
|
||||
"created-at": system_peer.created_at,
|
||||
"updated-at": system_peer.updated_at}
|
||||
return result
|
||||
|
@ -443,7 +443,7 @@ def system_peer_update(context, peer_id,
|
|||
heartbeat_failure_threshold=None,
|
||||
heartbeat_failure_policy=None,
|
||||
heartbeat_maintenance_timeout=None,
|
||||
heartbeat_status=None):
|
||||
availability_state=None):
|
||||
"""Update the system peer or raise if it does not exist."""
|
||||
return IMPL.system_peer_update(context, peer_id,
|
||||
peer_uuid, peer_name,
|
||||
|
@ -454,7 +454,7 @@ def system_peer_update(context, peer_id,
|
|||
heartbeat_failure_threshold,
|
||||
heartbeat_failure_policy,
|
||||
heartbeat_maintenance_timeout,
|
||||
heartbeat_status)
|
||||
availability_state)
|
||||
|
||||
|
||||
def system_peer_destroy(context, peer_id):
|
||||
|
|
|
@ -892,7 +892,7 @@ def system_peer_create(context,
|
|||
heartbeat_failure_threshold=3,
|
||||
heartbeat_failure_policy="alarm",
|
||||
heartbeat_maintenance_timeout=600,
|
||||
heartbeat_status="created"):
|
||||
availability_state="created"):
|
||||
with write_session() as session:
|
||||
system_peer_ref = models.SystemPeer()
|
||||
system_peer_ref.peer_uuid = peer_uuid
|
||||
|
@ -908,7 +908,7 @@ def system_peer_create(context,
|
|||
system_peer_ref.heartbeat_failure_policy = heartbeat_failure_policy
|
||||
system_peer_ref.heartbeat_maintenance_timeout = \
|
||||
heartbeat_maintenance_timeout
|
||||
system_peer_ref.heartbeat_status = heartbeat_status
|
||||
system_peer_ref.availability_state = availability_state
|
||||
session.add(system_peer_ref)
|
||||
return system_peer_ref
|
||||
|
||||
|
@ -923,7 +923,7 @@ def system_peer_update(context, peer_id,
|
|||
heartbeat_failure_threshold=None,
|
||||
heartbeat_failure_policy=None,
|
||||
heartbeat_maintenance_timeout=None,
|
||||
heartbeat_status=None):
|
||||
availability_state=None):
|
||||
with write_session() as session:
|
||||
system_peer_ref = system_peer_get(context, peer_id)
|
||||
if peer_uuid is not None:
|
||||
|
@ -950,8 +950,8 @@ def system_peer_update(context, peer_id,
|
|||
if heartbeat_maintenance_timeout is not None:
|
||||
system_peer_ref.heartbeat_maintenance_timeout = \
|
||||
heartbeat_maintenance_timeout
|
||||
if heartbeat_status is not None:
|
||||
system_peer_ref.heartbeat_status = heartbeat_status
|
||||
if availability_state is not None:
|
||||
system_peer_ref.availability_state = availability_state
|
||||
system_peer_ref.save(session)
|
||||
return system_peer_ref
|
||||
|
||||
|
|
|
@ -61,7 +61,7 @@ def upgrade(migrate_engine):
|
|||
sqlalchemy.Column('heartbeat_failure_threshold', sqlalchemy.Integer),
|
||||
sqlalchemy.Column('heartbeat_failure_policy', sqlalchemy.String(255)),
|
||||
sqlalchemy.Column('heartbeat_maintenance_timeout', sqlalchemy.Integer),
|
||||
sqlalchemy.Column('heartbeat_status', sqlalchemy.String(255)),
|
||||
sqlalchemy.Column('availability_state', sqlalchemy.String(255)),
|
||||
sqlalchemy.Column('reserved_1', sqlalchemy.Text),
|
||||
sqlalchemy.Column('reserved_2', sqlalchemy.Text),
|
||||
sqlalchemy.Column('created_at', sqlalchemy.DateTime),
|
||||
|
|
|
@ -117,7 +117,7 @@ class SystemPeer(BASE, DCManagerBase):
|
|||
heartbeat_failure_threshold = Column(Integer)
|
||||
heartbeat_failure_policy = Column(String(255))
|
||||
heartbeat_maintenance_timeout = Column(Integer)
|
||||
heartbeat_status = Column(String(255))
|
||||
availability_state = Column(String(255))
|
||||
|
||||
|
||||
class SubcloudGroup(BASE, DCManagerBase):
|
||||
|
|
|
@ -131,17 +131,17 @@ class PeerMonitor(object):
|
|||
self._raise_failure()
|
||||
db_api.system_peer_update(
|
||||
self.context, self.peer.id,
|
||||
heartbeat_status=consts.SYSTEM_PEER_HEARTBEAT_STATUS_FAILURE)
|
||||
availability_state=consts.SYSTEM_PEER_AVAILABILITY_STATE_UNAVAILABLE)
|
||||
failure_count = 0
|
||||
self._set_require_audit_flag_to_associated_peer_groups()
|
||||
else:
|
||||
failure_count = 0
|
||||
self._audit_local_peer_groups(remote_pg_list)
|
||||
if self.peer.heartbeat_status != \
|
||||
consts.SYSTEM_PEER_HEARTBEAT_STATUS_ALIVE:
|
||||
if self.peer.availability_state != \
|
||||
consts.SYSTEM_PEER_AVAILABILITY_STATE_AVAILABLE:
|
||||
db_api.system_peer_update(
|
||||
self.context, self.peer.id,
|
||||
heartbeat_status=consts.SYSTEM_PEER_HEARTBEAT_STATUS_ALIVE)
|
||||
availability_state=consts.SYSTEM_PEER_AVAILABILITY_STATE_AVAILABLE)
|
||||
LOG.info("DC %s back online, clear alarm" %
|
||||
self.peer.peer_name)
|
||||
self._clear_failure()
|
||||
|
|
|
@ -398,11 +398,11 @@ class SubcloudManager(manager.Manager):
|
|||
return rehome_command
|
||||
|
||||
def _migrate_manage_subcloud(
|
||||
self, context, payload, alive_system_peers, subcloud):
|
||||
self, context, payload, available_system_peers, subcloud):
|
||||
success = True
|
||||
# Try to unmanage the subcloud on peer system
|
||||
if alive_system_peers:
|
||||
if self._unmanage_system_peer_subcloud(alive_system_peers,
|
||||
if available_system_peers:
|
||||
if self._unmanage_system_peer_subcloud(available_system_peers,
|
||||
subcloud):
|
||||
success = False
|
||||
LOG.warning("Unmanged subcloud: %s error on peer system, "
|
||||
|
@ -461,9 +461,9 @@ class SubcloudManager(manager.Manager):
|
|||
for association in associations:
|
||||
system_peer = db_api.system_peer_get(
|
||||
self.context, association.system_peer_id)
|
||||
# Get 'alive' system peer
|
||||
if system_peer.heartbeat_status != \
|
||||
consts.SYSTEM_PEER_HEARTBEAT_STATUS_ALIVE:
|
||||
# Get 'available' system peer
|
||||
if system_peer.availability_state != \
|
||||
consts.SYSTEM_PEER_AVAILABILITY_STATE_AVAILABLE:
|
||||
LOG.warning("Peer system %s offline, skip checking" %
|
||||
system_peer.peer_name)
|
||||
continue
|
||||
|
|
|
@ -31,7 +31,7 @@ SAMPLE_HEARTBEAT_INTERVAL = 10
|
|||
SAMPLE_HEARTBEAT_FAILURE_THRESHOLD = 3
|
||||
SAMPLE_HEARTBEAT_FAILURES_POLICY = 'alarm'
|
||||
SAMPLE_HEARTBEAT_MAINTENANCE_TIMEOUT = 600
|
||||
SAMPLE_HEARTBEAT_STATUS_ALIVE = 'alive'
|
||||
SAMPLE_AVAILABILITY_STATE_AVAILABLE = 'available'
|
||||
|
||||
# SAMPLE SUBCLOUD PEER GROUP DATA
|
||||
SAMPLE_SUBCLOUD_PEER_GROUP_NAME = 'GroupX'
|
||||
|
@ -207,8 +207,9 @@ class TestPeerGroupAssociationPost(testroot.DCManagerApiTest,
|
|||
context = utils.dummy_context()
|
||||
self.context = context
|
||||
peer_id, _ = self._create_db_related_objects(context)
|
||||
db_api.system_peer_update(context, peer_id=peer_id,
|
||||
heartbeat_status=SAMPLE_HEARTBEAT_STATUS_ALIVE)
|
||||
db_api.system_peer_update(
|
||||
context, peer_id=peer_id,
|
||||
availability_state=SAMPLE_AVAILABILITY_STATE_AVAILABLE)
|
||||
|
||||
def verify_post_failure(self, response):
|
||||
# Failures will return text rather than JSON
|
||||
|
|
Loading…
Reference in New Issue