Change heartbeat-status to availability-state of system peer

Fix the missing availability-state field of the system peer

Test Plan:
- PASS: Create/Update the system peer and check that the availability-
        state exists.
- PASS: Get details of the system peer and check that the availability-
        state exists.
- PASS: Peer monitor can change the availability-state when
        the peer site is available.
- PASS: Raise an alarm when the peer is unavailable.

Closes-Bug: 2045717

Change-Id: I497e3d21355c08d000a77965e6a6f36c6f483936
Signed-off-by: Zhang Rong(Jon) <rong.zhang@windriver.com>
This commit is contained in:
Zhang Rong(Jon) 2023-12-04 15:54:29 +08:00
parent c35aa8f566
commit 7af5caf83c
8 changed files with 27 additions and 26 deletions

View File

@ -457,9 +457,9 @@ DCMANAGER_V1_HTTP_AGENT = 'dcmanager/1.0'
# batch rehome manage state wait timeout
BATCH_REHOME_MGMT_STATES_TIMEOUT = 900
# System peer heartbeat status
SYSTEM_PEER_HEARTBEAT_STATUS_ALIVE = 'alive'
SYSTEM_PEER_HEARTBEAT_STATUS_FAILURE = 'failure'
# System peer availability state
SYSTEM_PEER_AVAILABILITY_STATE_AVAILABLE = 'available'
SYSTEM_PEER_AVAILABILITY_STATE_UNAVAILABLE = 'unavailable'
# Peer group migration status
PEER_GROUP_MIGRATING = 'migrating'

View File

@ -382,7 +382,7 @@ def system_peer_db_model_to_dict(system_peer):
"heartbeat-failure-policy": system_peer.heartbeat_failure_policy,
"heartbeat-maintenance-timeout": system_peer.
heartbeat_maintenance_timeout,
"heartbeat-status": system_peer.heartbeat_status,
"availability-state": system_peer.availability_state,
"created-at": system_peer.created_at,
"updated-at": system_peer.updated_at}
return result
@ -443,7 +443,7 @@ def system_peer_update(context, peer_id,
heartbeat_failure_threshold=None,
heartbeat_failure_policy=None,
heartbeat_maintenance_timeout=None,
heartbeat_status=None):
availability_state=None):
"""Update the system peer or raise if it does not exist."""
return IMPL.system_peer_update(context, peer_id,
peer_uuid, peer_name,
@ -454,7 +454,7 @@ def system_peer_update(context, peer_id,
heartbeat_failure_threshold,
heartbeat_failure_policy,
heartbeat_maintenance_timeout,
heartbeat_status)
availability_state)
def system_peer_destroy(context, peer_id):

View File

@ -892,7 +892,7 @@ def system_peer_create(context,
heartbeat_failure_threshold=3,
heartbeat_failure_policy="alarm",
heartbeat_maintenance_timeout=600,
heartbeat_status="created"):
availability_state="created"):
with write_session() as session:
system_peer_ref = models.SystemPeer()
system_peer_ref.peer_uuid = peer_uuid
@ -908,7 +908,7 @@ def system_peer_create(context,
system_peer_ref.heartbeat_failure_policy = heartbeat_failure_policy
system_peer_ref.heartbeat_maintenance_timeout = \
heartbeat_maintenance_timeout
system_peer_ref.heartbeat_status = heartbeat_status
system_peer_ref.availability_state = availability_state
session.add(system_peer_ref)
return system_peer_ref
@ -923,7 +923,7 @@ def system_peer_update(context, peer_id,
heartbeat_failure_threshold=None,
heartbeat_failure_policy=None,
heartbeat_maintenance_timeout=None,
heartbeat_status=None):
availability_state=None):
with write_session() as session:
system_peer_ref = system_peer_get(context, peer_id)
if peer_uuid is not None:
@ -950,8 +950,8 @@ def system_peer_update(context, peer_id,
if heartbeat_maintenance_timeout is not None:
system_peer_ref.heartbeat_maintenance_timeout = \
heartbeat_maintenance_timeout
if heartbeat_status is not None:
system_peer_ref.heartbeat_status = heartbeat_status
if availability_state is not None:
system_peer_ref.availability_state = availability_state
system_peer_ref.save(session)
return system_peer_ref

View File

@ -61,7 +61,7 @@ def upgrade(migrate_engine):
sqlalchemy.Column('heartbeat_failure_threshold', sqlalchemy.Integer),
sqlalchemy.Column('heartbeat_failure_policy', sqlalchemy.String(255)),
sqlalchemy.Column('heartbeat_maintenance_timeout', sqlalchemy.Integer),
sqlalchemy.Column('heartbeat_status', sqlalchemy.String(255)),
sqlalchemy.Column('availability_state', sqlalchemy.String(255)),
sqlalchemy.Column('reserved_1', sqlalchemy.Text),
sqlalchemy.Column('reserved_2', sqlalchemy.Text),
sqlalchemy.Column('created_at', sqlalchemy.DateTime),

View File

@ -117,7 +117,7 @@ class SystemPeer(BASE, DCManagerBase):
heartbeat_failure_threshold = Column(Integer)
heartbeat_failure_policy = Column(String(255))
heartbeat_maintenance_timeout = Column(Integer)
heartbeat_status = Column(String(255))
availability_state = Column(String(255))
class SubcloudGroup(BASE, DCManagerBase):

View File

@ -131,17 +131,17 @@ class PeerMonitor(object):
self._raise_failure()
db_api.system_peer_update(
self.context, self.peer.id,
heartbeat_status=consts.SYSTEM_PEER_HEARTBEAT_STATUS_FAILURE)
availability_state=consts.SYSTEM_PEER_AVAILABILITY_STATE_UNAVAILABLE)
failure_count = 0
self._set_require_audit_flag_to_associated_peer_groups()
else:
failure_count = 0
self._audit_local_peer_groups(remote_pg_list)
if self.peer.heartbeat_status != \
consts.SYSTEM_PEER_HEARTBEAT_STATUS_ALIVE:
if self.peer.availability_state != \
consts.SYSTEM_PEER_AVAILABILITY_STATE_AVAILABLE:
db_api.system_peer_update(
self.context, self.peer.id,
heartbeat_status=consts.SYSTEM_PEER_HEARTBEAT_STATUS_ALIVE)
availability_state=consts.SYSTEM_PEER_AVAILABILITY_STATE_AVAILABLE)
LOG.info("DC %s back online, clear alarm" %
self.peer.peer_name)
self._clear_failure()

View File

@ -398,11 +398,11 @@ class SubcloudManager(manager.Manager):
return rehome_command
def _migrate_manage_subcloud(
self, context, payload, alive_system_peers, subcloud):
self, context, payload, available_system_peers, subcloud):
success = True
# Try to unmanage the subcloud on peer system
if alive_system_peers:
if self._unmanage_system_peer_subcloud(alive_system_peers,
if available_system_peers:
if self._unmanage_system_peer_subcloud(available_system_peers,
subcloud):
success = False
LOG.warning("Unmanged subcloud: %s error on peer system, "
@ -461,9 +461,9 @@ class SubcloudManager(manager.Manager):
for association in associations:
system_peer = db_api.system_peer_get(
self.context, association.system_peer_id)
# Get 'alive' system peer
if system_peer.heartbeat_status != \
consts.SYSTEM_PEER_HEARTBEAT_STATUS_ALIVE:
# Get 'available' system peer
if system_peer.availability_state != \
consts.SYSTEM_PEER_AVAILABILITY_STATE_AVAILABLE:
LOG.warning("Peer system %s offline, skip checking" %
system_peer.peer_name)
continue

View File

@ -31,7 +31,7 @@ SAMPLE_HEARTBEAT_INTERVAL = 10
SAMPLE_HEARTBEAT_FAILURE_THRESHOLD = 3
SAMPLE_HEARTBEAT_FAILURES_POLICY = 'alarm'
SAMPLE_HEARTBEAT_MAINTENANCE_TIMEOUT = 600
SAMPLE_HEARTBEAT_STATUS_ALIVE = 'alive'
SAMPLE_AVAILABILITY_STATE_AVAILABLE = 'available'
# SAMPLE SUBCLOUD PEER GROUP DATA
SAMPLE_SUBCLOUD_PEER_GROUP_NAME = 'GroupX'
@ -207,8 +207,9 @@ class TestPeerGroupAssociationPost(testroot.DCManagerApiTest,
context = utils.dummy_context()
self.context = context
peer_id, _ = self._create_db_related_objects(context)
db_api.system_peer_update(context, peer_id=peer_id,
heartbeat_status=SAMPLE_HEARTBEAT_STATUS_ALIVE)
db_api.system_peer_update(
context, peer_id=peer_id,
availability_state=SAMPLE_AVAILABILITY_STATE_AVAILABLE)
def verify_post_failure(self, response):
# Failures will return text rather than JSON