Check vim strategy before updating network

It is risky to update a subcloud's network if there is an existing
vim strategy: the on-going vim strategy triggers lock/unlock/swact
can interrupt the ansible-playbook, leaving the subcloud in an
un-recoverable state.

To prevent this issue, this commit checks the on-going vim strategy,
and blocks the further operation if it exists.

Test plan:
1. Passed - deploy a DC with this change.
2. Passed - create a kube-rootca-update orchestration against a
subcloud, verify the subcloud update with network reconfiguration is
blocked by the ongoing strategy, verify the network reconfiguration is
not blocked after the strategy applied.
3. Passed - create a system-config-update strategy by updating the
addresspool on the subcloud, verify the subcloud update with network
reconfiguration is blocked when applying this strategy, verify this
operation is not blocked after the strategy applied.

Story: 2010722
Task: 49414
Signed-off-by: Yuxing Jiang <Yuxing.Jiang@windriver.com>
Change-Id: I2c3b824ebbd6766996f3422c681f16d03b6063fa
This commit is contained in:
Yuxing Jiang 2024-01-16 13:44:52 -05:00
parent 30812c4411
commit bff2f0aa2f
3 changed files with 206 additions and 12 deletions

View File

@ -1,5 +1,5 @@
# Copyright 2016 Ericsson AB
# Copyright (c) 2017-2022 Wind River Systems, Inc.
# Copyright (c) 2017-2022,2024 Wind River Systems, Inc.
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
@ -29,6 +29,7 @@ STRATEGY_NAME_KUBE_ROOTCA_UPDATE = 'kube-rootca-update'
STRATEGY_NAME_KUBE_UPGRADE = 'kube-upgrade'
STRATEGY_NAME_SW_PATCH = 'sw-patch'
STRATEGY_NAME_SW_UPGRADE = 'sw-upgrade'
STRATEGY_NAME_SYS_CONFIG_UPDATE = "system-config-update"
APPLY_TYPE_SERIAL = 'serial'
APPLY_TYPE_PARALLEL = 'parallel'
@ -64,6 +65,12 @@ STATE_ABORT_FAILED = 'abort-failed'
STATE_ABORT_TIMEOUT = 'abort-timeout'
STATE_ABORTED = 'aborted'
TRANSITORY_STATES = [STATE_INITIAL,
STATE_BUILDING,
STATE_READY_TO_APPLY,
STATE_APPLYING,
STATE_ABORTING]
# The exception message when vim authorization fails
VIM_AUTHORIZATION_FAILED = "Authorization failed"

View File

@ -40,7 +40,9 @@ from keystoneauth1 import exceptions as keystone_exceptions
from dccommon import consts as dccommon_consts
from dccommon.drivers.openstack.fm import FmClient
from dccommon.drivers.openstack.sdk_platform import OpenStackDriver
from dccommon.drivers.openstack.sysinv_v1 import SysinvClient
from dccommon.drivers.openstack import vim
from dccommon import exceptions as dccommon_exceptions
from dcmanager.api.controllers import restcomm
@ -170,6 +172,56 @@ class SubcloudsController(object):
payload.update(json.loads(request.body))
return payload
def _check_existing_vim_strategy(self, context, subcloud):
"""Check existing vim strategy by state.
An on-going vim strategy may interfere with subcloud reconfiguration
attempt and result in unrecoverable failure. Check if there is an
on-going strategy and whether it is in a state that is safe to proceed.
:param context: request context object.
:param subcloud: subcloud object.
:returns bool: True if on-going vim strategy found or not searchable,
otherwise False.
"""
strategy_steps = None
# Firstly, check the DC orchestrated vim strategies from database
try:
strategy_steps = db_api.strategy_step_get(context, subcloud.id)
except exceptions.StrategyStepNotFound:
LOG.debug(f"No existing vim strategy steps on subcloud: {subcloud.name}")
except Exception:
LOG.exception("Failed to get strategy steps on subcloud: "
f"{subcloud.name}.")
return True
if strategy_steps and strategy_steps.state not in (
consts.STRATEGY_STATE_COMPLETE,
consts.STRATEGY_STATE_ABORTED,
consts.STRATEGY_STATE_FAILED
):
return True
# Then check the system config update strategy
try:
keystone_client = OpenStackDriver(
region_name=subcloud.region_name,
region_clients=None).keystone_client
vim_client = vim.VimClient(subcloud.region_name,
keystone_client.session)
strategy = vim_client.get_strategy(
strategy_name=vim.STRATEGY_NAME_SYS_CONFIG_UPDATE,
raise_error_if_missing=False)
except Exception:
# Don't block the operation when the vim service is inaccessible
LOG.warning(f"Openstack admin endpoints on subcloud: {subcloud.name} "
"are unaccessible")
return False
return strategy and strategy.state in vim.TRANSITORY_STATES
# TODO(nicodemos): Check if subcloud is online and network already exist in the
# subcloud when the lock/unlock is not required for network reconfiguration
def _validate_network_reconfiguration(self, payload, subcloud):
@ -671,6 +723,15 @@ class SubcloudsController(object):
)
# Validation
self._validate_network_reconfiguration(payload, subcloud)
# Validate there's no on-going vim strategy
if self._check_existing_vim_strategy(context, subcloud):
error_msg = (
"Reconfiguring subcloud network is not allowed while "
"there is an on-going orchestrated operation in this "
"subcloud. Please try again after the strategy has "
"completed."
)
pecan.abort(400, error_msg)
management_state = payload.get('management-state')
group_id = payload.get('group_id')

View File

@ -42,7 +42,9 @@ from dcmanager.rpc import client as rpc_client
from dcmanager.tests.unit.api import test_root_controller as testroot
from dcmanager.tests.unit.api.v1.controllers.mixins import APIMixin
from dcmanager.tests.unit.api.v1.controllers.mixins import PostMixin
from dcmanager.tests.unit.common import fake_strategy
from dcmanager.tests.unit.common import fake_subcloud
from dcmanager.tests.unit.fakes import FakeVimStrategy
from dcmanager.tests import utils
SAMPLE_SUBCLOUD_NAME = "SubcloudX"
@ -1405,17 +1407,15 @@ class TestSubcloudAPIOther(testroot.DCManagerApiTest):
deploy_status=None)
self.assertEqual(response.status_int, 200)
@mock.patch.object(subclouds.SubcloudsController,
"_check_existing_vim_strategy")
@mock.patch.object(psd_common, "get_network_address_pool")
@mock.patch.object(
subclouds.SubcloudsController, "_validate_network_reconfiguration"
)
@mock.patch.object(subclouds.SubcloudsController,
"_validate_network_reconfiguration")
@mock.patch.object(subclouds.SubcloudsController, "_get_patch_data")
def test_patch_subcloud_network_values(
self,
mock_get_patch_data,
mock_validate_network_reconfiguration,
mock_mgmt_address_pool,
):
self, mock_get_patch_data, mock_validate_network_reconfiguration,
mock_mgmt_address_pool, mock_check_existing_vim_strategy):
subcloud = fake_subcloud.create_fake_subcloud(self.ctx)
db_api.subcloud_update(
self.ctx,
@ -1438,6 +1438,7 @@ class TestSubcloudAPIOther(testroot.DCManagerApiTest):
"192.168.204.0", 24, "192.168.204.2", "192.168.204.100"
)
mock_mgmt_address_pool.return_value = fake_management_address_pool
mock_check_existing_vim_strategy.return_value = False
self.mock_rpc_client().update_subcloud_with_network_reconfig.return_value = (
True
@ -1454,11 +1455,54 @@ class TestSubcloudAPIOther(testroot.DCManagerApiTest):
)
self.assertEqual(response.status_int, 200)
@mock.patch.object(subclouds.SubcloudsController,
"_check_existing_vim_strategy")
@mock.patch.object(psd_common, "get_network_address_pool")
@mock.patch.object(subclouds.SubcloudsController,
"_validate_network_reconfiguration")
@mock.patch.object(subclouds.SubcloudsController, "_get_patch_data")
def test_patch_subcloud_network_values_with_onging_strategy(
self, mock_get_patch_data, mock_validate_network_reconfiguration,
mock_mgmt_address_pool, mock_check_existing_vim_strategy):
subcloud = fake_subcloud.create_fake_subcloud(self.ctx)
db_api.subcloud_update(
self.ctx, subcloud.id,
availability_status=dccommon_consts.AVAILABILITY_ONLINE)
fake_password = (
base64.b64encode('testpass'.encode("utf-8"))).decode('ascii')
payload = {'sysadmin_password': fake_password,
'bootstrap_address': "192.168.102.2",
'management_subnet': "192.168.102.0/24",
'management_start_ip': "192.168.102.5",
'management_end_ip': "192.168.102.49",
'management_gateway_ip': "192.168.102.1"}
fake_management_address_pool = FakeAddressPool('192.168.204.0', 24,
'192.168.204.2',
'192.168.204.100')
mock_mgmt_address_pool.return_value = fake_management_address_pool
mock_check_existing_vim_strategy.return_value = True
self.mock_rpc_client().update_subcloud_with_network_reconfig.\
return_value = True
mock_get_patch_data.return_value = payload
six.assertRaisesRegex(
self,
webtest.app.AppError,
"400 *",
self.app.patch_json,
f"{ FAKE_URL }/{ subcloud.id }",
headers=FAKE_HEADERS,
params=payload,
)
mock_validate_network_reconfiguration.assert_called_once()
self.mock_rpc_client().update_subcloud_with_network_reconfig.\
assert_not_called()
@mock.patch.object(subclouds.SubcloudsController, "_get_patch_data")
@mock.patch.object(cutils, "get_vault_load_files")
def test_patch_subcloud_install_values(
self, mock_vault_files, mock_get_patch_data
):
def test_patch_subcloud_install_values(self, mock_vault_files,
mock_get_patch_data):
mock_vault_files.return_value = ("fake_iso", "fake_sig")
subcloud = fake_subcloud.create_fake_subcloud(self.ctx, data_install=None)
payload = {}
@ -2866,3 +2910,85 @@ class TestSubcloudAPIOther(testroot.DCManagerApiTest):
existing_networks=None,
operation=None,
)
@mock.patch.object(subclouds, "OpenStackDriver")
@mock.patch.object(subclouds.vim, "VimClient")
def test_check_other_strategy_exists(self, mock_vim_client, mock_os_driver):
subcloud = fake_subcloud.create_fake_subcloud(self.ctx)
fake_strategy.create_fake_strategy_step(
self.ctx,
subcloud_id=subcloud.id,
state=consts.STRATEGY_STATE_INITIAL)
sc = subclouds.SubcloudsController()
result = sc._check_existing_vim_strategy(self.ctx, subcloud)
self.assertTrue(result)
mock_os_driver.assert_not_called()
mock_vim_client.assert_not_called()
@mock.patch.object(subclouds, "db_api")
@mock.patch.object(subclouds, "OpenStackDriver")
@mock.patch.object(subclouds.vim, "VimClient")
def test_no_vim_strategy(self, mock_vim_client, mock_os_driver, mock_db_api):
subcloud = fake_subcloud.create_fake_subcloud(self.ctx)
sc = subclouds.SubcloudsController()
result = sc._check_existing_vim_strategy(self.ctx, subcloud)
mock_vim_client_instance = mock_vim_client.return_value
mock_vim_client_instance.get_strategy.return_value = None
mock_db_api.strategy_step_get.side_effect = \
exceptions.StrategyStepNotFound
sc = subclouds.SubcloudsController()
result = sc._check_existing_vim_strategy(self.ctx, subcloud)
mock_os_driver.assert_called_with(region_name=subcloud.region_name,
region_clients=None)
mock_vim_client.assert_called_with(
subcloud.region_name,
mock_os_driver.return_value.keystone_client.session)
self.assertFalse(result)
@mock.patch.object(subclouds, "db_api")
@mock.patch.object(subclouds, "OpenStackDriver")
@mock.patch.object(subclouds.vim, "VimClient")
def test_check_system_config_update_strategy_exists(
self, mock_vim_client, mock_os_driver, mock_db_api
):
subcloud = fake_subcloud.create_fake_subcloud(self.ctx)
fake_strategy = FakeVimStrategy(state=subclouds.vim.STATE_APPLYING)
mock_db_api.strategy_step_get.side_effect = \
exceptions.StrategyStepNotFound
mock_vim_client_instance = mock_vim_client.return_value
mock_vim_client_instance.get_strategy.return_value = fake_strategy
sc = subclouds.SubcloudsController()
result = sc._check_existing_vim_strategy(self.ctx, subcloud)
mock_os_driver.assert_called_with(region_name=subcloud.region_name,
region_clients=None)
mock_vim_client.assert_called_with(
subcloud.region_name,
mock_os_driver.return_value.keystone_client.session)
self.assertTrue(result)
@mock.patch.object(subclouds, "db_api")
@mock.patch.object(subclouds, "OpenStackDriver")
@mock.patch.object(subclouds.vim, "VimClient")
def test_check_system_config_update_strategy_applied(
self, mock_vim_client, mock_os_driver, mock_db_api
):
subcloud = fake_subcloud.create_fake_subcloud(self.ctx)
fake_strategy = FakeVimStrategy(state=subclouds.vim.STATE_APPLIED)
mock_db_api.strategy_step_get.side_effect = \
exceptions.StrategyStepNotFound
mock_vim_client_instance = mock_vim_client.return_value
mock_vim_client_instance.get_strategy.return_value = fake_strategy
sc = subclouds.SubcloudsController()
result = sc._check_existing_vim_strategy(self.ctx, subcloud)
mock_os_driver.assert_called_with(region_name=subcloud.region_name,
region_clients=None)
mock_vim_client.assert_called_with(
subcloud.region_name,
mock_os_driver.return_value.keystone_client.session)
self.assertFalse(result)