Block host-unlock till kernel manifest completes

If the following commands are issued in quick succession,
1. system host-kernel-modify controller-0 lowlatency
2. system host-unlock controller-0

The puppet runtime manifest, which is executed asynchronously,
will not have enough time to run and will end up being run
on the next reboot, leading to alarms being raised.

This feature adds reporting of the kernel runtime
manifest status. The 'in progress' status is persisted
in the ihost table and is used to validate host-unlock.

Story: 2010731
Task: 48684

Test plan:

PASS - AIO-SX: DM config with kernel: lowlatency
               Verify no kernel config alarms raised
               and lowlatency kernel is running

PASS - AIO-DX: DM config with kernel: lowlatency
               Verify no kernel config alarms raised
               and lowlatency kernel is running

PASS - AIO-DX: Test really fast unlock
               Verify unlock is blocked

Change-Id: I5f30e6f94eae3b287b402a15d1739d61b7d20ca9
Signed-off-by: Kyale, Eliud <Eliud.Kyale@windriver.com>
This commit is contained in:
Kyale, Eliud 2023-08-30 11:35:35 -04:00
parent 30308599e4
commit 703592fa1a
11 changed files with 161 additions and 5 deletions

View File

@ -5561,6 +5561,7 @@ class HostController(rest.RestController):
self.unlock_update_mgmt_interface(hostupdate.ihost_patch)
self.check_unlock_partitions(hostupdate)
self.check_unlock_patching(hostupdate, force_unlock)
self.check_unlock_kernel_config_status(hostupdate, force_unlock)
hostupdate.configure_required = True
if ((os.path.isfile(constants.ANSIBLE_BOOTSTRAP_FLAG) or
@ -5575,6 +5576,26 @@ class HostController(rest.RestController):
return True
def check_unlock_kernel_config_status(self, hostupdate, force_unlock):
    """Reject an unlock while kernel configuration is still in progress.

    The check only applies when constants.WORKER appears in the host's
    subfunctions. A force unlock bypasses the check entirely.

    :param hostupdate: host update request; its ``ihost_patch`` mapping
        supplies 'hostname', 'subfunctions' and 'kernel_config_status'.
    :param force_unlock: when truthy, skip the check.
    :raises wsme.exc.ClientSideError: if the host's kernel_config_status
        is constants.KERNEL_CONFIG_STATUS_PENDING.
    """
    if force_unlock:
        return

    ihost = hostupdate.ihost_patch

    # 'subfunctions' may be absent or None in the patch; treat that as
    # "no worker subfunction" instead of raising TypeError on ``in``.
    subfunctions = ihost.get('subfunctions') or ''
    if constants.WORKER not in subfunctions:
        return

    status = ihost.get('kernel_config_status')
    if status != constants.KERNEL_CONFIG_STATUS_PENDING:
        return

    # Translate the constant template first and interpolate afterwards,
    # so the message id looked up by _() does not vary with the hostname.
    hostname = ihost.get('hostname')
    msg = _('Can not unlock %s '
            'kernel configuration in progress.') % hostname
    raise wsme.exc.ClientSideError(msg)
def check_unlock_patching(self, hostupdate, force_unlock):
"""Check whether the host is patch current.
"""

View File

@ -135,7 +135,8 @@ class Kernel(base.APIBase):
updated_subfunctions = Kernel._create_subfunctions_str(subfunctions_set)
updates = \
{
constants.SUBFUNCTIONS: updated_subfunctions
constants.SUBFUNCTIONS: updated_subfunctions,
'kernel_config_status': constants.KERNEL_CONFIG_STATUS_PENDING
}
ihost.save_changes(pecan.request.context, updates)

View File

@ -130,6 +130,8 @@ KERNEL_STANDARD = 'standard'
SUPPORTED_KERNELS = [KERNEL_LOWLATENCY, KERNEL_STANDARD]
# kernel_config_status value stored on a host when its kernel is modified;
# a (non-forced) host-unlock is refused while the host still carries it.
KERNEL_CONFIG_STATUS_PENDING = 'config_pending'
# CPU functions
PLATFORM_FUNCTION = "Platform"
VSWITCH_FUNCTION = "Vswitch"

View File

@ -1310,9 +1310,49 @@ class ConductorManager(service.PeriodicService):
"classes": [
'platform::grub::kernel_image::runtime',
'platform::config::file::subfunctions::lowlatency::runtime'
]
],
puppet_common.REPORT_STATUS_CFG: puppet_common.REPORT_KERNEL_CONFIG
}
self._config_apply_runtime_manifest(context, config_uuid, config_dict)
self._config_apply_runtime_manifest(context, config_uuid,
config_dict, force=True)
def report_kernel_config_complete(self, context, ihost_uuid, status, error):
    """Record the outcome of a host's kernel config runtime manifest.

    Invoked when the agent reports that the runtime manifest has finished,
    whether it failed or succeeded. The reported status is written to the
    host's kernel_config_status field; nothing is written when the host
    cannot be found.

    Args:
        context: admin context
        ihost_uuid (uuid): host uuid
        status: operation status
        error: err content as a dict of type:
            error = {
                'class': str(ex.__class__.__name__),
                'module': str(ex.__class__.__module__),
                'message': six.text_type(ex),
                'tb': traceback.format_exception(*ex),
                'args': ex.args,
                'kwargs': ex.kwargs
            }
    """
    ihost_uuid = ihost_uuid.strip()

    try:
        ihost = self.dbapi.ihost_get(ihost_uuid)
    except exception.ServerNotFound:
        # Unknown host: log the report as received, then give up.
        LOG.info('Kernel runtime manifest completed report '
                 f'uuid={ihost_uuid} '
                 f'status={status} '
                 f'error={error}')
        LOG.error(f'Host not found {ihost_uuid}')
        return None

    hostname = ihost['hostname']
    LOG.info('Kernel runtime manifest completed report '
             f'{hostname} status={status} error={error}')

    # Persist the reported status, then read the host back so the log
    # line below shows the value that is actually stored.
    ihost.save_changes(context, {'kernel_config_status': status})
    refreshed = self.dbapi.ihost_get(ihost_uuid)
    stored_status = refreshed['kernel_config_status']
    LOG.info(f"DB updated {hostname} "
             f"kernel_config_status={stored_status}")
def report_kernel_running(self, context, ihost_uuid, kernel_running: str):
"""Report from sysinv agent with the running kernel of that host
@ -9426,6 +9466,11 @@ class ConductorManager(service.PeriodicService):
LOG.error("No match for sysinv-agent manifest application reported! "
"reported_cfg: %(cfg)s status: %(status)s "
"iconfig: %(iconfig)s" % args)
elif reported_cfg == puppet_common.REPORT_KERNEL_CONFIG:
# The agent is reporting runtime kernel config params have been applied
host_uuid = iconfig['host_uuid']
self.report_kernel_config_complete(context, host_uuid, status, error)
success = (status == puppet_common.REPORT_SUCCESS)
elif reported_cfg == puppet_common.REPORT_UPGRADE_ABORT:
kube_upgrade_obj = objects.kube_upgrade.get_one(context)
# The agent is reporting the runtime kube_upgrade_abort has been applied.

View File

@ -14,6 +14,7 @@ def upgrade(migrate_engine):
migrate_engine.connect()
i_host = Table('i_host', meta, autoload=True)
i_host.create_column(Column('kernel_running', String(64)))
i_host.create_column(Column('kernel_config_status', String(255)))
def downgrade(migrate_engine):

View File

@ -205,6 +205,7 @@ class ihost(Base):
personality = Column(invPersonalityEnum)
kernel_running = Column(String(64))
kernel_config_status = Column(String(255), default="")
subfunctions = Column(String(255))
subfunction_oper = Column(operEnum, default="disabled")
subfunction_avail = Column(availEnum, default="not-installed")

View File

@ -49,6 +49,7 @@ class Host(base.SysinvObject):
'hostname': utils.str_or_none,
'personality': utils.str_or_none,
'kernel_running': utils.str_or_none,
'kernel_config_status': utils.str_or_none,
'subfunctions': utils.str_or_none,
'subfunction_oper': utils.str_or_none,
'subfunction_avail': utils.str_or_none,

View File

@ -54,6 +54,7 @@ REPORT_KUBE_CERT_UPDATE_PODS_TRUSTNEWCA = \
'pods_' + constants.KUBE_CERT_UPDATE_TRUSTNEWCA
REPORT_KUBE_UPDATE_KUBELET_PARAMS = 'update_kubelet_params'
REPORT_HTTP_CONFIG = 'http_config'
# Report tag for the host kernel runtime manifest; presumably matched
# against the reported_cfg value in agent status reports - verify at caller.
REPORT_KERNEL_CONFIG = 'host_kernel_config'
REPORT_UPGRADE_ABORT = 'upgrade_abort'

View File

@ -2830,6 +2830,85 @@ class TestPatch(TestHost):
self.assertEqual(response.content_type, 'application/json')
self.assertEqual(response.status_code, http_client.OK)
def test_worker_unlock_during_kernel_configuration(self):
    """A plain unlock of a worker with kernel config pending is rejected."""
    # Both controllers share the same provisioned/unlocked/enabled state
    controller_state = {
        'invprovision': constants.PROVISIONED,
        'administrative': constants.ADMIN_UNLOCKED,
        'operational': constants.OPERATIONAL_ENABLED,
        'availability': constants.AVAILABILITY_ONLINE,
    }
    self._create_controller_0(**controller_state)
    self._create_controller_1(**controller_state)

    # Locked worker-0 whose kernel configuration is still pending
    worker = self._create_worker(
        mgmt_ip='192.168.204.5',
        invprovision=constants.PROVISIONED,
        administrative=constants.ADMIN_LOCKED,
        operational=constants.OPERATIONAL_ENABLED,
        availability=constants.AVAILABILITY_ONLINE,
        kernel_config_status=constants.KERNEL_CONFIG_STATUS_PENDING)
    self._create_test_host_platform_interface(worker)
    self._create_test_host_cpus(worker, platform=1, vswitch=2, application=12)

    worker_name = worker['hostname']
    response = self._patch_host_action(
        worker_name, constants.UNLOCK_ACTION,
        'sysinv-test', expect_errors=True)

    # Neither the VIM nor maintenance may have been contacted
    self.mock_vim_api_host_action.assert_not_called()
    self.mock_mtce_api_host_modify.assert_not_called()

    # The request fails with a JSON error naming the blocked host
    self.assertEqual(response.content_type, 'application/json')
    self.assertEqual(response.status_code, http_client.BAD_REQUEST)
    error_message = response.json['error_message']
    self.assertTrue(error_message)
    self.assertIn(f"Can not unlock {worker_name} "
                  "kernel configuration in progress",
                  error_message)
def test_worker_force_unlock_during_kernel_configuration(self):
    """A force unlock succeeds even while kernel config is pending."""
    # Both controllers share the same provisioned/unlocked/enabled state
    controller_state = {
        'invprovision': constants.PROVISIONED,
        'administrative': constants.ADMIN_UNLOCKED,
        'operational': constants.OPERATIONAL_ENABLED,
        'availability': constants.AVAILABILITY_ONLINE,
    }
    self._create_controller_0(**controller_state)
    self._create_controller_1(**controller_state)

    # Locked worker-0 whose kernel configuration is still pending
    worker = self._create_worker(
        mgmt_ip='192.168.204.5',
        invprovision=constants.PROVISIONED,
        administrative=constants.ADMIN_LOCKED,
        operational=constants.OPERATIONAL_ENABLED,
        availability=constants.AVAILABILITY_ONLINE,
        kernel_config_status=constants.KERNEL_CONFIG_STATUS_PENDING)
    self._create_test_host_platform_interface(worker)
    self._create_test_host_cpus(worker, platform=1, vswitch=2, application=12)

    worker_name = worker['hostname']
    response = self._patch_host_action(
        worker_name, constants.FORCE_UNLOCK_ACTION,
        'sysinv-test')

    # The forced request is accepted
    self.assertEqual(response.content_type, 'application/json')
    self.assertEqual(response.status_code, http_client.OK)
class TestPatchStdDuplexControllerAction(TestHost):

View File

@ -5191,7 +5191,8 @@ class ManagerTestCase(base.DbTestCase):
"classes": [
'platform::grub::kernel_image::runtime',
'platform::config::file::subfunctions::lowlatency::runtime'
]
],
'report_status': 'host_kernel_config'
}
config_uuid = '1234'
mock_config_update_hosts.return_value = config_uuid
@ -5201,7 +5202,8 @@ class ManagerTestCase(base.DbTestCase):
mock_config_update_hosts.assert_called_once()
mock_config_apply_runtime_manifest.assert_called_once_with(mock.ANY,
config_uuid,
config_dict)
config_dict,
force=True)
@mock.patch('sysinv.conductor.manager.'
'ConductorManager._config_apply_runtime_manifest')

View File

@ -112,6 +112,7 @@ def post_get_test_ihost(**kw):
del inv['recordtype']
del inv['uuid']
del inv['kernel_running']
del inv['kernel_config_status']
return inv
@ -142,6 +143,7 @@ def get_test_ihost(**kw):
'task': kw.get('task', None),
'capabilities': kw.get('capabilities', {}),
'kernel_running': kw.get('kernel_running', constants.KERNEL_STANDARD),
'kernel_config_status': kw.get('kernel_config_status', ''),
'subfunctions': kw.get('subfunctions', "ihostsubfunctions"),
'subfunction_oper': kw.get('subfunction_oper', "disabled"),
'subfunction_avail': kw.get('subfunction_avail', "not-installed"),