Upgrade health check for Platform Issuer

Add a health check for the overall sanity of 'system-local-ca'. Verify that:
- Secret fields are in place and valid;
- ClusterIssuer is in place;
- RCA certificate is trusted by platform;
- ICA certificate chain can be verified;
- Expected certificates (Docker Registry, REST API / GUI and Local
  OpenLDAP) are managed by cert-manager, are in place and their chain
  can be verified.

In case of failures, a warning informing the user that the
certificates are expected to be managed by cert-manager before
upgrading will also be displayed after the specific error description.

Test plan:
(System deployed with create_platform_certificates enabled)

PASS: Perform 'system health-query-upgrade' command.
      Verify message "Platform Issuer and expected certificates are
      healthy: [OK]" is displayed.

PASS: Replace the 'ca.crt' field of the system-local-ca secret with an
      empty string.
      Perform 'system health-query-upgrade' command.
      Verify message "Platform Issuer and expected certificates are
      healthy: [OK]" is displayed.

PASS: Delete 'system-registry-local-certificate'.
      Perform 'system health-query-upgrade' command.
      Verify message "Platform Issuer and expected certificates are
      healthy: [Fail]" is displayed, followed by the error description
      and a warning for the user to perform the CA update procedure
      (cert-manager migration).

PASS: Remove ClusterIssuer system-local-ca.
      Perform 'system health-query-upgrade' command.
      Verify message "Platform Issuer and expected certificates are
      healthy: [Fail]" is displayed, followed by the error description
      and a warning for the user to perform the CA update procedure
      (cert-manager migration).

PASS: Uninstall system-local-ca's RCA ssl_ca certificate (system
      certificate-uninstall -m ssl_ca).
      Perform 'system health-query-upgrade' command.
      Verify message "Platform Issuer and expected certificates are
      healthy: [Fail]" is displayed, followed by the error description
      and a warning for the user to perform the CA update procedure
      (cert-manager migration).

PASS: Remove any field from system-local-ca secret.
      Perform 'system health-query-upgrade' command.
      Verify message "Platform Issuer and expected certificates are
      healthy: [Fail]" is displayed, followed by the error description
      and a warning for the user to perform the CA update procedure
      (cert-manager migration).

PASS: Remove secret system-local-ca.
      Perform 'system health-query-upgrade' command.
      Verify message "Platform Issuer and expected certificates are
      healthy: [Fail]" is displayed, followed by the error description
      and a warning for the user to perform the CA update procedure
      (cert-manager migration).

Story: 2009811
Task: 49606

Change-Id: I0190d6b9092351a61a0b92bab1ea107bb05f8633
Signed-off-by: Marcelo Loebens <Marcelo.DeCastroLoebens@windriver.com>
This commit is contained in:
Marcelo Loebens 2024-02-22 09:38:01 -04:00
parent e5b1f55e64
commit 5f88874ef1
4 changed files with 267 additions and 19 deletions

View File

@ -1,5 +1,5 @@
#
# Copyright (c) 2018-2023 Wind River Systems, Inc.
# Copyright (c) 2018-2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -392,6 +392,172 @@ class Health(object):
else:
return True, psp_list
def _check_local_issuer_clusterIssuer(self):
    """Check that the Platform Issuer ClusterIssuer exists and is Ready.

    Looks up the cluster-wide cert-manager 'clusterissuers' resource named
    by constants.LOCAL_CA_SECRET_NAME.

    :return: '' when the ClusterIssuer is found and reported ready,
             otherwise a newline-terminated error description.
    """
    issuer = self._kube_operator.get_clusterwide_custom_resource(
        kubernetes.CERT_MANAGER_GROUP,
        kubernetes.CERT_MANAGER_VERSION,
        'clusterissuers',
        constants.LOCAL_CA_SECRET_NAME)

    if not issuer:
        return 'Local ClusterIssuer could not be found.\n'
    if utils.check_k8s_resource_ready(issuer):
        return ''
    return 'Local ClusterIssuer is not Ready.\n'
# Validate the Platform Issuer secret (constants.LOCAL_CA_SECRET_NAME in
# constants.CERT_NAMESPACE_PLATFORM_CA_CERTS) and return the accumulated,
# newline-terminated error text ('' when nothing wrong was detected).
# NOTE(review): indentation is not visible in this view; the Certificate
# ownership scan near the end presumably runs at function level, independent
# of the secret checks before it - confirm against the real file.
def _check_local_issuer_secret_data(self):
err_msg = ''
ca_secret = self._kube_operator.kube_get_secret(constants.LOCAL_CA_SECRET_NAME,
constants.CERT_NAMESPACE_PLATFORM_CA_CERTS)
# The secret must exist and expose both 'data' and 'type' attributes.
if not ca_secret or not hasattr(ca_secret, 'data') or not hasattr(ca_secret, 'type'):
err_msg += 'Platform Issuer (system-local-ca) secret data could not be retrieved.\n'
else:
# Only a secret of the TLS type is accepted for the issuer.
if ca_secret.type != constants.K8S_SECRET_TYPE_TLS:
err_msg += 'Platform Issuer (system-local-ca) secret data type is invalid.\n'
else:
data = ca_secret.data
# All three keys must be present; their contents are not inspected here.
if ('ca.crt' not in data or 'tls.crt' not in data or 'tls.key' not in data):
err_msg += 'Missing field in Platform Issuer (system-local-ca) secret data.\n'
# List every cert-manager Certificate in the issuer's namespace to detect one
# that writes into the issuer secret.
certs_list = self._kube_operator.list_namespaced_custom_resources(
kubernetes.CERT_MANAGER_GROUP,
kubernetes.CERT_MANAGER_VERSION,
constants.CERT_NAMESPACE_PLATFORM_CA_CERTS,
'certificates')
if certs_list:
for cert_obj in certs_list:
# A Certificate whose spec.secretName is the issuer secret is reported as an
# invalid state, once per matching Certificate.
# NOTE(review): assumes every listed object has a 'spec' dict; a missing
# 'spec' would raise AttributeError here.
if cert_obj.get('spec').get('secretName') == constants.LOCAL_CA_SECRET_NAME:
err_msg += 'Platform Issuer (system-local-ca) secret data is in an invalid state.\n'
LOG.error('%s is not expected to be owned by a Certificate.'
% constants.LOCAL_CA_SECRET_NAME)
return err_msg
# Verify the CA material stored in the Platform Issuer secret and return a
# newline-terminated error description, or '' when no problem is detected.
# Every failure returns immediately, so at most one message is produced.
# NOTE(review): indentation is not visible in this view; the branch nesting
# described in the comments below is the presumed reading of the
# if/elif/else pairs - confirm against the real file.
def _check_local_issuer_CA_cert_chain(self):
err_msg = tls_crt = tls_key = ca_crt = ''
try:
tls_crt, tls_key, ca_crt = utils.get_certificate_from_secret(
constants.LOCAL_CA_SECRET_NAME,
constants.CERT_NAMESPACE_PLATFORM_CA_CERTS)
except Exception as e:
# Any retrieval/decoding failure is logged and reported as a single error.
LOG.exception(e)
err_msg += 'Platform Issuer CA data could not be retrieved.\n'
return err_msg
# Both the certificate and its key must be non-empty; 'ca.crt' may be empty.
if not bool(tls_crt) or not bool(tls_key):
err_msg += 'Platform Issuer CA certificate and/or key data is empty.\n'
return err_msg
# RCA
# tls.crt passed the self-signed check: it must be trusted, and a non-empty
# ca.crt must be identical to tls.crt.
if utils.verify_self_signed_ca_cert(tls_crt):
if not utils.verify_cert_chain_trusted(tls_crt):
err_msg += 'Platform Issuer Root CA certificate is not trusted by the platform.\n'
return err_msg
elif bool(ca_crt) and ca_crt != tls_crt:
err_msg += 'Platform Issuer CA certificate chain is incorrect.\n'
return err_msg
# ICA
# tls.crt did not pass the self-signed check.
else:
if ca_crt != tls_crt:
# A distinct, non-empty ca.crt must itself be trusted and must verify as the
# issuer of tls.crt.
if bool(ca_crt):
if not utils.verify_cert_chain_trusted(ca_crt):
err_msg += 'Platform Issuer Root CA certificate is not trusted by the platform.\n'
return err_msg
if not utils.verify_cert_issuer(tls_crt, ca_crt):
err_msg += 'Platform Issuer Intermediate CA certificate chain is incorrect.\n'
return err_msg
# ca.crt is empty: fall back to verifying whatever chain tls.crt carries.
else:
if not utils.verify_cert_chain_trusted(tls_crt):
err_msg += 'Platform Issuer Root CA certificate is not trusted by the platform.\n'
return err_msg
# ca.crt is identical to tls.crt: verify the chain carried by tls.crt.
else:
if not utils.verify_cert_chain_trusted(tls_crt):
err_msg += 'Platform Issuer Root CA certificate is not trusted by the platform.\n'
return err_msg
return err_msg
def _check_leaf_certificate_chain(self, cert_name, cert_namespace):
    """Check that a certificate secret holds data with a trusted chain.

    :param cert_name: name of the secret holding the certificate; also
                      used to identify the certificate in error messages.
    :param cert_namespace: namespace of the secret.
    :return: '' when the certificate and key are present and the chain is
             verified as trusted, otherwise a newline-terminated error
             description.
    """
    try:
        leaf_crt, leaf_key, _ = utils.get_certificate_from_secret(cert_name, cert_namespace)
    except Exception as e:
        LOG.exception(e)
        return 'Certificate - %s - data could not be retrieved.\n' % cert_name

    if not (leaf_crt and leaf_key):
        return 'Certificate - %s - cert and/or key data is empty.\n' % cert_name
    if not utils.verify_cert_chain_trusted(leaf_crt):
        return 'Certificate - %s - chain cannot be verified as trusted.\n' % cert_name
    return ''
def _check_expected_platform_certs(self):
    """Check the platform certificates expected to be managed by cert-manager.

    The REST API and registry certificates are always checked; the
    OpenLDAP certificate is checked unless the system's distributed cloud
    role is subcloud. For each one the Certificate resource in
    kubernetes.NAMESPACE_DEPLOYMENT must exist, be ready, reference the
    Platform Issuer, store its data in a secret of the same name, and
    carry a chain that can be verified as trusted.

    :return: the concatenated, newline-terminated error descriptions
             ('' when every expected certificate is healthy).
    """
    cert_names = [constants.RESTAPI_CERT_SECRET_NAME,
                  constants.REGISTRY_CERT_SECRET_NAME]
    if (self._dbapi.isystem_get_one().distributed_cloud_role !=
            constants.DISTRIBUTED_CLOUD_ROLE_SUBCLOUD):
        cert_names.append(constants.OPENLDAP_CERT_SECRET_NAME)

    problems = []
    for name in cert_names:
        resource = self._kube_operator.get_custom_resource(
            kubernetes.CERT_MANAGER_GROUP,
            kubernetes.CERT_MANAGER_VERSION,
            kubernetes.NAMESPACE_DEPLOYMENT,
            'certificates',
            name)

        if not resource:
            problems.append('Expected Certificate - %s - could not be found.\n' % name)
            continue
        if not utils.check_k8s_resource_ready(resource):
            problems.append('Expected Certificate - %s - is not Ready.\n' % name)
            continue

        spec = resource.get('spec')
        if spec.get('issuerRef').get('name') != constants.LOCAL_CA_SECRET_NAME:
            problems.append(
                'Expected Certificate - %s - was not issued by the Platform Issuer.\n' % name)
        elif spec.get('secretName') != name:
            problems.append(
                'Expected Certificate - %s - secret name is different from expected.\n' % name)
        else:
            # The resource looks right; validate the issued data itself.
            problems.append(
                self._check_leaf_certificate_chain(name, kubernetes.NAMESPACE_DEPLOYMENT))

    return ''.join(problems)
def _check_local_issuer_health(self):
    """Run the Platform Issuer (system-local-ca) health checks in order.

    The checks are executed one at a time and the sequence stops at the
    first one that reports a problem, so later checks (which depend on the
    earlier ones being sane) are not run needlessly. Any exception raised
    by a check is logged and reported as a failure instead of propagating.

    :return: tuple (success, message). 'success' is True only when every
             check returned an empty error string; 'message' is '' on
             success, otherwise the error description followed by a
             warning about migrating certificates to cert-manager.
    """
    update_ca_warning = (
        "* \n"
        "* Warning: User is expected to convert Platform certificates (e.g. System REST API / GUI and \n"
        "* Local Docker Registry) to use cert-manager and be issued by system-local-ca ClusterIssuer, \n"
        "* before upgrading.\n"
        "* If you haven\'t yet, this might be the cause of the issues detected. Please perform the\n"
        "* \'Update system-local-ca or Migrate Platform Certificates to use Cert Manager\' procedure\n"
        "* before continuing.\n"
        "* \n"
    )

    # Keep the bound methods (not their results) so each check is invoked
    # lazily inside the try block below.
    check_methods = (self._check_local_issuer_secret_data,
                     self._check_local_issuer_CA_cert_chain,
                     self._check_local_issuer_clusterIssuer,
                     self._check_expected_platform_certs)

    err_msg = ''
    try:
        for method in check_methods:
            err_msg = method()
            if err_msg:
                # First failure wins; skip the remaining checks.
                break
    except Exception as e:
        LOG.exception(e)
        err_msg += "Could not finish Platform Issuer (system-local-ca) health verification.\n"

    if err_msg:
        err_msg += update_ca_warning
    return not err_msg, err_msg
def get_system_health(self, context, force=False, alarm_ignore_list=None):
"""Returns the general health of the system
@ -563,6 +729,7 @@ class Health(object):
a health check
"""
# Does a general health check then does the following:
# The platform issuer (system-local-ca) and certs are healthy
# A load is imported
# The load patch requirements are met
# The license is valid for the N+1 load
@ -595,6 +762,13 @@ class Health(object):
health_ok = health_ok and success
# Check the platform issuer ('system-local-ca') and platform certificates
success, msg = self._check_local_issuer_health()
output += _('Platform Issuer and expected certificates are healthy: [%s]\n') \
% (Health.SUCCESS_MSG if success else Health.FAIL_MSG)
output += msg
health_ok = health_ok and success
loads = self._dbapi.load_get_list()
try:
imported_load = utils.get_imported_load(loads)

View File

@ -18,7 +18,7 @@
# License for the specific language governing permissions and limitations
# under the License.
#
# Copyright (c) 2013-2023 Wind River Systems, Inc.
# Copyright (c) 2013-2024 Wind River Systems, Inc.
#
@ -3050,6 +3050,13 @@ def get_admin_ep_cert(dc_role):
return secret_data
def check_k8s_resource_ready(object_dict):
    """Tell whether a k8s resource reports itself as Ready.

    :param object_dict: the resource as a dict; its 'status.conditions'
                        list is inspected. Missing or empty 'status' /
                        'conditions' entries are treated as "not ready".
    :return: True only if a condition of type 'Ready' has status 'True',
             False otherwise.
    """
    status = (object_dict or {}).get('status') or {}
    conditions = status.get('conditions') or []
    # The mere presence of a 'Ready' condition is not enough: a resource
    # that is not ready carries the same condition with status 'False'.
    return any(item.get('type') == 'Ready' and str(item.get('status')) == 'True'
               for item in conditions)
def get_secret_type(secret_name, secret_ns):
"""
Get k8s secret type
@ -3077,6 +3084,7 @@ def get_certificate_from_secret(secret_name, secret_ns):
:param secret_ns: the namespace of the secret
:return: tls_crt: the certificate.
tls_key: the corresponding private key of the certificate.
ca_crt: the CA certificate that issued tls_crt if available.
raise Exception for kubernetes data errors
"""
kube = kubernetes.KubeOperator()
@ -3093,11 +3101,17 @@ def get_certificate_from_secret(secret_name, secret_ns):
try:
tls_crt = base64.decode_as_text(data['tls.crt'])
tls_key = base64.decode_as_text(data['tls.key'])
if 'ca.crt' in data:
ca_crt = base64.decode_as_text(data['ca.crt'])
else:
LOG.warning("Secret does't have CA data stored: %s\\%s" %
(secret_ns, secret_name))
ca_crt = ''
except TypeError:
raise Exception('Certificate secret data is invalid %s\\%s' %
(secret_ns, secret_name))
return tls_crt, tls_key
return tls_crt, tls_key, ca_crt
def get_ca_certificate_from_opaque_secret(secret_name, secret_ns):
@ -3130,6 +3144,8 @@ def get_ca_certificate_from_opaque_secret(secret_name, secret_ns):
return ca_crt
# TODO(mdecastr): verify replacing cert verification methods that
# rely on proc calls w/ lib cryptography
def verify_self_signed_ca_cert(crt):
with tempfile.NamedTemporaryFile() as tmpfile:
tmpfile.write(crt.encode('utf8'))
@ -3138,8 +3154,7 @@ def verify_self_signed_ca_cert(crt):
proc = subprocess.Popen(cmd, stdin=subprocess.PIPE,
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
universal_newlines=True)
stdout, stderr = proc.communicate(input=crt)
stdout, stderr = proc.communicate()
proc.wait()
if 0 == proc.returncode:
return True
@ -3149,6 +3164,64 @@ def verify_self_signed_ca_cert(crt):
return False
def verify_cert_issuer(cert, issuer):
    """Verify with openssl that 'issuer' validates 'cert'.

    Only one certificate from each argument takes part in the check: the
    last certificate found in 'cert' and the first certificate found in
    'issuer'. The verification runs 'openssl verify -partial_chain' with
    the issuer certificate as the only trusted certificate.

    :param cert: PEM text containing the certificate(s) to verify.
    :param issuer: PEM text containing the candidate issuer certificate(s).
    :return: True if openssl exits with code 0, False otherwise (the
             failure is logged).
    """
    cert_pem = extract_certs_from_pem(
        cert.encode('utf-8'))[-1].public_bytes(serialization.Encoding.PEM)
    issuer_pem = extract_certs_from_pem(
        issuer.encode('utf-8'))[0].public_bytes(serialization.Encoding.PEM)

    # Context managers guarantee both temporary files are closed (and so
    # removed) even if writing or spawning openssl fails.
    with tempfile.NamedTemporaryFile() as tmpfile_crt, \
            tempfile.NamedTemporaryFile() as tmpfile_issuer:
        tmpfile_crt.write(cert_pem)
        tmpfile_crt.flush()
        tmpfile_issuer.write(issuer_pem)
        tmpfile_issuer.flush()

        cmd = ['openssl', 'verify', '-partial_chain', '-trusted',
               tmpfile_issuer.name, tmpfile_crt.name]
        proc = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                                universal_newlines=True)
        # communicate() waits for the process to end and sets returncode.
        stdout, stderr = proc.communicate()

    if proc.returncode == 0:
        return True

    LOG.error('Provided issuer does not match cert\n%s\n%s\n%s' %
              (cert + issuer, stdout, stderr))
    return False
def verify_cert_chain_trusted(cert_chain):
    """Verify a PEM certificate chain link by link up to the trusted bundle.

    Each certificate in the chain must be validated by the certificate
    that follows it, and the last certificate must be verified by
    verify_cert_against_trusted_bundle().

    :param cert_chain: PEM text with one or more certificates, ordered so
                       that each certificate is followed by its issuer.
    :return: True if every link and the final certificate verify, False
             otherwise (including when no certificate is found).
    """
    certs = extract_certs_from_pem(cert_chain.encode('utf-8'))
    if not certs:
        LOG.error('Provided cert chain has no certificates to verify')
        return False

    pems = [cert.public_bytes(serialization.Encoding.PEM).decode('utf-8')
            for cert in certs]

    # Walk adjacent (certificate, issuer) pairs.
    for cert_pem, issuer_pem in zip(pems, pems[1:]):
        if not verify_cert_issuer(cert_pem, issuer_pem):
            LOG.error('Provided cert chain cannot be verified as trusted\n%s' % cert_chain)
            return False

    return verify_cert_against_trusted_bundle(pems[-1])
def verify_cert_against_trusted_bundle(crt):
    """Verify a certificate with openssl against the shared CA bundle file.

    The certificate is written to a temporary file and checked with
    'openssl verify -trusted constants.SSL_CERT_CA_FILE_SHARED'.

    :param crt: PEM text of the certificate to verify.
    :return: True if openssl exits with code 0, False otherwise (the
             failure is logged at info level).
    """
    with tempfile.NamedTemporaryFile() as pem_file:
        pem_file.write(crt.encode('utf8'))
        pem_file.flush()

        openssl_cmd = ['openssl', 'verify', '-trusted',
                       constants.SSL_CERT_CA_FILE_SHARED, pem_file.name]
        proc = subprocess.Popen(openssl_cmd,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                universal_newlines=True)
        stdout, stderr = proc.communicate()
        proc.wait()

        verified = (0 == proc.returncode)
        if not verified:
            LOG.info('Provided cert cannot be verified as trusted\n%s\n%s\n%s' %
                     (crt, stdout, stderr))
        return verified
def verify_ca_crt(crt):
with tempfile.NamedTemporaryFile() as tmpfile:
tmpfile.write(crt.encode('utf8'))

View File

@ -14878,20 +14878,21 @@ class ConductorManager(service.PeriodicService):
# Currently we don't support renewing 'system-local-ca' certificate,
# so if the secret is owned by a certificate resource managed by
# cert-manager, we need to delete it.
certificate = kube_operator.get_custom_resource(
# cert-manager we need to delete it as well.
certs_list = kube_operator.list_namespaced_custom_resources(
kubernetes.CERT_MANAGER_GROUP,
kubernetes.CERT_MANAGER_VERSION,
constants.CERT_NAMESPACE_PLATFORM_CA_CERTS,
'certificates',
constants.LOCAL_CA_SECRET_NAME)
if certificate is not None:
kube_operator.delete_custom_resource(
kubernetes.CERT_MANAGER_GROUP,
kubernetes.CERT_MANAGER_VERSION,
constants.CERT_NAMESPACE_PLATFORM_CA_CERTS,
'certificates',
constants.LOCAL_CA_SECRET_NAME)
'certificates')
if certs_list:
for cert_obj in certs_list:
if cert_obj.get('spec').get('secretName') == constants.LOCAL_CA_SECRET_NAME:
kube_operator.delete_custom_resource(
kubernetes.CERT_MANAGER_GROUP,
kubernetes.CERT_MANAGER_VERSION,
constants.CERT_NAMESPACE_PLATFORM_CA_CERTS,
'certificates',
cert_obj.get('metadata').get('name'))
secret = kube_operator.kube_get_secret(
constants.LOCAL_CA_SECRET_NAME,

View File

@ -1,5 +1,5 @@
#
# Copyright (c) 2017-2022 Wind River Systems, Inc.
# Copyright (c) 2017-2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -70,7 +70,7 @@ class LdapPuppet(base.BasePuppet):
constants.OPENLDAP_CA_CERT_SECRET_NAME,
constants.CERT_NAMESPACE_PLATFORM_CA_CERTS)
else:
ldap_ca_cert, _ = utils.get_certificate_from_secret(
ldap_ca_cert, _, _ = utils.get_certificate_from_secret(
constants.OPENLDAP_CA_CERT_SECRET_NAME,
constants.CERT_NAMESPACE_PLATFORM_CA_CERTS)
@ -79,7 +79,7 @@ class LdapPuppet(base.BasePuppet):
'platform::ldap::params::ca_cert': ldap_ca_cert,
})
else:
ldap_cert, ldap_key = utils.get_certificate_from_secret(
ldap_cert, ldap_key, _ = utils.get_certificate_from_secret(
constants.OPENLDAP_CERT_SECRET_NAME,
constants.CERT_NAMESPACE_PLATFORM_CERTS)