diff --git a/controllerconfig/controllerconfig/controllerconfig/upgrades/controller.py b/controllerconfig/controllerconfig/controllerconfig/upgrades/controller.py index c2a8355848..1504f626a5 100644 --- a/controllerconfig/controllerconfig/controllerconfig/upgrades/controller.py +++ b/controllerconfig/controllerconfig/controllerconfig/upgrades/controller.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2016-2022 Wind River Systems, Inc. +# Copyright (c) 2016-2024 Wind River Systems, Inc. # # SPDX-License-Identifier: Apache-2.0 # @@ -54,8 +54,8 @@ LOG = logging.getLogger(__name__) POSTGRES_BIN = utils.get_postgres_bin() POSTGRES_MOUNT_PATH = '/mnt/postgresql' POSTGRES_DUMP_MOUNT_PATH = '/mnt/db_dump' -DB_CONNECTION_FORMAT = "connection=postgresql://%s:%s@127.0.0.1/%s\n" -DB_BARBICAN_CONNECTION_FORMAT = "postgresql://%s:%s@127.0.0.1/%s" +DB_CONNECTION_CONF_FORMAT = "connection=postgresql://%s:%s@127.0.0.1/%s\n" +DB_CONNECTION_EXEC_FORMAT = "postgresql://%s:%s@127.0.0.1/%s" restore_patching_complete = '/etc/platform/.restore_patching_complete' restore_compute_ready = '/var/run/.restore_compute_ready' @@ -159,14 +159,21 @@ def get_shared_services(): return shared_services -def get_connection_string(db_credentials, database): - """ Generates a connection string for a given database""" +def get_connection_string(db_credentials, database, exec_format=False): + """ Generates a connection string for a given database + exec_format + True: the connection string can be used in line command + ( ex: barbican ) or in psycopg2.connect + False: the connection string is to be used in .conf files + """ username = db_credentials[database]['username'] password = db_credentials[database]['password'] - if database == 'barbican': - return DB_BARBICAN_CONNECTION_FORMAT % (username, password, database) + + if exec_format: + return DB_CONNECTION_EXEC_FORMAT % (username, password, database) else: - return DB_CONNECTION_FORMAT % (username, password, database) + # use format to be used in .conf files + 
return DB_CONNECTION_CONF_FORMAT % (username, password, database) def create_temp_filesystem(vgname, lvname, mountpoint, size): @@ -660,7 +667,8 @@ def migrate_databases(from_release, shared_services, db_credentials, # Migrate barbican ('barbican', 'barbican-manage db upgrade ' + - '--db-url %s' % get_connection_string(db_credentials, 'barbican')), + '--db-url %s' % get_connection_string(db_credentials, 'barbican', + True)), ] # Migrate fm @@ -833,12 +841,9 @@ def apply_sriov_config(db_credentials, hostname): # If controller-1 has any FEC devices or sriov vfs configured, apply the # sriov runtime manifest. We can't apply it from controller-0 during the # host-unlock process as controller-1 is running the new release. - database = 'sysinv' - username = db_credentials[database]['username'] - password = db_credentials[database]['password'] - # psycopg2 can connect with the barbican string eg postgresql:// ... - connection_string = DB_BARBICAN_CONNECTION_FORMAT % ( - username, password, database) + + connection_string = get_connection_string(db_credentials, 'sysinv', True) + conn = psycopg2.connect(connection_string) cur = conn.cursor() cur.execute( @@ -874,6 +879,32 @@ def apply_sriov_config(db_credentials, hostname): os.remove(tmpfile) +def get_db_host_mgmt_ip(db_credentials, hostname): + """ Get the Hostname management IP from DB""" + + # the postgres server was stopped during the upgrade_controller + # need to use db_credentials to access the DB + connection_string = get_connection_string(db_credentials, 'sysinv', True) + conn = psycopg2.connect(connection_string) + + db_hostname = hostname + "-mgmt" + try: + cur = conn.cursor() + cur.execute("SELECT address FROM addresses WHERE name='{}';".format( + db_hostname)) + row = cur.fetchone() + + if row is None: + msg = "MGMT IP not found for: '{}'".format(db_hostname) + raise Exception(msg) + + return row[0] + + except Exception as ex: + LOG.error("Failed to get MGMT IP for: '%s'" % db_hostname) + raise ex + + def 
upgrade_controller(from_release, to_release): """ Executed on the release N+1 side upgrade controller-1. """ @@ -1047,6 +1078,33 @@ def upgrade_controller(from_release, to_release): LOG.info("Failed to update hiera configuration") raise + # this is just necessary for 22.12 + # since the old releases use the hieradata/{mgmt_ip}.yaml + # and the new one uses hieradata/{hostname}.yaml + # during the AIO-DX upgrade, the controller-0 runs the old + # release to upgrade the controller-1 + # the controller-0 still wants to use hieradata/{mgmt_ip}.yaml + # but the controller-1 wants to use hieradata/{hostname}.yaml + # so rename the {hostname}.yaml to {mgmt_ip}.yaml + # and create a symlink: {hostname}.yaml -> {mgmt_ip}.yaml + try: + ctrl1_mgmt_ip = get_db_host_mgmt_ip(db_credentials, + utils.CONTROLLER_1_HOSTNAME) + except Exception as e: + LOG.exception(e) + LOG.info("Failed to get MGMT IP for controller-1 during upgrade") + raise + + ctrl1_hostname_hieradata = constants.HIERADATA_PERMDIR + "/" \ + + utils.CONTROLLER_1_HOSTNAME + ".yaml" + + ctrl1_ipaddr_hieradata_file = ctrl1_mgmt_ip + ".yaml" + ctrl1_ipaddr_hieradata = constants.HIERADATA_PERMDIR + "/" \ + + ctrl1_ipaddr_hieradata_file + + os.rename(ctrl1_hostname_hieradata, ctrl1_ipaddr_hieradata) + os.symlink(ctrl1_ipaddr_hieradata_file, ctrl1_hostname_hieradata) + apply_sriov_config(db_credentials, utils.CONTROLLER_1_HOSTNAME) # Remove /etc/kubernetes/admin.conf after it is used to generate diff --git a/controllerconfig/controllerconfig/scripts/controller_config b/controllerconfig/controllerconfig/scripts/controller_config index 17d99e9c28..7226b35c49 100755 --- a/controllerconfig/controllerconfig/scripts/controller_config +++ b/controllerconfig/controllerconfig/scripts/controller_config @@ -709,10 +709,21 @@ start() # Apply the puppet manifest HIERADATA_PATH=${PUPPET_CACHE}/hieradata HOST_HIERA=${HIERADATA_PATH}/${HOST}.yaml + IP_HIERA=${HIERADATA_PATH}/${IPADDR}.yaml + PUPPET_YML=${HOST} + + # if IP_HIERA exists, it means an upgrade + # must use the ${IPADDR}.yaml because the active controller + # 
running the previous version doesn't update the hostname.yaml + if [ -e $ETC_PLATFORM_DIR/.upgrade_do_not_use_fqdn ] && \ + [ -f ${IP_HIERA} ]; then + HOST_HIERA=${IP_HIERA} + PUPPET_YML=${IPADDR} + fi if [ -f ${HOST_HIERA} ]; then - echo "$0: Running puppet manifest apply" - puppet-manifest-apply.sh ${HIERADATA_PATH} ${HOST} ${subfunction} + echo "$0: Running puppet manifest apply for: ${PUPPET_YML}" + puppet-manifest-apply.sh ${HIERADATA_PATH} ${PUPPET_YML} ${subfunction} RC=$? if [ $RC -ne 0 ] then diff --git a/controllerconfig/controllerconfig/upgrade-scripts/29-mgmt-network-fqdn-reconfig.py b/controllerconfig/controllerconfig/upgrade-scripts/29-mgmt-network-fqdn-reconfig.py new file mode 100644 index 0000000000..9f94d0141a --- /dev/null +++ b/controllerconfig/controllerconfig/upgrade-scripts/29-mgmt-network-fqdn-reconfig.py @@ -0,0 +1,97 @@ +#!/usr/bin/env python +# Copyright (c) 2024 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# +# This migration script is used to disable the FQDN during an upgrade +# this flag must be deleted during the upgrade complete/abort +# during the upgrade the controller-0 runs version X +# and controller-1 runs version X+1 +# to use the FQDN the active controller must run dnsmasq +# with the FQDN entries. It doesn't happen during an upgrade. +# +# during migrate stage of platform upgrade. 
It will: +# - create a flag that will be used by sysinv and puppet code to not +# use FQDN entries + +import sys + +import subprocess +import os.path +import socket + +from controllerconfig.common import log + +PLATFORM_CONF_PATH = '/etc/platform' +PLATFORM_SIMPLEX_FLAG = '/etc/platform/simplex' +UPGRADE_DO_NOT_USE_FQDN = PLATFORM_CONF_PATH + \ + '/.upgrade_do_not_use_fqdn' + + +LOG = log.get_logger(__name__) + + +def remove_unused_files_from_hieradata(to_release): + # after the upgrade it is necessary to remove the old hieradata + # {mgmt_ip}.yaml from /opt/platform/puppet/{to_release}/hieradata + # the reason is: it was replaced by {hostname}.yaml + # i.e: controller-1.yaml + ctrl1_mgmt_ip = socket.getaddrinfo("controller-1", None)[0][4][0] + + ctrl1_old_hiera = "/opt/platform/puppet/{}/hieradata/{}.yaml".format( + to_release, ctrl1_mgmt_ip) + + command = "rm -f {}".format(ctrl1_old_hiera) + + sub = subprocess.Popen(command, shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + stdout, stderr = sub.communicate() + + if sub.returncode != 0: + LOG.error('Cmd Failed:\n%s\n.%s\n%s' % + (command, stdout, stderr)) + raise Exception('Error removing unused file: {} '.format( + ctrl1_old_hiera)) + + +def main(): + action = None + from_release = None + to_release = None + arg = 1 + res = 0 + + log.configure() + + while arg < len(sys.argv): + if arg == 1: + from_release = sys.argv[arg] + elif arg == 2: + to_release = sys.argv[arg] + elif arg == 3: + action = sys.argv[arg] + else: + LOG.error("Invalid option %s." 
% sys.argv[arg]) + return 1 + arg += 1 + + LOG.info("%s invoked with from_release = %s to_release = %s action = %s" + % (sys.argv[0], from_release, to_release, action)) + + # create a flag to not use FQDN during a SW upgrade + # this flag must be deleted during the upgrade complete/abort + # during the activate, remove the unused file in hieradata + if not os.path.exists(PLATFORM_SIMPLEX_FLAG): + if action in ['start', 'migrate'] and \ + from_release in ['21.12', '22.12']: + open(UPGRADE_DO_NOT_USE_FQDN, 'w').close() + + elif action in ['activate'] and to_release in ['24.09']: + remove_unused_files_from_hieradata(to_release) + + return res + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/sysinv/sysinv/sysinv/sysinv/agent/manager.py b/sysinv/sysinv/sysinv/sysinv/agent/manager.py index 3b4fd41a4d..d16eb5a33f 100644 --- a/sysinv/sysinv/sysinv/sysinv/agent/manager.py +++ b/sysinv/sysinv/sysinv/sysinv/agent/manager.py @@ -850,21 +850,7 @@ class AgentManager(service.PeriodicService): while (timeutils.utcnow() - wait_time).total_seconds() < MAXSLEEP: # wait for controller to come up first may be a DOR try: - ihost, mgmt_addr = rpcapi.get_ihost_by_macs(icontext, host_macs) - except ValueError: - # Retry because the N-1 host does not support the mgmt_addr - # parameter during upgrade - try: - ihost = rpcapi.get_ihost_by_macs(icontext, host_macs) - except Timeout: - if not rpc_timeout: - rpc_timeout = True - LOG.info("get_ihost_by_macs rpc Timeout.") - time.sleep(5) # avoid calling timedout RPC in sequence - continue - except Exception: - LOG.warn("Conductor RPC get_ihost_by_macs exception " - "response") + ihost = rpcapi.get_ihost_by_macs(icontext, host_macs) except Timeout: if not rpc_timeout: rpc_timeout = True @@ -903,6 +889,17 @@ class AgentManager(service.PeriodicService): LOG.info("get_address_by_host_networktype rpc Timeout.") time.sleep(5) # avoid calling timedout RPC in sequence continue + except RemoteError: + try: + # active controller is running 
an old release + # without get_address_by_host_networktype RPC + mgmt_addr = ihost['mgmt_ip'] + LOG.info("get_address_by_host_networktype rpc RemoteError." + "using mgmt_ip from ihost: {}".format(mgmt_addr)) + except Exception: + LOG.warn("ihost_inv_get_and_report: ihost does not have " + "mgmt_ip") + except Exception as ex: LOG.warn("Conductor RPC get_address_by_host_networktype " "exception response %s" % ex) diff --git a/sysinv/sysinv/sysinv/sysinv/common/constants.py b/sysinv/sysinv/sysinv/sysinv/common/constants.py index 29e57c405b..76f6e52870 100644 --- a/sysinv/sysinv/sysinv/sysinv/common/constants.py +++ b/sysinv/sysinv/sysinv/sysinv/common/constants.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2013-2023 Wind River Systems, Inc. +# Copyright (c) 2013-2024 Wind River Systems, Inc. # # SPDX-License-Identifier: Apache-2.0 # @@ -2121,6 +2121,10 @@ DEFAULT_DNS_SERVICE_DOMAIN = 'cluster.local' ANSIBLE_BOOTSTRAP_FLAG = os.path.join(tsc.VOLATILE_PATH, ".ansible_bootstrap") ANSIBLE_BOOTSTRAP_COMPLETED_FLAG = os.path.join(tsc.PLATFORM_CONF_PATH, ".bootstrap_completed") +# just used for upgrade purposes +OLD_ANSIBLE_BOOTSTRAP_COMPLETED_FLAG = os.path.join(tsc.CONFIG_PATH, + ".bootstrap_completed") + UNLOCK_READY_FLAG = os.path.join(tsc.PLATFORM_CONF_PATH, ".unlock_ready") INVENTORY_WAIT_TIMEOUT_IN_SECS = 120 DEFAULT_RPCAPI_TIMEOUT_IN_SECS = 60 diff --git a/sysinv/sysinv/sysinv/sysinv/common/utils.py b/sysinv/sysinv/sysinv/sysinv/common/utils.py index 0880d25c74..b1d2d68845 100644 --- a/sysinv/sysinv/sysinv/sysinv/common/utils.py +++ b/sysinv/sysinv/sysinv/sysinv/common/utils.py @@ -2406,13 +2406,18 @@ def is_inventory_config_complete(dbapi, forihostid): return False -def is_fqdn_ready_to_use(): +def is_fqdn_ready_to_use(ignore_upgrade=False): """ Return true if FQDN can be used instead of IP ADDRESS The use of FQDN is limited to management network after the bootstrap. + During a duplex/standard upgrade the FQDN can't be used + since the old release doesn't support it. 
""" - if (os.path.isfile(constants.ANSIBLE_BOOTSTRAP_COMPLETED_FLAG)): + if (os.path.isfile(constants.ANSIBLE_BOOTSTRAP_COMPLETED_FLAG) and + (os.path.isfile(tsc.PLATFORM_SIMPLEX_FLAG) or + (not os.path.isfile(tsc.UPGRADE_DO_NOT_USE_FQDN) or + ignore_upgrade))): return True return False diff --git a/sysinv/sysinv/sysinv/sysinv/conductor/manager.py b/sysinv/sysinv/sysinv/sysinv/conductor/manager.py index bc39609e98..8b7ebf6650 100644 --- a/sysinv/sysinv/sysinv/sysinv/conductor/manager.py +++ b/sysinv/sysinv/sysinv/sysinv/conductor/manager.py @@ -1324,6 +1324,11 @@ class ConductorManager(service.PeriodicService): hostname = re.sub("-%s$" % constants.NETWORK_TYPE_MGMT, '', str(address.name)) + # during an upgrade the DB can have the unused + # controller-platform-nfs entry that must be ignored + if (hostname == 'controller-platform-nfs'): + continue + if (hostname != constants.SYSTEM_CONTROLLER_GATEWAY_IP_NAME): controller_alias = [constants.CONTROLLER_HOSTNAME, constants.DOCKER_REGISTRY_HOST, @@ -6477,7 +6482,7 @@ class ConductorManager(service.PeriodicService): :param context: an admin context :param ihost_macs: list of mac addresses - :returns: ihost object, including all fields and mgmt address. + :returns: ihost object, including all fields. 
""" ihosts = self.dbapi.ihost_get_list() @@ -6494,11 +6499,7 @@ class ConductorManager(service.PeriodicService): for host in ihosts: if host.mgmt_mac == mac: LOG.info("Host found ihost db for macs: %s" % host.hostname) - mgmt_addr = None - mgmt_addr = self.get_address_by_host_networktype( - context, host.hostname, - constants.NETWORK_TYPE_MGMT) - return host, mgmt_addr + return host LOG.debug("RPC get_ihost_by_macs called but found no ihost.") def get_ihost_by_hostname(self, context, ihost_hostname): @@ -14281,6 +14282,18 @@ class ConductorManager(service.PeriodicService): LOG.info("Deleting Sysinv Hybrid state") rpcapi.delete_sysinv_hybrid_state(context, controller_1['uuid']) + # TODO(fcorream): This is just needed for upgrade from R7 to R8 + # need to remove the flag that disables the use of FQDN during the + # upgrade + if (tsc.system_mode != constants.SYSTEM_MODE_SIMPLEX): + personalities = [constants.CONTROLLER] + config_uuid = self._config_update_hosts(context, personalities) + config_dict = { + "personalities": personalities, + "classes": ['platform::network::upgrade_fqdn_cleanup::runtime'], + } + self._config_apply_runtime_manifest(context, config_uuid, config_dict) + # Clear upgrades alarm entity_instance_id = "%s=%s" % (fm_constants.FM_ENTITY_TYPE_HOST, constants.CONTROLLER_HOSTNAME) diff --git a/sysinv/sysinv/sysinv/sysinv/puppet/ldap.py b/sysinv/sysinv/sysinv/sysinv/puppet/ldap.py index 5029c91105..03435768c1 100644 --- a/sysinv/sysinv/sysinv/sysinv/puppet/ldap.py +++ b/sysinv/sysinv/sysinv/sysinv/puppet/ldap.py @@ -45,8 +45,14 @@ class LdapPuppet(base.BasePuppet): def _is_openldap_certificate_created(self): """ Returns True when it's safe to read the openldap certificate. 
""" + # TODO: Remove OLD_ANSIBLE_BOOTSTRAP_COMPLETED_FLAG + # just needed for upgrade to R9 ( 24.09 ) + is_upgrading = utils.is_upgrade_in_progress(self.dbapi)[0] + bootstrap_completed = \ - os.path.isfile(constants.ANSIBLE_BOOTSTRAP_COMPLETED_FLAG) + os.path.isfile(constants.ANSIBLE_BOOTSTRAP_COMPLETED_FLAG) or \ + (is_upgrading and + os.path.isfile(constants.OLD_ANSIBLE_BOOTSTRAP_COMPLETED_FLAG)) return bootstrap_completed diff --git a/sysinv/sysinv/sysinv/sysinv/puppet/networking.py b/sysinv/sysinv/sysinv/sysinv/puppet/networking.py index 62b6c18dbd..30c7c2087f 100644 --- a/sysinv/sysinv/sysinv/sysinv/puppet/networking.py +++ b/sysinv/sysinv/sysinv/sysinv/puppet/networking.py @@ -61,7 +61,7 @@ class NetworkingPuppet(base.BasePuppet): }) # create flag for the mate controller to use FQDN or not - if utils.is_fqdn_ready_to_use(): + if utils.is_fqdn_ready_to_use(True): fqdn_ready = True else: fqdn_ready = False diff --git a/sysinv/sysinv/sysinv/sysinv/puppet/openstack.py b/sysinv/sysinv/sysinv/sysinv/puppet/openstack.py index 64e60ea95d..e8431b7f60 100644 --- a/sysinv/sysinv/sysinv/sysinv/puppet/openstack.py +++ b/sysinv/sysinv/sysinv/sysinv/puppet/openstack.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2017-2020 Wind River Systems, Inc. +# Copyright (c) 2017-2024 Wind River Systems, Inc. # # SPDX-License-Identifier: Apache-2.0 # @@ -108,7 +108,17 @@ class OpenstackBasePuppet(base.BasePuppet): # (by services' endpoint reconfiguration), the system commands # to add networks etc during ansible bootstrap will fail as # haproxy has not been configured yet. 
- if os.path.isfile(constants.ANSIBLE_BOOTSTRAP_COMPLETED_FLAG) and \ + + # TODO: Remove OLD_ANSIBLE_BOOTSTRAP_COMPLETED_FLAG + # just needed for upgrade to R9 + is_upgrading = cutils.is_upgrade_in_progress(self.dbapi)[0] + + bootstrap_completed = \ + os.path.isfile(constants.ANSIBLE_BOOTSTRAP_COMPLETED_FLAG) or \ + (is_upgrading and + os.path.isfile(constants.OLD_ANSIBLE_BOOTSTRAP_COMPLETED_FLAG)) + + if bootstrap_completed and \ (self._distributed_cloud_role() == constants.DISTRIBUTED_CLOUD_ROLE_SYSTEMCONTROLLER or self._distributed_cloud_role() == diff --git a/sysinv/sysinv/sysinv/sysinv/puppet/platform.py b/sysinv/sysinv/sysinv/sysinv/puppet/platform.py index e7dfb05b95..f159c5f469 100644 --- a/sysinv/sysinv/sysinv/sysinv/puppet/platform.py +++ b/sysinv/sysinv/sysinv/sysinv/puppet/platform.py @@ -956,7 +956,16 @@ class PlatformPuppet(base.BasePuppet): def _get_dc_root_ca_config(self): config = {} system = self._get_system() - if os.path.isfile(constants.ANSIBLE_BOOTSTRAP_COMPLETED_FLAG): + # TODO: Remove OLD_ANSIBLE_BOOTSTRAP_COMPLETED_FLAG + # just needed for upgrade to R9 + is_upgrading = utils.is_upgrade_in_progress(self.dbapi)[0] + + bootstrap_completed = \ + os.path.isfile(constants.ANSIBLE_BOOTSTRAP_COMPLETED_FLAG) or \ + (is_upgrading and + os.path.isfile(constants.OLD_ANSIBLE_BOOTSTRAP_COMPLETED_FLAG)) + + if bootstrap_completed: cert_data = utils.get_admin_ep_cert( system.distributed_cloud_role) diff --git a/sysinv/sysinv/sysinv/sysinv/tests/conductor/test_manager.py b/sysinv/sysinv/sysinv/sysinv/tests/conductor/test_manager.py index 70a09caaa9..dad633a5f9 100644 --- a/sysinv/sysinv/sysinv/sysinv/tests/conductor/test_manager.py +++ b/sysinv/sysinv/sysinv/sysinv/tests/conductor/test_manager.py @@ -2949,7 +2949,7 @@ class ManagerTestCase(base.DbTestCase): 'address': '192.168.204.3'} utils.create_test_address(**address) ihost_macs = ['22:44:33:55:11:66', '22:44:33:88:11:66'] - ihost, mgmt_addr = self.service.get_ihost_by_macs(self.context, ihost_macs) + 
ihost = self.service.get_ihost_by_macs(self.context, ihost_macs) self.assertEqual(ihost.mgmt_mac, '22:44:33:55:11:66') def test_get_ihost_by_macs_no_match(self): diff --git a/tsconfig/tsconfig/tsconfig/tsconfig.py b/tsconfig/tsconfig/tsconfig/tsconfig.py index fb5f36e289..000542ad32 100644 --- a/tsconfig/tsconfig/tsconfig/tsconfig.py +++ b/tsconfig/tsconfig/tsconfig/tsconfig.py @@ -1,5 +1,5 @@ """ -Copyright (c) 2014-2023 Wind River Systems, Inc. +Copyright (c) 2014-2024 Wind River Systems, Inc. SPDX-License-Identifier: Apache-2.0 @@ -223,6 +223,11 @@ MGMT_NETWORK_RECONFIGURATION_UNLOCK = os.path.join( MGMT_NETWORK_RECONFIG_UPDATE_HOST_FILES = os.path.join( PLATFORM_CONF_PATH, ".mgmt_reconfig_update_hosts_file") +# Set in the upgrade script to not use FQDN during SW upgrade +# must be deleted after upgrade complete /abort +UPGRADE_DO_NOT_USE_FQDN = os.path.join( + PLATFORM_CONF_PATH, ".upgrade_do_not_use_fqdn") + # Worker configuration flags # Set after initial application of node manifest