Merge "Upgrade changes to support MGMT FQDN"

This commit is contained in:
Zuul 2024-03-08 19:56:49 +00:00 committed by Gerrit Code Review
commit b9ab073997
13 changed files with 263 additions and 48 deletions

View File

@ -1,5 +1,5 @@
#
# Copyright (c) 2016-2022 Wind River Systems, Inc.
# Copyright (c) 2016-2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -54,8 +54,8 @@ LOG = logging.getLogger(__name__)
POSTGRES_BIN = utils.get_postgres_bin()
POSTGRES_MOUNT_PATH = '/mnt/postgresql'
POSTGRES_DUMP_MOUNT_PATH = '/mnt/db_dump'
DB_CONNECTION_FORMAT = "connection=postgresql://%s:%s@127.0.0.1/%s\n"
DB_BARBICAN_CONNECTION_FORMAT = "postgresql://%s:%s@127.0.0.1/%s"
DB_CONNECTION_CONF_FORMAT = "connection=postgresql://%s:%s@127.0.0.1/%s\n"
DB_CONNECTION_EXEC_FORMAT = "postgresql://%s:%s@127.0.0.1/%s"
restore_patching_complete = '/etc/platform/.restore_patching_complete'
restore_compute_ready = '/var/run/.restore_compute_ready'
@ -159,14 +159,21 @@ def get_shared_services():
return shared_services
def get_connection_string(db_credentials, database):
""" Generates a connection string for a given database"""
def get_connection_string(db_credentials, database, exec_format=False):
""" Generates a connection string for a given database
exec_format
True: the connection string can be used on the command line
( ex: barbican ) or in psycopg2.connect
False: the connection string is to be used in .conf files
"""
username = db_credentials[database]['username']
password = db_credentials[database]['password']
if database == 'barbican':
return DB_BARBICAN_CONNECTION_FORMAT % (username, password, database)
if exec_format:
return DB_CONNECTION_EXEC_FORMAT % (username, password, database)
else:
return DB_CONNECTION_FORMAT % (username, password, database)
# use format to be used in .conf files
return DB_CONNECTION_CONF_FORMAT % (username, password, database)
def create_temp_filesystem(vgname, lvname, mountpoint, size):
@ -660,7 +667,8 @@ def migrate_databases(from_release, shared_services, db_credentials,
# Migrate barbican
('barbican',
'barbican-manage db upgrade ' +
'--db-url %s' % get_connection_string(db_credentials, 'barbican')),
'--db-url %s' % get_connection_string(db_credentials, 'barbican',
True)),
]
# Migrate fm
@ -833,12 +841,9 @@ def apply_sriov_config(db_credentials, hostname):
# If controller-1 has any FEC devices or sriov vfs configured, apply the
# sriov runtime manifest. We can't apply it from controller-0 during the
# host-unlock process as controller-1 is running the new release.
database = 'sysinv'
username = db_credentials[database]['username']
password = db_credentials[database]['password']
# psycopg2 can connect with the barbican string eg postgresql:// ...
connection_string = DB_BARBICAN_CONNECTION_FORMAT % (
username, password, database)
connection_string = get_connection_string(db_credentials, 'sysinv', True)
conn = psycopg2.connect(connection_string)
cur = conn.cursor()
cur.execute(
@ -874,6 +879,32 @@ def apply_sriov_config(db_credentials, hostname):
os.remove(tmpfile)
def get_db_host_mgmt_ip(db_credentials, hostname):
    """ Get the hostname management IP from the sysinv DB

    Looks up the row of the 'addresses' table whose name is
    '<hostname>-mgmt' and returns its 'address' column.

    :param db_credentials: credentials mapping handed to
        get_connection_string() to build the 'sysinv' connection string
    :param hostname: host name without the '-mgmt' suffix
    :returns: the value of the 'address' column of the matching row
    :raises Exception: when no matching row exists; any database error
        is logged and re-raised
    """
    # the postgres server was stopped during the upgrade_controller
    # need to use db_credentials to access the DB
    connection_string = get_connection_string(db_credentials, 'sysinv', True)
    db_hostname = hostname + "-mgmt"
    conn = psycopg2.connect(connection_string)
    try:
        cur = conn.cursor()
        # let the driver bind the value instead of formatting it into
        # the SQL text
        cur.execute("SELECT address FROM addresses WHERE name=%s;",
                    (db_hostname,))
        row = cur.fetchone()
        if row is None:
            msg = "MGMT IP not found for: '{}'".format(db_hostname)
            raise Exception(msg)
        return row[0]
    except Exception:
        LOG.error("Failed to get MGMT IP for: '%s'" % db_hostname)
        raise
    finally:
        # always release the connection, on success and on failure
        conn.close()
def upgrade_controller(from_release, to_release):
""" Executed on the release N+1 side upgrade controller-1. """
@ -1047,6 +1078,33 @@ def upgrade_controller(from_release, to_release):
LOG.info("Failed to update hiera configuration")
raise
# this is only necessary for 22.12,
# since the old release uses hieradata/<mgmt_ip>.yaml
# and the new one uses hieradata/<hostname>.yaml.
# during the AIO-DX upgrade, controller-0 runs the old
# release to upgrade controller-1.
# controller-0 still wants to use hieradata/<mgmt_ip>.yaml
# but controller-1 wants to use hieradata/<hostname>.yaml,
# so rename <hostname>.yaml to <mgmt_ip>.yaml
# and create a symlink: <hostname>.yaml -> <mgmt_ip>.yaml
try:
ctrl1_mgmt_ip = get_db_host_mgmt_ip(db_credentials,
utils.CONTROLLER_1_HOSTNAME)
except Exception as e:
LOG.exception(e)
LOG.info("Failed to get MGMT IP for controller-1 during upgrade")
raise
ctrl1_hostname_hieradata = constants.HIERADATA_PERMDIR + "/" \
+ utils.CONTROLLER_1_HOSTNAME + ".yaml"
ctrl1_ipaddr_hieradata_file = ctrl1_mgmt_ip + ".yaml"
ctrl1_ipaddr_hieradata = constants.HIERADATA_PERMDIR + "/" \
+ ctrl1_ipaddr_hieradata_file
os.rename(ctrl1_hostname_hieradata, ctrl1_ipaddr_hieradata)
os.symlink(ctrl1_ipaddr_hieradata_file, ctrl1_hostname_hieradata)
apply_sriov_config(db_credentials, utils.CONTROLLER_1_HOSTNAME)
# Remove /etc/kubernetes/admin.conf after it is used to generate

View File

@ -709,10 +709,21 @@ start()
# Apply the puppet manifest
HIERADATA_PATH=${PUPPET_CACHE}/hieradata
HOST_HIERA=${HIERADATA_PATH}/${HOST}.yaml
IP_HIERA=${HIERADATA_PATH}/${IPADDR}.yaml
PUPPET_YML=${HOST}
# if IP_HIERA exists, it means an upgrade
# must use the <mgmt_ip>.yaml because the active controller
# running the previous version doesn't update the hostname.yaml
if [ -e $ETC_PLATFORM_DIR/.upgrade_do_not_use_fqdn ] && \
[ -f ${IP_HIERA} ]; then
HOST_HIERA=${IP_HIERA}
PUPPET_YML=${IPADDR}
fi
if [ -f ${HOST_HIERA} ]; then
echo "$0: Running puppet manifest apply"
puppet-manifest-apply.sh ${HIERADATA_PATH} ${HOST} ${subfunction}
echo "$0: Running puppet manifest apply for: ${PUPPET_YML}"
puppet-manifest-apply.sh ${HIERADATA_PATH} ${PUPPET_YML} ${subfunction}
RC=$?
if [ $RC -ne 0 ]
then

View File

@ -0,0 +1,97 @@
#!/usr/bin/env python
# Copyright (c) 2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# This migration script is used to disable the FQDN during an upgrade
# this flag must be deleted during the upgrade complete/abort
# during the upgrade the controller-0 runs version X
# and controller-1 runs version X+1
# to use the FQDN the active controller must run dnsmasq
# with the FQDN entries. It doesn't happen during an upgrade.
#
# during migrate stage of platform upgrade. It will:
# - create a flag that will be used by sysinv and puppet code to not
# use FQDN entries
import sys
import subprocess
import os.path
import socket
from controllerconfig.common import log
PLATFORM_CONF_PATH = '/etc/platform'
PLATFORM_SIMPLEX_FLAG = '/etc/platform/simplex'
UPGRADE_DO_NOT_USE_FQDN = PLATFORM_CONF_PATH + \
'/.upgrade_do_not_use_fqdn'
LOG = log.get_logger(__name__)
def remove_unused_files_from_hieradata(to_release):
    """Remove the controller-1 <mgmt_ip>.yaml file from the hieradata dir.

    The file removed is
    /opt/platform/puppet/<to_release>/hieradata/<ip>.yaml, where <ip> is
    the first address returned when resolving "controller-1".
    After the upgrade this file is no longer needed because it was
    replaced by <hostname>.yaml (i.e: controller-1.yaml).

    :param to_release: release string used to build the hieradata path
    :raises Exception: when the 'rm' command exits with a non-zero status
    """
    ctrl1_mgmt_ip = socket.getaddrinfo("controller-1", None)[0][4][0]
    ctrl1_old_hiera = "/opt/platform/puppet/{}/hieradata/{}.yaml".format(
        to_release, ctrl1_mgmt_ip)
    # pass an argument list and do not go through a shell, so the path is
    # never interpreted by a shell
    command = ["rm", "-f", ctrl1_old_hiera]
    sub = subprocess.Popen(command,
                           stdout=subprocess.PIPE,
                           stderr=subprocess.PIPE)
    stdout, stderr = sub.communicate()
    if sub.returncode != 0:
        LOG.error('Cmd Failed:\n%s\n.%s\n%s' %
                  (" ".join(command), stdout, stderr))
        raise Exception('Error removing unused file: {} '.format(
            ctrl1_old_hiera))
def main():
    """Entry point of the migration script.

    Positional arguments: from_release, to_release, action. Any extra
    argument is reported as invalid and 1 is returned.

    On systems without the simplex flag:
      - 'start'/'migrate' coming from 21.12 or 22.12 creates the flag
        that disables the use of FQDN during a SW upgrade (this flag
        must be deleted during the upgrade complete/abort)
      - 'activate' going to 24.09 removes the unused file in hieradata

    :returns: 0 on success, 1 on invalid arguments
    """
    log.configure()

    params = sys.argv[1:]
    if len(params) > 3:
        # the first argument after the three expected ones is reported
        LOG.error("Invalid option %s." % params[3])
        return 1

    # missing trailing arguments stay None
    from_release, to_release, action = (params + [None] * 3)[:3]

    LOG.info("%s invoked with from_release = %s to_release = %s action = %s"
             % (sys.argv[0], from_release, to_release, action))

    # nothing to do on simplex systems
    if os.path.exists(PLATFORM_SIMPLEX_FLAG):
        return 0

    if action in ('start', 'migrate') and \
            from_release in ('21.12', '22.12'):
        # create an empty flag file
        with open(UPGRADE_DO_NOT_USE_FQDN, 'w'):
            pass
    elif action == 'activate' and to_release == '24.09':
        remove_unused_files_from_hieradata(to_release)

    return 0
# When executed as a script, run main() and use its return value as the
# process exit status.
if __name__ == "__main__":
sys.exit(main())

View File

@ -850,21 +850,7 @@ class AgentManager(service.PeriodicService):
while (timeutils.utcnow() - wait_time).total_seconds() < MAXSLEEP:
# wait for controller to come up first may be a DOR
try:
ihost, mgmt_addr = rpcapi.get_ihost_by_macs(icontext, host_macs)
except ValueError:
# Retry because the N-1 host does not support the mgmt_addr
# parameter during upgrade
try:
ihost = rpcapi.get_ihost_by_macs(icontext, host_macs)
except Timeout:
if not rpc_timeout:
rpc_timeout = True
LOG.info("get_ihost_by_macs rpc Timeout.")
time.sleep(5) # avoid calling timedout RPC in sequence
continue
except Exception:
LOG.warn("Conductor RPC get_ihost_by_macs exception "
"response")
ihost = rpcapi.get_ihost_by_macs(icontext, host_macs)
except Timeout:
if not rpc_timeout:
rpc_timeout = True
@ -903,6 +889,17 @@ class AgentManager(service.PeriodicService):
LOG.info("get_address_by_host_networktype rpc Timeout.")
time.sleep(5) # avoid calling timedout RPC in sequence
continue
except RemoteError:
try:
# active controller is running an old release
# without get_address_by_host_networktype RPC
mgmt_addr = ihost['mgmt_ip']
LOG.info("get_address_by_host_networktype rpc RemoteError."
"using mgmt_ip from ihost: {}".format(mgmt_addr))
except Exception:
LOG.warn("ihost_inv_get_and_report: ihost does not have "
"mgmt_ip")
except Exception as ex:
LOG.warn("Conductor RPC get_address_by_host_networktype "
"exception response %s" % ex)

View File

@ -1,5 +1,5 @@
#
# Copyright (c) 2013-2023 Wind River Systems, Inc.
# Copyright (c) 2013-2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -2122,6 +2122,10 @@ DEFAULT_DNS_SERVICE_DOMAIN = 'cluster.local'
ANSIBLE_BOOTSTRAP_FLAG = os.path.join(tsc.VOLATILE_PATH, ".ansible_bootstrap")
ANSIBLE_BOOTSTRAP_COMPLETED_FLAG = os.path.join(tsc.PLATFORM_CONF_PATH,
".bootstrap_completed")
# just used for upgrade purposes
OLD_ANSIBLE_BOOTSTRAP_COMPLETED_FLAG = os.path.join(tsc.CONFIG_PATH,
".bootstrap_completed")
UNLOCK_READY_FLAG = os.path.join(tsc.PLATFORM_CONF_PATH, ".unlock_ready")
INVENTORY_WAIT_TIMEOUT_IN_SECS = 120
DEFAULT_RPCAPI_TIMEOUT_IN_SECS = 60

View File

@ -2407,13 +2407,18 @@ def is_inventory_config_complete(dbapi, forihostid):
return False
def is_fqdn_ready_to_use():
def is_fqdn_ready_to_use(ignore_upgrade=False):
"""
Return true if FQDN can be used instead of IP ADDRESS
The use of FQDN is limited to management network
after the bootstrap.
During a duplex/standard upgrade the FQDN can't be used
since the old release doesn't support it.
"""
if (os.path.isfile(constants.ANSIBLE_BOOTSTRAP_COMPLETED_FLAG)):
if (os.path.isfile(constants.ANSIBLE_BOOTSTRAP_COMPLETED_FLAG) and
(os.path.isfile(tsc.PLATFORM_SIMPLEX_FLAG) or
(not os.path.isfile(tsc.UPGRADE_DO_NOT_USE_FQDN) or
ignore_upgrade))):
return True
return False

View File

@ -1406,6 +1406,11 @@ class ConductorManager(service.PeriodicService):
hostname = re.sub("-%s$" % constants.NETWORK_TYPE_MGMT,
'', str(address.name))
# during an upgrade the DB can have the unused
# controller-platform-nfs entry that must be ignored
if (hostname == 'controller-platform-nfs'):
continue
if (hostname != constants.SYSTEM_CONTROLLER_GATEWAY_IP_NAME):
controller_alias = [constants.CONTROLLER_HOSTNAME,
constants.DOCKER_REGISTRY_HOST,
@ -6574,7 +6579,7 @@ class ConductorManager(service.PeriodicService):
:param context: an admin context
:param ihost_macs: list of mac addresses
:returns: ihost object, including all fields and mgmt address.
:returns: ihost object, including all fields.
"""
ihosts = self.dbapi.ihost_get_list()
@ -6591,11 +6596,7 @@ class ConductorManager(service.PeriodicService):
for host in ihosts:
if host.mgmt_mac == mac:
LOG.info("Host found ihost db for macs: %s" % host.hostname)
mgmt_addr = None
mgmt_addr = self.get_address_by_host_networktype(
context, host.hostname,
constants.NETWORK_TYPE_MGMT)
return host, mgmt_addr
return host
LOG.debug("RPC get_ihost_by_macs called but found no ihost.")
def get_ihost_by_hostname(self, context, ihost_hostname):
@ -14381,6 +14382,18 @@ class ConductorManager(service.PeriodicService):
LOG.info("Deleting Sysinv Hybrid state")
rpcapi.delete_sysinv_hybrid_state(context, controller_1['uuid'])
# TODO(fcorream): This is just needed for upgrade from R7 to R8
# need to remove the flag that disables the use of FQDN during the
# upgrade
if (tsc.system_mode != constants.SYSTEM_MODE_SIMPLEX):
personalities = [constants.CONTROLLER]
config_uuid = self._config_update_hosts(context, personalities)
config_dict = {
"personalities": personalities,
"classes": ['platform::network::upgrade_fqdn_cleanup::runtime'],
}
self._config_apply_runtime_manifest(context, config_uuid, config_dict)
# Clear upgrades alarm
entity_instance_id = "%s=%s" % (fm_constants.FM_ENTITY_TYPE_HOST,
constants.CONTROLLER_HOSTNAME)

View File

@ -45,8 +45,14 @@ class LdapPuppet(base.BasePuppet):
def _is_openldap_certificate_created(self):
""" Returns True when it's safe to read the openldap certificate.
"""
# TODO<fcorream>: Remove OLD_ANSIBLE_BOOTSTRAP_COMPLETED_FLAG
# just needed for upgrade to R9 ( 24.09 )
is_upgrading = utils.is_upgrade_in_progress(self.dbapi)[0]
bootstrap_completed = \
os.path.isfile(constants.ANSIBLE_BOOTSTRAP_COMPLETED_FLAG)
os.path.isfile(constants.ANSIBLE_BOOTSTRAP_COMPLETED_FLAG) or \
(is_upgrading and
os.path.isfile(constants.OLD_ANSIBLE_BOOTSTRAP_COMPLETED_FLAG))
return bootstrap_completed

View File

@ -61,7 +61,7 @@ class NetworkingPuppet(base.BasePuppet):
})
# create flag for the mate controller to use FQDN or not
if utils.is_fqdn_ready_to_use():
if utils.is_fqdn_ready_to_use(True):
fqdn_ready = True
else:
fqdn_ready = False

View File

@ -1,5 +1,5 @@
#
# Copyright (c) 2017-2020 Wind River Systems, Inc.
# Copyright (c) 2017-2024 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
@ -108,7 +108,17 @@ class OpenstackBasePuppet(base.BasePuppet):
# (by services' endpoint reconfiguration), the system commands
# to add networks etc during ansible bootstrap will fail as
# haproxy has not been configured yet.
if os.path.isfile(constants.ANSIBLE_BOOTSTRAP_COMPLETED_FLAG) and \
# TODO<fcorream>: Remove OLD_ANSIBLE_BOOTSTRAP_COMPLETED_FLAG
# just needed for upgrade to R9
is_upgrading = cutils.is_upgrade_in_progress(self.dbapi)[0]
bootstrap_completed = \
os.path.isfile(constants.ANSIBLE_BOOTSTRAP_COMPLETED_FLAG) or \
(is_upgrading and
os.path.isfile(constants.OLD_ANSIBLE_BOOTSTRAP_COMPLETED_FLAG))
if bootstrap_completed and \
(self._distributed_cloud_role() ==
constants.DISTRIBUTED_CLOUD_ROLE_SYSTEMCONTROLLER or
self._distributed_cloud_role() ==

View File

@ -956,7 +956,16 @@ class PlatformPuppet(base.BasePuppet):
def _get_dc_root_ca_config(self):
config = {}
system = self._get_system()
if os.path.isfile(constants.ANSIBLE_BOOTSTRAP_COMPLETED_FLAG):
# TODO<fcorream>: Remove OLD_ANSIBLE_BOOTSTRAP_COMPLETED_FLAG
# just needed for upgrade to R9
is_upgrading = utils.is_upgrade_in_progress(self.dbapi)[0]
bootstrap_completed = \
os.path.isfile(constants.ANSIBLE_BOOTSTRAP_COMPLETED_FLAG) or \
(is_upgrading and
os.path.isfile(constants.OLD_ANSIBLE_BOOTSTRAP_COMPLETED_FLAG))
if bootstrap_completed:
cert_data = utils.get_admin_ep_cert(
system.distributed_cloud_role)

View File

@ -3050,7 +3050,7 @@ class ManagerTestCase(base.DbTestCase):
'address': '192.168.204.3'}
utils.create_test_address(**address)
ihost_macs = ['22:44:33:55:11:66', '22:44:33:88:11:66']
ihost, mgmt_addr = self.service.get_ihost_by_macs(self.context, ihost_macs)
ihost = self.service.get_ihost_by_macs(self.context, ihost_macs)
self.assertEqual(ihost.mgmt_mac, '22:44:33:55:11:66')
def test_get_ihost_by_macs_no_match(self):

View File

@ -1,5 +1,5 @@
"""
Copyright (c) 2014-2023 Wind River Systems, Inc.
Copyright (c) 2014-2024 Wind River Systems, Inc.
SPDX-License-Identifier: Apache-2.0
@ -223,6 +223,11 @@ MGMT_NETWORK_RECONFIGURATION_UNLOCK = os.path.join(
MGMT_NETWORK_RECONFIG_UPDATE_HOST_FILES = os.path.join(
PLATFORM_CONF_PATH, ".mgmt_reconfig_update_hosts_file")
# Set in the upgrade script to not use FQDN during SW upgrade
# must be deleted after upgrade complete/abort
UPGRADE_DO_NOT_USE_FQDN = os.path.join(
PLATFORM_CONF_PATH, ".upgrade_do_not_use_fqdn")
# Worker configuration flags
# Set after initial application of node manifest