#!/bin/bash # # Copyright (c) 2013-2023 Wind River Systems, Inc. # # SPDX-License-Identifier: Apache-2.0 # # # chkconfig: 2345 80 80 # ### BEGIN INIT INFO # Provides: controller_config # Short-Description: Controller node config agent # Default-Start: 2 3 4 5 # Default-Stop: 0 1 6 # Required-Start: # Required-Stop: ### END INIT INFO . /usr/bin/tsconfig . /etc/platform/platform.conf PLATFORM_DIR=/opt/platform ETC_PLATFORM_DIR=/etc/platform VAULT_DIR=$PLATFORM_DIR/.keyring/${SW_VERSION}/python_keyring CONFIG_DIR=$CONFIG_PATH VOLATILE_CONFIG_PASS="/var/run/.config_pass" VOLATILE_CONFIG_FAIL="/var/run/.config_fail" COMPLETED="/etc/platform/.initial_config_complete" INITIAL_MANIFEST_APPLY_FAILED="/etc/platform/.initial_manifest_apply_failed" DELAY_SEC=70 CONTROLLER_UPGRADE_STARTED_FILE="$(basename ${CONTROLLER_UPGRADE_STARTED_FLAG})" IMA_POLICY=/etc/ima.policy PUPPET_CACHE=/etc/puppet/cache PUPPET_CACHE_TMP=/etc/puppet/cache.tmp ACTIVE_CONTROLLER_NOT_FOUND_FLAG="/var/run/.active_controller_not_found" CERT_DIR=/etc/pki/ca-trust/source/anchors OS_ID=$(grep '^ID=' /etc/os-release | cut -f2- -d= | sed -e 's/\"//g') if [ "$OS_ID" == "debian" ] then UPDATE_CA_CMD="update-ca-certificates --localcertsdir ${CERT_DIR}" else UPDATE_CA_CMD="update-ca-trust extract" fi if [ ! -e "${CERT_DIR}" ] then mkdir -p ${CERT_DIR} fi fatal_error() { cat </dev/null drbdadm primary drbd-platform if [ $? -ne 0 ] then drbdadm down drbd-platform systemctl stop drbd.service fatal_error "Failed to make drbd-platform primary" fi mount $PLATFORM_DIR if [ $? -ne 0 ] then # Try mount without fstab echo "Retrying mount platform without fstab" mount /dev/drbd2 $PLATFORM_DIR if [ $? -ne 0 ] then # Add sleep to avoid "Device is held open by someone" error sleep 1 drbdadm secondary drbd-platform drbdadm down drbd-platform systemctl stop drbd.service fatal_error "Unable to mount $PLATFORM_DIR" fi fi else mkdir -p $PLATFORM_DIR nfs-mount controller-platform-nfs:$PLATFORM_DIR $PLATFORM_DIR if [ $? -ne 0 ] then fatal_error "Unable to mount $PLATFORM_DIR" fi fi } umount_platform_dir() { if [ -e "${ACTIVE_CONTROLLER_NOT_FOUND_FLAG}" ] then return fi if [ -e "${PLATFORM_SIMPLEX_FLAG}" ] then umount $PLATFORM_DIR drbdadm secondary drbd-platform drbdadm down drbd-platform systemctl stop drbd.service else umount $PLATFORM_DIR fi } start() { if [ -f /etc/platform/installation_failed ] ; then fatal_error "/etc/platform/installation_failed flag is set. Aborting." fi ###### SECURITY PROFILE (EXTENDED) ################# # If we are in Extended Security Profile mode, # # then before anything else, we need to load the # # IMA Policy so that all configuration operations # # can be measured and appraised # ##################################################### if [ "${security_profile}" = "extended" ] then IMA_LOAD_PATH=/sys/kernel/security/ima/policy if [ -f ${IMA_LOAD_PATH} ]; then echo "Loading IMA Policy" # Best effort operation only, if policy is # malformed then audit logs will indicate this, # and customer will need to load policy manually cat $IMA_POLICY > ${IMA_LOAD_PATH} [ $? -eq 0 ] || logger -t $0 -p warn "IMA Policy could not be loaded, see audit.log" else # the securityfs mount should have been # created had the IMA module loaded properly. # This is therefore a fatal error fatal_error "${IMA_LOAD_PATH} not available. Aborting." fi fi # If hostname is undefined or localhost, something is wrong HOST=$(hostname) if [ -z "$HOST" -o "$HOST" = "localhost" ] then fatal_error "Host undefined. Unable to perform config" fi if [ $HOST != "controller-0" -a $HOST != "controller-1" ] then fatal_error "Invalid hostname for controller node: $HOST" fi IPADDR=$(get_ip $HOST) if [ -z "$IPADDR" ] then fatal_error "Unable to get IP from host: $HOST" fi if [ -f ${INITIAL_MANIFEST_APPLY_FAILED} ] then fatal_error "Initial manifest application failed; Host must be re-installed." fi echo "Configuring controller node... ( IP: ${IPADDR} )" # Remove the flag if it exists rm -f ${ACTIVE_CONTROLLER_NOT_FOUND_FLAG} if [ ! -e "${PLATFORM_SIMPLEX_FLAG}" ] then # try for DELAY_SEC seconds to reach controller-platform-nfs /usr/local/bin/connectivity_test -t ${DELAY_SEC} -i ${IPADDR} controller-platform-nfs if [ $? -ne 0 ] then # 'controller-platform-nfs' is not available, try to config using # locally cached hieradata. echo "No active controller found, will try to config using local cached hieradata." touch ${ACTIVE_CONTROLLER_NOT_FOUND_FLAG} fi if [ ! -e "${ACTIVE_CONTROLLER_NOT_FOUND_FLAG}" ] then # Check whether our installed load matches the active controller CONTROLLER_UUID=`curl -sf http://controller:${http_port}/feed/rel-${SW_VERSION}/install_uuid` if [ $? -ne 0 ] then fatal_error "Unable to retrieve installation uuid from active controller" fi INSTALL_UUID=`cat /var/www/pages/feed/rel-${SW_VERSION}/install_uuid` if [ "$INSTALL_UUID" != "$CONTROLLER_UUID" ] then fatal_error "This node is running a different load than the active controller and must be reinstalled" fi fi fi mount_platform_dir # Cleanup from any previous config runs if [ -e $VOLATILE_CONFIG_FAIL ] then rm -f $VOLATILE_CONFIG_FAIL fi if [ -e $VOLATILE_CONFIG_PASS ] then rm -f $VOLATILE_CONFIG_PASS fi if [ -e $CONFIG_DIR/.license ] then cp $CONFIG_DIR/.license /etc/platform/.license if [ $? -ne 0 ] then fatal_error "Unable to copy $CONFIG_DIR/.license" fi fi if [ -e $CONFIG_DIR/server-cert.pem ] then cp $CONFIG_DIR/server-cert.pem /etc/ssl/private/server-cert.pem if [ $? -ne 0 ] then fatal_error "Unable to copy $CONFIG_DIR/server-cert.pem" fi fi if [ -e $CONFIG_DIR/registry-cert-pkcs1.key ] then cp $CONFIG_DIR/registry-cert-pkcs1.key /etc/ssl/private/registry-cert-pkcs1.key if [ $? -ne 0 ] then fatal_error "Unable to copy $CONFIG_DIR/registry-cert-pkcs1.key" fi fi if [ -e $CONFIG_DIR/etcd/etcd-server.crt ] then cp $CONFIG_DIR/etcd/etcd-server.crt /etc/etcd/etcd-server.crt if [ $? -ne 0 ] then fatal_error "Unable to copy $CONFIG_DIR/etcd/etcd-server.crt" fi fi if [ -e $CONFIG_DIR/etcd/etcd-server.key ] then cp $CONFIG_DIR/etcd/etcd-server.key /etc/etcd/etcd-server.key && chmod 600 /etc/etcd/etcd-server.key if [ $? -ne 0 ] then fatal_error "Unable to copy $CONFIG_DIR/etcd/etcd-server.key" fi fi if [ -e $CONFIG_DIR/etcd/etcd-client.crt ] then cp $CONFIG_DIR/etcd/etcd-client.crt /etc/etcd/etcd-client.crt if [ $? -ne 0 ] then fatal_error "Unable to copy $CONFIG_DIR/etcd/etcd-client.crt" fi fi if [ -e $CONFIG_DIR/etcd/etcd-client.key ] then cp $CONFIG_DIR/etcd/etcd-client.key /etc/etcd/etcd-client.key && chmod 600 /etc/etcd/etcd-client.key if [ $? -ne 0 ] then fatal_error "Unable to copy $CONFIG_DIR/etcd/etcd-client.key" fi fi if [ -e $CONFIG_DIR/etcd/ca.crt ] then cp $CONFIG_DIR/etcd/ca.crt /etc/etcd/ca.crt if [ $? -ne 0 ] then fatal_error "Unable to copy $CONFIG_DIR/etcd/ca.crt" fi fi if [ -e $CONFIG_DIR/etcd/ca.key ] then cp $CONFIG_DIR/etcd/ca.key /etc/etcd/ca.key && chmod 600 /etc/etcd/ca.key if [ $? -ne 0 ] then fatal_error "Unable to copy $CONFIG_DIR/etcd/ca.key" fi fi if [ -e $CONFIG_DIR/registry-cert.key ] then cp $CONFIG_DIR/registry-cert.key /etc/ssl/private/registry-cert.key && chmod 600 /etc/ssl/private/registry-cert.key if [ $? -ne 0 ] then fatal_error "Unable to copy $CONFIG_DIR/registry-cert.key" fi fi if [ -e $CONFIG_DIR/registry-cert.crt ] then cp $CONFIG_DIR/registry-cert.crt /etc/ssl/private/registry-cert.crt if [ $? -ne 0 ] then fatal_error "Unable to copy $CONFIG_DIR/registry-cert.crt to certificates dir" fi mkdir -p /etc/docker/certs.d/registry.local:9001/ chmod 700 /etc/docker/certs.d/registry.local:9001/ cp $CONFIG_DIR/registry-cert.crt /etc/docker/certs.d/registry.local:9001/registry-cert.crt if [ $? -ne 0 ] then fatal_error "Unable to copy $CONFIG_DIR/registry-cert.crt to docker dir" fi fi if [ -e $CONFIG_DIR/registry.central/registry-cert.crt ] then mkdir -p /etc/docker/certs.d/registry.central:9001/ chmod 700 /etc/docker/certs.d/registry.central:9001/ cp $CONFIG_DIR/registry.central/registry-cert.crt /etc/docker/certs.d/registry.central:9001/registry-cert.crt if [ $? -ne 0 ] then fatal_error "Unable to copy $CONFIG_DIR/registry-cert.crt to docker dir for central registry" fi fi if [ -e $CONFIG_DIR/dc-adminep-root-ca.crt ] then cp $CONFIG_DIR/dc-adminep-root-ca.crt $CERT_DIR if [ $? -ne 0 ] then fatal_error "Unable to copy $CONFIG_DIR/dc-adminep-root-ca.crt to certificates dir" fi # Update system trusted CA cert list with the new CA cert. $UPDATE_CA_CMD if [ $? -ne 0 ] then fatal_error "Unable to update system trusted CA certificate list" fi fi if [ -e $CONFIG_DIR/openstack ] then if [ ! -e /etc/ssl/private/openstack ] then mkdir -p /etc/ssl/private/openstack chmod 755 /etc/ssl/private/openstack fi cp -p $CONFIG_DIR/openstack/* /etc/ssl/private/openstack if [ $? -ne 0 ] then fatal_error "Unable to copy openstack certificate files" fi fi # Copy over external_ceph config files if [ -e $CONFIG_DIR/ceph-config ] then cp $CONFIG_DIR/ceph-config/*.conf /etc/ceph/ if [ $? -ne 0 ] then fatal_error "Unable to copy ceph-external config files" fi fi # Copy over kube-apiserver encryption provider config, admission control config and # extra files referenced in admission control config if [ -e $CONFIG_DIR/kubernetes/ ] then FILES=`ls $CONFIG_DIR/kubernetes/*.yaml` for FILE in $FILES do cp $FILE /etc/kubernetes/ if [ $? -ne 0 ] then fatal_error "Unable to copy kube-apiserver config file: $FILE" else FILENAME=`basename $FILE` chmod 600 /etc/kubernetes/$FILENAME fi done fi # Copy over default audit policy config if [ -e $CONFIG_DIR/kubernetes/default-audit-policy.yaml ] then cp $CONFIG_DIR/kubernetes/default-audit-policy.yaml /etc/kubernetes/ if [ $? -ne 0 ] then fatal_error "Unable to copy default audit policy config file" else chmod 600 /etc/kubernetes/default-audit-policy.yaml fi fi # Copy over k8s-coredump-handler token if [ -e $CONFIG_DIR/k8s-coredump-conf.json ] then cp $CONFIG_DIR/k8s-coredump-conf.json /etc/k8s-coredump-conf.json if [ $? -ne 0 ] then fatal_error "Unable to copy k8s-coredump-handler token config file" else chmod 600 /etc/k8s-coredump-conf.json fi fi if [ ! -e "${ACTIVE_CONTROLLER_NOT_FOUND_FLAG}" ] then # Keep the /opt/branding directory to preserve any new files rm -rf /opt/branding/*.tgz cp $CONFIG_DIR/branding/*.tgz /opt/branding 2>/dev/null # banner customization always returns 0, success: /usr/sbin/apply_banner_customization cp $CONFIG_DIR/hosts /etc/hosts if [ $? -ne 0 ] then fatal_error "Unable to copy $CONFIG_DIR/hosts" fi fi # Replace the dnsmasq files with new Management Network range if [ -e $ETC_PLATFORM_DIR/.mgmt_network_reconfiguration_unlock ]; then echo "Management networking reconfiguration ongoing, replacing dnsmasq config files." if [ -e $CONFIG_DIR/dnsmasq.addn_hosts.temp ] && \ [ -e $CONFIG_DIR/dnsmasq.hosts.temp ]; then mv -f $CONFIG_DIR/dnsmasq.hosts.temp $CONFIG_DIR/dnsmasq.hosts mv -f $CONFIG_DIR/dnsmasq.addn_hosts.temp $CONFIG_DIR/dnsmasq.addn_hosts # update the cached files too mv -f $ETC_PLATFORM_DIR/dnsmasq.hosts.temp $ETC_PLATFORM_DIR/dnsmasq.hosts mv -f $ETC_PLATFORM_DIR/dnsmasq.addn_hosts.temp $ETC_PLATFORM_DIR/dnsmasq.addn_hosts else fatal_error "Management networking reconfiguration ongoing and dnsmasq files do not exist." fi # Create a flag to update files in /opt/platform/ ( i.e: hosts file ) touch $ETC_PLATFORM_DIR/.mgmt_reconfig_update_hosts_file # delete flags rm -f $ETC_PLATFORM_DIR/.mgmt_network_reconfiguration_ongoing rm -f $ETC_PLATFORM_DIR/.mgmt_network_reconfiguration_unlock # Create a flag to indicate that ceph mon reconfiguration is required if ceph is configured if [ -e $ETC_PLATFORM_DIR/.node_ceph_configured ] then touch $ETC_PLATFORM_DIR/.ceph_mon_reconfig_required fi fi hostname > /etc/hostname if [ $? -ne 0 ] then fatal_error "Unable to write /etc/hostname" fi # Our PXE config files are located in the config directory. Create a # symbolic link if it is not already created. if [ ! -L /var/pxeboot/pxelinux.cfg ] then ln -sf $CONFIG_DIR/pxelinux.cfg /var/pxeboot/pxelinux.cfg fi # Upgrade related checks if [ ! -e "${PLATFORM_SIMPLEX_FLAG}" ] && [ ! -e "${ACTIVE_CONTROLLER_NOT_FOUND_FLAG}" ] then VOLATILE_ETC_PLATFORM_MOUNT=$VOLATILE_PATH/etc_platform mkdir $VOLATILE_ETC_PLATFORM_MOUNT nfs-mount controller-platform-nfs:/etc/platform $VOLATILE_ETC_PLATFORM_MOUNT if [ $? -eq 0 ] then # Generate Rollback flag if necessary if [ -f $VOLATILE_ETC_PLATFORM_MOUNT/.upgrade_rollback ] then touch $UPGRADE_ROLLBACK_FLAG fi # Check whether we are upgrading controller-1. UPGRADE_CONTROLLER=0 if [ -f $VOLATILE_ETC_PLATFORM_MOUNT/.upgrade_controller_1 ] then if [ -f $VOLATILE_ETC_PLATFORM_MOUNT/.upgrade_controller_1_fail ] then exit_error "Controller-1 upgrade previously failed. Upgrade must be aborted." fi if [ -f $VOLATILE_ETC_PLATFORM_MOUNT/$CONTROLLER_UPGRADE_STARTED_FILE ] then touch $VOLATILE_ETC_PLATFORM_MOUNT/.upgrade_controller_1_fail exit_error "Controller-1 data migration already in progress. Upgrade must be aborted" fi touch $VOLATILE_ETC_PLATFORM_MOUNT/$CONTROLLER_UPGRADE_STARTED_FILE UPGRADE_CONTROLLER=1 fi # Check whether software versions match on the two controllers MATE_SW_VERSION=`grep sw_version $VOLATILE_ETC_PLATFORM_MOUNT/platform.conf | awk -F\= '{print $2}'` if [ $SW_VERSION != $MATE_SW_VERSION ] then echo "Controllers are running different software versions" echo "SW_VERSION: $SW_VERSION MATE_SW_VERSION: $MATE_SW_VERSION" # This environment variable allows puppet manifests to behave # differently when the controller software versions do not match. export CONTROLLER_SW_VERSIONS_MISMATCH=true fi umount $VOLATILE_ETC_PLATFORM_MOUNT rmdir $VOLATILE_ETC_PLATFORM_MOUNT if [ $UPGRADE_CONTROLLER -eq 1 ] then #R3 Removed umount_platform_dir echo "Upgrading controller-1. This will take some time..." /usr/bin/upgrade_controller $MATE_SW_VERSION $SW_VERSION exit $? fi else umount_platform_dir rmdir $VOLATILE_ETC_PLATFORM_MOUNT fatal_error "Unable to mount /etc/platform" fi fi if [ ! -e "${ACTIVE_CONTROLLER_NOT_FOUND_FLAG}" ] then mkdir -p /etc/postgresql/ OS_ID=$(grep '^ID=' /etc/os-release | cut -f2- -d= | sed -e 's/\"//g') if [ ${OS_ID} == "centos" ]; then REAL_CONFIG_DIR=/etc/postgresql elif [ ${OS_ID} == "debian" ]; then REAL_CONFIG_DIR=/etc/postgresql/13/main for f in ${CONFIG_DIR}/postgresql/*.conf ; do link_basename=$(basename ${f}) link=/etc/postgresql/${link_basename} echo "Removing ${link} to allow link to be created" rm ${link} echo "Linking ${link} to ${REAL_CONFIG_DIR}/${link_basename}" ln -s ${REAL_CONFIG_DIR}/${link_basename} ${link} done fi cp -p ${CONFIG_DIR}/postgresql/*.conf ${REAL_CONFIG_DIR}/ if [ $? -ne 0 ] then fatal_error "Unable to copy .conf files to /etc/postgresql" fi # rsync the hieradata to temp cache directory rm -rf ${PUPPET_CACHE_TMP} rsync -a "${PUPPET_PATH}/hieradata" "${PUPPET_CACHE_TMP}" if [ $? -ne 0 ] then umount_platform_dir fatal_error "Failed to rsync puppet hieradata from ${PUPPET_PATH} to temp cache directory ${PUPPET_CACHE_TMP}" fi # flush data to persistent storage and rename the temp puppet cache # directory to final cache directory. This is more atomic than straight # copy and minimize the chance to have incomplete or corrupted cached # hieradata. sync rm -rf ${PUPPET_CACHE} mv "${PUPPET_CACHE_TMP}" "${PUPPET_CACHE}" if [ $? -ne 0 ] then umount_platform_dir fatal_error "Failed to rename puppet temp cache directory ${PUPPET_CACHE_TMP} to cache directory ${PUPPET_CACHE}" fi # Copy the staging secured vault cp -RL $VAULT_DIR /tmp if [ $? -ne 0 ] then umount_platform_dir fatal_error "Failed to copy vault directory $VAULT_DIR" fi fi # Unmount umount_platform_dir # Apply the puppet manifest HIERADATA_PATH=${PUPPET_CACHE}/hieradata HOST_HIERA=${HIERADATA_PATH}/${HOST}.yaml if [ -f ${HOST_HIERA} ]; then echo "$0: Running puppet manifest apply" puppet-manifest-apply.sh ${HIERADATA_PATH} ${HOST} ${subfunction} RC=$? if [ $RC -ne 0 ] then fatal_error "Failed to run the puppet manifest (RC:$RC)" if [ ! -f ${COMPLETED} ] then # The initial manifest application failed. We need to remember # this so we don't attempt to reapply them after a reboot. # Many of our manifests do not support being run more than # once with the $COMPLETED flag unset. touch $INITIAL_MANIFEST_APPLY_FAILED fatal_error "Failed to run the puppet manifest (RC:$RC); Host must be re-installed." else fatal_error "Failed to run the puppet manifest (RC:$RC)" fi fi else fatal_error "Host configuration not yet available for this node ($(hostname)=${IPADDR}); aborting configuration." fi # Cleanup the secured vault rm -rf /tmp/python_keyring if [ ! -e "${PLATFORM_SIMPLEX_FLAG}" ] then # The second controller is now configured - remove the simplex flag on # the mate controller. mkdir /tmp/mateflag nfs-mount controller-platform-nfs:/etc/platform /tmp/mateflag if [ $? -eq 0 ] then rm -f /tmp/mateflag/simplex umount /tmp/mateflag rmdir /tmp/mateflag else echo "Unable to mount /etc/platform" fi fi touch $COMPLETED touch $VOLATILE_CONFIG_PASS } stop () { # Nothing to do return } case "$1" in start) start ;; stop) stop ;; *) echo "Usage: $0 {start|stop}" exit 1 ;; esac exit 0