Merge "Parallel downloads"

This commit is contained in:
Zuul 2021-03-25 00:00:22 +00:00 committed by Gerrit Code Review
commit c7572a6327
4 changed files with 267 additions and 73 deletions

View File

@ -1,12 +1,12 @@
#!/bin/bash -e
#!/bin/bash
#
# SPDX-License-Identifier: Apache-2.0
#
# download RPMs/SRPMs from different sources.
# this script was originated by Brian Avery, and later updated by Yong Hu
set -o errexit
set -o nounset
# set -o errexit
# set -o nounset
# By default, we use "sudo" and we don't use a local yum.conf. These can
# be overridden via flags.
@ -185,6 +185,181 @@ if [ $CLEAN_LOGS_ONLY -eq 1 ];then
exit 0
fi
# State shared by the parallel download worker pool.
#
# NOTE(review): STOP_SCHEDULING is not referenced in the visible code;
# presumably a flag to stop launching new workers -- confirm.
STOP_SCHEDULING=0
# Set to 1 by reaper when a worker exits with a non-zero status.
# download resets it to 0 on entry and uses it as its return value.
FOUND_ERRORS=0
# Default size of the worker pool.
MAX_WORKERS=8
# Number of workers currently allocated: incremented by download when it
# allocates a slot, decremented by reaper when it collects a worker.
workers=0
# Pool size actually consulted by the worker-pool functions.
max_workers=$MAX_WORKERS
# An associative array that maps worker index to pid, or to one of two
# special values:
# 'Idle' indicates no running thread.
# 'Busy' indicates the worker is allocated, but its pid isn't known yet.
declare -A dl_env
#
# init_dl_env: Reset the worker table so that every slot from 0 to
#              max_workers-1 is marked 'Idle'.
#   Globals: max_workers (read), dl_env (written)
#
init_dl_env () {
    local slot
    local last=$((max_workers-1))

    for (( slot = 0; slot <= last; slot++ )); do
        dl_env[$slot]='Idle'
    done
}
#
# get_idle_dl_env: Find an idle worker, mark it allocated ('Busy')
#                  and return its index.
#   Globals: max_workers (read), dl_env (read and written)
#   Returns: the index of the allocated slot as the exit status (0-254),
#            or 255 if no slot is idle.
#
get_idle_dl_env () {
    local i
    local stop=$((max_workers-1))

    # The index travels back in the exit status, and 255 is reserved for
    # "nothing found", so only slots 0-254 can ever be handed out.
    if [ "$stop" -ge 255 ]; then
        stop=254
    fi

    for (( i = 0; i <= stop; i++ )); do
        # Quoted, with an empty default: a slot that was never initialised
        # is simply skipped instead of turning the test into a
        # "unary operator expected" error.
        if [ "${dl_env[$i]:-}" == 'Idle' ]; then
            dl_env[$i]='Busy'
            return "$i"
        fi
    done

    return 255
}
#
# set_dl_env_pid: Record the pid of a previously allocated worker.
#   Arguments: $1 - worker index
#              $2 - value to store for that worker (the pid)
#   Globals:   dl_env (written)
#
set_dl_env_pid () {
    local slot=$1 pid=$2

    dl_env[$slot]=$pid
}
#
# release_dl_env: Mark a worker as idle. Call after reaping the thread.
#   Arguments: $1 - worker index
#   Globals:   dl_env (written)
#
release_dl_env () {
    local slot=$1

    dl_env[$slot]='Idle'
}
#
# reaper: Look for worker threads that have exited.
#         Check/log each one's exit code, and release the worker.
#   Globals: max_workers, DL_MIRROR_LOG_DIR (read);
#            dl_env, workers, FOUND_ERRORS (written)
#   Outputs: an ERROR line on stdout for each failed worker; the worker's
#            log and the same ERROR line are appended to
#            $DL_MIRROR_LOG_DIR/errors.
#   Returns: the number of threads reaped, as the exit status.
#
reaper () {
    local reaped=0
    local last_reaped=-1
    local i=0
    local stop
    local p=0
    local ret=0
    local msg=""

    stop=$((max_workers-1))
    # Slot indexes are passed around as exit statuses, so only 0-254 exist.
    if [ "$stop" -ge 255 ]; then
        stop=254
    fi

    # Rescan the table until a complete pass reaps nothing new.
    while [ "$reaped" -gt "$last_reaped" ]; do
        last_reaped=$reaped
        for (( i = 0; i <= stop; i++ )); do
            # A slot that was never initialised is treated as idle; an empty
            # pid would otherwise turn the 'wait' below into "wait for all
            # children".
            p=${dl_env[$i]:-Idle}
            if [ "$p" == "Idle" ] || [ "$p" == "Busy" ]; then
                continue
            fi

            # Signal 0 only probes for existence: still alive, come back later.
            if kill -0 "$p" &> /dev/null; then
                continue
            fi

            # The process is gone; collect its exit status and free the slot.
            wait "$p"
            ret=$?
            workers=$((workers-1))
            reaped=$((reaped+1))
            release_dl_env "$i"

            if [ "$ret" -ne 0 ]; then
                # NOTE(review): presumably gives the worker's log pipeline
                # time to finish writing before the log is copied -- confirm.
                sleep 1
                msg="ERROR: $FUNCNAME (${LINENO}): Failed to download in 'b$i'"
                echo "$msg"
                # Paths are quoted so a log directory containing whitespace
                # does not make the redirections fail.
                cat "$DL_MIRROR_LOG_DIR/$i" >> "$DL_MIRROR_LOG_DIR/errors"
                echo "$msg" >> "$DL_MIRROR_LOG_DIR/errors"
                echo "" >> "$DL_MIRROR_LOG_DIR/errors"
                FOUND_ERRORS=1
            fi
        done
    done

    return $reaped
}
#
# download_worker: Download one file.
#                  This is the entry point for a worker thread.
#   Arguments: $1 - worker index (not used by the body; kept for callers)
#              $2 - the package entry to download
#              $3 - level handed to the download helpers when the source
#                   is $dl_from_upstream
#   Globals:   dl_source, dl_from_stx_mirror, dl_from_upstream,
#              LOG, MISSING_SRPMS (read)
#   Outputs:   progress messages on stdout; on failure appends to $LOG
#              and $MISSING_SRPMS
#   Returns:   0 if the file is already present or was downloaded,
#              1 if no source in $dl_source provided it.
#
download_worker () {
    local dl_idx=$1
    local ff="$2"
    local _level=$3

    local rpm_name=""
    local dest_dir=""
    local rc=0
    local dl_result=1
    local lvl=""
    local download_cmd=""
    local download_url=""
    local SFILE=""
    local _arch=""
    # Loop variable kept local so it does not leak into the caller.
    local dl_src=""

    _arch=$(get_arch_from_rpm "$ff")
    rpm_name="$(get_rpm_name "$ff")"
    dest_dir="$(get_dest_directory "$_arch")"

    # Nothing to do when the file has already been downloaded.
    if [ -e "$dest_dir/$rpm_name" ]; then
        echo "Already have $dest_dir/$rpm_name"
        return $rc
    fi

    # Try each configured source in order; stop at the first success.
    # $dl_source is deliberately unquoted: it is a whitespace separated list.
    for dl_src in $dl_source; do
        case $dl_src in
            "$dl_from_stx_mirror")
                lvl=$dl_from_stx_mirror
                ;;
            "$dl_from_upstream")
                lvl=$_level
                ;;
            *)
                echo "Error: Unknown dl_source '$dl_src'"
                continue
                ;;
        esac

        download_cmd="$(get_download_cmd "$ff" "$lvl")"

        echo "Looking for $rpm_name"
        echo "--> run: $download_cmd"
        # Deliberately unquoted: the command line is stored as one string
        # and relies on word splitting to become a command plus arguments.
        if $download_cmd ; then
            download_url="$(get_url "$ff" "$lvl")"
            SFILE="$(get_rpm_level_name "$rpm_name" "$lvl")"
            process_result "$_arch" "$dest_dir" "$download_url" "$SFILE"
            dl_result=0
            break
        else
            echo "Warning: $rpm_name not found"
        fi
    done

    if [ "$dl_result" -eq 1 ]; then
        echo "Error: $rpm_name not found"
        # Quoted so log paths containing whitespace do not break the
        # redirections.
        echo "missing_srpm:$rpm_name" >> "$LOG"
        echo "$rpm_name" >> "$MISSING_SRPMS"
        rc=1
    fi

    return $rc
}
# Function to download different types of RPMs in different ways
download () {
local _file=$1
@ -194,75 +369,62 @@ download () {
local _arch=""
local rc=0
local download_cmd=""
local download_url=""
local rpm_name=""
local SFILE=""
local lvl
local dl_result
FOUND_ERRORS=0
_list=$(cat $_file)
_from=$(get_from $_file)
echo "now the rpm will come from: $_from"
for ff in $_list; do
_arch=$(get_arch_from_rpm $ff)
rpm_name="$(get_rpm_name $ff)"
dest_dir="$(get_dest_directory $_arch)"
if [ ! -e $dest_dir/$rpm_name ]; then
dl_result=1
for dl_src in $dl_source; do
case $dl_src in
$dl_from_stx_mirror)
lvl=$dl_from_stx_mirror
;;
$dl_from_upstream)
lvl=$_level
;;
*)
echo "Error: Unknown dl_source '$dl_src'"
continue
;;
esac
download_cmd="$(get_download_cmd $ff $lvl)"
echo "Looking for $rpm_name"
echo "--> run: $download_cmd"
if $download_cmd ; then
download_url="$(get_url $ff $lvl)"
SFILE="$(get_rpm_level_name $rpm_name $lvl)"
process_result "$_arch" "$dest_dir" "$download_url" "$SFILE"
dl_result=0
break
else
echo "Warning: $rpm_name not found"
fi
done
if [ $dl_result -eq 1 ]; then
echo "Error: $rpm_name not found"
echo "missing_srpm:$rpm_name" >> $LOG
echo $rpm_name >> $MISSING_SRPMS
rc=1
# Free up a worker if none available
while [ $workers -ge $max_workers ]; do
reaper
reaped=$?
if [ $reaped -eq 0 ]; then
sleep 0.1
fi
else
echo "Already have $dest_dir/$rpm_name"
done
# Allocate a worker. b=the worker index
workers=$((workers+1))
get_idle_dl_env
b=$?
if [ $b -ge 255 ]; then
echo "get_idle_dl_env failed to find a free slot"
exit 1
fi
echo
PREFIX="b$b"
# Launch a thread in the background
( download_worker $b $ff $_level 2>&1 | sed "s#^#${PREFIX}: #" | tee $DL_MIRROR_LOG_DIR/$b; exit ${PIPESTATUS[0]} ) &
# Record the pid of background process
pp=$!
set_dl_env_pid $b $pp
done
return $rc
# Wait for remaining workers to exit
while [ $workers -gt 0 ]; do
reaper
reaped=$?
if [ $reaped -eq 0 ]; then
sleep 0.1
fi
done
return $FOUND_ERRORS
}
# Init the pool of worker threads
init_dl_env
# Prime the cache
loop_count=0
max_loop_count=5
echo "${SUDOCMD} yum ${YUMCONFOPT} ${RELEASEVER} makecache"
while ! ${SUDOCMD} yum ${YUMCONFOPT} ${RELEASEVER} makecache ; do
while ! ${SUDOCMD} yum ${YUMCONFOPT} ${RELEASEVER} makecache fast ; do
# To protect against intermittent 404 errors, we'll retry
# a few times. The suspected issue is pulling repodata
# from multiple source that are temporarily inconsistent.

View File

@ -1,4 +1,4 @@
#!/bin/bash -e
#!/bin/bash
#
# SPDX-License-Identifier: Apache-2.0
#
@ -19,6 +19,11 @@ cleanup () {
trap "cleanup ; exit 1" INT HUP TERM QUIT
trap "cleanup" EXIT
# Clear the error log before we begin
if [ -f $DL_MIRROR_LOG_DIR/errors ]; then
rm -f $DL_MIRROR_LOG_DIR/errors
fi
# A temporary compatibility step to save download time
# during the shift to the new DL_MIRROR_OUTPUT_DIR location.
#
@ -115,8 +120,8 @@ make_stx_mirror_yum_conf="${DOWNLOAD_MIRROR_DIR}/make_stx_mirror_yum_conf.sh"
# track optional arguments
change_group_ids=1
use_system_yum_conf=1
alternate_yum_conf=""
use_system_yum_conf=0
alternate_yum_conf="${DOWNLOAD_MIRROR_DIR}/yum.conf.sample"
alternate_repo_dir=""
rpm_downloader_extra_args=""
tarball_downloader_extra_args=""
@ -166,6 +171,8 @@ dl_from_upstream () {
MULTIPLE_DL_FLAG_ERROR_MSG="Error: Please use only one of: -s,-S,-u,-U"
TEMP_DIR=""
TEMP_DIR_CLEANUP=""
multiple_dl_flag_check () {
if [ "$dl_flag" != "" ]; then
@ -177,7 +184,7 @@ multiple_dl_flag_check () {
# Parse out optional arguments
while getopts "c:Cd:ghI:sl:L:nSuUW:" o; do
while getopts "c:Cd:ghI:sl:L:nt:ySuUW:" o; do
case "${o}" in
c)
# Pass -c ("use alternate yum.conf") to rpm downloader
@ -214,6 +221,15 @@ while getopts "c:Cd:ghI:sl:L:nSuUW:" o; do
rpm_downloader_extra_args="${rpm_downloader_extra_args} -n"
SUDO=""
;;
t)
# Set TEMP_DIR
TEMP_DIR="${OPTARG}"
;;
y)
# Use the host's /etc/yum.conf
use_system_yum_conf=1
alternate_yum_conf=""
;;
s)
# Download from StarlingX mirror only. Do not use upstream sources.
multiple_dl_flag_check
@ -383,16 +399,15 @@ echo "step #0: Configuring yum repos ..."
if [ ${use_system_yum_conf} -ne 0 ]; then
# Restore StarlingX_3rd repos from backup
REPO_SOURCE_DIR=/localdisk/yum.repos.d
REPO_DIR=/etc/yum.repos.d
if [ -d $REPO_SOURCE_DIR ] && [ -d $REPO_DIR ]; then
${SUDO} \cp -f $REPO_SOURCE_DIR/*.repo $REPO_DIR/
fi
if [ $layer != "all" ]; then
if [ -d ${config_dir}/${distro}/${layer}/yum.repos.d ]; then
${SUDO} \cp -f ${config_dir}/${distro}/${layer}/yum.repos.d/*.repo $REPO_DIR
${SUDO} \cp -f -v ${config_dir}/${distro}/${layer}/yum.repos.d/*.repo $REPO_DIR/
fi
else
# copy all layers
${SUDO} \cp -f -v ${config_dir}/${distro}/*/yum.repos.d/*.repo $REPO_DIR/
fi
fi
@ -411,7 +426,6 @@ if [ $use_system_yum_conf -eq 0 ]; then
fi
fi
TEMP_DIR=""
rpm_downloader_extra_args="${rpm_downloader_extra_args} -D $distro"
if [ "$dl_flag" != "" ]; then
@ -428,7 +442,19 @@ if ! dl_from_stx; then
else
# We want to use stx mirror, so we need to create a new, modified yum.conf and yum.repos.d.
# The modifications will add or substitute repos pointing to the StarlingX mirror.
TEMP_DIR=$(mktemp -d /tmp/stx_mirror_XXXXXX)
if [ "$TEMP_DIR" == "" ]; then
if [ "$MY_WORKSPACE" != "" ]; then
TEMP_DIR="$MY_WORKSPACE/tmp/yum"
else
TEMP_DIR=$(mktemp -d /tmp/stx_mirror_XXXXXX)
TEMP_DIR_CLEANUP="y"
fi
fi
if [ ! -d $TEMP_DIR ]; then
mkdir -p ${TEMP_DIR}
fi
TEMP_CONF="$TEMP_DIR/yum.conf"
need_file ${make_stx_mirror_yum_conf}
need_dir ${TEMP_DIR}
@ -685,7 +711,7 @@ fi
#
# Clean up the mktemp directory, if required.
#
if [ "$TEMP_DIR" != "" ]; then
if [ "$TEMP_DIR" != "" ] && [ "$TEMP_DIR_CLEANUP" == "y" ]; then
echo "${SUDO} rm -rf $TEMP_DIR"
${SUDO} \rm -rf "$TEMP_DIR"
fi

View File

@ -15,6 +15,7 @@ MAKE_STX_MIRROR_YUM_CONF_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}" )" )"
source "$MAKE_STX_MIRROR_YUM_CONF_DIR/url_utils.sh"
DISTRO="centos"
SUDO=sudo
TEMP_DIR=""
SRC_REPO_DIR="$MAKE_STX_MIRROR_YUM_CONF_DIR/yum.repos.d"
@ -38,6 +39,7 @@ usage () {
echo " 'yum.repos.d' in same directory as this script"
echo "-l <layer> = Download only packages required to build a given layer"
echo "-u <lower-layer>,<build-type>,<repo_url> = Add/change the repo baseurl for a lower layer"
echo "-n don't use sudo"
}
declare -A layer_urls
@ -61,7 +63,7 @@ set_layer_urls () {
#
# option processing
#
while getopts "D:d:l:Rr:u:y:" o; do
while getopts "D:d:l:nRr:u:y:" o; do
case "${o}" in
D)
DISTRO="${OPTARG}"
@ -72,6 +74,9 @@ while getopts "D:d:l:Rr:u:y:" o; do
l)
LAYER="${OPTARG}"
;;
n)
SUDO=""
;;
r)
SRC_REPO_DIR="${OPTARG}"
;;
@ -132,7 +137,7 @@ get_releasever () {
if [ -f $SRC_YUM_CONF ] && grep -q '^releasever=' $SRC_YUM_CONF; then
grep '^releasever=' $SRC_YUM_CONF | cut -d '=' -f 2
else
yum version nogroups | grep Installed | cut -d ' ' -f 2 | cut -d '/' -f 1
${SUDO} yum version nogroups | grep Installed | cut -d ' ' -f 2 | cut -d '/' -f 1
fi
}
@ -143,7 +148,7 @@ get_releasever () {
# cross compiling.
#
get_arch () {
yum version nogroups | grep Installed | cut -d ' ' -f 2 | cut -d '/' -f 2
${SUDO} yum version nogroups | grep Installed | cut -d ' ' -f 2 | cut -d '/' -f 2
}

View File

@ -23,7 +23,7 @@ get_yum_command() {
yumdownloader_extra_opts="--exclude='*.i686' --archlist=noarch,x86_64"
fi
echo "yumdownloader -q -C ${YUMCONFOPT} ${RELEASEVER} $yumdownloader_extra_opts $rpm_name"
echo "${SUDO} yumdownloader -q -C ${YUMCONFOPT} ${RELEASEVER} $yumdownloader_extra_opts $rpm_name"
}
get_wget_command() {
@ -74,7 +74,7 @@ get_url() {
# filter urls for the desired arch.
local arr=( $(split_filename $_name) )
local arch=${arr[3]}
_ret="$($_url_cmd | grep "[.]$arch[.]rpm$" | head -n 1)"
_ret="$(${SUDO} $_url_cmd | grep "[.]$arch[.]rpm$" | head -n 1)"
fi
echo "$_ret"
}
@ -173,7 +173,8 @@ get_download_cmd() {
download_cmd="$(get_wget_command $rpm_name)"
else
# yumdownloader with the appropriate flag for src, noarch or x86_64
download_cmd="${SUDOCMD} $(get_yum_command $rpm_name $_level)"
# download_cmd="${SUDOCMD} $(get_yum_command $rpm_name $_level)"
download_cmd="$(get_yum_command $rpm_name $_level)"
fi
else
# Build wget command