Merge "use curl + avoid partial downloads"

This commit is contained in:
Zuul 2021-11-15 20:15:27 +00:00 committed by Gerrit Code Review
commit 03d8c32830
10 changed files with 117 additions and 45 deletions

View File

@ -202,19 +202,24 @@ number_of_cpus () {
/usr/bin/nproc
}
# FIXME: curl would work better here, but it doesn't support recursive downloads.
#
# Wget corrupts files in some cases:
# - if the download stalls half-way and --tries is set to > 1, and the web
# server doesn't support the Range header with the upper limit omitted,
# (eg Range: bytes=18671712-) wget returns success (0) and leaves a partial
# file behind
# - if download fails half-way, or wget is interrupted, wget returns
# non-zero, but may leave a partial file behind. This is to be expected,
# but we can't easily tell which files were downloaded fully in this case.
#
# See https://bugs.launchpad.net/starlingx/+bug/1950017
get_remote_dir () {
local url="${1}"
local dest_dir="${2}"
mkdir -p "${dest_dir}" || return 1
\rm "${dest_dir}/"index.html*
wget -c -N --recursive --no-parent --no-host-directories --no-directories --directory-prefix="${dest_dir}" "${url}/"
}
get_remote_file () {
local url="${1}"
local dest_dir="${2}"
mkdir -p "${dest_dir}" || return 1
wget -c -N --no-parent --no-host-directories --no-directories --directory-prefix="${dest_dir}" "${url}"
wget -c -N --timeout 15 --recursive --no-parent --no-host-directories --no-directories --directory-prefix="${dest_dir}" "${url}/"
}
get_remote_file_overwrite () {
@ -226,7 +231,7 @@ get_remote_file_overwrite () {
if [ -f "${dest_file}" ]; then
\rm "${dest_file}"
fi
wget -c -N --no-parent --no-host-directories --no-directories --directory-prefix="${dest_dir}" "${url}"
download_file --timestamps "$url" "$dest_file"
}
clean_repodata () {

View File

@ -11,6 +11,7 @@
DL_OTHER_FROM_CENTOS_REPO_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}" )" )"
source $DL_OTHER_FROM_CENTOS_REPO_DIR/url_utils.sh
source $DL_OTHER_FROM_CENTOS_REPO_DIR/utils.sh
usage () {
echo "$0 [-D <distro>] [-s|-S|-u|-U] [-h] <other_download_list.ini> <save_path> [<force_update>]"
@ -144,7 +145,7 @@ for ff in $all; do
echo "remote path: $url_prefix/$_name"
echo "local path: $save_path/$_name"
if wget $url_prefix/$_name; then
if download_file $url_prefix/$_name; then
file_name=`basename $_name`
sub_path=`dirname $_name`
if [ -e "./$file_name" ]; then

View File

@ -345,6 +345,8 @@ download_worker () {
break
else
echo "Warning: $rpm_name not found"
SFILE="$(get_rpm_level_name $rpm_name $lvl)"
\rm -f "$SFILE"
fi
done

View File

@ -144,7 +144,7 @@ is_tarball() {
return $FOUND
}
# Download function using wget command
# Download function using curl or similar command
download_package() {
local tarball_name="$1"
@ -169,11 +169,11 @@ download_package() {
;;
esac
wget --spider "$url"
url_exists "$url"
if [ $? != 0 ]; then
echo "Warning: '$url' is broken"
else
wget -q -t 5 --wait=15 -O "$tarball_name" "$url"
download_file --quiet "$url" "$tarball_name"
if [ $? -eq 0 ]; then
if is_tarball "$tarball_name"; then
echo "Ok: $download_path"
@ -293,7 +293,7 @@ for line in $(cat $tarball_file); do
rm -rf $directory_name
popd > /dev/null # pushd $directory_name
elif [[ "$tarball_name" = 'chartmuseum-v0.12.0-amd64' ]]; then
wget -q -t 5 --wait=15 -O "$tarball_name" "$tarball_url"
download_file --quiet "$tarball_url" "$tarball_name"
if [ $? -ne 0 ]; then
error_count=$((error_count + 1))
popd > /dev/null # pushd $output_tarball
@ -301,7 +301,7 @@ for line in $(cat $tarball_file); do
fi
elif [[ "$tarball_name" = 'OPAE_1.3.7-5_el7.zip' ]]; then
srpm_path="${directory_name}/source_code/"
wget -q -t 5 --wait=15 -O "$tarball_name" "$tarball_url"
download_file --quiet "$tarball_url" "$tarball_name"
if [ $? -ne 0 ]; then
error_count=$((error_count + 1))
popd > /dev/null # pushd $output_tarball
@ -450,7 +450,7 @@ for line in $(cat $tarball_file); do
src_rpm_name="$(echo "$tarball_url" | rev | cut -d/ -f1 | rev)"
wget -q -t 5 --wait=15 -O "$src_rpm_name" "$tarball_url"
download_file --quiet "$tarball_url" "$src_rpm_name"
if [ $? -eq 0 ]; then
rpm2cpio "$src_rpm_name" | cpio --quiet -i "$tarball_name"
mv "$tarball_name" ..
@ -486,9 +486,8 @@ for line in $(cat $tarball_file); do
;;
esac
download_cmd="wget -q -t 5 --wait=15 $url -O $download_path"
if $download_cmd ; then
download_file --quiet "$url" "$download_path"
if [[ $? -eq 0 ]] ; then
if ! is_tarball "$download_path"; then
echo "Warning: file from $url is not a tarball."
\rm "$download_path"

View File

@ -539,7 +539,7 @@ else
fi
#download RPMs/SRPMs from 3rd_party websites (not CentOS repos) by "wget"
#download RPMs/SRPMs from 3rd_party websites (not CentOS repos) using curl
echo "step #2: start downloading RPMs/SRPMs from 3rd-party websites..."
list=${rpms_from_3rd_parties}
level=L1

View File

@ -12,7 +12,7 @@
MAKE_STX_MIRROR_YUM_CONF_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}" )" )"
source "$MAKE_STX_MIRROR_YUM_CONF_DIR/url_utils.sh"
source "$MAKE_STX_MIRROR_YUM_CONF_DIR/utils.sh" || exit 1
DISTRO="centos"
SUDO=sudo
@ -252,7 +252,7 @@ for REPO in $(find "$CENGN_REPOS_DIR" -type f -name '*repo'); do
CENGN_URL="$(url_to_stx_mirror_url "$URL" "$DISTRO")"
# Test CENGN url
wget -q --spider $CENGN_URL
url_exists --quiet "$CENGN_URL"
if [ $? -eq 0 ]; then
# OK, make substitution
sed "s#^baseurl=$URL\$#baseurl=$CENGN_URL#" -i "$REPO"

View File

@ -86,7 +86,8 @@ fi
STARLINGX_ADD_PKGS_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}" )" )"
source $STARLINGX_ADD_PKGS_DIR/../toCOPY/lst_utils.sh
source $STARLINGX_ADD_PKGS_DIR/../toCOPY/lst_utils.sh || exit 1
source $STARLINGX_ADD_PKGS_DIR/utils.sh || exit 1
STXTOOLS=${MY_REPO_ROOT_DIR}/stx-tools
@ -332,7 +333,7 @@ function download_pkg {
echo "Downloading $url"
rpm_path=$LOCALREPO_PATH/$arch/$(basename $relativepath)
wget -q -O $rpm_path $url
download_file --quiet "$url" "$rpm_path"
if [ $? -ne 0 ]; then
echo "Failed to download $url" >&2

View File

@ -8,12 +8,12 @@
DL_UTILS_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}" )" )"
if [ -f "$DL_UTILS_DIR/url_utils.sh" ]; then
source "$DL_UTILS_DIR/url_utils.sh"
elif [ -f "$DL_UTILS_DIR/../url_utils.sh" ]; then
source "$DL_UTILS_DIR/../url_utils.sh"
if [ -f "$DL_UTILS_DIR/utils.sh" ]; then
source "$DL_UTILS_DIR/utils.sh"
elif [ -f "$DL_UTILS_DIR/../utils.sh" ]; then
source "$DL_UTILS_DIR/../utils.sh"
else
echo "Error: Can't find 'url_utils.sh'"
echo "Error: Can't find 'utils.sh'"
exit 1
fi
@ -257,13 +257,15 @@ dl_file_from_url () {
fi
fi
CMD="wget '$URL' --tries=5 --wait=15 --output-document='$DOWNLOAD_PATH'"
CMD="$(get_download_file_command $URL $DOWNLOAD_PATH.dl_part)"
echo "$CMD"
eval $CMD
if [ $? -ne 0 ]; then
\rm -f "$DOWNLOAD_PATH.dl_part"
echo "Error: $CMD"
return 1
fi
\mv -fT "$DOWNLOAD_PATH.dl_part" "$DOWNLOAD_PATH"
;;
*)
echo "Error: Unknown protocol '$PROTOCOL' for url '$URL'"

View File

@ -6,7 +6,9 @@
UTILS_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}" )" )"
source $UTILS_DIR/url_utils.sh
: ${_CURL_OPTS="--fail --location --connect-timeout 15 --speed-time 15 --speed-limit 1 --retry 5"}
source $UTILS_DIR/url_utils.sh || exit 1
get_yum_command() {
local _file=$1
@ -26,17 +28,77 @@ get_yum_command() {
echo "${SUDO} yumdownloader -q -C ${YUMCONFOPT} ${RELEASEVER} $yumdownloader_extra_opts $rpm_name"
}
get_wget_command() {
# Usage: get_download_file_command [--quiet] [--timestamps] URL [OUTPUT_FILE]
get_download_file_command() {
local _opts="$_CURL_OPTS"
while true ; do
case "$1" in
--quiet) _opts+=" --silent --show-error" ;;
--timestamps) _opts+=" --remote-time" ;;
-*)
echo >&2 "Unknown option $1"
return 1
;;
*)
break
esac
shift
done
local _name="$1"
local _ret=""
if [[ "$_name" == http?(s)://* ]]; then
_ret="wget -q $_name"
if [[ $# -gt 1 ]]; then
_opts+=" -o $2"
else
_ret="wget -q $(koji_url $_name)"
_opts+=" -O"
fi
if [[ "$_name" == http?(s)://* ]]; then
_ret="curl $_opts $_name"
else
_ret="curl $_opts $(koji_url $_name)"
fi
echo "$_ret"
}
# Usage: download_file [--quiet] [--timestamps] URL [OUTPUT_FILE]
download_file() {
local _opts="$_CURL_OPTS"
while true ; do
case "$1" in
--quiet) _opts+=" --silent --show-error" ;;
--timestamps) _opts+=" --remote-time" ;;
-*)
echo >&2 "Unknown option $1"
return 1
;;
*)
break
esac
shift
done
if [[ "$#" -gt 1 ]] ; then
local _dest_file="$2"
else
local _dest_file="$(basename "$1")"
fi
if curl $_opts -o "${_dest_file}.dl_part" "$1" ; then
\mv -fT "${_dest_file}.dl_part" "${_dest_file}"
return 0
fi
\rm -f "${_dest_file}.dl_part"
return 1
}
# Usage: url_exists [--quiet] URL
url_exists() {
local _opts
if [[ "$1" == "--quiet" ]] ; then
_opts+=" --quiet"
shift
fi
wget $_opts --spider "$1"
}
get_rpm_level_name() {
local _rpm_name=$1
local _level=$2
@ -166,20 +228,20 @@ get_download_cmd() {
local ff="$1"
local _level="$2"
# Decide if the list will be downloaded using yumdownloader or wget
# Decide if the list will be downloaded using yumdownloader or curl
if [[ $ff != *"#"* ]]; then
rpm_name=$ff
if [ $_level == "K1" ]; then
download_cmd="$(get_wget_command $rpm_name)"
download_cmd="$(get_download_file_command --quiet $rpm_name)"
else
# yumdownloader with the appropriate flag for src, noarch or x86_64
# download_cmd="${SUDOCMD} $(get_yum_command $rpm_name $_level)"
download_cmd="$(get_yum_command $rpm_name $_level)"
fi
else
# Build wget command
# Build the download command
rpm_url=$(get_url "$ff" "$_level")
download_cmd="$(get_wget_command $rpm_url)"
download_cmd="$(get_download_file_command --quiet $rpm_url)"
fi
echo "$download_cmd"

View File

@ -18,7 +18,7 @@ source utils.sh
check_result() {
local _res="$1"
local _expect="$2"
if [ "$_res" != "$_expect" ]; then
if [[ "$_res" != $_expect ]]; then
echo "Fail"
echo "expected $_expect"
echo "returned $_res"
@ -27,14 +27,14 @@ check_result() {
echo "Success"
}
# get_wget_command
# get_download_file_command
res=$(get_wget_command "https://libvirt.org/sources/python/libvirt-python-3.5.0-1.fc24.src.rpm")
expect="wget -q https://libvirt.org/sources/python/libvirt-python-3.5.0-1.fc24.src.rpm"
res=$(get_download_file_command "https://libvirt.org/sources/python/libvirt-python-3.5.0-1.fc24.src.rpm")
expect="curl* https://libvirt.org/sources/python/libvirt-python-3.5.0-1.fc24.src.rpm"
check_result "$res" "$expect"
res=$(get_wget_command "python2-httpbin-0.5.0-6.el7.noarch.rpm")
expect="wget -q https://kojipkgs.fedoraproject.org/packages/python2-httpbin/0.5.0/6.el7/noarch/python2-httpbin-0.5.0-6.el7.noarch.rpm"
res=$(get_download_file_command --quiet "python2-httpbin-0.5.0-6.el7.noarch.rpm")
expect="curl*--silent* https://kojipkgs.fedoraproject.org/packages/python2-httpbin/0.5.0/6.el7/noarch/python2-httpbin-0.5.0-6.el7.noarch.rpm"
check_result "$res" "$expect"
# get_url