Merge "use curl + avoid partial downloads"

This commit is contained in:
Zuul 2021-11-15 20:15:27 +00:00 committed by Gerrit Code Review
commit 03d8c32830
10 changed files with 117 additions and 45 deletions

View File

@ -202,19 +202,24 @@ number_of_cpus () {
/usr/bin/nproc /usr/bin/nproc
} }
# FIXME: curl would work better here, but it doesn't support recursive downloads.
#
# Wget corrupts files in some cases:
# - if the download stalls half-way and --tries is set to > 1, and the web
# server doesn't support the Range header with the upper limit omitted
# (e.g. Range: bytes=18671712-), wget returns success (0) and leaves a
# partial file behind
# - if the download fails half-way, or wget is interrupted, wget returns
# non-zero, but may leave a partial file behind. This is to be expected,
# but we can't easily tell which files were downloaded fully in this case.
#
# See https://bugs.launchpad.net/starlingx/+bug/1950017
get_remote_dir () { get_remote_dir () {
local url="${1}" local url="${1}"
local dest_dir="${2}" local dest_dir="${2}"
mkdir -p "${dest_dir}" || return 1 mkdir -p "${dest_dir}" || return 1
\rm "${dest_dir}/"index.html* \rm "${dest_dir}/"index.html*
wget -c -N --recursive --no-parent --no-host-directories --no-directories --directory-prefix="${dest_dir}" "${url}/" wget -c -N --timeout 15 --recursive --no-parent --no-host-directories --no-directories --directory-prefix="${dest_dir}" "${url}/"
}
get_remote_file () {
local url="${1}"
local dest_dir="${2}"
mkdir -p "${dest_dir}" || return 1
wget -c -N --no-parent --no-host-directories --no-directories --directory-prefix="${dest_dir}" "${url}"
} }
get_remote_file_overwrite () { get_remote_file_overwrite () {
@ -226,7 +231,7 @@ get_remote_file_overwrite () {
if [ -f "${dest_file}" ]; then if [ -f "${dest_file}" ]; then
\rm "${dest_file}" \rm "${dest_file}"
fi fi
wget -c -N --no-parent --no-host-directories --no-directories --directory-prefix="${dest_dir}" "${url}" download_file --timestamps "$url" "$dest_file"
} }
clean_repodata () { clean_repodata () {

View File

@ -11,6 +11,7 @@
DL_OTHER_FROM_CENTOS_REPO_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}" )" )" DL_OTHER_FROM_CENTOS_REPO_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}" )" )"
source $DL_OTHER_FROM_CENTOS_REPO_DIR/url_utils.sh source $DL_OTHER_FROM_CENTOS_REPO_DIR/url_utils.sh
source $DL_OTHER_FROM_CENTOS_REPO_DIR/utils.sh
usage () { usage () {
echo "$0 [-D <distro>] [-s|-S|-u|-U] [-h] <other_download_list.ini> <save_path> [<force_update>]" echo "$0 [-D <distro>] [-s|-S|-u|-U] [-h] <other_download_list.ini> <save_path> [<force_update>]"
@ -144,7 +145,7 @@ for ff in $all; do
echo "remote path: $url_prefix/$_name" echo "remote path: $url_prefix/$_name"
echo "local path: $save_path/$_name" echo "local path: $save_path/$_name"
if wget $url_prefix/$_name; then if download_file $url_prefix/$_name; then
file_name=`basename $_name` file_name=`basename $_name`
sub_path=`dirname $_name` sub_path=`dirname $_name`
if [ -e "./$file_name" ]; then if [ -e "./$file_name" ]; then

View File

@ -345,6 +345,8 @@ download_worker () {
break break
else else
echo "Warning: $rpm_name not found" echo "Warning: $rpm_name not found"
SFILE="$(get_rpm_level_name $rpm_name $lvl)"
\rm -f "$SFILE"
fi fi
done done

View File

@ -144,7 +144,7 @@ is_tarball() {
return $FOUND return $FOUND
} }
# Download function using wget command # Download function using curl or similar command
download_package() { download_package() {
local tarball_name="$1" local tarball_name="$1"
@ -169,11 +169,11 @@ download_package() {
;; ;;
esac esac
wget --spider "$url" url_exists "$url"
if [ $? != 0 ]; then if [ $? != 0 ]; then
echo "Warning: '$url' is broken" echo "Warning: '$url' is broken"
else else
wget -q -t 5 --wait=15 -O "$tarball_name" "$url" download_file --quiet "$url" "$tarball_name"
if [ $? -eq 0 ]; then if [ $? -eq 0 ]; then
if is_tarball "$tarball_name"; then if is_tarball "$tarball_name"; then
echo "Ok: $download_path" echo "Ok: $download_path"
@ -293,7 +293,7 @@ for line in $(cat $tarball_file); do
rm -rf $directory_name rm -rf $directory_name
popd > /dev/null # pushd $directory_name popd > /dev/null # pushd $directory_name
elif [[ "$tarball_name" = 'chartmuseum-v0.12.0-amd64' ]]; then elif [[ "$tarball_name" = 'chartmuseum-v0.12.0-amd64' ]]; then
wget -q -t 5 --wait=15 -O "$tarball_name" "$tarball_url" download_file --quiet "$tarball_url" "$tarball_name"
if [ $? -ne 0 ]; then if [ $? -ne 0 ]; then
error_count=$((error_count + 1)) error_count=$((error_count + 1))
popd > /dev/null # pushd $output_tarball popd > /dev/null # pushd $output_tarball
@ -301,7 +301,7 @@ for line in $(cat $tarball_file); do
fi fi
elif [[ "$tarball_name" = 'OPAE_1.3.7-5_el7.zip' ]]; then elif [[ "$tarball_name" = 'OPAE_1.3.7-5_el7.zip' ]]; then
srpm_path="${directory_name}/source_code/" srpm_path="${directory_name}/source_code/"
wget -q -t 5 --wait=15 -O "$tarball_name" "$tarball_url" download_file --quiet "$tarball_url" "$tarball_name"
if [ $? -ne 0 ]; then if [ $? -ne 0 ]; then
error_count=$((error_count + 1)) error_count=$((error_count + 1))
popd > /dev/null # pushd $output_tarball popd > /dev/null # pushd $output_tarball
@ -450,7 +450,7 @@ for line in $(cat $tarball_file); do
src_rpm_name="$(echo "$tarball_url" | rev | cut -d/ -f1 | rev)" src_rpm_name="$(echo "$tarball_url" | rev | cut -d/ -f1 | rev)"
wget -q -t 5 --wait=15 -O "$src_rpm_name" "$tarball_url" download_file --quiet "$tarball_url" "$src_rpm_name"
if [ $? -eq 0 ]; then if [ $? -eq 0 ]; then
rpm2cpio "$src_rpm_name" | cpio --quiet -i "$tarball_name" rpm2cpio "$src_rpm_name" | cpio --quiet -i "$tarball_name"
mv "$tarball_name" .. mv "$tarball_name" ..
@ -486,9 +486,8 @@ for line in $(cat $tarball_file); do
;; ;;
esac esac
download_cmd="wget -q -t 5 --wait=15 $url -O $download_path" download_file --quiet "$url" "$download_path"
if [[ $? -eq 0 ]] ; then
if $download_cmd ; then
if ! is_tarball "$download_path"; then if ! is_tarball "$download_path"; then
echo "Warning: file from $url is not a tarball." echo "Warning: file from $url is not a tarball."
\rm "$download_path" \rm "$download_path"

View File

@ -539,7 +539,7 @@ else
fi fi
#download RPMs/SRPMs from 3rd_party websites (not CentOS repos) by "wget" #download RPMs/SRPMs from 3rd_party websites (not CentOS repos) using curl
echo "step #2: start downloading RPMs/SRPMs from 3rd-party websites..." echo "step #2: start downloading RPMs/SRPMs from 3rd-party websites..."
list=${rpms_from_3rd_parties} list=${rpms_from_3rd_parties}
level=L1 level=L1

View File

@ -12,7 +12,7 @@
MAKE_STX_MIRROR_YUM_CONF_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}" )" )" MAKE_STX_MIRROR_YUM_CONF_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}" )" )"
source "$MAKE_STX_MIRROR_YUM_CONF_DIR/url_utils.sh" source "$MAKE_STX_MIRROR_YUM_CONF_DIR/utils.sh" || exit 1
DISTRO="centos" DISTRO="centos"
SUDO=sudo SUDO=sudo
@ -252,7 +252,7 @@ for REPO in $(find "$CENGN_REPOS_DIR" -type f -name '*repo'); do
CENGN_URL="$(url_to_stx_mirror_url "$URL" "$DISTRO")" CENGN_URL="$(url_to_stx_mirror_url "$URL" "$DISTRO")"
# Test CENGN url # Test CENGN url
wget -q --spider $CENGN_URL url_exists --quiet "$CENGN_URL"
if [ $? -eq 0 ]; then if [ $? -eq 0 ]; then
# OK, make substitution # OK, make substitution
sed "s#^baseurl=$URL\$#baseurl=$CENGN_URL#" -i "$REPO" sed "s#^baseurl=$URL\$#baseurl=$CENGN_URL#" -i "$REPO"

View File

@ -86,7 +86,8 @@ fi
STARLINGX_ADD_PKGS_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}" )" )" STARLINGX_ADD_PKGS_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}" )" )"
source $STARLINGX_ADD_PKGS_DIR/../toCOPY/lst_utils.sh source $STARLINGX_ADD_PKGS_DIR/../toCOPY/lst_utils.sh || exit 1
source $STARLINGX_ADD_PKGS_DIR/utils.sh || exit 1
STXTOOLS=${MY_REPO_ROOT_DIR}/stx-tools STXTOOLS=${MY_REPO_ROOT_DIR}/stx-tools
@ -332,7 +333,7 @@ function download_pkg {
echo "Downloading $url" echo "Downloading $url"
rpm_path=$LOCALREPO_PATH/$arch/$(basename $relativepath) rpm_path=$LOCALREPO_PATH/$arch/$(basename $relativepath)
wget -q -O $rpm_path $url download_file --quiet "$url" "$rpm_path"
if [ $? -ne 0 ]; then if [ $? -ne 0 ]; then
echo "Failed to download $url" >&2 echo "Failed to download $url" >&2

View File

@ -8,12 +8,12 @@
DL_UTILS_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}" )" )" DL_UTILS_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}" )" )"
if [ -f "$DL_UTILS_DIR/url_utils.sh" ]; then if [ -f "$DL_UTILS_DIR/utils.sh" ]; then
source "$DL_UTILS_DIR/url_utils.sh" source "$DL_UTILS_DIR/utils.sh"
elif [ -f "$DL_UTILS_DIR/../url_utils.sh" ]; then elif [ -f "$DL_UTILS_DIR/../utils.sh" ]; then
source "$DL_UTILS_DIR/../url_utils.sh" source "$DL_UTILS_DIR/../utils.sh"
else else
echo "Error: Can't find 'url_utils.sh'" echo "Error: Can't find 'utils.sh'"
exit 1 exit 1
fi fi
@ -257,13 +257,15 @@ dl_file_from_url () {
fi fi
fi fi
CMD="wget '$URL' --tries=5 --wait=15 --output-document='$DOWNLOAD_PATH'" CMD="$(get_download_file_command $URL $DOWNLOAD_PATH.dl_part)"
echo "$CMD" echo "$CMD"
eval $CMD eval $CMD
if [ $? -ne 0 ]; then if [ $? -ne 0 ]; then
\rm -f "$DOWNLOAD_PATH.dl_part"
echo "Error: $CMD" echo "Error: $CMD"
return 1 return 1
fi fi
\mv -fT "$DOWNLOAD_PATH.dl_part" "$DOWNLOAD_PATH"
;; ;;
*) *)
echo "Error: Unknown protocol '$PROTOCOL' for url '$URL'" echo "Error: Unknown protocol '$PROTOCOL' for url '$URL'"

View File

@ -6,7 +6,9 @@
UTILS_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}" )" )" UTILS_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}" )" )"
source $UTILS_DIR/url_utils.sh : ${_CURL_OPTS="--fail --location --connect-timeout 15 --speed-time 15 --speed-limit 1 --retry 5"}
source $UTILS_DIR/url_utils.sh || exit 1
get_yum_command() { get_yum_command() {
local _file=$1 local _file=$1
@ -26,17 +28,77 @@ get_yum_command() {
echo "${SUDO} yumdownloader -q -C ${YUMCONFOPT} ${RELEASEVER} $yumdownloader_extra_opts $rpm_name" echo "${SUDO} yumdownloader -q -C ${YUMCONFOPT} ${RELEASEVER} $yumdownloader_extra_opts $rpm_name"
} }
get_wget_command() { # Usage: get_download_file_command [--quiet] [--timestamps] URL [OUTPUT_FILE]
get_download_file_command() {
local _opts="$_CURL_OPTS"
while true ; do
case "$1" in
--quiet) _opts+=" --silent --show-error" ;;
--timestamps) _opts+=" --remote-time" ;;
-*)
echo >&2 "Unknown option $1"
return 1
;;
*)
break
esac
shift
done
local _name="$1" local _name="$1"
local _ret="" local _ret=""
if [[ "$_name" == http?(s)://* ]]; then if [[ $# -gt 1 ]]; then
_ret="wget -q $_name" _opts+=" -o $2"
else else
_ret="wget -q $(koji_url $_name)" _opts+=" -O"
fi
if [[ "$_name" == http?(s)://* ]]; then
_ret="curl $_opts $_name"
else
_ret="curl $_opts $(koji_url $_name)"
fi fi
echo "$_ret" echo "$_ret"
} }
# Usage: download_file [--quiet] [--timestamps] URL [OUTPUT_FILE]
#
# Download URL with curl into OUTPUT_FILE (default: the basename of URL,
# relative to the current directory). Data is written to
# "<OUTPUT_FILE>.dl_part" first and renamed to OUTPUT_FILE only after curl
# reports success, so a failed transfer never leaves a partial file under
# the final name.
#
# Options:
#   --quiet       adds "--silent --show-error" to the curl options
#   --timestamps  adds "--remote-time" to the curl options
#
# Globals:  _CURL_OPTS (read) - base curl options, split on whitespace
# Returns:  0 on success; 1 on an unknown option, a missing URL, a failed
#           transfer, or a failed rename of the temporary file
download_file() {
    local _opts="$_CURL_OPTS"
    while true ; do
        case "$1" in
            --quiet)      _opts+=" --silent --show-error" ;;
            --timestamps) _opts+=" --remote-time" ;;
            -*)
                echo >&2 "Unknown option $1"
                return 1
                ;;
            *)
                break
                ;;
        esac
        shift
    done

    # Refuse to run curl without a URL instead of failing obscurely later.
    if [[ -z "$1" ]] ; then
        echo >&2 "download_file: missing URL"
        return 1
    fi

    local _dest_file
    if [[ "$#" -gt 1 ]] ; then
        _dest_file="$2"
    else
        _dest_file="$(basename "$1")"
    fi
    local _part_file="${_dest_file}.dl_part"

    # $_opts is intentionally unquoted: it is a space-separated option list.
    # The rename is part of the success condition, so a failed mv is reported
    # to the caller and the temporary file is cleaned up below.
    if curl $_opts -o "$_part_file" "$1" \
            && \mv -fT "$_part_file" "$_dest_file" ; then
        return 0
    fi
    \rm -f "$_part_file"
    return 1
}
# Usage: url_exists [--quiet] URL
#
# Runs "wget --spider" on URL and returns wget's exit status.
# When the first argument is --quiet, it is forwarded to wget as --quiet.
url_exists() {
    local _quiet_flag=""
    case "$1" in
        --quiet)
            _quiet_flag="--quiet"
            shift
            ;;
    esac
    # Left unquoted so that an empty flag expands to no argument at all.
    wget $_quiet_flag --spider "$1"
}
get_rpm_level_name() { get_rpm_level_name() {
local _rpm_name=$1 local _rpm_name=$1
local _level=$2 local _level=$2
@ -166,20 +228,20 @@ get_download_cmd() {
local ff="$1" local ff="$1"
local _level="$2" local _level="$2"
# Decide if the list will be downloaded using yumdownloader or wget # Decide if the list will be downloaded using yumdownloader or curl
if [[ $ff != *"#"* ]]; then if [[ $ff != *"#"* ]]; then
rpm_name=$ff rpm_name=$ff
if [ $_level == "K1" ]; then if [ $_level == "K1" ]; then
download_cmd="$(get_wget_command $rpm_name)" download_cmd="$(get_download_file_command --quiet $rpm_name)"
else else
# yumdownloader with the appropriate flag for src, noarch or x86_64 # yumdownloader with the appropriate flag for src, noarch or x86_64
# download_cmd="${SUDOCMD} $(get_yum_command $rpm_name $_level)" # download_cmd="${SUDOCMD} $(get_yum_command $rpm_name $_level)"
download_cmd="$(get_yum_command $rpm_name $_level)" download_cmd="$(get_yum_command $rpm_name $_level)"
fi fi
else else
# Build wget command # Build the download command
rpm_url=$(get_url "$ff" "$_level") rpm_url=$(get_url "$ff" "$_level")
download_cmd="$(get_wget_command $rpm_url)" download_cmd="$(get_download_file_command --quiet $rpm_url)"
fi fi
echo "$download_cmd" echo "$download_cmd"

View File

@ -18,7 +18,7 @@ source utils.sh
check_result() { check_result() {
local _res="$1" local _res="$1"
local _expect="$2" local _expect="$2"
if [ "$_res" != "$_expect" ]; then if [[ "$_res" != $_expect ]]; then
echo "Fail" echo "Fail"
echo "expected $_expect" echo "expected $_expect"
echo "returned $_res" echo "returned $_res"
@ -27,14 +27,14 @@ check_result() {
echo "Success" echo "Success"
} }
# get_wget_command # get_download_file_command
res=$(get_wget_command "https://libvirt.org/sources/python/libvirt-python-3.5.0-1.fc24.src.rpm") res=$(get_download_file_command "https://libvirt.org/sources/python/libvirt-python-3.5.0-1.fc24.src.rpm")
expect="wget -q https://libvirt.org/sources/python/libvirt-python-3.5.0-1.fc24.src.rpm" expect="curl* https://libvirt.org/sources/python/libvirt-python-3.5.0-1.fc24.src.rpm"
check_result "$res" "$expect" check_result "$res" "$expect"
res=$(get_wget_command "python2-httpbin-0.5.0-6.el7.noarch.rpm") res=$(get_download_file_command --quiet "python2-httpbin-0.5.0-6.el7.noarch.rpm")
expect="wget -q https://kojipkgs.fedoraproject.org/packages/python2-httpbin/0.5.0/6.el7/noarch/python2-httpbin-0.5.0-6.el7.noarch.rpm" expect="curl*--silent* https://kojipkgs.fedoraproject.org/packages/python2-httpbin/0.5.0/6.el7/noarch/python2-httpbin-0.5.0-6.el7.noarch.rpm"
check_result "$res" "$expect" check_result "$res" "$expect"
# get_url # get_url