kumquat-buildroot/support/download/dl-wrapper

264 lines
9.3 KiB
Plaintext
Raw Normal View History

#!/usr/bin/env bash
# This script is a wrapper to the other download backends.
# Its role is to ensure atomicity when saving downloaded files
# back to BR2_DL_DIR, and not clutter BR2_DL_DIR with partial,
# failed downloads.
#
# Call it with -h to see some help.
# To avoid cluttering BR2_DL_DIR, we download to a trashable
# location, namely in $(BUILD_DIR).
# Then, we move the downloaded file to a temporary file in the
# same directory as the final output file.
# This allows us to finally atomically rename it to its final
# name.
# If anything goes wrong, we just remove all the temporaries
# created so far.
# We want to catch any unexpected failure, and exit immediately.
set -e
export BR_BACKEND_DL_GETOPTS=":hc:d:o:n:N:H:ru:qf:e"
2018-04-02 10:14:22 +02:00
main() {
local OPT OPTARG
local backend output hfile recurse quiet rc
local -a uris
# Parse our options; anything after '--' is for the backend
while getopts ":hc:d:D:o:n:N:H:rf:u:q" OPT; do
case "${OPT}" in
h) help; exit 0;;
c) cset="${OPTARG}";;
d) dl_dir="${OPTARG}";;
D) old_dl_dir="${OPTARG}";;
o) output="${OPTARG}";;
n) raw_base_name="${OPTARG}";;
N) base_name="${OPTARG}";;
H) hfile="${OPTARG}";;
r) recurse="-r";;
f) filename="${OPTARG}";;
u) uris+=( "${OPTARG}" );;
q) quiet="-q";;
:) error "option '%s' expects a mandatory argument\n" "${OPTARG}";;
\?) error "unknown option '%s'\n" "${OPTARG}";;
esac
done
# Forget our options, and keep only those for the backend
shift $((OPTIND-1))
if [ -z "${output}" ]; then
error "no output specified, use -o\n"
fi
# Legacy handling: check if the file already exists in the global
# download directory. If it does, hard-link it. If it turns out it
# was an incorrect download, we'd still check it below anyway.
# If we can neither link nor copy, fallback to doing a download.
# NOTE! This is not atomic, is subject to TOCTTOU, but the whole
# dl-wrapper runs under an flock, so we're safe.
if [ ! -e "${output}" -a -e "${old_dl_dir}/${filename}" ]; then
ln "${old_dl_dir}/${filename}" "${output}" || \
cp "${old_dl_dir}/${filename}" "${output}" || \
true
fi
# If the output file already exists and:
# - there's no .hash file: do not download it again and exit promptly
# - matches all its hashes: do not download it again and exit promptly
# - fails at least one of its hashes: force a re-download
# - there's no hash (but a .hash file): consider it a hard error
if [ -e "${output}" ]; then
if support/download/check-hash ${quiet} "${hfile}" "${output}" "${output##*/}"; then
pkg-download: check hashes for locally cached files In some cases, upstream just update their releases in-place, without renaming them. When that package is updated in Buildroot, a new hash to match the new upstream release is included in the corresponding .hash file. As a consequence, users who previously downloaded that package's tarball with an older version of Buildroot, will get stuck with an old archive for that package, and after updating their Buildroot copy, will be greeted with a failed download, due to the local file not matching the new hashes. Also, an upstream would sometime serve us HTML garbage instead of the actual tarball we requested, like SourceForge does from time for as-yet unknown reasons. So, to avoid this situation, check the hashes prior to doing the download. If the hashes match, consider the locally cached file genuine, and do not download it. However, if the locally cached file does not match the known hashes we have for it, it is promptly removed, and a download is re-attempted. Note: this does not add any overhead compared to the previous situation, because we were already checking hashes of locally cached files. It just changes the order in which we do the checks. For the records, here is the overhead of hashing a 231MiB file (qt-everywhere-opensource-src-4.8.6.tar.gz) on a core-i5 @2.5GHz: cache-cold cache-hot sha1 1.914s 0.762s sha256 2.109s 1.270s But again, this overhead already existed before this patch. Signed-off-by: "Yann E. MORIN" <yann.morin.1998@free.fr> Cc: Thomas Petazzoni <thomas.petazzoni@free-electrons.com> Cc: Peter Korsgaard <jacmet@uclibc.org> Cc: Gustavo Zacarias <gustavo@zacarias.com.ar> Reviewed-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com> Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
2014-12-11 23:52:08 +01:00
exit 0
elif [ ${?} -ne 2 ]; then
# Do not remove the file, otherwise it might get re-downloaded
# from a later location (i.e. primary -> upstream -> mirror).
# Do not print a message, check-hash already did.
exit 1
pkg-download: check hashes for locally cached files In some cases, upstream just update their releases in-place, without renaming them. When that package is updated in Buildroot, a new hash to match the new upstream release is included in the corresponding .hash file. As a consequence, users who previously downloaded that package's tarball with an older version of Buildroot, will get stuck with an old archive for that package, and after updating their Buildroot copy, will be greeted with a failed download, due to the local file not matching the new hashes. Also, an upstream would sometime serve us HTML garbage instead of the actual tarball we requested, like SourceForge does from time for as-yet unknown reasons. So, to avoid this situation, check the hashes prior to doing the download. If the hashes match, consider the locally cached file genuine, and do not download it. However, if the locally cached file does not match the known hashes we have for it, it is promptly removed, and a download is re-attempted. Note: this does not add any overhead compared to the previous situation, because we were already checking hashes of locally cached files. It just changes the order in which we do the checks. For the records, here is the overhead of hashing a 231MiB file (qt-everywhere-opensource-src-4.8.6.tar.gz) on a core-i5 @2.5GHz: cache-cold cache-hot sha1 1.914s 0.762s sha256 2.109s 1.270s But again, this overhead already existed before this patch. Signed-off-by: "Yann E. MORIN" <yann.morin.1998@free.fr> Cc: Thomas Petazzoni <thomas.petazzoni@free-electrons.com> Cc: Peter Korsgaard <jacmet@uclibc.org> Cc: Gustavo Zacarias <gustavo@zacarias.com.ar> Reviewed-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com> Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
2014-12-11 23:52:08 +01:00
fi
rm -f "${output}"
warn "Re-downloading '%s'...\n" "${output##*/}"
fi
# Look through all the uris that we were given to download the package
# source
download_and_check=0
rc=1
for uri in "${uris[@]}"; do
backend=${uri%%+*}
case "${backend}" in
git|svn|cvs|bzr|file|scp|hg) ;;
*) backend="wget" ;;
esac
uri=${uri#*+}
urlencode=${backend#*|}
# urlencode must be "urlencode"
[ "${urlencode}" != "urlencode" ] && urlencode=""
# tmpd is a temporary directory in which backends may store
# intermediate by-products of the download.
# tmpf is the file in which the backends should put the downloaded
# content.
# tmpd is located in $(BUILD_DIR), so as not to clutter the (precious)
# $(BR2_DL_DIR)
# We let the backends create tmpf, so they are able to set whatever
# permission bits they want (although we're only really interested in
# the executable bit.)
tmpd="$(mktemp -d "${BUILD_DIR}/.${output##*/}.XXXXXX")"
tmpf="${tmpd}/output"
# Helpers expect to run in a directory that is *really* trashable, so
# they are free to create whatever files and/or sub-dirs they might need.
# Doing the 'cd' here rather than in all backends is easier.
cd "${tmpd}"
# If the backend fails, we can just remove the content of the temporary
# directory to remove all the cruft it may have left behind, and try
# the next URI until it succeeds. Once out of URI to try, we need to
# cleanup and exit.
if ! "${OLDPWD}/support/download/${backend}" \
$([ -n "${urlencode}" ] && printf %s '-e') \
-c "${cset}" \
-d "${dl_dir}" \
-n "${raw_base_name}" \
-N "${base_name}" \
-f "${filename}" \
-u "${uri}" \
-o "${tmpf}" \
${quiet} ${recurse} -- "${@}"
then
# cd back to keep path coherence
cd "${OLDPWD}"
rm -rf "${tmpd}"
continue
fi
# cd back to free the temp-dir, so we can remove it later
cd "${OLDPWD}"
# Check if the downloaded file is sane, and matches the stored hashes
# for that file
if support/download/check-hash ${quiet} "${hfile}" "${tmpf}" "${output##*/}"; then
rc=0
else
if [ ${?} -ne 3 ]; then
rm -rf "${tmpd}"
continue
fi
# the hash file exists and there was no hash to check the file
# against
rc=1
fi
download_and_check=1
break
done
# We tried every URI possible, none seems to work or to check against the
# available hash. *ABORT MISSION*
if [ "${download_and_check}" -eq 0 ]; then
rm -rf "${tmpd}"
exit 1
fi
# tmp_output is in the same directory as the final output, so we can
# later move it atomically.
tmp_output="$(mktemp "${output}.XXXXXX")"
# 'mktemp' creates files with 'go=-rwx', so the files are not accessible
# to users other than the one doing the download (and root, of course).
# This can be problematic when a shared BR2_DL_DIR is used by different
# users (e.g. on a build server), where all users may write to the shared
# location, since other users would not be allowed to read the files
# another user downloaded.
# So, we restore the 'go' access rights to a more sensible value, while
# still abiding by the current user's umask. We must do that before the
# final 'mv', so just do it now.
# Some backends (cp and scp) may create executable files, so we need to
# carry the executable bit if needed.
[ -x "${tmpf}" ] && new_mode=755 || new_mode=644
new_mode=$(printf "%04o" $((0${new_mode} & ~0$(umask))))
chmod ${new_mode} "${tmp_output}"
# We must *not* unlink tmp_output, otherwise there is a small window
# during which another download process may create the same tmp_output
# name (very, very unlikely; but not impossible.)
# Using 'cp' is not reliable, since 'cp' may unlink the destination file
# if it is unable to open it with O_WRONLY|O_TRUNC; see:
# http://pubs.opengroup.org/onlinepubs/9699919799/utilities/cp.html
# Since the destination filesystem can be anything, it might not support
# O_TRUNC, so 'cp' would unlink it first.
# Use 'cat' and append-redirection '>>' to save to the final location,
# since that is the only way we can be 100% sure of the behaviour.
if ! cat "${tmpf}" >>"${tmp_output}"; then
rm -rf "${tmpd}" "${tmp_output}"
exit 1
fi
rm -rf "${tmpd}"
# tmp_output and output are on the same filesystem, so POSIX guarantees
# that 'mv' is atomic, because it then uses rename() that POSIX mandates
# to be atomic, see:
# http://pubs.opengroup.org/onlinepubs/9699919799/functions/rename.html
if ! mv -f "${tmp_output}" "${output}"; then
rm -f "${tmp_output}"
exit 1
fi
return ${rc}
}
help() {
cat <<_EOF_
NAME
${my_name} - download wrapper for Buildroot
SYNOPSIS
${my_name} [OPTION]... -- [BACKEND OPTION]...
DESCRIPTION
Wrapper script around different download mechanisms. Ensures that
concurrent downloads do not conflict, that partial downloads are
properly evicted without leaving temporary files, and that access
rights are maintained.
-h This help text.
-u URIs
The URI to get the file from, the URI must respect the format given in
the example.
You may give as many '-u URI' as you want, the script will stop at the
frist successful download.
Example: backend+URI; git+http://example.com or http+http://example.com
-o FILE
Store the downloaded archive in FILE.
-H FILE
Use FILE to read hashes from, and check them against the downloaded
archive.
Exit status:
0 if OK
!0 in case of error
ENVIRONMENT
BUILD_DIR
The path to Buildroot's build dir
_EOF_
}
trace() { local msg="${1}"; shift; printf "%s: ${msg}" "${my_name}" "${@}"; }
warn() { trace "${@}" >&2; }
errorN() { local ret="${1}"; shift; warn "${@}"; exit ${ret}; }
error() { errorN 1 "${@}"; }
my_name="${0##*/}"
main "${@}"