kumquat-buildroot/support/download/dl-wrapper

234 lines
8.8 KiB
Plaintext
Raw Normal View History

#!/usr/bin/env bash
# This script is a wrapper to the other download backends.
# Its role is to ensure atomicity when saving downloaded files
# back to BR2_DL_DIR, and not clutter BR2_DL_DIR with partial,
# failed downloads.
# To avoid cluttering BR2_DL_DIR, we download to a trashable
# location, namely in $(BUILD_DIR).
# Then, we move the downloaded file to a temporary file in the
# same directory as the final output file.
# This allows us to finally atomically rename it to its final
# name.
# If anything goes wrong, we just remove all the temporaries
# created so far.
# We want to catch any unexpected failure, and exit immediately.
set -e
# getopts-style option string, exported so that it is visible to child
# processes. NOTE(review): presumably the backends in support/download/
# use it to parse the options this wrapper passes them -- confirm.
export BR_BACKEND_DL_GETOPTS=":hc:d:o:n:N:H:lru:qf:e"
2018-04-02 10:14:22 +02:00
# Download one file and save it atomically to its final location.
#
# Options (everything after '--' is passed untouched to the backend):
#   -c CSET   forwarded to the backend as -c
#   -d DIR    forwarded to the backend as -d
#   -D DIR    legacy (global) download directory, searched first
#   -o FILE   final output file (mandatory)
#   -n NAME   forwarded to the backend (and to the post-processor) as -n
#   -N NAME   forwarded to the backend as -N
#   -H FILE   hash file handed to check-hash; may be given more than once
#   -l -r -q  forwarded verbatim to the backend (-q also to check-hash)
#   -f FILE   forwarded to the backend as -f; also the name looked up in -D
#   -u URI    'backend[|urlencode]+location'; may be given more than once,
#             the URIs are tried in order until one downloads and checks
#   -p NAME   run support/download/NAME-post-process on the download
#
# Globals:  BUILD_DIR (read): where the trashable temp directory is created
# Helpers:  error(), warn(); support/download/{check-hash,<backend>}, which
#           are looked up relative to the current directory
# Returns:  0 when the file is in place and matched its hashes;
#           1 (return or exit) in every failure case, including the case
#           where the file was saved but no hash was available for it.
main() {
    local OPT OPTARG
    local OPTIND=1
    local backend output large_file recurse quiet rc
    local cset dl_dir old_dl_dir raw_base_name base_name filename post_process
    local uri backend_urlencode urlencode download_and_check
    local tmpd tmpf tmp_output new_mode
    local -a uris hfiles backend_opts

    # Parse our options; anything after '--' is for the backend
    while getopts ":c:d:D:o:n:N:H:lrf:u:qp:" OPT; do
        case "${OPT}" in
        c)  cset="${OPTARG}";;
        d)  dl_dir="${OPTARG}";;
        D)  old_dl_dir="${OPTARG}";;
        o)  output="${OPTARG}";;
        n)  raw_base_name="${OPTARG}";;
        N)  base_name="${OPTARG}";;
        # More than one hash file may be provided: accumulate them all.
        H)  hfiles+=( "${OPTARG}" );;
        l)  large_file="-l";;
        r)  recurse="-r";;
        f)  filename="${OPTARG}";;
        u)  uris+=( "${OPTARG}" );;
        p)  post_process="${OPTARG}";;
        q)  quiet="-q";;
        :)  error "option '%s' expects a mandatory argument\n" "${OPTARG}";;
        \?) error "unknown option '%s'\n" "${OPTARG}";;
        esac
    done

    # Forget our options, and keep only those for the backend
    shift $((OPTIND-1))

    if [ -z "${output}" ]; then
        error "no output specified, use -o\n"
    fi

    # Legacy handling: check if the file already exists in the global
    # download directory. If it does, hard-link it. If it turns out it
    # was an incorrect download, we'd still check it below anyway.
    # If we can neither link nor copy, fallback to doing a download.
    # NOTE! This is not atomic, is subject to TOCTTOU, but the whole
    # dl-wrapper runs under an flock, so we're safe.
    # Only look when both -D and -f were given: otherwise the path would
    # degenerate to '/' (or to a directory), which always exists and would
    # only trigger failing, noisy 'ln' and 'cp' attempts.
    if [[ -n "${old_dl_dir}" && -n "${filename}" \
          && ! -e "${output}" && -e "${old_dl_dir}/${filename}" ]]; then
        ln "${old_dl_dir}/${filename}" "${output}" || \
        cp "${old_dl_dir}/${filename}" "${output}" || \
        true
    fi

    # If the output file already exists and:
    # - there's no .hash file: do not download it again and exit promptly
    # - matches all its hashes: do not download it again and exit promptly
    # - fails at least one of its hashes: force a re-download
    # - there's no hash (but a .hash file): consider it a hard error
    if [ -e "${output}" ]; then
        if support/download/check-hash ${quiet} "${output}" "${output##*/}" "${hfiles[@]}"; then
            exit 0
        elif [ ${?} -ne 2 ]; then
            # Do not remove the file, otherwise it might get re-downloaded
            # from a later location (i.e. primary -> upstream -> mirror).
            # Do not print a message, check-hash already did.
            exit 1
        fi
        rm -f "${output}"
        warn "Re-downloading '%s'...\n" "${output##*/}"
    fi

    # Look through all the uris that we were given to download the package
    # source
    download_and_check=0
    rc=1
    for uri in "${uris[@]}"; do
        # A URI looks like 'backend[|urlencode]+location'. The backend name
        # is what is left of the '|', so that e.g. 'file|urlencode' selects
        # the 'file' backend rather than falling through to wget.
        backend_urlencode="${uri%%+*}"
        backend="${backend_urlencode%|*}"
        case "${backend}" in
            git|svn|cvs|bzr|file|scp|hg|sftp) ;;
            *) backend="wget" ;;
        esac
        uri=${uri#*+}

        # Only the exact word "urlencode" enables the backend's -e option
        urlencode=${backend_urlencode#*|}
        backend_opts=()
        if [ "${urlencode}" = "urlencode" ]; then
            backend_opts+=( -e )
        fi

        # tmpd is a temporary directory in which backends may store
        # intermediate by-products of the download.
        # tmpf is the file in which the backends should put the downloaded
        # content.
        # tmpd is located in $(BUILD_DIR), so as not to clutter the (precious)
        # $(BR2_DL_DIR)
        # We let the backends create tmpf, so they are able to set whatever
        # permission bits they want (although we're only really interested in
        # the executable bit.)
        tmpd="$(mktemp -d "${BUILD_DIR}/.${output##*/}.XXXXXX")" || exit 1
        tmpf="${tmpd}/output"

        # Helpers expect to run in a directory that is *really* trashable, so
        # they are free to create whatever files and/or sub-dirs they might need.
        # Doing the 'cd' here rather than in all backends is easier.
        if ! cd "${tmpd}"; then
            rm -rf "${tmpd}"
            exit 1
        fi

        # If the backend fails, we can just remove the content of the temporary
        # directory to remove all the cruft it may have left behind, and try
        # the next URI until it succeeds. Once out of URI to try, we need to
        # cleanup and exit.
        if ! "${OLDPWD}/support/download/${backend}" \
                "${backend_opts[@]}" \
                -c "${cset}" \
                -d "${dl_dir}" \
                -n "${raw_base_name}" \
                -N "${base_name}" \
                -f "${filename}" \
                -u "${uri}" \
                -o "${tmpf}" \
                ${quiet} ${large_file} ${recurse} -- "${@}"
        then
            # cd back to keep path coherence
            cd "${OLDPWD}"
            rm -rf "${tmpd}"
            continue
        fi

        if [ -n "${post_process}" ] ; then
            if ! "${OLDPWD}/support/download/${post_process}-post-process" \
                    -o "${tmpf}" \
                    -n "${raw_base_name}"
            then
                # cd back to keep path coherence
                cd "${OLDPWD}"
                rm -rf "${tmpd}"
                continue
            fi
        fi

        # cd back to free the temp-dir, so we can remove it later
        cd "${OLDPWD}"

        # Check if the downloaded file is sane, and matches the stored hashes
        # for that file
        if support/download/check-hash ${quiet} "${tmpf}" "${output##*/}" "${hfiles[@]}"; then
            rc=0
        else
            if [ ${?} -ne 3 ]; then
                rm -rf "${tmpd}"
                continue
            fi

            # the hash file exists and there was no hash to check the file
            # against: the file is still saved below, but we report failure
            rc=1
        fi
        download_and_check=1
        break
    done

    # We tried every URI possible, none seems to work or to check against the
    # available hash. *ABORT MISSION*
    if [ "${download_and_check}" -eq 0 ]; then
        # tmpd is empty when no URI was given at all
        if [ -n "${tmpd}" ]; then
            rm -rf "${tmpd}"
        fi
        exit 1
    fi

    # tmp_output is in the same directory as the final output, so we can
    # later move it atomically.
    if ! tmp_output="$(mktemp "${output}.XXXXXX")"; then
        rm -rf "${tmpd}"
        exit 1
    fi

    # 'mktemp' creates files with 'go=-rwx', so the files are not accessible
    # to users other than the one doing the download (and root, of course).
    # This can be problematic when a shared BR2_DL_DIR is used by different
    # users (e.g. on a build server), where all users may write to the shared
    # location, since other users would not be allowed to read the files
    # another user downloaded.
    # So, we restore the 'go' access rights to a more sensible value, while
    # still abiding by the current user's umask. We must do that before the
    # final 'mv', so just do it now.
    # Some backends (cp and scp) may create executable files, so we need to
    # carry the executable bit if needed.
    if [ -x "${tmpf}" ]; then
        new_mode=755
    else
        new_mode=644
    fi
    new_mode=$(printf "%04o" $((0${new_mode} & ~0$(umask))))
    if ! chmod "${new_mode}" "${tmp_output}"; then
        rm -rf "${tmpd}" "${tmp_output}"
        exit 1
    fi

    # We must *not* unlink tmp_output, otherwise there is a small window
    # during which another download process may create the same tmp_output
    # name (very, very unlikely; but not impossible.)
    # Using 'cp' is not reliable, since 'cp' may unlink the destination file
    # if it is unable to open it with O_WRONLY|O_TRUNC; see:
    #   http://pubs.opengroup.org/onlinepubs/9699919799/utilities/cp.html
    # Since the destination filesystem can be anything, it might not support
    # O_TRUNC, so 'cp' would unlink it first.
    # Use 'cat' and append-redirection '>>' to save to the final location,
    # since that is the only way we can be 100% sure of the behaviour.
    if ! cat "${tmpf}" >>"${tmp_output}"; then
        rm -rf "${tmpd}" "${tmp_output}"
        exit 1
    fi
    rm -rf "${tmpd}"

    # tmp_output and output are on the same filesystem, so POSIX guarantees
    # that 'mv' is atomic, because it then uses rename() that POSIX mandates
    # to be atomic, see:
    #   http://pubs.opengroup.org/onlinepubs/9699919799/functions/rename.html
    if ! mv -f "${tmp_output}" "${output}"; then
        rm -f "${tmp_output}"
        exit 1
    fi

    return ${rc}
}
# Print a message on stdout, prefixed with the name of this script.
#   $1     printf format of the message
#   $2...  arguments consumed by that format
# Globals: my_name (read)
trace() {
    local fmt
    fmt="${1}"
    shift
    # The caller-supplied format is deliberately part of the format string.
    printf "%s: ${fmt}" "${my_name}" "${@}"
}
# Same as trace(), except that the message is sent to stderr.
# All arguments are handed over to trace() unchanged.
warn() {
    trace "${@}" 1>&2
}
# Emit a message through warn(), then terminate the script.
#   $1     exit status to terminate with
#   $2...  format and arguments, handed over to warn()
errorN() {
    local status="${1}"
    shift
    warn "${@}"
    exit ${status}
}
# Emit a message and terminate the script with exit status 1.
#   $1...  format and arguments, handed over to errorN()
error() {
    errorN 1 "${@}"
}
# Name under which the script was invoked, with any leading directories
# stripped; the message helpers use it as a prefix.
my_name=${0##*/}

# Entry point: hand all command-line arguments over to main().
main "$@"