#!/usr/bin/env bash

# NOTE: if the output of this backend has to change (e.g. we change what gets
# included in the archive (e.g. LFS), or we change the format of the archive
# (e.g. tar options, compression ratio or method)), we MUST update the format
# version in the variable BR_FMT_VERSION_git, in package/pkg-download.mk.

# We want to catch any unexpected failure: make the ERR trap (installed
# further down) be inherited by functions, command substitutions and
# subshells.
set -E

# Download helper for git, to be called from the download wrapper script
#
# Options:
#   -q          Be quiet.
#   -r          Clone and archive sub-modules.
#   -o FILE     Generate archive in FILE.
#   -u URI      Clone from repository at URI.
#   -c CSET     Use changeset CSET.
#   -d DIR      Use DIR as the download directory; the git cache is DIR/git.
#   -n NAME     Use basename NAME.
#
# Environment:
#   GIT                   : the git command to call
#   BR_BACKEND_DL_GETOPTS : the option string handed to getopts (not set
#                           in this script)

. "${0%/*}/helpers"

# Save our path and options in case we need to call ourselves again
myname="${0}"
declare -a OPTS=("${@}")

# This function is called when an error occurs. Its job is to attempt a
# clone from scratch (only once!) in case the git tree is borked, or in
# case an unexpected and unsupported situation arises with submodules
# or uncommitted stuff (e.g. if the user manually mucked around in the
# git cache).
#
# BR_GIT_BACKEND_FIRST_FAULT is exported before re-executing ourselves, so
# that a second failure in the re-executed script bails out with the exit
# status of the failed command instead of looping forever.
_on_error() {
    local ret=${?}

    printf "Detected a corrupted git cache.\n" >&2
    if ${BR_GIT_BACKEND_FIRST_FAULT:-false}; then
        printf "This is the second time in a row; bailing out\n" >&2
        exit "${ret}"
    fi
    export BR_GIT_BACKEND_FIRST_FAULT=true

    printf "Removing it and starting afresh.\n" >&2

    popd >/dev/null
    # ':?' aborts rather than running 'rm -rf' on an empty path.
    rm -rf "${git_cache:?}"

    exec "${myname}" "${OPTS[@]}" || exit "${ret}"
}

quiet=
recurse=0
while getopts "${BR_BACKEND_DL_GETOPTS}" OPT; do
    case "${OPT}" in
    # Being quiet silences stdout for the rest of the script, so every
    # diagnostic below must go to stderr to remain visible.
    q)  quiet=-q; exec >/dev/null;;
    r)  recurse=1;;
    o)  output="${OPTARG}";;
    u)  uri="${OPTARG}";;
    c)  cset="${OPTARG}";;
    d)  dl_dir="${OPTARG}";;
    n)  basename="${OPTARG}";;
    :)  printf "option '%s' expects a mandatory argument\n" "${OPTARG}" >&2; exit 1;;
    \?) printf "unknown option '%s'\n" "${OPTARG}" >&2; exit 1;;
    esac
done

shift $((OPTIND-1)) # Get rid of our options

# The git cache lives below the download directory. Without one, we would
# create (and, on error, remove) a 'git' directory at the root of the
# filesystem, so refuse to go any further.
if [ -z "${dl_dir}" ]; then
    printf "no download directory specified (missing option '-d')\n" >&2
    exit 1
fi

# Create and cd into the directory that will contain the local git cache.
# The ERR trap is not installed yet, so failures are checked explicitly:
# since GIT_DIR is forced for every git command, carrying on from another
# directory would make git treat that directory as the work tree.
git_cache="${dl_dir}/git"
mkdir -p "${git_cache}" || exit 1
pushd "${git_cache}" >/dev/null || exit 1

# Any error now should try to recover
trap _on_error ERR

# Print the git command about to be run (unless quiet), then run it.
#
# Caller needs to single-quote its arguments to prevent them from
# being expanded a second time (in case there are spaces in them)
_git() {
    if [ -z "${quiet}" ]; then
        printf '%s ' GIT_DIR="${git_cache}/.git" ${GIT} "${@}"; printf '\n'
    fi
    _plain_git "$@"
}
# Run a git command against the repository of the git cache, without
# printing it first. The arguments go through 'eval', hence the quoting
# requirement on callers; ${GIT} is left unquoted on purpose, so that it
# gets word-split.
#
# Note: please keep command below aligned with what is printed above
_plain_git() {
    eval GIT_DIR="${git_cache}/.git" ${GIT} "${@}"
}

# Create a warning file, that the user should not use the git cache.
# It's ours. Our precious.
cat <<_EOF_ >"${dl_dir}/git.readme"
IMPORTANT NOTE!

The git tree located in this directory is for the exclusive use
by Buildroot, which uses it as a local cache to reduce bandwidth
usage.

Buildroot *will* trash any changes in that tree whenever it needs
to use it. Buildroot may even remove it in case it detects the
repository may have been damaged or corrupted.

Do *not* work in that directory; your changes will eventually get
lost. Do *not* even use it as a remote, or as the source for new
worktrees; your commits will eventually get lost.
_EOF_

# Initialise a repository in the git cache. If the repository already
# existed, this is a noop, unless the repository was broken, in which
# case this magically restores it to working conditions. In the latter
# case, we might be missing blobs, but that's not a problem: we'll
# fetch what we need later anyway.
#
# We can still go through the wrapper, because 'init' does not use the
# path pointed to by GIT_DIR, but really uses the directory passed as
# argument.
_git init .

# Ensure the repo has an origin (in case a previous run was killed).
if ! _plain_git remote |grep -q -E '^origin$'; then
    _git remote add origin "'${uri}'"
fi

_git remote set-url origin "'${uri}'"

printf "Fetching all references\n"
_git fetch origin
_git fetch origin -t

# Try to get the special refs exposed by some forges (pull-requests for
# github, changes for gerrit...). There is no easy way to know whether
# the cset the user passed us is such a special ref or a tag or a sha1
# or whatever else. We'll eventually fail at checking out that cset,
# below, if there is an issue anyway. Since most of the cset we're gonna
# have to clone are not such special refs, consign the output to oblivion
# so as not to alarm unsuspecting users, but still trace it as a warning.
if ! _git fetch origin "'${cset}:${cset}'" >/dev/null 2>&1; then
    printf "Could not fetch special ref '%s'; assuming it is not special.\n" "${cset}"
fi

# Check that the changeset does exist. If it does not, re-cloning from
# scratch won't help, so we don't want to trash the repository for a
# missing commit. We just exit without going through the ERR trap.
# This is a fatal error, so report it on stderr: stdout may have been
# silenced by '-q'.
if ! _git rev-parse --quiet --verify "'${cset}^{commit}'" >/dev/null 2>&1; then
    printf "Commit '%s' does not exist in this repository.\n" "${cset}" >&2
    exit 1
fi

# The new cset we want to checkout might have different submodules, or
# have sub-dirs converted to/from a submodule. So we would need to
# deregister _current_ submodules before we checkout.
#
# Using "git submodule deinit --all" would remove all the files for
# all submodules, including the corresponding .git files or directories.
# However, it was only introduced with git-1.8.3, which is too recent
# for some enterprise-grade distros.
#
# So, we fall-back to just removing all submodules directories. We do
# not need to be recursive, as removing a submodule will de-facto remove
# its own submodules.
#
# For recent git versions, the repository for submodules is stored
# inside the repository of the super repository, so the following will
# only remove the working copies of submodules, effectively caching the
# submodules.
#
# For older versions however, the repository is stored in the .git/ of
# the submodule directory, so the following will effectively remove the
# working copy as well as the repository, which means submodules
# will not be cached for older versions.
#
# The command is kept in single quotes, and single-quoted again when
# passed on, so that ${path} is only expanded when 'git submodule foreach'
# finally runs it.
cmd='printf "Deregistering submodule \"%s\"\n" "${path}" && cd .. && rm -rf "${path##*/}"'
_git submodule --quiet foreach "'${cmd}'"

# Checkout the required changeset, so that we can update the required
# submodules.
_git checkout -f -q "'${cset}'"

# Get rid of now-untracked directories (in case a git operation was
# interrupted in a previous run, or to get rid of empty directories
# that were parents of submodules removed above).
_git clean -ffdx

# Get date of commit to generate a reproducible archive.
# %ci is ISO 8601, so it's fully qualified, with TZ and all.
date="$( _plain_git log -1 --pretty=format:%ci )"

# There might be submodules, so fetch them.
if [ "${recurse}" -eq 1 ]; then
    _git submodule update --init --recursive

    # Older versions of git will store the absolute path of the git tree
    # in the .git of submodules, while newer versions just use relative
    # paths. Detect and fix the older variants to use relative paths, so
    # that the archives are reproducible across a wider range of git
    # versions. However, we can't do that if git is too old and uses
    # full repositories for submodules.
    #
    # Submodule paths are read one per line, so that a path containing
    # blanks or glob characters is neither split nor expanded.
    cmd='printf "%s\n" "${path}/"'
    while IFS= read -r module_dir; do
        # A .git *directory* means a full repository: nothing to fix up.
        [ -f "${module_dir}/.git" ] || continue
        # Turn each component of the submodule path into '../', i.e. the
        # way back up from the submodule to the top of the git cache.
        relative_dir="$( sed -r -e 's,/+,/,g; s,[^/]+/,../,g' <<<"${module_dir}" )"
        sed -r -i -e "s:^gitdir\: $(pwd)/:gitdir\: ${relative_dir}:" "${module_dir}/.git"
    done < <( _plain_git submodule --quiet foreach "'${cmd}'" )
fi

popd >/dev/null

# Generate the archive.
# We do not want the .git dir; we keep other .git files, in case they are the
# only files in their directory.
# The .git dir would generate non reproducible tarballs as it depends on
# the state of the remote server. It also would generate large tarballs
# (gigabytes for some linux trees) when a full clone took place.
mk_tar_gz "${git_cache}" "${basename}" "${date}" "${output}" ".git/*"