#!/usr/bin/env bash

# Copyright © Tavian Barnes
# SPDX-License-Identifier: 0BSD

# Creates a directory tree that matches a git repo, but with empty files.  E.g.
#
#     $ ./bench/clone-tree.sh "https://.../linux.git" v6.5 ./linux ./linux.git
#
# will create or update a shallow clone at ./linux.git, then create a directory
# tree at ./linux with the same directory tree as the tag v6.5, except all files
# will be empty.
#
# Arguments:
#     $1 - URL of the repository to fetch from
#     $2 - tag (or full commit hash) whose tree should be reproduced
#     $3 - directory in which to create the tree of empty files
#     $4 - path of the bare repository used as a cache
#
# Environment:
#     NPROC - if set, it is halved (rounding up) and re-exported; otherwise it
#             is initialized from nproc.  Recursive invocations for submodules
#             inherit the exported value.

set -eu

if (($# != 4)); then
    printf 'Usage: %s https://url/of/repo.git <TAG> path/to/checkout path/to/repo.git\n' "$0" >&2
    exit 1
fi

URL="$1"
TAG="$2"
DIR="$3"
REPO="$4"

# Locate the bfs and xtouch binaries relative to this script
BENCH=$(dirname -- "${BASH_SOURCE[0]}")
BIN=$(realpath -- "$BENCH/../bin")
BFS="$BIN/bfs"
XTOUCH="$BIN/tests/xtouch"

if [ "${NPROC-}" ]; then
    # Use fewer cores in recursive calls
    export NPROC=$(((NPROC + 1) / 2))
else
    # Assign before exporting so that a failing nproc is caught by set -e
    NPROC=$(nproc)
    export NPROC
fi

# Never run more than 8 parallel jobs
JOBS=$((NPROC < 8 ? NPROC : 8))

# Run a git command against the bare repo
do-git() {
    git -C "$REPO" "$@"
}

if ! [ -e "$REPO" ]; then
    mkdir -p -- "$REPO"
    do-git init -q --bare
fi

# Check whether the repo can already resolve the given ref
has-ref() {
    do-git rev-list --quiet -1 --missing=allow-promisor "$1" &>/dev/null
}

# Fetch a single commit without blobs, history, tags, FETCH_HEAD, or auto-gc
sparse-fetch() {
    do-git -c fetch.negotiationAlgorithm=noop \
        fetch -q \
        --filter=blob:none \
        --depth=1 \
        --no-tags \
        --no-write-fetch-head \
        --no-auto-gc \
        "$@"
}

if ! has-ref "$TAG"; then
    printf 'Fetching %s ...\n' "$TAG" >&2
    do-git config remote.origin.url "$URL"

    # Refs at least 40 characters long are fetched as object IDs (this is how
    # submodule commits are passed in recursive calls); shorter ones as tags
    if ((${#TAG} >= 40)); then
        sparse-fetch origin "$TAG"
    else
        sparse-fetch origin tag "$TAG"
    fi
fi

# Delete a tree in parallel
clean() {
    local d=5

    # Empty out each directory at depth $d in its own bfs process ...
    "$BFS" -f "$1" -mindepth "$d" -maxdepth "$d" -type d -print0 \
        | xargs -0r -n1 -P"$JOBS" -- "$BFS" -j1 -mindepth 1 -delete -f

    # ... then delete whatever is left
    "$BFS" -f "$1" -delete
}

if [ -e "$DIR" ]; then
    printf 'Cleaning old directory tree %s ...\n' "$DIR" >&2

    # Move the old tree into a temporary directory next to it, then delete it
    # in the background while the new tree is created
    TMP=$(mktemp -dp "$(dirname -- "$DIR")")
    mv -- "$DIR" "$TMP"
    clean "$TMP" &
fi

# List gitlinks (submodule references) in the tree
ls-gitlinks() {
    # Strip everything up to and including " commit ", leaving the
    # NUL-terminated "<object>\t<path>" part of each gitlink entry
    do-git ls-tree -zr "$TAG" \
        | sed -zn 's/.* commit //p'
}

# Get the submodule ID for a path
submodule-for-path() {
    do-git config --blob "$TAG:.gitmodules" \
        --name-only \
        --fixed-value \
        --get-regexp 'submodule\..*\.path' "$1" \
        | sed -En 's/submodule\.(.*)\.path/\1/p'
}

# Get the URL for a submodule
submodule-url() {
    # Exit with status 1 (sed's Q1) for URLs that can't be cloned:
    #
    # - https://chrome-internal.googlesource.com/
    #   - not publicly accessible
    # - https://chromium.googlesource.com/external/github.com/WebKit/webkit.git
    #   - is accessible, but the commit (59e9de61b7b3) isn't
    # - https://android.googlesource.com/
    #   - is accessible, but you need an account
    do-git config --blob "$TAG:.gitmodules" \
        --get "submodule.$1.url" \
        | sed -E \
            -e '\|^https://chrome-internal.googlesource.com/|Q1' \
            -e '\|^https://chromium.googlesource.com/external/github.com/WebKit/webkit.git|Q1' \
            -e '\|^https://android.googlesource.com/|Q1'
}

# Recursively checkout submodules
while read -rd '' SUBREF SUBDIR; do
    SUBNAME=$(submodule-for-path "$SUBDIR")
    if [ -z "$SUBNAME" ]; then
        # A gitlink with no .gitmodules entry has no URL to fetch from
        printf 'Skipping %s: no matching entry in .gitmodules\n' "$SUBDIR" >&2
        continue
    fi

    SUBURL=$(submodule-url "$SUBNAME") || continue
    if [ -z "$SUBURL" ]; then
        printf 'Skipping %s: no URL for submodule %s\n' "$SUBDIR" "$SUBNAME" >&2
        continue
    fi

    # Throttle: once JOBS background jobs are running, wait for one to finish
    if (($(jobs -pr | wc -w) >= JOBS)); then
        wait -n
    fi

    "$0" "$SUBURL" "$SUBREF" "$DIR/$SUBDIR" "$REPO/modules/$SUBNAME" &
done < <(ls-gitlinks)

# Touch files in parallel
xtouch() (
    # Runs in a subshell, so the cd does not affect the rest of the script
    cd "$DIR"

    if ((JOBS > 1)); then
        xargs -0r -n4096 -P"$JOBS" -- "$XTOUCH" -p --
    else
        xargs -0r -- "$XTOUCH" -p --
    fi
)

# Check out files
printf 'Checking out %s ...\n' "$DIR" >&2
mkdir -p -- "$DIR"

# Reduce each blob entry to its NUL-terminated path and create it as an
# empty file under $DIR
do-git ls-tree -zr "$TAG" \
    | sed -zn 's/.* blob .*\t//p' \
    | xtouch

# Wait for cleaning/submodules
wait