summaryrefslogtreecommitdiffstats
path: root/bench/clone-tree.sh
diff options
context:
space:
mode:
authorTavian Barnes <tavianator@tavianator.com>2024-05-07 15:42:46 -0400
committerTavian Barnes <tavianator@tavianator.com>2024-05-07 15:42:46 -0400
commit452d6697e0f92326ab139eed4eadd9c2fd8b55ca (patch)
tree0feeb3722dcf6debb6c33c5175342bf1d70a1dba /bench/clone-tree.sh
parenta4299f9bc1d3e60a7e628561e8d650c2a241e1c2 (diff)
parentc5cf2cf90834f2f56b2940d2a499a1a614ebfd21 (diff)
downloadbfs-452d6697e0f92326ab139eed4eadd9c2fd8b55ca.tar.xz
Merge branch 'main' into find2fdfind2fd
Diffstat (limited to 'bench/clone-tree.sh')
-rwxr-xr-xbench/clone-tree.sh143
1 files changed, 143 insertions, 0 deletions
diff --git a/bench/clone-tree.sh b/bench/clone-tree.sh
new file mode 100755
index 0000000..744b5f4
--- /dev/null
+++ b/bench/clone-tree.sh
@@ -0,0 +1,143 @@
+#!/usr/bin/env bash
+
+# Copyright © Tavian Barnes <tavianator@tavianator.com>
+# SPDX-License-Identifier: 0BSD
+
+# Creates a directory tree that matches a git repo, but with empty files. E.g.
+#
+# $ ./bench/clone-tree.sh "https://.../linux.git" v6.5 ./linux ./linux.git
+#
+# will create or update a shallow clone at ./linux.git, then create a directory
+# tree at ./linux with the same directory tree as the tag v6.5, except all files
+# will be empty.
+
+# Abort on any command failure or use of an unset variable
+set -eu
+
+# Exactly four positional arguments are required
+if (($# != 4)); then
+    printf 'Usage: %s https://url/of/repo.git <TAG> path/to/checkout path/to/repo.git\n' "$0" >&2
+    exit 1
+fi
+
+URL="$1"   # Remote repository to fetch from
+TAG="$2"   # Tag (or full commit hash) whose tree is replicated
+DIR="$3"   # Destination for the tree of empty files
+REPO="$4"  # Bare repository that holds the fetched objects
+
+# Locate the bfs binaries relative to this script
+BENCH=$(dirname -- "${BASH_SOURCE[0]}")
+BIN=$(realpath -- "$BENCH/../bin")
+BFS="$BIN/bfs"
+XTOUCH="$BIN/tests/xtouch"
+
+# Assign first and export afterwards: `export VAR=$(cmd)` reports the status
+# of `export` itself, so a failing nproc would go unnoticed by `set -e`
+if [[ -n "${NPROC-}" ]]; then
+    # Use fewer cores in recursive calls
+    NPROC=$(((NPROC + 1) / 2))
+else
+    NPROC=$(nproc)
+fi
+export NPROC
+
+# Never run more than 8 jobs at once
+JOBS=$((NPROC < 8 ? NPROC : 8))
+
+# Run a git command against the repository at $REPO
+do-git() {
+    git -C "${REPO}" "$@"
+}
+
+# Create the bare repository the first time it is needed
+if [[ ! -e "$REPO" ]]; then
+    mkdir -p -- "$REPO"
+    do-git init --quiet --bare
+fi
+
+# Succeed if `git rev-list` accepts $1 in the repository; all output is
+# discarded, only the exit status matters
+has-ref() {
+    do-git rev-list --quiet --max-count=1 --missing=allow-promisor "$1" >/dev/null 2>&1
+}
+
+sparse-fetch() {
+ do-git -c fetch.negotiationAlgorithm=noop fetch -q --filter=blob:none --depth=1 --no-tags --no-write-fetch-head --no-auto-gc "$@"
+}
+
+# Fetch $TAG from $URL unless the repository already has it
+if ! has-ref "$TAG"; then
+    printf 'Fetching %s ...\n' "$TAG" >&2
+    do-git config remote.origin.url "$URL"
+    # Full object IDs (40 hex digits for SHA-1, 64 for SHA-256) are fetched
+    # directly; everything else is fetched as a tag.  Checking the characters
+    # and not just the length keeps long tag names working.
+    if [[ "$TAG" =~ ^([0-9a-fA-F]{40}|[0-9a-fA-F]{64})$ ]]; then
+        sparse-fetch origin "$TAG"
+    else
+        sparse-fetch origin tag "$TAG"
+    fi
+fi
+
+# Delete the tree rooted at $1, spreading the work over $JOBS processes
+clean() {
+    # Directories found at exactly this depth are each handed to their own
+    # single-threaded bfs, which deletes everything below them
+    local depth=5
+    "$BFS" -f "$1" -mindepth "$depth" -maxdepth "$depth" -type d -print0 \
+        | xargs -0r -n1 "-P$JOBS" -- "$BFS" -j1 -mindepth 1 -delete -f
+    # Sweep up whatever the parallel pass left behind
+    "$BFS" -f "$1" -delete
+}
+
+# Move an existing tree out of the way into a fresh temporary directory next
+# to it, then delete it in the background while the new tree is built
+if [[ -e "$DIR" ]]; then
+    printf 'Cleaning old directory tree %s ...\n' "$DIR" >&2
+    TMP=$(mktemp -d -p "$(dirname -- "$DIR")")
+    mv -- "$DIR" "$TMP"
+    clean "$TMP" &
+fi
+
+# List gitlinks (submodule references) in the tree, as NUL-terminated
+# "<commit>\t<path>" records
+ls-gitlinks() {
+    # ls-tree -z records look like "<mode> <type> <object>\t<path>".  Anchor
+    # the match on the mode field so that a path which merely contains
+    # " commit " is not mistaken for a gitlink entry.
+    do-git ls-tree -zr "$TAG" \
+        | sed -zn 's/^[0-7]* commit //p'
+}
+
+# Get the submodule ID for a path
+#
+# Searches the .gitmodules blob of $TAG for a submodule.<ID>.path key whose
+# value is exactly $1, and prints <ID>.  Prints nothing if there is no match.
+submodule-for-path() {
+    # The key pattern uses a single '.*': stacked repetition operators such as
+    # '**' are undefined in POSIX extended regular expressions
+    do-git config --blob "$TAG:.gitmodules" \
+        --name-only \
+        --fixed-value \
+        --get-regexp '^submodule\..*\.path$' "$1" \
+        | sed -En 's/^submodule\.(.*)\.path$/\1/p'
+}
+
+# Get the URL for a submodule
+#
+# Prints the URL configured for submodule $1 in the .gitmodules blob of $TAG.
+# Returns non-zero and prints nothing if no URL can be read, or if the URL is
+# on one of the hosts listed below.
+submodule-url() {
+    # - https://chrome-internal.googlesource.com/
+    #   - not publicly accessible
+    # - https://chromium.googlesource.com/external/github.com/WebKit/webkit.git
+    #   - is accessible, but the commit (59e9de61b7b3) isn't
+    # - https://android.googlesource.com/
+    #   - is accessible, but you need an account
+
+    # Check the lookup on its own: inside a pipeline only sed's exit status
+    # would be seen, and a missing URL would look like success
+    local url
+    url=$(do-git config --blob "$TAG:.gitmodules" --get "submodule.$1.url") || return 1
+
+    # Q1 makes sed quit with status 1 without printing the matching line.
+    # The dots are escaped so that they only match literal dots.
+    printf '%s\n' "$url" \
+        | sed -E \
+            -e '\|^https://chrome-internal\.googlesource\.com/|Q1' \
+            -e '\|^https://chromium\.googlesource\.com/external/github\.com/WebKit/webkit\.git|Q1' \
+            -e '\|^https://android\.googlesource\.com/|Q1'
+}
+
+# Recursively checkout submodules
+#
+# Each record read here is "<commit>\t<path>", NUL-terminated.  Splitting on
+# tabs only keeps leading and trailing spaces in paths intact.
+while IFS=$'\t' read -rd '' SUBREF SUBDIR; do
+    SUBNAME=$(submodule-for-path "$SUBDIR")
+    # A gitlink without a .gitmodules entry has nothing to clone from
+    [[ -n "$SUBNAME" ]] || continue
+
+    SUBURL=$(submodule-url "$SUBNAME") || continue
+    # Never start a recursive clone with an empty URL
+    [[ -n "$SUBURL" ]] || continue
+
+    # Throttle: once $JOBS background jobs are running, wait for one to end.
+    # NOTE(review): `wait -n` returns the finished job's status, so under
+    # `set -e` a failed child aborts this script here -- confirm intended.
+    if (($(jobs -pr | wc -w) >= JOBS)); then
+        wait -n
+    fi
+    "$0" "$SUBURL" "$SUBREF" "$DIR/$SUBDIR" "$REPO/modules/$SUBNAME" &
+done < <(ls-gitlinks)
+
+# Touch the NUL-separated paths read from stdin, relative to $DIR.  Runs in a
+# subshell so the cd does not affect the caller.
+xtouch() (
+    cd "$DIR"
+    # Input is only split into batches when several workers can share it
+    local -a xargs_opts=(-0r)
+    if ((JOBS > 1)); then
+        xargs_opts+=(-n4096 "-P$JOBS")
+    fi
+    xargs "${xargs_opts[@]}" -- "$XTOUCH" -p --
+)
+
+# Check out files
+printf 'Checking out %s ...\n' "$DIR" >&2
+mkdir -p -- "$DIR"
+# Each ls-tree -z record is "<mode> <type> <object>\t<path>".  Strip exactly
+# that prefix from blob records: a greedy '.*\t' would also swallow the start
+# of any path that itself contains a tab.
+do-git ls-tree -zr "$TAG" \
+    | sed -zn 's/^[0-7]* blob [0-9a-f]*\t//p' \
+    | xtouch
+
+# Wait for cleaning/submodules
+wait