summaryrefslogtreecommitdiffstats
path: root/bench
diff options
context:
space:
mode:
authorTavian Barnes <tavianator@tavianator.com>2024-05-07 15:42:46 -0400
committerTavian Barnes <tavianator@tavianator.com>2024-05-07 15:42:46 -0400
commit452d6697e0f92326ab139eed4eadd9c2fd8b55ca (patch)
tree0feeb3722dcf6debb6c33c5175342bf1d70a1dba /bench
parenta4299f9bc1d3e60a7e628561e8d650c2a241e1c2 (diff)
parentc5cf2cf90834f2f56b2940d2a499a1a614ebfd21 (diff)
downloadbfs-find2fd.tar.xz
Merge branch 'main' into find2fdfind2fd
Diffstat (limited to 'bench')
-rw-r--r--bench/.gitignore3
-rw-r--r--bench/README.md51
-rw-r--r--bench/bench.sh715
-rwxr-xr-xbench/clone-tree.sh143
4 files changed, 912 insertions, 0 deletions
diff --git a/bench/.gitignore b/bench/.gitignore
new file mode 100644
index 0000000..170d850
--- /dev/null
+++ b/bench/.gitignore
@@ -0,0 +1,3 @@
+/corpus/
+/results/
+/worktree/
diff --git a/bench/README.md b/bench/README.md
new file mode 100644
index 0000000..56157a0
--- /dev/null
+++ b/bench/README.md
@@ -0,0 +1,51 @@
+This directory contains a suite of benchmarks used to evaluate `bfs` and detect performance regressions.
+To run them, you'll need the [tailfin] benchmark harness.
+You can read the full usage information with
+
+[tailfin]: https://github.com/tavianator/tailfin
+
+```console
+$ tailfin -n run bench/bench.sh --help
+Usage: tailfin run bench/bench.sh [--default]
+ [--complete] [--early-quit] [--print] [--strategies]
+ [--build=...] [--bfs] [--find] [--fd]
+ [--no-clean] [--help]
+...
+```
+
+The benchmarks use various git repositories to have a realistic and reproducible directory structure as a corpus.
+Currently, those are the [Linux], [Rust], and [Chromium] repos.
+The scripts will automatically clone those repos using [partial clone] filters to avoid downloading the actual file contents, saving bandwidth and space.
+
+[Linux]: https://github.com/torvalds/linux.git
+[Rust]: https://github.com/rust-lang/rust.git
+[Chromium]: https://chromium.googlesource.com/chromium/src.git
+[partial clone]: https://git-scm.com/docs/partial-clone
+
+You can try out a quick benchmark by running
+
+```console
+$ tailfin run bench/bench.sh --build=main --complete=linux
+```
+
+This will build the `main` branch, and measure the complete traversal of the Linux repo.
+Results will be both printed to the console and saved in a Markdown file, which you can find by running
+
+```console
+$ tailfin latest
+results/2023/09/29/15:32:49
+$ cat results/2023/09/29/15:32:49/runs/1/bench.md
+## Complete traversal
+...
+```
+
+To measure performance improvements/regressions of a change, compare the `main` branch to the topic branch on the full benchmark suite:
+
+```console
+$ tailfin run bench/bench.sh --build=main --build=branch --default
+```
+
+This will take a few minutes.
+Results from the full benchmark suite can be seen in performance-related pull requests, for example [#126].
+
+[#126]: https://github.com/tavianator/bfs/pull/126
diff --git a/bench/bench.sh b/bench/bench.sh
new file mode 100644
index 0000000..cf1ae49
--- /dev/null
+++ b/bench/bench.sh
@@ -0,0 +1,715 @@
+#!/hint/bash
+
+# Copyright © Tavian Barnes <tavianator@tavianator.com>
+# SPDX-License-Identifier: 0BSD
+
+declare -gA URLS=(
+ [chromium]="https://chromium.googlesource.com/chromium/src.git"
+ [linux]="https://github.com/torvalds/linux.git"
+ [rust]="https://github.com/rust-lang/rust.git"
+)
+
+declare -gA TAGS=(
+ [chromium]=119.0.6036.2
+ [linux]=v6.5
+ [rust]=1.72.1
+)
+
+COMPLETE_DEFAULT=(linux rust chromium)
+EARLY_QUIT_DEFAULT=(chromium)
+STAT_DEFAULT=(rust)
+PRINT_DEFAULT=(linux)
+STRATEGIES_DEFAULT=(rust)
+JOBS_DEFAULT=(rust)
+EXEC_DEFAULT=(linux)
+
+usage() {
+ printf 'Usage: tailfin run %s\n' "${BASH_SOURCE[0]}"
+ printf ' [--default] [--<BENCHMARK> [--<BENCHMARK>...]]\n'
+ printf ' [--build=...] [--bfs] [--find] [--fd]\n'
+ printf ' [--no-clean] [--help]\n\n'
+
+ printf ' --default\n'
+ printf ' Run the default set of benchmarks\n\n'
+
+ printf ' --complete[=CORPUS]\n'
+ printf ' Complete traversal benchmark.\n'
+ printf ' Default corpus is --complete="%s"\n\n' "${COMPLETE_DEFAULT[*]}"
+
+ printf ' --early-quit[=CORPUS]\n'
+ printf ' Early quitting benchmark.\n'
+ printf ' Default corpus is --early-quit=%s\n\n' "${EARLY_QUIT_DEFAULT[*]}"
+
+ printf ' --stat[=CORPUS]\n'
+ printf ' Traversal with stat().\n'
+ printf ' Default corpus is --stat=%s\n\n' "${STAT_DEFAULT[*]}"
+
+ printf ' --print[=CORPUS]\n'
+ printf ' Path printing benchmark.\n'
+ printf ' Default corpus is --print=%s\n\n' "${PRINT_DEFAULT[*]}"
+
+ printf ' --strategies[=CORPUS]\n'
+ printf ' Search strategy benchmark.\n'
+ printf ' Default corpus is --strategies=%s\n\n' "${STRATEGIES_DEFAULT[*]}"
+
+ printf ' --jobs[=CORPUS]\n'
+ printf ' Parallelism benchmark.\n'
+ printf ' Default corpus is --jobs=%s\n\n' "${JOBS_DEFAULT[*]}"
+
+ printf ' --exec[=CORPUS]\n'
+ printf ' Process spawning benchmark.\n'
+ printf ' Default corpus is --exec=%s\n\n' "${EXEC_DEFAULT[*]}"
+
+ printf ' --build=COMMIT\n'
+ printf ' Build this bfs commit and benchmark it. Specify multiple times to\n'
+ printf ' compare, e.g. --build=3.0.1 --build=3.0.2\n\n'
+
+ printf ' --bfs[=COMMAND]\n'
+ printf ' Benchmark an existing build of bfs\n\n'
+
+ printf ' --find[=COMMAND]\n'
+ printf ' Compare against find\n\n'
+
+ printf ' --fd[=COMMAND]\n'
+ printf ' Compare against fd\n\n'
+
+ printf ' --no-clean\n'
+ printf ' Use any existing corpora as-is\n\n'
+
+ printf ' --help\n'
+ printf ' This message\n\n'
+}
+
+# Hack to export an array
+export_array() {
+ local str=$(declare -p "$1" | sed 's/ -a / -ga /')
+ unset "$1"
+ export "$1=$str"
+}
+
+# Hack to import an array
+import_array() {
+ local cmd="${!1}"
+ unset "$1"
+ eval "$cmd"
+}
+
+# Set up the benchmarks
+setup() {
+ ROOT=$(realpath -- "$(dirname -- "${BASH_SOURCE[0]}")/..")
+ if ! [ "$PWD" -ef "$ROOT" ]; then
+ printf 'error: Please run this script from %s\n\n' "$ROOT" >&2
+ usage >&2
+ exit $EX_USAGE
+ fi
+
+ nproc=$(nproc)
+
+ # Options
+
+ CLEAN=1
+
+ BUILD=()
+ BFS=()
+ FIND=()
+ FD=()
+
+ COMPLETE=()
+ EARLY_QUIT=()
+ STAT=()
+ PRINT=()
+ STRATEGIES=()
+ JOBS=()
+ EXEC=()
+
+ for arg; do
+ case "$arg" in
+ # Flags
+ --no-clean)
+ CLEAN=0
+ ;;
+ # bfs commits/tags to benchmark
+ --build=*)
+ BUILD+=("${arg#*=}")
+ BFS+=("bfs-${arg#*=}")
+ ;;
+ # Utilities to benchmark against
+ --bfs)
+ BFS+=(bfs)
+ ;;
+ --bfs=*)
+ BFS+=("${arg#*=}")
+ ;;
+ --find)
+ FIND+=(find)
+ ;;
+ --find=*)
+ FIND+=("${arg#*=}")
+ ;;
+ --fd)
+ FD+=(fd)
+ ;;
+ --fd=*)
+ FD+=("${arg#*=}")
+ ;;
+ # Benchmark groups
+ --complete)
+ COMPLETE=("${COMPLETE_DEFAULT[@]}")
+ ;;
+ --complete=*)
+ read -ra COMPLETE <<<"${arg#*=}"
+ ;;
+ --early-quit)
+ EARLY_QUIT=("${EARLY_QUIT_DEFAULT[@]}")
+ ;;
+ --early-quit=*)
+ read -ra EARLY_QUIT <<<"${arg#*=}"
+ ;;
+ --stat)
+ STAT=("${STAT_DEFAULT[@]}")
+ ;;
+ --stat=*)
+ read -ra STAT <<<"${arg#*=}"
+ ;;
+ --print)
+ PRINT=("${PRINT_DEFAULT[@]}")
+ ;;
+ --print=*)
+ read -ra PRINT <<<"${arg#*=}"
+ ;;
+ --strategies)
+ STRATEGIES=("${STRATEGIES_DEFAULT[@]}")
+ ;;
+ --strategies=*)
+ read -ra STRATEGIES <<<"${arg#*=}"
+ ;;
+ --jobs)
+ JOBS=("${JOBS_DEFAULT[@]}")
+ ;;
+ --jobs=*)
+ read -ra JOBS <<<"${arg#*=}"
+ ;;
+ --exec)
+ EXEC=("${EXEC_DEFAULT[@]}")
+ ;;
+ --exec=*)
+ read -ra EXEC <<<"${arg#*=}"
+ ;;
+ --default)
+ COMPLETE=("${COMPLETE_DEFAULT[@]}")
+ EARLY_QUIT=("${EARLY_QUIT_DEFAULT[@]}")
+ STAT=("${STAT_DEFAULT[@]}")
+ PRINT=("${PRINT_DEFAULT[@]}")
+ STRATEGIES=("${STRATEGIES_DEFAULT[@]}")
+ JOBS=("${JOBS_DEFAULT[@]}")
+ EXEC=("${EXEC_DEFAULT[@]}")
+ ;;
+ --help)
+ usage
+ exit
+ ;;
+ *)
+ printf 'error: Unknown option %q\n\n' "$arg" >&2
+ usage >&2
+ exit $EX_USAGE
+ ;;
+ esac
+ done
+
+ if ((UID == 0)); then
+ max-freq
+ fi
+
+ echo "Building bfs ..."
+ as-user ./configure --enable-release
+ as-user make -s -j"$nproc" all
+
+ as-user mkdir -p bench/corpus
+
+ declare -A cloned=()
+ for corpus in "${COMPLETE[@]}" "${EARLY_QUIT[@]}" "${STAT[@]}" "${PRINT[@]}" "${STRATEGIES[@]}" "${JOBS[@]}" "${EXEC[@]}"; do
+ if ((cloned["$corpus"])); then
+ continue
+ fi
+ cloned["$corpus"]=1
+
+ dir="bench/corpus/$corpus"
+ if ((CLEAN)) || ! [ -e "$dir" ]; then
+ as-user ./bench/clone-tree.sh "${URLS[$corpus]}" "${TAGS[$corpus]}" "$dir"{,.git}
+ fi
+ done
+
+ if ((${#BUILD[@]} > 0)); then
+ echo "Creating bfs worktree ..."
+
+ worktree="bench/worktree"
+ as-user git worktree add -qd "$worktree"
+ defer as-user git worktree remove "$worktree"
+
+ bin="$(realpath -- "$SETUP_DIR")/bin"
+ as-user mkdir "$bin"
+
+ for commit in "${BUILD[@]}"; do
+ (
+ echo "Building bfs $commit ..."
+ cd "$worktree"
+ as-user git checkout -qd "$commit" --
+ if [ -e configure ]; then
+ as-user ./configure --enable-release
+ as-user make -s -j"$nproc"
+ else
+ as-user make -s -j"$nproc" release
+ fi
+ if [ -e ./bin/bfs ]; then
+ as-user cp ./bin/bfs "$bin/bfs-$commit"
+ else
+ as-user cp ./bfs "$bin/bfs-$commit"
+ fi
+ as-user make -s clean
+ )
+ done
+
+ # $SETUP_DIR contains `:` so it won't work in $PATH
+ # Work around this with a symlink
+ tmp=$(as-user mktemp)
+ as-user ln -sf "$bin" "$tmp"
+ defer rm "$tmp"
+ export PATH="$tmp:$PATH"
+ fi
+
+ export_array BFS
+ export_array FIND
+ export_array FD
+
+ export_array COMPLETE
+ export_array EARLY_QUIT
+ export_array STAT
+ export_array PRINT
+ export_array STRATEGIES
+ export_array JOBS
+ export_array EXEC
+
+ if ((UID == 0)); then
+ turbo-off
+ fi
+
+ sync
+}
+
+# Runs hyperfine and saves the output
+do-hyperfine() {
+ local tmp_md="$BENCH_DIR/.bench.md"
+ local md="$BENCH_DIR/bench.md"
+ local tmp_json="$BENCH_DIR/.bench.json"
+ local json="$BENCH_DIR/bench.json"
+
+ if (($# == 0)); then
+ printf 'Nothing to do\n\n' | tee -a "$md"
+ return 1
+ fi
+
+ hyperfine -w2 -M20 --export-markdown="$tmp_md" --export-json="$tmp_json" "$@" &>/dev/tty
+ cat "$tmp_md" >>"$md"
+ cat "$tmp_json" >>"$json"
+ rm "$tmp_md" "$tmp_json"
+
+ printf '\n' | tee -a "$md"
+}
+
+# Print the header for a benchmark group
+group() {
+ printf "## $1\\n\\n" "${@:2}" | tee -a "$BENCH_DIR/bench.md"
+}
+
+# Print the header for a benchmark subgroup
+subgroup() {
+ printf "### $1\\n\\n" "${@:2}" | tee -a "$BENCH_DIR/bench.md"
+}
+
+# Print the header for a benchmark sub-subgroup
+subsubgroup() {
+ printf "#### $1\\n\\n" "${@:2}" | tee -a "$BENCH_DIR/bench.md"
+}
+
+# Benchmark the complete traversal of a directory tree
+# (without printing anything)
+bench-complete-corpus() {
+ total=$(./bin/bfs "$2" -printf '.' | wc -c)
+
+ subgroup "%s (%'d files)" "$1" "$total"
+
+ cmds=()
+ for bfs in "${BFS[@]}"; do
+ cmds+=("$bfs $2 -false")
+ done
+
+ for find in "${FIND[@]}"; do
+ cmds+=("$find $2 -false")
+ done
+
+ for fd in "${FD[@]}"; do
+ cmds+=("$fd -u '^$' $2")
+ done
+
+ do-hyperfine "${cmds[@]}"
+}
+
+# All complete traversal benchmarks
+bench-complete() {
+ if (($#)); then
+ group "Complete traversal"
+
+ for corpus; do
+ bench-complete-corpus "$corpus ${TAGS[$corpus]}" "bench/corpus/$corpus"
+ done
+ fi
+}
+
+# Benchmark quiting as soon as a file is seen
+bench-early-quit-corpus() {
+ dir="$2"
+ max_depth=$(./bin/bfs "$dir" -printf '%d\n' | sort -rn | head -n1)
+
+ subgroup '%s (depth %d)' "$1" "$max_depth"
+
+ # Save the list of unique filenames, along with their depth
+ UNIQ="$BENCH_DIR/uniq"
+ ./bin/bfs "$dir" -printf '%d %f\n' | sort -k2 | uniq -uf1 >"$UNIQ"
+
+ for ((i = 2; i <= max_depth; i *= 2)); do
+ subsubgroup 'Depth %d' "$i"
+
+ # Sample random uniquely-named files at depth $i
+ export FILES="$BENCH_DIR/uniq-$i"
+ sed -n "s/^$i //p" "$UNIQ" | shuf -n20 >"$FILES"
+ if ! [ -s "$FILES" ]; then
+ continue
+ fi
+
+ cmds=()
+ for bfs in "${BFS[@]}"; do
+ cmds+=("$bfs $dir -name \$(shuf -n1 \$FILES) -print -quit")
+ done
+
+ for find in "${FIND[@]}"; do
+ cmds+=("$find $dir -name \$(shuf -n1 \$FILES) -print -quit")
+ done
+
+ for fd in "${FD[@]}"; do
+ cmds+=("$fd -usg1 \$(shuf -n1 \$FILES) $dir")
+ done
+
+ do-hyperfine "${cmds[@]}"
+ done
+}
+
+# All early-quitting benchmarks
+bench-early-quit() {
+ if (($#)); then
+ group "Early termination"
+
+ for corpus; do
+ bench-early-quit-corpus "$corpus ${TAGS[$corpus]}" "bench/corpus/$corpus"
+ done
+ fi
+}
+
+# Benchmark traversal with stat()
+bench-stat-corpus() {
+ total=$(./bin/bfs "$2" -printf '.' | wc -c)
+
+ subgroup "%s (%'d files)" "$1" "$total"
+
+ cmds=()
+ for bfs in "${BFS[@]}"; do
+ cmds+=("$bfs $2 -size 1024G")
+ done
+
+ for find in "${FIND[@]}"; do
+ cmds+=("$find $2 -size 1024G")
+ done
+
+ for fd in "${FD[@]}"; do
+ cmds+=("$fd -u --search-path $2 --size 1024Gi")
+ done
+
+ do-hyperfine "${cmds[@]}"
+}
+
+# stat() benchmarks
+bench-stat() {
+ if (($#)); then
+ group "Traversal with stat()"
+
+ for corpus; do
+ bench-stat-corpus "$corpus ${TAGS[$corpus]}" "bench/corpus/$corpus"
+ done
+ fi
+}
+
+# Benchmark printing paths without colors
+bench-print-nocolor() {
+ subsubgroup '%s' "$1"
+
+ cmds=()
+ for bfs in "${BFS[@]}"; do
+ cmds+=("$bfs $2")
+ done
+
+ for find in "${FIND[@]}"; do
+ cmds+=("$find $2")
+ done
+
+ for fd in "${FD[@]}"; do
+ cmds+=("$fd -u --search-path $2")
+ done
+
+ do-hyperfine "${cmds[@]}"
+}
+
+# Benchmark printing paths with colors
+bench-print-color() {
+ subsubgroup '%s' "$1"
+
+ cmds=()
+ for bfs in "${BFS[@]}"; do
+ cmds+=("$bfs $2 -color")
+ done
+
+ for fd in "${FD[@]}"; do
+ cmds+=("$fd -u --search-path $2 --color=always")
+ done
+
+ do-hyperfine "${cmds[@]}"
+}
+
+# All printing benchmarks
+bench-print() {
+ if (($#)); then
+ group "Printing paths"
+
+ subgroup "Without colors"
+ for corpus; do
+ bench-print-nocolor "$corpus ${TAGS[$corpus]}" "bench/corpus/$corpus"
+ done
+
+ subgroup "With colors"
+ for corpus; do
+ bench-print-color "$corpus ${TAGS[$corpus]}" "bench/corpus/$corpus"
+ done
+ fi
+}
+
+# Benchmark search strategies
+bench-strategies-corpus() {
+ subgroup '%s' "$1"
+
+ if ((${#BFS[@]} == 1)); then
+ cmds=("$BFS -S "{bfs,dfs,ids,eds}" $2 -false")
+ do-hyperfine "${cmds[@]}"
+ else
+ for S in bfs dfs ids eds; do
+ subsubgroup '`-S %s`' "$S"
+
+ cmds=()
+ for bfs in "${BFS[@]}"; do
+ cmds+=("$bfs -S $S $2 -false")
+ done
+ do-hyperfine "${cmds[@]}"
+ done
+ fi
+}
+
+# All search strategy benchmarks
+bench-strategies() {
+ if (($#)); then
+ group "Search strategies"
+
+ for corpus; do
+ bench-strategies-corpus "$corpus ${TAGS[$corpus]}" "bench/corpus/$corpus"
+ done
+ fi
+}
+
+# Benchmark parallelism
+bench-jobs-corpus() {
+ subgroup '%s' "$1"
+
+ if ((${#BFS[@]} + ${#FD[@]} == 1)); then
+ cmds=()
+ for bfs in "${BFS[@]}"; do
+ if "$bfs" -j1 -quit &>/dev/null; then
+ cmds+=("$bfs -j"{1,2,3,4,6,8,12,16}" $2 -false")
+ else
+ cmds+=("$bfs $2 -false")
+ fi
+ done
+
+ for fd in "${FD[@]}"; do
+ cmds+=("$fd -j"{1,2,3,4,6,8,12,16}" -u '^$' $2")
+ done
+
+ do-hyperfine "${cmds[@]}"
+ else
+ for j in 1 2 3 4 6 8 12 16; do
+ subsubgroup '`-j%d`' $j
+
+ cmds=()
+ for bfs in "${BFS[@]}"; do
+ if "$bfs" -j1 -quit &>/dev/null; then
+ cmds+=("$bfs -j$j $2 -false")
+ elif ((j == 1)); then
+ cmds+=("$bfs $2 -false")
+ fi
+ done
+
+ for fd in "${FD[@]}"; do
+ cmds+=("$fd -j$j -u '^$' $2")
+ done
+
+ if ((${#cmds[@]})); then
+ do-hyperfine "${cmds[@]}"
+ fi
+ done
+ fi
+}
+
+# All parallelism benchmarks
+bench-jobs() {
+ if (($#)); then
+ group "Parallelism"
+
+ for corpus; do
+ bench-jobs-corpus "$corpus ${TAGS[$corpus]}" "bench/corpus/$corpus"
+ done
+ fi
+}
+
+# One file/process
+bench-exec-single() {
+ subsubgroup "One file per process"
+
+ cmds=()
+ for cmd in "${BFS[@]}" "${FIND[@]}"; do
+ cmds+=("$cmd $1 -maxdepth 2 -exec true -- {} \;")
+ done
+
+ for fd in "${FD[@]}"; do
+ cmds+=("$fd -u --search-path $1 --max-depth=2 -x true --")
+ # Without -j1, fd runs multiple processes in parallel, which is unfair
+ cmds+=("$fd -j1 -u --search-path $1 --max-depth=2 -x true --")
+ done
+
+ do-hyperfine "${cmds[@]}"
+}
+
+# Many files/process
+bench-exec-multi() {
+ subsubgroup "Many files per process"
+
+ cmds=()
+ for cmd in "${BFS[@]}" "${FIND[@]}"; do
+ cmds+=("$cmd $1 -exec true -- {} +")
+ done
+
+ for fd in "${FD[@]}"; do
+ cmds+=("$fd -u --search-path $1 -X true --")
+ done
+
+ do-hyperfine "${cmds[@]}"
+}
+
+# Many files, same dir
+bench-exec-chdir() {
+ if ((${#BFS[@]} + ${#FIND[@]} == 0)); then
+ return
+ fi
+
+ subsubgroup "Spawn in parent directory"
+
+ cmds=()
+ for cmd in "${BFS[@]}" "${FIND[@]}"; do
+ cmds+=("$cmd $1 -maxdepth 3 -execdir true -- {} +")
+ done
+
+ do-hyperfine "${cmds[@]}"
+}
+
+# Benchmark process spawning
+bench-exec-corpus() {
+ subgroup '%s' "$1"
+
+ bench-exec-single "$2"
+ bench-exec-multi "$2"
+ bench-exec-chdir "$2"
+}
+
+# All process spawning benchmarks
+bench-exec() {
+ if (($#)); then
+ group "Process spawning"
+
+ for corpus; do
+ bench-exec-corpus "$corpus ${TAGS[$corpus]}" "bench/corpus/$corpus"
+ done
+ fi
+}
+
+# Print benchmarked versions
+bench-versions() {
+ subgroup "Versions"
+
+ local md="$BENCH_DIR/bench.md"
+
+ printf '```console\n' >>"$md"
+
+ {
+ for bfs in "${BFS[@]}"; do
+ printf '$ %s --version | head -n1\n' "$bfs"
+ "$bfs" --version | head -n1
+ done
+
+ for find in "${FIND[@]}"; do
+ printf '$ %s --version | head -n1\n' "$find"
+ "$find" --version | head -n1
+ done
+
+ for fd in "${FD[@]}"; do
+ printf '$ %s --version\n' "$fd"
+ "$fd" --version
+ done
+ } | tee -a "$md"
+
+ printf '```' >>"$md"
+}
+
+# Print benchmark details
+bench-details() {
+ group "Details"
+
+ bench-versions
+}
+
+# Run all the benchmarks
+bench() {
+ import_array BFS
+ import_array FIND
+ import_array FD
+
+ import_array COMPLETE
+ import_array EARLY_QUIT
+ import_array STAT
+ import_array PRINT
+ import_array STRATEGIES
+ import_array JOBS
+ import_array EXEC
+
+ bench-complete "${COMPLETE[@]}"
+ bench-early-quit "${EARLY_QUIT[@]}"
+ bench-stat "${STAT[@]}"
+ bench-print "${PRINT[@]}"
+ bench-strategies "${STRATEGIES[@]}"
+ bench-jobs "${JOBS[@]}"
+ bench-exec "${EXEC[@]}"
+ bench-details
+}
diff --git a/bench/clone-tree.sh b/bench/clone-tree.sh
new file mode 100755
index 0000000..744b5f4
--- /dev/null
+++ b/bench/clone-tree.sh
@@ -0,0 +1,143 @@
+#!/usr/bin/env bash
+
+# Copyright © Tavian Barnes <tavianator@tavianator.com>
+# SPDX-License-Identifier: 0BSD
+
+# Creates a directory tree that matches a git repo, but with empty files. E.g.
+#
+# $ ./bench/clone-tree.sh "https://.../linux.git" v6.5 ./linux ./linux.git
+#
+# will create or update a shallow clone at ./linux.git, then create a directory
+# tree at ./linux with the same directory tree as the tag v6.5, except all files
+# will be empty.
+
+set -eu
+
+if (($# != 4)); then
+ printf 'Usage: %s https://url/of/repo.git <TAG> path/to/checkout path/to/repo.git\n' "$0" >&2
+ exit 1
+fi
+
+URL="$1"
+TAG="$2"
+DIR="$3"
+REPO="$4"
+
+BENCH=$(dirname -- "${BASH_SOURCE[0]}")
+BIN=$(realpath -- "$BENCH/../bin")
+BFS="$BIN/bfs"
+XTOUCH="$BIN/tests/xtouch"
+
+if [ "${NPROC-}" ]; then
+ # Use fewer cores in recursive calls
+ export NPROC=$(((NPROC + 1) / 2))
+else
+ export NPROC=$(nproc)
+fi
+
+JOBS=$((NPROC < 8 ? NPROC : 8))
+
+do-git() {
+ git -C "$REPO" "$@"
+}
+
+if ! [ -e "$REPO" ]; then
+ mkdir -p -- "$REPO"
+ do-git init -q --bare
+fi
+
+has-ref() {
+ do-git rev-list --quiet -1 --missing=allow-promisor "$1" &>/dev/null
+}
+
+sparse-fetch() {
+ do-git -c fetch.negotiationAlgorithm=noop fetch -q --filter=blob:none --depth=1 --no-tags --no-write-fetch-head --no-auto-gc "$@"
+}
+
+if ! has-ref "$TAG"; then
+ printf 'Fetching %s ...\n' "$TAG" >&2
+ do-git config remote.origin.url "$URL"
+ if ((${#TAG} >= 40)); then
+ sparse-fetch origin "$TAG"
+ else
+ sparse-fetch origin tag "$TAG"
+ fi
+fi
+
+# Delete a tree in parallel
+clean() {
+ local d=5
+ "$BFS" -f "$1" -mindepth $d -maxdepth $d -type d -print0 \
+ | xargs -0r -n1 -P$JOBS -- "$BFS" -j1 -mindepth 1 -delete -f
+ "$BFS" -f "$1" -delete
+}
+
+if [ -e "$DIR" ]; then
+ printf 'Cleaning old directory tree %s ...\n' "$DIR" >&2
+ TMP=$(mktemp -dp "$(dirname -- "$DIR")")
+ mv -- "$DIR" "$TMP"
+ clean "$TMP" &
+fi
+
+# List gitlinks (submodule references) in the tree
+ls-gitlinks() {
+ do-git ls-tree -zr "$TAG" \
+ | sed -zn 's/.* commit //p'
+}
+
+# Get the submodule ID for a path
+submodule-for-path() {
+ do-git config --blob "$TAG:.gitmodules" \
+ --name-only \
+ --fixed-value \
+ --get-regexp 'submodule\..**\.path' "$1" \
+ | sed -En 's/submodule\.(.*)\.path/\1/p'
+}
+
+# Get the URL for a submodule
+submodule-url() {
+ # - https://chrome-internal.googlesource.com/
+ # - not publicly accessible
+ # - https://chromium.googlesource.com/external/github.com/WebKit/webkit.git
+ # - is accessible, but the commit (59e9de61b7b3) isn't
+ # - https://android.googlesource.com/
+ # - is accessible, but you need an account
+
+ do-git config --blob "$TAG:.gitmodules" \
+ --get "submodule.$1.url" \
+ | sed -E \
+ -e '\|^https://chrome-internal.googlesource.com/|Q1' \
+ -e '\|^https://chromium.googlesource.com/external/github.com/WebKit/webkit.git|Q1' \
+ -e '\|^https://android.googlesource.com/|Q1'
+}
+
+# Recursively checkout submodules
+while read -rd '' SUBREF SUBDIR; do
+ SUBNAME=$(submodule-for-path "$SUBDIR")
+ SUBURL=$(submodule-url "$SUBNAME") || continue
+
+ if (($(jobs -pr | wc -w) >= JOBS)); then
+ wait -n
+ fi
+ "$0" "$SUBURL" "$SUBREF" "$DIR/$SUBDIR" "$REPO/modules/$SUBNAME" &
+done < <(ls-gitlinks)
+
+# Touch files in parallel
+xtouch() (
+ cd "$DIR"
+ if ((JOBS > 1)); then
+ xargs -0r -n4096 -P$JOBS -- "$XTOUCH" -p --
+ else
+ xargs -0r -- "$XTOUCH" -p --
+ fi
+)
+
+# Check out files
+printf 'Checking out %s ...\n' "$DIR" >&2
+mkdir -p -- "$DIR"
+do-git ls-tree -zr "$TAG"\
+ | sed -zn 's/.* blob .*\t//p' \
+ | xtouch
+
+# Wait for cleaning/submodules
+wait