5 files changed, 1401 insertions, 0 deletions
diff --git a/bench/.gitignore b/bench/.gitignore
new file mode 100644
index 0000000..170d850
--- /dev/null
+++ b/bench/.gitignore
@@ -0,0 +1,3 @@
+/corpus/
+/results/
+/worktree/
diff --git a/bench/README.md b/bench/README.md
new file mode 100644
index 0000000..56157a0
--- /dev/null
+++ b/bench/README.md
@@ -0,0 +1,51 @@
+This directory contains a suite of benchmarks used to evaluate `bfs` and detect performance regressions.
+To run them, you'll need the [tailfin] benchmark harness.
+You can read the full usage information with
+
+[tailfin]: https://github.com/tavianator/tailfin
+
+```console
+$ tailfin -n run bench/bench.sh --help
+Usage: tailfin run bench/bench.sh [--default]
+           [--complete] [--early-quit] [--print] [--strategies]
+           [--build=...] [--bfs] [--find] [--fd]
+           [--no-clean] [--help]
+...
+```
+
+The benchmarks use various git repositories to have a realistic and reproducible directory structure as a corpus.
+Currently, those are the [Linux], [Rust], and [Chromium] repos.
+The scripts will automatically clone those repos using [partial clone] filters to avoid downloading the actual file contents, saving bandwidth and space.
+
+[Linux]: https://github.com/torvalds/linux.git
+[Rust]: https://github.com/rust-lang/rust.git
+[Chromium]: https://chromium.googlesource.com/chromium/src.git
+[partial clone]: https://git-scm.com/docs/partial-clone
+
+You can try out a quick benchmark by running
+
+```console
+$ tailfin run bench/bench.sh --build=main --complete=linux
+```
+
+This will build the `main` branch, and measure the complete traversal of the Linux repo.
+Results will be both printed to the console and saved in a Markdown file, which you can find by running
+
+```console
+$ tailfin latest
+results/2023/09/29/15:32:49
+$ cat results/2023/09/29/15:32:49/runs/1/bench.md
+## Complete traversal
+...
+```
+
+To measure performance improvements/regressions of a change, compare the `main` branch to the topic branch on the full benchmark suite:
+
+```console
+$ tailfin run bench/bench.sh --build=main --build=branch --default
+```
+
+This will take a few minutes.
+Results from the full benchmark suite can be seen in performance-related pull requests, for example [#126].
+
+[#126]: https://github.com/tavianator/bfs/pull/126
diff --git a/bench/bench.sh b/bench/bench.sh
new file mode 100644
index 0000000..c9ed978
--- /dev/null
+++ b/bench/bench.sh
@@ -0,0 +1,749 @@
+#!/hint/bash
+
+# Copyright © Tavian Barnes <tavianator@tavianator.com>
+# SPDX-License-Identifier: 0BSD
+
+declare -gA URLS=(
+    [chromium]="https://chromium.googlesource.com/chromium/src.git"
+    [linux]="https://github.com/torvalds/linux.git"
+    [rust]="https://github.com/rust-lang/rust.git"
+) # URLS: repository each corpus is cloned from, keyed by corpus name
+
+declare -gA TAGS=(
+    [chromium]=119.0.6036.2
+    [linux]=v6.5
+    [rust]=1.72.1
+) # TAGS: tag of each corpus repository that gets checked out
+
+COMPLETE_DEFAULT=(linux rust chromium) # *_DEFAULT: corpora used when an option is given without =CORPUS, or by --default
+EARLY_QUIT_DEFAULT=(chromium)
+STAT_DEFAULT=(rust)
+PRINT_DEFAULT=(linux)
+STRATEGIES_DEFAULT=(rust)
+JOBS_DEFAULT=(rust)
+EXEC_DEFAULT=(linux)
+SORTED_DEFAULT=(chromium)
+
+usage() { # Print the option summary to stdout (setup() redirects it to stderr when reporting an error)
+    printf 'Usage: tailfin run %s\n' "${BASH_SOURCE[0]}"
+    printf '           [--default] [--<BENCHMARK> [--<BENCHMARK>...]]\n'
+    printf '           [--build=...] [--bfs] [--find] [--fd]\n'
+    printf '           [--no-clean] [--help]\n\n'
+
+    printf '  --default\n'
+    printf '      Run the default set of benchmarks\n\n'
+
+    printf '  --complete[=CORPUS]\n'
+    printf '      Complete traversal benchmark.\n'
+    printf '      Default corpus is --complete="%s"\n\n' "${COMPLETE_DEFAULT[*]}"
+
+    printf '  --early-quit[=CORPUS]\n'
+    printf '      Early quitting benchmark.\n'
+    printf '      Default corpus is --early-quit=%s\n\n' "${EARLY_QUIT_DEFAULT[*]}"
+
+    printf '  --stat[=CORPUS]\n'
+    printf '      Traversal with stat().\n'
+    printf '      Default corpus is --stat=%s\n\n' "${STAT_DEFAULT[*]}"
+
+    printf '  --print[=CORPUS]\n'
+    printf '      Path printing benchmark.\n'
+    printf '      Default corpus is --print=%s\n\n' "${PRINT_DEFAULT[*]}"
+
+    printf '  --strategies[=CORPUS]\n'
+    printf '      Search strategy benchmark.\n'
+    printf '      Default corpus is --strategies=%s\n\n' "${STRATEGIES_DEFAULT[*]}"
+
+    printf '  --jobs[=CORPUS]\n'
+    printf '      Parallelism benchmark.\n'
+    printf '      Default corpus is --jobs=%s\n\n' "${JOBS_DEFAULT[*]}"
+
+    printf '  --exec[=CORPUS]\n'
+    printf '      Process spawning benchmark.\n'
+    printf '      Default corpus is --exec=%s\n\n' "${EXEC_DEFAULT[*]}"
+
+    printf '  --sorted[=CORPUS]\n'
+    printf '      Sorted traversal benchmark.\n'
+    printf '      Default corpus is --sorted=%s\n\n' "${SORTED_DEFAULT[*]}"
+
+    printf '  --build=COMMIT\n'
+    printf '      Build this bfs commit and benchmark it.  Specify multiple times to\n'
+    printf '      compare, e.g. --build=3.0.1 --build=3.0.2\n\n'
+
+    printf '  --bfs[=COMMAND]\n'
+    printf '      Benchmark an existing build of bfs\n\n'
+
+    printf '  --find[=COMMAND]\n'
+    printf '      Compare against find\n\n'
+
+    printf '  --fd[=COMMAND]\n'
+    printf '      Compare against fd\n\n'
+
+    printf '  --no-clean\n'
+    printf '      Use any existing corpora as-is\n\n'
+
+    printf '  --help\n'
+    printf '      This message\n\n'
+}
+
+# Hack to export an array: replace array $1 with an exported string of the same name holding its `declare` command
+export_array() {
+    local str=$(declare -p "$1" | sed 's/ -a / -ga /') # add -g so the eval inside import_array() declares a global
+    unset "$1" # drop the array so the name can be reused for the exported string
+    export "$1=$str"
+}
+
+# Hack to import an array: evaluate the `declare` command that export_array() stored in the variable named $1
+import_array() {
+    local cmd="${!1}" # indirect expansion: the value of the variable whose name is $1
+    unset "$1" # remove the exported string before the name is re-declared as an array
+    eval "$cmd"
+}
+
+# Set up the benchmarks: parse the options, build bfs, clone the corpora, and export the configuration for bench()
+setup() {
+    ROOT=$(realpath -- "$(dirname -- "${BASH_SOURCE[0]}")/..") # parent of the directory containing this script
+    if ! [ "$PWD" -ef "$ROOT" ]; then
+        printf 'error: Please run this script from %s\n\n' "$ROOT" >&2
+        usage >&2
+        exit $EX_USAGE # EX_USAGE is not defined in this file; presumably supplied by the tailfin harness
+    fi
+
+    nproc=$(nproc)
+
+    # Options
+
+    CLEAN=1
+
+    BUILD=()
+    BFS=()
+    FIND=()
+    FD=()
+
+    COMPLETE=()
+    EARLY_QUIT=()
+    STAT=()
+    PRINT=()
+    STRATEGIES=()
+    JOBS=()
+    EXEC=()
+    SORTED=()
+
+    for arg; do # iterates over the positional parameters
+        case "$arg" in
+            # Flags
+            --no-clean)
+                CLEAN=0
+                ;;
+            # bfs commits/tags to benchmark
+            --build=*)
+                BUILD+=("${arg#*=}")
+                BFS+=("bfs-${arg#*=}") # name of the binary that is installed into $bin below
+                ;;
+            # Utilities to benchmark against
+            --bfs)
+                BFS+=(bfs)
+                ;;
+            --bfs=*)
+                BFS+=("${arg#*=}")
+                ;;
+            --find)
+                FIND+=(find)
+                ;;
+            --find=*)
+                FIND+=("${arg#*=}")
+                ;;
+            --fd)
+                FD+=(fd)
+                ;;
+            --fd=*)
+                FD+=("${arg#*=}")
+                ;;
+            # Benchmark groups
+            --complete)
+                COMPLETE=("${COMPLETE_DEFAULT[@]}")
+                ;;
+            --complete=*)
+                read -ra COMPLETE <<<"${arg#*=}" # the value is split on whitespace into corpus names
+                ;;
+            --early-quit)
+                EARLY_QUIT=("${EARLY_QUIT_DEFAULT[@]}")
+                ;;
+            --early-quit=*)
+                read -ra EARLY_QUIT <<<"${arg#*=}"
+                ;;
+            --stat)
+                STAT=("${STAT_DEFAULT[@]}")
+                ;;
+            --stat=*)
+                read -ra STAT <<<"${arg#*=}"
+                ;;
+            --print)
+                PRINT=("${PRINT_DEFAULT[@]}")
+                ;;
+            --print=*)
+                read -ra PRINT <<<"${arg#*=}"
+                ;;
+            --strategies)
+                STRATEGIES=("${STRATEGIES_DEFAULT[@]}")
+                ;;
+            --strategies=*)
+                read -ra STRATEGIES <<<"${arg#*=}"
+                ;;
+            --jobs)
+                JOBS=("${JOBS_DEFAULT[@]}")
+                ;;
+            --jobs=*)
+                read -ra JOBS <<<"${arg#*=}"
+                ;;
+            --exec)
+                EXEC=("${EXEC_DEFAULT[@]}")
+                ;;
+            --exec=*)
+                read -ra EXEC <<<"${arg#*=}"
+                ;;
+            --sorted)
+                SORTED=("${SORTED_DEFAULT[@]}")
+                ;;
+            --sorted=*)
+                read -ra SORTED <<<"${arg#*=}"
+                ;;
+            --default)
+                COMPLETE=("${COMPLETE_DEFAULT[@]}")
+                EARLY_QUIT=("${EARLY_QUIT_DEFAULT[@]}")
+                STAT=("${STAT_DEFAULT[@]}")
+                PRINT=("${PRINT_DEFAULT[@]}")
+                STRATEGIES=("${STRATEGIES_DEFAULT[@]}")
+                JOBS=("${JOBS_DEFAULT[@]}")
+                EXEC=("${EXEC_DEFAULT[@]}")
+                SORTED=("${SORTED_DEFAULT[@]}")
+                ;;
+            --help)
+                usage
+                exit
+                ;;
+            *)
+                printf 'error: Unknown option %q\n\n' "$arg" >&2
+                usage >&2
+                exit $EX_USAGE
+                ;;
+        esac
+    done
+
+    if ((UID == 0)); then
+        max-freq # not defined in this file; presumably a tailfin helper that needs root
+    fi
+
+    echo "Building bfs ..."
+    as-user ./configure --enable-release # as-user is not defined in this file; presumably a tailfin helper
+    as-user make -s -j"$nproc" all
+
+    as-user mkdir -p bench/corpus
+
+    declare -A cloned=()
+    for corpus in "${COMPLETE[@]}" "${EARLY_QUIT[@]}" "${STAT[@]}" "${PRINT[@]}" "${STRATEGIES[@]}" "${JOBS[@]}" "${EXEC[@]}" "${SORTED[@]}"; do
+        if ((cloned["$corpus"])); then # handle each corpus once even if several benchmarks use it
+            continue
+        fi
+        cloned["$corpus"]=1
+
+        dir="bench/corpus/$corpus"
+        if ((CLEAN)) || ! [ -e "$dir" ]; then
+            as-user ./bench/clone-tree.sh "${URLS[$corpus]}" "${TAGS[$corpus]}" "$dir"{,.git} # expands to "$dir" "$dir.git"
+        fi
+    done
+
+    if ((${#BUILD[@]} > 0)); then
+        echo "Creating bfs worktree ..."
+
+        worktree="bench/worktree"
+        as-user git worktree add -qd "$worktree" # detached worktree, so commits are built without touching this checkout
+        defer as-user git worktree remove "$worktree" # defer is not defined in this file; presumably runs the command on exit
+
+        bin="$(realpath -- "$SETUP_DIR")/bin" # SETUP_DIR is not set in this file; presumably supplied by tailfin
+        as-user mkdir "$bin"
+
+        for commit in "${BUILD[@]}"; do
+            (
+                echo "Building bfs $commit ..."
+                cd "$worktree"
+                as-user git checkout -qd "$commit" --
+                if [ -e configure ]; then # commits with a configure script vs. those built with `make release`
+                    as-user ./configure --enable-release
+                    as-user make -s -j"$nproc"
+                else
+                    as-user make -s -j"$nproc" release
+                fi
+                if [ -e ./bin/bfs ]; then # the location of the built binary depends on the commit
+                    as-user cp ./bin/bfs "$bin/bfs-$commit"
+                else
+                    as-user cp ./bfs "$bin/bfs-$commit"
+                fi
+                as-user make -s clean
+            )
+        done
+
+        export PATH="$bin:$PATH" # lets the bfs-COMMIT names added to BFS resolve
+    fi
+
+    export_array BFS # arrays are passed through the environment as strings; bench() re-imports them
+    export_array FIND
+    export_array FD
+
+    export_array COMPLETE
+    export_array EARLY_QUIT
+    export_array STAT
+    export_array PRINT
+    export_array STRATEGIES
+    export_array JOBS
+    export_array EXEC
+    export_array SORTED
+
+    if ((UID == 0)); then
+        turbo-off # not defined in this file; presumably a tailfin helper that needs root
+    fi
+
+    sync
+}
+
+# Runs hyperfine on the given commands and appends its Markdown/JSON reports to bench.md/bench.json; returns 1 if no commands were given
+do-hyperfine() {
+    local tmp_md="$BENCH_DIR/.bench.md" # BENCH_DIR is not set in this file; presumably supplied by tailfin
+    local md="$BENCH_DIR/bench.md"
+    local tmp_json="$BENCH_DIR/.bench.json"
+    local json="$BENCH_DIR/bench.json"
+
+    if (($# == 0)); then
+        printf 'Nothing to do\n\n' | tee -a "$md"
+        return 1
+    fi
+
+    hyperfine -w2 -M20 --export-markdown="$tmp_md" --export-json="$tmp_json" "$@" &>/dev/tty # 2 warmup runs, at most 20 timed runs; progress goes to the terminal
+    cat "$tmp_md" >>"$md" # each invocation's export is appended to the cumulative report
+    cat "$tmp_json" >>"$json"
+    rm "$tmp_md" "$tmp_json"
+
+    printf '\n' | tee -a "$md"
+}
+
+# Print the header for a benchmark group: $1 is a printf format for the title, the remaining arguments fill it in
+group() {
+    printf "## $1\\n\\n" "${@:2}" | tee -a "$BENCH_DIR/bench.md" # written to stdout and appended to the report
+}
+
+# Print the header for a benchmark subgroup: $1 is a printf format for the title, the remaining arguments fill it in
+subgroup() {
+    printf "### $1\\n\\n" "${@:2}" | tee -a "$BENCH_DIR/bench.md" # written to stdout and appended to the report
+}
+
+# Print the header for a benchmark sub-subgroup: $1 is a printf format for the title, the remaining arguments fill it in
+subsubgroup() {
+    printf "#### $1\\n\\n" "${@:2}" | tee -a "$BENCH_DIR/bench.md" # written to stdout and appended to the report
+}
+
+# Benchmark the complete traversal of a directory tree
+# (without printing anything).  $1 is the corpus label, $2 is its directory.
+bench-complete-corpus() {
+    total=$(./bin/bfs "$2" -printf '.' | wc -c) # one '.' per file, so the byte count is the file count
+
+    subgroup "%s (%'d files)" "$1" "$total"
+
+    cmds=()
+    for bfs in "${BFS[@]}"; do
+        cmds+=("$bfs $2 -false") # -false never matches, so nothing is printed
+    done
+
+    for find in "${FIND[@]}"; do
+        cmds+=("$find $2 -false")
+    done
+
+    for fd in "${FD[@]}"; do
+        cmds+=("$fd -u '^$' $2") # '^$' can only match an empty name, so fd prints nothing either
+    done
+
+    do-hyperfine "${cmds[@]}"
+}
+
+# All complete traversal benchmarks; the arguments are corpus names (does nothing when there are none)
+bench-complete() {
+    if (($#)); then
+        group "Complete traversal"
+
+        for corpus; do
+            bench-complete-corpus "$corpus ${TAGS[$corpus]}" "bench/corpus/$corpus" # label is "NAME TAG"
+        done
+    fi
+}
+
+# Benchmark quitting as soon as a file is seen.  $1 is the corpus label, $2 is its directory.
+bench-early-quit-corpus() {
+    dir="$2"
+    max_depth=$(./bin/bfs "$dir" -printf '%d\n' | sort -rn | head -n1)
+
+    subgroup '%s (depth %d)' "$1" "$max_depth"
+
+    # Save the list of unique filenames, along with their depth
+    UNIQ="$BENCH_DIR/uniq"
+    ./bin/bfs "$dir" -printf '%d %f\n' | sort -k2 | uniq -uf1 >"$UNIQ"
+
+    for ((i = 2; i <= max_depth; i *= 2)); do
+        # Sample random uniquely-named files at depth $i
+        export FILES="$BENCH_DIR/uniq-$i" # exported because the benchmarked commands expand \$FILES at run time
+        sed -n "s/^$i //p" "$UNIQ" | shuf -n20 >"$FILES"
+        if ! [ -s "$FILES" ]; then
+            continue # no unique names at this depth: skip it without leaving an empty heading in the report
+        fi
+
+        subsubgroup 'Depth %d' "$i"
+
+        cmds=()
+        for bfs in "${BFS[@]}"; do
+            cmds+=("$bfs $dir -name \$(shuf -n1 \$FILES) -print -quit") # escaped \$(...) is evaluated when the command runs
+        done
+
+        for find in "${FIND[@]}"; do
+            cmds+=("$find $dir -name \$(shuf -n1 \$FILES) -print -quit")
+        done
+
+        for fd in "${FD[@]}"; do
+            cmds+=("$fd -usg1 \$(shuf -n1 \$FILES) $dir")
+        done
+
+        do-hyperfine "${cmds[@]}"
+    done
+}
+
+# All early-quitting benchmarks; the arguments are corpus names (does nothing when there are none)
+bench-early-quit() {
+    if (($#)); then
+        group "Early termination"
+
+        for corpus; do
+            bench-early-quit-corpus "$corpus ${TAGS[$corpus]}" "bench/corpus/$corpus" # label is "NAME TAG"
+        done
+    fi
+}
+
+# Benchmark traversal with stat().  $1 is the corpus label, $2 is its directory.
+bench-stat-corpus() {
+    total=$(./bin/bfs "$2" -printf '.' | wc -c) # one '.' per file, so the byte count is the file count
+
+    subgroup "%s (%'d files)" "$1" "$total"
+
+    cmds=()
+    for bfs in "${BFS[@]}"; do
+        cmds+=("$bfs $2 -size 1024G") # a size test needs each file's metadata; presumably 1024G is chosen so nothing matches
+    done
+
+    for find in "${FIND[@]}"; do
+        cmds+=("$find $2 -size 1024G")
+    done
+
+    for fd in "${FD[@]}"; do
+        cmds+=("$fd -u --search-path $2 --size 1024Gi")
+    done
+
+    do-hyperfine "${cmds[@]}"
+}
+
+# stat() benchmarks; the arguments are corpus names (does nothing when there are none)
+bench-stat() {
+    if (($#)); then
+        group "Traversal with stat()"
+
+        for corpus; do
+            bench-stat-corpus "$corpus ${TAGS[$corpus]}" "bench/corpus/$corpus" # label is "NAME TAG"
+        done
+    fi
+}
+
+# Benchmark printing paths without colors.  $1 is the corpus label, $2 is its directory.
+bench-print-nocolor() {
+    subsubgroup '%s' "$1"
+
+    cmds=()
+    for bfs in "${BFS[@]}"; do
+        cmds+=("$bfs $2") # no expression given, so every path is printed
+    done
+
+    for find in "${FIND[@]}"; do
+        cmds+=("$find $2")
+    done
+
+    for fd in "${FD[@]}"; do
+        cmds+=("$fd -u --search-path $2")
+    done
+
+    do-hyperfine "${cmds[@]}"
+}
+
+# Benchmark printing paths with colors (only bfs and fd; FIND is not used here).  $1 is the label, $2 the directory.
+bench-print-color() {
+    subsubgroup '%s' "$1"
+
+    cmds=()
+    for bfs in "${BFS[@]}"; do
+        cmds+=("$bfs $2 -color")
+    done
+
+    for fd in "${FD[@]}"; do
+        cmds+=("$fd -u --search-path $2 --color=always")
+    done
+
+    do-hyperfine "${cmds[@]}"
+}
+
+# All printing benchmarks; the arguments are corpus names.  Every corpus is run without colors first, then with colors.
+bench-print() {
+    if (($#)); then
+        group "Printing paths"
+
+        subgroup "Without colors"
+        for corpus; do
+            bench-print-nocolor "$corpus ${TAGS[$corpus]}" "bench/corpus/$corpus" # label is "NAME TAG"
+        done
+
+        subgroup "With colors"
+        for corpus; do
+            bench-print-color "$corpus ${TAGS[$corpus]}" "bench/corpus/$corpus"
+        done
+    fi
+}
+
+# Benchmark search strategies (bfs only).  $1 is the corpus label, $2 is its directory.
+bench-strategies-corpus() {
+    subgroup '%s' "$1"
+
+    if ((${#BFS[@]} == 1)); then # a single bfs: compare the strategies against each other in one run
+        cmds=("$BFS -S "{bfs,dfs,ids,eds}" $2 -false") # brace expansion yields one command per strategy; $BFS is ${BFS[0]}
+        do-hyperfine "${cmds[@]}"
+    else # otherwise: one run per strategy, comparing the bfs commands with each other
+        for S in bfs dfs ids eds; do
+            subsubgroup '`-S %s`' "$S"
+
+            cmds=()
+            for bfs in "${BFS[@]}"; do
+                cmds+=("$bfs -S $S $2 -false")
+            done
+            do-hyperfine "${cmds[@]}"
+        done
+    fi
+}
+
+# All search strategy benchmarks; the arguments are corpus names (does nothing when there are none)
+bench-strategies() {
+    if (($#)); then
+        group "Search strategies"
+
+        for corpus; do
+            bench-strategies-corpus "$corpus ${TAGS[$corpus]}" "bench/corpus/$corpus" # label is "NAME TAG"
+        done
+    fi
+}
+
+# Benchmark parallelism (bfs and fd).  $1 is the corpus label, $2 is its directory.
+bench-jobs-corpus() {
+    subgroup '%s' "$1"
+
+    if ((${#BFS[@]} + ${#FD[@]} == 1)); then # a single tool: compare its job counts against each other in one run
+        cmds=()
+        for bfs in "${BFS[@]}"; do
+            if "$bfs" -j1 -quit &>/dev/null; then # probe whether this bfs accepts -j
+                cmds+=("$bfs -j"{1,2,3,4,6,8,12,16}" $2 -false") # brace expansion yields one command per job count
+            else
+                cmds+=("$bfs $2 -false")
+            fi
+        done
+
+        for fd in "${FD[@]}"; do
+            cmds+=("$fd -j"{1,2,3,4,6,8,12,16}" -u '^$' $2")
+        done
+
+        do-hyperfine "${cmds[@]}"
+    else # several tools: one run per job count, comparing the tools with each other
+        for j in 1 2 3 4 6 8 12 16; do
+            subsubgroup '`-j%d`' $j
+
+            cmds=()
+            for bfs in "${BFS[@]}"; do
+                if "$bfs" -j1 -quit &>/dev/null; then
+                    cmds+=("$bfs -j$j $2 -false")
+                elif ((j == 1)); then # a bfs that rejects -j only takes part in the -j1 comparison
+                    cmds+=("$bfs $2 -false")
+                fi
+            done
+
+            for fd in "${FD[@]}"; do
+                cmds+=("$fd -j$j -u '^$' $2")
+            done
+
+            if ((${#cmds[@]})); then
+                do-hyperfine "${cmds[@]}"
+            fi
+        done
+    fi
+}
+
+# All parallelism benchmarks; the arguments are corpus names (does nothing when there are none)
+bench-jobs() {
+    if (($#)); then
+        group "Parallelism"
+
+        for corpus; do
+            bench-jobs-corpus "$corpus ${TAGS[$corpus]}" "bench/corpus/$corpus" # label is "NAME TAG"
+        done
+    fi
+}
+
+# One file/process: spawn `true` once per file, limited to depth 2.  $1 is the corpus directory.
+bench-exec-single() {
+    subsubgroup "One file per process"
+
+    cmds=()
+    for cmd in "${BFS[@]}" "${FIND[@]}"; do
+        cmds+=("$cmd $1 -maxdepth 2 -exec true -- {} \;")
+    done
+
+    for fd in "${FD[@]}"; do
+        cmds+=("$fd -u --search-path $1 --max-depth=2 -x true --")
+        # Without -j1, fd runs multiple processes in parallel, which is unfair
+        cmds+=("$fd -j1 -u --search-path $1 --max-depth=2 -x true --")
+    done
+
+    do-hyperfine "${cmds[@]}"
+}
+
+# Many files/process: pass many files to each `true` invocation (-exec ... {} + / fd -X).  $1 is the corpus directory.
+bench-exec-multi() {
+    subsubgroup "Many files per process"
+
+    cmds=()
+    for cmd in "${BFS[@]}" "${FIND[@]}"; do
+        cmds+=("$cmd $1 -exec true -- {} +")
+    done
+
+    for fd in "${FD[@]}"; do
+        cmds+=("$fd -u --search-path $1 -X true --")
+    done
+
+    do-hyperfine "${cmds[@]}"
+}
+
+# Many files, same dir: -execdir with {} +, limited to depth 3 (bfs and find only).  $1 is the corpus directory.
+bench-exec-chdir() {
+    if ((${#BFS[@]} + ${#FIND[@]} == 0)); then # FD is not used here, so skip the heading when only fd was requested
+        return
+    fi
+
+    subsubgroup "Spawn in parent directory"
+
+    cmds=()
+    for cmd in "${BFS[@]}" "${FIND[@]}"; do
+        cmds+=("$cmd $1 -maxdepth 3 -execdir true -- {} +")
+    done
+
+    do-hyperfine "${cmds[@]}"
+}
+
+# Benchmark process spawning: runs the three -exec variants in turn.  $1 is the corpus label, $2 is its directory.
+bench-exec-corpus() {
+    subgroup '%s' "$1"
+
+    bench-exec-single "$2"
+    bench-exec-multi "$2"
+    bench-exec-chdir "$2"
+}
+
+# All process spawning benchmarks; the arguments are corpus names (does nothing when there are none)
+bench-exec() {
+    if (($#)); then
+        group "Process spawning"
+
+        for corpus; do
+            bench-exec-corpus "$corpus ${TAGS[$corpus]}" "bench/corpus/$corpus" # label is "NAME TAG"
+        done
+    fi
+}
+
+# Benchmark sorted traversal (bfs -s; FIND and FD are not used here).  $1 is the corpus label, $2 is its directory.
+bench-sorted-corpus() {
+    subgroup '%s' "$1"
+
+    cmds=()
+    for bfs in "${BFS[@]}"; do
+        cmds+=("$bfs -s $2 -false")
+    done
+
+    do-hyperfine "${cmds[@]}"
+}
+
+# All sorted traversal benchmarks; the arguments are corpus names (does nothing when there are none)
+bench-sorted() {
+    if (($#)); then
+        group "Sorted traversal"
+
+        for corpus; do
+            bench-sorted-corpus "$corpus ${TAGS[$corpus]}" "bench/corpus/$corpus" # label is "NAME TAG"
+        done
+    fi
+}
+
+# Print benchmarked versions to stdout and, wrapped in a console code fence, to the report
+bench-versions() {
+    subgroup "Versions"
+
+    local md="$BENCH_DIR/bench.md"
+
+    printf '```console\n' >>"$md" # the fence lines go to the report only, not to stdout
+
+    {
+        for bfs in "${BFS[@]}"; do
+            printf '$ %s --version | head -n1\n' "$bfs"
+            "$bfs" --version | head -n1
+        done
+
+        for find in "${FIND[@]}"; do
+            printf '$ %s --version | head -n1\n' "$find"
+            "$find" --version | head -n1
+        done
+
+        for fd in "${FD[@]}"; do
+            printf '$ %s --version\n' "$fd"
+            "$fd" --version
+        done
+    } | tee -a "$md"
+
+    printf '```\n' >>"$md" # newline-terminate the closing fence so the report ends with a complete line
+}
+
+# Print benchmark details (currently just the versions of the benchmarked tools)
+bench-details() {
+    group "Details"
+
+    bench-versions
+}
+
+# Run all the benchmarks: restore the arrays exported by setup(), then run each benchmark group on its corpora
+bench() {
+    import_array BFS
+    import_array FIND
+    import_array FD
+
+    import_array COMPLETE
+    import_array EARLY_QUIT
+    import_array STAT
+    import_array PRINT
+    import_array STRATEGIES
+    import_array JOBS
+    import_array EXEC
+    import_array SORTED
+
+    bench-complete "${COMPLETE[@]}" # each group is a no-op when its corpus list is empty
+    bench-early-quit "${EARLY_QUIT[@]}"
+    bench-stat "${STAT[@]}"
+    bench-print "${PRINT[@]}"
+    bench-strategies "${STRATEGIES[@]}"
+    bench-jobs "${JOBS[@]}"
+    bench-exec "${EXEC[@]}"
+    bench-sorted "${SORTED[@]}"
+    bench-details
+}
diff --git a/bench/clone-tree.sh b/bench/clone-tree.sh
new file mode 100755
index 0000000..744b5f4
--- /dev/null
+++ b/bench/clone-tree.sh
@@ -0,0 +1,143 @@
+#!/usr/bin/env bash
+
+# Copyright © Tavian Barnes <tavianator@tavianator.com>
+# SPDX-License-Identifier: 0BSD
+
+# Creates a directory tree that matches a git repo, but with empty files.  E.g.
+#
+#     $ ./bench/clone-tree.sh "https://.../linux.git" v6.5 ./linux ./linux.git
+#
+# will create or update a shallow clone at ./linux.git, then create a directory
+# tree at ./linux with the same layout as the tag v6.5, except that all files
+# will be empty.
+
+set -eu
+
+if (($# != 4)); then
+    printf 'Usage: %s https://url/of/repo.git <TAG> path/to/checkout path/to/repo.git\n' "$0" >&2
+    exit 1
+fi
+
+URL="$1" # remote to fetch from
+TAG="$2" # tag to check out (the recursive calls below pass a submodule commit here instead)
+DIR="$3" # where the tree of empty files is created
+REPO="$4" # bare repository that holds the fetched objects
+
+BENCH=$(dirname -- "${BASH_SOURCE[0]}")
+BIN=$(realpath -- "$BENCH/../bin")
+BFS="$BIN/bfs" # helper binaries are taken from the bin directory next to bench/
+XTOUCH="$BIN/tests/xtouch"
+
+if [ "${NPROC-}" ]; then
+    # Use fewer cores in recursive calls
+    export NPROC=$(((NPROC + 1) / 2))
+else
+    export NPROC=$(nproc)
+fi
+
+JOBS=$((NPROC < 8 ? NPROC : 8)) # number of parallel jobs, capped at 8
+
+do-git() { # Run git with the given arguments inside $REPO
+    git -C "$REPO" "$@"
+}
+
+if ! [ -e "$REPO" ]; then # first use: create an empty bare repository to fetch into
+    mkdir -p -- "$REPO"
+    do-git init -q --bare
+fi
+
+has-ref() { # Succeed if revision $1 can be resolved in $REPO; all git output is discarded
+    do-git rev-list --quiet -1 --missing=allow-promisor "$1" &>/dev/null
+}
+
+sparse-fetch() { # Fetch "$@" as a depth-1 partial clone without blobs (--filter=blob:none), without tags, FETCH_HEAD or auto-gc
+    do-git -c fetch.negotiationAlgorithm=noop fetch -q --filter=blob:none --depth=1 --no-tags --no-write-fetch-head --no-auto-gc "$@"
+}
+
+if ! has-ref "$TAG"; then # only contact the remote when the revision is not already available locally
+    printf 'Fetching %s ...\n' "$TAG" >&2
+    do-git config remote.origin.url "$URL"
+    if ((${#TAG} >= 40)); then # 40 or more characters: presumably a full commit hash rather than a tag name
+        sparse-fetch origin "$TAG"
+    else
+        sparse-fetch origin tag "$TAG"
+    fi
+fi
+
+# Delete a tree in parallel: empty each directory at depth $d with up to $JOBS concurrent bfs processes, then delete what is left of $1
+clean() {
+    local d=5
+    "$BFS" -f "$1" -mindepth $d -maxdepth $d -type d -print0 \
+        | xargs -0r -n1 -P$JOBS -- "$BFS" -j1 -mindepth 1 -delete -f
+    "$BFS" -f "$1" -delete
+}
+
+if [ -e "$DIR" ]; then
+    printf 'Cleaning old directory tree %s ...\n' "$DIR" >&2
+    TMP=$(mktemp -dp "$(dirname -- "$DIR")") # temporary directory next to $DIR
+    mv -- "$DIR" "$TMP" # move the old tree out of the way so $DIR can be recreated right away
+    clean "$TMP" & # delete it in the background; the final `wait` collects this job
+fi
+
+# List gitlinks (submodule references) in the tree as NUL-terminated records with everything up to " commit " stripped
+ls-gitlinks() {
+    do-git ls-tree -zr "$TAG" \
+        | sed -zn 's/.* commit //p'
+}
+
+# Get the submodule ID for a path: print <ID> for the submodule.<ID>.path key in .gitmodules at $TAG whose value is exactly $1
+submodule-for-path() {
+    do-git config --blob "$TAG:.gitmodules" \
+                  --name-only \
+                  --fixed-value \
+                  --get-regexp 'submodule\..*\.path' "$1" \
+        | sed -En 's/submodule\.(.*)\.path/\1/p'
+}
+
+# Get the URL for a submodule ID $1; for the hosts listed below sed quits with status 1 instead, so the caller skips the submodule
+submodule-url() {
+    # - https://chrome-internal.googlesource.com/
+    #   - not publicly accessible
+    # - https://chromium.googlesource.com/external/github.com/WebKit/webkit.git
+    #   - is accessible, but the commit (59e9de61b7b3) isn't
+    # - https://android.googlesource.com/
+    #   - is accessible, but you need an account
+
+    do-git config --blob "$TAG:.gitmodules" \
+                  --get "submodule.$1.url" \
+        | sed -E \
+              -e '\|^https://chrome-internal.googlesource.com/|Q1' \
+              -e '\|^https://chromium.googlesource.com/external/github.com/WebKit/webkit.git|Q1' \
+              -e '\|^https://android.googlesource.com/|Q1'
+}
+
+# Recursively checkout submodules: re-run this script in the background for each gitlink, at most $JOBS at a time
+while read -rd '' SUBREF SUBDIR; do
+    SUBNAME=$(submodule-for-path "$SUBDIR")
+    SUBURL=$(submodule-url "$SUBNAME") || continue # skip submodules whose URL is rejected by submodule-url
+
+    if (($(jobs -pr | wc -w) >= JOBS)); then
+        wait -n # all job slots are busy: wait for any one background job to finish
+    fi
+    "$0" "$SUBURL" "$SUBREF" "$DIR/$SUBDIR" "$REPO/modules/$SUBNAME" &
+done < <(ls-gitlinks)
+
+# Touch files in parallel: reads NUL-separated paths on stdin and passes them to $XTOUCH from inside $DIR
+xtouch() ( # runs in a subshell, so the cd does not affect the caller
+    cd "$DIR"
+    if ((JOBS > 1)); then
+        xargs -0r -n4096 -P$JOBS -- "$XTOUCH" -p --
+    else
+        xargs -0r -- "$XTOUCH" -p --
+    fi
+)
+
+# Check out files: list every blob path in $TAG and create it under $DIR via xtouch
+printf 'Checking out %s ...\n' "$DIR" >&2
+mkdir -p -- "$DIR"
+do-git ls-tree -zr "$TAG"\
+    | sed -zn 's/.* blob .*\t//p' \
+    | xtouch
+
+# Wait for cleaning/submodules (every background job started above)
+wait
diff --git a/bench/ioq.c b/bench/ioq.c
new file mode 100644
index 0000000..fb9edbc
--- /dev/null
+++ b/bench/ioq.c
@@ -0,0 +1,455 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+#include "atomic.h"
+#include "bfs.h"
+#include "bfstd.h"
+#include "diag.h"
+#include "ioq.h"
+#include "sighook.h"
+#include "xtime.h"
+
+#include <errno.h>
+#include <locale.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <unistd.h>
+
+/** A single latency sample, as stored in the reservoir. */
+struct lat {
+	/** The sampled latency (an elapsed duration, see track_latency()). */
+	struct timespec time;
+	/** Random key from lrand48() (see lats_push()); heap_push() retains the largest keys. */
+	long key;
+};
+
+/** Number of latency samples to keep (the capacity of struct lats::heap). */
+#define SAMPLES 1000
+/** Latency sampling period: push() times at most one request out of every PERIOD pushed in a lap. */
+#define PERIOD 128
+
+/** Latency measurements: running min/max/sum/count, plus a bounded random sample for percentiles. */
+struct lats {
+	/** Lowest observed latency (lats_init() starts it at 1000 s). */
+	struct timespec min;
+	/** Highest observed latency. */
+	struct timespec max;
+	/** Total latency (times_print() divides it by count to get the mean). */
+	struct timespec sum;
+	/** Number of measured requests. */
+	size_t count;
+
+	/** Priority queue for reservoir sampling: a min-heap on lat::key, until lats_sort() reorders it by time. */
+	struct lat heap[SAMPLES];
+	/** Current size of the heap (at most SAMPLES). */
+	size_t heap_size;
+};
+
+/** Reset a latency reservoir to empty; min starts at 1000 s, presumably above any latency measured here. */
+static void lats_init(struct lats *lats) {
+	lats->heap_size = 0;
+	lats->count = 0;
+	lats->sum = (struct timespec) { .tv_sec = 0, .tv_nsec = 0 };
+	lats->max = (struct timespec) { .tv_sec = 0, .tv_nsec = 0 };
+	lats->min = (struct timespec) { .tv_sec = 1000, .tv_nsec = 0 };
+}
+
+/** Index of the parent of binary heap slot i (only meaningful for i > 0). */
+static size_t heap_parent(size_t i) {
+	return (i - 1) >> 1;
+}
+
+/** Index of the left child of binary heap slot i (the right child is the next slot). */
+static size_t heap_child(size_t i) {
+	return i + i + 1;
+}
+
+/** Index of the child of slot i with the smaller key; the result may be >= heap_size if i has no children. */
+static size_t heap_min_child(const struct lats *lats, size_t i) {
+	size_t left = heap_child(i);
+	size_t right = left + 1;
+	if (right >= lats->heap_size) {
+		return left;
+	}
+	const struct lat *heap = lats->heap;
+	return heap[right].key < heap[left].key ? right : left;
+}
+
+/** Whether a parent/child pair satisfies the min-heap property (the parent's key is not larger). */
+static bool heap_check(const struct lat *parent, const struct lat *child) {
+	return !(parent->key > child->key);
+}
+
+/** Reservoir sampling: offer a sample to the heap, which retains the (up to) SAMPLES samples with the largest keys. */
+static void heap_push(struct lats *lats, const struct lat *lat) {
+	size_t i;
+	// Both accepting branches compute the slot i where *lat belongs; it is stored once, at the end
+	if (lats->heap_size < SAMPLES) {
+		// Heapify up: open a hole at the end and move it above parents with larger keys
+		i = lats->heap_size++;
+		while (i > 0) {
+			size_t j = heap_parent(i);
+			if (heap_check(&lats->heap[j], lat)) {
+				break;
+			}
+			lats->heap[i] = lats->heap[j];
+			i = j;
+		}
+	} else if (lat->key > lats->heap[0].key) {
+		// Heapify down: evict the root (smallest key) and move the hole below children with smaller keys
+		i = 0;
+		while (true) {
+			size_t j = heap_min_child(lats, i);
+			if (j >= SAMPLES || heap_check(lat, &lats->heap[j])) {
+				break;
+			}
+			lats->heap[i] = lats->heap[j];
+			i = j;
+		}
+	} else {
+		// Reject: the heap is full and this key does not beat the smallest retained key
+		return;
+	}
+
+	lats->heap[i] = *lat;
+}
+
+/** Record one latency: update the running statistics, then offer it to the reservoir. */
+static void lats_push(struct lats *lats, const struct timespec *ts) {
+	lats->count++;
+	timespec_add(&lats->sum, ts);
+	timespec_max(&lats->max, ts);
+	timespec_min(&lats->min, ts);
+
+	// Tag the sample with a random key; heap_push() decides whether it is retained
+	struct lat sample;
+	sample.time = *ts;
+	sample.key = lrand48();
+	heap_push(lats, &sample);
+}
+
+/** Fold the statistics and retained samples of `from` into `into`; samples keep their original keys. */
+static void lats_merge(struct lats *into, const struct lats *from) {
+	into->count += from->count;
+	timespec_add(&into->sum, &from->sum);
+	timespec_max(&into->max, &from->max);
+	timespec_min(&into->min, &from->min);
+	const struct lat *end = from->heap + from->heap_size;
+	for (const struct lat *lat = from->heap; lat != end; ++lat) {
+		heap_push(into, lat);
+	}
+}
+
+/**
+ * qsort() comparator that orders latency samples by their measured time.
+ */
+static int lat_cmp(const void *a, const void *b) {
+	return timespec_cmp(&((const struct lat *)a)->time, &((const struct lat *)b)->time);
+}
+
+/** Sort the retained samples by latency; afterwards the array is no longer a heap ordered by key. */
+static void lats_sort(struct lats *lats) {
+	qsort(lats->heap, lats->heap_size, sizeof(struct lat), lat_cmp);
+}
+
+/** Get the nth percentile (0-100) of a sorted reservoir; an index past the end is clamped to the last sample. */
+static const struct timespec *lats_percentile(const struct lats *lats, int percent) {
+	size_t i = lats->heap_size * percent / 100;
+	return &lats->heap[(i < lats->heap_size || i == 0) ? i : lats->heap_size - 1].time;
+}
+
+/** Which clock to use for benchmarking; main() switches it to CLOCK_MONOTONIC when that is available. */
+static clockid_t clockid = CLOCK_REALTIME;
+
+/** Read the current time of the benchmark clock into *tp; a clock_gettime() failure is checked by bfs_everify(). */
+static void gettime(struct timespec *tp) {
+	int ret = clock_gettime(clockid, tp);
+	bfs_everify(ret == 0, "clock_gettime(%d)", (int)clockid);
+}
+
+/**
+ * Time measurements for one interval: main() keeps one for the current lap and one for the whole run.
+ */
+struct times {
+	/** The start time (set by times_init()). */
+	struct timespec start;
+
+	/** Total requests started. */
+	size_t pushed;
+	/** Total requests finished. */
+	size_t popped;
+
+	/** The start time for the currently tracked request (written by push() just before it is queued). */
+	struct timespec req_start;
+	/** Whether a timed request is currently in flight (set by push(), cleared by track_latency()). */
+	bool timing;
+
+	/** Latency measurements. */
+	struct lats lats;
+};
+
+/** Initialize (or restart) a timer: stamp the start time and clear the counters and latencies. */
+static void times_init(struct times *times) {
+	gettime(&times->start);
+	times->pushed = 0;
+	times->popped = 0;
+	bfs_assert(!times->timing); // `timing` is only checked here, never reset: it must already be initialized to false
+	lats_init(&times->lats);
+}
+
+/** Finish the in-flight timed request: record the time elapsed since req_start as a latency sample. */
+static void track_latency(struct times *times) {
+	struct timespec latency;
+	gettime(&latency);
+	timespec_sub(&latency, &times->req_start);
+	lats_push(&times->lats, &latency);
+
+	bfs_assert(times->timing);
+	times->timing = false;
+}
+
+/** Accumulate a finished lap into the running totals, then restart the lap. */
+static void times_lap(struct times *total, struct times *lap) {
+	lats_merge(&total->lats, &lap->lats);
+	total->popped += lap->popped;
+	total->pushed += lap->pushed;
+	// Clears the lap's counters and latencies and stamps a fresh start time
+	times_init(lap);
+}
+
+/** Print one table row (time, throughput, latency statistics) for `times`; the latency columns are 0 if nothing was sampled. */
+static void times_print(struct times *times, long seconds) {
+	struct timespec elapsed;
+	gettime(&elapsed);
+	timespec_sub(&elapsed, &times->start);
+
+	double fsec = timespec_ns(&elapsed) / 1.0e9;
+	// A positive `seconds` is printed as given; otherwise the measured time, with decimals if it is >= 10ms past a whole second
+	if (seconds > 0) {
+		printf("%5ld", seconds);
+	} else if (elapsed.tv_nsec >= 10 * 1000 * 1000) {
+		printf("%5.2f", fsec);
+	} else {
+		printf("%5.0f", fsec);
+	}
+	// Without samples (e.g. ^C before the first timed request), don't divide by zero or read an unfilled heap slot
+	size_t count = times->lats.count;
+	double iops = times->popped / fsec;
+	double mean = count ? timespec_ns(&times->lats.sum) / count : 0.0;
+	double min = count ? timespec_ns(&times->lats.min) : 0.0;
+	double max = timespec_ns(&times->lats.max);
+	lats_sort(&times->lats);
+	double n50 = count ? timespec_ns(lats_percentile(&times->lats, 50)) : 0.0;
+	double n90 = count ? timespec_ns(lats_percentile(&times->lats, 90)) : 0.0;
+	double n99 = count ? timespec_ns(lats_percentile(&times->lats, 99)) : 0.0;
+
+	printf(" │ %'12.0f │ %'7.0f │ %'7.0f │ %'7.0f │ %'7.0f │ %'7.0f │ %'7.0f\n", iops, mean, min, n50, n90, n99, max);
+	fflush(stdout);
+}
+
+/**
+ * Push one no-op ioq request.  Returns false if ioq_nop() fails (only EAGAIN passes the bfs_everify() check).
+ */
+static bool push(struct ioq *ioq, enum ioq_nop_type type, struct times *lap) {
+	// Track latency for a small fraction of requests, and only one at a time
+	bool timed = !lap->timing && (lap->pushed + 1) % PERIOD == 0;
+	if (timed) {
+		gettime(&lap->req_start);
+	}
+
+	// A timed request carries the lap as its pointer, so pop() can recognize it
+	if (ioq_nop(ioq, type, timed ? lap : NULL) != 0) {
+		bfs_everify(errno == EAGAIN, "ioq_nop(%d)", (int)type);
+		return false;
+	}
+
+	lap->pushed++;
+	if (timed) {
+		lap->timing = true;
+	}
+	return true;
+}
+
+/** Pop and free one ioq request; returns false when ioq_pop() yields nothing. */
+static bool pop(struct ioq *ioq, struct times *lap, bool block) {
+	struct ioq_ent *done = ioq_pop(ioq, block);
+	if (done) {
+		// push() attaches a non-NULL pointer only to the request being timed
+		if (done->ptr) {
+			track_latency(lap);
+		}
+
+		ioq_free(ioq, done);
+		lap->popped++;
+		return true;
+	}
+	return false;
+}
+
+/** ^C flag: set by the SIGINT hook, polled by main() with relaxed loads. */
+static atomic bool quit = false;
+
+/** ^C hook: only raises the quit flag; none of the arguments are used. */
+static void ctrlc(int sig, siginfo_t *info, void *arg) {
+	store(&quit, true, relaxed);
+}
+
+/** Benchmark driver: parse options, pre-allocate the requests, then alternate pops and pushes until the timeout or ^C. */
+int main(int argc, char *argv[]) {
+	// Use CLOCK_MONOTONIC if available
+#if defined(_POSIX_MONOTONIC_CLOCK) && _POSIX_MONOTONIC_CLOCK >= 0
+	if (sysoption(MONOTONIC_CLOCK) > 0) {
+		clockid = CLOCK_MONOTONIC;
+	}
+#endif
+
+	// Enable thousands separators
+	setlocale(LC_ALL, "");
+
+	// -d: queue depth
+	unsigned int depth = 4096;
+	// -j: threads
+	unsigned int threads = 0;
+	// -t: timeout
+	double timeout = 5.0;
+	// -L|-H: ioq_nop() type
+	enum ioq_nop_type type = IOQ_NOP_LIGHT;
+
+	const char *cmd = argc > 0 ? argv[0] : "ioq";
+	int c;
+	while (c = getopt(argc, argv, ":d:j:t:LH"), c != -1) {
+		switch (c) {
+		case 'd':
+			if (xstrtoui(optarg, NULL, 10, &depth) != 0) {
+				fprintf(stderr, "%s: Bad depth '%s': %s\n", cmd, optarg, errstr());
+				return EXIT_FAILURE;
+			}
+			break;
+		case 'j':
+			if (xstrtoui(optarg, NULL, 10, &threads) != 0) {
+				fprintf(stderr, "%s: Bad thread count '%s': %s\n", cmd, optarg, errstr());
+				return EXIT_FAILURE;
+			}
+			break;
+		case 't':
+			if (xstrtod(optarg, NULL, &timeout) != 0) {
+				fprintf(stderr, "%s: Bad timeout '%s': %s\n", cmd, optarg, errstr());
+				return EXIT_FAILURE;
+			}
+			break;
+		case 'L':
+			type = IOQ_NOP_LIGHT;
+			break;
+		case 'H':
+			type = IOQ_NOP_HEAVY;
+			break;
+		case ':':
+			fprintf(stderr, "%s: Missing argument to -%c\n", cmd, optopt);
+			return EXIT_FAILURE;
+		case '?':
+			fprintf(stderr, "%s: Unrecognized option -%c\n", cmd, optopt);
+			return EXIT_FAILURE;
+		}
+	}
+
+	if (!threads) {
+		threads = nproc();
+		if (threads > 8) {
+			threads = 8;
+		}
+	}
+	if (threads < 2) {
+		threads = 2;
+	}
+	--threads; // the main thread counts as one of them; the ioq gets the rest
+
+	// Listen for ^C to print the summary
+	struct sighook *hook = sighook(SIGINT, ctrlc, NULL, SH_CONTINUE | SH_ONESHOT);
+
+	printf("I/O queue benchmark (%s)\n\n", bfs_version);
+
+	printf("[-d] depth:   %u\n", depth);
+	printf("[-j] threads: %u (including main)\n", threads + 1);
+	if (type == IOQ_NOP_HEAVY) {
+		printf("[-H] type:    heavy (with syscalls)\n");
+	} else {
+		printf("[-L] type:    light (no syscalls)\n");
+	}
+	printf("\n");
+	printf(" Time │  Throughput  │ Latency │   min   │   50%%   │   90%%   │   99%%   │   max\n");
+	printf("  (s) │    (IO/s)    │ (ns/IO) │         │         │         │         │\n");
+	printf("══════╪══════════════╪═════════╪═════════╪═════════╪═════════╪═════════╪═════════\n");
+	fflush(stdout);
+
+	struct ioq *ioq = ioq_create(depth, threads);
+	bfs_everify(ioq, "ioq_create(%u, %u)", depth, threads);
+
+	// Pre-allocate all the requests
+	while (ioq_capacity(ioq) > 0) {
+		int ret = ioq_nop(ioq, type, NULL);
+		bfs_everify(ret == 0, "ioq_nop(%d)", (int)type);
+	}
+	while (true) {
+		struct ioq_ent *ent = ioq_pop(ioq, true);
+		if (!ent) {
+			break;
+		}
+		ioq_free(ioq, ent);
+	}
+	// times_init() asserts !timing without setting it, so the flag must be initialized before the first call
+	struct times total = { .timing = false }, lap;
+	times_init(&total);
+	lap = total;
+
+	long seconds = 0;
+	while (!load(&quit, relaxed)) {
+		bool was_timing = lap.timing;
+
+		for (int i = 0; i < 16; ++i) {
+			bool block = ioq_capacity(ioq) == 0;
+			if (!pop(ioq, &lap, block)) {
+				break;
+			}
+		}
+
+		if (was_timing && !lap.timing) { // a timed request finished during the pops above
+			struct timespec elapsed;
+			gettime(&elapsed);
+			timespec_sub(&elapsed, &total.start);
+
+			if (elapsed.tv_sec > seconds) {
+				seconds = elapsed.tv_sec;
+				times_print(&lap, seconds);
+				times_lap(&total, &lap);
+			}
+
+			double ns = timespec_ns(&elapsed);
+			if (timeout > 0 && ns >= timeout * 1.0e9) {
+				break;
+			}
+		}
+
+		for (int i = 0; i < 8; ++i) {
+			if (!push(ioq, type, &lap)) {
+				break;
+			}
+		}
+		ioq_submit(ioq);
+	}
+
+	while (pop(ioq, &lap, true));
+	times_lap(&total, &lap);
+
+	if (load(&quit, relaxed)) {
+		printf("\r──^C──┼──────────────┼─────────┼─────────┼─────────┼─────────┼─────────┼─────────\n");
+	} else {
+		printf("──────┼──────────────┼─────────┼─────────┼─────────┼─────────┼─────────┼─────────\n");
+	}
+	times_print(&total, 0);
+
+	ioq_destroy(ioq);
+	sigunhook(hook);
+	return 0;
+}