From fd2a86bb93f42a2c64b202f959dd5c8cbb724c69 Mon Sep 17 00:00:00 2001
From: Tavian Barnes <tavianator@tavianator.com>
Date: Fri, 29 Sep 2023 14:13:06 -0400
Subject: bench: Add benchmarking script

---
 bench/.gitignore |   3 +
 bench/bench.sh   | 491 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 494 insertions(+)
 create mode 100644 bench/.gitignore
 create mode 100644 bench/bench.sh

(limited to 'bench')

diff --git a/bench/.gitignore b/bench/.gitignore
new file mode 100644
index 0000000..170d850
--- /dev/null
+++ b/bench/.gitignore
@@ -0,0 +1,3 @@
+/corpus/
+/results/
+/worktree/
diff --git a/bench/bench.sh b/bench/bench.sh
new file mode 100644
index 0000000..09564ac
--- /dev/null
+++ b/bench/bench.sh
@@ -0,0 +1,491 @@
+#!/hint/bash
+
+# Copyright © Tavian Barnes <tavianator@tavianator.com>
+# SPDX-License-Identifier: 0BSD
+
+declare -gA URLS=(
+    [chromium]="https://chromium.googlesource.com/chromium/src.git"
+    [linux]="https://github.com/torvalds/linux.git"
+    [rust]="https://github.com/rust-lang/rust.git"
+)
+
+declare -gA TAGS=(
+    [chromium]=119.0.6036.2
+    [linux]=v6.5
+    [rust]=1.72.1
+)
+
+COMPLETE_DEFAULT=(linux rust chromium)
+EARLY_QUIT_DEFAULT=(chromium)
+PRINT_DEFAULT=(linux)
+STRATEGIES_DEFAULT=(linux)
+
+usage() {
+    printf 'Usage: tailfin run %s [--default]\n' "${BASH_SOURCE[0]}"
+    printf '           [--complete] [--early-quit] [--print] [--strategies]\n'
+    printf '           [--build=...] [--bfs] [--find] [--fd]\n'
+    printf '           [--no-clean] [--help]\n\n'
+
+    printf '  --default\n'
+    printf '      Run the default set of benchmarks\n\n'
+
+    printf '  --complete[=CORPUS]\n'
+    printf '      Complete traversal benchmark.  \n'
+    printf '      Default corpus is --complete="%s"\n\n' "${COMPLETE_DEFAULT[*]}"
+
+    printf '  --early-quit[=CORPUS]\n'
+    printf '      Early quitting benchmark.  \n'
+    printf '      Default corpus is --early-quit=chromium\n\n' "${EARLY_QUIT_DEFAULT[*]}"
+
+    printf '  --print[=CORPUS]\n'
+    printf '      Path printing benchmark.  \n'
+    printf '      Default corpus is --print=linux\n\n' "${PRINT_DEFAULT[*]}"
+
+    printf '  --strategies[=CORPUS]\n'
+    printf '      Search strategy benchmark.\n'
+    printf '      Default corpus is --strategies=linux\n\n' "${STRATEGIES_DEFAULT[*]}"
+
+    printf '  --build=COMMIT\n'
+    printf '      Build this bfs commit and benchmark it.  Specify multiple times to\n'
+    printf '      compare, e.g. --build=3.0.1 --build=3.0.2\n\n'
+
+    printf '  --bfs[=COMMAND]\n'
+    printf '      Benchmark an existing build of bfs\n\n'
+
+    printf '  --find[=COMMAND]\n'
+    printf '      Compare against find\n\n'
+
+    printf '  --fd[=COMMAND]\n'
+    printf '      Compare against fd\n\n'
+
+    printf '  --no-clean\n'
+    printf '      Use any existing corpora as-is\n\n'
+
+    printf '  --help\n'
+    printf '      This message\n\n'
+}
+
+# Hack to export an array
+export_array() {
+    local str=$(declare -p "$1" | sed 's/ -a / -ga /')
+    unset "$1"
+    export "$1=$str"
+}
+
+# Hack to import an array
+import_array() {
+    local cmd="${!1}"
+    unset "$1"
+    eval "$cmd"
+}
+
+# Set up the benchmarks
+setup() {
+    ROOT=$(realpath -- "$(dirname -- "${BASH_SOURCE[0]}")/..")
+    if ! [ "$PWD" -ef "$ROOT" ]; then
+        printf 'error: Please run this script from %s\n\n' "$ROOT" >&2
+        usage >&2
+        exit $EX_USAGE
+    fi
+
+    nproc=$(nproc)
+
+    # Options
+
+    CLEAN=1
+
+    BUILD=()
+    BFS=()
+    FIND=()
+    FD=()
+
+    COMPLETE=()
+    EARLY_QUIT=()
+    PRINT=()
+    STRATEGIES=()
+
+    for arg; do
+        case "$arg" in
+            # Flags
+            --no-clean)
+                CLEAN=0
+                ;;
+            # bfs commits/tags to benchmark
+            --build=*)
+                BUILD+=("${arg#*=}")
+                BFS+=("bfs-${arg#*=}")
+                ;;
+            # Utilities to benchmark against
+            --bfs)
+                BFS+=(bfs)
+                ;;
+            --bfs=*)
+                BFS+=("${arg#*=}")
+                ;;
+            --find)
+                FIND+=(find)
+                ;;
+            --find=*)
+                FIND+=("${arg#*=}")
+                ;;
+            --fd)
+                FD+=(fd)
+                ;;
+            --fd=*)
+                FD+=("${arg#*=}")
+                ;;
+            # Benchmark groups
+            --complete)
+                COMPLETE=("${COMPLETE_DEFAULT[@]}")
+                ;;
+            --complete=*)
+                read -ra COMPLETE <<<"${arg#*=}"
+                ;;
+            --early-quit)
+                EARLY_QUIT=("${EARLY_QUIT_DEFAULT[@]}")
+                ;;
+            --early-quit=*)
+                read -ra EARLY_QUIT <<<"${arg#*=}"
+                ;;
+            --print)
+                PRINT=("${PRINT_DEFAULT[@]}")
+                ;;
+            --print=*)
+                read -ra PRINT <<<"${arg#*=}"
+                ;;
+            --strategies)
+                STRATEGIES=("${STRATEGIES_DEFAULT[@]}")
+                ;;
+            --strategies=*)
+                read -ra STRATEGIES <<<"${arg#*=}"
+                ;;
+            --default)
+                COMPLETE=("${COMPLETE_DEFAULT[@]}")
+                EARLY_QUIT=("${EARLY_QUIT_DEFAULT[@]}")
+                PRINT=("${PRINT_DEFAULT[@]}")
+                STRATEGIES=("${STRATEGIES_DEFAULT[@]}")
+                ;;
+            --help)
+                usage
+                exit
+                ;;
+            *)
+                printf 'error: Unknown option %q\n\n' "$arg" >&2
+                usage >&2
+                exit $EX_USAGE
+                ;;
+        esac
+    done
+
+    if ((UID == 0)); then
+        max-freq
+    fi
+
+    echo "Building bfs ..."
+    as-user make -s -j"$nproc" release all
+
+    as-user mkdir -p bench/corpus
+
+    declare -A cloned=()
+    for corpus in "${COMPLETE[@]}" "${EARLY_QUIT[@]}" "${PRINT[@]}" "${STRATEGIES[@]}"; do
+        if ((cloned["$corpus"])); then
+            continue
+        fi
+        cloned["$corpus"]=1
+
+        dir="bench/corpus/$corpus"
+        if ((CLEAN)) || ! [ -e "$dir" ]; then
+            as-user ./bench/clone-tree.sh "${URLS[$corpus]}" "${TAGS[$corpus]}" "$dir"{,.git}
+        fi
+    done
+
+    if ((${#BUILD[@]} > 0)); then
+        echo "Creating bfs worktree ..."
+
+        worktree="bench/worktree"
+        as-user git worktree add -qd "$worktree"
+        at-exit as-user git worktree remove "$worktree"
+
+        bin="$(realpath -- "$SETUP_DIR")/bin"
+        as-user mkdir "$bin"
+
+        for commit in "${BUILD[@]}"; do
+            (
+                echo "Building bfs $commit ..."
+                cd "$worktree"
+                as-user git checkout -qd "$commit" --
+                as-user make -s -j"$nproc" release
+                as-user cp ./bin/bfs "$bin/bfs-$commit"
+                as-user make -s clean
+            )
+        done
+
+        # $SETUP_DIR contains `:` so it won't work in $PATH
+        # Work around this with a symlink
+        tmp=$(as-user mktemp)
+        as-user ln -sf "$bin" "$tmp"
+        at-exit rm "$tmp"
+        export PATH="$tmp:$PATH"
+    fi
+
+    export_array BFS
+    export_array FIND
+    export_array FD
+
+    export_array COMPLETE
+    export_array EARLY_QUIT
+    export_array PRINT
+    export_array STRATEGIES
+
+    if ((UID == 0)); then
+        turbo-off
+    fi
+
+    sync
+}
+
+# Runs hyperfine and saves the output
+do-hyperfine() {
+    local tmp_md="$BENCH_DIR/.bench.md"
+    local md="$BENCH_DIR/bench.md"
+    local tmp_json="$BENCH_DIR/.bench.json"
+    local json="$BENCH_DIR/bench.json"
+
+    if (($# == 0)); then
+        printf 'Nothing to do\n\n' | tee -a "$md"
+        return 1
+    fi
+
+    hyperfine -w2 -M20 --export-markdown="$tmp_md" --export-json="$tmp_json" "$@" &>/dev/tty
+    cat "$tmp_md" >>"$md"
+    cat "$tmp_json" >>"$json"
+    rm "$tmp_md" "$tmp_json"
+
+    printf '\n' | tee -a "$md"
+}
+
+# Print the header for a benchmark group
+group() {
+    printf '## %s\n\n' "$1" | tee -a "$BENCH_DIR/bench.md"
+}
+
+# Print the header for a benchmark subgroup
+subgroup() {
+    printf '### %s\n\n' "$1" | tee -a "$BENCH_DIR/bench.md"
+}
+
+# Print the header for a benchmark sub-subgroup
+subsubgroup() {
+    printf '#### %s\n\n' "$1" | tee -a "$BENCH_DIR/bench.md"
+}
+
+# Benchmark the complete traversal of a directory tree
+# (without printing anything)
+bench-complete-corpus() {
+    total=$(./bin/bfs "$2" -printf '.' | wc -c)
+
+    subgroup "$1 ($total files)"
+
+    cmds=()
+    for bfs in "${BFS[@]}"; do
+        cmds+=("$bfs $2 -false")
+    done
+
+    for find in "${FIND[@]}"; do
+        cmds+=("$find $2 -false")
+    done
+
+    for fd in "${FD[@]}"; do
+        cmds+=("$fd -u '^$' $2")
+    done
+
+    do-hyperfine "${cmds[@]}"
+}
+
+# All complete traversal benchmarks
+bench-complete() {
+    if (($#)); then
+        group "Complete traversal"
+
+        for corpus; do
+            bench-complete-corpus "$corpus ${TAGS[$corpus]}" "bench/corpus/$corpus"
+        done
+    fi
+}
+
+# Benchmark quiting as soon as a file is seen
+bench-early-quit-corpus() {
+    dir="$2"
+    max_depth=$(./bin/bfs "$dir" -printf '%d\n' | sort -rn | head -n1)
+
+    subgroup "$1 (depth $max_depth)"
+
+    # Save the list of unique filenames, along with their depth
+    UNIQ="$BENCH_DIR/uniq"
+    ./bin/bfs "$dir" -printf '%d %f\n' | sort -k2 | uniq -uf1 >"$UNIQ"
+
+    for ((i = 2; i <= max_depth; i *= 2)); do
+        subsubgroup "Depth $i"
+
+        # Sample random uniquely-named files at depth $i
+        export FILES="$BENCH_DIR/uniq-$i"
+        sed -n "s/^$i //p" "$UNIQ" | shuf -n20 >"$FILES"
+        if ! [ -s "$FILES" ]; then
+            continue
+        fi
+
+        cmds=()
+        for bfs in "${BFS[@]}"; do
+            cmds+=("$bfs $dir -name \$(shuf -n1 \$FILES) -print -quit")
+        done
+
+        for find in "${FIND[@]}"; do
+            cmds+=("$find $dir -name \$(shuf -n1 \$FILES) -print -quit")
+        done
+
+        for fd in "${FD[@]}"; do
+            cmds+=("$fd -usg1 \$(shuf -n1 \$FILES) $dir")
+        done
+
+        do-hyperfine "${cmds[@]}"
+    done
+}
+
+# All early-quitting benchmarks
+bench-early-quit() {
+    if (($#)); then
+        group "Early termination"
+
+        for corpus; do
+            bench-early-quit-corpus "$corpus ${TAGS[$corpus]}" "bench/corpus/$corpus"
+        done
+    fi
+}
+
+# Benchmark printing paths without colors
+bench-print-nocolor() {
+    subsubgroup "$1"
+
+    cmds=()
+    for bfs in "${BFS[@]}"; do
+        cmds+=("$bfs $2")
+    done
+
+    for find in "${FIND[@]}"; do
+        cmds+=("$find $2")
+    done
+
+    for fd in "${FD[@]}"; do
+        cmds+=("$fd -u --search-path $2")
+    done
+
+    do-hyperfine "${cmds[@]}"
+}
+
+# Benchmark printing paths with colors
+bench-print-color() {
+    subsubgroup "$1"
+
+    cmds=()
+    for bfs in "${BFS[@]}"; do
+        cmds+=("$bfs $2 -color")
+    done
+
+    for fd in "${FD[@]}"; do
+        cmds+=("$fd -u --search-path $2 --color=always")
+    done
+
+    do-hyperfine "${cmds[@]}"
+}
+
+# All printing benchmarks
+bench-print() {
+    if (($#)); then
+        group "Printing paths"
+
+        subgroup "Without colors"
+        for corpus; do
+            bench-print-nocolor "$corpus ${TAGS[$corpus]}" "bench/corpus/$corpus"
+        done
+
+        subgroup "With colors"
+        for corpus; do
+            bench-print-color "$corpus ${TAGS[$corpus]}" "bench/corpus/$corpus"
+        done
+    fi
+}
+
+# Benchmark search strategies
+bench-strategies-corpus() {
+    subgroup "$1"
+
+    for bfs in "${BFS[@]}"; do
+        subsubgroup "$bfs"
+        cmds=("$bfs -S "{bfs,dfs,ids,eds}" $2")
+        do-hyperfine "${cmds[@]}"
+    done
+}
+
+# All search strategy benchmarks
+bench-strategies() {
+    if (($#)); then
+        group "Search strategies"
+
+        for corpus; do
+            bench-strategies-corpus "$corpus ${TAGS[$corpus]}" "bench/corpus/$corpus"
+        done
+    fi
+}
+
+# Print benchmarked versions
+bench-versions() {
+    subgroup "Versions"
+
+    local md="$BENCH_DIR/bench.md"
+
+    printf '```console\n' >>"$md"
+
+    {
+        for bfs in "${BFS[@]}"; do
+            printf '$ %s --version | head -n1\n' "$bfs"
+            "$bfs" --version | head -n1
+        done
+
+        for find in "${FIND[@]}"; do
+            printf '$ %s --version | head -n1\n' "$find"
+            "$find" --version | head -n1
+        done
+
+        for fd in "${FD[@]}"; do
+            printf '$ %s --version\n' "$fd"
+            "$fd" --version
+        done
+    } | tee -a "$md"
+
+    printf '```' >>"$md"
+}
+
+# Print benchmark details
+bench-details() {
+    group "Details"
+
+    bench-versions
+}
+
+# Run all the benchmarks
+bench() {
+    import_array BFS
+    import_array FIND
+    import_array FD
+
+    import_array COMPLETE
+    import_array EARLY_QUIT
+    import_array PRINT
+    import_array STRATEGIES
+
+    bench-complete "${COMPLETE[@]}"
+    bench-early-quit "${EARLY_QUIT[@]}"
+    bench-print "${PRINT[@]}"
+    bench-strategies "${STRATEGIES[@]}"
+    bench-details
+}
-- 
cgit v1.2.3