From 25660f5c062cf1e0d0982457614c9aa4584fa565 Mon Sep 17 00:00:00 2001 From: Tavian Barnes Date: Fri, 29 Sep 2023 15:13:41 -0400 Subject: tests/xtouch: Try creating the immediate parent first --- tests/xtouch.c | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/tests/xtouch.c b/tests/xtouch.c index 4a02bf3..80fad8d 100644 --- a/tests/xtouch.c +++ b/tests/xtouch.c @@ -48,31 +48,40 @@ static int at_flags(const struct args *args) { /** Create any parent directories of the given path. */ static int mkdirs(const char *path, mode_t mode) { - char *copy = strdup(path); - if (!copy) { - return -1; + int ret = -1; + char *dir = xdirname(path); + if (!dir) { + goto err; } - int ret = -1; - char *cur = copy + strspn(copy, "/"); - while (true) { - cur += strcspn(cur, "/"); + if (strcmp(dir, ".") == 0) { + goto done; + } + + // Optimistically try the immediate parent first + if (mkdir(dir, mode) == 0 || errno == EEXIST) { + goto done; + } + // Create the parents one-at-a-time + char *cur = dir + strspn(dir, "/"); + while (*cur) { + cur += strcspn(cur, "/"); char *next = cur + strspn(cur, "/"); - if (!*next) { - ret = 0; - break; - } + char c = *cur; *cur = '\0'; - if (mkdir(copy, mode) != 0 && errno != EEXIST) { - break; + if (mkdir(dir, mode) != 0 && errno != EEXIST) { + goto err; } - *cur = '/'; + *cur = c; cur = next; } - free(copy); +done: + ret = 0; +err: + free(dir); return ret; } -- cgit v1.2.3 From 28f519d9650004787be480f15e990f835d324d7f Mon Sep 17 00:00:00 2001 From: Tavian Barnes Date: Thu, 28 Sep 2023 17:39:34 -0400 Subject: bench: New script to clone a git repo without file contents --- bench/clone-tree.sh | 143 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 143 insertions(+) create mode 100755 bench/clone-tree.sh diff --git a/bench/clone-tree.sh b/bench/clone-tree.sh new file mode 100755 index 0000000..744b5f4 --- /dev/null +++ b/bench/clone-tree.sh @@ -0,0 +1,143 @@ +#!/usr/bin/env bash + +# Copyright © Tavian Barnes +# SPDX-License-Identifier: 0BSD + +# Creates a directory tree that matches a git repo, but with empty files. E.g. +# +# $ ./bench/clone-tree.sh "https://.../linux.git" v6.5 ./linux ./linux.git +# +# will create or update a shallow clone at ./linux.git, then create a directory +# tree at ./linux with the same directory tree as the tag v6.5, except all files +# will be empty. + +set -eu + +if (($# != 4)); then + printf 'Usage: %s https://url/of/repo.git path/to/checkout path/to/repo.git\n' "$0" >&2 + exit 1 +fi + +URL="$1" +TAG="$2" +DIR="$3" +REPO="$4" + +BENCH=$(dirname -- "${BASH_SOURCE[0]}") +BIN=$(realpath -- "$BENCH/../bin") +BFS="$BIN/bfs" +XTOUCH="$BIN/tests/xtouch" + +if [ "${NPROC-}" ]; then + # Use fewer cores in recursive calls + export NPROC=$(((NPROC + 1) / 2)) +else + export NPROC=$(nproc) +fi + +JOBS=$((NPROC < 8 ? NPROC : 8)) + +do-git() { + git -C "$REPO" "$@" +} + +if ! [ -e "$REPO" ]; then + mkdir -p -- "$REPO" + do-git init -q --bare +fi + +has-ref() { + do-git rev-list --quiet -1 --missing=allow-promisor "$1" &>/dev/null +} + +sparse-fetch() { + do-git -c fetch.negotiationAlgorithm=noop fetch -q --filter=blob:none --depth=1 --no-tags --no-write-fetch-head --no-auto-gc "$@" +} + +if ! has-ref "$TAG"; then + printf 'Fetching %s ...\n' "$TAG" >&2 + do-git config remote.origin.url "$URL" + if ((${#TAG} >= 40)); then + sparse-fetch origin "$TAG" + else + sparse-fetch origin tag "$TAG" + fi +fi + +# Delete a tree in parallel +clean() { + local d=5 + "$BFS" -f "$1" -mindepth $d -maxdepth $d -type d -print0 \ + | xargs -0r -n1 -P$JOBS -- "$BFS" -j1 -mindepth 1 -delete -f + "$BFS" -f "$1" -delete +} + +if [ -e "$DIR" ]; then + printf 'Cleaning old directory tree %s ...\n' "$DIR" >&2 + TMP=$(mktemp -dp "$(dirname -- "$DIR")") + mv -- "$DIR" "$TMP" + clean "$TMP" & +fi + +# List gitlinks (submodule references) in the tree +ls-gitlinks() { + do-git ls-tree -zr "$TAG" \ + | sed -zn 's/.* commit //p' +} + +# Get the submodule ID for a path +submodule-for-path() { + do-git config --blob "$TAG:.gitmodules" \ + --name-only \ + --fixed-value \ + --get-regexp 'submodule\..**\.path' "$1" \ + | sed -En 's/submodule\.(.*)\.path/\1/p' +} + +# Get the URL for a submodule +submodule-url() { + # - https://chrome-internal.googlesource.com/ + # - not publicly accessible + # - https://chromium.googlesource.com/external/github.com/WebKit/webkit.git + # - is accessible, but the commit (59e9de61b7b3) isn't + # - https://android.googlesource.com/ + # - is accessible, but you need an account + + do-git config --blob "$TAG:.gitmodules" \ + --get "submodule.$1.url" \ + | sed -E \ + -e '\|^https://chrome-internal.googlesource.com/|Q1' \ + -e '\|^https://chromium.googlesource.com/external/github.com/WebKit/webkit.git|Q1' \ + -e '\|^https://android.googlesource.com/|Q1' +} + +# Recursively checkout submodules +while read -rd '' SUBREF SUBDIR; do + SUBNAME=$(submodule-for-path "$SUBDIR") + SUBURL=$(submodule-url "$SUBNAME") || continue + + if (($(jobs -pr | wc -w) >= JOBS)); then + wait -n + fi + "$0" "$SUBURL" "$SUBREF" "$DIR/$SUBDIR" "$REPO/modules/$SUBNAME" & +done < <(ls-gitlinks) + +# Touch files in parallel +xtouch() ( + cd "$DIR" + if ((JOBS > 1)); then + xargs -0r -n4096 -P$JOBS -- "$XTOUCH" -p -- + else + xargs -0r -- "$XTOUCH" -p -- + fi +) + +# Check out files +printf 'Checking out %s ...\n' "$DIR" >&2 +mkdir -p -- "$DIR" +do-git ls-tree -zr "$TAG"\ + | sed -zn 's/.* blob .*\t//p' \ + | xtouch + +# Wait for cleaning/submodules +wait -- cgit v1.2.3 From fd2a86bb93f42a2c64b202f959dd5c8cbb724c69 Mon Sep 17 00:00:00 2001 From: Tavian Barnes Date: Fri, 29 Sep 2023 14:13:06 -0400 Subject: bench: Add benchmarking script --- bench/.gitignore | 3 + bench/bench.sh | 491 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 494 insertions(+) create mode 100644 bench/.gitignore create mode 100644 bench/bench.sh diff --git a/bench/.gitignore b/bench/.gitignore new file mode 100644 index 0000000..170d850 --- /dev/null +++ b/bench/.gitignore @@ -0,0 +1,3 @@ +/corpus/ +/results/ +/worktree/ diff --git a/bench/bench.sh b/bench/bench.sh new file mode 100644 index 0000000..09564ac --- /dev/null +++ b/bench/bench.sh @@ -0,0 +1,491 @@ +#!/hint/bash + +# Copyright © Tavian Barnes +# SPDX-License-Identifier: 0BSD + +declare -gA URLS=( + [chromium]="https://chromium.googlesource.com/chromium/src.git" + [linux]="https://github.com/torvalds/linux.git" + [rust]="https://github.com/rust-lang/rust.git" +) + +declare -gA TAGS=( + [chromium]=119.0.6036.2 + [linux]=v6.5 + [rust]=1.72.1 +) + +COMPLETE_DEFAULT=(linux rust chromium) +EARLY_QUIT_DEFAULT=(chromium) +PRINT_DEFAULT=(linux) +STRATEGIES_DEFAULT=(linux) + +usage() { + printf 'Usage: tailfin run %s [--default]\n' "${BASH_SOURCE[0]}" + printf ' [--complete] [--early-quit] [--print] [--strategies]\n' + printf ' [--build=...] [--bfs] [--find] [--fd]\n' + printf ' [--no-clean] [--help]\n\n' + + printf ' --default\n' + printf ' Run the default set of benchmarks\n\n' + + printf ' --complete[=CORPUS]\n' + printf ' Complete traversal benchmark. \n' + printf ' Default corpus is --complete="%s"\n\n' "${COMPLETE_DEFAULT[*]}" + + printf ' --early-quit[=CORPUS]\n' + printf ' Early quitting benchmark. \n' + printf ' Default corpus is --early-quit=chromium\n\n' "${EARLY_QUIT_DEFAULT[*]}" + + printf ' --print[=CORPUS]\n' + printf ' Path printing benchmark. \n' + printf ' Default corpus is --print=linux\n\n' "${PRINT_DEFAULT[*]}" + + printf ' --strategies[=CORPUS]\n' + printf ' Search strategy benchmark.\n' + printf ' Default corpus is --strategies=linux\n\n' "${STRATEGIES_DEFAULT[*]}" + + printf ' --build=COMMIT\n' + printf ' Build this bfs commit and benchmark it. Specify multiple times to\n' + printf ' compare, e.g. --build=3.0.1 --build=3.0.2\n\n' + + printf ' --bfs[=COMMAND]\n' + printf ' Benchmark an existing build of bfs\n\n' + + printf ' --find[=COMMAND]\n' + printf ' Compare against find\n\n' + + printf ' --fd[=COMMAND]\n' + printf ' Compare against fd\n\n' + + printf ' --no-clean\n' + printf ' Use any existing corpora as-is\n\n' + + printf ' --help\n' + printf ' This message\n\n' +} + +# Hack to export an array +export_array() { + local str=$(declare -p "$1" | sed 's/ -a / -ga /') + unset "$1" + export "$1=$str" +} + +# Hack to import an array +import_array() { + local cmd="${!1}" + unset "$1" + eval "$cmd" +} + +# Set up the benchmarks +setup() { + ROOT=$(realpath -- "$(dirname -- "${BASH_SOURCE[0]}")/..") + if ! [ "$PWD" -ef "$ROOT" ]; then + printf 'error: Please run this script from %s\n\n' "$ROOT" >&2 + usage >&2 + exit $EX_USAGE + fi + + nproc=$(nproc) + + # Options + + CLEAN=1 + + BUILD=() + BFS=() + FIND=() + FD=() + + COMPLETE=() + EARLY_QUIT=() + PRINT=() + STRATEGIES=() + + for arg; do + case "$arg" in + # Flags + --no-clean) + CLEAN=0 + ;; + # bfs commits/tags to benchmark + --build=*) + BUILD+=("${arg#*=}") + BFS+=("bfs-${arg#*=}") + ;; + # Utilities to benchmark against + --bfs) + BFS+=(bfs) + ;; + --bfs=*) + BFS+=("${arg#*=}") + ;; + --find) + FIND+=(find) + ;; + --find=*) + FIND+=("${arg#*=}") + ;; + --fd) + FD+=(fd) + ;; + --fd=*) + FD+=("${arg#*=}") + ;; + # Benchmark groups + --complete) + COMPLETE=("${COMPLETE_DEFAULT[@]}") + ;; + --complete=*) + read -ra COMPLETE <<<"${arg#*=}" + ;; + --early-quit) + EARLY_QUIT=("${EARLY_QUIT_DEFAULT[@]}") + ;; + --early-quit=*) + read -ra EARLY_QUIT <<<"${arg#*=}" + ;; + --print) + PRINT=("${PRINT_DEFAULT[@]}") + ;; + --print=*) + read -ra PRINT <<<"${arg#*=}" + ;; + --strategies) + STRATEGIES=("${STRATEGIES_DEFAULT[@]}") + ;; + --strategies=*) + read -ra STRATEGIES <<<"${arg#*=}" + ;; + --default) + COMPLETE=("${COMPLETE_DEFAULT[@]}") + EARLY_QUIT=("${EARLY_QUIT_DEFAULT[@]}") + PRINT=("${PRINT_DEFAULT[@]}") + STRATEGIES=("${STRATEGIES_DEFAULT[@]}") + ;; + --help) + usage + exit + ;; + *) + printf 'error: Unknown option %q\n\n' "$arg" >&2 + usage >&2 + exit $EX_USAGE + ;; + esac + done + + if ((UID == 0)); then + max-freq + fi + + echo "Building bfs ..." + as-user make -s -j"$nproc" release all + + as-user mkdir -p bench/corpus + + declare -A cloned=() + for corpus in "${COMPLETE[@]}" "${EARLY_QUIT[@]}" "${PRINT[@]}" "${STRATEGIES[@]}"; do + if ((cloned["$corpus"])); then + continue + fi + cloned["$corpus"]=1 + + dir="bench/corpus/$corpus" + if ((CLEAN)) || ! [ -e "$dir" ]; then + as-user ./bench/clone-tree.sh "${URLS[$corpus]}" "${TAGS[$corpus]}" "$dir"{,.git} + fi + done + + if ((${#BUILD[@]} > 0)); then + echo "Creating bfs worktree ..." + + worktree="bench/worktree" + as-user git worktree add -qd "$worktree" + at-exit as-user git worktree remove "$worktree" + + bin="$(realpath -- "$SETUP_DIR")/bin" + as-user mkdir "$bin" + + for commit in "${BUILD[@]}"; do + ( + echo "Building bfs $commit ..." + cd "$worktree" + as-user git checkout -qd "$commit" -- + as-user make -s -j"$nproc" release + as-user cp ./bin/bfs "$bin/bfs-$commit" + as-user make -s clean + ) + done + + # $SETUP_DIR contains `:` so it won't work in $PATH + # Work around this with a symlink + tmp=$(as-user mktemp) + as-user ln -sf "$bin" "$tmp" + at-exit rm "$tmp" + export PATH="$tmp:$PATH" + fi + + export_array BFS + export_array FIND + export_array FD + + export_array COMPLETE + export_array EARLY_QUIT + export_array PRINT + export_array STRATEGIES + + if ((UID == 0)); then + turbo-off + fi + + sync +} + +# Runs hyperfine and saves the output +do-hyperfine() { + local tmp_md="$BENCH_DIR/.bench.md" + local md="$BENCH_DIR/bench.md" + local tmp_json="$BENCH_DIR/.bench.json" + local json="$BENCH_DIR/bench.json" + + if (($# == 0)); then + printf 'Nothing to do\n\n' | tee -a "$md" + return 1 + fi + + hyperfine -w2 -M20 --export-markdown="$tmp_md" --export-json="$tmp_json" "$@" &>/dev/tty + cat "$tmp_md" >>"$md" + cat "$tmp_json" >>"$json" + rm "$tmp_md" "$tmp_json" + + printf '\n' | tee -a "$md" +} + +# Print the header for a benchmark group +group() { + printf '## %s\n\n' "$1" | tee -a "$BENCH_DIR/bench.md" +} + +# Print the header for a benchmark subgroup +subgroup() { + printf '### %s\n\n' "$1" | tee -a "$BENCH_DIR/bench.md" +} + +# Print the header for a benchmark sub-subgroup +subsubgroup() { + printf '#### %s\n\n' "$1" | tee -a "$BENCH_DIR/bench.md" +} + +# Benchmark the complete traversal of a directory tree +# (without printing anything) +bench-complete-corpus() { + total=$(./bin/bfs "$2" -printf '.' | wc -c) + + subgroup "$1 ($total files)" + + cmds=() + for bfs in "${BFS[@]}"; do + cmds+=("$bfs $2 -false") + done + + for find in "${FIND[@]}"; do + cmds+=("$find $2 -false") + done + + for fd in "${FD[@]}"; do + cmds+=("$fd -u '^$' $2") + done + + do-hyperfine "${cmds[@]}" +} + +# All complete traversal benchmarks +bench-complete() { + if (($#)); then + group "Complete traversal" + + for corpus; do + bench-complete-corpus "$corpus ${TAGS[$corpus]}" "bench/corpus/$corpus" + done + fi +} + +# Benchmark quiting as soon as a file is seen +bench-early-quit-corpus() { + dir="$2" + max_depth=$(./bin/bfs "$dir" -printf '%d\n' | sort -rn | head -n1) + + subgroup "$1 (depth $max_depth)" + + # Save the list of unique filenames, along with their depth + UNIQ="$BENCH_DIR/uniq" + ./bin/bfs "$dir" -printf '%d %f\n' | sort -k2 | uniq -uf1 >"$UNIQ" + + for ((i = 2; i <= max_depth; i *= 2)); do + subsubgroup "Depth $i" + + # Sample random uniquely-named files at depth $i + export FILES="$BENCH_DIR/uniq-$i" + sed -n "s/^$i //p" "$UNIQ" | shuf -n20 >"$FILES" + if ! [ -s "$FILES" ]; then + continue + fi + + cmds=() + for bfs in "${BFS[@]}"; do + cmds+=("$bfs $dir -name \$(shuf -n1 \$FILES) -print -quit") + done + + for find in "${FIND[@]}"; do + cmds+=("$find $dir -name \$(shuf -n1 \$FILES) -print -quit") + done + + for fd in "${FD[@]}"; do + cmds+=("$fd -usg1 \$(shuf -n1 \$FILES) $dir") + done + + do-hyperfine "${cmds[@]}" + done +} + +# All early-quitting benchmarks +bench-early-quit() { + if (($#)); then + group "Early termination" + + for corpus; do + bench-early-quit-corpus "$corpus ${TAGS[$corpus]}" "bench/corpus/$corpus" + done + fi +} + +# Benchmark printing paths without colors +bench-print-nocolor() { + subsubgroup "$1" + + cmds=() + for bfs in "${BFS[@]}"; do + cmds+=("$bfs $2") + done + + for find in "${FIND[@]}"; do + cmds+=("$find $2") + done + + for fd in "${FD[@]}"; do + cmds+=("$fd -u --search-path $2") + done + + do-hyperfine "${cmds[@]}" +} + +# Benchmark printing paths with colors +bench-print-color() { + subsubgroup "$1" + + cmds=() + for bfs in "${BFS[@]}"; do + cmds+=("$bfs $2 -color") + done + + for fd in "${FD[@]}"; do + cmds+=("$fd -u --search-path $2 --color=always") + done + + do-hyperfine "${cmds[@]}" +} + +# All printing benchmarks +bench-print() { + if (($#)); then + group "Printing paths" + + subgroup "Without colors" + for corpus; do + bench-print-nocolor "$corpus ${TAGS[$corpus]}" "bench/corpus/$corpus" + done + + subgroup "With colors" + for corpus; do + bench-print-color "$corpus ${TAGS[$corpus]}" "bench/corpus/$corpus" + done + fi +} + +# Benchmark search strategies +bench-strategies-corpus() { + subgroup "$1" + + for bfs in "${BFS[@]}"; do + subsubgroup "$bfs" + cmds=("$bfs -S "{bfs,dfs,ids,eds}" $2") + do-hyperfine "${cmds[@]}" + done +} + +# All search strategy benchmarks +bench-strategies() { + if (($#)); then + group "Search strategies" + + for corpus; do + bench-strategies-corpus "$corpus ${TAGS[$corpus]}" "bench/corpus/$corpus" + done + fi +} + +# Print benchmarked versions +bench-versions() { + subgroup "Versions" + + local md="$BENCH_DIR/bench.md" + + printf '```console\n' >>"$md" + + { + for bfs in "${BFS[@]}"; do + printf '$ %s --version | head -n1\n' "$bfs" + "$bfs" --version | head -n1 + done + + for find in "${FIND[@]}"; do + printf '$ %s --version | head -n1\n' "$find" + "$find" --version | head -n1 + done + + for fd in "${FD[@]}"; do + printf '$ %s --version\n' "$fd" + "$fd" --version + done + } | tee -a "$md" + + printf '```' >>"$md" +} + +# Print benchmark details +bench-details() { + group "Details" + + bench-versions +} + +# Run all the benchmarks +bench() { + import_array BFS + import_array FIND + import_array FD + + import_array COMPLETE + import_array EARLY_QUIT + import_array PRINT + import_array STRATEGIES + + bench-complete "${COMPLETE[@]}" + bench-early-quit "${EARLY_QUIT[@]}" + bench-print "${PRINT[@]}" + bench-strategies "${STRATEGIES[@]}" + bench-details +} -- cgit v1.2.3 From 692098fdb922c464949fad7c5b9e36b531ea6f68 Mon Sep 17 00:00:00 2001 From: Tavian Barnes Date: Mon, 2 Oct 2023 10:49:41 -0400 Subject: bench: Add a README --- bench/README.md | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 bench/README.md diff --git a/bench/README.md b/bench/README.md new file mode 100644 index 0000000..56157a0 --- /dev/null +++ b/bench/README.md @@ -0,0 +1,51 @@ +This directory contains a suite of benchmarks used to evaluate `bfs` and detect performance regressions. +To run them, you'll need the [tailfin] benchmark harness. +You can read the full usage information with + +[tailfin]: https://github.com/tavianator/tailfin + +```console +$ tailfin -n run bench/bench.sh --help +Usage: tailfin run bench/bench.sh [--default] + [--complete] [--early-quit] [--print] [--strategies] + [--build=...] [--bfs] [--find] [--fd] + [--no-clean] [--help] +... +``` + +The benchmarks use various git repositories to have a realistic and reproducible directory structure as a corpus. +Currently, those are the [Linux], [Rust], and [Chromium] repos. +The scripts will automatically clone those repos using [partial clone] filters to avoid downloading the actual file contents, saving bandwidth and space. + +[Linux]: https://github.com/torvalds/linux.git +[Rust]: https://github.com/rust-lang/rust.git +[Chromium]: https://chromium.googlesource.com/chromium/src.git +[partial clone]: https://git-scm.com/docs/partial-clone + +You can try out a quick benchmark by running + +```console +$ tailfin run bench/bench.sh --build=main --complete=linux +``` + +This will build the `main` branch, and measure the complete traversal of the Linux repo. +Results will be both printed to the console and saved in a Markdown file, which you can find by running + +```console +$ tailfin latest +results/2023/09/29/15:32:49 +$ cat results/2023/09/29/15:32:49/runs/1/bench.md +## Complete traversal +... +``` + +To measure performance improvements/regressions of a change, compare the `main` branch to the topic branch on the full benchmark suite: + +```console +$ tailfin run bench/bench.sh --build=main --build=branch --default +``` + +This will take a few minutes. +Results from the full benchmark suite can be seen in performance-related pull requests, for example [#126]. + +[#126]: https://github.com/tavianator/bfs/pull/126 -- cgit v1.2.3