diff options
Diffstat (limited to 'bench')
-rw-r--r-- | bench/bench.sh | 56 | ||||
-rw-r--r-- | bench/ioq.c | 455 |
2 files changed, 500 insertions, 11 deletions
diff --git a/bench/bench.sh b/bench/bench.sh index 1526fe5..c9ed978 100644 --- a/bench/bench.sh +++ b/bench/bench.sh @@ -22,6 +22,7 @@ PRINT_DEFAULT=(linux) STRATEGIES_DEFAULT=(rust) JOBS_DEFAULT=(rust) EXEC_DEFAULT=(linux) +SORTED_DEFAULT=(chromium) usage() { printf 'Usage: tailfin run %s\n' "${BASH_SOURCE[0]}" @@ -60,6 +61,10 @@ usage() { printf ' Process spawning benchmark.\n' printf ' Default corpus is --exec=%s\n\n' "${EXEC_DEFAULT[*]}" + printf ' --sorted[=CORPUS]\n' + printf ' Sorted traversal benchmark.\n' + printf ' Default corpus is --sorted=%s\n\n' "${SORTED_DEFAULT[*]}" + printf ' --build=COMMIT\n' printf ' Build this bfs commit and benchmark it. Specify multiple times to\n' printf ' compare, e.g. --build=3.0.1 --build=3.0.2\n\n' @@ -121,6 +126,7 @@ setup() { STRATEGIES=() JOBS=() EXEC=() + SORTED=() for arg; do case "$arg" in @@ -195,6 +201,12 @@ setup() { --exec=*) read -ra EXEC <<<"${arg#*=}" ;; + --sorted) + SORTED=("${SORTED_DEFAULT[@]}") + ;; + --sorted=*) + read -ra SORTED <<<"${arg#*=}" + ;; --default) COMPLETE=("${COMPLETE_DEFAULT[@]}") EARLY_QUIT=("${EARLY_QUIT_DEFAULT[@]}") @@ -203,6 +215,7 @@ setup() { STRATEGIES=("${STRATEGIES_DEFAULT[@]}") JOBS=("${JOBS_DEFAULT[@]}") EXEC=("${EXEC_DEFAULT[@]}") + SORTED=("${SORTED_DEFAULT[@]}") ;; --help) usage @@ -221,13 +234,13 @@ setup() { fi echo "Building bfs ..." - as-user make -s -j"$nproc" RELEASE=y + as-user ./configure --enable-release as-user make -s -j"$nproc" all as-user mkdir -p bench/corpus declare -A cloned=() - for corpus in "${COMPLETE[@]}" "${EARLY_QUIT[@]}" "${STAT[@]}" "${PRINT[@]}" "${STRATEGIES[@]}" "${JOBS[@]}" "${EXEC[@]}"; do + for corpus in "${COMPLETE[@]}" "${EARLY_QUIT[@]}" "${STAT[@]}" "${PRINT[@]}" "${STRATEGIES[@]}" "${JOBS[@]}" "${EXEC[@]}" "${SORTED[@]}"; do if ((cloned["$corpus"])); then continue fi @@ -254,8 +267,8 @@ setup() { echo "Building bfs $commit ..." cd "$worktree" as-user git checkout -qd "$commit" -- - if [ -e config ]; then - as-user make -s -j"$nproc" config RELEASE=1 + if [ -e configure ]; then + as-user ./configure --enable-release as-user make -s -j"$nproc" else as-user make -s -j"$nproc" release @@ -269,12 +282,7 @@ setup() { ) done - # $SETUP_DIR contains `:` so it won't work in $PATH - # Work around this with a symlink - tmp=$(as-user mktemp) - as-user ln -sf "$bin" "$tmp" - defer rm "$tmp" - export PATH="$tmp:$PATH" + export PATH="$bin:$PATH" fi export_array BFS @@ -288,6 +296,7 @@ setup() { export_array STRATEGIES export_array JOBS export_array EXEC + export_array SORTED if ((UID == 0)); then turbo-off @@ -365,7 +374,7 @@ bench-complete() { fi } -# Benchmark quiting as soon as a file is seen +# Benchmark quitting as soon as a file is seen bench-early-quit-corpus() { dir="$2" max_depth=$(./bin/bfs "$dir" -printf '%d\n' | sort -rn | head -n1) @@ -655,6 +664,29 @@ bench-exec() { fi } +# Benchmark sorted traversal +bench-sorted-corpus() { + subgroup '%s' "$1" + + cmds=() + for bfs in "${BFS[@]}"; do + cmds+=("$bfs -s $2 -false") + done + + do-hyperfine "${cmds[@]}" +} + +# All sorted traversal benchmarks +bench-sorted() { + if (($#)); then + group "Sorted traversal" + + for corpus; do + bench-sorted-corpus "$corpus ${TAGS[$corpus]}" "bench/corpus/$corpus" + done + fi +} + # Print benchmarked versions bench-versions() { subgroup "Versions" @@ -703,6 +735,7 @@ bench() { import_array STRATEGIES import_array JOBS import_array EXEC + import_array SORTED bench-complete "${COMPLETE[@]}" bench-early-quit "${EARLY_QUIT[@]}" @@ -711,5 +744,6 @@ bench() { bench-strategies "${STRATEGIES[@]}" bench-jobs "${JOBS[@]}" bench-exec "${EXEC[@]}" + bench-sorted "${SORTED[@]}" bench-details } diff --git a/bench/ioq.c b/bench/ioq.c new file mode 100644 index 0000000..fb9edbc --- /dev/null +++ b/bench/ioq.c @@ -0,0 +1,455 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +#include "atomic.h" +#include "bfs.h" +#include "bfstd.h" +#include "diag.h" +#include "ioq.h" +#include "sighook.h" +#include "xtime.h" + +#include <errno.h> +#include <locale.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include <unistd.h> + +/** A latency sample. */ +struct lat { + /** The sampled latency. */ + struct timespec time; + /** A random integer, for reservoir sampling. */ + long key; +}; + +/** Number of latency samples to keep. */ +#define SAMPLES 1000 +/** Latency sampling period. */ +#define PERIOD 128 + +/** Latency measurements. */ +struct lats { + /** Lowest observed latency. */ + struct timespec min; + /** Highest observed latency. */ + struct timespec max; + /** Total latency. */ + struct timespec sum; + /** Number of measured requests. */ + size_t count; + + /** Priority queue for reservoir sampling. */ + struct lat heap[SAMPLES]; + /** Current size of the heap. */ + size_t heap_size; +}; + +/** Initialize a latency reservoir. */ +static void lats_init(struct lats *lats) { + lats->min = (struct timespec) { .tv_sec = 1000 }; + lats->max = (struct timespec) { 0 }; + lats->sum = (struct timespec) { 0 }; + lats->count = 0; + lats->heap_size = 0; +} + +/** Binary heap parent. */ +static size_t heap_parent(size_t i) { + return (i - 1) / 2; +} + +/** Binary heap left child. */ +static size_t heap_child(size_t i) { + return 2 * i + 1; +} + +/** Binary heap smallest child. */ +static size_t heap_min_child(const struct lats *lats, size_t i) { + size_t j = heap_child(i); + size_t k = j + 1; + if (k < lats->heap_size && lats->heap[k].key < lats->heap[j].key) { + return k; + } else { + return j; + } +} + +/** Check if the heap property is met. */ +static bool heap_check(const struct lat *parent, const struct lat *child) { + return parent->key <= child->key; +} + +/** Reservoir sampling. */ +static void heap_push(struct lats *lats, const struct lat *lat) { + size_t i; + + if (lats->heap_size < SAMPLES) { + // Heapify up + i = lats->heap_size++; + while (i > 0) { + size_t j = heap_parent(i); + if (heap_check(&lats->heap[j], lat)) { + break; + } + lats->heap[i] = lats->heap[j]; + i = j; + } + } else if (lat->key > lats->heap[0].key) { + // Heapify down + i = 0; + while (true) { + size_t j = heap_min_child(lats, i); + if (j >= SAMPLES || heap_check(lat, &lats->heap[j])) { + break; + } + lats->heap[i] = lats->heap[j]; + i = j; + } + } else { + // Reject + return; + } + + lats->heap[i] = *lat; +} + +/** Add a latency sample. */ +static void lats_push(struct lats *lats, const struct timespec *ts) { + timespec_min(&lats->min, ts); + timespec_max(&lats->max, ts); + timespec_add(&lats->sum, ts); + ++lats->count; + + struct lat lat = { + .time = *ts, + .key = lrand48(), + }; + heap_push(lats, &lat); +} + +/** Merge two latency reservoirs. */ +static void lats_merge(struct lats *into, const struct lats *from) { + timespec_min(&into->min, &from->min); + timespec_max(&into->max, &from->max); + timespec_add(&into->sum, &from->sum); + into->count += from->count; + + for (size_t i = 0; i < from->heap_size; ++i) { + heap_push(into, &from->heap[i]); + } +} + +/** Latency qsort() comparator. */ +static int lat_cmp(const void *a, const void *b) { + const struct lat *la = a; + const struct lat *lb = b; + return timespec_cmp(&la->time, &lb->time); +} + +/** Sort the latency reservoir. */ +static void lats_sort(struct lats *lats) { + qsort(lats->heap, lats->heap_size, sizeof(lats->heap[0]), lat_cmp); +} + +/** Get the nth percentile. */ +static const struct timespec *lats_percentile(const struct lats *lats, int percent) { + size_t i = lats->heap_size * percent / 100; + return &lats->heap[i].time; +} + +/** Which clock to use for benchmarking. */ +static clockid_t clockid = CLOCK_REALTIME; + +/** Get a current time measurement. */ +static void gettime(struct timespec *tp) { + int ret = clock_gettime(clockid, tp); + bfs_everify(ret == 0, "clock_gettime(%d)", (int)clockid); +} + +/** + * Time measurements. + */ +struct times { + /** The start time. */ + struct timespec start; + + /** Total requests started. */ + size_t pushed; + /** Total requests finished. */ + size_t popped; + + /** The start time for the currently tracked request. */ + struct timespec req_start; + /** Whether a timed request is currently in flight. */ + bool timing; + + /** Latency measurements. */ + struct lats lats; +}; + +/** Initialize a timer. */ +static void times_init(struct times *times) { + gettime(×->start); + times->pushed = 0; + times->popped = 0; + bfs_assert(!times->timing); + lats_init(×->lats); +} + +/** Finish timing a request. */ +static void track_latency(struct times *times) { + struct timespec elapsed; + gettime(&elapsed); + timespec_sub(&elapsed, ×->req_start); + lats_push(×->lats, &elapsed); + + bfs_assert(times->timing); + times->timing = false; +} + +/** Add times to the totals, and reset the lap times. */ +static void times_lap(struct times *total, struct times *lap) { + total->pushed += lap->pushed; + total->popped += lap->popped; + lats_merge(&total->lats, &lap->lats); + + times_init(lap); +} + +/** Print some times. */ +static void times_print(struct times *times, long seconds) { + struct timespec elapsed; + gettime(&elapsed); + timespec_sub(&elapsed, ×->start); + + double fsec = timespec_ns(&elapsed) / 1.0e9; + + if (seconds > 0) { + printf("%5ld", seconds); + } else if (elapsed.tv_nsec >= 10 * 1000 * 1000) { + printf("%5.2f", fsec); + } else { + printf("%5.0f", fsec); + } + + double iops = times->popped / fsec; + double mean = timespec_ns(×->lats.sum) / times->lats.count; + double min = timespec_ns(×->lats.min); + double max = timespec_ns(×->lats.max); + + lats_sort(×->lats); + double n50 = timespec_ns(lats_percentile(×->lats, 50)); + double n90 = timespec_ns(lats_percentile(×->lats, 90)); + double n99 = timespec_ns(lats_percentile(×->lats, 99)); + + printf(" │ %'12.0f │ %'7.0f │ %'7.0f │ %'7.0f │ %'7.0f │ %'7.0f │ %'7.0f\n", iops, mean, min, n50, n90, n99, max); + fflush(stdout); +} + +/** Push an ioq request. */ +static bool push(struct ioq *ioq, enum ioq_nop_type type, struct times *lap) { + void *ptr = NULL; + + // Track latency for a small fraction of requests + if (!lap->timing && (lap->pushed + 1) % PERIOD == 0) { + ptr = lap; + gettime(&lap->req_start); + } + + int ret = ioq_nop(ioq, type, ptr); + if (ret != 0) { + bfs_everify(errno == EAGAIN, "ioq_nop(%d)", (int)type); + return false; + } + + ++lap->pushed; + if (ptr) { + lap->timing = true; + } + return true; +} + +/** Pop an ioq request. */ +static bool pop(struct ioq *ioq, struct times *lap, bool block) { + struct ioq_ent *ent = ioq_pop(ioq, block); + if (!ent) { + return false; + } + + if (ent->ptr) { + track_latency(lap); + } + + ioq_free(ioq, ent); + ++lap->popped; + return true; +} + +/** ^C flag. */ +static atomic bool quit = false; + +/** ^C hook. */ +static void ctrlc(int sig, siginfo_t *info, void *arg) { + store(&quit, true, relaxed); +} + +int main(int argc, char *argv[]) { + // Use CLOCK_MONOTONIC if available +#if defined(_POSIX_MONOTONIC_CLOCK) && _POSIX_MONOTONIC_CLOCK >= 0 + if (sysoption(MONOTONIC_CLOCK) > 0) { + clockid = CLOCK_MONOTONIC; + } +#endif + + // Enable thousands separators + setlocale(LC_ALL, ""); + + // -d: queue depth + unsigned int depth = 4096; + // -j: threads + unsigned int threads = 0; + // -t: timeout + double timeout = 5.0; + // -L|-H: ioq_nop() type + enum ioq_nop_type type = IOQ_NOP_LIGHT; + + const char *cmd = argc > 0 ? argv[0] : "ioq"; + int c; + while (c = getopt(argc, argv, ":d:j:t:LH"), c != -1) { + switch (c) { + case 'd': + if (xstrtoui(optarg, NULL, 10, &depth) != 0) { + fprintf(stderr, "%s: Bad depth '%s': %s\n", cmd, optarg, errstr()); + return EXIT_FAILURE; + } + break; + case 'j': + if (xstrtoui(optarg, NULL, 10, &threads) != 0) { + fprintf(stderr, "%s: Bad thread count '%s': %s\n", cmd, optarg, errstr()); + return EXIT_FAILURE; + } + break; + case 't': + if (xstrtod(optarg, NULL, &timeout) != 0) { + fprintf(stderr, "%s: Bad timeout '%s': %s\n", cmd, optarg, errstr()); + return EXIT_FAILURE; + } + break; + case 'L': + type = IOQ_NOP_LIGHT; + break; + case 'H': + type = IOQ_NOP_HEAVY; + break; + case ':': + fprintf(stderr, "%s: Missing argument to -%c\n", cmd, optopt); + return EXIT_FAILURE; + case '?': + fprintf(stderr, "%s: Unrecognized option -%c\n", cmd, optopt); + return EXIT_FAILURE; + } + } + + if (!threads) { + threads = nproc(); + if (threads > 8) { + threads = 8; + } + } + if (threads < 2) { + threads = 2; + } + --threads; + + // Listen for ^C to print the summary + struct sighook *hook = sighook(SIGINT, ctrlc, NULL, SH_CONTINUE | SH_ONESHOT); + + printf("I/O queue benchmark (%s)\n\n", bfs_version); + + printf("[-d] depth: %u\n", depth); + printf("[-j] threads: %u (including main)\n", threads + 1); + if (type == IOQ_NOP_HEAVY) { + printf("[-H] type: heavy (with syscalls)\n"); + } else { + printf("[-L] type: light (no syscalls)\n"); + } + printf("\n"); + + printf(" Time │ Throughput │ Latency │ min │ 50%% │ 90%% │ 99%% │ max\n"); + printf(" (s) │ (IO/s) │ (ns/IO) │ │ │ │ │\n"); + printf("══════╪══════════════╪═════════╪═════════╪═════════╪═════════╪═════════╪═════════\n"); + fflush(stdout); + + struct ioq *ioq = ioq_create(depth, threads); + bfs_everify(ioq, "ioq_create(%u, %u)", depth, threads); + + // Pre-allocate all the requests + while (ioq_capacity(ioq) > 0) { + int ret = ioq_nop(ioq, type, NULL); + bfs_everify(ret == 0, "ioq_nop(%d)", (int)type); + } + while (true) { + struct ioq_ent *ent = ioq_pop(ioq, true); + if (!ent) { + break; + } + ioq_free(ioq, ent); + } + + struct times total, lap; + times_init(&total); + lap = total; + + long seconds = 0; + while (!load(&quit, relaxed)) { + bool was_timing = lap.timing; + + for (int i = 0; i < 16; ++i) { + bool block = ioq_capacity(ioq) == 0; + if (!pop(ioq, &lap, block)) { + break; + } + } + + if (was_timing && !lap.timing) { + struct timespec elapsed; + gettime(&elapsed); + timespec_sub(&elapsed, &total.start); + + if (elapsed.tv_sec > seconds) { + seconds = elapsed.tv_sec; + times_print(&lap, seconds); + times_lap(&total, &lap); + } + + double ns = timespec_ns(&elapsed); + if (timeout > 0 && ns >= timeout * 1.0e9) { + break; + } + } + + for (int i = 0; i < 8; ++i) { + if (!push(ioq, type, &lap)) { + break; + } + } + ioq_submit(ioq); + } + + while (pop(ioq, &lap, true)); + times_lap(&total, &lap); + + if (load(&quit, relaxed)) { + printf("\r──^C──┼──────────────┼─────────┼─────────┼─────────┼─────────┼─────────┼─────────\n"); + } else { + printf("──────┼──────────────┼─────────┼─────────┼─────────┼─────────┼─────────┼─────────\n"); + } + times_print(&total, 0); + + ioq_destroy(ioq); + sigunhook(hook); + return 0; +} |