110 files changed, 4082 insertions, 1050 deletions
diff --git a/.github/diag.sh b/.github/diag.sh
index fe78be8..d89e7a4 100755
--- a/.github/diag.sh
+++ b/.github/diag.sh
@@ -8,9 +8,13 @@
 
 set -eu
 
+SEDFLAGS="-En"
+if sed -u 's/s/s/' </dev/null &>/dev/null; then
+    SEDFLAGS="${SEDFLAGS}u"
+fi
+
 filter() {
-    sed -E 's/^(([^:]*):([^:]*):([^:]*): (warning|error): (.*))$/::\5 file=\2,line=\3,col=\4,title=Compiler \5::\6\
-\1/'
+    sed $SEDFLAGS 'p; s/^([^:]*):([^:]*):([^:]*): (warning|error): (.*)$/::\4 file=\1,line=\2,col=\3,title=Compiler \4::\5/p'
 }
 
 exec "$@" > >(filter) 2> >(filter >&2)
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 20d3797..4075eb1 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -3,11 +3,13 @@ name: CI
 on: [push, pull_request]
 
 jobs:
-  linux:
-    name: Linux
-
+  linux-x86:
+    name: Linux (x86)
     runs-on: ubuntu-24.04
 
+    # Don't run on both pushes and pull requests
+    if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name != github.event.pull_request.base.repo.full_name
+
     steps:
       - uses: actions/checkout@v4
 
@@ -16,7 +18,6 @@ jobs:
           sudo dpkg --add-architecture i386
           sudo apt-get update -y
           sudo apt-get install -y \
-              expect \
               mandoc \
               gcc-multilib \
               libgcc-s1:i386 \
@@ -35,8 +36,6 @@ jobs:
           sudo ln -s libacl.so.1 /lib/i386-linux-gnu/libacl.so
           sudo ln -s libcap.so.2 /lib/i386-linux-gnu/libcap.so
           sudo ln -s libonig.so.5 /lib/i386-linux-gnu/libonig.so
-          # Work around https://github.com/actions/runner-images/issues/9491
-          sudo sysctl vm.mmap_rnd_bits=28
 
       - name: Run tests
         run: |
@@ -44,22 +43,52 @@ jobs:
 
       - uses: actions/upload-artifact@v4
         with:
-          name: linux-config.log
+          name: linux-x86-config.log
+          path: distcheck-*/gen/config.log
+
+  linux-arm:
+    name: Linux (Arm64)
+    runs-on: ubuntu-24.04-arm
+
+    if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name != github.event.pull_request.base.repo.full_name
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install dependencies
+        run: |
+          sudo apt-get update -y
+          sudo apt-get install -y \
+              mandoc \
+              acl \
+              libacl1-dev \
+              attr \
+              libcap2-bin \
+              libcap-dev \
+              libonig-dev \
+              liburing-dev
+
+      - name: Run tests
+        run: |
+          .github/diag.sh make -j$(nproc) distcheck
+
+      - uses: actions/upload-artifact@v4
+        with:
+          name: linux-arm-config.log
           path: distcheck-*/gen/config.log
 
   macos:
     name: macOS
+    runs-on: macos-15
 
-    runs-on: macos-14
+    if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name != github.event.pull_request.base.repo.full_name
 
     steps:
       - uses: actions/checkout@v4
 
       - name: Install dependencies
         run: |
-          brew install \
-              bash \
-              expect
+          brew install bash
 
       - name: Run tests
         run: |
@@ -68,25 +97,24 @@ jobs:
 
   freebsd:
     name: FreeBSD
-
     runs-on: ubuntu-24.04
 
+    if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name != github.event.pull_request.base.repo.full_name
+
     steps:
       - uses: actions/checkout@v4
 
       - name: Run tests
-        uses: cross-platform-actions/action@v0.25.0
+        uses: cross-platform-actions/action@v0.28.0
         with:
           operating_system: freebsd
-          version: "14.1"
+          version: "14.2"
 
           run: |
             sudo pkg install -y \
                 bash \
-                expect \
                 oniguruma \
-                pkgconf \
-                tcl-wrapper
+                pkgconf
             sudo mount -t fdescfs none /dev/fd
             .github/diag.sh make -j$(nproc) distcheck
 
@@ -97,22 +125,22 @@ jobs:
 
   openbsd:
     name: OpenBSD
-
     runs-on: ubuntu-24.04
 
+    if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name != github.event.pull_request.base.repo.full_name
+
     steps:
       - uses: actions/checkout@v4
 
       - name: Run tests
-        uses: cross-platform-actions/action@v0.25.0
+        uses: cross-platform-actions/action@v0.28.0
         with:
           operating_system: openbsd
-          version: "7.5"
+          version: "7.7"
 
           run: |
             sudo pkg_add \
                 bash \
-                expect \
                 gmake \
                 oniguruma
             jobs=$(sysctl -n hw.ncpu)
@@ -126,25 +154,25 @@ jobs:
 
   netbsd:
     name: NetBSD
-
     runs-on: ubuntu-24.04
 
+    if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name != github.event.pull_request.base.repo.full_name
+
     steps:
       - uses: actions/checkout@v4
 
       - name: Run tests
-        uses: cross-platform-actions/action@v0.25.0
+        uses: cross-platform-actions/action@v0.28.0
         with:
           operating_system: netbsd
-          version: "10.0"
+          version: "10.1"
 
           run: |
             PATH="/sbin:/usr/sbin:$PATH"
             sudo pkgin -y install \
                 bash \
                 oniguruma \
-                pkgconf \
-                tcl-expect
+                pkgconf
             jobs=$(sysctl -n hw.ncpu)
             ./configure
             .github/diag.sh make -j$jobs check TEST_FLAGS="--sudo --verbose=skipped"
@@ -156,9 +184,10 @@ jobs:
 
   dragonflybsd:
     name: DragonFly BSD
-
     runs-on: ubuntu-24.04
 
+    if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name != github.event.pull_request.base.repo.full_name
+
     steps:
       - uses: actions/checkout@v4
 
@@ -171,11 +200,9 @@ jobs:
           prepare: |
             pkg install -y \
                 bash \
-                expect \
                 oniguruma \
                 pkgconf \
-                sudo \
-                tcl-wrapper
+                sudo
             pw useradd -n action -m -G wheel -s /usr/local/bin/bash
             echo "%wheel ALL=(ALL) NOPASSWD: ALL" >>/usr/local/etc/sudoers
 
@@ -192,23 +219,23 @@ jobs:
 
   omnios:
     name: OmniOS
-
     runs-on: ubuntu-24.04
 
+    if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name != github.event.pull_request.base.repo.full_name
+
     steps:
       - uses: actions/checkout@v4
 
       - name: Run tests
         uses: vmactions/omnios-vm@v1
         with:
-          release: "r151048"
+          release: "r151052"
           usesh: true
 
           prepare: |
             pkg install \
                 bash \
                 build-essential \
-                expect \
                 gnu-make \
                 onig \
                 sudo
diff --git a/.github/workflows/codecov.yml b/.github/workflows/codecov.yml
index 4cce8ed..e4e8f71 100644
--- a/.github/workflows/codecov.yml
+++ b/.github/workflows/codecov.yml
@@ -13,7 +13,6 @@ jobs:
         run: |
           sudo apt-get update -y
           sudo apt-get install -y \
-              expect \
               gcc \
               acl \
               libacl1-dev \
diff --git a/LICENSE b/LICENSE
index 9fd4f17..b0b26e0 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,4 +1,4 @@
-Copyright © 2015-2024 Tavian Barnes <tavianator@tavianator.com> and the bfs contributors
+Copyright © 2015-2025 Tavian Barnes <tavianator@tavianator.com> and the bfs contributors
 
 Permission to use, copy, modify, and/or distribute this software for any purpose with or
 without fee is hereby granted.
diff --git a/Makefile b/Makefile
index b0d6e46..5e6d25c 100644
--- a/Makefile
+++ b/Makefile
@@ -43,9 +43,11 @@ bfs: bin/bfs
 BINS := \
     bin/bfs \
     bin/tests/mksock \
+    bin/tests/ptyx \
     bin/tests/units \
     bin/tests/xspawnee \
-    bin/tests/xtouch
+    bin/tests/xtouch \
+    bin/bench/ioq
 
 all: ${BINS}
 .PHONY: all
@@ -90,7 +92,7 @@ OBJS += obj/src/main.o
 
 ${BINS}:
 	@${MKDIR} ${@D}
-	+${MSG} "[ LD ] $@" ${CC} ${_CFLAGS} ${_LDFLAGS} ${.ALLSRC} ${_LDLIBS} -o $@
+	+${MSG} "[ LD ] $@" ${CC} ${_CFLAGS} ${_LDFLAGS} $^ ${_LDLIBS} -o $@
 	${POSTLINK}
 
 # Get the .c file for a .o file
@@ -103,7 +105,7 @@ gen/version.i.new::
 .SILENT: gen/version.i.new
 
 gen/version.i: gen/version.i.new
-	test -e $@ && cmp -s $@ ${.ALLSRC} && ${RM} ${.ALLSRC} || mv ${.ALLSRC} $@
+	test -e $@ && cmp -s $@ $^ && ${RM} $^ || mv $^ $@
 .SILENT: gen/version.i
 
 obj/src/version.o: gen/version.i
@@ -118,6 +120,7 @@ UTEST_BINS := \
 # Integration test binaries
 ITEST_BINS := \
     bin/tests/mksock \
+    bin/tests/ptyx \
     bin/tests/xtouch
 
 # Build (but don't run) test binaries
@@ -178,6 +181,9 @@ integration-tests: ${INTEGRATION_TESTS}
 bin/tests/mksock: obj/tests/mksock.o ${LIBBFS}
 OBJS += obj/tests/mksock.o
 
+bin/tests/ptyx: obj/tests/ptyx.o ${LIBBFS}
+OBJS += obj/tests/ptyx.o
+
 bin/tests/xtouch: obj/tests/xtouch.o ${LIBBFS}
 OBJS += obj/tests/xtouch.o
 
@@ -215,6 +221,14 @@ ${DISTCHECKS}::
 	    && ${MAKE} check TEST_FLAGS="--sudo --verbose=skipped"
 	@test "$${GITHUB_ACTIONS-}" != true || printf '::endgroup::\n'
 
+## Benchmarks (`make bench`)
+
+bench: bin/bench/ioq
+.PHONY: bench
+
+bin/bench/ioq: obj/bench/ioq.o ${LIBBFS}
+OBJS += obj/bench/ioq.o
+
 ## Automatic dependency tracking
 
 # Rebuild when the configuration changes
diff --git a/bench/bench.sh b/bench/bench.sh
index f249ffc..c9ed978 100644
--- a/bench/bench.sh
+++ b/bench/bench.sh
@@ -22,6 +22,7 @@ PRINT_DEFAULT=(linux)
 STRATEGIES_DEFAULT=(rust)
 JOBS_DEFAULT=(rust)
 EXEC_DEFAULT=(linux)
+SORTED_DEFAULT=(chromium)
 
 usage() {
     printf 'Usage: tailfin run %s\n' "${BASH_SOURCE[0]}"
@@ -60,6 +61,10 @@ usage() {
     printf '      Process spawning benchmark.\n'
     printf '      Default corpus is --exec=%s\n\n' "${EXEC_DEFAULT[*]}"
 
+    printf '  --sorted[=CORPUS]\n'
+    printf '      Sorted traversal benchmark.\n'
+    printf '      Default corpus is --sorted=%s\n\n' "${SORTED_DEFAULT[*]}"
+
     printf '  --build=COMMIT\n'
     printf '      Build this bfs commit and benchmark it.  Specify multiple times to\n'
     printf '      compare, e.g. --build=3.0.1 --build=3.0.2\n\n'
@@ -121,6 +126,7 @@ setup() {
     STRATEGIES=()
     JOBS=()
     EXEC=()
+    SORTED=()
 
     for arg; do
         case "$arg" in
@@ -195,6 +201,12 @@ setup() {
             --exec=*)
                 read -ra EXEC <<<"${arg#*=}"
                 ;;
+            --sorted)
+                SORTED=("${SORTED_DEFAULT[@]}")
+                ;;
+            --sorted=*)
+                read -ra SORTED <<<"${arg#*=}"
+                ;;
             --default)
                 COMPLETE=("${COMPLETE_DEFAULT[@]}")
                 EARLY_QUIT=("${EARLY_QUIT_DEFAULT[@]}")
@@ -203,6 +215,7 @@ setup() {
                 STRATEGIES=("${STRATEGIES_DEFAULT[@]}")
                 JOBS=("${JOBS_DEFAULT[@]}")
                 EXEC=("${EXEC_DEFAULT[@]}")
+                SORTED=("${SORTED_DEFAULT[@]}")
                 ;;
             --help)
                 usage
@@ -227,7 +240,7 @@ setup() {
     as-user mkdir -p bench/corpus
 
     declare -A cloned=()
-    for corpus in "${COMPLETE[@]}" "${EARLY_QUIT[@]}" "${STAT[@]}" "${PRINT[@]}" "${STRATEGIES[@]}" "${JOBS[@]}" "${EXEC[@]}"; do
+    for corpus in "${COMPLETE[@]}" "${EARLY_QUIT[@]}" "${STAT[@]}" "${PRINT[@]}" "${STRATEGIES[@]}" "${JOBS[@]}" "${EXEC[@]}" "${SORTED[@]}"; do
         if ((cloned["$corpus"])); then
             continue
         fi
@@ -283,6 +296,7 @@ setup() {
     export_array STRATEGIES
     export_array JOBS
     export_array EXEC
+    export_array SORTED
 
     if ((UID == 0)); then
         turbo-off
@@ -650,6 +664,29 @@ bench-exec() {
     fi
 }
 
+# Benchmark sorted traversal
+bench-sorted-corpus() {
+    subgroup '%s' "$1"
+
+    cmds=()
+    for bfs in "${BFS[@]}"; do
+        cmds+=("$bfs -s $2 -false")
+    done
+
+    do-hyperfine "${cmds[@]}"
+}
+
+# All sorted traversal benchmarks
+bench-sorted() {
+    if (($#)); then
+        group "Sorted traversal"
+
+        for corpus; do
+            bench-sorted-corpus "$corpus ${TAGS[$corpus]}" "bench/corpus/$corpus"
+        done
+    fi
+}
+
 # Print benchmarked versions
 bench-versions() {
     subgroup "Versions"
@@ -698,6 +735,7 @@ bench() {
     import_array STRATEGIES
     import_array JOBS
     import_array EXEC
+    import_array SORTED
 
     bench-complete "${COMPLETE[@]}"
     bench-early-quit "${EARLY_QUIT[@]}"
@@ -706,5 +744,6 @@ bench() {
     bench-strategies "${STRATEGIES[@]}"
     bench-jobs "${JOBS[@]}"
     bench-exec "${EXEC[@]}"
+    bench-sorted "${SORTED[@]}"
     bench-details
 }
diff --git a/bench/ioq.c b/bench/ioq.c
new file mode 100644
index 0000000..fb9edbc
--- /dev/null
+++ b/bench/ioq.c
@@ -0,0 +1,455 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+#include "atomic.h"
+#include "bfs.h"
+#include "bfstd.h"
+#include "diag.h"
+#include "ioq.h"
+#include "sighook.h"
+#include "xtime.h"
+
+#include <errno.h>
+#include <locale.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <unistd.h>
+
+/** A latency sample. */
+struct lat {
+	/** The sampled latency. */
+	struct timespec time;
+	/** A random integer, for reservoir sampling. */
+	long key;
+};
+
+/** Number of latency samples to keep. */
+#define SAMPLES 1000
+/** Latency sampling period. */
+#define PERIOD 128
+
+/** Latency measurements. */
+struct lats {
+	/** Lowest observed latency. */
+	struct timespec min;
+	/** Highest observed latency. */
+	struct timespec max;
+	/** Total latency. */
+	struct timespec sum;
+	/** Number of measured requests. */
+	size_t count;
+
+	/** Priority queue for reservoir sampling. */
+	struct lat heap[SAMPLES];
+	/** Current size of the heap. */
+	size_t heap_size;
+};
+
+/** Initialize a latency reservoir. */
+static void lats_init(struct lats *lats) {
+	lats->min = (struct timespec) { .tv_sec = 1000 };
+	lats->max = (struct timespec) { 0 };
+	lats->sum = (struct timespec) { 0 };
+	lats->count = 0;
+	lats->heap_size = 0;
+}
+
+/** Binary heap parent. */
+static size_t heap_parent(size_t i) {
+	return (i - 1) / 2;
+}
+
+/** Binary heap left child. */
+static size_t heap_child(size_t i) {
+	return 2 * i + 1;
+}
+
+/** Binary heap smallest child. */
+static size_t heap_min_child(const struct lats *lats, size_t i) {
+	size_t j = heap_child(i);
+	size_t k = j + 1;
+	if (k < lats->heap_size && lats->heap[k].key < lats->heap[j].key) {
+		return k;
+	} else {
+		return j;
+	}
+}
+
+/** Check if the heap property is met. */
+static bool heap_check(const struct lat *parent, const struct lat *child) {
+	return parent->key <= child->key;
+}
+
+/** Reservoir sampling. */
+static void heap_push(struct lats *lats, const struct lat *lat) {
+	size_t i;
+
+	if (lats->heap_size < SAMPLES) {
+		// Heapify up
+		i = lats->heap_size++;
+		while (i > 0) {
+			size_t j = heap_parent(i);
+			if (heap_check(&lats->heap[j], lat)) {
+				break;
+			}
+			lats->heap[i] = lats->heap[j];
+			i = j;
+		}
+	} else if (lat->key > lats->heap[0].key) {
+		// Heapify down
+		i = 0;
+		while (true) {
+			size_t j = heap_min_child(lats, i);
+			if (j >= SAMPLES || heap_check(lat, &lats->heap[j])) {
+				break;
+			}
+			lats->heap[i] = lats->heap[j];
+			i = j;
+		}
+	} else {
+		// Reject
+		return;
+	}
+
+	lats->heap[i] = *lat;
+}
+
+/** Add a latency sample. */
+static void lats_push(struct lats *lats, const struct timespec *ts) {
+	timespec_min(&lats->min, ts);
+	timespec_max(&lats->max, ts);
+	timespec_add(&lats->sum, ts);
+	++lats->count;
+
+	struct lat lat = {
+		.time = *ts,
+		.key = lrand48(),
+	};
+	heap_push(lats, &lat);
+}
+
+/** Merge two latency reservoirs. */
+static void lats_merge(struct lats *into, const struct lats *from) {
+	timespec_min(&into->min, &from->min);
+	timespec_max(&into->max, &from->max);
+	timespec_add(&into->sum, &from->sum);
+	into->count += from->count;
+
+	for (size_t i = 0; i < from->heap_size; ++i) {
+		heap_push(into, &from->heap[i]);
+	}
+}
+
+/** Latency qsort() comparator. */
+static int lat_cmp(const void *a, const void *b) {
+	const struct lat *la = a;
+	const struct lat *lb = b;
+	return timespec_cmp(&la->time, &lb->time);
+}
+
+/** Sort the latency reservoir. */
+static void lats_sort(struct lats *lats) {
+	qsort(lats->heap, lats->heap_size, sizeof(lats->heap[0]), lat_cmp);
+}
+
+/** Get the nth percentile. */
+static const struct timespec *lats_percentile(const struct lats *lats, int percent) {
+	size_t i = lats->heap_size * percent / 100;
+	return &lats->heap[i].time;
+}
+
+/** Which clock to use for benchmarking. */
+static clockid_t clockid = CLOCK_REALTIME;
+
+/** Get a current time measurement. */
+static void gettime(struct timespec *tp) {
+	int ret = clock_gettime(clockid, tp);
+	bfs_everify(ret == 0, "clock_gettime(%d)", (int)clockid);
+}
+
+/**
+ * Time measurements.
+ */
+struct times {
+	/** The start time. */
+	struct timespec start;
+
+	/** Total requests started. */
+	size_t pushed;
+	/** Total requests finished. */
+	size_t popped;
+
+	/** The start time for the currently tracked request. */
+	struct timespec req_start;
+	/** Whether a timed request is currently in flight. */
+	bool timing;
+
+	/** Latency measurements. */
+	struct lats lats;
+};
+
+/** Initialize a timer. */
+static void times_init(struct times *times) {
+	gettime(&times->start);
+	times->pushed = 0;
+	times->popped = 0;
+	bfs_assert(!times->timing);
+	lats_init(&times->lats);
+}
+
+/** Finish timing a request. */
+static void track_latency(struct times *times) {
+	struct timespec elapsed;
+	gettime(&elapsed);
+	timespec_sub(&elapsed, &times->req_start);
+	lats_push(&times->lats, &elapsed);
+
+	bfs_assert(times->timing);
+	times->timing = false;
+}
+
+/** Add times to the totals, and reset the lap times. */
+static void times_lap(struct times *total, struct times *lap) {
+	total->pushed += lap->pushed;
+	total->popped += lap->popped;
+	lats_merge(&total->lats, &lap->lats);
+
+	times_init(lap);
+}
+
+/** Print some times. */
+static void times_print(struct times *times, long seconds) {
+	struct timespec elapsed;
+	gettime(&elapsed);
+	timespec_sub(&elapsed, &times->start);
+
+	double fsec = timespec_ns(&elapsed) / 1.0e9;
+
+	if (seconds > 0) {
+		printf("%5ld", seconds);
+	} else if (elapsed.tv_nsec >= 10 * 1000 * 1000) {
+		printf("%5.2f", fsec);
+	} else {
+		printf("%5.0f", fsec);
+	}
+
+	double iops = times->popped / fsec;
+	double mean = timespec_ns(&times->lats.sum) / times->lats.count;
+	double min = timespec_ns(&times->lats.min);
+	double max = timespec_ns(&times->lats.max);
+
+	lats_sort(&times->lats);
+	double n50 = timespec_ns(lats_percentile(&times->lats, 50));
+	double n90 = timespec_ns(lats_percentile(&times->lats, 90));
+	double n99 = timespec_ns(lats_percentile(&times->lats, 99));
+
+	printf(" │ %'12.0f │ %'7.0f │ %'7.0f │ %'7.0f │ %'7.0f │ %'7.0f │ %'7.0f\n", iops, mean, min, n50, n90, n99, max);
+	fflush(stdout);
+}
+
+/** Push an ioq request. */
+static bool push(struct ioq *ioq, enum ioq_nop_type type, struct times *lap) {
+	void *ptr = NULL;
+
+	// Track latency for a small fraction of requests
+	if (!lap->timing && (lap->pushed + 1) % PERIOD == 0) {
+		ptr = lap;
+		gettime(&lap->req_start);
+	}
+
+	int ret = ioq_nop(ioq, type, ptr);
+	if (ret != 0) {
+		bfs_everify(errno == EAGAIN, "ioq_nop(%d)", (int)type);
+		return false;
+	}
+
+	++lap->pushed;
+	if (ptr) {
+		lap->timing = true;
+	}
+	return true;
+}
+
+/** Pop an ioq request. */
+static bool pop(struct ioq *ioq, struct times *lap, bool block) {
+	struct ioq_ent *ent = ioq_pop(ioq, block);
+	if (!ent) {
+		return false;
+	}
+
+	if (ent->ptr) {
+		track_latency(lap);
+	}
+
+	ioq_free(ioq, ent);
+	++lap->popped;
+	return true;
+}
+
+/** ^C flag. */
+static atomic bool quit = false;
+
+/** ^C hook. */
+static void ctrlc(int sig, siginfo_t *info, void *arg) {
+	store(&quit, true, relaxed);
+}
+
+int main(int argc, char *argv[]) {
+	// Use CLOCK_MONOTONIC if available
+#if defined(_POSIX_MONOTONIC_CLOCK) && _POSIX_MONOTONIC_CLOCK >= 0
+	if (sysoption(MONOTONIC_CLOCK) > 0) {
+		clockid = CLOCK_MONOTONIC;
+	}
+#endif
+
+	// Enable thousands separators
+	setlocale(LC_ALL, "");
+
+	// -d: queue depth
+	unsigned int depth = 4096;
+	// -j: threads
+	unsigned int threads = 0;
+	// -t: timeout
+	double timeout = 5.0;
+	// -L|-H: ioq_nop() type
+	enum ioq_nop_type type = IOQ_NOP_LIGHT;
+
+	const char *cmd = argc > 0 ? argv[0] : "ioq";
+	int c;
+	while (c = getopt(argc, argv, ":d:j:t:LH"), c != -1) {
+		switch (c) {
+		case 'd':
+			if (xstrtoui(optarg, NULL, 10, &depth) != 0) {
+				fprintf(stderr, "%s: Bad depth '%s': %s\n", cmd, optarg, errstr());
+				return EXIT_FAILURE;
+			}
+			break;
+		case 'j':
+			if (xstrtoui(optarg, NULL, 10, &threads) != 0) {
+				fprintf(stderr, "%s: Bad thread count '%s': %s\n", cmd, optarg, errstr());
+				return EXIT_FAILURE;
+			}
+			break;
+		case 't':
+			if (xstrtod(optarg, NULL, &timeout) != 0) {
+				fprintf(stderr, "%s: Bad timeout '%s': %s\n", cmd, optarg, errstr());
+				return EXIT_FAILURE;
+			}
+			break;
+		case 'L':
+		 	type = IOQ_NOP_LIGHT;
+			break;
+		case 'H':
+		 	type = IOQ_NOP_HEAVY;
+			break;
+		case ':':
+			fprintf(stderr, "%s: Missing argument to -%c\n", cmd, optopt);
+			return EXIT_FAILURE;
+		case '?':
+			fprintf(stderr, "%s: Unrecognized option -%c\n", cmd, optopt);
+			return EXIT_FAILURE;
+		}
+	}
+
+	if (!threads) {
+		threads = nproc();
+		if (threads > 8) {
+			threads = 8;
+		}
+	}
+	if (threads < 2) {
+		threads = 2;
+	}
+	--threads;
+
+	// Listen for ^C to print the summary
+	struct sighook *hook = sighook(SIGINT, ctrlc, NULL, SH_CONTINUE | SH_ONESHOT);
+
+	printf("I/O queue benchmark (%s)\n\n", bfs_version);
+
+	printf("[-d] depth:   %u\n", depth);
+	printf("[-j] threads: %u (including main)\n", threads + 1);
+	if (type == IOQ_NOP_HEAVY) {
+		printf("[-H] type:    heavy (with syscalls)\n");
+	} else {
+		printf("[-L] type:    light (no syscalls)\n");
+	}
+	printf("\n");
+
+	printf(" Time │  Throughput  │ Latency │   min   │   50%%   │   90%%   │   99%%   │   max\n");
+	printf("  (s) │    (IO/s)    │ (ns/IO) │         │         │         │         │\n");
+	printf("══════╪══════════════╪═════════╪═════════╪═════════╪═════════╪═════════╪═════════\n");
+	fflush(stdout);
+
+	struct ioq *ioq = ioq_create(depth, threads);
+	bfs_everify(ioq, "ioq_create(%u, %u)", depth, threads);
+
+	// Pre-allocate all the requests
+	while (ioq_capacity(ioq) > 0) {
+		int ret = ioq_nop(ioq, type, NULL);
+		bfs_everify(ret == 0, "ioq_nop(%d)", (int)type);
+	}
+	while (true) {
+		struct ioq_ent *ent = ioq_pop(ioq, true);
+		if (!ent) {
+			break;
+		}
+		ioq_free(ioq, ent);
+	}
+
+	struct times total, lap;
+	times_init(&total);
+	lap = total;
+
+	long seconds = 0;
+	while (!load(&quit, relaxed)) {
+		bool was_timing = lap.timing;
+
+		for (int i = 0; i < 16; ++i) {
+			bool block = ioq_capacity(ioq) == 0;
+			if (!pop(ioq, &lap, block)) {
+				break;
+			}
+		}
+
+		if (was_timing && !lap.timing) {
+			struct timespec elapsed;
+			gettime(&elapsed);
+			timespec_sub(&elapsed, &total.start);
+
+			if (elapsed.tv_sec > seconds) {
+				seconds = elapsed.tv_sec;
+				times_print(&lap, seconds);
+				times_lap(&total, &lap);
+			}
+
+			double ns = timespec_ns(&elapsed);
+			if (timeout > 0 && ns >= timeout * 1.0e9) {
+				break;
+			}
+		}
+
+		for (int i = 0; i < 8; ++i) {
+			if (!push(ioq, type, &lap)) {
+				break;
+			}
+		}
+		ioq_submit(ioq);
+	}
+
+	while (pop(ioq, &lap, true));
+	times_lap(&total, &lap);
+
+	if (load(&quit, relaxed)) {
+		printf("\r──^C──┼──────────────┼─────────┼─────────┼─────────┼─────────┼─────────┼─────────\n");
+	} else {
+		printf("──────┼──────────────┼─────────┼─────────┼─────────┼─────────┼─────────┼─────────\n");
+	}
+	times_print(&total, 0);
+
+	ioq_destroy(ioq);
+	sigunhook(hook);
+	return 0;
+}
diff --git a/build/config.mk b/build/config.mk
index 6296168..663926c 100644
--- a/build/config.mk
+++ b/build/config.mk
@@ -7,21 +7,15 @@ include build/prelude.mk
 include build/exports.mk
 
 # All configuration steps
-config: gen/config.mk
+config: gen/config.mk gen/config.h
 .PHONY: config
 
-# Makefile fragments generated by `./configure`
-MKS := \
-    gen/vars.mk \
-    gen/flags.mk \
-    gen/pkgs.mk
-
 # The main configuration file, which includes the others
-gen/config.mk: ${MKS} gen/config.h
+gen/config.mk: gen/vars.mk gen/flags.mk gen/pkgs.mk
 	${MSG} "[ GEN] $@"
 	@printf '# %s\n' "$@" >$@
-	@printf 'include %s\n' ${MKS} >>$@
-	${VCAT} gen/config.mk
+	@printf 'include %s\n' $^ >>$@
+	${VCAT} $@
 .PHONY: gen/config.mk
 
 # Saves the configurable variables
diff --git a/build/flags.mk b/build/flags.mk
index 2562e03..3748a8a 100644
--- a/build/flags.mk
+++ b/build/flags.mk
@@ -8,14 +8,16 @@ include gen/vars.mk
 
 # Internal flags
 _CPPFLAGS := -Isrc -Igen -include src/prelude.h
-_CFLAGS := -std=c17 -pthread
+_CFLAGS := -std=c17
 _LDFLAGS :=
 _LDLIBS :=
 
 # Platform-specific system libraries
 LDLIBS,DragonFly := -lposix1e
+LDLIBS,FreeBSD := -lrt
 LDLIBS,Linux := -lrt
 LDLIBS,NetBSD := -lutil
+LDLIBS,QNX := -lregex -lsocket
 LDLIBS,SunOS := -lsec -lsocket -lnsl
 _LDLIBS += ${LDLIBS,${OS}}
 
@@ -29,6 +31,9 @@ _GCOV := ${TRUTHY,${GCOV}}
 _LINT := ${TRUTHY,${LINT}}
 _RELEASE := ${TRUTHY,${RELEASE}}
 
+LTO ?= ${RELEASE}
+_LTO := ${TRUTHY,${LTO}}
+
 ASAN_CFLAGS,y := -fsanitize=address
 LSAN_CFLAGS,y := -fsanitize=leak
 MSAN_CFLAGS,y := -fsanitize=memory -fsanitize-memory-track-origins
@@ -61,11 +66,14 @@ _CPPFLAGS += ${LINT_CPPFLAGS,${_LINT}}
 _CFLAGS += ${LINT_CFLAGS,${_LINT}}
 
 RELEASE_CPPFLAGS,y := -DNDEBUG
-RELEASE_CFLAGS,y := -O3 -flto=auto
+RELEASE_CFLAGS,y := -O3
 
 _CPPFLAGS += ${RELEASE_CPPFLAGS,${_RELEASE}}
 _CFLAGS += ${RELEASE_CFLAGS,${_RELEASE}}
 
+LTO_CFLAGS,y := -flto=auto
+_CFLAGS += ${LTO_CFLAGS,${_LTO}}
+
 # Configurable flags
 CFLAGS ?= -g -Wall
 
@@ -90,7 +98,8 @@ AUTO_FLAGS := \
     gen/flags/Wstrict-prototypes.mk \
     gen/flags/Wundef-prefix.mk \
     gen/flags/bind-now.mk \
-    gen/flags/deps.mk
+    gen/flags/deps.mk \
+    gen/flags/pthread.mk
 
 gen/flags.mk: ${AUTO_FLAGS}
 	${MSG} "[ GEN] $@"
@@ -101,8 +110,8 @@ gen/flags.mk: ${AUTO_FLAGS}
 	@printf '_LDLIBS := %s\n' "$$XLDLIBS" >>$@
 	@printf 'NOLIBS := %s\n' "$$XNOLIBS" >>$@
 	@test "${OS}-${SAN}" != FreeBSD-y || printf 'POSTLINK = elfctl -e +noaslr $$@\n' >>$@
-	@cat ${.ALLSRC} >>$@
-	@cat ${.ALLSRC:%=%.log} >gen/flags.log
+	@cat $^ >>$@
+	@cat ${^:%=%.log} >gen/flags.log
 	${VCAT} $@
 .PHONY: gen/flags.mk
 
diff --git a/build/flags/pthread.c b/build/flags/pthread.c
new file mode 100644
index 0000000..db09aa4
--- /dev/null
+++ b/build/flags/pthread.c
@@ -0,0 +1,8 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+/// _CFLAGS += -pthread
+
+int main(void) {
+	return 0;
+}
diff --git a/build/has/dprintf.c b/build/has/dprintf.c
new file mode 100644
index 0000000..c206fa3
--- /dev/null
+++ b/build/has/dprintf.c
@@ -0,0 +1,8 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+#include <stdio.h>
+
+int main(void) {
+	return dprintf(1, "%s\n", "Hello world!");
+}
diff --git a/build/has/pragma-nounroll.c b/build/has/pragma-nounroll.c
new file mode 100644
index 0000000..2bdae14
--- /dev/null
+++ b/build/has/pragma-nounroll.c
@@ -0,0 +1,10 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+/// -Werror
+
+int main(void) {
+#pragma nounroll
+	for (int i = 0; i < 100; ++i);
+	return 0;
+}
diff --git a/build/has/pthread-set-name-np.c b/build/has/pthread-set-name-np.c
new file mode 100644
index 0000000..324aab9
--- /dev/null
+++ b/build/has/pthread-set-name-np.c
@@ -0,0 +1,10 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+#include <pthread.h>
+#include <pthread_np.h>
+
+int main(void) {
+	pthread_set_name_np(pthread_self(), "name");
+	return 0;
+}
diff --git a/build/has/pthread-setname-np.c b/build/has/pthread-setname-np.c
new file mode 100644
index 0000000..a3b94c1
--- /dev/null
+++ b/build/has/pthread-setname-np.c
@@ -0,0 +1,8 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+#include <pthread.h>
+
+int main(void) {
+	return pthread_setname_np(pthread_self(), "name");
+}
diff --git a/build/has/sched-getaffinity.c b/build/has/sched-getaffinity.c
new file mode 100644
index 0000000..6f8fd98
--- /dev/null
+++ b/build/has/sched-getaffinity.c
@@ -0,0 +1,9 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+#include <sched.h>
+
+int main(void) {
+	cpu_set_t set;
+	return sched_getaffinity(0, sizeof(set), &set);
+}
diff --git a/build/has/tcsetwinsize.c b/build/has/tcsetwinsize.c
new file mode 100644
index 0000000..6717415
--- /dev/null
+++ b/build/has/tcsetwinsize.c
@@ -0,0 +1,9 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+#include <termios.h>
+
+int main(void) {
+	const struct winsize ws = {0};
+	return tcsetwinsize(0, &ws);
+}
diff --git a/build/header.mk b/build/header.mk
index f940e52..f15829a 100644
--- a/build/header.mk
+++ b/build/header.mk
@@ -20,6 +20,7 @@ HEADERS := \
     gen/has/acl-trivial.h \
     gen/has/builtin-riscv-pause.h \
     gen/has/confstr.h \
+    gen/has/dprintf.h \
     gen/has/extattr-get-file.h \
     gen/has/extattr-get-link.h \
     gen/has/extattr-list-file.h \
@@ -35,9 +36,13 @@ HEADERS := \
     gen/has/getprogname.h \
     gen/has/io-uring-max-workers.h \
     gen/has/pipe2.h \
+    gen/has/pragma-nounroll.h \
     gen/has/posix-getdents.h \
     gen/has/posix-spawn-addfchdir-np.h \
     gen/has/posix-spawn-addfchdir.h \
+    gen/has/pthread-set-name-np.h \
+    gen/has/pthread-setname-np.h \
+    gen/has/sched-getaffinity.h \
     gen/has/st-acmtim.h \
     gen/has/st-acmtimespec.h \
     gen/has/st-birthtim.h \
@@ -51,6 +56,7 @@ HEADERS := \
     gen/has/string-to-flags.h \
     gen/has/strtofflags.h \
     gen/has/tcgetwinsize.h \
+    gen/has/tcsetwinsize.h \
     gen/has/timegm.h \
     gen/has/timer-create.h \
     gen/has/tm-gmtoff.h \
@@ -64,9 +70,9 @@ gen/config.h: ${PKG_HEADERS} ${HEADERS}
 	@printf '// %s\n' "$@" >$@
 	@printf '#ifndef BFS_CONFIG_H\n' >>$@
 	@printf '#define BFS_CONFIG_H\n' >>$@
-	@cat ${.ALLSRC} >>$@
+	@cat $^ >>$@
 	@printf '#endif // BFS_CONFIG_H\n' >>$@
-	@cat gen/flags.log ${.ALLSRC:%=%.log} >gen/config.log
+	@cat gen/flags.log ${^:%=%.log} >gen/config.log
 	${VCAT} $@
 	@printf '%s' "$$CONFFLAGS" | build/embed.sh >gen/confflags.i
 	@printf '%s' "$$XCC" | build/embed.sh >gen/cc.i
diff --git a/build/pkgs.mk b/build/pkgs.mk
index 5de9ac2..f692739 100644
--- a/build/pkgs.mk
+++ b/build/pkgs.mk
@@ -19,7 +19,7 @@ gen/pkgs.mk: ${HEADERS}
 	    printf '_LDFLAGS += %s\n' "$$(build/pkgconf.sh --ldflags "$$@")"; \
 	    printf '_LDLIBS := %s $${_LDLIBS}\n' "$$(build/pkgconf.sh --ldlibs "$$@")"; \
 	}; \
-	gen $$(grep -l ' true$$' ${.ALLSRC} | sed 's|.*/\(.*\)\.h|\1|') >>$@
+	gen $$(grep -l ' true$$' $^ | sed 's|.*/\(.*\)\.h|\1|') >>$@
 	${VCAT} $@
 
 .PHONY: gen/pkgs.mk
diff --git a/build/prelude.mk b/build/prelude.mk
index c25dea4..6250d73 100644
--- a/build/prelude.mk
+++ b/build/prelude.mk
@@ -9,11 +9,9 @@
 # We don't use any suffix rules
 .SUFFIXES:
 
-# GNU make has $^ for the full list of targets, while BSD make has $> and the
-# long-form ${.ALLSRC}.  We could write $^ $> to get them both, but that would
-# break if one of them implemented support for the other.  So instead, bring
-# BSD's ${.ALLSRC} to GNU.
-.ALLSRC ?= $^
+# GNU make has $^ for the full list of targets, while BSD make has $> (and the
+# long-form ${.ALLSRC}).  We use the GNU version, bringing it to BSD like this:
+^ ?= $>
 
 # Installation paths
 DESTDIR ?=
diff --git a/build/version.sh b/build/version.sh
index ba5447f..ec0663a 100755
--- a/build/version.sh
+++ b/build/version.sh
@@ -14,5 +14,5 @@ if [ "${VERSION-}" ]; then
 elif [ -e "$DIR/.git" ] && command -v git >/dev/null 2>&1; then
     git -C "$DIR" describe --always --dirty
 else
-    echo "4.0.4"
+    echo "4.0.8"
 fi
diff --git a/configure b/configure
index 5156c91..7f0bd04 100755
--- a/configure
+++ b/configure
@@ -16,7 +16,7 @@ help() {
 Usage:
 
   \$ $0 [--enable-*|--disable-*] [--with-*|--without-*] [CC=...] [...]
-  \$ $MAKE -j$(nproc)
+  \$ $MAKE -j$(_nproc)
 
 Variables set in the environment or on the command line will be picked up:
 
@@ -66,21 +66,28 @@ Packaging:
 This script is a thin wrapper around a makefile-based configuration system.
 Any other arguments will be passed directly to the $MAKE invocation, e.g.
 
-  \$ $0 -j$(nproc) V=1
+  \$ $0 -j$(_nproc) V=1
 EOF
 }
 
+# Report a warning
+warn() {
+    fmt="$1"
+    shift
+    printf "%s: warning: $fmt\\n" "$0" "$@" >&2
+}
+
 # Report an argument parsing error
 invalid() {
-    printf 'error: Unrecognized option "%s"\n\n' "$1" >&2
+    printf '%s: error: Unrecognized option "%s"\n\n' "$0" "$1" >&2
     printf 'Run %s --help for more information.\n' "$0" >&2
     exit 1
 }
 
 # Get the number of cores to use
-nproc() {
+_nproc() {
     {
-        command nproc \
+        nproc \
             || sysctl -n hw.ncpu \
             || getconf _NPROCESSORS_ONLN \
             || echo 1
@@ -88,7 +95,7 @@ nproc() {
 }
 
 # Save the ./configure command line for bfs --version
-export CONFFLAGS="$*"
+export CONFFLAGS=""
 
 # Default to `make`
 MAKE="${MAKE-make}"
@@ -97,6 +104,13 @@ MAKE="${MAKE-make}"
 for arg; do
     shift
 
+    # Only add --options to CONFFLAGS, so we don't print FLAG=values twice in bfs --version
+    case "$arg" in
+        -*)
+            CONFFLAGS="${CONFFLAGS}${CONFFLAGS:+ }${arg}"
+            ;;
+    esac
+
     # --[(enable|disable|with|without)-]$name[=$value]
     value="${arg#*=}"
     name="${arg%%=*}"
@@ -136,7 +150,7 @@ for arg; do
                         --enable-*) arg="--with-${arg#--*-}" ;;
                         --disable-*) arg="--without-${arg#--*-}" ;;
                     esac
-                    printf 'warning: Treating "%s" like "%s"\n' "$old" "$arg" >&2
+                    warn 'Treating "%s" like "%s"' "$old" "$arg"
                     ;;
             esac
             ;;
@@ -150,7 +164,7 @@ for arg; do
 
         --enable-*|--disable-*)
             case "$name" in
-                release|asan|lsan|msan|tsan|ubsan|lint|gcov)
+                release|lto|asan|lsan|msan|tsan|ubsan|lint|gcov)
                     set -- "$@" "$NAME=$yn"
                     ;;
                 *)
@@ -175,13 +189,32 @@ for arg; do
             ;;
 
         --infodir=*|--build=*|--host=*|--target=*)
-            printf 'warning: Ignoring option "%s"\n' "$arg" >&2
+            warn 'Ignoring option "%s"' "$arg"
             ;;
 
         MAKE=*)
             MAKE="$value"
             ;;
 
+        # Warn about MAKE variables that have documented configure flags
+        RELEASE=*|LTO=*|ASAN=*|LSAN=*|MSAN=*|TSAN=*|UBSAN=*|LINT=*|GCOV=*)
+            name=$(printf '%s' "$NAME" | tr 'A-Z_' 'a-z-')
+            warn '"%s" is deprecated; use --enable-%s' "$arg" "$name"
+            set -- "$@" "$arg"
+            ;;
+
+        PREFIX=*|MANDIR=*|VERSION=*)
+            name=$(printf '%s' "$NAME" | tr 'A-Z_' 'a-z-')
+            warn '"%s" is deprecated; use --%s=%s' "$arg" "$name" "$value"
+            set -- "$@" "$arg"
+            ;;
+
+        WITH_*=*)
+            name=$(printf '%s' "$NAME" | tr 'A-Z_' 'a-z-')
+            warn '"%s" is deprecated; use --%s' "$arg" "$name"
+            set -- "$@" "$arg"
+            ;;
+
         # make flag (-j2) or variable (CC=clang)
         -*|*=*)
             set -- "$@" "$arg"
@@ -194,11 +227,11 @@ for arg; do
 done
 
 # Set up symbolic links for out-of-tree builds
-for f in Makefile build completions docs src tests; do
+for f in Makefile bench build completions docs src tests; do
     test -e "$f" || ln -s "$DIR/$f" "$f"
 done
 
-# Set MAKEFLAGS to -j$(nproc) if it's unset
-export MAKEFLAGS="${MAKEFLAGS--j$(nproc)}"
+# Set MAKEFLAGS to -j$(_nproc) if it's unset
+export MAKEFLAGS="${MAKEFLAGS--j$(_nproc)}"
 
 $MAKE -rf build/config.mk "$@"
diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
index 7f3c7b7..56f53b4 100644
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@@ -1,12 +1,94 @@
 4.*
 ===
 
+4.0.8
+-----
+
+**June 20, 2025**
+
+### Bug fixes
+
+- Fixed an invalid optimization that transformed
+
+      $ bfs -user you -or -user me
+
+  into just
+
+      $ bfs -user you
+
+  The bug was originally introduced in bfs 2.0 (October 14, 2020).
+  ([#155](https://github.com/tavianator/bfs/issues/155))
+
+
+4.0.7
+-----
+
+**June 15, 2025**
+
+### Changes
+
+- `bfs` now takes CPU affinity into account when picking how many threads to use
+  ([`a36774b`](https://github.com/tavianator/bfs/commit/a36774be636c3429c6e73de33bf65a1bdbdcfb4b))
+
+- `-execdir /bin/...` is now allowed even with a relative path in `$PATH`
+  ([`cb40f51`](https://github.com/tavianator/bfs/commit/cb40f51e4e6375a10265484b6959c6b1b0591378))
+
+- *Expect* is no longer a test suite dependency
+  ([`7102fec`](https://github.com/tavianator/bfs/commit/7102fec257835302cb4978160bba4cbebd0b63e1))
+
+### Bug fixes
+
+- Only the last `-files0-from` argument now has any effect, to match GNU find
+  ([`a662fda`](https://github.com/tavianator/bfs/commit/a662fda2642e17478bc8e78adb4c6642a8505cdb))
+
+- Fixed `-execdir {}`, which was inadvertently broken in bfs 4.0
+  ([`def4a83`](https://github.com/tavianator/bfs/commit/def4a832425bfe94b96b8cb1146a83552b754fb4))
+
+
+4.0.6
+-----
+
+**February 26, 2025**
+
+### Bug fixes
+
+- Fixed `-fstype` with btrfs subvolumes (requires Linux 5.8+)
+  ([`0dccdae`](https://github.com/tavianator/bfs/commit/0dccdae4510ff5603247be871e64a6119647ea2a))
+
+- Fixed `-ls` with timestamps very far in the future
+  ([`dd5df1f`](https://github.com/tavianator/bfs/commit/dd5df1f8997550c5bf49205578027715b957bd01))
+
+- Fixed the `posix/exec_sigmask` test on mips64el Linux
+  ([`532dec0`](https://github.com/tavianator/bfs/commit/532dec0849dcdc3e15e530ac40a8168f146a41cd))
+
+- Fixed time-related tests with `mawk 1.3.4 20250131`
+  ([#152](https://github.com/tavianator/bfs/issues/152))
+
+
+4.0.5
+-----
+
+**January 18, 2025**
+
+### Bug fixes
+
+- Fixed a bug that could cause child processes (e.g. from `-exec`) to run with all signals blocked.
+  The bug was introduced in version 3.3.
+  ([`af207e7`](https://github.com/tavianator/bfs/commit/af207e702148e5c9ae08047d7a2dce6394653b62))
+
+### Changes
+
+- Fixed the build against old liburing versions
+  ([#147](https://github.com/tavianator/bfs/issues/147))
+
+- Async I/O performance optimizations
+
+
 4.0.4
 -----
 
 **October 31, 2024**
 
-
 ## Bug fixes
 
 - Fixed a man page typo
diff --git a/docs/bfs.1 b/docs/bfs.1
index 317ac5d..c6141a6 100644
--- a/docs/bfs.1
+++ b/docs/bfs.1
@@ -1,4 +1,6 @@
-.TH BFS 1 2024-10-31 "bfs 4.0.4"
+.\" Copyright © Tavian Barnes <tavianator@tavianator.com>
+.\" SPDX-License-Identifier: 0BSD
+.TH BFS 1 2025-06-15 "bfs 4.0.8"
 .SH NAME
 bfs \- breadth-first search for your files
 .SH SYNOPSIS
@@ -90,7 +92,9 @@ Follow all symbolic links.
 Never follow symbolic links (the default).
 .TP
 .B \-E
-Use extended regular expressions (same as \fB\-regextype \fIposix-extended\fR).
+Use extended regular expressions (same as
+.B \-regextype
+.IR posix-extended ).
 .TP
 .B \-X
 Filter out files with
@@ -109,19 +113,20 @@ The sorting takes place within each directory separately, which makes it differe
 but still provides a deterministic ordering.
 .TP
 .B \-x
-Don't descend into other mount points (same as \fB\-xdev\fR).
+Don't descend into other mount points (same as
+.BR \-xdev ).
 .TP
-\fB\-f \fIPATH\fR
+.BI "\-f " PATH
 Treat
 .I PATH
 as a path to search (useful if it begins with a dash).
 .TP
-\fB\-D \fIFLAG\fR
+.BI "\-D " FLAG
 Turn on a debugging flag (see
 .B \-D
 .IR help ).
 .PP
-\fB\-O\fIN\fR
+.BI \-O N
 .RS
 Enable optimization level
 .I N
@@ -177,14 +182,14 @@ Typically far faster than
 .IR ids .
 .RE
 .TP
-\fB\-j\fIN\fR
+.BI \-j N
 Search with
 .I N
 threads in parallel (default: number of CPUs, up to
 .IR 8 ).
 .SH OPERATORS
 .TP
-\fB( \fIexpression \fB)\fR
+.BI "( " expression " )"
 Parentheses are used for grouping expressions together.
 You'll probably have to write
 .B \e(
@@ -194,18 +199,25 @@ to avoid the parentheses being interpreted by the shell.
 .PP
 \fB! \fIexpression\fR
 .br
-\fB\-not \fIexpression\fR
+.B \-not
+.I expression
 .RS
 The "not" operator: returns the negation of the truth value of the
 .IR expression .
-You may have to write \fB\e! \fIexpression\fR to avoid \fB!\fR being interpreted by the shell.
+You may have to write \fB\e! \fIexpression\fR to avoid
+.B !
+being interpreted by the shell.
 .RE
 .PP
-\fIexpression\fR \fIexpression\fR
+.I expression expression
 .br
-\fIexpression \fB\-a \fIexpression\fR
+.I expression
+.B \-a
+.I expression
 .br
-\fIexpression \fB\-and \fIexpression\fR
+.I expression
+.B \-and
+.I expression
 .RS
 Short-circuiting "and" operator: if the left-hand
 .I expression
@@ -217,9 +229,13 @@ otherwise, returns
 .BR false .
 .RE
 .PP
-\fIexpression \fB\-o \fIexpression\fR
+.I expression
+.B \-o
+.I expression
 .br
-\fIexpression \fB\-or \fIexpression\fR
+.I expression
+.B \-or
+.I expression
 .RS
 Short-circuiting "or" operator: if the left-hand
 .I expression
@@ -231,14 +247,14 @@ otherwise, returns
 .BR true .
 .RE
 .TP
-\fIexpression \fB, \fIexpression\fR
+.IB "expression " , " expression"
 The "comma" operator: evaluates the left-hand
 .I expression
 but discards the result, returning the right-hand
 .IR expression .
 .SH SPECIAL FORMS
 .TP
-\fB\-exclude \fIexpression\fR
+.BI "\-exclude " expression
 Exclude all paths matching the
 .I expression
 from the search.
@@ -286,7 +302,7 @@ Search in post-order (descendents first).
 Follow all symbolic links (same as
 .BR \-L ).
 .TP
-\fB\-files0\-from \fIFILE\fR
+.BI "\-files0\-from " FILE
 Treat the NUL ('\e0')-separated paths in
 .I FILE
 as starting points for the search.
@@ -295,9 +311,9 @@ Pass
 .I \-
 to read the paths from standard input.
 .PP
-\fB\-ignore_readdir_race\fR
+.B \-ignore_readdir_race
 .br
-\fB\-noignore_readdir_race\fR
+.B \-noignore_readdir_race
 .RS
 Whether to report an error if
 .B bfs
@@ -305,9 +321,11 @@ detects that the file tree is modified during the search (default:
 .BR \-noignore_readdir_race ).
 .RE
 .PP
-\fB\-maxdepth \fIN\fR
+.B \-maxdepth
+.I N
 .br
-\fB\-mindepth \fIN\fR
+.B \-mindepth
+.I N
 .RS
 Ignore files deeper/shallower than
 .IR N .
@@ -325,7 +343,7 @@ Exclude hidden files and directories.
 .B \-noleaf
 Ignored; for compatibility with GNU find.
 .TP
-\fB\-regextype \fITYPE\fR
+.BI "\-regextype " TYPE
 Use
 .IR TYPE -flavored
 regular expressions.
@@ -403,13 +421,17 @@ Find files
 minutes ago.
 .RE
 .PP
-\fB\-anewer \fIFILE\fR
+.B \-anewer
+.I FILE
 .br
-\fB\-Bnewer \fIFILE\fR
+.B \-Bnewer
+.I FILE
 .br
-\fB\-cnewer \fIFILE\fR
+.B \-cnewer
+.I FILE
 .br
-\fB\-mnewer \fIFILE\fR
+.B \-mnewer
+.I FILE
 .RS
 Find files
 .BR a ccessed/ B irthed/ c hanged/ m odified
@@ -418,13 +440,17 @@ more recently than
 was modified.
 .RE
 .PP
-\fB\-asince \fITIME\fR
+.B \-asince
+.I TIME
 .br
-\fB\-Bsince \fITIME\fR
+.B \-Bsince
+.I TIME
 .br
-\fB\-csince \fITIME\fR
+.B \-csince
+.I TIME
 .br
-\fB\-msince \fITIME\fR
+.B \-msince
+.I TIME
 .RS
 Find files
 .BR a ccessed/ B irthed/ c hanged/ m odified
@@ -454,7 +480,7 @@ Find files with POSIX.1e
 .BR capabilities (7)
 set.
 .TP
-\fB\-context \fIGLOB\fR
+.BI "\-context " GLOB
 Find files whose SELinux context matches the
 .IR GLOB .
 .TP
@@ -485,7 +511,7 @@ Always false/true.
 Find files with matching inode
 .BR FLAGS .
 .TP
-\fB\-fstype \fITYPE\fR
+.BI "\-fstype " TYPE
 Find files on file systems with the given
 .IR TYPE .
 .PP
@@ -497,9 +523,11 @@ Find files owned by group/user ID
 .IR N .
 .RE
 .PP
-\fB\-group \fINAME\fR
+.B \-group
+.I NAME
 .br
-\fB\-user \fINAME\fR
+.B \-user
+.I NAME
 .RS
 Find files owned by the group/user
 .IR NAME .
@@ -509,15 +537,20 @@ Find files owned by the group/user
 Find hidden files (those beginning with
 .IR . ).
 .PP
-\fB\-ilname \fIGLOB\fR
+.B \-ilname
+.I GLOB
 .br
-\fB\-iname \fIGLOB\fR
+.B \-iname
+.I GLOB
 .br
-\fB\-ipath \fIGLOB\fR
+.B \-ipath
+.I GLOB
 .br
-\fB\-iregex \fIREGEX\fR
+.B \-iregex
+.I REGEX
 .br
-\fB\-iwholename \fIGLOB\fR
+.B \-iwholename
+.I GLOB
 .RS
 Case-insensitive versions of
 .BR \-lname / \-name / \-path / \-regex / \-wholename .
@@ -532,19 +565,19 @@ Find files with
 .I N
 hard links.
 .TP
-\fB\-lname \fIGLOB\fR
+.BI "\-lname " GLOB
 Find symbolic links whose target matches the
 .IR GLOB .
 .TP
-\fB\-name \fIGLOB\fR
+.BI "\-name " GLOB
 Find files whose name matches the
 .IR GLOB .
 .TP
-\fB\-newer \fIFILE\fR
+.BI "\-newer " FILE
 Find files newer than
 .IR FILE .
 .TP
-\fB\-newer\fIXY \fIREFERENCE\fR
+.BI \-newer "XY REFERENCE"
 Find files whose
 .I X
 time is newer than the
@@ -580,9 +613,11 @@ as an ISO 8601-style timestamp.  For example:
 Find files owned by nonexistent groups/users.
 .RE
 .PP
-\fB\-path \fIGLOB\fR
+.B \-path
+.I GLOB
 .br
-\fB\-wholename \fIGLOB\fR
+.B \-wholename
+.I GLOB
 .RS
 Find files whose entire path matches the
 .IR GLOB .
@@ -591,15 +626,15 @@ Find files whose entire path matches the
 \fB\-perm\fR [\fI\-+/\fR]\fIMODE\fR
 Find files with a matching mode.
 .TP
-\fB\-regex \fIREGEX\fR
+.BI "\-regex " REGEX
 Find files whose entire path matches the regular expression
 .IR REGEX .
 .TP
-\fB\-samefile \fIFILE\fR
+.BI "\-samefile " FILE
 Find hard links to
 .IR FILE .
 .TP
-\fB\-since \fITIME\fR
+.BI "\-since " TIME
 Find files modified since the ISO 8601-style timestamp
 .IR TIME .
 See
@@ -678,7 +713,7 @@ days after they were changed.
 Find files with extended attributes
 .RB ( xattr (7)).
 .TP
-\fB\-xattrname\fR \fINAME\fR
+.BI "\-xattrname " NAME
 Find files with the extended attribute
 .IR NAME .
 .TP
@@ -691,23 +726,27 @@ would not, and vice versa.
 .br
 .B \-rm
 .RS
-Delete any found files (implies \fB-depth\fR).
+Delete any found files (implies
+.BR \-depth ).
 .RE
 .TP
-\fB\-exec \fIcommand ... {} ;\fR
+.BI "\-exec " "command ... {} ;"
 Execute a command.
 .TP
-\fB\-exec \fIcommand ... {} +\fR
+.BI "\-exec " "command ... {} +"
 Execute a command with multiple files at once.
 .TP
-\fB\-ok \fIcommand ... {} ;\fR
+.BI "\-ok " "command ... {} ;"
 Prompt the user whether to execute a command.
 .PP
-\fB\-execdir \fIcommand ... {} ;\fR
+.B \-execdir
+.I command ... {} ;
 .br
-\fB\-execdir \fIcommand ... {} +\fR
+.B \-execdir
+.I command ... {} +
 .br
-\fB\-okdir \fIcommand ... {} ;\fR
+.B \-okdir
+.I command ... {} ;
 .RS
 Like
 .BR \-exec / \-ok ,
@@ -719,13 +758,17 @@ Exit immediately with the given status
 .RI ( 0
 if unspecified).
 .PP
-\fB\-fls \fIFILE\fR
+.B \-fls
+.I FILE
 .br
-\fB\-fprint \fIFILE\fR
+.B \-fprint
+.I FILE
 .br
-\fB\-fprint0 \fIFILE\fR
+.B \-fprint0
+.I FILE
 .br
-\fB\-fprintf \fIFILE FORMAT\fR
+.B \-fprintf
+.I FILE FORMAT
 .RS
 Like
 .BR \-ls / \-print / \-print0 / \-printf ,
@@ -734,7 +777,7 @@ but write to
 instead of standard output.
 .RE
 .TP
-\fB\-limit \fIN\fR
+.BI "\-limit " N
 Quit once this action is evaluated
 .I N
 times.
@@ -755,7 +798,7 @@ Useful in conjunction with
 .B xargs
 .IR \-0 .
 .TP
-\fB\-printf \fIFORMAT\fR
+.BI "\-printf " FORMAT
 Print according to a format string (see
 .BR find (1)).
 These additional format directives are supported:
@@ -901,7 +944,7 @@ is quoted to ensure the glob is processed by
 .B bfs
 rather than the shell.
 .TP
-\fBbfs \-name access_log \-L \fI/var\fR
+.BI "bfs \-name access_log \-L " /var
 Finds all files named
 .B access_log
 under
@@ -910,7 +953,7 @@ following symbolic links.
 .B bfs
 allows flags and paths to appear anywhere on the command line.
 .TP
-\fBbfs \fI~ \fB\-not \-user $USER\fR
+.BI "bfs " ~ " \-not \-user $USER"
 Prints all files in your home directory not owned by you.
 .TP
 .B bfs \-xtype l
diff --git a/src/alloc.c b/src/alloc.c
index ef9f6ab..f505eda 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -106,7 +106,7 @@ void *reserve(void *ptr, size_t align, size_t size, size_t count) {
 	// If we stayed within the same size class, reuse ptr.
 	if (count & (count - 1)) {
 		// Tell sanitizers about the new array element
-		sanitize_alloc((char *)ptr + old_size, size);
+		sanitize_resize(ptr, old_size, old_size + size, bit_ceil(count) * size);
 		errno = 0;
 		return ptr;
 	}
@@ -121,7 +121,7 @@ void *reserve(void *ptr, size_t align, size_t size, size_t count) {
 	}
 
 	// Pretend we only allocated one more element
-	sanitize_free((char *)ret + old_size + size, new_size - old_size - size);
+	sanitize_resize(ret, new_size, old_size + size, new_size);
 	errno = 0;
 	return ret;
 }
@@ -304,8 +304,7 @@ void *varena_alloc(struct varena *varena, size_t count) {
 	}
 
 	// Tell the sanitizers the exact size of the allocated struct
-	sanitize_free(ret, arena->size);
-	sanitize_alloc(ret, varena_exact_size(varena, count));
+	sanitize_resize(ret, arena->size, varena_exact_size(varena, count), arena->size);
 
 	return ret;
 }
@@ -317,15 +316,14 @@ void *varena_realloc(struct varena *varena, void *ptr, size_t old_count, size_t
 		return NULL;
 	}
 
-	size_t new_exact_size = varena_exact_size(varena, new_count);
-	size_t old_exact_size = varena_exact_size(varena, old_count);
+	size_t old_size = old_arena->size;
+	size_t new_size = new_arena->size;
 
 	if (new_arena == old_arena) {
-		if (new_count < old_count) {
-			sanitize_free((char *)ptr + new_exact_size, old_exact_size - new_exact_size);
-		} else if (new_count > old_count) {
-			sanitize_alloc((char *)ptr + old_exact_size, new_exact_size - old_exact_size);
-		}
+		sanitize_resize(ptr,
+			varena_exact_size(varena, old_count),
+			varena_exact_size(varena, new_count),
+			new_size);
 		return ptr;
 	}
 
@@ -334,17 +332,18 @@ void *varena_realloc(struct varena *varena, void *ptr, size_t old_count, size_t
 		return NULL;
 	}
 
-	size_t old_size = old_arena->size;
-	sanitize_alloc(ptr, old_size);
+	// Non-sanitized builds don't bother computing exact sizes, and just use
+	// the potentially-larger arena size for each size class instead.  To
+	// allow the below memcpy() to work with the less-precise sizes, expand
+	// the old allocation to its full capacity.
+	sanitize_resize(ptr, varena_exact_size(varena, old_count), old_size, old_size);
 
-	size_t new_size = new_arena->size;
 	size_t min_size = new_size < old_size ? new_size : old_size;
 	memcpy(ret, ptr, min_size);
 
 	arena_free(old_arena, ptr);
 
-	sanitize_free(ret, new_size);
-	sanitize_alloc(ret, new_exact_size);
+	sanitize_resize(ret, new_size, varena_exact_size(varena, new_count), new_size);
 	return ret;
 }
 
diff --git a/src/bar.c b/src/bar.c
index 3258df0..1b0691a 100644
--- a/src/bar.c
+++ b/src/bar.c
@@ -18,7 +18,6 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <sys/ioctl.h>
 #include <termios.h>
 #include <unistd.h>
 
@@ -33,25 +32,14 @@ struct bfs_bar {
 
 /** Get the terminal size, if possible. */
 static int bfs_bar_getsize(struct bfs_bar *bar) {
-#if BFS_HAS_TCGETWINSIZE || defined(TIOCGWINSZ)
 	struct winsize ws;
-
-#  if BFS_HAS_TCGETWINSIZE
-	int ret = tcgetwinsize(bar->fd, &ws);
-#  else
-	int ret = ioctl(bar->fd, TIOCGWINSZ, &ws);
-#  endif
-	if (ret != 0) {
-		return ret;
+	if (xtcgetwinsize(bar->fd, &ws) != 0) {
+		return -1;
 	}
 
 	store(&bar->width, ws.ws_col, relaxed);
 	store(&bar->height, ws.ws_row, relaxed);
 	return 0;
-#else
-	errno = ENOTSUP;
-	return -1;
-#endif
 }
 
 /** Write a string to the status bar (async-signal-safe). */
diff --git a/src/bfs.h b/src/bfs.h
index af4cf9f..3cee727 100644
--- a/src/bfs.h
+++ b/src/bfs.h
@@ -218,4 +218,24 @@ extern const char bfs_ldlibs[];
 #  define _target_clones(...)
 #endif
 
+/**
+ * Mark the size of a flexible array member.
+ */
+#if __has_attribute(counted_by)
+#  define _counted_by(...) __attribute__((counted_by(__VA_ARGS__)))
+#else
+#  define _counted_by(...)
+#endif
+
+/**
+ * Optimization hint to not unroll a loop.
+ */
+#if BFS_HAS_PRAGMA_NOUNROLL
+#  define _nounroll _Pragma("nounroll")
+#elif __GNUC__ && !__clang__
+#  define _nounroll _Pragma("GCC unroll 0")
+#else
+#  define _nounroll
+#endif
+
 #endif // BFS_H
diff --git a/src/bfstd.c b/src/bfstd.c
index b29fb7b..b78af7a 100644
--- a/src/bfstd.c
+++ b/src/bfstd.c
@@ -17,15 +17,18 @@
 #include <locale.h>
 #include <nl_types.h>
 #include <pthread.h>
+#include <sched.h>
 #include <stddef.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <sys/ioctl.h>
 #include <sys/resource.h>
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <sys/wait.h>
+#include <termios.h>
 #include <unistd.h>
 #include <wchar.h>
 
@@ -186,16 +189,6 @@ char *xgetdelim(FILE *file, char delim) {
 	}
 }
 
-int open_cterm(int flags) {
-	char path[L_ctermid];
-	if (ctermid(path) == NULL || strlen(path) == 0) {
-		errno = ENOTTY;
-		return -1;
-	}
-
-	return open(path, flags);
-}
-
 const char *xgetprogname(void) {
 	const char *cmd = NULL;
 #if BFS_HAS_GETPROGNAME
@@ -211,35 +204,171 @@ const char *xgetprogname(void) {
 	return cmd;
 }
 
-int xstrtoll(const char *str, char **end, int base, long long *value) {
-	// strtoll() skips leading spaces, but we want to reject them
+/** Common prologue for xstrto*() wrappers. */
+static int xstrtox_prologue(const char *str) {
+	// strto*() skips leading spaces, but we want to reject them
 	if (xisspace(str[0])) {
 		errno = EINVAL;
 		return -1;
 	}
 
+	errno = 0;
+	return 0;
+}
+
+/** Common epilogue for xstrto*() wrappers. */
+static int xstrtox_epilogue(const char *str, char **end, char *endp) {
+	if (errno != 0) {
+		return -1;
+	}
+
+	if (end) {
+		*end = endp;
+	}
+
 	// If end is NULL, make sure the entire string is valid
-	bool entire = !end;
+	if (endp == str || (!end && *endp != '\0')) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	return 0;
+}
+
+int xstrtos(const char *str, char **end, int base, short *value) {
+	long n;
+	if (xstrtol(str, end, base, &n) != 0) {
+		return -1;
+	}
+
+	if (n < SHRT_MIN || n > SHRT_MAX) {
+		errno = ERANGE;
+		return -1;
+	}
+
+	*value = n;
+	return 0;
+}
+
+int xstrtoi(const char *str, char **end, int base, int *value) {
+	long n;
+	if (xstrtol(str, end, base, &n) != 0) {
+		return -1;
+	}
+
+	if (n < INT_MIN || n > INT_MAX) {
+		errno = ERANGE;
+		return -1;
+	}
+
+	*value = n;
+	return 0;
+}
+
+int xstrtol(const char *str, char **end, int base, long *value) {
+	if (xstrtox_prologue(str) != 0) {
+		return -1;
+	}
+
 	char *endp;
-	if (!end) {
-		end = &endp;
+	*value = strtol(str, &endp, base);
+	return xstrtox_epilogue(str, end, endp);
+}
+
+int xstrtoll(const char *str, char **end, int base, long long *value) {
+	if (xstrtox_prologue(str) != 0) {
+		return -1;
 	}
 
-	errno = 0;
-	long long result = strtoll(str, end, base);
-	if (errno != 0) {
+	char *endp;
+	*value = strtoll(str, &endp, base);
+	return xstrtox_epilogue(str, end, endp);
+}
+
+int xstrtof(const char *str, char **end, float *value) {
+	if (xstrtox_prologue(str) != 0) {
 		return -1;
 	}
 
-	if (*end == str || (entire && **end != '\0')) {
-		errno = EINVAL;
+	char *endp;
+	*value = strtof(str, &endp);
+	return xstrtox_epilogue(str, end, endp);
+}
+
+int xstrtod(const char *str, char **end, double *value) {
+	if (xstrtox_prologue(str) != 0) {
+		return -1;
+	}
+
+	char *endp;
+	*value = strtod(str, &endp);
+	return xstrtox_epilogue(str, end, endp);
+}
+
+int xstrtous(const char *str, char **end, int base, unsigned short *value) {
+	unsigned long n;
+	if (xstrtoul(str, end, base, &n) != 0) {
 		return -1;
 	}
 
-	*value = result;
+	if (n > USHRT_MAX) {
+		errno = ERANGE;
+		return -1;
+	}
+
+	*value = n;
 	return 0;
 }
 
+int xstrtoui(const char *str, char **end, int base, unsigned int *value) {
+	unsigned long n;
+	if (xstrtoul(str, end, base, &n) != 0) {
+		return -1;
+	}
+
+	if (n > UINT_MAX) {
+		errno = ERANGE;
+		return -1;
+	}
+
+	*value = n;
+	return 0;
+}
+
+/** Common epilogue for xstrtou*() wrappers. */
+static int xstrtoux_epilogue(const char *str, char **end, char *endp) {
+	if (xstrtox_epilogue(str, end, endp) != 0) {
+		return -1;
+	}
+
+	if (str[0] == '-') {
+		errno = ERANGE;
+		return -1;
+	}
+
+	return 0;
+}
+
+int xstrtoul(const char *str, char **end, int base, unsigned long *value) {
+	if (xstrtox_prologue(str) != 0) {
+		return -1;
+	}
+
+	char *endp;
+	*value = strtoul(str, &endp, base);
+	return xstrtoux_epilogue(str, end, endp);
+}
+
+int xstrtoull(const char *str, char **end, int base, unsigned long long *value) {
+	if (xstrtox_prologue(str) != 0) {
+		return -1;
+	}
+
+	char *endp;
+	*value = strtoull(str, &endp, base);
+	return xstrtoux_epilogue(str, end, endp);
+}
+
 /** Compile and execute a regular expression for xrpmatch(). */
 static int xrpregex(nl_item item, const char *response) {
 	const char *pattern = nl_langinfo(item);
@@ -482,7 +611,9 @@ int rlim_cmp(rlim_t a, rlim_t b) {
 }
 
 dev_t xmakedev(int ma, int mi) {
-#ifdef makedev
+#if __QNX__
+	return makedev(0, ma, mi);
+#elif defined(makedev)
 	return makedev(ma, mi);
 #else
 	return (ma << 8) | mi;
@@ -513,6 +644,32 @@ pid_t xwaitpid(pid_t pid, int *status, int flags) {
 	return ret;
 }
 
+int open_cterm(int flags) {
+	char path[L_ctermid];
+	if (ctermid(path) == NULL || strlen(path) == 0) {
+		errno = ENOTTY;
+		return -1;
+	}
+
+	return open(path, flags);
+}
+
+int xtcgetwinsize(int fd, struct winsize *ws) {
+#if BFS_HAS_TCGETWINSIZE
+	return tcgetwinsize(fd, ws);
+#else
+	return ioctl(fd, TIOCGWINSZ, ws);
+#endif
+}
+
+int xtcsetwinsize(int fd, const struct winsize *ws) {
+#if BFS_HAS_TCSETWINSIZE
+	return tcsetwinsize(fd, ws);
+#else
+	return ioctl(fd, TIOCSWINSZ, ws);
+#endif
+}
+
 int dup_cloexec(int fd) {
 #ifdef F_DUPFD_CLOEXEC
 	return fcntl(fd, F_DUPFD_CLOEXEC, 0);
@@ -731,41 +888,103 @@ long xsysconf(int name) {
 	return ret;
 }
 
-size_t asciilen(const char *str) {
-	return asciinlen(str, strlen(str));
-}
+#if BFS_HAS_SCHED_GETAFFINITY
+/** Get the CPU count in an affinity mask of the given size. */
+static long bfs_sched_getaffinity(size_t size) {
+	cpu_set_t set, *pset = &set;
 
-size_t asciinlen(const char *str, size_t n) {
-	size_t i = 0;
+	if (size > sizeof(set)) {
+		pset = malloc(size);
+		if (!pset) {
+			return -1;
+		}
+	}
 
-#if SIZE_WIDTH % 8 == 0
-	// Word-at-a-time isascii()
-	for (size_t word; i + sizeof(word) <= n; i += sizeof(word)) {
-		memcpy(&word, str + i, sizeof(word));
+	long ret = -1;
+	if (sched_getaffinity(0, size, pset) == 0) {
+#  ifdef CPU_COUNT_S
+		ret = CPU_COUNT_S(size, pset);
+#  else
+		bfs_assert(size <= sizeof(set));
+		ret = CPU_COUNT(pset);
+# endif
+	}
 
-		const size_t mask = (SIZE_MAX / 0xFF) << 7; // 0x808080...
-		word &= mask;
-		if (!word) {
-			continue;
-		}
+	if (pset != &set) {
+		free(pset);
+	}
+	return ret;
+}
+#endif
+
+long nproc(void) {
+	long ret = 0;
 
-#if ENDIAN_NATIVE == ENDIAN_BIG
-		word = bswap(word);
-#elif ENDIAN_NATIVE != ENDIAN_LITTLE
+#if BFS_HAS_SCHED_GETAFFINITY
+	size_t size = sizeof(cpu_set_t);
+	do {
+		ret = bfs_sched_getaffinity(size);
+
+#  ifdef CPU_COUNT_S
+		// On Linux, sched_getaffinity(2) says:
+		//
+		//     When working on systems with large kernel CPU affinity masks, one must
+		//     dynamically allocate the mask argument (see CPU_ALLOC(3)).  Currently,
+		//     the only way to do this is by probing for the size of the required mask
+		//     using sched_getaffinity() calls with increasing mask sizes (until the
+		//     call does not fail with the error EINVAL).
+		size *= 2;
+#  else
+		// No support for dynamically-sized CPU masks
 		break;
+#  endif
+	} while (ret < 0 && errno == EINVAL);
 #endif
 
-		size_t first = trailing_zeros(word) / 8;
-		return i + first;
+	if (ret < 1) {
+		ret = xsysconf(_SC_NPROCESSORS_ONLN);
 	}
-#endif
 
-	for (; i < n; ++i) {
-		if (!xisascii(str[i])) {
-			break;
-		}
+	if (ret < 1) {
+		ret = 1;
 	}
 
+	return ret;
+}
+
+size_t asciilen(const char *str) {
+	return asciinlen(str, strlen(str));
+}
+
+size_t asciinlen(const char *str, size_t n) {
+	const unsigned char *ustr = (const unsigned char *)str;
+	size_t i = 0;
+
+	// Word-at-a-time isascii()
+#define CHUNK(n) CHUNK_(uint##n##_t, load8_leu##n)
+#define CHUNK_(type, load8) \
+	(n - i >= sizeof(type)) { \
+		type word = load8(ustr + i); \
+		type mask = (((type)-1) / 0xFF) << 7; /* 0x808080.. */ \
+		word &= mask; \
+		i += trailing_zeros(word) / 8; \
+		if (word) { \
+			return i; \
+		} \
+	}
+
+#if SIZE_WIDTH >= 64
+	while CHUNK(64);
+	if CHUNK(32);
+#else
+	while CHUNK(32);
+#endif
+	if CHUNK(16);
+	if CHUNK(8);
+
+#undef CHUNK_
+#undef CHUNK
+
 	return i;
 }
 
diff --git a/src/bfstd.h b/src/bfstd.h
index 97867fd..15dd949 100644
--- a/src/bfstd.h
+++ b/src/bfstd.h
@@ -158,16 +158,6 @@ FILE *xfopen(const char *path, int flags);
  */
 char *xgetdelim(FILE *file, char delim);
 
-/**
- * Open the controlling terminal.
- *
- * @flags
- *         The open() flags.
- * @return
- *         An open file descriptor, or -1 on failure.
- */
-int open_cterm(int flags);
-
 // #include <stdlib.h>
 
 /**
@@ -179,23 +169,56 @@ int open_cterm(int flags);
 const char *xgetprogname(void);
 
 /**
+ * Like xstrtol(), but for short.
+ */
+int xstrtos(const char *str, char **end, int base, short *value);
+
+/**
+ * Like xstrtol(), but for int.
+ */
+int xstrtoi(const char *str, char **end, int base, int *value);
+
+/**
+ * Wrapper for strtol() that forbids leading spaces.
+ */
+int xstrtol(const char *str, char **end, int base, long *value);
+
+/**
  * Wrapper for strtoll() that forbids leading spaces.
- *
- * @str
- *         The string to parse.
- * @end
- *         If non-NULL, will hold a pointer to the first invalid character.
- *         If NULL, the entire string must be valid.
- * @base
- *         The base for the conversion, or 0 to auto-detect.
- * @value
- *         Will hold the parsed integer value, on success.
- * @return
- *         0 on success, -1 on failure.
  */
 int xstrtoll(const char *str, char **end, int base, long long *value);
 
 /**
+ * Like xstrtoul(), but for unsigned short.
+ */
+int xstrtous(const char *str, char **end, int base, unsigned short *value);
+
+/**
+ * Like xstrtoul(), but for unsigned int.
+ */
+int xstrtoui(const char *str, char **end, int base, unsigned int *value);
+
+/**
+ * Wrapper for strtoul() that forbids leading spaces, negatives.
+ */
+int xstrtoul(const char *str, char **end, int base, unsigned long *value);
+
+/**
+ * Wrapper for strtoull() that forbids leading spaces, negatives.
+ */
+int xstrtoull(const char *str, char **end, int base, unsigned long long *value);
+
+/**
+ * Wrapper for strtof() that forbids leading spaces.
+ */
+int xstrtof(const char *str, char **end, float *value);
+
+/**
+ * Wrapper for strtod() that forbids leading spaces.
+ */
+int xstrtod(const char *str, char **end, double *value);
+
+/**
  * Process a yes/no prompt.
  *
  * @return 1 for yes, 0 for no, and -1 for unknown.
@@ -339,6 +362,29 @@ int xminor(dev_t dev);
  */
 pid_t xwaitpid(pid_t pid, int *status, int flags);
 
+#include <sys/ioctl.h> // May be necessary for struct winsize
+#include <termios.h>
+
+/**
+ * Open the controlling terminal.
+ *
+ * @flags
+ *         The open() flags.
+ * @return
+ *         An open file descriptor, or -1 on failure.
+ */
+int open_cterm(int flags);
+
+/**
+ * tcgetwinsize()/ioctl(TIOCGWINSZ) wrapper.
+ */
+int xtcgetwinsize(int fd, struct winsize *ws);
+
+/**
+ * tcsetwinsize()/ioctl(TIOCSWINSZ) wrapper.
+ */
+int xtcsetwinsize(int fd, const struct winsize *ws);
+
 // #include <unistd.h>
 
 /**
@@ -460,6 +506,11 @@ long xsysconf(int name);
 #define sysoption(name) \
 	(_POSIX_##name == 0 ? xsysconf(_SC_##name) : _POSIX_##name)
 
+/**
+ * Get the number of CPU threads available to the current process.
+ */
+long nproc(void);
+
 #include <wchar.h>
 
 /**
diff --git a/src/bftw.c b/src/bftw.c
index 61193d5..0ca6f34 100644
--- a/src/bftw.c
+++ b/src/bftw.c
@@ -253,7 +253,7 @@ struct bftw_file {
 	/** The length of the file's name. */
 	size_t namelen;
 	/** The file's name. */
-	char name[];
+	char name[]; // _counted_by(namelen + 1)
 };
 
 /**
@@ -1008,6 +1008,7 @@ static int bftw_ioq_pop(struct bftw_state *state, bool block) {
 		return -1;
 	}
 
+	ioq_submit(ioq);
 	struct ioq_ent *ent = ioq_pop(ioq, block);
 	if (!ent) {
 		return -1;
@@ -1051,6 +1052,10 @@ static int bftw_ioq_pop(struct bftw_state *state, bool block) {
 
 		bftw_queue_attach(&state->fileq, file, true);
 		break;
+
+	default:
+		bfs_bug("Unexpected ioq op %d", (int)op);
+		break;
 	}
 
 	ioq_free(ioq, ent);
@@ -1480,7 +1485,8 @@ fail:
 
 /** Check if we should stat() a file asynchronously. */
 static bool bftw_should_ioq_stat(struct bftw_state *state, struct bftw_file *file) {
-	// To avoid surprising users too much, process the roots in order
+	// POSIX wants the root paths to be processed in order
+	// See https://www.austingroupbugs.net/view.php?id=1859
 	if (file->depth == 0) {
 		return false;
 	}
@@ -1953,6 +1959,10 @@ static void bftw_flush(struct bftw_state *state) {
 
 	bftw_queue_flush(&state->dirq);
 	bftw_ioq_opendirs(state);
+
+	if (state->ioq) {
+		ioq_submit(state->ioq);
+	}
 }
 
 /** Close the current directory. */
diff --git a/src/bit.h b/src/bit.h
index 73a80dc..5d6fb9d 100644
--- a/src/bit.h
+++ b/src/bit.h
@@ -148,7 +148,7 @@
 #  define INTMAX_WIDTH UINTMAX_WIDTH
 #endif
 
-// C23 polyfill: byte order
+// N3022 polyfill: byte order
 
 #ifdef __STDC_ENDIAN_LITTLE__
 #  define ENDIAN_LITTLE __STDC_ENDIAN_LITTLE__
@@ -250,6 +250,58 @@ static inline uint8_t bswap_u8(uint8_t n) {
  */
 #define bswap(n) UINT_SELECT(n, bswap)(n)
 
+#define LOAD8_LEU8(ptr, i, n) ((uint##n##_t)((const unsigned char *)ptr)[(i) / 8] << (i))
+#define LOAD8_BEU8(ptr, i, n) ((uint##n##_t)((const unsigned char *)ptr)[(i) / 8] << (n - (i) - 8))
+
+/** Load a little-endian 8-bit word. */
+static inline uint8_t load8_leu8(const void *ptr) {
+	return LOAD8_LEU8(ptr, 0, 8);
+}
+
+/** Load a big-endian 8-bit word. */
+static inline uint8_t load8_beu8(const void *ptr) {
+	return LOAD8_BEU8(ptr, 0, 8);
+}
+
+#define LOAD8_LEU16(ptr, i, n) (LOAD8_LEU8(ptr, i, n) | LOAD8_LEU8(ptr, i + 8, n))
+#define LOAD8_BEU16(ptr, i, n) (LOAD8_BEU8(ptr, i, n) | LOAD8_BEU8(ptr, i + 8, n))
+
+/** Load a little-endian 16-bit word. */
+static inline uint16_t load8_leu16(const void *ptr) {
+	return LOAD8_LEU16(ptr, 0, 16);
+}
+
+/** Load a big-endian 16-bit word. */
+static inline uint16_t load8_beu16(const void *ptr) {
+	return LOAD8_BEU16(ptr, 0, 16);
+}
+
+#define LOAD8_LEU32(ptr, i, n) (LOAD8_LEU16(ptr, i, n) | LOAD8_LEU16(ptr, i + 16, n))
+#define LOAD8_BEU32(ptr, i, n) (LOAD8_BEU16(ptr, i, n) | LOAD8_BEU16(ptr, i + 16, n))
+
+/** Load a little-endian 32-bit word. */
+static inline uint32_t load8_leu32(const void *ptr) {
+	return LOAD8_LEU32(ptr, 0, 32);
+}
+
+/** Load a big-endian 32-bit word. */
+static inline uint32_t load8_beu32(const void *ptr) {
+	return LOAD8_BEU32(ptr, 0, 32);
+}
+
+#define LOAD8_LEU64(ptr, i, n) (LOAD8_LEU32(ptr, i, n) | LOAD8_LEU32(ptr, i + 32, n))
+#define LOAD8_BEU64(ptr, i, n) (LOAD8_BEU32(ptr, i, n) | LOAD8_BEU32(ptr, i + 32, n))
+
+/** Load a little-endian 64-bit word. */
+static inline uint64_t load8_leu64(const void *ptr) {
+	return LOAD8_LEU64(ptr, 0, 64);
+}
+
+/** Load a big-endian 64-bit word. */
+static inline uint64_t load8_beu64(const void *ptr) {
+	return LOAD8_BEU64(ptr, 0, 64);
+}
+
 // C23 polyfill: bit utilities
 
 #if __STDC_VERSION_STDBIT_H__ >= C23
diff --git a/src/color.c b/src/color.c
index e7f0973..f77877d 100644
--- a/src/color.c
+++ b/src/color.c
@@ -32,7 +32,7 @@ struct esc_seq {
 	/** The length of the escape sequence. */
 	size_t len;
 	/** The escape sequence itself, without a terminating NUL. */
-	char seq[];
+	char seq[] _counted_by(len);
 };
 
 /**
@@ -48,7 +48,7 @@ struct ext_color {
 	/** Whether the comparison should be case-sensitive. */
 	bool case_sensitive;
 	/** The extension to match (NUL-terminated). */
-	char ext[];
+	char ext[]; // _counted_by(len + 1);
 };
 
 struct colors {
@@ -143,13 +143,7 @@ static int init_esc(struct colors *colors, const char *name, const char *value,
 
 	*field = esc;
 
-	struct trie_leaf *leaf = trie_insert_str(&colors->names, name);
-	if (!leaf) {
-		return -1;
-	}
-
-	leaf->value = field;
-	return 0;
+	return trie_set_str(&colors->names, name, field);
 }
 
 /** Check if an escape sequence is equal to a string. */
@@ -159,8 +153,7 @@ static bool esc_eq(const struct esc_seq *esc, const char *str, size_t len) {
 
 /** Get an escape sequence from the table. */
 static struct esc_seq **get_esc(const struct colors *colors, const char *name) {
-	const struct trie_leaf *leaf = trie_find_str(&colors->names, name);
-	return leaf ? leaf->value : NULL;
+	return trie_get_str(&colors->names, name);
 }
 
 /** Append an escape sequence to a string. */
@@ -225,13 +218,7 @@ static int insert_ext(struct trie *trie, struct ext_color *ext) {
 	}
 
 	size_t len = ext->len + 1;
-	leaf = trie_insert_mem(trie, ext->ext, len);
-	if (!leaf) {
-		return -1;
-	}
-
-	leaf->value = ext;
-	return 0;
+	return trie_set_mem(trie, ext->ext, len, ext);
 }
 
 /** Set the color for an extension. */
@@ -620,6 +607,67 @@ fail:
 	return ret;
 }
 
+/** Parse the FreeBSD $LSCOLORS format. */
+static int parse_bsd_ls_colors(struct colors *colors, const char *lscolors) {
+	static const char *keys[] = {
+		"di", "ln", "so", "pi", "ex", "bd", "cd", "su", "sg", "tw", "ow"
+	};
+
+	static const char *fg_codes[256] = {
+		['a'] = "30", ['b'] = "31", ['c'] = "32", ['d'] = "33",
+		['e'] = "34", ['f'] = "35", ['g'] = "36", ['h'] = "37", ['x'] = "39",
+		['A'] = "1;30", ['B'] = "1;31", ['C'] = "1;32", ['D'] = "1;33",
+		['E'] = "1;34", ['F'] = "1;35", ['G'] = "1;36", ['H'] = "1;37", ['X'] = "1"
+	};
+
+	static const char *bg_codes[256] = {
+		['a'] = "40", ['b'] = "41", ['c'] = "42", ['d'] = "43",
+		['e'] = "44", ['f'] = "45", ['g'] = "46", ['h'] = "47", ['x'] = "49",
+		['A'] = "4;100", ['B'] = "4;101", ['C'] = "4;102", ['D'] = "4;103",
+		['E'] = "4;104", ['F'] = "4;105", ['G'] = "4;106", ['H'] = "4;107", ['X'] = "4;49"
+	};
+
+	// Please refer to https://man.freebsd.org/cgi/man.cgi?ls(1)#ENVIRONMENT
+	char complete_colors[] = "exfxcxdxbxegedabagacad";
+
+	size_t max = strlen(complete_colors);
+	size_t len = strnlen(lscolors, max + 1);
+	if (len == 0 || len % 2 != 0 || len > max) {
+		errno = EINVAL;
+		return -1;
+	}
+	memcpy(complete_colors, lscolors, len);
+
+	for (size_t i = 0; i < countof(keys); ++i) {
+		uint8_t fg = complete_colors[i * 2];
+		uint8_t bg = complete_colors[(i * 2) + 1];
+
+		const char *fg_code = fg_codes[fg];
+		const char *bg_code = bg_codes[bg];
+
+		if (!fg_code || !bg_code) {
+			continue;
+		}
+
+		dchar *esc = dstrprintf("%s;%s", fg_code, bg_code);
+		if (!esc) {
+			return -1;
+		}
+
+		int ret = set_esc(colors, keys[i], esc);
+		dstrfree(esc);
+		if (ret != 0) {
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+static bool str_isset(const char *str) {
+    return str && *str;
+}
+
 struct colors *parse_colors(void) {
 	struct colors *colors = ALLOC(struct colors);
 	if (!colors) {
@@ -689,12 +737,22 @@ struct colors *parse_colors(void) {
 		goto fail;
 	}
 
-	if (parse_gnu_ls_colors(colors, getenv("LS_COLORS")) != 0) {
-		goto fail;
-	}
-	if (parse_gnu_ls_colors(colors, getenv("BFS_COLORS")) != 0) {
-		goto fail;
+	const char *gnu_colors = getenv("LS_COLORS");
+	const char *bfs_colors = getenv("BFS_COLORS");
+	const char *bsd_colors = getenv("LSCOLORS");
+	if (str_isset(gnu_colors) || str_isset(bfs_colors)) {
+		if (parse_gnu_ls_colors(colors, gnu_colors) != 0) {
+			goto fail;
+		}
+		if (parse_gnu_ls_colors(colors, bfs_colors) != 0) {
+			goto fail;
+		}
+	} else if (str_isset(bsd_colors)) {
+		if (parse_bsd_ls_colors(colors, bsd_colors) != 0) {
+			goto fail;
+		}
 	}
+
 	if (build_iext_trie(colors) != 0) {
 		goto fail;
 	}
@@ -975,7 +1033,7 @@ static const struct esc_seq *file_color(const struct colors *colors, const struc
 		goto error;
 	}
 
-	const struct bfs_stat *statbuf;
+	const struct bfs_stat *statbuf = NULL;
 	const struct esc_seq *color = NULL;
 
 	switch (type) {
@@ -1253,6 +1311,33 @@ static int print_link_target(CFILE *cfile, const struct BFTW *ftwbuf) {
 _printf(2, 3)
 static int cbuff(CFILE *cfile, const char *format, ...);
 
+/** Print an expression's name, for diagnostics. */
+static int print_expr_name(CFILE *cfile, const struct bfs_expr *expr) {
+	switch (expr->kind) {
+	case BFS_FLAG:
+		return cbuff(cfile, "${cyn}%pq${rs}", expr->argv[0]);
+	case BFS_OPERATOR:
+		return cbuff(cfile, "${red}%pq${rs}", expr->argv[0]);
+	default:
+		return cbuff(cfile, "${blu}%pq${rs}", expr->argv[0]);
+	}
+}
+
+/** Print an expression's args, for diagnostics. */
+static int print_expr_args(CFILE *cfile, const struct bfs_expr *expr) {
+	if (print_expr_name(cfile, expr) != 0) {
+		return -1;
+	}
+
+	for (size_t i = 1; i < expr->argc; ++i) {
+		if (cbuff(cfile, " ${bld}%pq${rs}", expr->argv[i]) < 0) {
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
 /** Dump a parsed expression tree, for debugging. */
 static int print_expr(CFILE *cfile, const struct bfs_expr *expr, bool verbose, int depth) {
 	if (depth >= 2) {
@@ -1267,28 +1352,10 @@ static int print_expr(CFILE *cfile, const struct bfs_expr *expr, bool verbose, i
 		return -1;
 	}
 
-	int ret;
-	switch (expr->kind) {
-	case BFS_FLAG:
-		ret = cbuff(cfile, "${cyn}%pq${rs}", expr->argv[0]);
-		break;
-	case BFS_OPERATOR:
-		ret = cbuff(cfile, "${red}%pq${rs}", expr->argv[0]);
-		break;
-	default:
-		ret = cbuff(cfile, "${blu}%pq${rs}", expr->argv[0]);
-		break;
-	}
-	if (ret < 0) {
+	if (print_expr_args(cfile, expr) != 0) {
 		return -1;
 	}
 
-	for (size_t i = 1; i < expr->argc; ++i) {
-		if (cbuff(cfile, " ${bld}%pq${rs}", expr->argv[i]) < 0) {
-			return -1;
-		}
-	}
-
 	if (verbose) {
 		double rate = 0.0, time = 0.0;
 		if (expr->evaluations) {
@@ -1426,6 +1493,16 @@ static int cvbuff(CFILE *cfile, const char *format, va_list args) {
 						return -1;
 					}
 					break;
+				case 'x':
+					if (print_expr_args(cfile, va_arg(args, const struct bfs_expr *)) != 0) {
+						return -1;
+					}
+					break;
+				case 'X':
+					if (print_expr_name(cfile, va_arg(args, const struct bfs_expr *)) != 0) {
+						return -1;
+					}
+					break;
 
 				default:
 					goto invalid;
diff --git a/src/color.h b/src/color.h
index 2394af2..aac8b33 100644
--- a/src/color.h
+++ b/src/color.h
@@ -95,6 +95,8 @@ int cfclose(CFILE *cfile);
  *         %pL: A colored link target, from a const struct BFTW * argument
  *         %pe: Dump a const struct bfs_expr *, for debugging.
  *         %pE: Dump a const struct bfs_expr * in verbose form, for debugging.
+ *         %px: Print a const struct bfs_expr * with syntax highlighting.
+ *         %pX: Print the name of a const struct bfs_expr *, without arguments.
  *         %%: A literal '%'
  *         ${cc}: Change the color to 'cc'
  *         $$: A literal '$'
diff --git a/src/ctx.c b/src/ctx.c
index 2c55a35..05baa1d 100644
--- a/src/ctx.c
+++ b/src/ctx.c
@@ -24,20 +24,6 @@
 #include <time.h>
 #include <unistd.h>
 
-/** Get the initial value for ctx->threads (-j). */
-static int bfs_nproc(void) {
-	long nproc = xsysconf(_SC_NPROCESSORS_ONLN);
-
-	if (nproc < 1) {
-		nproc = 1;
-	} else if (nproc > 8) {
-		// Not much speedup after 8 threads
-		nproc = 8;
-	}
-
-	return nproc;
-}
-
 struct bfs_ctx *bfs_ctx_new(void) {
 	struct bfs_ctx *ctx = ZALLOC(struct bfs_ctx);
 	if (!ctx) {
@@ -50,9 +36,14 @@ struct bfs_ctx *bfs_ctx_new(void) {
 	ctx->maxdepth = INT_MAX;
 	ctx->flags = BFTW_RECOVER;
 	ctx->strategy = BFTW_BFS;
-	ctx->threads = bfs_nproc();
 	ctx->optlevel = 3;
 
+	ctx->threads = nproc();
+	if (ctx->threads > 8) {
+		// Not much speedup after 8 threads
+		ctx->threads = 8;
+	}
+
 	trie_init(&ctx->files);
 
 	ctx->umask = umask(0);
@@ -295,6 +286,7 @@ int bfs_ctx_free(struct bfs_ctx *ctx) {
 		}
 		free(ctx->paths);
 
+		free(ctx->kinds);
 		free(ctx->argv);
 		free(ctx);
 	}
diff --git a/src/ctx.h b/src/ctx.h
index be6e2af..908338f 100644
--- a/src/ctx.h
+++ b/src/ctx.h
@@ -29,6 +29,8 @@ struct bfs_ctx {
 	size_t argc;
 	/** The unparsed command line arguments. */
 	char **argv;
+	/** The argument token kinds. */
+	enum bfs_kind *kinds;
 
 	/** The root paths. */
 	const char **paths;
diff --git a/src/diag.c b/src/diag.c
index 4909cf5..a86b060 100644
--- a/src/diag.c
+++ b/src/diag.c
@@ -14,27 +14,30 @@
 #include <stdarg.h>
 #include <stdio.h>
 #include <stdlib.h>
-
-/** bfs_diagf() implementation. */
-_printf(2, 0)
-static void bfs_vdiagf(const struct bfs_loc *loc, const char *format, va_list args) {
-	fprintf(stderr, "%s: %s@%s:%d: ", xgetprogname(), loc->func, loc->file, loc->line);
-	vfprintf(stderr, format, args);
-	fprintf(stderr, "\n");
-}
-
-void bfs_diagf(const struct bfs_loc *loc, const char *format, ...) {
+#include <unistd.h>
+
+/**
+ * Print an error using dprintf() if possible, because it's more likely to be
+ * async-signal-safe in practice.
+ */
+#if BFS_HAS_DPRINTF
+#  define veprintf(...) vdprintf(STDERR_FILENO, __VA_ARGS__)
+#else
+#  define veprintf(...) vfprintf(stderr, __VA_ARGS__)
+#endif
+
+void bfs_diagf(const char *format, ...) {
 	va_list args;
 	va_start(args, format);
-	bfs_vdiagf(loc, format, args);
+	veprintf(format, args);
 	va_end(args);
 }
 
 _noreturn
-void bfs_abortf(const struct bfs_loc *loc, const char *format, ...) {
+void bfs_abortf(const char *format, ...) {
 	va_list args;
 	va_start(args, format);
-	bfs_vdiagf(loc, format, args);
+	veprintf(format, args);
 	va_end(args);
 
 	abort();
diff --git a/src/diag.h b/src/diag.h
index 3bea9b2..645dbb1 100644
--- a/src/diag.h
+++ b/src/diag.h
@@ -14,69 +14,68 @@
 #include <stdarg.h>
 
 /**
- * A source code location.
+ * Wrap a diagnostic format string so it looks like
+ *
+ *     bfs: func@src/file.c:0: Message
  */
-struct bfs_loc {
-	const char *file;
-	int line;
-	const char *func;
-};
-
-#define BFS_LOC_INIT { .file = __FILE__, .line = __LINE__, .func = __func__ }
+#define BFS_DIAG_FORMAT_(format) \
+	((format) ? "%s: %s@%s:%d: " format "%s" : "")
 
 /**
- * Get the current source code location.
+ * Add arguments to match a BFS_DIAG_FORMAT string.
  */
-#if __STDC_VERSION__ >= C23
-#  define bfs_location() (&(static const struct bfs_loc)BFS_LOC_INIT)
-#else
-#  define bfs_location() (&(const struct bfs_loc)BFS_LOC_INIT)
-#endif
+#define BFS_DIAG_ARGS_(...) \
+	xgetprogname(), __func__, __FILE__, __LINE__, __VA_ARGS__ "\n"
 
 /**
- * Print a low-level diagnostic message to standard error, formatted like
- *
- *     bfs: func@src/file.c:0: Message
+ * Print a low-level diagnostic message to standard error.
  */
-_printf(2, 3)
-void bfs_diagf(const struct bfs_loc *loc, const char *format, ...);
+_printf(1, 2)
+void bfs_diagf(const char *format, ...);
 
 /**
  * Unconditional diagnostic message.
  */
-#define bfs_diag(...) bfs_diagf(bfs_location(), __VA_ARGS__)
+#define bfs_diag(...) \
+	bfs_diag_(__VA_ARGS__, )
+
+#define bfs_diag_(format, ...) \
+	bfs_diagf(BFS_DIAG_FORMAT_(format), BFS_DIAG_ARGS_(__VA_ARGS__))
 
 /**
  * Print a diagnostic message including the last error.
  */
 #define bfs_ediag(...) \
-	bfs_ediag_("" __VA_ARGS__, errstr())
+	bfs_ediag_(__VA_ARGS__, )
 
 #define bfs_ediag_(format, ...) \
-	bfs_diag(sizeof(format) > 1 ? format ": %s" : "%s", __VA_ARGS__)
+	bfs_diag_(format "%s%s", __VA_ARGS__ (sizeof("" format) > 1 ? ": " : ""), errstr(), )
 
 /**
  * Print a message to standard error and abort.
  */
 _cold
-_printf(2, 3)
+_printf(1, 2)
 _noreturn
-void bfs_abortf(const struct bfs_loc *loc, const char *format, ...);
+void bfs_abortf(const char *format, ...);
 
 /**
  * Unconditional abort with a message.
  */
 #define bfs_abort(...) \
-	bfs_abortf(bfs_location(), __VA_ARGS__)
+	bfs_abort_(__VA_ARGS__, )
+
+#define bfs_abort_(format, ...) \
+	bfs_abortf(BFS_DIAG_FORMAT_(format), BFS_DIAG_ARGS_(__VA_ARGS__))
 
 /**
  * Abort with a message including the last error.
  */
 #define bfs_eabort(...) \
-	bfs_eabort_("" __VA_ARGS__, errstr())
+	bfs_eabort_(__VA_ARGS__, )
 
 #define bfs_eabort_(format, ...) \
-	bfs_abort(sizeof(format) > 1 ? format ": %s" : "%s", __VA_ARGS__)
+	((format) ? bfs_abort_(format ": %s", __VA_ARGS__ errstr(), ) : (void)0)
 
 /**
  * Abort in debug builds; no-op in release builds.
@@ -90,30 +89,43 @@ void bfs_abortf(const struct bfs_loc *loc, const char *format, ...);
 #endif
 
 /**
+ * Get the default assertion message, if no format string was specified.
+ */
+#define BFS_DIAG_MSG_(format, str) \
+	(sizeof(format) > 1 ? "" : str)
+
+/**
  * Unconditional assert.
  */
 #define bfs_verify(...) \
-	bfs_verify_(#__VA_ARGS__, __VA_ARGS__, "", "")
+	bfs_verify_(#__VA_ARGS__, __VA_ARGS__, "", )
 
 #define bfs_verify_(str, cond, format, ...) \
-	((cond) ? (void)0 : bfs_abort( \
+	((cond) ? (void)0 : bfs_verify__(format, BFS_DIAG_MSG_(format, str), __VA_ARGS__))
+
+#define bfs_verify__(format, ...) \
+	bfs_abortf( \
 		sizeof(format) > 1 \
-			? "%.0s" format "%s%s" \
-			: "Assertion failed: `%s`%s", \
-		str, __VA_ARGS__))
+			? BFS_DIAG_FORMAT_("%s" format "%s") \
+			: BFS_DIAG_FORMAT_("Assertion failed: `%s`"), \
+		BFS_DIAG_ARGS_(__VA_ARGS__))
 
 /**
  * Unconditional assert, including the last error.
  */
 #define bfs_everify(...) \
-	bfs_everify_(#__VA_ARGS__, __VA_ARGS__, "", errstr())
+	bfs_everify_(#__VA_ARGS__, __VA_ARGS__, "", )
+
 
 #define bfs_everify_(str, cond, format, ...) \
-	((cond) ? (void)0 : bfs_abort( \
+	((cond) ? (void)0 : bfs_everify__(format, BFS_DIAG_MSG_(format, str), __VA_ARGS__))
+
+#define bfs_everify__(format, ...) \
+	bfs_abortf( \
 		sizeof(format) > 1 \
-			? "%.0s" format "%s: %s" \
-			: "Assertion failed: `%s`: %s", \
-		str, __VA_ARGS__))
+			? BFS_DIAG_FORMAT_("%s" format "%s: %s") \
+			: BFS_DIAG_FORMAT_("Assertion failed: `%s`: %s"), \
+		BFS_DIAG_ARGS_(__VA_ARGS__ errstr(), ))
 
 /**
  * Assert in debug builds; no-op in release builds.
diff --git a/src/dstring.c b/src/dstring.c
index 0f08679..678d685 100644
--- a/src/dstring.c
+++ b/src/dstring.c
@@ -23,7 +23,7 @@ struct dstring {
 	/** Length of the string, *excluding* the terminating NUL. */
 	size_t len;
 	/** The string itself. */
-	alignas(dchar) char str[];
+	alignas(dchar) char str[] _counted_by(cap);
 };
 
 #define DSTR_OFFSET offsetof(struct dstring, str)
diff --git a/src/eval.c b/src/eval.c
index 6e9fffd..0d1bf68 100644
--- a/src/eval.c
+++ b/src/eval.c
@@ -137,11 +137,9 @@ static const struct bfs_stat *eval_stat(struct bfs_eval *state) {
  * Get the difference (in seconds) between two struct timespecs.
  */
 static time_t timespec_diff(const struct timespec *lhs, const struct timespec *rhs) {
-	time_t ret = lhs->tv_sec - rhs->tv_sec;
-	if (lhs->tv_nsec < rhs->tv_nsec) {
-		--ret;
-	}
-	return ret;
+	struct timespec diff = *lhs;
+	timespec_sub(&diff, rhs);
+	return diff.tv_sec;
 }
 
 bool bfs_expr_cmp(const struct bfs_expr *expr, long long n) {
@@ -260,8 +258,7 @@ bool eval_newer(const struct bfs_expr *expr, struct bfs_eval *state) {
 		return false;
 	}
 
-	return time->tv_sec > expr->reftime.tv_sec
-		|| (time->tv_sec == expr->reftime.tv_sec && time->tv_nsec > expr->reftime.tv_nsec);
+	return timespec_cmp(time, &expr->reftime) > 0;
 }
 
 /**
@@ -411,7 +408,7 @@ static int eval_exec_finish(const struct bfs_expr *expr, const struct bfs_ctx *c
 	if (expr->eval_fn == eval_exec) {
 		if (bfs_exec_finish(expr->exec) != 0) {
 			if (errno != 0) {
-				bfs_error(ctx, "%s %s: %s.\n", expr->argv[0], expr->argv[1], errstr());
+				bfs_error(ctx, "${blu}%pq${rs} ${bld}%pq${rs}: %s.\n", expr->argv[0], expr->argv[1], errstr());
 			}
 			ret = -1;
 		}
@@ -432,7 +429,7 @@ static int eval_exec_finish(const struct bfs_expr *expr, const struct bfs_ctx *c
 bool eval_exec(const struct bfs_expr *expr, struct bfs_eval *state) {
 	bool ret = bfs_exec(expr->exec, state->ftwbuf) == 0;
 	if (errno != 0) {
-		eval_error(state, "%s %s: %s.\n", expr->argv[0], expr->argv[1], errstr());
+		eval_error(state, "${blu}%pq${rs} ${bld}%pq${rs}: %s.\n", expr->argv[0], expr->argv[1], errstr());
 	}
 	return ret;
 }
@@ -703,6 +700,34 @@ static int print_owner(FILE *file, const char *name, uintmax_t id, int *width) {
 	}
 }
 
+/** Print a file's modification time. */
+static int print_time(FILE *file, time_t time, time_t now) {
+	struct tm tm;
+	if (!localtime_r(&time, &tm)) {
+		goto error;
+	}
+
+	char time_str[256];
+	size_t time_ret;
+
+	time_t six_months_ago = now - 6 * 30 * 24 * 60 * 60;
+	time_t tomorrow = now + 24 * 60 * 60;
+	if (time <= six_months_ago || time >= tomorrow) {
+		time_ret = strftime(time_str, sizeof(time_str), "%b %e  %Y", &tm);
+	} else {
+		time_ret = strftime(time_str, sizeof(time_str), "%b %e %H:%M", &tm);
+	}
+
+	if (time_ret == 0) {
+		goto error;
+	}
+
+	return fprintf(file, " %s", time_str);
+
+error:
+	return fprintf(file, " %jd", (intmax_t)time);
+}
+
 /**
  * -f?ls action.
  */
@@ -759,28 +784,11 @@ bool eval_fls(const struct bfs_expr *expr, struct bfs_eval *state) {
 
 	time_t time = statbuf->mtime.tv_sec;
 	time_t now = ctx->now.tv_sec;
-	time_t six_months_ago = now - 6 * 30 * 24 * 60 * 60;
-	time_t tomorrow = now + 24 * 60 * 60;
-	struct tm tm;
-	if (!localtime_r(&time, &tm)) {
-		goto error;
-	}
-	char time_str[256];
-	size_t time_ret;
-	if (time <= six_months_ago || time >= tomorrow) {
-		time_ret = strftime(time_str, sizeof(time_str), "%b %e  %Y", &tm);
-	} else {
-		time_ret = strftime(time_str, sizeof(time_str), "%b %e %H:%M", &tm);
-	}
-	if (time_ret == 0) {
-		errno = EOVERFLOW;
-		goto error;
-	}
-	if (cfprintf(cfile, " %s${rs}", time_str) < 0) {
+	if (print_time(file, time, now) < 0) {
 		goto error;
 	}
 
-	if (cfprintf(cfile, " %pP", ftwbuf) < 0) {
+	if (cfprintf(cfile, "${rs} %pP", ftwbuf) < 0) {
 		goto error;
 	}
 
@@ -1047,21 +1055,6 @@ static int eval_gettime(struct bfs_eval *state, struct timespec *ts) {
 }
 
 /**
- * Record an elapsed time.
- */
-static void timespec_elapsed(struct timespec *elapsed, const struct timespec *start, const struct timespec *end) {
-	elapsed->tv_sec += end->tv_sec - start->tv_sec;
-	elapsed->tv_nsec += end->tv_nsec - start->tv_nsec;
-	if (elapsed->tv_nsec < 0) {
-		elapsed->tv_nsec += 1000000000L;
-		--elapsed->tv_sec;
-	} else if (elapsed->tv_nsec >= 1000000000L) {
-		elapsed->tv_nsec -= 1000000000L;
-		++elapsed->tv_sec;
-	}
-}
-
-/**
  * Evaluate an expression.
  */
 static bool eval_expr(struct bfs_expr *expr, struct bfs_eval *state) {
@@ -1079,7 +1072,8 @@ static bool eval_expr(struct bfs_expr *expr, struct bfs_eval *state) {
 
 	if (time) {
 		if (eval_gettime(state, &end) == 0) {
-			timespec_elapsed(&expr->elapsed, &start, &end);
+			timespec_sub(&end, &start);
+			timespec_add(&expr->elapsed, &end);
 		}
 	}
 
diff --git a/src/expr.h b/src/expr.h
index 871b120..c116778 100644
--- a/src/expr.h
+++ b/src/expr.h
@@ -19,6 +19,9 @@
  * Argument/token/expression kinds.
  */
 enum bfs_kind {
+	/** A regular argument. */
+	BFS_ARG,
+
 	/** A flag (-H, -L, etc.). */
 	BFS_FLAG,
 
diff --git a/src/ioq.c b/src/ioq.c
index be5b758..57eb4a5 100644
--- a/src/ioq.c
+++ b/src/ioq.c
@@ -136,6 +136,7 @@
 #include <stdint.h>
 #include <stdlib.h>
 #include <sys/stat.h>
+#include <unistd.h>
 
 #if BFS_WITH_LIBURING
 #  include <liburing.h>
@@ -202,7 +203,7 @@ struct ioqq {
 	cache_align atomic size_t tail;
 
 	/** The circular buffer itself. */
-	cache_align ioq_slot slots[];
+	cache_align ioq_slot slots[]; // _counted_by(slot_mask + 1)
 };
 
 /** Destroy an I/O command queue. */
@@ -259,17 +260,45 @@ static struct ioqq *ioqq_create(size_t size) {
 
 /** Get the monitor associated with a slot. */
 static struct ioq_monitor *ioq_slot_monitor(struct ioqq *ioqq, ioq_slot *slot) {
-	size_t i = slot - ioqq->slots;
+	uint32_t i = slot - ioqq->slots;
+
+	// Hash the index to de-correlate waiters
+	// https://nullprogram.com/blog/2018/07/31/
+	// https://github.com/skeeto/hash-prospector/issues/19#issuecomment-1120105785
+	i ^= i >> 16;
+	i *= UINT32_C(0x21f0aaad);
+	i ^= i >> 15;
+	i *= UINT32_C(0x735a2d97);
+	i ^= i >> 15;
+
 	return &ioqq->monitors[i & ioqq->monitor_mask];
 }
 
 /** Atomically wait for a slot to change. */
 _noinline
 static uintptr_t ioq_slot_wait(struct ioqq *ioqq, ioq_slot *slot, uintptr_t value) {
+	uintptr_t ret;
+
+	// Try spinning a few times (with exponential backoff) before blocking
+	_nounroll
+	for (int i = 1; i < 1024; i *= 2) {
+		_nounroll
+		for (int j = 0; j < i; ++j) {
+			spin_loop();
+		}
+
+		// Check if the slot changed
+		ret = load(slot, relaxed);
+		if (ret != value) {
+			return ret;
+		}
+	}
+
+	// Nothing changed, start blocking
 	struct ioq_monitor *monitor = ioq_slot_monitor(ioqq, slot);
 	mutex_lock(&monitor->mutex);
 
-	uintptr_t ret = load(slot, relaxed);
+	ret = load(slot, relaxed);
 	if (ret != value) {
 		goto done;
 	}
@@ -355,6 +384,14 @@ static bool ioq_slot_push(struct ioqq *ioqq, ioq_slot *slot, struct ioq_ent *ent
 static struct ioq_ent *ioq_slot_pop(struct ioqq *ioqq, ioq_slot *slot, bool block) {
 	uintptr_t prev = load(slot, relaxed);
 	while (true) {
+#if __has_builtin(__builtin_prefetch)
+		// Optimistically prefetch the pointer in this slot.  If this
+		// slot is not full, this will prefetch an invalid address, but
+		// experimentally this is worth it on both Intel (Alder Lake)
+		// and AMD (Zen 2).
+		__builtin_prefetch((void *)(prev << 1), 1 /* write */);
+#endif
+
 		// empty     → skip(1)
 		// skip(n)   → skip(n + 1)
 		// full(ptr) → full(ptr - 1)
@@ -409,13 +446,6 @@ static void ioqq_push_batch(struct ioqq *ioqq, struct ioq_ent *batch[], size_t s
 	} while (size > 0);
 }
 
-/** Pop an entry from the queue. */
-static struct ioq_ent *ioqq_pop(struct ioqq *ioqq, bool block) {
-	size_t i = fetch_add(&ioqq->tail, 1, relaxed);
-	ioq_slot *slot = &ioqq->slots[i & ioqq->slot_mask];
-	return ioq_slot_pop(ioqq, slot, block);
-}
-
 /** Pop a batch of entries from the queue. */
 static void ioqq_pop_batch(struct ioqq *ioqq, struct ioq_ent *batch[], size_t size, bool block) {
 	size_t mask = ioqq->slot_mask;
@@ -431,30 +461,77 @@ static void ioqq_pop_batch(struct ioqq *ioqq, struct ioq_ent *batch[], size_t si
 #define IOQ_BATCH (FALSE_SHARING_SIZE / sizeof(ioq_slot))
 
 /**
- * A batch of entries to send all at once.
+ * A batch of I/O queue entries.
  */
 struct ioq_batch {
-	/** The current batch size. */
-	size_t size;
+	/** The start of the batch. */
+	size_t head;
+	/** The end of the batch. */
+	size_t tail;
 	/** The array of entries. */
 	struct ioq_ent *entries[IOQ_BATCH];
 };
 
-/** Send the batch to a queue. */
+/** Reset a batch. */
+static void ioq_batch_reset(struct ioq_batch *batch) {
+	batch->head = batch->tail = 0;
+}
+
+/** Check if a batch is empty. */
+static bool ioq_batch_empty(const struct ioq_batch *batch) {
+	return batch->head >= batch->tail;
+}
+
+/** Send a batch to a queue. */
 static void ioq_batch_flush(struct ioqq *ioqq, struct ioq_batch *batch) {
-	if (batch->size > 0) {
-		ioqq_push_batch(ioqq, batch->entries, batch->size);
-		batch->size = 0;
+	if (batch->tail > 0) {
+		ioqq_push_batch(ioqq, batch->entries, batch->tail);
+		ioq_batch_reset(batch);
 	}
 }
 
-/** An an entry to a batch, flushing if necessary. */
+/** Push an entry to a batch, flushing if necessary. */
 static void ioq_batch_push(struct ioqq *ioqq, struct ioq_batch *batch, struct ioq_ent *ent) {
-	if (batch->size >= IOQ_BATCH) {
+	batch->entries[batch->tail++] = ent;
+
+	if (batch->tail >= IOQ_BATCH) {
 		ioq_batch_flush(ioqq, batch);
 	}
+}
+
+/** Fill a batch from a queue. */
+static bool ioq_batch_fill(struct ioqq *ioqq, struct ioq_batch *batch, bool block) {
+	ioqq_pop_batch(ioqq, batch->entries, IOQ_BATCH, block);
+
+	ioq_batch_reset(batch);
+	for (size_t i = 0; i < IOQ_BATCH; ++i) {
+		struct ioq_ent *ent = batch->entries[i];
+		if (ent) {
+			batch->entries[batch->tail++] = ent;
+		}
+	}
+
+	return batch->tail > 0;
+}
+
+/** Pop an entry from a batch, filling it first if necessary. */
+static struct ioq_ent *ioq_batch_pop(struct ioqq *ioqq, struct ioq_batch *batch, bool block) {
+	if (ioq_batch_empty(batch)) {
+		// For non-blocking pops, make sure that each ioq_batch_pop()
+		// corresponds to a single (amortized) increment of ioqq->head.
+		// Otherwise, we start skipping many slots and batching ends up
+		// degrading performance.
+		if (!block && batch->head < IOQ_BATCH) {
+			++batch->head;
+			return NULL;
+		}
+
+		if (!ioq_batch_fill(ioqq, batch, block)) {
+			return NULL;
+		}
+	}
 
-	batch->entries[batch->size++] = ent;
+	return batch->entries[batch->head++];
 }
 
 /** Sentinel stop command. */
@@ -503,15 +580,20 @@ struct ioq {
 	struct arena xbufs;
 #endif
 
-	/** Pending I/O requests. */
+	/** Pending I/O request queue. */
 	struct ioqq *pending;
-	/** Ready I/O responses. */
+	/** Ready I/O response queue. */
 	struct ioqq *ready;
 
+	/** Pending request batch. */
+	struct ioq_batch pending_batch;
+	/** Ready request batch. */
+	struct ioq_batch ready_batch;
+
 	/** The number of background threads. */
 	size_t nthreads;
 	/** The background threads themselves. */
-	struct ioq_thread threads[];
+	struct ioq_thread threads[] _counted_by(nthreads);
 };
 
 /** Cancel a request if we need to. */
@@ -532,6 +614,14 @@ static bool ioq_check_cancel(struct ioq *ioq, struct ioq_ent *ent) {
 /** Dispatch a single request synchronously. */
 static void ioq_dispatch_sync(struct ioq *ioq, struct ioq_ent *ent) {
 	switch (ent->op) {
+		case IOQ_NOP:
+			if (ent->nop.type == IOQ_NOP_HEAVY) {
+				// A fast, no-op syscall
+				getppid();
+			}
+			ent->result = 0;
+			return;
+
 		case IOQ_CLOSE:
 			ent->result = try(xclose(ent->close.fd));
 			return;
@@ -580,23 +670,161 @@ struct ioq_ring_state {
 	struct ioq_batch ready;
 };
 
+/** Reap a single CQE. */
+static void ioq_reap_cqe(struct ioq_ring_state *state, struct io_uring_cqe *cqe) {
+	struct ioq *ioq = state->ioq;
+
+	struct ioq_ent *ent = io_uring_cqe_get_data(cqe);
+	ent->result = cqe->res;
+
+	if (ent->result < 0) {
+		goto push;
+	}
+
+	switch (ent->op) {
+		case IOQ_OPENDIR: {
+			int fd = ent->result;
+			if (ioq_check_cancel(ioq, ent)) {
+				xclose(fd);
+				goto push;
+			}
+
+			struct ioq_opendir *args = &ent->opendir;
+			ent->result = try(bfs_opendir(args->dir, fd, NULL, args->flags));
+			if (ent->result >= 0) {
+				// TODO: io_uring_prep_getdents()
+				bfs_polldir(args->dir);
+			} else {
+				xclose(fd);
+			}
+
+			break;
+		}
+
+#if BFS_USE_STATX
+		case IOQ_STAT: {
+			struct ioq_stat *args = &ent->stat;
+			ent->result = try(bfs_statx_convert(args->buf, args->xbuf));
+			break;
+		}
+#endif
+
+		default:
+			break;
+	}
+
+push:
+	ioq_batch_push(ioq->ready, &state->ready, ent);
+}
+
+/** Wait for submitted requests to complete. */
+static void ioq_ring_drain(struct ioq_ring_state *state, size_t wait_nr) {
+	struct ioq *ioq = state->ioq;
+	struct io_uring *ring = state->ring;
+
+	bfs_assert(wait_nr <= state->submitted);
+
+	while (state->submitted > 0) {
+		struct io_uring_cqe *cqe;
+		if (wait_nr > 0) {
+			io_uring_wait_cqes(ring, &cqe, wait_nr, NULL, NULL);
+		}
+
+		unsigned int head;
+		size_t seen = 0;
+		io_uring_for_each_cqe (ring, head, cqe) {
+			ioq_reap_cqe(state, cqe);
+			++seen;
+		}
+
+		io_uring_cq_advance(ring, seen);
+		state->submitted -= seen;
+
+		if (seen >= wait_nr) {
+			break;
+		}
+		wait_nr -= seen;
+	}
+
+	ioq_batch_flush(ioq->ready, &state->ready);
+}
+
+/** Submit prepped SQEs, and wait for some to complete. */
+static void ioq_ring_submit(struct ioq_ring_state *state) {
+	struct io_uring *ring = state->ring;
+
+	size_t unreaped = state->prepped + state->submitted;
+	size_t wait_nr = 0;
+
+	if (state->prepped == 0 && unreaped > 0) {
+		// If we have no new SQEs, wait for at least one old one to
+		// complete, to avoid livelock
+		wait_nr = 1;
+	}
+
+	if (unreaped > ring->sq.ring_entries) {
+		// Keep the completion queue below half full
+		wait_nr = unreaped - ring->sq.ring_entries;
+	}
+
+	// Submit all prepped SQEs
+	while (state->prepped > 0) {
+		int ret = io_uring_submit_and_wait(state->ring, wait_nr);
+		if (ret <= 0) {
+			continue;
+		}
+
+		state->submitted += ret;
+		state->prepped -= ret;
+		if (state->prepped > 0) {
+			// In the unlikely event of a short submission, any SQE
+			// links will be broken.  Wait for all SQEs to complete
+			// to preserve any ordering requirements.
+			ioq_ring_drain(state, state->submitted);
+			wait_nr = 0;
+		}
+	}
+
+	// Drain all the CQEs we waited for (and any others that are ready)
+	ioq_ring_drain(state, wait_nr);
+}
+
+/** Reserve space for a number of SQEs, submitting if necessary. */
+static void ioq_reserve_sqes(struct ioq_ring_state *state, unsigned int count) {
+	while (io_uring_sq_space_left(state->ring) < count) {
+		ioq_ring_submit(state);
+	}
+}
+
+/** Get an SQE, submitting if necessary. */
+static struct io_uring_sqe *ioq_get_sqe(struct ioq_ring_state *state) {
+	ioq_reserve_sqes(state, 1);
+	return io_uring_get_sqe(state->ring);
+}
+
 /** Dispatch a single request asynchronously. */
 static struct io_uring_sqe *ioq_dispatch_async(struct ioq_ring_state *state, struct ioq_ent *ent) {
-	struct io_uring *ring = state->ring;
 	enum ioq_ring_ops ops = state->ops;
 	struct io_uring_sqe *sqe = NULL;
 
 	switch (ent->op) {
+	case IOQ_NOP:
+		if (ent->nop.type == IOQ_NOP_HEAVY) {
+			sqe = ioq_get_sqe(state);
+			io_uring_prep_nop(sqe);
+		}
+		return sqe;
+
 	case IOQ_CLOSE:
 		if (ops & IOQ_RING_CLOSE) {
-			sqe = io_uring_get_sqe(ring);
+			sqe = ioq_get_sqe(state);
 			io_uring_prep_close(sqe, ent->close.fd);
 		}
 		return sqe;
 
 	case IOQ_OPENDIR:
 		if (ops & IOQ_RING_OPENAT) {
-			sqe = io_uring_get_sqe(ring);
+			sqe = ioq_get_sqe(state);
 			struct ioq_opendir *args = &ent->opendir;
 			int flags = O_RDONLY | O_CLOEXEC | O_DIRECTORY;
 			io_uring_prep_openat(sqe, args->dfd, args->path, flags, 0);
@@ -606,7 +834,7 @@ static struct io_uring_sqe *ioq_dispatch_async(struct ioq_ring_state *state, str
 	case IOQ_CLOSEDIR:
 #if BFS_USE_UNWRAPDIR
 		if (ops & IOQ_RING_CLOSE) {
-			sqe = io_uring_get_sqe(ring);
+			sqe = ioq_get_sqe(state);
 			io_uring_prep_close(sqe, bfs_unwrapdir(ent->closedir.dir));
 		}
 #endif
@@ -615,10 +843,10 @@ static struct io_uring_sqe *ioq_dispatch_async(struct ioq_ring_state *state, str
 	case IOQ_STAT:
 #if BFS_USE_STATX
 		if (ops & IOQ_RING_STATX) {
-			sqe = io_uring_get_sqe(ring);
+			sqe = ioq_get_sqe(state);
 			struct ioq_stat *args = &ent->stat;
 			int flags = bfs_statx_flags(args->flags);
-			unsigned int mask = STATX_BASIC_STATS | STATX_BTIME;
+			unsigned int mask = bfs_statx_mask();
 			io_uring_prep_statx(sqe, args->dfd, args->path, flags, mask, args->xbuf);
 		}
 #endif
@@ -631,7 +859,7 @@ static struct io_uring_sqe *ioq_dispatch_async(struct ioq_ring_state *state, str
 
 /** Check if ioq_ring_reap() has work to do. */
 static bool ioq_ring_empty(struct ioq_ring_state *state) {
-	return !state->prepped && !state->submitted && !state->ready.size;
+	return !state->prepped && !state->submitted && ioq_batch_empty(&state->ready);
 }
 
 /** Prep a single SQE. */
@@ -659,121 +887,52 @@ static bool ioq_ring_prep(struct ioq_ring_state *state) {
 	}
 
 	struct ioq *ioq = state->ioq;
-	struct io_uring *ring = state->ring;
-	struct ioq_ent *pending[IOQ_BATCH];
-
-	while (io_uring_sq_space_left(ring) >= IOQ_BATCH) {
-		bool block = ioq_ring_empty(state);
-		ioqq_pop_batch(ioq->pending, pending, IOQ_BATCH, block);
-
-		bool any = false;
-		for (size_t i = 0; i < IOQ_BATCH; ++i) {
-			struct ioq_ent *ent = pending[i];
-			if (ent == &IOQ_STOP) {
-				ioqq_push(ioq->pending, &IOQ_STOP);
-				state->stop = true;
-				goto done;
-			} else if (ent) {
-				ioq_prep_sqe(state, ent);
-				any = true;
-			}
-		}
-
-		if (!any) {
-			break;
-		}
-	}
-
-done:
-	return !ioq_ring_empty(state);
-}
-
-/** Reap a single CQE. */
-static void ioq_reap_cqe(struct ioq_ring_state *state, struct io_uring_cqe *cqe) {
-	struct ioq *ioq = state->ioq;
-	struct io_uring *ring = state->ring;
-
-	struct ioq_ent *ent = io_uring_cqe_get_data(cqe);
-	ent->result = cqe->res;
-	io_uring_cqe_seen(ring, cqe);
-	--state->submitted;
-
-	if (ent->result < 0) {
-		goto push;
-	}
 
-	switch (ent->op) {
-		case IOQ_OPENDIR: {
-			int fd = ent->result;
-			if (ioq_check_cancel(ioq, ent)) {
-				xclose(fd);
-				goto push;
-			}
-
-			struct ioq_opendir *args = &ent->opendir;
-			ent->result = try(bfs_opendir(args->dir, fd, NULL, args->flags));
-			if (ent->result >= 0) {
-				// TODO: io_uring_prep_getdents()
-				bfs_polldir(args->dir);
-			} else {
-				xclose(fd);
-			}
+	struct ioq_batch pending;
+	ioq_batch_reset(&pending);
 
+	while (true) {
+		bool block = ioq_ring_empty(state);
+		struct ioq_ent *ent = ioq_batch_pop(ioq->pending, &pending, block);
+		if (ent == &IOQ_STOP) {
+			ioqq_push(ioq->pending, ent);
+			state->stop = true;
 			break;
-		}
-
-#if BFS_USE_STATX
-		case IOQ_STAT: {
-			struct ioq_stat *args = &ent->stat;
-			ent->result = try(bfs_statx_convert(args->buf, args->xbuf));
+		} else if (ent) {
+			ioq_prep_sqe(state, ent);
+		} else {
 			break;
 		}
-#endif
-
-		default:
-			break;
 	}
 
-push:
-	ioq_batch_push(ioq->ready, &state->ready, ent);
+	bfs_assert(ioq_batch_empty(&pending));
+	return !ioq_ring_empty(state);
 }
 
-/** Reap a batch of CQEs. */
-static void ioq_ring_reap(struct ioq_ring_state *state) {
-	struct ioq *ioq = state->ioq;
-	struct io_uring *ring = state->ring;
+/** io_uring worker loop. */
+static int ioq_ring_work(struct ioq_thread *thread) {
+	struct io_uring *ring = &thread->ring;
 
-	while (state->prepped) {
-		int ret = io_uring_submit_and_wait(ring, 1);
-		if (ret > 0) {
-			state->prepped -= ret;
-			state->submitted += ret;
+#ifdef IORING_SETUP_R_DISABLED
+	if (ring->flags & IORING_SETUP_R_DISABLED) {
+		if (io_uring_enable_rings(ring) != 0) {
+			return -1;
 		}
 	}
+#endif
 
-	while (state->submitted) {
-		struct io_uring_cqe *cqe;
-		if (io_uring_wait_cqe(ring, &cqe) < 0) {
-			continue;
-		}
-
-		ioq_reap_cqe(state, cqe);
-	}
-
-	ioq_batch_flush(ioq->ready, &state->ready);
-}
-
-/** io_uring worker loop. */
-static void ioq_ring_work(struct ioq_thread *thread) {
 	struct ioq_ring_state state = {
 		.ioq = thread->parent,
-		.ring = &thread->ring,
+		.ring = ring,
 		.ops = thread->ring_ops,
 	};
 
 	while (ioq_ring_prep(&state)) {
-		ioq_ring_reap(&state);
+		ioq_ring_submit(&state);
 	}
+
+	ioq_ring_drain(&state, state.submitted);
+	return 0;
 }
 
 #endif // BFS_WITH_LIBURING
@@ -782,30 +941,29 @@ static void ioq_ring_work(struct ioq_thread *thread) {
 static void ioq_sync_work(struct ioq_thread *thread) {
 	struct ioq *ioq = thread->parent;
 
-	bool stop = false;
-	while (!stop) {
-		struct ioq_ent *pending[IOQ_BATCH];
-		ioqq_pop_batch(ioq->pending, pending, IOQ_BATCH, true);
-
-		struct ioq_batch ready;
-		ready.size = 0;
-
-		for (size_t i = 0; i < IOQ_BATCH; ++i) {
-			struct ioq_ent *ent = pending[i];
-			if (ent == &IOQ_STOP) {
-				ioqq_push(ioq->pending, &IOQ_STOP);
-				stop = true;
-				break;
-			} else if (ent) {
-				if (!ioq_check_cancel(ioq, ent)) {
-					ioq_dispatch_sync(ioq, ent);
-				}
-				ioq_batch_push(ioq->ready, &ready, ent);
-			}
+	struct ioq_batch pending, ready;
+	ioq_batch_reset(&pending);
+	ioq_batch_reset(&ready);
+
+	while (true) {
+		if (ioq_batch_empty(&pending)) {
+			ioq_batch_flush(ioq->ready, &ready);
+		}
+
+		struct ioq_ent *ent = ioq_batch_pop(ioq->pending, &pending, true);
+		if (ent == &IOQ_STOP) {
+			ioqq_push(ioq->pending, ent);
+			break;
 		}
 
-		ioq_batch_flush(ioq->ready, &ready);
+		if (!ioq_check_cancel(ioq, ent)) {
+			ioq_dispatch_sync(ioq, ent);
+		}
+		ioq_batch_push(ioq->ready, &ready, ent);
 	}
+
+	bfs_assert(ioq_batch_empty(&pending));
+	ioq_batch_flush(ioq->ready, &ready);
 }
 
 /** Background thread entry point. */
@@ -814,8 +972,9 @@ static void *ioq_work(void *ptr) {
 
 #if BFS_WITH_LIBURING
 	if (thread->ring_err == 0) {
-		ioq_ring_work(thread);
-		return NULL;
+		if (ioq_ring_work(thread) == 0) {
+			return NULL;
+		}
 	}
 #endif
 
@@ -823,6 +982,27 @@ static void *ioq_work(void *ptr) {
 	return NULL;
 }
 
+#if BFS_WITH_LIBURING
+/** Test whether some io_uring setup flags are supported. */
+static bool ioq_ring_probe_flags(struct io_uring_params *params, unsigned int flags) {
+	unsigned int saved = params->flags;
+	params->flags |= flags;
+
+	struct io_uring ring;
+	int ret = io_uring_queue_init_params(2, &ring, params);
+	if (ret == 0) {
+		io_uring_queue_exit(&ring);
+	}
+
+	if (ret == -EINVAL) {
+		params->flags = saved;
+		return false;
+	}
+
+	return true;
+}
+#endif
+
 /** Initialize io_uring thread state. */
 static int ioq_ring_init(struct ioq *ioq, struct ioq_thread *thread) {
 #if BFS_WITH_LIBURING
@@ -836,11 +1016,31 @@ static int ioq_ring_init(struct ioq *ioq, struct ioq_thread *thread) {
 		return -1;
 	}
 
-	// Share io-wq workers between rings
 	struct io_uring_params params = {0};
+
 	if (prev) {
-		params.flags |= IORING_SETUP_ATTACH_WQ;
+		// Share io-wq workers between rings
+		params.flags = prev->ring.flags | IORING_SETUP_ATTACH_WQ;
 		params.wq_fd = prev->ring.ring_fd;
+	} else {
+#ifdef IORING_SETUP_SUBMIT_ALL
+		// Don't abort submission just because an inline request fails
+		ioq_ring_probe_flags(&params, IORING_SETUP_SUBMIT_ALL);
+#endif
+
+#ifdef IORING_SETUP_R_DISABLED
+		// Don't enable the ring yet (needed for SINGLE_ISSUER)
+		if (ioq_ring_probe_flags(&params, IORING_SETUP_R_DISABLED)) {
+#  ifdef IORING_SETUP_SINGLE_ISSUER
+			// Allow optimizations assuming only one task submits SQEs
+			ioq_ring_probe_flags(&params, IORING_SETUP_SINGLE_ISSUER);
+#  endif
+#  ifdef IORING_SETUP_DEFER_TASKRUN
+			// Don't interrupt us aggressively with completion events
+			ioq_ring_probe_flags(&params, IORING_SETUP_DEFER_TASKRUN);
+#  endif
+		}
+#endif
 	}
 
 	// Use a page for each SQE ring
@@ -902,7 +1102,8 @@ static void ioq_ring_exit(struct ioq_thread *thread) {
 }
 
 /** Create an I/O queue thread. */
-static int ioq_thread_create(struct ioq *ioq, struct ioq_thread *thread) {
+static int ioq_thread_create(struct ioq *ioq, size_t i) {
+	struct ioq_thread *thread = &ioq->threads[i];
 	thread->parent = ioq;
 
 	ioq_ring_init(ioq, thread);
@@ -912,6 +1113,11 @@ static int ioq_thread_create(struct ioq *ioq, struct ioq_thread *thread) {
 		return -1;
 	}
 
+	char name[16];
+	if (snprintf(name, sizeof(name), "ioq-%zu", i) >= 0) {
+		thread_setname(thread->id, name);
+	}
+
 	return 0;
 }
 
@@ -946,7 +1152,7 @@ struct ioq *ioq_create(size_t depth, size_t nthreads) {
 
 	ioq->nthreads = nthreads;
 	for (size_t i = 0; i < nthreads; ++i) {
-		if (ioq_thread_create(ioq, &ioq->threads[i]) != 0) {
+		if (ioq_thread_create(ioq, i) != 0) {
 			ioq->nthreads = i;
 			goto fail;
 		}
@@ -988,6 +1194,18 @@ static struct ioq_ent *ioq_request(struct ioq *ioq, enum ioq_op op, void *ptr) {
 	return ent;
 }
 
+int ioq_nop(struct ioq *ioq, enum ioq_nop_type type, void *ptr) {
+	struct ioq_ent *ent = ioq_request(ioq, IOQ_NOP, ptr);
+	if (!ent) {
+		return -1;
+	}
+
+	ent->nop.type = type;
+
+	ioq_batch_push(ioq->pending, &ioq->pending_batch, ent);
+	return 0;
+}
+
 int ioq_close(struct ioq *ioq, int fd, void *ptr) {
 	struct ioq_ent *ent = ioq_request(ioq, IOQ_CLOSE, ptr);
 	if (!ent) {
@@ -996,7 +1214,7 @@ int ioq_close(struct ioq *ioq, int fd, void *ptr) {
 
 	ent->close.fd = fd;
 
-	ioqq_push(ioq->pending, ent);
+	ioq_batch_push(ioq->pending, &ioq->pending_batch, ent);
 	return 0;
 }
 
@@ -1012,7 +1230,7 @@ int ioq_opendir(struct ioq *ioq, struct bfs_dir *dir, int dfd, const char *path,
 	args->path = path;
 	args->flags = flags;
 
-	ioqq_push(ioq->pending, ent);
+	ioq_batch_push(ioq->pending, &ioq->pending_batch, ent);
 	return 0;
 }
 
@@ -1024,7 +1242,7 @@ int ioq_closedir(struct ioq *ioq, struct bfs_dir *dir, void *ptr) {
 
 	ent->closedir.dir = dir;
 
-	ioqq_push(ioq->pending, ent);
+	ioq_batch_push(ioq->pending, &ioq->pending_batch, ent);
 	return 0;
 }
 
@@ -1048,16 +1266,23 @@ int ioq_stat(struct ioq *ioq, int dfd, const char *path, enum bfs_stat_flags fla
 	}
 #endif
 
-	ioqq_push(ioq->pending, ent);
+	ioq_batch_push(ioq->pending, &ioq->pending_batch, ent);
 	return 0;
 }
 
+void ioq_submit(struct ioq *ioq) {
+	ioq_batch_flush(ioq->pending, &ioq->pending_batch);
+}
+
 struct ioq_ent *ioq_pop(struct ioq *ioq, bool block) {
+	// Don't forget to submit before popping
+	bfs_assert(ioq_batch_empty(&ioq->pending_batch));
+
 	if (ioq->size == 0) {
 		return NULL;
 	}
 
-	return ioqq_pop(ioq->ready, block);
+	return ioq_batch_pop(ioq->ready, &ioq->ready_batch, block);
 }
 
 void ioq_free(struct ioq *ioq, struct ioq_ent *ent) {
@@ -1075,7 +1300,8 @@ void ioq_free(struct ioq *ioq, struct ioq_ent *ent) {
 
 void ioq_cancel(struct ioq *ioq) {
 	if (!exchange(&ioq->cancel, true, relaxed)) {
-		ioqq_push(ioq->pending, &IOQ_STOP);
+		ioq_batch_push(ioq->pending, &ioq->pending_batch, &IOQ_STOP);
+		ioq_submit(ioq);
 	}
 }
 
diff --git a/src/ioq.h b/src/ioq.h
index cb14be4..5eaa066 100644
--- a/src/ioq.h
+++ b/src/ioq.h
@@ -8,6 +8,7 @@
 #ifndef BFS_IOQ_H
 #define BFS_IOQ_H
 
+#include "bfs.h"
 #include "dir.h"
 #include "stat.h"
 
@@ -22,6 +23,8 @@ struct ioq;
  * I/O queue operations.
  */
 enum ioq_op {
+	/** ioq_nop(). */
+	IOQ_NOP,
 	/** ioq_close(). */
 	IOQ_CLOSE,
 	/** ioq_opendir(). */
@@ -33,18 +36,21 @@ enum ioq_op {
 };
 
 /**
- * The I/O queue implementation needs two tag bits in each pointer to a struct
- * ioq_ent, so we need to ensure at least 4-byte alignment.  The natural
- * alignment is enough on most architectures, but not m68k, so over-align it.
+ * ioq_nop() types.
  */
-#define IOQ_ENT_ALIGN alignas(4)
+enum ioq_nop_type {
+	/** A lightweight nop that avoids syscalls. */
+	IOQ_NOP_LIGHT,
+	/** A heavyweight nop that involves a syscall. */
+	IOQ_NOP_HEAVY,
+};
 
 /**
  * An I/O queue entry.
  */
 struct ioq_ent {
 	/** The I/O operation. */
-	IOQ_ENT_ALIGN enum ioq_op op;
+	cache_align enum ioq_op op;
 
 	/** The return value (on success) or negative error code (on failure). */
 	int result;
@@ -54,6 +60,10 @@ struct ioq_ent {
 
 	/** Operation-specific arguments. */
 	union {
+		/** ioq_nop() args. */
+		struct ioq_nop {
+			enum ioq_nop_type type;
+		} nop;
 		/** ioq_close() args. */
 		struct ioq_close {
 			int fd;
@@ -98,6 +108,20 @@ struct ioq *ioq_create(size_t depth, size_t nthreads);
 size_t ioq_capacity(const struct ioq *ioq);
 
 /**
+ * A no-op, for benchmarking.
+ *
+ * @ioq
+ *         The I/O queue.
+ * @type
+ *         The type of operation to perform.
+ * @ptr
+ *         An arbitrary pointer to associate with the request.
+ * @return
+ *         0 on success, or -1 on failure.
+ */
+int ioq_nop(struct ioq *ioq, enum ioq_nop_type type, void *ptr);
+
+/**
  * Asynchronous close().
  *
  * @ioq
@@ -166,6 +190,11 @@ int ioq_closedir(struct ioq *ioq, struct bfs_dir *dir, void *ptr);
 int ioq_stat(struct ioq *ioq, int dfd, const char *path, enum bfs_stat_flags flags, struct bfs_stat *buf, void *ptr);
 
 /**
+ * Submit any buffered requests.
+ */
+void ioq_submit(struct ioq *ioq);
+
+/**
  * Pop a response from the queue.
  *
  * @ioq
diff --git a/src/list.h b/src/list.h
index 48f0d06..276c610 100644
--- a/src/list.h
+++ b/src/list.h
@@ -82,11 +82,9 @@
 #ifndef BFS_LIST_H
 #define BFS_LIST_H
 
-#include "bfs.h"
 #include "diag.h"
 
 #include <stddef.h>
-#include <stdint.h>
 #include <string.h>
 
 /**
@@ -374,24 +372,19 @@
 #define SLIST_REMOVE_(list, cursor, ...) \
 	SLIST_REMOVE__((list), (cursor), LIST_NEXT_(__VA_ARGS__))
 
-// Scratch variables for the type-safe SLIST_REMOVE() implementation.
-// Not a pointer type due to https://github.com/llvm/llvm-project/issues/109718.
-_maybe_unused
-static thread_local uintptr_t _slist_prev, _slist_next;
-
-/** Suppress -Wunused-value. */
-_maybe_unused
-static inline void *_slist_cast(uintptr_t ptr) {
-	return (void *)ptr;
-}
-
 #define SLIST_REMOVE__(list, cursor, next) \
-	(_slist_prev = (uintptr_t)(void *)*cursor, \
-	 _slist_next = (uintptr_t)(void *)(*cursor)->next, \
-	 (*cursor)->next = NULL, \
-	 *cursor = (void *)_slist_next, \
-	 list->tail = *cursor ? list->tail : cursor, \
-	 _slist_cast(_slist_prev))
+	(list->tail = (*cursor)->next ? list->tail : cursor, \
+	 slist_remove_(*cursor, cursor, &(*cursor)->next, sizeof(*cursor)))
+
+// Helper for SLIST_REMOVE()
+static inline void *slist_remove_(void *ret, void *cursor, void *next, size_t size) {
+	// ret = *cursor;
+	// *cursor = ret->next;
+	memcpy(cursor, next, size);
+	// ret->next = NULL;
+	memset(next, 0, size);
+	return ret;
+}
 
 /**
  * Pop the head off a singly-linked list.
diff --git a/src/mtab.c b/src/mtab.c
index bf9fc53..40a9885 100644
--- a/src/mtab.c
+++ b/src/mtab.c
@@ -256,10 +256,7 @@ static int bfs_mtab_fill_types(struct bfs_mtab *mtab) {
 			continue;
 		}
 
-		struct trie_leaf *leaf = trie_insert_mem(&mtab->types, &sb.dev, sizeof(sb.dev));
-		if (leaf) {
-			leaf->value = mount->type;
-		} else {
+		if (trie_set_mem(&mtab->types, &sb.mnt_id, sizeof(sb.mnt_id), mount->type) != 0) {
 			goto fail;
 		}
 	}
@@ -282,9 +279,9 @@ const char *bfs_fstype(const struct bfs_mtab *mtab, const struct bfs_stat *statb
 		}
 	}
 
-	const struct trie_leaf *leaf = trie_find_mem(&mtab->types, &statbuf->dev, sizeof(statbuf->dev));
-	if (leaf) {
-		return leaf->value;
+	const char *type = trie_get_mem(&mtab->types, &statbuf->mnt_id, sizeof(statbuf->mnt_id));
+	if (type) {
+		return type;
 	} else {
 		return "unknown";
 	}
diff --git a/src/opt.c b/src/opt.c
index 49e8873..9094794 100644
--- a/src/opt.c
+++ b/src/opt.c
@@ -1623,14 +1623,19 @@ static void data_flow_icmp(struct bfs_opt *opt, const struct bfs_expr *expr, enu
 
 /** Transfer function for -{execut,read,writ}able. */
 static struct bfs_expr *data_flow_access(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
-	if (expr->num & R_OK) {
+	switch (expr->num) {
+	case R_OK:
 		data_flow_pred(opt, READABLE_PRED, true);
-	}
-	if (expr->num & W_OK) {
+		break;
+	case W_OK:
 		data_flow_pred(opt, WRITABLE_PRED, true);
-	}
-	if (expr->num & X_OK) {
+		break;
+	case X_OK:
 		data_flow_pred(opt, EXECUTABLE_PRED, true);
+		break;
+	default:
+		bfs_bug("Unknown access() mode %lld", expr->num);
+		break;
 	}
 
 	return expr;
@@ -1655,7 +1660,7 @@ static struct bfs_expr *data_flow_gid(struct bfs_opt *opt, struct bfs_expr *expr
 		gid_t gid = range->min;
 		bool nogroup = !bfs_getgrgid(opt->ctx->groups, gid);
 		if (errno == 0) {
-			data_flow_pred(opt, NOGROUP_PRED, nogroup);
+			constrain_pred(&opt->after_true.preds[NOGROUP_PRED], nogroup);
 		}
 	}
 
@@ -1729,7 +1734,7 @@ static struct bfs_expr *data_flow_uid(struct bfs_opt *opt, struct bfs_expr *expr
 		uid_t uid = range->min;
 		bool nouser = !bfs_getpwuid(opt->ctx->users, uid);
 		if (errno == 0) {
-			data_flow_pred(opt, NOUSER_PRED, nouser);
+			constrain_pred(&opt->after_true.preds[NOUSER_PRED], nouser);
 		}
 	}
 
diff --git a/src/parse.c b/src/parse.c
index 42f71cc..5ec4c0e 100644
--- a/src/parse.c
+++ b/src/parse.c
@@ -84,8 +84,6 @@ struct bfs_parser {
 	enum use_color use_color;
 	/** Whether a -print action is implied. */
 	bool implicit_print;
-	/** Whether the default root "." should be used. */
-	bool implicit_root;
 	/** Whether the expression has started. */
 	bool expr_started;
 	/** Whether an information option like -help or -version was passed. */
@@ -95,20 +93,20 @@ struct bfs_parser {
 
 	/** The last non-path argument. */
 	char **last_arg;
-	/** A "-depth"-type argument, if any. */
-	char **depth_arg;
-	/** A "-limit" argument, if any. */
-	char **limit_arg;
-	/** A "-prune" argument, if any. */
-	char **prune_arg;
-	/** A "-mount" argument, if any. */
-	char **mount_arg;
-	/** An "-xdev" argument, if any. */
-	char **xdev_arg;
-	/** A "-files0-from -" argument, if any. */
-	char **files0_stdin_arg;
-	/** An "-ok"-type expression, if any. */
-	const struct bfs_expr *ok_expr;
+	/** A "-depth"-type expression, if any. */
+	const struct bfs_expr *depth_expr;
+	/** A "-limit" expression, if any. */
+	const struct bfs_expr *limit_expr;
+	/** A "-prune" expression, if any. */
+	const struct bfs_expr *prune_expr;
+	/** A "-mount" expression, if any. */
+	const struct bfs_expr *mount_expr;
+	/** An "-xdev" expression, if any. */
+	const struct bfs_expr *xdev_expr;
+	/** A "-files0-from" expression, if any. */
+	const struct bfs_expr *files0_expr;
+	/** An expression that consumes stdin, if any. */
+	const struct bfs_expr *stdin_expr;
 
 	/** The current time (maybe modified by -daystart). */
 	struct timespec now;
@@ -176,14 +174,14 @@ static void parse_argv_error(const struct bfs_parser *parser, char **argv, size_
 /**
  * Print an error about conflicting command line arguments.
  */
-_printf(6, 7)
-static void parse_conflict_error(const struct bfs_parser *parser, char **argv1, size_t argc1, char **argv2, size_t argc2, const char *format, ...) {
+_printf(4, 5)
+static void parse_conflict_error(const struct bfs_parser *parser, const struct bfs_expr *expr1, const struct bfs_expr *expr2, const char *format, ...) {
 	const struct bfs_ctx *ctx = parser->ctx;
 
 	bool highlight[ctx->argc];
 	init_highlight(ctx, highlight);
-	highlight_args(ctx, argv1, argc1, highlight);
-	highlight_args(ctx, argv2, argc2, highlight);
+	highlight_args(ctx, expr1->argv, expr1->argc, highlight);
+	highlight_args(ctx, expr2->argv, expr2->argc, highlight);
 	bfs_argv_error(ctx, highlight);
 
 	va_list args;
@@ -231,14 +229,14 @@ static bool parse_warning(const struct bfs_parser *parser, const char *format, .
 /**
  * Print a warning about conflicting command line arguments.
  */
-_printf(6, 7)
-static bool parse_conflict_warning(const struct bfs_parser *parser, char **argv1, size_t argc1, char **argv2, size_t argc2, const char *format, ...) {
+_printf(4, 5)
+static bool parse_conflict_warning(const struct bfs_parser *parser, const struct bfs_expr *expr1, const struct bfs_expr *expr2, const char *format, ...) {
 	const struct bfs_ctx *ctx = parser->ctx;
 
 	bool highlight[ctx->argc];
 	init_highlight(ctx, highlight);
-	highlight_args(ctx, argv1, argc1, highlight);
-	highlight_args(ctx, argv2, argc2, highlight);
+	highlight_args(ctx, expr1->argv, expr1->argc, highlight);
+	highlight_args(ctx, expr2->argv, expr2->argc, highlight);
 	if (!bfs_argv_warning(ctx, highlight)) {
 		return false;
 	}
@@ -269,6 +267,21 @@ static bool parse_expr_warning(const struct bfs_parser *parser, const struct bfs
 }
 
 /**
+ * Report an error if stdin is already consumed, then consume it.
+ */
+static bool consume_stdin(struct bfs_parser *parser, const struct bfs_expr *expr) {
+	if (parser->stdin_expr) {
+		parse_conflict_error(parser, parser->stdin_expr, expr,
+			"%pX and %pX can't both use standard input.\n",
+			parser->stdin_expr, expr);
+		return false;
+	}
+
+	parser->stdin_expr = expr;
+	return true;
+}
+
+/**
  * Allocate a new expression.
  */
 static struct bfs_expr *parse_new_expr(const struct bfs_parser *parser, bfs_eval_fn *eval_fn, size_t argc, char **argv, enum bfs_kind kind) {
@@ -383,6 +396,8 @@ static struct bfs_expr *parse_expr(struct bfs_parser *parser);
  * Advance by a single token.
  */
 static char **parser_advance(struct bfs_parser *parser, enum bfs_kind kind, size_t argc) {
+	struct bfs_ctx *ctx = parser->ctx;
+
 	if (kind != BFS_FLAG && kind != BFS_PATH) {
 		parser->expr_started = true;
 	}
@@ -391,6 +406,9 @@ static char **parser_advance(struct bfs_parser *parser, enum bfs_kind kind, size
 		parser->last_arg = parser->argv;
 	}
 
+	size_t i = parser->argv - ctx->argv;
+	ctx->kinds[i] = kind;
+
 	char **argv = parser->argv;
 	parser->argv += argc;
 	return argv;
@@ -414,7 +432,6 @@ static int parse_root(struct bfs_parser *parser, const char *path) {
 		return -1;
 	}
 
-	parser->implicit_root = false;
 	return 0;
 }
 
@@ -1158,22 +1175,33 @@ static struct bfs_expr *parse_daystart(struct bfs_parser *parser, int arg1, int
  * Parse -delete.
  */
 static struct bfs_expr *parse_delete(struct bfs_parser *parser, int arg1, int arg2) {
+	struct bfs_expr *expr = parse_nullary_action(parser, eval_delete);
+	if (!expr) {
+		return NULL;
+	}
+
 	struct bfs_ctx *ctx = parser->ctx;
 	ctx->flags |= BFTW_POST_ORDER;
 	ctx->dangerous = true;
 
-	parser->depth_arg = parser->argv;
-
-	return parse_nullary_action(parser, eval_delete);
+	parser->depth_expr = expr;
+	return expr;
 }
 
 /**
  * Parse -d.
  */
-static struct bfs_expr *parse_depth(struct bfs_parser *parser, int arg1, int arg2) {
+static struct bfs_expr *parse_depth(struct bfs_parser *parser, int flag, int arg2) {
+	struct bfs_expr *expr = flag
+		? parse_nullary_flag(parser)
+		: parse_nullary_option(parser);
+	if (!expr) {
+		return NULL;
+	}
+
 	parser->ctx->flags |= BFTW_POST_ORDER;
-	parser->depth_arg = parser->argv;
-	return parse_nullary_flag(parser);
+	parser->depth_expr = expr;
+	return expr;
 }
 
 /**
@@ -1219,6 +1247,41 @@ static struct bfs_expr *parse_empty(struct bfs_parser *parser, int arg1, int arg
 	return expr;
 }
 
+/** Check for unsafe relative paths in $PATH. */
+static const char *unsafe_path(const struct bfs_exec *execbuf) {
+	if (!(execbuf->flags & BFS_EXEC_CHDIR)) {
+		// Not -execdir or -okdir
+		return NULL;
+	}
+
+	const char *exe = execbuf->tmpl_argv[0];
+	if (strchr(exe, '/')) {
+		// No $PATH lookups for /foo or foo/bar
+		return NULL;
+	}
+
+	if (strstr(exe, "{}")) {
+		// Substituted paths always contain a /
+		return NULL;
+	}
+
+	const char *path = getenv("PATH");
+	while (path) {
+		if (path[0] != '/') {
+			// Relative $PATH component!
+			return path;
+		}
+
+		path = strchr(path, ':');
+		if (path) {
+			++path;
+		}
+	}
+
+	// No relative components in $PATH
+	return NULL;
+}
+
 /**
  * Parse -exec(dir)?/-ok(dir)?.
  */
@@ -1241,29 +1304,21 @@ static struct bfs_expr *parse_exec(struct bfs_parser *parser, int flags, int arg
 	// For pipe() in bfs_spawn()
 	expr->ephemeral_fds = 2;
 
-	if (execbuf->flags & BFS_EXEC_CHDIR) {
-		// Check for relative paths in $PATH
-		const char *path = getenv("PATH");
-		while (path) {
-			if (*path != '/') {
-				size_t len = strcspn(path, ":");
-				char *comp = strndup(path, len);
-				if (comp) {
-					parse_expr_error(parser, expr,
-						"This action would be unsafe, since ${bld}$$PATH${rs} contains the relative path ${bld}%pq${rs}\n", comp);
-					free(comp);
-				} else {
-					parse_perror(parser, "strndup()");
-				}
-				return NULL;
-			}
-
-			path = strchr(path, ':');
-			if (path) {
-				++path;
-			}
+	const char *unsafe = unsafe_path(execbuf);
+	if (unsafe) {
+		size_t len = strcspn(unsafe, ":");
+		char *comp = strndup(unsafe, len);
+		if (comp) {
+			parse_expr_error(parser, expr,
+				"This action would be unsafe, since ${bld}$$PATH${rs} contains the relative path ${bld}%pq${rs}\n", comp);
+			free(comp);
+		} else {
+			parse_perror(parser, "strndup()");
 		}
+		return NULL;
+	}
 
+	if (execbuf->flags & BFS_EXEC_CHDIR) {
 		// To dup() the parent directory
 		if (execbuf->flags & BFS_EXEC_MULTI) {
 			++expr->persistent_fds;
@@ -1273,7 +1328,9 @@ static struct bfs_expr *parse_exec(struct bfs_parser *parser, int flags, int arg
 	}
 
 	if (execbuf->flags & BFS_EXEC_CONFIRM) {
-		parser->ok_expr = expr;
+		if (!consume_stdin(parser, expr)) {
+			return NULL;
+		}
 	} else {
 		ctx->dangerous = true;
 	}
@@ -1304,11 +1361,17 @@ static struct bfs_expr *parse_exit(struct bfs_parser *parser, int arg1, int arg2
  * Parse -f PATH.
  */
 static struct bfs_expr *parse_f(struct bfs_parser *parser, int arg1, int arg2) {
+	struct bfs_ctx *ctx = parser->ctx;
+
 	struct bfs_expr *expr = parse_unary_flag(parser);
 	if (!expr) {
 		return NULL;
 	}
 
+	// Mark the path as a path, not a regular argument
+	size_t i = expr->argv - ctx->argv;
+	ctx->kinds[i + 1] = BFS_PATH;
+
 	if (parse_root(parser, expr->argv[1]) != 0) {
 		return NULL;
 	}
@@ -1325,50 +1388,14 @@ static struct bfs_expr *parse_files0_from(struct bfs_parser *parser, int arg1, i
 		return NULL;
 	}
 
-	const char *from = expr->argv[1];
-
-	FILE *file;
-	if (strcmp(from, "-") == 0) {
-		file = stdin;
-	} else {
-		file = xfopen(from, O_RDONLY | O_CLOEXEC);
-	}
-	if (!file) {
-		parse_expr_error(parser, expr, "%s.\n", errstr());
-		return NULL;
-	}
-
-	while (true) {
-		char *path = xgetdelim(file, '\0');
-		if (!path) {
-			if (errno) {
-				goto fail;
-			} else {
-				break;
-			}
-		}
-
-		int ret = parse_root(parser, path);
-		free(path);
-		if (ret != 0) {
-			goto fail;
-		}
-	}
-
-	if (file == stdin) {
-		parser->files0_stdin_arg = expr->argv;
-	} else {
-		fclose(file);
-	}
-
-	parser->implicit_root = false;
+	// For compatibility with GNU find,
+	//
+	//     bfs -files0-from a -files0-from b
+	//
+	// should *only* use b, not a.  So stash the expression here and only
+	// process the last one at the end of parsing.
+	parser->files0_expr = expr;
 	return expr;
-
-fail:
-	if (file != stdin) {
-		fclose(file);
-	}
-	return NULL;
 }
 
 /**
@@ -1638,11 +1665,11 @@ static struct bfs_expr *parse_limit(struct bfs_parser *parser, int arg1, int arg
 	}
 
 	if (expr->num <= 0) {
-		parse_expr_error(parser, expr, "The ${blu}%s${rs} must be at least ${bld}1${rs}.\n", expr->argv[0]);
+		parse_expr_error(parser, expr, "The %pX must be at least ${bld}1${rs}.\n", expr);
 		return NULL;
 	}
 
-	parser->limit_arg = expr->argv;
+	parser->limit_expr = expr;
 	return expr;
 }
 
@@ -1676,7 +1703,7 @@ static struct bfs_expr *parse_mount(struct bfs_parser *parser, int arg1, int arg
 	}
 
 	parser->ctx->flags |= BFTW_SKIP_MOUNTS;
-	parser->mount_arg = expr->argv;
+	parser->mount_expr = expr;
 	return expr;
 }
 
@@ -1855,9 +1882,15 @@ static struct bfs_expr *parse_nohidden(struct bfs_parser *parser, int arg1, int
  * Parse -noleaf.
  */
 static struct bfs_expr *parse_noleaf(struct bfs_parser *parser, int arg1, int arg2) {
-	parse_warning(parser, "${ex}%s${rs} does not apply the optimization that ${blu}%s${rs} inhibits.\n\n",
-		BFS_COMMAND, parser->argv[0]);
-	return parse_nullary_option(parser);
+	struct bfs_expr *expr = parse_nullary_option(parser);
+	if (!expr) {
+		return NULL;
+	}
+
+	parse_expr_warning(parser, expr,
+		"${ex}%s${rs} does not apply the optimization that %px inhibits.\n\n",
+		BFS_COMMAND, expr);
+	return expr;
 }
 
 /**
@@ -2193,8 +2226,13 @@ static struct bfs_expr *parse_printx(struct bfs_parser *parser, int arg1, int ar
  * Parse -prune.
  */
 static struct bfs_expr *parse_prune(struct bfs_parser *parser, int arg1, int arg2) {
-	parser->prune_arg = parser->argv;
-	return parse_nullary_action(parser, eval_prune);
+	struct bfs_expr *expr = parse_nullary_action(parser, eval_prune);
+	if (!expr) {
+		return NULL;
+	}
+
+	parser->prune_expr = expr;
+	return expr;
 }
 
 /**
@@ -2572,9 +2610,14 @@ static struct bfs_expr *parse_xattrname(struct bfs_parser *parser, int arg1, int
  * Parse -xdev.
  */
 static struct bfs_expr *parse_xdev(struct bfs_parser *parser, int arg1, int arg2) {
+	struct bfs_expr *expr = parse_nullary_option(parser);
+	if (!expr) {
+		return NULL;
+	}
+
 	parser->ctx->flags |= BFTW_PRUNE_MOUNTS;
-	parser->xdev_arg = parser->argv;
-	return parse_nullary_option(parser);
+	parser->xdev_expr = expr;
+	return expr;
 }
 
 /**
@@ -3048,10 +3091,10 @@ static const struct table_entry parse_table[] = {
 	{"-context", BFS_TEST, parse_context, true},
 	{"-csince", BFS_TEST, parse_since, BFS_STAT_CTIME},
 	{"-ctime", BFS_TEST, parse_time, BFS_STAT_CTIME},
-	{"-d", BFS_FLAG, parse_depth},
+	{"-d", BFS_FLAG, parse_depth, true},
 	{"-daystart", BFS_OPTION, parse_daystart},
 	{"-delete", BFS_ACTION, parse_delete},
-	{"-depth", BFS_OPTION, parse_depth_n},
+	{"-depth", BFS_OPTION, parse_depth_n, false},
 	{"-empty", BFS_TEST, parse_empty},
 	{"-exclude", BFS_OPERATOR},
 	{"-exec", BFS_ACTION, parse_exec, 0},
@@ -3503,6 +3546,73 @@ static struct bfs_expr *parse_expr(struct bfs_parser *parser) {
 	return expr;
 }
 
+/** Handle -files0-from after parsing. */
+static int parse_files0_roots(struct bfs_parser *parser) {
+	const struct bfs_ctx *ctx = parser->ctx;
+	const struct bfs_expr *expr = parser->files0_expr;
+
+	if (ctx->npaths > 0) {
+		bool highlight[ctx->argc];
+		init_highlight(ctx, highlight);
+		highlight_args(ctx, expr->argv, expr->argc, highlight);
+
+		for (size_t i = 0; i < ctx->argc; ++i) {
+			if (ctx->kinds[i] == BFS_PATH) {
+				highlight[i] = true;
+			}
+		}
+
+		bfs_argv_error(ctx, highlight);
+		bfs_error(ctx, "Cannot combine %pX with explicit root paths.\n", expr);
+		return -1;
+	}
+
+	const char *from = expr->argv[1];
+
+	FILE *file;
+	if (strcmp(from, "-") == 0) {
+		if (!consume_stdin(parser, expr)) {
+			return -1;
+		}
+		file = stdin;
+	} else {
+		file = xfopen(from, O_RDONLY | O_CLOEXEC);
+	}
+	if (!file) {
+		parse_expr_error(parser, expr, "%s.\n", errstr());
+		return -1;
+	}
+
+	while (true) {
+		char *path = xgetdelim(file, '\0');
+		if (!path) {
+			if (errno) {
+				goto fail;
+			} else {
+				break;
+			}
+		}
+
+		int ret = parse_root(parser, path);
+		free(path);
+		if (ret != 0) {
+			goto fail;
+		}
+	}
+
+	if (file != stdin) {
+		fclose(file);
+	}
+
+	return 0;
+
+fail:
+	if (file != stdin) {
+		fclose(file);
+	}
+	return -1;
+}
+
 /**
  * Parse the top-level expression.
  */
@@ -3528,12 +3638,22 @@ static struct bfs_expr *parse_whole_expr(struct bfs_parser *parser) {
 		return NULL;
 	}
 
+	if (parser->files0_expr) {
+		if (parse_files0_roots(parser) != 0) {
+			return NULL;
+		}
+	} else if (ctx->npaths == 0) {
+		if (parse_root(parser, ".") != 0) {
+			return NULL;
+		}
+	}
+
 	if (parser->implicit_print) {
-		char **limit = parser->limit_arg;
+		const struct bfs_expr *limit = parser->limit_expr;
 		if (limit) {
-			parse_argv_error(parser, parser->limit_arg, 2,
-				"With ${blu}%s${rs}, you must specify an action explicitly; for example, ${blu}-print${rs} ${blu}%s${rs} ${bld}%s${rs}.\n",
-				limit[0], limit[0], limit[1]);
+			parse_expr_error(parser, limit,
+				"With %pX, you must specify an action explicitly; for example, ${blu}-print${rs} %px.\n",
+				limit, limit);
 			return NULL;
 		}
 
@@ -3549,16 +3669,16 @@ static struct bfs_expr *parse_whole_expr(struct bfs_parser *parser) {
 		}
 	}
 
-	if (parser->mount_arg && parser->xdev_arg) {
-		parse_conflict_warning(parser, parser->mount_arg, 1, parser->xdev_arg, 1,
-			"${blu}%s${rs} is redundant in the presence of ${blu}%s${rs}.\n\n",
-			parser->xdev_arg[0], parser->mount_arg[0]);
+	if (parser->mount_expr && parser->xdev_expr) {
+		parse_conflict_warning(parser, parser->mount_expr, parser->xdev_expr,
+			"%px is redundant in the presence of %px.\n\n",
+			parser->xdev_expr, parser->mount_expr);
 	}
 
-	if (ctx->warn && parser->depth_arg && parser->prune_arg) {
-		parse_conflict_warning(parser, parser->depth_arg, 1, parser->prune_arg, 1,
-			"${blu}%s${rs} does not work in the presence of ${blu}%s${rs}.\n",
-			parser->prune_arg[0], parser->depth_arg[0]);
+	if (ctx->warn && parser->depth_expr && parser->prune_expr) {
+		parse_conflict_warning(parser, parser->depth_expr, parser->prune_expr,
+			"%px does not work in the presence of %px.\n",
+			parser->prune_expr, parser->depth_expr);
 
 		if (ctx->interactive) {
 			bfs_warning(ctx, "Do you want to continue? ");
@@ -3570,13 +3690,6 @@ static struct bfs_expr *parse_whole_expr(struct bfs_parser *parser) {
 		fprintf(stderr, "\n");
 	}
 
-	if (parser->ok_expr && parser->files0_stdin_arg) {
-		parse_conflict_error(parser, parser->ok_expr->argv, parser->ok_expr->argc, parser->files0_stdin_arg, 2,
-			"${blu}%s${rs} conflicts with ${blu}%s${rs} ${bld}%s${rs}.\n",
-			parser->ok_expr->argv[0], parser->files0_stdin_arg[0], parser->files0_stdin_arg[1]);
-		return NULL;
-	}
-
 	return expr;
 }
 
@@ -3758,6 +3871,12 @@ struct bfs_ctx *bfs_parse_cmdline(int argc, char *argv[]) {
 		goto fail;
 	}
 
+	ctx->kinds = ZALLOC_ARRAY(enum bfs_kind, argc);
+	if (!ctx->kinds) {
+		perror("zalloc()");
+		goto fail;
+	}
+
 	enum use_color use_color = COLOR_AUTO;
 	const char *no_color = getenv("NO_COLOR");
 	if (no_color && *no_color) {
@@ -3806,16 +3925,14 @@ struct bfs_ctx *bfs_parse_cmdline(int argc, char *argv[]) {
 		.stdout_tty = stdout_tty,
 		.use_color = use_color,
 		.implicit_print = true,
-		.implicit_root = true,
 		.just_info = false,
 		.excluding = false,
 		.last_arg = NULL,
-		.depth_arg = NULL,
-		.prune_arg = NULL,
-		.mount_arg = NULL,
-		.xdev_arg = NULL,
-		.files0_stdin_arg = NULL,
-		.ok_expr = NULL,
+		.depth_expr = NULL,
+		.prune_expr = NULL,
+		.mount_expr = NULL,
+		.xdev_expr = NULL,
+		.stdin_expr = NULL,
 		.now = ctx->now,
 	};
 
@@ -3844,12 +3961,6 @@ struct bfs_ctx *bfs_parse_cmdline(int argc, char *argv[]) {
 		goto fail;
 	}
 
-	if (ctx->npaths == 0 && parser.implicit_root) {
-		if (parse_root(&parser, ".") != 0) {
-			goto fail;
-		}
-	}
-
 	if ((ctx->flags & BFTW_FOLLOW_ALL) && !ctx->unique) {
 		// We need bftw() to detect cycles unless -unique does it for us
 		ctx->flags |= BFTW_DETECT_CYCLES;
diff --git a/src/prelude.h b/src/prelude.h
index a0cc2a1..de89a6c 100644
--- a/src/prelude.h
+++ b/src/prelude.h
@@ -62,6 +62,11 @@
 #  define _POSIX_PTHREAD_SEMANTICS 1
 #endif
 
+/** QNX extensions. */
+#if __QNX__
+#  define _QNX_SOURCE 1
+#endif
+
 // Get the convenience macros that became standard spellings in C23
 #if __STDC_VERSION__ < 202311L
 
diff --git a/src/sanity.h b/src/sanity.h
index 3f6020b..be77eef 100644
--- a/src/sanity.h
+++ b/src/sanity.h
@@ -20,11 +20,6 @@
 #define SANITIZE_CALL__(macro, ptr, size, ...) \
 	macro(ptr, size)
 
-/**
- * Squelch unused variable warnings when not sanitizing.
- */
-#define sanitize_ignore(ptr, size) ((void)(ptr), (void)(size))
-
 #if __SANITIZE_ADDRESS__
 #  include <sanitizer/asan_interface.h>
 
@@ -42,9 +37,27 @@
  */
 #define sanitize_free(...) SANITIZE_CALL(__asan_poison_memory_region, __VA_ARGS__)
 
+/**
+ * Adjust the size of an allocated region, for things like dynamic arrays.
+ *
+ * @ptr
+ *         The memory region.
+ * @old
+ *         The previous usable size of the region.
+ * @new
+ *         The new usable size of the region.
+ * @cap
+ *         The total allocated capacity of the region.
+ */
+static inline void sanitize_resize(const void *ptr, size_t old, size_t new, size_t cap) {
+	const char *beg = ptr;
+	__sanitizer_annotate_contiguous_container(beg, beg + cap, beg + old, beg + new);
+}
+
 #else
-#  define sanitize_alloc(...) SANITIZE_CALL(sanitize_ignore, __VA_ARGS__)
-#  define sanitize_free(...) SANITIZE_CALL(sanitize_ignore, __VA_ARGS__)
+#  define sanitize_alloc(...) ((void)0)
+#  define sanitize_free(...) ((void)0)
+#  define sanitize_resize(ptr, old, new, cap) ((void)0)
 #endif
 
 #if __SANITIZE_MEMORY__
@@ -65,8 +78,8 @@
 #define sanitize_uninit(...) SANITIZE_CALL(__msan_allocated_memory, __VA_ARGS__)
 
 #else
-#  define sanitize_init(...) SANITIZE_CALL(sanitize_ignore, __VA_ARGS__)
-#  define sanitize_uninit(...) SANITIZE_CALL(sanitize_ignore, __VA_ARGS__)
+#  define sanitize_init(...) ((void)0)
+#  define sanitize_uninit(...) ((void)0)
 #endif
 
 /**
diff --git a/src/sighook.c b/src/sighook.c
index 0cc81fa..a87bed5 100644
--- a/src/sighook.c
+++ b/src/sighook.c
@@ -32,6 +32,10 @@
 #include <stdlib.h>
 #include <unistd.h>
 
+#if __linux__
+#  include <sys/syscall.h>
+#endif
+
 // NetBSD opens a file descriptor for each sem_init()
 #if defined(_POSIX_SEMAPHORES) && !__NetBSD__
 #  define BFS_POSIX_SEMAPHORES _POSIX_SEMAPHORES
@@ -245,62 +249,90 @@ static void *rcu_update(struct rcu *rcu, void *ptr) {
 	return rcu_decode(arc_wait(prev));
 }
 
-struct sighook {
-	/** The signal being hooked, or 0 for atsigexit(). */
-	int sig;
-	/** Signal hook flags. */
-	enum sigflags flags;
-	/** The function to call. */
-	sighook_fn *fn;
-	/** An argument to pass to the function. */
-	void *arg;
-
-	/** The RCU pointer to this hook. */
-	struct rcu *self;
-	/** The next hook in the list. */
-	struct rcu next;
-};
-
 /**
- * An RCU-protected linked list of signal hooks.
+ * An RCU-protected linked list.
  */
-struct siglist {
-	/** The first hook in the list. */
+struct rcu_list {
+	/** The first node in the list. */
 	struct rcu head;
 	/** &last->next */
 	struct rcu *tail;
 };
 
-/** Initialize a siglist. */
-static void siglist_init(struct siglist *list) {
+/**
+ * An rcu_list node.
+ */
+struct rcu_node {
+	/** The RCU pointer to this node. */
+	struct rcu *self;
+	/** The next node in the list. */
+	struct rcu next;
+};
+
+/** Initialize an rcu_list. */
+static void rcu_list_init(struct rcu_list *list) {
 	rcu_init(&list->head, NULL);
 	list->tail = &list->head;
 }
 
-/** Append a hook to a linked list. */
-static void sigpush(struct siglist *list, struct sighook *hook) {
-	hook->self = list->tail;
-	list->tail = &hook->next;
-	rcu_init(&hook->next, NULL);
-	rcu_update(hook->self, hook);
+/** Append to an rcu_list. */
+static void rcu_list_append(struct rcu_list *list, struct rcu_node *node) {
+	node->self = list->tail;
+	list->tail = &node->next;
+	rcu_init(&node->next, NULL);
+	rcu_update(node->self, node);
 }
 
-/** Remove a hook from the linked list. */
-static void sigpop(struct siglist *list, struct sighook *hook) {
-	struct sighook *next = rcu_peek(&hook->next);
-	rcu_update(hook->self, next);
+/** Remove from an rcu_list. */
+static void rcu_list_remove(struct rcu_list *list, struct rcu_node *node) {
+	struct rcu_node *next = rcu_peek(&node->next);
+	rcu_update(node->self, next);
 	if (next) {
-		next->self = hook->self;
+		next->self = node->self;
 	} else {
 		list->tail = &list->head;
 	}
+	rcu_destroy(&node->next);
 }
 
+/**
+ * Iterate over an rcu_list.
+ *
+ * It is save to `break` out of this loop, but `return` or `goto` will lead to
+ * a missed arc_put().
+ */
+#define for_rcu(type, node, list) \
+	for_rcu_(type, node, (list), node##_slot_, node##_prev_, node##_done_)
+
+#define for_rcu_(type, node, list, slot, prev, done) \
+	for (struct arc *slot, *prev, **done = NULL; !done; arc_put(slot), done = &slot) \
+		for (type *node = rcu_read(&list->head, &slot); \
+		     node; \
+		     prev = slot, \
+		     node = rcu_read(&((struct rcu_node *)node)->next, &slot), \
+		     arc_put(prev))
+
+struct sighook {
+	/** The RCU list node (must be the first field). */
+	struct rcu_node node;
+
+	/** The signal being hooked, or 0 for atsigexit(). */
+	int sig;
+	/** Signal hook flags. */
+	enum sigflags flags;
+	/** The function to call. */
+	sighook_fn *fn;
+	/** An argument to pass to the function. */
+	void *arg;
+	/** Flag for SH_ONESHOT. */
+	atomic bool armed;
+};
+
 /** The lists of signal hooks. */
-static struct siglist sighooks[64];
+static struct rcu_list sighooks[64];
 
 /** Get the hook list for a particular signal. */
-static struct siglist *siglist(int sig) {
+static struct rcu_list *siglist(int sig) {
 	return &sighooks[sig % countof(sighooks)];
 }
 
@@ -336,22 +368,31 @@ static const int FATAL_SIGNALS[] = {
 	SIGHUP,
 	SIGILL,
 	SIGINT,
+#ifdef SIGIO
+	SIGIO,
+#endif
 	SIGPIPE,
-	SIGQUIT,
-	SIGSEGV,
-	SIGTERM,
-	SIGUSR1,
-	SIGUSR2,
 #ifdef SIGPOLL
 	SIGPOLL,
 #endif
 #ifdef SIGPROF
 	SIGPROF,
 #endif
+#ifdef SIGPWR
+	SIGPWR,
+#endif
+	SIGQUIT,
+	SIGSEGV,
+#ifdef SIGSTKFLT
+	SIGSTKFLT,
+#endif
 #ifdef SIGSYS
 	SIGSYS,
 #endif
+	SIGTERM,
 	SIGTRAP,
+	SIGUSR1,
+	SIGUSR2,
 #ifdef SIGVTALRM
 	SIGVTALRM,
 #endif
@@ -383,7 +424,9 @@ static bool is_fatal(int sig) {
 
 /** Reraise a fatal signal. */
 _noreturn
-static void reraise(int sig) {
+static void reraise(siginfo_t *info) {
+	int sig = info->si_signo;
+
 	// Restore the default signal action
 	if (signal(sig, SIG_DFL) == SIG_ERR) {
 		goto fail;
@@ -397,42 +440,70 @@ static void reraise(int sig) {
 		goto fail;
 	}
 
+#if __linux__
+	// On Linux, try to re-raise the exact siginfo_t (since 3.9, a process can
+	// signal itself with any siginfo_t)
+	pid_t tid = syscall(SYS_gettid);
+	syscall(SYS_rt_tgsigqueueinfo, getpid(), tid, sig, info);
+#endif
+
 	raise(sig);
 fail:
 	abort();
 }
 
+/** Check whether we should run a hook. */
+static bool should_run(int sig, struct sighook *hook) {
+	if (hook->sig != sig && hook->sig != 0) {
+		return false;
+	}
+
+	if (hook->flags & SH_ONESHOT) {
+		if (!exchange(&hook->armed, false, relaxed)) {
+			return false;
+		}
+	}
+
+	return true;
+}
+
 /** Find any matching hooks and run them. */
-static enum sigflags run_hooks(struct siglist *list, int sig, siginfo_t *info) {
+static enum sigflags run_hooks(struct rcu_list *list, int sig, siginfo_t *info) {
 	enum sigflags ret = 0;
 
-	struct arc *slot = NULL;
-	struct sighook *hook = rcu_read(&list->head, &slot);
-	while (hook) {
-		if (hook->sig == sig || hook->sig == 0) {
+	for_rcu (struct sighook, hook, list) {
+		if (should_run(sig, hook)) {
 			hook->fn(sig, info, hook->arg);
 			ret |= hook->flags;
 		}
-
-		struct arc *prev = slot;
-		hook = rcu_read(&hook->next, &slot);
-		arc_put(prev);
 	}
 
-	arc_put(slot);
 	return ret;
 }
 
 /** Dispatches a signal to the registered handlers. */
 static void sigdispatch(int sig, siginfo_t *info, void *context) {
-	// https://pubs.opengroup.org/onlinepubs/9799919799/functions/V2_chap02.html#tag_16_04_03_03
+	// If we get a fault (e.g. a "real" SIGSEGV, not something like
+	// kill(..., SIGSEGV)), don't try to run signal hooks, since we could be
+	// in an arbitrarily corrupted state.
 	//
-	//     The behavior of a process is undefined after it returns normally
-	//     from a signal-catching function for a SIGBUS, SIGFPE, SIGILL, or
-	//     SIGSEGV signal that was not generated by kill(), sigqueue(), or
-	//     raise().
+	// POSIX says that returning normally from a signal handler for a fault
+	// is undefined.  But in practice, it's better to uninstall the handler
+	// and return, which will re-run the faulting instruction and cause us
+	// to die "correctly" (e.g. with a core dump pointing at the faulting
+	// instruction, not reraise()).
 	if (is_fault(info)) {
-		reraise(sig);
+		// On macOS, we cannot reliably distinguish between faults and
+		// asynchronous signals.  For example, pkill -SEGV bfs will
+		// result in si_code == SEGV_ACCERR.  So we always re-raise the
+		// signal, because just returning would cause us to ignore
+		// asynchronous SIG{BUS,ILL,SEGV}.
+#if !__APPLE__
+		if (signal(sig, SIG_DFL) != SIG_ERR) {
+			return;
+		}
+#endif
+		reraise(info);
 	}
 
 	// https://pubs.opengroup.org/onlinepubs/9799919799/functions/V2_chap02.html#tag_16_04_04
@@ -445,40 +516,58 @@ static void sigdispatch(int sig, siginfo_t *info, void *context) {
 	int error = errno;
 
 	// Run the normal hooks
-	struct siglist *list = siglist(sig);
+	struct rcu_list *list = siglist(sig);
 	enum sigflags flags = run_hooks(list, sig, info);
 
 	// Run the atsigexit() hooks, if we're exiting
 	if (!(flags & SH_CONTINUE) && is_fatal(sig)) {
 		list = siglist(0);
 		run_hooks(list, sig, info);
-		reraise(sig);
+		reraise(info);
 	}
 
 	errno = error;
 }
 
+/** A saved signal handler, for sigreset() to restore. */
+struct sigsave {
+	struct rcu_node node;
+	int sig;
+	struct sigaction action;
+};
+
+/** The list of saved signal handlers. */
+static struct rcu_list saved;
+/** `saved` initialization status (since rcu_list_init() isn't atomic). */
+static atomic bool initialized = false;
+
 /** Make sure our signal handler is installed for a given signal. */
 static int siginit(int sig) {
+#ifdef SA_RESTART
+#  define BFS_SA_RESTART SA_RESTART
+#else
+#  define BFS_SA_RESTART 0
+#endif
+
 	static struct sigaction action = {
 		.sa_sigaction = sigdispatch,
-		.sa_flags = SA_RESTART | SA_SIGINFO,
+		.sa_flags = BFS_SA_RESTART | SA_SIGINFO,
 	};
 
 	static sigset_t signals;
-	static bool initialized = false;
 
-	if (!initialized) {
+	if (!load(&initialized, relaxed)) {
 		if (sigemptyset(&signals) != 0
 		    || sigemptyset(&action.sa_mask) != 0) {
 			return -1;
 		}
 
 		for (size_t i = 0; i < countof(sighooks); ++i) {
-			siglist_init(&sighooks[i]);
+			rcu_list_init(&sighooks[i]);
 		}
 
-		initialized = true;
+		rcu_list_init(&saved);
+		store(&initialized, true, release);
 	}
 
 	int installed = sigismember(&signals, sig);
@@ -488,14 +577,32 @@ static int siginit(int sig) {
 		return 0;
 	}
 
-	if (sigaction(sig, &action, NULL) != 0) {
+	sigset_t updated = signals;
+	if (sigaddset(&updated, sig) != 0) {
+		return -1;
+	}
+
+	struct sigaction original;
+	if (sigaction(sig, NULL, &original) != 0) {
 		return -1;
 	}
 
-	if (sigaddset(&signals, sig) != 0) {
+	struct sigsave *save = ALLOC(struct sigsave);
+	if (!save) {
 		return -1;
 	}
 
+	save->sig = sig;
+	save->action = original;
+	rcu_list_append(&saved, &save->node);
+
+	if (sigaction(sig, &action, NULL) != 0) {
+		rcu_list_remove(&saved, &save->node);
+		free(save);
+		return -1;
+	}
+
+	signals = updated;
 	return 0;
 }
 
@@ -510,9 +617,10 @@ static struct sighook *sighook_impl(int sig, sighook_fn *fn, void *arg, enum sig
 	hook->flags = flags;
 	hook->fn = fn;
 	hook->arg = arg;
+	atomic_init(&hook->armed, true);
 
-	struct siglist *list = siglist(sig);
-	sigpush(list, hook);
+	struct rcu_list *list = siglist(sig);
+	rcu_list_append(list, &hook->node);
 	return hook;
 }
 
@@ -558,11 +666,27 @@ void sigunhook(struct sighook *hook) {
 
 	mutex_lock(&sigmutex);
 
-	struct siglist *list = siglist(hook->sig);
-	sigpop(list, hook);
+	struct rcu_list *list = siglist(hook->sig);
+	rcu_list_remove(list, &hook->node);
 
 	mutex_unlock(&sigmutex);
 
-	rcu_destroy(&hook->next);
 	free(hook);
 }
+
+int sigreset(void) {
+	if (!load(&initialized, acquire)) {
+		return 0;
+	}
+
+	int ret = 0;
+
+	for_rcu (struct sigsave, save, &saved) {
+		if (sigaction(save->sig, &save->action, NULL) != 0) {
+			ret = -1;
+			break;
+		}
+	}
+
+	return ret;
+}
diff --git a/src/sighook.h b/src/sighook.h
index 3bece21..7149229 100644
--- a/src/sighook.h
+++ b/src/sighook.h
@@ -21,6 +21,8 @@ struct sighook;
 enum sigflags {
 	/** Suppress the default action for this signal. */
 	SH_CONTINUE = 1 << 0,
+	/** Only run this hook once. */
+	SH_ONESHOT = 1 << 1,
 };
 
 /**
@@ -70,4 +72,12 @@ struct sighook *atsigexit(sighook_fn *fn, void *arg);
  */
 void sigunhook(struct sighook *hook);
 
+/**
+ * Restore all signal handlers to their original dispositions (e.g. after fork()).
+ *
+ * @return
+ *         0 on success, -1 on failure.
+ */
+int sigreset(void);
+
 #endif // BFS_SIGHOOK_H
diff --git a/src/stat.c b/src/stat.c
index e99e711..1fcfde3 100644
--- a/src/stat.c
+++ b/src/stat.c
@@ -51,6 +51,8 @@ const char *bfs_stat_field_name(enum bfs_stat_field field) {
 		return "change time";
 	case BFS_STAT_MTIME:
 		return "modification time";
+	case BFS_STAT_MNT_ID:
+		return "mount ID";
 	}
 
 	bfs_bug("Unrecognized stat field %d", (int)field);
@@ -101,6 +103,10 @@ void bfs_stat_convert(struct bfs_stat *dest, const struct stat *src) {
 	dest->rdev = src->st_rdev;
 	dest->mask |= BFS_STAT_RDEV;
 
+	// No mount IDs in regular stat(), so use the dev_t as an approximation
+	dest->mnt_id = dest->dev;
+	dest->mask |= BFS_STAT_MNT_ID;
+
 #if BFS_HAS_ST_FLAGS
 	dest->attrs = src->st_flags;
 	dest->mask |= BFS_STAT_ATTRS;
@@ -169,6 +175,17 @@ int bfs_statx_flags(enum bfs_stat_flags flags) {
 	return ret;
 }
 
+unsigned int bfs_statx_mask(void) {
+	unsigned int mask = STATX_BASIC_STATS | STATX_BTIME;
+#ifdef STATX_MNT_ID
+	mask |= STATX_MNT_ID;
+#endif
+#ifdef STATX_MNT_ID_UNIQUE
+	mask |= STATX_MNT_ID_UNIQUE;
+#endif
+	return mask;
+}
+
 int bfs_statx_convert(struct bfs_stat *dest, const struct statx *src) {
 	// Callers shouldn't have to check anything except the times
 	const unsigned int guaranteed = STATX_BASIC_STATS & ~(STATX_ATIME | STATX_CTIME | STATX_MTIME);
@@ -209,6 +226,18 @@ int bfs_statx_convert(struct bfs_stat *dest, const struct statx *src) {
 	dest->attrs = src->stx_attributes;
 	dest->mask |= BFS_STAT_ATTRS;
 
+	dest->mnt_id = dest->dev;
+#ifdef STATX_MNT_ID
+	unsigned int mnt_mask = STATX_MNT_ID;
+#  ifdef STATX_MNT_ID_UNIQUE
+	mnt_mask |= STATX_MNT_ID_UNIQUE;
+#  endif
+	if (src->stx_mask & mnt_mask) {
+		dest->mnt_id = src->stx_mnt_id;
+	}
+#endif
+	dest->mask |= BFS_STAT_MNT_ID;
+
 	if (src->stx_mask & STATX_ATIME) {
 		dest->atime.tv_sec = src->stx_atime.tv_sec;
 		dest->atime.tv_nsec = src->stx_atime.tv_nsec;
@@ -240,7 +269,7 @@ int bfs_statx_convert(struct bfs_stat *dest, const struct statx *src) {
  * bfs_stat() implementation backed by statx().
  */
 static int bfs_statx_impl(int at_fd, const char *at_path, int at_flags, struct bfs_stat *buf) {
-	unsigned int mask = STATX_BASIC_STATS | STATX_BTIME;
+	unsigned int mask = bfs_statx_mask();
 	struct statx xbuf;
 	int ret = bfs_statx(at_fd, at_path, at_flags, mask, &xbuf);
 	if (ret != 0) {
diff --git a/src/stat.h b/src/stat.h
index 50c91de..c4a63d3 100644
--- a/src/stat.h
+++ b/src/stat.h
@@ -14,6 +14,7 @@
 
 #include "bfs.h"
 
+#include <stdint.h>
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <time.h>
@@ -56,6 +57,7 @@ enum bfs_stat_field {
 	BFS_STAT_BTIME  = 1 << 11,
 	BFS_STAT_CTIME  = 1 << 12,
 	BFS_STAT_MTIME  = 1 << 13,
+	BFS_STAT_MNT_ID = 1 << 14,
 };
 
 /**
@@ -102,6 +104,8 @@ struct bfs_stat {
 	blkcnt_t blocks;
 	/** The device ID represented by this file. */
 	dev_t rdev;
+	/** The ID of the mount point containing this file. */
+	uint64_t mnt_id;
 
 	/** Attributes/flags set on the file. */
 	unsigned long long attrs;
@@ -150,6 +154,11 @@ void bfs_stat_convert(struct bfs_stat *dest, const struct stat *src);
 int bfs_statx_flags(enum bfs_stat_flags flags);
 
 /**
+ * Get the default statx() mask.
+ */
+unsigned int bfs_statx_mask(void);
+
+/**
  * Convert struct statx to struct bfs_stat.
  */
 int bfs_statx_convert(struct bfs_stat *dest, const struct statx *src);
diff --git a/src/thread.c b/src/thread.c
index d61ff8c..b3604f8 100644
--- a/src/thread.c
+++ b/src/thread.c
@@ -9,6 +9,10 @@
 #include <errno.h>
 #include <pthread.h>
 
+#if __has_include(<pthread_np.h>)
+#  include <pthread_np.h>
+#endif
+
 #define THREAD_FALLIBLE(expr) \
 	do { \
 		int err = expr; \
@@ -32,6 +36,14 @@ int thread_create(pthread_t *thread, const pthread_attr_t *attr, thread_fn *fn,
 	THREAD_FALLIBLE(pthread_create(thread, attr, fn, arg));
 }
 
+void thread_setname(pthread_t thread, const char *name) {
+#if BFS_HAS_PTHREAD_SETNAME_NP
+	pthread_setname_np(thread, name);
+#elif BFS_HAS_PTHREAD_SET_NAME_NP
+	pthread_set_name_np(thread, name);
+#endif
+}
+
 void thread_join(pthread_t thread, void **ret) {
 	THREAD_INFALLIBLE(pthread_join(thread, ret));
 }
diff --git a/src/thread.h b/src/thread.h
index 7d12468..3dd8422 100644
--- a/src/thread.h
+++ b/src/thread.h
@@ -22,6 +22,11 @@ typedef void *thread_fn(void *arg);
 int thread_create(pthread_t *thread, const pthread_attr_t *attr, thread_fn *fn, void *arg);
 
 /**
+ * Set the name of a thread.
+ */
+void thread_setname(pthread_t thread, const char *name);
+
+/**
  * Wrapper for pthread_join().
  */
 void thread_join(pthread_t thread, void **ret);
diff --git a/src/trie.c b/src/trie.c
index a7498d4..6aac17f 100644
--- a/src/trie.c
+++ b/src/trie.c
@@ -129,7 +129,7 @@ struct trie_node {
 	 * tag to distinguish internal nodes from leaves.  This is safe as long
 	 * as all dynamic allocations are aligned to more than a single byte.
 	 */
-	uintptr_t children[];
+	uintptr_t children[]; // _counted_by(count_ones(bitmap))
 };
 
 /** Check if an encoded pointer is to an internal node. */
@@ -173,16 +173,16 @@ void trie_init(struct trie *trie) {
 /** Extract the nibble at a certain offset from a byte sequence. */
 static unsigned char trie_key_nibble(const void *key, size_t length, size_t offset) {
 	const unsigned char *bytes = key;
-	size_t byte = offset >> 1;
+	size_t byte = offset / 2;
 	bfs_assert(byte < length);
 
 	// A branchless version of
 	// if (offset & 1) {
-	//         return bytes[byte] >> 4;
-	// } else {
 	//         return bytes[byte] & 0xF;
+	// } else {
+	//         return bytes[byte] >> 4;
 	// }
-	unsigned int shift = (offset & 1) << 2;
+	unsigned int shift = 4 * ((offset + 1) % 2);
 	return (bytes[byte] >> shift) & 0xF;
 }
 
@@ -191,6 +191,12 @@ static unsigned char trie_leaf_nibble(const struct trie_leaf *leaf, size_t offse
 	return trie_key_nibble(leaf->key, leaf->length, offset);
 }
 
+/** Get the number of children of an internal node. */
+_trie_clones
+static unsigned int trie_node_size(const struct trie_node *node) {
+	return count_ones((unsigned int)node->bitmap);
+}
+
 /**
  * Finds a leaf in the trie that matches the key at every branch.  If the key
  * exists in the trie, the representative will match the searched key.  But
@@ -202,19 +208,20 @@ _trie_clones
 static struct trie_leaf *trie_representative(const struct trie *trie, const void *key, size_t length) {
 	uintptr_t ptr = trie->root;
 
-	size_t offset = 0;
+	size_t offset = 0, limit = 2 * length;
 	while (trie_is_node(ptr)) {
 		struct trie_node *node = trie_decode_node(ptr);
 		offset += node->offset;
 
 		unsigned int index = 0;
-		if ((offset >> 1) < length) {
+		if (offset < limit) {
 			unsigned char nibble = trie_key_nibble(key, length, offset);
 			unsigned int bit = 1U << nibble;
-			// bits = bitmap & bit ? bitmap & (bit - 1) : 0
-			unsigned int mask = -!!(node->bitmap & bit);
-			unsigned int bits = node->bitmap & (bit - 1) & mask;
-			index = count_ones(bits);
+			unsigned int map = node->bitmap;
+			unsigned int bits = map & (bit - 1);
+			unsigned int mask = -!!(map & bit);
+			// index = (map & bit) ? count_ones(bits) : 0;
+			index = count_ones(bits) & mask;
 		}
 		ptr = node->children[index];
 	}
@@ -240,6 +247,16 @@ struct trie_leaf *trie_find_mem(const struct trie *trie, const void *key, size_t
 	return trie_find_mem_impl(trie, key, length);
 }
 
+void *trie_get_str(const struct trie *trie, const char *key) {
+	const struct trie_leaf *leaf = trie_find_str(trie, key);
+	return leaf ? leaf->value : NULL;
+}
+
+void *trie_get_mem(const struct trie *trie, const void *key, size_t length) {
+	const struct trie_leaf *leaf = trie_find_mem(trie, key, length);
+	return leaf ? leaf->value : NULL;
+}
+
 _trie_clones
 static struct trie_leaf *trie_find_postfix_impl(const struct trie *trie, const char *key) {
 	size_t length = strlen(key);
@@ -296,18 +313,18 @@ static struct trie_leaf *trie_find_prefix_impl(const struct trie *trie, const ch
 	size_t skip = 0;
 	size_t length = strlen(key) + 1;
 
-	size_t offset = 0;
+	size_t offset = 0, limit = 2 * length;
 	while (trie_is_node(ptr)) {
 		struct trie_node *node = trie_decode_node(ptr);
 		offset += node->offset;
-		if ((offset >> 1) >= length) {
+		if (offset >= limit) {
 			return best;
 		}
 
 		struct trie_leaf *leaf = trie_terminal_leaf(node);
 		if (trie_check_prefix(leaf, skip, key, length)) {
 			best = leaf;
-			skip = offset >> 1;
+			skip = offset / 2;
 		}
 
 		unsigned char nibble = trie_key_nibble(key, length, offset);
@@ -370,16 +387,10 @@ static struct trie_node *trie_node_realloc(struct trie *trie, struct trie_node *
 
 /** Free a node. */
 static void trie_node_free(struct trie *trie, struct trie_node *node, size_t size) {
-	bfs_assert(size == (size_t)count_ones(node->bitmap));
+	bfs_assert(size == trie_node_size(node));
 	varena_free(&trie->nodes, node, size);
 }
 
-#if ENDIAN_NATIVE == ENDIAN_LITTLE
-#  define TRIE_BSWAP(n) (n)
-#elif ENDIAN_NATIVE == ENDIAN_BIG
-#  define TRIE_BSWAP(n) bswap(n)
-#endif
-
 /** Find the offset of the first nibble that differs between two keys. */
 static size_t trie_mismatch(const struct trie_leaf *rep, const void *key, size_t length) {
 	if (!rep) {
@@ -393,32 +404,34 @@ static size_t trie_mismatch(const struct trie_leaf *rep, const void *key, size_t
 	const char *rep_bytes = rep->key;
 	const char *key_bytes = key;
 
-	size_t i = 0;
-	for (size_t chunk = sizeof(chunk); i + chunk <= length; i += chunk) {
-		size_t rep_chunk, key_chunk;
-		memcpy(&rep_chunk, rep_bytes + i, sizeof(rep_chunk));
-		memcpy(&key_chunk, key_bytes + i, sizeof(key_chunk));
-
-		if (rep_chunk != key_chunk) {
-#ifdef TRIE_BSWAP
-			size_t diff = TRIE_BSWAP(rep_chunk ^ key_chunk);
-			i *= 2;
-			i += trailing_zeros(diff) / 4;
-			return i;
+	size_t ret = 0, i = 0;
+
+#define CHUNK(n) CHUNK_(uint##n##_t, load8_beu##n)
+#define CHUNK_(type, load8) \
+	(length - i >= sizeof(type)) { \
+		type rep_chunk = load8(rep_bytes + i); \
+		type key_chunk = load8(key_bytes + i); \
+		type diff = rep_chunk ^ key_chunk; \
+		ret += leading_zeros(diff) / 4; \
+		if (diff) { \
+			return ret; \
+		} \
+		i += sizeof(type); \
+	}
+
+#if SIZE_WIDTH >= 64
+	while CHUNK(64);
+	if CHUNK(32);
 #else
-			break;
+	while CHUNK(32);
 #endif
-		}
-	}
+	if CHUNK(16);
+	if CHUNK(8);
 
-	for (; i < length; ++i) {
-		unsigned char diff = rep_bytes[i] ^ key_bytes[i];
-		if (diff) {
-			return 2 * i + !(diff & 0xF);
-		}
-	}
+#undef CHUNK_
+#undef CHUNK
 
-	return 2 * i;
+	return ret;
 }
 
 /**
@@ -446,7 +459,7 @@ static size_t trie_mismatch(const struct trie_leaf *rep, const void *key, size_t
 _trie_clones
 static struct trie_leaf *trie_node_insert(struct trie *trie, uintptr_t *ptr, struct trie_leaf *leaf, unsigned char nibble) {
 	struct trie_node *node = trie_decode_node(*ptr);
-	unsigned int size = count_ones(node->bitmap);
+	unsigned int size = trie_node_size(node);
 
 	// Double the capacity every power of two
 	if (has_single_bit(size)) {
@@ -626,6 +639,26 @@ struct trie_leaf *trie_insert_mem(struct trie *trie, const void *key, size_t len
 	return trie_insert_mem_impl(trie, key, length);
 }
 
+int trie_set_str(struct trie *trie, const char *key, const void *value) {
+	struct trie_leaf *leaf = trie_insert_str(trie, key);
+	if (leaf) {
+		leaf->value = (void *)value;
+		return 0;
+	} else {
+		return -1;
+	}
+}
+
+int trie_set_mem(struct trie *trie, const void *key, size_t length, const void *value) {
+	struct trie_leaf *leaf = trie_insert_mem(trie, key, length);
+	if (leaf) {
+		leaf->value = (void *)value;
+		return 0;
+	} else {
+		return -1;
+	}
+}
+
 /** Free a chain of singleton nodes. */
 static void trie_free_singletons(struct trie *trie, uintptr_t ptr) {
 	while (trie_is_node(ptr)) {
@@ -711,7 +744,7 @@ static void trie_remove_impl(struct trie *trie, struct trie_leaf *leaf) {
 	struct trie_node *node = trie_decode_node(*parent);
 	trie_free_singletons(trie, node->children[child_index]);
 
-	unsigned int parent_size = count_ones(node->bitmap);
+	unsigned int parent_size = trie_node_size(node);
 	bfs_assert(parent_size > 1);
 	if (parent_size == 2 && trie_collapse_node(trie, parent, node, child_index) == 0) {
 		return;
diff --git a/src/trie.h b/src/trie.h
index 318a23b..19bd81d 100644
--- a/src/trie.h
+++ b/src/trie.h
@@ -21,7 +21,7 @@ struct trie_leaf {
 	/** The length of the key in bytes. */
 	size_t length;
 	/** The key itself, stored inline. */
-	char key[];
+	char key[] _counted_by(length);
 };
 
 /**
@@ -70,6 +70,32 @@ struct trie_leaf *trie_find_str(const struct trie *trie, const char *key);
 struct trie_leaf *trie_find_mem(const struct trie *trie, const void *key, size_t length);
 
 /**
+ * Get the value associated with a string key.
+ *
+ * @trie
+ *         The trie to search.
+ * @key
+ *         The key to look up.
+ * @return
+ *         The found value, or NULL if the key is not present.
+ */
+void *trie_get_str(const struct trie *trie, const char *key);
+
+/**
+ * Get the value associated with a fixed-size key.
+ *
+ * @trie
+ *         The trie to search.
+ * @key
+ *         The key to look up.
+ * @length
+ *         The length of the key in bytes.
+ * @return
+ *         The found value, or NULL if the key is not present.
+ */
+void *trie_get_mem(const struct trie *trie, const void *key, size_t length);
+
+/**
  * Find the shortest leaf that starts with a given key.
  *
  * @trie
@@ -120,6 +146,36 @@ struct trie_leaf *trie_insert_str(struct trie *trie, const char *key);
 struct trie_leaf *trie_insert_mem(struct trie *trie, const void *key, size_t length);
 
 /**
+ * Set the value for a string key.
+ *
+ * @trie
+ *         The trie to modify.
+ * @key
+ *         The key to insert.
+ * @value
+ *         The value to set.
+ * @return
+ *         0 on success, -1 on error.
+ */
+int trie_set_str(struct trie *trie, const char *key, const void *value);
+
+/**
+ * Set the value for a fixed-size key.
+ *
+ * @trie
+ *         The trie to modify.
+ * @key
+ *         The key to insert.
+ * @length
+ *         The length of the key in bytes.
+ * @value
+ *         The value to set.
+ * @return
+ *         0 on success, -1 on error.
+ */
+int trie_set_mem(struct trie *trie, const void *key, size_t length, const void *value);
+
+/**
  * Remove a leaf from a trie.
  *
  * @trie
diff --git a/src/xspawn.c b/src/xspawn.c
index b3eed79..ee62c05 100644
--- a/src/xspawn.c
+++ b/src/xspawn.c
@@ -8,6 +8,7 @@
 #include "bfstd.h"
 #include "diag.h"
 #include "list.h"
+#include "sighook.h"
 
 #include <errno.h>
 #include <fcntl.h>
@@ -231,12 +232,28 @@ int bfs_spawn_adddup2(struct bfs_spawn *ctx, int oldfd, int newfd) {
  */
 #define BFS_POSIX_SPAWNP_AFTER_FCHDIR !(__APPLE__ || __NetBSD__)
 
+/**
+ * NetBSD even resolves the executable before file actions with posix_spawn()!
+ */
+#define BFS_POSIX_SPAWN_AFTER_FCHDIR !__NetBSD__
+
 int bfs_spawn_addfchdir(struct bfs_spawn *ctx, int fd) {
 	struct bfs_spawn_action *action = bfs_spawn_action(BFS_SPAWN_FCHDIR);
 	if (!action) {
 		return -1;
 	}
 
+#if __APPLE__
+	// macOS has a bug that causes EBADF when an fchdir() action refers to a
+	// file opened by the file actions
+	for_slist (struct bfs_spawn_action, prev, ctx) {
+		if (fd == prev->out_fd) {
+			bfs_spawn_clear_posix(ctx);
+			break;
+		}
+	}
+#endif
+
 #if BFS_HAS_POSIX_SPAWN_ADDFCHDIR
 #  define BFS_POSIX_SPAWN_ADDFCHDIR posix_spawn_file_actions_addfchdir
 #elif BFS_HAS_POSIX_SPAWN_ADDFCHDIR_NP
@@ -400,18 +417,40 @@ static bool bfs_resolve_relative(const struct bfs_resolver *res) {
 	return false;
 }
 
+/** Check if the actions include fchdir(). */
+static bool bfs_spawn_will_chdir(const struct bfs_spawn *ctx) {
+	if (ctx) {
+		for_slist (const struct bfs_spawn_action, action, ctx) {
+			if (action->op == BFS_SPAWN_FCHDIR) {
+				return true;
+			}
+		}
+	}
+
+	return false;
+}
+
+/** Check if we can call xfaccessat() before file actions. */
+static bool bfs_can_access_early(const struct bfs_resolver *res, const struct bfs_spawn *ctx) {
+	if (res->exe[0] == '/') {
+		return true;
+	}
+
+	if (bfs_spawn_will_chdir(ctx)) {
+		return false;
+	}
+
+	return true;
+}
+
 /** Check if we can resolve the executable before file actions. */
 static bool bfs_can_resolve_early(const struct bfs_resolver *res, const struct bfs_spawn *ctx) {
 	if (!bfs_resolve_relative(res)) {
 		return true;
 	}
 
-	if (ctx) {
-		for_slist (const struct bfs_spawn_action, action, ctx) {
-			if (action->op == BFS_SPAWN_FCHDIR) {
-				return false;
-			}
-		}
+	if (bfs_spawn_will_chdir(ctx)) {
+		return false;
 	}
 
 	return true;
@@ -441,17 +480,19 @@ static int bfs_resolve_early(struct bfs_resolver *res, const char *exe, const st
 	};
 
 	if (bfs_can_skip_resolve(res, ctx)) {
-		// Do this check eagerly, even though posix_spawn()/execv() also
-		// would, because:
-		//
-		//     - faccessat() is faster than fork()/clone() + execv()
-		//     - posix_spawn() is not guaranteed to report ENOENT
-		if (xfaccessat(AT_FDCWD, exe, X_OK) == 0) {
-			res->done = true;
-			return 0;
-		} else {
-			return -1;
+		if (bfs_can_access_early(res, ctx)) {
+			// Do this check eagerly, even though posix_spawn()/execv() also
+			// would, because:
+			//
+			//     - faccessat() is faster than fork()/clone() + execv()
+			//     - posix_spawn() is not guaranteed to report ENOENT
+			if (xfaccessat(AT_FDCWD, exe, X_OK) != 0) {
+				return -1;
+			}
 		}
+
+		res->done = true;
+		return 0;
 	}
 
 	res->path = getenv("PATH");
@@ -528,6 +569,12 @@ static bool bfs_use_posix_spawn(const struct bfs_resolver *res, const struct bfs
 	}
 #endif
 
+#if !BFS_POSIX_SPAWN_AFTER_FCHDIR
+	if (res->exe[0] != '/' && bfs_spawn_will_chdir(ctx)) {
+		return false;
+	}
+#endif
+
 	return true;
 }
 
@@ -535,7 +582,7 @@ static bool bfs_use_posix_spawn(const struct bfs_resolver *res, const struct bfs
 
 /** Actually exec() the new process. */
 _noreturn
-static void bfs_spawn_exec(struct bfs_resolver *res, const struct bfs_spawn *ctx, char **argv, char **envp, int pipefd[2]) {
+static void bfs_spawn_exec(struct bfs_resolver *res, const struct bfs_spawn *ctx, char **argv, char **envp, const sigset_t *mask, int pipefd[2]) {
 	xclose(pipefd[0]);
 
 	for_slist (const struct bfs_spawn_action, action, ctx) {
@@ -596,6 +643,18 @@ static void bfs_spawn_exec(struct bfs_resolver *res, const struct bfs_spawn *ctx
 		goto fail;
 	}
 
+	// Reset signal handlers to their original values before we unblock
+	// signals, so that handlers don't run in both the parent and the child
+	if (sigreset() != 0) {
+		goto fail;
+	}
+
+	// Restore the original signal mask for the child process
+	errno = pthread_sigmask(SIG_SETMASK, mask, NULL);
+	if (errno != 0) {
+		goto fail;
+	}
+
 	execve(res->exe, argv, envp);
 
 fail:;
@@ -635,7 +694,7 @@ static pid_t bfs_fork_spawn(struct bfs_resolver *res, const struct bfs_spawn *ct
 #endif
 	if (pid == 0) {
 		// Child
-		bfs_spawn_exec(res, ctx, argv, envp, pipefd);
+		bfs_spawn_exec(res, ctx, argv, envp, &old_mask, pipefd);
 	}
 
 	// Restore the original signal mask
diff --git a/src/xtime.c b/src/xtime.c
index 49d7c36..6b8a141 100644
--- a/src/xtime.c
+++ b/src/xtime.c
@@ -354,6 +354,59 @@ error:
 	return -1;
 }
 
+/** One nanosecond. */
+static const long NS = 1000L * 1000 * 1000;
+
+void timespec_add(struct timespec *lhs, const struct timespec *rhs) {
+	lhs->tv_sec += rhs->tv_sec;
+	lhs->tv_nsec += rhs->tv_nsec;
+	if (lhs->tv_nsec >= NS) {
+		lhs->tv_nsec -= NS;
+		lhs->tv_sec += 1;
+	}
+}
+
+void timespec_sub(struct timespec *lhs, const struct timespec *rhs) {
+	lhs->tv_sec -= rhs->tv_sec;
+	lhs->tv_nsec -= rhs->tv_nsec;
+	if (lhs->tv_nsec < 0) {
+		lhs->tv_nsec += NS;
+		lhs->tv_sec -= 1;
+	}
+}
+
+int timespec_cmp(const struct timespec *lhs, const struct timespec *rhs) {
+	if (lhs->tv_sec < rhs->tv_sec) {
+		return -1;
+	} else if (lhs->tv_sec > rhs->tv_sec) {
+		return 1;
+	}
+
+	if (lhs->tv_nsec < rhs->tv_nsec) {
+		return -1;
+	} else if (lhs->tv_nsec > rhs->tv_nsec) {
+		return 1;
+	}
+
+	return 0;
+}
+
+void timespec_min(struct timespec *dest, const struct timespec *src) {
+	if (timespec_cmp(src, dest) < 0) {
+		*dest = *src;
+	}
+}
+
+void timespec_max(struct timespec *dest, const struct timespec *src) {
+	if (timespec_cmp(src, dest) > 0) {
+		*dest = *src;
+	}
+}
+
+double timespec_ns(const struct timespec *ts) {
+	return 1.0e9 * ts->tv_sec + ts->tv_nsec;
+}
+
 #if defined(_POSIX_TIMERS) && BFS_HAS_TIMER_CREATE
 #  define BFS_POSIX_TIMERS _POSIX_TIMERS
 #else
diff --git a/src/xtime.h b/src/xtime.h
index 2927a2e..b76fef2 100644
--- a/src/xtime.h
+++ b/src/xtime.h
@@ -47,6 +47,42 @@ int xtimegm(struct tm *tm, time_t *timep);
 int xgetdate(const char *str, struct timespec *result);
 
 /**
+ * Add to a timespec.
+ */
+void timespec_add(struct timespec *lhs, const struct timespec *rhs);
+
+/**
+ * Subtract from a timespec.
+ */
+void timespec_sub(struct timespec *lhs, const struct timespec *rhs);
+
+/**
+ * Compare two timespecs.
+ *
+ * @return
+ *         An integer with the sign of (*lhs - *rhs).
+ */
+int timespec_cmp(const struct timespec *lhs, const struct timespec *rhs);
+
+/**
+ * Update a minimum timespec.
+ */
+void timespec_min(struct timespec *dest, const struct timespec *src);
+
+/**
+ * Update a maximum timespec.
+ */
+void timespec_max(struct timespec *dest, const struct timespec *src);
+
+/**
+ * Convert a timespec to floating point.
+ *
+ * @return
+ *         The value in nanoseconds.
+ */
+double timespec_ns(const struct timespec *ts);
+
+/**
  * A timer.
  */
 struct timer;
diff --git a/tests/bfs/color_bsd.out b/tests/bfs/color_bsd.out
new file mode 100644
index 0000000..f7c577c
--- /dev/null
+++ b/tests/bfs/color_bsd.out
@@ -0,0 +1,27 @@
+[34;4;101m$'rainbow/\e[1m'[0m
+[34;4;101m$'rainbow/\e[1m/'[0m$'\e[0m'
+[34;4;101mrainbow[0m
+[34;4;101mrainbow/[0m[30;41msugid[0m
+[34;4;101mrainbow/[0m[30;41msuid[0m
+[34;4;101mrainbow/[0m[30;42msticky_ow[0m
+[34;4;101mrainbow/[0m[30;43mow[0m
+[34;4;101mrainbow/[0m[30;46msgid[0m
+[34;4;101mrainbow/[0m[31;49mexec.sh[0m
+[34;4;101mrainbow/[0m[32;49msocket[0m
+[34;4;101mrainbow/[0m[33;49mpipe[0m
+[34;4;101mrainbow/[0m[35;49mbroken[0m
+[34;4;101mrainbow/[0m[35;49mchardev_link[0m
+[34;4;101mrainbow/[0m[35;49mlink.txt[0m
+[34;4;101mrainbow/[0m[37;44msticky[0m
+[34;4;101mrainbow/[0mfile.dat
+[34;4;101mrainbow/[0mfile.txt
+[34;4;101mrainbow/[0mlower.gz
+[34;4;101mrainbow/[0mlower.tar
+[34;4;101mrainbow/[0mlower.tar.gz
+[34;4;101mrainbow/[0mlu.tar.GZ
+[34;4;101mrainbow/[0mmh1
+[34;4;101mrainbow/[0mmh2
+[34;4;101mrainbow/[0mul.TAR.gz
+[34;4;101mrainbow/[0mupper.GZ
+[34;4;101mrainbow/[0mupper.TAR
+[34;4;101mrainbow/[0mupper.TAR.GZ
diff --git a/tests/bfs/color_bsd.sh b/tests/bfs/color_bsd.sh
new file mode 100644
index 0000000..f8a777f
--- /dev/null
+++ b/tests/bfs/color_bsd.sh
@@ -0,0 +1 @@
+LSCOLORS="eB" bfs_diff rainbow -color
diff --git a/tests/bfs/color_bsd_fail.sh b/tests/bfs/color_bsd_fail.sh
new file mode 100644
index 0000000..94e1209
--- /dev/null
+++ b/tests/bfs/color_bsd_fail.sh
@@ -0,0 +1,2 @@
+# LSCOLORS can be at most 22 characters long (11 color pairs); this one has 24.
+! LSCOLORS="exfxcxdxbxegedabagacadeB" invoke_bfs  rainbow -color
diff --git a/tests/bfs/execdir_path_relative_slash.out b/tests/bfs/execdir_path_relative_slash.out
new file mode 100644
index 0000000..62b31f6
--- /dev/null
+++ b/tests/bfs/execdir_path_relative_slash.out
@@ -0,0 +1,19 @@
+./a
+./b
+./bar
+./bar
+./basic
+./baz
+./c
+./d
+./e
+./f
+./foo
+./foo
+./foo
+./g
+./h
+./i
+./j
+./k
+./l
diff --git a/tests/bfs/execdir_path_relative_slash.sh b/tests/bfs/execdir_path_relative_slash.sh
new file mode 100644
index 0000000..fb5a924
--- /dev/null
+++ b/tests/bfs/execdir_path_relative_slash.sh
@@ -0,0 +1 @@
+PATH="foo:$PATH" bfs_diff basic -execdir /bin/sh -c 'printf "%s\\n" "$@"' sh {} +
diff --git a/tests/bfs/files0_from_root.sh b/tests/bfs/files0_from_root.sh
new file mode 100644
index 0000000..6ba5f00
--- /dev/null
+++ b/tests/bfs/files0_from_root.sh
@@ -0,0 +1,2 @@
+printf 'basic\0' >"$TEST/input"
+! invoke_bfs basic -files0-from "$TEST/input"
diff --git a/tests/bfstd.c b/tests/bfstd.c
index a43783a..6e15e2b 100644
--- a/tests/bfstd.c
+++ b/tests/bfstd.c
@@ -6,35 +6,15 @@
 #include "bfstd.h"
 #include "diag.h"
 
+#include <errno.h>
 #include <langinfo.h>
+#include <limits.h>
+#include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
 
-/** Check the result of xdirname()/xbasename(). */
-static void check_base_dir(const char *path, const char *dir, const char *base) {
-	char *xdir = xdirname(path);
-	bfs_everify(xdir, "xdirname()");
-	bfs_check(strcmp(xdir, dir) == 0, "xdirname('%s') == '%s' (!= '%s')", path, xdir, dir);
-	free(xdir);
-
-	char *xbase = xbasename(path);
-	bfs_everify(xbase, "xbasename()");
-	bfs_check(strcmp(xbase, base) == 0, "xbasename('%s') == '%s' (!= '%s')", path, xbase, base);
-	free(xbase);
-}
-
-/** Check the result of wordesc(). */
-static void check_wordesc(const char *str, const char *exp, enum wesc_flags flags) {
-	char buf[256];
-	char *end = buf + sizeof(buf);
-	char *esc = wordesc(buf, end, str, flags);
-
-	if (bfs_check(esc != end)) {
-		bfs_check(strcmp(buf, exp) == 0, "wordesc('%s') == '%s' (!= '%s')", str, buf, exp);
-	}
-}
-
-void check_bfstd(void) {
+/** asciilen() test cases. */
+static void check_asciilen(void) {
 	bfs_check(asciilen("") == 0);
 	bfs_check(asciilen("@") == 1);
 	bfs_check(asciilen("@@") == 2);
@@ -49,7 +29,23 @@ void check_bfstd(void) {
 	bfs_check(asciilen("@@@@@@@\xFF@@@@@@a\xFF@@@@@@@") == 7);
 	bfs_check(asciilen("@@@@@@@@\xFF@@@@@a\xFF@@@@@@@") == 8);
 	bfs_check(asciilen("@@@@@@@@@\xFF@@@@a\xFF@@@@@@@") == 9);
+}
+
+/** Check the result of xdirname()/xbasename(). */
+static void check_base_dir(const char *path, const char *dir, const char *base) {
+	char *xdir = xdirname(path);
+	bfs_everify(xdir, "xdirname()");
+	bfs_check(strcmp(xdir, dir) == 0, "xdirname('%s') == '%s' (!= '%s')", path, xdir, dir);
+	free(xdir);
 
+	char *xbase = xbasename(path);
+	bfs_everify(xbase, "xbasename()");
+	bfs_check(strcmp(xbase, base) == 0, "xbasename('%s') == '%s' (!= '%s')", path, xbase, base);
+	free(xbase);
+}
+
+/** xdirname()/xbasename() test cases. */
+static void check_basedirs(void) {
 	// From man 3p basename
 	check_base_dir("usr", ".", "usr");
 	check_base_dir("usr/", ".", "usr");
@@ -61,7 +57,21 @@ void check_bfstd(void) {
 	check_base_dir("/usr/lib", "/usr", "lib");
 	check_base_dir("//usr//lib//", "//usr", "lib");
 	check_base_dir("/home//dwc//test", "/home//dwc", "test");
+}
+
+/** Check the result of wordesc(). */
+static void check_wordesc(const char *str, const char *exp, enum wesc_flags flags) {
+	char buf[256];
+	char *end = buf + sizeof(buf);
+	char *esc = wordesc(buf, end, str, flags);
 
+	if (bfs_check(esc != end)) {
+		bfs_check(strcmp(buf, exp) == 0, "wordesc('%s') == '%s' (!= '%s')", str, buf, exp);
+	}
+}
+
+/** wordesc() test cases. */
+static void check_wordescs(void) {
 	check_wordesc("", "\"\"", WESC_SHELL);
 	check_wordesc("word", "word", WESC_SHELL);
 	check_wordesc("two words", "\"two words\"", WESC_SHELL);
@@ -80,7 +90,123 @@ void check_bfstd(void) {
 		check_wordesc("\xF0\x9F\x98\x80", "\xF0\x9F\x98\x80", WESC_SHELL | WESC_TTY);
 		check_wordesc("\xCB\x9Cuser", "\xCB\x9Cuser", WESC_SHELL);
 	}
+}
+
+/** xstrto*() test cases. */
+static void check_strtox(void) {
+	short s;
+	unsigned short us;
+	int i;
+	unsigned int ui;
+	long l;
+	unsigned long ul;
+	long long ll;
+	unsigned long long ull;
+	char *end;
+
+#define check_strtouerr(err, str, end, base) \
+	do { \
+		bfs_echeck(xstrtous(str, end, base, &us) != 0 && errno == err); \
+		bfs_echeck(xstrtoui(str, end, base, &ui) != 0 && errno == err); \
+		bfs_echeck(xstrtoul(str, end, base, &ul) != 0 && errno == err); \
+		bfs_echeck(xstrtoull(str, end, base, &ull) != 0 && errno == err); \
+	} while (0)
+
+	check_strtouerr(ERANGE, "-1", NULL, 0);
+	check_strtouerr(ERANGE, "-0x1", NULL, 0);
 
+	check_strtouerr(EINVAL, "-", NULL, 0);
+	check_strtouerr(EINVAL, "-q", NULL, 0);
+	check_strtouerr(EINVAL, "-1q", NULL, 0);
+	check_strtouerr(EINVAL, "-0x", NULL, 0);
+
+#define check_strtoerr(err, str, end, base) \
+	do { \
+		bfs_echeck(xstrtos(str, end, base, &s) != 0 && errno == err); \
+		bfs_echeck(xstrtoi(str, end, base, &i) != 0 && errno == err); \
+		bfs_echeck(xstrtol(str, end, base, &l) != 0 && errno == err); \
+		bfs_echeck(xstrtoll(str, end, base, &ll) != 0 && errno == err); \
+		check_strtouerr(err, str, end, base); \
+	} while (0)
+
+	check_strtoerr(EINVAL, "", NULL, 0);
+	check_strtoerr(EINVAL, "", &end, 0);
+	check_strtoerr(EINVAL, " 1 ", &end, 0);
+	check_strtoerr(EINVAL, " -1", NULL, 0);
+	check_strtoerr(EINVAL, " 123", NULL, 0);
+	check_strtoerr(EINVAL, "123 ", NULL, 0);
+	check_strtoerr(EINVAL, "0789", NULL, 0);
+	check_strtoerr(EINVAL, "789A", NULL, 0);
+	check_strtoerr(EINVAL, "0x", NULL, 0);
+	check_strtoerr(EINVAL, "0x789A", NULL, 10);
+	check_strtoerr(EINVAL, "0x-1", NULL, 0);
+
+#define check_strtotype(type, min, max, fmt, fn, str, base, v, n) \
+	do { \
+		if ((n) >= min && (n) <= max) { \
+			bfs_echeck(fn(str, NULL, base, &v) == 0); \
+			bfs_check(v == (type)(n), "%s('%s') == " fmt " (!= " fmt ")", #fn, str, v, (type)(n)); \
+		} else { \
+			bfs_echeck(fn(str, NULL, base, &v) != 0 && errno == ERANGE); \
+		} \
+	} while (0)
+
+#define check_strtoint(str, base, n) \
+	do { \
+		check_strtotype(  signed short,      SHRT_MIN,   SHRT_MAX, "%d",   xstrtos,   str, base,  s,  n); \
+		check_strtotype(  signed int,         INT_MIN,    INT_MAX, "%d",   xstrtoi,   str, base,  i,  n); \
+		check_strtotype(  signed long,       LONG_MIN,   LONG_MAX, "%ld",  xstrtol,   str, base,  l,  n); \
+		check_strtotype(  signed long long, LLONG_MIN,  LLONG_MAX, "%lld", xstrtoll,  str, base, ll,  n); \
+		check_strtotype(unsigned short,             0,  USHRT_MAX, "%u",   xstrtous,  str, base, us,  n); \
+		check_strtotype(unsigned int,               0,   UINT_MAX, "%u",   xstrtoui,  str, base, ui,  n); \
+		check_strtotype(unsigned long,              0,  ULONG_MAX, "%lu",  xstrtoul,  str, base, ul,  n); \
+		check_strtotype(unsigned long long,         0, ULLONG_MAX, "%llu", xstrtoull, str, base, ull, n); \
+	} while (0)
+
+	check_strtoint("123", 0, 123);
+	check_strtoint("+123", 0, 123);
+	check_strtoint("-123", 0, -123);
+
+	check_strtoint("0123", 0, 0123);
+	check_strtoint("0x789A", 0, 0x789A);
+
+	check_strtoint("0123", 10, 123);
+	check_strtoint("0789", 10, 789);
+
+	check_strtoint("123", 16, 0x123);
+
+	check_strtoint("0x7FFF", 0, 0x7FFF);
+	check_strtoint("-0x8000", 0, -0x8000);
+
+	check_strtoint("0x7FFFFFFF", 0, 0x7FFFFFFFL);
+	check_strtoint("-0x80000000", 0, -0x7FFFFFFFL - 1);
+
+	check_strtoint("0x7FFFFFFFFFFFFFFF", 0, 0x7FFFFFFFFFFFFFFFLL);
+	check_strtoint("-0x8000000000000000", 0, -0x7FFFFFFFFFFFFFFFLL - 1);
+
+#define check_strtoend(str, estr, base, n) \
+	do { \
+		bfs_echeck(xstrtoll(str, &end, base, &ll) == 0); \
+		bfs_check(ll == (n), "xstrtoll('%s') == %lld (!= %lld)", str, ll, (long long)(n)); \
+		bfs_check(strcmp(end, estr) == 0, "xstrtoll('%s'): end == '%s' (!= '%s')", str, end, estr); \
+	} while (0)
+
+	check_strtoend("123 ", " ", 0, 123);
+	check_strtoend("0789", "89", 0, 07);
+	check_strtoend("789A", "A", 0, 789);
+	check_strtoend("0xDEFG", "G", 0, 0xDEF);
+}
+
+/** xstrwidth() test cases. */
+static void check_strwidth(void) {
 	bfs_check(xstrwidth("Hello world") == 11);
 	bfs_check(xstrwidth("Hello\1world") == 10);
 }
+
+void check_bfstd(void) {
+	check_asciilen();
+	check_basedirs();
+	check_wordescs();
+	check_strtox();
+	check_strwidth();
+}
diff --git a/tests/bit.c b/tests/bit.c
index 5a3871d..09d470b 100644
--- a/tests/bit.c
+++ b/tests/bit.c
@@ -64,7 +64,7 @@ static_assert(INTMAX_MAX == IWIDTH_MAX(INTMAX_WIDTH));
 	bfs_check((a) == (b), "(0x%jX) %s != %s (0x%jX)", (uintmax_t)(a), #a, #b, (uintmax_t)(b))
 
 void check_bit(void) {
-	const char *str = "\x1\x2\x3\x4";
+	const char *str = "\x1\x2\x3\x4\x5\x6\x7\x8";
 	uint32_t word;
 	memcpy(&word, str, sizeof(word));
 
@@ -88,6 +88,15 @@ void check_bit(void) {
 	(void)bswap(0UL);
 	(void)bswap(0ULL);
 
+	check_eq(load8_beu8(str), 0x01);
+	check_eq(load8_leu8(str), 0x01);
+	check_eq(load8_beu16(str), 0x0102);
+	check_eq(load8_leu16(str), 0x0201);
+	check_eq(load8_beu32(str), 0x01020304);
+	check_eq(load8_leu32(str), 0x04030201);
+	check_eq(load8_beu64(str), 0x0102030405060708ULL);
+	check_eq(load8_leu64(str), 0x0807060504030201ULL);
+
 	check_eq(count_ones(0x0U), 0);
 	check_eq(count_ones(0x1U), 1);
 	check_eq(count_ones(0x2U), 1);
diff --git a/tests/color.sh b/tests/color.sh
index 4f4312e..9e2e0f6 100644
--- a/tests/color.sh
+++ b/tests/color.sh
@@ -46,9 +46,7 @@ show_bar() {
 
     # Name the pipe deterministically based on the ttyname, so that concurrent
     # tests.sh runs on the same terminal (e.g. make -jN check) cooperate
-    local pipe
-    pipe=$(printf '%s' "$TTY" | tr '/' '-')
-    pipe="${TMPDIR:-/tmp}/bfs$pipe.bar"
+    local pipe="${TMPDIR:-/tmp}/bfs${TTY//\//-}.bar"
 
     if mkfifo "$pipe" 2>/dev/null; then
         # We won the race, create the background process to manage the bar
@@ -78,7 +76,7 @@ hide_bar() {
 # The background process that muxes multiple status bars for one TTY
 bar_proc() {
     # Read from the pipe, write to the TTY
-    exec <"$1" >"$TTY"
+    exec <"$1" >/dev/tty
 
     # Delete the pipe when done
     defer rm "$1"
@@ -133,7 +131,7 @@ bar_proc() {
 # Resize the status bar
 resize_bar() {
     # Bash gets $LINES from stderr, so if it's redirected use tput instead
-    TTY_HEIGHT="${LINES:-$(tput lines 2>"$TTY")}"
+    TTY_HEIGHT="${LINES:-$(tput lines 2>/dev/tty)}"
 
     if ((BAR_HEIGHT == 0)); then
         return
diff --git a/tests/getopts.sh b/tests/getopts.sh
index 5214e9f..a16511f 100644
--- a/tests/getopts.sh
+++ b/tests/getopts.sh
@@ -5,11 +5,7 @@
 
 ## Argument parsing
 
-if command -v nproc &>/dev/null; then
-    JOBS=$(nproc)
-else
-    JOBS=1
-fi
+JOBS=$(_nproc)
 MAKE=
 PATTERNS=()
 SUDO=()
@@ -23,7 +19,6 @@ VERBOSE_TESTS=0
 
 # Print usage information
 usage() {
-    local pad=$(printf "%*s" ${#0} "")
     color cat <<EOF
 Usage: ${GRN}$0${RST}
            [${BLU}-j${RST}${BLD}N${RST}] [${BLU}--make${RST}=${BLD}MAKE${RST}] [${BLU}--bfs${RST}=${BLD}path/to/bfs${RST}] [${BLU}--sudo${RST}[=${BLD}COMMAND${RST}]]
diff --git a/tests/gnu/execdir_self.out b/tests/gnu/execdir_self.out
new file mode 100644
index 0000000..3ad0640
--- /dev/null
+++ b/tests/gnu/execdir_self.out
@@ -0,0 +1 @@
+./bar.sh
diff --git a/tests/gnu/execdir_self.sh b/tests/gnu/execdir_self.sh
new file mode 100644
index 0000000..1fc5d04
--- /dev/null
+++ b/tests/gnu/execdir_self.sh
@@ -0,0 +1,9 @@
+cd "$TEST"
+mkdir foo
+cat >foo/bar.sh <<EOF
+#!/bin/sh
+printf '%s\n' "\$@"
+EOF
+chmod +x foo/bar.sh
+
+bfs_diff . -name bar.sh -execdir {} {} \;
diff --git a/tests/gnu/files0_from_empty.sh b/tests/gnu/files0_from_empty.sh
index 85eee8f..7b42772 100644
--- a/tests/gnu/files0_from_empty.sh
+++ b/tests/gnu/files0_from_empty.sh
@@ -1 +1 @@
-! printf "\0" | invoke_bfs -files0-from -
+! printf '\0' | invoke_bfs -files0-from -
diff --git a/tests/gnu/files0_from_file_file.out b/tests/gnu/files0_from_file_file.out
new file mode 100644
index 0000000..fb683c7
--- /dev/null
+++ b/tests/gnu/files0_from_file_file.out
@@ -0,0 +1,2 @@
+basic/g
+basic/g/h
diff --git a/tests/gnu/files0_from_file_file.sh b/tests/gnu/files0_from_file_file.sh
new file mode 100644
index 0000000..1119952
--- /dev/null
+++ b/tests/gnu/files0_from_file_file.sh
@@ -0,0 +1,3 @@
+printf 'basic/c\0' >"$TEST/in1"
+printf 'basic/g\0' >"$TEST/in2"
+bfs_diff -files0-from "$TEST/in1" -files0-from "$TEST/in2"
diff --git a/tests/gnu/files0_from_ok.sh b/tests/gnu/files0_from_ok.sh
deleted file mode 100644
index 8e145ce..0000000
--- a/tests/gnu/files0_from_ok.sh
+++ /dev/null
@@ -1 +0,0 @@
-! printf "basic\0" | invoke_bfs -files0-from - -ok echo {} \;
diff --git a/tests/gnu/files0_from_stdin_ok.sh b/tests/gnu/files0_from_stdin_ok.sh
new file mode 100644
index 0000000..0283c8d
--- /dev/null
+++ b/tests/gnu/files0_from_stdin_ok.sh
@@ -0,0 +1 @@
+! printf 'basic\0' | invoke_bfs -files0-from - -ok echo {} \;
diff --git a/tests/gnu/files0_from_stdin_ok_file.out b/tests/gnu/files0_from_stdin_ok_file.out
new file mode 100644
index 0000000..0f6b00d
--- /dev/null
+++ b/tests/gnu/files0_from_stdin_ok_file.out
@@ -0,0 +1,45 @@
+
+
+
+
+ 
+ /j
+ /j
+!
+!-
+!-/e
+!-/e
+!/d
+!/d
+(
+(-
+(-/c
+(-/c
+(/b
+(/b
+)
+)/g
+)/g
+*
+*/m
+*/m
+,
+,/f
+,/f
+-
+-/a
+-/a
+...
+.../h
+.../h
+/n
+/n
+[
+[/k
+[/k
+\
+\/i
+\/i
+{
+{/l
+{/l
diff --git a/tests/gnu/files0_from_stdin_ok_file.sh b/tests/gnu/files0_from_stdin_ok_file.sh
new file mode 100644
index 0000000..028df0c
--- /dev/null
+++ b/tests/gnu/files0_from_stdin_ok_file.sh
@@ -0,0 +1,4 @@
+FILE="$TMP/$TEST.in"
+cd weirdnames
+invoke_bfs -mindepth 1 -fprintf "$FILE" "%P\0"
+yes | bfs_diff -files0-from - -ok printf '%s\n' {} \; -files0-from "$FILE"
diff --git a/tests/gnu/files0_from_stdin_stdin.out b/tests/gnu/files0_from_stdin_stdin.out
new file mode 100644
index 0000000..0f6b00d
--- /dev/null
+++ b/tests/gnu/files0_from_stdin_stdin.out
@@ -0,0 +1,45 @@
+
+
+
+
+ 
+ /j
+ /j
+!
+!-
+!-/e
+!-/e
+!/d
+!/d
+(
+(-
+(-/c
+(-/c
+(/b
+(/b
+)
+)/g
+)/g
+*
+*/m
+*/m
+,
+,/f
+,/f
+-
+-/a
+-/a
+...
+.../h
+.../h
+/n
+/n
+[
+[/k
+[/k
+\
+\/i
+\/i
+{
+{/l
+{/l
diff --git a/tests/gnu/files0_from_stdin_stdin.sh b/tests/gnu/files0_from_stdin_stdin.sh
new file mode 100644
index 0000000..8f6368f
--- /dev/null
+++ b/tests/gnu/files0_from_stdin_stdin.sh
@@ -0,0 +1,2 @@
+cd weirdnames
+invoke_bfs -mindepth 1 -printf "%P\0" | bfs_diff -files0-from - -files0-from -
diff --git a/tests/gnu/fls_overflow.sh b/tests/gnu/fls_overflow.sh
new file mode 100644
index 0000000..067bc86
--- /dev/null
+++ b/tests/gnu/fls_overflow.sh
@@ -0,0 +1,4 @@
+# Regression test: times that overflow localtime() should still print
+cd "$TEST"
+"$XTOUCH" -t "@1111111111111111111" overflow || skip
+invoke_bfs . -fls "$OUT"
diff --git a/tests/gnu/follow_files0_from.out b/tests/gnu/follow_files0_from.out
new file mode 100644
index 0000000..c77d546
--- /dev/null
+++ b/tests/gnu/follow_files0_from.out
@@ -0,0 +1,42 @@
+links
+links/broken
+links/broken
+links/deeply
+links/deeply
+links/deeply/nested
+links/deeply/nested
+links/deeply/nested
+links/deeply/nested/broken
+links/deeply/nested/broken
+links/deeply/nested/broken
+links/deeply/nested/broken
+links/deeply/nested/dir
+links/deeply/nested/dir
+links/deeply/nested/dir
+links/deeply/nested/dir
+links/deeply/nested/file
+links/deeply/nested/file
+links/deeply/nested/file
+links/deeply/nested/file
+links/deeply/nested/link
+links/deeply/nested/link
+links/deeply/nested/link
+links/deeply/nested/link
+links/file
+links/file
+links/hardlink
+links/hardlink
+links/notdir
+links/notdir
+links/skip
+links/skip
+links/skip/broken
+links/skip/broken
+links/skip/dir
+links/skip/dir
+links/skip/file
+links/skip/file
+links/skip/link
+links/skip/link
+links/symlink
+links/symlink
diff --git a/tests/gnu/follow_files0_from.sh b/tests/gnu/follow_files0_from.sh
new file mode 100644
index 0000000..8c20f6d
--- /dev/null
+++ b/tests/gnu/follow_files0_from.sh
@@ -0,0 +1 @@
+invoke_bfs links -print0 | bfs_diff -follow -files0-from -
diff --git a/tests/gnu/fstype_btrfs_subvol.out b/tests/gnu/fstype_btrfs_subvol.out
new file mode 100644
index 0000000..8871fb9
--- /dev/null
+++ b/tests/gnu/fstype_btrfs_subvol.out
@@ -0,0 +1,4 @@
+mnt
+mnt/file
+mnt/subvol
+mnt/subvol/file
diff --git a/tests/gnu/fstype_btrfs_subvol.sh b/tests/gnu/fstype_btrfs_subvol.sh
new file mode 100644
index 0000000..71df45c
--- /dev/null
+++ b/tests/gnu/fstype_btrfs_subvol.sh
@@ -0,0 +1,25 @@
+# Test that -fstype works in btrfs subvolumes
+
+command -v btrfs &>/dev/null || skip
+
+cd "$TEST"
+
+# Make a btrfs filesystem image
+truncate -s128M img
+mkfs.btrfs img >&2
+
+# Mount it
+mkdir mnt
+bfs_sudo mount img mnt || skip
+defer bfs_sudo umount mnt
+
+# Make it owned by us
+bfs_sudo chown "$(id -u):$(id -g)" mnt
+
+# Create a subvolume inside it
+btrfs subvolume create mnt/subvol >&2
+
+# Make a file in and outside the subvolume
+"$XTOUCH" mnt/file mnt/subvol/file
+
+bfs_diff mnt -fstype btrfs -print -o -printf '%p %F\n'
diff --git a/tests/gnu/ignore_readdir_race_rmdir.out b/tests/gnu/ignore_readdir_race_rmdir.out
new file mode 100644
index 0000000..ede8749
--- /dev/null
+++ b/tests/gnu/ignore_readdir_race_rmdir.out
@@ -0,0 +1,2 @@
+./bar
+./foo
diff --git a/tests/gnu/ignore_readdir_race_rmdir.sh b/tests/gnu/ignore_readdir_race_rmdir.sh
new file mode 100644
index 0000000..87f36a9
--- /dev/null
+++ b/tests/gnu/ignore_readdir_race_rmdir.sh
@@ -0,0 +1,5 @@
+cd "$TEST"
+"$XTOUCH" -p foo/ bar/
+
+# Check that -ignore_readdir_race suppresses errors from opendir()
+bfs_diff . -ignore_readdir_race -mindepth 1 -print -name foo -exec rmdir {} \;
diff --git a/tests/gnu/ok_files0_from_stdin.sh b/tests/gnu/ok_files0_from_stdin.sh
new file mode 100644
index 0000000..2c4de7b
--- /dev/null
+++ b/tests/gnu/ok_files0_from_stdin.sh
@@ -0,0 +1 @@
+! printf 'basic\0' | invoke_bfs -ok echo {} \; -files0-from -
diff --git a/tests/gnu/ok_flush.sh b/tests/gnu/ok_flush.sh
index 87c7298..a5dc0d0 100644
--- a/tests/gnu/ok_flush.sh
+++ b/tests/gnu/ok_flush.sh
@@ -1,4 +1,4 @@
 # I/O streams should be flushed before -ok prompts
-yes | invoke_bfs basic -printf '%p ? ' -ok echo found \; 2>&1 | tr '\0' ' ' | sed 's/?.*?/?/' >"$OUT"
+yes | invoke_bfs basic -printf '%p ? ' -ok echo found \; 2>&1 | sed 's/?.*?/?/' >"$OUT"
 sort_output
 diff_output
diff --git a/tests/ioq.c b/tests/ioq.c
index f067436..1a0da97 100644
--- a/tests/ioq.c
+++ b/tests/ioq.c
@@ -49,6 +49,7 @@ static void check_ioq_push_block(void) {
 		int ret = ioq_opendir(ioq, dir, AT_FDCWD, ".", 0, NULL);
 		bfs_everify(ret == 0, "ioq_opendir()");
 	}
+	ioq_submit(ioq);
 	bfs_verify(ioq_capacity(ioq) == 0);
 
 	// Now cancel the queue, pushing an additional IOQ_STOP message
diff --git a/tests/main.c b/tests/main.c
index 81c2311..9240e1c 100644
--- a/tests/main.c
+++ b/tests/main.c
@@ -7,24 +7,40 @@
 
 #include "tests.h"
 
+#include "alloc.h"
+#include "bfstd.h"
 #include "color.h"
+#include "list.h"
 
 #include <locale.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <stdint.h>
 #include <string.h>
+#include <sys/wait.h>
 #include <time.h>
+#include <unistd.h>
 
 /** Result of the current test. */
-static thread_local bool pass;
+static bool pass;
 
 bool bfs_check_impl(bool result) {
 	pass &= result;
 	return result;
 }
 
-/** Unit test function type. */
-typedef void test_fn(void);
+/**
+ * A running test.
+ */
+struct test_proc {
+	/** Linked list links. */
+	struct test_proc *prev, *next;
+
+	/** The PID of this test. */
+	pid_t pid;
+	/** The name of this test. */
+	const char *name;
+};
 
 /**
  * Global test context.
@@ -35,6 +51,17 @@ struct test_ctx {
 	/** The arguments themselves. */
 	char **argv;
 
+	/** Maximum jobs (-j). */
+	int jobs;
+	/** Current jobs. */
+	int running;
+	/** Completed jobs. */
+	int done;
+	/** List of running tests. */
+	struct {
+		struct test_proc *head, *tail;
+	} procs;
+
 	/** Parsed colors. */
 	struct colors *colors;
 	/** Colorized output stream. */
@@ -45,10 +72,15 @@ struct test_ctx {
 };
 
 /** Initialize the test context. */
-static int test_init(struct test_ctx *ctx, int argc, char **argv) {
+static int test_init(struct test_ctx *ctx, int jobs, int argc, char **argv) {
 	ctx->argc = argc;
 	ctx->argv = argv;
 
+	ctx->jobs = jobs;
+	ctx->running = 0;
+	ctx->done = 0;
+	LIST_INIT(&ctx->procs);
+
 	ctx->colors = parse_colors();
 	ctx->cout = cfwrap(stdout, ctx->colors, false);
 	if (!ctx->cout) {
@@ -60,26 +92,15 @@ static int test_init(struct test_ctx *ctx, int argc, char **argv) {
 	return 0;
 }
 
-/** Finalize the test context. */
-static int test_fini(struct test_ctx *ctx) {
-	if (ctx->cout) {
-		cfclose(ctx->cout);
-	}
-
-	free_colors(ctx->colors);
-
-	return ctx->ret;
-}
-
 /** Check if a test case is enabled for this run. */
 static bool should_run(const struct test_ctx *ctx, const char *test) {
 	// Run all tests by default
-	if (ctx->argc < 2) {
+	if (ctx->argc == 0) {
 		return true;
 	}
 
 	// With args, run only specified tests
-	for (int i = 1; i < ctx->argc; ++i) {
+	for (int i = 0; i < ctx->argc; ++i) {
 		if (strcmp(test, ctx->argv[i]) == 0) {
 			return true;
 		}
@@ -88,19 +109,104 @@ static bool should_run(const struct test_ctx *ctx, const char *test) {
 	return false;
 }
 
-/** Run a test if it's enabled. */
-static void run_test(struct test_ctx *ctx, const char *test, test_fn *fn) {
-	if (should_run(ctx, test)) {
-		pass = true;
-		fn();
+/** Wait for a test to finish. */
+static void wait_test(struct test_ctx *ctx) {
+	int wstatus;
+	pid_t pid = xwaitpid(0, &wstatus, 0);
+	bfs_everify(pid > 0, "xwaitpid()");
 
-		if (pass) {
-			cfprintf(ctx->cout, "${grn}[PASS]${rs} ${bld}%s${rs}\n", test);
+	struct test_proc *proc = NULL;
+	for_list (struct test_proc, i, &ctx->procs) {
+		if (i->pid == pid) {
+			proc = i;
+			break;
+		}
+	}
+
+	bfs_verify(proc, "No test_proc for PID %ju", (intmax_t)pid);
+
+	bool passed = false;
+
+	if (WIFEXITED(wstatus)) {
+		int status = WEXITSTATUS(wstatus);
+		if (status == EXIT_SUCCESS) {
+			cfprintf(ctx->cout, "${grn}[PASS]${rs} ${bld}%s${rs}\n", proc->name);
+			passed = true;
+		} else if (status == EXIT_FAILURE) {
+			cfprintf(ctx->cout, "${red}[FAIL]${rs} ${bld}%s${rs}\n", proc->name);
 		} else {
-			cfprintf(ctx->cout, "${red}[FAIL]${rs} ${bld}%s${rs}\n", test);
-			ctx->ret = EXIT_FAILURE;
+			cfprintf(ctx->cout, "${red}[FAIL]${rs} ${bld}%s${rs} (Exit %d)\n", proc->name, status);
+		}
+	} else {
+		const char *str = NULL;
+		if (WIFSIGNALED(wstatus)) {
+			str = strsignal(WTERMSIG(wstatus));
 		}
+		if (!str) {
+			str = "Unknown";
+		}
+		cfprintf(ctx->cout, "${red}[FAIL]${rs} ${bld}%s${rs} (%s)\n", proc->name, str);
+	}
+
+	if (!passed) {
+		ctx->ret = EXIT_FAILURE;
 	}
+
+	--ctx->running;
+	++ctx->done;
+	LIST_REMOVE(&ctx->procs, proc);
+	free(proc);
+}
+
+/** Unit test function type. */
+typedef void test_fn(void);
+
+/** Run a test if it's enabled. */
+static void run_test(struct test_ctx *ctx, const char *test, test_fn *fn) {
+	if (!should_run(ctx, test)) {
+		return;
+	}
+
+	while (ctx->running >= ctx->jobs) {
+		wait_test(ctx);
+	}
+
+	struct test_proc *proc = ALLOC(struct test_proc);
+	bfs_everify(proc, "alloc()");
+
+	LIST_ITEM_INIT(proc);
+	proc->name = test;
+
+	fflush(NULL);
+	proc->pid = fork();
+	bfs_everify(proc->pid >= 0, "fork()");
+
+	if (proc->pid > 0) {
+		// Parent
+		++ctx->running;
+		LIST_APPEND(&ctx->procs, proc);
+		return;
+	}
+
+	// Child
+	pass = true;
+	fn();
+	exit(pass ? EXIT_SUCCESS : EXIT_FAILURE);
+}
+
+/** Finalize the test context. */
+static int test_fini(struct test_ctx *ctx) {
+	while (ctx->running > 0) {
+		wait_test(ctx);
+	}
+
+	if (ctx->cout) {
+		cfclose(ctx->cout);
+	}
+
+	free_colors(ctx->colors);
+
+	return ctx->ret;
 }
 
 int main(int argc, char *argv[]) {
@@ -116,8 +222,37 @@ int main(int argc, char *argv[]) {
 	}
 	tzset();
 
+	unsigned int jobs = 0;
+
+	const char *cmd = argc > 0 ? argv[0] : "units";
+	int c;
+	while (c = getopt(argc, argv, ":j:"), c != -1) {
+		switch (c) {
+		case 'j':
+			if (xstrtoui(optarg, NULL, 10, &jobs) != 0) {
+				fprintf(stderr, "%s: Bad job count '%s': %s\n", cmd, optarg, errstr());
+				return EXIT_FAILURE;
+			}
+			break;
+		case ':':
+			fprintf(stderr, "%s: Missing argument to -%c\n", cmd, optopt);
+			return EXIT_FAILURE;
+		case '?':
+			fprintf(stderr, "%s: Unrecognized option -%c\n", cmd, optopt);
+			return EXIT_FAILURE;
+		}
+	}
+
+	if (!jobs) {
+		jobs = nproc();
+	}
+
+	if (optind > argc) {
+		optind = argc;
+	}
+
 	struct test_ctx ctx;
-	if (test_init(&ctx, argc, argv) != 0) {
+	if (test_init(&ctx, jobs, argc - optind, argv + optind) != 0) {
 		goto done;
 	}
 
diff --git a/tests/posix/exec_return.out b/tests/posix/exec_return.out
new file mode 100644
index 0000000..600c93a
--- /dev/null
+++ b/tests/posix/exec_return.out
@@ -0,0 +1,18 @@
+basic
+basic/a
+basic/b
+basic/c/d
+basic/e
+basic/e/f
+basic/g
+basic/g/h
+basic/i
+basic/j
+basic/j/foo
+basic/k
+basic/k/foo
+basic/k/foo/bar
+basic/l
+basic/l/foo
+basic/l/foo/bar
+basic/l/foo/bar/baz
diff --git a/tests/posix/exec_return.sh b/tests/posix/exec_return.sh
new file mode 100644
index 0000000..cfa0f5d
--- /dev/null
+++ b/tests/posix/exec_return.sh
@@ -0,0 +1 @@
+bfs_diff basic -exec test {} = basic/c \; -o -print
diff --git a/tests/posix/exec_sigmask.out b/tests/posix/exec_sigmask.out
new file mode 100644
index 0000000..bb646f3
--- /dev/null
+++ b/tests/posix/exec_sigmask.out
@@ -0,0 +1 @@
+SigBlk:	0000000000000000
diff --git a/tests/posix/exec_sigmask.sh b/tests/posix/exec_sigmask.sh
new file mode 100644
index 0000000..2907458
--- /dev/null
+++ b/tests/posix/exec_sigmask.sh
@@ -0,0 +1,16 @@
+# Regression test: restore the signal mask after fork()
+
+cd "$TEST"
+mkfifo p1 p2
+
+{
+    # Get the PID of `sh`
+    read -r pid <p1
+    # Send SIGTERM -- this will hang forever if signals are blocked
+    kill $pid
+} &
+
+# Write the `sh` PID to p1, then hang reading p2 until we're killed
+! invoke_bfs p1 -exec bash -c 'echo $$ >p1 && read -r _ <p2' bash {} + || fail
+
+_wait
diff --git a/tests/posix/group_o_group.out b/tests/posix/group_o_group.out
new file mode 100644
index 0000000..a7ccfe4
--- /dev/null
+++ b/tests/posix/group_o_group.out
@@ -0,0 +1,19 @@
+basic
+basic/a
+basic/b
+basic/c
+basic/c/d
+basic/e
+basic/e/f
+basic/g
+basic/g/h
+basic/i
+basic/j
+basic/j/foo
+basic/k
+basic/k/foo
+basic/k/foo/bar
+basic/l
+basic/l/foo
+basic/l/foo/bar
+basic/l/foo/bar/baz
diff --git a/tests/posix/group_o_group.sh b/tests/posix/group_o_group.sh
new file mode 100644
index 0000000..60aefc0
--- /dev/null
+++ b/tests/posix/group_o_group.sh
@@ -0,0 +1,3 @@
+# Regression test for
+# https://github.com/tavianator/bfs/issues/155
+bfs_diff basic -group 0 -o -group "$(id -g)"
diff --git a/tests/posix/root_order.out b/tests/posix/root_order.out
new file mode 100644
index 0000000..ea94276
--- /dev/null
+++ b/tests/posix/root_order.out
@@ -0,0 +1,4 @@
+basic/a
+basic/b
+basic/c/d
+basic/e/f
diff --git a/tests/posix/root_order.sh b/tests/posix/root_order.sh
new file mode 100644
index 0000000..86adf20
--- /dev/null
+++ b/tests/posix/root_order.sh
@@ -0,0 +1,6 @@
+# Root paths must be processed in order
+# https://www.austingroupbugs.net/view.php?id=1859
+
+# -size forces a stat(), which we don't want to be async
+invoke_bfs basic/{a,b,c/d,e/f} -size -1000 >"$OUT"
+diff_output
diff --git a/tests/posix/user_o_user.out b/tests/posix/user_o_user.out
new file mode 100644
index 0000000..a7ccfe4
--- /dev/null
+++ b/tests/posix/user_o_user.out
@@ -0,0 +1,19 @@
+basic
+basic/a
+basic/b
+basic/c
+basic/c/d
+basic/e
+basic/e/f
+basic/g
+basic/g/h
+basic/i
+basic/j
+basic/j/foo
+basic/k
+basic/k/foo
+basic/k/foo/bar
+basic/l
+basic/l/foo
+basic/l/foo/bar
+basic/l/foo/bar/baz
diff --git a/tests/posix/user_o_user.sh b/tests/posix/user_o_user.sh
new file mode 100644
index 0000000..7c143ae
--- /dev/null
+++ b/tests/posix/user_o_user.sh
@@ -0,0 +1,3 @@
+# Regression test for
+# https://github.com/tavianator/bfs/issues/155
+bfs_diff basic -user 0 -o -user "$(id -u)"
diff --git a/tests/ptyx.c b/tests/ptyx.c
new file mode 100644
index 0000000..59292df
--- /dev/null
+++ b/tests/ptyx.c
@@ -0,0 +1,252 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+/**
+ * Execute a command in a pseudo-terminal.
+ *
+ *     $ ptyx [-w WIDTH] [-h HEIGHT] [--] COMMAND [ARGS...]
+ */
+
+#include "bfs.h"
+#include "bfstd.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/wait.h>
+#include <termios.h>
+#include <unistd.h>
+
+#if __has_include(<stropts.h>)
+#  include <stropts.h>
+#endif
+
+#if __sun
+/**
+ * Push a STREAMS module, if it's not already there.
+ *
+ * See https://www.illumos.org/issues/9042.
+ */
+static int i_push(int fd, const char *name) {
+	int ret = ioctl(fd, I_FIND, name);
+	if (ret < 0) {
+		return ret;
+	} else if (ret == 0) {
+		return ioctl(fd, I_PUSH, name);
+	} else {
+		return 0;
+	}
+}
+#endif
+
+int main(int argc, char *argv[]) {
+	const char *cmd = argc > 0 ? argv[0] : "ptyx";
+
+/** Report an error message and exit. */
+#define die(...) die_(__VA_ARGS__, )
+
+#define die_(format, ...) \
+	do { \
+		fprintf(stderr, "%s: " format "%s", cmd, __VA_ARGS__ "\n"); \
+		exit(EXIT_FAILURE); \
+	} while (0)
+
+/** Report an error code and exit. */
+#define edie(...) edie_(__VA_ARGS__, )
+
+#define edie_(format, ...) \
+	do { \
+		fprintf(stderr, "%s: " format ": %s\n", cmd, __VA_ARGS__ errstr()); \
+		exit(EXIT_FAILURE); \
+	} while (0)
+
+	unsigned short width = 0;
+	unsigned short height = 0;
+
+	// Parse the command line
+	int c;
+	while (c = getopt(argc, argv, "+:w:h:"), c != -1) {
+		switch (c) {
+		case 'w':
+			if (xstrtous(optarg, NULL, 10, &width) != 0) {
+				edie("Bad width '%s'", optarg);
+			}
+			break;
+		case 'h':
+			if (xstrtous(optarg, NULL, 10, &height) != 0) {
+				edie("Bad height '%s'", optarg);
+			}
+			break;
+		case ':':
+			die("Missing argument to -%c", optopt);
+		case '?':
+			die("Unrecognized option -%c", optopt);
+		}
+	}
+
+	if (optind >= argc) {
+		die("Missing command");
+	}
+	char **args = argv + optind;
+
+	// Create a new pty, and set it up
+	int ptm = posix_openpt(O_RDWR | O_NOCTTY);
+	if (ptm < 0) {
+		edie("posix_openpt()");
+	}
+	if (grantpt(ptm) != 0) {
+		edie("grantpt()");
+	}
+	if (unlockpt(ptm) != 0) {
+		edie("unlockpt()");
+	}
+
+	// Get the subsidiary device path
+	char *name = ptsname(ptm);
+	if (!name) {
+		edie("ptsname()");
+	}
+
+	// Open the subsidiary device
+	int pts = open(name, O_RDWR | O_NOCTTY);
+	if (pts < 0) {
+		edie("%s", name);
+	}
+
+#if __sun
+	// On Solaris/illumos, a pty doesn't behave like a terminal until we
+	// push some STREAMS modules (see ptm(4D), ptem(4M), ldterm(4M)).
+	if (i_push(pts, "ptem") != 0) {
+		die("ioctl(I_PUSH, ptem)");
+	}
+	if (i_push(pts, "ldterm") != 0) {
+		die("ioctl(I_PUSH, ldterm)");
+	}
+#endif
+
+	// A new pty starts at 0x0, which is not very useful.  Instead, grab the
+	// default size from the current controlling terminal, if possible.
+	if (!width || !height) {
+		int tty = open_cterm(O_RDONLY | O_CLOEXEC);
+		if (tty >= 0) {
+			struct winsize ws;
+			if (xtcgetwinsize(tty, &ws) != 0) {
+				edie("tcgetwinsize()");
+			}
+			if (!width) {
+				width = ws.ws_col;
+			}
+			if (!height) {
+				height = ws.ws_row;
+			}
+			xclose(tty);
+		}
+	}
+	if (!width) {
+		width = 80;
+	}
+	if (!height) {
+		height = 24;
+	}
+
+	// Update the pty size
+	struct winsize ws;
+	if (xtcgetwinsize(pts, &ws) != 0) {
+		edie("tcgetwinsize()");
+	}
+	ws.ws_col = width;
+	ws.ws_row = height;
+	if (xtcsetwinsize(pts, &ws) != 0) {
+		edie("tcsetwinsize()");
+	}
+
+	// Set custom terminal attributes
+	struct termios attrs;
+	if (tcgetattr(pts, &attrs) != 0) {
+		edie("tcgetattr()");
+	}
+	attrs.c_oflag &= ~OPOST; // Don't convert \n to \r\n
+	if (tcsetattr(pts, TCSANOW, &attrs) != 0) {
+		edie("tcsetattr()");
+	}
+
+	pid_t pid = fork();
+	if (pid < 0) {
+		edie("fork()");
+	} else if (pid == 0) {
+		// Child
+		close(ptm);
+
+		// Make ourselves a session leader so we can have our own
+		// controlling terminal
+		if (setsid() < 0) {
+			edie("setsid()");
+		}
+
+#ifdef TIOCSCTTY
+		// Set the pty as the controlling terminal
+		if (ioctl(pts, TIOCSCTTY, 0) != 0) {
+			edie("ioctl(TIOCSCTTY)");
+		}
+#endif
+
+		// Redirect std{in,out,err} to the pty
+		if (dup2(pts, STDIN_FILENO) < 0
+		    || dup2(pts, STDOUT_FILENO) < 0
+		    || dup2(pts, STDERR_FILENO) < 0) {
+			edie("dup2()");
+		}
+		if (pts > STDERR_FILENO) {
+			xclose(pts);
+		}
+
+		// Run the requested command
+		execvp(args[0], args);
+		edie("execvp(): %s", args[0]);
+	}
+
+	// Parent
+	xclose(pts);
+
+	// Read output from the pty and copy it to stdout
+	char buf[1024];
+	while (true) {
+		ssize_t len = read(ptm, buf, sizeof(buf));
+		if (len > 0) {
+			if (xwrite(STDOUT_FILENO, buf, len) < 0) {
+				edie("write()");
+			}
+		} else if (len == 0) {
+			break;
+		} else if (errno == EINTR) {
+			continue;
+		} else if (errno == EIO) {
+			// Linux reports EIO rather than EOF when pts is closed
+			break;
+		} else {
+			die("read()");
+		}
+	}
+
+	xclose(ptm);
+
+	int wstatus;
+	if (xwaitpid(pid, &wstatus, 0) < 0) {
+		edie("waitpid()");
+	}
+
+	if (WIFEXITED(wstatus)) {
+		return WEXITSTATUS(wstatus);
+	} else if (WIFSIGNALED(wstatus)) {
+		int sig = WTERMSIG(wstatus);
+		fprintf(stderr, "%s: %s: %s\n", cmd, args[0], strsignal(sig));
+		return 128 + sig;
+	} else {
+		return 128;
+	}
+}
diff --git a/tests/run.sh b/tests/run.sh
index 164790e..3ed2a9c 100644
--- a/tests/run.sh
+++ b/tests/run.sh
@@ -96,16 +96,13 @@ reap_test() {
 wait_test() {
     local pid line ret
 
-    while true; do
+    while :; do
         line=$((LINENO + 1))
-        wait -n -ppid
+        _wait -n -ppid
         ret=$?
 
         if [ "${pid:-}" ]; then
             break
-        elif ((ret > 128)); then
-            # Interrupted by signal
-            continue
         else
             debug "${BASH_SOURCE[0]}" $line "${RED}error $ret${RST}" >&$DUPERR
             exit 1
@@ -362,20 +359,12 @@ invoke_bfs() {
     fi
 }
 
-if command -v unbuffer &>/dev/null; then
-    UNBUFFER=unbuffer
-elif command -v expect_unbuffer &>/dev/null; then
-    UNBUFFER=expect_unbuffer
-fi
-
 # Run bfs with a pseudo-terminal attached
 bfs_pty() {
-    test -n "${UNBUFFER:-}" || skip
-
     bfs_verbose "$@"
 
     local ret=0
-    "$UNBUFFER" bash -c 'stty cols 80 rows 24 && "$@" </dev/null' bash "${BFS[@]}" "$@" || ret=$?
+    "$PTYX" -w80 -h24 -- "${BFS[@]}" "$@" || ret=$?
 
     if ((ret > 125)); then
         exit $ret
@@ -415,8 +404,13 @@ make_xattrs() {
 
 # Get the Unix epoch time in seconds
 epoch_time() {
-    # https://stackoverflow.com/a/12746260/502399
-    awk 'BEGIN { srand(); print srand(); }'
+    if [ "${EPOCHSECONDS:-}" ]; then
+        # Added in bash 5
+        printf '%d' "$EPOCHSECONDS"
+    else
+        # https://stackoverflow.com/a/12746260/502399
+        awk 'BEGIN { srand(); print srand(); }'
+    fi
 }
 
 ## Snapshot testing
diff --git a/tests/sighook.c b/tests/sighook.c
index 0cb8de2..82e0ae5 100644
--- a/tests/sighook.c
+++ b/tests/sighook.c
@@ -4,28 +4,40 @@
 #include "tests.h"
 
 #include "atomic.h"
+#include "bfstd.h"
 #include "sighook.h"
 #include "thread.h"
 #include "xtime.h"
 
-#include <stddef.h>
 #include <errno.h>
 #include <pthread.h>
 #include <signal.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <sys/wait.h>
+#include <unistd.h>
 
 /** Counts SIGALRM deliveries. */
 static atomic size_t count = 0;
 
-/** Keeps the background thread alive. */
-static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
-static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
-static bool done = false;
-
 /** SIGALRM handler. */
 static void alrm_hook(int sig, siginfo_t *info, void *arg) {
 	fetch_add(&count, 1, relaxed);
 }
 
+/** SH_ONESHOT counter. */
+static atomic size_t shots = 0;
+
+/** SH_ONESHOT hook. */
+static void alrm_oneshot(int sig, siginfo_t *info, void *arg) {
+	fetch_add(&shots, 1, relaxed);
+}
+
+/** Keeps the background thread alive. */
+static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
+static bool done = false;
+
 /** Background thread that receives signals. */
 static void *hook_thread(void *ptr) {
 	mutex_lock(&mutex);
@@ -54,41 +66,62 @@ static int block_signal(int sig, sigset_t *old) {
 	return 0;
 }
 
-void check_sighook(void) {
-	struct sighook *hook = sighook(SIGALRM, alrm_hook, NULL, SH_CONTINUE);
-	if (!bfs_echeck(hook, "sighook(SIGALRM)")) {
-		return;
-	}
+/** Tests for sighook(). */
+static void check_hooks(void) {
+	struct sighook *hook = NULL;
+	struct sighook *oneshot = NULL;
 
-	// Check that we can unregister and re-register a hook
-	sigunhook(hook);
 	hook = sighook(SIGALRM, alrm_hook, NULL, SH_CONTINUE);
 	if (!bfs_echeck(hook, "sighook(SIGALRM)")) {
 		return;
 	}
 
-	// Create a timer that sends SIGALRM every 100 microseconds
-	struct timespec ival = { .tv_nsec = 100 * 1000 };
-	struct timer *timer = xtimer_start(&ival);
-	if (!bfs_echeck(timer)) {
-		goto unhook;
-	}
-
-	// Create a background thread to receive signals
+	// Create a background thread to receive SIGALRM
 	pthread_t thread;
 	if (!bfs_echeck(thread_create(&thread, NULL, hook_thread, NULL) == 0)) {
-		goto untime;
+		goto unhook;
 	}
 
 	// Block SIGALRM in this thread so the handler runs concurrently with
 	// sighook()/sigunhook()
 	sigset_t mask;
 	if (!bfs_echeck(block_signal(SIGALRM, &mask) == 0)) {
-		goto untime;
+		goto unthread;
+	}
+
+	// Check that we can unregister and re-register a hook
+	sigunhook(hook);
+	hook = sighook(SIGALRM, alrm_hook, NULL, SH_CONTINUE);
+	if (!bfs_echeck(hook, "sighook(SIGALRM)")) {
+		goto unblock;
+	}
+
+	// Test SH_ONESHOT
+	oneshot = sighook(SIGALRM, alrm_oneshot, NULL, SH_ONESHOT);
+	if (!bfs_echeck(oneshot, "sighook(SH_ONESHOT)")) {
+		goto unblock;
+	}
+
+	// Create a timer that sends SIGALRM every 100 microseconds
+	const struct timespec ival = { .tv_nsec = 100 * 1000 };
+	struct timer *timer = xtimer_start(&ival);
+	if (!bfs_echeck(timer, "xtimer_start()")) {
+		goto unblock;
 	}
 
 	// Rapidly register/unregister SIGALRM hooks
-	while (load(&count, relaxed) < 1000) {
+	size_t alarms;
+	while (alarms = load(&count, relaxed), alarms < 1000) {
+		size_t nshots = load(&shots, relaxed);
+		bfs_check(nshots <= 1);
+		if (alarms > 1) {
+			bfs_check(nshots == 1);
+		}
+		if (alarms >= 500) {
+			sigunhook(oneshot);
+			oneshot = NULL;
+		}
+
 		struct sighook *next = sighook(SIGALRM, alrm_hook, NULL, SH_CONTINUE);
 		if (!bfs_echeck(next, "sighook(SIGALRM)")) {
 			break;
@@ -98,20 +131,98 @@ void check_sighook(void) {
 		hook = next;
 	}
 
+	// Stop the timer
+	xtimer_stop(timer);
+unblock:
+	// Restore the old signal mask
+	errno = pthread_sigmask(SIG_SETMASK, &mask, NULL);
+	bfs_echeck(errno == 0, "pthread_sigmask()");
+unthread:
 	// Quit the background thread
 	mutex_lock(&mutex);
 	done = true;
 	mutex_unlock(&mutex);
 	cond_signal(&cond);
 	thread_join(thread, NULL);
-
-	// Restore the old signal mask
-	errno = pthread_sigmask(SIG_SETMASK, &mask, NULL);
-	bfs_echeck(errno == 0, "pthread_sigmask()");
-untime:
-	// Stop the timer
-	xtimer_stop(timer);
 unhook:
-	// Unregister the SIGALRM hook
+	// Unregister the SIGALRM hooks
+	sigunhook(oneshot);
 	sigunhook(hook);
 }
+
+/** atsigexit() hook. */
+static void exit_hook(int sig, siginfo_t *info, void *arg) {
+	// Write the signal that's killing us to the pipe
+	int *pipes = arg;
+	if (xwrite(pipes[1], &sig, sizeof(sig)) != sizeof(sig)) {
+		abort();
+	}
+}
+
+/** Tests for atsigexit(). */
+static void check_sigexit(int sig) {
+	// To wait for the child to call atsigexit()
+	int ready[2];
+	bfs_everify(pipe(ready) == 0);
+
+	// Written in the atsigexit() handler
+	int killed[2];
+	bfs_everify(pipe(killed) == 0);
+
+	pid_t pid;
+	bfs_everify((pid = fork()) >= 0);
+
+	if (pid > 0) {
+		// Parent
+		xclose(ready[1]);
+		xclose(killed[1]);
+
+		// Wait for the child to call atsigexit()
+		char c;
+		bfs_everify(xread(ready[0], &c, 1) == 1);
+
+		// Kill the child with the signal
+		bfs_everify(kill(pid, sig) == 0);
+
+		// Check that the child died to the right signal
+		int wstatus;
+		if (bfs_echeck(xwaitpid(pid, &wstatus, 0) == pid)) {
+			bfs_check(WIFSIGNALED(wstatus) && WTERMSIG(wstatus) == sig);
+		}
+
+		// Check that the signal hook wrote the signal number to the pipe
+		int hsig;
+		if (bfs_echeck(xread(killed[0], &hsig, sizeof(hsig)) == sizeof(hsig))) {
+			bfs_check(hsig == sig);
+		}
+	} else {
+		// Child
+		xclose(ready[0]);
+		xclose(killed[0]);
+
+		// exit_hook() will write to killed[1]
+		bfs_everify(atsigexit(exit_hook, killed) != NULL);
+
+		// Tell the parent we're ready
+		bfs_everify(xwrite(ready[1], "A", 1) == 1);
+
+		// Wait until we're killed
+		const struct timespec dur = { .tv_nsec = 1 };
+		while (true) {
+			nanosleep(&dur, NULL);
+		}
+	}
+}
+
+void check_sighook(void) {
+	check_hooks();
+
+	check_sigexit(SIGINT);
+	check_sigexit(SIGQUIT);
+	check_sigexit(SIGPIPE);
+
+	// macOS cannot distinguish between sync and async SIG{BUS,ILL,SEGV}
+#if !__APPLE__
+	check_sigexit(SIGSEGV);
+#endif
+}
diff --git a/tests/stddirs.sh b/tests/stddirs.sh
index 1183970..1569fee 100644
--- a/tests/stddirs.sh
+++ b/tests/stddirs.sh
@@ -151,7 +151,7 @@ make_stddirs() {
     if ((CLEAN)); then
         defer clean_stddirs
     else
-        printf "Test files saved to ${BLD}%s${RST}\n" "$TMP"
+        color printf "Test files saved to ${BLD}%s${RST}\n" "$TMP"
     fi
 
     chown "$(id -u):$(id -g)" "$TMP"
diff --git a/tests/tests.h b/tests/tests.h
index 4c6b3d2..d395c7c 100644
--- a/tests/tests.h
+++ b/tests/tests.h
@@ -45,26 +45,30 @@ bool bfs_check_impl(bool result);
  * Check a condition, logging a message on failure but continuing.
  */
 #define bfs_check(...) \
-	bfs_check_impl(bfs_check_(#__VA_ARGS__, __VA_ARGS__, "", ""))
+	bfs_check_(#__VA_ARGS__, __VA_ARGS__, "", )
 
 #define bfs_check_(str, cond, format, ...) \
-	((cond) ? true : (bfs_diag( \
-		sizeof(format) > 1 \
-			? "%.0s" format "%s%s" \
-			: "Check failed: `%s`%s", \
-		str, __VA_ARGS__), false))
+	bfs_check_impl((cond) || (bfs_check__(format, BFS_DIAG_MSG_(format, str),  __VA_ARGS__), false))
+
+#define bfs_check__(format, ...) \
+	bfs_diagf(sizeof(format) > 1 \
+			? BFS_DIAG_FORMAT_("%s" format "%s") \
+			: BFS_DIAG_FORMAT_("Check failed: `%s`"), \
+		BFS_DIAG_ARGS_(__VA_ARGS__))
 
 /**
  * Check a condition, logging the current error string on failure.
  */
 #define bfs_echeck(...) \
-	bfs_check_impl(bfs_echeck_(#__VA_ARGS__, __VA_ARGS__, "", errstr()))
+	bfs_echeck_(#__VA_ARGS__, __VA_ARGS__, "", )
 
 #define bfs_echeck_(str, cond, format, ...) \
-	((cond) ? true : (bfs_diag( \
-		sizeof(format) > 1 \
-			? "%.0s" format "%s: %s" \
-			: "Check failed: `%s`: %s", \
-		str, __VA_ARGS__), false))
+	bfs_check_impl((cond) || (bfs_echeck__(format, BFS_DIAG_MSG_(format, str), __VA_ARGS__), false))
+
+#define bfs_echeck__(format, ...) \
+	bfs_diagf(sizeof(format) > 1 \
+			? BFS_DIAG_FORMAT_("%s" format "%s: %s") \
+			: BFS_DIAG_FORMAT_("Check failed: `%s`: %s"), \
+		BFS_DIAG_ARGS_(__VA_ARGS__ errstr(), ))
 
 #endif // BFS_TESTS_H
diff --git a/tests/trie.c b/tests/trie.c
index 6e6024a..59bde40 100644
--- a/tests/trie.c
+++ b/tests/trie.c
@@ -39,6 +39,10 @@ static const char *keys[] = {
 	">>>>>>",
 	">>><<<",
 	">>>",
+
+	"AAAAAAA",
+	"AAAAAAAB",
+	"AAAAAAAa",
 };
 
 static const size_t nkeys = countof(keys);
diff --git a/tests/util.sh b/tests/util.sh
index d8b7036..c998927 100644
--- a/tests/util.sh
+++ b/tests/util.sh
@@ -16,6 +16,7 @@ ROOT=$(_realpath "$(dirname -- "$TESTS")")
 TESTS="$ROOT/tests"
 BIN="$ROOT/bin"
 MKSOCK="$BIN/tests/mksock"
+PTYX="$BIN/tests/ptyx"
 XTOUCH="$BIN/tests/xtouch"
 UNAME=$(uname)
 
@@ -33,6 +34,7 @@ stdenv() {
 
     export LS_COLORS=""
     unset BFS_COLORS
+    unset LSCOLORS
 
     if [ "$UNAME" = Darwin ]; then
         # ASan on macOS likes to report
@@ -189,3 +191,28 @@ pop_defers() {
 
     return $ret
 }
+
+## Parallelism
+
+# Get the number of processors
+_nproc() {
+    {
+        nproc \
+            || sysctl -n hw.ncpu \
+            || getconf _NPROCESSORS_ONLN \
+            || echo 1
+    } 2>/dev/null
+}
+
+# Run wait, looping if interrupted
+_wait() {
+    local ret=130
+
+    # "If wait is interrupted by a signal, the return status will be greater than 128"
+    while ((ret > 128)); do
+	ret=0
+	wait "$@" || ret=$?
+    done
+
+    return $ret
+}
diff --git a/tests/xspawn.c b/tests/xspawn.c
index 3194adc..6864192 100644
--- a/tests/xspawn.c
+++ b/tests/xspawn.c
@@ -50,6 +50,71 @@ fail:
 	return NULL;
 }
 
+/** Add an entry to $PATH. */
+static int add_path(const char *entry, char **old_path) {
+	int ret = -1;
+	const char *new_path = NULL;
+
+	*old_path = getenv("PATH");
+	if (*old_path) {
+		*old_path = strdup(*old_path);
+		if (!*old_path) {
+			goto done;
+		}
+
+		new_path = dstrprintf("%s:%s", entry, *old_path);
+		if (!new_path) {
+			goto done;
+		}
+	} else {
+		new_path = entry;
+	}
+
+	ret = setenv("PATH", new_path, true);
+
+done:
+	if (new_path && new_path != entry) {
+		dstrfree((dchar *)new_path);
+	}
+
+	if (ret != 0) {
+		free(*old_path);
+		*old_path = NULL;
+	}
+
+	return ret;
+}
+
+/** Undo add_path(). */
+static int reset_path(char *old_path) {
+	int ret;
+
+	if (old_path) {
+		ret = setenv("PATH", old_path, true);
+		free(old_path);
+	} else {
+		ret = unsetenv("PATH");
+	}
+
+	return ret;
+}
+
+/** Spawn the test binary and check for success. */
+static void check_spawnee(const char *exe, const struct bfs_spawn *ctx, char **argv, char **envp) {
+	pid_t pid = bfs_spawn(exe, ctx, argv, envp);
+	if (!bfs_echeck(pid >= 0, "bfs_spawn('%s')", exe)) {
+		return;
+	}
+
+	int wstatus;
+	bool exited = bfs_echeck(xwaitpid(pid, &wstatus, 0) == pid)
+		&& bfs_check(WIFEXITED(wstatus));
+	if (exited) {
+		int wexit = WEXITSTATUS(wstatus);
+		bfs_check(wexit == EXIT_SUCCESS, "xspawnee: exit(%d)", wexit);
+	}
+}
+
 /** Check that we resolve executables in $PATH correctly. */
 static void check_use_path(bool use_posix) {
 	struct bfs_spawn spawn;
@@ -78,48 +143,16 @@ static void check_use_path(bool use_posix) {
 	}
 
 	// Check that $PATH is resolved after the file actions
-	char *old_path = getenv("PATH");
-	dchar *new_path = NULL;
-	if (old_path) {
-		old_path = strdup(old_path);
-		if (!bfs_echeck(old_path, "strdup()")) {
-			goto env;
-		}
-		new_path = dstrprintf("tests:%s", old_path);
-	} else {
-		new_path = dstrdup("tests");
-	}
-	if (!bfs_check(new_path)) {
-		goto path;
-	}
-
-	if (!bfs_echeck(setenv("PATH", new_path, true) == 0)) {
-		goto path;
+	char *old_path;
+	if (!bfs_echeck(add_path("tests", &old_path) == 0)) {
+		goto env;
 	}
 
 	char *argv[] = {"xspawnee", old_path, NULL};
-	pid_t pid = bfs_spawn("xspawnee", &spawn, argv, envp);
-	if (!bfs_echeck(pid >= 0, "bfs_spawn()")) {
-		goto unset;
-	}
+	check_spawnee("xspawnee", &spawn, argv, envp);
+	check_spawnee("tests/xspawnee", &spawn, argv, envp);
 
-	int wstatus;
-	bool exited = bfs_echeck(xwaitpid(pid, &wstatus, 0) == pid)
-		&& bfs_check(WIFEXITED(wstatus));
-	if (exited) {
-		int wexit = WEXITSTATUS(wstatus);
-		bfs_check(wexit == EXIT_SUCCESS, "xspawnee: exit(%d)", wexit);
-	}
-
-unset:
-	if (old_path) {
-		bfs_echeck(setenv("PATH", old_path, true) == 0);
-	} else {
-		bfs_echeck(unsetenv("PATH") == 0);
-	}
-path:
-	dstrfree(new_path);
-	free(old_path);
+	bfs_echeck(reset_path(old_path) == 0);
 env:
 	for (char **var = envp; *var; ++var) {
 		free(*var);
@@ -166,6 +199,14 @@ static void check_resolve(void) {
 	bfs_echeck(!bfs_spawn_resolve("eW6f5RM9Qi") && errno == ENOENT);
 
 	bfs_echeck(!bfs_spawn_resolve("bin/eW6f5RM9Qi") && errno == ENOENT);
+
+	char *old_path;
+	if (bfs_echeck(add_path("bin/tests", &old_path) == 0)) {
+		exe = bfs_spawn_resolve("xspawnee");
+		bfs_echeck(exe && strcmp(exe, "bin/tests/xspawnee") == 0);
+		free(exe);
+		bfs_echeck(reset_path(old_path) == 0);
+	}
 }
 
 void check_xspawn(void) {
diff --git a/tests/xtime.c b/tests/xtime.c
index 3472bea..c890a1e 100644
--- a/tests/xtime.c
+++ b/tests/xtime.c
@@ -154,6 +154,14 @@ static void check_xtimegm(void) {
 		.tm_isdst = -1,
 	};
 
+#if BFS_HAS_TIMEGM
+	// Check that xtimegm(-1) isn't an error
+	for (time_t time = -10; time <= 10; ++time) {
+		if (bfs_check(gmtime_r(&time, &tm), "gmtime_r(%jd)", (intmax_t)time)) {
+			check_one_xtimegm(&tm);
+		}
+	}
+#else
 	// Check equivalence with mktime()
 	for (tm.tm_year =  10; tm.tm_year <= 200; tm.tm_year += 10)
 	for (tm.tm_mon  =  -3; tm.tm_mon  <=  15; tm.tm_mon  +=  3)
@@ -164,13 +172,12 @@ static void check_xtimegm(void) {
 		check_one_xtimegm(&tm);
 	}
 
-#if !BFS_HAS_TIMEGM
 	// Check integer overflow cases
 	check_xtimegm_overflow(&(struct tm) { .tm_sec = INT_MAX, .tm_min = INT_MAX });
 	check_xtimegm_overflow(&(struct tm) { .tm_min = INT_MAX, .tm_hour = INT_MAX });
 	check_xtimegm_overflow(&(struct tm) { .tm_hour = INT_MAX, .tm_mday = INT_MAX });
 	check_xtimegm_overflow(&(struct tm) { .tm_mon = INT_MAX, .tm_year = INT_MAX });
-#endif
+#endif // !BFS_HAS_TIMEGM
 }
 
 void check_xtime(void) {
diff --git a/tests/xtouch.c b/tests/xtouch.c
index e7c2e00..f33c573 100644
--- a/tests/xtouch.c
+++ b/tests/xtouch.c
@@ -217,11 +217,8 @@ int main(int argc, char *argv[]) {
 	}
 
 	if (marg) {
-		char *end;
-		long mode = strtol(marg, &end, 8);
-		// https://github.com/llvm/llvm-project/issues/64946
-		sanitize_init(&end);
-		if (*marg && !*end && mode >= 0 && mode < 01000) {
+		unsigned int mode;
+		if (xstrtoui(marg, NULL, 8, &mode) == 0 && mode < 01000) {
 			args.fmode = args.dmode = mode;
 		} else {
 			fprintf(stderr, "%s: Invalid mode '%s'\n", cmd, marg);