62 files changed, 5591 insertions, 2648 deletions
diff --git a/src/alloc.c b/src/alloc.c
index ebaff38..f505eda 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -1,11 +1,13 @@
 // Copyright © Tavian Barnes <tavianator@tavianator.com>
 // SPDX-License-Identifier: 0BSD
 
-#include "prelude.h"
 #include "alloc.h"
+
+#include "bfs.h"
 #include "bit.h"
 #include "diag.h"
 #include "sanity.h"
+
 #include <errno.h>
 #include <stdlib.h>
 #include <stdint.h>
@@ -18,24 +20,22 @@
 #  define ALLOC_MAX (SIZE_MAX / 2)
 #endif
 
-/** Portable aligned_alloc()/posix_memalign(). */
+/** posix_memalign() wrapper. */
 static void *xmemalign(size_t align, size_t size) {
 	bfs_assert(has_single_bit(align));
 	bfs_assert(align >= sizeof(void *));
-	bfs_assert(is_aligned(align, size));
 
-#if BFS_HAS_ALIGNED_ALLOC
-	return aligned_alloc(align, size);
-#else
+	// Since https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2072.htm,
+	// aligned_alloc() doesn't require the size to be a multiple of align.
+	// But the sanitizers don't know about that yet, so always use
+	// posix_memalign().
 	void *ptr = NULL;
 	errno = posix_memalign(&ptr, align, size);
 	return ptr;
-#endif
 }
 
 void *alloc(size_t align, size_t size) {
 	bfs_assert(has_single_bit(align));
-	bfs_assert(is_aligned(align, size));
 
 	if (size > ALLOC_MAX) {
 		errno = EOVERFLOW;
@@ -51,7 +51,6 @@ void *alloc(size_t align, size_t size) {
 
 void *zalloc(size_t align, size_t size) {
 	bfs_assert(has_single_bit(align));
-	bfs_assert(is_aligned(align, size));
 
 	if (size > ALLOC_MAX) {
 		errno = EOVERFLOW;
@@ -71,8 +70,6 @@ void *zalloc(size_t align, size_t size) {
 
 void *xrealloc(void *ptr, size_t align, size_t old_size, size_t new_size) {
 	bfs_assert(has_single_bit(align));
-	bfs_assert(is_aligned(align, old_size));
-	bfs_assert(is_aligned(align, new_size));
 
 	if (new_size == 0) {
 		free(ptr);
@@ -106,10 +103,10 @@ void *reserve(void *ptr, size_t align, size_t size, size_t count) {
 	size_t old_size = size * count;
 
 	// Capacity is doubled every power of two, from 0→1, 1→2, 2→4, etc.
-	// If we stayed within the same size class, re-use ptr.
+	// If we stayed within the same size class, reuse ptr.
 	if (count & (count - 1)) {
 		// Tell sanitizers about the new array element
-		sanitize_alloc((char *)ptr + old_size, size);
+		sanitize_resize(ptr, old_size, old_size + size, bit_ceil(count) * size);
 		errno = 0;
 		return ptr;
 	}
@@ -124,7 +121,7 @@ void *reserve(void *ptr, size_t align, size_t size, size_t count) {
 	}
 
 	// Pretend we only allocated one more element
-	sanitize_free((char *)ret + old_size + size, new_size - old_size - size);
+	sanitize_resize(ret, new_size, old_size + size, new_size);
 	errno = 0;
 	return ret;
 }
@@ -176,7 +173,7 @@ void arena_init(struct arena *arena, size_t align, size_t size) {
 }
 
 /** Allocate a new slab. */
-attr(cold)
+_cold
 static int slab_alloc(struct arena *arena) {
 	// Make the initial allocation size ~4K
 	size_t size = 4096;
@@ -231,6 +228,7 @@ void arena_free(struct arena *arena, void *ptr) {
 	union chunk *chunk = ptr;
 	chunk_set_next(arena, chunk, arena->chunks);
 	arena->chunks = chunk;
+	sanitize_uninit(chunk, arena->size);
 	sanitize_free(chunk, arena->size);
 }
 
@@ -250,7 +248,7 @@ void arena_destroy(struct arena *arena) {
 	sanitize_uninit(arena);
 }
 
-void varena_init(struct varena *varena, size_t align, size_t min, size_t offset, size_t size) {
+void varena_init(struct varena *varena, size_t align, size_t offset, size_t size) {
 	varena->align = align;
 	varena->offset = offset;
 	varena->size = size;
@@ -259,7 +257,7 @@ void varena_init(struct varena *varena, size_t align, size_t min, size_t offset,
 
 	// The smallest size class is at least as many as fit in the smallest
 	// aligned allocation size
-	size_t min_count = (flex_size(align, min, offset, size, 1) - offset + size - 1) / size;
+	size_t min_count = (flex_size(align, offset, size, 1) - offset + size - 1) / size;
 	varena->shift = bit_width(min_count - 1);
 }
 
@@ -272,7 +270,7 @@ static size_t varena_size_class(struct varena *varena, size_t count) {
 
 /** Get the exact size of a flexible struct. */
 static size_t varena_exact_size(const struct varena *varena, size_t count) {
-	return flex_size(varena->align, 0, varena->offset, varena->size, count);
+	return flex_size(varena->align, varena->offset, varena->size, count);
 }
 
 /** Get the arena for the given array length. */
@@ -306,8 +304,7 @@ void *varena_alloc(struct varena *varena, size_t count) {
 	}
 
 	// Tell the sanitizers the exact size of the allocated struct
-	sanitize_free(ret, arena->size);
-	sanitize_alloc(ret, varena_exact_size(varena, count));
+	sanitize_resize(ret, arena->size, varena_exact_size(varena, count), arena->size);
 
 	return ret;
 }
@@ -319,15 +316,14 @@ void *varena_realloc(struct varena *varena, void *ptr, size_t old_count, size_t
 		return NULL;
 	}
 
-	size_t new_exact_size = varena_exact_size(varena, new_count);
-	size_t old_exact_size = varena_exact_size(varena, old_count);
+	size_t old_size = old_arena->size;
+	size_t new_size = new_arena->size;
 
 	if (new_arena == old_arena) {
-		if (new_count < old_count) {
-			sanitize_free((char *)ptr + new_exact_size, old_exact_size - new_exact_size);
-		} else if (new_count > old_count) {
-			sanitize_alloc((char *)ptr + old_exact_size, new_exact_size - old_exact_size);
-		}
+		sanitize_resize(ptr,
+			varena_exact_size(varena, old_count),
+			varena_exact_size(varena, new_count),
+			new_size);
 		return ptr;
 	}
 
@@ -336,16 +332,18 @@ void *varena_realloc(struct varena *varena, void *ptr, size_t old_count, size_t
 		return NULL;
 	}
 
-	size_t old_size = old_arena->size;
-	sanitize_alloc((char *)ptr + old_exact_size, old_size - old_exact_size);
+	// Non-sanitized builds don't bother computing exact sizes, and just use
+	// the potentially-larger arena size for each size class instead.  To
+	// allow the below memcpy() to work with the less-precise sizes, expand
+	// the old allocation to its full capacity.
+	sanitize_resize(ptr, varena_exact_size(varena, old_count), old_size, old_size);
 
-	size_t new_size = new_arena->size;
 	size_t min_size = new_size < old_size ? new_size : old_size;
 	memcpy(ret, ptr, min_size);
 
 	arena_free(old_arena, ptr);
-	sanitize_free((char *)ret + new_exact_size, new_size - new_exact_size);
 
+	sanitize_resize(ret, new_size, varena_exact_size(varena, new_count), new_size);
 	return ret;
 }
 
diff --git a/src/alloc.h b/src/alloc.h
index 095134a..1fafbab 100644
--- a/src/alloc.h
+++ b/src/alloc.h
@@ -8,127 +8,145 @@
 #ifndef BFS_ALLOC_H
 #define BFS_ALLOC_H
 
-#include "prelude.h"
+#include "bfs.h"
+
 #include <errno.h>
 #include <stddef.h>
 #include <stdlib.h>
 
+#define IS_ALIGNED(align, size) \
+	(((size) & ((align) - 1)) == 0)
+
 /** Check if a size is properly aligned. */
 static inline bool is_aligned(size_t align, size_t size) {
-	return (size & (align - 1)) == 0;
+	return IS_ALIGNED(align, size);
 }
 
+#define ALIGN_FLOOR(align, size) \
+	((size) & ~((align) - 1))
+
 /** Round down to a multiple of an alignment. */
 static inline size_t align_floor(size_t align, size_t size) {
-	return size & ~(align - 1);
+	return ALIGN_FLOOR(align, size);
 }
 
+#define ALIGN_CEIL(align, size) \
+	((((size) - 1) | ((align) - 1)) + 1)
+
 /** Round up to a multiple of an alignment. */
 static inline size_t align_ceil(size_t align, size_t size) {
-	return align_floor(align, size + align - 1);
+	return ALIGN_CEIL(align, size);
 }
 
 /**
- * Saturating array size.
- *
- * @param align
- *         Array element alignment.
- * @param size
- *         Array element size.
- * @param count
- *         Array element count.
- * @return
- *         size * count, saturating to the maximum aligned value on overflow.
+ * Saturating size addition.
+ */
+static inline size_t size_add(size_t lhs, size_t rhs) {
+	size_t ret = lhs + rhs;
+	return ret >= lhs ? ret : (size_t)-1;
+}
+
+/**
+ * Saturating size multiplication.
  */
-static inline size_t array_size(size_t align, size_t size, size_t count) {
+static inline size_t size_mul(size_t size, size_t count) {
 	size_t ret = size * count;
-	return ret / size == count ? ret : ~(align - 1);
+	return (size == 0 || ret / size == count) ? ret : (size_t)-1; // size == 0 yields 0 without dividing by zero
 }
 
 /** Saturating array sizeof. */
 #define sizeof_array(type, count) \
-	array_size(alignof(type), sizeof(type), count)
+	size_mul(sizeof(type), count)
 
 /** Size of a struct/union field. */
 #define sizeof_member(type, member) \
 	sizeof(((type *)NULL)->member)
 
 /**
+ * @internal
+ * Our flexible struct size calculations assume that structs have the minimum
+ * trailing padding to align the type properly.  A pathological ABI that adds
+ * extra padding would result in us under-allocating space for those structs,
+ * so we static_assert() that no such padding exists.
+ */
+#define ASSERT_FLEX_ABI(type, member) \
+	ASSERT_FLEX_ABI_( \
+		ALIGN_CEIL(alignof(type), offsetof(type, member)) >= sizeof(type), \
+		"Unexpected tail padding in " #type)
+
+/**
+ * @internal
+ * The contortions here allow static_assert() to be used in expressions, rather
+ * than just declarations.
+ */
+#define ASSERT_FLEX_ABI_(...) \
+	((void)sizeof(struct { char _; static_assert(__VA_ARGS__); }))
+
+/**
  * Saturating flexible struct size.
  *
- * @param align
+ * @align
  *         Struct alignment.
- * @param min
- *         Minimum struct size.
- * @param offset
+ * @offset
  *         Flexible array member offset.
- * @param size
+ * @size
  *         Flexible array element size.
- * @param count
+ * @count
  *         Flexible array element count.
  * @return
  *         The size of the struct with count flexible array elements.  Saturates
  *         to the maximum aligned value on overflow.
  */
-static inline size_t flex_size(size_t align, size_t min, size_t offset, size_t size, size_t count) {
-	size_t ret = size * count;
-	size_t overflow = ret / size != count;
-
-	size_t extra = offset + align - 1;
-	ret += extra;
-	overflow |= ret < extra;
-	ret |= -overflow;
+static inline size_t flex_size(size_t align, size_t offset, size_t size, size_t count) {
+	size_t ret = size_mul(size, count);
+	ret = size_add(ret, offset + align - 1);
 	ret = align_floor(align, ret);
-
-	// Make sure flex_sizeof(type, member, 0) >= sizeof(type), even if the
-	// type has more padding than necessary for alignment
-	if (min > align_ceil(align, offset)) {
-		ret = ret < min ? min : ret;
-	}
-
 	return ret;
 }
 
 /**
  * Computes the size of a flexible struct.
  *
- * @param type
+ * @type
  *         The type of the struct containing the flexible array.
- * @param member
+ * @member
  *         The name of the flexible array member.
- * @param count
+ * @count
  *         The length of the flexible array.
  * @return
  *         The size of the struct with count flexible array elements.  Saturates
  *         to the maximum aligned value on overflow.
  */
 #define sizeof_flex(type, member, count) \
-	flex_size(alignof(type), sizeof(type), offsetof(type, member), sizeof_member(type, member[0]), count)
+	(ASSERT_FLEX_ABI(type, member), flex_size( \
+		alignof(type), offsetof(type, member), sizeof_member(type, member[0]), count))
 
 /**
  * General memory allocator.
  *
- * @param align
+ * @align
  *         The required alignment.
- * @param size
+ * @size
  *         The size of the allocation.
  * @return
  *         The allocated memory, or NULL on failure.
  */
-attr(malloc(free, 1), aligned_alloc(1, 2))
+_malloc(free, 1)
+_aligned_alloc(1, 2)
 void *alloc(size_t align, size_t size);
 
 /**
  * Zero-initialized memory allocator.
  *
- * @param align
+ * @align
  *         The required alignment.
- * @param size
+ * @size
  *         The size of the allocation.
  * @return
  *         The allocated memory, or NULL on failure.
  */
-attr(malloc(free, 1), aligned_alloc(1, 2))
+_malloc(free, 1)
+_aligned_alloc(1, 2)
 void *zalloc(size_t align, size_t size);
 
 /** Allocate memory for the given type. */
@@ -158,18 +176,19 @@ void *zalloc(size_t align, size_t size);
 /**
  * Alignment-aware realloc().
  *
- * @param ptr
+ * @ptr
  *         The pointer to reallocate.
- * @param align
+ * @align
  *         The required alignment.
- * @param old_size
+ * @old_size
  *         The previous allocation size.
- * @param new_size
+ * @new_size
  *         The new allocation size.
  * @return
  *         The reallocated memory, or NULL on failure.
  */
-attr(nodiscard, aligned_alloc(2, 4))
+_aligned_alloc(2, 4)
+_nodiscard
 void *xrealloc(void *ptr, size_t align, size_t old_size, size_t new_size);
 
 /** Reallocate memory for an array. */
@@ -183,11 +202,11 @@ void *xrealloc(void *ptr, size_t align, size_t old_size, size_t new_size);
 /**
  * Reserve space for one more element in a dynamic array.
  *
- * @param ptr
+ * @ptr
  *         The pointer to reallocate.
- * @param align
+ * @align
  *         The required alignment.
- * @param count
+ * @count
  *         The current size of the array.
  * @return
  *         The reallocated memory, on both success *and* failure.  On success,
@@ -195,17 +214,17 @@ void *xrealloc(void *ptr, size_t align, size_t old_size, size_t new_size);
  *         for (count + 1) elements.  On failure, errno will be non-zero, and
  *         ptr will returned unchanged.
  */
-attr(nodiscard)
+_nodiscard
 void *reserve(void *ptr, size_t align, size_t size, size_t count);
 
 /**
  * Convenience macro to grow a dynamic array.
  *
- * @param type
+ * @type
  *         The array element type.
- * @param type **ptr
+ * @type **ptr
  *         A pointer to the array.
- * @param size_t *count
+ * @size_t *count
  *         A pointer to the array's size.
  * @return
  *         On success, a pointer to the newly reserved array element, i.e.
@@ -253,7 +272,7 @@ void arena_free(struct arena *arena, void *ptr);
 /**
  * Allocate an object out of the arena.
  */
-attr(malloc(arena_free, 2))
+_malloc(arena_free, 2)
 void *arena_alloc(struct arena *arena);
 
 /**
@@ -287,40 +306,39 @@ struct varena {
 /**
  * Initialize a varena for a struct with the given layout.
  *
- * @param varena
+ * @varena
  *         The varena to initialize.
- * @param align
+ * @align
  *         alignof(type)
- * @param min
- *         sizeof(type)
- * @param offset
+ * @offset
  *         offsetof(type, flexible_array)
- * @param size
+ * @size
  *         sizeof(flexible_array[i])
  */
-void varena_init(struct varena *varena, size_t align, size_t min, size_t offset, size_t size);
+void varena_init(struct varena *varena, size_t align, size_t offset, size_t size);
 
 /**
  * Initialize a varena for the given type and flexible array.
  *
- * @param varena
+ * @varena
  *         The varena to initialize.
- * @param type
+ * @type
  *         A struct type containing a flexible array.
- * @param member
+ * @member
  *         The name of the flexible array member.
  */
 #define VARENA_INIT(varena, type, member) \
-	varena_init(varena, alignof(type), sizeof(type), offsetof(type, member), sizeof_member(type, member[0]))
+	(ASSERT_FLEX_ABI(type, member), varena_init( \
+		varena, alignof(type), offsetof(type, member), sizeof_member(type, member[0])))
 
 /**
  * Free an arena-allocated flexible struct.
  *
- * @param varena
+ * @varena
  *         The that allocated the object.
- * @param ptr
+ * @ptr
  *         The object to free.
- * @param count
+ * @count
  *         The length of the flexible array.
  */
 void varena_free(struct varena *varena, void *ptr, size_t count);
@@ -328,46 +346,46 @@ void varena_free(struct varena *varena, void *ptr, size_t count);
 /**
  * Arena-allocate a flexible struct.
  *
- * @param varena
+ * @varena
  *         The varena to allocate from.
- * @param count
+ * @count
  *         The length of the flexible array.
  * @return
  *         The allocated struct, or NULL on failure.
  */
-attr(malloc(varena_free, 2))
+_malloc(varena_free, 2)
 void *varena_alloc(struct varena *varena, size_t count);
 
 /**
  * Resize a flexible struct.
  *
- * @param varena
+ * @varena
  *         The varena to allocate from.
- * @param ptr
+ * @ptr
  *         The object to resize.
- * @param old_count
- *         The old array lenth.
- * @param new_count
+ * @old_count
+ *         The old array length.
+ * @new_count
  *         The new array length.
  * @return
  *         The resized struct, or NULL on failure.
  */
-attr(nodiscard)
+_nodiscard
 void *varena_realloc(struct varena *varena, void *ptr, size_t old_count, size_t new_count);
 
 /**
  * Grow a flexible struct by an arbitrary amount.
  *
- * @param varena
+ * @varena
  *         The varena to allocate from.
- * @param ptr
+ * @ptr
  *         The object to resize.
- * @param count
+ * @count
  *         Pointer to the flexible array length.
  * @return
  *         The resized struct, or NULL on failure.
  */
-attr(nodiscard)
+_nodiscard
 void *varena_grow(struct varena *varena, void *ptr, size_t *count);
 
 /**
diff --git a/src/atomic.h b/src/atomic.h
index f1a6bea..5c2826f 100644
--- a/src/atomic.h
+++ b/src/atomic.h
@@ -8,6 +8,8 @@
 #ifndef BFS_ATOMIC_H
 #define BFS_ATOMIC_H
 
+#include "bfs.h"
+
 #include <stdatomic.h>
 
 /**
@@ -18,9 +20,9 @@
 /**
  * Shorthand for atomic_load_explicit().
  *
- * @param obj
+ * @obj
  *         A pointer to the atomic object.
- * @param order
+ * @order
  *         The memory ordering to use, without the memory_order_ prefix.
  * @return
  *         The loaded value.
@@ -82,4 +84,35 @@
 #define fetch_and(obj, arg, order) \
 	atomic_fetch_and_explicit(obj, arg, memory_order_##order)
 
+/**
+ * Shorthand for atomic_thread_fence().  obj is only used by the TSan fallback.
+ */
+#if __SANITIZE_THREAD__
+// TSan doesn't support fences: https://github.com/google/sanitizers/issues/1415
+#  define thread_fence(obj, order) \
+	fetch_add(obj, 0, order)
+#else
+#  define thread_fence(obj, order) \
+	atomic_thread_fence(memory_order_##order)
+#endif
+
+/**
+ * Shorthand for atomic_signal_fence().
+ */
+#define signal_fence(order) \
+	atomic_signal_fence(memory_order_##order)
+
+/**
+ * A hint to the CPU to relax while it spins.
+ */
+#if __has_builtin(__builtin_ia32_pause)
+#  define spin_loop() __builtin_ia32_pause()
+#elif __has_builtin(__builtin_arm_yield)
+#  define spin_loop() __builtin_arm_yield()
+#elif BFS_HAS_BUILTIN_RISCV_PAUSE
+#  define spin_loop() __builtin_riscv_pause()
+#else
+#  define spin_loop() ((void)0)
+#endif
+
 #endif // BFS_ATOMIC_H
diff --git a/src/bar.c b/src/bar.c
index 184d9a0..1b0691a 100644
--- a/src/bar.c
+++ b/src/bar.c
@@ -1,52 +1,55 @@
 // Copyright © Tavian Barnes <tavianator@tavianator.com>
 // SPDX-License-Identifier: 0BSD
 
-#include "prelude.h"
 #include "bar.h"
+
+#include "alloc.h"
 #include "atomic.h"
+#include "bfs.h"
 #include "bfstd.h"
 #include "bit.h"
 #include "dstring.h"
+#include "sighook.h"
+
 #include <errno.h>
 #include <fcntl.h>
 #include <signal.h>
 #include <stdarg.h>
 #include <stdio.h>
+#include <stdlib.h>
 #include <string.h>
-#include <sys/ioctl.h>
+#include <termios.h>
+#include <unistd.h>
 
 struct bfs_bar {
 	int fd;
 	atomic unsigned int width;
 	atomic unsigned int height;
-};
 
-/** The global status bar instance. */
-static struct bfs_bar the_bar = {
-	.fd = -1,
+	struct sighook *exit_hook;
+	struct sighook *winch_hook;
 };
 
 /** Get the terminal size, if possible. */
 static int bfs_bar_getsize(struct bfs_bar *bar) {
-#ifdef TIOCGWINSZ
 	struct winsize ws;
-	if (ioctl(bar->fd, TIOCGWINSZ, &ws) != 0) {
+	if (xtcgetwinsize(bar->fd, &ws) != 0) {
 		return -1;
 	}
 
 	store(&bar->width, ws.ws_col, relaxed);
 	store(&bar->height, ws.ws_row, relaxed);
 	return 0;
-#else
-	errno = ENOTSUP;
-	return -1;
-#endif
 }
 
-/** Async Signal Safe puts(). */
-static int ass_puts(int fd, const char *str) {
-	size_t len = strlen(str);
-	return xwrite(fd, str, len) == len ? 0 : -1;
+/** Write a string to the status bar (async-signal-safe). */
+static int bfs_bar_write(struct bfs_bar *bar, const char *str, size_t len) {
+	return xwrite(bar->fd, str, len) == len ? 0 : -1;
+}
+
+/** Write a NUL-terminated string to the status bar (async-signal-safe). */
+static int bfs_bar_puts(struct bfs_bar *bar, const char *str) {
+	return bfs_bar_write(bar, str, strlen(str));
 }
 
 /** Number of decimal digits needed for terminal sizes. */
@@ -68,66 +71,63 @@ static char *ass_itoa(char *str, unsigned int n) {
 	return str + len;
 }
 
+/** Reset the scrollable region and hide the bar. */
+static int bfs_bar_reset(struct bfs_bar *bar) {
+	return bfs_bar_puts(bar,
+		"\0337"  // DECSC: Save cursor
+		"\033[r" // DECSTBM: Reset scrollable region
+		"\0338"  // DECRC: Restore cursor
+		"\033[J" // ED: Erase display from cursor to end
+	);
+}
+
+/** Hide the bar if the terminal is shorter than this. */
+#define BFS_BAR_MIN_HEIGHT 3
+
 /** Update the size of the scrollable region. */
 static int bfs_bar_resize(struct bfs_bar *bar) {
-	char esc_seq[12 + ITOA_DIGITS] =
+	unsigned int height = load(&bar->height, relaxed);
+	if (height < BFS_BAR_MIN_HEIGHT) {
+		return bfs_bar_reset(bar);
+	}
+
+	static const char PREFIX[] =
+		"\033D"   // IND: Line feed, possibly scrolling
+		"\033[1A" // CUU: Move cursor up 1 row
 		"\0337"   // DECSC: Save cursor
 		"\033[;"; // DECSTBM: Set scrollable region
+	static const char SUFFIX[] =
+		"r"       // (end of DECSTBM)
+		"\0338"   // DECRC: Restore the cursor
+		"\033[J"; // ED: Erase display from cursor to end
 
-	// DECSTBM takes the height as the second argument
-	unsigned int height = load(&bar->height, relaxed);
-	char *ptr = esc_seq + strlen(esc_seq);
-	ptr = ass_itoa(ptr, height - 1);
+	char esc_seq[sizeof(PREFIX) + ITOA_DIGITS + sizeof(SUFFIX)];
 
-	strcpy(ptr,
-		"r"      // DECSTBM
-		"\0338"  // DECRC: Restore the cursor
-		"\033[J" // ED: Erase display from cursor to end
-	);
+	// DECSTBM takes the height as the second argument
+	char *cur = stpcpy(esc_seq, PREFIX);
+	cur = ass_itoa(cur, height - 1);
+	cur = stpcpy(cur, SUFFIX);
 
-	return ass_puts(bar->fd, esc_seq);
+	return bfs_bar_write(bar, esc_seq, cur - esc_seq);
 }
 
 #ifdef SIGWINCH
 /** SIGWINCH handler. */
-static void sighand_winch(int sig) {
-	int error = errno;
-
-	bfs_bar_getsize(&the_bar);
-	bfs_bar_resize(&the_bar);
-
-	errno = error;
+static void bfs_bar_sigwinch(int sig, siginfo_t *info, void *arg) {
+	struct bfs_bar *bar = arg;
+	bfs_bar_getsize(bar);
+	bfs_bar_resize(bar);
 }
 #endif
 
-/** Reset the scrollable region and hide the bar. */
-static int bfs_bar_reset(struct bfs_bar *bar) {
-	return ass_puts(bar->fd,
-		"\0337"  // DECSC: Save cursor
-		"\033[r" // DECSTBM: Reset scrollable region
-		"\0338"  // DECRC: Restore cursor
-		"\033[J" // ED: Erase display from cursor to end
-	);
-}
-
 /** Signal handler for process-terminating signals. */
-static void sighand_reset(int sig) {
-	bfs_bar_reset(&the_bar);
-	raise(sig);
-}
-
-/** Register sighand_reset() for a signal. */
-static void reset_before_death_by(int sig) {
-	struct sigaction sa = {
-		.sa_handler = sighand_reset,
-		.sa_flags = SA_RESETHAND,
-	};
-	sigemptyset(&sa.sa_mask);
-	sigaction(sig, &sa, NULL);
+static void bfs_bar_sigexit(int sig, siginfo_t *info, void *arg) {
+	struct bfs_bar *bar = arg;
+	bfs_bar_reset(bar);
 }
 
 /** printf() to the status bar with a single write(). */
-attr(printf(2, 3))
+_printf(2, 3)
 static int bfs_bar_printf(struct bfs_bar *bar, const char *format, ...) {
 	va_list args;
 	va_start(args, format);
@@ -138,64 +138,47 @@ static int bfs_bar_printf(struct bfs_bar *bar, const char *format, ...) {
 		return -1;
 	}
 
-	int ret = ass_puts(bar->fd, str);
+	int ret = bfs_bar_write(bar, str, dstrlen(str));
 	dstrfree(str);
 	return ret;
 }
 
 struct bfs_bar *bfs_bar_show(void) {
-	if (the_bar.fd >= 0) {
-		errno = EBUSY;
-		goto fail;
+	struct bfs_bar *bar = ALLOC(struct bfs_bar);
+	if (!bar) {
+		return NULL;
 	}
 
-	char term[L_ctermid];
-	ctermid(term);
-	if (strlen(term) == 0) {
-		errno = ENOTTY;
+	bar->fd = open_cterm(O_RDWR | O_CLOEXEC);
+	if (bar->fd < 0) {
 		goto fail;
 	}
 
-	the_bar.fd = open(term, O_RDWR | O_CLOEXEC);
-	if (the_bar.fd < 0) {
-		goto fail;
+	if (bfs_bar_getsize(bar) != 0) {
+		goto fail_close;
 	}
 
-	if (bfs_bar_getsize(&the_bar) != 0) {
+	bar->exit_hook = atsigexit(bfs_bar_sigexit, bar);
+	if (!bar->exit_hook) {
 		goto fail_close;
 	}
 
-	reset_before_death_by(SIGABRT);
-	reset_before_death_by(SIGINT);
-	reset_before_death_by(SIGPIPE);
-	reset_before_death_by(SIGQUIT);
-	reset_before_death_by(SIGTERM);
-
 #ifdef SIGWINCH
-	struct sigaction sa = {
-		.sa_handler = sighand_winch,
-		.sa_flags = SA_RESTART,
-	};
-	sigemptyset(&sa.sa_mask);
-	sigaction(SIGWINCH, &sa, NULL);
+	bar->winch_hook = sighook(SIGWINCH, bfs_bar_sigwinch, bar, 0);
+	if (!bar->winch_hook) {
+		goto fail_hook;
+	}
 #endif
 
-	unsigned int height = load(&the_bar.height, relaxed);
-	bfs_bar_printf(&the_bar,
-		"\n"        // Make space for the bar
-		"\0337"     // DECSC: Save cursor
-		"\033[;%ur" // DECSTBM: Set scrollable region
-		"\0338"     // DECRC: Restore cursor
-		"\033[1A",  // CUU: Move cursor up 1 row
-		height - 1
-	);
-
-	return &the_bar;
+	bfs_bar_resize(bar);
+	return bar;
 
+fail_hook:
+	sigunhook(bar->exit_hook);
 fail_close:
-	close_quietly(the_bar.fd);
-	the_bar.fd = -1;
+	close_quietly(bar->fd);
 fail:
+	free(bar);
 	return NULL;
 }
 
@@ -205,6 +188,10 @@ unsigned int bfs_bar_width(const struct bfs_bar *bar) {
 
 int bfs_bar_update(struct bfs_bar *bar, const char *str) {
 	unsigned int height = load(&bar->height, relaxed);
+	if (height < BFS_BAR_MIN_HEIGHT) {
+		return 0;
+	}
+
 	return bfs_bar_printf(bar,
 		"\0337"      // DECSC: Save cursor
 		"\033[%u;0f" // HVP: Move cursor to row, column
@@ -223,17 +210,11 @@ void bfs_bar_hide(struct bfs_bar *bar) {
 		return;
 	}
 
-	signal(SIGABRT, SIG_DFL);
-	signal(SIGINT, SIG_DFL);
-	signal(SIGPIPE, SIG_DFL);
-	signal(SIGQUIT, SIG_DFL);
-	signal(SIGTERM, SIG_DFL);
-#ifdef SIGWINCH
-	signal(SIGWINCH, SIG_DFL);
-#endif
+	sigunhook(bar->winch_hook);
+	sigunhook(bar->exit_hook);
 
 	bfs_bar_reset(bar);
 
 	xclose(bar->fd);
-	bar->fd = -1;
+	free(bar);
 }
diff --git a/src/bar.h b/src/bar.h
index 20d92a9..ec9e590 100644
--- a/src/bar.h
+++ b/src/bar.h
@@ -27,9 +27,9 @@ unsigned int bfs_bar_width(const struct bfs_bar *bar);
 /**
  * Update the status bar message.
  *
- * @param bar
+ * @bar
  *         The status bar to update.
- * @param str
+ * @str
  *         The string to display.
  * @return
  *         0 on success, -1 on failure.
diff --git a/src/bfs.h b/src/bfs.h
new file mode 100644
index 0000000..3cee727
--- /dev/null
+++ b/src/bfs.h
@@ -0,0 +1,241 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+/**
+ * Configuration and fundamental utilities.
+ */
+
+#ifndef BFS_H
+#define BFS_H
+
+// Standard versions
+
+/** Possible __STDC_VERSION__ values. */
+#define C95 199409L
+#define C99 199901L
+#define C11 201112L
+#define C17 201710L
+#define C23 202311L
+
+/** Possible _POSIX_C_SOURCE and _POSIX_<OPTION> values. */
+#define POSIX_1990 1
+#define POSIX_1992 2
+#define POSIX_1993 199309L
+#define POSIX_1995 199506L
+#define POSIX_2001 200112L
+#define POSIX_2008 200809L
+#define POSIX_2024 202405L
+
+// Build configuration
+
+#include "config.h"
+
+#ifndef BFS_COMMAND
+#  define BFS_COMMAND "bfs"
+#endif
+
+#ifndef BFS_HOMEPAGE
+#  define BFS_HOMEPAGE "https://tavianator.com/projects/bfs.html"
+#endif
+
+#ifndef BFS_LINT
+#  define BFS_LINT false
+#endif
+
+// This is a symbol instead of a literal so we don't have to rebuild everything
+// when the version number changes
+extern const char bfs_version[];
+
+extern const char bfs_confflags[];
+extern const char bfs_cc[];
+extern const char bfs_cppflags[];
+extern const char bfs_cflags[];
+extern const char bfs_ldflags[];
+extern const char bfs_ldlibs[];
+
+// Get __GLIBC__
+#include <assert.h>
+
+// Fundamental utilities
+
+/**
+ * Get the length of an array.
+ */
+#define countof(...) (sizeof(__VA_ARGS__) / sizeof(0[__VA_ARGS__]))
+
+/**
+ * False sharing/destructive interference/largest cache line size.
+ */
+#ifdef __GCC_DESTRUCTIVE_SIZE
+#  define FALSE_SHARING_SIZE __GCC_DESTRUCTIVE_SIZE
+#else
+#  define FALSE_SHARING_SIZE 64
+#endif
+
+/**
+ * True sharing/constructive interference/smallest cache line size.
+ */
+#ifdef __GCC_CONSTRUCTIVE_SIZE
+#  define TRUE_SHARING_SIZE __GCC_CONSTRUCTIVE_SIZE
+#else
+#  define TRUE_SHARING_SIZE 64
+#endif
+
+/**
+ * Alignment specifier that avoids false sharing.
+ */
+#define cache_align alignas(FALSE_SHARING_SIZE)
+
+// Wrappers for attributes
+
+/**
+ * Silence warnings about switch/case fall-throughs.
+ */
+#if __has_attribute(fallthrough)
+#  define _fallthrough __attribute__((fallthrough))
+#else
+#  define _fallthrough ((void)0)
+#endif
+
+/**
+ * Silence warnings about unused declarations.
+ */
+#if __has_attribute(unused)
+#  define _maybe_unused __attribute__((unused))
+#else
+#  define _maybe_unused
+#endif
+
+/**
+ * Warn if a value is unused.
+ */
+#if __has_attribute(warn_unused_result)
+#  define _nodiscard __attribute__((warn_unused_result))
+#else
+#  define _nodiscard
+#endif
+
+/**
+ * Hint to avoid inlining a function.
+ */
+#if __has_attribute(noinline)
+#  define _noinline __attribute__((noinline))
+#else
+#  define _noinline
+#endif
+
+/**
+ * Marks a non-returning function.
+ */
+#if __STDC_VERSION__ >= C23
+#  define _noreturn [[noreturn]]
+#else
+#  define _noreturn _Noreturn
+#endif
+
+/**
+ * Hint that a function is unlikely to be called.
+ */
+#if __has_attribute(cold)
+#  define _cold _noinline __attribute__((cold))
+#else
+#  define _cold _noinline
+#endif
+
+/**
+ * Adds compiler warnings for bad printf()-style function calls, if supported.
+ */
+#if __has_attribute(format)
+#  define _printf(fmt, args) __attribute__((format(printf, fmt, args)))
+#else
+#  define _printf(fmt, args)
+#endif
+
+/**
+ * Annotates functions that potentially modify and return format strings.
+ */
+#if __has_attribute(format_arg)
+#  define _format_arg(arg) __attribute__((format_arg(arg)))
+#else
+#  define _format_arg(arg)
+#endif
+
+/**
+ * Annotates allocator-like functions.
+ */
+#if __has_attribute(malloc)
+#  if __GNUC__ >= 11 && !__OPTIMIZE__ // malloc(deallocator) disables inlining on GCC
+#    define _malloc(...) _nodiscard __attribute__((malloc(__VA_ARGS__)))
+#  else
+#    define _malloc(...) _nodiscard __attribute__((malloc))
+#  endif
+#else
+#  define _malloc(...) _nodiscard
+#endif
+
+/**
+ * Specifies that a function returns allocations with a given alignment.
+ */
+#if __has_attribute(alloc_align)
+#  define _alloc_align(param) __attribute__((alloc_align(param)))
+#else
+#  define _alloc_align(param)
+#endif
+
+/**
+ * Specifies that a function returns allocations with a given size.
+ */
+#if __has_attribute(alloc_size)
+#  define _alloc_size(...) __attribute__((alloc_size(__VA_ARGS__)))
+#else
+#  define _alloc_size(...)
+#endif
+
+/**
+ * Shorthand for _alloc_align() and _alloc_size().
+ */
+#define _aligned_alloc(align, ...) _alloc_align(align) _alloc_size(__VA_ARGS__)
+
+/**
+ * Check if function multiversioning via GNU indirect functions (ifunc) is supported.
+ *
+ * Disabled on TSan due to https://github.com/google/sanitizers/issues/342.
+ */
+#ifndef BFS_USE_TARGET_CLONES
+#  if __has_attribute(target_clones) && (__GLIBC__ || __FreeBSD__) && !__SANITIZE_THREAD__
+#    define BFS_USE_TARGET_CLONES true
+#  else
+#    define BFS_USE_TARGET_CLONES false
+#  endif
+#endif
+
+/**
+ * Apply the target_clones attribute, if available.
+ */
+#if BFS_USE_TARGET_CLONES
+#  define _target_clones(...) __attribute__((target_clones(__VA_ARGS__)))
+#else
+#  define _target_clones(...)
+#endif
+
+/**
+ * Mark the size of a flexible array member.
+ */
+#if __has_attribute(counted_by)
+#  define _counted_by(...) __attribute__((counted_by(__VA_ARGS__)))
+#else
+#  define _counted_by(...)
+#endif
+
+/**
+ * Optimization hint to not unroll a loop.
+ */
+#if BFS_HAS_PRAGMA_NOUNROLL
+#  define _nounroll _Pragma("nounroll")
+#elif __GNUC__ && !__clang__
+#  define _nounroll _Pragma("GCC unroll 0")
+#else
+#  define _nounroll
+#endif
+
+#endif // BFS_H
diff --git a/src/bfstd.c b/src/bfstd.c
index f8ce871..b78af7a 100644
--- a/src/bfstd.c
+++ b/src/bfstd.c
@@ -1,13 +1,15 @@
 // Copyright © Tavian Barnes <tavianator@tavianator.com>
 // SPDX-License-Identifier: 0BSD
 
-#include "prelude.h"
 #include "bfstd.h"
+
+#include "bfs.h"
 #include "bit.h"
 #include "diag.h"
 #include "sanity.h"
 #include "thread.h"
 #include "xregex.h"
+
 #include <errno.h>
 #include <fcntl.h>
 #include <langinfo.h>
@@ -15,25 +17,28 @@
 #include <locale.h>
 #include <nl_types.h>
 #include <pthread.h>
+#include <sched.h>
 #include <stddef.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <sys/ioctl.h>
 #include <sys/resource.h>
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <sys/wait.h>
+#include <termios.h>
 #include <unistd.h>
 #include <wchar.h>
 
-#if BFS_USE_SYS_SYSMACROS_H
+#if __has_include(<sys/sysmacros.h>)
 #  include <sys/sysmacros.h>
-#elif BFS_USE_SYS_MKDEV_H
+#elif __has_include(<sys/mkdev.h>)
 #  include <sys/mkdev.h>
 #endif
 
-#if BFS_USE_UTIL_H
+#if __has_include(<util.h>)
 #  include <util.h>
 #endif
 
@@ -199,6 +204,171 @@ const char *xgetprogname(void) {
 	return cmd;
 }
 
+/** Common prologue for xstrto*() wrappers: rejects leading whitespace (EINVAL) and clears errno; returns 0 or -1. */
+static int xstrtox_prologue(const char *str) {
+	// strto*() skips leading spaces, but we want to reject them
+	if (xisspace(str[0])) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	errno = 0; // So the epilogue can tell whether strto*() itself reported an error
+	return 0;
+}
+
+/** Common epilogue for xstrto*() wrappers: validates a strto*() call given its end pointer; returns 0, or -1 with errno set. */
+static int xstrtox_epilogue(const char *str, char **end, char *endp) {
+	if (errno != 0) { // strto*() reported an error (the prologue zeroed errno beforehand)
+		return -1;
+	}
+
+	if (end) { // Report where parsing stopped, even if the check below fails
+		*end = endp;
+	}
+
+	// Reject an empty parse; if end is NULL, also make sure the entire string is valid
+	if (endp == str || (!end && *endp != '\0')) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	return 0;
+}
+
+int xstrtos(const char *str, char **end, int base, short *value) {
+	long n; // Parse at full long width, then narrow
+	if (xstrtol(str, end, base, &n) != 0) {
+		return -1;
+	}
+
+	if (n < SHRT_MIN || n > SHRT_MAX) {
+		errno = ERANGE; // A valid long, but not representable as a short
+		return -1;
+	}
+
+	*value = n; // Only written on success
+	return 0;
+}
+
+int xstrtoi(const char *str, char **end, int base, int *value) {
+	long n; // Parse at full long width, then narrow
+	if (xstrtol(str, end, base, &n) != 0) {
+		return -1;
+	}
+
+	if (n < INT_MIN || n > INT_MAX) {
+		errno = ERANGE; // A valid long, but not representable as an int
+		return -1;
+	}
+
+	*value = n; // Only written on success
+	return 0;
+}
+
+int xstrtol(const char *str, char **end, int base, long *value) {
+	if (xstrtox_prologue(str) != 0) { // Leading whitespace is rejected up front
+		return -1;
+	}
+
+	char *endp;
+	*value = strtol(str, &endp, base); // NB: *value is stored even if the epilogue then reports failure
+	return xstrtox_epilogue(str, end, endp);
+}
+
+int xstrtoll(const char *str, char **end, int base, long long *value) {
+	if (xstrtox_prologue(str) != 0) { // Leading whitespace is rejected up front
+		return -1;
+	}
+
+	char *endp;
+	*value = strtoll(str, &endp, base); // NB: *value is stored even if the epilogue then reports failure
+	return xstrtox_epilogue(str, end, endp);
+}
+
+int xstrtof(const char *str, char **end, float *value) {
+	if (xstrtox_prologue(str) != 0) { // Leading whitespace is rejected up front
+		return -1;
+	}
+
+	char *endp;
+	*value = strtof(str, &endp); // NB: *value is stored even if the epilogue then reports failure
+	return xstrtox_epilogue(str, end, endp);
+}
+
+int xstrtod(const char *str, char **end, double *value) {
+	if (xstrtox_prologue(str) != 0) { // Leading whitespace is rejected up front
+		return -1;
+	}
+
+	char *endp;
+	*value = strtod(str, &endp); // NB: *value is stored even if the epilogue then reports failure
+	return xstrtox_epilogue(str, end, endp);
+}
+
+int xstrtous(const char *str, char **end, int base, unsigned short *value) {
+	unsigned long n; // Parse at full unsigned long width, then narrow
+	if (xstrtoul(str, end, base, &n) != 0) {
+		return -1;
+	}
+
+	if (n > USHRT_MAX) {
+		errno = ERANGE; // A valid unsigned long, but too big for an unsigned short
+		return -1;
+	}
+
+	*value = n; // Only written on success
+	return 0;
+}
+
+int xstrtoui(const char *str, char **end, int base, unsigned int *value) {
+	unsigned long n; // Parse at full unsigned long width, then narrow
+	if (xstrtoul(str, end, base, &n) != 0) {
+		return -1;
+	}
+
+	if (n > UINT_MAX) {
+		errno = ERANGE; // A valid unsigned long, but too big for an unsigned int
+		return -1;
+	}
+
+	*value = n; // Only written on success
+	return 0;
+}
+
+/** Common epilogue for xstrtou*() wrappers: the xstrtox_epilogue() checks, plus ERANGE for a leading '-'. */
+static int xstrtoux_epilogue(const char *str, char **end, char *endp) {
+	if (xstrtox_epilogue(str, end, endp) != 0) {
+		return -1;
+	}
+
+	if (str[0] == '-') { // strtoul() and friends accept a minus sign and negate the result; we refuse
+		errno = ERANGE;
+		return -1;
+	}
+
+	return 0;
+}
+
+int xstrtoul(const char *str, char **end, int base, unsigned long *value) {
+	if (xstrtox_prologue(str) != 0) { // Leading whitespace is rejected up front
+		return -1;
+	}
+
+	char *endp;
+	*value = strtoul(str, &endp, base); // NB: *value is stored even if the epilogue then reports failure
+	return xstrtoux_epilogue(str, end, endp);
+}
+
+int xstrtoull(const char *str, char **end, int base, unsigned long long *value) {
+	if (xstrtox_prologue(str) != 0) { // Leading whitespace is rejected up front
+		return -1;
+	}
+
+	char *endp;
+	*value = strtoull(str, &endp, base); // NB: *value is stored even if the epilogue then reports failure
+	return xstrtoux_epilogue(str, end, endp);
+}
+
 /** Compile and execute a regular expression for xrpmatch(). */
 static int xrpregex(nl_item item, const char *response) {
 	const char *pattern = nl_langinfo(item);
@@ -285,7 +455,7 @@ const char *xstrerror(int errnum) {
 
 	// On FreeBSD with MemorySanitizer, duplocale() triggers
 	// https://github.com/llvm/llvm-project/issues/65532
-#if BFS_HAS_STRERROR_L && !(__FreeBSD__ && SANITIZE_MEMORY)
+#if BFS_HAS_STRERROR_L && !(__FreeBSD__ && __SANITIZE_MEMORY__)
 #  if BFS_HAS_USELOCALE
 	locale_t loc = uselocale((locale_t)0);
 #  else
@@ -322,6 +492,10 @@ const char *xstrerror(int errnum) {
 	return ret;
 }
 
+const char *errstr(void) {
+	return xstrerror(errno); // Describe whatever errno currently holds
+}
+
 /** Get the single character describing the given file type. */
 static char type_char(mode_t mode) {
 	switch (mode & S_IFMT) {
@@ -437,7 +611,9 @@ int rlim_cmp(rlim_t a, rlim_t b) {
 }
 
 dev_t xmakedev(int ma, int mi) {
-#ifdef makedev
+#if __QNX__
+	return makedev(0, ma, mi);
+#elif defined(makedev)
 	return makedev(ma, mi);
 #else
 	return (ma << 8) | mi;
@@ -468,6 +644,32 @@ pid_t xwaitpid(pid_t pid, int *status, int flags) {
 	return ret;
 }
 
+int open_cterm(int flags) {
+	char path[L_ctermid]; // L_ctermid is the buffer size ctermid() expects
+	if (ctermid(path) == NULL || strlen(path) == 0) { // An empty string means no terminal path could be generated
+		errno = ENOTTY;
+		return -1;
+	}
+
+	return open(path, flags); // On failure, open()'s own errno is reported
+}
+
+int xtcgetwinsize(int fd, struct winsize *ws) {
+#if BFS_HAS_TCGETWINSIZE
+	return tcgetwinsize(fd, ws);
+#else // No tcgetwinsize(): fall back to the TIOCGWINSZ ioctl()
+	return ioctl(fd, TIOCGWINSZ, ws);
+#endif
+}
+
+int xtcsetwinsize(int fd, const struct winsize *ws) {
+#if BFS_HAS_TCSETWINSIZE
+	return tcsetwinsize(fd, ws);
+#else // No tcsetwinsize(): fall back to the TIOCSWINSZ ioctl()
+	return ioctl(fd, TIOCSWINSZ, ws);
+#endif
+}
+
 int dup_cloexec(int fd) {
 #ifdef F_DUPFD_CLOEXEC
 	return fcntl(fd, F_DUPFD_CLOEXEC, 0);
@@ -637,8 +839,14 @@ error:
 	return NULL;
 }
 
+#if BFS_HAS_STRTOFFLAGS // Pick whichever file-flag parsing function is available
+#  define BFS_STRTOFFLAGS strtofflags
+#elif BFS_HAS_STRING_TO_FLAGS
+#  define BFS_STRTOFFLAGS string_to_flags
+#endif // If neither exists, BFS_STRTOFFLAGS stays undefined and xstrtofflags() fails with ENOTSUP
+
 int xstrtofflags(const char **str, unsigned long long *set, unsigned long long *clear) {
-#if BSD && !__GNU__
+#ifdef BFS_STRTOFFLAGS
 	char *str_arg = (char *)*str;
 
 #if __OpenBSD__
@@ -649,11 +857,7 @@ int xstrtofflags(const char **str, unsigned long long *set, unsigned long long *
 	bfs_fflags_t set_arg = 0;
 	bfs_fflags_t clear_arg = 0;
 
-#if __NetBSD__
-	int ret = string_to_flags(&str_arg, &set_arg, &clear_arg);
-#else
-	int ret = strtofflags(&str_arg, &set_arg, &clear_arg);
-#endif
+	int ret = BFS_STRTOFFLAGS(&str_arg, &set_arg, &clear_arg);
 
 	*str = str_arg;
 	*set = set_arg;
@@ -663,47 +867,124 @@ int xstrtofflags(const char **str, unsigned long long *set, unsigned long long *
 		errno = EINVAL;
 	}
 	return ret;
-#else // !BSD
+#else // !BFS_STRTOFFLAGS
 	errno = ENOTSUP;
 	return -1;
 #endif
 }
 
-size_t asciilen(const char *str) {
-	return asciinlen(str, strlen(str));
-}
+long xsysconf(int name) {
+#if __FreeBSD__ && __SANITIZE_MEMORY__
+	// Work around https://github.com/llvm/llvm-project/issues/88163 by disabling MSan interceptor checks around sysconf()
+	__msan_scoped_disable_interceptor_checks();
+#endif
 
-size_t asciinlen(const char *str, size_t n) {
-	size_t i = 0;
+	long ret = sysconf(name);
 
-#if SIZE_WIDTH % 8 == 0
-	// Word-at-a-time isascii()
-	for (size_t word; i + sizeof(word) <= n; i += sizeof(word)) {
-		memcpy(&word, str + i, sizeof(word));
+#if __FreeBSD__ && __SANITIZE_MEMORY__
+	__msan_scoped_enable_interceptor_checks(); // Re-enable the checks disabled above
+#endif
 
-		const size_t mask = (SIZE_MAX / 0xFF) << 7; // 0x808080...
-		word &= mask;
-		if (!word) {
-			continue;
+	return ret;
+}
+
+#if BFS_HAS_SCHED_GETAFFINITY
+/** Get the CPU count in an affinity mask of the given size (in bytes), or -1 on failure. */
+static long bfs_sched_getaffinity(size_t size) {
+	cpu_set_t set, *pset = &set; // Use the on-stack set unless a bigger mask is requested
+
+	if (size > sizeof(set)) { // Larger masks need heap storage
+		pset = malloc(size);
+		if (!pset) {
+			return -1;
 		}
+	}
+
+	long ret = -1;
+	if (sched_getaffinity(0, size, pset) == 0) { // pid 0 selects the caller
+#  ifdef CPU_COUNT_S
+		ret = CPU_COUNT_S(size, pset);
+#  else
+		bfs_assert(size <= sizeof(set));
+		ret = CPU_COUNT(pset);
+# endif
+	}
+
+	if (pset != &set) {
+		free(pset); // NOTE(review): nproc() tests errno after we return; assumes free() leaves it intact -- confirm
+	}
+	return ret;
+}
+#endif
+
+long nproc(void) {
+	long ret = 0; // Anything < 1 means "not determined yet"
 
-#if ENDIAN_NATIVE == ENDIAN_BIG
-		word = bswap(word);
-#elif ENDIAN_NATIVE != ENDIAN_LITTLE
+#if BFS_HAS_SCHED_GETAFFINITY
+	size_t size = sizeof(cpu_set_t); // Start with the standard mask size
+	do {
+		ret = bfs_sched_getaffinity(size);
+
+#  ifdef CPU_COUNT_S
+		// On Linux, sched_getaffinity(2) says:
+		//
+		//     When working on systems with large kernel CPU affinity masks, one must
+		//     dynamically allocate the mask argument (see CPU_ALLOC(3)).  Currently,
+		//     the only way to do this is by probing for the size of the required mask
+		//     using sched_getaffinity() calls with increasing mask sizes (until the
+		//     call does not fail with the error EINVAL).
+		size *= 2; // Only used if the loop condition below asks for a retry
+#  else
+		// No support for dynamically-sized CPU masks
 		break;
+#  endif
+	} while (ret < 0 && errno == EINVAL);
 #endif
 
-		size_t first = trailing_zeros(word) / 8;
-		return i + first;
+	if (ret < 1) { // Affinity unavailable or failed: fall back to the online processor count
+		ret = xsysconf(_SC_NPROCESSORS_ONLN);
 	}
-#endif
 
-	for (; i < n; ++i) {
-		if (!xisascii(str[i])) {
-			break;
-		}
+	if (ret < 1) { // Last resort: assume a single CPU
+		ret = 1;
 	}
 
+	return ret;
+}
+
+size_t asciilen(const char *str) {
+	return asciinlen(str, strlen(str)); // Scan the whole NUL-terminated string
+}
+
+size_t asciinlen(const char *str, size_t n) {
+	const unsigned char *ustr = (const unsigned char *)str;
+	size_t i = 0;
+
+	// Word-at-a-time isascii(): each CHUNK loads sizeof(type) bytes, keeps only their high bits, and returns at the first one set
+#define CHUNK(n) CHUNK_(uint##n##_t, load8_leu##n)
+#define CHUNK_(type, load8) \
+	(n - i >= sizeof(type)) { \
+		type word = load8(ustr + i); \
+		type mask = (((type)-1) / 0xFF) << 7; /* 0x808080.. */ \
+		word &= mask; \
+		i += trailing_zeros(word) / 8; \
+		if (word) { \
+			return i; \
+		} \
+	}
+
+#if SIZE_WIDTH >= 64
+	while CHUNK(64); // Loop over as many 8-byte chunks as fit
+	if CHUNK(32); // Then at most one chunk of each smaller size for the tail
+#else
+	while CHUNK(32); // Loop over as many 4-byte chunks as fit
+#endif
+	if CHUNK(16);
+	if CHUNK(8);
+
+#undef CHUNK_
+#undef CHUNK
+
 	return i;
 }
 
@@ -905,14 +1186,14 @@ static char *dollar_quote(char *dest, char *end, const char *str, size_t len, en
 
 /** How much of this string is safe as a bare word? */
 static size_t bare_len(const char *str, size_t len) {
-	// https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_02
+	// https://pubs.opengroup.org/onlinepubs/9799919799/utilities/V3_chap02.html#tag_19_02
 	size_t ret = strcspn(str, "|&;<>()$`\\\"' *?[#~=%!{}");
 	return ret < len ? ret : len;
 }
 
 /** How much of this string is safe to double-quote? */
 static size_t quotable_len(const char *str, size_t len) {
-	// https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_02_03
+	// https://pubs.opengroup.org/onlinepubs/9799919799/utilities/V3_chap02.html#tag_19_02_03
 	size_t ret = strcspn(str, "`$\\\"!");
 	return ret < len ? ret : len;
 }
diff --git a/src/bfstd.h b/src/bfstd.h
index f91e380..15dd949 100644
--- a/src/bfstd.h
+++ b/src/bfstd.h
@@ -8,8 +8,8 @@
 #ifndef BFS_BFSTD_H
 #define BFS_BFSTD_H
 
-#include "prelude.h"
-#include "sanity.h"
+#include "bfs.h"
+
 #include <stddef.h>
 
 #include <ctype.h>
@@ -18,7 +18,7 @@
  * Work around https://github.com/llvm/llvm-project/issues/65532 by forcing a
  * function, not a macro, to be called.
  */
-#if __FreeBSD__ && SANITIZE_MEMORY
+#if __FreeBSD__ && __SANITIZE_MEMORY__
 #  define BFS_INTERCEPT(fn) (fn)
 #else
 #  define BFS_INTERCEPT(fn) fn
@@ -48,9 +48,9 @@
  * Check if an error code is "like" another one.  For example, ENOTDIR is
  * like ENOENT because they can both be triggered by non-existent paths.
  *
- * @param error
+ * @error
  *         The error code to check.
- * @param category
+ * @category
  *         The category to test for.  Known categories include ENOENT and
  *         ENAMETOOLONG.
  * @return
@@ -66,7 +66,7 @@ bool errno_is_like(int category);
 /**
  * Apply the "negative errno" convention.
  *
- * @param ret
+ * @ret
  *         The return value of the attempted operation.
  * @return
  *         ret, if non-negative, otherwise -errno.
@@ -106,7 +106,7 @@ int try(int ret);
 /**
  * Re-entrant dirname() variant that always allocates a copy.
  *
- * @param path
+ * @path
  *         The path in question.
  * @return
  *         The parent directory of the path.
@@ -116,7 +116,7 @@ char *xdirname(const char *path);
 /**
  * Re-entrant basename() variant that always allocates a copy.
  *
- * @param path
+ * @path
  *         The path in question.
  * @return
  *         The final component of the path.
@@ -126,7 +126,7 @@ char *xbasename(const char *path);
 /**
  * Find the offset of the final component of a path.
  *
- * @param path
+ * @path
  *         The path in question.
  * @return
  *         The offset of the basename.
@@ -138,9 +138,9 @@ size_t xbaseoff(const char *path);
 /**
  * fopen() variant that takes open() style flags.
  *
- * @param path
+ * @path
  *         The path to open.
- * @param flags
+ * @flags
  *         Flags to pass to open().
  */
 FILE *xfopen(const char *path, int flags);
@@ -148,9 +148,9 @@ FILE *xfopen(const char *path, int flags);
 /**
  * Convenience wrapper for getdelim().
  *
- * @param file
+ * @file
  *         The file to read.
- * @param delim
+ * @delim
  *         The delimiter character to split on.
  * @return
  *         The read chunk (without the delimiter), allocated with malloc().
@@ -169,6 +169,56 @@ char *xgetdelim(FILE *file, char delim);
 const char *xgetprogname(void);
 
 /**
+ * Like xstrtol(), but for short (fails with ERANGE if the value doesn't fit).
+ */
+int xstrtos(const char *str, char **end, int base, short *value);
+
+/**
+ * Like xstrtol(), but for int (fails with ERANGE if the value doesn't fit).
+ */
+int xstrtoi(const char *str, char **end, int base, int *value);
+
+/**
+ * Wrapper for strtol() that forbids leading spaces; returns 0, or -1 with errno set.  If end is NULL, the whole string must parse.
+ */
+int xstrtol(const char *str, char **end, int base, long *value);
+
+/**
+ * Wrapper for strtoll() that forbids leading spaces; same return and end conventions as xstrtol().
+ */
+int xstrtoll(const char *str, char **end, int base, long long *value);
+
+/**
+ * Like xstrtoul(), but for unsigned short (fails with ERANGE if the value doesn't fit).
+ */
+int xstrtous(const char *str, char **end, int base, unsigned short *value);
+
+/**
+ * Like xstrtoul(), but for unsigned int (fails with ERANGE if the value doesn't fit).
+ */
+int xstrtoui(const char *str, char **end, int base, unsigned int *value);
+
+/**
+ * Wrapper for strtoul() that forbids leading spaces and negatives (a leading '-' fails with ERANGE); returns 0 or -1.
+ */
+int xstrtoul(const char *str, char **end, int base, unsigned long *value);
+
+/**
+ * Wrapper for strtoull() that forbids leading spaces and negatives (a leading '-' fails with ERANGE); returns 0 or -1.
+ */
+int xstrtoull(const char *str, char **end, int base, unsigned long long *value);
+
+/**
+ * Wrapper for strtof() that forbids leading spaces; same return and end conventions as xstrtol().
+ */
+int xstrtof(const char *str, char **end, float *value);
+
+/**
+ * Wrapper for strtod() that forbids leading spaces; same return and end conventions as xstrtol().
+ */
+int xstrtod(const char *str, char **end, double *value);
+
+/**
  * Process a yes/no prompt.
  *
  * @return 1 for yes, 0 for no, and -1 for unknown.
@@ -185,9 +235,9 @@ size_t asciilen(const char *str);
 /**
  * Get the length of the pure-ASCII prefix of a string.
  *
- * @param str
+ * @str
  *         The string to check.
- * @param n
+ * @n
  *         The maximum prefix length.
  */
 size_t asciinlen(const char *str, size_t n);
@@ -195,9 +245,9 @@ size_t asciinlen(const char *str, size_t n);
 /**
  * Allocate a copy of a region of memory.
  *
- * @param src
+ * @src
  *         The memory region to copy.
- * @param size
+ * @size
  *         The size of the memory region.
  * @return
  *         A copy of the region, allocated with malloc(), or NULL on failure.
@@ -207,12 +257,12 @@ void *xmemdup(const void *src, size_t size);
 /**
  * A nice string copying function.
  *
- * @param dest
+ * @dest
  *         The NUL terminator of the destination string, or `end` if it is
  *         already truncated.
- * @param end
+ * @end
  *         The end of the destination buffer.
- * @param src
+ * @src
  *         The string to copy from.
  * @return
  *         The new NUL terminator of the destination, or `end` on truncation.
@@ -222,14 +272,14 @@ char *xstpecpy(char *dest, char *end, const char *src);
 /**
  * A nice string copying function.
  *
- * @param dest
+ * @dest
  *         The NUL terminator of the destination string, or `end` if it is
  *         already truncated.
- * @param end
+ * @end
  *         The end of the destination buffer.
- * @param src
+ * @src
  *         The string to copy from.
- * @param n
+ * @n
  *         The maximum number of characters to copy.
  * @return
  *         The new NUL terminator of the destination, or `end` on truncation.
@@ -239,7 +289,7 @@ char *xstpencpy(char *dest, char *end, const char *src, size_t n);
 /**
  * Thread-safe strerror().
  *
- * @param errnum
+ * @errnum
  *         An error number.
  * @return
  *         A string describing that error, which remains valid until the next
@@ -248,11 +298,16 @@ char *xstpencpy(char *dest, char *end, const char *src, size_t n);
 const char *xstrerror(int errnum);
 
 /**
+ * Shorthand for xstrerror(errno).
+ */
+const char *errstr(void);
+
+/**
  * Format a mode like ls -l (e.g. -rw-r--r--).
  *
- * @param mode
+ * @mode
  *         The mode to format.
- * @param str
+ * @str
  *         The string to hold the formatted mode.
  */
 void xstrmode(mode_t mode, char str[11]);
@@ -307,12 +362,35 @@ int xminor(dev_t dev);
  */
 pid_t xwaitpid(pid_t pid, int *status, int flags);
 
+#include <sys/ioctl.h> // May be necessary for struct winsize
+#include <termios.h>
+
+/**
+ * Open the controlling terminal, using the path reported by ctermid().
+ *
+ * @flags
+ *         The open() flags.
+ * @return
+ *         An open file descriptor, or -1 on failure (ENOTTY if no terminal path is available).
+ */
+int open_cterm(int flags);
+
+/**
+ * tcgetwinsize()/ioctl(TIOCGWINSZ) wrapper; returns whatever the underlying call returns.
+ */
+int xtcgetwinsize(int fd, struct winsize *ws);
+
+/**
+ * tcsetwinsize()/ioctl(TIOCSWINSZ) wrapper; returns whatever the underlying call returns.
+ */
+int xtcsetwinsize(int fd, const struct winsize *ws);
+
 // #include <unistd.h>
 
 /**
  * Like dup(), but set the FD_CLOEXEC flag.
  *
- * @param fd
+ * @fd
  *         The file descriptor to duplicate.
  * @return
  *         A duplicated file descriptor, or -1 on failure.
@@ -322,7 +400,7 @@ int dup_cloexec(int fd);
 /**
  * Like pipe(), but set the FD_CLOEXEC flag.
  *
- * @param pipefd
+ * @pipefd
  *         The array to hold the two file descriptors.
  * @return
  *         0 on success, -1 on failure.
@@ -344,14 +422,14 @@ size_t xread(int fd, void *buf, size_t nbytes);
  * writes.
  *
  * @return
-           The number of bytes written.  A value != nbytes indicates an error.
+ *         The number of bytes written.  A value != nbytes indicates an error.
  */
 size_t xwrite(int fd, const void *buf, size_t nbytes);
 
 /**
  * close() variant that preserves errno.
  *
- * @param fd
+ * @fd
  *         The file descriptor to close.
  */
 void close_quietly(int fd);
@@ -359,7 +437,7 @@ void close_quietly(int fd);
 /**
  * close() wrapper that asserts the file descriptor is valid.
  *
- * @param fd
+ * @fd
  *         The file descriptor to close.
  * @return
  *         0 on success, or -1 on error.
@@ -374,11 +452,11 @@ int xfaccessat(int fd, const char *path, int amode);
 /**
  * readlinkat() wrapper that dynamically allocates the result.
  *
- * @param fd
+ * @fd
  *         The base directory descriptor.
- * @param path
+ * @path
  *         The path to the link, relative to fd.
- * @param size
+ * @size
  *         An estimate for the size of the link name (pass 0 if unknown).
  * @return
  *         The target of the link, allocated with malloc(), or NULL on failure.
@@ -388,7 +466,7 @@ char *xreadlinkat(int fd, const char *path, size_t size);
 /**
  * Wrapper for confstr() that allocates with malloc().
  *
- * @param name
+ * @name
  *         The ID of the confstr to look up.
  * @return
  *         The value of the confstr, or NULL on failure.
@@ -398,30 +476,53 @@ char *xconfstr(int name);
 /**
  * Portability wrapper for strtofflags().
  *
- * @param str
+ * @str
  *         The string to parse.  The pointee will be advanced to the first
  *         invalid position on error.
- * @param set
+ * @set
  *         The flags that are set in the string.
- * @param clear
+ * @clear
  *         The flags that are cleared in the string.
  * @return
  *         0 on success, -1 on failure.
  */
 int xstrtofflags(const char **str, unsigned long long *set, unsigned long long *clear);
 
+/**
+ * Wrapper for sysconf() that works around an MSan bug on FreeBSD; otherwise identical to sysconf().
+ */
+long xsysconf(int name);
+
+/**
+ * Check for a POSIX option[1], asking sysconf() at runtime only when the compile-time _POSIX_* constant is 0.
+ *
+ * [1]: https://pubs.opengroup.org/onlinepubs/9799919799/basedefs/V1_chap02.html#tag_02_01_06
+ *
+ * @name
+ *         The symbolic name of the POSIX option (e.g. SPAWN), pasted onto _POSIX_ and _SC_.
+ * @return
+ *         The value of the option, either -1 or a date like 202405.
+ */
+#define sysoption(name) \
+	(_POSIX_##name == 0 ? xsysconf(_SC_##name) : _POSIX_##name)
+
+/**
+ * Get the number of CPU threads available to the current process (always at least 1).
+ */
+long nproc(void);
+
 #include <wchar.h>
 
 /**
  * Error-recovering mbrtowc() wrapper.
  *
- * @param str
+ * @str
  *         The string to convert.
- * @param i
+ * @i
  *         The current index.
- * @param len
+ * @len
  *         The length of the string.
- * @param mb
+ * @mb
  *         The multi-byte decoding state.
  * @return
  *         The wide character at index *i, or WEOF if decoding fails.  In either
@@ -432,7 +533,7 @@ wint_t xmbrtowc(const char *str, size_t *i, size_t len, mbstate_t *mb);
 /**
  * wcswidth() variant that works on narrow strings.
  *
- * @param str
+ * @str
  *         The string to measure.
  * @return
  *         The likely width of that string in a terminal.
@@ -484,13 +585,13 @@ enum wesc_flags {
 /**
  * Escape a string as a single shell word.
  *
- * @param dest
+ * @dest
  *         The destination string to fill.
- * @param end
+ * @end
  *         The end of the destination buffer.
- * @param src
+ * @str
  *         The string to escape.
- * @param flags
+ * @flags
  *         Controls which characters to escape.
  * @return
  *         The new NUL terminator of the destination, or `end` on truncation.
@@ -500,15 +601,15 @@ char *wordesc(char *dest, char *end, const char *str, enum wesc_flags flags);
 /**
  * Escape a string as a single shell word.
  *
- * @param dest
+ * @dest
  *         The destination string to fill.
- * @param end
+ * @end
  *         The end of the destination buffer.
- * @param src
+ * @src
  *         The string to escape.
- * @param n
+ * @n
  *         The maximum length of the string.
- * @param flags
+ * @flags
  *         Controls which characters to escape.
  * @return
  *         The new NUL terminator of the destination, or `end` on truncation.
diff --git a/src/bftw.c b/src/bftw.c
index c4d3c17..0ca6f34 100644
--- a/src/bftw.c
+++ b/src/bftw.c
@@ -18,9 +18,10 @@
  *   various helper functions to take fewer parameters.
  */
 
-#include "prelude.h"
 #include "bftw.h"
+
 #include "alloc.h"
+#include "bfs.h"
 #include "bfstd.h"
 #include "diag.h"
 #include "dir.h"
@@ -30,6 +31,7 @@
 #include "mtab.h"
 #include "stat.h"
 #include "trie.h"
+
 #include <errno.h>
 #include <fcntl.h>
 #include <stdlib.h>
@@ -251,7 +253,7 @@ struct bftw_file {
 	/** The length of the file's name. */
 	size_t namelen;
 	/** The file's name. */
-	char name[];
+	char name[]; // _counted_by(namelen + 1)
 };
 
 /**
@@ -446,7 +448,7 @@ static void bftw_queue_rebalance(struct bftw_queue *queue, bool async) {
 	}
 }
 
-/** Detatch the next waiting file. */
+/** Detach the next waiting file. */
 static void bftw_queue_detach(struct bftw_queue *queue, struct bftw_file *file, bool async) {
 	bfs_assert(!file->ioqueued);
 
@@ -913,7 +915,7 @@ static int bftw_state_init(struct bftw_state *state, const struct bftw_args *arg
 	size_t qdepth = 4096;
 	size_t nthreads = args->nthreads;
 
-#if BFS_USE_LIBURING
+#if BFS_WITH_LIBURING
 	// io_uring uses one fd per ring, ioq uses one ring per thread
 	if (nthreads >= nopenfd - 1) {
 		nthreads = nopenfd - 2;
@@ -1006,6 +1008,7 @@ static int bftw_ioq_pop(struct bftw_state *state, bool block) {
 		return -1;
 	}
 
+	ioq_submit(ioq);
 	struct ioq_ent *ent = ioq_pop(ioq, block);
 	if (!ent) {
 		return -1;
@@ -1049,6 +1052,10 @@ static int bftw_ioq_pop(struct bftw_state *state, bool block) {
 
 		bftw_queue_attach(&state->fileq, file, true);
 		break;
+
+	default:
+		bfs_bug("Unexpected ioq op %d", (int)op);
+		break;
 	}
 
 	ioq_free(ioq, ent);
@@ -1160,12 +1167,13 @@ static int bftw_file_open(struct bftw_state *state, struct bftw_file *file, cons
 	struct bftw_list parents;
 	SLIST_INIT(&parents);
 
-	struct bftw_file *cur;
-	for (cur = file; cur != base; cur = cur->parent) {
+	// Reverse the chain of parents
+	for (struct bftw_file *cur = file; cur != base; cur = cur->parent) {
 		SLIST_PREPEND(&parents, cur);
 	}
 
-	while ((cur = SLIST_POP(&parents))) {
+	// Open each component relative to its parent
+	drain_slist (struct bftw_file, cur, &parents) {
 		if (!cur->parent || cur->parent->fd >= 0) {
 			bftw_file_openat(state, cur, cur->parent, cur->name);
 		}
@@ -1281,8 +1289,8 @@ static int bftw_pin_parent(struct bftw_state *state, struct bftw_file *file) {
 
 	int fd = parent->fd;
 	if (fd < 0) {
-		bfs_static_assert(AT_FDCWD != -1);
-		return -1;
+		// Don't confuse failures with AT_FDCWD
+		return (int)AT_FDCWD == -1 ? -2 : -1;
 	}
 
 	bftw_cache_pin(&state->cache, parent);
@@ -1298,7 +1306,7 @@ static int bftw_ioq_opendir(struct bftw_state *state, struct bftw_file *file) {
 	}
 
 	int dfd = bftw_pin_parent(state, file);
-	if (dfd < 0 && dfd != AT_FDCWD) {
+	if (dfd < 0 && dfd != (int)AT_FDCWD) {
 		goto fail;
 	}
 
@@ -1431,7 +1439,7 @@ static bool bftw_must_stat(const struct bftw_state *state, size_t depth, enum bf
 		if (!(bftw_stat_flags(state, depth) & BFS_STAT_NOFOLLOW)) {
 			return true;
 		}
-		fallthru;
+		_fallthrough;
 
 	default:
 #if __linux__
@@ -1450,7 +1458,7 @@ static int bftw_ioq_stat(struct bftw_state *state, struct bftw_file *file) {
 	}
 
 	int dfd = bftw_pin_parent(state, file);
-	if (dfd < 0 && dfd != AT_FDCWD) {
+	if (dfd < 0 && dfd != (int)AT_FDCWD) {
 		goto fail;
 	}
 
@@ -1477,7 +1485,8 @@ fail:
 
 /** Check if we should stat() a file asynchronously. */
 static bool bftw_should_ioq_stat(struct bftw_state *state, struct bftw_file *file) {
-	// To avoid surprising users too much, process the roots in order
+	// POSIX wants the root paths to be processed in order
+	// See https://www.austingroupbugs.net/view.php?id=1859
 	if (file->depth == 0) {
 		return false;
 	}
@@ -1529,11 +1538,28 @@ static bool bftw_pop_file(struct bftw_state *state) {
 	return bftw_pop(state, &state->fileq);
 }
 
+/** Add a path component to the path: copies name to path + nameoff, preceded by a '/' unless nameoff is 0. */
+static void bftw_prepend_path(char *path, size_t nameoff, size_t namelen, const char *name) {
+	if (nameoff > 0) { // Not the first component, so it needs a separator before it
+		path[nameoff - 1] = '/';
+	}
+	memcpy(path + nameoff, name, namelen); // Exactly namelen bytes; no NUL terminator is written here
+}
+
 /** Build the path to the current file. */
 static int bftw_build_path(struct bftw_state *state, const char *name) {
 	const struct bftw_file *file = state->file;
 
-	size_t pathlen = file ? file->nameoff + file->namelen : 0;
+	size_t nameoff, namelen;
+	if (name) {
+		nameoff = file ? bftw_child_nameoff(file) : 0;
+		namelen = strlen(name);
+	} else {
+		nameoff = file->nameoff;
+		namelen = file->namelen;
+	}
+
+	size_t pathlen = nameoff + namelen;
 	if (dstresize(&state->path, pathlen) != 0) {
 		state->error = errno;
 		return -1;
@@ -1546,11 +1572,11 @@ static int bftw_build_path(struct bftw_state *state, const char *name) {
 	}
 
 	// Build the path backwards
+	if (name) {
+		bftw_prepend_path(state->path, nameoff, namelen, name);
+	}
 	while (file && file != ancestor) {
-		if (file->nameoff > 0) {
-			state->path[file->nameoff - 1] = '/';
-		}
-		memcpy(state->path + file->nameoff, file->name, file->namelen);
+		bftw_prepend_path(state->path, file->nameoff, file->namelen, file->name);
 
 		if (ancestor && ancestor->depth == file->depth) {
 			ancestor = ancestor->parent;
@@ -1559,20 +1585,6 @@ static int bftw_build_path(struct bftw_state *state, const char *name) {
 	}
 
 	state->previous = state->file;
-
-	if (name) {
-		if (pathlen > 0 && state->path[pathlen - 1] != '/') {
-			if (dstrapp(&state->path, '/') != 0) {
-				state->error = errno;
-				return -1;
-			}
-		}
-		if (dstrcat(&state->path, name) != 0) {
-			state->error = errno;
-			return -1;
-		}
-	}
-
 	return 0;
 }
 
@@ -1676,6 +1688,7 @@ static void bftw_init_ftwbuf(struct bftw_state *state, enum bftw_visit visit) {
 	ftwbuf->visit = visit;
 	ftwbuf->type = BFS_UNKNOWN;
 	ftwbuf->error = state->direrror;
+	ftwbuf->loopoff = 0;
 	ftwbuf->at_fd = AT_FDCWD;
 	ftwbuf->at_path = ftwbuf->path;
 	bftw_stat_init(&ftwbuf->stat_bufs, &state->stat_buf, &state->lstat_buf);
@@ -1733,6 +1746,7 @@ static void bftw_init_ftwbuf(struct bftw_state *state, enum bftw_visit visit) {
 			if (ancestor->dev == statbuf->dev && ancestor->ino == statbuf->ino) {
 				ftwbuf->type = BFS_ERROR;
 				ftwbuf->error = ELOOP;
+				ftwbuf->loopoff = ancestor->nameoff + ancestor->namelen;
 				return;
 			}
 		}
@@ -1863,8 +1877,8 @@ static int bftw_gc(struct bftw_state *state, enum bftw_gc_flags flags) {
 	}
 	state->direrror = 0;
 
-	while ((file = SLIST_POP(&state->to_close, ready))) {
-		bftw_unwrapdir(state, file);
+	drain_slist (struct bftw_file, dead, &state->to_close, ready) {
+		bftw_unwrapdir(state, dead);
 	}
 
 	enum bftw_gc_flags visit = BFTW_VISIT_FILE;
@@ -1945,6 +1959,10 @@ static void bftw_flush(struct bftw_state *state) {
 
 	bftw_queue_flush(&state->dirq);
 	bftw_ioq_opendirs(state);
+
+	if (state->ioq) {
+		ioq_submit(state->ioq);
+	}
 }
 
 /** Close the current directory. */
diff --git a/src/bftw.h b/src/bftw.h
index 8656ca7..8b3ed7f 100644
--- a/src/bftw.h
+++ b/src/bftw.h
@@ -10,6 +10,7 @@
 
 #include "dir.h"
 #include "stat.h"
+
 #include <stddef.h>
 
 /**
@@ -56,6 +57,8 @@ struct BFTW {
 	enum bfs_type type;
 	/** The errno that occurred, if type == BFS_ERROR. */
 	int error;
+	/** For filesystem loops, the length of the loop prefix. */
+	size_t loopoff;
 
 	/** A parent file descriptor for the *at() family of calls. */
 	int at_fd;
@@ -72,9 +75,9 @@ struct BFTW {
  * Get bfs_stat() info for a file encountered during bftw(), caching the result
  * whenever possible.
  *
- * @param ftwbuf
+ * @ftwbuf
  *         bftw() data for the file to stat.
- * @param flags
+ * @flags
  *         flags for bfs_stat().  Pass ftwbuf->stat_flags for the default flags.
  * @return
  *         A pointer to a bfs_stat() buffer, or NULL if the call failed.
@@ -85,9 +88,9 @@ const struct bfs_stat *bftw_stat(const struct BFTW *ftwbuf, enum bfs_stat_flags
  * Get bfs_stat() info for a file encountered during bftw(), if it has already
  * been cached.
  *
- * @param ftwbuf
+ * @ftwbuf
  *         bftw() data for the file to stat.
- * @param flags
+ * @flags
  *         flags for bfs_stat().  Pass ftwbuf->stat_flags for the default flags.
  * @return
  *         A pointer to a bfs_stat() buffer, or NULL if no stat info is cached.
@@ -99,9 +102,9 @@ const struct bfs_stat *bftw_cached_stat(const struct BFTW *ftwbuf, enum bfs_stat
  * whether to follow links.  This function will avoid calling bfs_stat() if
  * possible.
  *
- * @param ftwbuf
+ * @ftwbuf
  *         bftw() data for the file to check.
- * @param flags
+ * @flags
  *         flags for bfs_stat().  Pass ftwbuf->stat_flags for the default flags.
  * @return
  *         The type of the file, or BFS_ERROR if an error occurred.
@@ -123,9 +126,9 @@ enum bftw_action {
 /**
  * Callback function type for bftw().
  *
- * @param ftwbuf
+ * @ftwbuf
  *         Data about the current file.
- * @param ptr
+ * @ptr
  *         The pointer passed to bftw().
  * @return
  *         An action value.
@@ -208,7 +211,7 @@ struct bftw_args {
  * Like ftw(3) and nftw(3), this function walks a directory tree recursively,
  * and invokes a callback for each path it encounters.
  *
- * @param args
+ * @args
  *         The arguments that control the walk.
  * @return
  *         0 on success, or -1 on failure.
diff --git a/src/bit.h b/src/bit.h
index 17cfbcf..5d6fb9d 100644
--- a/src/bit.h
+++ b/src/bit.h
@@ -8,11 +8,12 @@
 #ifndef BFS_BIT_H
 #define BFS_BIT_H
 
-#include "prelude.h"
+#include "bfs.h"
+
 #include <limits.h>
 #include <stdint.h>
 
-#if __STDC_VERSION__ >= C23
+#if __has_include(<stdbit.h>)
 #  include <stdbit.h>
 #endif
 
@@ -147,7 +148,7 @@
 #  define INTMAX_WIDTH UINTMAX_WIDTH
 #endif
 
-// C23 polyfill: byte order
+// N3022 polyfill: byte order
 
 #ifdef __STDC_ENDIAN_LITTLE__
 #  define ENDIAN_LITTLE __STDC_ENDIAN_LITTLE__
@@ -173,11 +174,7 @@
 #  define ENDIAN_NATIVE 0
 #endif
 
-#if __STDC_VERSION__ >= C23
-#  define bswap_u16 stdc_memreverse8u16
-#  define bswap_u32 stdc_memreverse8u32
-#  define bswap_u64 stdc_memreverse8u64
-#elif __GNUC__
+#if __GNUC__
 #  define bswap_u16 __builtin_bswap16
 #  define bswap_u32 __builtin_bswap32
 #  define bswap_u64 __builtin_bswap64
@@ -201,15 +198,35 @@ static inline uint8_t bswap_u8(uint8_t n) {
 	return n;
 }
 
-/**
- * Reverse the byte order of an integer.
- */
-#define bswap(n) \
-	_Generic((n), \
-		uint8_t: bswap_u8, \
-		uint16_t: bswap_u16, \
-		uint32_t: bswap_u32, \
-		uint64_t: bswap_u64)(n)
+#if UCHAR_WIDTH == 8
+#  define bswap_uc bswap_u8
+#endif
+
+#if USHRT_WIDTH == 16
+#  define bswap_us bswap_u16
+#elif USHRT_WIDTH == 32
+#  define bswap_us bswap_u32
+#elif USHRT_WIDTH == 64
+#  define bswap_us bswap_u64
+#endif
+
+#if UINT_WIDTH == 16
+#  define bswap_ui bswap_u16
+#elif UINT_WIDTH == 32
+#  define bswap_ui bswap_u32
+#elif UINT_WIDTH == 64
+#  define bswap_ui bswap_u64
+#endif
+
+#if ULONG_WIDTH == 32
+#  define bswap_ul bswap_u32
+#elif ULONG_WIDTH == 64
+#  define bswap_ul bswap_u64
+#endif
+
+#if ULLONG_WIDTH == 64
+#  define bswap_ull bswap_u64
+#endif
 
 // Define an overload for each unsigned type
 #define UINT_OVERLOADS(macro) \
@@ -222,25 +239,74 @@ static inline uint8_t bswap_u8(uint8_t n) {
 // Select an overload based on an unsigned integer type
 #define UINT_SELECT(n, name) \
 	_Generic((n), \
-		char:               name##_uc, \
-		signed char:        name##_uc, \
 		unsigned char:      name##_uc, \
-		signed short:       name##_us, \
 		unsigned short:     name##_us, \
-		signed int:         name##_ui, \
 		unsigned int:       name##_ui, \
-		signed long:        name##_ul, \
 		unsigned long:      name##_ul, \
-		signed long long:   name##_ull, \
 		unsigned long long: name##_ull)
 
+/**
+ * Reverse the byte order of an integer.
+ */
+#define bswap(n) UINT_SELECT(n, bswap)(n)
+
+#define LOAD8_LEU8(ptr, i, n) ((uint##n##_t)((const unsigned char *)ptr)[(i) / 8] << (i))
+#define LOAD8_BEU8(ptr, i, n) ((uint##n##_t)((const unsigned char *)ptr)[(i) / 8] << (n - (i) - 8))
+
+/** Load a little-endian 8-bit word. */
+static inline uint8_t load8_leu8(const void *ptr) {
+	return LOAD8_LEU8(ptr, 0, 8);
+}
+
+/** Load a big-endian 8-bit word. */
+static inline uint8_t load8_beu8(const void *ptr) {
+	return LOAD8_BEU8(ptr, 0, 8);
+}
+
+#define LOAD8_LEU16(ptr, i, n) (LOAD8_LEU8(ptr, i, n) | LOAD8_LEU8(ptr, i + 8, n))
+#define LOAD8_BEU16(ptr, i, n) (LOAD8_BEU8(ptr, i, n) | LOAD8_BEU8(ptr, i + 8, n))
+
+/** Load a little-endian 16-bit word. */
+static inline uint16_t load8_leu16(const void *ptr) {
+	return LOAD8_LEU16(ptr, 0, 16);
+}
+
+/** Load a big-endian 16-bit word. */
+static inline uint16_t load8_beu16(const void *ptr) {
+	return LOAD8_BEU16(ptr, 0, 16);
+}
+
+#define LOAD8_LEU32(ptr, i, n) (LOAD8_LEU16(ptr, i, n) | LOAD8_LEU16(ptr, i + 16, n))
+#define LOAD8_BEU32(ptr, i, n) (LOAD8_BEU16(ptr, i, n) | LOAD8_BEU16(ptr, i + 16, n))
+
+/** Load a little-endian 32-bit word. */
+static inline uint32_t load8_leu32(const void *ptr) {
+	return LOAD8_LEU32(ptr, 0, 32);
+}
+
+/** Load a big-endian 32-bit word. */
+static inline uint32_t load8_beu32(const void *ptr) {
+	return LOAD8_BEU32(ptr, 0, 32);
+}
+
+#define LOAD8_LEU64(ptr, i, n) (LOAD8_LEU32(ptr, i, n) | LOAD8_LEU32(ptr, i + 32, n))
+#define LOAD8_BEU64(ptr, i, n) (LOAD8_BEU32(ptr, i, n) | LOAD8_BEU32(ptr, i + 32, n))
+
+/** Load a little-endian 64-bit word. */
+static inline uint64_t load8_leu64(const void *ptr) {
+	return LOAD8_LEU64(ptr, 0, 64);
+}
+
+/** Load a big-endian 64-bit word. */
+static inline uint64_t load8_beu64(const void *ptr) {
+	return LOAD8_BEU64(ptr, 0, 64);
+}
+
 // C23 polyfill: bit utilities
 
-#if __STDC_VERSION__ >= C23
+#if __STDC_VERSION_STDBIT_H__ >= C23
 #  define count_ones stdc_count_ones
 #  define count_zeros stdc_count_zeros
-#  define rotate_left stdc_rotate_left
-#  define rotate_right stdc_rotate_right
 #  define leading_zeros stdc_leading_zeros
 #  define leading_ones stdc_leading_ones
 #  define trailing_zeros stdc_trailing_zeros
@@ -273,31 +339,31 @@ static inline uint8_t bswap_u8(uint8_t n) {
 #define BUILTIN_WIDTH(suffix) BUILTIN_WIDTH##suffix
 
 #define COUNT_ONES(type, suffix, width) \
-	static inline int count_ones##suffix(type n) { \
+	static inline unsigned int count_ones##suffix(type n) { \
 		return UINT_BUILTIN(popcount, suffix)(n); \
 	}
 
 #define LEADING_ZEROS(type, suffix, width) \
-	static inline int leading_zeros##suffix(type n) { \
+	static inline unsigned int leading_zeros##suffix(type n) { \
 	        return n \
 			? UINT_BUILTIN(clz, suffix)(n) - (BUILTIN_WIDTH(suffix) - width) \
 			: width; \
 	}
 
 #define TRAILING_ZEROS(type, suffix, width) \
-	static inline int trailing_zeros##suffix(type n) { \
+	static inline unsigned int trailing_zeros##suffix(type n) { \
 		return n ? UINT_BUILTIN(ctz, suffix)(n) : (int)width; \
 	}
 
 #define FIRST_TRAILING_ONE(type, suffix, width) \
-	static inline int first_trailing_one##suffix(type n) { \
+	static inline unsigned int first_trailing_one##suffix(type n) { \
 		return UINT_BUILTIN(ffs, suffix)(n); \
 	}
 
 #else // !__GNUC__
 
 #define COUNT_ONES(type, suffix, width) \
-	static inline int count_ones##suffix(type n) { \
+	static inline unsigned int count_ones##suffix(type n) { \
 		int ret; \
 		for (ret = 0; n; ++ret) { \
 			n &= n - 1; \
@@ -306,7 +372,7 @@ static inline uint8_t bswap_u8(uint8_t n) {
 	}
 
 #define LEADING_ZEROS(type, suffix, width) \
-	static inline int leading_zeros##suffix(type n) { \
+	static inline unsigned int leading_zeros##suffix(type n) { \
 		type bit = (type)1 << (width - 1); \
 		int ret; \
 		for (ret = 0; bit && !(n & bit); ++ret, bit >>= 1); \
@@ -314,7 +380,7 @@ static inline uint8_t bswap_u8(uint8_t n) {
 	}
 
 #define TRAILING_ZEROS(type, suffix, width) \
-	static inline int trailing_zeros##suffix(type n) { \
+	static inline unsigned int trailing_zeros##suffix(type n) { \
 		type bit = 1; \
 		int ret; \
 		for (ret = 0; bit && !(n & bit); ++ret, bit <<= 1); \
@@ -322,7 +388,7 @@ static inline uint8_t bswap_u8(uint8_t n) {
 	}
 
 #define FIRST_TRAILING_ONE(type, suffix, width) \
-	static inline int first_trailing_one##suffix(type n) { \
+	static inline unsigned int first_trailing_one##suffix(type n) { \
 		return n ? trailing_zeros##suffix(n) + 1 : 0; \
 	}
 
@@ -333,19 +399,9 @@ UINT_OVERLOADS(LEADING_ZEROS)
 UINT_OVERLOADS(TRAILING_ZEROS)
 UINT_OVERLOADS(FIRST_TRAILING_ONE)
 
-#define ROTATE_LEFT(type, suffix, width) \
-	static inline type rotate_left##suffix(type n, int c) { \
-		return (n << c) | (n >> ((width - c) % width)); \
-	}
-
-#define ROTATE_RIGHT(type, suffix, width) \
-	static inline type rotate_right##suffix(type n, int c) { \
-		return (n >> c) | (n << ((width - c) % width)); \
-	}
-
 #define FIRST_LEADING_ONE(type, suffix, width) \
-	static inline int first_leading_one##suffix(type n) { \
-		return width - leading_zeros##suffix(n); \
+	static inline unsigned int first_leading_one##suffix(type n) { \
+		return n ? leading_zeros##suffix(n) + 1 : 0; \
 	}
 
 #define HAS_SINGLE_BIT(type, suffix, width) \
@@ -354,17 +410,30 @@ UINT_OVERLOADS(FIRST_TRAILING_ONE)
 		return n - 1 < (n ^ (n - 1)); \
 	}
 
-UINT_OVERLOADS(ROTATE_LEFT)
-UINT_OVERLOADS(ROTATE_RIGHT)
+#define BIT_WIDTH(type, suffix, width) \
+	static inline unsigned int bit_width##suffix(type n) { \
+		return width - leading_zeros##suffix(n); \
+	}
+
+#define BIT_FLOOR(type, suffix, width) \
+	static inline type bit_floor##suffix(type n) { \
+		return n ? (type)1 << (bit_width##suffix(n) - 1) : 0; \
+	}
+
+#define BIT_CEIL(type, suffix, width) \
+	static inline type bit_ceil##suffix(type n) { \
+		return (type)1 << bit_width##suffix(n - !!n); \
+	}
+
 UINT_OVERLOADS(FIRST_LEADING_ONE)
 UINT_OVERLOADS(HAS_SINGLE_BIT)
+UINT_OVERLOADS(BIT_WIDTH)
+UINT_OVERLOADS(BIT_FLOOR)
+UINT_OVERLOADS(BIT_CEIL)
 
 #define count_ones(n) UINT_SELECT(n, count_ones)(n)
 #define count_zeros(n) UINT_SELECT(n, count_ones)(~(n))
 
-#define rotate_left(n, c) UINT_SELECT(n, rotate_left)(n, c)
-#define rotate_right(n, c) UINT_SELECT(n, rotate_right)(n, c)
-
 #define leading_zeros(n) UINT_SELECT(n, leading_zeros)(n)
 #define leading_ones(n) UINT_SELECT(n, leading_zeros)(~(n))
 
@@ -379,23 +448,26 @@ UINT_OVERLOADS(HAS_SINGLE_BIT)
 
 #define has_single_bit(n) UINT_SELECT(n, has_single_bit)(n)
 
-#define BIT_FLOOR(type, suffix, width) \
-	static inline type bit_floor##suffix(type n) { \
-		return n ? (type)1 << (first_leading_one##suffix(n) - 1) : 0; \
-	}
+#define bit_width(n) UINT_SELECT(n, bit_width)(n)
+#define bit_floor(n) UINT_SELECT(n, bit_floor)(n)
+#define bit_ceil(n) UINT_SELECT(n, bit_ceil)(n)
 
-#define BIT_CEIL(type, suffix, width) \
-	static inline type bit_ceil##suffix(type n) { \
-		return (type)1 << first_leading_one##suffix(n - !!n); \
+#endif // __STDC_VERSION_STDBIT_H__ < C23
+
+#define ROTATE_LEFT(type, suffix, width) \
+	static inline type rotate_left##suffix(type n, int c) { \
+		return (n << c) | (n >> ((width - c) % width)); \
 	}
 
-UINT_OVERLOADS(BIT_FLOOR)
-UINT_OVERLOADS(BIT_CEIL)
+#define ROTATE_RIGHT(type, suffix, width) \
+	static inline type rotate_right##suffix(type n, int c) { \
+		return (n >> c) | (n << ((width - c) % width)); \
+	}
 
-#define bit_width(n) first_leading_one(n)
-#define bit_floor(n) UINT_SELECT(n, bit_floor)(n)
-#define bit_ceil(n) UINT_SELECT(n, bit_ceil)(n)
+UINT_OVERLOADS(ROTATE_LEFT)
+UINT_OVERLOADS(ROTATE_RIGHT)
 
-#endif // __STDC_VERSION__ < C23
+#define rotate_left(n, c) UINT_SELECT(n, rotate_left)(n, c)
+#define rotate_right(n, c) UINT_SELECT(n, rotate_right)(n, c)
 
 #endif // BFS_BIT_H
diff --git a/src/color.c b/src/color.c
index f004bf2..a026831 100644
--- a/src/color.c
+++ b/src/color.c
@@ -1,9 +1,10 @@
 // Copyright © Tavian Barnes <tavianator@tavianator.com>
 // SPDX-License-Identifier: 0BSD
 
-#include "prelude.h"
 #include "color.h"
+
 #include "alloc.h"
+#include "bfs.h"
 #include "bfstd.h"
 #include "bftw.h"
 #include "diag.h"
@@ -13,6 +14,7 @@
 #include "fsade.h"
 #include "stat.h"
 #include "trie.h"
+
 #include <errno.h>
 #include <fcntl.h>
 #include <stdarg.h>
@@ -29,8 +31,8 @@
 struct esc_seq {
 	/** The length of the escape sequence. */
 	size_t len;
-	/** The escape sequence iteself, without a terminating NUL. */
-	char seq[];
+	/** The escape sequence itself, without a terminating NUL. */
+	char seq[] _counted_by(len);
 };
 
 /**
@@ -46,7 +48,7 @@ struct ext_color {
 	/** Whether the comparison should be case-sensitive. */
 	bool case_sensitive;
 	/** The extension to match (NUL-terminated). */
-	char ext[];
+	char ext[]; // _counted_by(len + 1);
 };
 
 struct colors {
@@ -141,13 +143,7 @@ static int init_esc(struct colors *colors, const char *name, const char *value,
 
 	*field = esc;
 
-	struct trie_leaf *leaf = trie_insert_str(&colors->names, name);
-	if (!leaf) {
-		return -1;
-	}
-
-	leaf->value = field;
-	return 0;
+	return trie_set_str(&colors->names, name, field);
 }
 
 /** Check if an escape sequence is equal to a string. */
@@ -157,12 +153,16 @@ static bool esc_eq(const struct esc_seq *esc, const char *str, size_t len) {
 
 /** Get an escape sequence from the table. */
 static struct esc_seq **get_esc(const struct colors *colors, const char *name) {
-	const struct trie_leaf *leaf = trie_find_str(&colors->names, name);
-	return leaf ? leaf->value : NULL;
+	return trie_get_str(&colors->names, name);
+}
+
+/** Append an escape sequence to a string. */
+static int cat_esc(dchar **dstr, const struct esc_seq *seq) {
+	return dstrxcat(dstr, seq->seq, seq->len);
 }
 
 /** Set a named escape sequence. */
-static int set_esc(struct colors *colors, const char *name, char *value) {
+static int set_esc(struct colors *colors, const char *name, dchar *value) {
 	struct esc_seq **field = get_esc(colors, name);
 	if (!field) {
 		return 0;
@@ -209,58 +209,31 @@ static void ext_tolower(char *ext, size_t len) {
 	}
 }
 
-/**
- * The "smart case" algorithm.
- *
- * @param ext
- *         The current extension being added.
- * @param prev
- *         The previous case-sensitive match, if any, for the same extension.
- * @param iprev
- *         The previous case-insensitive match, if any, for the same extension.
- * @return
- *         Whether this extension should become case-sensitive.
- */
-static bool ext_case_sensitive(struct ext_color *ext, struct ext_color *prev, struct ext_color *iprev) {
-	// This is the first case-insensitive occurrence of this extension, e.g.
-	//
-	//     *.gz=01;31:*.tar.gz=01;33
-	if (!iprev) {
-		bfs_assert(!prev);
-		return false;
-	}
-
-	// If the last version of this extension is already case-sensitive,
-	// this one should be too, e.g.
-	//
-	//     *.tar.gz=01;31:*.TAR.GZ=01;32:*.TAR.GZ=01;33
-	if (iprev->case_sensitive) {
-		return true;
-	}
-
-	// The case matches the last occurrence exactly, e.g.
-	//
-	//     *.tar.gz=01;31:*.tar.gz=01;33
-	if (iprev == prev) {
-		return false;
-	}
-
-	// Different case, but same value, e.g.
-	//
-	//     *.tar.gz=01;31:*.TAR.GZ=01;31
-	if (esc_eq(iprev->esc, ext->esc->seq, ext->esc->len)) {
-		return false;
+/** Insert an extension into a trie. */
+static int insert_ext(struct trie *trie, struct ext_color *ext) {
+	// A later *.x should override any earlier *.x, *.y.x, etc.
+	struct trie_leaf *leaf;
+	while ((leaf = trie_find_postfix(trie, ext->ext))) {
+		trie_remove(trie, leaf);
 	}
 
-	// Different case, different value, e.g.
-	//
-	//     *.tar.gz=01;31:*.TAR.GZ=01;33
-	return true;
+	size_t len = ext->len + 1;
+	return trie_set_mem(trie, ext->ext, len, ext);
 }
 
 /** Set the color for an extension. */
-static int set_ext(struct colors *colors, char *key, char *value) {
+static int set_ext(struct colors *colors, dchar *key, dchar *value) {
 	size_t len = dstrlen(key);
+
+	// Embedded NUL bytes in extensions can lead to a non-prefix-free
+	// set of strings, e.g. {".gz", "\0.gz"} would be transformed to
+	// {"zg.\0", "zg.\0\0"} (showing the implicit terminating NUL).
+	// Our trie implementation only supports prefix-free key sets, but
+	// luckily '\0' cannot appear in filenames so we can ignore them.
+	if (memchr(key, '\0', len)) {
+		return 0;
+	}
+
 	struct ext_color *ext = varena_alloc(&colors->ext_arena, len + 1);
 	if (!ext) {
 		return -1;
@@ -274,45 +247,19 @@ static int set_ext(struct colors *colors, char *key, char *value) {
 		goto fail;
 	}
 
-	key = memcpy(ext->ext, key, len + 1);
+	memcpy(ext->ext, key, len + 1);
 
 	// Reverse the extension (`*.y.x` -> `x.y.*`) so we can use trie_find_prefix()
-	ext_reverse(key, len);
-
-	// Find any pre-existing exact match
-	struct ext_color *prev = NULL;
-	struct trie_leaf *leaf = trie_find_str(&colors->ext_trie, key);
-	if (leaf) {
-		prev = leaf->value;
-		trie_remove(&colors->ext_trie, leaf);
-	}
-
-	// A later *.x should override any earlier *.x, *.y.x, etc.
-	while ((leaf = trie_find_postfix(&colors->ext_trie, key))) {
-		trie_remove(&colors->ext_trie, leaf);
-	}
+	ext_reverse(ext->ext, len);
 
 	// Insert the extension into the case-sensitive trie
-	leaf = trie_insert_str(&colors->ext_trie, key);
-	if (!leaf) {
+	if (insert_ext(&colors->ext_trie, ext) != 0) {
 		goto fail;
 	}
-	leaf->value = ext;
 
-	// "Smart case": if the same extension is given with two different
-	// capitalizations (e.g. `*.y.x=31:*.Y.Z=32:`), make it case-sensitive
-	ext_tolower(key, len);
-	leaf = trie_insert_str(&colors->iext_trie, key);
-	if (!leaf) {
-		goto fail;
-	}
-
-	struct ext_color *iprev = leaf->value;
-	if (ext_case_sensitive(ext, prev, iprev)) {
-		iprev->case_sensitive = true;
-		ext->case_sensitive = true;
+	if (colors->ext_len < len) {
+		colors->ext_len = len;
 	}
-	leaf->value = ext;
 
 	return 0;
 
@@ -324,32 +271,83 @@ fail:
 	return -1;
 }
 
-/** Rebuild the case-insensitive trie after all extensions have been parsed. */
-static int build_iext_trie(struct colors *colors) {
-	trie_clear(&colors->iext_trie);
+/**
+ * The "smart case" algorithm.
+ *
+ * @ext
+ *         The current extension being added.
+ * @iext
+ *         The previous case-insensitive match, if any, for the same extension.
+ * @return
+ *         Whether this extension should become case-sensitive.
+ */
+static bool ext_case_sensitive(struct ext_color *ext, struct ext_color *iext) {
+	// This is the first case-insensitive occurrence of this extension, e.g.
+	//
+	//     *.gz=01;31:*.tar.gz=01;33
+	if (!iext) {
+		return false;
+	}
+
+	// If the last version of this extension is already case-sensitive,
+	// this one should be too, e.g.
+	//
+	//     *.tar.gz=01;31:*.TAR.GZ=01;32:*.TAR.GZ=01;33
+	if (iext->case_sensitive) {
+		return true;
+	}
+
+	// Different case, but same value, e.g.
+	//
+	//     *.tar.gz=01;31:*.TAR.GZ=01;31
+	if (esc_eq(iext->esc, ext->esc->seq, ext->esc->len)) {
+		return false;
+	}
 
+	// Different case, different value, e.g.
+	//
+	//     *.tar.gz=01;31:*.TAR.GZ=01;33
+	return true;
+}
+
+/** Build the case-insensitive trie, after all extensions have been parsed. */
+static int build_iext_trie(struct colors *colors) {
+	// Find which extensions should be case-sensitive
 	for_trie (leaf, &colors->ext_trie) {
-		size_t len = leaf->length - 1;
-		if (colors->ext_len < len) {
-			colors->ext_len = len;
+		struct ext_color *ext = leaf->value;
+
+		// "Smart case": if the same extension is given with two different
+		// capitalizations (e.g. `*.y.x=31:*.Y.X=32:`), make it case-sensitive
+		ext_tolower(ext->ext, ext->len);
+
+		size_t len = ext->len + 1;
+		struct trie_leaf *ileaf = trie_insert_mem(&colors->iext_trie, ext->ext, len);
+		if (!ileaf) {
+			return -1;
 		}
 
+		struct ext_color *iext = ileaf->value;
+		if (ext_case_sensitive(ext, iext)) {
+			ext->case_sensitive = true;
+			iext->case_sensitive = true;
+		}
+
+		ileaf->value = ext;
+	}
+
+	// Rebuild the trie with only the case-insensitive ones
+	trie_clear(&colors->iext_trie);
+
+	for_trie (leaf, &colors->ext_trie) {
 		struct ext_color *ext = leaf->value;
 		if (ext->case_sensitive) {
 			continue;
 		}
 
-		// set_ext() already reversed and lowercased the extension
-		struct trie_leaf *ileaf;
-		while ((ileaf = trie_find_postfix(&colors->iext_trie, ext->ext))) {
-			trie_remove(&colors->iext_trie, ileaf);
-		}
-
-		ileaf = trie_insert_str(&colors->iext_trie, ext->ext);
-		if (!ileaf) {
+		// We already lowercased the extension above
+		if (insert_ext(&colors->iext_trie, ext) != 0) {
 			return -1;
 		}
-		ileaf->value = ext;
 	}
 
 	return 0;
@@ -358,9 +356,8 @@ static int build_iext_trie(struct colors *colors) {
 /**
  * Find a color by an extension.
  */
-static const struct esc_seq *get_ext(const struct colors *colors, const char *filename) {
+static const struct esc_seq *get_ext(const struct colors *colors, const char *filename, size_t name_len) {
 	size_t ext_len = colors->ext_len;
-	size_t name_len = strlen(filename);
 	if (name_len < ext_len) {
 		ext_len = name_len;
 	}
@@ -369,7 +366,8 @@ static const struct esc_seq *get_ext(const struct colors *colors, const char *fi
 	char buf[256];
 	char *copy;
 	if (ext_len < sizeof(buf)) {
-		copy = memcpy(buf, suffix, ext_len + 1);
+		copy = memcpy(buf, suffix, ext_len);
+		copy[ext_len] = '\0';
 	} else {
 		copy = strndup(suffix, ext_len);
 		if (!copy) {
@@ -417,13 +415,13 @@ static const struct esc_seq *get_ext(const struct colors *colors, const char *fi
  *
  * See man dir_colors.
  *
- * @param str
+ * @str
  *         A dstring to fill with the unescaped chunk.
- * @param value
+ * @value
  *         The value to parse.
- * @param end
+ * @end
  *         The character that marks the end of the chunk.
- * @param[out] next
+ * @next[out]
  *         Will be set to the next chunk.
  * @return
  *         0 on success, -1 on failure.
@@ -578,7 +576,7 @@ static int parse_gnu_ls_colors(struct colors *colors, const char *ls_colors) {
 				break;
 			}
 
-			if (dstrncpy(&key, chunk, equals - chunk) != 0) {
+			if (dstrxcpy(&key, chunk, equals - chunk) != 0) {
 				goto fail;
 			}
 			if (unescape(&value, equals + 1, ':', &next) != 0) {
@@ -587,8 +585,8 @@ static int parse_gnu_ls_colors(struct colors *colors, const char *ls_colors) {
 
 			// All-zero values should be treated like NULL, to fall
 			// back on any other relevant coloring for that file
-			char *esc = value;
-			if (strspn(value, "0") == strlen(value)
+			dchar *esc = value;
+			if (strspn(value, "0") == dstrlen(value)
 			    && strcmp(key, "rs") != 0
 			    && strcmp(key, "lc") != 0
 			    && strcmp(key, "rc") != 0
@@ -693,6 +691,20 @@ struct colors *parse_colors(void) {
 		colors->link->len = 0;
 	}
 
+	// Pre-compute the reset escape sequence
+	if (!colors->endcode) {
+		dchar *ec = dstralloc(0);
+		if (!ec
+		    || cat_esc(&ec, colors->leftcode) != 0
+		    || cat_esc(&ec, colors->reset) != 0
+		    || cat_esc(&ec, colors->rightcode) != 0
+		    || set_esc(colors, "ec", ec) != 0) {
+			dstrfree(ec);
+			goto fail;
+		}
+		dstrfree(ec);
+	}
+
 	return colors;
 
 fail:
@@ -727,10 +739,11 @@ CFILE *cfwrap(FILE *file, const struct colors *colors, bool close) {
 	}
 
 	cfile->file = file;
+	cfile->fd = fileno(file);
 	cfile->need_reset = false;
 	cfile->close = close;
 
-	if (isatty(fileno(file))) {
+	if (isatty(cfile->fd)) {
 		cfile->colors = colors;
 	} else {
 		cfile->colors = NULL;
@@ -755,23 +768,196 @@ int cfclose(CFILE *cfile) {
 	return ret;
 }
 
+bool colors_need_stat(const struct colors *colors) {
+	return colors->setuid || colors->setgid || colors->executable || colors->multi_hard
+		|| colors->sticky_other_writable || colors->other_writable || colors->sticky;
+}
+
+/** A colorable file path. */
+struct cpath {
+	/** The full path to color. */
+	const char *path;
+	/** The basename offset of the last valid component. */
+	size_t nameoff;
+	/** The end offset of the last valid component. */
+	size_t valid;
+	/** The total length of the path. */
+	size_t len;
+
+	/** The bftw() buffer. */
+	const struct BFTW *ftwbuf;
+	/** bfs_stat() flags for the final component. */
+	enum bfs_stat_flags flags;
+	/** A bfs_stat() buffer, filled in when 0 < valid < len. */
+	struct bfs_stat statbuf;
+};
+
+/** Move the valid range of a path backwards: strip trailing slashes if any, otherwise drop the last component. */
+static void cpath_retreat(struct cpath *cpath) {
+	const char *path = cpath->path;
+	size_t nameoff = cpath->nameoff;
+	size_t valid = cpath->valid;
+
+	if (valid > 0 && path[valid - 1] == '/') {
+		// Try without trailing slashes, to distinguish "notdir/" from "notdir"
+		do {
+			--valid;
+		} while (valid > 0 && path[valid - 1] == '/');
+
+		nameoff = valid; // Rescan backwards for the start of the component that now ends at valid
+		while (nameoff > 0 && path[nameoff - 1] != '/') {
+			--nameoff;
+		}
+	} else {
+		// Remove the last component and try again
+		valid = nameoff;
+	}
+
+	cpath->nameoff = nameoff;
+	cpath->valid = valid;
+}
+
+/** Initialize a struct cpath. */
+static int cpath_init(struct cpath *cpath, const char *path, const struct BFTW *ftwbuf, enum bfs_stat_flags flags) {
+	// Normally there are only two components to color:
+	//
+	//          nameoff  valid
+	//             v       v
+	//     path/to/filename
+	//     --------+-------
+	//     ${di}   ${fi}
+	//
+	// Error cases also usually have two components:
+	//
+	//           valid,
+	//          nameoff
+	//             v
+	//     path/to/nowhere
+	//     --------+------
+	//     ${di}   ${mi}
+	//
+	// But with ENOTDIR, there may be three:
+	//
+	//          nameoff  valid
+	//             v       v
+	//     path/to/filename/nowhere
+	//     --------+-------+-------
+	//     ${di}   ${fi}   ${mi}
+
+	cpath->path = path;
+	cpath->len = strlen(path);
+	cpath->ftwbuf = ftwbuf;
+	cpath->flags = flags;
+
+	cpath->valid = cpath->len;
+	if (path == ftwbuf->path) {
+		cpath->nameoff = ftwbuf->nameoff;
+	} else {
+		cpath->nameoff = xbaseoff(path);
+	}
+
+	if (bftw_type(ftwbuf, flags) != BFS_ERROR) {
+		return 0;
+	}
+
+	cpath_retreat(cpath);
+
+	// Find the base path.  For symlinks like
+	//
+	//     path/to/symlink -> nested/file
+	//
+	// this will be something like
+	//
+	//     path/to/nested/file
+	int at_fd = AT_FDCWD;
+	dchar *at_path = NULL;
+	if (path == ftwbuf->path) {
+		if (ftwbuf->depth > 0) {
+			// The parent must have existed to get here
+			return 0;
+		}
+	} else {
+		// We're in print_link_target(), so resolve relative to the link's parent directory
+		at_fd = ftwbuf->at_fd;
+		if (at_fd == (int)AT_FDCWD && path[0] != '/') {
+			at_path = dstrxdup(ftwbuf->path, ftwbuf->nameoff);
+			if (!at_path) {
+				return -1;
+			}
+		}
+	}
+
+	if (!at_path) {
+		at_path = dstralloc(cpath->valid);
+		if (!at_path) {
+			return -1;
+		}
+	}
+	if (dstrxcat(&at_path, path, cpath->valid) != 0) {
+		dstrfree(at_path);
+		return -1;
+	}
+
+	size_t at_off = dstrlen(at_path) - cpath->valid;
+
+	// Find the longest valid path prefix
+	while (cpath->valid > 0) {
+		if (bfs_stat(at_fd, at_path, BFS_STAT_FOLLOW, &cpath->statbuf) == 0) {
+			break;
+		}
+
+		cpath_retreat(cpath);
+		dstrshrink(at_path, at_off + cpath->valid);
+	}
+
+	dstrfree(at_path);
+	return 0;
+}
+
+/** Get the bfs_stat() buffer for the last valid component. */
+static const struct bfs_stat *cpath_stat(const struct cpath *cpath) {
+	if (cpath->valid == cpath->len) { // The whole path is valid, so defer to bftw_stat()
+		return bftw_stat(cpath->ftwbuf, cpath->flags);
+	} else {
+		return &cpath->statbuf; // Only a prefix is valid; cpath_init() fills statbuf when 0 < valid < len
+	}
+}
+
+/** Check if a path has non-trivial capabilities. */
+static bool cpath_has_capabilities(const struct cpath *cpath) {
+	if (cpath->valid == cpath->len) {
+		return bfs_check_capabilities(cpath->ftwbuf) > 0;
+	} else {
+		// TODO: implement capability checks for arbitrary paths
+		return false;
+	}
+}
+
 /** Check if a symlink is broken. */
-static bool is_link_broken(const struct BFTW *ftwbuf) {
+static bool cpath_is_broken(const struct cpath *cpath) {
+	if (cpath->valid < cpath->len) {
+		// A valid parent can't be a broken link
+		return false;
+	}
+
+	const struct BFTW *ftwbuf = cpath->ftwbuf;
 	if (ftwbuf->stat_flags & BFS_STAT_NOFOLLOW) {
 		return xfaccessat(ftwbuf->at_fd, ftwbuf->at_path, F_OK) != 0;
 	} else {
+		// A link encountered with BFS_STAT_TRYFOLLOW must be broken
 		return true;
 	}
 }
 
-bool colors_need_stat(const struct colors *colors) {
-	return colors->setuid || colors->setgid || colors->executable || colors->multi_hard
-		|| colors->sticky_other_writable || colors->other_writable || colors->sticky;
-}
-
 /** Get the color for a file. */
-static const struct esc_seq *file_color(const struct colors *colors, const char *filename, const struct BFTW *ftwbuf, enum bfs_stat_flags flags) {
-	enum bfs_type type = bftw_type(ftwbuf, flags);
+static const struct esc_seq *file_color(const struct colors *colors, const struct cpath *cpath) {
+	enum bfs_type type;
+	if (cpath->valid == cpath->len) {
+		type = bftw_type(cpath->ftwbuf, cpath->flags);
+	} else {
+		type = bfs_mode_to_type(cpath->statbuf.mode);
+	}
+
 	if (type == BFS_ERROR) {
 		goto error;
 	}
@@ -782,7 +968,7 @@ static const struct esc_seq *file_color(const struct colors *colors, const char
 	switch (type) {
 	case BFS_REG:
 		if (colors->setuid || colors->setgid || colors->executable || colors->multi_hard) {
-			statbuf = bftw_stat(ftwbuf, flags);
+			statbuf = cpath_stat(cpath);
 			if (!statbuf) {
 				goto error;
 			}
@@ -792,7 +978,7 @@ static const struct esc_seq *file_color(const struct colors *colors, const char
 			color = colors->setuid;
 		} else if (colors->setgid && (statbuf->mode & 02000)) {
 			color = colors->setgid;
-		} else if (colors->capable && bfs_check_capabilities(ftwbuf) > 0) {
+		} else if (colors->capable && cpath_has_capabilities(cpath)) {
 			color = colors->capable;
 		} else if (colors->executable && (statbuf->mode & 00111)) {
 			color = colors->executable;
@@ -801,7 +987,9 @@ static const struct esc_seq *file_color(const struct colors *colors, const char
 		}
 
 		if (!color) {
-			color = get_ext(colors, filename);
+			const char *name = cpath->path + cpath->nameoff;
+			size_t namelen = cpath->valid - cpath->nameoff;
+			color = get_ext(colors, name, namelen);
 		}
 
 		if (!color) {
@@ -812,7 +1000,7 @@ static const struct esc_seq *file_color(const struct colors *colors, const char
 
 	case BFS_DIR:
 		if (colors->sticky_other_writable || colors->other_writable || colors->sticky) {
-			statbuf = bftw_stat(ftwbuf, flags);
+			statbuf = cpath_stat(cpath);
 			if (!statbuf) {
 				goto error;
 			}
@@ -831,7 +1019,7 @@ static const struct esc_seq *file_color(const struct colors *colors, const char
 		break;
 
 	case BFS_LNK:
-		if (colors->orphan && is_link_broken(ftwbuf)) {
+		if (colors->orphan && cpath_is_broken(cpath)) {
 			color = colors->orphan;
 		} else {
 			color = colors->link;
@@ -874,7 +1062,7 @@ error:
 
 /** Print an escape sequence chunk. */
 static int print_esc_chunk(CFILE *cfile, const struct esc_seq *esc) {
-	return dstrxcat(&cfile->buffer, esc->seq, esc->len);
+	return cat_esc(&cfile->buffer, esc);
 }
 
 /** Print an ANSI escape sequence. */
@@ -908,12 +1096,7 @@ static int print_reset(CFILE *cfile) {
 	}
 	cfile->need_reset = false;
 
-	const struct colors *colors = cfile->colors;
-	if (colors->endcode) {
-		return print_esc_chunk(cfile, colors->endcode);
-	} else {
-		return print_esc(cfile, colors->reset);
-	}
+	return print_esc_chunk(cfile, cfile->colors->endcode);
 }
 
 /** Print a shell-escaped string. */
@@ -923,6 +1106,10 @@ static int print_wordesc(CFILE *cfile, const char *str, size_t n, enum wesc_flag
 
 /** Print a string with an optional color. */
 static int print_colored(CFILE *cfile, const struct esc_seq *esc, const char *str, size_t len) {
+	if (len == 0) {
+		return 0;
+	}
+
 	if (print_esc(cfile, esc) != 0) {
 		return -1;
 	}
@@ -939,112 +1126,42 @@ static int print_colored(CFILE *cfile, const struct esc_seq *esc, const char *st
 	return 0;
 }
 
-/** Find the offset of the first broken path component. */
-static ssize_t first_broken_offset(const char *path, const struct BFTW *ftwbuf, enum bfs_stat_flags flags, size_t max) {
-	ssize_t ret = max;
-	bfs_assert(ret >= 0);
-
-	if (bftw_type(ftwbuf, flags) != BFS_ERROR) {
-		goto out;
-	}
-
-	dchar *at_path;
-	int at_fd;
-	if (path == ftwbuf->path) {
-		if (ftwbuf->depth == 0) {
-			at_fd = AT_FDCWD;
-			at_path = dstrndup(path, max);
-		} else {
-			// The parent must have existed to get here
-			goto out;
-		}
-	} else {
-		// We're in print_link_target(), so resolve relative to the link's parent directory
-		at_fd = ftwbuf->at_fd;
-		if (at_fd == AT_FDCWD && path[0] != '/') {
-			at_path = dstrndup(ftwbuf->path, ftwbuf->nameoff);
-			if (at_path && dstrncat(&at_path, path, max) != 0) {
-				ret = -1;
-				goto out_path;
-			}
-		} else {
-			at_path = dstrndup(path, max);
-		}
-	}
-
-	if (!at_path) {
-		ret = -1;
-		goto out;
-	}
-
-	while (ret > 0) {
-		if (xfaccessat(at_fd, at_path, F_OK) == 0) {
-			break;
-		}
-
-		size_t len = dstrlen(at_path);
-		while (ret && at_path[len - 1] == '/') {
-			--len, --ret;
-		}
-		if (errno != ENOTDIR) {
-			while (ret && at_path[len - 1] != '/') {
-				--len, --ret;
-			}
-		}
-
-		dstresize(&at_path, len);
-	}
-
-out_path:
-	dstrfree(at_path);
-out:
-	return ret;
-}
-
 /** Print a path with colors. */
 static int print_path_colored(CFILE *cfile, const char *path, const struct BFTW *ftwbuf, enum bfs_stat_flags flags) {
-	size_t nameoff;
-	if (path == ftwbuf->path) {
-		nameoff = ftwbuf->nameoff;
-	} else {
-		nameoff = xbaseoff(path);
-	}
-
-	const char *name = path + nameoff;
-	size_t pathlen = nameoff + strlen(name);
-
-	ssize_t broken = first_broken_offset(path, ftwbuf, flags, nameoff);
-	if (broken < 0) {
+	struct cpath cpath;
+	if (cpath_init(&cpath, path, ftwbuf, flags) != 0) {
 		return -1;
 	}
-	size_t split = broken;
 
 	const struct colors *colors = cfile->colors;
 	const struct esc_seq *dirs_color = colors->directory;
-	const struct esc_seq *name_color;
+	const struct esc_seq *name_color = NULL;
+	const struct esc_seq *err_color = colors->missing;
+	if (!err_color) {
+		err_color = colors->orphan;
+	}
 
-	if (split < nameoff) {
-		name_color = colors->missing;
-		if (!name_color) {
-			name_color = colors->orphan;
-		}
-	} else {
-		name_color = file_color(cfile->colors, path + nameoff, ftwbuf, flags);
+	if (cpath.nameoff < cpath.valid) {
+		name_color = file_color(colors, &cpath);
 		if (name_color == dirs_color) {
-			split = pathlen;
+			cpath.nameoff = cpath.valid;
 		}
 	}
 
-	if (split > 0) {
-		if (print_colored(cfile, dirs_color, path, split) != 0) {
-			return -1;
-		}
+	if (print_colored(cfile, dirs_color, path, cpath.nameoff) != 0) {
+		return -1;
 	}
 
-	if (split < pathlen) {
-		if (print_colored(cfile, name_color, path + split, pathlen - split) != 0) {
-			return -1;
-		}
+	const char *name = path + cpath.nameoff;
+	size_t name_len = cpath.valid - cpath.nameoff;
+	if (print_colored(cfile, name_color, name, name_len) != 0) {
+		return -1;
+	}
+
+	const char *tail = path + cpath.valid;
+	size_t tail_len = cpath.len - cpath.valid;
+	if (print_colored(cfile, err_color, tail, tail_len) != 0) {
+		return -1;
 	}
 
 	return 0;
@@ -1052,8 +1169,18 @@ static int print_path_colored(CFILE *cfile, const char *path, const struct BFTW
 
 /** Print a file name with colors. */
 static int print_name_colored(CFILE *cfile, const char *name, const struct BFTW *ftwbuf, enum bfs_stat_flags flags) {
-	const struct esc_seq *esc = file_color(cfile->colors, name, ftwbuf, flags);
-	return print_colored(cfile, esc, name, strlen(name));
+	size_t len = strlen(name);
+	const struct cpath cpath = {
+		.path = name,
+		.nameoff = 0,
+		.valid = len,
+		.len = len,
+		.ftwbuf = ftwbuf,
+		.flags = flags,
+	};
+
+	const struct esc_seq *esc = file_color(cfile->colors, &cpath);
+	return print_colored(cfile, esc, name, cpath.len);
 }
 
 /** Print the name of a file with the appropriate colors. */
@@ -1110,9 +1237,36 @@ static int print_link_target(CFILE *cfile, const struct BFTW *ftwbuf) {
 }
 
 /** Format some colored output to the buffer. */
-attr(printf(2, 3))
+_printf(2, 3)
 static int cbuff(CFILE *cfile, const char *format, ...);
 
+/** Print an expression's name (argv[0]), colored according to its kind, for diagnostics. */
+static int print_expr_name(CFILE *cfile, const struct bfs_expr *expr) {
+	switch (expr->kind) {
+	case BFS_FLAG:
+		return cbuff(cfile, "${cyn}%pq${rs}", expr->argv[0]);
+	case BFS_OPERATOR:
+		return cbuff(cfile, "${red}%pq${rs}", expr->argv[0]);
+	default:
+		return cbuff(cfile, "${blu}%pq${rs}", expr->argv[0]);
+	}
+}
+
+/** Print an expression's name followed by each of its arguments (argv[1..argc-1]), for diagnostics. */
+static int print_expr_args(CFILE *cfile, const struct bfs_expr *expr) {
+	if (print_expr_name(cfile, expr) != 0) {
+		return -1;
+	}
+
+	for (size_t i = 1; i < expr->argc; ++i) {
+		if (cbuff(cfile, " ${bld}%pq${rs}", expr->argv[i]) < 0) {
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
 /** Dump a parsed expression tree, for debugging. */
 static int print_expr(CFILE *cfile, const struct bfs_expr *expr, bool verbose, int depth) {
 	if (depth >= 2) {
@@ -1127,20 +1281,8 @@ static int print_expr(CFILE *cfile, const struct bfs_expr *expr, bool verbose, i
 		return -1;
 	}
 
-	if (bfs_expr_is_parent(expr)) {
-		if (cbuff(cfile, "${red}%pq${rs}", expr->argv[0]) < 0) {
-			return -1;
-		}
-	} else {
-		if (cbuff(cfile, "${blu}%pq${rs}", expr->argv[0]) < 0) {
-			return -1;
-		}
-	}
-
-	for (size_t i = 1; i < expr->argc; ++i) {
-		if (cbuff(cfile, " ${bld}%pq${rs}", expr->argv[i]) < 0) {
-			return -1;
-		}
+	if (print_expr_args(cfile, expr) != 0) {
+		return -1;
 	}
 
 	if (verbose) {
@@ -1156,7 +1298,7 @@ static int print_expr(CFILE *cfile, const struct bfs_expr *expr, bool verbose, i
 	}
 
 	int count = 0;
-	for (struct bfs_expr *child = bfs_expr_children(expr); child; child = child->next) {
+	for_expr (child, expr) {
 		if (dstrcat(&cfile->buffer, " ") != 0) {
 			return -1;
 		}
@@ -1179,10 +1321,9 @@ static int print_expr(CFILE *cfile, const struct bfs_expr *expr, bool verbose, i
 	return 0;
 }
 
-attr(printf(2, 0))
+_printf(2, 0)
 static int cvbuff(CFILE *cfile, const char *format, va_list args) {
 	const struct colors *colors = cfile->colors;
-	int error = errno;
 
 	// Color specifier (e.g. ${blu}) state
 	struct esc_seq **esc;
@@ -1192,7 +1333,7 @@ static int cvbuff(CFILE *cfile, const char *format, va_list args) {
 
 	for (const char *i = format; *i; ++i) {
 		size_t verbatim = strcspn(i, "%$");
-		if (dstrncat(&cfile->buffer, i, verbatim) != 0) {
+		if (dstrxcat(&cfile->buffer, i, verbatim) != 0) {
 			return -1;
 		}
 		i += verbatim;
@@ -1240,12 +1381,6 @@ static int cvbuff(CFILE *cfile, const char *format, va_list args) {
 				}
 				break;
 
-			case 'm':
-				if (dstrcat(&cfile->buffer, xstrerror(error)) != 0) {
-					return -1;
-				}
-				break;
-
 			case 'p':
 				switch (*++i) {
 				case 'q':
@@ -1287,6 +1422,16 @@ static int cvbuff(CFILE *cfile, const char *format, va_list args) {
 						return -1;
 					}
 					break;
+				case 'x':
+					if (print_expr_args(cfile, va_arg(args, const struct bfs_expr *)) != 0) {
+						return -1;
+					}
+					break;
+				case 'X':
+					if (print_expr_name(cfile, va_arg(args, const struct bfs_expr *)) != 0) {
+						return -1;
+					}
+					break;
 
 				default:
 					goto invalid;
@@ -1379,7 +1524,7 @@ int cvfprintf(CFILE *cfile, const char *format, va_list args) {
 		}
 	}
 
-	dstresize(&cfile->buffer, 0);
+	dstrshrink(cfile->buffer, 0);
 	return ret;
 }
 
@@ -1390,3 +1535,14 @@ int cfprintf(CFILE *cfile, const char *format, ...) {
 	va_end(args);
 	return ret;
 }
+
+int cfreset(CFILE *cfile) {
+	const struct colors *colors = cfile->colors;
+	if (!colors) { // No color table, so nothing is written
+		return 0;
+	}
+
+	const struct esc_seq *esc = colors->endcode;
+	size_t ret = xwrite(cfile->fd, esc->seq, esc->len); // Written straight to the fd, not via cfile->buffer
+	return ret == esc->len ? 0 : -1; // A short write counts as failure
+}
diff --git a/src/color.h b/src/color.h
index 3278cd6..aac8b33 100644
--- a/src/color.h
+++ b/src/color.h
@@ -8,8 +8,9 @@
 #ifndef BFS_COLOR_H
 #define BFS_COLOR_H
 
-#include "prelude.h"
+#include "bfs.h"
 #include "dstring.h"
+
 #include <stdio.h>
 
 /**
@@ -42,6 +43,8 @@ typedef struct CFILE {
 	const struct colors *colors;
 	/** A buffer for colored formatting. */
 	dchar *buffer;
+	/** Cached file descriptor number. */
+	int fd;
 	/** Whether the next ${rs} is actually necessary. */
 	bool need_reset;
 	/** Whether to close the underlying stream. */
@@ -51,11 +54,11 @@ typedef struct CFILE {
 /**
  * Wrap an existing file into a colored stream.
  *
- * @param file
+ * @file
  *         The underlying file.
- * @param colors
+ * @colors
  *         The color table to use if file is a TTY.
- * @param close
+ * @close
  *         Whether to close the underlying stream when this stream is closed.
  * @return
  *         A colored wrapper around file.
@@ -65,7 +68,7 @@ CFILE *cfwrap(FILE *file, const struct colors *colors, bool close);
 /**
  * Close a colored file.
  *
- * @param cfile
+ * @cfile
  *         The colored file to close.
  * @return
  *         0 on success, -1 on failure.
@@ -75,9 +78,9 @@ int cfclose(CFILE *cfile);
 /**
  * Colored, formatted output.
  *
- * @param cfile
+ * @cfile
  *         The colored stream to print to.
- * @param format
+ * @format
  *         A printf()-style format string, supporting these format specifiers:
  *
  *         %c: A single character
@@ -85,7 +88,6 @@ int cfclose(CFILE *cfile);
  *         %g: A double
  *         %s: A string
  *         %zu: A size_t
- *         %m: strerror(errno)
  *         %pq: A shell-escaped string, like bash's printf %q
  *         %pQ: A TTY-escaped string.
  *         %pF: A colored file name, from a const struct BFTW * argument
@@ -93,19 +95,26 @@ int cfclose(CFILE *cfile);
  *         %pL: A colored link target, from a const struct BFTW * argument
  *         %pe: Dump a const struct bfs_expr *, for debugging.
  *         %pE: Dump a const struct bfs_expr * in verbose form, for debugging.
+ *         %px: Print a const struct bfs_expr * with syntax highlighting.
+ *         %pX: Print the name of a const struct bfs_expr *, without arguments.
  *         %%: A literal '%'
  *         ${cc}: Change the color to 'cc'
  *         $$: A literal '$'
  * @return
  *         0 on success, -1 on failure.
  */
-attr(printf(2, 3))
+_printf(2, 3)
 int cfprintf(CFILE *cfile, const char *format, ...);
 
 /**
  * cfprintf() variant that takes a va_list.
  */
-attr(printf(2, 0))
+_printf(2, 0)
 int cvfprintf(CFILE *cfile, const char *format, va_list args);
 
+/**
+ * Reset the TTY state when terminating abnormally (async-signal-safe).
+ */
+int cfreset(CFILE *cfile);
+
 #endif // BFS_COLOR_H
diff --git a/src/ctx.c b/src/ctx.c
index aa73b35..05baa1d 100644
--- a/src/ctx.c
+++ b/src/ctx.c
@@ -2,36 +2,28 @@
 // SPDX-License-Identifier: 0BSD
 
 #include "ctx.h"
+
 #include "alloc.h"
+#include "bfstd.h"
 #include "color.h"
 #include "diag.h"
 #include "expr.h"
 #include "list.h"
 #include "mtab.h"
 #include "pwcache.h"
+#include "sighook.h"
 #include "stat.h"
 #include "trie.h"
-#include "xtime.h"
+
 #include <errno.h>
 #include <limits.h>
+#include <signal.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <sys/stat.h>
+#include <time.h>
 #include <unistd.h>
 
-/** Get the initial value for ctx->threads (-j). */
-static int bfs_nproc(void) {
-	long nproc = sysconf(_SC_NPROCESSORS_ONLN);
-
-	if (nproc < 1) {
-		nproc = 1;
-	} else if (nproc > 8) {
-		// Not much speedup after 8 threads
-		nproc = 8;
-	}
-
-	return nproc;
-}
-
 struct bfs_ctx *bfs_ctx_new(void) {
 	struct bfs_ctx *ctx = ZALLOC(struct bfs_ctx);
 	if (!ctx) {
@@ -44,15 +36,24 @@ struct bfs_ctx *bfs_ctx_new(void) {
 	ctx->maxdepth = INT_MAX;
 	ctx->flags = BFTW_RECOVER;
 	ctx->strategy = BFTW_BFS;
-	ctx->threads = bfs_nproc();
 	ctx->optlevel = 3;
 
+	ctx->threads = nproc();
+	if (ctx->threads > 8) {
+		// Not much speedup after 8 threads
+		ctx->threads = 8;
+	}
+
 	trie_init(&ctx->files);
 
+	ctx->umask = umask(0);
+	umask(ctx->umask);
+
 	if (getrlimit(RLIMIT_NOFILE, &ctx->orig_nofile) != 0) {
 		goto fail;
 	}
 	ctx->cur_nofile = ctx->orig_nofile;
+	ctx->raise_nofile = true;
 
 	ctx->users = bfs_users_new();
 	if (!ctx->users) {
@@ -64,7 +65,7 @@ struct bfs_ctx *bfs_ctx_new(void) {
 		goto fail;
 	}
 
-	if (xgettime(&ctx->now) != 0) {
+	if (clock_gettime(CLOCK_REALTIME, &ctx->now) != 0) {
 		goto fail;
 	}
 
@@ -98,13 +99,20 @@ struct bfs_ctx_file {
 	CFILE *cfile;
 	/** The path to the file (for diagnostics). */
 	const char *path;
+	/** Signal hook to send a reset escape sequence. */
+	struct sighook *hook;
 	/** Remembers I/O errors, to propagate them to the exit status. */
 	int error;
 };
 
+/** Signal hook that calls cfreset() on the CFILE passed as arg; sig and info are unused. */
+static void cfreset_hook(int sig, siginfo_t *info, void *arg) {
+	cfreset(arg);
+}
+
 CFILE *bfs_ctx_dedup(struct bfs_ctx *ctx, CFILE *cfile, const char *path) {
 	struct bfs_stat sb;
-	if (bfs_stat(fileno(cfile->file), NULL, 0, &sb) != 0) {
+	if (bfs_stat(cfile->fd, NULL, 0, &sb) != 0) {
 		return NULL;
 	}
 
@@ -124,19 +132,31 @@ CFILE *bfs_ctx_dedup(struct bfs_ctx *ctx, CFILE *cfile, const char *path) {
 
 	leaf->value = ctx_file = ALLOC(struct bfs_ctx_file);
 	if (!ctx_file) {
-		trie_remove(&ctx->files, leaf);
-		return NULL;
+		goto fail;
 	}
 
 	ctx_file->cfile = cfile;
 	ctx_file->path = path;
 	ctx_file->error = 0;
+	ctx_file->hook = NULL;
+
+	if (cfile->colors) {
+		ctx_file->hook = atsigexit(cfreset_hook, cfile);
+		if (!ctx_file->hook) {
+			goto fail;
+		}
+	}
 
 	if (cfile != ctx->cout && cfile != ctx->cerr) {
 		++ctx->nfiles;
 	}
 
 	return cfile;
+
+fail:
+	trie_remove(&ctx->files, leaf);
+	free(ctx_file);
+	return NULL;
 }
 
 void bfs_ctx_flush(const struct bfs_ctx *ctx) {
@@ -156,9 +176,9 @@ void bfs_ctx_flush(const struct bfs_ctx *ctx) {
 
 		const char *path = ctx_file->path;
 		if (path) {
-			bfs_error(ctx, "'%s': %m.\n", path);
+			bfs_error(ctx, "%pq: %s.\n", path, errstr());
 		} else if (cfile == ctx->cout) {
-			bfs_error(ctx, "(standard output): %m.\n");
+			bfs_error(ctx, "(standard output): %s.\n", errstr());
 		}
 	}
 
@@ -188,30 +208,47 @@ static int bfs_ctx_fflush(CFILE *cfile) {
 static int bfs_ctx_fclose(struct bfs_ctx *ctx, struct bfs_ctx_file *ctx_file) {
 	CFILE *cfile = ctx_file->cfile;
 
-	if (cfile == ctx->cout) {
-		// Will be checked later
-		return 0;
-	} else if (cfile == ctx->cerr) {
-		// Writes to stderr are allowed to fail silently, unless the same file was used by
-		// -fprint, -fls, etc.
-		if (ctx_file->path) {
-			return bfs_ctx_fflush(cfile);
-		} else {
-			return 0;
-		}
-	}
-
+	// Writes to stderr are allowed to fail silently, unless the same file
+	// was used by -fprint, -fls, etc.
+	bool silent = cfile == ctx->cerr && !ctx_file->path;
 	int ret = 0, error = 0;
-	if (ferror(cfile->file)) {
+
+	if (ctx_file->error) {
+		// An error was previously reported during bfs_ctx_flush()
 		ret = -1;
-		error = EIO;
+		error = ctx_file->error;
 	}
-	if (cfclose(cfile) != 0) {
+
+	// Flush the file just before we remove the hook, to maximize the chance
+	// we leave the TTY in a good state
+	if (bfs_ctx_fflush(cfile) != 0) {
 		ret = -1;
 		error = errno;
 	}
 
-	errno = error;
+	sigunhook(ctx_file->hook);
+
+	// Close the CFILE, except for stdio streams, which are closed later
+	if (cfile != ctx->cout && cfile != ctx->cerr) {
+		if (cfclose(cfile) != 0) {
+			ret = -1;
+			error = errno;
+		}
+	}
+
+	if (silent) {
+		ret = 0;
+	}
+
+	if (ret != 0 && ctx->cerr) {
+		if (ctx_file->path) {
+			bfs_error(ctx, "%pq: %s.\n", ctx_file->path, xstrerror(error));
+		} else if (cfile == ctx->cout) {
+			bfs_error(ctx, "(standard output): %s.\n", xstrerror(error));
+		}
+	}
+
+	free(ctx_file);
 	return ret;
 }
 
@@ -229,33 +266,14 @@ int bfs_ctx_free(struct bfs_ctx *ctx) {
 
 		for_trie (leaf, &ctx->files) {
 			struct bfs_ctx_file *ctx_file = leaf->value;
-
-			if (ctx_file->error) {
-				// An error was previously reported during bfs_ctx_flush()
-				ret = -1;
-			}
-
 			if (bfs_ctx_fclose(ctx, ctx_file) != 0) {
-				if (cerr) {
-					bfs_error(ctx, "%pq: %m.\n", ctx_file->path);
-				}
 				ret = -1;
 			}
-
-			free(ctx_file);
 		}
 		trie_destroy(&ctx->files);
 
-		if (cout && bfs_ctx_fflush(cout) != 0) {
-			if (cerr) {
-				bfs_error(ctx, "(standard output): %m.\n");
-			}
-			ret = -1;
-		}
-
 		cfclose(cout);
 		cfclose(cerr);
-
 		free_colors(ctx->colors);
 
 		for_slist (struct bfs_expr, expr, &ctx->expr_list, freelist) {
@@ -268,6 +286,7 @@ int bfs_ctx_free(struct bfs_ctx *ctx) {
 		}
 		free(ctx->paths);
 
+		free(ctx->kinds);
 		free(ctx->argv);
 		free(ctx);
 	}
diff --git a/src/ctx.h b/src/ctx.h
index fc3020c..908338f 100644
--- a/src/ctx.h
+++ b/src/ctx.h
@@ -8,14 +8,15 @@
 #ifndef BFS_CTX_H
 #define BFS_CTX_H
 
-#include "prelude.h"
 #include "alloc.h"
 #include "bftw.h"
 #include "diag.h"
 #include "expr.h"
 #include "trie.h"
+
 #include <stddef.h>
 #include <sys/resource.h>
+#include <sys/types.h>
 #include <time.h>
 
 struct CFILE;
@@ -28,6 +29,8 @@ struct bfs_ctx {
 	size_t argc;
 	/** The unparsed command line arguments. */
 	char **argv;
+	/** The argument token kinds. */
+	enum bfs_kind *kinds;
 
 	/** The root paths. */
 	const char **paths;
@@ -67,11 +70,18 @@ struct bfs_ctx {
 	bool status;
 	/** Whether to only return unique files (-unique). */
 	bool unique;
-	/** Whether to print warnings (-warn/-nowarn). */
-	bool warn;
 	/** Whether to only handle paths with xargs-safe characters (-X). */
 	bool xargs_safe;
 
+	/** Whether bfs was run interactively. */
+	bool interactive;
+	/** Whether to print warnings (-warn/-nowarn). */
+	bool warn;
+	/** Whether to ignore errors (-noerror). */
+	bool ignore_errors;
+	/** Whether any dangerous actions (-delete/-exec) are present. */
+	bool dangerous;
+
 	/** Color data. */
 	struct colors *colors;
 	/** The error that occurred parsing the color table, if any. */
@@ -98,10 +108,15 @@ struct bfs_ctx {
 	/** The number of files owned by the context. */
 	int nfiles;
 
+	/** The current file creation mask. */
+	mode_t umask;
+
 	/** The initial RLIMIT_NOFILE limits. */
 	struct rlimit orig_nofile;
 	/** The current RLIMIT_NOFILE limits. */
 	struct rlimit cur_nofile;
+	/** Whether the fd limit should be raised. */
+	bool raise_nofile;
 
 	/** The current time. */
 	struct timespec now;
@@ -116,7 +131,7 @@ struct bfs_ctx *bfs_ctx_new(void);
 /**
  * Get the mount table.
  *
- * @param ctx
+ * @ctx
  *         The bfs context.
  * @return
  *         The cached mount table, or NULL on failure.
@@ -126,11 +141,11 @@ const struct bfs_mtab *bfs_ctx_mtab(const struct bfs_ctx *ctx);
 /**
  * Deduplicate an opened file.
  *
- * @param ctx
+ * @ctx
  *         The bfs context.
- * @param cfile
+ * @cfile
  *         The opened file.
- * @param path
+ * @path
  *         The path to the opened file (or NULL for standard streams).
  * @return
  *         If the same file was opened previously, that file is returned.  If cfile is a new file,
@@ -141,7 +156,7 @@ struct CFILE *bfs_ctx_dedup(struct bfs_ctx *ctx, struct CFILE *cfile, const char
 /**
  * Flush any caches for consistency with external processes.
  *
- * @param ctx
+ * @ctx
  *         The bfs context.
  */
 void bfs_ctx_flush(const struct bfs_ctx *ctx);
@@ -149,9 +164,9 @@ void bfs_ctx_flush(const struct bfs_ctx *ctx);
 /**
  * Dump the parsed command line.
  *
- * @param ctx
+ * @ctx
  *         The bfs context.
- * @param flag
+ * @flag
  *         The -D flag that triggered the dump.
  */
 void bfs_ctx_dump(const struct bfs_ctx *ctx, enum debug_flags flag);
@@ -159,7 +174,7 @@ void bfs_ctx_dump(const struct bfs_ctx *ctx, enum debug_flags flag);
 /**
  * Free a bfs context.
  *
- * @param ctx
+ * @ctx
  *         The context to free.
  * @return
  *         0 on success, -1 if any errors occurred.
diff --git a/src/diag.c b/src/diag.c
index deb6f26..a86b060 100644
--- a/src/diag.c
+++ b/src/diag.c
@@ -1,38 +1,43 @@
 // Copyright © Tavian Barnes <tavianator@tavianator.com>
 // SPDX-License-Identifier: 0BSD
 
-#include "prelude.h"
 #include "diag.h"
+
 #include "alloc.h"
+#include "bfs.h"
 #include "bfstd.h"
 #include "color.h"
 #include "ctx.h"
 #include "dstring.h"
 #include "expr.h"
-#include <errno.h>
+
 #include <stdarg.h>
 #include <stdio.h>
 #include <stdlib.h>
-
-/** bfs_diagf() implementation. */
-attr(printf(2, 0))
-static void bfs_vdiagf(const struct bfs_loc *loc, const char *format, va_list args) {
-	fprintf(stderr, "%s: %s@%s:%d: ", xgetprogname(), loc->func, loc->file, loc->line);
-	vfprintf(stderr, format, args);
-	fprintf(stderr, "\n");
-}
-
-void bfs_diagf(const struct bfs_loc *loc, const char *format, ...) {
+#include <unistd.h>
+
+/**
+ * Print an error using dprintf() if possible, because it's more likely to be
+ * async-signal-safe in practice.
+ */
+#if BFS_HAS_DPRINTF
+#  define veprintf(...) vdprintf(STDERR_FILENO, __VA_ARGS__)
+#else
+#  define veprintf(...) vfprintf(stderr, __VA_ARGS__)
+#endif
+
+void bfs_diagf(const char *format, ...) {
 	va_list args;
 	va_start(args, format);
-	bfs_vdiagf(loc, format, args);
+	veprintf(format, args);
 	va_end(args);
 }
 
-noreturn void bfs_abortf(const struct bfs_loc *loc, const char *format, ...) {
+_noreturn
+void bfs_abortf(const char *format, ...) {
 	va_list args;
 	va_start(args, format);
-	bfs_vdiagf(loc, format, args);
+	veprintf(format, args);
 	va_end(args);
 
 	abort();
@@ -64,7 +69,7 @@ const char *debug_flag_name(enum debug_flags flag) {
 }
 
 void bfs_perror(const struct bfs_ctx *ctx, const char *str) {
-	bfs_error(ctx, "%s: %m.\n", str);
+	bfs_error(ctx, "%s: %s.\n", str, errstr());
 }
 
 void bfs_error(const struct bfs_ctx *ctx, const char *format, ...) {
@@ -91,19 +96,12 @@ bool bfs_debug(const struct bfs_ctx *ctx, enum debug_flags flag, const char *for
 }
 
 void bfs_verror(const struct bfs_ctx *ctx, const char *format, va_list args) {
-	int error = errno;
-
 	bfs_error_prefix(ctx);
-
-	errno = error;
 	cvfprintf(ctx->cerr, format, args);
 }
 
 bool bfs_vwarning(const struct bfs_ctx *ctx, const char *format, va_list args) {
-	int error = errno;
-
 	if (bfs_warning_prefix(ctx)) {
-		errno = error;
 		cvfprintf(ctx->cerr, format, args);
 		return true;
 	} else {
@@ -112,10 +110,7 @@ bool bfs_vwarning(const struct bfs_ctx *ctx, const char *format, va_list args) {
 }
 
 bool bfs_vdebug(const struct bfs_ctx *ctx, enum debug_flags flag, const char *format, va_list args) {
-	int error = errno;
-
 	if (bfs_debug_prefix(ctx, flag)) {
-		errno = error;
 		cvfprintf(ctx->cerr, format, args);
 		return true;
 	} else {
@@ -169,7 +164,7 @@ static bool highlight_expr_recursive(const struct bfs_ctx *ctx, const struct bfs
 		}
 	}
 
-	for (struct bfs_expr *child = bfs_expr_children(expr); child; child = child->next) {
+	for_expr (child, expr) {
 		ret |= highlight_expr_recursive(ctx, child, args);
 	}
 
diff --git a/src/diag.h b/src/diag.h
index 2b13609..645dbb1 100644
--- a/src/diag.h
+++ b/src/diag.h
@@ -8,92 +8,134 @@
 #ifndef BFS_DIAG_H
 #define BFS_DIAG_H
 
-#include "prelude.h"
+#include "bfs.h"
+#include "bfstd.h"
+
 #include <stdarg.h>
 
 /**
- * static_assert() with an optional second argument.
+ * Wrap a diagnostic format string so it looks like
+ *
+ *     bfs: func@src/file.c:0: Message
  */
-#if __STDC_VERSION__ >= C23
-#  define bfs_static_assert static_assert
-#else
-#  define bfs_static_assert(...) bfs_static_assert_(__VA_ARGS__, #__VA_ARGS__, )
-#  define bfs_static_assert_(expr, msg, ...) _Static_assert(expr, msg)
-#endif
+#define BFS_DIAG_FORMAT_(format) \
+	((format) ? "%s: %s@%s:%d: " format "%s" : "")
 
 /**
- * A source code location.
+ * Add arguments to match a BFS_DIAG_FORMAT string.
  */
-struct bfs_loc {
-	const char *file;
-	int line;
-	const char *func;
-};
-
-#define BFS_LOC_INIT { .file = __FILE__, .line = __LINE__, .func = __func__ }
+#define BFS_DIAG_ARGS_(...) \
+	xgetprogname(), __func__, __FILE__, __LINE__, __VA_ARGS__ "\n"
 
 /**
- * Get the current source code location.
+ * Print a low-level diagnostic message to standard error.
  */
-#if __STDC_VERSION__ >= C23
-#  define bfs_location() (&(static const struct bfs_loc)BFS_LOC_INIT)
-#else
-#  define bfs_location() (&(const struct bfs_loc)BFS_LOC_INIT)
-#endif
+_printf(1, 2)
+void bfs_diagf(const char *format, ...);
 
 /**
- * Print a low-level diagnostic message to standard error, formatted like
- *
- *     bfs: func@src/file.c:0: Message
+ * Unconditional diagnostic message.
  */
-attr(printf(2, 3))
-void bfs_diagf(const struct bfs_loc *loc, const char *format, ...);
+#define bfs_diag(...) \
+	bfs_diag_(__VA_ARGS__, )
+
+#define bfs_diag_(format, ...) \
+	bfs_diagf(BFS_DIAG_FORMAT_(format), BFS_DIAG_ARGS_(__VA_ARGS__))
 
 /**
- * Unconditional diagnostic message.
+ * Print a diagnostic message including the last error.
  */
-#define bfs_diag(...) bfs_diagf(bfs_location(), __VA_ARGS__)
+#define bfs_ediag(...) \
+	bfs_ediag_(__VA_ARGS__, )
+
+#define bfs_ediag_(format, ...) \
+	bfs_diag_(format "%s%s", __VA_ARGS__ (sizeof("" format) > 1 ? ": " : ""), errstr(), )
 
 /**
  * Print a message to standard error and abort.
  */
-attr(cold, printf(2, 3))
-noreturn void bfs_abortf(const struct bfs_loc *loc, const char *format, ...);
+_cold
+_printf(1, 2)
+_noreturn
+void bfs_abortf(const char *format, ...);
 
 /**
  * Unconditional abort with a message.
  */
-#define bfs_abort(...) bfs_abortf(bfs_location(), __VA_ARGS__)
+#define bfs_abort(...) \
+	bfs_abort_(__VA_ARGS__, )
+
+#define bfs_abort_(format, ...) \
+	bfs_abortf(BFS_DIAG_FORMAT_(format), BFS_DIAG_ARGS_(__VA_ARGS__))
+
+/**
+ * Abort with a message including the last error.
+ */
+#define bfs_eabort(...) \
+	bfs_eabort_(__VA_ARGS__, )
+
+#define bfs_eabort_(format, ...) \
+	((format) ? bfs_abort_(format ": %s", __VA_ARGS__ errstr(), ) : (void)0)
 
 /**
  * Abort in debug builds; no-op in release builds.
  */
 #ifdef NDEBUG
 #  define bfs_bug(...) ((void)0)
+#  define bfs_ebug(...) ((void)0)
 #else
 #  define bfs_bug bfs_abort
+#  define bfs_ebug bfs_eabort
 #endif
 
 /**
+ * Get the default assertion message, if no format string was specified.
+ */
+#define BFS_DIAG_MSG_(format, str) \
+	(sizeof(format) > 1 ? "" : str)
+
+/**
  * Unconditional assert.
  */
 #define bfs_verify(...) \
-	bfs_verify_(#__VA_ARGS__, __VA_ARGS__, "", "")
+	bfs_verify_(#__VA_ARGS__, __VA_ARGS__, "", )
 
 #define bfs_verify_(str, cond, format, ...) \
-	((cond) ? (void)0 : bfs_abort( \
+	((cond) ? (void)0 : bfs_verify__(format, BFS_DIAG_MSG_(format, str), __VA_ARGS__))
+
+#define bfs_verify__(format, ...) \
+	bfs_abortf( \
+		sizeof(format) > 1 \
+			? BFS_DIAG_FORMAT_("%s" format "%s") \
+			: BFS_DIAG_FORMAT_("Assertion failed: `%s`"), \
+		BFS_DIAG_ARGS_(__VA_ARGS__))
+
+/**
+ * Unconditional assert, including the last error.
+ */
+#define bfs_everify(...) \
+	bfs_everify_(#__VA_ARGS__, __VA_ARGS__, "", )
+
+
+#define bfs_everify_(str, cond, format, ...) \
+	((cond) ? (void)0 : bfs_everify__(format, BFS_DIAG_MSG_(format, str), __VA_ARGS__))
+
+#define bfs_everify__(format, ...) \
+	bfs_abortf( \
 		sizeof(format) > 1 \
-			? "%.0s" format "%s%s" \
-			: "Assertion failed: `%s`%s", \
-		str, __VA_ARGS__))
+			? BFS_DIAG_FORMAT_("%s" format "%s: %s") \
+			: BFS_DIAG_FORMAT_("Assertion failed: `%s`: %s"), \
+		BFS_DIAG_ARGS_(__VA_ARGS__ errstr(), ))
 
 /**
  * Assert in debug builds; no-op in release builds.
  */
 #ifdef NDEBUG
 #  define bfs_assert(...) ((void)0)
+#  define bfs_eassert(...) ((void)0)
 #else
 #  define bfs_assert bfs_verify
+#  define bfs_eassert bfs_everify
 #endif
 
 struct bfs_ctx;
@@ -129,13 +171,14 @@ const char *debug_flag_name(enum debug_flags flag);
 /**
  * Like perror(), but decorated like bfs_error().
  */
-attr(cold)
+_cold
 void bfs_perror(const struct bfs_ctx *ctx, const char *str);
 
 /**
  * Shorthand for printing error messages.
  */
-attr(cold, printf(2, 3))
+_cold
+_printf(2, 3)
 void bfs_error(const struct bfs_ctx *ctx, const char *format, ...);
 
 /**
@@ -143,7 +186,8 @@ void bfs_error(const struct bfs_ctx *ctx, const char *format, ...);
  *
  * @return Whether a warning was printed.
  */
-attr(cold, printf(2, 3))
+_cold
+_printf(2, 3)
 bool bfs_warning(const struct bfs_ctx *ctx, const char *format, ...);
 
 /**
@@ -151,67 +195,71 @@ bool bfs_warning(const struct bfs_ctx *ctx, const char *format, ...);
  *
  * @return Whether a debug message was printed.
  */
-attr(cold, printf(3, 4))
+_cold
+_printf(3, 4)
 bool bfs_debug(const struct bfs_ctx *ctx, enum debug_flags flag, const char *format, ...);
 
 /**
  * bfs_error() variant that takes a va_list.
  */
-attr(cold, printf(2, 0))
+_cold
+_printf(2, 0)
 void bfs_verror(const struct bfs_ctx *ctx, const char *format, va_list args);
 
 /**
  * bfs_warning() variant that takes a va_list.
  */
-attr(cold, printf(2, 0))
+_cold
+_printf(2, 0)
 bool bfs_vwarning(const struct bfs_ctx *ctx, const char *format, va_list args);
 
 /**
  * bfs_debug() variant that takes a va_list.
  */
-attr(cold, printf(3, 0))
+_cold
+_printf(3, 0)
 bool bfs_vdebug(const struct bfs_ctx *ctx, enum debug_flags flag, const char *format, va_list args);
 
 /**
  * Print the error message prefix.
  */
-attr(cold)
+_cold
 void bfs_error_prefix(const struct bfs_ctx *ctx);
 
 /**
  * Print the warning message prefix.
  */
-attr(cold)
+_cold
 bool bfs_warning_prefix(const struct bfs_ctx *ctx);
 
 /**
  * Print the debug message prefix.
  */
-attr(cold)
+_cold
 bool bfs_debug_prefix(const struct bfs_ctx *ctx, enum debug_flags flag);
 
 /**
  * Highlight parts of the command line in an error message.
  */
-attr(cold)
+_cold
 void bfs_argv_error(const struct bfs_ctx *ctx, const bool args[]);
 
 /**
  * Highlight parts of an expression in an error message.
  */
-attr(cold)
+_cold
 void bfs_expr_error(const struct bfs_ctx *ctx, const struct bfs_expr *expr);
 
 /**
  * Highlight parts of the command line in a warning message.
  */
-attr(cold)
+_cold
 bool bfs_argv_warning(const struct bfs_ctx *ctx, const bool args[]);
 
 /**
  * Highlight parts of an expression in a warning message.
  */
-attr(cold)
+_cold
 bool bfs_expr_warning(const struct bfs_ctx *ctx, const struct bfs_expr *expr);
 
 #endif // BFS_DIAG_H
diff --git a/src/dir.c b/src/dir.c
index cfbbca4..4bf72a1 100644
--- a/src/dir.c
+++ b/src/dir.c
@@ -1,13 +1,15 @@
 // Copyright © Tavian Barnes <tavianator@tavianator.com>
 // SPDX-License-Identifier: 0BSD
 
-#include "prelude.h"
 #include "dir.h"
+
 #include "alloc.h"
+#include "bfs.h"
 #include "bfstd.h"
 #include "diag.h"
 #include "sanity.h"
 #include "trie.h"
+
 #include <dirent.h>
 #include <errno.h>
 #include <fcntl.h>
@@ -25,7 +27,13 @@
 static ssize_t bfs_getdents(int fd, void *buf, size_t size) {
 	sanitize_uninit(buf, size);
 
-#if BFS_HAS_GETDENTS
+#if BFS_HAS_POSIX_GETDENTS
+	int flags = 0;
+#  ifdef DT_FORCE_TYPE
+	flags |= DT_FORCE_TYPE;
+#  endif
+	ssize_t ret = posix_getdents(fd, buf, size, flags);
+#elif BFS_HAS_GETDENTS
 	ssize_t ret = getdents(fd, buf, size);
 #elif BFS_HAS_GETDENTS64
 	ssize_t ret = getdents64(fd, buf, size);
@@ -44,11 +52,13 @@ static ssize_t bfs_getdents(int fd, void *buf, size_t size) {
 
 #endif // BFS_USE_GETDENTS
 
-#if BFS_USE_GETDENTS && !BFS_HAS_GETDENTS
 /** Directory entry type for bfs_getdents() */
-typedef struct dirent64 sys_dirent;
-#else
+#if !BFS_USE_GETDENTS || BFS_HAS_GETDENTS
 typedef struct dirent sys_dirent;
+#elif BFS_HAS_POSIX_GETDENTS
+typedef struct posix_dent sys_dirent;
+#else
+typedef struct dirent64 sys_dirent;
 #endif
 
 enum bfs_type bfs_mode_to_type(mode_t mode) {
diff --git a/src/dir.h b/src/dir.h
index 6d5c9c5..885dac3 100644
--- a/src/dir.h
+++ b/src/dir.h
@@ -8,15 +8,22 @@
 #ifndef BFS_DIR_H
 #define BFS_DIR_H
 
-#include "prelude.h"
+#include "bfs.h"
+
 #include <sys/types.h>
 
 /**
  * Whether the implementation uses the getdents() syscall directly, rather than
  * libc's readdir().
  */
-#if !defined(BFS_USE_GETDENTS) && (__linux__ || __FreeBSD__)
-#  define BFS_USE_GETDENTS (BFS_HAS_GETDENTS || BFS_HAS_GETDENTS64 | BFS_HAS_GETDENTS64_SYSCALL)
+#ifndef BFS_USE_GETDENTS
+#  if BFS_HAS_POSIX_GETDENTS
+#    define BFS_USE_GETDENTS true
+#  elif __linux__ || __FreeBSD__
+#    define BFS_USE_GETDENTS (BFS_HAS_GETDENTS || BFS_HAS_GETDENTS64 || BFS_HAS_GETDENTS64_SYSCALL)
+#  else
+#    define BFS_USE_GETDENTS false
+#  endif
+#endif
 
 /**
@@ -82,7 +89,7 @@ struct arena;
 /**
  * Initialize an arena for directories.
  *
- * @param arena
+ * @arena
  *         The arena to initialize.
  */
 void bfs_dir_arena(struct arena *arena);
@@ -100,14 +107,14 @@ enum bfs_dir_flags {
 /**
  * Open a directory.
  *
- * @param dir
+ * @dir
  *         The allocated directory.
- * @param at_fd
+ * @at_fd
  *         The base directory for path resolution.
- * @param at_path
+ * @at_path
  *         The path of the directory to open, relative to at_fd.  Pass NULL to
  *         open at_fd itself.
- * @param flags
+ * @flags
  *         Flags that control which directory entries are listed.
  * @return
  *         0 on success, or -1 on failure.
@@ -122,7 +129,7 @@ int bfs_dirfd(const struct bfs_dir *dir);
 /**
  * Performs any I/O necessary for the next bfs_readdir() call.
  *
- * @param dir
+ * @dir
  *         The directory to poll.
  * @return
  *         1 on success, 0 on EOF, or -1 on failure.
@@ -132,9 +139,9 @@ int bfs_polldir(struct bfs_dir *dir);
 /**
  * Read a directory entry.
  *
- * @param dir
+ * @dir
  *         The directory to read.
- * @param[out] dirent
+ * @dirent[out]
  *         The directory entry to populate.
  * @return
  *         1 on success, 0 on EOF, or -1 on failure.
@@ -160,7 +167,7 @@ int bfs_closedir(struct bfs_dir *dir);
 /**
  * Detach the file descriptor from an open directory.
  *
- * @param dir
+ * @dir
  *         The directory to detach.
  * @return
  *         The file descriptor of the directory.
diff --git a/src/dstring.c b/src/dstring.c
index b5bf3d3..678d685 100644
--- a/src/dstring.c
+++ b/src/dstring.c
@@ -1,11 +1,12 @@
 // Copyright © Tavian Barnes <tavianator@tavianator.com>
 // SPDX-License-Identifier: 0BSD
 
-#include "prelude.h"
 #include "dstring.h"
+
 #include "alloc.h"
 #include "bit.h"
 #include "diag.h"
+
 #include <stdarg.h>
 #include <stddef.h>
 #include <stdint.h>
@@ -22,7 +23,7 @@ struct dstring {
 	/** Length of the string, *excluding* the terminating NUL. */
 	size_t len;
 	/** The string itself. */
-	alignas(dchar) char str[];
+	alignas(dchar) char str[] _counted_by(cap);
 };
 
 #define DSTR_OFFSET offsetof(struct dstring, str)
@@ -45,6 +46,13 @@ static dchar *dstrdata(struct dstring *header) {
 	return (char *)header + DSTR_OFFSET;
 }
 
+/** Set the length of a dynamic string (asserts len < cap) and write the terminating NUL. */
+static void dstrsetlen(struct dstring *header, size_t len) {
+	bfs_assert(len < header->cap);
+	header->len = len;
+	header->str[len] = '\0';
+}
+
 /** Allocate a dstring with the given contents. */
 static dchar *dstralloc_impl(size_t cap, size_t len, const char *str) {
 	// Avoid reallocations for small strings
@@ -58,11 +66,10 @@ static dchar *dstralloc_impl(size_t cap, size_t len, const char *str) {
 	}
 
 	header->cap = cap;
-	header->len = len;
+	dstrsetlen(header, len);
 
-	char *ret = dstrdata(header);
+	dchar *ret = dstrdata(header);
 	memcpy(ret, str, len);
-	ret[len] = '\0';
 	return ret;
 }
 
@@ -120,11 +127,16 @@ int dstresize(dchar **dstr, size_t len) {
 	}
 
 	struct dstring *header = dstrheader(*dstr);
-	header->len = len;
-	header->str[len] = '\0';
+	dstrsetlen(header, len);
 	return 0;
 }
 
+void dstrshrink(dchar *dstr, size_t len) {
+	struct dstring *header = dstrheader(dstr);
+	bfs_assert(len <= header->len);
+	dstrsetlen(header, len);
+}
+
 int dstrcat(dchar **dest, const char *src) {
 	return dstrxcat(dest, src, strlen(src));
 }
@@ -174,7 +186,7 @@ int dstrxcpy(dchar **dest, const char *src, size_t len) {
 	return 0;
 }
 
-char *dstrprintf(const char *format, ...) {
+dchar *dstrprintf(const char *format, ...) {
 	va_list args;
 
 	va_start(args, format);
@@ -184,7 +196,7 @@ char *dstrprintf(const char *format, ...) {
 	return str;
 }
 
-char *dstrvprintf(const char *format, va_list args) {
+dchar *dstrvprintf(const char *format, va_list args) {
 	// Guess a capacity to try to avoid reallocating
 	dchar *str = dstralloc(2 * strlen(format));
 	if (!str) {
@@ -277,3 +289,20 @@ void dstrfree(dchar *dstr) {
 		free(dstrheader(dstr));
 	}
 }
+
+dchar *dstrepeat(const char *str, size_t n) {
+	size_t len = strlen(str);
+	dchar *ret = dstralloc(n * len);
+	if (!ret) {
+		return NULL;
+	}
+
+	for (size_t i = 0; i < n; ++i) {
+		if (dstrxcat(&ret, str, len) < 0) {
+			dstrfree(ret);
+			return NULL;
+		}
+	}
+
+	return ret;
+}
diff --git a/src/dstring.h b/src/dstring.h
index 9ea7eb9..ce7ef86 100644
--- a/src/dstring.h
+++ b/src/dstring.h
@@ -8,8 +8,9 @@
 #ifndef BFS_DSTRING_H
 #define BFS_DSTRING_H
 
-#include "prelude.h"
+#include "bfs.h"
 #include "bfstd.h"
+
 #include <stdarg.h>
 #include <stddef.h>
 
@@ -30,7 +31,7 @@ typedef char dchar;
 /**
  * Free a dynamic string.
  *
- * @param dstr
+ * @dstr
  *         The string to free.
  */
 void dstrfree(dchar *dstr);
@@ -38,56 +39,56 @@ void dstrfree(dchar *dstr);
 /**
  * Allocate a dynamic string.
  *
- * @param cap
+ * @cap
  *         The initial capacity of the string.
  */
-attr(malloc(dstrfree, 1))
+_malloc(dstrfree, 1)
 dchar *dstralloc(size_t cap);
 
 /**
  * Create a dynamic copy of a string.
  *
- * @param str
+ * @str
  *         The NUL-terminated string to copy.
  */
-attr(malloc(dstrfree, 1))
+_malloc(dstrfree, 1)
 dchar *dstrdup(const char *str);
 
 /**
  * Create a length-limited dynamic copy of a string.
  *
- * @param str
+ * @str
  *         The string to copy.
- * @param n
+ * @n
  *         The maximum number of characters to copy from str.
  */
-attr(malloc(dstrfree, 1))
+_malloc(dstrfree, 1)
 dchar *dstrndup(const char *str, size_t n);
 
 /**
  * Create a dynamic copy of a dynamic string.
  *
- * @param dstr
+ * @dstr
  *         The dynamic string to copy.
  */
-attr(malloc(dstrfree, 1))
+_malloc(dstrfree, 1)
 dchar *dstrddup(const dchar *dstr);
 
 /**
  * Create an exact-sized dynamic copy of a string.
  *
- * @param str
+ * @str
  *         The string to copy.
- * @param len
+ * @len
  *         The length of the string, which may include internal NUL bytes.
  */
-attr(malloc(dstrfree, 1))
+_malloc(dstrfree, 1)
 dchar *dstrxdup(const char *str, size_t len);
 
 /**
  * Get a dynamic string's length.
  *
- * @param dstr
+ * @dstr
  *         The string to measure.
  * @return
  *         The length of dstr.
@@ -97,9 +98,9 @@ size_t dstrlen(const dchar *dstr);
 /**
  * Reserve some capacity in a dynamic string.
  *
- * @param dstr
+ * @dstr
  *         The dynamic string to preallocate.
- * @param cap
+ * @cap
  *         The new capacity for the string.
  * @return
  *         0 on success, -1 on failure.
@@ -109,214 +110,246 @@ int dstreserve(dchar **dstr, size_t cap);
 /**
  * Resize a dynamic string.
  *
- * @param dstr
+ * @dstr
  *         The dynamic string to resize.
- * @param len
+ * @len
  *         The new length for the dynamic string.
  * @return
  *         0 on success, -1 on failure.
  */
+_nodiscard
 int dstresize(dchar **dstr, size_t len);
 
 /**
+ * Shrink a dynamic string.
+ *
+ * @dstr
+ *         The dynamic string to shrink.
+ * @len
+ *         The new length.  Must not be greater than the current length.
+ */
+void dstrshrink(dchar *dstr, size_t len);
+
+/**
  * Append to a dynamic string.
  *
- * @param dest
+ * @dest
  *         The destination dynamic string.
- * @param src
+ * @src
  *         The string to append.
  * @return 0 on success, -1 on failure.
  */
+_nodiscard
 int dstrcat(dchar **dest, const char *src);
 
 /**
  * Append to a dynamic string.
  *
- * @param dest
+ * @dest
  *         The destination dynamic string.
- * @param src
+ * @src
  *         The string to append.
- * @param n
+ * @n
  *         The maximum number of characters to take from src.
  * @return
  *         0 on success, -1 on failure.
  */
+_nodiscard
 int dstrncat(dchar **dest, const char *src, size_t n);
 
 /**
  * Append a dynamic string to another dynamic string.
  *
- * @param dest
+ * @dest
  *         The destination dynamic string.
- * @param src
+ * @src
  *         The dynamic string to append.
  * @return
  *         0 on success, -1 on failure.
  */
+_nodiscard
 int dstrdcat(dchar **dest, const dchar *src);
 
 /**
  * Append to a dynamic string.
  *
- * @param dest
+ * @dest
  *         The destination dynamic string.
- * @param src
+ * @src
  *         The string to append.
- * @param len
+ * @len
  *         The exact number of characters to take from src.
  * @return
  *         0 on success, -1 on failure.
  */
+_nodiscard
 int dstrxcat(dchar **dest, const char *src, size_t len);
 
 /**
  * Append a single character to a dynamic string.
  *
- * @param str
+ * @str
  *         The string to append to.
- * @param c
+ * @c
  *         The character to append.
  * @return
  *         0 on success, -1 on failure.
  */
+_nodiscard
 int dstrapp(dchar **str, char c);
 
 /**
  * Copy a string into a dynamic string.
  *
- * @param dest
+ * @dest
  *         The destination dynamic string.
- * @param src
+ * @src
  *         The string to copy.
  * @returns
  *         0 on success, -1 on failure.
  */
+_nodiscard
 int dstrcpy(dchar **dest, const char *str);
 
 /**
  * Copy a dynamic string into another one.
  *
- * @param dest
+ * @dest
  *         The destination dynamic string.
- * @param src
+ * @src
  *         The dynamic string to copy.
  * @returns
  *         0 on success, -1 on failure.
  */
+_nodiscard
 int dstrdcpy(dchar **dest, const dchar *str);
 
 /**
  * Copy a string into a dynamic string.
  *
- * @param dest
+ * @dest
  *         The destination dynamic string.
- * @param src
+ * @src
  *         The dynamic string to copy.
- * @param n
+ * @n
  *         The maximum number of characters to take from src.
  * @returns
  *         0 on success, -1 on failure.
  */
+_nodiscard
 int dstrncpy(dchar **dest, const char *str, size_t n);
 
 /**
  * Copy a string into a dynamic string.
  *
- * @param dest
+ * @dest
  *         The destination dynamic string.
- * @param src
+ * @src
  *         The dynamic string to copy.
- * @param len
+ * @len
  *         The exact number of characters to take from src.
  * @returns
  *         0 on success, -1 on failure.
  */
+_nodiscard
 int dstrxcpy(dchar **dest, const char *str, size_t len);
 
 /**
  * Create a dynamic string from a format string.
  *
- * @param format
+ * @format
  *         The format string to fill in.
- * @param ...
+ * @...
  *         Any arguments for the format string.
  * @return
  *         The created string, or NULL on failure.
  */
-attr(printf(1, 2))
-char *dstrprintf(const char *format, ...);
+_nodiscard
+_printf(1, 2)
+dchar *dstrprintf(const char *format, ...);
 
 /**
  * Create a dynamic string from a format string and a va_list.
  *
- * @param format
+ * @format
  *         The format string to fill in.
- * @param args
+ * @args
  *         The arguments for the format string.
  * @return
  *         The created string, or NULL on failure.
  */
-attr(printf(1, 0))
-char *dstrvprintf(const char *format, va_list args);
+_nodiscard
+_printf(1, 0)
+dchar *dstrvprintf(const char *format, va_list args);
 
 /**
  * Format some text onto the end of a dynamic string.
  *
- * @param str
+ * @str
  *         The destination dynamic string.
- * @param format
+ * @format
  *         The format string to fill in.
- * @param ...
+ * @...
  *         Any arguments for the format string.
  * @return
  *         0 on success, -1 on failure.
  */
-attr(printf(2, 3))
+_nodiscard
+_printf(2, 3)
 int dstrcatf(dchar **str, const char *format, ...);
 
 /**
  * Format some text from a va_list onto the end of a dynamic string.
  *
- * @param str
+ * @str
  *         The destination dynamic string.
- * @param format
+ * @format
  *         The format string to fill in.
- * @param args
+ * @args
  *         The arguments for the format string.
  * @return
  *         0 on success, -1 on failure.
  */
-attr(printf(2, 0))
+_nodiscard
+_printf(2, 0)
 int dstrvcatf(dchar **str, const char *format, va_list args);
 
 /**
  * Concatenate while shell-escaping.
  *
- * @param dest
+ * @dest
  *         The destination dynamic string.
- * @param str
+ * @str
  *         The string to escape.
- * @param flags
+ * @flags
  *         Flags for wordesc().
  * @return
  *         0 on success, -1 on failure.
  */
+_nodiscard
 int dstrescat(dchar **dest, const char *str, enum wesc_flags flags);
 
 /**
  * Concatenate while shell-escaping.
  *
- * @param dest
+ * @dest
  *         The destination dynamic string.
- * @param str
+ * @str
  *         The string to escape.
- * @param n
+ * @n
  *         The maximum length of the string.
- * @param flags
+ * @flags
  *         Flags for wordesc().
  * @return
  *         0 on success, -1 on failure.
  */
+_nodiscard
 int dstrnescat(dchar **dest, const char *str, size_t n, enum wesc_flags flags);
 
+/**
+ * Create a dynamic string holding n back-to-back copies of str; returns NULL on failure.
+ */
+_nodiscard
+dchar *dstrepeat(const char *str, size_t n);
+
 #endif // BFS_DSTRING_H
diff --git a/src/eval.c b/src/eval.c
index 49028b7..0d1bf68 100644
--- a/src/eval.c
+++ b/src/eval.c
@@ -5,9 +5,11 @@
  * Implementation of all the primary expressions.
  */
 
-#include "prelude.h"
 #include "eval.h"
+
+#include "atomic.h"
 #include "bar.h"
+#include "bfs.h"
 #include "bfstd.h"
 #include "bftw.h"
 #include "color.h"
@@ -22,14 +24,18 @@
 #include "printf.h"
 #include "pwcache.h"
 #include "sanity.h"
+#include "sighook.h"
 #include "stat.h"
 #include "trie.h"
 #include "xregex.h"
+#include "xtime.h"
+
 #include <errno.h>
 #include <fcntl.h>
 #include <fnmatch.h>
 #include <grp.h>
 #include <pwd.h>
+#include <signal.h>
 #include <stdarg.h>
 #include <stdint.h>
 #include <stdio.h>
@@ -37,6 +43,7 @@
 #include <string.h>
 #include <strings.h>
 #include <sys/resource.h>
+#include <sys/time.h>
 #include <sys/types.h>
 #include <time.h>
 #include <unistd.h>
@@ -51,6 +58,8 @@ struct bfs_eval {
 	enum bftw_action action;
 	/** The bfs_eval() return value. */
 	int *ret;
+	/** The number of errors that have occurred. */
+	size_t *nerrors;
 	/** Whether to quit immediately. */
 	bool quit;
 };
@@ -58,20 +67,24 @@ struct bfs_eval {
 /**
  * Print an error message.
  */
-attr(printf(2, 3))
+_printf(2, 3)
 static void eval_error(struct bfs_eval *state, const char *format, ...) {
+	const struct bfs_ctx *ctx = state->ctx;
+
+	++*state->nerrors;
+	if (ctx->ignore_errors) {
+		return;
+	}
+
 	// By POSIX, any errors should be accompanied by a non-zero exit status
 	*state->ret = EXIT_FAILURE;
 
-	int error = errno;
-	const struct bfs_ctx *ctx = state->ctx;
 	CFILE *cerr = ctx->cerr;
 
 	bfs_error(ctx, "%pP: ", state->ftwbuf);
 
 	va_list args;
 	va_start(args, format);
-	errno = error;
 	cvfprintf(cerr, format, args);
 	va_end(args);
 }
@@ -90,7 +103,7 @@ static bool eval_should_ignore(const struct bfs_eval *state, int error) {
  */
 static void eval_report_error(struct bfs_eval *state) {
 	if (!eval_should_ignore(state, errno)) {
-		eval_error(state, "%m.\n");
+		eval_error(state, "%s.\n", errstr());
 	}
 }
 
@@ -99,9 +112,9 @@ static void eval_report_error(struct bfs_eval *state) {
  */
 static void eval_io_error(const struct bfs_expr *expr, struct bfs_eval *state) {
 	if (expr->path) {
-		eval_error(state, "'%s': %m.\n", expr->path);
+		eval_error(state, "'%s': %s.\n", expr->path, errstr());
 	} else {
-		eval_error(state, "(standard output): %m.\n");
+		eval_error(state, "(standard output): %s.\n", errstr());
 	}
 
 	// Don't report the error again in bfs_ctx_free()
@@ -124,11 +137,9 @@ static const struct bfs_stat *eval_stat(struct bfs_eval *state) {
  * Get the difference (in seconds) between two struct timespecs.
  */
 static time_t timespec_diff(const struct timespec *lhs, const struct timespec *rhs) {
-	time_t ret = lhs->tv_sec - rhs->tv_sec;
-	if (lhs->tv_nsec < rhs->tv_nsec) {
-		--ret;
-	}
-	return ret;
+	struct timespec diff = *lhs;
+	timespec_sub(&diff, rhs);
+	return diff.tv_sec;
 }
 
 bool bfs_expr_cmp(const struct bfs_expr *expr, long long n) {
@@ -228,7 +239,7 @@ bool eval_context(const struct bfs_expr *expr, struct bfs_eval *state) {
 static const struct timespec *eval_stat_time(const struct bfs_stat *statbuf, enum bfs_stat_field field, struct bfs_eval *state) {
 	const struct timespec *ret = bfs_stat_time(statbuf, field);
 	if (!ret) {
-		eval_error(state, "Couldn't get file %s: %m.\n", bfs_stat_field_name(field));
+		eval_error(state, "Couldn't get file %s: %s.\n", bfs_stat_field_name(field), errstr());
 	}
 	return ret;
 }
@@ -247,8 +258,7 @@ bool eval_newer(const struct bfs_expr *expr, struct bfs_eval *state) {
 		return false;
 	}
 
-	return time->tv_sec > expr->reftime.tv_sec
-		|| (time->tv_sec == expr->reftime.tv_sec && time->tv_nsec > expr->reftime.tv_nsec);
+	return timespec_cmp(time, &expr->reftime) > 0;
 }
 
 /**
@@ -269,10 +279,10 @@ bool eval_time(const struct bfs_expr *expr, struct bfs_eval *state) {
 	switch (expr->time_unit) {
 	case BFS_DAYS:
 		diff /= 60 * 24;
-		fallthru;
+		_fallthrough;
 	case BFS_MINUTES:
 		diff /= 60;
-		fallthru;
+		_fallthrough;
 	case BFS_SECONDS:
 		break;
 	}
@@ -398,13 +408,13 @@ static int eval_exec_finish(const struct bfs_expr *expr, const struct bfs_ctx *c
 	if (expr->eval_fn == eval_exec) {
 		if (bfs_exec_finish(expr->exec) != 0) {
 			if (errno != 0) {
-				bfs_error(ctx, "%s %s: %m.\n", expr->argv[0], expr->argv[1]);
+				bfs_error(ctx, "${blu}%pq${rs} ${bld}%pq${rs}: %s.\n", expr->argv[0], expr->argv[1], errstr());
 			}
 			ret = -1;
 		}
 	}
 
-	for (struct bfs_expr *child = bfs_expr_children(expr); child; child = child->next) {
+	for_expr (child, expr) {
 		if (eval_exec_finish(child, ctx) != 0) {
 			ret = -1;
 		}
@@ -419,7 +429,7 @@ static int eval_exec_finish(const struct bfs_expr *expr, const struct bfs_ctx *c
 bool eval_exec(const struct bfs_expr *expr, struct bfs_eval *state) {
 	bool ret = bfs_exec(expr->exec, state->ftwbuf) == 0;
 	if (errno != 0) {
-		eval_error(state, "%s %s: %m.\n", expr->argv[0], expr->argv[1]);
+		eval_error(state, "${blu}%pq${rs} ${bld}%pq${rs}: %s.\n", expr->argv[0], expr->argv[1], errstr());
 	}
 	return ret;
 }
@@ -690,6 +700,34 @@ static int print_owner(FILE *file, const char *name, uintmax_t id, int *width) {
 	}
 }
 
+/** Print a file's modification time (with the year instead of HH:MM if 180+ days old or 1+ day ahead of now). */
+static int print_time(FILE *file, time_t time, time_t now) {
+	struct tm tm;
+	if (!localtime_r(&time, &tm)) {
+		goto error;
+	}
+
+	char time_str[256];
+	size_t time_ret;
+
+	time_t six_months_ago = now - 6 * 30 * 24 * 60 * 60;
+	time_t tomorrow = now + 24 * 60 * 60;
+	if (time <= six_months_ago || time >= tomorrow) {
+		time_ret = strftime(time_str, sizeof(time_str), "%b %e  %Y", &tm);
+	} else {
+		time_ret = strftime(time_str, sizeof(time_str), "%b %e %H:%M", &tm);
+	}
+
+	if (time_ret == 0) {
+		goto error;
+	}
+
+	return fprintf(file, " %s", time_str);
+
+error:
+	return fprintf(file, " %jd", (intmax_t)time);
+}
+
 /**
  * -f?ls action.
  */
@@ -746,28 +784,11 @@ bool eval_fls(const struct bfs_expr *expr, struct bfs_eval *state) {
 
 	time_t time = statbuf->mtime.tv_sec;
 	time_t now = ctx->now.tv_sec;
-	time_t six_months_ago = now - 6 * 30 * 24 * 60 * 60;
-	time_t tomorrow = now + 24 * 60 * 60;
-	struct tm tm;
-	if (!localtime_r(&time, &tm)) {
-		goto error;
-	}
-	char time_str[256];
-	size_t time_ret;
-	if (time <= six_months_ago || time >= tomorrow) {
-		time_ret = strftime(time_str, sizeof(time_str), "%b %e  %Y", &tm);
-	} else {
-		time_ret = strftime(time_str, sizeof(time_str), "%b %e %H:%M", &tm);
-	}
-	if (time_ret == 0) {
-		errno = EOVERFLOW;
-		goto error;
-	}
-	if (cfprintf(cfile, " %s${rs}", time_str) < 0) {
+	if (print_time(file, time, now) < 0) {
 		goto error;
 	}
 
-	if (cfprintf(cfile, " %pP", ftwbuf) < 0) {
+	if (cfprintf(cfile, "${rs} %pP", ftwbuf) < 0) {
 		goto error;
 	}
 
@@ -902,7 +923,7 @@ bool eval_regex(const struct bfs_expr *expr, struct bfs_eval *state) {
 			eval_error(state, "%s.\n", str);
 			free(str);
 		} else {
-			eval_error(state, "bfs_regerror(): %m.\n");
+			eval_error(state, "bfs_regerror(): %s.\n", errstr());
 		}
 	}
 
@@ -999,6 +1020,13 @@ bool eval_xtype(const struct bfs_expr *expr, struct bfs_eval *state) {
 	const struct BFTW *ftwbuf = state->ftwbuf;
 	enum bfs_stat_flags flags = ftwbuf->stat_flags ^ (BFS_STAT_NOFOLLOW | BFS_STAT_TRYFOLLOW);
 	enum bfs_type type = bftw_type(ftwbuf, flags);
+
+	// GNU find treats ELOOP as a broken symbolic link for -xtype l
+	// (but not -L -type l)
+	if ((flags & BFS_STAT_TRYFOLLOW) && type == BFS_ERROR && errno == ELOOP) {
+		type = BFS_LNK;
+	}
+
 	if (type == BFS_ERROR) {
 		eval_report_error(state);
 		return false;
@@ -1007,40 +1035,23 @@ bool eval_xtype(const struct bfs_expr *expr, struct bfs_eval *state) {
 	}
 }
 
-#if _POSIX_MONOTONIC_CLOCK > 0
-#  define BFS_CLOCK CLOCK_MONOTONIC
-#elif _POSIX_TIMERS > 0
-#  define BFS_CLOCK CLOCK_REALTIME
-#endif
-
 /**
- * Call clock_gettime(), if available.
+ * clock_gettime() wrapper.
  */
 static int eval_gettime(struct bfs_eval *state, struct timespec *ts) {
-#ifdef BFS_CLOCK
-	int ret = clock_gettime(BFS_CLOCK, ts);
-	if (ret != 0) {
-		bfs_warning(state->ctx, "%pP: clock_gettime(): %m.\n", state->ftwbuf);
+	clockid_t clock = CLOCK_REALTIME;
+
+#if defined(_POSIX_MONOTONIC_CLOCK) && _POSIX_MONOTONIC_CLOCK >= 0
+	if (sysoption(MONOTONIC_CLOCK) > 0) {
+		clock = CLOCK_MONOTONIC;
 	}
-	return ret;
-#else
-	return -1;
 #endif
-}
 
-/**
- * Record an elapsed time.
- */
-static void timespec_elapsed(struct timespec *elapsed, const struct timespec *start, const struct timespec *end) {
-	elapsed->tv_sec += end->tv_sec - start->tv_sec;
-	elapsed->tv_nsec += end->tv_nsec - start->tv_nsec;
-	if (elapsed->tv_nsec < 0) {
-		elapsed->tv_nsec += 1000000000L;
-		--elapsed->tv_sec;
-	} else if (elapsed->tv_nsec >= 1000000000L) {
-		elapsed->tv_nsec -= 1000000000L;
-		++elapsed->tv_sec;
+	int ret = clock_gettime(clock, ts);
+	if (ret != 0) {
+		bfs_warning(state->ctx, "%pP: clock_gettime(): %s.\n", state->ftwbuf, errstr());
 	}
+	return ret;
 }
 
 /**
@@ -1061,7 +1072,8 @@ static bool eval_expr(struct bfs_expr *expr, struct bfs_eval *state) {
 
 	if (time) {
 		if (eval_gettime(state, &end) == 0) {
-			timespec_elapsed(&expr->elapsed, &start, &end);
+			timespec_sub(&end, &start);
+			timespec_add(&expr->elapsed, &end);
 		}
 	}
 
@@ -1091,7 +1103,7 @@ bool eval_not(const struct bfs_expr *expr, struct bfs_eval *state) {
  * Evaluate a conjunction.
  */
 bool eval_and(const struct bfs_expr *expr, struct bfs_eval *state) {
-	for (struct bfs_expr *child = bfs_expr_children(expr); child; child = child->next) {
+	for_expr (child, expr) {
 		if (!eval_expr(child, state) || state->quit) {
 			return false;
 		}
@@ -1104,7 +1116,7 @@ bool eval_and(const struct bfs_expr *expr, struct bfs_eval *state) {
  * Evaluate a disjunction.
  */
 bool eval_or(const struct bfs_expr *expr, struct bfs_eval *state) {
-	for (struct bfs_expr *child = bfs_expr_children(expr); child; child = child->next) {
+	for_expr (child, expr) {
 		if (eval_expr(child, state) || state->quit) {
 			return true;
 		}
@@ -1119,7 +1131,7 @@ bool eval_or(const struct bfs_expr *expr, struct bfs_eval *state) {
 bool eval_comma(const struct bfs_expr *expr, struct bfs_eval *state) {
 	bool ret uninit(false);
 
-	for (struct bfs_expr *child = bfs_expr_children(expr); child; child = child->next) {
+	for_expr (child, expr) {
 		ret = eval_expr(child, state);
 		if (state->quit) {
 			break;
@@ -1130,20 +1142,7 @@ bool eval_comma(const struct bfs_expr *expr, struct bfs_eval *state) {
 }
 
 /** Update the status bar. */
-static void eval_status(struct bfs_eval *state, struct bfs_bar *bar, struct timespec *last_status, size_t count) {
-	struct timespec now;
-	if (eval_gettime(state, &now) == 0) {
-		struct timespec elapsed = {0};
-		timespec_elapsed(&elapsed, last_status, &now);
-
-		// Update every 0.1s
-		if (elapsed.tv_sec > 0 || elapsed.tv_nsec >= 100000000L) {
-			*last_status = now;
-		} else {
-			return;
-		}
-	}
-
+static void eval_status(struct bfs_eval *state, struct bfs_bar *bar, size_t count) {
 	size_t width = bfs_bar_width(bar);
 	if (width < 3) {
 		return;
@@ -1159,7 +1158,7 @@ static void eval_status(struct bfs_eval *state, struct bfs_bar *bar, struct time
 
 	size_t rhslen = xstrwidth(rhs);
 	if (3 + rhslen > width) {
-		dstresize(&rhs, 0);
+		dstrshrink(rhs, 0);
 		rhslen = 0;
 	}
 
@@ -1203,7 +1202,7 @@ static void eval_status(struct bfs_eval *state, struct bfs_bar *bar, struct time
 		}
 		pathwidth += cwidth;
 	}
-	dstresize(&status, lhslen);
+	dstrshrink(status, lhslen);
 
 	if (dstrcat(&status, "...") != 0) {
 		goto out;
@@ -1270,7 +1269,7 @@ static void debug_stat(const struct bfs_ctx *ctx, const struct BFTW *ftwbuf, enu
 	bfs_debug_prefix(ctx, DEBUG_STAT);
 
 	fprintf(stderr, "bfs_stat(");
-	if (ftwbuf->at_fd == AT_FDCWD) {
+	if (ftwbuf->at_fd == (int)AT_FDCWD) {
 		fprintf(stderr, "AT_FDCWD");
 	} else {
 		size_t baselen = strlen(ftwbuf->path) - strlen(ftwbuf->at_path);
@@ -1286,7 +1285,7 @@ static void debug_stat(const struct bfs_ctx *ctx, const struct BFTW *ftwbuf, enu
 	DEBUG_FLAG(flags, BFS_STAT_TRYFOLLOW);
 	DEBUG_FLAG(flags, BFS_STAT_NOSYNC);
 
-	fprintf(stderr, ") == %d", err ? 0 : -1);
+	fprintf(stderr, ") == %d", err == 0 ? 0 : -1);
 
 	if (err) {
 		fprintf(stderr, " [%d]", err);
@@ -1373,18 +1372,85 @@ struct callback_args {
 
 	/** The status bar. */
 	struct bfs_bar *bar;
-	/** The time of the last status update. */
-	struct timespec last_status;
+	/** The SIGALRM hook. */
+	struct sighook *alrm_hook;
+	/** The interval timer. */
+	struct timer *timer;
+	/** Flag set by SIGALRM. */
+	atomic bool alrm_flag;
+	/** Flag set by SIGINFO. */
+	atomic bool info_flag;
+
 	/** The number of files visited so far. */
 	size_t count;
 
 	/** The set of seen files. */
 	struct trie *seen;
 
+	/** The number of errors that have occurred. */
+	size_t nerrors;
 	/** Eventual return value from bfs_eval(). */
 	int ret;
 };
 
+/** SIGALRM hook: flag that the status bar is due for an update. */
+static void eval_sigalrm(int sig, siginfo_t *info, void *ptr) {
+	struct callback_args *args = ptr;
+	store(&args->alrm_flag, true, relaxed);
+}
+
+/** SIGINFO (or SIGUSR1) hook: flag that the status bar should be toggled (shown/hidden). */
+static void eval_siginfo(int sig, siginfo_t *info, void *ptr) {
+	struct callback_args *args = ptr;
+	store(&args->info_flag, true, relaxed);
+}
+
+/** Show the status bar: hook SIGALRM, show the bar, and start a 0.1s interval timer.  On failure, warn and undo. */
+static void eval_show_bar(struct callback_args *args) {
+	args->alrm_hook = sighook(SIGALRM, eval_sigalrm, args, SH_CONTINUE);
+	if (!args->alrm_hook) {
+		goto fail;
+	}
+
+	args->bar = bfs_bar_show();
+	if (!args->bar) {
+		goto fail;
+	}
+
+	// Update the bar every 0.1s
+	struct timespec ival = { .tv_nsec = 100 * 1000 * 1000 };
+	args->timer = xtimer_start(&ival);
+	if (!args->timer) {
+		goto fail;
+	}
+
+	// Update the bar immediately
+	store(&args->alrm_flag, true, relaxed);
+
+	return;
+
+fail:
+	bfs_warning(args->ctx, "Couldn't show status bar: %s.\n\n", errstr());
+
+	bfs_bar_hide(args->bar);
+	args->bar = NULL;
+
+	sigunhook(args->alrm_hook);
+	args->alrm_hook = NULL;
+}
+
+/** Hide the status bar and drop any pending tick, so eval_callback() never passes a NULL bar to eval_status(). */
+static void eval_hide_bar(struct callback_args *args) {
+	xtimer_stop(args->timer);
+	args->timer = NULL;
+
+	sigunhook(args->alrm_hook);
+	args->alrm_hook = NULL;
+	store(&args->alrm_flag, false, relaxed);
+	bfs_bar_hide(args->bar);
+	args->bar = NULL;
+}
+
 /**
  * bftw() callback.
  */
@@ -1399,17 +1465,37 @@ static enum bftw_action eval_callback(const struct BFTW *ftwbuf, void *ptr) {
 	state.ctx = ctx;
 	state.action = BFTW_CONTINUE;
 	state.ret = &args->ret;
+	state.nerrors = &args->nerrors;
 	state.quit = false;
 
-	if (args->bar) {
-		eval_status(&state, args->bar, &args->last_status, args->count);
+	// Check whether SIGINFO was delivered and show/hide the bar
+	if (exchange(&args->info_flag, false, relaxed)) {
+		if (args->bar) {
+			eval_hide_bar(args);
+		} else {
+			eval_show_bar(args);
+		}
+	}
+
+	if (exchange(&args->alrm_flag, false, relaxed)) {
+		eval_status(&state, args->bar, args->count);
 	}
 
 	if (ftwbuf->type == BFS_ERROR) {
-		if (!eval_should_ignore(&state, ftwbuf->error)) {
-			eval_error(&state, "%s.\n", xstrerror(ftwbuf->error));
-		}
 		state.action = BFTW_PRUNE;
+
+		if (ftwbuf->error == ELOOP && ftwbuf->loopoff > 0) {
+			char *loop = strndup(ftwbuf->path, ftwbuf->loopoff);
+			if (loop) {
+				eval_error(&state, "Filesystem loop back to ${di}%pq${rs}\n", loop);
+				free(loop);
+				goto done;
+			}
+		} else if (eval_should_ignore(&state, ftwbuf->error)) {
+			goto done;
+		}
+
+		eval_error(&state, "%s.\n", xstrerror(ftwbuf->error));
 		goto done;
 	}
 
@@ -1468,6 +1554,9 @@ done:
 static int raise_fdlimit(struct bfs_ctx *ctx) {
 	rlim_t cur = ctx->orig_nofile.rlim_cur;
 	rlim_t max = ctx->orig_nofile.rlim_max;
+	if (!ctx->raise_nofile) {
+		max = cur;
+	}
 
 	rlim_t target = 64 << 10;
 	if (rlim_cmp(target, max) > 0) {
@@ -1596,7 +1685,7 @@ static bool eval_must_buffer(const struct bfs_expr *expr) {
 		return true;
 	}
 
-	for (struct bfs_expr *child = bfs_expr_children(expr); child; child = child->next) {
+	for_expr (child, expr) {
 		if (eval_must_buffer(child)) {
 			return true;
 		}
@@ -1617,12 +1706,16 @@ int bfs_eval(struct bfs_ctx *ctx) {
 	};
 
 	if (ctx->status) {
-		args.bar = bfs_bar_show();
-		if (!args.bar) {
-			bfs_warning(ctx, "Couldn't show status bar: %m.\n\n");
-		}
+		eval_show_bar(&args);
 	}
 
+#ifdef SIGINFO
+	int siginfo = SIGINFO;
+#else
+	int siginfo = SIGUSR1;
+#endif
+	struct sighook *info_hook = sighook(siginfo, eval_siginfo, &args, SH_CONTINUE);
+
 	struct trie seen;
 	if (ctx->unique) {
 		trie_init(&seen);
@@ -1690,7 +1783,14 @@ int bfs_eval(struct bfs_ctx *ctx) {
 		trie_destroy(&seen);
 	}
 
-	bfs_bar_hide(args.bar);
+	sigunhook(info_hook);
+	if (args.bar) {
+		eval_hide_bar(&args);
+	}
+
+	if (ctx->ignore_errors && args.nerrors > 0) {
+		bfs_warning(ctx, "Suppressed errors: %zu\n", args.nerrors);
+	}
 
 	return args.ret;
 }
diff --git a/src/eval.h b/src/eval.h
index 4dd7996..b038740 100644
--- a/src/eval.h
+++ b/src/eval.h
@@ -9,8 +9,6 @@
 #ifndef BFS_EVAL_H
 #define BFS_EVAL_H
 
-#include "prelude.h"
-
 struct bfs_ctx;
 struct bfs_expr;
 
@@ -22,9 +20,9 @@ struct bfs_eval;
 /**
  * Expression evaluation function.
  *
- * @param expr
+ * @expr
  *         The current expression.
- * @param state
+ * @state
  *         The current evaluation state.
  * @return
  *         The result of the test.
@@ -34,7 +32,7 @@ typedef bool bfs_eval_fn(const struct bfs_expr *expr, struct bfs_eval *state);
 /**
  * Evaluate the command line.
  *
- * @param ctx
+ * @ctx
  *         The bfs context to evaluate.
  * @return
  *         EXIT_SUCCESS on success, otherwise on failure.
diff --git a/src/exec.c b/src/exec.c
index e782d49..45c9f1d 100644
--- a/src/exec.c
+++ b/src/exec.c
@@ -1,9 +1,10 @@
 // Copyright © Tavian Barnes <tavianator@tavianator.com>
 // SPDX-License-Identifier: 0BSD
 
-#include "prelude.h"
 #include "exec.h"
+
 #include "alloc.h"
+#include "bfs.h"
 #include "bfstd.h"
 #include "bftw.h"
 #include "color.h"
@@ -11,6 +12,7 @@
 #include "diag.h"
 #include "dstring.h"
 #include "xspawn.h"
+
 #include <errno.h>
 #include <fcntl.h>
 #include <stdarg.h>
@@ -22,7 +24,7 @@
 #include <unistd.h>
 
 /** Print some debugging info. */
-attr(printf(2, 3))
+_printf(2, 3)
 static void bfs_exec_debug(const struct bfs_exec *execbuf, const char *format, ...) {
 	const struct bfs_ctx *ctx = execbuf->ctx;
 
@@ -56,7 +58,7 @@ static size_t bfs_exec_arg_size(const char *arg) {
 
 /** Determine the maximum argv size. */
 static size_t bfs_exec_arg_max(const struct bfs_exec *execbuf) {
-	long arg_max = sysconf(_SC_ARG_MAX);
+	long arg_max = xsysconf(_SC_ARG_MAX);
 	bfs_exec_debug(execbuf, "ARG_MAX: %ld according to sysconf()\n", arg_max);
 	if (arg_max < 0) {
 		arg_max = BFS_EXEC_ARG_MAX;
@@ -82,7 +84,7 @@ static size_t bfs_exec_arg_max(const struct bfs_exec *execbuf) {
 
 	// Assume arguments are counted with the granularity of a single page,
 	// so allow a one page cushion to account for rounding up
-	long page_size = sysconf(_SC_PAGESIZE);
+	long page_size = xsysconf(_SC_PAGESIZE);
 	if (page_size < 4096) {
 		page_size = 4096;
 	}
@@ -234,7 +236,7 @@ static char *bfs_exec_format_arg(char *arg, const char *path) {
 
 	char *last = arg;
 	do {
-		if (dstrncat(&ret, last, match - last) != 0) {
+		if (dstrxcat(&ret, last, match - last) != 0) {
 			goto err;
 		}
 		if (dstrcat(&ret, path) != 0) {
@@ -268,7 +270,7 @@ static int bfs_exec_openwd(struct bfs_exec *execbuf, const struct BFTW *ftwbuf)
 	bfs_assert(execbuf->wd_fd < 0);
 	bfs_assert(!execbuf->wd_path);
 
-	if (ftwbuf->at_fd != AT_FDCWD) {
+	if (ftwbuf->at_fd != (int)AT_FDCWD) {
 		// Rely on at_fd being the immediate parent
 		bfs_assert(xbaseoff(ftwbuf->at_path) == 0);
 
diff --git a/src/exec.h b/src/exec.h
index 9d4192d..1d8e75f 100644
--- a/src/exec.h
+++ b/src/exec.h
@@ -67,11 +67,11 @@ struct bfs_exec {
 /**
  * Parse an exec action.
  *
- * @param argv
+ * @argv
  *         The (bfs) command line argument to parse.
- * @param flags
+ * @flags
  *         Any flags for this exec action.
- * @param ctx
+ * @ctx
  *         The bfs context.
  * @return
  *         The parsed exec action, or NULL on failure.
@@ -81,9 +81,9 @@ struct bfs_exec *bfs_exec_parse(const struct bfs_ctx *ctx, char **argv, enum bfs
 /**
  * Execute the command for a file.
  *
- * @param execbuf
+ * @execbuf
  *         The parsed exec action.
- * @param ftwbuf
+ * @ftwbuf
  *         The bftw() data for the current file.
  * @return 0 if the command succeeded, -1 if it failed.  If the command could
  *         be executed, -1 is returned, and errno will be non-zero.  For
@@ -94,7 +94,7 @@ int bfs_exec(struct bfs_exec *execbuf, const struct BFTW *ftwbuf);
 /**
  * Finish executing any commands.
  *
- * @param execbuf
+ * @execbuf
  *         The parsed exec action.
  * @return 0 on success, -1 if any errors were encountered.
  */
diff --git a/src/expr.c b/src/expr.c
index 5784220..ca37ffc 100644
--- a/src/expr.c
+++ b/src/expr.c
@@ -2,6 +2,7 @@
 // SPDX-License-Identifier: 0BSD
 
 #include "expr.h"
+
 #include "alloc.h"
 #include "ctx.h"
 #include "diag.h"
@@ -10,9 +11,12 @@
 #include "list.h"
 #include "printf.h"
 #include "xregex.h"
+
 #include <string.h>
 
-struct bfs_expr *bfs_expr_new(struct bfs_ctx *ctx, bfs_eval_fn *eval_fn, size_t argc, char **argv) {
+struct bfs_expr *bfs_expr_new(struct bfs_ctx *ctx, bfs_eval_fn *eval_fn, size_t argc, char **argv, enum bfs_kind kind) {
+	bfs_assert(kind != BFS_PATH);
+
 	struct bfs_expr *expr = arena_alloc(&ctx->expr_arena);
 	if (!expr) {
 		return NULL;
@@ -22,6 +26,7 @@ struct bfs_expr *bfs_expr_new(struct bfs_ctx *ctx, bfs_eval_fn *eval_fn, size_t
 	expr->eval_fn = eval_fn;
 	expr->argc = argc;
 	expr->argv = argv;
+	expr->kind = kind;
 	expr->probability = 0.5;
 	SLIST_PREPEND(&ctx->expr_list, expr, freelist);
 
@@ -63,8 +68,7 @@ void bfs_expr_append(struct bfs_expr *expr, struct bfs_expr *child) {
 }
 
 void bfs_expr_extend(struct bfs_expr *expr, struct bfs_exprs *children) {
-	while (!SLIST_EMPTY(children)) {
-		struct bfs_expr *child = SLIST_POP(children);
+	drain_slist (struct bfs_expr, child, children) {
 		bfs_expr_append(expr, child);
 	}
 }
diff --git a/src/expr.h b/src/expr.h
index 7bcace7..c116778 100644
--- a/src/expr.h
+++ b/src/expr.h
@@ -8,14 +8,38 @@
 #ifndef BFS_EXPR_H
 #define BFS_EXPR_H
 
-#include "prelude.h"
 #include "color.h"
 #include "eval.h"
 #include "stat.h"
+
 #include <sys/types.h>
 #include <time.h>
 
 /**
+ * Argument/token/expression kinds.
+ */
+enum bfs_kind {
+	/** A regular argument. */
+	BFS_ARG,
+
+	/** A flag (-H, -L, etc.). */
+	BFS_FLAG,
+
+	/** A root path. */
+	BFS_PATH,
+
+	/** An option (-follow, -mindepth, etc.). */
+	BFS_OPTION,
+	/** A test (-name, -size, etc.). */
+	BFS_TEST,
+	/** An action (-print, -exec, etc.). */
+	BFS_ACTION,
+
+	/** An operator (-and, -or, etc.). */
+	BFS_OPERATOR,
+};
+
+/**
  * Integer comparison modes.
  */
 enum bfs_int_cmp {
@@ -97,6 +121,8 @@ struct bfs_expr {
 	size_t argc;
 	/** The command line arguments comprising this expression. */
 	char **argv;
+	/** The kind of expression this is. */
+	enum bfs_kind kind;
 
 	/** The number of files this expression keeps open between evaluations. */
 	int persistent_fds;
@@ -123,7 +149,7 @@ struct bfs_expr {
 	/** Total time spent running this predicate. */
 	struct timespec elapsed;
 
-	/** Auxilliary data for the evaluation function. */
+	/** Auxiliary data for the evaluation function. */
 	union {
 		/** Child expressions. */
 		struct bfs_exprs children;
@@ -207,7 +233,7 @@ struct bfs_ctx;
 /**
  * Create a new expression.
  */
-struct bfs_expr *bfs_expr_new(struct bfs_ctx *ctx, bfs_eval_fn *eval, size_t argc, char **argv);
+struct bfs_expr *bfs_expr_new(struct bfs_ctx *ctx, bfs_eval_fn *eval, size_t argc, char **argv, enum bfs_kind kind);
 
 /**
  * @return Whether this type of expression has children.
@@ -244,4 +270,10 @@ bool bfs_expr_cmp(const struct bfs_expr *expr, long long n);
  */
 void bfs_expr_clear(struct bfs_expr *expr);
 
+/**
+ * Iterate over the children of an expression.
+ */
+#define for_expr(child, expr) \
+	for (struct bfs_expr *child = bfs_expr_children(expr); child; child = child->next)
+
 #endif // BFS_EXPR_H
diff --git a/src/fsade.c b/src/fsade.c
index d56fb07..dfdf125 100644
--- a/src/fsade.c
+++ b/src/fsade.c
@@ -1,14 +1,16 @@
 // Copyright © Tavian Barnes <tavianator@tavianator.com>
 // SPDX-License-Identifier: 0BSD
 
-#include "prelude.h"
 #include "fsade.h"
+
 #include "atomic.h"
+#include "bfs.h"
 #include "bfstd.h"
 #include "bftw.h"
 #include "dir.h"
 #include "dstring.h"
 #include "sanity.h"
+
 #include <errno.h>
 #include <fcntl.h>
 #include <stddef.h>
@@ -26,22 +28,31 @@
 #  include <selinux/selinux.h>
 #endif
 
-#if BFS_USE_SYS_EXTATTR_H
+#if __has_include(<sys/extattr.h>)
 #  include <sys/extattr.h>
-#elif BFS_USE_SYS_XATTR_H
+#  define BFS_USE_EXTATTR true
+#elif __has_include(<sys/xattr.h>)
 #  include <sys/xattr.h>
+#  define BFS_USE_XATTR true
+#endif
+
+#ifndef BFS_USE_EXTATTR
+#  define BFS_USE_EXTATTR false
+#endif
+#ifndef BFS_USE_XATTR
+#  define BFS_USE_XATTR false
 #endif
 
 /**
  * Many of the APIs used here don't have *at() variants, but we can try to
  * emulate something similar if /proc/self/fd is available.
  */
-attr(maybe_unused)
+_maybe_unused
 static const char *fake_at(const struct BFTW *ftwbuf) {
 	static atomic int proc_works = -1;
 
 	dchar *path = NULL;
-	if (ftwbuf->at_fd == AT_FDCWD || load(&proc_works, relaxed) == 0) {
+	if (ftwbuf->at_fd == (int)AT_FDCWD || load(&proc_works, relaxed) == 0) {
 		goto fail;
 	}
 
@@ -70,7 +81,7 @@ fail:
 	return ftwbuf->path;
 }
 
-attr(maybe_unused)
+_maybe_unused
 static void free_fake_at(const struct BFTW *ftwbuf, const char *path) {
 	if (path != ftwbuf->path) {
 		dstrfree((dchar *)path);
@@ -80,7 +91,7 @@ static void free_fake_at(const struct BFTW *ftwbuf, const char *path) {
 /**
  * Check if an error was caused by the absence of support or data for a feature.
  */
-attr(maybe_unused)
+_maybe_unused
 static bool is_absence_error(int error) {
 	// If the OS doesn't support the feature, it's obviously not enabled for
 	// any files
@@ -160,7 +171,7 @@ static int bfs_acl_entry(acl_t acl, int which, acl_entry_t *entry) {
 }
 
 /** Unified interface for acl_get_tag_type(). */
-attr(maybe_unused)
+_maybe_unused
 static int bfs_acl_tag_type(acl_entry_t entry, acl_tag_t *tag) {
 #if BFS_HAS_ACL_GET_TAG_TYPE
 	return acl_get_tag_type(entry, tag);
@@ -344,7 +355,7 @@ int bfs_check_capabilities(const struct BFTW *ftwbuf) {
 
 #if BFS_CAN_CHECK_XATTRS
 
-#if BFS_USE_SYS_EXTATTR_H
+#if BFS_USE_EXTATTR
 
 /** Wrapper for extattr_list_{file,link}. */
 static ssize_t bfs_extattr_list(const char *path, enum bfs_type type, int namespace) {
@@ -390,13 +401,13 @@ static ssize_t bfs_extattr_get(const char *path, enum bfs_type type, int namespa
 #endif
 }
 
-#endif // BFS_USE_SYS_EXTATTR_H
+#endif // BFS_USE_EXTATTR
 
 int bfs_check_xattrs(const struct BFTW *ftwbuf) {
 	const char *path = fake_at(ftwbuf);
 	ssize_t len;
 
-#if BFS_USE_SYS_EXTATTR_H
+#if BFS_USE_EXTATTR
 	len = bfs_extattr_list(path, ftwbuf->type, EXTATTR_NAMESPACE_SYSTEM);
 	if (len <= 0) {
 		len = bfs_extattr_list(path, ftwbuf->type, EXTATTR_NAMESPACE_USER);
@@ -432,7 +443,7 @@ int bfs_check_xattr_named(const struct BFTW *ftwbuf, const char *name) {
 	const char *path = fake_at(ftwbuf);
 	ssize_t len;
 
-#if BFS_USE_SYS_EXTATTR_H
+#if BFS_USE_EXTATTR
 	len = bfs_extattr_get(path, ftwbuf->type, EXTATTR_NAMESPACE_SYSTEM, name);
 	if (len < 0) {
 		len = bfs_extattr_get(path, ftwbuf->type, EXTATTR_NAMESPACE_USER, name);
diff --git a/src/fsade.h b/src/fsade.h
index eefef9f..fbe02d8 100644
--- a/src/fsade.h
+++ b/src/fsade.h
@@ -9,22 +9,26 @@
 #ifndef BFS_FSADE_H
 #define BFS_FSADE_H
 
-#include "prelude.h"
+#include "bfs.h"
 
 #define BFS_CAN_CHECK_ACL (BFS_HAS_ACL_GET_FILE || BFS_HAS_ACL_TRIVIAL)
 
-#define BFS_CAN_CHECK_CAPABILITIES BFS_USE_LIBCAP
+#define BFS_CAN_CHECK_CAPABILITIES BFS_WITH_LIBCAP
 
-#define BFS_CAN_CHECK_CONTEXT BFS_USE_LIBSELINUX
+#define BFS_CAN_CHECK_CONTEXT BFS_WITH_LIBSELINUX
 
-#define BFS_CAN_CHECK_XATTRS (BFS_USE_SYS_EXTATTR_H || BFS_USE_SYS_XATTR_H)
+#if __has_include(<sys/extattr.h>) || __has_include(<sys/xattr.h>)
+#  define BFS_CAN_CHECK_XATTRS true
+#else
+#  define BFS_CAN_CHECK_XATTRS false
+#endif
 
 struct BFTW;
 
 /**
  * Check if a file has a non-trivial Access Control List.
  *
- * @param ftwbuf
+ * @ftwbuf
  *         The file to check.
  * @return
  *         1 if it does, 0 if it doesn't, or -1 if an error occurred.
@@ -34,7 +38,7 @@ int bfs_check_acl(const struct BFTW *ftwbuf);
 /**
  * Check if a file has a non-trivial capability set.
  *
- * @param ftwbuf
+ * @ftwbuf
  *         The file to check.
  * @return
  *         1 if it does, 0 if it doesn't, or -1 if an error occurred.
@@ -44,7 +48,7 @@ int bfs_check_capabilities(const struct BFTW *ftwbuf);
 /**
  * Check if a file has any extended attributes set.
  *
- * @param ftwbuf
+ * @ftwbuf
  *         The file to check.
  * @return
  *         1 if it does, 0 if it doesn't, or -1 if an error occurred.
@@ -54,9 +58,9 @@ int bfs_check_xattrs(const struct BFTW *ftwbuf);
 /**
  * Check if a file has an extended attribute with the given name.
  *
- * @param ftwbuf
+ * @ftwbuf
  *         The file to check.
- * @param name
+ * @name
  *         The name of the xattr to check.
  * @return
  *         1 if it does, 0 if it doesn't, or -1 if an error occurred.
@@ -66,7 +70,7 @@ int bfs_check_xattr_named(const struct BFTW *ftwbuf, const char *name);
 /**
  * Get a file's SELinux context
  *
- * @param ftwbuf
+ * @ftwbuf
  *         The file to check.
  * @return
  *         The file's SELinux context, or NULL on failure.
diff --git a/src/ioq.c b/src/ioq.c
index 43a1b35..57eb4a5 100644
--- a/src/ioq.c
+++ b/src/ioq.c
@@ -118,24 +118,27 @@
  * [1]: https://arxiv.org/abs/2201.02179
  */
 
-#include "prelude.h"
 #include "ioq.h"
+
 #include "alloc.h"
 #include "atomic.h"
+#include "bfs.h"
 #include "bfstd.h"
 #include "bit.h"
 #include "diag.h"
 #include "dir.h"
 #include "stat.h"
 #include "thread.h"
+
 #include <errno.h>
 #include <fcntl.h>
 #include <pthread.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <sys/stat.h>
+#include <unistd.h>
 
-#if BFS_USE_LIBURING
+#if BFS_WITH_LIBURING
 #  include <liburing.h>
 #endif
 
@@ -180,8 +183,7 @@ typedef atomic uintptr_t ioq_slot;
 /** Amount to add for an additional skip. */
 #define IOQ_SKIP_ONE (~IOQ_BLOCKED)
 
-// Need room for two flag bits
-bfs_static_assert(alignof(struct ioq_ent) >= (1 << 2));
+static_assert(alignof(struct ioq_ent) >= (1 << 2), "struct ioq_ent is underaligned");
 
 /**
  * An MPMC queue of I/O commands.
@@ -201,7 +203,7 @@ struct ioqq {
 	cache_align atomic size_t tail;
 
 	/** The circular buffer itself. */
-	cache_align ioq_slot slots[];
+	cache_align ioq_slot slots[]; // _counted_by(slot_mask + 1)
 };
 
 /** Destroy an I/O command queue. */
@@ -258,17 +260,45 @@ static struct ioqq *ioqq_create(size_t size) {
 
 /** Get the monitor associated with a slot. */
 static struct ioq_monitor *ioq_slot_monitor(struct ioqq *ioqq, ioq_slot *slot) {
-	size_t i = slot - ioqq->slots;
+	uint32_t i = slot - ioqq->slots;
+
+	// Hash the index to de-correlate waiters
+	// https://nullprogram.com/blog/2018/07/31/
+	// https://github.com/skeeto/hash-prospector/issues/19#issuecomment-1120105785
+	i ^= i >> 16;
+	i *= UINT32_C(0x21f0aaad);
+	i ^= i >> 15;
+	i *= UINT32_C(0x735a2d97);
+	i ^= i >> 15;
+
 	return &ioqq->monitors[i & ioqq->monitor_mask];
 }
 
 /** Atomically wait for a slot to change. */
-attr(noinline)
+_noinline
 static uintptr_t ioq_slot_wait(struct ioqq *ioqq, ioq_slot *slot, uintptr_t value) {
+	uintptr_t ret;
+
+	// Try spinning a few times (with exponential backoff) before blocking
+	_nounroll
+	for (int i = 1; i < 1024; i *= 2) {
+		_nounroll
+		for (int j = 0; j < i; ++j) {
+			spin_loop();
+		}
+
+		// Check if the slot changed
+		ret = load(slot, relaxed);
+		if (ret != value) {
+			return ret;
+		}
+	}
+
+	// Nothing changed, start blocking
 	struct ioq_monitor *monitor = ioq_slot_monitor(ioqq, slot);
 	mutex_lock(&monitor->mutex);
 
-	uintptr_t ret = load(slot, relaxed);
+	ret = load(slot, relaxed);
 	if (ret != value) {
 		goto done;
 	}
@@ -293,7 +323,7 @@ done:
 }
 
 /** Wake up any threads waiting on a slot. */
-attr(noinline)
+_noinline
 static void ioq_slot_wake(struct ioqq *ioqq, ioq_slot *slot) {
 	struct ioq_monitor *monitor = ioq_slot_monitor(ioqq, slot);
 
@@ -313,9 +343,11 @@ static void ioq_slot_wake(struct ioqq *ioqq, ioq_slot *slot) {
 	cond_broadcast(&monitor->cond);
 }
 
-/** Branch-free (slot & IOQ_SKIP) ? ~IOQ_BLOCKED : 0 */
-static uintptr_t ioq_skip_mask(uintptr_t slot) {
-	return -(slot >> IOQ_SKIP_BIT) << 1;
+/** Branch-free ((slot & IOQ_SKIP) ? skip : full) & ~IOQ_BLOCKED */
+static uintptr_t ioq_slot_blend(uintptr_t slot, uintptr_t skip, uintptr_t full) {
+	uintptr_t mask = -(slot >> IOQ_SKIP_BIT);
+	uintptr_t ret = (skip & mask) | (full & ~mask);
+	return ret & ~IOQ_BLOCKED;
 }
 
 /** Push an entry into a slot. */
@@ -323,19 +355,18 @@ static bool ioq_slot_push(struct ioqq *ioqq, ioq_slot *slot, struct ioq_ent *ent
 	uintptr_t prev = load(slot, relaxed);
 
 	while (true) {
-		size_t skip_mask = ioq_skip_mask(prev);
-		size_t full_mask = ~skip_mask & ~IOQ_BLOCKED;
-		if (prev & full_mask) {
+		uintptr_t full = ioq_slot_blend(prev, 0, prev);
+		if (full) {
 			// full(ptr) → wait
 			prev = ioq_slot_wait(ioqq, slot, prev);
 			continue;
 		}
 
 		// empty   → full(ptr)
-		uintptr_t next = ((uintptr_t)ent >> 1) & full_mask;
+		uintptr_t next = (uintptr_t)ent >> 1;
 		// skip(1) → empty
 		// skip(n) → skip(n - 1)
-		next |= (prev - IOQ_SKIP_ONE) & skip_mask;
+		next = ioq_slot_blend(prev, prev - IOQ_SKIP_ONE, next);
 
 		if (compare_exchange_weak(slot, &prev, next, release, relaxed)) {
 			break;
@@ -353,13 +384,20 @@ static bool ioq_slot_push(struct ioqq *ioqq, ioq_slot *slot, struct ioq_ent *ent
 static struct ioq_ent *ioq_slot_pop(struct ioqq *ioqq, ioq_slot *slot, bool block) {
 	uintptr_t prev = load(slot, relaxed);
 	while (true) {
+#if __has_builtin(__builtin_prefetch)
+		// Optimistically prefetch the pointer in this slot.  If this
+		// slot is not full, this will prefetch an invalid address, but
+		// experimentally this is worth it on both Intel (Alder Lake)
+		// and AMD (Zen 2).
+		__builtin_prefetch((void *)(prev << 1), 1 /* write */);
+#endif
+
 		// empty     → skip(1)
 		// skip(n)   → skip(n + 1)
 		// full(ptr) → full(ptr - 1)
 		uintptr_t next = prev + IOQ_SKIP_ONE;
-		// skip(n)   → ~IOQ_BLOCKED
 		// full(ptr) → 0
-		next &= ioq_skip_mask(next);
+		next = ioq_slot_blend(next, next, 0);
 
 		if (block && next) {
 			prev = ioq_slot_wait(ioqq, slot, prev);
@@ -378,7 +416,7 @@ static struct ioq_ent *ioq_slot_pop(struct ioqq *ioqq, ioq_slot *slot, bool bloc
 	// empty     → 0
 	// skip(n)   → 0
 	// full(ptr) → ptr
-	prev &= ioq_skip_mask(~prev);
+	prev = ioq_slot_blend(prev, 0, prev);
 	return (struct ioq_ent *)(prev << 1);
 }
 
@@ -408,13 +446,6 @@ static void ioqq_push_batch(struct ioqq *ioqq, struct ioq_ent *batch[], size_t s
 	} while (size > 0);
 }
 
-/** Pop an entry from the queue. */
-static struct ioq_ent *ioqq_pop(struct ioqq *ioqq, bool block) {
-	size_t i = fetch_add(&ioqq->tail, 1, relaxed);
-	ioq_slot *slot = &ioqq->slots[i & ioqq->slot_mask];
-	return ioq_slot_pop(ioqq, slot, block);
-}
-
 /** Pop a batch of entries from the queue. */
 static void ioqq_pop_batch(struct ioqq *ioqq, struct ioq_ent *batch[], size_t size, bool block) {
 	size_t mask = ioqq->slot_mask;
@@ -430,36 +461,83 @@ static void ioqq_pop_batch(struct ioqq *ioqq, struct ioq_ent *batch[], size_t si
 #define IOQ_BATCH (FALSE_SHARING_SIZE / sizeof(ioq_slot))
 
 /**
- * A batch of entries to send all at once.
+ * A batch of I/O queue entries.
  */
 struct ioq_batch {
-	/** The current batch size. */
-	size_t size;
+	/** The start of the batch. */
+	size_t head;
+	/** The end of the batch. */
+	size_t tail;
 	/** The array of entries. */
 	struct ioq_ent *entries[IOQ_BATCH];
 };
 
-/** Send the batch to a queue. */
+/** Reset a batch. */
+static void ioq_batch_reset(struct ioq_batch *batch) {
+	batch->head = batch->tail = 0;
+}
+
+/** Check if a batch is empty. */
+static bool ioq_batch_empty(const struct ioq_batch *batch) {
+	return batch->head >= batch->tail;
+}
+
+/** Send a batch to a queue. */
 static void ioq_batch_flush(struct ioqq *ioqq, struct ioq_batch *batch) {
-	if (batch->size > 0) {
-		ioqq_push_batch(ioqq, batch->entries, batch->size);
-		batch->size = 0;
+	if (batch->tail > 0) {
+		ioqq_push_batch(ioqq, batch->entries, batch->tail);
+		ioq_batch_reset(batch);
 	}
 }
 
-/** An an entry to a batch, flushing if necessary. */
+/** Push an entry to a batch, flushing if necessary. */
 static void ioq_batch_push(struct ioqq *ioqq, struct ioq_batch *batch, struct ioq_ent *ent) {
-	if (batch->size >= IOQ_BATCH) {
+	batch->entries[batch->tail++] = ent;
+
+	if (batch->tail >= IOQ_BATCH) {
 		ioq_batch_flush(ioqq, batch);
 	}
+}
+
+/** Fill a batch from a queue. */
+static bool ioq_batch_fill(struct ioqq *ioqq, struct ioq_batch *batch, bool block) {
+	ioqq_pop_batch(ioqq, batch->entries, IOQ_BATCH, block);
+
+	ioq_batch_reset(batch);
+	for (size_t i = 0; i < IOQ_BATCH; ++i) {
+		struct ioq_ent *ent = batch->entries[i];
+		if (ent) {
+			batch->entries[batch->tail++] = ent;
+		}
+	}
+
+	return batch->tail > 0;
+}
+
+/** Pop an entry from a batch, filling it first if necessary. */
+static struct ioq_ent *ioq_batch_pop(struct ioqq *ioqq, struct ioq_batch *batch, bool block) {
+	if (ioq_batch_empty(batch)) {
+		// For non-blocking pops, make sure that each ioq_batch_pop()
+		// corresponds to a single (amortized) increment of ioqq->tail.
+		// Otherwise, we start skipping many slots and batching ends up
+		// degrading performance.
+		if (!block && batch->head < IOQ_BATCH) {
+			++batch->head;
+			return NULL;
+		}
+
+		if (!ioq_batch_fill(ioqq, batch, block)) {
+			return NULL;
+		}
+	}
 
-	batch->entries[batch->size++] = ent;
+	return batch->entries[batch->head++];
 }
 
 /** Sentinel stop command. */
 static struct ioq_ent IOQ_STOP;
 
-#if BFS_USE_LIBURING
+#if BFS_WITH_LIBURING
 /**
  * Supported io_uring operations.
  */
@@ -477,7 +555,7 @@ struct ioq_thread {
 	/** Pointer back to the I/O queue. */
 	struct ioq *parent;
 
-#if BFS_USE_LIBURING
+#if BFS_WITH_LIBURING
 	/** io_uring instance. */
 	struct io_uring ring;
 	/** Any error that occurred initializing the ring. */
@@ -497,20 +575,25 @@ struct ioq {
 
 	/** ioq_ent arena. */
 	struct arena ents;
-#if BFS_USE_LIBURING && BFS_USE_STATX
+#if BFS_WITH_LIBURING && BFS_USE_STATX
 	/** struct statx arena. */
 	struct arena xbufs;
 #endif
 
-	/** Pending I/O requests. */
+	/** Pending I/O request queue. */
 	struct ioqq *pending;
-	/** Ready I/O responses. */
+	/** Ready I/O response queue. */
 	struct ioqq *ready;
 
+	/** Pending request batch. */
+	struct ioq_batch pending_batch;
+	/** Ready response batch. */
+	struct ioq_batch ready_batch;
+
 	/** The number of background threads. */
 	size_t nthreads;
 	/** The background threads themselves. */
-	struct ioq_thread threads[];
+	struct ioq_thread threads[] _counted_by(nthreads);
 };
 
 /** Cancel a request if we need to. */
@@ -531,6 +614,14 @@ static bool ioq_check_cancel(struct ioq *ioq, struct ioq_ent *ent) {
 /** Dispatch a single request synchronously. */
 static void ioq_dispatch_sync(struct ioq *ioq, struct ioq_ent *ent) {
 	switch (ent->op) {
+		case IOQ_NOP:
+			if (ent->nop.type == IOQ_NOP_HEAVY) {
+				// A fast, no-op syscall
+				getppid();
+			}
+			ent->result = 0;
+			return;
+
 		case IOQ_CLOSE:
 			ent->result = try(xclose(ent->close.fd));
 			return;
@@ -559,7 +650,7 @@ static void ioq_dispatch_sync(struct ioq *ioq, struct ioq_ent *ent) {
 	ent->result = -ENOSYS;
 }
 
-#if BFS_USE_LIBURING
+#if BFS_WITH_LIBURING
 
 /** io_uring worker state. */
 struct ioq_ring_state {
@@ -579,23 +670,161 @@ struct ioq_ring_state {
 	struct ioq_batch ready;
 };
 
+/** Reap a single CQE. */
+static void ioq_reap_cqe(struct ioq_ring_state *state, struct io_uring_cqe *cqe) {
+	struct ioq *ioq = state->ioq;
+
+	struct ioq_ent *ent = io_uring_cqe_get_data(cqe);
+	ent->result = cqe->res;
+
+	if (ent->result < 0) {
+		goto push;
+	}
+
+	switch (ent->op) {
+		case IOQ_OPENDIR: {
+			int fd = ent->result;
+			if (ioq_check_cancel(ioq, ent)) {
+				xclose(fd);
+				goto push;
+			}
+
+			struct ioq_opendir *args = &ent->opendir;
+			ent->result = try(bfs_opendir(args->dir, fd, NULL, args->flags));
+			if (ent->result >= 0) {
+				// TODO: io_uring_prep_getdents()
+				bfs_polldir(args->dir);
+			} else {
+				xclose(fd);
+			}
+
+			break;
+		}
+
+#if BFS_USE_STATX
+		case IOQ_STAT: {
+			struct ioq_stat *args = &ent->stat;
+			ent->result = try(bfs_statx_convert(args->buf, args->xbuf));
+			break;
+		}
+#endif
+
+		default:
+			break;
+	}
+
+push:
+	ioq_batch_push(ioq->ready, &state->ready, ent);
+}
+
+/** Wait for submitted requests to complete. */
+static void ioq_ring_drain(struct ioq_ring_state *state, size_t wait_nr) {
+	struct ioq *ioq = state->ioq;
+	struct io_uring *ring = state->ring;
+
+	bfs_assert(wait_nr <= state->submitted);
+
+	while (state->submitted > 0) {
+		struct io_uring_cqe *cqe;
+		if (wait_nr > 0) {
+			io_uring_wait_cqes(ring, &cqe, wait_nr, NULL, NULL);
+		}
+
+		unsigned int head;
+		size_t seen = 0;
+		io_uring_for_each_cqe (ring, head, cqe) {
+			ioq_reap_cqe(state, cqe);
+			++seen;
+		}
+
+		io_uring_cq_advance(ring, seen);
+		state->submitted -= seen;
+
+		if (seen >= wait_nr) {
+			break;
+		}
+		wait_nr -= seen;
+	}
+
+	ioq_batch_flush(ioq->ready, &state->ready);
+}
+
+/** Submit prepped SQEs, and wait for some to complete. */
+static void ioq_ring_submit(struct ioq_ring_state *state) {
+	struct io_uring *ring = state->ring;
+
+	size_t unreaped = state->prepped + state->submitted;
+	size_t wait_nr = 0;
+
+	if (state->prepped == 0 && unreaped > 0) {
+		// If we have no new SQEs, wait for at least one old one to
+		// complete, to avoid livelock
+		wait_nr = 1;
+	}
+
+	if (unreaped > ring->sq.ring_entries) {
+		// Keep the completion queue below half full
+		wait_nr = unreaped - ring->sq.ring_entries;
+	}
+
+	// Submit all prepped SQEs
+	while (state->prepped > 0) {
+		int ret = io_uring_submit_and_wait(state->ring, wait_nr);
+		if (ret <= 0) {
+			continue;
+		}
+
+		state->submitted += ret;
+		state->prepped -= ret;
+		if (state->prepped > 0) {
+			// In the unlikely event of a short submission, any SQE
+			// links will be broken.  Wait for all SQEs to complete
+			// to preserve any ordering requirements.
+			ioq_ring_drain(state, state->submitted);
+			wait_nr = 0;
+		}
+	}
+
+	// Drain all the CQEs we waited for (and any others that are ready)
+	ioq_ring_drain(state, wait_nr);
+}
+
+/** Reserve space for a number of SQEs, submitting if necessary. */
+static void ioq_reserve_sqes(struct ioq_ring_state *state, unsigned int count) {
+	while (io_uring_sq_space_left(state->ring) < count) {
+		ioq_ring_submit(state);
+	}
+}
+
+/** Get an SQE, submitting if necessary. */
+static struct io_uring_sqe *ioq_get_sqe(struct ioq_ring_state *state) {
+	ioq_reserve_sqes(state, 1);
+	return io_uring_get_sqe(state->ring);
+}
+
 /** Dispatch a single request asynchronously. */
 static struct io_uring_sqe *ioq_dispatch_async(struct ioq_ring_state *state, struct ioq_ent *ent) {
-	struct io_uring *ring = state->ring;
 	enum ioq_ring_ops ops = state->ops;
 	struct io_uring_sqe *sqe = NULL;
 
 	switch (ent->op) {
+	case IOQ_NOP:
+		if (ent->nop.type == IOQ_NOP_HEAVY) {
+			sqe = ioq_get_sqe(state);
+			io_uring_prep_nop(sqe);
+		}
+		return sqe;
+
 	case IOQ_CLOSE:
 		if (ops & IOQ_RING_CLOSE) {
-			sqe = io_uring_get_sqe(ring);
+			sqe = ioq_get_sqe(state);
 			io_uring_prep_close(sqe, ent->close.fd);
 		}
 		return sqe;
 
 	case IOQ_OPENDIR:
 		if (ops & IOQ_RING_OPENAT) {
-			sqe = io_uring_get_sqe(ring);
+			sqe = ioq_get_sqe(state);
 			struct ioq_opendir *args = &ent->opendir;
 			int flags = O_RDONLY | O_CLOEXEC | O_DIRECTORY;
 			io_uring_prep_openat(sqe, args->dfd, args->path, flags, 0);
@@ -605,7 +834,7 @@ static struct io_uring_sqe *ioq_dispatch_async(struct ioq_ring_state *state, str
 	case IOQ_CLOSEDIR:
 #if BFS_USE_UNWRAPDIR
 		if (ops & IOQ_RING_CLOSE) {
-			sqe = io_uring_get_sqe(ring);
+			sqe = ioq_get_sqe(state);
 			io_uring_prep_close(sqe, bfs_unwrapdir(ent->closedir.dir));
 		}
 #endif
@@ -614,10 +843,10 @@ static struct io_uring_sqe *ioq_dispatch_async(struct ioq_ring_state *state, str
 	case IOQ_STAT:
 #if BFS_USE_STATX
 		if (ops & IOQ_RING_STATX) {
-			sqe = io_uring_get_sqe(ring);
+			sqe = ioq_get_sqe(state);
 			struct ioq_stat *args = &ent->stat;
 			int flags = bfs_statx_flags(args->flags);
-			unsigned int mask = STATX_BASIC_STATS | STATX_BTIME;
+			unsigned int mask = bfs_statx_mask();
 			io_uring_prep_statx(sqe, args->dfd, args->path, flags, mask, args->xbuf);
 		}
 #endif
@@ -630,7 +859,7 @@ static struct io_uring_sqe *ioq_dispatch_async(struct ioq_ring_state *state, str
 
 /** Check if ioq_ring_reap() has work to do. */
 static bool ioq_ring_empty(struct ioq_ring_state *state) {
-	return !state->prepped && !state->submitted && !state->ready.size;
+	return !state->prepped && !state->submitted && ioq_batch_empty(&state->ready);
 }
 
 /** Prep a single SQE. */
@@ -658,163 +887,94 @@ static bool ioq_ring_prep(struct ioq_ring_state *state) {
 	}
 
 	struct ioq *ioq = state->ioq;
-	struct io_uring *ring = state->ring;
-	struct ioq_ent *pending[IOQ_BATCH];
-
-	while (io_uring_sq_space_left(ring) >= IOQ_BATCH) {
-		bool block = ioq_ring_empty(state);
-		ioqq_pop_batch(ioq->pending, pending, IOQ_BATCH, block);
-
-		bool any = false;
-		for (size_t i = 0; i < IOQ_BATCH; ++i) {
-			struct ioq_ent *ent = pending[i];
-			if (ent == &IOQ_STOP) {
-				ioqq_push(ioq->pending, &IOQ_STOP);
-				state->stop = true;
-				goto done;
-			} else if (ent) {
-				ioq_prep_sqe(state, ent);
-				any = true;
-			}
-		}
-
-		if (!any) {
-			break;
-		}
-	}
-
-done:
-	return !ioq_ring_empty(state);
-}
-
-/** Reap a single CQE. */
-static void ioq_reap_cqe(struct ioq_ring_state *state, struct io_uring_cqe *cqe) {
-	struct ioq *ioq = state->ioq;
-	struct io_uring *ring = state->ring;
-
-	struct ioq_ent *ent = io_uring_cqe_get_data(cqe);
-	ent->result = cqe->res;
-	io_uring_cqe_seen(ring, cqe);
-	--state->submitted;
-
-	if (ent->result < 0) {
-		goto push;
-	}
 
-	switch (ent->op) {
-		case IOQ_OPENDIR: {
-			int fd = ent->result;
-			if (ioq_check_cancel(ioq, ent)) {
-				xclose(fd);
-				goto push;
-			}
-
-			struct ioq_opendir *args = &ent->opendir;
-			ent->result = try(bfs_opendir(args->dir, fd, NULL, args->flags));
-			if (ent->result >= 0) {
-				// TODO: io_uring_prep_getdents()
-				bfs_polldir(args->dir);
-			} else {
-				xclose(fd);
-			}
+	struct ioq_batch pending;
+	ioq_batch_reset(&pending);
 
+	while (true) {
+		bool block = ioq_ring_empty(state);
+		struct ioq_ent *ent = ioq_batch_pop(ioq->pending, &pending, block);
+		if (ent == &IOQ_STOP) {
+			ioqq_push(ioq->pending, ent);
+			state->stop = true;
 			break;
-		}
-
-#if BFS_USE_STATX
-		case IOQ_STAT: {
-			struct ioq_stat *args = &ent->stat;
-			ent->result = try(bfs_statx_convert(args->buf, args->xbuf));
+		} else if (ent) {
+			ioq_prep_sqe(state, ent);
+		} else {
 			break;
 		}
-#endif
-
-		default:
-			break;
 	}
 
-push:
-	ioq_batch_push(ioq->ready, &state->ready, ent);
+	bfs_assert(ioq_batch_empty(&pending));
+	return !ioq_ring_empty(state);
 }
 
-/** Reap a batch of CQEs. */
-static void ioq_ring_reap(struct ioq_ring_state *state) {
-	struct ioq *ioq = state->ioq;
-	struct io_uring *ring = state->ring;
+/** io_uring worker loop. */
+static int ioq_ring_work(struct ioq_thread *thread) {
+	struct io_uring *ring = &thread->ring;
 
-	while (state->prepped) {
-		int ret = io_uring_submit_and_wait(ring, 1);
-		if (ret > 0) {
-			state->prepped -= ret;
-			state->submitted += ret;
+#ifdef IORING_SETUP_R_DISABLED
+	if (ring->flags & IORING_SETUP_R_DISABLED) {
+		if (io_uring_enable_rings(ring) != 0) {
+			return -1;
 		}
 	}
+#endif
 
-	while (state->submitted) {
-		struct io_uring_cqe *cqe;
-		if (io_uring_wait_cqe(ring, &cqe) < 0) {
-			continue;
-		}
-
-		ioq_reap_cqe(state, cqe);
-	}
-
-	ioq_batch_flush(ioq->ready, &state->ready);
-}
-
-/** io_uring worker loop. */
-static void ioq_ring_work(struct ioq_thread *thread) {
 	struct ioq_ring_state state = {
 		.ioq = thread->parent,
-		.ring = &thread->ring,
+		.ring = ring,
 		.ops = thread->ring_ops,
 	};
 
 	while (ioq_ring_prep(&state)) {
-		ioq_ring_reap(&state);
+		ioq_ring_submit(&state);
 	}
+
+	ioq_ring_drain(&state, state.submitted);
+	return 0;
 }
 
-#endif // BFS_USE_LIBURING
+#endif // BFS_WITH_LIBURING
 
 /** Synchronous syscall loop. */
 static void ioq_sync_work(struct ioq_thread *thread) {
 	struct ioq *ioq = thread->parent;
 
-	bool stop = false;
-	while (!stop) {
-		struct ioq_ent *pending[IOQ_BATCH];
-		ioqq_pop_batch(ioq->pending, pending, IOQ_BATCH, true);
-
-		struct ioq_batch ready;
-		ready.size = 0;
-
-		for (size_t i = 0; i < IOQ_BATCH; ++i) {
-			struct ioq_ent *ent = pending[i];
-			if (ent == &IOQ_STOP) {
-				ioqq_push(ioq->pending, &IOQ_STOP);
-				stop = true;
-				break;
-			} else if (ent) {
-				if (!ioq_check_cancel(ioq, ent)) {
-					ioq_dispatch_sync(ioq, ent);
-				}
-				ioq_batch_push(ioq->ready, &ready, ent);
-			}
+	struct ioq_batch pending, ready;
+	ioq_batch_reset(&pending);
+	ioq_batch_reset(&ready);
+
+	while (true) {
+		if (ioq_batch_empty(&pending)) {
+			ioq_batch_flush(ioq->ready, &ready);
 		}
 
-		ioq_batch_flush(ioq->ready, &ready);
+		struct ioq_ent *ent = ioq_batch_pop(ioq->pending, &pending, true);
+		if (ent == &IOQ_STOP) {
+			ioqq_push(ioq->pending, ent);
+			break;
+		}
+
+		if (!ioq_check_cancel(ioq, ent)) {
+			ioq_dispatch_sync(ioq, ent);
+		}
+		ioq_batch_push(ioq->ready, &ready, ent);
 	}
+
+	bfs_assert(ioq_batch_empty(&pending));
+	ioq_batch_flush(ioq->ready, &ready);
 }
 
 /** Background thread entry point. */
 static void *ioq_work(void *ptr) {
 	struct ioq_thread *thread = ptr;
 
-#if BFS_USE_LIBURING
+#if BFS_WITH_LIBURING
 	if (thread->ring_err == 0) {
-		ioq_ring_work(thread);
-		return NULL;
+		if (ioq_ring_work(thread) == 0) {
+			return NULL;
+		}
 	}
 #endif
 
@@ -822,9 +982,30 @@ static void *ioq_work(void *ptr) {
 	return NULL;
 }
 
+#if BFS_WITH_LIBURING
+/** Test whether some io_uring setup flags are supported. */
+static bool ioq_ring_probe_flags(struct io_uring_params *params, unsigned int flags) {
+	unsigned int saved = params->flags;
+	params->flags |= flags;
+
+	struct io_uring ring;
+	int ret = io_uring_queue_init_params(2, &ring, params);
+	if (ret == 0) {
+		io_uring_queue_exit(&ring);
+	}
+
+	if (ret == -EINVAL) {
+		params->flags = saved;
+		return false;
+	}
+
+	return true;
+}
+#endif
+
 /** Initialize io_uring thread state. */
 static int ioq_ring_init(struct ioq *ioq, struct ioq_thread *thread) {
-#if BFS_USE_LIBURING
+#if BFS_WITH_LIBURING
 	struct ioq_thread *prev = NULL;
 	if (thread > ioq->threads) {
 		prev = thread - 1;
@@ -835,11 +1016,31 @@ static int ioq_ring_init(struct ioq *ioq, struct ioq_thread *thread) {
 		return -1;
 	}
 
-	// Share io-wq workers between rings
 	struct io_uring_params params = {0};
+
 	if (prev) {
-		params.flags |= IORING_SETUP_ATTACH_WQ;
+		// Share io-wq workers between rings
+		params.flags = prev->ring.flags | IORING_SETUP_ATTACH_WQ;
 		params.wq_fd = prev->ring.ring_fd;
+	} else {
+#ifdef IORING_SETUP_SUBMIT_ALL
+		// Don't abort submission just because an inline request fails
+		ioq_ring_probe_flags(&params, IORING_SETUP_SUBMIT_ALL);
+#endif
+
+#ifdef IORING_SETUP_R_DISABLED
+		// Don't enable the ring yet (needed for SINGLE_ISSUER)
+		if (ioq_ring_probe_flags(&params, IORING_SETUP_R_DISABLED)) {
+#  ifdef IORING_SETUP_SINGLE_ISSUER
+			// Allow optimizations assuming only one task submits SQEs
+			ioq_ring_probe_flags(&params, IORING_SETUP_SINGLE_ISSUER);
+#  endif
+#  ifdef IORING_SETUP_DEFER_TASKRUN
+			// Don't interrupt us aggressively with completion events
+			ioq_ring_probe_flags(&params, IORING_SETUP_DEFER_TASKRUN);
+#  endif
+		}
+#endif
 	}
 
 	// Use a page for each SQE ring
@@ -877,6 +1078,7 @@ static int ioq_ring_init(struct ioq *ioq, struct ioq_thread *thread) {
 		return -1;
 	}
 
+#if BFS_HAS_IO_URING_MAX_WORKERS
 	// Limit the number of io_uring workers
 	unsigned int values[] = {
 		ioq->nthreads, // [IO_WQ_BOUND]
@@ -885,12 +1087,14 @@ static int ioq_ring_init(struct ioq *ioq, struct ioq_thread *thread) {
 	io_uring_register_iowq_max_workers(&thread->ring, values);
 #endif
 
+#endif // BFS_WITH_LIBURING
+
 	return 0;
 }
 
 /** Destroy an io_uring. */
 static void ioq_ring_exit(struct ioq_thread *thread) {
-#if BFS_USE_LIBURING
+#if BFS_WITH_LIBURING
 	if (thread->ring_err == 0) {
 		io_uring_queue_exit(&thread->ring);
 	}
@@ -898,7 +1102,8 @@ static void ioq_ring_exit(struct ioq_thread *thread) {
 }
 
 /** Create an I/O queue thread. */
-static int ioq_thread_create(struct ioq *ioq, struct ioq_thread *thread) {
+static int ioq_thread_create(struct ioq *ioq, size_t i) {
+	struct ioq_thread *thread = &ioq->threads[i];
 	thread->parent = ioq;
 
 	ioq_ring_init(ioq, thread);
@@ -908,6 +1113,11 @@ static int ioq_thread_create(struct ioq *ioq, struct ioq_thread *thread) {
 		return -1;
 	}
 
+	char name[16];
+	if (snprintf(name, sizeof(name), "ioq-%zu", i) >= 0) {
+		thread_setname(thread->id, name);
+	}
+
 	return 0;
 }
 
@@ -926,7 +1136,7 @@ struct ioq *ioq_create(size_t depth, size_t nthreads) {
 	ioq->depth = depth;
 
 	ARENA_INIT(&ioq->ents, struct ioq_ent);
-#if BFS_USE_LIBURING && BFS_USE_STATX
+#if BFS_WITH_LIBURING && BFS_USE_STATX
 	ARENA_INIT(&ioq->xbufs, struct statx);
 #endif
 
@@ -942,7 +1152,7 @@ struct ioq *ioq_create(size_t depth, size_t nthreads) {
 
 	ioq->nthreads = nthreads;
 	for (size_t i = 0; i < nthreads; ++i) {
-		if (ioq_thread_create(ioq, &ioq->threads[i]) != 0) {
+		if (ioq_thread_create(ioq, i) != 0) {
 			ioq->nthreads = i;
 			goto fail;
 		}
@@ -984,6 +1194,18 @@ static struct ioq_ent *ioq_request(struct ioq *ioq, enum ioq_op op, void *ptr) {
 	return ent;
 }
 
+int ioq_nop(struct ioq *ioq, enum ioq_nop_type type, void *ptr) {
+	struct ioq_ent *ent = ioq_request(ioq, IOQ_NOP, ptr);
+	if (!ent) {
+		return -1;
+	}
+
+	ent->nop.type = type;
+
+	ioq_batch_push(ioq->pending, &ioq->pending_batch, ent);
+	return 0;
+}
+
 int ioq_close(struct ioq *ioq, int fd, void *ptr) {
 	struct ioq_ent *ent = ioq_request(ioq, IOQ_CLOSE, ptr);
 	if (!ent) {
@@ -992,7 +1214,7 @@ int ioq_close(struct ioq *ioq, int fd, void *ptr) {
 
 	ent->close.fd = fd;
 
-	ioqq_push(ioq->pending, ent);
+	ioq_batch_push(ioq->pending, &ioq->pending_batch, ent);
 	return 0;
 }
 
@@ -1008,7 +1230,7 @@ int ioq_opendir(struct ioq *ioq, struct bfs_dir *dir, int dfd, const char *path,
 	args->path = path;
 	args->flags = flags;
 
-	ioqq_push(ioq->pending, ent);
+	ioq_batch_push(ioq->pending, &ioq->pending_batch, ent);
 	return 0;
 }
 
@@ -1020,7 +1242,7 @@ int ioq_closedir(struct ioq *ioq, struct bfs_dir *dir, void *ptr) {
 
 	ent->closedir.dir = dir;
 
-	ioqq_push(ioq->pending, ent);
+	ioq_batch_push(ioq->pending, &ioq->pending_batch, ent);
 	return 0;
 }
 
@@ -1036,7 +1258,7 @@ int ioq_stat(struct ioq *ioq, int dfd, const char *path, enum bfs_stat_flags fla
 	args->flags = flags;
 	args->buf = buf;
 
-#if BFS_USE_LIBURING && BFS_USE_STATX
+#if BFS_WITH_LIBURING && BFS_USE_STATX
 	args->xbuf = arena_alloc(&ioq->xbufs);
 	if (!args->xbuf) {
 		ioq_free(ioq, ent);
@@ -1044,23 +1266,30 @@ int ioq_stat(struct ioq *ioq, int dfd, const char *path, enum bfs_stat_flags fla
 	}
 #endif
 
-	ioqq_push(ioq->pending, ent);
+	ioq_batch_push(ioq->pending, &ioq->pending_batch, ent);
 	return 0;
 }
 
+void ioq_submit(struct ioq *ioq) {
+	ioq_batch_flush(ioq->pending, &ioq->pending_batch);
+}
+
 struct ioq_ent *ioq_pop(struct ioq *ioq, bool block) {
+	// Don't forget to submit before popping
+	bfs_assert(ioq_batch_empty(&ioq->pending_batch));
+
 	if (ioq->size == 0) {
 		return NULL;
 	}
 
-	return ioqq_pop(ioq->ready, block);
+	return ioq_batch_pop(ioq->ready, &ioq->ready_batch, block);
 }
 
 void ioq_free(struct ioq *ioq, struct ioq_ent *ent) {
 	bfs_assert(ioq->size > 0);
 	--ioq->size;
 
-#if BFS_USE_LIBURING && BFS_USE_STATX
+#if BFS_WITH_LIBURING && BFS_USE_STATX
 	if (ent->op == IOQ_STAT && ent->stat.xbuf) {
 		arena_free(&ioq->xbufs, ent->stat.xbuf);
 	}
@@ -1071,7 +1300,8 @@ void ioq_free(struct ioq *ioq, struct ioq_ent *ent) {
 
 void ioq_cancel(struct ioq *ioq) {
 	if (!exchange(&ioq->cancel, true, relaxed)) {
-		ioqq_push(ioq->pending, &IOQ_STOP);
+		ioq_batch_push(ioq->pending, &ioq->pending_batch, &IOQ_STOP);
+		ioq_submit(ioq);
 	}
 }
 
@@ -1091,7 +1321,7 @@ void ioq_destroy(struct ioq *ioq) {
 	ioqq_destroy(ioq->ready);
 	ioqq_destroy(ioq->pending);
 
-#if BFS_USE_LIBURING && BFS_USE_STATX
+#if BFS_WITH_LIBURING && BFS_USE_STATX
 	arena_destroy(&ioq->xbufs);
 #endif
 	arena_destroy(&ioq->ents);
diff --git a/src/ioq.h b/src/ioq.h
index d8e1573..5eaa066 100644
--- a/src/ioq.h
+++ b/src/ioq.h
@@ -8,9 +8,10 @@
 #ifndef BFS_IOQ_H
 #define BFS_IOQ_H
 
-#include "prelude.h"
+#include "bfs.h"
 #include "dir.h"
 #include "stat.h"
+
 #include <stddef.h>
 
 /**
@@ -22,6 +23,8 @@ struct ioq;
  * I/O queue operations.
  */
 enum ioq_op {
+	/** ioq_nop(). */
+	IOQ_NOP,
 	/** ioq_close(). */
 	IOQ_CLOSE,
 	/** ioq_opendir(). */
@@ -33,18 +36,21 @@ enum ioq_op {
 };
 
 /**
- * The I/O queue implementation needs two tag bits in each pointer to a struct
- * ioq_ent, so we need to ensure at least 4-byte alignment.  The natural
- * alignment is enough on most architectures, but not m68k, so over-align it.
+ * ioq_nop() types.
  */
-#define IOQ_ENT_ALIGN alignas(4)
+enum ioq_nop_type {
+	/** A lightweight nop that avoids syscalls. */
+	IOQ_NOP_LIGHT,
+	/** A heavyweight nop that involves a syscall. */
+	IOQ_NOP_HEAVY,
+};
 
 /**
  * An I/O queue entry.
  */
 struct ioq_ent {
 	/** The I/O operation. */
-	IOQ_ENT_ALIGN enum ioq_op op;
+	cache_align enum ioq_op op;
 
 	/** The return value (on success) or negative error code (on failure). */
 	int result;
@@ -54,6 +60,10 @@ struct ioq_ent {
 
 	/** Operation-specific arguments. */
 	union {
+		/** ioq_nop() args. */
+		struct ioq_nop {
+			enum ioq_nop_type type;
+		} nop;
 		/** ioq_close() args. */
 		struct ioq_close {
 			int fd;
@@ -83,9 +93,9 @@ struct ioq_ent {
 /**
  * Create an I/O queue.
  *
- * @param depth
+ * @depth
  *         The maximum depth of the queue.
- * @param nthreads
+ * @nthreads
  *         The maximum number of background threads.
  * @return
  *         The new I/O queue, or NULL on failure.
@@ -98,13 +108,27 @@ struct ioq *ioq_create(size_t depth, size_t nthreads);
 size_t ioq_capacity(const struct ioq *ioq);
 
 /**
+ * A no-op, for benchmarking.
+ *
+ * @ioq
+ *         The I/O queue.
+ * @type
+ *         The type of operation to perform.
+ * @ptr
+ *         An arbitrary pointer to associate with the request.
+ * @return
+ *         0 on success, or -1 on failure.
+ */
+int ioq_nop(struct ioq *ioq, enum ioq_nop_type type, void *ptr);
+
+/**
  * Asynchronous close().
  *
- * @param ioq
+ * @ioq
  *         The I/O queue.
- * @param fd
+ * @fd
  *         The fd to close.
- * @param ptr
+ * @ptr
  *         An arbitrary pointer to associate with the request.
  * @return
  *         0 on success, or -1 on failure.
@@ -114,17 +138,17 @@ int ioq_close(struct ioq *ioq, int fd, void *ptr);
 /**
  * Asynchronous bfs_opendir().
  *
- * @param ioq
+ * @ioq
  *         The I/O queue.
- * @param dir
+ * @dir
  *         The allocated directory.
- * @param dfd
+ * @dfd
  *         The base file descriptor.
- * @param path
+ * @path
  *         The path to open, relative to dfd.
- * @param flags
+ * @flags
  *         Flags that control which directory entries are listed.
- * @param ptr
+ * @ptr
  *         An arbitrary pointer to associate with the request.
  * @return
  *         0 on success, or -1 on failure.
@@ -134,11 +158,11 @@ int ioq_opendir(struct ioq *ioq, struct bfs_dir *dir, int dfd, const char *path,
 /**
  * Asynchronous bfs_closedir().
  *
- * @param ioq
+ * @ioq
  *         The I/O queue.
- * @param dir
+ * @dir
  *         The directory to close.
- * @param ptr
+ * @ptr
  *         An arbitrary pointer to associate with the request.
  * @return
  *         0 on success, or -1 on failure.
@@ -148,17 +172,17 @@ int ioq_closedir(struct ioq *ioq, struct bfs_dir *dir, void *ptr);
 /**
  * Asynchronous bfs_stat().
  *
- * @param ioq
+ * @ioq
  *         The I/O queue.
- * @param dfd
+ * @dfd
  *         The base file descriptor.
- * @param path
+ * @path
  *         The path to stat, relative to dfd.
- * @param flags
+ * @flags
  *         Flags that affect the lookup.
- * @param buf
+ * @buf
  *         A place to store the stat buffer, if successful.
- * @param ptr
+ * @ptr
  *         An arbitrary pointer to associate with the request.
  * @return
  *         0 on success, or -1 on failure.
@@ -166,9 +190,14 @@ int ioq_closedir(struct ioq *ioq, struct bfs_dir *dir, void *ptr);
 int ioq_stat(struct ioq *ioq, int dfd, const char *path, enum bfs_stat_flags flags, struct bfs_stat *buf, void *ptr);
 
 /**
+ * Submit any buffered requests.
+ */
+void ioq_submit(struct ioq *ioq);
+
+/**
  * Pop a response from the queue.
  *
- * @param ioq
+ * @ioq
  *         The I/O queue.
  * @return
  *         The next response, or NULL.
@@ -178,9 +207,9 @@ struct ioq_ent *ioq_pop(struct ioq *ioq, bool block);
 /**
  * Free a queue entry.
  *
- * @param ioq
+ * @ioq
  *         The I/O queue.
- * @param ent
+ * @ent
  *         The entry to free.
  */
 void ioq_free(struct ioq *ioq, struct ioq_ent *ent);
diff --git a/src/list.h b/src/list.h
index 61d0e5b..276c610 100644
--- a/src/list.h
+++ b/src/list.h
@@ -83,13 +83,14 @@
 #define BFS_LIST_H
 
 #include "diag.h"
+
 #include <stddef.h>
 #include <string.h>
 
 /**
  * Initialize a singly-linked list.
  *
- * @param list
+ * @list
  *         The list to initialize.
  *
  * ---
@@ -116,9 +117,9 @@
 /**
  * Initialize a singly-linked list item.
  *
- * @param item
+ * @item
  *         The item to initialize.
- * @param node (optional)
+ * @node (optional)
  *         If specified, use item->node.next rather than item->next.
  *
  * ---
@@ -200,7 +201,7 @@
 /**
  * Get the head of a singly-linked list.
  *
- * @param list
+ * @list
  *         The list in question.
  * @return
  *         The first item in the list.
@@ -227,9 +228,9 @@
 /**
  * Get the tail of a singly-linked list.
  *
- * @param list
+ * @list
  *         The list in question.
- * @param node (optional)
+ * @node (optional)
  *         If specified, use item->node.next rather than item->next.
  * @return
  *         The last item in the list.
@@ -246,11 +247,11 @@
 /**
  * Check if an item is attached to a singly-linked list.
  *
- * @param list
+ * @list
  *         The list to check.
- * @param item
+ * @item
  *         The item to check.
- * @param node (optional)
+ * @node (optional)
  *         If specified, use item->node.next rather than item->next.
  * @return
  *         Whether the item is attached to the list.
@@ -267,13 +268,13 @@
 /**
  * Insert an item into a singly-linked list.
  *
- * @param list
+ * @list
  *         The list to modify.
- * @param cursor
+ * @cursor
  *         A pointer to the item to insert after, e.g. &list->head or list->tail.
- * @param item
+ * @item
  *         The item to insert.
- * @param node (optional)
+ * @node (optional)
  *         If specified, use item->node.next rather than item->next.
  * @return
  *         A cursor for the next item.
@@ -294,11 +295,11 @@
 /**
  * Add an item to the tail of a singly-linked list.
  *
- * @param list
+ * @list
  *         The list to modify.
- * @param item
+ * @item
  *         The item to append.
- * @param node (optional)
+ * @node (optional)
  *         If specified, use item->node.next rather than item->next.
  */
 #define SLIST_APPEND(list, ...) \
@@ -310,11 +311,11 @@
 /**
  * Add an item to the head of a singly-linked list.
  *
- * @param list
+ * @list
  *         The list to modify.
- * @param item
+ * @item
  *         The item to prepend.
- * @param node (optional)
+ * @node (optional)
  *         If specified, use item->node.next rather than item->next.
  */
 #define SLIST_PREPEND(list, ...) \
@@ -324,27 +325,43 @@
 	LIST_VOID_(SLIST_INSERT_(list, &(list)->head, item, __VA_ARGS__))
 
 /**
+ * Splice a singly-linked list into another.
+ *
+ * @dest
+ *         The destination list.
+ * @cursor
+ *         A pointer to the item to splice after, e.g. &dest->head or dest->tail.
+ * @src
+ *         The source list.
+ */
+#define SLIST_SPLICE(dest, cursor, src) \
+	LIST_VOID_(SLIST_SPLICE_((dest), (cursor), (src)))
+
+#define SLIST_SPLICE_(dest, cursor, src) \
+	*src->tail = *cursor, \
+	*cursor = src->head, \
+	dest->tail = *dest->tail ? src->tail : dest->tail, \
+	SLIST_INIT(src)
+
+/**
  * Add an entire singly-linked list to the tail of another.
  *
- * @param dest
+ * @dest
  *         The destination list.
- * @param src
+ * @src
  *         The source list.
  */
 #define SLIST_EXTEND(dest, src) \
-	SLIST_EXTEND_((dest), (src))
-
-#define SLIST_EXTEND_(dest, src) \
-	(src->head ? (*dest->tail = src->head, dest->tail = src->tail, SLIST_INIT(src)) : (void)0)
+	SLIST_SPLICE(dest, (dest)->tail, src)
 
 /**
  * Remove an item from a singly-linked list.
  *
- * @param list
+ * @list
  *         The list to modify.
- * @param cursor
+ * @cursor
  *         A pointer to the item to remove, either &list->head or &prev->next.
- * @param node (optional)
+ * @node (optional)
  *         If specified, use item->node.next rather than item->next.
  * @return
  *         The removed item.
@@ -357,10 +374,10 @@
 
 #define SLIST_REMOVE__(list, cursor, next) \
 	(list->tail = (*cursor)->next ? list->tail : cursor, \
-	 slist_remove_impl(*cursor, cursor, &(*cursor)->next, sizeof(*cursor)))
+	 slist_remove_(*cursor, cursor, &(*cursor)->next, sizeof(*cursor)))
 
 // Helper for SLIST_REMOVE()
-static inline void *slist_remove_impl(void *ret, void *cursor, void *next, size_t size) {
+static inline void *slist_remove_(void *ret, void *cursor, void *next, size_t size) {
 	// ret = *cursor;
 	// *cursor = ret->next;
 	memcpy(cursor, next, size);
@@ -372,9 +389,9 @@ static inline void *slist_remove_impl(void *ret, void *cursor, void *next, size_
 /**
  * Pop the head off a singly-linked list.
  *
- * @param list
+ * @list
  *         The list to modify.
- * @param node (optional)
+ * @node (optional)
  *         If specified, use head->node.next rather than head->next.
  * @return
  *         The popped item, or NULL if the list was empty.
@@ -391,13 +408,13 @@ static inline void *slist_remove_impl(void *ret, void *cursor, void *next, size_
 /**
  * Loop over the items in a singly-linked list.
  *
- * @param type
+ * @type
  *         The list item type.
- * @param item
+ * @item
  *         The induction variable name.
- * @param list
+ * @list
  *         The list to iterate.
- * @param node (optional)
+ * @node (optional)
  *         If specified, use head->node.next rather than head->next.
  */
 #define for_slist(type, item, ...) \
@@ -412,9 +429,24 @@ static inline void *slist_remove_impl(void *ret, void *cursor, void *next, size_
 	     item = _next)
 
 /**
+ * Loop over a singly-linked list, popping each item.
+ *
+ * @type
+ *         The list item type.
+ * @item
+ *         The induction variable name.
+ * @list
+ *         The list to drain.
+ * @node (optional)
+ *         If specified, use head->node.next rather than head->next.
+ */
+#define drain_slist(type, item, ...) \
+	for (type *item; (item = SLIST_POP(__VA_ARGS__));)
+
+/**
  * Initialize a doubly-linked list.
  *
- * @param list
+ * @list
  *         The list to initialize.
  */
 #define LIST_INIT(list) \
@@ -433,9 +465,9 @@ static inline void *slist_remove_impl(void *ret, void *cursor, void *next, size_
 /**
  * Initialize a doubly-linked list item.
  *
- * @param item
+ * @item
  *         The item to initialize.
- * @param node (optional)
+ * @node (optional)
  *         If specified, use item->node.next rather than item->next.
  */
 #define LIST_ITEM_INIT(...) \
@@ -465,11 +497,11 @@ static inline void *slist_remove_impl(void *ret, void *cursor, void *next, size_
 /**
  * Add an item to the tail of a doubly-linked list.
  *
- * @param list
+ * @list
  *         The list to modify.
- * @param item
+ * @item
  *         The item to append.
- * @param node (optional)
+ * @node (optional)
  *         If specified, use item->node.{prev,next} rather than item->{prev,next}.
  */
 #define LIST_APPEND(list, ...) \
@@ -478,11 +510,11 @@ static inline void *slist_remove_impl(void *ret, void *cursor, void *next, size_
 /**
  * Add an item to the head of a doubly-linked list.
  *
- * @param list
+ * @list
  *         The list to modify.
- * @param item
+ * @item
  *         The item to prepend.
- * @param node (optional)
+ * @node (optional)
  *         If specified, use item->node.{prev,next} rather than item->{prev,next}.
  */
 #define LIST_PREPEND(list, ...) \
@@ -491,11 +523,11 @@ static inline void *slist_remove_impl(void *ret, void *cursor, void *next, size_
 /**
  * Check if an item is attached to a doubly-linked list.
  *
- * @param list
+ * @list
  *         The list to check.
- * @param item
+ * @item
  *         The item to check.
- * @param node (optional)
+ * @node (optional)
  *         If specified, use item->node.{prev,next} rather than item->{prev,next}.
  * @return
  *         Whether the item is attached to the list.
@@ -512,13 +544,13 @@ static inline void *slist_remove_impl(void *ret, void *cursor, void *next, size_
 /**
  * Insert into a doubly-linked list after the given cursor.
  *
- * @param list
+ * @list
  *         The list to modify.
- * @param cursor
+ * @cursor
  *         Insert after this element.
- * @param item
+ * @item
  *         The item to insert.
- * @param node (optional)
+ * @node (optional)
  *         If specified, use item->node.{prev,next} rather than item->{prev,next}.
  */
 #define LIST_INSERT(list, cursor, ...) \
@@ -537,11 +569,11 @@ static inline void *slist_remove_impl(void *ret, void *cursor, void *next, size_
 /**
  * Remove an item from a doubly-linked list.
  *
- * @param list
+ * @list
  *         The list to modify.
- * @param item
+ * @item
  *         The item to remove.
- * @param node (optional)
+ * @node (optional)
  *         If specified, use item->node.{prev,next} rather than item->{prev,next}.
  */
 #define LIST_REMOVE(list, ...) \
@@ -558,13 +590,13 @@ static inline void *slist_remove_impl(void *ret, void *cursor, void *next, size_
 /**
  * Loop over the items in a doubly-linked list.
  *
- * @param type
+ * @type
  *         The list item type.
- * @param item
+ * @item
  *         The induction variable name.
- * @param list
+ * @list
  *         The list to iterate.
- * @param node (optional)
+ * @node (optional)
  *         If specified, use head->node.next rather than head->next.
  */
 #define for_list(type, item, ...) \
diff --git a/src/main.c b/src/main.c
index 9d8b206..da07508 100644
--- a/src/main.c
+++ b/src/main.c
@@ -20,13 +20,14 @@
  *     - bftw.[ch]     (an extended version of nftw(3))
  *
  * - Utilities:
+ *     - prelude.h     (feature test macros; automatically included)
  *     - alloc.[ch]    (memory allocation)
  *     - atomic.h      (atomic operations)
  *     - bar.[ch]      (a terminal status bar)
  *     - bit.h         (bit manipulation)
+ *     - bfs.h         (configuration and fundamental utilities)
  *     - bfstd.[ch]    (standard library wrappers/polyfills)
  *     - color.[ch]    (for pretty terminal colors)
- *     - prelude.h     (configuration and feature/platform detection)
  *     - diag.[ch]     (formats diagnostic messages)
  *     - dir.[ch]      (a directory API facade)
  *     - dstring.[ch]  (a dynamic string library)
@@ -36,21 +37,23 @@
  *     - mtab.[ch]     (parses the system's mount table)
  *     - pwcache.[ch]  (a cache for the user/group tables)
  *     - sanity.h      (sanitizer interfaces)
+ *     - sighook.[ch]  (signal hooks)
  *     - stat.[ch]     (wraps stat(), or statx() on Linux)
  *     - thread.h      (multi-threading)
  *     - trie.[ch]     (a trie set/map implementation)
  *     - typo.[ch]     (fuzzy matching for typos)
+ *     - version.c     (embeds version information)
  *     - xregex.[ch]   (regular expression support)
  *     - xspawn.[ch]   (spawns processes)
  *     - xtime.[ch]    (date/time handling utilities)
  */
 
-#include "prelude.h"
 #include "bfstd.h"
 #include "ctx.h"
 #include "diag.h"
 #include "eval.h"
 #include "parse.h"
+
 #include <errno.h>
 #include <fcntl.h>
 #include <locale.h>
diff --git a/src/mtab.c b/src/mtab.c
index 7905d14..40a9885 100644
--- a/src/mtab.c
+++ b/src/mtab.c
@@ -1,24 +1,28 @@
 // Copyright © Tavian Barnes <tavianator@tavianator.com>
 // SPDX-License-Identifier: 0BSD
 
-#include "prelude.h"
 #include "mtab.h"
+
 #include "alloc.h"
+#include "bfs.h"
 #include "bfstd.h"
 #include "stat.h"
 #include "trie.h"
+
 #include <errno.h>
 #include <fcntl.h>
 #include <stdlib.h>
 #include <string.h>
 #include <sys/types.h>
 
-#if !defined(BFS_USE_MNTENT) && BFS_USE_MNTENT_H
-#  define BFS_USE_MNTENT true
-#elif !defined(BFS_USE_MNTINFO) && BSD
-#  define BFS_USE_MNTINFO true
-#elif !defined(BFS_USE_MNTTAB) && __SVR4
-#  define BFS_USE_MNTTAB true
+#ifndef BFS_USE_MNTENT
+#  define BFS_USE_MNTENT BFS_HAS_GETMNTENT_1
+#endif
+#ifndef BFS_USE_MNTINFO
+#  define BFS_USE_MNTINFO (!BFS_USE_MNTENT && BFS_HAS_GETMNTINFO)
+#endif
+#ifndef BFS_USE_MNTTAB
+#  define BFS_USE_MNTTAB (!BFS_USE_MNTINFO && BFS_HAS_GETMNTENT_2)
 #endif
 
 #if BFS_USE_MNTENT
@@ -27,7 +31,6 @@
 #  include <stdio.h>
 #elif BFS_USE_MNTINFO
 #  include <sys/mount.h>
-#  include <sys/ucred.h>
 #elif BFS_USE_MNTTAB
 #  include <stdio.h>
 #  include <sys/mnttab.h>
@@ -66,7 +69,7 @@ struct bfs_mtab {
 /**
  * Add an entry to the mount table.
  */
-attr(maybe_unused)
+_maybe_unused
 static int bfs_mtab_add(struct bfs_mtab *mtab, const char *path, const char *type) {
 	size_t path_size = strlen(path) + 1;
 	size_t type_size = strlen(type) + 1;
@@ -148,7 +151,7 @@ struct bfs_mtab *bfs_mtab_parse(void) {
 
 	bfs_statfs *mntbuf;
 	int size = getmntinfo(&mntbuf, MNT_WAIT);
-	if (size < 0) {
+	if (size <= 0) {
 		error = errno;
 		goto fail;
 	}
@@ -253,10 +256,7 @@ static int bfs_mtab_fill_types(struct bfs_mtab *mtab) {
 			continue;
 		}
 
-		struct trie_leaf *leaf = trie_insert_mem(&mtab->types, &sb.dev, sizeof(sb.dev));
-		if (leaf) {
-			leaf->value = mount->type;
-		} else {
+		if (trie_set_mem(&mtab->types, &sb.mnt_id, sizeof(sb.mnt_id), mount->type) != 0) {
 			goto fail;
 		}
 	}
@@ -279,9 +279,9 @@ const char *bfs_fstype(const struct bfs_mtab *mtab, const struct bfs_stat *statb
 		}
 	}
 
-	const struct trie_leaf *leaf = trie_find_mem(&mtab->types, &statbuf->dev, sizeof(statbuf->dev));
-	if (leaf) {
-		return leaf->value;
+	const char *type = trie_get_mem(&mtab->types, &statbuf->mnt_id, sizeof(statbuf->mnt_id));
+	if (type) {
+		return type;
 	} else {
 		return "unknown";
 	}
diff --git a/src/mtab.h b/src/mtab.h
index 67290c2..090392b 100644
--- a/src/mtab.h
+++ b/src/mtab.h
@@ -8,8 +8,6 @@
 #ifndef BFS_MTAB_H
 #define BFS_MTAB_H
 
-#include "prelude.h"
-
 struct bfs_stat;
 
 /**
@@ -28,9 +26,9 @@ struct bfs_mtab *bfs_mtab_parse(void);
 /**
  * Determine the file system type that a file is on.
  *
- * @param mtab
+ * @mtab
  *         The current mount table.
- * @param statbuf
+ * @statbuf
  *         The bfs_stat() buffer for the file in question.
  * @return
  *         The type of file system containing this file, "unknown" if not known,
@@ -41,9 +39,9 @@ const char *bfs_fstype(const struct bfs_mtab *mtab, const struct bfs_stat *statb
 /**
  * Check if a file could be a mount point.
  *
- * @param mtab
+ * @mtab
  *         The current mount table.
- * @param name
+ * @name
  *         The name of the file to check.
  * @return
  *         Whether the named file could be a mount point.
diff --git a/src/opt.c b/src/opt.c
index 883d598..9094794 100644
--- a/src/opt.c
+++ b/src/opt.c
@@ -25,8 +25,10 @@
  * effects are reachable at all, skipping the traversal if not.
  */
 
-#include "prelude.h"
 #include "opt.h"
+
+#include "bfs.h"
+#include "bfstd.h"
 #include "bftw.h"
 #include "bit.h"
 #include "color.h"
@@ -38,6 +40,8 @@
 #include "expr.h"
 #include "list.h"
 #include "pwcache.h"
+#include "xspawn.h"
+
 #include <errno.h>
 #include <limits.h>
 #include <stdarg.h>
@@ -102,42 +106,23 @@ enum pred_type {
 	PRED_TYPES,
 };
 
-/** Get the name of a predicate type. */
-static const char *pred_type_name(enum pred_type type) {
-	switch (type) {
-	case READABLE_PRED:
-		return "-readable";
-	case WRITABLE_PRED:
-		return "-writable";
-	case EXECUTABLE_PRED:
-		return "-executable";
-	case ACL_PRED:
-		return "-acl";
-	case CAPABLE_PRED:
-		return "-capable";
-	case EMPTY_PRED:
-		return "-empty";
-	case HIDDEN_PRED:
-		return "-hidden";
-	case NOGROUP_PRED:
-		return "-nogroup";
-	case NOUSER_PRED:
-		return "-nouser";
-	case SPARSE_PRED:
-		return "-sparse";
-	case XATTR_PRED:
-		return "-xattr";
-
-	case PRED_TYPES:
-		break;
-	}
-
-	bfs_bug("Unknown predicate %d", (int)type);
-	return "???";
-}
+/** Predicate type names. */
+static const char *const pred_names[] = {
+	[READABLE_PRED] = "-readable",
+	[WRITABLE_PRED] = "-writable",
+	[EXECUTABLE_PRED] = "-executable",
+	[ACL_PRED] = "-acl",
+	[CAPABLE_PRED] = "-capable",
+	[EMPTY_PRED] = "-empty",
+	[HIDDEN_PRED] = "-hidden",
+	[NOGROUP_PRED] = "-nogroup",
+	[NOUSER_PRED] = "-nouser",
+	[SPARSE_PRED] = "-sparse",
+	[XATTR_PRED] = "-xattr",
+};
 
 /**
- * A contrained integer range.
+ * A constrained integer range.
  */
 struct df_range {
 	/** The (inclusive) minimum value. */
@@ -192,11 +177,17 @@ static void constrain_min(struct df_range *range, long long value) {
 	range->min = max_value(range->min, value);
 }
 
-/** Contrain the maximum of a range. */
+/** Constrain the maximum of a range. */
 static void constrain_max(struct df_range *range, long long value) {
 	range->max = min_value(range->max, value);
 }
 
+/** Constrain a range to a single value. */
+static void constrain_range(struct df_range *range, long long value) {
+	constrain_min(range, value);
+	constrain_max(range, value);
+}
+
 /** Remove a single value from a range. */
 static void range_remove(struct df_range *range, long long value) {
 	if (range->min == value) {
@@ -242,29 +233,15 @@ enum range_type {
 	RANGE_TYPES,
 };
 
-/** Get the name of a range type. */
-static const char *range_type_name(enum range_type type) {
-	switch (type) {
-	case DEPTH_RANGE:
-		return "-depth";
-	case GID_RANGE:
-		return "-gid";
-	case INUM_RANGE:
-		return "-inum";
-	case LINKS_RANGE:
-		return "-links";
-	case SIZE_RANGE:
-		return "-size";
-	case UID_RANGE:
-		return "-uid";
-
-	case RANGE_TYPES:
-		break;
-	}
-
-	bfs_bug("Unknown range %d", (int)type);
-	return "???";
-}
+/** Range type names. */
+static const char *const range_names[] = {
+	[DEPTH_RANGE] = "-depth",
+	[GID_RANGE] = "-gid",
+	[INUM_RANGE] = "-inum",
+	[LINKS_RANGE] = "-links",
+	[SIZE_RANGE] = "-size",
+	[UID_RANGE] = "-uid",
+};
 
 /**
  * The data flow analysis domain.
@@ -333,27 +310,27 @@ static void df_init_top(struct df_domain *value) {
 
 /** Check for the top element. */
 static bool df_is_top(const struct df_domain *value) {
-        for (int i = 0; i < PRED_TYPES; ++i) {
-                if (value->preds[i] != PRED_TOP) {
-                        return false;
-                }
-        }
+	for (int i = 0; i < PRED_TYPES; ++i) {
+		if (value->preds[i] != PRED_TOP) {
+			return false;
+		}
+	}
 
-        for (int i = 0; i < RANGE_TYPES; ++i) {
-                if (!range_is_top(&value->ranges[i])) {
-                        return false;
-                }
-        }
+	for (int i = 0; i < RANGE_TYPES; ++i) {
+		if (!range_is_top(&value->ranges[i])) {
+			return false;
+		}
+	}
 
-        if (value->types != ~0U) {
-                return false;
-        }
+	if (value->types != ~0U) {
+		return false;
+	}
 
-        if (value->xtypes != ~0U) {
-                return false;
-        }
+	if (value->xtypes != ~0U) {
+		return false;
+	}
 
-        return true;
+	return true;
 }
 
 /** Compute the union of two fact sets. */
@@ -397,7 +374,7 @@ struct bfs_opt {
 };
 
 /** Log an optimization. */
-attr(printf(2, 3))
+_printf(2, 3)
 static bool opt_debug(struct bfs_opt *opt, const char *format, ...) {
 	if (bfs_debug_prefix(opt->ctx, DEBUG_OPT)) {
 		for (int i = 0; i < opt->depth; ++i) {
@@ -415,7 +392,7 @@ static bool opt_debug(struct bfs_opt *opt, const char *format, ...) {
 }
 
 /** Log a recursive call. */
-attr(printf(2, 3))
+_printf(2, 3)
 static bool opt_enter(struct bfs_opt *opt, const char *format, ...) {
 	int depth = opt->depth;
 	if (depth > 0) {
@@ -435,7 +412,7 @@ static bool opt_enter(struct bfs_opt *opt, const char *format, ...) {
 }
 
 /** Log a recursive return. */
-attr(printf(2, 3))
+_printf(2, 3)
 static bool opt_leave(struct bfs_opt *opt, const char *format, ...) {
 	bool debug = false;
 	int depth = opt->depth;
@@ -459,7 +436,7 @@ static bool opt_leave(struct bfs_opt *opt, const char *format, ...) {
 }
 
 /** Log a shallow visit. */
-attr(printf(2, 3))
+_printf(2, 3)
 static bool opt_visit(struct bfs_opt *opt, const char *format, ...) {
 	int depth = opt->depth;
 	if (depth > 0) {
@@ -479,7 +456,7 @@ static bool opt_visit(struct bfs_opt *opt, const char *format, ...) {
 }
 
 /** Log the deletion of an expression. */
-attr(printf(2, 3))
+_printf(2, 3)
 static bool opt_delete(struct bfs_opt *opt, const char *format, ...) {
 	int depth = opt->depth;
 
@@ -503,7 +480,7 @@ typedef bool dump_fn(struct bfs_opt *opt, const char *format, ...);
 
 /** Print a df_pred. */
 static void pred_dump(dump_fn *dump, struct bfs_opt *opt, const struct df_domain *value, enum pred_type type) {
-	dump(opt, "${blu}%s${rs}: ", pred_type_name(type));
+	dump(opt, "${blu}%s${rs}: ", pred_names[type]);
 
 	FILE *file = opt->ctx->cerr->file;
 	switch (value->preds[type]) {
@@ -524,7 +501,7 @@ static void pred_dump(dump_fn *dump, struct bfs_opt *opt, const struct df_domain
 
 /** Print a df_range. */
 static void range_dump(dump_fn *dump, struct bfs_opt *opt, const struct df_domain *value, enum range_type type) {
-	dump(opt, "${blu}%s${rs}: ", range_type_name(type));
+	dump(opt, "${blu}%s${rs}: ", range_names[type]);
 
 	FILE *file = opt->ctx->cerr->file;
 	const struct df_range *range = &value->ranges[type];
@@ -641,22 +618,26 @@ static bool is_const(const struct bfs_expr *expr) {
 }
 
 /** Warn about an expression. */
-attr(printf(3, 4))
-static void opt_warning(const struct bfs_opt *opt, const struct bfs_expr *expr, const char *format, ...) {
+_printf(3, 4)
+static bool opt_warning(const struct bfs_opt *opt, const struct bfs_expr *expr, const char *format, ...) {
 	if (!opt->warn) {
-		return;
+		return false;
 	}
 
 	if (bfs_expr_is_parent(expr) || is_const(expr)) {
-		return;
+		return false;
 	}
 
-	if (bfs_expr_warning(opt->ctx, expr)) {
-		va_list args;
-		va_start(args, format);
-		bfs_vwarning(opt->ctx, format, args);
-		va_end(args);
+	if (!bfs_expr_warning(opt->ctx, expr)) {
+		return false;
 	}
+
+	va_list args;
+	va_start(args, format);
+	bfs_vwarning(opt->ctx, format, args);
+	va_end(args);
+
+	return true;
 }
 
 /** Remove and return an expression's children. */
@@ -756,9 +737,7 @@ static struct bfs_expr *visit_and(struct bfs_opt *opt, struct bfs_expr *expr, co
 	df_init_bottom(&opt->after_false);
 	struct bfs_opt nested = *opt;
 
-	while (!SLIST_EMPTY(&children)) {
-		struct bfs_expr *child = SLIST_POP(&children);
-
+	drain_slist (struct bfs_expr, child, &children) {
 		if (SLIST_EMPTY(&children)) {
 			nested.ignore_result = opt->ignore_result;
 		} else {
@@ -790,9 +769,7 @@ static struct bfs_expr *visit_or(struct bfs_opt *opt, struct bfs_expr *expr, con
 	df_init_bottom(&opt->after_true);
 	struct bfs_opt nested = *opt;
 
-	while (!SLIST_EMPTY(&children)) {
-		struct bfs_expr *child = SLIST_POP(&children);
-
+	drain_slist (struct bfs_expr, child, &children) {
 		if (SLIST_EMPTY(&children)) {
 			nested.ignore_result = opt->ignore_result;
 		} else {
@@ -822,9 +799,7 @@ static struct bfs_expr *visit_comma(struct bfs_opt *opt, struct bfs_expr *expr,
 
 	struct bfs_opt nested = *opt;
 
-	while (!SLIST_EMPTY(&children)) {
-		struct bfs_expr *child = SLIST_POP(&children);
-
+	drain_slist (struct bfs_expr, child, &children) {
 		if (SLIST_EMPTY(&children)) {
 			nested.ignore_result = opt->ignore_result;
 		} else {
@@ -1088,7 +1063,7 @@ static struct bfs_expr *annotate_and(struct bfs_opt *opt, struct bfs_expr *expr,
 	expr->cost = 0.0;
 	expr->probability = 1.0;
 
-	for (struct bfs_expr *child = bfs_expr_children(expr); child; child = child->next) {
+	for_expr (child, expr) {
 		expr->pure &= child->pure;
 		expr->always_true &= child->always_true;
 		expr->always_false |= child->always_false;
@@ -1107,7 +1082,7 @@ static struct bfs_expr *annotate_or(struct bfs_opt *opt, struct bfs_expr *expr,
 	expr->cost = 0.0;
 
 	float false_prob = 1.0;
-	for (struct bfs_expr *child = bfs_expr_children(expr); child; child = child->next) {
+	for_expr (child, expr) {
 		expr->pure &= child->pure;
 		expr->always_true |= child->always_true;
 		expr->always_false &= child->always_false;
@@ -1124,7 +1099,7 @@ static struct bfs_expr *annotate_comma(struct bfs_opt *opt, struct bfs_expr *exp
 	expr->pure = true;
 	expr->cost = 0.0;
 
-	for (struct bfs_expr *child = bfs_expr_children(expr); child; child = child->next) {
+	for_expr (child, expr) {
 		expr->pure &= child->pure;
 		expr->always_true = child->always_true;
 		expr->always_false = child->always_false;
@@ -1360,7 +1335,7 @@ static struct bfs_expr *opt_const(struct bfs_opt *opt, bool value) {
 	static bfs_eval_fn *const fns[] = {eval_false, eval_true};
 	static char *fake_args[] = {"-false", "-true"};
 
-	struct bfs_expr *expr = bfs_expr_new(opt->ctx, fns[value], 1, &fake_args[value]);
+	struct bfs_expr *expr = bfs_expr_new(opt->ctx, fns[value], 1, &fake_args[value], BFS_TEST);
 	return visit_shallow(opt, expr, &annotate);
 }
 
@@ -1374,7 +1349,7 @@ static struct bfs_expr *negate_expr(struct bfs_opt *opt, struct bfs_expr *expr,
 		return opt_const(opt, true);
 	}
 
-	struct bfs_expr *ret = bfs_expr_new(opt->ctx, eval_not, 1, argv);
+	struct bfs_expr *ret = bfs_expr_new(opt->ctx, eval_not, 1, argv, BFS_OPERATOR);
 	if (!ret) {
 		return NULL;
 	}
@@ -1403,8 +1378,7 @@ static struct bfs_expr *sink_not_andor(struct bfs_opt *opt, struct bfs_expr *exp
 	struct bfs_exprs children;
 	foster_children(expr, &children);
 
-	struct bfs_expr *child;
-	while ((child = SLIST_POP(&children))) {
+	drain_slist (struct bfs_expr, child, &children) {
 		opt_enter(opt, "%pe\n", child);
 
 		child = negate_expr(opt, child, argv);
@@ -1422,18 +1396,16 @@ static struct bfs_expr *sink_not_andor(struct bfs_opt *opt, struct bfs_expr *exp
 
 /** Sink a negation into a comma expression. */
 static struct bfs_expr *sink_not_comma(struct bfs_opt *opt, struct bfs_expr *expr) {
-	bfs_assert(expr->eval_fn == eval_comma);
-
-	opt_enter(opt, "%pe\n", expr);
-
 	char **argv = expr->argv;
 	expr = only_child(expr);
+	opt_enter(opt, "%pe\n", expr);
+
+	bfs_assert(expr->eval_fn == eval_comma);
 
 	struct bfs_exprs children;
 	foster_children(expr, &children);
 
-	struct bfs_expr *child;
-	while ((child = SLIST_POP(&children))) {
+	drain_slist (struct bfs_expr, child, &children) {
 		if (SLIST_EMPTY(&children)) {
 			opt_enter(opt, "%pe\n", child);
 			opt_debug(opt, "sink\n");
@@ -1461,7 +1433,6 @@ static struct bfs_expr *canonicalize_not(struct bfs_opt *opt, struct bfs_expr *e
 
 	if (rhs->eval_fn == eval_not) {
 		opt_debug(opt, "double negation\n");
-		rhs = only_child(expr);
 		return only_child(rhs);
 	} else if (rhs->eval_fn == eval_and || rhs->eval_fn == eval_or) {
 		return sink_not_andor(opt, expr);
@@ -1483,8 +1454,7 @@ static struct bfs_expr *canonicalize_assoc(struct bfs_opt *opt, struct bfs_expr
 	struct bfs_exprs flat;
 	SLIST_INIT(&flat);
 
-	struct bfs_expr *child;
-	while ((child = SLIST_POP(&children))) {
+	drain_slist (struct bfs_expr, child, &children) {
 		if (child->eval_fn == expr->eval_fn) {
 			struct bfs_expr *head = SLIST_HEAD(&child->children);
 			struct bfs_expr *tail = SLIST_TAIL(&child->children);
@@ -1592,8 +1562,7 @@ static struct bfs_expr *reorder_andor(struct bfs_opt *opt, struct bfs_expr *expr
 	struct bfs_exprs pure;
 	SLIST_INIT(&pure);
 
-	struct bfs_expr *child;
-	while ((child = SLIST_POP(&children))) {
+	drain_slist (struct bfs_expr, child, &children) {
 		if (child->pure) {
 			SLIST_APPEND(&pure, child);
 		} else {
@@ -1634,8 +1603,7 @@ static void data_flow_icmp(struct bfs_opt *opt, const struct bfs_expr *expr, enu
 
 	switch (expr->int_cmp) {
 	case BFS_INT_EQUAL:
-		constrain_min(true_range, value);
-		constrain_max(true_range, value);
+		constrain_range(true_range, value);
 		range_remove(false_range, value);
 		break;
 
@@ -1655,14 +1623,31 @@ static void data_flow_icmp(struct bfs_opt *opt, const struct bfs_expr *expr, enu
 
 /** Transfer function for -{execut,read,writ}able. */
 static struct bfs_expr *data_flow_access(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
-	if (expr->num & R_OK) {
+	switch (expr->num) {
+	case R_OK:
 		data_flow_pred(opt, READABLE_PRED, true);
-	}
-	if (expr->num & W_OK) {
+		break;
+	case W_OK:
 		data_flow_pred(opt, WRITABLE_PRED, true);
-	}
-	if (expr->num & X_OK) {
+		break;
+	case X_OK:
 		data_flow_pred(opt, EXECUTABLE_PRED, true);
+		break;
+	default:
+		bfs_bug("Unknown access() mode %lld", expr->num);
+		break;
+	}
+
+	return expr;
+}
+
+/** Transfer function for -empty. */
+static struct bfs_expr *data_flow_empty(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+	opt->after_true.types &= (1 << BFS_REG) | (1 << BFS_DIR);
+
+	if (opt->before.types == (1 << BFS_REG)) {
+		constrain_range(&opt->after_true.ranges[SIZE_RANGE], 0);
+		range_remove(&opt->after_false.ranges[SIZE_RANGE], 0);
 	}
 
 	return expr;
@@ -1675,7 +1660,7 @@ static struct bfs_expr *data_flow_gid(struct bfs_opt *opt, struct bfs_expr *expr
 		gid_t gid = range->min;
 		bool nogroup = !bfs_getgrgid(opt->ctx->groups, gid);
 		if (errno == 0) {
-			data_flow_pred(opt, NOGROUP_PRED, nogroup);
+			constrain_pred(&opt->after_true.preds[NOGROUP_PRED], nogroup);
 		}
 	}
 
@@ -1706,11 +1691,16 @@ static struct bfs_expr *data_flow_links(struct bfs_opt *opt, struct bfs_expr *ex
 	return expr;
 }
 
+/** Transfer function for -lname. */
+static struct bfs_expr *data_flow_lname(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+	opt->after_true.types &= 1 << BFS_LNK;
+	return expr;
+}
+
 /** Transfer function for -samefile. */
 static struct bfs_expr *data_flow_samefile(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
 	struct df_range *true_range = &opt->after_true.ranges[INUM_RANGE];
-	constrain_min(true_range, expr->ino);
-	constrain_max(true_range, expr->ino);
+	constrain_range(true_range, expr->ino);
 
 	struct df_range *false_range = &opt->after_false.ranges[INUM_RANGE];
 	range_remove(false_range, expr->ino);
@@ -1744,7 +1734,7 @@ static struct bfs_expr *data_flow_uid(struct bfs_opt *opt, struct bfs_expr *expr
 		uid_t uid = range->min;
 		bool nouser = !bfs_getpwuid(opt->ctx->users, uid);
 		if (errno == 0) {
-			data_flow_pred(opt, NOUSER_PRED, nouser);
+			constrain_pred(&opt->after_true.preds[NOUSER_PRED], nouser);
 		}
 	}
 
@@ -1790,7 +1780,7 @@ static struct bfs_expr *data_flow_leave(struct bfs_opt *opt, struct bfs_expr *ex
 	if (df_is_bottom(&opt->after_false)) {
 		if (!expr->pure) {
 			expr->always_true = true;
-			expr->probability = 0.0;
+			expr->probability = 1.0;
 		} else if (expr->eval_fn != eval_true) {
 			opt_warning(opt, expr, "This expression is always true.\n\n");
 			opt_debug(opt, "pure, always true\n");
@@ -1818,12 +1808,45 @@ static struct bfs_expr *data_flow_leave(struct bfs_opt *opt, struct bfs_expr *ex
 	return visit_leave(opt, expr, visitor);
 }
 
-/** Data flow visitor function. */
-static struct bfs_expr *data_flow_visit(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
-	if (opt->ignore_result && expr->pure) {
+/** Ignore an expression (and possibly warn/prompt). */
+static struct bfs_expr *opt_ignore(struct bfs_opt *opt, struct bfs_expr *expr, bool delete) {
+	if (delete) {
+		opt_delete(opt, "%pe [ignored result]\n", expr);
+	} else {
 		opt_debug(opt, "ignored result\n");
-		opt_warning(opt, expr, "The result of this expression is ignored.\n\n");
+	}
+
+	if (expr->kind != BFS_TEST) {
+		goto done;
+	}
+
+	if (!opt_warning(opt, expr, "The result of this expression is ignored.\n")) {
+		goto done;
+	}
+
+	struct bfs_ctx *ctx = opt->ctx;
+	if (ctx->interactive && ctx->dangerous) {
+		bfs_warning(ctx, "Do you want to continue? ");
+		if (ynprompt() <= 0) {
+			errno = 0;
+			return NULL;
+		}
+	}
+
+	fprintf(stderr, "\n");
+
+done:
+	if (!delete && expr->pure) {
+		// If we're not deleting the expression entirely, replace it with -false
 		expr = opt_const(opt, false);
+	}
+	return expr;
+}
+
+/** Data flow visitor function. */
+static struct bfs_expr *data_flow_visit(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+	if (opt->ignore_result) {
+		expr = opt_ignore(opt, expr, false);
 		if (!expr) {
 			return NULL;
 		}
@@ -1893,9 +1916,11 @@ static const struct visitor data_flow = {
 	.leave = data_flow_leave,
 	.table = (const struct visitor_table[]) {
 		{eval_access, data_flow_access},
+		{eval_empty, data_flow_empty},
 		{eval_gid, data_flow_gid},
 		{eval_inum, data_flow_inum},
 		{eval_links, data_flow_links},
+		{eval_lname, data_flow_lname},
 		{eval_samefile, data_flow_samefile},
 		{eval_size, data_flow_size},
 		{eval_type, data_flow_type},
@@ -1919,7 +1944,7 @@ static struct bfs_expr *simplify_not(struct bfs_opt *opt, struct bfs_expr *expr,
 static struct bfs_expr *lift_andor_not(struct bfs_opt *opt, struct bfs_expr *expr) {
 	// Only lift negations if it would reduce the number of (-not) expressions
 	size_t added = 0, removed = 0;
-	for (struct bfs_expr *child = bfs_expr_children(expr); child; child = child->next) {
+	for_expr (child, expr) {
 		if (child->eval_fn == eval_not) {
 			++removed;
 		} else {
@@ -1944,8 +1969,7 @@ static struct bfs_expr *lift_andor_not(struct bfs_opt *opt, struct bfs_expr *exp
 	struct bfs_exprs children;
 	foster_children(expr, &children);
 
-	struct bfs_expr *child;
-	while ((child = SLIST_POP(&children))) {
+	drain_slist (struct bfs_expr, child, &children) {
 		opt_enter(opt, "%pe\n", child);
 
 		child = negate_expr(opt, child, &fake_not_arg);
@@ -1958,6 +1982,10 @@ static struct bfs_expr *lift_andor_not(struct bfs_opt *opt, struct bfs_expr *exp
 	}
 
 	expr = visit_shallow(opt, expr, &annotate);
+	if (!expr) {
+		return NULL;
+	}
+
 	return negate_expr(opt, expr, &fake_not_arg);
 }
 
@@ -1968,7 +1996,7 @@ static struct bfs_expr *first_ignorable(struct bfs_opt *opt, struct bfs_expr *ex
 	}
 
 	struct bfs_expr *ret = NULL;
-	for (struct bfs_expr *child = bfs_expr_children(expr); child; child = child->next) {
+	for_expr (child, expr) {
 		if (!child->pure) {
 			ret = NULL;
 		} else if (!ret) {
@@ -1987,16 +2015,15 @@ static struct bfs_expr *simplify_and(struct bfs_opt *opt, struct bfs_expr *expr,
 	struct bfs_exprs children;
 	foster_children(expr, &children);
 
-	while (!SLIST_EMPTY(&children)) {
-		struct bfs_expr *child = SLIST_POP(&children);
-
+	drain_slist (struct bfs_expr, child, &children) {
 		if (child == ignorable) {
 			ignore = true;
 		}
 
 		if (ignore) {
-			opt_delete(opt, "%pe [ignored result]\n", child);
-			opt_warning(opt, child, "The result of this expression is ignored.\n\n");
+			if (!opt_ignore(opt, child, true)) {
+				return NULL;
+			}
 			continue;
 		}
 
@@ -2009,8 +2036,8 @@ static struct bfs_expr *simplify_and(struct bfs_opt *opt, struct bfs_expr *expr,
 		bfs_expr_append(expr, child);
 
 		if (child->always_false) {
-			while ((child = SLIST_POP(&children))) {
-				opt_delete(opt, "%pe [short-circuit]\n", child);
+			drain_slist (struct bfs_expr, dead, &children) {
+				opt_delete(opt, "%pe [short-circuit]\n", dead);
 			}
 		}
 	}
@@ -2035,16 +2062,15 @@ static struct bfs_expr *simplify_or(struct bfs_opt *opt, struct bfs_expr *expr,
 	struct bfs_exprs children;
 	foster_children(expr, &children);
 
-	while (!SLIST_EMPTY(&children)) {
-		struct bfs_expr *child = SLIST_POP(&children);
-
+	drain_slist (struct bfs_expr, child, &children) {
 		if (child == ignorable) {
 			ignore = true;
 		}
 
 		if (ignore) {
-			opt_delete(opt, "%pe [ignored result]\n", child);
-			opt_warning(opt, child, "The result of this expression is ignored.\n\n");
+			if (!opt_ignore(opt, child, true)) {
+				return NULL;
+			}
 			continue;
 		}
 
@@ -2057,8 +2083,8 @@ static struct bfs_expr *simplify_or(struct bfs_opt *opt, struct bfs_expr *expr,
 		bfs_expr_append(expr, child);
 
 		if (child->always_true) {
-			while ((child = SLIST_POP(&children))) {
-				opt_delete(opt, "%pe [short-circuit]\n", child);
+			drain_slist (struct bfs_expr, dead, &children) {
+				opt_delete(opt, "%pe [short-circuit]\n", dead);
 			}
 		}
 	}
@@ -2080,12 +2106,11 @@ static struct bfs_expr *simplify_comma(struct bfs_opt *opt, struct bfs_expr *exp
 	struct bfs_exprs children;
 	foster_children(expr, &children);
 
-	while (!SLIST_EMPTY(&children)) {
-		struct bfs_expr *child = SLIST_POP(&children);
-
+	drain_slist (struct bfs_expr, child, &children) {
 		if (opt->level >= 2 && child->pure && !SLIST_EMPTY(&children)) {
-			opt_delete(opt, "%pe [ignored result]\n", child);
-			opt_warning(opt, child, "The result of this expression is ignored.\n\n");
+			if (!opt_ignore(opt, child, true)) {
+				return NULL;
+			}
 			continue;
 		}
 
@@ -2136,6 +2161,8 @@ static struct bfs_expr *optimize(struct bfs_opt *opt, struct bfs_expr *expr) {
 	};
 
 	struct df_domain impure;
+	df_init_top(&opt->after_true);
+	df_init_top(&opt->after_false);
 
 	for (int i = 0; i < 3; ++i) {
 		struct bfs_opt nested = *opt;
@@ -2149,9 +2176,11 @@ static struct bfs_expr *optimize(struct bfs_opt *opt, struct bfs_expr *expr) {
 				continue;
 			}
 
+			const struct visitor *visitor = passes[j].visitor;
+
 			// Skip reordering the first time through the passes, to
 			// make warnings more understandable
-			if (passes[j].visitor == &reorder) {
+			if (visitor == &reorder) {
 				if (i == 0) {
 					continue;
 				} else {
@@ -2159,10 +2188,15 @@ static struct bfs_expr *optimize(struct bfs_opt *opt, struct bfs_expr *expr) {
 				}
 			}
 
-			expr = visit(&nested, expr, passes[j].visitor);
+			expr = visit(&nested, expr, visitor);
 			if (!expr) {
 				return NULL;
 			}
+
+			if (visitor == &data_flow) {
+				opt->after_true = nested.after_true;
+				opt->after_false = nested.after_false;
+			}
 		}
 
 		opt_leave(&nested, NULL);
@@ -2176,17 +2210,20 @@ static struct bfs_expr *optimize(struct bfs_opt *opt, struct bfs_expr *expr) {
 	return expr;
 }
 
-/** Estimate the odds of an expression calling stat(). */
-static float expr_stat_odds(struct bfs_expr *expr) {
-	if (expr->calls_stat) {
+/** An expression predicate. */
+typedef bool expr_pred(const struct bfs_expr *expr);
+
+/** Estimate the odds that a matching expression will be evaluated. */
+static float estimate_odds(const struct bfs_expr *expr, expr_pred *pred) {
+	if (pred(expr)) {
 		return 1.0;
 	}
 
-	float nostat_odds = 1.0;
+	float nonmatch_odds = 1.0;
 	float reached_odds = 1.0;
-	for (struct bfs_expr *child = bfs_expr_children(expr); child; child = child->next) {
-		float child_odds = expr_stat_odds(child);
-		nostat_odds *= 1.0 - reached_odds * child_odds;
+	for_expr (child, expr) {
+		float child_odds = estimate_odds(child, pred);
+		nonmatch_odds *= 1.0 - reached_odds * child_odds;
 
 		if (expr->eval_fn == eval_and) {
 			reached_odds *= child->probability;
@@ -2195,7 +2232,12 @@ static float expr_stat_odds(struct bfs_expr *expr) {
 		}
 	}
 
-	return 1.0 - nostat_odds;
+	return 1.0 - nonmatch_odds;
+}
+
+/** Whether an expression calls stat(). */
+static bool calls_stat(const struct bfs_expr *expr) {
+	return expr->calls_stat;
 }
 
 /** Estimate the odds of calling stat(). */
@@ -2204,15 +2246,20 @@ static float estimate_stat_odds(struct bfs_ctx *ctx) {
 		return 1.0;
 	}
 
-	float nostat_odds = 1.0 - expr_stat_odds(ctx->exclude);
+	float nostat_odds = 1.0 - estimate_odds(ctx->exclude, calls_stat);
 
 	float reached_odds = 1.0 - ctx->exclude->probability;
-	float expr_odds = expr_stat_odds(ctx->expr);
+	float expr_odds = estimate_odds(ctx->expr, calls_stat);
 	nostat_odds *= 1.0 - reached_odds * expr_odds;
 
 	return 1.0 - nostat_odds;
 }
 
+/** Matches -(exec|ok) ... \; */
+static bool single_exec(const struct bfs_expr *expr) {
+	return expr->eval_fn == eval_exec && !(expr->exec->flags & BFS_EXEC_MULTI);
+}
+
 int bfs_optimize(struct bfs_ctx *ctx) {
 	bfs_ctx_dump(ctx, DEBUG_OPT);
 
@@ -2291,6 +2338,17 @@ int bfs_optimize(struct bfs_ctx *ctx) {
 			opt_leave(&opt, "eager stat cost: ${ylw}%g${rs}\n", eager_cost);
 		}
 
+#ifndef POSIX_SPAWN_SETRLIMIT
+		// If bfs_spawn_setrlimit() would force us to use fork() over
+		// posix_spawn(), the extra cost may outweigh the benefit of a
+		// higher RLIMIT_NOFILE
+		float single_exec_odds = estimate_odds(ctx->expr, single_exec);
+		if (single_exec_odds >= 0.5) {
+			opt_enter(&opt, "single ${blu}-exec${rs} odds: ${ylw}%g${rs}\n", single_exec_odds);
+			ctx->raise_nofile = false;
+			opt_leave(&opt, "not raising RLIMIT_NOFILE\n");
+		}
+#endif
 	}
 
 	opt_leave(&opt, NULL);
diff --git a/src/opt.h b/src/opt.h
index 4aac129..a5729b3 100644
--- a/src/opt.h
+++ b/src/opt.h
@@ -13,7 +13,7 @@ struct bfs_ctx;
 /**
  * Apply optimizations to the command line.
  *
- * @param ctx
+ * @ctx
  *         The bfs context to optimize.
  * @return
  *         0 if successful, -1 on error.
diff --git a/src/parse.c b/src/parse.c
index a1155c0..5ec4c0e 100644
--- a/src/parse.c
+++ b/src/parse.c
@@ -8,9 +8,10 @@
  * flags like always-true options, and skipping over paths wherever they appear.
  */
 
-#include "prelude.h"
 #include "parse.h"
+
 #include "alloc.h"
+#include "bfs.h"
 #include "bfstd.h"
 #include "bftw.h"
 #include "color.h"
@@ -31,6 +32,7 @@
 #include "xregex.h"
 #include "xspawn.h"
 #include "xtime.h"
+
 #include <errno.h>
 #include <fcntl.h>
 #include <fnmatch.h>
@@ -78,14 +80,10 @@ struct bfs_parser {
 
 	/** Whether stdout is a terminal. */
 	bool stdout_tty;
-	/** Whether this session is interactive (stdin and stderr are each a terminal). */
-	bool interactive;
 	/** Whether -color or -nocolor has been passed. */
 	enum use_color use_color;
 	/** Whether a -print action is implied. */
 	bool implicit_print;
-	/** Whether the default root "." should be used. */
-	bool implicit_root;
 	/** Whether the expression has started. */
 	bool expr_started;
 	/** Whether an information option like -help or -version was passed. */
@@ -95,44 +93,26 @@ struct bfs_parser {
 
 	/** The last non-path argument. */
 	char **last_arg;
-	/** A "-depth"-type argument, if any. */
-	char **depth_arg;
-	/** A "-limit" argument, if any. */
-	char **limit_arg;
-	/** A "-prune" argument, if any. */
-	char **prune_arg;
-	/** A "-mount" argument, if any. */
-	char **mount_arg;
-	/** An "-xdev" argument, if any. */
-	char **xdev_arg;
-	/** A "-files0-from -" argument, if any. */
-	char **files0_stdin_arg;
-	/** An "-ok"-type expression, if any. */
-	const struct bfs_expr *ok_expr;
+	/** A "-depth"-type expression, if any. */
+	const struct bfs_expr *depth_expr;
+	/** A "-limit" expression, if any. */
+	const struct bfs_expr *limit_expr;
+	/** A "-prune" expression, if any. */
+	const struct bfs_expr *prune_expr;
+	/** A "-mount" expression, if any. */
+	const struct bfs_expr *mount_expr;
+	/** An "-xdev" expression, if any. */
+	const struct bfs_expr *xdev_expr;
+	/** A "-files0-from" expression, if any. */
+	const struct bfs_expr *files0_expr;
+	/** An expression that consumes stdin, if any. */
+	const struct bfs_expr *stdin_expr;
 
 	/** The current time (maybe modified by -daystart). */
 	struct timespec now;
 };
 
 /**
- * Possible token types.
- */
-enum token_type {
-	/** A flag. */
-	T_FLAG,
-	/** A root path. */
-	T_PATH,
-	/** An option. */
-	T_OPTION,
-	/** A test. */
-	T_TEST,
-	/** An action. */
-	T_ACTION,
-	/** An operator. */
-	T_OPERATOR,
-};
-
-/**
  * Print a low-level error message during parsing.
  */
 static void parse_perror(const struct bfs_parser *parser, const char *str) {
@@ -158,9 +138,8 @@ static void highlight_args(const struct bfs_ctx *ctx, char **argv, size_t argc,
 /**
  * Print an error message during parsing.
  */
-attr(printf(2, 3))
+_printf(2, 3)
 static void parse_error(const struct bfs_parser *parser, const char *format, ...) {
-	int error = errno;
 	const struct bfs_ctx *ctx = parser->ctx;
 
 	bool highlight[ctx->argc];
@@ -170,7 +149,6 @@ static void parse_error(const struct bfs_parser *parser, const char *format, ...
 
 	va_list args;
 	va_start(args, format);
-	errno = error;
 	bfs_verror(parser->ctx, format, args);
 	va_end(args);
 }
@@ -178,9 +156,8 @@ static void parse_error(const struct bfs_parser *parser, const char *format, ...
 /**
  * Print an error about some command line arguments.
  */
-attr(printf(4, 5))
+_printf(4, 5)
 static void parse_argv_error(const struct bfs_parser *parser, char **argv, size_t argc, const char *format, ...) {
-	int error = errno;
 	const struct bfs_ctx *ctx = parser->ctx;
 
 	bool highlight[ctx->argc];
@@ -190,7 +167,6 @@ static void parse_argv_error(const struct bfs_parser *parser, char **argv, size_
 
 	va_list args;
 	va_start(args, format);
-	errno = error;
 	bfs_verror(ctx, format, args);
 	va_end(args);
 }
@@ -198,20 +174,18 @@ static void parse_argv_error(const struct bfs_parser *parser, char **argv, size_
 /**
  * Print an error about conflicting command line arguments.
  */
-attr(printf(6, 7))
-static void parse_conflict_error(const struct bfs_parser *parser, char **argv1, size_t argc1, char **argv2, size_t argc2, const char *format, ...) {
-	int error = errno;
+_printf(4, 5)
+static void parse_conflict_error(const struct bfs_parser *parser, const struct bfs_expr *expr1, const struct bfs_expr *expr2, const char *format, ...) {
 	const struct bfs_ctx *ctx = parser->ctx;
 
 	bool highlight[ctx->argc];
 	init_highlight(ctx, highlight);
-	highlight_args(ctx, argv1, argc1, highlight);
-	highlight_args(ctx, argv2, argc2, highlight);
+	highlight_args(ctx, expr1->argv, expr1->argc, highlight);
+	highlight_args(ctx, expr2->argv, expr2->argc, highlight);
 	bfs_argv_error(ctx, highlight);
 
 	va_list args;
 	va_start(args, format);
-	errno = error;
 	bfs_verror(ctx, format, args);
 	va_end(args);
 }
@@ -219,16 +193,14 @@ static void parse_conflict_error(const struct bfs_parser *parser, char **argv1,
 /**
  * Print an error about an expression.
  */
-attr(printf(3, 4))
+_printf(3, 4)
 static void parse_expr_error(const struct bfs_parser *parser, const struct bfs_expr *expr, const char *format, ...) {
-	int error = errno;
 	const struct bfs_ctx *ctx = parser->ctx;
 
 	bfs_expr_error(ctx, expr);
 
 	va_list args;
 	va_start(args, format);
-	errno = error;
 	bfs_verror(ctx, format, args);
 	va_end(args);
 }
@@ -236,9 +208,8 @@ static void parse_expr_error(const struct bfs_parser *parser, const struct bfs_e
 /**
  * Print a warning message during parsing.
  */
-attr(printf(2, 3))
+_printf(2, 3)
 static bool parse_warning(const struct bfs_parser *parser, const char *format, ...) {
-	int error = errno;
 	const struct bfs_ctx *ctx = parser->ctx;
 
 	bool highlight[ctx->argc];
@@ -250,7 +221,6 @@ static bool parse_warning(const struct bfs_parser *parser, const char *format, .
 
 	va_list args;
 	va_start(args, format);
-	errno = error;
 	bool ret = bfs_vwarning(parser->ctx, format, args);
 	va_end(args);
 	return ret;
@@ -259,22 +229,20 @@ static bool parse_warning(const struct bfs_parser *parser, const char *format, .
 /**
  * Print a warning about conflicting command line arguments.
  */
-attr(printf(6, 7))
-static bool parse_conflict_warning(const struct bfs_parser *parser, char **argv1, size_t argc1, char **argv2, size_t argc2, const char *format, ...) {
-	int error = errno;
+_printf(4, 5)
+static bool parse_conflict_warning(const struct bfs_parser *parser, const struct bfs_expr *expr1, const struct bfs_expr *expr2, const char *format, ...) {
 	const struct bfs_ctx *ctx = parser->ctx;
 
 	bool highlight[ctx->argc];
 	init_highlight(ctx, highlight);
-	highlight_args(ctx, argv1, argc1, highlight);
-	highlight_args(ctx, argv2, argc2, highlight);
+	highlight_args(ctx, expr1->argv, expr1->argc, highlight);
+	highlight_args(ctx, expr2->argv, expr2->argc, highlight);
 	if (!bfs_argv_warning(ctx, highlight)) {
 		return false;
 	}
 
 	va_list args;
 	va_start(args, format);
-	errno = error;
 	bool ret = bfs_vwarning(ctx, format, args);
 	va_end(args);
 	return ret;
@@ -283,9 +251,8 @@ static bool parse_conflict_warning(const struct bfs_parser *parser, char **argv1
 /**
  * Print a warning about an expression.
  */
-attr(printf(3, 4))
+_printf(3, 4)
 static bool parse_expr_warning(const struct bfs_parser *parser, const struct bfs_expr *expr, const char *format, ...) {
-	int error = errno;
 	const struct bfs_ctx *ctx = parser->ctx;
 
 	if (!bfs_expr_warning(ctx, expr)) {
@@ -294,17 +261,31 @@ static bool parse_expr_warning(const struct bfs_parser *parser, const struct bfs
 
 	va_list args;
 	va_start(args, format);
-	errno = error;
 	bool ret = bfs_vwarning(ctx, format, args);
 	va_end(args);
 	return ret;
 }
 
 /**
+ * Report an error if stdin is already consumed, then consume it.
+ */
+static bool consume_stdin(struct bfs_parser *parser, const struct bfs_expr *expr) {
+	if (parser->stdin_expr) {
+		parse_conflict_error(parser, parser->stdin_expr, expr,
+			"%pX and %pX can't both use standard input.\n",
+			parser->stdin_expr, expr);
+		return false;
+	}
+
+	parser->stdin_expr = expr;
+	return true;
+}
+
+/**
  * Allocate a new expression.
  */
-static struct bfs_expr *parse_new_expr(const struct bfs_parser *parser, bfs_eval_fn *eval_fn, size_t argc, char **argv) {
-	struct bfs_expr *expr = bfs_expr_new(parser->ctx, eval_fn, argc, argv);
+static struct bfs_expr *parse_new_expr(const struct bfs_parser *parser, bfs_eval_fn *eval_fn, size_t argc, char **argv, enum bfs_kind kind) {
+	struct bfs_expr *expr = bfs_expr_new(parser->ctx, eval_fn, argc, argv, kind);
 	if (!expr) {
 		parse_perror(parser, "bfs_expr_new()");
 	}
@@ -315,7 +296,7 @@ static struct bfs_expr *parse_new_expr(const struct bfs_parser *parser, bfs_eval
  * Create a new unary expression.
  */
 static struct bfs_expr *new_unary_expr(const struct bfs_parser *parser, bfs_eval_fn *eval_fn, struct bfs_expr *rhs, char **argv) {
-	struct bfs_expr *expr = parse_new_expr(parser, eval_fn, 1, argv);
+	struct bfs_expr *expr = parse_new_expr(parser, eval_fn, 1, argv, BFS_OPERATOR);
 	if (!expr) {
 		return NULL;
 	}
@@ -329,7 +310,7 @@ static struct bfs_expr *new_unary_expr(const struct bfs_parser *parser, bfs_eval
  * Create a new binary expression.
  */
 static struct bfs_expr *new_binary_expr(const struct bfs_parser *parser, bfs_eval_fn *eval_fn, struct bfs_expr *lhs, struct bfs_expr *rhs, char **argv) {
-	struct bfs_expr *expr = parse_new_expr(parser, eval_fn, 1, argv);
+	struct bfs_expr *expr = parse_new_expr(parser, eval_fn, 1, argv, BFS_OPERATOR);
 	if (!expr) {
 		return NULL;
 	}
@@ -381,7 +362,7 @@ static int expr_open(struct bfs_parser *parser, struct bfs_expr *expr, const cha
 	return 0;
 
 fail:
-	parse_expr_error(parser, expr, "%m.\n");
+	parse_expr_error(parser, expr, "%s.\n", errstr());
 	if (cfile) {
 		cfclose(cfile);
 	} else if (file) {
@@ -401,7 +382,7 @@ static int stat_arg(const struct bfs_parser *parser, char **arg, struct bfs_stat
 
 	int ret = bfs_stat(AT_FDCWD, *arg, flags, sb);
 	if (ret != 0) {
-		parse_argv_error(parser, arg, 1, "%m.\n");
+		parse_argv_error(parser, arg, 1, "%s.\n", errstr());
 	}
 	return ret;
 }
@@ -414,15 +395,20 @@ static struct bfs_expr *parse_expr(struct bfs_parser *parser);
 /**
  * Advance by a single token.
  */
-static char **parser_advance(struct bfs_parser *parser, enum token_type type, size_t argc) {
-	if (type != T_FLAG && type != T_PATH) {
+static char **parser_advance(struct bfs_parser *parser, enum bfs_kind kind, size_t argc) {
+	struct bfs_ctx *ctx = parser->ctx;
+
+	if (kind != BFS_FLAG && kind != BFS_PATH) {
 		parser->expr_started = true;
 	}
 
-	if (type != T_PATH) {
+	if (kind != BFS_PATH) {
 		parser->last_arg = parser->argv;
 	}
 
+	size_t i = parser->argv - ctx->argv;
+	ctx->kinds[i] = kind;
+
 	char **argv = parser->argv;
 	parser->argv += argc;
 	return argv;
@@ -446,7 +432,6 @@ static int parse_root(struct bfs_parser *parser, const char *path) {
 		return -1;
 	}
 
-	parser->implicit_root = false;
 	return 0;
 }
 
@@ -465,7 +450,7 @@ static int skip_paths(struct bfs_parser *parser) {
 				// find uses -- to separate flags from the rest
 				// of the command line.  We allow mixing flags
 				// and paths/predicates, so we just ignore --.
-				parser_advance(parser, T_FLAG, 1);
+				parser_advance(parser, BFS_FLAG, 1);
 				continue;
 			}
 			if (strcmp(arg, "-") != 0) {
@@ -497,7 +482,7 @@ static int skip_paths(struct bfs_parser *parser) {
 			return -1;
 		}
 
-		parser_advance(parser, T_PATH, 1);
+		parser_advance(parser, BFS_PATH, 1);
 	}
 }
 
@@ -517,20 +502,14 @@ enum int_flags {
  * Parse an integer.
  */
 static const char *parse_int(const struct bfs_parser *parser, char **arg, const char *str, void *result, enum int_flags flags) {
-	// strtoll() skips leading spaces, but we want to reject them
-	if (xisspace(str[0])) {
-		goto bad;
-	}
-
 	int base = flags & IF_BASE_MASK;
 	if (base == 0) {
 		base = 10;
 	}
 
 	char *endptr;
-	errno = 0;
-	long long value = strtoll(str, &endptr, base);
-	if (errno != 0) {
+	long long value;
+	if (xstrtoll(str, &endptr, base, &value) != 0) {
 		if (errno == ERANGE) {
 			goto range;
 		} else {
@@ -538,13 +517,6 @@ static const char *parse_int(const struct bfs_parser *parser, char **arg, const
 		}
 	}
 
-	// https://github.com/llvm/llvm-project/issues/64946
-	sanitize_init(&endptr);
-
-	if (endptr == str) {
-		goto bad;
-	}
-
 	if (!(flags & IF_PARTIAL_OK) && *endptr != '\0') {
 		goto bad;
 	}
@@ -641,8 +613,8 @@ static bool looks_like_icmp(const char *str) {
  * Parse a single flag.
  */
 static struct bfs_expr *parse_flag(struct bfs_parser *parser, size_t argc) {
-	char **argv = parser_advance(parser, T_FLAG, argc);
-	return parse_new_expr(parser, eval_true, argc, argv);
+	char **argv = parser_advance(parser, BFS_FLAG, argc);
+	return parse_new_expr(parser, eval_true, argc, argv, BFS_FLAG);
 }
 
 /**
@@ -657,9 +629,11 @@ static struct bfs_expr *parse_nullary_flag(struct bfs_parser *parser) {
  */
 static struct bfs_expr *parse_unary_flag(struct bfs_parser *parser) {
 	const char *arg = parser->argv[0];
+	char flag = arg[strlen(arg) - 1];
+
 	const char *value = parser->argv[1];
 	if (!value) {
-		parse_error(parser, "${cyn}%s${rs} needs a value.\n", arg);
+		parse_error(parser, "${cyn}-%c${rs} needs a value.\n", flag);
 		return NULL;
 	}
 
@@ -667,11 +641,34 @@ static struct bfs_expr *parse_unary_flag(struct bfs_parser *parser) {
 }
 
 /**
+ * Parse a prefix flag like -O3 or -j8.  *value is set to the attached suffix or, if allow_separate, to the next argument.
+ */
+static struct bfs_expr *parse_prefix_flag(struct bfs_parser *parser, char flag, bool allow_separate, const char **value) {
+	const char *arg = parser->argv[0];
+
+	const char *suffix = strchr(arg, flag) + 1;
+	if (*suffix) {
+		*value = suffix;
+		return parse_nullary_flag(parser);
+	}
+
+	suffix = parser->argv[1];
+	if (allow_separate && suffix) {
+		*value = suffix;
+	} else {
+		parse_error(parser, "${cyn}-%c${rs} needs a value.\n", flag);
+		return NULL;
+	}
+
+	return parse_unary_flag(parser);
+}
+
+/**
  * Parse a single option.
  */
 static struct bfs_expr *parse_option(struct bfs_parser *parser, size_t argc) {
-	char **argv = parser_advance(parser, T_OPTION, argc);
-	return parse_new_expr(parser, eval_true, argc, argv);
+	char **argv = parser_advance(parser, BFS_OPTION, argc);
+	return parse_new_expr(parser, eval_true, argc, argv, BFS_OPTION);
 }
 
 /**
@@ -699,8 +696,8 @@ static struct bfs_expr *parse_unary_option(struct bfs_parser *parser) {
  * Parse a single test.
  */
 static struct bfs_expr *parse_test(struct bfs_parser *parser, bfs_eval_fn *eval_fn, size_t argc) {
-	char **argv = parser_advance(parser, T_TEST, argc);
-	return parse_new_expr(parser, eval_fn, argc, argv);
+	char **argv = parser_advance(parser, BFS_TEST, argc);
+	return parse_new_expr(parser, eval_fn, argc, argv, BFS_TEST);
 }
 
 /**
@@ -728,7 +725,7 @@ static struct bfs_expr *parse_unary_test(struct bfs_parser *parser, bfs_eval_fn
  * Parse a single action.
  */
 static struct bfs_expr *parse_action(struct bfs_parser *parser, bfs_eval_fn *eval_fn, size_t argc) {
-	char **argv = parser_advance(parser, T_ACTION, argc);
+	char **argv = parser_advance(parser, BFS_ACTION, argc);
 
 	if (parser->excluding) {
 		parse_argv_error(parser, argv, argc, "This action is not supported within ${red}-exclude${rs}.\n");
@@ -739,7 +736,7 @@ static struct bfs_expr *parse_action(struct bfs_parser *parser, bfs_eval_fn *eva
 		parser->implicit_print = false;
 	}
 
-	return parse_new_expr(parser, eval_fn, argc, argv);
+	return parse_new_expr(parser, eval_fn, argc, argv, BFS_ACTION);
 }
 
 /**
@@ -811,7 +808,8 @@ static bool parse_debug_flag(const char *flag, size_t len, const char *expected)
 static struct bfs_expr *parse_debug(struct bfs_parser *parser, int arg1, int arg2) {
 	struct bfs_ctx *ctx = parser->ctx;
 
-	struct bfs_expr *expr = parse_unary_flag(parser);
+	const char *flags;
+	struct bfs_expr *expr = parse_prefix_flag(parser, 'D', true, &flags);
 	if (!expr) {
 		cfprintf(ctx->cerr, "\n");
 		debug_help(ctx->cerr);
@@ -820,7 +818,7 @@ static struct bfs_expr *parse_debug(struct bfs_parser *parser, int arg1, int arg
 
 	bool unrecognized = false;
 
-	for (const char *flag = expr->argv[1], *next; flag; flag = next) {
+	for (const char *flag = flags, *next; flag; flag = next) {
 		size_t len = strcspn(flag, ",");
 		if (flag[len]) {
 			next = flag + len + 1;
@@ -868,21 +866,22 @@ static struct bfs_expr *parse_debug(struct bfs_parser *parser, int arg1, int arg
  * Parse -On.
  */
 static struct bfs_expr *parse_optlevel(struct bfs_parser *parser, int arg1, int arg2) {
-	struct bfs_expr *expr = parse_nullary_flag(parser);
+	const char *arg;
+	struct bfs_expr *expr = parse_prefix_flag(parser, 'O', false, &arg);
 	if (!expr) {
 		return NULL;
 	}
 
 	int *optlevel = &parser->ctx->optlevel;
 
-	if (strcmp(expr->argv[0], "-Ofast") == 0) {
+	if (strcmp(arg, "fast") == 0) {
 		*optlevel = 4;
-	} else if (!parse_int(parser, expr->argv, expr->argv[0] + 2, optlevel, IF_INT | IF_UNSIGNED)) {
+	} else if (!parse_int(parser, expr->argv, arg, optlevel, IF_INT | IF_UNSIGNED)) {
 		return NULL;
 	}
 
 	if (*optlevel > 4) {
-		parse_expr_warning(parser, expr, "${cyn}-O${bld}%s${rs} is the same as ${cyn}-O${bld}4${rs}.\n\n", expr->argv[0] + 2);
+		parse_expr_warning(parser, expr, "${cyn}-O${bld}%s${rs} is the same as ${cyn}-O${bld}4${rs}.\n\n", arg);
 	}
 
 	return expr;
@@ -996,16 +995,16 @@ static struct bfs_expr *parse_time(struct bfs_parser *parser, int field, int arg
 		switch (*tail) {
 		case 'w':
 			time *= 7;
-			fallthru;
+			_fallthrough;
 		case 'd':
 			time *= 24;
-			fallthru;
+			_fallthrough;
 		case 'h':
 			time *= 60;
-			fallthru;
+			_fallthrough;
 		case 'm':
 			time *= 60;
-			fallthru;
+			_fallthrough;
 		case 's':
 			break;
 		default:
@@ -1117,7 +1116,7 @@ static struct bfs_expr *parse_fnmatch(const struct bfs_parser *parser, struct bf
 	// strcmp() can be much faster than fnmatch() since it doesn't have to
 	// parse the pattern, so special-case patterns with no wildcards.
 	//
-	//     https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_13_01
+	//     https://pubs.opengroup.org/onlinepubs/9799919799/utilities/V3_chap02.html#tag_19_14_01
 	expr->literal = strcspn(expr->pattern, "?*\\[") == len;
 
 	return expr;
@@ -1176,18 +1175,33 @@ static struct bfs_expr *parse_daystart(struct bfs_parser *parser, int arg1, int
  * Parse -delete.
  */
 static struct bfs_expr *parse_delete(struct bfs_parser *parser, int arg1, int arg2) {
-	parser->ctx->flags |= BFTW_POST_ORDER;
-	parser->depth_arg = parser->argv;
-	return parse_nullary_action(parser, eval_delete);
+	struct bfs_expr *expr = parse_nullary_action(parser, eval_delete);
+	if (!expr) {
+		return NULL;
+	}
+
+	struct bfs_ctx *ctx = parser->ctx;
+	ctx->flags |= BFTW_POST_ORDER;
+	ctx->dangerous = true;
+
+	parser->depth_expr = expr;
+	return expr;
 }
 
 /**
  * Parse -d.
  */
-static struct bfs_expr *parse_depth(struct bfs_parser *parser, int arg1, int arg2) {
+static struct bfs_expr *parse_depth(struct bfs_parser *parser, int flag, int arg2) {
+	struct bfs_expr *expr = flag
+		? parse_nullary_flag(parser)
+		: parse_nullary_option(parser);
+	if (!expr) {
+		return NULL;
+	}
+
 	parser->ctx->flags |= BFTW_POST_ORDER;
-	parser->depth_arg = parser->argv;
-	return parse_nullary_flag(parser);
+	parser->depth_expr = expr;
+	return expr;
 }
 
 /**
@@ -1233,11 +1247,48 @@ static struct bfs_expr *parse_empty(struct bfs_parser *parser, int arg1, int arg
 	return expr;
 }
 
+/** Find a relative $PATH entry that makes -execdir/-okdir unsafe; returns a pointer into $PATH at that entry, or NULL if there is none. */
+static const char *unsafe_path(const struct bfs_exec *execbuf) {
+	if (!(execbuf->flags & BFS_EXEC_CHDIR)) {
+		// Not -execdir or -okdir
+		return NULL;
+	}
+
+	const char *exe = execbuf->tmpl_argv[0];
+	if (strchr(exe, '/')) {
+		// No $PATH lookups for /foo or foo/bar
+		return NULL;
+	}
+
+	if (strstr(exe, "{}")) {
+		// Substituted paths always contain a /
+		return NULL;
+	}
+
+	const char *path = getenv("PATH");
+	while (path) {
+		if (path[0] != '/') {
+			// Relative (or empty) $PATH component; the returned string is not cut at the next ':'
+			return path;
+		}
+
+		path = strchr(path, ':');
+		if (path) {
+			++path;
+		}
+	}
+
+	// No relative components in $PATH
+	return NULL;
+}
+
 /**
  * Parse -exec(dir)?/-ok(dir)?.
  */
 static struct bfs_expr *parse_exec(struct bfs_parser *parser, int flags, int arg2) {
-	struct bfs_exec *execbuf = bfs_exec_parse(parser->ctx, parser->argv, flags);
+	struct bfs_ctx *ctx = parser->ctx;
+
+	struct bfs_exec *execbuf = bfs_exec_parse(ctx, parser->argv, flags);
 	if (!execbuf) {
 		return NULL;
 	}
@@ -1253,29 +1304,21 @@ static struct bfs_expr *parse_exec(struct bfs_parser *parser, int flags, int arg
 	// For pipe() in bfs_spawn()
 	expr->ephemeral_fds = 2;
 
-	if (execbuf->flags & BFS_EXEC_CHDIR) {
-		// Check for relative paths in $PATH
-		const char *path = getenv("PATH");
-		while (path) {
-			if (*path != '/') {
-				size_t len = strcspn(path, ":");
-				char *comp = strndup(path, len);
-				if (comp) {
-					parse_expr_error(parser, expr,
-						"This action would be unsafe, since ${bld}$$PATH${rs} contains the relative path ${bld}%pq${rs}\n", comp);
-					free(comp);
-				} else {
-					parse_perror(parser, "strndup()");
-				}
-				return NULL;
-			}
-
-			path = strchr(path, ':');
-			if (path) {
-				++path;
-			}
+	const char *unsafe = unsafe_path(execbuf);
+	if (unsafe) {
+		size_t len = strcspn(unsafe, ":");
+		char *comp = strndup(unsafe, len);
+		if (comp) {
+			parse_expr_error(parser, expr,
+				"This action would be unsafe, since ${bld}$$PATH${rs} contains the relative path ${bld}%pq${rs}\n", comp);
+			free(comp);
+		} else {
+			parse_perror(parser, "strndup()");
 		}
+		return NULL;
+	}
 
+	if (execbuf->flags & BFS_EXEC_CHDIR) {
 		// To dup() the parent directory
 		if (execbuf->flags & BFS_EXEC_MULTI) {
 			++expr->persistent_fds;
@@ -1285,7 +1328,11 @@ static struct bfs_expr *parse_exec(struct bfs_parser *parser, int flags, int arg
 	}
 
 	if (execbuf->flags & BFS_EXEC_CONFIRM) {
-		parser->ok_expr = expr;
+		if (!consume_stdin(parser, expr)) {
+			return NULL;
+		}
+	} else {
+		ctx->dangerous = true;
 	}
 
 	return expr;
@@ -1314,11 +1361,17 @@ static struct bfs_expr *parse_exit(struct bfs_parser *parser, int arg1, int arg2
  * Parse -f PATH.
  */
 static struct bfs_expr *parse_f(struct bfs_parser *parser, int arg1, int arg2) {
+	struct bfs_ctx *ctx = parser->ctx;
+
 	struct bfs_expr *expr = parse_unary_flag(parser);
 	if (!expr) {
 		return NULL;
 	}
 
+	// Mark the path as a path, not a regular argument
+	size_t i = expr->argv - ctx->argv;
+	ctx->kinds[i + 1] = BFS_PATH;
+
 	if (parse_root(parser, expr->argv[1]) != 0) {
 		return NULL;
 	}
@@ -1335,50 +1388,14 @@ static struct bfs_expr *parse_files0_from(struct bfs_parser *parser, int arg1, i
 		return NULL;
 	}
 
-	const char *from = expr->argv[1];
-
-	FILE *file;
-	if (strcmp(from, "-") == 0) {
-		file = stdin;
-	} else {
-		file = xfopen(from, O_RDONLY | O_CLOEXEC);
-	}
-	if (!file) {
-		parse_expr_error(parser, expr, "%m.\n");
-		return NULL;
-	}
-
-	while (true) {
-		char *path = xgetdelim(file, '\0');
-		if (!path) {
-			if (errno) {
-				goto fail;
-			} else {
-				break;
-			}
-		}
-
-		int ret = parse_root(parser, path);
-		free(path);
-		if (ret != 0) {
-			goto fail;
-		}
-	}
-
-	if (file == stdin) {
-		parser->files0_stdin_arg = expr->argv;
-	} else {
-		fclose(file);
-	}
-
-	parser->implicit_root = false;
+	// For compatibility with GNU find,
+	//
+	//     bfs -files0-from a -files0-from b
+	//
+	// should *only* use b, not a.  So stash the expression here and only
+	// process the last one at the end of parsing.
+	parser->files0_expr = expr;
 	return expr;
-
-fail:
-	if (file != stdin) {
-		fclose(file);
-	}
-	return NULL;
 }
 
 /**
@@ -1509,7 +1526,7 @@ static struct bfs_expr *parse_fstype(struct bfs_parser *parser, int arg1, int ar
 	}
 
 	if (!bfs_ctx_mtab(parser->ctx)) {
-		parse_expr_error(parser, expr, "Couldn't parse the mount table: %m.\n");
+		parse_expr_error(parser, expr, "Couldn't parse the mount table: %s.\n", errstr());
 		return NULL;
 	}
 
@@ -1534,7 +1551,7 @@ static struct bfs_expr *parse_group(struct bfs_parser *parser, int arg1, int arg
 			return NULL;
 		}
 	} else if (errno) {
-		parse_expr_error(parser, expr, "%m.\n");
+		parse_expr_error(parser, expr, "%s.\n", errstr());
 		return NULL;
 	} else {
 		parse_expr_error(parser, expr, "No such group.\n");
@@ -1577,7 +1594,7 @@ static struct bfs_expr *parse_user(struct bfs_parser *parser, int arg1, int arg2
 			return NULL;
 		}
 	} else if (errno) {
-		parse_expr_error(parser, expr, "%m.\n");
+		parse_expr_error(parser, expr, "%s.\n", errstr());
 		return NULL;
 	} else {
 		parse_expr_error(parser, expr, "No such user.\n");
@@ -1613,13 +1630,14 @@ static struct bfs_expr *parse_inum(struct bfs_parser *parser, int arg1, int arg2
  * Parse -j<n>.
  */
 static struct bfs_expr *parse_jobs(struct bfs_parser *parser, int arg1, int arg2) {
-	struct bfs_expr *expr = parse_nullary_flag(parser);
+	const char *arg;
+	struct bfs_expr *expr = parse_prefix_flag(parser, 'j', false, &arg);
 	if (!expr) {
 		return NULL;
 	}
 
 	unsigned int n;
-	if (!parse_int(parser, expr->argv, expr->argv[0] + 2, &n, IF_INT | IF_UNSIGNED)) {
+	if (!parse_int(parser, expr->argv, arg, &n, IF_INT | IF_UNSIGNED)) {
 		return NULL;
 	}
 
@@ -1647,11 +1665,11 @@ static struct bfs_expr *parse_limit(struct bfs_parser *parser, int arg1, int arg
 	}
 
 	if (expr->num <= 0) {
-		parse_expr_error(parser, expr, "The ${blu}%s${rs} must be at least ${bld}1${rs}.\n", expr->argv[0]);
+		parse_expr_error(parser, expr, "The %pX must be at least ${bld}1${rs}.\n", expr);
 		return NULL;
 	}
 
-	parser->limit_arg = expr->argv;
+	parser->limit_expr = expr;
 	return expr;
 }
 
@@ -1684,11 +1702,8 @@ static struct bfs_expr *parse_mount(struct bfs_parser *parser, int arg1, int arg
 		return NULL;
 	}
 
-	parse_expr_warning(parser, expr, "In the future, ${blu}%s${rs} will skip mount points entirely, unlike\n", expr->argv[0]);
-	bfs_warning(parser->ctx, "${blu}-xdev${rs}, due to http://austingroupbugs.net/view.php?id=1133.\n\n");
-
-	parser->ctx->flags |= BFTW_PRUNE_MOUNTS;
-	parser->mount_arg = expr->argv;
+	parser->ctx->flags |= BFTW_SKIP_MOUNTS;
+	parser->mount_expr = expr;
 	return expr;
 }
 
@@ -1737,7 +1752,7 @@ static int parse_reftime(const struct bfs_parser *parser, struct bfs_expr *expr)
 	if (xgetdate(expr->argv[1], &expr->reftime) == 0) {
 		return 0;
 	} else if (errno != EINVAL) {
-		parse_expr_error(parser, expr, "%m.\n");
+		parse_expr_error(parser, expr, "%s.\n", errstr());
 		return -1;
 	}
 
@@ -1831,6 +1846,14 @@ static struct bfs_expr *parse_newerxy(struct bfs_parser *parser, int arg1, int a
 }
 
 /**
+ * Parse -noerror, which sets ctx->ignore_errors.
+ */
+static struct bfs_expr *parse_noerror(struct bfs_parser *parser, int arg1, int arg2) {
+	parser->ctx->ignore_errors = true;
+	return parse_nullary_option(parser);
+}
+
+/**
  * Parse -nogroup.
  */
 static struct bfs_expr *parse_nogroup(struct bfs_parser *parser, int arg1, int arg2) {
@@ -1846,7 +1869,7 @@ static struct bfs_expr *parse_nogroup(struct bfs_parser *parser, int arg1, int a
  * Parse -nohidden.
  */
 static struct bfs_expr *parse_nohidden(struct bfs_parser *parser, int arg1, int arg2) {
-	struct bfs_expr *hidden = parse_new_expr(parser, eval_hidden, 1, &fake_hidden_arg);
+	struct bfs_expr *hidden = parse_new_expr(parser, eval_hidden, 1, &fake_hidden_arg, BFS_TEST);
 	if (!hidden) {
 		return NULL;
 	}
@@ -1859,9 +1882,15 @@ static struct bfs_expr *parse_nohidden(struct bfs_parser *parser, int arg1, int
  * Parse -noleaf.
  */
 static struct bfs_expr *parse_noleaf(struct bfs_parser *parser, int arg1, int arg2) {
-	parse_warning(parser, "${ex}%s${rs} does not apply the optimization that ${blu}%s${rs} inhibits.\n\n",
-		BFS_COMMAND, parser->argv[0]);
-	return parse_nullary_option(parser);
+	struct bfs_expr *expr = parse_nullary_option(parser);
+	if (!expr) {
+		return NULL;
+	}
+
+	parse_expr_warning(parser, expr,
+		"${ex}%s${rs} does not apply the optimization that %px inhibits.\n\n",
+		BFS_COMMAND, expr);
+	return expr;
 }
 
 /**
@@ -1894,6 +1923,8 @@ static int parse_mode(const struct bfs_parser *parser, const char *mode, struct
 		return 0;
 	}
 
+	mode_t umask = parser->ctx->umask;
+
 	expr->file_mode = 0;
 	expr->dir_mode = 0;
 
@@ -1914,7 +1945,7 @@ static int parse_mode(const struct bfs_parser *parser, const char *mode, struct
 	//
 	// PERMCOPY : "u" | "g" | "o"
 
-	// Parser machine parser
+	// State machine state
 	enum {
 		MODE_CLAUSE,
 		MODE_WHO,
@@ -1922,7 +1953,7 @@ static int parse_mode(const struct bfs_parser *parser, const char *mode, struct
 		MODE_ACTION_APPLY,
 		MODE_OP,
 		MODE_PERM,
-	} mparser = MODE_CLAUSE;
+	} state = MODE_CLAUSE;
 
 	enum {
 		MODE_PLUS,
@@ -1931,16 +1962,18 @@ static int parse_mode(const struct bfs_parser *parser, const char *mode, struct
 	} op uninit(MODE_EQUALS);
 
 	mode_t who uninit(0);
+	mode_t mask uninit(0);
 	mode_t file_change uninit(0);
 	mode_t dir_change uninit(0);
 
 	const char *i = mode;
 	while (true) {
-		switch (mparser) {
+		switch (state) {
 		case MODE_CLAUSE:
 			who = 0;
-			mparser = MODE_WHO;
-			fallthru;
+			mask = 0777;
+			state = MODE_WHO;
+			_fallthrough;
 
 		case MODE_WHO:
 			switch (*i) {
@@ -1957,7 +1990,7 @@ static int parse_mode(const struct bfs_parser *parser, const char *mode, struct
 				who |= 0777;
 				break;
 			default:
-				mparser = MODE_ACTION;
+				state = MODE_ACTION;
 				continue;
 			}
 			break;
@@ -1967,7 +2000,7 @@ static int parse_mode(const struct bfs_parser *parser, const char *mode, struct
 			case MODE_EQUALS:
 				expr->file_mode &= ~who;
 				expr->dir_mode &= ~who;
-				fallthru;
+				_fallthrough;
 			case MODE_PLUS:
 				expr->file_mode |= file_change;
 				expr->dir_mode |= dir_change;
@@ -1977,37 +2010,40 @@ static int parse_mode(const struct bfs_parser *parser, const char *mode, struct
 				expr->dir_mode &= ~dir_change;
 				break;
 			}
-			fallthru;
+			_fallthrough;
 
 		case MODE_ACTION:
 			if (who == 0) {
 				who = 0777;
+				mask = who & ~umask;
+			} else {
+				mask = who;
 			}
 
 			switch (*i) {
 			case '+':
 				op = MODE_PLUS;
-				mparser = MODE_OP;
+				state = MODE_OP;
 				break;
 			case '-':
 				op = MODE_MINUS;
-				mparser = MODE_OP;
+				state = MODE_OP;
 				break;
 			case '=':
 				op = MODE_EQUALS;
-				mparser = MODE_OP;
+				state = MODE_OP;
 				break;
 
 			case ',':
-				if (mparser == MODE_ACTION_APPLY) {
-					mparser = MODE_CLAUSE;
+				if (state == MODE_ACTION_APPLY) {
+					state = MODE_CLAUSE;
 				} else {
 					goto fail;
 				}
 				break;
 
 			case '\0':
-				if (mparser == MODE_ACTION_APPLY) {
+				if (state == MODE_ACTION_APPLY) {
 					goto done;
 				} else {
 					goto fail;
@@ -2036,32 +2072,32 @@ static int parse_mode(const struct bfs_parser *parser, const char *mode, struct
 			default:
 				file_change = 0;
 				dir_change = 0;
-				mparser = MODE_PERM;
+				state = MODE_PERM;
 				continue;
 			}
 
 			file_change |= (file_change << 6) | (file_change << 3);
-			file_change &= who;
+			file_change &= mask;
 			dir_change |= (dir_change << 6) | (dir_change << 3);
-			dir_change &= who;
-			mparser = MODE_ACTION_APPLY;
+			dir_change &= mask;
+			state = MODE_ACTION_APPLY;
 			break;
 
 		case MODE_PERM:
 			switch (*i) {
 			case 'r':
-				file_change |= who & 0444;
-				dir_change |= who & 0444;
+				file_change |= mask & 0444;
+				dir_change |= mask & 0444;
 				break;
 			case 'w':
-				file_change |= who & 0222;
-				dir_change |= who & 0222;
+				file_change |= mask & 0222;
+				dir_change |= mask & 0222;
 				break;
 			case 'x':
-				file_change |= who & 0111;
-				fallthru;
+				file_change |= mask & 0111;
+				_fallthrough;
 			case 'X':
-				dir_change |= who & 0111;
+				dir_change |= mask & 0111;
 				break;
 			case 's':
 				if (who & 0700) {
@@ -2080,7 +2116,7 @@ static int parse_mode(const struct bfs_parser *parser, const char *mode, struct
 				}
 				break;
 			default:
-				mparser = MODE_ACTION_APPLY;
+				state = MODE_ACTION_APPLY;
 				continue;
 			}
 			break;
@@ -2122,7 +2158,7 @@ static struct bfs_expr *parse_perm(struct bfs_parser *parser, int field, int arg
 			++mode;
 			break;
 		}
-		fallthru;
+		_fallthrough;
 	default:
 		expr->mode_cmp = BFS_MODE_EQUAL;
 		break;
@@ -2190,8 +2226,13 @@ static struct bfs_expr *parse_printx(struct bfs_parser *parser, int arg1, int ar
  * Parse -prune.
  */
 static struct bfs_expr *parse_prune(struct bfs_parser *parser, int arg1, int arg2) {
-	parser->prune_arg = parser->argv;
-	return parse_nullary_action(parser, eval_prune);
+	struct bfs_expr *expr = parse_nullary_action(parser, eval_prune);
+	if (!expr) {
+		return NULL;
+	}
+
+	parser->prune_expr = expr;
+	return expr;
 }
 
 /**
@@ -2253,16 +2294,27 @@ static struct bfs_expr *parse_regextype(struct bfs_parser *parser, int arg1, int
 	// See https://www.gnu.org/software/gnulib/manual/html_node/Predefined-Syntaxes.html
 	const char *type = expr->argv[1];
 	if (strcmp(type, "posix-basic") == 0
+	    || strcmp(type, "posix-minimal-basic") == 0
 	    || strcmp(type, "ed") == 0
 	    || strcmp(type, "sed") == 0) {
 		parser->regex_type = BFS_REGEX_POSIX_BASIC;
 	} else if (strcmp(type, "posix-extended") == 0) {
 		parser->regex_type = BFS_REGEX_POSIX_EXTENDED;
-#if BFS_USE_ONIGURUMA
+#if BFS_WITH_ONIGURUMA
+	} else if (strcmp(type, "awk") == 0
+	           || strcmp(type, "posix-awk") == 0) {
+		parser->regex_type = BFS_REGEX_AWK;
+	} else if (strcmp(type, "gnu-awk") == 0) {
+		parser->regex_type = BFS_REGEX_GNU_AWK;
 	} else if (strcmp(type, "emacs") == 0) {
 		parser->regex_type = BFS_REGEX_EMACS;
 	} else if (strcmp(type, "grep") == 0) {
 		parser->regex_type = BFS_REGEX_GREP;
+	} else if (strcmp(type, "egrep") == 0
+	           || strcmp(type, "posix-egrep") == 0) {
+		parser->regex_type = BFS_REGEX_EGREP;
+	} else if (strcmp(type, "findutils-default") == 0) {
+		parser->regex_type = BFS_REGEX_GNU_FIND;
 #endif
 	} else if (strcmp(type, "help") == 0) {
 		parser->just_info = true;
@@ -2277,14 +2329,23 @@ static struct bfs_expr *parse_regextype(struct bfs_parser *parser, int arg1, int
 
 list_types:
 	cfprintf(cfile, "Supported types are:\n\n");
-	cfprintf(cfile, "  ${bld}posix-basic${rs}:    POSIX basic regular expressions (BRE)\n");
-	cfprintf(cfile, "  ${bld}posix-extended${rs}: POSIX extended regular expressions (ERE)\n");
-	cfprintf(cfile, "  ${bld}ed${rs}:             Like ${grn}ed${rs} (same as ${bld}posix-basic${rs})\n");
-#if BFS_USE_ONIGURUMA
-	cfprintf(cfile, "  ${bld}emacs${rs}:          Like ${grn}emacs${rs}\n");
-	cfprintf(cfile, "  ${bld}grep${rs}:           Like ${grn}grep${rs}\n");
+	cfprintf(cfile, "        ${bld}posix-basic${rs}: POSIX basic regular expressions (BRE)\n");
+	cfprintf(cfile, "                 ${bld}ed${rs}: Like ${grn}ed${rs} (same as ${bld}posix-basic${rs})\n");
+	cfprintf(cfile, "                ${bld}sed${rs}: Like ${grn}sed${rs} (same as ${bld}posix-basic${rs})\n\n");
+
+	cfprintf(cfile, "     ${bld}posix-extended${rs}: POSIX extended regular expressions (ERE)\n\n");
+
+#if BFS_WITH_ONIGURUMA
+	cfprintf(cfile, "        [${bld}posix-${rs}]${bld}awk${rs}: Like ${grn}awk${rs}\n");
+	cfprintf(cfile, "            ${bld}gnu-awk${rs}: Like GNU ${grn}awk${rs}\n\n");
+
+	cfprintf(cfile, "              ${bld}emacs${rs}: Like ${grn}emacs${rs}\n\n");
+
+	cfprintf(cfile, "               ${bld}grep${rs}: Like ${grn}grep${rs}\n");
+	cfprintf(cfile, "      [${bld}posix-${rs}]${bld}egrep${rs}: Like ${grn}grep${rs} ${cyn}-E${rs}\n\n");
+
+	cfprintf(cfile, "  ${bld}findutils-default${rs}: Like GNU ${grn}find${rs}\n");
 #endif
-	cfprintf(cfile, "  ${bld}sed${rs}:            Like ${grn}sed${rs} (same as ${bld}posix-basic${rs})\n");
 	return NULL;
 }
 
@@ -2322,13 +2383,13 @@ static struct bfs_expr *parse_search_strategy(struct bfs_parser *parser, int arg
 	struct bfs_ctx *ctx = parser->ctx;
 	CFILE *cfile = ctx->cerr;
 
-	struct bfs_expr *expr = parse_unary_flag(parser);
+	const char *arg;
+	struct bfs_expr *expr = parse_prefix_flag(parser, 'S', true, &arg);
 	if (!expr) {
 		cfprintf(cfile, "\n");
 		goto list_strategies;
 	}
 
-	const char *arg = expr->argv[1];
 	if (strcmp(arg, "bfs") == 0) {
 		ctx->strategy = BFTW_BFS;
 	} else if (strcmp(arg, "dfs") == 0) {
@@ -2549,9 +2610,14 @@ static struct bfs_expr *parse_xattrname(struct bfs_parser *parser, int arg1, int
  * Parse -xdev.
  */
 static struct bfs_expr *parse_xdev(struct bfs_parser *parser, int arg1, int arg2) {
+	struct bfs_expr *expr = parse_nullary_option(parser);
+	if (!expr) {
+		return NULL;
+	}
+
 	parser->ctx->flags |= BFTW_PRUNE_MOUNTS;
-	parser->xdev_arg = parser->argv;
-	return parse_nullary_option(parser);
+	parser->xdev_expr = expr;
+	return expr;
 }
 
 /**
@@ -2744,8 +2810,9 @@ static struct bfs_expr *parse_help(struct bfs_parser *parser, int arg1, int arg2
 	cfprintf(cout, "  ${blu}-mindepth${rs} ${bld}N${rs}\n");
 	cfprintf(cout, "      Ignore files deeper/shallower than ${bld}N${rs}\n");
 	cfprintf(cout, "  ${blu}-mount${rs}\n");
-	cfprintf(cout, "      Don't descend into other mount points (same as ${blu}-xdev${rs} for now, but will\n");
-	cfprintf(cout, "      skip mount points entirely in the future)\n");
+	cfprintf(cout, "      Exclude mount points entirely from the results\n");
+	cfprintf(cout, "  ${blu}-noerror${rs}\n");
+	cfprintf(cout, "      Ignore any errors that occur during traversal\n");
 	cfprintf(cout, "  ${blu}-nohidden${rs}\n");
 	cfprintf(cout, "      Exclude hidden files\n");
 	cfprintf(cout, "  ${blu}-noleaf${rs}\n");
@@ -2922,18 +2989,60 @@ static struct bfs_expr *parse_help(struct bfs_parser *parser, int arg1, int arg2
 	return NULL;
 }
 
+/** Print the bfs "logo": a box around bfs_version if cout has colors, otherwise (or if dstrepeat() fails) a plain "command version" line. */
+static void print_logo(CFILE *cout) {
+	if (!cout->colors) {
+		goto boring;
+	}
+
+	size_t vwidth = xstrwidth(bfs_version);
+	dchar *spaces = dstrepeat(" ", vwidth);
+	dchar *lines = dstrepeat("─", vwidth);
+	if (!spaces || !lines) {
+		dstrfree(lines);
+		dstrfree(spaces);
+		goto boring;
+	}
+
+	// We do ----\r<emoji> rather than <emoji>--- so we don't have to assume
+	// anything about the width of the emoji
+	cfprintf(cout, "╭─────%s╮\r📂\n", lines);
+	cfprintf(cout, "├${ex}b${rs}   %s │\n", spaces);
+	cfprintf(cout, "╰├${ex}f${rs}  ${bld}%s${rs} │\n", bfs_version);
+	cfprintf(cout, " ╰├${ex}s${rs} %s │\n", spaces);
+	cfprintf(cout, "  ╰──%s─╯\n\n", lines);
+
+	dstrfree(lines);
+	dstrfree(spaces);
+	return;
+
+boring:
+	printf("%s %s\n\n", BFS_COMMAND, bfs_version);
+}
+
 /**
  * "Parse" -version.
  */
 static struct bfs_expr *parse_version(struct bfs_parser *parser, int arg1, int arg2) {
-	cfprintf(parser->ctx->cout, "${ex}%s${rs} ${bld}%s${rs}\n\n", BFS_COMMAND, bfs_version);
+	print_logo(parser->ctx->cout);
+
+	printf("Copyright © Tavian Barnes and the bfs contributors\n");
+	printf("No rights reserved (https://opensource.org/license/0BSD)\n\n");
+
+	printf("CONFFLAGS := %s\n", bfs_confflags);
+	printf("CC        := %s\n", bfs_cc);
+	printf("CPPFLAGS  := %s\n", bfs_cppflags);
+	printf("CFLAGS    := %s\n", bfs_cflags);
+	printf("LDFLAGS   := %s\n", bfs_ldflags);
+	printf("LDLIBS    := %s\n", bfs_ldlibs);
 
-	printf("%s\n", BFS_HOMEPAGE);
+	printf("\n%s\n", BFS_HOMEPAGE);
 
 	parser->just_info = true;
 	return NULL;
 }
 
+/** Parser callback function type. */
 typedef struct bfs_expr *parse_fn(struct bfs_parser *parser, int arg1, int arg2);
 
 /**
@@ -2941,137 +3050,139 @@ typedef struct bfs_expr *parse_fn(struct bfs_parser *parser, int arg1, int arg2)
  */
 struct table_entry {
 	char *arg;
-	enum token_type type;
+	enum bfs_kind kind;
 	parse_fn *parse;
 	int arg1;
 	int arg2;
 	bool prefix;
+	bool needs_arg;
 };
 
 /**
  * The parse table for primary expressions.
  */
 static const struct table_entry parse_table[] = {
-	{"--", T_FLAG},
-	{"--help", T_ACTION, parse_help},
-	{"--version", T_ACTION, parse_version},
-	{"-Bmin", T_TEST, parse_min, BFS_STAT_BTIME},
-	{"-Bnewer", T_TEST, parse_newer, BFS_STAT_BTIME},
-	{"-Bsince", T_TEST, parse_since, BFS_STAT_BTIME},
-	{"-Btime", T_TEST, parse_time, BFS_STAT_BTIME},
-	{"-D", T_FLAG, parse_debug},
-	{"-E", T_FLAG, parse_regex_extended},
-	{"-H", T_FLAG, parse_follow, BFTW_FOLLOW_ROOTS, false},
-	{"-L", T_FLAG, parse_follow, BFTW_FOLLOW_ALL, false},
-	{"-O", T_FLAG, parse_optlevel, 0, 0, true},
-	{"-P", T_FLAG, parse_follow, 0, false},
-	{"-S", T_FLAG, parse_search_strategy},
-	{"-X", T_FLAG, parse_xargs_safe},
-	{"-a", T_OPERATOR},
-	{"-acl", T_TEST, parse_acl},
-	{"-amin", T_TEST, parse_min, BFS_STAT_ATIME},
-	{"-and", T_OPERATOR},
-	{"-anewer", T_TEST, parse_newer, BFS_STAT_ATIME},
-	{"-asince", T_TEST, parse_since, BFS_STAT_ATIME},
-	{"-atime", T_TEST, parse_time, BFS_STAT_ATIME},
-	{"-capable", T_TEST, parse_capable},
-	{"-cmin", T_TEST, parse_min, BFS_STAT_CTIME},
-	{"-cnewer", T_TEST, parse_newer, BFS_STAT_CTIME},
-	{"-color", T_OPTION, parse_color, true},
-	{"-context", T_TEST, parse_context, true},
-	{"-csince", T_TEST, parse_since, BFS_STAT_CTIME},
-	{"-ctime", T_TEST, parse_time, BFS_STAT_CTIME},
-	{"-d", T_FLAG, parse_depth},
-	{"-daystart", T_OPTION, parse_daystart},
-	{"-delete", T_ACTION, parse_delete},
-	{"-depth", T_OPTION, parse_depth_n},
-	{"-empty", T_TEST, parse_empty},
-	{"-exclude", T_OPERATOR},
-	{"-exec", T_ACTION, parse_exec, 0},
-	{"-execdir", T_ACTION, parse_exec, BFS_EXEC_CHDIR},
-	{"-executable", T_TEST, parse_access, X_OK},
-	{"-exit", T_ACTION, parse_exit},
-	{"-f", T_FLAG, parse_f},
-	{"-false", T_TEST, parse_const, false},
-	{"-files0-from", T_OPTION, parse_files0_from},
-	{"-flags", T_TEST, parse_flags},
-	{"-fls", T_ACTION, parse_fls},
-	{"-follow", T_OPTION, parse_follow, BFTW_FOLLOW_ALL, true},
-	{"-fprint", T_ACTION, parse_fprint},
-	{"-fprint0", T_ACTION, parse_fprint0},
-	{"-fprintf", T_ACTION, parse_fprintf},
-	{"-fstype", T_TEST, parse_fstype},
-	{"-gid", T_TEST, parse_group},
-	{"-group", T_TEST, parse_group},
-	{"-help", T_ACTION, parse_help},
-	{"-hidden", T_TEST, parse_hidden},
-	{"-ignore_readdir_race", T_OPTION, parse_ignore_races, true},
-	{"-ilname", T_TEST, parse_lname, true},
-	{"-iname", T_TEST, parse_name, true},
-	{"-inum", T_TEST, parse_inum},
-	{"-ipath", T_TEST, parse_path, true},
-	{"-iregex", T_TEST, parse_regex, BFS_REGEX_ICASE},
-	{"-iwholename", T_TEST, parse_path, true},
-	{"-j", T_FLAG, parse_jobs, 0, 0, true},
-	{"-limit", T_ACTION, parse_limit},
-	{"-links", T_TEST, parse_links},
-	{"-lname", T_TEST, parse_lname, false},
-	{"-ls", T_ACTION, parse_ls},
-	{"-maxdepth", T_OPTION, parse_depth_limit, false},
-	{"-mindepth", T_OPTION, parse_depth_limit, true},
-	{"-mmin", T_TEST, parse_min, BFS_STAT_MTIME},
-	{"-mnewer", T_TEST, parse_newer, BFS_STAT_MTIME},
-	{"-mount", T_OPTION, parse_mount},
-	{"-msince", T_TEST, parse_since, BFS_STAT_MTIME},
-	{"-mtime", T_TEST, parse_time, BFS_STAT_MTIME},
-	{"-name", T_TEST, parse_name, false},
-	{"-newer", T_TEST, parse_newer, BFS_STAT_MTIME},
-	{"-newer", T_TEST, parse_newerxy, 0, 0, true},
-	{"-nocolor", T_OPTION, parse_color, false},
-	{"-nogroup", T_TEST, parse_nogroup},
-	{"-nohidden", T_TEST, parse_nohidden},
-	{"-noignore_readdir_race", T_OPTION, parse_ignore_races, false},
-	{"-noleaf", T_OPTION, parse_noleaf},
-	{"-not", T_OPERATOR},
-	{"-nouser", T_TEST, parse_nouser},
-	{"-nowarn", T_OPTION, parse_warn, false},
-	{"-o", T_OPERATOR},
-	{"-ok", T_ACTION, parse_exec, BFS_EXEC_CONFIRM},
-	{"-okdir", T_ACTION, parse_exec, BFS_EXEC_CONFIRM | BFS_EXEC_CHDIR},
-	{"-or", T_OPERATOR},
-	{"-path", T_TEST, parse_path, false},
-	{"-perm", T_TEST, parse_perm},
-	{"-print", T_ACTION, parse_print},
-	{"-print0", T_ACTION, parse_print0},
-	{"-printf", T_ACTION, parse_printf},
-	{"-printx", T_ACTION, parse_printx},
-	{"-prune", T_ACTION, parse_prune},
-	{"-quit", T_ACTION, parse_quit},
-	{"-readable", T_TEST, parse_access, R_OK},
-	{"-regex", T_TEST, parse_regex, 0},
-	{"-regextype", T_OPTION, parse_regextype},
-	{"-rm", T_ACTION, parse_delete},
-	{"-s", T_FLAG, parse_s},
-	{"-samefile", T_TEST, parse_samefile},
-	{"-since", T_TEST, parse_since, BFS_STAT_MTIME},
-	{"-size", T_TEST, parse_size},
-	{"-sparse", T_TEST, parse_sparse},
-	{"-status", T_OPTION, parse_status},
-	{"-true", T_TEST, parse_const, true},
-	{"-type", T_TEST, parse_type, false},
-	{"-uid", T_TEST, parse_user},
-	{"-unique", T_OPTION, parse_unique},
-	{"-used", T_TEST, parse_used},
-	{"-user", T_TEST, parse_user},
-	{"-version", T_ACTION, parse_version},
-	{"-warn", T_OPTION, parse_warn, true},
-	{"-wholename", T_TEST, parse_path, false},
-	{"-writable", T_TEST, parse_access, W_OK},
-	{"-x", T_FLAG, parse_xdev},
-	{"-xattr", T_TEST, parse_xattr},
-	{"-xattrname", T_TEST, parse_xattrname},
-	{"-xdev", T_OPTION, parse_xdev},
-	{"-xtype", T_TEST, parse_type, true},
+	{"--", BFS_FLAG},
+	{"--help", BFS_ACTION, parse_help},
+	{"--version", BFS_ACTION, parse_version},
+	{"-Bmin", BFS_TEST, parse_min, BFS_STAT_BTIME},
+	{"-Bnewer", BFS_TEST, parse_newer, BFS_STAT_BTIME},
+	{"-Bsince", BFS_TEST, parse_since, BFS_STAT_BTIME},
+	{"-Btime", BFS_TEST, parse_time, BFS_STAT_BTIME},
+	{"-D", BFS_FLAG, parse_debug, .prefix = true},
+	{"-E", BFS_FLAG, parse_regex_extended},
+	{"-H", BFS_FLAG, parse_follow, BFTW_FOLLOW_ROOTS, false},
+	{"-L", BFS_FLAG, parse_follow, BFTW_FOLLOW_ALL, false},
+	{"-O", BFS_FLAG, parse_optlevel, .prefix = true},
+	{"-P", BFS_FLAG, parse_follow, 0, false},
+	{"-S", BFS_FLAG, parse_search_strategy, .prefix = true},
+	{"-X", BFS_FLAG, parse_xargs_safe},
+	{"-a", BFS_OPERATOR},
+	{"-acl", BFS_TEST, parse_acl},
+	{"-amin", BFS_TEST, parse_min, BFS_STAT_ATIME},
+	{"-and", BFS_OPERATOR},
+	{"-anewer", BFS_TEST, parse_newer, BFS_STAT_ATIME},
+	{"-asince", BFS_TEST, parse_since, BFS_STAT_ATIME},
+	{"-atime", BFS_TEST, parse_time, BFS_STAT_ATIME},
+	{"-capable", BFS_TEST, parse_capable},
+	{"-cmin", BFS_TEST, parse_min, BFS_STAT_CTIME},
+	{"-cnewer", BFS_TEST, parse_newer, BFS_STAT_CTIME},
+	{"-color", BFS_OPTION, parse_color, true},
+	{"-context", BFS_TEST, parse_context, true},
+	{"-csince", BFS_TEST, parse_since, BFS_STAT_CTIME},
+	{"-ctime", BFS_TEST, parse_time, BFS_STAT_CTIME},
+	{"-d", BFS_FLAG, parse_depth, true},
+	{"-daystart", BFS_OPTION, parse_daystart},
+	{"-delete", BFS_ACTION, parse_delete},
+	{"-depth", BFS_OPTION, parse_depth_n, false},
+	{"-empty", BFS_TEST, parse_empty},
+	{"-exclude", BFS_OPERATOR},
+	{"-exec", BFS_ACTION, parse_exec, 0},
+	{"-execdir", BFS_ACTION, parse_exec, BFS_EXEC_CHDIR},
+	{"-executable", BFS_TEST, parse_access, X_OK},
+	{"-exit", BFS_ACTION, parse_exit},
+	{"-f", BFS_FLAG, parse_f, .needs_arg = true},
+	{"-false", BFS_TEST, parse_const, false},
+	{"-files0-from", BFS_OPTION, parse_files0_from},
+	{"-flags", BFS_TEST, parse_flags},
+	{"-fls", BFS_ACTION, parse_fls},
+	{"-follow", BFS_OPTION, parse_follow, BFTW_FOLLOW_ALL, true},
+	{"-fprint", BFS_ACTION, parse_fprint},
+	{"-fprint0", BFS_ACTION, parse_fprint0},
+	{"-fprintf", BFS_ACTION, parse_fprintf},
+	{"-fstype", BFS_TEST, parse_fstype},
+	{"-gid", BFS_TEST, parse_group},
+	{"-group", BFS_TEST, parse_group},
+	{"-help", BFS_ACTION, parse_help},
+	{"-hidden", BFS_TEST, parse_hidden},
+	{"-ignore_readdir_race", BFS_OPTION, parse_ignore_races, true},
+	{"-ilname", BFS_TEST, parse_lname, true},
+	{"-iname", BFS_TEST, parse_name, true},
+	{"-inum", BFS_TEST, parse_inum},
+	{"-ipath", BFS_TEST, parse_path, true},
+	{"-iregex", BFS_TEST, parse_regex, BFS_REGEX_ICASE},
+	{"-iwholename", BFS_TEST, parse_path, true},
+	{"-j", BFS_FLAG, parse_jobs, .prefix = true},
+	{"-limit", BFS_ACTION, parse_limit},
+	{"-links", BFS_TEST, parse_links},
+	{"-lname", BFS_TEST, parse_lname, false},
+	{"-ls", BFS_ACTION, parse_ls},
+	{"-maxdepth", BFS_OPTION, parse_depth_limit, false},
+	{"-mindepth", BFS_OPTION, parse_depth_limit, true},
+	{"-mmin", BFS_TEST, parse_min, BFS_STAT_MTIME},
+	{"-mnewer", BFS_TEST, parse_newer, BFS_STAT_MTIME},
+	{"-mount", BFS_OPTION, parse_mount},
+	{"-msince", BFS_TEST, parse_since, BFS_STAT_MTIME},
+	{"-mtime", BFS_TEST, parse_time, BFS_STAT_MTIME},
+	{"-name", BFS_TEST, parse_name, false},
+	{"-newer", BFS_TEST, parse_newer, BFS_STAT_MTIME},
+	{"-newer", BFS_TEST, parse_newerxy, .prefix = true},
+	{"-nocolor", BFS_OPTION, parse_color, false},
+	{"-noerror", BFS_OPTION, parse_noerror},
+	{"-nogroup", BFS_TEST, parse_nogroup},
+	{"-nohidden", BFS_TEST, parse_nohidden},
+	{"-noignore_readdir_race", BFS_OPTION, parse_ignore_races, false},
+	{"-noleaf", BFS_OPTION, parse_noleaf},
+	{"-not", BFS_OPERATOR},
+	{"-nouser", BFS_TEST, parse_nouser},
+	{"-nowarn", BFS_OPTION, parse_warn, false},
+	{"-o", BFS_OPERATOR},
+	{"-ok", BFS_ACTION, parse_exec, BFS_EXEC_CONFIRM},
+	{"-okdir", BFS_ACTION, parse_exec, BFS_EXEC_CONFIRM | BFS_EXEC_CHDIR},
+	{"-or", BFS_OPERATOR},
+	{"-path", BFS_TEST, parse_path, false},
+	{"-perm", BFS_TEST, parse_perm},
+	{"-print", BFS_ACTION, parse_print},
+	{"-print0", BFS_ACTION, parse_print0},
+	{"-printf", BFS_ACTION, parse_printf},
+	{"-printx", BFS_ACTION, parse_printx},
+	{"-prune", BFS_ACTION, parse_prune},
+	{"-quit", BFS_ACTION, parse_quit},
+	{"-readable", BFS_TEST, parse_access, R_OK},
+	{"-regex", BFS_TEST, parse_regex, 0},
+	{"-regextype", BFS_OPTION, parse_regextype},
+	{"-rm", BFS_ACTION, parse_delete},
+	{"-s", BFS_FLAG, parse_s},
+	{"-samefile", BFS_TEST, parse_samefile},
+	{"-since", BFS_TEST, parse_since, BFS_STAT_MTIME},
+	{"-size", BFS_TEST, parse_size},
+	{"-sparse", BFS_TEST, parse_sparse},
+	{"-status", BFS_OPTION, parse_status},
+	{"-true", BFS_TEST, parse_const, true},
+	{"-type", BFS_TEST, parse_type, false},
+	{"-uid", BFS_TEST, parse_user},
+	{"-unique", BFS_OPTION, parse_unique},
+	{"-used", BFS_TEST, parse_used},
+	{"-user", BFS_TEST, parse_user},
+	{"-version", BFS_ACTION, parse_version},
+	{"-warn", BFS_OPTION, parse_warn, true},
+	{"-wholename", BFS_TEST, parse_path, false},
+	{"-writable", BFS_TEST, parse_access, W_OK},
+	{"-x", BFS_FLAG, parse_xdev},
+	{"-xattr", BFS_TEST, parse_xattr},
+	{"-xattrname", BFS_TEST, parse_xattrname},
+	{"-xdev", BFS_OPTION, parse_xdev},
+	{"-xtype", BFS_TEST, parse_type, true},
 	{0},
 };
 
@@ -3092,6 +3203,83 @@ static const struct table_entry *table_lookup(const char *arg) {
 	return NULL;
 }
 
+/** Look up a single-character flag in the parse table; returns its BFS_FLAG entry, or NULL if no entry matches. */
+static const struct table_entry *flag_lookup(char flag) {
+	for (const struct table_entry *entry = parse_table; entry->arg; ++entry) { // stop at the entry whose arg is NULL
+		enum bfs_kind kind = entry->kind;
+		if (kind == BFS_FLAG && entry->arg[1] == flag && !entry->arg[2]) { // name is arg[0], then flag, then the terminator
+			return entry;
+		}
+	}
+
+	return NULL;
+}
+
+/** Check for a multi-flag argument like -LEXO2; true only if every flag checked is in the table and one is a capital. */
+static bool is_flag_group(const char *arg) {
+	// We enforce that at least one flag in a flag group must be a capital
+	// letter, to avoid ambiguity with primary expressions
+	bool has_upper = false;
+
+	// Flags that take an argument must appear last
+	bool needs_arg = false;
+
+	for (size_t i = 1; arg[i]; ++i) { // arg[0] is skipped; only the characters after it are flags
+		char c = arg[i];
+		if (c >= 'A' && c <= 'Z') {
+			has_upper = true;
+		}
+
+		if (needs_arg) { // an earlier flag takes an argument but is followed by another character
+			return false;
+		}
+
+		const struct table_entry *entry = flag_lookup(c);
+		if (!entry || !entry->parse) { // not a flag, or a flag entry without a parse callback
+			return false;
+		}
+
+		if (entry->prefix) {
+			// The rest is the flag's argument
+			break;
+		}
+
+		needs_arg |= entry->needs_arg;
+	}
+
+	return has_upper; // false unless a capital letter was seen before the loop ended
+}
+
+/** Parse a multi-flag argument; returns the last flag's expression, or NULL once a flag's parse callback returns NULL. */
+static struct bfs_expr *parse_flag_group(struct bfs_parser *parser) {
+	struct bfs_expr *expr = NULL;
+
+	char **start = parser->argv;
+	char **end = start; // furthest position any callback has advanced parser->argv to
+	const char *arg = start[0];
+
+	for (size_t i = 1; arg[i]; ++i) {
+		parser->argv = start; // rewind, so every flag's callback starts from the flag group itself
+
+		const struct table_entry *entry = flag_lookup(arg[i]); // not NULL-checked; parse_primary() calls is_flag_group() first
+		expr = entry->parse(parser, entry->arg1, entry->arg2);
+
+		if (parser->argv > end) {
+			end = parser->argv;
+		}
+
+		if (!expr || entry->prefix) { // stop on a NULL result, or when the rest of arg belongs to this flag
+			break;
+		}
+	}
+
+	if (expr) {
+		bfs_assert(parser->argv == end, "Didn't eat enough tokens"); // the last callback must have advanced as far as any earlier one
+	}
+
+	return expr;
+}
+
 /** Search for a fuzzy match in the parse table. */
 static const struct table_entry *table_lookup_fuzzy(const char *arg) {
 	const struct table_entry *best = NULL;
@@ -3114,6 +3302,8 @@ static const struct table_entry *table_lookup_fuzzy(const char *arg) {
  *         | ACTION
  */
 static struct bfs_expr *parse_primary(struct bfs_parser *parser) {
+	struct bfs_ctx *ctx = parser->ctx;
+
 	// Paths are already skipped at this point
 	const char *arg = parser->argv[0];
 
@@ -3130,15 +3320,19 @@ static struct bfs_expr *parse_primary(struct bfs_parser *parser) {
 		}
 	}
 
+	if (is_flag_group(arg)) {
+		return parse_flag_group(parser);
+	}
+
 	match = table_lookup_fuzzy(arg);
 
-	CFILE *cerr = parser->ctx->cerr;
+	CFILE *cerr = ctx->cerr;
 	parse_error(parser, "Unknown argument; did you mean ");
-	switch (match->type) {
-	case T_FLAG:
+	switch (match->kind) {
+	case BFS_FLAG:
 		cfprintf(cerr, "${cyn}%s${rs}?", match->arg);
 		break;
-	case T_OPERATOR:
+	case BFS_OPERATOR:
 		cfprintf(cerr, "${red}%s${rs}?", match->arg);
 		break;
 	default:
@@ -3146,7 +3340,7 @@ static struct bfs_expr *parse_primary(struct bfs_parser *parser) {
 		break;
 	}
 
-	if (!parser->interactive || !match->parse) {
+	if (!ctx->interactive || !match->parse) {
 		fprintf(stderr, "\n");
 		goto unmatched;
 	}
@@ -3188,7 +3382,7 @@ static struct bfs_expr *parse_factor(struct bfs_parser *parser) {
 	}
 
 	if (strcmp(arg, "(") == 0) {
-		parser_advance(parser, T_OPERATOR, 1);
+		parser_advance(parser, BFS_OPERATOR, 1);
 
 		struct bfs_expr *expr = parse_expr(parser);
 		if (!expr) {
@@ -3205,7 +3399,7 @@ static struct bfs_expr *parse_factor(struct bfs_parser *parser) {
 			return NULL;
 		}
 
-		parser_advance(parser, T_OPERATOR, 1);
+		parser_advance(parser, BFS_OPERATOR, 1);
 		return expr;
 	} else if (strcmp(arg, "-exclude") == 0) {
 		if (parser->excluding) {
@@ -3213,7 +3407,7 @@ static struct bfs_expr *parse_factor(struct bfs_parser *parser) {
 			return NULL;
 		}
 
-		char **argv = parser_advance(parser, T_OPERATOR, 1);
+		char **argv = parser_advance(parser, BFS_OPERATOR, 1);
 		parser->excluding = true;
 
 		struct bfs_expr *factor = parse_factor(parser);
@@ -3224,9 +3418,9 @@ static struct bfs_expr *parse_factor(struct bfs_parser *parser) {
 		parser->excluding = false;
 
 		bfs_expr_append(parser->ctx->exclude, factor);
-		return parse_new_expr(parser, eval_true, parser->argv - argv, argv);
+		return parse_new_expr(parser, eval_true, parser->argv - argv, argv, BFS_OPERATOR);
 	} else if (strcmp(arg, "!") == 0 || strcmp(arg, "-not") == 0) {
-		char **argv = parser_advance(parser, T_OPERATOR, 1);
+		char **argv = parser_advance(parser, BFS_OPERATOR, 1);
 
 		struct bfs_expr *factor = parse_factor(parser);
 		if (!factor) {
@@ -3266,7 +3460,7 @@ static struct bfs_expr *parse_term(struct bfs_parser *parser) {
 
 		char **argv = &fake_and_arg;
 		if (strcmp(arg, "-a") == 0 || strcmp(arg, "-and") == 0) {
-			argv = parser_advance(parser, T_OPERATOR, 1);
+			argv = parser_advance(parser, BFS_OPERATOR, 1);
 		}
 
 		struct bfs_expr *lhs = term;
@@ -3303,7 +3497,7 @@ static struct bfs_expr *parse_clause(struct bfs_parser *parser) {
 			break;
 		}
 
-		char **argv = parser_advance(parser, T_OPERATOR, 1);
+		char **argv = parser_advance(parser, BFS_OPERATOR, 1);
 
 		struct bfs_expr *lhs = clause;
 		struct bfs_expr *rhs = parse_term(parser);
@@ -3338,7 +3532,7 @@ static struct bfs_expr *parse_expr(struct bfs_parser *parser) {
 			break;
 		}
 
-		char **argv = parser_advance(parser, T_OPERATOR, 1);
+		char **argv = parser_advance(parser, BFS_OPERATOR, 1);
 
 		struct bfs_expr *lhs = expr;
 		struct bfs_expr *rhs = parse_clause(parser);
@@ -3352,10 +3546,79 @@ static struct bfs_expr *parse_expr(struct bfs_parser *parser) {
 	return expr;
 }
 
+/** Handle -files0-from after parsing: read NUL-delimited paths from expr->argv[1] (stdin if it is "-") and pass each to parse_root(); returns 0 on success, -1 on failure. */
+static int parse_files0_roots(struct bfs_parser *parser) {
+	const struct bfs_ctx *ctx = parser->ctx;
+	const struct bfs_expr *expr = parser->files0_expr;
+
+	if (ctx->npaths > 0) { // roots were also given explicitly; report the conflict and fail
+		bool highlight[ctx->argc]; // one entry per command-line argument
+		init_highlight(ctx, highlight);
+		highlight_args(ctx, expr->argv, expr->argc, highlight);
+
+		for (size_t i = 0; i < ctx->argc; ++i) {
+			if (ctx->kinds[i] == BFS_PATH) { // also highlight every argument recorded as a path
+				highlight[i] = true;
+			}
+		}
+
+		bfs_argv_error(ctx, highlight);
+		bfs_error(ctx, "Cannot combine %pX with explicit root paths.\n", expr);
+		return -1;
+	}
+
+	const char *from = expr->argv[1];
+
+	FILE *file;
+	if (strcmp(from, "-") == 0) {
+		if (!consume_stdin(parser, expr)) {
+			return -1;
+		}
+		file = stdin;
+	} else {
+		file = xfopen(from, O_RDONLY | O_CLOEXEC);
+	}
+	if (!file) { // reached when xfopen() returned NULL
+		parse_expr_error(parser, expr, "%s.\n", errstr());
+		return -1;
+	}
+
+	while (true) {
+		char *path = xgetdelim(file, '\0');
+		if (!path) {
+			if (errno) { // NOTE(review): relies on xgetdelim() leaving errno == 0 at end of input — confirm
+				goto fail; // NOTE(review): nothing is printed on this path in this function — confirm the error is reported elsewhere
+			} else {
+				break;
+			}
+		}
+
+		int ret = parse_root(parser, path);
+		free(path); // NOTE(review): freed immediately, so parse_root() presumably keeps its own copy — confirm
+		if (ret != 0) {
+			goto fail;
+		}
+	}
+
+	if (file != stdin) { // stdin is left open
+		fclose(file);
+	}
+
+	return 0;
+
+fail:
+	if (file != stdin) { // same cleanup as the success path
+		fclose(file);
+	}
+	return -1;
+}
+
 /**
  * Parse the top-level expression.
  */
 static struct bfs_expr *parse_whole_expr(struct bfs_parser *parser) {
+	struct bfs_ctx *ctx = parser->ctx;
+
 	if (skip_paths(parser) != 0) {
 		return NULL;
 	}
@@ -3364,7 +3627,7 @@ static struct bfs_expr *parse_whole_expr(struct bfs_parser *parser) {
 	if (parser->argv[0]) {
 		expr = parse_expr(parser);
 	} else {
-		expr = parse_new_expr(parser, eval_true, 1, &fake_true_arg);
+		expr = parse_new_expr(parser, eval_true, 1, &fake_true_arg, BFS_TEST);
 	}
 	if (!expr) {
 		return NULL;
@@ -3375,16 +3638,26 @@ static struct bfs_expr *parse_whole_expr(struct bfs_parser *parser) {
 		return NULL;
 	}
 
+	if (parser->files0_expr) {
+		if (parse_files0_roots(parser) != 0) {
+			return NULL;
+		}
+	} else if (ctx->npaths == 0) {
+		if (parse_root(parser, ".") != 0) {
+			return NULL;
+		}
+	}
+
 	if (parser->implicit_print) {
-		char **limit = parser->limit_arg;
+		const struct bfs_expr *limit = parser->limit_expr;
 		if (limit) {
-			parse_argv_error(parser, parser->limit_arg, 2,
-				"With ${blu}%s${rs}, you must specify an action explicitly; for example, ${blu}-print${rs} ${blu}%s${rs} ${bld}%s${rs}.\n",
-				limit[0], limit[0], limit[1]);
+			parse_expr_error(parser, limit,
+				"With %pX, you must specify an action explicitly; for example, ${blu}-print${rs} %px.\n",
+				limit, limit);
 			return NULL;
 		}
 
-		struct bfs_expr *print = parse_new_expr(parser, eval_fprint, 1, &fake_print_arg);
+		struct bfs_expr *print = parse_new_expr(parser, eval_fprint, 1, &fake_print_arg, BFS_ACTION);
 		if (!print) {
 			return NULL;
 		}
@@ -3396,20 +3669,20 @@ static struct bfs_expr *parse_whole_expr(struct bfs_parser *parser) {
 		}
 	}
 
-	if (parser->mount_arg && parser->xdev_arg) {
-		parse_conflict_warning(parser, parser->mount_arg, 1, parser->xdev_arg, 1,
-			"${blu}%s${rs} is redundant in the presence of ${blu}%s${rs}.\n\n",
-			parser->xdev_arg[0], parser->mount_arg[0]);
+	if (parser->mount_expr && parser->xdev_expr) {
+		parse_conflict_warning(parser, parser->mount_expr, parser->xdev_expr,
+			"%px is redundant in the presence of %px.\n\n",
+			parser->xdev_expr, parser->mount_expr);
 	}
 
-	if (parser->ctx->warn && parser->depth_arg && parser->prune_arg) {
-		parse_conflict_warning(parser, parser->depth_arg, 1, parser->prune_arg, 1,
-			"${blu}%s${rs} does not work in the presence of ${blu}%s${rs}.\n",
-			parser->prune_arg[0], parser->depth_arg[0]);
+	if (ctx->warn && parser->depth_expr && parser->prune_expr) {
+		parse_conflict_warning(parser, parser->depth_expr, parser->prune_expr,
+			"%px does not work in the presence of %px.\n",
+			parser->prune_expr, parser->depth_expr);
 
-		if (parser->interactive) {
-			bfs_warning(parser->ctx, "Do you want to continue? ");
-			if (ynprompt() == 0) {
+		if (ctx->interactive) {
+			bfs_warning(ctx, "Do you want to continue? ");
+			if (ynprompt() <= 0) {
 				return NULL;
 			}
 		}
@@ -3417,13 +3690,6 @@ static struct bfs_expr *parse_whole_expr(struct bfs_parser *parser) {
 		fprintf(stderr, "\n");
 	}
 
-	if (parser->ok_expr && parser->files0_stdin_arg) {
-		parse_conflict_error(parser, parser->ok_expr->argv, parser->ok_expr->argc, parser->files0_stdin_arg, 2,
-			"${blu}%s${rs} conflicts with ${blu}%s${rs} ${bld}%s${rs}.\n",
-			parser->ok_expr->argv[0], parser->files0_stdin_arg[0], parser->files0_stdin_arg[1]);
-		return NULL;
-	}
-
 	return expr;
 }
 
@@ -3458,7 +3724,7 @@ static void dump_expr_multiline(const struct bfs_ctx *ctx, enum debug_flags flag
 			++rparens;
 		} else {
 			cfprintf(ctx->cerr, "(${red}%s${rs}\n", expr->argv[0]);
-			for (struct bfs_expr *child = bfs_expr_children(expr); child; child = child->next) {
+			for_expr (child, expr) {
 				int parens = child->next ? 0 : rparens + 1;
 				dump_expr_multiline(ctx, flag, child, indent + 1, parens);
 			}
@@ -3605,6 +3871,12 @@ struct bfs_ctx *bfs_parse_cmdline(int argc, char *argv[]) {
 		goto fail;
 	}
 
+	ctx->kinds = ZALLOC_ARRAY(enum bfs_kind, argc);
+	if (!ctx->kinds) {
+		perror("zalloc()");
+		goto fail;
+	}
+
 	enum use_color use_color = COLOR_AUTO;
 	const char *no_color = getenv("NO_COLOR");
 	if (no_color && *no_color) {
@@ -3643,6 +3915,7 @@ struct bfs_ctx *bfs_parse_cmdline(int argc, char *argv[]) {
 	} else {
 		ctx->warn = stdin_tty;
 	}
+	ctx->interactive = stdin_tty && stderr_tty;
 
 	struct bfs_parser parser = {
 		.ctx = ctx,
@@ -3650,23 +3923,20 @@ struct bfs_ctx *bfs_parse_cmdline(int argc, char *argv[]) {
 		.command = ctx->argv[0],
 		.regex_type = BFS_REGEX_POSIX_BASIC,
 		.stdout_tty = stdout_tty,
-		.interactive = stdin_tty && stderr_tty,
 		.use_color = use_color,
 		.implicit_print = true,
-		.implicit_root = true,
 		.just_info = false,
 		.excluding = false,
 		.last_arg = NULL,
-		.depth_arg = NULL,
-		.prune_arg = NULL,
-		.mount_arg = NULL,
-		.xdev_arg = NULL,
-		.files0_stdin_arg = NULL,
-		.ok_expr = NULL,
+		.depth_expr = NULL,
+		.prune_expr = NULL,
+		.mount_expr = NULL,
+		.xdev_expr = NULL,
+		.stdin_expr = NULL,
 		.now = ctx->now,
 	};
 
-	ctx->exclude = parse_new_expr(&parser, eval_or, 1, &fake_or_arg);
+	ctx->exclude = parse_new_expr(&parser, eval_or, 1, &fake_or_arg, BFS_OPERATOR);
 	if (!ctx->exclude) {
 		goto fail;
 	}
@@ -3685,14 +3955,10 @@ struct bfs_ctx *bfs_parse_cmdline(int argc, char *argv[]) {
 	}
 
 	if (bfs_optimize(ctx) != 0) {
-		bfs_perror(ctx, "bfs_optimize()");
-		goto fail;
-	}
-
-	if (ctx->npaths == 0 && parser.implicit_root) {
-		if (parse_root(&parser, ".") != 0) {
-			goto fail;
+		if (errno != 0) {
+			bfs_perror(ctx, "bfs_optimize()");
 		}
+		goto fail;
 	}
 
 	if ((ctx->flags & BFTW_FOLLOW_ALL) && !ctx->unique) {
diff --git a/src/parse.h b/src/parse.h
index 6895c9f..fcc8234 100644
--- a/src/parse.h
+++ b/src/parse.h
@@ -11,9 +11,9 @@
 /**
  * Parse the command line.
  *
- * @param argc
+ * @argc
  *         The number of arguments.
- * @param argv
+ * @argv
  *         The arguments to parse.
  * @return
  *         A new bfs context, or NULL on failure.
diff --git a/src/prelude.h b/src/prelude.h
index ddeacbd..de89a6c 100644
--- a/src/prelude.h
+++ b/src/prelude.h
@@ -2,384 +2,129 @@
 // SPDX-License-Identifier: 0BSD
 
 /**
- * Configuration and feature/platform detection.
+ * Praeludium.
+ *
+ * This header is automatically included in every translation unit, before any
+ * other headers, so it can set feature test macros[1][2].  This sets up our own
+ * mini-dialect of C, which includes
+ *
+ *   - Standard C17 and POSIX.1 2024 features
+ *   - Portable and platform-specific extensions
+ *   - Convenience macros like `bool`, `alignof`, etc.
+ *   - Common compiler extensions like __has_include()
+ *
+ * Further bfs-specific utilities are defined in "bfs.h".
+ *
+ * [1]: https://www.gnu.org/software/libc/manual/html_node/Feature-Test-Macros.html
+ * [2]: https://pubs.opengroup.org/onlinepubs/9799919799/functions/V2_chap02.html
  */
 
 #ifndef BFS_PRELUDE_H
 #define BFS_PRELUDE_H
 
-// Possible __STDC_VERSION__ values
-
-#define C95 199409L
-#define C99 199901L
-#define C11 201112L
-#define C17 201710L
-#define C23 202311L
-
-#include <stddef.h>
-
-#if __STDC_VERSION__ < C23
-#  include <stdalign.h>
-#  include <stdbool.h>
-#  include <stdnoreturn.h>
-#endif
-
-// bfs packaging configuration
-
-#include "config.h"
-
-#ifndef BFS_COMMAND
-#  define BFS_COMMAND "bfs"
-#endif
-#ifndef BFS_HOMEPAGE
-#  define BFS_HOMEPAGE "https://tavianator.com/projects/bfs.html"
-#endif
-
-// This is a symbol instead of a literal so we don't have to rebuild everything
-// when the version number changes
-extern const char bfs_version[];
-
-// Check for system headers
+// Feature test macros
 
-#ifdef __has_include
-
-#if __has_include(<mntent.h>)
-#  define BFS_HAS_MNTENT_H true
-#endif
-#if __has_include(<paths.h>)
-#  define BFS_HAS_PATHS_H true
-#endif
-#if __has_include(<sys/extattr.h>)
-#  define BFS_HAS_SYS_EXTATTR_H true
-#endif
-#if __has_include(<sys/mkdev.h>)
-#  define BFS_HAS_SYS_MKDEV_H true
-#endif
-#if __has_include(<sys/param.h>)
-#  define BFS_HAS_SYS_PARAM_H true
-#endif
-#if __has_include(<sys/sysmacros.h>)
-#  define BFS_HAS_SYS_SYSMACROS_H true
-#endif
-#if __has_include(<sys/xattr.h>)
-#  define BFS_HAS_SYS_XATTR_H true
-#endif
-#if __has_include(<threads.h>)
-#  define BFS_HAS_THREADS_H true
-#endif
-#if __has_include(<util.h>)
-#  define BFS_HAS_UTIL_H true
-#endif
-
-#else // !__has_include
-
-#define BFS_HAS_MNTENT_H __GLIBC__
-#define BFS_HAS_PATHS_H true
-#define BFS_HAS_SYS_EXTATTR_H __FreeBSD__
-#define BFS_HAS_SYS_MKDEV_H false
-#define BFS_HAS_SYS_PARAM_H true
-#define BFS_HAS_SYS_SYSMACROS_H __GLIBC__
-#define BFS_HAS_SYS_XATTR_H __linux__
-#define BFS_HAS_THREADS_H (!__STDC_NO_THREADS__)
-#define BFS_HAS_UTIL_H __NetBSD__
-
-#endif // !__has_include
-
-#ifndef BFS_USE_MNTENT_H
-#  define BFS_USE_MNTENT_H BFS_HAS_MNTENT_H
-#endif
-#ifndef BFS_USE_PATHS_H
-#  define BFS_USE_PATHS_H BFS_HAS_PATHS_H
-#endif
-#ifndef BFS_USE_SYS_EXTATTR_H
-#  define BFS_USE_SYS_EXTATTR_H BFS_HAS_SYS_EXTATTR_H
-#endif
-#ifndef BFS_USE_SYS_MKDEV_H
-#  define BFS_USE_SYS_MKDEV_H BFS_HAS_SYS_MKDEV_H
-#endif
-#ifndef BFS_USE_SYS_PARAM_H
-#  define BFS_USE_SYS_PARAM_H BFS_HAS_SYS_PARAM_H
-#endif
-#ifndef BFS_USE_SYS_SYSMACROS_H
-#  define BFS_USE_SYS_SYSMACROS_H BFS_HAS_SYS_SYSMACROS_H
-#endif
-#ifndef BFS_USE_SYS_XATTR_H
-#  define BFS_USE_SYS_XATTR_H BFS_HAS_SYS_XATTR_H
-#endif
-#ifndef BFS_USE_THREADS_H
-#  define BFS_USE_THREADS_H BFS_HAS_THREADS_H
-#endif
-#ifndef BFS_USE_UTIL_H
-#  define BFS_USE_UTIL_H BFS_HAS_UTIL_H
-#endif
+/**
+ * Linux and BSD handle _POSIX_C_SOURCE differently: on Linux, it enables POSIX
+ * interfaces that are not visible by default.  On BSD, it also *disables* most
+ * extensions, giving a strict POSIX environment.  Since we want the extensions,
+ * we don't set _POSIX_C_SOURCE.
+ */
+// #define _POSIX_C_SOURCE 202405L
 
-// Stub out feature detection on old/incompatible compilers
+/** openat() etc. */
+#define _ATFILE_SOURCE 1
 
-#ifndef __has_feature
-#  define __has_feature(feat) false
-#endif
+/** BSD-derived extensions. */
+#define _BSD_SOURCE 1
 
-#ifndef __has_c_attribute
-#  define __has_c_attribute(attr) false
-#endif
+/** glibc successor to _BSD_SOURCE. */
+#define _DEFAULT_SOURCE 1
 
-#ifndef __has_attribute
-#  define __has_attribute(attr) false
-#endif
+/** GNU extensions. */
+#define _GNU_SOURCE 1
 
-// Platform detection
+/** Use 64-bit off_t. */
+#define _FILE_OFFSET_BITS 64
 
-// Get the definition of BSD if available
-#if BFS_USE_SYS_PARAM_H
-#  include <sys/param.h>
-#endif
+/** Use 64-bit time_t. */
+#define _TIME_BITS 64
 
-#ifndef __GLIBC_PREREQ
-#  define __GLIBC_PREREQ(maj, min) false
+/** macOS extensions. */
+#if __APPLE__
+#  define _DARWIN_C_SOURCE 1
 #endif
 
-#ifndef __NetBSD_Prereq__
-#  define __NetBSD_Prereq__(maj, min, patch) false
+/** Solaris extensions. */
+#if __sun
+#  define __EXTENSIONS__ 1
+// https://illumos.org/man/3C/getpwnam#standard-conforming
+#  define _POSIX_PTHREAD_SEMANTICS 1
 #endif
 
-// Fundamental utilities
-
-/**
- * Get the length of an array.
- */
-#define countof(array) (sizeof(array) / sizeof(0[array]))
-
-/**
- * False sharing/destructive interference/largest cache line size.
- */
-#ifdef __GCC_DESTRUCTIVE_SIZE
-#  define FALSE_SHARING_SIZE __GCC_DESTRUCTIVE_SIZE
-#else
-#  define FALSE_SHARING_SIZE 64
+/** QNX extensions. */
+#if __QNX__
+#  define _QNX_SOURCE 1
 #endif
 
-/**
- * True sharing/constructive interference/smallest cache line size.
- */
-#ifdef __GCC_CONSTRUCTIVE_SIZE
-#  define TRUE_SHARING_SIZE __GCC_CONSTRUCTIVE_SIZE
-#else
-#  define TRUE_SHARING_SIZE 64
-#endif
+// Get the convenience macros that became standard spellings in C23
+#if __STDC_VERSION__ < 202311L
 
-/**
- * Polyfill max_align_t if we don't already have it.
- */
-#if !BFS_HAS_MAX_ALIGN_T
-typedef union {
-#  ifdef __BIGGEST_ALIGNMENT__
-	alignas(__BIGGEST_ALIGNMENT__) char c;
-#  else
-	long double ld;
-	long long ll;
-	void *ptr;
-#  endif
-} max_align_t;
-#endif
+/** _Static_assert() => static_assert() */
+#include <assert.h>
+/** _Alignas(), _Alignof() => alignas(), alignof() */
+#include <stdalign.h>
+/** _Bool => bool, true, false */
+#include <stdbool.h>
 
 /**
- * Alignment specifier that avoids false sharing.
+ * C23 deprecates `noreturn void` in favour of `[[noreturn]] void`, so we expose
+ * _noreturn instead with the other attributes in "bfs.h".
  */
-#define cache_align alignas(FALSE_SHARING_SIZE)
+// #include <stdnoreturn.h>
 
-// Wrappers for attributes
+/** Part of <threads.h>, but we don't use anything else from it. */
+#define thread_local _Thread_local
 
-/**
- * Silence warnings about switch/case fall-throughs.
- */
-#if __has_attribute(fallthrough)
-#  define fallthru __attribute__((fallthrough))
-#else
-#  define fallthru ((void)0)
-#endif
+#endif // !C23
 
-/**
- * Silence warnings about unused declarations.
- */
-#if __has_attribute(unused)
-#  define attr_maybe_unused __attribute__((unused))
-#else
-#  define attr_maybe_unused
-#endif
+// Feature detection
 
-/**
- * Warn if a value is unused.
- */
-#if __has_attribute(warn_unused_result)
-#  define attr_nodiscard __attribute__((warn_unused_result))
-#else
-#  define attr_nodiscard
-#endif
-
-/**
- * Hint to avoid inlining a function.
- */
-#if __has_attribute(noinline)
-#  define attr_noinline __attribute__((noinline))
-#else
-#  define attr_noinline
+// https://clang.llvm.org/docs/LanguageExtensions.html#has-attribute
+#ifndef __has_attribute
+#  define __has_attribute(attr) false
 #endif
 
-/**
- * Hint that a function is unlikely to be called.
- */
-#if __has_attribute(cold)
-#  define attr_cold attr_noinline __attribute__((cold))
-#else
-#  define attr_cold attr_noinline
+// https://clang.llvm.org/docs/LanguageExtensions.html#has-builtin
+#ifndef __has_builtin
+#  define __has_builtin(builtin) false
 #endif
 
-/**
- * Adds compiler warnings for bad printf()-style function calls, if supported.
- */
-#if __has_attribute(format)
-#  define attr_printf(fmt, args) __attribute__((format(printf, fmt, args)))
-#else
-#  define attr_printf(fmt, args)
+// https://en.cppreference.com/w/c/language/attributes#Attribute_testing
+#ifndef __has_c_attribute
+#  define __has_c_attribute(attr) false
 #endif
 
-/**
- * Annotates functions that potentially modify and return format strings.
- */
-#if __has_attribute(format_arg)
-#  define attr_format_arg(arg) __attribute__((format_arg(arg)))
-#else
-#  define attr_format_arg(args)
+// https://clang.llvm.org/docs/LanguageExtensions.html#has-feature-and-has-extension
+#ifndef __has_feature
+#  define __has_feature(feat) false
 #endif
 
-/**
- * Annotates allocator-like functions.
- */
-#if __has_attribute(malloc)
-#  if __GNUC__ >= 11 && !__OPTIMIZE__ // malloc(deallocator) disables inlining on GCC
-#    define attr_malloc(...) attr_nodiscard __attribute__((malloc(__VA_ARGS__)))
-#  else
-#    define attr_malloc(...) attr_nodiscard __attribute__((malloc))
-#  endif
-#else
-#  define attr_malloc(...) attr_nodiscard
+// https://en.cppreference.com/w/c/preprocessor/include
+#ifndef __has_include
+#  define __has_include(header) false
 #endif
 
-/**
- * Specifies that a function returns allocations with a given alignment.
- */
-#if __has_attribute(alloc_align)
-#  define attr_alloc_align(param) __attribute__((alloc_align(param)))
-#else
-#  define attr_alloc_align(param)
-#endif
+// Sanitizer macros (GCC defines these but Clang does not)
 
-/**
- * Specifies that a function returns allocations with a given size.
- */
-#if __has_attribute(alloc_size)
-#  define attr_alloc_size(...) __attribute__((alloc_size(__VA_ARGS__)))
-#else
-#  define attr_alloc_size(...)
+#if __has_feature(address_sanitizer) && !defined(__SANITIZE_ADDRESS__)
+#  define __SANITIZE_ADDRESS__ true
 #endif
-
-/**
- * Shorthand for attr_alloc_align() and attr_alloc_size().
- */
-#define attr_aligned_alloc(align, ...) \
-	attr_alloc_align(align) \
-	attr_alloc_size(__VA_ARGS__)
-
-/**
- * Check if function multiversioning via GNU indirect functions (ifunc) is supported.
- */
-#ifndef BFS_USE_TARGET_CLONES
-#  if __has_attribute(target_clones) && (__GLIBC__ || __FreeBSD__)
-#    define BFS_USE_TARGET_CLONES true
-#  endif
+#if __has_feature(memory_sanitizer) && !defined(__SANITIZE_MEMORY__)
+#  define __SANITIZE_MEMORY__ true
 #endif
-
-/**
- * Apply the target_clones attribute, if available.
- */
-#if BFS_USE_TARGET_CLONES
-#  define attr_target_clones(...) __attribute__((target_clones(__VA_ARGS__)))
-#else
-#  define attr_target_clones(...)
+#if __has_feature(thread_sanitizer) && !defined(__SANITIZE_THREAD__)
+#  define __SANITIZE_THREAD__ true
 #endif
 
-/**
- * Shorthand for multiple attributes at once. attr(a, b(c), d) is equivalent to
- *
- *     attr_a
- *     attr_b(c)
- *     attr_d
- */
-#define attr(...) \
-	attr__(attr_##__VA_ARGS__, none, none, none, none, none, none, none, none, none, )
-
-/**
- * attr() helper.  For exposition, pretend we support only 2 args, instead of 9.
- * There are a few cases:
- *
- *     attr()
- *         => attr__(attr_, none, none)
- *         => attr_                =>
- *            attr_none            =>
- *            attr_too_many_none() =>
- *
- *     attr(a)
- *         => attr__(attr_a, none, none)
- *         => attr_a               => __attribute__((a))
- *            attr_none            =>
- *            attr_too_many_none() =>
- *
- *     attr(a, b(c))
- *         => attr__(attr_a, b(c), none, none)
- *         => attr_a                   => __attribute__((a))
- *            attr_b(c)                => __attribute__((b(c)))
- *            attr_too_many_none(none) =>
- *
- *     attr(a, b(c), d)
- *         => attr__(attr_a, b(c), d, none, none)
- *         => attr_a                      => __attribute__((a))
- *            attr_b(c)                   => __attribute__((b(c)))
- *            attr_too_many_d(none, none) => error
- *
- * Some attribute names are the same as standard library functions, e.g. printf.
- * Standard libraries are permitted to define these functions as macros, like
- *
- *     #define printf(...) __builtin_printf(__VA_ARGS__)
- *
- * The token paste in
- *
- *     #define attr(...) attr__(attr_##__VA_ARGS__, none, none)
- *
- * is necessary to prevent macro expansion before evaluating attr__().
- * Otherwise, we could get
- *
- *     attr(printf(1, 2))
- *         => attr__(__builtin_printf(1, 2), none, none)
- *         => attr____builtin_printf(1, 2)
- *         => error
- */
-#define attr__(a1, a2, a3, a4, a5, a6, a7, a8, a9, none, ...) \
-	a1 \
-	attr_##a2 \
-	attr_##a3 \
-	attr_##a4 \
-	attr_##a5 \
-	attr_##a6 \
-	attr_##a7 \
-	attr_##a8 \
-	attr_##a9 \
-	attr_too_many_##none(__VA_ARGS__)
-
-// Ignore `attr_none` from expanding 1-9 argument attr(a1, a2, ...)
-#define attr_none
-// Ignore `attr_` from expanding 0-argument attr()
-#define attr_
-// Only trigger an error on more than 9 arguments
-#define attr_too_many_none(...)
-
 #endif // BFS_PRELUDE_H
diff --git a/src/printf.c b/src/printf.c
index f8428f7..30ec201 100644
--- a/src/printf.c
+++ b/src/printf.c
@@ -1,9 +1,10 @@
 // Copyright © Tavian Barnes <tavianator@tavianator.com>
 // SPDX-License-Identifier: 0BSD
 
-#include "prelude.h"
 #include "printf.h"
+
 #include "alloc.h"
+#include "bfs.h"
 #include "bfstd.h"
 #include "bftw.h"
 #include "color.h"
@@ -16,6 +17,7 @@
 #include "mtab.h"
 #include "pwcache.h"
 #include "stat.h"
+
 #include <errno.h>
 #include <grp.h>
 #include <pwd.h>
@@ -89,7 +91,7 @@ static bool should_color(CFILE *cfile, const struct bfs_fmt *fmt) {
 	(void)ret
 
 /** Return a dynamic format string. */
-attr(format_arg(2))
+_format_arg(2)
 static const char *dyn_fmt(const char *str, const char *fake) {
 	bfs_assert(strcmp(str + strlen(str) - strlen(fake) + 1, fake + 1) == 0,
 		"Mismatched format specifiers: '%s' vs. '%s'", str, fake);
@@ -97,7 +99,7 @@ static const char *dyn_fmt(const char *str, const char *fake) {
 }
 
 /** Wrapper for fprintf(). */
-attr(printf(3, 4))
+_printf(3, 4)
 static int bfs_fprintf(CFILE *cfile, const struct bfs_fmt *fmt, const char *fake, ...) {
 	va_list args;
 	va_start(args, fake);
@@ -505,30 +507,25 @@ static int bfs_printf_u(CFILE *cfile, const struct bfs_fmt *fmt, const struct BF
 }
 
 static const char *bfs_printf_type(enum bfs_type type) {
-	switch (type) {
-	case BFS_BLK:
-		return "b";
-	case BFS_CHR:
-		return "c";
-	case BFS_DIR:
-		return "d";
-	case BFS_DOOR:
-		return "D";
-	case BFS_FIFO:
-		return "p";
-	case BFS_LNK:
-		return "l";
-	case BFS_PORT:
-		return "P";
-	case BFS_REG:
-		return "f";
-	case BFS_SOCK:
-		return "s";
-	case BFS_WHT:
-		return "w";
-	default:
-		return "U";
+	const char *const names[] = {
+		[BFS_BLK] = "b",
+		[BFS_CHR] = "c",
+		[BFS_DIR] = "d",
+		[BFS_DOOR] = "D",
+		[BFS_FIFO] = "p",
+		[BFS_LNK] = "l",
+		[BFS_PORT] = "P",
+		[BFS_REG] = "f",
+		[BFS_SOCK] = "s",
+		[BFS_WHT] = "w",
+	};
+
+	const char *name = NULL;
+	if ((size_t)type < countof(names)) {
+		name = names[type];
 	}
+
+	return name ? name : "U";
 }
 
 /** %y: type */
@@ -544,7 +541,7 @@ static int bfs_printf_Y(CFILE *cfile, const struct bfs_fmt *fmt, const struct BF
 
 	int error = 0;
 	if (type == BFS_ERROR) {
-		if (errno_is_like(ELOOP)) {
+		if (errno == ELOOP) {
 			str = "L";
 		} else if (errno_is_like(ENOENT)) {
 			str = "N";
@@ -565,7 +562,7 @@ static int bfs_printf_Y(CFILE *cfile, const struct bfs_fmt *fmt, const struct BF
 }
 
 /** %Z: SELinux context */
-attr(maybe_unused)
+_maybe_unused
 static int bfs_printf_Z(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) {
 	char *con = bfs_getfilecon(ftwbuf);
 	if (!con) {
@@ -709,9 +706,9 @@ int bfs_printf_parse(const struct bfs_ctx *ctx, struct bfs_expr *expr, const cha
 				case '#':
 				case '0':
 				case '+':
-					must_be_numeric = true;
-					fallthru;
 				case ' ':
+					must_be_numeric = true;
+					_fallthrough;
 				case '-':
 					if (strchr(fmt.str, c)) {
 						bfs_expr_error(ctx, expr);
diff --git a/src/printf.h b/src/printf.h
index 2bff087..e8d862e 100644
--- a/src/printf.h
+++ b/src/printf.h
@@ -22,11 +22,11 @@ struct bfs_printf;
 /**
  * Parse a -printf format string.
  *
- * @param ctx
+ * @ctx
  *         The bfs context.
- * @param expr
+ * @expr
  *         The expression to fill in.
- * @param format
+ * @format
  *         The format string to parse.
  * @return
  *         0 on success, -1 on failure.
@@ -36,11 +36,11 @@ int bfs_printf_parse(const struct bfs_ctx *ctx, struct bfs_expr *expr, const cha
 /**
  * Evaluate a parsed format string.
  *
- * @param cfile
+ * @cfile
  *         The CFILE to print to.
- * @param format
+ * @format
  *         The parsed printf format.
- * @param ftwbuf
+ * @ftwbuf
  *         The bftw() data for the current file.
  * @return
  *         0 on success, -1 on failure.
diff --git a/src/pwcache.c b/src/pwcache.c
index af8c237..fa19dad 100644
--- a/src/pwcache.c
+++ b/src/pwcache.c
@@ -1,16 +1,15 @@
 // Copyright © Tavian Barnes <tavianator@tavianator.com>
 // SPDX-License-Identifier: 0BSD
 
-#include "prelude.h"
 #include "pwcache.h"
+
 #include "alloc.h"
 #include "trie.h"
+
 #include <errno.h>
 #include <grp.h>
 #include <pwd.h>
 #include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
 
 /** Represents cache hits for negative results. */
 static void *MISSING = &MISSING;
diff --git a/src/pwcache.h b/src/pwcache.h
index b6c0b67..d7c602d 100644
--- a/src/pwcache.h
+++ b/src/pwcache.h
@@ -27,9 +27,9 @@ struct bfs_users *bfs_users_new(void);
 /**
  * Get a user entry by name.
  *
- * @param users
+ * @users
  *         The user cache.
- * @param name
+ * @name
  *         The username to look up.
  * @return
  *         The matching user, or NULL if not found (errno == 0) or an error
@@ -40,9 +40,9 @@ const struct passwd *bfs_getpwnam(struct bfs_users *users, const char *name);
 /**
  * Get a user entry by ID.
  *
- * @param users
+ * @users
  *         The user cache.
- * @param uid
+ * @uid
  *         The ID to look up.
  * @return
  *         The matching user, or NULL if not found (errno == 0) or an error
@@ -53,7 +53,7 @@ const struct passwd *bfs_getpwuid(struct bfs_users *users, uid_t uid);
 /**
  * Flush a user cache.
  *
- * @param users
+ * @users
  *         The cache to flush.
  */
 void bfs_users_flush(struct bfs_users *users);
@@ -61,7 +61,7 @@ void bfs_users_flush(struct bfs_users *users);
 /**
  * Free a user cache.
  *
- * @param users
+ * @users
  *         The user cache to free.
  */
 void bfs_users_free(struct bfs_users *users);
@@ -82,9 +82,9 @@ struct bfs_groups *bfs_groups_new(void);
 /**
  * Get a group entry by name.
  *
- * @param groups
+ * @groups
  *         The group cache.
- * @param name
+ * @name
  *         The group name to look up.
  * @return
  *         The matching group, or NULL if not found (errno == 0) or an error
@@ -95,9 +95,9 @@ const struct group *bfs_getgrnam(struct bfs_groups *groups, const char *name);
 /**
  * Get a group entry by ID.
  *
- * @param groups
+ * @groups
  *         The group cache.
- * @param uid
+ * @gid
  *         The ID to look up.
  * @return
  *         The matching group, or NULL if not found (errno == 0) or an error
@@ -108,7 +108,7 @@ const struct group *bfs_getgrgid(struct bfs_groups *groups, gid_t gid);
 /**
  * Flush a group cache.
  *
- * @param groups
+ * @groups
  *         The cache to flush.
  */
 void bfs_groups_flush(struct bfs_groups *groups);
@@ -116,7 +116,7 @@ void bfs_groups_flush(struct bfs_groups *groups);
 /**
  * Free a group cache.
  *
- * @param groups
+ * @groups
  *         The group cache to free.
  */
 void bfs_groups_free(struct bfs_groups *groups);
diff --git a/src/sanity.h b/src/sanity.h
index e168b8f..be77eef 100644
--- a/src/sanity.h
+++ b/src/sanity.h
@@ -8,21 +8,8 @@
 #ifndef BFS_SANITY_H
 #define BFS_SANITY_H
 
-#include "prelude.h"
 #include <stddef.h>
 
-#if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__)
-#  define SANITIZE_ADDRESS true
-#endif
-
-#if __has_feature(memory_sanitizer) || defined(__SANITIZE_MEMORY__)
-#  define SANITIZE_MEMORY true
-#endif
-
-#if __has_feature(thread_sanitizer) || defined(__SANITIZE_THREAD__)
-#  define SANITIZE_THREAD true
-#endif
-
 // Call macro(ptr, size) or macro(ptr, sizeof(*ptr))
 #define SANITIZE_CALL(...) \
 	SANITIZE_CALL_(__VA_ARGS__, )
@@ -33,7 +20,7 @@
 #define SANITIZE_CALL__(macro, ptr, size, ...) \
 	macro(ptr, size)
 
-#if SANITIZE_ADDRESS
+#if __SANITIZE_ADDRESS__
 #  include <sanitizer/asan_interface.h>
 
 /**
@@ -50,12 +37,30 @@
  */
 #define sanitize_free(...) SANITIZE_CALL(__asan_poison_memory_region, __VA_ARGS__)
 
+/**
+ * Adjust the size of an allocated region, for things like dynamic arrays.
+ *
+ * @ptr
+ *         The memory region.
+ * @old
+ *         The previous usable size of the region.
+ * @new
+ *         The new usable size of the region.
+ * @cap
+ *         The total allocated capacity of the region.
+ */
+static inline void sanitize_resize(const void *ptr, size_t old, size_t new, size_t cap) {
+	const char *beg = ptr;
+	__sanitizer_annotate_contiguous_container(beg, beg + cap, beg + old, beg + new);
+}
+
 #else
-#  define sanitize_alloc sanitize_uninit
-#  define sanitize_free sanitize_uninit
+#  define sanitize_alloc(...) ((void)0)
+#  define sanitize_free(...) ((void)0)
+#  define sanitize_resize(ptr, old, new, cap) ((void)0)
 #endif
 
-#if SANITIZE_MEMORY
+#if __SANITIZE_MEMORY__
 #  include <sanitizer/msan_interface.h>
 
 /**
@@ -73,19 +78,14 @@
 #define sanitize_uninit(...) SANITIZE_CALL(__msan_allocated_memory, __VA_ARGS__)
 
 #else
-#  define sanitize_init(...) SANITIZE_CALL(sanitize_ignore, __VA_ARGS__)
-#  define sanitize_uninit(...) SANITIZE_CALL(sanitize_ignore, __VA_ARGS__)
+#  define sanitize_init(...) ((void)0)
+#  define sanitize_uninit(...) ((void)0)
 #endif
 
 /**
- * Squelch unused variable warnings when not sanitizing.
- */
-#define sanitize_ignore(ptr, size) ((void)(ptr), (void)(size))
-
-/**
  * Initialize a variable, unless sanitizers would detect uninitialized uses.
  */
-#if SANITIZE_MEMORY
+#if __SANITIZE_MEMORY__
 #  define uninit(value)
 #else
 #  define uninit(value) = value
diff --git a/src/sighook.c b/src/sighook.c
new file mode 100644
index 0000000..a87bed5
--- /dev/null
+++ b/src/sighook.c
@@ -0,0 +1,692 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+/**
+ * Dynamic (un)registration of signal handlers.
+ *
+ * Because signal handlers can interrupt any thread at an arbitrary point, they
+ * must be lock-free or risk deadlock.  Therefore, we implement the global table
+ * of signal "hooks" with a simple read-copy-update (RCU) scheme.  Readers get a
+ * reference-counted pointer (struct arc) to the table in a lock-free way, and
+ * release the reference count when finished.
+ *
+ * Updates are managed by struct rcu, which has two slots: one active and one
+ * inactive.  Readers acquire a reference to the active slot.  A single writer
+ * can safely update it by initializing the inactive slot, atomically swapping
+ * the slots, and waiting for the reference count of the newly inactive slot to
+ * drop to zero.  Once it does, the old pointer can be safely freed.
+ */
+
+#include "sighook.h"
+
+#include "alloc.h"
+#include "atomic.h"
+#include "bfs.h"
+#include "bfstd.h"
+#include "diag.h"
+#include "thread.h"
+
+#include <errno.h>
+#include <pthread.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#if __linux__
+#  include <sys/syscall.h>
+#endif
+
+// NetBSD opens a file descriptor for each sem_init()
+#if defined(_POSIX_SEMAPHORES) && !__NetBSD__
+#  define BFS_POSIX_SEMAPHORES _POSIX_SEMAPHORES
+#else
+#  define BFS_POSIX_SEMAPHORES (-1)
+#endif
+
+#if BFS_POSIX_SEMAPHORES >= 0
+#  include <semaphore.h>
+#endif
+
+/**
+ * An atomically reference-counted pointer.
+ */
+struct arc {
+	/** The current reference count (0 means empty). */
+	atomic size_t refs;
+	/** The reference itself. */
+	void *ptr;
+
+#if BFS_POSIX_SEMAPHORES >= 0
+	/** A semaphore for arc_wait(). */
+	sem_t sem;
+	/** sem_init() result. */
+	int sem_status;
+#endif
+};
+
+/** Initialize an arc. */
+static void arc_init(struct arc *arc) {
+	bfs_verify(atomic_is_lock_free(&arc->refs));
+
+	atomic_init(&arc->refs, 0);
+	arc->ptr = NULL;
+
+#if BFS_POSIX_SEMAPHORES >= 0
+	if (sysoption(SEMAPHORES) > 0) {
+		arc->sem_status = sem_init(&arc->sem, false, 0);
+	} else {
+		arc->sem_status = -1;
+	}
+#endif
+}
+
+/** Get the current refcount. */
+static size_t arc_refs(const struct arc *arc) {
+	return load(&arc->refs, relaxed);
+}
+
+/** Set the pointer in an empty arc. */
+static void arc_set(struct arc *arc, void *ptr) {
+	bfs_assert(arc_refs(arc) == 0);
+	bfs_assert(ptr);
+
+	arc->ptr = ptr;
+	store(&arc->refs, 1, release);
+}
+
+/** Acquire a reference. */
+static void *arc_get(struct arc *arc) {
+	size_t refs = arc_refs(arc);
+	do {
+		if (refs < 1) {
+			return NULL;
+		}
+	} while (!compare_exchange_weak(&arc->refs, &refs, refs + 1, acquire, relaxed));
+
+	return arc->ptr;
+}
+
+/** Release a reference. */
+static void arc_put(struct arc *arc) {
+	size_t refs = fetch_sub(&arc->refs, 1, release);
+
+	if (refs == 1) {
+#if BFS_POSIX_SEMAPHORES >= 0
+		if (arc->sem_status == 0 && sem_post(&arc->sem) != 0) {
+			abort();
+		}
+#endif
+	}
+}
+
+/** Wait on the semaphore. */
+static int arc_sem_wait(struct arc *arc) {
+#if BFS_POSIX_SEMAPHORES >= 0
+	if (arc->sem_status == 0) {
+		while (sem_wait(&arc->sem) != 0) {
+			bfs_everify(errno == EINTR, "sem_wait()");
+		}
+		return 0;
+	}
+#endif
+
+	return -1;
+}
+
+/** Wait for all references to be released. */
+static void *arc_wait(struct arc *arc) {
+	size_t refs = fetch_sub(&arc->refs, 1, relaxed);
+	bfs_assert(refs > 0);
+
+	--refs;
+	while (refs > 0) {
+		if (arc_sem_wait(arc) == 0) {
+			bfs_assert(arc_refs(arc) == 0);
+			// sem_wait() provides enough ordering, so we can skip the fence
+			goto done;
+		}
+
+		// Some platforms (like macOS) don't support unnamed semaphores,
+		// but we can always busy-wait
+		spin_loop();
+		refs = arc_refs(arc);
+	}
+
+	thread_fence(&arc->refs, acquire);
+
+done:;
+	void *ptr = arc->ptr;
+	arc->ptr = NULL;
+	return ptr;
+}
+
+/** Destroy an arc. */
+static void arc_destroy(struct arc *arc) {
+	bfs_assert(arc_refs(arc) == 0);
+
+#if BFS_POSIX_SEMAPHORES >= 0
+	if (arc->sem_status == 0) {
+		bfs_everify(sem_destroy(&arc->sem) == 0, "sem_destroy()");
+	}
+#endif
+}
+
+/**
+ * A simple read-copy-update memory reclamation scheme.
+ */
+struct rcu {
+	/** The currently active slot. */
+	atomic size_t active;
+	/** The two slots. */
+	struct arc slots[2];
+};
+
+/** Sentinel value for RCU, since arc uses NULL already. */
+static void *RCU_NULL = &RCU_NULL;
+
+/** Map NULL -> RCU_NULL. */
+static void *rcu_encode(void *ptr) {
+	return ptr ? ptr : RCU_NULL;
+}
+
+/** Map RCU_NULL -> NULL. */
+static void *rcu_decode(void *ptr) {
+	bfs_assert(ptr != NULL);
+	return ptr == RCU_NULL ? NULL : ptr;
+}
+
+/** Initialize an RCU block. */
+static void rcu_init(struct rcu *rcu, void *ptr) {
+	bfs_verify(atomic_is_lock_free(&rcu->active));
+
+	atomic_init(&rcu->active, 0);
+	arc_init(&rcu->slots[0]);
+	arc_init(&rcu->slots[1]);
+	arc_set(&rcu->slots[0], rcu_encode(ptr));
+}
+
+/** Get the active slot. */
+static struct arc *rcu_active(struct rcu *rcu) {
+	size_t i = load(&rcu->active, relaxed);
+	return &rcu->slots[i];
+}
+
+/** Destroy an RCU block. */
+static void rcu_destroy(struct rcu *rcu) {
+	arc_wait(rcu_active(rcu));
+	arc_destroy(&rcu->slots[1]);
+	arc_destroy(&rcu->slots[0]);
+}
+
+/** Read an RCU-protected pointer. */
+static void *rcu_read(struct rcu *rcu, struct arc **slot) {
+	while (true) {
+		*slot = rcu_active(rcu);
+		void *ptr = arc_get(*slot);
+		if (ptr) {
+			return rcu_decode(ptr);
+		}
+		// Otherwise, the other slot became active; retry
+	}
+}
+
+/** Get the RCU-protected pointer without acquiring a reference. */
+static void *rcu_peek(struct rcu *rcu) {
+	struct arc *arc = rcu_active(rcu);
+	return rcu_decode(arc->ptr);
+}
+
+/** Update an RCU-protected pointer, and return the old one. */
+static void *rcu_update(struct rcu *rcu, void *ptr) {
+	size_t i = load(&rcu->active, relaxed);
+	struct arc *prev = &rcu->slots[i];
+
+	size_t j = i ^ 1;
+	struct arc *next = &rcu->slots[j];
+
+	arc_set(next, rcu_encode(ptr));
+	store(&rcu->active, j, relaxed);
+	return rcu_decode(arc_wait(prev));
+}
+
+/**
+ * An RCU-protected linked list.
+ */
+struct rcu_list {
+	/** The first node in the list. */
+	struct rcu head;
+	/** &last->next */
+	struct rcu *tail;
+};
+
+/**
+ * An rcu_list node.
+ */
+struct rcu_node {
+	/** The RCU pointer to this node. */
+	struct rcu *self;
+	/** The next node in the list. */
+	struct rcu next;
+};
+
+/** Initialize an rcu_list. */
+static void rcu_list_init(struct rcu_list *list) {
+	rcu_init(&list->head, NULL);
+	list->tail = &list->head;
+}
+
+/** Append to an rcu_list. */
+static void rcu_list_append(struct rcu_list *list, struct rcu_node *node) {
+	node->self = list->tail;
+	list->tail = &node->next;
+	rcu_init(&node->next, NULL);
+	rcu_update(node->self, node);
+}
+
+/** Unlink a node from an rcu_list; rcu_update()/rcu_destroy() wait for readers to drain. */
+static void rcu_list_remove(struct rcu_list *list, struct rcu_node *node) {
+	struct rcu_node *next = rcu_peek(&node->next);
+	rcu_update(node->self, next); // Point our predecessor's link past us
+	if (next) {
+		next->self = node->self; // Our successor is now reached via that link
+	} else {
+		list->tail = node->self; // We were last: the tail is our predecessor's link, not always the head
+	}
+	rcu_destroy(&node->next);
+}
+
+/**
+ * Iterate over an rcu_list.
+ *
+ * It is safe to `break` out of this loop, but `return` or `goto` will lead to
+ * a missed arc_put().
+ */
+#define for_rcu(type, node, list) \
+	for_rcu_(type, node, (list), node##_slot_, node##_prev_, node##_done_)
+
+#define for_rcu_(type, node, list, slot, prev, done) \
+	for (struct arc *slot, *prev, **done = NULL; !done; arc_put(slot), done = &slot) \
+		for (type *node = rcu_read(&list->head, &slot); \
+		     node; \
+		     prev = slot, \
+		     node = rcu_read(&((struct rcu_node *)node)->next, &slot), \
+		     arc_put(prev))
+
+struct sighook {
+	/** The RCU list node (must be the first field). */
+	struct rcu_node node;
+
+	/** The signal being hooked, or 0 for atsigexit(). */
+	int sig;
+	/** Signal hook flags. */
+	enum sigflags flags;
+	/** The function to call. */
+	sighook_fn *fn;
+	/** An argument to pass to the function. */
+	void *arg;
+	/** Flag for SH_ONESHOT. */
+	atomic bool armed;
+};
+
+/** The lists of signal hooks. */
+static struct rcu_list sighooks[64];
+
+/** Get the hook list for a particular signal. */
+static struct rcu_list *siglist(int sig) {
+	return &sighooks[sig % countof(sighooks)];
+}
+
+/** Mutex for initialization and RCU writer exclusion. */
+static pthread_mutex_t sigmutex = PTHREAD_MUTEX_INITIALIZER;
+
+/** Check if a signal was generated by userspace. */
+static bool is_user_generated(const siginfo_t *info) {
+	// https://pubs.opengroup.org/onlinepubs/9799919799/functions/V2_chap02.html#tag_16_04_03_03
+	//
+	//     If si_code is SI_USER or SI_QUEUE, or any value less than or
+	//     equal to 0, then the signal was generated by a process ...
+	int code = info->si_code;
+	return code == SI_USER || code == SI_QUEUE || code <= 0;
+}
+
+/** Check if a signal is caused by a fault. */
+static bool is_fault(const siginfo_t *info) {
+	int sig = info->si_signo;
+	if (sig == SIGBUS || sig == SIGFPE || sig == SIGILL || sig == SIGSEGV) {
+		return !is_user_generated(info);
+	} else {
+		return false;
+	}
+}
+
+// https://pubs.opengroup.org/onlinepubs/9799919799/basedefs/signal.h.html
+static const int FATAL_SIGNALS[] = {
+	SIGABRT,
+	SIGALRM,
+	SIGBUS,
+	SIGFPE,
+	SIGHUP,
+	SIGILL,
+	SIGINT,
+#ifdef SIGIO
+	SIGIO,
+#endif
+	SIGPIPE,
+#ifdef SIGPOLL
+	SIGPOLL,
+#endif
+#ifdef SIGPROF
+	SIGPROF,
+#endif
+#ifdef SIGPWR
+	SIGPWR,
+#endif
+	SIGQUIT,
+	SIGSEGV,
+#ifdef SIGSTKFLT
+	SIGSTKFLT,
+#endif
+#ifdef SIGSYS
+	SIGSYS,
+#endif
+	SIGTERM,
+	SIGTRAP,
+	SIGUSR1,
+	SIGUSR2,
+#ifdef SIGVTALRM
+	SIGVTALRM,
+#endif
+	SIGXCPU,
+	SIGXFSZ,
+};
+
+/** Check if a signal's default action is to terminate the process. */
+static bool is_fatal(int sig) {
+	for (size_t i = 0; i < countof(FATAL_SIGNALS); ++i) {
+		if (sig == FATAL_SIGNALS[i]) {
+			return true;
+		}
+	}
+
+#ifdef SIGRTMIN
+	// https://pubs.opengroup.org/onlinepubs/9799919799/functions/V2_chap02.html#tag_16_04_03_01
+	//
+	//     The default actions for the realtime signals in the range
+	//     SIGRTMIN to SIGRTMAX shall be to terminate the process
+	//     abnormally.
+	if (sig >= SIGRTMIN && sig <= SIGRTMAX) {
+		return true;
+	}
+#endif
+
+	return false;
+}
+
+/** Reraise a fatal signal. */
+_noreturn
+static void reraise(siginfo_t *info) {
+	int sig = info->si_signo;
+
+	// Restore the default signal action
+	if (signal(sig, SIG_DFL) == SIG_ERR) {
+		goto fail;
+	}
+
+	// Unblock the signal, since we didn't set SA_NODEFER
+	sigset_t mask;
+	if (sigemptyset(&mask) != 0
+	    || sigaddset(&mask, sig) != 0
+	    || pthread_sigmask(SIG_UNBLOCK, &mask, NULL) != 0) {
+		goto fail;
+	}
+
+#if __linux__
+	// On Linux, try to re-raise the exact siginfo_t (since 3.9, a process can
+	// signal itself with any siginfo_t)
+	pid_t tid = syscall(SYS_gettid);
+	syscall(SYS_rt_tgsigqueueinfo, getpid(), tid, sig, info);
+#endif
+
+	raise(sig);
+fail:
+	abort();
+}
+
+/** Check whether we should run a hook. */
+static bool should_run(int sig, struct sighook *hook) {
+	if (hook->sig != sig && hook->sig != 0) {
+		return false;
+	}
+
+	if (hook->flags & SH_ONESHOT) {
+		if (!exchange(&hook->armed, false, relaxed)) {
+			return false;
+		}
+	}
+
+	return true;
+}
+
+/** Find any matching hooks and run them. */
+static enum sigflags run_hooks(struct rcu_list *list, int sig, siginfo_t *info) {
+	enum sigflags ret = 0;
+
+	for_rcu (struct sighook, hook, list) {
+		if (should_run(sig, hook)) {
+			hook->fn(sig, info, hook->arg);
+			ret |= hook->flags;
+		}
+	}
+
+	return ret;
+}
+
+/** Dispatches a signal to the registered handlers. */
+static void sigdispatch(int sig, siginfo_t *info, void *context) {
+	// If we get a fault (e.g. a "real" SIGSEGV, not something like
+	// kill(..., SIGSEGV)), don't try to run signal hooks, since we could be
+	// in an arbitrarily corrupted state.
+	//
+	// POSIX says that returning normally from a signal handler for a fault
+	// is undefined.  But in practice, it's better to uninstall the handler
+	// and return, which will re-run the faulting instruction and cause us
+	// to die "correctly" (e.g. with a core dump pointing at the faulting
+	// instruction, not reraise()).
+	if (is_fault(info)) {
+		// On macOS, we cannot reliably distinguish between faults and
+		// asynchronous signals.  For example, pkill -SEGV bfs will
+		// result in si_code == SEGV_ACCERR.  So we always re-raise the
+		// signal, because just returning would cause us to ignore
+		// asynchronous SIG{BUS,ILL,SEGV}.
+#if !__APPLE__
+		if (signal(sig, SIG_DFL) != SIG_ERR) {
+			return;
+		}
+#endif
+		reraise(info);
+	}
+
+	// https://pubs.opengroup.org/onlinepubs/9799919799/functions/V2_chap02.html#tag_16_04_04
+	//
+	//     After returning from a signal-catching function, the value of
+	//     errno is unspecified if the signal-catching function or any
+	//     function it called assigned a value to errno and the signal-
+	//     catching function did not save and restore the original value of
+	//     errno.
+	int error = errno;
+
+	// Run the normal hooks
+	struct rcu_list *list = siglist(sig);
+	enum sigflags flags = run_hooks(list, sig, info);
+
+	// Run the atsigexit() hooks, if we're exiting
+	if (!(flags & SH_CONTINUE) && is_fatal(sig)) {
+		list = siglist(0);
+		run_hooks(list, sig, info);
+		reraise(info);
+	}
+
+	errno = error;
+}
+
+/** A saved signal handler, for sigreset() to restore. */
+struct sigsave {
+	struct rcu_node node;
+	int sig;
+	struct sigaction action;
+};
+
+/** The list of saved signal handlers. */
+static struct rcu_list saved;
+/** `saved` initialization status (since rcu_list_init() isn't atomic). */
+static atomic bool initialized = false;
+
+/** Make sure our signal handler is installed for a given signal. */
+static int siginit(int sig) {
+#ifdef SA_RESTART
+#  define BFS_SA_RESTART SA_RESTART
+#else
+#  define BFS_SA_RESTART 0
+#endif
+
+	static struct sigaction action = {
+		.sa_sigaction = sigdispatch,
+		.sa_flags = BFS_SA_RESTART | SA_SIGINFO,
+	};
+
+	static sigset_t signals;
+
+	if (!load(&initialized, relaxed)) {
+		if (sigemptyset(&signals) != 0
+		    || sigemptyset(&action.sa_mask) != 0) {
+			return -1;
+		}
+
+		for (size_t i = 0; i < countof(sighooks); ++i) {
+			rcu_list_init(&sighooks[i]);
+		}
+
+		rcu_list_init(&saved);
+		store(&initialized, true, release);
+	}
+
+	int installed = sigismember(&signals, sig);
+	if (installed < 0) {
+		return -1;
+	} else if (installed) {
+		return 0;
+	}
+
+	sigset_t updated = signals;
+	if (sigaddset(&updated, sig) != 0) {
+		return -1;
+	}
+
+	struct sigaction original;
+	if (sigaction(sig, NULL, &original) != 0) {
+		return -1;
+	}
+
+	struct sigsave *save = ALLOC(struct sigsave);
+	if (!save) {
+		return -1;
+	}
+
+	save->sig = sig;
+	save->action = original;
+	rcu_list_append(&saved, &save->node);
+
+	if (sigaction(sig, &action, NULL) != 0) {
+		rcu_list_remove(&saved, &save->node);
+		free(save);
+		return -1;
+	}
+
+	signals = updated;
+	return 0;
+}
+
+/** Shared sighook()/atsigexit() implementation. */
+static struct sighook *sighook_impl(int sig, sighook_fn *fn, void *arg, enum sigflags flags) {
+	struct sighook *hook = ALLOC(struct sighook);
+	if (!hook) {
+		return NULL;
+	}
+
+	hook->sig = sig;
+	hook->flags = flags;
+	hook->fn = fn;
+	hook->arg = arg;
+	atomic_init(&hook->armed, true);
+
+	struct rcu_list *list = siglist(sig);
+	rcu_list_append(list, &hook->node);
+	return hook;
+}
+
+struct sighook *sighook(int sig, sighook_fn *fn, void *arg, enum sigflags flags) {
+	bfs_assert(sig > 0);
+
+	mutex_lock(&sigmutex);
+
+	struct sighook *ret = NULL;
+	if (siginit(sig) == 0) {
+		ret = sighook_impl(sig, fn, arg, flags);
+	}
+
+	mutex_unlock(&sigmutex);
+	return ret;
+}
+
+struct sighook *atsigexit(sighook_fn *fn, void *arg) {
+	mutex_lock(&sigmutex);
+
+	for (size_t i = 0; i < countof(FATAL_SIGNALS); ++i) {
+		// Ignore errors; atsigexit() is best-effort anyway and things
+		// like sanitizer runtimes or valgrind may reserve signals for
+		// their own use
+		siginit(FATAL_SIGNALS[i]);
+	}
+
+#ifdef SIGRTMIN
+	for (int i = SIGRTMIN; i <= SIGRTMAX; ++i) {
+		siginit(i);
+	}
+#endif
+
+	struct sighook *ret = sighook_impl(0, fn, arg, 0);
+	mutex_unlock(&sigmutex);
+	return ret;
+}
+
+void sigunhook(struct sighook *hook) {
+	if (!hook) {
+		return;
+	}
+
+	mutex_lock(&sigmutex);
+
+	struct rcu_list *list = siglist(hook->sig);
+	rcu_list_remove(list, &hook->node);
+
+	mutex_unlock(&sigmutex);
+
+	free(hook);
+}
+
+int sigreset(void) {
+	if (!load(&initialized, acquire)) {
+		return 0;
+	}
+
+	int ret = 0;
+
+	for_rcu (struct sigsave, save, &saved) {
+		if (sigaction(save->sig, &save->action, NULL) != 0) {
+			ret = -1;
+			break;
+		}
+	}
+
+	return ret;
+}
diff --git a/src/sighook.h b/src/sighook.h
new file mode 100644
index 0000000..7149229
--- /dev/null
+++ b/src/sighook.h
@@ -0,0 +1,83 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+/**
+ * Signal hooks.
+ */
+
+#ifndef BFS_SIGHOOK_H
+#define BFS_SIGHOOK_H
+
+#include <signal.h>
+
+/**
+ * A dynamic signal hook.
+ */
+struct sighook;
+
+/**
+ * Signal hook flags.
+ */
+enum sigflags {
+	/** Suppress the default action for this signal. */
+	SH_CONTINUE = 1 << 0,
+	/** Only run this hook once. */
+	SH_ONESHOT = 1 << 1,
+};
+
+/**
+ * A signal hook callback.  Hooks are executed from a signal handler, so must
+ * only call async-signal-safe functions.
+ *
+ * @sig
+ *         The signal number.
+ * @info
+ *         Additional information about the signal.
+ * @arg
+ *         An arbitrary pointer passed to the hook.
+ */
+typedef void sighook_fn(int sig, siginfo_t *info, void *arg);
+
+/**
+ * Install a hook for a signal.
+ *
+ * @sig
+ *         The signal to hook.
+ * @fn
+ *         The function to call.
+ * @arg
+ *         An argument passed to the function.
+ * @flags
+ *         Flags for the new hook.
+ * @return
+ *         The installed hook, or NULL on failure.
+ */
+struct sighook *sighook(int sig, sighook_fn *fn, void *arg, enum sigflags flags);
+
+/**
+ * On a best-effort basis, invoke the given hook just before the program is
+ * abnormally terminated by a signal.
+ *
+ * @fn
+ *         The function to call.
+ * @arg
+ *         An argument passed to the function.
+ * @return
+ *         The installed hook, or NULL on failure.
+ */
+struct sighook *atsigexit(sighook_fn *fn, void *arg);
+
+/**
+ * Remove a signal hook.
+ */
+void sigunhook(struct sighook *hook);
+
+/**
+ * Restore all signal handlers to their original dispositions (e.g. after fork()).
+ *
+ * @return
+ *         0 on success, -1 on failure.
+ */
+int sigreset(void);
+
+#endif // BFS_SIGHOOK_H
diff --git a/src/stat.c b/src/stat.c
index f5cf3fe..1fcfde3 100644
--- a/src/stat.c
+++ b/src/stat.c
@@ -1,12 +1,14 @@
 // Copyright © Tavian Barnes <tavianator@tavianator.com>
 // SPDX-License-Identifier: 0BSD
 
-#include "prelude.h"
 #include "stat.h"
+
 #include "atomic.h"
+#include "bfs.h"
 #include "bfstd.h"
 #include "diag.h"
 #include "sanity.h"
+
 #include <errno.h>
 #include <fcntl.h>
 #include <string.h>
@@ -49,9 +51,11 @@ const char *bfs_stat_field_name(enum bfs_stat_field field) {
 		return "change time";
 	case BFS_STAT_MTIME:
 		return "modification time";
+	case BFS_STAT_MNT_ID:
+		return "mount ID";
 	}
 
-	bfs_bug("Unrecognized stat field");
+	bfs_bug("Unrecognized stat field %d", (int)field);
 	return "???";
 }
 
@@ -62,7 +66,7 @@ int bfs_fstatat_flags(enum bfs_stat_flags flags) {
 		ret |= AT_SYMLINK_NOFOLLOW;
 	}
 
-#if defined(AT_NO_AUTOMOUNT) && (!__GNU__ || __GLIBC_PREREQ(2, 35))
+#ifdef AT_NO_AUTOMOUNT
 	ret |= AT_NO_AUTOMOUNT;
 #endif
 
@@ -99,6 +103,10 @@ void bfs_stat_convert(struct bfs_stat *dest, const struct stat *src) {
 	dest->rdev = src->st_rdev;
 	dest->mask |= BFS_STAT_RDEV;
 
+	// No mount IDs in regular stat(), so use the dev_t as an approximation
+	dest->mnt_id = dest->dev;
+	dest->mask |= BFS_STAT_MNT_ID;
+
 #if BFS_HAS_ST_FLAGS
 	dest->attrs = src->st_flags;
 	dest->mask |= BFS_STAT_ATTRS;
@@ -116,6 +124,9 @@ void bfs_stat_convert(struct bfs_stat *dest, const struct stat *src) {
 #if BFS_HAS_ST_BIRTHTIM
 	dest->btime = src->st_birthtim;
 	dest->mask |= BFS_STAT_BTIME;
+#elif BFS_HAS___ST_BIRTHTIM
+	dest->btime = src->__st_birthtim;
+	dest->mask |= BFS_STAT_BTIME;
 #elif BFS_HAS_ST_BIRTHTIMESPEC
 	dest->btime = src->st_birthtimespec;
 	dest->mask |= BFS_STAT_BTIME;
@@ -164,6 +175,17 @@ int bfs_statx_flags(enum bfs_stat_flags flags) {
 	return ret;
 }
 
+unsigned int bfs_statx_mask(void) {
+	unsigned int mask = STATX_BASIC_STATS | STATX_BTIME;
+#ifdef STATX_MNT_ID
+	mask |= STATX_MNT_ID;
+#endif
+#ifdef STATX_MNT_ID_UNIQUE
+	mask |= STATX_MNT_ID_UNIQUE;
+#endif
+	return mask;
+}
+
 int bfs_statx_convert(struct bfs_stat *dest, const struct statx *src) {
 	// Callers shouldn't have to check anything except the times
 	const unsigned int guaranteed = STATX_BASIC_STATS & ~(STATX_ATIME | STATX_CTIME | STATX_MTIME);
@@ -204,6 +226,18 @@ int bfs_statx_convert(struct bfs_stat *dest, const struct statx *src) {
 	dest->attrs = src->stx_attributes;
 	dest->mask |= BFS_STAT_ATTRS;
 
+	dest->mnt_id = dest->dev;
+#ifdef STATX_MNT_ID
+	unsigned int mnt_mask = STATX_MNT_ID;
+#  ifdef STATX_MNT_ID_UNIQUE
+	mnt_mask |= STATX_MNT_ID_UNIQUE;
+#  endif
+	if (src->stx_mask & mnt_mask) {
+		dest->mnt_id = src->stx_mnt_id;
+	}
+#endif
+	dest->mask |= BFS_STAT_MNT_ID;
+
 	if (src->stx_mask & STATX_ATIME) {
 		dest->atime.tv_sec = src->stx_atime.tv_sec;
 		dest->atime.tv_nsec = src->stx_atime.tv_nsec;
@@ -235,7 +269,7 @@ int bfs_statx_convert(struct bfs_stat *dest, const struct statx *src) {
  * bfs_stat() implementation backed by statx().
  */
 static int bfs_statx_impl(int at_fd, const char *at_path, int at_flags, struct bfs_stat *buf) {
-	unsigned int mask = STATX_BASIC_STATS | STATX_BTIME;
+	unsigned int mask = bfs_statx_mask();
 	struct statx xbuf;
 	int ret = bfs_statx(at_fd, at_path, at_flags, mask, &xbuf);
 	if (ret != 0) {
@@ -297,27 +331,21 @@ int bfs_stat(int at_fd, const char *at_path, enum bfs_stat_flags flags, struct b
 		return bfs_stat_tryfollow(at_fd, at_path, at_flags, flags, buf);
 	}
 
-	// Check __GNU__ to work around https://lists.gnu.org/archive/html/bug-hurd/2021-12/msg00001.html
-#if defined(AT_EMPTY_PATH) && !__GNU__
-	static atomic bool has_at_ep = true;
-	if (load(&has_at_ep, relaxed)) {
-		at_flags |= AT_EMPTY_PATH;
-		int ret = bfs_stat_explicit(at_fd, "", at_flags, buf);
-		if (ret != 0 && errno == EINVAL) {
-			store(&has_at_ep, false, relaxed);
-		} else {
-			return ret;
-		}
-	}
-#endif
-
-	struct stat statbuf;
-	if (fstat(at_fd, &statbuf) == 0) {
-		bfs_stat_convert(buf, &statbuf);
-		return 0;
-	} else {
+#if BFS_USE_STATX
+	// If we have statx(), use it with AT_EMPTY_PATH for its extra features
+	at_flags |= AT_EMPTY_PATH;
+	return bfs_stat_explicit(at_fd, "", at_flags, buf);
+#else
+	// Otherwise, just use fstat() rather than fstatat(at_fd, ""), to save
+	// the kernel the trouble of copying in the empty string
+	struct stat sb;
+	if (fstat(at_fd, &sb) != 0) {
 		return -1;
 	}
+
+	bfs_stat_convert(buf, &sb);
+	return 0;
+#endif
 }
 
 const struct timespec *bfs_stat_time(const struct bfs_stat *buf, enum bfs_stat_field field) {
diff --git a/src/stat.h b/src/stat.h
index 8d7144d..c4a63d3 100644
--- a/src/stat.h
+++ b/src/stat.h
@@ -12,7 +12,9 @@
 #ifndef BFS_STAT_H
 #define BFS_STAT_H
 
-#include "prelude.h"
+#include "bfs.h"
+
+#include <stdint.h>
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <time.h>
@@ -25,7 +27,7 @@
 #  define BFS_USE_STATX (BFS_HAS_STATX || BFS_HAS_STATX_SYSCALL)
 #endif
 
-#if BFS_USE_SYS_PARAM_H
+#if __has_include(<sys/param.h>)
 #  include <sys/param.h>
 #endif
 
@@ -55,6 +57,7 @@ enum bfs_stat_field {
 	BFS_STAT_BTIME  = 1 << 11,
 	BFS_STAT_CTIME  = 1 << 12,
 	BFS_STAT_MTIME  = 1 << 13,
+	BFS_STAT_MNT_ID = 1 << 14,
 };
 
 /**
@@ -101,6 +104,8 @@ struct bfs_stat {
 	blkcnt_t blocks;
 	/** The device ID represented by this file. */
 	dev_t rdev;
+	/** The ID of the mount point containing this file. */
+	uint64_t mnt_id;
 
 	/** Attributes/flags set on the file. */
 	unsigned long long attrs;
@@ -118,14 +123,14 @@ struct bfs_stat {
 /**
  * Facade over fstatat().
  *
- * @param at_fd
+ * @at_fd
  *         The base file descriptor for the lookup.
- * @param at_path
+ * @at_path
  *         The path to stat, relative to at_fd.  Pass NULL to fstat() at_fd
  *         itself.
- * @param flags
+ * @flags
  *         Flags that affect the lookup.
- * @param[out] buf
+ * @buf[out]
  *         A place to store the stat buffer, if successful.
  * @return
  *         0 on success, -1 on error.
@@ -149,6 +154,11 @@ void bfs_stat_convert(struct bfs_stat *dest, const struct stat *src);
 int bfs_statx_flags(enum bfs_stat_flags flags);
 
 /**
+ * Get the default statx() mask.
+ */
+unsigned int bfs_statx_mask(void);
+
+/**
  * Convert struct statx to struct bfs_stat.
  */
 int bfs_statx_convert(struct bfs_stat *dest, const struct statx *src);
diff --git a/src/thread.c b/src/thread.c
index 3793896..b3604f8 100644
--- a/src/thread.c
+++ b/src/thread.c
@@ -1,13 +1,18 @@
 // Copyright © Tavian Barnes <tavianator@tavianator.com>
 // SPDX-License-Identifier: 0BSD
 
-#include "prelude.h"
 #include "thread.h"
+
 #include "bfstd.h"
 #include "diag.h"
+
 #include <errno.h>
 #include <pthread.h>
 
+#if __has_include(<pthread_np.h>)
+#  include <pthread_np.h>
+#endif
+
 #define THREAD_FALLIBLE(expr) \
 	do { \
 		int err = expr; \
@@ -31,6 +36,14 @@ int thread_create(pthread_t *thread, const pthread_attr_t *attr, thread_fn *fn,
 	THREAD_FALLIBLE(pthread_create(thread, attr, fn, arg));
 }
 
+void thread_setname(pthread_t thread, const char *name) {
+#if BFS_HAS_PTHREAD_SETNAME_NP
+	pthread_setname_np(thread, name); // return value is not checked
+#elif BFS_HAS_PTHREAD_SET_NAME_NP
+	pthread_set_name_np(thread, name); // alternative spelling, used only when the one above is not configured
+#endif // with neither macro set, the function body is empty
+}
+
 void thread_join(pthread_t thread, void **ret) {
 	THREAD_INFALLIBLE(pthread_join(thread, ret));
 }
diff --git a/src/thread.h b/src/thread.h
index db11bd8..3dd8422 100644
--- a/src/thread.h
+++ b/src/thread.h
@@ -8,17 +8,8 @@
 #ifndef BFS_THREAD_H
 #define BFS_THREAD_H
 
-#include "prelude.h"
 #include <pthread.h>
 
-#if __STDC_VERSION__ < C23 && !defined(thread_local)
-#  if BFS_USE_THREADS_H
-#    include <threads.h>
-#  else
-#    define thread_local _Thread_local
-#  endif
-#endif
-
 /** Thread entry point type. */
 typedef void *thread_fn(void *arg);
 
@@ -31,6 +22,11 @@ typedef void *thread_fn(void *arg);
 int thread_create(pthread_t *thread, const pthread_attr_t *attr, thread_fn *fn, void *arg);
 
 /**
+ * Set the name of a thread.  Errors are ignored, and nothing happens if no supported API was configured.
+ */
+void thread_setname(pthread_t thread, const char *name);
+
+/**
  * Wrapper for pthread_join().
  */
 void thread_join(pthread_t thread, void **ret);
diff --git a/src/trie.c b/src/trie.c
index 808953e..6aac17f 100644
--- a/src/trie.c
+++ b/src/trie.c
@@ -81,21 +81,23 @@
  * and insert intermediate singleton "jump" nodes when necessary.
  */
 
-#include "prelude.h"
 #include "trie.h"
+
 #include "alloc.h"
+#include "bfs.h"
 #include "bit.h"
 #include "diag.h"
 #include "list.h"
+
 #include <stdint.h>
 #include <string.h>
 
-bfs_static_assert(CHAR_WIDTH == 8);
+static_assert(CHAR_WIDTH == 8, "This trie implementation assumes 8-bit bytes.");
 
 #if __i386__ || __x86_64__
-#  define trie_clones attr(target_clones("popcnt", "default"))
+#  define _trie_clones _target_clones("popcnt", "default")
 #else
-#  define trie_clones
+#  define _trie_clones
 #endif
 
 /** Number of bits for the sparse array bitmap, aka the range of a nibble. */
@@ -127,37 +129,37 @@ struct trie_node {
 	 * tag to distinguish internal nodes from leaves.  This is safe as long
 	 * as all dynamic allocations are aligned to more than a single byte.
 	 */
-	uintptr_t children[];
+	uintptr_t children[]; // _counted_by(count_ones(bitmap))
 };
 
-/** Check if an encoded pointer is to a leaf. */
-static bool trie_is_leaf(uintptr_t ptr) {
+/** Check if an encoded pointer is to an internal node (internal nodes carry a set low bit). */
+static bool trie_is_node(uintptr_t ptr) {
 	return ptr & 1;
 }
 
-/** Decode a pointer to a leaf. */
-static struct trie_leaf *trie_decode_leaf(uintptr_t ptr) {
-	bfs_assert(trie_is_leaf(ptr));
-	return (struct trie_leaf *)(ptr ^ 1);
+/** Decode a pointer to an internal node by subtracting the tag bit added by trie_encode_node(). */
+static struct trie_node *trie_decode_node(uintptr_t ptr) {
+	bfs_assert(trie_is_node(ptr));
+	return (struct trie_node *)(ptr - 1);
+}
 
-/** Encode a pointer to a leaf. */
-static uintptr_t trie_encode_leaf(const struct trie_leaf *leaf) {
-	uintptr_t ptr = (uintptr_t)leaf ^ 1;
-	bfs_assert(trie_is_leaf(ptr));
+/** Encode a pointer to an internal node by adding 1, which sets the low tag bit of an aligned address. */
+static uintptr_t trie_encode_node(const struct trie_node *node) {
+	uintptr_t ptr = (uintptr_t)node + 1;
+	bfs_assert(trie_is_node(ptr));
 	return ptr;
 }
 
-/** Decode a pointer to an internal node. */
-static struct trie_node *trie_decode_node(uintptr_t ptr) {
-	bfs_assert(!trie_is_leaf(ptr));
-	return (struct trie_node *)ptr;
+/** Decode a pointer to a leaf; leaves are stored untagged, so this is just a cast. */
+static struct trie_leaf *trie_decode_leaf(uintptr_t ptr) {
+	bfs_assert(!trie_is_node(ptr));
+	return (struct trie_leaf *)ptr;
 }
 
-/** Encode a pointer to an internal node. */
-static uintptr_t trie_encode_node(const struct trie_node *node) {
-	uintptr_t ptr = (uintptr_t)node;
-	bfs_assert(!trie_is_leaf(ptr));
+/** Encode a pointer to a leaf; leaves are stored untagged, so the low bit must already be clear. */
+static uintptr_t trie_encode_leaf(const struct trie_leaf *leaf) {
+	uintptr_t ptr = (uintptr_t)leaf;
+	bfs_assert(!trie_is_node(ptr));
 	return ptr;
 }
 
@@ -169,20 +171,32 @@ void trie_init(struct trie *trie) {
 }
 
 /** Extract the nibble at a certain offset from a byte sequence. */
-static unsigned char trie_key_nibble(const void *key, size_t offset) {
+static unsigned char trie_key_nibble(const void *key, size_t length, size_t offset) {
 	const unsigned char *bytes = key;
-	size_t byte = offset >> 1;
+	size_t byte = offset / 2; // offsets count nibbles, two per byte
+	bfs_assert(byte < length); // the requested nibble must lie inside the key
 
 	// A branchless version of
 	// if (offset & 1) {
-	//         return bytes[byte] >> 4;
-	// } else {
 	//         return bytes[byte] & 0xF;
+	// } else {
+	//         return bytes[byte] >> 4;
 	// }
-	unsigned int shift = (offset & 1) << 2;
+	unsigned int shift = 4 * ((offset + 1) % 2); // 4 for even offsets (high nibble), 0 for odd offsets (low nibble)
 	return (bytes[byte] >> shift) & 0xF;
 }
 
+/** Extract the nibble at a certain offset (counted in nibbles) from a leaf's key, using the leaf's own length as the bound. */
+static unsigned char trie_leaf_nibble(const struct trie_leaf *leaf, size_t offset) {
+	return trie_key_nibble(leaf->key, leaf->length, offset);
+}
+
+/** Get the number of children of an internal node, computed as count_ones() of its bitmap. */
+_trie_clones
+static unsigned int trie_node_size(const struct trie_node *node) {
+	return count_ones((unsigned int)node->bitmap);
+}
+
 /**
  * Finds a leaf in the trie that matches the key at every branch.  If the key
  * exists in the trie, the representative will match the searched key.  But
@@ -190,26 +204,24 @@ static unsigned char trie_key_nibble(const void *key, size_t offset) {
  * that case, the first mismatch between the key and the representative will be
  * the depth at which to make a new branch to insert the key.
  */
-trie_clones
+_trie_clones
 static struct trie_leaf *trie_representative(const struct trie *trie, const void *key, size_t length) {
 	uintptr_t ptr = trie->root;
-	if (!ptr) {
-		return NULL;
-	}
 
-	size_t offset = 0;
-	while (!trie_is_leaf(ptr)) {
+	size_t offset = 0, limit = 2 * length;
+	while (trie_is_node(ptr)) {
 		struct trie_node *node = trie_decode_node(ptr);
 		offset += node->offset;
 
 		unsigned int index = 0;
-		if ((offset >> 1) < length) {
-			unsigned char nibble = trie_key_nibble(key, offset);
+		if (offset < limit) {
+			unsigned char nibble = trie_key_nibble(key, length, offset);
 			unsigned int bit = 1U << nibble;
-			// bits = bitmap & bit ? bitmap & (bit - 1) : 0
-			unsigned int mask = -!!(node->bitmap & bit);
-			unsigned int bits = node->bitmap & (bit - 1) & mask;
-			index = count_ones(bits);
+			unsigned int map = node->bitmap;
+			unsigned int bits = map & (bit - 1);
+			unsigned int mask = -!!(map & bit);
+			// index = (map & bit) ? count_ones(bits) : 0;
+			index = count_ones(bits) & mask;
 		}
 		ptr = node->children[index];
 	}
@@ -221,7 +233,8 @@ struct trie_leaf *trie_find_str(const struct trie *trie, const char *key) {
 	return trie_find_mem(trie, key, strlen(key) + 1);
 }
 
-struct trie_leaf *trie_find_mem(const struct trie *trie, const void *key, size_t length) {
+_trie_clones
+static struct trie_leaf *trie_find_mem_impl(const struct trie *trie, const void *key, size_t length) {
 	struct trie_leaf *rep = trie_representative(trie, key, length);
 	if (rep && rep->length == length && memcmp(rep->key, key, length) == 0) {
 		return rep;
@@ -230,7 +243,22 @@ struct trie_leaf *trie_find_mem(const struct trie *trie, const void *key, size_t
 	}
 }
 
-struct trie_leaf *trie_find_postfix(const struct trie *trie, const char *key) {
+struct trie_leaf *trie_find_mem(const struct trie *trie, const void *key, size_t length) {
+	return trie_find_mem_impl(trie, key, length); // public entry point; the _trie_clones attribute sits on the static _impl
+}
+
+void *trie_get_str(const struct trie *trie, const char *key) {
+	const struct trie_leaf *leaf = trie_find_str(trie, key);
+	return leaf ? leaf->value : NULL; // NULL both when the key is absent and when its stored value is NULL
+}
+
+void *trie_get_mem(const struct trie *trie, const void *key, size_t length) {
+	const struct trie_leaf *leaf = trie_find_mem(trie, key, length);
+	return leaf ? leaf->value : NULL; // NULL both when the key is absent and when its stored value is NULL
+}
+
+_trie_clones
+static struct trie_leaf *trie_find_postfix_impl(const struct trie *trie, const char *key) {
 	size_t length = strlen(key);
 	struct trie_leaf *rep = trie_representative(trie, key, length + 1);
 	if (rep && rep->length >= length && memcmp(rep->key, key, length) == 0) {
@@ -240,6 +268,10 @@ struct trie_leaf *trie_find_postfix(const struct trie *trie, const char *key) {
 	}
 }
 
+struct trie_leaf *trie_find_postfix(const struct trie *trie, const char *key) {
+	return trie_find_postfix_impl(trie, key); // public entry point; the _trie_clones attribute sits on the static _impl
+}
+
 /**
  * Find a leaf that may end at the current node.
  */
@@ -251,10 +283,10 @@ static struct trie_leaf *trie_terminal_leaf(const struct trie_node *node) {
 		}
 
 		uintptr_t ptr = node->children[0];
-		if (trie_is_leaf(ptr)) {
-			return trie_decode_leaf(ptr);
-		} else {
+		if (trie_is_node(ptr)) {
 			node = trie_decode_node(ptr);
+		} else {
+			return trie_decode_leaf(ptr);
 		}
 	}
 
@@ -270,7 +302,7 @@ static bool trie_check_prefix(struct trie_leaf *leaf, size_t skip, const char *k
 	}
 }
 
-trie_clones
+_trie_clones
 static struct trie_leaf *trie_find_prefix_impl(const struct trie *trie, const char *key) {
 	uintptr_t ptr = trie->root;
 	if (!ptr) {
@@ -281,21 +313,21 @@ static struct trie_leaf *trie_find_prefix_impl(const struct trie *trie, const ch
 	size_t skip = 0;
 	size_t length = strlen(key) + 1;
 
-	size_t offset = 0;
-	while (!trie_is_leaf(ptr)) {
+	size_t offset = 0, limit = 2 * length;
+	while (trie_is_node(ptr)) {
 		struct trie_node *node = trie_decode_node(ptr);
 		offset += node->offset;
-		if ((offset >> 1) >= length) {
+		if (offset >= limit) {
 			return best;
 		}
 
 		struct trie_leaf *leaf = trie_terminal_leaf(node);
 		if (trie_check_prefix(leaf, skip, key, length)) {
 			best = leaf;
-			skip = offset >> 1;
+			skip = offset / 2;
 		}
 
-		unsigned char nibble = trie_key_nibble(key, offset);
+		unsigned char nibble = trie_key_nibble(key, length, offset);
 		unsigned int bit = 1U << nibble;
 		if (node->bitmap & bit) {
 			unsigned int index = count_ones(node->bitmap & (bit - 1));
@@ -355,16 +387,10 @@ static struct trie_node *trie_node_realloc(struct trie *trie, struct trie_node *
 
 /** Free a node. */
 static void trie_node_free(struct trie *trie, struct trie_node *node, size_t size) {
-	bfs_assert(size == (size_t)count_ones(node->bitmap));
+	bfs_assert(size == trie_node_size(node));
 	varena_free(&trie->nodes, node, size);
 }
 
-#if ENDIAN_NATIVE == ENDIAN_LITTLE
-#  define TRIE_BSWAP(n) (n)
-#elif ENDIAN_NATIVE == ENDIAN_BIG
-#  define TRIE_BSWAP(n) bswap(n)
-#endif
-
 /** Find the offset of the first nibble that differs between two keys. */
 static size_t trie_mismatch(const struct trie_leaf *rep, const void *key, size_t length) {
 	if (!rep) {
@@ -378,32 +404,34 @@ static size_t trie_mismatch(const struct trie_leaf *rep, const void *key, size_t
 	const char *rep_bytes = rep->key;
 	const char *key_bytes = key;
 
-	size_t i = 0;
-	for (size_t chunk = sizeof(chunk); i + chunk <= length; i += chunk) {
-		size_t rep_chunk, key_chunk;
-		memcpy(&rep_chunk, rep_bytes + i, sizeof(rep_chunk));
-		memcpy(&key_chunk, key_bytes + i, sizeof(key_chunk));
-
-		if (rep_chunk != key_chunk) {
-#ifdef TRIE_BSWAP
-			size_t diff = TRIE_BSWAP(rep_chunk ^ key_chunk);
-			i *= 2;
-			i += trailing_zeros(diff) / 4;
-			return i;
+	size_t ret = 0, i = 0;
+
+#define CHUNK(n) CHUNK_(uint##n##_t, load8_beu##n)
+#define CHUNK_(type, load8) \
+	(length - i >= sizeof(type)) { \
+		type rep_chunk = load8(rep_bytes + i); \
+		type key_chunk = load8(key_bytes + i); \
+		type diff = rep_chunk ^ key_chunk; \
+		ret += leading_zeros(diff) / 4; \
+		if (diff) { \
+			return ret; \
+		} \
+		i += sizeof(type); \
+	}
+
+#if SIZE_WIDTH >= 64
+	while CHUNK(64);
+	if CHUNK(32);
 #else
-			break;
+	while CHUNK(32);
 #endif
-		}
-	}
+	if CHUNK(16);
+	if CHUNK(8);
 
-	for (; i < length; ++i) {
-		unsigned char diff = rep_bytes[i] ^ key_bytes[i];
-		if (diff) {
-			return 2 * i + !(diff & 0xF);
-		}
-	}
+#undef CHUNK_
+#undef CHUNK
 
-	return 2 * i;
+	return ret;
 }
 
 /**
@@ -428,10 +456,10 @@ static size_t trie_mismatch(const struct trie_leaf *rep, const void *key, size_t
  *      | Z
  *      +--->...
  */
-trie_clones
+_trie_clones
 static struct trie_leaf *trie_node_insert(struct trie *trie, uintptr_t *ptr, struct trie_leaf *leaf, unsigned char nibble) {
 	struct trie_node *node = trie_decode_node(*ptr);
-	unsigned int size = count_ones(node->bitmap);
+	unsigned int size = trie_node_size(node);
 
 	// Double the capacity every power of two
 	if (has_single_bit(size)) {
@@ -482,10 +510,10 @@ static struct trie_leaf *trie_node_insert(struct trie *trie, uintptr_t *ptr, str
  *           | Y
  *           +--->key
  */
-static uintptr_t *trie_jump(struct trie *trie, uintptr_t *ptr, const char *key, size_t *offset) {
+static uintptr_t *trie_jump(struct trie *trie, uintptr_t *ptr, size_t *offset) {
 	// We only ever need to jump to leaf nodes, since internal nodes are
 	// guaranteed to be within OFFSET_MAX anyway
-	bfs_assert(trie_is_leaf(*ptr));
+	struct trie_leaf *leaf = trie_decode_leaf(*ptr);
 
 	struct trie_node *node = trie_node_alloc(trie, 1);
 	if (!node) {
@@ -495,7 +523,7 @@ static uintptr_t *trie_jump(struct trie *trie, uintptr_t *ptr, const char *key,
 	*offset += OFFSET_MAX;
 	node->offset = OFFSET_MAX;
 
-	unsigned char nibble = trie_key_nibble(key, *offset);
+	unsigned char nibble = trie_leaf_nibble(leaf, *offset);
 	node->bitmap = 1 << nibble;
 
 	node->children[0] = *ptr;
@@ -521,8 +549,8 @@ static uintptr_t *trie_jump(struct trie *trie, uintptr_t *ptr, const char *key,
  *      +--->leaf
  */
 static struct trie_leaf *trie_split(struct trie *trie, uintptr_t *ptr, struct trie_leaf *leaf, struct trie_leaf *rep, size_t offset, size_t mismatch) {
-	unsigned char key_nibble = trie_key_nibble(leaf->key, mismatch);
-	unsigned char rep_nibble = trie_key_nibble(rep->key, mismatch);
+	unsigned char key_nibble = trie_leaf_nibble(leaf, mismatch);
+	unsigned char rep_nibble = trie_leaf_nibble(rep, mismatch);
 	bfs_assert(key_nibble != rep_nibble);
 
 	struct trie_node *node = trie_node_alloc(trie, 2);
@@ -534,7 +562,7 @@ static struct trie_leaf *trie_split(struct trie *trie, uintptr_t *ptr, struct tr
 	node->bitmap = (1 << key_nibble) | (1 << rep_nibble);
 
 	size_t delta = mismatch - offset;
-	if (!trie_is_leaf(*ptr)) {
+	if (trie_is_node(*ptr)) {
 		struct trie_node *child = trie_decode_node(*ptr);
 		child->offset -= delta;
 	}
@@ -551,12 +579,18 @@ struct trie_leaf *trie_insert_str(struct trie *trie, const char *key) {
 	return trie_insert_mem(trie, key, strlen(key) + 1);
 }
 
-trie_clones
+_trie_clones
 static struct trie_leaf *trie_insert_mem_impl(struct trie *trie, const void *key, size_t length) {
 	struct trie_leaf *rep = trie_representative(trie, key, length);
 	size_t mismatch = trie_mismatch(rep, key, length);
-	if (mismatch >= (length << 1)) {
+	size_t misbyte = mismatch / 2;
+	if (misbyte >= length) {
+		bfs_assert(misbyte == length);
 		return rep;
+	} else if (rep && misbyte >= rep->length) {
+		bfs_bug("trie keys must be prefix-free");
+		errno = EINVAL;
+		return NULL;
 	}
 
 	struct trie_leaf *leaf = trie_leaf_alloc(trie, key, length);
@@ -571,14 +605,14 @@ static struct trie_leaf *trie_insert_mem_impl(struct trie *trie, const void *key
 
 	size_t offset = 0;
 	uintptr_t *ptr = &trie->root;
-	while (!trie_is_leaf(*ptr)) {
+	while (trie_is_node(*ptr)) {
 		struct trie_node *node = trie_decode_node(*ptr);
 		if (offset + node->offset > mismatch) {
 			break;
 		}
 		offset += node->offset;
 
-		unsigned char nibble = trie_key_nibble(key, offset);
+		unsigned char nibble = trie_leaf_nibble(leaf, offset);
 		unsigned int bit = 1U << nibble;
 		if (node->bitmap & bit) {
 			bfs_assert(offset < mismatch);
@@ -591,7 +625,7 @@ static struct trie_leaf *trie_insert_mem_impl(struct trie *trie, const void *key
 	}
 
 	while (mismatch - offset > OFFSET_MAX) {
-		ptr = trie_jump(trie, ptr, key, &offset);
+		ptr = trie_jump(trie, ptr, &offset);
 		if (!ptr) {
 			trie_leaf_free(trie, leaf);
 			return NULL;
@@ -605,13 +639,33 @@ struct trie_leaf *trie_insert_mem(struct trie *trie, const void *key, size_t len
 	return trie_insert_mem_impl(trie, key, length);
 }
 
+int trie_set_str(struct trie *trie, const char *key, const void *value) {
+	struct trie_leaf *leaf = trie_insert_str(trie, key); // NULL means the insertion failed
+	if (leaf) {
+		leaf->value = (void *)value; // const is cast away to store the pointer in the leaf
+		return 0;
+	} else {
+		return -1;
+	}
+}
+
+int trie_set_mem(struct trie *trie, const void *key, size_t length, const void *value) {
+	struct trie_leaf *leaf = trie_insert_mem(trie, key, length); // NULL means the insertion failed
+	if (leaf) {
+		leaf->value = (void *)value; // const is cast away to store the pointer in the leaf
+		return 0;
+	} else {
+		return -1;
+	}
+}
+
 /** Free a chain of singleton nodes. */
 static void trie_free_singletons(struct trie *trie, uintptr_t ptr) {
-	while (!trie_is_leaf(ptr)) {
+	while (trie_is_node(ptr)) {
 		struct trie_node *node = trie_decode_node(ptr);
 
 		// Make sure the bitmap is a power of two, i.e. it has just one child
-		bfs_assert(has_single_bit(node->bitmap));
+		bfs_assert(has_single_bit((size_t)node->bitmap));
 
 		ptr = node->children[0];
 		trie_node_free(trie, node, 1);
@@ -639,7 +693,7 @@ static void trie_free_singletons(struct trie *trie, uintptr_t ptr) {
  */
 static int trie_collapse_node(struct trie *trie, uintptr_t *parent, struct trie_node *parent_node, unsigned int child_index) {
 	uintptr_t other = parent_node->children[child_index ^ 1];
-	if (!trie_is_leaf(other)) {
+	if (trie_is_node(other)) {
 		struct trie_node *other_node = trie_decode_node(other);
 		if (other_node->offset + parent_node->offset <= OFFSET_MAX) {
 			other_node->offset += parent_node->offset;
@@ -649,22 +703,21 @@ static int trie_collapse_node(struct trie *trie, uintptr_t *parent, struct trie_
 	}
 
 	*parent = other;
-	trie_node_free(trie, parent_node, 1);
+	trie_node_free(trie, parent_node, 2);
 	return 0;
 }
 
-trie_clones
+_trie_clones
 static void trie_remove_impl(struct trie *trie, struct trie_leaf *leaf) {
 	uintptr_t *child = &trie->root;
 	uintptr_t *parent = NULL;
 	unsigned int child_bit = 0, child_index = 0;
 	size_t offset = 0;
-	while (!trie_is_leaf(*child)) {
+	while (trie_is_node(*child)) {
 		struct trie_node *node = trie_decode_node(*child);
 		offset += node->offset;
-		bfs_assert((offset >> 1) < leaf->length);
 
-		unsigned char nibble = trie_key_nibble(leaf->key, offset);
+		unsigned char nibble = trie_leaf_nibble(leaf, offset);
 		unsigned int bit = 1U << nibble;
 		unsigned int bitmap = node->bitmap;
 		bfs_assert(bitmap & bit);
@@ -689,19 +742,19 @@ static void trie_remove_impl(struct trie *trie, struct trie_leaf *leaf) {
 	}
 
 	struct trie_node *node = trie_decode_node(*parent);
-	child = node->children + child_index;
-	trie_free_singletons(trie, *child);
+	trie_free_singletons(trie, node->children[child_index]);
 
-	node->bitmap ^= child_bit;
-	unsigned int parent_size = count_ones(node->bitmap);
-	bfs_assert(parent_size > 0);
-	if (parent_size == 1 && trie_collapse_node(trie, parent, node, child_index) == 0) {
+	unsigned int parent_size = trie_node_size(node);
+	bfs_assert(parent_size > 1);
+	if (parent_size == 2 && trie_collapse_node(trie, parent, node, child_index) == 0) {
 		return;
 	}
 
-	if (child_index < parent_size) {
-		memmove(child, child + 1, (parent_size - child_index) * sizeof(*child));
+	for (size_t i = child_index; i + 1 < parent_size; ++i) {
+		node->children[i] = node->children[i + 1];
 	}
+	node->bitmap &= ~child_bit;
+	--parent_size;
 
 	if (has_single_bit(parent_size)) {
 		node = trie_node_realloc(trie, node, 2 * parent_size, parent_size);
diff --git a/src/trie.h b/src/trie.h
index 4288d76..19bd81d 100644
--- a/src/trie.h
+++ b/src/trie.h
@@ -6,6 +6,7 @@
 
 #include "alloc.h"
 #include "list.h"
+
 #include <stddef.h>
 #include <stdint.h>
 
@@ -20,7 +21,7 @@ struct trie_leaf {
 	/** The length of the key in bytes. */
 	size_t length;
 	/** The key itself, stored inline. */
-	char key[];
+	char key[] _counted_by(length);
 };
 
 /**
@@ -45,9 +46,9 @@ void trie_init(struct trie *trie);
 /**
  * Find the leaf for a string key.
  *
- * @param trie
+ * @trie
  *         The trie to search.
- * @param key
+ * @key
  *         The key to look up.
  * @return
  *         The found leaf, or NULL if the key is not present.
@@ -57,11 +58,11 @@ struct trie_leaf *trie_find_str(const struct trie *trie, const char *key);
 /**
  * Find the leaf for a fixed-size key.
  *
- * @param trie
+ * @trie
  *         The trie to search.
- * @param key
+ * @key
  *         The key to look up.
- * @param length
+ * @length
  *         The length of the key in bytes.
  * @return
  *         The found leaf, or NULL if the key is not present.
@@ -69,11 +70,37 @@ struct trie_leaf *trie_find_str(const struct trie *trie, const char *key);
 struct trie_leaf *trie_find_mem(const struct trie *trie, const void *key, size_t length);
 
 /**
+ * Get the value associated with a string key.
+ *
+ * @trie
+ *         The trie to search.
+ * @key
+ *         The key to look up.
+ * @return
+ *         The value stored for the key, or NULL if the key is not present (a key whose stored value is NULL looks the same).
+ */
+void *trie_get_str(const struct trie *trie, const char *key);
+
+/**
+ * Get the value associated with a fixed-size key.
+ *
+ * @trie
+ *         The trie to search.
+ * @key
+ *         The key to look up.
+ * @length
+ *         The length of the key in bytes.
+ * @return
+ *         The value stored for the key, or NULL if the key is not present (a key whose stored value is NULL looks the same).
+ */
+void *trie_get_mem(const struct trie *trie, const void *key, size_t length);
+
+/**
  * Find the shortest leaf that starts with a given key.
  *
- * @param trie
+ * @trie
  *         The trie to search.
- * @param key
+ * @key
  *         The key to look up.
  * @return
  *         A leaf that starts with the given key, or NULL.
@@ -83,9 +110,9 @@ struct trie_leaf *trie_find_postfix(const struct trie *trie, const char *key);
 /**
  * Find the leaf that is the longest prefix of the given key.
  *
- * @param trie
+ * @trie
  *         The trie to search.
- * @param key
+ * @key
  *         The key to look up.
  * @return
  *         The longest prefix match for the given key, or NULL.
@@ -95,9 +122,9 @@ struct trie_leaf *trie_find_prefix(const struct trie *trie, const char *key);
 /**
  * Insert a string key into the trie.
  *
- * @param trie
+ * @trie
  *         The trie to modify.
- * @param key
+ * @key
  *         The key to insert.
  * @return
  *         The inserted leaf, or NULL on failure.
@@ -107,11 +134,11 @@ struct trie_leaf *trie_insert_str(struct trie *trie, const char *key);
 /**
  * Insert a fixed-size key into the trie.
  *
- * @param trie
+ * @trie
  *         The trie to modify.
- * @param key
+ * @key
  *         The key to insert.
- * @param length
+ * @length
  *         The length of the key in bytes.
  * @return
  *         The inserted leaf, or NULL on failure.
@@ -119,11 +146,41 @@ struct trie_leaf *trie_insert_str(struct trie *trie, const char *key);
 struct trie_leaf *trie_insert_mem(struct trie *trie, const void *key, size_t length);
 
 /**
+ * Set the value for a string key, inserting the key if it is not already present.
+ *
+ * @trie
+ *         The trie to modify.
+ * @key
+ *         The key to insert.
+ * @value
+ *         The value to set; the pointer itself is stored in the leaf.
+ * @return
+ *         0 on success, -1 if the key could not be inserted.
+ */
+int trie_set_str(struct trie *trie, const char *key, const void *value);
+
+/**
+ * Set the value for a fixed-size key, inserting the key if it is not already present.
+ *
+ * @trie
+ *         The trie to modify.
+ * @key
+ *         The key to insert.
+ * @length
+ *         The length of the key in bytes.
+ * @value
+ *         The value to set; the pointer itself is stored in the leaf.
+ * @return
+ *         0 on success, -1 if the key could not be inserted.
+ */
+int trie_set_mem(struct trie *trie, const void *key, size_t length, const void *value);
+
+/**
  * Remove a leaf from a trie.
  *
- * @param trie
+ * @trie
  *         The trie to modify.
- * @param leaf
+ * @leaf
  *         The leaf to remove.
  */
 void trie_remove(struct trie *trie, struct trie_leaf *leaf);
diff --git a/src/typo.c b/src/typo.c
index b1c5c44..7b359c4 100644
--- a/src/typo.c
+++ b/src/typo.c
@@ -2,6 +2,7 @@
 // SPDX-License-Identifier: 0BSD
 
 #include "typo.h"
+
 #include <limits.h>
 #include <stdint.h>
 #include <stdlib.h>
diff --git a/src/typo.h b/src/typo.h
index 13eaa67..b0daaf1 100644
--- a/src/typo.h
+++ b/src/typo.h
@@ -7,9 +7,9 @@
 /**
  * Find the "typo" distance between two strings.
  *
- * @param actual
+ * @actual
  *         The actual string typed by the user.
- * @param expected
+ * @expected
  *         The expected valid string.
  * @return The distance between the two strings.
  */
diff --git a/src/version.c b/src/version.c
new file mode 100644
index 0000000..7479a9f
--- /dev/null
+++ b/src/version.c
@@ -0,0 +1,32 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+#include "bfs.h"
+
+const char bfs_version[] = { // initializer list comes from version.i (NOTE(review): presumably build-generated — confirm)
+#include "version.i"
+};
+
+const char bfs_confflags[] = { // initializer list comes from confflags.i
+#include "confflags.i"
+};
+
+const char bfs_cc[] = { // initializer list comes from cc.i
+#include "cc.i"
+};
+
+const char bfs_cppflags[] = { // initializer list comes from cppflags.i
+#include "cppflags.i"
+};
+
+const char bfs_cflags[] = { // initializer list comes from cflags.i
+#include "cflags.i"
+};
+
+const char bfs_ldflags[] = { // initializer list comes from ldflags.i
+#include "ldflags.i"
+};
+
+const char bfs_ldlibs[] = { // initializer list comes from ldlibs.i
+#include "ldlibs.i"
+};
diff --git a/src/xregex.c b/src/xregex.c
index c2711bc..796544e 100644
--- a/src/xregex.c
+++ b/src/xregex.c
@@ -1,19 +1,21 @@
 // Copyright © Tavian Barnes <tavianator@tavianator.com>
 // SPDX-License-Identifier: 0BSD
 
-#include "prelude.h"
 #include "xregex.h"
+
 #include "alloc.h"
+#include "bfs.h"
 #include "bfstd.h"
 #include "diag.h"
 #include "sanity.h"
 #include "thread.h"
+
 #include <errno.h>
 #include <pthread.h>
 #include <stdlib.h>
 #include <string.h>
 
-#if BFS_USE_ONIGURUMA
+#if BFS_WITH_ONIGURUMA
 #  include <langinfo.h>
 #  include <oniguruma.h>
 #else
@@ -21,7 +23,7 @@
 #endif
 
 struct bfs_regex {
-#if BFS_USE_ONIGURUMA
+#if BFS_WITH_ONIGURUMA
 	unsigned char *pattern;
 	OnigRegex impl;
 	int err;
@@ -32,11 +34,17 @@ struct bfs_regex {
 #endif
 };
 
-#if BFS_USE_ONIGURUMA
+#if BFS_WITH_ONIGURUMA
 
 static int bfs_onig_status;
 static OnigEncoding bfs_onig_enc;
 
+static OnigSyntaxType bfs_onig_syntax_awk; // set up in bfs_onig_once(): POSIX extended plus ALLOW_INVALID_INTERVAL and BACKSLASH_ESCAPE_IN_CC
+static OnigSyntaxType bfs_onig_syntax_gnu_awk; // awk syntax plus the GNU-only op/op2/behavior bits, minus both CONTEXT_*_REPEAT_OPS behaviors
+static OnigSyntaxType bfs_onig_syntax_emacs; // ONIG_SYNTAX_EMACS plus ONIG_SYN_OP2_QMARK_GROUP_EFFECT
+static OnigSyntaxType bfs_onig_syntax_egrep; // POSIX extended plus ALLOW_INVALID_INTERVAL, minus CONTEXT_INVALID_REPEAT_OPS
+static OnigSyntaxType bfs_onig_syntax_gnu_find; // the adjusted emacs syntax plus ONIG_OPTION_MULTILINE
+
 /** pthread_once() callback. */
 static void bfs_onig_once(void) {
 	// Fall back to ASCII by default
@@ -103,6 +111,35 @@ static void bfs_onig_once(void) {
 	if (bfs_onig_status != ONIG_NORMAL) {
 		bfs_onig_enc = NULL;
 	}
+
+	// Compute the GNU extensions: the op/op2/behavior bits that GNU regex sets but POSIX extended does not
+	OnigSyntaxType *ere = ONIG_SYNTAX_POSIX_EXTENDED;
+	OnigSyntaxType *gnu = ONIG_SYNTAX_GNU_REGEX;
+	unsigned int gnu_op = gnu->op & ~ere->op;
+	unsigned int gnu_op2 = gnu->op2 & ~ere->op2;
+	unsigned int gnu_behavior = gnu->behavior & ~ere->behavior;
+
+	onig_copy_syntax(&bfs_onig_syntax_awk, ONIG_SYNTAX_POSIX_EXTENDED);
+	bfs_onig_syntax_awk.behavior |= ONIG_SYN_ALLOW_INVALID_INTERVAL;
+	bfs_onig_syntax_awk.behavior |= ONIG_SYN_BACKSLASH_ESCAPE_IN_CC;
+
+	onig_copy_syntax(&bfs_onig_syntax_gnu_awk, &bfs_onig_syntax_awk);
+	bfs_onig_syntax_gnu_awk.op |= gnu_op;
+	bfs_onig_syntax_gnu_awk.op2 |= gnu_op2;
+	bfs_onig_syntax_gnu_awk.behavior |= gnu_behavior;
+	bfs_onig_syntax_gnu_awk.behavior &= ~ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS;
+	bfs_onig_syntax_gnu_awk.behavior &= ~ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS;
+
+	// https://github.com/kkos/oniguruma/issues/296
+	onig_copy_syntax(&bfs_onig_syntax_emacs, ONIG_SYNTAX_EMACS);
+	bfs_onig_syntax_emacs.op2 |= ONIG_SYN_OP2_QMARK_GROUP_EFFECT;
+
+	onig_copy_syntax(&bfs_onig_syntax_egrep, ONIG_SYNTAX_POSIX_EXTENDED);
+	bfs_onig_syntax_egrep.behavior |= ONIG_SYN_ALLOW_INVALID_INTERVAL;
+	bfs_onig_syntax_egrep.behavior &= ~ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS;
+
+	onig_copy_syntax(&bfs_onig_syntax_gnu_find, &bfs_onig_syntax_emacs);
+	bfs_onig_syntax_gnu_find.options |= ONIG_OPTION_MULTILINE;
 }
 
 /** Initialize Oniguruma. */
@@ -121,7 +158,7 @@ int bfs_regcomp(struct bfs_regex **preg, const char *pattern, enum bfs_regex_typ
 		return -1;
 	}
 
-#if BFS_USE_ONIGURUMA
+#if BFS_WITH_ONIGURUMA
 	// onig_error_code_to_str() says
 	//
 	//     don't call this after the pattern argument of onig_new() is freed
@@ -143,12 +180,24 @@ int bfs_regcomp(struct bfs_regex **preg, const char *pattern, enum bfs_regex_typ
 	case BFS_REGEX_POSIX_EXTENDED:
 		syntax = ONIG_SYNTAX_POSIX_EXTENDED;
 		break;
+	case BFS_REGEX_AWK:
+		syntax = &bfs_onig_syntax_awk;
+		break;
+	case BFS_REGEX_GNU_AWK:
+		syntax = &bfs_onig_syntax_gnu_awk;
+		break;
 	case BFS_REGEX_EMACS:
-		syntax = ONIG_SYNTAX_EMACS;
+		syntax = &bfs_onig_syntax_emacs;
 		break;
 	case BFS_REGEX_GREP:
 		syntax = ONIG_SYNTAX_GREP;
 		break;
+	case BFS_REGEX_EGREP:
+		syntax = &bfs_onig_syntax_egrep;
+		break;
+	case BFS_REGEX_GNU_FIND:
+		syntax = &bfs_onig_syntax_gnu_find;
+		break;
 	}
 	bfs_assert(syntax, "Invalid regex type");
 
@@ -204,7 +253,7 @@ fail:
 int bfs_regexec(struct bfs_regex *regex, const char *str, enum bfs_regexec_flags flags) {
 	size_t len = strlen(str);
 
-#if BFS_USE_ONIGURUMA
+#if BFS_WITH_ONIGURUMA
 	const unsigned char *ustr = (const unsigned char *)str;
 	const unsigned char *end = ustr + len;
 
@@ -263,7 +312,7 @@ int bfs_regexec(struct bfs_regex *regex, const char *str, enum bfs_regexec_flags
 
 void bfs_regfree(struct bfs_regex *regex) {
 	if (regex) {
-#if BFS_USE_ONIGURUMA
+#if BFS_WITH_ONIGURUMA
 		onig_free(regex->impl);
 		free(regex->pattern);
 #else
@@ -278,7 +327,7 @@ char *bfs_regerror(const struct bfs_regex *regex) {
 		return strdup(xstrerror(ENOMEM));
 	}
 
-#if BFS_USE_ONIGURUMA
+#if BFS_WITH_ONIGURUMA
 	unsigned char *str = malloc(ONIG_MAX_ERROR_MESSAGE_LEN);
 	if (str) {
 		onig_error_code_to_str(str, regex->err, &regex->einfo);
diff --git a/src/xregex.h b/src/xregex.h
index 998a2b0..c4504ee 100644
--- a/src/xregex.h
+++ b/src/xregex.h
@@ -15,8 +15,12 @@ struct bfs_regex;
 enum bfs_regex_type {
 	BFS_REGEX_POSIX_BASIC,
 	BFS_REGEX_POSIX_EXTENDED,
+	BFS_REGEX_AWK,
+	BFS_REGEX_GNU_AWK,
 	BFS_REGEX_EMACS,
 	BFS_REGEX_GREP,
+	BFS_REGEX_EGREP,
+	BFS_REGEX_GNU_FIND,
 };
 
 /**
@@ -38,13 +42,13 @@ enum bfs_regexec_flags {
 /**
  * Wrapper for regcomp() that supports additional regex types.
  *
- * @param[out] preg
+ * @preg[out]
  *         Will hold the compiled regex.
- * @param pattern
+ * @pattern
  *         The regular expression to compile.
- * @param type
+ * @type
  *         The regular expression syntax to use.
- * @param flags
+ * @flags
  *         Regex compilation flags.
  * @return
  *         0 on success, -1 on failure.
@@ -54,11 +58,11 @@ int bfs_regcomp(struct bfs_regex **preg, const char *pattern, enum bfs_regex_typ
 /**
  * Wrapper for regexec().
  *
- * @param regex
+ * @regex
  *         The regular expression to execute.
- * @param str
+ * @str
  *         The string to match against.
- * @param flags
+ * @flags
  *         Regex execution flags.
  * @return
  *         1 for a match, 0 for no match, -1 on failure.
@@ -73,7 +77,7 @@ void bfs_regfree(struct bfs_regex *regex);
 /**
  * Get a human-readable regex error message.
  *
- * @param regex
+ * @regex
  *         The compiled regex.
  * @return
  *         A human-readable description of the error, which should be free()'d.
diff --git a/src/xspawn.c b/src/xspawn.c
index 0b0cea4..ee62c05 100644
--- a/src/xspawn.c
+++ b/src/xspawn.c
@@ -1,24 +1,29 @@
 // Copyright © Tavian Barnes <tavianator@tavianator.com>
 // SPDX-License-Identifier: 0BSD
 
-#include "prelude.h"
 #include "xspawn.h"
+
 #include "alloc.h"
+#include "bfs.h"
 #include "bfstd.h"
+#include "diag.h"
 #include "list.h"
+#include "sighook.h"
+
 #include <errno.h>
 #include <fcntl.h>
+#include <signal.h>
 #include <stdlib.h>
 #include <string.h>
 #include <sys/resource.h>
 #include <sys/types.h>
 #include <unistd.h>
 
-#if BFS_USE_PATHS_H
+#if __has_include(<paths.h>)
 #  include <paths.h>
 #endif
 
-#if _POSIX_SPAWN > 0
+#if BFS_POSIX_SPAWN >= 0
 #  include <spawn.h>
 #endif
 
@@ -68,29 +73,42 @@ int bfs_spawn_init(struct bfs_spawn *ctx) {
 	ctx->flags = 0;
 	SLIST_INIT(ctx);
 
-#if _POSIX_SPAWN > 0
-	ctx->flags |= BFS_SPAWN_USE_POSIX;
+#if BFS_POSIX_SPAWN >= 0
+	if (sysoption(SPAWN) > 0) {
+		ctx->flags |= BFS_SPAWN_USE_POSIX;
 
-	errno = posix_spawn_file_actions_init(&ctx->actions);
-	if (errno != 0) {
-		return -1;
-	}
+		errno = posix_spawn_file_actions_init(&ctx->actions);
+		if (errno != 0) {
+			return -1;
+		}
 
-	errno = posix_spawnattr_init(&ctx->attr);
-	if (errno != 0) {
-		posix_spawn_file_actions_destroy(&ctx->actions);
-		return -1;
+		errno = posix_spawnattr_init(&ctx->attr);
+		if (errno != 0) {
+			posix_spawn_file_actions_destroy(&ctx->actions);
+			return -1;
+		}
 	}
 #endif
 
 	return 0;
 }
 
-int bfs_spawn_destroy(struct bfs_spawn *ctx) {
-#if _POSIX_SPAWN > 0
-	posix_spawnattr_destroy(&ctx->attr);
-	posix_spawn_file_actions_destroy(&ctx->actions);
+/**
+ * If BFS_SPAWN_USE_POSIX is set, clear it and destroy the posix_spawn() state.
+ */
+static void bfs_spawn_clear_posix(struct bfs_spawn *ctx) {
+	if (ctx->flags & BFS_SPAWN_USE_POSIX) { // skipped once the flag is cleared
+		ctx->flags &= ~BFS_SPAWN_USE_POSIX;
+
+#if BFS_POSIX_SPAWN >= 0
+		posix_spawnattr_destroy(&ctx->attr);
+		posix_spawn_file_actions_destroy(&ctx->actions);
 #endif
+	}
+}
+
+int bfs_spawn_destroy(struct bfs_spawn *ctx) {
+	bfs_spawn_clear_posix(ctx);
 
 	for_slist (struct bfs_spawn_action, action, ctx) {
 		free(action);
@@ -99,9 +117,9 @@ int bfs_spawn_destroy(struct bfs_spawn *ctx) {
 	return 0;
 }
 
-#if _POSIX_SPAWN > 0
+#if BFS_POSIX_SPAWN >= 0
 /** Set some posix_spawnattr flags. */
-attr(maybe_unused)
+_maybe_unused
 static int bfs_spawn_addflags(struct bfs_spawn *ctx, short flags) {
 	short prev;
 	errno = posix_spawnattr_getflags(&ctx->attr, &prev);
@@ -119,7 +137,7 @@ static int bfs_spawn_addflags(struct bfs_spawn *ctx, short flags) {
 
 	return 0;
 }
-#endif // _POSIX_SPAWN > 0
+#endif
 
 /** Allocate a spawn action. */
 static struct bfs_spawn_action *bfs_spawn_action(enum bfs_spawn_op op) {
@@ -141,7 +159,7 @@ int bfs_spawn_addopen(struct bfs_spawn *ctx, int fd, const char *path, int flags
 		return -1;
 	}
 
-#if _POSIX_SPAWN > 0
+#if BFS_POSIX_SPAWN >= 0
 	if (ctx->flags & BFS_SPAWN_USE_POSIX) {
 		errno = posix_spawn_file_actions_addopen(&ctx->actions, fd, path, flags, mode);
 		if (errno != 0) {
@@ -165,7 +183,7 @@ int bfs_spawn_addclose(struct bfs_spawn *ctx, int fd) {
 		return -1;
 	}
 
-#if _POSIX_SPAWN > 0
+#if BFS_POSIX_SPAWN >= 0
 	if (ctx->flags & BFS_SPAWN_USE_POSIX) {
 		errno = posix_spawn_file_actions_addclose(&ctx->actions, fd);
 		if (errno != 0) {
@@ -186,7 +204,7 @@ int bfs_spawn_adddup2(struct bfs_spawn *ctx, int oldfd, int newfd) {
 		return -1;
 	}
 
-#if _POSIX_SPAWN > 0
+#if BFS_POSIX_SPAWN >= 0
 	if (ctx->flags & BFS_SPAWN_USE_POSIX) {
 		errno = posix_spawn_file_actions_adddup2(&ctx->actions, oldfd, newfd);
 		if (errno != 0) {
@@ -214,19 +232,35 @@ int bfs_spawn_adddup2(struct bfs_spawn *ctx, int oldfd, int newfd) {
  */
 #define BFS_POSIX_SPAWNP_AFTER_FCHDIR !(__APPLE__ || __NetBSD__)
 
+/**
+ * NetBSD even resolves the executable before file actions with posix_spawn()!
+ */
+#define BFS_POSIX_SPAWN_AFTER_FCHDIR !__NetBSD__
+
 int bfs_spawn_addfchdir(struct bfs_spawn *ctx, int fd) {
 	struct bfs_spawn_action *action = bfs_spawn_action(BFS_SPAWN_FCHDIR);
 	if (!action) {
 		return -1;
 	}
 
+#if __APPLE__
+	// macOS has a bug that causes EBADF when an fchdir() action refers to a
+	// file opened by the file actions
+	for_slist (struct bfs_spawn_action, prev, ctx) {
+		if (fd == prev->out_fd) {
+			bfs_spawn_clear_posix(ctx);
+			break;
+		}
+	}
+#endif
+
 #if BFS_HAS_POSIX_SPAWN_ADDFCHDIR
 #  define BFS_POSIX_SPAWN_ADDFCHDIR posix_spawn_file_actions_addfchdir
 #elif BFS_HAS_POSIX_SPAWN_ADDFCHDIR_NP
 #  define BFS_POSIX_SPAWN_ADDFCHDIR posix_spawn_file_actions_addfchdir_np
 #endif
 
-#if _POSIX_SPAWN > 0 && defined(BFS_POSIX_SPAWN_FCHDIR)
+#if BFS_POSIX_SPAWN >= 0 && defined(BFS_POSIX_SPAWN_ADDFCHDIR)
 	if (ctx->flags & BFS_SPAWN_USE_POSIX) {
 		errno = BFS_POSIX_SPAWN_ADDFCHDIR(&ctx->actions, fd);
 		if (errno != 0) {
@@ -235,7 +269,7 @@ int bfs_spawn_addfchdir(struct bfs_spawn *ctx, int fd) {
 		}
 	}
 #else
-	ctx->flags &= ~BFS_SPAWN_USE_POSIX;
+	bfs_spawn_clear_posix(ctx);
 #endif
 
 	action->in_fd = fd;
@@ -259,7 +293,7 @@ int bfs_spawn_setrlimit(struct bfs_spawn *ctx, int resource, const struct rlimit
 		goto fail;
 	}
 #else
-	ctx->flags &= ~BFS_SPAWN_USE_POSIX;
+	bfs_spawn_clear_posix(ctx);
 #endif
 
 	action->resource = resource;
@@ -383,18 +417,40 @@ static bool bfs_resolve_relative(const struct bfs_resolver *res) {
 	return false;
 }
 
+/** Report whether any queued action is an fchdir() (false if ctx is NULL). */
+static bool bfs_spawn_will_chdir(const struct bfs_spawn *ctx) {
+	if (!ctx) {
+		return false;
+	}
+	for_slist (const struct bfs_spawn_action, cur, ctx) {
+		if (cur->op == BFS_SPAWN_FCHDIR) {
+			return true;
+		}
+	}
+	return false;
+}
+
+/**
+ * Decide whether xfaccessat() can run before the file actions are applied.
+ *
+ * An absolute path is always safe to check; any other path is safe only when
+ * no action will fchdir() first.
+ */
+static bool bfs_can_access_early(const struct bfs_resolver *res, const struct bfs_spawn *ctx) {
+	bool absolute = res->exe[0] == '/';
+
+	// Evaluated left to right, so absolute paths never walk the action list
+	return absolute || !bfs_spawn_will_chdir(ctx);
+}
+
 /** Check if we can resolve the executable before file actions. */
 static bool bfs_can_resolve_early(const struct bfs_resolver *res, const struct bfs_spawn *ctx) {
 	if (!bfs_resolve_relative(res)) {
 		return true;
 	}
 
-	if (ctx) {
-		for_slist (const struct bfs_spawn_action, action, ctx) {
-			if (action->op == BFS_SPAWN_FCHDIR) {
-				return false;
-			}
-		}
+	if (bfs_spawn_will_chdir(ctx)) {
+		return false;
 	}
 
 	return true;
@@ -424,6 +480,17 @@ static int bfs_resolve_early(struct bfs_resolver *res, const char *exe, const st
 	};
 
 	if (bfs_can_skip_resolve(res, ctx)) {
+		if (bfs_can_access_early(res, ctx)) {
+			// Do this check eagerly, even though posix_spawn()/execv() also
+			// would, because:
+			//
+			//     - faccessat() is faster than fork()/clone() + execv()
+			//     - posix_spawn() is not guaranteed to report ENOENT
+			if (xfaccessat(AT_FDCWD, exe, X_OK) != 0) {
+				return -1;
+			}
+		}
+
 		res->done = true;
 		return 0;
 	}
@@ -471,7 +538,7 @@ fail:
 	return -1;
 }
 
-#if _POSIX_SPAWN > 0
+#if BFS_POSIX_SPAWN >= 0
 
 /** bfs_spawn() implementation using posix_spawn(). */
 static pid_t bfs_posix_spawn(struct bfs_resolver *res, const struct bfs_spawn *ctx, char **argv, char **envp) {
@@ -502,13 +569,20 @@ static bool bfs_use_posix_spawn(const struct bfs_resolver *res, const struct bfs
 	}
 #endif
 
+#if !BFS_POSIX_SPAWN_AFTER_FCHDIR
+	if (res->exe[0] != '/' && bfs_spawn_will_chdir(ctx)) {
+		return false;
+	}
+#endif
+
 	return true;
 }
 
-#endif // _POSIX_SPAWN > 0
+#endif // BFS_POSIX_SPAWN >= 0
 
 /** Actually exec() the new process. */
-static noreturn void bfs_spawn_exec(struct bfs_resolver *res, const struct bfs_spawn *ctx, char **argv, char **envp, int pipefd[2]) {
+_noreturn
+static void bfs_spawn_exec(struct bfs_resolver *res, const struct bfs_spawn *ctx, char **argv, char **envp, const sigset_t *mask, int pipefd[2]) {
 	xclose(pipefd[0]);
 
 	for_slist (const struct bfs_spawn_action, action, ctx) {
@@ -569,6 +643,18 @@ static noreturn void bfs_spawn_exec(struct bfs_resolver *res, const struct bfs_s
 		goto fail;
 	}
 
+	// Reset signal handlers to their original values before we unblock
+	// signals, so that handlers don't run in both the parent and the child
+	if (sigreset() != 0) {
+		goto fail;
+	}
+
+	// Restore the original signal mask for the child process
+	errno = pthread_sigmask(SIG_SETMASK, mask, NULL);
+	if (errno != 0) {
+		goto fail;
+	}
+
 	execve(res->exe, argv, envp);
 
 fail:;
@@ -590,35 +676,58 @@ static pid_t bfs_fork_spawn(struct bfs_resolver *res, const struct bfs_spawn *ct
 		return -1;
 	}
 
+	// Block signals before fork() so handlers don't run in the child
+	sigset_t new_mask;
+	if (sigfillset(&new_mask) != 0) {
+		goto fail;
+	}
+	sigset_t old_mask;
+	errno = pthread_sigmask(SIG_BLOCK, &new_mask, &old_mask);
+	if (errno != 0) {
+		goto fail;
+	}
+
+#if BFS_HAS__FORK
+	pid_t pid = _Fork();
+#else
 	pid_t pid = fork();
-	if (pid < 0) {
-		close_quietly(pipefd[1]);
-		close_quietly(pipefd[0]);
-		return -1;
-	} else if (pid == 0) {
+#endif
+	if (pid == 0) {
 		// Child
-		bfs_spawn_exec(res, ctx, argv, envp, pipefd);
+		bfs_spawn_exec(res, ctx, argv, envp, &old_mask, pipefd);
+	}
+
+	// Restore the original signal mask without losing errno from fork()
+	int fork_errno = errno;
+	errno = pthread_sigmask(SIG_SETMASK, &old_mask, NULL);
+	bfs_everify(errno == 0, "pthread_sigmask()");
+	if (pid < 0) {
+		errno = fork_errno;
+		goto fail;
 	}
 
-	// Parent
 	xclose(pipefd[1]);
 
 	int error;
 	ssize_t nbytes = xread(pipefd[0], &error, sizeof(error));
 	xclose(pipefd[0]);
 	if (nbytes == sizeof(error)) {
-		int wstatus;
-		xwaitpid(pid, &wstatus, 0);
+		xwaitpid(pid, NULL, 0);
 		errno = error;
 		return -1;
 	}
 
 	return pid;
+
+fail:
+	close_quietly(pipefd[1]);
+	close_quietly(pipefd[0]);
+	return -1;
 }
 
 /** Call the right bfs_spawn() implementation. */
 static pid_t bfs_spawn_impl(struct bfs_resolver *res, const struct bfs_spawn *ctx, char **argv, char **envp) {
-#if _POSIX_SPAWN > 0
+#if BFS_POSIX_SPAWN >= 0
 	if (bfs_use_posix_spawn(res, ctx)) {
 		return bfs_posix_spawn(res, ctx, argv, envp);
 	}
diff --git a/src/xspawn.h b/src/xspawn.h
index 6a8f54a..3c74ccd 100644
--- a/src/xspawn.h
+++ b/src/xspawn.h
@@ -8,12 +8,17 @@
 #ifndef BFS_XSPAWN_H
 #define BFS_XSPAWN_H
 
-#include "prelude.h"
 #include <sys/resource.h>
 #include <sys/types.h>
 #include <unistd.h>
 
-#if _POSIX_SPAWN > 0
+#ifdef _POSIX_SPAWN
+#  define BFS_POSIX_SPAWN _POSIX_SPAWN
+#else
+#  define BFS_POSIX_SPAWN (-1)
+#endif
+
+#if BFS_POSIX_SPAWN >= 0
 #  include <spawn.h>
 #endif
 
@@ -38,7 +43,7 @@ struct bfs_spawn {
 	struct bfs_spawn_action *head;
 	struct bfs_spawn_action **tail;
 
-#if _POSIX_SPAWN > 0
+#if BFS_POSIX_SPAWN >= 0
 	/** posix_spawn() context, for when we can use it. */
 	posix_spawn_file_actions_t actions;
 	posix_spawnattr_t attr;
@@ -104,13 +109,13 @@ int bfs_spawn_setrlimit(struct bfs_spawn *ctx, int resource, const struct rlimit
 /**
  * Spawn a new process.
  *
- * @param exe
+ * @exe
  *         The executable to run.
- * @param ctx
+ * @ctx
  *         The context for the new process.
- * @param argv
+ * @argv
  *         The arguments for the new process.
- * @param envp
+ * @envp
  *         The environment variables for the new process (NULL for the current
  *         environment).
  * @return
@@ -122,7 +127,7 @@ pid_t bfs_spawn(const char *exe, const struct bfs_spawn *ctx, char **argv, char
  * Look up an executable in the current PATH, as BFS_SPAWN_USE_PATH or execvp()
  * would do.
  *
- * @param exe
+ * @exe
  *         The name of the binary to execute.  Bare names without a '/' will be
  *         searched on the provided PATH.
  * @return
diff --git a/src/xtime.c b/src/xtime.c
index 91ed915..6b8a141 100644
--- a/src/xtime.c
+++ b/src/xtime.c
@@ -1,10 +1,14 @@
 // Copyright © Tavian Barnes <tavianator@tavianator.com>
 // SPDX-License-Identifier: 0BSD
 
-#include "prelude.h"
 #include "xtime.h"
+
+#include "alloc.h"
+#include "bfs.h"
 #include "bfstd.h"
 #include "diag.h"
+#include "sanity.h"
+
 #include <errno.h>
 #include <limits.h>
 #include <sys/time.h>
@@ -12,14 +16,14 @@
 #include <unistd.h>
 
 int xmktime(struct tm *tm, time_t *timep) {
-	*timep = mktime(tm);
+	time_t time = mktime(tm);
 
-	if (*timep == -1) {
+	if (time == -1) {
 		int error = errno;
 
 		struct tm tmp;
-		if (!localtime_r(timep, &tmp)) {
-			bfs_bug("localtime_r(-1): %s", xstrerror(errno));
+		if (!localtime_r(&time, &tmp)) {
+			bfs_ebug("localtime_r(-1)");
 			return -1;
 		}
 
@@ -30,9 +34,38 @@ int xmktime(struct tm *tm, time_t *timep) {
 		}
 	}
 
+	*timep = time;
+	return 0;
+}
+
+// FreeBSD is missing an interceptor
+#if BFS_HAS_TIMEGM && !(__FreeBSD__ && __SANITIZE_MEMORY__)
+
+int xtimegm(struct tm *tm, time_t *timep) {
+	time_t time = timegm(tm);
+
+	if (time == -1) { // ambiguous: an error, or one second before the epoch
+		int error = errno; // kept so it can be restored after gmtime_r()
+
+		struct tm tmp;
+		if (!gmtime_r(&time, &tmp)) {
+			bfs_ebug("gmtime_r(-1)");
+			return -1;
+		}
+
+		if (tm->tm_year != tmp.tm_year || tm->tm_yday != tmp.tm_yday
+		    || tm->tm_hour != tmp.tm_hour || tm->tm_min != tmp.tm_min || tm->tm_sec != tmp.tm_sec) {
+			errno = error; // tm is not the broken-down form of -1: a real failure
+			return -1;
+		}
+	}
+
+	*timep = time; // only written on success
 	return 0;
 }
 
+#else
+
 static int safe_add(int *value, int delta) {
 	if (*value >= 0) {
 		if (delta > INT_MAX - *value) {
@@ -147,6 +180,8 @@ overflow:
 	return -1;
 }
 
+#endif // !BFS_HAS_TIMEGM
+
 /** Parse a decimal digit. */
 static int xgetdigit(char c) {
 	int ret = c - '0';
@@ -174,6 +209,23 @@ static int xgetpart(const char **str, size_t n, int *result) {
 }
 
 int xgetdate(const char *str, struct timespec *result) {
+	// Handle @epochseconds
+	if (str[0] == '@') {
+		long long value;
+		if (xstrtoll(str + 1, NULL, 10, &value) != 0) {
+			goto error;
+		}
+
+		time_t time = (time_t)value;
+		if ((long long)time != value) {
+			errno = ERANGE;
+			goto error;
+		}
+
+		result->tv_sec = time;
+		goto done;
+	}
+
 	struct tm tm = {
 		.tm_isdst = -1,
 	};
@@ -292,6 +344,7 @@ end:
 		}
 	}
 
+done:
 	result->tv_nsec = 0;
 	return 0;
 
@@ -301,16 +354,150 @@ error:
 	return -1;
 }
 
-int xgettime(struct timespec *result) {
-#if _POSIX_TIMERS > 0
-	return clock_gettime(CLOCK_REALTIME, result);
+/** The number of nanoseconds in one second. */
+static const long NS = 1000L * 1000 * 1000;
+
+void timespec_add(struct timespec *lhs, const struct timespec *rhs) {
+	lhs->tv_sec += rhs->tv_sec;
+	lhs->tv_nsec += rhs->tv_nsec;
+	if (lhs->tv_nsec >= NS) { // at most one carry; assumes normalized inputs
+		lhs->tv_nsec -= NS;
+		lhs->tv_sec += 1;
+	}
+}
+
+void timespec_sub(struct timespec *lhs, const struct timespec *rhs) {
+	lhs->tv_sec -= rhs->tv_sec;
+	lhs->tv_nsec -= rhs->tv_nsec;
+	if (lhs->tv_nsec < 0) { // at most one borrow; assumes normalized inputs
+		lhs->tv_nsec += NS;
+		lhs->tv_sec -= 1;
+	}
+}
+
+int timespec_cmp(const struct timespec *lhs, const struct timespec *rhs) {
+	// Order by whole seconds first
+	if (lhs->tv_sec != rhs->tv_sec) {
+		return lhs->tv_sec < rhs->tv_sec ? -1 : 1;
+	}
+
+	// Equal seconds: the nanoseconds break the tie (assuming both
+	// timespecs are normalized)
+	if (lhs->tv_nsec != rhs->tv_nsec) {
+		return lhs->tv_nsec < rhs->tv_nsec ? -1 : 1;
+	}
+
+	// Both fields match
+	return 0;
+}
+
+void timespec_min(struct timespec *dest, const struct timespec *src) {
+	if (timespec_cmp(src, dest) < 0) { // *src is strictly smaller
+		*dest = *src;
+	}
+}
+
+void timespec_max(struct timespec *dest, const struct timespec *src) {
+	if (timespec_cmp(src, dest) > 0) { // *src is strictly larger
+		*dest = *src;
+	}
+}
+
+double timespec_ns(const struct timespec *ts) {
+	return 1.0e9 * ts->tv_sec + ts->tv_nsec; // evaluated in double precision
+}
+
+#if defined(_POSIX_TIMERS) && BFS_HAS_TIMER_CREATE
+#  define BFS_POSIX_TIMERS _POSIX_TIMERS
 #else
-	struct timeval tv;
-	int ret = gettimeofday(&tv, NULL);
-	if (ret == 0) {
-		result->tv_sec = tv.tv_sec;
-		result->tv_nsec = tv.tv_usec * 1000L;
+#  define BFS_POSIX_TIMERS (-1)
+#endif
+
+struct timer {
+#if BFS_POSIX_TIMERS >= 0
+	/** The POSIX timer from timer_create(); only set when !legacy. */
+	timer_t timer;
+#endif
+	/** True if setitimer() is in use, false if timer_create() is. */
+	bool legacy;
+};
+
+struct timer *xtimer_start(const struct timespec *interval) {
+	struct timer *timer = ALLOC(struct timer);
+	if (!timer) {
+		return NULL;
 	}
-	return ret;
+
+#if BFS_POSIX_TIMERS >= 0
+	if (sysoption(TIMERS) > 0) { // must be > 0, like the MONOTONIC_CLOCK check
+		clockid_t clock = CLOCK_REALTIME;
+
+#if defined(_POSIX_MONOTONIC_CLOCK) && _POSIX_MONOTONIC_CLOCK >= 0
+		if (sysoption(MONOTONIC_CLOCK) > 0) {
+			clock = CLOCK_MONOTONIC;
+		}
+#endif
+
+		if (timer_create(clock, NULL, &timer->timer) != 0) {
+			goto fail;
+		}
+
+		// https://github.com/llvm/llvm-project/issues/111847
+		sanitize_init(&timer->timer);
+
+		struct itimerspec spec = {
+			.it_value = *interval, // first expiration
+			.it_interval = *interval, // then repeat at the same period
+		};
+		if (timer_settime(timer->timer, 0, &spec, NULL) != 0) {
+			timer_delete(timer->timer);
+			goto fail;
+		}
+
+		timer->legacy = false;
+		return timer;
+	}
+#endif
+
+#if BFS_POSIX_TIMERS <= 0
+	struct timeval tv = {
+		.tv_sec = interval->tv_sec,
+		.tv_usec = (interval->tv_nsec + 999) / 1000, // round up to microseconds
+	};
+	struct itimerval ival = {
+		.it_value = tv,
+		.it_interval = tv,
+	};
+	if (setitimer(ITIMER_REAL, &ival, NULL) != 0) {
+		goto fail;
+	}
+
+	timer->legacy = true;
+	return timer;
 #endif
+
+fail:
+	free(timer);
+	return NULL;
+}
+
+void xtimer_stop(struct timer *timer) {
+	if (!timer) { // stopping a NULL timer is a no-op
+		return;
+	}
+
+	if (timer->legacy) {
+#if BFS_POSIX_TIMERS <= 0
+		struct itimerval ival = {0}; // an all-zero value disarms ITIMER_REAL
+		int ret = setitimer(ITIMER_REAL, &ival, NULL);
+		bfs_everify(ret == 0, "setitimer()");
+#endif
+	} else {
+#if BFS_POSIX_TIMERS >= 0
+		int ret = timer_delete(timer->timer);
+		bfs_everify(ret == 0, "timer_delete()");
+#endif
+	}
+
+	free(timer);
 }
diff --git a/src/xtime.h b/src/xtime.h
index fb60ae4..b76fef2 100644
--- a/src/xtime.h
+++ b/src/xtime.h
@@ -13,9 +13,9 @@
 /**
  * mktime() wrapper that reports errors more reliably.
  *
- * @param[in,out] tm
- *         The struct tm to convert.
- * @param[out] timep
+ * @tm[in,out]
+ *         The struct tm to convert and normalize.
+ * @timep[out]
  *         A pointer to the result.
  * @return
  *         0 on success, -1 on failure.
@@ -25,9 +25,9 @@ int xmktime(struct tm *tm, time_t *timep);
 /**
  * A portable timegm(), the inverse of gmtime().
  *
- * @param[in,out] tm
- *         The struct tm to convert.
- * @param[out] timep
+ * @tm[in,out]
+ *         The struct tm to convert and normalize.
+ * @timep[out]
  *         A pointer to the result.
  * @return
  *         0 on success, -1 on failure.
@@ -37,9 +37,9 @@ int xtimegm(struct tm *tm, time_t *timep);
 /**
  * Parse an ISO 8601-style timestamp.
  *
- * @param[in] str
+ * @str
  *         The string to parse.
- * @param[out] result
+ * @result[out]
  *         A pointer to the result.
  * @return
  *         0 on success, -1 on failure.
@@ -47,13 +47,62 @@ int xtimegm(struct tm *tm, time_t *timep);
 int xgetdate(const char *str, struct timespec *result);
 
 /**
- * Get the current time.
+ * Add *rhs to *lhs, in place.
+ */
+void timespec_add(struct timespec *lhs, const struct timespec *rhs);
+
+/**
+ * Subtract *rhs from *lhs, in place.
+ */
+void timespec_sub(struct timespec *lhs, const struct timespec *rhs);
+
+/**
+ * Compare two timespecs.
  *
- * @param[out] result
- *         A pointer to the result.
  * @return
- *         0 on success, -1 on failure.
+ *         An integer with the sign of (*lhs - *rhs).
+ */
+int timespec_cmp(const struct timespec *lhs, const struct timespec *rhs);
+
+/**
+ * Replace *dest with *src if *src is smaller.
+ */
+void timespec_min(struct timespec *dest, const struct timespec *src);
+
+/**
+ * Replace *dest with *src if *src is larger.
+ */
+void timespec_max(struct timespec *dest, const struct timespec *src);
+
+/**
+ * Convert a timespec to floating point.
+ *
+ * @return
+ *         The value in nanoseconds.
+ */
+double timespec_ns(const struct timespec *ts);
+
+/**
+ * A timer.
+ */
+struct timer;
+
+/**
+ * Start a timer.
+ *
+ * @interval
+ *         The regular interval at which to send SIGALRM.
+ * @return
+ *         The new timer on success, otherwise NULL.
+ */
+struct timer *xtimer_start(const struct timespec *interval);
+
+/**
+ * Stop a timer.
+ *
+ * @timer
+ *         The timer to stop.
  */
-int xgettime(struct timespec *result);
+void xtimer_stop(struct timer *timer);
 
 #endif // BFS_XTIME_H