summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/alloc.c384
-rw-r--r--src/alloc.h383
-rw-r--r--src/atomic.h118
-rw-r--r--src/bar.c218
-rw-r--r--src/bar.h44
-rw-r--r--src/bfstd.c1006
-rw-r--r--src/bfstd.h523
-rw-r--r--src/bftw.c2326
-rw-r--r--src/bftw.h218
-rw-r--r--src/bit.h401
-rw-r--r--src/color.c1418
-rw-r--r--src/color.h118
-rw-r--r--src/ctx.c298
-rw-r--r--src/ctx.h169
-rw-r--r--src/diag.c300
-rw-r--r--src/diag.h258
-rw-r--r--src/dir.c371
-rw-r--r--src/dir.h175
-rw-r--r--src/dstring.c279
-rw-r--r--src/dstring.h322
-rw-r--r--src/eval.c1696
-rw-r--r--src/eval.h102
-rw-r--r--src/exec.c690
-rw-r--r--src/exec.h108
-rw-r--r--src/expr.c85
-rw-r--r--src/expr.h247
-rw-r--r--src/fsade.c508
-rw-r--r--src/fsade.h81
-rw-r--r--src/ioq.c1100
-rw-r--r--src/ioq.h198
-rw-r--r--src/list.h581
-rw-r--r--src/main.c150
-rw-r--r--src/mtab.c303
-rw-r--r--src/mtab.h58
-rw-r--r--src/opt.c2299
-rw-r--r--src/opt.h23
-rw-r--r--src/parse.c3712
-rw-r--r--src/parse.h23
-rw-r--r--src/prelude.h370
-rw-r--r--src/printf.c968
-rw-r--r--src/printf.h55
-rw-r--r--src/pwcache.c220
-rw-r--r--src/pwcache.h124
-rw-r--r--src/sanity.h94
-rw-r--r--src/sighook.c600
-rw-r--r--src/sighook.h73
-rw-r--r--src/stat.c345
-rw-r--r--src/stat.h172
-rw-r--r--src/thread.c81
-rw-r--r--src/thread.h99
-rw-r--r--src/trie.c729
-rw-r--r--src/trie.h147
-rw-r--r--src/typo.c164
-rw-r--r--src/typo.h18
-rw-r--r--src/xregex.c295
-rw-r--r--src/xregex.h83
-rw-r--r--src/xspawn.c690
-rw-r--r--src/xspawn.h134
-rw-r--r--src/xtime.c348
-rw-r--r--src/xtime.h59
60 files changed, 27161 insertions, 0 deletions
diff --git a/src/alloc.c b/src/alloc.c
new file mode 100644
index 0000000..ebaff38
--- /dev/null
+++ b/src/alloc.c
@@ -0,0 +1,384 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+#include "prelude.h"
+#include "alloc.h"
+#include "bit.h"
+#include "diag.h"
+#include "sanity.h"
+#include <errno.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+
+/** The largest possible allocation size. */
+#if PTRDIFF_MAX < SIZE_MAX / 2
+# define ALLOC_MAX ((size_t)PTRDIFF_MAX)
+#else
+# define ALLOC_MAX (SIZE_MAX / 2)
+#endif
+
+/**
+ * Portable aligned_alloc()/posix_memalign().
+ *
+ * @param align
+ *         The required alignment.  Asserted to have a single bit set and to be
+ *         at least sizeof(void *).
+ * @param size
+ *         The allocation size.  Asserted to be a multiple of align.
+ * @return
+ *         Whatever aligned_alloc() returns, or the pointer filled in by
+ *         posix_memalign() (which starts out as NULL).
+ */
+static void *xmemalign(size_t align, size_t size) {
+ bfs_assert(has_single_bit(align));
+ bfs_assert(align >= sizeof(void *));
+ bfs_assert(is_aligned(align, size));
+
+#if BFS_HAS_ALIGNED_ALLOC
+ return aligned_alloc(align, size);
+#else
+ void *ptr = NULL;
+ // posix_memalign() reports errors through its return value, so copy it
+ // into errno (this also stores 0 in errno on success)
+ errno = posix_memalign(&ptr, align, size);
+ return ptr;
+#endif
+}
+
+/** General allocator: malloc() for ordinary alignments, xmemalign() otherwise. */
+void *alloc(size_t align, size_t size) {
+	bfs_assert(has_single_bit(align));
+	bfs_assert(is_aligned(align, size));
+
+	// Reject requests beyond the largest supported allocation
+	if (size > ALLOC_MAX) {
+		errno = EOVERFLOW;
+		return NULL;
+	}
+
+	// Only over-aligned requests need the aligned allocator
+	if (align > alignof(max_align_t)) {
+		return xmemalign(align, size);
+	}
+
+	return malloc(size);
+}
+
+/** Like alloc(), but the returned memory is zero-filled. */
+void *zalloc(size_t align, size_t size) {
+	bfs_assert(has_single_bit(align));
+	bfs_assert(is_aligned(align, size));
+
+	// Reject requests beyond the largest supported allocation
+	if (size > ALLOC_MAX) {
+		errno = EOVERFLOW;
+		return NULL;
+	}
+
+	if (align > alignof(max_align_t)) {
+		// The aligned allocator does not zero memory, so clear it by hand
+		void *mem = xmemalign(align, size);
+		if (mem) {
+			memset(mem, 0, size);
+		}
+		return mem;
+	}
+
+	return calloc(1, size);
+}
+
+/** Alignment-aware realloc(); a new_size of zero frees ptr and returns NULL. */
+void *xrealloc(void *ptr, size_t align, size_t old_size, size_t new_size) {
+	bfs_assert(has_single_bit(align));
+	bfs_assert(is_aligned(align, old_size));
+	bfs_assert(is_aligned(align, new_size));
+
+	if (new_size == 0) {
+		free(ptr);
+		return NULL;
+	}
+
+	if (new_size > ALLOC_MAX) {
+		errno = EOVERFLOW;
+		return NULL;
+	}
+
+	// Ordinary alignments can be handled by realloc() directly
+	if (align <= alignof(max_align_t)) {
+		return realloc(ptr, new_size);
+	}
+
+	// There is no aligned_realloc(): allocate a new block, copy the
+	// bytes common to both sizes, then release the old block
+	void *fresh = xmemalign(align, new_size);
+	if (fresh) {
+		size_t ncopy = new_size < old_size ? new_size : old_size;
+		if (ncopy > 0) {
+			memcpy(fresh, ptr, ncopy);
+		}
+		free(ptr);
+	}
+
+	return fresh;
+}
+
+/**
+ * Make room for one more element in a dynamic array of count elements.
+ *
+ * The array is only reallocated when count is zero or a power of two; in
+ * that case the capacity becomes one element (count == 0) or twice the
+ * current size.  Success is signalled by errno == 0 on return, failure by
+ * errno != 0 with the original ptr returned.
+ */
+void *reserve(void *ptr, size_t align, size_t size, size_t count) {
+ // No need to overflow-check the current size
+ size_t old_size = size * count;
+
+ // Capacity is doubled every power of two, from 0→1, 1→2, 2→4, etc.
+ // If we stayed within the same size class, re-use ptr.
+ // (count & (count - 1)) is non-zero exactly when count is neither zero
+ // nor a power of two.
+ if (count & (count - 1)) {
+ // Tell sanitizers about the new array element
+ sanitize_alloc((char *)ptr + old_size, size);
+ errno = 0;
+ return ptr;
+ }
+
+ // No need to overflow-check; xrealloc() will fail before we overflow
+ size_t new_size = count ? 2 * old_size : size;
+ void *ret = xrealloc(ptr, align, old_size, new_size);
+ if (!ret) {
+ // errno is used to communicate success/failure to the RESERVE() macro
+ bfs_assert(errno != 0);
+ return ptr;
+ }
+
+ // Pretend we only allocated one more element
+ sanitize_free((char *)ret + old_size + size, new_size - old_size - size);
+ errno = 0;
+ return ret;
+}
+
+/**
+ * An arena allocator chunk.
+ */
+union chunk {
+ /**
+ * Free chunks are stored in a singly linked list. The pointer to the
+ * next chunk is represented by an offset from the chunk immediately
+ * after this one in memory, so that zalloc() correctly initializes a
+ * linked list of chunks (except for the last one).
+ */
+ uintptr_t next;
+
+ // char object[];
+};
+
+/** Decode the free-list successor of a chunk from its stored offset. */
+static union chunk *chunk_next(const struct arena *arena, const union chunk *chunk) {
+	// The offset is relative to the chunk immediately after this one
+	uintptr_t following = (uintptr_t)chunk + arena->size;
+	uintptr_t target = following + chunk->next;
+	return (union chunk *)target;
+}
+
+/** Store the free-list successor of a chunk as a relative offset. */
+static void chunk_set_next(const struct arena *arena, union chunk *chunk, union chunk *next) {
+	// The offset is relative to the chunk immediately after this one
+	uintptr_t following = (uintptr_t)chunk + arena->size;
+	uintptr_t target = (uintptr_t)next;
+	chunk->next = target - following;
+}
+
+/** Set up an empty arena whose chunks have (at least) the given layout. */
+void arena_init(struct arena *arena, size_t align, size_t size) {
+	bfs_assert(has_single_bit(align));
+	bfs_assert(is_aligned(align, size));
+
+	// A free chunk stores a union chunk, so every chunk must be at least
+	// that large and that aligned
+	align = align < alignof(union chunk) ? alignof(union chunk) : align;
+	size = size < sizeof(union chunk) ? sizeof(union chunk) : size;
+	bfs_assert(is_aligned(align, size));
+
+	arena->align = align;
+	arena->size = size;
+
+	// No slabs and no free chunks yet
+	arena->chunks = NULL;
+	arena->slabs = NULL;
+	arena->nslabs = 0;
+}
+
+/**
+ * Allocate a new slab and push all of its chunks onto the arena's free list.
+ *
+ * @return
+ *         0 on success, -1 if the slab or the slab array could not be allocated.
+ */
+attr(cold)
+static int slab_alloc(struct arena *arena) {
+ // Make the initial allocation size ~4K
+ size_t size = 4096;
+ if (size < arena->size) {
+ size = arena->size;
+ }
+ // Trim off the excess
+ size -= size % arena->size;
+ // Double the size for every slab
+ size <<= arena->nslabs;
+
+ // Allocate the slab
+ void *slab = zalloc(arena->align, size);
+ if (!slab) {
+ return -1;
+ }
+
+ // Grow the slab array
+ void **pslab = RESERVE(void *, &arena->slabs, &arena->nslabs);
+ if (!pslab) {
+ free(slab);
+ return -1;
+ }
+
+ // Fix the last chunk->next offset
+ // (a zero offset would otherwise point just past the end of the slab;
+ // link it to the previous head of the free list instead)
+ void *last = (char *)slab + size - arena->size;
+ chunk_set_next(arena, last, arena->chunks);
+
+ // We can rely on zero-initialized slabs, but others shouldn't
+ sanitize_uninit(slab, size);
+
+ // Record the slab and make its first chunk the new free-list head
+ arena->chunks = *pslab = slab;
+ return 0;
+}
+
+/**
+ * Pop one chunk off the arena's free list, allocating a new slab first if
+ * the list is empty.  Returns NULL if a new slab could not be allocated.
+ */
+void *arena_alloc(struct arena *arena) {
+ if (!arena->chunks && slab_alloc(arena) != 0) {
+ return NULL;
+ }
+
+ union chunk *chunk = arena->chunks;
+ sanitize_alloc(chunk, arena->size);
+
+ // The stored link has to be read to advance the free list; the chunk is
+ // marked uninitialized again before it is handed to the caller
+ sanitize_init(chunk);
+ arena->chunks = chunk_next(arena, chunk);
+ sanitize_uninit(chunk, arena->size);
+
+ return chunk;
+}
+
+/**
+ * Return a chunk to the arena by pushing it onto the head of the free list.
+ */
+void arena_free(struct arena *arena, void *ptr) {
+ union chunk *chunk = ptr;
+ // Write the link before telling the sanitizers the chunk is freed
+ chunk_set_next(arena, chunk, arena->chunks);
+ arena->chunks = chunk;
+ sanitize_free(chunk, arena->size);
+}
+
+/** Release every slab and leave the arena empty but reusable. */
+void arena_clear(struct arena *arena) {
+	void **slabs = arena->slabs;
+	size_t nslabs = arena->nslabs;
+
+	// Free each slab, then the array that tracked them
+	for (size_t i = 0; i < nslabs; ++i) {
+		free(slabs[i]);
+	}
+	free(slabs);
+
+	// Back to the freshly-initialized state (align/size are kept)
+	arena->slabs = NULL;
+	arena->nslabs = 0;
+	arena->chunks = NULL;
+}
+
+/**
+ * Free all slabs via arena_clear(), then mark the arena struct itself as
+ * uninitialized for the sanitizers.
+ */
+void arena_destroy(struct arena *arena) {
+ arena_clear(arena);
+ sanitize_uninit(arena);
+}
+
+/**
+ * Record the struct layout and compute the shift for the smallest size class.
+ * No arenas are created here; varena_get() creates them on demand.
+ */
+void varena_init(struct varena *varena, size_t align, size_t min, size_t offset, size_t size) {
+ varena->align = align;
+ varena->offset = offset;
+ varena->size = size;
+ varena->narenas = 0;
+ varena->arenas = NULL;
+
+ // The smallest size class is at least as many as fit in the smallest
+ // aligned allocation size
+ // (i.e. the struct size for one element, minus the header, divided by
+ // the element size and rounded up)
+ size_t min_count = (flex_size(align, min, offset, size, 1) - offset + size - 1) / size;
+ varena->shift = bit_width(min_count - 1);
+}
+
+/** Map an array length to the index of the arena (size class) that holds it. */
+static size_t varena_size_class(struct varena *varena, size_t count) {
+	// Powers of two are common array lengths, so each class treats one as
+	// its inclusive upper bound: classify by (count - 1), keeping 0 at 0
+	size_t last = count ? count - 1 : 0;
+	return bit_width(last >> varena->shift);
+}
+
+/**
+ * Get the exact size of a flexible struct with count array elements.
+ * Passes a minimum size of 0 to flex_size(), so only the varena's alignment,
+ * array offset, and element size determine the result.
+ */
+static size_t varena_exact_size(const struct varena *varena, size_t count) {
+ return flex_size(varena->align, 0, varena->offset, varena->size, count);
+}
+
+/**
+ * Get the arena for the given array length, creating it (and any smaller
+ * size classes that don't exist yet) on demand.
+ *
+ * @return
+ *         The arena, or NULL if the arena array could not be grown.
+ */
+static struct arena *varena_get(struct varena *varena, size_t count) {
+ size_t i = varena_size_class(varena, count);
+
+ while (i >= varena->narenas) {
+ // RESERVE() increments narenas on success, so remember the new index
+ size_t j = varena->narenas;
+ struct arena *arena = RESERVE(struct arena, &varena->arenas, &varena->narenas);
+ if (!arena) {
+ return NULL;
+ }
+
+ // Arena j holds structs with up to 2^(j + shift) array elements
+ size_t shift = j + varena->shift;
+ size_t size = varena_exact_size(varena, (size_t)1 << shift);
+ arena_init(arena, varena->align, size);
+ }
+
+ return &varena->arenas[i];
+}
+
+/** Allocate a flexible struct with room for count array elements. */
+void *varena_alloc(struct varena *varena, size_t count) {
+	struct arena *arena = varena_get(varena, count);
+	void *ret = arena ? arena_alloc(arena) : NULL;
+
+	if (ret) {
+		// Tell the sanitizers the exact size of the allocated struct,
+		// rather than the full size of the chunk
+		sanitize_free(ret, arena->size);
+		sanitize_alloc(ret, varena_exact_size(varena, count));
+	}
+
+	return ret;
+}
+
+/**
+ * Resize a flexible struct from old_count to new_count array elements.
+ *
+ * If both counts fall in the same size class, the object stays where it is
+ * and only the sanitizer bookkeeping is adjusted.  Otherwise a chunk is taken
+ * from the new arena, the contents are copied, and the old chunk is freed.
+ * Returns NULL on failure, in which case ptr has not been freed.
+ */
+void *varena_realloc(struct varena *varena, void *ptr, size_t old_count, size_t new_count) {
+ struct arena *new_arena = varena_get(varena, new_count);
+ struct arena *old_arena = varena_get(varena, old_count);
+ if (!new_arena) {
+ return NULL;
+ }
+
+ size_t new_exact_size = varena_exact_size(varena, new_count);
+ size_t old_exact_size = varena_exact_size(varena, old_count);
+
+ if (new_arena == old_arena) {
+ // Same chunk: just shrink or grow the region the sanitizers allow
+ if (new_count < old_count) {
+ sanitize_free((char *)ptr + new_exact_size, old_exact_size - new_exact_size);
+ } else if (new_count > old_count) {
+ sanitize_alloc((char *)ptr + old_exact_size, new_exact_size - old_exact_size);
+ }
+ return ptr;
+ }
+
+ void *ret = arena_alloc(new_arena);
+ if (!ret) {
+ return NULL;
+ }
+
+ // Expose the whole old chunk so the memcpy() below may read all of it
+ size_t old_size = old_arena->size;
+ sanitize_alloc((char *)ptr + old_exact_size, old_size - old_exact_size);
+
+ // Copy as many bytes as fit in the smaller of the two chunks
+ size_t new_size = new_arena->size;
+ size_t min_size = new_size < old_size ? new_size : old_size;
+ memcpy(ret, ptr, min_size);
+
+ // Release the old chunk and hide the unused tail of the new one
+ arena_free(old_arena, ptr);
+ sanitize_free((char *)ret + new_exact_size, new_size - new_exact_size);
+
+ return ret;
+}
+
+/**
+ * Grow a flexible struct to the capacity of the size class that holds
+ * (*count + 1) elements.  On success *count is updated to the new length;
+ * on failure NULL is returned and *count is left unchanged.
+ */
+void *varena_grow(struct varena *varena, void *ptr, size_t *count) {
+ size_t old_count = *count;
+
+ // Round up to the limit of the current size class. If we're already at
+ // the limit, go to the next size class.
+ size_t new_shift = varena_size_class(varena, old_count + 1) + varena->shift;
+ size_t new_count = (size_t)1 << new_shift;
+
+ ptr = varena_realloc(varena, ptr, old_count, new_count);
+ if (ptr) {
+ *count = new_count;
+ }
+ return ptr;
+}
+
+/**
+ * Free an arena-allocated flexible struct.  The arena is looked up from
+ * count, so count has to map to the size class the object currently lives in.
+ */
+void varena_free(struct varena *varena, void *ptr, size_t count) {
+ struct arena *arena = varena_get(varena, count);
+ arena_free(arena, ptr);
+}
+
+/** Free all allocations from every arena, keeping the arenas themselves. */
+void varena_clear(struct varena *varena) {
+	struct arena *arenas = varena->arenas;
+	size_t remaining = varena->narenas;
+
+	while (remaining > 0) {
+		arena_clear(arenas++);
+		--remaining;
+	}
+}
+
+/** Destroy every arena, release the arena array, and retire the varena. */
+void varena_destroy(struct varena *varena) {
+	struct arena *arenas = varena->arenas;
+	size_t remaining = varena->narenas;
+
+	while (remaining > 0) {
+		arena_destroy(arenas++);
+		--remaining;
+	}
+
+	free(varena->arenas);
+	sanitize_uninit(varena);
+}
diff --git a/src/alloc.h b/src/alloc.h
new file mode 100644
index 0000000..095134a
--- /dev/null
+++ b/src/alloc.h
@@ -0,0 +1,383 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+/**
+ * Memory allocation.
+ */
+
+#ifndef BFS_ALLOC_H
+#define BFS_ALLOC_H
+
+#include "prelude.h"
+#include <errno.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+/** Check if a size is a multiple of an alignment (align must be a power of two). */
+static inline bool is_aligned(size_t align, size_t size) {
+	size_t mask = align - 1;
+	return !(size & mask);
+}
+
+/** Round size down to a multiple of align (align must be a power of two). */
+static inline size_t align_floor(size_t align, size_t size) {
+	size_t mask = align - 1;
+	return size & ~mask;
+}
+
+/** Round size up to a multiple of align (align must be a power of two). */
+static inline size_t align_ceil(size_t align, size_t size) {
+	// Bump past the next boundary, then round back down
+	size_t bumped = size + align - 1;
+	return align_floor(align, bumped);
+}
+
+/**
+ * Saturating array size.
+ *
+ * @param align
+ *         Array element alignment.
+ * @param size
+ *         Array element size.  Must be non-zero, since it is used as a divisor
+ *         in the overflow check.
+ * @param count
+ *         Array element count.
+ * @return
+ *         size * count, saturating to the maximum aligned value on overflow.
+ */
+static inline size_t array_size(size_t align, size_t size, size_t count) {
+ size_t ret = size * count;
+ // The multiplication wrapped iff dividing back doesn't recover count
+ return ret / size == count ? ret : ~(align - 1);
+}
+
+/** Saturating array sizeof. */
+#define sizeof_array(type, count) \
+ array_size(alignof(type), sizeof(type), count)
+
+/** Size of a struct/union field. */
+#define sizeof_member(type, member) \
+ sizeof(((type *)NULL)->member)
+
+/**
+ * Saturating flexible struct size.
+ *
+ * @param align
+ *         Struct alignment.
+ * @param min
+ *         Minimum struct size.
+ * @param offset
+ *         Flexible array member offset.
+ * @param size
+ *         Flexible array element size.  Must be non-zero, since it is used as
+ *         a divisor in the overflow check.
+ * @param count
+ *         Flexible array element count.
+ * @return
+ *         The size of the struct with count flexible array elements.  Saturates
+ *         to the maximum aligned value on overflow.
+ */
+static inline size_t flex_size(size_t align, size_t min, size_t offset, size_t size, size_t count) {
+ size_t ret = size * count;
+ size_t overflow = ret / size != count;
+
+ // Add the header plus enough slack to round up to the alignment
+ size_t extra = offset + align - 1;
+ ret += extra;
+ overflow |= ret < extra;
+ // -overflow is all ones when overflow is 1, so this saturates ret before
+ // it is rounded down to a multiple of align
+ ret |= -overflow;
+ ret = align_floor(align, ret);
+
+ // Make sure sizeof_flex(type, member, 0) >= sizeof(type), even if the
+ // type has more padding than necessary for alignment
+ if (min > align_ceil(align, offset)) {
+ ret = ret < min ? min : ret;
+ }
+
+ return ret;
+}
+
+/**
+ * Computes the size of a flexible struct.
+ *
+ * @param type
+ * The type of the struct containing the flexible array.
+ * @param member
+ * The name of the flexible array member.
+ * @param count
+ * The length of the flexible array.
+ * @return
+ * The size of the struct with count flexible array elements. Saturates
+ * to the maximum aligned value on overflow.
+ */
+#define sizeof_flex(type, member, count) \
+ flex_size(alignof(type), sizeof(type), offsetof(type, member), sizeof_member(type, member[0]), count)
+
+/**
+ * General memory allocator.
+ *
+ * @param align
+ * The required alignment.
+ * @param size
+ * The size of the allocation.
+ * @return
+ * The allocated memory, or NULL on failure.
+ */
+attr(malloc(free, 1), aligned_alloc(1, 2))
+void *alloc(size_t align, size_t size);
+
+/**
+ * Zero-initialized memory allocator.
+ *
+ * @param align
+ * The required alignment.
+ * @param size
+ * The size of the allocation.
+ * @return
+ * The allocated memory, or NULL on failure.
+ */
+attr(malloc(free, 1), aligned_alloc(1, 2))
+void *zalloc(size_t align, size_t size);
+
+/** Allocate memory for the given type. */
+#define ALLOC(type) \
+ (type *)alloc(alignof(type), sizeof(type))
+
+/** Allocate zeroed memory for the given type. */
+#define ZALLOC(type) \
+ (type *)zalloc(alignof(type), sizeof(type))
+
+/** Allocate memory for an array. */
+#define ALLOC_ARRAY(type, count) \
+ (type *)alloc(alignof(type), sizeof_array(type, count))
+
+/** Allocate zeroed memory for an array. */
+#define ZALLOC_ARRAY(type, count) \
+ (type *)zalloc(alignof(type), sizeof_array(type, count))
+
+/** Allocate memory for a flexible struct. */
+#define ALLOC_FLEX(type, member, count) \
+ (type *)alloc(alignof(type), sizeof_flex(type, member, count))
+
+/** Allocate zeroed memory for a flexible struct. */
+#define ZALLOC_FLEX(type, member, count) \
+ (type *)zalloc(alignof(type), sizeof_flex(type, member, count))
+
+/**
+ * Alignment-aware realloc().
+ *
+ * @param ptr
+ * The pointer to reallocate.
+ * @param align
+ * The required alignment.
+ * @param old_size
+ * The previous allocation size.
+ * @param new_size
+ * The new allocation size.
+ * @return
+ * The reallocated memory, or NULL on failure.
+ */
+attr(nodiscard, aligned_alloc(2, 4))
+void *xrealloc(void *ptr, size_t align, size_t old_size, size_t new_size);
+
+/** Reallocate memory for an array. */
+#define REALLOC_ARRAY(type, ptr, old_count, new_count) \
+ (type *)xrealloc((ptr), alignof(type), sizeof_array(type, old_count), sizeof_array(type, new_count))
+
+/** Reallocate memory for a flexible struct. */
+#define REALLOC_FLEX(type, member, ptr, old_count, new_count) \
+ (type *)xrealloc((ptr), alignof(type), sizeof_flex(type, member, old_count), sizeof_flex(type, member, new_count))
+
+/**
+ * Reserve space for one more element in a dynamic array.
+ *
+ * @param ptr
+ *         The pointer to reallocate.
+ * @param align
+ *         The required alignment.
+ * @param size
+ *         The size of each array element.
+ * @param count
+ *         The current size of the array.
+ * @return
+ *         The reallocated memory, on both success *and* failure.  On success,
+ *         errno will be set to zero, and the returned pointer will have room
+ *         for (count + 1) elements.  On failure, errno will be non-zero, and
+ *         ptr will be returned unchanged.
+ */
+attr(nodiscard)
+void *reserve(void *ptr, size_t align, size_t size, size_t count);
+
+/**
+ * Convenience macro to grow a dynamic array.
+ *
+ * Both ptr and count are evaluated more than once, so they must be free of
+ * side effects.
+ *
+ * @param type
+ *         The array element type.
+ * @param type **ptr
+ *         A pointer to the array.
+ * @param size_t *count
+ *         A pointer to the array's size.
+ * @return
+ *         On success, a pointer to the newly reserved array element, i.e.
+ *         `*ptr + *count++`.  On failure, NULL is returned, and both *ptr and
+ *         *count remain unchanged.
+ */
+#define RESERVE(type, ptr, count) \
+	((*(ptr)) = reserve((*(ptr)), alignof(type), sizeof(type), (*(count))), \
+	 errno ? NULL : (*(ptr)) + (*(count))++)
+
+/**
+ * An arena allocator for fixed-size types.
+ *
+ * Arena allocators are intentionally not thread safe.
+ */
+struct arena {
+ /** The list of free chunks. */
+ void *chunks;
+ /** The number of allocated slabs. */
+ size_t nslabs;
+ /** The array of slabs. */
+ void **slabs;
+ /** Chunk alignment. */
+ size_t align;
+ /** Chunk size. */
+ size_t size;
+};
+
+/**
+ * Initialize an arena for chunks of the given size and alignment.
+ */
+void arena_init(struct arena *arena, size_t align, size_t size);
+
+/**
+ * Initialize an arena for the given type.
+ */
+#define ARENA_INIT(arena, type) \
+ arena_init((arena), alignof(type), sizeof(type))
+
+/**
+ * Free an object from the arena.
+ */
+void arena_free(struct arena *arena, void *ptr);
+
+/**
+ * Allocate an object out of the arena.
+ */
+attr(malloc(arena_free, 2))
+void *arena_alloc(struct arena *arena);
+
+/**
+ * Free all allocations from an arena.
+ */
+void arena_clear(struct arena *arena);
+
+/**
+ * Destroy an arena, freeing all allocations.
+ */
+void arena_destroy(struct arena *arena);
+
+/**
+ * An arena allocator for flexibly-sized types.
+ */
+struct varena {
+ /** The alignment of the struct. */
+ size_t align;
+ /** The offset of the flexible array. */
+ size_t offset;
+ /** The size of the flexible array elements. */
+ size_t size;
+ /** Shift amount for the smallest size class. */
+ size_t shift;
+ /** The number of arenas of different sizes. */
+ size_t narenas;
+ /** The array of differently-sized arenas. */
+ struct arena *arenas;
+};
+
+/**
+ * Initialize a varena for a struct with the given layout.
+ *
+ * @param varena
+ * The varena to initialize.
+ * @param align
+ * alignof(type)
+ * @param min
+ * sizeof(type)
+ * @param offset
+ * offsetof(type, flexible_array)
+ * @param size
+ * sizeof(flexible_array[i])
+ */
+void varena_init(struct varena *varena, size_t align, size_t min, size_t offset, size_t size);
+
+/**
+ * Initialize a varena for the given type and flexible array.
+ *
+ * @param varena
+ * The varena to initialize.
+ * @param type
+ * A struct type containing a flexible array.
+ * @param member
+ * The name of the flexible array member.
+ */
+#define VARENA_INIT(varena, type, member) \
+ varena_init(varena, alignof(type), sizeof(type), offsetof(type, member), sizeof_member(type, member[0]))
+
+/**
+ * Free an arena-allocated flexible struct.
+ *
+ * @param varena
+ *         The varena that allocated the object.
+ * @param ptr
+ *         The object to free.
+ * @param count
+ *         The length of the flexible array.
+ */
+void varena_free(struct varena *varena, void *ptr, size_t count);
+
+/**
+ * Arena-allocate a flexible struct.
+ *
+ * @param varena
+ * The varena to allocate from.
+ * @param count
+ * The length of the flexible array.
+ * @return
+ * The allocated struct, or NULL on failure.
+ */
+attr(malloc(varena_free, 2))
+void *varena_alloc(struct varena *varena, size_t count);
+
+/**
+ * Resize a flexible struct.
+ *
+ * @param varena
+ *         The varena to allocate from.
+ * @param ptr
+ *         The object to resize.
+ * @param old_count
+ *         The old array length.
+ * @param new_count
+ *         The new array length.
+ * @return
+ *         The resized struct, or NULL on failure.
+ */
+attr(nodiscard)
+void *varena_realloc(struct varena *varena, void *ptr, size_t old_count, size_t new_count);
+
+/**
+ * Grow a flexible struct by an arbitrary amount.
+ *
+ * @param varena
+ * The varena to allocate from.
+ * @param ptr
+ * The object to resize.
+ * @param count
+ * Pointer to the flexible array length.
+ * @return
+ * The resized struct, or NULL on failure.
+ */
+attr(nodiscard)
+void *varena_grow(struct varena *varena, void *ptr, size_t *count);
+
+/**
+ * Free all allocations from a varena.
+ */
+void varena_clear(struct varena *varena);
+
+/**
+ * Destroy a varena, freeing all allocations.
+ */
+void varena_destroy(struct varena *varena);
+
+#endif // BFS_ALLOC_H
diff --git a/src/atomic.h b/src/atomic.h
new file mode 100644
index 0000000..360de20
--- /dev/null
+++ b/src/atomic.h
@@ -0,0 +1,118 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+/**
+ * Shorthand for standard C atomic operations.
+ */
+
+#ifndef BFS_ATOMIC_H
+#define BFS_ATOMIC_H
+
+#include "prelude.h"
+#include "sanity.h"
+#include <stdatomic.h>
+
+/**
+ * Prettier spelling of _Atomic.
+ */
+#define atomic _Atomic
+
+/**
+ * Shorthand for atomic_load_explicit().
+ *
+ * @param obj
+ * A pointer to the atomic object.
+ * @param order
+ * The memory ordering to use, without the memory_order_ prefix.
+ * @return
+ * The loaded value.
+ */
+#define load(obj, order) \
+ atomic_load_explicit(obj, memory_order_##order)
+
+/**
+ * Shorthand for atomic_store_explicit().
+ */
+#define store(obj, value, order) \
+ atomic_store_explicit(obj, value, memory_order_##order)
+
+/**
+ * Shorthand for atomic_exchange_explicit().
+ */
+#define exchange(obj, value, order) \
+ atomic_exchange_explicit(obj, value, memory_order_##order)
+
+/**
+ * Shorthand for atomic_compare_exchange_weak_explicit().
+ */
+#define compare_exchange_weak(obj, expected, desired, succ, fail) \
+ atomic_compare_exchange_weak_explicit(obj, expected, desired, memory_order_##succ, memory_order_##fail)
+
+/**
+ * Shorthand for atomic_compare_exchange_strong_explicit().
+ */
+#define compare_exchange_strong(obj, expected, desired, succ, fail) \
+ atomic_compare_exchange_strong_explicit(obj, expected, desired, memory_order_##succ, memory_order_##fail)
+
+/**
+ * Shorthand for atomic_fetch_add_explicit().
+ */
+#define fetch_add(obj, arg, order) \
+ atomic_fetch_add_explicit(obj, arg, memory_order_##order)
+
+/**
+ * Shorthand for atomic_fetch_sub_explicit().
+ */
+#define fetch_sub(obj, arg, order) \
+ atomic_fetch_sub_explicit(obj, arg, memory_order_##order)
+
+/**
+ * Shorthand for atomic_fetch_or_explicit().
+ */
+#define fetch_or(obj, arg, order) \
+ atomic_fetch_or_explicit(obj, arg, memory_order_##order)
+
+/**
+ * Shorthand for atomic_fetch_xor_explicit().
+ */
+#define fetch_xor(obj, arg, order) \
+ atomic_fetch_xor_explicit(obj, arg, memory_order_##order)
+
+/**
+ * Shorthand for atomic_fetch_and_explicit().
+ */
+#define fetch_and(obj, arg, order) \
+ atomic_fetch_and_explicit(obj, arg, memory_order_##order)
+
+/**
+ * Shorthand for atomic_thread_fence().
+ *
+ * @param obj
+ *         A pointer to an atomic object.  Only used when SANITIZE_THREAD is set,
+ *         where the fence is replaced by a fetch_add() of zero on this object.
+ * @param order
+ *         The memory ordering to use, without the memory_order_ prefix.
+ */
+#if SANITIZE_THREAD
+// TSan doesn't support fences: https://github.com/google/sanitizers/issues/1415
+# define thread_fence(obj, order) \
+ fetch_add(obj, 0, order)
+#else
+# define thread_fence(obj, order) \
+ atomic_thread_fence(memory_order_##order)
+#endif
+
+/**
+ * Shorthand for atomic_signal_fence().
+ */
+#define signal_fence(order) \
+ atomic_signal_fence(memory_order_##order)
+
+/**
+ * A hint to the CPU to relax while it spins.
+ */
+#if __has_builtin(__builtin_ia32_pause)
+# define spin_loop() __builtin_ia32_pause()
+#elif __has_builtin(__builtin_arm_yield)
+# define spin_loop() __builtin_arm_yield()
+#elif __has_builtin(__builtin_riscv_pause)
+# define spin_loop() __builtin_riscv_pause()
+#else
+# define spin_loop() ((void)0)
+#endif
+
+#endif // BFS_ATOMIC_H
diff --git a/src/bar.c b/src/bar.c
new file mode 100644
index 0000000..b928373
--- /dev/null
+++ b/src/bar.c
@@ -0,0 +1,218 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+#include "prelude.h"
+#include "bar.h"
+#include "alloc.h"
+#include "atomic.h"
+#include "bfstd.h"
+#include "bit.h"
+#include "dstring.h"
+#include "sighook.h"
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+/** State for the terminal status bar. */
+struct bfs_bar {
+ /** File descriptor opened on the terminal named by ctermid(). */
+ int fd;
+ /** Terminal width in columns, as last reported by TIOCGWINSZ. */
+ atomic unsigned int width;
+ /** Terminal height in rows, as last reported by TIOCGWINSZ. */
+ atomic unsigned int height;
+
+ /** Hook registered with atsigexit() that resets the terminal. */
+ struct sighook *exit_hook;
+ /** SIGWINCH hook that re-reads the size (only set when SIGWINCH exists). */
+ struct sighook *winch_hook;
+};
+
+/**
+ * Get the terminal size, if possible.
+ *
+ * On success the bar's width and height are updated with relaxed atomic
+ * stores and 0 is returned.  Returns -1 if the ioctl() fails, or with errno
+ * set to ENOTSUP when TIOCGWINSZ is not available at compile time.
+ */
+static int bfs_bar_getsize(struct bfs_bar *bar) {
+#ifdef TIOCGWINSZ
+ struct winsize ws;
+ if (ioctl(bar->fd, TIOCGWINSZ, &ws) != 0) {
+ return -1;
+ }
+
+ store(&bar->width, ws.ws_col, relaxed);
+ store(&bar->height, ws.ws_row, relaxed);
+ return 0;
+#else
+ errno = ENOTSUP;
+ return -1;
+#endif
+}
+
+/** Write len bytes to the status bar's terminal (async-signal-safe). */
+static int bfs_bar_write(struct bfs_bar *bar, const char *str, size_t len) {
+	// Anything short of a complete write counts as failure
+	if (xwrite(bar->fd, str, len) != len) {
+		return -1;
+	}
+	return 0;
+}
+
+/** Write a NUL-terminated string to the status bar (async-signal-safe). */
+static int bfs_bar_puts(struct bfs_bar *bar, const char *str) {
+ return bfs_bar_write(bar, str, strlen(str));
+}
+
+/** Number of decimal digits needed for terminal sizes. */
+#define ITOA_DIGITS ((USHRT_WIDTH + 2) / 3)
+
+/**
+ * Async Signal Safe itoa().
+ *
+ * Formats n in decimal at str and returns a pointer to the terminating NUL.
+ * The digits are first built right-to-left ending at str + ITOA_DIGITS and
+ * then moved to the front, so str needs room for ITOA_DIGITS + 1 bytes and n
+ * must not have more than ITOA_DIGITS decimal digits; a larger n would be
+ * written before the start of str.
+ */
+static char *ass_itoa(char *str, unsigned int n) {
+ char *end = str + ITOA_DIGITS;
+ *end = '\0';
+
+ char *c = end;
+ do {
+ *--c = '0' + (n % 10);
+ n /= 10;
+ } while (n);
+
+ // Shift the digits (and the NUL) down to the start of the buffer
+ size_t len = end - c;
+ memmove(str, c, len + 1);
+ return str + len;
+}
+
+/**
+ * Update the size of the scrollable region so that the last terminal row is
+ * left free for the status bar.
+ *
+ * @return
+ *         0 on success, -1 if the escape sequence could not be written.
+ */
+static int bfs_bar_resize(struct bfs_bar *bar) {
+	static const char PREFIX[] =
+		"\033D"    // IND: Line feed, possibly scrolling
+		"\033[1A"  // CUU: Move cursor up 1 row
+		"\0337"    // DECSC: Save cursor
+		"\033[;";  // DECSTBM: Set scrollable region
+	static const char SUFFIX[] =
+		"r"        // (end of DECSTBM)
+		"\0338"    // DECRC: Restore the cursor
+		"\033[J";  // ED: Erase display from cursor to end
+
+	char esc_seq[sizeof(PREFIX) + ITOA_DIGITS + sizeof(SUFFIX)];
+
+	// DECSTBM takes the height as the second argument.  A terminal can
+	// report zero rows; subtracting one from that would wrap around to
+	// UINT_MAX, which has more digits than the ITOA_DIGITS bytes reserved
+	// in esc_seq, so don't decrement past zero.
+	unsigned int height = load(&bar->height, relaxed);
+	if (height > 0) {
+		--height;
+	}
+
+	char *cur = stpcpy(esc_seq, PREFIX);
+	cur = ass_itoa(cur, height);
+	cur = stpcpy(cur, SUFFIX);
+
+	return bfs_bar_write(bar, esc_seq, cur - esc_seq);
+}
+
+#ifdef SIGWINCH
+/**
+ * SIGWINCH handler: re-read the terminal size and update the scrollable
+ * region.  arg is the struct bfs_bar passed to sighook(); the return values
+ * of both calls are ignored.
+ */
+static void bfs_bar_sigwinch(int sig, siginfo_t *info, void *arg) {
+ struct bfs_bar *bar = arg;
+ bfs_bar_getsize(bar);
+ bfs_bar_resize(bar);
+}
+#endif
+
+/**
+ * Reset the scrollable region and hide the bar.
+ *
+ * @return
+ *         0 on success, -1 if the escape sequence could not be written.
+ */
+static int bfs_bar_reset(struct bfs_bar *bar) {
+ return bfs_bar_puts(bar,
+ "\0337" // DECSC: Save cursor
+ "\033[r" // DECSTBM: Reset scrollable region
+ "\0338" // DECRC: Restore cursor
+ "\033[J" // ED: Erase display from cursor to end
+ );
+}
+
+/**
+ * Signal handler for process-terminating signals: resets the terminal via
+ * bfs_bar_reset().  arg is the struct bfs_bar passed to atsigexit().
+ */
+static void bfs_bar_sigexit(int sig, siginfo_t *info, void *arg) {
+ struct bfs_bar *bar = arg;
+ bfs_bar_reset(bar);
+}
+
+/**
+ * Format a message and send it to the status bar with a single write().
+ *
+ * @return
+ *         0 on success, -1 if formatting or writing failed.
+ */
+attr(printf(2, 3))
+static int bfs_bar_printf(struct bfs_bar *bar, const char *format, ...) {
+	va_list args;
+	va_start(args, format);
+	dchar *msg = dstrvprintf(format, args);
+	va_end(args);
+
+	int ret = -1;
+	if (msg) {
+		ret = bfs_bar_write(bar, msg, dstrlen(msg));
+		dstrfree(msg);
+	}
+	return ret;
+}
+
+/**
+ * Create the status bar on the controlling terminal.
+ *
+ * Opens the terminal named by ctermid(), reads its size, installs the exit
+ * (and, where available, SIGWINCH) hooks, and sets up the scrollable region.
+ *
+ * @return
+ *         The new status bar, or NULL on failure.
+ */
+struct bfs_bar *bfs_bar_show(void) {
+	struct bfs_bar *bar = ALLOC(struct bfs_bar);
+	if (!bar) {
+		return NULL;
+	}
+
+	// ALLOC() does not zero the struct.  Without SIGWINCH, winch_hook is
+	// never assigned below, yet bfs_bar_hide() passes it to sigunhook(),
+	// so give both hooks a well-defined value up front.
+	// NOTE(review): confirm sigunhook() accepts NULL.
+	bar->exit_hook = NULL;
+	bar->winch_hook = NULL;
+
+	char term[L_ctermid];
+	ctermid(term);
+	if (strlen(term) == 0) {
+		errno = ENOTTY;
+		goto fail;
+	}
+
+	bar->fd = open(term, O_RDWR | O_CLOEXEC);
+	if (bar->fd < 0) {
+		goto fail;
+	}
+
+	if (bfs_bar_getsize(bar) != 0) {
+		goto fail_close;
+	}
+
+	bar->exit_hook = atsigexit(bfs_bar_sigexit, bar);
+	if (!bar->exit_hook) {
+		goto fail_close;
+	}
+
+#ifdef SIGWINCH
+	bar->winch_hook = sighook(SIGWINCH, bfs_bar_sigwinch, bar, 0);
+	if (!bar->winch_hook) {
+		goto fail_hook;
+	}
+#endif
+
+	bfs_bar_resize(bar);
+	return bar;
+
+fail_hook:
+	sigunhook(bar->exit_hook);
+fail_close:
+	close_quietly(bar->fd);
+fail:
+	free(bar);
+	return NULL;
+}
+
+/** Get the most recently recorded terminal width (relaxed atomic load). */
+unsigned int bfs_bar_width(const struct bfs_bar *bar) {
+ return load(&bar->width, relaxed);
+}
+
+/**
+ * Draw str in reverse video on the row given by the recorded terminal
+ * height, saving and restoring the cursor around it.
+ *
+ * @return
+ *         0 on success, -1 on failure.
+ */
+int bfs_bar_update(struct bfs_bar *bar, const char *str) {
+ unsigned int height = load(&bar->height, relaxed);
+ return bfs_bar_printf(bar,
+ "\0337" // DECSC: Save cursor
+ "\033[%u;0f" // HVP: Move cursor to row, column
+ "\033[K" // EL: Erase line
+ "\033[7m" // SGR reverse video
+ "%s"
+ "\033[27m" // SGR reverse video off
+ "\0338", // DECRC: Restore cursor
+ height,
+ str
+ );
+}
+
+/**
+ * Hide the status bar and release it.  A NULL bar is ignored.
+ */
+void bfs_bar_hide(struct bfs_bar *bar) {
+ if (!bar) {
+ return;
+ }
+
+ // Remove the signal hooks before resetting the terminal and closing fd
+ sigunhook(bar->winch_hook);
+ sigunhook(bar->exit_hook);
+
+ bfs_bar_reset(bar);
+
+ xclose(bar->fd);
+ free(bar);
+}
diff --git a/src/bar.h b/src/bar.h
new file mode 100644
index 0000000..20d92a9
--- /dev/null
+++ b/src/bar.h
@@ -0,0 +1,44 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+/**
+ * A terminal status bar.
+ */
+
+#ifndef BFS_BAR_H
+#define BFS_BAR_H
+
+/** A terminal status bar. */
+struct bfs_bar;
+
+/**
+ * Create a terminal status bar. Only one status bar is supported at a time.
+ *
+ * @return
+ * A pointer to the new status bar, or NULL on failure.
+ */
+struct bfs_bar *bfs_bar_show(void);
+
+/**
+ * Get the width of the status bar.
+ */
+unsigned int bfs_bar_width(const struct bfs_bar *bar);
+
+/**
+ * Update the status bar message.
+ *
+ * @param bar
+ * The status bar to update.
+ * @param str
+ * The string to display.
+ * @return
+ * 0 on success, -1 on failure.
+ */
+int bfs_bar_update(struct bfs_bar *bar, const char *str);
+
+/**
+ * Hide the status bar.
+ *
+ * @param bar
+ *         The status bar to hide and free.  May be NULL, in which case nothing
+ *         happens.
+ */
+void bfs_bar_hide(struct bfs_bar *bar);
+
+#endif // BFS_BAR_H
diff --git a/src/bfstd.c b/src/bfstd.c
new file mode 100644
index 0000000..7680f17
--- /dev/null
+++ b/src/bfstd.c
@@ -0,0 +1,1006 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+#include "prelude.h"
+#include "bfstd.h"
+#include "bit.h"
+#include "diag.h"
+#include "sanity.h"
+#include "thread.h"
+#include "xregex.h"
+#include <errno.h>
+#include <fcntl.h>
+#include <langinfo.h>
+#include <limits.h>
+#include <locale.h>
+#include <nl_types.h>
+#include <pthread.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <wchar.h>
+
+#if BFS_USE_SYS_SYSMACROS_H
+# include <sys/sysmacros.h>
+#elif BFS_USE_SYS_MKDEV_H
+# include <sys/mkdev.h>
+#endif
+
+#if BFS_USE_UTIL_H
+# include <util.h>
+#endif
+
+/**
+ * Check whether an error code belongs to a broader category of errors.
+ *
+ * @param error
+ *         The error code to classify.
+ * @param category
+ *         The category to test for (ENOENT, ENOSYS, or ENAMETOOLONG).
+ * @return
+ *         true if error equals category or is treated as equivalent to it.
+ */
+bool error_is_like(int error, int category) {
+	if (error == category) {
+		return true;
+	}
+
+	switch (category) {
+	case ENOENT:
+		// Both can be triggered by non-existent paths
+		return error == ENOTDIR;
+
+	case ENOSYS:
+		// https://github.com/opencontainers/runc/issues/2151
+		// Classify the error we were given, not whatever errno holds now
+		return error == EPERM;
+
+#if __DragonFly__
+	// https://twitter.com/tavianator/status/1742991411203485713
+	case ENAMETOOLONG:
+		return error == EFAULT;
+#endif
+	}
+
+	return false;
+}
+
+/** Classify the current value of errno with error_is_like(). */
+bool errno_is_like(int category) {
+	int error = errno;
+	return error_is_like(error, category);
+}
+
+/** Convert a "-1 and errno" style result into a "negative errno" result. */
+int try(int ret) {
+	if (ret < 0) {
+		// The failed call is expected to have left a positive errno behind
+		bfs_assert(errno > 0, "errno should be positive, was %d\n", errno);
+		return -errno;
+	}
+
+	return ret;
+}
+
+/** Allocate a copy of everything before the final component of a path. */
+char *xdirname(const char *path) {
+	size_t len = xbaseoff(path);
+
+	// Drop the slashes that separate the parent from the final component
+	for (; len > 0; --len) {
+		if (path[len - 1] != '/') {
+			break;
+		}
+	}
+
+	if (len > 0) {
+		return strndup(path, len);
+	}
+
+	// Nothing is left: the parent is either the root or the current directory
+	const char *dir = (path[0] == '/') ? "/" : ".";
+	return strdup(dir);
+}
+
+/** Allocate a copy of the final component of a path. */
+char *xbasename(const char *path) {
+	const char *base = path + xbaseoff(path);
+
+	// The component ends at the next slash (or at the end of the string)
+	size_t len = strcspn(base, "/");
+	if (len == 0) {
+		// No name at all: the path was either all slashes or empty
+		return strdup(*base == '/' ? "/" : ".");
+	}
+
+	return strndup(base, len);
+}
+
+/** Compute the offset at which the final component of a path begins. */
+size_t xbaseoff(const char *path) {
+	const char *p = path + strlen(path);
+
+	// Back up over trailing slashes
+	while (p > path && p[-1] == '/') {
+		--p;
+	}
+
+	// Back up to the start of the final component
+	while (p > path && p[-1] != '/') {
+		--p;
+	}
+
+	// For a path made only of slashes, move forward to the last one
+	while (p[0] == '/' && p[1] != '\0') {
+		++p;
+	}
+
+	return (size_t)(p - path);
+}
+
+/** Open a stdio stream using open()-style flags. */
+FILE *xfopen(const char *path, int flags) {
+	char mode[4];
+
+	// Translate the access mode into the matching fopen() mode string
+	int accmode = flags & O_ACCMODE;
+	if (accmode == O_RDONLY) {
+		strcpy(mode, "rb");
+	} else if (accmode == O_WRONLY) {
+		strcpy(mode, "wb");
+	} else if (accmode == O_RDWR) {
+		strcpy(mode, "r+b");
+	} else {
+		bfs_bug("Invalid access mode");
+		errno = EINVAL;
+		return NULL;
+	}
+
+	// Appending replaces the leading 'r'/'w' with 'a'
+	if (flags & O_APPEND) {
+		mode[0] = 'a';
+	}
+
+	// The permission argument is only passed when creating the file
+	int fd = (flags & O_CREAT) ? open(path, flags, 0666) : open(path, flags);
+	if (fd < 0) {
+		return NULL;
+	}
+
+	FILE *file = fdopen(fd, mode);
+	if (!file) {
+		// Don't leak the descriptor, and keep fdopen()'s errno
+		close_quietly(fd);
+	}
+	return file;
+}
+
+/**
+ * Read one delimited chunk from a stream with getdelim().
+ *
+ * @param file
+ *         The stream to read from.
+ * @param delim
+ *         The delimiter that ends each chunk.
+ * @return
+ *         The chunk without its trailing delimiter, allocated with malloc().
+ *         NULL on error (errno != 0) or end of file (errno == 0).
+ */
+char *xgetdelim(FILE *file, char delim) {
+	char *chunk = NULL;
+	size_t n = 0;
+	ssize_t len = getdelim(&chunk, &n, delim, file);
+	if (len < 0) {
+		free(chunk);
+		// Let callers tell a clean end of file apart from a read error
+		if (!ferror(file)) {
+			errno = 0;
+		}
+		return NULL;
+	}
+
+	// getdelim() counts the delimiter in len, so it is the last byte
+	// before the NUL terminator (chunk[len] is always the terminator)
+	if (len > 0 && chunk[len - 1] == delim) {
+		chunk[len - 1] = '\0';
+	}
+	return chunk;
+}
+
+/** Get the program name, falling back to BFS_COMMAND. */
+const char *xgetprogname(void) {
+#if BFS_HAS_GETPROGNAME
+	const char *name = getprogname();
+#elif BFS_HAS_GETPROGNAME_GNU
+	const char *name = program_invocation_short_name;
+#else
+	const char *name = NULL;
+#endif
+
+	// Use the compiled-in name when the platform gave us nothing
+	return name ? name : BFS_COMMAND;
+}
+
+/**
+ * Match a response against the locale's regex for the given langinfo item.
+ * Returns -1 if the item has no pattern; otherwise the bfs_regcomp() result
+ * when it is non-zero, else the bfs_regexec() result.
+ */
+static int xrpregex(nl_item item, const char *response) {
+	const char *expr = nl_langinfo(item);
+	if (!expr) {
+		return -1;
+	}
+
+	struct bfs_regex *re;
+	int result = bfs_regcomp(&re, expr, BFS_REGEX_POSIX_EXTENDED, 0);
+	if (result == 0) {
+		result = bfs_regexec(re, response, 0);
+	}
+
+	// The regex is freed whether or not compilation succeeded
+	bfs_regfree(re);
+	return result;
+}
+
+/**
+ * Classify a response: 1 if affirmative, 0 if negative, -1 if neither (or if
+ * matching against the locale's expressions failed).
+ */
+static int xrpmatch(const char *response) {
+	// NOEXPR is consulted first
+	int no = xrpregex(NOEXPR, response);
+	if (no != 0) {
+		return no > 0 ? 0 : -1;
+	}
+
+	int yes = xrpregex(YESEXPR, response);
+	if (yes != 0) {
+		return yes > 0 ? 1 : -1;
+	}
+
+	// Failsafe: always handle y/n
+	switch (response[0]) {
+	case 'n':
+	case 'N':
+		return 0;
+	case 'y':
+	case 'Y':
+		return 1;
+	default:
+		return -1;
+	}
+}
+
+/** Read one line from stdin and classify it as yes (1), no (0), or unknown (-1). */
+int ynprompt(void) {
+	// Flush stderr before blocking on input
+	fflush(stderr);
+
+	char *line = xgetdelim(stdin, '\n');
+	if (!line) {
+		return -1;
+	}
+
+	int answer = xrpmatch(line);
+	free(line);
+	return answer;
+}
+
+/** Copy size bytes from src into a fresh malloc() allocation. */
+void *xmemdup(const void *src, size_t size) {
+	void *copy = malloc(size);
+	if (!copy) {
+		return NULL;
+	}
+
+	// memcpy() hands back its destination
+	return memcpy(copy, src, size);
+}
+
+/** Append all of src to a bounded buffer; see xstpencpy(). */
+char *xstpecpy(char *dest, char *end, const char *src) {
+	// No limit on the source length beyond the destination's capacity
+	const size_t unlimited = SIZE_MAX;
+	return xstpencpy(dest, end, src, unlimited);
+}
+
+/**
+ * Append at most n characters of src to the buffer [dest, end), always leaving
+ * it NUL-terminated.  Returns the new terminator, or end if the output was
+ * truncated.
+ */
+char *xstpencpy(char *dest, char *end, const char *src, size_t n) {
+	size_t room = end - dest;
+	size_t limit = (n > room) ? room : n;
+	size_t len = strnlen(src, limit);
+
+	memcpy(dest, src, len);
+
+	if (len >= room) {
+		// Out of room: give up the last byte for the terminator
+		end[-1] = '\0';
+		return end;
+	}
+
+	char *nul = dest + len;
+	*nul = '\0';
+	return nul;
+}
+
+/**
+ * Describe an error number without disturbing errno.
+ *
+ * Depending on the BFS_HAS_* configuration this uses strerror_l() or one of
+ * the strerror_r() flavours.  If none of them produces a string, the result
+ * is "Unknown error <errnum>" formatted into a thread-local buffer.
+ */
+const char *xstrerror(int errnum) {
+ // Remember errno so it can be restored before returning
+ int saved = errno;
+ const char *ret = NULL;
+ // Backing storage for the strerror_r() variants and the fallback message
+ static thread_local char buf[256];
+
+ // On FreeBSD with MemorySanitizer, duplocale() triggers
+ // https://github.com/llvm/llvm-project/issues/65532
+#if BFS_HAS_STRERROR_L && !(__FreeBSD__ && SANITIZE_MEMORY)
+# if BFS_HAS_USELOCALE
+ // uselocale() with a null argument only queries the current locale
+ locale_t loc = uselocale((locale_t)0);
+# else
+ locale_t loc = LC_GLOBAL_LOCALE;
+# endif
+
+ // The global locale handle is replaced by a temporary duplicate
+ // before it is passed to strerror_l()
+ bool free_loc = false;
+ if (loc == LC_GLOBAL_LOCALE) {
+ loc = duplocale(loc);
+ free_loc = true;
+ }
+
+ // A null locale here means duplocale() failed; skip to the fallback
+ if (loc != (locale_t)0) {
+ ret = strerror_l(errnum, loc);
+ if (free_loc) {
+ freelocale(loc);
+ }
+ }
+#elif BFS_HAS_STRERROR_R_POSIX
+ // This variant returns 0 on success and fills buf
+ if (strerror_r(errnum, buf, sizeof(buf)) == 0) {
+ ret = buf;
+ }
+#elif BFS_HAS_STRERROR_R_GNU
+ // This variant returns the message pointer directly
+ ret = strerror_r(errnum, buf, sizeof(buf));
+#endif
+
+ if (!ret) {
+ // Fallback for strerror_[lr]() or duplocale() failures
+ snprintf(buf, sizeof(buf), "Unknown error %d", errnum);
+ ret = buf;
+ }
+
+ errno = saved;
+ return ret;
+}
+
+/** Map the file type bits of a mode to the letter used in ls -l style output. */
+static char type_char(mode_t mode) {
+	mode_t type = mode & S_IFMT;
+
+	if (type == S_IFREG) {
+		return '-';
+	} else if (type == S_IFBLK) {
+		return 'b';
+	} else if (type == S_IFCHR) {
+		return 'c';
+	} else if (type == S_IFDIR) {
+		return 'd';
+	} else if (type == S_IFLNK) {
+		return 'l';
+	} else if (type == S_IFIFO) {
+		return 'p';
+	} else if (type == S_IFSOCK) {
+		return 's';
+	}
+
+	// The remaining types only exist on some platforms
+#ifdef S_IFDOOR
+	if (type == S_IFDOOR) {
+		return 'D';
+	}
+#endif
+#ifdef S_IFPORT
+	if (type == S_IFPORT) {
+		return 'P';
+	}
+#endif
+#ifdef S_IFWHT
+	if (type == S_IFWHT) {
+		return 'w';
+	}
+#endif
+
+	// Unrecognized file type
+	return '?';
+}
+
+/** Format a mode as a 10-character ls -l style string plus a NUL terminator. */
+void xstrmode(mode_t mode, char str[11]) {
+	// One row per permission triad: user, group, other.  The special bit
+	// (setuid, setgid, sticky) shares the execute column.
+	static const struct {
+		mode_t read, write, exec, special;
+		char with_exec, without_exec;
+	} triads[] = {
+		{00400, 00200, 00100, 04000, 's', 'S'},
+		{00040, 00020, 00010, 02000, 's', 'S'},
+		{00004, 00002, 00001, 01000, 't', 'T'},
+	};
+
+	strcpy(str, "----------");
+	str[0] = type_char(mode);
+
+	for (size_t i = 0; i < 3; ++i) {
+		char *bits = str + 1 + 3 * i;
+
+		if (mode & triads[i].read) {
+			bits[0] = 'r';
+		}
+		if (mode & triads[i].write) {
+			bits[1] = 'w';
+		}
+
+		if (mode & triads[i].special) {
+			// Lower case when the execute bit is set too, upper case otherwise
+			bits[2] = (mode & triads[i].exec) ? triads[i].with_exec : triads[i].without_exec;
+		} else if (mode & triads[i].exec) {
+			bits[2] = 'x';
+		}
+	}
+}
+
+/**
+ * Check if an rlimit value means "no limit".  RLIM_INFINITY and, where they
+ * are defined, RLIM_SAVED_CUR and RLIM_SAVED_MAX all count as infinite.
+ */
+static bool rlim_isinf(rlim_t r) {
+	bool inf = (r == RLIM_INFINITY);
+
+#ifdef RLIM_SAVED_CUR
+	inf = inf || (r == RLIM_SAVED_CUR);
+#endif
+
+#ifdef RLIM_SAVED_MAX
+	inf = inf || (r == RLIM_SAVED_MAX);
+#endif
+
+	return inf;
+}
+
+/**
+ * Three-way comparison of resource limits.  An infinite limit compares greater
+ * than any finite one, and two infinite limits compare equal.
+ */
+int rlim_cmp(rlim_t a, rlim_t b) {
+	bool a_inf = rlim_isinf(a);
+	bool b_inf = rlim_isinf(b);
+
+	if (!a_inf && !b_inf) {
+		// Both finite: plain numeric comparison
+		return (a > b) - (a < b);
+	}
+
+	return (int)a_inf - (int)b_inf;
+}
+
+/**
+ * Build a device number from major and minor numbers.  Uses the platform's
+ * makedev() macro when one is defined.
+ */
+dev_t xmakedev(int ma, int mi) {
+#ifdef makedev
+ return makedev(ma, mi);
+#else
+ // Fallback layout: major number above the low 8 bits, minor OR'd in
+ return (ma << 8) | mi;
+#endif
+}
+
+/**
+ * Extract the major number from a device number.  Uses the platform's
+ * major() macro when one is defined.
+ */
+int xmajor(dev_t dev) {
+#ifdef major
+ return major(dev);
+#else
+ // Fallback layout: everything above the low 8 bits
+ return dev >> 8;
+#endif
+}
+
+/**
+ * Extract the minor number from a device number.  Uses the platform's
+ * minor() macro when one is defined.
+ */
+int xminor(dev_t dev) {
+#ifdef minor
+ return minor(dev);
+#else
+ // Fallback layout: the low 8 bits
+ return dev & 0xFF;
+#endif
+}
+
+/** waitpid() that restarts itself whenever it is interrupted by a signal. */
+pid_t xwaitpid(pid_t pid, int *status, int flags) {
+	for (;;) {
+		pid_t ret = waitpid(pid, status, flags);
+		// Anything other than an EINTR failure is the final answer
+		if (ret >= 0 || errno != EINTR) {
+			return ret;
+		}
+	}
+}
+
+/** Duplicate a file descriptor with FD_CLOEXEC set on the copy. */
+int dup_cloexec(int fd) {
+#ifdef F_DUPFD_CLOEXEC
+	return fcntl(fd, F_DUPFD_CLOEXEC, 0);
+#else
+	// Two-step fallback: duplicate, then set the flag
+	int copy = dup(fd);
+	if (copy < 0) {
+		return -1;
+	}
+
+	if (fcntl(copy, F_SETFD, FD_CLOEXEC) != -1) {
+		return copy;
+	}
+
+	// Could not set the flag, so don't hand out the descriptor
+	close_quietly(copy);
+	return -1;
+#endif
+}
+
+/** Create a pipe with FD_CLOEXEC set on both ends. */
+int pipe_cloexec(int pipefd[2]) {
+#if BFS_HAS_PIPE2
+	return pipe2(pipefd, O_CLOEXEC);
+#else
+	if (pipe(pipefd) != 0) {
+		return -1;
+	}
+
+	// Set the flag on the read end, then the write end
+	for (int i = 0; i < 2; ++i) {
+		if (fcntl(pipefd[i], F_SETFD, FD_CLOEXEC) == -1) {
+			close_quietly(pipefd[1]);
+			close_quietly(pipefd[0]);
+			return -1;
+		}
+	}
+
+	return 0;
+#endif
+}
+
+/**
+ * Read until nbytes have arrived, end of file, or an error.  EINTR is retried.
+ * On end of file errno is set to 0, so a short count with errno == 0 means EOF.
+ */
+size_t xread(int fd, void *buf, size_t nbytes) {
+	char *out = buf;
+	size_t total = 0;
+
+	while (total < nbytes) {
+		ssize_t got = read(fd, out + total, nbytes - total);
+		if (got > 0) {
+			total += got;
+		} else if (got == 0) {
+			// EOF
+			errno = 0;
+			break;
+		} else if (errno != EINTR) {
+			break;
+		}
+	}
+
+	return total;
+}
+
+/**
+ * Write until all nbytes are out or an error occurs.  EINTR is retried.
+ * A write() that returns 0 stops the loop with errno set to 0.
+ */
+size_t xwrite(int fd, const void *buf, size_t nbytes) {
+	const char *in = buf;
+	size_t total = 0;
+
+	while (total < nbytes) {
+		ssize_t put = write(fd, in + total, nbytes - total);
+		if (put > 0) {
+			total += put;
+		} else if (put == 0) {
+			// EOF?
+			errno = 0;
+			break;
+		} else if (errno != EINTR) {
+			break;
+		}
+	}
+
+	return total;
+}
+
+/** Close a descriptor without letting the close change errno. */
+void close_quietly(int fd) {
+	int saved = errno;
+	xclose(fd);
+	errno = saved;
+}
+
+/** close() wrapper that verifies a failure was not caused by a bad descriptor. */
+int xclose(int fd) {
+	int result = close(fd);
+	if (result == 0) {
+		return 0;
+	}
+
+	// EBADF means we tried to close something that wasn't open
+	bfs_verify(errno != EBADF);
+	return result;
+}
+
+/** faccessat() wrapper that retries with AT_EACCESS when flags == 0 is rejected. */
+int xfaccessat(int fd, const char *path, int amode) {
+	int ret = faccessat(fd, path, amode, 0);
+
+#ifdef AT_EACCESS
+	if (ret == 0) {
+		return ret;
+	}
+
+	// Some platforms, like Hurd, only support AT_EACCESS.  Other platforms,
+	// like Android, don't support AT_EACCESS at all.
+	if (errno == EINVAL || errno == ENOTSUP) {
+		ret = faccessat(fd, path, amode, AT_EACCESS);
+	}
+#endif
+
+	return ret;
+}
+
+/**
+ * Fetch a confstr() value into a malloc()'d string.  Returns NULL on failure,
+ * or with errno set to ENOTSUP when confstr() is unavailable.
+ */
+char *xconfstr(int name) {
+#if BFS_HAS_CONFSTR
+	// The first call only asks how large the value is
+	size_t size = confstr(name, NULL, 0);
+	char *value = (size > 0) ? malloc(size) : NULL;
+
+	// The second call must report the same size, or the value is discarded
+	if (value && confstr(name, value, size) != size) {
+		free(value);
+		value = NULL;
+	}
+
+	return value;
+#else
+	errno = ENOTSUP;
+	return NULL;
+#endif
+}
+
+/**
+ * Read a symbolic link's target into a malloc()'d, NUL-terminated string.  The
+ * buffer starts at the size hint plus one (or 64 when the hint is 0) and is
+ * doubled until the target fits.
+ */
+char *xreadlinkat(int fd, const char *path, size_t size) {
+	// Leave room for the NUL terminator when given a hint
+	size_t cap = (size == 0) ? 64 : size + 1;
+	char *name = NULL;
+
+	for (;;) {
+		char *grown = realloc(name, cap);
+		if (!grown) {
+			break;
+		}
+		name = grown;
+
+		ssize_t len = readlinkat(fd, path, name, cap);
+		if (len < 0) {
+			break;
+		}
+
+		if ((size_t)len < cap) {
+			name[len] = '\0';
+			return name;
+		}
+
+		// A full buffer may mean truncation, so try again with more space
+		cap *= 2;
+	}
+
+	free(name);
+	return NULL;
+}
+
+// Pick whichever flag-parsing function this platform provides, if any
+#if BFS_HAS_STRTOFFLAGS
+# define BFS_STRTOFFLAGS strtofflags
+#elif BFS_HAS_STRING_TO_FLAGS
+# define BFS_STRTOFFLAGS string_to_flags
+#endif
+
+/**
+ * Parse a file flags string with the platform's parser.  Returns the parser's
+ * result with errno set to EINVAL when it is non-zero, or -1 with errno set to
+ * ENOTSUP when no parser is available.
+ */
+int xstrtofflags(const char **str, unsigned long long *set, unsigned long long *clear) {
+#ifdef BFS_STRTOFFLAGS
+ // The platform function takes a non-const char **
+ char *str_arg = (char *)*str;
+
+ // The platform function's flag type differs between systems
+#if __OpenBSD__
+ typedef uint32_t bfs_fflags_t;
+#else
+ typedef unsigned long bfs_fflags_t;
+#endif
+ bfs_fflags_t set_arg = 0;
+ bfs_fflags_t clear_arg = 0;
+
+ int ret = BFS_STRTOFFLAGS(&str_arg, &set_arg, &clear_arg);
+
+ // The outputs are written back whether or not parsing succeeded
+ *str = str_arg;
+ *set = set_arg;
+ *clear = clear_arg;
+
+ if (ret != 0) {
+ errno = EINVAL;
+ }
+ return ret;
+#else // !BFS_STRTOFFLAGS
+ errno = ENOTSUP;
+ return -1;
+#endif
+}
+
+/** sysconf() wrapper that disables MSan interceptor checks on FreeBSD. */
+long xsysconf(int name) {
+#if __FreeBSD__ && SANITIZE_MEMORY
+	// Work around https://github.com/llvm/llvm-project/issues/88163
+	__msan_scoped_disable_interceptor_checks();
+#endif
+
+	long value = sysconf(name);
+
+#if __FreeBSD__ && SANITIZE_MEMORY
+	__msan_scoped_enable_interceptor_checks();
+#endif
+
+	return value;
+}
+
+/** Length of the leading all-ASCII part of a NUL-terminated string. */
+size_t asciilen(const char *str) {
+	size_t len = strlen(str);
+	return asciinlen(str, len);
+}
+
+/**
+ * Length of the leading all-ASCII part of the first n bytes of str.  Scans a
+ * size_t word at a time where possible, then finishes byte by byte.
+ */
+size_t asciinlen(const char *str, size_t n) {
+ size_t i = 0;
+
+#if SIZE_WIDTH % 8 == 0
+ // Word-at-a-time isascii()
+ for (size_t word; i + sizeof(word) <= n; i += sizeof(word)) {
+ // memcpy() is used to load the word from the string
+ memcpy(&word, str + i, sizeof(word));
+
+ // Keep only the top bit of every byte; ASCII bytes contribute nothing
+ const size_t mask = (SIZE_MAX / 0xFF) << 7; // 0x808080...
+ word &= mask;
+ if (!word) {
+ continue;
+ }
+
+ // On big-endian hosts the word is byte-swapped before counting zeros;
+ // on hosts that are neither, leave it to the byte loop below
+#if ENDIAN_NATIVE == ENDIAN_BIG
+ word = bswap(word);
+#elif ENDIAN_NATIVE != ENDIAN_LITTLE
+ break;
+#endif
+
+ // Turn the position of the lowest set bit into a byte index
+ size_t first = trailing_zeros(word) / 8;
+ return i + first;
+ }
+#endif
+
+ // Byte-at-a-time scan of whatever the word loop did not cover
+ for (; i < n; ++i) {
+ if (!xisascii(str[i])) {
+ break;
+ }
+ }
+
+ return i;
+}
+
+/**
+ * Decode the multi-byte character at str[*i] and advance *i past it.
+ *
+ * @param str
+ *         The string to decode.
+ * @param i
+ *         The current index; always advanced by at least one byte.
+ * @param len
+ *         The length of the string.
+ * @param mb
+ *         The decoding state; reset after a decoding failure.
+ * @return
+ *         The decoded wide character, or WEOF if decoding failed.
+ */
+wint_t xmbrtowc(const char *str, size_t *i, size_t len, mbstate_t *mb) {
+	wchar_t wc;
+	size_t mblen = mbrtowc(&wc, str + *i, len - *i, mb);
+
+	if (mblen == (size_t)-1 || mblen == (size_t)-2) {
+		// Invalid or incomplete byte sequence: skip one byte and start over
+		*i += 1;
+		*mb = (mbstate_t){0};
+		return WEOF;
+	}
+
+	// mbrtowc() returns 0 for a NUL character, which still occupies one
+	// byte; without this the index would never move past it
+	*i += mblen ? mblen : 1;
+	return wc;
+}
+
+/** Estimate how many terminal columns a narrow string occupies. */
+size_t xstrwidth(const char *str) {
+	size_t len = strlen(str);
+	size_t ascii = asciinlen(str, len);
+	size_t width = 0;
+	size_t i = 0;
+
+	// Assume all ASCII printables have width 1
+	for (; i < ascii; ++i) {
+		if (xisprint(str[i])) {
+			width += 1;
+		}
+	}
+
+	// Decode the rest one multi-byte character at a time
+	mbstate_t mb = {0};
+	while (i < len) {
+		wint_t wc = xmbrtowc(str, &i, len, &mb);
+		if (wc == WEOF) {
+			// Assume a single-width '?'
+			width += 1;
+		} else {
+			// Only positive widths are counted
+			int cols = xwcwidth(wc);
+			if (cols > 0) {
+				width += cols;
+			}
+		}
+	}
+
+	return width;
+}
+
+/**
+ * Character type flags.  These are bit flags, so one ctype_cache entry can
+ * hold any combination of them.
+ */
+enum ctype {
+ // Set when xisprint() is true for the byte
+ IS_PRINT = 1 << 0,
+ // Set when xisspace() is true for the byte
+ IS_SPACE = 1 << 1,
+};
+
+/** Cached ctypes, indexed by unsigned char value (filled by char_cache_init()). */
+static unsigned char ctype_cache[UCHAR_MAX + 1];
+
+/** Fill ctype_cache with the IS_PRINT/IS_SPACE flags of every byte value. */
+static void char_cache_init(void) {
+	for (size_t c = 0; c <= UCHAR_MAX; ++c) {
+		unsigned char bits = 0;
+
+		if (xisprint(c)) {
+			bits |= IS_PRINT;
+		}
+		if (xisspace(c)) {
+			bits |= IS_SPACE;
+		}
+
+		ctype_cache[c] |= bits;
+	}
+}
+
+/** Check whether a byte may be emitted unescaped under the given flags. */
+static bool wesc_isprint(unsigned char c, enum wesc_flags flags) {
+	unsigned char type = ctype_cache[c];
+	if (type & IS_PRINT) {
+		return true;
+	}
+
+	// Technically a literal newline is safe inside single quotes, but $'\n'
+	// is much nicer than '
+	// '
+	return !(flags & WESC_SHELL) && (type & IS_SPACE);
+}
+
+/**
+ * Check whether a wide character may be emitted unescaped under the given
+ * flags.  Whitespace is only accepted when WESC_SHELL is not set.
+ */
+static bool wesc_iswprint(wchar_t c, enum wesc_flags flags) {
+	return xiswprint(c) || (!(flags & WESC_SHELL) && xiswspace(c));
+}
+
+/** Count the leading bytes of a string that can be printed without escaping. */
+static size_t printable_len(const char *str, size_t len, enum wesc_flags flags) {
+	static pthread_once_t once = PTHREAD_ONCE_INIT;
+	invoke_once(&once, char_cache_init);
+
+	// Fast path: avoid multibyte checks
+	size_t ascii = asciinlen(str, len);
+	size_t i = 0;
+	while (i < ascii) {
+		if (!wesc_isprint(str[i], flags)) {
+			return i;
+		}
+		++i;
+	}
+
+	// Slow path: decode one character at a time, only committing to its
+	// bytes once it is known to be printable
+	mbstate_t mb = {0};
+	size_t next = i;
+	while (i < len) {
+		wint_t wc = xmbrtowc(str, &next, len, &mb);
+		if (wc == WEOF || !wesc_iswprint(wc, flags)) {
+			break;
+		}
+		i = next;
+	}
+
+	return i;
+}
+
+/**
+ * Look up the well-known backslash escape for a character (e.g. "\n").
+ * Returns NULL for characters that have none.
+ */
+static const char *dollar_esc(char c) {
+	// https://www.gnu.org/software/bash/manual/html_node/ANSI_002dC-Quoting.html
+	static const struct {
+		char raw;
+		const char *esc;
+	} table[] = {
+		{'\a', "\\a"},
+		{'\b', "\\b"},
+		{'\033', "\\e"},
+		{'\f', "\\f"},
+		{'\n', "\\n"},
+		{'\r', "\\r"},
+		{'\t', "\\t"},
+		{'\v', "\\v"},
+		{'\'', "\\'"},
+		{'\\', "\\\\"},
+	};
+
+	for (size_t i = 0; i < sizeof(table) / sizeof(table[0]); ++i) {
+		if (table[i].raw == c) {
+			return table[i].esc;
+		}
+	}
+
+	return NULL;
+}
+
+/**
+ * $'Quote' a string for the shell.  Printable characters are copied as they
+ * are; everything else, plus quotes and backslashes, is escaped byte by byte.
+ */
+static char *dollar_quote(char *dest, char *end, const char *str, size_t len, enum wesc_flags flags) {
+	static const char digits[] = "0123456789ABCDEF";
+
+	dest = xstpecpy(dest, end, "$'");
+
+	mbstate_t mb = {0};
+	size_t i = 0;
+	while (i < len) {
+		// Decode one character; it covers the bytes [start, i)
+		size_t start = i;
+		wint_t wc = xmbrtowc(str, &i, len, &mb);
+
+		bool safe = (wc != WEOF) && wesc_iswprint(wc, flags);
+
+		// Quotes and backslashes must be escaped even though they print
+		for (size_t j = start; safe && j < i; ++j) {
+			safe = (str[j] != '\'' && str[j] != '\\');
+		}
+
+		if (safe) {
+			dest = xstpencpy(dest, end, str + start, i - start);
+			continue;
+		}
+
+		for (size_t j = start; j < i; ++j) {
+			unsigned char byte = str[j];
+			const char *esc = dollar_esc(byte);
+			if (esc) {
+				dest = xstpecpy(dest, end, esc);
+				continue;
+			}
+
+			// No named escape: spell the byte out as \xHH
+			char hex[] = {'\\', 'x', digits[byte / 0x10], digits[byte % 0x10], '\0'};
+			dest = xstpecpy(dest, end, hex);
+		}
+	}
+
+	return xstpecpy(dest, end, "'");
+}
+
+/**
+ * How much of this string is safe as a bare word?
+ *
+ * Looks at no more than len bytes, so str does not need a NUL terminator.
+ * Returns the number of leading bytes before the first shell-special
+ * character or NUL.
+ */
+static size_t bare_len(const char *str, size_t len) {
+	// https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_02
+	static const char special[] = "|&;<>()$`\\\"' *?[#~=%!{}";
+
+	// sizeof(special) includes the NUL terminator, so a NUL byte in str
+	// ends the scan just as it would with strcspn()
+	size_t i = 0;
+	while (i < len && !memchr(special, str[i], sizeof(special))) {
+		++i;
+	}
+	return i;
+}
+
+/**
+ * How much of this string is safe to double-quote?
+ *
+ * Looks at no more than len bytes, so str does not need a NUL terminator.
+ * Returns the number of leading bytes before the first character that is
+ * special inside double quotes, or NUL.
+ */
+static size_t quotable_len(const char *str, size_t len) {
+	// https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_02_03
+	static const char special[] = "`$\\\"!";
+
+	// sizeof(special) includes the NUL terminator, so a NUL byte in str
+	// ends the scan just as it would with strcspn()
+	size_t i = 0;
+	while (i < len && !memchr(special, str[i], sizeof(special))) {
+		++i;
+	}
+	return i;
+}
+
+/** Wrap the first len bytes of a string in double quotes, without escaping. */
+static char *double_quote(char *dest, char *end, const char *str, size_t len) {
+	char *out = xstpecpy(dest, end, "\"");
+	out = xstpencpy(out, end, str, len);
+	out = xstpecpy(out, end, "\"");
+	return out;
+}
+
+/**
+ * 'Quote' a string for the shell.
+ *
+ * Runs without single quotes are wrapped in '...'.  Each single quote is
+ * written as \' outside of any quoted run.  Only the first len bytes of str
+ * are examined, so str does not need a NUL terminator.
+ */
+static char *single_quote(char *dest, char *end, const char *str, size_t len) {
+	bool open = false;
+
+	while (len > 0) {
+		// Bounded search for the next quote.  Either the chunk is
+		// non-empty or str starts with a quote, so every iteration
+		// consumes input.
+		const char *quote = memchr(str, '\'', len);
+		size_t chunk = quote ? (size_t)(quote - str) : len;
+		if (chunk > 0) {
+			if (!open) {
+				dest = xstpecpy(dest, end, "'");
+				open = true;
+			}
+			dest = xstpencpy(dest, end, str, chunk);
+			str += chunk;
+			len -= chunk;
+		}
+
+		while (len > 0 && *str == '\'') {
+			// Close the current run before emitting the escaped quote
+			if (open) {
+				dest = xstpecpy(dest, end, "'");
+				open = false;
+			}
+			dest = xstpecpy(dest, end, "\\'");
+			++str;
+			--len;
+		}
+	}
+
+	if (open) {
+		dest = xstpecpy(dest, end, "'");
+	}
+
+	return dest;
+}
+
+/** Escape a whole NUL-terminated string; see wordnesc(). */
+char *wordesc(char *dest, char *end, const char *str, enum wesc_flags flags) {
+	// No length limit: the string's own terminator bounds it
+	const size_t unlimited = SIZE_MAX;
+	return wordnesc(dest, end, str, unlimited, flags);
+}
+
+/**
+ * Escape at most n bytes of a string, choosing between bare output,
+ * double quotes, single quotes, and $'...' quoting.
+ */
+char *wordnesc(char *dest, char *end, const char *str, size_t n, enum wesc_flags flags) {
+	size_t len = strnlen(str, n);
+	char *out;
+
+	if (printable_len(str, len, flags) < len) {
+		// String contains unprintable chars, use $'this\x7Fsyntax'
+		out = dollar_quote(dest, end, str, len, flags);
+	} else if (!(flags & WESC_SHELL) || bare_len(str, len) == len) {
+		// Whole string is safe as a bare word
+		out = xstpencpy(dest, end, str, len);
+	} else if (quotable_len(str, len) == len) {
+		// Whole string is safe to double-quote
+		out = double_quote(dest, end, str, len);
+	} else {
+		// Single-quote the whole string
+		out = single_quote(dest, end, str, len);
+	}
+
+	// Nothing was written, so emit an explicit empty word
+	if (out == dest) {
+		out = xstpecpy(out, end, "\"\"");
+	}
+
+	return out;
+}
diff --git a/src/bfstd.h b/src/bfstd.h
new file mode 100644
index 0000000..d06bbd9
--- /dev/null
+++ b/src/bfstd.h
@@ -0,0 +1,523 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+/**
+ * Standard library wrappers and polyfills.
+ */
+
+#ifndef BFS_BFSTD_H
+#define BFS_BFSTD_H
+
+#include "prelude.h"
+#include "sanity.h"
+#include <stddef.h>
+
+#include <ctype.h>
+
+/**
+ * Work around https://github.com/llvm/llvm-project/issues/65532 by forcing a
+ * function, not a macro, to be called.
+ */
+#if __FreeBSD__ && SANITIZE_MEMORY
+# define BFS_INTERCEPT(fn) (fn)
+#else
+# define BFS_INTERCEPT(fn) fn
+#endif
+
+/**
+ * Wrap isalpha()/isdigit()/etc.
+ */
+#define BFS_ISCTYPE(fn, c) BFS_INTERCEPT(fn)((unsigned char)(c))
+
+#define xisalnum(c) BFS_ISCTYPE(isalnum, c)
+#define xisalpha(c) BFS_ISCTYPE(isalpha, c)
+#define xisascii(c) BFS_ISCTYPE(isascii, c)
+#define xiscntrl(c) BFS_ISCTYPE(iscntrl, c)
+#define xisdigit(c) BFS_ISCTYPE(isdigit, c)
+#define xislower(c) BFS_ISCTYPE(islower, c)
+#define xisgraph(c) BFS_ISCTYPE(isgraph, c)
+#define xisprint(c) BFS_ISCTYPE(isprint, c)
+#define xispunct(c) BFS_ISCTYPE(ispunct, c)
+#define xisspace(c) BFS_ISCTYPE(isspace, c)
+#define xisupper(c) BFS_ISCTYPE(isupper, c)
+#define xisxdigit(c) BFS_ISCTYPE(isxdigit, c)
+
+// #include <errno.h>
+
+/**
+ * Check if an error code is "like" another one. For example, ENOTDIR is
+ * like ENOENT because they can both be triggered by non-existent paths.
+ *
+ * @param error
+ * The error code to check.
+ * @param category
+ * The category to test for. Known categories include ENOENT, ENOSYS,
+ * and ENAMETOOLONG.
+ * @return
+ * Whether the error belongs to the given category.
+ */
+bool error_is_like(int error, int category);
+
+/**
+ * Equivalent to error_is_like(errno, category).
+ */
+bool errno_is_like(int category);
+
+/**
+ * Apply the "negative errno" convention.
+ *
+ * @param ret
+ * The return value of the attempted operation.
+ * @return
+ * ret, if non-negative, otherwise -errno.
+ */
+int try(int ret);
+
+#include <fcntl.h>
+
+#ifndef O_EXEC
+# ifdef O_PATH
+# define O_EXEC O_PATH
+# else
+# define O_EXEC O_RDONLY
+# endif
+#endif
+
+#ifndef O_SEARCH
+# ifdef O_PATH
+# define O_SEARCH O_PATH
+# else
+# define O_SEARCH O_RDONLY
+# endif
+#endif
+
+#ifndef O_DIRECTORY
+# define O_DIRECTORY 0
+#endif
+
+#include <fnmatch.h>
+
+#if !defined(FNM_CASEFOLD) && defined(FNM_IGNORECASE)
+# define FNM_CASEFOLD FNM_IGNORECASE
+#endif
+
+// #include <libgen.h>
+
+/**
+ * Re-entrant dirname() variant that always allocates a copy.
+ *
+ * @param path
+ * The path in question.
+ * @return
+ * The parent directory of the path.
+ */
+char *xdirname(const char *path);
+
+/**
+ * Re-entrant basename() variant that always allocates a copy.
+ *
+ * @param path
+ * The path in question.
+ * @return
+ * The final component of the path.
+ */
+char *xbasename(const char *path);
+
+/**
+ * Find the offset of the final component of a path.
+ *
+ * @param path
+ * The path in question.
+ * @return
+ * The offset of the basename.
+ */
+size_t xbaseoff(const char *path);
+
+#include <stdio.h>
+
+/**
+ * fopen() variant that takes open() style flags.
+ *
+ * @param path
+ * The path to open.
+ * @param flags
+ * Flags to pass to open().
+ */
+FILE *xfopen(const char *path, int flags);
+
+/**
+ * Convenience wrapper for getdelim().
+ *
+ * @param file
+ * The file to read.
+ * @param delim
+ * The delimiter character to split on.
+ * @return
+ * The read chunk (without the delimiter), allocated with malloc().
+ * NULL is returned on error (errno != 0) or end of file (errno == 0).
+ */
+char *xgetdelim(FILE *file, char delim);
+
+// #include <stdlib.h>
+
+/**
+ * Wrapper for getprogname() or equivalent functionality.
+ *
+ * @return
+ * The basename of the currently running program.
+ */
+const char *xgetprogname(void);
+
+/**
+ * Process a yes/no prompt.
+ *
+ * @return 1 for yes, 0 for no, and -1 for unknown.
+ */
+int ynprompt(void);
+
+// #include <string.h>
+
+/**
+ * Get the length of the pure-ASCII prefix of a string.
+ */
+size_t asciilen(const char *str);
+
+/**
+ * Get the length of the pure-ASCII prefix of a string.
+ *
+ * @param str
+ * The string to check.
+ * @param n
+ * The maximum prefix length.
+ */
+size_t asciinlen(const char *str, size_t n);
+
+/**
+ * Allocate a copy of a region of memory.
+ *
+ * @param src
+ * The memory region to copy.
+ * @param size
+ * The size of the memory region.
+ * @return
+ * A copy of the region, allocated with malloc(), or NULL on failure.
+ */
+void *xmemdup(const void *src, size_t size);
+
+/**
+ * A nice string copying function.
+ *
+ * @param dest
+ * The NUL terminator of the destination string, or `end` if it is
+ * already truncated.
+ * @param end
+ * The end of the destination buffer.
+ * @param src
+ * The string to copy from.
+ * @return
+ * The new NUL terminator of the destination, or `end` on truncation.
+ */
+char *xstpecpy(char *dest, char *end, const char *src);
+
+/**
+ * A nice string copying function.
+ *
+ * @param dest
+ * The NUL terminator of the destination string, or `end` if it is
+ * already truncated.
+ * @param end
+ * The end of the destination buffer.
+ * @param src
+ * The string to copy from.
+ * @param n
+ * The maximum number of characters to copy.
+ * @return
+ * The new NUL terminator of the destination, or `end` on truncation.
+ */
+char *xstpencpy(char *dest, char *end, const char *src, size_t n);
+
+/**
+ * Thread-safe strerror().
+ *
+ * @param errnum
+ * An error number.
+ * @return
+ * A string describing that error, which remains valid until the next
+ * xstrerror() call in the same thread.
+ */
+const char *xstrerror(int errnum);
+
+/**
+ * Format a mode like ls -l (e.g. -rw-r--r--).
+ *
+ * @param mode
+ * The mode to format.
+ * @param str
+ * The string to hold the formatted mode.
+ */
+void xstrmode(mode_t mode, char str[11]);
+
+#include <sys/resource.h>
+
+/**
+ * Compare two rlim_t values, accounting for infinite limits.
+ */
+int rlim_cmp(rlim_t a, rlim_t b);
+
+#include <sys/types.h>
+
+/**
+ * Portable version of makedev().
+ */
+dev_t xmakedev(int ma, int mi);
+
+/**
+ * Portable version of major().
+ */
+int xmajor(dev_t dev);
+
+/**
+ * Portable version of minor().
+ */
+int xminor(dev_t dev);
+
+// #include <sys/stat.h>
+
+/**
+ * Get the access/change/modification time from a struct stat.
+ */
+#if BFS_HAS_ST_ACMTIM
+# define ST_ATIM(sb) (sb).st_atim
+# define ST_CTIM(sb) (sb).st_ctim
+# define ST_MTIM(sb) (sb).st_mtim
+#elif BFS_HAS_ST_ACMTIMESPEC
+# define ST_ATIM(sb) (sb).st_atimespec
+# define ST_CTIM(sb) (sb).st_ctimespec
+# define ST_MTIM(sb) (sb).st_mtimespec
+#else
+# define ST_ATIM(sb) ((struct timespec) { .tv_sec = (sb).st_atime })
+# define ST_CTIM(sb) ((struct timespec) { .tv_sec = (sb).st_ctime })
+# define ST_MTIM(sb) ((struct timespec) { .tv_sec = (sb).st_mtime })
+#endif
+
+// #include <sys/wait.h>
+
+/**
+ * waitpid() wrapper that handles EINTR.
+ */
+pid_t xwaitpid(pid_t pid, int *status, int flags);
+
+// #include <unistd.h>
+
+/**
+ * Like dup(), but set the FD_CLOEXEC flag.
+ *
+ * @param fd
+ * The file descriptor to duplicate.
+ * @return
+ * A duplicated file descriptor, or -1 on failure.
+ */
+int dup_cloexec(int fd);
+
+/**
+ * Like pipe(), but set the FD_CLOEXEC flag.
+ *
+ * @param pipefd
+ * The array to hold the two file descriptors.
+ * @return
+ * 0 on success, -1 on failure.
+ */
+int pipe_cloexec(int pipefd[2]);
+
+/**
+ * A safe version of read() that handles interrupted system calls and partial
+ * reads.
+ *
+ * @return
+ * The number of bytes read. A value != nbytes indicates an error
+ * (errno != 0) or end of file (errno == 0).
+ */
+size_t xread(int fd, void *buf, size_t nbytes);
+
+/**
+ * A safe version of write() that handles interrupted system calls and partial
+ * writes.
+ *
+ * @return
+ * The number of bytes written. A value != nbytes indicates an error.
+ */
+size_t xwrite(int fd, const void *buf, size_t nbytes);
+
+/**
+ * close() variant that preserves errno.
+ *
+ * @param fd
+ * The file descriptor to close.
+ */
+void close_quietly(int fd);
+
+/**
+ * close() wrapper that asserts the file descriptor is valid.
+ *
+ * @param fd
+ * The file descriptor to close.
+ * @return
+ * 0 on success, or -1 on error.
+ */
+int xclose(int fd);
+
+/**
+ * Wrapper for faccessat() that handles some portability issues.
+ */
+int xfaccessat(int fd, const char *path, int amode);
+
+/**
+ * readlinkat() wrapper that dynamically allocates the result.
+ *
+ * @param fd
+ * The base directory descriptor.
+ * @param path
+ * The path to the link, relative to fd.
+ * @param size
+ * An estimate for the size of the link name (pass 0 if unknown).
+ * @return
+ * The target of the link, allocated with malloc(), or NULL on failure.
+ */
+char *xreadlinkat(int fd, const char *path, size_t size);
+
+/**
+ * Wrapper for confstr() that allocates with malloc().
+ *
+ * @param name
+ * The ID of the confstr to look up.
+ * @return
+ * The value of the confstr, or NULL on failure.
+ */
+char *xconfstr(int name);
+
+/**
+ * Portability wrapper for strtofflags().
+ *
+ * @param str
+ * The string to parse. The pointee will be advanced to the first
+ * invalid position on error.
+ * @param set
+ * The flags that are set in the string.
+ * @param clear
+ * The flags that are cleared in the string.
+ * @return
+ * 0 on success, -1 on failure.
+ */
+int xstrtofflags(const char **str, unsigned long long *set, unsigned long long *clear);
+
+/**
+ * Wrapper for sysconf() that works around an MSan bug.
+ */
+long xsysconf(int name);
+
+#include <wchar.h>
+
+/**
+ * Error-recovering mbrtowc() wrapper.
+ *
+ * @param str
+ * The string to convert.
+ * @param i
+ * The current index.
+ * @param len
+ * The length of the string.
+ * @param mb
+ * The multi-byte decoding state.
+ * @return
+ * The wide character at index *i, or WEOF if decoding fails. In either
+ * case, *i will be advanced to the next multi-byte character.
+ */
+wint_t xmbrtowc(const char *str, size_t *i, size_t len, mbstate_t *mb);
+
+/**
+ * wcswidth() variant that works on narrow strings.
+ *
+ * @param str
+ * The string to measure.
+ * @return
+ * The likely width of that string in a terminal.
+ */
+size_t xstrwidth(const char *str);
+
+/**
+ * wcwidth() wrapper that works around LLVM bug #65532.
+ */
+#define xwcwidth BFS_INTERCEPT(wcwidth)
+
+#include <wctype.h>
+
+/**
+ * Wrap iswalpha()/iswdigit()/etc.
+ */
+#define BFS_ISWCTYPE(fn, c) BFS_INTERCEPT(fn)(c)
+
+#define xiswalnum(c) BFS_ISWCTYPE(iswalnum, c)
+#define xiswalpha(c) BFS_ISWCTYPE(iswalpha, c)
+#define xiswcntrl(c) BFS_ISWCTYPE(iswcntrl, c)
+#define xiswdigit(c) BFS_ISWCTYPE(iswdigit, c)
+#define xiswlower(c) BFS_ISWCTYPE(iswlower, c)
+#define xiswgraph(c) BFS_ISWCTYPE(iswgraph, c)
+#define xiswprint(c) BFS_ISWCTYPE(iswprint, c)
+#define xiswpunct(c) BFS_ISWCTYPE(iswpunct, c)
+#define xiswspace(c) BFS_ISWCTYPE(iswspace, c)
+#define xiswupper(c) BFS_ISWCTYPE(iswupper, c)
+#define xiswxdigit(c) BFS_ISWCTYPE(iswxdigit, c)
+
+// #include <wordexp.h>
+
+/**
+ * Flags for wordesc().
+ */
+enum wesc_flags {
+ /**
+ * Escape special characters so that the shell will treat the escaped
+ * string as a single word.
+ */
+ WESC_SHELL = 1 << 0,
+ /**
+ * Escape special characters so that the escaped string is safe to print
+ * to a TTY.
+ */
+ WESC_TTY = 1 << 1,
+};
+
+/**
+ * Escape a string as a single shell word.
+ *
+ * @param dest
+ * The destination string to fill.
+ * @param end
+ * The end of the destination buffer.
+ * @param str
+ * The string to escape.
+ * @param flags
+ * Controls which characters to escape.
+ * @return
+ * The new NUL terminator of the destination, or `end` on truncation.
+ */
+char *wordesc(char *dest, char *end, const char *str, enum wesc_flags flags);
+
+/**
+ * Escape a string as a single shell word.
+ *
+ * @param dest
+ * The destination string to fill.
+ * @param end
+ * The end of the destination buffer.
+ * @param str
+ * The string to escape.
+ * @param n
+ * The maximum length of the string.
+ * @param flags
+ * Controls which characters to escape.
+ * @return
+ * The new NUL terminator of the destination, or `end` on truncation.
+ */
+char *wordnesc(char *dest, char *end, const char *str, size_t n, enum wesc_flags flags);
+
+#endif // BFS_BFSTD_H
diff --git a/src/bftw.c b/src/bftw.c
new file mode 100644
index 0000000..5322181
--- /dev/null
+++ b/src/bftw.c
@@ -0,0 +1,2326 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+/**
+ * The bftw() implementation consists of the following components:
+ *
+ * - struct bftw_file: A file that has been encountered during the traversal.
+ * They have reference-counted links to their parents in the directory tree.
+ *
+ * - struct bftw_list: A linked list of bftw_file's.
+ *
+ * - struct bftw_queue: A multi-stage queue of bftw_file's.
+ *
+ * - struct bftw_cache: An LRU list of bftw_file's with open file descriptors,
+ * used for openat() to minimize the amount of path re-traversals.
+ *
+ * - struct bftw_state: Represents the current state of the traversal, allowing
+ * various helper functions to take fewer parameters.
+ */
+
+#include "prelude.h"
+#include "bftw.h"
+#include "alloc.h"
+#include "bfstd.h"
+#include "diag.h"
+#include "dir.h"
+#include "dstring.h"
+#include "ioq.h"
+#include "list.h"
+#include "mtab.h"
+#include "stat.h"
+#include "trie.h"
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+/**
+ * Point a bftw_stat cache at the given buffers and mark it empty.
+ *
+ * A negative error value means "nothing cached yet" for that slot; see
+ * bftw_stat_impl().
+ */
+static void bftw_stat_init(struct bftw_stat *bufs, struct bfs_stat *stat_buf, struct bfs_stat *lstat_buf) {
+    // Nothing has been cached for either follow mode yet
+    bufs->stat_err = -1;
+    bufs->lstat_err = -1;
+
+    bufs->stat_buf = stat_buf;
+    bufs->lstat_buf = lstat_buf;
+}
+
+/**
+ * Copy cached results from one bftw_stat cache into another.
+ *
+ * Only slots that are still empty in dest (negative error) and populated in
+ * src (non-negative error) are copied; existing results are never replaced.
+ */
+static void bftw_stat_fill(struct bftw_stat *dest, const struct bftw_stat *src) {
+    bool need_stat = dest->stat_err < 0;
+    bool need_lstat = dest->lstat_err < 0;
+
+    if (need_stat && src->stat_err >= 0) {
+        dest->stat_err = src->stat_err;
+        dest->stat_buf = src->stat_buf;
+    }
+
+    if (need_lstat && src->lstat_err >= 0) {
+        dest->lstat_err = src->lstat_err;
+        dest->lstat_buf = src->lstat_buf;
+    }
+}
+
+/**
+ * Record the outcome of a bfs_stat() call in a bftw_stat cache.
+ *
+ * @param bufs
+ *         The cache to update.
+ * @param flags
+ *         The flags the bfs_stat() call was made with.
+ * @param buf
+ *         The resulting buffer (only dereferenced when err is zero).
+ * @param err
+ *         Zero on success, otherwise the error code.
+ */
+static void bftw_stat_cache(struct bftw_stat *bufs, enum bfs_stat_flags flags, const struct bfs_stat *buf, int err) {
+    if (flags & BFS_STAT_NOFOLLOW) {
+        bufs->lstat_buf = buf;
+        bufs->lstat_err = err;
+
+        if (!err && S_ISLNK(buf->mode)) {
+            // A symlink: this result says nothing about the followed lookup
+            return;
+        }
+        // Non-link (or error), so the stat slot shares this result below
+    } else if (flags & BFS_STAT_TRYFOLLOW) {
+        if (err) {
+            // Only the error is recorded; the stat buffer is left alone
+            bufs->stat_err = err;
+            return;
+        }
+
+        if (S_ISLNK(buf->mode)) {
+            // We got the link itself back, so the followed lookup is
+            // cached as ENOENT and the buffer is kept as the lstat result
+            bufs->lstat_buf = buf;
+            bufs->lstat_err = err;
+            bufs->stat_err = ENOENT;
+            return;
+        }
+    }
+
+    bufs->stat_buf = buf;
+    bufs->stat_err = err;
+}
+
+/**
+ * Caching bfs_stat().
+ *
+ * If a result for the requested follow mode is already cached, returns the
+ * cached buffer (on success) or NULL with errno set (on a cached failure).
+ * Otherwise calls bfs_stat() into the cache's buffer and records the outcome
+ * with bftw_stat_cache().
+ */
+static const struct bfs_stat *bftw_stat_impl(struct BFTW *ftwbuf, enum bfs_stat_flags flags) {
+ struct bftw_stat *bufs = &ftwbuf->stat_bufs;
+ struct bfs_stat *buf;
+
+ if (flags & BFS_STAT_NOFOLLOW) {
+ buf = (struct bfs_stat *)bufs->lstat_buf;
+ if (bufs->lstat_err == 0) { // Cached success
+ return buf;
+ } else if (bufs->lstat_err > 0) { // Cached failure
+ errno = bufs->lstat_err;
+ return NULL;
+ }
+ } else {
+ buf = (struct bfs_stat *)bufs->stat_buf;
+ if (bufs->stat_err == 0) { // Cached success
+ return buf;
+ } else if (bufs->stat_err > 0) { // Cached failure
+ errno = bufs->stat_err;
+ return NULL;
+ }
+ }
+
+ struct bfs_stat *ret; // Negative cached error: nothing cached yet, so really stat
+ int err;
+ if (bfs_stat(ftwbuf->at_fd, ftwbuf->at_path, flags, buf) == 0) {
+ ret = buf;
+ err = 0;
+#ifdef S_IFWHT
+ } else if (errno == ENOENT && ftwbuf->type == BFS_WHT) {
+ // This matches the behavior of FTS_WHITEOUT on BSD
+ ret = memset(buf, 0, sizeof(*buf)); // Synthesize an all-zero buffer...
+ ret->mode = S_IFWHT; // ...whose only populated field is the whiteout mode
+ err = 0;
+#endif
+ } else {
+ ret = NULL;
+ err = errno;
+ }
+
+ bftw_stat_cache(bufs, flags, ret, err);
+ return ret;
+}
+
+const struct bfs_stat *bftw_stat(const struct BFTW *ftwbuf, enum bfs_stat_flags flags) {
+    // The stat cache inside the BFTW is updated even through a const pointer
+    struct BFTW *mutbuf = (struct BFTW *)ftwbuf;
+
+    if (!(flags & BFS_STAT_TRYFOLLOW)) {
+        return bftw_stat_impl(mutbuf, flags);
+    }
+
+    // BFS_STAT_TRYFOLLOW: follow first, and fall back to the link itself
+    // only when the followed lookup fails with an ENOENT-like error
+    const struct bfs_stat *followed = bftw_stat_impl(mutbuf, BFS_STAT_FOLLOW);
+    if (followed || !errno_is_like(ENOENT)) {
+        return followed;
+    }
+
+    return bftw_stat_impl(mutbuf, BFS_STAT_NOFOLLOW);
+}
+
+const struct bfs_stat *bftw_cached_stat(const struct BFTW *ftwbuf, enum bfs_stat_flags flags) {
+    const struct bftw_stat *bufs = &ftwbuf->stat_bufs;
+
+    // Decide whether the lstat slot is the one that can answer this query
+    bool use_lstat;
+    if (flags & BFS_STAT_NOFOLLOW) {
+        use_lstat = true;
+    } else if (bufs->stat_err == 0) {
+        return bufs->stat_buf;
+    } else {
+        // With BFS_STAT_TRYFOLLOW, an ENOENT-like failure of the followed
+        // lookup falls back to the link's own info
+        use_lstat = (flags & BFS_STAT_TRYFOLLOW) && error_is_like(bufs->stat_err, ENOENT);
+    }
+
+    if (use_lstat && bufs->lstat_err == 0) {
+        return bufs->lstat_buf;
+    }
+
+    // Nothing usable is cached; this function never calls bfs_stat() itself
+    return NULL;
+}
+
+enum bfs_type bftw_type(const struct BFTW *ftwbuf, enum bfs_stat_flags flags) {
+    enum bfs_type type = ftwbuf->type;
+    bool is_link = type == BFS_LNK;
+
+    // First try to answer from the already-known type, without a stat() call
+    if (flags & BFS_STAT_NOFOLLOW) {
+        if (is_link || (ftwbuf->stat_flags & BFS_STAT_NOFOLLOW)) {
+            return type;
+        }
+    } else if (!is_link) {
+        // Whether following or try-following, a non-link type is returned as is
+        return type;
+    } else if (ftwbuf->stat_flags & BFS_STAT_TRYFOLLOW) {
+        // The recorded type is still a link even though it was determined
+        // with BFS_STAT_TRYFOLLOW: a try-follow query keeps the link type,
+        // a strict follow query is an error
+        return (flags & BFS_STAT_TRYFOLLOW) ? type : BFS_ERROR;
+    }
+
+    // Otherwise we need (possibly cached) stat info
+    const struct bfs_stat *statbuf = bftw_stat(ftwbuf, flags);
+    if (!statbuf) {
+        return BFS_ERROR;
+    }
+    return bfs_mode_to_type(statbuf->mode);
+}
+
+/**
+ * A file.
+ *
+ * Represents a file that has been encountered during the traversal. Files
+ * hold reference-counted links to their parents, and carry intrusive list
+ * nodes so the same object can sit on several lists at once. The name is
+ * stored inline in the trailing flexible array member.
+ */
+struct bftw_file {
+ /** The parent directory, if any. */
+ struct bftw_file *parent;
+ /** The root under which this file was found. */
+ struct bftw_file *root; // Points to the file itself for roots (see bftw_file_new())
+
+ /**
+ * List node for:
+ *
+ * bftw_queue::buffer
+ * bftw_queue::waiting
+ * bftw_file_open()::parents
+ */
+ struct bftw_file *next;
+
+ /**
+ * List node for:
+ *
+ * bftw_queue::ready
+ * bftw_state::to_close
+ */
+ struct { struct bftw_file *next; } ready;
+
+ /**
+ * List node for bftw_cache.
+ */
+ struct {
+ struct bftw_file *prev;
+ struct bftw_file *next;
+ } lru;
+
+ /** This file's depth in the walk. */
+ size_t depth; // 0 for roots
+ /** Reference count (for ->parent). */
+ size_t refcount;
+
+ /** Pin count (for ->fd). */
+ size_t pincount; // While non-zero, the file is kept off the LRU list
+ /** An open descriptor to this file, or -1. */
+ int fd;
+ /** Whether this file has a pending ioq request. */
+ bool ioqueued;
+ /** An open directory for this file, if any. */
+ struct bfs_dir *dir;
+
+ /** This file's type, if known. */
+ enum bfs_type type; // BFS_UNKNOWN until determined
+ /** The device number, for cycle detection. */
+ dev_t dev;
+ /** The inode number, for cycle detection. */
+ ino_t ino;
+
+ /** Cached bfs_stat() info. */
+ struct bftw_stat stat_bufs;
+
+ /** The offset of this file in the full path. */
+ size_t nameoff;
+ /** The length of the file's name. */
+ size_t namelen; // Excludes the NUL terminator
+ /** The file's name. */
+ char name[]; // NUL-terminated; allocated with namelen + 1 bytes
+};
+
+/**
+ * A linked list of bftw_file's.
+ *
+ * Manipulated through the SLIST_*() macros from list.h.
+ */
+struct bftw_list {
+ struct bftw_file *head; // The first file in the list
+ struct bftw_file **tail; // NOTE(review): presumably the slot where the next appended file is linked -- confirm in list.h
+};
+
+/**
+ * bftw_queue flags.
+ *
+ * Each flag is a distinct bit, so flags are combined with | and tested
+ * with &.
+ */
+enum bftw_qflags {
+ /** Track the sync/async service balance. */
+ BFTW_QBALANCE = 1 << 0,
+ /** Buffer files before adding them to the queue. */
+ BFTW_QBUFFER = 1 << 1,
+ /** Use LIFO (stack/DFS) ordering. */
+ BFTW_QLIFO = 1 << 2,
+ /** Maintain a strict order. */
+ BFTW_QORDER = 1 << 3,
+};
+
+/**
+ * A queue of bftw_file's that may be serviced asynchronously.
+ *
+ * A bftw_queue comprises three linked lists each tracking different stages.
+ * When BFTW_QBUFFER is set, files are initially pushed to the buffer:
+ *
+ * ╔═══╗ ╔═══╦═══╗
+ * buffer: ║ 𝘩 ║ ║ 𝘩 ║ 𝘪 ║
+ * ╠═══╬═══╦═══╗ ╠═══╬═══╬═══╗
+ * waiting: ║ e ║ f ║ g ║ → ║ e ║ f ║ g ║
+ * ╠═══╬═══╬═══╬═══╗ ╠═══╬═══╬═══╬═══╗
+ * ready: ║ 𝕒 ║ 𝕓 ║ 𝕔 ║ 𝕕 ║ ║ 𝕒 ║ 𝕓 ║ 𝕔 ║ 𝕕 ║
+ * ╚═══╩═══╩═══╩═══╝ ╚═══╩═══╩═══╩═══╝
+ *
+ * When bftw_queue_flush() is called, the files in the buffer are appended to
+ * the waiting list (or prepended, if BFTW_QLIFO is set):
+ *
+ * ╔═╗
+ * buffer: ║ ║
+ * ╠═╩═╦═══╦═══╦═══╦═══╗
+ * waiting: ║ e ║ f ║ g ║ h ║ i ║
+ * ╠═══╬═══╬═══╬═══╬═══╝
+ * ready: ║ 𝕒 ║ 𝕓 ║ 𝕔 ║ 𝕕 ║
+ * ╚═══╩═══╩═══╩═══╝
+ *
+ * Using the buffer gives a more natural ordering for BFTW_QLIFO, and allows
+ * files to be sorted before adding them to the waiting list. If BFTW_QBUFFER
+ * is not set, files are pushed directly to the waiting list instead.
+ *
+ * Files on the waiting list are waiting to be "serviced" asynchronously by the
+ * ioq (for example, by an ioq_opendir() or ioq_stat() call). While they are
+ * being serviced, they are detached from the queue by bftw_queue_detach() and
+ * are not tracked by the queue at all:
+ *
+ * ╔═╗
+ * buffer: ║ ║
+ * ╠═╩═╦═══╦═══╗ ⎛ ┌───┬───┐ ⎞
+ * waiting: ║ g ║ h ║ i ║ ⎜ ioq: │ 𝓮 │ 𝓯 │ ⎟
+ * ╠═══╬═══╬═══╬═══╗ ⎝ └───┴───┘ ⎠
+ * ready: ║ 𝕒 ║ 𝕓 ║ 𝕔 ║ 𝕕 ║
+ * ╚═══╩═══╩═══╩═══╝
+ *
+ * When their async service is complete, files are reattached to the queue by
+ * bftw_queue_attach(), this time on the ready list:
+ *
+ * ╔═╗
+ * buffer: ║ ║
+ * ╠═╩═╦═══╦═══╗ ⎛ ┌───┐ ⎞
+ * waiting: ║ g ║ h ║ i ║ ⎜ ioq: │ 𝓮 │ ⎟
+ * ╠═══╬═══╬═══╬═══╦═══╗ ⎝ └───┘ ⎠
+ * ready: ║ 𝕒 ║ 𝕓 ║ 𝕔 ║ 𝕕 ║ 𝕗 ║
+ * ╚═══╩═══╩═══╩═══╩═══╝
+ *
+ * Files are added to the ready list in the order they are finished by the ioq.
+ * bftw_queue_pop() pops a file from the ready list if possible. Otherwise, it
+ * pops from the waiting list, and the file must be serviced synchronously.
+ *
+ * However, if BFTW_QORDER is set, files must be popped in the exact order they
+ * are added to the waiting list (to maintain sorted order). In this case,
+ * files are added to the waiting and ready lists at the same time. The
+ * file->ioqueued flag is set while it is in-service, so that bftw() can wait
+ * for it to be truly ready before using it.
+ *
+ * ╔═╗
+ * buffer: ║ ║
+ * ╠═╩═╦═══╦═══╗ ⎛ ┌───┐ ⎞
+ * waiting: ║ g ║ h ║ i ║ ⎜ ioq: │ 𝓮 │ ⎟
+ * ╠═══╬═══╬═══╬═══╦═══╦═══╦═══╦═══╦═══╗ ⎝ └───┘ ⎠
+ * ready: ║ 𝕒 ║ 𝕓 ║ 𝕔 ║ 𝕕 ║ 𝓮 ║ 𝕗 ║ g ║ h ║ i ║
+ * ╚═══╩═══╩═══╩═══╩═══╩═══╩═══╩═══╩═══╝
+ *
+ * If BFTW_QBALANCE is set, queue->imbalance tracks the delta between async
+ * service (negative) and synchronous service (positive). The queue is
+ * considered "balanced" when this number is non-negative. Only a balanced
+ * queue will perform any async service, ensuring work is fairly distributed
+ * between the main thread and the ioq.
+ *
+ * BFTW_QBALANCE is only set for single-threaded ioqs. When an ioq has multiple
+ * threads, it is faster to wait for the ioq to complete an operation than it is
+ * to perform it on the main thread.
+ */
+struct bftw_queue {
+ /** Queue flags. */
+ enum bftw_qflags flags;
+ /** A buffer of files to be enqueued together. */
+ struct bftw_list buffer; // Linked through bftw_file::next
+ /** A list of files which are waiting to be serviced. */
+ struct bftw_list waiting; // Linked through bftw_file::next
+ /** A list of already-serviced files. */
+ struct bftw_list ready; // Linked through bftw_file::ready
+ /** The current size of the queue. */
+ size_t size; // Incremented by bftw_queue_push(), decremented by bftw_queue_pop()
+ /** The number of files currently in-service. */
+ size_t ioqueued;
+ /** Tracks the imbalance between synchronous and async service. */
+ unsigned long imbalance; // Reinterpreted as a signed long by bftw_queue_balanced()
+};
+
+/**
+ * Set up an empty queue with the given flags.
+ */
+static void bftw_queue_init(struct bftw_queue *queue, enum bftw_qflags flags) {
+    // All three stages start out empty
+    SLIST_INIT(&queue->ready);
+    SLIST_INIT(&queue->waiting);
+    SLIST_INIT(&queue->buffer);
+
+    // No files queued, none in service, perfectly balanced
+    queue->imbalance = 0;
+    queue->ioqueued = 0;
+    queue->size = 0;
+
+    queue->flags = flags;
+}
+
+/**
+ * Push a file onto the queue.
+ *
+ * With BFTW_QBUFFER the file only goes into the buffer. Otherwise it goes
+ * straight onto the waiting list (at the front for BFTW_QLIFO, at the back
+ * otherwise), and with BFTW_QORDER also onto the ready list at the same end.
+ */
+static void bftw_queue_push(struct bftw_queue *queue, struct bftw_file *file) {
+    enum bftw_qflags flags = queue->flags;
+
+    if (flags & BFTW_QBUFFER) {
+        SLIST_APPEND(&queue->buffer, file);
+    } else {
+        bool ordered = flags & BFTW_QORDER;
+
+        if (flags & BFTW_QLIFO) {
+            SLIST_PREPEND(&queue->waiting, file);
+            if (ordered) {
+                SLIST_PREPEND(&queue->ready, file, ready);
+            }
+        } else {
+            SLIST_APPEND(&queue->waiting, file);
+            if (ordered) {
+                SLIST_APPEND(&queue->ready, file, ready);
+            }
+        }
+    }
+
+    queue->size++;
+}
+
+/**
+ * Add any buffered files to the queue.
+ *
+ * Does nothing unless BFTW_QBUFFER is set. The buffered files are moved to
+ * the waiting list: in front of the files already waiting if BFTW_QLIFO is
+ * set, behind them otherwise. With BFTW_QORDER, the same files are also
+ * linked into the ready list at the matching position.
+ */
+static void bftw_queue_flush(struct bftw_queue *queue) {
+ if (!(queue->flags & BFTW_QBUFFER)) {
+ return;
+ }
+
+ if (queue->flags & BFTW_QORDER) {
+ // When sorting, add files to the ready list at the same time
+ // (and in the same order) as they are added to the waiting list
+ struct bftw_file **cursor = (queue->flags & BFTW_QLIFO)
+ ? &queue->ready.head
+ : queue->ready.tail;
+ for_slist (struct bftw_file, file, &queue->buffer) {
+ cursor = SLIST_INSERT(&queue->ready, cursor, file, ready); // Advance so buffer order is kept
+ }
+ }
+
+ if (queue->flags & BFTW_QLIFO) {
+ SLIST_EXTEND(&queue->buffer, &queue->waiting); // buffer = buffer + waiting
+ }
+
+ SLIST_EXTEND(&queue->waiting, &queue->buffer); // waiting = waiting + buffer
+}
+
+/**
+ * Check whether the queue may take on more async work.
+ *
+ * Queues without BFTW_QBALANCE are always considered balanced. Otherwise the
+ * imbalance counter, read as a signed value, must be non-negative.
+ */
+static bool bftw_queue_balanced(const struct bftw_queue *queue) {
+    if (!(queue->flags & BFTW_QBALANCE)) {
+        return true;
+    }
+
+    return (long)queue->imbalance >= 0;
+}
+
+/**
+ * Account for one unit of service in the queue's balance.
+ *
+ * Synchronous service moves the counter up, async service moves it down.
+ */
+static void bftw_queue_rebalance(struct bftw_queue *queue, bool async) {
+    if (!async) {
+        queue->imbalance++;
+        return;
+    }
+
+    queue->imbalance--;
+}
+
+/**
+ * Detach the next waiting file.
+ *
+ * The file must be at the head of either the buffer or the waiting list, and
+ * must not already be in service. If async is true, the file is marked as
+ * in-service and the queue's in-service count and balance are updated.
+ */
+static void bftw_queue_detach(struct bftw_queue *queue, struct bftw_file *file, bool async) {
+ bfs_assert(!file->ioqueued);
+
+ if (file == SLIST_HEAD(&queue->buffer)) {
+ // To maintain order, we can't detach any files until they're
+ // added to the waiting/ready lists
+ bfs_assert(!(queue->flags & BFTW_QORDER));
+ SLIST_POP(&queue->buffer);
+ } else if (file == SLIST_HEAD(&queue->waiting)) {
+ SLIST_POP(&queue->waiting);
+ } else {
+ bfs_bug("Detached file was not buffered or waiting");
+ }
+
+ if (async) {
+ file->ioqueued = true;
+ ++queue->ioqueued;
+ bftw_queue_rebalance(queue, true);
+ }
+}
+
+/**
+ * Put a serviced file back on the queue.
+ *
+ * For async service the file's in-service mark is cleared and the queue's
+ * in-service count drops. Unless BFTW_QORDER is set, the file is then
+ * appended to the ready list; with BFTW_QORDER nothing is appended here.
+ */
+static void bftw_queue_attach(struct bftw_queue *queue, struct bftw_file *file, bool async) {
+    if (!async) {
+        bfs_assert(!file->ioqueued);
+    } else {
+        bfs_assert(file->ioqueued);
+        file->ioqueued = false;
+        --queue->ioqueued;
+    }
+
+    if (queue->flags & BFTW_QORDER) {
+        // Ordered queues put files on the ready list up front
+        return;
+    }
+
+    SLIST_APPEND(&queue->ready, file, ready);
+}
+
+/**
+ * Make a file ready immediately.
+ *
+ * Detaches the file and reattaches it right away, both as non-async
+ * operations, so no service is recorded for it.
+ */
+static void bftw_queue_skip(struct bftw_queue *queue, struct bftw_file *file) {
+ bftw_queue_detach(queue, file, false);
+ bftw_queue_attach(queue, file, false);
+}
+
+/**
+ * Peek at the file that should be serviced next, or NULL if there is none.
+ *
+ * Unbuffered and ordered queues only ever offer the head of the waiting
+ * list. Otherwise the buffer is consulted too: before the waiting list for
+ * BFTW_QLIFO, after it otherwise.
+ */
+static struct bftw_file *bftw_queue_waiting(const struct bftw_queue *queue) {
+    enum bftw_qflags flags = queue->flags;
+
+    // Ordered queues must not hand out files until they're on the
+    // waiting/ready lists; unbuffered queues have nothing in the buffer
+    if (!(flags & BFTW_QBUFFER) || (flags & BFTW_QORDER)) {
+        return SLIST_HEAD(&queue->waiting);
+    }
+
+    const struct bftw_list *first;
+    const struct bftw_list *second;
+    if (flags & BFTW_QLIFO) {
+        first = &queue->buffer;
+        second = &queue->waiting;
+    } else {
+        first = &queue->waiting;
+        second = &queue->buffer;
+    }
+
+    struct bftw_file *file = SLIST_HEAD(first);
+    return file ? file : SLIST_HEAD(second);
+}
+
+/**
+ * Get the next ready file.
+ *
+ * Returns the head of the ready list without removing it.
+ */
+static struct bftw_file *bftw_queue_ready(const struct bftw_queue *queue) {
+ return SLIST_HEAD(&queue->ready);
+}
+
+/**
+ * Pop a file from the queue.
+ *
+ * The buffer must be empty. Prefers the ready list and falls back to the
+ * waiting list. If the file popped from the ready list is also the head of
+ * the waiting list, it is popped from there as well. Decrements the queue
+ * size when a file is returned; returns NULL if nothing was popped.
+ */
+static struct bftw_file *bftw_queue_pop(struct bftw_queue *queue) {
+ // Don't pop until we've had a chance to sort the buffer
+ bfs_assert(SLIST_EMPTY(&queue->buffer));
+
+ struct bftw_file *file = SLIST_POP(&queue->ready, ready);
+
+ if (!file || file == SLIST_HEAD(&queue->waiting)) {
+ // If no files are ready, try the waiting list. Or, if
+ // BFTW_QORDER is set, we may need to pop from both lists.
+ file = SLIST_POP(&queue->waiting);
+ }
+
+ if (file) {
+ --queue->size;
+ }
+
+ return file;
+}
+
+/**
+ * A cache of open directories.
+ *
+ * Holds the LRU list of bftw_file's with open file descriptors, plus the
+ * arenas that bftw_file, bfs_dir and bfs_stat objects are allocated from.
+ */
+struct bftw_cache {
+ /** The head of the LRU list. */
+ struct bftw_file *head;
+ /** The tail of the LRU list. */
+ struct bftw_file *tail; // The entry that bftw_cache_pop() closes
+ /** The insertion target for the LRU list. */
+ struct bftw_file *target; // New entries are inserted relative to this one (see bftw_lru_add())
+ /** The remaining capacity of the LRU list. */
+ size_t capacity;
+
+ /** bftw_file arena. */
+ struct varena files;
+
+ /** bfs_dir arena. */
+ struct arena dirs;
+ /** Remaining bfs_dir capacity. */
+ int dir_limit; // Can go negative when bftw_allocdir() is forced
+
+ /** bfs_stat arena. */
+ struct arena stat_bufs;
+};
+
+/**
+ * Set up an empty cache that may hold up to `capacity` open files.
+ *
+ * The number of simultaneously allocated bfs_dir objects is limited to one
+ * less than the capacity, and never more than 1024.
+ */
+static void bftw_cache_init(struct bftw_cache *cache, size_t capacity) {
+    // Empty LRU list with no preferred insertion point
+    LIST_INIT(cache);
+    cache->target = NULL;
+    cache->capacity = capacity;
+
+    if (capacity <= 1024) {
+        cache->dir_limit = capacity - 1;
+    } else {
+        cache->dir_limit = 1024;
+    }
+
+    // Allocators for files, directories, and stat buffers
+    VARENA_INIT(&cache->files, struct bftw_file, name);
+    bfs_dir_arena(&cache->dirs);
+    ARENA_INIT(&cache->stat_bufs, struct bfs_stat);
+}
+
+/**
+ * Allocate a bfs_dir from the cache's arena.
+ *
+ * @param cache
+ *         The cache to allocate from.
+ * @param force
+ *         Allocate even if the directory limit has been reached.
+ * @return
+ *         The new directory, or NULL on failure (errno is set to ENOMEM
+ *         when the limit is what prevented the allocation).
+ */
+static struct bfs_dir *bftw_allocdir(struct bftw_cache *cache, bool force) {
+    bool exhausted = cache->dir_limit <= 0;
+    if (exhausted && !force) {
+        errno = ENOMEM;
+        return NULL;
+    }
+
+    struct bfs_dir *dir = arena_alloc(&cache->dirs);
+    if (!dir) {
+        return NULL;
+    }
+
+    // Only successful allocations count against the limit
+    --cache->dir_limit;
+    return dir;
+}
+
+/**
+ * Free a directory.
+ *
+ * Returns the bfs_dir to the cache's arena and gives back one unit of the
+ * directory limit taken by bftw_allocdir(). Does not close the directory.
+ */
+static void bftw_freedir(struct bftw_cache *cache, struct bfs_dir *dir) {
+ ++cache->dir_limit;
+ arena_free(&cache->dirs, dir);
+}
+
+/**
+ * Remove a bftw_file from the LRU list.
+ *
+ * If the file is the current insertion target, the target moves to the
+ * file's predecessor first. The cache capacity is not touched.
+ */
+static void bftw_lru_remove(struct bftw_cache *cache, struct bftw_file *file) {
+ if (cache->target == file) {
+ cache->target = file->lru.prev;
+ }
+
+ LIST_REMOVE(cache, file, lru);
+}
+
+/**
+ * Remove a bftw_file from the cache.
+ *
+ * Unlinks the file from the LRU list and gives its slot back to the cache
+ * capacity. Does not close the file.
+ */
+static void bftw_cache_remove(struct bftw_cache *cache, struct bftw_file *file) {
+ bftw_lru_remove(cache, file);
+ ++cache->capacity;
+}
+
+/**
+ * Synchronously close a bftw_file and drop it from the cache.
+ *
+ * The file must be open and unpinned. If it has an open bfs_dir, that is
+ * closed and freed; otherwise the raw descriptor is closed.
+ */
+static void bftw_file_close(struct bftw_cache *cache, struct bftw_file *file) {
+    bfs_assert(file->fd >= 0);
+    bfs_assert(file->pincount == 0);
+
+    struct bfs_dir *dir = file->dir;
+    if (!dir) {
+        xclose(file->fd);
+    } else {
+        bfs_closedir(dir);
+        bftw_freedir(cache, dir);
+        file->dir = NULL;
+    }
+
+    file->fd = -1;
+    bftw_cache_remove(cache, file);
+}
+
+/**
+ * Close the file at the tail of the LRU list.
+ *
+ * @return
+ *         0 if a file was closed, or -1 if the LRU list is empty.
+ */
+static int bftw_cache_pop(struct bftw_cache *cache) {
+    struct bftw_file *victim = cache->tail;
+
+    if (victim) {
+        bftw_file_close(cache, victim);
+        return 0;
+    }
+
+    return -1;
+}
+
+/**
+ * Add a bftw_file to the LRU list.
+ *
+ * The file must be open. It is inserted relative to the cache's current
+ * insertion target, and depth-0 files become the new target. The cache
+ * capacity is not touched.
+ */
+static void bftw_lru_add(struct bftw_cache *cache, struct bftw_file *file) {
+ bfs_assert(file->fd >= 0);
+
+ LIST_INSERT(cache, cache->target, file, lru);
+
+ // Prefer to keep the root paths open by keeping them at the head of the list
+ if (file->depth == 0) {
+ cache->target = file;
+ }
+}
+
+/**
+ * Add an open bftw_file to the cache.
+ *
+ * If the cache is full, the least recently used entry is evicted first.
+ * When nothing can be evicted, the file itself is closed instead.
+ *
+ * @return
+ *         0 on success, or -1 with errno set to EMFILE on failure.
+ */
+static int bftw_cache_add(struct bftw_cache *cache, struct bftw_file *file) {
+    bfs_assert(file->fd >= 0);
+
+    if (cache->capacity == 0) {
+        if (bftw_cache_pop(cache) != 0) {
+            // Nothing to evict, so give up on keeping this file open
+            bftw_file_close(cache, file);
+            errno = EMFILE;
+            return -1;
+        }
+    }
+
+    bfs_assert(cache->capacity > 0);
+    cache->capacity--;
+
+    bftw_lru_add(cache, file);
+    return 0;
+}
+
+/**
+ * Pin a cache entry so it won't be closed.
+ *
+ * The first pin takes the file off the LRU list; further pins only bump
+ * the pin count.
+ */
+static void bftw_cache_pin(struct bftw_cache *cache, struct bftw_file *file) {
+    bfs_assert(file->fd >= 0);
+
+    size_t pins_before = file->pincount;
+    file->pincount = pins_before + 1;
+
+    if (pins_before == 0) {
+        bftw_lru_remove(cache, file);
+    }
+}
+
+/**
+ * Drop one pin from a cache entry.
+ *
+ * When the last pin goes away, the file is put back on the LRU list.
+ */
+static void bftw_cache_unpin(struct bftw_cache *cache, struct bftw_file *file) {
+    bfs_assert(file->fd >= 0);
+    bfs_assert(file->pincount > 0);
+
+    file->pincount -= 1;
+    if (file->pincount != 0) {
+        // Still pinned by someone else
+        return;
+    }
+
+    bftw_lru_add(cache, file);
+}
+
+/**
+ * Compute the offset at which a child's name starts in the full path.
+ *
+ * This is the end of the parent's name, plus one more byte unless the
+ * parent's name already ends with a '/'.
+ */
+static size_t bftw_child_nameoff(const struct bftw_file *parent) {
+    size_t parent_end = parent->nameoff + parent->namelen;
+    bool ends_with_slash = parent->name[parent->namelen - 1] == '/';
+
+    return ends_with_slash ? parent_end : parent_end + 1;
+}
+
+/**
+ * Destroy a cache.
+ *
+ * The LRU list must already be empty with no insertion target left. All
+ * three arenas (stat buffers, directories, files) are destroyed.
+ */
+static void bftw_cache_destroy(struct bftw_cache *cache) {
+ bfs_assert(LIST_EMPTY(cache));
+ bfs_assert(!cache->target);
+
+ arena_destroy(&cache->stat_bufs);
+ arena_destroy(&cache->dirs);
+ varena_destroy(&cache->files);
+}
+
+/**
+ * Allocate and initialize a bftw_file for the given name.
+ *
+ * With a parent, the new file inherits the parent's root, sits one level
+ * deeper, and takes a reference on the parent. Without one, the file is its
+ * own root at depth 0.
+ *
+ * @return
+ *         The new file (with a reference count of 1), or NULL if the
+ *         allocation fails.
+ */
+static struct bftw_file *bftw_file_new(struct bftw_cache *cache, struct bftw_file *parent, const char *name) {
+    size_t namelen = strlen(name);
+
+    // Room for the name plus its NUL terminator
+    struct bftw_file *file = varena_alloc(&cache->files, namelen + 1);
+    if (!file) {
+        return NULL;
+    }
+
+    // Copy the name, terminator included
+    memcpy(file->name, name, namelen + 1);
+    file->namelen = namelen;
+
+    // Position in the tree
+    file->parent = parent;
+    if (!parent) {
+        file->root = file;
+        file->depth = 0;
+        file->nameoff = 0;
+    } else {
+        file->root = parent->root;
+        file->depth = parent->depth + 1;
+        file->nameoff = bftw_child_nameoff(parent);
+        ++parent->refcount;
+    }
+
+    // Not on any list yet
+    SLIST_ITEM_INIT(file);
+    SLIST_ITEM_INIT(file, ready);
+    LIST_ITEM_INIT(file, lru);
+
+    // One reference, nothing open, nothing in flight
+    file->refcount = 1;
+    file->pincount = 0;
+    file->ioqueued = false;
+    file->fd = -1;
+    file->dir = NULL;
+
+    // Nothing is known about the file itself yet
+    file->type = BFS_UNKNOWN;
+    file->dev = -1;
+    file->ino = -1;
+    bftw_stat_init(&file->stat_bufs, NULL, NULL);
+
+    return file;
+}
+
+/**
+ * Associate an open directory with a bftw_file.
+ *
+ * The file must not already have a directory. If the file already has a
+ * descriptor, it must be the directory's own descriptor. Otherwise the file
+ * adopts the directory's descriptor and is added to the cache.
+ */
+static void bftw_file_set_dir(struct bftw_cache *cache, struct bftw_file *file, struct bfs_dir *dir) {
+ bfs_assert(!file->dir);
+ file->dir = dir;
+
+ if (file->fd >= 0) {
+ bfs_assert(file->fd == bfs_dirfd(dir));
+ } else {
+ file->fd = bfs_dirfd(dir);
+ bftw_cache_add(cache, file); // NOTE(review): return value is not checked here
+ }
+}
+
+/**
+ * Release a file's cached stat() buffer and reset its stat cache.
+ *
+ * At most one buffer is returned to the arena: the stat buffer if there is
+ * one, otherwise the lstat buffer.
+ */
+static void bftw_stat_recycle(struct bftw_cache *cache, struct bftw_file *file) {
+    struct bftw_stat *bufs = &file->stat_bufs;
+
+    // The cache stores const pointers, but arena_free() needs mutable ones
+    struct bfs_stat *stat_buf = (struct bfs_stat *)bufs->stat_buf;
+    struct bfs_stat *lstat_buf = (struct bfs_stat *)bufs->lstat_buf;
+
+    struct bfs_stat *to_free = stat_buf ? stat_buf : lstat_buf;
+    if (to_free) {
+        arena_free(&cache->stat_bufs, to_free);
+    }
+
+    bftw_stat_init(bufs, NULL, NULL);
+}
+
+/**
+ * Free a bftw_file.
+ *
+ * The reference count must already be zero. Closes the file if it is still
+ * open, recycles its stat buffers, and returns the file (sized for its name
+ * plus NUL terminator) to the arena.
+ */
+static void bftw_file_free(struct bftw_cache *cache, struct bftw_file *file) {
+ bfs_assert(file->refcount == 0);
+
+ if (file->fd >= 0) {
+ bftw_file_close(cache, file);
+ }
+
+ bftw_stat_recycle(cache, file);
+
+ varena_free(&cache->files, file, file->namelen + 1); // Same size as passed to varena_alloc()
+}
+
+/**
+ * Holds the current state of the bftw() traversal.
+ *
+ * Bundling everything here lets the helper functions take a single state
+ * pointer instead of many parameters.
+ */
+struct bftw_state {
+ /** The path(s) to start from. */
+ const char **paths;
+ /** The number of starting paths. */
+ size_t npaths;
+ /** bftw() callback. */
+ bftw_callback *callback;
+ /** bftw() callback data. */
+ void *ptr;
+ /** bftw() flags. */
+ enum bftw_flags flags; // May gain BFTW_BUFFER in bftw_state_init()
+ /** Search strategy. */
+ enum bftw_strategy strategy;
+ /** The mount table. */
+ const struct bfs_mtab *mtab;
+ /** bfs_opendir() flags. */
+ enum bfs_dir_flags dir_flags;
+
+ /** The appropriate errno value, if any. */
+ int error;
+
+ /** The cache of open directories. */
+ struct bftw_cache cache;
+
+ /** The async I/O queue. */
+ struct ioq *ioq; // NULL when there are no I/O threads
+ /** The number of I/O threads. */
+ size_t nthreads;
+
+ /** The queue of unpinned directories to unwrap. */
+ struct bftw_list to_close; // Linked through bftw_file::ready
+ /** The queue of files to visit. */
+ struct bftw_queue fileq;
+ /** The queue of directories to open/read. */
+ struct bftw_queue dirq;
+
+ /** The current path. */
+ dchar *path;
+ /** The current file. */
+ struct bftw_file *file;
+ /** The previous file. */
+ struct bftw_file *previous;
+
+ /** The currently open directory. */
+ struct bfs_dir *dir;
+ /** The current directory entry. */
+ struct bfs_dirent *de;
+ /** Storage for the directory entry. */
+ struct bfs_dirent de_storage;
+ /** Any error encountered while reading the directory. */
+ int direrror;
+
+ /** Extra data about the current file. */
+ struct BFTW ftwbuf;
+ /** stat() buffer storage. */
+ struct bfs_stat stat_buf;
+ /** lstat() buffer storage. */
+ struct bfs_stat lstat_buf;
+};
+
+/**
+ * Decide whether files have to be buffered before they are visited.
+ */
+static bool bftw_must_buffer(const struct bftw_state *state) {
+    // Sorting needs the complete set of files in hand first
+    bool sorting = state->flags & BFTW_SORT;
+
+    // Without buffering, a single-threaded depth-first search would give a
+    // not-quite-depth-first ordering:
+    //
+    //     a
+    //     b
+    //     a/c
+    //     a/c/d
+    //     b/e
+    //     b/e/f
+    //
+    // That is fine for iterative deepening, since the caller only sees
+    // files at the target depth. We also deem it fine for parallel
+    // searches, since the order is unpredictable anyway.
+    bool serial_dfs = state->strategy == BFTW_DFS && state->nthreads == 0;
+
+    // Every file will be buffered for ioq_stat() anyway
+    bool parallel_stat = (state->flags & BFTW_STAT) && state->nthreads > 1;
+
+    return sorting || serial_dfs || parallel_stat;
+}
+
+/**
+ * Initialize the bftw() state.
+ *
+ * Copies the caller's arguments, sizes the directory cache, creates the I/O
+ * queue when threads were requested, and derives the flags for the file and
+ * directory queues.
+ *
+ * Returns 0 on success. Returns -1 with errno set to EMFILE when fewer than
+ * two open file descriptors are allowed, or -1 when ioq_create() fails.
+ */
+static int bftw_state_init(struct bftw_state *state, const struct bftw_args *args) {
+ state->paths = args->paths;
+ state->npaths = args->npaths;
+ state->callback = args->callback;
+ state->ptr = args->ptr;
+ state->flags = args->flags;
+ state->strategy = args->strategy;
+ state->mtab = args->mtab;
+ state->dir_flags = 0;
+ state->error = 0;
+
+ if (args->nopenfd < 2) {
+ errno = EMFILE;
+ return -1;
+ }
+
+ size_t nopenfd = args->nopenfd;
+ size_t qdepth = 4096; // Depth passed to ioq_create()
+ size_t nthreads = args->nthreads;
+
+#if BFS_USE_LIBURING
+ // io_uring uses one fd per ring, ioq uses one ring per thread
+ if (nthreads >= nopenfd - 1) {
+ nthreads = nopenfd - 2; // Leaves at least two descriptors for the cache
+ }
+ nopenfd -= nthreads;
+#endif
+
+ bftw_cache_init(&state->cache, nopenfd);
+
+ if (nthreads > 0) {
+ state->ioq = ioq_create(qdepth, nthreads);
+ if (!state->ioq) {
+ return -1;
+ }
+ } else {
+ state->ioq = NULL;
+ }
+ state->nthreads = nthreads;
+
+ if (bftw_must_buffer(state)) {
+ state->flags |= BFTW_BUFFER;
+ }
+
+ if (state->flags & BFTW_WHITEOUTS) {
+ state->dir_flags |= BFS_DIR_WHITEOUTS;
+ }
+
+ SLIST_INIT(&state->to_close);
+
+ enum bftw_qflags qflags = 0;
+ if (state->strategy != BFTW_BFS) {
+ qflags |= BFTW_QBUFFER | BFTW_QLIFO;
+ }
+ if (state->flags & BFTW_BUFFER) {
+ qflags |= BFTW_QBUFFER;
+ }
+ if (state->flags & BFTW_SORT) {
+ qflags |= BFTW_QORDER;
+ } else if (nthreads == 1) {
+ qflags |= BFTW_QBALANCE; // Only balance work against a single I/O thread
+ }
+ bftw_queue_init(&state->fileq, qflags);
+
+ if (state->strategy == BFTW_BFS || (state->flags & BFTW_BUFFER)) {
+ // In breadth-first mode, or if we're already buffering files,
+ // directories can be queued in FIFO order
+ qflags &= ~(BFTW_QBUFFER | BFTW_QLIFO);
+ }
+ bftw_queue_init(&state->dirq, qflags);
+
+ state->path = NULL;
+ state->file = NULL;
+ state->previous = NULL;
+
+ state->dir = NULL;
+ state->de = NULL;
+ state->direrror = 0;
+
+ return 0;
+}
+
+/**
+ * Schedule a file's open directory to be unwrapped later.
+ *
+ * The file is appended to the to_close list unless it is already on it.
+ */
+static void bftw_delayed_unwrap(struct bftw_state *state, struct bftw_file *file) {
+    bfs_assert(file->dir);
+
+    if (SLIST_ATTACHED(&state->to_close, file, ready)) {
+        // Already scheduled
+        return;
+    }
+
+    SLIST_APPEND(&state->to_close, file, ready);
+}
+
+/**
+ * Drop the pin that a file holds on its parent, if it has a parent.
+ *
+ * @param state
+ *         The traversal state.
+ * @param file
+ *         The file whose parent should be unpinned.
+ * @param unwrap
+ *         Whether to schedule the parent's directory for unwrapping once
+ *         it is no longer pinned by anyone.
+ */
+static void bftw_unpin_parent(struct bftw_state *state, struct bftw_file *file, bool unwrap) {
+    struct bftw_file *parent = file->parent;
+    if (!parent) {
+        return;
+    }
+
+    bftw_cache_unpin(&state->cache, parent);
+
+    if (!unwrap) {
+        return;
+    }
+
+    if (parent->dir && parent->pincount == 0) {
+        bftw_delayed_unwrap(state, parent);
+    }
+}
+
+/**
+ * Pop a response from the I/O queue.
+ *
+ * Handles one completed request: unpins the parent of the associated file
+ * (if the request carried one), then updates the cache accounting and queues
+ * according to the kind of operation that finished.
+ *
+ * Returns the completed operation's ioq_op value, or -1 if there is no I/O
+ * queue or ioq_pop() returned nothing.
+ */
+static int bftw_ioq_pop(struct bftw_state *state, bool block) {
+ struct bftw_cache *cache = &state->cache;
+ struct ioq *ioq = state->ioq;
+ if (!ioq) {
+ return -1;
+ }
+
+ struct ioq_ent *ent = ioq_pop(ioq, block);
+ if (!ent) {
+ return -1;
+ }
+
+ struct bftw_file *file = ent->ptr; // NULL for the close/closedir requests issued in this file
+ if (file) {
+ bftw_unpin_parent(state, file, true);
+ }
+
+ enum ioq_op op = ent->op;
+ switch (op) {
+ case IOQ_CLOSE:
+ ++cache->capacity; // The descriptor's slot is free again
+ break;
+
+ case IOQ_CLOSEDIR:
+ ++cache->capacity;
+ bftw_freedir(cache, ent->closedir.dir);
+ break;
+
+ case IOQ_OPENDIR:
+ ++cache->capacity; // Undo the reservation made by bftw_ioq_opendir()
+
+ if (ent->result >= 0) {
+ bftw_file_set_dir(cache, file, ent->opendir.dir);
+ } else {
+ bftw_freedir(cache, ent->opendir.dir);
+ }
+
+ bftw_queue_attach(&state->dirq, file, true);
+ break;
+
+ case IOQ_STAT:
+ if (ent->result >= 0) {
+ bftw_stat_cache(&file->stat_bufs, ent->stat.flags, ent->stat.buf, 0);
+ } else {
+ arena_free(&cache->stat_bufs, ent->stat.buf);
+ bftw_stat_cache(&file->stat_bufs, ent->stat.flags, NULL, -ent->result); // A negative result is a negated error code
+ }
+
+ bftw_queue_attach(&state->fileq, file, true);
+ break;
+ }
+
+ ioq_free(ioq, ent);
+ return op;
+}
+
+/**
+ * Try to make sure the I/O queue can accept another request.
+ *
+ * @return
+ *         0 if there is room (possibly after popping a response), or -1 if
+ *         there is no I/O queue or no response could be popped.
+ */
+static int bftw_ioq_reserve(struct bftw_state *state) {
+    struct ioq *ioq = state->ioq;
+    if (!ioq) {
+        return -1;
+    }
+
+    if (ioq_capacity(ioq) > 0) {
+        return 0;
+    }
+
+    // With more than one background thread, it's faster to wait on
+    // background I/O than it is to do it on the main thread
+    bool block = state->nthreads > 1;
+
+    return bftw_ioq_pop(state, block) < 0 ? -1 : 0;
+}
+
+/**
+ * Try to make sure the cache has room for one more open file.
+ *
+ * Pending I/O responses are drained first, since they may free up capacity.
+ * Once no more responses can be popped, the least recently used file is
+ * closed.
+ *
+ * @return
+ *         0 if capacity is available, or -1 with errno set to EMFILE.
+ */
+static int bftw_cache_reserve(struct bftw_state *state) {
+    struct bftw_cache *cache = &state->cache;
+
+    while (cache->capacity == 0) {
+        if (bftw_ioq_pop(state, true) >= 0) {
+            // A response was handled; check the capacity again
+            continue;
+        }
+
+        // Nothing left in flight, so evict something instead
+        if (bftw_cache_pop(cache) != 0) {
+            errno = EMFILE;
+            return -1;
+        }
+
+        bfs_assert(cache->capacity > 0);
+        return 0;
+    }
+
+    return 0;
+}
+
+/**
+ * Open a bftw_file relative to another one.
+ *
+ * The file must not already be open. The base (if any) is pinned for the
+ * duration of the call so that its descriptor stays valid. On success, the
+ * new descriptor is stored in the file and the file is added to the cache.
+ *
+ * Returns the new file descriptor, or a negative value on failure.
+ */
+static int bftw_file_openat(struct bftw_state *state, struct bftw_file *file, struct bftw_file *base, const char *at_path) {
+ bfs_assert(file->fd < 0);
+
+ struct bftw_cache *cache = &state->cache;
+
+ int at_fd = AT_FDCWD; // Resolve at_path from the working directory when there is no base
+ if (base) {
+ bftw_cache_pin(cache, base);
+ at_fd = base->fd;
+ }
+
+ int fd = -1;
+ if (bftw_cache_reserve(state) != 0) {
+ goto unpin;
+ }
+
+ int flags = O_RDONLY | O_CLOEXEC | O_DIRECTORY;
+ fd = openat(at_fd, at_path, flags);
+
+ if (fd < 0 && errno == EMFILE) { // Out of descriptors: evict one cache entry and retry once
+ if (bftw_cache_pop(cache) == 0) {
+ fd = openat(at_fd, at_path, flags);
+ }
+ cache->capacity = 1; // NOTE(review): presumably leaves room for only the add below -- confirm
+ }
+
+unpin:
+ if (base) {
+ bftw_cache_unpin(cache, base);
+ }
+
+ if (fd >= 0) {
+ file->fd = fd;
+ bftw_cache_add(cache, file);
+ }
+
+ return fd;
+}
+
+/**
+ * Open a bftw_file.
+ *
+ * Opens the file relative to its nearest ancestor that has an open
+ * descriptor (or relative to the working directory if there is none). If
+ * that fails with an ENAMETOOLONG-like error, the intermediate ancestors are
+ * opened one path component at a time instead.
+ *
+ * Returns the file's descriptor, or a negative value on failure.
+ */
+static int bftw_file_open(struct bftw_state *state, struct bftw_file *file, const char *path) {
+ // Find the nearest open ancestor
+ struct bftw_file *base = file;
+ do {
+ base = base->parent;
+ } while (base && base->fd < 0);
+
+ const char *at_path = path;
+ if (base) {
+ at_path += bftw_child_nameoff(base); // Skip the part of the path that base covers
+ }
+
+ int fd = bftw_file_openat(state, file, base, at_path);
+ if (fd >= 0 || !errno_is_like(ENAMETOOLONG)) {
+ return fd;
+ }
+
+ // Handle ENAMETOOLONG by manually traversing the path component-by-component
+ struct bftw_list parents;
+ SLIST_INIT(&parents);
+
+ struct bftw_file *cur;
+ for (cur = file; cur != base; cur = cur->parent) {
+ SLIST_PREPEND(&parents, cur); // Prepending puts the outermost ancestor first
+ }
+
+ while ((cur = SLIST_POP(&parents))) {
+ if (!cur->parent || cur->parent->fd >= 0) { // Skip components whose parent failed to open
+ bftw_file_openat(state, cur, cur->parent, cur->name);
+ }
+ }
+
+ return file->fd;
+}
+
+/**
+ * Close a directory, handing the work to the I/O queue when possible.
+ *
+ * @return
+ *         0 if the close was queued, otherwise the result of closing the
+ *         directory synchronously.
+ */
+static int bftw_ioq_closedir(struct bftw_state *state, struct bfs_dir *dir) {
+    // Prefer the async route if the queue has room and accepts the request
+    if (bftw_ioq_reserve(state) == 0 && ioq_closedir(state->ioq, dir, NULL) == 0) {
+        return 0;
+    }
+
+    // Otherwise close it right here and do the bookkeeping ourselves
+    struct bftw_cache *cache = &state->cache;
+    int ret = bfs_closedir(dir);
+    bftw_freedir(cache, dir);
+    cache->capacity++;
+    return ret;
+}
+
+/**
+ * Close a file descriptor, handing the work to the I/O queue when possible.
+ *
+ * @return
+ *         0 if the close was queued, otherwise the result of closing the
+ *         descriptor synchronously.
+ */
+static int bftw_ioq_close(struct bftw_state *state, int fd) {
+    // Prefer the async route if the queue has room and accepts the request
+    if (bftw_ioq_reserve(state) == 0 && ioq_close(state->ioq, fd, NULL) == 0) {
+        return 0;
+    }
+
+    // Otherwise close it right here and free up the cache slot ourselves
+    struct bftw_cache *cache = &state->cache;
+    int ret = xclose(fd);
+    cache->capacity++;
+    return ret;
+}
+
+/**
+ * Close a file, asynchronously if possible.
+ *
+ * The file must be open and unpinned. It is taken off the LRU list and
+ * marked closed right away; the underlying directory or descriptor is then
+ * closed through the I/O queue helpers.
+ */
+static int bftw_close(struct bftw_state *state, struct bftw_file *file) {
+    bfs_assert(file->fd >= 0);
+    bfs_assert(file->pincount == 0);
+
+    // Grab what needs closing before wiping the file's fields
+    int fd = file->fd;
+    struct bfs_dir *dir = file->dir;
+
+    bftw_lru_remove(&state->cache, file);
+    file->fd = -1;
+    file->dir = NULL;
+
+    return dir ? bftw_ioq_closedir(state, dir) : bftw_ioq_close(state, fd);
+}
+
+/**
+ * Free an open directory.
+ *
+ * Releases the file's bfs_dir. If the file still has other references, an
+ * open descriptor is kept for it: via bfs_unwrapdir() when
+ * BFS_USE_UNWRAPDIR is set, otherwise via a dup()'d descriptor. With no
+ * other references, the file is closed entirely.
+ *
+ * Returns 0 if the file has no directory; otherwise 0 on success or a
+ * non-zero value on failure.
+ */
+static int bftw_unwrapdir(struct bftw_state *state, struct bftw_file *file) {
+ struct bfs_dir *dir = file->dir;
+ if (!dir) {
+ return 0;
+ }
+
+ struct bftw_cache *cache = &state->cache;
+
+ // Try to keep an open fd if any children exist
+ bool reffed = file->refcount > 1;
+ // Keep the fd the same if it's pinned
+ bool pinned = file->pincount > 0;
+
+#if BFS_USE_UNWRAPDIR
+ if (reffed || pinned) {
+ bfs_unwrapdir(dir);
+ bftw_freedir(cache, dir);
+ file->dir = NULL;
+ return 0;
+ }
+#else
+ if (pinned) {
+ return -1;
+ }
+#endif
+
+ if (!reffed) {
+ return bftw_close(state, file);
+ }
+
+ // Make room for dup()
+ bftw_cache_pin(cache, file); // Pinned so the reservation can't evict this file
+ int ret = bftw_cache_reserve(state);
+ bftw_cache_unpin(cache, file);
+ if (ret != 0) {
+ return ret;
+ }
+
+ int fd = dup_cloexec(file->fd);
+ if (fd < 0) {
+ return -1;
+ }
+ --cache->capacity; // The duplicate occupies a cache slot
+
+ file->dir = NULL;
+ file->fd = fd;
+ return bftw_ioq_closedir(state, dir);
+}
+
+/**
+ * Try to pin a file's parent so its fd can be used as a base for *at() calls.
+ *
+ * @return
+ *         AT_FDCWD if the file has no parent, the parent's fd (now pinned) if
+ *         it is open, or -1 if the parent has no open fd.
+ */
+static int bftw_pin_parent(struct bftw_state *state, struct bftw_file *file) {
+	// -1 is the failure value, so it must never collide with AT_FDCWD
+	bfs_static_assert((int)AT_FDCWD != -1);
+
+	struct bftw_file *dir = file->parent;
+	if (dir == NULL) {
+		return AT_FDCWD;
+	}
+
+	int dfd = dir->fd;
+	if (dfd >= 0) {
+		bftw_cache_pin(&state->cache, dir);
+		return dfd;
+	}
+
+	return -1;
+}
+
+/**
+ * Open a directory asynchronously.
+ *
+ * In order: reserves an I/O queue slot, pins the parent so its fd can serve as
+ * the base of the open, reserves room in the cache, allocates a bfs_dir, and
+ * submits the request with the file as its associated pointer.  Once the
+ * request is submitted, the cache capacity is decremented.  On failure, the
+ * allocated bfs_dir (if any) is freed and the parent is unpinned.
+ *
+ * @return
+ *         0 if the request was queued, or -1 otherwise.
+ */
+static int bftw_ioq_opendir(struct bftw_state *state, struct bftw_file *file) {
+ struct bftw_cache *cache = &state->cache;
+
+ if (bftw_ioq_reserve(state) != 0) {
+ goto fail;
+ }
+
+ int dfd = bftw_pin_parent(state, file); // AT_FDCWD when there is no parent
+ if (dfd < 0 && dfd != (int)AT_FDCWD) {
+ goto fail; // The parent has no open fd, so nothing was pinned
+ }
+
+ if (bftw_cache_reserve(state) != 0) {
+ goto unpin;
+ }
+
+ struct bfs_dir *dir = bftw_allocdir(cache, false);
+ if (!dir) {
+ goto unpin;
+ }
+
+ if (ioq_opendir(state->ioq, dir, dfd, file->name, state->dir_flags, file) != 0) {
+ goto free;
+ }
+
+ --cache->capacity; // The in-flight open is accounted for up front
+ return 0;
+
+free:
+ bftw_freedir(cache, dir);
+unpin:
+ bftw_unpin_parent(state, file, false);
+fail:
+ return -1;
+}
+
+/**
+ * Open a batch of directories asynchronously.
+ *
+ * Keeps submitting waiting directories from the directory queue until the
+ * queue stops being balanced, nothing is waiting, or a submission fails.
+ */
+static void bftw_ioq_opendirs(struct bftw_state *state) {
+	struct bftw_queue *dirq = &state->dirq;
+
+	for (;;) {
+		if (!bftw_queue_balanced(dirq)) {
+			return;
+		}
+
+		struct bftw_file *next = bftw_queue_waiting(dirq);
+		if (!next || bftw_ioq_opendir(state, next) != 0) {
+			return;
+		}
+
+		// The request is in flight; take it off the waiting list
+		bftw_queue_detach(dirq, next, true);
+	}
+}
+
+/**
+ * Push a directory onto the directory queue, then try to start opening
+ * waiting directories in the background.
+ */
+static void bftw_push_dir(struct bftw_state *state, struct bftw_file *file) {
+	struct bftw_queue *dirq = &state->dirq;
+
+	bfs_assert(file->type == BFS_DIR);
+
+	bftw_queue_push(dirq, file);
+	bftw_ioq_opendirs(state);
+}
+
+/**
+ * Pop a file from a queue, then activate it (make it state->file).
+ *
+ * While the queue has nothing ready but still has I/O in flight, completed
+ * I/O is collected with bftw_ioq_pop().  After a file has been popped, any
+ * I/O still queued on that particular file is waited for as well.
+ *
+ * @return
+ *         Whether a file was popped.
+ */
+static bool bftw_pop(struct bftw_state *state, struct bftw_queue *queue) {
+ if (queue->size == 0) {
+ return false;
+ }
+
+ while (!bftw_queue_ready(queue) && queue->ioqueued > 0) {
+ bool block = true;
+ if (bftw_queue_waiting(queue) && state->nthreads == 1) {
+ // With only one background thread, balance the work
+ // between it and the main thread
+ block = false;
+ }
+
+ if (bftw_ioq_pop(state, block) < 0) {
+ break; // Nothing (more) could be collected
+ }
+ }
+
+ struct bftw_file *file = bftw_queue_pop(queue);
+ if (!file) {
+ return false;
+ }
+
+ while (file->ioqueued) {
+ bftw_ioq_pop(state, true); // Block until this file's own I/O has completed
+ }
+
+ state->file = file;
+ return true;
+}
+
+/**
+ * Pop a directory to read from the queue.
+ *
+ * Declines (returns false without popping) when files should be visited
+ * first: in sorted breadth-first mode whenever a file is ready, and in
+ * unsorted mode when no directory is ready yet but a file is.
+ *
+ * @return
+ *         Whether a directory was popped and made current.
+ */
+static bool bftw_pop_dir(struct bftw_state *state) {
+	bfs_assert(!state->file);
+
+	bool defer;
+	if (state->flags & BFTW_SORT) {
+		// Keep strict breadth-first order when sorting
+		defer = state->strategy == BFTW_BFS && bftw_queue_ready(&state->fileq);
+	} else {
+		// Don't block if we have files ready to visit
+		defer = !bftw_queue_ready(&state->dirq) && bftw_queue_ready(&state->fileq);
+	}
+
+	if (defer) {
+		return false;
+	}
+
+	return bftw_pop(state, &state->dirq);
+}
+
+/**
+ * Figure out bfs_stat() flags for a file at the given depth.
+ *
+ * Links are followed (BFS_STAT_TRYFOLLOW) when BFTW_FOLLOW_ALL is set, or when
+ * BFTW_FOLLOW_ROOTS is set and the file is a root (depth 0).  Otherwise
+ * BFS_STAT_NOFOLLOW is used.
+ */
+static enum bfs_stat_flags bftw_stat_flags(const struct bftw_state *state, size_t depth) {
+	enum bftw_flags follow = depth == 0
+		? (BFTW_FOLLOW_ALL | BFTW_FOLLOW_ROOTS)
+		: BFTW_FOLLOW_ALL;
+
+	return (state->flags & follow) ? BFS_STAT_TRYFOLLOW : BFS_STAT_NOFOLLOW;
+}
+
+/**
+ * Check if a stat() call is necessary.
+ *
+ * A stat() is required when BFTW_STAT is set, when the type is unknown, for
+ * directories when cycle detection or either mount flag is enabled, and for
+ * symbolic links that would be followed.  On Linux, any remaining file whose
+ * name might be a mount point according to the mount table also needs one.
+ *
+ * @param state
+ *         The bftw() state.
+ * @param depth
+ *         The depth of the file, used to pick the bfs_stat() flags.
+ * @param type
+ *         The file type known so far.
+ * @param name
+ *         The file name checked against the mount table.
+ * @return
+ *         Whether the file must be stat()ed.
+ */
+static bool bftw_must_stat(const struct bftw_state *state, size_t depth, enum bfs_type type, const char *name) {
+ if (state->flags & BFTW_STAT) {
+ return true;
+ }
+
+ switch (type) {
+ case BFS_UNKNOWN:
+ return true;
+
+ case BFS_DIR:
+ return state->flags & (BFTW_DETECT_CYCLES | BFTW_SKIP_MOUNTS | BFTW_PRUNE_MOUNTS);
+
+ case BFS_LNK:
+ if (!(bftw_stat_flags(state, depth) & BFS_STAT_NOFOLLOW)) {
+ return true; // The link would be followed
+ }
+ fallthru; // Links that aren't followed are treated like any other file
+
+ default:
+#if __linux__
+ if (state->mtab && bfs_might_be_mount(state->mtab, name)) {
+ return true;
+ }
+#endif
+ return false;
+ }
+}
+
+/**
+ * stat() a file asynchronously.
+ *
+ * Reserves an I/O queue slot, pins the parent so its fd can be used as the
+ * base of the lookup, allocates a stat buffer from the cache's arena, and
+ * submits the request with the file as its associated pointer.  On failure
+ * the buffer (if any) is returned to the arena and the parent is unpinned.
+ *
+ * @return
+ *         0 if the request was queued, or -1 otherwise.
+ */
+static int bftw_ioq_stat(struct bftw_state *state, struct bftw_file *file) {
+ if (bftw_ioq_reserve(state) != 0) {
+ goto fail;
+ }
+
+ int dfd = bftw_pin_parent(state, file); // AT_FDCWD when there is no parent
+ if (dfd < 0 && dfd != (int)AT_FDCWD) {
+ goto fail; // The parent has no open fd, so nothing was pinned
+ }
+
+ struct bftw_cache *cache = &state->cache;
+ struct bfs_stat *buf = arena_alloc(&cache->stat_bufs);
+ if (!buf) {
+ goto unpin;
+ }
+
+ enum bfs_stat_flags flags = bftw_stat_flags(state, file->depth);
+ if (ioq_stat(state->ioq, dfd, file->name, flags, buf, file) != 0) {
+ goto free;
+ }
+
+ return 0;
+
+free:
+ arena_free(&cache->stat_bufs, buf);
+unpin:
+ bftw_unpin_parent(state, file, false);
+fail:
+ return -1;
+}
+
+/**
+ * Check if we should stat() a file asynchronously.
+ *
+ * Roots and (where the platform has them) whiteouts are never stat()ed in the
+ * background; everything else is, as long as it needs a stat() at all.
+ */
+static bool bftw_should_ioq_stat(struct bftw_state *state, struct bftw_file *file) {
+	// To avoid surprising users too much, process the roots in order
+	bool eligible = file->depth != 0;
+
+#ifdef S_IFWHT
+	// ioq_stat() does not do whiteout emulation like bftw_stat_impl()
+	eligible = eligible && file->type != BFS_WHT;
+#endif
+
+	return eligible && bftw_must_stat(state, file->depth, file->type, file->name);
+}
+
+/**
+ * Call stat() on files that need it.
+ *
+ * Walks the waiting files in the file queue.  Files that shouldn't be
+ * stat()ed asynchronously are skipped over; the rest are submitted to the I/O
+ * queue until the queue is no longer balanced or a submission fails.
+ */
+static void bftw_stat_files(struct bftw_state *state) {
+	struct bftw_queue *fileq = &state->fileq;
+	struct bftw_file *file;
+
+	while ((file = bftw_queue_waiting(fileq))) {
+		if (bftw_should_ioq_stat(state, file)) {
+			if (!bftw_queue_balanced(fileq) || bftw_ioq_stat(state, file) != 0) {
+				return;
+			}
+			bftw_queue_detach(fileq, file, true);
+		} else {
+			bftw_queue_skip(fileq, file);
+		}
+	}
+}
+
+/**
+ * Push a file onto the file queue, then try to start background stat() calls
+ * for waiting files.
+ */
+static void bftw_push_file(struct bftw_state *state, struct bftw_file *file) {
+	struct bftw_queue *fileq = &state->fileq;
+
+	bftw_queue_push(fileq, file);
+	bftw_stat_files(state);
+}
+
+/**
+ * Pop a file to visit from the file queue.
+ *
+ * @return
+ *         Whether a file was popped and made current.
+ */
+static bool bftw_pop_file(struct bftw_state *state) {
+	bfs_assert(!state->file);
+
+	struct bftw_queue *fileq = &state->fileq;
+	return bftw_pop(state, fileq);
+}
+
+/**
+ * Build the path to the current file in state->path.
+ *
+ * The buffer is resized to the current file's full path length (0 if there is
+ * no current file), and the path components are written backwards from the
+ * file towards the root.  Writing stops at the nearest ancestor shared with
+ * state->previous, the file the path was last built for.
+ *
+ * @param state
+ *         The bftw() state.
+ * @param name
+ *         An optional child name to append, separated by '/' unless the path
+ *         is empty or already ends with one.
+ * @return
+ *         0 on success, or -1 on failure with state->error set from errno.
+ */
+static int bftw_build_path(struct bftw_state *state, const char *name) {
+ const struct bftw_file *file = state->file;
+
+ size_t pathlen = file ? file->nameoff + file->namelen : 0;
+ if (dstresize(&state->path, pathlen) != 0) {
+ state->error = errno;
+ return -1;
+ }
+
+ // Try to find a common ancestor with the existing path
+ const struct bftw_file *ancestor = state->previous;
+ while (ancestor && ancestor->depth > file->depth) { // NOTE(review): dereferences file; assumes previous is NULL whenever file is NULL -- confirm
+ ancestor = ancestor->parent;
+ }
+
+ // Build the path backwards
+ while (file && file != ancestor) {
+ if (file->nameoff > 0) {
+ state->path[file->nameoff - 1] = '/';
+ }
+ memcpy(state->path + file->nameoff, file->name, file->namelen);
+
+ if (ancestor && ancestor->depth == file->depth) {
+ ancestor = ancestor->parent; // Different file at the same depth: keep both climbing
+ }
+ file = file->parent;
+ }
+
+ state->previous = state->file; // Remember what the buffer now holds
+
+ if (name) {
+ if (pathlen > 0 && state->path[pathlen - 1] != '/') {
+ if (dstrapp(&state->path, '/') != 0) {
+ state->error = errno;
+ return -1;
+ }
+ }
+ if (dstrcat(&state->path, name) != 0) {
+ state->error = errno;
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * Open a bftw_file as a directory, synchronously.
+ *
+ * Opens the file, allocates a bfs_dir from the cache, and wraps the fd in it.
+ * On success the stream is attached to the file.
+ *
+ * @return
+ *         The open directory, or NULL on failure.
+ */
+static struct bfs_dir *bftw_file_opendir(struct bftw_state *state, struct bftw_file *file, const char *path) {
+	struct bftw_cache *cache = &state->cache;
+	struct bfs_dir *stream = NULL;
+
+	int dfd = bftw_file_open(state, file, path);
+	if (dfd >= 0) {
+		stream = bftw_allocdir(cache, true);
+	}
+	if (!stream) {
+		return NULL;
+	}
+
+	if (bfs_opendir(stream, dfd, NULL, state->dir_flags) == 0) {
+		bftw_file_set_dir(cache, file, stream);
+		return stream;
+	}
+
+	bftw_freedir(cache, stream);
+	return NULL;
+}
+
+/**
+ * Open the current directory.
+ *
+ * Uses the stream already attached to the current file if there is one;
+ * otherwise builds the path and opens the directory synchronously.  A failed
+ * open is not fatal: it is recorded in state->direrror and 0 is still
+ * returned.  On success the file is pinned in the cache.
+ *
+ * @return
+ *         0 on success (including a recorded open error), or -1 if the path
+ *         could not be built.
+ */
+static int bftw_opendir(struct bftw_state *state) {
+	bfs_assert(!state->dir);
+	bfs_assert(!state->de);
+
+	struct bftw_file *file = state->file;
+
+	state->direrror = 0;
+	state->dir = file->dir;
+
+	if (!state->dir) {
+		if (bftw_build_path(state, NULL) != 0) {
+			return -1;
+		}
+
+		bftw_queue_rebalance(&state->dirq, false);
+
+		state->dir = bftw_file_opendir(state, file, state->path);
+		if (!state->dir) {
+			state->direrror = errno;
+			return 0;
+		}
+	}
+
+	bftw_cache_pin(&state->cache, file);
+	return 0;
+}
+
+/**
+ * Read an entry from the current directory.
+ *
+ * On success state->de points at the entry; at the end of the directory or on
+ * error it is NULL, and errors are additionally recorded in state->direrror.
+ *
+ * @return
+ *         The bfs_readdir() result, or -1 if no directory is open.
+ */
+static int bftw_readdir(struct bftw_state *state) {
+	if (!state->dir) {
+		return -1;
+	}
+
+	int status = bfs_readdir(state->dir, &state->de_storage);
+
+	state->de = status > 0 ? &state->de_storage : NULL;
+	if (status < 0) {
+		state->direrror = errno;
+	}
+
+	return status;
+}
+
+/**
+ * Open a file if necessary.
+ *
+ * If the file has no fd yet, it is opened using a temporary copy of the first
+ * nameoff + namelen bytes of the given path.
+ *
+ * @return
+ *         The file's fd, or a negative value on failure.
+ */
+static int bftw_ensure_open(struct bftw_state *state, struct bftw_file *file, const char *path) {
+	int fd = file->fd;
+	if (fd >= 0) {
+		return fd;
+	}
+
+	char *prefix = strndup(path, file->nameoff + file->namelen);
+	if (!prefix) {
+		return -1;
+	}
+
+	fd = bftw_file_open(state, file, prefix);
+	free(prefix);
+	return fd;
+}
+
+/**
+ * Initialize the buffers with data about the current path.
+ *
+ * If a directory entry is current (state->de), the buffer describes that
+ * entry as a child of state->file; otherwise it describes state->file itself,
+ * or a root path when there is no current file.  The function tries to make
+ * sure the parent is open so that at_fd/at_path are relative to it, calls
+ * stat() when bftw_must_stat() requires it, and with BFTW_DETECT_CYCLES
+ * compares directories against every ancestor's dev/ino.
+ *
+ * Failures are reported through the buffer: type becomes BFS_ERROR and error
+ * holds the errno value (ELOOP for a detected cycle).
+ *
+ * @param state
+ *         The bftw() state; state->path must already hold the path.
+ * @param visit
+ *         Which visit this is.
+ */
+static void bftw_init_ftwbuf(struct bftw_state *state, enum bftw_visit visit) {
+ struct bftw_file *file = state->file;
+ const struct bfs_dirent *de = state->de;
+
+ struct BFTW *ftwbuf = &state->ftwbuf;
+ ftwbuf->path = state->path;
+ ftwbuf->root = file ? file->root->name : ftwbuf->path;
+ ftwbuf->depth = 0;
+ ftwbuf->visit = visit;
+ ftwbuf->type = BFS_UNKNOWN;
+ ftwbuf->error = state->direrror; // A pending directory error is reported on this visit
+ ftwbuf->at_fd = AT_FDCWD;
+ ftwbuf->at_path = ftwbuf->path;
+ bftw_stat_init(&ftwbuf->stat_bufs, &state->stat_buf, &state->lstat_buf);
+
+ struct bftw_file *parent = NULL;
+ if (de) {
+ parent = file;
+ ftwbuf->depth = file->depth + 1;
+ ftwbuf->type = de->type;
+ ftwbuf->nameoff = bftw_child_nameoff(file);
+ } else if (file) {
+ parent = file->parent;
+ ftwbuf->depth = file->depth;
+ ftwbuf->type = file->type;
+ ftwbuf->nameoff = file->nameoff;
+ bftw_stat_fill(&ftwbuf->stat_bufs, &file->stat_bufs);
+ }
+
+ if (parent) {
+ // Try to ensure the immediate parent is open, to avoid ENAMETOOLONG
+ if (bftw_ensure_open(state, parent, state->path) >= 0) {
+ ftwbuf->at_fd = parent->fd;
+ ftwbuf->at_path += ftwbuf->nameoff; // Relative to the parent: just the name
+ } else {
+ ftwbuf->error = errno;
+ }
+ }
+
+ if (ftwbuf->depth == 0) {
+ // Compute the name offset for root paths like "foo/bar"
+ ftwbuf->nameoff = xbaseoff(ftwbuf->path);
+ }
+
+ ftwbuf->stat_flags = bftw_stat_flags(state, ftwbuf->depth);
+
+ if (ftwbuf->error != 0) {
+ ftwbuf->type = BFS_ERROR;
+ return;
+ }
+
+ const struct bfs_stat *statbuf = NULL;
+ if (bftw_must_stat(state, ftwbuf->depth, ftwbuf->type, ftwbuf->path + ftwbuf->nameoff)) {
+ statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags);
+ if (statbuf) {
+ ftwbuf->type = bfs_mode_to_type(statbuf->mode);
+ } else {
+ ftwbuf->type = BFS_ERROR;
+ ftwbuf->error = errno;
+ return;
+ }
+ }
+
+ if (ftwbuf->type == BFS_DIR && (state->flags & BFTW_DETECT_CYCLES)) { // bftw_must_stat() is true for directories with this flag, so statbuf is set here
+ for (const struct bftw_file *ancestor = parent; ancestor; ancestor = ancestor->parent) {
+ if (ancestor->dev == statbuf->dev && ancestor->ino == statbuf->ino) {
+ ftwbuf->type = BFS_ERROR;
+ ftwbuf->error = ELOOP;
+ return;
+ }
+ }
+ }
+}
+
+/**
+ * Check if the current file is a mount point.
+ *
+ * The file being visited is compared against its parent directory: that is
+ * state->file itself when a child name is given, or state->file's parent
+ * otherwise.  It counts as a mount point if stat() succeeds and reports a
+ * different device than the parent's.
+ */
+static bool bftw_is_mount(struct bftw_state *state, const char *name) {
+	const struct bftw_file *dir = state->file;
+	if (dir && !name) {
+		dir = dir->parent;
+	}
+	if (!dir) {
+		return false;
+	}
+
+	const struct bfs_stat *sb = bftw_stat(&state->ftwbuf, state->ftwbuf.stat_flags);
+	if (!sb) {
+		return false;
+	}
+
+	return sb->dev != dir->dev;
+}
+
+/**
+ * Check if bfs_stat() was called from the main thread, i.e. whether the
+ * result lives in one of the state's own stat buffers.
+ */
+static bool bftw_stat_was_sync(const struct bftw_state *state, const struct bfs_stat *buf) {
+	if (buf == &state->stat_buf) {
+		return true;
+	}
+	return buf == &state->lstat_buf;
+}
+
+/**
+ * Invoke the callback.
+ *
+ * Post-order visits are skipped unless BFTW_POST_ORDER is set.  Otherwise the
+ * path and ftwbuf are prepared, and the callback's answer is normalized:
+ * BFTW_CONTINUE is only kept for a pre-order visit of a directory (and not
+ * for a mount point under BFTW_PRUNE_MOUNTS); everything else that would
+ * continue becomes BFTW_PRUNE.  Under BFTW_SKIP_MOUNTS, mount points are
+ * pruned without calling the callback at all.
+ *
+ * @param state
+ *         The bftw() state.
+ * @param name
+ *         The name of the child being visited, or NULL to visit state->file.
+ * @param visit
+ *         Which visit this is.
+ * @return
+ *         The action to take.  BFTW_STOP is also returned, with state->error
+ *         set, if the path can't be built, an error occurs without
+ *         BFTW_RECOVER, or the callback returns an invalid value (EINVAL).
+ */
+static enum bftw_action bftw_call_back(struct bftw_state *state, const char *name, enum bftw_visit visit) {
+ if (visit == BFTW_POST && !(state->flags & BFTW_POST_ORDER)) {
+ return BFTW_PRUNE;
+ }
+
+ if (bftw_build_path(state, name) != 0) {
+ return BFTW_STOP;
+ }
+
+ const struct BFTW *ftwbuf = &state->ftwbuf;
+ bftw_init_ftwbuf(state, visit);
+
+ // Never give the callback BFS_ERROR unless BFTW_RECOVER is specified
+ if (ftwbuf->type == BFS_ERROR && !(state->flags & BFTW_RECOVER)) {
+ state->error = ftwbuf->error;
+ return BFTW_STOP;
+ }
+
+ enum bftw_action ret = BFTW_PRUNE;
+ if ((state->flags & BFTW_SKIP_MOUNTS) && bftw_is_mount(state, name)) {
+ goto done;
+ }
+
+ ret = state->callback(ftwbuf, state->ptr);
+ switch (ret) {
+ case BFTW_CONTINUE:
+ if (visit != BFTW_PRE || ftwbuf->type != BFS_DIR) {
+ ret = BFTW_PRUNE; // Only directories on their pre-order visit can be descended into
+ } else if (state->flags & BFTW_PRUNE_MOUNTS) {
+ if (bftw_is_mount(state, name)) {
+ ret = BFTW_PRUNE;
+ }
+ }
+ break;
+
+ case BFTW_PRUNE:
+ case BFTW_STOP:
+ break;
+
+ default:
+ state->error = EINVAL;
+ return BFTW_STOP;
+ }
+
+done:
+ if (state->fileq.flags & BFTW_QBALANCE) {
+ // Detect any main-thread stat() calls to rebalance the queue
+ const struct bfs_stat *buf = bftw_cached_stat(ftwbuf, BFS_STAT_FOLLOW);
+ const struct bfs_stat *lbuf = bftw_cached_stat(ftwbuf, BFS_STAT_NOFOLLOW);
+ if (bftw_stat_was_sync(state, buf) || bftw_stat_was_sync(state, lbuf)) {
+ bftw_queue_rebalance(&state->fileq, false);
+ }
+ }
+
+ return ret;
+}
+
+/**
+ * Flags controlling which files get visited when done with a directory.
+ *
+ * Passed to bftw_gc().  The flags are bits that can be combined.
+ */
+enum bftw_gc_flags {
+ /** Don't visit anything. */
+ BFTW_VISIT_NONE = 0,
+ /** Report directory errors (otherwise they are stored in state->error). */
+ BFTW_VISIT_ERROR = 1 << 0,
+ /** Visit the file itself (post-order). */
+ BFTW_VISIT_FILE = 1 << 1,
+ /** Visit the file's ancestors (post-order) as they are freed. */
+ BFTW_VISIT_PARENTS = 1 << 2,
+ /** Report errors, and visit both the file and its ancestors. */
+ BFTW_VISIT_ALL = BFTW_VISIT_ERROR | BFTW_VISIT_FILE | BFTW_VISIT_PARENTS,
+};
+
+/**
+ * Garbage collect the current file and its parents.
+ *
+ * In order: releases the current directory (unpinning the file and handing an
+ * attached stream to bftw_delayed_unwrap()), reports or records a pending
+ * directory error, unwraps every file on the to_close list, and finally drops
+ * one reference from the current file.  Each file whose reference count
+ * reaches zero is visited post-order if the flags allow it, closed, and
+ * freed, and the same is then done for its parent.  state->file is left NULL.
+ *
+ * @param state
+ *         The bftw() state.
+ * @param flags
+ *         Which visits to perform.
+ * @return
+ *         0 on success, or -1 if a callback asked to stop.
+ */
+static int bftw_gc(struct bftw_state *state, enum bftw_gc_flags flags) {
+ int ret = 0;
+
+ struct bftw_file *file = state->file;
+ if (file) {
+ if (state->dir) {
+ bftw_cache_unpin(&state->cache, file); // Pairs with the pin taken in bftw_opendir()
+ }
+ if (file->dir) {
+ bftw_delayed_unwrap(state, file);
+ }
+ }
+ state->dir = NULL;
+ state->de = NULL;
+
+ if (state->direrror != 0) {
+ if (flags & BFTW_VISIT_ERROR) {
+ if (bftw_call_back(state, NULL, BFTW_PRE) == BFTW_STOP) {
+ ret = -1;
+ flags = 0; // No further visits once the callback has asked to stop
+ }
+ } else {
+ state->error = state->direrror;
+ }
+ }
+ state->direrror = 0;
+
+ while ((file = SLIST_POP(&state->to_close, ready))) {
+ bftw_unwrapdir(state, file);
+ }
+
+ enum bftw_gc_flags visit = BFTW_VISIT_FILE;
+ while ((file = state->file)) {
+ if (--file->refcount > 0) {
+ state->file = NULL; // Still referenced: stop climbing
+ break;
+ }
+
+ if (flags & visit) {
+ if (bftw_call_back(state, NULL, BFTW_POST) == BFTW_STOP) {
+ ret = -1;
+ flags = 0;
+ }
+ }
+ visit = BFTW_VISIT_PARENTS; // Everything above the first file is an ancestor
+
+ struct bftw_file *parent = file->parent;
+ if (state->previous == file) {
+ state->previous = parent; // Don't leave previous pointing at a freed file
+ }
+ state->file = parent;
+
+ if (file->fd >= 0) {
+ bftw_close(state, file);
+ }
+ bftw_file_free(&state->cache, file);
+ }
+
+ return ret;
+}
+
+/**
+ * Sort a bftw_list by filename.
+ *
+ * This is a recursive merge sort that compares names with strcoll().  Lists
+ * with fewer than two elements are returned as-is.  When two names compare
+ * equal, the element from the left half is taken first.
+ *
+ * @param list
+ *         The list to sort in place.
+ */
+static void bftw_list_sort(struct bftw_list *list) {
+ if (!list->head || !list->head->next) {
+ return;
+ }
+
+ struct bftw_list left, right;
+ SLIST_INIT(&left);
+ SLIST_INIT(&right);
+
+ // Split
+ for (struct bftw_file *hare = list->head; hare && (hare = hare->next); hare = hare->next) { // The hare advances two nodes for every node moved to the left half
+ struct bftw_file *tortoise = SLIST_POP(list);
+ SLIST_APPEND(&left, tortoise);
+ }
+ SLIST_EXTEND(&right, list); // Whatever remains becomes the right half
+
+ // Recurse
+ bftw_list_sort(&left);
+ bftw_list_sort(&right);
+
+ // Merge
+ while (!SLIST_EMPTY(&left) && !SLIST_EMPTY(&right)) {
+ struct bftw_file *lf = left.head;
+ struct bftw_file *rf = right.head;
+
+ if (strcoll(lf->name, rf->name) <= 0) {
+ SLIST_POP(&left);
+ SLIST_APPEND(list, lf);
+ } else {
+ SLIST_POP(&right);
+ SLIST_APPEND(list, rf);
+ }
+ }
+ SLIST_EXTEND(list, &left); // At most one of these is still non-empty
+ SLIST_EXTEND(list, &right);
+}
+
+/**
+ * Flush all the queue buffers.
+ *
+ * The file queue goes first (its buffer is sorted beforehand under
+ * BFTW_SORT), followed by a round of background stat() submissions.  The
+ * directory queue is flushed next, followed by a round of background
+ * opendir() submissions.
+ */
+static void bftw_flush(struct bftw_state *state) {
+	struct bftw_queue *fileq = &state->fileq;
+	struct bftw_queue *dirq = &state->dirq;
+
+	bool sorted = state->flags & BFTW_SORT;
+	if (sorted) {
+		bftw_list_sort(&fileq->buffer);
+	}
+
+	bftw_queue_flush(fileq);
+	bftw_stat_files(state);
+
+	bftw_queue_flush(dirq);
+	bftw_ioq_opendirs(state);
+}
+
+/**
+ * Close the current directory.
+ *
+ * Garbage collects the current file with all visits enabled, then flushes the
+ * queue buffers.
+ *
+ * @return
+ *         0 on success, or -1 if garbage collection failed.
+ */
+static int bftw_closedir(struct bftw_state *state) {
+	if (bftw_gc(state, BFTW_VISIT_ALL) == 0) {
+		bftw_flush(state);
+		return 0;
+	}
+
+	return -1;
+}
+
+/**
+ * Fill file identity information from an ftwbuf.
+ *
+ * The type is always copied.  The device and inode numbers are only copied
+ * if stat info for the default flags is already cached.
+ */
+static void bftw_save_ftwbuf(struct bftw_file *file, const struct BFTW *ftwbuf) {
+	file->type = ftwbuf->type;
+
+	const struct bfs_stat *cached = bftw_cached_stat(ftwbuf, ftwbuf->stat_flags);
+	if (!cached) {
+		return;
+	}
+
+	file->dev = cached->dev;
+	file->ino = cached->ino;
+}
+
+/**
+ * Check if we should buffer a file instead of visiting it.
+ *
+ * @param state
+ *         The bftw() state.
+ * @param file
+ *         The current file (the would-be parent), if any.
+ * @param name
+ *         The name about to be visited, or NULL if the file was already
+ *         buffered.
+ * @return
+ *         Whether to buffer the file.
+ */
+static bool bftw_buffer_file(const struct bftw_state *state, const struct bftw_file *file, const char *name) {
+	if (!name) {
+		// Already buffered
+		return false;
+	}
+
+	if (state->flags & BFTW_BUFFER) {
+		return true;
+	}
+
+	// Buffering only pays off if the stat() can actually run in the
+	// background; with an unbalanced queue it would be synchronous anyway
+	bool can_async = state->ioq && bftw_queue_balanced(&state->fileq);
+	if (!can_async) {
+		return false;
+	}
+
+	size_t depth = 1;
+	if (file) {
+		depth += file->depth;
+	}
+
+	enum bfs_type type = BFS_UNKNOWN;
+	if (state->de) {
+		type = state->de->type;
+	}
+
+	return bftw_must_stat(state, depth, type, name);
+}
+
+/**
+ * Visit and/or enqueue the current file.
+ *
+ * If the file should be buffered, a new bftw_file is created and pushed onto
+ * the file queue without invoking the callback.  Otherwise the callback is
+ * invoked for a pre-order visit, and its answer decides what happens next:
+ * BFTW_CONTINUE pushes the file onto the directory queue, BFTW_PRUNE drops
+ * it, and anything else aborts the walk.
+ *
+ * @param state
+ *         The bftw() state.
+ * @param name
+ *         The name of the root or child to visit, or NULL to visit
+ *         state->file itself (a file popped from the file queue).
+ * @return
+ *         0 on success, or -1 on failure or when the walk should stop.
+ */
+static int bftw_visit(struct bftw_state *state, const char *name) {
+ struct bftw_cache *cache = &state->cache;
+ struct bftw_file *file = state->file;
+
+ if (bftw_buffer_file(state, file, name)) {
+ file = bftw_file_new(cache, file, name);
+ if (!file) {
+ state->error = errno;
+ return -1;
+ }
+
+ if (state->de) {
+ file->type = state->de->type;
+ }
+
+ bftw_push_file(state, file);
+ return 0;
+ }
+
+ switch (bftw_call_back(state, name, BFTW_PRE)) {
+ case BFTW_CONTINUE:
+ if (name) {
+ file = bftw_file_new(cache, state->file, name);
+ } else {
+ state->file = NULL; // The popped file itself moves on to the directory queue
+ }
+ if (!file) {
+ state->error = errno;
+ return -1;
+ }
+
+ bftw_save_ftwbuf(file, &state->ftwbuf);
+ bftw_stat_recycle(cache, file);
+ bftw_push_dir(state, file);
+ return 0;
+
+ case BFTW_PRUNE:
+ if (file && !name) {
+ return bftw_gc(state, BFTW_VISIT_PARENTS); // Done with a popped file: release it
+ } else {
+ return 0;
+ }
+
+ default:
+ if (file && !name) {
+ bftw_gc(state, BFTW_VISIT_NONE);
+ }
+ return -1;
+ }
+}
+
+/**
+ * Drain a bftw_queue: flush its buffer, then pop and garbage collect every
+ * file in it without visiting anything.
+ */
+static void bftw_drain(struct bftw_state *state, struct bftw_queue *queue) {
+	bftw_queue_flush(queue);
+
+	for (;;) {
+		if (!bftw_pop(state, queue)) {
+			break;
+		}
+		bftw_gc(state, BFTW_VISIT_NONE);
+	}
+}
+
+/**
+ * Dispose of the bftw() state.
+ *
+ * Frees the path buffer, cancels the I/O queue and collects its outstanding
+ * requests, garbage collects the current file, drains both queues without
+ * visiting anything, and destroys the I/O queue and the cache.
+ *
+ * @param state
+ *         The state to destroy.
+ * @return
+ *         The bftw() return value: -1 if an error was recorded in
+ *         state->error (which is also stored in errno), or 0 otherwise.
+ */
+static int bftw_state_destroy(struct bftw_state *state) {
+ dstrfree(state->path);
+
+ struct ioq *ioq = state->ioq;
+ if (ioq) {
+ ioq_cancel(ioq);
+ while (bftw_ioq_pop(state, true) >= 0); // Collect until bftw_ioq_pop() reports failure
+ state->ioq = NULL; // presumably keeps the cleanup below from queueing new I/O -- confirm
+ }
+
+ bftw_gc(state, BFTW_VISIT_NONE);
+ bftw_drain(state, &state->dirq);
+ bftw_drain(state, &state->fileq);
+
+ ioq_destroy(ioq);
+
+ bftw_cache_destroy(&state->cache);
+
+ errno = state->error;
+ return state->error ? -1 : 0;
+}
+
+/**
+ * Shared implementation for all search strategies.
+ *
+ * Visits every root path, then alternates between two phases until both
+ * queues are exhausted: reading directories popped from the directory queue
+ * (visiting each entry), and visiting a single file popped from the file
+ * queue.
+ *
+ * @param state
+ *         The bftw() state.
+ * @return
+ *         0 if the walk ran to completion, or -1 if it was cut short.
+ */
+static int bftw_impl(struct bftw_state *state) {
+ for (size_t i = 0; i < state->npaths; ++i) {
+ if (bftw_visit(state, state->paths[i]) != 0) {
+ return -1;
+ }
+ }
+ bftw_flush(state);
+
+ while (true) {
+ while (bftw_pop_dir(state)) {
+ if (bftw_opendir(state) != 0) {
+ return -1;
+ }
+ while (bftw_readdir(state) > 0) {
+ if (bftw_visit(state, state->de->name) != 0) {
+ return -1;
+ }
+ }
+ if (bftw_closedir(state) != 0) {
+ return -1;
+ }
+ }
+
+ if (!bftw_pop_file(state)) {
+ break; // Neither queue produced anything: the walk is complete
+ }
+ if (bftw_visit(state, NULL) != 0) {
+ return -1;
+ }
+ bftw_flush(state);
+ }
+
+ return 0;
+}
+
+/**
+ * bftw() implementation for simple breadth-/depth-first search.
+ *
+ * The result of the walk itself is not used directly; the final status comes
+ * from bftw_state_destroy().
+ */
+static int bftw_walk(const struct bftw_args *args) {
+	struct bftw_state state;
+	int status = -1;
+
+	if (bftw_state_init(&state, args) == 0) {
+		bftw_impl(&state);
+		status = bftw_state_destroy(&state);
+	}
+
+	return status;
+}
+
+/**
+ * Iterative deepening search state.
+ *
+ * Shared by the iterative (bftw_ids()) and exponential (bftw_eds()) deepening
+ * wrappers, and passed to bftw_ids_callback() as its pointer argument.
+ */
+struct bftw_ids_state {
+ /** Nested walk state. */
+ struct bftw_state nested;
+ /** The wrapped callback. */
+ bftw_callback *delegate;
+ /** The wrapped callback arguments. */
+ void *ptr;
+ /** Which visit this search corresponds to. */
+ enum bftw_visit visit;
+ /** Whether to override the bftw_visit reported to the delegate. */
+ bool force_visit;
+ /** The current minimum depth (inclusive). */
+ size_t min_depth;
+ /** The current maximum depth (exclusive). */
+ size_t max_depth;
+ /** The set of pruned paths (directories the delegate pruned). */
+ struct trie pruned;
+ /** Whether the bottom has been found; cleared when a directory is cut off at max_depth. */
+ bool bottom;
+};
+
+/**
+ * Iterative deepening callback function.
+ *
+ * Wraps the user's callback so that each pass of the nested walk only reports
+ * files inside the current depth window [min_depth, max_depth).  Files above
+ * the window are traversed silently (unless previously pruned), directories
+ * at the bottom of the window are pruned for this pass, and directories
+ * pruned by the delegate are remembered for later passes.
+ *
+ * @param ftwbuf
+ *         Data about the current file.
+ * @param ptr
+ *         The struct bftw_ids_state.
+ * @return
+ *         The action for the nested walk.
+ */
+static enum bftw_action bftw_ids_callback(const struct BFTW *ftwbuf, void *ptr) {
+ struct bftw_ids_state *state = ptr;
+
+ if (state->force_visit) {
+ struct BFTW *mutbuf = (struct BFTW *)ftwbuf; // Casts away const to rewrite the visit kind in place
+ mutbuf->visit = state->visit;
+ }
+
+ if (ftwbuf->type == BFS_ERROR) {
+ if (ftwbuf->depth + 1 >= state->min_depth) {
+ return state->delegate(ftwbuf, state->ptr);
+ } else {
+ return BFTW_PRUNE;
+ }
+ }
+
+ if (ftwbuf->depth < state->min_depth) {
+ if (trie_find_str(&state->pruned, ftwbuf->path)) {
+ return BFTW_PRUNE;
+ } else {
+ return BFTW_CONTINUE; // Above the window: descend without reporting
+ }
+ } else if (state->visit == BFTW_POST) {
+ if (trie_find_str(&state->pruned, ftwbuf->path)) {
+ return BFTW_PRUNE;
+ }
+ }
+
+ enum bftw_action ret = BFTW_CONTINUE;
+ if (ftwbuf->visit == state->visit) {
+ ret = state->delegate(ftwbuf, state->ptr);
+ }
+
+ switch (ret) {
+ case BFTW_CONTINUE:
+ if (ftwbuf->type == BFS_DIR && ftwbuf->depth + 1 >= state->max_depth) {
+ state->bottom = false; // There is more below this window
+ ret = BFTW_PRUNE;
+ }
+ break;
+
+ case BFTW_PRUNE:
+ if (ftwbuf->type == BFS_DIR) {
+ if (!trie_insert_str(&state->pruned, ftwbuf->path)) {
+ state->nested.error = errno;
+ ret = BFTW_STOP;
+ }
+ }
+ break;
+
+ case BFTW_STOP:
+ break;
+ }
+
+ return ret;
+}
+
+/**
+ * Initialize iterative deepening state.
+ *
+ * The nested walk is set up with bftw_ids_callback() as its callback and with
+ * BFTW_POST_ORDER cleared; the original callback and pointer are kept as the
+ * delegate.  The search starts with the depth window [0, 1).
+ *
+ * The pruned-path trie is only initialized once bftw_state_init() has
+ * succeeded, so a failed initialization leaves nothing behind that the caller
+ * would have to destroy (callers return immediately on failure, without
+ * calling bftw_ids_destroy()).
+ *
+ * @param state
+ *         The state to initialize.
+ * @param args
+ *         The arguments passed to bftw().
+ * @return
+ *         0 on success, or the non-zero bftw_state_init() result on failure.
+ */
+static int bftw_ids_init(struct bftw_ids_state *state, const struct bftw_args *args) {
+	state->delegate = args->callback;
+	state->ptr = args->ptr;
+	state->visit = BFTW_PRE;
+	state->force_visit = false;
+	state->min_depth = 0;
+	state->max_depth = 1;
+	state->bottom = false;
+
+	struct bftw_args ids_args = *args;
+	ids_args.callback = bftw_ids_callback;
+	ids_args.ptr = state;
+	ids_args.flags &= ~BFTW_POST_ORDER;
+
+	int ret = bftw_state_init(&state->nested, &ids_args);
+	if (ret != 0) {
+		return ret;
+	}
+
+	// Nothing can fail past this point, so the trie never outlives a
+	// failed initialization
+	trie_init(&state->pruned);
+	return 0;
+}
+
+/**
+ * Finish an iterative deepening search.
+ *
+ * @return
+ *         The bftw() return value, from bftw_state_destroy().
+ */
+static int bftw_ids_destroy(struct bftw_ids_state *state) {
+	struct bftw_state *nested = &state->nested;
+
+	trie_destroy(&state->pruned);
+
+	int ret = bftw_state_destroy(nested);
+	return ret;
+}
+
+/**
+ * Iterative deepening bftw() wrapper.
+ *
+ * Re-runs the nested walk with the depth window [min_depth, max_depth)
+ * shifted one level deeper each time, until a pass completes without the
+ * callback clearing the bottom flag.  With BFTW_POST_ORDER, the window is
+ * then walked back up one level at a time with every visit forced to
+ * BFTW_POST.  Any failed pass ends the search early.
+ */
+static int bftw_ids(const struct bftw_args *args) {
+	struct bftw_ids_state state;
+	if (bftw_ids_init(&state, args) != 0) {
+		return -1;
+	}
+
+	bool ok = true;
+
+	// Descend
+	while (ok && !state.bottom) {
+		state.bottom = true;
+
+		ok = bftw_impl(&state.nested) == 0;
+		if (ok) {
+			++state.min_depth;
+			++state.max_depth;
+		}
+	}
+
+	// Ascend
+	if (ok && (args->flags & BFTW_POST_ORDER)) {
+		state.visit = BFTW_POST;
+		state.force_visit = true;
+
+		while (ok && state.min_depth > 0) {
+			--state.max_depth;
+			--state.min_depth;
+
+			ok = bftw_impl(&state.nested) == 0;
+		}
+	}
+
+	return bftw_ids_destroy(&state);
+}
+
+/**
+ * Exponential deepening bftw() wrapper.
+ *
+ * Like iterative deepening, except that each pass starts where the previous
+ * window ended and the maximum depth doubles.  With BFTW_POST_ORDER, a single
+ * additional pass over the whole tree delivers the post-order visits.  A
+ * failed pass ends the search early.
+ */
+static int bftw_eds(const struct bftw_args *args) {
+	struct bftw_ids_state state;
+	if (bftw_ids_init(&state, args) != 0) {
+		return -1;
+	}
+
+	bool failed = false;
+
+	while (!failed && !state.bottom) {
+		state.bottom = true;
+
+		if (bftw_impl(&state.nested) == 0) {
+			state.min_depth = state.max_depth;
+			state.max_depth *= 2;
+		} else {
+			failed = true;
+		}
+	}
+
+	if (!failed && (args->flags & BFTW_POST_ORDER)) {
+		state.visit = BFTW_POST;
+		state.min_depth = 0;
+		state.nested.flags |= BFTW_POST_ORDER;
+
+		bftw_impl(&state.nested);
+	}
+
+	return bftw_ids_destroy(&state);
+}
+
+/**
+ * Public entry point: dispatch to the implementation for the requested
+ * search strategy.  An unknown strategy fails with EINVAL.
+ */
+int bftw(const struct bftw_args *args) {
+	enum bftw_strategy strategy = args->strategy;
+
+	if (strategy == BFTW_BFS || strategy == BFTW_DFS) {
+		return bftw_walk(args);
+	}
+	if (strategy == BFTW_IDS) {
+		return bftw_ids(args);
+	}
+	if (strategy == BFTW_EDS) {
+		return bftw_eds(args);
+	}
+
+	errno = EINVAL;
+	return -1;
+}
diff --git a/src/bftw.h b/src/bftw.h
new file mode 100644
index 0000000..8656ca7
--- /dev/null
+++ b/src/bftw.h
@@ -0,0 +1,218 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+/**
+ * A file-walking API based on nftw().
+ */
+
+#ifndef BFS_BFTW_H
+#define BFS_BFTW_H
+
+#include "dir.h"
+#include "stat.h"
+#include <stddef.h>
+
+/**
+ * Possible visit occurrences.
+ *
+ * Reported to the callback in BFTW::visit.
+ */
+enum bftw_visit {
+ /** Pre-order visit. */
+ BFTW_PRE,
+ /** Post-order visit (only delivered when BFTW_POST_ORDER is set). */
+ BFTW_POST,
+};
+
+/**
+ * Cached bfs_stat() info for a file.
+ *
+ * Holds one buffer/error pair for each of the two ways a file can be
+ * stat()ed: following links, and not following them.
+ */
+struct bftw_stat {
+ /** The bfs_stat(BFS_STAT_FOLLOW) buffer. */
+ const struct bfs_stat *stat_buf;
+ /** The bfs_stat(BFS_STAT_NOFOLLOW) buffer. */
+ const struct bfs_stat *lstat_buf;
+ /** The cached bfs_stat(BFS_STAT_FOLLOW) error. */
+ int stat_err;
+ /** The cached bfs_stat(BFS_STAT_NOFOLLOW) error. */
+ int lstat_err;
+};
+
+/**
+ * Data about the current file for the bftw() callback.
+ *
+ * The callback receives a pointer to const; the structure is filled in anew
+ * before every invocation.
+ */
+struct BFTW {
+ /** The path to the file. */
+ const char *path;
+ /** The string offset of the filename within path. */
+ size_t nameoff;
+
+ /** The root path passed to bftw(). */
+ const char *root;
+ /** The depth of this file in the traversal (0 for the roots). */
+ size_t depth;
+ /** Which visit this is. */
+ enum bftw_visit visit;
+
+ /** The file type. */
+ enum bfs_type type;
+ /** The errno that occurred, if type == BFS_ERROR. */
+ int error;
+
+ /** A parent file descriptor for the *at() family of calls (AT_FDCWD if no parent is open). */
+ int at_fd;
+ /** The path relative to at_fd for the *at() family of calls. */
+ const char *at_path;
+
+ /** Flags for bfs_stat(). */
+ enum bfs_stat_flags stat_flags;
+ /** Cached bfs_stat() info. */
+ struct bftw_stat stat_bufs;
+};
+
+/**
+ * Get bfs_stat() info for a file encountered during bftw(), caching the result
+ * whenever possible.
+ *
+ * @param ftwbuf
+ *         bftw() data for the file to stat.
+ * @param flags
+ *         Flags for bfs_stat().  Pass ftwbuf->stat_flags for the default flags.
+ * @return
+ *         A pointer to a bfs_stat() buffer, or NULL if the call failed
+ *         (bftw.c reads errno after a NULL return).
+ */
+const struct bfs_stat *bftw_stat(const struct BFTW *ftwbuf, enum bfs_stat_flags flags);
+
+/**
+ * Get bfs_stat() info for a file encountered during bftw(), if it has already
+ * been cached.
+ *
+ * @param ftwbuf
+ *         bftw() data for the file to stat.
+ * @param flags
+ *         Flags for bfs_stat().  Pass ftwbuf->stat_flags for the default flags.
+ * @return
+ *         A pointer to a bfs_stat() buffer, or NULL if no stat info is cached.
+ */
+const struct bfs_stat *bftw_cached_stat(const struct BFTW *ftwbuf, enum bfs_stat_flags flags);
+
+/**
+ * Get the type of a file encountered during bftw(), with flags controlling
+ * whether to follow links.  This function will avoid calling bfs_stat() if
+ * possible.
+ *
+ * @param ftwbuf
+ *         bftw() data for the file to check.
+ * @param flags
+ *         Flags for bfs_stat().  Pass ftwbuf->stat_flags for the default flags.
+ * @return
+ *         The type of the file, or BFS_ERROR if an error occurred.
+ */
+enum bfs_type bftw_type(const struct BFTW *ftwbuf, enum bfs_stat_flags flags);
+
+/**
+ * Walk actions returned by the bftw() callback.
+ *
+ * Returning any other value makes the walk stop with EINVAL.
+ */
+enum bftw_action {
+ /** Keep walking (descends only into directories on their pre-order visit). */
+ BFTW_CONTINUE,
+ /** Skip this path's children. */
+ BFTW_PRUNE,
+ /** Stop walking. */
+ BFTW_STOP,
+};
+
+/**
+ * Callback function type for bftw().
+ *
+ * @param ftwbuf
+ *         Data about the current file.
+ * @param ptr
+ *         The pointer passed to bftw() (bftw_args::ptr).
+ * @return
+ *         An action value (one of enum bftw_action).
+ */
+typedef enum bftw_action bftw_callback(const struct BFTW *ftwbuf, void *ptr);
+
+/**
+ * Flags that control bftw() behavior.
+ *
+ * The flags are bits that can be combined.
+ */
+enum bftw_flags {
+ /** stat() each encountered file. */
+ BFTW_STAT = 1 << 0,
+ /** Attempt to recover from encountered errors (report them as BFS_ERROR instead of stopping). */
+ BFTW_RECOVER = 1 << 1,
+ /** Visit directories in post-order as well as pre-order. */
+ BFTW_POST_ORDER = 1 << 2,
+ /** If the initial path is a symbolic link, follow it. */
+ BFTW_FOLLOW_ROOTS = 1 << 3,
+ /** Follow all symbolic links. */
+ BFTW_FOLLOW_ALL = 1 << 4,
+ /** Detect directory cycles (reported as BFS_ERROR with ELOOP). */
+ BFTW_DETECT_CYCLES = 1 << 5,
+ /** Skip mount points and their descendants. */
+ BFTW_SKIP_MOUNTS = 1 << 6,
+ /** Skip the descendants of mount points. */
+ BFTW_PRUNE_MOUNTS = 1 << 7,
+ /** Sort directory entries before processing them. */
+ BFTW_SORT = 1 << 8,
+ /** Read each directory into memory before processing its children. */
+ BFTW_BUFFER = 1 << 9,
+ /** Include whiteouts in the search results. */
+ BFTW_WHITEOUTS = 1 << 10,
+};
+
+/**
+ * Tree search strategies for bftw().
+ *
+ * bftw() fails with EINVAL for any other value.
+ */
+enum bftw_strategy {
+ /** Breadth-first search. */
+ BFTW_BFS,
+ /** Depth-first search. */
+ BFTW_DFS,
+ /** Iterative deepening search (the depth limit grows by one each pass). */
+ BFTW_IDS,
+ /** Exponential deepening search (the depth limit doubles each pass). */
+ BFTW_EDS,
+};
+
+/**
+ * Structure for holding the arguments passed to bftw().
+ */
+struct bftw_args {
+ /** The path(s) to start from. */
+ const char **paths;
+ /** The number of starting paths. */
+ size_t npaths;
+
+ /** The callback to invoke. */
+ bftw_callback *callback;
+ /** A pointer which is passed to the callback. */
+ void *ptr;
+
+ /** The maximum number of file descriptors to keep open. */
+ int nopenfd;
+ /** The maximum number of threads to use. */
+ int nthreads;
+
+ /** Flags that control bftw() behavior. */
+ enum bftw_flags flags;
+ /** The search strategy to use. */
+ enum bftw_strategy strategy;
+
+ /** The parsed mount table, if available. */
+ const struct bfs_mtab *mtab;
+};
+
+/**
+ * Breadth First Tree Walk (or Better File Tree Walk).
+ *
+ * Like ftw(3) and nftw(3), this function walks a directory tree recursively,
+ * and invokes a callback for each path it encounters.
+ *
+ * @param args
+ *         The arguments that control the walk.
+ * @return
+ *         0 on success, or -1 on failure with errno set (EINVAL for an
+ *         unknown strategy).
+ */
+int bftw(const struct bftw_args *args);
+
+#endif // BFS_BFTW_H
diff --git a/src/bit.h b/src/bit.h
new file mode 100644
index 0000000..17cfbcf
--- /dev/null
+++ b/src/bit.h
@@ -0,0 +1,401 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+/**
+ * Bits & bytes.
+ */
+
+#ifndef BFS_BIT_H
+#define BFS_BIT_H
+
+#include "prelude.h"
+#include <limits.h>
+#include <stdint.h>
+
+#if __STDC_VERSION__ >= C23
+# include <stdbit.h>
+#endif
+
+// C23 polyfill: _WIDTH macros
+
+// The U*_MAX macros are of the form 2**n - 1, and we want to extract the n.
+// One way would be *_WIDTH = popcount(*_MAX). Alternatively, we can use
+// Hallvard B. Furuseth's technique from [1], which is shorter.
+//
+// [1]: https://groups.google.com/g/comp.lang.c/c/NfedEFBFJ0k
+
+// Let mask be of the form 2**m - 1, e.g. 0b111, and let n range over
+// [0b0, 0b1, 0b11, 0b111, 0b1111, ...]. Then we have
+//
+// n % 0b111
+// == [0b0, 0b1, 0b11, 0b0, 0b1, 0b11, ...]
+// n / (n % 0b111 + 1)
+// == [0b0 (x3), 0b111 (x3), 0b111111 (x3), ...]
+// n / (n % 0b111 + 1) / 0b111
+// == [0b0 (x3), 0b1 (x3), 0b1001 (x3), 0b1001001 (x3), ...]
+// n / (n % 0b111 + 1) / 0b111 % 0b111
+// == [0 (x3), 1 (x3), 2 (x3), ...]
+// == UMAX_CHUNK(n, 0b111)
+#define UMAX_CHUNK(n, mask) (n / (n % mask + 1) / mask % mask) // arguments are expanded unparenthesized: pass a single constant or a fully parenthesized expression
+
+// 8 * UMAX_CHUNK(n, 255) gives [0 (x8), 8 (x8), 16 (x8), ...]. To that we add
+// [0, 1, 2, ..., 6, 7, 0, 1, ...], which we get from a linear interpolation on
+// n % 255:
+//
+// n % 255
+// == [0, 1, 3, 7, 15, 31, 63, 127, 0, ...]
+// 86 / (n % 255 + 12)
+// == [7, 6, 5, 4, 3, 2, 1, 0, 7, ...]
+#define UMAX_INTERP(n) (7 - 86 / (n % 255 + 12)) // number of bits left over after the whole bytes counted by UMAX_CHUNK(n, 255)
+
+#define UMAX_WIDTH(n) (8 * UMAX_CHUNK(n, 255) + UMAX_INTERP(n)) // e.g. UMAX_WIDTH(0xFF) == 8, UMAX_WIDTH(0x7FFF) == 15, UMAX_WIDTH(0xFFFF) == 16
+
+#ifndef CHAR_WIDTH // each *_WIDTH macro below is only defined when the headers did not already provide it
+# define CHAR_WIDTH CHAR_BIT
+#endif
+
+// See https://gcc.gnu.org/onlinedocs/cpp/Common-Predefined-Macros.html
+
+#ifndef USHRT_WIDTH
+# ifdef __SHRT_WIDTH__
+# define USHRT_WIDTH __SHRT_WIDTH__ // the predefined macro is named after the signed type; its value is reused for the unsigned one
+# else
+# define USHRT_WIDTH UMAX_WIDTH(USHRT_MAX) // no predefined macro: derive the width from the maximum value
+# endif
+#endif
+
+#ifndef UINT_WIDTH
+# ifdef __INT_WIDTH__
+# define UINT_WIDTH __INT_WIDTH__
+# else
+# define UINT_WIDTH UMAX_WIDTH(UINT_MAX)
+# endif
+#endif
+
+#ifndef ULONG_WIDTH
+# ifdef __LONG_WIDTH__
+# define ULONG_WIDTH __LONG_WIDTH__
+# else
+# define ULONG_WIDTH UMAX_WIDTH(ULONG_MAX)
+# endif
+#endif
+
+#ifndef ULLONG_WIDTH
+# ifdef __LONG_LONG_WIDTH__
+# define ULLONG_WIDTH __LONG_LONG_WIDTH__
+# elif defined(__LLONG_WIDTH__) // Clang
+# define ULLONG_WIDTH __LLONG_WIDTH__
+# else
+# define ULLONG_WIDTH UMAX_WIDTH(ULLONG_MAX)
+# endif
+#endif
+
+#ifndef SIZE_WIDTH
+# ifdef __SIZE_WIDTH__
+# define SIZE_WIDTH __SIZE_WIDTH__
+# else
+# define SIZE_WIDTH UMAX_WIDTH(SIZE_MAX)
+# endif
+#endif
+
+// ptrdiff_t is a signed type, so PTRDIFF_MAX is 2**(w - 1) - 1 rather than
+// 2**w - 1.  UMAX_WIDTH() therefore only counts the value bits, and the sign
+// bit has to be added back to get the full width.
+#ifndef PTRDIFF_WIDTH
+# ifdef __PTRDIFF_WIDTH__
+# define PTRDIFF_WIDTH __PTRDIFF_WIDTH__
+# else
+# define PTRDIFF_WIDTH (1 + UMAX_WIDTH(PTRDIFF_MAX))
+# endif
+#endif
+
+#ifndef UINTPTR_WIDTH
+# ifdef __INTPTR_WIDTH__
+# define UINTPTR_WIDTH __INTPTR_WIDTH__ // the predefined macro is named after the signed type; its value is reused for the unsigned one
+# else
+# define UINTPTR_WIDTH UMAX_WIDTH(UINTPTR_MAX)
+# endif
+#endif
+
+#ifndef UINTMAX_WIDTH
+# ifdef __INTMAX_WIDTH__
+# define UINTMAX_WIDTH __INTMAX_WIDTH__
+# else
+# define UINTMAX_WIDTH UMAX_WIDTH(UINTMAX_MAX)
+# endif
+#endif
+
+#ifndef UCHAR_WIDTH // from here on, each remaining width is defined as equal to the width of its counterpart defined above
+# define UCHAR_WIDTH CHAR_WIDTH
+#endif
+#ifndef SCHAR_WIDTH
+# define SCHAR_WIDTH CHAR_WIDTH
+#endif
+#ifndef SHRT_WIDTH
+# define SHRT_WIDTH USHRT_WIDTH
+#endif
+#ifndef INT_WIDTH
+# define INT_WIDTH UINT_WIDTH
+#endif
+#ifndef LONG_WIDTH
+# define LONG_WIDTH ULONG_WIDTH
+#endif
+#ifndef LLONG_WIDTH
+# define LLONG_WIDTH ULLONG_WIDTH
+#endif
+#ifndef INTPTR_WIDTH
+# define INTPTR_WIDTH UINTPTR_WIDTH
+#endif
+#ifndef INTMAX_WIDTH
+# define INTMAX_WIDTH UINTMAX_WIDTH
+#endif
+
+// C23 polyfill: byte order (each macro prefers the C23 name, then the compiler's predefined macro, then a fixed fallback)
+
+#ifdef __STDC_ENDIAN_LITTLE__
+# define ENDIAN_LITTLE __STDC_ENDIAN_LITTLE__
+#elif defined(__ORDER_LITTLE_ENDIAN__)
+# define ENDIAN_LITTLE __ORDER_LITTLE_ENDIAN__
+#else
+# define ENDIAN_LITTLE 1234
+#endif
+
+#ifdef __STDC_ENDIAN_BIG__
+# define ENDIAN_BIG __STDC_ENDIAN_BIG__
+#elif defined(__ORDER_BIG_ENDIAN__)
+# define ENDIAN_BIG __ORDER_BIG_ENDIAN__
+#else
+# define ENDIAN_BIG 4321
+#endif
+
+#ifdef __STDC_ENDIAN_NATIVE__
+# define ENDIAN_NATIVE __STDC_ENDIAN_NATIVE__
+#elif defined(__BYTE_ORDER__)
+# define ENDIAN_NATIVE __BYTE_ORDER__
+#else
+# define ENDIAN_NATIVE 0 // byte order unknown: equal to neither fallback value (1234 / 4321) above
+#endif
+
+// Byte swapping.  The published C23 <stdbit.h> does not declare the
+// stdc_memreverse8u*() functions, so a C23 compiler gets no special case: use
+// the GCC/Clang builtins when they exist, and plain shifts otherwise.
+#if __GNUC__
+# define bswap_u16 __builtin_bswap16
+# define bswap_u32 __builtin_bswap32
+# define bswap_u64 __builtin_bswap64
+#else
+
+/** Swap the two bytes of a 16-bit integer. */
+static inline uint16_t bswap_u16(uint16_t n) {
+	// n is promoted to int here; converting back to uint16_t drops the
+	// bits shifted above bit 15
+	return (n << 8) | (n >> 8);
+}
+
+/** Reverse the four bytes of a 32-bit integer, one 16-bit half at a time. */
+static inline uint32_t bswap_u32(uint32_t n) {
+	return ((uint32_t)bswap_u16(n) << 16) | bswap_u16(n >> 16);
+}
+
+/** Reverse the eight bytes of a 64-bit integer, one 32-bit half at a time. */
+static inline uint64_t bswap_u64(uint64_t n) {
+	return ((uint64_t)bswap_u32(n) << 32) | bswap_u32(n >> 32);
+}
+
+#endif
+
+/** A single byte has nothing to reverse; this exists so bswap() covers uint8_t. */
+static inline uint8_t bswap_u8(uint8_t n) {
+	return n;
+}
+
+/**
+ * Reverse the byte order of an integer.
+ *
+ * The argument must have type uint8_t, uint16_t, uint32_t or uint64_t; the
+ * selection has no default branch, so any other type is a compile error.
+ */
+#define bswap(n) \
+	_Generic((n), \
+		uint8_t: bswap_u8, \
+		uint16_t: bswap_u16, \
+		uint32_t: bswap_u32, \
+		uint64_t: bswap_u64)(n)
+
+// Define an overload for each unsigned type: expands macro(type, suffix, width) once per standard unsigned integer type
+#define UINT_OVERLOADS(macro) \
+ macro(unsigned char, _uc, UCHAR_WIDTH) \
+ macro(unsigned short, _us, USHRT_WIDTH) \
+ macro(unsigned int, _ui, UINT_WIDTH) \
+ macro(unsigned long, _ul, ULONG_WIDTH) \
+ macro(unsigned long long, _ull, ULLONG_WIDTH)
+
+// Select an overload based on an unsigned integer type (char and the signed types are accepted too, and map to the overload of the unsigned type of the same rank)
+#define UINT_SELECT(n, name) \
+ _Generic((n), \
+ char: name##_uc, \
+ signed char: name##_uc, \
+ unsigned char: name##_uc, \
+ signed short: name##_us, \
+ unsigned short: name##_us, \
+ signed int: name##_ui, \
+ unsigned int: name##_ui, \
+ signed long: name##_ul, \
+ unsigned long: name##_ul, \
+ signed long long: name##_ull, \
+ unsigned long long: name##_ull)
+
+// C23 polyfill: bit utilities (with a C23 compiler the names below are plain aliases for the <stdbit.h> type-generic macros)
+
+#if __STDC_VERSION__ >= C23
+# define count_ones stdc_count_ones
+# define count_zeros stdc_count_zeros
+# define rotate_left stdc_rotate_left // NOTE(review): stdc_rotate_left/right do not appear in the published C23 <stdbit.h> -- confirm this resolves on a C23 toolchain
+# define rotate_right stdc_rotate_right
+# define leading_zeros stdc_leading_zeros
+# define leading_ones stdc_leading_ones
+# define trailing_zeros stdc_trailing_zeros
+# define trailing_ones stdc_trailing_ones
+# define first_leading_zero stdc_first_leading_zero
+# define first_leading_one stdc_first_leading_one
+# define first_trailing_zero stdc_first_trailing_zero
+# define first_trailing_one stdc_first_trailing_one
+# define has_single_bit stdc_has_single_bit
+# define bit_width stdc_bit_width
+# define bit_ceil stdc_bit_ceil
+# define bit_floor stdc_bit_floor
+#else
+
+#if __GNUC__
+
+// GCC provides builtins for unsigned {int,long,long long}, so promote char/short
+#define UINT_BUILTIN_uc(name) __builtin_##name
+#define UINT_BUILTIN_us(name) __builtin_##name
+#define UINT_BUILTIN_ui(name) __builtin_##name
+#define UINT_BUILTIN_ul(name) __builtin_##name##l
+#define UINT_BUILTIN_ull(name) __builtin_##name##ll
+#define UINT_BUILTIN(name, suffix) UINT_BUILTIN##suffix(name) // e.g. UINT_BUILTIN(clz, _ul) expands to __builtin_clzl
+
+#define BUILTIN_WIDTH_uc UINT_WIDTH
+#define BUILTIN_WIDTH_us UINT_WIDTH
+#define BUILTIN_WIDTH_ui UINT_WIDTH
+#define BUILTIN_WIDTH_ul ULONG_WIDTH
+#define BUILTIN_WIDTH_ull ULLONG_WIDTH
+#define BUILTIN_WIDTH(suffix) BUILTIN_WIDTH##suffix // width of the type the builtin chosen for this suffix operates on
+
+#define COUNT_ONES(type, suffix, width) \
+ static inline int count_ones##suffix(type n) { \
+ return UINT_BUILTIN(popcount, suffix)(n); \
+ }
+
+#define LEADING_ZEROS(type, suffix, width) /* zero is answered without the builtin; the subtraction removes the extra leading zeros of the wider builtin type */ \
+ static inline int leading_zeros##suffix(type n) { \
+ return n \
+ ? UINT_BUILTIN(clz, suffix)(n) - (BUILTIN_WIDTH(suffix) - width) \
+ : width; \
+ }
+
+#define TRAILING_ZEROS(type, suffix, width) /* zero is answered without the builtin: every bit of the type is a trailing zero */ \
+ static inline int trailing_zeros##suffix(type n) { \
+ return n ? UINT_BUILTIN(ctz, suffix)(n) : (int)width; \
+ }
+
+#define FIRST_TRAILING_ONE(type, suffix, width) \
+ static inline int first_trailing_one##suffix(type n) { \
+ return UINT_BUILTIN(ffs, suffix)(n); \
+ }
+
+#else // !__GNUC__
+
+#define COUNT_ONES(type, suffix, width) /* each n &= n - 1 clears the lowest set bit, so the loop runs once per set bit */ \
+ static inline int count_ones##suffix(type n) { \
+ int ret; \
+ for (ret = 0; n; ++ret) { \
+ n &= n - 1; \
+ } \
+ return ret; \
+ }
+
+#define LEADING_ZEROS(type, suffix, width) /* scan down from the top bit; the probe bit shifts out to 0 after width steps, so n == 0 yields width */ \
+ static inline int leading_zeros##suffix(type n) { \
+ type bit = (type)1 << (width - 1); \
+ int ret; \
+ for (ret = 0; bit && !(n & bit); ++ret, bit >>= 1); \
+ return ret; \
+ }
+
+#define TRAILING_ZEROS(type, suffix, width) /* scan up from bit 0; the probe bit wraps to 0 after width steps, so n == 0 yields width */ \
+ static inline int trailing_zeros##suffix(type n) { \
+ type bit = 1; \
+ int ret; \
+ for (ret = 0; bit && !(n & bit); ++ret, bit <<= 1); \
+ return ret; \
+ }
+
+#define FIRST_TRAILING_ONE(type, suffix, width) /* 1-based index of the lowest set bit, or 0 when no bit is set */ \
+ static inline int first_trailing_one##suffix(type n) { \
+ return n ? trailing_zeros##suffix(n) + 1 : 0; \
+ }
+
+#endif // !__GNUC__
+
+UINT_OVERLOADS(COUNT_ONES) // defines count_ones_uc, _us, _ui, _ul and _ull
+UINT_OVERLOADS(LEADING_ZEROS) // likewise for leading_zeros_*
+UINT_OVERLOADS(TRAILING_ZEROS) // likewise for trailing_zeros_*
+UINT_OVERLOADS(FIRST_TRAILING_ONE) // likewise for first_trailing_one_*
+
+#define ROTATE_LEFT(type, suffix, width) /* the % makes the complementary shift 0 instead of width when c == 0 */ \
+ static inline type rotate_left##suffix(type n, int c) { \
+ return (n << c) | (n >> ((width - c) % width)); \
+ }
+
+#define ROTATE_RIGHT(type, suffix, width) /* mirror image of ROTATE_LEFT */ \
+ static inline type rotate_right##suffix(type n, int c) { \
+ return (n >> c) | (n << ((width - c) % width)); \
+ }
+
+#define FIRST_LEADING_ONE(type, suffix, width) /* 1-based index of the highest set bit counted from bit 0, or 0 when n == 0 */ \
+ static inline int first_leading_one##suffix(type n) { \
+ return width - leading_zeros##suffix(n); \
+ }
+
+#define HAS_SINGLE_BIT(type, suffix, width) \
+ static inline bool has_single_bit##suffix(type n) { \
+ /** Branchless n && !(n & (n - 1)) */ \
+ return n - 1 < (n ^ (n - 1)); \
+ }
+
+UINT_OVERLOADS(ROTATE_LEFT) // one rotate_left_* / rotate_right_* / first_leading_one_* / has_single_bit_* per unsigned type
+UINT_OVERLOADS(ROTATE_RIGHT)
+UINT_OVERLOADS(FIRST_LEADING_ONE)
+UINT_OVERLOADS(HAS_SINGLE_BIT)
+
+#define count_ones(n) UINT_SELECT(n, count_ones)(n) // type-generic front ends: UINT_SELECT picks the overload matching the type of n
+#define count_zeros(n) UINT_SELECT(n, count_ones)(~(n)) // the zeros of n are the ones of ~n
+
+#define rotate_left(n, c) UINT_SELECT(n, rotate_left)(n, c)
+#define rotate_right(n, c) UINT_SELECT(n, rotate_right)(n, c)
+
+#define leading_zeros(n) UINT_SELECT(n, leading_zeros)(n)
+#define leading_ones(n) UINT_SELECT(n, leading_zeros)(~(n)) // each *_ones / *_zero variant below reuses its counterpart on the complement
+
+#define trailing_zeros(n) UINT_SELECT(n, trailing_zeros)(n)
+#define trailing_ones(n) UINT_SELECT(n, trailing_zeros)(~(n))
+
+#define first_leading_one(n) UINT_SELECT(n, first_leading_one)(n)
+#define first_leading_zero(n) UINT_SELECT(n, first_leading_one)(~(n))
+
+#define first_trailing_one(n) UINT_SELECT(n, first_trailing_one)(n)
+#define first_trailing_zero(n) UINT_SELECT(n, first_trailing_one)(~(n))
+
+#define has_single_bit(n) UINT_SELECT(n, has_single_bit)(n)
+
+#define BIT_FLOOR(type, suffix, width) /* largest power of two <= n, or 0 for n == 0; e.g. 5 -> 4 */ \
+ static inline type bit_floor##suffix(type n) { \
+ return n ? (type)1 << (first_leading_one##suffix(n) - 1) : 0; \
+ }
+
+#define BIT_CEIL(type, suffix, width) /* smallest power of two >= n; e.g. 0 -> 1, 4 -> 4, 5 -> 8 (n - !!n subtracts 1 only when n != 0) */ \
+ static inline type bit_ceil##suffix(type n) { \
+ return (type)1 << first_leading_one##suffix(n - !!n); \
+ }
+
+UINT_OVERLOADS(BIT_FLOOR)
+UINT_OVERLOADS(BIT_CEIL)
+
+#define bit_width(n) first_leading_one(n) // number of bits needed to represent n (0 for n == 0)
+#define bit_floor(n) UINT_SELECT(n, bit_floor)(n)
+#define bit_ceil(n) UINT_SELECT(n, bit_ceil)(n)
+
+#endif // __STDC_VERSION__ < C23
+
+#endif // BFS_BIT_H
diff --git a/src/color.c b/src/color.c
new file mode 100644
index 0000000..a0e553f
--- /dev/null
+++ b/src/color.c
@@ -0,0 +1,1418 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+#include "prelude.h"
+#include "color.h"
+#include "alloc.h"
+#include "bfstd.h"
+#include "bftw.h"
+#include "diag.h"
+#include "dir.h"
+#include "dstring.h"
+#include "expr.h"
+#include "fsade.h"
+#include "stat.h"
+#include "trie.h"
+#include <errno.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+/**
+ * An escape sequence, which may contain embedded NUL bytes.
+ *
+ * Because of that, the length is stored explicitly and the bytes are compared
+ * and copied with memcmp()/memcpy() rather than the str*() functions.
+ */
+struct esc_seq {
+ /** The length of the escape sequence. */
+ size_t len;
+ /** The escape sequence itself, without a terminating NUL. */
+ char seq[];
+};
+
+/**
+ * A colored file extension, like `*.tar=01;31`.
+ *
+ * set_ext() stores the extension reversed (and, after insertion into the
+ * case-sensitive trie, lowercased) so that suffix matches become prefix matches.
+ */
+struct ext_color {
+ /** Priority, to disambiguate case-sensitive and insensitive matches (later definitions get larger values). */
+ size_t priority;
+ /** The escape sequence associated with this extension. */
+ struct esc_seq *esc;
+ /** The length of the extension to match. */
+ size_t len;
+ /** Whether the comparison should be case-sensitive. */
+ bool case_sensitive;
+ /** The extension to match (NUL-terminated). */
+ char ext[];
+};
+
+struct colors { // The color table built by parse_colors(): built-in defaults, overridden by $LS_COLORS and then $BFS_COLORS
+ /** esc_seq allocator. */
+ struct varena esc_arena;
+ /** ext_color allocator. */
+ struct varena ext_arena;
+
+ // Known dircolors keys
+
+ struct esc_seq *reset; // "rs"
+ struct esc_seq *leftcode; // "lc"
+ struct esc_seq *rightcode; // "rc"
+ struct esc_seq *endcode; // "ec"
+ struct esc_seq *clear_to_eol; // "cl"
+
+ struct esc_seq *bold; // "bld"
+ struct esc_seq *gray; // "gry"
+ struct esc_seq *red; // "red"
+ struct esc_seq *green; // "grn"
+ struct esc_seq *yellow; // "ylw"
+ struct esc_seq *blue; // "blu"
+ struct esc_seq *magenta; // "mag"
+ struct esc_seq *cyan; // "cyn"
+ struct esc_seq *white; // "wht"
+
+ struct esc_seq *warning; // "wrn"
+ struct esc_seq *error; // "err"
+
+ struct esc_seq *normal; // "no"
+
+ struct esc_seq *file; // "fi"
+ struct esc_seq *multi_hard; // "mh"
+ struct esc_seq *executable; // "ex"
+ struct esc_seq *capable; // "ca"
+ struct esc_seq *setgid; // "sg"
+ struct esc_seq *setuid; // "su"
+
+ struct esc_seq *directory; // "di"
+ struct esc_seq *sticky; // "st"
+ struct esc_seq *other_writable; // "ow"
+ struct esc_seq *sticky_other_writable; // "tw"
+
+ struct esc_seq *link; // "ln"
+ struct esc_seq *orphan; // "or"
+ struct esc_seq *missing; // "mi"
+ bool link_as_target; // set by parse_colors() when the "ln" value is the literal string "target"
+
+ struct esc_seq *blockdev; // "bd"
+ struct esc_seq *chardev; // "cd"
+ struct esc_seq *door; // "do"
+ struct esc_seq *pipe; // "pi"
+ struct esc_seq *socket; // "so"
+
+ /** A mapping from color names (fi, di, ln, etc.) to struct fields. */
+ struct trie names;
+
+ /** Number of extensions. */
+ size_t ext_count;
+ /** Longest extension. */
+ size_t ext_len;
+ /** Case-sensitive extension trie. */
+ struct trie ext_trie;
+ /** Case-insensitive extension trie. */
+ struct trie iext_trie;
+};
+
+/**
+ * Allocate an escape sequence from the color table's esc_arena and fill it in.
+ *
+ * @param colors
+ *         The color table whose arena provides the storage.
+ * @param seq
+ *         The bytes to copy; they do not need a terminating NUL.
+ * @param len
+ *         The number of bytes to copy from seq.
+ * @return
+ *         The new escape sequence, or NULL if the allocation failed.
+ */
+static struct esc_seq *new_esc(struct colors *colors, const char *seq, size_t len) {
+	struct esc_seq *result = varena_alloc(&colors->esc_arena, len);
+	if (!result) {
+		return NULL;
+	}
+
+	result->len = len;
+	memcpy(result->seq, seq, len);
+	return result;
+}
+
+/**
+ * Return an escape sequence to the arena it was allocated from.
+ *
+ * The sequence's own length is handed back to the arena, so seq must not be
+ * NULL.
+ */
+static void free_esc(struct colors *colors, struct esc_seq *seq) {
+	struct varena *arena = &colors->esc_arena;
+	size_t len = seq->len;
+	varena_free(arena, seq, len);
+}
+
+/**
+ * Initialize a color in the table.
+ *
+ * @param colors
+ *         The color table being built.
+ * @param name
+ *         The key (e.g. "di") under which the field is registered in colors->names.
+ * @param value
+ *         The default escape sequence as a C string, or NULL for no default.
+ * @param[out] field
+ *         The struct field to fill in; its address is also what the name maps to.
+ * @return
+ *         0 on success, -1 if an allocation or the trie insertion failed.
+ */
+static int init_esc(struct colors *colors, const char *name, const char *value, struct esc_seq **field) {
+	struct esc_seq *created = value ? new_esc(colors, value, strlen(value)) : NULL;
+	if (value && !created) {
+		return -1;
+	}
+	*field = created;
+
+	struct trie_leaf *entry = trie_insert_str(&colors->names, name);
+	if (entry) {
+		entry->value = field;
+		return 0;
+	}
+
+	return -1;
+}
+
+/**
+ * Check if an escape sequence is equal to a string.
+ *
+ * The comparison is byte-wise over exactly len bytes, so embedded NULs in
+ * either operand are compared like any other byte.
+ */
+static bool esc_eq(const struct esc_seq *esc, const char *str, size_t len) {
+	if (esc->len != len) {
+		return false;
+	}
+
+	return memcmp(esc->seq, str, len) == 0;
+}
+
+/**
+ * Get an escape sequence from the table.
+ *
+ * @return
+ *         The address of the struct field registered under name, or NULL if
+ *         the name is not a known key.
+ */
+static struct esc_seq **get_esc(const struct colors *colors, const char *name) {
+	const struct trie_leaf *found = trie_find_str(&colors->names, name);
+	if (!found) {
+		return NULL;
+	}
+
+	return found->value;
+}
+
+/**
+ * Append an escape sequence to a string.
+ *
+ * The sequence's explicit length is used for the append.
+ *
+ * @return
+ *         Whatever dstrxcat() returns.
+ */
+static int cat_esc(dchar **dstr, const struct esc_seq *seq) {
+	const char *bytes = seq->seq;
+	size_t count = seq->len;
+	return dstrxcat(dstr, bytes, count);
+}
+
+/**
+ * Set a named escape sequence.
+ *
+ * @param colors
+ *         The color table to update.
+ * @param name
+ *         The key to set; keys that are not in the table are ignored.
+ * @param value
+ *         The new value as a dstring, or NULL to clear the field.
+ * @return
+ *         0 on success (including for unknown keys), -1 if allocation failed.
+ */
+static int set_esc(struct colors *colors, const char *name, dchar *value) {
+	struct esc_seq **slot = get_esc(colors, name);
+	if (slot == NULL) {
+		return 0;
+	}
+
+	// Drop whatever was there before
+	struct esc_seq *old = *slot;
+	*slot = NULL;
+	if (old) {
+		free_esc(colors, old);
+	}
+
+	if (value == NULL) {
+		return 0;
+	}
+
+	*slot = new_esc(colors, value, dstrlen(value));
+	return *slot ? 0 : -1;
+}
+
+/**
+ * Reverse a string in place, to turn suffix matches into prefix matches.
+ *
+ * @param ext
+ *         The bytes to reverse.
+ * @param len
+ *         The number of bytes to reverse; 0 leaves ext untouched.
+ */
+static void ext_reverse(char *ext, size_t len) {
+	if (len == 0) {
+		return;
+	}
+
+	// Walk inwards from both ends, swapping as we go
+	char *head = ext;
+	char *tail = ext + len - 1;
+	while (head < tail) {
+		char tmp = *head;
+		*head++ = *tail;
+		*tail-- = tmp;
+	}
+}
+
+/**
+ * Convert a string to lowercase, in place, for case-insensitive matching.
+ *
+ * Only the ASCII letters 'A' through 'Z' are changed.
+ */
+static void ext_tolower(char *ext, size_t len) {
+	// What's internationalization?  Doesn't matter, this is what
+	// GNU ls does.  Luckily, since there's no standard C way to
+	// casefold.  Not using tolower() here since it respects the
+	// current locale, which GNU ls doesn't do.
+	for (char *cur = ext; len > 0; --len, ++cur) {
+		if (*cur >= 'A' && *cur <= 'Z') {
+			*cur += 'a' - 'A';
+		}
+	}
+}
+
+/**
+ * The "smart case" algorithm.
+ *
+ * @param ext
+ *         The current extension being added.
+ * @param prev
+ *         The previous case-sensitive match, if any, for the same extension.
+ * @param iprev
+ *         The previous case-insensitive match, if any, for the same extension.
+ * @return
+ *         Whether this extension should become case-sensitive.
+ */
+static bool ext_case_sensitive(struct ext_color *ext, struct ext_color *prev, struct ext_color *iprev) {
+	if (!iprev) {
+		// First case-insensitive occurrence of this extension, e.g.
+		//
+		//     *.gz=01;31:*.tar.gz=01;33
+		//
+		// so there cannot have been an exact match either
+		bfs_assert(!prev);
+		return false;
+	}
+
+	if (iprev->case_sensitive) {
+		// The last version is already case-sensitive, so stay that way, e.g.
+		//
+		//     *.tar.gz=01;31:*.TAR.GZ=01;32:*.TAR.GZ=01;33
+		return true;
+	}
+
+	if (iprev == prev) {
+		// Exactly the same capitalization as last time, e.g.
+		//
+		//     *.tar.gz=01;31:*.tar.gz=01;33
+		return false;
+	}
+
+	// The capitalization differs.  That only matters when the colors
+	// differ too:
+	//
+	//     *.tar.gz=01;31:*.TAR.GZ=01;31    (same value, stay insensitive)
+	//     *.tar.gz=01;31:*.TAR.GZ=01;33    (different value, sensitive)
+	bool same_color = esc_eq(iprev->esc, ext->esc->seq, ext->esc->len);
+	return !same_color;
+}
+
+/**
+ * Set the color for an extension.
+ *
+ * @param colors
+ *         The color table to update.
+ * @param key
+ *         The extension as a dstring (its length is taken with dstrlen()).
+ * @param value
+ *         The escape sequence as a dstring.
+ * @return
+ *         0 on success, -1 if an allocation or a trie insertion failed.
+ */
+static int set_ext(struct colors *colors, char *key, char *value) {
+ size_t len = dstrlen(key);
+ struct ext_color *ext = varena_alloc(&colors->ext_arena, len + 1);
+ if (!ext) {
+ return -1;
+ }
+
+ ext->priority = colors->ext_count++; // later definitions get a higher priority
+ ext->len = len;
+ ext->case_sensitive = false;
+ ext->esc = new_esc(colors, value, dstrlen(value));
+ if (!ext->esc) {
+ goto fail;
+ }
+
+ key = memcpy(ext->ext, key, len + 1); // from here on, key refers to the copy owned by ext
+
+ // Reverse the extension (`*.y.x` -> `x.y.*`) so we can use trie_find_prefix()
+ ext_reverse(key, len);
+
+ // Find any pre-existing exact match
+ struct ext_color *prev = NULL;
+ struct trie_leaf *leaf = trie_find_str(&colors->ext_trie, key);
+ if (leaf) {
+ prev = leaf->value;
+ trie_remove(&colors->ext_trie, leaf);
+ }
+
+ // A later *.x should override any earlier *.x, *.y.x, etc.
+ while ((leaf = trie_find_postfix(&colors->ext_trie, key))) {
+ trie_remove(&colors->ext_trie, leaf);
+ }
+
+ // Insert the extension into the case-sensitive trie
+ leaf = trie_insert_str(&colors->ext_trie, key);
+ if (!leaf) {
+ goto fail;
+ }
+ leaf->value = ext;
+
+ // "Smart case": if the same extension is given with two different
+ // capitalizations (e.g. `*.y.x=31:*.Y.X=32:`), make it case-sensitive
+ ext_tolower(key, len); // lowercases ext->ext in place, after the case-sensitive insertion above
+ leaf = trie_insert_str(&colors->iext_trie, key);
+ if (!leaf) {
+ goto fail;
+ }
+
+ struct ext_color *iprev = leaf->value; // NOTE(review): presumably NULL for a freshly inserted leaf -- confirm in trie.c
+ if (ext_case_sensitive(ext, prev, iprev)) {
+ iprev->case_sensitive = true; // ext_case_sensitive() only returns true when iprev is non-NULL
+ ext->case_sensitive = true;
+ }
+ leaf->value = ext;
+
+ return 0;
+
+fail:
+ if (ext->esc) {
+ free_esc(colors, ext->esc);
+ }
+ varena_free(&colors->ext_arena, ext, len + 1);
+ return -1;
+}
+
+/**
+ * Rebuild the case-insensitive trie after all extensions have been parsed.
+ *
+ * Only the extensions that ended up case-insensitive are re-inserted. The
+ * same pass also records the longest key seen in colors->ext_len.
+ *
+ * @return
+ *         0 on success, -1 if a trie insertion failed.
+ */
+static int build_iext_trie(struct colors *colors) {
+ trie_clear(&colors->iext_trie);
+
+ for_trie (leaf, &colors->ext_trie) {
+ size_t len = leaf->length - 1; // NOTE(review): presumably the leaf length counts the terminating NUL -- confirm in trie.h
+ if (colors->ext_len < len) {
+ colors->ext_len = len;
+ }
+
+ struct ext_color *ext = leaf->value;
+ if (ext->case_sensitive) {
+ continue;
+ }
+
+ // set_ext() already reversed and lowercased the extension
+ struct trie_leaf *ileaf;
+ while ((ileaf = trie_find_postfix(&colors->iext_trie, ext->ext))) {
+ trie_remove(&colors->iext_trie, ileaf);
+ }
+
+ ileaf = trie_insert_str(&colors->iext_trie, ext->ext);
+ if (!ileaf) {
+ return -1;
+ }
+ ileaf->value = ext;
+ }
+
+ return 0;
+}
+
+/**
+ * Find a color by an extension.
+ *
+ * At most colors->ext_len trailing bytes of the name are considered. They are
+ * copied, reversed, and looked up in the case-sensitive trie, then lowercased
+ * and looked up in the case-insensitive trie.
+ *
+ * @param colors
+ *         The color table.
+ * @param filename
+ *         The NUL-terminated file name to match.
+ * @return
+ *         The matching escape sequence, or NULL if nothing matched or the
+ *         temporary copy could not be allocated.
+ */
+static const struct esc_seq *get_ext(const struct colors *colors, const char *filename) {
+ size_t ext_len = colors->ext_len;
+ size_t name_len = strlen(filename);
+ if (name_len < ext_len) {
+ ext_len = name_len;
+ }
+ const char *suffix = filename + name_len - ext_len;
+
+ char buf[256];
+ char *copy;
+ if (ext_len < sizeof(buf)) {
+ copy = memcpy(buf, suffix, ext_len + 1); // + 1 also copies the NUL that ends filename
+ } else {
+ copy = strndup(suffix, ext_len); // too long for the stack buffer
+ if (!copy) {
+ return NULL;
+ }
+ }
+
+ ext_reverse(copy, ext_len);
+ const struct trie_leaf *leaf = trie_find_prefix(&colors->ext_trie, copy);
+ const struct ext_color *ext = leaf ? leaf->value : NULL;
+
+ ext_tolower(copy, ext_len);
+ const struct trie_leaf *ileaf = trie_find_prefix(&colors->iext_trie, copy);
+ const struct ext_color *iext = ileaf ? ileaf->value : NULL;
+
+ if (iext && (!ext || ext->priority < iext->priority)) { // when both match, the one defined later (higher priority) wins
+ ext = iext;
+ }
+
+ if (copy != buf) {
+ free(copy);
+ }
+
+ return ext ? ext->esc : NULL;
+}
+
+/**
+ * Parse a chunk of $LS_COLORS that may have escape sequences. The supported
+ * escapes are:
+ *
+ * \a, \b, \f, \n, \r, \t, \v:
+ * As in C
+ * \e:
+ * ESC (\033)
+ * \?:
+ * DEL (\177)
+ * \_:
+ * ' ' (space)
+ * \NNN:
+ * Octal
+ * \xNN:
+ * Hex
+ * ^C:
+ * Control character.
+ *
+ * See man dir_colors.
+ *
+ * Any other character after a backslash stands for itself. A backslash or
+ * caret at the very end of the value is an error (EINVAL), as is a NULL value.
+ * Octal and hex escapes accept any number of digits; the result is kept in a
+ * single byte, so higher bits are shifted out.
+ *
+ * @param str
+ * A dstring to fill with the unescaped chunk.
+ * @param value
+ * The value to parse.
+ * @param end
+ * The character that marks the end of the chunk.
+ * @param[out] next
+ * Will be set to the next chunk.
+ * It is NULL if the value ended before an end character was found.
+ * @return
+ * 0 on success, -1 on failure.
+ */
+static int unescape(char **str, const char *value, char end, const char **next) {
+ *next = NULL;
+
+ if (!value) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ if (dstresize(str, 0) != 0) { // reuse the caller's dstring, starting from empty
+ return -1;
+ }
+
+ const char *i;
+ for (i = value; *i && *i != end; ++i) {
+ unsigned char c = 0;
+
+ switch (*i) {
+ case '\\':
+ switch (*++i) {
+ case 'a':
+ c = '\a';
+ break;
+ case 'b':
+ c = '\b';
+ break;
+ case 'e':
+ c = '\033';
+ break;
+ case 'f':
+ c = '\f';
+ break;
+ case 'n':
+ c = '\n';
+ break;
+ case 'r':
+ c = '\r';
+ break;
+ case 't':
+ c = '\t';
+ break;
+ case 'v':
+ c = '\v';
+ break;
+ case '?':
+ c = '\177';
+ break;
+ case '_':
+ c = ' ';
+ break;
+
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ while (i[1] >= '0' && i[1] <= '7') { // consume every digit except the last one here
+ c <<= 3;
+ c |= *i++ - '0';
+ }
+ c <<= 3; // the last digit is folded in without advancing; the for loop's ++i steps past it
+ c |= *i - '0';
+ break;
+
+ case 'X':
+ case 'x':
+ while (true) { // i stays on the last consumed character; only i[1] is inspected
+ if (i[1] >= '0' && i[1] <= '9') {
+ c <<= 4;
+ c |= i[1] - '0';
+ } else if (i[1] >= 'A' && i[1] <= 'F') {
+ c <<= 4;
+ c |= i[1] - 'A' + 0xA;
+ } else if (i[1] >= 'a' && i[1] <= 'f') {
+ c <<= 4;
+ c |= i[1] - 'a' + 0xA;
+ } else {
+ break;
+ }
+ ++i;
+ }
+ break;
+
+ case '\0':
+ errno = EINVAL;
+ return -1;
+
+ default:
+ c = *i;
+ break;
+ }
+ break;
+
+ case '^':
+ switch (*++i) {
+ case '?':
+ c = '\177';
+ break;
+ case '\0':
+ errno = EINVAL;
+ return -1;
+ default:
+ // CTRL masks bits 6 and 7
+ c = *i & 0x1F; // keeps only the low five bits, e.g. ^A and ^a both give 0x01
+ break;
+ }
+ break;
+
+ default:
+ c = *i;
+ break;
+ }
+
+ if (dstrapp(str, c) != 0) {
+ return -1;
+ }
+ }
+
+ if (*i) { // stopped on the end character rather than at the end of the string
+ *next = i + 1;
+ }
+
+ return 0;
+}
+
+/**
+ * Parse the GNU $LS_COLORS format.
+ *
+ * Chunks that start with `*` define extension colors; every other chunk is a
+ * `key=value` pair. Parsing stops without an error at the first chunk of the
+ * second kind that has no `=`.
+ *
+ * @param colors
+ *         The color table to update.
+ * @param ls_colors
+ *         The string to parse; NULL is accepted and parses as nothing.
+ * @return
+ *         0 on success, -1 on failure.
+ */
+static int parse_gnu_ls_colors(struct colors *colors, const char *ls_colors) {
+ int ret = -1;
+ dchar *key = NULL;
+ dchar *value = NULL;
+
+ for (const char *chunk = ls_colors, *next; chunk; chunk = next) {
+ if (chunk[0] == '*') {
+ if (unescape(&key, chunk + 1, '=', &next) != 0) {
+ goto fail;
+ }
+ if (unescape(&value, next, ':', &next) != 0) { // next is NULL here when the key had no '=', which unescape() rejects
+ goto fail;
+ }
+ if (set_ext(colors, key, value) != 0) {
+ goto fail;
+ }
+ } else {
+ const char *equals = strchr(chunk, '=');
+ if (!equals) {
+ break;
+ }
+
+ if (dstrncpy(&key, chunk, equals - chunk) != 0) {
+ goto fail;
+ }
+ if (unescape(&value, equals + 1, ':', &next) != 0) {
+ goto fail;
+ }
+
+ // All-zero values should be treated like NULL, to fall
+ // back on any other relevant coloring for that file
+ dchar *esc = value;
+ if (strspn(value, "0") == dstrlen(value) // also true for an empty value
+ && strcmp(key, "rs") != 0
+ && strcmp(key, "lc") != 0
+ && strcmp(key, "rc") != 0
+ && strcmp(key, "ec") != 0) {
+ esc = NULL;
+ }
+
+ if (set_esc(colors, key, esc) != 0) {
+ goto fail;
+ }
+ }
+ }
+
+ ret = 0;
+fail:
+ dstrfree(value); // reached on success as well: key and value are scratch buffers
+ dstrfree(key);
+ return ret;
+}
+
+struct colors *parse_colors(void) { // Builds the color table: built-in defaults, then $LS_COLORS, then $BFS_COLORS; returns NULL on failure
+ struct colors *colors = ALLOC(struct colors);
+ if (!colors) {
+ return NULL;
+ }
+
+ VARENA_INIT(&colors->esc_arena, struct esc_seq, seq);
+ VARENA_INIT(&colors->ext_arena, struct ext_color, ext);
+ trie_init(&colors->names);
+ colors->ext_count = 0;
+ colors->ext_len = 0;
+ trie_init(&colors->ext_trie);
+ trie_init(&colors->iext_trie);
+
+ bool fail = false; // once set, the || short-circuit skips every remaining init_esc() call
+
+ // From man console_codes
+
+ fail = fail || init_esc(colors, "rs", "0", &colors->reset);
+ fail = fail || init_esc(colors, "lc", "\033[", &colors->leftcode);
+ fail = fail || init_esc(colors, "rc", "m", &colors->rightcode);
+ fail = fail || init_esc(colors, "ec", NULL, &colors->endcode);
+ fail = fail || init_esc(colors, "cl", "\033[K", &colors->clear_to_eol);
+
+ fail = fail || init_esc(colors, "bld", "01;39", &colors->bold);
+ fail = fail || init_esc(colors, "gry", "01;30", &colors->gray);
+ fail = fail || init_esc(colors, "red", "01;31", &colors->red);
+ fail = fail || init_esc(colors, "grn", "01;32", &colors->green);
+ fail = fail || init_esc(colors, "ylw", "01;33", &colors->yellow);
+ fail = fail || init_esc(colors, "blu", "01;34", &colors->blue);
+ fail = fail || init_esc(colors, "mag", "01;35", &colors->magenta);
+ fail = fail || init_esc(colors, "cyn", "01;36", &colors->cyan);
+ fail = fail || init_esc(colors, "wht", "01;37", &colors->white);
+
+ fail = fail || init_esc(colors, "wrn", "01;33", &colors->warning);
+ fail = fail || init_esc(colors, "err", "01;31", &colors->error);
+
+ // Defaults from man dir_colors
+ // "" means fall back to ->normal
+
+ fail = fail || init_esc(colors, "no", NULL, &colors->normal);
+
+ fail = fail || init_esc(colors, "fi", "", &colors->file);
+ fail = fail || init_esc(colors, "mh", NULL, &colors->multi_hard);
+ fail = fail || init_esc(colors, "ex", "01;32", &colors->executable);
+ fail = fail || init_esc(colors, "ca", NULL, &colors->capable);
+ fail = fail || init_esc(colors, "sg", "30;43", &colors->setgid);
+ fail = fail || init_esc(colors, "su", "37;41", &colors->setuid);
+
+ fail = fail || init_esc(colors, "di", "01;34", &colors->directory);
+ fail = fail || init_esc(colors, "st", "37;44", &colors->sticky);
+ fail = fail || init_esc(colors, "ow", "34;42", &colors->other_writable);
+ fail = fail || init_esc(colors, "tw", "30;42", &colors->sticky_other_writable);
+
+ fail = fail || init_esc(colors, "ln", "01;36", &colors->link);
+ fail = fail || init_esc(colors, "or", NULL, &colors->orphan);
+ fail = fail || init_esc(colors, "mi", NULL, &colors->missing);
+ colors->link_as_target = false;
+
+ fail = fail || init_esc(colors, "bd", "01;33", &colors->blockdev);
+ fail = fail || init_esc(colors, "cd", "01;33", &colors->chardev);
+ fail = fail || init_esc(colors, "do", "01;35", &colors->door);
+ fail = fail || init_esc(colors, "pi", "33", &colors->pipe);
+ fail = fail || init_esc(colors, "so", "01;35", &colors->socket);
+
+ if (fail) {
+ goto fail;
+ }
+
+ if (parse_gnu_ls_colors(colors, getenv("LS_COLORS")) != 0) {
+ goto fail;
+ }
+ if (parse_gnu_ls_colors(colors, getenv("BFS_COLORS")) != 0) { // parsed second, so its settings override $LS_COLORS
+ goto fail;
+ }
+ if (build_iext_trie(colors) != 0) {
+ goto fail;
+ }
+
+ if (colors->link && esc_eq(colors->link, "target", strlen("target"))) {
+ colors->link_as_target = true;
+ colors->link->len = 0; // an empty sequence makes file_color() fall back to ->normal
+ }
+
+ // Pre-compute the reset escape sequence
+ if (!colors->endcode) {
+ dchar *ec = dstralloc(0);
+ if (!ec
+ || cat_esc(&ec, colors->leftcode) != 0
+ || cat_esc(&ec, colors->reset) != 0
+ || cat_esc(&ec, colors->rightcode) != 0
+ || set_esc(colors, "ec", ec) != 0) {
+ dstrfree(ec);
+ goto fail;
+ }
+ dstrfree(ec); // set_esc() copied the bytes into the arena
+ }
+
+ return colors;
+
+fail:
+ free_colors(colors);
+ return NULL;
+}
+
+void free_colors(struct colors *colors) {
+	// Freeing a NULL table is a no-op, like free()
+	if (colors) {
+		// Tear down the tries first, then the arenas, then the table itself
+		trie_destroy(&colors->iext_trie);
+		trie_destroy(&colors->ext_trie);
+		trie_destroy(&colors->names);
+		varena_destroy(&colors->ext_arena);
+		varena_destroy(&colors->esc_arena);
+
+		free(colors);
+	}
+}
+
+CFILE *cfwrap(FILE *file, const struct colors *colors, bool close) {
+	CFILE *wrapped = ALLOC(CFILE);
+	if (wrapped == NULL) {
+		return NULL;
+	}
+
+	dchar *buffer = dstralloc(128);
+	if (buffer == NULL) {
+		free(wrapped);
+		return NULL;
+	}
+
+	int fd = fileno(file);
+
+	wrapped->buffer = buffer;
+	wrapped->file = file;
+	wrapped->fd = fd;
+	wrapped->need_reset = false;
+	wrapped->close = close;
+
+	// Colors are only used when the stream is a terminal
+	wrapped->colors = isatty(fd) ? colors : NULL;
+
+	return wrapped;
+}
+
+int cfclose(CFILE *cfile) {
+	// Closing a NULL stream succeeds trivially
+	if (!cfile) {
+		return 0;
+	}
+
+	dstrfree(cfile->buffer);
+
+	// Only close the underlying FILE if cfwrap() was asked to own it
+	int ret = cfile->close ? fclose(cfile->file) : 0;
+
+	free(cfile);
+	return ret;
+}
+
+/**
+ * Check if a symlink is broken.
+ *
+ * If the stat flags do not include BFS_STAT_NOFOLLOW, the link is reported as
+ * broken without checking.  Otherwise it is broken when xfaccessat() fails for
+ * the path.
+ */
+static bool is_link_broken(const struct BFTW *ftwbuf) {
+	if (!(ftwbuf->stat_flags & BFS_STAT_NOFOLLOW)) {
+		return true;
+	}
+
+	return xfaccessat(ftwbuf->at_fd, ftwbuf->at_path, F_OK) != 0;
+}
+
+bool colors_need_stat(const struct colors *colors) {
+	// These are the colors that file_color() can only choose between by
+	// looking at the file's mode or link count
+	const struct esc_seq *const stat_dependent[] = {
+		colors->setuid,
+		colors->setgid,
+		colors->executable,
+		colors->multi_hard,
+		colors->sticky_other_writable,
+		colors->other_writable,
+		colors->sticky,
+	};
+
+	for (size_t i = 0; i < sizeof(stat_dependent) / sizeof(stat_dependent[0]); ++i) {
+		if (stat_dependent[i]) {
+			return true;
+		}
+	}
+
+	return false;
+}
+
+/**
+ * Get the color for a file.
+ *
+ * @param colors
+ *         The color table.
+ * @param filename
+ *         The name used for extension matching (regular files only).
+ * @param ftwbuf
+ *         The file being colored.
+ * @param flags
+ *         The stat flags used for both the type lookup and the stat call.
+ * @return
+ *         The escape sequence to use, or NULL for no color. If the type or
+ *         stat lookup fails, the "missing" color is used, or else "orphan".
+ */
+static const struct esc_seq *file_color(const struct colors *colors, const char *filename, const struct BFTW *ftwbuf, enum bfs_stat_flags flags) {
+ enum bfs_type type = bftw_type(ftwbuf, flags);
+ if (type == BFS_ERROR) {
+ goto error;
+ }
+
+ const struct bfs_stat *statbuf = NULL;
+ const struct esc_seq *color = NULL;
+
+ switch (type) {
+ case BFS_REG:
+ if (colors->setuid || colors->setgid || colors->executable || colors->multi_hard) { // stat only when a color below needs it
+ statbuf = bftw_stat(ftwbuf, flags);
+ if (!statbuf) {
+ goto error;
+ }
+ }
+
+ if (colors->setuid && (statbuf->mode & 04000)) { // each statbuf access is guarded by a color that triggered the stat above
+ color = colors->setuid;
+ } else if (colors->setgid && (statbuf->mode & 02000)) {
+ color = colors->setgid;
+ } else if (colors->capable && bfs_check_capabilities(ftwbuf) > 0) {
+ color = colors->capable;
+ } else if (colors->executable && (statbuf->mode & 00111)) {
+ color = colors->executable;
+ } else if (colors->multi_hard && statbuf->nlink > 1) {
+ color = colors->multi_hard;
+ }
+
+ if (!color) {
+ color = get_ext(colors, filename);
+ }
+
+ if (!color) {
+ color = colors->file;
+ }
+
+ break;
+
+ case BFS_DIR:
+ if (colors->sticky_other_writable || colors->other_writable || colors->sticky) {
+ statbuf = bftw_stat(ftwbuf, flags);
+ if (!statbuf) {
+ goto error;
+ }
+ }
+
+ if (colors->sticky_other_writable && (statbuf->mode & 01002) == 01002) { // both the sticky and other-writable bits set
+ color = colors->sticky_other_writable;
+ } else if (colors->other_writable && (statbuf->mode & 00002)) {
+ color = colors->other_writable;
+ } else if (colors->sticky && (statbuf->mode & 01000)) {
+ color = colors->sticky;
+ } else {
+ color = colors->directory;
+ }
+
+ break;
+
+ case BFS_LNK:
+ if (colors->orphan && is_link_broken(ftwbuf)) {
+ color = colors->orphan;
+ } else {
+ color = colors->link;
+ }
+ break;
+
+ case BFS_BLK:
+ color = colors->blockdev;
+ break;
+ case BFS_CHR:
+ color = colors->chardev;
+ break;
+ case BFS_FIFO:
+ color = colors->pipe;
+ break;
+ case BFS_SOCK:
+ color = colors->socket;
+ break;
+ case BFS_DOOR:
+ color = colors->door;
+ break;
+
+ default:
+ break;
+ }
+
+ if (color && color->len == 0) { // an empty sequence means "use the normal color"
+ color = colors->normal;
+ }
+
+ return color;
+
+error:
+ if (colors->missing) {
+ return colors->missing;
+ } else {
+ return colors->orphan;
+ }
+}
+
+/**
+ * Print an escape sequence chunk.
+ *
+ * The bytes are appended to the stream's buffer as they are, without the
+ * left/right codes around them.
+ */
+static int print_esc_chunk(CFILE *cfile, const struct esc_seq *esc) {
+	dchar **out = &cfile->buffer;
+	return cat_esc(out, esc);
+}
+
+/**
+ * Print an ANSI escape sequence, wrapped in the left and right codes.
+ *
+ * @param cfile
+ *         The stream to print to; its colors must not be NULL unless esc is.
+ * @param esc
+ *         The sequence to print; NULL prints nothing.
+ * @return
+ *         0 on success, -1 on failure.
+ */
+static int print_esc(CFILE *cfile, const struct esc_seq *esc) {
+	if (esc == NULL) {
+		return 0;
+	}
+
+	const struct colors *colors = cfile->colors;
+
+	// Anything other than the reset sequence has to be undone later
+	if (esc != colors->reset) {
+		cfile->need_reset = true;
+	}
+
+	const struct esc_seq *parts[] = {colors->leftcode, esc, colors->rightcode};
+	for (size_t i = 0; i < sizeof(parts) / sizeof(parts[0]); ++i) {
+		if (print_esc_chunk(cfile, parts[i]) != 0) {
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/** Emit the end code, but only if a color was set since the last reset. */
+static int print_reset(CFILE *cfile) {
+	if (cfile->need_reset) {
+		cfile->need_reset = false;
+		return print_esc_chunk(cfile, cfile->colors->endcode);
+	}
+
+	return 0;
+}
+
+/** Append at most n bytes of str to the buffer, escaped according to flags. */
+static int print_wordesc(CFILE *cfile, const char *str, size_t n, enum wesc_flags flags) {
+	dchar **dest = &cfile->buffer;
+	return dstrnescat(dest, str, n, flags);
+}
+
+/** Print len bytes of str wrapped in the given color (if any), then reset. */
+static int print_colored(CFILE *cfile, const struct esc_seq *esc, const char *str, size_t len) {
+	// The text is TTY-escaped so the string itself can't interfere with the colors
+	if (print_esc(cfile, esc) != 0
+	    || print_wordesc(cfile, str, len, WESC_TTY) != 0
+	    || print_reset(cfile) != 0) {
+		return -1;
+	}
+
+	return 0;
+}
+
+/**
+ * Find the offset of the first broken path component.
+ *
+ * Only the first max bytes of path are considered.  If bftw_type() does not
+ * report BFS_ERROR for this file, or path is ftwbuf->path below the root
+ * (depth > 0), nothing is probed and max is returned.  Otherwise a copy of
+ * the path is tested with xfaccessat(F_OK) and shortened from the right
+ * until the test succeeds or the offset reaches 0.
+ *
+ * @return
+ *         The length of the accessible prefix (at most max), or -1 if
+ *         building the scratch copy of the path fails.
+ */
+static ssize_t first_broken_offset(const char *path, const struct BFTW *ftwbuf, enum bfs_stat_flags flags, size_t max) {
+ ssize_t ret = max;
+ bfs_assert(ret >= 0); // max must be representable as a ssize_t
+
+ if (bftw_type(ftwbuf, flags) != BFS_ERROR) {
+ goto out;
+ }
+
+ dchar *at_path;
+ int at_fd;
+ if (path == ftwbuf->path) {
+ if (ftwbuf->depth == 0) {
+ at_fd = AT_FDCWD;
+ at_path = dstrndup(path, max);
+ } else {
+ // The parent must have existed to get here
+ goto out;
+ }
+ } else {
+ // We're in print_link_target(), so resolve relative to the link's parent directory
+ at_fd = ftwbuf->at_fd;
+ if (at_fd == (int)AT_FDCWD && path[0] != '/') {
+ at_path = dstrndup(ftwbuf->path, ftwbuf->nameoff); // Prefix the relative target with the link's directory part
+ if (at_path && dstrncat(&at_path, path, max) != 0) {
+ ret = -1;
+ goto out_path;
+ }
+ } else {
+ at_path = dstrndup(path, max);
+ }
+ }
+
+ if (!at_path) {
+ ret = -1;
+ goto out;
+ }
+
+ while (ret > 0) {
+ if (xfaccessat(at_fd, at_path, F_OK) == 0) {
+ break;
+ }
+
+ size_t len = dstrlen(at_path);
+ while (ret && at_path[len - 1] == '/') { // Drop trailing slashes
+ --len, --ret;
+ }
+ if (errno != ENOTDIR) { // On ENOTDIR only the slashes are dropped; presumably the component itself may still exist as a non-directory
+ while (ret && at_path[len - 1] != '/') { // Drop the last component
+ --len, --ret;
+ }
+ }
+
+ dstresize(&at_path, len);
+ }
+
+out_path:
+ dstrfree(at_path);
+out:
+ return ret;
+}
+
+/**
+ * Print a path in color: the leading directories in the directory color,
+ * followed by the final component (or the broken remainder) in its own color.
+ */
+static int print_path_colored(CFILE *cfile, const char *path, const struct BFTW *ftwbuf, enum bfs_stat_flags flags) {
+	// ftwbuf->nameoff only describes ftwbuf->path; compute it for any other path
+	size_t nameoff = (path == ftwbuf->path) ? ftwbuf->nameoff : xbaseoff(path);
+	size_t pathlen = nameoff + strlen(path + nameoff);
+
+	ssize_t broken = first_broken_offset(path, ftwbuf, flags, nameoff);
+	if (broken < 0) {
+		return -1;
+	}
+	size_t split = broken;
+
+	const struct colors *colors = cfile->colors;
+	const struct esc_seq *dirs_color = colors->directory;
+	const struct esc_seq *name_color;
+
+	if (split >= nameoff) {
+		name_color = file_color(colors, path + nameoff, ftwbuf, flags);
+		if (name_color == dirs_color) {
+			// One color for everything, so print the path in a single piece
+			split = pathlen;
+		}
+	} else {
+		// Part of the directory prefix is broken; color the rest as missing
+		name_color = colors->missing ? colors->missing : colors->orphan;
+	}
+
+	if (split > 0 && print_colored(cfile, dirs_color, path, split) != 0) {
+		return -1;
+	}
+
+	if (split < pathlen && print_colored(cfile, name_color, path + split, pathlen - split) != 0) {
+		return -1;
+	}
+
+	return 0;
+}
+
+/** Print a bare file name in the color chosen by file_color(). */
+static int print_name_colored(CFILE *cfile, const char *name, const struct BFTW *ftwbuf, enum bfs_stat_flags flags) {
+	const struct esc_seq *color = file_color(cfile->colors, name, ftwbuf, flags);
+	size_t namelen = strlen(name);
+	return print_colored(cfile, color, name, namelen);
+}
+
+/** Print a file's name (the part of its path after nameoff), colored if colors are enabled. */
+static int print_name(CFILE *cfile, const struct BFTW *ftwbuf) {
+	const char *name = ftwbuf->path + ftwbuf->nameoff;
+	const struct colors *colors = cfile->colors;
+
+	if (colors) {
+		// With link_as_target set, a symlink is colored like what it points to
+		bool as_target = colors->link_as_target && ftwbuf->type == BFS_LNK;
+		enum bfs_stat_flags flags = as_target ? BFS_STAT_TRYFOLLOW : ftwbuf->stat_flags;
+		return print_name_colored(cfile, name, ftwbuf, flags);
+	}
+
+	return dstrcat(&cfile->buffer, name);
+}
+
+/** Print a file's full path, colored if colors are enabled. */
+static int print_path(CFILE *cfile, const struct BFTW *ftwbuf) {
+	const struct colors *colors = cfile->colors;
+
+	if (colors) {
+		// With link_as_target set, a symlink is colored like what it points to
+		bool as_target = colors->link_as_target && ftwbuf->type == BFS_LNK;
+		enum bfs_stat_flags flags = as_target ? BFS_STAT_TRYFOLLOW : ftwbuf->stat_flags;
+		return print_path_colored(cfile, ftwbuf->path, ftwbuf, flags);
+	}
+
+	return dstrcat(&cfile->buffer, ftwbuf->path);
+}
+
+/** Read a symlink's target and print it, colored if colors are enabled. */
+static int print_link_target(CFILE *cfile, const struct BFTW *ftwbuf) {
+	// Use the cached size, when there is one, as the length passed to xreadlinkat()
+	size_t hint = 0;
+	const struct bfs_stat *cached = bftw_cached_stat(ftwbuf, BFS_STAT_NOFOLLOW);
+	if (cached) {
+		hint = cached->size;
+	}
+
+	char *target = xreadlinkat(ftwbuf->at_fd, ftwbuf->at_path, hint);
+	if (!target) {
+		return -1;
+	}
+
+	int ret = cfile->colors
+		? print_path_colored(cfile, target, ftwbuf, BFS_STAT_FOLLOW)
+		: dstrcat(&cfile->buffer, target);
+
+	free(target);
+	return ret;
+}
+
+/** Format some colored output to the buffer. */
+attr(printf(2, 3))
+static int cbuff(CFILE *cfile, const char *format, ...);
+
+/**
+ * Dump a parsed expression tree, for debugging.
+ *
+ * Output is abbreviated: anything at depth 2 or deeper prints as "(...)",
+ * and only the first two children of a node are expanded before "...".
+ */
+static int print_expr(CFILE *cfile, const struct bfs_expr *expr, bool verbose, int depth) {
+	if (depth >= 2) {
+		return dstrcat(&cfile->buffer, "(...)");
+	}
+	if (!expr) {
+		return dstrcat(&cfile->buffer, "(null)");
+	}
+
+	if (dstrcat(&cfile->buffer, "(") != 0) {
+		return -1;
+	}
+
+	// The expression's own name: red for parents, blue otherwise
+	int head;
+	if (bfs_expr_is_parent(expr)) {
+		head = cbuff(cfile, "${red}%pq${rs}", expr->argv[0]);
+	} else {
+		head = cbuff(cfile, "${blu}%pq${rs}", expr->argv[0]);
+	}
+	if (head < 0) {
+		return -1;
+	}
+
+	size_t arg = 1;
+	while (arg < expr->argc) {
+		if (cbuff(cfile, " ${bld}%pq${rs}", expr->argv[arg]) < 0) {
+			return -1;
+		}
+		++arg;
+	}
+
+	if (verbose) {
+		// Success percentage and mean time per evaluation
+		double pct = 0.0, nanos = 0.0;
+		if (expr->evaluations) {
+			pct = 100.0 * expr->successes / expr->evaluations;
+			nanos = (1.0e9 * expr->elapsed.tv_sec + expr->elapsed.tv_nsec) / expr->evaluations;
+		}
+		if (cbuff(cfile, " [${ylw}%zu${rs}/${ylw}%zu${rs}=${ylw}%g%%${rs}; ${ylw}%gns${rs}]",
+		          expr->successes, expr->evaluations, pct, nanos) != 0) {
+			return -1;
+		}
+	}
+
+	int seen = 0;
+	struct bfs_expr *child = bfs_expr_children(expr);
+	while (child) {
+		if (dstrcat(&cfile->buffer, " ") != 0) {
+			return -1;
+		}
+
+		++seen;
+		if (seen >= 3) {
+			// Elide the third and later children
+			if (dstrcat(&cfile->buffer, "...") != 0) {
+				return -1;
+			}
+			break;
+		}
+
+		if (print_expr(cfile, child, verbose, depth + 1) != 0) {
+			return -1;
+		}
+		child = child->next;
+	}
+
+	return dstrcat(&cfile->buffer, ")") != 0 ? -1 : 0;
+}
+
+attr(printf(2, 0)) // va_list core of cbuff()/cvfprintf(): expands format into cfile->buffer; returns 0 on success, -1 on failure
+static int cvbuff(CFILE *cfile, const char *format, va_list args) {
+ const struct colors *colors = cfile->colors;
+ int error = errno; // %m reports errno as it was on entry, whatever the calls below do to it
+
+ // Color specifier (e.g. ${blu}) state: the looked-up sequence, the closing '}', and a copy of the short name
+ struct esc_seq **esc;
+ const char *end;
+ size_t len;
+ char name[4];
+
+ for (const char *i = format; *i; ++i) {
+ size_t verbatim = strcspn(i, "%$"); // Length of the literal run before the next specifier
+ if (dstrncat(&cfile->buffer, i, verbatim) != 0) {
+ return -1;
+ }
+ i += verbatim;
+
+ switch (*i) {
+ case '%':
+ switch (*++i) {
+ case '%':
+ if (dstrapp(&cfile->buffer, '%') != 0) {
+ return -1;
+ }
+ break;
+
+ case 'c':
+ if (dstrapp(&cfile->buffer, va_arg(args, int)) != 0) {
+ return -1;
+ }
+ break;
+
+ case 'd':
+ if (dstrcatf(&cfile->buffer, "%d", va_arg(args, int)) != 0) {
+ return -1;
+ }
+ break;
+
+ case 'g':
+ if (dstrcatf(&cfile->buffer, "%g", va_arg(args, double)) != 0) {
+ return -1;
+ }
+ break;
+
+ case 's':
+ if (dstrcat(&cfile->buffer, va_arg(args, const char *)) != 0) {
+ return -1;
+ }
+ break;
+
+ case 'z':
+ ++i;
+ if (*i != 'u') { // Only %zu is supported
+ goto invalid;
+ }
+ if (dstrcatf(&cfile->buffer, "%zu", va_arg(args, size_t)) != 0) {
+ return -1;
+ }
+ break;
+
+ case 'm':
+ if (dstrcat(&cfile->buffer, xstrerror(error)) != 0) {
+ return -1;
+ }
+ break;
+
+ case 'p':
+ switch (*++i) {
+ case 'q':
+ if (print_wordesc(cfile, va_arg(args, const char *), SIZE_MAX, WESC_SHELL | WESC_TTY) != 0) {
+ return -1;
+ }
+ break;
+ case 'Q':
+ if (print_wordesc(cfile, va_arg(args, const char *), SIZE_MAX, WESC_TTY) != 0) {
+ return -1;
+ }
+ break;
+
+ case 'F':
+ if (print_name(cfile, va_arg(args, const struct BFTW *)) != 0) {
+ return -1;
+ }
+ break;
+
+ case 'P':
+ if (print_path(cfile, va_arg(args, const struct BFTW *)) != 0) {
+ return -1;
+ }
+ break;
+
+ case 'L':
+ if (print_link_target(cfile, va_arg(args, const struct BFTW *)) != 0) {
+ return -1;
+ }
+ break;
+
+ case 'e':
+ if (print_expr(cfile, va_arg(args, const struct bfs_expr *), false, 0) != 0) {
+ return -1;
+ }
+ break;
+ case 'E':
+ if (print_expr(cfile, va_arg(args, const struct bfs_expr *), true, 0) != 0) {
+ return -1;
+ }
+ break;
+
+ default:
+ goto invalid;
+ }
+
+ break;
+
+ default:
+ goto invalid;
+ }
+ break;
+
+ case '$':
+ switch (*++i) {
+ case '$':
+ if (dstrapp(&cfile->buffer, '$') != 0) {
+ return -1;
+ }
+ break;
+
+ case '{':
+ ++i;
+ end = strchr(i, '}');
+ if (!end) {
+ goto invalid;
+ }
+ if (!colors) { // No color table: skip the whole ${...} specifier without validating its name
+ i = end;
+ break;
+ }
+
+ len = end - i;
+ if (len >= sizeof(name)) { // The name plus its NUL terminator must fit in name[]
+ goto invalid;
+ }
+ memcpy(name, i, len);
+ name[len] = '\0';
+
+ if (strcmp(name, "rs") == 0) { // ${rs} goes through print_reset(), which skips redundant resets
+ if (print_reset(cfile) != 0) {
+ return -1;
+ }
+ } else {
+ esc = get_esc(colors, name);
+ if (!esc) {
+ goto invalid;
+ }
+ if (print_esc(cfile, *esc) != 0) {
+ return -1;
+ }
+ }
+
+ i = end;
+ break;
+
+ default:
+ goto invalid;
+ }
+ break;
+
+ default: // *i is '\0': the literal run reached the end of the format, so stop before ++i steps past it
+ return 0;
+ }
+ }
+
+ return 0;
+
+invalid:
+ bfs_bug("Invalid format string '%s'", format);
+ errno = EINVAL;
+ return -1;
+}
+
+/** Variadic front end for cvbuff(): formats into cfile->buffer without writing it out. */
+static int cbuff(CFILE *cfile, const char *format, ...) {
+	va_list ap;
+
+	va_start(ap, format);
+	int status = cvbuff(cfile, format, ap);
+	va_end(ap);
+
+	return status;
+}
+
+/**
+ * Format into the stream's scratch buffer, then write the whole buffer to the
+ * underlying FILE.  The buffer is emptied again whether or not this succeeds.
+ */
+int cvfprintf(CFILE *cfile, const char *format, va_list args) {
+	bfs_assert(dstrlen(cfile->buffer) == 0);
+
+	bool ok = false;
+	if (cvbuff(cfile, format, args) == 0) {
+		size_t pending = dstrlen(cfile->buffer);
+		size_t written = fwrite(cfile->buffer, 1, pending, cfile->file);
+		ok = (written == pending);
+	}
+
+	dstresize(&cfile->buffer, 0);
+	return ok ? 0 : -1;
+}
+
+/** Variadic front end for cvfprintf(). */
+int cfprintf(CFILE *cfile, const char *format, ...) {
+	va_list ap;
+
+	va_start(ap, format);
+	int status = cvfprintf(cfile, format, ap);
+	va_end(ap);
+
+	return status;
+}
+
+/** Write the end code straight to the file descriptor, bypassing the stdio stream. */
+int cfreset(CFILE *cfile) {
+	const struct colors *colors = cfile->colors;
+
+	if (colors) {
+		const struct esc_seq *end = colors->endcode;
+		size_t written = xwrite(cfile->fd, end->seq, end->len);
+		if (written != end->len) {
+			return -1;
+		}
+	}
+
+	return 0;
+}
diff --git a/src/color.h b/src/color.h
new file mode 100644
index 0000000..3550888
--- /dev/null
+++ b/src/color.h
@@ -0,0 +1,118 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+/**
+ * Utilities for colored output on ANSI terminals.
+ */
+
+#ifndef BFS_COLOR_H
+#define BFS_COLOR_H
+
+#include "prelude.h"
+#include "dstring.h"
+#include <stdio.h>
+
+/**
+ * A color scheme.
+ */
+struct colors;
+
+/**
+ * Parse the color table from the environment.
+ */
+struct colors *parse_colors(void);
+
+/**
+ * Check if stat() info is required to color a file correctly.
+ */
+bool colors_need_stat(const struct colors *colors);
+
+/**
+ * Free a color table.
+ */
+void free_colors(struct colors *colors);
+
+/**
+ * A file/stream with associated colors.
+ */
+typedef struct CFILE {
+ /** The underlying file/stream. */
+ FILE *file;
+ /** The color table to use, if any (NULL means output is not colored). */
+ const struct colors *colors;
+ /** A buffer for colored formatting; cvfprintf() expects it to be empty on entry and empties it before returning. */
+ dchar *buffer;
+ /** Cached file descriptor number (cfreset() writes to it directly). */
+ int fd;
+ /** Whether the next ${rs} is actually necessary, i.e. a non-reset color was printed since the last reset. */
+ bool need_reset;
+ /** Whether to close the underlying stream. */
+ bool close;
+} CFILE;
+
+/**
+ * Wrap an existing file into a colored stream.
+ *
+ * @param file
+ * The underlying file.
+ * @param colors
+ * The color table to use if file is a TTY.
+ * @param close
+ * Whether to close the underlying stream when this stream is closed.
+ * @return
+ * A colored wrapper around file.
+ */
+CFILE *cfwrap(FILE *file, const struct colors *colors, bool close);
+
+/**
+ * Close a colored file.
+ *
+ * @param cfile
+ * The colored file to close.
+ * @return
+ * 0 on success, -1 on failure.
+ */
+int cfclose(CFILE *cfile);
+
+/**
+ * Colored, formatted output.
+ *
+ * @param cfile
+ * The colored stream to print to.
+ * @param format
+ * A printf()-style format string, supporting these format specifiers:
+ *
+ * %c: A single character
+ * %d: An integer
+ * %g: A double
+ * %s: A string
+ * %zu: A size_t
+ * %m: The error message for the value errno had when the call began
+ * %pq: A shell-escaped string, like bash's printf %q
+ * %pQ: A TTY-escaped string.
+ * %pF: A colored file name, from a const struct BFTW * argument
+ * %pP: A colored file path, from a const struct BFTW * argument
+ * %pL: A colored link target, from a const struct BFTW * argument
+ * %pe: Dump a const struct bfs_expr *, for debugging.
+ * %pE: Dump a const struct bfs_expr * in verbose form, for debugging.
+ * %%: A literal '%'
+ * ${cc}: Change the color to 'cc'
+ * $$: A literal '$'
+ * @return
+ * 0 on success, -1 on failure.
+ */
+attr(printf(2, 3))
+int cfprintf(CFILE *cfile, const char *format, ...);
+
+/**
+ * cfprintf() variant that takes a va_list.
+ */
+attr(printf(2, 0))
+int cvfprintf(CFILE *cfile, const char *format, va_list args);
+
+/**
+ * Reset the TTY state when terminating abnormally (async-signal-safe).
+ */
+int cfreset(CFILE *cfile);
+
+#endif // BFS_COLOR_H
diff --git a/src/ctx.c b/src/ctx.c
new file mode 100644
index 0000000..11531df
--- /dev/null
+++ b/src/ctx.c
@@ -0,0 +1,298 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+#include "ctx.h"
+#include "alloc.h"
+#include "bfstd.h"
+#include "color.h"
+#include "diag.h"
+#include "expr.h"
+#include "list.h"
+#include "mtab.h"
+#include "pwcache.h"
+#include "sighook.h"
+#include "stat.h"
+#include "trie.h"
+#include "xtime.h"
+#include <errno.h>
+#include <limits.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+/** Pick the default for ctx->threads (-j): the online CPU count, clamped to [1, 8]. */
+static int bfs_nproc(void) {
+	long online = xsysconf(_SC_NPROCESSORS_ONLN);
+
+	if (online < 1) {
+		return 1;
+	}
+
+	// Not much speedup after 8 threads
+	return online > 8 ? 8 : (int)online;
+}
+
+/** Allocate a context and fill in its defaults; returns NULL if any step fails. */
+struct bfs_ctx *bfs_ctx_new(void) {
+	struct bfs_ctx *ctx = ZALLOC(struct bfs_ctx);
+	if (!ctx) {
+		return NULL;
+	}
+
+	// Containers are set up first so that bfs_ctx_free() works on the failure path
+	SLIST_INIT(&ctx->expr_list);
+	ARENA_INIT(&ctx->expr_arena, struct bfs_expr);
+
+	ctx->maxdepth = INT_MAX;
+	ctx->flags = BFTW_RECOVER;
+	ctx->strategy = BFTW_BFS;
+	ctx->threads = bfs_nproc();
+	ctx->optlevel = 3;
+
+	trie_init(&ctx->files);
+
+	if (getrlimit(RLIMIT_NOFILE, &ctx->orig_nofile) == 0) {
+		ctx->cur_nofile = ctx->orig_nofile;
+
+		ctx->users = bfs_users_new();
+		if (ctx->users) {
+			ctx->groups = bfs_groups_new();
+			if (ctx->groups && xgettime(&ctx->now) == 0) {
+				return ctx;
+			}
+		}
+	}
+
+	bfs_ctx_free(ctx);
+	return NULL;
+}
+
+/** Return the mount table, parsing it on first use and remembering a parse failure. */
+const struct bfs_mtab *bfs_ctx_mtab(const struct bfs_ctx *ctx) {
+	// The cache is filled in lazily, even through a const context
+	struct bfs_ctx *self = (struct bfs_ctx *)ctx;
+
+	if (self->mtab_error) {
+		// An earlier parse failed; report the same error again
+		errno = self->mtab_error;
+		return self->mtab;
+	}
+
+	if (self->mtab) {
+		return self->mtab;
+	}
+
+	self->mtab = bfs_mtab_parse();
+	if (!self->mtab) {
+		self->mtab_error = errno;
+	}
+
+	return self->mtab;
+}
+
+/**
+ * An open file tracked by the bfs context.
+ *
+ * Instances are stored as the values of ctx->files by bfs_ctx_dedup() and
+ * freed by bfs_ctx_fclose().
+ */
+struct bfs_ctx_file {
+ /** The file itself. */
+ CFILE *cfile;
+ /** The path to the file (for diagnostics); NULL for standard streams. */
+ const char *path;
+ /** Signal hook to send a reset escape sequence; NULL when the file has no colors. */
+ struct sighook *hook;
+ /** Remembers I/O errors (the errno from a failed flush), to propagate them to the exit status. */
+ int error;
+};
+
+/** Signal hook that calls cfreset() on the tracked file passed as arg. */
+static void cfreset_hook(int sig, siginfo_t *info, void *arg) {
+	CFILE *cfile = arg;
+	cfreset(cfile);
+}
+
+/**
+ * Register an open file with the context, keyed by its file ID.  If the same
+ * file is already registered, the existing CFILE is returned instead.
+ */
+CFILE *bfs_ctx_dedup(struct bfs_ctx *ctx, CFILE *cfile, const char *path) {
+	struct bfs_stat sb;
+	if (bfs_stat(cfile->fd, NULL, 0, &sb) != 0) {
+		return NULL;
+	}
+
+	bfs_file_id id;
+	bfs_stat_id(&sb, &id);
+
+	struct trie_leaf *leaf = trie_insert_mem(&ctx->files, id, sizeof(id));
+	if (!leaf) {
+		return NULL;
+	}
+
+	struct bfs_ctx_file *known = leaf->value;
+	if (known) {
+		// Already open: record the newest path and hand back the existing stream
+		known->path = path;
+		return known->cfile;
+	}
+
+	struct bfs_ctx_file *entry = ALLOC(struct bfs_ctx_file);
+	leaf->value = entry;
+
+	if (entry) {
+		entry->cfile = cfile;
+		entry->path = path;
+		entry->error = 0;
+		entry->hook = NULL;
+
+		// Colored streams get a hook that resets the terminal on fatal signals
+		if (cfile->colors) {
+			entry->hook = atsigexit(cfreset_hook, cfile);
+		}
+
+		if (!cfile->colors || entry->hook) {
+			if (cfile != ctx->cout && cfile != ctx->cerr) {
+				++ctx->nfiles;
+			}
+			return cfile;
+		}
+	}
+
+	// Allocation or hook registration failed: undo the insertion
+	trie_remove(&ctx->files, leaf);
+	free(entry);
+	return NULL;
+}
+
+/** Flush every tracked stream and the user/group caches before running an external command. */
+void bfs_ctx_flush(const struct bfs_ctx *ctx) {
+	// Flushing all open streams before executing anything ensures that
+	// - the user sees everything relevant before an -ok[dir] prompt
+	// - output from commands is interleaved consistently with bfs
+	// - executed commands can rely on I/O from other bfs actions
+	for_trie (leaf, &ctx->files) {
+		struct bfs_ctx_file *tracked = leaf->value;
+		CFILE *cfile = tracked->cfile;
+
+		if (fflush(cfile->file) != 0) {
+			// Remember the failure so it is reported again at close time
+			tracked->error = errno;
+			clearerr(cfile->file);
+
+			if (tracked->path) {
+				bfs_error(ctx, "%pq: %m.\n", tracked->path);
+			} else if (cfile == ctx->cout) {
+				bfs_error(ctx, "(standard output): %m.\n");
+			}
+		}
+	}
+
+	// Flush the user/group caches, in case the executed command edits the
+	// user/group tables
+	bfs_users_flush(ctx->users);
+	bfs_groups_flush(ctx->groups);
+}
+
+/**
+ * Flush a stream, also failing if its error indicator is already set.
+ * errno is always assigned: 0 on success, otherwise EIO or the flush error.
+ */
+static int bfs_ctx_fflush(CFILE *cfile) {
+	FILE *stream = cfile->file;
+	bool failed = false;
+	int error = 0;
+
+	if (ferror(stream)) {
+		failed = true;
+		error = EIO;
+	}
+
+	// A fresh flush failure takes precedence over the generic EIO
+	if (fflush(stream) != 0) {
+		failed = true;
+		error = errno;
+	}
+
+	errno = error;
+	return failed ? -1 : 0;
+}
+
+/**
+ * Close a file tracked by the bfs context.
+ *
+ * Flushes the stream, removes its signal hook, closes it unless it is
+ * ctx->cout or ctx->cerr, reports any error, and frees ctx_file.
+ *
+ * @return
+ *         0 on success (or when failures are deliberately ignored for
+ *         stderr), -1 if an error was seen now or earlier in bfs_ctx_flush().
+ */
+static int bfs_ctx_fclose(struct bfs_ctx *ctx, struct bfs_ctx_file *ctx_file) {
+ CFILE *cfile = ctx_file->cfile;
+
+ // Writes to stderr are allowed to fail silently, unless the same file
+ // was used by -fprint, -fls, etc.
+ bool silent = cfile == ctx->cerr && !ctx_file->path;
+ int ret = 0, error = 0;
+
+ if (ctx_file->error) {
+ // An error was previously reported during bfs_ctx_flush()
+ ret = -1;
+ error = ctx_file->error;
+ }
+
+ // Flush the file just before we remove the hook, to maximize the chance
+ // we leave the TTY in a good state
+ if (bfs_ctx_fflush(cfile) != 0) {
+ ret = -1;
+ error = errno;
+ }
+
+ if (ctx_file->hook) {
+ sigunhook(ctx_file->hook);
+ }
+
+ // Close the CFILE, except for stdio streams, which are closed later
+ if (cfile != ctx->cout && cfile != ctx->cerr) {
+ if (cfclose(cfile) != 0) {
+ ret = -1;
+ error = errno;
+ }
+ }
+
+ if (silent) {
+ ret = 0;
+ }
+
+ if (ret != 0 && ctx->cerr) { // Nowhere to report the error if there is no stderr wrapper
+ if (ctx_file->path) {
+ bfs_error(ctx, "%pq: %s.\n", ctx_file->path, xstrerror(error));
+ } else if (cfile == ctx->cout) {
+ bfs_error(ctx, "(standard output): %s.\n", xstrerror(error));
+ }
+ }
+
+ free(ctx_file);
+ return ret;
+}
+
+int bfs_ctx_free(struct bfs_ctx *ctx) {
+ int ret = 0; // Becomes -1 if closing any tracked file reports an error
+
+ if (ctx) { // Freeing a NULL context does nothing and returns 0
+ CFILE *cout = ctx->cout;
+ CFILE *cerr = ctx->cerr;
+
+ bfs_mtab_free(ctx->mtab);
+
+ bfs_groups_free(ctx->groups);
+ bfs_users_free(ctx->users);
+
+ for_trie (leaf, &ctx->files) {
+ struct bfs_ctx_file *ctx_file = leaf->value;
+ if (bfs_ctx_fclose(ctx, ctx_file) != 0) {
+ ret = -1;
+ }
+ }
+ trie_destroy(&ctx->files);
+
+ cfclose(cout); // bfs_ctx_fclose() leaves the standard stream wrappers open, so they are closed here
+ cfclose(cerr);
+ free_colors(ctx->colors);
+
+ for_slist (struct bfs_expr, expr, &ctx->expr_list, freelist) {
+ bfs_expr_clear(expr);
+ }
+ arena_destroy(&ctx->expr_arena);
+
+ for (size_t i = 0; i < ctx->npaths; ++i) {
+ free((char *)ctx->paths[i]); // paths is declared const char **, so the const is cast away to free
+ }
+ free(ctx->paths);
+
+ free(ctx->argv);
+ free(ctx);
+ }
+
+ return ret;
+}
diff --git a/src/ctx.h b/src/ctx.h
new file mode 100644
index 0000000..fc3020c
--- /dev/null
+++ b/src/ctx.h
@@ -0,0 +1,169 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+/**
+ * bfs execution context.
+ */
+
+#ifndef BFS_CTX_H
+#define BFS_CTX_H
+
+#include "prelude.h"
+#include "alloc.h"
+#include "bftw.h"
+#include "diag.h"
+#include "expr.h"
+#include "trie.h"
+#include <stddef.h>
+#include <sys/resource.h>
+#include <time.h>
+
+struct CFILE;
+
+/**
+ * The execution context for bfs.
+ *
+ * Created by bfs_ctx_new(), which zero-initializes the structure and then
+ * fills in the defaults noted below, and destroyed by bfs_ctx_free().
+ */
+struct bfs_ctx {
+ /** The number of command line arguments. */
+ size_t argc;
+ /** The unparsed command line arguments. */
+ char **argv;
+
+ /** The root paths. */
+ const char **paths;
+ /** The number of root paths. */
+ size_t npaths;
+
+ /** The main command line expression. */
+ struct bfs_expr *expr;
+ /** An expression for files to filter out. */
+ struct bfs_expr *exclude;
+ /** A list of allocated expressions. */
+ struct bfs_exprs expr_list;
+ /** bfs_expr arena. */
+ struct arena expr_arena;
+
+ /** -mindepth option. */
+ int mindepth;
+ /** -maxdepth option (defaults to INT_MAX). */
+ int maxdepth;
+
+ /** bftw() flags (defaults to BFTW_RECOVER). */
+ enum bftw_flags flags;
+ /** bftw() search strategy (defaults to BFTW_BFS). */
+ enum bftw_strategy strategy;
+
+ /** Threads (-j); defaults to the number of online processors, clamped to [1, 8]. */
+ int threads;
+ /** Optimization level (-O); defaults to 3. */
+ int optlevel;
+ /** Debugging flags (-D). */
+ enum debug_flags debug;
+ /** Whether to ignore deletions that race with bfs (-ignore_readdir_race). */
+ bool ignore_races;
+ /** Whether to follow POSIXisms more closely ($POSIXLY_CORRECT). */
+ bool posixly_correct;
+ /** Whether to show a status bar (-status). */
+ bool status;
+ /** Whether to only return unique files (-unique). */
+ bool unique;
+ /** Whether to print warnings (-warn/-nowarn). */
+ bool warn;
+ /** Whether to only handle paths with xargs-safe characters (-X). */
+ bool xargs_safe;
+
+ /** Color data. */
+ struct colors *colors;
+ /** The error that occurred parsing the color table, if any. */
+ int colors_error;
+ /** Colored stdout. */
+ struct CFILE *cout;
+ /** Colored stderr. */
+ struct CFILE *cerr;
+
+ /** User cache. */
+ struct bfs_users *users;
+ /** Group cache. */
+ struct bfs_groups *groups;
+ /** The error that occurred parsing the group table, if any. */
+ int groups_error;
+
+ /** Table of mounted file systems (parsed lazily by bfs_ctx_mtab()). */
+ struct bfs_mtab *mtab;
+ /** The error that occurred parsing the mount table, if any. */
+ int mtab_error;
+
+ /** All the files owned by the context, keyed by file ID. */
+ struct trie files;
+ /** The number of files owned by the context, not counting cout and cerr. */
+ int nfiles;
+
+ /** The initial RLIMIT_NOFILE limits. */
+ struct rlimit orig_nofile;
+ /** The current RLIMIT_NOFILE limits. */
+ struct rlimit cur_nofile;
+
+ /** The current time (sampled once, in bfs_ctx_new()). */
+ struct timespec now;
+};
+
+/**
+ * @return
+ * A new bfs context, or NULL on failure.
+ */
+struct bfs_ctx *bfs_ctx_new(void);
+
+/**
+ * Get the mount table.
+ *
+ * @param ctx
+ * The bfs context.
+ * @return
+ * The cached mount table, or NULL on failure.
+ */
+const struct bfs_mtab *bfs_ctx_mtab(const struct bfs_ctx *ctx);
+
+/**
+ * Deduplicate an opened file.
+ *
+ * @param ctx
+ * The bfs context.
+ * @param cfile
+ * The opened file.
+ * @param path
+ * The path to the opened file (or NULL for standard streams).
+ * @return
+ * If the same file was opened previously, that file is returned. If cfile is a new file,
+ * cfile itself is returned. If an error occurs, NULL is returned.
+ */
+struct CFILE *bfs_ctx_dedup(struct bfs_ctx *ctx, struct CFILE *cfile, const char *path);
+
+/**
+ * Flush any caches for consistency with external processes.
+ *
+ * @param ctx
+ * The bfs context.
+ */
+void bfs_ctx_flush(const struct bfs_ctx *ctx);
+
+/**
+ * Dump the parsed command line.
+ *
+ * @param ctx
+ * The bfs context.
+ * @param flag
+ * The -D flag that triggered the dump.
+ */
+void bfs_ctx_dump(const struct bfs_ctx *ctx, enum debug_flags flag);
+
+/**
+ * Free a bfs context.
+ *
+ * @param ctx
+ * The context to free.
+ * @return
+ * 0 on success, -1 if any errors occurred.
+ */
+int bfs_ctx_free(struct bfs_ctx *ctx);
+
+#endif // BFS_CTX_H
diff --git a/src/diag.c b/src/diag.c
new file mode 100644
index 0000000..bb744f6
--- /dev/null
+++ b/src/diag.c
@@ -0,0 +1,300 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+#include "prelude.h"
+#include "diag.h"
+#include "alloc.h"
+#include "bfstd.h"
+#include "color.h"
+#include "ctx.h"
+#include "dstring.h"
+#include "expr.h"
+#include <errno.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/** Shared body of bfs_diagf() and bfs_abortf(): "prog: func@file:line: message" on stderr. */
+attr(printf(2, 0))
+static void bfs_vdiagf(const struct bfs_loc *loc, const char *format, va_list args) {
+	const char *prog = xgetprogname();
+
+	fprintf(stderr, "%s: %s@%s:%d: ", prog, loc->func, loc->file, loc->line);
+	vfprintf(stderr, format, args);
+	fprintf(stderr, "\n");
+}
+
+/** Print a low-level diagnostic for the given source location. */
+void bfs_diagf(const struct bfs_loc *loc, const char *format, ...) {
+	va_list ap;
+
+	va_start(ap, format);
+	bfs_vdiagf(loc, format, ap);
+	va_end(ap);
+}
+
+/** Print a low-level diagnostic for the given source location, then abort(). */
+noreturn void bfs_abortf(const struct bfs_loc *loc, const char *format, ...) {
+	va_list ap;
+
+	va_start(ap, format);
+	bfs_vdiagf(loc, format, ap);
+	va_end(ap);
+
+	abort();
+}
+
+/** Get the message for the current value of errno. */
+const char *bfs_errstr(void) {
+	int error = errno;
+	return xstrerror(error);
+}
+
+/** Map a single debug flag to the name used after -D; anything else is reported as a bug. */
+const char *debug_flag_name(enum debug_flags flag) {
+	const char *name = NULL;
+
+	switch (flag) {
+	case DEBUG_COST:
+		name = "cost";
+		break;
+	case DEBUG_EXEC:
+		name = "exec";
+		break;
+	case DEBUG_OPT:
+		name = "opt";
+		break;
+	case DEBUG_RATES:
+		name = "rates";
+		break;
+	case DEBUG_SEARCH:
+		name = "search";
+		break;
+	case DEBUG_STAT:
+		name = "stat";
+		break;
+	case DEBUG_TREE:
+		name = "tree";
+		break;
+
+	case DEBUG_ALL:
+		// Not a single flag, so it has no name
+		break;
+	}
+
+	if (name) {
+		return name;
+	}
+
+	bfs_bug("Unrecognized debug flag");
+	return "???";
+}
+
+void bfs_perror(const struct bfs_ctx *ctx, const char *str) {
+ bfs_error(ctx, "%s: %m.\n", str); // Like perror(): str, then the message for the current errno via %m
+}
+
+/** Variadic front end for bfs_verror(). */
+void bfs_error(const struct bfs_ctx *ctx, const char *format, ...) {
+	va_list ap;
+
+	va_start(ap, format);
+	bfs_verror(ctx, format, ap);
+	va_end(ap);
+}
+
+/** Variadic front end for bfs_vwarning(); returns whether the warning was printed. */
+bool bfs_warning(const struct bfs_ctx *ctx, const char *format, ...) {
+	va_list ap;
+
+	va_start(ap, format);
+	bool printed = bfs_vwarning(ctx, format, ap);
+	va_end(ap);
+
+	return printed;
+}
+
+/** Variadic front end for bfs_vdebug(); returns whether the message was printed. */
+bool bfs_debug(const struct bfs_ctx *ctx, enum debug_flags flag, const char *format, ...) {
+	va_list ap;
+
+	va_start(ap, format);
+	bool printed = bfs_vdebug(ctx, flag, format, ap);
+	va_end(ap);
+
+	return printed;
+}
+
+void bfs_verror(const struct bfs_ctx *ctx, const char *format, va_list args) {
+ int error = errno;
+
+ bfs_error_prefix(ctx);
+
+ errno = error;
+ cvfprintf(ctx->cerr, format, args);
+}
+
+/** Print a warning to the context's stderr, unless warnings are disabled. */
+bool bfs_vwarning(const struct bfs_ctx *ctx, const char *format, va_list args) {
+	int saved = errno;
+
+	if (!bfs_warning_prefix(ctx)) {
+		return false;
+	}
+
+	// Printing the prefix may change errno, which %m in format relies on
+	errno = saved;
+	cvfprintf(ctx->cerr, format, args);
+	return true;
+}
+
+/** Print a debug message to the context's stderr, if the given -D flag is enabled. */
+bool bfs_vdebug(const struct bfs_ctx *ctx, enum debug_flags flag, const char *format, va_list args) {
+	int saved = errno;
+
+	if (!bfs_debug_prefix(ctx, flag)) {
+		return false;
+	}
+
+	// Printing the prefix may change errno, which %m in format relies on
+	errno = saved;
+	cvfprintf(ctx->cerr, format, args);
+	return true;
+}
+
+/** Get argv[0] with any leading directories stripped. */
+static const char *bfs_cmd(const struct bfs_ctx *ctx) {
+	const char *argv0 = ctx->argv[0];
+	return argv0 + xbaseoff(argv0);
+}
+
+void bfs_error_prefix(const struct bfs_ctx *ctx) {
+ cfprintf(ctx->cerr, "${bld}%s:${rs} ${err}error:${rs} ", bfs_cmd(ctx));
+}
+
+/** Print the "cmd: warning: " prefix, unless warnings are disabled. */
+bool bfs_warning_prefix(const struct bfs_ctx *ctx) {
+	if (!ctx->warn) {
+		return false;
+	}
+
+	cfprintf(ctx->cerr, "${bld}%s:${rs} ${wrn}warning:${rs} ", bfs_cmd(ctx));
+	return true;
+}
+
+/** Print the "cmd: -D flag: " prefix, if the given debug flag is enabled. */
+bool bfs_debug_prefix(const struct bfs_ctx *ctx, enum debug_flags flag) {
+	if (!(ctx->debug & flag)) {
+		return false;
+	}
+
+	cfprintf(ctx->cerr, "${bld}%s:${rs} ${cyn}-D %s${rs}: ", bfs_cmd(ctx), debug_flag_name(flag));
+	return true;
+}
+
+/**
+ * Recursive part of highlight_expr(): mark the arguments belonging to expr
+ * and to all of its descendants.  Returns whether anything was marked.
+ */
+static bool highlight_expr_recursive(const struct bfs_ctx *ctx, const struct bfs_expr *expr, bool args[]) {
+	if (!expr) {
+		return false;
+	}
+
+	bool marked = false;
+
+	// Locate the slot of the command line that expr->argv points at, if any
+	size_t start = 0;
+	while (start < ctx->argc && &ctx->argv[start] != expr->argv) {
+		++start;
+	}
+
+	if (start < ctx->argc) {
+		for (size_t off = 0; off < expr->argc; ++off) {
+			bfs_assert(start + off < ctx->argc);
+			args[start + off] = true;
+			marked = true;
+		}
+	}
+
+	for (struct bfs_expr *child = bfs_expr_children(expr); child; child = child->next) {
+		if (highlight_expr_recursive(ctx, child, args)) {
+			marked = true;
+		}
+	}
+
+	return marked;
+}
+
+/** Clear args[], then mark the command line arguments that make up expr. */
+static bool highlight_expr(const struct bfs_ctx *ctx, const struct bfs_expr *expr, bool args[]) {
+	size_t slot = 0;
+	while (slot < ctx->argc) {
+		args[slot++] = false;
+	}
+
+	return highlight_expr_recursive(ctx, expr, args);
+}
+
+/**
+ * Print a highlighted portion of the command line.
+ *
+ * Two lines go to the context's stderr, each with an error or warning prefix:
+ * the whole command line with the selected arguments in bold, then a row of
+ * '~' characters underlining those arguments.
+ *
+ * @param ctx
+ *         The bfs context.
+ * @param args
+ *         One flag per command line argument; true selects it for highlighting.
+ * @param warning
+ *         Whether to use the warning prefix and color instead of the error ones.
+ */
+static void bfs_argv_diag(const struct bfs_ctx *ctx, const bool args[], bool warning) {
+	// Escape every argument before printing anything, so that an allocation
+	// failure cannot leave a bare prefix with no message or newline on stderr
+	dchar **argv = ZALLOC_ARRAY(dchar *, ctx->argc);
+	if (!argv) {
+		return;
+	}
+
+	for (size_t i = 0; i < ctx->argc; ++i) {
+		if (dstrescat(&argv[i], ctx->argv[i], WESC_SHELL | WESC_TTY) != 0) {
+			goto done;
+		}
+	}
+
+	if (warning) {
+		bfs_warning_prefix(ctx);
+	} else {
+		bfs_error_prefix(ctx);
+	}
+
+	// First line: the command line itself.  max_argc ends up one past the
+	// last highlighted argument, which is where the underline row stops.
+	size_t max_argc = 0;
+	for (size_t i = 0; i < ctx->argc; ++i) {
+		if (i > 0) {
+			cfprintf(ctx->cerr, " ");
+		}
+
+		if (args[i]) {
+			max_argc = i + 1;
+			cfprintf(ctx->cerr, "${bld}%s${rs}", argv[i]);
+		} else {
+			cfprintf(ctx->cerr, "%s", argv[i]);
+		}
+	}
+
+	cfprintf(ctx->cerr, "\n");
+
+	if (warning) {
+		bfs_warning_prefix(ctx);
+	} else {
+		bfs_error_prefix(ctx);
+	}
+
+	// Second line: '~' under highlighted arguments, spaces under the rest
+	for (size_t i = 0; i < max_argc; ++i) {
+		if (i > 0) {
+			// The separator is underlined only between two highlighted arguments
+			if (args[i - 1] && args[i]) {
+				cfprintf(ctx->cerr, "~");
+			} else {
+				cfprintf(ctx->cerr, " ");
+			}
+		}
+
+		// Turn the color on at the start of each highlighted run
+		if (args[i] && (i == 0 || !args[i - 1])) {
+			if (warning) {
+				cfprintf(ctx->cerr, "${wrn}");
+			} else {
+				cfprintf(ctx->cerr, "${err}");
+			}
+		}
+
+		// One marker per column of the escaped argument, as measured by xstrwidth()
+		size_t len = xstrwidth(argv[i]);
+		for (size_t j = 0; j < len; ++j) {
+			if (args[i]) {
+				cfprintf(ctx->cerr, "~");
+			} else {
+				cfprintf(ctx->cerr, " ");
+			}
+		}
+
+		// Turn the color off at the end of each highlighted run
+		if (args[i] && (i + 1 >= max_argc || !args[i + 1])) {
+			cfprintf(ctx->cerr, "${rs}");
+		}
+	}
+
+	cfprintf(ctx->cerr, "\n");
+
+done:
+	for (size_t i = 0; i < ctx->argc; ++i) {
+		dstrfree(argv[i]);
+	}
+	free(argv);
+}
+
+void bfs_argv_error(const struct bfs_ctx *ctx, const bool args[]) {
+ bfs_argv_diag(ctx, args, false);
+}
+
+void bfs_expr_error(const struct bfs_ctx *ctx, const struct bfs_expr *expr) {
+ bool args[ctx->argc];
+ if (highlight_expr(ctx, expr, args)) {
+ bfs_argv_error(ctx, args);
+ }
+}
+
+/** Highlight the selected command line arguments as a warning, if warnings are enabled. */
+bool bfs_argv_warning(const struct bfs_ctx *ctx, const bool args[]) {
+	if (ctx->warn) {
+		bfs_argv_diag(ctx, args, true);
+		return true;
+	}
+
+	return false;
+}
+
+/** Highlight the command line arguments that make up expr as a warning. */
+bool bfs_expr_warning(const struct bfs_ctx *ctx, const struct bfs_expr *expr) {
+	bool args[ctx->argc];
+
+	// Nothing is printed if no part of expr maps onto the command line
+	if (!highlight_expr(ctx, expr, args)) {
+		return false;
+	}
+
+	return bfs_argv_warning(ctx, args);
+}
diff --git a/src/diag.h b/src/diag.h
new file mode 100644
index 0000000..f2498b5
--- /dev/null
+++ b/src/diag.h
@@ -0,0 +1,258 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+/**
+ * Diagnostic messages.
+ */
+
+#ifndef BFS_DIAG_H
+#define BFS_DIAG_H
+
+#include "prelude.h"
+#include <stdarg.h>
+
+/**
+ * static_assert() with an optional second argument.
+ *
+ * In the pre-C23 fallback, the stringified argument list is appended after the
+ * caller's arguments.  With a single argument, `msg` therefore binds to the
+ * stringified expression; with two, `msg` binds to the caller's message and
+ * the stringified list lands in the ignored `...`.
+ */
+#if __STDC_VERSION__ >= C23
+# define bfs_static_assert static_assert
+#else
+# define bfs_static_assert(...) bfs_static_assert_(__VA_ARGS__, #__VA_ARGS__, )
+# define bfs_static_assert_(expr, msg, ...) _Static_assert(expr, msg)
+#endif
+
+/**
+ * A source code location.
+ *
+ * BFS_LOC_INIT is a brace-enclosed initializer that captures the location
+ * where it is expanded.
+ */
+struct bfs_loc {
+ const char *file; // Source file name (__FILE__ when built from BFS_LOC_INIT)
+ int line; // Line number (__LINE__ when built from BFS_LOC_INIT)
+ const char *func; // Enclosing function (__func__ when built from BFS_LOC_INIT)
+};
+
+#define BFS_LOC_INIT { .file = __FILE__, .line = __LINE__, .func = __func__ }
+
+/**
+ * Get the current source code location.
+ *
+ * Evaluates to a pointer to a const struct bfs_loc compound literal
+ * initialized with BFS_LOC_INIT.  When compiling as C23 or later, the literal
+ * is additionally declared `static`.
+ */
+#if __STDC_VERSION__ >= C23
+# define bfs_location() (&(static const struct bfs_loc)BFS_LOC_INIT)
+#else
+# define bfs_location() (&(const struct bfs_loc)BFS_LOC_INIT)
+#endif
+
+/**
+ * Print a low-level diagnostic message to standard error, formatted like
+ *
+ * bfs: func@src/file.c:0: Message
+ */
+attr(printf(2, 3))
+void bfs_diagf(const struct bfs_loc *loc, const char *format, ...);
+
+/**
+ * Unconditional diagnostic message.
+ *
+ * Forwards a format string and its arguments to bfs_diagf(), passing the
+ * location of the macro invocation as the first argument.
+ */
+#define bfs_diag(...) bfs_diagf(bfs_location(), __VA_ARGS__)
+
+/**
+ * Get the last error message.
+ */
+const char *bfs_errstr(void);
+
+/**
+ * Print a diagnostic message including the last error.
+ *
+ * The format, if given, must be a string literal: it is concatenated after ""
+ * and before ": %s".  sizeof(format) > 1 holds only for a non-empty literal,
+ * so a call with no arguments prints just the error string.  bfs_errstr() is
+ * appended as the last argument to match the trailing %s.
+ */
+#define bfs_ediag(...) \
+ bfs_ediag_("" __VA_ARGS__, bfs_errstr())
+
+#define bfs_ediag_(format, ...) \
+ bfs_diag(sizeof(format) > 1 ? format ": %s" : "%s", __VA_ARGS__)
+
+/**
+ * Print a message to standard error and abort.
+ */
+attr(cold, printf(2, 3))
+noreturn void bfs_abortf(const struct bfs_loc *loc, const char *format, ...);
+
+/**
+ * Unconditional abort with a message.
+ *
+ * Forwards a format string and its arguments to bfs_abortf(), passing the
+ * location of the macro invocation as the first argument.
+ */
+#define bfs_abort(...) \
+ bfs_abortf(bfs_location(), __VA_ARGS__)
+
+/**
+ * Abort with a message including the last error.
+ *
+ * Uses the same literal-concatenation scheme as bfs_ediag(): the optional
+ * format must be a string literal, an empty call prints only the error string,
+ * and bfs_errstr() supplies the argument for the trailing %s.
+ */
+#define bfs_eabort(...) \
+ bfs_eabort_("" __VA_ARGS__, bfs_errstr())
+
+#define bfs_eabort_(format, ...) \
+ bfs_abort(sizeof(format) > 1 ? format ": %s" : "%s", __VA_ARGS__)
+
+/**
+ * Abort in debug builds; no-op in release builds.
+ *
+ * With NDEBUG defined, both macros expand to ((void)0): the arguments are
+ * discarded without being evaluated and execution continues past the call, so
+ * callers still need their own failure path.
+ */
+#ifdef NDEBUG
+# define bfs_bug(...) ((void)0)
+# define bfs_ebug(...) ((void)0)
+#else
+# define bfs_bug bfs_abort
+# define bfs_ebug bfs_eabort
+#endif
+
+/**
+ * Unconditional assert.
+ *
+ * Usage: bfs_verify(cond) or bfs_verify(cond, "literal format", args...).
+ *
+ * The whole argument list is stringified into `str`, and two "" arguments are
+ * appended so that `format` always exists.  Without a caller format, the
+ * default message prints `str` followed by one empty string.  With a caller
+ * format, "%.0s" consumes `str` while printing nothing, and the trailing
+ * "%s%s" consumes the two padding strings.
+ */
+#define bfs_verify(...) \
+ bfs_verify_(#__VA_ARGS__, __VA_ARGS__, "", "")
+
+#define bfs_verify_(str, cond, format, ...) \
+ ((cond) ? (void)0 : bfs_abort( \
+ sizeof(format) > 1 \
+ ? "%.0s" format "%s%s" \
+ : "Assertion failed: `%s`%s", \
+ str, __VA_ARGS__))
+
+/**
+ * Unconditional assert, including the last error.
+ *
+ * Works like bfs_verify(), except that the second padding argument is
+ * bfs_errstr(), which is printed after ": " at the end of either message.
+ */
+#define bfs_everify(...) \
+ bfs_everify_(#__VA_ARGS__, __VA_ARGS__, "", bfs_errstr())
+
+#define bfs_everify_(str, cond, format, ...) \
+ ((cond) ? (void)0 : bfs_abort( \
+ sizeof(format) > 1 \
+ ? "%.0s" format "%s: %s" \
+ : "Assertion failed: `%s`: %s", \
+ str, __VA_ARGS__))
+
+/**
+ * Assert in debug builds; no-op in release builds.
+ *
+ * With NDEBUG defined, the condition is not evaluated at all, so it must not
+ * have side effects the program relies on.
+ */
+#ifdef NDEBUG
+# define bfs_assert(...) ((void)0)
+# define bfs_eassert(...) ((void)0)
+#else
+# define bfs_assert bfs_verify
+# define bfs_eassert bfs_everify
+#endif
+
+struct bfs_ctx;
+struct bfs_expr;
+
+/**
+ * Various debugging flags.
+ *
+ * Each flag occupies its own bit, so flags can be combined with bitwise OR.
+ * DEBUG_ALL is the mask of the seven bits defined here and has to be widened
+ * whenever a flag is added.
+ */
+enum debug_flags {
+ /** Print cost estimates. */
+ DEBUG_COST = 1 << 0,
+ /** Print executed command details. */
+ DEBUG_EXEC = 1 << 1,
+ /** Print optimization details. */
+ DEBUG_OPT = 1 << 2,
+ /** Print rate information. */
+ DEBUG_RATES = 1 << 3,
+ /** Trace the filesystem traversal. */
+ DEBUG_SEARCH = 1 << 4,
+ /** Trace all stat() calls. */
+ DEBUG_STAT = 1 << 5,
+ /** Print the parse tree. */
+ DEBUG_TREE = 1 << 6,
+ /** All debug flags. */
+ DEBUG_ALL = (1 << 7) - 1,
+};
+
+/**
+ * Convert a debug flag to a string.
+ */
+const char *debug_flag_name(enum debug_flags flag);
+
+/**
+ * Like perror(), but decorated like bfs_error().
+ */
+attr(cold)
+void bfs_perror(const struct bfs_ctx *ctx, const char *str);
+
+/**
+ * Shorthand for printing error messages.
+ */
+attr(cold, printf(2, 3))
+void bfs_error(const struct bfs_ctx *ctx, const char *format, ...);
+
+/**
+ * Shorthand for printing warning messages.
+ *
+ * @return Whether a warning was printed.
+ */
+attr(cold, printf(2, 3))
+bool bfs_warning(const struct bfs_ctx *ctx, const char *format, ...);
+
+/**
+ * Shorthand for printing debug messages.
+ *
+ * @return Whether a debug message was printed.
+ */
+attr(cold, printf(3, 4))
+bool bfs_debug(const struct bfs_ctx *ctx, enum debug_flags flag, const char *format, ...);
+
+/**
+ * bfs_error() variant that takes a va_list.
+ */
+attr(cold, printf(2, 0))
+void bfs_verror(const struct bfs_ctx *ctx, const char *format, va_list args);
+
+/**
+ * bfs_warning() variant that takes a va_list.
+ */
+attr(cold, printf(2, 0))
+bool bfs_vwarning(const struct bfs_ctx *ctx, const char *format, va_list args);
+
+/**
+ * bfs_debug() variant that takes a va_list.
+ */
+attr(cold, printf(3, 0))
+bool bfs_vdebug(const struct bfs_ctx *ctx, enum debug_flags flag, const char *format, va_list args);
+
+/**
+ * Print the error message prefix.
+ */
+attr(cold)
+void bfs_error_prefix(const struct bfs_ctx *ctx);
+
+/**
+ * Print the warning message prefix.
+ */
+attr(cold)
+bool bfs_warning_prefix(const struct bfs_ctx *ctx);
+
+/**
+ * Print the debug message prefix.
+ */
+attr(cold)
+bool bfs_debug_prefix(const struct bfs_ctx *ctx, enum debug_flags flag);
+
+/**
+ * Highlight parts of the command line in an error message.
+ */
+attr(cold)
+void bfs_argv_error(const struct bfs_ctx *ctx, const bool args[]);
+
+/**
+ * Highlight parts of an expression in an error message.
+ */
+attr(cold)
+void bfs_expr_error(const struct bfs_ctx *ctx, const struct bfs_expr *expr);
+
+/**
+ * Highlight parts of the command line in a warning message.
+ */
+attr(cold)
+bool bfs_argv_warning(const struct bfs_ctx *ctx, const bool args[]);
+
+/**
+ * Highlight parts of an expression in a warning message.
+ */
+attr(cold)
+bool bfs_expr_warning(const struct bfs_ctx *ctx, const struct bfs_expr *expr);
+
+#endif // BFS_DIAG_H
diff --git a/src/dir.c b/src/dir.c
new file mode 100644
index 0000000..fadf1c0
--- /dev/null
+++ b/src/dir.c
@@ -0,0 +1,371 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+#include "prelude.h"
+#include "dir.h"
+#include "alloc.h"
+#include "bfstd.h"
+#include "diag.h"
+#include "sanity.h"
+#include "trie.h"
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#if BFS_USE_GETDENTS
+# if BFS_HAS_GETDENTS64_SYSCALL
+# include <sys/syscall.h>
+# endif
+
+/**
+ * getdents() syscall wrapper.
+ *
+ * Calls the first available of posix_getdents(), getdents(), getdents64(), or
+ * the raw getdents64 syscall, and returns its result unchanged.  The
+ * sanitize_*() calls presumably annotate the buffer for the sanitizers: the
+ * whole buffer before the call, and only the returned bytes after a
+ * successful read.
+ */
+static ssize_t bfs_getdents(int fd, void *buf, size_t size) {
+ sanitize_uninit(buf, size);
+
+#if BFS_HAS_POSIX_GETDENTS
+ int flags = 0;
+# ifdef DT_FORCE_TYPE
+ flags |= DT_FORCE_TYPE;
+# endif
+ ssize_t ret = posix_getdents(fd, buf, size, flags);
+#elif BFS_HAS_GETDENTS
+ ssize_t ret = getdents(fd, buf, size);
+#elif BFS_HAS_GETDENTS64
+ ssize_t ret = getdents64(fd, buf, size);
+#elif BFS_HAS_GETDENTS64_SYSCALL
+ ssize_t ret = syscall(SYS_getdents64, fd, buf, size);
+#else
+# error "No getdents() implementation"
+#endif
+
+ if (ret > 0) {
+ sanitize_init(buf, ret);
+ }
+
+ return ret;
+}
+
+#endif // BFS_USE_GETDENTS
+
+/**
+ * Directory entry type for bfs_getdents().
+ *
+ * The branches follow the same priority order as bfs_getdents(), so that the
+ * entry type always matches the function that fills the buffer:
+ * posix_getdents() yields struct posix_dent, getdents() yields struct dirent,
+ * and the getdents64 variants yield struct dirent64.  Without getdents(),
+ * entries come from readdir() as struct dirent.
+ */
+#if !BFS_USE_GETDENTS
+typedef struct dirent sys_dirent;
+#elif BFS_HAS_POSIX_GETDENTS
+typedef struct posix_dent sys_dirent;
+#elif BFS_HAS_GETDENTS
+typedef struct dirent sys_dirent;
+#else
+typedef struct dirent64 sys_dirent;
+#endif
+
+/**
+ * Map the file type bits of a mode to a bfs_type.  Only the bits selected by
+ * S_IFMT are examined; formats that are not defined on this platform, or that
+ * match nothing, yield BFS_UNKNOWN.
+ */
+enum bfs_type bfs_mode_to_type(mode_t mode) {
+	const mode_t fmt = mode & S_IFMT;
+
+#ifdef S_IFBLK
+	if (fmt == S_IFBLK) {
+		return BFS_BLK;
+	}
+#endif
+#ifdef S_IFCHR
+	if (fmt == S_IFCHR) {
+		return BFS_CHR;
+	}
+#endif
+#ifdef S_IFDIR
+	if (fmt == S_IFDIR) {
+		return BFS_DIR;
+	}
+#endif
+#ifdef S_IFDOOR
+	if (fmt == S_IFDOOR) {
+		return BFS_DOOR;
+	}
+#endif
+#ifdef S_IFIFO
+	if (fmt == S_IFIFO) {
+		return BFS_FIFO;
+	}
+#endif
+#ifdef S_IFLNK
+	if (fmt == S_IFLNK) {
+		return BFS_LNK;
+	}
+#endif
+#ifdef S_IFPORT
+	if (fmt == S_IFPORT) {
+		return BFS_PORT;
+	}
+#endif
+#ifdef S_IFREG
+	if (fmt == S_IFREG) {
+		return BFS_REG;
+	}
+#endif
+#ifdef S_IFSOCK
+	if (fmt == S_IFSOCK) {
+		return BFS_SOCK;
+	}
+#endif
+#ifdef S_IFWHT
+	if (fmt == S_IFWHT) {
+		return BFS_WHT;
+	}
+#endif
+
+	return BFS_UNKNOWN;
+}
+
+/**
+ * Private directory flags.
+ *
+ * These are shifts of BFS_DIR_PRIVATE, so they occupy bits above the public
+ * enum bfs_dir_flags values and share the `flags` word of struct bfs_dir.
+ */
+enum {
+ /** We've reached the end of the directory. */
+ BFS_DIR_EOF = BFS_DIR_PRIVATE << 0,
+ /** This directory is a union mount we need to dedup manually. */
+ BFS_DIR_UNION = BFS_DIR_PRIVATE << 1,
+};
+
+struct bfs_dir { // State of one open directory; layout depends on BFS_USE_GETDENTS
+ unsigned int flags; // Caller's bfs_dir_flags plus the private BFS_DIR_* bits
+
+#if BFS_USE_GETDENTS
+ int fd; // Directory file descriptor handed to bfs_getdents()
+ unsigned short pos; // Byte offset in the buffer of the next unread entry
+ unsigned short size; // Number of valid bytes currently in the buffer
+# if __FreeBSD__
+ struct trie trie; // Names already returned; only used when BFS_DIR_UNION is set
+# endif
+ alignas(sys_dirent) char buf[]; // Raw entries filled in by bfs_getdents()
+#else
+ DIR *dir; // Stream obtained from fdopendir()
+ struct dirent *de; // Entry fetched by bfs_polldir() but not yet consumed, or NULL
+#endif
+};
+
+#if BFS_USE_GETDENTS // Allocation size of a struct bfs_dir, including any inline buffer
+# define DIR_SIZE (64 << 10) // 64 KiB in total, header included
+# define BUF_SIZE (DIR_SIZE - sizeof(struct bfs_dir)) // Bytes left for entries; must fit the unsigned short pos/size fields
+#else
+# define DIR_SIZE sizeof(struct bfs_dir) // No inline buffer: only the header is allocated
+#endif
+
+/** Allocate DIR_SIZE bytes with malloc() for a not-yet-opened directory. */
+struct bfs_dir *bfs_allocdir(void) {
+	struct bfs_dir *dir = malloc(DIR_SIZE);
+	return dir;
+}
+
+/** Set up an arena whose chunks have the alignment and size of a directory. */
+void bfs_dir_arena(struct arena *arena) {
+	size_t align = alignof(struct bfs_dir);
+	size_t size = DIR_SIZE;
+	arena_init(arena, align, size);
+}
+
+/**
+ * Open a directory into previously allocated storage.
+ *
+ * When at_path is non-NULL it is opened relative to at_fd; otherwise at_fd
+ * itself is adopted, failing with EBADF if it is negative.  Returns 0 on
+ * success and -1 on failure.
+ */
+int bfs_opendir(struct bfs_dir *dir, int at_fd, const char *at_path, enum bfs_dir_flags flags) {
+	int fd = at_fd;
+	if (at_path) {
+		fd = openat(at_fd, at_path, O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+		if (fd < 0) {
+			return -1;
+		}
+	} else if (at_fd < 0) {
+		// Neither a path nor a usable descriptor was given
+		errno = EBADF;
+		return -1;
+	}
+
+	dir->flags = flags;
+
+#if BFS_USE_GETDENTS
+	// The buffer starts out empty; bfs_polldir() fills it on demand
+	dir->size = 0;
+	dir->pos = 0;
+	dir->fd = fd;
+
+#  if __FreeBSD__ && defined(F_ISUNIONSTACK)
+	if (fcntl(fd, F_ISUNIONSTACK) > 0) {
+		trie_init(&dir->trie);
+		dir->flags |= BFS_DIR_UNION;
+	}
+#  endif
+#else // !BFS_USE_GETDENTS
+	DIR *stream = fdopendir(fd);
+	dir->dir = stream;
+	if (!stream) {
+		// Only close a descriptor that this function opened itself
+		if (at_path) {
+			close_quietly(fd);
+		}
+		return -1;
+	}
+	dir->de = NULL;
+#endif
+
+	return 0;
+}
+
+/** Return the file descriptor underlying an open directory. */
+int bfs_dirfd(const struct bfs_dir *dir) {
+	int fd;
+
+#if BFS_USE_GETDENTS
+	fd = dir->fd;
+#else
+	fd = dirfd(dir->dir);
+#endif
+
+	return fd;
+}
+
+int bfs_polldir(struct bfs_dir *dir) { // Returns 1 if an entry is ready, 0 at EOF, -1 on error
+#if BFS_USE_GETDENTS
+ if (dir->pos < dir->size) { // Unread entries are still buffered
+ return 1;
+ } else if (dir->flags & BFS_DIR_EOF) { // Buffer drained and EOF already seen
+ return 0;
+ }
+
+ char *buf = (char *)(dir + 1); // Entry storage follows the struct header
+ ssize_t size = bfs_getdents(dir->fd, buf, BUF_SIZE);
+ if (size == 0) {
+ dir->flags |= BFS_DIR_EOF; // Remember EOF so later calls skip the syscall
+ return 0;
+ } else if (size < 0) {
+ return -1;
+ }
+
+ dir->pos = 0; // Restart consumption at the beginning of the refilled buffer
+ dir->size = size;
+
+ // Like read(), getdents() doesn't indicate EOF until another call returns zero.
+ // Check that eagerly here to hopefully avoid a syscall in the last bfs_readdir().
+ size_t rest = BUF_SIZE - size;
+ if (rest >= sizeof(sys_dirent)) { // Only retry if at least one entry header could fit
+ size = bfs_getdents(dir->fd, buf + size, rest);
+ if (size > 0) {
+ dir->size += size; // Extra entries were appended behind the first batch
+ } else if (size == 0) {
+ dir->flags |= BFS_DIR_EOF;
+ } // A negative result is not reported here; the first batch is still valid
+ }
+
+ return 1;
+#else // !BFS_USE_GETDENTS
+ if (dir->de) { // An entry was already fetched and not yet consumed
+ return 1;
+ } else if (dir->flags & BFS_DIR_EOF) {
+ return 0;
+ }
+
+ errno = 0; // readdir() returns NULL for both EOF and errors; errno tells them apart
+ dir->de = readdir(dir->dir);
+ if (dir->de) {
+ return 1;
+ } else if (errno == 0) {
+ dir->flags |= BFS_DIR_EOF;
+ return 0;
+ } else {
+ return -1;
+ }
+#endif
+}
+
+/**
+ * Fetch the next raw directory entry without any filtering.
+ *
+ * Returns the bfs_polldir() result; *de is only assigned when it is positive.
+ */
+static int bfs_getdent(struct bfs_dir *dir, const sys_dirent **de) {
+	int ret = bfs_polldir(dir);
+	if (ret <= 0) {
+		return ret;
+	}
+
+#if BFS_USE_GETDENTS
+	// Entries are packed back to back; d_reclen gives the distance to the next
+	char *buf = (char *)(dir + 1);
+	const sys_dirent *entry = (const sys_dirent *)(buf + dir->pos);
+	dir->pos += entry->d_reclen;
+	*de = entry;
+#else
+	// Hand over the entry that bfs_polldir() fetched and mark it consumed
+	*de = dir->de;
+	dir->de = NULL;
+#endif
+
+	return ret;
+}
+
+/**
+ * Skip ".", "..", and deleted/empty dirents.
+ *
+ * Returns nonzero (1) if the entry should be skipped, 0 if it should be
+ * reported, or -1 if recording the name for union-mount de-duplication
+ * failed.  The checks other than "." and ".." only exist when getdents() is
+ * in use.
+ */
+static int bfs_skipdent(struct bfs_dir *dir, const sys_dirent *de) {
+#if BFS_USE_GETDENTS
+# if __FreeBSD__
+ // Union mounts on FreeBSD have to be de-duplicated in userspace
+ if (dir->flags & BFS_DIR_UNION) {
+ struct trie_leaf *leaf = trie_insert_str(&dir->trie, de->d_name);
+ if (!leaf) {
+ return -1;
+ } else if (leaf->value) { // Name was seen before: a non-NULL value marks it
+ return 1;
+ } else {
+ leaf->value = leaf; // Any non-NULL pointer works as the "seen" marker
+ }
+ }
+
+ // NFS mounts on FreeBSD can return empty dirents with inode number 0
+ if (de->d_ino == 0) {
+ return 1;
+ }
+# endif
+
+# ifdef DT_WHT
+ if (de->d_type == DT_WHT && !(dir->flags & BFS_DIR_WHITEOUTS)) { // Whiteouts are hidden unless requested
+ return 1;
+ }
+# endif
+#endif // BFS_USE_GETDENTS
+
+ const char *name = de->d_name;
+ return name[0] == '.' && (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')); // True for exactly "." or ".."
+}
+
+/**
+ * Translate de->d_type into a bfs_type.  Platforms without DTTOIF() always
+ * report BFS_UNKNOWN.
+ */
+static enum bfs_type bfs_d_type(const sys_dirent *de) {
+	enum bfs_type type = BFS_UNKNOWN;
+
+#ifdef DTTOIF
+	type = bfs_mode_to_type(DTTOIF(de->d_type));
+#endif
+
+	return type;
+}
+
+/**
+ * Read the next directory entry that is not filtered out by bfs_skipdent().
+ *
+ * Returns 1 after filling *de (if de is non-NULL), 0 at the end of the
+ * directory, or a negative value on failure.
+ */
+int bfs_readdir(struct bfs_dir *dir, struct bfs_dirent *de) {
+	const sys_dirent *sysde;
+	int skip;
+
+	// Keep fetching raw entries until one survives the filter
+	do {
+		int ret = bfs_getdent(dir, &sysde);
+		if (ret <= 0) {
+			return ret;
+		}
+
+		skip = bfs_skipdent(dir, sysde);
+		if (skip < 0) {
+			return skip;
+		}
+	} while (skip != 0);
+
+	if (de) {
+		de->name = sysde->d_name;
+		de->type = bfs_d_type(sysde);
+	}
+
+	return 1;
+}
+
+/** Tear down the in-memory state of a directory; the descriptor is not touched. */
+static void bfs_destroydir(struct bfs_dir *dir) {
+#if BFS_USE_GETDENTS && __FreeBSD__
+	// The trie is only initialized for union mounts
+	const bool deduping = dir->flags & BFS_DIR_UNION;
+	if (deduping) {
+		trie_destroy(&dir->trie);
+	}
+#endif
+
+	sanitize_uninit(dir, DIR_SIZE);
+}
+
+/**
+ * Close the directory's descriptor or stream and tear down its state.
+ * Returns the result of the underlying close call.
+ */
+int bfs_closedir(struct bfs_dir *dir) {
+	int result;
+
+#if BFS_USE_GETDENTS
+	result = xclose(dir->fd);
+#else
+	result = closedir(dir->dir);
+	if (result != 0) {
+		// EBADF would mean the stream was invalid; treat that as fatal
+		bfs_verify(errno != EBADF);
+	}
+#endif
+
+	bfs_destroydir(dir);
+	return result;
+}
+
+#if BFS_USE_UNWRAPDIR
+/**
+ * Tear down the directory object but keep its file descriptor open, handing
+ * the descriptor back to the caller.
+ */
+int bfs_unwrapdir(struct bfs_dir *dir) {
+#if BFS_USE_GETDENTS
+	int fd = dir->fd;
+#elif BFS_HAS_FDCLOSEDIR
+	int fd = fdclosedir(dir->dir);
+#endif
+
+	bfs_destroydir(dir);
+	return fd;
+}
+#endif
diff --git a/src/dir.h b/src/dir.h
new file mode 100644
index 0000000..bbba071
--- /dev/null
+++ b/src/dir.h
@@ -0,0 +1,175 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+/**
+ * Directories and their contents.
+ */
+
+#ifndef BFS_DIR_H
+#define BFS_DIR_H
+
+#include "prelude.h"
+#include <sys/types.h>
+
+/**
+ * Whether the implementation uses the getdents() syscall directly, rather than
+ * libc's readdir().
+ *
+ * May be predefined to override the detection.  By default it is enabled
+ * whenever posix_getdents() exists, and on Linux and FreeBSD whenever any of
+ * getdents(), getdents64(), or the raw getdents64 syscall is available.  On
+ * other platforms it is left undefined, which the preprocessor treats as
+ * false.
+ */
+#ifndef BFS_USE_GETDENTS
+#  if BFS_HAS_POSIX_GETDENTS
+#    define BFS_USE_GETDENTS true
+#  elif __linux__ || __FreeBSD__
+#    define BFS_USE_GETDENTS (BFS_HAS_GETDENTS || BFS_HAS_GETDENTS64 || BFS_HAS_GETDENTS64_SYSCALL)
+#  endif
+#endif
+
+/**
+ * A directory.
+ */
+struct bfs_dir;
+
+/**
+ * File types.
+ *
+ * BFS_ERROR is -1 and BFS_UNKNOWN is 0; the remaining types count up from 1
+ * in the order listed.
+ */
+enum bfs_type {
+ /** An error occurred for this file. */
+ BFS_ERROR = -1,
+ /** Unknown type. */
+ BFS_UNKNOWN,
+ /** Block device. */
+ BFS_BLK,
+ /** Character device. */
+ BFS_CHR,
+ /** Directory. */
+ BFS_DIR,
+ /** Solaris door. */
+ BFS_DOOR,
+ /** Pipe. */
+ BFS_FIFO,
+ /** Symbolic link. */
+ BFS_LNK,
+ /** Solaris event port. */
+ BFS_PORT,
+ /** Regular file. */
+ BFS_REG,
+ /** Socket. */
+ BFS_SOCK,
+ /** BSD whiteout. */
+ BFS_WHT,
+};
+
+/**
+ * Convert a bfs_stat() mode to a bfs_type.
+ */
+enum bfs_type bfs_mode_to_type(mode_t mode);
+
+/**
+ * A directory entry.
+ */
+struct bfs_dirent {
+ /** The type of this file (possibly unknown). */
+ enum bfs_type type;
+ /**
+  * The name of this file.  bfs_readdir() points this at the name inside the
+  * underlying system dirent rather than copying it, so it presumably stays
+  * valid only until the directory is read again or closed.
+  */
+ const char *name;
+};
+
+/**
+ * Allocate space for a directory.
+ *
+ * @return
+ * An allocated, unopen directory, or NULL on failure.
+ */
+struct bfs_dir *bfs_allocdir(void);
+
+struct arena;
+
+/**
+ * Initialize an arena for directories.
+ *
+ * @param arena
+ * The arena to initialize.
+ */
+void bfs_dir_arena(struct arena *arena);
+
+/**
+ * bfs_opendir() flags.
+ */
+enum bfs_dir_flags {
+ /** Include whiteouts in the results. */
+ BFS_DIR_WHITEOUTS = 1 << 0,
+ /** @internal Start of private flags; dir.c defines its own flags as shifts of this bit. */
+ BFS_DIR_PRIVATE = 1 << 1,
+};
+
+/**
+ * Open a directory.
+ *
+ * @param dir
+ * The allocated directory.
+ * @param at_fd
+ * The base directory for path resolution.
+ * @param at_path
+ * The path of the directory to open, relative to at_fd. Pass NULL to
+ * open at_fd itself.
+ * @param flags
+ * Flags that control which directory entries are listed.
+ * @return
+ * 0 on success, or -1 on failure.
+ */
+int bfs_opendir(struct bfs_dir *dir, int at_fd, const char *at_path, enum bfs_dir_flags flags);
+
+/**
+ * Get the file descriptor for a directory.
+ */
+int bfs_dirfd(const struct bfs_dir *dir);
+
+/**
+ * Performs any I/O necessary for the next bfs_readdir() call.
+ *
+ * @param dir
+ * The directory to poll.
+ * @return
+ * 1 on success, 0 on EOF, or -1 on failure.
+ */
+int bfs_polldir(struct bfs_dir *dir);
+
+/**
+ * Read a directory entry.
+ *
+ * @param dir
+ * The directory to read.
+ * @param[out] de
+ * The directory entry to populate, or NULL to skip over an entry.
+ * @return
+ * 1 on success, 0 on EOF, or -1 on failure.
+ */
+int bfs_readdir(struct bfs_dir *dir, struct bfs_dirent *de);
+
+/**
+ * Close a directory.
+ *
+ * @return
+ * 0 on success, -1 on failure.
+ */
+int bfs_closedir(struct bfs_dir *dir);
+
+/**
+ * Whether the bfs_unwrapdir() function is supported.
+ *
+ * May be predefined to override the default, which is true when directories
+ * are read with getdents() or when fdclosedir() is available.
+ */
+#ifndef BFS_USE_UNWRAPDIR
+# define BFS_USE_UNWRAPDIR (BFS_USE_GETDENTS || BFS_HAS_FDCLOSEDIR)
+#endif
+
+#if BFS_USE_UNWRAPDIR
+/**
+ * Detach the file descriptor from an open directory.
+ *
+ * @param dir
+ * The directory to detach.
+ * @return
+ * The file descriptor of the directory.
+ */
+int bfs_unwrapdir(struct bfs_dir *dir);
+#endif
+
+#endif // BFS_DIR_H
diff --git a/src/dstring.c b/src/dstring.c
new file mode 100644
index 0000000..86ab646
--- /dev/null
+++ b/src/dstring.c
@@ -0,0 +1,279 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+#include "prelude.h"
+#include "dstring.h"
+#include "alloc.h"
+#include "bit.h"
+#include "diag.h"
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/**
+ * The memory representation of a dynamic string. Users get a pointer to str.
+ *
+ * DSTR_OFFSET is the distance from the start of the header to str; it is what
+ * dstrheader() and dstrdata() use to convert between the two pointers.
+ */
+struct dstring {
+ /** Capacity of the string, *including* the terminating NUL. */
+ size_t cap;
+ /** Length of the string, *excluding* the terminating NUL. */
+ size_t len;
+ /** The string itself. */
+ alignas(dchar) char str[];
+};
+
+#define DSTR_OFFSET offsetof(struct dstring, str)
+
+/** Recover the header that precedes a user-visible string pointer. */
+static struct dstring *dstrheader(const dchar *dstr) {
+	const char *base = dstr - DSTR_OFFSET;
+	return (struct dstring *)base;
+}
+
+/**
+ * Get the user-visible string pointer for a header.
+ *
+ * This deliberately avoids the expression `header->str`: under some provenance
+ * models that pointer would be limited to the `str` field alone, which would
+ * make a later dstrheader() on it illegal.  Offsetting from the header pointer
+ * keeps provenance over the whole allocation.
+ *
+ * - https://stackoverflow.com/q/25296019
+ * - https://mastodon.social/@void_friend@tech.lgbt/111144859908104311
+ */
+static dchar *dstrdata(struct dstring *header) {
+	char *base = (char *)header;
+	return base + DSTR_OFFSET;
+}
+
+/**
+ * Allocate a dstring holding a copy of the first len bytes of str.
+ *
+ * cap is the requested capacity including the terminating NUL.  Returns the
+ * user-visible string pointer, or NULL if the allocation fails.
+ */
+static dchar *dstralloc_impl(size_t cap, size_t len, const char *str) {
+	// Round tiny requests up so small strings rarely need to be reallocated
+	size_t real_cap = cap < DSTR_OFFSET ? DSTR_OFFSET : cap;
+
+	struct dstring *header = ALLOC_FLEX(struct dstring, str, real_cap);
+	if (header == NULL) {
+		return NULL;
+	}
+
+	header->len = len;
+	header->cap = real_cap;
+
+	char *data = dstrdata(header);
+	memcpy(data, str, len);
+	data[len] = '\0';
+	return data;
+}
+
+/** Allocate an empty dynamic string with room for cap characters plus the NUL. */
+dchar *dstralloc(size_t cap) {
+	size_t cap_with_nul = cap + 1;
+	return dstralloc_impl(cap_with_nul, 0, "");
+}
+
+/** Duplicate a NUL-terminated string as a dynamic string. */
+dchar *dstrdup(const char *str) {
+	size_t len = strlen(str);
+	return dstrxdup(str, len);
+}
+
+/** Duplicate at most n characters of a string as a dynamic string. */
+dchar *dstrndup(const char *str, size_t n) {
+	size_t len = strnlen(str, n);
+	return dstrxdup(str, len);
+}
+
+/** Duplicate a dynamic string, using its stored length. */
+dchar *dstrddup(const dchar *dstr) {
+	size_t len = dstrlen(dstr);
+	return dstrxdup(dstr, len);
+}
+
+/** Duplicate exactly len bytes of str, sizing the allocation for len plus the NUL. */
+dchar *dstrxdup(const char *str, size_t len) {
+	size_t cap = len + 1;
+	return dstralloc_impl(cap, len, str);
+}
+
+/** Read the length stored in a dynamic string's header. */
+size_t dstrlen(const dchar *dstr) {
+	const struct dstring *header = dstrheader(dstr);
+	return header->len;
+}
+
+/**
+ * Make sure a dynamic string can hold cap characters plus the NUL.
+ *
+ * A NULL *dstr is replaced by a freshly allocated empty string.  Otherwise the
+ * string is grown, if needed, to bit_ceil() of the required capacity.  Returns
+ * 0 on success or -1 on allocation failure, in which case an existing string
+ * is left untouched.
+ */
+int dstreserve(dchar **dstr, size_t cap) {
+	if (*dstr == NULL) {
+		dchar *fresh = dstralloc(cap);
+		*dstr = fresh;
+		return fresh ? 0 : -1;
+	}
+
+	struct dstring *header = dstrheader(*dstr);
+	size_t have = header->cap;
+	size_t need = cap + 1; // Terminating NUL
+	if (need <= have) {
+		return 0;
+	}
+
+	size_t grown = bit_ceil(need);
+	struct dstring *moved = REALLOC_FLEX(struct dstring, str, header, have, grown);
+	if (moved == NULL) {
+		return -1;
+	}
+
+	moved->cap = grown;
+	*dstr = dstrdata(moved);
+	return 0;
+}
+
+/**
+ * Set the length of a dynamic string, growing it first if necessary, and
+ * write the terminating NUL at the new end.  Returns 0 on success, -1 on
+ * allocation failure.
+ */
+int dstresize(dchar **dstr, size_t len) {
+	if (dstreserve(dstr, len) != 0) {
+		return -1;
+	}
+
+	char *data = *dstr;
+	dstrheader(*dstr)->len = len;
+	data[len] = '\0';
+	return 0;
+}
+
+/** Append a NUL-terminated string. */
+int dstrcat(dchar **dest, const char *src) {
+	size_t len = strlen(src);
+	return dstrxcat(dest, src, len);
+}
+
+/** Append at most n characters of a string. */
+int dstrncat(dchar **dest, const char *src, size_t n) {
+	size_t len = strnlen(src, n);
+	return dstrxcat(dest, src, len);
+}
+
+/** Append a dynamic string, using its stored length. */
+int dstrdcat(dchar **dest, const dchar *src) {
+	size_t len = dstrlen(src);
+	return dstrxcat(dest, src, len);
+}
+
+/**
+ * Append exactly len bytes of src to a dynamic string.
+ *
+ * @param dest
+ *         The destination dynamic string.  *dest may be NULL, in which case a
+ *         new string is allocated, matching dstreserve() and dstrnescat().
+ * @param src
+ *         The bytes to append.  This must not point into *dest, because *dest
+ *         may be reallocated before the bytes are copied.
+ * @param len
+ *         The exact number of bytes to take from src.
+ * @return
+ *         0 on success, -1 on failure.
+ */
+int dstrxcat(dchar **dest, const char *src, size_t len) {
+	// A NULL destination counts as an empty string
+	size_t oldlen = *dest ? dstrlen(*dest) : 0;
+	size_t newlen = oldlen + len;
+
+	if (dstresize(dest, newlen) != 0) {
+		return -1;
+	}
+
+	memcpy(*dest + oldlen, src, len);
+	return 0;
+}
+
+/** Append a single character by treating it as a one-byte buffer. */
+int dstrapp(dchar **str, char c) {
+	const char *one = &c;
+	return dstrxcat(str, one, 1);
+}
+
+/** Replace the contents of a dynamic string with a NUL-terminated string. */
+int dstrcpy(dchar **dest, const char *src) {
+	size_t len = strlen(src);
+	return dstrxcpy(dest, src, len);
+}
+
+/** Replace the contents of a dynamic string with at most n characters of src. */
+int dstrncpy(dchar **dest, const char *src, size_t n) {
+	size_t len = strnlen(src, n);
+	return dstrxcpy(dest, src, len);
+}
+
+/** Replace the contents of a dynamic string with another dynamic string. */
+int dstrdcpy(dchar **dest, const dchar *src) {
+	size_t len = dstrlen(src);
+	return dstrxcpy(dest, src, len);
+}
+
+/**
+ * Replace the contents of a dynamic string with exactly len bytes of src.
+ * Returns 0 on success, -1 if the string could not be resized.
+ */
+int dstrxcpy(dchar **dest, const char *src, size_t len) {
+	if (dstresize(dest, len) == 0) {
+		memcpy(*dest, src, len);
+		return 0;
+	}
+
+	return -1;
+}
+
+/** Variadic front end for dstrvprintf(). */
+dchar *dstrprintf(const char *format, ...) {
+	va_list ap;
+	va_start(ap, format);
+
+	dchar *result = dstrvprintf(format, ap);
+
+	va_end(ap);
+	return result;
+}
+
+/**
+ * Build a new dynamic string from a format and a va_list.  Returns NULL if
+ * allocation or formatting fails.
+ */
+dchar *dstrvprintf(const char *format, va_list args) {
+	// Twice the format's length is a guess meant to avoid reallocating
+	size_t guess = 2 * strlen(format);
+
+	dchar *str = dstralloc(guess);
+	if (str && dstrvcatf(&str, format, args) != 0) {
+		dstrfree(str);
+		str = NULL;
+	}
+
+	return str;
+}
+
+/** Variadic front end for dstrvcatf(). */
+int dstrcatf(dchar **str, const char *format, ...) {
+	va_list ap;
+	va_start(ap, format);
+
+	int result = dstrvcatf(str, format, ap);
+
+	va_end(ap);
+	return result;
+}
+
+int dstrvcatf(dchar **str, const char *format, va_list args) { // Returns 0 on success, -1 on failure
+ // Guess a capacity to try to avoid calling vsnprintf() twice
+ size_t len = dstrlen(*str);
+ dstreserve(str, len + 2 * strlen(format)); // Best effort: on failure the current capacity is used below
+ size_t cap = dstrheader(*str)->cap;
+
+ va_list copy;
+ va_copy(copy, args); // The first vsnprintf() consumes args; keep a copy for a retry
+
+ char *tail = *str + len;
+ size_t tail_cap = cap - len;
+ int ret = vsnprintf(tail, tail_cap, format, args);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ size_t tail_len = ret;
+ if (tail_len >= tail_cap) { // Output was truncated: grow to the exact size and format again
+ if (dstreserve(str, len + tail_len) != 0) {
+ goto fail;
+ }
+
+ tail = *str + len; // The string may have moved during reallocation
+ ret = vsnprintf(tail, tail_len + 1, format, copy);
+ if (ret < 0 || (size_t)ret != tail_len) {
+ bfs_bug("Length of formatted string changed");
+ goto fail;
+ }
+ }
+
+ va_end(copy);
+
+ dstrheader(*str)->len += tail_len;
+ return 0;
+
+fail:
+ va_end(copy);
+ *tail = '\0'; // Cut off any partial output so the original contents remain
+ return -1;
+}
+
+/** Escape and append a whole string; SIZE_MAX means no length limit. */
+int dstrescat(dchar **dest, const char *str, enum wesc_flags flags) {
+	const size_t unlimited = SIZE_MAX;
+	return dstrnescat(dest, str, unlimited, flags);
+}
+
+int dstrnescat(dchar **dest, const char *str, size_t n, enum wesc_flags flags) { // Returns 0 on success, -1 on failure
+ size_t len = *dest ? dstrlen(*dest) : 0; // A NULL destination counts as empty
+
+ // Worst case growth is `ccc...` => $'\xCC\xCC\xCC...'
+ n = strnlen(str, n);
+ size_t cap = len + 4 * n + 3; // 4 output bytes per input byte, plus 3 for the $'...' wrapper
+ if (dstreserve(dest, cap) != 0) {
+ return -1;
+ }
+
+ char *cur = *dest + len;
+ char *end = *dest + cap + 1; // dstreserve() also made room for the terminating NUL
+ cur = wordnesc(cur, end, str, n, flags);
+ bfs_assert(cur != end, "wordesc() result truncated");
+
+ return dstresize(dest, cur - *dest); // Record the length that was actually written
+}
+
+/** Free a dynamic string; NULL is accepted and ignored. */
+void dstrfree(dchar *dstr) {
+	if (!dstr) {
+		return;
+	}
+
+	// The allocation starts at the header, not at the string
+	free(dstrheader(dstr));
+}
diff --git a/src/dstring.h b/src/dstring.h
new file mode 100644
index 0000000..14e1d3e
--- /dev/null
+++ b/src/dstring.h
@@ -0,0 +1,322 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+/**
+ * A dynamic string library.
+ */
+
+#ifndef BFS_DSTRING_H
+#define BFS_DSTRING_H
+
+#include "prelude.h"
+#include "bfstd.h"
+#include <stdarg.h>
+#include <stddef.h>
+
+/** Marker type for dynamic strings. */
+#if BFS_LINT && __clang__
+// Abuse __attribute__(aligned) to make a type that allows
+//
+// dchar * -> char *
+//
+// conversions, but warns (with Clang's -Walign-mismatch) on
+//
+// char * -> dchar *
+typedef __attribute__((aligned(alignof(size_t)))) char dchar;
+#else
+typedef char dchar;
+#endif
+
+/**
+ * Free a dynamic string.
+ *
+ * @param dstr
+ * The string to free.
+ */
+void dstrfree(dchar *dstr);
+
+/**
+ * Allocate a dynamic string.
+ *
+ * @param cap
+ * The initial capacity of the string.
+ */
+attr(malloc(dstrfree, 1))
+dchar *dstralloc(size_t cap);
+
+/**
+ * Create a dynamic copy of a string.
+ *
+ * @param str
+ * The NUL-terminated string to copy.
+ */
+attr(malloc(dstrfree, 1))
+dchar *dstrdup(const char *str);
+
+/**
+ * Create a length-limited dynamic copy of a string.
+ *
+ * @param str
+ * The string to copy.
+ * @param n
+ * The maximum number of characters to copy from str.
+ */
+attr(malloc(dstrfree, 1))
+dchar *dstrndup(const char *str, size_t n);
+
+/**
+ * Create a dynamic copy of a dynamic string.
+ *
+ * @param dstr
+ * The dynamic string to copy.
+ */
+attr(malloc(dstrfree, 1))
+dchar *dstrddup(const dchar *dstr);
+
+/**
+ * Create an exact-sized dynamic copy of a string.
+ *
+ * @param str
+ * The string to copy.
+ * @param len
+ * The length of the string, which may include internal NUL bytes.
+ */
+attr(malloc(dstrfree, 1))
+dchar *dstrxdup(const char *str, size_t len);
+
+/**
+ * Get a dynamic string's length.
+ *
+ * @param dstr
+ * The string to measure.
+ * @return
+ * The length of dstr.
+ */
+size_t dstrlen(const dchar *dstr);
+
+/**
+ * Reserve some capacity in a dynamic string.
+ *
+ * @param dstr
+ * The dynamic string to preallocate.
+ * @param cap
+ * The new capacity for the string.
+ * @return
+ * 0 on success, -1 on failure.
+ */
+int dstreserve(dchar **dstr, size_t cap);
+
+/**
+ * Resize a dynamic string.
+ *
+ * @param dstr
+ * The dynamic string to resize.
+ * @param len
+ * The new length for the dynamic string.
+ * @return
+ * 0 on success, -1 on failure.
+ */
+int dstresize(dchar **dstr, size_t len);
+
+/**
+ * Append to a dynamic string.
+ *
+ * @param dest
+ * The destination dynamic string.
+ * @param src
+ * The string to append.
+ * @return 0 on success, -1 on failure.
+ */
+int dstrcat(dchar **dest, const char *src);
+
+/**
+ * Append to a dynamic string.
+ *
+ * @param dest
+ * The destination dynamic string.
+ * @param src
+ * The string to append.
+ * @param n
+ * The maximum number of characters to take from src.
+ * @return
+ * 0 on success, -1 on failure.
+ */
+int dstrncat(dchar **dest, const char *src, size_t n);
+
+/**
+ * Append a dynamic string to another dynamic string.
+ *
+ * @param dest
+ * The destination dynamic string.
+ * @param src
+ * The dynamic string to append.
+ * @return
+ * 0 on success, -1 on failure.
+ */
+int dstrdcat(dchar **dest, const dchar *src);
+
+/**
+ * Append to a dynamic string.
+ *
+ * @param dest
+ * The destination dynamic string.
+ * @param src
+ * The string to append.
+ * @param len
+ * The exact number of characters to take from src.
+ * @return
+ * 0 on success, -1 on failure.
+ */
+int dstrxcat(dchar **dest, const char *src, size_t len);
+
+/**
+ * Append a single character to a dynamic string.
+ *
+ * @param str
+ * The string to append to.
+ * @param c
+ * The character to append.
+ * @return
+ * 0 on success, -1 on failure.
+ */
+int dstrapp(dchar **str, char c);
+
+/**
+ * Copy a string into a dynamic string.
+ *
+ * @param dest
+ *         The destination dynamic string.  If *dest is NULL, a new string is
+ *         allocated.
+ * @param src
+ *         The NUL-terminated string to copy.
+ * @return
+ *         0 on success, -1 on failure.
+ */
+int dstrcpy(dchar **dest, const char *src);
+
+/**
+ * Copy a dynamic string into another one.
+ *
+ * @param dest
+ *         The destination dynamic string.  If *dest is NULL, a new string is
+ *         allocated.
+ * @param src
+ *         The dynamic string to copy.
+ * @return
+ *         0 on success, -1 on failure.
+ */
+int dstrdcpy(dchar **dest, const dchar *src);
+
+/**
+ * Copy a length-limited string into a dynamic string.
+ *
+ * @param dest
+ *         The destination dynamic string.  If *dest is NULL, a new string is
+ *         allocated.
+ * @param src
+ *         The string to copy.
+ * @param n
+ *         The maximum number of characters to take from src.
+ * @return
+ *         0 on success, -1 on failure.
+ */
+int dstrncpy(dchar **dest, const char *src, size_t n);
+
+/**
+ * Copy an exact number of characters into a dynamic string.
+ *
+ * @param dest
+ *         The destination dynamic string.  If *dest is NULL, a new string is
+ *         allocated.
+ * @param src
+ *         The string to copy.
+ * @param len
+ *         The exact number of characters to take from src.
+ * @return
+ *         0 on success, -1 on failure.
+ */
+int dstrxcpy(dchar **dest, const char *src, size_t len);
+
+/**
+ * Create a dynamic string from a format string.
+ *
+ * @param format
+ * The format string to fill in.
+ * @param ...
+ * Any arguments for the format string.
+ * @return
+ * The created string, or NULL on failure.
+ */
+attr(printf(1, 2))
+dchar *dstrprintf(const char *format, ...);
+
+/**
+ * Create a dynamic string from a format string and a va_list.
+ *
+ * @param format
+ * The format string to fill in.
+ * @param args
+ * The arguments for the format string.
+ * @return
+ * The created string, or NULL on failure.
+ */
+attr(printf(1, 0))
+dchar *dstrvprintf(const char *format, va_list args);
+
+/**
+ * Format some text onto the end of a dynamic string.
+ *
+ * @param str
+ * The destination dynamic string.
+ * @param format
+ * The format string to fill in.
+ * @param ...
+ * Any arguments for the format string.
+ * @return
+ * 0 on success, -1 on failure.
+ */
+attr(printf(2, 3))
+int dstrcatf(dchar **str, const char *format, ...);
+
+/**
+ * Format some text from a va_list onto the end of a dynamic string.
+ *
+ * @param str
+ * The destination dynamic string.
+ * @param format
+ * The format string to fill in.
+ * @param args
+ * The arguments for the format string.
+ * @return
+ * 0 on success, -1 on failure.
+ */
+attr(printf(2, 0))
+int dstrvcatf(dchar **str, const char *format, va_list args);
+
+/**
+ * Concatenate while shell-escaping.
+ *
+ * @param dest
+ * The destination dynamic string.
+ * @param str
+ * The string to escape.
+ * @param flags
+ * Flags for wordesc().
+ * @return
+ * 0 on success, -1 on failure.
+ */
+int dstrescat(dchar **dest, const char *str, enum wesc_flags flags);
+
+/**
+ * Concatenate while shell-escaping.
+ *
+ * @param dest
+ * The destination dynamic string.
+ * @param str
+ * The string to escape.
+ * @param n
+ * The maximum length of the string.
+ * @param flags
+ * Flags for wordesc().
+ * @return
+ * 0 on success, -1 on failure.
+ */
+int dstrnescat(dchar **dest, const char *str, size_t n, enum wesc_flags flags);
+
+#endif // BFS_DSTRING_H
diff --git a/src/eval.c b/src/eval.c
new file mode 100644
index 0000000..4fcda60
--- /dev/null
+++ b/src/eval.c
@@ -0,0 +1,1696 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+/**
+ * Implementation of all the primary expressions.
+ */
+
+#include "prelude.h"
+#include "eval.h"
+#include "bar.h"
+#include "bfstd.h"
+#include "bftw.h"
+#include "color.h"
+#include "ctx.h"
+#include "diag.h"
+#include "dir.h"
+#include "dstring.h"
+#include "exec.h"
+#include "expr.h"
+#include "fsade.h"
+#include "mtab.h"
+#include "printf.h"
+#include "pwcache.h"
+#include "sanity.h"
+#include "stat.h"
+#include "trie.h"
+#include "xregex.h"
+#include <errno.h>
+#include <fcntl.h>
+#include <fnmatch.h>
+#include <grp.h>
+#include <pwd.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <sys/resource.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+#include <wchar.h>
+
+/**
+ * Per-file evaluation state.
+ *
+ * One of these is filled in by eval_callback() for every visited file and
+ * passed to each eval_*() function.
+ */
+struct bfs_eval {
+ /** Data about the current file. */
+ const struct BFTW *ftwbuf;
+ /** The bfs context. */
+ const struct bfs_ctx *ctx;
+ /** The bftw() callback return value. */
+ enum bftw_action action;
+ /** The bfs_eval() return value. */
+ int *ret;
+ /** Whether to quit immediately. */
+ bool quit;
+};
+
+/**
+ * Print an error message for the current file, and mark the run as failed.
+ *
+ * @param state
+ * The evaluation state; *state->ret is set to EXIT_FAILURE.
+ * @param format
+ * The format string for the message body.
+ * @param ...
+ * Any arguments for the format string.
+ */
+attr(printf(2, 3))
+static void eval_error(struct bfs_eval *state, const char *format, ...) {
+ // By POSIX, any errors should be accompanied by a non-zero exit status
+ *state->ret = EXIT_FAILURE;
+
+ // Remember errno on entry; it is restored below before formatting
+ int error = errno;
+ const struct bfs_ctx *ctx = state->ctx;
+ CFILE *cerr = ctx->cerr;
+
+ // Print the message prefix for the current file first
+ bfs_error(ctx, "%pP: ", state->ftwbuf);
+
+ va_list args;
+ va_start(args, format);
+ // Callers pass formats containing %m, so put the original errno back
+ errno = error;
+ cvfprintf(cerr, format, args);
+ va_end(args);
+}
+
+/**
+ * Decide whether an error can be silently dropped.
+ *
+ * An error is ignored only when the context has ignore_races set, the error
+ * is ENOENT-like, and the current file is below the root (depth > 0).
+ */
+static bool eval_should_ignore(const struct bfs_eval *state, int error) {
+	if (!state->ctx->ignore_races) {
+		return false;
+	}
+
+	if (!error_is_like(error, ENOENT)) {
+		return false;
+	}
+
+	return state->ftwbuf->depth > 0;
+}
+
+/**
+ * Report the current errno as an evaluation error, unless it should be
+ * ignored according to eval_should_ignore().
+ */
+static void eval_report_error(struct bfs_eval *state) {
+	if (eval_should_ignore(state, errno)) {
+		return;
+	}
+
+	eval_error(state, "%m.\n");
+}
+
+/**
+ * Report an I/O error on an expression's output stream.
+ *
+ * The message names expr->path when there is one, and standard output
+ * otherwise.  The stream's error indicator is cleared afterwards.
+ */
+static void eval_io_error(const struct bfs_expr *expr, struct bfs_eval *state) {
+	if (!expr->path) {
+		eval_error(state, "(standard output): %m.\n");
+	} else {
+		eval_error(state, "'%s': %m.\n", expr->path);
+	}
+
+	// Don't report the error again in bfs_ctx_free()
+	clearerr(expr->cfile->file);
+}
+
+/**
+ * Get the stat buffer for the current file, using its default stat flags.
+ *
+ * @return
+ * The stat buffer, or NULL on failure (after reporting the error).
+ */
+static const struct bfs_stat *eval_stat(struct bfs_eval *state) {
+	const struct BFTW *ftwbuf = state->ftwbuf;
+
+	const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags);
+	if (statbuf) {
+		return statbuf;
+	}
+
+	eval_report_error(state);
+	return NULL;
+}
+
+/**
+ * Compute lhs - rhs in whole seconds.
+ *
+ * One second is borrowed when lhs has fewer nanoseconds than rhs.
+ */
+static time_t timespec_diff(const struct timespec *lhs, const struct timespec *rhs) {
+	time_t secs = lhs->tv_sec - rhs->tv_sec;
+	return lhs->tv_nsec < rhs->tv_nsec ? secs - 1 : secs;
+}
+
+/**
+ * Compare n against expr->num using the expression's comparison mode.
+ *
+ * @return
+ * Whether n is less than, equal to, or greater than expr->num, as
+ * selected by expr->int_cmp.
+ */
+bool bfs_expr_cmp(const struct bfs_expr *expr, long long n) {
+	switch (expr->int_cmp) {
+	case BFS_INT_LESS:
+		return n < expr->num;
+	case BFS_INT_EQUAL:
+		return n == expr->num;
+	case BFS_INT_GREATER:
+		return n > expr->num;
+	}
+
+	// Only reached if int_cmp holds a value outside the enum
+	bfs_bug("Invalid comparison mode");
+	return false;
+}
+
+/**
+ * Match a string against an expression's pattern.
+ *
+ * Literal patterns are compared directly (case-insensitively when
+ * FNM_CASEFOLD is available and requested); everything else goes through
+ * fnmatch().
+ */
+static bool eval_fnmatch(const struct bfs_expr *expr, const char *str) {
+	if (!expr->literal) {
+		return fnmatch(expr->pattern, str, expr->fnm_flags) == 0;
+	}
+
+#ifdef FNM_CASEFOLD
+	if (expr->fnm_flags & FNM_CASEFOLD) {
+		return strcasecmp(expr->pattern, str) == 0;
+	}
+#endif
+
+	return strcmp(expr->pattern, str) == 0;
+}
+
+/**
+ * -true test: matches every file.
+ */
+bool eval_true(const struct bfs_expr *expr, struct bfs_eval *state) {
+	(void)expr;
+	(void)state;
+	return true;
+}
+
+/**
+ * -false test: matches no file.
+ */
+bool eval_false(const struct bfs_expr *expr, struct bfs_eval *state) {
+	(void)expr;
+	(void)state;
+	return false;
+}
+
+/**
+ * -executable, -readable, -writable tests.
+ *
+ * expr->num is passed as the access mode to xfaccessat().
+ */
+bool eval_access(const struct bfs_expr *expr, struct bfs_eval *state) {
+	const struct BFTW *ftwbuf = state->ftwbuf;
+	int rc = xfaccessat(ftwbuf->at_fd, ftwbuf->at_path, expr->num);
+	return rc == 0;
+}
+
+/**
+ * -acl test.
+ *
+ * A negative result from bfs_check_acl() is reported as an error and
+ * treated as no match.
+ */
+bool eval_acl(const struct bfs_expr *expr, struct bfs_eval *state) {
+	int ret = bfs_check_acl(state->ftwbuf);
+	if (ret < 0) {
+		eval_report_error(state);
+		return false;
+	}
+
+	return ret;
+}
+
+/**
+ * -capable test.
+ *
+ * A negative result from bfs_check_capabilities() is reported as an error
+ * and treated as no match.
+ */
+bool eval_capable(const struct bfs_expr *expr, struct bfs_eval *state) {
+	int ret = bfs_check_capabilities(state->ftwbuf);
+	if (ret < 0) {
+		eval_report_error(state);
+		return false;
+	}
+
+	return ret;
+}
+
+/**
+ * -context test.
+ *
+ * Matches the string returned by bfs_getfilecon() against the expression's
+ * pattern.
+ */
+bool eval_context(const struct bfs_expr *expr, struct bfs_eval *state) {
+	char *con = bfs_getfilecon(state->ftwbuf);
+	if (con) {
+		bool matched = eval_fnmatch(expr, con);
+		bfs_freecon(con);
+		return matched;
+	}
+
+	eval_report_error(state);
+	return false;
+}
+
+/**
+ * Look up a timestamp field in a stat buffer.
+ *
+ * @return
+ * The requested timespec, or NULL (after printing an error) if
+ * bfs_stat_time() could not provide it.
+ */
+static const struct timespec *eval_stat_time(const struct bfs_stat *statbuf, enum bfs_stat_field field, struct bfs_eval *state) {
+	const struct timespec *ts = bfs_stat_time(statbuf, field);
+	if (ts) {
+		return ts;
+	}
+
+	eval_error(state, "Couldn't get file %s: %m.\n", bfs_stat_field_name(field));
+	return NULL;
+}
+
+/**
+ * -[aBcm]?newer tests.
+ *
+ * Matches when the selected timestamp is strictly later than expr->reftime.
+ */
+bool eval_newer(const struct bfs_expr *expr, struct bfs_eval *state) {
+	const struct bfs_stat *statbuf = eval_stat(state);
+	if (!statbuf) {
+		return false;
+	}
+
+	const struct timespec *when = eval_stat_time(statbuf, expr->stat_field, state);
+	if (!when) {
+		return false;
+	}
+
+	// Compare seconds first, and break ties on nanoseconds
+	const struct timespec *ref = &expr->reftime;
+	if (when->tv_sec != ref->tv_sec) {
+		return when->tv_sec > ref->tv_sec;
+	}
+	return when->tv_nsec > ref->tv_nsec;
+}
+
+/**
+ * -[aBcm]{min,time} tests.
+ *
+ * Compares the age of the selected timestamp, relative to expr->reftime
+ * and scaled to the expression's time unit, against expr->num.
+ */
+bool eval_time(const struct bfs_expr *expr, struct bfs_eval *state) {
+	const struct bfs_stat *statbuf = eval_stat(state);
+	if (!statbuf) {
+		return false;
+	}
+
+	const struct timespec *when = eval_stat_time(statbuf, expr->stat_field, state);
+	if (!when) {
+		return false;
+	}
+
+	time_t age = timespec_diff(&expr->reftime, when);
+
+	switch (expr->time_unit) {
+	case BFS_SECONDS:
+		break;
+	case BFS_MINUTES:
+		age /= 60;
+		break;
+	case BFS_DAYS:
+		// Seconds -> "days of minutes" -> days
+		age /= 60 * 24;
+		age /= 60;
+		break;
+	}
+
+	return bfs_expr_cmp(expr, age);
+}
+
+/**
+ * -used test.
+ *
+ * Compares the number of days (rounded up) between the status change time
+ * and the access time against expr->num.  Never matches when the access
+ * time precedes the change time.
+ */
+bool eval_used(const struct bfs_expr *expr, struct bfs_eval *state) {
+	const struct bfs_stat *statbuf = eval_stat(state);
+	if (!statbuf) {
+		return false;
+	}
+
+	// Look up both fields before bailing out, so both failures get reported
+	const struct timespec *atime = eval_stat_time(statbuf, BFS_STAT_ATIME, state);
+	const struct timespec *ctime = eval_stat_time(statbuf, BFS_STAT_CTIME, state);
+	if (!atime || !ctime) {
+		return false;
+	}
+
+	long long secs = timespec_diff(atime, ctime);
+	if (secs < 0) {
+		return false;
+	}
+
+	const long long secs_per_day = 60 * 60 * 24;
+	long long days = (secs + secs_per_day - 1) / secs_per_day;
+	return bfs_expr_cmp(expr, days);
+}
+
+/**
+ * -gid test: compares the file's group ID against expr->num.
+ */
+bool eval_gid(const struct bfs_expr *expr, struct bfs_eval *state) {
+	const struct bfs_stat *statbuf = eval_stat(state);
+	return statbuf && bfs_expr_cmp(expr, statbuf->gid);
+}
+
+/**
+ * -uid test: compares the file's user ID against expr->num.
+ */
+bool eval_uid(const struct bfs_expr *expr, struct bfs_eval *state) {
+	const struct bfs_stat *statbuf = eval_stat(state);
+	return statbuf && bfs_expr_cmp(expr, statbuf->uid);
+}
+
+/**
+ * -nogroup test.
+ *
+ * Matches when bfs_getgrgid() finds no group for the file's gid.  A
+ * non-zero errno after the lookup is reported as an error.
+ */
+bool eval_nogroup(const struct bfs_expr *expr, struct bfs_eval *state) {
+	const struct bfs_stat *statbuf = eval_stat(state);
+	if (!statbuf) {
+		return false;
+	}
+
+	const struct bfs_ctx *ctx = state->ctx;
+	bool missing = bfs_getgrgid(ctx->groups, statbuf->gid) == NULL;
+
+	// NOTE(review): relies on bfs_getgrgid() leaving errno at 0 unless the
+	// lookup itself failed -- confirm against pwcache.h
+	if (errno != 0) {
+		eval_report_error(state);
+	}
+
+	return missing;
+}
+
+/**
+ * -nouser test.
+ *
+ * Matches when bfs_getpwuid() finds no user for the file's uid.  A
+ * non-zero errno after the lookup is reported as an error.
+ */
+bool eval_nouser(const struct bfs_expr *expr, struct bfs_eval *state) {
+	const struct bfs_stat *statbuf = eval_stat(state);
+	if (!statbuf) {
+		return false;
+	}
+
+	const struct bfs_ctx *ctx = state->ctx;
+	bool missing = bfs_getpwuid(ctx->users, statbuf->uid) == NULL;
+
+	// NOTE(review): relies on bfs_getpwuid() leaving errno at 0 unless the
+	// lookup itself failed -- confirm against pwcache.h
+	if (errno != 0) {
+		eval_report_error(state);
+	}
+
+	return missing;
+}
+
+/**
+ * -delete action.
+ *
+ * Removes the current file with unlinkat(), using AT_REMOVEDIR for
+ * directories.  The path "." is skipped and counts as success.
+ */
+bool eval_delete(const struct bfs_expr *expr, struct bfs_eval *state) {
+	const struct BFTW *ftwbuf = state->ftwbuf;
+
+	// Don't try to delete the current directory
+	if (strcmp(ftwbuf->path, ".") == 0) {
+		return true;
+	}
+
+	// We need to know the actual type of the path, not what it points to
+	enum bfs_type type = bftw_type(ftwbuf, BFS_STAT_NOFOLLOW);
+	if (type == BFS_ERROR) {
+		eval_report_error(state);
+		return false;
+	}
+
+	int flag = (type == BFS_DIR) ? AT_REMOVEDIR : 0;
+	if (unlinkat(ftwbuf->at_fd, ftwbuf->at_path, flag) != 0) {
+		eval_report_error(state);
+		return false;
+	}
+
+	return true;
+}
+
+/**
+ * Finish any pending -exec ... + operations in an expression tree.
+ *
+ * @return
+ * 0 if every bfs_exec_finish() call in the tree succeeded, -1 otherwise.
+ */
+static int eval_exec_finish(const struct bfs_expr *expr, const struct bfs_ctx *ctx) {
+	int ret = 0;
+
+	if (expr->eval_fn == eval_exec && bfs_exec_finish(expr->exec) != 0) {
+		if (errno != 0) {
+			bfs_error(ctx, "%s %s: %m.\n", expr->argv[0], expr->argv[1]);
+		}
+		ret = -1;
+	}
+
+	// Keep going after a failure so every child gets finished
+	struct bfs_expr *child = bfs_expr_children(expr);
+	while (child) {
+		if (eval_exec_finish(child, ctx) != 0) {
+			ret = -1;
+		}
+		child = child->next;
+	}
+
+	return ret;
+}
+
+/**
+ * -exec[dir]/-ok[dir] actions.
+ *
+ * @return
+ * Whether bfs_exec() returned 0.
+ */
+bool eval_exec(const struct bfs_expr *expr, struct bfs_eval *state) {
+	int rc = bfs_exec(expr->exec, state->ftwbuf);
+
+	// errno is checked regardless of the return value
+	if (errno != 0) {
+		eval_error(state, "%s %s: %m.\n", expr->argv[0], expr->argv[1]);
+	}
+
+	return rc == 0;
+}
+
+/**
+ * -exit action: stop the traversal and use expr->num as the return value.
+ */
+bool eval_exit(const struct bfs_expr *expr, struct bfs_eval *state) {
+	*state->ret = expr->num;
+	state->quit = true;
+	state->action = BFTW_STOP;
+	return true;
+}
+
+/**
+ * -depth N test: compares the current file's depth against expr->num.
+ */
+bool eval_depth(const struct bfs_expr *expr, struct bfs_eval *state) {
+	const struct BFTW *ftwbuf = state->ftwbuf;
+	return bfs_expr_cmp(expr, ftwbuf->depth);
+}
+
+/**
+ * -empty test.
+ *
+ * Regular files match when their size is zero.  Directories match when the
+ * first bfs_readdir() call reports no entry.  Every other file type never
+ * matches.
+ *
+ * @return
+ * Whether the file is empty; false if an error occurred (after
+ * reporting it).
+ */
+bool eval_empty(const struct bfs_expr *expr, struct bfs_eval *state) {
+	const struct BFTW *ftwbuf = state->ftwbuf;
+
+	if (ftwbuf->type == BFS_REG) {
+		const struct bfs_stat *statbuf = eval_stat(state);
+		return statbuf && statbuf->size == 0;
+	}
+
+	if (ftwbuf->type != BFS_DIR) {
+		return false;
+	}
+
+	struct bfs_dir *dir = bfs_allocdir();
+	if (!dir) {
+		goto error;
+	}
+
+	if (bfs_opendir(dir, ftwbuf->at_fd, ftwbuf->at_path, 0) != 0) {
+		goto error;
+	}
+
+	int did_read = bfs_readdir(dir, NULL);
+	// Closing the directory may overwrite errno, so save the read error now
+	int error = errno;
+	bfs_closedir(dir);
+
+	if (did_read < 0) {
+		errno = error;
+		goto error;
+	}
+
+	free(dir);
+	return did_read == 0;
+
+error:
+	eval_report_error(state);
+	// free(NULL) is a no-op, so this also covers the allocation failure
+	free(dir);
+	return false;
+}
+
+/**
+ * -flags test.
+ *
+ * Compares the file's attribute flags against the expression's sets of
+ * flags that must be set and flags that must be clear, according to
+ * expr->flags_cmp.
+ */
+bool eval_flags(const struct bfs_expr *expr, struct bfs_eval *state) {
+	const struct bfs_stat *statbuf = eval_stat(state);
+	if (!statbuf) {
+		return false;
+	}
+
+	if ((statbuf->mask & BFS_STAT_ATTRS) == 0) {
+		eval_error(state, "Couldn't get file %s.\n", bfs_stat_field_name(BFS_STAT_ATTRS));
+		return false;
+	}
+
+	const unsigned long flags = statbuf->attrs;
+	const unsigned long set = expr->set_flags;
+	const unsigned long clear = expr->clear_flags;
+
+	// Whether any of the must-be-clear flags is actually set
+	const bool any_unwanted = (flags & clear) != 0;
+
+	switch (expr->flags_cmp) {
+	case BFS_MODE_EQUAL:
+		return flags == set && !any_unwanted;
+
+	case BFS_MODE_ALL:
+		return (flags & set) == set && !any_unwanted;
+
+	case BFS_MODE_ANY:
+		return (flags & set) != 0 || (flags & clear) != clear;
+	}
+
+	bfs_bug("Invalid comparison mode");
+	return false;
+}
+
+/**
+ * -fstype test.
+ *
+ * Matches when the filesystem type reported by bfs_fstype() for the file
+ * equals expr->argv[1].
+ */
+bool eval_fstype(const struct bfs_expr *expr, struct bfs_eval *state) {
+	const struct bfs_stat *statbuf = eval_stat(state);
+	if (!statbuf) {
+		return false;
+	}
+
+	const struct bfs_mtab *mtab = bfs_ctx_mtab(state->ctx);
+	const char *type = mtab ? bfs_fstype(mtab, statbuf) : NULL;
+	if (!type) {
+		// Either the mount table or the type lookup failed
+		eval_report_error(state);
+		return false;
+	}
+
+	return strcmp(type, expr->argv[1]) == 0;
+}
+
+/**
+ * -hidden test: matches names that start with a dot, other than "." and
+ * "..".
+ */
+bool eval_hidden(const struct bfs_expr *expr, struct bfs_eval *state) {
+	const struct BFTW *ftwbuf = state->ftwbuf;
+	const char *name = ftwbuf->path + ftwbuf->nameoff;
+
+	if (name[0] != '.') {
+		return false;
+	}
+
+	// Don't treat "." or ".." as hidden directories.  Otherwise we'd filter
+	// out everything when given
+	//
+	//     $ bfs . -nohidden
+	//     $ bfs .. -nohidden
+	bool is_dot = name[1] == '\0';
+	bool is_dotdot = name[1] == '.' && name[2] == '\0';
+	return !is_dot && !is_dotdot;
+}
+
+/**
+ * -inum test: compares the file's inode number against expr->num.
+ */
+bool eval_inum(const struct bfs_expr *expr, struct bfs_eval *state) {
+	const struct bfs_stat *statbuf = eval_stat(state);
+	return statbuf && bfs_expr_cmp(expr, statbuf->ino);
+}
+
+/**
+ * -links test: compares the file's link count against expr->num.
+ */
+bool eval_links(const struct bfs_expr *expr, struct bfs_eval *state) {
+	const struct bfs_stat *statbuf = eval_stat(state);
+	return statbuf && bfs_expr_cmp(expr, statbuf->nlink);
+}
+
+/**
+ * -i?lname test.
+ *
+ * Matches the target of a symbolic link against the expression's pattern.
+ * Anything that is not a symbolic link never matches.
+ */
+bool eval_lname(const struct bfs_expr *expr, struct bfs_eval *state) {
+	const struct BFTW *ftwbuf = state->ftwbuf;
+	if (ftwbuf->type != BFS_LNK) {
+		return false;
+	}
+
+	// Pass along the size from a cached no-follow stat, or 0 without one
+	const struct bfs_stat *statbuf = bftw_cached_stat(ftwbuf, BFS_STAT_NOFOLLOW);
+	size_t len = statbuf ? statbuf->size : 0;
+
+	char *target = xreadlinkat(ftwbuf->at_fd, ftwbuf->at_path, len);
+	if (!target) {
+		eval_report_error(state);
+		return false;
+	}
+
+	bool matched = eval_fnmatch(expr, target);
+	free(target);
+	return matched;
+}
+
+/**
+ * -i?name test.
+ *
+ * Matches the file's name against the expression's pattern.  For root
+ * paths, the name is first passed through xbasename().
+ */
+bool eval_name(const struct bfs_expr *expr, struct bfs_eval *state) {
+	const struct BFTW *ftwbuf = state->ftwbuf;
+	const char *name = ftwbuf->path + ftwbuf->nameoff;
+
+	if (ftwbuf->depth != 0) {
+		return eval_fnmatch(expr, name);
+	}
+
+	// Any trailing slashes are not part of the name.  This can only
+	// happen for the root path.
+	char *base = xbasename(name);
+	if (!base) {
+		eval_report_error(state);
+		return false;
+	}
+
+	bool matched = eval_fnmatch(expr, base);
+	free(base);
+	return matched;
+}
+
+/**
+ * -i?path test: matches the whole path against the expression's pattern.
+ */
+bool eval_path(const struct bfs_expr *expr, struct bfs_eval *state) {
+	const struct BFTW *ftwbuf = state->ftwbuf;
+	return eval_fnmatch(expr, ftwbuf->path);
+}
+
+/**
+ * -perm test.
+ *
+ * Compares the file's mode against expr->dir_mode (for directories) or
+ * expr->file_mode (for everything else), according to expr->mode_cmp.
+ */
+bool eval_perm(const struct bfs_expr *expr, struct bfs_eval *state) {
+	const struct bfs_stat *statbuf = eval_stat(state);
+	if (!statbuf) {
+		return false;
+	}
+
+	bool is_dir = state->ftwbuf->type == BFS_DIR;
+	mode_t target = is_dir ? expr->dir_mode : expr->file_mode;
+	mode_t mode = statbuf->mode;
+
+	switch (expr->mode_cmp) {
+	case BFS_MODE_EQUAL:
+		return (mode & 07777) == target;
+
+	case BFS_MODE_ALL:
+		return (mode & target) == target;
+
+	case BFS_MODE_ANY:
+		// An empty target matches only when nothing overlaps, i.e. always
+		return !(mode & target) == !target;
+	}
+
+	bfs_bug("Invalid comparison mode");
+	return false;
+}
+
+/**
+ * Print a user/group name, or the numeric ID if there is no name, and grow
+ * the column width if the printed text was wider.
+ *
+ * @param file
+ * The stream to print to.
+ * @param name
+ * The name to print, or NULL to print the ID instead.
+ * @param id
+ * The numeric ID.
+ * @param width
+ * The current column width, updated in place.
+ * @return
+ * The fprintf() result.
+ */
+static int print_owner(FILE *file, const char *name, uintmax_t id, int *width) {
+	if (!name) {
+		int ret = fprintf(file, " %-*ju", *width, id);
+		// The leading space doesn't count toward the column width
+		if (ret >= 0 && ret - 1 > *width) {
+			*width = ret - 1;
+		}
+		return ret;
+	}
+
+	int len = xstrwidth(name);
+	if (len > *width) {
+		*width = len;
+	}
+
+	// Pad on the right to fill out the column
+	return fprintf(file, " %s%*s", name, *width - len, "");
+}
+
+/**
+ * -f?ls action.
+ *
+ * Prints one listing line for the current file to expr->cfile: inode,
+ * block count, mode string, ACL marker, link count, owner, group, size (or
+ * device numbers), modification time, path, and the link target for
+ * symbolic links.
+ *
+ * @return
+ * Always true; failures are reported through eval_stat() or
+ * eval_io_error().
+ */
+bool eval_fls(const struct bfs_expr *expr, struct bfs_eval *state) {
+ CFILE *cfile = expr->cfile;
+ FILE *file = cfile->file;
+ const struct bfs_ctx *ctx = state->ctx;
+ const struct BFTW *ftwbuf = state->ftwbuf;
+ const struct bfs_stat *statbuf = eval_stat(state);
+ if (!statbuf) {
+ goto done;
+ }
+
+ // ls -l prints non-path text in the "normal" color, so do the same
+ if (cfprintf(cfile, "${no}") < 0) {
+ goto error;
+ }
+
+ uintmax_t ino = statbuf->ino;
+ // Report blocks in 512-byte units when posixly_correct, else 1024-byte units
+ uintmax_t block_size = ctx->posixly_correct ? 512 : 1024;
+ // Convert from BFS_STAT_BLKSIZE units, rounding up
+ uintmax_t blocks = ((uintmax_t)statbuf->blocks * BFS_STAT_BLKSIZE + block_size - 1) / block_size;
+ char mode[11];
+ xstrmode(statbuf->mode, mode);
+ // Only a positive bfs_check_acl() result gets the '+' marker
+ char acl = bfs_check_acl(ftwbuf) > 0 ? '+' : ' ';
+ uintmax_t nlink = statbuf->nlink;
+ if (fprintf(file, "%9ju %6ju %s%c %2ju", ino, blocks, mode, acl, nlink) < 0) {
+ goto error;
+ }
+
+ const struct passwd *pwd = bfs_getpwuid(ctx->users, statbuf->uid);
+ // The column widths are static, so they persist across calls
+ static int uwidth = 8;
+ if (print_owner(file, pwd ? pwd->pw_name : NULL, statbuf->uid, &uwidth) < 0) {
+ goto error;
+ }
+
+ const struct group *grp = bfs_getgrgid(ctx->groups, statbuf->gid);
+ static int gwidth = 8;
+ if (print_owner(file, grp ? grp->gr_name : NULL, statbuf->gid, &gwidth) < 0) {
+ goto error;
+ }
+
+ // Device files show major/minor numbers in place of the size
+ if (ftwbuf->type == BFS_BLK || ftwbuf->type == BFS_CHR) {
+ int ma = xmajor(statbuf->rdev);
+ int mi = xminor(statbuf->rdev);
+ if (fprintf(file, " %3d, %3d", ma, mi) < 0) {
+ goto error;
+ }
+ } else {
+ uintmax_t size = statbuf->size;
+ if (fprintf(file, " %8ju", size) < 0) {
+ goto error;
+ }
+ }
+
+ time_t time = statbuf->mtime.tv_sec;
+ time_t now = ctx->now.tv_sec;
+ // "Six months" is approximated as 6 * 30 days
+ time_t six_months_ago = now - 6 * 30 * 24 * 60 * 60;
+ time_t tomorrow = now + 24 * 60 * 60;
+ struct tm tm;
+ if (!localtime_r(&time, &tm)) {
+ goto error;
+ }
+ char time_str[256];
+ size_t time_ret;
+ // Timestamps outside (six_months_ago, tomorrow) show the year instead of the time of day
+ if (time <= six_months_ago || time >= tomorrow) {
+ time_ret = strftime(time_str, sizeof(time_str), "%b %e %Y", &tm);
+ } else {
+ time_ret = strftime(time_str, sizeof(time_str), "%b %e %H:%M", &tm);
+ }
+ // A zero return from strftime() is treated as an overflow
+ if (time_ret == 0) {
+ errno = EOVERFLOW;
+ goto error;
+ }
+ if (cfprintf(cfile, " %s${rs}", time_str) < 0) {
+ goto error;
+ }
+
+ if (cfprintf(cfile, " %pP", ftwbuf) < 0) {
+ goto error;
+ }
+
+ if (ftwbuf->type == BFS_LNK) {
+ if (cfprintf(cfile, " -> %pL", ftwbuf) < 0) {
+ goto error;
+ }
+ }
+
+ if (fputc('\n', file) == EOF) {
+ goto error;
+ }
+
+done:
+ return true;
+
+error:
+ eval_io_error(expr, state);
+ return true;
+}
+
+/**
+ * -f?print action: print the path followed by a newline.
+ *
+ * @return
+ * Always true; write errors are reported via eval_io_error().
+ */
+bool eval_fprint(const struct bfs_expr *expr, struct bfs_eval *state) {
+	int written = cfprintf(expr->cfile, "%pP\n", state->ftwbuf);
+	if (written < 0) {
+		eval_io_error(expr, state);
+	}
+
+	return true;
+}
+
+/**
+ * -f?print0 action: write the path followed by a NUL byte.
+ *
+ * @return
+ * Always true; write errors are reported via eval_io_error().
+ */
+bool eval_fprint0(const struct bfs_expr *expr, struct bfs_eval *state) {
+	FILE *file = expr->cfile->file;
+	const char *path = state->ftwbuf->path;
+
+	// The +1 writes the string's own terminator as the separator
+	size_t nbytes = strlen(path) + 1;
+	size_t written = fwrite(path, 1, nbytes, file);
+	if (written != nbytes) {
+		eval_io_error(expr, state);
+	}
+
+	return true;
+}
+
+/**
+ * -f?printf action: format the current file with bfs_printf().
+ *
+ * @return
+ * Always true; failures are reported via eval_io_error().
+ */
+bool eval_fprintf(const struct bfs_expr *expr, struct bfs_eval *state) {
+	int rc = bfs_printf(expr->cfile, expr->printf, state->ftwbuf);
+	if (rc != 0) {
+		eval_io_error(expr, state);
+	}
+
+	return true;
+}
+
+/**
+ * -printx action.
+ *
+ * Writes the path with a backslash in front of every space, tab, newline,
+ * backslash, dollar sign, quote, and backtick, followed by a newline.
+ *
+ * @return
+ * Always true; write errors are reported via eval_io_error().
+ */
+bool eval_fprintx(const struct bfs_expr *expr, struct bfs_eval *state) {
+	FILE *file = expr->cfile->file;
+	const char *cursor = state->ftwbuf->path;
+
+	for (;;) {
+		// Copy the run of characters that need no escaping
+		size_t safe = strcspn(cursor, " \t\n\\$'\"`");
+		if (fwrite(cursor, 1, safe, file) != safe) {
+			goto error;
+		}
+		cursor += safe;
+
+		if (*cursor == '\0') {
+			break;
+		}
+
+		const char escaped[2] = {'\\', *cursor};
+		if (fwrite(escaped, 1, sizeof(escaped), file) != sizeof(escaped)) {
+			goto error;
+		}
+		++cursor;
+	}
+
+	if (fputc('\n', file) != EOF) {
+		return true;
+	}
+
+error:
+	eval_io_error(expr, state);
+	return true;
+}
+
+/**
+ * -limit action.
+ *
+ * Stops the traversal once this expression has been evaluated expr->num
+ * times, counting the evaluation in progress.
+ */
+bool eval_limit(const struct bfs_expr *expr, struct bfs_eval *state) {
+	// expr->evaluations hasn't been bumped for this evaluation yet
+	long long evals = expr->evaluations + 1;
+	bool reached = evals >= expr->num;
+
+	if (reached) {
+		state->quit = true;
+		state->action = BFTW_STOP;
+	}
+
+	return true;
+}
+
+/**
+ * -prune action: set the callback action to BFTW_PRUNE.
+ */
+bool eval_prune(const struct bfs_expr *expr, struct bfs_eval *state) {
+	(void)expr;
+	state->action = BFTW_PRUNE;
+	return true;
+}
+
+/**
+ * -quit action: stop the traversal immediately.
+ */
+bool eval_quit(const struct bfs_expr *expr, struct bfs_eval *state) {
+	(void)expr;
+	state->quit = true;
+	state->action = BFTW_STOP;
+	return true;
+}
+
+/**
+ * -i?regex test.
+ *
+ * Matches the whole path against the expression's regex, using
+ * BFS_REGEX_ANCHOR.  A negative result from bfs_regexec() is reported as
+ * an error and treated as no match.
+ */
+bool eval_regex(const struct bfs_expr *expr, struct bfs_eval *state) {
+	int ret = bfs_regexec(expr->regex, state->ftwbuf->path, BFS_REGEX_ANCHOR);
+	if (ret >= 0) {
+		return ret > 0;
+	}
+
+	char *str = bfs_regerror(expr->regex);
+	if (!str) {
+		eval_error(state, "bfs_regerror(): %m.\n");
+	} else {
+		eval_error(state, "%s.\n", str);
+		free(str);
+	}
+
+	return false;
+}
+
+/**
+ * -samefile test: matches files whose device and inode numbers both equal
+ * the ones stored in the expression.
+ */
+bool eval_samefile(const struct bfs_expr *expr, struct bfs_eval *state) {
+	const struct bfs_stat *statbuf = eval_stat(state);
+	if (!statbuf) {
+		return false;
+	}
+
+	if (statbuf->dev != expr->dev) {
+		return false;
+	}
+	return statbuf->ino == expr->ino;
+}
+
+/**
+ * -size test.
+ *
+ * Compares the file size, in units of expr->size_unit and rounded up,
+ * against expr->num.
+ */
+bool eval_size(const struct bfs_expr *expr, struct bfs_eval *state) {
+	// Bytes per unit, indexed by size unit
+	static const off_t scales[] = {
+		[BFS_BLOCKS] = 512,
+		[BFS_BYTES] = 1,
+		[BFS_WORDS] = 2,
+		[BFS_KB] = 1LL << 10,
+		[BFS_MB] = 1LL << 20,
+		[BFS_GB] = 1LL << 30,
+		[BFS_TB] = 1LL << 40,
+		[BFS_PB] = 1LL << 50,
+	};
+
+	const struct bfs_stat *statbuf = eval_stat(state);
+	if (!statbuf) {
+		return false;
+	}
+
+	off_t unit = scales[expr->size_unit];
+	// Round up, so a partially filled unit counts as a whole one
+	off_t units = (statbuf->size + unit - 1) / unit;
+	return bfs_expr_cmp(expr, units);
+}
+
+/**
+ * -sparse test: matches files that occupy fewer blocks than their size
+ * would need.
+ */
+bool eval_sparse(const struct bfs_expr *expr, struct bfs_eval *state) {
+	const struct bfs_stat *statbuf = eval_stat(state);
+	if (!statbuf) {
+		return false;
+	}
+
+	// Blocks needed to hold the whole file, rounded up
+	blkcnt_t needed = (statbuf->size + BFS_STAT_BLKSIZE - 1) / BFS_STAT_BLKSIZE;
+	return needed > statbuf->blocks;
+}
+
+/**
+ * -type test: expr->num is a bitmask with one bit per file type.
+ */
+bool eval_type(const struct bfs_expr *expr, struct bfs_eval *state) {
+	const struct BFTW *ftwbuf = state->ftwbuf;
+	return (1 << ftwbuf->type) & expr->num;
+}
+
+/**
+ * -xattr test.
+ *
+ * A negative result from bfs_check_xattrs() is reported as an error and
+ * treated as no match.
+ */
+bool eval_xattr(const struct bfs_expr *expr, struct bfs_eval *state) {
+	int ret = bfs_check_xattrs(state->ftwbuf);
+	if (ret < 0) {
+		eval_report_error(state);
+		return false;
+	}
+
+	return ret;
+}
+
+/**
+ * -xattrname test.
+ *
+ * Checks for the extended attribute named by expr->argv[1].  A negative
+ * result from bfs_check_xattr_named() is reported as an error and treated
+ * as no match.
+ */
+bool eval_xattrname(const struct bfs_expr *expr, struct bfs_eval *state) {
+	int ret = bfs_check_xattr_named(state->ftwbuf, expr->argv[1]);
+	if (ret < 0) {
+		eval_report_error(state);
+		return false;
+	}
+
+	return ret;
+}
+
+/**
+ * -xtype test.
+ *
+ * Like -type, but the file type is looked up with the BFS_STAT_NOFOLLOW
+ * and BFS_STAT_TRYFOLLOW bits of the default stat flags toggled.
+ */
+bool eval_xtype(const struct bfs_expr *expr, struct bfs_eval *state) {
+	const struct BFTW *ftwbuf = state->ftwbuf;
+
+	enum bfs_stat_flags flags = ftwbuf->stat_flags ^ (BFS_STAT_NOFOLLOW | BFS_STAT_TRYFOLLOW);
+	enum bfs_type type = bftw_type(ftwbuf, flags);
+	if (type != BFS_ERROR) {
+		return (1 << type) & expr->num;
+	}
+
+	eval_report_error(state);
+	return false;
+}
+
+// Pick the clock used by eval_gettime(): the monotonic clock when the
+// platform advertises it, otherwise the realtime clock when POSIX timers
+// are available.  BFS_CLOCK is left undefined when neither is.
+#if _POSIX_MONOTONIC_CLOCK > 0
+# define BFS_CLOCK CLOCK_MONOTONIC
+#elif _POSIX_TIMERS > 0
+# define BFS_CLOCK CLOCK_REALTIME
+#endif
+
+/**
+ * Read the current time from BFS_CLOCK, if one was selected.
+ *
+ * @param state
+ * The evaluation state, used for the warning on failure.
+ * @param ts
+ * Where to store the time.
+ * @return
+ * 0 on success; non-zero if clock_gettime() failed (after printing a
+ * warning) or no clock is available.
+ */
+static int eval_gettime(struct bfs_eval *state, struct timespec *ts) {
+#ifdef BFS_CLOCK
+	int ret = clock_gettime(BFS_CLOCK, ts);
+	if (ret == 0) {
+		return 0;
+	}
+
+	bfs_warning(state->ctx, "%pP: clock_gettime(): %m.\n", state->ftwbuf);
+	return ret;
+#else
+	return -1;
+#endif
+}
+
+/**
+ * Add the interval from start to end onto an accumulated elapsed time.
+ *
+ * @param elapsed
+ * The running total, updated in place and kept normalized.
+ * @param start
+ * The start of the interval.
+ * @param end
+ * The end of the interval.
+ */
+static void timespec_elapsed(struct timespec *elapsed, const struct timespec *start, const struct timespec *end) {
+	const long nsec_per_sec = 1000000000L;
+
+	time_t sec = elapsed->tv_sec + (end->tv_sec - start->tv_sec);
+	long nsec = elapsed->tv_nsec + (end->tv_nsec - start->tv_nsec);
+
+	// Carry or borrow a single second to bring nsec back into range
+	if (nsec < 0) {
+		nsec += nsec_per_sec;
+		--sec;
+	} else if (nsec >= nsec_per_sec) {
+		nsec -= nsec_per_sec;
+		++sec;
+	}
+
+	elapsed->tv_sec = sec;
+	elapsed->tv_nsec = nsec;
+}
+
+/**
+ * Evaluate an expression.
+ *
+ * Calls expr->eval_fn, updates the expression's evaluation and success
+ * counters, and records the elapsed time when DEBUG_RATES is enabled.
+ *
+ * @param expr
+ * The expression to evaluate.
+ * @param state
+ * The evaluation state for the current file.
+ * @return
+ * The result of expr->eval_fn.
+ */
+static bool eval_expr(struct bfs_expr *expr, struct bfs_eval *state) {
+ struct timespec start, end;
+ bool time = state->ctx->debug & DEBUG_RATES;
+ if (time) {
+ // Give up on timing this evaluation if the clock can't be read
+ if (eval_gettime(state, &start) != 0) {
+ time = false;
+ }
+ }
+
+ // Callers must stop evaluating once quit has been set
+ bfs_assert(!state->quit);
+
+ bool ret = expr->eval_fn(expr, state);
+
+ if (time) {
+ if (eval_gettime(state, &end) == 0) {
+ timespec_elapsed(&expr->elapsed, &start, &end);
+ }
+ }
+
+ ++expr->evaluations;
+ if (ret) {
+ ++expr->successes;
+ }
+
+ // Check the result against what is recorded about the expression
+ if (bfs_expr_never_returns(expr)) {
+ bfs_assert(state->quit);
+ } else if (!state->quit) {
+ bfs_assert(!expr->always_true || ret);
+ bfs_assert(!expr->always_false || !ret);
+ }
+
+ return ret;
+}
+
+/**
+ * Evaluate a negation: the inverse of the first child's result.
+ */
+bool eval_not(const struct bfs_expr *expr, struct bfs_eval *state) {
+	struct bfs_expr *operand = bfs_expr_children(expr);
+	return !eval_expr(operand, state);
+}
+
+/**
+ * Evaluate a conjunction.
+ *
+ * Children are evaluated in order, stopping at the first one that is false
+ * or that requests a quit.
+ */
+bool eval_and(const struct bfs_expr *expr, struct bfs_eval *state) {
+	struct bfs_expr *child = bfs_expr_children(expr);
+
+	while (child) {
+		bool matched = eval_expr(child, state);
+		if (!matched || state->quit) {
+			return false;
+		}
+		child = child->next;
+	}
+
+	return true;
+}
+
+/**
+ * Evaluate a disjunction.
+ *
+ * Children are evaluated in order, stopping at the first one that is true
+ * or that requests a quit.
+ */
+bool eval_or(const struct bfs_expr *expr, struct bfs_eval *state) {
+	struct bfs_expr *child = bfs_expr_children(expr);
+
+	while (child) {
+		bool matched = eval_expr(child, state);
+		if (matched || state->quit) {
+			return true;
+		}
+		child = child->next;
+	}
+
+	return false;
+}
+
+/**
+ * Evaluate the comma operator.
+ *
+ * Every child is evaluated in order (unless one requests a quit), and the
+ * result of the last evaluated child is returned.
+ */
+bool eval_comma(const struct bfs_expr *expr, struct bfs_eval *state) {
+	bool ret uninit(false);
+
+	struct bfs_expr *child = bfs_expr_children(expr);
+	while (child) {
+		ret = eval_expr(child, state);
+		if (state->quit) {
+			break;
+		}
+		child = child->next;
+	}
+
+	return ret;
+}
+
+/**
+ * Update the status bar.
+ *
+ * The status line is the escaped path (truncated to fit), then "...",
+ * padding, and a summary of the visit count and depth on the right.
+ *
+ * @param state
+ * The evaluation state for the current file.
+ * @param bar
+ * The status bar to update.
+ * @param last_status
+ * The time of the previous update; refreshed when an update goes ahead.
+ * @param count
+ * The number of files visited so far.
+ */
+static void eval_status(struct bfs_eval *state, struct bfs_bar *bar, struct timespec *last_status, size_t count) {
+ struct timespec now;
+ // If the clock can't be read, skip the throttling and update every time
+ if (eval_gettime(state, &now) == 0) {
+ struct timespec elapsed = {0};
+ timespec_elapsed(&elapsed, last_status, &now);
+
+ // Update every 0.1s
+ if (elapsed.tv_sec > 0 || elapsed.tv_nsec >= 100000000L) {
+ *last_status = now;
+ } else {
+ return;
+ }
+ }
+
+ // Too narrow to fit even the "..."
+ size_t width = bfs_bar_width(bar);
+ if (width < 3) {
+ return;
+ }
+
+ const struct BFTW *ftwbuf = state->ftwbuf;
+
+ dchar *status = NULL;
+ dchar *rhs = dstrprintf(" (visited: %'zu; depth: %2zu)", count, ftwbuf->depth);
+ if (!rhs) {
+ return;
+ }
+
+ // Drop the right-hand side entirely if it doesn't fit next to the "..."
+ size_t rhslen = xstrwidth(rhs);
+ if (3 + rhslen > width) {
+ dstresize(&rhs, 0);
+ rhslen = 0;
+ }
+
+ status = dstralloc(0);
+ if (!status) {
+ goto out;
+ }
+
+ // Show only the part of the path before the name, except for roots
+ const char *path = ftwbuf->path;
+ size_t pathlen = ftwbuf->nameoff;
+ if (ftwbuf->depth == 0) {
+ pathlen = strlen(path);
+ }
+
+ // Escape weird filename characters
+ if (dstrnescat(&status, path, pathlen, WESC_TTY) != 0) {
+ goto out;
+ }
+ pathlen = dstrlen(status);
+
+ // Try to make sure even wide characters fit in the status bar
+ size_t pathmax = width - rhslen - 3;
+ size_t pathwidth = 0;
+ size_t lhslen = 0;
+ mbstate_t mb = {0};
+ // i runs one character ahead; lhslen only advances past characters that fit
+ for (size_t i = lhslen; lhslen < pathlen; lhslen = i) {
+ wint_t wc = xmbrtowc(status, &i, pathlen, &mb);
+ int cwidth;
+ if (wc == WEOF) {
+ // Invalid byte sequence, assume a single-width '?'
+ cwidth = 1;
+ } else {
+ cwidth = xwcwidth(wc);
+ if (cwidth < 0) {
+ cwidth = 0;
+ }
+ }
+
+ if (pathwidth + cwidth > pathmax) {
+ break;
+ }
+ pathwidth += cwidth;
+ }
+ // Cut the path off after the last character that fit
+ dstresize(&status, lhslen);
+
+ if (dstrcat(&status, "...") != 0) {
+ goto out;
+ }
+
+ // Pad with spaces so the right-hand side lines up with the right edge
+ while (pathwidth < pathmax) {
+ if (dstrapp(&status, ' ') != 0) {
+ goto out;
+ }
+ ++pathwidth;
+ }
+
+ if (dstrdcat(&status, rhs) != 0) {
+ goto out;
+ }
+
+ bfs_bar_update(bar, status);
+
+out:
+ dstrfree(status);
+ dstrfree(rhs);
+}
+
+/**
+ * Check whether this is the first time a file has been seen.
+ *
+ * @param state
+ * The evaluation state; the action is set to BFTW_PRUNE for repeats.
+ * @param seen
+ * The set of file IDs seen so far.
+ * @return
+ * true for a new file, false for a repeat or on error.
+ */
+static bool eval_file_unique(struct bfs_eval *state, struct trie *seen) {
+	const struct bfs_stat *statbuf = eval_stat(state);
+	if (!statbuf) {
+		return false;
+	}
+
+	bfs_file_id id;
+	bfs_stat_id(statbuf, &id);
+
+	struct trie_leaf *leaf = trie_insert_mem(seen, id, sizeof(id));
+	if (!leaf) {
+		eval_report_error(state);
+		return false;
+	}
+
+	if (!leaf->value) {
+		// Any non-NULL value marks the leaf as seen
+		leaf->value = leaf;
+		return true;
+	}
+
+	state->action = BFTW_PRUNE;
+	return false;
+}
+
+/**
+ * Print the name of a flag to stderr if it is present in flags, then toggle
+ * it out of flags.  A " | " separator follows when other flags remain.
+ *
+ * The `flags == flag` test lets a zero-valued flag be printed when flags is
+ * zero too.  `flags` must be a modifiable lvalue; both arguments are
+ * evaluated more than once.
+ */
+#define DEBUG_FLAG(flags, flag) \
+	do { \
+		if (((flags) & (flag)) || (flags) == (flag)) { \
+			fputs(#flag, stderr); \
+			(flags) ^= (flag); \
+			if (flags) { \
+				fputs(" | ", stderr); \
+			} \
+		} \
+	} while (0)
+
+/**
+ * Log a stat() call.
+ *
+ * Prints a line of the form `bfs_stat(<dir>, "<path>", <flags>) == <ret>`
+ * to stderr, followed by the error code in brackets for failed calls.
+ *
+ * @param ctx
+ * The bfs context.
+ * @param ftwbuf
+ * The file that was stat'd.
+ * @param flags
+ * The flags the call was made with.
+ * @param err
+ * The error code of the call; 0 is logged as success.
+ */
+static void debug_stat(const struct bfs_ctx *ctx, const struct BFTW *ftwbuf, enum bfs_stat_flags flags, int err) {
+	bfs_debug_prefix(ctx, DEBUG_STAT);
+
+	fprintf(stderr, "bfs_stat(");
+	if (ftwbuf->at_fd == (int)AT_FDCWD) {
+		fprintf(stderr, "AT_FDCWD");
+	} else {
+		// Whatever precedes at_path in the full path is the base directory
+		size_t baselen = strlen(ftwbuf->path) - strlen(ftwbuf->at_path);
+		fprintf(stderr, "\"");
+		fwrite(ftwbuf->path, 1, baselen, stderr);
+		fprintf(stderr, "\"");
+	}
+
+	fprintf(stderr, ", \"%s\", ", ftwbuf->at_path);
+
+	DEBUG_FLAG(flags, BFS_STAT_FOLLOW);
+	DEBUG_FLAG(flags, BFS_STAT_NOFOLLOW);
+	DEBUG_FLAG(flags, BFS_STAT_TRYFOLLOW);
+	DEBUG_FLAG(flags, BFS_STAT_NOSYNC);
+
+	// A non-zero error code means the call failed, i.e. returned -1
+	fprintf(stderr, ") == %d", err ? -1 : 0);
+
+	if (err) {
+		fprintf(stderr, " [%d]", err);
+	}
+
+	fprintf(stderr, "\n");
+}
+
+/**
+ * Log any stat() calls that happened for a file.
+ *
+ * Does nothing unless DEBUG_STAT is enabled.  A stat buffer is logged only
+ * when its recorded error code is non-negative.
+ */
+static void debug_stats(const struct bfs_ctx *ctx, const struct BFTW *ftwbuf) {
+	if ((ctx->debug & DEBUG_STAT) == 0) {
+		return;
+	}
+
+	const struct bftw_stat *bufs = &ftwbuf->stat_bufs;
+
+	if (bufs->stat_err >= 0) {
+		debug_stat(ctx, ftwbuf, BFS_STAT_FOLLOW, bufs->stat_err);
+	}
+
+	if (bufs->lstat_err >= 0) {
+		debug_stat(ctx, ftwbuf, BFS_STAT_NOFOLLOW, bufs->lstat_err);
+	}
+}
+
+/** Designated initializer mapping an enum constant to its own name. */
+#define DUMP_MAP(value) [value] = #value
+
+/**
+ * Get the name of a bfs_type for -D search.
+ *
+ * BFS_ERROR is handled separately rather than through the lookup table.
+ */
+static const char *dump_bfs_type(enum bfs_type type) {
+	static const char *types[] = {
+		DUMP_MAP(BFS_UNKNOWN),
+		DUMP_MAP(BFS_BLK),
+		DUMP_MAP(BFS_CHR),
+		DUMP_MAP(BFS_DIR),
+		DUMP_MAP(BFS_DOOR),
+		DUMP_MAP(BFS_FIFO),
+		DUMP_MAP(BFS_LNK),
+		DUMP_MAP(BFS_PORT),
+		DUMP_MAP(BFS_REG),
+		DUMP_MAP(BFS_SOCK),
+		DUMP_MAP(BFS_WHT),
+	};
+
+	if (type != BFS_ERROR) {
+		return types[type];
+	}
+
+	return "BFS_ERROR";
+}
+
+/**
+ * Get the name of a bftw_visit for -D search.
+ */
+static const char *dump_bftw_visit(enum bftw_visit visit) {
+	static const char *names[] = {
+		DUMP_MAP(BFTW_PRE),
+		DUMP_MAP(BFTW_POST),
+	};
+
+	const char *name = names[visit];
+	return name;
+}
+
+/**
+ * Get the name of a bftw_action for -D search.
+ */
+static const char *dump_bftw_action(enum bftw_action action) {
+	static const char *names[] = {
+		DUMP_MAP(BFTW_CONTINUE),
+		DUMP_MAP(BFTW_PRUNE),
+		DUMP_MAP(BFTW_STOP),
+	};
+
+	const char *name = names[action];
+	return name;
+}
+
+/**
+ * Type passed as the argument to the bftw() callback.
+ *
+ * This holds the state that persists across calls to eval_callback().
+ */
+struct callback_args {
+ /** The bfs context. */
+ const struct bfs_ctx *ctx;
+
+ /** The status bar. */
+ struct bfs_bar *bar;
+ /** The time of the last status update. */
+ struct timespec last_status;
+ /** The number of files visited so far. */
+ size_t count;
+
+ /** The set of seen files. */
+ struct trie *seen;
+
+ /** Eventual return value from bfs_eval(). */
+ // eval_callback() points bfs_eval::ret at this field
+ int ret;
+};
+
+/**
+ * bftw() callback.
+ *
+ * Runs once per visit of each file: updates the status bar, handles
+ * traversal errors, applies the uniqueness and exclusion checks, enforces
+ * the depth limits, and finally evaluates the main expression.
+ *
+ * @param ftwbuf
+ * Data about the current file.
+ * @param ptr
+ * The struct callback_args for this traversal.
+ * @return
+ * The action for bftw() to take next.
+ */
+static enum bftw_action eval_callback(const struct BFTW *ftwbuf, void *ptr) {
+ struct callback_args *args = ptr;
+ ++args->count;
+
+ const struct bfs_ctx *ctx = args->ctx;
+
+ // Fresh evaluation state for this visit
+ struct bfs_eval state;
+ state.ftwbuf = ftwbuf;
+ state.ctx = ctx;
+ state.action = BFTW_CONTINUE;
+ state.ret = &args->ret;
+ state.quit = false;
+
+ if (args->bar) {
+ eval_status(&state, args->bar, &args->last_status, args->count);
+ }
+
+ // Report the traversal error, unless it can be ignored, and don't descend
+ if (ftwbuf->type == BFS_ERROR) {
+ if (!eval_should_ignore(&state, ftwbuf->error)) {
+ eval_error(&state, "%s.\n", xstrerror(ftwbuf->error));
+ }
+ state.action = BFTW_PRUNE;
+ goto done;
+ }
+
+ // The uniqueness check only runs on the pre-order visit
+ if (ctx->unique && ftwbuf->visit == BFTW_PRE) {
+ if (!eval_file_unique(&state, args->seen)) {
+ goto done;
+ }
+ }
+
+ // Files matching the exclude expression are pruned without being evaluated
+ if (eval_expr(ctx->exclude, &state)) {
+ state.action = BFTW_PRUNE;
+ goto done;
+ }
+
+ if (ctx->xargs_safe && strpbrk(ftwbuf->path, " \t\n\'\"\\")) {
+ eval_error(&state, "Path is not safe for xargs.\n");
+ state.action = BFTW_PRUNE;
+ goto done;
+ }
+
+ // Don't descend any further once the maximum depth is reached
+ if (ctx->maxdepth < 0 || ftwbuf->depth >= (size_t)ctx->maxdepth) {
+ state.action = BFTW_PRUNE;
+ }
+
+ // In -depth mode, only handle directories on the BFTW_POST visit
+ enum bftw_visit expected_visit = BFTW_PRE;
+ if ((ctx->flags & BFTW_POST_ORDER)
+ && (ctx->strategy == BFTW_IDS || ftwbuf->type == BFS_DIR)
+ && ftwbuf->depth < (size_t)ctx->maxdepth) {
+ expected_visit = BFTW_POST;
+ }
+
+ // Evaluate the main expression only on the expected visit and within the depth range
+ if (ftwbuf->visit == expected_visit
+ && ftwbuf->depth >= (size_t)ctx->mindepth
+ && ftwbuf->depth <= (size_t)ctx->maxdepth) {
+ eval_expr(ctx->expr, &state);
+ }
+
+done:
+ debug_stats(ctx, ftwbuf);
+
+ // Dump the callback's arguments and result when DEBUG_SEARCH is enabled
+ if (bfs_debug(ctx, DEBUG_SEARCH, "eval_callback({\n")) {
+ fprintf(stderr, "\t.path = \"%s\",\n", ftwbuf->path);
+ fprintf(stderr, "\t.root = \"%s\",\n", ftwbuf->root);
+ fprintf(stderr, "\t.depth = %zu,\n", ftwbuf->depth);
+ fprintf(stderr, "\t.visit = %s,\n", dump_bftw_visit(ftwbuf->visit));
+ fprintf(stderr, "\t.type = %s,\n", dump_bfs_type(ftwbuf->type));
+ fprintf(stderr, "\t.error = %d,\n", ftwbuf->error);
+ fprintf(stderr, "}) == %s\n", dump_bftw_action(state.action));
+ }
+
+ return state.action;
+}
+
+/**
+ * Try to raise the soft RLIMIT_NOFILE towards 64k (capped at the hard limit).
+ *
+ * @return
+ *         The soft limit now in effect: the target if it was already allowed
+ *         or setrlimit() succeeded, otherwise the original soft limit.
+ */
+static int raise_fdlimit(struct bfs_ctx *ctx) {
+	const rlim_t soft = ctx->orig_nofile.rlim_cur;
+	const rlim_t hard = ctx->orig_nofile.rlim_max;
+
+	// Aim for 64k descriptors, but never more than the hard limit
+	rlim_t wanted = 64 << 10;
+	if (rlim_cmp(wanted, hard) > 0) {
+		wanted = hard;
+	}
+
+	// Only call setrlimit() when the soft limit is actually below the target
+	if (rlim_cmp(wanted, soft) > 0) {
+		const struct rlimit raised = {
+			.rlim_cur = wanted,
+			.rlim_max = hard,
+		};
+
+		if (setrlimit(RLIMIT_NOFILE, &raised) != 0) {
+			return soft;
+		}
+
+		// Remember the new limit in the context
+		ctx->cur_nofile = raised;
+	}
+
+	return wanted;
+}
+
+/** Preallocate the fd table in the kernel. */
+static void reserve_fds(int limit) {
+	// The kernel's fd table is typically a dynamic array, and growing it
+	// can be expensive, especially while files are being opened in
+	// parallel.  Briefly allocating the highest possible fd forces the
+	// table to grow once, upfront.
+
+#ifdef F_DUPFD_CLOEXEC
+	const int cmd = F_DUPFD_CLOEXEC;
+#else
+	const int cmd = F_DUPFD;
+#endif
+
+	int fd = fcntl(STDIN_FILENO, cmd, limit - 1);
+	if (fd < 0) {
+		// Best effort only
+		return;
+	}
+
+	xclose(fd);
+}
+
+/**
+ * Infer the number of file descriptors available to bftw().
+ *
+ * @param ctx
+ *         The bfs context.
+ * @param limit
+ *         The current file descriptor limit.
+ * @return
+ *         The number of descriptors bftw() may use (never less than 2).
+ */
+static int infer_fdlimit(const struct bfs_ctx *ctx, int limit) {
+	// Fallback estimate: 3 for std{in,out,err}, plus ctx->nfiles
+	int nopen = 3 + ctx->nfiles;
+
+	// If possible, count the entries of /proc/self/fd (or /dev/fd) instead,
+	// since we may have inherited more than just the standard descriptors
+	struct bfs_dir *dir = bfs_allocdir();
+	if (dir) {
+		bool opened = bfs_opendir(dir, AT_FDCWD, "/proc/self/fd", 0) == 0
+			|| bfs_opendir(dir, AT_FDCWD, "/dev/fd", 0) == 0;
+
+		if (opened) {
+			// Start at -1 to account for 'dir' itself
+			nopen = -1;
+			while (bfs_readdir(dir, NULL) > 0) {
+				++nopen;
+			}
+			bfs_closedir(dir);
+		}
+	}
+	free(dir);
+
+	// Set aside the descriptors that the expression itself needs
+	int ret = limit - nopen;
+	ret -= ctx->expr->persistent_fds;
+	ret -= ctx->expr->ephemeral_fds;
+
+	// bftw() needs at least 2 available fds
+	return ret < 2 ? 2 : ret;
+}
+
+/**
+ * Dump the bftw() flags for -D search.
+ *
+ * NOTE(review): DEBUG_FLAG() is defined outside this view; it presumably prints
+ * the flag if it is set and clears it from `flags`, so that the final assertion
+ * catches any flag missing from this list -- confirm against the macro.
+ */
+static void dump_bftw_flags(enum bftw_flags flags) {
+ DEBUG_FLAG(flags, 0);
+ DEBUG_FLAG(flags, BFTW_STAT);
+ DEBUG_FLAG(flags, BFTW_RECOVER);
+ DEBUG_FLAG(flags, BFTW_POST_ORDER);
+ DEBUG_FLAG(flags, BFTW_FOLLOW_ROOTS);
+ DEBUG_FLAG(flags, BFTW_FOLLOW_ALL);
+ DEBUG_FLAG(flags, BFTW_DETECT_CYCLES);
+ DEBUG_FLAG(flags, BFTW_SKIP_MOUNTS);
+ DEBUG_FLAG(flags, BFTW_PRUNE_MOUNTS);
+ DEBUG_FLAG(flags, BFTW_SORT);
+ DEBUG_FLAG(flags, BFTW_BUFFER);
+ DEBUG_FLAG(flags, BFTW_WHITEOUTS);
+
+ // Every known flag should have been handled above
+ bfs_assert(flags == 0, "Missing bftw flag 0x%X", flags);
+}
+
+/**
+ * Get the name of a bftw_strategy, for -D search.
+ */
+static const char *dump_bftw_strategy(enum bftw_strategy strategy) {
+	// Lookup table from strategy value to its name
+	static const char *names[] = {
+		DUMP_MAP(BFTW_BFS),
+		DUMP_MAP(BFTW_DFS),
+		DUMP_MAP(BFTW_IDS),
+		DUMP_MAP(BFTW_EDS),
+	};
+
+	const char *name = names[strategy];
+	return name;
+}
+
+/** Check if we need to enable BFTW_BUFFER. */
+static bool eval_must_buffer(const struct bfs_expr *expr) {
+#if __FreeBSD__
+	// On FreeBSD, readdir() on NFS mounts misbehaves when directory entries
+	// are added or removed during the scan.  As a workaround, request
+	// BFTW_BUFFER whenever the expression contains -delete or -exec, since
+	// those may mutate the directory ourselves.  Concurrent modification by
+	// other processes is racy anyway, so we don't try to handle it.
+	//
+	// https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=57696
+	// https://github.com/tavianator/bfs/issues/67
+
+	if (expr->eval_fn == eval_delete) {
+		return true;
+	}
+	if (expr->eval_fn == eval_exec) {
+		return true;
+	}
+
+	// Otherwise, search the sub-expressions recursively
+	struct bfs_expr *child = bfs_expr_children(expr);
+	while (child) {
+		if (eval_must_buffer(child)) {
+			return true;
+		}
+		child = child->next;
+	}
+#endif // __FreeBSD__
+
+	return false;
+}
+
+/**
+ * Evaluate the command line: set up the callback state, run bftw() over
+ * ctx->paths with eval_callback(), then finish any pending -exec commands.
+ *
+ * @param ctx
+ * The bfs context to evaluate.
+ * @return
+ * EXIT_SUCCESS if there is no expression or nothing failed; EXIT_FAILURE if
+ * bftw() or eval_exec_finish() failed; otherwise whatever the callbacks
+ * stored in args.ret.
+ */
+int bfs_eval(struct bfs_ctx *ctx) {
+ if (!ctx->expr) {
+ return EXIT_SUCCESS;
+ }
+
+ // Unlisted members (bar, last_status, count, seen) start out zeroed
+ struct callback_args args = {
+ .ctx = ctx,
+ .ret = EXIT_SUCCESS,
+ };
+
+ if (ctx->status) {
+ args.bar = bfs_bar_show();
+ if (!args.bar) {
+ bfs_warning(ctx, "Couldn't show status bar: %m.\n\n");
+ }
+ }
+
+ // The set of seen files is only needed (and only initialized) for ctx->unique
+ struct trie seen;
+ if (ctx->unique) {
+ trie_init(&seen);
+ args.seen = &seen;
+ }
+
+ // Raise the fd limit, pre-grow the fd table, then work out bftw()'s share
+ int fdlimit = raise_fdlimit(ctx);
+ reserve_fds(fdlimit);
+ fdlimit = infer_fdlimit(ctx, fdlimit);
+
+ // -1 for the main thread
+ int nthreads = ctx->threads - 1;
+
+ struct bftw_args bftw_args = {
+ .paths = ctx->paths,
+ .npaths = ctx->npaths,
+ .callback = eval_callback,
+ .ptr = &args,
+ .nopenfd = fdlimit,
+ .nthreads = nthreads,
+ .flags = ctx->flags,
+ .strategy = ctx->strategy,
+ .mtab = bfs_ctx_mtab(ctx),
+ };
+
+ if (eval_must_buffer(ctx->expr)) {
+ bftw_args.flags |= BFTW_BUFFER;
+ }
+
+ // Dump the bftw() arguments when DEBUG_SEARCH is enabled
+ if (bfs_debug(ctx, DEBUG_SEARCH, "bftw({\n")) {
+ fprintf(stderr, "\t.paths = {\n");
+ for (size_t i = 0; i < bftw_args.npaths; ++i) {
+ fprintf(stderr, "\t\t\"%s\",\n", bftw_args.paths[i]);
+ }
+ fprintf(stderr, "\t},\n");
+ fprintf(stderr, "\t.npaths = %zu,\n", bftw_args.npaths);
+ fprintf(stderr, "\t.callback = eval_callback,\n");
+ fprintf(stderr, "\t.ptr = &args,\n");
+ fprintf(stderr, "\t.nopenfd = %d,\n", bftw_args.nopenfd);
+ fprintf(stderr, "\t.nthreads = %d,\n", bftw_args.nthreads);
+ fprintf(stderr, "\t.flags = ");
+ dump_bftw_flags(bftw_args.flags);
+ fprintf(stderr, ",\n\t.strategy = %s,\n", dump_bftw_strategy(bftw_args.strategy));
+ fprintf(stderr, "\t.mtab = ");
+ if (bftw_args.mtab) {
+ fprintf(stderr, "ctx->mtab");
+ } else {
+ fprintf(stderr, "NULL");
+ }
+ fprintf(stderr, ",\n})\n");
+ }
+
+ if (bftw(&bftw_args) != 0) {
+ args.ret = EXIT_FAILURE;
+ bfs_perror(ctx, "bftw()");
+ }
+
+ // Finish pending -exec commands even if the traversal itself failed
+ if (eval_exec_finish(ctx->expr, ctx) != 0) {
+ args.ret = EXIT_FAILURE;
+ }
+
+ bfs_ctx_dump(ctx, DEBUG_RATES);
+
+ if (ctx->unique) {
+ trie_destroy(&seen);
+ }
+
+ // args.bar may be NULL here (no status bar requested, or showing it failed)
+ bfs_bar_hide(args.bar);
+
+ return args.ret;
+}
diff --git a/src/eval.h b/src/eval.h
new file mode 100644
index 0000000..4dd7996
--- /dev/null
+++ b/src/eval.h
@@ -0,0 +1,102 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+/**
+ * The evaluation functions that implement primary expressions like -name,
+ * -print, etc.
+ */
+
+#ifndef BFS_EVAL_H
+#define BFS_EVAL_H
+
+#include "prelude.h"
+
+struct bfs_ctx;
+struct bfs_expr;
+
+/**
+ * Ephemeral state for evaluating an expression.
+ */
+struct bfs_eval;
+
+/**
+ * Expression evaluation function.
+ *
+ * @param expr
+ * The current expression.
+ * @param state
+ * The current evaluation state.
+ * @return
+ * The result of the test.
+ */
+typedef bool bfs_eval_fn(const struct bfs_expr *expr, struct bfs_eval *state);
+
+/**
+ * Evaluate the command line.
+ *
+ * @param ctx
+ * The bfs context to evaluate.
+ * @return
+ * EXIT_SUCCESS on success, or some other exit status on failure.
+ */
+int bfs_eval(struct bfs_ctx *ctx);
+
+// Predicate evaluation functions
+
+bool eval_true(const struct bfs_expr *expr, struct bfs_eval *state);
+bool eval_false(const struct bfs_expr *expr, struct bfs_eval *state);
+
+bool eval_access(const struct bfs_expr *expr, struct bfs_eval *state);
+bool eval_acl(const struct bfs_expr *expr, struct bfs_eval *state);
+bool eval_capable(const struct bfs_expr *expr, struct bfs_eval *state);
+bool eval_context(const struct bfs_expr *expr, struct bfs_eval *state);
+bool eval_perm(const struct bfs_expr *expr, struct bfs_eval *state);
+bool eval_xattr(const struct bfs_expr *expr, struct bfs_eval *state);
+bool eval_xattrname(const struct bfs_expr *expr, struct bfs_eval *state);
+
+bool eval_newer(const struct bfs_expr *expr, struct bfs_eval *state);
+bool eval_time(const struct bfs_expr *expr, struct bfs_eval *state);
+bool eval_used(const struct bfs_expr *expr, struct bfs_eval *state);
+
+bool eval_gid(const struct bfs_expr *expr, struct bfs_eval *state);
+bool eval_uid(const struct bfs_expr *expr, struct bfs_eval *state);
+bool eval_nogroup(const struct bfs_expr *expr, struct bfs_eval *state);
+bool eval_nouser(const struct bfs_expr *expr, struct bfs_eval *state);
+
+bool eval_depth(const struct bfs_expr *expr, struct bfs_eval *state);
+bool eval_empty(const struct bfs_expr *expr, struct bfs_eval *state);
+bool eval_flags(const struct bfs_expr *expr, struct bfs_eval *state);
+bool eval_fstype(const struct bfs_expr *expr, struct bfs_eval *state);
+bool eval_hidden(const struct bfs_expr *expr, struct bfs_eval *state);
+bool eval_inum(const struct bfs_expr *expr, struct bfs_eval *state);
+bool eval_links(const struct bfs_expr *expr, struct bfs_eval *state);
+bool eval_samefile(const struct bfs_expr *expr, struct bfs_eval *state);
+bool eval_size(const struct bfs_expr *expr, struct bfs_eval *state);
+bool eval_sparse(const struct bfs_expr *expr, struct bfs_eval *state);
+bool eval_type(const struct bfs_expr *expr, struct bfs_eval *state);
+bool eval_xtype(const struct bfs_expr *expr, struct bfs_eval *state);
+
+bool eval_lname(const struct bfs_expr *expr, struct bfs_eval *state);
+bool eval_name(const struct bfs_expr *expr, struct bfs_eval *state);
+bool eval_path(const struct bfs_expr *expr, struct bfs_eval *state);
+bool eval_regex(const struct bfs_expr *expr, struct bfs_eval *state);
+
+bool eval_delete(const struct bfs_expr *expr, struct bfs_eval *state);
+bool eval_exec(const struct bfs_expr *expr, struct bfs_eval *state);
+bool eval_exit(const struct bfs_expr *expr, struct bfs_eval *state);
+bool eval_fls(const struct bfs_expr *expr, struct bfs_eval *state);
+bool eval_fprint(const struct bfs_expr *expr, struct bfs_eval *state);
+bool eval_fprint0(const struct bfs_expr *expr, struct bfs_eval *state);
+bool eval_fprintf(const struct bfs_expr *expr, struct bfs_eval *state);
+bool eval_fprintx(const struct bfs_expr *expr, struct bfs_eval *state);
+bool eval_limit(const struct bfs_expr *expr, struct bfs_eval *state);
+bool eval_prune(const struct bfs_expr *expr, struct bfs_eval *state);
+bool eval_quit(const struct bfs_expr *expr, struct bfs_eval *state);
+
+// Operator evaluation functions
+bool eval_not(const struct bfs_expr *expr, struct bfs_eval *state);
+bool eval_and(const struct bfs_expr *expr, struct bfs_eval *state);
+bool eval_or(const struct bfs_expr *expr, struct bfs_eval *state);
+bool eval_comma(const struct bfs_expr *expr, struct bfs_eval *state);
+
+#endif // BFS_EVAL_H
diff --git a/src/exec.c b/src/exec.c
new file mode 100644
index 0000000..cd73d6c
--- /dev/null
+++ b/src/exec.c
@@ -0,0 +1,690 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+#include "prelude.h"
+#include "exec.h"
+#include "alloc.h"
+#include "bfstd.h"
+#include "bftw.h"
+#include "color.h"
+#include "ctx.h"
+#include "diag.h"
+#include "dstring.h"
+#include "xspawn.h"
+#include <errno.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/resource.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+/** Print a -D exec debugging message, prefixed with the name of the action. */
+attr(printf(2, 3))
+static void bfs_exec_debug(const struct bfs_exec *execbuf, const char *format, ...) {
+	const struct bfs_ctx *ctx = execbuf->ctx;
+	if (!bfs_debug(ctx, DEBUG_EXEC, "${blu}")) {
+		return;
+	}
+
+	// Rebuild the action name from the flags: -exec, -execdir, -ok, or -okdir
+	const bool confirm = execbuf->flags & BFS_EXEC_CONFIRM;
+	const bool chdir = execbuf->flags & BFS_EXEC_CHDIR;
+	fputs(confirm ? "-ok" : "-exec", stderr);
+	if (chdir) {
+		fputs("dir", stderr);
+	}
+	cfprintf(ctx->cerr, "${rs}: ");
+
+	// Then print the caller's message itself
+	va_list ap;
+	va_start(ap, format);
+	vfprintf(stderr, format, ap);
+	va_end(ap);
+}
+
+/** Determine the size of a single argument, for comparison to arg_max. */
+static size_t bfs_exec_arg_size(const char *arg) {
+	// Each argument costs one pointer-sized slot ...
+	size_t slot = sizeof(arg);
+	// ... plus the string itself, including its NUL terminator
+	size_t bytes = strlen(arg) + 1;
+	return slot + bytes;
+}
+
+/** Even if we can pass a bigger argument list, cap it here (16 MiB). */
+#define BFS_EXEC_ARG_MAX (16 << 20)
+
+/**
+ * Determine the maximum argv size.
+ *
+ * Starts from sysconf(_SC_ARG_MAX) (or BFS_EXEC_ARG_MAX if that is
+ * unavailable), subtracts the space taken by the environment and by the fixed
+ * template arguments, then leaves a page of cushion plus 2048 bytes of
+ * headroom.  The result is clamped to [0, BFS_EXEC_ARG_MAX].
+ */
+static size_t bfs_exec_arg_max(const struct bfs_exec *execbuf) {
+ long arg_max = xsysconf(_SC_ARG_MAX);
+ bfs_exec_debug(execbuf, "ARG_MAX: %ld according to sysconf()\n", arg_max);
+ if (arg_max < 0) {
+ arg_max = BFS_EXEC_ARG_MAX;
+ bfs_exec_debug(execbuf, "ARG_MAX: %ld assumed\n", arg_max);
+ }
+
+ // We have to share space with the environment variables
+ extern char **environ;
+ for (char **envp = environ; *envp; ++envp) {
+ arg_max -= bfs_exec_arg_size(*envp);
+ }
+ // Account for the terminating NULL entry
+ arg_max -= sizeof(char *);
+ bfs_exec_debug(execbuf, "ARG_MAX: %ld remaining after environment variables\n", arg_max);
+
+ // Account for the fixed arguments
+ // (everything in the template except its final entry)
+ for (size_t i = 0; i < execbuf->tmpl_argc - 1; ++i) {
+ arg_max -= bfs_exec_arg_size(execbuf->tmpl_argv[i]);
+ }
+ // Account for the terminating NULL entry
+ arg_max -= sizeof(char *);
+ bfs_exec_debug(execbuf, "ARG_MAX: %ld remaining after fixed arguments\n", arg_max);
+
+ // Assume arguments are counted with the granularity of a single page,
+ // so allow a one page cushion to account for rounding up
+ long page_size = xsysconf(_SC_PAGESIZE);
+ if (page_size < 4096) {
+ page_size = 4096;
+ }
+ arg_max -= page_size;
+ bfs_exec_debug(execbuf, "ARG_MAX: %ld remaining after page cushion\n", arg_max);
+
+ // POSIX recommends an additional 2048 bytes of headroom
+ arg_max -= 2048;
+ bfs_exec_debug(execbuf, "ARG_MAX: %ld remaining after headroom\n", arg_max);
+
+ // Clamp to [0, BFS_EXEC_ARG_MAX] before converting to size_t
+ if (arg_max < 0) {
+ arg_max = 0;
+ } else if (arg_max > BFS_EXEC_ARG_MAX) {
+ arg_max = BFS_EXEC_ARG_MAX;
+ }
+
+ bfs_exec_debug(execbuf, "ARG_MAX: %ld final value\n", arg_max);
+ return arg_max;
+}
+
+/** Highlight part of the command line as an error. */
+static void bfs_exec_parse_error(const struct bfs_ctx *ctx, const struct bfs_exec *execbuf) {
+ char **argv = execbuf->tmpl_argv - 1;
+ size_t argc = execbuf->tmpl_argc + 1;
+ if (argv[argc]) {
+ ++argc;
+ }
+
+ bool args[ctx->argc];
+ for (size_t i = 0; i < ctx->argc; ++i) {
+ args[i] = false;
+ }
+
+ size_t i = argv - ctx->argv;
+ for (size_t j = 0; j < argc; ++j) {
+ args[i + j] = true;
+ }
+
+ bfs_argv_error(ctx, args);
+}
+
+/**
+ * Parse an exec action from the command line.
+ *
+ * argv[0] is the action itself; the command template starts at argv[1] and
+ * runs up to a terminating ";" or, unless BFS_EXEC_CONFIRM is set, "{} +".
+ * Returns the new exec buffer, or NULL (after reporting an error) on failure.
+ */
+struct bfs_exec *bfs_exec_parse(const struct bfs_ctx *ctx, char **argv, enum bfs_exec_flags flags) {
+	struct bfs_exec *execbuf = ZALLOC(struct bfs_exec);
+	if (!execbuf) {
+		bfs_perror(ctx, "zalloc()");
+		goto fail;
+	}
+
+	execbuf->flags = flags;
+	execbuf->ctx = ctx;
+	execbuf->tmpl_argv = argv + 1;
+	execbuf->wd_fd = -1;
+
+	// Scan forward for the terminator, counting the template arguments
+	for (;; ++execbuf->tmpl_argc) {
+		const char *arg = execbuf->tmpl_argv[execbuf->tmpl_argc];
+
+		if (!arg) {
+			// Ran off the end of the command line without a terminator
+			bfs_exec_parse_error(ctx, execbuf);
+			if (execbuf->flags & BFS_EXEC_CONFIRM) {
+				bfs_error(ctx, "Expected '... ;'.\n");
+			} else {
+				bfs_error(ctx, "Expected '... ;' or '... {} +'.\n");
+			}
+			goto fail;
+		}
+
+		if (strcmp(arg, ";") == 0) {
+			break;
+		}
+
+		// A "+" only terminates the command when it directly follows "{}"
+		if (execbuf->tmpl_argc == 0 || strcmp(arg, "+") != 0) {
+			continue;
+		}
+
+		const char *prev = execbuf->tmpl_argv[execbuf->tmpl_argc - 1];
+		bool confirm = execbuf->flags & BFS_EXEC_CONFIRM;
+		if (!confirm && strcmp(prev, "{}") == 0) {
+			execbuf->flags |= BFS_EXEC_MULTI;
+			break;
+		}
+	}
+
+	if (execbuf->tmpl_argc == 0) {
+		bfs_exec_parse_error(ctx, execbuf);
+		bfs_error(ctx, "Missing command.\n");
+		goto fail;
+	}
+
+	// Room for every template argument plus the terminating NULL
+	execbuf->argv_cap = execbuf->tmpl_argc + 1;
+	execbuf->argv = ALLOC_ARRAY(char *, execbuf->argv_cap);
+	if (!execbuf->argv) {
+		bfs_perror(ctx, "alloc()");
+		goto fail;
+	}
+
+	if (execbuf->flags & BFS_EXEC_MULTI) {
+		// Everything but the trailing "{}" is passed through verbatim
+		size_t fixed = execbuf->tmpl_argc - 1;
+		for (size_t i = 0; i < fixed; ++i) {
+			char *arg = execbuf->tmpl_argv[i];
+			if (strstr(arg, "{}")) {
+				bfs_exec_parse_error(ctx, execbuf);
+				bfs_error(ctx, "Only one '{}' is supported.\n");
+				goto fail;
+			}
+			execbuf->argv[i] = arg;
+		}
+		execbuf->argc = fixed;
+
+		// Start out assuming the computed maximum is exact
+		execbuf->arg_max = bfs_exec_arg_max(execbuf);
+		execbuf->arg_min = execbuf->arg_max;
+	}
+
+	return execbuf;
+
+fail:
+	bfs_exec_free(execbuf);
+	return NULL;
+}
+
+/**
+ * Format the current path for use as a command line argument.
+ *
+ * @return
+ *         A newly allocated string, or NULL on allocation failure.
+ */
+static char *bfs_exec_format_path(const struct bfs_exec *execbuf, const struct BFTW *ftwbuf) {
+	// Without BFS_EXEC_CHDIR, the path is passed unmodified
+	if (!(execbuf->flags & BFS_EXEC_CHDIR)) {
+		return strdup(ftwbuf->path);
+	}
+
+	const char *name = ftwbuf->path + ftwbuf->nameoff;
+	if (name[0] == '/') {
+		// Must be a root path ("/", "//", etc.)
+		return strdup(name);
+	}
+
+	// For compatibility with GNU find, use './name' instead of just 'name'
+	size_t len = strlen(name);
+	char *path = malloc(2 + len + 1);
+	if (path) {
+		memcpy(path, "./", 2);
+		// Copy the name along with its NUL terminator
+		memcpy(path + 2, name, len + 1);
+	}
+	return path;
+}
+
+/**
+ * Format an argument, expanding every "{}" to the current path.
+ *
+ * @return
+ *         arg itself if it contains no "{}", otherwise a new dstring (release
+ *         it with bfs_exec_free_arg()), or NULL on allocation failure.
+ */
+static char *bfs_exec_format_arg(char *arg, const char *path) {
+	const char *first = strstr(arg, "{}");
+	if (!first) {
+		// Nothing to expand, so reuse the template argument as-is
+		return arg;
+	}
+
+	dchar *out = dstralloc(0);
+	if (!out) {
+		return NULL;
+	}
+
+	// Alternate between literal text and the path for each "{}" found
+	const char *rest = arg;
+	for (const char *hit = first; hit; hit = strstr(rest, "{}")) {
+		if (dstrncat(&out, rest, hit - rest) != 0) {
+			goto err;
+		}
+		if (dstrcat(&out, path) != 0) {
+			goto err;
+		}
+		rest = hit + 2;
+	}
+
+	// Append whatever follows the last "{}"
+	if (dstrcat(&out, rest) != 0) {
+		goto err;
+	}
+
+	return out;
+
+err:
+	dstrfree(out);
+	return NULL;
+}
+
+/** Free an argument returned by bfs_exec_format_arg() for the template tmpl. */
+static void bfs_exec_free_arg(char *arg, const char *tmpl) {
+	// An argument without "{}" is just the template itself, which we don't own
+	if (arg == tmpl) {
+		return;
+	}
+
+	dstrfree((dchar *)arg);
+}
+
+/**
+ * Open a file to use as the working directory.
+ *
+ * On entry, no working directory may be open (wd_fd < 0, wd_path == NULL).
+ * On success, wd_fd/wd_path/wd_len describe the parent directory of the
+ * current file; wd_fd stays negative when the process is already in the
+ * right directory.
+ *
+ * On failure, any partially set up state is undone, so the execbuf again
+ * satisfies the preconditions above and a later call can safely retry.
+ *
+ * @return
+ *         0 on success, -1 on failure (with errno set by the failing call).
+ */
+static int bfs_exec_openwd(struct bfs_exec *execbuf, const struct BFTW *ftwbuf) {
+	bfs_assert(execbuf->wd_fd < 0);
+	bfs_assert(!execbuf->wd_path);
+
+	if (ftwbuf->at_fd != (int)AT_FDCWD) {
+		// Rely on at_fd being the immediate parent
+		bfs_assert(xbaseoff(ftwbuf->at_path) == 0);
+
+		if (!(execbuf->flags & BFS_EXEC_MULTI)) {
+			// Borrow bftw()'s descriptor; bfs_exec_closewd() won't close it
+			execbuf->wd_fd = ftwbuf->at_fd;
+			return 0;
+		}
+
+		// BFS_EXEC_MULTI keeps the directory around across calls, so it
+		// needs its own copy of the descriptor
+		execbuf->wd_fd = dup_cloexec(ftwbuf->at_fd);
+		if (execbuf->wd_fd < 0) {
+			return -1;
+		}
+	}
+
+	execbuf->wd_len = ftwbuf->nameoff;
+	if (execbuf->wd_len == 0) {
+		if (ftwbuf->path[0] == '/') {
+			++execbuf->wd_len;
+		} else {
+			// The path is something like "foo", so we're already in the right directory
+			return 0;
+		}
+	}
+
+	execbuf->wd_path = strndup(ftwbuf->path, execbuf->wd_len);
+	if (!execbuf->wd_path) {
+		goto fail;
+	}
+
+	if (execbuf->wd_fd < 0) {
+		execbuf->wd_fd = open(execbuf->wd_path, O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+		if (execbuf->wd_fd < 0) {
+			goto fail;
+		}
+	}
+
+	return 0;
+
+fail:;
+	// Don't let the cleanup below clobber the error being reported
+	int error = errno;
+
+	// A valid wd_fd here can only be the copy made by dup_cloexec() above,
+	// since the borrowed at_fd case has already returned
+	if (execbuf->wd_fd >= 0) {
+		xclose(execbuf->wd_fd);
+	}
+	execbuf->wd_fd = -1;
+
+	free(execbuf->wd_path);
+	execbuf->wd_path = NULL;
+	execbuf->wd_len = 0;
+
+	errno = error;
+	return -1;
+}
+
+/**
+ * Close the working directory.
+ *
+ * ftwbuf may be NULL; otherwise, a wd_fd equal to ftwbuf->at_fd is left open.
+ */
+static void bfs_exec_closewd(struct bfs_exec *execbuf, const struct BFTW *ftwbuf) {
+	int fd = execbuf->wd_fd;
+	if (fd >= 0) {
+		// Don't close a descriptor that is the same as bftw()'s at_fd
+		bool borrowed = ftwbuf && fd == ftwbuf->at_fd;
+		if (!borrowed) {
+			xclose(fd);
+		}
+		execbuf->wd_fd = -1;
+	}
+
+	if (!execbuf->wd_path) {
+		return;
+	}
+
+	free(execbuf->wd_path);
+	execbuf->wd_path = NULL;
+	execbuf->wd_len = 0;
+}
+
+/**
+ * Actually spawn the process, and wait for it to finish.
+ *
+ * @return
+ * 0 if the command exited with EXIT_SUCCESS.  Otherwise -1, with errno == 0
+ * if the command ran but did not succeed (or the user declined the prompt),
+ * and errno left as set by the failing call if it could not be run at all.
+ */
+static int bfs_exec_spawn(const struct bfs_exec *execbuf) {
+ const struct bfs_ctx *ctx = execbuf->ctx;
+
+ // Flush the context state for consistency with the external process
+ bfs_ctx_flush(ctx);
+
+ // For -ok/-okdir, print the command line and ask for confirmation first
+ if (execbuf->flags & BFS_EXEC_CONFIRM) {
+ for (size_t i = 0; i < execbuf->argc; ++i) {
+ if (fprintf(stderr, "%s ", execbuf->argv[i]) < 0) {
+ return -1;
+ }
+ }
+ if (fprintf(stderr, "? ") < 0) {
+ return -1;
+ }
+
+ // A non-positive answer counts as a failed command, not as an error
+ if (ynprompt() <= 0) {
+ errno = 0;
+ return -1;
+ }
+ }
+
+ if (execbuf->flags & BFS_EXEC_MULTI) {
+ bfs_exec_debug(execbuf, "Executing '%s' ... [%zu arguments] (size %zu)\n",
+ execbuf->argv[0], execbuf->argc - 1, execbuf->arg_size);
+ } else {
+ bfs_exec_debug(execbuf, "Executing '%s' ... [%zu arguments]\n", execbuf->argv[0], execbuf->argc - 1);
+ }
+
+ pid_t pid = -1;
+
+ struct bfs_spawn spawn;
+ if (bfs_spawn_init(&spawn) != 0) {
+ return -1;
+ }
+
+ spawn.flags |= BFS_SPAWN_USE_PATH;
+
+ // Run the command in the working directory, if one is open
+ if (execbuf->wd_fd >= 0) {
+ if (bfs_spawn_addfchdir(&spawn, execbuf->wd_fd) != 0) {
+ goto fail;
+ }
+ }
+
+ // Reset RLIMIT_NOFILE if necessary, to avoid breaking applications that use select()
+ if (rlim_cmp(ctx->orig_nofile.rlim_cur, ctx->cur_nofile.rlim_cur) < 0) {
+ if (bfs_spawn_setrlimit(&spawn, RLIMIT_NOFILE, &ctx->orig_nofile) != 0) {
+ goto fail;
+ }
+ }
+
+ pid = bfs_spawn(execbuf->argv[0], &spawn, execbuf->argv, NULL);
+
+ // Reached on success too; pid is still -1 if anything above failed
+fail:;
+ // Save errno so that bfs_spawn_destroy() can't clobber it
+ int error = errno;
+
+ bfs_spawn_destroy(&spawn);
+ if (pid < 0) {
+ errno = error;
+ return -1;
+ }
+
+ int wstatus;
+ if (xwaitpid(pid, &wstatus, 0) < 0) {
+ return -1;
+ }
+
+ int ret = -1;
+
+ if (WIFEXITED(wstatus)) {
+ int status = WEXITSTATUS(wstatus);
+ if (status == EXIT_SUCCESS) {
+ ret = 0;
+ } else {
+ bfs_exec_debug(execbuf, "Command '%s' failed with status %d\n", execbuf->argv[0], status);
+ }
+ } else if (WIFSIGNALED(wstatus)) {
+ int sig = WTERMSIG(wstatus);
+ const char *str = strsignal(sig);
+ if (!str) {
+ str = "unknown";
+ }
+ bfs_warning(ctx, "Command '${ex}%s${rs}' terminated by signal %d (%s)\n", execbuf->argv[0], sig, str);
+ } else {
+ bfs_warning(ctx, "Command '${ex}%s${rs}' terminated abnormally\n", execbuf->argv[0]);
+ }
+
+ // The command did run, so clear errno even if it was unsuccessful
+ errno = 0;
+ return ret;
+}
+
+/**
+ * exec() a command for a single file.
+ *
+ * Expands every template argument for the current path, opens the working
+ * directory if BFS_EXEC_CHDIR is set, and spawns the command.
+ *
+ * @return
+ * The result of bfs_exec_spawn(), or -1 if setting up the command failed.
+ */
+static int bfs_exec_single(struct bfs_exec *execbuf, const struct BFTW *ftwbuf) {
+ int ret = -1, error = 0;
+
+ char *path = bfs_exec_format_path(execbuf, ftwbuf);
+ if (!path) {
+ goto out;
+ }
+
+ // i counts the successfully formatted arguments, for the cleanup below
+ size_t i;
+ for (i = 0; i < execbuf->tmpl_argc; ++i) {
+ execbuf->argv[i] = bfs_exec_format_arg(execbuf->tmpl_argv[i], path);
+ if (!execbuf->argv[i]) {
+ goto out_free;
+ }
+ }
+ execbuf->argv[i] = NULL;
+ execbuf->argc = i;
+
+ if (execbuf->flags & BFS_EXEC_CHDIR) {
+ if (bfs_exec_openwd(execbuf, ftwbuf) != 0) {
+ goto out_free;
+ }
+ }
+
+ ret = bfs_exec_spawn(execbuf);
+
+out_free:
+ // Preserve errno across the cleanup
+ error = errno;
+
+ bfs_exec_closewd(execbuf, ftwbuf);
+
+ for (size_t j = 0; j < i; ++j) {
+ bfs_exec_free_arg(execbuf->argv[j], execbuf->tmpl_argv[j]);
+ }
+
+ free(path);
+
+ errno = error;
+
+out:
+ return ret;
+}
+
+/** Check if any arguments remain in the buffer. */
+static bool bfs_exec_args_remain(const struct bfs_exec *execbuf) {
+	// An empty BFS_EXEC_MULTI buffer holds just the tmpl_argc - 1 fixed
+	// arguments, so anything from tmpl_argc upwards means a path is buffered
+	return !(execbuf->argc < execbuf->tmpl_argc);
+}
+
+/** Compute the current ARG_MAX estimate for binary search. */
+static size_t bfs_exec_estimate_max(const struct bfs_exec *execbuf) {
+	// Midpoint of [arg_min, arg_max]
+	size_t lo = execbuf->arg_min;
+	size_t span = execbuf->arg_max - lo;
+	return lo + span / 2;
+}
+
+/** Update the ARG_MAX lower bound from a successful execution. */
+static void bfs_exec_update_min(struct bfs_exec *execbuf) {
+	// Only a command bigger than the current lower bound tells us anything
+	if (execbuf->arg_size <= execbuf->arg_min) {
+		return;
+	}
+
+	// Raise the lower bound, but don't let min exceed max
+	size_t size = execbuf->arg_size;
+	execbuf->arg_min = size > execbuf->arg_max ? execbuf->arg_max : size;
+
+	size_t estimate = bfs_exec_estimate_max(execbuf);
+	bfs_exec_debug(execbuf, "ARG_MAX between [%zu, %zu], trying %zu\n",
+		execbuf->arg_min, execbuf->arg_max, estimate);
+}
+
+/**
+ * Update the ARG_MAX upper bound from a failed execution.
+ *
+ * @return
+ *         The new estimate that the argument list should be shrunk to.
+ */
+static size_t bfs_exec_update_max(struct bfs_exec *execbuf) {
+	bfs_exec_debug(execbuf, "Got E2BIG, shrinking argument list...\n");
+
+	const size_t failed = execbuf->arg_size;
+	if (failed <= execbuf->arg_min) {
+		// Lower bound was wrong, restart binary search.
+		execbuf->arg_min = 0;
+	}
+
+	// Trim a fraction off the failed size, to avoid repeated failures near
+	// the top end of the working range
+	const size_t trimmed = failed - failed / 16;
+	if (trimmed < execbuf->arg_max) {
+		execbuf->arg_max = trimmed;
+
+		// Don't let min exceed max
+		if (execbuf->arg_min > trimmed) {
+			execbuf->arg_min = trimmed;
+		}
+	}
+
+	// Binary search for a more precise bound
+	size_t estimate = bfs_exec_estimate_max(execbuf);
+	bfs_exec_debug(execbuf, "ARG_MAX between [%zu, %zu], trying %zu\n",
+		execbuf->arg_min, execbuf->arg_max, estimate);
+	return estimate;
+}
+
+/**
+ * Execute the pending command from a BFS_EXEC_MULTI execbuf.
+ *
+ * If the command fails with E2BIG, retries with fewer and fewer arguments.
+ * Arguments that were dropped that way are kept in the buffer for the next
+ * flush if the command eventually succeeded, and freed otherwise.
+ *
+ * @return
+ * The result of the last bfs_exec_spawn() call (0 if nothing was buffered),
+ * with errno restored to the value it left behind.
+ */
+static int bfs_exec_flush(struct bfs_exec *execbuf) {
+ int ret = 0, error = 0;
+
+ size_t orig_argc = execbuf->argc;
+ while (bfs_exec_args_remain(execbuf)) {
+ execbuf->argv[execbuf->argc] = NULL;
+ ret = bfs_exec_spawn(execbuf);
+ error = errno;
+ if (ret == 0) {
+ bfs_exec_update_min(execbuf);
+ break;
+ } else if (error != E2BIG) {
+ break;
+ }
+
+ // Try to recover from E2BIG by trying fewer and fewer arguments
+ // until they fit
+ size_t new_max = bfs_exec_update_max(execbuf);
+ while (execbuf->arg_size > new_max) {
+ // Shift the last argument up one slot, into the place of the
+ // NULL terminator (or of the previously dropped argument)
+ execbuf->argv[execbuf->argc] = execbuf->argv[execbuf->argc - 1];
+ execbuf->arg_size -= bfs_exec_arg_size(execbuf->argv[execbuf->argc]);
+ --execbuf->argc;
+ }
+ }
+
+ // Free the path arguments that were still part of the command, and reset
+ // the buffer to just the fixed arguments
+ size_t new_argc = execbuf->argc;
+ for (size_t i = execbuf->tmpl_argc - 1; i < new_argc; ++i) {
+ free(execbuf->argv[i]);
+ }
+ execbuf->argc = execbuf->tmpl_argc - 1;
+ execbuf->arg_size = 0;
+
+ if (new_argc < orig_argc) {
+ // If we recovered from E2BIG, there are unused arguments at the
+ // end of the list
+ // (shifted up by one, so they sit at indices new_argc + 1 .. orig_argc)
+ for (size_t i = new_argc + 1; i <= orig_argc; ++i) {
+ if (error == 0) {
+ // Move them to the front, to be executed by the next flush
+ execbuf->argv[execbuf->argc] = execbuf->argv[i];
+ execbuf->arg_size += bfs_exec_arg_size(execbuf->argv[execbuf->argc]);
+ ++execbuf->argc;
+ } else {
+ free(execbuf->argv[i]);
+ }
+ }
+ }
+
+ errno = error;
+ return ret;
+}
+
+/** Check if we need to flush the execbuf because we're changing directories. */
+static bool bfs_exec_changed_dirs(const struct bfs_exec *execbuf, const struct BFTW *ftwbuf) {
+	// The working directory only matters with BFS_EXEC_CHDIR
+	if (!(execbuf->flags & BFS_EXEC_CHDIR)) {
+		return false;
+	}
+
+	// The parent directory differs if its path is longer than the saved
+	// one, or doesn't start with it
+	bool changed = ftwbuf->nameoff > execbuf->wd_len;
+	if (!changed && execbuf->wd_path) {
+		changed = strncmp(ftwbuf->path, execbuf->wd_path, execbuf->wd_len) != 0;
+	}
+
+	if (!changed) {
+		return false;
+	}
+
+	bfs_exec_debug(execbuf, "Changed directories, executing buffered command\n");
+	return true;
+}
+
+/** Check if we need to flush the execbuf because adding arg would make it too big. */
+static bool bfs_exec_would_overflow(const struct bfs_exec *execbuf, const char *arg) {
+	size_t limit = bfs_exec_estimate_max(execbuf);
+	size_t grown = execbuf->arg_size + bfs_exec_arg_size(arg);
+
+	if (grown <= limit) {
+		// Still fits within the current estimate
+		return false;
+	}
+
+	bfs_exec_debug(execbuf, "Command size (%zu) would exceed maximum (%zu), executing buffered command\n",
+		grown, limit);
+	return true;
+}
+
+/**
+ * Push a new argument to a BFS_EXEC_MULTI execbuf.
+ *
+ * @return
+ *         0 on success, or -1 if growing argv failed, in which case the
+ *         argument is not counted as part of the buffer.
+ */
+static int bfs_exec_push(struct bfs_exec *execbuf, char *arg) {
+	size_t slot = execbuf->argc;
+	execbuf->argv[slot] = arg;
+
+	// Double the array when no spare slot would remain after this argument
+	if (slot + 1 >= execbuf->argv_cap) {
+		size_t new_cap = 2 * execbuf->argv_cap;
+		char **grown = REALLOC_ARRAY(char *, execbuf->argv, execbuf->argv_cap, new_cap);
+		if (!grown) {
+			return -1;
+		}
+		execbuf->argv = grown;
+		execbuf->argv_cap = new_cap;
+	}
+
+	execbuf->argc = slot + 1;
+	execbuf->arg_size += bfs_exec_arg_size(arg);
+	return 0;
+}
+
+/**
+ * Handle a new path for a BFS_EXEC_MULTI execbuf.
+ *
+ * Flushes the buffered command first if the directory changed or the new
+ * argument would not fit, then adds the path to the buffer.
+ *
+ * @return
+ * 0 on success, non-zero if a flush failed or the path couldn't be added.
+ */
+static int bfs_exec_multi(struct bfs_exec *execbuf, const struct BFTW *ftwbuf) {
+ int ret = 0;
+
+ char *arg = bfs_exec_format_path(execbuf, ftwbuf);
+ if (!arg) {
+ ret = -1;
+ goto out;
+ }
+
+ if (bfs_exec_changed_dirs(execbuf, ftwbuf)) {
+ // Everything buffered belongs to the old directory, so run all of it
+ while (bfs_exec_args_remain(execbuf)) {
+ ret |= bfs_exec_flush(execbuf);
+ }
+ bfs_exec_closewd(execbuf, ftwbuf);
+ } else if (bfs_exec_would_overflow(execbuf, arg)) {
+ ret |= bfs_exec_flush(execbuf);
+ }
+
+ // Open the working directory if there isn't one open already
+ if ((execbuf->flags & BFS_EXEC_CHDIR) && execbuf->wd_fd < 0) {
+ if (bfs_exec_openwd(execbuf, ftwbuf) != 0) {
+ ret = -1;
+ goto out_arg;
+ }
+ }
+
+ if (bfs_exec_push(execbuf, arg) != 0) {
+ ret = -1;
+ goto out_arg;
+ }
+
+ // arg will get cleaned up later by bfs_exec_flush()
+ goto out;
+
+out_arg:
+ free(arg);
+out:
+ return ret;
+}
+
+/**
+ * Execute the command for a file, or with BFS_EXEC_MULTI, add the file to
+ * the buffered command (recording any failure for bfs_exec_finish()).
+ */
+int bfs_exec(struct bfs_exec *execbuf, const struct BFTW *ftwbuf) {
+	if (!(execbuf->flags & BFS_EXEC_MULTI)) {
+		return bfs_exec_single(execbuf, ftwbuf);
+	}
+
+	if (bfs_exec_multi(execbuf, ftwbuf) != 0) {
+		// Remember the failure for bfs_exec_finish()
+		execbuf->ret = -1;
+	} else {
+		errno = 0;
+	}
+
+	// -exec ... + never returns false
+	return 0;
+}
+
+/**
+ * Finish executing any commands: with BFS_EXEC_MULTI, run whatever is still
+ * buffered.  Returns the accumulated execbuf->ret.
+ */
+int bfs_exec_finish(struct bfs_exec *execbuf) {
+	// Only BFS_EXEC_MULTI buffers commands
+	if (!(execbuf->flags & BFS_EXEC_MULTI)) {
+		return execbuf->ret;
+	}
+
+	bfs_exec_debug(execbuf, "Finishing execution, executing buffered command\n");
+
+	// A single flush may not run everything if it had to recover from E2BIG
+	while (bfs_exec_args_remain(execbuf)) {
+		execbuf->ret |= bfs_exec_flush(execbuf);
+	}
+
+	if (execbuf->ret != 0) {
+		bfs_exec_debug(execbuf, "One or more executions of '%s' failed\n", execbuf->argv[0]);
+	}
+
+	return execbuf->ret;
+}
+
+/** Free a parsed exec action (a NULL execbuf is allowed). */
+void bfs_exec_free(struct bfs_exec *execbuf) {
+	if (!execbuf) {
+		return;
+	}
+
+	// Release the working directory before the buffer itself
+	bfs_exec_closewd(execbuf, NULL);
+	free(execbuf->argv);
+	free(execbuf);
+}
diff --git a/src/exec.h b/src/exec.h
new file mode 100644
index 0000000..9d4192d
--- /dev/null
+++ b/src/exec.h
@@ -0,0 +1,108 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+/**
+ * Implementation of -exec/-execdir/-ok/-okdir.
+ */
+
+#ifndef BFS_EXEC_H
+#define BFS_EXEC_H
+
+#include <stddef.h>
+
+struct BFTW;
+struct bfs_ctx;
+
+/**
+ * Flags for the -exec actions.
+ *
+ * These are bit flags, and may be combined.
+ */
+enum bfs_exec_flags {
+ /** Prompt the user before executing (-ok, -okdir). */
+ BFS_EXEC_CONFIRM = 1 << 0,
+ /** Run the command in the file's parent directory (-execdir, -okdir). */
+ BFS_EXEC_CHDIR = 1 << 1,
+ /** Pass multiple files at once to the command (-exec ... {} +). */
+ BFS_EXEC_MULTI = 1 << 2,
+};
+
+/**
+ * Buffer for a command line to be executed.
+ */
+struct bfs_exec {
+ /** Flags for this exec buffer. */
+ enum bfs_exec_flags flags;
+
+ /** The bfs context. */
+ const struct bfs_ctx *ctx;
+ /** Command line template (points into the bfs command line, just past the action itself). */
+ char **tmpl_argv;
+ /** Command line template size (not counting the terminating ";" or "+"). */
+ size_t tmpl_argc;
+
+ /** The built command line. */
+ char **argv;
+ /** Number of command line arguments. */
+ size_t argc;
+ /** Capacity of argv. */
+ size_t argv_cap;
+
+ /** Current size of all arguments. */
+ size_t arg_size;
+ /** Maximum arg_size before E2BIG. */
+ size_t arg_max;
+ /** Lower bound for arg_max. */
+ size_t arg_min;
+
+ /** A file descriptor for the working directory, for BFS_EXEC_CHDIR (negative if none is open). */
+ int wd_fd;
+ /** The path to the working directory, for BFS_EXEC_CHDIR (may be NULL). */
+ char *wd_path;
+ /** Length of the working directory path. */
+ size_t wd_len;
+
+ /** The ultimate return value for bfs_exec_finish(). */
+ int ret;
+};
+
+/**
+ * Parse an exec action.
+ *
+ * @param ctx
+ * The bfs context.
+ * @param argv
+ * The (bfs) command line argument to parse.
+ * @param flags
+ * Any flags for this exec action.
+ * @return
+ * The parsed exec action, or NULL on failure.
+ */
+struct bfs_exec *bfs_exec_parse(const struct bfs_ctx *ctx, char **argv, enum bfs_exec_flags flags);
+
+/**
+ * Execute the command for a file.
+ *
+ * @param execbuf
+ * The parsed exec action.
+ * @param ftwbuf
+ * The bftw() data for the current file.
+ * @return 0 if the command succeeded, -1 if it failed. If the command could
+ * not be executed, -1 is returned, and errno will be non-zero. For
+ * BFS_EXEC_MULTI, errors will not be reported until bfs_exec_finish().
+ */
+int bfs_exec(struct bfs_exec *execbuf, const struct BFTW *ftwbuf);
+
+/**
+ * Finish executing any commands.
+ *
+ * @param execbuf
+ * The parsed exec action.
+ * @return 0 on success, -1 if any errors were encountered.
+ */
+int bfs_exec_finish(struct bfs_exec *execbuf);
+
+/**
+ * Free a parsed exec action.
+ */
+void bfs_exec_free(struct bfs_exec *execbuf);
+
+#endif // BFS_EXEC_H
diff --git a/src/expr.c b/src/expr.c
new file mode 100644
index 0000000..5784220
--- /dev/null
+++ b/src/expr.c
@@ -0,0 +1,85 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+#include "expr.h"
+#include "alloc.h"
+#include "ctx.h"
+#include "diag.h"
+#include "eval.h"
+#include "exec.h"
+#include "list.h"
+#include "printf.h"
+#include "xregex.h"
+#include <string.h>
+
+/**
+ * Allocate and initialize a new expression node.
+ *
+ * @param ctx
+ *         The context whose expression arena and expression list are used.
+ * @param eval_fn
+ *         The evaluation function for the new expression.
+ * @param argc
+ *         The number of command line arguments in argv.
+ * @param argv
+ *         The command line arguments for this expression (stored, not copied).
+ * @return
+ *         The new expression, or NULL if the arena allocation failed.
+ */
+struct bfs_expr *bfs_expr_new(struct bfs_ctx *ctx, bfs_eval_fn *eval_fn, size_t argc, char **argv) {
+	struct bfs_expr *node = arena_alloc(&ctx->expr_arena);
+	if (node) {
+		// Start from an all-zero node, then fill in the non-zero defaults
+		memset(node, 0, sizeof(*node));
+		node->eval_fn = eval_fn;
+		node->argc = argc;
+		node->argv = argv;
+		node->probability = 0.5;
+
+		// Link the node into the context's list of expressions
+		SLIST_PREPEND(&ctx->expr_list, node, freelist);
+
+		// Only parent expressions use the children member of the union
+		if (bfs_expr_is_parent(node)) {
+			SLIST_INIT(&node->children);
+		}
+	}
+
+	return node;
+}
+
+/**
+ * Check whether an expression is one of the operator types (and, or, not,
+ * comma) that hold child expressions.
+ */
+bool bfs_expr_is_parent(const struct bfs_expr *expr) {
+	bfs_eval_fn *fn = expr->eval_fn;
+
+	if (fn == eval_and || fn == eval_or) {
+		return true;
+	}
+
+	return fn == eval_not || fn == eval_comma;
+}
+
+/**
+ * Get the first child of an expression.
+ *
+ * @return
+ *         The head of the child list for parent expressions, otherwise NULL.
+ */
+struct bfs_expr *bfs_expr_children(const struct bfs_expr *expr) {
+	// The children member is only meaningful for parent expressions
+	return bfs_expr_is_parent(expr) ? expr->children.head : NULL;
+}
+
+/**
+ * Append a child to a parent expression, and fold the child's purity and file
+ * descriptor requirements into the parent.
+ */
+void bfs_expr_append(struct bfs_expr *expr, struct bfs_expr *child) {
+	bfs_assert(bfs_expr_is_parent(expr));
+
+	SLIST_APPEND(&expr->children, child);
+
+	// Persistent descriptors accumulate over all children ...
+	expr->persistent_fds += child->persistent_fds;
+
+	// ... while ephemeral descriptors only need the maximum of any child
+	if (child->ephemeral_fds > expr->ephemeral_fds) {
+		expr->ephemeral_fds = child->ephemeral_fds;
+	}
+
+	// An impure child makes the whole parent impure
+	if (!child->pure) {
+		expr->pure = false;
+	}
+}
+
+/**
+ * Move every expression from a list onto the end of expr's children, in order.
+ * The list is emptied as a result.
+ */
+void bfs_expr_extend(struct bfs_expr *expr, struct bfs_exprs *children) {
+	for (;;) {
+		if (SLIST_EMPTY(children)) {
+			break;
+		}
+
+		struct bfs_expr *next_child = SLIST_POP(children);
+		bfs_expr_append(expr, next_child);
+	}
+}
+
+/**
+ * Check whether an expression is marked as never returning.
+ *
+ * Expressions that never return are vacuously both always true and always
+ * false, so both flags being set at once is used as the marker.
+ */
+bool bfs_expr_never_returns(const struct bfs_expr *expr) {
+	if (!expr->always_true) {
+		return false;
+	}
+
+	return expr->always_false;
+}
+
+/**
+ * Release the auxiliary resource owned by an expression, if its type has one
+ * (-exec data, a -printf format, or a compiled regex).
+ */
+void bfs_expr_clear(struct bfs_expr *expr) {
+	bfs_eval_fn *fn = expr->eval_fn;
+
+	if (fn == eval_exec) {
+		bfs_exec_free(expr->exec);
+		return;
+	}
+
+	if (fn == eval_fprintf) {
+		bfs_printf_free(expr->printf);
+		return;
+	}
+
+	if (fn == eval_regex) {
+		bfs_regfree(expr->regex);
+	}
+}
diff --git a/src/expr.h b/src/expr.h
new file mode 100644
index 0000000..7bcace7
--- /dev/null
+++ b/src/expr.h
@@ -0,0 +1,247 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+/**
+ * The expression tree representation.
+ */
+
+#ifndef BFS_EXPR_H
+#define BFS_EXPR_H
+
+#include "prelude.h"
+#include "color.h"
+#include "eval.h"
+#include "stat.h"
+#include <sys/types.h>
+#include <time.h>
+
+/**
+ * Integer comparison modes.
+ */
+enum bfs_int_cmp {
+ /** Exactly N. */
+ BFS_INT_EQUAL,
+ /** Less than N (-N). */
+ BFS_INT_LESS,
+ /** Greater than N (+N). */
+ BFS_INT_GREATER,
+};
+
+/**
+ * Permission comparison modes.
+ */
+enum bfs_mode_cmp {
+ /** Mode is an exact match (MODE). */
+ BFS_MODE_EQUAL,
+ /** Mode has all these bits (-MODE). */
+ BFS_MODE_ALL,
+ /** Mode has any of these bits (/MODE). */
+ BFS_MODE_ANY,
+};
+
+/**
+ * Possible time units.
+ */
+enum bfs_time_unit {
+ /** Seconds. */
+ BFS_SECONDS,
+ /** Minutes. */
+ BFS_MINUTES,
+ /** Days. */
+ BFS_DAYS,
+};
+
+/**
+ * Possible file size units.
+ */
+enum bfs_size_unit {
+ /** 512-byte blocks. */
+ BFS_BLOCKS,
+ /** Single bytes. */
+ BFS_BYTES,
+ /** Two-byte words. */
+ BFS_WORDS,
+ /** Kibibytes. */
+ BFS_KB,
+ /** Mebibytes. */
+ BFS_MB,
+ /** Gibibytes. */
+ BFS_GB,
+ /** Tebibytes. */
+ BFS_TB,
+ /** Pebibytes. */
+ BFS_PB,
+};
+
+/**
+ * A linked list of expressions.
+ */
+struct bfs_exprs {
+ /** The first expression in the list. */
+ struct bfs_expr *head;
+ /** NOTE(review): presumably points at the last node's next pointer (or at head when empty) to allow O(1) append; confirm against list.h. */
+ struct bfs_expr **tail;
+};
+
+/**
+ * A command line expression.
+ */
+struct bfs_expr {
+ /** This expression's next sibling, if any. */
+ struct bfs_expr *next;
+ /** The next allocated expression. */
+ struct { struct bfs_expr *next; } freelist;
+
+ /** The function that evaluates this expression. */
+ bfs_eval_fn *eval_fn;
+
+ /** The number of command line arguments for this expression. */
+ size_t argc;
+ /** The command line arguments comprising this expression. */
+ char **argv;
+
+ /** The number of files this expression keeps open between evaluations. */
+ int persistent_fds;
+ /** The number of files this expression opens during evaluation. */
+ int ephemeral_fds;
+
+ /** Whether this expression has no side effects. */
+ bool pure;
+ /** Whether this expression always evaluates to true. */
+ bool always_true;
+ /** Whether this expression always evaluates to false. */
+ bool always_false;
+ /** Whether this expression uses stat(). */
+ bool calls_stat;
+
+ /** Estimated cost. */
+ float cost;
+ /** Estimated probability of success. */
+ float probability;
+ /** Number of times this predicate was evaluated. */
+ size_t evaluations;
+ /** Number of times this predicate succeeded. */
+ size_t successes;
+ /** Total time spent running this predicate. */
+ struct timespec elapsed;
+
+ /** Auxiliary data for the evaluation function. */
+ union {
+ /** Child expressions. */
+ struct bfs_exprs children;
+
+ /** Integer comparisons. */
+ struct {
+ /** Integer for this comparison. */
+ long long num;
+ /** The comparison mode. */
+ enum bfs_int_cmp int_cmp;
+
+ /** -size data. */
+ enum bfs_size_unit size_unit;
+
+ /** The stat field to look at. */
+ enum bfs_stat_field stat_field;
+ /** The time unit. */
+ enum bfs_time_unit time_unit;
+ /** The reference time. */
+ struct timespec reftime;
+ };
+
+ /** String comparisons. */
+ struct {
+ /** String pattern. */
+ const char *pattern;
+ /** fnmatch() flags. */
+ int fnm_flags;
+ /** Whether strcmp() can be used instead of fnmatch(). */
+ bool literal;
+ };
+
+ /** Printing actions. */
+ struct {
+ /** The output stream. */
+ CFILE *cfile;
+ /** Optional file path. */
+ const char *path;
+ /** Optional -printf format. */
+ struct bfs_printf *printf;
+ };
+
+ /** -exec data. */
+ struct bfs_exec *exec;
+
+ /** -flags data. */
+ struct {
+ /** The comparison mode. */
+ enum bfs_mode_cmp flags_cmp;
+ /** Flags that should be set. */
+ unsigned long long set_flags;
+ /** Flags that should be cleared. */
+ unsigned long long clear_flags;
+ };
+
+ /** -perm data. */
+ struct {
+ /** The comparison mode. */
+ enum bfs_mode_cmp mode_cmp;
+ /** Mode to use for files. */
+ mode_t file_mode;
+ /** Mode to use for directories (different due to X). */
+ mode_t dir_mode;
+ };
+
+ /** -regex data. */
+ struct bfs_regex *regex;
+
+ /** -samefile data. */
+ struct {
+ /** Device number of the target file. */
+ dev_t dev;
+ /** Inode number of the target file. */
+ ino_t ino;
+ };
+ };
+};
+
+struct bfs_ctx;
+
+/**
+ * Create a new expression.
+ */
+struct bfs_expr *bfs_expr_new(struct bfs_ctx *ctx, bfs_eval_fn *eval, size_t argc, char **argv);
+
+/**
+ * @return Whether this type of expression has children.
+ */
+bool bfs_expr_is_parent(const struct bfs_expr *expr);
+
+/**
+ * @return The first child of this expression, or NULL if it has none.
+ */
+struct bfs_expr *bfs_expr_children(const struct bfs_expr *expr);
+
+/**
+ * Add a child to an expression.
+ */
+void bfs_expr_append(struct bfs_expr *expr, struct bfs_expr *child);
+
+/**
+ * Add a list of children to an expression.
+ */
+void bfs_expr_extend(struct bfs_expr *expr, struct bfs_exprs *children);
+
+/**
+ * @return Whether expr is known to always quit.
+ */
+bool bfs_expr_never_returns(const struct bfs_expr *expr);
+
+/**
+ * @return The result of the integer comparison for this expression.
+ */
+bool bfs_expr_cmp(const struct bfs_expr *expr, long long n);
+
+/**
+ * Free any resources owned by an expression.
+ */
+void bfs_expr_clear(struct bfs_expr *expr);
+
+#endif // BFS_EXPR_H
diff --git a/src/fsade.c b/src/fsade.c
new file mode 100644
index 0000000..7310141
--- /dev/null
+++ b/src/fsade.c
@@ -0,0 +1,508 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+#include "prelude.h"
+#include "fsade.h"
+#include "atomic.h"
+#include "bfstd.h"
+#include "bftw.h"
+#include "dir.h"
+#include "dstring.h"
+#include "sanity.h"
+#include <errno.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <unistd.h>
+
+#if BFS_CAN_CHECK_ACL
+# include <sys/acl.h>
+#endif
+
+#if BFS_CAN_CHECK_CAPABILITIES
+# include <sys/capability.h>
+#endif
+
+#if BFS_CAN_CHECK_CONTEXT
+# include <selinux/selinux.h>
+#endif
+
+#if BFS_USE_SYS_EXTATTR_H
+# include <sys/extattr.h>
+#elif BFS_USE_SYS_XATTR_H
+# include <sys/xattr.h>
+#endif
+
+/**
+ * Many of the APIs used here don't have *at() variants, but we can try to
+ * emulate something similar if /proc/self/fd is available.
+ *
+ * @param ftwbuf
+ *         The file being visited.
+ * @return
+ *         Either a newly allocated "/proc/self/fd/<at_fd>/<at_path>" string, or
+ *         ftwbuf->path itself as a fallback.  Pass the result to
+ *         free_fake_at() when done with it.
+ */
+attr(maybe_unused)
+static const char *fake_at(const struct BFTW *ftwbuf) {
+ // Cached probe result: -1 = not probed yet, 0 = unusable, 1 = usable
+ static atomic int proc_works = -1;
+
+ dchar *path = NULL;
+ if (ftwbuf->at_fd == (int)AT_FDCWD || load(&proc_works, relaxed) == 0) {
+ goto fail;
+ }
+
+ path = dstrprintf("/proc/self/fd/%d/", ftwbuf->at_fd);
+ if (!path) {
+ goto fail;
+ }
+
+ // First use: check that the /proc/self/fd/<fd>/ directory is accessible
+ if (load(&proc_works, relaxed) < 0) {
+ if (xfaccessat(AT_FDCWD, path, F_OK) != 0) {
+ store(&proc_works, 0, relaxed);
+ goto fail;
+ } else {
+ store(&proc_works, 1, relaxed);
+ }
+ }
+
+ if (dstrcat(&path, ftwbuf->at_path) != 0) {
+ goto fail;
+ }
+
+ return path;
+
+fail:
+ // Fall back to the regular path; path may still be NULL here
+ dstrfree(path);
+ return ftwbuf->path;
+}
+
+/**
+ * Release a path returned by fake_at().
+ *
+ * @param ftwbuf
+ *         The same ftwbuf that was passed to fake_at().
+ * @param path
+ *         The fake_at() result.
+ */
+attr(maybe_unused)
+static void free_fake_at(const struct BFTW *ftwbuf, const char *path) {
+	// fake_at() fell back to ftwbuf->path itself, which is not ours to free
+	if (path == ftwbuf->path) {
+		return;
+	}
+
+	dstrfree((dchar *)path);
+}
+
+/**
+ * Check if an error was caused by the absence of support or data for a feature.
+ *
+ * @param error
+ *         The errno value to classify.
+ * @return
+ *         Whether the error just means the feature/data is not present.
+ */
+attr(maybe_unused)
+static bool is_absence_error(int error) {
+	// ENOTSUP: if the OS doesn't support the feature, it's obviously not
+	// enabled for any files.
+	// EINVAL: on at least FreeBSD and macOS, this is returned when the
+	// requested ACL type is not supported for that file.
+	bool absent = error == ENOTSUP || error == EINVAL;
+
+	// On Linux, ACLs and capabilities are implemented in terms of extended
+	// attributes, which report ENODATA/ENOATTR when missing
+#ifdef ENODATA
+	absent = absent || error == ENODATA;
+#endif
+
+#if defined(ENOATTR) && ENOATTR != ENODATA
+	absent = absent || error == ENOATTR;
+#endif
+
+#if __APPLE__
+	// On macOS, ENOENT can also signal that a file has no ACLs
+	absent = absent || error == ENOENT;
+#endif
+
+	return absent;
+}
+
+#if BFS_CAN_CHECK_ACL
+
+#if BFS_HAS_ACL_GET_FILE
+
+/**
+ * Unified interface for incompatible acl_get_entry() implementations.
+ *
+ * @param acl
+ *         The ACL to iterate over.
+ * @param which
+ *         ACL_FIRST_ENTRY or ACL_NEXT_ENTRY.
+ * @param entry
+ *         Receives the requested entry.
+ * @return
+ *         A positive value if *entry refers to a valid entry.  Zero or a
+ *         negative value otherwise (callers iterate while the result is > 0).
+ */
+static int bfs_acl_entry(acl_t acl, int which, acl_entry_t *entry) {
+#if BFS_HAS_ACL_GET_ENTRY
+	int ret = acl_get_entry(acl, which, entry);
+# if __APPLE__
+	// POSIX.1e specifies a return value of 1 for success, but macOS returns 0 instead
+	return !ret;
+# else
+	return ret;
+# endif
+#elif __DragonFly__
+# if !defined(ACL_FIRST_ENTRY) && !defined(ACL_NEXT_ENTRY)
+# define ACL_FIRST_ENTRY 0
+# define ACL_NEXT_ENTRY 1
+# endif
+
+	switch (which) {
+	case ACL_FIRST_ENTRY:
+		*entry = &acl->acl_entry[0];
+		break;
+	case ACL_NEXT_ENTRY:
+		++*entry;
+		break;
+	default:
+		errno = EINVAL;
+		return -1;
+	}
+
+	// Report 1 while *entry is still inside the array, and 0 once it
+	// reaches the one-past-the-end position, to match the POSIX.1e
+	// convention used by the other branches
+	acl_entry_t last = &acl->acl_entry[acl->acl_cnt];
+	return *entry != last;
+#else
+	errno = ENOTSUP;
+	return -1;
+#endif
+}
+
+/**
+ * Unified interface for acl_get_tag_type().
+ *
+ * @param entry
+ *         The ACL entry to inspect.
+ * @param tag
+ *         Receives the entry's tag type.
+ * @return
+ *         0 on success in the DragonFly fallback, or the acl_get_tag_type()
+ *         result where that is available; -1 with errno set to ENOTSUP if
+ *         neither implementation exists.
+ */
+attr(maybe_unused)
+static int bfs_acl_tag_type(acl_entry_t entry, acl_tag_t *tag) {
+#if BFS_HAS_ACL_GET_TAG_TYPE
+ return acl_get_tag_type(entry, tag);
+#elif __DragonFly__
+ // No accessor function here, so read the tag field directly
+ *tag = entry->ae_tag;
+ return 0;
+#else
+ errno = ENOTSUP;
+ return -1;
+#endif
+}
+
+/**
+ * Check if a POSIX.1e ACL is non-trivial.
+ *
+ * @param acl
+ *         The ACL to scan.
+ * @param ignore_required
+ *         Whether to skip the ACL_USER_OBJ, ACL_GROUP_OBJ, and ACL_OTHER
+ *         entries (where those tags are defined).
+ * @return
+ *         1 as soon as an entry that counts is found.  Otherwise 0, or -1 if
+ *         the tag of some entry could not be read.
+ */
+static int bfs_check_posix1e_acl(acl_t acl, bool ignore_required) {
+	int result = 0;
+
+	acl_entry_t entry;
+	int status = bfs_acl_entry(acl, ACL_FIRST_ENTRY, &entry);
+	while (status > 0) {
+		bool skip = false;
+
+#if defined(ACL_USER_OBJ) && defined(ACL_GROUP_OBJ) && defined(ACL_OTHER)
+		if (ignore_required) {
+			acl_tag_t tag;
+			if (bfs_acl_tag_type(entry, &tag) != 0) {
+				// Remember the failure, but keep scanning
+				result = -1;
+				skip = true;
+			} else if (tag == ACL_USER_OBJ || tag == ACL_GROUP_OBJ || tag == ACL_OTHER) {
+				skip = true;
+			}
+		}
+#endif
+
+		if (!skip) {
+			// Found an entry that makes the ACL non-trivial
+			return 1;
+		}
+
+		status = bfs_acl_entry(acl, ACL_NEXT_ENTRY, &entry);
+	}
+
+	return result;
+}
+
+/**
+ * Check if an ACL of the given type is non-trivial.
+ *
+ * @param acl
+ *         The ACL to check.
+ * @param type
+ *         The type that was used to fetch the ACL.
+ * @return
+ *         1 if the ACL is non-trivial, 0 if it is trivial, -1 on error.
+ */
+static int bfs_check_acl_type(acl_t acl, acl_type_t type) {
+	// For directory default ACLs, any entries make them non-trivial
+	if (type == ACL_TYPE_DEFAULT) {
+		return bfs_check_posix1e_acl(acl, false);
+	}
+
+#if BFS_HAS_ACL_IS_TRIVIAL_NP
+	int is_trivial;
+	int status = acl_is_trivial_np(acl, &is_trivial);
+
+	// msan seems to be missing an interceptor for acl_is_trivial_np()
+	sanitize_init(&is_trivial);
+
+	if (status < 0) {
+		return -1;
+	}
+
+	return is_trivial ? 0 : 1;
+#else
+	return bfs_check_posix1e_acl(acl, true);
+#endif
+}
+
+#endif // BFS_HAS_ACL_GET_FILE
+
+/**
+ * Check whether a file has a non-trivial ACL.
+ *
+ * @param ftwbuf
+ *         The file to check.
+ * @return
+ *         1 if a non-trivial ACL was found, 0 if not, or -1 on error.
+ *         Symbolic links always yield 0.
+ */
+int bfs_check_acl(const struct BFTW *ftwbuf) {
+ if (ftwbuf->type == BFS_LNK) {
+ return 0;
+ }
+
+ const char *path = fake_at(ftwbuf);
+
+#if BFS_HAS_ACL_TRIVIAL
+ int ret = acl_trivial(path);
+ int error = errno;
+#elif BFS_HAS_ACL_GET_FILE
+ static const acl_type_t acl_types[] = {
+# if __APPLE__
+ // macOS gives EINVAL for either of the two standard ACL types,
+ // supporting only ACL_TYPE_EXTENDED
+ ACL_TYPE_EXTENDED,
+# else
+ // The two standard POSIX.1e ACL types
+ ACL_TYPE_ACCESS,
+ ACL_TYPE_DEFAULT,
+# endif
+
+# ifdef ACL_TYPE_NFS4
+ ACL_TYPE_NFS4,
+# endif
+ };
+
+ // Try each ACL type in turn, stopping at the first non-trivial one (ret > 0)
+ int ret = -1, error = 0;
+ for (size_t i = 0; i < countof(acl_types) && ret <= 0; ++i) {
+ acl_type_t type = acl_types[i];
+
+ if (type == ACL_TYPE_DEFAULT && ftwbuf->type != BFS_DIR) {
+ // ACL_TYPE_DEFAULT is supported only for directories,
+ // otherwise acl_get_file() gives EACCES
+ continue;
+ }
+
+ acl_t acl = acl_get_file(path, type);
+ if (!acl) {
+ // A missing ACL of this type counts as "no ACL", not as a failure
+ error = errno;
+ if (is_absence_error(error)) {
+ ret = 0;
+ }
+ continue;
+ }
+
+ ret = bfs_check_acl_type(acl, type);
+ // Save errno before acl_free() gets a chance to change it
+ error = errno;
+ acl_free(acl);
+ }
+#endif
+
+ free_fake_at(ftwbuf, path);
+ errno = error;
+ return ret;
+}
+
+#else // !BFS_CAN_CHECK_ACL
+
+/** Fallback used when BFS_CAN_CHECK_ACL is false: always fails with ENOTSUP. */
+int bfs_check_acl(const struct BFTW *ftwbuf) {
+ errno = ENOTSUP;
+ return -1;
+}
+
+#endif
+
+#if BFS_CAN_CHECK_CAPABILITIES
+
+/**
+ * Check whether a file has a non-empty capability set.
+ *
+ * @param ftwbuf
+ *         The file to check.
+ * @return
+ *         1 if the textual form of the capability set is non-empty, 0 if it is
+ *         empty or the capabilities are absent, or -1 on error.  Symbolic links
+ *         always yield 0.
+ */
+int bfs_check_capabilities(const struct BFTW *ftwbuf) {
+ if (ftwbuf->type == BFS_LNK) {
+ return 0;
+ }
+
+ int ret = -1, error;
+ const char *path = fake_at(ftwbuf);
+
+ cap_t caps = cap_get_file(path);
+ if (!caps) {
+ // Missing capabilities are reported as 0 rather than as an error
+ error = errno;
+ if (is_absence_error(error)) {
+ ret = 0;
+ }
+ goto out_path;
+ }
+
+ // TODO: Any better way to check for a non-empty capability set?
+ char *text = cap_to_text(caps, NULL);
+ if (!text) {
+ error = errno;
+ goto out_caps;
+ }
+ ret = text[0] ? 1 : 0;
+
+ // Capture errno so it can be restored after the cleanup calls below
+ error = errno;
+ cap_free(text);
+out_caps:
+ cap_free(caps);
+out_path:
+ free_fake_at(ftwbuf, path);
+ errno = error;
+ return ret;
+}
+
+#else // !BFS_CAN_CHECK_CAPABILITIES
+
+/** Fallback used when BFS_CAN_CHECK_CAPABILITIES is false: always fails with ENOTSUP. */
+int bfs_check_capabilities(const struct BFTW *ftwbuf) {
+ errno = ENOTSUP;
+ return -1;
+}
+
+#endif
+
+#if BFS_CAN_CHECK_XATTRS
+
+#if BFS_USE_SYS_EXTATTR_H
+
+/**
+ * Wrapper for extattr_list_{file,link}.
+ *
+ * @param path
+ *         The path to query.
+ * @param type
+ *         The file type; BFS_LNK selects the *_link() variants.
+ * @param namespace
+ *         The extended attribute namespace to list.
+ * @return
+ *         The result of the underlying call (made with a NULL buffer of size
+ *         0), or 0 if no suitable function is available.
+ */
+static ssize_t bfs_extattr_list(const char *path, enum bfs_type type, int namespace) {
+ if (type == BFS_LNK) {
+#if BFS_HAS_EXTATTR_LIST_LINK
+ return extattr_list_link(path, namespace, NULL, 0);
+#elif BFS_HAS_EXTATTR_GET_LINK
+ // Same deprecated empty-name fallback as described for files below
+ return extattr_get_link(path, namespace, "", NULL, 0);
+#else
+ return 0;
+#endif
+ }
+
+#if BFS_HAS_EXTATTR_LIST_FILE
+ return extattr_list_file(path, namespace, NULL, 0);
+#elif BFS_HAS_EXTATTR_GET_FILE
+ // From man extattr(2):
+ //
+ // In earlier versions of this API, passing an empty string for the
+ // attribute name to extattr_get_file() would return the list of attributes
+ // defined for the target object. This interface has been deprecated in
+ // preference to using the explicit list API, and should not be used.
+ return extattr_get_file(path, namespace, "", NULL, 0);
+#else
+ return 0;
+#endif
+}
+
+/**
+ * Wrapper for extattr_get_{file,link}.
+ *
+ * @param path
+ *         The path to query.
+ * @param type
+ *         The file type; BFS_LNK selects extattr_get_link().
+ * @param namespace
+ *         The extended attribute namespace to look in.
+ * @param name
+ *         The attribute name.
+ * @return
+ *         The result of the underlying call (made with a NULL buffer of size
+ *         0), or 0 if no suitable function is available.
+ */
+static ssize_t bfs_extattr_get(const char *path, enum bfs_type type, int namespace, const char *name) {
+ if (type == BFS_LNK) {
+#if BFS_HAS_EXTATTR_GET_LINK
+ return extattr_get_link(path, namespace, name, NULL, 0);
+#else
+ return 0;
+#endif
+ }
+
+#if BFS_HAS_EXTATTR_GET_FILE
+ return extattr_get_file(path, namespace, name, NULL, 0);
+#else
+ return 0;
+#endif
+}
+
+#endif // BFS_USE_SYS_EXTATTR_H
+
+/**
+ * Check whether a file has any extended attributes.
+ *
+ * @param ftwbuf
+ *         The file to check.
+ * @return
+ *         1 if it does, 0 if it doesn't, or -1 (with errno set) on error.
+ */
+int bfs_check_xattrs(const struct BFTW *ftwbuf) {
+ const char *path = fake_at(ftwbuf);
+ ssize_t len;
+
+#if BFS_USE_SYS_EXTATTR_H
+ // Try the system namespace first, then fall back to the user namespace
+ len = bfs_extattr_list(path, ftwbuf->type, EXTATTR_NAMESPACE_SYSTEM);
+ if (len <= 0) {
+ len = bfs_extattr_list(path, ftwbuf->type, EXTATTR_NAMESPACE_USER);
+ }
+#elif __APPLE__
+ int options = ftwbuf->type == BFS_LNK ? XATTR_NOFOLLOW : 0;
+ len = listxattr(path, NULL, 0, options);
+#else
+ if (ftwbuf->type == BFS_LNK) {
+ len = llistxattr(path, NULL, 0);
+ } else {
+ len = listxattr(path, NULL, 0);
+ }
+#endif
+
+ // Save errno before free_fake_at() gets a chance to change it
+ int error = errno;
+
+ free_fake_at(ftwbuf, path);
+
+ if (len > 0) {
+ return 1;
+ } else if (len == 0 || is_absence_error(error)) {
+ return 0;
+ } else if (error == E2BIG) {
+ // NOTE(review): presumably E2BIG means the attribute list exists but is too large to report; confirm
+ return 1;
+ } else {
+ errno = error;
+ return -1;
+ }
+}
+
+/**
+ * Check whether a file has an extended attribute with the given name.
+ *
+ * @param ftwbuf
+ *         The file to check.
+ * @param name
+ *         The name of the attribute to look for.
+ * @return
+ *         1 if it does, 0 if it doesn't, or -1 (with errno set) on error.
+ */
+int bfs_check_xattr_named(const struct BFTW *ftwbuf, const char *name) {
+ const char *path = fake_at(ftwbuf);
+ ssize_t len;
+
+#if BFS_USE_SYS_EXTATTR_H
+ // Try the system namespace first, then fall back to the user namespace
+ len = bfs_extattr_get(path, ftwbuf->type, EXTATTR_NAMESPACE_SYSTEM, name);
+ if (len < 0) {
+ len = bfs_extattr_get(path, ftwbuf->type, EXTATTR_NAMESPACE_USER, name);
+ }
+#elif __APPLE__
+ int options = ftwbuf->type == BFS_LNK ? XATTR_NOFOLLOW : 0;
+ len = getxattr(path, name, NULL, 0, 0, options);
+#else
+ if (ftwbuf->type == BFS_LNK) {
+ len = lgetxattr(path, name, NULL, 0);
+ } else {
+ len = getxattr(path, name, NULL, 0);
+ }
+#endif
+
+ // Save errno before free_fake_at() gets a chance to change it
+ int error = errno;
+
+ free_fake_at(ftwbuf, path);
+
+ // Unlike bfs_check_xattrs(), a length of zero still means the attribute exists
+ if (len >= 0) {
+ return 1;
+ } else if (is_absence_error(error)) {
+ return 0;
+ } else if (error == E2BIG) {
+ // NOTE(review): presumably E2BIG means the attribute exists but its value is too large to report; confirm
+ return 1;
+ } else {
+ errno = error;
+ return -1;
+ }
+}
+
+#else // !BFS_CAN_CHECK_XATTRS
+
+/** Fallback used when BFS_CAN_CHECK_XATTRS is false: always fails with ENOTSUP. */
+int bfs_check_xattrs(const struct BFTW *ftwbuf) {
+ errno = ENOTSUP;
+ return -1;
+}
+
+/** Fallback used when BFS_CAN_CHECK_XATTRS is false: always fails with ENOTSUP. */
+int bfs_check_xattr_named(const struct BFTW *ftwbuf, const char *name) {
+ errno = ENOTSUP;
+ return -1;
+}
+
+#endif
+
+/**
+ * Get a file's SELinux context.
+ *
+ * @param ftwbuf
+ *         The file to check.
+ * @return
+ *         The file's SELinux context (release it with bfs_freecon()), or NULL
+ *         on failure.  Without SELinux support, this always fails with ENOTSUP.
+ */
+char *bfs_getfilecon(const struct BFTW *ftwbuf) {
+#if BFS_CAN_CHECK_CONTEXT
+	const char *path = fake_at(ftwbuf);
+
+	char *con;
+	int ret;
+	if (ftwbuf->type == BFS_LNK) {
+		// Don't follow symbolic links
+		ret = lgetfilecon(path, &con);
+	} else {
+		ret = getfilecon(path, &con);
+	}
+
+	// Release the fake_at() path, which used to be leaked on every call,
+	// while preserving the errno from the call above
+	int error = errno;
+	free_fake_at(ftwbuf, path);
+	errno = error;
+
+	if (ret >= 0) {
+		return con;
+	} else {
+		return NULL;
+	}
+#else
+	errno = ENOTSUP;
+	return NULL;
+#endif
+}
+
+/**
+ * Free a bfs_getfilecon() result.  This does nothing when SELinux support is
+ * compiled out.
+ */
+void bfs_freecon(char *con) {
+#if BFS_CAN_CHECK_CONTEXT
+ freecon(con);
+#endif
+}
diff --git a/src/fsade.h b/src/fsade.h
new file mode 100644
index 0000000..eefef9f
--- /dev/null
+++ b/src/fsade.h
@@ -0,0 +1,81 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+/**
+ * A facade over (file)system features that are (un)implemented differently
+ * between platforms.
+ */
+
+#ifndef BFS_FSADE_H
+#define BFS_FSADE_H
+
+#include "prelude.h"
+
+#define BFS_CAN_CHECK_ACL (BFS_HAS_ACL_GET_FILE || BFS_HAS_ACL_TRIVIAL)
+
+#define BFS_CAN_CHECK_CAPABILITIES BFS_USE_LIBCAP
+
+#define BFS_CAN_CHECK_CONTEXT BFS_USE_LIBSELINUX
+
+#define BFS_CAN_CHECK_XATTRS (BFS_USE_SYS_EXTATTR_H || BFS_USE_SYS_XATTR_H)
+
+struct BFTW;
+
+/**
+ * Check if a file has a non-trivial Access Control List.
+ *
+ * @param ftwbuf
+ * The file to check.
+ * @return
+ * 1 if it does, 0 if it doesn't, or -1 if an error occurred.
+ */
+int bfs_check_acl(const struct BFTW *ftwbuf);
+
+/**
+ * Check if a file has a non-trivial capability set.
+ *
+ * @param ftwbuf
+ * The file to check.
+ * @return
+ * 1 if it does, 0 if it doesn't, or -1 if an error occurred.
+ */
+int bfs_check_capabilities(const struct BFTW *ftwbuf);
+
+/**
+ * Check if a file has any extended attributes set.
+ *
+ * @param ftwbuf
+ * The file to check.
+ * @return
+ * 1 if it does, 0 if it doesn't, or -1 if an error occurred.
+ */
+int bfs_check_xattrs(const struct BFTW *ftwbuf);
+
+/**
+ * Check if a file has an extended attribute with the given name.
+ *
+ * @param ftwbuf
+ * The file to check.
+ * @param name
+ * The name of the xattr to check.
+ * @return
+ * 1 if it does, 0 if it doesn't, or -1 if an error occurred.
+ */
+int bfs_check_xattr_named(const struct BFTW *ftwbuf, const char *name);
+
+/**
+ * Get a file's SELinux context.
+ *
+ * @param ftwbuf
+ * The file to check.
+ * @return
+ * The file's SELinux context, or NULL on failure.
+ */
+char *bfs_getfilecon(const struct BFTW *ftwbuf);
+
+/**
+ * Free a bfs_getfilecon() result.
+ */
+void bfs_freecon(char *con);
+
+#endif // BFS_FSADE_H
diff --git a/src/ioq.c b/src/ioq.c
new file mode 100644
index 0000000..43a1b35
--- /dev/null
+++ b/src/ioq.c
@@ -0,0 +1,1100 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+/**
+ * An asynchronous I/O queue implementation.
+ *
+ * struct ioq is composed of two separate queues:
+ *
+ * struct ioqq *pending; // Pending I/O requests
+ * struct ioqq *ready; // Ready I/O responses
+ *
+ * Worker threads pop requests from `pending`, execute them, and push them back
+ * to the `ready` queue. The main thread pushes requests to `pending` and pops
+ * them from `ready`.
+ *
+ * struct ioqq is a blocking MPMC queue (though it could be SPMC/MPSC for
+ * pending/ready respectively). It is implemented as a circular buffer:
+ *
+ * size_t mask; // (1 << N) - 1
+ * [padding]
+ * size_t head; // Writer index
+ * [padding]
+ * size_t tail; // Reader index
+ * [padding]
+ * ioq_slot slots[1 << N]; // Queue contents
+ *
+ * Pushes are implemented with an unconditional
+ *
+ * fetch_add(&ioqq->head, 1)
+ *
+ * which scales better on many architectures than compare-and-swap (see [1] for
+ * details). Pops are implemented similarly. Since the fetch-and-adds are
+ * unconditional, non-blocking readers can get ahead of writers:
+ *
+ *     Reader               Writer
+ *     ────────────────     ──────────────────────
+ *     tail:     0 → 1
+ *     slots[0]: empty
+ *                          head:     0 → 1
+ *                          slots[0]: empty → full
+ *     tail:     1 → 2
+ *     slots[1]: empty!
+ *
+ * To avoid this, non-blocking reads (ioqq_pop(ioqq, false)) must mark the slots
+ * somehow so that writers can skip them:
+ *
+ *     Reader                     Writer
+ *     ───────────────────────    ───────────────────────
+ *     tail:     0 → 1
+ *     slots[0]: empty → skip
+ *                                head:     0 → 1
+ *                                slots[0]: skip → empty
+ *                                head:     1 → 2
+ *                                slots[1]: empty → full
+ *     tail:     1 → 2
+ *     slots[1]: full → empty
+ *
+ * As well, a reader might "lap" a writer (or another reader), so slots need to
+ * count how many times they should be skipped:
+ *
+ *     Reader                        Writer
+ *     ──────────────────────────    ─────────────────────────
+ *     tail:     0 → 1
+ *     slots[0]: empty → skip(1)
+ *     tail:     1 → 2
+ *     slots[1]: empty → skip(1)
+ *     ...
+ *     tail:     M → 0
+ *     slots[M]: empty → skip(1)
+ *     tail:     0 → 1
+ *     slots[0]: skip(1 → 2)
+ *                                   head:     0 → 1
+ *                                   slots[0]: skip(2 → 1)
+ *                                   head:     1 → 2
+ *                                   slots[1]: skip(1) → empty
+ *                                   ...
+ *                                   head:     M → 0
+ *                                   slots[M]: skip(1) → empty
+ *                                   head:     0 → 1
+ *                                   slots[0]: skip(1) → empty
+ *                                   head:     1 → 2
+ *                                   slots[1]: empty → full
+ *     tail:     1 → 2
+ *     slots[1]: full → empty
+ *
+ * As described in [1], this approach is susceptible to livelock if readers stay
+ * ahead of writers. This is okay for us because we don't retry failed non-
+ * blocking reads.
+ *
+ * The slot representation uses tag bits to hold either a pointer or skip(N):
+ *
+ *     IOQ_SKIP (highest bit)      IOQ_BLOCKED (lowest bit)
+ *         ↓                         ↓
+ *         0 0 0       ...       0 0 0
+ *           └──────────┬──────────┘
+ *                      │
+ *                 value bits
+ *
+ * If IOQ_SKIP is unset, the value bits hold a pointer (or zero/NULL for empty).
+ * If IOQ_SKIP is set, the value bits hold a negative skip count. Writers can
+ * reduce the skip count by adding 1 to the value bits, and when the count hits
+ * zero, the carry will automatically clear IOQ_SKIP:
+ *
+ *     IOQ_SKIP                    IOQ_BLOCKED
+ *         ↓                         ↓
+ *         1 1 1       ...       1 0 0    skip(2)
+ *         1 1 1       ...       1 1 0    skip(1)
+ *         0 0 0       ...       0 0 0    empty
+ *
+ * The IOQ_BLOCKED flag is used to track sleeping waiters, futex-style. To wait
+ * for a slot to change, waiters call ioq_slot_wait() which sets IOQ_BLOCKED and
+ * goes to sleep. Whenever a slot is updated, if the old value had IOQ_BLOCKED
+ * set, ioq_slot_wake() must be called to wake up that waiter.
+ *
+ * Blocking/waking uses a pool of monitors (mutex, condition variable pairs).
+ * Slots are assigned round-robin to a monitor from the pool.
+ *
+ * [1]: https://arxiv.org/abs/2201.02179
+ */
+
+#include "prelude.h"
+#include "ioq.h"
+#include "alloc.h"
+#include "atomic.h"
+#include "bfstd.h"
+#include "bit.h"
+#include "diag.h"
+#include "dir.h"
+#include "stat.h"
+#include "thread.h"
+#include <errno.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+
+#if BFS_USE_LIBURING
+# include <liburing.h>
+#endif
+
+/**
+ * A monitor for an I/O queue slot.
+ */
+struct ioq_monitor {
+ cache_align pthread_mutex_t mutex;
+ pthread_cond_t cond;
+};
+
+/**
+ * Initialize an ioq_monitor.
+ *
+ * @return
+ *         0 on success, -1 on failure.  Nothing is left initialized on failure.
+ */
+static int ioq_monitor_init(struct ioq_monitor *monitor) {
+	if (mutex_init(&monitor->mutex, NULL) == 0) {
+		if (cond_init(&monitor->cond, NULL) == 0) {
+			return 0;
+		}
+
+		// Undo the mutex so the monitor is not left half-initialized
+		mutex_destroy(&monitor->mutex);
+	}
+
+	return -1;
+}
+
+/** Destroy an ioq_monitor, releasing its members in the reverse order of ioq_monitor_init(). */
+static void ioq_monitor_destroy(struct ioq_monitor *monitor) {
+ cond_destroy(&monitor->cond);
+ mutex_destroy(&monitor->mutex);
+}
+
+/** A single entry in a command queue. */
+typedef atomic uintptr_t ioq_slot;
+
+/** Someone might be waiting on this slot. */
+#define IOQ_BLOCKED ((uintptr_t)1)
+
+/** Bit for IOQ_SKIP. */
+#define IOQ_SKIP_BIT (UINTPTR_WIDTH - 1)
+/** The next push(es) should skip this slot. */
+#define IOQ_SKIP ((uintptr_t)1 << IOQ_SKIP_BIT)
+/** Amount to add for an additional skip. */
+#define IOQ_SKIP_ONE (~IOQ_BLOCKED)
+
+// Need room for two flag bits
+bfs_static_assert(alignof(struct ioq_ent) >= (1 << 2));
+
+/**
+ * An MPMC queue of I/O commands.
+ */
+struct ioqq {
+ /** Circular buffer index mask. */
+ size_t slot_mask;
+
+ /** Monitor index mask. */
+ size_t monitor_mask;
+ /** Array of monitors used by the slots. */
+ struct ioq_monitor *monitors;
+
+ /** Index of next writer. */
+ cache_align atomic size_t head;
+ /** Index of next reader. */
+ cache_align atomic size_t tail;
+
+ /** The circular buffer itself. */
+ cache_align ioq_slot slots[];
+};
+
+/**
+ * Destroy an I/O command queue.
+ *
+ * @param ioqq
+ *         The queue to destroy; NULL is allowed and does nothing.
+ */
+static void ioqq_destroy(struct ioqq *ioqq) {
+	if (ioqq) {
+		// monitor_mask + 1 is the number of initialized monitors
+		for (size_t n = 0; n < ioqq->monitor_mask + 1; ++n) {
+			ioq_monitor_destroy(ioqq->monitors + n);
+		}
+
+		free(ioqq->monitors);
+		free(ioqq);
+	}
+}
+
+/**
+ * Create an I/O command queue.
+ *
+ * @param size
+ *         The requested number of slots, which gets rounded with bit_ceil().
+ * @return
+ *         The new queue, or NULL on failure.
+ */
+static struct ioqq *ioqq_create(size_t size) {
+ // Circular buffer size must be a power of two
+ size = bit_ceil(size);
+
+ struct ioqq *ioqq = ALLOC_FLEX(struct ioqq, slots, size);
+ if (!ioqq) {
+ return NULL;
+ }
+
+ ioqq->slot_mask = size - 1;
+ // Start at (size_t)-1 so that monitor_mask + 1 is always the number of
+ // monitors initialized so far, which is what ioqq_destroy() loops over
+ ioqq->monitor_mask = -1;
+
+ // Use a pool of monitors
+ size_t nmonitors = size < 64 ? size : 64;
+ ioqq->monitors = ALLOC_ARRAY(struct ioq_monitor, nmonitors);
+ if (!ioqq->monitors) {
+ ioqq_destroy(ioqq);
+ return NULL;
+ }
+
+ for (size_t i = 0; i < nmonitors; ++i) {
+ if (ioq_monitor_init(&ioqq->monitors[i]) != 0) {
+ // Only the monitors counted so far get destroyed
+ ioqq_destroy(ioqq);
+ return NULL;
+ }
+ ++ioqq->monitor_mask;
+ }
+
+ atomic_init(&ioqq->head, 0);
+ atomic_init(&ioqq->tail, 0);
+
+ // A zero slot means empty
+ for (size_t i = 0; i < size; ++i) {
+ atomic_init(&ioqq->slots[i], 0);
+ }
+
+ return ioqq;
+}
+
+/**
+ * Get the monitor associated with a slot.
+ *
+ * The monitor is picked by masking the slot's index in ioqq->slots with
+ * monitor_mask.
+ */
+static struct ioq_monitor *ioq_slot_monitor(struct ioqq *ioqq, ioq_slot *slot) {
+	size_t slot_index = slot - ioqq->slots;
+	size_t monitor_index = slot_index & ioqq->monitor_mask;
+	return ioqq->monitors + monitor_index;
+}
+
+/**
+ * Atomically wait for a slot to change.
+ *
+ * @param ioqq
+ *         The queue containing the slot.
+ * @param slot
+ *         The slot to wait on.
+ * @param value
+ *         The value the caller last observed in the slot.
+ * @return
+ *         The most recently observed value of the slot: either one that
+ *         differs from the awaited value, or the value seen when setting
+ *         IOQ_BLOCKED failed.
+ */
+attr(noinline)
+static uintptr_t ioq_slot_wait(struct ioqq *ioqq, ioq_slot *slot, uintptr_t value) {
+ struct ioq_monitor *monitor = ioq_slot_monitor(ioqq, slot);
+ mutex_lock(&monitor->mutex);
+
+ // The slot may already have changed before we took the lock
+ uintptr_t ret = load(slot, relaxed);
+ if (ret != value) {
+ goto done;
+ }
+
+ // Set IOQ_BLOCKED so that whoever updates the slot knows to wake us
+ if (!(value & IOQ_BLOCKED)) {
+ value |= IOQ_BLOCKED;
+ if (!compare_exchange_strong(slot, &ret, value, relaxed, relaxed)) {
+ goto done;
+ }
+ }
+
+ do {
+ // To avoid missed wakeups, it is important that
+ // cond_broadcast() is not called right here
+ cond_wait(&monitor->cond, &monitor->mutex);
+ ret = load(slot, relaxed);
+ } while (ret == value);
+
+done:
+ mutex_unlock(&monitor->mutex);
+ return ret;
+}
+
+/**
+ * Wake up any threads waiting on a slot.
+ *
+ * This broadcasts on the slot's monitor, so it wakes the waiters of every slot
+ * that maps to the same monitor.
+ */
+attr(noinline)
+static void ioq_slot_wake(struct ioqq *ioqq, ioq_slot *slot) {
+ struct ioq_monitor *monitor = ioq_slot_monitor(ioqq, slot);
+
+ // The following implementation would clearly avoid the missed wakeup
+ // issue mentioned above in ioq_slot_wait():
+ //
+ // mutex_lock(&monitor->mutex);
+ // cond_broadcast(&monitor->cond);
+ // mutex_unlock(&monitor->mutex);
+ //
+ // As a minor optimization, we move the broadcast outside of the lock.
+ // This optimization is correct, even though it leads to a seemingly-
+ // useless empty critical section.
+
+ mutex_lock(&monitor->mutex);
+ mutex_unlock(&monitor->mutex);
+ cond_broadcast(&monitor->cond);
+}
+
+/**
+ * Branch-free (slot & IOQ_SKIP) ? ~IOQ_BLOCKED : 0
+ *
+ * Shifting right by IOQ_SKIP_BIT leaves just the top bit (0 or 1).  Negating
+ * that gives 0 or all ones, and the final shift left by one clears the lowest
+ * (IOQ_BLOCKED) bit.
+ */
+static uintptr_t ioq_skip_mask(uintptr_t slot) {
+ return -(slot >> IOQ_SKIP_BIT) << 1;
+}
+
+/**
+ * Push an entry into a slot.
+ *
+ * @param ioqq
+ *         The queue containing the slot.
+ * @param slot
+ *         The slot to push into.
+ * @param ent
+ *         The entry to store.
+ * @return
+ *         Whether the entry was stored.  false means the slot was marked
+ *         skip(n): its skip count was reduced by one instead, and the entry
+ *         still has to be pushed somewhere else.
+ */
+static bool ioq_slot_push(struct ioqq *ioqq, ioq_slot *slot, struct ioq_ent *ent) {
+	uintptr_t prev = load(slot, relaxed);
+
+	while (true) {
+		// Slot values are uintptr_t, so the masks must be too: a
+		// narrower type could lose the IOQ_SKIP bit
+		uintptr_t skip_mask = ioq_skip_mask(prev);
+		uintptr_t full_mask = ~skip_mask & ~IOQ_BLOCKED;
+		if (prev & full_mask) {
+			// full(ptr) → wait
+			prev = ioq_slot_wait(ioqq, slot, prev);
+			continue;
+		}
+
+		// empty → full(ptr)
+		// (pointers are stored shifted right by one bit, which keeps
+		// the IOQ_SKIP bit clear)
+		uintptr_t next = ((uintptr_t)ent >> 1) & full_mask;
+		// skip(1) → empty
+		// skip(n) → skip(n - 1)
+		next |= (prev - IOQ_SKIP_ONE) & skip_mask;
+
+		if (compare_exchange_weak(slot, &prev, next, release, relaxed)) {
+			break;
+		}
+	}
+
+	// Someone was sleeping on the old value, so let them see the new one
+	if (prev & IOQ_BLOCKED) {
+		ioq_slot_wake(ioqq, slot);
+	}
+
+	return !(prev & IOQ_SKIP);
+}
+
+/**
+ * (Try to) pop an entry from a slot.
+ *
+ * @param ioqq
+ *         The queue containing the slot.
+ * @param slot
+ *         The slot to pop from.
+ * @param block
+ *         Whether to wait for the slot to be filled.  If false, a slot that is
+ *         not full has its skip count raised by one instead.
+ * @return
+ *         The popped entry, or NULL if the slot was not full.
+ */
+static struct ioq_ent *ioq_slot_pop(struct ioqq *ioqq, ioq_slot *slot, bool block) {
+ uintptr_t prev = load(slot, relaxed);
+ while (true) {
+ // empty → skip(1)
+ // skip(n) → skip(n + 1)
+ // full(ptr) → full(ptr - 1)
+ uintptr_t next = prev + IOQ_SKIP_ONE;
+ // skip(n) → ~IOQ_BLOCKED
+ // full(ptr) → 0
+ next &= ioq_skip_mask(next);
+
+ // A non-zero next means the slot was not full, so there is nothing to pop yet
+ if (block && next) {
+ prev = ioq_slot_wait(ioqq, slot, prev);
+ continue;
+ }
+
+ if (compare_exchange_weak(slot, &prev, next, acquire, relaxed)) {
+ break;
+ }
+ }
+
+ if (prev & IOQ_BLOCKED) {
+ ioq_slot_wake(ioqq, slot);
+ }
+
+ // empty → 0
+ // skip(n) → 0
+ // full(ptr) → ptr
+ prev &= ioq_skip_mask(~prev);
+ // Undo the right shift that ioq_slot_push() applied to the pointer
+ return (struct ioq_ent *)(prev << 1);
+}
+
+/**
+ * Push an entry onto the queue.
+ *
+ * Each attempt claims the next head index.  If that slot had to be skipped,
+ * another index is claimed, until the entry is actually stored.
+ */
+static void ioqq_push(struct ioqq *ioqq, struct ioq_ent *ent) {
+	bool stored;
+	do {
+		size_t index = fetch_add(&ioqq->head, 1, relaxed);
+		stored = ioq_slot_push(ioqq, &ioqq->slots[index & ioqq->slot_mask], ent);
+	} while (!stored);
+}
+
+/**
+ * Push a batch of entries to the queue.
+ *
+ * @param ioqq
+ *         The queue to push to.
+ * @param batch
+ *         The entries to push, in order.
+ * @param size
+ *         The number of entries in the batch.
+ */
+static void ioqq_push_batch(struct ioqq *ioqq, struct ioq_ent *batch[], size_t size) {
+ size_t mask = ioqq->slot_mask;
+ do {
+ // Claim one slot index for each entry that still has to be pushed
+ size_t i = fetch_add(&ioqq->head, size, relaxed);
+ for (size_t j = i + size; i != j; ++i) {
+ ioq_slot *slot = &ioqq->slots[i & mask];
+ // A skipped slot does not consume an entry, so the same
+ // entry is tried again in the next slot
+ if (ioq_slot_push(ioqq, slot, *batch)) {
+ ++batch;
+ --size;
+ }
+ }
+ } while (size > 0);
+}
+
+/**
+ * Pop an entry from the queue.
+ *
+ * @param ioqq
+ *         The queue to pop from.
+ * @param block
+ *         Whether to wait for an entry to arrive in the claimed slot.
+ * @return
+ *         Whatever ioq_slot_pop() returns for the claimed slot.
+ */
+static struct ioq_ent *ioqq_pop(struct ioqq *ioqq, bool block) {
+	size_t index = fetch_add(&ioqq->tail, 1, relaxed);
+	return ioq_slot_pop(ioqq, &ioqq->slots[index & ioqq->slot_mask], block);
+}
+
+/**
+ * Pop a batch of entries from the queue.
+ *
+ * @param ioqq
+ *         The queue to pop from.
+ * @param batch
+ *         Receives the result of popping each of the claimed slots.
+ * @param size
+ *         The number of slots to claim.
+ * @param block
+ *         Whether the first pop may block; the remaining ones never do.
+ */
+static void ioqq_pop_batch(struct ioqq *ioqq, struct ioq_ent *batch[], size_t size, bool block) {
+	size_t first = fetch_add(&ioqq->tail, size, relaxed);
+
+	for (size_t k = 0; k < size; ++k) {
+		ioq_slot *slot = &ioqq->slots[(first + k) & ioqq->slot_mask];
+		batch[k] = ioq_slot_pop(ioqq, slot, block && k == 0);
+	}
+}
+
+/**
+ * Use cache-line-sized batches.
+ *
+ * This is the number of ioq_slots that fit in FALSE_SHARING_SIZE bytes. It is
+ * both the capacity of struct ioq_batch and the number of slots the worker
+ * loops pop from the pending queue at a time.
+ */
+#define IOQ_BATCH (FALSE_SHARING_SIZE / sizeof(ioq_slot))
+
+/**
+ * A batch of entries to send all at once.
+ *
+ * Filled with ioq_batch_push() and sent with ioq_batch_flush().
+ */
+struct ioq_batch {
+ /** The current batch size. */
+ size_t size;
+ /** The array of entries. */
+ struct ioq_ent *entries[IOQ_BATCH];
+};
+
+/**
+ * Send the batch to a queue.
+ *
+ * Does nothing for an empty batch; otherwise pushes all batched entries at
+ * once and resets the batch to empty.
+ */
+static void ioq_batch_flush(struct ioqq *ioqq, struct ioq_batch *batch) {
+	if (batch->size == 0) {
+		return;
+	}
+
+	ioqq_push_batch(ioqq, batch->entries, batch->size);
+	batch->size = 0;
+}
+
+/**
+ * Add an entry to a batch, flushing if necessary.
+ *
+ * A batch that is already full is flushed to the queue first, so there is
+ * always room for the new entry.
+ */
+static void ioq_batch_push(struct ioqq *ioqq, struct ioq_batch *batch, struct ioq_ent *ent) {
+	bool full = batch->size >= IOQ_BATCH;
+	if (full) {
+		ioq_batch_flush(ioqq, batch);
+	}
+
+	size_t index = batch->size;
+	batch->entries[index] = ent;
+	batch->size = index + 1;
+}
+
+/**
+ * Sentinel stop command.
+ *
+ * Only its address matters: ioq_cancel() pushes &IOQ_STOP onto the pending
+ * queue, and a worker that pops it pushes it back and leaves its loop.
+ */
+static struct ioq_ent IOQ_STOP;
+
+#if BFS_USE_LIBURING
+/**
+ * Supported io_uring operations.
+ *
+ * A bitmask: ioq_ring_init() sets a bit for each opcode the ring's probe
+ * reports as supported, and ioq_dispatch_async() only uses the ring for
+ * operations whose bit is set.
+ */
+enum ioq_ring_ops {
+ // Set when IORING_OP_OPENAT is supported
+ IOQ_RING_OPENAT = 1 << 0,
+ // Set when IORING_OP_CLOSE is supported
+ IOQ_RING_CLOSE = 1 << 1,
+ // Set when IORING_OP_STATX is supported (only probed if BFS_USE_STATX)
+ IOQ_RING_STATX = 1 << 2,
+};
+#endif
+
+/** I/O queue thread-specific data. */
+struct ioq_thread {
+ /** The thread handle. */
+ pthread_t id;
+ /** Pointer back to the I/O queue. */
+ struct ioq *parent;
+
+#if BFS_USE_LIBURING
+ /** io_uring instance. */
+ struct io_uring ring;
+ /**
+ * Any error that occurred initializing the ring.
+ *
+ * Zero means the ring is usable: ioq_work() then runs the io_uring loop
+ * and ioq_ring_exit() tears the ring down. Non-zero makes the thread use
+ * the synchronous loop instead.
+ */
+ int ring_err;
+ /** Bitmask of supported io_uring operations. */
+ enum ioq_ring_ops ring_ops;
+#endif
+};
+
+/**
+ * The I/O queue itself: a pending queue of requests, a ready queue of
+ * responses, and the background threads that move entries between them.
+ */
+struct ioq {
+ /** The depth of the queue. */
+ size_t depth;
+ /**
+ * The current size of the queue.
+ *
+ * Incremented by ioq_request() and decremented by ioq_free(), so it counts
+ * entries that have been allocated but not yet freed.
+ */
+ size_t size;
+ /** Cancellation flag. */
+ atomic bool cancel;
+
+ /** ioq_ent arena. */
+ struct arena ents;
+#if BFS_USE_LIBURING && BFS_USE_STATX
+ /** struct statx arena. */
+ struct arena xbufs;
+#endif
+
+ /** Pending I/O requests. */
+ struct ioqq *pending;
+ /** Ready I/O responses. */
+ struct ioqq *ready;
+
+ /** The number of background threads. */
+ size_t nthreads;
+ /** The background threads themselves. */
+ struct ioq_thread threads[];
+};
+
+/**
+ * Cancel a request if we need to.
+ *
+ * @return
+ *         true if the request was cancelled (its result is then -EINTR), or
+ *         false if it should still be carried out.
+ */
+static bool ioq_check_cancel(struct ioq *ioq, struct ioq_ent *ent) {
+	if (load(&ioq->cancel, relaxed)) {
+		switch (ent->op) {
+		case IOQ_CLOSE:
+		case IOQ_CLOSEDIR:
+			// Always close(), even if we're cancelled, just like a real EINTR
+			break;
+		default:
+			ent->result = -EINTR;
+			return true;
+		}
+	}
+
+	return false;
+}
+
+/**
+ * Dispatch a single request synchronously, on the calling thread.
+ *
+ * The outcome of the operation, passed through try(), is stored in
+ * ent->result. An unrecognized operation is reported with bfs_bug() and
+ * yields -ENOSYS.
+ */
+static void ioq_dispatch_sync(struct ioq *ioq, struct ioq_ent *ent) {
+	switch (ent->op) {
+	case IOQ_CLOSE:
+		ent->result = try(xclose(ent->close.fd));
+		return;
+
+	case IOQ_OPENDIR: {
+		struct ioq_opendir *open_args = &ent->opendir;
+		ent->result = try(bfs_opendir(open_args->dir, open_args->dfd, open_args->path, open_args->flags));
+		if (ent->result < 0) {
+			return;
+		}
+		// The directory opened successfully, so poll it right away
+		bfs_polldir(open_args->dir);
+		return;
+	}
+
+	case IOQ_CLOSEDIR:
+		ent->result = try(bfs_closedir(ent->closedir.dir));
+		return;
+
+	case IOQ_STAT: {
+		struct ioq_stat *stat_args = &ent->stat;
+		ent->result = try(bfs_stat(stat_args->dfd, stat_args->path, stat_args->flags, stat_args->buf));
+		return;
+	}
+	}
+
+	bfs_bug("Unknown ioq_op %d", (int)ent->op);
+	ent->result = -ENOSYS;
+}
+
+#if BFS_USE_LIBURING
+
+/**
+ * io_uring worker state.
+ *
+ * One instance lives on the stack of ioq_ring_work() for the lifetime of the
+ * worker loop.
+ */
+struct ioq_ring_state {
+ /** The I/O queue. */
+ struct ioq *ioq;
+ /** The io_uring. */
+ struct io_uring *ring;
+ /** Supported io_uring operations. */
+ enum ioq_ring_ops ops;
+ /** Number of prepped, unsubmitted SQEs (incremented by ioq_prep_sqe()). */
+ size_t prepped;
+ /** Number of submitted, unreaped SQEs (decremented by ioq_reap_cqe()). */
+ size_t submitted;
+ /** Whether to stop the loop (set once IOQ_STOP has been popped). */
+ bool stop;
+ /** A batch of ready entries. */
+ struct ioq_batch ready;
+};
+
+/**
+ * Dispatch a single request asynchronously.
+ *
+ * Preps an SQE for the request if the ring (state->ops) and the build
+ * configuration support its operation. The SQE is only prepped here; the
+ * caller attaches the entry to it and accounts for it.
+ *
+ * @param state
+ * The io_uring worker state.
+ * @param ent
+ * The request to dispatch.
+ * @return
+ * The prepped SQE, or NULL if the request can't go through the ring and
+ * has to be handled some other way.
+ */
+static struct io_uring_sqe *ioq_dispatch_async(struct ioq_ring_state *state, struct ioq_ent *ent) {
+ struct io_uring *ring = state->ring;
+ enum ioq_ring_ops ops = state->ops;
+ struct io_uring_sqe *sqe = NULL;
+
+ // NOTE(review): io_uring_get_sqe() results are used unchecked below;
+ // this relies on the caller having verified that the SQ has room
+ switch (ent->op) {
+ case IOQ_CLOSE:
+ if (ops & IOQ_RING_CLOSE) {
+ sqe = io_uring_get_sqe(ring);
+ io_uring_prep_close(sqe, ent->close.fd);
+ }
+ return sqe;
+
+ case IOQ_OPENDIR:
+ if (ops & IOQ_RING_OPENAT) {
+ sqe = io_uring_get_sqe(ring);
+ struct ioq_opendir *args = &ent->opendir;
+ // Only the open itself goes through the ring; args->dir and
+ // args->flags are not used here
+ int flags = O_RDONLY | O_CLOEXEC | O_DIRECTORY;
+ io_uring_prep_openat(sqe, args->dfd, args->path, flags, 0);
+ }
+ return sqe;
+
+ case IOQ_CLOSEDIR:
+#if BFS_USE_UNWRAPDIR
+ if (ops & IOQ_RING_CLOSE) {
+ sqe = io_uring_get_sqe(ring);
+ io_uring_prep_close(sqe, bfs_unwrapdir(ent->closedir.dir));
+ }
+#endif
+ return sqe;
+
+ case IOQ_STAT:
+#if BFS_USE_STATX
+ if (ops & IOQ_RING_STATX) {
+ sqe = io_uring_get_sqe(ring);
+ struct ioq_stat *args = &ent->stat;
+ int flags = bfs_statx_flags(args->flags);
+ unsigned int mask = STATX_BASIC_STATS | STATX_BTIME;
+ // The kernel fills args->xbuf, not args->buf
+ io_uring_prep_statx(sqe, args->dfd, args->path, flags, mask, args->xbuf);
+ }
+#endif
+ return sqe;
+ }
+
+ bfs_bug("Unknown ioq_op %d", (int)ent->op);
+ return NULL;
+}
+
+/**
+ * Check whether ioq_ring_reap() would have nothing to do.
+ *
+ * @return
+ *         true if there are no prepped SQEs, no submitted SQEs, and no
+ *         batched ready entries.
+ */
+static bool ioq_ring_empty(struct ioq_ring_state *state) {
+	return state->prepped == 0
+		&& state->submitted == 0
+		&& state->ready.size == 0;
+}
+
+/**
+ * Prep a single SQE.
+ *
+ * A request that can go through the ring gets an SQE with the entry attached
+ * as user data. Cancelled requests, and requests that had to be carried out
+ * synchronously, go straight into the batch of ready entries.
+ */
+static void ioq_prep_sqe(struct ioq_ring_state *state, struct ioq_ent *ent) {
+	struct ioq *ioq = state->ioq;
+
+	if (!ioq_check_cancel(ioq, ent)) {
+		struct io_uring_sqe *sqe = ioq_dispatch_async(state, ent);
+		if (sqe) {
+			io_uring_sqe_set_data(sqe, ent);
+			++state->prepped;
+			return;
+		}
+
+		// The ring can't handle this request
+		ioq_dispatch_sync(ioq, ent);
+	}
+
+	ioq_batch_push(ioq->ready, &state->ready, ent);
+}
+
+/**
+ * Prep a batch of SQEs.
+ *
+ * Pops requests from the pending queue, IOQ_BATCH slots at a time, for as
+ * long as the submission queue has room for a whole batch, and hands each one
+ * to ioq_prep_sqe().
+ *
+ * @param state
+ * The io_uring worker state.
+ * @return
+ * Whether there is anything for ioq_ring_reap() to do. Always false once
+ * the stop sentinel was seen by an earlier call.
+ */
+static bool ioq_ring_prep(struct ioq_ring_state *state) {
+ if (state->stop) {
+ return false;
+ }
+
+ struct ioq *ioq = state->ioq;
+ struct io_uring *ring = state->ring;
+ struct ioq_ent *pending[IOQ_BATCH];
+
+ while (io_uring_sq_space_left(ring) >= IOQ_BATCH) {
+ // Only wait for requests when nothing is prepped, in flight, or
+ // waiting to be flushed
+ bool block = ioq_ring_empty(state);
+ ioqq_pop_batch(ioq->pending, pending, IOQ_BATCH, block);
+
+ bool any = false;
+ for (size_t i = 0; i < IOQ_BATCH; ++i) {
+ struct ioq_ent *ent = pending[i];
+ if (ent == &IOQ_STOP) {
+ // Put the sentinel back (presumably so the other workers
+ // get to see it too) and stop popping
+ ioqq_push(ioq->pending, &IOQ_STOP);
+ state->stop = true;
+ goto done;
+ } else if (ent) {
+ ioq_prep_sqe(state, ent);
+ any = true;
+ }
+ }
+
+ // The whole batch came back empty
+ if (!any) {
+ break;
+ }
+ }
+
+done:
+ return !ioq_ring_empty(state);
+}
+
+/**
+ * Reap a single CQE.
+ *
+ * Copies the completion result into the entry attached to the CQE, does any
+ * operation-specific post-processing, and adds the entry to the batch of
+ * ready entries.
+ *
+ * @param state
+ * The io_uring worker state.
+ * @param cqe
+ * The completion to process; it is marked as seen here.
+ */
+static void ioq_reap_cqe(struct ioq_ring_state *state, struct io_uring_cqe *cqe) {
+ struct ioq *ioq = state->ioq;
+ struct io_uring *ring = state->ring;
+
+ // Copy everything we need out of the CQE before marking it as seen
+ struct ioq_ent *ent = io_uring_cqe_get_data(cqe);
+ ent->result = cqe->res;
+ io_uring_cqe_seen(ring, cqe);
+ --state->submitted;
+
+ // Failed operations need no post-processing
+ if (ent->result < 0) {
+ goto push;
+ }
+
+ switch (ent->op) {
+ case IOQ_OPENDIR: {
+ // The ring only opened the file descriptor; the bfs_dir is set up here
+ int fd = ent->result;
+ if (ioq_check_cancel(ioq, ent)) {
+ xclose(fd);
+ goto push;
+ }
+
+ struct ioq_opendir *args = &ent->opendir;
+ ent->result = try(bfs_opendir(args->dir, fd, NULL, args->flags));
+ if (ent->result >= 0) {
+ // TODO: io_uring_prep_getdents()
+ bfs_polldir(args->dir);
+ } else {
+ xclose(fd);
+ }
+
+ break;
+ }
+
+#if BFS_USE_STATX
+ case IOQ_STAT: {
+ // Convert the statx buffer filled by the ring into the caller's bfs_stat
+ struct ioq_stat *args = &ent->stat;
+ ent->result = try(bfs_statx_convert(args->buf, args->xbuf));
+ break;
+ }
+#endif
+
+ default:
+ break;
+ }
+
+push:
+ ioq_batch_push(ioq->ready, &state->ready, ent);
+}
+
+/**
+ * Reap a batch of CQEs.
+ *
+ * Submits every prepped SQE, waits for and processes a completion for every
+ * submitted one, and finally flushes the batch of ready entries to the ready
+ * queue.
+ *
+ * @param state
+ * The io_uring worker state.
+ */
+static void ioq_ring_reap(struct ioq_ring_state *state) {
+ struct ioq *ioq = state->ioq;
+ struct io_uring *ring = state->ring;
+
+ // NOTE(review): a non-positive return is ignored and the call is simply
+ // retried, so a persistent submission error would spin here — confirm
+ while (state->prepped) {
+ int ret = io_uring_submit_and_wait(ring, 1);
+ if (ret > 0) {
+ state->prepped -= ret;
+ state->submitted += ret;
+ }
+ }
+
+ // ioq_reap_cqe() decrements state->submitted; failed waits are retried
+ while (state->submitted) {
+ struct io_uring_cqe *cqe;
+ if (io_uring_wait_cqe(ring, &cqe) < 0) {
+ continue;
+ }
+
+ ioq_reap_cqe(state, cqe);
+ }
+
+ ioq_batch_flush(ioq->ready, &state->ready);
+}
+
+/**
+ * io_uring worker loop.
+ *
+ * Alternates between prepping and reaping until ioq_ring_prep() reports that
+ * there is nothing left to reap.
+ */
+static void ioq_ring_work(struct ioq_thread *thread) {
+	struct ioq_ring_state state = {
+		.ioq = thread->parent,
+		.ring = &thread->ring,
+		.ops = thread->ring_ops,
+	};
+
+	for (;;) {
+		if (!ioq_ring_prep(&state)) {
+			break;
+		}
+		ioq_ring_reap(&state);
+	}
+}
+
+#endif // BFS_USE_LIBURING
+
+/**
+ * Synchronous syscall loop.
+ *
+ * Repeatedly pops a batch of pending requests (blocking for the first one),
+ * carries out each request unless it was cancelled, and pushes the results to
+ * the ready queue. Leaves the loop after popping the stop sentinel, which is
+ * pushed back onto the pending queue first.
+ */
+static void ioq_sync_work(struct ioq_thread *thread) {
+	struct ioq *ioq = thread->parent;
+
+	for (bool stop = false; !stop;) {
+		struct ioq_ent *pending[IOQ_BATCH];
+		ioqq_pop_batch(ioq->pending, pending, IOQ_BATCH, true);
+
+		struct ioq_batch ready;
+		ready.size = 0;
+
+		for (size_t i = 0; i < IOQ_BATCH && !stop; ++i) {
+			struct ioq_ent *ent = pending[i];
+			if (!ent) {
+				continue;
+			}
+
+			if (ent == &IOQ_STOP) {
+				ioqq_push(ioq->pending, &IOQ_STOP);
+				stop = true;
+				continue;
+			}
+
+			if (!ioq_check_cancel(ioq, ent)) {
+				ioq_dispatch_sync(ioq, ent);
+			}
+			ioq_batch_push(ioq->ready, &ready, ent);
+		}
+
+		ioq_batch_flush(ioq->ready, &ready);
+	}
+}
+
+/**
+ * Background thread entry point.
+ *
+ * @param ptr
+ *         The struct ioq_thread for this thread.
+ * @return
+ *         Always NULL.
+ */
+static void *ioq_work(void *ptr) {
+	struct ioq_thread *thread = ptr;
+
+#if BFS_USE_LIBURING
+	// Only use the ring if it was set up without error
+	if (thread->ring_err != 0) {
+		ioq_sync_work(thread);
+	} else {
+		ioq_ring_work(thread);
+	}
+#else
+	ioq_sync_work(thread);
+#endif
+
+	return NULL;
+}
+
+/**
+ * Initialize io_uring thread state.
+ *
+ * Without BFS_USE_LIBURING this does nothing and succeeds. Otherwise it sets
+ * up thread->ring and thread->ring_ops. Threads after the first attach to the
+ * previous thread's ring and copy its results (its error, or its set of
+ * supported operations) instead of probing again.
+ *
+ * @param ioq
+ * The I/O queue that owns the thread.
+ * @param thread
+ * The thread to initialize; must be an element of ioq->threads.
+ * @return
+ * 0 on success, or -1 on failure, in which case thread->ring_err is set.
+ */
+static int ioq_ring_init(struct ioq *ioq, struct ioq_thread *thread) {
+#if BFS_USE_LIBURING
+ // The previous element of ioq->threads, if this isn't the first one
+ struct ioq_thread *prev = NULL;
+ if (thread > ioq->threads) {
+ prev = thread - 1;
+ }
+
+ // If the previous thread's ring failed, don't bother trying again
+ if (prev && prev->ring_err) {
+ thread->ring_err = prev->ring_err;
+ return -1;
+ }
+
+ // Share io-wq workers between rings
+ struct io_uring_params params = {0};
+ if (prev) {
+ params.flags |= IORING_SETUP_ATTACH_WQ;
+ params.wq_fd = prev->ring.ring_fd;
+ }
+
+ // Use a page for each SQE ring
+ size_t entries = 4096 / sizeof(struct io_uring_sqe);
+ // The return value is negated before it is stored in ring_err
+ thread->ring_err = -io_uring_queue_init_params(entries, &thread->ring, &params);
+ if (thread->ring_err) {
+ return -1;
+ }
+
+ if (prev) {
+ // Initial setup already complete
+ thread->ring_ops = prev->ring_ops;
+ return 0;
+ }
+
+ // Check for supported operations
+ struct io_uring_probe *probe = io_uring_get_probe_ring(&thread->ring);
+ if (probe) {
+ if (io_uring_opcode_supported(probe, IORING_OP_OPENAT)) {
+ thread->ring_ops |= IOQ_RING_OPENAT;
+ }
+ if (io_uring_opcode_supported(probe, IORING_OP_CLOSE)) {
+ thread->ring_ops |= IOQ_RING_CLOSE;
+ }
+#if BFS_USE_STATX
+ if (io_uring_opcode_supported(probe, IORING_OP_STATX)) {
+ thread->ring_ops |= IOQ_RING_STATX;
+ }
+#endif
+ io_uring_free_probe(probe);
+ }
+ // A ring that supports none of our operations (or that couldn't be
+ // probed) is useless, so tear it down and report ENOTSUP
+ if (!thread->ring_ops) {
+ io_uring_queue_exit(&thread->ring);
+ thread->ring_err = ENOTSUP;
+ return -1;
+ }
+
+ // Limit the number of io_uring workers
+ unsigned int values[] = {
+ ioq->nthreads, // [IO_WQ_BOUND]
+ 0, // [IO_WQ_UNBOUND]
+ };
+ // The result of this call is not checked
+ io_uring_register_iowq_max_workers(&thread->ring, values);
+#endif
+
+ return 0;
+}
+
+/**
+ * Destroy an io_uring.
+ *
+ * Only rings that were initialized without error are torn down; without
+ * BFS_USE_LIBURING this is a no-op.
+ */
+static void ioq_ring_exit(struct ioq_thread *thread) {
+#if BFS_USE_LIBURING
+	if (thread->ring_err != 0) {
+		return;
+	}
+
+	io_uring_queue_exit(&thread->ring);
+#endif
+}
+
+/**
+ * Create an I/O queue thread.
+ *
+ * @return
+ *         0 on success, or -1 if the thread could not be started.
+ */
+static int ioq_thread_create(struct ioq *ioq, struct ioq_thread *thread) {
+	thread->parent = ioq;
+
+	// The result is deliberately not checked here: ioq_work() looks at
+	// thread->ring_err (where available) to choose its loop
+	ioq_ring_init(ioq, thread);
+
+	if (thread_create(&thread->id, NULL, ioq_work, thread) == 0) {
+		return 0;
+	}
+
+	ioq_ring_exit(thread);
+	return -1;
+}
+
+/**
+ * Join an I/O queue thread, then destroy its io_uring (if any).
+ */
+static void ioq_thread_join(struct ioq_thread *thread) {
+	// The thread's return value is not needed
+	thread_join(thread->id, NULL);
+
+	// Only tear the ring down once the thread is finished
+	ioq_ring_exit(thread);
+}
+
+/**
+ * Create an I/O queue with the given depth and number of background threads.
+ *
+ * On failure, everything that was already set up is destroyed again and
+ * errno is left as it was at the point of failure.
+ */
+struct ioq *ioq_create(size_t depth, size_t nthreads) {
+	struct ioq *ioq = ZALLOC_FLEX(struct ioq, threads, nthreads);
+	if (ioq == NULL) {
+		goto fail;
+	}
+
+	ioq->depth = depth;
+
+	ARENA_INIT(&ioq->ents, struct ioq_ent);
+#if BFS_USE_LIBURING && BFS_USE_STATX
+	ARENA_INIT(&ioq->xbufs, struct statx);
+#endif
+
+	ioq->pending = ioqq_create(depth);
+	if (ioq->pending == NULL) {
+		goto fail;
+	}
+
+	ioq->ready = ioqq_create(depth);
+	if (ioq->ready == NULL) {
+		goto fail;
+	}
+
+	ioq->nthreads = nthreads;
+	for (size_t started = 0; started < nthreads; ++started) {
+		if (ioq_thread_create(ioq, &ioq->threads[started]) != 0) {
+			// Make sure ioq_destroy() only joins the threads that exist
+			ioq->nthreads = started;
+			goto fail;
+		}
+	}
+
+	return ioq;
+
+fail:;
+	// ioq_destroy() accepts NULL; keep errno from the failure intact
+	int err = errno;
+	ioq_destroy(ioq);
+	errno = err;
+	return NULL;
+}
+
+/** Compute how many more entries may be allocated before the queue is full. */
+size_t ioq_capacity(const struct ioq *ioq) {
+	size_t in_use = ioq->size;
+	return ioq->depth - in_use;
+}
+
+/**
+ * Allocate a new request.
+ *
+ * @param ioq
+ *         The I/O queue.
+ * @param op
+ *         The operation to request.
+ * @param ptr
+ *         Arbitrary user data to store in the entry.
+ * @return
+ *         The new entry, with op and ptr filled in, or NULL on failure.
+ *         errno is EINTR if the queue was cancelled and EAGAIN if it is
+ *         full.
+ */
+static struct ioq_ent *ioq_request(struct ioq *ioq, enum ioq_op op, void *ptr) {
+	int error = 0;
+	if (load(&ioq->cancel, relaxed)) {
+		error = EINTR;
+	} else if (ioq->size >= ioq->depth) {
+		error = EAGAIN;
+	}
+
+	if (error != 0) {
+		errno = error;
+		return NULL;
+	}
+
+	struct ioq_ent *ent = arena_alloc(&ioq->ents);
+	if (ent) {
+		ent->op = op;
+		ent->ptr = ptr;
+		++ioq->size;
+	}
+	return ent;
+}
+
+/** Queue up a request to close a file descriptor. */
+int ioq_close(struct ioq *ioq, int fd, void *ptr) {
+	struct ioq_ent *ent = ioq_request(ioq, IOQ_CLOSE, ptr);
+	if (ent == NULL) {
+		return -1;
+	}
+
+	ent->close.fd = fd;
+	ioqq_push(ioq->pending, ent);
+
+	return 0;
+}
+
+/** Queue up a request to open a directory. */
+int ioq_opendir(struct ioq *ioq, struct bfs_dir *dir, int dfd, const char *path, enum bfs_dir_flags flags, void *ptr) {
+	struct ioq_ent *ent = ioq_request(ioq, IOQ_OPENDIR, ptr);
+	if (ent == NULL) {
+		return -1;
+	}
+
+	// Fill in the arguments before the entry becomes visible to the workers
+	ent->opendir.dir = dir;
+	ent->opendir.dfd = dfd;
+	ent->opendir.path = path;
+	ent->opendir.flags = flags;
+	ioqq_push(ioq->pending, ent);
+
+	return 0;
+}
+
+/** Queue up a request to close a directory. */
+int ioq_closedir(struct ioq *ioq, struct bfs_dir *dir, void *ptr) {
+	struct ioq_ent *ent = ioq_request(ioq, IOQ_CLOSEDIR, ptr);
+	if (ent == NULL) {
+		return -1;
+	}
+
+	ent->closedir.dir = dir;
+	ioqq_push(ioq->pending, ent);
+
+	return 0;
+}
+
+/**
+ * Queue up a request to stat a file.
+ *
+ * When io_uring and statx() are both available, an extra statx buffer is
+ * allocated for the request as well; if that allocation fails, the entry is
+ * released again and the request fails.
+ */
+int ioq_stat(struct ioq *ioq, int dfd, const char *path, enum bfs_stat_flags flags, struct bfs_stat *buf, void *ptr) {
+	struct ioq_ent *ent = ioq_request(ioq, IOQ_STAT, ptr);
+	if (ent == NULL) {
+		return -1;
+	}
+
+	ent->stat.dfd = dfd;
+	ent->stat.path = path;
+	ent->stat.flags = flags;
+	ent->stat.buf = buf;
+
+#if BFS_USE_LIBURING && BFS_USE_STATX
+	void *xbuf = arena_alloc(&ioq->xbufs);
+	ent->stat.xbuf = xbuf;
+	if (xbuf == NULL) {
+		ioq_free(ioq, ent);
+		return -1;
+	}
+#endif
+
+	ioqq_push(ioq->pending, ent);
+	return 0;
+}
+
+/**
+ * Pop a response from the ready queue.
+ *
+ * Returns NULL right away when the queue has no entries at all, whether or
+ * not `block` is set.
+ */
+struct ioq_ent *ioq_pop(struct ioq *ioq, bool block) {
+	return ioq->size > 0 ? ioqq_pop(ioq->ready, block) : NULL;
+}
+
+/**
+ * Release an entry (and the statx buffer of a stat request, if it has one)
+ * back to the queue's arenas.
+ */
+void ioq_free(struct ioq *ioq, struct ioq_ent *ent) {
+	bfs_assert(ioq->size > 0);
+	ioq->size -= 1;
+
+#if BFS_USE_LIBURING && BFS_USE_STATX
+	if (ent->op == IOQ_STAT) {
+		void *xbuf = ent->stat.xbuf;
+		if (xbuf) {
+			arena_free(&ioq->xbufs, xbuf);
+		}
+	}
+#endif
+
+	arena_free(&ioq->ents, ent);
+}
+
+/**
+ * Set the cancellation flag and, the first time only, push the stop sentinel
+ * onto the pending queue.
+ */
+void ioq_cancel(struct ioq *ioq) {
+	bool already_cancelled = exchange(&ioq->cancel, true, relaxed);
+	if (already_cancelled) {
+		return;
+	}
+
+	ioqq_push(ioq->pending, &IOQ_STOP);
+}
+
+/**
+ * Stop and destroy an I/O queue.
+ *
+ * Accepts NULL, as well as a partially constructed queue from a failed
+ * ioq_create().
+ */
+void ioq_destroy(struct ioq *ioq) {
+	if (ioq == NULL) {
+		return;
+	}
+
+	// Cancel the queue if there are threads to stop, then wait for each one
+	if (ioq->nthreads != 0) {
+		ioq_cancel(ioq);
+	}
+	for (size_t joined = 0; joined < ioq->nthreads; ++joined) {
+		ioq_thread_join(&ioq->threads[joined]);
+	}
+
+	ioqq_destroy(ioq->ready);
+	ioqq_destroy(ioq->pending);
+
+#if BFS_USE_LIBURING && BFS_USE_STATX
+	arena_destroy(&ioq->xbufs);
+#endif
+	arena_destroy(&ioq->ents);
+
+	free(ioq);
+}
diff --git a/src/ioq.h b/src/ioq.h
new file mode 100644
index 0000000..d8e1573
--- /dev/null
+++ b/src/ioq.h
@@ -0,0 +1,198 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+/**
+ * Asynchronous I/O queues.
+ */
+
+#ifndef BFS_IOQ_H
+#define BFS_IOQ_H
+
+#include "prelude.h"
+#include "dir.h"
+#include "stat.h"
+#include <stddef.h>
+
+/**
+ * A queue of asynchronous I/O operations.
+ */
+struct ioq;
+
+/**
+ * I/O queue operations.
+ *
+ * The operation stored in a struct ioq_ent identifies which ioq_*() function
+ * made the request.
+ */
+enum ioq_op {
+ /** ioq_close(). */
+ IOQ_CLOSE,
+ /** ioq_opendir(). */
+ IOQ_OPENDIR,
+ /** ioq_closedir(). */
+ IOQ_CLOSEDIR,
+ /** ioq_stat(). */
+ IOQ_STAT,
+};
+
+/**
+ * The I/O queue implementation needs two tag bits in each pointer to a struct
+ * ioq_ent, so we need to ensure at least 4-byte alignment. The natural
+ * alignment is enough on most architectures, but not m68k, so over-align it.
+ */
+#define IOQ_ENT_ALIGN alignas(4)
+
+/**
+ * An I/O queue entry.
+ *
+ * The same entry carries both the request (op plus the matching member of
+ * the argument union) and the response (result).
+ */
+struct ioq_ent {
+ /** The I/O operation. */
+ IOQ_ENT_ALIGN enum ioq_op op;
+
+ /** The return value (on success) or negative error code (on failure). */
+ int result;
+
+ /** Arbitrary user data. */
+ void *ptr;
+
+ /** Operation-specific arguments. */
+ union {
+ /** ioq_close() args. */
+ struct ioq_close {
+ int fd;
+ } close;
+ /** ioq_opendir() args. */
+ struct ioq_opendir {
+ struct bfs_dir *dir;
+ const char *path;
+ int dfd;
+ enum bfs_dir_flags flags;
+ } opendir;
+ /** ioq_closedir() args. */
+ struct ioq_closedir {
+ struct bfs_dir *dir;
+ } closedir;
+ /** ioq_stat() args. */
+ struct ioq_stat {
+ const char *path;
+ struct bfs_stat *buf;
+ // Not one of ioq_stat()'s parameters
+ // NOTE(review): presumably a scratch buffer owned by the queue
+ // implementation — confirm against ioq.c
+ void *xbuf;
+ int dfd;
+ enum bfs_stat_flags flags;
+ } stat;
+ };
+};
+
+/**
+ * Create an I/O queue.
+ *
+ * @param depth
+ * The maximum depth of the queue.
+ * @param nthreads
+ * The maximum number of background threads.
+ * @return
+ * The new I/O queue, or NULL on failure.
+ */
+struct ioq *ioq_create(size_t depth, size_t nthreads);
+
+/**
+ * Check the remaining capacity of a queue.
+ */
+size_t ioq_capacity(const struct ioq *ioq);
+
+/**
+ * Asynchronous close().
+ *
+ * @param ioq
+ * The I/O queue.
+ * @param fd
+ * The fd to close.
+ * @param ptr
+ * An arbitrary pointer to associate with the request.
+ * @return
+ * 0 on success, or -1 on failure.
+ */
+int ioq_close(struct ioq *ioq, int fd, void *ptr);
+
+/**
+ * Asynchronous bfs_opendir().
+ *
+ * @param ioq
+ * The I/O queue.
+ * @param dir
+ * The allocated directory.
+ * @param dfd
+ * The base file descriptor.
+ * @param path
+ * The path to open, relative to dfd.
+ * @param flags
+ * Flags that control which directory entries are listed.
+ * @param ptr
+ * An arbitrary pointer to associate with the request.
+ * @return
+ * 0 on success, or -1 on failure.
+ */
+int ioq_opendir(struct ioq *ioq, struct bfs_dir *dir, int dfd, const char *path, enum bfs_dir_flags flags, void *ptr);
+
+/**
+ * Asynchronous bfs_closedir().
+ *
+ * @param ioq
+ * The I/O queue.
+ * @param dir
+ * The directory to close.
+ * @param ptr
+ * An arbitrary pointer to associate with the request.
+ * @return
+ * 0 on success, or -1 on failure.
+ */
+int ioq_closedir(struct ioq *ioq, struct bfs_dir *dir, void *ptr);
+
+/**
+ * Asynchronous bfs_stat().
+ *
+ * @param ioq
+ * The I/O queue.
+ * @param dfd
+ * The base file descriptor.
+ * @param path
+ * The path to stat, relative to dfd.
+ * @param flags
+ * Flags that affect the lookup.
+ * @param buf
+ * A place to store the stat buffer, if successful.
+ * @param ptr
+ * An arbitrary pointer to associate with the request.
+ * @return
+ * 0 on success, or -1 on failure.
+ */
+int ioq_stat(struct ioq *ioq, int dfd, const char *path, enum bfs_stat_flags flags, struct bfs_stat *buf, void *ptr);
+
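+/**
+ * Pop a response from the queue.
+ *
+ * @param ioq
+ * The I/O queue.
+ * @param block
+ * Whether to wait for a response if none is ready yet.
+ * @return
+ * The next response, or NULL if there is none (for example, because no
+ * requests are outstanding).
+ */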
+struct ioq_ent *ioq_pop(struct ioq *ioq, bool block);
+
+/**
+ * Free a queue entry.
+ *
+ * @param ioq
+ * The I/O queue.
+ * @param ent
+ * The entry to free.
+ */
+void ioq_free(struct ioq *ioq, struct ioq_ent *ent);
+
+/**
+ * Cancel any pending I/O operations.
+ */
+void ioq_cancel(struct ioq *ioq);
+
+/**
+ * Stop and destroy an I/O queue.
+ */
+void ioq_destroy(struct ioq *ioq);
+
+#endif // BFS_IOQ_H
diff --git a/src/list.h b/src/list.h
new file mode 100644
index 0000000..61d0e5b
--- /dev/null
+++ b/src/list.h
@@ -0,0 +1,581 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+/**
+ * Intrusive linked lists.
+ *
+ * Singly-linked lists are declared like this:
+ *
+ * struct item {
+ * struct item *next;
+ * };
+ *
+ * struct list {
+ * struct item *head;
+ * struct item **tail;
+ * };
+ *
+ * The SLIST_*() macros manipulate singly-linked lists.
+ *
+ * struct list list;
+ * SLIST_INIT(&list);
+ *
+ * struct item item;
+ * SLIST_ITEM_INIT(&item);
+ * SLIST_APPEND(&list, &item);
+ *
+ * Doubly linked lists are similar:
+ *
+ * struct item {
+ * struct item *next;
+ * struct item *prev;
+ * };
+ *
+ * struct list {
+ * struct item *head;
+ * struct item *tail;
+ * };
+ *
+ * struct list list;
+ * LIST_INIT(&list);
+ *
+ * struct item item;
+ * LIST_ITEM_INIT(&item);
+ * LIST_APPEND(&list, &item);
+ *
+ * Items can be on multiple lists at once:
+ *
+ * struct item {
+ * struct {
+ * struct item *next;
+ * } chain;
+ *
+ * struct {
+ * struct item *next;
+ * struct item *prev;
+ * } lru;
+ * };
+ *
+ * struct items {
+ * struct {
+ * struct item *head;
+ * struct item **tail;
+ * } queue;
+ *
+ * struct {
+ * struct item *head;
+ * struct item *tail;
+ * } cache;
+ * };
+ *
+ * struct items items;
+ * SLIST_INIT(&items.queue);
+ * LIST_INIT(&items.cache);
+ *
+ * struct item item;
+ * SLIST_ITEM_INIT(&item, chain);
+ * SLIST_APPEND(&items.queue, &item, chain);
+ * LIST_ITEM_INIT(&item, lru);
+ * LIST_APPEND(&items.cache, &item, lru);
+ */
+
+#ifndef BFS_LIST_H
+#define BFS_LIST_H
+
+#include "diag.h"
+#include <stddef.h>
+#include <string.h>
+
+/**
+ * Initialize a singly-linked list.
+ *
+ * @param list
+ * The list to initialize.
+ *
+ * ---
+ *
+ * Like many macros in this file, this macro delegates the bulk of its work to
+ * some helper macros. We explicitly parenthesize (list) here so the helpers
+ * don't have to.
+ */
+#define SLIST_INIT(list) \
+ SLIST_INIT_((list))
+
+/**
+ * Helper for SLIST_INIT().
+ *
+ * An empty list has a NULL head, and a tail cursor that points at the head
+ * pointer itself.
+ */
+#define SLIST_INIT_(list) LIST_VOID_( \
+ list->head = NULL, \
+ list->tail = &list->head)
+
+/**
+ * Cast a list of expressions to void.
+ *
+ * The arguments form a single comma expression, so they are evaluated from
+ * left to right and the macro as a whole has no value.
+ */
+#define LIST_VOID_(...) ((void)(__VA_ARGS__))
+
+/**
+ * Initialize a singly-linked list item.
+ *
+ * @param item
+ * The item to initialize.
+ * @param node (optional)
+ * If specified, use item->node.next rather than item->next.
+ *
+ * ---
+ *
+ * We play some tricks with variadic macros to handle the optional parameter:
+ *
+ * SLIST_ITEM_INIT(item) => item->next = NULL
+ * SLIST_ITEM_INIT(item, node) => item->node.next = NULL
+ *
+ * The first trick is that
+ *
+ * #define SLIST_ITEM_INIT(item, ...)
+ *
+ * won't work because the comma after item is required even when the optional
+ * argument is left out (until C23; see N3033). As a workaround, we dispatch
+ * to another macro and add a trailing comma.
+ *
+ * SLIST_ITEM_INIT(item) => SLIST_ITEM_INIT_(item, )
+ * SLIST_ITEM_INIT(item, node) => SLIST_ITEM_INIT_(item, node, )
+ */
+#define SLIST_ITEM_INIT(...) \
+ SLIST_ITEM_INIT_(__VA_ARGS__, )
+
+/**
+ * Now we need a way to generate either ->next or ->node.next depending on
+ * whether the node parameter was passed. The approach is based on
+ *
+ * #define FOO(...) BAR(__VA_ARGS__, 1, 2, )
+ * #define BAR(x, y, z, ...) z
+ *
+ * FOO(a) => 2
+ * FOO(a, b) => 1
+ *
+ * The LIST_NEXT_() macro uses this technique:
+ *
+ * LIST_NEXT_() => LIST_NODE_(next, )
+ * LIST_NEXT_(node, ) => LIST_NODE_(next, node, )
+ */
+#define LIST_NEXT_(...) \
+ LIST_NODE_(next, __VA_ARGS__)
+
+/**
+ * LIST_NODE_() dispatches to yet another macro:
+ *
+ * LIST_NODE_(next, ) => LIST_NODE__(next, , . , , )
+ * LIST_NODE_(next, node, ) => LIST_NODE__(next, node, , . , , )
+ *
+ * The first argument (dir) is the name of the link member to generate.
+ */
+#define LIST_NODE_(dir, ...) \
+ LIST_NODE__(dir, __VA_ARGS__, . , , )
+
+/**
+ * And finally, LIST_NODE__() adds the node and the dot if necessary.
+ *
+ * dir node ignored dot
+ * v v v v
+ * LIST_NODE__(next, , . , , ) => next
+ * LIST_NODE__(next, node, , . , , ) => node . next
+ * ^ ^ ^ ^
+ * dir node ignored dot
+ *
+ * Without a node, both the node and dot parameters receive empty arguments,
+ * so only dir is left in the expansion.
+ */
+#define LIST_NODE__(dir, node, ignored, dot, ...) \
+ node dot dir
+
+/**
+ * SLIST_ITEM_INIT_() uses LIST_NEXT_() to generate the right name for the list
+ * node, and finally delegates to the actual implementation.
+ */
+#define SLIST_ITEM_INIT_(item, ...) \
+ SLIST_ITEM_INIT__((item), LIST_NEXT_(__VA_ARGS__))
+
+/**
+ * The actual implementation of SLIST_ITEM_INIT(). Here, next is the member
+ * path generated by LIST_NEXT_() (either next or node.next).
+ */
+#define SLIST_ITEM_INIT__(item, next) \
+ LIST_VOID_(item->next = NULL)
+
+/**
+ * Type-checking macro for singly-linked lists.
+ *
+ * The pointer subtraction only compiles if list->tail has the same type as
+ * &list->head, and wrapping it in sizeof means it is never evaluated.
+ */
+#define SLIST_CHECK_(list) \
+ (void)sizeof(list->tail - &list->head)
+
+/**
+ * Get the head of a singly-linked list.
+ *
+ * @param list
+ * The list in question.
+ * @return
+ * The first item in the list (NULL if the list is empty).
+ */
+#define SLIST_HEAD(list) \
+ SLIST_HEAD_((list))
+
+/**
+ * Helper for SLIST_HEAD(); also type-checks the list with SLIST_CHECK_().
+ */
+#define SLIST_HEAD_(list) \
+ (SLIST_CHECK_(list), list->head)
+
+/**
+ * Check if a singly-linked list is empty.
+ *
+ * @param list
+ * The list to check.
+ * @return
+ * Whether the list has no head.
+ */
+#define SLIST_EMPTY(list) \
+ (!SLIST_HEAD(list))
+
+/**
+ * Like container_of(), but using the head pointer instead of offsetof() since
+ * we don't have the type around.
+ *
+ * The offset of the next member is measured on head and then subtracted from
+ * tail, which is expected to point at some item's next member. The result is
+ * a void pointer to that item.
+ */
+#define SLIST_CONTAINER_(tail, head, next) \
+ (void *)((char *)tail - ((char *)&head->next - (char *)head))
+
+/**
+ * Get the tail of a singly-linked list.
+ *
+ * @param list
+ * The list in question.
+ * @param node (optional)
+ * If specified, use item->node.next rather than item->next.
+ * @return
+ * The last item in the list (as a void pointer), or NULL if the list is
+ * empty.
+ */
+#define SLIST_TAIL(...) \
+ SLIST_TAIL_(__VA_ARGS__, )
+
+#define SLIST_TAIL_(list, ...) \
+ SLIST_TAIL__((list), LIST_NEXT_(__VA_ARGS__))
+
+/**
+ * For a non-empty list, list->tail points at the last item's next member, so
+ * SLIST_CONTAINER_() can turn it back into a pointer to that item.
+ */
+#define SLIST_TAIL__(list, next) \
+ (list->head ? SLIST_CONTAINER_(list->tail, list->head, next) : NULL)
+
+/**
+ * Check if an item is attached to a singly-linked list.
+ *
+ * @param list
+ * The list to check.
+ * @param item
+ * The item to check.
+ * @param node (optional)
+ * If specified, use item->node.next rather than item->next.
+ * @return
+ * Whether the item is attached to the list.
+ */
+#define SLIST_ATTACHED(list, ...) \
+ SLIST_ATTACHED_(list, __VA_ARGS__, )
+
+#define SLIST_ATTACHED_(list, item, ...) \
+ SLIST_ATTACHED__((list), (item), LIST_NEXT_(__VA_ARGS__))
+
+/**
+ * An item counts as attached if it has a successor, or if the list's tail
+ * cursor points at its next member (i.e. it is the last item of this list).
+ */
+#define SLIST_ATTACHED__(list, item, next) \
+ (item->next || list->tail == &item->next)
+
+/**
+ * Insert an item into a singly-linked list.
+ *
+ * @param list
+ * The list to modify.
+ * @param cursor
+ * A pointer to the item to insert after, e.g. &list->head or list->tail.
+ * @param item
+ * The item to insert.
+ * @param node (optional)
+ * If specified, use item->node.next rather than item->next.
+ * @return
+ * A cursor for the next item.
+ */
+#define SLIST_INSERT(list, cursor, ...) \
+ SLIST_INSERT_(list, cursor, __VA_ARGS__, )
+
+#define SLIST_INSERT_(list, cursor, item, ...) \
+ SLIST_INSERT__((list), (cursor), (item), LIST_NEXT_(__VA_ARGS__))
+
+/**
+ * The item must not be attached already. It takes over whatever the cursor
+ * pointed to as its successor, and the cursor is made to point to the item.
+ * If the item ends up without a successor it is the new last item, so the
+ * list's tail cursor is moved to its next member.
+ */
+#define SLIST_INSERT__(list, cursor, item, next) \
+ (bfs_assert(!SLIST_ATTACHED__(list, item, next)), \
+ item->next = *cursor, \
+ *cursor = item, \
+ list->tail = item->next ? list->tail : &item->next, \
+ &item->next)
+
+/**
+ * Add an item to the tail of a singly-linked list.
+ *
+ * @param list
+ * The list to modify.
+ * @param item
+ * The item to append.
+ * @param node (optional)
+ * If specified, use item->node.next rather than item->next.
+ */
+#define SLIST_APPEND(list, ...) \
+ SLIST_APPEND_(list, __VA_ARGS__, )
+
+/**
+ * Appending is inserting at the list's tail cursor; the cursor that
+ * SLIST_INSERT_() returns is discarded.
+ */
+#define SLIST_APPEND_(list, item, ...) \
+ LIST_VOID_(SLIST_INSERT_(list, (list)->tail, item, __VA_ARGS__))
+
+/**
+ * Add an item to the head of a singly-linked list.
+ *
+ * @param list
+ * The list to modify.
+ * @param item
+ * The item to prepend.
+ * @param node (optional)
+ * If specified, use item->node.next rather than item->next.
+ */
+#define SLIST_PREPEND(list, ...) \
+ SLIST_PREPEND_(list, __VA_ARGS__, )
+
+/**
+ * Prepending is inserting at the cursor &list->head; the cursor that
+ * SLIST_INSERT_() returns is discarded.
+ */
+#define SLIST_PREPEND_(list, item, ...) \
+ LIST_VOID_(SLIST_INSERT_(list, &(list)->head, item, __VA_ARGS__))
+
+/**
+ * Add an entire singly-linked list to the tail of another.
+ *
+ * @param dest
+ * The destination list.
+ * @param src
+ * The source list. It is left empty (re-initialized) afterwards.
+ */
+#define SLIST_EXTEND(dest, src) \
+ SLIST_EXTEND_((dest), (src))
+
+/**
+ * Nothing happens if src is empty. Otherwise src's items are linked in at
+ * dest's tail cursor, dest takes over src's tail cursor, and src is reset.
+ */
+#define SLIST_EXTEND_(dest, src) \
+ (src->head ? (*dest->tail = src->head, dest->tail = src->tail, SLIST_INIT(src)) : (void)0)
+
+/**
+ * Remove an item from a singly-linked list.
+ *
+ * @param list
+ * The list to modify.
+ * @param cursor
+ * A pointer to the item to remove, either &list->head or &prev->next.
+ * @param node (optional)
+ * If specified, use item->node.next rather than item->next.
+ * @return
+ * The removed item.
+ */
+#define SLIST_REMOVE(list, ...) \
+ SLIST_REMOVE_(list, __VA_ARGS__, )
+
+#define SLIST_REMOVE_(list, cursor, ...) \
+ SLIST_REMOVE__((list), (cursor), LIST_NEXT_(__VA_ARGS__))
+
+/**
+ * If the removed item has no successor, the cursor becomes the list's new
+ * tail cursor. The unlinking itself is done by slist_remove_impl().
+ */
+#define SLIST_REMOVE__(list, cursor, next) \
+ (list->tail = (*cursor)->next ? list->tail : cursor, \
+ slist_remove_impl(*cursor, cursor, &(*cursor)->next, sizeof(*cursor)))
+
+/**
+ * Helper for SLIST_REMOVE().
+ *
+ * Works on raw memory, so it needs no knowledge of the item type.
+ *
+ * @param ret
+ * The item being removed (the value of *cursor before the call).
+ * @param cursor
+ * The cursor that points to the item.
+ * @param next
+ * A pointer to the item's next member.
+ * @param size
+ * The size of an item pointer.
+ * @return
+ * ret, i.e. the removed item.
+ */
+static inline void *slist_remove_impl(void *ret, void *cursor, void *next, size_t size) {
+ // ret = *cursor;
+ // *cursor = ret->next;
+ memcpy(cursor, next, size);
+ // ret->next = NULL;
+ // (assumes a null pointer is represented by all-zero bytes)
+ memset(next, 0, size);
+ return ret;
+}
+
+/**
+ * Pop the head off a singly-linked list.
+ *
+ * @param list
+ * The list to modify.
+ * @param node (optional)
+ * If specified, use head->node.next rather than head->next.
+ * @return
+ * The popped item, or NULL if the list was empty.
+ */
+#define SLIST_POP(...) \
+ SLIST_POP_(__VA_ARGS__, )
+
+#define SLIST_POP_(list, ...) \
+ SLIST_POP__((list), __VA_ARGS__)
+
+/**
+ * Popping is removing at the cursor &list->head, unless the list is empty.
+ */
+#define SLIST_POP__(list, ...) \
+ (list->head ? SLIST_REMOVE_(list, &list->head, __VA_ARGS__) : NULL)
+
+/**
+ * Loop over the items in a singly-linked list.
+ *
+ * The successor of the current item is read before the loop body runs, so
+ * the loop does not look at the current item again after the body.
+ *
+ * @param type
+ * The list item type.
+ * @param item
+ * The induction variable name.
+ * @param list
+ * The list to iterate.
+ * @param node (optional)
+ * If specified, use head->node.next rather than head->next.
+ */
+#define for_slist(type, item, ...) \
+ for_slist_(type, item, __VA_ARGS__, )
+
+#define for_slist_(type, item, list, ...) \
+ for_slist__(type, item, (list), LIST_NEXT_(__VA_ARGS__))
+
+/**
+ * Besides item, the loop declares a second variable named _next that holds
+ * the item to visit next.
+ */
+#define for_slist__(type, item, list, next) \
+ for (type *item = list->head, *_next; \
+ item && (SLIST_CHECK_(list), _next = item->next, true); \
+ item = _next)
+
+/**
+ * Initialize a doubly-linked list.
+ *
+ * @param list
+ * The list to initialize.
+ */
+#define LIST_INIT(list) \
+ LIST_INIT_((list))
+
+/**
+ * Helper for LIST_INIT(). An empty list has a NULL head and a NULL tail.
+ */
+#define LIST_INIT_(list) \
+ LIST_VOID_(list->head = list->tail = NULL)
+
+/**
+ * The counterpart of LIST_NEXT_() for the prev member:
+ *
+ * LIST_PREV_() => prev
+ * LIST_PREV_(node, ) => node.prev
+ */
+#define LIST_PREV_(...) \
+ LIST_NODE_(prev, __VA_ARGS__)
+
+/**
+ * Initialize a doubly-linked list item.
+ *
+ * @param item
+ * The item to initialize.
+ * @param node (optional)
+ * If specified, use item->node.{prev,next} rather than item->{prev,next}.
+ */
+#define LIST_ITEM_INIT(...) \
+ LIST_ITEM_INIT_(__VA_ARGS__, )
+
+#define LIST_ITEM_INIT_(item, ...) \
+ LIST_ITEM_INIT__((item), LIST_PREV_(__VA_ARGS__), LIST_NEXT_(__VA_ARGS__))
+
+/**
+ * A detached item has both links set to NULL.
+ */
+#define LIST_ITEM_INIT__(item, prev, next) \
+ LIST_VOID_(item->prev = item->next = NULL)
+
+/**
+ * Type-checking macro for doubly-linked lists.
+ *
+ * The pointer subtraction only compiles if list->tail and list->head have
+ * the same pointer type, and wrapping it in sizeof means it is never
+ * evaluated.
+ */
+#define LIST_CHECK_(list) \
+ (void)sizeof(list->tail - list->head)
+
+/**
+ * Check if a doubly-linked list is empty.
+ *
+ * @param list
+ * The list to check.
+ * @return
+ * Whether the list has no head.
+ */
+#define LIST_EMPTY(list) \
+ LIST_EMPTY_((list))
+
+#define LIST_EMPTY_(list) \
+ (LIST_CHECK_(list), !list->head)
+
+/**
+ * Add an item to the tail of a doubly-linked list.
+ *
+ * Implemented as an insertion after the current tail.
+ *
+ * @param list
+ * The list to modify.
+ * @param item
+ * The item to append.
+ * @param node (optional)
+ * If specified, use item->node.{prev,next} rather than item->{prev,next}.
+ */
+#define LIST_APPEND(list, ...) \
+ LIST_INSERT(list, (list)->tail, __VA_ARGS__)
+
+/**
+ * Add an item to the head of a doubly-linked list.
+ *
+ * Implemented as an insertion with a NULL cursor.
+ *
+ * NOTE(review): LIST_INSERT__() expands cursor->next textually, so this
+ * produces (NULL)->next in the unevaluated branch of a conditional. Confirm
+ * that this compiles wherever the macro is used (it should not if NULL is a
+ * void pointer constant).
+ *
+ * @param list
+ * The list to modify.
+ * @param item
+ * The item to prepend.
+ * @param node (optional)
+ * If specified, use item->node.{prev,next} rather than item->{prev,next}.
+ */
+#define LIST_PREPEND(list, ...) \
+ LIST_INSERT(list, NULL, __VA_ARGS__)
+
+/**
+ * Check if an item is attached to a doubly-linked list.
+ *
+ * @param list
+ * The list to check.
+ * @param item
+ * The item to check.
+ * @param node (optional)
+ * If specified, use item->node.{prev,next} rather than item->{prev,next}.
+ * @return
+ * Whether the item is attached to the list.
+ */
+#define LIST_ATTACHED(list, ...) \
+ LIST_ATTACHED_(list, __VA_ARGS__, )
+
+#define LIST_ATTACHED_(list, item, ...) \
+ LIST_ATTACHED__((list), (item), LIST_PREV_(__VA_ARGS__), LIST_NEXT_(__VA_ARGS__))
+
+/**
+ * An item counts as attached if it has a neighbour on either side, or if it
+ * is the head or tail of this list (which covers a list of one item).
+ */
+#define LIST_ATTACHED__(list, item, prev, next) \
+ (item->prev || item->next || list->head == item || list->tail == item)
+
+/**
+ * Insert into a doubly-linked list after the given cursor.
+ *
+ * @param list
+ * The list to modify.
+ * @param cursor
+ * Insert after this element. With a null cursor, the item is inserted at
+ * the head of the list.
+ * @param item
+ * The item to insert.
+ * @param node (optional)
+ * If specified, use item->node.{prev,next} rather than item->{prev,next}.
+ */
+#define LIST_INSERT(list, cursor, ...) \
+ LIST_INSERT_(list, cursor, __VA_ARGS__, )
+
+#define LIST_INSERT_(list, cursor, item, ...) \
+ LIST_INSERT__((list), (cursor), (item), LIST_PREV_(__VA_ARGS__), LIST_NEXT_(__VA_ARGS__))
+
+/**
+ * The item must not be attached already. Its own links are set first; then
+ * whatever should point forward to it (the previous item, or the list head)
+ * and whatever should point back to it (the next item, or the list tail) are
+ * updated.
+ */
+#define LIST_INSERT__(list, cursor, item, prev, next) LIST_VOID_( \
+ bfs_assert(!LIST_ATTACHED__(list, item, prev, next)), \
+ item->prev = cursor, \
+ item->next = cursor ? cursor->next : list->head, \
+ *(item->prev ? &item->prev->next : &list->head) = item, \
+ *(item->next ? &item->next->prev : &list->tail) = item)
+
+/**
+ * Remove an item from a doubly-linked list.
+ *
+ * @param list
+ * The list to modify.
+ * @param item
+ * The item to remove.
+ * @param node (optional)
+ * If specified, use item->node.{prev,next} rather than item->{prev,next}.
+ */
+#define LIST_REMOVE(list, ...) \
+ LIST_REMOVE_(list, __VA_ARGS__, )
+
+#define LIST_REMOVE_(list, item, ...) \
+ LIST_REMOVE__((list), (item), LIST_PREV_(__VA_ARGS__), LIST_NEXT_(__VA_ARGS__))
+
+/**
+ * Whatever pointed forward to the item (the previous item, or the list head)
+ * now points to its successor, and whatever pointed back to it (the next
+ * item, or the list tail) now points to its predecessor. The item's own
+ * links are reset to NULL afterwards.
+ */
+#define LIST_REMOVE__(list, item, prev, next) LIST_VOID_( \
+ *(item->prev ? &item->prev->next : &list->head) = item->next, \
+ *(item->next ? &item->next->prev : &list->tail) = item->prev, \
+ item->prev = item->next = NULL)
+
+/**
+ * Loop over the items in a doubly-linked list.
+ *
+ * The successor of the current item is read before the loop body runs, so
+ * the loop does not look at the current item again after the body.
+ *
+ * @param type
+ * The list item type.
+ * @param item
+ * The induction variable name.
+ * @param list
+ * The list to iterate.
+ * @param node (optional)
+ * If specified, use head->node.next rather than head->next.
+ */
+#define for_list(type, item, ...) \
+ for_list_(type, item, __VA_ARGS__, )
+
+#define for_list_(type, item, list, ...) \
+ for_list__(type, item, (list), LIST_NEXT_(__VA_ARGS__))
+
+/**
+ * Besides item, the loop declares a second variable named _next that holds
+ * the item to visit next.
+ */
+#define for_list__(type, item, list, next) \
+ for (type *item = list->head, *_next; \
+ item && (LIST_CHECK_(list), _next = item->next, true); \
+ item = _next)
+
+#endif // BFS_LIST_H
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..5dd88e4
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,150 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+/**
+ * - main(): the entry point for bfs(1), a breadth-first version of find(1)
+ * - main.c (this file)
+ *
+ * - bfs_parse_cmdline(): parses the command line into an expression tree
+ * - ctx.[ch] (struct bfs_ctx, the overall bfs context)
+ * - expr.h (declares the expression tree nodes)
+ * - parse.[ch] (the parser itself)
+ * - opt.[ch] (the optimizer)
+ *
+ * - bfs_eval(): runs the expression on every file it sees
+ * - eval.[ch] (the main evaluation functions)
+ * - exec.[ch] (implements -exec[dir]/-ok[dir])
+ * - printf.[ch] (implements -[f]printf)
+ *
+ * - bftw(): used by bfs_eval() to walk the directory tree(s)
+ * - bftw.[ch] (an extended version of nftw(3))
+ *
+ * - Utilities:
+ * - alloc.[ch] (memory allocation)
+ * - atomic.h (atomic operations)
+ * - bar.[ch] (a terminal status bar)
+ * - bit.h (bit manipulation)
+ * - bfstd.[ch] (standard library wrappers/polyfills)
+ * - color.[ch] (for pretty terminal colors)
+ * - prelude.h (configuration and feature/platform detection)
+ * - diag.[ch] (formats diagnostic messages)
+ * - dir.[ch] (a directory API facade)
+ * - dstring.[ch] (a dynamic string library)
+ * - fsade.[ch] (a facade over non-standard filesystem features)
+ * - ioq.[ch] (an async I/O queue)
+ * - list.h (linked list macros)
+ * - mtab.[ch] (parses the system's mount table)
+ * - pwcache.[ch] (a cache for the user/group tables)
+ * - sanity.h (sanitizer interfaces)
+ * - sighook.[ch] (signal hooks)
+ * - stat.[ch] (wraps stat(), or statx() on Linux)
+ * - thread.[ch] (multi-threading)
+ * - trie.[ch] (a trie set/map implementation)
+ * - typo.[ch] (fuzzy matching for typos)
+ * - xregex.[ch] (regular expression support)
+ * - xspawn.[ch] (spawns processes)
+ * - xtime.[ch] (date/time handling utilities)
+ */
+
+#include "prelude.h"
+#include "bfstd.h"
+#include "ctx.h"
+#include "diag.h"
+#include "eval.h"
+#include "parse.h"
+#include <errno.h>
+#include <fcntl.h>
+#include <locale.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <unistd.h>
+
+/**
+ * Test whether a file descriptor currently refers to an open file.
+ *
+ * The descriptor is only considered closed when fcntl() fails with EBADF;
+ * any other failure is treated as "open".
+ */
+static bool isopen(int fd) {
+	if (fcntl(fd, F_GETFD) >= 0) {
+		return true;
+	}
+
+	return errno != EBADF;
+}
+
+/**
+ * Open a path and make a specific descriptor refer to it.
+ *
+ * @param fd
+ *         The descriptor that should end up referring to the file.
+ * @param path
+ *         The file to open.
+ * @param flags
+ *         The open() flags to use.
+ * @return
+ *         The result of open() if it failed or already landed on fd,
+ *         otherwise the result of dup2().
+ */
+static int redirect(int fd, const char *path, int flags) {
+	int opened = open(path, flags);
+
+	if (opened >= 0 && opened != fd) {
+		// Move the new descriptor into place, then drop the temporary one
+		int duped = dup2(opened, fd);
+		close_quietly(opened);
+		return duped;
+	}
+
+	return opened;
+}
+
+/**
+ * Make sure the standard streams std{in,out,err} are open. If they are not,
+ * future open() calls may use those file descriptors, and std{in,out,err} will
+ * use them unintentionally.
+ *
+ * Every standard descriptor that is found closed is redirected to /dev/null.
+ *
+ * @return
+ * 0 on success, or -1 if a closed stream could not be redirected.
+ */
+static int open_std_streams(void) {
+#ifdef O_PATH
+ const int inflags = O_PATH, outflags = O_PATH;
+#else
+ // These are intentionally backwards so that bfs >&- still fails with EBADF
+ const int inflags = O_WRONLY, outflags = O_RDONLY;
+#endif
+
+ // stderr goes first, and fails silently: perror() writes to stderr, which
+ // could not be opened in this case
+ if (!isopen(STDERR_FILENO) && redirect(STDERR_FILENO, "/dev/null", outflags) < 0) {
+ return -1;
+ }
+ // From here on stderr is open, so failures can be reported
+ if (!isopen(STDOUT_FILENO) && redirect(STDOUT_FILENO, "/dev/null", outflags) < 0) {
+ perror("redirect()");
+ return -1;
+ }
+ if (!isopen(STDIN_FILENO) && redirect(STDIN_FILENO, "/dev/null", inflags) < 0) {
+ perror("redirect()");
+ return -1;
+ }
+
+ return 0;
+}
+
+/**
+ * bfs entry point.
+ *
+ * @param argc
+ * The number of command line arguments.
+ * @param argv
+ * The command line arguments, handed to bfs_parse_cmdline().
+ * @return
+ * The status from bfs_eval(), or EXIT_FAILURE if start-up, parsing, or the
+ * final bfs_ctx_free() fails.
+ */
+int main(int argc, char *argv[]) {
+ // Make sure the standard streams are open
+ if (open_std_streams() != 0) {
+ return EXIT_FAILURE;
+ }
+
+ // Use the system locale instead of "C"
+ // NOTE(review): the warning below only fires when errno is non-zero here;
+ // POSIX defines no errors for setlocale(), so confirm that every supported
+ // libc sets errno when it fails.
+ int locale_err = 0;
+ if (!setlocale(LC_ALL, "")) {
+ locale_err = errno;
+ }
+
+ // Apply the environment's timezone
+ tzset();
+
+ // Parse the command line
+ struct bfs_ctx *ctx = bfs_parse_cmdline(argc, argv);
+ if (!ctx) {
+ return EXIT_FAILURE;
+ }
+
+ // Warn if setlocale() failed, unless there's no expression to evaluate
+ // (the warning is deferred until now because it needs the parsed context)
+ if (locale_err && ctx->warn && ctx->expr) {
+ bfs_warning(ctx, "Failed to set locale: %s\n\n", xstrerror(locale_err));
+ }
+
+ // Walk the file system tree, evaluating the expression on each file
+ int ret = bfs_eval(ctx);
+
+ // Free the parsed command line, and detect any last-minute errors
+ // (a failure here only overrides an otherwise successful exit status)
+ if (bfs_ctx_free(ctx) != 0 && ret == EXIT_SUCCESS) {
+ ret = EXIT_FAILURE;
+ }
+
+ return ret;
+}
diff --git a/src/mtab.c b/src/mtab.c
new file mode 100644
index 0000000..0377fea
--- /dev/null
+++ b/src/mtab.c
@@ -0,0 +1,303 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+#include "prelude.h"
+#include "mtab.h"
+#include "alloc.h"
+#include "bfstd.h"
+#include "stat.h"
+#include "trie.h"
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+
+// Pick the mount table API to use. The candidates are tried in order
+// (one-argument getmntent(), getmntinfo(), two-argument getmntent()), and the
+// first one that is available and whose BFS_USE_* macro has not already been
+// defined is enabled. At most one BFS_USE_* macro is defined by this chain.
+#if !defined(BFS_USE_MNTENT) && BFS_HAS_GETMNTENT_1
+# define BFS_USE_MNTENT true
+#elif !defined(BFS_USE_MNTINFO) && BFS_HAS_GETMNTINFO
+# define BFS_USE_MNTINFO true
+#elif !defined(BFS_USE_MNTTAB) && BFS_HAS_GETMNTENT_2
+# define BFS_USE_MNTTAB true
+#endif
+
+#if BFS_USE_MNTENT
+# include <mntent.h>
+# include <paths.h>
+# include <stdio.h>
+#elif BFS_USE_MNTINFO
+# include <sys/mount.h>
+#elif BFS_USE_MNTTAB
+# include <stdio.h>
+# include <sys/mnttab.h>
+#endif
+
+/**
+ * A mount point in the table.
+ *
+ * Both strings live in the trailing flexible array: bfs_mtab_add() stores the
+ * path at the start of buf and the type immediately after the path's NUL.
+ */
+struct bfs_mount {
+ /** The path to the mount point. */
+ char *path;
+ /** The filesystem type. */
+ char *type;
+ /** Buffer for the strings. */
+ char buf[];
+};
+
+/**
+ * The mount table: the definition of the type that mtab.h declares opaquely.
+ */
+struct bfs_mtab {
+ /** Mount point arena. */
+ struct varena varena;
+
+ /** The array of mount points. */
+ struct bfs_mount **mounts;
+ /** The number of mount points. */
+ size_t nmounts;
+
+ /** The basenames of every mount point. */
+ struct trie names;
+
+ /** A map from device ID to fstype (populated lazily). */
+ struct trie types;
+ /** Whether the types map has been populated. */
+ bool types_filled;
+};
+
+/**
+ * Add an entry to the mount table.
+ *
+ * @param mtab
+ * The mount table to extend.
+ * @param path
+ * The mount point's path (copied).
+ * @param type
+ * The filesystem type (copied).
+ * @return
+ * 0 on success, -1 on failure.
+ */
+// maybe_unused: the fallback branch of bfs_mtab_parse() never calls this
+attr(maybe_unused)
+static int bfs_mtab_add(struct bfs_mtab *mtab, const char *path, const char *type) {
+ // Both strings, including their NUL terminators, share the mount's buffer
+ size_t path_size = strlen(path) + 1;
+ size_t type_size = strlen(type) + 1;
+ size_t size = path_size + type_size;
+ struct bfs_mount *mount = varena_alloc(&mtab->varena, size);
+ if (!mount) {
+ return -1;
+ }
+
+ // RESERVE() is defined elsewhere; it presumably grows mounts by one slot and
+ // increments nmounts, which the shrink label below undoes -- confirm in alloc.h
+ struct bfs_mount **ptr = RESERVE(struct bfs_mount *, &mtab->mounts, &mtab->nmounts);
+ if (!ptr) {
+ goto free;
+ }
+ *ptr = mount;
+
+ mount->path = mount->buf;
+ memcpy(mount->path, path, path_size);
+
+ mount->type = mount->buf + path_size;
+ memcpy(mount->type, type, type_size);
+
+ // Remember the basename, for bfs_might_be_mount()
+ const char *name = path + xbaseoff(path);
+ if (!trie_insert_str(&mtab->names, name)) {
+ goto shrink;
+ }
+
+ return 0;
+
+shrink:
+ // Drop the slot that was just reserved for this mount
+ --mtab->nmounts;
+free:
+ varena_free(&mtab->varena, mount, size);
+ return -1;
+}
+
+/**
+ * Parse the mount table with whichever platform API was selected at the top of
+ * this file.
+ *
+ * @return
+ * The new mount table, or NULL on failure with errno set. When no supported
+ * API is available, this always fails with ENOTSUP.
+ */
+struct bfs_mtab *bfs_mtab_parse(void) {
+ struct bfs_mtab *mtab = ZALLOC(struct bfs_mtab);
+ if (!mtab) {
+ return NULL;
+ }
+
+ VARENA_INIT(&mtab->varena, struct bfs_mount, buf);
+
+ trie_init(&mtab->names);
+ trie_init(&mtab->types);
+
+ // The errno value to report if we end up at the fail label
+ int error = 0;
+
+#if BFS_USE_MNTENT
+
+ FILE *file = setmntent(_PATH_MOUNTED, "r");
+ if (!file) {
+ // In case we're in a chroot or something with /proc but no /etc/mtab
+ // (if the fallback fails too, the error from the first attempt is reported)
+ error = errno;
+ file = setmntent("/proc/mounts", "r");
+ }
+ if (!file) {
+ goto fail;
+ }
+
+ struct mntent *mnt;
+ while ((mnt = getmntent(file))) {
+ if (bfs_mtab_add(mtab, mnt->mnt_dir, mnt->mnt_type) != 0) {
+ error = errno;
+ endmntent(file);
+ goto fail;
+ }
+ }
+
+ endmntent(file);
+
+#elif BFS_USE_MNTINFO
+
+#if __NetBSD__
+ typedef struct statvfs bfs_statfs;
+#else
+ typedef struct statfs bfs_statfs;
+#endif
+
+ // mntbuf is filled in by getmntinfo() and is never freed in this function
+ bfs_statfs *mntbuf;
+ int size = getmntinfo(&mntbuf, MNT_WAIT);
+ if (size <= 0) {
+ error = errno;
+ goto fail;
+ }
+
+ for (bfs_statfs *mnt = mntbuf; mnt < mntbuf + size; ++mnt) {
+ if (bfs_mtab_add(mtab, mnt->f_mntonname, mnt->f_fstypename) != 0) {
+ error = errno;
+ goto fail;
+ }
+ }
+
+#elif BFS_USE_MNTTAB
+
+ FILE *file = xfopen(MNTTAB, O_RDONLY | O_CLOEXEC);
+ if (!file) {
+ error = errno;
+ goto fail;
+ }
+
+ struct mnttab mnt;
+ while (getmntent(file, &mnt) == 0) {
+ if (bfs_mtab_add(mtab, mnt.mnt_mountp, mnt.mnt_fstype) != 0) {
+ error = errno;
+ fclose(file);
+ goto fail;
+ }
+ }
+
+ fclose(file);
+
+#else
+
+ // No supported way to read the mount table on this platform
+ error = ENOTSUP;
+ goto fail;
+
+#endif
+
+ return mtab;
+
+fail:
+ // Free first, then set errno, so the cleanup cannot clobber the saved error
+ bfs_mtab_free(mtab);
+ errno = error;
+ return NULL;
+}
+
+/**
+ * Populate the device ID -> filesystem type map from the parsed mount points.
+ *
+ * Mount points that cannot be stat()ed, or that no longer look like mount
+ * points, are skipped rather than treated as errors.
+ *
+ * @param mtab
+ * The mount table to fill in.
+ * @return
+ * 0 on success (and mtab->types_filled is set), or -1 if xdirname() or
+ * trie_insert_mem() fails.
+ */
+static int bfs_mtab_fill_types(struct bfs_mtab *mtab) {
+ const enum bfs_stat_flags flags = BFS_STAT_NOFOLLOW | BFS_STAT_NOSYNC;
+ int ret = -1;
+
+ // It's possible that /path/to/mount was unmounted between bfs_mtab_parse() and bfs_mtab_fill_types().
+ // In that case, the dev_t of /path/to/mount will be the same as /path/to, which should not get its
+ // fstype from the old mount record of /path/to/mount.
+ //
+ // Detect this by comparing the st_dev of the parent (/path/to) and child (/path/to/mount). Only when
+ // they differ can the filesystem type actually change between them. As a minor optimization, we keep
+ // the parent directory open in case multiple mounts have the same parent (e.g. /mnt).
+ char *parent_dir = NULL;
+ int parent_fd = -1;
+ // Result of the bfs_stat() on the cached parent; parent_stat is only valid when this is 0
+ int parent_ret = -1;
+ struct bfs_stat parent_stat;
+
+ for (size_t i = 0; i < mtab->nmounts; ++i) {
+ struct bfs_mount *mount = mtab->mounts[i];
+ const char *path = mount->path;
+ int fd = AT_FDCWD;
+
+ char *dir = xdirname(path);
+ if (!dir) {
+ goto fail;
+ }
+
+ if (parent_dir && strcmp(parent_dir, dir) == 0) {
+ // Same parent
+ free(dir);
+ } else {
+ // Different parent: replace the cached name, descriptor, and stat result
+ free(parent_dir);
+ parent_dir = dir;
+
+ if (parent_fd >= 0) {
+ xclose(parent_fd);
+ }
+ parent_fd = open(parent_dir, O_SEARCH | O_CLOEXEC | O_DIRECTORY);
+
+ parent_ret = -1;
+ if (parent_fd >= 0) {
+ parent_ret = bfs_stat(parent_fd, NULL, flags, &parent_stat);
+ }
+ }
+
+ // With the parent open, stat the basename relative to it; otherwise fall
+ // back to the full path relative to AT_FDCWD
+ if (parent_fd >= 0) {
+ fd = parent_fd;
+ path += xbaseoff(path);
+ }
+
+ struct bfs_stat sb;
+ if (bfs_stat(fd, path, flags, &sb) != 0) {
+ continue;
+ }
+
+ // (the inode comparison presumably exempts paths such as "/" that are their own parent)
+ if (parent_ret == 0 && parent_stat.dev == sb.dev && parent_stat.ino != sb.ino) {
+ // Not a mount point any more (or a bind mount, but with the same fstype)
+ continue;
+ }
+
+ // The stored value points into the mount record, so it lives as long as the table
+ struct trie_leaf *leaf = trie_insert_mem(&mtab->types, &sb.dev, sizeof(sb.dev));
+ if (leaf) {
+ leaf->value = mount->type;
+ } else {
+ goto fail;
+ }
+ }
+
+ mtab->types_filled = true;
+ ret = 0;
+
+fail:
+ // Shared cleanup for both the success and failure paths
+ if (parent_fd >= 0) {
+ xclose(parent_fd);
+ }
+ free(parent_dir);
+ return ret;
+}
+
+/**
+ * Look up the filesystem type for the device a file lives on.
+ *
+ * @param mtab
+ *         The mount table to consult.
+ * @param statbuf
+ *         The bfs_stat() buffer of the file; only its dev field is used.
+ * @return
+ *         The filesystem type, "unknown" if the device is not in the map, or
+ *         NULL if the map could not be populated.
+ */
+const char *bfs_fstype(const struct bfs_mtab *mtab, const struct bfs_stat *statbuf) {
+	// The device -> type map is built on first use, hence the const cast
+	if (!mtab->types_filled && bfs_mtab_fill_types((struct bfs_mtab *)mtab) != 0) {
+		return NULL;
+	}
+
+	const struct trie_leaf *found = trie_find_mem(&mtab->types, &statbuf->dev, sizeof(statbuf->dev));
+	if (!found) {
+		return "unknown";
+	}
+
+	return found->value;
+}
+
+/**
+ * Check whether a name matches the basename of any known mount point.
+ */
+bool bfs_might_be_mount(const struct bfs_mtab *mtab, const char *name) {
+	return trie_find_str(&mtab->names, name) != NULL;
+}
+
+/**
+ * Release a mount table and everything it owns.  A NULL table is ignored.
+ */
+void bfs_mtab_free(struct bfs_mtab *mtab) {
+	if (!mtab) {
+		return;
+	}
+
+	trie_destroy(&mtab->types);
+	trie_destroy(&mtab->names);
+
+	free(mtab->mounts);
+	varena_destroy(&mtab->varena);
+
+	free(mtab);
+}
diff --git a/src/mtab.h b/src/mtab.h
new file mode 100644
index 0000000..67290c2
--- /dev/null
+++ b/src/mtab.h
@@ -0,0 +1,58 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+/**
+ * A facade over platform-specific APIs for enumerating mounted filesystems.
+ */
+
+#ifndef BFS_MTAB_H
+#define BFS_MTAB_H
+
+#include "prelude.h"
+
+struct bfs_stat;
+
+/**
+ * A file system mount table.
+ */
+struct bfs_mtab;
+
+/**
+ * Parse the mount table.
+ *
+ * @return
+ * The parsed mount table, or NULL on error (with errno set). Free it
+ * with bfs_mtab_free().
+ */
+struct bfs_mtab *bfs_mtab_parse(void);
+
+/**
+ * Determine the file system type that a file is on.
+ *
+ * The device-to-type map is populated lazily by the first call, even though
+ * the table is passed as const.
+ *
+ * @param mtab
+ * The current mount table.
+ * @param statbuf
+ * The bfs_stat() buffer for the file in question.
+ * @return
+ * The type of file system containing this file, "unknown" if not known,
+ * or NULL on error. The string is either owned by the mount table or a
+ * string literal, so it must not be freed by the caller.
+ */
+const char *bfs_fstype(const struct bfs_mtab *mtab, const struct bfs_stat *statbuf);
+
+/**
+ * Check if a file could be a mount point.
+ *
+ * @param mtab
+ * The current mount table.
+ * @param name
+ * The name of the file to check. It is looked up among the basenames
+ * of the known mount points.
+ * @return
+ * Whether the named file could be a mount point.
+ */
+bool bfs_might_be_mount(const struct bfs_mtab *mtab, const char *name);
+
+/**
+ * Free a mount table.
+ *
+ * @param mtab
+ * The mount table to free. NULL is allowed and ignored.
+ */
+void bfs_mtab_free(struct bfs_mtab *mtab);
+
+#endif // BFS_MTAB_H
diff --git a/src/opt.c b/src/opt.c
new file mode 100644
index 0000000..883d598
--- /dev/null
+++ b/src/opt.c
@@ -0,0 +1,2299 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+/**
+ * The expression optimizer. Different optimization levels are supported:
+ *
+ * -O1: basic logical simplifications, like folding (-true -and -foo) to -foo.
+ *
+ * -O2: dead code elimination and data flow analysis. struct df_domain is used
+ * to record data flow facts that are true at various points of evaluation.
+ * Specifically, struct df_domain records the state before an expression is
+ * evaluated (opt->before), and after an expression returns true
+ * (opt->after_true) or false (opt->after_false). Additionally, opt->impure
+ * records the possible state before any expression with side effects is
+ * evaluated.
+ *
+ * -O3: expression re-ordering to reduce expected cost. In an expression like
+ * (-foo -and -bar), if both -foo and -bar are pure (no side effects), they can
+ * be re-ordered to (-bar -and -foo). This is profitable if the expected cost
+ * is lower for the re-ordered expression, for example if -foo is very slow or
+ * -bar is likely to return false.
+ *
+ * -O4/-Ofast: aggressive optimizations that may affect correctness in corner
+ * cases. The main effect is to use opt->impure to determine if any side-
+ * effects are reachable at all, skipping the traversal if not.
+ */
+
+#include "prelude.h"
+#include "opt.h"
+#include "bftw.h"
+#include "bit.h"
+#include "color.h"
+#include "ctx.h"
+#include "diag.h"
+#include "dir.h"
+#include "eval.h"
+#include "exec.h"
+#include "expr.h"
+#include "list.h"
+#include "pwcache.h"
+#include <errno.h>
+#include <limits.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <unistd.h>
+
+// Stand-in argument strings for the -and, -or, and -not operators.
+// NOTE(review): presumably used as the argv of operator expressions that the
+// optimizer creates itself -- confirm against their uses later in this file.
+static char *fake_and_arg = "-and";
+static char *fake_or_arg = "-or";
+static char *fake_not_arg = "-not";
+
+/**
+ * The data flow domain for predicates.
+ *
+ * The values form a two-bit set: bit (1 << false) means "may be false" and bit
+ * (1 << true) means "may be true". That lets constrain_pred() narrow a value
+ * with a bitwise AND, and pred_join() merge two values with a bitwise OR.
+ */
+enum df_pred {
+ /** The bottom state (unreachable). */
+ PRED_BOTTOM = 0,
+ /** The predicate is known to be false. */
+ PRED_FALSE = 1 << false,
+ /** The predicate is known to be true. */
+ PRED_TRUE = 1 << true,
+ /** The top state (unknown). */
+ PRED_TOP = PRED_FALSE | PRED_TRUE,
+};
+
+/**
+ * Narrow a predicate to a known value.
+ *
+ * If the predicate could not previously take that value, it becomes
+ * PRED_BOTTOM.
+ */
+static void constrain_pred(enum df_pred *pred, bool value) {
+	enum df_pred known = value ? PRED_TRUE : PRED_FALSE;
+	*pred &= known;
+}
+
+/**
+ * Merge another predicate state into dest, so that dest allows every value
+ * that either state allowed.
+ */
+static void pred_join(enum df_pred *dest, enum df_pred src) {
+	*dest = *dest | src;
+}
+
+/**
+ * Types of predicates we track.
+ *
+ * The enumerators are used as indices into df_domain::preds, so PRED_TYPES
+ * must remain the last entry.
+ */
+enum pred_type {
+ /** -readable */
+ READABLE_PRED,
+ /** -writable */
+ WRITABLE_PRED,
+ /** -executable */
+ EXECUTABLE_PRED,
+ /** -acl */
+ ACL_PRED,
+ /** -capable */
+ CAPABLE_PRED,
+ /** -empty */
+ EMPTY_PRED,
+ /** -hidden */
+ HIDDEN_PRED,
+ /** -nogroup */
+ NOGROUP_PRED,
+ /** -nouser */
+ NOUSER_PRED,
+ /** -sparse */
+ SPARSE_PRED,
+ /** -xattr */
+ XATTR_PRED,
+ /** The number of pred_types. */
+ PRED_TYPES,
+};
+
+/**
+ * Get the name of a predicate type.
+ *
+ * @param type
+ * The predicate type.
+ * @return
+ * The command line spelling of the predicate. PRED_TYPES and any other
+ * unexpected value are reported through bfs_bug(), with "???" returned
+ * if that call returns.
+ */
+static const char *pred_type_name(enum pred_type type) {
+ // There is no default: label, so compilers that warn about unhandled
+ // enumerators can flag a pred_type that is missing here
+ switch (type) {
+ case READABLE_PRED:
+ return "-readable";
+ case WRITABLE_PRED:
+ return "-writable";
+ case EXECUTABLE_PRED:
+ return "-executable";
+ case ACL_PRED:
+ return "-acl";
+ case CAPABLE_PRED:
+ return "-capable";
+ case EMPTY_PRED:
+ return "-empty";
+ case HIDDEN_PRED:
+ return "-hidden";
+ case NOGROUP_PRED:
+ return "-nogroup";
+ case NOUSER_PRED:
+ return "-nouser";
+ case SPARSE_PRED:
+ return "-sparse";
+ case XATTR_PRED:
+ return "-xattr";
+
+ case PRED_TYPES:
+ break;
+ }
+
+ bfs_bug("Unknown predicate %d", (int)type);
+ return "???";
+}
+
+/**
+ * A constrained integer range.
+ *
+ * Both bounds are inclusive. The range is empty (bottom) whenever min > max.
+ */
+struct df_range {
+ /** The (inclusive) minimum value. */
+ long long min;
+ /** The (inclusive) maximum value. */
+ long long max;
+};
+
+/**
+ * Reset a range to the empty set, using the most extreme inverted bounds so
+ * that joining it with any other range yields that other range.
+ */
+static void range_init_bottom(struct df_range *range) {
+	*range = (struct df_range) {
+		.min = LLONG_MAX,
+		.max = LLONG_MIN,
+	};
+}
+
+/** Test whether a range contains no values at all. */
+static bool range_is_bottom(const struct df_range *range) {
+	return range->max < range->min;
+}
+
+/** Reset a range to cover every value we track. */
+static void range_init_top(struct df_range *range) {
+	// All ranges we currently track are non-negative
+	*range = (struct df_range) {
+		.min = 0,
+		.max = LLONG_MAX,
+	};
+}
+
+/** Test whether a range is unconstrained, i.e. exactly [0, LLONG_MAX]. */
+static bool range_is_top(const struct df_range *range) {
+	if (range->min != 0) {
+		return false;
+	}
+
+	return range->max == LLONG_MAX;
+}
+
+/** Return the smaller of two values. */
+static long long min_value(long long a, long long b) {
+	return a < b ? a : b;
+}
+
+/** Return the larger of two values. */
+static long long max_value(long long a, long long b) {
+	return a > b ? a : b;
+}
+
+/** Raise the lower bound of a range to at least the given value. */
+static void constrain_min(struct df_range *range, long long value) {
+	if (range->min < value) {
+		range->min = value;
+	}
+}
+
+/** Lower the upper bound of a range to at most the given value. */
+static void constrain_max(struct df_range *range, long long value) {
+	if (range->max > value) {
+		range->max = value;
+	}
+}
+
+/**
+ * Remove a single value from a range.
+ *
+ * Only an endpoint can actually be removed, since the result has to remain a
+ * single interval; a value strictly inside (or outside) the range leaves it
+ * unchanged.
+ */
+static void range_remove(struct df_range *range, long long value) {
+ if (range->min == value) {
+ if (range->min == LLONG_MAX) {
+ // ++min would overflow; empty the range through max instead
+ range->max = LLONG_MIN;
+ } else {
+ ++range->min;
+ }
+ }
+
+ if (range->max == value) {
+ if (range->max == LLONG_MIN) {
+ // --max would overflow; empty the range through min instead
+ range->min = LLONG_MAX;
+ } else {
+ --range->max;
+ }
+ }
+}
+
+/**
+ * Widen dest to the smallest single range that covers both dest and src.
+ */
+static void range_join(struct df_range *dest, const struct df_range *src) {
+	long long lowest = min_value(dest->min, src->min);
+	long long highest = max_value(dest->max, src->max);
+
+	dest->min = lowest;
+	dest->max = highest;
+}
+
+/**
+ * Types of ranges we track.
+ *
+ * The enumerators are used as indices into df_domain::ranges, so RANGE_TYPES
+ * must remain the last entry.
+ */
+enum range_type {
+ /** Search tree depth. */
+ DEPTH_RANGE,
+ /** Group ID. */
+ GID_RANGE,
+ /** Inode number. */
+ INUM_RANGE,
+ /** Hard link count. */
+ LINKS_RANGE,
+ /** File size. */
+ SIZE_RANGE,
+ /** User ID. */
+ UID_RANGE,
+ /** The number of range_types. */
+ RANGE_TYPES,
+};
+
+/**
+ * Get the name of a range type.
+ *
+ * @param type
+ * The range type.
+ * @return
+ * The command line spelling of the corresponding test. RANGE_TYPES and
+ * any other unexpected value are reported through bfs_bug(), with "???"
+ * returned if that call returns.
+ */
+static const char *range_type_name(enum range_type type) {
+ // There is no default: label, so compilers that warn about unhandled
+ // enumerators can flag a range_type that is missing here
+ switch (type) {
+ case DEPTH_RANGE:
+ return "-depth";
+ case GID_RANGE:
+ return "-gid";
+ case INUM_RANGE:
+ return "-inum";
+ case LINKS_RANGE:
+ return "-links";
+ case SIZE_RANGE:
+ return "-size";
+ case UID_RANGE:
+ return "-uid";
+
+ case RANGE_TYPES:
+ break;
+ }
+
+ bfs_bug("Unknown range %d", (int)type);
+ return "???";
+}
+
+/**
+ * The data flow analysis domain.
+ *
+ * A value is bottom (impossible) as soon as any single component is bottom,
+ * and top (unconstrained) only when every component is top; see
+ * df_is_bottom() and df_is_top().
+ */
+struct df_domain {
+ /** The predicates we track. */
+ enum df_pred preds[PRED_TYPES];
+
+ /** The value ranges we track. */
+ struct df_range ranges[RANGE_TYPES];
+
+ /** Bitmask of possible -types. */
+ unsigned int types;
+ /** Bitmask of possible -xtypes. */
+ unsigned int xtypes;
+};
+
+/**
+ * Reset a data flow value to bottom: every predicate, range, and type mask
+ * becomes empty.
+ */
+static void df_init_bottom(struct df_domain *value) {
+	value->types = 0;
+	value->xtypes = 0;
+
+	for (int pred = 0; pred < PRED_TYPES; ++pred) {
+		value->preds[pred] = PRED_BOTTOM;
+	}
+
+	for (int range = 0; range < RANGE_TYPES; ++range) {
+		range_init_bottom(&value->ranges[range]);
+	}
+}
+
+/**
+ * Test whether a fact set is impossible, which is the case as soon as any one
+ * of its components is empty.
+ */
+static bool df_is_bottom(const struct df_domain *value) {
+	if (value->types == 0 || value->xtypes == 0) {
+		return true;
+	}
+
+	for (int pred = 0; pred < PRED_TYPES; ++pred) {
+		if (value->preds[pred] == PRED_BOTTOM) {
+			return true;
+		}
+	}
+
+	for (int range = 0; range < RANGE_TYPES; ++range) {
+		if (range_is_bottom(&value->ranges[range])) {
+			return true;
+		}
+	}
+
+	return false;
+}
+
+/**
+ * Reset a data flow value to top: every predicate, range, and type mask
+ * becomes unconstrained.
+ */
+static void df_init_top(struct df_domain *value) {
+	value->types = ~0U;
+	value->xtypes = ~0U;
+
+	for (int pred = 0; pred < PRED_TYPES; ++pred) {
+		value->preds[pred] = PRED_TOP;
+	}
+
+	for (int range = 0; range < RANGE_TYPES; ++range) {
+		range_init_top(&value->ranges[range]);
+	}
+}
+
+/**
+ * Test whether a fact set is completely unconstrained, i.e. every component
+ * is still at its top value.
+ */
+static bool df_is_top(const struct df_domain *value) {
+	if (value->types != ~0U || value->xtypes != ~0U) {
+		return false;
+	}
+
+	for (int pred = 0; pred < PRED_TYPES; ++pred) {
+		if (value->preds[pred] != PRED_TOP) {
+			return false;
+		}
+	}
+
+	for (int range = 0; range < RANGE_TYPES; ++range) {
+		if (!range_is_top(&value->ranges[range])) {
+			return false;
+		}
+	}
+
+	return true;
+}
+
+/**
+ * Merge src into dest component by component, so that dest allows everything
+ * that either fact set allowed.
+ */
+static void df_join(struct df_domain *dest, const struct df_domain *src) {
+	dest->types |= src->types;
+	dest->xtypes |= src->xtypes;
+
+	for (int pred = 0; pred < PRED_TYPES; ++pred) {
+		pred_join(&dest->preds[pred], src->preds[pred]);
+	}
+
+	for (int range = 0; range < RANGE_TYPES; ++range) {
+		range_join(&dest->ranges[range], &src->ranges[range]);
+	}
+}
+
+/**
+ * Optimizer state.
+ *
+ * The visitors copy this struct by value for nested expressions
+ * (struct bfs_opt nested = *opt), so each nesting level gets its own
+ * before/after states while the impure pointer stays shared between copies.
+ */
+struct bfs_opt {
+ /** The context we're optimizing. */
+ struct bfs_ctx *ctx;
+ /** Optimization level (ctx->optlevel). */
+ int level;
+ /** Recursion depth. */
+ int depth;
+
+ /** Whether to produce warnings. */
+ bool warn;
+ /** Whether the result of this expression is ignored. */
+ bool ignore_result;
+
+ /** Data flow state before this expression is evaluated. */
+ struct df_domain before;
+ /** Data flow state after this expression returns true. */
+ struct df_domain after_true;
+ /** Data flow state after this expression returns false. */
+ struct df_domain after_false;
+ /** Data flow state before any side-effecting expressions are evaluated. */
+ struct df_domain *impure;
+};
+
+/**
+ * Log an optimization.
+ *
+ * The message is prefixed with one "│ " per level of opt->depth.
+ *
+ * @param opt
+ * The optimizer state.
+ * @param format
+ * The cfprintf()-style format string.
+ * @return
+ * Whether anything was printed, i.e. whether bfs_debug_prefix() accepted
+ * DEBUG_OPT.
+ */
+attr(printf(2, 3))
+static bool opt_debug(struct bfs_opt *opt, const char *format, ...) {
+ if (bfs_debug_prefix(opt->ctx, DEBUG_OPT)) {
+ for (int i = 0; i < opt->depth; ++i) {
+ cfprintf(opt->ctx->cerr, "│ ");
+ }
+
+ va_list args;
+ va_start(args, format);
+ cvfprintf(opt->ctx->cerr, format, args);
+ va_end(args);
+ return true;
+ } else {
+ return false;
+ }
+}
+
+/**
+ * Log a recursive call.
+ *
+ * Increments opt->depth whether or not debugging output is enabled.
+ *
+ * @return
+ * Whether anything was printed.
+ */
+attr(printf(2, 3))
+static bool opt_enter(struct bfs_opt *opt, const char *format, ...) {
+ int depth = opt->depth;
+ // Print one level shallower, so the branch marker takes the place of the
+ // last "│ " column
+ if (depth > 0) {
+ --opt->depth;
+ }
+
+ bool debug = opt_debug(opt, "%s", depth > 0 ? "├─╮ " : "");
+ if (debug) {
+ va_list args;
+ va_start(args, format);
+ cvfprintf(opt->ctx->cerr, format, args);
+ va_end(args);
+ }
+
+ // Everything logged until the matching opt_leave() is nested one level deeper
+ opt->depth = depth + 1;
+ return debug;
+}
+
+/**
+ * Log a recursive return.
+ *
+ * Decrements opt->depth whether or not anything is printed.
+ *
+ * @param format
+ * The format string, or NULL to leave the current level silently.
+ * @return
+ * Whether anything was printed.
+ */
+attr(printf(2, 3))
+static bool opt_leave(struct bfs_opt *opt, const char *format, ...) {
+ bool debug = false;
+ int depth = opt->depth;
+
+ if (format) {
+ // Print two levels shallower, so the closing marker lines up with the
+ // marker printed by the matching opt_enter()
+ if (depth > 1) {
+ opt->depth -= 2;
+ }
+
+ debug = opt_debug(opt, "%s", depth > 1 ? "├─╯ " : "");
+ if (debug) {
+ va_list args;
+ va_start(args, format);
+ cvfprintf(opt->ctx->cerr, format, args);
+ va_end(args);
+ }
+ }
+
+ opt->depth = depth - 1;
+ return debug;
+}
+
+/**
+ * Log a shallow visit.
+ *
+ * Unlike opt_enter(), opt->depth is the same after the call as before it.
+ *
+ * @return
+ * Whether anything was printed.
+ */
+attr(printf(2, 3))
+static bool opt_visit(struct bfs_opt *opt, const char *format, ...) {
+ int depth = opt->depth;
+ // Print one level shallower, so the marker takes the place of the last
+ // "│ " column
+ if (depth > 0) {
+ --opt->depth;
+ }
+
+ bool debug = opt_debug(opt, "%s", depth > 0 ? "├─◯ " : "");
+ if (debug) {
+ va_list args;
+ va_start(args, format);
+ cvfprintf(opt->ctx->cerr, format, args);
+ va_end(args);
+ }
+
+ opt->depth = depth;
+ return debug;
+}
+
+/**
+ * Log the deletion of an expression.
+ *
+ * opt->depth is the same after the call as before it.
+ *
+ * @return
+ * Whether anything was printed.
+ */
+attr(printf(2, 3))
+static bool opt_delete(struct bfs_opt *opt, const char *format, ...) {
+ int depth = opt->depth;
+
+ // Print one level shallower, so the marker takes the place of the last
+ // "│ " column
+ if (depth > 0) {
+ --opt->depth;
+ }
+
+ bool debug = opt_debug(opt, "%s", depth > 0 ? "├─✘ " : "");
+ if (debug) {
+ va_list args;
+ va_start(args, format);
+ cvfprintf(opt->ctx->cerr, format, args);
+ va_end(args);
+ }
+
+ opt->depth = depth;
+ return debug;
+}
+
+/**
+ * The common signature of the opt_debug()/opt_enter()/opt_leave()/opt_visit()
+ * logging functions, which lets df_dump_line() choose one per output line.
+ */
+typedef bool dump_fn(struct bfs_opt *opt, const char *format, ...);
+
+/**
+ * Print a df_pred.
+ *
+ * The label is printed through the given logging function, while the value is
+ * written directly to the FILE underneath ctx->cerr. The return value of
+ * dump() is not checked; the only caller visible here, df_dump(), returns
+ * early when debugging output is disabled.
+ *
+ * @param dump
+ * The logging function to print the label with.
+ * @param opt
+ * The optimizer state.
+ * @param value
+ * The data flow value containing the predicate.
+ * @param type
+ * Which predicate to print.
+ */
+static void pred_dump(dump_fn *dump, struct bfs_opt *opt, const struct df_domain *value, enum pred_type type) {
+ dump(opt, "${blu}%s${rs}: ", pred_type_name(type));
+
+ FILE *file = opt->ctx->cerr->file;
+ switch (value->preds[type]) {
+ case PRED_BOTTOM:
+ fprintf(file, "⊥\n");
+ break;
+ case PRED_TOP:
+ fprintf(file, "⊤\n");
+ break;
+ case PRED_TRUE:
+ fprintf(file, "true\n");
+ break;
+ case PRED_FALSE:
+ fprintf(file, "false\n");
+ break;
+ }
+}
+
+/**
+ * Print a df_range.
+ *
+ * The label is printed through the given logging function, while the value is
+ * written directly to the FILE underneath ctx->cerr.
+ *
+ * @param dump
+ * The logging function to print the label with.
+ * @param opt
+ * The optimizer state.
+ * @param value
+ * The data flow value containing the range.
+ * @param type
+ * Which range to print.
+ */
+static void range_dump(dump_fn *dump, struct bfs_opt *opt, const struct df_domain *value, enum range_type type) {
+ dump(opt, "${blu}%s${rs}: ", range_type_name(type));
+
+ FILE *file = opt->ctx->cerr->file;
+ const struct df_range *range = &value->ranges[type];
+ if (range_is_bottom(range)) {
+ fprintf(file, "⊥\n");
+ } else if (range_is_top(range)) {
+ fprintf(file, "⊤\n");
+ } else if (range->min == range->max) {
+ // A single value
+ fprintf(file, "%lld\n", range->min);
+ } else {
+ // An interval, with an open end wherever the bound is the extreme value
+ if (range->min == LLONG_MIN) {
+ fprintf(file, "(-∞, ");
+ } else {
+ fprintf(file, "[%lld, ", range->min);
+ }
+ if (range->max == LLONG_MAX) {
+ fprintf(file, "∞)\n");
+ } else {
+ fprintf(file, "%lld]\n", range->max);
+ }
+ }
+}
+
+/**
+ * Print a set of types.
+ *
+ * @param dump
+ * The logging function to print the label with.
+ * @param opt
+ * The optimizer state.
+ * @param name
+ * The label to print.
+ * @param types
+ * The bitmask to print.
+ */
+static void types_dump(dump_fn *dump, struct bfs_opt *opt, const char *name, unsigned int types) {
+ dump(opt, "${blu}%s${rs}: ", name);
+
+ FILE *file = opt->ctx->cerr->file;
+ if (types == 0) {
+ fprintf(file, " ⊥\n");
+ } else if (types == ~0U) {
+ fprintf(file, " ⊤\n");
+ } else if (count_ones(types) < count_ones(~types)) {
+ // Fewer bits set than clear: print the mask itself
+ fprintf(file, " 0x%X\n", types);
+ } else {
+ // Otherwise print the complement, marked with a leading ~
+ fprintf(file, "~0x%X\n", ~types);
+ }
+}
+
+/**
+ * Count the lines that df_dump() will print for a value: one for every
+ * component that is not at its top state.
+ */
+static int df_dump_lines(const struct df_domain *value) {
+	int count = (value->types != ~0U) + (value->xtypes != ~0U);
+
+	for (int pred = 0; pred < PRED_TYPES; ++pred) {
+		if (value->preds[pred] != PRED_TOP) {
+			++count;
+		}
+	}
+
+	for (int range = 0; range < RANGE_TYPES; ++range) {
+		if (!range_is_top(&value->ranges[range])) {
+			++count;
+		}
+	}
+
+	return count;
+}
+
+/**
+ * Advance the line counter and choose the logging function for that line.
+ *
+ * @param lines
+ *         The total number of lines that will be printed.
+ * @param line
+ *         The number of lines printed so far; incremented by this call.
+ * @return
+ *         opt_visit() for a lone line, opt_enter() for the first line,
+ *         opt_leave() for the last line, and opt_debug() for the rest.
+ */
+static dump_fn *df_dump_line(int lines, int *line) {
+	int current = ++*line;
+
+	if (lines == 1) {
+		return opt_visit;
+	}
+	if (current == 1) {
+		return opt_enter;
+	}
+	if (current == lines) {
+		return opt_leave;
+	}
+
+	return opt_debug;
+}
+
+/**
+ * Print a data flow value.
+ *
+ * Bottom and top values are printed on a single line. Otherwise, a header line
+ * is followed by one line for each component that is not at its top state.
+ *
+ * @param opt
+ * The optimizer state.
+ * @param str
+ * The label for the value.
+ * @param value
+ * The data flow value to print.
+ */
+static void df_dump(struct bfs_opt *opt, const char *str, const struct df_domain *value) {
+ if (df_is_bottom(value)) {
+ opt_debug(opt, "%s: ⊥\n", str);
+ return;
+ } else if (df_is_top(value)) {
+ opt_debug(opt, "%s: ⊤\n", str);
+ return;
+ }
+
+ // Nothing more to do if debugging output is disabled
+ if (!opt_debug(opt, "%s:\n", str)) {
+ return;
+ }
+
+ // The total line count lets df_dump_line() tell first, middle, and last lines apart
+ int lines = df_dump_lines(value);
+ int line = 0;
+
+ for (int i = 0; i < PRED_TYPES; ++i) {
+ if (value->preds[i] != PRED_TOP) {
+ pred_dump(df_dump_line(lines, &line), opt, value, i);
+ }
+ }
+
+ for (int i = 0; i < RANGE_TYPES; ++i) {
+ if (!range_is_top(&value->ranges[i])) {
+ range_dump(df_dump_line(lines, &line), opt, value, i);
+ }
+ }
+
+ if (value->types != ~0U) {
+ types_dump(df_dump_line(lines, &line), opt, "-type", value->types);
+ }
+
+ if (value->xtypes != ~0U) {
+ types_dump(df_dump_line(lines, &line), opt, "-xtype", value->xtypes);
+ }
+}
+
+/**
+ * Test whether an expression is one of the constants, i.e. whether it is
+ * evaluated by eval_true() or eval_false().
+ */
+static bool is_const(const struct bfs_expr *expr) {
+	if (expr->eval_fn == eval_true) {
+		return true;
+	}
+
+	return expr->eval_fn == eval_false;
+}
+
+/**
+ * Emit a warning about an expression.
+ *
+ * Nothing is printed when warnings are disabled, when the expression is a
+ * parent or a constant, or when bfs_expr_warning() declines to print its
+ * prefix.
+ *
+ * @param opt
+ *         The optimizer state.
+ * @param expr
+ *         The expression the warning is about.
+ * @param format
+ *         The format string for the warning message.
+ */
+attr(printf(3, 4))
+static void opt_warning(const struct bfs_opt *opt, const struct bfs_expr *expr, const char *format, ...) {
+	if (!opt->warn || bfs_expr_is_parent(expr) || is_const(expr)) {
+		return;
+	}
+
+	if (!bfs_expr_warning(opt->ctx, expr)) {
+		return;
+	}
+
+	va_list args;
+	va_start(args, format);
+	bfs_vwarning(opt->ctx, format, args);
+	va_end(args);
+}
+
+/**
+ * Remove and return an expression's children.
+ *
+ * @param expr
+ * The parent expression, which is left without children.
+ * @param children
+ * The list that receives the children; it is initialized here.
+ */
+static void foster_children(struct bfs_expr *expr, struct bfs_exprs *children) {
+ bfs_assert(bfs_expr_is_parent(expr));
+
+ SLIST_INIT(children);
+ SLIST_EXTEND(children, &expr->children);
+
+ // Reset the parent's summary fields to their childless values.
+ // NOTE(review): presumably bfs_expr_append() updates them again as children
+ // are re-attached -- confirm in expr.c.
+ expr->persistent_fds = 0;
+ expr->ephemeral_fds = 0;
+ expr->pure = true;
+}
+
+/**
+ * Return an expression's only child.
+ *
+ * Asserts that expr is a parent with exactly one child. The child stays
+ * attached to expr.
+ */
+static struct bfs_expr *only_child(struct bfs_expr *expr) {
+ bfs_assert(bfs_expr_is_parent(expr));
+ struct bfs_expr *child = bfs_expr_children(expr);
+ bfs_assert(child && !child->next);
+ return child;
+}
+
+/**
+ * Foster an expression's only child: detach it from expr and return it.
+ */
+static struct bfs_expr *foster_only_child(struct bfs_expr *expr) {
+ struct bfs_expr *child = only_child(expr);
+ // The list only exists to receive the detached child; it is not used afterwards
+ struct bfs_exprs children;
+ foster_children(expr, &children);
+ return child;
+}
+
+/** An expression visitor. */
+struct visitor;
+
+/**
+ * An expression-visiting function.
+ *
+ * @param opt
+ * The optimizer state for the expression being visited.
+ * @param expr
+ * The expression to visit.
+ * @param visitor
+ * The visitor being applied.
+ * @return
+ * The expression to use from now on, or NULL on failure (the callers in
+ * this file stop and propagate NULL).
+ */
+typedef struct bfs_expr *visit_fn(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor);
+
+/**
+ * An entry in a visitor lookup table.
+ *
+ * A table is an array of these, terminated by an entry whose eval_fn is NULL.
+ */
+struct visitor_table {
+ /** The evaluation function to match on. */
+ bfs_eval_fn *eval_fn;
+ /** The visitor function. */
+ visit_fn *visit;
+};
+
+/**
+ * Find the visitor function registered for an expression.
+ *
+ * @param expr
+ *         The expression, matched by its eval_fn.
+ * @param table
+ *         The lookup table, terminated by an entry with a NULL eval_fn.
+ * @return
+ *         The visit function of the first matching entry, or NULL if no
+ *         entry matches.
+ */
+static visit_fn *look_up_visitor(const struct bfs_expr *expr, const struct visitor_table table[]) {
+	for (const struct visitor_table *entry = table; entry->eval_fn; ++entry) {
+		if (entry->eval_fn == expr->eval_fn) {
+			return entry->visit;
+		}
+	}
+
+	return NULL;
+}
+
+/**
+ * An expression visitor: a named set of callbacks applied by visit_deep().
+ */
+struct visitor {
+ /** The name of this visitor. */
+ const char *name;
+
+ /** A function to call before visiting children (visit_enter() is used if NULL). */
+ visit_fn *enter;
+ /** The default visitor (may be NULL). */
+ visit_fn *visit;
+ /** A function to call after visiting children (visit_leave() is used if NULL). */
+ visit_fn *leave;
+
+ /** A visitor lookup table. */
+ const struct visitor_table *table;
+};
+
+/** Recursive visitor implementation. */
+static struct bfs_expr *visit_deep(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor);
+
+/**
+ * Visit a negation.
+ *
+ * The operand is detached, visited with a copy of the optimizer state, and
+ * re-attached. On failure, NULL is returned.
+ */
+static struct bfs_expr *visit_not(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+ struct bfs_expr *rhs = foster_only_child(expr);
+
+ struct bfs_opt nested = *opt;
+ rhs = visit_deep(&nested, rhs, visitor);
+ if (!rhs) {
+ return NULL;
+ }
+
+ // The negation is true exactly when its operand is false, so the outcomes swap
+ opt->after_true = nested.after_false;
+ opt->after_false = nested.after_true;
+
+ bfs_expr_append(expr, rhs);
+ return expr;
+}
+
+/**
+ * Visit a conjunction.
+ *
+ * The children are detached, visited in order, and re-attached. On failure,
+ * NULL is returned.
+ */
+static struct bfs_expr *visit_and(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+ struct bfs_exprs children;
+ foster_children(expr, &children);
+
+ // Base case (-and) == (-true)
+ df_init_bottom(&opt->after_false);
+ struct bfs_opt nested = *opt;
+
+ while (!SLIST_EMPTY(&children)) {
+ struct bfs_expr *child = SLIST_POP(&children);
+
+ // Only the last child's result can be the result of the whole conjunction
+ if (SLIST_EMPTY(&children)) {
+ nested.ignore_result = opt->ignore_result;
+ } else {
+ nested.ignore_result = false;
+ }
+
+ child = visit_deep(&nested, child, visitor);
+ if (!child) {
+ return NULL;
+ }
+
+ // The conjunction is false if any child is false; the next child is only
+ // evaluated after this one returned true
+ df_join(&opt->after_false, &nested.after_false);
+ nested.before = nested.after_true;
+
+ bfs_expr_append(expr, child);
+ }
+
+ // True only once the last child has returned true
+ opt->after_true = nested.after_true;
+
+ return expr;
+}
+
+/**
+ * Visit a disjunction.
+ *
+ * The children are detached, visited in order, and re-attached. On failure,
+ * NULL is returned.
+ */
+static struct bfs_expr *visit_or(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+ struct bfs_exprs children;
+ foster_children(expr, &children);
+
+ // Base case (-or) == (-false)
+ df_init_bottom(&opt->after_true);
+ struct bfs_opt nested = *opt;
+
+ while (!SLIST_EMPTY(&children)) {
+ struct bfs_expr *child = SLIST_POP(&children);
+
+ // Only the last child's result can be the result of the whole disjunction
+ if (SLIST_EMPTY(&children)) {
+ nested.ignore_result = opt->ignore_result;
+ } else {
+ nested.ignore_result = false;
+ }
+
+ child = visit_deep(&nested, child, visitor);
+ if (!child) {
+ return NULL;
+ }
+
+ // The disjunction is true if any child is true; the next child is only
+ // evaluated after this one returned false
+ df_join(&opt->after_true, &nested.after_true);
+ nested.before = nested.after_false;
+
+ bfs_expr_append(expr, child);
+ }
+
+ // False only once the last child has returned false
+ opt->after_false = nested.after_false;
+
+ return expr;
+}
+
+/**
+ * Visit a comma expression.
+ *
+ * The children are detached, visited in order, and re-attached. On failure,
+ * NULL is returned.
+ */
+static struct bfs_expr *visit_comma(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+ struct bfs_exprs children;
+ foster_children(expr, &children);
+
+ struct bfs_opt nested = *opt;
+
+ while (!SLIST_EMPTY(&children)) {
+ struct bfs_expr *child = SLIST_POP(&children);
+
+ // The results of all but the last child are discarded
+ if (SLIST_EMPTY(&children)) {
+ nested.ignore_result = opt->ignore_result;
+ } else {
+ nested.ignore_result = true;
+ }
+
+ child = visit_deep(&nested, child, visitor);
+ if (!child) {
+ return NULL;
+ }
+
+ // The next child runs whether this one returned true or false
+ nested.before = nested.after_true;
+ df_join(&nested.before, &nested.after_false);
+
+ bfs_expr_append(expr, child);
+ }
+
+ // The outcome of a comma expression is the outcome of its last child
+ opt->after_true = nested.after_true;
+ opt->after_false = nested.after_false;
+
+ return expr;
+}
+
+/**
+ * Default enter() function.
+ *
+ * Reports the expression through opt_enter() and seeds both output states
+ * with the incoming state.
+ */
+static struct bfs_expr *visit_enter(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+	opt_enter(opt, "%pe\n", expr);
+
+	// Both outcomes start out as a copy of opt->before
+	opt->after_false = opt->after_true = opt->before;
+
+	return expr;
+}
+
+/**
+ * Default leave() function.
+ *
+ * Reports the expression through opt_leave() and returns it unchanged.
+ */
+static struct bfs_expr *visit_leave(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+	struct bfs_expr *visited = expr;
+	opt_leave(opt, "%pe\n", visited);
+	return visited;
+}
+
+/**
+ * Visit an expression and, for the logical operators, its children.
+ *
+ * Up to three callbacks run, in this order:
+ *   0. the built-in recursive handler for -not/-and/-or/-comma,
+ *   1. the visitor's generic visit() callback,
+ *   2. the visitor's table entry for the expression's eval_fn.
+ * enter() runs once before the first callback that exists, and leave() runs
+ * at the end only if enter() ran.
+ *
+ * @return The resulting expression, or NULL if any callback returned NULL.
+ */
+static struct bfs_expr *visit_deep(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+	// Handlers that recurse into the children of each operator
+	static const struct visitor_table operators[] = {
+		{eval_not, visit_not},
+		{eval_and, visit_and},
+		{eval_or, visit_or},
+		{eval_comma, visit_comma},
+		{NULL, NULL},
+	};
+
+	bool entered = false;
+
+	visit_fn *enter = visitor->enter ? visitor->enter : visit_enter;
+	visit_fn *leave = visitor->leave ? visitor->leave : visit_leave;
+
+	for (int stage = 0; stage < 3; ++stage) {
+		// Look each callback up against the current expr, since an
+		// earlier stage may have replaced it
+		visit_fn *fn;
+		if (stage == 0) {
+			fn = look_up_visitor(expr, operators);
+		} else if (stage == 1) {
+			fn = visitor->visit;
+		} else {
+			fn = look_up_visitor(expr, visitor->table);
+		}
+
+		if (!fn) {
+			continue;
+		}
+
+		if (!entered) {
+			expr = enter(opt, expr, visitor);
+			if (!expr) {
+				return NULL;
+			}
+			entered = true;
+		}
+
+		expr = fn(opt, expr, visitor);
+		if (!expr) {
+			return NULL;
+		}
+	}
+
+	if (!entered) {
+		// No callback applied to this expression
+		opt_visit(opt, "%pe\n", expr);
+		return expr;
+	}
+
+	return leave(opt, expr, visitor);
+}
+
+/**
+ * Visit an expression recursively.
+ *
+ * Wraps visit_deep() in an opt_enter()/opt_leave() pair labelled with the
+ * visitor's name.
+ */
+static struct bfs_expr *visit(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+	opt_enter(opt, "%s()\n", visitor->name);
+
+	struct bfs_expr *result = visit_deep(opt, expr, visitor);
+
+	opt_leave(opt, "\n");
+	return result;
+}
+
+/**
+ * Visit an expression non-recursively.
+ *
+ * Runs the visitor's generic visit() callback (if any), then the table entry
+ * matching the resulting expression (if any).  Children are not visited, and
+ * enter()/leave() are not called.
+ *
+ * @param expr  The expression to visit; may be NULL.
+ * @return The resulting expression, or NULL if expr was NULL or a callback
+ *         returned NULL.
+ */
+static struct bfs_expr *visit_shallow(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+	if (!expr) {
+		return NULL;
+	}
+
+	visit_fn *general = visitor->visit;
+	if (general) {
+		expr = general(opt, expr, visitor);
+		if (!expr) {
+			return NULL;
+		}
+	}
+
+	visit_fn *specific = look_up_visitor(expr, visitor->table);
+	return specific ? specific(opt, expr, visitor) : expr;
+}
+
+/**
+ * Annotate -{execut,read,writ}able.
+ *
+ * The probability is the product of one factor per access mode bit set in
+ * expr->num.
+ */
+static struct bfs_expr *annotate_access(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+	static const struct {
+		int mode;
+		double factor;
+	} factors[] = {
+		{R_OK, 0.99},
+		{W_OK, 0.8},
+		{X_OK, 0.2},
+	};
+
+	expr->probability = 1.0;
+	for (size_t i = 0; i < countof(factors); ++i) {
+		if (expr->num & factors[i].mode) {
+			expr->probability *= factors[i].factor;
+		}
+	}
+
+	return expr;
+}
+
+/**
+ * Annotate -empty.
+ *
+ * -empty is only marked pure at optimization level 4 and above.
+ */
+static struct bfs_expr *annotate_empty(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+	if (opt->level < 4) {
+		return expr;
+	}
+
+	// Since -empty attempts to open and read directories, it may
+	// have side effects such as reporting permission errors, and
+	// thus shouldn't be re-ordered without aggressive optimizations
+	expr->pure = true;
+	return expr;
+}
+
+/**
+ * Annotate -exec.
+ *
+ * With BFS_EXEC_MULTI set the expression is marked always true; otherwise it
+ * is given a very high cost.
+ */
+static struct bfs_expr *annotate_exec(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+	bool multi = expr->exec->flags & BFS_EXEC_MULTI;
+
+	if (!multi) {
+		expr->cost = 1000000.0;
+	} else {
+		expr->always_true = true;
+	}
+
+	return expr;
+}
+
+/**
+ * Annotate -name/-lname/-path.
+ *
+ * Patterns flagged as literal get a lower match probability than the rest.
+ */
+static struct bfs_expr *annotate_fnmatch(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+	expr->probability = expr->literal ? 0.1 : 0.5;
+	return expr;
+}
+
+/**
+ * Annotate -f?print.
+ *
+ * Sets calls_stat when the output file has a color table and
+ * colors_need_stat() reports true for it.
+ */
+static struct bfs_expr *annotate_fprint(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+	const struct colors *palette = expr->cfile->colors;
+
+	bool needs_stat = false;
+	if (palette) {
+		needs_stat = colors_need_stat(palette) ? true : false;
+	}
+
+	expr->calls_stat = needs_stat;
+	return expr;
+}
+
+/**
+ * Estimate probability for -x?type.
+ *
+ * expr->num is treated as a bitmask with bit (1 << type) per file type.  The
+ * probability is the sum of the per-type weights of the bits that are set.
+ */
+static void estimate_type_probability(struct bfs_expr *expr) {
+	// NOTE(review): the weights look like measured file type frequencies;
+	// their origin is not visible here
+	static const struct {
+		int type;
+		double weight;
+	} weights[] = {
+		{BFS_BLK, 0.00000721183},
+		{BFS_CHR, 0.0000499855},
+		{BFS_DIR, 0.114475},
+		{BFS_DOOR, 0.000001},
+		{BFS_FIFO, 0.00000248684},
+		{BFS_REG, 0.859772},
+		{BFS_LNK, 0.0256816},
+		{BFS_SOCK, 0.0000116881},
+		{BFS_WHT, 0.000001},
+	};
+
+	unsigned int mask = expr->num;
+
+	expr->probability = 0.0;
+	for (size_t i = 0; i < countof(weights); ++i) {
+		if (mask & (1 << weights[i].type)) {
+			expr->probability += weights[i].weight;
+		}
+	}
+}
+
+/**
+ * Annotate -type.
+ *
+ * Only the match probability is estimated, from the type mask.
+ */
+static struct bfs_expr *annotate_type(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+	struct bfs_expr *annotated = expr;
+	estimate_type_probability(annotated);
+	return annotated;
+}
+
+/**
+ * Annotate -xtype.
+ *
+ * Estimates the match probability from the type mask, and marks the
+ * expression pure only at optimization level 4 and above.
+ */
+static struct bfs_expr *annotate_xtype(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+	estimate_type_probability(expr);
+
+	// Since -xtype dereferences symbolic links, it may have side
+	// effects such as reporting permission errors, and thus
+	// shouldn't be re-ordered without aggressive optimizations
+	if (opt->level >= 4) {
+		expr->pure = true;
+	}
+
+	return expr;
+}
+
+/**
+ * Annotate a negation.
+ *
+ * Purity and cost are copied from the operand; always_true/always_false are
+ * swapped, and the probability is complemented.
+ */
+static struct bfs_expr *annotate_not(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+	const struct bfs_expr *operand = only_child(expr);
+
+	// Unchanged by negation
+	expr->pure = operand->pure;
+	expr->cost = operand->cost;
+
+	// Inverted by negation
+	expr->always_true = operand->always_false;
+	expr->always_false = operand->always_true;
+	expr->probability = 1.0 - operand->probability;
+
+	return expr;
+}
+
+/**
+ * Annotate a conjunction.
+ *
+ * Starts from the values for an empty conjunction (pure, always true, zero
+ * cost, probability 1) and folds in each child in order.
+ */
+static struct bfs_expr *annotate_and(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+	expr->cost = 0.0;
+	expr->probability = 1.0;
+	expr->pure = true;
+	expr->always_false = false;
+	expr->always_true = true;
+
+	struct bfs_expr *child = bfs_expr_children(expr);
+	while (child) {
+		expr->pure &= child->pure;
+		expr->always_true &= child->always_true;
+		expr->always_false |= child->always_false;
+
+		// Weight the child's cost by the probability so far, i.e. the
+		// chance that all earlier children were true
+		expr->cost += expr->probability * child->cost;
+		expr->probability *= child->probability;
+
+		child = child->next;
+	}
+
+	return expr;
+}
+
+/**
+ * Annotate a disjunction.
+ *
+ * Starts from the values for an empty disjunction (pure, always false, zero
+ * cost) and folds in each child in order.  The resulting probability is one
+ * minus the product of the children's complemented probabilities.
+ */
+static struct bfs_expr *annotate_or(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+	expr->cost = 0.0;
+	expr->pure = true;
+	expr->always_false = true;
+	expr->always_true = false;
+
+	// Running product of (1 - probability) over the children seen so far
+	float all_false = 1.0;
+
+	struct bfs_expr *child = bfs_expr_children(expr);
+	while (child) {
+		expr->pure &= child->pure;
+		expr->always_true |= child->always_true;
+		expr->always_false &= child->always_false;
+
+		// Weight the child's cost by the chance that all earlier
+		// children were false
+		expr->cost += all_false * child->cost;
+		all_false *= (1.0 - child->probability);
+
+		child = child->next;
+	}
+
+	expr->probability = 1.0 - all_false;
+	return expr;
+}
+
+/**
+ * Annotate a comma expression.
+ *
+ * Purity and cost are accumulated over all children.  The always_true,
+ * always_false and probability fields are taken from the last child, and are
+ * left untouched if there are no children.
+ */
+static struct bfs_expr *annotate_comma(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+	expr->pure = true;
+	expr->cost = 0.0;
+
+	const struct bfs_expr *last = NULL;
+	for (struct bfs_expr *child = bfs_expr_children(expr); child; child = child->next) {
+		expr->pure &= child->pure;
+		expr->cost += child->cost;
+		last = child;
+	}
+
+	if (last) {
+		expr->always_true = last->always_true;
+		expr->always_false = last->always_false;
+		expr->probability = last->probability;
+	}
+
+	return expr;
+}
+
+/**
+ * Annotate an arbitrary expression.
+ *
+ * Resets the pure, always_true, always_false, calls_stat, cost and
+ * probability fields of expr to defaults looked up by its eval_fn in the
+ * tables below.  An eval_fn missing from a table gets that table's fallback:
+ * false for the flags, FAST_COST for the cost, and 0.5 for the probability.
+ *
+ * @return expr, always.
+ */
+static struct bfs_expr *annotate_visit(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+ /** Table of pure expressions. */
+ static bfs_eval_fn *const pure[] = {
+ eval_access,
+ eval_acl,
+ eval_capable,
+ eval_depth,
+ eval_false,
+ eval_flags,
+ eval_fstype,
+ eval_gid,
+ eval_hidden,
+ eval_inum,
+ eval_links,
+ eval_lname,
+ eval_name,
+ eval_newer,
+ eval_nogroup,
+ eval_nouser,
+ eval_path,
+ eval_perm,
+ eval_regex,
+ eval_samefile,
+ eval_size,
+ eval_sparse,
+ eval_time,
+ eval_true,
+ eval_type,
+ eval_uid,
+ eval_used,
+ eval_xattr,
+ eval_xattrname,
+ };
+
+ expr->pure = false;
+ for (size_t i = 0; i < countof(pure); ++i) {
+ if (expr->eval_fn == pure[i]) {
+ expr->pure = true;
+ break;
+ }
+ }
+
+ /** Table of always-true expressions. */
+ static bfs_eval_fn *const always_true[] = {
+ eval_fls,
+ eval_fprint,
+ eval_fprint0,
+ eval_fprintf,
+ eval_fprintx,
+ eval_limit,
+ eval_prune,
+ eval_true,
+ // Non-returning (also listed in the always-false table below)
+ eval_exit,
+ eval_quit,
+ };
+
+ expr->always_true = false;
+ for (size_t i = 0; i < countof(always_true); ++i) {
+ if (expr->eval_fn == always_true[i]) {
+ expr->always_true = true;
+ break;
+ }
+ }
+
+ /** Table of always-false expressions. */
+ static bfs_eval_fn *const always_false[] = {
+ eval_false,
+ // Non-returning (also listed in the always-true table above)
+ eval_exit,
+ eval_quit,
+ };
+
+ expr->always_false = false;
+ for (size_t i = 0; i < countof(always_false); ++i) {
+ if (expr->eval_fn == always_false[i]) {
+ expr->always_false = true;
+ break;
+ }
+ }
+
+ /** Table of stat-calling primaries. */
+ static bfs_eval_fn *const calls_stat[] = {
+ eval_empty,
+ eval_flags,
+ eval_fls,
+ eval_fprintf,
+ eval_fstype,
+ eval_gid,
+ eval_inum,
+ eval_links,
+ eval_newer,
+ eval_nogroup,
+ eval_nouser,
+ eval_perm,
+ eval_samefile,
+ eval_size,
+ eval_sparse,
+ eval_time,
+ eval_uid,
+ eval_used,
+ eval_xattr,
+ eval_xattrname,
+ };
+
+ expr->calls_stat = false;
+ for (size_t i = 0; i < countof(calls_stat); ++i) {
+ if (expr->eval_fn == calls_stat[i]) {
+ expr->calls_stat = true;
+ break;
+ }
+ }
+
+#define FAST_COST 40.0
+#define FNMATCH_COST 400.0
+#define STAT_COST 1000.0
+#define PRINT_COST 20000.0
+
+ /** Table of expression costs. */
+ static const struct {
+ bfs_eval_fn *eval_fn; /* The evaluation function with this cost. */
+ float cost; /* The estimated cost, in the units of the *_COST macros above. */
+ } costs[] = {
+ {eval_access, STAT_COST},
+ {eval_acl, STAT_COST},
+ {eval_capable, STAT_COST},
+ {eval_empty, 2 * STAT_COST}, // readdir() is worse than stat()
+ {eval_flags, STAT_COST},
+ {eval_fls, PRINT_COST},
+ {eval_fprint, PRINT_COST},
+ {eval_fprint0, PRINT_COST},
+ {eval_fprintf, PRINT_COST},
+ {eval_fprintx, PRINT_COST},
+ {eval_fstype, STAT_COST},
+ {eval_gid, STAT_COST},
+ {eval_inum, STAT_COST},
+ {eval_links, STAT_COST},
+ {eval_lname, FNMATCH_COST},
+ {eval_name, FNMATCH_COST},
+ {eval_newer, STAT_COST},
+ {eval_nogroup, STAT_COST},
+ {eval_nouser, STAT_COST},
+ {eval_path, FNMATCH_COST},
+ {eval_perm, STAT_COST},
+ {eval_samefile, STAT_COST},
+ {eval_size, STAT_COST},
+ {eval_sparse, STAT_COST},
+ {eval_time, STAT_COST},
+ {eval_uid, STAT_COST},
+ {eval_used, STAT_COST},
+ {eval_xattr, STAT_COST},
+ {eval_xattrname, STAT_COST},
+ };
+
+ expr->cost = FAST_COST;
+ for (size_t i = 0; i < countof(costs); ++i) {
+ if (expr->eval_fn == costs[i].eval_fn) {
+ expr->cost = costs[i].cost;
+ break;
+ }
+ }
+
+ /** Table of expression probabilities. */
+ static const struct {
+ /** The evaluation function with this probability. */
+ bfs_eval_fn *eval_fn;
+ /** The matching probability. */
+ float probability;
+ } probs[] = {
+ {eval_acl, 0.00002},
+ {eval_capable, 0.000002},
+ {eval_empty, 0.01},
+ {eval_false, 0.0},
+ {eval_hidden, 0.01},
+ {eval_nogroup, 0.01},
+ {eval_nouser, 0.01},
+ {eval_samefile, 0.01},
+ {eval_true, 1.0},
+ {eval_xattr, 0.01},
+ {eval_xattrname, 0.01},
+ };
+
+ expr->probability = 0.5;
+ for (size_t i = 0; i < countof(probs); ++i) {
+ if (expr->eval_fn == probs[i].eval_fn) {
+ expr->probability = probs[i].probability;
+ break;
+ }
+ }
+
+ return expr;
+}
+
+/**
+ * Annotating visitor.
+ *
+ * annotate_visit() is the generic callback, so it runs for every expression
+ * before the table handler for that expression's eval_fn (see visit_deep()
+ * and visit_shallow()).  The table handlers then overwrite individual fields
+ * that annotate_visit() set to table defaults.
+ */
+static const struct visitor annotate = {
+ .name = "annotate",
+ .visit = annotate_visit,
+ .table = (const struct visitor_table[]) {
+ {eval_access, annotate_access},
+ {eval_empty, annotate_empty},
+ {eval_exec, annotate_exec},
+ {eval_fprint, annotate_fprint},
+ {eval_lname, annotate_fnmatch},
+ {eval_name, annotate_fnmatch},
+ {eval_path, annotate_fnmatch},
+ {eval_type, annotate_type},
+ {eval_xtype, annotate_xtype},
+
+ {eval_not, annotate_not},
+ {eval_and, annotate_and},
+ {eval_or, annotate_or},
+ {eval_comma, annotate_comma},
+
+ {NULL, NULL},
+ },
+};
+
+/**
+ * Create a constant expression.
+ *
+ * @param value  true to create -true, false to create -false.
+ * @return The new expression after an annotate pass, or NULL if
+ *         bfs_expr_new() returned NULL.
+ */
+static struct bfs_expr *opt_const(struct bfs_opt *opt, bool value) {
+	static char *fake_args[] = {"-false", "-true"};
+
+	bfs_eval_fn *fn = value ? eval_true : eval_false;
+	char **argv = value ? &fake_args[1] : &fake_args[0];
+
+	// visit_shallow() passes a NULL expression straight through
+	struct bfs_expr *constant = bfs_expr_new(opt->ctx, fn, 1, argv);
+	return visit_shallow(opt, constant, &annotate);
+}
+
+/**
+ * Negate an expression, keeping it canonical.
+ *
+ * A -not yields its operand and a constant yields the opposite constant.
+ * Anything else is wrapped in a new -not.
+ *
+ * @param argv  The argv to give a newly created -not expression.
+ * @return The negated expression, or NULL if creating it failed.
+ */
+static struct bfs_expr *negate_expr(struct bfs_opt *opt, struct bfs_expr *expr, char **argv) {
+	bfs_eval_fn *fn = expr->eval_fn;
+
+	if (fn == eval_not) {
+		return only_child(expr);
+	}
+	if (fn == eval_true) {
+		return opt_const(opt, false);
+	}
+	if (fn == eval_false) {
+		return opt_const(opt, true);
+	}
+
+	struct bfs_expr *negation = bfs_expr_new(opt->ctx, eval_not, 1, argv);
+	if (negation) {
+		bfs_expr_append(negation, expr);
+		negation = visit_shallow(opt, negation, &annotate);
+	}
+
+	return negation;
+}
+
+/**
+ * Sink negations into a conjunction/disjunction using De Morgan's laws.
+ *
+ * The operator under the -not is flipped in place (-and <-> -or) and each of
+ * its operands is negated.
+ *
+ * @param expr  The -not expression whose only child is the -and/-or.
+ * @return The flipped, re-annotated operator, or NULL on failure.
+ */
+static struct bfs_expr *sink_not_andor(struct bfs_opt *opt, struct bfs_expr *expr) {
+	opt_debug(opt, "De Morgan's laws\n");
+
+	// Any new (-not) expressions reuse the argv of the one being removed
+	char **not_argv = expr->argv;
+
+	expr = only_child(expr);
+	opt_enter(opt, "%pe\n", expr);
+
+	if (expr->eval_fn == eval_and) {
+		expr->eval_fn = eval_or;
+		expr->argv = &fake_or_arg;
+	} else {
+		bfs_assert(expr->eval_fn == eval_or);
+		expr->eval_fn = eval_and;
+		expr->argv = &fake_and_arg;
+	}
+
+	struct bfs_exprs operands;
+	foster_children(expr, &operands);
+
+	while (!SLIST_EMPTY(&operands)) {
+		struct bfs_expr *operand = SLIST_POP(&operands);
+		opt_enter(opt, "%pe\n", operand);
+
+		operand = negate_expr(opt, operand, not_argv);
+		if (!operand) {
+			return NULL;
+		}
+
+		opt_leave(opt, "%pe\n", operand);
+		bfs_expr_append(expr, operand);
+	}
+
+	opt_leave(opt, "%pe\n", expr);
+	return visit_shallow(opt, expr, &annotate);
+}
+
+/**
+ * Sink a negation into a comma expression.
+ *
+ * Only the last operand of the comma expression is negated; the others are
+ * kept as they are.
+ *
+ * @param expr  The -not expression whose only child is the comma expression.
+ * @return The re-annotated comma expression, or NULL if negating the last
+ *         operand failed.
+ */
+static struct bfs_expr *sink_not_comma(struct bfs_opt *opt, struct bfs_expr *expr) {
+	// The caller passes the negation itself; the comma expression is its
+	// only child
+	bfs_assert(expr->eval_fn == eval_not);
+
+	opt_enter(opt, "%pe\n", expr);
+
+	// A new (-not) expression reuses the argv of the one being removed
+	char **argv = expr->argv;
+	expr = only_child(expr);
+	bfs_assert(expr->eval_fn == eval_comma);
+
+	struct bfs_exprs children;
+	foster_children(expr, &children);
+
+	struct bfs_expr *child;
+	while ((child = SLIST_POP(&children))) {
+		if (SLIST_EMPTY(&children)) {
+			// Last operand: this is the one that gets negated
+			opt_enter(opt, "%pe\n", child);
+			opt_debug(opt, "sink\n");
+
+			child = negate_expr(opt, child, argv);
+			if (!child) {
+				return NULL;
+			}
+
+			opt_leave(opt, "%pe\n", child);
+		} else {
+			opt_visit(opt, "%pe\n", child);
+		}
+
+		bfs_expr_append(expr, child);
+	}
+
+	opt_leave(opt, "%pe\n", expr);
+	return visit_shallow(opt, expr, &annotate);
+}
+
+/**
+ * Canonicalize a negation.
+ *
+ * Depending on the operand, this removes a double negation, sinks the
+ * negation into an -and/-or/comma operand, or folds a negated constant.
+ * Any other operand leaves the expression unchanged.
+ */
+static struct bfs_expr *canonicalize_not(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+	struct bfs_expr *operand = only_child(expr);
+	bfs_eval_fn *fn = operand->eval_fn;
+
+	if (fn == eval_not) {
+		opt_debug(opt, "double negation\n");
+		return only_child(only_child(expr));
+	}
+
+	if (fn == eval_and || fn == eval_or) {
+		return sink_not_andor(opt, expr);
+	}
+
+	if (fn == eval_comma) {
+		return sink_not_comma(opt, expr);
+	}
+
+	if (is_const(operand)) {
+		opt_debug(opt, "constant propagation\n");
+		return opt_const(opt, operand->eval_fn == eval_false);
+	}
+
+	return expr;
+}
+
+/**
+ * Canonicalize an associative operator.
+ *
+ * Any operand with the same eval_fn as expr is replaced by its own children,
+ * so that directly nested uses of one operator become a single flat list.
+ */
+static struct bfs_expr *canonicalize_assoc(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+	struct bfs_exprs operands;
+	foster_children(expr, &operands);
+
+	struct bfs_exprs flattened;
+	SLIST_INIT(&flattened);
+
+	while (!SLIST_EMPTY(&operands)) {
+		struct bfs_expr *operand = SLIST_POP(&operands);
+
+		if (operand->eval_fn != expr->eval_fn) {
+			// Different operator or a primary: keep as-is
+			opt_visit(opt, "%pe\n", operand);
+			SLIST_APPEND(&flattened, operand);
+			continue;
+		}
+
+		struct bfs_expr *first = SLIST_HEAD(&operand->children);
+		struct bfs_expr *last = SLIST_TAIL(&operand->children);
+
+		if (first) {
+			opt_enter(opt, "%pe\n", operand);
+			opt_debug(opt, "associativity\n");
+
+			// The message shows at most the first and last grandchild
+			if (first == last) {
+				opt_leave(opt, "%pe\n", first);
+			} else if (first->next == last) {
+				opt_leave(opt, "%pe %pe\n", first, last);
+			} else {
+				opt_leave(opt, "%pe ... %pe\n", first, last);
+			}
+		} else {
+			opt_delete(opt, "%pe [empty]\n", operand);
+		}
+
+		SLIST_EXTEND(&flattened, &operand->children);
+	}
+
+	bfs_expr_extend(expr, &flattened);
+
+	return visit_shallow(opt, expr, &annotate);
+}
+
+/**
+ * Canonicalizing visitor.
+ *
+ * Has no generic callback: only -not, -and, -or and comma expressions are
+ * rewritten, through the table below.
+ */
+static const struct visitor canonicalize = {
+ .name = "canonicalize",
+ .table = (const struct visitor_table[]) {
+ {eval_not, canonicalize_not},
+ {eval_and, canonicalize_assoc},
+ {eval_or, canonicalize_assoc},
+ {eval_comma, canonicalize_assoc},
+ {NULL, NULL},
+ },
+};
+
+/**
+ * Calculate the cost of an ordered pair of expressions.
+ *
+ * The pair costs lhs->cost plus rhs->cost weighted by the probability that
+ * rhs runs at all: the probability of lhs, or its complement when the parent
+ * is an -or.
+ */
+static float expr_cost(const struct bfs_expr *parent, const struct bfs_expr *lhs, const struct bfs_expr *rhs) {
+	// https://cs.stackexchange.com/a/66921/21004
+	float rhs_runs = (parent->eval_fn == eval_or)
+		? 1.0 - lhs->probability
+		: lhs->probability;
+
+	return lhs->cost + rhs_runs * rhs->cost;
+}
+
+/**
+ * Sort a block of expressions.
+ *
+ * This is a merge sort on the singly-linked list, ordered by expr_cost():
+ * at each merge step the head whose pairing is cheaper goes first.
+ *
+ * @param parent  The operator the expressions belong to; passed to expr_cost().
+ * @param exprs   The list to sort in place.
+ */
+static void sort_exprs(struct bfs_opt *opt, struct bfs_expr *parent, struct bfs_exprs *exprs) {
+ if (!exprs->head || !exprs->head->next) { // zero or one element: nothing to sort
+ return;
+ }
+
+ struct bfs_exprs left, right;
+ SLIST_INIT(&left);
+ SLIST_INIT(&right);
+
+ // Split: the hare advances two nodes for every node popped into left,
+ // so left receives the front half and the remainder becomes right
+ for (struct bfs_expr *hare = exprs->head; hare && (hare = hare->next); hare = hare->next) {
+ struct bfs_expr *tortoise = SLIST_POP(exprs);
+ SLIST_APPEND(&left, tortoise);
+ }
+ SLIST_EXTEND(&right, exprs);
+
+ // Recurse
+ sort_exprs(opt, parent, &left);
+ sort_exprs(opt, parent, &right);
+
+ // Merge back into exprs, comparing the heads of the two halves
+ while (!SLIST_EMPTY(&left) && !SLIST_EMPTY(&right)) {
+ struct bfs_expr *lhs = left.head;
+ struct bfs_expr *rhs = right.head;
+
+ float cost = expr_cost(parent, lhs, rhs);
+ float swapped = expr_cost(parent, rhs, lhs);
+
+ if (cost <= swapped) { // ties keep the left element first
+ SLIST_POP(&left);
+ SLIST_APPEND(exprs, lhs);
+ } else {
+ opt_enter(opt, "%pe %pe [${ylw}%g${rs}]\n", lhs, rhs, cost);
+ SLIST_POP(&right);
+ SLIST_APPEND(exprs, rhs);
+ opt_leave(opt, "%pe %pe [${ylw}%g${rs}]\n", rhs, lhs, swapped);
+ }
+ }
+ SLIST_EXTEND(exprs, &left); // at most one of these two is still non-empty
+ SLIST_EXTEND(exprs, &right);
+}
+
+/**
+ * Reorder children to reduce cost.
+ *
+ * Impure children keep their positions.  Each run of consecutive pure
+ * children between them is sorted with sort_exprs().
+ */
+static struct bfs_expr *reorder_andor(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+	struct bfs_exprs pending;
+	foster_children(expr, &pending);
+
+	// The current run of consecutive pure children
+	struct bfs_exprs run;
+	SLIST_INIT(&run);
+
+	while (!SLIST_EMPTY(&pending)) {
+		struct bfs_expr *child = SLIST_POP(&pending);
+
+		if (child->pure) {
+			SLIST_APPEND(&run, child);
+			continue;
+		}
+
+		// An impure child ends the run: flush it, then keep the
+		// impure child right after it
+		sort_exprs(opt, expr, &run);
+		bfs_expr_extend(expr, &run);
+		bfs_expr_append(expr, child);
+	}
+
+	// Flush the trailing run, if any
+	sort_exprs(opt, expr, &run);
+	bfs_expr_extend(expr, &run);
+
+	return visit_shallow(opt, expr, &annotate);
+}
+
+/**
+ * Reordering visitor.
+ *
+ * Only -and and -or expressions are handled; both use reorder_andor().
+ */
+static const struct visitor reorder = {
+ .name = "reorder",
+ .table = (const struct visitor_table[]) {
+ {eval_and, reorder_andor},
+ {eval_or, reorder_andor},
+ {NULL, NULL},
+ },
+};
+
+/**
+ * Transfer function for simple predicates.
+ *
+ * Constrains the predicate to `value` in the after_true state and to the
+ * opposite value in the after_false state.
+ */
+static void data_flow_pred(struct bfs_opt *opt, enum pred_type pred, bool value) {
+	bool opposite = !value;
+
+	constrain_pred(&opt->after_true.preds[pred], value);
+	constrain_pred(&opt->after_false.preds[pred], opposite);
+}
+
+/**
+ * Transfer function for icmp-style ([+-]N) expressions.
+ *
+ * Narrows the given range in the after_true and after_false states according
+ * to the comparison in expr->int_cmp against expr->num.
+ */
+static void data_flow_icmp(struct bfs_opt *opt, const struct bfs_expr *expr, enum range_type type) {
+	long long bound = expr->num;
+	struct df_range *when_true = &opt->after_true.ranges[type];
+	struct df_range *when_false = &opt->after_false.ranges[type];
+
+	if (expr->int_cmp == BFS_INT_EQUAL) {
+		// true: exactly bound; false: anything but bound
+		constrain_min(when_true, bound);
+		constrain_max(when_true, bound);
+		range_remove(when_false, bound);
+	} else if (expr->int_cmp == BFS_INT_LESS) {
+		// true: below bound; false: bound or above
+		constrain_min(when_false, bound);
+		constrain_max(when_true, bound);
+		range_remove(when_true, bound);
+	} else if (expr->int_cmp == BFS_INT_GREATER) {
+		// true: above bound; false: bound or below
+		constrain_max(when_false, bound);
+		constrain_min(when_true, bound);
+		range_remove(when_true, bound);
+	}
+}
+
+/**
+ * Transfer function for -{execut,read,writ}able.
+ *
+ * Applies data_flow_pred() for each access mode bit set in expr->num.
+ */
+static struct bfs_expr *data_flow_access(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+	static const struct {
+		int mode;
+		enum pred_type pred;
+	} modes[] = {
+		{R_OK, READABLE_PRED},
+		{W_OK, WRITABLE_PRED},
+		{X_OK, EXECUTABLE_PRED},
+	};
+
+	for (size_t i = 0; i < countof(modes); ++i) {
+		if (expr->num & modes[i].mode) {
+			data_flow_pred(opt, modes[i].pred, true);
+		}
+	}
+
+	return expr;
+}
+
+/**
+ * Transfer function for -gid.
+ *
+ * When the after_true GID range is a single value, that group is looked up
+ * and the result is recorded as a fact about -nogroup.
+ */
+static struct bfs_expr *data_flow_gid(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+	const struct df_range *gids = &opt->after_true.ranges[GID_RANGE];
+	if (gids->min != gids->max) {
+		return expr;
+	}
+
+	gid_t gid = gids->min;
+	bool found = bfs_getgrgid(opt->ctx->groups, gid);
+
+	// NOTE(review): this relies on bfs_getgrgid() leaving errno at 0 unless
+	// the lookup itself failed -- confirm against its documentation
+	if (errno != 0) {
+		return expr;
+	}
+
+	data_flow_pred(opt, NOGROUP_PRED, !found);
+	return expr;
+}
+
+/**
+ * Transfer function for -inum.
+ *
+ * A single possible inode number gets a low match probability; anything
+ * else gets the default.
+ */
+static struct bfs_expr *data_flow_inum(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+	const struct df_range *inums = &opt->after_true.ranges[INUM_RANGE];
+	bool single_value = inums->min == inums->max;
+
+	expr->probability = single_value ? 0.01 : 0.5;
+	return expr;
+}
+
+/**
+ * Transfer function for -links.
+ *
+ * If the after_true range of link counts includes 1, the match probability
+ * is set high; otherwise it gets the default.
+ */
+static struct bfs_expr *data_flow_links(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+	const struct df_range *links = &opt->after_true.ranges[LINKS_RANGE];
+	bool includes_one = links->min <= 1 && links->max >= 1;
+
+	expr->probability = includes_one ? 0.99 : 0.5;
+	return expr;
+}
+
+/**
+ * Transfer function for -samefile.
+ *
+ * A match pins the inode number to expr->ino; a mismatch excludes it.
+ */
+static struct bfs_expr *data_flow_samefile(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+	struct df_range *when_true = &opt->after_true.ranges[INUM_RANGE];
+	struct df_range *when_false = &opt->after_false.ranges[INUM_RANGE];
+
+	// true: the inode number is exactly expr->ino
+	constrain_min(when_true, expr->ino);
+	constrain_max(when_true, expr->ino);
+
+	// false: the inode number is anything else
+	range_remove(when_false, expr->ino);
+
+	return expr;
+}
+
+/**
+ * Transfer function for -size.
+ *
+ * A single possible size gets a low match probability; anything else gets
+ * the default.
+ */
+static struct bfs_expr *data_flow_size(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+	const struct df_range *sizes = &opt->after_true.ranges[SIZE_RANGE];
+	bool single_value = sizes->min == sizes->max;
+
+	expr->probability = single_value ? 0.01 : 0.5;
+	return expr;
+}
+
+/**
+ * Transfer function for -type.
+ *
+ * After a match, only the types in expr->num remain possible; after a
+ * mismatch, those types are excluded.
+ */
+static struct bfs_expr *data_flow_type(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+	opt->after_true.types = opt->after_true.types & expr->num;
+	opt->after_false.types = opt->after_false.types & ~expr->num;
+
+	return expr;
+}
+
+/**
+ * Transfer function for -uid.
+ *
+ * When the after_true UID range is a single value, that user is looked up
+ * and the result is recorded as a fact about -nouser.
+ */
+static struct bfs_expr *data_flow_uid(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+	const struct df_range *uids = &opt->after_true.ranges[UID_RANGE];
+	if (uids->min != uids->max) {
+		return expr;
+	}
+
+	uid_t uid = uids->min;
+	bool found = bfs_getpwuid(opt->ctx->users, uid);
+
+	// NOTE(review): this relies on bfs_getpwuid() leaving errno at 0 unless
+	// the lookup itself failed -- confirm against its documentation
+	if (errno != 0) {
+		return expr;
+	}
+
+	data_flow_pred(opt, NOUSER_PRED, !found);
+	return expr;
+}
+
+/**
+ * Transfer function for -xtype.
+ *
+ * After a match, only the types in expr->num remain possible in xtypes;
+ * after a mismatch, those types are excluded.
+ */
+static struct bfs_expr *data_flow_xtype(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+	opt->after_true.xtypes = opt->after_true.xtypes & expr->num;
+	opt->after_false.xtypes = opt->after_false.xtypes & ~expr->num;
+
+	return expr;
+}
+
+/**
+ * Data flow visitor entry.
+ *
+ * Does the default visit_enter() work and dumps the incoming state.  For an
+ * impure expression that is not a parent, the incoming state is also joined
+ * into opt->impure.
+ */
+static struct bfs_expr *data_flow_enter(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+	visit_enter(opt, expr, visitor);
+
+	df_dump(opt, "before", &opt->before);
+
+	bool pure_or_parent = bfs_expr_is_parent(expr) || expr->pure;
+	if (pure_or_parent) {
+		return expr;
+	}
+
+	df_join(opt->impure, &opt->before);
+	df_dump(opt, "impure", opt->impure);
+
+	return expr;
+}
+
+/**
+ * Data flow visitor exit.
+ *
+ * Reconciles the expression's always_true/always_false flags with the
+ * computed data flow states:
+ *
+ * - A flagged expression has the contradicting output state set to bottom.
+ * - If an output state is bottom, an impure expression is flagged
+ *   accordingly, while a pure one is replaced by the matching constant
+ *   (with a warning).
+ *
+ * @return The resulting expression, or NULL if creating a replacement
+ *         constant failed.
+ */
+static struct bfs_expr *data_flow_leave(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+	if (expr->always_true) {
+		expr->probability = 1.0;
+		df_init_bottom(&opt->after_false);
+	}
+
+	if (expr->always_false) {
+		expr->probability = 0.0;
+		df_init_bottom(&opt->after_true);
+	}
+
+	df_dump(opt, "after true", &opt->after_true);
+	df_dump(opt, "after false", &opt->after_false);
+
+	if (df_is_bottom(&opt->after_false)) {
+		// The expression can never be false
+		if (!expr->pure) {
+			expr->always_true = true;
+			// An always-true expression matches with certainty,
+			// as in the always_true branch above
+			expr->probability = 1.0;
+		} else if (expr->eval_fn != eval_true) {
+			opt_warning(opt, expr, "This expression is always true.\n\n");
+			opt_debug(opt, "pure, always true\n");
+			expr = opt_const(opt, true);
+			if (!expr) {
+				return NULL;
+			}
+		}
+	}
+
+	if (df_is_bottom(&opt->after_true)) {
+		// The expression can never be true
+		if (!expr->pure) {
+			expr->always_false = true;
+			expr->probability = 0.0;
+		} else if (expr->eval_fn != eval_false) {
+			opt_warning(opt, expr, "This expression is always false.\n\n");
+			opt_debug(opt, "pure, always false\n");
+			expr = opt_const(opt, false);
+			if (!expr) {
+				return NULL;
+			}
+		}
+	}
+
+	return visit_leave(opt, expr, visitor);
+}
+
+/**
+ * Data flow visitor function.
+ *
+ * A pure expression whose result is ignored, and any expression whose
+ * incoming state is bottom, is replaced by -false (with a warning).  The
+ * generic transfer functions for simple predicates and integer comparisons
+ * are then applied by eval_fn.
+ *
+ * @return The resulting expression, or NULL if creating a replacement
+ *         constant failed.
+ */
+static struct bfs_expr *data_flow_visit(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+	/** Table of simple predicates. */
+	static const struct {
+		bfs_eval_fn *eval_fn;
+		enum pred_type pred;
+	} pred_fns[] = {
+		{eval_acl, ACL_PRED},
+		{eval_capable, CAPABLE_PRED},
+		{eval_empty, EMPTY_PRED},
+		{eval_hidden, HIDDEN_PRED},
+		{eval_nogroup, NOGROUP_PRED},
+		{eval_nouser, NOUSER_PRED},
+		{eval_sparse, SPARSE_PRED},
+		{eval_xattr, XATTR_PRED},
+	};
+
+	/** Table of simple range comparisons. */
+	static const struct {
+		bfs_eval_fn *eval_fn;
+		enum range_type range;
+	} range_fns[] = {
+		{eval_depth, DEPTH_RANGE},
+		{eval_gid, GID_RANGE},
+		{eval_inum, INUM_RANGE},
+		{eval_links, LINKS_RANGE},
+		{eval_size, SIZE_RANGE},
+		{eval_uid, UID_RANGE},
+	};
+
+	if (opt->ignore_result && expr->pure) {
+		opt_debug(opt, "ignored result\n");
+		opt_warning(opt, expr, "The result of this expression is ignored.\n\n");
+		expr = opt_const(opt, false);
+		if (!expr) {
+			return NULL;
+		}
+	}
+
+	if (df_is_bottom(&opt->before)) {
+		opt_debug(opt, "unreachable\n");
+		opt_warning(opt, expr, "This expression is unreachable.\n\n");
+		expr = opt_const(opt, false);
+		if (!expr) {
+			return NULL;
+		}
+	}
+
+	// Apply the first matching predicate entry, if any
+	size_t p = 0;
+	while (p < countof(pred_fns) && pred_fns[p].eval_fn != expr->eval_fn) {
+		++p;
+	}
+	if (p < countof(pred_fns)) {
+		data_flow_pred(opt, pred_fns[p].pred, true);
+	}
+
+	// Apply the first matching range entry, if any
+	size_t r = 0;
+	while (r < countof(range_fns) && range_fns[r].eval_fn != expr->eval_fn) {
+		++r;
+	}
+	if (r < countof(range_fns)) {
+		data_flow_icmp(opt, expr, range_fns[r].range);
+	}
+
+	return expr;
+}
+
+/**
+ * Data flow visitor.
+ *
+ * Overrides enter() and leave(), and runs data_flow_visit() on every
+ * expression before the table handler for its eval_fn (see visit_deep()).
+ */
+static const struct visitor data_flow = {
+ .name = "data_flow",
+ .enter = data_flow_enter,
+ .visit = data_flow_visit,
+ .leave = data_flow_leave,
+ .table = (const struct visitor_table[]) {
+ {eval_access, data_flow_access},
+ {eval_gid, data_flow_gid},
+ {eval_inum, data_flow_inum},
+ {eval_links, data_flow_links},
+ {eval_samefile, data_flow_samefile},
+ {eval_size, data_flow_size},
+ {eval_type, data_flow_type},
+ {eval_uid, data_flow_uid},
+ {eval_xtype, data_flow_xtype},
+ {NULL, NULL},
+ },
+};
+
+/**
+ * Simplify a negation.
+ *
+ * When the result is ignored the negation has no effect, so it is replaced
+ * by its operand.
+ */
+static struct bfs_expr *simplify_not(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+	if (!opt->ignore_result) {
+		return expr;
+	}
+
+	opt_debug(opt, "ignored result\n");
+	return only_child(expr);
+}
+
+/**
+ * Lift negations out of a conjunction/disjunction using De Morgan's laws.
+ *
+ * If more children are negated than not, the operator is flipped
+ * (-and <-> -or), every child is negated, and the whole expression is
+ * negated.  Otherwise the expression is only re-annotated.
+ *
+ * @param expr  An -and or -or expression.
+ * @return The resulting expression, or NULL on failure.
+ */
+static struct bfs_expr *lift_andor_not(struct bfs_opt *opt, struct bfs_expr *expr) {
+	// Only lift negations if it would reduce the number of (-not) expressions
+	size_t added = 0, removed = 0;
+	for (struct bfs_expr *child = bfs_expr_children(expr); child; child = child->next) {
+		if (child->eval_fn == eval_not) {
+			++removed;
+		} else {
+			++added;
+		}
+	}
+	if (added >= removed) {
+		return visit_shallow(opt, expr, &annotate);
+	}
+
+	opt_debug(opt, "De Morgan's laws\n");
+
+	if (expr->eval_fn == eval_and) {
+		expr->eval_fn = eval_or;
+		expr->argv = &fake_or_arg;
+	} else {
+		bfs_assert(expr->eval_fn == eval_or);
+		expr->eval_fn = eval_and;
+		expr->argv = &fake_and_arg;
+	}
+
+	struct bfs_exprs children;
+	foster_children(expr, &children);
+
+	struct bfs_expr *child;
+	while ((child = SLIST_POP(&children))) {
+		opt_enter(opt, "%pe\n", child);
+
+		child = negate_expr(opt, child, &fake_not_arg);
+		if (!child) {
+			return NULL;
+		}
+
+		opt_leave(opt, "%pe\n", child);
+		bfs_expr_append(expr, child);
+	}
+
+	expr = visit_shallow(opt, expr, &annotate);
+	if (!expr) {
+		// negate_expr() dereferences its argument, so stop here
+		return NULL;
+	}
+
+	return negate_expr(opt, expr, &fake_not_arg);
+}
+
+/**
+ * Get the first ignorable expression in a conjunction/disjunction.
+ *
+ * This is the first child of the trailing run of pure children, i.e. the
+ * first pure child that has no impure child after it.
+ *
+ * @return That child, or NULL if there is none, if the optimization level is
+ *         below 2, or if the result is not ignored.
+ */
+static struct bfs_expr *first_ignorable(struct bfs_opt *opt, struct bfs_expr *expr) {
+	if (!(opt->level >= 2 && opt->ignore_result)) {
+		return NULL;
+	}
+
+	struct bfs_expr *start = NULL;
+	for (struct bfs_expr *child = bfs_expr_children(expr); child; child = child->next) {
+		if (child->pure) {
+			// Remember only the first child of the current pure run
+			if (!start) {
+				start = child;
+			}
+		} else {
+			// An impure child ends the run
+			start = NULL;
+		}
+	}
+
+	return start;
+}
+
+/**
+ * Simplify a conjunction.
+ *
+ * Drops -true operands, everything from the first ignorable operand on, and
+ * everything after an always-false operand.  A conjunction left with no
+ * operands becomes -true, and one left with a single operand becomes that
+ * operand.
+ */
+static struct bfs_expr *simplify_and(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+	struct bfs_expr *first_ignored = first_ignorable(opt, expr);
+
+	struct bfs_exprs operands;
+	foster_children(expr, &operands);
+
+	bool ignoring = false;
+	struct bfs_expr *operand;
+	while ((operand = SLIST_POP(&operands))) {
+		ignoring = ignoring || operand == first_ignored;
+
+		if (ignoring) {
+			opt_delete(opt, "%pe [ignored result]\n", operand);
+			opt_warning(opt, operand, "The result of this expression is ignored.\n\n");
+		} else if (operand->eval_fn == eval_true) {
+			opt_delete(opt, "%pe [conjunction elimination]\n", operand);
+		} else {
+			opt_visit(opt, "%pe\n", operand);
+			bfs_expr_append(expr, operand);
+
+			if (operand->always_false) {
+				// Nothing after an always-false operand is evaluated
+				struct bfs_expr *dead;
+				while ((dead = SLIST_POP(&operands))) {
+					opt_delete(opt, "%pe [short-circuit]\n", dead);
+				}
+			}
+		}
+	}
+
+	struct bfs_expr *head = bfs_expr_children(expr);
+	if (!head) {
+		opt_debug(opt, "nullary identity\n");
+		return opt_const(opt, true);
+	}
+	if (!head->next) {
+		opt_debug(opt, "unary identity\n");
+		return only_child(expr);
+	}
+
+	return lift_andor_not(opt, expr);
+}
+
+/**
+ * Simplify a disjunction.
+ *
+ * Drops -false operands, everything from the first ignorable operand on, and
+ * everything after an always-true operand.  A disjunction left with no
+ * operands becomes -false, and one left with a single operand becomes that
+ * operand.
+ */
+static struct bfs_expr *simplify_or(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+	struct bfs_expr *first_ignored = first_ignorable(opt, expr);
+
+	struct bfs_exprs operands;
+	foster_children(expr, &operands);
+
+	bool ignoring = false;
+	struct bfs_expr *operand;
+	while ((operand = SLIST_POP(&operands))) {
+		ignoring = ignoring || operand == first_ignored;
+
+		if (ignoring) {
+			opt_delete(opt, "%pe [ignored result]\n", operand);
+			opt_warning(opt, operand, "The result of this expression is ignored.\n\n");
+		} else if (operand->eval_fn == eval_false) {
+			opt_delete(opt, "%pe [disjunctive syllogism]\n", operand);
+		} else {
+			opt_visit(opt, "%pe\n", operand);
+			bfs_expr_append(expr, operand);
+
+			if (operand->always_true) {
+				// Nothing after an always-true operand is evaluated
+				struct bfs_expr *dead;
+				while ((dead = SLIST_POP(&operands))) {
+					opt_delete(opt, "%pe [short-circuit]\n", dead);
+				}
+			}
+		}
+	}
+
+	struct bfs_expr *head = bfs_expr_children(expr);
+	if (!head) {
+		opt_debug(opt, "nullary identity\n");
+		return opt_const(opt, false);
+	}
+	if (!head->next) {
+		opt_debug(opt, "unary identity\n");
+		return only_child(expr);
+	}
+
+	return lift_andor_not(opt, expr);
+}
+
+/**
+ * Simplify a comma expression.
+ *
+ * At -O2 and above, every pure operand other than the last one is deleted
+ * (with a warning).  If exactly one operand remains afterwards, it replaces
+ * the comma expression.
+ */
+static struct bfs_expr *simplify_comma(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) {
+	struct bfs_exprs pending;
+	foster_children(expr, &pending);
+
+	while (!SLIST_EMPTY(&pending)) {
+		struct bfs_expr *operand = SLIST_POP(&pending);
+
+		// The last operand is always kept; earlier ones only if they aren't pure
+		bool discard = opt->level >= 2 && operand->pure && !SLIST_EMPTY(&pending);
+		if (!discard) {
+			opt_visit(opt, "%pe\n", operand);
+			bfs_expr_append(expr, operand);
+			continue;
+		}
+
+		opt_delete(opt, "%pe [ignored result]\n", operand);
+		opt_warning(opt, operand, "The result of this expression is ignored.\n\n");
+	}
+
+	struct bfs_expr *first = bfs_expr_children(expr);
+	if (!first || first->next) {
+		return expr;
+	}
+
+	opt_debug(opt, "unary identity\n");
+	return only_child(expr);
+}
+
+/**
+ * Logical simplification visitor.
+ *
+ * The table pairs the eval function of each logical operator with the
+ * simplify_*() callback for it. The {NULL, NULL} entry is presumably the
+ * end-of-table marker expected by visit() -- TODO confirm.
+ */
+static const struct visitor simplify = {
+ .name = "simplify",
+ .table = (const struct visitor_table[]) {
+ {eval_not, simplify_not},
+ {eval_and, simplify_and},
+ {eval_or, simplify_or},
+ {eval_comma, simplify_comma},
+ {NULL, NULL},
+ },
+};
+
+/**
+ * Optimize an expression.
+ *
+ * Pass 0 runs the annotate visitor with opt itself. Up to three further
+ * passes then run the table of visitors below, each on a fresh copy of *opt.
+ *
+ * @param opt
+ * The optimizer state. *opt->impure is overwritten with the impure
+ * domain computed by the last pass that ran.
+ * @param expr
+ * The expression to optimize.
+ * @return
+ * The optimized expression, or NULL if a visitor in passes 1-3
+ * returned NULL.
+ */
+static struct bfs_expr *optimize(struct bfs_opt *opt, struct bfs_expr *expr) {
+ opt_enter(opt, "pass 0:\n");
+ expr = visit(opt, expr, &annotate);
+ opt_leave(opt, NULL);
+
+ /** Table of optimization passes. */
+ static const struct {
+ /** Minimum optlevel for this pass. */
+ int level;
+ /** The visitor for this pass. */
+ const struct visitor *visitor;
+ } passes[] = {
+ {1, &canonicalize},
+ {3, &reorder},
+ {2, &data_flow},
+ {1, &simplify},
+ };
+
+ // Scratch impure domain shared by the nested copies below
+ struct df_domain impure;
+
+ for (int i = 0; i < 3; ++i) {
+ // Each pass works on its own copy of *opt, and restarts the impure
+ // domain from the caller's value
+ struct bfs_opt nested = *opt;
+ nested.impure = &impure;
+ impure = *opt->impure;
+
+ opt_enter(&nested, "pass %d:\n", i + 1);
+
+ for (size_t j = 0; j < countof(passes); ++j) {
+ if (opt->level < passes[j].level) {
+ continue;
+ }
+
+ // Skip reordering the first time through the passes, to
+ // make warnings more understandable
+ if (passes[j].visitor == &reorder) {
+ if (i == 0) {
+ continue;
+ } else {
+ // Applies to reorder and the visitors after it in this
+ // pass only, since nested is re-copied every pass
+ nested.warn = false;
+ }
+ }
+
+ expr = visit(&nested, expr, passes[j].visitor);
+ if (!expr) {
+ return NULL;
+ }
+ }
+
+ opt_leave(&nested, NULL);
+
+ // Stop early once the result is no longer a parent expression
+ if (!bfs_expr_is_parent(expr)) {
+ break;
+ }
+ }
+
+ // Publish the impure domain from the last pass to the caller
+ *opt->impure = impure;
+ return expr;
+}
+
+/**
+ * Estimate the odds that evaluating an expression calls stat().
+ *
+ * An expression that itself calls stat() has odds 1.  Otherwise each child's
+ * odds are weighted by the odds that evaluation reaches it: under -and every
+ * earlier child must have succeeded, under -or every earlier child must have
+ * failed.  For any other parent, every child counts as always reached.
+ */
+static float expr_stat_odds(struct bfs_expr *expr) {
+	if (expr->calls_stat) {
+		return 1.0;
+	}
+
+	const bool is_and = expr->eval_fn == eval_and;
+	const bool is_or = expr->eval_fn == eval_or;
+
+	float miss = 1.0;
+	float reach = 1.0;
+
+	struct bfs_expr *kid = bfs_expr_children(expr);
+	while (kid) {
+		float kid_odds = expr_stat_odds(kid);
+		miss *= 1.0 - reach * kid_odds;
+
+		if (is_and) {
+			reach *= kid->probability;
+		} else if (is_or) {
+			reach *= 1.0 - kid->probability;
+		}
+
+		kid = kid->next;
+	}
+
+	return 1.0 - miss;
+}
+
+/**
+ * Estimate the odds that evaluating a file calls stat().
+ *
+ * With ctx->unique set the odds are 1.  Otherwise the odds for the exclude
+ * expression are combined with those of the main expression, which is weighted
+ * by the odds that the exclude expression does not match.
+ */
+static float estimate_stat_odds(struct bfs_ctx *ctx) {
+	if (ctx->unique) {
+		return 1.0;
+	}
+
+	// Odds that the exclude expression gets by without stat()
+	float quiet = 1.0 - expr_stat_odds(ctx->exclude);
+
+	// The main expression only runs for files that aren't excluded
+	float main_reached = 1.0 - ctx->exclude->probability;
+	float main_odds = expr_stat_odds(ctx->expr);
+	quiet *= 1.0 - main_reached * main_odds;
+
+	return 1.0 - quiet;
+}
+
+/**
+ * Optimize ctx->exclude and ctx->expr in place, then post-process the
+ * context: -mindepth/-maxdepth may be tightened and BFTW_STAT may be set.
+ * Returns 0 on success, or -1 if optimizing either expression fails.
+ */
+int bfs_optimize(struct bfs_ctx *ctx) {
+ bfs_ctx_dump(ctx, DEBUG_OPT);
+
+ // Shared through opt.impure; optimize() writes its result back here
+ struct df_domain impure;
+ df_init_bottom(&impure);
+
+ struct bfs_opt opt = {
+ .ctx = ctx,
+ .level = ctx->optlevel,
+ .depth = 0,
+ .warn = ctx->warn,
+ .ignore_result = false,
+ .impure = &impure,
+ };
+ df_init_top(&opt.before);
+
+ ctx->exclude = optimize(&opt, ctx->exclude);
+ if (!ctx->exclude) {
+ return -1;
+ }
+
+ // Only non-excluded files are evaluated
+ opt.before = opt.after_false;
+ opt.ignore_result = true;
+
+ // Feed any explicit -mindepth/-maxdepth into the depth range
+ struct df_range *depth = &opt.before.ranges[DEPTH_RANGE];
+ if (ctx->mindepth > 0) {
+ constrain_min(depth, ctx->mindepth);
+ }
+ if (ctx->maxdepth < INT_MAX) {
+ constrain_max(depth, ctx->maxdepth);
+ }
+
+ ctx->expr = optimize(&opt, ctx->expr);
+ if (!ctx->expr) {
+ return -1;
+ }
+
+ // The impure domain is still bottom after optimizing both expressions
+ if (opt.level >= 2 && df_is_bottom(&impure)) {
+ bfs_warning(ctx, "This command won't do anything.\n\n");
+ }
+
+ const struct df_range *impure_depth = &impure.ranges[DEPTH_RANGE];
+ long long mindepth = impure_depth->min;
+ long long maxdepth = impure_depth->max;
+
+ opt_enter(&opt, "post-process:\n");
+
+ // Raise -mindepth to the impure depth range's minimum (clamped to int)
+ if (opt.level >= 2 && mindepth > ctx->mindepth) {
+ if (mindepth > INT_MAX) {
+ mindepth = INT_MAX;
+ }
+ opt_enter(&opt, "${blu}-mindepth${rs} ${bld}%d${rs}\n", ctx->mindepth);
+ ctx->mindepth = mindepth;
+ opt_leave(&opt, "${blu}-mindepth${rs} ${bld}%d${rs}\n", ctx->mindepth);
+ }
+
+ // Lower -maxdepth to the impure depth range's maximum (clamped to int); -O4 only
+ if (opt.level >= 4 && maxdepth < ctx->maxdepth) {
+ if (maxdepth < INT_MIN) {
+ maxdepth = INT_MIN;
+ }
+ opt_enter(&opt, "${blu}-maxdepth${rs} ${bld}%d${rs}\n", ctx->maxdepth);
+ ctx->maxdepth = maxdepth;
+ opt_leave(&opt, "${blu}-maxdepth${rs} ${bld}%d${rs}\n", ctx->maxdepth);
+ }
+
+ if (opt.level >= 3) {
+ // bfs_eval() can do lazy stat() calls, but only on one thread.
+ float lazy_cost = estimate_stat_odds(ctx);
+ // bftw() can do eager stat() calls in parallel
+ float eager_cost = 1.0 / ctx->threads;
+
+ // Request eager stat() whenever it is estimated to be no more costly
+ if (eager_cost <= lazy_cost) {
+ opt_enter(&opt, "lazy stat cost: ${ylw}%g${rs}\n", lazy_cost);
+ ctx->flags |= BFTW_STAT;
+ opt_leave(&opt, "eager stat cost: ${ylw}%g${rs}\n", eager_cost);
+ }
+
+ }
+
+ opt_leave(&opt, NULL);
+
+ return 0;
+}
diff --git a/src/opt.h b/src/opt.h
new file mode 100644
index 0000000..4aac129
--- /dev/null
+++ b/src/opt.h
@@ -0,0 +1,23 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+/**
+ * Optimization.
+ */
+
+#ifndef BFS_OPT_H
+#define BFS_OPT_H
+
+struct bfs_ctx;
+
+/**
+ * Apply optimizations to the command line.
+ *
+ * The optimized expressions replace ctx->exclude and ctx->expr. The depth
+ * limits (ctx->mindepth, ctx->maxdepth) and ctx->flags may be adjusted too.
+ *
+ * @param ctx
+ * The bfs context to optimize.
+ * @return
+ * 0 if successful, -1 on error.
+ */
+int bfs_optimize(struct bfs_ctx *ctx);
+
+#endif // BFS_OPT_H
diff --git a/src/parse.c b/src/parse.c
new file mode 100644
index 0000000..a1155c0
--- /dev/null
+++ b/src/parse.c
@@ -0,0 +1,3712 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+/**
+ * The command line parser. Expressions are parsed by recursive descent, with a
+ * grammar described in the comments of the parse_*() functions. The parser
+ * also accepts flags and paths at any point in the expression, by treating
+ * flags like always-true options, and skipping over paths wherever they appear.
+ */
+
+#include "prelude.h"
+#include "parse.h"
+#include "alloc.h"
+#include "bfstd.h"
+#include "bftw.h"
+#include "color.h"
+#include "ctx.h"
+#include "diag.h"
+#include "dir.h"
+#include "eval.h"
+#include "exec.h"
+#include "expr.h"
+#include "fsade.h"
+#include "list.h"
+#include "opt.h"
+#include "printf.h"
+#include "pwcache.h"
+#include "sanity.h"
+#include "stat.h"
+#include "typo.h"
+#include "xregex.h"
+#include "xspawn.h"
+#include "xtime.h"
+#include <errno.h>
+#include <fcntl.h>
+#include <fnmatch.h>
+#include <grp.h>
+#include <limits.h>
+#include <pwd.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+// Strings printed by -D tree for "fake" expressions, i.e. ones that do not
+// correspond to an argument on the command line.
+// NOTE(review): presumably non-const so that they can stand in for char **argv
+// entries -- confirm against the users of these variables.
+static char *fake_and_arg = "-and";
+static char *fake_hidden_arg = "-hidden";
+static char *fake_or_arg = "-or";
+static char *fake_print_arg = "-print";
+static char *fake_true_arg = "-true";
+
+/**
+ * Color use flags.
+ *
+ * COLOR_NEVER is zero, so code that tests a use_color value for truth (as
+ * expr_open() does) treats both COLOR_AUTO and COLOR_ALWAYS as "use colors".
+ */
+enum use_color {
+ /** Set by parse_color() when its color argument is zero. */
+ COLOR_NEVER,
+ /** Not set by parse_color(); presumably the default -- TODO confirm. */
+ COLOR_AUTO,
+ /** Set by parse_color() when its color argument is non-zero. */
+ COLOR_ALWAYS,
+};
+
+/**
+ * Command line parser state.
+ */
+struct bfs_parser {
+ /** The command line being constructed. */
+ struct bfs_ctx *ctx;
+ /** The command line arguments being parsed (points at the current token). */
+ char **argv;
+ /** The name of this program. */
+ const char *command;
+
+ /** The current regex type to use. */
+ enum bfs_regex_type regex_type;
+
+ /** Whether stdout is a terminal. */
+ bool stdout_tty;
+ /** Whether this session is interactive (stdin and stderr are each a terminal). */
+ bool interactive;
+ /** Whether -color or -nocolor has been passed. */
+ enum use_color use_color;
+ /** Whether a -print action is implied. */
+ bool implicit_print;
+ /** Whether the default root "." should be used. */
+ bool implicit_root;
+ /** Whether the expression has started. */
+ bool expr_started;
+ /** Whether an information option like -help or -version was passed. */
+ bool just_info;
+ /** Whether we are currently parsing an -exclude expression. */
+ bool excluding;
+
+ /** The last non-path argument. */
+ char **last_arg;
+ /** A "-depth"-type argument, if any. */
+ char **depth_arg;
+ /** A "-limit" argument, if any. */
+ char **limit_arg;
+ /** A "-prune" argument, if any. */
+ char **prune_arg;
+ /** A "-mount" argument, if any. */
+ char **mount_arg;
+ /** An "-xdev" argument, if any. */
+ char **xdev_arg;
+ /** A "-files0-from -" argument, if any. */
+ char **files0_stdin_arg;
+ /** An "-ok"-type expression, if any. */
+ const struct bfs_expr *ok_expr;
+
+ /** The current time (maybe modified by -daystart). */
+ struct timespec now;
+};
+
+/**
+ * Possible token types.
+ *
+ * parser_advance() treats these differently: every type except T_FLAG and
+ * T_PATH marks the expression as started, and every type except T_PATH
+ * updates the parser's last_arg.
+ */
+enum token_type {
+ /** A flag. */
+ T_FLAG,
+ /** A root path. */
+ T_PATH,
+ /** An option. */
+ T_OPTION,
+ /** A test. */
+ T_TEST,
+ /** An action. */
+ T_ACTION,
+ /** An operator. */
+ T_OPERATOR,
+};
+
+/**
+ * Report a low-level failure during parsing via bfs_perror(), with str as the
+ * message prefix.
+ */
+static void parse_perror(const struct bfs_parser *parser, const char *str) {
+	const struct bfs_ctx *ctx = parser->ctx;
+	bfs_perror(ctx, str);
+}
+
+/**
+ * Clear a highlight mask, which has one entry per command line argument
+ * (ctx->argc entries in total).
+ */
+static void init_highlight(const struct bfs_ctx *ctx, bool *args) {
+	size_t remaining = ctx->argc;
+	while (remaining-- > 0) {
+		args[remaining] = false;
+	}
+}
+
+/**
+ * Highlight a range of command line arguments.
+ *
+ * @param ctx
+ * The bfs context whose argv the range belongs to.
+ * @param argv
+ * The first argument to highlight; must point into ctx->argv.
+ * @param argc
+ * The number of arguments to highlight.
+ * @param args
+ * The highlight mask to update, indexed like ctx->argv.
+ */
+static void highlight_args(const struct bfs_ctx *ctx, char **argv, size_t argc, bool *args) {
+ // Index of the first highlighted argument within ctx->argv
+ size_t i = argv - ctx->argv;
+ for (size_t j = 0; j < argc; ++j) {
+ bfs_assert(i + j < ctx->argc);
+ args[i + j] = true;
+ }
+}
+
+/**
+ * Report a parse error about the argument currently being parsed.
+ *
+ * parser->argv[0] is highlighted on the command line, then the printf()-style
+ * message is printed.  errno is restored to its value on entry before the
+ * message is formatted.
+ */
+attr(printf(2, 3))
+static void parse_error(const struct bfs_parser *parser, const char *format, ...) {
+	const int saved_errno = errno;
+	const struct bfs_ctx *ctx = parser->ctx;
+
+	bool marks[ctx->argc];
+	init_highlight(ctx, marks);
+	highlight_args(ctx, parser->argv, 1, marks);
+	bfs_argv_error(ctx, marks);
+
+	va_list ap;
+	va_start(ap, format);
+	errno = saved_errno;
+	bfs_verror(ctx, format, ap);
+	va_end(ap);
+}
+
+/**
+ * Report an error about a specific range of command line arguments.
+ *
+ * The argc arguments starting at argv are highlighted, then the
+ * printf()-style message is printed.  errno is restored to its value on entry
+ * before the message is formatted.
+ */
+attr(printf(4, 5))
+static void parse_argv_error(const struct bfs_parser *parser, char **argv, size_t argc, const char *format, ...) {
+	const int saved_errno = errno;
+	const struct bfs_ctx *ctx = parser->ctx;
+
+	bool marks[ctx->argc];
+	init_highlight(ctx, marks);
+	highlight_args(ctx, argv, argc, marks);
+	bfs_argv_error(ctx, marks);
+
+	va_list ap;
+	va_start(ap, format);
+	errno = saved_errno;
+	bfs_verror(ctx, format, ap);
+	va_end(ap);
+}
+
+/**
+ * Report an error about two conflicting ranges of command line arguments.
+ *
+ * Both ranges (argv1/argc1 and argv2/argc2) are highlighted together, then
+ * the printf()-style message is printed.  errno is restored to its value on
+ * entry before the message is formatted.
+ */
+attr(printf(6, 7))
+static void parse_conflict_error(const struct bfs_parser *parser, char **argv1, size_t argc1, char **argv2, size_t argc2, const char *format, ...) {
+	const int saved_errno = errno;
+	const struct bfs_ctx *ctx = parser->ctx;
+
+	bool marks[ctx->argc];
+	init_highlight(ctx, marks);
+	highlight_args(ctx, argv1, argc1, marks);
+	highlight_args(ctx, argv2, argc2, marks);
+	bfs_argv_error(ctx, marks);
+
+	va_list ap;
+	va_start(ap, format);
+	errno = saved_errno;
+	bfs_verror(ctx, format, ap);
+	va_end(ap);
+}
+
+/**
+ * Report an error about an expression.
+ *
+ * bfs_expr_error() points out the expression, then the printf()-style message
+ * is printed.  errno is restored to its value on entry before the message is
+ * formatted.
+ */
+attr(printf(3, 4))
+static void parse_expr_error(const struct bfs_parser *parser, const struct bfs_expr *expr, const char *format, ...) {
+	const int saved_errno = errno;
+	const struct bfs_ctx *ctx = parser->ctx;
+
+	bfs_expr_error(ctx, expr);
+
+	va_list ap;
+	va_start(ap, format);
+	errno = saved_errno;
+	bfs_verror(ctx, format, ap);
+	va_end(ap);
+}
+
+/**
+ * Print a warning about the argument currently being parsed.
+ *
+ * @return
+ *         false if bfs_argv_warning() declined to print, otherwise the result
+ *         of bfs_vwarning().
+ */
+attr(printf(2, 3))
+static bool parse_warning(const struct bfs_parser *parser, const char *format, ...) {
+	const int saved_errno = errno;
+	const struct bfs_ctx *ctx = parser->ctx;
+
+	bool marks[ctx->argc];
+	init_highlight(ctx, marks);
+	highlight_args(ctx, parser->argv, 1, marks);
+
+	bool printed = false;
+	if (bfs_argv_warning(ctx, marks)) {
+		va_list ap;
+		va_start(ap, format);
+		errno = saved_errno;
+		printed = bfs_vwarning(ctx, format, ap);
+		va_end(ap);
+	}
+	return printed;
+}
+
+/**
+ * Print a warning about two conflicting ranges of command line arguments.
+ *
+ * @return
+ *         false if bfs_argv_warning() declined to print, otherwise the result
+ *         of bfs_vwarning().
+ */
+attr(printf(6, 7))
+static bool parse_conflict_warning(const struct bfs_parser *parser, char **argv1, size_t argc1, char **argv2, size_t argc2, const char *format, ...) {
+	const int saved_errno = errno;
+	const struct bfs_ctx *ctx = parser->ctx;
+
+	bool marks[ctx->argc];
+	init_highlight(ctx, marks);
+	highlight_args(ctx, argv1, argc1, marks);
+	highlight_args(ctx, argv2, argc2, marks);
+
+	bool printed = false;
+	if (bfs_argv_warning(ctx, marks)) {
+		va_list ap;
+		va_start(ap, format);
+		errno = saved_errno;
+		printed = bfs_vwarning(ctx, format, ap);
+		va_end(ap);
+	}
+	return printed;
+}
+
+/**
+ * Print a warning about an expression.
+ *
+ * @return
+ *         false if bfs_expr_warning() declined to print, otherwise the result
+ *         of bfs_vwarning().
+ */
+attr(printf(3, 4))
+static bool parse_expr_warning(const struct bfs_parser *parser, const struct bfs_expr *expr, const char *format, ...) {
+	const int saved_errno = errno;
+	const struct bfs_ctx *ctx = parser->ctx;
+
+	bool printed = false;
+	if (bfs_expr_warning(ctx, expr)) {
+		va_list ap;
+		va_start(ap, format);
+		errno = saved_errno;
+		printed = bfs_vwarning(ctx, format, ap);
+		va_end(ap);
+	}
+	return printed;
+}
+
+/**
+ * Allocate a new expression with bfs_expr_new(), reporting the failure with
+ * parse_perror() if it returns NULL.
+ */
+static struct bfs_expr *parse_new_expr(const struct bfs_parser *parser, bfs_eval_fn *eval_fn, size_t argc, char **argv) {
+	struct bfs_expr *node = bfs_expr_new(parser->ctx, eval_fn, argc, argv);
+	if (node) {
+		return node;
+	}
+
+	parse_perror(parser, "bfs_expr_new()");
+	return NULL;
+}
+
+/**
+ * Create a new one-argument parent expression with rhs as its only child.
+ * Returns NULL if the allocation fails.
+ */
+static struct bfs_expr *new_unary_expr(const struct bfs_parser *parser, bfs_eval_fn *eval_fn, struct bfs_expr *rhs, char **argv) {
+	struct bfs_expr *expr = parse_new_expr(parser, eval_fn, 1, argv);
+	if (expr) {
+		bfs_assert(bfs_expr_is_parent(expr));
+		bfs_expr_append(expr, rhs);
+	}
+	return expr;
+}
+
+/**
+ * Create a new one-argument parent expression with lhs and rhs as its
+ * children, in that order.  Returns NULL if the allocation fails.
+ */
+static struct bfs_expr *new_binary_expr(const struct bfs_parser *parser, bfs_eval_fn *eval_fn, struct bfs_expr *lhs, struct bfs_expr *rhs, char **argv) {
+	struct bfs_expr *expr = parse_new_expr(parser, eval_fn, 1, argv);
+	if (expr) {
+		bfs_assert(bfs_expr_is_parent(expr));
+		bfs_expr_append(expr, lhs);
+		bfs_expr_append(expr, rhs);
+	}
+	return expr;
+}
+
+/**
+ * Fill in a "-print"-type expression: it writes to the context's cout and has
+ * no associated file path.
+ */
+static void init_print_expr(struct bfs_parser *parser, struct bfs_expr *expr) {
+	struct bfs_ctx *ctx = parser->ctx;
+	expr->cfile = ctx->cout;
+	expr->path = NULL;
+}
+
+/**
+ * Open a file for an expression.
+ *
+ * The file is opened for writing (created or truncated) and wrapped in a
+ * CFILE. On success, expr->cfile and expr->path are filled in.
+ *
+ * @param parser
+ * The parser state.
+ * @param expr
+ * The expression that will write to the file.
+ * @param path
+ * The path to open. The pointer itself is stored in expr->path.
+ * @return
+ * 0 on success, -1 on failure (after printing an error).
+ */
+static int expr_open(struct bfs_parser *parser, struct bfs_expr *expr, const char *path) {
+ struct bfs_ctx *ctx = parser->ctx;
+
+ // Both start out NULL so the fail path can tell how far we got
+ FILE *file = NULL;
+ CFILE *cfile = NULL;
+
+ file = xfopen(path, O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC);
+ if (!file) {
+ goto fail;
+ }
+
+ // Colors are passed for any use_color value other than COLOR_NEVER (0)
+ cfile = cfwrap(file, parser->use_color ? ctx->colors : NULL, true);
+ if (!cfile) {
+ goto fail;
+ }
+
+ CFILE *dedup = bfs_ctx_dedup(ctx, cfile, path);
+ if (!dedup) {
+ goto fail;
+ }
+
+ // A different stream came back: use that one and close ours
+ if (dedup != cfile) {
+ cfclose(cfile);
+ }
+
+ expr->cfile = dedup;
+ expr->path = path;
+ return 0;
+
+fail:
+ parse_expr_error(parser, expr, "%m.\n");
+ // Once wrapped, the file is closed through cfclose() rather than fclose()
+ if (cfile) {
+ cfclose(cfile);
+ } else if (file) {
+ fclose(file);
+ }
+ return -1;
+}
+
+/**
+ * Invoke bfs_stat() on a command line argument.
+ *
+ * Links are followed (BFS_STAT_TRYFOLLOW) if either BFTW_FOLLOW_ROOTS or
+ * BFTW_FOLLOW_ALL is set in the context's flags, and not followed otherwise.
+ *
+ * @return
+ *         The result of bfs_stat(); on failure an error highlighting the
+ *         argument is printed.
+ */
+static int stat_arg(const struct bfs_parser *parser, char **arg, struct bfs_stat *sb) {
+	const struct bfs_ctx *ctx = parser->ctx;
+
+	enum bfs_stat_flags how = BFS_STAT_NOFOLLOW;
+	if (ctx->flags & (BFTW_FOLLOW_ROOTS | BFTW_FOLLOW_ALL)) {
+		how = BFS_STAT_TRYFOLLOW;
+	}
+
+	int status = bfs_stat(AT_FDCWD, *arg, how, sb);
+	if (status != 0) {
+		parse_argv_error(parser, arg, 1, "%m.\n");
+	}
+	return status;
+}
+
+/**
+ * Parse the expression specified on the command line.
+ *
+ * Forward declaration only; the definition comes later in this file.
+ */
+static struct bfs_expr *parse_expr(struct bfs_parser *parser);
+
+/**
+ * Advance past a token made up of argc arguments.
+ *
+ * Any token other than a flag or a path marks the expression as started, and
+ * any token other than a path becomes the parser's last_arg.
+ *
+ * @return
+ *         The position of the token that was just consumed.
+ */
+static char **parser_advance(struct bfs_parser *parser, enum token_type type, size_t argc) {
+	char **token = parser->argv;
+
+	switch (type) {
+	case T_PATH:
+		break;
+	case T_FLAG:
+		parser->last_arg = token;
+		break;
+	default:
+		parser->expr_started = true;
+		parser->last_arg = token;
+		break;
+	}
+
+	parser->argv = token + argc;
+	return token;
+}
+
+/**
+ * Parse a root path: append a copy of it to ctx->paths.
+ *
+ * Once a root has been added, the implicit root is no longer used.
+ *
+ * @return
+ *         0 on success, -1 (after printing an error) if reserving the slot or
+ *         copying the string fails.
+ */
+static int parse_root(struct bfs_parser *parser, const char *path) {
+	struct bfs_ctx *ctx = parser->ctx;
+
+	const char **slot = RESERVE(const char *, &ctx->paths, &ctx->npaths);
+	if (!slot) {
+		parse_perror(parser, "RESERVE()");
+		return -1;
+	}
+
+	*slot = strdup(path);
+	if (*slot) {
+		parser->implicit_root = false;
+		return 0;
+	}
+
+	// Give the reserved slot back
+	--ctx->npaths;
+	parse_perror(parser, "strdup()");
+	return -1;
+}
+
+/**
+ * While parsing an expression, skip any paths and add them to ctx->paths.
+ *
+ * Bare "--" arguments are consumed as flags and otherwise ignored.
+ *
+ * @return
+ * 0 once the end of the arguments or a non-path token is reached, or
+ * -1 if adding a path with parse_root() fails.
+ */
+static int skip_paths(struct bfs_parser *parser) {
+ while (true) {
+ const char *arg = parser->argv[0];
+ if (!arg) {
+ return 0;
+ }
+
+ if (arg[0] == '-') {
+ if (strcmp(arg, "--") == 0) {
+ // find uses -- to separate flags from the rest
+ // of the command line. We allow mixing flags
+ // and paths/predicates, so we just ignore --.
+ parser_advance(parser, T_FLAG, 1);
+ continue;
+ }
+ if (strcmp(arg, "-") != 0) {
+ // - by itself is a file name. Anything else
+ // starting with - is a flag/predicate.
+ return 0;
+ }
+ }
+
+ // By POSIX, these are always options
+ if (strcmp(arg, "(") == 0 || strcmp(arg, "!") == 0) {
+ return 0;
+ }
+
+ if (parser->expr_started) {
+ // By POSIX, these can be paths. We only treat them as
+ // such at the beginning of the command line.
+ if (strcmp(arg, ")") == 0 || strcmp(arg, ",") == 0) {
+ return 0;
+ }
+ }
+
+ // The path is still added as a root below; only warn about it
+ if (parser->excluding) {
+ parse_warning(parser, "This path will not be excluded. Use a test like ${blu}-name${rs} or ${blu}-path${rs}\n");
+ bfs_warning(parser->ctx, "within ${red}-exclude${rs} to exclude matching files.\n\n");
+ }
+
+ if (parse_root(parser, arg) != 0) {
+ return -1;
+ }
+
+ parser_advance(parser, T_PATH, 1);
+ }
+}
+
+/** Integer parsing flags, as interpreted by parse_int(). */
+enum int_flags {
+ /** Mask for the numeric base; a base of 0 is treated as 10. */
+ IF_BASE_MASK = 0x03F,
+ /** The result is stored as an int. */
+ IF_INT = 0x040,
+ /** The result is stored as a long. */
+ IF_LONG = 0x080,
+ /** The result is stored as a long long. */
+ IF_LONG_LONG = 0x0C0,
+ /** Mask for the result size bits above. */
+ IF_SIZE_MASK = 0x0C0,
+ /** Reject negative values. */
+ IF_UNSIGNED = 0x100,
+ /** Allow trailing characters after the number. */
+ IF_PARTIAL_OK = 0x200,
+ /** Don't print an error message on failure. */
+ IF_QUIET = 0x400,
+};
+
+/**
+ * Parse an integer.
+ *
+ * @param parser
+ * The parser state, used for error reporting.
+ * @param arg
+ * The command line argument to highlight in error messages.
+ * @param str
+ * The string to parse. Leading whitespace is rejected.
+ * @param result
+ * Where to store the value. It is written as an int, long, or long long
+ * according to the IF_INT/IF_LONG/IF_LONG_LONG bits in flags.
+ * @param flags
+ * The base (0 means 10), the result size, and the other IF_* options.
+ * @return
+ * A pointer just past the parsed number, or NULL on failure. Unless
+ * IF_QUIET is set, an error message is printed on failure.
+ */
+static const char *parse_int(const struct bfs_parser *parser, char **arg, const char *str, void *result, enum int_flags flags) {
+ // strtoll() skips leading spaces, but we want to reject them
+ if (xisspace(str[0])) {
+ goto bad;
+ }
+
+ int base = flags & IF_BASE_MASK;
+ if (base == 0) {
+ base = 10;
+ }
+
+ // errno is cleared first so that a failure can be detected afterwards
+ char *endptr;
+ errno = 0;
+ long long value = strtoll(str, &endptr, base);
+ if (errno != 0) {
+ if (errno == ERANGE) {
+ goto range;
+ } else {
+ goto bad;
+ }
+ }
+
+ // https://github.com/llvm/llvm-project/issues/64946
+ sanitize_init(&endptr);
+
+ // No characters were consumed at all
+ if (endptr == str) {
+ goto bad;
+ }
+
+ if (!(flags & IF_PARTIAL_OK) && *endptr != '\0') {
+ goto bad;
+ }
+
+ if ((flags & IF_UNSIGNED) && value < 0) {
+ goto negative;
+ }
+
+ // Range-check against the requested result type before storing
+ switch (flags & IF_SIZE_MASK) {
+ case IF_INT:
+ if (value < INT_MIN || value > INT_MAX) {
+ goto range;
+ }
+ *(int *)result = value;
+ break;
+
+ case IF_LONG:
+ if (value < LONG_MIN || value > LONG_MAX) {
+ goto range;
+ }
+ *(long *)result = value;
+ break;
+
+ case IF_LONG_LONG:
+ *(long long *)result = value;
+ break;
+
+ default:
+ bfs_bug("Invalid int size");
+ goto bad;
+ }
+
+ return endptr;
+
+bad:
+ if (!(flags & IF_QUIET)) {
+ parse_argv_error(parser, arg, 1, "${bld}%pq${rs} is not a valid integer.\n", str);
+ }
+ return NULL;
+
+negative:
+ if (!(flags & IF_QUIET)) {
+ parse_argv_error(parser, arg, 1, "Negative integer ${bld}%pq${rs} is not allowed here.\n", str);
+ }
+ return NULL;
+
+range:
+ if (!(flags & IF_QUIET)) {
+ parse_argv_error(parser, arg, 1, "${bld}%pq${rs} is too large an integer.\n", str);
+ }
+ return NULL;
+}
+
+/**
+ * Parse an integer and a comparison flag from expr->argv[1].
+ *
+ * A leading '-' means "less than" and a leading '+' means "greater than";
+ * with neither, the comparison is for equality.  The number after the optional
+ * flag is stored in expr->num as a non-negative long long.
+ *
+ * @return
+ *         The result of parse_int(): a pointer past the number, or NULL.
+ */
+static const char *parse_icmp(const struct bfs_parser *parser, struct bfs_expr *expr, enum int_flags flags) {
+	char **arg = &expr->argv[1];
+	const char *digits = *arg;
+
+	if (digits[0] == '-') {
+		expr->int_cmp = BFS_INT_LESS;
+		++digits;
+	} else if (digits[0] == '+') {
+		expr->int_cmp = BFS_INT_GREATER;
+		++digits;
+	} else {
+		expr->int_cmp = BFS_INT_EQUAL;
+	}
+
+	return parse_int(parser, arg, digits, &expr->num, flags | IF_LONG_LONG | IF_UNSIGNED);
+}
+
+/**
+ * Check if a string could be an integer comparison: up to two leading '+'/'-'
+ * characters followed by a decimal digit.
+ */
+static bool looks_like_icmp(const char *str) {
+	// One +/- for the comparison flag, one for the sign
+	const char *digit = str;
+	while (digit < str + 2 && (*digit == '-' || *digit == '+')) {
+		++digit;
+	}
+
+	return *digit >= '0' && *digit <= '9';
+}
+
+/**
+ * Parse a single flag made up of argc arguments.  Flags become always-true
+ * expressions.
+ */
+static struct bfs_expr *parse_flag(struct bfs_parser *parser, size_t argc) {
+	char **token = parser_advance(parser, T_FLAG, argc);
+	return parse_new_expr(parser, eval_true, argc, token);
+}
+
+/**
+ * Parse a flag that stands alone, i.e. consumes just one argument.
+ */
+static struct bfs_expr *parse_nullary_flag(struct bfs_parser *parser) {
+	const size_t argc = 1;
+	return parse_flag(parser, argc);
+}
+
+/**
+ * Parse a flag followed by its value.  Prints an error and returns NULL if
+ * the value is missing.
+ */
+static struct bfs_expr *parse_unary_flag(struct bfs_parser *parser) {
+	char **argv = parser->argv;
+	if (argv[1]) {
+		return parse_flag(parser, 2);
+	}
+
+	parse_error(parser, "${cyn}%s${rs} needs a value.\n", argv[0]);
+	return NULL;
+}
+
+/**
+ * Parse a single option made up of argc arguments.  Options become
+ * always-true expressions.
+ */
+static struct bfs_expr *parse_option(struct bfs_parser *parser, size_t argc) {
+	char **token = parser_advance(parser, T_OPTION, argc);
+	return parse_new_expr(parser, eval_true, argc, token);
+}
+
+/**
+ * Parse an option that stands alone, i.e. consumes just one argument.
+ */
+static struct bfs_expr *parse_nullary_option(struct bfs_parser *parser) {
+	const size_t argc = 1;
+	return parse_option(parser, argc);
+}
+
+/**
+ * Parse an option followed by its value.  Prints an error and returns NULL
+ * if the value is missing.
+ */
+static struct bfs_expr *parse_unary_option(struct bfs_parser *parser) {
+	char **argv = parser->argv;
+	if (argv[1]) {
+		return parse_option(parser, 2);
+	}
+
+	parse_error(parser, "${blu}%s${rs} needs a value.\n", argv[0]);
+	return NULL;
+}
+
+/**
+ * Parse a single test made up of argc arguments, evaluated by eval_fn.
+ */
+static struct bfs_expr *parse_test(struct bfs_parser *parser, bfs_eval_fn *eval_fn, size_t argc) {
+	char **token = parser_advance(parser, T_TEST, argc);
+	return parse_new_expr(parser, eval_fn, argc, token);
+}
+
+/**
+ * Parse a test that stands alone, i.e. consumes just one argument.
+ */
+static struct bfs_expr *parse_nullary_test(struct bfs_parser *parser, bfs_eval_fn *eval_fn) {
+	const size_t argc = 1;
+	return parse_test(parser, eval_fn, argc);
+}
+
+/**
+ * Parse a test followed by its value.  Prints an error and returns NULL if
+ * the value is missing.
+ */
+static struct bfs_expr *parse_unary_test(struct bfs_parser *parser, bfs_eval_fn *eval_fn) {
+	char **argv = parser->argv;
+	if (argv[1]) {
+		return parse_test(parser, eval_fn, 2);
+	}
+
+	parse_error(parser, "${blu}%s${rs} needs a value.\n", argv[0]);
+	return NULL;
+}
+
+/**
+ * Parse a single action made up of argc arguments, evaluated by eval_fn.
+ *
+ * Actions are rejected inside -exclude.  Every action except those evaluated
+ * by eval_limit, eval_prune, or eval_quit turns off the implicit -print.
+ */
+static struct bfs_expr *parse_action(struct bfs_parser *parser, bfs_eval_fn *eval_fn, size_t argc) {
+	char **token = parser_advance(parser, T_ACTION, argc);
+
+	if (parser->excluding) {
+		parse_argv_error(parser, token, argc, "This action is not supported within ${red}-exclude${rs}.\n");
+		return NULL;
+	}
+
+	bool keeps_print = eval_fn == eval_limit || eval_fn == eval_prune || eval_fn == eval_quit;
+	if (!keeps_print) {
+		parser->implicit_print = false;
+	}
+
+	return parse_new_expr(parser, eval_fn, argc, token);
+}
+
+/**
+ * Parse an action that stands alone, i.e. consumes just one argument.
+ */
+static struct bfs_expr *parse_nullary_action(struct bfs_parser *parser, bfs_eval_fn *eval_fn) {
+	const size_t argc = 1;
+	return parse_action(parser, eval_fn, argc);
+}
+
+/**
+ * Parse an action followed by one argument.  Prints an error and returns
+ * NULL if the argument is missing.
+ */
+static struct bfs_expr *parse_unary_action(struct bfs_parser *parser, bfs_eval_fn *eval_fn) {
+	char **argv = parser->argv;
+	if (argv[1]) {
+		return parse_action(parser, eval_fn, 2);
+	}
+
+	parse_error(parser, "${blu}%s${rs} needs a value.\n", argv[0]);
+	return NULL;
+}
+
+/**
+ * Parse a test whose value is an integer with an optional comparison flag.
+ * Returns NULL if either the test or its integer fails to parse.
+ */
+static struct bfs_expr *parse_test_icmp(struct bfs_parser *parser, bfs_eval_fn *eval_fn) {
+	struct bfs_expr *expr = parse_unary_test(parser, eval_fn);
+	if (expr && parse_icmp(parser, expr, 0)) {
+		return expr;
+	}
+	return NULL;
+}
+
+/**
+ * Print usage information for -D.
+ *
+ * @param cfile
+ * The stream to print to; parse_debug() passes either cout or cerr.
+ */
+static void debug_help(CFILE *cfile) {
+ cfprintf(cfile, "Supported debug flags:\n\n");
+
+ // One line per flag name accepted by parse_debug()
+ cfprintf(cfile, " ${bld}help${rs}: This message.\n");
+ cfprintf(cfile, " ${bld}cost${rs}: Show cost estimates.\n");
+ cfprintf(cfile, " ${bld}exec${rs}: Print executed command details.\n");
+ cfprintf(cfile, " ${bld}opt${rs}: Print optimization details.\n");
+ cfprintf(cfile, " ${bld}rates${rs}: Print predicate success rates.\n");
+ cfprintf(cfile, " ${bld}search${rs}: Trace the filesystem traversal.\n");
+ cfprintf(cfile, " ${bld}stat${rs}: Trace all stat() calls.\n");
+ cfprintf(cfile, " ${bld}tree${rs}: Print the parse tree.\n");
+ cfprintf(cfile, " ${bld}all${rs}: All debug flags at once.\n");
+}
+
+/**
+ * Check if the first len bytes of flag are exactly the string expected.
+ */
+static bool parse_debug_flag(const char *flag, size_t len, const char *expected) {
+	return len == strlen(expected) && strncmp(flag, expected, len) == 0;
+}
+
+/**
+ * Parse -D FLAG.
+ *
+ * FLAG is a comma-separated list of debug flag names. "help" prints the list
+ * of flags to cout, and "all" enables every flag. Unrecognized names produce
+ * a warning rather than an error.
+ *
+ * @return
+ * The parsed expression, or NULL if the value is missing or "help" was
+ * requested (in which case parser->just_info is set).
+ */
+static struct bfs_expr *parse_debug(struct bfs_parser *parser, int arg1, int arg2) {
+ struct bfs_ctx *ctx = parser->ctx;
+
+ struct bfs_expr *expr = parse_unary_flag(parser);
+ if (!expr) {
+ cfprintf(ctx->cerr, "\n");
+ debug_help(ctx->cerr);
+ return NULL;
+ }
+
+ bool unrecognized = false;
+
+ // Walk the comma-separated list; each name is flag[0..len)
+ for (const char *flag = expr->argv[1], *next; flag; flag = next) {
+ size_t len = strcspn(flag, ",");
+ if (flag[len]) {
+ next = flag + len + 1;
+ } else {
+ next = NULL;
+ }
+
+ if (parse_debug_flag(flag, len, "help")) {
+ debug_help(ctx->cout);
+ parser->just_info = true;
+ return NULL;
+ } else if (parse_debug_flag(flag, len, "all")) {
+ ctx->debug = DEBUG_ALL;
+ continue;
+ }
+
+ // Try each individual bit of DEBUG_ALL in turn
+ enum debug_flags i;
+ for (i = 1; DEBUG_ALL & i; i <<= 1) {
+ const char *name = debug_flag_name(i);
+ if (parse_debug_flag(flag, len, name)) {
+ break;
+ }
+ }
+
+ // i is still within DEBUG_ALL only if the loop above found a match
+ if (DEBUG_ALL & i) {
+ ctx->debug |= i;
+ } else {
+ if (parse_expr_warning(parser, expr, "Unrecognized debug flag ${bld}")) {
+ // The name isn't NUL-terminated at len, so write it out by length
+ fwrite(flag, 1, len, stderr);
+ cfprintf(ctx->cerr, "${rs}.\n\n");
+ unrecognized = true;
+ }
+ }
+ }
+
+ if (unrecognized) {
+ debug_help(ctx->cerr);
+ cfprintf(ctx->cerr, "\n");
+ }
+
+ return expr;
+}
+
+/**
+ * Parse -On.
+ *
+ * -Ofast selects level 4.  Otherwise the text after "-O" must be a
+ * non-negative integer; levels above 4 are accepted with a warning.
+ */
+static struct bfs_expr *parse_optlevel(struct bfs_parser *parser, int arg1, int arg2) {
+	struct bfs_expr *expr = parse_nullary_flag(parser);
+	if (!expr) {
+		return NULL;
+	}
+
+	int *optlevel = &parser->ctx->optlevel;
+	// The part of the argument after "-O"
+	const char *level = expr->argv[0] + 2;
+
+	if (strcmp(expr->argv[0], "-Ofast") == 0) {
+		*optlevel = 4;
+	} else if (!parse_int(parser, expr->argv, level, optlevel, IF_INT | IF_UNSIGNED)) {
+		return NULL;
+	}
+
+	if (*optlevel > 4) {
+		parse_expr_warning(parser, expr, "${cyn}-O${bld}%s${rs} is the same as ${cyn}-O${bld}4${rs}.\n\n", level);
+	}
+
+	return expr;
+}
+
+/**
+ * Parse -[PHL], -follow.
+ *
+ * Both follow bits are cleared from the context's flags before the given
+ * flags are set.  The token is parsed as an option if option is non-zero, and
+ * as a flag otherwise.
+ */
+static struct bfs_expr *parse_follow(struct bfs_parser *parser, int flags, int option) {
+	struct bfs_ctx *ctx = parser->ctx;
+
+	ctx->flags &= ~(BFTW_FOLLOW_ROOTS | BFTW_FOLLOW_ALL);
+	ctx->flags |= flags;
+
+	return option ? parse_nullary_option(parser) : parse_nullary_flag(parser);
+}
+
+/**
+ * Parse -X: turn on the context's xargs_safe setting.
+ */
+static struct bfs_expr *parse_xargs_safe(struct bfs_parser *parser, int arg1, int arg2) {
+	struct bfs_ctx *ctx = parser->ctx;
+	ctx->xargs_safe = true;
+	return parse_nullary_flag(parser);
+}
+
+/**
+ * Parse -executable, -readable, -writable.  The flag argument is stored in
+ * expr->num for eval_access.
+ */
+static struct bfs_expr *parse_access(struct bfs_parser *parser, int flag, int arg2) {
+	struct bfs_expr *expr = parse_nullary_test(parser, eval_access);
+	if (!expr) {
+		return NULL;
+	}
+
+	expr->num = flag;
+	return expr;
+}
+
+/**
+ * Parse -acl.  Fails with an error when built without BFS_CAN_CHECK_ACL.
+ */
+static struct bfs_expr *parse_acl(struct bfs_parser *parser, int flag, int arg2) {
+#if !BFS_CAN_CHECK_ACL
+	parse_error(parser, "Missing platform support.\n");
+	return NULL;
+#else
+	return parse_nullary_test(parser, eval_acl);
+#endif
+}
+
+/**
+ * Parse -[aBcm]?newer.
+ *
+ * The reference file named by the argument is stat()ed right away; its
+ * modification time becomes the reference time, and field selects which
+ * timestamp of each visited file is compared against it.
+ */
+static struct bfs_expr *parse_newer(struct bfs_parser *parser, int field, int arg2) {
+	struct bfs_expr *expr = parse_unary_test(parser, eval_newer);
+	if (!expr) {
+		return NULL;
+	}
+
+	struct bfs_stat ref;
+	if (stat_arg(parser, &expr->argv[1], &ref) == 0) {
+		expr->reftime = ref.mtime;
+		expr->stat_field = field;
+		return expr;
+	}
+
+	return NULL;
+}
+
+/**
+ * Parse -[aBcm]min: an integer comparison in minutes, relative to the
+ * parser's current time.
+ */
+static struct bfs_expr *parse_min(struct bfs_parser *parser, int field, int arg2) {
+	struct bfs_expr *expr = parse_test_icmp(parser, eval_time);
+	if (expr) {
+		expr->reftime = parser->now;
+		expr->stat_field = field;
+		expr->time_unit = BFS_MINUTES;
+	}
+	return expr;
+}
+
+/**
+ * Parse -[aBcm]time.
+ *
+ * The argument is an integer comparison (N, -N, or +N).  A bare number is
+ * measured in days.  Otherwise the argument must be a sequence of NUMBER UNIT
+ * pairs (units: w, d, h, m, s), e.g. 1d12h, whose total is stored in seconds.
+ *
+ * @param parser
+ *         The parser state.
+ * @param field
+ *         The stat field to compare, stored in expr->stat_field.
+ * @param arg2
+ *         Unused.
+ * @return
+ *         The parsed expression, or NULL (after printing an error) if the
+ *         argument is missing or malformed, or if the total number of seconds
+ *         does not fit in expr->num.
+ */
+static struct bfs_expr *parse_time(struct bfs_parser *parser, int field, int arg2) {
+	struct bfs_expr *expr = parse_unary_test(parser, eval_time);
+	if (!expr) {
+		return NULL;
+	}
+
+	expr->reftime = parser->now;
+	expr->stat_field = field;
+
+	const char *tail = parse_icmp(parser, expr, IF_PARTIAL_OK);
+	if (!tail) {
+		return NULL;
+	}
+
+	if (!*tail) {
+		expr->time_unit = BFS_DAYS;
+		return expr;
+	}
+
+	// The number parsed so far belongs to the first unit; start the total over
+	unsigned long long time = expr->num;
+	expr->num = 0;
+
+	while (true) {
+		// Number of seconds in one of the current unit
+		unsigned long long scale = 1;
+		switch (*tail) {
+		case 'w':
+			scale *= 7;
+			fallthru;
+		case 'd':
+			scale *= 24;
+			fallthru;
+		case 'h':
+			scale *= 60;
+			fallthru;
+		case 'm':
+			scale *= 60;
+			fallthru;
+		case 's':
+			break;
+		default:
+			parse_expr_error(parser, expr, "Unknown time unit ${bld}%c${rs}.\n", *tail);
+			return NULL;
+		}
+
+		// Refuse totals that don't fit, rather than silently wrapping around
+		unsigned long long room = LLONG_MAX - expr->num;
+		if (time > room / scale) {
+			parse_expr_error(parser, expr, "Time ${bld}%pq${rs} is too large.\n", expr->argv[1]);
+			return NULL;
+		}
+
+		expr->num += time * scale;
+
+		if (!*++tail) {
+			break;
+		}
+
+		tail = parse_int(parser, &expr->argv[1], tail, &time, IF_PARTIAL_OK | IF_LONG_LONG | IF_UNSIGNED);
+		if (!tail) {
+			return NULL;
+		}
+		if (!*tail) {
+			parse_expr_error(parser, expr, "Missing time unit.\n");
+			return NULL;
+		}
+	}
+
+	expr->time_unit = BFS_SECONDS;
+	return expr;
+}
+
+/**
+ * Parse -capable.  Fails with an error when built without
+ * BFS_CAN_CHECK_CAPABILITIES.
+ */
+static struct bfs_expr *parse_capable(struct bfs_parser *parser, int flag, int arg2) {
+#if !BFS_CAN_CHECK_CAPABILITIES
+	parse_error(parser, "Missing platform support.\n");
+	return NULL;
+#else
+	return parse_nullary_test(parser, eval_capable);
+#endif
+}
+
+/**
+ * Parse -(no)?color.
+ *
+ * A non-zero color argument forces colors on for both cout and cerr, which
+ * fails if the context has no color table.  A zero argument forces colors off.
+ */
+static struct bfs_expr *parse_color(struct bfs_parser *parser, int color, int arg2) {
+	struct bfs_expr *expr = parse_nullary_option(parser);
+	if (!expr) {
+		return NULL;
+	}
+
+	struct bfs_ctx *ctx = parser->ctx;
+
+	if (!color) {
+		parser->use_color = COLOR_NEVER;
+		ctx->cout->colors = NULL;
+		ctx->cerr->colors = NULL;
+		return expr;
+	}
+
+	struct colors *palette = ctx->colors;
+	if (!palette) {
+		parse_expr_error(parser, expr, "Error parsing $$LS_COLORS: %s.\n", xstrerror(ctx->colors_error));
+		return NULL;
+	}
+
+	parser->use_color = COLOR_ALWAYS;
+	ctx->cout->colors = palette;
+	ctx->cerr->colors = palette;
+	return expr;
+}
+
+/**
+ * Common code for fnmatch() tests.
+ *
+ * @param parser
+ *         The parser state.
+ * @param expr
+ *         The freshly parsed test, or NULL (which is passed straight through).
+ *         Its pattern is taken from expr->argv[1].
+ * @param casefold
+ *         Whether to match case-insensitively.
+ * @return
+ *         The expression, or NULL on error.
+ */
+static struct bfs_expr *parse_fnmatch(const struct bfs_parser *parser, struct bfs_expr *expr, bool casefold) {
+	if (!expr) {
+		return NULL;
+	}
+
+	expr->pattern = expr->argv[1];
+
+	if (!casefold) {
+		expr->fnm_flags = 0;
+	} else {
+#ifdef FNM_CASEFOLD
+		expr->fnm_flags = FNM_CASEFOLD;
+#else
+		parse_expr_error(parser, expr, "Missing platform support.\n");
+		return NULL;
+#endif
+	}
+
+	// POSIX says, about fnmatch():
+	//
+	//     If pattern ends with an unescaped <backslash>, fnmatch() shall
+	//     return a non-zero value (indicating either no match or an error).
+	//
+	// But not all implementations obey this, so check for it ourselves.
+	// An odd number of trailing backslashes means the last one is unescaped.
+	const size_t len = strlen(expr->pattern);
+	size_t backslashes = 0;
+	while (backslashes < len && expr->pattern[len - backslashes - 1] == '\\') {
+		++backslashes;
+	}
+	if (backslashes % 2 != 0) {
+		parse_expr_warning(parser, expr, "Unescaped trailing backslash.\n\n");
+		expr->eval_fn = eval_false;
+		return expr;
+	}
+
+	// strcmp() can be much faster than fnmatch() since it doesn't have to
+	// parse the pattern, so special-case patterns with no wildcards.
+	//
+	// https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_13_01
+	expr->literal = strcspn(expr->pattern, "?*\\[") == len;
+
+	return expr;
+}
+
+/**
+ * Parse -context.
+ *
+ * The argument is an fnmatch() pattern (case-sensitive) evaluated by
+ * eval_context(); fails when the build cannot check contexts.
+ */
+static struct bfs_expr *parse_context(struct bfs_parser *parser, int flag, int arg2) {
+#if !BFS_CAN_CHECK_CONTEXT
+	parse_error(parser, "Missing platform support.\n");
+	return NULL;
+#else
+	return parse_fnmatch(parser, parse_unary_test(parser, eval_context), false);
+#endif
+}
+
+/**
+ * Parse -{false,true}.
+ *
+ * A non-zero `value` yields the always-true test, zero the always-false one.
+ */
+static struct bfs_expr *parse_const(struct bfs_parser *parser, int value, int arg2) {
+	if (value) {
+		return parse_nullary_test(parser, eval_true);
+	}
+	return parse_nullary_test(parser, eval_false);
+}
+
+/**
+ * Parse -daystart.
+ *
+ * Replaces parser->now with a midnight in local time: the current instant if
+ * it is already exactly midnight, otherwise midnight of the following day.
+ */
+static struct bfs_expr *parse_daystart(struct bfs_parser *parser, int arg1, int arg2) {
+	struct tm tm;
+	if (!localtime_r(&parser->now.tv_sec, &tm)) {
+		parse_perror(parser, "localtime_r()");
+		return NULL;
+	}
+
+	// Only stay on the current day if we are exactly at its start
+	bool at_midnight = !tm.tm_hour && !tm.tm_min && !tm.tm_sec && !parser->now.tv_nsec;
+	if (!at_midnight) {
+		++tm.tm_mday;
+	}
+	tm.tm_sec = 0;
+	tm.tm_min = 0;
+	tm.tm_hour = 0;
+
+	time_t midnight;
+	if (xmktime(&tm, &midnight) != 0) {
+		parse_perror(parser, "xmktime()");
+		return NULL;
+	}
+
+	parser->now.tv_nsec = 0;
+	parser->now.tv_sec = midnight;
+
+	return parse_nullary_option(parser);
+}
+
+/**
+ * Parse -delete.
+ *
+ * Turns on post-order traversal and records this argument as the one that
+ * requested it, then builds the eval_delete() action.
+ */
+static struct bfs_expr *parse_delete(struct bfs_parser *parser, int arg1, int arg2) {
+	struct bfs_ctx *ctx = parser->ctx;
+	ctx->flags |= BFTW_POST_ORDER;
+
+	// Must be captured before the action below is parsed
+	parser->depth_arg = parser->argv;
+
+	struct bfs_expr *action = parse_nullary_action(parser, eval_delete);
+	return action;
+}
+
+/**
+ * Parse -d.
+ *
+ * Turns on post-order traversal and records this argument as the one that
+ * requested it.
+ */
+static struct bfs_expr *parse_depth(struct bfs_parser *parser, int arg1, int arg2) {
+	struct bfs_ctx *ctx = parser->ctx;
+	ctx->flags |= BFTW_POST_ORDER;
+
+	// Must be captured before the flag below is parsed
+	parser->depth_arg = parser->argv;
+
+	struct bfs_expr *flag = parse_nullary_flag(parser);
+	return flag;
+}
+
+/**
+ * Parse -depth [N].
+ *
+ * When the next argument looks like an integer comparison, this is the
+ * depth test; otherwise it is handled exactly like -d.
+ */
+static struct bfs_expr *parse_depth_n(struct bfs_parser *parser, int arg1, int arg2) {
+	const char *next = parser->argv[1];
+
+	if (!next || !looks_like_icmp(next)) {
+		return parse_depth(parser, arg1, arg2);
+	}
+
+	return parse_test_icmp(parser, eval_depth);
+}
+
+/**
+ * Parse -{min,max}depth N.
+ *
+ * N is stored in ctx->mindepth when `is_min` is non-zero, and in
+ * ctx->maxdepth otherwise.
+ */
+static struct bfs_expr *parse_depth_limit(struct bfs_parser *parser, int is_min, int arg2) {
+	struct bfs_expr *expr = parse_unary_option(parser);
+	if (!expr) {
+		return NULL;
+	}
+
+	struct bfs_ctx *ctx = parser->ctx;
+
+	int *limit;
+	if (is_min) {
+		limit = &ctx->mindepth;
+	} else {
+		limit = &ctx->maxdepth;
+	}
+
+	if (!parse_int(parser, &expr->argv[1], expr->argv[1], limit, IF_INT | IF_UNSIGNED)) {
+		return NULL;
+	}
+
+	return expr;
+}
+
+/**
+ * Parse -empty.
+ *
+ * The test reserves one ephemeral file descriptor for opendir().
+ */
+static struct bfs_expr *parse_empty(struct bfs_parser *parser, int arg1, int arg2) {
+	struct bfs_expr *expr = parse_nullary_test(parser, eval_empty);
+	if (!expr) {
+		return NULL;
+	}
+
+	// opendir() needs a descriptor
+	expr->ephemeral_fds = 1;
+	return expr;
+}
+
+/**
+ * Parse -exec(dir)?/-ok(dir)?.
+ *
+ * Builds the bfs_exec buffer from the command line and attaches it to a new
+ * eval_exec() action.  For the BFS_EXEC_CHDIR variants, a relative component
+ * in $PATH is rejected as unsafe.
+ */
+static struct bfs_expr *parse_exec(struct bfs_parser *parser, int flags, int arg2) {
+	struct bfs_exec *execbuf = bfs_exec_parse(parser->ctx, parser->argv, flags);
+	if (!execbuf) {
+		return NULL;
+	}
+
+	// The action spans the command template plus two more arguments
+	struct bfs_expr *expr = parse_action(parser, eval_exec, execbuf->tmpl_argc + 2);
+	if (!expr) {
+		bfs_exec_free(execbuf);
+		return NULL;
+	}
+
+	expr->exec = execbuf;
+
+	// bfs_spawn() needs a pipe()
+	expr->ephemeral_fds = 2;
+
+	if ((execbuf->flags & BFS_EXEC_CHDIR) != 0) {
+		// Refuse to run with any relative component in $PATH
+		const char *dir = getenv("PATH");
+		while (dir) {
+			if (*dir != '/') {
+				size_t len = strcspn(dir, ":");
+				char *comp = strndup(dir, len);
+				if (!comp) {
+					parse_perror(parser, "strndup()");
+					return NULL;
+				}
+
+				parse_expr_error(parser, expr,
+					"This action would be unsafe, since ${bld}$$PATH${rs} contains the relative path ${bld}%pq${rs}\n", comp);
+				free(comp);
+				return NULL;
+			}
+
+			// Advance to the component after the next ':', if any
+			const char *sep = strchr(dir, ':');
+			dir = sep ? sep + 1 : NULL;
+		}
+
+		// One more descriptor to dup() the parent directory
+		if ((execbuf->flags & BFS_EXEC_MULTI) != 0) {
+			++expr->persistent_fds;
+		} else {
+			++expr->ephemeral_fds;
+		}
+	}
+
+	if ((execbuf->flags & BFS_EXEC_CONFIRM) != 0) {
+		parser->ok_expr = expr;
+	}
+
+	return expr;
+}
+
+/**
+ * Parse -exit [STATUS].
+ *
+ * The following argument is consumed as the status only if it parses as an
+ * unsigned int; otherwise the action exits with EXIT_SUCCESS.
+ */
+static struct bfs_expr *parse_exit(struct bfs_parser *parser, int arg1, int arg2) {
+	const char *value = parser->argv[1];
+	int status = EXIT_SUCCESS;
+
+	// IF_QUIET: a non-numeric next argument is not an error here
+	bool has_status = value && parse_int(parser, NULL, value, &status, IF_INT | IF_UNSIGNED | IF_QUIET);
+	size_t argc = has_status ? 2 : 1;
+
+	struct bfs_expr *expr = parse_action(parser, eval_exit, argc);
+	if (!expr) {
+		return NULL;
+	}
+
+	expr->num = status;
+	return expr;
+}
+
+/**
+ * Parse -f PATH.
+ *
+ * PATH is registered as a root via parse_root().
+ */
+static struct bfs_expr *parse_f(struct bfs_parser *parser, int arg1, int arg2) {
+	struct bfs_expr *expr = parse_unary_flag(parser);
+
+	if (!expr || parse_root(parser, expr->argv[1]) != 0) {
+		return NULL;
+	}
+
+	return expr;
+}
+
+/**
+ * Parse -files0-from PATH.
+ *
+ * Reads NUL-delimited paths from PATH (or from standard input when PATH is
+ * "-") and registers each one as a root via parse_root().  On success, the
+ * implicit root is disabled, and if standard input was used, the argument is
+ * remembered in parser->files0_stdin_arg.
+ *
+ * Returns the option expression, or NULL on failure.  Failures to open or
+ * read the file are reported with the corresponding errno message.
+ */
+static struct bfs_expr *parse_files0_from(struct bfs_parser *parser, int arg1, int arg2) {
+	struct bfs_expr *expr = parse_unary_option(parser);
+	if (!expr) {
+		return NULL;
+	}
+
+	const char *from = expr->argv[1];
+
+	FILE *file;
+	if (strcmp(from, "-") == 0) {
+		file = stdin;
+	} else {
+		file = xfopen(from, O_RDONLY | O_CLOEXEC);
+	}
+	if (!file) {
+		parse_expr_error(parser, expr, "%m.\n");
+		return NULL;
+	}
+
+	while (true) {
+		char *path = xgetdelim(file, '\0');
+		if (!path) {
+			if (errno) {
+				// Report the failure while errno is still intact,
+				// rather than bailing out silently
+				parse_expr_error(parser, expr, "%m.\n");
+				goto fail;
+			} else {
+				// NULL with errno == 0 means end of input
+				break;
+			}
+		}
+
+		int ret = parse_root(parser, path);
+		free(path);
+		if (ret != 0) {
+			goto fail;
+		}
+	}
+
+	if (file == stdin) {
+		// stdin stays open; just remember who consumed it
+		parser->files0_stdin_arg = expr->argv;
+	} else {
+		fclose(file);
+	}
+
+	parser->implicit_root = false;
+	return expr;
+
+fail:
+	if (file != stdin) {
+		fclose(file);
+	}
+	return NULL;
+}
+
+/**
+ * Parse -flags FLAGS.
+ *
+ * A leading '-' requires all of the flags, a leading '+' any of them, and no
+ * prefix an exact match.  The rest is parsed by xstrtofflags().
+ */
+static struct bfs_expr *parse_flags(struct bfs_parser *parser, int arg1, int arg2) {
+	struct bfs_expr *expr = parse_unary_test(parser, eval_flags);
+	if (!expr) {
+		return NULL;
+	}
+
+	const char *flags = expr->argv[1];
+	const char prefix = flags[0];
+
+	if (prefix == '-') {
+		expr->flags_cmp = BFS_MODE_ALL;
+		++flags;
+	} else if (prefix == '+') {
+		expr->flags_cmp = BFS_MODE_ANY;
+		++flags;
+	} else {
+		expr->flags_cmp = BFS_MODE_EQUAL;
+	}
+
+	if (xstrtofflags(&flags, &expr->set_flags, &expr->clear_flags) == 0) {
+		return expr;
+	}
+
+	// Distinguish an unsupported platform from a malformed argument
+	if (errno != ENOTSUP) {
+		parse_expr_error(parser, expr, "Invalid flags.\n");
+	} else {
+		parse_expr_error(parser, expr, "Missing platform support.\n");
+	}
+	return NULL;
+}
+
+/**
+ * Parse -fls FILE.
+ *
+ * Builds the eval_fls() action and opens FILE as its output.
+ */
+static struct bfs_expr *parse_fls(struct bfs_parser *parser, int arg1, int arg2) {
+	struct bfs_expr *expr = parse_unary_action(parser, eval_fls);
+
+	if (!expr || expr_open(parser, expr, expr->argv[1]) != 0) {
+		return NULL;
+	}
+
+	return expr;
+}
+
+/**
+ * Parse -fprint FILE.
+ *
+ * Builds the eval_fprint() action and opens FILE as its output.
+ */
+static struct bfs_expr *parse_fprint(struct bfs_parser *parser, int arg1, int arg2) {
+	struct bfs_expr *expr = parse_unary_action(parser, eval_fprint);
+	if (expr) {
+		const char *path = expr->argv[1];
+		if (expr_open(parser, expr, path) == 0) {
+			return expr;
+		}
+	}
+
+	return NULL;
+}
+
+/**
+ * Parse -fprint0 FILE.
+ *
+ * Builds the eval_fprint0() action and opens FILE as its output.
+ */
+static struct bfs_expr *parse_fprint0(struct bfs_parser *parser, int arg1, int arg2) {
+	struct bfs_expr *expr = parse_unary_action(parser, eval_fprint0);
+	if (!expr) {
+		return NULL;
+	}
+
+	int err = expr_open(parser, expr, expr->argv[1]);
+	return err == 0 ? expr : NULL;
+}
+
+/**
+ * Parse -fprintf FILE FORMAT.
+ *
+ * Both arguments are checked for presence before the three-argument action
+ * is created; FILE is then opened and FORMAT is compiled.
+ */
+static struct bfs_expr *parse_fprintf(struct bfs_parser *parser, int arg1, int arg2) {
+	char **argv = parser->argv;
+	const char *arg = argv[0];
+
+	const char *file = argv[1];
+	if (!file) {
+		parse_error(parser, "${blu}%s${rs} needs a file.\n", arg);
+		return NULL;
+	}
+
+	// Only look past the file once we know it exists
+	const char *format = argv[2];
+	if (!format) {
+		parse_error(parser, "${blu}%s${rs} needs a format string.\n", arg);
+		return NULL;
+	}
+
+	struct bfs_expr *expr = parse_action(parser, eval_fprintf, 3);
+	if (!expr) {
+		return NULL;
+	}
+
+	if (expr_open(parser, expr, file) != 0 || bfs_printf_parse(parser->ctx, expr, format) != 0) {
+		return NULL;
+	}
+
+	return expr;
+}
+
+/**
+ * Parse -fstype TYPE.
+ *
+ * Fails if the mount table can't be obtained from the context.
+ */
+static struct bfs_expr *parse_fstype(struct bfs_parser *parser, int arg1, int arg2) {
+	struct bfs_expr *expr = parse_unary_test(parser, eval_fstype);
+	if (!expr) {
+		return NULL;
+	}
+
+	if (bfs_ctx_mtab(parser->ctx)) {
+		return expr;
+	}
+
+	parse_expr_error(parser, expr, "Couldn't parse the mount table: %m.\n");
+	return NULL;
+}
+
+/**
+ * Parse -gid/-group.
+ *
+ * The argument is first looked up as a group name; if there is no such
+ * group, it may instead be an integer comparison against the GID.
+ */
+static struct bfs_expr *parse_group(struct bfs_parser *parser, int arg1, int arg2) {
+	struct bfs_expr *expr = parse_unary_test(parser, eval_gid);
+	if (!expr) {
+		return NULL;
+	}
+
+	const struct group *grp = bfs_getgrnam(parser->ctx->groups, expr->argv[1]);
+	if (grp) {
+		// A known group name matches exactly its GID
+		expr->num = grp->gr_gid;
+		expr->int_cmp = BFS_INT_EQUAL;
+		return expr;
+	}
+
+	if (looks_like_icmp(expr->argv[1])) {
+		return parse_icmp(parser, expr, 0) ? expr : NULL;
+	}
+
+	// Neither a group nor a number: a lookup error, or just unknown
+	if (errno) {
+		parse_expr_error(parser, expr, "%m.\n");
+	} else {
+		parse_expr_error(parser, expr, "No such group.\n");
+	}
+	return NULL;
+}
+
+/**
+ * Parse -unique.
+ *
+ * Sets the context's `unique` flag.
+ */
+static struct bfs_expr *parse_unique(struct bfs_parser *parser, int arg1, int arg2) {
+	struct bfs_ctx *ctx = parser->ctx;
+	ctx->unique = true;
+
+	struct bfs_expr *option = parse_nullary_option(parser);
+	return option;
+}
+
+/**
+ * Parse -used N.
+ *
+ * An integer comparison test evaluated by eval_used().
+ */
+static struct bfs_expr *parse_used(struct bfs_parser *parser, int arg1, int arg2) {
+	struct bfs_expr *test = parse_test_icmp(parser, eval_used);
+	return test;
+}
+
+/**
+ * Parse -uid/-user.
+ *
+ * The argument is first looked up as a user name; if there is no such user,
+ * it may instead be an integer comparison against the UID.
+ */
+static struct bfs_expr *parse_user(struct bfs_parser *parser, int arg1, int arg2) {
+	struct bfs_expr *expr = parse_unary_test(parser, eval_uid);
+	if (!expr) {
+		return NULL;
+	}
+
+	const struct passwd *pwd = bfs_getpwnam(parser->ctx->users, expr->argv[1]);
+	if (pwd) {
+		// A known user name matches exactly its UID
+		expr->num = pwd->pw_uid;
+		expr->int_cmp = BFS_INT_EQUAL;
+		return expr;
+	}
+
+	if (looks_like_icmp(expr->argv[1])) {
+		return parse_icmp(parser, expr, 0) ? expr : NULL;
+	}
+
+	// Neither a user nor a number: a lookup error, or just unknown
+	if (errno) {
+		parse_expr_error(parser, expr, "%m.\n");
+	} else {
+		parse_expr_error(parser, expr, "No such user.\n");
+	}
+	return NULL;
+}
+
+/**
+ * Parse -hidden.
+ *
+ * A nullary test evaluated by eval_hidden().
+ */
+static struct bfs_expr *parse_hidden(struct bfs_parser *parser, int arg1, int arg2) {
+	struct bfs_expr *test = parse_nullary_test(parser, eval_hidden);
+	return test;
+}
+
+/**
+ * Parse -(no)?ignore_readdir_race.
+ *
+ * Stores `ignore` in the context's `ignore_races` setting.
+ */
+static struct bfs_expr *parse_ignore_races(struct bfs_parser *parser, int ignore, int arg2) {
+	struct bfs_ctx *ctx = parser->ctx;
+	ctx->ignore_races = ignore;
+
+	struct bfs_expr *option = parse_nullary_option(parser);
+	return option;
+}
+
+/**
+ * Parse -inum N.
+ *
+ * An integer comparison test evaluated by eval_inum().
+ */
+static struct bfs_expr *parse_inum(struct bfs_parser *parser, int arg1, int arg2) {
+	struct bfs_expr *test = parse_test_icmp(parser, eval_inum);
+	return test;
+}
+
+/**
+ * Parse -j<n>.
+ *
+ * The thread count is embedded in the flag itself and must be at least 1.
+ */
+static struct bfs_expr *parse_jobs(struct bfs_parser *parser, int arg1, int arg2) {
+	struct bfs_expr *expr = parse_nullary_flag(parser);
+	if (!expr) {
+		return NULL;
+	}
+
+	// The number starts right after the two-character flag prefix
+	const char *digits = expr->argv[0] + 2;
+
+	unsigned int threads;
+	if (!parse_int(parser, expr->argv, digits, &threads, IF_INT | IF_UNSIGNED)) {
+		return NULL;
+	}
+
+	if (threads < 1) {
+		parse_expr_error(parser, expr, "${bld}0${rs} is not enough threads.\n");
+		return NULL;
+	}
+
+	parser->ctx->threads = threads;
+	return expr;
+}
+
+/**
+ * Parse -limit N.
+ *
+ * N must be a positive integer; the argument is remembered in
+ * parser->limit_arg.
+ */
+static struct bfs_expr *parse_limit(struct bfs_parser *parser, int arg1, int arg2) {
+	struct bfs_expr *expr = parse_unary_action(parser, eval_limit);
+	if (!expr) {
+		return NULL;
+	}
+
+	if (!parse_int(parser, &expr->argv[1], expr->argv[1], &expr->num, IF_LONG_LONG)) {
+		return NULL;
+	}
+
+	if (expr->num < 1) {
+		parse_expr_error(parser, expr, "The ${blu}%s${rs} must be at least ${bld}1${rs}.\n", expr->argv[0]);
+		return NULL;
+	}
+
+	parser->limit_arg = expr->argv;
+	return expr;
+}
+
+/**
+ * Parse -links N.
+ *
+ * An integer comparison test evaluated by eval_links().
+ */
+static struct bfs_expr *parse_links(struct bfs_parser *parser, int arg1, int arg2) {
+	struct bfs_expr *test = parse_test_icmp(parser, eval_links);
+	return test;
+}
+
+/**
+ * Parse -ls.
+ *
+ * Uses the same evaluator as -fls, set up as a print expression by
+ * init_print_expr().
+ */
+static struct bfs_expr *parse_ls(struct bfs_parser *parser, int arg1, int arg2) {
+	struct bfs_expr *expr = parse_nullary_action(parser, eval_fls);
+	if (expr) {
+		init_print_expr(parser, expr);
+	}
+	return expr;
+}
+
+/**
+ * Parse -mount.
+ *
+ * Currently sets the same traversal flag as -xdev, after warning that the
+ * behavior is expected to change; the argument is remembered in
+ * parser->mount_arg.
+ */
+static struct bfs_expr *parse_mount(struct bfs_parser *parser, int arg1, int arg2) {
+	struct bfs_expr *expr = parse_nullary_option(parser);
+	if (!expr) {
+		return NULL;
+	}
+
+	struct bfs_ctx *ctx = parser->ctx;
+
+	// One warning, split over two lines of output
+	parse_expr_warning(parser, expr, "In the future, ${blu}%s${rs} will skip mount points entirely, unlike\n", expr->argv[0]);
+	bfs_warning(ctx, "${blu}-xdev${rs}, due to http://austingroupbugs.net/view.php?id=1133.\n\n");
+
+	ctx->flags |= BFTW_PRUNE_MOUNTS;
+	parser->mount_arg = expr->argv;
+	return expr;
+}
+
+/**
+ * Parse -i?name.
+ *
+ * A non-zero `casefold` requests case-insensitive matching.
+ */
+static struct bfs_expr *parse_name(struct bfs_parser *parser, int casefold, int arg2) {
+	return parse_fnmatch(parser, parse_unary_test(parser, eval_name), casefold);
+}
+
+/**
+ * Parse -i?path, -i?wholename.
+ *
+ * A non-zero `casefold` requests case-insensitive matching.
+ */
+static struct bfs_expr *parse_path(struct bfs_parser *parser, int casefold, int arg2) {
+	return parse_fnmatch(parser, parse_unary_test(parser, eval_path), casefold);
+}
+
+/**
+ * Parse -i?lname.
+ *
+ * A non-zero `casefold` requests case-insensitive matching.
+ */
+static struct bfs_expr *parse_lname(struct bfs_parser *parser, int casefold, int arg2) {
+	return parse_fnmatch(parser, parse_unary_test(parser, eval_lname), casefold);
+}
+
+/** Map the X/Y letter of -newerXY to its bfs_stat_field, or 0 if it isn't one of a, B, c, m. */
+static enum bfs_stat_field parse_newerxy_field(char c) {
+	if (c == 'a') {
+		return BFS_STAT_ATIME;
+	}
+	if (c == 'B') {
+		return BFS_STAT_BTIME;
+	}
+	if (c == 'c') {
+		return BFS_STAT_CTIME;
+	}
+	if (c == 'm') {
+		return BFS_STAT_MTIME;
+	}
+	return 0;
+}
+
+/**
+ * Parse an explicit reference timestamp for -newerXt and -*since.
+ *
+ * On success, the timestamp from expr->argv[1] is stored in expr->reftime
+ * and 0 is returned.  On failure, -1 is returned after reporting the error;
+ * for an unparseable timestamp (EINVAL), a list of example formats based on
+ * the current time is also printed to stderr.
+ */
+static int parse_reftime(const struct bfs_parser *parser, struct bfs_expr *expr) {
+	if (xgetdate(expr->argv[1], &expr->reftime) == 0) {
+		return 0;
+	} else if (errno != EINVAL) {
+		parse_expr_error(parser, expr, "%m.\n");
+		return -1;
+	}
+
+	parse_expr_error(parser, expr, "Invalid timestamp.\n\n");
+	fprintf(stderr, "Supported timestamp formats are ISO 8601-like, e.g.\n\n");
+
+	// Examples in local time
+	struct tm tm;
+	if (!localtime_r(&parser->now.tv_sec, &tm)) {
+		parse_perror(parser, "localtime_r()");
+		return -1;
+	}
+
+	int year = tm.tm_year + 1900;
+	int month = tm.tm_mon + 1;
+	fprintf(stderr, " - %04d-%02d-%02d\n", year, month, tm.tm_mday);
+	fprintf(stderr, " - %04d-%02d-%02dT%02d:%02d:%02d\n", year, month, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec);
+
+	// Example with an explicit UTC offset
+#if BFS_HAS_TM_GMTOFF
+	long gmtoff = tm.tm_gmtoff;
+#else
+	long gmtoff = -timezone;
+#endif
+	// Print the sign separately from the magnitude: formatting the hour
+	// with %+03d would render an offset such as -00:30 as "+00:30",
+	// because the truncated hour is zero.
+	char tz_sign = gmtoff < 0 ? '-' : '+';
+	long tz_abs = labs(gmtoff);
+	int tz_hour = (int)(tz_abs / 3600);
+	int tz_min = (int)((tz_abs / 60) % 60);
+	fprintf(stderr, " - %04d-%02d-%02dT%02d:%02d:%02d%c%02d:%02d\n",
+		year, month, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec, tz_sign, tz_hour, tz_min);
+
+	// Example in UTC
+	if (!gmtime_r(&parser->now.tv_sec, &tm)) {
+		parse_perror(parser, "gmtime_r()");
+		return -1;
+	}
+
+	year = tm.tm_year + 1900;
+	month = tm.tm_mon + 1;
+	fprintf(stderr, " - %04d-%02d-%02dT%02d:%02d:%02dZ\n", year, month, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec);
+
+	// The timestamp was invalid either way
+	return -1;
+}
+
+/**
+ * Parse -newerXY.
+ *
+ * X selects the time field of the file being tested.  Y is either 't', in
+ * which case the argument is an explicit timestamp, or another time field,
+ * in which case the argument is a file whose time is used as the reference.
+ */
+static struct bfs_expr *parse_newerxy(struct bfs_parser *parser, int arg1, int arg2) {
+	const char *arg = parser->argv[0];
+	if (strlen(arg) != 8) {
+		parse_error(parser, "Expected ${blu}-newer${bld}XY${rs}; found ${blu}-newer${bld}%pq${rs}.\n", arg + 6);
+		return NULL;
+	}
+
+	struct bfs_expr *expr = parse_unary_test(parser, eval_newer);
+	if (!expr) {
+		return NULL;
+	}
+
+	const char x = arg[6];
+	const char y = arg[7];
+
+	expr->stat_field = parse_newerxy_field(x);
+	if (!expr->stat_field) {
+		parse_expr_error(parser, expr,
+			"For ${blu}-newer${bld}XY${rs}, ${bld}X${rs} should be ${bld}a${rs}, ${bld}c${rs}, ${bld}m${rs}, or ${bld}B${rs}, not ${err}%c${rs}.\n",
+			x);
+		return NULL;
+	}
+
+	if (y == 't') {
+		// The argument is a timestamp, not a file
+		return parse_reftime(parser, expr) == 0 ? expr : NULL;
+	}
+
+	enum bfs_stat_field field = parse_newerxy_field(y);
+	if (!field) {
+		parse_expr_error(parser, expr,
+			"For ${blu}-newer${bld}XY${rs}, ${bld}Y${rs} should be ${bld}a${rs}, ${bld}c${rs}, ${bld}m${rs}, ${bld}B${rs}, or ${bld}t${rs}, not ${err}%c${rs}.\n",
+			y);
+		return NULL;
+	}
+
+	struct bfs_stat sb;
+	if (stat_arg(parser, &expr->argv[1], &sb) != 0) {
+		return NULL;
+	}
+
+	const struct timespec *reftime = bfs_stat_time(&sb, field);
+	if (!reftime) {
+		parse_expr_error(parser, expr, "Couldn't get file %s.\n", bfs_stat_field_name(field));
+		return NULL;
+	}
+
+	expr->reftime = *reftime;
+	return expr;
+}
+
+/**
+ * Parse -nogroup.
+ *
+ * Reserves three ephemeral file descriptors for the group lookup.
+ */
+static struct bfs_expr *parse_nogroup(struct bfs_parser *parser, int arg1, int arg2) {
+	struct bfs_expr *expr = parse_nullary_test(parser, eval_nogroup);
+	if (!expr) {
+		return NULL;
+	}
+
+	// A guess, since getgrgid_r() doesn't say how many FDs it needs
+	expr->ephemeral_fds = 3;
+	return expr;
+}
+
+/**
+ * Parse -nohidden.
+ *
+ * Appends a synthetic eval_hidden() expression to the context's exclude
+ * list, then parses the option itself.
+ */
+static struct bfs_expr *parse_nohidden(struct bfs_parser *parser, int arg1, int arg2) {
+	struct bfs_expr *hidden = parse_new_expr(parser, eval_hidden, 1, &fake_hidden_arg);
+	if (hidden) {
+		bfs_expr_append(parser->ctx->exclude, hidden);
+		return parse_nullary_option(parser);
+	}
+
+	return NULL;
+}
+
+/**
+ * Parse -noleaf.
+ *
+ * Accepted with a warning that the option has nothing to inhibit.
+ */
+static struct bfs_expr *parse_noleaf(struct bfs_parser *parser, int arg1, int arg2) {
+	const char *arg = parser->argv[0];
+	parse_warning(parser, "${ex}%s${rs} does not apply the optimization that ${blu}%s${rs} inhibits.\n\n",
+		BFS_COMMAND, arg);
+
+	struct bfs_expr *option = parse_nullary_option(parser);
+	return option;
+}
+
+/**
+ * Parse -nouser.
+ *
+ * Reserves three ephemeral file descriptors for the user lookup.
+ */
+static struct bfs_expr *parse_nouser(struct bfs_parser *parser, int arg1, int arg2) {
+	struct bfs_expr *expr = parse_nullary_test(parser, eval_nouser);
+	if (!expr) {
+		return NULL;
+	}
+
+	// A guess, since getpwuid_r() doesn't say how many FDs it needs
+	expr->ephemeral_fds = 3;
+	return expr;
+}
+
+/**
+ * Parse a permission mode like chmod(1).
+ *
+ * A mode that starts with a digit is parsed as a number no greater than
+ * 07777 and stored in both expr->file_mode and expr->dir_mode.  Any other
+ * mode is parsed as a symbolic mode by the state machine below, starting
+ * from zero.  The two results can differ because 'X' adds execute bits to
+ * dir_mode only, while 'x' adds them to both.
+ *
+ * Returns 0 on success.  On any syntax error, reports "Invalid mode." and
+ * returns -1; expr->file_mode and expr->dir_mode may have been partially
+ * updated by then.
+ */
+static int parse_mode(const struct bfs_parser *parser, const char *mode, struct bfs_expr *expr) {
+ if (mode[0] >= '0' && mode[0] <= '9') { // Numeric mode
+ unsigned int parsed;
+ if (!parse_int(parser, NULL, mode, &parsed, 8 | IF_INT | IF_UNSIGNED | IF_QUIET)) { // NOTE(review): the 8 presumably selects base 8 -- confirm against parse_int()
+ goto fail;
+ }
+ if (parsed > 07777) { // More than the permission, setuid/setgid, and sticky bits
+ goto fail;
+ }
+
+ expr->file_mode = parsed;
+ expr->dir_mode = parsed;
+ return 0;
+ }
+
+ expr->file_mode = 0;
+ expr->dir_mode = 0;
+
+ // Parse the same grammar as chmod(1), which looks like this:
+ //
+ // MODE : CLAUSE ["," CLAUSE]*
+ //
+ // CLAUSE : WHO* ACTION+
+ //
+ // WHO : "u" | "g" | "o" | "a"
+ //
+ // ACTION : OP PERM*
+ // | OP PERMCOPY
+ //
+ // OP : "+" | "-" | "="
+ //
+ // PERM : "r" | "w" | "x" | "X" | "s" | "t"
+ //
+ // PERMCOPY : "u" | "g" | "o"
+
+ // State machine for the grammar above; mparser is the current state
+ enum {
+ MODE_CLAUSE, // Start of a clause: reset the WHO mask
+ MODE_WHO, // Accumulating WHO letters
+ MODE_ACTION, // Expecting an OP (nothing pending to apply)
+ MODE_ACTION_APPLY, // An action just ended: apply it, then expect OP, ",", or the end
+ MODE_OP, // Just after an OP: PERMCOPY or the start of PERM*
+ MODE_PERM, // Accumulating PERM letters
+ } mparser = MODE_CLAUSE;
+
+ enum {
+ MODE_PLUS,
+ MODE_MINUS,
+ MODE_EQUALS,
+ } op uninit(MODE_EQUALS);
+
+ mode_t who uninit(0); // Bits selected by the WHO letters of the current clause
+ mode_t file_change uninit(0); // Bits of the pending action, for file_mode
+ mode_t dir_change uninit(0); // Bits of the pending action, for dir_mode
+
+ const char *i = mode;
+ while (true) {
+ switch (mparser) {
+ case MODE_CLAUSE:
+ who = 0;
+ mparser = MODE_WHO;
+ fallthru;
+
+ case MODE_WHO:
+ switch (*i) {
+ case 'u':
+ who |= 0700;
+ break;
+ case 'g':
+ who |= 0070;
+ break;
+ case 'o':
+ who |= 0007;
+ break;
+ case 'a':
+ who |= 0777;
+ break;
+ default:
+ mparser = MODE_ACTION;
+ continue; // Re-examine the same character in the new state
+ }
+ break;
+
+ case MODE_ACTION_APPLY:
+ switch (op) {
+ case MODE_EQUALS:
+ expr->file_mode &= ~who; // "=" first clears the selected bits, then sets the new ones
+ expr->dir_mode &= ~who;
+ fallthru;
+ case MODE_PLUS:
+ expr->file_mode |= file_change;
+ expr->dir_mode |= dir_change;
+ break;
+ case MODE_MINUS:
+ expr->file_mode &= ~file_change;
+ expr->dir_mode &= ~dir_change;
+ break;
+ }
+ fallthru;
+
+ case MODE_ACTION:
+ if (who == 0) { // No WHO letters: same mask as "a"
+ who = 0777;
+ }
+
+ switch (*i) {
+ case '+':
+ op = MODE_PLUS;
+ mparser = MODE_OP;
+ break;
+ case '-':
+ op = MODE_MINUS;
+ mparser = MODE_OP;
+ break;
+ case '=':
+ op = MODE_EQUALS;
+ mparser = MODE_OP;
+ break;
+
+ case ',':
+ if (mparser == MODE_ACTION_APPLY) { // Only valid once at least one action was applied
+ mparser = MODE_CLAUSE;
+ } else {
+ goto fail;
+ }
+ break;
+
+ case '\0':
+ if (mparser == MODE_ACTION_APPLY) { // The mode must end right after a complete action
+ goto done;
+ } else {
+ goto fail;
+ }
+
+ default:
+ goto fail;
+ }
+ break;
+
+ case MODE_OP:
+ switch (*i) { // PERMCOPY: take the bits currently set for u, g, or o
+ case 'u':
+ file_change = (expr->file_mode >> 6) & 07;
+ dir_change = (expr->dir_mode >> 6) & 07;
+ break;
+ case 'g':
+ file_change = (expr->file_mode >> 3) & 07;
+ dir_change = (expr->dir_mode >> 3) & 07;
+ break;
+ case 'o':
+ file_change = expr->file_mode & 07;
+ dir_change = expr->dir_mode & 07;
+ break;
+
+ default:
+ file_change = 0;
+ dir_change = 0;
+ mparser = MODE_PERM;
+ continue; // Not a PERMCOPY; re-examine the character as a PERM
+ }
+
+ file_change |= (file_change << 6) | (file_change << 3); // Replicate the copied bits to all three classes, then mask by WHO
+ file_change &= who;
+ dir_change |= (dir_change << 6) | (dir_change << 3);
+ dir_change &= who;
+ mparser = MODE_ACTION_APPLY;
+ break;
+
+ case MODE_PERM:
+ switch (*i) {
+ case 'r':
+ file_change |= who & 0444;
+ dir_change |= who & 0444;
+ break;
+ case 'w':
+ file_change |= who & 0222;
+ dir_change |= who & 0222;
+ break;
+ case 'x':
+ file_change |= who & 0111;
+ fallthru;
+ case 'X': // Execute bits for dir_mode only
+ dir_change |= who & 0111;
+ break;
+ case 's':
+ if (who & 0700) { // setuid only if the user class is selected
+ file_change |= S_ISUID;
+ dir_change |= S_ISUID;
+ }
+ if (who & 0070) { // setgid only if the group class is selected
+ file_change |= S_ISGID;
+ dir_change |= S_ISGID;
+ }
+ break;
+ case 't':
+ if (who & 0007) { // sticky only if the other class is selected
+ file_change |= S_ISVTX;
+ dir_change |= S_ISVTX;
+ }
+ break;
+ default:
+ mparser = MODE_ACTION_APPLY;
+ continue; // End of PERM*; apply, then re-examine this character
+ }
+ break;
+ }
+
+ ++i;
+ }
+
+done:
+ return 0;
+
+fail:
+ parse_expr_error(parser, expr, "Invalid mode.\n");
+ return -1;
+}
+
+/**
+ * Parse -perm MODE.
+ *
+ * A leading '-' requires all of the mode bits and a leading '/' any of
+ * them.  A leading '+' also means "any", but only when a digit follows;
+ * otherwise the '+' belongs to a symbolic mode that must match exactly.
+ */
+static struct bfs_expr *parse_perm(struct bfs_parser *parser, int field, int arg2) {
+	struct bfs_expr *expr = parse_unary_test(parser, eval_perm);
+	if (!expr) {
+		return NULL;
+	}
+
+	const char *mode = expr->argv[1];
+	const char lead = mode[0];
+
+	if (lead == '-') {
+		expr->mode_cmp = BFS_MODE_ALL;
+		++mode;
+	} else if (lead == '/' || (lead == '+' && mode[1] >= '0' && mode[1] <= '9')) {
+		expr->mode_cmp = BFS_MODE_ANY;
+		++mode;
+	} else {
+		expr->mode_cmp = BFS_MODE_EQUAL;
+	}
+
+	return parse_mode(parser, mode, expr) == 0 ? expr : NULL;
+}
+
+/**
+ * Parse -print.
+ *
+ * Uses the -fprint evaluator, set up as a print expression by
+ * init_print_expr().
+ */
+static struct bfs_expr *parse_print(struct bfs_parser *parser, int arg1, int arg2) {
+	struct bfs_expr *expr = parse_nullary_action(parser, eval_fprint);
+	if (!expr) {
+		return NULL;
+	}
+
+	init_print_expr(parser, expr);
+	return expr;
+}
+
+/**
+ * Parse -print0.
+ *
+ * Uses the -fprint0 evaluator, set up as a print expression by
+ * init_print_expr().
+ */
+static struct bfs_expr *parse_print0(struct bfs_parser *parser, int arg1, int arg2) {
+	struct bfs_expr *expr = parse_nullary_action(parser, eval_fprint0);
+	if (!expr) {
+		return NULL;
+	}
+
+	init_print_expr(parser, expr);
+	return expr;
+}
+
+/**
+ * Parse -printf FORMAT.
+ *
+ * Uses the -fprintf evaluator, set up as a print expression by
+ * init_print_expr(), then compiles FORMAT.
+ */
+static struct bfs_expr *parse_printf(struct bfs_parser *parser, int arg1, int arg2) {
+	struct bfs_expr *expr = parse_unary_action(parser, eval_fprintf);
+	if (!expr) {
+		return NULL;
+	}
+
+	init_print_expr(parser, expr);
+
+	const char *format = expr->argv[1];
+	int err = bfs_printf_parse(parser->ctx, expr, format);
+	return err == 0 ? expr : NULL;
+}
+
+/**
+ * Parse -printx.
+ *
+ * Evaluated by eval_fprintx(), set up as a print expression by
+ * init_print_expr().
+ */
+static struct bfs_expr *parse_printx(struct bfs_parser *parser, int arg1, int arg2) {
+	struct bfs_expr *expr = parse_nullary_action(parser, eval_fprintx);
+	if (!expr) {
+		return NULL;
+	}
+
+	init_print_expr(parser, expr);
+	return expr;
+}
+
+/**
+ * Parse -prune.
+ *
+ * Remembers the argument in parser->prune_arg before building the action.
+ */
+static struct bfs_expr *parse_prune(struct bfs_parser *parser, int arg1, int arg2) {
+	// Must be captured before the action below is parsed
+	parser->prune_arg = parser->argv;
+
+	struct bfs_expr *action = parse_nullary_action(parser, eval_prune);
+	return action;
+}
+
+/**
+ * Parse -quit.
+ *
+ * A nullary action evaluated by eval_quit().
+ */
+static struct bfs_expr *parse_quit(struct bfs_parser *parser, int arg1, int arg2) {
+	struct bfs_expr *action = parse_nullary_action(parser, eval_quit);
+	return action;
+}
+
+/**
+ * Parse -i?regex.
+ *
+ * Compiles the argument with the parser's current regex type and the given
+ * `flags`.  On failure, the compiler's own message is reported if a regex
+ * object was produced; otherwise the errno from bfs_regcomp() is reported.
+ */
+static struct bfs_expr *parse_regex(struct bfs_parser *parser, int flags, int arg2) {
+	struct bfs_expr *expr = parse_unary_test(parser, eval_regex);
+	if (!expr) {
+		return NULL;
+	}
+
+	if (bfs_regcomp(&expr->regex, expr->argv[1], parser->regex_type, flags) == 0) {
+		return expr;
+	}
+
+	if (!expr->regex) {
+		parse_perror(parser, "bfs_regcomp()");
+		return NULL;
+	}
+
+	char *msg = bfs_regerror(expr->regex);
+	if (!msg) {
+		parse_perror(parser, "bfs_regerror()");
+		return NULL;
+	}
+
+	parse_expr_error(parser, expr, "%s.\n", msg);
+	free(msg);
+	return NULL;
+}
+
+/**
+ * Parse -E.
+ *
+ * Switches the parser's regex type to POSIX extended.
+ */
+static struct bfs_expr *parse_regex_extended(struct bfs_parser *parser, int arg1, int arg2) {
+	parser->regex_type = BFS_REGEX_POSIX_EXTENDED;
+
+	struct bfs_expr *flag = parse_nullary_flag(parser);
+	return flag;
+}
+
+/**
+ * Parse -regextype TYPE.
+ *
+ * Sets the parser's regex type.  For "help", an unsupported type, or a
+ * missing argument, the supported types are listed and NULL is returned;
+ * "help" prints to cout and marks the run as informational, while the
+ * other two cases print to cerr.
+ */
+static struct bfs_expr *parse_regextype(struct bfs_parser *parser, int arg1, int arg2) {
+	struct bfs_ctx *ctx = parser->ctx;
+	CFILE *cfile = ctx->cerr;
+
+	struct bfs_expr *expr = parse_unary_option(parser);
+	if (!expr) {
+		cfprintf(cfile, "\n");
+		goto list_types;
+	}
+
+	// See https://www.gnu.org/software/gnulib/manual/html_node/Predefined-Syntaxes.html
+	const char *type = expr->argv[1];
+
+	if (strcmp(type, "posix-basic") == 0 || strcmp(type, "ed") == 0 || strcmp(type, "sed") == 0) {
+		parser->regex_type = BFS_REGEX_POSIX_BASIC;
+		return expr;
+	}
+
+	if (strcmp(type, "posix-extended") == 0) {
+		parser->regex_type = BFS_REGEX_POSIX_EXTENDED;
+		return expr;
+	}
+
+#if BFS_USE_ONIGURUMA
+	if (strcmp(type, "emacs") == 0) {
+		parser->regex_type = BFS_REGEX_EMACS;
+		return expr;
+	}
+
+	if (strcmp(type, "grep") == 0) {
+		parser->regex_type = BFS_REGEX_GREP;
+		return expr;
+	}
+#endif
+
+	if (strcmp(type, "help") == 0) {
+		// An explicit request for the list is not an error
+		parser->just_info = true;
+		cfile = ctx->cout;
+	} else {
+		parse_expr_error(parser, expr, "Unsupported regex type.\n\n");
+	}
+
+list_types:
+	cfprintf(cfile, "Supported types are:\n\n");
+	cfprintf(cfile, " ${bld}posix-basic${rs}: POSIX basic regular expressions (BRE)\n");
+	cfprintf(cfile, " ${bld}posix-extended${rs}: POSIX extended regular expressions (ERE)\n");
+	cfprintf(cfile, " ${bld}ed${rs}: Like ${grn}ed${rs} (same as ${bld}posix-basic${rs})\n");
+#if BFS_USE_ONIGURUMA
+	cfprintf(cfile, " ${bld}emacs${rs}: Like ${grn}emacs${rs}\n");
+	cfprintf(cfile, " ${bld}grep${rs}: Like ${grn}grep${rs}\n");
+#endif
+	cfprintf(cfile, " ${bld}sed${rs}: Like ${grn}sed${rs} (same as ${bld}posix-basic${rs})\n");
+	return NULL;
+}
+
+/**
+ * Parse -s.
+ *
+ * Sets the BFTW_SORT traversal flag.
+ */
+static struct bfs_expr *parse_s(struct bfs_parser *parser, int arg1, int arg2) {
+	struct bfs_ctx *ctx = parser->ctx;
+	ctx->flags |= BFTW_SORT;
+
+	struct bfs_expr *flag = parse_nullary_flag(parser);
+	return flag;
+}
+
+/**
+ * Parse -samefile FILE.
+ *
+ * Stats FILE and records its device and inode numbers in the expression.
+ */
+static struct bfs_expr *parse_samefile(struct bfs_parser *parser, int arg1, int arg2) {
+	struct bfs_expr *expr = parse_unary_test(parser, eval_samefile);
+	if (!expr) {
+		return NULL;
+	}
+
+	struct bfs_stat sb;
+	if (stat_arg(parser, &expr->argv[1], &sb) == 0) {
+		expr->ino = sb.ino;
+		expr->dev = sb.dev;
+		return expr;
+	}
+
+	return NULL;
+}
+
+/**
+ * Parse -S STRATEGY.
+ *
+ * Sets the context's search strategy.  For "help", an unrecognized
+ * strategy, or a missing argument, the supported strategies are listed and
+ * NULL is returned; "help" prints to cout and marks the run as
+ * informational, while the other two cases print to cerr.
+ */
+static struct bfs_expr *parse_search_strategy(struct bfs_parser *parser, int arg1, int arg2) {
+	struct bfs_ctx *ctx = parser->ctx;
+	CFILE *cfile = ctx->cerr;
+
+	struct bfs_expr *expr = parse_unary_flag(parser);
+	if (!expr) {
+		cfprintf(cfile, "\n");
+		goto list_strategies;
+	}
+
+	const char *name = expr->argv[1];
+
+	if (strcmp(name, "bfs") == 0) {
+		ctx->strategy = BFTW_BFS;
+		return expr;
+	}
+	if (strcmp(name, "dfs") == 0) {
+		ctx->strategy = BFTW_DFS;
+		return expr;
+	}
+	if (strcmp(name, "ids") == 0) {
+		ctx->strategy = BFTW_IDS;
+		return expr;
+	}
+	if (strcmp(name, "eds") == 0) {
+		ctx->strategy = BFTW_EDS;
+		return expr;
+	}
+
+	if (strcmp(name, "help") == 0) {
+		// An explicit request for the list is not an error
+		parser->just_info = true;
+		cfile = ctx->cout;
+	} else {
+		parse_expr_error(parser, expr, "Unrecognized search strategy.\n\n");
+	}
+
+list_strategies:
+	cfprintf(cfile, "Supported search strategies:\n\n");
+	cfprintf(cfile, " ${bld}bfs${rs}: breadth-first search\n");
+	cfprintf(cfile, " ${bld}dfs${rs}: depth-first search\n");
+	cfprintf(cfile, " ${bld}ids${rs}: iterative deepening search\n");
+	cfprintf(cfile, " ${bld}eds${rs}: exponential deepening search\n");
+	return NULL;
+}
+
+/**
+ * Parse -[aBcm]?since.
+ *
+ * The argument is an explicit timestamp; `field` selects which time of the
+ * file is compared against it.
+ */
+static struct bfs_expr *parse_since(struct bfs_parser *parser, int field, int arg2) {
+	struct bfs_expr *expr = parse_unary_test(parser, eval_newer);
+
+	if (!expr || parse_reftime(parser, expr) != 0) {
+		return NULL;
+	}
+
+	expr->stat_field = field;
+	return expr;
+}
+
+/**
+ * Parse -size N[cwbkMGTP]?.
+ *
+ * Whatever follows the number must be at most one unit character; a missing
+ * unit is treated the same as 'b'.
+ */
+static struct bfs_expr *parse_size(struct bfs_parser *parser, int arg1, int arg2) {
+	struct bfs_expr *expr = parse_unary_test(parser, eval_size);
+	if (!expr) {
+		return NULL;
+	}
+
+	const char *unit = parse_icmp(parser, expr, IF_PARTIAL_OK);
+	if (!unit) {
+		return NULL;
+	}
+
+	// Anything longer than a single character can't be a unit
+	bool valid = strlen(unit) <= 1;
+
+	if (valid) {
+		switch (*unit) {
+		case '\0':
+		case 'b':
+			expr->size_unit = BFS_BLOCKS;
+			break;
+		case 'c':
+			expr->size_unit = BFS_BYTES;
+			break;
+		case 'w':
+			expr->size_unit = BFS_WORDS;
+			break;
+		case 'k':
+			expr->size_unit = BFS_KB;
+			break;
+		case 'M':
+			expr->size_unit = BFS_MB;
+			break;
+		case 'G':
+			expr->size_unit = BFS_GB;
+			break;
+		case 'T':
+			expr->size_unit = BFS_TB;
+			break;
+		case 'P':
+			expr->size_unit = BFS_PB;
+			break;
+		default:
+			valid = false;
+			break;
+		}
+	}
+
+	if (!valid) {
+		parse_expr_error(parser, expr, "Expected a size unit (one of ${bld}cwbkMGTP${rs}); found ${err}%pq${rs}.\n", unit);
+		return NULL;
+	}
+
+	return expr;
+}
+
+/**
+ * Parse -sparse.
+ *
+ * A nullary test evaluated by eval_sparse().
+ */
+static struct bfs_expr *parse_sparse(struct bfs_parser *parser, int arg1, int arg2) {
+	struct bfs_expr *test = parse_nullary_test(parser, eval_sparse);
+	return test;
+}
+
+/**
+ * Parse -status.
+ *
+ * Sets the context's `status` flag.
+ */
+static struct bfs_expr *parse_status(struct bfs_parser *parser, int arg1, int arg2) {
+	struct bfs_ctx *ctx = parser->ctx;
+	ctx->status = true;
+
+	struct bfs_expr *option = parse_nullary_option(parser);
+	return option;
+}
+
+/**
+ * Parse -x?type [bcdpflsD].
+ *
+ * The argument is a comma-separated list of single-character type flags,
+ * which are collected into a bitmask in expr->num.  A non-zero `x` selects
+ * the -xtype evaluator.  The 'w' flag is also accepted, and additionally
+ * sets BFTW_WHITEOUTS on the context.
+ */
+static struct bfs_expr *parse_type(struct bfs_parser *parser, int x, int arg2) {
+	struct bfs_ctx *ctx = parser->ctx;
+
+	bfs_eval_fn *eval = x ? eval_xtype : eval_type;
+	struct bfs_expr *expr = parse_unary_test(parser, eval);
+	if (!expr) {
+		return NULL;
+	}
+
+	expr->num = 0;
+
+	// Each iteration handles one flag; the increment skips the comma
+	for (const char *c = expr->argv[1];; ++c) {
+		int type;
+
+		switch (*c) {
+		case 'b':
+			type = BFS_BLK;
+			break;
+		case 'c':
+			type = BFS_CHR;
+			break;
+		case 'd':
+			type = BFS_DIR;
+			break;
+		case 'D':
+			type = BFS_DOOR;
+			break;
+		case 'p':
+			type = BFS_FIFO;
+			break;
+		case 'f':
+			type = BFS_REG;
+			break;
+		case 'l':
+			type = BFS_LNK;
+			break;
+		case 's':
+			type = BFS_SOCK;
+			break;
+		case 'w':
+			type = BFS_WHT;
+			ctx->flags |= BFTW_WHITEOUTS;
+			break;
+
+		case '\0':
+			parse_expr_error(parser, expr, "Expected a type flag.\n");
+			return NULL;
+
+		default:
+			parse_expr_error(parser, expr, "Unknown type flag ${err}%c${rs}; expected one of [${bld}bcdpflsD${rs}].\n", *c);
+			return NULL;
+		}
+
+		expr->num |= 1 << type;
+
+		// A flag must be followed by the end of the list or a comma
+		++c;
+		if (*c == '\0') {
+			break;
+		}
+		if (*c != ',') {
+			parse_expr_error(parser, expr, "Types must be comma-separated.\n");
+			return NULL;
+		}
+	}
+
+	return expr;
+}
+
+/**
+ * Parse -(no)?warn.
+ *
+ * Stores `warn` in the context's `warn` setting.
+ */
+static struct bfs_expr *parse_warn(struct bfs_parser *parser, int warn, int arg2) {
+	struct bfs_ctx *ctx = parser->ctx;
+	ctx->warn = warn;
+
+	struct bfs_expr *option = parse_nullary_option(parser);
+	return option;
+}
+
+/**
+ * Parse -xattr.
+ *
+ * Builds a nullary test evaluated by eval_xattr when BFS_CAN_CHECK_XATTRS is
+ * set; otherwise reports the missing platform support and fails with NULL.
+ */
+static struct bfs_expr *parse_xattr(struct bfs_parser *parser, int arg1, int arg2) {
+#if !BFS_CAN_CHECK_XATTRS
+	parse_error(parser, "Missing platform support.\n");
+	return NULL;
+#else
+	return parse_nullary_test(parser, eval_xattr);
+#endif
+}
+
+/**
+ * Parse -xattrname.
+ *
+ * Builds a unary test evaluated by eval_xattrname when BFS_CAN_CHECK_XATTRS
+ * is set; otherwise reports the missing platform support and fails with NULL.
+ */
+static struct bfs_expr *parse_xattrname(struct bfs_parser *parser, int arg1, int arg2) {
+#if !BFS_CAN_CHECK_XATTRS
+	parse_error(parser, "Missing platform support.\n");
+	return NULL;
+#else
+	return parse_unary_test(parser, eval_xattrname);
+#endif
+}
+
+/**
+ * Parse -xdev.
+ *
+ * Remembers where the argument appeared (for the -mount/-xdev conflict
+ * warning), enables BFTW_PRUNE_MOUNTS, and consumes the argument as a nullary
+ * option.  arg1 and arg2 are unused.
+ */
+static struct bfs_expr *parse_xdev(struct bfs_parser *parser, int arg1, int arg2) {
+	// Record the position before the option is consumed below
+	parser->xdev_arg = parser->argv;
+	parser->ctx->flags |= BFTW_PRUNE_MOUNTS;
+
+	return parse_nullary_option(parser);
+}
+
+/**
+ * Launch a pager for the help output.
+ *
+ * The pager is taken from the PAGER environment variable when it is set and
+ * non-empty; otherwise "less" is tried, then "more".  The pager is spawned
+ * with the read end of a fresh pipe as its standard input.
+ *
+ * @param pid
+ *         Receives the result of bfs_spawn() once a spawn is attempted.  It
+ *         is left untouched when an earlier step fails.
+ * @param cout
+ *         The stream to fall back to; its colors are reused for "less".
+ * @return
+ *         A new CFILE wrapping the write end of the pipe on success, or cout
+ *         itself if any step fails.
+ */
+static CFILE *launch_pager(pid_t *pid, CFILE *cout) {
+ char *pager = getenv("PAGER");
+
+ char *exe;
+ if (pager && pager[0]) {
+ exe = bfs_spawn_resolve(pager);
+ } else {
+ exe = bfs_spawn_resolve("less");
+ if (!exe) {
+ exe = bfs_spawn_resolve("more");
+ }
+ }
+ if (!exe) {
+ goto fail;
+ }
+
+ int pipefd[2];
+ if (pipe(pipefd) != 0) {
+ goto fail_exe;
+ }
+
+ FILE *file = fdopen(pipefd[1], "w");
+ if (!file) {
+ goto fail_pipe;
+ }
+ pipefd[1] = -1; // the write end is now closed through file, not directly
+
+ CFILE *ret = cfwrap(file, NULL, true);
+ if (!ret) {
+ goto fail_file;
+ }
+ file = NULL; // from here on cfclose(ret) is used instead of fclose(file)
+
+ struct bfs_spawn ctx;
+ if (bfs_spawn_init(&ctx) != 0) {
+ goto fail_ret;
+ }
+
+ // File actions for the spawn: close the write end, make the read end stdin
+ if (bfs_spawn_addclose(&ctx, fileno(ret->file)) != 0) {
+ goto fail_ctx;
+ }
+ if (bfs_spawn_adddup2(&ctx, pipefd[0], STDIN_FILENO) != 0) {
+ goto fail_ctx;
+ }
+ if (bfs_spawn_addclose(&ctx, pipefd[0]) != 0) {
+ goto fail_ctx;
+ }
+
+ char *argv[] = {
+ exe,
+ NULL,
+ NULL,
+ };
+
+ const char *cmd = exe + xbaseoff(exe);
+ if (strcmp(cmd, "less") == 0) {
+ // We know less supports colors, other pagers may not
+ ret->colors = cout->colors;
+ argv[1] = "-FKRX";
+ }
+
+ *pid = bfs_spawn(exe, &ctx, argv, NULL);
+ if (*pid < 0) {
+ goto fail_ctx;
+ }
+
+ // Success: this process keeps only the write end (inside ret)
+ xclose(pipefd[0]);
+ bfs_spawn_destroy(&ctx);
+ free(exe);
+ return ret;
+
+fail_ctx:
+ bfs_spawn_destroy(&ctx);
+fail_ret:
+ cfclose(ret);
+fail_file:
+ if (file) {
+ fclose(file);
+ }
+fail_pipe:
+ if (pipefd[1] >= 0) {
+ xclose(pipefd[1]);
+ }
+ if (pipefd[0] >= 0) {
+ xclose(pipefd[0]);
+ }
+fail_exe:
+ free(exe);
+fail:
+ return cout;
+}
+
+/**
+ * "Parse" -help.
+ *
+ * Prints the usage text: flags, operators, special forms, options, tests and
+ * actions, followed by the homepage.  When parser->stdout_tty is set, the
+ * text is sent through launch_pager(); if a pager was started (pager > 0),
+ * its stream is closed and the pager is waited for before returning.
+ *
+ * Tests guarded by BFS_CAN_CHECK_ACL, BFS_CAN_CHECK_CAPABILITIES,
+ * BFS_CAN_CHECK_CONTEXT, FNM_CASEFOLD and BFS_CAN_CHECK_XATTRS are listed
+ * only when the corresponding macro allows it.
+ *
+ * Always sets parser->just_info and returns NULL, since no expression is
+ * produced.  arg1 and arg2 are unused.
+ */
+static struct bfs_expr *parse_help(struct bfs_parser *parser, int arg1, int arg2) {
+ CFILE *cout = parser->ctx->cout;
+
+ pid_t pager = -1;
+ if (parser->stdout_tty) {
+ cout = launch_pager(&pager, cout);
+ }
+
+ cfprintf(cout, "Usage: ${ex}%s${rs} [${cyn}flags${rs}...] [${mag}paths${rs}...] [${blu}expression${rs}...]\n\n",
+ parser->command);
+
+ cfprintf(cout, "${ex}%s${rs} is compatible with ${ex}find${rs}, with some extensions. "
+ "${cyn}Flags${rs} (${cyn}-H${rs}/${cyn}-L${rs}/${cyn}-P${rs} etc.), ${mag}paths${rs},\n"
+ "and ${blu}expressions${rs} may be freely mixed in any order.\n\n",
+ BFS_COMMAND);
+
+ cfprintf(cout, "${bld}Flags:${rs}\n\n");
+
+ cfprintf(cout, " ${cyn}-H${rs}\n");
+ cfprintf(cout, " Follow symbolic links on the command line, but not while searching\n");
+ cfprintf(cout, " ${cyn}-L${rs}\n");
+ cfprintf(cout, " Follow all symbolic links\n");
+ cfprintf(cout, " ${cyn}-P${rs}\n");
+ cfprintf(cout, " Never follow symbolic links (the default)\n");
+
+ cfprintf(cout, " ${cyn}-E${rs}\n");
+ cfprintf(cout, " Use extended regular expressions (same as ${blu}-regextype${rs} ${bld}posix-extended${rs})\n");
+ cfprintf(cout, " ${cyn}-X${rs}\n");
+ cfprintf(cout, " Filter out files with non-${ex}xargs${rs}-safe names\n");
+ cfprintf(cout, " ${cyn}-d${rs}\n");
+ cfprintf(cout, " Search in post-order (same as ${blu}-depth${rs})\n");
+ cfprintf(cout, " ${cyn}-s${rs}\n");
+ cfprintf(cout, " Visit directory entries in sorted order\n");
+ cfprintf(cout, " ${cyn}-x${rs}\n");
+ cfprintf(cout, " Don't descend into other mount points (same as ${blu}-xdev${rs})\n");
+
+ cfprintf(cout, " ${cyn}-f${rs} ${mag}PATH${rs}\n");
+ cfprintf(cout, " Treat ${mag}PATH${rs} as a path to search (useful if begins with a dash)\n");
+ cfprintf(cout, " ${cyn}-D${rs} ${bld}FLAG${rs}\n");
+ cfprintf(cout, " Turn on a debugging flag (see ${cyn}-D${rs} ${bld}help${rs})\n");
+ cfprintf(cout, " ${cyn}-O${bld}N${rs}\n");
+ cfprintf(cout, " Enable optimization level ${bld}N${rs} (default: ${bld}3${rs})\n");
+ cfprintf(cout, " ${cyn}-S${rs} ${bld}bfs${rs}|${bld}dfs${rs}|${bld}ids${rs}|${bld}eds${rs}\n");
+ cfprintf(cout, " Use ${bld}b${rs}readth-${bld}f${rs}irst/${bld}d${rs}epth-${bld}f${rs}irst/${bld}i${rs}terative/${bld}e${rs}xponential ${bld}d${rs}eepening ${bld}s${rs}earch\n");
+ cfprintf(cout, " (default: ${cyn}-S${rs} ${bld}bfs${rs})\n");
+ cfprintf(cout, " ${cyn}-j${bld}N${rs}\n");
+ cfprintf(cout, " Search with ${bld}N${rs} threads in parallel (default: number of CPUs, up to ${bld}8${rs})\n\n");
+
+ cfprintf(cout, "${bld}Operators:${rs}\n\n");
+
+ cfprintf(cout, " ${red}(${rs} ${blu}expression${rs} ${red})${rs}\n\n");
+
+ cfprintf(cout, " ${red}!${rs} ${blu}expression${rs}\n");
+ cfprintf(cout, " ${red}-not${rs} ${blu}expression${rs}\n\n");
+
+ cfprintf(cout, " ${blu}expression${rs} ${blu}expression${rs}\n");
+ cfprintf(cout, " ${blu}expression${rs} ${red}-a${rs} ${blu}expression${rs}\n");
+ cfprintf(cout, " ${blu}expression${rs} ${red}-and${rs} ${blu}expression${rs}\n\n");
+
+ cfprintf(cout, " ${blu}expression${rs} ${red}-o${rs} ${blu}expression${rs}\n");
+ cfprintf(cout, " ${blu}expression${rs} ${red}-or${rs} ${blu}expression${rs}\n\n");
+
+ cfprintf(cout, " ${blu}expression${rs} ${red},${rs} ${blu}expression${rs}\n\n");
+
+ cfprintf(cout, "${bld}Special forms:${rs}\n\n");
+
+ cfprintf(cout, " ${red}-exclude${rs} ${blu}expression${rs}\n");
+ cfprintf(cout, " Exclude all paths matching the ${blu}expression${rs} from the search.\n\n");
+
+ cfprintf(cout, "${bld}Options:${rs}\n\n");
+
+ cfprintf(cout, " ${blu}-color${rs}\n");
+ cfprintf(cout, " ${blu}-nocolor${rs}\n");
+ cfprintf(cout, " Turn colors on or off (default: ${blu}-color${rs} if outputting to a terminal,\n");
+ cfprintf(cout, " ${blu}-nocolor${rs} otherwise)\n");
+ cfprintf(cout, " ${blu}-daystart${rs}\n");
+ cfprintf(cout, " Measure times relative to the start of today\n");
+ cfprintf(cout, " ${blu}-depth${rs}\n");
+ cfprintf(cout, " Search in post-order (descendents first)\n");
+ cfprintf(cout, " ${blu}-files0-from${rs} ${bld}FILE${rs}\n");
+ cfprintf(cout, " Search the NUL ('\\0')-separated paths from ${bld}FILE${rs} (${bld}-${rs} for standard input).\n");
+ cfprintf(cout, " ${blu}-follow${rs}\n");
+ cfprintf(cout, " Follow all symbolic links (same as ${cyn}-L${rs})\n");
+ cfprintf(cout, " ${blu}-ignore_readdir_race${rs}\n");
+ cfprintf(cout, " ${blu}-noignore_readdir_race${rs}\n");
+ cfprintf(cout, " Whether to report an error if ${ex}%s${rs} detects that the file tree is modified\n",
+ BFS_COMMAND);
+ cfprintf(cout, " during the search (default: ${blu}-noignore_readdir_race${rs})\n");
+ cfprintf(cout, " ${blu}-maxdepth${rs} ${bld}N${rs}\n");
+ cfprintf(cout, " ${blu}-mindepth${rs} ${bld}N${rs}\n");
+ cfprintf(cout, " Ignore files deeper/shallower than ${bld}N${rs}\n");
+ cfprintf(cout, " ${blu}-mount${rs}\n");
+ cfprintf(cout, " Don't descend into other mount points (same as ${blu}-xdev${rs} for now, but will\n");
+ cfprintf(cout, " skip mount points entirely in the future)\n");
+ cfprintf(cout, " ${blu}-nohidden${rs}\n");
+ cfprintf(cout, " Exclude hidden files\n");
+ cfprintf(cout, " ${blu}-noleaf${rs}\n");
+ cfprintf(cout, " Ignored; for compatibility with GNU find\n");
+ cfprintf(cout, " ${blu}-regextype${rs} ${bld}TYPE${rs}\n");
+ cfprintf(cout, " Use ${bld}TYPE${rs}-flavored regexes (default: ${bld}posix-basic${rs}; see ${blu}-regextype${rs} ${bld}help${rs})\n");
+ cfprintf(cout, " ${blu}-status${rs}\n");
+ cfprintf(cout, " Display a status bar while searching\n");
+ cfprintf(cout, " ${blu}-unique${rs}\n");
+ cfprintf(cout, " Skip any files that have already been seen\n");
+ cfprintf(cout, " ${blu}-warn${rs}\n");
+ cfprintf(cout, " ${blu}-nowarn${rs}\n");
+ cfprintf(cout, " Turn on or off warnings about the command line\n");
+ cfprintf(cout, " ${blu}-xdev${rs}\n");
+ cfprintf(cout, " Don't descend into other mount points\n\n");
+
+ cfprintf(cout, "${bld}Tests:${rs}\n\n");
+
+#if BFS_CAN_CHECK_ACL
+ cfprintf(cout, " ${blu}-acl${rs}\n");
+ cfprintf(cout, " Find files with a non-trivial Access Control List\n");
+#endif
+ cfprintf(cout, " ${blu}-${rs}[${blu}aBcm${rs}]${blu}min${rs} ${bld}[-+]N${rs}\n");
+ cfprintf(cout, " Find files ${blu}a${rs}ccessed/${blu}B${rs}irthed/${blu}c${rs}hanged/${blu}m${rs}odified ${bld}N${rs} minutes ago\n");
+ cfprintf(cout, " ${blu}-${rs}[${blu}aBcm${rs}]${blu}newer${rs} ${bld}FILE${rs}\n");
+ cfprintf(cout, " Find files ${blu}a${rs}ccessed/${blu}B${rs}irthed/${blu}c${rs}hanged/${blu}m${rs}odified more recently than ${bld}FILE${rs} was\n"
+ " modified\n");
+ cfprintf(cout, " ${blu}-${rs}[${blu}aBcm${rs}]${blu}since${rs} ${bld}TIME${rs}\n");
+ cfprintf(cout, " Find files ${blu}a${rs}ccessed/${blu}B${rs}irthed/${blu}c${rs}hanged/${blu}m${rs}odified more recently than ${bld}TIME${rs}\n");
+ cfprintf(cout, " ${blu}-${rs}[${blu}aBcm${rs}]${blu}time${rs} ${bld}[-+]N${rs}\n");
+ cfprintf(cout, " Find files ${blu}a${rs}ccessed/${blu}B${rs}irthed/${blu}c${rs}hanged/${blu}m${rs}odified ${bld}N${rs} days ago\n");
+#if BFS_CAN_CHECK_CAPABILITIES
+ cfprintf(cout, " ${blu}-capable${rs}\n");
+ cfprintf(cout, " Find files with POSIX.1e capabilities set\n");
+#endif
+#if BFS_CAN_CHECK_CONTEXT
+ cfprintf(cout, " ${blu}-context${rs} ${bld}GLOB${rs}\n");
+ cfprintf(cout, " Find files with SELinux context matching a glob pattern\n");
+#endif
+ cfprintf(cout, " ${blu}-depth${rs} ${bld}[-+]N${rs}\n");
+ cfprintf(cout, " Find files with depth ${bld}N${rs}\n");
+ cfprintf(cout, " ${blu}-empty${rs}\n");
+ cfprintf(cout, " Find empty files/directories\n");
+ cfprintf(cout, " ${blu}-executable${rs}\n");
+ cfprintf(cout, " ${blu}-readable${rs}\n");
+ cfprintf(cout, " ${blu}-writable${rs}\n");
+ cfprintf(cout, " Find files the current user can execute/read/write\n");
+ cfprintf(cout, " ${blu}-false${rs}\n");
+ cfprintf(cout, " ${blu}-true${rs}\n");
+ cfprintf(cout, " Always false/true\n");
+ cfprintf(cout, " ${blu}-fstype${rs} ${bld}TYPE${rs}\n");
+ cfprintf(cout, " Find files on file systems with the given ${bld}TYPE${rs}\n");
+ cfprintf(cout, " ${blu}-gid${rs} ${bld}[-+]N${rs}\n");
+ cfprintf(cout, " ${blu}-uid${rs} ${bld}[-+]N${rs}\n");
+ cfprintf(cout, " Find files owned by group/user ID ${bld}N${rs}\n");
+ cfprintf(cout, " ${blu}-group${rs} ${bld}NAME${rs}\n");
+ cfprintf(cout, " ${blu}-user${rs} ${bld}NAME${rs}\n");
+ cfprintf(cout, " Find files owned by the group/user ${bld}NAME${rs}\n");
+ cfprintf(cout, " ${blu}-hidden${rs}\n");
+ cfprintf(cout, " Find hidden files\n");
+#ifdef FNM_CASEFOLD
+ cfprintf(cout, " ${blu}-ilname${rs} ${bld}GLOB${rs}\n");
+ cfprintf(cout, " ${blu}-iname${rs} ${bld}GLOB${rs}\n");
+ cfprintf(cout, " ${blu}-ipath${rs} ${bld}GLOB${rs}\n");
+ cfprintf(cout, " ${blu}-iregex${rs} ${bld}REGEX${rs}\n");
+ cfprintf(cout, " ${blu}-iwholename${rs} ${bld}GLOB${rs}\n");
+ cfprintf(cout, " Case-insensitive versions of ${blu}-lname${rs}/${blu}-name${rs}/${blu}-path${rs}"
+ "/${blu}-regex${rs}/${blu}-wholename${rs}\n");
+#endif
+ cfprintf(cout, " ${blu}-inum${rs} ${bld}[-+]N${rs}\n");
+ cfprintf(cout, " Find files with inode number ${bld}N${rs}\n");
+ cfprintf(cout, " ${blu}-links${rs} ${bld}[-+]N${rs}\n");
+ cfprintf(cout, " Find files with ${bld}N${rs} hard links\n");
+ cfprintf(cout, " ${blu}-lname${rs} ${bld}GLOB${rs}\n");
+ cfprintf(cout, " Find symbolic links whose target matches the ${bld}GLOB${rs}\n");
+ cfprintf(cout, " ${blu}-name${rs} ${bld}GLOB${rs}\n");
+ cfprintf(cout, " Find files whose name matches the ${bld}GLOB${rs}\n");
+ cfprintf(cout, " ${blu}-newer${rs} ${bld}FILE${rs}\n");
+ cfprintf(cout, " Find files newer than ${bld}FILE${rs}\n");
+ cfprintf(cout, " ${blu}-newer${bld}XY${rs} ${bld}REFERENCE${rs}\n");
+ cfprintf(cout, " Find files whose ${bld}X${rs} time is newer than the ${bld}Y${rs} time of"
+ " ${bld}REFERENCE${rs}. ${bld}X${rs} and ${bld}Y${rs}\n");
+ cfprintf(cout, " can be any of [${bld}aBcm${rs}]. ${bld}Y${rs} may also be ${bld}t${rs} to parse ${bld}REFERENCE${rs} an explicit\n");
+ cfprintf(cout, " timestamp.\n");
+ cfprintf(cout, " ${blu}-nogroup${rs}\n");
+ cfprintf(cout, " ${blu}-nouser${rs}\n");
+ cfprintf(cout, " Find files owned by nonexistent groups/users\n");
+ cfprintf(cout, " ${blu}-path${rs} ${bld}GLOB${rs}\n");
+ cfprintf(cout, " ${blu}-wholename${rs} ${bld}GLOB${rs}\n");
+ cfprintf(cout, " Find files whose entire path matches the ${bld}GLOB${rs}\n");
+ cfprintf(cout, " ${blu}-perm${rs} ${bld}[-]MODE${rs}\n");
+ cfprintf(cout, " Find files with a matching mode\n");
+ cfprintf(cout, " ${blu}-regex${rs} ${bld}REGEX${rs}\n");
+ cfprintf(cout, " Find files whose entire path matches the regular expression ${bld}REGEX${rs}\n");
+ cfprintf(cout, " ${blu}-samefile${rs} ${bld}FILE${rs}\n");
+ cfprintf(cout, " Find hard links to ${bld}FILE${rs}\n");
+ cfprintf(cout, " ${blu}-since${rs} ${bld}TIME${rs}\n");
+ cfprintf(cout, " Find files modified since ${bld}TIME${rs}\n");
+ cfprintf(cout, " ${blu}-size${rs} ${bld}[-+]N[cwbkMGTP]${rs}\n");
+ cfprintf(cout, " Find files with the given size, in 1-byte ${bld}c${rs}haracters, 2-byte ${bld}w${rs}ords,\n");
+ cfprintf(cout, " 512-byte ${bld}b${rs}locks (default), or ${bld}k${rs}iB/${bld}M${rs}iB/${bld}G${rs}iB/${bld}T${rs}iB/${bld}P${rs}iB\n");
+ cfprintf(cout, " ${blu}-sparse${rs}\n");
+ cfprintf(cout, " Find files that occupy fewer disk blocks than expected\n");
+ cfprintf(cout, " ${blu}-type${rs} ${bld}[bcdlpfswD]${rs}\n");
+ cfprintf(cout, " Find files of the given type\n");
+ cfprintf(cout, " ${blu}-used${rs} ${bld}[-+]N${rs}\n");
+ cfprintf(cout, " Find files last accessed ${bld}N${rs} days after they were changed\n");
+#if BFS_CAN_CHECK_XATTRS
+ cfprintf(cout, " ${blu}-xattr${rs}\n");
+ cfprintf(cout, " Find files with extended attributes\n");
+ cfprintf(cout, " ${blu}-xattrname${rs} ${bld}NAME${rs}\n");
+ cfprintf(cout, " Find files with the extended attribute ${bld}NAME${rs}\n");
+#endif
+ cfprintf(cout, " ${blu}-xtype${rs} ${bld}[bcdlpfswD]${rs}\n");
+ cfprintf(cout, " Find files of the given type, following links when ${blu}-type${rs} would not, and\n");
+ cfprintf(cout, " vice versa\n\n");
+
+ cfprintf(cout, "${bld}Actions:${rs}\n\n");
+
+ cfprintf(cout, " ${blu}-delete${rs}\n");
+ cfprintf(cout, " ${blu}-rm${rs}\n");
+ cfprintf(cout, " Delete any found files (implies ${blu}-depth${rs})\n");
+ cfprintf(cout, " ${blu}-exec${rs} ${bld}command ... {} ;${rs}\n");
+ cfprintf(cout, " Execute a command\n");
+ cfprintf(cout, " ${blu}-exec${rs} ${bld}command ... {} +${rs}\n");
+ cfprintf(cout, " Execute a command with multiple files at once\n");
+ cfprintf(cout, " ${blu}-ok${rs} ${bld}command ... {} ;${rs}\n");
+ cfprintf(cout, " Prompt the user whether to execute a command\n");
+ cfprintf(cout, " ${blu}-execdir${rs} ${bld}command ... {} ;${rs}\n");
+ cfprintf(cout, " ${blu}-execdir${rs} ${bld}command ... {} +${rs}\n");
+ cfprintf(cout, " ${blu}-okdir${rs} ${bld}command ... {} ;${rs}\n");
+ cfprintf(cout, " Like ${blu}-exec${rs}/${blu}-ok${rs}, but run the command in the same directory as the found\n");
+ cfprintf(cout, " file(s)\n");
+ cfprintf(cout, " ${blu}-exit${rs} [${bld}STATUS${rs}]\n");
+ cfprintf(cout, " Exit immediately with the given status (%d if unspecified)\n", EXIT_SUCCESS);
+ cfprintf(cout, " ${blu}-fls${rs} ${bld}FILE${rs}\n");
+ cfprintf(cout, " ${blu}-fprint${rs} ${bld}FILE${rs}\n");
+ cfprintf(cout, " ${blu}-fprint0${rs} ${bld}FILE${rs}\n");
+ cfprintf(cout, " ${blu}-fprintf${rs} ${bld}FILE${rs} ${bld}FORMAT${rs}\n");
+ cfprintf(cout, " Like ${blu}-ls${rs}/${blu}-print${rs}/${blu}-print0${rs}/${blu}-printf${rs}, but write to ${bld}FILE${rs} instead of standard\n"
+ " output\n");
+ cfprintf(cout, " ${blu}-limit${rs} ${bld}N${rs}\n");
+ cfprintf(cout, " Quit after this action is evaluated ${bld}N${rs} times\n");
+ cfprintf(cout, " ${blu}-ls${rs}\n");
+ cfprintf(cout, " List files like ${ex}ls${rs} ${bld}-dils${rs}\n");
+ cfprintf(cout, " ${blu}-print${rs}\n");
+ cfprintf(cout, " Print the path to the found file\n");
+ cfprintf(cout, " ${blu}-print0${rs}\n");
+ cfprintf(cout, " Like ${blu}-print${rs}, but use the null character ('\\0') as a separator rather than\n");
+ cfprintf(cout, " newlines\n");
+ cfprintf(cout, " ${blu}-printf${rs} ${bld}FORMAT${rs}\n");
+ cfprintf(cout, " Print according to a format string (see ${ex}man${rs} ${bld}find${rs}). The additional format\n");
+ cfprintf(cout, " directives %%w and %%W${bld}k${rs} for printing file birth times are supported.\n");
+ cfprintf(cout, " ${blu}-printx${rs}\n");
+ cfprintf(cout, " Like ${blu}-print${rs}, but escape whitespace and quotation characters, to make the\n");
+ cfprintf(cout, " output safe for ${ex}xargs${rs}. Consider using ${blu}-print0${rs} and ${ex}xargs${rs} ${bld}-0${rs} instead.\n");
+ cfprintf(cout, " ${blu}-prune${rs}\n");
+ cfprintf(cout, " Don't descend into this directory\n");
+ cfprintf(cout, " ${blu}-quit${rs}\n");
+ cfprintf(cout, " Quit immediately\n");
+ cfprintf(cout, " ${blu}-version${rs}\n");
+ cfprintf(cout, " Print version information\n");
+ cfprintf(cout, " ${blu}-help${rs}\n");
+ cfprintf(cout, " Print this help message\n\n");
+
+ cfprintf(cout, "%s\n", BFS_HOMEPAGE);
+
+ if (pager > 0) {
+ cfclose(cout);
+ xwaitpid(pager, NULL, 0);
+ }
+
+ parser->just_info = true;
+ return NULL;
+}
+
+/**
+ * "Parse" -version.
+ *
+ * Prints the command name and version, followed by the homepage, to the
+ * context's standard output stream.  No expression is produced: the parser
+ * is flagged as informational only (just_info) and NULL is returned.
+ * arg1 and arg2 are unused.
+ */
+static struct bfs_expr *parse_version(struct bfs_parser *parser, int arg1, int arg2) {
+	CFILE *cout = parser->ctx->cout;
+
+	cfprintf(cout, "${ex}%s${rs} ${bld}%s${rs}\n\n", BFS_COMMAND, bfs_version);
+
+	// Write the homepage through the same stream as the line above (as
+	// parse_help does) instead of going around it with printf()
+	cfprintf(cout, "%s\n", BFS_HOMEPAGE);
+
+	parser->just_info = true;
+	return NULL;
+}
+
+typedef struct bfs_expr *parse_fn(struct bfs_parser *parser, int arg1, int arg2); // signature shared by every parse_*() callback in the table
+
+/**
+ * An entry in the parse table for primary expressions.
+ *
+ * Entries are written with positional initializers, so the order of the
+ * fields below must not change.
+ */
+struct table_entry {
+ char *arg; // the argument text to match; also the end-of-table marker when NULL
+ enum token_type type; // token kind; picks the color of "did you mean" suggestions
+ parse_fn *parse; // callback for this argument, or NULL if it cannot start a primary
+ int arg1; // first extra value handed to the callback
+ int arg2; // second extra value handed to the callback
+ bool prefix; // match arg as a prefix of the argument rather than exactly
+};
+
+/**
+ * The parse table for primary expressions.
+ *
+ * Each entry is {arg, type, parse, arg1, arg2, prefix}; omitted trailing
+ * fields are zero.  The table ends with an all-zero sentinel entry.
+ *
+ * table_lookup() scans linearly and returns the first match, so the order of
+ * entries matters wherever two of them could match the same argument: the
+ * exact "-newer" entry has to stay ahead of the prefix "-newer" entry.
+ *
+ * Entries without a parse callback ("--", "-a", "-and", "-exclude", "-not",
+ * "-o", "-or") are known to the table but rejected by parse_primary().
+ */
+static const struct table_entry parse_table[] = {
+ {"--", T_FLAG},
+ {"--help", T_ACTION, parse_help},
+ {"--version", T_ACTION, parse_version},
+ {"-Bmin", T_TEST, parse_min, BFS_STAT_BTIME},
+ {"-Bnewer", T_TEST, parse_newer, BFS_STAT_BTIME},
+ {"-Bsince", T_TEST, parse_since, BFS_STAT_BTIME},
+ {"-Btime", T_TEST, parse_time, BFS_STAT_BTIME},
+ {"-D", T_FLAG, parse_debug},
+ {"-E", T_FLAG, parse_regex_extended},
+ {"-H", T_FLAG, parse_follow, BFTW_FOLLOW_ROOTS, false},
+ {"-L", T_FLAG, parse_follow, BFTW_FOLLOW_ALL, false},
+ {"-O", T_FLAG, parse_optlevel, 0, 0, true},
+ {"-P", T_FLAG, parse_follow, 0, false},
+ {"-S", T_FLAG, parse_search_strategy},
+ {"-X", T_FLAG, parse_xargs_safe},
+ {"-a", T_OPERATOR},
+ {"-acl", T_TEST, parse_acl},
+ {"-amin", T_TEST, parse_min, BFS_STAT_ATIME},
+ {"-and", T_OPERATOR},
+ {"-anewer", T_TEST, parse_newer, BFS_STAT_ATIME},
+ {"-asince", T_TEST, parse_since, BFS_STAT_ATIME},
+ {"-atime", T_TEST, parse_time, BFS_STAT_ATIME},
+ {"-capable", T_TEST, parse_capable},
+ {"-cmin", T_TEST, parse_min, BFS_STAT_CTIME},
+ {"-cnewer", T_TEST, parse_newer, BFS_STAT_CTIME},
+ {"-color", T_OPTION, parse_color, true},
+ {"-context", T_TEST, parse_context, true},
+ {"-csince", T_TEST, parse_since, BFS_STAT_CTIME},
+ {"-ctime", T_TEST, parse_time, BFS_STAT_CTIME},
+ {"-d", T_FLAG, parse_depth},
+ {"-daystart", T_OPTION, parse_daystart},
+ {"-delete", T_ACTION, parse_delete},
+ {"-depth", T_OPTION, parse_depth_n},
+ {"-empty", T_TEST, parse_empty},
+ {"-exclude", T_OPERATOR},
+ {"-exec", T_ACTION, parse_exec, 0},
+ {"-execdir", T_ACTION, parse_exec, BFS_EXEC_CHDIR},
+ {"-executable", T_TEST, parse_access, X_OK},
+ {"-exit", T_ACTION, parse_exit},
+ {"-f", T_FLAG, parse_f},
+ {"-false", T_TEST, parse_const, false},
+ {"-files0-from", T_OPTION, parse_files0_from},
+ {"-flags", T_TEST, parse_flags},
+ {"-fls", T_ACTION, parse_fls},
+ {"-follow", T_OPTION, parse_follow, BFTW_FOLLOW_ALL, true},
+ {"-fprint", T_ACTION, parse_fprint},
+ {"-fprint0", T_ACTION, parse_fprint0},
+ {"-fprintf", T_ACTION, parse_fprintf},
+ {"-fstype", T_TEST, parse_fstype},
+ {"-gid", T_TEST, parse_group},
+ {"-group", T_TEST, parse_group},
+ {"-help", T_ACTION, parse_help},
+ {"-hidden", T_TEST, parse_hidden},
+ {"-ignore_readdir_race", T_OPTION, parse_ignore_races, true},
+ {"-ilname", T_TEST, parse_lname, true},
+ {"-iname", T_TEST, parse_name, true},
+ {"-inum", T_TEST, parse_inum},
+ {"-ipath", T_TEST, parse_path, true},
+ {"-iregex", T_TEST, parse_regex, BFS_REGEX_ICASE},
+ {"-iwholename", T_TEST, parse_path, true},
+ {"-j", T_FLAG, parse_jobs, 0, 0, true},
+ {"-limit", T_ACTION, parse_limit},
+ {"-links", T_TEST, parse_links},
+ {"-lname", T_TEST, parse_lname, false},
+ {"-ls", T_ACTION, parse_ls},
+ {"-maxdepth", T_OPTION, parse_depth_limit, false},
+ {"-mindepth", T_OPTION, parse_depth_limit, true},
+ {"-mmin", T_TEST, parse_min, BFS_STAT_MTIME},
+ {"-mnewer", T_TEST, parse_newer, BFS_STAT_MTIME},
+ {"-mount", T_OPTION, parse_mount},
+ {"-msince", T_TEST, parse_since, BFS_STAT_MTIME},
+ {"-mtime", T_TEST, parse_time, BFS_STAT_MTIME},
+ {"-name", T_TEST, parse_name, false},
+ {"-newer", T_TEST, parse_newer, BFS_STAT_MTIME},
+ {"-newer", T_TEST, parse_newerxy, 0, 0, true},
+ {"-nocolor", T_OPTION, parse_color, false},
+ {"-nogroup", T_TEST, parse_nogroup},
+ {"-nohidden", T_TEST, parse_nohidden},
+ {"-noignore_readdir_race", T_OPTION, parse_ignore_races, false},
+ {"-noleaf", T_OPTION, parse_noleaf},
+ {"-not", T_OPERATOR},
+ {"-nouser", T_TEST, parse_nouser},
+ {"-nowarn", T_OPTION, parse_warn, false},
+ {"-o", T_OPERATOR},
+ {"-ok", T_ACTION, parse_exec, BFS_EXEC_CONFIRM},
+ {"-okdir", T_ACTION, parse_exec, BFS_EXEC_CONFIRM | BFS_EXEC_CHDIR},
+ {"-or", T_OPERATOR},
+ {"-path", T_TEST, parse_path, false},
+ {"-perm", T_TEST, parse_perm},
+ {"-print", T_ACTION, parse_print},
+ {"-print0", T_ACTION, parse_print0},
+ {"-printf", T_ACTION, parse_printf},
+ {"-printx", T_ACTION, parse_printx},
+ {"-prune", T_ACTION, parse_prune},
+ {"-quit", T_ACTION, parse_quit},
+ {"-readable", T_TEST, parse_access, R_OK},
+ {"-regex", T_TEST, parse_regex, 0},
+ {"-regextype", T_OPTION, parse_regextype},
+ {"-rm", T_ACTION, parse_delete},
+ {"-s", T_FLAG, parse_s},
+ {"-samefile", T_TEST, parse_samefile},
+ {"-since", T_TEST, parse_since, BFS_STAT_MTIME},
+ {"-size", T_TEST, parse_size},
+ {"-sparse", T_TEST, parse_sparse},
+ {"-status", T_OPTION, parse_status},
+ {"-true", T_TEST, parse_const, true},
+ {"-type", T_TEST, parse_type, false},
+ {"-uid", T_TEST, parse_user},
+ {"-unique", T_OPTION, parse_unique},
+ {"-used", T_TEST, parse_used},
+ {"-user", T_TEST, parse_user},
+ {"-version", T_ACTION, parse_version},
+ {"-warn", T_OPTION, parse_warn, true},
+ {"-wholename", T_TEST, parse_path, false},
+ {"-writable", T_TEST, parse_access, W_OK},
+ {"-x", T_FLAG, parse_xdev},
+ {"-xattr", T_TEST, parse_xattr},
+ {"-xattrname", T_TEST, parse_xattrname},
+ {"-xdev", T_OPTION, parse_xdev},
+ {"-xtype", T_TEST, parse_type, true},
+ {0},
+};
+
+/**
+ * Find the first parse table entry that matches an argument.
+ *
+ * Prefix entries match when the argument starts with the entry's text; all
+ * other entries must match exactly.  Returns NULL when nothing matches.
+ */
+static const struct table_entry *table_lookup(const char *arg) {
+	const struct table_entry *entry = parse_table;
+
+	while (entry->arg) {
+		int cmp = entry->prefix
+			? strncmp(arg, entry->arg, strlen(entry->arg))
+			: strcmp(arg, entry->arg);
+		if (cmp == 0) {
+			return entry;
+		}
+
+		++entry;
+	}
+
+	return NULL;
+}
+
+/**
+ * Find the parse table entry with the smallest typo_distance() to an argument.
+ *
+ * Ties are resolved in favor of the earliest entry.  The result is only NULL
+ * if the table has no entries.
+ */
+static const struct table_entry *table_lookup_fuzzy(const char *arg) {
+	const struct table_entry *closest = NULL;
+	int min_dist = INT_MAX;
+
+	const struct table_entry *entry;
+	for (entry = parse_table; entry->arg; ++entry) {
+		int dist = typo_distance(arg, entry->arg);
+		if (closest && dist >= min_dist) {
+			// Not strictly better than what we already have
+			continue;
+		}
+
+		closest = entry;
+		min_dist = dist;
+	}
+
+	return closest;
+}
+
+/**
+ * PRIMARY : OPTION
+ *         | TEST
+ *         | ACTION
+ *
+ * Looks the current argument up in the parse table and hands it to the
+ * entry's callback.  An unknown argument produces a "did you mean" hint for
+ * the closest table entry; in interactive mode the user may accept the
+ * suggestion, in which case argv[0] is replaced by it and parsing goes on.
+ */
+static struct bfs_expr *parse_primary(struct bfs_parser *parser) {
+	// skip_paths() has already run by now, so this is not a path
+	const char *arg = parser->argv[0];
+	if (arg[0] != '-') {
+		parse_error(parser, "Expected a predicate.\n");
+		return NULL;
+	}
+
+	const struct table_entry *entry = table_lookup(arg);
+	if (entry) {
+		if (!entry->parse) {
+			// Known to the table, but not something a primary can start with
+			parse_error(parser, "Expected a predicate.\n");
+			return NULL;
+		}
+
+		return entry->parse(parser, entry->arg1, entry->arg2);
+	}
+
+	// No exact match, so suggest the nearest one
+	entry = table_lookup_fuzzy(arg);
+
+	CFILE *cerr = parser->ctx->cerr;
+	parse_error(parser, "Unknown argument; did you mean ");
+	if (entry->type == T_FLAG) {
+		cfprintf(cerr, "${cyn}%s${rs}?", entry->arg);
+	} else if (entry->type == T_OPERATOR) {
+		cfprintf(cerr, "${red}%s${rs}?", entry->arg);
+	} else {
+		cfprintf(cerr, "${blu}%s${rs}?", entry->arg);
+	}
+
+	bool can_prompt = parser->interactive && entry->parse;
+	if (!can_prompt) {
+		fprintf(stderr, "\n");
+		return NULL;
+	}
+
+	fprintf(stderr, " ");
+	if (ynprompt() <= 0) {
+		return NULL;
+	}
+
+	// The suggestion was accepted: continue as if it had been typed
+	fprintf(stderr, "\n");
+	parser->argv[0] = entry->arg;
+	return entry->parse(parser, entry->arg1, entry->arg2);
+}
+
+/**
+ * FACTOR : "(" EXPR ")"
+ *        | "!" FACTOR | "-not" FACTOR
+ *        | "-exclude" FACTOR
+ *        | PRIMARY
+ *
+ * Returns the parsed factor, or NULL after an error.  An "-exclude" factor
+ * is appended to the context's exclude expression and replaced in the tree
+ * by an always-true expression that spans its arguments.
+ */
+static struct bfs_expr *parse_factor(struct bfs_parser *parser) {
+	if (skip_paths(parser) != 0) {
+		return NULL;
+	}
+
+	const char *arg = parser->argv[0];
+	if (!arg) {
+		parse_argv_error(parser, parser->last_arg, 1, "Expression terminated prematurely here.\n");
+		return NULL;
+	}
+
+	if (strcmp(arg, "(") == 0) {
+		parser_advance(parser, T_OPERATOR, 1);
+
+		struct bfs_expr *inner = parse_expr(parser);
+		if (!inner) {
+			return NULL;
+		}
+
+		if (skip_paths(parser) != 0) {
+			return NULL;
+		}
+
+		const char *rparen = parser->argv[0];
+		bool closed = rparen && strcmp(rparen, ")") == 0;
+		if (!closed) {
+			parse_argv_error(parser, parser->last_arg, 1, "Expected a ${red})${rs}.\n");
+			return NULL;
+		}
+
+		parser_advance(parser, T_OPERATOR, 1);
+		return inner;
+	}
+
+	if (strcmp(arg, "-exclude") == 0) {
+		if (parser->excluding) {
+			parse_error(parser, "${err}%s${rs} is not supported within ${red}-exclude${rs}.\n", arg);
+			return NULL;
+		}
+
+		char **argv = parser_advance(parser, T_OPERATOR, 1);
+
+		// The flag is only set while the excluded factor is being parsed
+		parser->excluding = true;
+		struct bfs_expr *excluded = parse_factor(parser);
+		if (!excluded) {
+			return NULL;
+		}
+		parser->excluding = false;
+
+		bfs_expr_append(parser->ctx->exclude, excluded);
+		return parse_new_expr(parser, eval_true, parser->argv - argv, argv);
+	}
+
+	if (strcmp(arg, "!") == 0 || strcmp(arg, "-not") == 0) {
+		char **argv = parser_advance(parser, T_OPERATOR, 1);
+
+		struct bfs_expr *negated = parse_factor(parser);
+		if (!negated) {
+			return NULL;
+		}
+
+		return new_unary_expr(parser, eval_not, negated, argv);
+	}
+
+	return parse_primary(parser);
+}
+
+/**
+ * TERM : FACTOR
+ *      | TERM FACTOR
+ *      | TERM "-a" FACTOR
+ *      | TERM "-and" FACTOR
+ *
+ * Factors are joined left to right with eval_and.  A conjunction written
+ * without an explicit operator is attributed to fake_and_arg.
+ */
+static struct bfs_expr *parse_term(struct bfs_parser *parser) {
+	struct bfs_expr *lhs = parse_factor(parser);
+
+	while (lhs) {
+		if (skip_paths(parser) != 0) {
+			return NULL;
+		}
+
+		const char *next = parser->argv[0];
+		if (!next) {
+			break;
+		}
+
+		// These tokens belong to an enclosing rule and end the term
+		bool ends_term = strcmp(next, "-o") == 0
+			|| strcmp(next, "-or") == 0
+			|| strcmp(next, ",") == 0
+			|| strcmp(next, ")") == 0;
+		if (ends_term) {
+			break;
+		}
+
+		bool explicit_and = strcmp(next, "-a") == 0 || strcmp(next, "-and") == 0;
+		char **argv = explicit_and
+			? parser_advance(parser, T_OPERATOR, 1)
+			: &fake_and_arg;
+
+		struct bfs_expr *rhs = parse_factor(parser);
+		if (!rhs) {
+			return NULL;
+		}
+
+		lhs = new_binary_expr(parser, eval_and, lhs, rhs, argv);
+	}
+
+	return lhs;
+}
+
+/**
+ * CLAUSE : TERM
+ *        | CLAUSE "-o" TERM
+ *        | CLAUSE "-or" TERM
+ *
+ * Terms are joined left to right with eval_or.
+ */
+static struct bfs_expr *parse_clause(struct bfs_parser *parser) {
+	struct bfs_expr *lhs = parse_term(parser);
+
+	while (lhs) {
+		if (skip_paths(parser) != 0) {
+			return NULL;
+		}
+
+		const char *next = parser->argv[0];
+		if (!next) {
+			break;
+		}
+
+		bool is_or = strcmp(next, "-o") == 0 || strcmp(next, "-or") == 0;
+		if (!is_or) {
+			break;
+		}
+
+		char **argv = parser_advance(parser, T_OPERATOR, 1);
+
+		struct bfs_expr *rhs = parse_term(parser);
+		if (!rhs) {
+			return NULL;
+		}
+
+		lhs = new_binary_expr(parser, eval_or, lhs, rhs, argv);
+	}
+
+	return lhs;
+}
+
+/**
+ * EXPR : CLAUSE
+ *      | EXPR "," CLAUSE
+ *
+ * Clauses are joined left to right with eval_comma.
+ */
+static struct bfs_expr *parse_expr(struct bfs_parser *parser) {
+	struct bfs_expr *lhs = parse_clause(parser);
+
+	while (lhs) {
+		if (skip_paths(parser) != 0) {
+			return NULL;
+		}
+
+		const char *next = parser->argv[0];
+		if (!next) {
+			break;
+		}
+
+		bool is_comma = strcmp(next, ",") == 0;
+		if (!is_comma) {
+			break;
+		}
+
+		char **argv = parser_advance(parser, T_OPERATOR, 1);
+
+		struct bfs_expr *rhs = parse_clause(parser);
+		if (!rhs) {
+			return NULL;
+		}
+
+		lhs = new_binary_expr(parser, eval_comma, lhs, rhs, argv);
+	}
+
+	return lhs;
+}
+
+/**
+ * Parse the top-level expression.
+ *
+ * An empty command line yields an always-true expression.  When the parser
+ * still wants an implicit print, the result is wrapped as (expr -a -print).
+ * After that, the cross-argument checks run: -mount together with -xdev,
+ * -depth together with -prune, and -ok together with -files0-from reading
+ * standard input.
+ *
+ * Returns the complete expression, or NULL if parsing failed or was aborted.
+ */
+static struct bfs_expr *parse_whole_expr(struct bfs_parser *parser) {
+	if (skip_paths(parser) != 0) {
+		return NULL;
+	}
+
+	struct bfs_expr *expr = parser->argv[0]
+		? parse_expr(parser)
+		: parse_new_expr(parser, eval_true, 1, &fake_true_arg);
+	if (!expr) {
+		return NULL;
+	}
+
+	// Anything left over could not be made part of the expression
+	if (parser->argv[0]) {
+		parse_error(parser, "Unexpected argument.\n");
+		return NULL;
+	}
+
+	if (parser->implicit_print) {
+		char **limit = parser->limit_arg;
+		if (limit) {
+			parse_argv_error(parser, parser->limit_arg, 2,
+				"With ${blu}%s${rs}, you must specify an action explicitly; for example, ${blu}-print${rs} ${blu}%s${rs} ${bld}%s${rs}.\n",
+				limit[0], limit[0], limit[1]);
+			return NULL;
+		}
+
+		struct bfs_expr *print = parse_new_expr(parser, eval_fprint, 1, &fake_print_arg);
+		if (!print) {
+			return NULL;
+		}
+		init_print_expr(parser, print);
+
+		struct bfs_expr *both = new_binary_expr(parser, eval_and, expr, print, &fake_and_arg);
+		if (!both) {
+			return NULL;
+		}
+		expr = both;
+	}
+
+	bool mount_and_xdev = parser->mount_arg && parser->xdev_arg;
+	if (mount_and_xdev) {
+		parse_conflict_warning(parser, parser->mount_arg, 1, parser->xdev_arg, 1,
+			"${blu}%s${rs} is redundant in the presence of ${blu}%s${rs}.\n\n",
+			parser->xdev_arg[0], parser->mount_arg[0]);
+	}
+
+	bool depth_and_prune = parser->ctx->warn && parser->depth_arg && parser->prune_arg;
+	if (depth_and_prune) {
+		parse_conflict_warning(parser, parser->depth_arg, 1, parser->prune_arg, 1,
+			"${blu}%s${rs} does not work in the presence of ${blu}%s${rs}.\n",
+			parser->prune_arg[0], parser->depth_arg[0]);
+
+		if (parser->interactive) {
+			bfs_warning(parser->ctx, "Do you want to continue? ");
+			// Only an explicit "no" aborts here
+			if (ynprompt() == 0) {
+				return NULL;
+			}
+		}
+
+		fprintf(stderr, "\n");
+	}
+
+	struct bfs_expr *ok = parser->ok_expr;
+	if (ok && parser->files0_stdin_arg) {
+		parse_conflict_error(parser, ok->argv, ok->argc, parser->files0_stdin_arg, 2,
+			"${blu}%s${rs} conflicts with ${blu}%s${rs} ${bld}%s${rs}.\n",
+			ok->argv[0], parser->files0_stdin_arg[0], parser->files0_stdin_arg[1]);
+		return NULL;
+	}
+
+	return expr;
+}
+
+/**
+ * Get the name of a search strategy, as spelled on the command line.
+ *
+ * Any value outside the enumeration is reported through bfs_bug(), and "???"
+ * is returned for it.
+ */
+static const char *bftw_strategy_name(enum bftw_strategy strategy) {
+	const char *name = NULL;
+
+	switch (strategy) {
+	case BFTW_BFS:
+		name = "bfs";
+		break;
+	case BFTW_DFS:
+		name = "dfs";
+		break;
+	case BFTW_IDS:
+		name = "ids";
+		break;
+	case BFTW_EDS:
+		name = "eds";
+		break;
+	}
+
+	if (name) {
+		return name;
+	}
+
+	bfs_bug("Invalid strategy");
+	return "???";
+}
+
+/**
+ * Dump an expression to the error stream, one node per line.
+ *
+ * @param ctx
+ *         The context whose cerr stream is written to.
+ * @param flag
+ *         The debug flag used for the line prefix; DEBUG_RATES selects the
+ *         %pE format for leaf expressions instead of %pe.
+ * @param expr
+ *         The expression to dump.
+ * @param indent
+ *         How many indentation units to print before the node.
+ * @param rparens
+ *         How many closing parentheses are owed by enclosing parents once
+ *         this node has been printed.
+ */
+static void dump_expr_multiline(const struct bfs_ctx *ctx, enum debug_flags flag, const struct bfs_expr *expr, int indent, int rparens) {
+	bfs_debug_prefix(ctx, flag);
+
+	CFILE *cerr = ctx->cerr;
+
+	for (int left = indent; left > 0; --left) {
+		cfprintf(cerr, " ");
+	}
+
+	if (!bfs_expr_is_parent(expr)) {
+		if (flag == DEBUG_RATES) {
+			cfprintf(cerr, "%pE", expr);
+		} else {
+			cfprintf(cerr, "%pe", expr);
+		}
+	} else if (SLIST_EMPTY(&expr->children)) {
+		// A parent without children closes itself on the same line
+		cfprintf(cerr, "(${red}%s${rs}", expr->argv[0]);
+		++rparens;
+	} else {
+		cfprintf(cerr, "(${red}%s${rs}\n", expr->argv[0]);
+
+		for (struct bfs_expr *child = bfs_expr_children(expr); child; child = child->next) {
+			// Only the last child prints the pending parentheses, plus ours
+			int parens = child->next ? 0 : rparens + 1;
+			dump_expr_multiline(ctx, flag, child, indent + 1, parens);
+		}
+
+		return;
+	}
+
+	for (int left = rparens; left > 0; --left) {
+		cfprintf(cerr, ")");
+	}
+	cfprintf(cerr, "\n");
+}
+
+/**
+ * Print a reconstructed command line for the context, followed by the
+ * -exclude expression tree and the main expression tree, to ctx->cerr.
+ *
+ * Nothing is printed past the prefix check when bfs_debug_prefix() returns
+ * false (presumably because the debug flag is not enabled -- TODO confirm).
+ */
+void bfs_ctx_dump(const struct bfs_ctx *ctx, enum debug_flags flag) {
+ if (!bfs_debug_prefix(ctx, flag)) {
+ return;
+ }
+
+ CFILE *cerr = ctx->cerr;
+
+ cfprintf(cerr, "${ex}%s${rs}", ctx->argv[0]);
+
+ // Symbolic link mode: -L, -H, or -P when neither follow flag is set
+ if (ctx->flags & BFTW_FOLLOW_ALL) {
+ cfprintf(cerr, " ${cyn}-L${rs}");
+ } else if (ctx->flags & BFTW_FOLLOW_ROOTS) {
+ cfprintf(cerr, " ${cyn}-H${rs}");
+ } else {
+ cfprintf(cerr, " ${cyn}-P${rs}");
+ }
+
+ if (ctx->xargs_safe) {
+ cfprintf(cerr, " ${cyn}-X${rs}");
+ }
+
+ if (ctx->flags & BFTW_SORT) {
+ cfprintf(cerr, " ${cyn}-s${rs}");
+ }
+
+ // The thread count is always shown
+ cfprintf(cerr, " ${cyn}-j${bld}%d${rs}", ctx->threads);
+
+ // The optimization level is only shown when it differs from 3
+ if (ctx->optlevel != 3) {
+ cfprintf(cerr, " ${cyn}-O${bld}%d${rs}", ctx->optlevel);
+ }
+
+ cfprintf(cerr, " ${cyn}-S${rs} ${bld}%s${rs}", bftw_strategy_name(ctx->strategy));
+
+ enum debug_flags debug = ctx->debug;
+ if (debug == DEBUG_ALL) {
+ cfprintf(cerr, " ${cyn}-D${rs} ${bld}all${rs}");
+ } else if (debug) {
+ cfprintf(cerr, " ${cyn}-D${rs} ");
+ // Walk every bit covered by DEBUG_ALL, printing the name of each set flag.
+ // Printed bits are cleared from the local copy, so a non-zero remainder
+ // means more names follow and a comma is needed.
+ for (enum debug_flags i = 1; DEBUG_ALL & i; i <<= 1) {
+ if (debug & i) {
+ cfprintf(cerr, "${bld}%s${rs}", debug_flag_name(i));
+ debug ^= i;
+ if (debug) {
+ cfprintf(cerr, ",");
+ }
+ }
+ }
+ }
+
+ for (size_t i = 0; i < ctx->npaths; ++i) {
+ const char *path = ctx->paths[i];
+ char c = path[0];
+ // Paths starting with one of these characters get an explicit -f
+ // (presumably so they cannot be mistaken for part of the expression)
+ if (c == '-' || c == '(' || c == ')' || c == '!' || c == ',') {
+ cfprintf(cerr, " ${cyn}-f${rs}");
+ }
+ cfprintf(cerr, " ${mag}%pq${rs}", path);
+ }
+
+ // Reflects whether standard output has colors, not the stream being written to
+ if (ctx->cout->colors) {
+ cfprintf(cerr, " ${blu}-color${rs}");
+ } else {
+ cfprintf(cerr, " ${blu}-nocolor${rs}");
+ }
+ if (ctx->flags & BFTW_POST_ORDER) {
+ cfprintf(cerr, " ${blu}-depth${rs}");
+ }
+ if (ctx->ignore_races) {
+ cfprintf(cerr, " ${blu}-ignore_readdir_race${rs}");
+ }
+ if (ctx->mindepth != 0) {
+ cfprintf(cerr, " ${blu}-mindepth${rs} ${bld}%d${rs}", ctx->mindepth);
+ }
+ if (ctx->maxdepth != INT_MAX) {
+ cfprintf(cerr, " ${blu}-maxdepth${rs} ${bld}%d${rs}", ctx->maxdepth);
+ }
+ if (ctx->flags & BFTW_SKIP_MOUNTS) {
+ cfprintf(cerr, " ${blu}-mount${rs}");
+ }
+ if (ctx->status) {
+ cfprintf(cerr, " ${blu}-status${rs}");
+ }
+ if (ctx->unique) {
+ cfprintf(cerr, " ${blu}-unique${rs}");
+ }
+ // -xdev is shown only when BFTW_PRUNE_MOUNTS is set without BFTW_SKIP_MOUNTS,
+ // since BFTW_SKIP_MOUNTS is already shown as -mount above
+ if ((ctx->flags & (BFTW_SKIP_MOUNTS | BFTW_PRUNE_MOUNTS)) == BFTW_PRUNE_MOUNTS) {
+ cfprintf(cerr, " ${blu}-xdev${rs}");
+ }
+
+ // NOTE(review): this writes to stderr directly instead of going through cerr
+ // like the rest of the function
+ fputs("\n", stderr);
+
+ // The exclusion tree is wrapped in an explicit -exclude node; passing
+ // rparens = 1 makes the nested dump close that wrapper
+ bfs_debug(ctx, flag, "(${red}-exclude${rs}\n");
+ dump_expr_multiline(ctx, flag, ctx->exclude, 1, 1);
+
+ dump_expr_multiline(ctx, flag, ctx->expr, 0, 0);
+}
+
+/**
+ * Report the estimated cost and probability of the whole expression
+ * through the DEBUG_COST debug channel.
+ */
+static void dump_costs(const struct bfs_ctx *ctx) {
+	const struct bfs_expr *root = ctx->expr;
+
+	bfs_debug(ctx, DEBUG_COST, " Cost: ~${ylw}%g${rs}\n", root->cost);
+
+	// The probability is shown as a percentage
+	bfs_debug(ctx, DEBUG_COST, "Probability: ~${ylw}%g%%${rs}\n", 100.0 * root->probability);
+}
+
+/**
+ * Parse the command line into a newly allocated bfs context.
+ *
+ * Sets up the context (argv copy, colors, wrapped stdout/stderr), parses the
+ * whole expression, runs the optimizer, and adds the implicit "." root when
+ * no paths were given.
+ *
+ * Returns the context on success, or NULL on failure.  When parsing produces
+ * no expression but parser.just_info is set, the context is still returned,
+ * with ctx->expr left NULL and the optimization/dump steps skipped.
+ */
+struct bfs_ctx *bfs_parse_cmdline(int argc, char *argv[]) {
+ struct bfs_ctx *ctx = bfs_ctx_new();
+ if (!ctx) {
+ perror("bfs_ctx_new()");
+ // ctx is NULL here, so the fail path relies on bfs_ctx_free() accepting
+ // NULL -- TODO confirm
+ goto fail;
+ }
+
+ // With no argv[0] at all, fall back to the default command name
+ static char *default_argv[] = {BFS_COMMAND, NULL};
+ if (argc < 1) {
+ argc = 1;
+ argv = default_argv;
+ }
+
+ // Copy argc + 1 pointers, i.e. the arguments plus the terminating NULL entry
+ ctx->argc = argc;
+ ctx->argv = xmemdup(argv, sizeof_array(char *, argc + 1));
+ if (!ctx->argv) {
+ perror("xmemdup()");
+ goto fail;
+ }
+
+ // Colors default to automatic, unless $NO_COLOR is set and non-empty
+ enum use_color use_color = COLOR_AUTO;
+ const char *no_color = getenv("NO_COLOR");
+ if (no_color && *no_color) {
+ // https://no-color.org/
+ use_color = COLOR_NEVER;
+ }
+
+ // A failure here is not fatal: errno is saved so that it can be reported
+ // after parsing, once we know whether colors are still wanted
+ ctx->colors = parse_colors();
+ if (!ctx->colors) {
+ ctx->colors_error = errno;
+ }
+
+ // cerr does not exist yet, so failures up to this point use plain perror().
+ // NOTE(review): use_color is tested as a boolean, which presumably relies
+ // on COLOR_NEVER being zero -- confirm.
+ ctx->cerr = cfwrap(stderr, use_color ? ctx->colors : NULL, false);
+ if (!ctx->cerr) {
+ perror("cfwrap()");
+ goto fail;
+ }
+
+ ctx->cout = cfwrap(stdout, use_color ? ctx->colors : NULL, false);
+ if (!ctx->cout) {
+ bfs_perror(ctx, "cfwrap()");
+ goto fail;
+ }
+
+ if (!bfs_ctx_dedup(ctx, ctx->cout, NULL) || !bfs_ctx_dedup(ctx, ctx->cerr, NULL)) {
+ bfs_perror(ctx, "bfs_ctx_dedup()");
+ goto fail;
+ }
+
+ bool stdin_tty = isatty(STDIN_FILENO);
+ bool stdout_tty = isatty(STDOUT_FILENO);
+ bool stderr_tty = isatty(STDERR_FILENO);
+
+ // With $POSIXLY_CORRECT set, ctx->warn keeps its initial value; otherwise
+ // warnings are enabled only when standard input is a terminal
+ if (getenv("POSIXLY_CORRECT")) {
+ ctx->posixly_correct = true;
+ } else {
+ ctx->warn = stdin_tty;
+ }
+
+ struct bfs_parser parser = {
+ .ctx = ctx,
+ .argv = ctx->argv + 1,
+ .command = ctx->argv[0],
+ .regex_type = BFS_REGEX_POSIX_BASIC,
+ .stdout_tty = stdout_tty,
+ .interactive = stdin_tty && stderr_tty,
+ .use_color = use_color,
+ .implicit_print = true,
+ .implicit_root = true,
+ .just_info = false,
+ .excluding = false,
+ .last_arg = NULL,
+ .depth_arg = NULL,
+ .prune_arg = NULL,
+ .mount_arg = NULL,
+ .xdev_arg = NULL,
+ .files0_stdin_arg = NULL,
+ .ok_expr = NULL,
+ .now = ctx->now,
+ };
+
+ // The exclusion expression starts out as a bare eval_or node
+ ctx->exclude = parse_new_expr(&parser, eval_or, 1, &fake_or_arg);
+ if (!ctx->exclude) {
+ goto fail;
+ }
+
+ ctx->expr = parse_whole_expr(&parser);
+ if (!ctx->expr) {
+ // A missing expression is not an error when the parser flagged the
+ // command line as informational only
+ if (parser.just_info) {
+ goto done;
+ } else {
+ goto fail;
+ }
+ }
+
+ // Report the earlier parse_colors() failure only if colors are still
+ // in automatic mode after parsing
+ if (parser.use_color == COLOR_AUTO && !ctx->colors) {
+ bfs_warning(ctx, "Error parsing $$LS_COLORS: %s.\n\n", xstrerror(ctx->colors_error));
+ }
+
+ if (bfs_optimize(ctx) != 0) {
+ bfs_perror(ctx, "bfs_optimize()");
+ goto fail;
+ }
+
+ // Search the current directory when no paths were given
+ if (ctx->npaths == 0 && parser.implicit_root) {
+ if (parse_root(&parser, ".") != 0) {
+ goto fail;
+ }
+ }
+
+ if ((ctx->flags & BFTW_FOLLOW_ALL) && !ctx->unique) {
+ // We need bftw() to detect cycles unless -unique does it for us
+ ctx->flags |= BFTW_DETECT_CYCLES;
+ }
+
+ bfs_ctx_dump(ctx, DEBUG_TREE);
+ dump_costs(ctx);
+
+done:
+ return ctx;
+
+fail:
+ bfs_ctx_free(ctx);
+ return NULL;
+}
diff --git a/src/parse.h b/src/parse.h
new file mode 100644
index 0000000..6895c9f
--- /dev/null
+++ b/src/parse.h
@@ -0,0 +1,23 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+/**
+ * bfs command line parsing.
+ */
+
+#ifndef BFS_PARSE_H
+#define BFS_PARSE_H
+
+/**
+ * Parse the command line.
+ *
+ * @param argc
+ * The number of arguments. If this is less than 1, a default argument
+ * vector containing only BFS_COMMAND is parsed instead.
+ * @param argv
+ * The arguments to parse. The array of pointers (argc + 1 entries) is
+ * copied into the returned context.
+ * @return
+ * A new bfs context, or NULL on failure.
+ */
+struct bfs_ctx *bfs_parse_cmdline(int argc, char *argv[]);
+
+#endif // BFS_PARSE_H
diff --git a/src/prelude.h b/src/prelude.h
new file mode 100644
index 0000000..72f88b0
--- /dev/null
+++ b/src/prelude.h
@@ -0,0 +1,370 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+/**
+ * Configuration and feature/platform detection.
+ */
+
+#ifndef BFS_PRELUDE_H
+#define BFS_PRELUDE_H
+
+// Possible __STDC_VERSION__ values
+//
+// Each constant is the value __STDC_VERSION__ has for that revision of the
+// standard, so checks can be written like `__STDC_VERSION__ < C23`.
+
+#define C95 199409L
+#define C99 199901L
+#define C11 201112L
+#define C17 201710L
+#define C23 202311L
+
+#include <stddef.h>
+
+// Before C23, the alignas, bool, and noreturn spellings come from these headers
+#if __STDC_VERSION__ < C23
+# include <stdalign.h>
+# include <stdbool.h>
+# include <stdnoreturn.h>
+#endif
+
+// bfs packaging configuration
+
+#include "config.h"
+
+// Defaults for anything not already defined at this point
+#ifndef BFS_COMMAND
+# define BFS_COMMAND "bfs"
+#endif
+#ifndef BFS_HOMEPAGE
+# define BFS_HOMEPAGE "https://tavianator.com/projects/bfs.html"
+#endif
+
+// This is a symbol instead of a literal so we don't have to rebuild everything
+// when the version number changes
+extern const char bfs_version[];
+
+// Check for system headers
+//
+// BFS_HAS_<HEADER>_H records whether a header was detected.
+// BFS_USE_<HEADER>_H (below) defaults to the detected value.
+
+#ifdef __has_include
+
+// The compiler can tell us directly whether each header exists
+#if __has_include(<mntent.h>)
+# define BFS_HAS_MNTENT_H true
+#endif
+#if __has_include(<paths.h>)
+# define BFS_HAS_PATHS_H true
+#endif
+#if __has_include(<sys/extattr.h>)
+# define BFS_HAS_SYS_EXTATTR_H true
+#endif
+#if __has_include(<sys/mkdev.h>)
+# define BFS_HAS_SYS_MKDEV_H true
+#endif
+#if __has_include(<sys/param.h>)
+# define BFS_HAS_SYS_PARAM_H true
+#endif
+#if __has_include(<sys/sysmacros.h>)
+# define BFS_HAS_SYS_SYSMACROS_H true
+#endif
+#if __has_include(<sys/xattr.h>)
+# define BFS_HAS_SYS_XATTR_H true
+#endif
+#if __has_include(<threads.h>)
+# define BFS_HAS_THREADS_H true
+#endif
+#if __has_include(<util.h>)
+# define BFS_HAS_UTIL_H true
+#endif
+
+#else // !__has_include
+
+// Without __has_include, guess from platform and libc macros instead
+#define BFS_HAS_MNTENT_H __GLIBC__
+#define BFS_HAS_PATHS_H true
+#define BFS_HAS_SYS_EXTATTR_H __FreeBSD__
+#define BFS_HAS_SYS_MKDEV_H false
+#define BFS_HAS_SYS_PARAM_H true
+#define BFS_HAS_SYS_SYSMACROS_H __GLIBC__
+#define BFS_HAS_SYS_XATTR_H __linux__
+#define BFS_HAS_THREADS_H (!__STDC_NO_THREADS__)
+#define BFS_HAS_UTIL_H __NetBSD__
+
+#endif // !__has_include
+
+// Each BFS_USE_* macro can be predefined to override the detection above
+#ifndef BFS_USE_MNTENT_H
+# define BFS_USE_MNTENT_H BFS_HAS_MNTENT_H
+#endif
+#ifndef BFS_USE_PATHS_H
+# define BFS_USE_PATHS_H BFS_HAS_PATHS_H
+#endif
+#ifndef BFS_USE_SYS_EXTATTR_H
+# define BFS_USE_SYS_EXTATTR_H BFS_HAS_SYS_EXTATTR_H
+#endif
+#ifndef BFS_USE_SYS_MKDEV_H
+# define BFS_USE_SYS_MKDEV_H BFS_HAS_SYS_MKDEV_H
+#endif
+#ifndef BFS_USE_SYS_PARAM_H
+# define BFS_USE_SYS_PARAM_H BFS_HAS_SYS_PARAM_H
+#endif
+#ifndef BFS_USE_SYS_SYSMACROS_H
+# define BFS_USE_SYS_SYSMACROS_H BFS_HAS_SYS_SYSMACROS_H
+#endif
+#ifndef BFS_USE_SYS_XATTR_H
+# define BFS_USE_SYS_XATTR_H BFS_HAS_SYS_XATTR_H
+#endif
+#ifndef BFS_USE_THREADS_H
+# define BFS_USE_THREADS_H BFS_HAS_THREADS_H
+#endif
+#ifndef BFS_USE_UTIL_H
+# define BFS_USE_UTIL_H BFS_HAS_UTIL_H
+#endif
+
+// Stub out feature detection on old/incompatible compilers
+//
+// When a compiler lacks one of these checks, it is defined here as a macro
+// that always expands to false, so the #if tests below can use it
+// unconditionally.
+
+#ifndef __has_feature
+# define __has_feature(feat) false
+#endif
+
+#ifndef __has_c_attribute
+# define __has_c_attribute(attr) false
+#endif
+
+#ifndef __has_attribute
+# define __has_attribute(attr) false
+#endif
+
+// Fundamental utilities
+
+/**
+ * Get the length of an array.
+ *
+ * Divides the size of the whole array by the size of its first element
+ * (0[array] is another spelling of array[0]).
+ */
+#define countof(array) (sizeof(array) / sizeof(0[array]))
+
+/**
+ * False sharing/destructive interference/largest cache line size.
+ *
+ * Uses the compiler-provided __GCC_DESTRUCTIVE_SIZE when it is defined, and
+ * falls back to 64 otherwise.
+ */
+#ifdef __GCC_DESTRUCTIVE_SIZE
+# define FALSE_SHARING_SIZE __GCC_DESTRUCTIVE_SIZE
+#else
+# define FALSE_SHARING_SIZE 64
+#endif
+
+/**
+ * True sharing/constructive interference/smallest cache line size.
+ *
+ * Uses the compiler-provided __GCC_CONSTRUCTIVE_SIZE when it is defined, and
+ * falls back to 64 otherwise.
+ */
+#ifdef __GCC_CONSTRUCTIVE_SIZE
+# define TRUE_SHARING_SIZE __GCC_CONSTRUCTIVE_SIZE
+#else
+# define TRUE_SHARING_SIZE 64
+#endif
+
+/**
+ * Polyfill max_align_t if we don't already have it.
+ *
+ * BFS_HAS_MAX_ALIGN_T is not defined in this header (presumably it comes from
+ * config.h). The polyfill is a union aligned to __BIGGEST_ALIGNMENT__ when
+ * the compiler defines it, or otherwise a union of long double, long long,
+ * and void *.
+ */
+#if !BFS_HAS_MAX_ALIGN_T
+typedef union {
+# ifdef __BIGGEST_ALIGNMENT__
+ alignas(__BIGGEST_ALIGNMENT__) char c;
+# else
+ long double ld;
+ long long ll;
+ void *ptr;
+# endif
+} max_align_t;
+#endif
+
+/**
+ * Alignment specifier that avoids false sharing.
+ */
+#define cache_align alignas(FALSE_SHARING_SIZE)
+
+// Wrappers for attributes
+//
+// Each wrapper expands to the GNU-style __attribute__ when __has_attribute()
+// reports support for it, and to a harmless fallback otherwise.
+
+/**
+ * Silence warnings about switch/case fall-throughs.
+ *
+ * The fallback is an empty expression, so `fallthru;` remains a valid statement.
+ */
+#if __has_attribute(fallthrough)
+# define fallthru __attribute__((fallthrough))
+#else
+# define fallthru ((void)0)
+#endif
+
+/**
+ * Silence warnings about unused declarations.
+ */
+#if __has_attribute(unused)
+# define attr_maybe_unused __attribute__((unused))
+#else
+# define attr_maybe_unused
+#endif
+
+/**
+ * Warn if a value is unused.
+ */
+#if __has_attribute(warn_unused_result)
+# define attr_nodiscard __attribute__((warn_unused_result))
+#else
+# define attr_nodiscard
+#endif
+
+/**
+ * Hint to avoid inlining a function.
+ */
+#if __has_attribute(noinline)
+# define attr_noinline __attribute__((noinline))
+#else
+# define attr_noinline
+#endif
+
+/**
+ * Hint that a function is unlikely to be called.
+ *
+ * Always includes attr_noinline, whether or not the cold attribute exists.
+ */
+#if __has_attribute(cold)
+# define attr_cold attr_noinline __attribute__((cold))
+#else
+# define attr_cold attr_noinline
+#endif
+
+/**
+ * Adds compiler warnings for bad printf()-style function calls, if supported.
+ *
+ * @param fmt
+ * The position of the format string parameter, as passed to format(printf, ...).
+ * @param args
+ * The position of the first formatted argument, as passed to format(printf, ...).
+ */
+#if __has_attribute(format)
+# define attr_printf(fmt, args) __attribute__((format(printf, fmt, args)))
+#else
+# define attr_printf(fmt, args)
+#endif
+
+/**
+ * Annotates functions that potentially modify and return format strings.
+ */
+#if __has_attribute(format_arg)
+# define attr_format_arg(arg) __attribute__((format_arg(arg)))
+#else
+# define attr_format_arg(args)
+#endif
+
+/**
+ * Annotates allocator-like functions.
+ *
+ * Every variant also applies attr_nodiscard. The arguments are forwarded to
+ * the malloc attribute only on GCC 11 or newer without optimization; in all
+ * other cases they are dropped.
+ */
+#if __has_attribute(malloc)
+# if __GNUC__ >= 11 && !__OPTIMIZE__ // malloc(deallocator) disables inlining on GCC
+# define attr_malloc(...) attr_nodiscard __attribute__((malloc(__VA_ARGS__)))
+# else
+# define attr_malloc(...) attr_nodiscard __attribute__((malloc))
+# endif
+#else
+# define attr_malloc(...) attr_nodiscard
+#endif
+
+/**
+ * Specifies that a function returns allocations with a given alignment.
+ */
+#if __has_attribute(alloc_align)
+# define attr_alloc_align(param) __attribute__((alloc_align(param)))
+#else
+# define attr_alloc_align(param)
+#endif
+
+/**
+ * Specifies that a function returns allocations with a given size.
+ */
+#if __has_attribute(alloc_size)
+# define attr_alloc_size(...) __attribute__((alloc_size(__VA_ARGS__)))
+#else
+# define attr_alloc_size(...)
+#endif
+
+/**
+ * Shorthand for attr_alloc_align() and attr_alloc_size().
+ *
+ * The first argument goes to attr_alloc_align(); the rest go to attr_alloc_size().
+ */
+#define attr_aligned_alloc(align, ...) \
+ attr_alloc_align(align) \
+ attr_alloc_size(__VA_ARGS__)
+
+/**
+ * Check if function multiversioning via GNU indirect functions (ifunc) is supported.
+ *
+ * Only decided here if BFS_USE_TARGET_CLONES was not already defined.
+ */
+#ifndef BFS_USE_TARGET_CLONES
+# if __has_attribute(target_clones) && (__GLIBC__ || __FreeBSD__)
+# define BFS_USE_TARGET_CLONES true
+# endif
+#endif
+
+/**
+ * Apply the target_clones attribute, if available.
+ */
+#if BFS_USE_TARGET_CLONES
+# define attr_target_clones(...) __attribute__((target_clones(__VA_ARGS__)))
+#else
+# define attr_target_clones(...)
+#endif
+
+/**
+ * Shorthand for multiple attributes at once. attr(a, b(c), d) is equivalent to
+ *
+ * attr_a
+ * attr_b(c)
+ * attr_d
+ *
+ * The caller's arguments are followed by nine `none` placeholders and one
+ * empty argument, so attr__() always has something to bind to each of its
+ * named parameters.
+ */
+#define attr(...) \
+ attr__(attr_##__VA_ARGS__, none, none, none, none, none, none, none, none, none, )
+
+/**
+ * attr() helper. For exposition, pretend we support only 2 args, instead of 9.
+ * There are a few cases:
+ *
+ * attr()
+ * => attr__(attr_, none, none)
+ * => attr_ =>
+ * attr_none =>
+ * attr_too_many_none() =>
+ *
+ * attr(a)
+ * => attr__(attr_a, none, none)
+ * => attr_a => __attribute__((a))
+ * attr_none =>
+ * attr_too_many_none() =>
+ *
+ * attr(a, b(c))
+ * => attr__(attr_a, b(c), none, none)
+ * => attr_a => __attribute__((a))
+ * attr_b(c) => __attribute__((b(c)))
+ * attr_too_many_none(none) =>
+ *
+ * attr(a, b(c), d)
+ * => attr__(attr_a, b(c), d, none, none)
+ * => attr_a => __attribute__((a))
+ * attr_b(c) => __attribute__((b(c)))
+ * attr_too_many_d(none, none) => error
+ *
+ * Some attribute names are the same as standard library functions, e.g. printf.
+ * Standard libraries are permitted to define these functions as macros, like
+ *
+ * #define printf(...) __builtin_printf(__VA_ARGS__)
+ *
+ * The token paste in
+ *
+ * #define attr(...) attr__(attr_##__VA_ARGS__, none, none)
+ *
+ * is necessary to prevent macro expansion before evaluating attr__().
+ * Otherwise, we could get
+ *
+ * attr(printf(1, 2))
+ * => attr__(__builtin_printf(1, 2), none, none)
+ * => attr____builtin_printf(1, 2)
+ * => error
+ */
+// a1 arrives already prefixed with attr_ (pasted in attr()); a2 through a9 are
+// prefixed here. The tenth parameter is `none` unless more than nine
+// attributes were passed, in which case attr_too_many_<name> is undefined.
+#define attr__(a1, a2, a3, a4, a5, a6, a7, a8, a9, none, ...) \
+ a1 \
+ attr_##a2 \
+ attr_##a3 \
+ attr_##a4 \
+ attr_##a5 \
+ attr_##a6 \
+ attr_##a7 \
+ attr_##a8 \
+ attr_##a9 \
+ attr_too_many_##none(__VA_ARGS__)
+
+// Ignore `attr_none` from expanding 1-9 argument attr(a1, a2, ...)
+#define attr_none
+// Ignore `attr_` from expanding 0-argument attr()
+#define attr_
+// Only trigger an error on more than 9 arguments
+#define attr_too_many_none(...)
+
+#endif // BFS_PRELUDE_H
diff --git a/src/printf.c b/src/printf.c
new file mode 100644
index 0000000..f8428f7
--- /dev/null
+++ b/src/printf.c
@@ -0,0 +1,968 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+#include "prelude.h"
+#include "printf.h"
+#include "alloc.h"
+#include "bfstd.h"
+#include "bftw.h"
+#include "color.h"
+#include "ctx.h"
+#include "diag.h"
+#include "dir.h"
+#include "dstring.h"
+#include "expr.h"
+#include "fsade.h"
+#include "mtab.h"
+#include "pwcache.h"
+#include "stat.h"
+#include <errno.h>
+#include <grp.h>
+#include <pwd.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+// Forward declaration, since bfs_printf_fn and struct bfs_fmt refer to each other
+struct bfs_fmt;
+
+/**
+ * A function implementing a printf directive.
+ *
+ * @param cfile
+ * The colored stream to write to.
+ * @param fmt
+ * The directive being printed.
+ * @param ftwbuf
+ * The file currently being visited.
+ * @return
+ * The implementations in this file return a negative value on failure.
+ */
+typedef int bfs_printf_fn(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf);
+
+/**
+ * A single formatting directive like %f or %#4m.
+ */
+struct bfs_fmt {
+ /** The printing function to invoke. */
+ bfs_printf_fn *fn;
+ /**
+ * String data associated with this directive. For literals this is the
+ * text to write; for other directives, bfs_fprintf() uses it as the
+ * format string passed to vfprintf().
+ */
+ dchar *str;
+ /** The stat field to print. */
+ enum bfs_stat_field stat_field;
+ /** Character data associated with this directive (e.g. the strftime() conversion character). */
+ char c;
+ /** Some data used by the directive (e.g. the group or user cache for %g and %u). */
+ void *ptr;
+};
+
+/**
+ * An entire format string.
+ *
+ * The array is grown one element at a time with RESERVE() by
+ * append_literal() and append_directive().
+ */
+struct bfs_printf {
+ /** An array of formatting directives. */
+ struct bfs_fmt *fmts;
+ /** The number of directives. */
+ size_t nfmts;
+};
+
+/** Write the directive's text to the stream verbatim; 0 on success, -1 on a short write. */
+static int bfs_printf_literal(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) {
+	const size_t expected = dstrlen(fmt->str);
+	const size_t written = fwrite(fmt->str, 1, expected, cfile->file);
+	return written == expected ? 0 : -1;
+}
+
+/** \c: flush the underlying stream */
+static int bfs_printf_flush(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) {
+	FILE *stream = cfile->file;
+	return fflush(stream);
+}
+
+/** Colors are only used on a colored stream, and only for a plain "%s" directive. */
+static bool should_color(CFILE *cfile, const struct bfs_fmt *fmt) {
+	if (!cfile->colors) {
+		return false;
+	}
+
+	return strcmp(fmt->str, "%s") == 0;
+}
+
+/**
+ * Print a value to a temporary buffer before formatting it.
+ *
+ * Declares two locals in the enclosing scope: a 256-byte array named by the
+ * first argument, and an int named `ret`. It can therefore be used at most
+ * once per scope. The assertion checks that snprintf() succeeded and that
+ * the output was not truncated.
+ */
+#define BFS_PRINTF_BUF(buf, format, ...) \
+ char buf[256]; \
+ int ret = snprintf(buf, sizeof(buf), format, __VA_ARGS__); \
+ bfs_assert(ret >= 0 && (size_t)ret < sizeof(buf)); \
+ (void)ret
+
+/**
+ * Return a dynamic format string.
+ *
+ * @param str
+ * The real format string, returned unchanged.
+ * @param fake
+ * A literal format string with the same conversion as str. The
+ * format_arg(2) attribute points at this parameter, so the compiler can
+ * check arguments against it.
+ * @return
+ * str.
+ */
+attr(format_arg(2))
+static const char *dyn_fmt(const char *str, const char *fake) {
+ // Skip the first character of fake (the '%'), and check that str ends with
+ // what follows it. str may have flags, a width, and a precision after its
+ // own '%', so only the tails are compared.
+ bfs_assert(strcmp(str + strlen(str) - strlen(fake) + 1, fake + 1) == 0,
+ "Mismatched format specifiers: '%s' vs. '%s'", str, fake);
+ return str;
+}
+
+/**
+ * Wrapper for fprintf() that prints with the directive's own format string.
+ * The `fake` literal only exists so the compiler can check the arguments.
+ */
+attr(printf(3, 4))
+static int bfs_fprintf(CFILE *cfile, const struct bfs_fmt *fmt, const char *fake, ...) {
+	va_list ap;
+	va_start(ap, fake);
+
+	const char *real = dyn_fmt(fmt->str, fake);
+	int written = vfprintf(cfile->file, real, ap);
+
+	va_end(ap);
+	return written;
+}
+
+/** %a, %c, %t: ctime()-style timestamp */
+static int bfs_printf_ctime(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) {
+	// ctime() itself is not used, because GNU find adds nanoseconds
+	static const char *months[] = {"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"};
+	static const char *days[] = {"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"};
+
+	const struct bfs_stat *sb = bftw_stat(ftwbuf, ftwbuf->stat_flags);
+	const struct timespec *when = sb ? bfs_stat_time(sb, fmt->stat_field) : NULL;
+	if (!when) {
+		return -1;
+	}
+
+	struct tm local;
+	if (localtime_r(&when->tv_sec, &local) == NULL) {
+		return -1;
+	}
+
+	BFS_PRINTF_BUF(text, "%s %s %2d %.2d:%.2d:%.2d.%09ld0 %4d",
+		days[local.tm_wday],
+		months[local.tm_mon],
+		local.tm_mday,
+		local.tm_hour,
+		local.tm_min,
+		local.tm_sec,
+		(long)when->tv_nsec,
+		1900 + local.tm_year);
+
+	return bfs_fprintf(cfile, fmt, "%s", text);
+}
+
+/**
+ * %A, %B/%W, %C, %T: strftime()
+ *
+ * fmt->stat_field selects which timestamp to print, and fmt->c is the
+ * conversion character. A few conversions are formatted by hand; everything
+ * else is passed on to strftime(). The text is then printed as a string
+ * through the directive's own format.
+ */
+static int bfs_printf_strftime(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) {
+ const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags);
+ if (!statbuf) {
+ return -1;
+ }
+
+ const struct timespec *ts = bfs_stat_time(statbuf, fmt->stat_field);
+ if (!ts) {
+ return -1;
+ }
+
+ struct tm tm;
+ if (!localtime_r(&ts->tv_sec, &tm)) {
+ return -1;
+ }
+
+ int ret;
+ char buf[256];
+ // Template for the strftime() fallback; the second character is replaced
+ // by the conversion character in the default case below
+ char format[] = "% ";
+ switch (fmt->c) {
+ // Non-POSIX strftime() features
+ // (fractional seconds are the nanoseconds followed by a literal '0')
+ case '@':
+ ret = snprintf(buf, sizeof(buf), "%lld.%09ld0", (long long)ts->tv_sec, (long)ts->tv_nsec);
+ break;
+ case '+':
+ ret = snprintf(buf, sizeof(buf), "%4d-%.2d-%.2d+%.2d:%.2d:%.2d.%09ld0",
+ 1900 + tm.tm_year,
+ tm.tm_mon + 1,
+ tm.tm_mday,
+ tm.tm_hour,
+ tm.tm_min,
+ tm.tm_sec,
+ (long)ts->tv_nsec);
+ break;
+ case 'k':
+ ret = snprintf(buf, sizeof(buf), "%2d", tm.tm_hour);
+ break;
+ case 'l':
+ // 12-hour clock: hour 0 becomes 12, hour 13 becomes 1, and so on
+ ret = snprintf(buf, sizeof(buf), "%2d", (tm.tm_hour + 11) % 12 + 1);
+ break;
+ case 's':
+ ret = snprintf(buf, sizeof(buf), "%lld", (long long)ts->tv_sec);
+ break;
+ case 'S':
+ ret = snprintf(buf, sizeof(buf), "%.2d.%09ld0", tm.tm_sec, (long)ts->tv_nsec);
+ break;
+ case 'T':
+ ret = snprintf(buf, sizeof(buf), "%.2d:%.2d:%.2d.%09ld0",
+ tm.tm_hour,
+ tm.tm_min,
+ tm.tm_sec,
+ (long)ts->tv_nsec);
+ break;
+
+ // POSIX strftime() features
+ default:
+ format[1] = fmt->c;
+ // The format is built at run time, so the non-literal format warning
+ // is silenced around this one call
+#if __GNUC__
+# pragma GCC diagnostic push
+# pragma GCC diagnostic ignored "-Wformat-nonliteral"
+#endif
+ // NOTE(review): strftime() returns 0 both for empty output and when
+ // the result does not fit; the assertion below accepts either
+ ret = strftime(buf, sizeof(buf), format, &tm);
+#if __GNUC__
+# pragma GCC diagnostic pop
+#endif
+ break;
+ }
+
+ bfs_assert(ret >= 0 && (size_t)ret < sizeof(buf));
+ (void)ret;
+
+ return bfs_fprintf(cfile, fmt, "%s", buf);
+}
+
+/** %b: size in 512-byte blocks */
+static int bfs_printf_b(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) {
+	const struct bfs_stat *sb = bftw_stat(ftwbuf, ftwbuf->stat_flags);
+	if (sb == NULL) {
+		return -1;
+	}
+
+	// Convert from BFS_STAT_BLKSIZE units to 512-byte blocks, rounding up
+	uintmax_t bytes = (uintmax_t)sb->blocks * BFS_STAT_BLKSIZE;
+	uintmax_t blocks = (bytes + 511) / 512;
+
+	BFS_PRINTF_BUF(text, "%ju", blocks);
+	return bfs_fprintf(cfile, fmt, "%s", text);
+}
+
+/** %d: depth of the current file */
+static int bfs_printf_d(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) {
+	intmax_t depth = (intmax_t)ftwbuf->depth;
+	return bfs_fprintf(cfile, fmt, "%jd", depth);
+}
+
+/** %D: device number */
+static int bfs_printf_D(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) {
+	const struct bfs_stat *sb = bftw_stat(ftwbuf, ftwbuf->stat_flags);
+	if (sb == NULL) {
+		return -1;
+	}
+
+	uintmax_t dev = (uintmax_t)sb->dev;
+	BFS_PRINTF_BUF(text, "%ju", dev);
+	return bfs_fprintf(cfile, fmt, "%s", text);
+}
+
+/** %f: file name (the path from nameoff onwards) */
+static int bfs_printf_f(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) {
+	if (!should_color(cfile, fmt)) {
+		const char *name = ftwbuf->path + ftwbuf->nameoff;
+		return bfs_fprintf(cfile, fmt, "%s", name);
+	}
+
+	return cfprintf(cfile, "%pF", ftwbuf);
+}
+
+/** %F: file system type, looked up through the table stored in fmt->ptr */
+static int bfs_printf_F(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) {
+	const struct bfs_stat *sb = bftw_stat(ftwbuf, ftwbuf->stat_flags);
+	if (sb == NULL) {
+		return -1;
+	}
+
+	const char *fstype = bfs_fstype(fmt->ptr, sb);
+	if (fstype == NULL) {
+		return -1;
+	}
+
+	return bfs_fprintf(cfile, fmt, "%s", fstype);
+}
+
+/** %G: numeric group ID */
+static int bfs_printf_G(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) {
+	const struct bfs_stat *sb = bftw_stat(ftwbuf, ftwbuf->stat_flags);
+	if (sb == NULL) {
+		return -1;
+	}
+
+	uintmax_t gid = (uintmax_t)sb->gid;
+	BFS_PRINTF_BUF(text, "%ju", gid);
+	return bfs_fprintf(cfile, fmt, "%s", text);
+}
+
+/** %g: group name, or the numeric ID when the group is unknown */
+static int bfs_printf_g(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) {
+	const struct bfs_stat *sb = bftw_stat(ftwbuf, ftwbuf->stat_flags);
+	if (sb == NULL) {
+		return -1;
+	}
+
+	struct bfs_groups *cache = fmt->ptr;
+	const struct group *entry = bfs_getgrgid(cache, sb->gid);
+	if (entry) {
+		return bfs_fprintf(cfile, fmt, "%s", entry->gr_name);
+	}
+
+	// No such group: print the gid instead
+	return bfs_printf_G(cfile, fmt, ftwbuf);
+}
+
+/** %h: leading directories of the path */
+static int bfs_printf_h(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) {
+	char *copy = NULL;
+	const char *dir;
+
+	if (ftwbuf->nameoff > 0) {
+		// Take everything before the name, dropping the separator unless
+		// it is the only character
+		size_t len = ftwbuf->nameoff;
+		if (len > 1) {
+			--len;
+		}
+
+		copy = strndup(ftwbuf->path, len);
+		if (!copy) {
+			return -1;
+		}
+		dir = copy;
+	} else {
+		// The path is just a name
+		dir = (ftwbuf->path[0] == '/') ? "/" : ".";
+	}
+
+	int ret;
+	if (!should_color(cfile, fmt)) {
+		ret = bfs_fprintf(cfile, fmt, "%s", dir);
+	} else {
+		ret = cfprintf(cfile, "${di}%pQ${rs}", dir);
+	}
+
+	free(copy);
+	return ret;
+}
+
+/** %H: the root path the current file was found under */
+static int bfs_printf_H(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) {
+	if (!should_color(cfile, fmt)) {
+		return bfs_fprintf(cfile, fmt, "%s", ftwbuf->root);
+	}
+
+	if (ftwbuf->depth == 0) {
+		// At depth 0, print the whole path with %pP
+		return cfprintf(cfile, "%pP", ftwbuf);
+	}
+
+	return cfprintf(cfile, "${di}%pQ${rs}", ftwbuf->root);
+}
+
+/** %i: inode number */
+static int bfs_printf_i(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) {
+	const struct bfs_stat *sb = bftw_stat(ftwbuf, ftwbuf->stat_flags);
+	if (sb == NULL) {
+		return -1;
+	}
+
+	uintmax_t ino = (uintmax_t)sb->ino;
+	BFS_PRINTF_BUF(text, "%ju", ino);
+	return bfs_fprintf(cfile, fmt, "%s", text);
+}
+
+/** %k: size in 1024-byte blocks */
+static int bfs_printf_k(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) {
+	const struct bfs_stat *sb = bftw_stat(ftwbuf, ftwbuf->stat_flags);
+	if (sb == NULL) {
+		return -1;
+	}
+
+	// Convert from BFS_STAT_BLKSIZE units to 1024-byte blocks, rounding up
+	uintmax_t bytes = (uintmax_t)sb->blocks * BFS_STAT_BLKSIZE;
+	uintmax_t blocks = (bytes + 1023) / 1024;
+
+	BFS_PRINTF_BUF(text, "%ju", blocks);
+	return bfs_fprintf(cfile, fmt, "%s", text);
+}
+
+/** %l: symbolic link target (empty for anything that is not a link) */
+static int bfs_printf_l(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) {
+	if (ftwbuf->type != BFS_LNK) {
+		return bfs_fprintf(cfile, fmt, "%s", "");
+	}
+
+	if (should_color(cfile, fmt)) {
+		return cfprintf(cfile, "%pL", ftwbuf);
+	}
+
+	// Pass the size from any cached stat data to xreadlinkat(), or 0 if there is none
+	const struct bfs_stat *sb = bftw_cached_stat(ftwbuf, BFS_STAT_NOFOLLOW);
+	size_t hint = sb ? sb->size : 0;
+
+	char *target = xreadlinkat(ftwbuf->at_fd, ftwbuf->at_path, hint);
+	if (!target) {
+		return -1;
+	}
+
+	int ret = bfs_fprintf(cfile, fmt, "%s", target);
+	free(target);
+	return ret;
+}
+
+/** %m: permission bits, in octal */
+static int bfs_printf_m(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) {
+	const struct bfs_stat *sb = bftw_stat(ftwbuf, ftwbuf->stat_flags);
+	if (sb == NULL) {
+		return -1;
+	}
+
+	// Keep only the low twelve bits of the mode
+	unsigned int perms = (unsigned int)(sb->mode & 07777);
+	return bfs_fprintf(cfile, fmt, "%o", perms);
+}
+
+/** %M: mode in symbolic form, as produced by xstrmode() */
+static int bfs_printf_M(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) {
+	const struct bfs_stat *sb = bftw_stat(ftwbuf, ftwbuf->stat_flags);
+	if (sb == NULL) {
+		return -1;
+	}
+
+	char symbolic[11];
+	xstrmode(sb->mode, symbolic);
+
+	return bfs_fprintf(cfile, fmt, "%s", symbolic);
+}
+
+/** %n: number of hard links */
+static int bfs_printf_n(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) {
+	const struct bfs_stat *sb = bftw_stat(ftwbuf, ftwbuf->stat_flags);
+	if (sb == NULL) {
+		return -1;
+	}
+
+	uintmax_t nlink = (uintmax_t)sb->nlink;
+	BFS_PRINTF_BUF(text, "%ju", nlink);
+	return bfs_fprintf(cfile, fmt, "%s", text);
+}
+
+/** %p: the full path */
+static int bfs_printf_p(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) {
+	if (!should_color(cfile, fmt)) {
+		return bfs_fprintf(cfile, fmt, "%s", ftwbuf->path);
+	}
+
+	return cfprintf(cfile, "%pP", ftwbuf);
+}
+
+/** %P: the path with the root (and one following slash) removed */
+static int bfs_printf_P(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) {
+	size_t skip = strlen(ftwbuf->root);
+	if (ftwbuf->path[skip] == '/') {
+		++skip;
+	}
+
+	if (!should_color(cfile, fmt)) {
+		return bfs_fprintf(cfile, fmt, "%s", ftwbuf->path + skip);
+	}
+
+	// Nothing is printed for the root itself when coloring
+	if (ftwbuf->depth == 0) {
+		return 0;
+	}
+
+	// Print from a shifted copy, so that %pP sees only the relative path
+	struct BFTW relative = *ftwbuf;
+	relative.path += skip;
+	relative.nameoff -= skip;
+	return cfprintf(cfile, "%pP", &relative);
+}
+
+/** %s: size in bytes */
+static int bfs_printf_s(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) {
+	const struct bfs_stat *sb = bftw_stat(ftwbuf, ftwbuf->stat_flags);
+	if (sb == NULL) {
+		return -1;
+	}
+
+	uintmax_t size = (uintmax_t)sb->size;
+	BFS_PRINTF_BUF(text, "%ju", size);
+	return bfs_fprintf(cfile, fmt, "%s", text);
+}
+
+/** %S: sparseness (allocated bytes divided by the file size) */
+static int bfs_printf_S(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) {
+	const struct bfs_stat *sb = bftw_stat(ftwbuf, ftwbuf->stat_flags);
+	if (sb == NULL) {
+		return -1;
+	}
+
+	// A file with no size and no blocks is reported as 1.0
+	double sparsity = 1.0;
+	if (sb->size != 0 || sb->blocks != 0) {
+		sparsity = (double)BFS_STAT_BLKSIZE * sb->blocks / sb->size;
+	}
+
+	return bfs_fprintf(cfile, fmt, "%g", sparsity);
+}
+
+/** %U: numeric user ID */
+static int bfs_printf_U(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) {
+	const struct bfs_stat *sb = bftw_stat(ftwbuf, ftwbuf->stat_flags);
+	if (sb == NULL) {
+		return -1;
+	}
+
+	uintmax_t uid = (uintmax_t)sb->uid;
+	BFS_PRINTF_BUF(text, "%ju", uid);
+	return bfs_fprintf(cfile, fmt, "%s", text);
+}
+
+/** %u: user name, or the numeric ID when the user is unknown */
+static int bfs_printf_u(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) {
+	const struct bfs_stat *sb = bftw_stat(ftwbuf, ftwbuf->stat_flags);
+	if (sb == NULL) {
+		return -1;
+	}
+
+	struct bfs_users *cache = fmt->ptr;
+	const struct passwd *entry = bfs_getpwuid(cache, sb->uid);
+	if (entry) {
+		return bfs_fprintf(cfile, fmt, "%s", entry->pw_name);
+	}
+
+	// No such user: print the uid instead
+	return bfs_printf_U(cfile, fmt, ftwbuf);
+}
+
+/** Get the one-letter code for a file type; anything unrecognized maps to "U". */
+static const char *bfs_printf_type(enum bfs_type type) {
+	const char *letter = "U";
+
+	switch (type) {
+	case BFS_BLK:
+		letter = "b";
+		break;
+	case BFS_CHR:
+		letter = "c";
+		break;
+	case BFS_DIR:
+		letter = "d";
+		break;
+	case BFS_DOOR:
+		letter = "D";
+		break;
+	case BFS_FIFO:
+		letter = "p";
+		break;
+	case BFS_LNK:
+		letter = "l";
+		break;
+	case BFS_PORT:
+		letter = "P";
+		break;
+	case BFS_REG:
+		letter = "f";
+		break;
+	case BFS_SOCK:
+		letter = "s";
+		break;
+	case BFS_WHT:
+		letter = "w";
+		break;
+	default:
+		break;
+	}
+
+	return letter;
+}
+
+/** %y: one-letter type of the file itself */
+static int bfs_printf_y(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) {
+	return bfs_fprintf(cfile, fmt, "%s", bfs_printf_type(ftwbuf->type));
+}
+
+/**
+ * %Y: one-letter type, following links
+ *
+ * Prints "L" for ELOOP-like errors and "N" for ENOENT-like errors. For any
+ * other error, prints "?" and then fails with the original errno.
+ */
+static int bfs_printf_Y(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) {
+	enum bfs_type type = bftw_type(ftwbuf, BFS_STAT_FOLLOW);
+
+	const char *letter;
+	int saved = 0;
+
+	if (type != BFS_ERROR) {
+		letter = bfs_printf_type(type);
+	} else if (errno_is_like(ELOOP)) {
+		letter = "L";
+	} else if (errno_is_like(ENOENT)) {
+		letter = "N";
+	} else {
+		// Save errno, since printing may overwrite it
+		saved = errno;
+		letter = "?";
+	}
+
+	int ret = bfs_fprintf(cfile, fmt, "%s", letter);
+	if (saved == 0) {
+		return ret;
+	}
+
+	errno = saved;
+	return -1;
+}
+
+/** %Z: SELinux context of the file */
+attr(maybe_unused)
+static int bfs_printf_Z(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) {
+	char *context = bfs_getfilecon(ftwbuf);
+	if (context == NULL) {
+		return -1;
+	}
+
+	int written = bfs_fprintf(cfile, fmt, "%s", context);
+
+	bfs_freecon(context);
+	return written;
+}
+
+/**
+ * Append the pending literal text to the chain, if there is any.
+ *
+ * On success the new directive holds the old string, and *literal points to a
+ * fresh empty one.
+ *
+ * @return
+ *         0 on success, -1 on failure.
+ */
+static int append_literal(const struct bfs_ctx *ctx, struct bfs_printf *format, dchar **literal) {
+	if (dstrlen(*literal) == 0) {
+		// No text has accumulated since the last directive
+		return 0;
+	}
+
+	struct bfs_fmt *slot = RESERVE(struct bfs_fmt, &format->fmts, &format->nfmts);
+	if (!slot) {
+		bfs_perror(ctx, "RESERVE()");
+		return -1;
+	}
+
+	slot->fn = bfs_printf_literal;
+	slot->str = *literal;
+
+	// The directive now holds the old string, so start a new one
+	dchar *fresh = dstralloc(0);
+	*literal = fresh;
+	if (!fresh) {
+		bfs_perror(ctx, "dstralloc()");
+		return -1;
+	}
+
+	return 0;
+}
+
+/**
+ * Append a printf directive to the chain, after first appending any pending
+ * literal text.
+ *
+ * @return
+ *         0 on success, -1 on failure.
+ */
+static int append_directive(const struct bfs_ctx *ctx, struct bfs_printf *format, dchar **literal, struct bfs_fmt *fmt) {
+	int status = append_literal(ctx, format, literal);
+	if (status != 0) {
+		return -1;
+	}
+
+	struct bfs_fmt *slot = RESERVE(struct bfs_fmt, &format->fmts, &format->nfmts);
+	if (slot == NULL) {
+		bfs_perror(ctx, "RESERVE()");
+		return -1;
+	}
+
+	// Copy the caller's directive into the new slot
+	*slot = *fmt;
+	return 0;
+}
+
+/**
+ * Parse a -printf format string into expr->printf.
+ *
+ * Literal text (including decoded backslash escapes and "%%") accumulates in a
+ * pending string, which is appended to the chain before each directive and at
+ * the end of the format.  Each '%' directive becomes a struct bfs_fmt whose
+ * str holds '%', the flags, width, and precision as written, and a conversion
+ * specifier chosen for the directive.
+ *
+ * @param ctx
+ *         The bfs context.
+ * @param expr
+ *         The expression to fill in.
+ * @param format
+ *         The format string to parse.
+ * @return
+ *         0 on success.  On failure, the error is reported, expr->printf is
+ *         freed and set to NULL, and -1 is returned.
+ */
+int bfs_printf_parse(const struct bfs_ctx *ctx, struct bfs_expr *expr, const char *format) {
+	expr->printf = ZALLOC(struct bfs_printf);
+	if (!expr->printf) {
+		bfs_perror(ctx, "zalloc()");
+		return -1;
+	}
+
+	dchar *literal = dstralloc(0);
+	if (!literal) {
+		bfs_perror(ctx, "dstralloc()");
+		goto error;
+	}
+
+	for (const char *i = format; *i; ++i) {
+		char c = *i;
+
+		if (c == '\\') {
+			c = *++i;
+
+			// Up to three octal digits
+			if (c >= '0' && c < '8') {
+				c = 0;
+				for (int j = 0; j < 3 && *i >= '0' && *i < '8'; ++i, ++j) {
+					c *= 8;
+					c += *i - '0';
+				}
+				// The outer loop's ++i will step past the last digit
+				--i;
+				goto one_char;
+			}
+
+			switch (c) {
+			case 'a': c = '\a'; break;
+			case 'b': c = '\b'; break;
+			case 'f': c = '\f'; break;
+			case 'n': c = '\n'; break;
+			case 'r': c = '\r'; break;
+			case 't': c = '\t'; break;
+			case 'v': c = '\v'; break;
+			case '\\': c = '\\'; break;
+
+			case 'c':
+				// \c: append a flush directive and ignore the rest of the format
+				{
+					struct bfs_fmt fmt = {
+						.fn = bfs_printf_flush,
+					};
+					if (append_directive(ctx, expr->printf, &literal, &fmt) != 0) {
+						goto error;
+					}
+					goto done;
+				}
+
+			case '\0':
+				bfs_expr_error(ctx, expr);
+				bfs_error(ctx, "Incomplete escape sequence '\\'.\n");
+				goto error;
+
+			default:
+				bfs_expr_error(ctx, expr);
+				bfs_error(ctx, "Unrecognized escape sequence '\\%c'.\n", c);
+				goto error;
+			}
+		} else if (c == '%') {
+			if (i[1] == '%') {
+				c = *++i;
+				goto one_char;
+			}
+
+			struct bfs_fmt fmt = {
+				.str = dstralloc(2),
+			};
+			if (!fmt.str) {
+				// Report this like every other allocation failure here
+				bfs_perror(ctx, "dstralloc()");
+				goto fmt_error;
+			}
+			if (dstrapp(&fmt.str, c) != 0) {
+				bfs_perror(ctx, "dstrapp()");
+				goto fmt_error;
+			}
+
+			// The conversion appended to fmt.str; "s" unless the directive overrides it
+			const char *specifier = "s";
+
+			// Parse any flags
+			bool must_be_numeric = false;
+			while (true) {
+				c = *++i;
+
+				switch (c) {
+				case '#':
+				case '0':
+				case '+':
+					must_be_numeric = true;
+					fallthru;
+				case ' ':
+				case '-':
+					// fmt.str holds only '%' and the flags so far
+					if (strchr(fmt.str, c)) {
+						bfs_expr_error(ctx, expr);
+						bfs_error(ctx, "Duplicate flag '%c'.\n", c);
+						goto fmt_error;
+					}
+					if (dstrapp(&fmt.str, c) != 0) {
+						bfs_perror(ctx, "dstrapp()");
+						goto fmt_error;
+					}
+					continue;
+				}
+
+				break;
+			}
+
+			// Parse the field width
+			while (c >= '0' && c <= '9') {
+				if (dstrapp(&fmt.str, c) != 0) {
+					bfs_perror(ctx, "dstrapp()");
+					goto fmt_error;
+				}
+				c = *++i;
+			}
+
+			// Parse the precision
+			if (c == '.') {
+				do {
+					if (dstrapp(&fmt.str, c) != 0) {
+						bfs_perror(ctx, "dstrapp()");
+						goto fmt_error;
+					}
+					c = *++i;
+				} while (c >= '0' && c <= '9');
+			}
+
+			// Dispatch on the directive character
+			switch (c) {
+			case 'a':
+				fmt.fn = bfs_printf_ctime;
+				fmt.stat_field = BFS_STAT_ATIME;
+				break;
+			case 'b':
+				fmt.fn = bfs_printf_b;
+				break;
+			case 'c':
+				fmt.fn = bfs_printf_ctime;
+				fmt.stat_field = BFS_STAT_CTIME;
+				break;
+			case 'd':
+				fmt.fn = bfs_printf_d;
+				specifier = "jd";
+				break;
+			case 'D':
+				fmt.fn = bfs_printf_D;
+				break;
+			case 'f':
+				fmt.fn = bfs_printf_f;
+				break;
+			case 'F':
+				fmt.fn = bfs_printf_F;
+				fmt.ptr = (void *)bfs_ctx_mtab(ctx);
+				if (!fmt.ptr) {
+					// Save errno before the reporting calls can change it
+					int error = errno;
+					bfs_expr_error(ctx, expr);
+					bfs_error(ctx, "Couldn't parse the mount table: %s.\n", xstrerror(error));
+					goto fmt_error;
+				}
+				break;
+			case 'g':
+				fmt.fn = bfs_printf_g;
+				fmt.ptr = ctx->groups;
+				break;
+			case 'G':
+				fmt.fn = bfs_printf_G;
+				break;
+			case 'h':
+				fmt.fn = bfs_printf_h;
+				break;
+			case 'H':
+				fmt.fn = bfs_printf_H;
+				break;
+			case 'i':
+				fmt.fn = bfs_printf_i;
+				break;
+			case 'k':
+				fmt.fn = bfs_printf_k;
+				break;
+			case 'l':
+				fmt.fn = bfs_printf_l;
+				break;
+			case 'm':
+				fmt.fn = bfs_printf_m;
+				specifier = "o";
+				break;
+			case 'M':
+				fmt.fn = bfs_printf_M;
+				break;
+			case 'n':
+				fmt.fn = bfs_printf_n;
+				break;
+			case 'p':
+				fmt.fn = bfs_printf_p;
+				break;
+			case 'P':
+				fmt.fn = bfs_printf_P;
+				break;
+			case 's':
+				fmt.fn = bfs_printf_s;
+				break;
+			case 'S':
+				fmt.fn = bfs_printf_S;
+				specifier = "g";
+				break;
+			case 't':
+				fmt.fn = bfs_printf_ctime;
+				fmt.stat_field = BFS_STAT_MTIME;
+				break;
+			case 'u':
+				fmt.fn = bfs_printf_u;
+				fmt.ptr = ctx->users;
+				break;
+			case 'U':
+				fmt.fn = bfs_printf_U;
+				break;
+			case 'w':
+				fmt.fn = bfs_printf_ctime;
+				fmt.stat_field = BFS_STAT_BTIME;
+				break;
+			case 'y':
+				fmt.fn = bfs_printf_y;
+				break;
+			case 'Y':
+				fmt.fn = bfs_printf_Y;
+				break;
+			case 'Z':
+#if BFS_CAN_CHECK_CONTEXT
+				fmt.fn = bfs_printf_Z;
+				break;
+#else
+				bfs_expr_error(ctx, expr);
+				bfs_error(ctx, "Missing platform support for '%%%c'.\n", c);
+				goto fmt_error;
+#endif
+
+			// Two-character time directives: pick the timestamp, then
+			// validate the character that follows
+			case 'A':
+				fmt.stat_field = BFS_STAT_ATIME;
+				goto fmt_strftime;
+			case 'B':
+			case 'W':
+				fmt.stat_field = BFS_STAT_BTIME;
+				goto fmt_strftime;
+			case 'C':
+				fmt.stat_field = BFS_STAT_CTIME;
+				goto fmt_strftime;
+			case 'T':
+				fmt.stat_field = BFS_STAT_MTIME;
+				goto fmt_strftime;
+
+			fmt_strftime:
+				fmt.fn = bfs_printf_strftime;
+				c = *++i;
+				if (!c) {
+					bfs_expr_error(ctx, expr);
+					bfs_error(ctx, "Incomplete time specifier '%s%c'.\n", fmt.str, i[-1]);
+					goto fmt_error;
+				} else if (strchr("%+@aAbBcCdDeFgGhHIjklmMnprRsStTuUVwWxXyYzZ", c)) {
+					fmt.c = c;
+				} else {
+					bfs_expr_error(ctx, expr);
+					bfs_error(ctx, "Unrecognized time specifier '%%%c%c'.\n", i[-1], c);
+					goto fmt_error;
+				}
+				break;
+
+			case '\0':
+				bfs_expr_error(ctx, expr);
+				bfs_error(ctx, "Incomplete format specifier '%s'.\n", fmt.str);
+				goto fmt_error;
+
+			default:
+				bfs_expr_error(ctx, expr);
+				bfs_error(ctx, "Unrecognized format specifier '%%%c'.\n", c);
+				goto fmt_error;
+			}
+
+			// '#', '0', and '+' are rejected for directives that kept the "s" specifier
+			if (must_be_numeric && strcmp(specifier, "s") == 0) {
+				bfs_expr_error(ctx, expr);
+				bfs_error(ctx, "Invalid flags '%s' for string format '%%%c'.\n", fmt.str + 1, c);
+				goto fmt_error;
+			}
+
+			if (dstrcat(&fmt.str, specifier) != 0) {
+				bfs_perror(ctx, "dstrcat()");
+				goto fmt_error;
+			}
+
+			if (append_directive(ctx, expr->printf, &literal, &fmt) != 0) {
+				goto fmt_error;
+			}
+
+			continue;
+
+		fmt_error:
+			// fmt.str was not handed over to the chain, so free it here
+			dstrfree(fmt.str);
+			goto error;
+		}
+
+	one_char:
+		if (dstrapp(&literal, c) != 0) {
+			bfs_perror(ctx, "dstrapp()");
+			goto error;
+		}
+	}
+
+done:
+	// Append whatever literal text is still pending
+	if (append_literal(ctx, expr->printf, &literal) != 0) {
+		goto error;
+	}
+	dstrfree(literal);
+	return 0;
+
+error:
+	dstrfree(literal);
+	bfs_printf_free(expr->printf);
+	expr->printf = NULL;
+	return -1;
+}
+
+/**
+ * Evaluate every entry of a parsed format, in order.
+ *
+ * A failing entry doesn't stop the remaining ones from running.  On return,
+ * errno is the value left by the last entry that failed, or 0 if none did.
+ */
+int bfs_printf(CFILE *cfile, const struct bfs_printf *format, const struct BFTW *ftwbuf) {
+	int status = 0;
+	int saved = 0;
+
+	size_t i = 0;
+	while (i < format->nfmts) {
+		const struct bfs_fmt *entry = format->fmts + i;
+		++i;
+
+		if (entry->fn(cfile, entry, ftwbuf) >= 0) {
+			continue;
+		}
+
+		status = -1;
+		saved = errno;
+	}
+
+	errno = saved;
+	return status;
+}
+
+/** Free a parsed format, including each entry's string.  NULL is a no-op. */
+void bfs_printf_free(struct bfs_printf *format) {
+	if (format) {
+		size_t count = format->nfmts;
+		for (size_t i = 0; i != count; ++i) {
+			struct bfs_fmt *entry = &format->fmts[i];
+			dstrfree(entry->str);
+		}
+
+		free(format->fmts);
+		free(format);
+	}
+}
diff --git a/src/printf.h b/src/printf.h
new file mode 100644
index 0000000..2bff087
--- /dev/null
+++ b/src/printf.h
@@ -0,0 +1,55 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+/**
+ * Implementation of -printf/-fprintf.
+ */
+
+#ifndef BFS_PRINTF_H
+#define BFS_PRINTF_H
+
+#include "color.h"
+
+struct BFTW;
+struct bfs_ctx;
+struct bfs_expr;
+
+/**
+ * A printf command, the result of parsing a single format string.
+ */
+struct bfs_printf;
+
+/**
+ * Parse a -printf format string.
+ *
+ * @param ctx
+ * The bfs context.
+ * @param expr
+ * The expression to fill in.
+ * @param format
+ * The format string to parse.
+ * @return
+ * 0 on success, -1 on failure.
+ */
+int bfs_printf_parse(const struct bfs_ctx *ctx, struct bfs_expr *expr, const char *format);
+
+/**
+ * Evaluate a parsed format string.
+ *
+ * @param cfile
+ * The CFILE to print to.
+ * @param format
+ * The parsed printf format.
+ * @param ftwbuf
+ * The bftw() data for the current file.
+ * @return
+ * 0 on success, -1 on failure.
+ */
+int bfs_printf(CFILE *cfile, const struct bfs_printf *format, const struct BFTW *ftwbuf);
+
+/**
+ * Free a parsed format string.
+ */
+void bfs_printf_free(struct bfs_printf *format);
+
+#endif // BFS_PRINTF_H
diff --git a/src/pwcache.c b/src/pwcache.c
new file mode 100644
index 0000000..af8c237
--- /dev/null
+++ b/src/pwcache.c
@@ -0,0 +1,220 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+#include "prelude.h"
+#include "pwcache.h"
+#include "alloc.h"
+#include "trie.h"
+#include <errno.h>
+#include <grp.h>
+#include <pwd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+/**
+ * Represents cache hits for negative results.  bfs_getent() stores this
+ * self-referential pointer as a leaf's value when a lookup finds no entry, and
+ * translates it back to NULL for its callers; a NULL leaf value means the key
+ * has no cached result yet.
+ */
+static void *MISSING = &MISSING;
+
+/**
+ * Callback type for bfs_getent().  Called with the lookup key, the arena
+ * allocation to fill in, and the current buffer size.  bfs_getent() expects
+ * the entry on success, or NULL with errno set to 0 (no such entry), ERANGE
+ * (buffer too small), or another error.
+ */
+typedef void *bfs_getent_fn(const void *key, void *ptr, size_t bufsize);
+
+/**
+ * Shared scaffolding for get{pw,gr}{nam,?id}_r().
+ *
+ * @param fn
+ *         The lookup callback.
+ * @param key
+ *         The key to pass to the callback.
+ * @param leaf
+ *         The trie leaf that caches the result for this key.
+ * @param varena
+ *         The arena to allocate the entry from.
+ * @return
+ *         The matching entry, or NULL if not found (errno == 0) or an error
+ *         occurred (errno != 0).
+ */
+static void *bfs_getent(bfs_getent_fn *fn, const void *key, struct trie_leaf *leaf, struct varena *varena) {
+	if (leaf->value) {
+		errno = 0;
+		return leaf->value == MISSING ? NULL : leaf->value;
+	}
+
+	// _SC_GET{PW,GR}_R_SIZE_MAX tend to be fairly large (~1K).  That's okay
+	// for temporary allocations, but for these long-lived ones, let's start
+	// with a smaller buffer.
+	size_t bufsize = 128;
+	void *ptr = varena_alloc(varena, bufsize);
+	if (!ptr) {
+		return NULL;
+	}
+
+	int error;
+	while (true) {
+		void *ret = fn(key, ptr, bufsize);
+		if (ret) {
+			leaf->value = ret;
+			return ret;
+		}
+
+		error = errno;
+		if (error == 0) {
+			// No such entry; cache the negative result
+			leaf->value = MISSING;
+			break;
+		} else if (error != ERANGE) {
+			break;
+		}
+
+		// The buffer was too small, so grow it and retry
+		void *next = varena_grow(varena, ptr, &bufsize);
+		if (!next) {
+			error = errno;
+			break;
+		}
+		ptr = next;
+	}
+
+	// Callers tell "not found" from "error" by errno, so don't let the
+	// cleanup disturb it
+	varena_free(varena, ptr, bufsize);
+	errno = error;
+	return NULL;
+}
+
+/**
+ * An arena-allocated struct passwd, stored together with the buffer that is
+ * handed to getpw{nam,uid}_r() for the same entry.
+ */
+struct bfs_passwd {
+ struct passwd pwd; // The entry that getpw{nam,uid}_r() fills in
+ char buf[]; // The buffer passed to getpw{nam,uid}_r(), sized by the arena
+};
+
+struct bfs_users { // A user cache: an arena of entries indexed by name and by UID
+ /** bfs_passwd arena. */
+ struct varena varena;
+ /** A map from usernames to entries. */
+ struct trie by_name;
+ /** A map from UIDs to entries. */
+ struct trie by_uid;
+};
+
+/** Allocate a user cache and initialize its arena and both tries. */
+struct bfs_users *bfs_users_new(void) {
+	struct bfs_users *cache = ALLOC(struct bfs_users);
+
+	if (cache) {
+		VARENA_INIT(&cache->varena, struct bfs_passwd, buf);
+		trie_init(&cache->by_name);
+		trie_init(&cache->by_uid);
+	}
+
+	return cache;
+}
+
+/**
+ * bfs_getent() callback for getpwnam_r().  The key is the username; errno is
+ * set to the getpwnam_r() return value.
+ */
+static void *bfs_getpwnam_impl(const void *key, void *ptr, size_t bufsize) {
+	struct bfs_passwd *entry = ptr;
+	struct passwd *found = NULL;
+
+	int status = getpwnam_r(key, &entry->pwd, entry->buf, bufsize, &found);
+	errno = status;
+
+	return found;
+}
+
+/** Look up a user by name, consulting the by-name cache first. */
+const struct passwd *bfs_getpwnam(struct bfs_users *users, const char *name) {
+	struct trie_leaf *slot = trie_insert_str(&users->by_name, name);
+
+	if (slot) {
+		return bfs_getent(bfs_getpwnam_impl, name, slot, &users->varena);
+	}
+
+	return NULL;
+}
+
+/**
+ * bfs_getent() callback for getpwuid_r().  The key points to the uid_t to look
+ * up; errno is set to the getpwuid_r() return value.
+ */
+static void *bfs_getpwuid_impl(const void *key, void *ptr, size_t bufsize) {
+	struct bfs_passwd *entry = ptr;
+	uid_t id = *(const uid_t *)key;
+	struct passwd *found = NULL;
+
+	int status = getpwuid_r(id, &entry->pwd, entry->buf, bufsize, &found);
+	errno = status;
+
+	return found;
+}
+
+/** Look up a user by ID, consulting the by-UID cache first. */
+const struct passwd *bfs_getpwuid(struct bfs_users *users, uid_t uid) {
+	// The trie key is the raw bytes of the uid_t
+	struct trie_leaf *slot = trie_insert_mem(&users->by_uid, &uid, sizeof(uid));
+
+	if (slot) {
+		return bfs_getent(bfs_getpwuid_impl, &uid, slot, &users->varena);
+	}
+
+	return NULL;
+}
+
+/** Flush a user cache by clearing both tries and then the arena. */
+void bfs_users_flush(struct bfs_users *users) {
+	struct trie *by_uid = &users->by_uid;
+	struct trie *by_name = &users->by_name;
+
+	trie_clear(by_uid);
+	trie_clear(by_name);
+	varena_clear(&users->varena);
+}
+
+/** Destroy a user cache and free it.  NULL is a no-op. */
+void bfs_users_free(struct bfs_users *users) {
+	if (!users) {
+		return;
+	}
+
+	trie_destroy(&users->by_uid);
+	trie_destroy(&users->by_name);
+	varena_destroy(&users->varena);
+	free(users);
+}
+
+/**
+ * An arena-allocated struct group, stored together with the buffer that is
+ * handed to getgr{nam,gid}_r() for the same entry.
+ */
+struct bfs_group {
+ struct group grp; // The entry that getgr{nam,gid}_r() fills in
+ char buf[]; // The buffer passed to getgr{nam,gid}_r(), sized by the arena
+};
+
+struct bfs_groups { // A group cache: an arena of entries indexed by name and by GID
+ /** bfs_group arena. */
+ struct varena varena;
+ /** A map from group names to entries. */
+ struct trie by_name;
+ /** A map from GIDs to entries. */
+ struct trie by_gid;
+};
+
+/** Allocate a group cache and initialize its arena and both tries. */
+struct bfs_groups *bfs_groups_new(void) {
+	struct bfs_groups *cache = ALLOC(struct bfs_groups);
+
+	if (cache) {
+		VARENA_INIT(&cache->varena, struct bfs_group, buf);
+		trie_init(&cache->by_name);
+		trie_init(&cache->by_gid);
+	}
+
+	return cache;
+}
+
+/**
+ * bfs_getent() callback for getgrnam_r().  The key is the group name; errno is
+ * set to the getgrnam_r() return value.
+ */
+static void *bfs_getgrnam_impl(const void *key, void *ptr, size_t bufsize) {
+	struct bfs_group *entry = ptr;
+	struct group *found = NULL;
+
+	int status = getgrnam_r(key, &entry->grp, entry->buf, bufsize, &found);
+	errno = status;
+
+	return found;
+}
+
+/** Look up a group by name, consulting the by-name cache first. */
+const struct group *bfs_getgrnam(struct bfs_groups *groups, const char *name) {
+	struct trie_leaf *slot = trie_insert_str(&groups->by_name, name);
+
+	if (slot) {
+		return bfs_getent(bfs_getgrnam_impl, name, slot, &groups->varena);
+	}
+
+	return NULL;
+}
+
+/**
+ * bfs_getent() callback for getgrgid_r().  The key points to the gid_t to look
+ * up; errno is set to the getgrgid_r() return value.
+ */
+static void *bfs_getgrgid_impl(const void *key, void *ptr, size_t bufsize) {
+	struct bfs_group *entry = ptr;
+	gid_t id = *(const gid_t *)key;
+	struct group *found = NULL;
+
+	int status = getgrgid_r(id, &entry->grp, entry->buf, bufsize, &found);
+	errno = status;
+
+	return found;
+}
+
+/** Look up a group by ID, consulting the by-GID cache first. */
+const struct group *bfs_getgrgid(struct bfs_groups *groups, gid_t gid) {
+	// The trie key is the raw bytes of the gid_t
+	struct trie_leaf *slot = trie_insert_mem(&groups->by_gid, &gid, sizeof(gid));
+
+	if (slot) {
+		return bfs_getent(bfs_getgrgid_impl, &gid, slot, &groups->varena);
+	}
+
+	return NULL;
+}
+
+/** Flush a group cache by clearing both tries and then the arena. */
+void bfs_groups_flush(struct bfs_groups *groups) {
+	struct trie *by_gid = &groups->by_gid;
+	struct trie *by_name = &groups->by_name;
+
+	trie_clear(by_gid);
+	trie_clear(by_name);
+	varena_clear(&groups->varena);
+}
+
+/** Destroy a group cache and free it.  NULL is a no-op. */
+void bfs_groups_free(struct bfs_groups *groups) {
+	if (!groups) {
+		return;
+	}
+
+	trie_destroy(&groups->by_gid);
+	trie_destroy(&groups->by_name);
+	varena_destroy(&groups->varena);
+	free(groups);
+}
diff --git a/src/pwcache.h b/src/pwcache.h
new file mode 100644
index 0000000..b6c0b67
--- /dev/null
+++ b/src/pwcache.h
@@ -0,0 +1,124 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+/**
+ * A caching wrapper for /etc/{passwd,group}.
+ */
+
+#ifndef BFS_PWCACHE_H
+#define BFS_PWCACHE_H
+
+#include <grp.h>
+#include <pwd.h>
+
+/**
+ * A user cache.
+ */
+struct bfs_users;
+
+/**
+ * Create a user cache.
+ *
+ * @return
+ * A new user cache, or NULL on failure.
+ */
+struct bfs_users *bfs_users_new(void);
+
+/**
+ * Get a user entry by name.
+ *
+ * @param users
+ * The user cache.
+ * @param name
+ * The username to look up.
+ * @return
+ * The matching user, or NULL if not found (errno == 0) or an error
+ * occurred (errno != 0).
+ */
+const struct passwd *bfs_getpwnam(struct bfs_users *users, const char *name);
+
+/**
+ * Get a user entry by ID.
+ *
+ * @param users
+ * The user cache.
+ * @param uid
+ * The ID to look up.
+ * @return
+ * The matching user, or NULL if not found (errno == 0) or an error
+ * occurred (errno != 0).
+ */
+const struct passwd *bfs_getpwuid(struct bfs_users *users, uid_t uid);
+
+/**
+ * Flush a user cache.
+ *
+ * @param users
+ * The cache to flush.
+ */
+void bfs_users_flush(struct bfs_users *users);
+
+/**
+ * Free a user cache.
+ *
+ * @param users
+ * The user cache to free.
+ */
+void bfs_users_free(struct bfs_users *users);
+
+/**
+ * A group cache.
+ */
+struct bfs_groups;
+
+/**
+ * Create a group cache.
+ *
+ * @return
+ * A new group cache, or NULL on failure.
+ */
+struct bfs_groups *bfs_groups_new(void);
+
+/**
+ * Get a group entry by name.
+ *
+ * @param groups
+ * The group cache.
+ * @param name
+ * The group name to look up.
+ * @return
+ * The matching group, or NULL if not found (errno == 0) or an error
+ * occurred (errno != 0).
+ */
+const struct group *bfs_getgrnam(struct bfs_groups *groups, const char *name);
+
+/**
+ * Get a group entry by ID.
+ *
+ * @param groups
+ * The group cache.
+ * @param gid
+ * The ID to look up.
+ * @return
+ * The matching group, or NULL if not found (errno == 0) or an error
+ * occurred (errno != 0).
+ */
+const struct group *bfs_getgrgid(struct bfs_groups *groups, gid_t gid);
+
+/**
+ * Flush a group cache.
+ *
+ * @param groups
+ * The cache to flush.
+ */
+void bfs_groups_flush(struct bfs_groups *groups);
+
+/**
+ * Free a group cache.
+ *
+ * @param groups
+ * The group cache to free.
+ */
+void bfs_groups_free(struct bfs_groups *groups);
+
+#endif // BFS_PWCACHE_H
diff --git a/src/sanity.h b/src/sanity.h
new file mode 100644
index 0000000..e168b8f
--- /dev/null
+++ b/src/sanity.h
@@ -0,0 +1,94 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+/**
+ * Sanitizer interface.
+ */
+
+#ifndef BFS_SANITY_H
+#define BFS_SANITY_H
+
+#include "prelude.h"
+#include <stddef.h>
+
+#if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__)
+# define SANITIZE_ADDRESS true
+#endif
+
+#if __has_feature(memory_sanitizer) || defined(__SANITIZE_MEMORY__)
+# define SANITIZE_MEMORY true
+#endif
+
+#if __has_feature(thread_sanitizer) || defined(__SANITIZE_THREAD__)
+# define SANITIZE_THREAD true
+#endif
+
+/* Call macro(ptr, size) or macro(ptr, sizeof(*ptr)).
+ *
+ * The size argument is optional.  SANITIZE_CALL() appends an empty argument so
+ * that __VA_ARGS__ in SANITIZE_CALL_() is never missing.  SANITIZE_CALL_() then
+ * pastes those remaining arguments in front of sizeof(*(ptr)):
+ *
+ *   SANITIZE_CALL(m, p)    -> SANITIZE_CALL__(m, p, sizeof(*(p)), )
+ *   SANITIZE_CALL(m, p, n) -> SANITIZE_CALL__(m, p, n, sizeof(*(p)), )
+ *
+ * SANITIZE_CALL__() keeps only the first size-position argument, so an
+ * explicit size wins and sizeof(*(ptr)) is the fallback.
+ */
+#define SANITIZE_CALL(...) \
+ SANITIZE_CALL_(__VA_ARGS__, )
+
+#define SANITIZE_CALL_(macro, ptr, ...) \
+ SANITIZE_CALL__(macro, ptr, __VA_ARGS__ sizeof(*(ptr)), )
+
+#define SANITIZE_CALL__(macro, ptr, size, ...) \
+ macro(ptr, size)
+
+#if SANITIZE_ADDRESS
+# include <sanitizer/asan_interface.h>
+
+/**
+ * sanitize_alloc(ptr, size = sizeof(*ptr))
+ *
+ * Mark a memory region as allocated.
+ */
+#define sanitize_alloc(...) SANITIZE_CALL(__asan_unpoison_memory_region, __VA_ARGS__)
+
+/**
+ * sanitize_free(ptr, size = sizeof(*ptr))
+ *
+ * Mark a memory region as free.
+ */
+#define sanitize_free(...) SANITIZE_CALL(__asan_poison_memory_region, __VA_ARGS__)
+
+#else
+# define sanitize_alloc sanitize_uninit
+# define sanitize_free sanitize_uninit
+#endif
+
+#if SANITIZE_MEMORY
+# include <sanitizer/msan_interface.h>
+
+/**
+ * sanitize_init(ptr, size = sizeof(*ptr))
+ *
+ * Mark a memory region as initialized.
+ */
+#define sanitize_init(...) SANITIZE_CALL(__msan_unpoison, __VA_ARGS__)
+
+/**
+ * sanitize_uninit(ptr, size = sizeof(*ptr))
+ *
+ * Mark a memory region as uninitialized.
+ */
+#define sanitize_uninit(...) SANITIZE_CALL(__msan_allocated_memory, __VA_ARGS__)
+
+#else
+# define sanitize_init(...) SANITIZE_CALL(sanitize_ignore, __VA_ARGS__)
+# define sanitize_uninit(...) SANITIZE_CALL(sanitize_ignore, __VA_ARGS__)
+#endif
+
+/**
+ * Squelch unused variable warnings when not sanitizing.
+ */
+#define sanitize_ignore(ptr, size) ((void)(ptr), (void)(size))
+
+/**
+ * Initialize a variable, unless sanitizers would detect uninitialized uses.
+ */
+#if SANITIZE_MEMORY
+# define uninit(value)
+#else
+# define uninit(value) = value
+#endif
+
+#endif // BFS_SANITY_H
diff --git a/src/sighook.c b/src/sighook.c
new file mode 100644
index 0000000..ff5b96f
--- /dev/null
+++ b/src/sighook.c
@@ -0,0 +1,600 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+/**
+ * Dynamic (un)registration of signal handlers.
+ *
+ * Because signal handlers can interrupt any thread at an arbitrary point, they
+ * must be lock-free or risk deadlock. Therefore, we implement the global table
+ * of signal "hooks" with a simple read-copy-update (RCU) scheme. Readers get a
+ * reference-counted pointer (struct arc) to the table in a lock-free way, and
+ * release the reference count when finished.
+ *
+ * Updates are managed by struct rcu, which has two slots: one active and one
+ * inactive. Readers acquire a reference to the active slot. A single writer
+ * can safely update it by initializing the inactive slot, atomically swapping
+ * the slots, and waiting for the reference count of the newly inactive slot to
+ * drop to zero. Once it does, the old pointer can be safely freed.
+ */
+
+#include "prelude.h"
+#include "sighook.h"
+#include "alloc.h"
+#include "atomic.h"
+#include "bfstd.h"
+#include "diag.h"
+#include "thread.h"
+#include <errno.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#if _POSIX_SEMAPHORES > 0
+# include <semaphore.h>
+#endif
+
+/**
+ * An atomically reference-counted pointer.
+ *
+ * arc_set() publishes a pointer with a reference count of one.  arc_get() and
+ * arc_put() acquire and release additional references, and arc_wait() drops
+ * the initial reference and waits for the rest to be released.
+ */
+struct arc {
+ /** The current reference count (0 means empty). */
+ atomic size_t refs;
+ /** The reference itself. */
+ void *ptr;
+
+#if _POSIX_SEMAPHORES > 0
+ /** A semaphore for arc_wake(). */
+ sem_t sem;
+ /** sem_init() result. */
+ int sem_status;
+#endif
+};
+
+/**
+ * Initialize an arc to the empty state: no pointer and a reference count of
+ * zero.  Where semaphores are available, the sem_init() result is recorded in
+ * sem_status.
+ */
+static void arc_init(struct arc *arc) {
+	arc->ptr = NULL;
+	atomic_init(&arc->refs, 0);
+
+#if _POSIX_SEMAPHORES > 0
+	int status = sem_init(&arc->sem, false, 0);
+	arc->sem_status = status;
+#endif
+}
+
+/** Read the current reference count with a relaxed load. */
+static size_t arc_refs(const struct arc *arc) {
+	size_t count = load(&arc->refs, relaxed);
+	return count;
+}
+
+/**
+ * Store a non-NULL pointer in an empty arc.  The pointer is written first and
+ * then published by a release store that sets the reference count to one.
+ */
+static void arc_set(struct arc *arc, void *ptr) {
+	size_t current = arc_refs(arc);
+	bfs_assert(current == 0);
+	bfs_assert(ptr);
+
+	arc->ptr = ptr;
+	store(&arc->refs, 1, release);
+}
+
+/**
+ * Acquire a reference.
+ *
+ * @return
+ *         The stored pointer, or NULL if the arc is empty (refcount zero).
+ */
+static void *arc_get(struct arc *arc) {
+	size_t seen = arc_refs(arc);
+
+	// Only ever increment a non-zero count.  A failed exchange reloads the
+	// current count into `seen`.
+	while (seen != 0) {
+		if (compare_exchange_weak(&arc->refs, &seen, seen + 1, acquire, relaxed)) {
+			return arc->ptr;
+		}
+	}
+
+	return NULL;
+}
+
+/**
+ * Release a reference.  When the last one is released and the semaphore was
+ * initialized successfully, post it; abort if the post fails.
+ */
+static void arc_put(struct arc *arc) {
+	size_t before = fetch_sub(&arc->refs, 1, release);
+	if (before != 1) {
+		return;
+	}
+
+#if _POSIX_SEMAPHORES > 0
+	if (arc->sem_status == 0 && sem_post(&arc->sem) != 0) {
+		abort();
+	}
+#endif
+}
+
+/**
+ * Wait on the semaphore, retrying when interrupted by a signal.
+ *
+ * @return
+ *         0 after a successful wait, or -1 if no usable semaphore exists.
+ */
+static int arc_sem_wait(struct arc *arc) {
+#if _POSIX_SEMAPHORES > 0
+	if (arc->sem_status == 0) {
+		for (;;) {
+			if (sem_wait(&arc->sem) == 0) {
+				return 0;
+			}
+			bfs_everify(errno == EINTR, "sem_wait()");
+		}
+	}
+#endif
+
+	return -1;
+}
+
+/**
+ * Wait for all references to be released.
+ *
+ * Drops one reference (the count must be non-zero on entry), then waits until
+ * the count reaches zero: via the semaphore posted by arc_put() when one is
+ * usable, otherwise by spinning on the count.
+ *
+ * @return
+ *         The pointer that was stored, which is also cleared from the arc.
+ */
+static void *arc_wait(struct arc *arc) {
+ size_t refs = fetch_sub(&arc->refs, 1, relaxed);
+ bfs_assert(refs > 0);
+
+ --refs;
+ while (refs > 0) {
+ if (arc_sem_wait(arc) == 0) {
+ bfs_assert(arc_refs(arc) == 0);
+ // sem_wait() provides enough ordering, so we can skip the fence
+ goto done;
+ }
+
+ // Some platforms (like macOS) don't support unnamed semaphores,
+ // but we can always busy-wait
+ spin_loop();
+ refs = arc_refs(arc);
+ }
+
+ thread_fence(&arc->refs, acquire);
+
+done:;
+ void *ptr = arc->ptr;
+ arc->ptr = NULL;
+ return ptr;
+}
+
+/**
+ * Destroy an arc that has at most one remaining reference, destroying its
+ * semaphore if one was initialized successfully.
+ */
+static void arc_destroy(struct arc *arc) {
+	size_t remaining = arc_refs(arc);
+	bfs_assert(remaining <= 1);
+
+#if _POSIX_SEMAPHORES > 0
+	if (arc->sem_status != 0) {
+		return;
+	}
+	bfs_everify(sem_destroy(&arc->sem) == 0, "sem_destroy()");
+#endif
+}
+
+/**
+ * A simple read-copy-update memory reclamation scheme.
+ *
+ * Readers take a reference on the active slot (rcu_read()).  rcu_update()
+ * fills the other slot, makes it active, and waits for the references on the
+ * previously active slot to be released.
+ */
+struct rcu {
+ /** The currently active slot. */
+ atomic size_t active;
+ /** The two slots. */
+ struct arc slots[2];
+};
+
+/**
+ * Sentinel value for RCU, since arc uses NULL already.  Stored in a slot in
+ * place of a NULL pointer; rcu_read() and rcu_peek() translate it back to NULL.
+ */
+static void *RCU_NULL = &RCU_NULL;
+
+/** Initialize an RCU block with slot 0 active and holding no pointer. */
+static void rcu_init(struct rcu *rcu) {
+	for (size_t i = 0; i < 2; ++i) {
+		arc_init(&rcu->slots[i]);
+	}
+
+	atomic_init(&rcu->active, 0);
+
+	// Slot 0 starts out holding the "no pointer" sentinel
+	arc_set(&rcu->slots[0], RCU_NULL);
+}
+
+/** Get the slot that is currently active (relaxed load of the index). */
+static struct arc *rcu_active(struct rcu *rcu) {
+	return rcu->slots + load(&rcu->active, relaxed);
+}
+
+/**
+ * Read an RCU-protected pointer.
+ *
+ * On return, *slot is the slot a reference was acquired on; the caller must
+ * arc_put() it, even when the returned pointer is NULL.
+ */
+static void *rcu_read(struct rcu *rcu, struct arc **slot) {
+	void *ptr;
+
+	do {
+		*slot = rcu_active(rcu);
+		ptr = arc_get(*slot);
+		// If arc_get() found the slot empty, the other slot became
+		// active in the meantime; retry
+	} while (!ptr);
+
+	return ptr == RCU_NULL ? NULL : ptr;
+}
+
+/**
+ * Get the RCU-protected pointer without acquiring a reference.  The RCU_NULL
+ * sentinel is reported as NULL.
+ */
+static void *rcu_peek(struct rcu *rcu) {
+	void *current = rcu_active(rcu)->ptr;
+	return current == RCU_NULL ? NULL : current;
+}
+
+/**
+ * Update an RCU-protected pointer, and return the old one.
+ *
+ * The new pointer (or RCU_NULL in place of NULL) goes into the inactive slot,
+ * which is then made active.  This returns once all references on the
+ * previously active slot have been released.
+ */
+static void *rcu_update(struct rcu *rcu, void *ptr) {
+	size_t cur = load(&rcu->active, relaxed);
+	size_t other = cur ^ 1;
+
+	arc_set(&rcu->slots[other], ptr ? ptr : RCU_NULL);
+	store(&rcu->active, other, relaxed);
+
+	return arc_wait(&rcu->slots[cur]);
+}
+
+struct sighook { // A registered hook, as created by sighook_impl()
+ int sig; // The signal to match; run_hooks() treats 0 as matching any signal
+ sighook_fn *fn; // The callback to invoke
+ void *arg; // Passed to fn as its last argument
+ enum sigflags flags; // OR-ed into the run_hooks() result when the hook runs
+};
+
+/**
+ * A table of signal hooks.  Each slot is an arc whose pointer is a
+ * struct sighook; a slot with a zero reference count is free.
+ */
+struct sigtable {
+ /** The number of filled slots. */
+ size_t filled;
+ /** The length of the array. */
+ size_t size;
+ /** An array of signal hooks. */
+ struct arc hooks[];
+};
+
+/**
+ * Add a hook to the first free slot of a table.
+ *
+ * @return
+ *         0 on success, or -1 if the table is NULL or has no free slot.
+ */
+static int sigtable_add(struct sigtable *table, struct sighook *hook) {
+	if (!table || table->filled == table->size) {
+		return -1;
+	}
+
+	size_t i = 0;
+	while (i < table->size) {
+		struct arc *slot = table->hooks + i;
+		++i;
+
+		if (arc_refs(slot) != 0) {
+			continue;
+		}
+
+		arc_set(slot, hook);
+		++table->filled;
+		return 0;
+	}
+
+	return -1;
+}
+
+/**
+ * Delete a hook from a table, waiting until nothing else references its slot.
+ *
+ * @return
+ *         0 on success, or -1 if the hook isn't in the table.
+ */
+static int sigtable_del(struct sigtable *table, struct sighook *hook) {
+	size_t i = 0;
+	while (i < table->size && table->hooks[i].ptr != hook) {
+		++i;
+	}
+
+	if (i == table->size) {
+		return -1;
+	}
+
+	arc_wait(&table->hooks[i]);
+	--table->filled;
+	return 0;
+}
+
+/**
+ * Create a bigger copy of a signal table: twice the size of prev, or a single
+ * slot if prev is NULL or empty.  prev is not modified or freed.
+ *
+ * @return
+ *         The new table, or NULL if allocation fails.
+ */
+static struct sigtable *sigtable_grow(struct sigtable *prev) {
+	size_t old_size = 0;
+	size_t new_size = 1;
+	if (prev && prev->size != 0) {
+		old_size = prev->size;
+		new_size = 2 * old_size;
+	}
+
+	struct sigtable *table = ALLOC_FLEX(struct sigtable, hooks, new_size);
+	if (!table) {
+		return NULL;
+	}
+
+	table->size = new_size;
+	table->filled = 0;
+	for (size_t i = 0; i < new_size; ++i) {
+		arc_init(&table->hooks[i]);
+	}
+
+	// Carry over every hook present in the old table
+	for (size_t i = 0; i < old_size; ++i) {
+		struct sighook *hook = prev->hooks[i].ptr;
+		if (!hook) {
+			continue;
+		}
+		bfs_verify(sigtable_add(table, hook) == 0);
+	}
+
+	return table;
+}
+
+/** Destroy every slot of a signal table and free it.  NULL is a no-op. */
+static void sigtable_free(struct sigtable *table) {
+	if (table) {
+		for (size_t i = 0; i < table->size; ++i) {
+			arc_destroy(&table->hooks[i]);
+		}
+		free(table);
+	}
+}
+
+/**
+ * Add a hook to a signal table, growing it if necessary.
+ *
+ * The hook goes into the current table if it has room.  Otherwise, a bigger
+ * copy containing the hook replaces the current table, which is then freed.
+ *
+ * @return
+ *         0 on success, or -1 if a bigger table can't be allocated.
+ */
+static int rcu_sigtable_add(struct rcu *rcu, struct sighook *hook) {
+	struct sigtable *current = rcu_peek(rcu);
+	if (sigtable_add(current, hook) == 0) {
+		return 0;
+	}
+
+	struct sigtable *bigger = sigtable_grow(current);
+	if (bigger) {
+		bfs_verify(sigtable_add(bigger, hook) == 0);
+		rcu_update(rcu, bigger);
+		sigtable_free(current);
+		return 0;
+	}
+
+	return -1;
+}
+
+/** The global table of signal hooks. */
+static struct rcu rcu_sighooks;
+/** The global table of atsigexit() hooks. */
+static struct rcu rcu_exithooks;
+
+/**
+ * Mutex for initialization and RCU writer exclusion.  Held by sighook(),
+ * atsigexit(), and sigunhook() while they call siginit() or modify a table.
+ */
+static pthread_mutex_t sigmutex = PTHREAD_MUTEX_INITIALIZER;
+
+/**
+ * Check if a signal was generated by userspace.
+ *
+ * https://pubs.opengroup.org/onlinepubs/9699919799/functions/V2_chap02.html#tag_15_04_03
+ *
+ *     If si_code is SI_USER or SI_QUEUE, or any value less than or
+ *     equal to 0, then the signal was generated by a process ...
+ */
+static bool is_user_generated(const siginfo_t *info) {
+	int code = info->si_code;
+
+	if (code == SI_USER) {
+		return true;
+	}
+	if (code == SI_QUEUE) {
+		return true;
+	}
+	return code <= 0;
+}
+
+/**
+ * Check if a signal is caused by a fault: a SIGBUS, SIGFPE, SIGILL, or SIGSEGV
+ * that was not generated by userspace.
+ */
+static bool is_fault(const siginfo_t *info) {
+	switch (info->si_signo) {
+	case SIGBUS:
+	case SIGFPE:
+	case SIGILL:
+	case SIGSEGV:
+		return !is_user_generated(info);
+	default:
+		return false;
+	}
+}
+
+/* Signals that is_fatal() reports as terminating the process by default.
+ * atsigexit() installs the dispatcher for each of them.  Signals that are
+ * checked with #ifdef may be missing on some platforms.
+ *
+ * https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/signal.h.html
+ */
+static const int FATAL_SIGNALS[] = {
+ SIGABRT,
+ SIGALRM,
+ SIGBUS,
+ SIGFPE,
+ SIGHUP,
+ SIGILL,
+ SIGINT,
+ SIGPIPE,
+ SIGQUIT,
+ SIGSEGV,
+ SIGTERM,
+ SIGUSR1,
+ SIGUSR2,
+#ifdef SIGPOLL
+ SIGPOLL,
+#endif
+#ifdef SIGPROF
+ SIGPROF,
+#endif
+#ifdef SIGSYS
+ SIGSYS,
+#endif
+ SIGTRAP,
+#ifdef SIGVTALRM
+ SIGVTALRM,
+#endif
+ SIGXCPU,
+ SIGXFSZ,
+};
+
+/**
+ * Check if a signal's default action is to terminate the process: either it is
+ * listed in FATAL_SIGNALS, or it is a realtime signal.
+ */
+static bool is_fatal(int sig) {
+	bool fatal = false;
+
+	for (size_t i = 0; !fatal && i < countof(FATAL_SIGNALS); ++i) {
+		fatal = (FATAL_SIGNALS[i] == sig);
+	}
+
+#ifdef SIGRTMIN
+	// https://pubs.opengroup.org/onlinepubs/9699919799/functions/V2_chap02.html#tag_15_04_03
+	//
+	//     The default actions for the realtime signals in the range
+	//     SIGRTMIN to SIGRTMAX shall be to terminate the process
+	//     abnormally.
+	if (!fatal) {
+		fatal = (sig >= SIGRTMIN && sig <= SIGRTMAX);
+	}
+#endif
+
+	return fatal;
+}
+
+/**
+ * Reraise a fatal signal with its default action restored.  Never returns:
+ * if any step fails, or raise() comes back, the process aborts instead.
+ */
+static noreturn void reraise(int sig) {
+	// Restore the default signal action
+	if (signal(sig, SIG_DFL) != SIG_ERR) {
+		// Unblock the signal, since we didn't set SA_NODEFER
+		sigset_t mask;
+		bool unblocked = sigemptyset(&mask) == 0
+			&& sigaddset(&mask, sig) == 0
+			&& pthread_sigmask(SIG_UNBLOCK, &mask, NULL) == 0;
+
+		if (unblocked) {
+			raise(sig);
+		}
+	}
+
+	abort();
+}
+
+/**
+ * Find any matching hooks and run them.
+ *
+ * A hook matches if it was registered for this signal, or for signal 0.
+ *
+ * @return
+ *         The flags of every hook that ran, OR-ed together.
+ */
+static enum sigflags run_hooks(struct rcu *rcu, int sig, siginfo_t *info) {
+	enum sigflags ret = 0;
+
+	struct arc *slot;
+	struct sigtable *table = rcu_read(rcu, &slot);
+	size_t count = table ? table->size : 0;
+
+	for (size_t i = 0; i < count; ++i) {
+		struct arc *arc = &table->hooks[i];
+		struct sighook *hook = arc_get(arc);
+		if (hook) {
+			if (hook->sig == sig || hook->sig == 0) {
+				hook->fn(sig, info, hook->arg);
+				ret |= hook->flags;
+			}
+			arc_put(arc);
+		}
+	}
+
+	// rcu_read() took a reference on the slot even if the table is NULL
+	arc_put(slot);
+	return ret;
+}
+
+/**
+ * Dispatches a signal to the registered handlers.
+ *
+ * Faults are reraised immediately.  Otherwise the normal hooks run, and for a
+ * fatal signal that no hook flagged with SH_CONTINUE, the atsigexit() hooks
+ * run too before the signal is reraised.
+ */
+static void sigdispatch(int sig, siginfo_t *info, void *context) {
+	// https://pubs.opengroup.org/onlinepubs/9699919799/functions/V2_chap02.html#tag_15_04_03
+	//
+	//     The behavior of a process is undefined after it returns normally
+	//     from a signal-catching function for a SIGBUS, SIGFPE, SIGILL, or
+	//     SIGSEGV signal that was not generated by kill(), sigqueue(), or
+	//     raise().
+	if (is_fault(info)) {
+		reraise(sig);
+	}
+
+	// The same section leaves errno unspecified after the handler returns,
+	// unless the handler saves and restores the original value itself
+	int saved = errno;
+
+	// Run the normal hooks
+	enum sigflags flags = run_hooks(&rcu_sighooks, sig, info);
+
+	// Run the atsigexit() hooks, if we're exiting
+	bool proceed = (flags & SH_CONTINUE) != 0;
+	if (!proceed && is_fatal(sig)) {
+		run_hooks(&rcu_exithooks, sig, info);
+		reraise(sig);
+	}
+
+	errno = saved;
+}
+
+/**
+ * Make sure our signal handler is installed for a given signal.
+ *
+ * The first call also initializes both hook tables.  The set of signals that
+ * already have the handler installed is remembered between calls.
+ *
+ * @return
+ *         0 on success, -1 on failure.
+ */
+static int siginit(int sig) {
+	static struct sigaction action = {
+		.sa_sigaction = sigdispatch,
+		.sa_flags = SA_RESTART | SA_SIGINFO,
+	};
+
+	static sigset_t signals;
+	static bool initialized = false;
+
+	if (!initialized) {
+		if (sigemptyset(&signals) != 0) {
+			return -1;
+		}
+		if (sigemptyset(&action.sa_mask) != 0) {
+			return -1;
+		}
+
+		rcu_init(&rcu_sighooks);
+		rcu_init(&rcu_exithooks);
+		initialized = true;
+	}
+
+	int installed = sigismember(&signals, sig);
+	if (installed != 0) {
+		// Negative means sigismember() failed; positive, nothing to do
+		return installed < 0 ? -1 : 0;
+	}
+
+	if (sigaction(sig, &action, NULL) != 0 || sigaddset(&signals, sig) != 0) {
+		return -1;
+	}
+
+	return 0;
+}
+
+/**
+ * Shared sighook()/atsigexit() implementation: allocate a hook, fill it in,
+ * and add it to the given table.
+ *
+ * @return
+ *         The new hook, or NULL on failure.
+ */
+static struct sighook *sighook_impl(struct rcu *rcu, int sig, sighook_fn *fn, void *arg, enum sigflags flags) {
+	struct sighook *hook = ALLOC(struct sighook);
+	if (!hook) {
+		return NULL;
+	}
+
+	hook->flags = flags;
+	hook->arg = arg;
+	hook->fn = fn;
+	hook->sig = sig;
+
+	if (rcu_sigtable_add(rcu, hook) == 0) {
+		return hook;
+	}
+
+	free(hook);
+	return NULL;
+}
+
+/** Install a hook for a signal, installing our handler for it first. */
+struct sighook *sighook(int sig, sighook_fn *fn, void *arg, enum sigflags flags) {
+	struct sighook *hook = NULL;
+
+	mutex_lock(&sigmutex);
+	if (siginit(sig) == 0) {
+		hook = sighook_impl(&rcu_sighooks, sig, fn, arg, flags);
+	}
+	mutex_unlock(&sigmutex);
+
+	return hook;
+}
+
+/**
+ * Register a hook to run before a signal terminates the program.
+ *
+ * Our handler is installed for every signal in FATAL_SIGNALS and, where they
+ * exist, the realtime signals.  If any installation fails, no hook is added.
+ */
+struct sighook *atsigexit(sighook_fn *fn, void *arg) {
+	mutex_lock(&sigmutex);
+
+	struct sighook *hook = NULL;
+	bool ready = true;
+
+	for (size_t i = 0; ready && i < countof(FATAL_SIGNALS); ++i) {
+		ready = (siginit(FATAL_SIGNALS[i]) == 0);
+	}
+
+#ifdef SIGRTMIN
+	for (int sig = SIGRTMIN; ready && sig <= SIGRTMAX; ++sig) {
+		ready = (siginit(sig) == 0);
+	}
+#endif
+
+	if (ready) {
+		// Signal 0 makes the hook match any signal
+		hook = sighook_impl(&rcu_exithooks, 0, fn, arg, 0);
+	}
+
+	mutex_unlock(&sigmutex);
+	return hook;
+}
+
+/**
+ * Remove a signal hook and free it.
+ *
+ * Hooks registered for signal 0 live in the atsigexit() table; all others live
+ * in the normal table.  When the last hook is removed, the table itself is
+ * retired and freed.
+ *
+ * @param hook
+ *         The hook to remove.  NULL is a no-op, so the result of a failed
+ *         sighook()/atsigexit() can be passed without checking it.
+ */
+void sigunhook(struct sighook *hook) {
+	if (!hook) {
+		return;
+	}
+
+	mutex_lock(&sigmutex);
+
+	struct rcu *rcu = hook->sig ? &rcu_sighooks : &rcu_exithooks;
+	struct sigtable *table = rcu_peek(rcu);
+	bfs_verify(sigtable_del(table, hook) == 0);
+
+	if (table->filled == 0) {
+		// rcu_update() returns once nothing references the old table
+		rcu_update(rcu, NULL);
+		sigtable_free(table);
+	}
+
+	mutex_unlock(&sigmutex);
+	free(hook);
+}
diff --git a/src/sighook.h b/src/sighook.h
new file mode 100644
index 0000000..74d18c0
--- /dev/null
+++ b/src/sighook.h
@@ -0,0 +1,73 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+/**
+ * Signal hooks.
+ */
+
+#ifndef BFS_SIGHOOK_H
+#define BFS_SIGHOOK_H
+
+#include <signal.h>
+
+/**
+ * A dynamic signal hook.
+ */
+struct sighook;
+
+/**
+ * Signal hook flags.
+ */
+enum sigflags {
+ /** Suppress the default action for this signal. */
+ SH_CONTINUE = 1 << 0,
+};
+
+/**
+ * A signal hook callback. Hooks are executed from a signal handler, so must
+ * only call async-signal-safe functions.
+ *
+ * @param sig
+ * The signal number.
+ * @param info
+ * Additional information about the signal.
+ * @param arg
+ * An arbitrary pointer passed to the hook.
+ */
+typedef void sighook_fn(int sig, siginfo_t *info, void *arg);
+
+/**
+ * Install a hook for a signal.
+ *
+ * @param sig
+ * The signal to hook.
+ * @param fn
+ * The function to call.
+ * @param arg
+ * An argument passed to the function.
+ * @param flags
+ * Flags for the new hook.
+ * @return
+ * The installed hook, or NULL on failure.
+ */
+struct sighook *sighook(int sig, sighook_fn *fn, void *arg, enum sigflags flags);
+
+/**
+ * On a best-effort basis, invoke the given hook just before the program is
+ * abnormally terminated by a signal.
+ *
+ * @param fn
+ * The function to call.
+ * @param arg
+ * An argument passed to the function.
+ * @return
+ * The installed hook, or NULL on failure.
+ */
+struct sighook *atsigexit(sighook_fn *fn, void *arg);
+
+/**
+ * Remove a signal hook.
+ */
+void sigunhook(struct sighook *hook);
+
+#endif // BFS_SIGHOOK_H
diff --git a/src/stat.c b/src/stat.c
new file mode 100644
index 0000000..2044564
--- /dev/null
+++ b/src/stat.c
@@ -0,0 +1,345 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+#include "prelude.h"
+#include "stat.h"
+#include "atomic.h"
+#include "bfstd.h"
+#include "diag.h"
+#include "sanity.h"
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#if BFS_USE_STATX && !BFS_HAS_STATX
+# include <linux/stat.h>
+# include <sys/syscall.h>
+# include <unistd.h>
+#endif
+
+/**
+ * Get the human-readable name of a single bfs_stat field.
+ *
+ * @param field
+ *         Exactly one BFS_STAT_* bit.
+ * @return
+ *         A static string naming the field.  Any other value is reported with
+ *         bfs_bug() and yields "???".
+ */
+const char *bfs_stat_field_name(enum bfs_stat_field field) {
+	switch (field) {
+	case BFS_STAT_MODE:
+		return "mode";
+	case BFS_STAT_DEV:
+		return "device number";
+	case BFS_STAT_INO:
+		return "inode number";
+	case BFS_STAT_NLINK:
+		return "link count";
+	case BFS_STAT_GID:
+		return "group ID";
+	case BFS_STAT_UID:
+		return "user ID";
+	case BFS_STAT_SIZE:
+		return "size";
+	case BFS_STAT_BLOCKS:
+		return "block count";
+	case BFS_STAT_RDEV:
+		return "underlying device";
+	case BFS_STAT_ATTRS:
+		return "attributes";
+	case BFS_STAT_ATIME:
+		return "access time";
+	case BFS_STAT_BTIME:
+		return "birth time";
+	case BFS_STAT_CTIME:
+		return "change time";
+	case BFS_STAT_MTIME:
+		return "modification time";
+	}
+
+	// No default case above, so compilers can warn about unhandled enumerators
+	bfs_bug("Unrecognized stat field");
+	return "???";
+}
+
+/** Translate bfs_stat_flags into the corresponding fstatat() AT_* flags. */
+int bfs_fstatat_flags(enum bfs_stat_flags flags) {
+	int at_flags = (flags & BFS_STAT_NOFOLLOW) ? AT_SYMLINK_NOFOLLOW : 0;
+
+#ifdef AT_NO_AUTOMOUNT
+	// Requested unconditionally wherever the platform defines it
+	at_flags |= AT_NO_AUTOMOUNT;
+#endif
+
+	return at_flags;
+}
+
+/**
+ * Convert a struct stat into a struct bfs_stat.  dest->mask is rebuilt from
+ * scratch, with one bit set for each field that gets copied.  The attrs field
+ * is only filled when BFS_HAS_ST_FLAGS is set, and btime only when one of the
+ * birth time members (st_birthtim, __st_birthtim, st_birthtimespec) is
+ * available.
+ */
+void bfs_stat_convert(struct bfs_stat *dest, const struct stat *src) {
+ dest->mask = 0;
+
+ dest->mode = src->st_mode;
+ dest->mask |= BFS_STAT_MODE;
+
+ dest->dev = src->st_dev;
+ dest->mask |= BFS_STAT_DEV;
+
+ dest->ino = src->st_ino;
+ dest->mask |= BFS_STAT_INO;
+
+ dest->nlink = src->st_nlink;
+ dest->mask |= BFS_STAT_NLINK;
+
+ dest->gid = src->st_gid;
+ dest->mask |= BFS_STAT_GID;
+
+ dest->uid = src->st_uid;
+ dest->mask |= BFS_STAT_UID;
+
+ dest->size = src->st_size;
+ dest->mask |= BFS_STAT_SIZE;
+
+ dest->blocks = src->st_blocks;
+ dest->mask |= BFS_STAT_BLOCKS;
+
+ dest->rdev = src->st_rdev;
+ dest->mask |= BFS_STAT_RDEV;
+
+ // File flags are an optional struct stat member
+#if BFS_HAS_ST_FLAGS
+ dest->attrs = src->st_flags;
+ dest->mask |= BFS_STAT_ATTRS;
+#endif
+
+ dest->atime = ST_ATIM(*src);
+ dest->mask |= BFS_STAT_ATIME;
+
+ dest->ctime = ST_CTIM(*src);
+ dest->mask |= BFS_STAT_CTIME;
+
+ dest->mtime = ST_MTIM(*src);
+ dest->mask |= BFS_STAT_MTIME;
+
+ // The birth time member goes by different names; use whichever exists
+#if BFS_HAS_ST_BIRTHTIM
+ dest->btime = src->st_birthtim;
+ dest->mask |= BFS_STAT_BTIME;
+#elif BFS_HAS___ST_BIRTHTIM
+ dest->btime = src->__st_birthtim;
+ dest->mask |= BFS_STAT_BTIME;
+#elif BFS_HAS_ST_BIRTHTIMESPEC
+ dest->btime = src->st_birthtimespec;
+ dest->mask |= BFS_STAT_BTIME;
+#endif
+}
+
+/**
+ * bfs_stat() backend built on fstatat().
+ *
+ * @return
+ *         0 on success (buf is filled in), otherwise fstatat()'s return value.
+ */
+static int bfs_stat_impl(int at_fd, const char *at_path, int at_flags, struct bfs_stat *buf) {
+	struct stat sb;
+
+	int err = fstatat(at_fd, at_path, &sb, at_flags);
+	if (err != 0) {
+		return err;
+	}
+
+	bfs_stat_convert(buf, &sb);
+	return 0;
+}
+
+#if BFS_USE_STATX
+
+/**
+ * Wrapper for the statx() system call, which had no glibc wrapper prior to 2.28.
+ * Uses the libc statx() when BFS_HAS_STATX is set, and a raw syscall()
+ * otherwise.
+ *
+ * @return
+ *         The return value of statx()/syscall().
+ */
+static int bfs_statx(int at_fd, const char *at_path, int at_flags, unsigned int mask, struct statx *buf) {
+#if BFS_HAS_STATX
+ int ret = statx(at_fd, at_path, at_flags, mask, buf);
+#else
+ int ret = syscall(SYS_statx, at_fd, at_path, at_flags, mask, buf);
+#endif
+
+ if (ret == 0) {
+ // -fsanitize=memory doesn't know about statx()
+ sanitize_init(buf);
+ }
+
+ return ret;
+}
+
+/** Translate bfs_stat_flags into statx() flags: the fstatat() set, plus AT_STATX_DONT_SYNC for BFS_STAT_NOSYNC. */
+int bfs_statx_flags(enum bfs_stat_flags flags) {
+	int base = bfs_fstatat_flags(flags);
+	int nosync = (flags & BFS_STAT_NOSYNC) ? AT_STATX_DONT_SYNC : 0;
+	return base | nosync;
+}
+
+/**
+ * Convert a struct statx into a struct bfs_stat.
+ *
+ * @return
+ *         0 on success.  If any STATX_BASIC_STATS field other than the three
+ *         timestamps is missing from src->stx_mask, returns -1 with errno set
+ *         to ENOTSUP and leaves dest untouched.
+ */
+int bfs_statx_convert(struct bfs_stat *dest, const struct statx *src) {
+ // Callers shouldn't have to check anything except the times
+ const unsigned int guaranteed = STATX_BASIC_STATS & ~(STATX_ATIME | STATX_CTIME | STATX_MTIME);
+ if ((src->stx_mask & guaranteed) != guaranteed) {
+ errno = ENOTSUP;
+ return -1;
+ }
+
+ dest->mask = 0;
+
+ dest->mode = src->stx_mode;
+ dest->mask |= BFS_STAT_MODE;
+
+ // statx() reports device numbers as separate major/minor parts
+ dest->dev = xmakedev(src->stx_dev_major, src->stx_dev_minor);
+ dest->mask |= BFS_STAT_DEV;
+
+ dest->ino = src->stx_ino;
+ dest->mask |= BFS_STAT_INO;
+
+ dest->nlink = src->stx_nlink;
+ dest->mask |= BFS_STAT_NLINK;
+
+ dest->gid = src->stx_gid;
+ dest->mask |= BFS_STAT_GID;
+
+ dest->uid = src->stx_uid;
+ dest->mask |= BFS_STAT_UID;
+
+ dest->size = src->stx_size;
+ dest->mask |= BFS_STAT_SIZE;
+
+ dest->blocks = src->stx_blocks;
+ dest->mask |= BFS_STAT_BLOCKS;
+
+ dest->rdev = xmakedev(src->stx_rdev_major, src->stx_rdev_minor);
+ dest->mask |= BFS_STAT_RDEV;
+
+ dest->attrs = src->stx_attributes;
+ dest->mask |= BFS_STAT_ATTRS;
+
+ // Each timestamp is only copied (and flagged) if statx() returned it
+ if (src->stx_mask & STATX_ATIME) {
+ dest->atime.tv_sec = src->stx_atime.tv_sec;
+ dest->atime.tv_nsec = src->stx_atime.tv_nsec;
+ dest->mask |= BFS_STAT_ATIME;
+ }
+
+ if (src->stx_mask & STATX_BTIME) {
+ dest->btime.tv_sec = src->stx_btime.tv_sec;
+ dest->btime.tv_nsec = src->stx_btime.tv_nsec;
+ dest->mask |= BFS_STAT_BTIME;
+ }
+
+ if (src->stx_mask & STATX_CTIME) {
+ dest->ctime.tv_sec = src->stx_ctime.tv_sec;
+ dest->ctime.tv_nsec = src->stx_ctime.tv_nsec;
+ dest->mask |= BFS_STAT_CTIME;
+ }
+
+ if (src->stx_mask & STATX_MTIME) {
+ dest->mtime.tv_sec = src->stx_mtime.tv_sec;
+ dest->mtime.tv_nsec = src->stx_mtime.tv_nsec;
+ dest->mask |= BFS_STAT_MTIME;
+ }
+
+ return 0;
+}
+
+/**
+ * bfs_stat() backend built on statx().  Requests the basic stats plus the
+ * birth time, then converts the result.
+ */
+static int bfs_statx_impl(int at_fd, const char *at_path, int at_flags, struct bfs_stat *buf) {
+	struct statx xbuf;
+	unsigned int wanted = STATX_BASIC_STATS | STATX_BTIME;
+
+	int err = bfs_statx(at_fd, at_path, at_flags, wanted, &xbuf);
+	return err == 0 ? bfs_statx_convert(buf, &xbuf) : err;
+}
+
+#endif // BFS_USE_STATX
+
+/**
+ * Calls the stat() implementation with explicit flags.
+ *
+ * With BFS_USE_STATX, statx() is tried first.  If it fails with an
+ * ENOSYS-like error, that is remembered in a static flag and this call (and
+ * all later ones) fall back to the fstatat()-based implementation.
+ */
+static int bfs_stat_explicit(int at_fd, const char *at_path, int at_flags, struct bfs_stat *buf) {
+#if BFS_USE_STATX
+ static atomic bool has_statx = true;
+
+ if (load(&has_statx, relaxed)) {
+ int ret = bfs_statx_impl(at_fd, at_path, at_flags, buf);
+ if (ret != 0 && errno_is_like(ENOSYS)) {
+ // Don't try statx() again; fall through to bfs_stat_impl()
+ store(&has_statx, false, relaxed);
+ } else {
+ return ret;
+ }
+ }
+
+ // bfs_statx_flags() may have added this flag; drop it before fstatat()
+ at_flags &= ~AT_STATX_DONT_SYNC;
+#endif
+
+ return bfs_stat_impl(at_fd, at_path, at_flags, buf);
+}
+
+/**
+ * Implements BFS_STAT_TRYFOLLOW: if the first attempt fails with an
+ * ENOENT-like error, and TRYFOLLOW was requested without NOFOLLOW, retry with
+ * AT_SYMLINK_NOFOLLOW added.
+ */
+static int bfs_stat_tryfollow(int at_fd, const char *at_path, int at_flags, enum bfs_stat_flags bfs_flags, struct bfs_stat *buf) {
+	int ret = bfs_stat_explicit(at_fd, at_path, at_flags, buf);
+	if (ret == 0) {
+		return ret;
+	}
+
+	int follow_bits = bfs_flags & (BFS_STAT_NOFOLLOW | BFS_STAT_TRYFOLLOW);
+	if (follow_bits != BFS_STAT_TRYFOLLOW) {
+		return ret;
+	}
+
+	if (errno_is_like(ENOENT)) {
+		ret = bfs_stat_explicit(at_fd, at_path, at_flags | AT_SYMLINK_NOFOLLOW, buf);
+	}
+
+	return ret;
+}
+
+/**
+ * Facade over fstatat()/statx() (documented in stat.h).  A non-NULL at_path
+ * goes through the BFS_STAT_TRYFOLLOW retry logic; a NULL at_path stats at_fd
+ * itself.
+ */
+int bfs_stat(int at_fd, const char *at_path, enum bfs_stat_flags flags, struct bfs_stat *buf) {
+#if BFS_USE_STATX
+ int at_flags = bfs_statx_flags(flags);
+#else
+ int at_flags = bfs_fstatat_flags(flags);
+#endif
+
+ if (at_path) {
+ return bfs_stat_tryfollow(at_fd, at_path, at_flags, flags, buf);
+ }
+
+ // at_path == NULL: stat the descriptor itself
+#if BFS_USE_STATX
+ // If we have statx(), use it with AT_EMPTY_PATH for its extra features
+ at_flags |= AT_EMPTY_PATH;
+ return bfs_stat_explicit(at_fd, "", at_flags, buf);
+#else
+ // Otherwise, just use fstat() rather than fstatat(at_fd, ""), to save
+ // the kernel the trouble of copying in the empty string
+ struct stat sb;
+ if (fstat(at_fd, &sb) != 0) {
+ return -1;
+ }
+
+ bfs_stat_convert(buf, &sb);
+ return 0;
+#endif
+}
+
+/**
+ * Look up one of the four timestamps in a bfs_stat buffer.
+ *
+ * @return
+ *         The timestamp, or NULL with errno set to ENOTSUP if the field's bit
+ *         is not in buf->mask, or to EINVAL if the field is not a time field.
+ */
+const struct timespec *bfs_stat_time(const struct bfs_stat *buf, enum bfs_stat_field field) {
+	if ((buf->mask & field) == 0) {
+		errno = ENOTSUP;
+		return NULL;
+	}
+
+	const struct timespec *ts = NULL;
+
+	switch (field) {
+	case BFS_STAT_ATIME:
+		ts = &buf->atime;
+		break;
+	case BFS_STAT_BTIME:
+		ts = &buf->btime;
+		break;
+	case BFS_STAT_CTIME:
+		ts = &buf->ctime;
+		break;
+	case BFS_STAT_MTIME:
+		ts = &buf->mtime;
+		break;
+	default:
+		bfs_bug("Invalid stat field for time");
+		errno = EINVAL;
+		break;
+	}
+
+	return ts;
+}
+
+/** Build a file ID by concatenating the raw bytes of buf->dev and buf->ino. */
+void bfs_stat_id(const struct bfs_stat *buf, bfs_file_id *id) {
+	unsigned char *out = *id;
+
+	memcpy(out, &buf->dev, sizeof(buf->dev));
+	out += sizeof(buf->dev);
+
+	memcpy(out, &buf->ino, sizeof(buf->ino));
+}
diff --git a/src/stat.h b/src/stat.h
new file mode 100644
index 0000000..8d7144d
--- /dev/null
+++ b/src/stat.h
@@ -0,0 +1,172 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+/**
+ * A facade over the stat() API that unifies some details that diverge between
+ * implementations, like the names of the timespec fields and the presence of
+ * file "birth" times. On new enough Linux kernels, the facade is backed by
+ * statx() instead, and so it exposes a similar interface with a mask for which
+ * fields were successfully returned.
+ */
+
+#ifndef BFS_STAT_H
+#define BFS_STAT_H
+
+#include "prelude.h"
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <time.h>
+
+#if !BFS_HAS_STATX && BFS_HAS_STATX_SYSCALL
+# include <linux/stat.h>
+#endif
+
+#ifndef BFS_USE_STATX
+# define BFS_USE_STATX (BFS_HAS_STATX || BFS_HAS_STATX_SYSCALL)
+#endif
+
+#if BFS_USE_SYS_PARAM_H
+# include <sys/param.h>
+#endif
+
+#ifdef DEV_BSIZE
+# define BFS_STAT_BLKSIZE DEV_BSIZE
+#elif defined(S_BLKSIZE)
+# define BFS_STAT_BLKSIZE S_BLKSIZE
+#else
+# define BFS_STAT_BLKSIZE 512
+#endif
+
+/**
+ * bfs_stat field bitmask.
+ */
+enum bfs_stat_field {
+ BFS_STAT_MODE = 1 << 0,
+ BFS_STAT_DEV = 1 << 1,
+ BFS_STAT_INO = 1 << 2,
+ BFS_STAT_NLINK = 1 << 3,
+ BFS_STAT_GID = 1 << 4,
+ BFS_STAT_UID = 1 << 5,
+ BFS_STAT_SIZE = 1 << 6,
+ BFS_STAT_BLOCKS = 1 << 7,
+ BFS_STAT_RDEV = 1 << 8,
+ BFS_STAT_ATTRS = 1 << 9,
+ BFS_STAT_ATIME = 1 << 10,
+ BFS_STAT_BTIME = 1 << 11,
+ BFS_STAT_CTIME = 1 << 12,
+ BFS_STAT_MTIME = 1 << 13,
+};
+
+/**
+ * Get the human-readable name of a bfs_stat field.
+ */
+const char *bfs_stat_field_name(enum bfs_stat_field field);
+
+/**
+ * bfs_stat() flags.
+ */
+enum bfs_stat_flags {
+ /** Follow symlinks (the default). */
+ BFS_STAT_FOLLOW = 0,
+ /** Never follow symlinks. */
+ BFS_STAT_NOFOLLOW = 1 << 0,
+ /** Try to follow symlinks, but fall back to the link itself if broken. */
+ BFS_STAT_TRYFOLLOW = 1 << 1,
+ /** Try to use cached values without synchronizing remote filesystems. */
+ BFS_STAT_NOSYNC = 1 << 2,
+};
+
+/**
+ * Facade over struct stat.
+ */
+struct bfs_stat {
+ /** Bitmask indicating filled fields. */
+ enum bfs_stat_field mask;
+
+ /** File type and access mode. */
+ mode_t mode;
+ /** Device ID containing the file. */
+ dev_t dev;
+ /** Inode number. */
+ ino_t ino;
+ /** Number of hard links. */
+ nlink_t nlink;
+ /** Owner group ID. */
+ gid_t gid;
+ /** Owner user ID. */
+ uid_t uid;
+ /** File size in bytes. */
+ off_t size;
+ /** Number of disk blocks allocated (of size BFS_STAT_BLKSIZE). */
+ blkcnt_t blocks;
+ /** The device ID represented by this file. */
+ dev_t rdev;
+
+ /** Attributes/flags set on the file. */
+ unsigned long long attrs;
+
+ /** Access time. */
+ struct timespec atime;
+ /** Birth/creation time. */
+ struct timespec btime;
+ /** Status change time. */
+ struct timespec ctime;
+ /** Modification time. */
+ struct timespec mtime;
+};
+
+/**
+ * Facade over fstatat().
+ *
+ * @param at_fd
+ * The base file descriptor for the lookup.
+ * @param at_path
+ * The path to stat, relative to at_fd. Pass NULL to fstat() at_fd
+ * itself.
+ * @param flags
+ * Flags that affect the lookup.
+ * @param[out] buf
+ * A place to store the stat buffer, if successful.
+ * @return
+ * 0 on success, -1 on error.
+ */
+int bfs_stat(int at_fd, const char *at_path, enum bfs_stat_flags flags, struct bfs_stat *buf);
+
+/**
+ * Convert bfs_stat_flags to fstatat() flags.
+ */
+int bfs_fstatat_flags(enum bfs_stat_flags flags);
+
+/**
+ * Convert struct stat to struct bfs_stat.
+ */
+void bfs_stat_convert(struct bfs_stat *dest, const struct stat *src);
+
+#if BFS_USE_STATX
+/**
+ * Convert bfs_stat_flags to statx() flags.
+ */
+int bfs_statx_flags(enum bfs_stat_flags flags);
+
+/**
+ * Convert struct statx to struct bfs_stat.
+ */
+int bfs_statx_convert(struct bfs_stat *dest, const struct statx *src);
+#endif
+
+/**
+ * Get a particular time field from a bfs_stat() buffer.
+ */
+const struct timespec *bfs_stat_time(const struct bfs_stat *buf, enum bfs_stat_field field);
+
+/**
+ * A unique ID for a file.
+ */
+typedef unsigned char bfs_file_id[sizeof(dev_t) + sizeof(ino_t)];
+
+/**
+ * Compute a unique ID for a file.
+ */
+void bfs_stat_id(const struct bfs_stat *buf, bfs_file_id *id);
+
+#endif // BFS_STAT_H
diff --git a/src/thread.c b/src/thread.c
new file mode 100644
index 0000000..3793896
--- /dev/null
+++ b/src/thread.c
@@ -0,0 +1,81 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+#include "prelude.h"
+#include "thread.h"
+#include "bfstd.h"
+#include "diag.h"
+#include <errno.h>
+#include <pthread.h>
+
+/**
+ * Body of a wrapper around a pthread call that may legitimately fail.  The
+ * call's result is an error number: on 0 the enclosing function returns 0,
+ * otherwise errno is set to that error number and the function returns -1.
+ * The macro returns on both paths, so it must be the last statement.
+ */
+#define THREAD_FALLIBLE(expr) \
+ do { \
+ int err = expr; \
+ if (err == 0) { \
+ return 0; \
+ } else { \
+ errno = err; \
+ return -1; \
+ } \
+ } while (0)
+
+/**
+ * Body of a wrapper around a pthread call that is not expected to fail.
+ * Takes the call, plus an optional second argument naming one error code that
+ * is tolerated in addition to 0 (when omitted, the appended 0 fills that slot).
+ */
+#define THREAD_INFALLIBLE(...) \
+ THREAD_INFALLIBLE_(__VA_ARGS__, 0, )
+
+/**
+ * Implementation of THREAD_INFALLIBLE().  Deliberately not wrapped in
+ * do { } while (0): it declares `int err` in the enclosing scope so callers
+ * such as mutex_trylock() can inspect it afterwards.  The trailing (void)0
+ * absorbs the caller's semicolon.
+ */
+#define THREAD_INFALLIBLE_(expr, allowed, ...) \
+ int err = expr; \
+ bfs_verify(err == 0 || err == allowed, "%s: %s", #expr, xstrerror(err)); \
+ (void)0
+
+/** Fallible: returns 0, or -1 with errno set to pthread_create()'s error. */
+int thread_create(pthread_t *thread, const pthread_attr_t *attr, thread_fn *fn, void *arg) {
+ THREAD_FALLIBLE(pthread_create(thread, attr, fn, arg));
+}
+
+/** Infallible: pthread_join()'s result is checked with bfs_verify(). */
+void thread_join(pthread_t thread, void **ret) {
+ THREAD_INFALLIBLE(pthread_join(thread, ret));
+}
+
+/** Fallible: returns 0, or -1 with errno set to pthread_mutex_init()'s error. */
+int mutex_init(pthread_mutex_t *mutex, pthread_mutexattr_t *attr) {
+ THREAD_FALLIBLE(pthread_mutex_init(mutex, attr));
+}
+
+/** Infallible: pthread_mutex_lock()'s result is checked with bfs_verify(). */
+void mutex_lock(pthread_mutex_t *mutex) {
+ THREAD_INFALLIBLE(pthread_mutex_lock(mutex));
+}
+
+/** EBUSY is tolerated and reported as false; any other error fails bfs_verify(). */
+bool mutex_trylock(pthread_mutex_t *mutex) {
+ THREAD_INFALLIBLE(pthread_mutex_trylock(mutex), EBUSY);
+ // `err` is declared by THREAD_INFALLIBLE()
+ return err == 0;
+}
+
+/** Infallible: pthread_mutex_unlock()'s result is checked with bfs_verify(). */
+void mutex_unlock(pthread_mutex_t *mutex) {
+ THREAD_INFALLIBLE(pthread_mutex_unlock(mutex));
+}
+
+/** Infallible: pthread_mutex_destroy()'s result is checked with bfs_verify(). */
+void mutex_destroy(pthread_mutex_t *mutex) {
+ THREAD_INFALLIBLE(pthread_mutex_destroy(mutex));
+}
+
+/** Fallible: returns 0, or -1 with errno set to pthread_cond_init()'s error. */
+int cond_init(pthread_cond_t *cond, pthread_condattr_t *attr) {
+ THREAD_FALLIBLE(pthread_cond_init(cond, attr));
+}
+
+/** Infallible: pthread_cond_wait()'s result is checked with bfs_verify(). */
+void cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex) {
+ THREAD_INFALLIBLE(pthread_cond_wait(cond, mutex));
+}
+
+/** Infallible: pthread_cond_signal()'s result is checked with bfs_verify(). */
+void cond_signal(pthread_cond_t *cond) {
+ THREAD_INFALLIBLE(pthread_cond_signal(cond));
+}
+
+/** Infallible: pthread_cond_broadcast()'s result is checked with bfs_verify(). */
+void cond_broadcast(pthread_cond_t *cond) {
+ THREAD_INFALLIBLE(pthread_cond_broadcast(cond));
+}
+
+/** Infallible: pthread_cond_destroy()'s result is checked with bfs_verify(). */
+void cond_destroy(pthread_cond_t *cond) {
+ THREAD_INFALLIBLE(pthread_cond_destroy(cond));
+}
+
+/** Infallible: pthread_once()'s result is checked with bfs_verify(). */
+void invoke_once(pthread_once_t *once, once_fn *fn) {
+ THREAD_INFALLIBLE(pthread_once(once, fn));
+}
diff --git a/src/thread.h b/src/thread.h
new file mode 100644
index 0000000..db11bd8
--- /dev/null
+++ b/src/thread.h
@@ -0,0 +1,99 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+/**
+ * Wrappers for POSIX threading APIs.
+ */
+
+#ifndef BFS_THREAD_H
+#define BFS_THREAD_H
+
+#include "prelude.h"
+#include <pthread.h>
+
+#if __STDC_VERSION__ < C23 && !defined(thread_local)
+# if BFS_USE_THREADS_H
+# include <threads.h>
+# else
+# define thread_local _Thread_local
+# endif
+#endif
+
+/** Thread entry point type. */
+typedef void *thread_fn(void *arg);
+
+/**
+ * Wrapper for pthread_create().
+ *
+ * @return
+ * 0 on success, -1 on error.
+ */
+int thread_create(pthread_t *thread, const pthread_attr_t *attr, thread_fn *fn, void *arg);
+
+/**
+ * Wrapper for pthread_join().
+ */
+void thread_join(pthread_t thread, void **ret);
+
+/**
+ * Wrapper for pthread_mutex_init().
+ *
+ * @return
+ * 0 on success, -1 on error.
+ */
+int mutex_init(pthread_mutex_t *mutex, pthread_mutexattr_t *attr);
+
+/**
+ * Wrapper for pthread_mutex_lock().
+ */
+void mutex_lock(pthread_mutex_t *mutex);
+
+/**
+ * Wrapper for pthread_mutex_trylock().
+ *
+ * @return
+ * Whether the mutex was locked.
+ */
+bool mutex_trylock(pthread_mutex_t *mutex);
+
+/**
+ * Wrapper for pthread_mutex_unlock().
+ */
+void mutex_unlock(pthread_mutex_t *mutex);
+
+/**
+ * Wrapper for pthread_mutex_destroy().
+ */
+void mutex_destroy(pthread_mutex_t *mutex);
+
+/**
+ * Wrapper for pthread_cond_init().
+ *
+ * @return
+ * 0 on success, -1 on error.
+ */
+int cond_init(pthread_cond_t *cond, pthread_condattr_t *attr);
+
+/**
+ * Wrapper for pthread_cond_wait().
+ */
+void cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex);
+
+/**
+ * Wrapper for pthread_cond_signal().
+ */
+void cond_signal(pthread_cond_t *cond);
+
+/**
+ * Wrapper for pthread_cond_broadcast().
+ */
+void cond_broadcast(pthread_cond_t *cond);
+
+/**
+ * Wrapper for pthread_cond_destroy().
+ */
+void cond_destroy(pthread_cond_t *cond);
+
+/** pthread_once() callback type. */
+typedef void once_fn(void);
+
+/**
+ * Wrapper for pthread_once().
+ */
+void invoke_once(pthread_once_t *once, once_fn *fn);
+
+#endif // BFS_THREAD_H
diff --git a/src/trie.c b/src/trie.c
new file mode 100644
index 0000000..808953e
--- /dev/null
+++ b/src/trie.c
@@ -0,0 +1,729 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+/**
+ * This is an implementation of a "qp trie," as documented at
+ * https://dotat.at/prog/qp/README.html
+ *
+ * An uncompressed trie over the dataset {AAAA, AADD, ABCD, DDAA, DDDD} would
+ * look like
+ *
+ *           A    A    A    A
+ *         ●───→●───→●───→●───→○
+ *         │    │    │ D    D
+ *         │    │    └───→●───→○
+ *         │    │ B    C    D
+ *         │    └───→●───→●───→○
+ *         │ D    D    A    A
+ *         └───→●───→●───→●───→○
+ *                   │ D    D
+ *                   └───→●───→○
+ *
+ * A compressed (PATRICIA) trie collapses internal nodes that have only a single
+ * child, like this:
+ *
+ *           A    A    AA
+ *         ●───→●───→●────→○
+ *         │    │    │ DD
+ *         │    │    └────→○
+ *         │    │ BCD
+ *         │    └─────→○
+ *         │ DD    AA
+ *         └────→●────→○
+ *               │ DD
+ *               └────→○
+ *
+ * The nodes can be compressed further by dropping the actual compressed
+ * sequences from the nodes, storing them only in the leaves. This is the
+ * technique applied in QP tries, and the crit-bit trees that inspired them
+ * (https://cr.yp.to/critbit.html). Only the index to test, and the values to
+ * branch on, need to be stored in each node.
+ *
+ *           A    A    A
+ *         0───→1───→2───→AAAA
+ *         │    │    │ D
+ *         │    │    └───→AADD
+ *         │    │ B
+ *         │    └───→ABCD
+ *         │ D    A
+ *         └───→2───→DDAA
+ *              │ D
+ *              └───→DDDD
+ *
+ * Nodes are represented very compactly. Rather than a dense array of children,
+ * a sparse array of only the non-NULL children directly follows the node in
+ * memory. A bitmap is used to track which children exist.
+ *
+ *     ┌────────────┐
+ *     │   [4] [3] [2][1][0] ←─ children
+ *     │    ↓   ↓   ↓  ↓  ↓
+ *     │   14  10   6  3  0  ←─ sparse index
+ *     │    ↓   ↓   ↓  ↓  ↓
+ *     │   0100010001001001  ←─ bitmap
+ *     │
+ *     │ To convert a sparse index to a dense index, mask off the bits above it, and
+ *     │ count the remaining bits.
+ *     │
+ *     │       10            ←─ sparse index
+ *     │        ↓
+ *     │   0000001111111111  ←─ mask
+ *     │ & 0100010001001001  ←─ bitmap
+ *     │   ────────────────
+ *     │ = 0000000001001001
+ *     │            └──┼──┘
+ *     │              [3]    ←─ dense index
+ *     └───────────────────┘
+ *
+ * This implementation tests a whole nibble (half byte/hex digit) at every
+ * branch, so the bitmap takes up 16 bits. The remainder of a machine word is
+ * used to hold the offset, which severely constrains its range on 32-bit
+ * platforms. As a workaround, we store relative instead of absolute offsets,
+ * and insert intermediate singleton "jump" nodes when necessary.
+ */
+
+#include "prelude.h"
+#include "trie.h"
+#include "alloc.h"
+#include "bit.h"
+#include "diag.h"
+#include "list.h"
+#include <stdint.h>
+#include <string.h>
+
+bfs_static_assert(CHAR_WIDTH == 8);
+
+#if __i386__ || __x86_64__
+# define trie_clones attr(target_clones("popcnt", "default"))
+#else
+# define trie_clones
+#endif
+
+/** Number of bits for the sparse array bitmap, aka the range of a nibble. */
+#define BITMAP_WIDTH 16
+/** The number of remaining bits in a word, to hold the offset. */
+#define OFFSET_WIDTH (SIZE_WIDTH - BITMAP_WIDTH)
+/** The highest representable offset (only 64k on a 32-bit architecture). */
+#define OFFSET_MAX (((size_t)1 << OFFSET_WIDTH) - 1)
+
+/**
+ * An internal node of the trie. The two bit-fields together occupy
+ * BITMAP_WIDTH + OFFSET_WIDTH == SIZE_WIDTH bits.
+ */
+struct trie_node {
+ /**
+ * A bitmap that holds which indices exist in the sparse children array.
+ * Bit i will be set if a child exists at logical index i, and its index
+ * into the array will be popcount(bitmap & ((1 << i) - 1)).
+ */
+ size_t bitmap : BITMAP_WIDTH;
+
+ /**
+ * The offset into the key in nibbles. This is relative to the parent
+ * node, to support offsets larger than OFFSET_MAX.
+ */
+ size_t offset : OFFSET_WIDTH;
+
+ /**
+ * Flexible array of children. Each pointer uses the lowest bit as a
+ * tag to distinguish internal nodes from leaves. This is safe as long
+ * as all dynamic allocations are aligned to more than a single byte.
+ */
+ uintptr_t children[];
+};
+
+/** Test the tag bit (bit 0) of an encoded pointer; set means leaf. */
+static bool trie_is_leaf(uintptr_t ptr) {
+	return (ptr & 1) != 0;
+}
+
+/** Strip the tag bit from an encoded leaf pointer. */
+static struct trie_leaf *trie_decode_leaf(uintptr_t ptr) {
+	bfs_assert(trie_is_leaf(ptr));
+
+	uintptr_t untagged = ptr ^ 1;
+	return (struct trie_leaf *)untagged;
+}
+
+/** Tag a leaf pointer by flipping its lowest bit, which must end up set. */
+static uintptr_t trie_encode_leaf(const struct trie_leaf *leaf) {
+	uintptr_t tagged = (uintptr_t)leaf;
+	tagged ^= 1;
+
+	bfs_assert(trie_is_leaf(tagged));
+	return tagged;
+}
+
+/** Convert an encoded pointer, which must not carry the leaf tag, to a node. */
+static struct trie_node *trie_decode_node(uintptr_t ptr) {
+	bool leaf = trie_is_leaf(ptr);
+	bfs_assert(!leaf);
+
+	return (struct trie_node *)ptr;
+}
+
+/** Encode a node pointer as-is; its lowest bit must already be clear. */
+static uintptr_t trie_encode_node(const struct trie_node *node) {
+	uintptr_t encoded = (uintptr_t)node;
+
+	bool leaf = trie_is_leaf(encoded);
+	bfs_assert(!leaf);
+
+	return encoded;
+}
+
+/**
+ * Initialize an empty trie: no root, an empty leaf list, and two arenas, one
+ * for nodes (sized by their children array) and one for leaves (sized by
+ * their key array).
+ */
+void trie_init(struct trie *trie) {
+ // A zero root means the trie is empty
+ trie->root = 0;
+ LIST_INIT(trie);
+ VARENA_INIT(&trie->nodes, struct trie_node, children);
+ VARENA_INIT(&trie->leaves, struct trie_leaf, key);
+}
+
+/**
+ * Read the 4-bit nibble at a nibble offset within a byte sequence.  Even
+ * offsets select the low half of a byte, odd offsets the high half.
+ */
+static unsigned char trie_key_nibble(const void *key, size_t offset) {
+	const unsigned char *data = key;
+	unsigned char octet = data[offset / 2];
+
+	// Shift by 0 for even offsets and by 4 for odd ones, rather than
+	// branching on the parity of the offset
+	unsigned int shift = 4 * (offset % 2);
+	return (octet >> shift) & 0xF;
+}
+
+/**
+ * Finds a leaf in the trie that matches the key at every branch. If the key
+ * exists in the trie, the representative will match the searched key. But
+ * since only branch points are tested, it can be different from the key. In
+ * that case, the first mismatch between the key and the representative will be
+ * the depth at which to make a new branch to insert the key.
+ *
+ * @return
+ * Some leaf of the trie, or NULL if the trie is empty.
+ */
+trie_clones
+static struct trie_leaf *trie_representative(const struct trie *trie, const void *key, size_t length) {
+ uintptr_t ptr = trie->root;
+ if (!ptr) {
+ return NULL;
+ }
+
+ size_t offset = 0;
+ while (!trie_is_leaf(ptr)) {
+ struct trie_node *node = trie_decode_node(ptr);
+ // Node offsets are relative to the parent, so accumulate them
+ offset += node->offset;
+
+ // If the key is too short to have a nibble at this offset, or the
+ // node has no child for that nibble, descend into child 0 instead
+ // so that we still end up at some leaf
+ unsigned int index = 0;
+ if ((offset >> 1) < length) {
+ unsigned char nibble = trie_key_nibble(key, offset);
+ unsigned int bit = 1U << nibble;
+ // bits = bitmap & bit ? bitmap & (bit - 1) : 0
+ unsigned int mask = -!!(node->bitmap & bit);
+ unsigned int bits = node->bitmap & (bit - 1) & mask;
+ index = count_ones(bits);
+ }
+ ptr = node->children[index];
+ }
+
+ return trie_decode_leaf(ptr);
+}
+
+/** Look up a NUL-terminated string; the terminator counts as part of the key. */
+struct trie_leaf *trie_find_str(const struct trie *trie, const char *key) {
+	size_t length = strlen(key) + 1;
+	return trie_find_mem(trie, key, length);
+}
+
+/** Look up an exact key: the representative leaf must match in length and content. */
+struct trie_leaf *trie_find_mem(const struct trie *trie, const void *key, size_t length) {
+	struct trie_leaf *candidate = trie_representative(trie, key, length);
+	if (!candidate || candidate->length != length) {
+		return NULL;
+	}
+
+	if (memcmp(candidate->key, key, length) != 0) {
+		return NULL;
+	}
+
+	return candidate;
+}
+
+/**
+ * Find a leaf whose key starts with the given string.  The representative is
+ * searched for with the NUL terminator included, but only the strlen(key)
+ * bytes before it are compared.
+ */
+struct trie_leaf *trie_find_postfix(const struct trie *trie, const char *key) {
+	size_t length = strlen(key);
+
+	struct trie_leaf *candidate = trie_representative(trie, key, length + 1);
+	if (!candidate || candidate->length < length) {
+		return NULL;
+	}
+
+	return memcmp(candidate->key, key, length) == 0 ? candidate : NULL;
+}
+
+/**
+ * Find a leaf that may end at the current node.
+ *
+ * Follows the child for nibble 0 (always children[0] when it exists, since
+ * bit 0 is the lowest bitmap bit) for at most two levels.
+ *
+ * @return
+ * The leaf reached that way, or NULL if a nibble-0 child is missing or
+ * no leaf is reached within two steps.
+ */
+static struct trie_leaf *trie_terminal_leaf(const struct trie_node *node) {
+ // Finding a terminating NUL byte may take two nibbles
+ for (int i = 0; i < 2; ++i) {
+ if (!(node->bitmap & 1)) {
+ break;
+ }
+
+ uintptr_t ptr = node->children[0];
+ if (trie_is_leaf(ptr)) {
+ return trie_decode_leaf(ptr);
+ } else {
+ node = trie_decode_node(ptr);
+ }
+ }
+
+ return NULL;
+}
+
+/**
+ * Check if a leaf is a prefix of a search key.  The first skip bytes are not
+ * compared, and neither is the leaf's final byte.
+ */
+static bool trie_check_prefix(struct trie_leaf *leaf, size_t skip, const char *key, size_t length) {
+	if (!leaf || leaf->length > length) {
+		return false;
+	}
+
+	size_t cmp_len = leaf->length - skip - 1;
+	return memcmp(key + skip, leaf->key + skip, cmp_len) == 0;
+}
+
+/**
+ * Walk the trie along the key, remembering the most recent leaf that passed
+ * trie_check_prefix().
+ *
+ * @return
+ * The last such leaf found, or NULL if there is none.
+ */
+trie_clones
+static struct trie_leaf *trie_find_prefix_impl(const struct trie *trie, const char *key) {
+ uintptr_t ptr = trie->root;
+ if (!ptr) {
+ return NULL;
+ }
+
+ struct trie_leaf *best = NULL;
+ // Number of leading bytes already verified by an earlier match
+ size_t skip = 0;
+ // The key length includes the NUL terminator
+ size_t length = strlen(key) + 1;
+
+ size_t offset = 0;
+ while (!trie_is_leaf(ptr)) {
+ struct trie_node *node = trie_decode_node(ptr);
+ offset += node->offset;
+ // This node tests a nibble past the end of the key
+ if ((offset >> 1) >= length) {
+ return best;
+ }
+
+ struct trie_leaf *leaf = trie_terminal_leaf(node);
+ if (trie_check_prefix(leaf, skip, key, length)) {
+ best = leaf;
+ skip = offset >> 1;
+ }
+
+ unsigned char nibble = trie_key_nibble(key, offset);
+ unsigned int bit = 1U << nibble;
+ if (node->bitmap & bit) {
+ unsigned int index = count_ones(node->bitmap & (bit - 1));
+ ptr = node->children[index];
+ } else {
+ // No child for this nibble, so nothing deeper can match
+ return best;
+ }
+ }
+
+ struct trie_leaf *leaf = trie_decode_leaf(ptr);
+ if (trie_check_prefix(leaf, skip, key, length)) {
+ best = leaf;
+ }
+
+ return best;
+}
+
+/** Public entry point; forwards to the trie_clones-annotated implementation. */
+struct trie_leaf *trie_find_prefix(const struct trie *trie, const char *key) {
+ return trie_find_prefix_impl(trie, key);
+}
+
+/**
+ * Create a new leaf, holding a copy of the given key. The leaf is appended to
+ * the trie's leaf list and starts with a NULL value.
+ *
+ * @return
+ * The new leaf, or NULL if the allocation failed.
+ */
+static struct trie_leaf *trie_leaf_alloc(struct trie *trie, const void *key, size_t length) {
+ struct trie_leaf *leaf = varena_alloc(&trie->leaves, length);
+ if (!leaf) {
+ return NULL;
+ }
+
+ LIST_ITEM_INIT(leaf);
+ LIST_APPEND(trie, leaf);
+
+ leaf->value = NULL;
+ leaf->length = length;
+ memcpy(leaf->key, key, length);
+
+ return leaf;
+}
+
+/** Free a leaf: unlink it from the trie's leaf list and return it to the leaf arena. */
+static void trie_leaf_free(struct trie *trie, struct trie_leaf *leaf) {
+ LIST_REMOVE(trie, leaf);
+ varena_free(&trie->leaves, leaf, leaf->length);
+}
+
+/** Create a new node with room for size children; size must be a power of two. */
+static struct trie_node *trie_node_alloc(struct trie *trie, size_t size) {
+ bfs_assert(has_single_bit(size));
+ return varena_alloc(&trie->nodes, size);
+}
+
+/** Reallocate a trie node from old_size to new_size children; both must be powers of two. */
+static struct trie_node *trie_node_realloc(struct trie *trie, struct trie_node *node, size_t old_size, size_t new_size) {
+ bfs_assert(has_single_bit(old_size));
+ bfs_assert(has_single_bit(new_size));
+ return varena_realloc(&trie->nodes, node, old_size, new_size);
+}
+
+/**
+ * Free a node. Unlike trie_node_alloc(), size here is the node's current
+ * number of children (checked against the bitmap), not a power of two.
+ */
+static void trie_node_free(struct trie *trie, struct trie_node *node, size_t size) {
+ bfs_assert(size == (size_t)count_ones(node->bitmap));
+ varena_free(&trie->nodes, node, size);
+}
+
+// TRIE_BSWAP() is the identity on little-endian targets and a byte swap on
+// big-endian ones. It is left undefined for any other byte order, in which
+// case trie_mismatch() falls back to its bytewise loop.
+#if ENDIAN_NATIVE == ENDIAN_LITTLE
+# define TRIE_BSWAP(n) (n)
+#elif ENDIAN_NATIVE == ENDIAN_BIG
+# define TRIE_BSWAP(n) bswap(n)
+#endif
+
+/**
+ * Find the offset of the first nibble that differs between two keys.
+ *
+ * @return
+ * The nibble offset of the first difference. Returns 0 if rep is NULL,
+ * and twice the shorter of the two lengths if the keys agree on all of
+ * their common bytes.
+ */
+static size_t trie_mismatch(const struct trie_leaf *rep, const void *key, size_t length) {
+ if (!rep) {
+ return 0;
+ }
+
+ // Only the common prefix of the two keys can be compared
+ if (rep->length < length) {
+ length = rep->length;
+ }
+
+ const char *rep_bytes = rep->key;
+ const char *key_bytes = key;
+
+ // Compare sizeof(size_t) bytes at a time while a whole chunk remains
+ size_t i = 0;
+ for (size_t chunk = sizeof(chunk); i + chunk <= length; i += chunk) {
+ size_t rep_chunk, key_chunk;
+ memcpy(&rep_chunk, rep_bytes + i, sizeof(rep_chunk));
+ memcpy(&key_chunk, key_bytes + i, sizeof(key_chunk));
+
+ if (rep_chunk != key_chunk) {
+#ifdef TRIE_BSWAP
+ // Convert the byte offset to nibbles, then add the index of the
+ // lowest differing nibble within the chunk
+ size_t diff = TRIE_BSWAP(rep_chunk ^ key_chunk);
+ i *= 2;
+ i += trailing_zeros(diff) / 4;
+ return i;
+#else
+ break;
+#endif
+ }
+ }
+
+ // Compare the remaining bytes one at a time
+ for (; i < length; ++i) {
+ unsigned char diff = rep_bytes[i] ^ key_bytes[i];
+ if (diff) {
+ // The low nibble comes first; add 1 if only the high one differs
+ return 2 * i + !(diff & 0xF);
+ }
+ }
+
+ return 2 * i;
+}
+
+/**
+ * Insert a leaf into a node. The node must not have a child in that position
+ * already. Effectively takes a subtrie like this:
+ *
+ *     ptr
+ *      |
+ *      v X
+ *      *--->...
+ *      | Z
+ *      +--->...
+ *
+ * and transforms it to:
+ *
+ *     ptr
+ *      |
+ *      v X
+ *      *--->...
+ *      | Y
+ *      +--->leaf
+ *      | Z
+ *      +--->...
+ *
+ * @return
+ * The inserted leaf. If the node cannot be grown, the leaf is freed
+ * and NULL is returned.
+ */
+trie_clones
+static struct trie_leaf *trie_node_insert(struct trie *trie, uintptr_t *ptr, struct trie_leaf *leaf, unsigned char nibble) {
+ struct trie_node *node = trie_decode_node(*ptr);
+ unsigned int size = count_ones(node->bitmap);
+
+ // Double the capacity every power of two
+ if (has_single_bit(size)) {
+ node = trie_node_realloc(trie, node, size, 2 * size);
+ if (!node) {
+ trie_leaf_free(trie, leaf);
+ return NULL;
+ }
+ // The node may have moved, so update the parent's pointer
+ *ptr = trie_encode_node(node);
+ }
+
+ unsigned int bit = 1U << nibble;
+
+ // The child must not already be present
+ bfs_assert(!(node->bitmap & bit));
+ node->bitmap |= bit;
+
+ // Shift the children above the new slot up by one to make room
+ unsigned int target = count_ones(node->bitmap & (bit - 1));
+ for (size_t i = size; i > target; --i) {
+ node->children[i] = node->children[i - 1];
+ }
+ node->children[target] = trie_encode_leaf(leaf);
+ return leaf;
+}
+
+/**
+ * When the current offset exceeds OFFSET_MAX, insert "jump" nodes that bridge
+ * the gap. This function takes a subtrie like this:
+ *
+ *     ptr
+ *      |
+ *      v
+ *      *--->rep
+ *
+ * and changes it to:
+ *
+ *     ptr  ret
+ *      |    |
+ *      v    v
+ *      *--->*--->rep
+ *
+ * so that a new key can be inserted like:
+ *
+ *     ptr  ret
+ *      |    |
+ *      v    v X
+ *      *--->*--->rep
+ *           | Y
+ *           +--->key
+ *
+ * @param[in,out] offset
+ * The current nibble offset; advanced by OFFSET_MAX on success.
+ * @return
+ * A pointer to the new node's only child slot, or NULL if the node
+ * could not be allocated (in which case nothing is modified).
+ */
+static uintptr_t *trie_jump(struct trie *trie, uintptr_t *ptr, const char *key, size_t *offset) {
+ // We only ever need to jump to leaf nodes, since internal nodes are
+ // guaranteed to be within OFFSET_MAX anyway
+ bfs_assert(trie_is_leaf(*ptr));
+
+ struct trie_node *node = trie_node_alloc(trie, 1);
+ if (!node) {
+ return NULL;
+ }
+
+ *offset += OFFSET_MAX;
+ node->offset = OFFSET_MAX;
+
+ // The singleton node branches on the key's nibble at the new offset
+ unsigned char nibble = trie_key_nibble(key, *offset);
+ node->bitmap = 1 << nibble;
+
+ node->children[0] = *ptr;
+ *ptr = trie_encode_node(node);
+ return node->children;
+}
+
+/**
+ * Insert a two-way branch in front of *ptr.  Turns
+ *
+ *     ptr
+ *      |
+ *      v
+ *      *...>--->rep
+ *
+ * into
+ *
+ *     ptr
+ *      |
+ *      v    X
+ *      *--->*...>--->rep
+ *      |    Y
+ *      +--->leaf
+ *
+ * The branch is placed at nibble position `mismatch`, where the new leaf's
+ * key and the representative's key must differ.  Returns the leaf, or NULL
+ * (after freeing the leaf) if the branch node could not be allocated.
+ */
+static struct trie_leaf *trie_split(struct trie *trie, uintptr_t *ptr, struct trie_leaf *leaf, struct trie_leaf *rep, size_t offset, size_t mismatch) {
+    unsigned char new_nibble = trie_key_nibble(leaf->key, mismatch);
+    unsigned char old_nibble = trie_key_nibble(rep->key, mismatch);
+    bfs_assert(new_nibble != old_nibble);
+
+    struct trie_node *branch = trie_node_alloc(trie, 2);
+    if (!branch) {
+        trie_leaf_free(trie, leaf);
+        return NULL;
+    }
+
+    size_t delta = mismatch - offset;
+    branch->offset = delta;
+    branch->bitmap = (1 << new_nibble) | (1 << old_nibble);
+
+    // The existing subtrie is now reached through the branch, so an
+    // internal node there sits `delta` nibbles closer to its parent
+    uintptr_t existing = *ptr;
+    if (!trie_is_leaf(existing)) {
+        trie_decode_node(existing)->offset -= delta;
+    }
+
+    // Children are stored in nibble order
+    if (new_nibble > old_nibble) {
+        branch->children[0] = existing;
+        branch->children[1] = trie_encode_leaf(leaf);
+    } else {
+        branch->children[0] = trie_encode_leaf(leaf);
+        branch->children[1] = existing;
+    }
+
+    *ptr = trie_encode_node(branch);
+    return leaf;
+}
+
+struct trie_leaf *trie_insert_str(struct trie *trie, const char *key) {
+    // The key length passed down includes the terminating NUL byte
+    size_t length = strlen(key) + 1;
+    return trie_insert_mem(trie, key, length);
+}
+
+/**
+ * Implementation of trie_insert_mem().
+ *
+ * Returns the existing leaf if the key is already present, the newly
+ * allocated leaf on successful insertion, or NULL on allocation failure
+ * (in which case any leaf allocated here has been freed again).
+ */
+trie_clones
+static struct trie_leaf *trie_insert_mem_impl(struct trie *trie, const void *key, size_t length) {
+ // Find a representative leaf and the first position where it differs
+ // from the key.  Positions are compared against (length << 1), i.e.
+ // two nibbles per byte.
+ struct trie_leaf *rep = trie_representative(trie, key, length);
+ size_t mismatch = trie_mismatch(rep, key, length);
+ if (mismatch >= (length << 1)) {
+ // No mismatch within the key: it's already in the trie
+ return rep;
+ }
+
+ struct trie_leaf *leaf = trie_leaf_alloc(trie, key, length);
+ if (!leaf) {
+ return NULL;
+ }
+
+ if (!rep) {
+ // No representative, so the new leaf becomes the root
+ trie->root = trie_encode_leaf(leaf);
+ return leaf;
+ }
+
+ // Walk down from the root, stopping at the first pointer whose target
+ // lies beyond the mismatch position (or at a leaf)
+ size_t offset = 0;
+ uintptr_t *ptr = &trie->root;
+ while (!trie_is_leaf(*ptr)) {
+ struct trie_node *node = trie_decode_node(*ptr);
+ if (offset + node->offset > mismatch) {
+ break;
+ }
+ offset += node->offset;
+
+ unsigned char nibble = trie_key_nibble(key, offset);
+ unsigned int bit = 1U << nibble;
+ if (node->bitmap & bit) {
+ // Follow the existing child for this nibble
+ bfs_assert(offset < mismatch);
+ unsigned int index = count_ones(node->bitmap & (bit - 1));
+ ptr = &node->children[index];
+ } else {
+ // This node has no child for the nibble: add the leaf here
+ bfs_assert(offset == mismatch);
+ return trie_node_insert(trie, ptr, leaf, nibble);
+ }
+ }
+
+ // Insert jump nodes until the remaining gap fits in a node offset
+ while (mismatch - offset > OFFSET_MAX) {
+ ptr = trie_jump(trie, ptr, key, &offset);
+ if (!ptr) {
+ trie_leaf_free(trie, leaf);
+ return NULL;
+ }
+ }
+
+ // Finally, branch off at the mismatch position
+ return trie_split(trie, ptr, leaf, rep, offset, mismatch);
+}
+
+// Public entry point; simply forwards to trie_insert_mem_impl()
+struct trie_leaf *trie_insert_mem(struct trie *trie, const void *key, size_t length) {
+ return trie_insert_mem_impl(trie, key, length);
+}
+
+/**
+ * Release a chain of single-child nodes, along with the leaf the chain
+ * ends in.
+ */
+static void trie_free_singletons(struct trie *trie, uintptr_t ptr) {
+    uintptr_t cur = ptr;
+
+    for (; !trie_is_leaf(cur);) {
+        struct trie_node *node = trie_decode_node(cur);
+
+        // A power-of-two bitmap means exactly one child
+        bfs_assert(has_single_bit(node->bitmap));
+
+        // Grab the child before the node goes away
+        uintptr_t next = node->children[0];
+        trie_node_free(trie, node, 1);
+        cur = next;
+    }
+
+    trie_leaf_free(trie, trie_decode_leaf(cur));
+}
+
+/**
+ * Attempt to replace a node that is down to one remaining child with that
+ * child, so that
+ *
+ *     parent child
+ *       |      |
+ *       v      v
+ *       *----->*----->*----->leaf
+ *       |
+ *       +----->other
+ *
+ * turns into
+ *
+ *     parent
+ *       |
+ *       v
+ *     other
+ *
+ * Returns 0 if the node was collapsed (and freed), or -1 if the combined
+ * offset would not fit within OFFSET_MAX, in which case nothing changes.
+ */
+static int trie_collapse_node(struct trie *trie, uintptr_t *parent, struct trie_node *parent_node, unsigned int child_index) {
+    uintptr_t other = parent_node->children[child_index ^ 1];
+
+    if (!trie_is_leaf(other)) {
+        struct trie_node *other_node = trie_decode_node(other);
+
+        // The surviving node inherits the parent's offset, if it fits
+        if (other_node->offset + parent_node->offset > OFFSET_MAX) {
+            return -1;
+        }
+        other_node->offset += parent_node->offset;
+    }
+
+    *parent = other;
+    trie_node_free(trie, parent_node, 1);
+    return 0;
+}
+
+/**
+ * Implementation of trie_remove().
+ *
+ * Frees the leaf together with any chain of single-child nodes leading
+ * only to it, then tidies up the nearest ancestor with several children.
+ */
+trie_clones
+static void trie_remove_impl(struct trie *trie, struct trie_leaf *leaf) {
+ // Walk from the root down to the leaf.  `parent` tracks the pointer to
+ // the deepest node on the path that has more than one child, and
+ // child_bit/child_index identify the branch taken out of that node.
+ uintptr_t *child = &trie->root;
+ uintptr_t *parent = NULL;
+ unsigned int child_bit = 0, child_index = 0;
+ size_t offset = 0;
+ while (!trie_is_leaf(*child)) {
+ struct trie_node *node = trie_decode_node(*child);
+ offset += node->offset;
+ bfs_assert((offset >> 1) < leaf->length);
+
+ unsigned char nibble = trie_key_nibble(leaf->key, offset);
+ unsigned int bit = 1U << nibble;
+ unsigned int bitmap = node->bitmap;
+ bfs_assert(bitmap & bit);
+ unsigned int index = count_ones(bitmap & (bit - 1));
+
+ // Advance the parent pointer, unless this node had only one child
+ if (!has_single_bit(bitmap)) {
+ parent = child;
+ child_bit = bit;
+ child_index = index;
+ }
+
+ child = &node->children[index];
+ }
+
+ bfs_assert(trie_decode_leaf(*child) == leaf);
+
+ if (!parent) {
+ // No branching node on the path: everything from the root down
+ // belongs to this leaf, so the trie becomes empty
+ trie_free_singletons(trie, trie->root);
+ trie->root = 0;
+ return;
+ }
+
+ // Free the leaf and any singleton chain hanging off the parent's slot
+ struct trie_node *node = trie_decode_node(*parent);
+ child = node->children + child_index;
+ trie_free_singletons(trie, *child);
+
+ // Clear the child's bit; if only one child remains, try to replace
+ // the parent with that child
+ node->bitmap ^= child_bit;
+ unsigned int parent_size = count_ones(node->bitmap);
+ bfs_assert(parent_size > 0);
+ if (parent_size == 1 && trie_collapse_node(trie, parent, node, child_index) == 0) {
+ return;
+ }
+
+ // Close the gap left in the children array
+ if (child_index < parent_size) {
+ memmove(child, child + 1, (parent_size - child_index) * sizeof(*child));
+ }
+
+ // Halve the capacity when the size drops to a power of two.  If the
+ // reallocation fails, the existing node is simply kept.
+ if (has_single_bit(parent_size)) {
+ node = trie_node_realloc(trie, node, 2 * parent_size, parent_size);
+ if (node) {
+ *parent = trie_encode_node(node);
+ }
+ }
+}
+
+// Public entry point; simply forwards to trie_remove_impl()
+void trie_remove(struct trie *trie, struct trie_leaf *leaf) {
+ trie_remove_impl(trie, leaf);
+}
+
+// Empty the trie: reset the root and the leaf list, then clear both arenas
+void trie_clear(struct trie *trie) {
+ // A zero root is what trie_remove_impl() also stores for an empty trie
+ trie->root = 0;
+ LIST_INIT(trie);
+
+ varena_clear(&trie->leaves);
+ varena_clear(&trie->nodes);
+}
+
+// Destroy the leaf and node arenas owned by the trie
+void trie_destroy(struct trie *trie) {
+ varena_destroy(&trie->leaves);
+ varena_destroy(&trie->nodes);
+}
diff --git a/src/trie.h b/src/trie.h
new file mode 100644
index 0000000..4288d76
--- /dev/null
+++ b/src/trie.h
@@ -0,0 +1,147 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+#ifndef BFS_TRIE_H
+#define BFS_TRIE_H
+
+#include "alloc.h"
+#include "list.h"
+#include <stddef.h>
+#include <stdint.h>
+
+/**
+ * A leaf of a trie.
+ */
+struct trie_leaf {
+ /** Linked list of leaves, in insertion order. */
+ struct trie_leaf *prev, *next;
+ /** An arbitrary value associated with this leaf. */
+ void *value;
+ /**
+  * The length of the key in bytes.  For keys inserted with
+  * trie_insert_str(), this includes the terminating NUL byte.
+  */
+ size_t length;
+ /** The key itself, stored inline (flexible array member). */
+ char key[];
+};
+
+/**
+ * A trie that holds a set of fixed- or variable-length strings.
+ */
+struct trie {
+ /** Pointer to the root node/leaf, encoded as an integer (0 when empty). */
+ uintptr_t root;
+ /** Linked list of leaves. */
+ struct trie_leaf *head, *tail;
+ /** Node allocator. */
+ struct varena nodes;
+ /** Leaf allocator. */
+ struct varena leaves;
+};
+
+/**
+ * Initialize an empty trie.
+ */
+void trie_init(struct trie *trie);
+
+/**
+ * Find the leaf for a string key.
+ *
+ * @param trie
+ * The trie to search.
+ * @param key
+ * The key to look up.
+ * @return
+ * The found leaf, or NULL if the key is not present.
+ */
+struct trie_leaf *trie_find_str(const struct trie *trie, const char *key);
+
+/**
+ * Find the leaf for a fixed-size key.
+ *
+ * @param trie
+ * The trie to search.
+ * @param key
+ * The key to look up.
+ * @param length
+ * The length of the key in bytes.
+ * @return
+ * The found leaf, or NULL if the key is not present.
+ */
+struct trie_leaf *trie_find_mem(const struct trie *trie, const void *key, size_t length);
+
+/**
+ * Find the shortest leaf that starts with a given key.
+ *
+ * @param trie
+ * The trie to search.
+ * @param key
+ * The key to look up.
+ * @return
+ * A leaf that starts with the given key, or NULL.
+ */
+struct trie_leaf *trie_find_postfix(const struct trie *trie, const char *key);
+
+/**
+ * Find the leaf that is the longest prefix of the given key.
+ *
+ * @param trie
+ * The trie to search.
+ * @param key
+ * The key to look up.
+ * @return
+ * The longest prefix match for the given key, or NULL.
+ */
+struct trie_leaf *trie_find_prefix(const struct trie *trie, const char *key);
+
+/**
+ * Insert a string key into the trie.
+ *
+ * @param trie
+ * The trie to modify.
+ * @param key
+ * The key to insert.
+ * @return
+ * The inserted leaf, or NULL on failure.
+ */
+struct trie_leaf *trie_insert_str(struct trie *trie, const char *key);
+
+/**
+ * Insert a fixed-size key into the trie.
+ *
+ * @param trie
+ * The trie to modify.
+ * @param key
+ * The key to insert.
+ * @param length
+ * The length of the key in bytes.
+ * @return
+ * The inserted leaf, or NULL on failure.
+ */
+struct trie_leaf *trie_insert_mem(struct trie *trie, const void *key, size_t length);
+
+/**
+ * Remove a leaf from a trie.
+ *
+ * @param trie
+ * The trie to modify.
+ * @param leaf
+ * The leaf to remove.
+ */
+void trie_remove(struct trie *trie, struct trie_leaf *leaf);
+
+/**
+ * Remove all leaves from a trie.
+ */
+void trie_clear(struct trie *trie);
+
+/**
+ * Destroy a trie and its contents.
+ */
+void trie_destroy(struct trie *trie);
+
+/**
+ * Iterate over the leaves of a trie.
+ */
+#define for_trie(leaf, trie) \
+ for_list (struct trie_leaf, leaf, trie)
+
+#endif // BFS_TRIE_H
diff --git a/src/typo.c b/src/typo.c
new file mode 100644
index 0000000..b1c5c44
--- /dev/null
+++ b/src/typo.c
@@ -0,0 +1,164 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+#include "typo.h"
+#include <limits.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Assume QWERTY layout for now
+//
+// Each entry is a coordinate triple indexed by character value.  Going by the
+// entries below, the first component advances by 3 per key along a row, the
+// second advances by 3 per keyboard row, and the third is 1 for the shifted
+// variant of a key and 0 otherwise.  Characters not listed are all zeros.
+static const int8_t key_coords[UCHAR_MAX + 1][3] = {
+ ['`'] = { 0, 0, 0},
+ ['~'] = { 0, 0, 1},
+ ['1'] = { 3, 0, 0},
+ ['!'] = { 3, 0, 1},
+ ['2'] = { 6, 0, 0},
+ ['@'] = { 6, 0, 1},
+ ['3'] = { 9, 0, 0},
+ ['#'] = { 9, 0, 1},
+ ['4'] = {12, 0, 0},
+ ['$'] = {12, 0, 1},
+ ['5'] = {15, 0, 0},
+ ['%'] = {15, 0, 1},
+ ['6'] = {18, 0, 0},
+ ['^'] = {18, 0, 1},
+ ['7'] = {21, 0, 0},
+ ['&'] = {21, 0, 1},
+ ['8'] = {24, 0, 0},
+ ['*'] = {24, 0, 1},
+ ['9'] = {27, 0, 0},
+ ['('] = {27, 0, 1},
+ ['0'] = {30, 0, 0},
+ [')'] = {30, 0, 1},
+ ['-'] = {33, 0, 0},
+ ['_'] = {33, 0, 1},
+ ['='] = {36, 0, 0},
+ ['+'] = {36, 0, 1},
+
+ ['\t'] = { 1, 3, 0},
+ ['q'] = { 4, 3, 0},
+ ['Q'] = { 4, 3, 1},
+ ['w'] = { 7, 3, 0},
+ ['W'] = { 7, 3, 1},
+ ['e'] = {10, 3, 0},
+ ['E'] = {10, 3, 1},
+ ['r'] = {13, 3, 0},
+ ['R'] = {13, 3, 1},
+ ['t'] = {16, 3, 0},
+ ['T'] = {16, 3, 1},
+ ['y'] = {19, 3, 0},
+ ['Y'] = {19, 3, 1},
+ ['u'] = {22, 3, 0},
+ ['U'] = {22, 3, 1},
+ ['i'] = {25, 3, 0},
+ ['I'] = {25, 3, 1},
+ ['o'] = {28, 3, 0},
+ ['O'] = {28, 3, 1},
+ ['p'] = {31, 3, 0},
+ ['P'] = {31, 3, 1},
+ ['['] = {34, 3, 0},
+ ['{'] = {34, 3, 1},
+ [']'] = {37, 3, 0},
+ ['}'] = {37, 3, 1},
+ ['\\'] = {40, 3, 0},
+ ['|'] = {40, 3, 1},
+
+ ['a'] = { 5, 6, 0},
+ ['A'] = { 5, 6, 1},
+ ['s'] = { 8, 6, 0},
+ ['S'] = { 8, 6, 1},
+ ['d'] = {11, 6, 0},
+ ['D'] = {11, 6, 1},
+ ['f'] = {14, 6, 0},
+ ['F'] = {14, 6, 1},
+ ['g'] = {17, 6, 0},
+ ['G'] = {17, 6, 1},
+ ['h'] = {20, 6, 0},
+ ['H'] = {20, 6, 1},
+ ['j'] = {23, 6, 0},
+ ['J'] = {23, 6, 1},
+ ['k'] = {26, 6, 0},
+ ['K'] = {26, 6, 1},
+ ['l'] = {29, 6, 0},
+ ['L'] = {29, 6, 1},
+ [';'] = {32, 6, 0},
+ [':'] = {32, 6, 1},
+ ['\''] = {35, 6, 0},
+ ['"'] = {35, 6, 1},
+ ['\n'] = {38, 6, 0},
+
+ ['z'] = { 6, 9, 0},
+ ['Z'] = { 6, 9, 1},
+ ['x'] = { 9, 9, 0},
+ ['X'] = { 9, 9, 1},
+ ['c'] = {12, 9, 0},
+ ['C'] = {12, 9, 1},
+ ['v'] = {15, 9, 0},
+ ['V'] = {15, 9, 1},
+ ['b'] = {18, 9, 0},
+ ['B'] = {18, 9, 1},
+ ['n'] = {21, 9, 0},
+ ['N'] = {21, 9, 1},
+ ['m'] = {24, 9, 0},
+ ['M'] = {24, 9, 1},
+ [','] = {27, 9, 0},
+ ['<'] = {27, 9, 1},
+ ['.'] = {30, 9, 0},
+ ['>'] = {30, 9, 1},
+ ['/'] = {33, 9, 0},
+ ['?'] = {33, 9, 1},
+
+ [' '] = {18, 12, 0},
+};
+
+/** Manhattan distance between the keyboard coordinates of two characters. */
+static int char_distance(char a, char b) {
+    const int8_t *lhs = key_coords[(unsigned char)a];
+    const int8_t *rhs = key_coords[(unsigned char)b];
+
+    int total = 0;
+    for (int axis = 0; axis < 3; ++axis) {
+        int delta = lhs[axis] - rhs[axis];
+        total += abs(delta);
+    }
+    return total;
+}
+
+int typo_distance(const char *actual, const char *expected) {
+    // Wagner-Fischer dynamic programming for an edit distance, where a
+    // substitution costs the keyboard (Manhattan) distance between the
+    // two characters and an insertion or deletion costs a fixed amount.
+
+    const int gap_cost = 12;
+
+    size_t ncols = strlen(expected) + 1;
+
+    // Only two rows of the table are live at any time
+    int buf0[ncols], buf1[ncols];
+    int *prev = buf0, *cur = buf1;
+
+    // First row: build each prefix of `expected` purely from insertions
+    for (size_t j = 0; j < ncols; ++j) {
+        prev[j] = gap_cost * j;
+    }
+
+    for (const char *ap = actual; *ap; ++ap) {
+        cur[0] = prev[0] + gap_cost;
+
+        for (size_t j = 1; j < ncols; ++j) {
+            int subst = prev[j - 1] + char_distance(*ap, expected[j - 1]);
+            int del = prev[j] + gap_cost;
+            int ins = cur[j - 1] + gap_cost;
+
+            int best = subst;
+            if (del < best) {
+                best = del;
+            }
+            if (ins < best) {
+                best = ins;
+            }
+            cur[j] = best;
+        }
+
+        // The row just filled in becomes the previous row
+        int *swap = prev;
+        prev = cur;
+        cur = swap;
+    }
+
+    return prev[ncols - 1];
+}
diff --git a/src/typo.h b/src/typo.h
new file mode 100644
index 0000000..13eaa67
--- /dev/null
+++ b/src/typo.h
@@ -0,0 +1,18 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+#ifndef BFS_TYPO_H
+#define BFS_TYPO_H
+
+/**
+ * Find the "typo" distance between two strings.
+ *
+ * @param actual
+ * The actual string typed by the user.
+ * @param expected
+ * The expected valid string.
+ * @return The distance between the two strings.
+ */
+int typo_distance(const char *actual, const char *expected);
+
+#endif // BFS_TYPO_H
diff --git a/src/xregex.c b/src/xregex.c
new file mode 100644
index 0000000..c2711bc
--- /dev/null
+++ b/src/xregex.c
@@ -0,0 +1,295 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+#include "prelude.h"
+#include "xregex.h"
+#include "alloc.h"
+#include "bfstd.h"
+#include "diag.h"
+#include "sanity.h"
+#include "thread.h"
+#include <errno.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if BFS_USE_ONIGURUMA
+# include <langinfo.h>
+# include <oniguruma.h>
+#else
+# include <regex.h>
+#endif
+
+/**
+ * A compiled regex plus the last error code from compiling or executing it.
+ * The layout depends on which regex backend is compiled in.
+ */
+struct bfs_regex {
+#if BFS_USE_ONIGURUMA
+ // Private copy of the pattern, kept so error messages can refer to it
+ unsigned char *pattern;
+ // The compiled Oniguruma regex (NULL until onig_new() succeeds)
+ OnigRegex impl;
+ // Last Oniguruma status/error code
+ int err;
+ // Extra error details filled in by onig_new()
+ OnigErrorInfo einfo;
+#else
+ // The compiled POSIX regex
+ regex_t impl;
+ // Last regcomp()/regexec() error code
+ int err;
+#endif
+};
+
+#if BFS_USE_ONIGURUMA
+
+static int bfs_onig_status;
+static OnigEncoding bfs_onig_enc;
+
+/**
+ * pthread_once() callback: choose an Oniguruma encoding matching the current
+ * locale's charmap and initialize the library with it.
+ *
+ * On return, bfs_onig_status holds the result of onig_initialize(), and
+ * bfs_onig_enc holds the chosen encoding (or NULL if initialization failed).
+ */
+static void bfs_onig_once(void) {
+    // Fall back to ASCII by default
+    bfs_onig_enc = ONIG_ENCODING_ASCII;
+
+    // Oniguruma has no locale support, so try to guess the right encoding
+    // from the current locale.
+    const char *charmap = nl_langinfo(CODESET);
+    if (charmap) {
+#define BFS_MAP_ENCODING(name, value) \
+    do { \
+        if (strcmp(charmap, name) == 0) { \
+            bfs_onig_enc = value; \
+        } \
+    } while (0)
+#define BFS_MAP_ENCODING2(name1, name2, value) \
+    do { \
+        BFS_MAP_ENCODING(name1, value); \
+        BFS_MAP_ENCODING(name2, value); \
+    } while (0)
+
+        // These names were found with locale -m on Linux and FreeBSD
+#define BFS_MAP_ISO_8859(n) \
+    BFS_MAP_ENCODING2("ISO-8859-" #n, "ISO8859-" #n, ONIG_ENCODING_ISO_8859_ ## n)
+
+        BFS_MAP_ISO_8859(1);
+        BFS_MAP_ISO_8859(2);
+        BFS_MAP_ISO_8859(3);
+        BFS_MAP_ISO_8859(4);
+        BFS_MAP_ISO_8859(5);
+        BFS_MAP_ISO_8859(6);
+        BFS_MAP_ISO_8859(7);
+        BFS_MAP_ISO_8859(8);
+        BFS_MAP_ISO_8859(9);
+        BFS_MAP_ISO_8859(10);
+        BFS_MAP_ISO_8859(11);
+        // BFS_MAP_ISO_8859(12);
+        BFS_MAP_ISO_8859(13);
+        BFS_MAP_ISO_8859(14);
+        BFS_MAP_ISO_8859(15);
+        BFS_MAP_ISO_8859(16);
+
+        BFS_MAP_ENCODING("UTF-8", ONIG_ENCODING_UTF8);
+
+#define BFS_MAP_EUC(name) \
+    BFS_MAP_ENCODING2("EUC-" #name, "euc" #name, ONIG_ENCODING_EUC_ ## name)
+
+        BFS_MAP_EUC(JP);
+        BFS_MAP_EUC(TW);
+        BFS_MAP_EUC(KR);
+        BFS_MAP_EUC(CN);
+
+        BFS_MAP_ENCODING2("SHIFT_JIS", "SJIS", ONIG_ENCODING_SJIS);
+
+        // BFS_MAP_ENCODING("KOI-8", ONIG_ENCODING_KOI8);
+        BFS_MAP_ENCODING("KOI8-R", ONIG_ENCODING_KOI8_R);
+
+        BFS_MAP_ENCODING("CP1251", ONIG_ENCODING_CP1251);
+
+        // Big5 and GB18030 are distinct, incompatible encodings, so each
+        // charmap must select its own Oniguruma encoding.  Treating
+        // GB18030 text as Big5 makes valid strings fail the encoding
+        // validity check done before matching.
+        BFS_MAP_ENCODING2("BIG5", "Big5", ONIG_ENCODING_BIG5);
+        BFS_MAP_ENCODING("GB18030", ONIG_ENCODING_GB18030);
+    }
+
+    bfs_onig_status = onig_initialize(&bfs_onig_enc, 1);
+    if (bfs_onig_status != ONIG_NORMAL) {
+        bfs_onig_enc = NULL;
+    }
+}
+
+/**
+ * Initialize Oniguruma (at most once) and report the selected encoding.
+ *
+ * Stores the encoding chosen by bfs_onig_once() in *enc and returns the
+ * status recorded from onig_initialize().
+ */
+static int bfs_onig_initialize(OnigEncoding *enc) {
+ static pthread_once_t once = PTHREAD_ONCE_INIT;
+ invoke_once(&once, bfs_onig_once);
+
+ *enc = bfs_onig_enc;
+ return bfs_onig_status;
+}
+#endif
+
+// Compile a pattern with whichever regex backend is built in.
+//
+// There are two kinds of failure, both returning -1:
+//  - via the `fail` label, the regex is freed and *preg is set to NULL;
+//  - after a backend error, *preg is left allocated with regex->err set,
+//    presumably so the caller can pass it to bfs_regerror() and must then
+//    still call bfs_regfree() -- NOTE(review): confirm against callers.
+int bfs_regcomp(struct bfs_regex **preg, const char *pattern, enum bfs_regex_type type, enum bfs_regcomp_flags flags) {
+ struct bfs_regex *regex = *preg = ALLOC(struct bfs_regex);
+ if (!regex) {
+ return -1;
+ }
+
+#if BFS_USE_ONIGURUMA
+ // onig_error_code_to_str() says
+ //
+ // don't call this after the pattern argument of onig_new() is freed
+ //
+ // so make a defensive copy.
+ regex->pattern = (unsigned char *)strdup(pattern);
+ if (!regex->pattern) {
+ goto fail;
+ }
+
+ // Make the struct safe to pass to bfs_regfree() from here on
+ regex->impl = NULL;
+ regex->err = ONIG_NORMAL;
+
+ OnigSyntaxType *syntax = NULL;
+ switch (type) {
+ case BFS_REGEX_POSIX_BASIC:
+ syntax = ONIG_SYNTAX_POSIX_BASIC;
+ break;
+ case BFS_REGEX_POSIX_EXTENDED:
+ syntax = ONIG_SYNTAX_POSIX_EXTENDED;
+ break;
+ case BFS_REGEX_EMACS:
+ syntax = ONIG_SYNTAX_EMACS;
+ break;
+ case BFS_REGEX_GREP:
+ syntax = ONIG_SYNTAX_GREP;
+ break;
+ }
+ bfs_assert(syntax, "Invalid regex type");
+
+ // Start from the syntax's own default options
+ OnigOptionType options = syntax->options;
+ if (flags & BFS_REGEX_ICASE) {
+ options |= ONIG_OPTION_IGNORECASE;
+ }
+
+ OnigEncoding enc;
+ regex->err = bfs_onig_initialize(&enc);
+ if (regex->err != ONIG_NORMAL) {
+ return -1;
+ }
+
+ const unsigned char *end = regex->pattern + strlen(pattern);
+ regex->err = onig_new(&regex->impl, regex->pattern, end, options, enc, syntax, &regex->einfo);
+ if (regex->err != ONIG_NORMAL) {
+ return -1;
+ }
+#else
+ // The POSIX backend only handles the basic and extended syntaxes
+ int cflags = 0;
+ switch (type) {
+ case BFS_REGEX_POSIX_BASIC:
+ break;
+ case BFS_REGEX_POSIX_EXTENDED:
+ cflags |= REG_EXTENDED;
+ break;
+ default:
+ errno = EINVAL;
+ goto fail;
+ }
+
+ if (flags & BFS_REGEX_ICASE) {
+ cflags |= REG_ICASE;
+ }
+
+ regex->err = regcomp(&regex->impl, pattern, cflags);
+ if (regex->err != 0) {
+ // https://github.com/google/sanitizers/issues/1496
+ sanitize_init(&regex->impl);
+ return -1;
+ }
+#endif
+
+ return 0;
+
+fail:
+ free(regex);
+ *preg = NULL;
+ return -1;
+}
+
+// Match a string against a compiled regex.
+//
+// Returns 1 for a match, 0 for no match, and -1 on a backend error (with the
+// error code saved in regex->err).  With BFS_REGEX_ANCHOR, only a match that
+// covers the whole string counts.
+int bfs_regexec(struct bfs_regex *regex, const char *str, enum bfs_regexec_flags flags) {
+ size_t len = strlen(str);
+
+#if BFS_USE_ONIGURUMA
+ const unsigned char *ustr = (const unsigned char *)str;
+ const unsigned char *end = ustr + len;
+
+ // The docs for onig_{match,search}() say
+ //
+ // Do not pass invalid byte string in the regex character encoding.
+ //
+ // so strings that are invalid in the regex's encoding never match.
+ if (!onigenc_is_valid_mbc_string(onig_get_encoding(regex->impl), ustr, end)) {
+ return 0;
+ }
+
+ int ret;
+ if (flags & BFS_REGEX_ANCHOR) {
+ ret = onig_match(regex->impl, ustr, end, ustr, NULL, ONIG_OPTION_DEFAULT);
+ } else {
+ ret = onig_search(regex->impl, ustr, end, ustr, end, NULL, ONIG_OPTION_DEFAULT);
+ }
+
+ if (ret >= 0) {
+ if (flags & BFS_REGEX_ANCHOR) {
+ // The result is compared with the string length, so only a
+ // match of the entire string succeeds
+ return (size_t)ret == len;
+ } else {
+ return 1;
+ }
+ } else if (ret == ONIG_MISMATCH) {
+ return 0;
+ } else {
+ regex->err = ret;
+ return -1;
+ }
+#else
+ // With REG_STARTEND, regexec() takes the range to search from match[0]
+ regmatch_t match = {
+ .rm_so = 0,
+ .rm_eo = len,
+ };
+
+ int eflags = 0;
+#ifdef REG_STARTEND
+ eflags |= REG_STARTEND;
+#endif
+
+ int ret = regexec(&regex->impl, str, 1, &match, eflags);
+ if (ret == 0) {
+ if (flags & BFS_REGEX_ANCHOR) {
+ // Only accept a match spanning the whole string
+ return match.rm_so == 0 && (size_t)match.rm_eo == len;
+ } else {
+ return 1;
+ }
+ } else if (ret == REG_NOMATCH) {
+ return 0;
+ } else {
+ regex->err = ret;
+ return -1;
+ }
+#endif
+}
+
+void bfs_regfree(struct bfs_regex *regex) {
+    // Freeing a NULL regex is a no-op
+    if (!regex) {
+        return;
+    }
+
+    // Release the backend's state first, then the wrapper itself
+#if BFS_USE_ONIGURUMA
+    onig_free(regex->impl);
+    free(regex->pattern);
+#else
+    regfree(&regex->impl);
+#endif
+
+    free(regex);
+}
+
+char *bfs_regerror(const struct bfs_regex *regex) {
+    // A NULL regex is reported as an out-of-memory error
+    if (!regex) {
+        return strdup(xstrerror(ENOMEM));
+    }
+
+#if BFS_USE_ONIGURUMA
+    unsigned char *msg = malloc(ONIG_MAX_ERROR_MESSAGE_LEN);
+    if (!msg) {
+        return NULL;
+    }
+
+    onig_error_code_to_str(msg, regex->err, &regex->einfo);
+    return (char *)msg;
+#else
+    // Ask regerror() how big the message is before allocating it
+    size_t size = regerror(regex->err, &regex->impl, NULL, 0);
+    char *msg = malloc(size);
+    if (!msg) {
+        return NULL;
+    }
+
+    regerror(regex->err, &regex->impl, msg, size);
+    return msg;
+#endif
+}
diff --git a/src/xregex.h b/src/xregex.h
new file mode 100644
index 0000000..998a2b0
--- /dev/null
+++ b/src/xregex.h
@@ -0,0 +1,83 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com> and the bfs contributors
+// SPDX-License-Identifier: 0BSD
+
+#ifndef BFS_XREGEX_H
+#define BFS_XREGEX_H
+
+/**
+ * A compiled regular expression.
+ */
+struct bfs_regex;
+
+/**
+ * Regex syntax flavors.
+ *
+ * When bfs is built without Oniguruma, bfs_regcomp() only accepts the two
+ * POSIX flavors and fails with EINVAL for the others.
+ */
+enum bfs_regex_type {
+ BFS_REGEX_POSIX_BASIC,
+ BFS_REGEX_POSIX_EXTENDED,
+ BFS_REGEX_EMACS,
+ BFS_REGEX_GREP,
+};
+
+/**
+ * Regex compilation flags (a bitmask passed to bfs_regcomp()).
+ */
+enum bfs_regcomp_flags {
+ /** Treat the regex case-insensitively. */
+ BFS_REGEX_ICASE = 1 << 0,
+};
+
+/**
+ * Regex execution flags (a bitmask passed to bfs_regexec()).
+ */
+enum bfs_regexec_flags {
+ /** Only treat matches of the entire string as successful. */
+ BFS_REGEX_ANCHOR = 1 << 0,
+};
+
+/**
+ * Wrapper for regcomp() that supports additional regex types.
+ *
+ * @param[out] preg
+ * Will hold the compiled regex.
+ * @param pattern
+ * The regular expression to compile.
+ * @param type
+ * The regular expression syntax to use.
+ * @param flags
+ * Regex compilation flags.
+ * @return
+ * 0 on success, -1 on failure.
+ */
+int bfs_regcomp(struct bfs_regex **preg, const char *pattern, enum bfs_regex_type type, enum bfs_regcomp_flags flags);
+
+/**
+ * Wrapper for regexec().
+ *
+ * @param regex
+ * The regular expression to execute.
+ * @param str
+ * The string to match against.
+ * @param flags
+ * Regex execution flags.
+ * @return
+ * 1 for a match, 0 for no match, -1 on failure.
+ */
+int bfs_regexec(struct bfs_regex *regex, const char *str, enum bfs_regexec_flags flags);
+
+/**
+ * Free a compiled regex.
+ */
+void bfs_regfree(struct bfs_regex *regex);
+
+/**
+ * Get a human-readable regex error message.
+ *
+ * @param regex
+ * The compiled regex.
+ * @return
+ * A human-readable description of the error, which should be free()'d.
+ */
+char *bfs_regerror(const struct bfs_regex *regex);
+
+#endif // BFS_XREGEX_H
diff --git a/src/xspawn.c b/src/xspawn.c
new file mode 100644
index 0000000..33e5a4a
--- /dev/null
+++ b/src/xspawn.c
@@ -0,0 +1,690 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+#include "prelude.h"
+#include "xspawn.h"
+#include "alloc.h"
+#include "bfstd.h"
+#include "diag.h"
+#include "list.h"
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/resource.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#if BFS_USE_PATHS_H
+# include <paths.h>
+#endif
+
+#if _POSIX_SPAWN > 0
+# include <spawn.h>
+#endif
+
+/**
+ * Types of spawn actions.
+ */
+enum bfs_spawn_op {
+ /** Added by bfs_spawn_addopen(). */
+ BFS_SPAWN_OPEN,
+ /** Added by bfs_spawn_addclose(). */
+ BFS_SPAWN_CLOSE,
+ /** Added by bfs_spawn_adddup2(). */
+ BFS_SPAWN_DUP2,
+ /** Added by bfs_spawn_addfchdir(). */
+ BFS_SPAWN_FCHDIR,
+ /** Added by bfs_spawn_setrlimit(). */
+ BFS_SPAWN_SETRLIMIT,
+};
+
+/**
+ * A spawn action.
+ */
+struct bfs_spawn_action {
+ /** The next action in the list. */
+ struct bfs_spawn_action *next;
+
+ /** This action's operation. */
+ enum bfs_spawn_op op;
+ /** The input fd (or -1): the source for dup2, the directory for fchdir. */
+ int in_fd;
+ /** The output fd (or -1): the target of open/dup2, or the fd to close. */
+ int out_fd;
+
+ /** Operation-specific args. */
+ union {
+ /** BFS_SPAWN_OPEN args. */
+ struct {
+ /** The path to open; the caller's pointer is stored, not a copy. */
+ const char *path;
+ int flags;
+ mode_t mode;
+ };
+
+ /** BFS_SPAWN_SETRLIMIT args. */
+ struct {
+ int resource;
+ /** A copy of the caller's limit. */
+ struct rlimit rlimit;
+ };
+ };
+};
+
+// Initialize a spawn context with no flags and an empty action list.
+// Returns 0 on success, or -1 with errno set if the posix_spawn() state
+// could not be initialized.
+int bfs_spawn_init(struct bfs_spawn *ctx) {
+ ctx->flags = 0;
+ SLIST_INIT(ctx);
+
+#if _POSIX_SPAWN > 0
+ // Use posix_spawn() by default; some actions clear this flag again
+ ctx->flags |= BFS_SPAWN_USE_POSIX;
+
+ // The posix_spawn*() functions return an error number rather than
+ // setting errno, so store it there ourselves
+ errno = posix_spawn_file_actions_init(&ctx->actions);
+ if (errno != 0) {
+ return -1;
+ }
+
+ errno = posix_spawnattr_init(&ctx->attr);
+ if (errno != 0) {
+ // Don't leak the file actions initialized above
+ posix_spawn_file_actions_destroy(&ctx->actions);
+ return -1;
+ }
+#endif
+
+ return 0;
+}
+
+// Release everything owned by a spawn context.  Always returns 0.
+int bfs_spawn_destroy(struct bfs_spawn *ctx) {
+#if _POSIX_SPAWN > 0
+ posix_spawnattr_destroy(&ctx->attr);
+ posix_spawn_file_actions_destroy(&ctx->actions);
+#endif
+
+ // Free each queued action
+ for_slist (struct bfs_spawn_action, action, ctx) {
+ free(action);
+ }
+
+ return 0;
+}
+
+#if _POSIX_SPAWN > 0
+/**
+ * OR some flags into the context's posix_spawnattr flags.
+ *
+ * Returns 0 on success, or -1 with errno set on failure.
+ */
+attr(maybe_unused)
+static int bfs_spawn_addflags(struct bfs_spawn *ctx, short flags) {
+    short current;
+    errno = posix_spawnattr_getflags(&ctx->attr, &current);
+    if (errno != 0) {
+        return -1;
+    }
+
+    // Nothing to do if every requested flag is already set
+    short wanted = current | flags;
+    if (wanted == current) {
+        return 0;
+    }
+
+    errno = posix_spawnattr_setflags(&ctx->attr, wanted);
+    if (errno != 0) {
+        return -1;
+    }
+
+    return 0;
+}
+#endif // _POSIX_SPAWN > 0
+
+/**
+ * Allocate a spawn action for the given operation, with both file
+ * descriptors unset (-1).  Returns NULL if allocation fails.
+ */
+static struct bfs_spawn_action *bfs_spawn_action(enum bfs_spawn_op op) {
+    struct bfs_spawn_action *act = ALLOC(struct bfs_spawn_action);
+
+    if (act) {
+        SLIST_ITEM_INIT(act);
+        act->op = op;
+        act->in_fd = -1;
+        act->out_fd = -1;
+    }
+
+    return act;
+}
+
+int bfs_spawn_addopen(struct bfs_spawn *ctx, int fd, const char *path, int flags, mode_t mode) {
+    struct bfs_spawn_action *act = bfs_spawn_action(BFS_SPAWN_OPEN);
+    if (!act) {
+        return -1;
+    }
+
+    // Record the action for the non-posix_spawn() implementation.  Note
+    // that the path pointer itself is stored, not a copy of the string.
+    act->out_fd = fd;
+    act->path = path;
+    act->flags = flags;
+    act->mode = mode;
+
+#if _POSIX_SPAWN > 0
+    // Mirror the action into the posix_spawn() file actions, while that
+    // implementation is still in use
+    if (ctx->flags & BFS_SPAWN_USE_POSIX) {
+        errno = posix_spawn_file_actions_addopen(&ctx->actions, fd, path, flags, mode);
+        if (errno != 0) {
+            free(act);
+            return -1;
+        }
+    }
+#endif
+
+    SLIST_APPEND(ctx, act);
+    return 0;
+}
+
+int bfs_spawn_addclose(struct bfs_spawn *ctx, int fd) {
+    struct bfs_spawn_action *act = bfs_spawn_action(BFS_SPAWN_CLOSE);
+    if (!act) {
+        return -1;
+    }
+
+    // The descriptor to close is recorded as the action's output fd
+    act->out_fd = fd;
+
+#if _POSIX_SPAWN > 0
+    // Mirror the action into the posix_spawn() file actions, while that
+    // implementation is still in use
+    if (ctx->flags & BFS_SPAWN_USE_POSIX) {
+        errno = posix_spawn_file_actions_addclose(&ctx->actions, fd);
+        if (errno != 0) {
+            free(act);
+            return -1;
+        }
+    }
+#endif
+
+    SLIST_APPEND(ctx, act);
+    return 0;
+}
+
+int bfs_spawn_adddup2(struct bfs_spawn *ctx, int oldfd, int newfd) {
+    struct bfs_spawn_action *act = bfs_spawn_action(BFS_SPAWN_DUP2);
+    if (!act) {
+        return -1;
+    }
+
+    // oldfd is the source of the duplication, newfd the destination
+    act->in_fd = oldfd;
+    act->out_fd = newfd;
+
+#if _POSIX_SPAWN > 0
+    // Mirror the action into the posix_spawn() file actions, while that
+    // implementation is still in use
+    if (ctx->flags & BFS_SPAWN_USE_POSIX) {
+        errno = posix_spawn_file_actions_adddup2(&ctx->actions, oldfd, newfd);
+        if (errno != 0) {
+            free(act);
+            return -1;
+        }
+    }
+#endif
+
+    SLIST_APPEND(ctx, act);
+    return 0;
+}
+
+/**
+ * https://www.austingroupbugs.net/view.php?id=1208#c4830 says:
+ *
+ * ... a search of the directories passed as the environment variable
+ * PATH ..., using the working directory of the child process after all
+ * file_actions have been performed.
+ *
+ * but macOS and NetBSD resolve the PATH *before* file_actions (because there
+ * posix_spawn() is its own syscall).
+ */
+#define BFS_POSIX_SPAWNP_AFTER_FCHDIR !(__APPLE__ || __NetBSD__)
+
+/**
+ * Queue an fchdir(fd) to be performed in the child before it executes.
+ *
+ * If the platform provides posix_spawn_file_actions_addfchdir() (or the _np
+ * variant), the action is also added to the posix_spawn() file actions.
+ * Otherwise posix_spawn() can't express it, so BFS_SPAWN_USE_POSIX is
+ * cleared from the context.
+ *
+ * Returns 0 on success, or -1 with errno set on failure.
+ */
+int bfs_spawn_addfchdir(struct bfs_spawn *ctx, int fd) {
+    struct bfs_spawn_action *action = bfs_spawn_action(BFS_SPAWN_FCHDIR);
+    if (!action) {
+        return -1;
+    }
+
+#if BFS_HAS_POSIX_SPAWN_ADDFCHDIR
+#  define BFS_POSIX_SPAWN_ADDFCHDIR posix_spawn_file_actions_addfchdir
+#elif BFS_HAS_POSIX_SPAWN_ADDFCHDIR_NP
+#  define BFS_POSIX_SPAWN_ADDFCHDIR posix_spawn_file_actions_addfchdir_np
+#endif
+
+    // This must test the macro defined just above.  Testing any other name
+    // silently disables posix_spawn() for every fchdir action.
+#if _POSIX_SPAWN > 0 && defined(BFS_POSIX_SPAWN_ADDFCHDIR)
+    if (ctx->flags & BFS_SPAWN_USE_POSIX) {
+        errno = BFS_POSIX_SPAWN_ADDFCHDIR(&ctx->actions, fd);
+        if (errno != 0) {
+            free(action);
+            return -1;
+        }
+    }
+#else
+    ctx->flags &= ~BFS_SPAWN_USE_POSIX;
+#endif
+
+    action->in_fd = fd;
+    SLIST_APPEND(ctx, action);
+    return 0;
+}
+
+// Queue a resource limit change for the child.
+// Returns 0 on success, or -1 on failure (the action is freed).
+int bfs_spawn_setrlimit(struct bfs_spawn *ctx, int resource, const struct rlimit *rl) {
+ struct bfs_spawn_action *action = bfs_spawn_action(BFS_SPAWN_SETRLIMIT);
+ if (!action) {
+ goto fail;
+ }
+
+#ifdef POSIX_SPAWN_SETRLIMIT
+ if (bfs_spawn_addflags(ctx, POSIX_SPAWN_SETRLIMIT) != 0) {
+ goto fail;
+ }
+
+ errno = posix_spawnattr_setrlimit(&ctx->attr, resource, rl);
+ if (errno != 0) {
+ goto fail;
+ }
+#else
+ // posix_spawn() can't set resource limits here, so stop using it
+ ctx->flags &= ~BFS_SPAWN_USE_POSIX;
+#endif
+
+ action->resource = resource;
+ // Copy the limit, so the caller's struct need not outlive this call
+ action->rlimit = *rl;
+ SLIST_APPEND(ctx, action);
+ return 0;
+
+fail:
+ // action may be NULL here; free(NULL) is a no-op
+ free(action);
+ return -1;
+}
+
+/**
+ * Context for resolving executables in the $PATH.
+ */
+struct bfs_resolver {
+ /** The executable to spawn (points into buf once resolved via $PATH). */
+ const char *exe;
+ /** The $PATH to resolve in. */
+ char *path;
+ /** A buffer to hold the resolved path. */
+ char *buf;
+ /** The size of the buffer. */
+ size_t len;
+ /** Whether the executable is already resolved. */
+ bool done;
+ /** Whether to free(path). */
+ bool free;
+};
+
+/** Free a $PATH resolution context. */
+static void bfs_resolve_free(struct bfs_resolver *res) {
+ // The path is only freed when the resolver's free flag says so
+ if (res->free) {
+ free(res->path);
+ }
+ free(res->buf);
+}
+
+/**
+ * Step to the next ':'-separated component of a $PATH string.
+ *
+ * On entry, *next points at the component to return, or is NULL when the
+ * list is exhausted.  On success, *path and *len describe the component
+ * (which is not NUL-terminated unless it is the last one), and *next is
+ * advanced past the separator, or set to NULL after the final component.
+ *
+ * Returns false once there are no more components.
+ */
+static bool bfs_resolve_next(const char **path, const char **next, size_t *len) {
+    const char *start = *next;
+    *path = start;
+    if (start == NULL) {
+        return false;
+    }
+
+    const char *colon = strchr(start, ':');
+    size_t size = colon ? (size_t)(colon - start) : strlen(start);
+    *next = colon ? colon + 1 : NULL;
+
+    if (size == 0) {
+        // POSIX 8.3: "A zero-length prefix is a legacy feature that
+        // indicates the current working directory."
+        *path = ".";
+        size = 1;
+    }
+
+    *len = size;
+    return true;
+}
+
+/**
+ * Finish resolving an executable, potentially from the child process.
+ *
+ * Tries "<dir>/<exe>" for each $PATH component in turn and settles on the
+ * first one that is executable.  Returns 0 on success, or -1 with errno set
+ * (ENOENT if no candidate is executable).
+ */
+static int bfs_resolve_late(struct bfs_resolver *res) {
+    if (res->done) {
+        return 0;
+    }
+
+    char *const start = res->buf;
+    char *const limit = start + res->len;
+
+    const char *dir;
+    const char *rest = res->path;
+    size_t dirlen;
+    while (bfs_resolve_next(&dir, &rest, &dirlen)) {
+        // Assemble the candidate path in the buffer
+        char *tail = xstpencpy(start, limit, dir, dirlen);
+        tail = xstpecpy(tail, limit, "/");
+        tail = xstpecpy(tail, limit, res->exe);
+        if (tail == limit) {
+            bfs_bug("PATH resolution buffer too small");
+            errno = ENOMEM;
+            return -1;
+        }
+
+        if (xfaccessat(AT_FDCWD, start, X_OK) != 0) {
+            continue;
+        }
+
+        res->exe = start;
+        res->done = true;
+        return 0;
+    }
+
+    errno = ENOENT;
+    return -1;
+}
+
+/**
+ * Decide whether $PATH resolution is unnecessary: either the context didn't
+ * ask for a $PATH search, or the executable name contains a slash.
+ */
+static bool bfs_can_skip_resolve(const struct bfs_resolver *res, const struct bfs_spawn *ctx) {
+    // Without a context, a $PATH search is assumed
+    bool search_path = !ctx || (ctx->flags & BFS_SPAWN_USE_PATH);
+    if (!search_path) {
+        return true;
+    }
+
+    return strchr(res->exe, '/') != NULL;
+}
+
+/** Report whether the $PATH has at least one non-absolute component. */
+static bool bfs_resolve_relative(const struct bfs_resolver *res) {
+    const char *dir;
+    const char *rest = res->path;
+    size_t dirlen;
+
+    while (bfs_resolve_next(&dir, &rest, &dirlen)) {
+        bool absolute = (*dir == '/');
+        if (!absolute) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+/**
+ * Decide whether the executable can be resolved before any file actions
+ * run.  That is not possible when the $PATH has a relative component and
+ * an fchdir action is queued.
+ */
+static bool bfs_can_resolve_early(const struct bfs_resolver *res, const struct bfs_spawn *ctx) {
+    // With no context there are no actions, and an all-absolute $PATH
+    // resolves the same way regardless of the working directory
+    if (!ctx || !bfs_resolve_relative(res)) {
+        return true;
+    }
+
+    for_slist (const struct bfs_spawn_action, action, ctx) {
+        if (action->op == BFS_SPAWN_FCHDIR) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+/**
+ * Compute a buffer size big enough for "<dir>/<exe>" with any $PATH
+ * component as <dir>, including the terminating NUL.
+ */
+static size_t bfs_resolve_capacity(const struct bfs_resolver *res) {
+    size_t longest = 0;
+
+    const char *dir;
+    const char *rest = res->path;
+    size_t dirlen;
+    while (bfs_resolve_next(&dir, &rest, &dirlen)) {
+        longest = dirlen > longest ? dirlen : longest;
+    }
+
+    // path + "/" + exe + '\0'
+    size_t exe_len = strlen(res->exe);
+    return longest + 1 + exe_len + 1;
+}
+
+/**
+ * Begin resolving an executable, from the parent process.
+ *
+ * Initializes *res and, where possible, finishes the $PATH lookup right
+ * away.  Returns 0 on success (check res->done to see whether resolution is
+ * complete), or -1 on failure after releasing the resolver's resources.
+ */
+static int bfs_resolve_early(struct bfs_resolver *res, const char *exe, const struct bfs_spawn *ctx) {
+ // Zero-initialize every field except exe
+ *res = (struct bfs_resolver) {
+ .exe = exe,
+ };
+
+ if (bfs_can_skip_resolve(res, ctx)) {
+ res->done = true;
+ return 0;
+ }
+
+ // Use $PATH if it is set, otherwise fall back to a system default
+ res->path = getenv("PATH");
+ if (!res->path) {
+#if defined(_CS_PATH)
+ res->path = xconfstr(_CS_PATH);
+ // Marked for bfs_resolve_free() to release
+ res->free = true;
+#elif defined(_PATH_DEFPATH)
+ res->path = _PATH_DEFPATH;
+#else
+ errno = ENOENT;
+#endif
+ }
+ if (!res->path) {
+ goto fail;
+ }
+
+ bool can_finish = bfs_can_resolve_early(res, ctx);
+
+#if BFS_POSIX_SPAWNP_AFTER_FCHDIR
+ bool use_posix = ctx && (ctx->flags & BFS_SPAWN_USE_POSIX);
+ if (!can_finish && use_posix) {
+ // posix_spawnp() will do the resolution, so don't bother
+ // allocating a buffer
+ return 0;
+ }
+#endif
+
+ // Allocate the buffer now, even if resolution has to finish later
+ res->len = bfs_resolve_capacity(res);
+ res->buf = malloc(res->len);
+ if (!res->buf) {
+ goto fail;
+ }
+
+ if (can_finish && bfs_resolve_late(res) != 0) {
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ bfs_resolve_free(res);
+ return -1;
+}
+
+#if _POSIX_SPAWN > 0
+
+/**
+ * bfs_spawn() implementation using posix_spawn().
+ *
+ * Uses posix_spawn() when the executable is already resolved, and
+ * posix_spawnp() otherwise.  Their error-number return value is stored in
+ * errno.
+ *
+ * @return
+ *         The PID of the new process, or -1 on error.
+ */
+static pid_t bfs_posix_spawn(struct bfs_resolver *res, const struct bfs_spawn *ctx, char **argv, char **envp) {
+	pid_t pid;
+	int error;
+
+	if (res->done) {
+		error = posix_spawn(&pid, res->exe, &ctx->actions, &ctx->attr, argv, envp);
+	} else {
+		error = posix_spawnp(&pid, res->exe, &ctx->actions, &ctx->attr, argv, envp);
+	}
+
+	errno = error;
+	return error == 0 ? pid : -1;
+}
+
+/**
+ * Check if posix_spawn() can be used: the context must allow it, and unless
+ * BFS_POSIX_SPAWNP_AFTER_FCHDIR is set, the executable must already be
+ * resolved.
+ */
+static bool bfs_use_posix_spawn(const struct bfs_resolver *res, const struct bfs_spawn *ctx) {
+	bool usable = (ctx->flags & BFS_SPAWN_USE_POSIX) != 0;
+
+#if !BFS_POSIX_SPAWNP_AFTER_FCHDIR
+	usable = usable && res->done;
+#endif
+
+	return usable;
+}
+
+#endif // _POSIX_SPAWN > 0
+
+/**
+ * One past the highest signal number whose disposition the child resets.
+ * The fallback is only a guess; sigaction() fails harmlessly for numbers that
+ * aren't valid signals.
+ */
+#if defined(NSIG)
+#  define BFS_SPAWN_NSIG NSIG
+#elif defined(_NSIG)
+#  define BFS_SPAWN_NSIG _NSIG
+#else
+#  define BFS_SPAWN_NSIG 65
+#endif
+
+/**
+ * Actually exec() the new process.
+ *
+ * Runs in the forked child and never returns.  Applies the context's actions
+ * in order, finishes resolving the executable, restores the signal state and
+ * calls execve().  On any failure, errno is written to pipefd[1] and the
+ * child exits with status 127.
+ *
+ * @param res
+ *         The resolver for the executable.
+ * @param ctx
+ *         The spawn context whose actions are applied.
+ * @param argv
+ *         The arguments for execve().
+ * @param envp
+ *         The environment for execve().
+ * @param mask
+ *         The signal mask to install before execve() (the one the parent had
+ *         before it blocked signals for fork()).
+ * @param pipefd
+ *         The error-reporting pipe.
+ */
+static noreturn void bfs_spawn_exec(struct bfs_resolver *res, const struct bfs_spawn *ctx, char **argv, char **envp, const sigset_t *mask, int pipefd[2]) {
+	xclose(pipefd[0]);
+
+	for_slist (const struct bfs_spawn_action, action, ctx) {
+		int fd;
+
+		// Move the error-reporting pipe out of the way if necessary...
+		if (action->out_fd == pipefd[1]) {
+			fd = dup_cloexec(pipefd[1]);
+			if (fd < 0) {
+				goto fail;
+			}
+			xclose(pipefd[1]);
+			pipefd[1] = fd;
+		}
+
+		// ... and pretend the pipe doesn't exist
+		if (action->in_fd == pipefd[1]) {
+			errno = EBADF;
+			goto fail;
+		}
+
+		switch (action->op) {
+		case BFS_SPAWN_OPEN:
+			fd = open(action->path, action->flags, action->mode);
+			if (fd < 0) {
+				goto fail;
+			}
+			if (fd != action->out_fd) {
+				if (dup2(fd, action->out_fd) < 0) {
+					goto fail;
+				}
+				// Only out_fd should refer to the file; don't
+				// leak the temporary descriptor into the new
+				// process
+				xclose(fd);
+			}
+			break;
+		case BFS_SPAWN_CLOSE:
+			if (close(action->out_fd) != 0) {
+				goto fail;
+			}
+			break;
+		case BFS_SPAWN_DUP2:
+			if (dup2(action->in_fd, action->out_fd) < 0) {
+				goto fail;
+			}
+			break;
+		case BFS_SPAWN_FCHDIR:
+			if (fchdir(action->in_fd) != 0) {
+				goto fail;
+			}
+			break;
+		case BFS_SPAWN_SETRLIMIT:
+			if (setrlimit(action->resource, &action->rlimit) != 0) {
+				goto fail;
+			}
+			break;
+		}
+	}
+
+	if (bfs_resolve_late(res) != 0) {
+		goto fail;
+	}
+
+	// execve() resets caught signals to SIG_DFL anyway; do it now so that
+	// the parent's handlers can't run in the child once signals are
+	// unblocked.  Ignored signals are left ignored, as they would be
+	// across execve().
+	for (int sig = 1; sig < BFS_SPAWN_NSIG; ++sig) {
+		struct sigaction sa;
+		if (sigaction(sig, NULL, &sa) != 0) {
+			continue;
+		}
+		if (sa.sa_handler == SIG_DFL || sa.sa_handler == SIG_IGN) {
+			continue;
+		}
+
+		sa.sa_handler = SIG_DFL;
+		sa.sa_flags = 0;
+		sigemptyset(&sa.sa_mask);
+		// Best effort: a failure just leaves the old handler in place
+		(void)sigaction(sig, &sa, NULL);
+	}
+
+	// The signal mask is inherited across execve(), so put back the one
+	// the parent had before it blocked everything for fork().  Otherwise
+	// the new program would start with all signals blocked.
+	errno = pthread_sigmask(SIG_SETMASK, mask, NULL);
+	if (errno != 0) {
+		goto fail;
+	}
+
+	execve(res->exe, argv, envp);
+
+fail:;
+	int error = errno;
+
+	// In case of a write error, the parent will still see that we exited
+	// unsuccessfully, but won't know why
+	(void)xwrite(pipefd[1], &error, sizeof(error));
+
+	xclose(pipefd[1]);
+	_Exit(127);
+}
+
+/**
+ * bfs_spawn() implementation using fork()/exec().
+ *
+ * The child reports a failure to set up or exec by writing its errno to a
+ * pipe.  If the parent reads a full error value, it reaps the child and fails
+ * with that errno; otherwise the spawn is treated as successful.
+ *
+ * @return
+ *         The PID of the new process, or -1 on error.
+ */
+static pid_t bfs_fork_spawn(struct bfs_resolver *res, const struct bfs_spawn *ctx, char **argv, char **envp) {
+	// Use a pipe to report errors from the child
+	int pipefd[2];
+	if (pipe_cloexec(pipefd) != 0) {
+		return -1;
+	}
+
+	// Block signals before fork() so handlers don't run in the child
+	sigset_t new_mask;
+	if (sigfillset(&new_mask) != 0) {
+		goto fail;
+	}
+	sigset_t old_mask;
+	errno = pthread_sigmask(SIG_BLOCK, &new_mask, &old_mask);
+	if (errno != 0) {
+		goto fail;
+	}
+
+	pid_t pid = fork();
+	if (pid == 0) {
+		// Child; it restores old_mask itself right before execve()
+		bfs_spawn_exec(res, ctx, argv, envp, &old_mask, pipefd);
+	}
+	// Remember why fork() failed (if it did) across the calls below
+	int fork_errno = errno;
+
+	// Restore the original signal mask
+	int ret = pthread_sigmask(SIG_SETMASK, &old_mask, NULL);
+	bfs_everify(ret == 0, "pthread_sigmask()");
+
+	if (pid < 0) {
+		// fork() failed
+		errno = fork_errno;
+		goto fail;
+	}
+
+	xclose(pipefd[1]);
+
+	int error;
+	ssize_t nbytes = xread(pipefd[0], &error, sizeof(error));
+	xclose(pipefd[0]);
+	if (nbytes == sizeof(error)) {
+		// The child failed before exec; reap it and report its errno
+		xwaitpid(pid, NULL, 0);
+		errno = error;
+		return -1;
+	}
+
+	return pid;
+
+fail:
+	close_quietly(pipefd[1]);
+	close_quietly(pipefd[0]);
+	return -1;
+}
+
+/**
+ * Dispatch to the posix_spawn() implementation when it is available and
+ * usable for this spawn, and to the fork()/exec() implementation otherwise.
+ */
+static pid_t bfs_spawn_impl(struct bfs_resolver *res, const struct bfs_spawn *ctx, char **argv, char **envp) {
+#if _POSIX_SPAWN > 0
+	return bfs_use_posix_spawn(res, ctx)
+		? bfs_posix_spawn(res, ctx, argv, envp)
+		: bfs_fork_spawn(res, ctx, argv, envp);
+#else
+	return bfs_fork_spawn(res, ctx, argv, envp);
+#endif
+}
+
+pid_t bfs_spawn(const char *exe, const struct bfs_spawn *ctx, char **argv, char **envp) {
+	// Resolve the full path ahead of time: that is faster than letting
+	// execvp()/posix_spawnp() do it, since those are typically implemented
+	// by trying execv() on each $PATH component until one succeeds.
+	struct bfs_resolver res;
+	if (bfs_resolve_early(&res, exe, ctx) != 0) {
+		return -1;
+	}
+
+	// A NULL envp means the current environment
+	extern char **environ;
+	char **child_env = envp ? envp : environ;
+
+	pid_t pid = bfs_spawn_impl(&res, ctx, argv, child_env);
+	bfs_resolve_free(&res);
+	return pid;
+}
+
+char *bfs_spawn_resolve(const char *exe) {
+	struct bfs_resolver res;
+	if (bfs_resolve_early(&res, exe, NULL) != 0) {
+		return NULL;
+	}
+
+	char *resolved = NULL;
+	if (bfs_resolve_late(&res) == 0) {
+		if (res.exe == res.buf) {
+			// Hand the resolver's buffer to the caller instead of
+			// copying it
+			resolved = res.buf;
+			res.buf = NULL;
+		} else {
+			resolved = strdup(res.exe);
+		}
+	}
+
+	bfs_resolve_free(&res);
+	return resolved;
+}
diff --git a/src/xspawn.h b/src/xspawn.h
new file mode 100644
index 0000000..6a8f54a
--- /dev/null
+++ b/src/xspawn.h
@@ -0,0 +1,134 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+/**
+ * A process-spawning library inspired by posix_spawn().
+ */
+
+#ifndef BFS_XSPAWN_H
+#define BFS_XSPAWN_H
+
+#include "prelude.h"
+#include <sys/resource.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#if _POSIX_SPAWN > 0
+# include <spawn.h>
+#endif
+
+/**
+ * bfs_spawn() flags.
+ *
+ * Each flag is a distinct bit, and the flags are tested with bitwise AND
+ * against bfs_spawn::flags.
+ */
+enum bfs_spawn_flags {
+ /** Use the PATH variable to resolve the executable (like execvp()). */
+ BFS_SPAWN_USE_PATH = 1 << 0,
+ /** Whether posix_spawn() can be used. */
+ BFS_SPAWN_USE_POSIX = 1 << 1,
+};
+
+/**
+ * bfs_spawn() attributes, controlling the context of the new process.
+ *
+ * Initialize with bfs_spawn_init() and release with bfs_spawn_destroy().
+ */
+struct bfs_spawn {
+ /** Spawn flags. */
+ enum bfs_spawn_flags flags;
+
+ /** Linked list of actions. */
+ struct bfs_spawn_action *head;
+ // NOTE(review): presumably the link where the next action gets appended
+ // -- confirm against the list helpers
+ struct bfs_spawn_action **tail;
+
+#if _POSIX_SPAWN > 0
+ /** posix_spawn() context, for when we can use it. */
+ posix_spawn_file_actions_t actions;
+ posix_spawnattr_t attr;
+#endif
+};
+
+/**
+ * Create a new bfs_spawn() context.
+ *
+ * @param ctx
+ *         The context to initialize.
+ * @return
+ *         0 on success, -1 on failure.
+ */
+int bfs_spawn_init(struct bfs_spawn *ctx);
+
+/**
+ * Destroy a bfs_spawn() context.
+ *
+ * @param ctx
+ *         The context to destroy.
+ * @return
+ *         0 on success, -1 on failure.
+ */
+int bfs_spawn_destroy(struct bfs_spawn *ctx);
+
+/**
+ * Add an open() action to a bfs_spawn() context.
+ *
+ * @param ctx
+ *         The context to add the action to.
+ * @param fd
+ *         The file descriptor the opened file should end up on in the new
+ *         process (presumably, like posix_spawn_file_actions_addopen()).
+ * @param path
+ *         The path to open.
+ * @param flags
+ *         The open() flags.
+ * @param mode
+ *         The open() mode.
+ * @return
+ *         0 on success, -1 on failure.
+ */
+int bfs_spawn_addopen(struct bfs_spawn *ctx, int fd, const char *path, int flags, mode_t mode);
+
+/**
+ * Add a close() action to a bfs_spawn() context.
+ *
+ * @param ctx
+ *         The context to add the action to.
+ * @param fd
+ *         The file descriptor to close in the new process.
+ * @return
+ *         0 on success, -1 on failure.
+ */
+int bfs_spawn_addclose(struct bfs_spawn *ctx, int fd);
+
+/**
+ * Add a dup2() action to a bfs_spawn() context.
+ *
+ * @param ctx
+ *         The context to add the action to.
+ * @param oldfd
+ *         The file descriptor to duplicate.
+ * @param newfd
+ *         The file descriptor number to duplicate it onto.
+ * @return
+ *         0 on success, -1 on failure.
+ */
+int bfs_spawn_adddup2(struct bfs_spawn *ctx, int oldfd, int newfd);
+
+/**
+ * Add an fchdir() action to a bfs_spawn() context.
+ *
+ * If $PATH contains relative components, an fchdir() action prevents the
+ * executable from being resolved before the actions run.
+ *
+ * @param ctx
+ *         The context to add the action to.
+ * @param fd
+ *         A file descriptor for the directory to change into.
+ * @return
+ *         0 on success, -1 on failure.
+ */
+int bfs_spawn_addfchdir(struct bfs_spawn *ctx, int fd);
+
+/**
+ * Apply setrlimit() to a bfs_spawn() context.
+ *
+ * @param ctx
+ *         The context to add the action to.
+ * @param resource
+ *         The resource to limit, as for setrlimit().
+ * @param rl
+ *         The limit to apply, as for setrlimit().
+ * @return
+ *         0 on success, -1 on failure.
+ */
+int bfs_spawn_setrlimit(struct bfs_spawn *ctx, int resource, const struct rlimit *rl);
+
+/**
+ * Spawn a new process.
+ *
+ * If the context has BFS_SPAWN_USE_PATH set and exe contains no '/', the
+ * executable is looked up in $PATH.
+ *
+ * @param exe
+ *         The executable to run.
+ * @param ctx
+ *         The context for the new process.
+ * @param argv
+ *         The arguments for the new process.
+ * @param envp
+ *         The environment variables for the new process (NULL for the current
+ *         environment).
+ * @return
+ *         The PID of the new process, or -1 on error.
+ */
+pid_t bfs_spawn(const char *exe, const struct bfs_spawn *ctx, char **argv, char **envp);
+
+/**
+ * Look up an executable in the current PATH, as BFS_SPAWN_USE_PATH or execvp()
+ * would do.
+ *
+ * @param exe
+ *         The name of the binary to execute.  Bare names without a '/' will be
+ *         searched for in the current $PATH; names containing a '/' are
+ *         returned as a copy, without searching.
+ * @return
+ *         The full path to the executable, which should be free()'d, or NULL on
+ *         failure.
+ */
+char *bfs_spawn_resolve(const char *exe);
+
+#endif // BFS_XSPAWN_H
diff --git a/src/xtime.c b/src/xtime.c
new file mode 100644
index 0000000..2808455
--- /dev/null
+++ b/src/xtime.c
@@ -0,0 +1,348 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+#include "prelude.h"
+#include "xtime.h"
+#include "bfstd.h"
+#include "diag.h"
+#include "sanity.h"
+#include <errno.h>
+#include <limits.h>
+#include <sys/time.h>
+#include <time.h>
+#include <unistd.h>
+
+int xmktime(struct tm *tm, time_t *timep) {
+	time_t result = mktime(tm);
+
+	// (time_t)-1 is both the error return and a valid timestamp, so see
+	// what -1 looks like in local time and compare it to the tm we got back
+	if (result == -1) {
+		int saved = errno;
+
+		struct tm expected;
+		if (!localtime_r(&result, &expected)) {
+			bfs_ebug("localtime_r(-1)");
+			return -1;
+		}
+
+		bool match = tm->tm_year == expected.tm_year
+			&& tm->tm_yday == expected.tm_yday
+			&& tm->tm_hour == expected.tm_hour
+			&& tm->tm_min == expected.tm_min
+			&& tm->tm_sec == expected.tm_sec;
+		if (!match) {
+			// A real failure; report mktime()'s errno
+			errno = saved;
+			return -1;
+		}
+	}
+
+	*timep = result;
+	return 0;
+}
+
+// FreeBSD is missing an interceptor
+#if BFS_HAS_TIMEGM && !(__FreeBSD__ && SANITIZE_MEMORY)
+
+int xtimegm(struct tm *tm, time_t *timep) {
+	time_t result = timegm(tm);
+
+	// (time_t)-1 is both the error return and a valid timestamp, so see
+	// what -1 looks like in UTC and compare it to the tm we got back
+	if (result == -1) {
+		int saved = errno;
+
+		struct tm expected;
+		if (!gmtime_r(&result, &expected)) {
+			bfs_ebug("gmtime_r(-1)");
+			return -1;
+		}
+
+		bool match = tm->tm_year == expected.tm_year
+			&& tm->tm_yday == expected.tm_yday
+			&& tm->tm_hour == expected.tm_hour
+			&& tm->tm_min == expected.tm_min
+			&& tm->tm_sec == expected.tm_sec;
+		if (!match) {
+			// A real failure; report timegm()'s errno
+			errno = saved;
+			return -1;
+		}
+	}
+
+	*timep = result;
+	return 0;
+}
+
+#else
+
+/**
+ * Add delta to *value, unless that would overflow an int.
+ *
+ * @return
+ *         0 on success, or -1 (leaving *value untouched) on overflow.
+ */
+static int safe_add(int *value, int delta) {
+	int current = *value;
+
+	// Compare against the remaining headroom rather than adding first
+	int overflows = current >= 0
+		? delta > INT_MAX - current
+		: delta < INT_MIN - current;
+	if (overflows) {
+		return -1;
+	}
+
+	*value = current + delta;
+	return 0;
+}
+
+/** Divide n by d, rounding towards negative infinity (callers pass d > 0). */
+static int floor_div(int n, int d) {
+	if (n >= 0) {
+		return n / d;
+	}
+
+	// C division truncates towards zero, so shift negative numerators by
+	// one before dividing and compensate afterwards
+	return (n + 1) / d - 1;
+}
+
+/**
+ * Reduce *value into the range [0, max) and carry the excess into *next.
+ *
+ * The remainder is computed with % rather than as *value - carry * max,
+ * because that product can overflow an int when *value is close to INT_MIN.
+ *
+ * @param value
+ *         The field to normalize.  It is reduced even if the carry fails.
+ * @param max
+ *         The modulus; must be positive.
+ * @param next
+ *         The next-larger field, which receives floor(*value / max).
+ * @return
+ *         0 on success, or -1 if adding the carry to *next would overflow.
+ */
+static int wrap(int *value, int max, int *next) {
+	int carry = *value / max;
+	int rem = *value % max;
+
+	// Truncating division leaves a negative remainder for negative values;
+	// turn it into the floored quotient and a non-negative remainder
+	if (rem < 0) {
+		rem += max;
+		--carry;
+	}
+
+	*value = rem;
+	return safe_add(next, carry);
+}
+
+/**
+ * Get the number of days in a month.
+ *
+ * @param year
+ *         The year, counted from 1900 like tm_year.
+ * @param month
+ *         The month, from 0 (January) to 11 (December).
+ * @return
+ *         The length of the month in days, accounting for leap years.
+ */
+static int month_length(int year, int month) {
+	static const int month_lengths[] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
+	int ret = month_lengths[month];
+
+	// 1900 is divisible by 4 and 100, so those tests work directly on the
+	// offset year
+	if (month == 1 && year % 4 == 0) {
+		// (year + 1900) is divisible by 400 exactly when year is 100
+		// mod 400.  Test the remainder (100, or -300 for negative
+		// years) instead of computing year + 300, which could overflow.
+		int cycle = year % 400;
+		if (year % 100 != 0 || cycle == 100 || cycle == -300) {
+			++ret;
+		}
+	}
+
+	return ret;
+}
+
+/**
+ * Portable timegm() replacement.
+ *
+ * Normalizes a copy of *tm (treated as UTC) field by field, computes
+ * tm_yday and tm_wday, and converts the result to seconds since the epoch.
+ * On success, *tm is replaced by the normalized copy with tm_isdst = 0.
+ *
+ * @return
+ *         0 on success, or -1 with errno set to EOVERFLOW if a field or the
+ *         result is out of range.
+ */
+int xtimegm(struct tm *tm, time_t *timep) {
+	struct tm copy = *tm;
+	copy.tm_isdst = 0;
+
+	if (wrap(&copy.tm_sec, 60, &copy.tm_min) != 0) {
+		goto overflow;
+	}
+	if (wrap(&copy.tm_min, 60, &copy.tm_hour) != 0) {
+		goto overflow;
+	}
+	if (wrap(&copy.tm_hour, 24, &copy.tm_mday) != 0) {
+		goto overflow;
+	}
+
+	// In order to wrap the days of the month, we first need to know what
+	// month it is
+	if (wrap(&copy.tm_mon, 12, &copy.tm_year) != 0) {
+		goto overflow;
+	}
+
+	if (copy.tm_mday < 1) {
+		// Borrow days from the preceding months
+		do {
+			--copy.tm_mon;
+			if (wrap(&copy.tm_mon, 12, &copy.tm_year) != 0) {
+				goto overflow;
+			}
+
+			copy.tm_mday += month_length(copy.tm_year, copy.tm_mon);
+		} while (copy.tm_mday < 1);
+	} else {
+		// Carry excess days into the following months
+		while (true) {
+			int days = month_length(copy.tm_year, copy.tm_mon);
+			if (copy.tm_mday <= days) {
+				break;
+			}
+
+			copy.tm_mday -= days;
+			++copy.tm_mon;
+			if (wrap(&copy.tm_mon, 12, &copy.tm_year) != 0) {
+				goto overflow;
+			}
+		}
+	}
+
+	copy.tm_yday = 0;
+	for (int i = 0; i < copy.tm_mon; ++i) {
+		copy.tm_yday += month_length(copy.tm_year, i);
+	}
+	copy.tm_yday += copy.tm_mday - 1;
+
+	int leap_days;
+	// Compute floor((year - 69)/4) - floor((year - 1)/100) + floor((year + 299)/400) without overflows
+	if (copy.tm_year >= 0) {
+		leap_days = floor_div(copy.tm_year - 69, 4) - floor_div(copy.tm_year - 1, 100) + floor_div(copy.tm_year - 101, 400) + 1;
+	} else {
+		leap_days = floor_div(copy.tm_year + 3, 4) - floor_div(copy.tm_year + 99, 100) + floor_div(copy.tm_year + 299, 400) - 17;
+	}
+
+	// Widen before subtracting, so tm_year values near INT_MIN can't
+	// overflow an int
+	long long epoch_days = 365LL * ((long long)copy.tm_year - 70) + leap_days + copy.tm_yday;
+
+	// The epoch (day 0) was a Thursday
+	copy.tm_wday = (epoch_days + 4) % 7;
+	if (copy.tm_wday < 0) {
+		copy.tm_wday += 7;
+	}
+
+	long long epoch_time = copy.tm_sec + 60 * (copy.tm_min + 60 * (copy.tm_hour + 24 * epoch_days));
+
+	// Make sure the result survives the conversion to time_t
+	time_t time = (time_t)epoch_time;
+	if ((long long)time != epoch_time) {
+		goto overflow;
+	}
+
+	*tm = copy;
+	*timep = time;
+	return 0;
+
+overflow:
+	errno = EOVERFLOW;
+	return -1;
+}
+
+#endif // !BFS_HAS_TIMEGM
+
+/**
+ * Parse a decimal digit.
+ *
+ * @return
+ *         The digit's value (0-9), or -1 if c is not a decimal digit.
+ */
+static int xgetdigit(char c) {
+	int digit = c - '0';
+	return (digit >= 0 && digit <= 9) ? digit : -1;
+}
+
+/**
+ * Parse exactly n decimal digits from a timestamp.
+ *
+ * @param str
+ *         The parse position, advanced past every digit consumed.
+ * @param n
+ *         The number of digits to read.
+ * @param result
+ *         Receives the parsed value.
+ * @return
+ *         0 on success, or -1 at the first non-digit character.
+ */
+static int xgetpart(const char **str, size_t n, int *result) {
+	*result = 0;
+
+	while (n-- > 0) {
+		int dig = xgetdigit(**str);
+		if (dig < 0) {
+			return -1;
+		}
+
+		*result = *result * 10 + dig;
+		++*str;
+	}
+
+	return 0;
+}
+
+/**
+ * Parse an ISO 8601-style timestamp of the form
+ *
+ *     YYYY[-]MM[-]DD[[T| ]hh[[:]mm[[:]ss]][Z|(+|-)hh[[:]mm]]]
+ *
+ * Without a zone designator, the timestamp is interpreted as local time via
+ * xmktime().  With "Z" or a numeric offset, it is converted with xtimegm()
+ * and then adjusted by the offset.  The resulting tv_nsec is always 0.
+ *
+ * Returns 0 on success.  On failure, returns -1 with errno set to EINVAL for
+ * malformed input, or as left by xmktime()/xtimegm().
+ */
+int xgetdate(const char *str, struct timespec *result) {
+ // tm_isdst = -1 lets mktime() determine whether DST is in effect
+ struct tm tm = {
+ .tm_isdst = -1,
+ };
+
+ int tz_hour = 0;
+ int tz_min = 0;
+ bool tz_negative = false;
+ // Stays true unless a zone designator is parsed
+ bool local = true;
+
+ // YYYY
+ if (xgetpart(&str, 4, &tm.tm_year) != 0) {
+ goto invalid;
+ }
+ // tm_year counts from 1900
+ tm.tm_year -= 1900;
+
+ // MM
+ if (*str == '-') {
+ ++str;
+ }
+ if (xgetpart(&str, 2, &tm.tm_mon) != 0) {
+ goto invalid;
+ }
+ // tm_mon counts from 0
+ tm.tm_mon -= 1;
+
+ // DD
+ if (*str == '-') {
+ ++str;
+ }
+ if (xgetpart(&str, 2, &tm.tm_mday) != 0) {
+ goto invalid;
+ }
+
+ // A date without a time is accepted; the separator before the time is
+ // optional
+ if (!*str) {
+ goto end;
+ } else if (*str == 'T' || *str == ' ') {
+ ++str;
+ }
+
+ // hh
+ if (xgetpart(&str, 2, &tm.tm_hour) != 0) {
+ goto invalid;
+ }
+
+ // mm
+ if (!*str) {
+ goto end;
+ } else if (*str == ':') {
+ ++str;
+ } else if (xgetdigit(*str) < 0) {
+ // Not a digit, so the minutes were omitted
+ goto zone;
+ }
+ if (xgetpart(&str, 2, &tm.tm_min) != 0) {
+ goto invalid;
+ }
+
+ // ss
+ if (!*str) {
+ goto end;
+ } else if (*str == ':') {
+ ++str;
+ } else if (xgetdigit(*str) < 0) {
+ // Not a digit, so the seconds were omitted
+ goto zone;
+ }
+ if (xgetpart(&str, 2, &tm.tm_sec) != 0) {
+ goto invalid;
+ }
+
+zone:
+ if (!*str) {
+ goto end;
+ } else if (*str == 'Z') {
+ local = false;
+ ++str;
+ } else if (*str == '+' || *str == '-') {
+ local = false;
+ tz_negative = *str == '-';
+ ++str;
+
+ // hh
+ if (xgetpart(&str, 2, &tz_hour) != 0) {
+ goto invalid;
+ }
+
+ // mm
+ if (!*str) {
+ goto end;
+ } else if (*str == ':') {
+ ++str;
+ }
+ if (xgetpart(&str, 2, &tz_min) != 0) {
+ goto invalid;
+ }
+ } else {
+ goto invalid;
+ }
+
+ // Nothing may follow the zone designator
+ if (*str) {
+ goto invalid;
+ }
+
+end:
+ if (local) {
+ if (xmktime(&tm, &result->tv_sec) != 0) {
+ goto error;
+ }
+ } else {
+ if (xtimegm(&tm, &result->tv_sec) != 0) {
+ goto error;
+ }
+
+ // A positive offset means the given time is ahead of UTC, so
+ // subtract it to get UTC (and vice versa)
+ int offset = (tz_hour * 60 + tz_min) * 60;
+ if (tz_negative) {
+ result->tv_sec += offset;
+ } else {
+ result->tv_sec -= offset;
+ }
+ }
+
+ result->tv_nsec = 0;
+ return 0;
+
+invalid:
+ errno = EINVAL;
+error:
+ return -1;
+}
+
+int xgettime(struct timespec *result) {
+#if _POSIX_TIMERS > 0
+	return clock_gettime(CLOCK_REALTIME, result);
+#else
+	// No POSIX timers, so convert from gettimeofday()'s microseconds
+	struct timeval now;
+	int status = gettimeofday(&now, NULL);
+	if (status != 0) {
+		return status;
+	}
+
+	result->tv_sec = now.tv_sec;
+	result->tv_nsec = now.tv_usec * 1000L;
+	return status;
+#endif
+}
diff --git a/src/xtime.h b/src/xtime.h
new file mode 100644
index 0000000..fb60ae4
--- /dev/null
+++ b/src/xtime.h
@@ -0,0 +1,59 @@
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+
+/**
+ * Date/time handling.
+ */
+
+#ifndef BFS_XTIME_H
+#define BFS_XTIME_H
+
+#include <time.h>
+
+/**
+ * mktime() wrapper that reports errors more reliably.
+ *
+ * mktime() returns -1 both on error and for the time one second before the
+ * epoch; this wrapper tells the two apart.
+ *
+ * @param[in,out] tm
+ *         The struct tm to convert.
+ * @param[out] timep
+ *         A pointer to the result.
+ * @return
+ *         0 on success, -1 on failure.
+ */
+int xmktime(struct tm *tm, time_t *timep);
+
+/**
+ * A portable timegm(), the inverse of gmtime().
+ *
+ * Uses the system timegm() when available, and a built-in implementation
+ * otherwise.
+ *
+ * @param[in,out] tm
+ *         The struct tm to convert.
+ * @param[out] timep
+ *         A pointer to the result.
+ * @return
+ *         0 on success, -1 on failure.
+ */
+int xtimegm(struct tm *tm, time_t *timep);
+
+/**
+ * Parse an ISO 8601-style timestamp.
+ *
+ * The accepted form is
+ *
+ *     YYYY[-]MM[-]DD[[T| ]hh[[:]mm[[:]ss]][Z|(+|-)hh[[:]mm]]]
+ *
+ * Timestamps without a zone designator are interpreted as local time.  The
+ * result always has tv_nsec == 0.
+ *
+ * @param[in] str
+ *         The string to parse.
+ * @param[out] result
+ *         A pointer to the result.
+ * @return
+ *         0 on success, -1 on failure (with errno set to EINVAL if the string
+ *         is malformed).
+ */
+int xgetdate(const char *str, struct timespec *result);
+
+/**
+ * Get the current time.
+ *
+ * Reads CLOCK_REALTIME with clock_gettime() where POSIX timers are available,
+ * and falls back to gettimeofday() otherwise.
+ *
+ * @param[out] result
+ *         A pointer to the result.
+ * @return
+ *         0 on success, -1 on failure.
+ */
+int xgettime(struct timespec *result);
+
+#endif // BFS_XTIME_H