diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/alloc.c | 384 | ||||
-rw-r--r-- | src/alloc.h | 383 | ||||
-rw-r--r-- | src/atomic.h | 118 | ||||
-rw-r--r-- | src/bar.c | 218 | ||||
-rw-r--r-- | src/bar.h | 44 | ||||
-rw-r--r-- | src/bfstd.c | 1006 | ||||
-rw-r--r-- | src/bfstd.h | 523 | ||||
-rw-r--r-- | src/bftw.c | 2326 | ||||
-rw-r--r-- | src/bftw.h | 218 | ||||
-rw-r--r-- | src/bit.h | 401 | ||||
-rw-r--r-- | src/color.c | 1418 | ||||
-rw-r--r-- | src/color.h | 118 | ||||
-rw-r--r-- | src/ctx.c | 298 | ||||
-rw-r--r-- | src/ctx.h | 169 | ||||
-rw-r--r-- | src/diag.c | 300 | ||||
-rw-r--r-- | src/diag.h | 258 | ||||
-rw-r--r-- | src/dir.c | 371 | ||||
-rw-r--r-- | src/dir.h | 175 | ||||
-rw-r--r-- | src/dstring.c | 279 | ||||
-rw-r--r-- | src/dstring.h | 322 | ||||
-rw-r--r-- | src/eval.c | 1696 | ||||
-rw-r--r-- | src/eval.h | 102 | ||||
-rw-r--r-- | src/exec.c | 690 | ||||
-rw-r--r-- | src/exec.h | 108 | ||||
-rw-r--r-- | src/expr.c | 85 | ||||
-rw-r--r-- | src/expr.h | 247 | ||||
-rw-r--r-- | src/fsade.c | 508 | ||||
-rw-r--r-- | src/fsade.h | 81 | ||||
-rw-r--r-- | src/ioq.c | 1100 | ||||
-rw-r--r-- | src/ioq.h | 198 | ||||
-rw-r--r-- | src/list.h | 581 | ||||
-rw-r--r-- | src/main.c | 150 | ||||
-rw-r--r-- | src/mtab.c | 303 | ||||
-rw-r--r-- | src/mtab.h | 58 | ||||
-rw-r--r-- | src/opt.c | 2299 | ||||
-rw-r--r-- | src/opt.h | 23 | ||||
-rw-r--r-- | src/parse.c | 3712 | ||||
-rw-r--r-- | src/parse.h | 23 | ||||
-rw-r--r-- | src/prelude.h | 370 | ||||
-rw-r--r-- | src/printf.c | 968 | ||||
-rw-r--r-- | src/printf.h | 55 | ||||
-rw-r--r-- | src/pwcache.c | 220 | ||||
-rw-r--r-- | src/pwcache.h | 124 | ||||
-rw-r--r-- | src/sanity.h | 94 | ||||
-rw-r--r-- | src/sighook.c | 600 | ||||
-rw-r--r-- | src/sighook.h | 73 | ||||
-rw-r--r-- | src/stat.c | 345 | ||||
-rw-r--r-- | src/stat.h | 172 | ||||
-rw-r--r-- | src/thread.c | 81 | ||||
-rw-r--r-- | src/thread.h | 99 | ||||
-rw-r--r-- | src/trie.c | 729 | ||||
-rw-r--r-- | src/trie.h | 147 | ||||
-rw-r--r-- | src/typo.c | 164 | ||||
-rw-r--r-- | src/typo.h | 18 | ||||
-rw-r--r-- | src/xregex.c | 295 | ||||
-rw-r--r-- | src/xregex.h | 83 | ||||
-rw-r--r-- | src/xspawn.c | 690 | ||||
-rw-r--r-- | src/xspawn.h | 134 | ||||
-rw-r--r-- | src/xtime.c | 348 | ||||
-rw-r--r-- | src/xtime.h | 59 |
60 files changed, 27161 insertions, 0 deletions
diff --git a/src/alloc.c b/src/alloc.c new file mode 100644 index 0000000..ebaff38 --- /dev/null +++ b/src/alloc.c @@ -0,0 +1,384 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +#include "prelude.h" +#include "alloc.h" +#include "bit.h" +#include "diag.h" +#include "sanity.h" +#include <errno.h> +#include <stdlib.h> +#include <stdint.h> +#include <string.h> + +/** The largest possible allocation size. */ +#if PTRDIFF_MAX < SIZE_MAX / 2 +# define ALLOC_MAX ((size_t)PTRDIFF_MAX) +#else +# define ALLOC_MAX (SIZE_MAX / 2) +#endif + +/** Portable aligned_alloc()/posix_memalign(). */ +static void *xmemalign(size_t align, size_t size) { + bfs_assert(has_single_bit(align)); + bfs_assert(align >= sizeof(void *)); + bfs_assert(is_aligned(align, size)); + +#if BFS_HAS_ALIGNED_ALLOC + return aligned_alloc(align, size); +#else + void *ptr = NULL; + errno = posix_memalign(&ptr, align, size); + return ptr; +#endif +} + +void *alloc(size_t align, size_t size) { + bfs_assert(has_single_bit(align)); + bfs_assert(is_aligned(align, size)); + + if (size > ALLOC_MAX) { + errno = EOVERFLOW; + return NULL; + } + + if (align <= alignof(max_align_t)) { + return malloc(size); + } else { + return xmemalign(align, size); + } +} + +void *zalloc(size_t align, size_t size) { + bfs_assert(has_single_bit(align)); + bfs_assert(is_aligned(align, size)); + + if (size > ALLOC_MAX) { + errno = EOVERFLOW; + return NULL; + } + + if (align <= alignof(max_align_t)) { + return calloc(1, size); + } + + void *ret = xmemalign(align, size); + if (ret) { + memset(ret, 0, size); + } + return ret; +} + +void *xrealloc(void *ptr, size_t align, size_t old_size, size_t new_size) { + bfs_assert(has_single_bit(align)); + bfs_assert(is_aligned(align, old_size)); + bfs_assert(is_aligned(align, new_size)); + + if (new_size == 0) { + free(ptr); + return NULL; + } else if (new_size > ALLOC_MAX) { + errno = EOVERFLOW; + return NULL; + } + + if (align <= alignof(max_align_t)) { + return realloc(ptr, new_size); + } + + // There is no aligned_realloc(), so reallocate and copy manually + void *ret = xmemalign(align, new_size); + if (!ret) { + return NULL; + } + + size_t min_size = old_size < new_size ? old_size : new_size; + if (min_size) { + memcpy(ret, ptr, min_size); + } + + free(ptr); + return ret; +} + +void *reserve(void *ptr, size_t align, size_t size, size_t count) { + // No need to overflow-check the current size + size_t old_size = size * count; + + // Capacity is doubled every power of two, from 0→1, 1→2, 2→4, etc. + // If we stayed within the same size class, re-use ptr. + if (count & (count - 1)) { + // Tell sanitizers about the new array element + sanitize_alloc((char *)ptr + old_size, size); + errno = 0; + return ptr; + } + + // No need to overflow-check; xrealloc() will fail before we overflow + size_t new_size = count ? 2 * old_size : size; + void *ret = xrealloc(ptr, align, old_size, new_size); + if (!ret) { + // errno is used to communicate success/failure to the RESERVE() macro + bfs_assert(errno != 0); + return ptr; + } + + // Pretend we only allocated one more element + sanitize_free((char *)ret + old_size + size, new_size - old_size - size); + errno = 0; + return ret; +} + +/** + * An arena allocator chunk. + */ +union chunk { + /** + * Free chunks are stored in a singly linked list. The pointer to the + * next chunk is represented by an offset from the chunk immediately + * after this one in memory, so that zalloc() correctly initializes a + * linked list of chunks (except for the last one). + */ + uintptr_t next; + + // char object[]; +}; + +/** Decode the next chunk. */ +static union chunk *chunk_next(const struct arena *arena, const union chunk *chunk) { + uintptr_t base = (uintptr_t)chunk + arena->size; + return (union chunk *)(base + chunk->next); +} + +/** Encode the next chunk. */ +static void chunk_set_next(const struct arena *arena, union chunk *chunk, union chunk *next) { + uintptr_t base = (uintptr_t)chunk + arena->size; + chunk->next = (uintptr_t)next - base; +} + +void arena_init(struct arena *arena, size_t align, size_t size) { + bfs_assert(has_single_bit(align)); + bfs_assert(is_aligned(align, size)); + + if (align < alignof(union chunk)) { + align = alignof(union chunk); + } + if (size < sizeof(union chunk)) { + size = sizeof(union chunk); + } + bfs_assert(is_aligned(align, size)); + + arena->chunks = NULL; + arena->nslabs = 0; + arena->slabs = NULL; + arena->align = align; + arena->size = size; +} + +/** Allocate a new slab. */ +attr(cold) +static int slab_alloc(struct arena *arena) { + // Make the initial allocation size ~4K + size_t size = 4096; + if (size < arena->size) { + size = arena->size; + } + // Trim off the excess + size -= size % arena->size; + // Double the size for every slab + size <<= arena->nslabs; + + // Allocate the slab + void *slab = zalloc(arena->align, size); + if (!slab) { + return -1; + } + + // Grow the slab array + void **pslab = RESERVE(void *, &arena->slabs, &arena->nslabs); + if (!pslab) { + free(slab); + return -1; + } + + // Fix the last chunk->next offset + void *last = (char *)slab + size - arena->size; + chunk_set_next(arena, last, arena->chunks); + + // We can rely on zero-initialized slabs, but others shouldn't + sanitize_uninit(slab, size); + + arena->chunks = *pslab = slab; + return 0; +} + +void *arena_alloc(struct arena *arena) { + if (!arena->chunks && slab_alloc(arena) != 0) { + return NULL; + } + + union chunk *chunk = arena->chunks; + sanitize_alloc(chunk, arena->size); + + sanitize_init(chunk); + arena->chunks = chunk_next(arena, chunk); + sanitize_uninit(chunk, arena->size); + + return chunk; +} + +void arena_free(struct arena *arena, void *ptr) { + union chunk *chunk = ptr; + chunk_set_next(arena, chunk, arena->chunks); + arena->chunks = chunk; + sanitize_free(chunk, arena->size); +} + +void arena_clear(struct arena *arena) { + for (size_t i = 0; i < arena->nslabs; ++i) { + free(arena->slabs[i]); + } + free(arena->slabs); + + arena->chunks = NULL; + arena->nslabs = 0; + arena->slabs = NULL; +} + +void arena_destroy(struct arena *arena) { + arena_clear(arena); + sanitize_uninit(arena); +} + +void varena_init(struct varena *varena, size_t align, size_t min, size_t offset, size_t size) { + varena->align = align; + varena->offset = offset; + varena->size = size; + varena->narenas = 0; + varena->arenas = NULL; + + // The smallest size class is at least as many as fit in the smallest + // aligned allocation size + size_t min_count = (flex_size(align, min, offset, size, 1) - offset + size - 1) / size; + varena->shift = bit_width(min_count - 1); +} + +/** Get the size class for the given array length. */ +static size_t varena_size_class(struct varena *varena, size_t count) { + // Since powers of two are common array lengths, make them the + // (inclusive) upper bound for each size class + return bit_width((count - !!count) >> varena->shift); +} + +/** Get the exact size of a flexible struct. */ +static size_t varena_exact_size(const struct varena *varena, size_t count) { + return flex_size(varena->align, 0, varena->offset, varena->size, count); +} + +/** Get the arena for the given array length. */ +static struct arena *varena_get(struct varena *varena, size_t count) { + size_t i = varena_size_class(varena, count); + + while (i >= varena->narenas) { + size_t j = varena->narenas; + struct arena *arena = RESERVE(struct arena, &varena->arenas, &varena->narenas); + if (!arena) { + return NULL; + } + + size_t shift = j + varena->shift; + size_t size = varena_exact_size(varena, (size_t)1 << shift); + arena_init(arena, varena->align, size); + } + + return &varena->arenas[i]; +} + +void *varena_alloc(struct varena *varena, size_t count) { + struct arena *arena = varena_get(varena, count); + if (!arena) { + return NULL; + } + + void *ret = arena_alloc(arena); + if (!ret) { + return NULL; + } + + // Tell the sanitizers the exact size of the allocated struct + sanitize_free(ret, arena->size); + sanitize_alloc(ret, varena_exact_size(varena, count)); + + return ret; +} + +void *varena_realloc(struct varena *varena, void *ptr, size_t old_count, size_t new_count) { + struct arena *new_arena = varena_get(varena, new_count); + struct arena *old_arena = varena_get(varena, old_count); + if (!new_arena) { + return NULL; + } + + size_t new_exact_size = varena_exact_size(varena, new_count); + size_t old_exact_size = varena_exact_size(varena, old_count); + + if (new_arena == old_arena) { + if (new_count < old_count) { + sanitize_free((char *)ptr + new_exact_size, old_exact_size - new_exact_size); + } else if (new_count > old_count) { + sanitize_alloc((char *)ptr + old_exact_size, new_exact_size - old_exact_size); + } + return ptr; + } + + void *ret = arena_alloc(new_arena); + if (!ret) { + return NULL; + } + + size_t old_size = old_arena->size; + sanitize_alloc((char *)ptr + old_exact_size, old_size - old_exact_size); + + size_t new_size = new_arena->size; + size_t min_size = new_size < old_size ? new_size : old_size; + memcpy(ret, ptr, min_size); + + arena_free(old_arena, ptr); + sanitize_free((char *)ret + new_exact_size, new_size - new_exact_size); + + return ret; +} + +void *varena_grow(struct varena *varena, void *ptr, size_t *count) { + size_t old_count = *count; + + // Round up to the limit of the current size class. If we're already at + // the limit, go to the next size class. + size_t new_shift = varena_size_class(varena, old_count + 1) + varena->shift; + size_t new_count = (size_t)1 << new_shift; + + ptr = varena_realloc(varena, ptr, old_count, new_count); + if (ptr) { + *count = new_count; + } + return ptr; +} + +void varena_free(struct varena *varena, void *ptr, size_t count) { + struct arena *arena = varena_get(varena, count); + arena_free(arena, ptr); +} + +void varena_clear(struct varena *varena) { + for (size_t i = 0; i < varena->narenas; ++i) { + arena_clear(&varena->arenas[i]); + } +} + +void varena_destroy(struct varena *varena) { + for (size_t i = 0; i < varena->narenas; ++i) { + arena_destroy(&varena->arenas[i]); + } + free(varena->arenas); + sanitize_uninit(varena); +} diff --git a/src/alloc.h b/src/alloc.h new file mode 100644 index 0000000..095134a --- /dev/null +++ b/src/alloc.h @@ -0,0 +1,383 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +/** + * Memory allocation. + */ + +#ifndef BFS_ALLOC_H +#define BFS_ALLOC_H + +#include "prelude.h" +#include <errno.h> +#include <stddef.h> +#include <stdlib.h> + +/** Check if a size is properly aligned. */ +static inline bool is_aligned(size_t align, size_t size) { + return (size & (align - 1)) == 0; +} + +/** Round down to a multiple of an alignment. */ +static inline size_t align_floor(size_t align, size_t size) { + return size & ~(align - 1); +} + +/** Round up to a multiple of an alignment. */ +static inline size_t align_ceil(size_t align, size_t size) { + return align_floor(align, size + align - 1); +} + +/** + * Saturating array size. + * + * @param align + * Array element alignment. + * @param size + * Array element size. + * @param count + * Array element count. + * @return + * size * count, saturating to the maximum aligned value on overflow. + */ +static inline size_t array_size(size_t align, size_t size, size_t count) { + size_t ret = size * count; + return ret / size == count ? ret : ~(align - 1); +} + +/** Saturating array sizeof. */ +#define sizeof_array(type, count) \ + array_size(alignof(type), sizeof(type), count) + +/** Size of a struct/union field. */ +#define sizeof_member(type, member) \ + sizeof(((type *)NULL)->member) + +/** + * Saturating flexible struct size. + * + * @param align + * Struct alignment. + * @param min + * Minimum struct size. + * @param offset + * Flexible array member offset. + * @param size + * Flexible array element size. + * @param count + * Flexible array element count. + * @return + * The size of the struct with count flexible array elements. Saturates + * to the maximum aligned value on overflow. + */ +static inline size_t flex_size(size_t align, size_t min, size_t offset, size_t size, size_t count) { + size_t ret = size * count; + size_t overflow = ret / size != count; + + size_t extra = offset + align - 1; + ret += extra; + overflow |= ret < extra; + ret |= -overflow; + ret = align_floor(align, ret); + + // Make sure flex_sizeof(type, member, 0) >= sizeof(type), even if the + // type has more padding than necessary for alignment + if (min > align_ceil(align, offset)) { + ret = ret < min ? min : ret; + } + + return ret; +} + +/** + * Computes the size of a flexible struct. + * + * @param type + * The type of the struct containing the flexible array. + * @param member + * The name of the flexible array member. + * @param count + * The length of the flexible array. + * @return + * The size of the struct with count flexible array elements. Saturates + * to the maximum aligned value on overflow. + */ +#define sizeof_flex(type, member, count) \ + flex_size(alignof(type), sizeof(type), offsetof(type, member), sizeof_member(type, member[0]), count) + +/** + * General memory allocator. + * + * @param align + * The required alignment. + * @param size + * The size of the allocation. + * @return + * The allocated memory, or NULL on failure. + */ +attr(malloc(free, 1), aligned_alloc(1, 2)) +void *alloc(size_t align, size_t size); + +/** + * Zero-initialized memory allocator. + * + * @param align + * The required alignment. + * @param size + * The size of the allocation. + * @return + * The allocated memory, or NULL on failure. + */ +attr(malloc(free, 1), aligned_alloc(1, 2)) +void *zalloc(size_t align, size_t size); + +/** Allocate memory for the given type. */ +#define ALLOC(type) \ + (type *)alloc(alignof(type), sizeof(type)) + +/** Allocate zeroed memory for the given type. */ +#define ZALLOC(type) \ + (type *)zalloc(alignof(type), sizeof(type)) + +/** Allocate memory for an array. */ +#define ALLOC_ARRAY(type, count) \ + (type *)alloc(alignof(type), sizeof_array(type, count)) + +/** Allocate zeroed memory for an array. */ +#define ZALLOC_ARRAY(type, count) \ + (type *)zalloc(alignof(type), sizeof_array(type, count)) + +/** Allocate memory for a flexible struct. */ +#define ALLOC_FLEX(type, member, count) \ + (type *)alloc(alignof(type), sizeof_flex(type, member, count)) + +/** Allocate zeroed memory for a flexible struct. */ +#define ZALLOC_FLEX(type, member, count) \ + (type *)zalloc(alignof(type), sizeof_flex(type, member, count)) + +/** + * Alignment-aware realloc(). + * + * @param ptr + * The pointer to reallocate. + * @param align + * The required alignment. + * @param old_size + * The previous allocation size. + * @param new_size + * The new allocation size. + * @return + * The reallocated memory, or NULL on failure. + */ +attr(nodiscard, aligned_alloc(2, 4)) +void *xrealloc(void *ptr, size_t align, size_t old_size, size_t new_size); + +/** Reallocate memory for an array. */ +#define REALLOC_ARRAY(type, ptr, old_count, new_count) \ + (type *)xrealloc((ptr), alignof(type), sizeof_array(type, old_count), sizeof_array(type, new_count)) + +/** Reallocate memory for a flexible struct. */ +#define REALLOC_FLEX(type, member, ptr, old_count, new_count) \ + (type *)xrealloc((ptr), alignof(type), sizeof_flex(type, member, old_count), sizeof_flex(type, member, new_count)) + +/** + * Reserve space for one more element in a dynamic array. + * + * @param ptr + * The pointer to reallocate. + * @param align + * The required alignment. + * @param count + * The current size of the array. + * @return + * The reallocated memory, on both success *and* failure. On success, + * errno will be set to zero, and the returned pointer will have room + * for (count + 1) elements. On failure, errno will be non-zero, and + * ptr will returned unchanged. + */ +attr(nodiscard) +void *reserve(void *ptr, size_t align, size_t size, size_t count); + +/** + * Convenience macro to grow a dynamic array. + * + * @param type + * The array element type. + * @param type **ptr + * A pointer to the array. + * @param size_t *count + * A pointer to the array's size. + * @return + * On success, a pointer to the newly reserved array element, i.e. + * `*ptr + *count++`. On failure, NULL is returned, and both *ptr and + * *count remain unchanged. + */ +#define RESERVE(type, ptr, count) \ + ((*ptr) = reserve((*ptr), alignof(type), sizeof(type), (*count)), \ + errno ? NULL : (*ptr) + (*count)++) + +/** + * An arena allocator for fixed-size types. + * + * Arena allocators are intentionally not thread safe. + */ +struct arena { + /** The list of free chunks. */ + void *chunks; + /** The number of allocated slabs. */ + size_t nslabs; + /** The array of slabs. */ + void **slabs; + /** Chunk alignment. */ + size_t align; + /** Chunk size. */ + size_t size; +}; + +/** + * Initialize an arena for chunks of the given size and alignment. + */ +void arena_init(struct arena *arena, size_t align, size_t size); + +/** + * Initialize an arena for the given type. + */ +#define ARENA_INIT(arena, type) \ + arena_init((arena), alignof(type), sizeof(type)) + +/** + * Free an object from the arena. + */ +void arena_free(struct arena *arena, void *ptr); + +/** + * Allocate an object out of the arena. + */ +attr(malloc(arena_free, 2)) +void *arena_alloc(struct arena *arena); + +/** + * Free all allocations from an arena. + */ +void arena_clear(struct arena *arena); + +/** + * Destroy an arena, freeing all allocations. + */ +void arena_destroy(struct arena *arena); + +/** + * An arena allocator for flexibly-sized types. + */ +struct varena { + /** The alignment of the struct. */ + size_t align; + /** The offset of the flexible array. */ + size_t offset; + /** The size of the flexible array elements. */ + size_t size; + /** Shift amount for the smallest size class. */ + size_t shift; + /** The number of arenas of different sizes. */ + size_t narenas; + /** The array of differently-sized arenas. */ + struct arena *arenas; +}; + +/** + * Initialize a varena for a struct with the given layout. + * + * @param varena + * The varena to initialize. + * @param align + * alignof(type) + * @param min + * sizeof(type) + * @param offset + * offsetof(type, flexible_array) + * @param size + * sizeof(flexible_array[i]) + */ +void varena_init(struct varena *varena, size_t align, size_t min, size_t offset, size_t size); + +/** + * Initialize a varena for the given type and flexible array. + * + * @param varena + * The varena to initialize. + * @param type + * A struct type containing a flexible array. + * @param member + * The name of the flexible array member. + */ +#define VARENA_INIT(varena, type, member) \ + varena_init(varena, alignof(type), sizeof(type), offsetof(type, member), sizeof_member(type, member[0])) + +/** + * Free an arena-allocated flexible struct. + * + * @param varena + * The that allocated the object. + * @param ptr + * The object to free. + * @param count + * The length of the flexible array. + */ +void varena_free(struct varena *varena, void *ptr, size_t count); + +/** + * Arena-allocate a flexible struct. + * + * @param varena + * The varena to allocate from. + * @param count + * The length of the flexible array. + * @return + * The allocated struct, or NULL on failure. + */ +attr(malloc(varena_free, 2)) +void *varena_alloc(struct varena *varena, size_t count); + +/** + * Resize a flexible struct. + * + * @param varena + * The varena to allocate from. + * @param ptr + * The object to resize. + * @param old_count + * The old array lenth. + * @param new_count + * The new array length. + * @return + * The resized struct, or NULL on failure. + */ +attr(nodiscard) +void *varena_realloc(struct varena *varena, void *ptr, size_t old_count, size_t new_count); + +/** + * Grow a flexible struct by an arbitrary amount. + * + * @param varena + * The varena to allocate from. + * @param ptr + * The object to resize. + * @param count + * Pointer to the flexible array length. + * @return + * The resized struct, or NULL on failure. + */ +attr(nodiscard) +void *varena_grow(struct varena *varena, void *ptr, size_t *count); + +/** + * Free all allocations from a varena. + */ +void varena_clear(struct varena *varena); + +/** + * Destroy a varena, freeing all allocations. + */ +void varena_destroy(struct varena *varena); + +#endif // BFS_ALLOC_H diff --git a/src/atomic.h b/src/atomic.h new file mode 100644 index 0000000..360de20 --- /dev/null +++ b/src/atomic.h @@ -0,0 +1,118 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +/** + * Shorthand for standard C atomic operations. + */ + +#ifndef BFS_ATOMIC_H +#define BFS_ATOMIC_H + +#include "prelude.h" +#include "sanity.h" +#include <stdatomic.h> + +/** + * Prettier spelling of _Atomic. + */ +#define atomic _Atomic + +/** + * Shorthand for atomic_load_explicit(). + * + * @param obj + * A pointer to the atomic object. + * @param order + * The memory ordering to use, without the memory_order_ prefix. + * @return + * The loaded value. + */ +#define load(obj, order) \ + atomic_load_explicit(obj, memory_order_##order) + +/** + * Shorthand for atomic_store_explicit(). + */ +#define store(obj, value, order) \ + atomic_store_explicit(obj, value, memory_order_##order) + +/** + * Shorthand for atomic_exchange_explicit(). + */ +#define exchange(obj, value, order) \ + atomic_exchange_explicit(obj, value, memory_order_##order) + +/** + * Shorthand for atomic_compare_exchange_weak_explicit(). + */ +#define compare_exchange_weak(obj, expected, desired, succ, fail) \ + atomic_compare_exchange_weak_explicit(obj, expected, desired, memory_order_##succ, memory_order_##fail) + +/** + * Shorthand for atomic_compare_exchange_strong_explicit(). + */ +#define compare_exchange_strong(obj, expected, desired, succ, fail) \ + atomic_compare_exchange_strong_explicit(obj, expected, desired, memory_order_##succ, memory_order_##fail) + +/** + * Shorthand for atomic_fetch_add_explicit(). + */ +#define fetch_add(obj, arg, order) \ + atomic_fetch_add_explicit(obj, arg, memory_order_##order) + +/** + * Shorthand for atomic_fetch_sub_explicit(). + */ +#define fetch_sub(obj, arg, order) \ + atomic_fetch_sub_explicit(obj, arg, memory_order_##order) + +/** + * Shorthand for atomic_fetch_or_explicit(). + */ +#define fetch_or(obj, arg, order) \ + atomic_fetch_or_explicit(obj, arg, memory_order_##order) + +/** + * Shorthand for atomic_fetch_xor_explicit(). + */ +#define fetch_xor(obj, arg, order) \ + atomic_fetch_xor_explicit(obj, arg, memory_order_##order) + +/** + * Shorthand for atomic_fetch_and_explicit(). + */ +#define fetch_and(obj, arg, order) \ + atomic_fetch_and_explicit(obj, arg, memory_order_##order) + +/** + * Shorthand for atomic_thread_fence(). + */ +#if SANITIZE_THREAD +// TSan doesn't support fences: https://github.com/google/sanitizers/issues/1415 +# define thread_fence(obj, order) \ + fetch_add(obj, 0, order) +#else +# define thread_fence(obj, order) \ + atomic_thread_fence(memory_order_##order) +#endif + +/** + * Shorthand for atomic_signal_fence(). + */ +#define signal_fence(order) \ + atomic_signal_fence(memory_order_##order) + +/** + * A hint to the CPU to relax while it spins. + */ +#if __has_builtin(__builtin_ia32_pause) +# define spin_loop() __builtin_ia32_pause() +#elif __has_builtin(__builtin_arm_yield) +# define spin_loop() __builtin_arm_yield() +#elif __has_builtin(__builtin_riscv_pause) +# define spin_loop() __builtin_riscv_pause() +#else +# define spin_loop() ((void)0) +#endif + +#endif // BFS_ATOMIC_H diff --git a/src/bar.c b/src/bar.c new file mode 100644 index 0000000..b928373 --- /dev/null +++ b/src/bar.c @@ -0,0 +1,218 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +#include "prelude.h" +#include "bar.h" +#include "alloc.h" +#include "atomic.h" +#include "bfstd.h" +#include "bit.h" +#include "dstring.h" +#include "sighook.h" +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <stdarg.h> +#include <stdio.h> +#include <string.h> +#include <sys/ioctl.h> + +struct bfs_bar { + int fd; + atomic unsigned int width; + atomic unsigned int height; + + struct sighook *exit_hook; + struct sighook *winch_hook; +}; + +/** Get the terminal size, if possible. */ +static int bfs_bar_getsize(struct bfs_bar *bar) { +#ifdef TIOCGWINSZ + struct winsize ws; + if (ioctl(bar->fd, TIOCGWINSZ, &ws) != 0) { + return -1; + } + + store(&bar->width, ws.ws_col, relaxed); + store(&bar->height, ws.ws_row, relaxed); + return 0; +#else + errno = ENOTSUP; + return -1; +#endif +} + +/** Write a string to the status bar (async-signal-safe). */ +static int bfs_bar_write(struct bfs_bar *bar, const char *str, size_t len) { + return xwrite(bar->fd, str, len) == len ? 0 : -1; +} + +/** Write a string to the status bar (async-signal-safe). */ +static int bfs_bar_puts(struct bfs_bar *bar, const char *str) { + return bfs_bar_write(bar, str, strlen(str)); +} + +/** Number of decimal digits needed for terminal sizes. */ +#define ITOA_DIGITS ((USHRT_WIDTH + 2) / 3) + +/** Async Signal Safe itoa(). */ +static char *ass_itoa(char *str, unsigned int n) { + char *end = str + ITOA_DIGITS; + *end = '\0'; + + char *c = end; + do { + *--c = '0' + (n % 10); + n /= 10; + } while (n); + + size_t len = end - c; + memmove(str, c, len + 1); + return str + len; +} + +/** Update the size of the scrollable region. */ +static int bfs_bar_resize(struct bfs_bar *bar) { + static const char PREFIX[] = + "\033D" // IND: Line feed, possibly scrolling + "\033[1A" // CUU: Move cursor up 1 row + "\0337" // DECSC: Save cursor + "\033[;"; // DECSTBM: Set scrollable region + static const char SUFFIX[] = + "r" // (end of DECSTBM) + "\0338" // DECRC: Restore the cursor + "\033[J"; // ED: Erase display from cursor to end + + char esc_seq[sizeof(PREFIX) + ITOA_DIGITS + sizeof(SUFFIX)]; + + // DECSTBM takes the height as the second argument + unsigned int height = load(&bar->height, relaxed) - 1; + + char *cur = stpcpy(esc_seq, PREFIX); + cur = ass_itoa(cur, height); + cur = stpcpy(cur, SUFFIX); + + return bfs_bar_write(bar, esc_seq, cur - esc_seq); +} + +#ifdef SIGWINCH +/** SIGWINCH handler. */ +static void bfs_bar_sigwinch(int sig, siginfo_t *info, void *arg) { + struct bfs_bar *bar = arg; + bfs_bar_getsize(bar); + bfs_bar_resize(bar); +} +#endif + +/** Reset the scrollable region and hide the bar. */ +static int bfs_bar_reset(struct bfs_bar *bar) { + return bfs_bar_puts(bar, + "\0337" // DECSC: Save cursor + "\033[r" // DECSTBM: Reset scrollable region + "\0338" // DECRC: Restore cursor + "\033[J" // ED: Erase display from cursor to end + ); +} + +/** Signal handler for process-terminating signals. */ +static void bfs_bar_sigexit(int sig, siginfo_t *info, void *arg) { + struct bfs_bar *bar = arg; + bfs_bar_reset(bar); +} + +/** printf() to the status bar with a single write(). */ +attr(printf(2, 3)) +static int bfs_bar_printf(struct bfs_bar *bar, const char *format, ...) { + va_list args; + va_start(args, format); + dchar *str = dstrvprintf(format, args); + va_end(args); + + if (!str) { + return -1; + } + + int ret = bfs_bar_write(bar, str, dstrlen(str)); + dstrfree(str); + return ret; +} + +struct bfs_bar *bfs_bar_show(void) { + struct bfs_bar *bar = ALLOC(struct bfs_bar); + if (!bar) { + return NULL; + } + + char term[L_ctermid]; + ctermid(term); + if (strlen(term) == 0) { + errno = ENOTTY; + goto fail; + } + + bar->fd = open(term, O_RDWR | O_CLOEXEC); + if (bar->fd < 0) { + goto fail; + } + + if (bfs_bar_getsize(bar) != 0) { + goto fail_close; + } + + bar->exit_hook = atsigexit(bfs_bar_sigexit, bar); + if (!bar->exit_hook) { + goto fail_close; + } + +#ifdef SIGWINCH + bar->winch_hook = sighook(SIGWINCH, bfs_bar_sigwinch, bar, 0); + if (!bar->winch_hook) { + goto fail_hook; + } +#endif + + bfs_bar_resize(bar); + return bar; + +fail_hook: + sigunhook(bar->exit_hook); +fail_close: + close_quietly(bar->fd); +fail: + free(bar); + return NULL; +} + +unsigned int bfs_bar_width(const struct bfs_bar *bar) { + return load(&bar->width, relaxed); +} + +int bfs_bar_update(struct bfs_bar *bar, const char *str) { + unsigned int height = load(&bar->height, relaxed); + return bfs_bar_printf(bar, + "\0337" // DECSC: Save cursor + "\033[%u;0f" // HVP: Move cursor to row, column + "\033[K" // EL: Erase line + "\033[7m" // SGR reverse video + "%s" + "\033[27m" // SGR reverse video off + "\0338", // DECRC: Restore cursor + height, + str + ); +} + +void bfs_bar_hide(struct bfs_bar *bar) { + if (!bar) { + return; + } + + sigunhook(bar->winch_hook); + sigunhook(bar->exit_hook); + + bfs_bar_reset(bar); + + xclose(bar->fd); + free(bar); +} diff --git a/src/bar.h b/src/bar.h new file mode 100644 index 0000000..20d92a9 --- /dev/null +++ b/src/bar.h @@ -0,0 +1,44 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +/** + * A terminal status bar. + */ + +#ifndef BFS_BAR_H +#define BFS_BAR_H + +/** A terminal status bar. */ +struct bfs_bar; + +/** + * Create a terminal status bar. Only one status bar is supported at a time. + * + * @return + * A pointer to the new status bar, or NULL on failure. + */ +struct bfs_bar *bfs_bar_show(void); + +/** + * Get the width of the status bar. + */ +unsigned int bfs_bar_width(const struct bfs_bar *bar); + +/** + * Update the status bar message. + * + * @param bar + * The status bar to update. + * @param str + * The string to display. + * @return + * 0 on success, -1 on failure. + */ +int bfs_bar_update(struct bfs_bar *bar, const char *str); + +/** + * Hide the status bar. + */ +void bfs_bar_hide(struct bfs_bar *status); + +#endif // BFS_BAR_H diff --git a/src/bfstd.c b/src/bfstd.c new file mode 100644 index 0000000..7680f17 --- /dev/null +++ b/src/bfstd.c @@ -0,0 +1,1006 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +#include "prelude.h" +#include "bfstd.h" +#include "bit.h" +#include "diag.h" +#include "sanity.h" +#include "thread.h" +#include "xregex.h" +#include <errno.h> +#include <fcntl.h> +#include <langinfo.h> +#include <limits.h> +#include <locale.h> +#include <nl_types.h> +#include <pthread.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/resource.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> +#include <wchar.h> + +#if BFS_USE_SYS_SYSMACROS_H +# include <sys/sysmacros.h> +#elif BFS_USE_SYS_MKDEV_H +# include <sys/mkdev.h> +#endif + +#if BFS_USE_UTIL_H +# include <util.h> +#endif + +bool error_is_like(int error, int category) { + if (error == category) { + return true; + } + + switch (category) { + case ENOENT: + return error == ENOTDIR; + + case ENOSYS: + // https://github.com/opencontainers/runc/issues/2151 + return errno == EPERM; + +#if __DragonFly__ + // https://twitter.com/tavianator/status/1742991411203485713 + case ENAMETOOLONG: + return error == EFAULT; +#endif + } + + return false; +} + +bool errno_is_like(int category) { + return error_is_like(errno, category); +} + +int try(int ret) { + if (ret >= 0) { + return ret; + } else { + bfs_assert(errno > 0, "errno should be positive, was %d\n", errno); + return -errno; + } +} + +char *xdirname(const char *path) { + size_t i = xbaseoff(path); + + // Skip trailing slashes + while (i > 0 && path[i - 1] == '/') { + --i; + } + + if (i > 0) { + return strndup(path, i); + } else if (path[i] == '/') { + return strdup("/"); + } else { + return strdup("."); + } +} + +char *xbasename(const char *path) { + size_t i = xbaseoff(path); + size_t len = strcspn(path + i, "/"); + if (len > 0) { + return strndup(path + i, len); + } else if (path[i] == '/') { + return strdup("/"); + } else { + return strdup("."); + } +} + +size_t xbaseoff(const char *path) { + size_t i = strlen(path); + + // Skip trailing slashes + while (i > 0 && path[i - 1] == '/') { + --i; + } + + // Find the beginning of the name + while (i > 0 && path[i - 1] != '/') { + --i; + } + + // Skip leading slashes + while (path[i] == '/' && path[i + 1]) { + ++i; + } + + return i; +} + +FILE *xfopen(const char *path, int flags) { + char mode[4]; + + switch (flags & O_ACCMODE) { + case O_RDONLY: + strcpy(mode, "rb"); + break; + case O_WRONLY: + strcpy(mode, "wb"); + break; + case O_RDWR: + strcpy(mode, "r+b"); + break; + default: + bfs_bug("Invalid access mode"); + errno = EINVAL; + return NULL; + } + + if (flags & O_APPEND) { + mode[0] = 'a'; + } + + int fd; + if (flags & O_CREAT) { + fd = open(path, flags, 0666); + } else { + fd = open(path, flags); + } + + if (fd < 0) { + return NULL; + } + + FILE *ret = fdopen(fd, mode); + if (!ret) { + close_quietly(fd); + return NULL; + } + + return ret; +} + +char *xgetdelim(FILE *file, char delim) { + char *chunk = NULL; + size_t n = 0; + ssize_t len = getdelim(&chunk, &n, delim, file); + if (len >= 0) { + if (chunk[len] == delim) { + chunk[len] = '\0'; + } + return chunk; + } else { + free(chunk); + if (!ferror(file)) { + errno = 0; + } + return NULL; + } +} + +const char *xgetprogname(void) { + const char *cmd = NULL; +#if BFS_HAS_GETPROGNAME + cmd = getprogname(); +#elif BFS_HAS_GETPROGNAME_GNU + cmd = program_invocation_short_name; +#endif + + if (!cmd) { + cmd = BFS_COMMAND; + } + + return cmd; +} + +/** Compile and execute a regular expression for xrpmatch(). */ +static int xrpregex(nl_item item, const char *response) { + const char *pattern = nl_langinfo(item); + if (!pattern) { + return -1; + } + + struct bfs_regex *regex; + int ret = bfs_regcomp(®ex, pattern, BFS_REGEX_POSIX_EXTENDED, 0); + if (ret == 0) { + ret = bfs_regexec(regex, response, 0); + } + + bfs_regfree(regex); + return ret; +} + +/** Check if a response is affirmative or negative. */ +static int xrpmatch(const char *response) { + int ret = xrpregex(NOEXPR, response); + if (ret > 0) { + return 0; + } else if (ret < 0) { + return -1; + } + + ret = xrpregex(YESEXPR, response); + if (ret > 0) { + return 1; + } else if (ret < 0) { + return -1; + } + + // Failsafe: always handle y/n + char c = response[0]; + if (c == 'n' || c == 'N') { + return 0; + } else if (c == 'y' || c == 'Y') { + return 1; + } else { + return -1; + } +} + +int ynprompt(void) { + fflush(stderr); + + char *line = xgetdelim(stdin, '\n'); + int ret = line ? xrpmatch(line) : -1; + free(line); + return ret; +} + +void *xmemdup(const void *src, size_t size) { + void *ret = malloc(size); + if (ret) { + memcpy(ret, src, size); + } + return ret; +} + +char *xstpecpy(char *dest, char *end, const char *src) { + return xstpencpy(dest, end, src, SIZE_MAX); +} + +char *xstpencpy(char *dest, char *end, const char *src, size_t n) { + size_t space = end - dest; + n = space < n ? space : n; + n = strnlen(src, n); + memcpy(dest, src, n); + if (n < space) { + dest[n] = '\0'; + return dest + n; + } else { + end[-1] = '\0'; + return end; + } +} + +const char *xstrerror(int errnum) { + int saved = errno; + const char *ret = NULL; + static thread_local char buf[256]; + + // On FreeBSD with MemorySanitizer, duplocale() triggers + // https://github.com/llvm/llvm-project/issues/65532 +#if BFS_HAS_STRERROR_L && !(__FreeBSD__ && SANITIZE_MEMORY) +# if BFS_HAS_USELOCALE + locale_t loc = uselocale((locale_t)0); +# else + locale_t loc = LC_GLOBAL_LOCALE; +# endif + + bool free_loc = false; + if (loc == LC_GLOBAL_LOCALE) { + loc = duplocale(loc); + free_loc = true; + } + + if (loc != (locale_t)0) { + ret = strerror_l(errnum, loc); + if (free_loc) { + freelocale(loc); + } + } +#elif BFS_HAS_STRERROR_R_POSIX + if (strerror_r(errnum, buf, sizeof(buf)) == 0) { + ret = buf; + } +#elif BFS_HAS_STRERROR_R_GNU + ret = strerror_r(errnum, buf, sizeof(buf)); +#endif + + if (!ret) { + // Fallback for strerror_[lr]() or duplocale() failures + snprintf(buf, sizeof(buf), "Unknown error %d", errnum); + ret = buf; + } + + errno = saved; + return ret; +} + +/** Get the single character describing the given file type. */ +static char type_char(mode_t mode) { + switch (mode & S_IFMT) { + case S_IFREG: + return '-'; + case S_IFBLK: + return 'b'; + case S_IFCHR: + return 'c'; + case S_IFDIR: + return 'd'; + case S_IFLNK: + return 'l'; + case S_IFIFO: + return 'p'; + case S_IFSOCK: + return 's'; +#ifdef S_IFDOOR + case S_IFDOOR: + return 'D'; +#endif +#ifdef S_IFPORT + case S_IFPORT: + return 'P'; +#endif +#ifdef S_IFWHT + case S_IFWHT: + return 'w'; +#endif + } + + return '?'; +} + +void xstrmode(mode_t mode, char str[11]) { + strcpy(str, "----------"); + + str[0] = type_char(mode); + + if (mode & 00400) { + str[1] = 'r'; + } + if (mode & 00200) { + str[2] = 'w'; + } + if ((mode & 04100) == 04000) { + str[3] = 'S'; + } else if (mode & 04000) { + str[3] = 's'; + } else if (mode & 00100) { + str[3] = 'x'; + } + + if (mode & 00040) { + str[4] = 'r'; + } + if (mode & 00020) { + str[5] = 'w'; + } + if ((mode & 02010) == 02000) { + str[6] = 'S'; + } else if (mode & 02000) { + str[6] = 's'; + } else if (mode & 00010) { + str[6] = 'x'; + } + + if (mode & 00004) { + str[7] = 'r'; + } + if (mode & 00002) { + str[8] = 'w'; + } + if ((mode & 01001) == 01000) { + str[9] = 'T'; + } else if (mode & 01000) { + str[9] = 't'; + } else if (mode & 00001) { + str[9] = 'x'; + } +} + +/** Check if an rlimit value is infinite. */ +static bool rlim_isinf(rlim_t r) { + // Consider RLIM_{INFINITY,SAVED_{CUR,MAX}} all equally infinite + if (r == RLIM_INFINITY) { + return true; + } + +#ifdef RLIM_SAVED_CUR + if (r == RLIM_SAVED_CUR) { + return true; + } +#endif + +#ifdef RLIM_SAVED_MAX + if (r == RLIM_SAVED_MAX) { + return true; + } +#endif + + return false; +} + +int rlim_cmp(rlim_t a, rlim_t b) { + bool a_inf = rlim_isinf(a); + bool b_inf = rlim_isinf(b); + if (a_inf || b_inf) { + return a_inf - b_inf; + } + + return (a > b) - (a < b); +} + +dev_t xmakedev(int ma, int mi) { +#ifdef makedev + return makedev(ma, mi); +#else + return (ma << 8) | mi; +#endif +} + +int xmajor(dev_t dev) { +#ifdef major + return major(dev); +#else + return dev >> 8; +#endif +} + +int xminor(dev_t dev) { +#ifdef minor + return minor(dev); +#else + return dev & 0xFF; +#endif +} + +pid_t xwaitpid(pid_t pid, int *status, int flags) { + pid_t ret; + do { + ret = waitpid(pid, status, flags); + } while (ret < 0 && errno == EINTR); + return ret; +} + +int dup_cloexec(int fd) { +#ifdef F_DUPFD_CLOEXEC + return fcntl(fd, F_DUPFD_CLOEXEC, 0); +#else + int ret = dup(fd); + if (ret < 0) { + return -1; + } + + if (fcntl(ret, F_SETFD, FD_CLOEXEC) == -1) { + close_quietly(ret); + return -1; + } + + return ret; +#endif +} + +int pipe_cloexec(int pipefd[2]) { +#if BFS_HAS_PIPE2 + return pipe2(pipefd, O_CLOEXEC); +#else + if (pipe(pipefd) != 0) { + return -1; + } + + if (fcntl(pipefd[0], F_SETFD, FD_CLOEXEC) == -1 || fcntl(pipefd[1], F_SETFD, FD_CLOEXEC) == -1) { + close_quietly(pipefd[1]); + close_quietly(pipefd[0]); + return -1; + } + + return 0; +#endif +} + +size_t xread(int fd, void *buf, size_t nbytes) { + size_t count = 0; + + while (count < nbytes) { + ssize_t ret = read(fd, (char *)buf + count, nbytes - count); + if (ret < 0) { + if (errno == EINTR) { + continue; + } else { + break; + } + } else if (ret == 0) { + // EOF + errno = 0; + break; + } else { + count += ret; + } + } + + return count; +} + +size_t xwrite(int fd, const void *buf, size_t nbytes) { + size_t count = 0; + + while (count < nbytes) { + ssize_t ret = write(fd, (const char *)buf + count, nbytes - count); + if (ret < 0) { + if (errno == EINTR) { + continue; + } else { + break; + } + } else if (ret == 0) { + // EOF? + errno = 0; + break; + } else { + count += ret; + } + } + + return count; +} + +void close_quietly(int fd) { + int error = errno; + xclose(fd); + errno = error; +} + +int xclose(int fd) { + int ret = close(fd); + if (ret != 0) { + bfs_verify(errno != EBADF); + } + return ret; +} + +int xfaccessat(int fd, const char *path, int amode) { + int ret = faccessat(fd, path, amode, 0); + +#ifdef AT_EACCESS + // Some platforms, like Hurd, only support AT_EACCESS. Other platforms, + // like Android, don't support AT_EACCESS at all. + if (ret != 0 && (errno == EINVAL || errno == ENOTSUP)) { + ret = faccessat(fd, path, amode, AT_EACCESS); + } +#endif + + return ret; +} + +char *xconfstr(int name) { +#if BFS_HAS_CONFSTR + size_t len = confstr(name, NULL, 0); + if (len == 0) { + return NULL; + } + + char *str = malloc(len); + if (!str) { + return NULL; + } + + if (confstr(name, str, len) != len) { + free(str); + return NULL; + } + + return str; +#else + errno = ENOTSUP; + return NULL; +#endif +} + +char *xreadlinkat(int fd, const char *path, size_t size) { + ssize_t len; + char *name = NULL; + + if (size == 0) { + size = 64; + } else { + ++size; // NUL terminator + } + + while (true) { + char *new_name = realloc(name, size); + if (!new_name) { + goto error; + } + name = new_name; + + len = readlinkat(fd, path, name, size); + if (len < 0) { + goto error; + } else if ((size_t)len >= size) { + size *= 2; + } else { + break; + } + } + + name[len] = '\0'; + return name; + +error: + free(name); + return NULL; +} + +#if BFS_HAS_STRTOFFLAGS +# define BFS_STRTOFFLAGS strtofflags +#elif BFS_HAS_STRING_TO_FLAGS +# define BFS_STRTOFFLAGS string_to_flags +#endif + +int xstrtofflags(const char **str, unsigned long long *set, unsigned long long *clear) { +#ifdef BFS_STRTOFFLAGS + char *str_arg = (char *)*str; + +#if __OpenBSD__ + typedef uint32_t bfs_fflags_t; +#else + typedef unsigned long bfs_fflags_t; +#endif + bfs_fflags_t set_arg = 0; + bfs_fflags_t clear_arg = 0; + + int ret = BFS_STRTOFFLAGS(&str_arg, &set_arg, &clear_arg); + + *str = str_arg; + *set = set_arg; + *clear = clear_arg; + + if (ret != 0) { + errno = EINVAL; + } + return ret; +#else // !BFS_STRTOFFLAGS + errno = ENOTSUP; + return -1; +#endif +} + +long xsysconf(int name) { +#if __FreeBSD__ && SANITIZE_MEMORY + // Work around https://github.com/llvm/llvm-project/issues/88163 + __msan_scoped_disable_interceptor_checks(); +#endif + + long ret = sysconf(name); + +#if __FreeBSD__ && SANITIZE_MEMORY + __msan_scoped_enable_interceptor_checks(); +#endif + + return ret; +} + +size_t asciilen(const char *str) { + return asciinlen(str, strlen(str)); +} + +size_t asciinlen(const char *str, size_t n) { + size_t i = 0; + +#if SIZE_WIDTH % 8 == 0 + // Word-at-a-time isascii() + for (size_t word; i + sizeof(word) <= n; i += sizeof(word)) { + memcpy(&word, str + i, sizeof(word)); + + const size_t mask = (SIZE_MAX / 0xFF) << 7; // 0x808080... + word &= mask; + if (!word) { + continue; + } + +#if ENDIAN_NATIVE == ENDIAN_BIG + word = bswap(word); +#elif ENDIAN_NATIVE != ENDIAN_LITTLE + break; +#endif + + size_t first = trailing_zeros(word) / 8; + return i + first; + } +#endif + + for (; i < n; ++i) { + if (!xisascii(str[i])) { + break; + } + } + + return i; +} + +wint_t xmbrtowc(const char *str, size_t *i, size_t len, mbstate_t *mb) { + wchar_t wc; + size_t mblen = mbrtowc(&wc, str + *i, len - *i, mb); + switch (mblen) { + case -1: // Invalid byte sequence + case -2: // Incomplete byte sequence + *i += 1; + *mb = (mbstate_t){0}; + return WEOF; + default: + *i += mblen; + return wc; + } +} + +size_t xstrwidth(const char *str) { + size_t len = strlen(str); + size_t ret = 0; + + size_t asclen = asciinlen(str, len); + size_t i; + for (i = 0; i < asclen; ++i) { + // Assume all ASCII printables have width 1 + if (xisprint(str[i])) { + ++ret; + } + } + + mbstate_t mb = {0}; + while (i < len) { + wint_t wc = xmbrtowc(str, &i, len, &mb); + if (wc == WEOF) { + // Assume a single-width '?' + ++ret; + continue; + } + + int width = xwcwidth(wc); + if (width > 0) { + ret += width; + } + } + + return ret; +} + +/** + * Character type flags. + */ +enum ctype { + IS_PRINT = 1 << 0, + IS_SPACE = 1 << 1, +}; + +/** Cached ctypes. */ +static unsigned char ctype_cache[UCHAR_MAX + 1]; + +/** Initialize the ctype cache. */ +static void char_cache_init(void) { + for (size_t c = 0; c <= UCHAR_MAX; ++c) { + if (xisprint(c)) { + ctype_cache[c] |= IS_PRINT; + } + if (xisspace(c)) { + ctype_cache[c] |= IS_SPACE; + } + } +} + +/** Check if a character is printable. */ +static bool wesc_isprint(unsigned char c, enum wesc_flags flags) { + if (ctype_cache[c] & IS_PRINT) { + return true; + } + + // Technically a literal newline is safe inside single quotes, but $'\n' + // is much nicer than ' + // ' + if (!(flags & WESC_SHELL) && (ctype_cache[c] & IS_SPACE)) { + return true; + } + + return false; +} + +/** Check if a wide character is printable. */ +static bool wesc_iswprint(wchar_t c, enum wesc_flags flags) { + if (xiswprint(c)) { + return true; + } + + if (!(flags & WESC_SHELL) && xiswspace(c)) { + return true; + } + + return false; +} + +/** Get the length of the longest printable prefix of a string. */ +static size_t printable_len(const char *str, size_t len, enum wesc_flags flags) { + static pthread_once_t once = PTHREAD_ONCE_INIT; + invoke_once(&once, char_cache_init); + + // Fast path: avoid multibyte checks + size_t asclen = asciinlen(str, len); + size_t i; + for (i = 0; i < asclen; ++i) { + if (!wesc_isprint(str[i], flags)) { + return i; + } + } + + mbstate_t mb = {0}; + for (size_t j = i; i < len; i = j) { + wint_t wc = xmbrtowc(str, &j, len, &mb); + if (wc == WEOF) { + break; + } + if (!wesc_iswprint(wc, flags)) { + break; + } + } + + return i; +} + +/** Convert a special char into a well-known escape sequence like "\n". */ +static const char *dollar_esc(char c) { + // https://www.gnu.org/software/bash/manual/html_node/ANSI_002dC-Quoting.html + switch (c) { + case '\a': + return "\\a"; + case '\b': + return "\\b"; + case '\033': + return "\\e"; + case '\f': + return "\\f"; + case '\n': + return "\\n"; + case '\r': + return "\\r"; + case '\t': + return "\\t"; + case '\v': + return "\\v"; + case '\'': + return "\\'"; + case '\\': + return "\\\\"; + default: + return NULL; + } +} + +/** $'Quote' a string for the shell. */ +static char *dollar_quote(char *dest, char *end, const char *str, size_t len, enum wesc_flags flags) { + dest = xstpecpy(dest, end, "$'"); + + mbstate_t mb = {0}; + for (size_t i = 0; i < len;) { + size_t start = i; + bool safe = false; + + wint_t wc = xmbrtowc(str, &i, len, &mb); + if (wc != WEOF) { + safe = wesc_iswprint(wc, flags); + } + + for (size_t j = start; safe && j < i; ++j) { + if (str[j] == '\'' || str[j] == '\\') { + safe = false; + } + } + + if (safe) { + dest = xstpencpy(dest, end, str + start, i - start); + } else { + for (size_t j = start; j < i; ++j) { + unsigned char byte = str[j]; + const char *esc = dollar_esc(byte); + if (esc) { + dest = xstpecpy(dest, end, esc); + } else { + static const char *hex[] = {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B", "C", "D", "E", "F"}; + dest = xstpecpy(dest, end, "\\x"); + dest = xstpecpy(dest, end, hex[byte / 0x10]); + dest = xstpecpy(dest, end, hex[byte % 0x10]); + } + } + } + } + + return xstpecpy(dest, end, "'"); +} + +/** How much of this string is safe as a bare word? */ +static size_t bare_len(const char *str, size_t len) { + // https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_02 + size_t ret = strcspn(str, "|&;<>()$`\\\"' *?[#~=%!{}"); + return ret < len ? ret : len; +} + +/** How much of this string is safe to double-quote? */ +static size_t quotable_len(const char *str, size_t len) { + // https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_02_03 + size_t ret = strcspn(str, "`$\\\"!"); + return ret < len ? ret : len; +} + +/** "Quote" a string for the shell. */ +static char *double_quote(char *dest, char *end, const char *str, size_t len) { + dest = xstpecpy(dest, end, "\""); + dest = xstpencpy(dest, end, str, len); + return xstpecpy(dest, end, "\""); +} + +/** 'Quote' a string for the shell. */ +static char *single_quote(char *dest, char *end, const char *str, size_t len) { + bool open = false; + + while (len > 0) { + size_t chunk = strcspn(str, "'"); + chunk = chunk < len ? chunk : len; + if (chunk > 0) { + if (!open) { + dest = xstpecpy(dest, end, "'"); + open = true; + } + dest = xstpencpy(dest, end, str, chunk); + str += chunk; + len -= chunk; + } + + while (len > 0 && *str == '\'') { + if (open) { + dest = xstpecpy(dest, end, "'"); + open = false; + } + dest = xstpecpy(dest, end, "\\'"); + ++str; + --len; + } + } + + if (open) { + dest = xstpecpy(dest, end, "'"); + } + + return dest; +} + +char *wordesc(char *dest, char *end, const char *str, enum wesc_flags flags) { + return wordnesc(dest, end, str, SIZE_MAX, flags); +} + +char *wordnesc(char *dest, char *end, const char *str, size_t n, enum wesc_flags flags) { + size_t len = strnlen(str, n); + char *start = dest; + + if (printable_len(str, len, flags) < len) { + // String contains unprintable chars, use $'this\x7Fsyntax' + dest = dollar_quote(dest, end, str, len, flags); + } else if (!(flags & WESC_SHELL) || bare_len(str, len) == len) { + // Whole string is safe as a bare word + dest = xstpencpy(dest, end, str, len); + } else if (quotable_len(str, len) == len) { + // Whole string is safe to double-quote + dest = double_quote(dest, end, str, len); + } else { + // Single-quote the whole string + dest = single_quote(dest, end, str, len); + } + + if (dest == start) { + dest = xstpecpy(dest, end, "\"\""); + } + + return dest; +} diff --git a/src/bfstd.h b/src/bfstd.h new file mode 100644 index 0000000..d06bbd9 --- /dev/null +++ b/src/bfstd.h @@ -0,0 +1,523 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +/** + * Standard library wrappers and polyfills. + */ + +#ifndef BFS_BFSTD_H +#define BFS_BFSTD_H + +#include "prelude.h" +#include "sanity.h" +#include <stddef.h> + +#include <ctype.h> + +/** + * Work around https://github.com/llvm/llvm-project/issues/65532 by forcing a + * function, not a macro, to be called. + */ +#if __FreeBSD__ && SANITIZE_MEMORY +# define BFS_INTERCEPT(fn) (fn) +#else +# define BFS_INTERCEPT(fn) fn +#endif + +/** + * Wrap isalpha()/isdigit()/etc. + */ +#define BFS_ISCTYPE(fn, c) BFS_INTERCEPT(fn)((unsigned char)(c)) + +#define xisalnum(c) BFS_ISCTYPE(isalnum, c) +#define xisalpha(c) BFS_ISCTYPE(isalpha, c) +#define xisascii(c) BFS_ISCTYPE(isascii, c) +#define xiscntrl(c) BFS_ISCTYPE(iscntrl, c) +#define xisdigit(c) BFS_ISCTYPE(isdigit, c) +#define xislower(c) BFS_ISCTYPE(islower, c) +#define xisgraph(c) BFS_ISCTYPE(isgraph, c) +#define xisprint(c) BFS_ISCTYPE(isprint, c) +#define xispunct(c) BFS_ISCTYPE(ispunct, c) +#define xisspace(c) BFS_ISCTYPE(isspace, c) +#define xisupper(c) BFS_ISCTYPE(isupper, c) +#define xisxdigit(c) BFS_ISCTYPE(isxdigit, c) + +// #include <errno.h> + +/** + * Check if an error code is "like" another one. For example, ENOTDIR is + * like ENOENT because they can both be triggered by non-existent paths. + * + * @param error + * The error code to check. + * @param category + * The category to test for. Known categories include ENOENT and + * ENAMETOOLONG. + * @return + * Whether the error belongs to the given category. + */ +bool error_is_like(int error, int category); + +/** + * Equivalent to error_is_like(errno, category). + */ +bool errno_is_like(int category); + +/** + * Apply the "negative errno" convention. + * + * @param ret + * The return value of the attempted operation. + * @return + * ret, if non-negative, otherwise -errno. + */ +int try(int ret); + +#include <fcntl.h> + +#ifndef O_EXEC +# ifdef O_PATH +# define O_EXEC O_PATH +# else +# define O_EXEC O_RDONLY +# endif +#endif + +#ifndef O_SEARCH +# ifdef O_PATH +# define O_SEARCH O_PATH +# else +# define O_SEARCH O_RDONLY +# endif +#endif + +#ifndef O_DIRECTORY +# define O_DIRECTORY 0 +#endif + +#include <fnmatch.h> + +#if !defined(FNM_CASEFOLD) && defined(FNM_IGNORECASE) +# define FNM_CASEFOLD FNM_IGNORECASE +#endif + +// #include <libgen.h> + +/** + * Re-entrant dirname() variant that always allocates a copy. + * + * @param path + * The path in question. + * @return + * The parent directory of the path. + */ +char *xdirname(const char *path); + +/** + * Re-entrant basename() variant that always allocates a copy. + * + * @param path + * The path in question. + * @return + * The final component of the path. + */ +char *xbasename(const char *path); + +/** + * Find the offset of the final component of a path. + * + * @param path + * The path in question. + * @return + * The offset of the basename. + */ +size_t xbaseoff(const char *path); + +#include <stdio.h> + +/** + * fopen() variant that takes open() style flags. + * + * @param path + * The path to open. + * @param flags + * Flags to pass to open(). + */ +FILE *xfopen(const char *path, int flags); + +/** + * Convenience wrapper for getdelim(). + * + * @param file + * The file to read. + * @param delim + * The delimiter character to split on. + * @return + * The read chunk (without the delimiter), allocated with malloc(). + * NULL is returned on error (errno != 0) or end of file (errno == 0). + */ +char *xgetdelim(FILE *file, char delim); + +// #include <stdlib.h> + +/** + * Wrapper for getprogname() or equivalent functionality. + * + * @return + * The basename of the currently running program. + */ +const char *xgetprogname(void); + +/** + * Process a yes/no prompt. + * + * @return 1 for yes, 0 for no, and -1 for unknown. + */ +int ynprompt(void); + +// #include <string.h> + +/** + * Get the length of the pure-ASCII prefix of a string. + */ +size_t asciilen(const char *str); + +/** + * Get the length of the pure-ASCII prefix of a string. + * + * @param str + * The string to check. + * @param n + * The maximum prefix length. + */ +size_t asciinlen(const char *str, size_t n); + +/** + * Allocate a copy of a region of memory. + * + * @param src + * The memory region to copy. + * @param size + * The size of the memory region. + * @return + * A copy of the region, allocated with malloc(), or NULL on failure. + */ +void *xmemdup(const void *src, size_t size); + +/** + * A nice string copying function. + * + * @param dest + * The NUL terminator of the destination string, or `end` if it is + * already truncated. + * @param end + * The end of the destination buffer. + * @param src + * The string to copy from. + * @return + * The new NUL terminator of the destination, or `end` on truncation. + */ +char *xstpecpy(char *dest, char *end, const char *src); + +/** + * A nice string copying function. + * + * @param dest + * The NUL terminator of the destination string, or `end` if it is + * already truncated. + * @param end + * The end of the destination buffer. + * @param src + * The string to copy from. + * @param n + * The maximum number of characters to copy. + * @return + * The new NUL terminator of the destination, or `end` on truncation. + */ +char *xstpencpy(char *dest, char *end, const char *src, size_t n); + +/** + * Thread-safe strerror(). + * + * @param errnum + * An error number. + * @return + * A string describing that error, which remains valid until the next + * xstrerror() call in the same thread. + */ +const char *xstrerror(int errnum); + +/** + * Format a mode like ls -l (e.g. -rw-r--r--). + * + * @param mode + * The mode to format. + * @param str + * The string to hold the formatted mode. + */ +void xstrmode(mode_t mode, char str[11]); + +#include <sys/resource.h> + +/** + * Compare two rlim_t values, accounting for infinite limits. + */ +int rlim_cmp(rlim_t a, rlim_t b); + +#include <sys/types.h> + +/** + * Portable version of makedev(). + */ +dev_t xmakedev(int ma, int mi); + +/** + * Portable version of major(). + */ +int xmajor(dev_t dev); + +/** + * Portable version of minor(). + */ +int xminor(dev_t dev); + +// #include <sys/stat.h> + +/** + * Get the access/change/modification time from a struct stat. + */ +#if BFS_HAS_ST_ACMTIM +# define ST_ATIM(sb) (sb).st_atim +# define ST_CTIM(sb) (sb).st_ctim +# define ST_MTIM(sb) (sb).st_mtim +#elif BFS_HAS_ST_ACMTIMESPEC +# define ST_ATIM(sb) (sb).st_atimespec +# define ST_CTIM(sb) (sb).st_ctimespec +# define ST_MTIM(sb) (sb).st_mtimespec +#else +# define ST_ATIM(sb) ((struct timespec) { .tv_sec = (sb).st_atime }) +# define ST_CTIM(sb) ((struct timespec) { .tv_sec = (sb).st_ctime }) +# define ST_MTIM(sb) ((struct timespec) { .tv_sec = (sb).st_mtime }) +#endif + +// #include <sys/wait.h> + +/** + * waitpid() wrapper that handles EINTR. + */ +pid_t xwaitpid(pid_t pid, int *status, int flags); + +// #include <unistd.h> + +/** + * Like dup(), but set the FD_CLOEXEC flag. + * + * @param fd + * The file descriptor to duplicate. + * @return + * A duplicated file descriptor, or -1 on failure. + */ +int dup_cloexec(int fd); + +/** + * Like pipe(), but set the FD_CLOEXEC flag. + * + * @param pipefd + * The array to hold the two file descriptors. + * @return + * 0 on success, -1 on failure. + */ +int pipe_cloexec(int pipefd[2]); + +/** + * A safe version of read() that handles interrupted system calls and partial + * reads. + * + * @return + * The number of bytes read. A value != nbytes indicates an error + * (errno != 0) or end of file (errno == 0). + */ +size_t xread(int fd, void *buf, size_t nbytes); + +/** + * A safe version of write() that handles interrupted system calls and partial + * writes. + * + * @return + The number of bytes written. A value != nbytes indicates an error. + */ +size_t xwrite(int fd, const void *buf, size_t nbytes); + +/** + * close() variant that preserves errno. + * + * @param fd + * The file descriptor to close. + */ +void close_quietly(int fd); + +/** + * close() wrapper that asserts the file descriptor is valid. + * + * @param fd + * The file descriptor to close. + * @return + * 0 on success, or -1 on error. + */ +int xclose(int fd); + +/** + * Wrapper for faccessat() that handles some portability issues. + */ +int xfaccessat(int fd, const char *path, int amode); + +/** + * readlinkat() wrapper that dynamically allocates the result. + * + * @param fd + * The base directory descriptor. + * @param path + * The path to the link, relative to fd. + * @param size + * An estimate for the size of the link name (pass 0 if unknown). + * @return + * The target of the link, allocated with malloc(), or NULL on failure. + */ +char *xreadlinkat(int fd, const char *path, size_t size); + +/** + * Wrapper for confstr() that allocates with malloc(). + * + * @param name + * The ID of the confstr to look up. + * @return + * The value of the confstr, or NULL on failure. + */ +char *xconfstr(int name); + +/** + * Portability wrapper for strtofflags(). + * + * @param str + * The string to parse. The pointee will be advanced to the first + * invalid position on error. + * @param set + * The flags that are set in the string. + * @param clear + * The flags that are cleared in the string. + * @return + * 0 on success, -1 on failure. + */ +int xstrtofflags(const char **str, unsigned long long *set, unsigned long long *clear); + +/** + * Wrapper for sysconf() that works around an MSan bug. + */ +long xsysconf(int name); + +#include <wchar.h> + +/** + * Error-recovering mbrtowc() wrapper. + * + * @param str + * The string to convert. + * @param i + * The current index. + * @param len + * The length of the string. + * @param mb + * The multi-byte decoding state. + * @return + * The wide character at index *i, or WEOF if decoding fails. In either + * case, *i will be advanced to the next multi-byte character. + */ +wint_t xmbrtowc(const char *str, size_t *i, size_t len, mbstate_t *mb); + +/** + * wcswidth() variant that works on narrow strings. + * + * @param str + * The string to measure. + * @return + * The likely width of that string in a terminal. + */ +size_t xstrwidth(const char *str); + +/** + * wcwidth() wrapper that works around LLVM bug #65532. + */ +#define xwcwidth BFS_INTERCEPT(wcwidth) + +#include <wctype.h> + +/** + * Wrap iswalpha()/iswdigit()/etc. + */ +#define BFS_ISWCTYPE(fn, c) BFS_INTERCEPT(fn)(c) + +#define xiswalnum(c) BFS_ISWCTYPE(iswalnum, c) +#define xiswalpha(c) BFS_ISWCTYPE(iswalpha, c) +#define xiswcntrl(c) BFS_ISWCTYPE(iswcntrl, c) +#define xiswdigit(c) BFS_ISWCTYPE(iswdigit, c) +#define xiswlower(c) BFS_ISWCTYPE(iswlower, c) +#define xiswgraph(c) BFS_ISWCTYPE(iswgraph, c) +#define xiswprint(c) BFS_ISWCTYPE(iswprint, c) +#define xiswpunct(c) BFS_ISWCTYPE(iswpunct, c) +#define xiswspace(c) BFS_ISWCTYPE(iswspace, c) +#define xiswupper(c) BFS_ISWCTYPE(iswupper, c) +#define xiswxdigit(c) BFS_ISWCTYPE(iswxdigit, c) + +// #include <wordexp.h> + +/** + * Flags for wordesc(). + */ +enum wesc_flags { + /** + * Escape special characters so that the shell will treat the escaped + * string as a single word. + */ + WESC_SHELL = 1 << 0, + /** + * Escape special characters so that the escaped string is safe to print + * to a TTY. + */ + WESC_TTY = 1 << 1, +}; + +/** + * Escape a string as a single shell word. + * + * @param dest + * The destination string to fill. + * @param end + * The end of the destination buffer. + * @param src + * The string to escape. + * @param flags + * Controls which characters to escape. + * @return + * The new NUL terminator of the destination, or `end` on truncation. + */ +char *wordesc(char *dest, char *end, const char *str, enum wesc_flags flags); + +/** + * Escape a string as a single shell word. + * + * @param dest + * The destination string to fill. + * @param end + * The end of the destination buffer. + * @param src + * The string to escape. + * @param n + * The maximum length of the string. + * @param flags + * Controls which characters to escape. + * @return + * The new NUL terminator of the destination, or `end` on truncation. + */ +char *wordnesc(char *dest, char *end, const char *str, size_t n, enum wesc_flags flags); + +#endif // BFS_BFSTD_H diff --git a/src/bftw.c b/src/bftw.c new file mode 100644 index 0000000..5322181 --- /dev/null +++ b/src/bftw.c @@ -0,0 +1,2326 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +/** + * The bftw() implementation consists of the following components: + * + * - struct bftw_file: A file that has been encountered during the traversal. + * They have reference-counted links to their parents in the directory tree. + * + * - struct bftw_list: A linked list of bftw_file's. + * + * - struct bftw_queue: A multi-stage queue of bftw_file's. + * + * - struct bftw_cache: An LRU list of bftw_file's with open file descriptors, + * used for openat() to minimize the amount of path re-traversals. + * + * - struct bftw_state: Represents the current state of the traversal, allowing + * various helper functions to take fewer parameters. + */ + +#include "prelude.h" +#include "bftw.h" +#include "alloc.h" +#include "bfstd.h" +#include "diag.h" +#include "dir.h" +#include "dstring.h" +#include "ioq.h" +#include "list.h" +#include "mtab.h" +#include "stat.h" +#include "trie.h" +#include <errno.h> +#include <fcntl.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> + +/** Initialize a bftw_stat cache. */ +static void bftw_stat_init(struct bftw_stat *bufs, struct bfs_stat *stat_buf, struct bfs_stat *lstat_buf) { + bufs->stat_buf = stat_buf; + bufs->lstat_buf = lstat_buf; + bufs->stat_err = -1; + bufs->lstat_err = -1; +} + +/** Fill a bftw_stat cache from another one. */ +static void bftw_stat_fill(struct bftw_stat *dest, const struct bftw_stat *src) { + if (dest->stat_err < 0 && src->stat_err >= 0) { + dest->stat_buf = src->stat_buf; + dest->stat_err = src->stat_err; + } + + if (dest->lstat_err < 0 && src->lstat_err >= 0) { + dest->lstat_buf = src->lstat_buf; + dest->lstat_err = src->lstat_err; + } +} + +/** Cache a bfs_stat() result. */ +static void bftw_stat_cache(struct bftw_stat *bufs, enum bfs_stat_flags flags, const struct bfs_stat *buf, int err) { + if (flags & BFS_STAT_NOFOLLOW) { + bufs->lstat_buf = buf; + bufs->lstat_err = err; + if (err || !S_ISLNK(buf->mode)) { + // Non-link, so share stat info + bufs->stat_buf = buf; + bufs->stat_err = err; + } + } else if (flags & BFS_STAT_TRYFOLLOW) { + if (err) { + bufs->stat_err = err; + } else if (S_ISLNK(buf->mode)) { + bufs->lstat_buf = buf; + bufs->lstat_err = err; + bufs->stat_err = ENOENT; + } else { + bufs->stat_buf = buf; + bufs->stat_err = err; + } + } else { + bufs->stat_buf = buf; + bufs->stat_err = err; + } +} + +/** Caching bfs_stat(). */ +static const struct bfs_stat *bftw_stat_impl(struct BFTW *ftwbuf, enum bfs_stat_flags flags) { + struct bftw_stat *bufs = &ftwbuf->stat_bufs; + struct bfs_stat *buf; + + if (flags & BFS_STAT_NOFOLLOW) { + buf = (struct bfs_stat *)bufs->lstat_buf; + if (bufs->lstat_err == 0) { + return buf; + } else if (bufs->lstat_err > 0) { + errno = bufs->lstat_err; + return NULL; + } + } else { + buf = (struct bfs_stat *)bufs->stat_buf; + if (bufs->stat_err == 0) { + return buf; + } else if (bufs->stat_err > 0) { + errno = bufs->stat_err; + return NULL; + } + } + + struct bfs_stat *ret; + int err; + if (bfs_stat(ftwbuf->at_fd, ftwbuf->at_path, flags, buf) == 0) { + ret = buf; + err = 0; +#ifdef S_IFWHT + } else if (errno == ENOENT && ftwbuf->type == BFS_WHT) { + // This matches the behavior of FTS_WHITEOUT on BSD + ret = memset(buf, 0, sizeof(*buf)); + ret->mode = S_IFWHT; + err = 0; +#endif + } else { + ret = NULL; + err = errno; + } + + bftw_stat_cache(bufs, flags, ret, err); + return ret; +} + +const struct bfs_stat *bftw_stat(const struct BFTW *ftwbuf, enum bfs_stat_flags flags) { + struct BFTW *mutbuf = (struct BFTW *)ftwbuf; + const struct bfs_stat *ret; + + if (flags & BFS_STAT_TRYFOLLOW) { + ret = bftw_stat_impl(mutbuf, BFS_STAT_FOLLOW); + if (!ret && errno_is_like(ENOENT)) { + ret = bftw_stat_impl(mutbuf, BFS_STAT_NOFOLLOW); + } + } else { + ret = bftw_stat_impl(mutbuf, flags); + } + + return ret; +} + +const struct bfs_stat *bftw_cached_stat(const struct BFTW *ftwbuf, enum bfs_stat_flags flags) { + const struct bftw_stat *bufs = &ftwbuf->stat_bufs; + + if (flags & BFS_STAT_NOFOLLOW) { + if (bufs->lstat_err == 0) { + return bufs->lstat_buf; + } + } else if (bufs->stat_err == 0) { + return bufs->stat_buf; + } else if ((flags & BFS_STAT_TRYFOLLOW) && error_is_like(bufs->stat_err, ENOENT)) { + if (bufs->lstat_err == 0) { + return bufs->lstat_buf; + } + } + + return NULL; +} + +enum bfs_type bftw_type(const struct BFTW *ftwbuf, enum bfs_stat_flags flags) { + if (flags & BFS_STAT_NOFOLLOW) { + if (ftwbuf->type == BFS_LNK || (ftwbuf->stat_flags & BFS_STAT_NOFOLLOW)) { + return ftwbuf->type; + } + } else if (flags & BFS_STAT_TRYFOLLOW) { + if (ftwbuf->type != BFS_LNK || (ftwbuf->stat_flags & BFS_STAT_TRYFOLLOW)) { + return ftwbuf->type; + } + } else { + if (ftwbuf->type != BFS_LNK) { + return ftwbuf->type; + } else if (ftwbuf->stat_flags & BFS_STAT_TRYFOLLOW) { + return BFS_ERROR; + } + } + + const struct bfs_stat *statbuf = bftw_stat(ftwbuf, flags); + if (statbuf) { + return bfs_mode_to_type(statbuf->mode); + } else { + return BFS_ERROR; + } +} + +/** + * A file. + */ +struct bftw_file { + /** The parent directory, if any. */ + struct bftw_file *parent; + /** The root under which this file was found. */ + struct bftw_file *root; + + /** + * List node for: + * + * bftw_queue::buffer + * bftw_queue::waiting + * bftw_file_open()::parents + */ + struct bftw_file *next; + + /** + * List node for: + * + * bftw_queue::ready + * bftw_state::to_close + */ + struct { struct bftw_file *next; } ready; + + /** + * List node for bftw_cache. + */ + struct { + struct bftw_file *prev; + struct bftw_file *next; + } lru; + + /** This file's depth in the walk. */ + size_t depth; + /** Reference count (for ->parent). */ + size_t refcount; + + /** Pin count (for ->fd). */ + size_t pincount; + /** An open descriptor to this file, or -1. */ + int fd; + /** Whether this file has a pending ioq request. */ + bool ioqueued; + /** An open directory for this file, if any. */ + struct bfs_dir *dir; + + /** This file's type, if known. */ + enum bfs_type type; + /** The device number, for cycle detection. */ + dev_t dev; + /** The inode number, for cycle detection. */ + ino_t ino; + + /** Cached bfs_stat() info. */ + struct bftw_stat stat_bufs; + + /** The offset of this file in the full path. */ + size_t nameoff; + /** The length of the file's name. */ + size_t namelen; + /** The file's name. */ + char name[]; +}; + +/** + * A linked list of bftw_file's. + */ +struct bftw_list { + struct bftw_file *head; + struct bftw_file **tail; +}; + +/** + * bftw_queue flags. + */ +enum bftw_qflags { + /** Track the sync/async service balance. */ + BFTW_QBALANCE = 1 << 0, + /** Buffer files before adding them to the queue. */ + BFTW_QBUFFER = 1 << 1, + /** Use LIFO (stack/DFS) ordering. */ + BFTW_QLIFO = 1 << 2, + /** Maintain a strict order. */ + BFTW_QORDER = 1 << 3, +}; + +/** + * A queue of bftw_file's that may be serviced asynchronously. + * + * A bftw_queue comprises three linked lists each tracking different stages. + * When BFTW_QBUFFER is set, files are initially pushed to the buffer: + * + * ╔═══╗ ╔═══╦═══╗ + * buffer: ║ 𝘩 ║ ║ 𝘩 ║ 𝘪 ║ + * ╠═══╬═══╦═══╗ ╠═══╬═══╬═══╗ + * waiting: ║ e ║ f ║ g ║ → ║ e ║ f ║ g ║ + * ╠═══╬═══╬═══╬═══╗ ╠═══╬═══╬═══╬═══╗ + * ready: ║ 𝕒 ║ 𝕓 ║ 𝕔 ║ 𝕕 ║ ║ 𝕒 ║ 𝕓 ║ 𝕔 ║ 𝕕 ║ + * ╚═══╩═══╩═══╩═══╝ ╚═══╩═══╩═══╩═══╝ + * + * When bftw_queue_flush() is called, the files in the buffer are appended to + * the waiting list (or prepended, if BFTW_QLIFO is set): + * + * ╔═╗ + * buffer: ║ ║ + * ╠═╩═╦═══╦═══╦═══╦═══╗ + * waiting: ║ e ║ f ║ g ║ h ║ i ║ + * ╠═══╬═══╬═══╬═══╬═══╝ + * ready: ║ 𝕒 ║ 𝕓 ║ 𝕔 ║ 𝕕 ║ + * ╚═══╩═══╩═══╩═══╝ + * + * Using the buffer gives a more natural ordering for BFTW_QLIFO, and allows + * files to be sorted before adding them to the waiting list. If BFTW_QBUFFER + * is not set, files are pushed directly to the waiting list instead. + * + * Files on the waiting list are waiting to be "serviced" asynchronously by the + * ioq (for example, by an ioq_opendir() or ioq_stat() call). While they are + * being serviced, they are detached from the queue by bftw_queue_detach() and + * are not tracked by the queue at all: + * + * ╔═╗ + * buffer: ║ ║ + * ╠═╩═╦═══╦═══╗ ⎛ ┌───┬───┐ ⎞ + * waiting: ║ g ║ h ║ i ║ ⎜ ioq: │ 𝓮 │ 𝓯 │ ⎟ + * ╠═══╬═══╬═══╬═══╗ ⎝ └───┴───┘ ⎠ + * ready: ║ 𝕒 ║ 𝕓 ║ 𝕔 ║ 𝕕 ║ + * ╚═══╩═══╩═══╩═══╝ + * + * When their async service is complete, files are reattached to the queue by + * bftw_queue_attach(), this time on the ready list: + * + * ╔═╗ + * buffer: ║ ║ + * ╠═╩═╦═══╦═══╗ ⎛ ┌───┐ ⎞ + * waiting: ║ g ║ h ║ i ║ ⎜ ioq: │ 𝓮 │ ⎟ + * ╠═══╬═══╬═══╬═══╦═══╗ ⎝ └───┘ ⎠ + * ready: ║ 𝕒 ║ 𝕓 ║ 𝕔 ║ 𝕕 ║ 𝕗 ║ + * ╚═══╩═══╩═══╩═══╩═══╝ + * + * Files are added to the ready list in the order they are finished by the ioq. + * bftw_queue_pop() pops a file from the ready list if possible. Otherwise, it + * pops from the waiting list, and the file must be serviced synchronously. + * + * However, if BFTW_QORDER is set, files must be popped in the exact order they + * are added to the waiting list (to maintain sorted order). In this case, + * files are added to the waiting and ready lists at the same time. The + * file->ioqueued flag is set while it is in-service, so that bftw() can wait + * for it to be truly ready before using it. + * + * ╔═╗ + * buffer: ║ ║ + * ╠═╩═╦═══╦═══╗ ⎛ ┌───┐ ⎞ + * waiting: ║ g ║ h ║ i ║ ⎜ ioq: │ 𝓮 │ ⎟ + * ╠═══╬═══╬═══╬═══╦═══╦═══╦═══╦═══╦═══╗ ⎝ └───┘ ⎠ + * ready: ║ 𝕒 ║ 𝕓 ║ 𝕔 ║ 𝕕 ║ 𝓮 ║ 𝕗 ║ g ║ h ║ i ║ + * ╚═══╩═══╩═══╩═══╩═══╩═══╩═══╩═══╩═══╝ + * + * If BFTW_QBALANCE is set, queue->imbalance tracks the delta between async + * service (negative) and synchronous service (positive). The queue is + * considered "balanced" when this number is non-negative. Only a balanced + * queue will perform any async service, ensuring work is fairly distributed + * between the main thread and the ioq. + * + * BFTW_QBALANCE is only set for single-threaded ioqs. When an ioq has multiple + * threads, it is faster to wait for the ioq to complete an operation than it is + * to perform it on the main thread. + */ +struct bftw_queue { + /** Queue flags. */ + enum bftw_qflags flags; + /** A buffer of files to be enqueued together. */ + struct bftw_list buffer; + /** A list of files which are waiting to be serviced. */ + struct bftw_list waiting; + /** A list of already-serviced files. */ + struct bftw_list ready; + /** The current size of the queue. */ + size_t size; + /** The number of files currently in-service. */ + size_t ioqueued; + /** Tracks the imbalance between synchronous and async service. */ + unsigned long imbalance; +}; + +/** Initialize a queue. */ +static void bftw_queue_init(struct bftw_queue *queue, enum bftw_qflags flags) { + queue->flags = flags; + SLIST_INIT(&queue->buffer); + SLIST_INIT(&queue->waiting); + SLIST_INIT(&queue->ready); + queue->size = 0; + queue->ioqueued = 0; + queue->imbalance = 0; +} + +/** Add a file to the queue. */ +static void bftw_queue_push(struct bftw_queue *queue, struct bftw_file *file) { + if (queue->flags & BFTW_QBUFFER) { + SLIST_APPEND(&queue->buffer, file); + } else if (queue->flags & BFTW_QLIFO) { + SLIST_PREPEND(&queue->waiting, file); + if (queue->flags & BFTW_QORDER) { + SLIST_PREPEND(&queue->ready, file, ready); + } + } else { + SLIST_APPEND(&queue->waiting, file); + if (queue->flags & BFTW_QORDER) { + SLIST_APPEND(&queue->ready, file, ready); + } + } + + ++queue->size; +} + +/** Add any buffered files to the queue. */ +static void bftw_queue_flush(struct bftw_queue *queue) { + if (!(queue->flags & BFTW_QBUFFER)) { + return; + } + + if (queue->flags & BFTW_QORDER) { + // When sorting, add files to the ready list at the same time + // (and in the same order) as they are added to the waiting list + struct bftw_file **cursor = (queue->flags & BFTW_QLIFO) + ? &queue->ready.head + : queue->ready.tail; + for_slist (struct bftw_file, file, &queue->buffer) { + cursor = SLIST_INSERT(&queue->ready, cursor, file, ready); + } + } + + if (queue->flags & BFTW_QLIFO) { + SLIST_EXTEND(&queue->buffer, &queue->waiting); + } + + SLIST_EXTEND(&queue->waiting, &queue->buffer); +} + +/** Check if the queue is properly balanced for async work. */ +static bool bftw_queue_balanced(const struct bftw_queue *queue) { + if (queue->flags & BFTW_QBALANCE) { + return (long)queue->imbalance >= 0; + } else { + return true; + } +} + +/** Update the queue balance for (a)sync service. */ +static void bftw_queue_rebalance(struct bftw_queue *queue, bool async) { + if (async) { + --queue->imbalance; + } else { + ++queue->imbalance; + } +} + +/** Detatch the next waiting file. */ +static void bftw_queue_detach(struct bftw_queue *queue, struct bftw_file *file, bool async) { + bfs_assert(!file->ioqueued); + + if (file == SLIST_HEAD(&queue->buffer)) { + // To maintain order, we can't detach any files until they're + // added to the waiting/ready lists + bfs_assert(!(queue->flags & BFTW_QORDER)); + SLIST_POP(&queue->buffer); + } else if (file == SLIST_HEAD(&queue->waiting)) { + SLIST_POP(&queue->waiting); + } else { + bfs_bug("Detached file was not buffered or waiting"); + } + + if (async) { + file->ioqueued = true; + ++queue->ioqueued; + bftw_queue_rebalance(queue, true); + } +} + +/** Reattach a serviced file to the queue. */ +static void bftw_queue_attach(struct bftw_queue *queue, struct bftw_file *file, bool async) { + if (async) { + bfs_assert(file->ioqueued); + file->ioqueued = false; + --queue->ioqueued; + } else { + bfs_assert(!file->ioqueued); + } + + if (!(queue->flags & BFTW_QORDER)) { + SLIST_APPEND(&queue->ready, file, ready); + } +} + +/** Make a file ready immediately. */ +static void bftw_queue_skip(struct bftw_queue *queue, struct bftw_file *file) { + bftw_queue_detach(queue, file, false); + bftw_queue_attach(queue, file, false); +} + +/** Get the next waiting file. */ +static struct bftw_file *bftw_queue_waiting(const struct bftw_queue *queue) { + if (!(queue->flags & BFTW_QBUFFER)) { + return SLIST_HEAD(&queue->waiting); + } + + if (queue->flags & BFTW_QORDER) { + // Don't detach files until they're on the waiting/ready lists + return SLIST_HEAD(&queue->waiting); + } + + const struct bftw_list *prefix = &queue->waiting; + const struct bftw_list *suffix = &queue->buffer; + if (queue->flags & BFTW_QLIFO) { + prefix = &queue->buffer; + suffix = &queue->waiting; + } + + struct bftw_file *file = SLIST_HEAD(prefix); + if (!file) { + file = SLIST_HEAD(suffix); + } + return file; +} + +/** Get the next ready file. */ +static struct bftw_file *bftw_queue_ready(const struct bftw_queue *queue) { + return SLIST_HEAD(&queue->ready); +} + +/** Pop a file from the queue. */ +static struct bftw_file *bftw_queue_pop(struct bftw_queue *queue) { + // Don't pop until we've had a chance to sort the buffer + bfs_assert(SLIST_EMPTY(&queue->buffer)); + + struct bftw_file *file = SLIST_POP(&queue->ready, ready); + + if (!file || file == SLIST_HEAD(&queue->waiting)) { + // If no files are ready, try the waiting list. Or, if + // BFTW_QORDER is set, we may need to pop from both lists. + file = SLIST_POP(&queue->waiting); + } + + if (file) { + --queue->size; + } + + return file; +} + +/** + * A cache of open directories. + */ +struct bftw_cache { + /** The head of the LRU list. */ + struct bftw_file *head; + /** The tail of the LRU list. */ + struct bftw_file *tail; + /** The insertion target for the LRU list. */ + struct bftw_file *target; + /** The remaining capacity of the LRU list. */ + size_t capacity; + + /** bftw_file arena. */ + struct varena files; + + /** bfs_dir arena. */ + struct arena dirs; + /** Remaining bfs_dir capacity. */ + int dir_limit; + + /** bfs_stat arena. */ + struct arena stat_bufs; +}; + +/** Initialize a cache. */ +static void bftw_cache_init(struct bftw_cache *cache, size_t capacity) { + LIST_INIT(cache); + cache->target = NULL; + cache->capacity = capacity; + + VARENA_INIT(&cache->files, struct bftw_file, name); + + bfs_dir_arena(&cache->dirs); + + if (cache->capacity > 1024) { + cache->dir_limit = 1024; + } else { + cache->dir_limit = capacity - 1; + } + + ARENA_INIT(&cache->stat_bufs, struct bfs_stat); +} + +/** Allocate a directory. */ +static struct bfs_dir *bftw_allocdir(struct bftw_cache *cache, bool force) { + if (!force && cache->dir_limit <= 0) { + errno = ENOMEM; + return NULL; + } + + struct bfs_dir *dir = arena_alloc(&cache->dirs); + if (dir) { + --cache->dir_limit; + } + return dir; +} + +/** Free a directory. */ +static void bftw_freedir(struct bftw_cache *cache, struct bfs_dir *dir) { + ++cache->dir_limit; + arena_free(&cache->dirs, dir); +} + +/** Remove a bftw_file from the LRU list. */ +static void bftw_lru_remove(struct bftw_cache *cache, struct bftw_file *file) { + if (cache->target == file) { + cache->target = file->lru.prev; + } + + LIST_REMOVE(cache, file, lru); +} + +/** Remove a bftw_file from the cache. */ +static void bftw_cache_remove(struct bftw_cache *cache, struct bftw_file *file) { + bftw_lru_remove(cache, file); + ++cache->capacity; +} + +/** Close a bftw_file. */ +static void bftw_file_close(struct bftw_cache *cache, struct bftw_file *file) { + bfs_assert(file->fd >= 0); + bfs_assert(file->pincount == 0); + + if (file->dir) { + bfs_closedir(file->dir); + bftw_freedir(cache, file->dir); + file->dir = NULL; + } else { + xclose(file->fd); + } + + file->fd = -1; + bftw_cache_remove(cache, file); +} + +/** Pop the least recently used directory from the cache. */ +static int bftw_cache_pop(struct bftw_cache *cache) { + struct bftw_file *file = cache->tail; + if (!file) { + return -1; + } + + bftw_file_close(cache, file); + return 0; +} + +/** Add a bftw_file to the LRU list. */ +static void bftw_lru_add(struct bftw_cache *cache, struct bftw_file *file) { + bfs_assert(file->fd >= 0); + + LIST_INSERT(cache, cache->target, file, lru); + + // Prefer to keep the root paths open by keeping them at the head of the list + if (file->depth == 0) { + cache->target = file; + } +} + +/** Add a bftw_file to the cache. */ +static int bftw_cache_add(struct bftw_cache *cache, struct bftw_file *file) { + bfs_assert(file->fd >= 0); + + if (cache->capacity == 0 && bftw_cache_pop(cache) != 0) { + bftw_file_close(cache, file); + errno = EMFILE; + return -1; + } + + bfs_assert(cache->capacity > 0); + --cache->capacity; + + bftw_lru_add(cache, file); + return 0; +} + +/** Pin a cache entry so it won't be closed. */ +static void bftw_cache_pin(struct bftw_cache *cache, struct bftw_file *file) { + bfs_assert(file->fd >= 0); + + if (file->pincount++ == 0) { + bftw_lru_remove(cache, file); + } +} + +/** Unpin a cache entry. */ +static void bftw_cache_unpin(struct bftw_cache *cache, struct bftw_file *file) { + bfs_assert(file->fd >= 0); + bfs_assert(file->pincount > 0); + + if (--file->pincount == 0) { + bftw_lru_add(cache, file); + } +} + +/** Compute the name offset of a child path. */ +static size_t bftw_child_nameoff(const struct bftw_file *parent) { + size_t ret = parent->nameoff + parent->namelen; + if (parent->name[parent->namelen - 1] != '/') { + ++ret; + } + return ret; +} + +/** Destroy a cache. */ +static void bftw_cache_destroy(struct bftw_cache *cache) { + bfs_assert(LIST_EMPTY(cache)); + bfs_assert(!cache->target); + + arena_destroy(&cache->stat_bufs); + arena_destroy(&cache->dirs); + varena_destroy(&cache->files); +} + +/** Create a new bftw_file. */ +static struct bftw_file *bftw_file_new(struct bftw_cache *cache, struct bftw_file *parent, const char *name) { + size_t namelen = strlen(name); + struct bftw_file *file = varena_alloc(&cache->files, namelen + 1); + if (!file) { + return NULL; + } + + file->parent = parent; + + if (parent) { + file->root = parent->root; + file->depth = parent->depth + 1; + file->nameoff = bftw_child_nameoff(parent); + ++parent->refcount; + } else { + file->root = file; + file->depth = 0; + file->nameoff = 0; + } + + SLIST_ITEM_INIT(file); + SLIST_ITEM_INIT(file, ready); + LIST_ITEM_INIT(file, lru); + + file->refcount = 1; + file->pincount = 0; + file->fd = -1; + file->ioqueued = false; + file->dir = NULL; + + file->type = BFS_UNKNOWN; + file->dev = -1; + file->ino = -1; + + bftw_stat_init(&file->stat_bufs, NULL, NULL); + + file->namelen = namelen; + memcpy(file->name, name, namelen + 1); + + return file; +} + +/** Associate an open directory with a bftw_file. */ +static void bftw_file_set_dir(struct bftw_cache *cache, struct bftw_file *file, struct bfs_dir *dir) { + bfs_assert(!file->dir); + file->dir = dir; + + if (file->fd >= 0) { + bfs_assert(file->fd == bfs_dirfd(dir)); + } else { + file->fd = bfs_dirfd(dir); + bftw_cache_add(cache, file); + } +} + +/** Free a file's cached stat() buffers. */ +static void bftw_stat_recycle(struct bftw_cache *cache, struct bftw_file *file) { + struct bftw_stat *bufs = &file->stat_bufs; + + struct bfs_stat *stat_buf = (struct bfs_stat *)bufs->stat_buf; + struct bfs_stat *lstat_buf = (struct bfs_stat *)bufs->lstat_buf; + if (stat_buf) { + arena_free(&cache->stat_bufs, stat_buf); + } else if (lstat_buf) { + arena_free(&cache->stat_bufs, lstat_buf); + } + + bftw_stat_init(bufs, NULL, NULL); +} + +/** Free a bftw_file. */ +static void bftw_file_free(struct bftw_cache *cache, struct bftw_file *file) { + bfs_assert(file->refcount == 0); + + if (file->fd >= 0) { + bftw_file_close(cache, file); + } + + bftw_stat_recycle(cache, file); + + varena_free(&cache->files, file, file->namelen + 1); +} + +/** + * Holds the current state of the bftw() traversal. + */ +struct bftw_state { + /** The path(s) to start from. */ + const char **paths; + /** The number of starting paths. */ + size_t npaths; + /** bftw() callback. */ + bftw_callback *callback; + /** bftw() callback data. */ + void *ptr; + /** bftw() flags. */ + enum bftw_flags flags; + /** Search strategy. */ + enum bftw_strategy strategy; + /** The mount table. */ + const struct bfs_mtab *mtab; + /** bfs_opendir() flags. */ + enum bfs_dir_flags dir_flags; + + /** The appropriate errno value, if any. */ + int error; + + /** The cache of open directories. */ + struct bftw_cache cache; + + /** The async I/O queue. */ + struct ioq *ioq; + /** The number of I/O threads. */ + size_t nthreads; + + /** The queue of unpinned directories to unwrap. */ + struct bftw_list to_close; + /** The queue of files to visit. */ + struct bftw_queue fileq; + /** The queue of directories to open/read. */ + struct bftw_queue dirq; + + /** The current path. */ + dchar *path; + /** The current file. */ + struct bftw_file *file; + /** The previous file. */ + struct bftw_file *previous; + + /** The currently open directory. */ + struct bfs_dir *dir; + /** The current directory entry. */ + struct bfs_dirent *de; + /** Storage for the directory entry. */ + struct bfs_dirent de_storage; + /** Any error encountered while reading the directory. */ + int direrror; + + /** Extra data about the current file. */ + struct BFTW ftwbuf; + /** stat() buffer storage. */ + struct bfs_stat stat_buf; + /** lstat() buffer storage. */ + struct bfs_stat lstat_buf; +}; + +/** Check if we have to buffer files before visiting them. */ +static bool bftw_must_buffer(const struct bftw_state *state) { + if (state->flags & BFTW_SORT) { + // Have to buffer the files to sort them + return true; + } + + if (state->strategy == BFTW_DFS && state->nthreads == 0) { + // Without buffering, we would get a not-quite-depth-first + // ordering: + // + // a + // b + // a/c + // a/c/d + // b/e + // b/e/f + // + // This is okay for iterative deepening, since the caller only + // sees files at the target depth. We also deem it okay for + // parallel searches, since the order is unpredictable anyway. + return true; + } + + if ((state->flags & BFTW_STAT) && state->nthreads > 1) { + // We will be buffering every file anyway for ioq_stat() + return true; + } + + return false; +} + +/** Initialize the bftw() state. */ +static int bftw_state_init(struct bftw_state *state, const struct bftw_args *args) { + state->paths = args->paths; + state->npaths = args->npaths; + state->callback = args->callback; + state->ptr = args->ptr; + state->flags = args->flags; + state->strategy = args->strategy; + state->mtab = args->mtab; + state->dir_flags = 0; + state->error = 0; + + if (args->nopenfd < 2) { + errno = EMFILE; + return -1; + } + + size_t nopenfd = args->nopenfd; + size_t qdepth = 4096; + size_t nthreads = args->nthreads; + +#if BFS_USE_LIBURING + // io_uring uses one fd per ring, ioq uses one ring per thread + if (nthreads >= nopenfd - 1) { + nthreads = nopenfd - 2; + } + nopenfd -= nthreads; +#endif + + bftw_cache_init(&state->cache, nopenfd); + + if (nthreads > 0) { + state->ioq = ioq_create(qdepth, nthreads); + if (!state->ioq) { + return -1; + } + } else { + state->ioq = NULL; + } + state->nthreads = nthreads; + + if (bftw_must_buffer(state)) { + state->flags |= BFTW_BUFFER; + } + + if (state->flags & BFTW_WHITEOUTS) { + state->dir_flags |= BFS_DIR_WHITEOUTS; + } + + SLIST_INIT(&state->to_close); + + enum bftw_qflags qflags = 0; + if (state->strategy != BFTW_BFS) { + qflags |= BFTW_QBUFFER | BFTW_QLIFO; + } + if (state->flags & BFTW_BUFFER) { + qflags |= BFTW_QBUFFER; + } + if (state->flags & BFTW_SORT) { + qflags |= BFTW_QORDER; + } else if (nthreads == 1) { + qflags |= BFTW_QBALANCE; + } + bftw_queue_init(&state->fileq, qflags); + + if (state->strategy == BFTW_BFS || (state->flags & BFTW_BUFFER)) { + // In breadth-first mode, or if we're already buffering files, + // directories can be queued in FIFO order + qflags &= ~(BFTW_QBUFFER | BFTW_QLIFO); + } + bftw_queue_init(&state->dirq, qflags); + + state->path = NULL; + state->file = NULL; + state->previous = NULL; + + state->dir = NULL; + state->de = NULL; + state->direrror = 0; + + return 0; +} + +/** Queue a directory for unwrapping. */ +static void bftw_delayed_unwrap(struct bftw_state *state, struct bftw_file *file) { + bfs_assert(file->dir); + + if (!SLIST_ATTACHED(&state->to_close, file, ready)) { + SLIST_APPEND(&state->to_close, file, ready); + } +} + +/** Unpin a file's parent. */ +static void bftw_unpin_parent(struct bftw_state *state, struct bftw_file *file, bool unwrap) { + struct bftw_file *parent = file->parent; + if (!parent) { + return; + } + + bftw_cache_unpin(&state->cache, parent); + + if (unwrap && parent->dir && parent->pincount == 0) { + bftw_delayed_unwrap(state, parent); + } +} + +/** Pop a response from the I/O queue. */ +static int bftw_ioq_pop(struct bftw_state *state, bool block) { + struct bftw_cache *cache = &state->cache; + struct ioq *ioq = state->ioq; + if (!ioq) { + return -1; + } + + struct ioq_ent *ent = ioq_pop(ioq, block); + if (!ent) { + return -1; + } + + struct bftw_file *file = ent->ptr; + if (file) { + bftw_unpin_parent(state, file, true); + } + + enum ioq_op op = ent->op; + switch (op) { + case IOQ_CLOSE: + ++cache->capacity; + break; + + case IOQ_CLOSEDIR: + ++cache->capacity; + bftw_freedir(cache, ent->closedir.dir); + break; + + case IOQ_OPENDIR: + ++cache->capacity; + + if (ent->result >= 0) { + bftw_file_set_dir(cache, file, ent->opendir.dir); + } else { + bftw_freedir(cache, ent->opendir.dir); + } + + bftw_queue_attach(&state->dirq, file, true); + break; + + case IOQ_STAT: + if (ent->result >= 0) { + bftw_stat_cache(&file->stat_bufs, ent->stat.flags, ent->stat.buf, 0); + } else { + arena_free(&cache->stat_bufs, ent->stat.buf); + bftw_stat_cache(&file->stat_bufs, ent->stat.flags, NULL, -ent->result); + } + + bftw_queue_attach(&state->fileq, file, true); + break; + } + + ioq_free(ioq, ent); + return op; +} + +/** Try to reserve space in the I/O queue. */ +static int bftw_ioq_reserve(struct bftw_state *state) { + struct ioq *ioq = state->ioq; + if (!ioq) { + return -1; + } + + if (ioq_capacity(ioq) > 0) { + return 0; + } + + // With more than one background thread, it's faster to wait on + // background I/O than it is to do it on the main thread + bool block = state->nthreads > 1; + if (bftw_ioq_pop(state, block) < 0) { + return -1; + } + + return 0; +} + +/** Try to reserve space in the cache. */ +static int bftw_cache_reserve(struct bftw_state *state) { + struct bftw_cache *cache = &state->cache; + if (cache->capacity > 0) { + return 0; + } + + while (bftw_ioq_pop(state, true) >= 0) { + if (cache->capacity > 0) { + return 0; + } + } + + if (bftw_cache_pop(cache) != 0) { + errno = EMFILE; + return -1; + } + + bfs_assert(cache->capacity > 0); + return 0; +} + +/** Open a bftw_file relative to another one. */ +static int bftw_file_openat(struct bftw_state *state, struct bftw_file *file, struct bftw_file *base, const char *at_path) { + bfs_assert(file->fd < 0); + + struct bftw_cache *cache = &state->cache; + + int at_fd = AT_FDCWD; + if (base) { + bftw_cache_pin(cache, base); + at_fd = base->fd; + } + + int fd = -1; + if (bftw_cache_reserve(state) != 0) { + goto unpin; + } + + int flags = O_RDONLY | O_CLOEXEC | O_DIRECTORY; + fd = openat(at_fd, at_path, flags); + + if (fd < 0 && errno == EMFILE) { + if (bftw_cache_pop(cache) == 0) { + fd = openat(at_fd, at_path, flags); + } + cache->capacity = 1; + } + +unpin: + if (base) { + bftw_cache_unpin(cache, base); + } + + if (fd >= 0) { + file->fd = fd; + bftw_cache_add(cache, file); + } + + return fd; +} + +/** Open a bftw_file. */ +static int bftw_file_open(struct bftw_state *state, struct bftw_file *file, const char *path) { + // Find the nearest open ancestor + struct bftw_file *base = file; + do { + base = base->parent; + } while (base && base->fd < 0); + + const char *at_path = path; + if (base) { + at_path += bftw_child_nameoff(base); + } + + int fd = bftw_file_openat(state, file, base, at_path); + if (fd >= 0 || !errno_is_like(ENAMETOOLONG)) { + return fd; + } + + // Handle ENAMETOOLONG by manually traversing the path component-by-component + struct bftw_list parents; + SLIST_INIT(&parents); + + struct bftw_file *cur; + for (cur = file; cur != base; cur = cur->parent) { + SLIST_PREPEND(&parents, cur); + } + + while ((cur = SLIST_POP(&parents))) { + if (!cur->parent || cur->parent->fd >= 0) { + bftw_file_openat(state, cur, cur->parent, cur->name); + } + } + + return file->fd; +} + +/** Close a directory, asynchronously if possible. */ +static int bftw_ioq_closedir(struct bftw_state *state, struct bfs_dir *dir) { + if (bftw_ioq_reserve(state) == 0) { + if (ioq_closedir(state->ioq, dir, NULL) == 0) { + return 0; + } + } + + struct bftw_cache *cache = &state->cache; + int ret = bfs_closedir(dir); + bftw_freedir(cache, dir); + ++cache->capacity; + return ret; +} + +/** Close a file descriptor, asynchronously if possible. */ +static int bftw_ioq_close(struct bftw_state *state, int fd) { + if (bftw_ioq_reserve(state) == 0) { + if (ioq_close(state->ioq, fd, NULL) == 0) { + return 0; + } + } + + struct bftw_cache *cache = &state->cache; + int ret = xclose(fd); + ++cache->capacity; + return ret; +} + +/** Close a file, asynchronously if possible. */ +static int bftw_close(struct bftw_state *state, struct bftw_file *file) { + bfs_assert(file->fd >= 0); + bfs_assert(file->pincount == 0); + + struct bfs_dir *dir = file->dir; + int fd = file->fd; + + bftw_lru_remove(&state->cache, file); + file->dir = NULL; + file->fd = -1; + + if (dir) { + return bftw_ioq_closedir(state, dir); + } else { + return bftw_ioq_close(state, fd); + } +} + +/** Free an open directory. */ +static int bftw_unwrapdir(struct bftw_state *state, struct bftw_file *file) { + struct bfs_dir *dir = file->dir; + if (!dir) { + return 0; + } + + struct bftw_cache *cache = &state->cache; + + // Try to keep an open fd if any children exist + bool reffed = file->refcount > 1; + // Keep the fd the same if it's pinned + bool pinned = file->pincount > 0; + +#if BFS_USE_UNWRAPDIR + if (reffed || pinned) { + bfs_unwrapdir(dir); + bftw_freedir(cache, dir); + file->dir = NULL; + return 0; + } +#else + if (pinned) { + return -1; + } +#endif + + if (!reffed) { + return bftw_close(state, file); + } + + // Make room for dup() + bftw_cache_pin(cache, file); + int ret = bftw_cache_reserve(state); + bftw_cache_unpin(cache, file); + if (ret != 0) { + return ret; + } + + int fd = dup_cloexec(file->fd); + if (fd < 0) { + return -1; + } + --cache->capacity; + + file->dir = NULL; + file->fd = fd; + return bftw_ioq_closedir(state, dir); +} + +/** Try to pin a file's parent. */ +static int bftw_pin_parent(struct bftw_state *state, struct bftw_file *file) { + struct bftw_file *parent = file->parent; + if (!parent) { + return AT_FDCWD; + } + + int fd = parent->fd; + if (fd < 0) { + bfs_static_assert((int)AT_FDCWD != -1); + return -1; + } + + bftw_cache_pin(&state->cache, parent); + return fd; +} + +/** Open a directory asynchronously. */ +static int bftw_ioq_opendir(struct bftw_state *state, struct bftw_file *file) { + struct bftw_cache *cache = &state->cache; + + if (bftw_ioq_reserve(state) != 0) { + goto fail; + } + + int dfd = bftw_pin_parent(state, file); + if (dfd < 0 && dfd != (int)AT_FDCWD) { + goto fail; + } + + if (bftw_cache_reserve(state) != 0) { + goto unpin; + } + + struct bfs_dir *dir = bftw_allocdir(cache, false); + if (!dir) { + goto unpin; + } + + if (ioq_opendir(state->ioq, dir, dfd, file->name, state->dir_flags, file) != 0) { + goto free; + } + + --cache->capacity; + return 0; + +free: + bftw_freedir(cache, dir); +unpin: + bftw_unpin_parent(state, file, false); +fail: + return -1; +} + +/** Open a batch of directories asynchronously. */ +static void bftw_ioq_opendirs(struct bftw_state *state) { + while (bftw_queue_balanced(&state->dirq)) { + struct bftw_file *dir = bftw_queue_waiting(&state->dirq); + if (!dir) { + break; + } + + if (bftw_ioq_opendir(state, dir) == 0) { + bftw_queue_detach(&state->dirq, dir, true); + } else { + break; + } + } +} + +/** Push a directory onto the queue. */ +static void bftw_push_dir(struct bftw_state *state, struct bftw_file *file) { + bfs_assert(file->type == BFS_DIR); + bftw_queue_push(&state->dirq, file); + bftw_ioq_opendirs(state); +} + +/** Pop a file from a queue, then activate it. */ +static bool bftw_pop(struct bftw_state *state, struct bftw_queue *queue) { + if (queue->size == 0) { + return false; + } + + while (!bftw_queue_ready(queue) && queue->ioqueued > 0) { + bool block = true; + if (bftw_queue_waiting(queue) && state->nthreads == 1) { + // With only one background thread, balance the work + // between it and the main thread + block = false; + } + + if (bftw_ioq_pop(state, block) < 0) { + break; + } + } + + struct bftw_file *file = bftw_queue_pop(queue); + if (!file) { + return false; + } + + while (file->ioqueued) { + bftw_ioq_pop(state, true); + } + + state->file = file; + return true; +} + +/** Pop a directory to read from the queue. */ +static bool bftw_pop_dir(struct bftw_state *state) { + bfs_assert(!state->file); + + if (state->flags & BFTW_SORT) { + // Keep strict breadth-first order when sorting + if (state->strategy == BFTW_BFS && bftw_queue_ready(&state->fileq)) { + return false; + } + } else if (!bftw_queue_ready(&state->dirq)) { + // Don't block if we have files ready to visit + if (bftw_queue_ready(&state->fileq)) { + return false; + } + } + + return bftw_pop(state, &state->dirq); +} + +/** Figure out bfs_stat() flags. */ +static enum bfs_stat_flags bftw_stat_flags(const struct bftw_state *state, size_t depth) { + enum bftw_flags mask = BFTW_FOLLOW_ALL; + if (depth == 0) { + mask |= BFTW_FOLLOW_ROOTS; + } + + if (state->flags & mask) { + return BFS_STAT_TRYFOLLOW; + } else { + return BFS_STAT_NOFOLLOW; + } +} + +/** Check if a stat() call is necessary. */ +static bool bftw_must_stat(const struct bftw_state *state, size_t depth, enum bfs_type type, const char *name) { + if (state->flags & BFTW_STAT) { + return true; + } + + switch (type) { + case BFS_UNKNOWN: + return true; + + case BFS_DIR: + return state->flags & (BFTW_DETECT_CYCLES | BFTW_SKIP_MOUNTS | BFTW_PRUNE_MOUNTS); + + case BFS_LNK: + if (!(bftw_stat_flags(state, depth) & BFS_STAT_NOFOLLOW)) { + return true; + } + fallthru; + + default: +#if __linux__ + if (state->mtab && bfs_might_be_mount(state->mtab, name)) { + return true; + } +#endif + return false; + } +} + +/** stat() a file asynchronously. */ +static int bftw_ioq_stat(struct bftw_state *state, struct bftw_file *file) { + if (bftw_ioq_reserve(state) != 0) { + goto fail; + } + + int dfd = bftw_pin_parent(state, file); + if (dfd < 0 && dfd != (int)AT_FDCWD) { + goto fail; + } + + struct bftw_cache *cache = &state->cache; + struct bfs_stat *buf = arena_alloc(&cache->stat_bufs); + if (!buf) { + goto unpin; + } + + enum bfs_stat_flags flags = bftw_stat_flags(state, file->depth); + if (ioq_stat(state->ioq, dfd, file->name, flags, buf, file) != 0) { + goto free; + } + + return 0; + +free: + arena_free(&cache->stat_bufs, buf); +unpin: + bftw_unpin_parent(state, file, false); +fail: + return -1; +} + +/** Check if we should stat() a file asynchronously. */ +static bool bftw_should_ioq_stat(struct bftw_state *state, struct bftw_file *file) { + // To avoid surprising users too much, process the roots in order + if (file->depth == 0) { + return false; + } + +#ifdef S_IFWHT + // ioq_stat() does not do whiteout emulation like bftw_stat_impl() + if (file->type == BFS_WHT) { + return false; + } +#endif + + return bftw_must_stat(state, file->depth, file->type, file->name); +} + +/** Call stat() on files that need it. */ +static void bftw_stat_files(struct bftw_state *state) { + while (true) { + struct bftw_file *file = bftw_queue_waiting(&state->fileq); + if (!file) { + break; + } + + if (!bftw_should_ioq_stat(state, file)) { + bftw_queue_skip(&state->fileq, file); + continue; + } + + if (!bftw_queue_balanced(&state->fileq)) { + break; + } + + if (bftw_ioq_stat(state, file) == 0) { + bftw_queue_detach(&state->fileq, file, true); + } else { + break; + } + } +} + +/** Push a file onto the queue. */ +static void bftw_push_file(struct bftw_state *state, struct bftw_file *file) { + bftw_queue_push(&state->fileq, file); + bftw_stat_files(state); +} + +/** Pop a file to visit from the queue. */ +static bool bftw_pop_file(struct bftw_state *state) { + bfs_assert(!state->file); + return bftw_pop(state, &state->fileq); +} + +/** Build the path to the current file. */ +static int bftw_build_path(struct bftw_state *state, const char *name) { + const struct bftw_file *file = state->file; + + size_t pathlen = file ? file->nameoff + file->namelen : 0; + if (dstresize(&state->path, pathlen) != 0) { + state->error = errno; + return -1; + } + + // Try to find a common ancestor with the existing path + const struct bftw_file *ancestor = state->previous; + while (ancestor && ancestor->depth > file->depth) { + ancestor = ancestor->parent; + } + + // Build the path backwards + while (file && file != ancestor) { + if (file->nameoff > 0) { + state->path[file->nameoff - 1] = '/'; + } + memcpy(state->path + file->nameoff, file->name, file->namelen); + + if (ancestor && ancestor->depth == file->depth) { + ancestor = ancestor->parent; + } + file = file->parent; + } + + state->previous = state->file; + + if (name) { + if (pathlen > 0 && state->path[pathlen - 1] != '/') { + if (dstrapp(&state->path, '/') != 0) { + state->error = errno; + return -1; + } + } + if (dstrcat(&state->path, name) != 0) { + state->error = errno; + return -1; + } + } + + return 0; +} + +/** Open a bftw_file as a directory. */ +static struct bfs_dir *bftw_file_opendir(struct bftw_state *state, struct bftw_file *file, const char *path) { + int fd = bftw_file_open(state, file, path); + if (fd < 0) { + return NULL; + } + + struct bftw_cache *cache = &state->cache; + struct bfs_dir *dir = bftw_allocdir(cache, true); + if (!dir) { + return NULL; + } + + if (bfs_opendir(dir, fd, NULL, state->dir_flags) != 0) { + bftw_freedir(cache, dir); + return NULL; + } + + bftw_file_set_dir(cache, file, dir); + return dir; +} + +/** Open the current directory. */ +static int bftw_opendir(struct bftw_state *state) { + bfs_assert(!state->dir); + bfs_assert(!state->de); + + state->direrror = 0; + + struct bftw_file *file = state->file; + state->dir = file->dir; + if (state->dir) { + goto pin; + } + + if (bftw_build_path(state, NULL) != 0) { + return -1; + } + + bftw_queue_rebalance(&state->dirq, false); + + state->dir = bftw_file_opendir(state, file, state->path); + if (!state->dir) { + state->direrror = errno; + return 0; + } + +pin: + bftw_cache_pin(&state->cache, file); + return 0; +} + +/** Read an entry from the current directory. */ +static int bftw_readdir(struct bftw_state *state) { + if (!state->dir) { + return -1; + } + + int ret = bfs_readdir(state->dir, &state->de_storage); + if (ret > 0) { + state->de = &state->de_storage; + } else if (ret == 0) { + state->de = NULL; + } else { + state->de = NULL; + state->direrror = errno; + } + + return ret; +} + +/** Open a file if necessary. */ +static int bftw_ensure_open(struct bftw_state *state, struct bftw_file *file, const char *path) { + int ret = file->fd; + + if (ret < 0) { + char *copy = strndup(path, file->nameoff + file->namelen); + if (!copy) { + return -1; + } + + ret = bftw_file_open(state, file, copy); + free(copy); + } + + return ret; +} + +/** Initialize the buffers with data about the current path. */ +static void bftw_init_ftwbuf(struct bftw_state *state, enum bftw_visit visit) { + struct bftw_file *file = state->file; + const struct bfs_dirent *de = state->de; + + struct BFTW *ftwbuf = &state->ftwbuf; + ftwbuf->path = state->path; + ftwbuf->root = file ? file->root->name : ftwbuf->path; + ftwbuf->depth = 0; + ftwbuf->visit = visit; + ftwbuf->type = BFS_UNKNOWN; + ftwbuf->error = state->direrror; + ftwbuf->at_fd = AT_FDCWD; + ftwbuf->at_path = ftwbuf->path; + bftw_stat_init(&ftwbuf->stat_bufs, &state->stat_buf, &state->lstat_buf); + + struct bftw_file *parent = NULL; + if (de) { + parent = file; + ftwbuf->depth = file->depth + 1; + ftwbuf->type = de->type; + ftwbuf->nameoff = bftw_child_nameoff(file); + } else if (file) { + parent = file->parent; + ftwbuf->depth = file->depth; + ftwbuf->type = file->type; + ftwbuf->nameoff = file->nameoff; + bftw_stat_fill(&ftwbuf->stat_bufs, &file->stat_bufs); + } + + if (parent) { + // Try to ensure the immediate parent is open, to avoid ENAMETOOLONG + if (bftw_ensure_open(state, parent, state->path) >= 0) { + ftwbuf->at_fd = parent->fd; + ftwbuf->at_path += ftwbuf->nameoff; + } else { + ftwbuf->error = errno; + } + } + + if (ftwbuf->depth == 0) { + // Compute the name offset for root paths like "foo/bar" + ftwbuf->nameoff = xbaseoff(ftwbuf->path); + } + + ftwbuf->stat_flags = bftw_stat_flags(state, ftwbuf->depth); + + if (ftwbuf->error != 0) { + ftwbuf->type = BFS_ERROR; + return; + } + + const struct bfs_stat *statbuf = NULL; + if (bftw_must_stat(state, ftwbuf->depth, ftwbuf->type, ftwbuf->path + ftwbuf->nameoff)) { + statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); + if (statbuf) { + ftwbuf->type = bfs_mode_to_type(statbuf->mode); + } else { + ftwbuf->type = BFS_ERROR; + ftwbuf->error = errno; + return; + } + } + + if (ftwbuf->type == BFS_DIR && (state->flags & BFTW_DETECT_CYCLES)) { + for (const struct bftw_file *ancestor = parent; ancestor; ancestor = ancestor->parent) { + if (ancestor->dev == statbuf->dev && ancestor->ino == statbuf->ino) { + ftwbuf->type = BFS_ERROR; + ftwbuf->error = ELOOP; + return; + } + } + } +} + +/** Check if the current file is a mount point. */ +static bool bftw_is_mount(struct bftw_state *state, const char *name) { + const struct bftw_file *file = state->file; + if (!file) { + return false; + } + + const struct bftw_file *parent = name ? file : file->parent; + if (!parent) { + return false; + } + + const struct BFTW *ftwbuf = &state->ftwbuf; + const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); + return statbuf && statbuf->dev != parent->dev; +} + +/** Check if bfs_stat() was called from the main thread. */ +static bool bftw_stat_was_sync(const struct bftw_state *state, const struct bfs_stat *buf) { + return buf == &state->stat_buf || buf == &state->lstat_buf; +} + +/** Invoke the callback. */ +static enum bftw_action bftw_call_back(struct bftw_state *state, const char *name, enum bftw_visit visit) { + if (visit == BFTW_POST && !(state->flags & BFTW_POST_ORDER)) { + return BFTW_PRUNE; + } + + if (bftw_build_path(state, name) != 0) { + return BFTW_STOP; + } + + const struct BFTW *ftwbuf = &state->ftwbuf; + bftw_init_ftwbuf(state, visit); + + // Never give the callback BFS_ERROR unless BFTW_RECOVER is specified + if (ftwbuf->type == BFS_ERROR && !(state->flags & BFTW_RECOVER)) { + state->error = ftwbuf->error; + return BFTW_STOP; + } + + enum bftw_action ret = BFTW_PRUNE; + if ((state->flags & BFTW_SKIP_MOUNTS) && bftw_is_mount(state, name)) { + goto done; + } + + ret = state->callback(ftwbuf, state->ptr); + switch (ret) { + case BFTW_CONTINUE: + if (visit != BFTW_PRE || ftwbuf->type != BFS_DIR) { + ret = BFTW_PRUNE; + } else if (state->flags & BFTW_PRUNE_MOUNTS) { + if (bftw_is_mount(state, name)) { + ret = BFTW_PRUNE; + } + } + break; + + case BFTW_PRUNE: + case BFTW_STOP: + break; + + default: + state->error = EINVAL; + return BFTW_STOP; + } + +done: + if (state->fileq.flags & BFTW_QBALANCE) { + // Detect any main-thread stat() calls to rebalance the queue + const struct bfs_stat *buf = bftw_cached_stat(ftwbuf, BFS_STAT_FOLLOW); + const struct bfs_stat *lbuf = bftw_cached_stat(ftwbuf, BFS_STAT_NOFOLLOW); + if (bftw_stat_was_sync(state, buf) || bftw_stat_was_sync(state, lbuf)) { + bftw_queue_rebalance(&state->fileq, false); + } + } + + return ret; +} + +/** + * Flags controlling which files get visited when done with a directory. + */ +enum bftw_gc_flags { + /** Don't visit anything. */ + BFTW_VISIT_NONE = 0, + /** Report directory errors. */ + BFTW_VISIT_ERROR = 1 << 0, + /** Visit the file itself. */ + BFTW_VISIT_FILE = 1 << 1, + /** Visit the file's ancestors. */ + BFTW_VISIT_PARENTS = 1 << 2, + /** Visit both the file and its ancestors. */ + BFTW_VISIT_ALL = BFTW_VISIT_ERROR | BFTW_VISIT_FILE | BFTW_VISIT_PARENTS, +}; + +/** Garbage collect the current file and its parents. */ +static int bftw_gc(struct bftw_state *state, enum bftw_gc_flags flags) { + int ret = 0; + + struct bftw_file *file = state->file; + if (file) { + if (state->dir) { + bftw_cache_unpin(&state->cache, file); + } + if (file->dir) { + bftw_delayed_unwrap(state, file); + } + } + state->dir = NULL; + state->de = NULL; + + if (state->direrror != 0) { + if (flags & BFTW_VISIT_ERROR) { + if (bftw_call_back(state, NULL, BFTW_PRE) == BFTW_STOP) { + ret = -1; + flags = 0; + } + } else { + state->error = state->direrror; + } + } + state->direrror = 0; + + while ((file = SLIST_POP(&state->to_close, ready))) { + bftw_unwrapdir(state, file); + } + + enum bftw_gc_flags visit = BFTW_VISIT_FILE; + while ((file = state->file)) { + if (--file->refcount > 0) { + state->file = NULL; + break; + } + + if (flags & visit) { + if (bftw_call_back(state, NULL, BFTW_POST) == BFTW_STOP) { + ret = -1; + flags = 0; + } + } + visit = BFTW_VISIT_PARENTS; + + struct bftw_file *parent = file->parent; + if (state->previous == file) { + state->previous = parent; + } + state->file = parent; + + if (file->fd >= 0) { + bftw_close(state, file); + } + bftw_file_free(&state->cache, file); + } + + return ret; +} + +/** Sort a bftw_list by filename. */ +static void bftw_list_sort(struct bftw_list *list) { + if (!list->head || !list->head->next) { + return; + } + + struct bftw_list left, right; + SLIST_INIT(&left); + SLIST_INIT(&right); + + // Split + for (struct bftw_file *hare = list->head; hare && (hare = hare->next); hare = hare->next) { + struct bftw_file *tortoise = SLIST_POP(list); + SLIST_APPEND(&left, tortoise); + } + SLIST_EXTEND(&right, list); + + // Recurse + bftw_list_sort(&left); + bftw_list_sort(&right); + + // Merge + while (!SLIST_EMPTY(&left) && !SLIST_EMPTY(&right)) { + struct bftw_file *lf = left.head; + struct bftw_file *rf = right.head; + + if (strcoll(lf->name, rf->name) <= 0) { + SLIST_POP(&left); + SLIST_APPEND(list, lf); + } else { + SLIST_POP(&right); + SLIST_APPEND(list, rf); + } + } + SLIST_EXTEND(list, &left); + SLIST_EXTEND(list, &right); +} + +/** Flush all the queue buffers. */ +static void bftw_flush(struct bftw_state *state) { + if (state->flags & BFTW_SORT) { + bftw_list_sort(&state->fileq.buffer); + } + bftw_queue_flush(&state->fileq); + bftw_stat_files(state); + + bftw_queue_flush(&state->dirq); + bftw_ioq_opendirs(state); +} + +/** Close the current directory. */ +static int bftw_closedir(struct bftw_state *state) { + if (bftw_gc(state, BFTW_VISIT_ALL) != 0) { + return -1; + } + + bftw_flush(state); + return 0; +} + +/** Fill file identity information from an ftwbuf. */ +static void bftw_save_ftwbuf(struct bftw_file *file, const struct BFTW *ftwbuf) { + file->type = ftwbuf->type; + + const struct bfs_stat *statbuf = bftw_cached_stat(ftwbuf, ftwbuf->stat_flags); + if (statbuf) { + file->dev = statbuf->dev; + file->ino = statbuf->ino; + } +} + +/** Check if we should buffer a file instead of visiting it. */ +static bool bftw_buffer_file(const struct bftw_state *state, const struct bftw_file *file, const char *name) { + if (!name) { + // Already buffered + return false; + } + + if (state->flags & BFTW_BUFFER) { + return true; + } + + // If we need to call stat(), and can do it async, buffer this file + if (!state->ioq) { + return false; + } + + if (!bftw_queue_balanced(&state->fileq)) { + // stat() would run synchronously anyway + return false; + } + + size_t depth = file ? file->depth + 1 : 1; + enum bfs_type type = state->de ? state->de->type : BFS_UNKNOWN; + return bftw_must_stat(state, depth, type, name); +} + +/** Visit and/or enqueue the current file. */ +static int bftw_visit(struct bftw_state *state, const char *name) { + struct bftw_cache *cache = &state->cache; + struct bftw_file *file = state->file; + + if (bftw_buffer_file(state, file, name)) { + file = bftw_file_new(cache, file, name); + if (!file) { + state->error = errno; + return -1; + } + + if (state->de) { + file->type = state->de->type; + } + + bftw_push_file(state, file); + return 0; + } + + switch (bftw_call_back(state, name, BFTW_PRE)) { + case BFTW_CONTINUE: + if (name) { + file = bftw_file_new(cache, state->file, name); + } else { + state->file = NULL; + } + if (!file) { + state->error = errno; + return -1; + } + + bftw_save_ftwbuf(file, &state->ftwbuf); + bftw_stat_recycle(cache, file); + bftw_push_dir(state, file); + return 0; + + case BFTW_PRUNE: + if (file && !name) { + return bftw_gc(state, BFTW_VISIT_PARENTS); + } else { + return 0; + } + + default: + if (file && !name) { + bftw_gc(state, BFTW_VISIT_NONE); + } + return -1; + } +} + +/** Drain a bftw_queue. */ +static void bftw_drain(struct bftw_state *state, struct bftw_queue *queue) { + bftw_queue_flush(queue); + + while (bftw_pop(state, queue)) { + bftw_gc(state, BFTW_VISIT_NONE); + } +} + +/** + * Dispose of the bftw() state. + * + * @return + * The bftw() return value. + */ +static int bftw_state_destroy(struct bftw_state *state) { + dstrfree(state->path); + + struct ioq *ioq = state->ioq; + if (ioq) { + ioq_cancel(ioq); + while (bftw_ioq_pop(state, true) >= 0); + state->ioq = NULL; + } + + bftw_gc(state, BFTW_VISIT_NONE); + bftw_drain(state, &state->dirq); + bftw_drain(state, &state->fileq); + + ioq_destroy(ioq); + + bftw_cache_destroy(&state->cache); + + errno = state->error; + return state->error ? -1 : 0; +} + +/** + * Shared implementation for all search strategies. + */ +static int bftw_impl(struct bftw_state *state) { + for (size_t i = 0; i < state->npaths; ++i) { + if (bftw_visit(state, state->paths[i]) != 0) { + return -1; + } + } + bftw_flush(state); + + while (true) { + while (bftw_pop_dir(state)) { + if (bftw_opendir(state) != 0) { + return -1; + } + while (bftw_readdir(state) > 0) { + if (bftw_visit(state, state->de->name) != 0) { + return -1; + } + } + if (bftw_closedir(state) != 0) { + return -1; + } + } + + if (!bftw_pop_file(state)) { + break; + } + if (bftw_visit(state, NULL) != 0) { + return -1; + } + bftw_flush(state); + } + + return 0; +} + +/** + * bftw() implementation for simple breadth-/depth-first search. + */ +static int bftw_walk(const struct bftw_args *args) { + struct bftw_state state; + if (bftw_state_init(&state, args) != 0) { + return -1; + } + + bftw_impl(&state); + return bftw_state_destroy(&state); +} + +/** + * Iterative deepening search state. + */ +struct bftw_ids_state { + /** Nested walk state. */ + struct bftw_state nested; + /** The wrapped callback. */ + bftw_callback *delegate; + /** The wrapped callback arguments. */ + void *ptr; + /** Which visit this search corresponds to. */ + enum bftw_visit visit; + /** Whether to override the bftw_visit. */ + bool force_visit; + /** The current minimum depth (inclusive). */ + size_t min_depth; + /** The current maximum depth (exclusive). */ + size_t max_depth; + /** The set of pruned paths. */ + struct trie pruned; + /** Whether the bottom has been found. */ + bool bottom; +}; + +/** Iterative deepening callback function. */ +static enum bftw_action bftw_ids_callback(const struct BFTW *ftwbuf, void *ptr) { + struct bftw_ids_state *state = ptr; + + if (state->force_visit) { + struct BFTW *mutbuf = (struct BFTW *)ftwbuf; + mutbuf->visit = state->visit; + } + + if (ftwbuf->type == BFS_ERROR) { + if (ftwbuf->depth + 1 >= state->min_depth) { + return state->delegate(ftwbuf, state->ptr); + } else { + return BFTW_PRUNE; + } + } + + if (ftwbuf->depth < state->min_depth) { + if (trie_find_str(&state->pruned, ftwbuf->path)) { + return BFTW_PRUNE; + } else { + return BFTW_CONTINUE; + } + } else if (state->visit == BFTW_POST) { + if (trie_find_str(&state->pruned, ftwbuf->path)) { + return BFTW_PRUNE; + } + } + + enum bftw_action ret = BFTW_CONTINUE; + if (ftwbuf->visit == state->visit) { + ret = state->delegate(ftwbuf, state->ptr); + } + + switch (ret) { + case BFTW_CONTINUE: + if (ftwbuf->type == BFS_DIR && ftwbuf->depth + 1 >= state->max_depth) { + state->bottom = false; + ret = BFTW_PRUNE; + } + break; + + case BFTW_PRUNE: + if (ftwbuf->type == BFS_DIR) { + if (!trie_insert_str(&state->pruned, ftwbuf->path)) { + state->nested.error = errno; + ret = BFTW_STOP; + } + } + break; + + case BFTW_STOP: + break; + } + + return ret; +} + +/** Initialize iterative deepening state. */ +static int bftw_ids_init(struct bftw_ids_state *state, const struct bftw_args *args) { + state->delegate = args->callback; + state->ptr = args->ptr; + state->visit = BFTW_PRE; + state->force_visit = false; + state->min_depth = 0; + state->max_depth = 1; + trie_init(&state->pruned); + state->bottom = false; + + struct bftw_args ids_args = *args; + ids_args.callback = bftw_ids_callback; + ids_args.ptr = state; + ids_args.flags &= ~BFTW_POST_ORDER; + return bftw_state_init(&state->nested, &ids_args); +} + +/** Finish an iterative deepening search. */ +static int bftw_ids_destroy(struct bftw_ids_state *state) { + trie_destroy(&state->pruned); + return bftw_state_destroy(&state->nested); +} + +/** + * Iterative deepening bftw() wrapper. + */ +static int bftw_ids(const struct bftw_args *args) { + struct bftw_ids_state state; + if (bftw_ids_init(&state, args) != 0) { + return -1; + } + + while (!state.bottom) { + state.bottom = true; + + if (bftw_impl(&state.nested) != 0) { + goto done; + } + + ++state.min_depth; + ++state.max_depth; + } + + if (args->flags & BFTW_POST_ORDER) { + state.visit = BFTW_POST; + state.force_visit = true; + + while (state.min_depth > 0) { + --state.max_depth; + --state.min_depth; + + if (bftw_impl(&state.nested) != 0) { + goto done; + } + } + } + +done: + return bftw_ids_destroy(&state); +} + +/** + * Exponential deepening bftw() wrapper. + */ +static int bftw_eds(const struct bftw_args *args) { + struct bftw_ids_state state; + if (bftw_ids_init(&state, args) != 0) { + return -1; + } + + while (!state.bottom) { + state.bottom = true; + + if (bftw_impl(&state.nested) != 0) { + goto done; + } + + state.min_depth = state.max_depth; + state.max_depth *= 2; + } + + if (args->flags & BFTW_POST_ORDER) { + state.visit = BFTW_POST; + state.min_depth = 0; + state.nested.flags |= BFTW_POST_ORDER; + + bftw_impl(&state.nested); + } + +done: + return bftw_ids_destroy(&state); +} + +int bftw(const struct bftw_args *args) { + switch (args->strategy) { + case BFTW_BFS: + case BFTW_DFS: + return bftw_walk(args); + case BFTW_IDS: + return bftw_ids(args); + case BFTW_EDS: + return bftw_eds(args); + } + + errno = EINVAL; + return -1; +} diff --git a/src/bftw.h b/src/bftw.h new file mode 100644 index 0000000..8656ca7 --- /dev/null +++ b/src/bftw.h @@ -0,0 +1,218 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +/** + * A file-walking API based on nftw(). + */ + +#ifndef BFS_BFTW_H +#define BFS_BFTW_H + +#include "dir.h" +#include "stat.h" +#include <stddef.h> + +/** + * Possible visit occurrences. + */ +enum bftw_visit { + /** Pre-order visit. */ + BFTW_PRE, + /** Post-order visit. */ + BFTW_POST, +}; + +/** + * Cached bfs_stat() info for a file. + */ +struct bftw_stat { + /** The bfs_stat(BFS_STAT_FOLLOW) buffer. */ + const struct bfs_stat *stat_buf; + /** The bfs_stat(BFS_STAT_NOFOLLOW) buffer. */ + const struct bfs_stat *lstat_buf; + /** The cached bfs_stat(BFS_STAT_FOLLOW) error. */ + int stat_err; + /** The cached bfs_stat(BFS_STAT_NOFOLLOW) error. */ + int lstat_err; +}; + +/** + * Data about the current file for the bftw() callback. + */ +struct BFTW { + /** The path to the file. */ + const char *path; + /** The string offset of the filename. */ + size_t nameoff; + + /** The root path passed to bftw(). */ + const char *root; + /** The depth of this file in the traversal. */ + size_t depth; + /** Which visit this is. */ + enum bftw_visit visit; + + /** The file type. */ + enum bfs_type type; + /** The errno that occurred, if type == BFS_ERROR. */ + int error; + + /** A parent file descriptor for the *at() family of calls. */ + int at_fd; + /** The path relative to at_fd for the *at() family of calls. */ + const char *at_path; + + /** Flags for bfs_stat(). */ + enum bfs_stat_flags stat_flags; + /** Cached bfs_stat() info. */ + struct bftw_stat stat_bufs; +}; + +/** + * Get bfs_stat() info for a file encountered during bftw(), caching the result + * whenever possible. + * + * @param ftwbuf + * bftw() data for the file to stat. + * @param flags + * flags for bfs_stat(). Pass ftwbuf->stat_flags for the default flags. + * @return + * A pointer to a bfs_stat() buffer, or NULL if the call failed. + */ +const struct bfs_stat *bftw_stat(const struct BFTW *ftwbuf, enum bfs_stat_flags flags); + +/** + * Get bfs_stat() info for a file encountered during bftw(), if it has already + * been cached. + * + * @param ftwbuf + * bftw() data for the file to stat. + * @param flags + * flags for bfs_stat(). Pass ftwbuf->stat_flags for the default flags. + * @return + * A pointer to a bfs_stat() buffer, or NULL if no stat info is cached. + */ +const struct bfs_stat *bftw_cached_stat(const struct BFTW *ftwbuf, enum bfs_stat_flags flags); + +/** + * Get the type of a file encountered during bftw(), with flags controlling + * whether to follow links. This function will avoid calling bfs_stat() if + * possible. + * + * @param ftwbuf + * bftw() data for the file to check. + * @param flags + * flags for bfs_stat(). Pass ftwbuf->stat_flags for the default flags. + * @return + * The type of the file, or BFS_ERROR if an error occurred. + */ +enum bfs_type bftw_type(const struct BFTW *ftwbuf, enum bfs_stat_flags flags); + +/** + * Walk actions returned by the bftw() callback. + */ +enum bftw_action { + /** Keep walking. */ + BFTW_CONTINUE, + /** Skip this path's children. */ + BFTW_PRUNE, + /** Stop walking. */ + BFTW_STOP, +}; + +/** + * Callback function type for bftw(). + * + * @param ftwbuf + * Data about the current file. + * @param ptr + * The pointer passed to bftw(). + * @return + * An action value. + */ +typedef enum bftw_action bftw_callback(const struct BFTW *ftwbuf, void *ptr); + +/** + * Flags that control bftw() behavior. + */ +enum bftw_flags { + /** stat() each encountered file. */ + BFTW_STAT = 1 << 0, + /** Attempt to recover from encountered errors. */ + BFTW_RECOVER = 1 << 1, + /** Visit directories in post-order as well as pre-order. */ + BFTW_POST_ORDER = 1 << 2, + /** If the initial path is a symbolic link, follow it. */ + BFTW_FOLLOW_ROOTS = 1 << 3, + /** Follow all symbolic links. */ + BFTW_FOLLOW_ALL = 1 << 4, + /** Detect directory cycles. */ + BFTW_DETECT_CYCLES = 1 << 5, + /** Skip mount points and their descendents. */ + BFTW_SKIP_MOUNTS = 1 << 6, + /** Skip the descendents of mount points. */ + BFTW_PRUNE_MOUNTS = 1 << 7, + /** Sort directory entries before processing them. */ + BFTW_SORT = 1 << 8, + /** Read each directory into memory before processing its children. */ + BFTW_BUFFER = 1 << 9, + /** Include whiteouts in the search results. */ + BFTW_WHITEOUTS = 1 << 10, +}; + +/** + * Tree search strategies for bftw(). + */ +enum bftw_strategy { + /** Breadth-first search. */ + BFTW_BFS, + /** Depth-first search. */ + BFTW_DFS, + /** Iterative deepening search. */ + BFTW_IDS, + /** Exponential deepening search. */ + BFTW_EDS, +}; + +/** + * Structure for holding the arguments passed to bftw(). + */ +struct bftw_args { + /** The path(s) to start from. */ + const char **paths; + /** The number of starting paths. */ + size_t npaths; + + /** The callback to invoke. */ + bftw_callback *callback; + /** A pointer which is passed to the callback. */ + void *ptr; + + /** The maximum number of file descriptors to keep open. */ + int nopenfd; + /** The maximum number of threads to use. */ + int nthreads; + + /** Flags that control bftw() behaviour. */ + enum bftw_flags flags; + /** The search strategy to use. */ + enum bftw_strategy strategy; + + /** The parsed mount table, if available. */ + const struct bfs_mtab *mtab; +}; + +/** + * Breadth First Tree Walk (or Better File Tree Walk). + * + * Like ftw(3) and nftw(3), this function walks a directory tree recursively, + * and invokes a callback for each path it encounters. + * + * @param args + * The arguments that control the walk. + * @return + * 0 on success, or -1 on failure. + */ +int bftw(const struct bftw_args *args); + +#endif // BFS_BFTW_H diff --git a/src/bit.h b/src/bit.h new file mode 100644 index 0000000..17cfbcf --- /dev/null +++ b/src/bit.h @@ -0,0 +1,401 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +/** + * Bits & bytes. + */ + +#ifndef BFS_BIT_H +#define BFS_BIT_H + +#include "prelude.h" +#include <limits.h> +#include <stdint.h> + +#if __STDC_VERSION__ >= C23 +# include <stdbit.h> +#endif + +// C23 polyfill: _WIDTH macros + +// The U*_MAX macros are of the form 2**n - 1, and we want to extract the n. +// One way would be *_WIDTH = popcount(*_MAX). Alternatively, we can use +// Hallvard B. Furuseth's technique from [1], which is shorter. +// +// [1]: https://groups.google.com/g/comp.lang.c/c/NfedEFBFJ0k + +// Let mask be of the form 2**m - 1, e.g. 0b111, and let n range over +// [0b0, 0b1, 0b11, 0b111, 0b1111, ...]. Then we have +// +// n % 0b111 +// == [0b0, 0b1, 0b11, 0b0, 0b1, 0b11, ...] +// n / (n % 0b111 + 1) +// == [0b0 (x3), 0b111 (x3), 0b111111 (x3), ...] +// n / (n % 0b111 + 1) / 0b111 +// == [0b0 (x3), 0b1 (x3), 0b1001 (x3), 0b1001001 (x3), ...] +// n / (n % 0b111 + 1) / 0b111 % 0b111 +// == [0 (x3), 1 (x3), 2 (x3), ...] +// == UMAX_CHUNK(n, 0b111) +#define UMAX_CHUNK(n, mask) (n / (n % mask + 1) / mask % mask) + +// 8 * UMAX_CHUNK(n, 255) gives [0 (x8), 8 (x8), 16 (x8), ...]. To that we add +// [0, 1, 2, ..., 6, 7, 0, 1, ...], which we get from a linear interpolation on +// n % 255: +// +// n % 255 +// == [0, 1, 3, 7, 15, 31, 63, 127, 0, ...] +// 86 / (n % 255 + 12) +// == [7, 6, 5, 4, 3, 2, 1, 0, 7, ...] +#define UMAX_INTERP(n) (7 - 86 / (n % 255 + 12)) + +#define UMAX_WIDTH(n) (8 * UMAX_CHUNK(n, 255) + UMAX_INTERP(n)) + +#ifndef CHAR_WIDTH +# define CHAR_WIDTH CHAR_BIT +#endif + +// See https://gcc.gnu.org/onlinedocs/cpp/Common-Predefined-Macros.html + +#ifndef USHRT_WIDTH +# ifdef __SHRT_WIDTH__ +# define USHRT_WIDTH __SHRT_WIDTH__ +# else +# define USHRT_WIDTH UMAX_WIDTH(USHRT_MAX) +# endif +#endif + +#ifndef UINT_WIDTH +# ifdef __INT_WIDTH__ +# define UINT_WIDTH __INT_WIDTH__ +# else +# define UINT_WIDTH UMAX_WIDTH(UINT_MAX) +# endif +#endif + +#ifndef ULONG_WIDTH +# ifdef __LONG_WIDTH__ +# define ULONG_WIDTH __LONG_WIDTH__ +# else +# define ULONG_WIDTH UMAX_WIDTH(ULONG_MAX) +# endif +#endif + +#ifndef ULLONG_WIDTH +# ifdef __LONG_LONG_WIDTH__ +# define ULLONG_WIDTH __LONG_LONG_WIDTH__ +# elif defined(__LLONG_WIDTH__) // Clang +# define ULLONG_WIDTH __LLONG_WIDTH__ +# else +# define ULLONG_WIDTH UMAX_WIDTH(ULLONG_MAX) +# endif +#endif + +#ifndef SIZE_WIDTH +# ifdef __SIZE_WIDTH__ +# define SIZE_WIDTH __SIZE_WIDTH__ +# else +# define SIZE_WIDTH UMAX_WIDTH(SIZE_MAX) +# endif +#endif + +#ifndef PTRDIFF_WIDTH +# ifdef __PTRDIFF_WIDTH__ +# define PTRDIFF_WIDTH __PTRDIFF_WIDTH__ +# else +# define PTRDIFF_WIDTH UMAX_WIDTH(PTRDIFF_MAX) +# endif +#endif + +#ifndef UINTPTR_WIDTH +# ifdef __INTPTR_WIDTH__ +# define UINTPTR_WIDTH __INTPTR_WIDTH__ +# else +# define UINTPTR_WIDTH UMAX_WIDTH(UINTPTR_MAX) +# endif +#endif + +#ifndef UINTMAX_WIDTH +# ifdef __INTMAX_WIDTH__ +# define UINTMAX_WIDTH __INTMAX_WIDTH__ +# else +# define UINTMAX_WIDTH UMAX_WIDTH(UINTMAX_MAX) +# endif +#endif + +#ifndef UCHAR_WIDTH +# define UCHAR_WIDTH CHAR_WIDTH +#endif +#ifndef SCHAR_WIDTH +# define SCHAR_WIDTH CHAR_WIDTH +#endif +#ifndef SHRT_WIDTH +# define SHRT_WIDTH USHRT_WIDTH +#endif +#ifndef INT_WIDTH +# define INT_WIDTH UINT_WIDTH +#endif +#ifndef LONG_WIDTH +# define LONG_WIDTH ULONG_WIDTH +#endif +#ifndef LLONG_WIDTH +# define LLONG_WIDTH ULLONG_WIDTH +#endif +#ifndef INTPTR_WIDTH +# define INTPTR_WIDTH UINTPTR_WIDTH +#endif +#ifndef INTMAX_WIDTH +# define INTMAX_WIDTH UINTMAX_WIDTH +#endif + +// C23 polyfill: byte order + +#ifdef __STDC_ENDIAN_LITTLE__ +# define ENDIAN_LITTLE __STDC_ENDIAN_LITTLE__ +#elif defined(__ORDER_LITTLE_ENDIAN__) +# define ENDIAN_LITTLE __ORDER_LITTLE_ENDIAN__ +#else +# define ENDIAN_LITTLE 1234 +#endif + +#ifdef __STDC_ENDIAN_BIG__ +# define ENDIAN_BIG __STDC_ENDIAN_BIG__ +#elif defined(__ORDER_BIG_ENDIAN__) +# define ENDIAN_BIG __ORDER_BIG_ENDIAN__ +#else +# define ENDIAN_BIG 4321 +#endif + +#ifdef __STDC_ENDIAN_NATIVE__ +# define ENDIAN_NATIVE __STDC_ENDIAN_NATIVE__ +#elif defined(__BYTE_ORDER__) +# define ENDIAN_NATIVE __BYTE_ORDER__ +#else +# define ENDIAN_NATIVE 0 +#endif + +#if __STDC_VERSION__ >= C23 +# define bswap_u16 stdc_memreverse8u16 +# define bswap_u32 stdc_memreverse8u32 +# define bswap_u64 stdc_memreverse8u64 +#elif __GNUC__ +# define bswap_u16 __builtin_bswap16 +# define bswap_u32 __builtin_bswap32 +# define bswap_u64 __builtin_bswap64 +#else + +static inline uint16_t bswap_u16(uint16_t n) { + return (n << 8) | (n >> 8); +} + +static inline uint32_t bswap_u32(uint32_t n) { + return ((uint32_t)bswap_u16(n) << 16) | bswap_u16(n >> 16); +} + +static inline uint64_t bswap_u64(uint64_t n) { + return ((uint64_t)bswap_u32(n) << 32) | bswap_u32(n >> 32); +} + +#endif + +static inline uint8_t bswap_u8(uint8_t n) { + return n; +} + +/** + * Reverse the byte order of an integer. + */ +#define bswap(n) \ + _Generic((n), \ + uint8_t: bswap_u8, \ + uint16_t: bswap_u16, \ + uint32_t: bswap_u32, \ + uint64_t: bswap_u64)(n) + +// Define an overload for each unsigned type +#define UINT_OVERLOADS(macro) \ + macro(unsigned char, _uc, UCHAR_WIDTH) \ + macro(unsigned short, _us, USHRT_WIDTH) \ + macro(unsigned int, _ui, UINT_WIDTH) \ + macro(unsigned long, _ul, ULONG_WIDTH) \ + macro(unsigned long long, _ull, ULLONG_WIDTH) + +// Select an overload based on an unsigned integer type +#define UINT_SELECT(n, name) \ + _Generic((n), \ + char: name##_uc, \ + signed char: name##_uc, \ + unsigned char: name##_uc, \ + signed short: name##_us, \ + unsigned short: name##_us, \ + signed int: name##_ui, \ + unsigned int: name##_ui, \ + signed long: name##_ul, \ + unsigned long: name##_ul, \ + signed long long: name##_ull, \ + unsigned long long: name##_ull) + +// C23 polyfill: bit utilities + +#if __STDC_VERSION__ >= C23 +# define count_ones stdc_count_ones +# define count_zeros stdc_count_zeros +# define rotate_left stdc_rotate_left +# define rotate_right stdc_rotate_right +# define leading_zeros stdc_leading_zeros +# define leading_ones stdc_leading_ones +# define trailing_zeros stdc_trailing_zeros +# define trailing_ones stdc_trailing_ones +# define first_leading_zero stdc_first_leading_zero +# define first_leading_one stdc_first_leading_one +# define first_trailing_zero stdc_first_trailing_zero +# define first_trailing_one stdc_first_trailing_one +# define has_single_bit stdc_has_single_bit +# define bit_width stdc_bit_width +# define bit_ceil stdc_bit_ceil +# define bit_floor stdc_bit_floor +#else + +#if __GNUC__ + +// GCC provides builtins for unsigned {int,long,long long}, so promote char/short +#define UINT_BUILTIN_uc(name) __builtin_##name +#define UINT_BUILTIN_us(name) __builtin_##name +#define UINT_BUILTIN_ui(name) __builtin_##name +#define UINT_BUILTIN_ul(name) __builtin_##name##l +#define UINT_BUILTIN_ull(name) __builtin_##name##ll +#define UINT_BUILTIN(name, suffix) UINT_BUILTIN##suffix(name) + +#define BUILTIN_WIDTH_uc UINT_WIDTH +#define BUILTIN_WIDTH_us UINT_WIDTH +#define BUILTIN_WIDTH_ui UINT_WIDTH +#define BUILTIN_WIDTH_ul ULONG_WIDTH +#define BUILTIN_WIDTH_ull ULLONG_WIDTH +#define BUILTIN_WIDTH(suffix) BUILTIN_WIDTH##suffix + +#define COUNT_ONES(type, suffix, width) \ + static inline int count_ones##suffix(type n) { \ + return UINT_BUILTIN(popcount, suffix)(n); \ + } + +#define LEADING_ZEROS(type, suffix, width) \ + static inline int leading_zeros##suffix(type n) { \ + return n \ + ? UINT_BUILTIN(clz, suffix)(n) - (BUILTIN_WIDTH(suffix) - width) \ + : width; \ + } + +#define TRAILING_ZEROS(type, suffix, width) \ + static inline int trailing_zeros##suffix(type n) { \ + return n ? UINT_BUILTIN(ctz, suffix)(n) : (int)width; \ + } + +#define FIRST_TRAILING_ONE(type, suffix, width) \ + static inline int first_trailing_one##suffix(type n) { \ + return UINT_BUILTIN(ffs, suffix)(n); \ + } + +#else // !__GNUC__ + +#define COUNT_ONES(type, suffix, width) \ + static inline int count_ones##suffix(type n) { \ + int ret; \ + for (ret = 0; n; ++ret) { \ + n &= n - 1; \ + } \ + return ret; \ + } + +#define LEADING_ZEROS(type, suffix, width) \ + static inline int leading_zeros##suffix(type n) { \ + type bit = (type)1 << (width - 1); \ + int ret; \ + for (ret = 0; bit && !(n & bit); ++ret, bit >>= 1); \ + return ret; \ + } + +#define TRAILING_ZEROS(type, suffix, width) \ + static inline int trailing_zeros##suffix(type n) { \ + type bit = 1; \ + int ret; \ + for (ret = 0; bit && !(n & bit); ++ret, bit <<= 1); \ + return ret; \ + } + +#define FIRST_TRAILING_ONE(type, suffix, width) \ + static inline int first_trailing_one##suffix(type n) { \ + return n ? trailing_zeros##suffix(n) + 1 : 0; \ + } + +#endif // !__GNUC__ + +UINT_OVERLOADS(COUNT_ONES) +UINT_OVERLOADS(LEADING_ZEROS) +UINT_OVERLOADS(TRAILING_ZEROS) +UINT_OVERLOADS(FIRST_TRAILING_ONE) + +#define ROTATE_LEFT(type, suffix, width) \ + static inline type rotate_left##suffix(type n, int c) { \ + return (n << c) | (n >> ((width - c) % width)); \ + } + +#define ROTATE_RIGHT(type, suffix, width) \ + static inline type rotate_right##suffix(type n, int c) { \ + return (n >> c) | (n << ((width - c) % width)); \ + } + +#define FIRST_LEADING_ONE(type, suffix, width) \ + static inline int first_leading_one##suffix(type n) { \ + return width - leading_zeros##suffix(n); \ + } + +#define HAS_SINGLE_BIT(type, suffix, width) \ + static inline bool has_single_bit##suffix(type n) { \ + /** Branchless n && !(n & (n - 1)) */ \ + return n - 1 < (n ^ (n - 1)); \ + } + +UINT_OVERLOADS(ROTATE_LEFT) +UINT_OVERLOADS(ROTATE_RIGHT) +UINT_OVERLOADS(FIRST_LEADING_ONE) +UINT_OVERLOADS(HAS_SINGLE_BIT) + +#define count_ones(n) UINT_SELECT(n, count_ones)(n) +#define count_zeros(n) UINT_SELECT(n, count_ones)(~(n)) + +#define rotate_left(n, c) UINT_SELECT(n, rotate_left)(n, c) +#define rotate_right(n, c) UINT_SELECT(n, rotate_right)(n, c) + +#define leading_zeros(n) UINT_SELECT(n, leading_zeros)(n) +#define leading_ones(n) UINT_SELECT(n, leading_zeros)(~(n)) + +#define trailing_zeros(n) UINT_SELECT(n, trailing_zeros)(n) +#define trailing_ones(n) UINT_SELECT(n, trailing_zeros)(~(n)) + +#define first_leading_one(n) UINT_SELECT(n, first_leading_one)(n) +#define first_leading_zero(n) UINT_SELECT(n, first_leading_one)(~(n)) + +#define first_trailing_one(n) UINT_SELECT(n, first_trailing_one)(n) +#define first_trailing_zero(n) UINT_SELECT(n, first_trailing_one)(~(n)) + +#define has_single_bit(n) UINT_SELECT(n, has_single_bit)(n) + +#define BIT_FLOOR(type, suffix, width) \ + static inline type bit_floor##suffix(type n) { \ + return n ? (type)1 << (first_leading_one##suffix(n) - 1) : 0; \ + } + +#define BIT_CEIL(type, suffix, width) \ + static inline type bit_ceil##suffix(type n) { \ + return (type)1 << first_leading_one##suffix(n - !!n); \ + } + +UINT_OVERLOADS(BIT_FLOOR) +UINT_OVERLOADS(BIT_CEIL) + +#define bit_width(n) first_leading_one(n) +#define bit_floor(n) UINT_SELECT(n, bit_floor)(n) +#define bit_ceil(n) UINT_SELECT(n, bit_ceil)(n) + +#endif // __STDC_VERSION__ < C23 + +#endif // BFS_BIT_H diff --git a/src/color.c b/src/color.c new file mode 100644 index 0000000..a0e553f --- /dev/null +++ b/src/color.c @@ -0,0 +1,1418 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +#include "prelude.h" +#include "color.h" +#include "alloc.h" +#include "bfstd.h" +#include "bftw.h" +#include "diag.h" +#include "dir.h" +#include "dstring.h" +#include "expr.h" +#include "fsade.h" +#include "stat.h" +#include "trie.h" +#include <errno.h> +#include <fcntl.h> +#include <stdarg.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <unistd.h> + +/** + * An escape sequence, which may contain embedded NUL bytes. + */ +struct esc_seq { + /** The length of the escape sequence. */ + size_t len; + /** The escape sequence iteself, without a terminating NUL. */ + char seq[]; +}; + +/** + * A colored file extension, like `*.tar=01;31`. + */ +struct ext_color { + /** Priority, to disambiguate case-sensitive and insensitive matches. */ + size_t priority; + /** The escape sequence associated with this extension. */ + struct esc_seq *esc; + /** The length of the extension to match. */ + size_t len; + /** Whether the comparison should be case-sensitive. */ + bool case_sensitive; + /** The extension to match (NUL-terminated). */ + char ext[]; +}; + +struct colors { + /** esc_seq allocator. */ + struct varena esc_arena; + /** ext_color allocator. */ + struct varena ext_arena; + + // Known dircolors keys + + struct esc_seq *reset; + struct esc_seq *leftcode; + struct esc_seq *rightcode; + struct esc_seq *endcode; + struct esc_seq *clear_to_eol; + + struct esc_seq *bold; + struct esc_seq *gray; + struct esc_seq *red; + struct esc_seq *green; + struct esc_seq *yellow; + struct esc_seq *blue; + struct esc_seq *magenta; + struct esc_seq *cyan; + struct esc_seq *white; + + struct esc_seq *warning; + struct esc_seq *error; + + struct esc_seq *normal; + + struct esc_seq *file; + struct esc_seq *multi_hard; + struct esc_seq *executable; + struct esc_seq *capable; + struct esc_seq *setgid; + struct esc_seq *setuid; + + struct esc_seq *directory; + struct esc_seq *sticky; + struct esc_seq *other_writable; + struct esc_seq *sticky_other_writable; + + struct esc_seq *link; + struct esc_seq *orphan; + struct esc_seq *missing; + bool link_as_target; + + struct esc_seq *blockdev; + struct esc_seq *chardev; + struct esc_seq *door; + struct esc_seq *pipe; + struct esc_seq *socket; + + /** A mapping from color names (fi, di, ln, etc.) to struct fields. */ + struct trie names; + + /** Number of extensions. */ + size_t ext_count; + /** Longest extension. */ + size_t ext_len; + /** Case-sensitive extension trie. */ + struct trie ext_trie; + /** Case-insensitive extension trie. */ + struct trie iext_trie; +}; + +/** Allocate an escape sequence. */ +static struct esc_seq *new_esc(struct colors *colors, const char *seq, size_t len) { + struct esc_seq *esc = varena_alloc(&colors->esc_arena, len); + if (esc) { + esc->len = len; + memcpy(esc->seq, seq, len); + } + return esc; +} + +/** Free an escape sequence. */ +static void free_esc(struct colors *colors, struct esc_seq *seq) { + varena_free(&colors->esc_arena, seq, seq->len); +} + +/** Initialize a color in the table. */ +static int init_esc(struct colors *colors, const char *name, const char *value, struct esc_seq **field) { + struct esc_seq *esc = NULL; + if (value) { + esc = new_esc(colors, value, strlen(value)); + if (!esc) { + return -1; + } + } + + *field = esc; + + struct trie_leaf *leaf = trie_insert_str(&colors->names, name); + if (!leaf) { + return -1; + } + + leaf->value = field; + return 0; +} + +/** Check if an escape sequence is equal to a string. */ +static bool esc_eq(const struct esc_seq *esc, const char *str, size_t len) { + return esc->len == len && memcmp(esc->seq, str, len) == 0; +} + +/** Get an escape sequence from the table. */ +static struct esc_seq **get_esc(const struct colors *colors, const char *name) { + const struct trie_leaf *leaf = trie_find_str(&colors->names, name); + return leaf ? leaf->value : NULL; +} + +/** Append an escape sequence to a string. */ +static int cat_esc(dchar **dstr, const struct esc_seq *seq) { + return dstrxcat(dstr, seq->seq, seq->len); +} + +/** Set a named escape sequence. */ +static int set_esc(struct colors *colors, const char *name, dchar *value) { + struct esc_seq **field = get_esc(colors, name); + if (!field) { + return 0; + } + + if (*field) { + free_esc(colors, *field); + *field = NULL; + } + + if (value) { + *field = new_esc(colors, value, dstrlen(value)); + if (!*field) { + return -1; + } + } + + return 0; +} + +/** Reverse a string, to turn suffix matches into prefix matches. */ +static void ext_reverse(char *ext, size_t len) { + for (size_t i = 0, j = len - 1; len && i < j; ++i, --j) { + char c = ext[i]; + ext[i] = ext[j]; + ext[j] = c; + } +} + +/** Convert a string to lowercase for case-insensitive matching. */ +static void ext_tolower(char *ext, size_t len) { + for (size_t i = 0; i < len; ++i) { + char c = ext[i]; + + // What's internationalization? Doesn't matter, this is what + // GNU ls does. Luckily, since there's no standard C way to + // casefold. Not using tolower() here since it respects the + // current locale, which GNU ls doesn't do. + if (c >= 'A' && c <= 'Z') { + c += 'a' - 'A'; + } + + ext[i] = c; + } +} + +/** + * The "smart case" algorithm. + * + * @param ext + * The current extension being added. + * @param prev + * The previous case-sensitive match, if any, for the same extension. + * @param iprev + * The previous case-insensitive match, if any, for the same extension. + * @return + * Whether this extension should become case-sensitive. + */ +static bool ext_case_sensitive(struct ext_color *ext, struct ext_color *prev, struct ext_color *iprev) { + // This is the first case-insensitive occurrence of this extension, e.g. + // + // *.gz=01;31:*.tar.gz=01;33 + if (!iprev) { + bfs_assert(!prev); + return false; + } + + // If the last version of this extension is already case-sensitive, + // this one should be too, e.g. + // + // *.tar.gz=01;31:*.TAR.GZ=01;32:*.TAR.GZ=01;33 + if (iprev->case_sensitive) { + return true; + } + + // The case matches the last occurrence exactly, e.g. + // + // *.tar.gz=01;31:*.tar.gz=01;33 + if (iprev == prev) { + return false; + } + + // Different case, but same value, e.g. + // + // *.tar.gz=01;31:*.TAR.GZ=01;31 + if (esc_eq(iprev->esc, ext->esc->seq, ext->esc->len)) { + return false; + } + + // Different case, different value, e.g. + // + // *.tar.gz=01;31:*.TAR.GZ=01;33 + return true; +} + +/** Set the color for an extension. */ +static int set_ext(struct colors *colors, char *key, char *value) { + size_t len = dstrlen(key); + struct ext_color *ext = varena_alloc(&colors->ext_arena, len + 1); + if (!ext) { + return -1; + } + + ext->priority = colors->ext_count++; + ext->len = len; + ext->case_sensitive = false; + ext->esc = new_esc(colors, value, dstrlen(value)); + if (!ext->esc) { + goto fail; + } + + key = memcpy(ext->ext, key, len + 1); + + // Reverse the extension (`*.y.x` -> `x.y.*`) so we can use trie_find_prefix() + ext_reverse(key, len); + + // Find any pre-existing exact match + struct ext_color *prev = NULL; + struct trie_leaf *leaf = trie_find_str(&colors->ext_trie, key); + if (leaf) { + prev = leaf->value; + trie_remove(&colors->ext_trie, leaf); + } + + // A later *.x should override any earlier *.x, *.y.x, etc. + while ((leaf = trie_find_postfix(&colors->ext_trie, key))) { + trie_remove(&colors->ext_trie, leaf); + } + + // Insert the extension into the case-sensitive trie + leaf = trie_insert_str(&colors->ext_trie, key); + if (!leaf) { + goto fail; + } + leaf->value = ext; + + // "Smart case": if the same extension is given with two different + // capitalizations (e.g. `*.y.x=31:*.Y.Z=32:`), make it case-sensitive + ext_tolower(key, len); + leaf = trie_insert_str(&colors->iext_trie, key); + if (!leaf) { + goto fail; + } + + struct ext_color *iprev = leaf->value; + if (ext_case_sensitive(ext, prev, iprev)) { + iprev->case_sensitive = true; + ext->case_sensitive = true; + } + leaf->value = ext; + + return 0; + +fail: + if (ext->esc) { + free_esc(colors, ext->esc); + } + varena_free(&colors->ext_arena, ext, len + 1); + return -1; +} + +/** Rebuild the case-insensitive trie after all extensions have been parsed. */ +static int build_iext_trie(struct colors *colors) { + trie_clear(&colors->iext_trie); + + for_trie (leaf, &colors->ext_trie) { + size_t len = leaf->length - 1; + if (colors->ext_len < len) { + colors->ext_len = len; + } + + struct ext_color *ext = leaf->value; + if (ext->case_sensitive) { + continue; + } + + // set_ext() already reversed and lowercased the extension + struct trie_leaf *ileaf; + while ((ileaf = trie_find_postfix(&colors->iext_trie, ext->ext))) { + trie_remove(&colors->iext_trie, ileaf); + } + + ileaf = trie_insert_str(&colors->iext_trie, ext->ext); + if (!ileaf) { + return -1; + } + ileaf->value = ext; + } + + return 0; +} + +/** + * Find a color by an extension. + */ +static const struct esc_seq *get_ext(const struct colors *colors, const char *filename) { + size_t ext_len = colors->ext_len; + size_t name_len = strlen(filename); + if (name_len < ext_len) { + ext_len = name_len; + } + const char *suffix = filename + name_len - ext_len; + + char buf[256]; + char *copy; + if (ext_len < sizeof(buf)) { + copy = memcpy(buf, suffix, ext_len + 1); + } else { + copy = strndup(suffix, ext_len); + if (!copy) { + return NULL; + } + } + + ext_reverse(copy, ext_len); + const struct trie_leaf *leaf = trie_find_prefix(&colors->ext_trie, copy); + const struct ext_color *ext = leaf ? leaf->value : NULL; + + ext_tolower(copy, ext_len); + const struct trie_leaf *ileaf = trie_find_prefix(&colors->iext_trie, copy); + const struct ext_color *iext = ileaf ? ileaf->value : NULL; + + if (iext && (!ext || ext->priority < iext->priority)) { + ext = iext; + } + + if (copy != buf) { + free(copy); + } + + return ext ? ext->esc : NULL; +} + +/** + * Parse a chunk of $LS_COLORS that may have escape sequences. The supported + * escapes are: + * + * \a, \b, \f, \n, \r, \t, \v: + * As in C + * \e: + * ESC (\033) + * \?: + * DEL (\177) + * \_: + * ' ' (space) + * \NNN: + * Octal + * \xNN: + * Hex + * ^C: + * Control character. + * + * See man dir_colors. + * + * @param str + * A dstring to fill with the unescaped chunk. + * @param value + * The value to parse. + * @param end + * The character that marks the end of the chunk. + * @param[out] next + * Will be set to the next chunk. + * @return + * 0 on success, -1 on failure. + */ +static int unescape(char **str, const char *value, char end, const char **next) { + *next = NULL; + + if (!value) { + errno = EINVAL; + return -1; + } + + if (dstresize(str, 0) != 0) { + return -1; + } + + const char *i; + for (i = value; *i && *i != end; ++i) { + unsigned char c = 0; + + switch (*i) { + case '\\': + switch (*++i) { + case 'a': + c = '\a'; + break; + case 'b': + c = '\b'; + break; + case 'e': + c = '\033'; + break; + case 'f': + c = '\f'; + break; + case 'n': + c = '\n'; + break; + case 'r': + c = '\r'; + break; + case 't': + c = '\t'; + break; + case 'v': + c = '\v'; + break; + case '?': + c = '\177'; + break; + case '_': + c = ' '; + break; + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + while (i[1] >= '0' && i[1] <= '7') { + c <<= 3; + c |= *i++ - '0'; + } + c <<= 3; + c |= *i - '0'; + break; + + case 'X': + case 'x': + while (true) { + if (i[1] >= '0' && i[1] <= '9') { + c <<= 4; + c |= i[1] - '0'; + } else if (i[1] >= 'A' && i[1] <= 'F') { + c <<= 4; + c |= i[1] - 'A' + 0xA; + } else if (i[1] >= 'a' && i[1] <= 'f') { + c <<= 4; + c |= i[1] - 'a' + 0xA; + } else { + break; + } + ++i; + } + break; + + case '\0': + errno = EINVAL; + return -1; + + default: + c = *i; + break; + } + break; + + case '^': + switch (*++i) { + case '?': + c = '\177'; + break; + case '\0': + errno = EINVAL; + return -1; + default: + // CTRL masks bits 6 and 7 + c = *i & 0x1F; + break; + } + break; + + default: + c = *i; + break; + } + + if (dstrapp(str, c) != 0) { + return -1; + } + } + + if (*i) { + *next = i + 1; + } + + return 0; +} + +/** Parse the GNU $LS_COLORS format. */ +static int parse_gnu_ls_colors(struct colors *colors, const char *ls_colors) { + int ret = -1; + dchar *key = NULL; + dchar *value = NULL; + + for (const char *chunk = ls_colors, *next; chunk; chunk = next) { + if (chunk[0] == '*') { + if (unescape(&key, chunk + 1, '=', &next) != 0) { + goto fail; + } + if (unescape(&value, next, ':', &next) != 0) { + goto fail; + } + if (set_ext(colors, key, value) != 0) { + goto fail; + } + } else { + const char *equals = strchr(chunk, '='); + if (!equals) { + break; + } + + if (dstrncpy(&key, chunk, equals - chunk) != 0) { + goto fail; + } + if (unescape(&value, equals + 1, ':', &next) != 0) { + goto fail; + } + + // All-zero values should be treated like NULL, to fall + // back on any other relevant coloring for that file + dchar *esc = value; + if (strspn(value, "0") == dstrlen(value) + && strcmp(key, "rs") != 0 + && strcmp(key, "lc") != 0 + && strcmp(key, "rc") != 0 + && strcmp(key, "ec") != 0) { + esc = NULL; + } + + if (set_esc(colors, key, esc) != 0) { + goto fail; + } + } + } + + ret = 0; +fail: + dstrfree(value); + dstrfree(key); + return ret; +} + +struct colors *parse_colors(void) { + struct colors *colors = ALLOC(struct colors); + if (!colors) { + return NULL; + } + + VARENA_INIT(&colors->esc_arena, struct esc_seq, seq); + VARENA_INIT(&colors->ext_arena, struct ext_color, ext); + trie_init(&colors->names); + colors->ext_count = 0; + colors->ext_len = 0; + trie_init(&colors->ext_trie); + trie_init(&colors->iext_trie); + + bool fail = false; + + // From man console_codes + + fail = fail || init_esc(colors, "rs", "0", &colors->reset); + fail = fail || init_esc(colors, "lc", "\033[", &colors->leftcode); + fail = fail || init_esc(colors, "rc", "m", &colors->rightcode); + fail = fail || init_esc(colors, "ec", NULL, &colors->endcode); + fail = fail || init_esc(colors, "cl", "\033[K", &colors->clear_to_eol); + + fail = fail || init_esc(colors, "bld", "01;39", &colors->bold); + fail = fail || init_esc(colors, "gry", "01;30", &colors->gray); + fail = fail || init_esc(colors, "red", "01;31", &colors->red); + fail = fail || init_esc(colors, "grn", "01;32", &colors->green); + fail = fail || init_esc(colors, "ylw", "01;33", &colors->yellow); + fail = fail || init_esc(colors, "blu", "01;34", &colors->blue); + fail = fail || init_esc(colors, "mag", "01;35", &colors->magenta); + fail = fail || init_esc(colors, "cyn", "01;36", &colors->cyan); + fail = fail || init_esc(colors, "wht", "01;37", &colors->white); + + fail = fail || init_esc(colors, "wrn", "01;33", &colors->warning); + fail = fail || init_esc(colors, "err", "01;31", &colors->error); + + // Defaults from man dir_colors + // "" means fall back to ->normal + + fail = fail || init_esc(colors, "no", NULL, &colors->normal); + + fail = fail || init_esc(colors, "fi", "", &colors->file); + fail = fail || init_esc(colors, "mh", NULL, &colors->multi_hard); + fail = fail || init_esc(colors, "ex", "01;32", &colors->executable); + fail = fail || init_esc(colors, "ca", NULL, &colors->capable); + fail = fail || init_esc(colors, "sg", "30;43", &colors->setgid); + fail = fail || init_esc(colors, "su", "37;41", &colors->setuid); + + fail = fail || init_esc(colors, "di", "01;34", &colors->directory); + fail = fail || init_esc(colors, "st", "37;44", &colors->sticky); + fail = fail || init_esc(colors, "ow", "34;42", &colors->other_writable); + fail = fail || init_esc(colors, "tw", "30;42", &colors->sticky_other_writable); + + fail = fail || init_esc(colors, "ln", "01;36", &colors->link); + fail = fail || init_esc(colors, "or", NULL, &colors->orphan); + fail = fail || init_esc(colors, "mi", NULL, &colors->missing); + colors->link_as_target = false; + + fail = fail || init_esc(colors, "bd", "01;33", &colors->blockdev); + fail = fail || init_esc(colors, "cd", "01;33", &colors->chardev); + fail = fail || init_esc(colors, "do", "01;35", &colors->door); + fail = fail || init_esc(colors, "pi", "33", &colors->pipe); + fail = fail || init_esc(colors, "so", "01;35", &colors->socket); + + if (fail) { + goto fail; + } + + if (parse_gnu_ls_colors(colors, getenv("LS_COLORS")) != 0) { + goto fail; + } + if (parse_gnu_ls_colors(colors, getenv("BFS_COLORS")) != 0) { + goto fail; + } + if (build_iext_trie(colors) != 0) { + goto fail; + } + + if (colors->link && esc_eq(colors->link, "target", strlen("target"))) { + colors->link_as_target = true; + colors->link->len = 0; + } + + // Pre-compute the reset escape sequence + if (!colors->endcode) { + dchar *ec = dstralloc(0); + if (!ec + || cat_esc(&ec, colors->leftcode) != 0 + || cat_esc(&ec, colors->reset) != 0 + || cat_esc(&ec, colors->rightcode) != 0 + || set_esc(colors, "ec", ec) != 0) { + dstrfree(ec); + goto fail; + } + dstrfree(ec); + } + + return colors; + +fail: + free_colors(colors); + return NULL; +} + +void free_colors(struct colors *colors) { + if (!colors) { + return; + } + + trie_destroy(&colors->iext_trie); + trie_destroy(&colors->ext_trie); + trie_destroy(&colors->names); + varena_destroy(&colors->ext_arena); + varena_destroy(&colors->esc_arena); + + free(colors); +} + +CFILE *cfwrap(FILE *file, const struct colors *colors, bool close) { + CFILE *cfile = ALLOC(CFILE); + if (!cfile) { + return NULL; + } + + cfile->buffer = dstralloc(128); + if (!cfile->buffer) { + free(cfile); + return NULL; + } + + cfile->file = file; + cfile->fd = fileno(file); + cfile->need_reset = false; + cfile->close = close; + + if (isatty(cfile->fd)) { + cfile->colors = colors; + } else { + cfile->colors = NULL; + } + + return cfile; +} + +int cfclose(CFILE *cfile) { + int ret = 0; + + if (cfile) { + dstrfree(cfile->buffer); + + if (cfile->close) { + ret = fclose(cfile->file); + } + + free(cfile); + } + + return ret; +} + +/** Check if a symlink is broken. */ +static bool is_link_broken(const struct BFTW *ftwbuf) { + if (ftwbuf->stat_flags & BFS_STAT_NOFOLLOW) { + return xfaccessat(ftwbuf->at_fd, ftwbuf->at_path, F_OK) != 0; + } else { + return true; + } +} + +bool colors_need_stat(const struct colors *colors) { + return colors->setuid || colors->setgid || colors->executable || colors->multi_hard + || colors->sticky_other_writable || colors->other_writable || colors->sticky; +} + +/** Get the color for a file. */ +static const struct esc_seq *file_color(const struct colors *colors, const char *filename, const struct BFTW *ftwbuf, enum bfs_stat_flags flags) { + enum bfs_type type = bftw_type(ftwbuf, flags); + if (type == BFS_ERROR) { + goto error; + } + + const struct bfs_stat *statbuf = NULL; + const struct esc_seq *color = NULL; + + switch (type) { + case BFS_REG: + if (colors->setuid || colors->setgid || colors->executable || colors->multi_hard) { + statbuf = bftw_stat(ftwbuf, flags); + if (!statbuf) { + goto error; + } + } + + if (colors->setuid && (statbuf->mode & 04000)) { + color = colors->setuid; + } else if (colors->setgid && (statbuf->mode & 02000)) { + color = colors->setgid; + } else if (colors->capable && bfs_check_capabilities(ftwbuf) > 0) { + color = colors->capable; + } else if (colors->executable && (statbuf->mode & 00111)) { + color = colors->executable; + } else if (colors->multi_hard && statbuf->nlink > 1) { + color = colors->multi_hard; + } + + if (!color) { + color = get_ext(colors, filename); + } + + if (!color) { + color = colors->file; + } + + break; + + case BFS_DIR: + if (colors->sticky_other_writable || colors->other_writable || colors->sticky) { + statbuf = bftw_stat(ftwbuf, flags); + if (!statbuf) { + goto error; + } + } + + if (colors->sticky_other_writable && (statbuf->mode & 01002) == 01002) { + color = colors->sticky_other_writable; + } else if (colors->other_writable && (statbuf->mode & 00002)) { + color = colors->other_writable; + } else if (colors->sticky && (statbuf->mode & 01000)) { + color = colors->sticky; + } else { + color = colors->directory; + } + + break; + + case BFS_LNK: + if (colors->orphan && is_link_broken(ftwbuf)) { + color = colors->orphan; + } else { + color = colors->link; + } + break; + + case BFS_BLK: + color = colors->blockdev; + break; + case BFS_CHR: + color = colors->chardev; + break; + case BFS_FIFO: + color = colors->pipe; + break; + case BFS_SOCK: + color = colors->socket; + break; + case BFS_DOOR: + color = colors->door; + break; + + default: + break; + } + + if (color && color->len == 0) { + color = colors->normal; + } + + return color; + +error: + if (colors->missing) { + return colors->missing; + } else { + return colors->orphan; + } +} + +/** Print an escape sequence chunk. */ +static int print_esc_chunk(CFILE *cfile, const struct esc_seq *esc) { + return cat_esc(&cfile->buffer, esc); +} + +/** Print an ANSI escape sequence. */ +static int print_esc(CFILE *cfile, const struct esc_seq *esc) { + if (!esc) { + return 0; + } + + const struct colors *colors = cfile->colors; + if (esc != colors->reset) { + cfile->need_reset = true; + } + + if (print_esc_chunk(cfile, cfile->colors->leftcode) != 0) { + return -1; + } + if (print_esc_chunk(cfile, esc) != 0) { + return -1; + } + if (print_esc_chunk(cfile, cfile->colors->rightcode) != 0) { + return -1; + } + + return 0; +} + +/** Reset after an ANSI escape sequence. */ +static int print_reset(CFILE *cfile) { + if (!cfile->need_reset) { + return 0; + } + cfile->need_reset = false; + + return print_esc_chunk(cfile, cfile->colors->endcode); +} + +/** Print a shell-escaped string. */ +static int print_wordesc(CFILE *cfile, const char *str, size_t n, enum wesc_flags flags) { + return dstrnescat(&cfile->buffer, str, n, flags); +} + +/** Print a string with an optional color. */ +static int print_colored(CFILE *cfile, const struct esc_seq *esc, const char *str, size_t len) { + if (print_esc(cfile, esc) != 0) { + return -1; + } + + // Don't let the string itself interfere with the colors + if (print_wordesc(cfile, str, len, WESC_TTY) != 0) { + return -1; + } + + if (print_reset(cfile) != 0) { + return -1; + } + + return 0; +} + +/** Find the offset of the first broken path component. */ +static ssize_t first_broken_offset(const char *path, const struct BFTW *ftwbuf, enum bfs_stat_flags flags, size_t max) { + ssize_t ret = max; + bfs_assert(ret >= 0); + + if (bftw_type(ftwbuf, flags) != BFS_ERROR) { + goto out; + } + + dchar *at_path; + int at_fd; + if (path == ftwbuf->path) { + if (ftwbuf->depth == 0) { + at_fd = AT_FDCWD; + at_path = dstrndup(path, max); + } else { + // The parent must have existed to get here + goto out; + } + } else { + // We're in print_link_target(), so resolve relative to the link's parent directory + at_fd = ftwbuf->at_fd; + if (at_fd == (int)AT_FDCWD && path[0] != '/') { + at_path = dstrndup(ftwbuf->path, ftwbuf->nameoff); + if (at_path && dstrncat(&at_path, path, max) != 0) { + ret = -1; + goto out_path; + } + } else { + at_path = dstrndup(path, max); + } + } + + if (!at_path) { + ret = -1; + goto out; + } + + while (ret > 0) { + if (xfaccessat(at_fd, at_path, F_OK) == 0) { + break; + } + + size_t len = dstrlen(at_path); + while (ret && at_path[len - 1] == '/') { + --len, --ret; + } + if (errno != ENOTDIR) { + while (ret && at_path[len - 1] != '/') { + --len, --ret; + } + } + + dstresize(&at_path, len); + } + +out_path: + dstrfree(at_path); +out: + return ret; +} + +/** Print a path with colors. */ +static int print_path_colored(CFILE *cfile, const char *path, const struct BFTW *ftwbuf, enum bfs_stat_flags flags) { + size_t nameoff; + if (path == ftwbuf->path) { + nameoff = ftwbuf->nameoff; + } else { + nameoff = xbaseoff(path); + } + + const char *name = path + nameoff; + size_t pathlen = nameoff + strlen(name); + + ssize_t broken = first_broken_offset(path, ftwbuf, flags, nameoff); + if (broken < 0) { + return -1; + } + size_t split = broken; + + const struct colors *colors = cfile->colors; + const struct esc_seq *dirs_color = colors->directory; + const struct esc_seq *name_color; + + if (split < nameoff) { + name_color = colors->missing; + if (!name_color) { + name_color = colors->orphan; + } + } else { + name_color = file_color(cfile->colors, path + nameoff, ftwbuf, flags); + if (name_color == dirs_color) { + split = pathlen; + } + } + + if (split > 0) { + if (print_colored(cfile, dirs_color, path, split) != 0) { + return -1; + } + } + + if (split < pathlen) { + if (print_colored(cfile, name_color, path + split, pathlen - split) != 0) { + return -1; + } + } + + return 0; +} + +/** Print a file name with colors. */ +static int print_name_colored(CFILE *cfile, const char *name, const struct BFTW *ftwbuf, enum bfs_stat_flags flags) { + const struct esc_seq *esc = file_color(cfile->colors, name, ftwbuf, flags); + return print_colored(cfile, esc, name, strlen(name)); +} + +/** Print the name of a file with the appropriate colors. */ +static int print_name(CFILE *cfile, const struct BFTW *ftwbuf) { + const char *name = ftwbuf->path + ftwbuf->nameoff; + + const struct colors *colors = cfile->colors; + if (!colors) { + return dstrcat(&cfile->buffer, name); + } + + enum bfs_stat_flags flags = ftwbuf->stat_flags; + if (colors->link_as_target && ftwbuf->type == BFS_LNK) { + flags = BFS_STAT_TRYFOLLOW; + } + + return print_name_colored(cfile, name, ftwbuf, flags); +} + +/** Print the path to a file with the appropriate colors. */ +static int print_path(CFILE *cfile, const struct BFTW *ftwbuf) { + const struct colors *colors = cfile->colors; + if (!colors) { + return dstrcat(&cfile->buffer, ftwbuf->path); + } + + enum bfs_stat_flags flags = ftwbuf->stat_flags; + if (colors->link_as_target && ftwbuf->type == BFS_LNK) { + flags = BFS_STAT_TRYFOLLOW; + } + + return print_path_colored(cfile, ftwbuf->path, ftwbuf, flags); +} + +/** Print a link target with the appropriate colors. */ +static int print_link_target(CFILE *cfile, const struct BFTW *ftwbuf) { + const struct bfs_stat *statbuf = bftw_cached_stat(ftwbuf, BFS_STAT_NOFOLLOW); + size_t len = statbuf ? statbuf->size : 0; + + char *target = xreadlinkat(ftwbuf->at_fd, ftwbuf->at_path, len); + if (!target) { + return -1; + } + + int ret; + if (cfile->colors) { + ret = print_path_colored(cfile, target, ftwbuf, BFS_STAT_FOLLOW); + } else { + ret = dstrcat(&cfile->buffer, target); + } + + free(target); + return ret; +} + +/** Format some colored output to the buffer. */ +attr(printf(2, 3)) +static int cbuff(CFILE *cfile, const char *format, ...); + +/** Dump a parsed expression tree, for debugging. */ +static int print_expr(CFILE *cfile, const struct bfs_expr *expr, bool verbose, int depth) { + if (depth >= 2) { + return dstrcat(&cfile->buffer, "(...)"); + } + + if (!expr) { + return dstrcat(&cfile->buffer, "(null)"); + } + + if (dstrcat(&cfile->buffer, "(") != 0) { + return -1; + } + + if (bfs_expr_is_parent(expr)) { + if (cbuff(cfile, "${red}%pq${rs}", expr->argv[0]) < 0) { + return -1; + } + } else { + if (cbuff(cfile, "${blu}%pq${rs}", expr->argv[0]) < 0) { + return -1; + } + } + + for (size_t i = 1; i < expr->argc; ++i) { + if (cbuff(cfile, " ${bld}%pq${rs}", expr->argv[i]) < 0) { + return -1; + } + } + + if (verbose) { + double rate = 0.0, time = 0.0; + if (expr->evaluations) { + rate = 100.0 * expr->successes / expr->evaluations; + time = (1.0e9 * expr->elapsed.tv_sec + expr->elapsed.tv_nsec) / expr->evaluations; + } + if (cbuff(cfile, " [${ylw}%zu${rs}/${ylw}%zu${rs}=${ylw}%g%%${rs}; ${ylw}%gns${rs}]", + expr->successes, expr->evaluations, rate, time)) { + return -1; + } + } + + int count = 0; + for (struct bfs_expr *child = bfs_expr_children(expr); child; child = child->next) { + if (dstrcat(&cfile->buffer, " ") != 0) { + return -1; + } + if (++count >= 3) { + if (dstrcat(&cfile->buffer, "...") != 0) { + return -1; + } + break; + } else { + if (print_expr(cfile, child, verbose, depth + 1) != 0) { + return -1; + } + } + } + + if (dstrcat(&cfile->buffer, ")") != 0) { + return -1; + } + + return 0; +} + +attr(printf(2, 0)) +static int cvbuff(CFILE *cfile, const char *format, va_list args) { + const struct colors *colors = cfile->colors; + int error = errno; + + // Color specifier (e.g. ${blu}) state + struct esc_seq **esc; + const char *end; + size_t len; + char name[4]; + + for (const char *i = format; *i; ++i) { + size_t verbatim = strcspn(i, "%$"); + if (dstrncat(&cfile->buffer, i, verbatim) != 0) { + return -1; + } + i += verbatim; + + switch (*i) { + case '%': + switch (*++i) { + case '%': + if (dstrapp(&cfile->buffer, '%') != 0) { + return -1; + } + break; + + case 'c': + if (dstrapp(&cfile->buffer, va_arg(args, int)) != 0) { + return -1; + } + break; + + case 'd': + if (dstrcatf(&cfile->buffer, "%d", va_arg(args, int)) != 0) { + return -1; + } + break; + + case 'g': + if (dstrcatf(&cfile->buffer, "%g", va_arg(args, double)) != 0) { + return -1; + } + break; + + case 's': + if (dstrcat(&cfile->buffer, va_arg(args, const char *)) != 0) { + return -1; + } + break; + + case 'z': + ++i; + if (*i != 'u') { + goto invalid; + } + if (dstrcatf(&cfile->buffer, "%zu", va_arg(args, size_t)) != 0) { + return -1; + } + break; + + case 'm': + if (dstrcat(&cfile->buffer, xstrerror(error)) != 0) { + return -1; + } + break; + + case 'p': + switch (*++i) { + case 'q': + if (print_wordesc(cfile, va_arg(args, const char *), SIZE_MAX, WESC_SHELL | WESC_TTY) != 0) { + return -1; + } + break; + case 'Q': + if (print_wordesc(cfile, va_arg(args, const char *), SIZE_MAX, WESC_TTY) != 0) { + return -1; + } + break; + + case 'F': + if (print_name(cfile, va_arg(args, const struct BFTW *)) != 0) { + return -1; + } + break; + + case 'P': + if (print_path(cfile, va_arg(args, const struct BFTW *)) != 0) { + return -1; + } + break; + + case 'L': + if (print_link_target(cfile, va_arg(args, const struct BFTW *)) != 0) { + return -1; + } + break; + + case 'e': + if (print_expr(cfile, va_arg(args, const struct bfs_expr *), false, 0) != 0) { + return -1; + } + break; + case 'E': + if (print_expr(cfile, va_arg(args, const struct bfs_expr *), true, 0) != 0) { + return -1; + } + break; + + default: + goto invalid; + } + + break; + + default: + goto invalid; + } + break; + + case '$': + switch (*++i) { + case '$': + if (dstrapp(&cfile->buffer, '$') != 0) { + return -1; + } + break; + + case '{': + ++i; + end = strchr(i, '}'); + if (!end) { + goto invalid; + } + if (!colors) { + i = end; + break; + } + + len = end - i; + if (len >= sizeof(name)) { + goto invalid; + } + memcpy(name, i, len); + name[len] = '\0'; + + if (strcmp(name, "rs") == 0) { + if (print_reset(cfile) != 0) { + return -1; + } + } else { + esc = get_esc(colors, name); + if (!esc) { + goto invalid; + } + if (print_esc(cfile, *esc) != 0) { + return -1; + } + } + + i = end; + break; + + default: + goto invalid; + } + break; + + default: + return 0; + } + } + + return 0; + +invalid: + bfs_bug("Invalid format string '%s'", format); + errno = EINVAL; + return -1; +} + +static int cbuff(CFILE *cfile, const char *format, ...) { + va_list args; + va_start(args, format); + int ret = cvbuff(cfile, format, args); + va_end(args); + return ret; +} + +int cvfprintf(CFILE *cfile, const char *format, va_list args) { + bfs_assert(dstrlen(cfile->buffer) == 0); + + int ret = -1; + if (cvbuff(cfile, format, args) == 0) { + size_t len = dstrlen(cfile->buffer); + if (fwrite(cfile->buffer, 1, len, cfile->file) == len) { + ret = 0; + } + } + + dstresize(&cfile->buffer, 0); + return ret; +} + +int cfprintf(CFILE *cfile, const char *format, ...) { + va_list args; + va_start(args, format); + int ret = cvfprintf(cfile, format, args); + va_end(args); + return ret; +} + +int cfreset(CFILE *cfile) { + const struct colors *colors = cfile->colors; + if (!colors) { + return 0; + } + + const struct esc_seq *esc = colors->endcode; + size_t ret = xwrite(cfile->fd, esc->seq, esc->len); + return ret == esc->len ? 0 : -1; +} diff --git a/src/color.h b/src/color.h new file mode 100644 index 0000000..3550888 --- /dev/null +++ b/src/color.h @@ -0,0 +1,118 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +/** + * Utilities for colored output on ANSI terminals. + */ + +#ifndef BFS_COLOR_H +#define BFS_COLOR_H + +#include "prelude.h" +#include "dstring.h" +#include <stdio.h> + +/** + * A color scheme. + */ +struct colors; + +/** + * Parse the color table from the environment. + */ +struct colors *parse_colors(void); + +/** + * Check if stat() info is required to color a file correctly. + */ +bool colors_need_stat(const struct colors *colors); + +/** + * Free a color table. + */ +void free_colors(struct colors *colors); + +/** + * A file/stream with associated colors. + */ +typedef struct CFILE { + /** The underlying file/stream. */ + FILE *file; + /** The color table to use, if any. */ + const struct colors *colors; + /** A buffer for colored formatting. */ + dchar *buffer; + /** Cached file descriptor number. */ + int fd; + /** Whether the next ${rs} is actually necessary. */ + bool need_reset; + /** Whether to close the underlying stream. */ + bool close; +} CFILE; + +/** + * Wrap an existing file into a colored stream. + * + * @param file + * The underlying file. + * @param colors + * The color table to use if file is a TTY. + * @param close + * Whether to close the underlying stream when this stream is closed. + * @return + * A colored wrapper around file. + */ +CFILE *cfwrap(FILE *file, const struct colors *colors, bool close); + +/** + * Close a colored file. + * + * @param cfile + * The colored file to close. + * @return + * 0 on success, -1 on failure. + */ +int cfclose(CFILE *cfile); + +/** + * Colored, formatted output. + * + * @param cfile + * The colored stream to print to. + * @param format + * A printf()-style format string, supporting these format specifiers: + * + * %c: A single character + * %d: An integer + * %g: A double + * %s: A string + * %zu: A size_t + * %m: strerror(errno) + * %pq: A shell-escaped string, like bash's printf %q + * %pQ: A TTY-escaped string. + * %pF: A colored file name, from a const struct BFTW * argument + * %pP: A colored file path, from a const struct BFTW * argument + * %pL: A colored link target, from a const struct BFTW * argument + * %pe: Dump a const struct bfs_expr *, for debugging. + * %pE: Dump a const struct bfs_expr * in verbose form, for debugging. + * %%: A literal '%' + * ${cc}: Change the color to 'cc' + * $$: A literal '$' + * @return + * 0 on success, -1 on failure. + */ +attr(printf(2, 3)) +int cfprintf(CFILE *cfile, const char *format, ...); + +/** + * cfprintf() variant that takes a va_list. + */ +attr(printf(2, 0)) +int cvfprintf(CFILE *cfile, const char *format, va_list args); + +/** + * Reset the TTY state when terminating abnormally (async-signal-safe). + */ +int cfreset(CFILE *cfile); + +#endif // BFS_COLOR_H diff --git a/src/ctx.c b/src/ctx.c new file mode 100644 index 0000000..11531df --- /dev/null +++ b/src/ctx.c @@ -0,0 +1,298 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +#include "ctx.h" +#include "alloc.h" +#include "bfstd.h" +#include "color.h" +#include "diag.h" +#include "expr.h" +#include "list.h" +#include "mtab.h" +#include "pwcache.h" +#include "sighook.h" +#include "stat.h" +#include "trie.h" +#include "xtime.h" +#include <errno.h> +#include <limits.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +/** Get the initial value for ctx->threads (-j). */ +static int bfs_nproc(void) { + long nproc = xsysconf(_SC_NPROCESSORS_ONLN); + + if (nproc < 1) { + nproc = 1; + } else if (nproc > 8) { + // Not much speedup after 8 threads + nproc = 8; + } + + return nproc; +} + +struct bfs_ctx *bfs_ctx_new(void) { + struct bfs_ctx *ctx = ZALLOC(struct bfs_ctx); + if (!ctx) { + return NULL; + } + + SLIST_INIT(&ctx->expr_list); + ARENA_INIT(&ctx->expr_arena, struct bfs_expr); + + ctx->maxdepth = INT_MAX; + ctx->flags = BFTW_RECOVER; + ctx->strategy = BFTW_BFS; + ctx->threads = bfs_nproc(); + ctx->optlevel = 3; + + trie_init(&ctx->files); + + if (getrlimit(RLIMIT_NOFILE, &ctx->orig_nofile) != 0) { + goto fail; + } + ctx->cur_nofile = ctx->orig_nofile; + + ctx->users = bfs_users_new(); + if (!ctx->users) { + goto fail; + } + + ctx->groups = bfs_groups_new(); + if (!ctx->groups) { + goto fail; + } + + if (xgettime(&ctx->now) != 0) { + goto fail; + } + + return ctx; + +fail: + bfs_ctx_free(ctx); + return NULL; +} + +const struct bfs_mtab *bfs_ctx_mtab(const struct bfs_ctx *ctx) { + struct bfs_ctx *mut = (struct bfs_ctx *)ctx; + + if (mut->mtab_error) { + errno = mut->mtab_error; + } else if (!mut->mtab) { + mut->mtab = bfs_mtab_parse(); + if (!mut->mtab) { + mut->mtab_error = errno; + } + } + + return mut->mtab; +} + +/** + * An open file tracked by the bfs context. + */ +struct bfs_ctx_file { + /** The file itself. */ + CFILE *cfile; + /** The path to the file (for diagnostics). */ + const char *path; + /** Signal hook to send a reset escape sequence. */ + struct sighook *hook; + /** Remembers I/O errors, to propagate them to the exit status. */ + int error; +}; + +/** Call cfreset() on a tracked file. */ +static void cfreset_hook(int sig, siginfo_t *info, void *arg) { + cfreset(arg); +} + +CFILE *bfs_ctx_dedup(struct bfs_ctx *ctx, CFILE *cfile, const char *path) { + struct bfs_stat sb; + if (bfs_stat(cfile->fd, NULL, 0, &sb) != 0) { + return NULL; + } + + bfs_file_id id; + bfs_stat_id(&sb, &id); + + struct trie_leaf *leaf = trie_insert_mem(&ctx->files, id, sizeof(id)); + if (!leaf) { + return NULL; + } + + struct bfs_ctx_file *ctx_file = leaf->value; + if (ctx_file) { + ctx_file->path = path; + return ctx_file->cfile; + } + + leaf->value = ctx_file = ALLOC(struct bfs_ctx_file); + if (!ctx_file) { + goto fail; + } + + ctx_file->cfile = cfile; + ctx_file->path = path; + ctx_file->error = 0; + ctx_file->hook = NULL; + + if (cfile->colors) { + ctx_file->hook = atsigexit(cfreset_hook, cfile); + if (!ctx_file->hook) { + goto fail; + } + } + + if (cfile != ctx->cout && cfile != ctx->cerr) { + ++ctx->nfiles; + } + + return cfile; + +fail: + trie_remove(&ctx->files, leaf); + free(ctx_file); + return NULL; +} + +void bfs_ctx_flush(const struct bfs_ctx *ctx) { + // Before executing anything, flush all open streams. This ensures that + // - the user sees everything relevant before an -ok[dir] prompt + // - output from commands is interleaved consistently with bfs + // - executed commands can rely on I/O from other bfs actions + for_trie (leaf, &ctx->files) { + struct bfs_ctx_file *ctx_file = leaf->value; + CFILE *cfile = ctx_file->cfile; + if (fflush(cfile->file) == 0) { + continue; + } + + ctx_file->error = errno; + clearerr(cfile->file); + + const char *path = ctx_file->path; + if (path) { + bfs_error(ctx, "%pq: %m.\n", path); + } else if (cfile == ctx->cout) { + bfs_error(ctx, "(standard output): %m.\n"); + } + } + + // Flush the user/group caches, in case the executed command edits the + // user/group tables + bfs_users_flush(ctx->users); + bfs_groups_flush(ctx->groups); +} + +/** Flush a file and report any errors. */ +static int bfs_ctx_fflush(CFILE *cfile) { + int ret = 0, error = 0; + if (ferror(cfile->file)) { + ret = -1; + error = EIO; + } + if (fflush(cfile->file) != 0) { + ret = -1; + error = errno; + } + + errno = error; + return ret; +} + +/** Close a file tracked by the bfs context. */ +static int bfs_ctx_fclose(struct bfs_ctx *ctx, struct bfs_ctx_file *ctx_file) { + CFILE *cfile = ctx_file->cfile; + + // Writes to stderr are allowed to fail silently, unless the same file + // was used by -fprint, -fls, etc. + bool silent = cfile == ctx->cerr && !ctx_file->path; + int ret = 0, error = 0; + + if (ctx_file->error) { + // An error was previously reported during bfs_ctx_flush() + ret = -1; + error = ctx_file->error; + } + + // Flush the file just before we remove the hook, to maximize the chance + // we leave the TTY in a good state + if (bfs_ctx_fflush(cfile) != 0) { + ret = -1; + error = errno; + } + + if (ctx_file->hook) { + sigunhook(ctx_file->hook); + } + + // Close the CFILE, except for stdio streams, which are closed later + if (cfile != ctx->cout && cfile != ctx->cerr) { + if (cfclose(cfile) != 0) { + ret = -1; + error = errno; + } + } + + if (silent) { + ret = 0; + } + + if (ret != 0 && ctx->cerr) { + if (ctx_file->path) { + bfs_error(ctx, "%pq: %s.\n", ctx_file->path, xstrerror(error)); + } else if (cfile == ctx->cout) { + bfs_error(ctx, "(standard output): %s.\n", xstrerror(error)); + } + } + + free(ctx_file); + return ret; +} + +int bfs_ctx_free(struct bfs_ctx *ctx) { + int ret = 0; + + if (ctx) { + CFILE *cout = ctx->cout; + CFILE *cerr = ctx->cerr; + + bfs_mtab_free(ctx->mtab); + + bfs_groups_free(ctx->groups); + bfs_users_free(ctx->users); + + for_trie (leaf, &ctx->files) { + struct bfs_ctx_file *ctx_file = leaf->value; + if (bfs_ctx_fclose(ctx, ctx_file) != 0) { + ret = -1; + } + } + trie_destroy(&ctx->files); + + cfclose(cout); + cfclose(cerr); + free_colors(ctx->colors); + + for_slist (struct bfs_expr, expr, &ctx->expr_list, freelist) { + bfs_expr_clear(expr); + } + arena_destroy(&ctx->expr_arena); + + for (size_t i = 0; i < ctx->npaths; ++i) { + free((char *)ctx->paths[i]); + } + free(ctx->paths); + + free(ctx->argv); + free(ctx); + } + + return ret; +} diff --git a/src/ctx.h b/src/ctx.h new file mode 100644 index 0000000..fc3020c --- /dev/null +++ b/src/ctx.h @@ -0,0 +1,169 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +/** + * bfs execution context. + */ + +#ifndef BFS_CTX_H +#define BFS_CTX_H + +#include "prelude.h" +#include "alloc.h" +#include "bftw.h" +#include "diag.h" +#include "expr.h" +#include "trie.h" +#include <stddef.h> +#include <sys/resource.h> +#include <time.h> + +struct CFILE; + +/** + * The execution context for bfs. + */ +struct bfs_ctx { + /** The number of command line arguments. */ + size_t argc; + /** The unparsed command line arguments. */ + char **argv; + + /** The root paths. */ + const char **paths; + /** The number of root paths. */ + size_t npaths; + + /** The main command line expression. */ + struct bfs_expr *expr; + /** An expression for files to filter out. */ + struct bfs_expr *exclude; + /** A list of allocated expressions. */ + struct bfs_exprs expr_list; + /** bfs_expr arena. */ + struct arena expr_arena; + + /** -mindepth option. */ + int mindepth; + /** -maxdepth option. */ + int maxdepth; + + /** bftw() flags. */ + enum bftw_flags flags; + /** bftw() search strategy. */ + enum bftw_strategy strategy; + + /** Threads (-j). */ + int threads; + /** Optimization level (-O). */ + int optlevel; + /** Debugging flags (-D). */ + enum debug_flags debug; + /** Whether to ignore deletions that race with bfs (-ignore_readdir_race). */ + bool ignore_races; + /** Whether to follow POSIXisms more closely ($POSIXLY_CORRECT). */ + bool posixly_correct; + /** Whether to show a status bar (-status). */ + bool status; + /** Whether to only return unique files (-unique). */ + bool unique; + /** Whether to print warnings (-warn/-nowarn). */ + bool warn; + /** Whether to only handle paths with xargs-safe characters (-X). */ + bool xargs_safe; + + /** Color data. */ + struct colors *colors; + /** The error that occurred parsing the color table, if any. */ + int colors_error; + /** Colored stdout. */ + struct CFILE *cout; + /** Colored stderr. */ + struct CFILE *cerr; + + /** User cache. */ + struct bfs_users *users; + /** Group table. */ + struct bfs_groups *groups; + /** The error that occurred parsing the group table, if any. */ + int groups_error; + + /** Table of mounted file systems. */ + struct bfs_mtab *mtab; + /** The error that occurred parsing the mount table, if any. */ + int mtab_error; + + /** All the files owned by the context. */ + struct trie files; + /** The number of files owned by the context. */ + int nfiles; + + /** The initial RLIMIT_NOFILE limits. */ + struct rlimit orig_nofile; + /** The current RLIMIT_NOFILE limits. */ + struct rlimit cur_nofile; + + /** The current time. */ + struct timespec now; +}; + +/** + * @return + * A new bfs context, or NULL on failure. + */ +struct bfs_ctx *bfs_ctx_new(void); + +/** + * Get the mount table. + * + * @param ctx + * The bfs context. + * @return + * The cached mount table, or NULL on failure. + */ +const struct bfs_mtab *bfs_ctx_mtab(const struct bfs_ctx *ctx); + +/** + * Deduplicate an opened file. + * + * @param ctx + * The bfs context. + * @param cfile + * The opened file. + * @param path + * The path to the opened file (or NULL for standard streams). + * @return + * If the same file was opened previously, that file is returned. If cfile is a new file, + * cfile itself is returned. If an error occurs, NULL is returned. + */ +struct CFILE *bfs_ctx_dedup(struct bfs_ctx *ctx, struct CFILE *cfile, const char *path); + +/** + * Flush any caches for consistency with external processes. + * + * @param ctx + * The bfs context. + */ +void bfs_ctx_flush(const struct bfs_ctx *ctx); + +/** + * Dump the parsed command line. + * + * @param ctx + * The bfs context. + * @param flag + * The -D flag that triggered the dump. + */ +void bfs_ctx_dump(const struct bfs_ctx *ctx, enum debug_flags flag); + +/** + * Free a bfs context. + * + * @param ctx + * The context to free. + * @return + * 0 on success, -1 if any errors occurred. + */ +int bfs_ctx_free(struct bfs_ctx *ctx); + +#endif // BFS_CTX_H diff --git a/src/diag.c b/src/diag.c new file mode 100644 index 0000000..bb744f6 --- /dev/null +++ b/src/diag.c @@ -0,0 +1,300 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +#include "prelude.h" +#include "diag.h" +#include "alloc.h" +#include "bfstd.h" +#include "color.h" +#include "ctx.h" +#include "dstring.h" +#include "expr.h" +#include <errno.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> + +/** bfs_diagf() implementation. */ +attr(printf(2, 0)) +static void bfs_vdiagf(const struct bfs_loc *loc, const char *format, va_list args) { + fprintf(stderr, "%s: %s@%s:%d: ", xgetprogname(), loc->func, loc->file, loc->line); + vfprintf(stderr, format, args); + fprintf(stderr, "\n"); +} + +void bfs_diagf(const struct bfs_loc *loc, const char *format, ...) { + va_list args; + va_start(args, format); + bfs_vdiagf(loc, format, args); + va_end(args); +} + +noreturn void bfs_abortf(const struct bfs_loc *loc, const char *format, ...) { + va_list args; + va_start(args, format); + bfs_vdiagf(loc, format, args); + va_end(args); + + abort(); +} + +const char *bfs_errstr(void) { + return xstrerror(errno); +} + +const char *debug_flag_name(enum debug_flags flag) { + switch (flag) { + case DEBUG_COST: + return "cost"; + case DEBUG_EXEC: + return "exec"; + case DEBUG_OPT: + return "opt"; + case DEBUG_RATES: + return "rates"; + case DEBUG_SEARCH: + return "search"; + case DEBUG_STAT: + return "stat"; + case DEBUG_TREE: + return "tree"; + + case DEBUG_ALL: + break; + } + + bfs_bug("Unrecognized debug flag"); + return "???"; +} + +void bfs_perror(const struct bfs_ctx *ctx, const char *str) { + bfs_error(ctx, "%s: %m.\n", str); +} + +void bfs_error(const struct bfs_ctx *ctx, const char *format, ...) { + va_list args; + va_start(args, format); + bfs_verror(ctx, format, args); + va_end(args); +} + +bool bfs_warning(const struct bfs_ctx *ctx, const char *format, ...) { + va_list args; + va_start(args, format); + bool ret = bfs_vwarning(ctx, format, args); + va_end(args); + return ret; +} + +bool bfs_debug(const struct bfs_ctx *ctx, enum debug_flags flag, const char *format, ...) { + va_list args; + va_start(args, format); + bool ret = bfs_vdebug(ctx, flag, format, args); + va_end(args); + return ret; +} + +void bfs_verror(const struct bfs_ctx *ctx, const char *format, va_list args) { + int error = errno; + + bfs_error_prefix(ctx); + + errno = error; + cvfprintf(ctx->cerr, format, args); +} + +bool bfs_vwarning(const struct bfs_ctx *ctx, const char *format, va_list args) { + int error = errno; + + if (bfs_warning_prefix(ctx)) { + errno = error; + cvfprintf(ctx->cerr, format, args); + return true; + } else { + return false; + } +} + +bool bfs_vdebug(const struct bfs_ctx *ctx, enum debug_flags flag, const char *format, va_list args) { + int error = errno; + + if (bfs_debug_prefix(ctx, flag)) { + errno = error; + cvfprintf(ctx->cerr, format, args); + return true; + } else { + return false; + } +} + +/** Get the command name without any leading directories. */ +static const char *bfs_cmd(const struct bfs_ctx *ctx) { + return ctx->argv[0] + xbaseoff(ctx->argv[0]); +} + +void bfs_error_prefix(const struct bfs_ctx *ctx) { + cfprintf(ctx->cerr, "${bld}%s:${rs} ${err}error:${rs} ", bfs_cmd(ctx)); +} + +bool bfs_warning_prefix(const struct bfs_ctx *ctx) { + if (ctx->warn) { + cfprintf(ctx->cerr, "${bld}%s:${rs} ${wrn}warning:${rs} ", bfs_cmd(ctx)); + return true; + } else { + return false; + } +} + +bool bfs_debug_prefix(const struct bfs_ctx *ctx, enum debug_flags flag) { + if (ctx->debug & flag) { + cfprintf(ctx->cerr, "${bld}%s:${rs} ${cyn}-D %s${rs}: ", bfs_cmd(ctx), debug_flag_name(flag)); + return true; + } else { + return false; + } +} + +/** Recursive part of highlight_expr(). */ +static bool highlight_expr_recursive(const struct bfs_ctx *ctx, const struct bfs_expr *expr, bool args[]) { + if (!expr) { + return false; + } + + bool ret = false; + + for (size_t i = 0; i < ctx->argc; ++i) { + if (&ctx->argv[i] == expr->argv) { + for (size_t j = 0; j < expr->argc; ++j) { + bfs_assert(i + j < ctx->argc); + args[i + j] = true; + ret = true; + } + break; + } + } + + for (struct bfs_expr *child = bfs_expr_children(expr); child; child = child->next) { + ret |= highlight_expr_recursive(ctx, child, args); + } + + return ret; +} + +/** Highlight an expression in the command line. */ +static bool highlight_expr(const struct bfs_ctx *ctx, const struct bfs_expr *expr, bool args[]) { + for (size_t i = 0; i < ctx->argc; ++i) { + args[i] = false; + } + + return highlight_expr_recursive(ctx, expr, args); +} + +/** Print a highlighted portion of the command line. */ +static void bfs_argv_diag(const struct bfs_ctx *ctx, const bool args[], bool warning) { + if (warning) { + bfs_warning_prefix(ctx); + } else { + bfs_error_prefix(ctx); + } + + dchar **argv = ZALLOC_ARRAY(dchar *, ctx->argc); + if (!argv) { + return; + } + + for (size_t i = 0; i < ctx->argc; ++i) { + if (dstrescat(&argv[i], ctx->argv[i], WESC_SHELL | WESC_TTY) != 0) { + goto done; + } + } + + size_t max_argc = 0; + for (size_t i = 0; i < ctx->argc; ++i) { + if (i > 0) { + cfprintf(ctx->cerr, " "); + } + + if (args[i]) { + max_argc = i + 1; + cfprintf(ctx->cerr, "${bld}%s${rs}", argv[i]); + } else { + cfprintf(ctx->cerr, "%s", argv[i]); + } + } + + cfprintf(ctx->cerr, "\n"); + + if (warning) { + bfs_warning_prefix(ctx); + } else { + bfs_error_prefix(ctx); + } + + for (size_t i = 0; i < max_argc; ++i) { + if (i > 0) { + if (args[i - 1] && args[i]) { + cfprintf(ctx->cerr, "~"); + } else { + cfprintf(ctx->cerr, " "); + } + } + + if (args[i] && (i == 0 || !args[i - 1])) { + if (warning) { + cfprintf(ctx->cerr, "${wrn}"); + } else { + cfprintf(ctx->cerr, "${err}"); + } + } + + size_t len = xstrwidth(argv[i]); + for (size_t j = 0; j < len; ++j) { + if (args[i]) { + cfprintf(ctx->cerr, "~"); + } else { + cfprintf(ctx->cerr, " "); + } + } + + if (args[i] && (i + 1 >= max_argc || !args[i + 1])) { + cfprintf(ctx->cerr, "${rs}"); + } + } + + cfprintf(ctx->cerr, "\n"); + +done: + for (size_t i = 0; i < ctx->argc; ++i) { + dstrfree(argv[i]); + } + free(argv); +} + +void bfs_argv_error(const struct bfs_ctx *ctx, const bool args[]) { + bfs_argv_diag(ctx, args, false); +} + +void bfs_expr_error(const struct bfs_ctx *ctx, const struct bfs_expr *expr) { + bool args[ctx->argc]; + if (highlight_expr(ctx, expr, args)) { + bfs_argv_error(ctx, args); + } +} + +bool bfs_argv_warning(const struct bfs_ctx *ctx, const bool args[]) { + if (!ctx->warn) { + return false; + } + + bfs_argv_diag(ctx, args, true); + return true; +} + +bool bfs_expr_warning(const struct bfs_ctx *ctx, const struct bfs_expr *expr) { + bool args[ctx->argc]; + if (highlight_expr(ctx, expr, args)) { + return bfs_argv_warning(ctx, args); + } + + return false; +} diff --git a/src/diag.h b/src/diag.h new file mode 100644 index 0000000..f2498b5 --- /dev/null +++ b/src/diag.h @@ -0,0 +1,258 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +/** + * Diagnostic messages. + */ + +#ifndef BFS_DIAG_H +#define BFS_DIAG_H + +#include "prelude.h" +#include <stdarg.h> + +/** + * static_assert() with an optional second argument. + */ +#if __STDC_VERSION__ >= C23 +# define bfs_static_assert static_assert +#else +# define bfs_static_assert(...) bfs_static_assert_(__VA_ARGS__, #__VA_ARGS__, ) +# define bfs_static_assert_(expr, msg, ...) _Static_assert(expr, msg) +#endif + +/** + * A source code location. + */ +struct bfs_loc { + const char *file; + int line; + const char *func; +}; + +#define BFS_LOC_INIT { .file = __FILE__, .line = __LINE__, .func = __func__ } + +/** + * Get the current source code location. + */ +#if __STDC_VERSION__ >= C23 +# define bfs_location() (&(static const struct bfs_loc)BFS_LOC_INIT) +#else +# define bfs_location() (&(const struct bfs_loc)BFS_LOC_INIT) +#endif + +/** + * Print a low-level diagnostic message to standard error, formatted like + * + * bfs: func@src/file.c:0: Message + */ +attr(printf(2, 3)) +void bfs_diagf(const struct bfs_loc *loc, const char *format, ...); + +/** + * Unconditional diagnostic message. + */ +#define bfs_diag(...) bfs_diagf(bfs_location(), __VA_ARGS__) + +/** + * Get the last error message. + */ +const char *bfs_errstr(void); + +/** + * Print a diagnostic message including the last error. + */ +#define bfs_ediag(...) \ + bfs_ediag_("" __VA_ARGS__, bfs_errstr()) + +#define bfs_ediag_(format, ...) \ + bfs_diag(sizeof(format) > 1 ? format ": %s" : "%s", __VA_ARGS__) + +/** + * Print a message to standard error and abort. + */ +attr(cold, printf(2, 3)) +noreturn void bfs_abortf(const struct bfs_loc *loc, const char *format, ...); + +/** + * Unconditional abort with a message. + */ +#define bfs_abort(...) \ + bfs_abortf(bfs_location(), __VA_ARGS__) + +/** + * Abort with a message including the last error. + */ +#define bfs_eabort(...) \ + bfs_eabort_("" __VA_ARGS__, bfs_errstr()) + +#define bfs_eabort_(format, ...) \ + bfs_abort(sizeof(format) > 1 ? format ": %s" : "%s", __VA_ARGS__) + +/** + * Abort in debug builds; no-op in release builds. + */ +#ifdef NDEBUG +# define bfs_bug(...) ((void)0) +# define bfs_ebug(...) ((void)0) +#else +# define bfs_bug bfs_abort +# define bfs_ebug bfs_eabort +#endif + +/** + * Unconditional assert. + */ +#define bfs_verify(...) \ + bfs_verify_(#__VA_ARGS__, __VA_ARGS__, "", "") + +#define bfs_verify_(str, cond, format, ...) \ + ((cond) ? (void)0 : bfs_abort( \ + sizeof(format) > 1 \ + ? "%.0s" format "%s%s" \ + : "Assertion failed: `%s`%s", \ + str, __VA_ARGS__)) + +/** + * Unconditional assert, including the last error. + */ +#define bfs_everify(...) \ + bfs_everify_(#__VA_ARGS__, __VA_ARGS__, "", bfs_errstr()) + +#define bfs_everify_(str, cond, format, ...) \ + ((cond) ? (void)0 : bfs_abort( \ + sizeof(format) > 1 \ + ? "%.0s" format "%s: %s" \ + : "Assertion failed: `%s`: %s", \ + str, __VA_ARGS__)) + +/** + * Assert in debug builds; no-op in release builds. + */ +#ifdef NDEBUG +# define bfs_assert(...) ((void)0) +# define bfs_eassert(...) ((void)0) +#else +# define bfs_assert bfs_verify +# define bfs_eassert bfs_everify +#endif + +struct bfs_ctx; +struct bfs_expr; + +/** + * Various debugging flags. + */ +enum debug_flags { + /** Print cost estimates. */ + DEBUG_COST = 1 << 0, + /** Print executed command details. */ + DEBUG_EXEC = 1 << 1, + /** Print optimization details. */ + DEBUG_OPT = 1 << 2, + /** Print rate information. */ + DEBUG_RATES = 1 << 3, + /** Trace the filesystem traversal. */ + DEBUG_SEARCH = 1 << 4, + /** Trace all stat() calls. */ + DEBUG_STAT = 1 << 5, + /** Print the parse tree. */ + DEBUG_TREE = 1 << 6, + /** All debug flags. */ + DEBUG_ALL = (1 << 7) - 1, +}; + +/** + * Convert a debug flag to a string. + */ +const char *debug_flag_name(enum debug_flags flag); + +/** + * Like perror(), but decorated like bfs_error(). + */ +attr(cold) +void bfs_perror(const struct bfs_ctx *ctx, const char *str); + +/** + * Shorthand for printing error messages. + */ +attr(cold, printf(2, 3)) +void bfs_error(const struct bfs_ctx *ctx, const char *format, ...); + +/** + * Shorthand for printing warning messages. + * + * @return Whether a warning was printed. + */ +attr(cold, printf(2, 3)) +bool bfs_warning(const struct bfs_ctx *ctx, const char *format, ...); + +/** + * Shorthand for printing debug messages. + * + * @return Whether a debug message was printed. + */ +attr(cold, printf(3, 4)) +bool bfs_debug(const struct bfs_ctx *ctx, enum debug_flags flag, const char *format, ...); + +/** + * bfs_error() variant that takes a va_list. + */ +attr(cold, printf(2, 0)) +void bfs_verror(const struct bfs_ctx *ctx, const char *format, va_list args); + +/** + * bfs_warning() variant that takes a va_list. + */ +attr(cold, printf(2, 0)) +bool bfs_vwarning(const struct bfs_ctx *ctx, const char *format, va_list args); + +/** + * bfs_debug() variant that takes a va_list. + */ +attr(cold, printf(3, 0)) +bool bfs_vdebug(const struct bfs_ctx *ctx, enum debug_flags flag, const char *format, va_list args); + +/** + * Print the error message prefix. + */ +attr(cold) +void bfs_error_prefix(const struct bfs_ctx *ctx); + +/** + * Print the warning message prefix. + */ +attr(cold) +bool bfs_warning_prefix(const struct bfs_ctx *ctx); + +/** + * Print the debug message prefix. + */ +attr(cold) +bool bfs_debug_prefix(const struct bfs_ctx *ctx, enum debug_flags flag); + +/** + * Highlight parts of the command line in an error message. + */ +attr(cold) +void bfs_argv_error(const struct bfs_ctx *ctx, const bool args[]); + +/** + * Highlight parts of an expression in an error message. + */ +attr(cold) +void bfs_expr_error(const struct bfs_ctx *ctx, const struct bfs_expr *expr); + +/** + * Highlight parts of the command line in a warning message. + */ +attr(cold) +bool bfs_argv_warning(const struct bfs_ctx *ctx, const bool args[]); + +/** + * Highlight parts of an expression in a warning message. + */ +attr(cold) +bool bfs_expr_warning(const struct bfs_ctx *ctx, const struct bfs_expr *expr); + +#endif // BFS_DIAG_H diff --git a/src/dir.c b/src/dir.c new file mode 100644 index 0000000..fadf1c0 --- /dev/null +++ b/src/dir.c @@ -0,0 +1,371 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +#include "prelude.h" +#include "dir.h" +#include "alloc.h" +#include "bfstd.h" +#include "diag.h" +#include "sanity.h" +#include "trie.h" +#include <dirent.h> +#include <errno.h> +#include <fcntl.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <unistd.h> + +#if BFS_USE_GETDENTS +# if BFS_HAS_GETDENTS64_SYSCALL +# include <sys/syscall.h> +# endif + +/** getdents() syscall wrapper. */ +static ssize_t bfs_getdents(int fd, void *buf, size_t size) { + sanitize_uninit(buf, size); + +#if BFS_HAS_POSIX_GETDENTS + int flags = 0; +# ifdef DT_FORCE_TYPE + flags |= DT_FORCE_TYPE; +# endif + ssize_t ret = posix_getdents(fd, buf, size, flags); +#elif BFS_HAS_GETDENTS + ssize_t ret = getdents(fd, buf, size); +#elif BFS_HAS_GETDENTS64 + ssize_t ret = getdents64(fd, buf, size); +#elif BFS_HAS_GETDENTS64_SYSCALL + ssize_t ret = syscall(SYS_getdents64, fd, buf, size); +#else +# error "No getdents() implementation" +#endif + + if (ret > 0) { + sanitize_init(buf, ret); + } + + return ret; +} + +#endif // BFS_USE_GETDENTS + +/** Directory entry type for bfs_getdents() */ +#if !BFS_USE_GETDENTS || BFS_HAS_GETDENTS +typedef struct dirent sys_dirent; +#elif BFS_HAS_POSIX_GETDENTS +typedef struct posix_dent sys_dirent; +#else +typedef struct dirent64 sys_dirent; +#endif + +enum bfs_type bfs_mode_to_type(mode_t mode) { + switch (mode & S_IFMT) { +#ifdef S_IFBLK + case S_IFBLK: + return BFS_BLK; +#endif +#ifdef S_IFCHR + case S_IFCHR: + return BFS_CHR; +#endif +#ifdef S_IFDIR + case S_IFDIR: + return BFS_DIR; +#endif +#ifdef S_IFDOOR + case S_IFDOOR: + return BFS_DOOR; +#endif +#ifdef S_IFIFO + case S_IFIFO: + return BFS_FIFO; +#endif +#ifdef S_IFLNK + case S_IFLNK: + return BFS_LNK; +#endif +#ifdef S_IFPORT + case S_IFPORT: + return BFS_PORT; +#endif +#ifdef S_IFREG + case S_IFREG: + return BFS_REG; +#endif +#ifdef S_IFSOCK + case S_IFSOCK: + return BFS_SOCK; +#endif +#ifdef S_IFWHT + case S_IFWHT: + return BFS_WHT; +#endif + + default: + return BFS_UNKNOWN; + } +} + +/** + * Private directory flags. + */ +enum { + /** We've reached the end of the directory. */ + BFS_DIR_EOF = BFS_DIR_PRIVATE << 0, + /** This directory is a union mount we need to dedup manually. */ + BFS_DIR_UNION = BFS_DIR_PRIVATE << 1, +}; + +struct bfs_dir { + unsigned int flags; + +#if BFS_USE_GETDENTS + int fd; + unsigned short pos; + unsigned short size; +# if __FreeBSD__ + struct trie trie; +# endif + alignas(sys_dirent) char buf[]; +#else + DIR *dir; + struct dirent *de; +#endif +}; + +#if BFS_USE_GETDENTS +# define DIR_SIZE (64 << 10) +# define BUF_SIZE (DIR_SIZE - sizeof(struct bfs_dir)) +#else +# define DIR_SIZE sizeof(struct bfs_dir) +#endif + +struct bfs_dir *bfs_allocdir(void) { + return malloc(DIR_SIZE); +} + +void bfs_dir_arena(struct arena *arena) { + arena_init(arena, alignof(struct bfs_dir), DIR_SIZE); +} + +int bfs_opendir(struct bfs_dir *dir, int at_fd, const char *at_path, enum bfs_dir_flags flags) { + int fd; + if (at_path) { + fd = openat(at_fd, at_path, O_RDONLY | O_CLOEXEC | O_DIRECTORY); + if (fd < 0) { + return -1; + } + } else if (at_fd >= 0) { + fd = at_fd; + } else { + errno = EBADF; + return -1; + } + + dir->flags = flags; + +#if BFS_USE_GETDENTS + dir->fd = fd; + dir->pos = 0; + dir->size = 0; + +# if __FreeBSD__ && defined(F_ISUNIONSTACK) + if (fcntl(fd, F_ISUNIONSTACK) > 0) { + dir->flags |= BFS_DIR_UNION; + trie_init(&dir->trie); + } +# endif +#else // !BFS_USE_GETDENTS + dir->dir = fdopendir(fd); + if (!dir->dir) { + if (at_path) { + close_quietly(fd); + } + return -1; + } + dir->de = NULL; +#endif + + return 0; +} + +int bfs_dirfd(const struct bfs_dir *dir) { +#if BFS_USE_GETDENTS + return dir->fd; +#else + return dirfd(dir->dir); +#endif +} + +int bfs_polldir(struct bfs_dir *dir) { +#if BFS_USE_GETDENTS + if (dir->pos < dir->size) { + return 1; + } else if (dir->flags & BFS_DIR_EOF) { + return 0; + } + + char *buf = (char *)(dir + 1); + ssize_t size = bfs_getdents(dir->fd, buf, BUF_SIZE); + if (size == 0) { + dir->flags |= BFS_DIR_EOF; + return 0; + } else if (size < 0) { + return -1; + } + + dir->pos = 0; + dir->size = size; + + // Like read(), getdents() doesn't indicate EOF until another call returns zero. + // Check that eagerly here to hopefully avoid a syscall in the last bfs_readdir(). + size_t rest = BUF_SIZE - size; + if (rest >= sizeof(sys_dirent)) { + size = bfs_getdents(dir->fd, buf + size, rest); + if (size > 0) { + dir->size += size; + } else if (size == 0) { + dir->flags |= BFS_DIR_EOF; + } + } + + return 1; +#else // !BFS_USE_GETDENTS + if (dir->de) { + return 1; + } else if (dir->flags & BFS_DIR_EOF) { + return 0; + } + + errno = 0; + dir->de = readdir(dir->dir); + if (dir->de) { + return 1; + } else if (errno == 0) { + dir->flags |= BFS_DIR_EOF; + return 0; + } else { + return -1; + } +#endif +} + +/** Read a single directory entry. */ +static int bfs_getdent(struct bfs_dir *dir, const sys_dirent **de) { + int ret = bfs_polldir(dir); + if (ret > 0) { +#if BFS_USE_GETDENTS + char *buf = (char *)(dir + 1); + *de = (const sys_dirent *)(buf + dir->pos); + dir->pos += (*de)->d_reclen; +#else + *de = dir->de; + dir->de = NULL; +#endif + } + return ret; +} + +/** Skip ".", "..", and deleted/empty dirents. */ +static int bfs_skipdent(struct bfs_dir *dir, const sys_dirent *de) { +#if BFS_USE_GETDENTS +# if __FreeBSD__ + // Union mounts on FreeBSD have to be de-duplicated in userspace + if (dir->flags & BFS_DIR_UNION) { + struct trie_leaf *leaf = trie_insert_str(&dir->trie, de->d_name); + if (!leaf) { + return -1; + } else if (leaf->value) { + return 1; + } else { + leaf->value = leaf; + } + } + + // NFS mounts on FreeBSD can return empty dirents with inode number 0 + if (de->d_ino == 0) { + return 1; + } +# endif + +# ifdef DT_WHT + if (de->d_type == DT_WHT && !(dir->flags & BFS_DIR_WHITEOUTS)) { + return 1; + } +# endif +#endif // BFS_USE_GETDENTS + + const char *name = de->d_name; + return name[0] == '.' && (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')); +} + +/** Convert de->d_type to a bfs_type, if it exists. */ +static enum bfs_type bfs_d_type(const sys_dirent *de) { +#ifdef DTTOIF + return bfs_mode_to_type(DTTOIF(de->d_type)); +#else + return BFS_UNKNOWN; +#endif +} + +int bfs_readdir(struct bfs_dir *dir, struct bfs_dirent *de) { + while (true) { + const sys_dirent *sysde; + int ret = bfs_getdent(dir, &sysde); + if (ret <= 0) { + return ret; + } + + int skip = bfs_skipdent(dir, sysde); + if (skip < 0) { + return skip; + } else if (skip) { + continue; + } + + if (de) { + de->type = bfs_d_type(sysde); + de->name = sysde->d_name; + } + + return 1; + } +} + +static void bfs_destroydir(struct bfs_dir *dir) { +#if BFS_USE_GETDENTS && __FreeBSD__ + if (dir->flags & BFS_DIR_UNION) { + trie_destroy(&dir->trie); + } +#endif + + sanitize_uninit(dir, DIR_SIZE); +} + +int bfs_closedir(struct bfs_dir *dir) { +#if BFS_USE_GETDENTS + int ret = xclose(dir->fd); +#else + int ret = closedir(dir->dir); + if (ret != 0) { + bfs_verify(errno != EBADF); + } +#endif + + bfs_destroydir(dir); + return ret; +} + +#if BFS_USE_UNWRAPDIR +int bfs_unwrapdir(struct bfs_dir *dir) { +#if BFS_USE_GETDENTS + int ret = dir->fd; +#elif BFS_HAS_FDCLOSEDIR + int ret = fdclosedir(dir->dir); +#endif + + bfs_destroydir(dir); + return ret; +} +#endif diff --git a/src/dir.h b/src/dir.h new file mode 100644 index 0000000..bbba071 --- /dev/null +++ b/src/dir.h @@ -0,0 +1,175 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +/** + * Directories and their contents. + */ + +#ifndef BFS_DIR_H +#define BFS_DIR_H + +#include "prelude.h" +#include <sys/types.h> + +/** + * Whether the implementation uses the getdents() syscall directly, rather than + * libc's readdir(). + */ +#ifndef BFS_USE_GETDENTS +# if BFS_HAS_POSIX_GETDENTS +# define BFS_USE_GETDENTS true +# elif __linux__ || __FreeBSD__ +# define BFS_USE_GETDENTS (BFS_HAS_GETDENTS || BFS_HAS_GETDENTS64 | BFS_HAS_GETDENTS64_SYSCALL) +# endif +#endif + +/** + * A directory. + */ +struct bfs_dir; + +/** + * File types. + */ +enum bfs_type { + /** An error occurred for this file. */ + BFS_ERROR = -1, + /** Unknown type. */ + BFS_UNKNOWN, + /** Block device. */ + BFS_BLK, + /** Character device. */ + BFS_CHR, + /** Directory. */ + BFS_DIR, + /** Solaris door. */ + BFS_DOOR, + /** Pipe. */ + BFS_FIFO, + /** Symbolic link. */ + BFS_LNK, + /** Solaris event port. */ + BFS_PORT, + /** Regular file. */ + BFS_REG, + /** Socket. */ + BFS_SOCK, + /** BSD whiteout. */ + BFS_WHT, +}; + +/** + * Convert a bfs_stat() mode to a bfs_type. + */ +enum bfs_type bfs_mode_to_type(mode_t mode); + +/** + * A directory entry. + */ +struct bfs_dirent { + /** The type of this file (possibly unknown). */ + enum bfs_type type; + /** The name of this file. */ + const char *name; +}; + +/** + * Allocate space for a directory. + * + * @return + * An allocated, unopen directory, or NULL on failure. + */ +struct bfs_dir *bfs_allocdir(void); + +struct arena; + +/** + * Initialize an arena for directories. + * + * @param arena + * The arena to initialize. + */ +void bfs_dir_arena(struct arena *arena); + +/** + * bfs_opendir() flags. + */ +enum bfs_dir_flags { + /** Include whiteouts in the results. */ + BFS_DIR_WHITEOUTS = 1 << 0, + /** @internal Start of private flags. */ + BFS_DIR_PRIVATE = 1 << 1, +}; + +/** + * Open a directory. + * + * @param dir + * The allocated directory. + * @param at_fd + * The base directory for path resolution. + * @param at_path + * The path of the directory to open, relative to at_fd. Pass NULL to + * open at_fd itself. + * @param flags + * Flags that control which directory entries are listed. + * @return + * 0 on success, or -1 on failure. + */ +int bfs_opendir(struct bfs_dir *dir, int at_fd, const char *at_path, enum bfs_dir_flags flags); + +/** + * Get the file descriptor for a directory. + */ +int bfs_dirfd(const struct bfs_dir *dir); + +/** + * Performs any I/O necessary for the next bfs_readdir() call. + * + * @param dir + * The directory to poll. + * @return + * 1 on success, 0 on EOF, or -1 on failure. + */ +int bfs_polldir(struct bfs_dir *dir); + +/** + * Read a directory entry. + * + * @param dir + * The directory to read. + * @param[out] dirent + * The directory entry to populate. + * @return + * 1 on success, 0 on EOF, or -1 on failure. + */ +int bfs_readdir(struct bfs_dir *dir, struct bfs_dirent *de); + +/** + * Close a directory. + * + * @return + * 0 on success, -1 on failure. + */ +int bfs_closedir(struct bfs_dir *dir); + +/** + * Whether the bfs_unwrapdir() function is supported. + */ +#ifndef BFS_USE_UNWRAPDIR +# define BFS_USE_UNWRAPDIR (BFS_USE_GETDENTS || BFS_HAS_FDCLOSEDIR) +#endif + +#if BFS_USE_UNWRAPDIR +/** + * Detach the file descriptor from an open directory. + * + * @param dir + * The directory to detach. + * @return + * The file descriptor of the directory. + */ +int bfs_unwrapdir(struct bfs_dir *dir); +#endif + +#endif // BFS_DIR_H diff --git a/src/dstring.c b/src/dstring.c new file mode 100644 index 0000000..86ab646 --- /dev/null +++ b/src/dstring.c @@ -0,0 +1,279 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +#include "prelude.h" +#include "dstring.h" +#include "alloc.h" +#include "bit.h" +#include "diag.h" +#include <stdarg.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +/** + * The memory representation of a dynamic string. Users get a pointer to str. + */ +struct dstring { + /** Capacity of the string, *including* the terminating NUL. */ + size_t cap; + /** Length of the string, *excluding* the terminating NUL. */ + size_t len; + /** The string itself. */ + alignas(dchar) char str[]; +}; + +#define DSTR_OFFSET offsetof(struct dstring, str) + +/** Back up to the header from a pointer to dstring::str. */ +static struct dstring *dstrheader(const dchar *dstr) { + return (struct dstring *)(dstr - DSTR_OFFSET); +} + +/** + * In some provenance models, the expression `header->str` has its provenance + * restricted to just the `str` field itself, making a future dstrheader() + * illegal. This alternative is guaranteed to preserve provenance for the entire + * allocation. + * + * - https://stackoverflow.com/q/25296019 + * - https://mastodon.social/@void_friend@tech.lgbt/111144859908104311 + */ +static dchar *dstrdata(struct dstring *header) { + return (char *)header + DSTR_OFFSET; +} + +/** Allocate a dstring with the given contents. */ +static dchar *dstralloc_impl(size_t cap, size_t len, const char *str) { + // Avoid reallocations for small strings + if (cap < DSTR_OFFSET) { + cap = DSTR_OFFSET; + } + + struct dstring *header = ALLOC_FLEX(struct dstring, str, cap); + if (!header) { + return NULL; + } + + header->cap = cap; + header->len = len; + + char *ret = dstrdata(header); + memcpy(ret, str, len); + ret[len] = '\0'; + return ret; +} + +dchar *dstralloc(size_t cap) { + return dstralloc_impl(cap + 1, 0, ""); +} + +dchar *dstrdup(const char *str) { + return dstrxdup(str, strlen(str)); +} + +dchar *dstrndup(const char *str, size_t n) { + return dstrxdup(str, strnlen(str, n)); +} + +dchar *dstrddup(const dchar *dstr) { + return dstrxdup(dstr, dstrlen(dstr)); +} + +dchar *dstrxdup(const char *str, size_t len) { + return dstralloc_impl(len + 1, len, str); +} + +size_t dstrlen(const dchar *dstr) { + return dstrheader(dstr)->len; +} + +int dstreserve(dchar **dstr, size_t cap) { + if (!*dstr) { + *dstr = dstralloc(cap); + return *dstr ? 0 : -1; + } + + struct dstring *header = dstrheader(*dstr); + size_t old_cap = header->cap; + size_t new_cap = cap + 1; // Terminating NUL + if (old_cap >= new_cap) { + return 0; + } + + new_cap = bit_ceil(new_cap); + header = REALLOC_FLEX(struct dstring, str, header, old_cap, new_cap); + if (!header) { + return -1; + } + + header->cap = new_cap; + *dstr = dstrdata(header); + return 0; +} + +int dstresize(dchar **dstr, size_t len) { + if (dstreserve(dstr, len) != 0) { + return -1; + } + + struct dstring *header = dstrheader(*dstr); + header->len = len; + header->str[len] = '\0'; + return 0; +} + +int dstrcat(dchar **dest, const char *src) { + return dstrxcat(dest, src, strlen(src)); +} + +int dstrncat(dchar **dest, const char *src, size_t n) { + return dstrxcat(dest, src, strnlen(src, n)); +} + +int dstrdcat(dchar **dest, const dchar *src) { + return dstrxcat(dest, src, dstrlen(src)); +} + +int dstrxcat(dchar **dest, const char *src, size_t len) { + size_t oldlen = dstrlen(*dest); + size_t newlen = oldlen + len; + + if (dstresize(dest, newlen) != 0) { + return -1; + } + + memcpy(*dest + oldlen, src, len); + return 0; +} + +int dstrapp(dchar **str, char c) { + return dstrxcat(str, &c, 1); +} + +int dstrcpy(dchar **dest, const char *src) { + return dstrxcpy(dest, src, strlen(src)); +} + +int dstrncpy(dchar **dest, const char *src, size_t n) { + return dstrxcpy(dest, src, strnlen(src, n)); +} + +int dstrdcpy(dchar **dest, const dchar *src) { + return dstrxcpy(dest, src, dstrlen(src)); +} + +int dstrxcpy(dchar **dest, const char *src, size_t len) { + if (dstresize(dest, len) != 0) { + return -1; + } + + memcpy(*dest, src, len); + return 0; +} + +dchar *dstrprintf(const char *format, ...) { + va_list args; + + va_start(args, format); + dchar *str = dstrvprintf(format, args); + va_end(args); + + return str; +} + +dchar *dstrvprintf(const char *format, va_list args) { + // Guess a capacity to try to avoid reallocating + dchar *str = dstralloc(2 * strlen(format)); + if (!str) { + return NULL; + } + + if (dstrvcatf(&str, format, args) != 0) { + dstrfree(str); + return NULL; + } + + return str; +} + +int dstrcatf(dchar **str, const char *format, ...) { + va_list args; + + va_start(args, format); + int ret = dstrvcatf(str, format, args); + va_end(args); + + return ret; +} + +int dstrvcatf(dchar **str, const char *format, va_list args) { + // Guess a capacity to try to avoid calling vsnprintf() twice + size_t len = dstrlen(*str); + dstreserve(str, len + 2 * strlen(format)); + size_t cap = dstrheader(*str)->cap; + + va_list copy; + va_copy(copy, args); + + char *tail = *str + len; + size_t tail_cap = cap - len; + int ret = vsnprintf(tail, tail_cap, format, args); + if (ret < 0) { + goto fail; + } + + size_t tail_len = ret; + if (tail_len >= tail_cap) { + if (dstreserve(str, len + tail_len) != 0) { + goto fail; + } + + tail = *str + len; + ret = vsnprintf(tail, tail_len + 1, format, copy); + if (ret < 0 || (size_t)ret != tail_len) { + bfs_bug("Length of formatted string changed"); + goto fail; + } + } + + va_end(copy); + + dstrheader(*str)->len += tail_len; + return 0; + +fail: + va_end(copy); + *tail = '\0'; + return -1; +} + +int dstrescat(dchar **dest, const char *str, enum wesc_flags flags) { + return dstrnescat(dest, str, SIZE_MAX, flags); +} + +int dstrnescat(dchar **dest, const char *str, size_t n, enum wesc_flags flags) { + size_t len = *dest ? dstrlen(*dest) : 0; + + // Worst case growth is `ccc...` => $'\xCC\xCC\xCC...' + n = strnlen(str, n); + size_t cap = len + 4 * n + 3; + if (dstreserve(dest, cap) != 0) { + return -1; + } + + char *cur = *dest + len; + char *end = *dest + cap + 1; + cur = wordnesc(cur, end, str, n, flags); + bfs_assert(cur != end, "wordesc() result truncated"); + + return dstresize(dest, cur - *dest); +} + +void dstrfree(dchar *dstr) { + if (dstr) { + free(dstrheader(dstr)); + } +} diff --git a/src/dstring.h b/src/dstring.h new file mode 100644 index 0000000..14e1d3e --- /dev/null +++ b/src/dstring.h @@ -0,0 +1,322 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +/** + * A dynamic string library. + */ + +#ifndef BFS_DSTRING_H +#define BFS_DSTRING_H + +#include "prelude.h" +#include "bfstd.h" +#include <stdarg.h> +#include <stddef.h> + +/** Marker type for dynamic strings. */ +#if BFS_LINT && __clang__ +// Abuse __attribute__(aligned) to make a type that allows +// +// dchar * -> char * +// +// conversions, but warns (with Clang's -Walign-mismatch) on +// +// char * -> dchar * +typedef __attribute__((aligned(alignof(size_t)))) char dchar; +#else +typedef char dchar; +#endif + +/** + * Free a dynamic string. + * + * @param dstr + * The string to free. + */ +void dstrfree(dchar *dstr); + +/** + * Allocate a dynamic string. + * + * @param cap + * The initial capacity of the string. + */ +attr(malloc(dstrfree, 1)) +dchar *dstralloc(size_t cap); + +/** + * Create a dynamic copy of a string. + * + * @param str + * The NUL-terminated string to copy. + */ +attr(malloc(dstrfree, 1)) +dchar *dstrdup(const char *str); + +/** + * Create a length-limited dynamic copy of a string. + * + * @param str + * The string to copy. + * @param n + * The maximum number of characters to copy from str. + */ +attr(malloc(dstrfree, 1)) +dchar *dstrndup(const char *str, size_t n); + +/** + * Create a dynamic copy of a dynamic string. + * + * @param dstr + * The dynamic string to copy. + */ +attr(malloc(dstrfree, 1)) +dchar *dstrddup(const dchar *dstr); + +/** + * Create an exact-sized dynamic copy of a string. + * + * @param str + * The string to copy. + * @param len + * The length of the string, which may include internal NUL bytes. + */ +attr(malloc(dstrfree, 1)) +dchar *dstrxdup(const char *str, size_t len); + +/** + * Get a dynamic string's length. + * + * @param dstr + * The string to measure. + * @return + * The length of dstr. + */ +size_t dstrlen(const dchar *dstr); + +/** + * Reserve some capacity in a dynamic string. + * + * @param dstr + * The dynamic string to preallocate. + * @param cap + * The new capacity for the string. + * @return + * 0 on success, -1 on failure. + */ +int dstreserve(dchar **dstr, size_t cap); + +/** + * Resize a dynamic string. + * + * @param dstr + * The dynamic string to resize. + * @param len + * The new length for the dynamic string. + * @return + * 0 on success, -1 on failure. + */ +int dstresize(dchar **dstr, size_t len); + +/** + * Append to a dynamic string. + * + * @param dest + * The destination dynamic string. + * @param src + * The string to append. + * @return 0 on success, -1 on failure. + */ +int dstrcat(dchar **dest, const char *src); + +/** + * Append to a dynamic string. + * + * @param dest + * The destination dynamic string. + * @param src + * The string to append. + * @param n + * The maximum number of characters to take from src. + * @return + * 0 on success, -1 on failure. + */ +int dstrncat(dchar **dest, const char *src, size_t n); + +/** + * Append a dynamic string to another dynamic string. + * + * @param dest + * The destination dynamic string. + * @param src + * The dynamic string to append. + * @return + * 0 on success, -1 on failure. + */ +int dstrdcat(dchar **dest, const dchar *src); + +/** + * Append to a dynamic string. + * + * @param dest + * The destination dynamic string. + * @param src + * The string to append. + * @param len + * The exact number of characters to take from src. + * @return + * 0 on success, -1 on failure. + */ +int dstrxcat(dchar **dest, const char *src, size_t len); + +/** + * Append a single character to a dynamic string. + * + * @param str + * The string to append to. + * @param c + * The character to append. + * @return + * 0 on success, -1 on failure. + */ +int dstrapp(dchar **str, char c); + +/** + * Copy a string into a dynamic string. + * + * @param dest + * The destination dynamic string. + * @param src + * The string to copy. + * @returns + * 0 on success, -1 on failure. + */ +int dstrcpy(dchar **dest, const char *str); + +/** + * Copy a dynamic string into another one. + * + * @param dest + * The destination dynamic string. + * @param src + * The dynamic string to copy. + * @returns + * 0 on success, -1 on failure. + */ +int dstrdcpy(dchar **dest, const dchar *str); + +/** + * Copy a string into a dynamic string. + * + * @param dest + * The destination dynamic string. + * @param src + * The dynamic string to copy. + * @param n + * The maximum number of characters to take from src. + * @returns + * 0 on success, -1 on failure. + */ +int dstrncpy(dchar **dest, const char *str, size_t n); + +/** + * Copy a string into a dynamic string. + * + * @param dest + * The destination dynamic string. + * @param src + * The dynamic string to copy. + * @param len + * The exact number of characters to take from src. + * @returns + * 0 on success, -1 on failure. + */ +int dstrxcpy(dchar **dest, const char *str, size_t len); + +/** + * Create a dynamic string from a format string. + * + * @param format + * The format string to fill in. + * @param ... + * Any arguments for the format string. + * @return + * The created string, or NULL on failure. + */ +attr(printf(1, 2)) +dchar *dstrprintf(const char *format, ...); + +/** + * Create a dynamic string from a format string and a va_list. + * + * @param format + * The format string to fill in. + * @param args + * The arguments for the format string. + * @return + * The created string, or NULL on failure. + */ +attr(printf(1, 0)) +dchar *dstrvprintf(const char *format, va_list args); + +/** + * Format some text onto the end of a dynamic string. + * + * @param str + * The destination dynamic string. + * @param format + * The format string to fill in. + * @param ... + * Any arguments for the format string. + * @return + * 0 on success, -1 on failure. + */ +attr(printf(2, 3)) +int dstrcatf(dchar **str, const char *format, ...); + +/** + * Format some text from a va_list onto the end of a dynamic string. + * + * @param str + * The destination dynamic string. + * @param format + * The format string to fill in. + * @param args + * The arguments for the format string. + * @return + * 0 on success, -1 on failure. + */ +attr(printf(2, 0)) +int dstrvcatf(dchar **str, const char *format, va_list args); + +/** + * Concatenate while shell-escaping. + * + * @param dest + * The destination dynamic string. + * @param str + * The string to escape. + * @param flags + * Flags for wordesc(). + * @return + * 0 on success, -1 on failure. + */ +int dstrescat(dchar **dest, const char *str, enum wesc_flags flags); + +/** + * Concatenate while shell-escaping. + * + * @param dest + * The destination dynamic string. + * @param str + * The string to escape. + * @param n + * The maximum length of the string. + * @param flags + * Flags for wordesc(). + * @return + * 0 on success, -1 on failure. + */ +int dstrnescat(dchar **dest, const char *str, size_t n, enum wesc_flags flags); + +#endif // BFS_DSTRING_H diff --git a/src/eval.c b/src/eval.c new file mode 100644 index 0000000..4fcda60 --- /dev/null +++ b/src/eval.c @@ -0,0 +1,1696 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +/** + * Implementation of all the primary expressions. + */ + +#include "prelude.h" +#include "eval.h" +#include "bar.h" +#include "bfstd.h" +#include "bftw.h" +#include "color.h" +#include "ctx.h" +#include "diag.h" +#include "dir.h" +#include "dstring.h" +#include "exec.h" +#include "expr.h" +#include "fsade.h" +#include "mtab.h" +#include "printf.h" +#include "pwcache.h" +#include "sanity.h" +#include "stat.h" +#include "trie.h" +#include "xregex.h" +#include <errno.h> +#include <fcntl.h> +#include <fnmatch.h> +#include <grp.h> +#include <pwd.h> +#include <stdarg.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <strings.h> +#include <sys/resource.h> +#include <sys/types.h> +#include <time.h> +#include <unistd.h> +#include <wchar.h> + +struct bfs_eval { + /** Data about the current file. */ + const struct BFTW *ftwbuf; + /** The bfs context. */ + const struct bfs_ctx *ctx; + /** The bftw() callback return value. */ + enum bftw_action action; + /** The bfs_eval() return value. */ + int *ret; + /** Whether to quit immediately. */ + bool quit; +}; + +/** + * Print an error message. + */ +attr(printf(2, 3)) +static void eval_error(struct bfs_eval *state, const char *format, ...) { + // By POSIX, any errors should be accompanied by a non-zero exit status + *state->ret = EXIT_FAILURE; + + int error = errno; + const struct bfs_ctx *ctx = state->ctx; + CFILE *cerr = ctx->cerr; + + bfs_error(ctx, "%pP: ", state->ftwbuf); + + va_list args; + va_start(args, format); + errno = error; + cvfprintf(cerr, format, args); + va_end(args); +} + +/** + * Check if an error should be ignored. + */ +static bool eval_should_ignore(const struct bfs_eval *state, int error) { + return state->ctx->ignore_races + && error_is_like(error, ENOENT) + && state->ftwbuf->depth > 0; +} + +/** + * Report an error that occurs during evaluation. + */ +static void eval_report_error(struct bfs_eval *state) { + if (!eval_should_ignore(state, errno)) { + eval_error(state, "%m.\n"); + } +} + +/** + * Report an I/O error that occurs during evaluation. + */ +static void eval_io_error(const struct bfs_expr *expr, struct bfs_eval *state) { + if (expr->path) { + eval_error(state, "'%s': %m.\n", expr->path); + } else { + eval_error(state, "(standard output): %m.\n"); + } + + // Don't report the error again in bfs_ctx_free() + clearerr(expr->cfile->file); +} + +/** + * Perform a bfs_stat() call if necessary. + */ +static const struct bfs_stat *eval_stat(struct bfs_eval *state) { + const struct BFTW *ftwbuf = state->ftwbuf; + const struct bfs_stat *ret = bftw_stat(ftwbuf, ftwbuf->stat_flags); + if (!ret) { + eval_report_error(state); + } + return ret; +} + +/** + * Get the difference (in seconds) between two struct timespecs. + */ +static time_t timespec_diff(const struct timespec *lhs, const struct timespec *rhs) { + time_t ret = lhs->tv_sec - rhs->tv_sec; + if (lhs->tv_nsec < rhs->tv_nsec) { + --ret; + } + return ret; +} + +bool bfs_expr_cmp(const struct bfs_expr *expr, long long n) { + switch (expr->int_cmp) { + case BFS_INT_EQUAL: + return n == expr->num; + case BFS_INT_LESS: + return n < expr->num; + case BFS_INT_GREATER: + return n > expr->num; + } + + bfs_bug("Invalid comparison mode"); + return false; +} + +/** Common code for fnmatch() tests. */ +static bool eval_fnmatch(const struct bfs_expr *expr, const char *str) { + if (expr->literal) { +#ifdef FNM_CASEFOLD + if (expr->fnm_flags & FNM_CASEFOLD) { + return strcasecmp(expr->pattern, str) == 0; + } +#endif + return strcmp(expr->pattern, str) == 0; + } else { + return fnmatch(expr->pattern, str, expr->fnm_flags) == 0; + } +} + +/** + * -true test. + */ +bool eval_true(const struct bfs_expr *expr, struct bfs_eval *state) { + return true; +} + +/** + * -false test. + */ +bool eval_false(const struct bfs_expr *expr, struct bfs_eval *state) { + return false; +} + +/** + * -executable, -readable, -writable tests. + */ +bool eval_access(const struct bfs_expr *expr, struct bfs_eval *state) { + const struct BFTW *ftwbuf = state->ftwbuf; + return xfaccessat(ftwbuf->at_fd, ftwbuf->at_path, expr->num) == 0; +} + +/** + * -acl test. + */ +bool eval_acl(const struct bfs_expr *expr, struct bfs_eval *state) { + int ret = bfs_check_acl(state->ftwbuf); + if (ret >= 0) { + return ret; + } else { + eval_report_error(state); + return false; + } +} + +/** + * -capable test. + */ +bool eval_capable(const struct bfs_expr *expr, struct bfs_eval *state) { + int ret = bfs_check_capabilities(state->ftwbuf); + if (ret >= 0) { + return ret; + } else { + eval_report_error(state); + return false; + } +} + +/** + * -context test. + */ +bool eval_context(const struct bfs_expr *expr, struct bfs_eval *state) { + char *con = bfs_getfilecon(state->ftwbuf); + if (!con) { + eval_report_error(state); + return false; + } + + bool ret = eval_fnmatch(expr, con); + bfs_freecon(con); + return ret; +} + +/** + * Get the given timespec field out of a stat buffer. + */ +static const struct timespec *eval_stat_time(const struct bfs_stat *statbuf, enum bfs_stat_field field, struct bfs_eval *state) { + const struct timespec *ret = bfs_stat_time(statbuf, field); + if (!ret) { + eval_error(state, "Couldn't get file %s: %m.\n", bfs_stat_field_name(field)); + } + return ret; +} + +/** + * -[aBcm]?newer tests. + */ +bool eval_newer(const struct bfs_expr *expr, struct bfs_eval *state) { + const struct bfs_stat *statbuf = eval_stat(state); + if (!statbuf) { + return false; + } + + const struct timespec *time = eval_stat_time(statbuf, expr->stat_field, state); + if (!time) { + return false; + } + + return time->tv_sec > expr->reftime.tv_sec + || (time->tv_sec == expr->reftime.tv_sec && time->tv_nsec > expr->reftime.tv_nsec); +} + +/** + * -[aBcm]{min,time} tests. + */ +bool eval_time(const struct bfs_expr *expr, struct bfs_eval *state) { + const struct bfs_stat *statbuf = eval_stat(state); + if (!statbuf) { + return false; + } + + const struct timespec *time = eval_stat_time(statbuf, expr->stat_field, state); + if (!time) { + return false; + } + + time_t diff = timespec_diff(&expr->reftime, time); + switch (expr->time_unit) { + case BFS_DAYS: + diff /= 60 * 24; + fallthru; + case BFS_MINUTES: + diff /= 60; + fallthru; + case BFS_SECONDS: + break; + } + + return bfs_expr_cmp(expr, diff); +} + +/** + * -used test. + */ +bool eval_used(const struct bfs_expr *expr, struct bfs_eval *state) { + const struct bfs_stat *statbuf = eval_stat(state); + if (!statbuf) { + return false; + } + + const struct timespec *atime = eval_stat_time(statbuf, BFS_STAT_ATIME, state); + const struct timespec *ctime = eval_stat_time(statbuf, BFS_STAT_CTIME, state); + if (!atime || !ctime) { + return false; + } + + long long diff = timespec_diff(atime, ctime); + if (diff < 0) { + return false; + } + + long long day_seconds = 60 * 60 * 24; + diff = (diff + day_seconds - 1) / day_seconds; + return bfs_expr_cmp(expr, diff); +} + +/** + * -gid test. + */ +bool eval_gid(const struct bfs_expr *expr, struct bfs_eval *state) { + const struct bfs_stat *statbuf = eval_stat(state); + if (!statbuf) { + return false; + } + + return bfs_expr_cmp(expr, statbuf->gid); +} + +/** + * -uid test. + */ +bool eval_uid(const struct bfs_expr *expr, struct bfs_eval *state) { + const struct bfs_stat *statbuf = eval_stat(state); + if (!statbuf) { + return false; + } + + return bfs_expr_cmp(expr, statbuf->uid); +} + +/** + * -nogroup test. + */ +bool eval_nogroup(const struct bfs_expr *expr, struct bfs_eval *state) { + const struct bfs_stat *statbuf = eval_stat(state); + if (!statbuf) { + return false; + } + + const struct group *grp = bfs_getgrgid(state->ctx->groups, statbuf->gid); + if (errno != 0) { + eval_report_error(state); + } + return grp == NULL; +} + +/** + * -nouser test. + */ +bool eval_nouser(const struct bfs_expr *expr, struct bfs_eval *state) { + const struct bfs_stat *statbuf = eval_stat(state); + if (!statbuf) { + return false; + } + + const struct passwd *pwd = bfs_getpwuid(state->ctx->users, statbuf->uid); + if (errno != 0) { + eval_report_error(state); + } + return pwd == NULL; +} + +/** + * -delete action. + */ +bool eval_delete(const struct bfs_expr *expr, struct bfs_eval *state) { + const struct BFTW *ftwbuf = state->ftwbuf; + + // Don't try to delete the current directory + if (strcmp(ftwbuf->path, ".") == 0) { + return true; + } + + int flag = 0; + + // We need to know the actual type of the path, not what it points to + enum bfs_type type = bftw_type(ftwbuf, BFS_STAT_NOFOLLOW); + if (type == BFS_DIR) { + flag |= AT_REMOVEDIR; + } else if (type == BFS_ERROR) { + eval_report_error(state); + return false; + } + + if (unlinkat(ftwbuf->at_fd, ftwbuf->at_path, flag) != 0) { + eval_report_error(state); + return false; + } + + return true; +} + +/** Finish any pending -exec ... + operations. */ +static int eval_exec_finish(const struct bfs_expr *expr, const struct bfs_ctx *ctx) { + int ret = 0; + + if (expr->eval_fn == eval_exec) { + if (bfs_exec_finish(expr->exec) != 0) { + if (errno != 0) { + bfs_error(ctx, "%s %s: %m.\n", expr->argv[0], expr->argv[1]); + } + ret = -1; + } + } + + for (struct bfs_expr *child = bfs_expr_children(expr); child; child = child->next) { + if (eval_exec_finish(child, ctx) != 0) { + ret = -1; + } + } + + return ret; +} + +/** + * -exec[dir]/-ok[dir] actions. + */ +bool eval_exec(const struct bfs_expr *expr, struct bfs_eval *state) { + bool ret = bfs_exec(expr->exec, state->ftwbuf) == 0; + if (errno != 0) { + eval_error(state, "%s %s: %m.\n", expr->argv[0], expr->argv[1]); + } + return ret; +} + +/** + * -exit action. + */ +bool eval_exit(const struct bfs_expr *expr, struct bfs_eval *state) { + state->action = BFTW_STOP; + *state->ret = expr->num; + state->quit = true; + return true; +} + +/** + * -depth N test. + */ +bool eval_depth(const struct bfs_expr *expr, struct bfs_eval *state) { + return bfs_expr_cmp(expr, state->ftwbuf->depth); +} + +/** + * -empty test. + */ +bool eval_empty(const struct bfs_expr *expr, struct bfs_eval *state) { + const struct BFTW *ftwbuf = state->ftwbuf; + const struct bfs_stat *statbuf; + struct bfs_dir *dir; + + switch (ftwbuf->type) { + case BFS_REG: + statbuf = eval_stat(state); + return statbuf && statbuf->size == 0; + + case BFS_DIR: + dir = bfs_allocdir(); + if (!dir) { + goto error; + } + + if (bfs_opendir(dir, ftwbuf->at_fd, ftwbuf->at_path, 0) != 0) { + goto error; + } + + int did_read = bfs_readdir(dir, NULL); + bfs_closedir(dir); + + if (did_read < 0) { + goto error; + } + + free(dir); + return did_read == 0; + error: + eval_report_error(state); + free(dir); + return false; + + default: + return false; + } +} + +/** + * -flags test. + */ +bool eval_flags(const struct bfs_expr *expr, struct bfs_eval *state) { + const struct bfs_stat *statbuf = eval_stat(state); + if (!statbuf) { + return false; + } + + if (!(statbuf->mask & BFS_STAT_ATTRS)) { + eval_error(state, "Couldn't get file %s.\n", bfs_stat_field_name(BFS_STAT_ATTRS)); + return false; + } + + unsigned long flags = statbuf->attrs; + unsigned long set = expr->set_flags; + unsigned long clear = expr->clear_flags; + + switch (expr->flags_cmp) { + case BFS_MODE_EQUAL: + return flags == set && !(flags & clear); + + case BFS_MODE_ALL: + return (flags & set) == set && !(flags & clear); + + case BFS_MODE_ANY: + return (flags & set) || (flags & clear) != clear; + } + + bfs_bug("Invalid comparison mode"); + return false; +} + +/** + * -fstype test. + */ +bool eval_fstype(const struct bfs_expr *expr, struct bfs_eval *state) { + const struct bfs_stat *statbuf = eval_stat(state); + if (!statbuf) { + return false; + } + + const struct bfs_mtab *mtab = bfs_ctx_mtab(state->ctx); + if (!mtab) { + eval_report_error(state); + return false; + } + + const char *type = bfs_fstype(mtab, statbuf); + if (!type) { + eval_report_error(state); + return false; + } + + return strcmp(type, expr->argv[1]) == 0; +} + +/** + * -hidden test. + */ +bool eval_hidden(const struct bfs_expr *expr, struct bfs_eval *state) { + const struct BFTW *ftwbuf = state->ftwbuf; + const char *name = ftwbuf->path + ftwbuf->nameoff; + + // Don't treat "." or ".." as hidden directories. Otherwise we'd filter + // out everything when given + // + // $ bfs . -nohidden + // $ bfs .. -nohidden + return name[0] == '.' && strcmp(name, ".") != 0 && strcmp(name, "..") != 0; +} + +/** + * -inum test. + */ +bool eval_inum(const struct bfs_expr *expr, struct bfs_eval *state) { + const struct bfs_stat *statbuf = eval_stat(state); + if (!statbuf) { + return false; + } + + return bfs_expr_cmp(expr, statbuf->ino); +} + +/** + * -links test. + */ +bool eval_links(const struct bfs_expr *expr, struct bfs_eval *state) { + const struct bfs_stat *statbuf = eval_stat(state); + if (!statbuf) { + return false; + } + + return bfs_expr_cmp(expr, statbuf->nlink); +} + +/** + * -i?lname test. + */ +bool eval_lname(const struct bfs_expr *expr, struct bfs_eval *state) { + bool ret = false; + char *name = NULL; + + const struct BFTW *ftwbuf = state->ftwbuf; + if (ftwbuf->type != BFS_LNK) { + goto done; + } + + const struct bfs_stat *statbuf = bftw_cached_stat(ftwbuf, BFS_STAT_NOFOLLOW); + size_t len = statbuf ? statbuf->size : 0; + + name = xreadlinkat(ftwbuf->at_fd, ftwbuf->at_path, len); + if (!name) { + eval_report_error(state); + goto done; + } + + ret = eval_fnmatch(expr, name); + +done: + free(name); + return ret; +} + +/** + * -i?name test. + */ +bool eval_name(const struct bfs_expr *expr, struct bfs_eval *state) { + bool ret = false; + const struct BFTW *ftwbuf = state->ftwbuf; + + const char *name = ftwbuf->path + ftwbuf->nameoff; + char *copy = NULL; + if (ftwbuf->depth == 0) { + // Any trailing slashes are not part of the name. This can only + // happen for the root path. + name = copy = xbasename(name); + if (!name) { + eval_report_error(state); + goto done; + } + } + + ret = eval_fnmatch(expr, name); + +done: + free(copy); + return ret; +} + +/** + * -i?path test. + */ +bool eval_path(const struct bfs_expr *expr, struct bfs_eval *state) { + return eval_fnmatch(expr, state->ftwbuf->path); +} + +/** + * -perm test. + */ +bool eval_perm(const struct bfs_expr *expr, struct bfs_eval *state) { + const struct bfs_stat *statbuf = eval_stat(state); + if (!statbuf) { + return false; + } + + mode_t mode = statbuf->mode; + mode_t target; + if (state->ftwbuf->type == BFS_DIR) { + target = expr->dir_mode; + } else { + target = expr->file_mode; + } + + switch (expr->mode_cmp) { + case BFS_MODE_EQUAL: + return (mode & 07777) == target; + + case BFS_MODE_ALL: + return (mode & target) == target; + + case BFS_MODE_ANY: + return !(mode & target) == !target; + } + + bfs_bug("Invalid comparison mode"); + return false; +} + +/** Print a user/group name/id, and update the column width. */ +static int print_owner(FILE *file, const char *name, uintmax_t id, int *width) { + if (name) { + int len = xstrwidth(name); + if (*width < len) { + *width = len; + } + + return fprintf(file, " %s%*s", name, *width - len, ""); + } else { + int ret = fprintf(file, " %-*ju", *width, id); + if (ret >= 0 && *width < ret - 1) { + *width = ret - 1; + } + return ret; + } +} + +/** + * -f?ls action. + */ +bool eval_fls(const struct bfs_expr *expr, struct bfs_eval *state) { + CFILE *cfile = expr->cfile; + FILE *file = cfile->file; + const struct bfs_ctx *ctx = state->ctx; + const struct BFTW *ftwbuf = state->ftwbuf; + const struct bfs_stat *statbuf = eval_stat(state); + if (!statbuf) { + goto done; + } + + // ls -l prints non-path text in the "normal" color, so do the same + if (cfprintf(cfile, "${no}") < 0) { + goto error; + } + + uintmax_t ino = statbuf->ino; + uintmax_t block_size = ctx->posixly_correct ? 512 : 1024; + uintmax_t blocks = ((uintmax_t)statbuf->blocks * BFS_STAT_BLKSIZE + block_size - 1) / block_size; + char mode[11]; + xstrmode(statbuf->mode, mode); + char acl = bfs_check_acl(ftwbuf) > 0 ? '+' : ' '; + uintmax_t nlink = statbuf->nlink; + if (fprintf(file, "%9ju %6ju %s%c %2ju", ino, blocks, mode, acl, nlink) < 0) { + goto error; + } + + const struct passwd *pwd = bfs_getpwuid(ctx->users, statbuf->uid); + static int uwidth = 8; + if (print_owner(file, pwd ? pwd->pw_name : NULL, statbuf->uid, &uwidth) < 0) { + goto error; + } + + const struct group *grp = bfs_getgrgid(ctx->groups, statbuf->gid); + static int gwidth = 8; + if (print_owner(file, grp ? grp->gr_name : NULL, statbuf->gid, &gwidth) < 0) { + goto error; + } + + if (ftwbuf->type == BFS_BLK || ftwbuf->type == BFS_CHR) { + int ma = xmajor(statbuf->rdev); + int mi = xminor(statbuf->rdev); + if (fprintf(file, " %3d, %3d", ma, mi) < 0) { + goto error; + } + } else { + uintmax_t size = statbuf->size; + if (fprintf(file, " %8ju", size) < 0) { + goto error; + } + } + + time_t time = statbuf->mtime.tv_sec; + time_t now = ctx->now.tv_sec; + time_t six_months_ago = now - 6 * 30 * 24 * 60 * 60; + time_t tomorrow = now + 24 * 60 * 60; + struct tm tm; + if (!localtime_r(&time, &tm)) { + goto error; + } + char time_str[256]; + size_t time_ret; + if (time <= six_months_ago || time >= tomorrow) { + time_ret = strftime(time_str, sizeof(time_str), "%b %e %Y", &tm); + } else { + time_ret = strftime(time_str, sizeof(time_str), "%b %e %H:%M", &tm); + } + if (time_ret == 0) { + errno = EOVERFLOW; + goto error; + } + if (cfprintf(cfile, " %s${rs}", time_str) < 0) { + goto error; + } + + if (cfprintf(cfile, " %pP", ftwbuf) < 0) { + goto error; + } + + if (ftwbuf->type == BFS_LNK) { + if (cfprintf(cfile, " -> %pL", ftwbuf) < 0) { + goto error; + } + } + + if (fputc('\n', file) == EOF) { + goto error; + } + +done: + return true; + +error: + eval_io_error(expr, state); + return true; +} + +/** + * -f?print action. + */ +bool eval_fprint(const struct bfs_expr *expr, struct bfs_eval *state) { + if (cfprintf(expr->cfile, "%pP\n", state->ftwbuf) < 0) { + eval_io_error(expr, state); + } + return true; +} + +/** + * -f?print0 action. + */ +bool eval_fprint0(const struct bfs_expr *expr, struct bfs_eval *state) { + const char *path = state->ftwbuf->path; + size_t length = strlen(path) + 1; + if (fwrite(path, 1, length, expr->cfile->file) != length) { + eval_io_error(expr, state); + } + return true; +} + +/** + * -f?printf action. + */ +bool eval_fprintf(const struct bfs_expr *expr, struct bfs_eval *state) { + if (bfs_printf(expr->cfile, expr->printf, state->ftwbuf) != 0) { + eval_io_error(expr, state); + } + + return true; +} + +/** + * -printx action. + */ +bool eval_fprintx(const struct bfs_expr *expr, struct bfs_eval *state) { + FILE *file = expr->cfile->file; + const char *path = state->ftwbuf->path; + + while (true) { + size_t span = strcspn(path, " \t\n\\$'\"`"); + if (fwrite(path, 1, span, file) != span) { + goto error; + } + path += span; + + char c = path[0]; + if (!c) { + break; + } + + char escaped[] = {'\\', c}; + if (fwrite(escaped, 1, sizeof(escaped), file) != sizeof(escaped)) { + goto error; + } + ++path; + } + + if (fputc('\n', file) == EOF) { + goto error; + } + + return true; + +error: + eval_io_error(expr, state); + return true; +} + +/** + * -limit action. + */ +bool eval_limit(const struct bfs_expr *expr, struct bfs_eval *state) { + long long evals = expr->evaluations + 1; + if (evals >= expr->num) { + state->action = BFTW_STOP; + state->quit = true; + } + + return true; +} + +/** + * -prune action. + */ +bool eval_prune(const struct bfs_expr *expr, struct bfs_eval *state) { + state->action = BFTW_PRUNE; + return true; +} + +/** + * -quit action. + */ +bool eval_quit(const struct bfs_expr *expr, struct bfs_eval *state) { + state->action = BFTW_STOP; + state->quit = true; + return true; +} + +/** + * -i?regex test. + */ +bool eval_regex(const struct bfs_expr *expr, struct bfs_eval *state) { + const char *path = state->ftwbuf->path; + + int ret = bfs_regexec(expr->regex, path, BFS_REGEX_ANCHOR); + if (ret < 0) { + char *str = bfs_regerror(expr->regex); + if (str) { + eval_error(state, "%s.\n", str); + free(str); + } else { + eval_error(state, "bfs_regerror(): %m.\n"); + } + } + + return ret > 0; +} + +/** + * -samefile test. + */ +bool eval_samefile(const struct bfs_expr *expr, struct bfs_eval *state) { + const struct bfs_stat *statbuf = eval_stat(state); + if (!statbuf) { + return false; + } + + return statbuf->dev == expr->dev && statbuf->ino == expr->ino; +} + +/** + * -size test. + */ +bool eval_size(const struct bfs_expr *expr, struct bfs_eval *state) { + const struct bfs_stat *statbuf = eval_stat(state); + if (!statbuf) { + return false; + } + + static const off_t scales[] = { + [BFS_BLOCKS] = 512, + [BFS_BYTES] = 1, + [BFS_WORDS] = 2, + [BFS_KB] = 1LL << 10, + [BFS_MB] = 1LL << 20, + [BFS_GB] = 1LL << 30, + [BFS_TB] = 1LL << 40, + [BFS_PB] = 1LL << 50, + }; + + off_t scale = scales[expr->size_unit]; + off_t size = (statbuf->size + scale - 1) / scale; // Round up + return bfs_expr_cmp(expr, size); +} + +/** + * -sparse test. + */ +bool eval_sparse(const struct bfs_expr *expr, struct bfs_eval *state) { + const struct bfs_stat *statbuf = eval_stat(state); + if (!statbuf) { + return false; + } + + blkcnt_t expected = (statbuf->size + BFS_STAT_BLKSIZE - 1) / BFS_STAT_BLKSIZE; + return statbuf->blocks < expected; +} + +/** + * -type test. + */ +bool eval_type(const struct bfs_expr *expr, struct bfs_eval *state) { + return (1 << state->ftwbuf->type) & expr->num; +} + +/** + * -xattr test. + */ +bool eval_xattr(const struct bfs_expr *expr, struct bfs_eval *state) { + int ret = bfs_check_xattrs(state->ftwbuf); + if (ret >= 0) { + return ret; + } else { + eval_report_error(state); + return false; + } +} + +/** + * -xattrname test. + */ +bool eval_xattrname(const struct bfs_expr *expr, struct bfs_eval *state) { + int ret = bfs_check_xattr_named(state->ftwbuf, expr->argv[1]); + if (ret >= 0) { + return ret; + } else { + eval_report_error(state); + return false; + } +} + +/** + * -xtype test. + */ +bool eval_xtype(const struct bfs_expr *expr, struct bfs_eval *state) { + const struct BFTW *ftwbuf = state->ftwbuf; + enum bfs_stat_flags flags = ftwbuf->stat_flags ^ (BFS_STAT_NOFOLLOW | BFS_STAT_TRYFOLLOW); + enum bfs_type type = bftw_type(ftwbuf, flags); + if (type == BFS_ERROR) { + eval_report_error(state); + return false; + } else { + return (1 << type) & expr->num; + } +} + +#if _POSIX_MONOTONIC_CLOCK > 0 +# define BFS_CLOCK CLOCK_MONOTONIC +#elif _POSIX_TIMERS > 0 +# define BFS_CLOCK CLOCK_REALTIME +#endif + +/** + * Call clock_gettime(), if available. + */ +static int eval_gettime(struct bfs_eval *state, struct timespec *ts) { +#ifdef BFS_CLOCK + int ret = clock_gettime(BFS_CLOCK, ts); + if (ret != 0) { + bfs_warning(state->ctx, "%pP: clock_gettime(): %m.\n", state->ftwbuf); + } + return ret; +#else + return -1; +#endif +} + +/** + * Record an elapsed time. + */ +static void timespec_elapsed(struct timespec *elapsed, const struct timespec *start, const struct timespec *end) { + elapsed->tv_sec += end->tv_sec - start->tv_sec; + elapsed->tv_nsec += end->tv_nsec - start->tv_nsec; + if (elapsed->tv_nsec < 0) { + elapsed->tv_nsec += 1000000000L; + --elapsed->tv_sec; + } else if (elapsed->tv_nsec >= 1000000000L) { + elapsed->tv_nsec -= 1000000000L; + ++elapsed->tv_sec; + } +} + +/** + * Evaluate an expression. + */ +static bool eval_expr(struct bfs_expr *expr, struct bfs_eval *state) { + struct timespec start, end; + bool time = state->ctx->debug & DEBUG_RATES; + if (time) { + if (eval_gettime(state, &start) != 0) { + time = false; + } + } + + bfs_assert(!state->quit); + + bool ret = expr->eval_fn(expr, state); + + if (time) { + if (eval_gettime(state, &end) == 0) { + timespec_elapsed(&expr->elapsed, &start, &end); + } + } + + ++expr->evaluations; + if (ret) { + ++expr->successes; + } + + if (bfs_expr_never_returns(expr)) { + bfs_assert(state->quit); + } else if (!state->quit) { + bfs_assert(!expr->always_true || ret); + bfs_assert(!expr->always_false || !ret); + } + + return ret; +} + +/** + * Evaluate a negation. + */ +bool eval_not(const struct bfs_expr *expr, struct bfs_eval *state) { + return !eval_expr(bfs_expr_children(expr), state); +} + +/** + * Evaluate a conjunction. + */ +bool eval_and(const struct bfs_expr *expr, struct bfs_eval *state) { + for (struct bfs_expr *child = bfs_expr_children(expr); child; child = child->next) { + if (!eval_expr(child, state) || state->quit) { + return false; + } + } + + return true; +} + +/** + * Evaluate a disjunction. + */ +bool eval_or(const struct bfs_expr *expr, struct bfs_eval *state) { + for (struct bfs_expr *child = bfs_expr_children(expr); child; child = child->next) { + if (eval_expr(child, state) || state->quit) { + return true; + } + } + + return false; +} + +/** + * Evaluate the comma operator. + */ +bool eval_comma(const struct bfs_expr *expr, struct bfs_eval *state) { + bool ret uninit(false); + + for (struct bfs_expr *child = bfs_expr_children(expr); child; child = child->next) { + ret = eval_expr(child, state); + if (state->quit) { + break; + } + } + + return ret; +} + +/** Update the status bar. */ +static void eval_status(struct bfs_eval *state, struct bfs_bar *bar, struct timespec *last_status, size_t count) { + struct timespec now; + if (eval_gettime(state, &now) == 0) { + struct timespec elapsed = {0}; + timespec_elapsed(&elapsed, last_status, &now); + + // Update every 0.1s + if (elapsed.tv_sec > 0 || elapsed.tv_nsec >= 100000000L) { + *last_status = now; + } else { + return; + } + } + + size_t width = bfs_bar_width(bar); + if (width < 3) { + return; + } + + const struct BFTW *ftwbuf = state->ftwbuf; + + dchar *status = NULL; + dchar *rhs = dstrprintf(" (visited: %'zu; depth: %2zu)", count, ftwbuf->depth); + if (!rhs) { + return; + } + + size_t rhslen = xstrwidth(rhs); + if (3 + rhslen > width) { + dstresize(&rhs, 0); + rhslen = 0; + } + + status = dstralloc(0); + if (!status) { + goto out; + } + + const char *path = ftwbuf->path; + size_t pathlen = ftwbuf->nameoff; + if (ftwbuf->depth == 0) { + pathlen = strlen(path); + } + + // Escape weird filename characters + if (dstrnescat(&status, path, pathlen, WESC_TTY) != 0) { + goto out; + } + pathlen = dstrlen(status); + + // Try to make sure even wide characters fit in the status bar + size_t pathmax = width - rhslen - 3; + size_t pathwidth = 0; + size_t lhslen = 0; + mbstate_t mb = {0}; + for (size_t i = lhslen; lhslen < pathlen; lhslen = i) { + wint_t wc = xmbrtowc(status, &i, pathlen, &mb); + int cwidth; + if (wc == WEOF) { + // Invalid byte sequence, assume a single-width '?' + cwidth = 1; + } else { + cwidth = xwcwidth(wc); + if (cwidth < 0) { + cwidth = 0; + } + } + + if (pathwidth + cwidth > pathmax) { + break; + } + pathwidth += cwidth; + } + dstresize(&status, lhslen); + + if (dstrcat(&status, "...") != 0) { + goto out; + } + + while (pathwidth < pathmax) { + if (dstrapp(&status, ' ') != 0) { + goto out; + } + ++pathwidth; + } + + if (dstrdcat(&status, rhs) != 0) { + goto out; + } + + bfs_bar_update(bar, status); + +out: + dstrfree(status); + dstrfree(rhs); +} + +/** Check if we've seen a file before. */ +static bool eval_file_unique(struct bfs_eval *state, struct trie *seen) { + const struct bfs_stat *statbuf = eval_stat(state); + if (!statbuf) { + return false; + } + + bfs_file_id id; + bfs_stat_id(statbuf, &id); + + struct trie_leaf *leaf = trie_insert_mem(seen, id, sizeof(id)); + if (!leaf) { + eval_report_error(state); + return false; + } + + if (leaf->value) { + state->action = BFTW_PRUNE; + return false; + } else { + leaf->value = leaf; + return true; + } +} + +#define DEBUG_FLAG(flags, flag) \ + do { \ + if ((flags & flag) || flags == flag) { \ + fputs(#flag, stderr); \ + flags ^= flag; \ + if (flags) { \ + fputs(" | ", stderr); \ + } \ + } \ + } while (0) + +/** + * Log a stat() call. + */ +static void debug_stat(const struct bfs_ctx *ctx, const struct BFTW *ftwbuf, enum bfs_stat_flags flags, int err) { + bfs_debug_prefix(ctx, DEBUG_STAT); + + fprintf(stderr, "bfs_stat("); + if (ftwbuf->at_fd == (int)AT_FDCWD) { + fprintf(stderr, "AT_FDCWD"); + } else { + size_t baselen = strlen(ftwbuf->path) - strlen(ftwbuf->at_path); + fprintf(stderr, "\""); + fwrite(ftwbuf->path, 1, baselen, stderr); + fprintf(stderr, "\""); + } + + fprintf(stderr, ", \"%s\", ", ftwbuf->at_path); + + DEBUG_FLAG(flags, BFS_STAT_FOLLOW); + DEBUG_FLAG(flags, BFS_STAT_NOFOLLOW); + DEBUG_FLAG(flags, BFS_STAT_TRYFOLLOW); + DEBUG_FLAG(flags, BFS_STAT_NOSYNC); + + fprintf(stderr, ") == %d", err ? 0 : -1); + + if (err) { + fprintf(stderr, " [%d]", err); + } + + fprintf(stderr, "\n"); +} + +/** + * Log any stat() calls that happened. + */ +static void debug_stats(const struct bfs_ctx *ctx, const struct BFTW *ftwbuf) { + if (!(ctx->debug & DEBUG_STAT)) { + return; + } + + const struct bftw_stat *bufs = &ftwbuf->stat_bufs; + + if (bufs->stat_err >= 0) { + debug_stat(ctx, ftwbuf, BFS_STAT_FOLLOW, bufs->stat_err); + } + + if (bufs->lstat_err >= 0) { + debug_stat(ctx, ftwbuf, BFS_STAT_NOFOLLOW, bufs->lstat_err); + } +} + +#define DUMP_MAP(value) [value] = #value + +/** + * Dump the bfs_type for -D search. + */ +static const char *dump_bfs_type(enum bfs_type type) { + static const char *types[] = { + DUMP_MAP(BFS_UNKNOWN), + DUMP_MAP(BFS_BLK), + DUMP_MAP(BFS_CHR), + DUMP_MAP(BFS_DIR), + DUMP_MAP(BFS_DOOR), + DUMP_MAP(BFS_FIFO), + DUMP_MAP(BFS_LNK), + DUMP_MAP(BFS_PORT), + DUMP_MAP(BFS_REG), + DUMP_MAP(BFS_SOCK), + DUMP_MAP(BFS_WHT), + }; + + if (type == BFS_ERROR) { + return "BFS_ERROR"; + } else { + return types[type]; + } +} + +/** + * Dump the bftw_visit for -D search. + */ +static const char *dump_bftw_visit(enum bftw_visit visit) { + static const char *visits[] = { + DUMP_MAP(BFTW_PRE), + DUMP_MAP(BFTW_POST), + }; + return visits[visit]; +} + +/** + * Dump the bftw_action for -D search. + */ +static const char *dump_bftw_action(enum bftw_action action) { + static const char *actions[] = { + DUMP_MAP(BFTW_CONTINUE), + DUMP_MAP(BFTW_PRUNE), + DUMP_MAP(BFTW_STOP), + }; + return actions[action]; +} + +/** + * Type passed as the argument to the bftw() callback. + */ +struct callback_args { + /** The bfs context. */ + const struct bfs_ctx *ctx; + + /** The status bar. */ + struct bfs_bar *bar; + /** The time of the last status update. */ + struct timespec last_status; + /** The number of files visited so far. */ + size_t count; + + /** The set of seen files. */ + struct trie *seen; + + /** Eventual return value from bfs_eval(). */ + int ret; +}; + +/** + * bftw() callback. + */ +static enum bftw_action eval_callback(const struct BFTW *ftwbuf, void *ptr) { + struct callback_args *args = ptr; + ++args->count; + + const struct bfs_ctx *ctx = args->ctx; + + struct bfs_eval state; + state.ftwbuf = ftwbuf; + state.ctx = ctx; + state.action = BFTW_CONTINUE; + state.ret = &args->ret; + state.quit = false; + + if (args->bar) { + eval_status(&state, args->bar, &args->last_status, args->count); + } + + if (ftwbuf->type == BFS_ERROR) { + if (!eval_should_ignore(&state, ftwbuf->error)) { + eval_error(&state, "%s.\n", xstrerror(ftwbuf->error)); + } + state.action = BFTW_PRUNE; + goto done; + } + + if (ctx->unique && ftwbuf->visit == BFTW_PRE) { + if (!eval_file_unique(&state, args->seen)) { + goto done; + } + } + + if (eval_expr(ctx->exclude, &state)) { + state.action = BFTW_PRUNE; + goto done; + } + + if (ctx->xargs_safe && strpbrk(ftwbuf->path, " \t\n\'\"\\")) { + eval_error(&state, "Path is not safe for xargs.\n"); + state.action = BFTW_PRUNE; + goto done; + } + + if (ctx->maxdepth < 0 || ftwbuf->depth >= (size_t)ctx->maxdepth) { + state.action = BFTW_PRUNE; + } + + // In -depth mode, only handle directories on the BFTW_POST visit + enum bftw_visit expected_visit = BFTW_PRE; + if ((ctx->flags & BFTW_POST_ORDER) + && (ctx->strategy == BFTW_IDS || ftwbuf->type == BFS_DIR) + && ftwbuf->depth < (size_t)ctx->maxdepth) { + expected_visit = BFTW_POST; + } + + if (ftwbuf->visit == expected_visit + && ftwbuf->depth >= (size_t)ctx->mindepth + && ftwbuf->depth <= (size_t)ctx->maxdepth) { + eval_expr(ctx->expr, &state); + } + +done: + debug_stats(ctx, ftwbuf); + + if (bfs_debug(ctx, DEBUG_SEARCH, "eval_callback({\n")) { + fprintf(stderr, "\t.path = \"%s\",\n", ftwbuf->path); + fprintf(stderr, "\t.root = \"%s\",\n", ftwbuf->root); + fprintf(stderr, "\t.depth = %zu,\n", ftwbuf->depth); + fprintf(stderr, "\t.visit = %s,\n", dump_bftw_visit(ftwbuf->visit)); + fprintf(stderr, "\t.type = %s,\n", dump_bfs_type(ftwbuf->type)); + fprintf(stderr, "\t.error = %d,\n", ftwbuf->error); + fprintf(stderr, "}) == %s\n", dump_bftw_action(state.action)); + } + + return state.action; +} + +/** Raise RLIMIT_NOFILE if possible, and return the new limit. */ +static int raise_fdlimit(struct bfs_ctx *ctx) { + rlim_t cur = ctx->orig_nofile.rlim_cur; + rlim_t max = ctx->orig_nofile.rlim_max; + + rlim_t target = 64 << 10; + if (rlim_cmp(target, max) > 0) { + target = max; + } + + if (rlim_cmp(target, cur) <= 0) { + return target; + } + + const struct rlimit rl = { + .rlim_cur = target, + .rlim_max = max, + }; + + if (setrlimit(RLIMIT_NOFILE, &rl) != 0) { + return cur; + } + + ctx->cur_nofile = rl; + return target; +} + +/** Preallocate the fd table in the kernel. */ +static void reserve_fds(int limit) { + // Kernels typically implement the fd table as a dynamic array. + // Growing the array can be expensive, especially if files are being + // opened in parallel. We can work around this by allocating the + // highest possible fd, forcing the kernel to grow the table upfront. + +#ifdef F_DUPFD_CLOEXEC + int fd = fcntl(STDIN_FILENO, F_DUPFD_CLOEXEC, limit - 1); +#else + int fd = fcntl(STDIN_FILENO, F_DUPFD, limit - 1); +#endif + if (fd >= 0) { + xclose(fd); + } +} + +/** Infer the number of file descriptors available to bftw(). */ +static int infer_fdlimit(const struct bfs_ctx *ctx, int limit) { + // 3 for std{in,out,err} + int nopen = 3 + ctx->nfiles; + + // Check /proc/self/fd for the current number of open fds, if possible + // (we may have inherited more than just the standard ones) + struct bfs_dir *dir = bfs_allocdir(); + if (!dir) { + goto done; + } + + if (bfs_opendir(dir, AT_FDCWD, "/proc/self/fd", 0) != 0 + && bfs_opendir(dir, AT_FDCWD, "/dev/fd", 0) != 0) { + goto done; + } + + // Account for 'dir' itself + nopen = -1; + + while (bfs_readdir(dir, NULL) > 0) { + ++nopen; + } + bfs_closedir(dir); +done: + free(dir); + + int ret = limit - nopen; + ret -= ctx->expr->persistent_fds; + ret -= ctx->expr->ephemeral_fds; + + // bftw() needs at least 2 available fds + if (ret < 2) { + ret = 2; + } + + return ret; +} + +/** + * Dump the bftw() flags for -D search. + */ +static void dump_bftw_flags(enum bftw_flags flags) { + DEBUG_FLAG(flags, 0); + DEBUG_FLAG(flags, BFTW_STAT); + DEBUG_FLAG(flags, BFTW_RECOVER); + DEBUG_FLAG(flags, BFTW_POST_ORDER); + DEBUG_FLAG(flags, BFTW_FOLLOW_ROOTS); + DEBUG_FLAG(flags, BFTW_FOLLOW_ALL); + DEBUG_FLAG(flags, BFTW_DETECT_CYCLES); + DEBUG_FLAG(flags, BFTW_SKIP_MOUNTS); + DEBUG_FLAG(flags, BFTW_PRUNE_MOUNTS); + DEBUG_FLAG(flags, BFTW_SORT); + DEBUG_FLAG(flags, BFTW_BUFFER); + DEBUG_FLAG(flags, BFTW_WHITEOUTS); + + bfs_assert(flags == 0, "Missing bftw flag 0x%X", flags); +} + +/** + * Dump the bftw_strategy for -D search. + */ +static const char *dump_bftw_strategy(enum bftw_strategy strategy) { + static const char *strategies[] = { + DUMP_MAP(BFTW_BFS), + DUMP_MAP(BFTW_DFS), + DUMP_MAP(BFTW_IDS), + DUMP_MAP(BFTW_EDS), + }; + return strategies[strategy]; +} + +/** Check if we need to enable BFTW_BUFFER. */ +static bool eval_must_buffer(const struct bfs_expr *expr) { +#if __FreeBSD__ + // FreeBSD doesn't properly handle adding/removing directory entries + // during readdir() on NFS mounts. Work around it by passing BFTW_BUFFER + // whenever we could be mutating the directory ourselves through -delete + // or -exec. We don't attempt to handle concurrent modification by other + // processes, which are racey anyway. + // + // https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=57696 + // https://github.com/tavianator/bfs/issues/67 + + if (expr->eval_fn == eval_delete || expr->eval_fn == eval_exec) { + return true; + } + + for (struct bfs_expr *child = bfs_expr_children(expr); child; child = child->next) { + if (eval_must_buffer(child)) { + return true; + } + } +#endif // __FreeBSD__ + + return false; +} + +int bfs_eval(struct bfs_ctx *ctx) { + if (!ctx->expr) { + return EXIT_SUCCESS; + } + + struct callback_args args = { + .ctx = ctx, + .ret = EXIT_SUCCESS, + }; + + if (ctx->status) { + args.bar = bfs_bar_show(); + if (!args.bar) { + bfs_warning(ctx, "Couldn't show status bar: %m.\n\n"); + } + } + + struct trie seen; + if (ctx->unique) { + trie_init(&seen); + args.seen = &seen; + } + + int fdlimit = raise_fdlimit(ctx); + reserve_fds(fdlimit); + fdlimit = infer_fdlimit(ctx, fdlimit); + + // -1 for the main thread + int nthreads = ctx->threads - 1; + + struct bftw_args bftw_args = { + .paths = ctx->paths, + .npaths = ctx->npaths, + .callback = eval_callback, + .ptr = &args, + .nopenfd = fdlimit, + .nthreads = nthreads, + .flags = ctx->flags, + .strategy = ctx->strategy, + .mtab = bfs_ctx_mtab(ctx), + }; + + if (eval_must_buffer(ctx->expr)) { + bftw_args.flags |= BFTW_BUFFER; + } + + if (bfs_debug(ctx, DEBUG_SEARCH, "bftw({\n")) { + fprintf(stderr, "\t.paths = {\n"); + for (size_t i = 0; i < bftw_args.npaths; ++i) { + fprintf(stderr, "\t\t\"%s\",\n", bftw_args.paths[i]); + } + fprintf(stderr, "\t},\n"); + fprintf(stderr, "\t.npaths = %zu,\n", bftw_args.npaths); + fprintf(stderr, "\t.callback = eval_callback,\n"); + fprintf(stderr, "\t.ptr = &args,\n"); + fprintf(stderr, "\t.nopenfd = %d,\n", bftw_args.nopenfd); + fprintf(stderr, "\t.nthreads = %d,\n", bftw_args.nthreads); + fprintf(stderr, "\t.flags = "); + dump_bftw_flags(bftw_args.flags); + fprintf(stderr, ",\n\t.strategy = %s,\n", dump_bftw_strategy(bftw_args.strategy)); + fprintf(stderr, "\t.mtab = "); + if (bftw_args.mtab) { + fprintf(stderr, "ctx->mtab"); + } else { + fprintf(stderr, "NULL"); + } + fprintf(stderr, ",\n})\n"); + } + + if (bftw(&bftw_args) != 0) { + args.ret = EXIT_FAILURE; + bfs_perror(ctx, "bftw()"); + } + + if (eval_exec_finish(ctx->expr, ctx) != 0) { + args.ret = EXIT_FAILURE; + } + + bfs_ctx_dump(ctx, DEBUG_RATES); + + if (ctx->unique) { + trie_destroy(&seen); + } + + bfs_bar_hide(args.bar); + + return args.ret; +} diff --git a/src/eval.h b/src/eval.h new file mode 100644 index 0000000..4dd7996 --- /dev/null +++ b/src/eval.h @@ -0,0 +1,102 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +/** + * The evaluation functions that implement primary expressions like -name, + * -print, etc. + */ + +#ifndef BFS_EVAL_H +#define BFS_EVAL_H + +#include "prelude.h" + +struct bfs_ctx; +struct bfs_expr; + +/** + * Ephemeral state for evaluating an expression. + */ +struct bfs_eval; + +/** + * Expression evaluation function. + * + * @param expr + * The current expression. + * @param state + * The current evaluation state. + * @return + * The result of the test. + */ +typedef bool bfs_eval_fn(const struct bfs_expr *expr, struct bfs_eval *state); + +/** + * Evaluate the command line. + * + * @param ctx + * The bfs context to evaluate. + * @return + * EXIT_SUCCESS on success, otherwise on failure. + */ +int bfs_eval(struct bfs_ctx *ctx); + +// Predicate evaluation functions + +bool eval_true(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_false(const struct bfs_expr *expr, struct bfs_eval *state); + +bool eval_access(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_acl(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_capable(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_context(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_perm(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_xattr(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_xattrname(const struct bfs_expr *expr, struct bfs_eval *state); + +bool eval_newer(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_time(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_used(const struct bfs_expr *expr, struct bfs_eval *state); + +bool eval_gid(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_uid(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_nogroup(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_nouser(const struct bfs_expr *expr, struct bfs_eval *state); + +bool eval_depth(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_empty(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_flags(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_fstype(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_hidden(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_inum(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_links(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_samefile(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_size(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_sparse(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_type(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_xtype(const struct bfs_expr *expr, struct bfs_eval *state); + +bool eval_lname(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_name(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_path(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_regex(const struct bfs_expr *expr, struct bfs_eval *state); + +bool eval_delete(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_exec(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_exit(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_fls(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_fprint(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_fprint0(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_fprintf(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_fprintx(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_limit(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_prune(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_quit(const struct bfs_expr *expr, struct bfs_eval *state); + +// Operator evaluation functions +bool eval_not(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_and(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_or(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_comma(const struct bfs_expr *expr, struct bfs_eval *state); + +#endif // BFS_EVAL_H diff --git a/src/exec.c b/src/exec.c new file mode 100644 index 0000000..cd73d6c --- /dev/null +++ b/src/exec.c @@ -0,0 +1,690 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +#include "prelude.h" +#include "exec.h" +#include "alloc.h" +#include "bfstd.h" +#include "bftw.h" +#include "color.h" +#include "ctx.h" +#include "diag.h" +#include "dstring.h" +#include "xspawn.h" +#include <errno.h> +#include <fcntl.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/resource.h> +#include <sys/wait.h> +#include <unistd.h> + +/** Print some debugging info. */ +attr(printf(2, 3)) +static void bfs_exec_debug(const struct bfs_exec *execbuf, const char *format, ...) { + const struct bfs_ctx *ctx = execbuf->ctx; + + if (!bfs_debug(ctx, DEBUG_EXEC, "${blu}")) { + return; + } + + if (execbuf->flags & BFS_EXEC_CONFIRM) { + fputs("-ok", stderr); + } else { + fputs("-exec", stderr); + } + if (execbuf->flags & BFS_EXEC_CHDIR) { + fputs("dir", stderr); + } + cfprintf(ctx->cerr, "${rs}: "); + + va_list args; + va_start(args, format); + vfprintf(stderr, format, args); + va_end(args); +} + +/** Determine the size of a single argument, for comparison to arg_max. */ +static size_t bfs_exec_arg_size(const char *arg) { + return sizeof(arg) + strlen(arg) + 1; +} + +/** Even if we can pass a bigger argument list, cap it here. */ +#define BFS_EXEC_ARG_MAX (16 << 20) + +/** Determine the maximum argv size. */ +static size_t bfs_exec_arg_max(const struct bfs_exec *execbuf) { + long arg_max = xsysconf(_SC_ARG_MAX); + bfs_exec_debug(execbuf, "ARG_MAX: %ld according to sysconf()\n", arg_max); + if (arg_max < 0) { + arg_max = BFS_EXEC_ARG_MAX; + bfs_exec_debug(execbuf, "ARG_MAX: %ld assumed\n", arg_max); + } + + // We have to share space with the environment variables + extern char **environ; + for (char **envp = environ; *envp; ++envp) { + arg_max -= bfs_exec_arg_size(*envp); + } + // Account for the terminating NULL entry + arg_max -= sizeof(char *); + bfs_exec_debug(execbuf, "ARG_MAX: %ld remaining after environment variables\n", arg_max); + + // Account for the fixed arguments + for (size_t i = 0; i < execbuf->tmpl_argc - 1; ++i) { + arg_max -= bfs_exec_arg_size(execbuf->tmpl_argv[i]); + } + // Account for the terminating NULL entry + arg_max -= sizeof(char *); + bfs_exec_debug(execbuf, "ARG_MAX: %ld remaining after fixed arguments\n", arg_max); + + // Assume arguments are counted with the granularity of a single page, + // so allow a one page cushion to account for rounding up + long page_size = xsysconf(_SC_PAGESIZE); + if (page_size < 4096) { + page_size = 4096; + } + arg_max -= page_size; + bfs_exec_debug(execbuf, "ARG_MAX: %ld remaining after page cushion\n", arg_max); + + // POSIX recommends an additional 2048 bytes of headroom + arg_max -= 2048; + bfs_exec_debug(execbuf, "ARG_MAX: %ld remaining after headroom\n", arg_max); + + if (arg_max < 0) { + arg_max = 0; + } else if (arg_max > BFS_EXEC_ARG_MAX) { + arg_max = BFS_EXEC_ARG_MAX; + } + + bfs_exec_debug(execbuf, "ARG_MAX: %ld final value\n", arg_max); + return arg_max; +} + +/** Highlight part of the command line as an error. */ +static void bfs_exec_parse_error(const struct bfs_ctx *ctx, const struct bfs_exec *execbuf) { + char **argv = execbuf->tmpl_argv - 1; + size_t argc = execbuf->tmpl_argc + 1; + if (argv[argc]) { + ++argc; + } + + bool args[ctx->argc]; + for (size_t i = 0; i < ctx->argc; ++i) { + args[i] = false; + } + + size_t i = argv - ctx->argv; + for (size_t j = 0; j < argc; ++j) { + args[i + j] = true; + } + + bfs_argv_error(ctx, args); +} + +struct bfs_exec *bfs_exec_parse(const struct bfs_ctx *ctx, char **argv, enum bfs_exec_flags flags) { + struct bfs_exec *execbuf = ZALLOC(struct bfs_exec); + if (!execbuf) { + bfs_perror(ctx, "zalloc()"); + goto fail; + } + + execbuf->flags = flags; + execbuf->ctx = ctx; + execbuf->tmpl_argv = argv + 1; + execbuf->wd_fd = -1; + + while (true) { + const char *arg = execbuf->tmpl_argv[execbuf->tmpl_argc]; + if (!arg) { + if (execbuf->flags & BFS_EXEC_CONFIRM) { + bfs_exec_parse_error(ctx, execbuf); + bfs_error(ctx, "Expected '... ;'.\n"); + } else { + bfs_exec_parse_error(ctx, execbuf); + bfs_error(ctx, "Expected '... ;' or '... {} +'.\n"); + } + goto fail; + } else if (strcmp(arg, ";") == 0) { + break; + } else if (execbuf->tmpl_argc > 0 && strcmp(arg, "+") == 0) { + const char *prev = execbuf->tmpl_argv[execbuf->tmpl_argc - 1]; + if (!(execbuf->flags & BFS_EXEC_CONFIRM) && strcmp(prev, "{}") == 0) { + execbuf->flags |= BFS_EXEC_MULTI; + break; + } + } + + ++execbuf->tmpl_argc; + } + + if (execbuf->tmpl_argc == 0) { + bfs_exec_parse_error(ctx, execbuf); + bfs_error(ctx, "Missing command.\n"); + goto fail; + } + + execbuf->argv_cap = execbuf->tmpl_argc + 1; + execbuf->argv = ALLOC_ARRAY(char *, execbuf->argv_cap); + if (!execbuf->argv) { + bfs_perror(ctx, "alloc()"); + goto fail; + } + + if (execbuf->flags & BFS_EXEC_MULTI) { + for (size_t i = 0; i < execbuf->tmpl_argc - 1; ++i) { + char *arg = execbuf->tmpl_argv[i]; + if (strstr(arg, "{}")) { + bfs_exec_parse_error(ctx, execbuf); + bfs_error(ctx, "Only one '{}' is supported.\n"); + goto fail; + } + execbuf->argv[i] = arg; + } + execbuf->argc = execbuf->tmpl_argc - 1; + + execbuf->arg_max = bfs_exec_arg_max(execbuf); + execbuf->arg_min = execbuf->arg_max; + } + + return execbuf; + +fail: + bfs_exec_free(execbuf); + return NULL; +} + +/** Format the current path for use as a command line argument. */ +static char *bfs_exec_format_path(const struct bfs_exec *execbuf, const struct BFTW *ftwbuf) { + if (!(execbuf->flags & BFS_EXEC_CHDIR)) { + return strdup(ftwbuf->path); + } + + const char *name = ftwbuf->path + ftwbuf->nameoff; + + if (name[0] == '/') { + // Must be a root path ("/", "//", etc.) + return strdup(name); + } + + // For compatibility with GNU find, use './name' instead of just 'name' + char *path = malloc(2 + strlen(name) + 1); + if (!path) { + return NULL; + } + + char *cur = stpcpy(path, "./"); + cur = stpcpy(cur, name); + return path; +} + +/** Format an argument, expanding "{}" to the current path. */ +static char *bfs_exec_format_arg(char *arg, const char *path) { + char *match = strstr(arg, "{}"); + if (!match) { + return arg; + } + + dchar *ret = dstralloc(0); + if (!ret) { + return NULL; + } + + char *last = arg; + do { + if (dstrncat(&ret, last, match - last) != 0) { + goto err; + } + if (dstrcat(&ret, path) != 0) { + goto err; + } + + last = match + 2; + match = strstr(last, "{}"); + } while (match); + + if (dstrcat(&ret, last) != 0) { + goto err; + } + + return ret; + +err: + dstrfree(ret); + return NULL; +} + +/** Free a formatted argument. */ +static void bfs_exec_free_arg(char *arg, const char *tmpl) { + if (arg != tmpl) { + dstrfree((dchar *)arg); + } +} + +/** Open a file to use as the working directory. */ +static int bfs_exec_openwd(struct bfs_exec *execbuf, const struct BFTW *ftwbuf) { + bfs_assert(execbuf->wd_fd < 0); + bfs_assert(!execbuf->wd_path); + + if (ftwbuf->at_fd != (int)AT_FDCWD) { + // Rely on at_fd being the immediate parent + bfs_assert(xbaseoff(ftwbuf->at_path) == 0); + + execbuf->wd_fd = ftwbuf->at_fd; + if (!(execbuf->flags & BFS_EXEC_MULTI)) { + return 0; + } + + execbuf->wd_fd = dup_cloexec(execbuf->wd_fd); + if (execbuf->wd_fd < 0) { + return -1; + } + } + + execbuf->wd_len = ftwbuf->nameoff; + if (execbuf->wd_len == 0) { + if (ftwbuf->path[0] == '/') { + ++execbuf->wd_len; + } else { + // The path is something like "foo", so we're already in the right directory + return 0; + } + } + + execbuf->wd_path = strndup(ftwbuf->path, execbuf->wd_len); + if (!execbuf->wd_path) { + return -1; + } + + if (execbuf->wd_fd < 0) { + execbuf->wd_fd = open(execbuf->wd_path, O_RDONLY | O_CLOEXEC | O_DIRECTORY); + } + + if (execbuf->wd_fd < 0) { + return -1; + } + + return 0; +} + +/** Close the working directory. */ +static void bfs_exec_closewd(struct bfs_exec *execbuf, const struct BFTW *ftwbuf) { + if (execbuf->wd_fd >= 0) { + if (!ftwbuf || execbuf->wd_fd != ftwbuf->at_fd) { + xclose(execbuf->wd_fd); + } + execbuf->wd_fd = -1; + } + + if (execbuf->wd_path) { + free(execbuf->wd_path); + execbuf->wd_path = NULL; + execbuf->wd_len = 0; + } +} + +/** Actually spawn the process. */ +static int bfs_exec_spawn(const struct bfs_exec *execbuf) { + const struct bfs_ctx *ctx = execbuf->ctx; + + // Flush the context state for consistency with the external process + bfs_ctx_flush(ctx); + + if (execbuf->flags & BFS_EXEC_CONFIRM) { + for (size_t i = 0; i < execbuf->argc; ++i) { + if (fprintf(stderr, "%s ", execbuf->argv[i]) < 0) { + return -1; + } + } + if (fprintf(stderr, "? ") < 0) { + return -1; + } + + if (ynprompt() <= 0) { + errno = 0; + return -1; + } + } + + if (execbuf->flags & BFS_EXEC_MULTI) { + bfs_exec_debug(execbuf, "Executing '%s' ... [%zu arguments] (size %zu)\n", + execbuf->argv[0], execbuf->argc - 1, execbuf->arg_size); + } else { + bfs_exec_debug(execbuf, "Executing '%s' ... [%zu arguments]\n", execbuf->argv[0], execbuf->argc - 1); + } + + pid_t pid = -1; + + struct bfs_spawn spawn; + if (bfs_spawn_init(&spawn) != 0) { + return -1; + } + + spawn.flags |= BFS_SPAWN_USE_PATH; + + if (execbuf->wd_fd >= 0) { + if (bfs_spawn_addfchdir(&spawn, execbuf->wd_fd) != 0) { + goto fail; + } + } + + // Reset RLIMIT_NOFILE if necessary, to avoid breaking applications that use select() + if (rlim_cmp(ctx->orig_nofile.rlim_cur, ctx->cur_nofile.rlim_cur) < 0) { + if (bfs_spawn_setrlimit(&spawn, RLIMIT_NOFILE, &ctx->orig_nofile) != 0) { + goto fail; + } + } + + pid = bfs_spawn(execbuf->argv[0], &spawn, execbuf->argv, NULL); + +fail:; + int error = errno; + + bfs_spawn_destroy(&spawn); + if (pid < 0) { + errno = error; + return -1; + } + + int wstatus; + if (xwaitpid(pid, &wstatus, 0) < 0) { + return -1; + } + + int ret = -1; + + if (WIFEXITED(wstatus)) { + int status = WEXITSTATUS(wstatus); + if (status == EXIT_SUCCESS) { + ret = 0; + } else { + bfs_exec_debug(execbuf, "Command '%s' failed with status %d\n", execbuf->argv[0], status); + } + } else if (WIFSIGNALED(wstatus)) { + int sig = WTERMSIG(wstatus); + const char *str = strsignal(sig); + if (!str) { + str = "unknown"; + } + bfs_warning(ctx, "Command '${ex}%s${rs}' terminated by signal %d (%s)\n", execbuf->argv[0], sig, str); + } else { + bfs_warning(ctx, "Command '${ex}%s${rs}' terminated abnormally\n", execbuf->argv[0]); + } + + errno = 0; + return ret; +} + +/** exec() a command for a single file. */ +static int bfs_exec_single(struct bfs_exec *execbuf, const struct BFTW *ftwbuf) { + int ret = -1, error = 0; + + char *path = bfs_exec_format_path(execbuf, ftwbuf); + if (!path) { + goto out; + } + + size_t i; + for (i = 0; i < execbuf->tmpl_argc; ++i) { + execbuf->argv[i] = bfs_exec_format_arg(execbuf->tmpl_argv[i], path); + if (!execbuf->argv[i]) { + goto out_free; + } + } + execbuf->argv[i] = NULL; + execbuf->argc = i; + + if (execbuf->flags & BFS_EXEC_CHDIR) { + if (bfs_exec_openwd(execbuf, ftwbuf) != 0) { + goto out_free; + } + } + + ret = bfs_exec_spawn(execbuf); + +out_free: + error = errno; + + bfs_exec_closewd(execbuf, ftwbuf); + + for (size_t j = 0; j < i; ++j) { + bfs_exec_free_arg(execbuf->argv[j], execbuf->tmpl_argv[j]); + } + + free(path); + + errno = error; + +out: + return ret; +} + +/** Check if any arguments remain in the buffer. */ +static bool bfs_exec_args_remain(const struct bfs_exec *execbuf) { + return execbuf->argc >= execbuf->tmpl_argc; +} + +/** Compute the current ARG_MAX estimate for binary search. */ +static size_t bfs_exec_estimate_max(const struct bfs_exec *execbuf) { + size_t min = execbuf->arg_min; + size_t max = execbuf->arg_max; + return min + (max - min) / 2; +} + +/** Update the ARG_MAX lower bound from a successful execution. */ +static void bfs_exec_update_min(struct bfs_exec *execbuf) { + if (execbuf->arg_size > execbuf->arg_min) { + execbuf->arg_min = execbuf->arg_size; + + // Don't let min exceed max + if (execbuf->arg_min > execbuf->arg_max) { + execbuf->arg_min = execbuf->arg_max; + } + + size_t estimate = bfs_exec_estimate_max(execbuf); + bfs_exec_debug(execbuf, "ARG_MAX between [%zu, %zu], trying %zu\n", + execbuf->arg_min, execbuf->arg_max, estimate); + } +} + +/** Update the ARG_MAX upper bound from a failed execution. */ +static size_t bfs_exec_update_max(struct bfs_exec *execbuf) { + bfs_exec_debug(execbuf, "Got E2BIG, shrinking argument list...\n"); + + size_t size = execbuf->arg_size; + if (size <= execbuf->arg_min) { + // Lower bound was wrong, restart binary search. + execbuf->arg_min = 0; + } + + // Trim a fraction off the max size to avoid repeated failures near the + // top end of the working range + size -= size / 16; + if (size < execbuf->arg_max) { + execbuf->arg_max = size; + + // Don't let min exceed max + if (execbuf->arg_min > execbuf->arg_max) { + execbuf->arg_min = execbuf->arg_max; + } + } + + // Binary search for a more precise bound + size_t estimate = bfs_exec_estimate_max(execbuf); + bfs_exec_debug(execbuf, "ARG_MAX between [%zu, %zu], trying %zu\n", + execbuf->arg_min, execbuf->arg_max, estimate); + return estimate; +} + +/** Execute the pending command from a BFS_EXEC_MULTI execbuf. */ +static int bfs_exec_flush(struct bfs_exec *execbuf) { + int ret = 0, error = 0; + + size_t orig_argc = execbuf->argc; + while (bfs_exec_args_remain(execbuf)) { + execbuf->argv[execbuf->argc] = NULL; + ret = bfs_exec_spawn(execbuf); + error = errno; + if (ret == 0) { + bfs_exec_update_min(execbuf); + break; + } else if (error != E2BIG) { + break; + } + + // Try to recover from E2BIG by trying fewer and fewer arguments + // until they fit + size_t new_max = bfs_exec_update_max(execbuf); + while (execbuf->arg_size > new_max) { + execbuf->argv[execbuf->argc] = execbuf->argv[execbuf->argc - 1]; + execbuf->arg_size -= bfs_exec_arg_size(execbuf->argv[execbuf->argc]); + --execbuf->argc; + } + } + + size_t new_argc = execbuf->argc; + for (size_t i = execbuf->tmpl_argc - 1; i < new_argc; ++i) { + free(execbuf->argv[i]); + } + execbuf->argc = execbuf->tmpl_argc - 1; + execbuf->arg_size = 0; + + if (new_argc < orig_argc) { + // If we recovered from E2BIG, there are unused arguments at the + // end of the list + for (size_t i = new_argc + 1; i <= orig_argc; ++i) { + if (error == 0) { + execbuf->argv[execbuf->argc] = execbuf->argv[i]; + execbuf->arg_size += bfs_exec_arg_size(execbuf->argv[execbuf->argc]); + ++execbuf->argc; + } else { + free(execbuf->argv[i]); + } + } + } + + errno = error; + return ret; +} + +/** Check if we need to flush the execbuf because we're changing directories. */ +static bool bfs_exec_changed_dirs(const struct bfs_exec *execbuf, const struct BFTW *ftwbuf) { + if (execbuf->flags & BFS_EXEC_CHDIR) { + if (ftwbuf->nameoff > execbuf->wd_len + || (execbuf->wd_path && strncmp(ftwbuf->path, execbuf->wd_path, execbuf->wd_len) != 0)) { + bfs_exec_debug(execbuf, "Changed directories, executing buffered command\n"); + return true; + } + } + + return false; +} + +/** Check if we need to flush the execbuf because we're too big. */ +static bool bfs_exec_would_overflow(const struct bfs_exec *execbuf, const char *arg) { + size_t arg_max = bfs_exec_estimate_max(execbuf); + size_t next_size = execbuf->arg_size + bfs_exec_arg_size(arg); + if (next_size > arg_max) { + bfs_exec_debug(execbuf, "Command size (%zu) would exceed maximum (%zu), executing buffered command\n", + next_size, arg_max); + return true; + } + + return false; +} + +/** Push a new argument to a BFS_EXEC_MULTI execbuf. */ +static int bfs_exec_push(struct bfs_exec *execbuf, char *arg) { + execbuf->argv[execbuf->argc] = arg; + + if (execbuf->argc + 1 >= execbuf->argv_cap) { + size_t cap = 2 * execbuf->argv_cap; + char **argv = REALLOC_ARRAY(char *, execbuf->argv, execbuf->argv_cap, cap); + if (!argv) { + return -1; + } + execbuf->argv = argv; + execbuf->argv_cap = cap; + } + + ++execbuf->argc; + execbuf->arg_size += bfs_exec_arg_size(arg); + return 0; +} + +/** Handle a new path for a BFS_EXEC_MULTI execbuf. */ +static int bfs_exec_multi(struct bfs_exec *execbuf, const struct BFTW *ftwbuf) { + int ret = 0; + + char *arg = bfs_exec_format_path(execbuf, ftwbuf); + if (!arg) { + ret = -1; + goto out; + } + + if (bfs_exec_changed_dirs(execbuf, ftwbuf)) { + while (bfs_exec_args_remain(execbuf)) { + ret |= bfs_exec_flush(execbuf); + } + bfs_exec_closewd(execbuf, ftwbuf); + } else if (bfs_exec_would_overflow(execbuf, arg)) { + ret |= bfs_exec_flush(execbuf); + } + + if ((execbuf->flags & BFS_EXEC_CHDIR) && execbuf->wd_fd < 0) { + if (bfs_exec_openwd(execbuf, ftwbuf) != 0) { + ret = -1; + goto out_arg; + } + } + + if (bfs_exec_push(execbuf, arg) != 0) { + ret = -1; + goto out_arg; + } + + // arg will get cleaned up later by bfs_exec_flush() + goto out; + +out_arg: + free(arg); +out: + return ret; +} + +int bfs_exec(struct bfs_exec *execbuf, const struct BFTW *ftwbuf) { + if (execbuf->flags & BFS_EXEC_MULTI) { + if (bfs_exec_multi(execbuf, ftwbuf) == 0) { + errno = 0; + } else { + execbuf->ret = -1; + } + // -exec ... + never returns false + return 0; + } else { + return bfs_exec_single(execbuf, ftwbuf); + } +} + +int bfs_exec_finish(struct bfs_exec *execbuf) { + if (execbuf->flags & BFS_EXEC_MULTI) { + bfs_exec_debug(execbuf, "Finishing execution, executing buffered command\n"); + while (bfs_exec_args_remain(execbuf)) { + execbuf->ret |= bfs_exec_flush(execbuf); + } + if (execbuf->ret != 0) { + bfs_exec_debug(execbuf, "One or more executions of '%s' failed\n", execbuf->argv[0]); + } + } + return execbuf->ret; +} + +void bfs_exec_free(struct bfs_exec *execbuf) { + if (execbuf) { + bfs_exec_closewd(execbuf, NULL); + free(execbuf->argv); + free(execbuf); + } +} diff --git a/src/exec.h b/src/exec.h new file mode 100644 index 0000000..9d4192d --- /dev/null +++ b/src/exec.h @@ -0,0 +1,108 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +/** + * Implementation of -exec/-execdir/-ok/-okdir. + */ + +#ifndef BFS_EXEC_H +#define BFS_EXEC_H + +#include <stddef.h> + +struct BFTW; +struct bfs_ctx; + +/** + * Flags for the -exec actions. + */ +enum bfs_exec_flags { + /** Prompt the user before executing (-ok, -okdir). */ + BFS_EXEC_CONFIRM = 1 << 0, + /** Run the command in the file's parent directory (-execdir, -okdir). */ + BFS_EXEC_CHDIR = 1 << 1, + /** Pass multiple files at once to the command (-exec ... {} +). */ + BFS_EXEC_MULTI = 1 << 2, +}; + +/** + * Buffer for a command line to be executed. + */ +struct bfs_exec { + /** Flags for this exec buffer. */ + enum bfs_exec_flags flags; + + /** The bfs context. */ + const struct bfs_ctx *ctx; + /** Command line template. */ + char **tmpl_argv; + /** Command line template size. */ + size_t tmpl_argc; + + /** The built command line. */ + char **argv; + /** Number of command line arguments. */ + size_t argc; + /** Capacity of argv. */ + size_t argv_cap; + + /** Current size of all arguments. */ + size_t arg_size; + /** Maximum arg_size before E2BIG. */ + size_t arg_max; + /** Lower bound for arg_max. */ + size_t arg_min; + + /** A file descriptor for the working directory, for BFS_EXEC_CHDIR. */ + int wd_fd; + /** The path to the working directory, for BFS_EXEC_CHDIR. */ + char *wd_path; + /** Length of the working directory path. */ + size_t wd_len; + + /** The ultimate return value for bfs_exec_finish(). */ + int ret; +}; + +/** + * Parse an exec action. + * + * @param argv + * The (bfs) command line argument to parse. + * @param flags + * Any flags for this exec action. + * @param ctx + * The bfs context. + * @return + * The parsed exec action, or NULL on failure. + */ +struct bfs_exec *bfs_exec_parse(const struct bfs_ctx *ctx, char **argv, enum bfs_exec_flags flags); + +/** + * Execute the command for a file. + * + * @param execbuf + * The parsed exec action. + * @param ftwbuf + * The bftw() data for the current file. + * @return 0 if the command succeeded, -1 if it failed. If the command could + * be executed, -1 is returned, and errno will be non-zero. For + * BFS_EXEC_MULTI, errors will not be reported until bfs_exec_finish(). + */ +int bfs_exec(struct bfs_exec *execbuf, const struct BFTW *ftwbuf); + +/** + * Finish executing any commands. + * + * @param execbuf + * The parsed exec action. + * @return 0 on success, -1 if any errors were encountered. + */ +int bfs_exec_finish(struct bfs_exec *execbuf); + +/** + * Free a parsed exec action. + */ +void bfs_exec_free(struct bfs_exec *execbuf); + +#endif // BFS_EXEC_H diff --git a/src/expr.c b/src/expr.c new file mode 100644 index 0000000..5784220 --- /dev/null +++ b/src/expr.c @@ -0,0 +1,85 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +#include "expr.h" +#include "alloc.h" +#include "ctx.h" +#include "diag.h" +#include "eval.h" +#include "exec.h" +#include "list.h" +#include "printf.h" +#include "xregex.h" +#include <string.h> + +struct bfs_expr *bfs_expr_new(struct bfs_ctx *ctx, bfs_eval_fn *eval_fn, size_t argc, char **argv) { + struct bfs_expr *expr = arena_alloc(&ctx->expr_arena); + if (!expr) { + return NULL; + } + + memset(expr, 0, sizeof(*expr)); + expr->eval_fn = eval_fn; + expr->argc = argc; + expr->argv = argv; + expr->probability = 0.5; + SLIST_PREPEND(&ctx->expr_list, expr, freelist); + + if (bfs_expr_is_parent(expr)) { + SLIST_INIT(&expr->children); + } + + return expr; +} + +bool bfs_expr_is_parent(const struct bfs_expr *expr) { + return expr->eval_fn == eval_and + || expr->eval_fn == eval_or + || expr->eval_fn == eval_not + || expr->eval_fn == eval_comma; +} + +struct bfs_expr *bfs_expr_children(const struct bfs_expr *expr) { + if (bfs_expr_is_parent(expr)) { + return expr->children.head; + } else { + return NULL; + } +} + +void bfs_expr_append(struct bfs_expr *expr, struct bfs_expr *child) { + bfs_assert(bfs_expr_is_parent(expr)); + + SLIST_APPEND(&expr->children, child); + + if (!child->pure) { + expr->pure = false; + } + + expr->persistent_fds += child->persistent_fds; + if (expr->ephemeral_fds < child->ephemeral_fds) { + expr->ephemeral_fds = child->ephemeral_fds; + } +} + +void bfs_expr_extend(struct bfs_expr *expr, struct bfs_exprs *children) { + while (!SLIST_EMPTY(children)) { + struct bfs_expr *child = SLIST_POP(children); + bfs_expr_append(expr, child); + } +} + +bool bfs_expr_never_returns(const struct bfs_expr *expr) { + // Expressions that never return are vacuously both always true and always false + return expr->always_true && expr->always_false; +} + +void bfs_expr_clear(struct bfs_expr *expr) { + if (expr->eval_fn == eval_exec) { + bfs_exec_free(expr->exec); + } else if (expr->eval_fn == eval_fprintf) { + bfs_printf_free(expr->printf); + } else if (expr->eval_fn == eval_regex) { + bfs_regfree(expr->regex); + } +} diff --git a/src/expr.h b/src/expr.h new file mode 100644 index 0000000..7bcace7 --- /dev/null +++ b/src/expr.h @@ -0,0 +1,247 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +/** + * The expression tree representation. + */ + +#ifndef BFS_EXPR_H +#define BFS_EXPR_H + +#include "prelude.h" +#include "color.h" +#include "eval.h" +#include "stat.h" +#include <sys/types.h> +#include <time.h> + +/** + * Integer comparison modes. + */ +enum bfs_int_cmp { + /** Exactly N. */ + BFS_INT_EQUAL, + /** Less than N (-N). */ + BFS_INT_LESS, + /** Greater than N (+N). */ + BFS_INT_GREATER, +}; + +/** + * Permission comparison modes. + */ +enum bfs_mode_cmp { + /** Mode is an exact match (MODE). */ + BFS_MODE_EQUAL, + /** Mode has all these bits (-MODE). */ + BFS_MODE_ALL, + /** Mode has any of these bits (/MODE). */ + BFS_MODE_ANY, +}; + +/** + * Possible time units. + */ +enum bfs_time_unit { + /** Seconds. */ + BFS_SECONDS, + /** Minutes. */ + BFS_MINUTES, + /** Days. */ + BFS_DAYS, +}; + +/** + * Possible file size units. + */ +enum bfs_size_unit { + /** 512-byte blocks. */ + BFS_BLOCKS, + /** Single bytes. */ + BFS_BYTES, + /** Two-byte words. */ + BFS_WORDS, + /** Kibibytes. */ + BFS_KB, + /** Mebibytes. */ + BFS_MB, + /** Gibibytes. */ + BFS_GB, + /** Tebibytes. */ + BFS_TB, + /** Pebibytes. */ + BFS_PB, +}; + +/** + * A linked list of expressions. + */ +struct bfs_exprs { + struct bfs_expr *head; + struct bfs_expr **tail; +}; + +/** + * A command line expression. + */ +struct bfs_expr { + /** This expression's next sibling, if any. */ + struct bfs_expr *next; + /** The next allocated expression. */ + struct { struct bfs_expr *next; } freelist; + + /** The function that evaluates this expression. */ + bfs_eval_fn *eval_fn; + + /** The number of command line arguments for this expression. */ + size_t argc; + /** The command line arguments comprising this expression. */ + char **argv; + + /** The number of files this expression keeps open between evaluations. */ + int persistent_fds; + /** The number of files this expression opens during evaluation. */ + int ephemeral_fds; + + /** Whether this expression has no side effects. */ + bool pure; + /** Whether this expression always evaluates to true. */ + bool always_true; + /** Whether this expression always evaluates to false. */ + bool always_false; + /** Whether this expression uses stat(). */ + bool calls_stat; + + /** Estimated cost. */ + float cost; + /** Estimated probability of success. */ + float probability; + /** Number of times this predicate was evaluated. */ + size_t evaluations; + /** Number of times this predicate succeeded. */ + size_t successes; + /** Total time spent running this predicate. */ + struct timespec elapsed; + + /** Auxilliary data for the evaluation function. */ + union { + /** Child expressions. */ + struct bfs_exprs children; + + /** Integer comparisons. */ + struct { + /** Integer for this comparison. */ + long long num; + /** The comparison mode. */ + enum bfs_int_cmp int_cmp; + + /** -size data. */ + enum bfs_size_unit size_unit; + + /** The stat field to look at. */ + enum bfs_stat_field stat_field; + /** The time unit. */ + enum bfs_time_unit time_unit; + /** The reference time. */ + struct timespec reftime; + }; + + /** String comparisons. */ + struct { + /** String pattern. */ + const char *pattern; + /** fnmatch() flags. */ + int fnm_flags; + /** Whether strcmp() can be used instead of fnmatch(). */ + bool literal; + }; + + /** Printing actions. */ + struct { + /** The output stream. */ + CFILE *cfile; + /** Optional file path. */ + const char *path; + /** Optional -printf format. */ + struct bfs_printf *printf; + }; + + /** -exec data. */ + struct bfs_exec *exec; + + /** -flags data. */ + struct { + /** The comparison mode. */ + enum bfs_mode_cmp flags_cmp; + /** Flags that should be set. */ + unsigned long long set_flags; + /** Flags that should be cleared. */ + unsigned long long clear_flags; + }; + + /** -perm data. */ + struct { + /** The comparison mode. */ + enum bfs_mode_cmp mode_cmp; + /** Mode to use for files. */ + mode_t file_mode; + /** Mode to use for directories (different due to X). */ + mode_t dir_mode; + }; + + /** -regex data. */ + struct bfs_regex *regex; + + /** -samefile data. */ + struct { + /** Device number of the target file. */ + dev_t dev; + /** Inode number of the target file. */ + ino_t ino; + }; + }; +}; + +struct bfs_ctx; + +/** + * Create a new expression. + */ +struct bfs_expr *bfs_expr_new(struct bfs_ctx *ctx, bfs_eval_fn *eval, size_t argc, char **argv); + +/** + * @return Whether this type of expression has children. + */ +bool bfs_expr_is_parent(const struct bfs_expr *expr); + +/** + * @return The first child of this expression, or NULL if it has none. + */ +struct bfs_expr *bfs_expr_children(const struct bfs_expr *expr); + +/** + * Add a child to an expression. + */ +void bfs_expr_append(struct bfs_expr *expr, struct bfs_expr *child); + +/** + * Add a list of children to an expression. + */ +void bfs_expr_extend(struct bfs_expr *expr, struct bfs_exprs *children); + +/** + * @return Whether expr is known to always quit. + */ +bool bfs_expr_never_returns(const struct bfs_expr *expr); + +/** + * @return The result of the integer comparison for this expression. + */ +bool bfs_expr_cmp(const struct bfs_expr *expr, long long n); + +/** + * Free any resources owned by an expression. + */ +void bfs_expr_clear(struct bfs_expr *expr); + +#endif // BFS_EXPR_H diff --git a/src/fsade.c b/src/fsade.c new file mode 100644 index 0000000..7310141 --- /dev/null +++ b/src/fsade.c @@ -0,0 +1,508 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +#include "prelude.h" +#include "fsade.h" +#include "atomic.h" +#include "bfstd.h" +#include "bftw.h" +#include "dir.h" +#include "dstring.h" +#include "sanity.h" +#include <errno.h> +#include <fcntl.h> +#include <stddef.h> +#include <unistd.h> + +#if BFS_CAN_CHECK_ACL +# include <sys/acl.h> +#endif + +#if BFS_CAN_CHECK_CAPABILITIES +# include <sys/capability.h> +#endif + +#if BFS_CAN_CHECK_CONTEXT +# include <selinux/selinux.h> +#endif + +#if BFS_USE_SYS_EXTATTR_H +# include <sys/extattr.h> +#elif BFS_USE_SYS_XATTR_H +# include <sys/xattr.h> +#endif + +/** + * Many of the APIs used here don't have *at() variants, but we can try to + * emulate something similar if /proc/self/fd is available. + */ +attr(maybe_unused) +static const char *fake_at(const struct BFTW *ftwbuf) { + static atomic int proc_works = -1; + + dchar *path = NULL; + if (ftwbuf->at_fd == (int)AT_FDCWD || load(&proc_works, relaxed) == 0) { + goto fail; + } + + path = dstrprintf("/proc/self/fd/%d/", ftwbuf->at_fd); + if (!path) { + goto fail; + } + + if (load(&proc_works, relaxed) < 0) { + if (xfaccessat(AT_FDCWD, path, F_OK) != 0) { + store(&proc_works, 0, relaxed); + goto fail; + } else { + store(&proc_works, 1, relaxed); + } + } + + if (dstrcat(&path, ftwbuf->at_path) != 0) { + goto fail; + } + + return path; + +fail: + dstrfree(path); + return ftwbuf->path; +} + +attr(maybe_unused) +static void free_fake_at(const struct BFTW *ftwbuf, const char *path) { + if (path != ftwbuf->path) { + dstrfree((dchar *)path); + } +} + +/** + * Check if an error was caused by the absence of support or data for a feature. + */ +attr(maybe_unused) +static bool is_absence_error(int error) { + // If the OS doesn't support the feature, it's obviously not enabled for + // any files + if (error == ENOTSUP) { + return true; + } + + // On Linux, ACLs and capabilities are implemented in terms of extended + // attributes, which report ENODATA/ENOATTR when missing + +#ifdef ENODATA + if (error == ENODATA) { + return true; + } +#endif + +#if defined(ENOATTR) && ENOATTR != ENODATA + if (error == ENOATTR) { + return true; + } +#endif + + // On at least FreeBSD and macOS, EINVAL is returned when the requested + // ACL type is not supported for that file + if (error == EINVAL) { + return true; + } + +#if __APPLE__ + // On macOS, ENOENT can also signal that a file has no ACLs + if (error == ENOENT) { + return true; + } +#endif + + return false; +} + +#if BFS_CAN_CHECK_ACL + +#if BFS_HAS_ACL_GET_FILE + +/** Unified interface for incompatible acl_get_entry() implementations. */ +static int bfs_acl_entry(acl_t acl, int which, acl_entry_t *entry) { +#if BFS_HAS_ACL_GET_ENTRY + int ret = acl_get_entry(acl, which, entry); +# if __APPLE__ + // POSIX.1e specifies a return value of 1 for success, but macOS returns 0 instead + return !ret; +# else + return ret; +# endif +#elif __DragonFly__ +# if !defined(ACL_FIRST_ENTRY) && !defined(ACL_NEXT_ENTRY) +# define ACL_FIRST_ENTRY 0 +# define ACL_NEXT_ENTRY 1 +# endif + + switch (which) { + case ACL_FIRST_ENTRY: + *entry = &acl->acl_entry[0]; + break; + case ACL_NEXT_ENTRY: + ++*entry; + break; + default: + errno = EINVAL; + return -1; + } + + acl_entry_t last = &acl->acl_entry[acl->acl_cnt]; + return *entry == last; +#else + errno = ENOTSUP; + return -1; +#endif +} + +/** Unified interface for acl_get_tag_type(). */ +attr(maybe_unused) +static int bfs_acl_tag_type(acl_entry_t entry, acl_tag_t *tag) { +#if BFS_HAS_ACL_GET_TAG_TYPE + return acl_get_tag_type(entry, tag); +#elif __DragonFly__ + *tag = entry->ae_tag; + return 0; +#else + errno = ENOTSUP; + return -1; +#endif +} + +/** Check if a POSIX.1e ACL is non-trivial. */ +static int bfs_check_posix1e_acl(acl_t acl, bool ignore_required) { + int ret = 0; + + acl_entry_t entry; + for (int status = bfs_acl_entry(acl, ACL_FIRST_ENTRY, &entry); + status > 0; + status = bfs_acl_entry(acl, ACL_NEXT_ENTRY, &entry)) + { +#if defined(ACL_USER_OBJ) && defined(ACL_GROUP_OBJ) && defined(ACL_OTHER) + if (ignore_required) { + acl_tag_t tag; + if (bfs_acl_tag_type(entry, &tag) != 0) { + ret = -1; + continue; + } + if (tag == ACL_USER_OBJ || tag == ACL_GROUP_OBJ || tag == ACL_OTHER) { + continue; + } + } +#endif + + ret = 1; + break; + } + + return ret; +} + +/** Check if an ACL of the given type is non-trivial. */ +static int bfs_check_acl_type(acl_t acl, acl_type_t type) { + if (type == ACL_TYPE_DEFAULT) { + // For directory default ACLs, any entries make them non-trivial + return bfs_check_posix1e_acl(acl, false); + } + +#if BFS_HAS_ACL_IS_TRIVIAL_NP + int trivial; + int ret = acl_is_trivial_np(acl, &trivial); + + // msan seems to be missing an interceptor for acl_is_trivial_np() + sanitize_init(&trivial); + + if (ret < 0) { + return -1; + } else if (trivial) { + return 0; + } else { + return 1; + } +#else + return bfs_check_posix1e_acl(acl, true); +#endif +} + +#endif // BFS_HAS_ACL_GET_FILE + +int bfs_check_acl(const struct BFTW *ftwbuf) { + if (ftwbuf->type == BFS_LNK) { + return 0; + } + + const char *path = fake_at(ftwbuf); + +#if BFS_HAS_ACL_TRIVIAL + int ret = acl_trivial(path); + int error = errno; +#elif BFS_HAS_ACL_GET_FILE + static const acl_type_t acl_types[] = { +# if __APPLE__ + // macOS gives EINVAL for either of the two standard ACL types, + // supporting only ACL_TYPE_EXTENDED + ACL_TYPE_EXTENDED, +# else + // The two standard POSIX.1e ACL types + ACL_TYPE_ACCESS, + ACL_TYPE_DEFAULT, +# endif + +# ifdef ACL_TYPE_NFS4 + ACL_TYPE_NFS4, +# endif + }; + + int ret = -1, error = 0; + for (size_t i = 0; i < countof(acl_types) && ret <= 0; ++i) { + acl_type_t type = acl_types[i]; + + if (type == ACL_TYPE_DEFAULT && ftwbuf->type != BFS_DIR) { + // ACL_TYPE_DEFAULT is supported only for directories, + // otherwise acl_get_file() gives EACCESS + continue; + } + + acl_t acl = acl_get_file(path, type); + if (!acl) { + error = errno; + if (is_absence_error(error)) { + ret = 0; + } + continue; + } + + ret = bfs_check_acl_type(acl, type); + error = errno; + acl_free(acl); + } +#endif + + free_fake_at(ftwbuf, path); + errno = error; + return ret; +} + +#else // !BFS_CAN_CHECK_ACL + +int bfs_check_acl(const struct BFTW *ftwbuf) { + errno = ENOTSUP; + return -1; +} + +#endif + +#if BFS_CAN_CHECK_CAPABILITIES + +int bfs_check_capabilities(const struct BFTW *ftwbuf) { + if (ftwbuf->type == BFS_LNK) { + return 0; + } + + int ret = -1, error; + const char *path = fake_at(ftwbuf); + + cap_t caps = cap_get_file(path); + if (!caps) { + error = errno; + if (is_absence_error(error)) { + ret = 0; + } + goto out_path; + } + + // TODO: Any better way to check for a non-empty capability set? + char *text = cap_to_text(caps, NULL); + if (!text) { + error = errno; + goto out_caps; + } + ret = text[0] ? 1 : 0; + + error = errno; + cap_free(text); +out_caps: + cap_free(caps); +out_path: + free_fake_at(ftwbuf, path); + errno = error; + return ret; +} + +#else // !BFS_CAN_CHECK_CAPABILITIES + +int bfs_check_capabilities(const struct BFTW *ftwbuf) { + errno = ENOTSUP; + return -1; +} + +#endif + +#if BFS_CAN_CHECK_XATTRS + +#if BFS_USE_SYS_EXTATTR_H + +/** Wrapper for extattr_list_{file,link}. */ +static ssize_t bfs_extattr_list(const char *path, enum bfs_type type, int namespace) { + if (type == BFS_LNK) { +#if BFS_HAS_EXTATTR_LIST_LINK + return extattr_list_link(path, namespace, NULL, 0); +#elif BFS_HAS_EXTATTR_GET_LINK + return extattr_get_link(path, namespace, "", NULL, 0); +#else + return 0; +#endif + } + +#if BFS_HAS_EXTATTR_LIST_FILE + return extattr_list_file(path, namespace, NULL, 0); +#elif BFS_HAS_EXTATTR_GET_FILE + // From man extattr(2): + // + // In earlier versions of this API, passing an empty string for the + // attribute name to extattr_get_file() would return the list of attributes + // defined for the target object. This interface has been deprecated in + // preference to using the explicit list API, and should not be used. + return extattr_get_file(path, namespace, "", NULL, 0); +#else + return 0; +#endif +} + +/** Wrapper for extattr_get_{file,link}. */ +static ssize_t bfs_extattr_get(const char *path, enum bfs_type type, int namespace, const char *name) { + if (type == BFS_LNK) { +#if BFS_HAS_EXTATTR_GET_LINK + return extattr_get_link(path, namespace, name, NULL, 0); +#else + return 0; +#endif + } + +#if BFS_HAS_EXTATTR_GET_FILE + return extattr_get_file(path, namespace, name, NULL, 0); +#else + return 0; +#endif +} + +#endif // BFS_USE_SYS_EXTATTR_H + +int bfs_check_xattrs(const struct BFTW *ftwbuf) { + const char *path = fake_at(ftwbuf); + ssize_t len; + +#if BFS_USE_SYS_EXTATTR_H + len = bfs_extattr_list(path, ftwbuf->type, EXTATTR_NAMESPACE_SYSTEM); + if (len <= 0) { + len = bfs_extattr_list(path, ftwbuf->type, EXTATTR_NAMESPACE_USER); + } +#elif __APPLE__ + int options = ftwbuf->type == BFS_LNK ? XATTR_NOFOLLOW : 0; + len = listxattr(path, NULL, 0, options); +#else + if (ftwbuf->type == BFS_LNK) { + len = llistxattr(path, NULL, 0); + } else { + len = listxattr(path, NULL, 0); + } +#endif + + int error = errno; + + free_fake_at(ftwbuf, path); + + if (len > 0) { + return 1; + } else if (len == 0 || is_absence_error(error)) { + return 0; + } else if (error == E2BIG) { + return 1; + } else { + errno = error; + return -1; + } +} + +int bfs_check_xattr_named(const struct BFTW *ftwbuf, const char *name) { + const char *path = fake_at(ftwbuf); + ssize_t len; + +#if BFS_USE_SYS_EXTATTR_H + len = bfs_extattr_get(path, ftwbuf->type, EXTATTR_NAMESPACE_SYSTEM, name); + if (len < 0) { + len = bfs_extattr_get(path, ftwbuf->type, EXTATTR_NAMESPACE_USER, name); + } +#elif __APPLE__ + int options = ftwbuf->type == BFS_LNK ? XATTR_NOFOLLOW : 0; + len = getxattr(path, name, NULL, 0, 0, options); +#else + if (ftwbuf->type == BFS_LNK) { + len = lgetxattr(path, name, NULL, 0); + } else { + len = getxattr(path, name, NULL, 0); + } +#endif + + int error = errno; + + free_fake_at(ftwbuf, path); + + if (len >= 0) { + return 1; + } else if (is_absence_error(error)) { + return 0; + } else if (error == E2BIG) { + return 1; + } else { + errno = error; + return -1; + } +} + +#else // !BFS_CAN_CHECK_XATTRS + +int bfs_check_xattrs(const struct BFTW *ftwbuf) { + errno = ENOTSUP; + return -1; +} + +int bfs_check_xattr_named(const struct BFTW *ftwbuf, const char *name) { + errno = ENOTSUP; + return -1; +} + +#endif + +char *bfs_getfilecon(const struct BFTW *ftwbuf) { +#if BFS_CAN_CHECK_CONTEXT + const char *path = fake_at(ftwbuf); + + char *con; + int ret; + if (ftwbuf->type == BFS_LNK) { + ret = lgetfilecon(path, &con); + } else { + ret = getfilecon(path, &con); + } + + if (ret >= 0) { + return con; + } else { + return NULL; + } +#else + errno = ENOTSUP; + return NULL; +#endif +} + +void bfs_freecon(char *con) { +#if BFS_CAN_CHECK_CONTEXT + freecon(con); +#endif +} diff --git a/src/fsade.h b/src/fsade.h new file mode 100644 index 0000000..eefef9f --- /dev/null +++ b/src/fsade.h @@ -0,0 +1,81 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +/** + * A facade over (file)system features that are (un)implemented differently + * between platforms. + */ + +#ifndef BFS_FSADE_H +#define BFS_FSADE_H + +#include "prelude.h" + +#define BFS_CAN_CHECK_ACL (BFS_HAS_ACL_GET_FILE || BFS_HAS_ACL_TRIVIAL) + +#define BFS_CAN_CHECK_CAPABILITIES BFS_USE_LIBCAP + +#define BFS_CAN_CHECK_CONTEXT BFS_USE_LIBSELINUX + +#define BFS_CAN_CHECK_XATTRS (BFS_USE_SYS_EXTATTR_H || BFS_USE_SYS_XATTR_H) + +struct BFTW; + +/** + * Check if a file has a non-trivial Access Control List. + * + * @param ftwbuf + * The file to check. + * @return + * 1 if it does, 0 if it doesn't, or -1 if an error occurred. + */ +int bfs_check_acl(const struct BFTW *ftwbuf); + +/** + * Check if a file has a non-trivial capability set. + * + * @param ftwbuf + * The file to check. + * @return + * 1 if it does, 0 if it doesn't, or -1 if an error occurred. + */ +int bfs_check_capabilities(const struct BFTW *ftwbuf); + +/** + * Check if a file has any extended attributes set. + * + * @param ftwbuf + * The file to check. + * @return + * 1 if it does, 0 if it doesn't, or -1 if an error occurred. + */ +int bfs_check_xattrs(const struct BFTW *ftwbuf); + +/** + * Check if a file has an extended attribute with the given name. + * + * @param ftwbuf + * The file to check. + * @param name + * The name of the xattr to check. + * @return + * 1 if it does, 0 if it doesn't, or -1 if an error occurred. + */ +int bfs_check_xattr_named(const struct BFTW *ftwbuf, const char *name); + +/** + * Get a file's SELinux context + * + * @param ftwbuf + * The file to check. + * @return + * The file's SELinux context, or NULL on failure. + */ +char *bfs_getfilecon(const struct BFTW *ftwbuf); + +/** + * Free a bfs_getfilecon() result. + */ +void bfs_freecon(char *con); + +#endif // BFS_FSADE_H diff --git a/src/ioq.c b/src/ioq.c new file mode 100644 index 0000000..43a1b35 --- /dev/null +++ b/src/ioq.c @@ -0,0 +1,1100 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +/** + * An asynchronous I/O queue implementation. + * + * struct ioq is composed of two separate queues: + * + * struct ioqq *pending; // Pending I/O requests + * struct ioqq *ready; // Ready I/O responses + * + * Worker threads pop requests from `pending`, execute them, and push them back + * to the `ready` queue. The main thread pushes requests to `pending` and pops + * them from `ready`. + * + * struct ioqq is a blocking MPMC queue (though it could be SPMC/MPSC for + * pending/ready respectively). It is implemented as a circular buffer: + * + * size_t mask; // (1 << N) - 1 + * [padding] + * size_t head; // Writer index + * [padding] + * size_t tail; // Reader index + * [padding] + * ioq_slot slots[1 << N]; // Queue contents + * + * Pushes are implemented with an unconditional + * + * fetch_add(&ioqq->head, 1) + * + * which scales better on many architectures than compare-and-swap (see [1] for + * details). Pops are implemented similarly. Since the fetch-and-adds are + * unconditional, non-blocking readers can get ahead of writers: + * + * Reader Writer + * ──────────────── ────────────────────── + * head: 0 → 1 + * slots[0]: empty + * tail: 0 → 1 + * slots[0]: empty → full + * head: 1 → 2 + * slots[1]: empty! + * + * To avoid this, non-blocking reads (ioqq_pop(ioqq, false)) must mark the slots + * somehow so that writers can skip them: + * + * Reader Writer + * ─────────────────────── ─────────────────────── + * head: 0 → 1 + * slots[0]: empty → skip + * tail: 0 → 1 + * slots[0]: skip → empty + * tail: 1 → 2 + * slots[1]: empty → full + * head: 1 → 2 + * slots[1]: full → empty + * + * As well, a reader might "lap" a writer (or another reader), so slots need to + * count how many times they should be skipped: + * + * Reader Writer + * ────────────────────────── ───────────────────────── + * head: 0 → 1 + * slots[0]: empty → skip(1) + * head: 1 → 2 + * slots[1]: empty → skip(1) + * ... + * head: M → 0 + * slots[M]: empty → skip(1) + * head: 0 → 1 + * slots[0]: skip(1 → 2) + * tail: 0 → 1 + * slots[0]: skip(2 → 1) + * tail: 1 → 2 + * slots[1]: skip(1) → empty + * ... + * tail: M → 0 + * slots[M]: skip(1) → empty + * tail: 0 → 1 + * slots[0]: skip(1) → empty + * tail: 1 → 2 + * slots[1]: empty → full + * head: 1 → 2 + * slots[1]: full → empty + * + * As described in [1], this approach is susceptible to livelock if readers stay + * ahead of writers. This is okay for us because we don't retry failed non- + * blocking reads. + * + * The slot representation uses tag bits to hold either a pointer or skip(N): + * + * IOQ_SKIP (highest bit) IOQ_BLOCKED (lowest bit) + * ↓ ↓ + * 0 0 0 ... 0 0 0 + * └──────────┬──────────┘ + * │ + * value bits + * + * If IOQ_SKIP is unset, the value bits hold a pointer (or zero/NULL for empty). + * If IOQ_SKIP is set, the value bits hold a negative skip count. Writers can + * reduce the skip count by adding 1 to the value bits, and when the count hits + * zero, the carry will automatically clear IOQ_SKIP: + * + * IOQ_SKIP IOQ_BLOCKED + * ↓ ↓ + * 1 1 1 ... 1 0 0 skip(2) + * 1 1 1 ... 1 1 0 skip(1) + * 0 0 0 ... 0 0 0 empty + * + * The IOQ_BLOCKED flag is used to track sleeping waiters, futex-style. To wait + * for a slot to change, waiters call ioq_slot_wait() which sets IOQ_BLOCKED and + * goes to sleep. Whenever a slot is updated, if the old value had IOQ_BLOCKED + * set, ioq_slot_wake() must be called to wake up that waiter. + * + * Blocking/waking uses a pool of monitors (mutex, condition variable pairs). + * Slots are assigned round-robin to a monitor from the pool. + * + * [1]: https://arxiv.org/abs/2201.02179 + */ + +#include "prelude.h" +#include "ioq.h" +#include "alloc.h" +#include "atomic.h" +#include "bfstd.h" +#include "bit.h" +#include "diag.h" +#include "dir.h" +#include "stat.h" +#include "thread.h" +#include <errno.h> +#include <fcntl.h> +#include <pthread.h> +#include <stdint.h> +#include <stdlib.h> +#include <sys/stat.h> + +#if BFS_USE_LIBURING +# include <liburing.h> +#endif + +/** + * A monitor for an I/O queue slot. + */ +struct ioq_monitor { + cache_align pthread_mutex_t mutex; + pthread_cond_t cond; +}; + +/** Initialize an ioq_monitor. */ +static int ioq_monitor_init(struct ioq_monitor *monitor) { + if (mutex_init(&monitor->mutex, NULL) != 0) { + return -1; + } + + if (cond_init(&monitor->cond, NULL) != 0) { + mutex_destroy(&monitor->mutex); + return -1; + } + + return 0; +} + +/** Destroy an ioq_monitor. */ +static void ioq_monitor_destroy(struct ioq_monitor *monitor) { + cond_destroy(&monitor->cond); + mutex_destroy(&monitor->mutex); +} + +/** A single entry in a command queue. */ +typedef atomic uintptr_t ioq_slot; + +/** Someone might be waiting on this slot. */ +#define IOQ_BLOCKED ((uintptr_t)1) + +/** Bit for IOQ_SKIP. */ +#define IOQ_SKIP_BIT (UINTPTR_WIDTH - 1) +/** The next push(es) should skip this slot. */ +#define IOQ_SKIP ((uintptr_t)1 << IOQ_SKIP_BIT) +/** Amount to add for an additional skip. */ +#define IOQ_SKIP_ONE (~IOQ_BLOCKED) + +// Need room for two flag bits +bfs_static_assert(alignof(struct ioq_ent) >= (1 << 2)); + +/** + * An MPMC queue of I/O commands. + */ +struct ioqq { + /** Circular buffer index mask. */ + size_t slot_mask; + + /** Monitor index mask. */ + size_t monitor_mask; + /** Array of monitors used by the slots. */ + struct ioq_monitor *monitors; + + /** Index of next writer. */ + cache_align atomic size_t head; + /** Index of next reader. */ + cache_align atomic size_t tail; + + /** The circular buffer itself. */ + cache_align ioq_slot slots[]; +}; + +/** Destroy an I/O command queue. */ +static void ioqq_destroy(struct ioqq *ioqq) { + if (!ioqq) { + return; + } + + for (size_t i = 0; i < ioqq->monitor_mask + 1; ++i) { + ioq_monitor_destroy(&ioqq->monitors[i]); + } + free(ioqq->monitors); + free(ioqq); +} + +/** Create an I/O command queue. */ +static struct ioqq *ioqq_create(size_t size) { + // Circular buffer size must be a power of two + size = bit_ceil(size); + + struct ioqq *ioqq = ALLOC_FLEX(struct ioqq, slots, size); + if (!ioqq) { + return NULL; + } + + ioqq->slot_mask = size - 1; + ioqq->monitor_mask = -1; + + // Use a pool of monitors + size_t nmonitors = size < 64 ? size : 64; + ioqq->monitors = ALLOC_ARRAY(struct ioq_monitor, nmonitors); + if (!ioqq->monitors) { + ioqq_destroy(ioqq); + return NULL; + } + + for (size_t i = 0; i < nmonitors; ++i) { + if (ioq_monitor_init(&ioqq->monitors[i]) != 0) { + ioqq_destroy(ioqq); + return NULL; + } + ++ioqq->monitor_mask; + } + + atomic_init(&ioqq->head, 0); + atomic_init(&ioqq->tail, 0); + + for (size_t i = 0; i < size; ++i) { + atomic_init(&ioqq->slots[i], 0); + } + + return ioqq; +} + +/** Get the monitor associated with a slot. */ +static struct ioq_monitor *ioq_slot_monitor(struct ioqq *ioqq, ioq_slot *slot) { + size_t i = slot - ioqq->slots; + return &ioqq->monitors[i & ioqq->monitor_mask]; +} + +/** Atomically wait for a slot to change. */ +attr(noinline) +static uintptr_t ioq_slot_wait(struct ioqq *ioqq, ioq_slot *slot, uintptr_t value) { + struct ioq_monitor *monitor = ioq_slot_monitor(ioqq, slot); + mutex_lock(&monitor->mutex); + + uintptr_t ret = load(slot, relaxed); + if (ret != value) { + goto done; + } + + if (!(value & IOQ_BLOCKED)) { + value |= IOQ_BLOCKED; + if (!compare_exchange_strong(slot, &ret, value, relaxed, relaxed)) { + goto done; + } + } + + do { + // To avoid missed wakeups, it is important that + // cond_broadcast() is not called right here + cond_wait(&monitor->cond, &monitor->mutex); + ret = load(slot, relaxed); + } while (ret == value); + +done: + mutex_unlock(&monitor->mutex); + return ret; +} + +/** Wake up any threads waiting on a slot. */ +attr(noinline) +static void ioq_slot_wake(struct ioqq *ioqq, ioq_slot *slot) { + struct ioq_monitor *monitor = ioq_slot_monitor(ioqq, slot); + + // The following implementation would clearly avoid the missed wakeup + // issue mentioned above in ioq_slot_wait(): + // + // mutex_lock(&monitor->mutex); + // cond_broadcast(&monitor->cond); + // mutex_unlock(&monitor->mutex); + // + // As a minor optimization, we move the broadcast outside of the lock. + // This optimization is correct, even though it leads to a seemingly- + // useless empty critical section. + + mutex_lock(&monitor->mutex); + mutex_unlock(&monitor->mutex); + cond_broadcast(&monitor->cond); +} + +/** Branch-free (slot & IOQ_SKIP) ? ~IOQ_BLOCKED : 0 */ +static uintptr_t ioq_skip_mask(uintptr_t slot) { + return -(slot >> IOQ_SKIP_BIT) << 1; +} + +/** Push an entry into a slot. */ +static bool ioq_slot_push(struct ioqq *ioqq, ioq_slot *slot, struct ioq_ent *ent) { + uintptr_t prev = load(slot, relaxed); + + while (true) { + size_t skip_mask = ioq_skip_mask(prev); + size_t full_mask = ~skip_mask & ~IOQ_BLOCKED; + if (prev & full_mask) { + // full(ptr) → wait + prev = ioq_slot_wait(ioqq, slot, prev); + continue; + } + + // empty → full(ptr) + uintptr_t next = ((uintptr_t)ent >> 1) & full_mask; + // skip(1) → empty + // skip(n) → skip(n - 1) + next |= (prev - IOQ_SKIP_ONE) & skip_mask; + + if (compare_exchange_weak(slot, &prev, next, release, relaxed)) { + break; + } + } + + if (prev & IOQ_BLOCKED) { + ioq_slot_wake(ioqq, slot); + } + + return !(prev & IOQ_SKIP); +} + +/** (Try to) pop an entry from a slot. */ +static struct ioq_ent *ioq_slot_pop(struct ioqq *ioqq, ioq_slot *slot, bool block) { + uintptr_t prev = load(slot, relaxed); + while (true) { + // empty → skip(1) + // skip(n) → skip(n + 1) + // full(ptr) → full(ptr - 1) + uintptr_t next = prev + IOQ_SKIP_ONE; + // skip(n) → ~IOQ_BLOCKED + // full(ptr) → 0 + next &= ioq_skip_mask(next); + + if (block && next) { + prev = ioq_slot_wait(ioqq, slot, prev); + continue; + } + + if (compare_exchange_weak(slot, &prev, next, acquire, relaxed)) { + break; + } + } + + if (prev & IOQ_BLOCKED) { + ioq_slot_wake(ioqq, slot); + } + + // empty → 0 + // skip(n) → 0 + // full(ptr) → ptr + prev &= ioq_skip_mask(~prev); + return (struct ioq_ent *)(prev << 1); +} + +/** Push an entry onto the queue. */ +static void ioqq_push(struct ioqq *ioqq, struct ioq_ent *ent) { + while (true) { + size_t i = fetch_add(&ioqq->head, 1, relaxed); + ioq_slot *slot = &ioqq->slots[i & ioqq->slot_mask]; + if (ioq_slot_push(ioqq, slot, ent)) { + break; + } + } +} + +/** Push a batch of entries to the queue. */ +static void ioqq_push_batch(struct ioqq *ioqq, struct ioq_ent *batch[], size_t size) { + size_t mask = ioqq->slot_mask; + do { + size_t i = fetch_add(&ioqq->head, size, relaxed); + for (size_t j = i + size; i != j; ++i) { + ioq_slot *slot = &ioqq->slots[i & mask]; + if (ioq_slot_push(ioqq, slot, *batch)) { + ++batch; + --size; + } + } + } while (size > 0); +} + +/** Pop an entry from the queue. */ +static struct ioq_ent *ioqq_pop(struct ioqq *ioqq, bool block) { + size_t i = fetch_add(&ioqq->tail, 1, relaxed); + ioq_slot *slot = &ioqq->slots[i & ioqq->slot_mask]; + return ioq_slot_pop(ioqq, slot, block); +} + +/** Pop a batch of entries from the queue. */ +static void ioqq_pop_batch(struct ioqq *ioqq, struct ioq_ent *batch[], size_t size, bool block) { + size_t mask = ioqq->slot_mask; + size_t i = fetch_add(&ioqq->tail, size, relaxed); + for (size_t j = i + size; i != j; ++i) { + ioq_slot *slot = &ioqq->slots[i & mask]; + *batch++ = ioq_slot_pop(ioqq, slot, block); + block = false; + } +} + +/** Use cache-line-sized batches. */ +#define IOQ_BATCH (FALSE_SHARING_SIZE / sizeof(ioq_slot)) + +/** + * A batch of entries to send all at once. + */ +struct ioq_batch { + /** The current batch size. */ + size_t size; + /** The array of entries. */ + struct ioq_ent *entries[IOQ_BATCH]; +}; + +/** Send the batch to a queue. */ +static void ioq_batch_flush(struct ioqq *ioqq, struct ioq_batch *batch) { + if (batch->size > 0) { + ioqq_push_batch(ioqq, batch->entries, batch->size); + batch->size = 0; + } +} + +/** An an entry to a batch, flushing if necessary. */ +static void ioq_batch_push(struct ioqq *ioqq, struct ioq_batch *batch, struct ioq_ent *ent) { + if (batch->size >= IOQ_BATCH) { + ioq_batch_flush(ioqq, batch); + } + + batch->entries[batch->size++] = ent; +} + +/** Sentinel stop command. */ +static struct ioq_ent IOQ_STOP; + +#if BFS_USE_LIBURING +/** + * Supported io_uring operations. + */ +enum ioq_ring_ops { + IOQ_RING_OPENAT = 1 << 0, + IOQ_RING_CLOSE = 1 << 1, + IOQ_RING_STATX = 1 << 2, +}; +#endif + +/** I/O queue thread-specific data. */ +struct ioq_thread { + /** The thread handle. */ + pthread_t id; + /** Pointer back to the I/O queue. */ + struct ioq *parent; + +#if BFS_USE_LIBURING + /** io_uring instance. */ + struct io_uring ring; + /** Any error that occurred initializing the ring. */ + int ring_err; + /** Bitmask of supported io_uring operations. */ + enum ioq_ring_ops ring_ops; +#endif +}; + +struct ioq { + /** The depth of the queue. */ + size_t depth; + /** The current size of the queue. */ + size_t size; + /** Cancellation flag. */ + atomic bool cancel; + + /** ioq_ent arena. */ + struct arena ents; +#if BFS_USE_LIBURING && BFS_USE_STATX + /** struct statx arena. */ + struct arena xbufs; +#endif + + /** Pending I/O requests. */ + struct ioqq *pending; + /** Ready I/O responses. */ + struct ioqq *ready; + + /** The number of background threads. */ + size_t nthreads; + /** The background threads themselves. */ + struct ioq_thread threads[]; +}; + +/** Cancel a request if we need to. */ +static bool ioq_check_cancel(struct ioq *ioq, struct ioq_ent *ent) { + if (!load(&ioq->cancel, relaxed)) { + return false; + } + + // Always close(), even if we're cancelled, just like a real EINTR + if (ent->op == IOQ_CLOSE || ent->op == IOQ_CLOSEDIR) { + return false; + } + + ent->result = -EINTR; + return true; +} + +/** Dispatch a single request synchronously. */ +static void ioq_dispatch_sync(struct ioq *ioq, struct ioq_ent *ent) { + switch (ent->op) { + case IOQ_CLOSE: + ent->result = try(xclose(ent->close.fd)); + return; + + case IOQ_OPENDIR: { + struct ioq_opendir *args = &ent->opendir; + ent->result = try(bfs_opendir(args->dir, args->dfd, args->path, args->flags)); + if (ent->result >= 0) { + bfs_polldir(args->dir); + } + return; + } + + case IOQ_CLOSEDIR: + ent->result = try(bfs_closedir(ent->closedir.dir)); + return; + + case IOQ_STAT: { + struct ioq_stat *args = &ent->stat; + ent->result = try(bfs_stat(args->dfd, args->path, args->flags, args->buf)); + return; + } + } + + bfs_bug("Unknown ioq_op %d", (int)ent->op); + ent->result = -ENOSYS; +} + +#if BFS_USE_LIBURING + +/** io_uring worker state. */ +struct ioq_ring_state { + /** The I/O queue. */ + struct ioq *ioq; + /** The io_uring. */ + struct io_uring *ring; + /** Supported io_uring operations. */ + enum ioq_ring_ops ops; + /** Number of prepped, unsubmitted SQEs. */ + size_t prepped; + /** Number of submitted, unreaped SQEs. */ + size_t submitted; + /** Whether to stop the loop. */ + bool stop; + /** A batch of ready entries. */ + struct ioq_batch ready; +}; + +/** Dispatch a single request asynchronously. */ +static struct io_uring_sqe *ioq_dispatch_async(struct ioq_ring_state *state, struct ioq_ent *ent) { + struct io_uring *ring = state->ring; + enum ioq_ring_ops ops = state->ops; + struct io_uring_sqe *sqe = NULL; + + switch (ent->op) { + case IOQ_CLOSE: + if (ops & IOQ_RING_CLOSE) { + sqe = io_uring_get_sqe(ring); + io_uring_prep_close(sqe, ent->close.fd); + } + return sqe; + + case IOQ_OPENDIR: + if (ops & IOQ_RING_OPENAT) { + sqe = io_uring_get_sqe(ring); + struct ioq_opendir *args = &ent->opendir; + int flags = O_RDONLY | O_CLOEXEC | O_DIRECTORY; + io_uring_prep_openat(sqe, args->dfd, args->path, flags, 0); + } + return sqe; + + case IOQ_CLOSEDIR: +#if BFS_USE_UNWRAPDIR + if (ops & IOQ_RING_CLOSE) { + sqe = io_uring_get_sqe(ring); + io_uring_prep_close(sqe, bfs_unwrapdir(ent->closedir.dir)); + } +#endif + return sqe; + + case IOQ_STAT: +#if BFS_USE_STATX + if (ops & IOQ_RING_STATX) { + sqe = io_uring_get_sqe(ring); + struct ioq_stat *args = &ent->stat; + int flags = bfs_statx_flags(args->flags); + unsigned int mask = STATX_BASIC_STATS | STATX_BTIME; + io_uring_prep_statx(sqe, args->dfd, args->path, flags, mask, args->xbuf); + } +#endif + return sqe; + } + + bfs_bug("Unknown ioq_op %d", (int)ent->op); + return NULL; +} + +/** Check if ioq_ring_reap() has work to do. */ +static bool ioq_ring_empty(struct ioq_ring_state *state) { + return !state->prepped && !state->submitted && !state->ready.size; +} + +/** Prep a single SQE. */ +static void ioq_prep_sqe(struct ioq_ring_state *state, struct ioq_ent *ent) { + struct ioq *ioq = state->ioq; + if (ioq_check_cancel(ioq, ent)) { + ioq_batch_push(ioq->ready, &state->ready, ent); + return; + } + + struct io_uring_sqe *sqe = ioq_dispatch_async(state, ent); + if (sqe) { + io_uring_sqe_set_data(sqe, ent); + ++state->prepped; + } else { + ioq_dispatch_sync(ioq, ent); + ioq_batch_push(ioq->ready, &state->ready, ent); + } +} + +/** Prep a batch of SQEs. */ +static bool ioq_ring_prep(struct ioq_ring_state *state) { + if (state->stop) { + return false; + } + + struct ioq *ioq = state->ioq; + struct io_uring *ring = state->ring; + struct ioq_ent *pending[IOQ_BATCH]; + + while (io_uring_sq_space_left(ring) >= IOQ_BATCH) { + bool block = ioq_ring_empty(state); + ioqq_pop_batch(ioq->pending, pending, IOQ_BATCH, block); + + bool any = false; + for (size_t i = 0; i < IOQ_BATCH; ++i) { + struct ioq_ent *ent = pending[i]; + if (ent == &IOQ_STOP) { + ioqq_push(ioq->pending, &IOQ_STOP); + state->stop = true; + goto done; + } else if (ent) { + ioq_prep_sqe(state, ent); + any = true; + } + } + + if (!any) { + break; + } + } + +done: + return !ioq_ring_empty(state); +} + +/** Reap a single CQE. */ +static void ioq_reap_cqe(struct ioq_ring_state *state, struct io_uring_cqe *cqe) { + struct ioq *ioq = state->ioq; + struct io_uring *ring = state->ring; + + struct ioq_ent *ent = io_uring_cqe_get_data(cqe); + ent->result = cqe->res; + io_uring_cqe_seen(ring, cqe); + --state->submitted; + + if (ent->result < 0) { + goto push; + } + + switch (ent->op) { + case IOQ_OPENDIR: { + int fd = ent->result; + if (ioq_check_cancel(ioq, ent)) { + xclose(fd); + goto push; + } + + struct ioq_opendir *args = &ent->opendir; + ent->result = try(bfs_opendir(args->dir, fd, NULL, args->flags)); + if (ent->result >= 0) { + // TODO: io_uring_prep_getdents() + bfs_polldir(args->dir); + } else { + xclose(fd); + } + + break; + } + +#if BFS_USE_STATX + case IOQ_STAT: { + struct ioq_stat *args = &ent->stat; + ent->result = try(bfs_statx_convert(args->buf, args->xbuf)); + break; + } +#endif + + default: + break; + } + +push: + ioq_batch_push(ioq->ready, &state->ready, ent); +} + +/** Reap a batch of CQEs. */ +static void ioq_ring_reap(struct ioq_ring_state *state) { + struct ioq *ioq = state->ioq; + struct io_uring *ring = state->ring; + + while (state->prepped) { + int ret = io_uring_submit_and_wait(ring, 1); + if (ret > 0) { + state->prepped -= ret; + state->submitted += ret; + } + } + + while (state->submitted) { + struct io_uring_cqe *cqe; + if (io_uring_wait_cqe(ring, &cqe) < 0) { + continue; + } + + ioq_reap_cqe(state, cqe); + } + + ioq_batch_flush(ioq->ready, &state->ready); +} + +/** io_uring worker loop. */ +static void ioq_ring_work(struct ioq_thread *thread) { + struct ioq_ring_state state = { + .ioq = thread->parent, + .ring = &thread->ring, + .ops = thread->ring_ops, + }; + + while (ioq_ring_prep(&state)) { + ioq_ring_reap(&state); + } +} + +#endif // BFS_USE_LIBURING + +/** Synchronous syscall loop. */ +static void ioq_sync_work(struct ioq_thread *thread) { + struct ioq *ioq = thread->parent; + + bool stop = false; + while (!stop) { + struct ioq_ent *pending[IOQ_BATCH]; + ioqq_pop_batch(ioq->pending, pending, IOQ_BATCH, true); + + struct ioq_batch ready; + ready.size = 0; + + for (size_t i = 0; i < IOQ_BATCH; ++i) { + struct ioq_ent *ent = pending[i]; + if (ent == &IOQ_STOP) { + ioqq_push(ioq->pending, &IOQ_STOP); + stop = true; + break; + } else if (ent) { + if (!ioq_check_cancel(ioq, ent)) { + ioq_dispatch_sync(ioq, ent); + } + ioq_batch_push(ioq->ready, &ready, ent); + } + } + + ioq_batch_flush(ioq->ready, &ready); + } +} + +/** Background thread entry point. */ +static void *ioq_work(void *ptr) { + struct ioq_thread *thread = ptr; + +#if BFS_USE_LIBURING + if (thread->ring_err == 0) { + ioq_ring_work(thread); + return NULL; + } +#endif + + ioq_sync_work(thread); + return NULL; +} + +/** Initialize io_uring thread state. */ +static int ioq_ring_init(struct ioq *ioq, struct ioq_thread *thread) { +#if BFS_USE_LIBURING + struct ioq_thread *prev = NULL; + if (thread > ioq->threads) { + prev = thread - 1; + } + + if (prev && prev->ring_err) { + thread->ring_err = prev->ring_err; + return -1; + } + + // Share io-wq workers between rings + struct io_uring_params params = {0}; + if (prev) { + params.flags |= IORING_SETUP_ATTACH_WQ; + params.wq_fd = prev->ring.ring_fd; + } + + // Use a page for each SQE ring + size_t entries = 4096 / sizeof(struct io_uring_sqe); + thread->ring_err = -io_uring_queue_init_params(entries, &thread->ring, ¶ms); + if (thread->ring_err) { + return -1; + } + + if (prev) { + // Initial setup already complete + thread->ring_ops = prev->ring_ops; + return 0; + } + + // Check for supported operations + struct io_uring_probe *probe = io_uring_get_probe_ring(&thread->ring); + if (probe) { + if (io_uring_opcode_supported(probe, IORING_OP_OPENAT)) { + thread->ring_ops |= IOQ_RING_OPENAT; + } + if (io_uring_opcode_supported(probe, IORING_OP_CLOSE)) { + thread->ring_ops |= IOQ_RING_CLOSE; + } +#if BFS_USE_STATX + if (io_uring_opcode_supported(probe, IORING_OP_STATX)) { + thread->ring_ops |= IOQ_RING_STATX; + } +#endif + io_uring_free_probe(probe); + } + if (!thread->ring_ops) { + io_uring_queue_exit(&thread->ring); + thread->ring_err = ENOTSUP; + return -1; + } + + // Limit the number of io_uring workers + unsigned int values[] = { + ioq->nthreads, // [IO_WQ_BOUND] + 0, // [IO_WQ_UNBOUND] + }; + io_uring_register_iowq_max_workers(&thread->ring, values); +#endif + + return 0; +} + +/** Destroy an io_uring. */ +static void ioq_ring_exit(struct ioq_thread *thread) { +#if BFS_USE_LIBURING + if (thread->ring_err == 0) { + io_uring_queue_exit(&thread->ring); + } +#endif +} + +/** Create an I/O queue thread. */ +static int ioq_thread_create(struct ioq *ioq, struct ioq_thread *thread) { + thread->parent = ioq; + + ioq_ring_init(ioq, thread); + + if (thread_create(&thread->id, NULL, ioq_work, thread) != 0) { + ioq_ring_exit(thread); + return -1; + } + + return 0; +} + +/** Join an I/O queue thread. */ +static void ioq_thread_join(struct ioq_thread *thread) { + thread_join(thread->id, NULL); + ioq_ring_exit(thread); +} + +struct ioq *ioq_create(size_t depth, size_t nthreads) { + struct ioq *ioq = ZALLOC_FLEX(struct ioq, threads, nthreads); + if (!ioq) { + goto fail; + } + + ioq->depth = depth; + + ARENA_INIT(&ioq->ents, struct ioq_ent); +#if BFS_USE_LIBURING && BFS_USE_STATX + ARENA_INIT(&ioq->xbufs, struct statx); +#endif + + ioq->pending = ioqq_create(depth); + if (!ioq->pending) { + goto fail; + } + + ioq->ready = ioqq_create(depth); + if (!ioq->ready) { + goto fail; + } + + ioq->nthreads = nthreads; + for (size_t i = 0; i < nthreads; ++i) { + if (ioq_thread_create(ioq, &ioq->threads[i]) != 0) { + ioq->nthreads = i; + goto fail; + } + } + + return ioq; + + int err; +fail: + err = errno; + ioq_destroy(ioq); + errno = err; + return NULL; +} + +size_t ioq_capacity(const struct ioq *ioq) { + return ioq->depth - ioq->size; +} + +static struct ioq_ent *ioq_request(struct ioq *ioq, enum ioq_op op, void *ptr) { + if (load(&ioq->cancel, relaxed)) { + errno = EINTR; + return NULL; + } + + if (ioq->size >= ioq->depth) { + errno = EAGAIN; + return NULL; + } + + struct ioq_ent *ent = arena_alloc(&ioq->ents); + if (!ent) { + return NULL; + } + + ent->op = op; + ent->ptr = ptr; + ++ioq->size; + return ent; +} + +int ioq_close(struct ioq *ioq, int fd, void *ptr) { + struct ioq_ent *ent = ioq_request(ioq, IOQ_CLOSE, ptr); + if (!ent) { + return -1; + } + + ent->close.fd = fd; + + ioqq_push(ioq->pending, ent); + return 0; +} + +int ioq_opendir(struct ioq *ioq, struct bfs_dir *dir, int dfd, const char *path, enum bfs_dir_flags flags, void *ptr) { + struct ioq_ent *ent = ioq_request(ioq, IOQ_OPENDIR, ptr); + if (!ent) { + return -1; + } + + struct ioq_opendir *args = &ent->opendir; + args->dir = dir; + args->dfd = dfd; + args->path = path; + args->flags = flags; + + ioqq_push(ioq->pending, ent); + return 0; +} + +int ioq_closedir(struct ioq *ioq, struct bfs_dir *dir, void *ptr) { + struct ioq_ent *ent = ioq_request(ioq, IOQ_CLOSEDIR, ptr); + if (!ent) { + return -1; + } + + ent->closedir.dir = dir; + + ioqq_push(ioq->pending, ent); + return 0; +} + +int ioq_stat(struct ioq *ioq, int dfd, const char *path, enum bfs_stat_flags flags, struct bfs_stat *buf, void *ptr) { + struct ioq_ent *ent = ioq_request(ioq, IOQ_STAT, ptr); + if (!ent) { + return -1; + } + + struct ioq_stat *args = &ent->stat; + args->dfd = dfd; + args->path = path; + args->flags = flags; + args->buf = buf; + +#if BFS_USE_LIBURING && BFS_USE_STATX + args->xbuf = arena_alloc(&ioq->xbufs); + if (!args->xbuf) { + ioq_free(ioq, ent); + return -1; + } +#endif + + ioqq_push(ioq->pending, ent); + return 0; +} + +struct ioq_ent *ioq_pop(struct ioq *ioq, bool block) { + if (ioq->size == 0) { + return NULL; + } + + return ioqq_pop(ioq->ready, block); +} + +void ioq_free(struct ioq *ioq, struct ioq_ent *ent) { + bfs_assert(ioq->size > 0); + --ioq->size; + +#if BFS_USE_LIBURING && BFS_USE_STATX + if (ent->op == IOQ_STAT && ent->stat.xbuf) { + arena_free(&ioq->xbufs, ent->stat.xbuf); + } +#endif + + arena_free(&ioq->ents, ent); +} + +void ioq_cancel(struct ioq *ioq) { + if (!exchange(&ioq->cancel, true, relaxed)) { + ioqq_push(ioq->pending, &IOQ_STOP); + } +} + +void ioq_destroy(struct ioq *ioq) { + if (!ioq) { + return; + } + + if (ioq->nthreads > 0) { + ioq_cancel(ioq); + } + + for (size_t i = 0; i < ioq->nthreads; ++i) { + ioq_thread_join(&ioq->threads[i]); + } + + ioqq_destroy(ioq->ready); + ioqq_destroy(ioq->pending); + +#if BFS_USE_LIBURING && BFS_USE_STATX + arena_destroy(&ioq->xbufs); +#endif + arena_destroy(&ioq->ents); + + free(ioq); +} diff --git a/src/ioq.h b/src/ioq.h new file mode 100644 index 0000000..d8e1573 --- /dev/null +++ b/src/ioq.h @@ -0,0 +1,198 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +/** + * Asynchronous I/O queues. + */ + +#ifndef BFS_IOQ_H +#define BFS_IOQ_H + +#include "prelude.h" +#include "dir.h" +#include "stat.h" +#include <stddef.h> + +/** + * An queue of asynchronous I/O operations. + */ +struct ioq; + +/** + * I/O queue operations. + */ +enum ioq_op { + /** ioq_close(). */ + IOQ_CLOSE, + /** ioq_opendir(). */ + IOQ_OPENDIR, + /** ioq_closedir(). */ + IOQ_CLOSEDIR, + /** ioq_stat(). */ + IOQ_STAT, +}; + +/** + * The I/O queue implementation needs two tag bits in each pointer to a struct + * ioq_ent, so we need to ensure at least 4-byte alignment. The natural + * alignment is enough on most architectures, but not m68k, so over-align it. + */ +#define IOQ_ENT_ALIGN alignas(4) + +/** + * An I/O queue entry. + */ +struct ioq_ent { + /** The I/O operation. */ + IOQ_ENT_ALIGN enum ioq_op op; + + /** The return value (on success) or negative error code (on failure). */ + int result; + + /** Arbitrary user data. */ + void *ptr; + + /** Operation-specific arguments. */ + union { + /** ioq_close() args. */ + struct ioq_close { + int fd; + } close; + /** ioq_opendir() args. */ + struct ioq_opendir { + struct bfs_dir *dir; + const char *path; + int dfd; + enum bfs_dir_flags flags; + } opendir; + /** ioq_closedir() args. */ + struct ioq_closedir { + struct bfs_dir *dir; + } closedir; + /** ioq_stat() args. */ + struct ioq_stat { + const char *path; + struct bfs_stat *buf; + void *xbuf; + int dfd; + enum bfs_stat_flags flags; + } stat; + }; +}; + +/** + * Create an I/O queue. + * + * @param depth + * The maximum depth of the queue. + * @param nthreads + * The maximum number of background threads. + * @return + * The new I/O queue, or NULL on failure. + */ +struct ioq *ioq_create(size_t depth, size_t nthreads); + +/** + * Check the remaining capacity of a queue. + */ +size_t ioq_capacity(const struct ioq *ioq); + +/** + * Asynchronous close(). + * + * @param ioq + * The I/O queue. + * @param fd + * The fd to close. + * @param ptr + * An arbitrary pointer to associate with the request. + * @return + * 0 on success, or -1 on failure. + */ +int ioq_close(struct ioq *ioq, int fd, void *ptr); + +/** + * Asynchronous bfs_opendir(). + * + * @param ioq + * The I/O queue. + * @param dir + * The allocated directory. + * @param dfd + * The base file descriptor. + * @param path + * The path to open, relative to dfd. + * @param flags + * Flags that control which directory entries are listed. + * @param ptr + * An arbitrary pointer to associate with the request. + * @return + * 0 on success, or -1 on failure. + */ +int ioq_opendir(struct ioq *ioq, struct bfs_dir *dir, int dfd, const char *path, enum bfs_dir_flags flags, void *ptr); + +/** + * Asynchronous bfs_closedir(). + * + * @param ioq + * The I/O queue. + * @param dir + * The directory to close. + * @param ptr + * An arbitrary pointer to associate with the request. + * @return + * 0 on success, or -1 on failure. + */ +int ioq_closedir(struct ioq *ioq, struct bfs_dir *dir, void *ptr); + +/** + * Asynchronous bfs_stat(). + * + * @param ioq + * The I/O queue. + * @param dfd + * The base file descriptor. + * @param path + * The path to stat, relative to dfd. + * @param flags + * Flags that affect the lookup. + * @param buf + * A place to store the stat buffer, if successful. + * @param ptr + * An arbitrary pointer to associate with the request. + * @return + * 0 on success, or -1 on failure. + */ +int ioq_stat(struct ioq *ioq, int dfd, const char *path, enum bfs_stat_flags flags, struct bfs_stat *buf, void *ptr); + +/** + * Pop a response from the queue. + * + * @param ioq + * The I/O queue. + * @return + * The next response, or NULL. + */ +struct ioq_ent *ioq_pop(struct ioq *ioq, bool block); + +/** + * Free a queue entry. + * + * @param ioq + * The I/O queue. + * @param ent + * The entry to free. + */ +void ioq_free(struct ioq *ioq, struct ioq_ent *ent); + +/** + * Cancel any pending I/O operations. + */ +void ioq_cancel(struct ioq *ioq); + +/** + * Stop and destroy an I/O queue. + */ +void ioq_destroy(struct ioq *ioq); + +#endif // BFS_IOQ_H diff --git a/src/list.h b/src/list.h new file mode 100644 index 0000000..61d0e5b --- /dev/null +++ b/src/list.h @@ -0,0 +1,581 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +/** + * Intrusive linked lists. + * + * Singly-linked lists are declared like this: + * + * struct item { + * struct item *next; + * }; + * + * struct list { + * struct item *head; + * struct item **tail; + * }; + * + * The SLIST_*() macros manipulate singly-linked lists. + * + * struct list list; + * SLIST_INIT(&list); + * + * struct item item; + * SLIST_ITEM_INIT(&item); + * SLIST_APPEND(&list, &item); + * + * Doubly linked lists are similar: + * + * struct item { + * struct item *next; + * struct item *prev; + * }; + * + * struct list { + * struct item *head; + * struct item *tail; + * }; + * + * struct list list; + * LIST_INIT(&list); + * + * struct item item; + * LIST_ITEM_INIT(&item); + * LIST_APPEND(&list, &item); + * + * Items can be on multiple lists at once: + * + * struct item { + * struct { + * struct item *next; + * } chain; + * + * struct { + * struct item *next; + * struct item *prev; + * } lru; + * }; + * + * struct items { + * struct { + * struct item *head; + * struct item **tail; + * } queue; + * + * struct { + * struct item *head; + * struct item *tail; + * } cache; + * }; + * + * struct items items; + * SLIST_INIT(&items.queue); + * LIST_INIT(&items.cache); + * + * struct item item; + * SLIST_ITEM_INIT(&item, chain); + * SLIST_APPEND(&items.queue, &item, chain); + * LIST_ITEM_INIT(&item, lru); + * LIST_APPEND(&items.cache, &item, lru); + */ + +#ifndef BFS_LIST_H +#define BFS_LIST_H + +#include "diag.h" +#include <stddef.h> +#include <string.h> + +/** + * Initialize a singly-linked list. + * + * @param list + * The list to initialize. + * + * --- + * + * Like many macros in this file, this macro delegates the bulk of its work to + * some helper macros. We explicitly parenthesize (list) here so the helpers + * don't have to. + */ +#define SLIST_INIT(list) \ + SLIST_INIT_((list)) + +/** + * Helper for SLIST_INIT(). + */ +#define SLIST_INIT_(list) LIST_VOID_( \ + list->head = NULL, \ + list->tail = &list->head) + +/** + * Cast a list of expressions to void. + */ +#define LIST_VOID_(...) ((void)(__VA_ARGS__)) + +/** + * Initialize a singly-linked list item. + * + * @param item + * The item to initialize. + * @param node (optional) + * If specified, use item->node.next rather than item->next. + * + * --- + * + * We play some tricks with variadic macros to handle the optional parameter: + * + * SLIST_ITEM_INIT(item) => item->next = NULL + * SLIST_ITEM_INIT(item, node) => item->node.next = NULL + * + * The first trick is that + * + * #define SLIST_ITEM_INIT(item, ...) + * + * won't work because both commas are required (until C23; see N3033). As a + * workaround, we dispatch to another macro and add a trailing comma. + * + * SLIST_ITEM_INIT(item) => SLIST_ITEM_INIT_(item, ) + * SLIST_ITEM_INIT(item, node) => SLIST_ITEM_INIT_(item, node, ) + */ +#define SLIST_ITEM_INIT(...) \ + SLIST_ITEM_INIT_(__VA_ARGS__, ) + +/** + * Now we need a way to generate either ->next or ->node.next depending on + * whether the node parameter was passed. The approach is based on + * + * #define FOO(...) BAR(__VA_ARGS__, 1, 2, ) + * #define BAR(x, y, z, ...) z + * + * FOO(a) => 2 + * FOO(a, b) => 1 + * + * The LIST_NEXT_() macro uses this technique: + * + * LIST_NEXT_() => LIST_NODE_(next, ) + * LIST_NEXT_(node, ) => LIST_NODE_(next, node, ) + */ +#define LIST_NEXT_(...) \ + LIST_NODE_(next, __VA_ARGS__) + +/** + * LIST_NODE_() dispatches to yet another macro: + * + * LIST_NODE_(next, ) => LIST_NODE__(next, , . , , ) + * LIST_NODE_(next, node, ) => LIST_NODE__(next, node, , . , , ) + */ +#define LIST_NODE_(dir, ...) \ + LIST_NODE__(dir, __VA_ARGS__, . , , ) + +/** + * And finally, LIST_NODE__() adds the node and the dot if necessary. + * + * dir node ignored dot + * v v v v + * LIST_NODE__(next, , . , , ) => next + * LIST_NODE__(next, node, , . , , ) => node . next + * ^ ^ ^ ^ + * dir node ignored dot + */ +#define LIST_NODE__(dir, node, ignored, dot, ...) \ + node dot dir + +/** + * SLIST_ITEM_INIT_() uses LIST_NEXT_() to generate the right name for the list + * node, and finally delegates to the actual implementation. + */ +#define SLIST_ITEM_INIT_(item, ...) \ + SLIST_ITEM_INIT__((item), LIST_NEXT_(__VA_ARGS__)) + +#define SLIST_ITEM_INIT__(item, next) \ + LIST_VOID_(item->next = NULL) + +/** + * Type-checking macro for singly-linked lists. + */ +#define SLIST_CHECK_(list) \ + (void)sizeof(list->tail - &list->head) + +/** + * Get the head of a singly-linked list. + * + * @param list + * The list in question. + * @return + * The first item in the list. + */ +#define SLIST_HEAD(list) \ + SLIST_HEAD_((list)) + +#define SLIST_HEAD_(list) \ + (SLIST_CHECK_(list), list->head) + +/** + * Check if a singly-linked list is empty. + */ +#define SLIST_EMPTY(list) \ + (!SLIST_HEAD(list)) + +/** + * Like container_of(), but using the head pointer instead of offsetof() since + * we don't have the type around. + */ +#define SLIST_CONTAINER_(tail, head, next) \ + (void *)((char *)tail - ((char *)&head->next - (char *)head)) + +/** + * Get the tail of a singly-linked list. + * + * @param list + * The list in question. + * @param node (optional) + * If specified, use item->node.next rather than item->next. + * @return + * The last item in the list. + */ +#define SLIST_TAIL(...) \ + SLIST_TAIL_(__VA_ARGS__, ) + +#define SLIST_TAIL_(list, ...) \ + SLIST_TAIL__((list), LIST_NEXT_(__VA_ARGS__)) + +#define SLIST_TAIL__(list, next) \ + (list->head ? SLIST_CONTAINER_(list->tail, list->head, next) : NULL) + +/** + * Check if an item is attached to a singly-linked list. + * + * @param list + * The list to check. + * @param item + * The item to check. + * @param node (optional) + * If specified, use item->node.next rather than item->next. + * @return + * Whether the item is attached to the list. + */ +#define SLIST_ATTACHED(list, ...) \ + SLIST_ATTACHED_(list, __VA_ARGS__, ) + +#define SLIST_ATTACHED_(list, item, ...) \ + SLIST_ATTACHED__((list), (item), LIST_NEXT_(__VA_ARGS__)) + +#define SLIST_ATTACHED__(list, item, next) \ + (item->next || list->tail == &item->next) + +/** + * Insert an item into a singly-linked list. + * + * @param list + * The list to modify. + * @param cursor + * A pointer to the item to insert after, e.g. &list->head or list->tail. + * @param item + * The item to insert. + * @param node (optional) + * If specified, use item->node.next rather than item->next. + * @return + * A cursor for the next item. + */ +#define SLIST_INSERT(list, cursor, ...) \ + SLIST_INSERT_(list, cursor, __VA_ARGS__, ) + +#define SLIST_INSERT_(list, cursor, item, ...) \ + SLIST_INSERT__((list), (cursor), (item), LIST_NEXT_(__VA_ARGS__)) + +#define SLIST_INSERT__(list, cursor, item, next) \ + (bfs_assert(!SLIST_ATTACHED__(list, item, next)), \ + item->next = *cursor, \ + *cursor = item, \ + list->tail = item->next ? list->tail : &item->next, \ + &item->next) + +/** + * Add an item to the tail of a singly-linked list. + * + * @param list + * The list to modify. + * @param item + * The item to append. + * @param node (optional) + * If specified, use item->node.next rather than item->next. + */ +#define SLIST_APPEND(list, ...) \ + SLIST_APPEND_(list, __VA_ARGS__, ) + +#define SLIST_APPEND_(list, item, ...) \ + LIST_VOID_(SLIST_INSERT_(list, (list)->tail, item, __VA_ARGS__)) + +/** + * Add an item to the head of a singly-linked list. + * + * @param list + * The list to modify. + * @param item + * The item to prepend. + * @param node (optional) + * If specified, use item->node.next rather than item->next. + */ +#define SLIST_PREPEND(list, ...) \ + SLIST_PREPEND_(list, __VA_ARGS__, ) + +#define SLIST_PREPEND_(list, item, ...) \ + LIST_VOID_(SLIST_INSERT_(list, &(list)->head, item, __VA_ARGS__)) + +/** + * Add an entire singly-linked list to the tail of another. + * + * @param dest + * The destination list. + * @param src + * The source list. + */ +#define SLIST_EXTEND(dest, src) \ + SLIST_EXTEND_((dest), (src)) + +#define SLIST_EXTEND_(dest, src) \ + (src->head ? (*dest->tail = src->head, dest->tail = src->tail, SLIST_INIT(src)) : (void)0) + +/** + * Remove an item from a singly-linked list. + * + * @param list + * The list to modify. + * @param cursor + * A pointer to the item to remove, either &list->head or &prev->next. + * @param node (optional) + * If specified, use item->node.next rather than item->next. + * @return + * The removed item. + */ +#define SLIST_REMOVE(list, ...) \ + SLIST_REMOVE_(list, __VA_ARGS__, ) + +#define SLIST_REMOVE_(list, cursor, ...) \ + SLIST_REMOVE__((list), (cursor), LIST_NEXT_(__VA_ARGS__)) + +#define SLIST_REMOVE__(list, cursor, next) \ + (list->tail = (*cursor)->next ? list->tail : cursor, \ + slist_remove_impl(*cursor, cursor, &(*cursor)->next, sizeof(*cursor))) + +// Helper for SLIST_REMOVE() +static inline void *slist_remove_impl(void *ret, void *cursor, void *next, size_t size) { + // ret = *cursor; + // *cursor = ret->next; + memcpy(cursor, next, size); + // ret->next = NULL; + memset(next, 0, size); + return ret; +} + +/** + * Pop the head off a singly-linked list. + * + * @param list + * The list to modify. + * @param node (optional) + * If specified, use head->node.next rather than head->next. + * @return + * The popped item, or NULL if the list was empty. + */ +#define SLIST_POP(...) \ + SLIST_POP_(__VA_ARGS__, ) + +#define SLIST_POP_(list, ...) \ + SLIST_POP__((list), __VA_ARGS__) + +#define SLIST_POP__(list, ...) \ + (list->head ? SLIST_REMOVE_(list, &list->head, __VA_ARGS__) : NULL) + +/** + * Loop over the items in a singly-linked list. + * + * @param type + * The list item type. + * @param item + * The induction variable name. + * @param list + * The list to iterate. + * @param node (optional) + * If specified, use head->node.next rather than head->next. + */ +#define for_slist(type, item, ...) \ + for_slist_(type, item, __VA_ARGS__, ) + +#define for_slist_(type, item, list, ...) \ + for_slist__(type, item, (list), LIST_NEXT_(__VA_ARGS__)) + +#define for_slist__(type, item, list, next) \ + for (type *item = list->head, *_next; \ + item && (SLIST_CHECK_(list), _next = item->next, true); \ + item = _next) + +/** + * Initialize a doubly-linked list. + * + * @param list + * The list to initialize. + */ +#define LIST_INIT(list) \ + LIST_INIT_((list)) + +#define LIST_INIT_(list) \ + LIST_VOID_(list->head = list->tail = NULL) + +/** + * LIST_PREV_() => prev + * LIST_PREV_(node, ) => node.prev + */ +#define LIST_PREV_(...) \ + LIST_NODE_(prev, __VA_ARGS__) + +/** + * Initialize a doubly-linked list item. + * + * @param item + * The item to initialize. + * @param node (optional) + * If specified, use item->node.next rather than item->next. + */ +#define LIST_ITEM_INIT(...) \ + LIST_ITEM_INIT_(__VA_ARGS__, ) + +#define LIST_ITEM_INIT_(item, ...) \ + LIST_ITEM_INIT__((item), LIST_PREV_(__VA_ARGS__), LIST_NEXT_(__VA_ARGS__)) + +#define LIST_ITEM_INIT__(item, prev, next) \ + LIST_VOID_(item->prev = item->next = NULL) + +/** + * Type-checking macro for doubly-linked lists. + */ +#define LIST_CHECK_(list) \ + (void)sizeof(list->tail - list->head) + +/** + * Check if a doubly-linked list is empty. + */ +#define LIST_EMPTY(list) \ + LIST_EMPTY_((list)) + +#define LIST_EMPTY_(list) \ + (LIST_CHECK_(list), !list->head) + +/** + * Add an item to the tail of a doubly-linked list. + * + * @param list + * The list to modify. + * @param item + * The item to append. + * @param node (optional) + * If specified, use item->node.{prev,next} rather than item->{prev,next}. + */ +#define LIST_APPEND(list, ...) \ + LIST_INSERT(list, (list)->tail, __VA_ARGS__) + +/** + * Add an item to the head of a doubly-linked list. + * + * @param list + * The list to modify. + * @param item + * The item to prepend. + * @param node (optional) + * If specified, use item->node.{prev,next} rather than item->{prev,next}. + */ +#define LIST_PREPEND(list, ...) \ + LIST_INSERT(list, NULL, __VA_ARGS__) + +/** + * Check if an item is attached to a doubly-linked list. + * + * @param list + * The list to check. + * @param item + * The item to check. + * @param node (optional) + * If specified, use item->node.{prev,next} rather than item->{prev,next}. + * @return + * Whether the item is attached to the list. + */ +#define LIST_ATTACHED(list, ...) \ + LIST_ATTACHED_(list, __VA_ARGS__, ) + +#define LIST_ATTACHED_(list, item, ...) \ + LIST_ATTACHED__((list), (item), LIST_PREV_(__VA_ARGS__), LIST_NEXT_(__VA_ARGS__)) + +#define LIST_ATTACHED__(list, item, prev, next) \ + (item->prev || item->next || list->head == item || list->tail == item) + +/** + * Insert into a doubly-linked list after the given cursor. + * + * @param list + * The list to modify. + * @param cursor + * Insert after this element. + * @param item + * The item to insert. + * @param node (optional) + * If specified, use item->node.{prev,next} rather than item->{prev,next}. + */ +#define LIST_INSERT(list, cursor, ...) \ + LIST_INSERT_(list, cursor, __VA_ARGS__, ) + +#define LIST_INSERT_(list, cursor, item, ...) \ + LIST_INSERT__((list), (cursor), (item), LIST_PREV_(__VA_ARGS__), LIST_NEXT_(__VA_ARGS__)) + +#define LIST_INSERT__(list, cursor, item, prev, next) LIST_VOID_( \ + bfs_assert(!LIST_ATTACHED__(list, item, prev, next)), \ + item->prev = cursor, \ + item->next = cursor ? cursor->next : list->head, \ + *(item->prev ? &item->prev->next : &list->head) = item, \ + *(item->next ? &item->next->prev : &list->tail) = item) + +/** + * Remove an item from a doubly-linked list. + * + * @param list + * The list to modify. + * @param item + * The item to remove. + * @param node (optional) + * If specified, use item->node.{prev,next} rather than item->{prev,next}. + */ +#define LIST_REMOVE(list, ...) \ + LIST_REMOVE_(list, __VA_ARGS__, ) + +#define LIST_REMOVE_(list, item, ...) \ + LIST_REMOVE__((list), (item), LIST_PREV_(__VA_ARGS__), LIST_NEXT_(__VA_ARGS__)) + +#define LIST_REMOVE__(list, item, prev, next) LIST_VOID_( \ + *(item->prev ? &item->prev->next : &list->head) = item->next, \ + *(item->next ? &item->next->prev : &list->tail) = item->prev, \ + item->prev = item->next = NULL) + +/** + * Loop over the items in a doubly-linked list. + * + * @param type + * The list item type. + * @param item + * The induction variable name. + * @param list + * The list to iterate. + * @param node (optional) + * If specified, use head->node.next rather than head->next. + */ +#define for_list(type, item, ...) \ + for_list_(type, item, __VA_ARGS__, ) + +#define for_list_(type, item, list, ...) \ + for_list__(type, item, (list), LIST_NEXT_(__VA_ARGS__)) + +#define for_list__(type, item, list, next) \ + for (type *item = list->head, *_next; \ + item && (LIST_CHECK_(list), _next = item->next, true); \ + item = _next) + +#endif // BFS_LIST_H diff --git a/src/main.c b/src/main.c new file mode 100644 index 0000000..5dd88e4 --- /dev/null +++ b/src/main.c @@ -0,0 +1,150 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +/** + * - main(): the entry point for bfs(1), a breadth-first version of find(1) + * - main.c (this file) + * + * - bfs_parse_cmdline(): parses the command line into an expression tree + * - ctx.[ch] (struct bfs_ctx, the overall bfs context) + * - expr.h (declares the expression tree nodes) + * - parse.[ch] (the parser itself) + * - opt.[ch] (the optimizer) + * + * - bfs_eval(): runs the expression on every file it sees + * - eval.[ch] (the main evaluation functions) + * - exec.[ch] (implements -exec[dir]/-ok[dir]) + * - printf.[ch] (implements -[f]printf) + * + * - bftw(): used by bfs_eval() to walk the directory tree(s) + * - bftw.[ch] (an extended version of nftw(3)) + * + * - Utilities: + * - alloc.[ch] (memory allocation) + * - atomic.h (atomic operations) + * - bar.[ch] (a terminal status bar) + * - bit.h (bit manipulation) + * - bfstd.[ch] (standard library wrappers/polyfills) + * - color.[ch] (for pretty terminal colors) + * - prelude.h (configuration and feature/platform detection) + * - diag.[ch] (formats diagnostic messages) + * - dir.[ch] (a directory API facade) + * - dstring.[ch] (a dynamic string library) + * - fsade.[ch] (a facade over non-standard filesystem features) + * - ioq.[ch] (an async I/O queue) + * - list.h (linked list macros) + * - mtab.[ch] (parses the system's mount table) + * - pwcache.[ch] (a cache for the user/group tables) + * - sanity.h (sanitizer interfaces) + * - sighook.[ch] (signal hooks) + * - stat.[ch] (wraps stat(), or statx() on Linux) + * - thread.h (multi-threading) + * - trie.[ch] (a trie set/map implementation) + * - typo.[ch] (fuzzy matching for typos) + * - xregex.[ch] (regular expression support) + * - xspawn.[ch] (spawns processes) + * - xtime.[ch] (date/time handling utilities) + */ + +#include "prelude.h" +#include "bfstd.h" +#include "ctx.h" +#include "diag.h" +#include "eval.h" +#include "parse.h" +#include <errno.h> +#include <fcntl.h> +#include <locale.h> +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include <unistd.h> + +/** + * Check if a file descriptor is open. + */ +static bool isopen(int fd) { + return fcntl(fd, F_GETFD) >= 0 || errno != EBADF; +} + +/** + * Open a file and redirect it to a particular descriptor. + */ +static int redirect(int fd, const char *path, int flags) { + int newfd = open(path, flags); + if (newfd < 0 || newfd == fd) { + return newfd; + } + + int ret = dup2(newfd, fd); + close_quietly(newfd); + return ret; +} + +/** + * Make sure the standard streams std{in,out,err} are open. If they are not, + * future open() calls may use those file descriptors, and std{in,out,err} will + * use them unintentionally. + */ +static int open_std_streams(void) { +#ifdef O_PATH + const int inflags = O_PATH, outflags = O_PATH; +#else + // These are intentionally backwards so that bfs >&- still fails with EBADF + const int inflags = O_WRONLY, outflags = O_RDONLY; +#endif + + if (!isopen(STDERR_FILENO) && redirect(STDERR_FILENO, "/dev/null", outflags) < 0) { + return -1; + } + if (!isopen(STDOUT_FILENO) && redirect(STDOUT_FILENO, "/dev/null", outflags) < 0) { + perror("redirect()"); + return -1; + } + if (!isopen(STDIN_FILENO) && redirect(STDIN_FILENO, "/dev/null", inflags) < 0) { + perror("redirect()"); + return -1; + } + + return 0; +} + +/** + * bfs entry point. + */ +int main(int argc, char *argv[]) { + // Make sure the standard streams are open + if (open_std_streams() != 0) { + return EXIT_FAILURE; + } + + // Use the system locale instead of "C" + int locale_err = 0; + if (!setlocale(LC_ALL, "")) { + locale_err = errno; + } + + // Apply the environment's timezone + tzset(); + + // Parse the command line + struct bfs_ctx *ctx = bfs_parse_cmdline(argc, argv); + if (!ctx) { + return EXIT_FAILURE; + } + + // Warn if setlocale() failed, unless there's no expression to evaluate + if (locale_err && ctx->warn && ctx->expr) { + bfs_warning(ctx, "Failed to set locale: %s\n\n", xstrerror(locale_err)); + } + + // Walk the file system tree, evaluating the expression on each file + int ret = bfs_eval(ctx); + + // Free the parsed command line, and detect any last-minute errors + if (bfs_ctx_free(ctx) != 0 && ret == EXIT_SUCCESS) { + ret = EXIT_FAILURE; + } + + return ret; +} diff --git a/src/mtab.c b/src/mtab.c new file mode 100644 index 0000000..0377fea --- /dev/null +++ b/src/mtab.c @@ -0,0 +1,303 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +#include "prelude.h" +#include "mtab.h" +#include "alloc.h" +#include "bfstd.h" +#include "stat.h" +#include "trie.h" +#include <errno.h> +#include <fcntl.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> + +#if !defined(BFS_USE_MNTENT) && BFS_HAS_GETMNTENT_1 +# define BFS_USE_MNTENT true +#elif !defined(BFS_USE_MNTINFO) && BFS_HAS_GETMNTINFO +# define BFS_USE_MNTINFO true +#elif !defined(BFS_USE_MNTTAB) && BFS_HAS_GETMNTENT_2 +# define BFS_USE_MNTTAB true +#endif + +#if BFS_USE_MNTENT +# include <mntent.h> +# include <paths.h> +# include <stdio.h> +#elif BFS_USE_MNTINFO +# include <sys/mount.h> +#elif BFS_USE_MNTTAB +# include <stdio.h> +# include <sys/mnttab.h> +#endif + +/** + * A mount point in the table. + */ +struct bfs_mount { + /** The path to the mount point. */ + char *path; + /** The filesystem type. */ + char *type; + /** Buffer for the strings. */ + char buf[]; +}; + +struct bfs_mtab { + /** Mount point arena. */ + struct varena varena; + + /** The array of mount points. */ + struct bfs_mount **mounts; + /** The number of mount points. */ + size_t nmounts; + + /** The basenames of every mount point. */ + struct trie names; + + /** A map from device ID to fstype (populated lazily). */ + struct trie types; + /** Whether the types map has been populated. */ + bool types_filled; +}; + +/** + * Add an entry to the mount table. + */ +attr(maybe_unused) +static int bfs_mtab_add(struct bfs_mtab *mtab, const char *path, const char *type) { + size_t path_size = strlen(path) + 1; + size_t type_size = strlen(type) + 1; + size_t size = path_size + type_size; + struct bfs_mount *mount = varena_alloc(&mtab->varena, size); + if (!mount) { + return -1; + } + + struct bfs_mount **ptr = RESERVE(struct bfs_mount *, &mtab->mounts, &mtab->nmounts); + if (!ptr) { + goto free; + } + *ptr = mount; + + mount->path = mount->buf; + memcpy(mount->path, path, path_size); + + mount->type = mount->buf + path_size; + memcpy(mount->type, type, type_size); + + const char *name = path + xbaseoff(path); + if (!trie_insert_str(&mtab->names, name)) { + goto shrink; + } + + return 0; + +shrink: + --mtab->nmounts; +free: + varena_free(&mtab->varena, mount, size); + return -1; +} + +struct bfs_mtab *bfs_mtab_parse(void) { + struct bfs_mtab *mtab = ZALLOC(struct bfs_mtab); + if (!mtab) { + return NULL; + } + + VARENA_INIT(&mtab->varena, struct bfs_mount, buf); + + trie_init(&mtab->names); + trie_init(&mtab->types); + + int error = 0; + +#if BFS_USE_MNTENT + + FILE *file = setmntent(_PATH_MOUNTED, "r"); + if (!file) { + // In case we're in a chroot or something with /proc but no /etc/mtab + error = errno; + file = setmntent("/proc/mounts", "r"); + } + if (!file) { + goto fail; + } + + struct mntent *mnt; + while ((mnt = getmntent(file))) { + if (bfs_mtab_add(mtab, mnt->mnt_dir, mnt->mnt_type) != 0) { + error = errno; + endmntent(file); + goto fail; + } + } + + endmntent(file); + +#elif BFS_USE_MNTINFO + +#if __NetBSD__ + typedef struct statvfs bfs_statfs; +#else + typedef struct statfs bfs_statfs; +#endif + + bfs_statfs *mntbuf; + int size = getmntinfo(&mntbuf, MNT_WAIT); + if (size <= 0) { + error = errno; + goto fail; + } + + for (bfs_statfs *mnt = mntbuf; mnt < mntbuf + size; ++mnt) { + if (bfs_mtab_add(mtab, mnt->f_mntonname, mnt->f_fstypename) != 0) { + error = errno; + goto fail; + } + } + +#elif BFS_USE_MNTTAB + + FILE *file = xfopen(MNTTAB, O_RDONLY | O_CLOEXEC); + if (!file) { + error = errno; + goto fail; + } + + struct mnttab mnt; + while (getmntent(file, &mnt) == 0) { + if (bfs_mtab_add(mtab, mnt.mnt_mountp, mnt.mnt_fstype) != 0) { + error = errno; + fclose(file); + goto fail; + } + } + + fclose(file); + +#else + + error = ENOTSUP; + goto fail; + +#endif + + return mtab; + +fail: + bfs_mtab_free(mtab); + errno = error; + return NULL; +} + +static int bfs_mtab_fill_types(struct bfs_mtab *mtab) { + const enum bfs_stat_flags flags = BFS_STAT_NOFOLLOW | BFS_STAT_NOSYNC; + int ret = -1; + + // It's possible that /path/to/mount was unmounted between bfs_mtab_parse() and bfs_mtab_fill_types(). + // In that case, the dev_t of /path/to/mount will be the same as /path/to, which should not get its + // fstype from the old mount record of /path/to/mount. + // + // Detect this by comparing the st_dev of the parent (/path/to) and child (/path/to/mount). Only when + // they differ can the filesystem type actually change between them. As a minor optimization, we keep + // the parent directory open in case multiple mounts have the same parent (e.g. /mnt). + char *parent_dir = NULL; + int parent_fd = -1; + int parent_ret = -1; + struct bfs_stat parent_stat; + + for (size_t i = 0; i < mtab->nmounts; ++i) { + struct bfs_mount *mount = mtab->mounts[i]; + const char *path = mount->path; + int fd = AT_FDCWD; + + char *dir = xdirname(path); + if (!dir) { + goto fail; + } + + if (parent_dir && strcmp(parent_dir, dir) == 0) { + // Same parent + free(dir); + } else { + free(parent_dir); + parent_dir = dir; + + if (parent_fd >= 0) { + xclose(parent_fd); + } + parent_fd = open(parent_dir, O_SEARCH | O_CLOEXEC | O_DIRECTORY); + + parent_ret = -1; + if (parent_fd >= 0) { + parent_ret = bfs_stat(parent_fd, NULL, flags, &parent_stat); + } + } + + if (parent_fd >= 0) { + fd = parent_fd; + path += xbaseoff(path); + } + + struct bfs_stat sb; + if (bfs_stat(fd, path, flags, &sb) != 0) { + continue; + } + + if (parent_ret == 0 && parent_stat.dev == sb.dev && parent_stat.ino != sb.ino) { + // Not a mount point any more (or a bind mount, but with the same fstype) + continue; + } + + struct trie_leaf *leaf = trie_insert_mem(&mtab->types, &sb.dev, sizeof(sb.dev)); + if (leaf) { + leaf->value = mount->type; + } else { + goto fail; + } + } + + mtab->types_filled = true; + ret = 0; + +fail: + if (parent_fd >= 0) { + xclose(parent_fd); + } + free(parent_dir); + return ret; +} + +const char *bfs_fstype(const struct bfs_mtab *mtab, const struct bfs_stat *statbuf) { + if (!mtab->types_filled) { + if (bfs_mtab_fill_types((struct bfs_mtab *)mtab) != 0) { + return NULL; + } + } + + const struct trie_leaf *leaf = trie_find_mem(&mtab->types, &statbuf->dev, sizeof(statbuf->dev)); + if (leaf) { + return leaf->value; + } else { + return "unknown"; + } +} + +bool bfs_might_be_mount(const struct bfs_mtab *mtab, const char *name) { + return trie_find_str(&mtab->names, name); +} + +void bfs_mtab_free(struct bfs_mtab *mtab) { + if (mtab) { + trie_destroy(&mtab->types); + trie_destroy(&mtab->names); + + free(mtab->mounts); + varena_destroy(&mtab->varena); + + free(mtab); + } +} diff --git a/src/mtab.h b/src/mtab.h new file mode 100644 index 0000000..67290c2 --- /dev/null +++ b/src/mtab.h @@ -0,0 +1,58 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +/** + * A facade over platform-specific APIs for enumerating mounted filesystems. + */ + +#ifndef BFS_MTAB_H +#define BFS_MTAB_H + +#include "prelude.h" + +struct bfs_stat; + +/** + * A file system mount table. + */ +struct bfs_mtab; + +/** + * Parse the mount table. + * + * @return + * The parsed mount table, or NULL on error. + */ +struct bfs_mtab *bfs_mtab_parse(void); + +/** + * Determine the file system type that a file is on. + * + * @param mtab + * The current mount table. + * @param statbuf + * The bfs_stat() buffer for the file in question. + * @return + * The type of file system containing this file, "unknown" if not known, + * or NULL on error. + */ +const char *bfs_fstype(const struct bfs_mtab *mtab, const struct bfs_stat *statbuf); + +/** + * Check if a file could be a mount point. + * + * @param mtab + * The current mount table. + * @param name + * The name of the file to check. + * @return + * Whether the named file could be a mount point. + */ +bool bfs_might_be_mount(const struct bfs_mtab *mtab, const char *name); + +/** + * Free a mount table. + */ +void bfs_mtab_free(struct bfs_mtab *mtab); + +#endif // BFS_MTAB_H diff --git a/src/opt.c b/src/opt.c new file mode 100644 index 0000000..883d598 --- /dev/null +++ b/src/opt.c @@ -0,0 +1,2299 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +/** + * The expression optimizer. Different optimization levels are supported: + * + * -O1: basic logical simplifications, like folding (-true -and -foo) to -foo. + * + * -O2: dead code elimination and data flow analysis. struct df_domain is used + * to record data flow facts that are true at various points of evaluation. + * Specifically, struct df_domain records the state before an expression is + * evaluated (opt->before), and after an expression returns true + * (opt->after_true) or false (opt->after_false). Additionally, opt->impure + * records the possible state before any expression with side effects is + * evaluated. + * + * -O3: expression re-ordering to reduce expected cost. In an expression like + * (-foo -and -bar), if both -foo and -bar are pure (no side effects), they can + * be re-ordered to (-bar -and -foo). This is profitable if the expected cost + * is lower for the re-ordered expression, for example if -foo is very slow or + * -bar is likely to return false. + * + * -O4/-Ofast: aggressive optimizations that may affect correctness in corner + * cases. The main effect is to use opt->impure to determine if any side- + * effects are reachable at all, skipping the traversal if not. + */ + +#include "prelude.h" +#include "opt.h" +#include "bftw.h" +#include "bit.h" +#include "color.h" +#include "ctx.h" +#include "diag.h" +#include "dir.h" +#include "eval.h" +#include "exec.h" +#include "expr.h" +#include "list.h" +#include "pwcache.h" +#include <errno.h> +#include <limits.h> +#include <stdarg.h> +#include <stdio.h> +#include <unistd.h> + +static char *fake_and_arg = "-and"; +static char *fake_or_arg = "-or"; +static char *fake_not_arg = "-not"; + +/** + * The data flow domain for predicates. + */ +enum df_pred { + /** The bottom state (unreachable). */ + PRED_BOTTOM = 0, + /** The predicate is known to be false. */ + PRED_FALSE = 1 << false, + /** The predicate is known to be true. */ + PRED_TRUE = 1 << true, + /** The top state (unknown). */ + PRED_TOP = PRED_FALSE | PRED_TRUE, +}; + +/** Make a predicate known. */ +static void constrain_pred(enum df_pred *pred, bool value) { + *pred &= 1 << value; +} + +/** Compute the join (union) of two predicates. */ +static void pred_join(enum df_pred *dest, enum df_pred src) { + *dest |= src; +} + +/** + * Types of predicates we track. + */ +enum pred_type { + /** -readable */ + READABLE_PRED, + /** -writable */ + WRITABLE_PRED, + /** -executable */ + EXECUTABLE_PRED, + /** -acl */ + ACL_PRED, + /** -capable */ + CAPABLE_PRED, + /** -empty */ + EMPTY_PRED, + /** -hidden */ + HIDDEN_PRED, + /** -nogroup */ + NOGROUP_PRED, + /** -nouser */ + NOUSER_PRED, + /** -sparse */ + SPARSE_PRED, + /** -xattr */ + XATTR_PRED, + /** The number of pred_types. */ + PRED_TYPES, +}; + +/** Get the name of a predicate type. */ +static const char *pred_type_name(enum pred_type type) { + switch (type) { + case READABLE_PRED: + return "-readable"; + case WRITABLE_PRED: + return "-writable"; + case EXECUTABLE_PRED: + return "-executable"; + case ACL_PRED: + return "-acl"; + case CAPABLE_PRED: + return "-capable"; + case EMPTY_PRED: + return "-empty"; + case HIDDEN_PRED: + return "-hidden"; + case NOGROUP_PRED: + return "-nogroup"; + case NOUSER_PRED: + return "-nouser"; + case SPARSE_PRED: + return "-sparse"; + case XATTR_PRED: + return "-xattr"; + + case PRED_TYPES: + break; + } + + bfs_bug("Unknown predicate %d", (int)type); + return "???"; +} + +/** + * A contrained integer range. + */ +struct df_range { + /** The (inclusive) minimum value. */ + long long min; + /** The (inclusive) maximum value. */ + long long max; +}; + +/** Initialize an empty range. */ +static void range_init_bottom(struct df_range *range) { + range->min = LLONG_MAX; + range->max = LLONG_MIN; +} + +/** Check if a range is empty. */ +static bool range_is_bottom(const struct df_range *range) { + return range->min > range->max; +} + +/** Initialize a full range. */ +static void range_init_top(struct df_range *range) { + // All ranges we currently track are non-negative + range->min = 0; + range->max = LLONG_MAX; +} + +/** Check for an infinite range. */ +static bool range_is_top(const struct df_range *range) { + return range->min == 0 && range->max == LLONG_MAX; +} + +/** Compute the minimum of two values. */ +static long long min_value(long long a, long long b) { + if (a < b) { + return a; + } else { + return b; + } +} + +/** Compute the maximum of two values. */ +static long long max_value(long long a, long long b) { + if (a > b) { + return a; + } else { + return b; + } +} + +/** Constrain the minimum of a range. */ +static void constrain_min(struct df_range *range, long long value) { + range->min = max_value(range->min, value); +} + +/** Contrain the maximum of a range. */ +static void constrain_max(struct df_range *range, long long value) { + range->max = min_value(range->max, value); +} + +/** Remove a single value from a range. */ +static void range_remove(struct df_range *range, long long value) { + if (range->min == value) { + if (range->min == LLONG_MAX) { + range->max = LLONG_MIN; + } else { + ++range->min; + } + } + + if (range->max == value) { + if (range->max == LLONG_MIN) { + range->min = LLONG_MAX; + } else { + --range->max; + } + } +} + +/** Compute the union of two ranges. */ +static void range_join(struct df_range *dest, const struct df_range *src) { + dest->min = min_value(dest->min, src->min); + dest->max = max_value(dest->max, src->max); +} + +/** + * Types of ranges we track. + */ +enum range_type { + /** Search tree depth. */ + DEPTH_RANGE, + /** Group ID. */ + GID_RANGE, + /** Inode number. */ + INUM_RANGE, + /** Hard link count. */ + LINKS_RANGE, + /** File size. */ + SIZE_RANGE, + /** User ID. */ + UID_RANGE, + /** The number of range_types. */ + RANGE_TYPES, +}; + +/** Get the name of a range type. */ +static const char *range_type_name(enum range_type type) { + switch (type) { + case DEPTH_RANGE: + return "-depth"; + case GID_RANGE: + return "-gid"; + case INUM_RANGE: + return "-inum"; + case LINKS_RANGE: + return "-links"; + case SIZE_RANGE: + return "-size"; + case UID_RANGE: + return "-uid"; + + case RANGE_TYPES: + break; + } + + bfs_bug("Unknown range %d", (int)type); + return "???"; +} + +/** + * The data flow analysis domain. + */ +struct df_domain { + /** The predicates we track. */ + enum df_pred preds[PRED_TYPES]; + + /** The value ranges we track. */ + struct df_range ranges[RANGE_TYPES]; + + /** Bitmask of possible -types. */ + unsigned int types; + /** Bitmask of possible -xtypes. */ + unsigned int xtypes; +}; + +/** Set a data flow value to bottom. */ +static void df_init_bottom(struct df_domain *value) { + for (int i = 0; i < PRED_TYPES; ++i) { + value->preds[i] = PRED_BOTTOM; + } + + for (int i = 0; i < RANGE_TYPES; ++i) { + range_init_bottom(&value->ranges[i]); + } + + value->types = 0; + value->xtypes = 0; +} + +/** Determine whether a fact set is impossible. */ +static bool df_is_bottom(const struct df_domain *value) { + for (int i = 0; i < RANGE_TYPES; ++i) { + if (range_is_bottom(&value->ranges[i])) { + return true; + } + } + + for (int i = 0; i < PRED_TYPES; ++i) { + if (value->preds[i] == PRED_BOTTOM) { + return true; + } + } + + if (!value->types || !value->xtypes) { + return true; + } + + return false; +} + +/** Initialize some data flow value. */ +static void df_init_top(struct df_domain *value) { + for (int i = 0; i < PRED_TYPES; ++i) { + value->preds[i] = PRED_TOP; + } + + for (int i = 0; i < RANGE_TYPES; ++i) { + range_init_top(&value->ranges[i]); + } + + value->types = ~0; + value->xtypes = ~0; +} + +/** Check for the top element. */ +static bool df_is_top(const struct df_domain *value) { + for (int i = 0; i < PRED_TYPES; ++i) { + if (value->preds[i] != PRED_TOP) { + return false; + } + } + + for (int i = 0; i < RANGE_TYPES; ++i) { + if (!range_is_top(&value->ranges[i])) { + return false; + } + } + + if (value->types != ~0U) { + return false; + } + + if (value->xtypes != ~0U) { + return false; + } + + return true; +} + +/** Compute the union of two fact sets. */ +static void df_join(struct df_domain *dest, const struct df_domain *src) { + for (int i = 0; i < PRED_TYPES; ++i) { + pred_join(&dest->preds[i], src->preds[i]); + } + + for (int i = 0; i < RANGE_TYPES; ++i) { + range_join(&dest->ranges[i], &src->ranges[i]); + } + + dest->types |= src->types; + dest->xtypes |= src->xtypes; +} + +/** + * Optimizer state. + */ +struct bfs_opt { + /** The context we're optimizing. */ + struct bfs_ctx *ctx; + /** Optimization level (ctx->optlevel). */ + int level; + /** Recursion depth. */ + int depth; + + /** Whether to produce warnings. */ + bool warn; + /** Whether the result of this expression is ignored. */ + bool ignore_result; + + /** Data flow state before this expression is evaluated. */ + struct df_domain before; + /** Data flow state after this expression returns true. */ + struct df_domain after_true; + /** Data flow state after this expression returns false. */ + struct df_domain after_false; + /** Data flow state before any side-effecting expressions are evaluated. */ + struct df_domain *impure; +}; + +/** Log an optimization. */ +attr(printf(2, 3)) +static bool opt_debug(struct bfs_opt *opt, const char *format, ...) { + if (bfs_debug_prefix(opt->ctx, DEBUG_OPT)) { + for (int i = 0; i < opt->depth; ++i) { + cfprintf(opt->ctx->cerr, "│ "); + } + + va_list args; + va_start(args, format); + cvfprintf(opt->ctx->cerr, format, args); + va_end(args); + return true; + } else { + return false; + } +} + +/** Log a recursive call. */ +attr(printf(2, 3)) +static bool opt_enter(struct bfs_opt *opt, const char *format, ...) { + int depth = opt->depth; + if (depth > 0) { + --opt->depth; + } + + bool debug = opt_debug(opt, "%s", depth > 0 ? "├─╮ " : ""); + if (debug) { + va_list args; + va_start(args, format); + cvfprintf(opt->ctx->cerr, format, args); + va_end(args); + } + + opt->depth = depth + 1; + return debug; +} + +/** Log a recursive return. */ +attr(printf(2, 3)) +static bool opt_leave(struct bfs_opt *opt, const char *format, ...) { + bool debug = false; + int depth = opt->depth; + + if (format) { + if (depth > 1) { + opt->depth -= 2; + } + + debug = opt_debug(opt, "%s", depth > 1 ? "├─╯ " : ""); + if (debug) { + va_list args; + va_start(args, format); + cvfprintf(opt->ctx->cerr, format, args); + va_end(args); + } + } + + opt->depth = depth - 1; + return debug; +} + +/** Log a shallow visit. */ +attr(printf(2, 3)) +static bool opt_visit(struct bfs_opt *opt, const char *format, ...) { + int depth = opt->depth; + if (depth > 0) { + --opt->depth; + } + + bool debug = opt_debug(opt, "%s", depth > 0 ? "├─◯ " : ""); + if (debug) { + va_list args; + va_start(args, format); + cvfprintf(opt->ctx->cerr, format, args); + va_end(args); + } + + opt->depth = depth; + return debug; +} + +/** Log the deletion of an expression. */ +attr(printf(2, 3)) +static bool opt_delete(struct bfs_opt *opt, const char *format, ...) { + int depth = opt->depth; + + if (depth > 0) { + --opt->depth; + } + + bool debug = opt_debug(opt, "%s", depth > 0 ? "├─✘ " : ""); + if (debug) { + va_list args; + va_start(args, format); + cvfprintf(opt->ctx->cerr, format, args); + va_end(args); + } + + opt->depth = depth; + return debug; +} + +typedef bool dump_fn(struct bfs_opt *opt, const char *format, ...); + +/** Print a df_pred. */ +static void pred_dump(dump_fn *dump, struct bfs_opt *opt, const struct df_domain *value, enum pred_type type) { + dump(opt, "${blu}%s${rs}: ", pred_type_name(type)); + + FILE *file = opt->ctx->cerr->file; + switch (value->preds[type]) { + case PRED_BOTTOM: + fprintf(file, "⊥\n"); + break; + case PRED_TOP: + fprintf(file, "⊤\n"); + break; + case PRED_TRUE: + fprintf(file, "true\n"); + break; + case PRED_FALSE: + fprintf(file, "false\n"); + break; + } +} + +/** Print a df_range. */ +static void range_dump(dump_fn *dump, struct bfs_opt *opt, const struct df_domain *value, enum range_type type) { + dump(opt, "${blu}%s${rs}: ", range_type_name(type)); + + FILE *file = opt->ctx->cerr->file; + const struct df_range *range = &value->ranges[type]; + if (range_is_bottom(range)) { + fprintf(file, "⊥\n"); + } else if (range_is_top(range)) { + fprintf(file, "⊤\n"); + } else if (range->min == range->max) { + fprintf(file, "%lld\n", range->min); + } else { + if (range->min == LLONG_MIN) { + fprintf(file, "(-∞, "); + } else { + fprintf(file, "[%lld, ", range->min); + } + if (range->max == LLONG_MAX) { + fprintf(file, "∞)\n"); + } else { + fprintf(file, "%lld]\n", range->max); + } + } +} + +/** Print a set of types. */ +static void types_dump(dump_fn *dump, struct bfs_opt *opt, const char *name, unsigned int types) { + dump(opt, "${blu}%s${rs}: ", name); + + FILE *file = opt->ctx->cerr->file; + if (types == 0) { + fprintf(file, " ⊥\n"); + } else if (types == ~0U) { + fprintf(file, " ⊤\n"); + } else if (count_ones(types) < count_ones(~types)) { + fprintf(file, " 0x%X\n", types); + } else { + fprintf(file, "~0x%X\n", ~types); + } +} + +/** Calculate the number of lines of df_dump() output. */ +static int df_dump_lines(const struct df_domain *value) { + int lines = 0; + + for (int i = 0; i < PRED_TYPES; ++i) { + lines += value->preds[i] != PRED_TOP; + } + + for (int i = 0; i < RANGE_TYPES; ++i) { + lines += !range_is_top(&value->ranges[i]); + } + + lines += value->types != ~0U; + lines += value->xtypes != ~0U; + + return lines; +} + +/** Get the right debugging function for a df_dump() line. */ +static dump_fn *df_dump_line(int lines, int *line) { + ++*line; + + if (lines == 1) { + return opt_visit; + } else if (*line == 1) { + return opt_enter; + } else if (*line == lines) { + return opt_leave; + } else { + return opt_debug; + } +} + +/** Print a data flow value. */ +static void df_dump(struct bfs_opt *opt, const char *str, const struct df_domain *value) { + if (df_is_bottom(value)) { + opt_debug(opt, "%s: ⊥\n", str); + return; + } else if (df_is_top(value)) { + opt_debug(opt, "%s: ⊤\n", str); + return; + } + + if (!opt_debug(opt, "%s:\n", str)) { + return; + } + + int lines = df_dump_lines(value); + int line = 0; + + for (int i = 0; i < PRED_TYPES; ++i) { + if (value->preds[i] != PRED_TOP) { + pred_dump(df_dump_line(lines, &line), opt, value, i); + } + } + + for (int i = 0; i < RANGE_TYPES; ++i) { + if (!range_is_top(&value->ranges[i])) { + range_dump(df_dump_line(lines, &line), opt, value, i); + } + } + + if (value->types != ~0U) { + types_dump(df_dump_line(lines, &line), opt, "-type", value->types); + } + + if (value->xtypes != ~0U) { + types_dump(df_dump_line(lines, &line), opt, "-xtype", value->xtypes); + } +} + +/** Check if an expression is constant. */ +static bool is_const(const struct bfs_expr *expr) { + return expr->eval_fn == eval_true || expr->eval_fn == eval_false; +} + +/** Warn about an expression. */ +attr(printf(3, 4)) +static void opt_warning(const struct bfs_opt *opt, const struct bfs_expr *expr, const char *format, ...) { + if (!opt->warn) { + return; + } + + if (bfs_expr_is_parent(expr) || is_const(expr)) { + return; + } + + if (bfs_expr_warning(opt->ctx, expr)) { + va_list args; + va_start(args, format); + bfs_vwarning(opt->ctx, format, args); + va_end(args); + } +} + +/** Remove and return an expression's children. */ +static void foster_children(struct bfs_expr *expr, struct bfs_exprs *children) { + bfs_assert(bfs_expr_is_parent(expr)); + + SLIST_INIT(children); + SLIST_EXTEND(children, &expr->children); + + expr->persistent_fds = 0; + expr->ephemeral_fds = 0; + expr->pure = true; +} + +/** Return an expression's only child. */ +static struct bfs_expr *only_child(struct bfs_expr *expr) { + bfs_assert(bfs_expr_is_parent(expr)); + struct bfs_expr *child = bfs_expr_children(expr); + bfs_assert(child && !child->next); + return child; +} + +/** Foster an expression's only child. */ +static struct bfs_expr *foster_only_child(struct bfs_expr *expr) { + struct bfs_expr *child = only_child(expr); + struct bfs_exprs children; + foster_children(expr, &children); + return child; +} + +/** An expression visitor. */ +struct visitor; + +/** An expression-visiting function. */ +typedef struct bfs_expr *visit_fn(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor); + +/** An entry in a visitor lookup table. */ +struct visitor_table { + /** The evaluation function to match on. */ + bfs_eval_fn *eval_fn; + /** The visitor function. */ + visit_fn *visit; +}; + +/** Look up a visitor in a table. */ +static visit_fn *look_up_visitor(const struct bfs_expr *expr, const struct visitor_table table[]) { + for (size_t i = 0; table[i].eval_fn; ++i) { + if (expr->eval_fn == table[i].eval_fn) { + return table[i].visit; + } + } + + return NULL; +} + +struct visitor { + /** The name of this visitor. */ + const char *name; + + /** A function to call before visiting children. */ + visit_fn *enter; + /** The default visitor. */ + visit_fn *visit; + /** A function to call after visiting children. */ + visit_fn *leave; + + /** A visitor lookup table. */ + const struct visitor_table *table; +}; + +/** Recursive visitor implementation. */ +static struct bfs_expr *visit_deep(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor); + +/** Visit a negation. */ +static struct bfs_expr *visit_not(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct bfs_expr *rhs = foster_only_child(expr); + + struct bfs_opt nested = *opt; + rhs = visit_deep(&nested, rhs, visitor); + if (!rhs) { + return NULL; + } + + opt->after_true = nested.after_false; + opt->after_false = nested.after_true; + + bfs_expr_append(expr, rhs); + return expr; +} + +/** Visit a conjunction. */ +static struct bfs_expr *visit_and(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct bfs_exprs children; + foster_children(expr, &children); + + // Base case (-and) == (-true) + df_init_bottom(&opt->after_false); + struct bfs_opt nested = *opt; + + while (!SLIST_EMPTY(&children)) { + struct bfs_expr *child = SLIST_POP(&children); + + if (SLIST_EMPTY(&children)) { + nested.ignore_result = opt->ignore_result; + } else { + nested.ignore_result = false; + } + + child = visit_deep(&nested, child, visitor); + if (!child) { + return NULL; + } + + df_join(&opt->after_false, &nested.after_false); + nested.before = nested.after_true; + + bfs_expr_append(expr, child); + } + + opt->after_true = nested.after_true; + + return expr; +} + +/** Visit a disjunction. */ +static struct bfs_expr *visit_or(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct bfs_exprs children; + foster_children(expr, &children); + + // Base case (-or) == (-false) + df_init_bottom(&opt->after_true); + struct bfs_opt nested = *opt; + + while (!SLIST_EMPTY(&children)) { + struct bfs_expr *child = SLIST_POP(&children); + + if (SLIST_EMPTY(&children)) { + nested.ignore_result = opt->ignore_result; + } else { + nested.ignore_result = false; + } + + child = visit_deep(&nested, child, visitor); + if (!child) { + return NULL; + } + + df_join(&opt->after_true, &nested.after_true); + nested.before = nested.after_false; + + bfs_expr_append(expr, child); + } + + opt->after_false = nested.after_false; + + return expr; +} + +/** Visit a comma expression. */ +static struct bfs_expr *visit_comma(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct bfs_exprs children; + foster_children(expr, &children); + + struct bfs_opt nested = *opt; + + while (!SLIST_EMPTY(&children)) { + struct bfs_expr *child = SLIST_POP(&children); + + if (SLIST_EMPTY(&children)) { + nested.ignore_result = opt->ignore_result; + } else { + nested.ignore_result = true; + } + + child = visit_deep(&nested, child, visitor); + if (!child) { + return NULL; + } + + nested.before = nested.after_true; + df_join(&nested.before, &nested.after_false); + + bfs_expr_append(expr, child); + } + + opt->after_true = nested.after_true; + opt->after_false = nested.after_false; + + return expr; +} + +/** Default enter() function. */ +static struct bfs_expr *visit_enter(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + opt_enter(opt, "%pe\n", expr); + opt->after_true = opt->before; + opt->after_false = opt->before; + return expr; +} + +/** Default leave() function. */ +static struct bfs_expr *visit_leave(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + opt_leave(opt, "%pe\n", expr); + return expr; +} + +static struct bfs_expr *visit_deep(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + bool entered = false; + + visit_fn *enter = visitor->enter ? visitor->enter : visit_enter; + visit_fn *leave = visitor->leave ? visitor->leave : visit_leave; + + static const struct visitor_table table[] = { + {eval_not, visit_not}, + {eval_and, visit_and}, + {eval_or, visit_or}, + {eval_comma, visit_comma}, + {NULL, NULL}, + }; + visit_fn *recursive = look_up_visitor(expr, table); + if (recursive) { + if (!entered) { + expr = enter(opt, expr, visitor); + if (!expr) { + return NULL; + } + entered = true; + } + + expr = recursive(opt, expr, visitor); + if (!expr) { + return NULL; + } + } + + visit_fn *general = visitor->visit; + if (general) { + if (!entered) { + expr = enter(opt, expr, visitor); + if (!expr) { + return NULL; + } + entered = true; + } + + expr = general(opt, expr, visitor); + if (!expr) { + return NULL; + } + } + + visit_fn *specific = look_up_visitor(expr, visitor->table); + if (specific) { + if (!entered) { + expr = enter(opt, expr, visitor); + if (!expr) { + return NULL; + } + entered = true; + } + + expr = specific(opt, expr, visitor); + if (!expr) { + return NULL; + } + } + + if (entered) { + expr = leave(opt, expr, visitor); + } else { + opt_visit(opt, "%pe\n", expr); + } + + return expr; +} + +/** Visit an expression recursively. */ +static struct bfs_expr *visit(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + opt_enter(opt, "%s()\n", visitor->name); + expr = visit_deep(opt, expr, visitor); + opt_leave(opt, "\n"); + return expr; +} + +/** Visit an expression non-recursively. */ +static struct bfs_expr *visit_shallow(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + visit_fn *general = visitor->visit; + if (expr && general) { + expr = general(opt, expr, visitor); + } + + if (!expr) { + return NULL; + } + + visit_fn *specific = look_up_visitor(expr, visitor->table); + if (specific) { + expr = specific(opt, expr, visitor); + } + + return expr; +} + +/** Annotate -{execut,read,writ}able. */ +static struct bfs_expr *annotate_access(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + expr->probability = 1.0; + if (expr->num & R_OK) { + expr->probability *= 0.99; + } + if (expr->num & W_OK) { + expr->probability *= 0.8; + } + if (expr->num & X_OK) { + expr->probability *= 0.2; + } + + return expr; +} + +/** Annotate -empty. */ +static struct bfs_expr *annotate_empty(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + if (opt->level >= 4) { + // Since -empty attempts to open and read directories, it may + // have side effects such as reporting permission errors, and + // thus shouldn't be re-ordered without aggressive optimizations + expr->pure = true; + } + + return expr; +} + +/** Annotate -exec. */ +static struct bfs_expr *annotate_exec(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + if (expr->exec->flags & BFS_EXEC_MULTI) { + expr->always_true = true; + } else { + expr->cost = 1000000.0; + } + + return expr; +} + +/** Annotate -name/-lname/-path. */ +static struct bfs_expr *annotate_fnmatch(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + if (expr->literal) { + expr->probability = 0.1; + } else { + expr->probability = 0.5; + } + + return expr; +} + +/** Annotate -f?print. */ +static struct bfs_expr *annotate_fprint(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + const struct colors *colors = expr->cfile->colors; + expr->calls_stat = colors && colors_need_stat(colors); + return expr; +} + +/** Estimate probability for -x?type. */ +static void estimate_type_probability(struct bfs_expr *expr) { + unsigned int types = expr->num; + + expr->probability = 0.0; + if (types & (1 << BFS_BLK)) { + expr->probability += 0.00000721183; + } + if (types & (1 << BFS_CHR)) { + expr->probability += 0.0000499855; + } + if (types & (1 << BFS_DIR)) { + expr->probability += 0.114475; + } + if (types & (1 << BFS_DOOR)) { + expr->probability += 0.000001; + } + if (types & (1 << BFS_FIFO)) { + expr->probability += 0.00000248684; + } + if (types & (1 << BFS_REG)) { + expr->probability += 0.859772; + } + if (types & (1 << BFS_LNK)) { + expr->probability += 0.0256816; + } + if (types & (1 << BFS_SOCK)) { + expr->probability += 0.0000116881; + } + if (types & (1 << BFS_WHT)) { + expr->probability += 0.000001; + } +} + +/** Annotate -type. */ +static struct bfs_expr *annotate_type(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + estimate_type_probability(expr); + return expr; +} + +/** Annotate -xtype. */ +static struct bfs_expr *annotate_xtype(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + if (opt->level >= 4) { + // Since -xtype dereferences symbolic links, it may have side + // effects such as reporting permission errors, and thus + // shouldn't be re-ordered without aggressive optimizations + expr->pure = true; + } + + estimate_type_probability(expr); + return expr; +} + +/** Annotate a negation. */ +static struct bfs_expr *annotate_not(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct bfs_expr *rhs = only_child(expr); + expr->pure = rhs->pure; + expr->always_true = rhs->always_false; + expr->always_false = rhs->always_true; + expr->cost = rhs->cost; + expr->probability = 1.0 - rhs->probability; + return expr; +} + +/** Annotate a conjunction. */ +static struct bfs_expr *annotate_and(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + expr->pure = true; + expr->always_true = true; + expr->always_false = false; + expr->cost = 0.0; + expr->probability = 1.0; + + for (struct bfs_expr *child = bfs_expr_children(expr); child; child = child->next) { + expr->pure &= child->pure; + expr->always_true &= child->always_true; + expr->always_false |= child->always_false; + expr->cost += expr->probability * child->cost; + expr->probability *= child->probability; + } + + return expr; +} + +/** Annotate a disjunction. */ +static struct bfs_expr *annotate_or(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + expr->pure = true; + expr->always_true = false; + expr->always_false = true; + expr->cost = 0.0; + + float false_prob = 1.0; + for (struct bfs_expr *child = bfs_expr_children(expr); child; child = child->next) { + expr->pure &= child->pure; + expr->always_true |= child->always_true; + expr->always_false &= child->always_false; + expr->cost += false_prob * child->cost; + false_prob *= (1.0 - child->probability); + } + expr->probability = 1.0 - false_prob; + + return expr; +} + +/** Annotate a comma expression. */ +static struct bfs_expr *annotate_comma(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + expr->pure = true; + expr->cost = 0.0; + + for (struct bfs_expr *child = bfs_expr_children(expr); child; child = child->next) { + expr->pure &= child->pure; + expr->always_true = child->always_true; + expr->always_false = child->always_false; + expr->cost += child->cost; + expr->probability = child->probability; + } + + return expr; +} + +/** Annotate an arbitrary expression. */ +static struct bfs_expr *annotate_visit(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + /** Table of pure expressions. */ + static bfs_eval_fn *const pure[] = { + eval_access, + eval_acl, + eval_capable, + eval_depth, + eval_false, + eval_flags, + eval_fstype, + eval_gid, + eval_hidden, + eval_inum, + eval_links, + eval_lname, + eval_name, + eval_newer, + eval_nogroup, + eval_nouser, + eval_path, + eval_perm, + eval_regex, + eval_samefile, + eval_size, + eval_sparse, + eval_time, + eval_true, + eval_type, + eval_uid, + eval_used, + eval_xattr, + eval_xattrname, + }; + + expr->pure = false; + for (size_t i = 0; i < countof(pure); ++i) { + if (expr->eval_fn == pure[i]) { + expr->pure = true; + break; + } + } + + /** Table of always-true expressions. */ + static bfs_eval_fn *const always_true[] = { + eval_fls, + eval_fprint, + eval_fprint0, + eval_fprintf, + eval_fprintx, + eval_limit, + eval_prune, + eval_true, + // Non-returning + eval_exit, + eval_quit, + }; + + expr->always_true = false; + for (size_t i = 0; i < countof(always_true); ++i) { + if (expr->eval_fn == always_true[i]) { + expr->always_true = true; + break; + } + } + + /** Table of always-false expressions. */ + static bfs_eval_fn *const always_false[] = { + eval_false, + // Non-returning + eval_exit, + eval_quit, + }; + + expr->always_false = false; + for (size_t i = 0; i < countof(always_false); ++i) { + if (expr->eval_fn == always_false[i]) { + expr->always_false = true; + break; + } + } + + /** Table of stat-calling primaries. */ + static bfs_eval_fn *const calls_stat[] = { + eval_empty, + eval_flags, + eval_fls, + eval_fprintf, + eval_fstype, + eval_gid, + eval_inum, + eval_links, + eval_newer, + eval_nogroup, + eval_nouser, + eval_perm, + eval_samefile, + eval_size, + eval_sparse, + eval_time, + eval_uid, + eval_used, + eval_xattr, + eval_xattrname, + }; + + expr->calls_stat = false; + for (size_t i = 0; i < countof(calls_stat); ++i) { + if (expr->eval_fn == calls_stat[i]) { + expr->calls_stat = true; + break; + } + } + +#define FAST_COST 40.0 +#define FNMATCH_COST 400.0 +#define STAT_COST 1000.0 +#define PRINT_COST 20000.0 + + /** Table of expression costs. */ + static const struct { + bfs_eval_fn *eval_fn; + float cost; + } costs[] = { + {eval_access, STAT_COST}, + {eval_acl, STAT_COST}, + {eval_capable, STAT_COST}, + {eval_empty, 2 * STAT_COST}, // readdir() is worse than stat() + {eval_flags, STAT_COST}, + {eval_fls, PRINT_COST}, + {eval_fprint, PRINT_COST}, + {eval_fprint0, PRINT_COST}, + {eval_fprintf, PRINT_COST}, + {eval_fprintx, PRINT_COST}, + {eval_fstype, STAT_COST}, + {eval_gid, STAT_COST}, + {eval_inum, STAT_COST}, + {eval_links, STAT_COST}, + {eval_lname, FNMATCH_COST}, + {eval_name, FNMATCH_COST}, + {eval_newer, STAT_COST}, + {eval_nogroup, STAT_COST}, + {eval_nouser, STAT_COST}, + {eval_path, FNMATCH_COST}, + {eval_perm, STAT_COST}, + {eval_samefile, STAT_COST}, + {eval_size, STAT_COST}, + {eval_sparse, STAT_COST}, + {eval_time, STAT_COST}, + {eval_uid, STAT_COST}, + {eval_used, STAT_COST}, + {eval_xattr, STAT_COST}, + {eval_xattrname, STAT_COST}, + }; + + expr->cost = FAST_COST; + for (size_t i = 0; i < countof(costs); ++i) { + if (expr->eval_fn == costs[i].eval_fn) { + expr->cost = costs[i].cost; + break; + } + } + + /** Table of expression probabilities. */ + static const struct { + /** The evaluation function with this cost. */ + bfs_eval_fn *eval_fn; + /** The matching probability. */ + float probability; + } probs[] = { + {eval_acl, 0.00002}, + {eval_capable, 0.000002}, + {eval_empty, 0.01}, + {eval_false, 0.0}, + {eval_hidden, 0.01}, + {eval_nogroup, 0.01}, + {eval_nouser, 0.01}, + {eval_samefile, 0.01}, + {eval_true, 1.0}, + {eval_xattr, 0.01}, + {eval_xattrname, 0.01}, + }; + + expr->probability = 0.5; + for (size_t i = 0; i < countof(probs); ++i) { + if (expr->eval_fn == probs[i].eval_fn) { + expr->probability = probs[i].probability; + break; + } + } + + return expr; +} + +/** + * Annotating visitor. + */ +static const struct visitor annotate = { + .name = "annotate", + .visit = annotate_visit, + .table = (const struct visitor_table[]) { + {eval_access, annotate_access}, + {eval_empty, annotate_empty}, + {eval_exec, annotate_exec}, + {eval_fprint, annotate_fprint}, + {eval_lname, annotate_fnmatch}, + {eval_name, annotate_fnmatch}, + {eval_path, annotate_fnmatch}, + {eval_type, annotate_type}, + {eval_xtype, annotate_xtype}, + + {eval_not, annotate_not}, + {eval_and, annotate_and}, + {eval_or, annotate_or}, + {eval_comma, annotate_comma}, + + {NULL, NULL}, + }, +}; + +/** Create a constant expression. */ +static struct bfs_expr *opt_const(struct bfs_opt *opt, bool value) { + static bfs_eval_fn *const fns[] = {eval_false, eval_true}; + static char *fake_args[] = {"-false", "-true"}; + + struct bfs_expr *expr = bfs_expr_new(opt->ctx, fns[value], 1, &fake_args[value]); + return visit_shallow(opt, expr, &annotate); +} + +/** Negate an expression, keeping it canonical. */ +static struct bfs_expr *negate_expr(struct bfs_opt *opt, struct bfs_expr *expr, char **argv) { + if (expr->eval_fn == eval_not) { + return only_child(expr); + } else if (expr->eval_fn == eval_true) { + return opt_const(opt, false); + } else if (expr->eval_fn == eval_false) { + return opt_const(opt, true); + } + + struct bfs_expr *ret = bfs_expr_new(opt->ctx, eval_not, 1, argv); + if (!ret) { + return NULL; + } + + bfs_expr_append(ret, expr); + return visit_shallow(opt, ret, &annotate); +} + +/** Sink negations into a conjunction/disjunction using De Morgan's laws. */ +static struct bfs_expr *sink_not_andor(struct bfs_opt *opt, struct bfs_expr *expr) { + opt_debug(opt, "De Morgan's laws\n"); + + char **argv = expr->argv; + expr = only_child(expr); + opt_enter(opt, "%pe\n", expr); + + if (expr->eval_fn == eval_and) { + expr->eval_fn = eval_or; + expr->argv = &fake_or_arg; + } else { + bfs_assert(expr->eval_fn == eval_or); + expr->eval_fn = eval_and; + expr->argv = &fake_and_arg; + } + + struct bfs_exprs children; + foster_children(expr, &children); + + struct bfs_expr *child; + while ((child = SLIST_POP(&children))) { + opt_enter(opt, "%pe\n", child); + + child = negate_expr(opt, child, argv); + if (!child) { + return NULL; + } + + opt_leave(opt, "%pe\n", child); + bfs_expr_append(expr, child); + } + + opt_leave(opt, "%pe\n", expr); + return visit_shallow(opt, expr, &annotate); +} + +/** Sink a negation into a comma expression. */ +static struct bfs_expr *sink_not_comma(struct bfs_opt *opt, struct bfs_expr *expr) { + bfs_assert(expr->eval_fn == eval_comma); + + opt_enter(opt, "%pe\n", expr); + + char **argv = expr->argv; + expr = only_child(expr); + + struct bfs_exprs children; + foster_children(expr, &children); + + struct bfs_expr *child; + while ((child = SLIST_POP(&children))) { + if (SLIST_EMPTY(&children)) { + opt_enter(opt, "%pe\n", child); + opt_debug(opt, "sink\n"); + + child = negate_expr(opt, child, argv); + if (!child) { + return NULL; + } + + opt_leave(opt, "%pe\n", child); + } else { + opt_visit(opt, "%pe\n", child); + } + + bfs_expr_append(expr, child); + } + + opt_leave(opt, "%pe\n", expr); + return visit_shallow(opt, expr, &annotate); +} + +/** Canonicalize a negation. */ +static struct bfs_expr *canonicalize_not(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct bfs_expr *rhs = only_child(expr); + + if (rhs->eval_fn == eval_not) { + opt_debug(opt, "double negation\n"); + rhs = only_child(expr); + return only_child(rhs); + } else if (rhs->eval_fn == eval_and || rhs->eval_fn == eval_or) { + return sink_not_andor(opt, expr); + } else if (rhs->eval_fn == eval_comma) { + return sink_not_comma(opt, expr); + } else if (is_const(rhs)) { + opt_debug(opt, "constant propagation\n"); + return opt_const(opt, rhs->eval_fn == eval_false); + } else { + return expr; + } +} + +/** Canonicalize an associative operator. */ +static struct bfs_expr *canonicalize_assoc(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct bfs_exprs children; + foster_children(expr, &children); + + struct bfs_exprs flat; + SLIST_INIT(&flat); + + struct bfs_expr *child; + while ((child = SLIST_POP(&children))) { + if (child->eval_fn == expr->eval_fn) { + struct bfs_expr *head = SLIST_HEAD(&child->children); + struct bfs_expr *tail = SLIST_TAIL(&child->children); + + if (!head) { + opt_delete(opt, "%pe [empty]\n", child); + } else { + opt_enter(opt, "%pe\n", child); + opt_debug(opt, "associativity\n"); + if (head == tail) { + opt_leave(opt, "%pe\n", head); + } else if (head->next == tail) { + opt_leave(opt, "%pe %pe\n", head, tail); + } else { + opt_leave(opt, "%pe ... %pe\n", head, tail); + } + } + + SLIST_EXTEND(&flat, &child->children); + } else { + opt_visit(opt, "%pe\n", child); + SLIST_APPEND(&flat, child); + } + } + + bfs_expr_extend(expr, &flat); + + return visit_shallow(opt, expr, &annotate); +} + +/** + * Canonicalizing visitor. + */ +static const struct visitor canonicalize = { + .name = "canonicalize", + .table = (const struct visitor_table[]) { + {eval_not, canonicalize_not}, + {eval_and, canonicalize_assoc}, + {eval_or, canonicalize_assoc}, + {eval_comma, canonicalize_assoc}, + {NULL, NULL}, + }, +}; + +/** Calculate the cost of an ordered pair of expressions. */ +static float expr_cost(const struct bfs_expr *parent, const struct bfs_expr *lhs, const struct bfs_expr *rhs) { + // https://cs.stackexchange.com/a/66921/21004 + float prob = lhs->probability; + if (parent->eval_fn == eval_or) { + prob = 1.0 - prob; + } + return lhs->cost + prob * rhs->cost; +} + +/** Sort a block of expressions. */ +static void sort_exprs(struct bfs_opt *opt, struct bfs_expr *parent, struct bfs_exprs *exprs) { + if (!exprs->head || !exprs->head->next) { + return; + } + + struct bfs_exprs left, right; + SLIST_INIT(&left); + SLIST_INIT(&right); + + // Split + for (struct bfs_expr *hare = exprs->head; hare && (hare = hare->next); hare = hare->next) { + struct bfs_expr *tortoise = SLIST_POP(exprs); + SLIST_APPEND(&left, tortoise); + } + SLIST_EXTEND(&right, exprs); + + // Recurse + sort_exprs(opt, parent, &left); + sort_exprs(opt, parent, &right); + + // Merge + while (!SLIST_EMPTY(&left) && !SLIST_EMPTY(&right)) { + struct bfs_expr *lhs = left.head; + struct bfs_expr *rhs = right.head; + + float cost = expr_cost(parent, lhs, rhs); + float swapped = expr_cost(parent, rhs, lhs); + + if (cost <= swapped) { + SLIST_POP(&left); + SLIST_APPEND(exprs, lhs); + } else { + opt_enter(opt, "%pe %pe [${ylw}%g${rs}]\n", lhs, rhs, cost); + SLIST_POP(&right); + SLIST_APPEND(exprs, rhs); + opt_leave(opt, "%pe %pe [${ylw}%g${rs}]\n", rhs, lhs, swapped); + } + } + SLIST_EXTEND(exprs, &left); + SLIST_EXTEND(exprs, &right); +} + +/** Reorder children to reduce cost. */ +static struct bfs_expr *reorder_andor(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct bfs_exprs children; + foster_children(expr, &children); + + // Split into blocks of consecutive pure/impure expressions, and sort + // the pure blocks + struct bfs_exprs pure; + SLIST_INIT(&pure); + + struct bfs_expr *child; + while ((child = SLIST_POP(&children))) { + if (child->pure) { + SLIST_APPEND(&pure, child); + } else { + sort_exprs(opt, expr, &pure); + bfs_expr_extend(expr, &pure); + bfs_expr_append(expr, child); + } + } + sort_exprs(opt, expr, &pure); + bfs_expr_extend(expr, &pure); + + return visit_shallow(opt, expr, &annotate); +} + +/** + * Reordering visitor. + */ +static const struct visitor reorder = { + .name = "reorder", + .table = (const struct visitor_table[]) { + {eval_and, reorder_andor}, + {eval_or, reorder_andor}, + {NULL, NULL}, + }, +}; + +/** Transfer function for simple predicates. */ +static void data_flow_pred(struct bfs_opt *opt, enum pred_type pred, bool value) { + constrain_pred(&opt->after_true.preds[pred], value); + constrain_pred(&opt->after_false.preds[pred], !value); +} + +/** Transfer function for icmp-style ([+-]N) expressions. */ +static void data_flow_icmp(struct bfs_opt *opt, const struct bfs_expr *expr, enum range_type type) { + struct df_range *true_range = &opt->after_true.ranges[type]; + struct df_range *false_range = &opt->after_false.ranges[type]; + long long value = expr->num; + + switch (expr->int_cmp) { + case BFS_INT_EQUAL: + constrain_min(true_range, value); + constrain_max(true_range, value); + range_remove(false_range, value); + break; + + case BFS_INT_LESS: + constrain_min(false_range, value); + constrain_max(true_range, value); + range_remove(true_range, value); + break; + + case BFS_INT_GREATER: + constrain_max(false_range, value); + constrain_min(true_range, value); + range_remove(true_range, value); + break; + } +} + +/** Transfer function for -{execut,read,writ}able. */ +static struct bfs_expr *data_flow_access(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + if (expr->num & R_OK) { + data_flow_pred(opt, READABLE_PRED, true); + } + if (expr->num & W_OK) { + data_flow_pred(opt, WRITABLE_PRED, true); + } + if (expr->num & X_OK) { + data_flow_pred(opt, EXECUTABLE_PRED, true); + } + + return expr; +} + +/** Transfer function for -gid. */ +static struct bfs_expr *data_flow_gid(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct df_range *range = &opt->after_true.ranges[GID_RANGE]; + if (range->min == range->max) { + gid_t gid = range->min; + bool nogroup = !bfs_getgrgid(opt->ctx->groups, gid); + if (errno == 0) { + data_flow_pred(opt, NOGROUP_PRED, nogroup); + } + } + + return expr; +} + +/** Transfer function for -inum. */ +static struct bfs_expr *data_flow_inum(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct df_range *range = &opt->after_true.ranges[INUM_RANGE]; + if (range->min == range->max) { + expr->probability = 0.01; + } else { + expr->probability = 0.5; + } + + return expr; +} + +/** Transfer function for -links. */ +static struct bfs_expr *data_flow_links(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct df_range *range = &opt->after_true.ranges[LINKS_RANGE]; + if (1 >= range->min && 1 <= range->max) { + expr->probability = 0.99; + } else { + expr->probability = 0.5; + } + + return expr; +} + +/** Transfer function for -samefile. */ +static struct bfs_expr *data_flow_samefile(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct df_range *true_range = &opt->after_true.ranges[INUM_RANGE]; + constrain_min(true_range, expr->ino); + constrain_max(true_range, expr->ino); + + struct df_range *false_range = &opt->after_false.ranges[INUM_RANGE]; + range_remove(false_range, expr->ino); + + return expr; +} + +/** Transfer function for -size. */ +static struct bfs_expr *data_flow_size(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct df_range *range = &opt->after_true.ranges[SIZE_RANGE]; + if (range->min == range->max) { + expr->probability = 0.01; + } else { + expr->probability = 0.5; + } + + return expr; +} + +/** Transfer function for -type. */ +static struct bfs_expr *data_flow_type(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + opt->after_true.types &= expr->num; + opt->after_false.types &= ~expr->num; + return expr; +} + +/** Transfer function for -uid. */ +static struct bfs_expr *data_flow_uid(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct df_range *range = &opt->after_true.ranges[UID_RANGE]; + if (range->min == range->max) { + uid_t uid = range->min; + bool nouser = !bfs_getpwuid(opt->ctx->users, uid); + if (errno == 0) { + data_flow_pred(opt, NOUSER_PRED, nouser); + } + } + + return expr; +} + +/** Transfer function for -xtype. */ +static struct bfs_expr *data_flow_xtype(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + opt->after_true.xtypes &= expr->num; + opt->after_false.xtypes &= ~expr->num; + return expr; +} + +/** Data flow visitor entry. */ +static struct bfs_expr *data_flow_enter(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + visit_enter(opt, expr, visitor); + + df_dump(opt, "before", &opt->before); + + if (!bfs_expr_is_parent(expr) && !expr->pure) { + df_join(opt->impure, &opt->before); + df_dump(opt, "impure", opt->impure); + } + + return expr; +} + +/** Data flow visitor exit. */ +static struct bfs_expr *data_flow_leave(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + if (expr->always_true) { + expr->probability = 1.0; + df_init_bottom(&opt->after_false); + } + + if (expr->always_false) { + expr->probability = 0.0; + df_init_bottom(&opt->after_true); + } + + df_dump(opt, "after true", &opt->after_true); + df_dump(opt, "after false", &opt->after_false); + + if (df_is_bottom(&opt->after_false)) { + if (!expr->pure) { + expr->always_true = true; + expr->probability = 0.0; + } else if (expr->eval_fn != eval_true) { + opt_warning(opt, expr, "This expression is always true.\n\n"); + opt_debug(opt, "pure, always true\n"); + expr = opt_const(opt, true); + if (!expr) { + return NULL; + } + } + } + + if (df_is_bottom(&opt->after_true)) { + if (!expr->pure) { + expr->always_false = true; + expr->probability = 0.0; + } else if (expr->eval_fn != eval_false) { + opt_warning(opt, expr, "This expression is always false.\n\n"); + opt_debug(opt, "pure, always false\n"); + expr = opt_const(opt, false); + if (!expr) { + return NULL; + } + } + } + + return visit_leave(opt, expr, visitor); +} + +/** Data flow visitor function. */ +static struct bfs_expr *data_flow_visit(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + if (opt->ignore_result && expr->pure) { + opt_debug(opt, "ignored result\n"); + opt_warning(opt, expr, "The result of this expression is ignored.\n\n"); + expr = opt_const(opt, false); + if (!expr) { + return NULL; + } + } + + if (df_is_bottom(&opt->before)) { + opt_debug(opt, "unreachable\n"); + opt_warning(opt, expr, "This expression is unreachable.\n\n"); + expr = opt_const(opt, false); + if (!expr) { + return NULL; + } + } + + /** Table of simple predicates. */ + static const struct { + bfs_eval_fn *eval_fn; + enum pred_type pred; + } preds[] = { + {eval_acl, ACL_PRED}, + {eval_capable, CAPABLE_PRED}, + {eval_empty, EMPTY_PRED}, + {eval_hidden, HIDDEN_PRED}, + {eval_nogroup, NOGROUP_PRED}, + {eval_nouser, NOUSER_PRED}, + {eval_sparse, SPARSE_PRED}, + {eval_xattr, XATTR_PRED}, + }; + + for (size_t i = 0; i < countof(preds); ++i) { + if (preds[i].eval_fn == expr->eval_fn) { + data_flow_pred(opt, preds[i].pred, true); + break; + } + } + + /** Table of simple range comparisons. */ + static const struct { + bfs_eval_fn *eval_fn; + enum range_type range; + } ranges[] = { + {eval_depth, DEPTH_RANGE}, + {eval_gid, GID_RANGE}, + {eval_inum, INUM_RANGE}, + {eval_links, LINKS_RANGE}, + {eval_size, SIZE_RANGE}, + {eval_uid, UID_RANGE}, + }; + + for (size_t i = 0; i < countof(ranges); ++i) { + if (ranges[i].eval_fn == expr->eval_fn) { + data_flow_icmp(opt, expr, ranges[i].range); + break; + } + } + + return expr; +} + +/** + * Data flow visitor. + */ +static const struct visitor data_flow = { + .name = "data_flow", + .enter = data_flow_enter, + .visit = data_flow_visit, + .leave = data_flow_leave, + .table = (const struct visitor_table[]) { + {eval_access, data_flow_access}, + {eval_gid, data_flow_gid}, + {eval_inum, data_flow_inum}, + {eval_links, data_flow_links}, + {eval_samefile, data_flow_samefile}, + {eval_size, data_flow_size}, + {eval_type, data_flow_type}, + {eval_uid, data_flow_uid}, + {eval_xtype, data_flow_xtype}, + {NULL, NULL}, + }, +}; + +/** Simplify a negation. */ +static struct bfs_expr *simplify_not(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + if (opt->ignore_result) { + opt_debug(opt, "ignored result\n"); + expr = only_child(expr); + } + + return expr; +} + +/** Lift negations out of a conjunction/disjunction using De Morgan's laws. */ +static struct bfs_expr *lift_andor_not(struct bfs_opt *opt, struct bfs_expr *expr) { + // Only lift negations if it would reduce the number of (-not) expressions + size_t added = 0, removed = 0; + for (struct bfs_expr *child = bfs_expr_children(expr); child; child = child->next) { + if (child->eval_fn == eval_not) { + ++removed; + } else { + ++added; + } + } + if (added >= removed) { + return visit_shallow(opt, expr, &annotate); + } + + opt_debug(opt, "De Morgan's laws\n"); + + if (expr->eval_fn == eval_and) { + expr->eval_fn = eval_or; + expr->argv = &fake_or_arg; + } else { + bfs_assert(expr->eval_fn == eval_or); + expr->eval_fn = eval_and; + expr->argv = &fake_and_arg; + } + + struct bfs_exprs children; + foster_children(expr, &children); + + struct bfs_expr *child; + while ((child = SLIST_POP(&children))) { + opt_enter(opt, "%pe\n", child); + + child = negate_expr(opt, child, &fake_not_arg); + if (!child) { + return NULL; + } + + opt_leave(opt, "%pe\n", child); + bfs_expr_append(expr, child); + } + + expr = visit_shallow(opt, expr, &annotate); + return negate_expr(opt, expr, &fake_not_arg); +} + +/** Get the first ignorable expression in a conjunction/disjunction. */ +static struct bfs_expr *first_ignorable(struct bfs_opt *opt, struct bfs_expr *expr) { + if (opt->level < 2 || !opt->ignore_result) { + return NULL; + } + + struct bfs_expr *ret = NULL; + for (struct bfs_expr *child = bfs_expr_children(expr); child; child = child->next) { + if (!child->pure) { + ret = NULL; + } else if (!ret) { + ret = child; + } + } + + return ret; +} + +/** Simplify a conjunction. */ +static struct bfs_expr *simplify_and(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct bfs_expr *ignorable = first_ignorable(opt, expr); + bool ignore = false; + + struct bfs_exprs children; + foster_children(expr, &children); + + while (!SLIST_EMPTY(&children)) { + struct bfs_expr *child = SLIST_POP(&children); + + if (child == ignorable) { + ignore = true; + } + + if (ignore) { + opt_delete(opt, "%pe [ignored result]\n", child); + opt_warning(opt, child, "The result of this expression is ignored.\n\n"); + continue; + } + + if (child->eval_fn == eval_true) { + opt_delete(opt, "%pe [conjunction elimination]\n", child); + continue; + } + + opt_visit(opt, "%pe\n", child); + bfs_expr_append(expr, child); + + if (child->always_false) { + while ((child = SLIST_POP(&children))) { + opt_delete(opt, "%pe [short-circuit]\n", child); + } + } + } + + struct bfs_expr *child = bfs_expr_children(expr); + if (!child) { + opt_debug(opt, "nullary identity\n"); + return opt_const(opt, true); + } else if (!child->next) { + opt_debug(opt, "unary identity\n"); + return only_child(expr); + } + + return lift_andor_not(opt, expr); +} + +/** Simplify a disjunction. */ +static struct bfs_expr *simplify_or(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct bfs_expr *ignorable = first_ignorable(opt, expr); + bool ignore = false; + + struct bfs_exprs children; + foster_children(expr, &children); + + while (!SLIST_EMPTY(&children)) { + struct bfs_expr *child = SLIST_POP(&children); + + if (child == ignorable) { + ignore = true; + } + + if (ignore) { + opt_delete(opt, "%pe [ignored result]\n", child); + opt_warning(opt, child, "The result of this expression is ignored.\n\n"); + continue; + } + + if (child->eval_fn == eval_false) { + opt_delete(opt, "%pe [disjunctive syllogism]\n", child); + continue; + } + + opt_visit(opt, "%pe\n", child); + bfs_expr_append(expr, child); + + if (child->always_true) { + while ((child = SLIST_POP(&children))) { + opt_delete(opt, "%pe [short-circuit]\n", child); + } + } + } + + struct bfs_expr *child = bfs_expr_children(expr); + if (!child) { + opt_debug(opt, "nullary identity\n"); + return opt_const(opt, false); + } else if (!child->next) { + opt_debug(opt, "unary identity\n"); + return only_child(expr); + } + + return lift_andor_not(opt, expr); +} + +/** Simplify a comma expression. */ +static struct bfs_expr *simplify_comma(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct bfs_exprs children; + foster_children(expr, &children); + + while (!SLIST_EMPTY(&children)) { + struct bfs_expr *child = SLIST_POP(&children); + + if (opt->level >= 2 && child->pure && !SLIST_EMPTY(&children)) { + opt_delete(opt, "%pe [ignored result]\n", child); + opt_warning(opt, child, "The result of this expression is ignored.\n\n"); + continue; + } + + opt_visit(opt, "%pe\n", child); + bfs_expr_append(expr, child); + } + + struct bfs_expr *child = bfs_expr_children(expr); + if (child && !child->next) { + opt_debug(opt, "unary identity\n"); + return only_child(expr); + } + + return expr; +} + +/** + * Logical simplification visitor. + */ +static const struct visitor simplify = { + .name = "simplify", + .table = (const struct visitor_table[]) { + {eval_not, simplify_not}, + {eval_and, simplify_and}, + {eval_or, simplify_or}, + {eval_comma, simplify_comma}, + {NULL, NULL}, + }, +}; + +/** Optimize an expression. */ +static struct bfs_expr *optimize(struct bfs_opt *opt, struct bfs_expr *expr) { + opt_enter(opt, "pass 0:\n"); + expr = visit(opt, expr, &annotate); + opt_leave(opt, NULL); + + /** Table of optimization passes. */ + static const struct { + /** Minimum optlevel for this pass. */ + int level; + /** The visitor for this pass. */ + const struct visitor *visitor; + } passes[] = { + {1, &canonicalize}, + {3, &reorder}, + {2, &data_flow}, + {1, &simplify}, + }; + + struct df_domain impure; + + for (int i = 0; i < 3; ++i) { + struct bfs_opt nested = *opt; + nested.impure = &impure; + impure = *opt->impure; + + opt_enter(&nested, "pass %d:\n", i + 1); + + for (size_t j = 0; j < countof(passes); ++j) { + if (opt->level < passes[j].level) { + continue; + } + + // Skip reordering the first time through the passes, to + // make warnings more understandable + if (passes[j].visitor == &reorder) { + if (i == 0) { + continue; + } else { + nested.warn = false; + } + } + + expr = visit(&nested, expr, passes[j].visitor); + if (!expr) { + return NULL; + } + } + + opt_leave(&nested, NULL); + + if (!bfs_expr_is_parent(expr)) { + break; + } + } + + *opt->impure = impure; + return expr; +} + +/** Estimate the odds of an expression calling stat(). */ +static float expr_stat_odds(struct bfs_expr *expr) { + if (expr->calls_stat) { + return 1.0; + } + + float nostat_odds = 1.0; + float reached_odds = 1.0; + for (struct bfs_expr *child = bfs_expr_children(expr); child; child = child->next) { + float child_odds = expr_stat_odds(child); + nostat_odds *= 1.0 - reached_odds * child_odds; + + if (expr->eval_fn == eval_and) { + reached_odds *= child->probability; + } else if (expr->eval_fn == eval_or) { + reached_odds *= 1.0 - child->probability; + } + } + + return 1.0 - nostat_odds; +} + +/** Estimate the odds of calling stat(). */ +static float estimate_stat_odds(struct bfs_ctx *ctx) { + if (ctx->unique) { + return 1.0; + } + + float nostat_odds = 1.0 - expr_stat_odds(ctx->exclude); + + float reached_odds = 1.0 - ctx->exclude->probability; + float expr_odds = expr_stat_odds(ctx->expr); + nostat_odds *= 1.0 - reached_odds * expr_odds; + + return 1.0 - nostat_odds; +} + +int bfs_optimize(struct bfs_ctx *ctx) { + bfs_ctx_dump(ctx, DEBUG_OPT); + + struct df_domain impure; + df_init_bottom(&impure); + + struct bfs_opt opt = { + .ctx = ctx, + .level = ctx->optlevel, + .depth = 0, + .warn = ctx->warn, + .ignore_result = false, + .impure = &impure, + }; + df_init_top(&opt.before); + + ctx->exclude = optimize(&opt, ctx->exclude); + if (!ctx->exclude) { + return -1; + } + + // Only non-excluded files are evaluated + opt.before = opt.after_false; + opt.ignore_result = true; + + struct df_range *depth = &opt.before.ranges[DEPTH_RANGE]; + if (ctx->mindepth > 0) { + constrain_min(depth, ctx->mindepth); + } + if (ctx->maxdepth < INT_MAX) { + constrain_max(depth, ctx->maxdepth); + } + + ctx->expr = optimize(&opt, ctx->expr); + if (!ctx->expr) { + return -1; + } + + if (opt.level >= 2 && df_is_bottom(&impure)) { + bfs_warning(ctx, "This command won't do anything.\n\n"); + } + + const struct df_range *impure_depth = &impure.ranges[DEPTH_RANGE]; + long long mindepth = impure_depth->min; + long long maxdepth = impure_depth->max; + + opt_enter(&opt, "post-process:\n"); + + if (opt.level >= 2 && mindepth > ctx->mindepth) { + if (mindepth > INT_MAX) { + mindepth = INT_MAX; + } + opt_enter(&opt, "${blu}-mindepth${rs} ${bld}%d${rs}\n", ctx->mindepth); + ctx->mindepth = mindepth; + opt_leave(&opt, "${blu}-mindepth${rs} ${bld}%d${rs}\n", ctx->mindepth); + } + + if (opt.level >= 4 && maxdepth < ctx->maxdepth) { + if (maxdepth < INT_MIN) { + maxdepth = INT_MIN; + } + opt_enter(&opt, "${blu}-maxdepth${rs} ${bld}%d${rs}\n", ctx->maxdepth); + ctx->maxdepth = maxdepth; + opt_leave(&opt, "${blu}-maxdepth${rs} ${bld}%d${rs}\n", ctx->maxdepth); + } + + if (opt.level >= 3) { + // bfs_eval() can do lazy stat() calls, but only on one thread. + float lazy_cost = estimate_stat_odds(ctx); + // bftw() can do eager stat() calls in parallel + float eager_cost = 1.0 / ctx->threads; + + if (eager_cost <= lazy_cost) { + opt_enter(&opt, "lazy stat cost: ${ylw}%g${rs}\n", lazy_cost); + ctx->flags |= BFTW_STAT; + opt_leave(&opt, "eager stat cost: ${ylw}%g${rs}\n", eager_cost); + } + + } + + opt_leave(&opt, NULL); + + return 0; +} diff --git a/src/opt.h b/src/opt.h new file mode 100644 index 0000000..4aac129 --- /dev/null +++ b/src/opt.h @@ -0,0 +1,23 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +/** + * Optimization. + */ + +#ifndef BFS_OPT_H +#define BFS_OPT_H + +struct bfs_ctx; + +/** + * Apply optimizations to the command line. + * + * @param ctx + * The bfs context to optimize. + * @return + * 0 if successful, -1 on error. + */ +int bfs_optimize(struct bfs_ctx *ctx); + +#endif // BFS_OPT_H diff --git a/src/parse.c b/src/parse.c new file mode 100644 index 0000000..a1155c0 --- /dev/null +++ b/src/parse.c @@ -0,0 +1,3712 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +/** + * The command line parser. Expressions are parsed by recursive descent, with a + * grammar described in the comments of the parse_*() functions. The parser + * also accepts flags and paths at any point in the expression, by treating + * flags like always-true options, and skipping over paths wherever they appear. + */ + +#include "prelude.h" +#include "parse.h" +#include "alloc.h" +#include "bfstd.h" +#include "bftw.h" +#include "color.h" +#include "ctx.h" +#include "diag.h" +#include "dir.h" +#include "eval.h" +#include "exec.h" +#include "expr.h" +#include "fsade.h" +#include "list.h" +#include "opt.h" +#include "printf.h" +#include "pwcache.h" +#include "sanity.h" +#include "stat.h" +#include "typo.h" +#include "xregex.h" +#include "xspawn.h" +#include "xtime.h" +#include <errno.h> +#include <fcntl.h> +#include <fnmatch.h> +#include <grp.h> +#include <limits.h> +#include <pwd.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <time.h> +#include <unistd.h> + +// Strings printed by -D tree for "fake" expressions +static char *fake_and_arg = "-and"; +static char *fake_hidden_arg = "-hidden"; +static char *fake_or_arg = "-or"; +static char *fake_print_arg = "-print"; +static char *fake_true_arg = "-true"; + +/** + * Color use flags. + */ +enum use_color { + COLOR_NEVER, + COLOR_AUTO, + COLOR_ALWAYS, +}; + +/** + * Command line parser state. + */ +struct bfs_parser { + /** The command line being constructed. */ + struct bfs_ctx *ctx; + /** The command line arguments being parsed. */ + char **argv; + /** The name of this program. */ + const char *command; + + /** The current regex flags to use. */ + enum bfs_regex_type regex_type; + + /** Whether stdout is a terminal. */ + bool stdout_tty; + /** Whether this session is interactive (stdin and stderr are each a terminal). */ + bool interactive; + /** Whether -color or -nocolor has been passed. */ + enum use_color use_color; + /** Whether a -print action is implied. */ + bool implicit_print; + /** Whether the default root "." should be used. */ + bool implicit_root; + /** Whether the expression has started. */ + bool expr_started; + /** Whether an information option like -help or -version was passed. */ + bool just_info; + /** Whether we are currently parsing an -exclude expression. */ + bool excluding; + + /** The last non-path argument. */ + char **last_arg; + /** A "-depth"-type argument, if any. */ + char **depth_arg; + /** A "-limit" argument, if any. */ + char **limit_arg; + /** A "-prune" argument, if any. */ + char **prune_arg; + /** A "-mount" argument, if any. */ + char **mount_arg; + /** An "-xdev" argument, if any. */ + char **xdev_arg; + /** A "-files0-from -" argument, if any. */ + char **files0_stdin_arg; + /** An "-ok"-type expression, if any. */ + const struct bfs_expr *ok_expr; + + /** The current time (maybe modified by -daystart). */ + struct timespec now; +}; + +/** + * Possible token types. + */ +enum token_type { + /** A flag. */ + T_FLAG, + /** A root path. */ + T_PATH, + /** An option. */ + T_OPTION, + /** A test. */ + T_TEST, + /** An action. */ + T_ACTION, + /** An operator. */ + T_OPERATOR, +}; + +/** + * Print a low-level error message during parsing. + */ +static void parse_perror(const struct bfs_parser *parser, const char *str) { + bfs_perror(parser->ctx, str); +} + +/** Initialize an empty highlighted range. */ +static void init_highlight(const struct bfs_ctx *ctx, bool *args) { + for (size_t i = 0; i < ctx->argc; ++i) { + args[i] = false; + } +} + +/** Highlight a range of command line arguments. */ +static void highlight_args(const struct bfs_ctx *ctx, char **argv, size_t argc, bool *args) { + size_t i = argv - ctx->argv; + for (size_t j = 0; j < argc; ++j) { + bfs_assert(i + j < ctx->argc); + args[i + j] = true; + } +} + +/** + * Print an error message during parsing. + */ +attr(printf(2, 3)) +static void parse_error(const struct bfs_parser *parser, const char *format, ...) { + int error = errno; + const struct bfs_ctx *ctx = parser->ctx; + + bool highlight[ctx->argc]; + init_highlight(ctx, highlight); + highlight_args(ctx, parser->argv, 1, highlight); + bfs_argv_error(ctx, highlight); + + va_list args; + va_start(args, format); + errno = error; + bfs_verror(parser->ctx, format, args); + va_end(args); +} + +/** + * Print an error about some command line arguments. + */ +attr(printf(4, 5)) +static void parse_argv_error(const struct bfs_parser *parser, char **argv, size_t argc, const char *format, ...) { + int error = errno; + const struct bfs_ctx *ctx = parser->ctx; + + bool highlight[ctx->argc]; + init_highlight(ctx, highlight); + highlight_args(ctx, argv, argc, highlight); + bfs_argv_error(ctx, highlight); + + va_list args; + va_start(args, format); + errno = error; + bfs_verror(ctx, format, args); + va_end(args); +} + +/** + * Print an error about conflicting command line arguments. + */ +attr(printf(6, 7)) +static void parse_conflict_error(const struct bfs_parser *parser, char **argv1, size_t argc1, char **argv2, size_t argc2, const char *format, ...) { + int error = errno; + const struct bfs_ctx *ctx = parser->ctx; + + bool highlight[ctx->argc]; + init_highlight(ctx, highlight); + highlight_args(ctx, argv1, argc1, highlight); + highlight_args(ctx, argv2, argc2, highlight); + bfs_argv_error(ctx, highlight); + + va_list args; + va_start(args, format); + errno = error; + bfs_verror(ctx, format, args); + va_end(args); +} + +/** + * Print an error about an expression. + */ +attr(printf(3, 4)) +static void parse_expr_error(const struct bfs_parser *parser, const struct bfs_expr *expr, const char *format, ...) { + int error = errno; + const struct bfs_ctx *ctx = parser->ctx; + + bfs_expr_error(ctx, expr); + + va_list args; + va_start(args, format); + errno = error; + bfs_verror(ctx, format, args); + va_end(args); +} + +/** + * Print a warning message during parsing. + */ +attr(printf(2, 3)) +static bool parse_warning(const struct bfs_parser *parser, const char *format, ...) { + int error = errno; + const struct bfs_ctx *ctx = parser->ctx; + + bool highlight[ctx->argc]; + init_highlight(ctx, highlight); + highlight_args(ctx, parser->argv, 1, highlight); + if (!bfs_argv_warning(ctx, highlight)) { + return false; + } + + va_list args; + va_start(args, format); + errno = error; + bool ret = bfs_vwarning(parser->ctx, format, args); + va_end(args); + return ret; +} + +/** + * Print a warning about conflicting command line arguments. + */ +attr(printf(6, 7)) +static bool parse_conflict_warning(const struct bfs_parser *parser, char **argv1, size_t argc1, char **argv2, size_t argc2, const char *format, ...) { + int error = errno; + const struct bfs_ctx *ctx = parser->ctx; + + bool highlight[ctx->argc]; + init_highlight(ctx, highlight); + highlight_args(ctx, argv1, argc1, highlight); + highlight_args(ctx, argv2, argc2, highlight); + if (!bfs_argv_warning(ctx, highlight)) { + return false; + } + + va_list args; + va_start(args, format); + errno = error; + bool ret = bfs_vwarning(ctx, format, args); + va_end(args); + return ret; +} + +/** + * Print a warning about an expression. + */ +attr(printf(3, 4)) +static bool parse_expr_warning(const struct bfs_parser *parser, const struct bfs_expr *expr, const char *format, ...) { + int error = errno; + const struct bfs_ctx *ctx = parser->ctx; + + if (!bfs_expr_warning(ctx, expr)) { + return false; + } + + va_list args; + va_start(args, format); + errno = error; + bool ret = bfs_vwarning(ctx, format, args); + va_end(args); + return ret; +} + +/** + * Allocate a new expression. + */ +static struct bfs_expr *parse_new_expr(const struct bfs_parser *parser, bfs_eval_fn *eval_fn, size_t argc, char **argv) { + struct bfs_expr *expr = bfs_expr_new(parser->ctx, eval_fn, argc, argv); + if (!expr) { + parse_perror(parser, "bfs_expr_new()"); + } + return expr; +} + +/** + * Create a new unary expression. + */ +static struct bfs_expr *new_unary_expr(const struct bfs_parser *parser, bfs_eval_fn *eval_fn, struct bfs_expr *rhs, char **argv) { + struct bfs_expr *expr = parse_new_expr(parser, eval_fn, 1, argv); + if (!expr) { + return NULL; + } + + bfs_assert(bfs_expr_is_parent(expr)); + bfs_expr_append(expr, rhs); + return expr; +} + +/** + * Create a new binary expression. + */ +static struct bfs_expr *new_binary_expr(const struct bfs_parser *parser, bfs_eval_fn *eval_fn, struct bfs_expr *lhs, struct bfs_expr *rhs, char **argv) { + struct bfs_expr *expr = parse_new_expr(parser, eval_fn, 1, argv); + if (!expr) { + return NULL; + } + + bfs_assert(bfs_expr_is_parent(expr)); + bfs_expr_append(expr, lhs); + bfs_expr_append(expr, rhs); + return expr; +} + +/** + * Fill in a "-print"-type expression. + */ +static void init_print_expr(struct bfs_parser *parser, struct bfs_expr *expr) { + expr->cfile = parser->ctx->cout; + expr->path = NULL; +} + +/** + * Open a file for an expression. + */ +static int expr_open(struct bfs_parser *parser, struct bfs_expr *expr, const char *path) { + struct bfs_ctx *ctx = parser->ctx; + + FILE *file = NULL; + CFILE *cfile = NULL; + + file = xfopen(path, O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC); + if (!file) { + goto fail; + } + + cfile = cfwrap(file, parser->use_color ? ctx->colors : NULL, true); + if (!cfile) { + goto fail; + } + + CFILE *dedup = bfs_ctx_dedup(ctx, cfile, path); + if (!dedup) { + goto fail; + } + + if (dedup != cfile) { + cfclose(cfile); + } + + expr->cfile = dedup; + expr->path = path; + return 0; + +fail: + parse_expr_error(parser, expr, "%m.\n"); + if (cfile) { + cfclose(cfile); + } else if (file) { + fclose(file); + } + return -1; +} + +/** + * Invoke bfs_stat() on an argument. + */ +static int stat_arg(const struct bfs_parser *parser, char **arg, struct bfs_stat *sb) { + const struct bfs_ctx *ctx = parser->ctx; + + bool follow = ctx->flags & (BFTW_FOLLOW_ROOTS | BFTW_FOLLOW_ALL); + enum bfs_stat_flags flags = follow ? BFS_STAT_TRYFOLLOW : BFS_STAT_NOFOLLOW; + + int ret = bfs_stat(AT_FDCWD, *arg, flags, sb); + if (ret != 0) { + parse_argv_error(parser, arg, 1, "%m.\n"); + } + return ret; +} + +/** + * Parse the expression specified on the command line. + */ +static struct bfs_expr *parse_expr(struct bfs_parser *parser); + +/** + * Advance by a single token. + */ +static char **parser_advance(struct bfs_parser *parser, enum token_type type, size_t argc) { + if (type != T_FLAG && type != T_PATH) { + parser->expr_started = true; + } + + if (type != T_PATH) { + parser->last_arg = parser->argv; + } + + char **argv = parser->argv; + parser->argv += argc; + return argv; +} + +/** + * Parse a root path. + */ +static int parse_root(struct bfs_parser *parser, const char *path) { + struct bfs_ctx *ctx = parser->ctx; + const char **root = RESERVE(const char *, &ctx->paths, &ctx->npaths); + if (!root) { + parse_perror(parser, "RESERVE()"); + return -1; + } + + *root = strdup(path); + if (!*root) { + --ctx->npaths; + parse_perror(parser, "strdup()"); + return -1; + } + + parser->implicit_root = false; + return 0; +} + +/** + * While parsing an expression, skip any paths and add them to ctx->paths. + */ +static int skip_paths(struct bfs_parser *parser) { + while (true) { + const char *arg = parser->argv[0]; + if (!arg) { + return 0; + } + + if (arg[0] == '-') { + if (strcmp(arg, "--") == 0) { + // find uses -- to separate flags from the rest + // of the command line. We allow mixing flags + // and paths/predicates, so we just ignore --. + parser_advance(parser, T_FLAG, 1); + continue; + } + if (strcmp(arg, "-") != 0) { + // - by itself is a file name. Anything else + // starting with - is a flag/predicate. + return 0; + } + } + + // By POSIX, these are always options + if (strcmp(arg, "(") == 0 || strcmp(arg, "!") == 0) { + return 0; + } + + if (parser->expr_started) { + // By POSIX, these can be paths. We only treat them as + // such at the beginning of the command line. + if (strcmp(arg, ")") == 0 || strcmp(arg, ",") == 0) { + return 0; + } + } + + if (parser->excluding) { + parse_warning(parser, "This path will not be excluded. Use a test like ${blu}-name${rs} or ${blu}-path${rs}\n"); + bfs_warning(parser->ctx, "within ${red}-exclude${rs} to exclude matching files.\n\n"); + } + + if (parse_root(parser, arg) != 0) { + return -1; + } + + parser_advance(parser, T_PATH, 1); + } +} + +/** Integer parsing flags. */ +enum int_flags { + IF_BASE_MASK = 0x03F, + IF_INT = 0x040, + IF_LONG = 0x080, + IF_LONG_LONG = 0x0C0, + IF_SIZE_MASK = 0x0C0, + IF_UNSIGNED = 0x100, + IF_PARTIAL_OK = 0x200, + IF_QUIET = 0x400, +}; + +/** + * Parse an integer. + */ +static const char *parse_int(const struct bfs_parser *parser, char **arg, const char *str, void *result, enum int_flags flags) { + // strtoll() skips leading spaces, but we want to reject them + if (xisspace(str[0])) { + goto bad; + } + + int base = flags & IF_BASE_MASK; + if (base == 0) { + base = 10; + } + + char *endptr; + errno = 0; + long long value = strtoll(str, &endptr, base); + if (errno != 0) { + if (errno == ERANGE) { + goto range; + } else { + goto bad; + } + } + + // https://github.com/llvm/llvm-project/issues/64946 + sanitize_init(&endptr); + + if (endptr == str) { + goto bad; + } + + if (!(flags & IF_PARTIAL_OK) && *endptr != '\0') { + goto bad; + } + + if ((flags & IF_UNSIGNED) && value < 0) { + goto negative; + } + + switch (flags & IF_SIZE_MASK) { + case IF_INT: + if (value < INT_MIN || value > INT_MAX) { + goto range; + } + *(int *)result = value; + break; + + case IF_LONG: + if (value < LONG_MIN || value > LONG_MAX) { + goto range; + } + *(long *)result = value; + break; + + case IF_LONG_LONG: + *(long long *)result = value; + break; + + default: + bfs_bug("Invalid int size"); + goto bad; + } + + return endptr; + +bad: + if (!(flags & IF_QUIET)) { + parse_argv_error(parser, arg, 1, "${bld}%pq${rs} is not a valid integer.\n", str); + } + return NULL; + +negative: + if (!(flags & IF_QUIET)) { + parse_argv_error(parser, arg, 1, "Negative integer ${bld}%pq${rs} is not allowed here.\n", str); + } + return NULL; + +range: + if (!(flags & IF_QUIET)) { + parse_argv_error(parser, arg, 1, "${bld}%pq${rs} is too large an integer.\n", str); + } + return NULL; +} + +/** + * Parse an integer and a comparison flag. + */ +static const char *parse_icmp(const struct bfs_parser *parser, struct bfs_expr *expr, enum int_flags flags) { + char **arg = &expr->argv[1]; + const char *str = *arg; + switch (str[0]) { + case '-': + expr->int_cmp = BFS_INT_LESS; + ++str; + break; + case '+': + expr->int_cmp = BFS_INT_GREATER; + ++str; + break; + default: + expr->int_cmp = BFS_INT_EQUAL; + break; + } + + return parse_int(parser, arg, str, &expr->num, flags | IF_LONG_LONG | IF_UNSIGNED); +} + +/** + * Check if a string could be an integer comparison. + */ +static bool looks_like_icmp(const char *str) { + int i; + + // One +/- for the comparison flag, one for the sign + for (i = 0; i < 2; ++i) { + if (str[i] != '-' && str[i] != '+') { + break; + } + } + + return str[i] >= '0' && str[i] <= '9'; +} + +/** + * Parse a single flag. + */ +static struct bfs_expr *parse_flag(struct bfs_parser *parser, size_t argc) { + char **argv = parser_advance(parser, T_FLAG, argc); + return parse_new_expr(parser, eval_true, argc, argv); +} + +/** + * Parse a flag that doesn't take a value. + */ +static struct bfs_expr *parse_nullary_flag(struct bfs_parser *parser) { + return parse_flag(parser, 1); +} + +/** + * Parse a flag that takes a value. + */ +static struct bfs_expr *parse_unary_flag(struct bfs_parser *parser) { + const char *arg = parser->argv[0]; + const char *value = parser->argv[1]; + if (!value) { + parse_error(parser, "${cyn}%s${rs} needs a value.\n", arg); + return NULL; + } + + return parse_flag(parser, 2); +} + +/** + * Parse a single option. + */ +static struct bfs_expr *parse_option(struct bfs_parser *parser, size_t argc) { + char **argv = parser_advance(parser, T_OPTION, argc); + return parse_new_expr(parser, eval_true, argc, argv); +} + +/** + * Parse an option that doesn't take a value. + */ +static struct bfs_expr *parse_nullary_option(struct bfs_parser *parser) { + return parse_option(parser, 1); +} + +/** + * Parse an option that takes a value. + */ +static struct bfs_expr *parse_unary_option(struct bfs_parser *parser) { + const char *arg = parser->argv[0]; + const char *value = parser->argv[1]; + if (!value) { + parse_error(parser, "${blu}%s${rs} needs a value.\n", arg); + return NULL; + } + + return parse_option(parser, 2); +} + +/** + * Parse a single test. + */ +static struct bfs_expr *parse_test(struct bfs_parser *parser, bfs_eval_fn *eval_fn, size_t argc) { + char **argv = parser_advance(parser, T_TEST, argc); + return parse_new_expr(parser, eval_fn, argc, argv); +} + +/** + * Parse a test that doesn't take a value. + */ +static struct bfs_expr *parse_nullary_test(struct bfs_parser *parser, bfs_eval_fn *eval_fn) { + return parse_test(parser, eval_fn, 1); +} + +/** + * Parse a test that takes a value. + */ +static struct bfs_expr *parse_unary_test(struct bfs_parser *parser, bfs_eval_fn *eval_fn) { + const char *arg = parser->argv[0]; + const char *value = parser->argv[1]; + if (!value) { + parse_error(parser, "${blu}%s${rs} needs a value.\n", arg); + return NULL; + } + + return parse_test(parser, eval_fn, 2); +} + +/** + * Parse a single action. + */ +static struct bfs_expr *parse_action(struct bfs_parser *parser, bfs_eval_fn *eval_fn, size_t argc) { + char **argv = parser_advance(parser, T_ACTION, argc); + + if (parser->excluding) { + parse_argv_error(parser, argv, argc, "This action is not supported within ${red}-exclude${rs}.\n"); + return NULL; + } + + if (eval_fn != eval_limit && eval_fn != eval_prune && eval_fn != eval_quit) { + parser->implicit_print = false; + } + + return parse_new_expr(parser, eval_fn, argc, argv); +} + +/** + * Parse an action that takes no arguments. + */ +static struct bfs_expr *parse_nullary_action(struct bfs_parser *parser, bfs_eval_fn *eval_fn) { + return parse_action(parser, eval_fn, 1); +} + +/** + * Parse an action that takes one argument. + */ +static struct bfs_expr *parse_unary_action(struct bfs_parser *parser, bfs_eval_fn *eval_fn) { + const char *arg = parser->argv[0]; + const char *value = parser->argv[1]; + if (!value) { + parse_error(parser, "${blu}%s${rs} needs a value.\n", arg); + return NULL; + } + + return parse_action(parser, eval_fn, 2); +} + +/** + * Parse a test expression with integer data and a comparison flag. + */ +static struct bfs_expr *parse_test_icmp(struct bfs_parser *parser, bfs_eval_fn *eval_fn) { + struct bfs_expr *expr = parse_unary_test(parser, eval_fn); + if (!expr) { + return NULL; + } + + if (!parse_icmp(parser, expr, 0)) { + return NULL; + } + + return expr; +} + +/** + * Print usage information for -D. + */ +static void debug_help(CFILE *cfile) { + cfprintf(cfile, "Supported debug flags:\n\n"); + + cfprintf(cfile, " ${bld}help${rs}: This message.\n"); + cfprintf(cfile, " ${bld}cost${rs}: Show cost estimates.\n"); + cfprintf(cfile, " ${bld}exec${rs}: Print executed command details.\n"); + cfprintf(cfile, " ${bld}opt${rs}: Print optimization details.\n"); + cfprintf(cfile, " ${bld}rates${rs}: Print predicate success rates.\n"); + cfprintf(cfile, " ${bld}search${rs}: Trace the filesystem traversal.\n"); + cfprintf(cfile, " ${bld}stat${rs}: Trace all stat() calls.\n"); + cfprintf(cfile, " ${bld}tree${rs}: Print the parse tree.\n"); + cfprintf(cfile, " ${bld}all${rs}: All debug flags at once.\n"); +} + +/** Check if a substring matches a debug flag. */ +static bool parse_debug_flag(const char *flag, size_t len, const char *expected) { + if (len == strlen(expected)) { + return strncmp(flag, expected, len) == 0; + } else { + return false; + } +} + +/** + * Parse -D FLAG. + */ +static struct bfs_expr *parse_debug(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_ctx *ctx = parser->ctx; + + struct bfs_expr *expr = parse_unary_flag(parser); + if (!expr) { + cfprintf(ctx->cerr, "\n"); + debug_help(ctx->cerr); + return NULL; + } + + bool unrecognized = false; + + for (const char *flag = expr->argv[1], *next; flag; flag = next) { + size_t len = strcspn(flag, ","); + if (flag[len]) { + next = flag + len + 1; + } else { + next = NULL; + } + + if (parse_debug_flag(flag, len, "help")) { + debug_help(ctx->cout); + parser->just_info = true; + return NULL; + } else if (parse_debug_flag(flag, len, "all")) { + ctx->debug = DEBUG_ALL; + continue; + } + + enum debug_flags i; + for (i = 1; DEBUG_ALL & i; i <<= 1) { + const char *name = debug_flag_name(i); + if (parse_debug_flag(flag, len, name)) { + break; + } + } + + if (DEBUG_ALL & i) { + ctx->debug |= i; + } else { + if (parse_expr_warning(parser, expr, "Unrecognized debug flag ${bld}")) { + fwrite(flag, 1, len, stderr); + cfprintf(ctx->cerr, "${rs}.\n\n"); + unrecognized = true; + } + } + } + + if (unrecognized) { + debug_help(ctx->cerr); + cfprintf(ctx->cerr, "\n"); + } + + return expr; +} + +/** + * Parse -On. + */ +static struct bfs_expr *parse_optlevel(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_nullary_flag(parser); + if (!expr) { + return NULL; + } + + int *optlevel = &parser->ctx->optlevel; + + if (strcmp(expr->argv[0], "-Ofast") == 0) { + *optlevel = 4; + } else if (!parse_int(parser, expr->argv, expr->argv[0] + 2, optlevel, IF_INT | IF_UNSIGNED)) { + return NULL; + } + + if (*optlevel > 4) { + parse_expr_warning(parser, expr, "${cyn}-O${bld}%s${rs} is the same as ${cyn}-O${bld}4${rs}.\n\n", expr->argv[0] + 2); + } + + return expr; +} + +/** + * Parse -[PHL], -follow. + */ +static struct bfs_expr *parse_follow(struct bfs_parser *parser, int flags, int option) { + struct bfs_ctx *ctx = parser->ctx; + ctx->flags &= ~(BFTW_FOLLOW_ROOTS | BFTW_FOLLOW_ALL); + ctx->flags |= flags; + if (option) { + return parse_nullary_option(parser); + } else { + return parse_nullary_flag(parser); + } +} + +/** + * Parse -X. + */ +static struct bfs_expr *parse_xargs_safe(struct bfs_parser *parser, int arg1, int arg2) { + parser->ctx->xargs_safe = true; + return parse_nullary_flag(parser); +} + +/** + * Parse -executable, -readable, -writable + */ +static struct bfs_expr *parse_access(struct bfs_parser *parser, int flag, int arg2) { + struct bfs_expr *expr = parse_nullary_test(parser, eval_access); + if (expr) { + expr->num = flag; + } + return expr; +} + +/** + * Parse -acl. + */ +static struct bfs_expr *parse_acl(struct bfs_parser *parser, int flag, int arg2) { +#if BFS_CAN_CHECK_ACL + return parse_nullary_test(parser, eval_acl); +#else + parse_error(parser, "Missing platform support.\n"); + return NULL; +#endif +} + +/** + * Parse -[aBcm]?newer. + */ +static struct bfs_expr *parse_newer(struct bfs_parser *parser, int field, int arg2) { + struct bfs_expr *expr = parse_unary_test(parser, eval_newer); + if (!expr) { + return NULL; + } + + struct bfs_stat sb; + if (stat_arg(parser, &expr->argv[1], &sb) != 0) { + return NULL; + } + + expr->reftime = sb.mtime; + expr->stat_field = field; + return expr; +} + +/** + * Parse -[aBcm]min. + */ +static struct bfs_expr *parse_min(struct bfs_parser *parser, int field, int arg2) { + struct bfs_expr *expr = parse_test_icmp(parser, eval_time); + if (!expr) { + return NULL; + } + + expr->reftime = parser->now; + expr->stat_field = field; + expr->time_unit = BFS_MINUTES; + return expr; +} + +/** + * Parse -[aBcm]time. + */ +static struct bfs_expr *parse_time(struct bfs_parser *parser, int field, int arg2) { + struct bfs_expr *expr = parse_unary_test(parser, eval_time); + if (!expr) { + return NULL; + } + + expr->reftime = parser->now; + expr->stat_field = field; + + const char *tail = parse_icmp(parser, expr, IF_PARTIAL_OK); + if (!tail) { + return NULL; + } + + if (!*tail) { + expr->time_unit = BFS_DAYS; + return expr; + } + + unsigned long long time = expr->num; + expr->num = 0; + + while (true) { + switch (*tail) { + case 'w': + time *= 7; + fallthru; + case 'd': + time *= 24; + fallthru; + case 'h': + time *= 60; + fallthru; + case 'm': + time *= 60; + fallthru; + case 's': + break; + default: + parse_expr_error(parser, expr, "Unknown time unit ${bld}%c${rs}.\n", *tail); + return NULL; + } + + expr->num += time; + + if (!*++tail) { + break; + } + + tail = parse_int(parser, &expr->argv[1], tail, &time, IF_PARTIAL_OK | IF_LONG_LONG | IF_UNSIGNED); + if (!tail) { + return NULL; + } + if (!*tail) { + parse_expr_error(parser, expr, "Missing time unit.\n"); + return NULL; + } + } + + expr->time_unit = BFS_SECONDS; + return expr; +} + +/** + * Parse -capable. + */ +static struct bfs_expr *parse_capable(struct bfs_parser *parser, int flag, int arg2) { +#if BFS_CAN_CHECK_CAPABILITIES + return parse_nullary_test(parser, eval_capable); +#else + parse_error(parser, "Missing platform support.\n"); + return NULL; +#endif +} + +/** + * Parse -(no)?color. + */ +static struct bfs_expr *parse_color(struct bfs_parser *parser, int color, int arg2) { + struct bfs_expr *expr = parse_nullary_option(parser); + if (!expr) { + return NULL; + } + + struct bfs_ctx *ctx = parser->ctx; + struct colors *colors = ctx->colors; + + if (color) { + if (!colors) { + parse_expr_error(parser, expr, "Error parsing $$LS_COLORS: %s.\n", xstrerror(ctx->colors_error)); + return NULL; + } + + parser->use_color = COLOR_ALWAYS; + ctx->cout->colors = colors; + ctx->cerr->colors = colors; + } else { + parser->use_color = COLOR_NEVER; + ctx->cout->colors = NULL; + ctx->cerr->colors = NULL; + } + + return expr; +} + +/** + * Common code for fnmatch() tests. + */ +static struct bfs_expr *parse_fnmatch(const struct bfs_parser *parser, struct bfs_expr *expr, bool casefold) { + if (!expr) { + return NULL; + } + + expr->pattern = expr->argv[1]; + + if (casefold) { +#ifdef FNM_CASEFOLD + expr->fnm_flags = FNM_CASEFOLD; +#else + parse_expr_error(parser, expr, "Missing platform support.\n"); + return NULL; +#endif + } else { + expr->fnm_flags = 0; + } + + // POSIX says, about fnmatch(): + // + // If pattern ends with an unescaped <backslash>, fnmatch() shall + // return a non-zero value (indicating either no match or an error). + // + // But not all implementations obey this, so check for it ourselves. + size_t i, len = strlen(expr->pattern); + for (i = 0; i < len; ++i) { + if (expr->pattern[len - i - 1] != '\\') { + break; + } + } + if (i % 2 != 0) { + parse_expr_warning(parser, expr, "Unescaped trailing backslash.\n\n"); + expr->eval_fn = eval_false; + return expr; + } + + // strcmp() can be much faster than fnmatch() since it doesn't have to + // parse the pattern, so special-case patterns with no wildcards. + // + // https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_13_01 + expr->literal = strcspn(expr->pattern, "?*\\[") == len; + + return expr; +} + +/** + * Parse -context. + */ +static struct bfs_expr *parse_context(struct bfs_parser *parser, int flag, int arg2) { +#if BFS_CAN_CHECK_CONTEXT + struct bfs_expr *expr = parse_unary_test(parser, eval_context); + return parse_fnmatch(parser, expr, false); +#else + parse_error(parser, "Missing platform support.\n"); + return NULL; +#endif +} + +/** + * Parse -{false,true}. + */ +static struct bfs_expr *parse_const(struct bfs_parser *parser, int value, int arg2) { + return parse_nullary_test(parser, value ? eval_true : eval_false); +} + +/** + * Parse -daystart. + */ +static struct bfs_expr *parse_daystart(struct bfs_parser *parser, int arg1, int arg2) { + struct tm tm; + if (!localtime_r(&parser->now.tv_sec, &tm)) { + parse_perror(parser, "localtime_r()"); + return NULL; + } + + if (tm.tm_hour || tm.tm_min || tm.tm_sec || parser->now.tv_nsec) { + ++tm.tm_mday; + } + tm.tm_hour = 0; + tm.tm_min = 0; + tm.tm_sec = 0; + + time_t time; + if (xmktime(&tm, &time) != 0) { + parse_perror(parser, "xmktime()"); + return NULL; + } + + parser->now.tv_sec = time; + parser->now.tv_nsec = 0; + + return parse_nullary_option(parser); +} + +/** + * Parse -delete. + */ +static struct bfs_expr *parse_delete(struct bfs_parser *parser, int arg1, int arg2) { + parser->ctx->flags |= BFTW_POST_ORDER; + parser->depth_arg = parser->argv; + return parse_nullary_action(parser, eval_delete); +} + +/** + * Parse -d. + */ +static struct bfs_expr *parse_depth(struct bfs_parser *parser, int arg1, int arg2) { + parser->ctx->flags |= BFTW_POST_ORDER; + parser->depth_arg = parser->argv; + return parse_nullary_flag(parser); +} + +/** + * Parse -depth [N]. + */ +static struct bfs_expr *parse_depth_n(struct bfs_parser *parser, int arg1, int arg2) { + const char *arg = parser->argv[1]; + if (arg && looks_like_icmp(arg)) { + return parse_test_icmp(parser, eval_depth); + } else { + return parse_depth(parser, arg1, arg2); + } +} + +/** + * Parse -{min,max}depth N. + */ +static struct bfs_expr *parse_depth_limit(struct bfs_parser *parser, int is_min, int arg2) { + struct bfs_expr *expr = parse_unary_option(parser); + if (!expr) { + return NULL; + } + + struct bfs_ctx *ctx = parser->ctx; + int *depth = is_min ? &ctx->mindepth : &ctx->maxdepth; + char **arg = &expr->argv[1]; + if (!parse_int(parser, arg, *arg, depth, IF_INT | IF_UNSIGNED)) { + return NULL; + } + + return expr; +} + +/** + * Parse -empty. + */ +static struct bfs_expr *parse_empty(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_nullary_test(parser, eval_empty); + if (expr) { + // For opendir() + expr->ephemeral_fds = 1; + } + return expr; +} + +/** + * Parse -exec(dir)?/-ok(dir)?. + */ +static struct bfs_expr *parse_exec(struct bfs_parser *parser, int flags, int arg2) { + struct bfs_exec *execbuf = bfs_exec_parse(parser->ctx, parser->argv, flags); + if (!execbuf) { + return NULL; + } + + struct bfs_expr *expr = parse_action(parser, eval_exec, execbuf->tmpl_argc + 2); + if (!expr) { + bfs_exec_free(execbuf); + return NULL; + } + + expr->exec = execbuf; + + // For pipe() in bfs_spawn() + expr->ephemeral_fds = 2; + + if (execbuf->flags & BFS_EXEC_CHDIR) { + // Check for relative paths in $PATH + const char *path = getenv("PATH"); + while (path) { + if (*path != '/') { + size_t len = strcspn(path, ":"); + char *comp = strndup(path, len); + if (comp) { + parse_expr_error(parser, expr, + "This action would be unsafe, since ${bld}$$PATH${rs} contains the relative path ${bld}%pq${rs}\n", comp); + free(comp); + } else { + parse_perror(parser, "strndup()"); + } + return NULL; + } + + path = strchr(path, ':'); + if (path) { + ++path; + } + } + + // To dup() the parent directory + if (execbuf->flags & BFS_EXEC_MULTI) { + ++expr->persistent_fds; + } else { + ++expr->ephemeral_fds; + } + } + + if (execbuf->flags & BFS_EXEC_CONFIRM) { + parser->ok_expr = expr; + } + + return expr; +} + +/** + * Parse -exit [STATUS]. + */ +static struct bfs_expr *parse_exit(struct bfs_parser *parser, int arg1, int arg2) { + size_t argc = 1; + const char *value = parser->argv[1]; + + int status = EXIT_SUCCESS; + if (value && parse_int(parser, NULL, value, &status, IF_INT | IF_UNSIGNED | IF_QUIET)) { + argc = 2; + } + + struct bfs_expr *expr = parse_action(parser, eval_exit, argc); + if (expr) { + expr->num = status; + } + return expr; +} + +/** + * Parse -f PATH. + */ +static struct bfs_expr *parse_f(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_unary_flag(parser); + if (!expr) { + return NULL; + } + + if (parse_root(parser, expr->argv[1]) != 0) { + return NULL; + } + + return expr; +} + +/** + * Parse -files0-from PATH. + */ +static struct bfs_expr *parse_files0_from(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_unary_option(parser); + if (!expr) { + return NULL; + } + + const char *from = expr->argv[1]; + + FILE *file; + if (strcmp(from, "-") == 0) { + file = stdin; + } else { + file = xfopen(from, O_RDONLY | O_CLOEXEC); + } + if (!file) { + parse_expr_error(parser, expr, "%m.\n"); + return NULL; + } + + while (true) { + char *path = xgetdelim(file, '\0'); + if (!path) { + if (errno) { + goto fail; + } else { + break; + } + } + + int ret = parse_root(parser, path); + free(path); + if (ret != 0) { + goto fail; + } + } + + if (file == stdin) { + parser->files0_stdin_arg = expr->argv; + } else { + fclose(file); + } + + parser->implicit_root = false; + return expr; + +fail: + if (file != stdin) { + fclose(file); + } + return NULL; +} + +/** + * Parse -flags FLAGS. + */ +static struct bfs_expr *parse_flags(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_unary_test(parser, eval_flags); + if (!expr) { + return NULL; + } + + const char *flags = expr->argv[1]; + switch (flags[0]) { + case '-': + expr->flags_cmp = BFS_MODE_ALL; + ++flags; + break; + case '+': + expr->flags_cmp = BFS_MODE_ANY; + ++flags; + break; + default: + expr->flags_cmp = BFS_MODE_EQUAL; + break; + } + + if (xstrtofflags(&flags, &expr->set_flags, &expr->clear_flags) != 0) { + if (errno == ENOTSUP) { + parse_expr_error(parser, expr, "Missing platform support.\n"); + } else { + parse_expr_error(parser, expr, "Invalid flags.\n"); + } + return NULL; + } + + return expr; +} + +/** + * Parse -fls FILE. + */ +static struct bfs_expr *parse_fls(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_unary_action(parser, eval_fls); + if (!expr) { + return NULL; + } + + if (expr_open(parser, expr, expr->argv[1]) != 0) { + return NULL; + } + + return expr; +} + +/** + * Parse -fprint FILE. + */ +static struct bfs_expr *parse_fprint(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_unary_action(parser, eval_fprint); + if (!expr) { + return NULL; + } + + if (expr_open(parser, expr, expr->argv[1]) != 0) { + return NULL; + } + + return expr; +} + +/** + * Parse -fprint0 FILE. + */ +static struct bfs_expr *parse_fprint0(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_unary_action(parser, eval_fprint0); + if (!expr) { + return NULL; + } + + if (expr_open(parser, expr, expr->argv[1]) != 0) { + return NULL; + } + + return expr; +} + +/** + * Parse -fprintf FILE FORMAT. + */ +static struct bfs_expr *parse_fprintf(struct bfs_parser *parser, int arg1, int arg2) { + const char *arg = parser->argv[0]; + + const char *file = parser->argv[1]; + if (!file) { + parse_error(parser, "${blu}%s${rs} needs a file.\n", arg); + return NULL; + } + + const char *format = parser->argv[2]; + if (!format) { + parse_error(parser, "${blu}%s${rs} needs a format string.\n", arg); + return NULL; + } + + struct bfs_expr *expr = parse_action(parser, eval_fprintf, 3); + if (!expr) { + return NULL; + } + + if (expr_open(parser, expr, file) != 0) { + return NULL; + } + + if (bfs_printf_parse(parser->ctx, expr, format) != 0) { + return NULL; + } + + return expr; +} + +/** + * Parse -fstype TYPE. + */ +static struct bfs_expr *parse_fstype(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_unary_test(parser, eval_fstype); + if (!expr) { + return NULL; + } + + if (!bfs_ctx_mtab(parser->ctx)) { + parse_expr_error(parser, expr, "Couldn't parse the mount table: %m.\n"); + return NULL; + } + + return expr; +} + +/** + * Parse -gid/-group. + */ +static struct bfs_expr *parse_group(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_unary_test(parser, eval_gid); + if (!expr) { + return NULL; + } + + const struct group *grp = bfs_getgrnam(parser->ctx->groups, expr->argv[1]); + if (grp) { + expr->num = grp->gr_gid; + expr->int_cmp = BFS_INT_EQUAL; + } else if (looks_like_icmp(expr->argv[1])) { + if (!parse_icmp(parser, expr, 0)) { + return NULL; + } + } else if (errno) { + parse_expr_error(parser, expr, "%m.\n"); + return NULL; + } else { + parse_expr_error(parser, expr, "No such group.\n"); + return NULL; + } + + return expr; +} + +/** + * Parse -unique. + */ +static struct bfs_expr *parse_unique(struct bfs_parser *parser, int arg1, int arg2) { + parser->ctx->unique = true; + return parse_nullary_option(parser); +} + +/** + * Parse -used N. + */ +static struct bfs_expr *parse_used(struct bfs_parser *parser, int arg1, int arg2) { + return parse_test_icmp(parser, eval_used); +} + +/** + * Parse -uid/-user. + */ +static struct bfs_expr *parse_user(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_unary_test(parser, eval_uid); + if (!expr) { + return NULL; + } + + const struct passwd *pwd = bfs_getpwnam(parser->ctx->users, expr->argv[1]); + if (pwd) { + expr->num = pwd->pw_uid; + expr->int_cmp = BFS_INT_EQUAL; + } else if (looks_like_icmp(expr->argv[1])) { + if (!parse_icmp(parser, expr, 0)) { + return NULL; + } + } else if (errno) { + parse_expr_error(parser, expr, "%m.\n"); + return NULL; + } else { + parse_expr_error(parser, expr, "No such user.\n"); + return NULL; + } + + return expr; +} + +/** + * Parse -hidden. + */ +static struct bfs_expr *parse_hidden(struct bfs_parser *parser, int arg1, int arg2) { + return parse_nullary_test(parser, eval_hidden); +} + +/** + * Parse -(no)?ignore_readdir_race. + */ +static struct bfs_expr *parse_ignore_races(struct bfs_parser *parser, int ignore, int arg2) { + parser->ctx->ignore_races = ignore; + return parse_nullary_option(parser); +} + +/** + * Parse -inum N. + */ +static struct bfs_expr *parse_inum(struct bfs_parser *parser, int arg1, int arg2) { + return parse_test_icmp(parser, eval_inum); +} + +/** + * Parse -j<n>. + */ +static struct bfs_expr *parse_jobs(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_nullary_flag(parser); + if (!expr) { + return NULL; + } + + unsigned int n; + if (!parse_int(parser, expr->argv, expr->argv[0] + 2, &n, IF_INT | IF_UNSIGNED)) { + return NULL; + } + + if (n == 0) { + parse_expr_error(parser, expr, "${bld}0${rs} is not enough threads.\n"); + return NULL; + } + + parser->ctx->threads = n; + return expr; +} + +/** + * Parse -limit N. + */ +static struct bfs_expr *parse_limit(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_unary_action(parser, eval_limit); + if (!expr) { + return NULL; + } + + char **arg = &expr->argv[1]; + if (!parse_int(parser, arg, *arg, &expr->num, IF_LONG_LONG)) { + return NULL; + } + + if (expr->num <= 0) { + parse_expr_error(parser, expr, "The ${blu}%s${rs} must be at least ${bld}1${rs}.\n", expr->argv[0]); + return NULL; + } + + parser->limit_arg = expr->argv; + return expr; +} + +/** + * Parse -links N. + */ +static struct bfs_expr *parse_links(struct bfs_parser *parser, int arg1, int arg2) { + return parse_test_icmp(parser, eval_links); +} + +/** + * Parse -ls. + */ +static struct bfs_expr *parse_ls(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_nullary_action(parser, eval_fls); + if (!expr) { + return NULL; + } + + init_print_expr(parser, expr); + return expr; +} + +/** + * Parse -mount. + */ +static struct bfs_expr *parse_mount(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_nullary_option(parser); + if (!expr) { + return NULL; + } + + parse_expr_warning(parser, expr, "In the future, ${blu}%s${rs} will skip mount points entirely, unlike\n", expr->argv[0]); + bfs_warning(parser->ctx, "${blu}-xdev${rs}, due to http://austingroupbugs.net/view.php?id=1133.\n\n"); + + parser->ctx->flags |= BFTW_PRUNE_MOUNTS; + parser->mount_arg = expr->argv; + return expr; +} + +/** + * Parse -i?name. + */ +static struct bfs_expr *parse_name(struct bfs_parser *parser, int casefold, int arg2) { + struct bfs_expr *expr = parse_unary_test(parser, eval_name); + return parse_fnmatch(parser, expr, casefold); +} + +/** + * Parse -i?path, -i?wholename. + */ +static struct bfs_expr *parse_path(struct bfs_parser *parser, int casefold, int arg2) { + struct bfs_expr *expr = parse_unary_test(parser, eval_path); + return parse_fnmatch(parser, expr, casefold); +} + +/** + * Parse -i?lname. + */ +static struct bfs_expr *parse_lname(struct bfs_parser *parser, int casefold, int arg2) { + struct bfs_expr *expr = parse_unary_test(parser, eval_lname); + return parse_fnmatch(parser, expr, casefold); +} + +/** Get the bfs_stat_field for X/Y in -newerXY. */ +static enum bfs_stat_field parse_newerxy_field(char c) { + switch (c) { + case 'a': + return BFS_STAT_ATIME; + case 'B': + return BFS_STAT_BTIME; + case 'c': + return BFS_STAT_CTIME; + case 'm': + return BFS_STAT_MTIME; + default: + return 0; + } +} + +/** Parse an explicit reference timestamp for -newerXt and -*since. */ +static int parse_reftime(const struct bfs_parser *parser, struct bfs_expr *expr) { + if (xgetdate(expr->argv[1], &expr->reftime) == 0) { + return 0; + } else if (errno != EINVAL) { + parse_expr_error(parser, expr, "%m.\n"); + return -1; + } + + parse_expr_error(parser, expr, "Invalid timestamp.\n\n"); + fprintf(stderr, "Supported timestamp formats are ISO 8601-like, e.g.\n\n"); + + struct tm tm; + if (!localtime_r(&parser->now.tv_sec, &tm)) { + parse_perror(parser, "localtime_r()"); + return -1; + } + + int year = tm.tm_year + 1900; + int month = tm.tm_mon + 1; + fprintf(stderr, " - %04d-%02d-%02d\n", year, month, tm.tm_mday); + fprintf(stderr, " - %04d-%02d-%02dT%02d:%02d:%02d\n", year, month, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec); + +#if BFS_HAS_TM_GMTOFF + int gmtoff = tm.tm_gmtoff; +#else + int gmtoff = -timezone; +#endif + int tz_hour = gmtoff / 3600; + int tz_min = (labs(gmtoff) / 60) % 60; + fprintf(stderr, " - %04d-%02d-%02dT%02d:%02d:%02d%+03d:%02d\n", + year, month, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec, tz_hour, tz_min); + + if (!gmtime_r(&parser->now.tv_sec, &tm)) { + parse_perror(parser, "gmtime_r()"); + return -1; + } + + year = tm.tm_year + 1900; + month = tm.tm_mon + 1; + fprintf(stderr, " - %04d-%02d-%02dT%02d:%02d:%02dZ\n", year, month, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec); + + return -1; +} + +/** + * Parse -newerXY. + */ +static struct bfs_expr *parse_newerxy(struct bfs_parser *parser, int arg1, int arg2) { + const char *arg = parser->argv[0]; + if (strlen(arg) != 8) { + parse_error(parser, "Expected ${blu}-newer${bld}XY${rs}; found ${blu}-newer${bld}%pq${rs}.\n", arg + 6); + return NULL; + } + + struct bfs_expr *expr = parse_unary_test(parser, eval_newer); + if (!expr) { + return NULL; + } + + expr->stat_field = parse_newerxy_field(arg[6]); + if (!expr->stat_field) { + parse_expr_error(parser, expr, + "For ${blu}-newer${bld}XY${rs}, ${bld}X${rs} should be ${bld}a${rs}, ${bld}c${rs}, ${bld}m${rs}, or ${bld}B${rs}, not ${err}%c${rs}.\n", + arg[6]); + return NULL; + } + + if (arg[7] == 't') { + if (parse_reftime(parser, expr) != 0) { + return NULL; + } + } else { + enum bfs_stat_field field = parse_newerxy_field(arg[7]); + if (!field) { + parse_expr_error(parser, expr, + "For ${blu}-newer${bld}XY${rs}, ${bld}Y${rs} should be ${bld}a${rs}, ${bld}c${rs}, ${bld}m${rs}, ${bld}B${rs}, or ${bld}t${rs}, not ${err}%c${rs}.\n", + arg[7]); + return NULL; + } + + struct bfs_stat sb; + if (stat_arg(parser, &expr->argv[1], &sb) != 0) { + return NULL; + } + + const struct timespec *reftime = bfs_stat_time(&sb, field); + if (!reftime) { + parse_expr_error(parser, expr, "Couldn't get file %s.\n", bfs_stat_field_name(field)); + return NULL; + } + + expr->reftime = *reftime; + } + + return expr; +} + +/** + * Parse -nogroup. + */ +static struct bfs_expr *parse_nogroup(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_nullary_test(parser, eval_nogroup); + if (expr) { + // Who knows how many FDs getgrgid_r() needs? + expr->ephemeral_fds = 3; + } + return expr; +} + +/** + * Parse -nohidden. + */ +static struct bfs_expr *parse_nohidden(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *hidden = parse_new_expr(parser, eval_hidden, 1, &fake_hidden_arg); + if (!hidden) { + return NULL; + } + + bfs_expr_append(parser->ctx->exclude, hidden); + return parse_nullary_option(parser); +} + +/** + * Parse -noleaf. + */ +static struct bfs_expr *parse_noleaf(struct bfs_parser *parser, int arg1, int arg2) { + parse_warning(parser, "${ex}%s${rs} does not apply the optimization that ${blu}%s${rs} inhibits.\n\n", + BFS_COMMAND, parser->argv[0]); + return parse_nullary_option(parser); +} + +/** + * Parse -nouser. + */ +static struct bfs_expr *parse_nouser(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_nullary_test(parser, eval_nouser); + if (expr) { + // Who knows how many FDs getpwuid_r() needs? + expr->ephemeral_fds = 3; + } + return expr; +} + +/** + * Parse a permission mode like chmod(1). + */ +static int parse_mode(const struct bfs_parser *parser, const char *mode, struct bfs_expr *expr) { + if (mode[0] >= '0' && mode[0] <= '9') { + unsigned int parsed; + if (!parse_int(parser, NULL, mode, &parsed, 8 | IF_INT | IF_UNSIGNED | IF_QUIET)) { + goto fail; + } + if (parsed > 07777) { + goto fail; + } + + expr->file_mode = parsed; + expr->dir_mode = parsed; + return 0; + } + + expr->file_mode = 0; + expr->dir_mode = 0; + + // Parse the same grammar as chmod(1), which looks like this: + // + // MODE : CLAUSE ["," CLAUSE]* + // + // CLAUSE : WHO* ACTION+ + // + // WHO : "u" | "g" | "o" | "a" + // + // ACTION : OP PERM* + // | OP PERMCOPY + // + // OP : "+" | "-" | "=" + // + // PERM : "r" | "w" | "x" | "X" | "s" | "t" + // + // PERMCOPY : "u" | "g" | "o" + + // Parser machine parser + enum { + MODE_CLAUSE, + MODE_WHO, + MODE_ACTION, + MODE_ACTION_APPLY, + MODE_OP, + MODE_PERM, + } mparser = MODE_CLAUSE; + + enum { + MODE_PLUS, + MODE_MINUS, + MODE_EQUALS, + } op uninit(MODE_EQUALS); + + mode_t who uninit(0); + mode_t file_change uninit(0); + mode_t dir_change uninit(0); + + const char *i = mode; + while (true) { + switch (mparser) { + case MODE_CLAUSE: + who = 0; + mparser = MODE_WHO; + fallthru; + + case MODE_WHO: + switch (*i) { + case 'u': + who |= 0700; + break; + case 'g': + who |= 0070; + break; + case 'o': + who |= 0007; + break; + case 'a': + who |= 0777; + break; + default: + mparser = MODE_ACTION; + continue; + } + break; + + case MODE_ACTION_APPLY: + switch (op) { + case MODE_EQUALS: + expr->file_mode &= ~who; + expr->dir_mode &= ~who; + fallthru; + case MODE_PLUS: + expr->file_mode |= file_change; + expr->dir_mode |= dir_change; + break; + case MODE_MINUS: + expr->file_mode &= ~file_change; + expr->dir_mode &= ~dir_change; + break; + } + fallthru; + + case MODE_ACTION: + if (who == 0) { + who = 0777; + } + + switch (*i) { + case '+': + op = MODE_PLUS; + mparser = MODE_OP; + break; + case '-': + op = MODE_MINUS; + mparser = MODE_OP; + break; + case '=': + op = MODE_EQUALS; + mparser = MODE_OP; + break; + + case ',': + if (mparser == MODE_ACTION_APPLY) { + mparser = MODE_CLAUSE; + } else { + goto fail; + } + break; + + case '\0': + if (mparser == MODE_ACTION_APPLY) { + goto done; + } else { + goto fail; + } + + default: + goto fail; + } + break; + + case MODE_OP: + switch (*i) { + case 'u': + file_change = (expr->file_mode >> 6) & 07; + dir_change = (expr->dir_mode >> 6) & 07; + break; + case 'g': + file_change = (expr->file_mode >> 3) & 07; + dir_change = (expr->dir_mode >> 3) & 07; + break; + case 'o': + file_change = expr->file_mode & 07; + dir_change = expr->dir_mode & 07; + break; + + default: + file_change = 0; + dir_change = 0; + mparser = MODE_PERM; + continue; + } + + file_change |= (file_change << 6) | (file_change << 3); + file_change &= who; + dir_change |= (dir_change << 6) | (dir_change << 3); + dir_change &= who; + mparser = MODE_ACTION_APPLY; + break; + + case MODE_PERM: + switch (*i) { + case 'r': + file_change |= who & 0444; + dir_change |= who & 0444; + break; + case 'w': + file_change |= who & 0222; + dir_change |= who & 0222; + break; + case 'x': + file_change |= who & 0111; + fallthru; + case 'X': + dir_change |= who & 0111; + break; + case 's': + if (who & 0700) { + file_change |= S_ISUID; + dir_change |= S_ISUID; + } + if (who & 0070) { + file_change |= S_ISGID; + dir_change |= S_ISGID; + } + break; + case 't': + if (who & 0007) { + file_change |= S_ISVTX; + dir_change |= S_ISVTX; + } + break; + default: + mparser = MODE_ACTION_APPLY; + continue; + } + break; + } + + ++i; + } + +done: + return 0; + +fail: + parse_expr_error(parser, expr, "Invalid mode.\n"); + return -1; +} + +/** + * Parse -perm MODE. + */ +static struct bfs_expr *parse_perm(struct bfs_parser *parser, int field, int arg2) { + struct bfs_expr *expr = parse_unary_test(parser, eval_perm); + if (!expr) { + return NULL; + } + + const char *mode = expr->argv[1]; + switch (mode[0]) { + case '-': + expr->mode_cmp = BFS_MODE_ALL; + ++mode; + break; + case '/': + expr->mode_cmp = BFS_MODE_ANY; + ++mode; + break; + case '+': + if (mode[1] >= '0' && mode[1] <= '9') { + expr->mode_cmp = BFS_MODE_ANY; + ++mode; + break; + } + fallthru; + default: + expr->mode_cmp = BFS_MODE_EQUAL; + break; + } + + if (parse_mode(parser, mode, expr) != 0) { + return NULL; + } + + return expr; +} + +/** + * Parse -print. + */ +static struct bfs_expr *parse_print(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_nullary_action(parser, eval_fprint); + if (expr) { + init_print_expr(parser, expr); + } + return expr; +} + +/** + * Parse -print0. + */ +static struct bfs_expr *parse_print0(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_nullary_action(parser, eval_fprint0); + if (expr) { + init_print_expr(parser, expr); + } + return expr; +} + +/** + * Parse -printf FORMAT. + */ +static struct bfs_expr *parse_printf(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_unary_action(parser, eval_fprintf); + if (!expr) { + return NULL; + } + + init_print_expr(parser, expr); + + if (bfs_printf_parse(parser->ctx, expr, expr->argv[1]) != 0) { + return NULL; + } + + return expr; +} + +/** + * Parse -printx. + */ +static struct bfs_expr *parse_printx(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_nullary_action(parser, eval_fprintx); + if (expr) { + init_print_expr(parser, expr); + } + return expr; +} + +/** + * Parse -prune. + */ +static struct bfs_expr *parse_prune(struct bfs_parser *parser, int arg1, int arg2) { + parser->prune_arg = parser->argv; + return parse_nullary_action(parser, eval_prune); +} + +/** + * Parse -quit. + */ +static struct bfs_expr *parse_quit(struct bfs_parser *parser, int arg1, int arg2) { + return parse_nullary_action(parser, eval_quit); +} + +/** + * Parse -i?regex. + */ +static struct bfs_expr *parse_regex(struct bfs_parser *parser, int flags, int arg2) { + struct bfs_expr *expr = parse_unary_test(parser, eval_regex); + if (!expr) { + return NULL; + } + + if (bfs_regcomp(&expr->regex, expr->argv[1], parser->regex_type, flags) != 0) { + if (expr->regex) { + char *str = bfs_regerror(expr->regex); + if (str) { + parse_expr_error(parser, expr, "%s.\n", str); + free(str); + } else { + parse_perror(parser, "bfs_regerror()"); + } + } else { + parse_perror(parser, "bfs_regcomp()"); + } + + return NULL; + } + + return expr; +} + +/** + * Parse -E. + */ +static struct bfs_expr *parse_regex_extended(struct bfs_parser *parser, int arg1, int arg2) { + parser->regex_type = BFS_REGEX_POSIX_EXTENDED; + return parse_nullary_flag(parser); +} + +/** + * Parse -regextype TYPE. + */ +static struct bfs_expr *parse_regextype(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_ctx *ctx = parser->ctx; + CFILE *cfile = ctx->cerr; + + struct bfs_expr *expr = parse_unary_option(parser); + if (!expr) { + cfprintf(cfile, "\n"); + goto list_types; + } + + // See https://www.gnu.org/software/gnulib/manual/html_node/Predefined-Syntaxes.html + const char *type = expr->argv[1]; + if (strcmp(type, "posix-basic") == 0 + || strcmp(type, "ed") == 0 + || strcmp(type, "sed") == 0) { + parser->regex_type = BFS_REGEX_POSIX_BASIC; + } else if (strcmp(type, "posix-extended") == 0) { + parser->regex_type = BFS_REGEX_POSIX_EXTENDED; +#if BFS_USE_ONIGURUMA + } else if (strcmp(type, "emacs") == 0) { + parser->regex_type = BFS_REGEX_EMACS; + } else if (strcmp(type, "grep") == 0) { + parser->regex_type = BFS_REGEX_GREP; +#endif + } else if (strcmp(type, "help") == 0) { + parser->just_info = true; + cfile = ctx->cout; + goto list_types; + } else { + parse_expr_error(parser, expr, "Unsupported regex type.\n\n"); + goto list_types; + } + + return expr; + +list_types: + cfprintf(cfile, "Supported types are:\n\n"); + cfprintf(cfile, " ${bld}posix-basic${rs}: POSIX basic regular expressions (BRE)\n"); + cfprintf(cfile, " ${bld}posix-extended${rs}: POSIX extended regular expressions (ERE)\n"); + cfprintf(cfile, " ${bld}ed${rs}: Like ${grn}ed${rs} (same as ${bld}posix-basic${rs})\n"); +#if BFS_USE_ONIGURUMA + cfprintf(cfile, " ${bld}emacs${rs}: Like ${grn}emacs${rs}\n"); + cfprintf(cfile, " ${bld}grep${rs}: Like ${grn}grep${rs}\n"); +#endif + cfprintf(cfile, " ${bld}sed${rs}: Like ${grn}sed${rs} (same as ${bld}posix-basic${rs})\n"); + return NULL; +} + +/** + * Parse -s. + */ +static struct bfs_expr *parse_s(struct bfs_parser *parser, int arg1, int arg2) { + parser->ctx->flags |= BFTW_SORT; + return parse_nullary_flag(parser); +} + +/** + * Parse -samefile FILE. + */ +static struct bfs_expr *parse_samefile(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_unary_test(parser, eval_samefile); + if (!expr) { + return NULL; + } + + struct bfs_stat sb; + if (stat_arg(parser, &expr->argv[1], &sb) != 0) { + return NULL; + } + + expr->dev = sb.dev; + expr->ino = sb.ino; + return expr; +} + +/** + * Parse -S STRATEGY. + */ +static struct bfs_expr *parse_search_strategy(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_ctx *ctx = parser->ctx; + CFILE *cfile = ctx->cerr; + + struct bfs_expr *expr = parse_unary_flag(parser); + if (!expr) { + cfprintf(cfile, "\n"); + goto list_strategies; + } + + const char *arg = expr->argv[1]; + if (strcmp(arg, "bfs") == 0) { + ctx->strategy = BFTW_BFS; + } else if (strcmp(arg, "dfs") == 0) { + ctx->strategy = BFTW_DFS; + } else if (strcmp(arg, "ids") == 0) { + ctx->strategy = BFTW_IDS; + } else if (strcmp(arg, "eds") == 0) { + ctx->strategy = BFTW_EDS; + } else if (strcmp(arg, "help") == 0) { + parser->just_info = true; + cfile = ctx->cout; + goto list_strategies; + } else { + parse_expr_error(parser, expr, "Unrecognized search strategy.\n\n"); + goto list_strategies; + } + + return expr; + +list_strategies: + cfprintf(cfile, "Supported search strategies:\n\n"); + cfprintf(cfile, " ${bld}bfs${rs}: breadth-first search\n"); + cfprintf(cfile, " ${bld}dfs${rs}: depth-first search\n"); + cfprintf(cfile, " ${bld}ids${rs}: iterative deepening search\n"); + cfprintf(cfile, " ${bld}eds${rs}: exponential deepening search\n"); + return NULL; +} + +/** + * Parse -[aBcm]?since. + */ +static struct bfs_expr *parse_since(struct bfs_parser *parser, int field, int arg2) { + struct bfs_expr *expr = parse_unary_test(parser, eval_newer); + if (!expr) { + return NULL; + } + + if (parse_reftime(parser, expr) != 0) { + return NULL; + } + + expr->stat_field = field; + return expr; +} + +/** + * Parse -size N[cwbkMGTP]?. + */ +static struct bfs_expr *parse_size(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_unary_test(parser, eval_size); + if (!expr) { + return NULL; + } + + const char *unit = parse_icmp(parser, expr, IF_PARTIAL_OK); + if (!unit) { + return NULL; + } + + if (strlen(unit) > 1) { + goto bad_unit; + } + + switch (*unit) { + case '\0': + case 'b': + expr->size_unit = BFS_BLOCKS; + break; + case 'c': + expr->size_unit = BFS_BYTES; + break; + case 'w': + expr->size_unit = BFS_WORDS; + break; + case 'k': + expr->size_unit = BFS_KB; + break; + case 'M': + expr->size_unit = BFS_MB; + break; + case 'G': + expr->size_unit = BFS_GB; + break; + case 'T': + expr->size_unit = BFS_TB; + break; + case 'P': + expr->size_unit = BFS_PB; + break; + + default: + bad_unit: + parse_expr_error(parser, expr, "Expected a size unit (one of ${bld}cwbkMGTP${rs}); found ${err}%pq${rs}.\n", unit); + return NULL; + } + + return expr; +} + +/** + * Parse -sparse. + */ +static struct bfs_expr *parse_sparse(struct bfs_parser *parser, int arg1, int arg2) { + return parse_nullary_test(parser, eval_sparse); +} + +/** + * Parse -status. + */ +static struct bfs_expr *parse_status(struct bfs_parser *parser, int arg1, int arg2) { + parser->ctx->status = true; + return parse_nullary_option(parser); +} + +/** + * Parse -x?type [bcdpflsD]. + */ +static struct bfs_expr *parse_type(struct bfs_parser *parser, int x, int arg2) { + struct bfs_ctx *ctx = parser->ctx; + + bfs_eval_fn *eval = x ? eval_xtype : eval_type; + struct bfs_expr *expr = parse_unary_test(parser, eval); + if (!expr) { + return NULL; + } + + expr->num = 0; + + const char *c = expr->argv[1]; + while (true) { + switch (*c) { + case 'b': + expr->num |= 1 << BFS_BLK; + break; + case 'c': + expr->num |= 1 << BFS_CHR; + break; + case 'd': + expr->num |= 1 << BFS_DIR; + break; + case 'D': + expr->num |= 1 << BFS_DOOR; + break; + case 'p': + expr->num |= 1 << BFS_FIFO; + break; + case 'f': + expr->num |= 1 << BFS_REG; + break; + case 'l': + expr->num |= 1 << BFS_LNK; + break; + case 's': + expr->num |= 1 << BFS_SOCK; + break; + case 'w': + expr->num |= 1 << BFS_WHT; + ctx->flags |= BFTW_WHITEOUTS; + break; + + case '\0': + parse_expr_error(parser, expr, "Expected a type flag.\n"); + return NULL; + + default: + parse_expr_error(parser, expr, "Unknown type flag ${err}%c${rs}; expected one of [${bld}bcdpflsD${rs}].\n", *c); + return NULL; + } + + ++c; + if (*c == '\0') { + break; + } else if (*c == ',') { + ++c; + continue; + } else { + parse_expr_error(parser, expr, "Types must be comma-separated.\n"); + return NULL; + } + } + + return expr; +} + +/** + * Parse -(no)?warn. + */ +static struct bfs_expr *parse_warn(struct bfs_parser *parser, int warn, int arg2) { + parser->ctx->warn = warn; + return parse_nullary_option(parser); +} + +/** + * Parse -xattr. + */ +static struct bfs_expr *parse_xattr(struct bfs_parser *parser, int arg1, int arg2) { +#if BFS_CAN_CHECK_XATTRS + return parse_nullary_test(parser, eval_xattr); +#else + parse_error(parser, "Missing platform support.\n"); + return NULL; +#endif +} + +/** + * Parse -xattrname. + */ +static struct bfs_expr *parse_xattrname(struct bfs_parser *parser, int arg1, int arg2) { +#if BFS_CAN_CHECK_XATTRS + return parse_unary_test(parser, eval_xattrname); +#else + parse_error(parser, "Missing platform support.\n"); + return NULL; +#endif +} + +/** + * Parse -xdev. + */ +static struct bfs_expr *parse_xdev(struct bfs_parser *parser, int arg1, int arg2) { + parser->ctx->flags |= BFTW_PRUNE_MOUNTS; + parser->xdev_arg = parser->argv; + return parse_nullary_option(parser); +} + +/** + * Launch a pager for the help output. + */ +static CFILE *launch_pager(pid_t *pid, CFILE *cout) { + char *pager = getenv("PAGER"); + + char *exe; + if (pager && pager[0]) { + exe = bfs_spawn_resolve(pager); + } else { + exe = bfs_spawn_resolve("less"); + if (!exe) { + exe = bfs_spawn_resolve("more"); + } + } + if (!exe) { + goto fail; + } + + int pipefd[2]; + if (pipe(pipefd) != 0) { + goto fail_exe; + } + + FILE *file = fdopen(pipefd[1], "w"); + if (!file) { + goto fail_pipe; + } + pipefd[1] = -1; + + CFILE *ret = cfwrap(file, NULL, true); + if (!ret) { + goto fail_file; + } + file = NULL; + + struct bfs_spawn ctx; + if (bfs_spawn_init(&ctx) != 0) { + goto fail_ret; + } + + if (bfs_spawn_addclose(&ctx, fileno(ret->file)) != 0) { + goto fail_ctx; + } + if (bfs_spawn_adddup2(&ctx, pipefd[0], STDIN_FILENO) != 0) { + goto fail_ctx; + } + if (bfs_spawn_addclose(&ctx, pipefd[0]) != 0) { + goto fail_ctx; + } + + char *argv[] = { + exe, + NULL, + NULL, + }; + + const char *cmd = exe + xbaseoff(exe); + if (strcmp(cmd, "less") == 0) { + // We know less supports colors, other pagers may not + ret->colors = cout->colors; + argv[1] = "-FKRX"; + } + + *pid = bfs_spawn(exe, &ctx, argv, NULL); + if (*pid < 0) { + goto fail_ctx; + } + + xclose(pipefd[0]); + bfs_spawn_destroy(&ctx); + free(exe); + return ret; + +fail_ctx: + bfs_spawn_destroy(&ctx); +fail_ret: + cfclose(ret); +fail_file: + if (file) { + fclose(file); + } +fail_pipe: + if (pipefd[1] >= 0) { + xclose(pipefd[1]); + } + if (pipefd[0] >= 0) { + xclose(pipefd[0]); + } +fail_exe: + free(exe); +fail: + return cout; +} + +/** + * "Parse" -help. + */ +static struct bfs_expr *parse_help(struct bfs_parser *parser, int arg1, int arg2) { + CFILE *cout = parser->ctx->cout; + + pid_t pager = -1; + if (parser->stdout_tty) { + cout = launch_pager(&pager, cout); + } + + cfprintf(cout, "Usage: ${ex}%s${rs} [${cyn}flags${rs}...] [${mag}paths${rs}...] [${blu}expression${rs}...]\n\n", + parser->command); + + cfprintf(cout, "${ex}%s${rs} is compatible with ${ex}find${rs}, with some extensions. " + "${cyn}Flags${rs} (${cyn}-H${rs}/${cyn}-L${rs}/${cyn}-P${rs} etc.), ${mag}paths${rs},\n" + "and ${blu}expressions${rs} may be freely mixed in any order.\n\n", + BFS_COMMAND); + + cfprintf(cout, "${bld}Flags:${rs}\n\n"); + + cfprintf(cout, " ${cyn}-H${rs}\n"); + cfprintf(cout, " Follow symbolic links on the command line, but not while searching\n"); + cfprintf(cout, " ${cyn}-L${rs}\n"); + cfprintf(cout, " Follow all symbolic links\n"); + cfprintf(cout, " ${cyn}-P${rs}\n"); + cfprintf(cout, " Never follow symbolic links (the default)\n"); + + cfprintf(cout, " ${cyn}-E${rs}\n"); + cfprintf(cout, " Use extended regular expressions (same as ${blu}-regextype${rs} ${bld}posix-extended${rs})\n"); + cfprintf(cout, " ${cyn}-X${rs}\n"); + cfprintf(cout, " Filter out files with non-${ex}xargs${rs}-safe names\n"); + cfprintf(cout, " ${cyn}-d${rs}\n"); + cfprintf(cout, " Search in post-order (same as ${blu}-depth${rs})\n"); + cfprintf(cout, " ${cyn}-s${rs}\n"); + cfprintf(cout, " Visit directory entries in sorted order\n"); + cfprintf(cout, " ${cyn}-x${rs}\n"); + cfprintf(cout, " Don't descend into other mount points (same as ${blu}-xdev${rs})\n"); + + cfprintf(cout, " ${cyn}-f${rs} ${mag}PATH${rs}\n"); + cfprintf(cout, " Treat ${mag}PATH${rs} as a path to search (useful if begins with a dash)\n"); + cfprintf(cout, " ${cyn}-D${rs} ${bld}FLAG${rs}\n"); + cfprintf(cout, " Turn on a debugging flag (see ${cyn}-D${rs} ${bld}help${rs})\n"); + cfprintf(cout, " ${cyn}-O${bld}N${rs}\n"); + cfprintf(cout, " Enable optimization level ${bld}N${rs} (default: ${bld}3${rs})\n"); + cfprintf(cout, " ${cyn}-S${rs} ${bld}bfs${rs}|${bld}dfs${rs}|${bld}ids${rs}|${bld}eds${rs}\n"); + cfprintf(cout, " Use ${bld}b${rs}readth-${bld}f${rs}irst/${bld}d${rs}epth-${bld}f${rs}irst/${bld}i${rs}terative/${bld}e${rs}xponential ${bld}d${rs}eepening ${bld}s${rs}earch\n"); + cfprintf(cout, " (default: ${cyn}-S${rs} ${bld}bfs${rs})\n"); + cfprintf(cout, " ${cyn}-j${bld}N${rs}\n"); + cfprintf(cout, " Search with ${bld}N${rs} threads in parallel (default: number of CPUs, up to ${bld}8${rs})\n\n"); + + cfprintf(cout, "${bld}Operators:${rs}\n\n"); + + cfprintf(cout, " ${red}(${rs} ${blu}expression${rs} ${red})${rs}\n\n"); + + cfprintf(cout, " ${red}!${rs} ${blu}expression${rs}\n"); + cfprintf(cout, " ${red}-not${rs} ${blu}expression${rs}\n\n"); + + cfprintf(cout, " ${blu}expression${rs} ${blu}expression${rs}\n"); + cfprintf(cout, " ${blu}expression${rs} ${red}-a${rs} ${blu}expression${rs}\n"); + cfprintf(cout, " ${blu}expression${rs} ${red}-and${rs} ${blu}expression${rs}\n\n"); + + cfprintf(cout, " ${blu}expression${rs} ${red}-o${rs} ${blu}expression${rs}\n"); + cfprintf(cout, " ${blu}expression${rs} ${red}-or${rs} ${blu}expression${rs}\n\n"); + + cfprintf(cout, " ${blu}expression${rs} ${red},${rs} ${blu}expression${rs}\n\n"); + + cfprintf(cout, "${bld}Special forms:${rs}\n\n"); + + cfprintf(cout, " ${red}-exclude${rs} ${blu}expression${rs}\n"); + cfprintf(cout, " Exclude all paths matching the ${blu}expression${rs} from the search.\n\n"); + + cfprintf(cout, "${bld}Options:${rs}\n\n"); + + cfprintf(cout, " ${blu}-color${rs}\n"); + cfprintf(cout, " ${blu}-nocolor${rs}\n"); + cfprintf(cout, " Turn colors on or off (default: ${blu}-color${rs} if outputting to a terminal,\n"); + cfprintf(cout, " ${blu}-nocolor${rs} otherwise)\n"); + cfprintf(cout, " ${blu}-daystart${rs}\n"); + cfprintf(cout, " Measure times relative to the start of today\n"); + cfprintf(cout, " ${blu}-depth${rs}\n"); + cfprintf(cout, " Search in post-order (descendents first)\n"); + cfprintf(cout, " ${blu}-files0-from${rs} ${bld}FILE${rs}\n"); + cfprintf(cout, " Search the NUL ('\\0')-separated paths from ${bld}FILE${rs} (${bld}-${rs} for standard input).\n"); + cfprintf(cout, " ${blu}-follow${rs}\n"); + cfprintf(cout, " Follow all symbolic links (same as ${cyn}-L${rs})\n"); + cfprintf(cout, " ${blu}-ignore_readdir_race${rs}\n"); + cfprintf(cout, " ${blu}-noignore_readdir_race${rs}\n"); + cfprintf(cout, " Whether to report an error if ${ex}%s${rs} detects that the file tree is modified\n", + BFS_COMMAND); + cfprintf(cout, " during the search (default: ${blu}-noignore_readdir_race${rs})\n"); + cfprintf(cout, " ${blu}-maxdepth${rs} ${bld}N${rs}\n"); + cfprintf(cout, " ${blu}-mindepth${rs} ${bld}N${rs}\n"); + cfprintf(cout, " Ignore files deeper/shallower than ${bld}N${rs}\n"); + cfprintf(cout, " ${blu}-mount${rs}\n"); + cfprintf(cout, " Don't descend into other mount points (same as ${blu}-xdev${rs} for now, but will\n"); + cfprintf(cout, " skip mount points entirely in the future)\n"); + cfprintf(cout, " ${blu}-nohidden${rs}\n"); + cfprintf(cout, " Exclude hidden files\n"); + cfprintf(cout, " ${blu}-noleaf${rs}\n"); + cfprintf(cout, " Ignored; for compatibility with GNU find\n"); + cfprintf(cout, " ${blu}-regextype${rs} ${bld}TYPE${rs}\n"); + cfprintf(cout, " Use ${bld}TYPE${rs}-flavored regexes (default: ${bld}posix-basic${rs}; see ${blu}-regextype${rs} ${bld}help${rs})\n"); + cfprintf(cout, " ${blu}-status${rs}\n"); + cfprintf(cout, " Display a status bar while searching\n"); + cfprintf(cout, " ${blu}-unique${rs}\n"); + cfprintf(cout, " Skip any files that have already been seen\n"); + cfprintf(cout, " ${blu}-warn${rs}\n"); + cfprintf(cout, " ${blu}-nowarn${rs}\n"); + cfprintf(cout, " Turn on or off warnings about the command line\n"); + cfprintf(cout, " ${blu}-xdev${rs}\n"); + cfprintf(cout, " Don't descend into other mount points\n\n"); + + cfprintf(cout, "${bld}Tests:${rs}\n\n"); + +#if BFS_CAN_CHECK_ACL + cfprintf(cout, " ${blu}-acl${rs}\n"); + cfprintf(cout, " Find files with a non-trivial Access Control List\n"); +#endif + cfprintf(cout, " ${blu}-${rs}[${blu}aBcm${rs}]${blu}min${rs} ${bld}[-+]N${rs}\n"); + cfprintf(cout, " Find files ${blu}a${rs}ccessed/${blu}B${rs}irthed/${blu}c${rs}hanged/${blu}m${rs}odified ${bld}N${rs} minutes ago\n"); + cfprintf(cout, " ${blu}-${rs}[${blu}aBcm${rs}]${blu}newer${rs} ${bld}FILE${rs}\n"); + cfprintf(cout, " Find files ${blu}a${rs}ccessed/${blu}B${rs}irthed/${blu}c${rs}hanged/${blu}m${rs}odified more recently than ${bld}FILE${rs} was\n" + " modified\n"); + cfprintf(cout, " ${blu}-${rs}[${blu}aBcm${rs}]${blu}since${rs} ${bld}TIME${rs}\n"); + cfprintf(cout, " Find files ${blu}a${rs}ccessed/${blu}B${rs}irthed/${blu}c${rs}hanged/${blu}m${rs}odified more recently than ${bld}TIME${rs}\n"); + cfprintf(cout, " ${blu}-${rs}[${blu}aBcm${rs}]${blu}time${rs} ${bld}[-+]N${rs}\n"); + cfprintf(cout, " Find files ${blu}a${rs}ccessed/${blu}B${rs}irthed/${blu}c${rs}hanged/${blu}m${rs}odified ${bld}N${rs} days ago\n"); +#if BFS_CAN_CHECK_CAPABILITIES + cfprintf(cout, " ${blu}-capable${rs}\n"); + cfprintf(cout, " Find files with POSIX.1e capabilities set\n"); +#endif +#if BFS_CAN_CHECK_CONTEXT + cfprintf(cout, " ${blu}-context${rs} ${bld}GLOB${rs}\n"); + cfprintf(cout, " Find files with SELinux context matching a glob pattern\n"); +#endif + cfprintf(cout, " ${blu}-depth${rs} ${bld}[-+]N${rs}\n"); + cfprintf(cout, " Find files with depth ${bld}N${rs}\n"); + cfprintf(cout, " ${blu}-empty${rs}\n"); + cfprintf(cout, " Find empty files/directories\n"); + cfprintf(cout, " ${blu}-executable${rs}\n"); + cfprintf(cout, " ${blu}-readable${rs}\n"); + cfprintf(cout, " ${blu}-writable${rs}\n"); + cfprintf(cout, " Find files the current user can execute/read/write\n"); + cfprintf(cout, " ${blu}-false${rs}\n"); + cfprintf(cout, " ${blu}-true${rs}\n"); + cfprintf(cout, " Always false/true\n"); + cfprintf(cout, " ${blu}-fstype${rs} ${bld}TYPE${rs}\n"); + cfprintf(cout, " Find files on file systems with the given ${bld}TYPE${rs}\n"); + cfprintf(cout, " ${blu}-gid${rs} ${bld}[-+]N${rs}\n"); + cfprintf(cout, " ${blu}-uid${rs} ${bld}[-+]N${rs}\n"); + cfprintf(cout, " Find files owned by group/user ID ${bld}N${rs}\n"); + cfprintf(cout, " ${blu}-group${rs} ${bld}NAME${rs}\n"); + cfprintf(cout, " ${blu}-user${rs} ${bld}NAME${rs}\n"); + cfprintf(cout, " Find files owned by the group/user ${bld}NAME${rs}\n"); + cfprintf(cout, " ${blu}-hidden${rs}\n"); + cfprintf(cout, " Find hidden files\n"); +#ifdef FNM_CASEFOLD + cfprintf(cout, " ${blu}-ilname${rs} ${bld}GLOB${rs}\n"); + cfprintf(cout, " ${blu}-iname${rs} ${bld}GLOB${rs}\n"); + cfprintf(cout, " ${blu}-ipath${rs} ${bld}GLOB${rs}\n"); + cfprintf(cout, " ${blu}-iregex${rs} ${bld}REGEX${rs}\n"); + cfprintf(cout, " ${blu}-iwholename${rs} ${bld}GLOB${rs}\n"); + cfprintf(cout, " Case-insensitive versions of ${blu}-lname${rs}/${blu}-name${rs}/${blu}-path${rs}" + "/${blu}-regex${rs}/${blu}-wholename${rs}\n"); +#endif + cfprintf(cout, " ${blu}-inum${rs} ${bld}[-+]N${rs}\n"); + cfprintf(cout, " Find files with inode number ${bld}N${rs}\n"); + cfprintf(cout, " ${blu}-links${rs} ${bld}[-+]N${rs}\n"); + cfprintf(cout, " Find files with ${bld}N${rs} hard links\n"); + cfprintf(cout, " ${blu}-lname${rs} ${bld}GLOB${rs}\n"); + cfprintf(cout, " Find symbolic links whose target matches the ${bld}GLOB${rs}\n"); + cfprintf(cout, " ${blu}-name${rs} ${bld}GLOB${rs}\n"); + cfprintf(cout, " Find files whose name matches the ${bld}GLOB${rs}\n"); + cfprintf(cout, " ${blu}-newer${rs} ${bld}FILE${rs}\n"); + cfprintf(cout, " Find files newer than ${bld}FILE${rs}\n"); + cfprintf(cout, " ${blu}-newer${bld}XY${rs} ${bld}REFERENCE${rs}\n"); + cfprintf(cout, " Find files whose ${bld}X${rs} time is newer than the ${bld}Y${rs} time of" + " ${bld}REFERENCE${rs}. ${bld}X${rs} and ${bld}Y${rs}\n"); + cfprintf(cout, " can be any of [${bld}aBcm${rs}]. ${bld}Y${rs} may also be ${bld}t${rs} to parse ${bld}REFERENCE${rs} an explicit\n"); + cfprintf(cout, " timestamp.\n"); + cfprintf(cout, " ${blu}-nogroup${rs}\n"); + cfprintf(cout, " ${blu}-nouser${rs}\n"); + cfprintf(cout, " Find files owned by nonexistent groups/users\n"); + cfprintf(cout, " ${blu}-path${rs} ${bld}GLOB${rs}\n"); + cfprintf(cout, " ${blu}-wholename${rs} ${bld}GLOB${rs}\n"); + cfprintf(cout, " Find files whose entire path matches the ${bld}GLOB${rs}\n"); + cfprintf(cout, " ${blu}-perm${rs} ${bld}[-]MODE${rs}\n"); + cfprintf(cout, " Find files with a matching mode\n"); + cfprintf(cout, " ${blu}-regex${rs} ${bld}REGEX${rs}\n"); + cfprintf(cout, " Find files whose entire path matches the regular expression ${bld}REGEX${rs}\n"); + cfprintf(cout, " ${blu}-samefile${rs} ${bld}FILE${rs}\n"); + cfprintf(cout, " Find hard links to ${bld}FILE${rs}\n"); + cfprintf(cout, " ${blu}-since${rs} ${bld}TIME${rs}\n"); + cfprintf(cout, " Find files modified since ${bld}TIME${rs}\n"); + cfprintf(cout, " ${blu}-size${rs} ${bld}[-+]N[cwbkMGTP]${rs}\n"); + cfprintf(cout, " Find files with the given size, in 1-byte ${bld}c${rs}haracters, 2-byte ${bld}w${rs}ords,\n"); + cfprintf(cout, " 512-byte ${bld}b${rs}locks (default), or ${bld}k${rs}iB/${bld}M${rs}iB/${bld}G${rs}iB/${bld}T${rs}iB/${bld}P${rs}iB\n"); + cfprintf(cout, " ${blu}-sparse${rs}\n"); + cfprintf(cout, " Find files that occupy fewer disk blocks than expected\n"); + cfprintf(cout, " ${blu}-type${rs} ${bld}[bcdlpfswD]${rs}\n"); + cfprintf(cout, " Find files of the given type\n"); + cfprintf(cout, " ${blu}-used${rs} ${bld}[-+]N${rs}\n"); + cfprintf(cout, " Find files last accessed ${bld}N${rs} days after they were changed\n"); +#if BFS_CAN_CHECK_XATTRS + cfprintf(cout, " ${blu}-xattr${rs}\n"); + cfprintf(cout, " Find files with extended attributes\n"); + cfprintf(cout, " ${blu}-xattrname${rs} ${bld}NAME${rs}\n"); + cfprintf(cout, " Find files with the extended attribute ${bld}NAME${rs}\n"); +#endif + cfprintf(cout, " ${blu}-xtype${rs} ${bld}[bcdlpfswD]${rs}\n"); + cfprintf(cout, " Find files of the given type, following links when ${blu}-type${rs} would not, and\n"); + cfprintf(cout, " vice versa\n\n"); + + cfprintf(cout, "${bld}Actions:${rs}\n\n"); + + cfprintf(cout, " ${blu}-delete${rs}\n"); + cfprintf(cout, " ${blu}-rm${rs}\n"); + cfprintf(cout, " Delete any found files (implies ${blu}-depth${rs})\n"); + cfprintf(cout, " ${blu}-exec${rs} ${bld}command ... {} ;${rs}\n"); + cfprintf(cout, " Execute a command\n"); + cfprintf(cout, " ${blu}-exec${rs} ${bld}command ... {} +${rs}\n"); + cfprintf(cout, " Execute a command with multiple files at once\n"); + cfprintf(cout, " ${blu}-ok${rs} ${bld}command ... {} ;${rs}\n"); + cfprintf(cout, " Prompt the user whether to execute a command\n"); + cfprintf(cout, " ${blu}-execdir${rs} ${bld}command ... {} ;${rs}\n"); + cfprintf(cout, " ${blu}-execdir${rs} ${bld}command ... {} +${rs}\n"); + cfprintf(cout, " ${blu}-okdir${rs} ${bld}command ... {} ;${rs}\n"); + cfprintf(cout, " Like ${blu}-exec${rs}/${blu}-ok${rs}, but run the command in the same directory as the found\n"); + cfprintf(cout, " file(s)\n"); + cfprintf(cout, " ${blu}-exit${rs} [${bld}STATUS${rs}]\n"); + cfprintf(cout, " Exit immediately with the given status (%d if unspecified)\n", EXIT_SUCCESS); + cfprintf(cout, " ${blu}-fls${rs} ${bld}FILE${rs}\n"); + cfprintf(cout, " ${blu}-fprint${rs} ${bld}FILE${rs}\n"); + cfprintf(cout, " ${blu}-fprint0${rs} ${bld}FILE${rs}\n"); + cfprintf(cout, " ${blu}-fprintf${rs} ${bld}FILE${rs} ${bld}FORMAT${rs}\n"); + cfprintf(cout, " Like ${blu}-ls${rs}/${blu}-print${rs}/${blu}-print0${rs}/${blu}-printf${rs}, but write to ${bld}FILE${rs} instead of standard\n" + " output\n"); + cfprintf(cout, " ${blu}-limit${rs} ${bld}N${rs}\n"); + cfprintf(cout, " Quit after this action is evaluated ${bld}N${rs} times\n"); + cfprintf(cout, " ${blu}-ls${rs}\n"); + cfprintf(cout, " List files like ${ex}ls${rs} ${bld}-dils${rs}\n"); + cfprintf(cout, " ${blu}-print${rs}\n"); + cfprintf(cout, " Print the path to the found file\n"); + cfprintf(cout, " ${blu}-print0${rs}\n"); + cfprintf(cout, " Like ${blu}-print${rs}, but use the null character ('\\0') as a separator rather than\n"); + cfprintf(cout, " newlines\n"); + cfprintf(cout, " ${blu}-printf${rs} ${bld}FORMAT${rs}\n"); + cfprintf(cout, " Print according to a format string (see ${ex}man${rs} ${bld}find${rs}). The additional format\n"); + cfprintf(cout, " directives %%w and %%W${bld}k${rs} for printing file birth times are supported.\n"); + cfprintf(cout, " ${blu}-printx${rs}\n"); + cfprintf(cout, " Like ${blu}-print${rs}, but escape whitespace and quotation characters, to make the\n"); + cfprintf(cout, " output safe for ${ex}xargs${rs}. Consider using ${blu}-print0${rs} and ${ex}xargs${rs} ${bld}-0${rs} instead.\n"); + cfprintf(cout, " ${blu}-prune${rs}\n"); + cfprintf(cout, " Don't descend into this directory\n"); + cfprintf(cout, " ${blu}-quit${rs}\n"); + cfprintf(cout, " Quit immediately\n"); + cfprintf(cout, " ${blu}-version${rs}\n"); + cfprintf(cout, " Print version information\n"); + cfprintf(cout, " ${blu}-help${rs}\n"); + cfprintf(cout, " Print this help message\n\n"); + + cfprintf(cout, "%s\n", BFS_HOMEPAGE); + + if (pager > 0) { + cfclose(cout); + xwaitpid(pager, NULL, 0); + } + + parser->just_info = true; + return NULL; +} + +/** + * "Parse" -version. + */ +static struct bfs_expr *parse_version(struct bfs_parser *parser, int arg1, int arg2) { + cfprintf(parser->ctx->cout, "${ex}%s${rs} ${bld}%s${rs}\n\n", BFS_COMMAND, bfs_version); + + printf("%s\n", BFS_HOMEPAGE); + + parser->just_info = true; + return NULL; +} + +typedef struct bfs_expr *parse_fn(struct bfs_parser *parser, int arg1, int arg2); + +/** + * An entry in the parse table for primary expressions. + */ +struct table_entry { + char *arg; + enum token_type type; + parse_fn *parse; + int arg1; + int arg2; + bool prefix; +}; + +/** + * The parse table for primary expressions. + */ +static const struct table_entry parse_table[] = { + {"--", T_FLAG}, + {"--help", T_ACTION, parse_help}, + {"--version", T_ACTION, parse_version}, + {"-Bmin", T_TEST, parse_min, BFS_STAT_BTIME}, + {"-Bnewer", T_TEST, parse_newer, BFS_STAT_BTIME}, + {"-Bsince", T_TEST, parse_since, BFS_STAT_BTIME}, + {"-Btime", T_TEST, parse_time, BFS_STAT_BTIME}, + {"-D", T_FLAG, parse_debug}, + {"-E", T_FLAG, parse_regex_extended}, + {"-H", T_FLAG, parse_follow, BFTW_FOLLOW_ROOTS, false}, + {"-L", T_FLAG, parse_follow, BFTW_FOLLOW_ALL, false}, + {"-O", T_FLAG, parse_optlevel, 0, 0, true}, + {"-P", T_FLAG, parse_follow, 0, false}, + {"-S", T_FLAG, parse_search_strategy}, + {"-X", T_FLAG, parse_xargs_safe}, + {"-a", T_OPERATOR}, + {"-acl", T_TEST, parse_acl}, + {"-amin", T_TEST, parse_min, BFS_STAT_ATIME}, + {"-and", T_OPERATOR}, + {"-anewer", T_TEST, parse_newer, BFS_STAT_ATIME}, + {"-asince", T_TEST, parse_since, BFS_STAT_ATIME}, + {"-atime", T_TEST, parse_time, BFS_STAT_ATIME}, + {"-capable", T_TEST, parse_capable}, + {"-cmin", T_TEST, parse_min, BFS_STAT_CTIME}, + {"-cnewer", T_TEST, parse_newer, BFS_STAT_CTIME}, + {"-color", T_OPTION, parse_color, true}, + {"-context", T_TEST, parse_context, true}, + {"-csince", T_TEST, parse_since, BFS_STAT_CTIME}, + {"-ctime", T_TEST, parse_time, BFS_STAT_CTIME}, + {"-d", T_FLAG, parse_depth}, + {"-daystart", T_OPTION, parse_daystart}, + {"-delete", T_ACTION, parse_delete}, + {"-depth", T_OPTION, parse_depth_n}, + {"-empty", T_TEST, parse_empty}, + {"-exclude", T_OPERATOR}, + {"-exec", T_ACTION, parse_exec, 0}, + {"-execdir", T_ACTION, parse_exec, BFS_EXEC_CHDIR}, + {"-executable", T_TEST, parse_access, X_OK}, + {"-exit", T_ACTION, parse_exit}, + {"-f", T_FLAG, parse_f}, + {"-false", T_TEST, parse_const, false}, + {"-files0-from", T_OPTION, parse_files0_from}, + {"-flags", T_TEST, parse_flags}, + {"-fls", T_ACTION, parse_fls}, + {"-follow", T_OPTION, parse_follow, BFTW_FOLLOW_ALL, true}, + {"-fprint", T_ACTION, parse_fprint}, + {"-fprint0", T_ACTION, parse_fprint0}, + {"-fprintf", T_ACTION, parse_fprintf}, + {"-fstype", T_TEST, parse_fstype}, + {"-gid", T_TEST, parse_group}, + {"-group", T_TEST, parse_group}, + {"-help", T_ACTION, parse_help}, + {"-hidden", T_TEST, parse_hidden}, + {"-ignore_readdir_race", T_OPTION, parse_ignore_races, true}, + {"-ilname", T_TEST, parse_lname, true}, + {"-iname", T_TEST, parse_name, true}, + {"-inum", T_TEST, parse_inum}, + {"-ipath", T_TEST, parse_path, true}, + {"-iregex", T_TEST, parse_regex, BFS_REGEX_ICASE}, + {"-iwholename", T_TEST, parse_path, true}, + {"-j", T_FLAG, parse_jobs, 0, 0, true}, + {"-limit", T_ACTION, parse_limit}, + {"-links", T_TEST, parse_links}, + {"-lname", T_TEST, parse_lname, false}, + {"-ls", T_ACTION, parse_ls}, + {"-maxdepth", T_OPTION, parse_depth_limit, false}, + {"-mindepth", T_OPTION, parse_depth_limit, true}, + {"-mmin", T_TEST, parse_min, BFS_STAT_MTIME}, + {"-mnewer", T_TEST, parse_newer, BFS_STAT_MTIME}, + {"-mount", T_OPTION, parse_mount}, + {"-msince", T_TEST, parse_since, BFS_STAT_MTIME}, + {"-mtime", T_TEST, parse_time, BFS_STAT_MTIME}, + {"-name", T_TEST, parse_name, false}, + {"-newer", T_TEST, parse_newer, BFS_STAT_MTIME}, + {"-newer", T_TEST, parse_newerxy, 0, 0, true}, + {"-nocolor", T_OPTION, parse_color, false}, + {"-nogroup", T_TEST, parse_nogroup}, + {"-nohidden", T_TEST, parse_nohidden}, + {"-noignore_readdir_race", T_OPTION, parse_ignore_races, false}, + {"-noleaf", T_OPTION, parse_noleaf}, + {"-not", T_OPERATOR}, + {"-nouser", T_TEST, parse_nouser}, + {"-nowarn", T_OPTION, parse_warn, false}, + {"-o", T_OPERATOR}, + {"-ok", T_ACTION, parse_exec, BFS_EXEC_CONFIRM}, + {"-okdir", T_ACTION, parse_exec, BFS_EXEC_CONFIRM | BFS_EXEC_CHDIR}, + {"-or", T_OPERATOR}, + {"-path", T_TEST, parse_path, false}, + {"-perm", T_TEST, parse_perm}, + {"-print", T_ACTION, parse_print}, + {"-print0", T_ACTION, parse_print0}, + {"-printf", T_ACTION, parse_printf}, + {"-printx", T_ACTION, parse_printx}, + {"-prune", T_ACTION, parse_prune}, + {"-quit", T_ACTION, parse_quit}, + {"-readable", T_TEST, parse_access, R_OK}, + {"-regex", T_TEST, parse_regex, 0}, + {"-regextype", T_OPTION, parse_regextype}, + {"-rm", T_ACTION, parse_delete}, + {"-s", T_FLAG, parse_s}, + {"-samefile", T_TEST, parse_samefile}, + {"-since", T_TEST, parse_since, BFS_STAT_MTIME}, + {"-size", T_TEST, parse_size}, + {"-sparse", T_TEST, parse_sparse}, + {"-status", T_OPTION, parse_status}, + {"-true", T_TEST, parse_const, true}, + {"-type", T_TEST, parse_type, false}, + {"-uid", T_TEST, parse_user}, + {"-unique", T_OPTION, parse_unique}, + {"-used", T_TEST, parse_used}, + {"-user", T_TEST, parse_user}, + {"-version", T_ACTION, parse_version}, + {"-warn", T_OPTION, parse_warn, true}, + {"-wholename", T_TEST, parse_path, false}, + {"-writable", T_TEST, parse_access, W_OK}, + {"-x", T_FLAG, parse_xdev}, + {"-xattr", T_TEST, parse_xattr}, + {"-xattrname", T_TEST, parse_xattrname}, + {"-xdev", T_OPTION, parse_xdev}, + {"-xtype", T_TEST, parse_type, true}, + {0}, +}; + +/** Look up an argument in the parse table. */ +static const struct table_entry *table_lookup(const char *arg) { + for (const struct table_entry *entry = parse_table; entry->arg; ++entry) { + bool match; + if (entry->prefix) { + match = strncmp(arg, entry->arg, strlen(entry->arg)) == 0; + } else { + match = strcmp(arg, entry->arg) == 0; + } + if (match) { + return entry; + } + } + + return NULL; +} + +/** Search for a fuzzy match in the parse table. */ +static const struct table_entry *table_lookup_fuzzy(const char *arg) { + const struct table_entry *best = NULL; + int best_dist = INT_MAX; + + for (const struct table_entry *entry = parse_table; entry->arg; ++entry) { + int dist = typo_distance(arg, entry->arg); + if (!best || dist < best_dist) { + best = entry; + best_dist = dist; + } + } + + return best; +} + +/** + * PRIMARY : OPTION + * | TEST + * | ACTION + */ +static struct bfs_expr *parse_primary(struct bfs_parser *parser) { + // Paths are already skipped at this point + const char *arg = parser->argv[0]; + + if (arg[0] != '-') { + goto unexpected; + } + + const struct table_entry *match = table_lookup(arg); + if (match) { + if (match->parse) { + goto matched; + } else { + goto unexpected; + } + } + + match = table_lookup_fuzzy(arg); + + CFILE *cerr = parser->ctx->cerr; + parse_error(parser, "Unknown argument; did you mean "); + switch (match->type) { + case T_FLAG: + cfprintf(cerr, "${cyn}%s${rs}?", match->arg); + break; + case T_OPERATOR: + cfprintf(cerr, "${red}%s${rs}?", match->arg); + break; + default: + cfprintf(cerr, "${blu}%s${rs}?", match->arg); + break; + } + + if (!parser->interactive || !match->parse) { + fprintf(stderr, "\n"); + goto unmatched; + } + + fprintf(stderr, " "); + if (ynprompt() <= 0) { + goto unmatched; + } + + fprintf(stderr, "\n"); + parser->argv[0] = match->arg; + +matched: + return match->parse(parser, match->arg1, match->arg2); + +unmatched: + return NULL; + +unexpected: + parse_error(parser, "Expected a predicate.\n"); + return NULL; +} + +/** + * FACTOR : "(" EXPR ")" + * | "!" FACTOR | "-not" FACTOR + * | "-exclude" FACTOR + * | PRIMARY + */ +static struct bfs_expr *parse_factor(struct bfs_parser *parser) { + if (skip_paths(parser) != 0) { + return NULL; + } + + const char *arg = parser->argv[0]; + if (!arg) { + parse_argv_error(parser, parser->last_arg, 1, "Expression terminated prematurely here.\n"); + return NULL; + } + + if (strcmp(arg, "(") == 0) { + parser_advance(parser, T_OPERATOR, 1); + + struct bfs_expr *expr = parse_expr(parser); + if (!expr) { + return NULL; + } + + if (skip_paths(parser) != 0) { + return NULL; + } + + arg = parser->argv[0]; + if (!arg || strcmp(arg, ")") != 0) { + parse_argv_error(parser, parser->last_arg, 1, "Expected a ${red})${rs}.\n"); + return NULL; + } + + parser_advance(parser, T_OPERATOR, 1); + return expr; + } else if (strcmp(arg, "-exclude") == 0) { + if (parser->excluding) { + parse_error(parser, "${err}%s${rs} is not supported within ${red}-exclude${rs}.\n", arg); + return NULL; + } + + char **argv = parser_advance(parser, T_OPERATOR, 1); + parser->excluding = true; + + struct bfs_expr *factor = parse_factor(parser); + if (!factor) { + return NULL; + } + + parser->excluding = false; + + bfs_expr_append(parser->ctx->exclude, factor); + return parse_new_expr(parser, eval_true, parser->argv - argv, argv); + } else if (strcmp(arg, "!") == 0 || strcmp(arg, "-not") == 0) { + char **argv = parser_advance(parser, T_OPERATOR, 1); + + struct bfs_expr *factor = parse_factor(parser); + if (!factor) { + return NULL; + } + + return new_unary_expr(parser, eval_not, factor, argv); + } else { + return parse_primary(parser); + } +} + +/** + * TERM : FACTOR + * | TERM FACTOR + * | TERM "-a" FACTOR + * | TERM "-and" FACTOR + */ +static struct bfs_expr *parse_term(struct bfs_parser *parser) { + struct bfs_expr *term = parse_factor(parser); + + while (term) { + if (skip_paths(parser) != 0) { + return NULL; + } + + const char *arg = parser->argv[0]; + if (!arg) { + break; + } + + if (strcmp(arg, "-o") == 0 || strcmp(arg, "-or") == 0 + || strcmp(arg, ",") == 0 + || strcmp(arg, ")") == 0) { + break; + } + + char **argv = &fake_and_arg; + if (strcmp(arg, "-a") == 0 || strcmp(arg, "-and") == 0) { + argv = parser_advance(parser, T_OPERATOR, 1); + } + + struct bfs_expr *lhs = term; + struct bfs_expr *rhs = parse_factor(parser); + if (!rhs) { + return NULL; + } + + term = new_binary_expr(parser, eval_and, lhs, rhs, argv); + } + + return term; +} + +/** + * CLAUSE : TERM + * | CLAUSE "-o" TERM + * | CLAUSE "-or" TERM + */ +static struct bfs_expr *parse_clause(struct bfs_parser *parser) { + struct bfs_expr *clause = parse_term(parser); + + while (clause) { + if (skip_paths(parser) != 0) { + return NULL; + } + + const char *arg = parser->argv[0]; + if (!arg) { + break; + } + + if (strcmp(arg, "-o") != 0 && strcmp(arg, "-or") != 0) { + break; + } + + char **argv = parser_advance(parser, T_OPERATOR, 1); + + struct bfs_expr *lhs = clause; + struct bfs_expr *rhs = parse_term(parser); + if (!rhs) { + return NULL; + } + + clause = new_binary_expr(parser, eval_or, lhs, rhs, argv); + } + + return clause; +} + +/** + * EXPR : CLAUSE + * | EXPR "," CLAUSE + */ +static struct bfs_expr *parse_expr(struct bfs_parser *parser) { + struct bfs_expr *expr = parse_clause(parser); + + while (expr) { + if (skip_paths(parser) != 0) { + return NULL; + } + + const char *arg = parser->argv[0]; + if (!arg) { + break; + } + + if (strcmp(arg, ",") != 0) { + break; + } + + char **argv = parser_advance(parser, T_OPERATOR, 1); + + struct bfs_expr *lhs = expr; + struct bfs_expr *rhs = parse_clause(parser); + if (!rhs) { + return NULL; + } + + expr = new_binary_expr(parser, eval_comma, lhs, rhs, argv); + } + + return expr; +} + +/** + * Parse the top-level expression. + */ +static struct bfs_expr *parse_whole_expr(struct bfs_parser *parser) { + if (skip_paths(parser) != 0) { + return NULL; + } + + struct bfs_expr *expr; + if (parser->argv[0]) { + expr = parse_expr(parser); + } else { + expr = parse_new_expr(parser, eval_true, 1, &fake_true_arg); + } + if (!expr) { + return NULL; + } + + if (parser->argv[0]) { + parse_error(parser, "Unexpected argument.\n"); + return NULL; + } + + if (parser->implicit_print) { + char **limit = parser->limit_arg; + if (limit) { + parse_argv_error(parser, parser->limit_arg, 2, + "With ${blu}%s${rs}, you must specify an action explicitly; for example, ${blu}-print${rs} ${blu}%s${rs} ${bld}%s${rs}.\n", + limit[0], limit[0], limit[1]); + return NULL; + } + + struct bfs_expr *print = parse_new_expr(parser, eval_fprint, 1, &fake_print_arg); + if (!print) { + return NULL; + } + init_print_expr(parser, print); + + expr = new_binary_expr(parser, eval_and, expr, print, &fake_and_arg); + if (!expr) { + return NULL; + } + } + + if (parser->mount_arg && parser->xdev_arg) { + parse_conflict_warning(parser, parser->mount_arg, 1, parser->xdev_arg, 1, + "${blu}%s${rs} is redundant in the presence of ${blu}%s${rs}.\n\n", + parser->xdev_arg[0], parser->mount_arg[0]); + } + + if (parser->ctx->warn && parser->depth_arg && parser->prune_arg) { + parse_conflict_warning(parser, parser->depth_arg, 1, parser->prune_arg, 1, + "${blu}%s${rs} does not work in the presence of ${blu}%s${rs}.\n", + parser->prune_arg[0], parser->depth_arg[0]); + + if (parser->interactive) { + bfs_warning(parser->ctx, "Do you want to continue? "); + if (ynprompt() == 0) { + return NULL; + } + } + + fprintf(stderr, "\n"); + } + + if (parser->ok_expr && parser->files0_stdin_arg) { + parse_conflict_error(parser, parser->ok_expr->argv, parser->ok_expr->argc, parser->files0_stdin_arg, 2, + "${blu}%s${rs} conflicts with ${blu}%s${rs} ${bld}%s${rs}.\n", + parser->ok_expr->argv[0], parser->files0_stdin_arg[0], parser->files0_stdin_arg[1]); + return NULL; + } + + return expr; +} + +static const char *bftw_strategy_name(enum bftw_strategy strategy) { + switch (strategy) { + case BFTW_BFS: + return "bfs"; + case BFTW_DFS: + return "dfs"; + case BFTW_IDS: + return "ids"; + case BFTW_EDS: + return "eds"; + } + + bfs_bug("Invalid strategy"); + return "???"; +} + +static void dump_expr_multiline(const struct bfs_ctx *ctx, enum debug_flags flag, const struct bfs_expr *expr, int indent, int rparens) { + bfs_debug_prefix(ctx, flag); + + for (int i = 0; i < indent; ++i) { + cfprintf(ctx->cerr, " "); + } + + bool close = true; + + if (bfs_expr_is_parent(expr)) { + if (SLIST_EMPTY(&expr->children)) { + cfprintf(ctx->cerr, "(${red}%s${rs}", expr->argv[0]); + ++rparens; + } else { + cfprintf(ctx->cerr, "(${red}%s${rs}\n", expr->argv[0]); + for (struct bfs_expr *child = bfs_expr_children(expr); child; child = child->next) { + int parens = child->next ? 0 : rparens + 1; + dump_expr_multiline(ctx, flag, child, indent + 1, parens); + } + close = false; + } + } else { + if (flag == DEBUG_RATES) { + cfprintf(ctx->cerr, "%pE", expr); + } else { + cfprintf(ctx->cerr, "%pe", expr); + } + } + + if (close) { + for (int i = 0; i < rparens; ++i) { + cfprintf(ctx->cerr, ")"); + } + cfprintf(ctx->cerr, "\n"); + } +} + +void bfs_ctx_dump(const struct bfs_ctx *ctx, enum debug_flags flag) { + if (!bfs_debug_prefix(ctx, flag)) { + return; + } + + CFILE *cerr = ctx->cerr; + + cfprintf(cerr, "${ex}%s${rs}", ctx->argv[0]); + + if (ctx->flags & BFTW_FOLLOW_ALL) { + cfprintf(cerr, " ${cyn}-L${rs}"); + } else if (ctx->flags & BFTW_FOLLOW_ROOTS) { + cfprintf(cerr, " ${cyn}-H${rs}"); + } else { + cfprintf(cerr, " ${cyn}-P${rs}"); + } + + if (ctx->xargs_safe) { + cfprintf(cerr, " ${cyn}-X${rs}"); + } + + if (ctx->flags & BFTW_SORT) { + cfprintf(cerr, " ${cyn}-s${rs}"); + } + + cfprintf(cerr, " ${cyn}-j${bld}%d${rs}", ctx->threads); + + if (ctx->optlevel != 3) { + cfprintf(cerr, " ${cyn}-O${bld}%d${rs}", ctx->optlevel); + } + + cfprintf(cerr, " ${cyn}-S${rs} ${bld}%s${rs}", bftw_strategy_name(ctx->strategy)); + + enum debug_flags debug = ctx->debug; + if (debug == DEBUG_ALL) { + cfprintf(cerr, " ${cyn}-D${rs} ${bld}all${rs}"); + } else if (debug) { + cfprintf(cerr, " ${cyn}-D${rs} "); + for (enum debug_flags i = 1; DEBUG_ALL & i; i <<= 1) { + if (debug & i) { + cfprintf(cerr, "${bld}%s${rs}", debug_flag_name(i)); + debug ^= i; + if (debug) { + cfprintf(cerr, ","); + } + } + } + } + + for (size_t i = 0; i < ctx->npaths; ++i) { + const char *path = ctx->paths[i]; + char c = path[0]; + if (c == '-' || c == '(' || c == ')' || c == '!' || c == ',') { + cfprintf(cerr, " ${cyn}-f${rs}"); + } + cfprintf(cerr, " ${mag}%pq${rs}", path); + } + + if (ctx->cout->colors) { + cfprintf(cerr, " ${blu}-color${rs}"); + } else { + cfprintf(cerr, " ${blu}-nocolor${rs}"); + } + if (ctx->flags & BFTW_POST_ORDER) { + cfprintf(cerr, " ${blu}-depth${rs}"); + } + if (ctx->ignore_races) { + cfprintf(cerr, " ${blu}-ignore_readdir_race${rs}"); + } + if (ctx->mindepth != 0) { + cfprintf(cerr, " ${blu}-mindepth${rs} ${bld}%d${rs}", ctx->mindepth); + } + if (ctx->maxdepth != INT_MAX) { + cfprintf(cerr, " ${blu}-maxdepth${rs} ${bld}%d${rs}", ctx->maxdepth); + } + if (ctx->flags & BFTW_SKIP_MOUNTS) { + cfprintf(cerr, " ${blu}-mount${rs}"); + } + if (ctx->status) { + cfprintf(cerr, " ${blu}-status${rs}"); + } + if (ctx->unique) { + cfprintf(cerr, " ${blu}-unique${rs}"); + } + if ((ctx->flags & (BFTW_SKIP_MOUNTS | BFTW_PRUNE_MOUNTS)) == BFTW_PRUNE_MOUNTS) { + cfprintf(cerr, " ${blu}-xdev${rs}"); + } + + fputs("\n", stderr); + + bfs_debug(ctx, flag, "(${red}-exclude${rs}\n"); + dump_expr_multiline(ctx, flag, ctx->exclude, 1, 1); + + dump_expr_multiline(ctx, flag, ctx->expr, 0, 0); +} + +/** + * Dump the estimated costs. + */ +static void dump_costs(const struct bfs_ctx *ctx) { + const struct bfs_expr *expr = ctx->expr; + bfs_debug(ctx, DEBUG_COST, " Cost: ~${ylw}%g${rs}\n", expr->cost); + bfs_debug(ctx, DEBUG_COST, "Probability: ~${ylw}%g%%${rs}\n", 100.0 * expr->probability); +} + +struct bfs_ctx *bfs_parse_cmdline(int argc, char *argv[]) { + struct bfs_ctx *ctx = bfs_ctx_new(); + if (!ctx) { + perror("bfs_ctx_new()"); + goto fail; + } + + static char *default_argv[] = {BFS_COMMAND, NULL}; + if (argc < 1) { + argc = 1; + argv = default_argv; + } + + ctx->argc = argc; + ctx->argv = xmemdup(argv, sizeof_array(char *, argc + 1)); + if (!ctx->argv) { + perror("xmemdup()"); + goto fail; + } + + enum use_color use_color = COLOR_AUTO; + const char *no_color = getenv("NO_COLOR"); + if (no_color && *no_color) { + // https://no-color.org/ + use_color = COLOR_NEVER; + } + + ctx->colors = parse_colors(); + if (!ctx->colors) { + ctx->colors_error = errno; + } + + ctx->cerr = cfwrap(stderr, use_color ? ctx->colors : NULL, false); + if (!ctx->cerr) { + perror("cfwrap()"); + goto fail; + } + + ctx->cout = cfwrap(stdout, use_color ? ctx->colors : NULL, false); + if (!ctx->cout) { + bfs_perror(ctx, "cfwrap()"); + goto fail; + } + + if (!bfs_ctx_dedup(ctx, ctx->cout, NULL) || !bfs_ctx_dedup(ctx, ctx->cerr, NULL)) { + bfs_perror(ctx, "bfs_ctx_dedup()"); + goto fail; + } + + bool stdin_tty = isatty(STDIN_FILENO); + bool stdout_tty = isatty(STDOUT_FILENO); + bool stderr_tty = isatty(STDERR_FILENO); + + if (getenv("POSIXLY_CORRECT")) { + ctx->posixly_correct = true; + } else { + ctx->warn = stdin_tty; + } + + struct bfs_parser parser = { + .ctx = ctx, + .argv = ctx->argv + 1, + .command = ctx->argv[0], + .regex_type = BFS_REGEX_POSIX_BASIC, + .stdout_tty = stdout_tty, + .interactive = stdin_tty && stderr_tty, + .use_color = use_color, + .implicit_print = true, + .implicit_root = true, + .just_info = false, + .excluding = false, + .last_arg = NULL, + .depth_arg = NULL, + .prune_arg = NULL, + .mount_arg = NULL, + .xdev_arg = NULL, + .files0_stdin_arg = NULL, + .ok_expr = NULL, + .now = ctx->now, + }; + + ctx->exclude = parse_new_expr(&parser, eval_or, 1, &fake_or_arg); + if (!ctx->exclude) { + goto fail; + } + + ctx->expr = parse_whole_expr(&parser); + if (!ctx->expr) { + if (parser.just_info) { + goto done; + } else { + goto fail; + } + } + + if (parser.use_color == COLOR_AUTO && !ctx->colors) { + bfs_warning(ctx, "Error parsing $$LS_COLORS: %s.\n\n", xstrerror(ctx->colors_error)); + } + + if (bfs_optimize(ctx) != 0) { + bfs_perror(ctx, "bfs_optimize()"); + goto fail; + } + + if (ctx->npaths == 0 && parser.implicit_root) { + if (parse_root(&parser, ".") != 0) { + goto fail; + } + } + + if ((ctx->flags & BFTW_FOLLOW_ALL) && !ctx->unique) { + // We need bftw() to detect cycles unless -unique does it for us + ctx->flags |= BFTW_DETECT_CYCLES; + } + + bfs_ctx_dump(ctx, DEBUG_TREE); + dump_costs(ctx); + +done: + return ctx; + +fail: + bfs_ctx_free(ctx); + return NULL; +} diff --git a/src/parse.h b/src/parse.h new file mode 100644 index 0000000..6895c9f --- /dev/null +++ b/src/parse.h @@ -0,0 +1,23 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +/** + * bfs command line parsing. + */ + +#ifndef BFS_PARSE_H +#define BFS_PARSE_H + +/** + * Parse the command line. + * + * @param argc + * The number of arguments. + * @param argv + * The arguments to parse. + * @return + * A new bfs context, or NULL on failure. + */ +struct bfs_ctx *bfs_parse_cmdline(int argc, char *argv[]); + +#endif // BFS_PARSE_H diff --git a/src/prelude.h b/src/prelude.h new file mode 100644 index 0000000..72f88b0 --- /dev/null +++ b/src/prelude.h @@ -0,0 +1,370 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +/** + * Configuration and feature/platform detection. + */ + +#ifndef BFS_PRELUDE_H +#define BFS_PRELUDE_H + +// Possible __STDC_VERSION__ values + +#define C95 199409L +#define C99 199901L +#define C11 201112L +#define C17 201710L +#define C23 202311L + +#include <stddef.h> + +#if __STDC_VERSION__ < C23 +# include <stdalign.h> +# include <stdbool.h> +# include <stdnoreturn.h> +#endif + +// bfs packaging configuration + +#include "config.h" + +#ifndef BFS_COMMAND +# define BFS_COMMAND "bfs" +#endif +#ifndef BFS_HOMEPAGE +# define BFS_HOMEPAGE "https://tavianator.com/projects/bfs.html" +#endif + +// This is a symbol instead of a literal so we don't have to rebuild everything +// when the version number changes +extern const char bfs_version[]; + +// Check for system headers + +#ifdef __has_include + +#if __has_include(<mntent.h>) +# define BFS_HAS_MNTENT_H true +#endif +#if __has_include(<paths.h>) +# define BFS_HAS_PATHS_H true +#endif +#if __has_include(<sys/extattr.h>) +# define BFS_HAS_SYS_EXTATTR_H true +#endif +#if __has_include(<sys/mkdev.h>) +# define BFS_HAS_SYS_MKDEV_H true +#endif +#if __has_include(<sys/param.h>) +# define BFS_HAS_SYS_PARAM_H true +#endif +#if __has_include(<sys/sysmacros.h>) +# define BFS_HAS_SYS_SYSMACROS_H true +#endif +#if __has_include(<sys/xattr.h>) +# define BFS_HAS_SYS_XATTR_H true +#endif +#if __has_include(<threads.h>) +# define BFS_HAS_THREADS_H true +#endif +#if __has_include(<util.h>) +# define BFS_HAS_UTIL_H true +#endif + +#else // !__has_include + +#define BFS_HAS_MNTENT_H __GLIBC__ +#define BFS_HAS_PATHS_H true +#define BFS_HAS_SYS_EXTATTR_H __FreeBSD__ +#define BFS_HAS_SYS_MKDEV_H false +#define BFS_HAS_SYS_PARAM_H true +#define BFS_HAS_SYS_SYSMACROS_H __GLIBC__ +#define BFS_HAS_SYS_XATTR_H __linux__ +#define BFS_HAS_THREADS_H (!__STDC_NO_THREADS__) +#define BFS_HAS_UTIL_H __NetBSD__ + +#endif // !__has_include + +#ifndef BFS_USE_MNTENT_H +# define BFS_USE_MNTENT_H BFS_HAS_MNTENT_H +#endif +#ifndef BFS_USE_PATHS_H +# define BFS_USE_PATHS_H BFS_HAS_PATHS_H +#endif +#ifndef BFS_USE_SYS_EXTATTR_H +# define BFS_USE_SYS_EXTATTR_H BFS_HAS_SYS_EXTATTR_H +#endif +#ifndef BFS_USE_SYS_MKDEV_H +# define BFS_USE_SYS_MKDEV_H BFS_HAS_SYS_MKDEV_H +#endif +#ifndef BFS_USE_SYS_PARAM_H +# define BFS_USE_SYS_PARAM_H BFS_HAS_SYS_PARAM_H +#endif +#ifndef BFS_USE_SYS_SYSMACROS_H +# define BFS_USE_SYS_SYSMACROS_H BFS_HAS_SYS_SYSMACROS_H +#endif +#ifndef BFS_USE_SYS_XATTR_H +# define BFS_USE_SYS_XATTR_H BFS_HAS_SYS_XATTR_H +#endif +#ifndef BFS_USE_THREADS_H +# define BFS_USE_THREADS_H BFS_HAS_THREADS_H +#endif +#ifndef BFS_USE_UTIL_H +# define BFS_USE_UTIL_H BFS_HAS_UTIL_H +#endif + +// Stub out feature detection on old/incompatible compilers + +#ifndef __has_feature +# define __has_feature(feat) false +#endif + +#ifndef __has_c_attribute +# define __has_c_attribute(attr) false +#endif + +#ifndef __has_attribute +# define __has_attribute(attr) false +#endif + +// Fundamental utilities + +/** + * Get the length of an array. + */ +#define countof(array) (sizeof(array) / sizeof(0[array])) + +/** + * False sharing/destructive interference/largest cache line size. + */ +#ifdef __GCC_DESTRUCTIVE_SIZE +# define FALSE_SHARING_SIZE __GCC_DESTRUCTIVE_SIZE +#else +# define FALSE_SHARING_SIZE 64 +#endif + +/** + * True sharing/constructive interference/smallest cache line size. + */ +#ifdef __GCC_CONSTRUCTIVE_SIZE +# define TRUE_SHARING_SIZE __GCC_CONSTRUCTIVE_SIZE +#else +# define TRUE_SHARING_SIZE 64 +#endif + +/** + * Polyfill max_align_t if we don't already have it. + */ +#if !BFS_HAS_MAX_ALIGN_T +typedef union { +# ifdef __BIGGEST_ALIGNMENT__ + alignas(__BIGGEST_ALIGNMENT__) char c; +# else + long double ld; + long long ll; + void *ptr; +# endif +} max_align_t; +#endif + +/** + * Alignment specifier that avoids false sharing. + */ +#define cache_align alignas(FALSE_SHARING_SIZE) + +// Wrappers for attributes + +/** + * Silence warnings about switch/case fall-throughs. + */ +#if __has_attribute(fallthrough) +# define fallthru __attribute__((fallthrough)) +#else +# define fallthru ((void)0) +#endif + +/** + * Silence warnings about unused declarations. + */ +#if __has_attribute(unused) +# define attr_maybe_unused __attribute__((unused)) +#else +# define attr_maybe_unused +#endif + +/** + * Warn if a value is unused. + */ +#if __has_attribute(warn_unused_result) +# define attr_nodiscard __attribute__((warn_unused_result)) +#else +# define attr_nodiscard +#endif + +/** + * Hint to avoid inlining a function. + */ +#if __has_attribute(noinline) +# define attr_noinline __attribute__((noinline)) +#else +# define attr_noinline +#endif + +/** + * Hint that a function is unlikely to be called. + */ +#if __has_attribute(cold) +# define attr_cold attr_noinline __attribute__((cold)) +#else +# define attr_cold attr_noinline +#endif + +/** + * Adds compiler warnings for bad printf()-style function calls, if supported. + */ +#if __has_attribute(format) +# define attr_printf(fmt, args) __attribute__((format(printf, fmt, args))) +#else +# define attr_printf(fmt, args) +#endif + +/** + * Annotates functions that potentially modify and return format strings. + */ +#if __has_attribute(format_arg) +# define attr_format_arg(arg) __attribute__((format_arg(arg))) +#else +# define attr_format_arg(args) +#endif + +/** + * Annotates allocator-like functions. + */ +#if __has_attribute(malloc) +# if __GNUC__ >= 11 && !__OPTIMIZE__ // malloc(deallocator) disables inlining on GCC +# define attr_malloc(...) attr_nodiscard __attribute__((malloc(__VA_ARGS__))) +# else +# define attr_malloc(...) attr_nodiscard __attribute__((malloc)) +# endif +#else +# define attr_malloc(...) attr_nodiscard +#endif + +/** + * Specifies that a function returns allocations with a given alignment. + */ +#if __has_attribute(alloc_align) +# define attr_alloc_align(param) __attribute__((alloc_align(param))) +#else +# define attr_alloc_align(param) +#endif + +/** + * Specifies that a function returns allocations with a given size. + */ +#if __has_attribute(alloc_size) +# define attr_alloc_size(...) __attribute__((alloc_size(__VA_ARGS__))) +#else +# define attr_alloc_size(...) +#endif + +/** + * Shorthand for attr_alloc_align() and attr_alloc_size(). + */ +#define attr_aligned_alloc(align, ...) \ + attr_alloc_align(align) \ + attr_alloc_size(__VA_ARGS__) + +/** + * Check if function multiversioning via GNU indirect functions (ifunc) is supported. + */ +#ifndef BFS_USE_TARGET_CLONES +# if __has_attribute(target_clones) && (__GLIBC__ || __FreeBSD__) +# define BFS_USE_TARGET_CLONES true +# endif +#endif + +/** + * Apply the target_clones attribute, if available. + */ +#if BFS_USE_TARGET_CLONES +# define attr_target_clones(...) __attribute__((target_clones(__VA_ARGS__))) +#else +# define attr_target_clones(...) +#endif + +/** + * Shorthand for multiple attributes at once. attr(a, b(c), d) is equivalent to + * + * attr_a + * attr_b(c) + * attr_d + */ +#define attr(...) \ + attr__(attr_##__VA_ARGS__, none, none, none, none, none, none, none, none, none, ) + +/** + * attr() helper. For exposition, pretend we support only 2 args, instead of 9. + * There are a few cases: + * + * attr() + * => attr__(attr_, none, none) + * => attr_ => + * attr_none => + * attr_too_many_none() => + * + * attr(a) + * => attr__(attr_a, none, none) + * => attr_a => __attribute__((a)) + * attr_none => + * attr_too_many_none() => + * + * attr(a, b(c)) + * => attr__(attr_a, b(c), none, none) + * => attr_a => __attribute__((a)) + * attr_b(c) => __attribute__((b(c))) + * attr_too_many_none(none) => + * + * attr(a, b(c), d) + * => attr__(attr_a, b(c), d, none, none) + * => attr_a => __attribute__((a)) + * attr_b(c) => __attribute__((b(c))) + * attr_too_many_d(none, none) => error + * + * Some attribute names are the same as standard library functions, e.g. printf. + * Standard libraries are permitted to define these functions as macros, like + * + * #define printf(...) __builtin_printf(__VA_ARGS__) + * + * The token paste in + * + * #define attr(...) attr__(attr_##__VA_ARGS__, none, none) + * + * is necessary to prevent macro expansion before evaluating attr__(). + * Otherwise, we could get + * + * attr(printf(1, 2)) + * => attr__(__builtin_printf(1, 2), none, none) + * => attr____builtin_printf(1, 2) + * => error + */ +#define attr__(a1, a2, a3, a4, a5, a6, a7, a8, a9, none, ...) \ + a1 \ + attr_##a2 \ + attr_##a3 \ + attr_##a4 \ + attr_##a5 \ + attr_##a6 \ + attr_##a7 \ + attr_##a8 \ + attr_##a9 \ + attr_too_many_##none(__VA_ARGS__) + +// Ignore `attr_none` from expanding 1-9 argument attr(a1, a2, ...) +#define attr_none +// Ignore `attr_` from expanding 0-argument attr() +#define attr_ +// Only trigger an error on more than 9 arguments +#define attr_too_many_none(...) + +#endif // BFS_PRELUDE_H diff --git a/src/printf.c b/src/printf.c new file mode 100644 index 0000000..f8428f7 --- /dev/null +++ b/src/printf.c @@ -0,0 +1,968 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +#include "prelude.h" +#include "printf.h" +#include "alloc.h" +#include "bfstd.h" +#include "bftw.h" +#include "color.h" +#include "ctx.h" +#include "diag.h" +#include "dir.h" +#include "dstring.h" +#include "expr.h" +#include "fsade.h" +#include "mtab.h" +#include "pwcache.h" +#include "stat.h" +#include <errno.h> +#include <grp.h> +#include <pwd.h> +#include <stdarg.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +struct bfs_fmt; + +/** + * A function implementing a printf directive. + */ +typedef int bfs_printf_fn(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf); + +/** + * A single formatting directive like %f or %#4m. + */ +struct bfs_fmt { + /** The printing function to invoke. */ + bfs_printf_fn *fn; + /** String data associated with this directive. */ + dchar *str; + /** The stat field to print. */ + enum bfs_stat_field stat_field; + /** Character data associated with this directive. */ + char c; + /** Some data used by the directive. */ + void *ptr; +}; + +/** + * An entire format string. + */ +struct bfs_printf { + /** An array of formatting directives. */ + struct bfs_fmt *fmts; + /** The number of directives. */ + size_t nfmts; +}; + +/** Print some text as-is. */ +static int bfs_printf_literal(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { + size_t len = dstrlen(fmt->str); + if (fwrite(fmt->str, 1, len, cfile->file) == len) { + return 0; + } else { + return -1; + } +} + +/** \c: flush */ +static int bfs_printf_flush(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { + return fflush(cfile->file); +} + +/** Check if we can safely colorize this directive. */ +static bool should_color(CFILE *cfile, const struct bfs_fmt *fmt) { + return cfile->colors && strcmp(fmt->str, "%s") == 0; +} + +/** + * Print a value to a temporary buffer before formatting it. + */ +#define BFS_PRINTF_BUF(buf, format, ...) \ + char buf[256]; \ + int ret = snprintf(buf, sizeof(buf), format, __VA_ARGS__); \ + bfs_assert(ret >= 0 && (size_t)ret < sizeof(buf)); \ + (void)ret + +/** Return a dynamic format string. */ +attr(format_arg(2)) +static const char *dyn_fmt(const char *str, const char *fake) { + bfs_assert(strcmp(str + strlen(str) - strlen(fake) + 1, fake + 1) == 0, + "Mismatched format specifiers: '%s' vs. '%s'", str, fake); + return str; +} + +/** Wrapper for fprintf(). */ +attr(printf(3, 4)) +static int bfs_fprintf(CFILE *cfile, const struct bfs_fmt *fmt, const char *fake, ...) { + va_list args; + va_start(args, fake); + int ret = vfprintf(cfile->file, dyn_fmt(fmt->str, fake), args); + va_end(args); + return ret; +} + +/** %a, %c, %t: ctime() */ +static int bfs_printf_ctime(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { + // Not using ctime() itself because GNU find adds nanoseconds + static const char *days[] = {"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"}; + static const char *months[] = {"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"}; + + const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); + if (!statbuf) { + return -1; + } + + const struct timespec *ts = bfs_stat_time(statbuf, fmt->stat_field); + if (!ts) { + return -1; + } + + struct tm tm; + if (!localtime_r(&ts->tv_sec, &tm)) { + return -1; + } + + BFS_PRINTF_BUF(buf, "%s %s %2d %.2d:%.2d:%.2d.%09ld0 %4d", + days[tm.tm_wday], + months[tm.tm_mon], + tm.tm_mday, + tm.tm_hour, + tm.tm_min, + tm.tm_sec, + (long)ts->tv_nsec, + 1900 + tm.tm_year); + + return bfs_fprintf(cfile, fmt, "%s", buf); +} + +/** %A, %B/%W, %C, %T: strftime() */ +static int bfs_printf_strftime(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { + const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); + if (!statbuf) { + return -1; + } + + const struct timespec *ts = bfs_stat_time(statbuf, fmt->stat_field); + if (!ts) { + return -1; + } + + struct tm tm; + if (!localtime_r(&ts->tv_sec, &tm)) { + return -1; + } + + int ret; + char buf[256]; + char format[] = "% "; + switch (fmt->c) { + // Non-POSIX strftime() features + case '@': + ret = snprintf(buf, sizeof(buf), "%lld.%09ld0", (long long)ts->tv_sec, (long)ts->tv_nsec); + break; + case '+': + ret = snprintf(buf, sizeof(buf), "%4d-%.2d-%.2d+%.2d:%.2d:%.2d.%09ld0", + 1900 + tm.tm_year, + tm.tm_mon + 1, + tm.tm_mday, + tm.tm_hour, + tm.tm_min, + tm.tm_sec, + (long)ts->tv_nsec); + break; + case 'k': + ret = snprintf(buf, sizeof(buf), "%2d", tm.tm_hour); + break; + case 'l': + ret = snprintf(buf, sizeof(buf), "%2d", (tm.tm_hour + 11) % 12 + 1); + break; + case 's': + ret = snprintf(buf, sizeof(buf), "%lld", (long long)ts->tv_sec); + break; + case 'S': + ret = snprintf(buf, sizeof(buf), "%.2d.%09ld0", tm.tm_sec, (long)ts->tv_nsec); + break; + case 'T': + ret = snprintf(buf, sizeof(buf), "%.2d:%.2d:%.2d.%09ld0", + tm.tm_hour, + tm.tm_min, + tm.tm_sec, + (long)ts->tv_nsec); + break; + + // POSIX strftime() features + default: + format[1] = fmt->c; +#if __GNUC__ +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wformat-nonliteral" +#endif + ret = strftime(buf, sizeof(buf), format, &tm); +#if __GNUC__ +# pragma GCC diagnostic pop +#endif + break; + } + + bfs_assert(ret >= 0 && (size_t)ret < sizeof(buf)); + (void)ret; + + return bfs_fprintf(cfile, fmt, "%s", buf); +} + +/** %b: blocks */ +static int bfs_printf_b(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { + const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); + if (!statbuf) { + return -1; + } + + uintmax_t blocks = ((uintmax_t)statbuf->blocks * BFS_STAT_BLKSIZE + 511) / 512; + BFS_PRINTF_BUF(buf, "%ju", blocks); + return bfs_fprintf(cfile, fmt, "%s", buf); +} + +/** %d: depth */ +static int bfs_printf_d(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { + return bfs_fprintf(cfile, fmt, "%jd", (intmax_t)ftwbuf->depth); +} + +/** %D: device */ +static int bfs_printf_D(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { + const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); + if (!statbuf) { + return -1; + } + + BFS_PRINTF_BUF(buf, "%ju", (uintmax_t)statbuf->dev); + return bfs_fprintf(cfile, fmt, "%s", buf); +} + +/** %f: file name */ +static int bfs_printf_f(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { + if (should_color(cfile, fmt)) { + return cfprintf(cfile, "%pF", ftwbuf); + } else { + return bfs_fprintf(cfile, fmt, "%s", ftwbuf->path + ftwbuf->nameoff); + } +} + +/** %F: file system type */ +static int bfs_printf_F(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { + const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); + if (!statbuf) { + return -1; + } + + const char *type = bfs_fstype(fmt->ptr, statbuf); + if (!type) { + return -1; + } + + return bfs_fprintf(cfile, fmt, "%s", type); +} + +/** %G: gid */ +static int bfs_printf_G(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { + const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); + if (!statbuf) { + return -1; + } + + BFS_PRINTF_BUF(buf, "%ju", (uintmax_t)statbuf->gid); + return bfs_fprintf(cfile, fmt, "%s", buf); +} + +/** %g: group name */ +static int bfs_printf_g(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { + const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); + if (!statbuf) { + return -1; + } + + struct bfs_groups *groups = fmt->ptr; + const struct group *grp = bfs_getgrgid(groups, statbuf->gid); + if (!grp) { + return bfs_printf_G(cfile, fmt, ftwbuf); + } + + return bfs_fprintf(cfile, fmt, "%s", grp->gr_name); +} + +/** %h: leading directories */ +static int bfs_printf_h(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { + char *copy = NULL; + const char *buf; + + if (ftwbuf->nameoff > 0) { + size_t len = ftwbuf->nameoff; + if (len > 1) { + --len; + } + + buf = copy = strndup(ftwbuf->path, len); + } else if (ftwbuf->path[0] == '/') { + buf = "/"; + } else { + buf = "."; + } + + if (!buf) { + return -1; + } + + int ret; + if (should_color(cfile, fmt)) { + ret = cfprintf(cfile, "${di}%pQ${rs}", buf); + } else { + ret = bfs_fprintf(cfile, fmt, "%s", buf); + } + + free(copy); + return ret; +} + +/** %H: current root */ +static int bfs_printf_H(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { + if (should_color(cfile, fmt)) { + if (ftwbuf->depth == 0) { + return cfprintf(cfile, "%pP", ftwbuf); + } else { + return cfprintf(cfile, "${di}%pQ${rs}", ftwbuf->root); + } + } else { + return bfs_fprintf(cfile, fmt, "%s", ftwbuf->root); + } +} + +/** %i: inode */ +static int bfs_printf_i(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { + const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); + if (!statbuf) { + return -1; + } + + BFS_PRINTF_BUF(buf, "%ju", (uintmax_t)statbuf->ino); + return bfs_fprintf(cfile, fmt, "%s", buf); +} + +/** %k: 1K blocks */ +static int bfs_printf_k(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { + const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); + if (!statbuf) { + return -1; + } + + uintmax_t blocks = ((uintmax_t)statbuf->blocks * BFS_STAT_BLKSIZE + 1023) / 1024; + BFS_PRINTF_BUF(buf, "%ju", blocks); + return bfs_fprintf(cfile, fmt, "%s", buf); +} + +/** %l: link target */ +static int bfs_printf_l(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { + char *buf = NULL; + const char *target = ""; + + if (ftwbuf->type == BFS_LNK) { + if (should_color(cfile, fmt)) { + return cfprintf(cfile, "%pL", ftwbuf); + } + + const struct bfs_stat *statbuf = bftw_cached_stat(ftwbuf, BFS_STAT_NOFOLLOW); + size_t len = statbuf ? statbuf->size : 0; + + target = buf = xreadlinkat(ftwbuf->at_fd, ftwbuf->at_path, len); + if (!target) { + return -1; + } + } + + int ret = bfs_fprintf(cfile, fmt, "%s", target); + free(buf); + return ret; +} + +/** %m: mode */ +static int bfs_printf_m(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { + const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); + if (!statbuf) { + return -1; + } + + return bfs_fprintf(cfile, fmt, "%o", (unsigned int)(statbuf->mode & 07777)); +} + +/** %M: symbolic mode */ +static int bfs_printf_M(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { + const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); + if (!statbuf) { + return -1; + } + + char buf[11]; + xstrmode(statbuf->mode, buf); + return bfs_fprintf(cfile, fmt, "%s", buf); +} + +/** %n: link count */ +static int bfs_printf_n(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { + const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); + if (!statbuf) { + return -1; + } + + BFS_PRINTF_BUF(buf, "%ju", (uintmax_t)statbuf->nlink); + return bfs_fprintf(cfile, fmt, "%s", buf); +} + +/** %p: full path */ +static int bfs_printf_p(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { + if (should_color(cfile, fmt)) { + return cfprintf(cfile, "%pP", ftwbuf); + } else { + return bfs_fprintf(cfile, fmt, "%s", ftwbuf->path); + } +} + +/** %P: path after root */ +static int bfs_printf_P(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { + size_t offset = strlen(ftwbuf->root); + if (ftwbuf->path[offset] == '/') { + ++offset; + } + + if (should_color(cfile, fmt)) { + if (ftwbuf->depth == 0) { + return 0; + } + + struct BFTW copybuf = *ftwbuf; + copybuf.path += offset; + copybuf.nameoff -= offset; + return cfprintf(cfile, "%pP", ©buf); + } else { + return bfs_fprintf(cfile, fmt, "%s", ftwbuf->path + offset); + } +} + +/** %s: size */ +static int bfs_printf_s(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { + const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); + if (!statbuf) { + return -1; + } + + BFS_PRINTF_BUF(buf, "%ju", (uintmax_t)statbuf->size); + return bfs_fprintf(cfile, fmt, "%s", buf); +} + +/** %S: sparseness */ +static int bfs_printf_S(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { + const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); + if (!statbuf) { + return -1; + } + + double sparsity; + if (statbuf->size == 0 && statbuf->blocks == 0) { + sparsity = 1.0; + } else { + sparsity = (double)BFS_STAT_BLKSIZE * statbuf->blocks / statbuf->size; + } + return bfs_fprintf(cfile, fmt, "%g", sparsity); +} + +/** %U: uid */ +static int bfs_printf_U(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { + const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); + if (!statbuf) { + return -1; + } + + BFS_PRINTF_BUF(buf, "%ju", (uintmax_t)statbuf->uid); + return bfs_fprintf(cfile, fmt, "%s", buf); +} + +/** %u: user name */ +static int bfs_printf_u(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { + const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); + if (!statbuf) { + return -1; + } + + struct bfs_users *users = fmt->ptr; + const struct passwd *pwd = bfs_getpwuid(users, statbuf->uid); + if (!pwd) { + return bfs_printf_U(cfile, fmt, ftwbuf); + } + + return bfs_fprintf(cfile, fmt, "%s", pwd->pw_name); +} + +static const char *bfs_printf_type(enum bfs_type type) { + switch (type) { + case BFS_BLK: + return "b"; + case BFS_CHR: + return "c"; + case BFS_DIR: + return "d"; + case BFS_DOOR: + return "D"; + case BFS_FIFO: + return "p"; + case BFS_LNK: + return "l"; + case BFS_PORT: + return "P"; + case BFS_REG: + return "f"; + case BFS_SOCK: + return "s"; + case BFS_WHT: + return "w"; + default: + return "U"; + } +} + +/** %y: type */ +static int bfs_printf_y(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { + const char *type = bfs_printf_type(ftwbuf->type); + return bfs_fprintf(cfile, fmt, "%s", type); +} + +/** %Y: target type */ +static int bfs_printf_Y(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { + enum bfs_type type = bftw_type(ftwbuf, BFS_STAT_FOLLOW); + const char *str; + + int error = 0; + if (type == BFS_ERROR) { + if (errno_is_like(ELOOP)) { + str = "L"; + } else if (errno_is_like(ENOENT)) { + str = "N"; + } else { + str = "?"; + error = errno; + } + } else { + str = bfs_printf_type(type); + } + + int ret = bfs_fprintf(cfile, fmt, "%s", str); + if (error != 0) { + ret = -1; + errno = error; + } + return ret; +} + +/** %Z: SELinux context */ +attr(maybe_unused) +static int bfs_printf_Z(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { + char *con = bfs_getfilecon(ftwbuf); + if (!con) { + return -1; + } + + int ret = bfs_fprintf(cfile, fmt, "%s", con); + bfs_freecon(con); + return ret; +} + +/** + * Append a literal string to the chain. + */ +static int append_literal(const struct bfs_ctx *ctx, struct bfs_printf *format, dchar **literal) { + if (dstrlen(*literal) == 0) { + return 0; + } + + struct bfs_fmt *fmt = RESERVE(struct bfs_fmt, &format->fmts, &format->nfmts); + if (!fmt) { + bfs_perror(ctx, "RESERVE()"); + return -1; + } + + fmt->fn = bfs_printf_literal; + fmt->str = *literal; + + *literal = dstralloc(0); + if (!*literal) { + bfs_perror(ctx, "dstralloc()"); + return -1; + } + + return 0; +} + +/** + * Append a printf directive to the chain. + */ +static int append_directive(const struct bfs_ctx *ctx, struct bfs_printf *format, dchar **literal, struct bfs_fmt *fmt) { + if (append_literal(ctx, format, literal) != 0) { + return -1; + } + + struct bfs_fmt *dest = RESERVE(struct bfs_fmt, &format->fmts, &format->nfmts); + if (!dest) { + bfs_perror(ctx, "RESERVE()"); + return -1; + } + + *dest = *fmt; + return 0; +} + +int bfs_printf_parse(const struct bfs_ctx *ctx, struct bfs_expr *expr, const char *format) { + expr->printf = ZALLOC(struct bfs_printf); + if (!expr->printf) { + bfs_perror(ctx, "zalloc()"); + return -1; + } + + dchar *literal = dstralloc(0); + if (!literal) { + bfs_perror(ctx, "dstralloc()"); + goto error; + } + + for (const char *i = format; *i; ++i) { + char c = *i; + + if (c == '\\') { + c = *++i; + + if (c >= '0' && c < '8') { + c = 0; + for (int j = 0; j < 3 && *i >= '0' && *i < '8'; ++i, ++j) { + c *= 8; + c += *i - '0'; + } + --i; + goto one_char; + } + + switch (c) { + case 'a': c = '\a'; break; + case 'b': c = '\b'; break; + case 'f': c = '\f'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + case 'v': c = '\v'; break; + case '\\': c = '\\'; break; + + case 'c': + { + struct bfs_fmt fmt = { + .fn = bfs_printf_flush, + }; + if (append_directive(ctx, expr->printf, &literal, &fmt) != 0) { + goto error; + } + goto done; + } + + case '\0': + bfs_expr_error(ctx, expr); + bfs_error(ctx, "Incomplete escape sequence '\\'.\n"); + goto error; + + default: + bfs_expr_error(ctx, expr); + bfs_error(ctx, "Unrecognized escape sequence '\\%c'.\n", c); + goto error; + } + } else if (c == '%') { + if (i[1] == '%') { + c = *++i; + goto one_char; + } + + struct bfs_fmt fmt = { + .str = dstralloc(2), + }; + if (!fmt.str) { + goto fmt_error; + } + if (dstrapp(&fmt.str, c) != 0) { + bfs_perror(ctx, "dstrapp()"); + goto fmt_error; + } + + const char *specifier = "s"; + + // Parse any flags + bool must_be_numeric = false; + while (true) { + c = *++i; + + switch (c) { + case '#': + case '0': + case '+': + must_be_numeric = true; + fallthru; + case ' ': + case '-': + if (strchr(fmt.str, c)) { + bfs_expr_error(ctx, expr); + bfs_error(ctx, "Duplicate flag '%c'.\n", c); + goto fmt_error; + } + if (dstrapp(&fmt.str, c) != 0) { + bfs_perror(ctx, "dstrapp()"); + goto fmt_error; + } + continue; + } + + break; + } + + // Parse the field width + while (c >= '0' && c <= '9') { + if (dstrapp(&fmt.str, c) != 0) { + bfs_perror(ctx, "dstrapp()"); + goto fmt_error; + } + c = *++i; + } + + // Parse the precision + if (c == '.') { + do { + if (dstrapp(&fmt.str, c) != 0) { + bfs_perror(ctx, "dstrapp()"); + goto fmt_error; + } + c = *++i; + } while (c >= '0' && c <= '9'); + } + + switch (c) { + case 'a': + fmt.fn = bfs_printf_ctime; + fmt.stat_field = BFS_STAT_ATIME; + break; + case 'b': + fmt.fn = bfs_printf_b; + break; + case 'c': + fmt.fn = bfs_printf_ctime; + fmt.stat_field = BFS_STAT_CTIME; + break; + case 'd': + fmt.fn = bfs_printf_d; + specifier = "jd"; + break; + case 'D': + fmt.fn = bfs_printf_D; + break; + case 'f': + fmt.fn = bfs_printf_f; + break; + case 'F': + fmt.fn = bfs_printf_F; + fmt.ptr = (void *)bfs_ctx_mtab(ctx); + if (!fmt.ptr) { + int error = errno; + bfs_expr_error(ctx, expr); + bfs_error(ctx, "Couldn't parse the mount table: %s.\n", xstrerror(error)); + goto fmt_error; + } + break; + case 'g': + fmt.fn = bfs_printf_g; + fmt.ptr = ctx->groups; + break; + case 'G': + fmt.fn = bfs_printf_G; + break; + case 'h': + fmt.fn = bfs_printf_h; + break; + case 'H': + fmt.fn = bfs_printf_H; + break; + case 'i': + fmt.fn = bfs_printf_i; + break; + case 'k': + fmt.fn = bfs_printf_k; + break; + case 'l': + fmt.fn = bfs_printf_l; + break; + case 'm': + fmt.fn = bfs_printf_m; + specifier = "o"; + break; + case 'M': + fmt.fn = bfs_printf_M; + break; + case 'n': + fmt.fn = bfs_printf_n; + break; + case 'p': + fmt.fn = bfs_printf_p; + break; + case 'P': + fmt.fn = bfs_printf_P; + break; + case 's': + fmt.fn = bfs_printf_s; + break; + case 'S': + fmt.fn = bfs_printf_S; + specifier = "g"; + break; + case 't': + fmt.fn = bfs_printf_ctime; + fmt.stat_field = BFS_STAT_MTIME; + break; + case 'u': + fmt.fn = bfs_printf_u; + fmt.ptr = ctx->users; + break; + case 'U': + fmt.fn = bfs_printf_U; + break; + case 'w': + fmt.fn = bfs_printf_ctime; + fmt.stat_field = BFS_STAT_BTIME; + break; + case 'y': + fmt.fn = bfs_printf_y; + break; + case 'Y': + fmt.fn = bfs_printf_Y; + break; + case 'Z': +#if BFS_CAN_CHECK_CONTEXT + fmt.fn = bfs_printf_Z; + break; +#else + bfs_expr_error(ctx, expr); + bfs_error(ctx, "Missing platform support for '%%%c'.\n", c); + goto fmt_error; +#endif + + case 'A': + fmt.stat_field = BFS_STAT_ATIME; + goto fmt_strftime; + case 'B': + case 'W': + fmt.stat_field = BFS_STAT_BTIME; + goto fmt_strftime; + case 'C': + fmt.stat_field = BFS_STAT_CTIME; + goto fmt_strftime; + case 'T': + fmt.stat_field = BFS_STAT_MTIME; + goto fmt_strftime; + + fmt_strftime: + fmt.fn = bfs_printf_strftime; + c = *++i; + if (!c) { + bfs_expr_error(ctx, expr); + bfs_error(ctx, "Incomplete time specifier '%s%c'.\n", fmt.str, i[-1]); + goto fmt_error; + } else if (strchr("%+@aAbBcCdDeFgGhHIjklmMnprRsStTuUVwWxXyYzZ", c)) { + fmt.c = c; + } else { + bfs_expr_error(ctx, expr); + bfs_error(ctx, "Unrecognized time specifier '%%%c%c'.\n", i[-1], c); + goto fmt_error; + } + break; + + case '\0': + bfs_expr_error(ctx, expr); + bfs_error(ctx, "Incomplete format specifier '%s'.\n", fmt.str); + goto fmt_error; + + default: + bfs_expr_error(ctx, expr); + bfs_error(ctx, "Unrecognized format specifier '%%%c'.\n", c); + goto fmt_error; + } + + if (must_be_numeric && strcmp(specifier, "s") == 0) { + bfs_expr_error(ctx, expr); + bfs_error(ctx, "Invalid flags '%s' for string format '%%%c'.\n", fmt.str + 1, c); + goto fmt_error; + } + + if (dstrcat(&fmt.str, specifier) != 0) { + bfs_perror(ctx, "dstrcat()"); + goto fmt_error; + } + + if (append_directive(ctx, expr->printf, &literal, &fmt) != 0) { + goto fmt_error; + } + + continue; + + fmt_error: + dstrfree(fmt.str); + goto error; + } + + one_char: + if (dstrapp(&literal, c) != 0) { + bfs_perror(ctx, "dstrapp()"); + goto error; + } + } + +done: + if (append_literal(ctx, expr->printf, &literal) != 0) { + goto error; + } + dstrfree(literal); + return 0; + +error: + dstrfree(literal); + bfs_printf_free(expr->printf); + expr->printf = NULL; + return -1; +} + +int bfs_printf(CFILE *cfile, const struct bfs_printf *format, const struct BFTW *ftwbuf) { + int ret = 0, error = 0; + + for (size_t i = 0; i < format->nfmts; ++i) { + const struct bfs_fmt *fmt = &format->fmts[i]; + if (fmt->fn(cfile, fmt, ftwbuf) < 0) { + ret = -1; + error = errno; + } + } + + errno = error; + return ret; +} + +void bfs_printf_free(struct bfs_printf *format) { + if (!format) { + return; + } + + for (size_t i = 0; i < format->nfmts; ++i) { + dstrfree(format->fmts[i].str); + } + free(format->fmts); + free(format); +} diff --git a/src/printf.h b/src/printf.h new file mode 100644 index 0000000..2bff087 --- /dev/null +++ b/src/printf.h @@ -0,0 +1,55 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +/** + * Implementation of -printf/-fprintf. + */ + +#ifndef BFS_PRINTF_H +#define BFS_PRINTF_H + +#include "color.h" + +struct BFTW; +struct bfs_ctx; +struct bfs_expr; + +/** + * A printf command, the result of parsing a single format string. + */ +struct bfs_printf; + +/** + * Parse a -printf format string. + * + * @param ctx + * The bfs context. + * @param expr + * The expression to fill in. + * @param format + * The format string to parse. + * @return + * 0 on success, -1 on failure. + */ +int bfs_printf_parse(const struct bfs_ctx *ctx, struct bfs_expr *expr, const char *format); + +/** + * Evaluate a parsed format string. + * + * @param cfile + * The CFILE to print to. + * @param format + * The parsed printf format. + * @param ftwbuf + * The bftw() data for the current file. + * @return + * 0 on success, -1 on failure. + */ +int bfs_printf(CFILE *cfile, const struct bfs_printf *format, const struct BFTW *ftwbuf); + +/** + * Free a parsed format string. + */ +void bfs_printf_free(struct bfs_printf *format); + +#endif // BFS_PRINTF_H diff --git a/src/pwcache.c b/src/pwcache.c new file mode 100644 index 0000000..af8c237 --- /dev/null +++ b/src/pwcache.c @@ -0,0 +1,220 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +#include "prelude.h" +#include "pwcache.h" +#include "alloc.h" +#include "trie.h" +#include <errno.h> +#include <grp.h> +#include <pwd.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +/** Represents cache hits for negative results. */ +static void *MISSING = &MISSING; + +/** Callback type for bfs_getent(). */ +typedef void *bfs_getent_fn(const void *key, void *ptr, size_t bufsize); + +/** Shared scaffolding for get{pw,gr}{nam,?id}_r(). */ +static void *bfs_getent(bfs_getent_fn *fn, const void *key, struct trie_leaf *leaf, struct varena *varena) { + if (leaf->value) { + errno = 0; + return leaf->value == MISSING ? NULL : leaf->value; + } + + // _SC_GET{PW,GR}_R_SIZE_MAX tend to be fairly large (~1K). That's okay + // for temporary allocations, but for these long-lived ones, let's start + // with a smaller buffer. + size_t bufsize = 128; + void *ptr = varena_alloc(varena, bufsize); + if (!ptr) { + return NULL; + } + + while (true) { + void *ret = fn(key, ptr, bufsize); + if (ret) { + leaf->value = ret; + return ret; + } else if (errno == 0) { + leaf->value = MISSING; + break; + } else if (errno == ERANGE) { + void *next = varena_grow(varena, ptr, &bufsize); + if (!next) { + break; + } + ptr = next; + } else { + break; + } + } + + varena_free(varena, ptr, bufsize); + return NULL; +} + +/** + * An arena-allocated struct passwd. + */ +struct bfs_passwd { + struct passwd pwd; + char buf[]; +}; + +struct bfs_users { + /** bfs_passwd arena. */ + struct varena varena; + /** A map from usernames to entries. */ + struct trie by_name; + /** A map from UIDs to entries. */ + struct trie by_uid; +}; + +struct bfs_users *bfs_users_new(void) { + struct bfs_users *users = ALLOC(struct bfs_users); + if (!users) { + return NULL; + } + + VARENA_INIT(&users->varena, struct bfs_passwd, buf); + trie_init(&users->by_name); + trie_init(&users->by_uid); + return users; +} + +/** bfs_getent() callback for getpwnam_r(). */ +static void *bfs_getpwnam_impl(const void *key, void *ptr, size_t bufsize) { + struct bfs_passwd *storage = ptr; + + struct passwd *ret = NULL; + errno = getpwnam_r(key, &storage->pwd, storage->buf, bufsize, &ret); + return ret; +} + +const struct passwd *bfs_getpwnam(struct bfs_users *users, const char *name) { + struct trie_leaf *leaf = trie_insert_str(&users->by_name, name); + if (!leaf) { + return NULL; + } + + return bfs_getent(bfs_getpwnam_impl, name, leaf, &users->varena); +} + +/** bfs_getent() callback for getpwuid_r(). */ +static void *bfs_getpwuid_impl(const void *key, void *ptr, size_t bufsize) { + const uid_t *uid = key; + struct bfs_passwd *storage = ptr; + + struct passwd *ret = NULL; + errno = getpwuid_r(*uid, &storage->pwd, storage->buf, bufsize, &ret); + return ret; +} + +const struct passwd *bfs_getpwuid(struct bfs_users *users, uid_t uid) { + struct trie_leaf *leaf = trie_insert_mem(&users->by_uid, &uid, sizeof(uid)); + if (!leaf) { + return NULL; + } + + return bfs_getent(bfs_getpwuid_impl, &uid, leaf, &users->varena); +} + +void bfs_users_flush(struct bfs_users *users) { + trie_clear(&users->by_uid); + trie_clear(&users->by_name); + varena_clear(&users->varena); +} + +void bfs_users_free(struct bfs_users *users) { + if (users) { + trie_destroy(&users->by_uid); + trie_destroy(&users->by_name); + varena_destroy(&users->varena); + free(users); + } +} + +/** + * An arena-allocated struct group. + */ +struct bfs_group { + struct group grp; + char buf[]; +}; + +struct bfs_groups { + /** bfs_group arena. */ + struct varena varena; + /** A map from group names to entries. */ + struct trie by_name; + /** A map from GIDs to entries. */ + struct trie by_gid; +}; + +struct bfs_groups *bfs_groups_new(void) { + struct bfs_groups *groups = ALLOC(struct bfs_groups); + if (!groups) { + return NULL; + } + + VARENA_INIT(&groups->varena, struct bfs_group, buf); + trie_init(&groups->by_name); + trie_init(&groups->by_gid); + return groups; +} + +/** bfs_getent() callback for getgrnam_r(). */ +static void *bfs_getgrnam_impl(const void *key, void *ptr, size_t bufsize) { + struct bfs_group *storage = ptr; + + struct group *ret = NULL; + errno = getgrnam_r(key, &storage->grp, storage->buf, bufsize, &ret); + return ret; +} + +const struct group *bfs_getgrnam(struct bfs_groups *groups, const char *name) { + struct trie_leaf *leaf = trie_insert_str(&groups->by_name, name); + if (!leaf) { + return NULL; + } + + return bfs_getent(bfs_getgrnam_impl, name, leaf, &groups->varena); +} + +/** bfs_getent() callback for getgrgid_r(). */ +static void *bfs_getgrgid_impl(const void *key, void *ptr, size_t bufsize) { + const gid_t *gid = key; + struct bfs_group *storage = ptr; + + struct group *ret = NULL; + errno = getgrgid_r(*gid, &storage->grp, storage->buf, bufsize, &ret); + return ret; +} + +const struct group *bfs_getgrgid(struct bfs_groups *groups, gid_t gid) { + struct trie_leaf *leaf = trie_insert_mem(&groups->by_gid, &gid, sizeof(gid)); + if (!leaf) { + return NULL; + } + + return bfs_getent(bfs_getgrgid_impl, &gid, leaf, &groups->varena); +} + +void bfs_groups_flush(struct bfs_groups *groups) { + trie_clear(&groups->by_gid); + trie_clear(&groups->by_name); + varena_clear(&groups->varena); +} + +void bfs_groups_free(struct bfs_groups *groups) { + if (groups) { + trie_destroy(&groups->by_gid); + trie_destroy(&groups->by_name); + varena_destroy(&groups->varena); + free(groups); + } +} diff --git a/src/pwcache.h b/src/pwcache.h new file mode 100644 index 0000000..b6c0b67 --- /dev/null +++ b/src/pwcache.h @@ -0,0 +1,124 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +/** + * A caching wrapper for /etc/{passwd,group}. + */ + +#ifndef BFS_PWCACHE_H +#define BFS_PWCACHE_H + +#include <grp.h> +#include <pwd.h> + +/** + * A user cache. + */ +struct bfs_users; + +/** + * Create a user cache. + * + * @return + * A new user cache, or NULL on failure. + */ +struct bfs_users *bfs_users_new(void); + +/** + * Get a user entry by name. + * + * @param users + * The user cache. + * @param name + * The username to look up. + * @return + * The matching user, or NULL if not found (errno == 0) or an error + * occurred (errno != 0). + */ +const struct passwd *bfs_getpwnam(struct bfs_users *users, const char *name); + +/** + * Get a user entry by ID. + * + * @param users + * The user cache. + * @param uid + * The ID to look up. + * @return + * The matching user, or NULL if not found (errno == 0) or an error + * occurred (errno != 0). + */ +const struct passwd *bfs_getpwuid(struct bfs_users *users, uid_t uid); + +/** + * Flush a user cache. + * + * @param users + * The cache to flush. + */ +void bfs_users_flush(struct bfs_users *users); + +/** + * Free a user cache. + * + * @param users + * The user cache to free. + */ +void bfs_users_free(struct bfs_users *users); + +/** + * A group cache. + */ +struct bfs_groups; + +/** + * Create a group cache. + * + * @return + * A new group cache, or NULL on failure. + */ +struct bfs_groups *bfs_groups_new(void); + +/** + * Get a group entry by name. + * + * @param groups + * The group cache. + * @param name + * The group name to look up. + * @return + * The matching group, or NULL if not found (errno == 0) or an error + * occurred (errno != 0). + */ +const struct group *bfs_getgrnam(struct bfs_groups *groups, const char *name); + +/** + * Get a group entry by ID. + * + * @param groups + * The group cache. + * @param uid + * The ID to look up. + * @return + * The matching group, or NULL if not found (errno == 0) or an error + * occurred (errno != 0). + */ +const struct group *bfs_getgrgid(struct bfs_groups *groups, gid_t gid); + +/** + * Flush a group cache. + * + * @param groups + * The cache to flush. + */ +void bfs_groups_flush(struct bfs_groups *groups); + +/** + * Free a group cache. + * + * @param groups + * The group cache to free. + */ +void bfs_groups_free(struct bfs_groups *groups); + +#endif // BFS_PWCACHE_H diff --git a/src/sanity.h b/src/sanity.h new file mode 100644 index 0000000..e168b8f --- /dev/null +++ b/src/sanity.h @@ -0,0 +1,94 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +/** + * Sanitizer interface. + */ + +#ifndef BFS_SANITY_H +#define BFS_SANITY_H + +#include "prelude.h" +#include <stddef.h> + +#if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__) +# define SANITIZE_ADDRESS true +#endif + +#if __has_feature(memory_sanitizer) || defined(__SANITIZE_MEMORY__) +# define SANITIZE_MEMORY true +#endif + +#if __has_feature(thread_sanitizer) || defined(__SANITIZE_THREAD__) +# define SANITIZE_THREAD true +#endif + +// Call macro(ptr, size) or macro(ptr, sizeof(*ptr)) +#define SANITIZE_CALL(...) \ + SANITIZE_CALL_(__VA_ARGS__, ) + +#define SANITIZE_CALL_(macro, ptr, ...) \ + SANITIZE_CALL__(macro, ptr, __VA_ARGS__ sizeof(*(ptr)), ) + +#define SANITIZE_CALL__(macro, ptr, size, ...) \ + macro(ptr, size) + +#if SANITIZE_ADDRESS +# include <sanitizer/asan_interface.h> + +/** + * sanitize_alloc(ptr, size = sizeof(*ptr)) + * + * Mark a memory region as allocated. + */ +#define sanitize_alloc(...) SANITIZE_CALL(__asan_unpoison_memory_region, __VA_ARGS__) + +/** + * sanitize_free(ptr, size = sizeof(*ptr)) + * + * Mark a memory region as free. + */ +#define sanitize_free(...) SANITIZE_CALL(__asan_poison_memory_region, __VA_ARGS__) + +#else +# define sanitize_alloc sanitize_uninit +# define sanitize_free sanitize_uninit +#endif + +#if SANITIZE_MEMORY +# include <sanitizer/msan_interface.h> + +/** + * sanitize_init(ptr, size = sizeof(*ptr)) + * + * Mark a memory region as initialized. + */ +#define sanitize_init(...) SANITIZE_CALL(__msan_unpoison, __VA_ARGS__) + +/** + * sanitize_uninit(ptr, size = sizeof(*ptr)) + * + * Mark a memory region as uninitialized. + */ +#define sanitize_uninit(...) SANITIZE_CALL(__msan_allocated_memory, __VA_ARGS__) + +#else +# define sanitize_init(...) SANITIZE_CALL(sanitize_ignore, __VA_ARGS__) +# define sanitize_uninit(...) SANITIZE_CALL(sanitize_ignore, __VA_ARGS__) +#endif + +/** + * Squelch unused variable warnings when not sanitizing. + */ +#define sanitize_ignore(ptr, size) ((void)(ptr), (void)(size)) + +/** + * Initialize a variable, unless sanitizers would detect uninitialized uses. + */ +#if SANITIZE_MEMORY +# define uninit(value) +#else +# define uninit(value) = value +#endif + +#endif // BFS_SANITY_H diff --git a/src/sighook.c b/src/sighook.c new file mode 100644 index 0000000..ff5b96f --- /dev/null +++ b/src/sighook.c @@ -0,0 +1,600 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +/** + * Dynamic (un)registration of signal handlers. + * + * Because signal handlers can interrupt any thread at an arbitrary point, they + * must be lock-free or risk deadlock. Therefore, we implement the global table + * of signal "hooks" with a simple read-copy-update (RCU) scheme. Readers get a + * reference-counted pointer (struct arc) to the table in a lock-free way, and + * release the reference count when finished. + * + * Updates are managed by struct rcu, which has two slots: one active and one + * inactive. Readers acquire a reference to the active slot. A single writer + * can safely update it by initializing the inactive slot, atomically swapping + * the slots, and waiting for the reference count of the newly inactive slot to + * drop to zero. Once it does, the old pointer can be safely freed. + */ + +#include "prelude.h" +#include "sighook.h" +#include "alloc.h" +#include "atomic.h" +#include "bfstd.h" +#include "diag.h" +#include "thread.h" +#include <errno.h> +#include <signal.h> +#include <stdlib.h> +#include <unistd.h> + +#if _POSIX_SEMAPHORES > 0 +# include <semaphore.h> +#endif + +/** + * An atomically reference-counted pointer. + */ +struct arc { + /** The current reference count (0 means empty). */ + atomic size_t refs; + /** The reference itself. */ + void *ptr; + +#if _POSIX_SEMAPHORES > 0 + /** A semaphore for arc_wake(). */ + sem_t sem; + /** sem_init() result. */ + int sem_status; +#endif +}; + +/** Initialize an arc. */ +static void arc_init(struct arc *arc) { + atomic_init(&arc->refs, 0); + arc->ptr = NULL; + +#if _POSIX_SEMAPHORES > 0 + arc->sem_status = sem_init(&arc->sem, false, 0); +#endif +} + +/** Get the current refcount. */ +static size_t arc_refs(const struct arc *arc) { + return load(&arc->refs, relaxed); +} + +/** Set the pointer in an empty arc. */ +static void arc_set(struct arc *arc, void *ptr) { + bfs_assert(arc_refs(arc) == 0); + bfs_assert(ptr); + + arc->ptr = ptr; + store(&arc->refs, 1, release); +} + +/** Acquire a reference. */ +static void *arc_get(struct arc *arc) { + size_t refs = arc_refs(arc); + do { + if (refs < 1) { + return NULL; + } + } while (!compare_exchange_weak(&arc->refs, &refs, refs + 1, acquire, relaxed)); + + return arc->ptr; +} + +/** Release a reference. */ +static void arc_put(struct arc *arc) { + size_t refs = fetch_sub(&arc->refs, 1, release); + + if (refs == 1) { +#if _POSIX_SEMAPHORES > 0 + if (arc->sem_status == 0 && sem_post(&arc->sem) != 0) { + abort(); + } +#endif + } +} + +/** Wait on the semaphore. */ +static int arc_sem_wait(struct arc *arc) { +#if _POSIX_SEMAPHORES > 0 + if (arc->sem_status == 0) { + while (sem_wait(&arc->sem) != 0) { + bfs_everify(errno == EINTR, "sem_wait()"); + } + return 0; + } +#endif + + return -1; +} + +/** Wait for all references to be released. */ +static void *arc_wait(struct arc *arc) { + size_t refs = fetch_sub(&arc->refs, 1, relaxed); + bfs_assert(refs > 0); + + --refs; + while (refs > 0) { + if (arc_sem_wait(arc) == 0) { + bfs_assert(arc_refs(arc) == 0); + // sem_wait() provides enough ordering, so we can skip the fence + goto done; + } + + // Some platforms (like macOS) don't support unnamed semaphores, + // but we can always busy-wait + spin_loop(); + refs = arc_refs(arc); + } + + thread_fence(&arc->refs, acquire); + +done:; + void *ptr = arc->ptr; + arc->ptr = NULL; + return ptr; +} + +/** Destroy an arc. */ +static void arc_destroy(struct arc *arc) { + bfs_assert(arc_refs(arc) <= 1); + +#if _POSIX_SEMAPHORES > 0 + if (arc->sem_status == 0) { + bfs_everify(sem_destroy(&arc->sem) == 0, "sem_destroy()"); + } +#endif +} + +/** + * A simple read-copy-update memory reclamation scheme. + */ +struct rcu { + /** The currently active slot. */ + atomic size_t active; + /** The two slots. */ + struct arc slots[2]; +}; + +/** Sentinel value for RCU, since arc uses NULL already. */ +static void *RCU_NULL = &RCU_NULL; + +/** Initialize an RCU block. */ +static void rcu_init(struct rcu *rcu) { + atomic_init(&rcu->active, 0); + arc_init(&rcu->slots[0]); + arc_init(&rcu->slots[1]); + arc_set(&rcu->slots[0], RCU_NULL); +} + +/** Get the active slot. */ +static struct arc *rcu_active(struct rcu *rcu) { + size_t i = load(&rcu->active, relaxed); + return &rcu->slots[i]; +} + +/** Read an RCU-protected pointer. */ +static void *rcu_read(struct rcu *rcu, struct arc **slot) { + while (true) { + *slot = rcu_active(rcu); + void *ptr = arc_get(*slot); + if (ptr == RCU_NULL) { + return NULL; + } else if (ptr) { + return ptr; + } + // Otherwise, the other slot became active; retry + } +} + +/** Get the RCU-protected pointer without acquiring a reference. */ +static void *rcu_peek(struct rcu *rcu) { + struct arc *arc = rcu_active(rcu); + void *ptr = arc->ptr; + if (ptr == RCU_NULL) { + return NULL; + } else { + return ptr; + } +} + +/** Update an RCU-protected pointer, and return the old one. */ +static void *rcu_update(struct rcu *rcu, void *ptr) { + size_t i = load(&rcu->active, relaxed); + struct arc *prev = &rcu->slots[i]; + + size_t j = i ^ 1; + struct arc *next = &rcu->slots[j]; + + arc_set(next, ptr ? ptr : RCU_NULL); + store(&rcu->active, j, relaxed); + return arc_wait(prev); +} + +struct sighook { + int sig; + sighook_fn *fn; + void *arg; + enum sigflags flags; +}; + +/** + * A table of signal hooks. + */ +struct sigtable { + /** The number of filled slots. */ + size_t filled; + /** The length of the array. */ + size_t size; + /** An array of signal hooks. */ + struct arc hooks[]; +}; + +/** Add a hook to a table. */ +static int sigtable_add(struct sigtable *table, struct sighook *hook) { + if (!table || table->filled == table->size) { + return -1; + } + + for (size_t i = 0; i < table->size; ++i) { + struct arc *arc = &table->hooks[i]; + if (arc_refs(arc) == 0) { + arc_set(arc, hook); + ++table->filled; + return 0; + } + } + + return -1; +} + +/** Delete a hook from a table. */ +static int sigtable_del(struct sigtable *table, struct sighook *hook) { + for (size_t i = 0; i < table->size; ++i) { + struct arc *arc = &table->hooks[i]; + if (arc->ptr == hook) { + arc_wait(arc); + --table->filled; + return 0; + } + } + + return -1; +} + +/** Create a bigger copy of a signal table. */ +static struct sigtable *sigtable_grow(struct sigtable *prev) { + size_t old_size = prev ? prev->size : 0; + size_t new_size = old_size ? 2 * old_size : 1; + struct sigtable *table = ALLOC_FLEX(struct sigtable, hooks, new_size); + if (!table) { + return NULL; + } + + table->filled = 0; + table->size = new_size; + for (size_t i = 0; i < new_size; ++i) { + arc_init(&table->hooks[i]); + } + + for (size_t i = 0; i < old_size; ++i) { + struct sighook *hook = prev->hooks[i].ptr; + if (hook) { + bfs_verify(sigtable_add(table, hook) == 0); + } + } + + return table; +} + +/** Free a signal table. */ +static void sigtable_free(struct sigtable *table) { + if (!table) { + return; + } + + for (size_t i = 0; i < table->size; ++i) { + struct arc *arc = &table->hooks[i]; + arc_destroy(arc); + } + free(table); +} + +/** Add a hook to a signal table, growing it if necessary. */ +static int rcu_sigtable_add(struct rcu *rcu, struct sighook *hook) { + struct sigtable *prev = rcu_peek(rcu); + if (sigtable_add(prev, hook) == 0) { + return 0; + } + + struct sigtable *next = sigtable_grow(prev); + if (!next) { + return -1; + } + + bfs_verify(sigtable_add(next, hook) == 0); + rcu_update(rcu, next); + sigtable_free(prev); + return 0; +} + +/** The global table of signal hooks. */ +static struct rcu rcu_sighooks; +/** The global table of atsigexit() hooks. */ +static struct rcu rcu_exithooks; + +/** Mutex for initialization and RCU writer exclusion. */ +static pthread_mutex_t sigmutex = PTHREAD_MUTEX_INITIALIZER; + +/** Check if a signal was generated by userspace. */ +static bool is_user_generated(const siginfo_t *info) { + // https://pubs.opengroup.org/onlinepubs/9699919799/functions/V2_chap02.html#tag_15_04_03 + // + // If si_code is SI_USER or SI_QUEUE, or any value less than or + // equal to 0, then the signal was generated by a process ... + int code = info->si_code; + return code == SI_USER || code == SI_QUEUE || code <= 0; +} + +/** Check if a signal is caused by a fault. */ +static bool is_fault(const siginfo_t *info) { + int sig = info->si_signo; + if (sig == SIGBUS || sig == SIGFPE || sig == SIGILL || sig == SIGSEGV) { + return !is_user_generated(info); + } else { + return false; + } +} + +// https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/signal.h.html +static const int FATAL_SIGNALS[] = { + SIGABRT, + SIGALRM, + SIGBUS, + SIGFPE, + SIGHUP, + SIGILL, + SIGINT, + SIGPIPE, + SIGQUIT, + SIGSEGV, + SIGTERM, + SIGUSR1, + SIGUSR2, +#ifdef SIGPOLL + SIGPOLL, +#endif +#ifdef SIGPROF + SIGPROF, +#endif +#ifdef SIGSYS + SIGSYS, +#endif + SIGTRAP, +#ifdef SIGVTALRM + SIGVTALRM, +#endif + SIGXCPU, + SIGXFSZ, +}; + +/** Check if a signal's default action is to terminate the process. */ +static bool is_fatal(int sig) { + for (size_t i = 0; i < countof(FATAL_SIGNALS); ++i) { + if (sig == FATAL_SIGNALS[i]) { + return true; + } + } + +#ifdef SIGRTMIN + // https://pubs.opengroup.org/onlinepubs/9699919799/functions/V2_chap02.html#tag_15_04_03 + // + // The default actions for the realtime signals in the range + // SIGRTMIN to SIGRTMAX shall be to terminate the process + // abnormally. + if (sig >= SIGRTMIN && sig <= SIGRTMAX) { + return true; + } +#endif + + return false; +} + +/** Reraise a fatal signal. */ +static noreturn void reraise(int sig) { + // Restore the default signal action + if (signal(sig, SIG_DFL) == SIG_ERR) { + goto fail; + } + + // Unblock the signal, since we didn't set SA_NODEFER + sigset_t mask; + if (sigemptyset(&mask) != 0 + || sigaddset(&mask, sig) != 0 + || pthread_sigmask(SIG_UNBLOCK, &mask, NULL) != 0) { + goto fail; + } + + raise(sig); +fail: + abort(); +} + +/** Find any matching hooks and run them. */ +static enum sigflags run_hooks(struct rcu *rcu, int sig, siginfo_t *info) { + enum sigflags ret = 0; + struct arc *slot; + struct sigtable *table = rcu_read(rcu, &slot); + if (!table) { + goto done; + } + + for (size_t i = 0; i < table->size; ++i) { + struct arc *arc = &table->hooks[i]; + struct sighook *hook = arc_get(arc); + if (!hook) { + continue; + } + + if (hook->sig == sig || hook->sig == 0) { + hook->fn(sig, info, hook->arg); + ret |= hook->flags; + } + arc_put(arc); + } + +done: + arc_put(slot); + return ret; +} + +/** Dispatches a signal to the registered handlers. */ +static void sigdispatch(int sig, siginfo_t *info, void *context) { + // https://pubs.opengroup.org/onlinepubs/9699919799/functions/V2_chap02.html#tag_15_04_03 + // + // The behavior of a process is undefined after it returns normally + // from a signal-catching function for a SIGBUS, SIGFPE, SIGILL, or + // SIGSEGV signal that was not generated by kill(), sigqueue(), or + // raise(). + if (is_fault(info)) { + reraise(sig); + } + + // https://pubs.opengroup.org/onlinepubs/9699919799/functions/V2_chap02.html#tag_15_04_03 + // + // After returning from a signal-catching function, the value of + // errno is unspecified if the signal-catching function or any + // function it called assigned a value to errno and the signal- + // catching function did not save and restore the original value of + // errno. + int error = errno; + + // Run the normal hooks + enum sigflags flags = run_hooks(&rcu_sighooks, sig, info); + + // Run the atsigexit() hooks, if we're exiting + if (!(flags & SH_CONTINUE) && is_fatal(sig)) { + run_hooks(&rcu_exithooks, sig, info); + reraise(sig); + } + + errno = error; +} + +/** Make sure our signal handler is installed for a given signal. */ +static int siginit(int sig) { + static struct sigaction action = { + .sa_sigaction = sigdispatch, + .sa_flags = SA_RESTART | SA_SIGINFO, + }; + + static sigset_t signals; + static bool initialized = false; + + if (!initialized) { + if (sigemptyset(&signals) != 0 + || sigemptyset(&action.sa_mask) != 0) { + return -1; + } + rcu_init(&rcu_sighooks); + rcu_init(&rcu_exithooks); + initialized = true; + } + + int installed = sigismember(&signals, sig); + if (installed < 0) { + return -1; + } else if (installed) { + return 0; + } + + if (sigaction(sig, &action, NULL) != 0) { + return -1; + } + + if (sigaddset(&signals, sig) != 0) { + return -1; + } + + return 0; +} + +/** Shared sighook()/atsigexit() implementation. */ +static struct sighook *sighook_impl(struct rcu *rcu, int sig, sighook_fn *fn, void *arg, enum sigflags flags) { + struct sighook *hook = ALLOC(struct sighook); + if (!hook) { + return NULL; + } + + hook->sig = sig; + hook->fn = fn; + hook->arg = arg; + hook->flags = flags; + + if (rcu_sigtable_add(rcu, hook) != 0) { + free(hook); + return NULL; + } + + return hook; +} + +struct sighook *sighook(int sig, sighook_fn *fn, void *arg, enum sigflags flags) { + mutex_lock(&sigmutex); + + struct sighook *ret = NULL; + if (siginit(sig) != 0) { + goto done; + } + + ret = sighook_impl(&rcu_sighooks, sig, fn, arg, flags); +done: + mutex_unlock(&sigmutex); + return ret; +} + +struct sighook *atsigexit(sighook_fn *fn, void *arg) { + mutex_lock(&sigmutex); + + struct sighook *ret = NULL; + + for (size_t i = 0; i < countof(FATAL_SIGNALS); ++i) { + if (siginit(FATAL_SIGNALS[i]) != 0) { + goto done; + } + } + +#ifdef SIGRTMIN + for (int i = SIGRTMIN; i <= SIGRTMAX; ++i) { + if (siginit(i) != 0) { + goto done; + } + } +#endif + + ret = sighook_impl(&rcu_exithooks, 0, fn, arg, 0); +done: + mutex_unlock(&sigmutex); + return ret; +} + +void sigunhook(struct sighook *hook) { + mutex_lock(&sigmutex); + + struct rcu *rcu = hook->sig ? &rcu_sighooks : &rcu_exithooks; + struct sigtable *table = rcu_peek(rcu); + bfs_verify(sigtable_del(table, hook) == 0); + + if (table->filled == 0) { + rcu_update(rcu, NULL); + sigtable_free(table); + } + + mutex_unlock(&sigmutex); + free(hook); +} diff --git a/src/sighook.h b/src/sighook.h new file mode 100644 index 0000000..74d18c0 --- /dev/null +++ b/src/sighook.h @@ -0,0 +1,73 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +/** + * Signal hooks. + */ + +#ifndef BFS_SIGHOOK_H +#define BFS_SIGHOOK_H + +#include <signal.h> + +/** + * A dynamic signal hook. + */ +struct sighook; + +/** + * Signal hook flags. + */ +enum sigflags { + /** Suppress the default action for this signal. */ + SH_CONTINUE = 1 << 0, +}; + +/** + * A signal hook callback. Hooks are executed from a signal handler, so must + * only call async-signal-safe functions. + * + * @param sig + * The signal number. + * @param info + * Additional information about the signal. + * @param arg + * An arbitrary pointer passed to the hook. + */ +typedef void sighook_fn(int sig, siginfo_t *info, void *arg); + +/** + * Install a hook for a signal. + * + * @param sig + * The signal to hook. + * @param fn + * The function to call. + * @param arg + * An argument passed to the function. + * @param flags + * Flags for the new hook. + * @return + * The installed hook, or NULL on failure. + */ +struct sighook *sighook(int sig, sighook_fn *fn, void *arg, enum sigflags flags); + +/** + * On a best-effort basis, invoke the given hook just before the program is + * abnormally terminated by a signal. + * + * @param fn + * The function to call. + * @param arg + * An argument passed to the function. + * @return + * The installed hook, or NULL on failure. + */ +struct sighook *atsigexit(sighook_fn *fn, void *arg); + +/** + * Remove a signal hook. + */ +void sigunhook(struct sighook *hook); + +#endif // BFS_SIGHOOK_H diff --git a/src/stat.c b/src/stat.c new file mode 100644 index 0000000..2044564 --- /dev/null +++ b/src/stat.c @@ -0,0 +1,345 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +#include "prelude.h" +#include "stat.h" +#include "atomic.h" +#include "bfstd.h" +#include "diag.h" +#include "sanity.h" +#include <errno.h> +#include <fcntl.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> + +#if BFS_USE_STATX && !BFS_HAS_STATX +# include <linux/stat.h> +# include <sys/syscall.h> +# include <unistd.h> +#endif + +const char *bfs_stat_field_name(enum bfs_stat_field field) { + switch (field) { + case BFS_STAT_MODE: + return "mode"; + case BFS_STAT_DEV: + return "device number"; + case BFS_STAT_INO: + return "inode nunmber"; + case BFS_STAT_NLINK: + return "link count"; + case BFS_STAT_GID: + return "group ID"; + case BFS_STAT_UID: + return "user ID"; + case BFS_STAT_SIZE: + return "size"; + case BFS_STAT_BLOCKS: + return "block count"; + case BFS_STAT_RDEV: + return "underlying device"; + case BFS_STAT_ATTRS: + return "attributes"; + case BFS_STAT_ATIME: + return "access time"; + case BFS_STAT_BTIME: + return "birth time"; + case BFS_STAT_CTIME: + return "change time"; + case BFS_STAT_MTIME: + return "modification time"; + } + + bfs_bug("Unrecognized stat field"); + return "???"; +} + +int bfs_fstatat_flags(enum bfs_stat_flags flags) { + int ret = 0; + + if (flags & BFS_STAT_NOFOLLOW) { + ret |= AT_SYMLINK_NOFOLLOW; + } + +#ifdef AT_NO_AUTOMOUNT + ret |= AT_NO_AUTOMOUNT; +#endif + + return ret; +} + +void bfs_stat_convert(struct bfs_stat *dest, const struct stat *src) { + dest->mask = 0; + + dest->mode = src->st_mode; + dest->mask |= BFS_STAT_MODE; + + dest->dev = src->st_dev; + dest->mask |= BFS_STAT_DEV; + + dest->ino = src->st_ino; + dest->mask |= BFS_STAT_INO; + + dest->nlink = src->st_nlink; + dest->mask |= BFS_STAT_NLINK; + + dest->gid = src->st_gid; + dest->mask |= BFS_STAT_GID; + + dest->uid = src->st_uid; + dest->mask |= BFS_STAT_UID; + + dest->size = src->st_size; + dest->mask |= BFS_STAT_SIZE; + + dest->blocks = src->st_blocks; + dest->mask |= BFS_STAT_BLOCKS; + + dest->rdev = src->st_rdev; + dest->mask |= BFS_STAT_RDEV; + +#if BFS_HAS_ST_FLAGS + dest->attrs = src->st_flags; + dest->mask |= BFS_STAT_ATTRS; +#endif + + dest->atime = ST_ATIM(*src); + dest->mask |= BFS_STAT_ATIME; + + dest->ctime = ST_CTIM(*src); + dest->mask |= BFS_STAT_CTIME; + + dest->mtime = ST_MTIM(*src); + dest->mask |= BFS_STAT_MTIME; + +#if BFS_HAS_ST_BIRTHTIM + dest->btime = src->st_birthtim; + dest->mask |= BFS_STAT_BTIME; +#elif BFS_HAS___ST_BIRTHTIM + dest->btime = src->__st_birthtim; + dest->mask |= BFS_STAT_BTIME; +#elif BFS_HAS_ST_BIRTHTIMESPEC + dest->btime = src->st_birthtimespec; + dest->mask |= BFS_STAT_BTIME; +#endif +} + +/** + * bfs_stat() implementation backed by stat(). + */ +static int bfs_stat_impl(int at_fd, const char *at_path, int at_flags, struct bfs_stat *buf) { + struct stat statbuf; + int ret = fstatat(at_fd, at_path, &statbuf, at_flags); + if (ret == 0) { + bfs_stat_convert(buf, &statbuf); + } + return ret; +} + +#if BFS_USE_STATX + +/** + * Wrapper for the statx() system call, which had no glibc wrapper prior to 2.28. + */ +static int bfs_statx(int at_fd, const char *at_path, int at_flags, unsigned int mask, struct statx *buf) { +#if BFS_HAS_STATX + int ret = statx(at_fd, at_path, at_flags, mask, buf); +#else + int ret = syscall(SYS_statx, at_fd, at_path, at_flags, mask, buf); +#endif + + if (ret == 0) { + // -fsanitize=memory doesn't know about statx() + sanitize_init(buf); + } + + return ret; +} + +int bfs_statx_flags(enum bfs_stat_flags flags) { + int ret = bfs_fstatat_flags(flags); + + if (flags & BFS_STAT_NOSYNC) { + ret |= AT_STATX_DONT_SYNC; + } + + return ret; +} + +int bfs_statx_convert(struct bfs_stat *dest, const struct statx *src) { + // Callers shouldn't have to check anything except the times + const unsigned int guaranteed = STATX_BASIC_STATS & ~(STATX_ATIME | STATX_CTIME | STATX_MTIME); + if ((src->stx_mask & guaranteed) != guaranteed) { + errno = ENOTSUP; + return -1; + } + + dest->mask = 0; + + dest->mode = src->stx_mode; + dest->mask |= BFS_STAT_MODE; + + dest->dev = xmakedev(src->stx_dev_major, src->stx_dev_minor); + dest->mask |= BFS_STAT_DEV; + + dest->ino = src->stx_ino; + dest->mask |= BFS_STAT_INO; + + dest->nlink = src->stx_nlink; + dest->mask |= BFS_STAT_NLINK; + + dest->gid = src->stx_gid; + dest->mask |= BFS_STAT_GID; + + dest->uid = src->stx_uid; + dest->mask |= BFS_STAT_UID; + + dest->size = src->stx_size; + dest->mask |= BFS_STAT_SIZE; + + dest->blocks = src->stx_blocks; + dest->mask |= BFS_STAT_BLOCKS; + + dest->rdev = xmakedev(src->stx_rdev_major, src->stx_rdev_minor); + dest->mask |= BFS_STAT_RDEV; + + dest->attrs = src->stx_attributes; + dest->mask |= BFS_STAT_ATTRS; + + if (src->stx_mask & STATX_ATIME) { + dest->atime.tv_sec = src->stx_atime.tv_sec; + dest->atime.tv_nsec = src->stx_atime.tv_nsec; + dest->mask |= BFS_STAT_ATIME; + } + + if (src->stx_mask & STATX_BTIME) { + dest->btime.tv_sec = src->stx_btime.tv_sec; + dest->btime.tv_nsec = src->stx_btime.tv_nsec; + dest->mask |= BFS_STAT_BTIME; + } + + if (src->stx_mask & STATX_CTIME) { + dest->ctime.tv_sec = src->stx_ctime.tv_sec; + dest->ctime.tv_nsec = src->stx_ctime.tv_nsec; + dest->mask |= BFS_STAT_CTIME; + } + + if (src->stx_mask & STATX_MTIME) { + dest->mtime.tv_sec = src->stx_mtime.tv_sec; + dest->mtime.tv_nsec = src->stx_mtime.tv_nsec; + dest->mask |= BFS_STAT_MTIME; + } + + return 0; +} + +/** + * bfs_stat() implementation backed by statx(). + */ +static int bfs_statx_impl(int at_fd, const char *at_path, int at_flags, struct bfs_stat *buf) { + unsigned int mask = STATX_BASIC_STATS | STATX_BTIME; + struct statx xbuf; + int ret = bfs_statx(at_fd, at_path, at_flags, mask, &xbuf); + if (ret != 0) { + return ret; + } + + return bfs_statx_convert(buf, &xbuf); +} + +#endif // BFS_USE_STATX + +/** + * Calls the stat() implementation with explicit flags. + */ +static int bfs_stat_explicit(int at_fd, const char *at_path, int at_flags, struct bfs_stat *buf) { +#if BFS_USE_STATX + static atomic bool has_statx = true; + + if (load(&has_statx, relaxed)) { + int ret = bfs_statx_impl(at_fd, at_path, at_flags, buf); + if (ret != 0 && errno_is_like(ENOSYS)) { + store(&has_statx, false, relaxed); + } else { + return ret; + } + } + + at_flags &= ~AT_STATX_DONT_SYNC; +#endif + + return bfs_stat_impl(at_fd, at_path, at_flags, buf); +} + +/** + * Implements the BFS_STAT_TRYFOLLOW retry logic. + */ +static int bfs_stat_tryfollow(int at_fd, const char *at_path, int at_flags, enum bfs_stat_flags bfs_flags, struct bfs_stat *buf) { + int ret = bfs_stat_explicit(at_fd, at_path, at_flags, buf); + + if (ret != 0 + && (bfs_flags & (BFS_STAT_NOFOLLOW | BFS_STAT_TRYFOLLOW)) == BFS_STAT_TRYFOLLOW + && errno_is_like(ENOENT)) + { + at_flags |= AT_SYMLINK_NOFOLLOW; + ret = bfs_stat_explicit(at_fd, at_path, at_flags, buf); + } + + return ret; +} + +int bfs_stat(int at_fd, const char *at_path, enum bfs_stat_flags flags, struct bfs_stat *buf) { +#if BFS_USE_STATX + int at_flags = bfs_statx_flags(flags); +#else + int at_flags = bfs_fstatat_flags(flags); +#endif + + if (at_path) { + return bfs_stat_tryfollow(at_fd, at_path, at_flags, flags, buf); + } + +#if BFS_USE_STATX + // If we have statx(), use it with AT_EMPTY_PATH for its extra features + at_flags |= AT_EMPTY_PATH; + return bfs_stat_explicit(at_fd, "", at_flags, buf); +#else + // Otherwise, just use fstat() rather than fstatat(at_fd, ""), to save + // the kernel the trouble of copying in the empty string + struct stat sb; + if (fstat(at_fd, &sb) != 0) { + return -1; + } + + bfs_stat_convert(buf, &sb); + return 0; +#endif +} + +const struct timespec *bfs_stat_time(const struct bfs_stat *buf, enum bfs_stat_field field) { + if (!(buf->mask & field)) { + errno = ENOTSUP; + return NULL; + } + + switch (field) { + case BFS_STAT_ATIME: + return &buf->atime; + case BFS_STAT_BTIME: + return &buf->btime; + case BFS_STAT_CTIME: + return &buf->ctime; + case BFS_STAT_MTIME: + return &buf->mtime; + default: + bfs_bug("Invalid stat field for time"); + errno = EINVAL; + return NULL; + } +} + +void bfs_stat_id(const struct bfs_stat *buf, bfs_file_id *id) { + memcpy(*id, &buf->dev, sizeof(buf->dev)); + memcpy(*id + sizeof(buf->dev), &buf->ino, sizeof(buf->ino)); +} diff --git a/src/stat.h b/src/stat.h new file mode 100644 index 0000000..8d7144d --- /dev/null +++ b/src/stat.h @@ -0,0 +1,172 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +/** + * A facade over the stat() API that unifies some details that diverge between + * implementations, like the names of the timespec fields and the presence of + * file "birth" times. On new enough Linux kernels, the facade is backed by + * statx() instead, and so it exposes a similar interface with a mask for which + * fields were successfully returned. + */ + +#ifndef BFS_STAT_H +#define BFS_STAT_H + +#include "prelude.h" +#include <sys/stat.h> +#include <sys/types.h> +#include <time.h> + +#if !BFS_HAS_STATX && BFS_HAS_STATX_SYSCALL +# include <linux/stat.h> +#endif + +#ifndef BFS_USE_STATX +# define BFS_USE_STATX (BFS_HAS_STATX || BFS_HAS_STATX_SYSCALL) +#endif + +#if BFS_USE_SYS_PARAM_H +# include <sys/param.h> +#endif + +#ifdef DEV_BSIZE +# define BFS_STAT_BLKSIZE DEV_BSIZE +#elif defined(S_BLKSIZE) +# define BFS_STAT_BLKSIZE S_BLKSIZE +#else +# define BFS_STAT_BLKSIZE 512 +#endif + +/** + * bfs_stat field bitmask. + */ +enum bfs_stat_field { + BFS_STAT_MODE = 1 << 0, + BFS_STAT_DEV = 1 << 1, + BFS_STAT_INO = 1 << 2, + BFS_STAT_NLINK = 1 << 3, + BFS_STAT_GID = 1 << 4, + BFS_STAT_UID = 1 << 5, + BFS_STAT_SIZE = 1 << 6, + BFS_STAT_BLOCKS = 1 << 7, + BFS_STAT_RDEV = 1 << 8, + BFS_STAT_ATTRS = 1 << 9, + BFS_STAT_ATIME = 1 << 10, + BFS_STAT_BTIME = 1 << 11, + BFS_STAT_CTIME = 1 << 12, + BFS_STAT_MTIME = 1 << 13, +}; + +/** + * Get the human-readable name of a bfs_stat field. + */ +const char *bfs_stat_field_name(enum bfs_stat_field field); + +/** + * bfs_stat() flags. + */ +enum bfs_stat_flags { + /** Follow symlinks (the default). */ + BFS_STAT_FOLLOW = 0, + /** Never follow symlinks. */ + BFS_STAT_NOFOLLOW = 1 << 0, + /** Try to follow symlinks, but fall back to the link itself if broken. */ + BFS_STAT_TRYFOLLOW = 1 << 1, + /** Try to use cached values without synchronizing remote filesystems. */ + BFS_STAT_NOSYNC = 1 << 2, +}; + +/** + * Facade over struct stat. + */ +struct bfs_stat { + /** Bitmask indicating filled fields. */ + enum bfs_stat_field mask; + + /** File type and access mode. */ + mode_t mode; + /** Device ID containing the file. */ + dev_t dev; + /** Inode number. */ + ino_t ino; + /** Number of hard links. */ + nlink_t nlink; + /** Owner group ID. */ + gid_t gid; + /** Owner user ID. */ + uid_t uid; + /** File size in bytes. */ + off_t size; + /** Number of disk blocks allocated (of size BFS_STAT_BLKSIZE). */ + blkcnt_t blocks; + /** The device ID represented by this file. */ + dev_t rdev; + + /** Attributes/flags set on the file. */ + unsigned long long attrs; + + /** Access time. */ + struct timespec atime; + /** Birth/creation time. */ + struct timespec btime; + /** Status change time. */ + struct timespec ctime; + /** Modification time. */ + struct timespec mtime; +}; + +/** + * Facade over fstatat(). + * + * @param at_fd + * The base file descriptor for the lookup. + * @param at_path + * The path to stat, relative to at_fd. Pass NULL to fstat() at_fd + * itself. + * @param flags + * Flags that affect the lookup. + * @param[out] buf + * A place to store the stat buffer, if successful. + * @return + * 0 on success, -1 on error. + */ +int bfs_stat(int at_fd, const char *at_path, enum bfs_stat_flags flags, struct bfs_stat *buf); + +/** + * Convert bfs_stat_flags to fstatat() flags. + */ +int bfs_fstatat_flags(enum bfs_stat_flags flags); + +/** + * Convert struct stat to struct bfs_stat. + */ +void bfs_stat_convert(struct bfs_stat *dest, const struct stat *src); + +#if BFS_USE_STATX +/** + * Convert bfs_stat_flags to statx() flags. + */ +int bfs_statx_flags(enum bfs_stat_flags flags); + +/** + * Convert struct statx to struct bfs_stat. + */ +int bfs_statx_convert(struct bfs_stat *dest, const struct statx *src); +#endif + +/** + * Get a particular time field from a bfs_stat() buffer. + */ +const struct timespec *bfs_stat_time(const struct bfs_stat *buf, enum bfs_stat_field field); + +/** + * A unique ID for a file. + */ +typedef unsigned char bfs_file_id[sizeof(dev_t) + sizeof(ino_t)]; + +/** + * Compute a unique ID for a file. + */ +void bfs_stat_id(const struct bfs_stat *buf, bfs_file_id *id); + +#endif // BFS_STAT_H diff --git a/src/thread.c b/src/thread.c new file mode 100644 index 0000000..3793896 --- /dev/null +++ b/src/thread.c @@ -0,0 +1,81 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +#include "prelude.h" +#include "thread.h" +#include "bfstd.h" +#include "diag.h" +#include <errno.h> +#include <pthread.h> + +#define THREAD_FALLIBLE(expr) \ + do { \ + int err = expr; \ + if (err == 0) { \ + return 0; \ + } else { \ + errno = err; \ + return -1; \ + } \ + } while (0) + +#define THREAD_INFALLIBLE(...) \ + THREAD_INFALLIBLE_(__VA_ARGS__, 0, ) + +#define THREAD_INFALLIBLE_(expr, allowed, ...) \ + int err = expr; \ + bfs_verify(err == 0 || err == allowed, "%s: %s", #expr, xstrerror(err)); \ + (void)0 + +int thread_create(pthread_t *thread, const pthread_attr_t *attr, thread_fn *fn, void *arg) { + THREAD_FALLIBLE(pthread_create(thread, attr, fn, arg)); +} + +void thread_join(pthread_t thread, void **ret) { + THREAD_INFALLIBLE(pthread_join(thread, ret)); +} + +int mutex_init(pthread_mutex_t *mutex, pthread_mutexattr_t *attr) { + THREAD_FALLIBLE(pthread_mutex_init(mutex, attr)); +} + +void mutex_lock(pthread_mutex_t *mutex) { + THREAD_INFALLIBLE(pthread_mutex_lock(mutex)); +} + +bool mutex_trylock(pthread_mutex_t *mutex) { + THREAD_INFALLIBLE(pthread_mutex_trylock(mutex), EBUSY); + return err == 0; +} + +void mutex_unlock(pthread_mutex_t *mutex) { + THREAD_INFALLIBLE(pthread_mutex_unlock(mutex)); +} + +void mutex_destroy(pthread_mutex_t *mutex) { + THREAD_INFALLIBLE(pthread_mutex_destroy(mutex)); +} + +int cond_init(pthread_cond_t *cond, pthread_condattr_t *attr) { + THREAD_FALLIBLE(pthread_cond_init(cond, attr)); +} + +void cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex) { + THREAD_INFALLIBLE(pthread_cond_wait(cond, mutex)); +} + +void cond_signal(pthread_cond_t *cond) { + THREAD_INFALLIBLE(pthread_cond_signal(cond)); +} + +void cond_broadcast(pthread_cond_t *cond) { + THREAD_INFALLIBLE(pthread_cond_broadcast(cond)); +} + +void cond_destroy(pthread_cond_t *cond) { + THREAD_INFALLIBLE(pthread_cond_destroy(cond)); +} + +void invoke_once(pthread_once_t *once, once_fn *fn) { + THREAD_INFALLIBLE(pthread_once(once, fn)); +} diff --git a/src/thread.h b/src/thread.h new file mode 100644 index 0000000..db11bd8 --- /dev/null +++ b/src/thread.h @@ -0,0 +1,99 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +/** + * Wrappers for POSIX threading APIs. + */ + +#ifndef BFS_THREAD_H +#define BFS_THREAD_H + +#include "prelude.h" +#include <pthread.h> + +#if __STDC_VERSION__ < C23 && !defined(thread_local) +# if BFS_USE_THREADS_H +# include <threads.h> +# else +# define thread_local _Thread_local +# endif +#endif + +/** Thread entry point type. */ +typedef void *thread_fn(void *arg); + +/** + * Wrapper for pthread_create(). + * + * @return + * 0 on success, -1 on error. + */ +int thread_create(pthread_t *thread, const pthread_attr_t *attr, thread_fn *fn, void *arg); + +/** + * Wrapper for pthread_join(). + */ +void thread_join(pthread_t thread, void **ret); + +/** + * Wrapper for pthread_mutex_init(). + */ +int mutex_init(pthread_mutex_t *mutex, pthread_mutexattr_t *attr); + +/** + * Wrapper for pthread_mutex_lock(). + */ +void mutex_lock(pthread_mutex_t *mutex); + +/** + * Wrapper for pthread_mutex_trylock(). + * + * @return + * Whether the mutex was locked. + */ +bool mutex_trylock(pthread_mutex_t *mutex); + +/** + * Wrapper for pthread_mutex_unlock(). + */ +void mutex_unlock(pthread_mutex_t *mutex); + +/** + * Wrapper for pthread_mutex_destroy(). + */ +void mutex_destroy(pthread_mutex_t *mutex); + +/** + * Wrapper for pthread_cond_init(). + */ +int cond_init(pthread_cond_t *cond, pthread_condattr_t *attr); + +/** + * Wrapper for pthread_cond_wait(). + */ +void cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex); + +/** + * Wrapper for pthread_cond_signal(). + */ +void cond_signal(pthread_cond_t *cond); + +/** + * Wrapper for pthread_cond_broadcast(). + */ +void cond_broadcast(pthread_cond_t *cond); + +/** + * Wrapper for pthread_cond_destroy(). + */ +void cond_destroy(pthread_cond_t *cond); + +/** pthread_once() callback type. */ +typedef void once_fn(void); + +/** + * Wrapper for pthread_once(). + */ +void invoke_once(pthread_once_t *once, once_fn *fn); + +#endif // BFS_THREAD_H diff --git a/src/trie.c b/src/trie.c new file mode 100644 index 0000000..808953e --- /dev/null +++ b/src/trie.c @@ -0,0 +1,729 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +/** + * This is an implementation of a "qp trie," as documented at + * https://dotat.at/prog/qp/README.html + * + * An uncompressed trie over the dataset {AAAA, AADD, ABCD, DDAA, DDDD} would + * look like + * + * A A A A + * ●───→●───→●───→●───→○ + * │ │ │ D D + * │ │ └───→●───→○ + * │ │ B C D + * │ └───→●───→●───→○ + * │ D D A A + * └───→●───→●───→●───→○ + * │ D D + * └───→●───→○ + * + * A compressed (PATRICIA) trie collapses internal nodes that have only a single + * child, like this: + * + * A A AA + * ●───→●───→●────→○ + * │ │ │ DD + * │ │ └────→○ + * │ │ BCD + * │ └─────→○ + * │ DD AA + * └────→●────→○ + * │ DD + * └────→○ + * + * The nodes can be compressed further by dropping the actual compressed + * sequences from the nodes, storing it only in the leaves. This is the + * technique applied in QP tries, and the crit-bit trees that inspired them + * (https://cr.yp.to/critbit.html). Only the index to test, and the values to + * branch on, need to be stored in each node. + * + * A A A + * 0───→1───→2───→AAAA + * │ │ │ D + * │ │ └───→AADD + * │ │ B + * │ └───→ABCD + * │ D A + * └───→2───→DDAA + * │ D + * └───→DDDD + * + * Nodes are represented very compactly. Rather than a dense array of children, + * a sparse array of only the non-NULL children directly follows the node in + * memory. A bitmap is used to track which children exist. + * + * ┌────────────┐ + * │ [4] [3] [2][1][0] ←─ children + * │ ↓ ↓ ↓ ↓ ↓ + * │ 14 10 6 3 0 ←─ sparse index + * │ ↓ ↓ ↓ ↓ ↓ + * │ 0100010001001001 ←─ bitmap + * │ + * │ To convert a sparse index to a dense index, mask off the bits above it, and + * │ count the remaining bits. + * │ + * │ 10 ←─ sparse index + * │ ↓ + * │ 0000001111111111 ←─ mask + * │ & 0100010001001001 ←─ bitmap + * │ ──────────────── + * │ = 0000000001001001 + * │ └──┼──┘ + * │ [3] ←─ dense index + * └───────────────────┘ + * + * This implementation tests a whole nibble (half byte/hex digit) at every + * branch, so the bitmap takes up 16 bits. The remainder of a machine word is + * used to hold the offset, which severely constrains its range on 32-bit + * platforms. As a workaround, we store relative instead of absolute offsets, + * and insert intermediate singleton "jump" nodes when necessary. + */ + +#include "prelude.h" +#include "trie.h" +#include "alloc.h" +#include "bit.h" +#include "diag.h" +#include "list.h" +#include <stdint.h> +#include <string.h> + +bfs_static_assert(CHAR_WIDTH == 8); + +#if __i386__ || __x86_64__ +# define trie_clones attr(target_clones("popcnt", "default")) +#else +# define trie_clones +#endif + +/** Number of bits for the sparse array bitmap, aka the range of a nibble. */ +#define BITMAP_WIDTH 16 +/** The number of remaining bits in a word, to hold the offset. */ +#define OFFSET_WIDTH (SIZE_WIDTH - BITMAP_WIDTH) +/** The highest representable offset (only 64k on a 32-bit architecture). */ +#define OFFSET_MAX (((size_t)1 << OFFSET_WIDTH) - 1) + +/** + * An internal node of the trie. + */ +struct trie_node { + /** + * A bitmap that hold which indices exist in the sparse children array. + * Bit i will be set if a child exists at logical index i, and its index + * into the array will be popcount(bitmap & ((1 << i) - 1)). + */ + size_t bitmap : BITMAP_WIDTH; + + /** + * The offset into the key in nibbles. This is relative to the parent + * node, to support offsets larger than OFFSET_MAX. + */ + size_t offset : OFFSET_WIDTH; + + /** + * Flexible array of children. Each pointer uses the lowest bit as a + * tag to distinguish internal nodes from leaves. This is safe as long + * as all dynamic allocations are aligned to more than a single byte. + */ + uintptr_t children[]; +}; + +/** Check if an encoded pointer is to a leaf. */ +static bool trie_is_leaf(uintptr_t ptr) { + return ptr & 1; +} + +/** Decode a pointer to a leaf. */ +static struct trie_leaf *trie_decode_leaf(uintptr_t ptr) { + bfs_assert(trie_is_leaf(ptr)); + return (struct trie_leaf *)(ptr ^ 1); +} + +/** Encode a pointer to a leaf. */ +static uintptr_t trie_encode_leaf(const struct trie_leaf *leaf) { + uintptr_t ptr = (uintptr_t)leaf ^ 1; + bfs_assert(trie_is_leaf(ptr)); + return ptr; +} + +/** Decode a pointer to an internal node. */ +static struct trie_node *trie_decode_node(uintptr_t ptr) { + bfs_assert(!trie_is_leaf(ptr)); + return (struct trie_node *)ptr; +} + +/** Encode a pointer to an internal node. */ +static uintptr_t trie_encode_node(const struct trie_node *node) { + uintptr_t ptr = (uintptr_t)node; + bfs_assert(!trie_is_leaf(ptr)); + return ptr; +} + +void trie_init(struct trie *trie) { + trie->root = 0; + LIST_INIT(trie); + VARENA_INIT(&trie->nodes, struct trie_node, children); + VARENA_INIT(&trie->leaves, struct trie_leaf, key); +} + +/** Extract the nibble at a certain offset from a byte sequence. */ +static unsigned char trie_key_nibble(const void *key, size_t offset) { + const unsigned char *bytes = key; + size_t byte = offset >> 1; + + // A branchless version of + // if (offset & 1) { + // return bytes[byte] >> 4; + // } else { + // return bytes[byte] & 0xF; + // } + unsigned int shift = (offset & 1) << 2; + return (bytes[byte] >> shift) & 0xF; +} + +/** + * Finds a leaf in the trie that matches the key at every branch. If the key + * exists in the trie, the representative will match the searched key. But + * since only branch points are tested, it can be different from the key. In + * that case, the first mismatch between the key and the representative will be + * the depth at which to make a new branch to insert the key. + */ +trie_clones +static struct trie_leaf *trie_representative(const struct trie *trie, const void *key, size_t length) { + uintptr_t ptr = trie->root; + if (!ptr) { + return NULL; + } + + size_t offset = 0; + while (!trie_is_leaf(ptr)) { + struct trie_node *node = trie_decode_node(ptr); + offset += node->offset; + + unsigned int index = 0; + if ((offset >> 1) < length) { + unsigned char nibble = trie_key_nibble(key, offset); + unsigned int bit = 1U << nibble; + // bits = bitmap & bit ? bitmap & (bit - 1) : 0 + unsigned int mask = -!!(node->bitmap & bit); + unsigned int bits = node->bitmap & (bit - 1) & mask; + index = count_ones(bits); + } + ptr = node->children[index]; + } + + return trie_decode_leaf(ptr); +} + +struct trie_leaf *trie_find_str(const struct trie *trie, const char *key) { + return trie_find_mem(trie, key, strlen(key) + 1); +} + +struct trie_leaf *trie_find_mem(const struct trie *trie, const void *key, size_t length) { + struct trie_leaf *rep = trie_representative(trie, key, length); + if (rep && rep->length == length && memcmp(rep->key, key, length) == 0) { + return rep; + } else { + return NULL; + } +} + +struct trie_leaf *trie_find_postfix(const struct trie *trie, const char *key) { + size_t length = strlen(key); + struct trie_leaf *rep = trie_representative(trie, key, length + 1); + if (rep && rep->length >= length && memcmp(rep->key, key, length) == 0) { + return rep; + } else { + return NULL; + } +} + +/** + * Find a leaf that may end at the current node. + */ +static struct trie_leaf *trie_terminal_leaf(const struct trie_node *node) { + // Finding a terminating NUL byte may take two nibbles + for (int i = 0; i < 2; ++i) { + if (!(node->bitmap & 1)) { + break; + } + + uintptr_t ptr = node->children[0]; + if (trie_is_leaf(ptr)) { + return trie_decode_leaf(ptr); + } else { + node = trie_decode_node(ptr); + } + } + + return NULL; +} + +/** Check if a leaf is a prefix of a search key. */ +static bool trie_check_prefix(struct trie_leaf *leaf, size_t skip, const char *key, size_t length) { + if (leaf && leaf->length <= length) { + return memcmp(key + skip, leaf->key + skip, leaf->length - skip - 1) == 0; + } else { + return false; + } +} + +trie_clones +static struct trie_leaf *trie_find_prefix_impl(const struct trie *trie, const char *key) { + uintptr_t ptr = trie->root; + if (!ptr) { + return NULL; + } + + struct trie_leaf *best = NULL; + size_t skip = 0; + size_t length = strlen(key) + 1; + + size_t offset = 0; + while (!trie_is_leaf(ptr)) { + struct trie_node *node = trie_decode_node(ptr); + offset += node->offset; + if ((offset >> 1) >= length) { + return best; + } + + struct trie_leaf *leaf = trie_terminal_leaf(node); + if (trie_check_prefix(leaf, skip, key, length)) { + best = leaf; + skip = offset >> 1; + } + + unsigned char nibble = trie_key_nibble(key, offset); + unsigned int bit = 1U << nibble; + if (node->bitmap & bit) { + unsigned int index = count_ones(node->bitmap & (bit - 1)); + ptr = node->children[index]; + } else { + return best; + } + } + + struct trie_leaf *leaf = trie_decode_leaf(ptr); + if (trie_check_prefix(leaf, skip, key, length)) { + best = leaf; + } + + return best; +} + +struct trie_leaf *trie_find_prefix(const struct trie *trie, const char *key) { + return trie_find_prefix_impl(trie, key); +} + +/** Create a new leaf, holding a copy of the given key. */ +static struct trie_leaf *trie_leaf_alloc(struct trie *trie, const void *key, size_t length) { + struct trie_leaf *leaf = varena_alloc(&trie->leaves, length); + if (!leaf) { + return NULL; + } + + LIST_ITEM_INIT(leaf); + LIST_APPEND(trie, leaf); + + leaf->value = NULL; + leaf->length = length; + memcpy(leaf->key, key, length); + + return leaf; +} + +/** Free a leaf. */ +static void trie_leaf_free(struct trie *trie, struct trie_leaf *leaf) { + LIST_REMOVE(trie, leaf); + varena_free(&trie->leaves, leaf, leaf->length); +} + +/** Create a new node. */ +static struct trie_node *trie_node_alloc(struct trie *trie, size_t size) { + bfs_assert(has_single_bit(size)); + return varena_alloc(&trie->nodes, size); +} + +/** Reallocate a trie node. */ +static struct trie_node *trie_node_realloc(struct trie *trie, struct trie_node *node, size_t old_size, size_t new_size) { + bfs_assert(has_single_bit(old_size)); + bfs_assert(has_single_bit(new_size)); + return varena_realloc(&trie->nodes, node, old_size, new_size); +} + +/** Free a node. */ +static void trie_node_free(struct trie *trie, struct trie_node *node, size_t size) { + bfs_assert(size == (size_t)count_ones(node->bitmap)); + varena_free(&trie->nodes, node, size); +} + +#if ENDIAN_NATIVE == ENDIAN_LITTLE +# define TRIE_BSWAP(n) (n) +#elif ENDIAN_NATIVE == ENDIAN_BIG +# define TRIE_BSWAP(n) bswap(n) +#endif + +/** Find the offset of the first nibble that differs between two keys. */ +static size_t trie_mismatch(const struct trie_leaf *rep, const void *key, size_t length) { + if (!rep) { + return 0; + } + + if (rep->length < length) { + length = rep->length; + } + + const char *rep_bytes = rep->key; + const char *key_bytes = key; + + size_t i = 0; + for (size_t chunk = sizeof(chunk); i + chunk <= length; i += chunk) { + size_t rep_chunk, key_chunk; + memcpy(&rep_chunk, rep_bytes + i, sizeof(rep_chunk)); + memcpy(&key_chunk, key_bytes + i, sizeof(key_chunk)); + + if (rep_chunk != key_chunk) { +#ifdef TRIE_BSWAP + size_t diff = TRIE_BSWAP(rep_chunk ^ key_chunk); + i *= 2; + i += trailing_zeros(diff) / 4; + return i; +#else + break; +#endif + } + } + + for (; i < length; ++i) { + unsigned char diff = rep_bytes[i] ^ key_bytes[i]; + if (diff) { + return 2 * i + !(diff & 0xF); + } + } + + return 2 * i; +} + +/** + * Insert a leaf into a node. The node must not have a child in that position + * already. Effectively takes a subtrie like this: + * + * ptr + * | + * v X + * *--->... + * | Z + * +--->... + * + * and transforms it to: + * + * ptr + * | + * v X + * *--->... + * | Y + * +--->leaf + * | Z + * +--->... + */ +trie_clones +static struct trie_leaf *trie_node_insert(struct trie *trie, uintptr_t *ptr, struct trie_leaf *leaf, unsigned char nibble) { + struct trie_node *node = trie_decode_node(*ptr); + unsigned int size = count_ones(node->bitmap); + + // Double the capacity every power of two + if (has_single_bit(size)) { + node = trie_node_realloc(trie, node, size, 2 * size); + if (!node) { + trie_leaf_free(trie, leaf); + return NULL; + } + *ptr = trie_encode_node(node); + } + + unsigned int bit = 1U << nibble; + + // The child must not already be present + bfs_assert(!(node->bitmap & bit)); + node->bitmap |= bit; + + unsigned int target = count_ones(node->bitmap & (bit - 1)); + for (size_t i = size; i > target; --i) { + node->children[i] = node->children[i - 1]; + } + node->children[target] = trie_encode_leaf(leaf); + return leaf; +} + +/** + * When the current offset exceeds OFFSET_MAX, insert "jump" nodes that bridge + * the gap. This function takes a subtrie like this: + * + * ptr + * | + * v + * *--->rep + * + * and changes it to: + * + * ptr ret + * | | + * v v + * *--->*--->rep + * + * so that a new key can be inserted like: + * + * ptr ret + * | | + * v v X + * *--->*--->rep + * | Y + * +--->key + */ +static uintptr_t *trie_jump(struct trie *trie, uintptr_t *ptr, const char *key, size_t *offset) { + // We only ever need to jump to leaf nodes, since internal nodes are + // guaranteed to be within OFFSET_MAX anyway + bfs_assert(trie_is_leaf(*ptr)); + + struct trie_node *node = trie_node_alloc(trie, 1); + if (!node) { + return NULL; + } + + *offset += OFFSET_MAX; + node->offset = OFFSET_MAX; + + unsigned char nibble = trie_key_nibble(key, *offset); + node->bitmap = 1 << nibble; + + node->children[0] = *ptr; + *ptr = trie_encode_node(node); + return node->children; +} + +/** + * Split a node in the trie. Changes a subtrie like this: + * + * ptr + * | + * v + * *...>--->rep + * + * into this: + * + * ptr + * | + * v X + * *--->*...>--->rep + * | Y + * +--->leaf + */ +static struct trie_leaf *trie_split(struct trie *trie, uintptr_t *ptr, struct trie_leaf *leaf, struct trie_leaf *rep, size_t offset, size_t mismatch) { + unsigned char key_nibble = trie_key_nibble(leaf->key, mismatch); + unsigned char rep_nibble = trie_key_nibble(rep->key, mismatch); + bfs_assert(key_nibble != rep_nibble); + + struct trie_node *node = trie_node_alloc(trie, 2); + if (!node) { + trie_leaf_free(trie, leaf); + return NULL; + } + + node->bitmap = (1 << key_nibble) | (1 << rep_nibble); + + size_t delta = mismatch - offset; + if (!trie_is_leaf(*ptr)) { + struct trie_node *child = trie_decode_node(*ptr); + child->offset -= delta; + } + node->offset = delta; + + unsigned int key_index = key_nibble > rep_nibble; + node->children[key_index] = trie_encode_leaf(leaf); + node->children[key_index ^ 1] = *ptr; + *ptr = trie_encode_node(node); + return leaf; +} + +struct trie_leaf *trie_insert_str(struct trie *trie, const char *key) { + return trie_insert_mem(trie, key, strlen(key) + 1); +} + +trie_clones +static struct trie_leaf *trie_insert_mem_impl(struct trie *trie, const void *key, size_t length) { + struct trie_leaf *rep = trie_representative(trie, key, length); + size_t mismatch = trie_mismatch(rep, key, length); + if (mismatch >= (length << 1)) { + return rep; + } + + struct trie_leaf *leaf = trie_leaf_alloc(trie, key, length); + if (!leaf) { + return NULL; + } + + if (!rep) { + trie->root = trie_encode_leaf(leaf); + return leaf; + } + + size_t offset = 0; + uintptr_t *ptr = &trie->root; + while (!trie_is_leaf(*ptr)) { + struct trie_node *node = trie_decode_node(*ptr); + if (offset + node->offset > mismatch) { + break; + } + offset += node->offset; + + unsigned char nibble = trie_key_nibble(key, offset); + unsigned int bit = 1U << nibble; + if (node->bitmap & bit) { + bfs_assert(offset < mismatch); + unsigned int index = count_ones(node->bitmap & (bit - 1)); + ptr = &node->children[index]; + } else { + bfs_assert(offset == mismatch); + return trie_node_insert(trie, ptr, leaf, nibble); + } + } + + while (mismatch - offset > OFFSET_MAX) { + ptr = trie_jump(trie, ptr, key, &offset); + if (!ptr) { + trie_leaf_free(trie, leaf); + return NULL; + } + } + + return trie_split(trie, ptr, leaf, rep, offset, mismatch); +} + +struct trie_leaf *trie_insert_mem(struct trie *trie, const void *key, size_t length) { + return trie_insert_mem_impl(trie, key, length); +} + +/** Free a chain of singleton nodes. */ +static void trie_free_singletons(struct trie *trie, uintptr_t ptr) { + while (!trie_is_leaf(ptr)) { + struct trie_node *node = trie_decode_node(ptr); + + // Make sure the bitmap is a power of two, i.e. it has just one child + bfs_assert(has_single_bit(node->bitmap)); + + ptr = node->children[0]; + trie_node_free(trie, node, 1); + } + + trie_leaf_free(trie, trie_decode_leaf(ptr)); +} + +/** + * Try to collapse a two-child node like: + * + * parent child + * | | + * v v + * *----->*----->*----->leaf + * | + * +----->other + * + * into + * + * parent + * | + * v + * other + */ +static int trie_collapse_node(struct trie *trie, uintptr_t *parent, struct trie_node *parent_node, unsigned int child_index) { + uintptr_t other = parent_node->children[child_index ^ 1]; + if (!trie_is_leaf(other)) { + struct trie_node *other_node = trie_decode_node(other); + if (other_node->offset + parent_node->offset <= OFFSET_MAX) { + other_node->offset += parent_node->offset; + } else { + return -1; + } + } + + *parent = other; + trie_node_free(trie, parent_node, 1); + return 0; +} + +trie_clones +static void trie_remove_impl(struct trie *trie, struct trie_leaf *leaf) { + uintptr_t *child = &trie->root; + uintptr_t *parent = NULL; + unsigned int child_bit = 0, child_index = 0; + size_t offset = 0; + while (!trie_is_leaf(*child)) { + struct trie_node *node = trie_decode_node(*child); + offset += node->offset; + bfs_assert((offset >> 1) < leaf->length); + + unsigned char nibble = trie_key_nibble(leaf->key, offset); + unsigned int bit = 1U << nibble; + unsigned int bitmap = node->bitmap; + bfs_assert(bitmap & bit); + unsigned int index = count_ones(bitmap & (bit - 1)); + + // Advance the parent pointer, unless this node had only one child + if (!has_single_bit(bitmap)) { + parent = child; + child_bit = bit; + child_index = index; + } + + child = &node->children[index]; + } + + bfs_assert(trie_decode_leaf(*child) == leaf); + + if (!parent) { + trie_free_singletons(trie, trie->root); + trie->root = 0; + return; + } + + struct trie_node *node = trie_decode_node(*parent); + child = node->children + child_index; + trie_free_singletons(trie, *child); + + node->bitmap ^= child_bit; + unsigned int parent_size = count_ones(node->bitmap); + bfs_assert(parent_size > 0); + if (parent_size == 1 && trie_collapse_node(trie, parent, node, child_index) == 0) { + return; + } + + if (child_index < parent_size) { + memmove(child, child + 1, (parent_size - child_index) * sizeof(*child)); + } + + if (has_single_bit(parent_size)) { + node = trie_node_realloc(trie, node, 2 * parent_size, parent_size); + if (node) { + *parent = trie_encode_node(node); + } + } +} + +void trie_remove(struct trie *trie, struct trie_leaf *leaf) { + trie_remove_impl(trie, leaf); +} + +void trie_clear(struct trie *trie) { + trie->root = 0; + LIST_INIT(trie); + + varena_clear(&trie->leaves); + varena_clear(&trie->nodes); +} + +void trie_destroy(struct trie *trie) { + varena_destroy(&trie->leaves); + varena_destroy(&trie->nodes); +} diff --git a/src/trie.h b/src/trie.h new file mode 100644 index 0000000..4288d76 --- /dev/null +++ b/src/trie.h @@ -0,0 +1,147 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +#ifndef BFS_TRIE_H +#define BFS_TRIE_H + +#include "alloc.h" +#include "list.h" +#include <stddef.h> +#include <stdint.h> + +/** + * A leaf of a trie. + */ +struct trie_leaf { + /** Linked list of leaves, in insertion order. */ + struct trie_leaf *prev, *next; + /** An arbitrary value associated with this leaf. */ + void *value; + /** The length of the key in bytes. */ + size_t length; + /** The key itself, stored inline. */ + char key[]; +}; + +/** + * A trie that holds a set of fixed- or variable-length strings. + */ +struct trie { + /** Pointer to the root node/leaf. */ + uintptr_t root; + /** Linked list of leaves. */ + struct trie_leaf *head, *tail; + /** Node allocator. */ + struct varena nodes; + /** Leaf allocator. */ + struct varena leaves; +}; + +/** + * Initialize an empty trie. + */ +void trie_init(struct trie *trie); + +/** + * Find the leaf for a string key. + * + * @param trie + * The trie to search. + * @param key + * The key to look up. + * @return + * The found leaf, or NULL if the key is not present. + */ +struct trie_leaf *trie_find_str(const struct trie *trie, const char *key); + +/** + * Find the leaf for a fixed-size key. + * + * @param trie + * The trie to search. + * @param key + * The key to look up. + * @param length + * The length of the key in bytes. + * @return + * The found leaf, or NULL if the key is not present. + */ +struct trie_leaf *trie_find_mem(const struct trie *trie, const void *key, size_t length); + +/** + * Find the shortest leaf that starts with a given key. + * + * @param trie + * The trie to search. + * @param key + * The key to look up. + * @return + * A leaf that starts with the given key, or NULL. + */ +struct trie_leaf *trie_find_postfix(const struct trie *trie, const char *key); + +/** + * Find the leaf that is the longest prefix of the given key. + * + * @param trie + * The trie to search. + * @param key + * The key to look up. + * @return + * The longest prefix match for the given key, or NULL. + */ +struct trie_leaf *trie_find_prefix(const struct trie *trie, const char *key); + +/** + * Insert a string key into the trie. + * + * @param trie + * The trie to modify. + * @param key + * The key to insert. + * @return + * The inserted leaf, or NULL on failure. + */ +struct trie_leaf *trie_insert_str(struct trie *trie, const char *key); + +/** + * Insert a fixed-size key into the trie. + * + * @param trie + * The trie to modify. + * @param key + * The key to insert. + * @param length + * The length of the key in bytes. + * @return + * The inserted leaf, or NULL on failure. + */ +struct trie_leaf *trie_insert_mem(struct trie *trie, const void *key, size_t length); + +/** + * Remove a leaf from a trie. + * + * @param trie + * The trie to modify. + * @param leaf + * The leaf to remove. + */ +void trie_remove(struct trie *trie, struct trie_leaf *leaf); + +/** + * Remove all leaves from a trie. + */ +void trie_clear(struct trie *trie); + +/** + * Destroy a trie and its contents. + */ +void trie_destroy(struct trie *trie); + +/** + * Iterate over the leaves of a trie. + */ +#define for_trie(leaf, trie) \ + for_list (struct trie_leaf, leaf, trie) + +#endif // BFS_TRIE_H diff --git a/src/typo.c b/src/typo.c new file mode 100644 index 0000000..b1c5c44 --- /dev/null +++ b/src/typo.c @@ -0,0 +1,164 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +#include "typo.h" +#include <limits.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +// Assume QWERTY layout for now +static const int8_t key_coords[UCHAR_MAX + 1][3] = { + ['`'] = { 0, 0, 0}, + ['~'] = { 0, 0, 1}, + ['1'] = { 3, 0, 0}, + ['!'] = { 3, 0, 1}, + ['2'] = { 6, 0, 0}, + ['@'] = { 6, 0, 1}, + ['3'] = { 9, 0, 0}, + ['#'] = { 9, 0, 1}, + ['4'] = {12, 0, 0}, + ['$'] = {12, 0, 1}, + ['5'] = {15, 0, 0}, + ['%'] = {15, 0, 1}, + ['6'] = {18, 0, 0}, + ['^'] = {18, 0, 1}, + ['7'] = {21, 0, 0}, + ['&'] = {21, 0, 1}, + ['8'] = {24, 0, 0}, + ['*'] = {24, 0, 1}, + ['9'] = {27, 0, 0}, + ['('] = {27, 0, 1}, + ['0'] = {30, 0, 0}, + [')'] = {30, 0, 1}, + ['-'] = {33, 0, 0}, + ['_'] = {33, 0, 1}, + ['='] = {36, 0, 0}, + ['+'] = {36, 0, 1}, + + ['\t'] = { 1, 3, 0}, + ['q'] = { 4, 3, 0}, + ['Q'] = { 4, 3, 1}, + ['w'] = { 7, 3, 0}, + ['W'] = { 7, 3, 1}, + ['e'] = {10, 3, 0}, + ['E'] = {10, 3, 1}, + ['r'] = {13, 3, 0}, + ['R'] = {13, 3, 1}, + ['t'] = {16, 3, 0}, + ['T'] = {16, 3, 1}, + ['y'] = {19, 3, 0}, + ['Y'] = {19, 3, 1}, + ['u'] = {22, 3, 0}, + ['U'] = {22, 3, 1}, + ['i'] = {25, 3, 0}, + ['I'] = {25, 3, 1}, + ['o'] = {28, 3, 0}, + ['O'] = {28, 3, 1}, + ['p'] = {31, 3, 0}, + ['P'] = {31, 3, 1}, + ['['] = {34, 3, 0}, + ['{'] = {34, 3, 1}, + [']'] = {37, 3, 0}, + ['}'] = {37, 3, 1}, + ['\\'] = {40, 3, 0}, + ['|'] = {40, 3, 1}, + + ['a'] = { 5, 6, 0}, + ['A'] = { 5, 6, 1}, + ['s'] = { 8, 6, 0}, + ['S'] = { 8, 6, 1}, + ['d'] = {11, 6, 0}, + ['D'] = {11, 6, 1}, + ['f'] = {14, 6, 0}, + ['F'] = {14, 6, 1}, + ['g'] = {17, 6, 0}, + ['G'] = {17, 6, 1}, + ['h'] = {20, 6, 0}, + ['H'] = {20, 6, 1}, + ['j'] = {23, 6, 0}, + ['J'] = {23, 6, 1}, + ['k'] = {26, 6, 0}, + ['K'] = {26, 6, 1}, + ['l'] = {29, 6, 0}, + ['L'] = {29, 6, 1}, + [';'] = {32, 6, 0}, + [':'] = {32, 6, 1}, + ['\''] = {35, 6, 0}, + ['"'] = {35, 6, 1}, + ['\n'] = {38, 6, 0}, + + ['z'] = { 6, 9, 0}, + ['Z'] = { 6, 9, 1}, + ['x'] = { 9, 9, 0}, + ['X'] = { 9, 9, 1}, + ['c'] = {12, 9, 0}, + ['C'] = {12, 9, 1}, + ['v'] = {15, 9, 0}, + ['V'] = {15, 9, 1}, + ['b'] = {18, 9, 0}, + ['B'] = {18, 9, 1}, + ['n'] = {21, 9, 0}, + ['N'] = {21, 9, 1}, + ['m'] = {24, 9, 0}, + ['M'] = {24, 9, 1}, + [','] = {27, 9, 0}, + ['<'] = {27, 9, 1}, + ['.'] = {30, 9, 0}, + ['>'] = {30, 9, 1}, + ['/'] = {33, 9, 0}, + ['?'] = {33, 9, 1}, + + [' '] = {18, 12, 0}, +}; + +static int char_distance(char a, char b) { + const int8_t *ac = key_coords[(unsigned char)a], *bc = key_coords[(unsigned char)b]; + int ret = 0; + for (int i = 0; i < 3; ++i) { + ret += abs(ac[i] - bc[i]); + } + return ret; +} + +int typo_distance(const char *actual, const char *expected) { + // This is the Wagner-Fischer algorithm for Levenshtein distance, using + // Manhattan distance on the keyboard for individual characters. + + const int insert_cost = 12; + + size_t rows = strlen(actual) + 1; + size_t cols = strlen(expected) + 1; + + int arr0[cols], arr1[cols]; + int *row0 = arr0, *row1 = arr1; + + for (size_t j = 0; j < cols; ++j) { + row0[j] = insert_cost * j; + } + + for (size_t i = 1; i < rows; ++i) { + row1[0] = row0[0] + insert_cost; + + char a = actual[i - 1]; + for (size_t j = 1; j < cols; ++j) { + char b = expected[j - 1]; + int cost = row0[j - 1] + char_distance(a, b); + int del_cost = row0[j] + insert_cost; + if (del_cost < cost) { + cost = del_cost; + } + int ins_cost = row1[j - 1] + insert_cost; + if (ins_cost < cost) { + cost = ins_cost; + } + row1[j] = cost; + } + + int *tmp = row0; + row0 = row1; + row1 = tmp; + } + + return row0[cols - 1]; +} diff --git a/src/typo.h b/src/typo.h new file mode 100644 index 0000000..13eaa67 --- /dev/null +++ b/src/typo.h @@ -0,0 +1,18 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +#ifndef BFS_TYPO_H +#define BFS_TYPO_H + +/** + * Find the "typo" distance between two strings. + * + * @param actual + * The actual string typed by the user. + * @param expected + * The expected valid string. + * @return The distance between the two strings. + */ +int typo_distance(const char *actual, const char *expected); + +#endif // BFS_TYPO_H diff --git a/src/xregex.c b/src/xregex.c new file mode 100644 index 0000000..c2711bc --- /dev/null +++ b/src/xregex.c @@ -0,0 +1,295 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +#include "prelude.h" +#include "xregex.h" +#include "alloc.h" +#include "bfstd.h" +#include "diag.h" +#include "sanity.h" +#include "thread.h" +#include <errno.h> +#include <pthread.h> +#include <stdlib.h> +#include <string.h> + +#if BFS_USE_ONIGURUMA +# include <langinfo.h> +# include <oniguruma.h> +#else +# include <regex.h> +#endif + +struct bfs_regex { +#if BFS_USE_ONIGURUMA + unsigned char *pattern; + OnigRegex impl; + int err; + OnigErrorInfo einfo; +#else + regex_t impl; + int err; +#endif +}; + +#if BFS_USE_ONIGURUMA + +static int bfs_onig_status; +static OnigEncoding bfs_onig_enc; + +/** pthread_once() callback. */ +static void bfs_onig_once(void) { + // Fall back to ASCII by default + bfs_onig_enc = ONIG_ENCODING_ASCII; + + // Oniguruma has no locale support, so try to guess the right encoding + // from the current locale. + const char *charmap = nl_langinfo(CODESET); + if (charmap) { +#define BFS_MAP_ENCODING(name, value) \ + do { \ + if (strcmp(charmap, name) == 0) { \ + bfs_onig_enc = value; \ + } \ + } while (0) +#define BFS_MAP_ENCODING2(name1, name2, value) \ + do { \ + BFS_MAP_ENCODING(name1, value); \ + BFS_MAP_ENCODING(name2, value); \ + } while (0) + + // These names were found with locale -m on Linux and FreeBSD +#define BFS_MAP_ISO_8859(n) \ + BFS_MAP_ENCODING2("ISO-8859-" #n, "ISO8859-" #n, ONIG_ENCODING_ISO_8859_ ## n) + + BFS_MAP_ISO_8859(1); + BFS_MAP_ISO_8859(2); + BFS_MAP_ISO_8859(3); + BFS_MAP_ISO_8859(4); + BFS_MAP_ISO_8859(5); + BFS_MAP_ISO_8859(6); + BFS_MAP_ISO_8859(7); + BFS_MAP_ISO_8859(8); + BFS_MAP_ISO_8859(9); + BFS_MAP_ISO_8859(10); + BFS_MAP_ISO_8859(11); + // BFS_MAP_ISO_8859(12); + BFS_MAP_ISO_8859(13); + BFS_MAP_ISO_8859(14); + BFS_MAP_ISO_8859(15); + BFS_MAP_ISO_8859(16); + + BFS_MAP_ENCODING("UTF-8", ONIG_ENCODING_UTF8); + +#define BFS_MAP_EUC(name) \ + BFS_MAP_ENCODING2("EUC-" #name, "euc" #name, ONIG_ENCODING_EUC_ ## name) + + BFS_MAP_EUC(JP); + BFS_MAP_EUC(TW); + BFS_MAP_EUC(KR); + BFS_MAP_EUC(CN); + + BFS_MAP_ENCODING2("SHIFT_JIS", "SJIS", ONIG_ENCODING_SJIS); + + // BFS_MAP_ENCODING("KOI-8", ONIG_ENCODING_KOI8); + BFS_MAP_ENCODING("KOI8-R", ONIG_ENCODING_KOI8_R); + + BFS_MAP_ENCODING("CP1251", ONIG_ENCODING_CP1251); + + BFS_MAP_ENCODING("GB18030", ONIG_ENCODING_BIG5); + } + + bfs_onig_status = onig_initialize(&bfs_onig_enc, 1); + if (bfs_onig_status != ONIG_NORMAL) { + bfs_onig_enc = NULL; + } +} + +/** Initialize Oniguruma. */ +static int bfs_onig_initialize(OnigEncoding *enc) { + static pthread_once_t once = PTHREAD_ONCE_INIT; + invoke_once(&once, bfs_onig_once); + + *enc = bfs_onig_enc; + return bfs_onig_status; +} +#endif + +int bfs_regcomp(struct bfs_regex **preg, const char *pattern, enum bfs_regex_type type, enum bfs_regcomp_flags flags) { + struct bfs_regex *regex = *preg = ALLOC(struct bfs_regex); + if (!regex) { + return -1; + } + +#if BFS_USE_ONIGURUMA + // onig_error_code_to_str() says + // + // don't call this after the pattern argument of onig_new() is freed + // + // so make a defensive copy. + regex->pattern = (unsigned char *)strdup(pattern); + if (!regex->pattern) { + goto fail; + } + + regex->impl = NULL; + regex->err = ONIG_NORMAL; + + OnigSyntaxType *syntax = NULL; + switch (type) { + case BFS_REGEX_POSIX_BASIC: + syntax = ONIG_SYNTAX_POSIX_BASIC; + break; + case BFS_REGEX_POSIX_EXTENDED: + syntax = ONIG_SYNTAX_POSIX_EXTENDED; + break; + case BFS_REGEX_EMACS: + syntax = ONIG_SYNTAX_EMACS; + break; + case BFS_REGEX_GREP: + syntax = ONIG_SYNTAX_GREP; + break; + } + bfs_assert(syntax, "Invalid regex type"); + + OnigOptionType options = syntax->options; + if (flags & BFS_REGEX_ICASE) { + options |= ONIG_OPTION_IGNORECASE; + } + + OnigEncoding enc; + regex->err = bfs_onig_initialize(&enc); + if (regex->err != ONIG_NORMAL) { + return -1; + } + + const unsigned char *end = regex->pattern + strlen(pattern); + regex->err = onig_new(®ex->impl, regex->pattern, end, options, enc, syntax, ®ex->einfo); + if (regex->err != ONIG_NORMAL) { + return -1; + } +#else + int cflags = 0; + switch (type) { + case BFS_REGEX_POSIX_BASIC: + break; + case BFS_REGEX_POSIX_EXTENDED: + cflags |= REG_EXTENDED; + break; + default: + errno = EINVAL; + goto fail; + } + + if (flags & BFS_REGEX_ICASE) { + cflags |= REG_ICASE; + } + + regex->err = regcomp(®ex->impl, pattern, cflags); + if (regex->err != 0) { + // https://github.com/google/sanitizers/issues/1496 + sanitize_init(®ex->impl); + return -1; + } +#endif + + return 0; + +fail: + free(regex); + *preg = NULL; + return -1; +} + +int bfs_regexec(struct bfs_regex *regex, const char *str, enum bfs_regexec_flags flags) { + size_t len = strlen(str); + +#if BFS_USE_ONIGURUMA + const unsigned char *ustr = (const unsigned char *)str; + const unsigned char *end = ustr + len; + + // The docs for onig_{match,search}() say + // + // Do not pass invalid byte string in the regex character encoding. + if (!onigenc_is_valid_mbc_string(onig_get_encoding(regex->impl), ustr, end)) { + return 0; + } + + int ret; + if (flags & BFS_REGEX_ANCHOR) { + ret = onig_match(regex->impl, ustr, end, ustr, NULL, ONIG_OPTION_DEFAULT); + } else { + ret = onig_search(regex->impl, ustr, end, ustr, end, NULL, ONIG_OPTION_DEFAULT); + } + + if (ret >= 0) { + if (flags & BFS_REGEX_ANCHOR) { + return (size_t)ret == len; + } else { + return 1; + } + } else if (ret == ONIG_MISMATCH) { + return 0; + } else { + regex->err = ret; + return -1; + } +#else + regmatch_t match = { + .rm_so = 0, + .rm_eo = len, + }; + + int eflags = 0; +#ifdef REG_STARTEND + eflags |= REG_STARTEND; +#endif + + int ret = regexec(®ex->impl, str, 1, &match, eflags); + if (ret == 0) { + if (flags & BFS_REGEX_ANCHOR) { + return match.rm_so == 0 && (size_t)match.rm_eo == len; + } else { + return 1; + } + } else if (ret == REG_NOMATCH) { + return 0; + } else { + regex->err = ret; + return -1; + } +#endif +} + +void bfs_regfree(struct bfs_regex *regex) { + if (regex) { +#if BFS_USE_ONIGURUMA + onig_free(regex->impl); + free(regex->pattern); +#else + regfree(®ex->impl); +#endif + free(regex); + } +} + +char *bfs_regerror(const struct bfs_regex *regex) { + if (!regex) { + return strdup(xstrerror(ENOMEM)); + } + +#if BFS_USE_ONIGURUMA + unsigned char *str = malloc(ONIG_MAX_ERROR_MESSAGE_LEN); + if (str) { + onig_error_code_to_str(str, regex->err, ®ex->einfo); + } + return (char *)str; +#else + size_t len = regerror(regex->err, ®ex->impl, NULL, 0); + char *str = malloc(len); + if (str) { + regerror(regex->err, ®ex->impl, str, len); + } + return str; +#endif +} diff --git a/src/xregex.h b/src/xregex.h new file mode 100644 index 0000000..998a2b0 --- /dev/null +++ b/src/xregex.h @@ -0,0 +1,83 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> and the bfs contributors +// SPDX-License-Identifier: 0BSD + +#ifndef BFS_XREGEX_H +#define BFS_XREGEX_H + +/** + * A compiled regular expression. + */ +struct bfs_regex; + +/** + * Regex syntax flavors. + */ +enum bfs_regex_type { + BFS_REGEX_POSIX_BASIC, + BFS_REGEX_POSIX_EXTENDED, + BFS_REGEX_EMACS, + BFS_REGEX_GREP, +}; + +/** + * Regex compilation flags. + */ +enum bfs_regcomp_flags { + /** Treat the regex case-insensitively. */ + BFS_REGEX_ICASE = 1 << 0, +}; + +/** + * Regex execution flags. + */ +enum bfs_regexec_flags { + /** Only treat matches of the entire string as successful. */ + BFS_REGEX_ANCHOR = 1 << 0, +}; + +/** + * Wrapper for regcomp() that supports additional regex types. + * + * @param[out] preg + * Will hold the compiled regex. + * @param pattern + * The regular expression to compile. + * @param type + * The regular expression syntax to use. + * @param flags + * Regex compilation flags. + * @return + * 0 on success, -1 on failure. + */ +int bfs_regcomp(struct bfs_regex **preg, const char *pattern, enum bfs_regex_type type, enum bfs_regcomp_flags flags); + +/** + * Wrapper for regexec(). + * + * @param regex + * The regular expression to execute. + * @param str + * The string to match against. + * @param flags + * Regex execution flags. + * @return + * 1 for a match, 0 for no match, -1 on failure. + */ +int bfs_regexec(struct bfs_regex *regex, const char *str, enum bfs_regexec_flags flags); + +/** + * Free a compiled regex. + */ +void bfs_regfree(struct bfs_regex *regex); + +/** + * Get a human-readable regex error message. + * + * @param regex + * The compiled regex. + * @return + * A human-readable description of the error, which should be free()'d. + */ +char *bfs_regerror(const struct bfs_regex *regex); + +#endif // BFS_XREGEX_H diff --git a/src/xspawn.c b/src/xspawn.c new file mode 100644 index 0000000..33e5a4a --- /dev/null +++ b/src/xspawn.c @@ -0,0 +1,690 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +#include "prelude.h" +#include "xspawn.h" +#include "alloc.h" +#include "bfstd.h" +#include "diag.h" +#include "list.h" +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <stdlib.h> +#include <string.h> +#include <sys/resource.h> +#include <sys/types.h> +#include <unistd.h> + +#if BFS_USE_PATHS_H +# include <paths.h> +#endif + +#if _POSIX_SPAWN > 0 +# include <spawn.h> +#endif + +/** + * Types of spawn actions. + */ +enum bfs_spawn_op { + BFS_SPAWN_OPEN, + BFS_SPAWN_CLOSE, + BFS_SPAWN_DUP2, + BFS_SPAWN_FCHDIR, + BFS_SPAWN_SETRLIMIT, +}; + +/** + * A spawn action. + */ +struct bfs_spawn_action { + /** The next action in the list. */ + struct bfs_spawn_action *next; + + /** This action's operation. */ + enum bfs_spawn_op op; + /** The input fd (or -1). */ + int in_fd; + /** The output fd (or -1). */ + int out_fd; + + /** Operation-specific args. */ + union { + /** BFS_SPAWN_OPEN args. */ + struct { + const char *path; + int flags; + mode_t mode; + }; + + /** BFS_SPAWN_SETRLIMIT args. */ + struct { + int resource; + struct rlimit rlimit; + }; + }; +}; + +int bfs_spawn_init(struct bfs_spawn *ctx) { + ctx->flags = 0; + SLIST_INIT(ctx); + +#if _POSIX_SPAWN > 0 + ctx->flags |= BFS_SPAWN_USE_POSIX; + + errno = posix_spawn_file_actions_init(&ctx->actions); + if (errno != 0) { + return -1; + } + + errno = posix_spawnattr_init(&ctx->attr); + if (errno != 0) { + posix_spawn_file_actions_destroy(&ctx->actions); + return -1; + } +#endif + + return 0; +} + +int bfs_spawn_destroy(struct bfs_spawn *ctx) { +#if _POSIX_SPAWN > 0 + posix_spawnattr_destroy(&ctx->attr); + posix_spawn_file_actions_destroy(&ctx->actions); +#endif + + for_slist (struct bfs_spawn_action, action, ctx) { + free(action); + } + + return 0; +} + +#if _POSIX_SPAWN > 0 +/** Set some posix_spawnattr flags. */ +attr(maybe_unused) +static int bfs_spawn_addflags(struct bfs_spawn *ctx, short flags) { + short prev; + errno = posix_spawnattr_getflags(&ctx->attr, &prev); + if (errno != 0) { + return -1; + } + + short next = prev | flags; + if (next != prev) { + errno = posix_spawnattr_setflags(&ctx->attr, next); + if (errno != 0) { + return -1; + } + } + + return 0; +} +#endif // _POSIX_SPAWN > 0 + +/** Allocate a spawn action. */ +static struct bfs_spawn_action *bfs_spawn_action(enum bfs_spawn_op op) { + struct bfs_spawn_action *action = ALLOC(struct bfs_spawn_action); + if (!action) { + return NULL; + } + + SLIST_ITEM_INIT(action); + action->op = op; + action->in_fd = -1; + action->out_fd = -1; + return action; +} + +int bfs_spawn_addopen(struct bfs_spawn *ctx, int fd, const char *path, int flags, mode_t mode) { + struct bfs_spawn_action *action = bfs_spawn_action(BFS_SPAWN_OPEN); + if (!action) { + return -1; + } + +#if _POSIX_SPAWN > 0 + if (ctx->flags & BFS_SPAWN_USE_POSIX) { + errno = posix_spawn_file_actions_addopen(&ctx->actions, fd, path, flags, mode); + if (errno != 0) { + free(action); + return -1; + } + } +#endif + + action->out_fd = fd; + action->path = path; + action->flags = flags; + action->mode = mode; + SLIST_APPEND(ctx, action); + return 0; +} + +int bfs_spawn_addclose(struct bfs_spawn *ctx, int fd) { + struct bfs_spawn_action *action = bfs_spawn_action(BFS_SPAWN_CLOSE); + if (!action) { + return -1; + } + +#if _POSIX_SPAWN > 0 + if (ctx->flags & BFS_SPAWN_USE_POSIX) { + errno = posix_spawn_file_actions_addclose(&ctx->actions, fd); + if (errno != 0) { + free(action); + return -1; + } + } +#endif + + action->out_fd = fd; + SLIST_APPEND(ctx, action); + return 0; +} + +int bfs_spawn_adddup2(struct bfs_spawn *ctx, int oldfd, int newfd) { + struct bfs_spawn_action *action = bfs_spawn_action(BFS_SPAWN_DUP2); + if (!action) { + return -1; + } + +#if _POSIX_SPAWN > 0 + if (ctx->flags & BFS_SPAWN_USE_POSIX) { + errno = posix_spawn_file_actions_adddup2(&ctx->actions, oldfd, newfd); + if (errno != 0) { + free(action); + return -1; + } + } +#endif + + action->in_fd = oldfd; + action->out_fd = newfd; + SLIST_APPEND(ctx, action); + return 0; +} + +/** + * https://www.austingroupbugs.net/view.php?id=1208#c4830 says: + * + * ... a search of the directories passed as the environment variable + * PATH ..., using the working directory of the child process after all + * file_actions have been performed. + * + * but macOS and NetBSD resolve the PATH *before* file_actions (because there + * posix_spawn() is its own syscall). + */ +#define BFS_POSIX_SPAWNP_AFTER_FCHDIR !(__APPLE__ || __NetBSD__) + +int bfs_spawn_addfchdir(struct bfs_spawn *ctx, int fd) { + struct bfs_spawn_action *action = bfs_spawn_action(BFS_SPAWN_FCHDIR); + if (!action) { + return -1; + } + +#if BFS_HAS_POSIX_SPAWN_ADDFCHDIR +# define BFS_POSIX_SPAWN_ADDFCHDIR posix_spawn_file_actions_addfchdir +#elif BFS_HAS_POSIX_SPAWN_ADDFCHDIR_NP +# define BFS_POSIX_SPAWN_ADDFCHDIR posix_spawn_file_actions_addfchdir_np +#endif + +#if _POSIX_SPAWN > 0 && defined(BFS_POSIX_SPAWN_FCHDIR) + if (ctx->flags & BFS_SPAWN_USE_POSIX) { + errno = BFS_POSIX_SPAWN_ADDFCHDIR(&ctx->actions, fd); + if (errno != 0) { + free(action); + return -1; + } + } +#else + ctx->flags &= ~BFS_SPAWN_USE_POSIX; +#endif + + action->in_fd = fd; + SLIST_APPEND(ctx, action); + return 0; +} + +int bfs_spawn_setrlimit(struct bfs_spawn *ctx, int resource, const struct rlimit *rl) { + struct bfs_spawn_action *action = bfs_spawn_action(BFS_SPAWN_SETRLIMIT); + if (!action) { + goto fail; + } + +#ifdef POSIX_SPAWN_SETRLIMIT + if (bfs_spawn_addflags(ctx, POSIX_SPAWN_SETRLIMIT) != 0) { + goto fail; + } + + errno = posix_spawnattr_setrlimit(&ctx->attr, resource, rl); + if (errno != 0) { + goto fail; + } +#else + ctx->flags &= ~BFS_SPAWN_USE_POSIX; +#endif + + action->resource = resource; + action->rlimit = *rl; + SLIST_APPEND(ctx, action); + return 0; + +fail: + free(action); + return -1; +} + +/** + * Context for resolving executables in the $PATH. + */ +struct bfs_resolver { + /** The executable to spawn. */ + const char *exe; + /** The $PATH to resolve in. */ + char *path; + /** A buffer to hold the resolved path. */ + char *buf; + /** The size of the buffer. */ + size_t len; + /** Whether the executable is already resolved. */ + bool done; + /** Whether to free(path). */ + bool free; +}; + +/** Free a $PATH resolution context. */ +static void bfs_resolve_free(struct bfs_resolver *res) { + if (res->free) { + free(res->path); + } + free(res->buf); +} + +/** Get the next component in the $PATH. */ +static bool bfs_resolve_next(const char **path, const char **next, size_t *len) { + *path = *next; + if (!*path) { + return false; + } + + *next = strchr(*path, ':'); + if (*next) { + *len = *next - *path; + ++*next; + } else { + *len = strlen(*path); + } + + if (*len == 0) { + // POSIX 8.3: "A zero-length prefix is a legacy feature that + // indicates the current working directory." + *path = "."; + *len = 1; + } + + return true; +} + +/** Finish resolving an executable, potentially from the child process. */ +static int bfs_resolve_late(struct bfs_resolver *res) { + if (res->done) { + return 0; + } + + char *buf = res->buf; + char *end = buf + res->len; + + const char *path; + const char *next = res->path; + size_t len; + while (bfs_resolve_next(&path, &next, &len)) { + char *cur = xstpencpy(buf, end, path, len); + cur = xstpecpy(cur, end, "/"); + cur = xstpecpy(cur, end, res->exe); + if (cur == end) { + bfs_bug("PATH resolution buffer too small"); + errno = ENOMEM; + return -1; + } + + if (xfaccessat(AT_FDCWD, buf, X_OK) == 0) { + res->exe = buf; + res->done = true; + return 0; + } + } + + errno = ENOENT; + return -1; +} + +/** Check if we can skip path resolution entirely. */ +static bool bfs_can_skip_resolve(const struct bfs_resolver *res, const struct bfs_spawn *ctx) { + if (ctx && !(ctx->flags & BFS_SPAWN_USE_PATH)) { + return true; + } + + if (strchr(res->exe, '/')) { + return true; + } + + return false; +} + +/** Check if any $PATH components are relative. */ +static bool bfs_resolve_relative(const struct bfs_resolver *res) { + const char *path; + const char *next = res->path; + size_t len; + while (bfs_resolve_next(&path, &next, &len)) { + if (path[0] != '/') { + return true; + } + } + + return false; +} + +/** Check if we can resolve the executable before file actions. */ +static bool bfs_can_resolve_early(const struct bfs_resolver *res, const struct bfs_spawn *ctx) { + if (!bfs_resolve_relative(res)) { + return true; + } + + if (ctx) { + for_slist (const struct bfs_spawn_action, action, ctx) { + if (action->op == BFS_SPAWN_FCHDIR) { + return false; + } + } + } + + return true; +} + +/** Get the required path resolution buffer size. */ +static size_t bfs_resolve_capacity(const struct bfs_resolver *res) { + size_t max = 0; + + const char *path; + const char *next = res->path; + size_t len; + while (bfs_resolve_next(&path, &next, &len)) { + if (len > max) { + max = len; + } + } + + // path + "/" + exe + '\0' + return max + 1 + strlen(res->exe) + 1; +} + +/** Begin resolving an executable, from the parent process. */ +static int bfs_resolve_early(struct bfs_resolver *res, const char *exe, const struct bfs_spawn *ctx) { + *res = (struct bfs_resolver) { + .exe = exe, + }; + + if (bfs_can_skip_resolve(res, ctx)) { + res->done = true; + return 0; + } + + res->path = getenv("PATH"); + if (!res->path) { +#if defined(_CS_PATH) + res->path = xconfstr(_CS_PATH); + res->free = true; +#elif defined(_PATH_DEFPATH) + res->path = _PATH_DEFPATH; +#else + errno = ENOENT; +#endif + } + if (!res->path) { + goto fail; + } + + bool can_finish = bfs_can_resolve_early(res, ctx); + +#if BFS_POSIX_SPAWNP_AFTER_FCHDIR + bool use_posix = ctx && (ctx->flags & BFS_SPAWN_USE_POSIX); + if (!can_finish && use_posix) { + // posix_spawnp() will do the resolution, so don't bother + // allocating a buffer + return 0; + } +#endif + + res->len = bfs_resolve_capacity(res); + res->buf = malloc(res->len); + if (!res->buf) { + goto fail; + } + + if (can_finish && bfs_resolve_late(res) != 0) { + goto fail; + } + + return 0; + +fail: + bfs_resolve_free(res); + return -1; +} + +#if _POSIX_SPAWN > 0 + +/** bfs_spawn() implementation using posix_spawn(). */ +static pid_t bfs_posix_spawn(struct bfs_resolver *res, const struct bfs_spawn *ctx, char **argv, char **envp) { + pid_t ret; + + if (res->done) { + errno = posix_spawn(&ret, res->exe, &ctx->actions, &ctx->attr, argv, envp); + } else { + errno = posix_spawnp(&ret, res->exe, &ctx->actions, &ctx->attr, argv, envp); + } + + if (errno != 0) { + return -1; + } + + return ret; +} + +/** Check if we can use posix_spawn(). */ +static bool bfs_use_posix_spawn(const struct bfs_resolver *res, const struct bfs_spawn *ctx) { + if (!(ctx->flags & BFS_SPAWN_USE_POSIX)) { + return false; + } + +#if !BFS_POSIX_SPAWNP_AFTER_FCHDIR + if (!res->done) { + return false; + } +#endif + + return true; +} + +#endif // _POSIX_SPAWN > 0 + +/** Actually exec() the new process. */ +static noreturn void bfs_spawn_exec(struct bfs_resolver *res, const struct bfs_spawn *ctx, char **argv, char **envp, int pipefd[2]) { + xclose(pipefd[0]); + + for_slist (const struct bfs_spawn_action, action, ctx) { + int fd; + + // Move the error-reporting pipe out of the way if necessary... + if (action->out_fd == pipefd[1]) { + fd = dup_cloexec(pipefd[1]); + if (fd < 0) { + goto fail; + } + xclose(pipefd[1]); + pipefd[1] = fd; + } + + // ... and pretend the pipe doesn't exist + if (action->in_fd == pipefd[1]) { + errno = EBADF; + goto fail; + } + + switch (action->op) { + case BFS_SPAWN_OPEN: + fd = open(action->path, action->flags, action->mode); + if (fd < 0) { + goto fail; + } + if (fd != action->out_fd) { + if (dup2(fd, action->out_fd) < 0) { + goto fail; + } + } + break; + case BFS_SPAWN_CLOSE: + if (close(action->out_fd) != 0) { + goto fail; + } + break; + case BFS_SPAWN_DUP2: + if (dup2(action->in_fd, action->out_fd) < 0) { + goto fail; + } + break; + case BFS_SPAWN_FCHDIR: + if (fchdir(action->in_fd) != 0) { + goto fail; + } + break; + case BFS_SPAWN_SETRLIMIT: + if (setrlimit(action->resource, &action->rlimit) != 0) { + goto fail; + } + break; + } + } + + if (bfs_resolve_late(res) != 0) { + goto fail; + } + + execve(res->exe, argv, envp); + +fail:; + int error = errno; + + // In case of a write error, the parent will still see that we exited + // unsuccessfully, but won't know why + (void)xwrite(pipefd[1], &error, sizeof(error)); + + xclose(pipefd[1]); + _Exit(127); +} + +/** bfs_spawn() implementation using fork()/exec(). */ +static pid_t bfs_fork_spawn(struct bfs_resolver *res, const struct bfs_spawn *ctx, char **argv, char **envp) { + // Use a pipe to report errors from the child + int pipefd[2]; + if (pipe_cloexec(pipefd) != 0) { + return -1; + } + + // Block signals before fork() so handlers don't run in the child + sigset_t new_mask; + if (sigfillset(&new_mask) != 0) { + goto fail; + } + sigset_t old_mask; + errno = pthread_sigmask(SIG_BLOCK, &new_mask, &old_mask); + if (errno != 0) { + goto fail; + } + + pid_t pid = fork(); + if (pid == 0) { + // Child + bfs_spawn_exec(res, ctx, argv, envp, pipefd); + } + + // Restore the original signal mask + int ret = pthread_sigmask(SIG_SETMASK, &old_mask, NULL); + bfs_everify(ret == 0, "pthread_sigmask()"); + + if (pid < 0) { + // fork() failed + goto fail; + } + + xclose(pipefd[1]); + + int error; + ssize_t nbytes = xread(pipefd[0], &error, sizeof(error)); + xclose(pipefd[0]); + if (nbytes == sizeof(error)) { + xwaitpid(pid, NULL, 0); + errno = error; + return -1; + } + + return pid; + +fail: + close_quietly(pipefd[1]); + close_quietly(pipefd[0]); + return -1; +} + +/** Call the right bfs_spawn() implementation. */ +static pid_t bfs_spawn_impl(struct bfs_resolver *res, const struct bfs_spawn *ctx, char **argv, char **envp) { +#if _POSIX_SPAWN > 0 + if (bfs_use_posix_spawn(res, ctx)) { + return bfs_posix_spawn(res, ctx, argv, envp); + } +#endif + + return bfs_fork_spawn(res, ctx, argv, envp); +} + +pid_t bfs_spawn(const char *exe, const struct bfs_spawn *ctx, char **argv, char **envp) { + // execvp()/posix_spawnp() are typically implemented with repeated + // execv() calls for each $PATH component until one succeeds. It's + // faster to resolve the full path ahead of time. + struct bfs_resolver res; + if (bfs_resolve_early(&res, exe, ctx) != 0) { + return -1; + } + + extern char **environ; + if (!envp) { + envp = environ; + } + + pid_t ret = bfs_spawn_impl(&res, ctx, argv, envp); + bfs_resolve_free(&res); + return ret; +} + +char *bfs_spawn_resolve(const char *exe) { + struct bfs_resolver res; + if (bfs_resolve_early(&res, exe, NULL) != 0) { + return NULL; + } + if (bfs_resolve_late(&res) != 0) { + bfs_resolve_free(&res); + return NULL; + } + + char *ret; + if (res.exe == res.buf) { + ret = res.buf; + res.buf = NULL; + } else { + ret = strdup(res.exe); + } + + bfs_resolve_free(&res); + return ret; +} diff --git a/src/xspawn.h b/src/xspawn.h new file mode 100644 index 0000000..6a8f54a --- /dev/null +++ b/src/xspawn.h @@ -0,0 +1,134 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +/** + * A process-spawning library inspired by posix_spawn(). + */ + +#ifndef BFS_XSPAWN_H +#define BFS_XSPAWN_H + +#include "prelude.h" +#include <sys/resource.h> +#include <sys/types.h> +#include <unistd.h> + +#if _POSIX_SPAWN > 0 +# include <spawn.h> +#endif + +/** + * bfs_spawn() flags. + */ +enum bfs_spawn_flags { + /** Use the PATH variable to resolve the executable (like execvp()). */ + BFS_SPAWN_USE_PATH = 1 << 0, + /** Whether posix_spawn() can be used. */ + BFS_SPAWN_USE_POSIX = 1 << 1, +}; + +/** + * bfs_spawn() attributes, controlling the context of the new process. + */ +struct bfs_spawn { + /** Spawn flags. */ + enum bfs_spawn_flags flags; + + /** Linked list of actions. */ + struct bfs_spawn_action *head; + struct bfs_spawn_action **tail; + +#if _POSIX_SPAWN > 0 + /** posix_spawn() context, for when we can use it. */ + posix_spawn_file_actions_t actions; + posix_spawnattr_t attr; +#endif +}; + +/** + * Create a new bfs_spawn() context. + * + * @return + * 0 on success, -1 on failure. + */ +int bfs_spawn_init(struct bfs_spawn *ctx); + +/** + * Destroy a bfs_spawn() context. + * + * @return + * 0 on success, -1 on failure. + */ +int bfs_spawn_destroy(struct bfs_spawn *ctx); + +/** + * Add an open() action to a bfs_spawn() context. + * + * @return + * 0 on success, -1 on failure. + */ +int bfs_spawn_addopen(struct bfs_spawn *ctx, int fd, const char *path, int flags, mode_t mode); + +/** + * Add a close() action to a bfs_spawn() context. + * + * @return + * 0 on success, -1 on failure. + */ +int bfs_spawn_addclose(struct bfs_spawn *ctx, int fd); + +/** + * Add a dup2() action to a bfs_spawn() context. + * + * @return + * 0 on success, -1 on failure. + */ +int bfs_spawn_adddup2(struct bfs_spawn *ctx, int oldfd, int newfd); + +/** + * Add an fchdir() action to a bfs_spawn() context. + * + * @return + * 0 on success, -1 on failure. + */ +int bfs_spawn_addfchdir(struct bfs_spawn *ctx, int fd); + +/** + * Apply setrlimit() to a bfs_spawn() context. + * + * @return + * 0 on success, -1 on failure. + */ +int bfs_spawn_setrlimit(struct bfs_spawn *ctx, int resource, const struct rlimit *rl); + +/** + * Spawn a new process. + * + * @param exe + * The executable to run. + * @param ctx + * The context for the new process. + * @param argv + * The arguments for the new process. + * @param envp + * The environment variables for the new process (NULL for the current + * environment). + * @return + * The PID of the new process, or -1 on error. + */ +pid_t bfs_spawn(const char *exe, const struct bfs_spawn *ctx, char **argv, char **envp); + +/** + * Look up an executable in the current PATH, as BFS_SPAWN_USE_PATH or execvp() + * would do. + * + * @param exe + * The name of the binary to execute. Bare names without a '/' will be + * searched on the provided PATH. + * @return + * The full path to the executable, which should be free()'d, or NULL on + * failure. + */ +char *bfs_spawn_resolve(const char *exe); + +#endif // BFS_XSPAWN_H diff --git a/src/xtime.c b/src/xtime.c new file mode 100644 index 0000000..2808455 --- /dev/null +++ b/src/xtime.c @@ -0,0 +1,348 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +#include "prelude.h" +#include "xtime.h" +#include "bfstd.h" +#include "diag.h" +#include "sanity.h" +#include <errno.h> +#include <limits.h> +#include <sys/time.h> +#include <time.h> +#include <unistd.h> + +int xmktime(struct tm *tm, time_t *timep) { + time_t time = mktime(tm); + + if (time == -1) { + int error = errno; + + struct tm tmp; + if (!localtime_r(&time, &tmp)) { + bfs_ebug("localtime_r(-1)"); + return -1; + } + + if (tm->tm_year != tmp.tm_year || tm->tm_yday != tmp.tm_yday + || tm->tm_hour != tmp.tm_hour || tm->tm_min != tmp.tm_min || tm->tm_sec != tmp.tm_sec) { + errno = error; + return -1; + } + } + + *timep = time; + return 0; +} + +// FreeBSD is missing an interceptor +#if BFS_HAS_TIMEGM && !(__FreeBSD__ && SANITIZE_MEMORY) + +int xtimegm(struct tm *tm, time_t *timep) { + time_t time = timegm(tm); + + if (time == -1) { + int error = errno; + + struct tm tmp; + if (!gmtime_r(&time, &tmp)) { + bfs_ebug("gmtime_r(-1)"); + return -1; + } + + if (tm->tm_year != tmp.tm_year || tm->tm_yday != tmp.tm_yday + || tm->tm_hour != tmp.tm_hour || tm->tm_min != tmp.tm_min || tm->tm_sec != tmp.tm_sec) { + errno = error; + return -1; + } + } + + *timep = time; + return 0; +} + +#else + +static int safe_add(int *value, int delta) { + if (*value >= 0) { + if (delta > INT_MAX - *value) { + return -1; + } + } else { + if (delta < INT_MIN - *value) { + return -1; + } + } + + *value += delta; + return 0; +} + +static int floor_div(int n, int d) { + int a = n < 0; + return (n + a) / d - a; +} + +static int wrap(int *value, int max, int *next) { + int carry = floor_div(*value, max); + *value -= carry * max; + return safe_add(next, carry); +} + +static int month_length(int year, int month) { + static const int month_lengths[] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; + int ret = month_lengths[month]; + if (month == 1 && year % 4 == 0 && (year % 100 != 0 || (year + 300) % 400 == 0)) { + ++ret; + } + return ret; +} + +int xtimegm(struct tm *tm, time_t *timep) { + struct tm copy = *tm; + copy.tm_isdst = 0; + + if (wrap(©.tm_sec, 60, ©.tm_min) != 0) { + goto overflow; + } + if (wrap(©.tm_min, 60, ©.tm_hour) != 0) { + goto overflow; + } + if (wrap(©.tm_hour, 24, ©.tm_mday) != 0) { + goto overflow; + } + + // In order to wrap the days of the month, we first need to know what + // month it is + if (wrap(©.tm_mon, 12, ©.tm_year) != 0) { + goto overflow; + } + + if (copy.tm_mday < 1) { + do { + --copy.tm_mon; + if (wrap(©.tm_mon, 12, ©.tm_year) != 0) { + goto overflow; + } + + copy.tm_mday += month_length(copy.tm_year, copy.tm_mon); + } while (copy.tm_mday < 1); + } else { + while (true) { + int days = month_length(copy.tm_year, copy.tm_mon); + if (copy.tm_mday <= days) { + break; + } + + copy.tm_mday -= days; + ++copy.tm_mon; + if (wrap(©.tm_mon, 12, ©.tm_year) != 0) { + goto overflow; + } + } + } + + copy.tm_yday = 0; + for (int i = 0; i < copy.tm_mon; ++i) { + copy.tm_yday += month_length(copy.tm_year, i); + } + copy.tm_yday += copy.tm_mday - 1; + + int leap_days; + // Compute floor((year - 69)/4) - floor((year - 1)/100) + floor((year + 299)/400) without overflows + if (copy.tm_year >= 0) { + leap_days = floor_div(copy.tm_year - 69, 4) - floor_div(copy.tm_year - 1, 100) + floor_div(copy.tm_year - 101, 400) + 1; + } else { + leap_days = floor_div(copy.tm_year + 3, 4) - floor_div(copy.tm_year + 99, 100) + floor_div(copy.tm_year + 299, 400) - 17; + } + + long long epoch_days = 365LL * (copy.tm_year - 70) + leap_days + copy.tm_yday; + copy.tm_wday = (epoch_days + 4) % 7; + if (copy.tm_wday < 0) { + copy.tm_wday += 7; + } + + long long epoch_time = copy.tm_sec + 60 * (copy.tm_min + 60 * (copy.tm_hour + 24 * epoch_days)); + time_t time = (time_t)epoch_time; + if ((long long)time != epoch_time) { + goto overflow; + } + + *tm = copy; + *timep = time; + return 0; + +overflow: + errno = EOVERFLOW; + return -1; +} + +#endif // !BFS_HAS_TIMEGM + +/** Parse a decimal digit. */ +static int xgetdigit(char c) { + int ret = c - '0'; + if (ret < 0 || ret > 9) { + return -1; + } else { + return ret; + } +} + +/** Parse some digits from a timestamp. */ +static int xgetpart(const char **str, size_t n, int *result) { + *result = 0; + + for (size_t i = 0; i < n; ++i, ++*str) { + int dig = xgetdigit(**str); + if (dig < 0) { + return -1; + } + *result *= 10; + *result += dig; + } + + return 0; +} + +int xgetdate(const char *str, struct timespec *result) { + struct tm tm = { + .tm_isdst = -1, + }; + + int tz_hour = 0; + int tz_min = 0; + bool tz_negative = false; + bool local = true; + + // YYYY + if (xgetpart(&str, 4, &tm.tm_year) != 0) { + goto invalid; + } + tm.tm_year -= 1900; + + // MM + if (*str == '-') { + ++str; + } + if (xgetpart(&str, 2, &tm.tm_mon) != 0) { + goto invalid; + } + tm.tm_mon -= 1; + + // DD + if (*str == '-') { + ++str; + } + if (xgetpart(&str, 2, &tm.tm_mday) != 0) { + goto invalid; + } + + if (!*str) { + goto end; + } else if (*str == 'T' || *str == ' ') { + ++str; + } + + // hh + if (xgetpart(&str, 2, &tm.tm_hour) != 0) { + goto invalid; + } + + // mm + if (!*str) { + goto end; + } else if (*str == ':') { + ++str; + } else if (xgetdigit(*str) < 0) { + goto zone; + } + if (xgetpart(&str, 2, &tm.tm_min) != 0) { + goto invalid; + } + + // ss + if (!*str) { + goto end; + } else if (*str == ':') { + ++str; + } else if (xgetdigit(*str) < 0) { + goto zone; + } + if (xgetpart(&str, 2, &tm.tm_sec) != 0) { + goto invalid; + } + +zone: + if (!*str) { + goto end; + } else if (*str == 'Z') { + local = false; + ++str; + } else if (*str == '+' || *str == '-') { + local = false; + tz_negative = *str == '-'; + ++str; + + // hh + if (xgetpart(&str, 2, &tz_hour) != 0) { + goto invalid; + } + + // mm + if (!*str) { + goto end; + } else if (*str == ':') { + ++str; + } + if (xgetpart(&str, 2, &tz_min) != 0) { + goto invalid; + } + } else { + goto invalid; + } + + if (*str) { + goto invalid; + } + +end: + if (local) { + if (xmktime(&tm, &result->tv_sec) != 0) { + goto error; + } + } else { + if (xtimegm(&tm, &result->tv_sec) != 0) { + goto error; + } + + int offset = (tz_hour * 60 + tz_min) * 60; + if (tz_negative) { + result->tv_sec += offset; + } else { + result->tv_sec -= offset; + } + } + + result->tv_nsec = 0; + return 0; + +invalid: + errno = EINVAL; +error: + return -1; +} + +int xgettime(struct timespec *result) { +#if _POSIX_TIMERS > 0 + return clock_gettime(CLOCK_REALTIME, result); +#else + struct timeval tv; + int ret = gettimeofday(&tv, NULL); + if (ret == 0) { + result->tv_sec = tv.tv_sec; + result->tv_nsec = tv.tv_usec * 1000L; + } + return ret; +#endif +} diff --git a/src/xtime.h b/src/xtime.h new file mode 100644 index 0000000..fb60ae4 --- /dev/null +++ b/src/xtime.h @@ -0,0 +1,59 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +/** + * Date/time handling. + */ + +#ifndef BFS_XTIME_H +#define BFS_XTIME_H + +#include <time.h> + +/** + * mktime() wrapper that reports errors more reliably. + * + * @param[in,out] tm + * The struct tm to convert. + * @param[out] timep + * A pointer to the result. + * @return + * 0 on success, -1 on failure. + */ +int xmktime(struct tm *tm, time_t *timep); + +/** + * A portable timegm(), the inverse of gmtime(). + * + * @param[in,out] tm + * The struct tm to convert. + * @param[out] timep + * A pointer to the result. + * @return + * 0 on success, -1 on failure. + */ +int xtimegm(struct tm *tm, time_t *timep); + +/** + * Parse an ISO 8601-style timestamp. + * + * @param[in] str + * The string to parse. + * @param[out] result + * A pointer to the result. + * @return + * 0 on success, -1 on failure. + */ +int xgetdate(const char *str, struct timespec *result); + +/** + * Get the current time. + * + * @param[out] result + * A pointer to the result. + * @return + * 0 on success, -1 on failure. + */ +int xgettime(struct timespec *result); + +#endif // BFS_XTIME_H |