diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/alloc.c | 158 | ||||
-rw-r--r-- | src/alloc.h | 260 | ||||
-rw-r--r-- | src/atomic.h | 37 | ||||
-rw-r--r-- | src/bar.c | 203 | ||||
-rw-r--r-- | src/bar.h | 4 | ||||
-rw-r--r-- | src/bfs.h | 232 | ||||
-rw-r--r-- | src/bfstd.c | 670 | ||||
-rw-r--r-- | src/bfstd.h | 345 | ||||
-rw-r--r-- | src/bftw.c | 1198 | ||||
-rw-r--r-- | src/bftw.h | 51 | ||||
-rw-r--r-- | src/bit.h | 314 | ||||
-rw-r--r-- | src/color.c | 786 | ||||
-rw-r--r-- | src/color.h | 45 | ||||
-rw-r--r-- | src/config.h | 213 | ||||
-rw-r--r-- | src/ctx.c | 174 | ||||
-rw-r--r-- | src/ctx.h | 82 | ||||
-rw-r--r-- | src/darray.c | 90 | ||||
-rw-r--r-- | src/darray.h | 97 | ||||
-rw-r--r-- | src/diag.c | 86 | ||||
-rw-r--r-- | src/diag.h | 168 | ||||
-rw-r--r-- | src/dir.c | 125 | ||||
-rw-r--r-- | src/dir.h | 46 | ||||
-rw-r--r-- | src/dstring.c | 187 | ||||
-rw-r--r-- | src/dstring.h | 220 | ||||
-rw-r--r-- | src/eval.c | 632 | ||||
-rw-r--r-- | src/eval.h | 12 | ||||
-rw-r--r-- | src/exec.c | 81 | ||||
-rw-r--r-- | src/exec.h | 12 | ||||
-rw-r--r-- | src/expr.c | 89 | ||||
-rw-r--r-- | src/expr.h | 108 | ||||
-rw-r--r-- | src/fsade.c | 225 | ||||
-rw-r--r-- | src/fsade.h | 44 | ||||
-rw-r--r-- | src/ioq.c | 1076 | ||||
-rw-r--r-- | src/ioq.h | 118 | ||||
-rw-r--r-- | src/list.h | 451 | ||||
-rw-r--r-- | src/main.c | 26 | ||||
-rw-r--r-- | src/mtab.c | 101 | ||||
-rw-r--r-- | src/mtab.h | 14 | ||||
-rw-r--r-- | src/opt.c | 2767 | ||||
-rw-r--r-- | src/opt.h | 3 | ||||
-rw-r--r-- | src/parse.c | 2381 | ||||
-rw-r--r-- | src/parse.h | 4 | ||||
-rw-r--r-- | src/prelude.h | 130 | ||||
-rw-r--r-- | src/printf.c | 555 | ||||
-rw-r--r-- | src/printf.h | 12 | ||||
-rw-r--r-- | src/pwcache.c | 14 | ||||
-rw-r--r-- | src/pwcache.h | 24 | ||||
-rw-r--r-- | src/sanity.h | 54 | ||||
-rw-r--r-- | src/sighook.c | 692 | ||||
-rw-r--r-- | src/sighook.h | 83 | ||||
-rw-r--r-- | src/stat.c | 325 | ||||
-rw-r--r-- | src/stat.h | 102 | ||||
-rw-r--r-- | src/thread.c | 94 | ||||
-rw-r--r-- | src/thread.h | 54 | ||||
-rw-r--r-- | src/trie.c | 273 | ||||
-rw-r--r-- | src/trie.h | 97 | ||||
-rw-r--r-- | src/typo.c | 6 | ||||
-rw-r--r-- | src/typo.h | 4 | ||||
-rw-r--r-- | src/version.c | 32 | ||||
-rw-r--r-- | src/xregex.c | 74 | ||||
-rw-r--r-- | src/xregex.h | 20 | ||||
-rw-r--r-- | src/xspawn.c | 687 | ||||
-rw-r--r-- | src/xspawn.h | 65 | ||||
-rw-r--r-- | src/xtime.c | 337 | ||||
-rw-r--r-- | src/xtime.h | 97 |
65 files changed, 12142 insertions, 5624 deletions
diff --git a/src/alloc.c b/src/alloc.c index 0b978ba..f505eda 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -2,33 +2,42 @@ // SPDX-License-Identifier: 0BSD #include "alloc.h" + +#include "bfs.h" #include "bit.h" #include "diag.h" #include "sanity.h" + #include <errno.h> #include <stdlib.h> +#include <stdint.h> #include <string.h> -/** Portable aligned_alloc()/posix_memalign(). */ +/** The largest possible allocation size. */ +#if PTRDIFF_MAX < SIZE_MAX / 2 +# define ALLOC_MAX ((size_t)PTRDIFF_MAX) +#else +# define ALLOC_MAX (SIZE_MAX / 2) +#endif + +/** posix_memalign() wrapper. */ static void *xmemalign(size_t align, size_t size) { bfs_assert(has_single_bit(align)); bfs_assert(align >= sizeof(void *)); - bfs_assert((size & (align - 1)) == 0); -#if __APPLE__ + // Since https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2072.htm, + // aligned_alloc() doesn't require the size to be a multiple of align. + // But the sanitizers don't know about that yet, so always use + // posix_memalign(). void *ptr = NULL; errno = posix_memalign(&ptr, align, size); return ptr; -#else - return aligned_alloc(align, size); -#endif } void *alloc(size_t align, size_t size) { bfs_assert(has_single_bit(align)); - bfs_assert((size & (align - 1)) == 0); - if (size >> (SIZE_WIDTH - 1)) { + if (size > ALLOC_MAX) { errno = EOVERFLOW; return NULL; } @@ -42,9 +51,8 @@ void *alloc(size_t align, size_t size) { void *zalloc(size_t align, size_t size) { bfs_assert(has_single_bit(align)); - bfs_assert((size & (align - 1)) == 0); - if (size >> (SIZE_WIDTH - 1)) { + if (size > ALLOC_MAX) { errno = EOVERFLOW; return NULL; } @@ -60,6 +68,64 @@ void *zalloc(size_t align, size_t size) { return ret; } +void *xrealloc(void *ptr, size_t align, size_t old_size, size_t new_size) { + bfs_assert(has_single_bit(align)); + + if (new_size == 0) { + free(ptr); + return NULL; + } else if (new_size > ALLOC_MAX) { + errno = EOVERFLOW; + return NULL; + } + + if (align <= alignof(max_align_t)) { + return realloc(ptr, new_size); + } + + // There is no aligned_realloc(), so reallocate and copy manually + void *ret = xmemalign(align, new_size); + if (!ret) { + return NULL; + } + + size_t min_size = old_size < new_size ? old_size : new_size; + if (min_size) { + memcpy(ret, ptr, min_size); + } + + free(ptr); + return ret; +} + +void *reserve(void *ptr, size_t align, size_t size, size_t count) { + // No need to overflow-check the current size + size_t old_size = size * count; + + // Capacity is doubled every power of two, from 0→1, 1→2, 2→4, etc. + // If we stayed within the same size class, reuse ptr. + if (count & (count - 1)) { + // Tell sanitizers about the new array element + sanitize_resize(ptr, old_size, old_size + size, bit_ceil(count) * size); + errno = 0; + return ptr; + } + + // No need to overflow-check; xrealloc() will fail before we overflow + size_t new_size = count ? 2 * old_size : size; + void *ret = xrealloc(ptr, align, old_size, new_size); + if (!ret) { + // errno is used to communicate success/failure to the RESERVE() macro + bfs_assert(errno != 0); + return ptr; + } + + // Pretend we only allocated one more element + sanitize_resize(ret, new_size, old_size + size, new_size); + errno = 0; + return ret; +} + /** * An arena allocator chunk. */ @@ -89,7 +155,7 @@ static void chunk_set_next(const struct arena *arena, union chunk *chunk, union void arena_init(struct arena *arena, size_t align, size_t size) { bfs_assert(has_single_bit(align)); - bfs_assert((size & (align - 1)) == 0); + bfs_assert(is_aligned(align, size)); if (align < alignof(union chunk)) { align = alignof(union chunk); @@ -97,7 +163,7 @@ void arena_init(struct arena *arena, size_t align, size_t size) { if (size < sizeof(union chunk)) { size = sizeof(union chunk); } - bfs_assert((size & (align - 1)) == 0); + bfs_assert(is_aligned(align, size)); arena->chunks = NULL; arena->nslabs = 0; @@ -107,13 +173,8 @@ void arena_init(struct arena *arena, size_t align, size_t size) { } /** Allocate a new slab. */ +_cold static int slab_alloc(struct arena *arena) { - void **slabs = realloc(arena->slabs, sizeof_array(void *, arena->nslabs + 1)); - if (!slabs) { - return -1; - } - arena->slabs = slabs; - // Make the initial allocation size ~4K size_t size = 4096; if (size < arena->size) { @@ -130,6 +191,13 @@ static int slab_alloc(struct arena *arena) { return -1; } + // Grow the slab array + void **pslab = RESERVE(void *, &arena->slabs, &arena->nslabs); + if (!pslab) { + free(slab); + return -1; + } + // Fix the last chunk->next offset void *last = (char *)slab + size - arena->size; chunk_set_next(arena, last, arena->chunks); @@ -137,7 +205,7 @@ static int slab_alloc(struct arena *arena) { // We can rely on zero-initialized slabs, but others shouldn't sanitize_uninit(slab, size); - arena->chunks = arena->slabs[arena->nslabs++] = slab; + arena->chunks = *pslab = slab; return 0; } @@ -160,6 +228,7 @@ void arena_free(struct arena *arena, void *ptr) { union chunk *chunk = ptr; chunk_set_next(arena, chunk, arena->chunks); arena->chunks = chunk; + sanitize_uninit(chunk, arena->size); sanitize_free(chunk, arena->size); } @@ -179,7 +248,7 @@ void arena_destroy(struct arena *arena) { sanitize_uninit(arena); } -void varena_init(struct varena *varena, size_t align, size_t min, size_t offset, size_t size) { +void varena_init(struct varena *varena, size_t align, size_t offset, size_t size) { varena->align = align; varena->offset = offset; varena->size = size; @@ -188,7 +257,7 @@ void varena_init(struct varena *varena, size_t align, size_t min, size_t offset, // The smallest size class is at least as many as fit in the smallest // aligned allocation size - size_t min_count = (flex_size(align, min, offset, size, 1) - offset + size - 1) / size; + size_t min_count = (flex_size(align, offset, size, 1) - offset + size - 1) / size; varena->shift = bit_width(min_count - 1); } @@ -201,28 +270,23 @@ static size_t varena_size_class(struct varena *varena, size_t count) { /** Get the exact size of a flexible struct. */ static size_t varena_exact_size(const struct varena *varena, size_t count) { - return flex_size(varena->align, 0, varena->offset, varena->size, count); + return flex_size(varena->align, varena->offset, varena->size, count); } /** Get the arena for the given array length. */ static struct arena *varena_get(struct varena *varena, size_t count) { size_t i = varena_size_class(varena, count); - if (i >= varena->narenas) { - size_t narenas = i + 1; - struct arena *arenas = realloc(varena->arenas, sizeof_array(struct arena, narenas)); - if (!arenas) { + while (i >= varena->narenas) { + size_t j = varena->narenas; + struct arena *arena = RESERVE(struct arena, &varena->arenas, &varena->narenas); + if (!arena) { return NULL; } - for (size_t j = varena->narenas; j < narenas; ++j) { - size_t shift = j + varena->shift; - size_t size = varena_exact_size(varena, (size_t)1 << shift); - arena_init(&arenas[j], varena->align, size); - } - - varena->narenas = narenas; - varena->arenas = arenas; + size_t shift = j + varena->shift; + size_t size = varena_exact_size(varena, (size_t)1 << shift); + arena_init(arena, varena->align, size); } return &varena->arenas[i]; @@ -240,8 +304,7 @@ void *varena_alloc(struct varena *varena, size_t count) { } // Tell the sanitizers the exact size of the allocated struct - sanitize_free(ret, arena->size); - sanitize_alloc(ret, varena_exact_size(varena, count)); + sanitize_resize(ret, arena->size, varena_exact_size(varena, count), arena->size); return ret; } @@ -253,15 +316,14 @@ void *varena_realloc(struct varena *varena, void *ptr, size_t old_count, size_t return NULL; } - size_t new_exact_size = varena_exact_size(varena, new_count); - size_t old_exact_size = varena_exact_size(varena, old_count); + size_t old_size = old_arena->size; + size_t new_size = new_arena->size; if (new_arena == old_arena) { - if (new_count < old_count) { - sanitize_free((char *)ptr + new_exact_size, old_exact_size - new_exact_size); - } else if (new_count > old_count) { - sanitize_alloc((char *)ptr + old_exact_size, new_exact_size - old_exact_size); - } + sanitize_resize(ptr, + varena_exact_size(varena, old_count), + varena_exact_size(varena, new_count), + new_size); return ptr; } @@ -270,16 +332,18 @@ void *varena_realloc(struct varena *varena, void *ptr, size_t old_count, size_t return NULL; } - size_t old_size = old_arena->size; - sanitize_alloc((char *)ptr + old_exact_size, old_size - old_exact_size); + // Non-sanitized builds don't bother computing exact sizes, and just use + // the potentially-larger arena size for each size class instead. To + // allow the below memcpy() to work with the less-precise sizes, expand + // the old allocation to its full capacity. + sanitize_resize(ptr, varena_exact_size(varena, old_count), old_size, old_size); - size_t new_size = new_arena->size; size_t min_size = new_size < old_size ? new_size : old_size; memcpy(ret, ptr, min_size); arena_free(old_arena, ptr); - sanitize_free((char *)ret + new_exact_size, new_size - new_exact_size); + sanitize_resize(ret, new_size, varena_exact_size(varena, new_count), new_size); return ret; } diff --git a/src/alloc.h b/src/alloc.h index 5f0c423..1fafbab 100644 --- a/src/alloc.h +++ b/src/alloc.h @@ -8,118 +8,145 @@ #ifndef BFS_ALLOC_H #define BFS_ALLOC_H -#include "config.h" +#include "bfs.h" + +#include <errno.h> #include <stddef.h> +#include <stdlib.h> + +#define IS_ALIGNED(align, size) \ + (((size) & ((align) - 1)) == 0) + +/** Check if a size is properly aligned. */ +static inline bool is_aligned(size_t align, size_t size) { + return IS_ALIGNED(align, size); +} + +#define ALIGN_FLOOR(align, size) \ + ((size) & ~((align) - 1)) /** Round down to a multiple of an alignment. */ static inline size_t align_floor(size_t align, size_t size) { - return size & ~(align - 1); + return ALIGN_FLOOR(align, size); } +#define ALIGN_CEIL(align, size) \ + ((((size) - 1) | ((align) - 1)) + 1) + /** Round up to a multiple of an alignment. */ static inline size_t align_ceil(size_t align, size_t size) { - return align_floor(align, size + align - 1); + return ALIGN_CEIL(align, size); } /** - * Saturating array size. - * - * @param align - * Array element alignment. - * @param size - * Array element size. - * @param count - * Array element count. - * @return - * size * count, saturating to the maximum aligned value on overflow. + * Saturating size addition. */ -static inline size_t array_size(size_t align, size_t size, size_t count) { +static inline size_t size_add(size_t lhs, size_t rhs) { + size_t ret = lhs + rhs; + return ret >= lhs ? ret : (size_t)-1; +} + +/** + * Saturating size multiplication. + */ +static inline size_t size_mul(size_t size, size_t count) { size_t ret = size * count; - return ret / size == count ? ret : ~(align - 1); + return ret / size == count ? ret : (size_t)-1; } /** Saturating array sizeof. */ #define sizeof_array(type, count) \ - array_size(alignof(type), sizeof(type), count) + size_mul(sizeof(type), count) /** Size of a struct/union field. */ #define sizeof_member(type, member) \ sizeof(((type *)NULL)->member) /** + * @internal + * Our flexible struct size calculations assume that structs have the minimum + * trailing padding to align the type properly. A pathological ABI that adds + * extra padding would result in us under-allocating space for those structs, + * so we static_assert() that no such padding exists. + */ +#define ASSERT_FLEX_ABI(type, member) \ + ASSERT_FLEX_ABI_( \ + ALIGN_CEIL(alignof(type), offsetof(type, member)) >= sizeof(type), \ + "Unexpected tail padding in " #type) + +/** + * @internal + * The contortions here allow static_assert() to be used in expressions, rather + * than just declarations. + */ +#define ASSERT_FLEX_ABI_(...) \ + ((void)sizeof(struct { char _; static_assert(__VA_ARGS__); })) + +/** * Saturating flexible struct size. * - * @param align + * @align * Struct alignment. - * @param min - * Minimum struct size. - * @param offset + * @offset * Flexible array member offset. - * @param size + * @size * Flexible array element size. - * @param count + * @count * Flexible array element count. * @return * The size of the struct with count flexible array elements. Saturates * to the maximum aligned value on overflow. */ -static inline size_t flex_size(size_t align, size_t min, size_t offset, size_t size, size_t count) { - size_t ret = size * count; - size_t overflow = ret / size != count; - - size_t extra = offset + align - 1; - ret += extra; - overflow |= ret < extra; - ret |= -overflow; +static inline size_t flex_size(size_t align, size_t offset, size_t size, size_t count) { + size_t ret = size_mul(size, count); + ret = size_add(ret, offset + align - 1); ret = align_floor(align, ret); - - // Make sure flex_sizeof(type, member, 0) >= sizeof(type), even if the - // type has more padding than necessary for alignment - if (min > align_ceil(align, offset)) { - ret = ret < min ? min : ret; - } - return ret; } /** * Computes the size of a flexible struct. * - * @param type + * @type * The type of the struct containing the flexible array. - * @param member + * @member * The name of the flexible array member. - * @param count + * @count * The length of the flexible array. * @return * The size of the struct with count flexible array elements. Saturates * to the maximum aligned value on overflow. */ #define sizeof_flex(type, member, count) \ - flex_size(alignof(type), sizeof(type), offsetof(type, member), sizeof_member(type, member[0]), count) + (ASSERT_FLEX_ABI(type, member), flex_size( \ + alignof(type), offsetof(type, member), sizeof_member(type, member[0]), count)) /** * General memory allocator. * - * @param align + * @align * The required alignment. - * @param size + * @size * The size of the allocation. * @return * The allocated memory, or NULL on failure. */ +_malloc(free, 1) +_aligned_alloc(1, 2) void *alloc(size_t align, size_t size); /** * Zero-initialized memory allocator. * - * @param align + * @align * The required alignment. - * @param size + * @size * The size of the allocation. * @return * The allocated memory, or NULL on failure. */ +_malloc(free, 1) +_aligned_alloc(1, 2) void *zalloc(size_t align, size_t size); /** Allocate memory for the given type. */ @@ -132,11 +159,11 @@ void *zalloc(size_t align, size_t size); /** Allocate memory for an array. */ #define ALLOC_ARRAY(type, count) \ - (type *)alloc(alignof(type), sizeof_array(type, count)); + (type *)alloc(alignof(type), sizeof_array(type, count)) /** Allocate zeroed memory for an array. */ #define ZALLOC_ARRAY(type, count) \ - (type *)zalloc(alignof(type), sizeof_array(type, count)); + (type *)zalloc(alignof(type), sizeof_array(type, count)) /** Allocate memory for a flexible struct. */ #define ALLOC_FLEX(type, member, count) \ @@ -147,6 +174,68 @@ void *zalloc(size_t align, size_t size); (type *)zalloc(alignof(type), sizeof_flex(type, member, count)) /** + * Alignment-aware realloc(). + * + * @ptr + * The pointer to reallocate. + * @align + * The required alignment. + * @old_size + * The previous allocation size. + * @new_size + * The new allocation size. + * @return + * The reallocated memory, or NULL on failure. + */ +_aligned_alloc(2, 4) +_nodiscard +void *xrealloc(void *ptr, size_t align, size_t old_size, size_t new_size); + +/** Reallocate memory for an array. */ +#define REALLOC_ARRAY(type, ptr, old_count, new_count) \ + (type *)xrealloc((ptr), alignof(type), sizeof_array(type, old_count), sizeof_array(type, new_count)) + +/** Reallocate memory for a flexible struct. */ +#define REALLOC_FLEX(type, member, ptr, old_count, new_count) \ + (type *)xrealloc((ptr), alignof(type), sizeof_flex(type, member, old_count), sizeof_flex(type, member, new_count)) + +/** + * Reserve space for one more element in a dynamic array. + * + * @ptr + * The pointer to reallocate. + * @align + * The required alignment. + * @count + * The current size of the array. + * @return + * The reallocated memory, on both success *and* failure. On success, + * errno will be set to zero, and the returned pointer will have room + * for (count + 1) elements. On failure, errno will be non-zero, and + * ptr will returned unchanged. + */ +_nodiscard +void *reserve(void *ptr, size_t align, size_t size, size_t count); + +/** + * Convenience macro to grow a dynamic array. + * + * @type + * The array element type. + * @type **ptr + * A pointer to the array. + * @size_t *count + * A pointer to the array's size. + * @return + * On success, a pointer to the newly reserved array element, i.e. + * `*ptr + *count++`. On failure, NULL is returned, and both *ptr and + * *count remain unchanged. + */ +#define RESERVE(type, ptr, count) \ + ((*ptr) = reserve((*ptr), alignof(type), sizeof(type), (*count)), \ + errno ? NULL : (*ptr) + (*count)++) + +/** * An arena allocator for fixed-size types. * * Arena allocators are intentionally not thread safe. @@ -176,14 +265,15 @@ void arena_init(struct arena *arena, size_t align, size_t size); arena_init((arena), alignof(type), sizeof(type)) /** - * Allocate an object out of the arena. + * Free an object from the arena. */ -void *arena_alloc(struct arena *arena); +void arena_free(struct arena *arena, void *ptr); /** - * Free an object from the arena. + * Allocate an object out of the arena. */ -void arena_free(struct arena *arena, void *ptr); +_malloc(arena_free, 2) +void *arena_alloc(struct arena *arena); /** * Free all allocations from an arena. @@ -216,87 +306,89 @@ struct varena { /** * Initialize a varena for a struct with the given layout. * - * @param varena + * @varena * The varena to initialize. - * @param align + * @align * alignof(type) - * @param min - * sizeof(type) - * @param offset + * @offset * offsetof(type, flexible_array) - * @param size + * @size * sizeof(flexible_array[i]) */ -void varena_init(struct varena *varena, size_t align, size_t min, size_t offset, size_t size); +void varena_init(struct varena *varena, size_t align, size_t offset, size_t size); /** * Initialize a varena for the given type and flexible array. * - * @param varena + * @varena * The varena to initialize. - * @param type + * @type * A struct type containing a flexible array. - * @param member + * @member * The name of the flexible array member. */ #define VARENA_INIT(varena, type, member) \ - varena_init(varena, alignof(type), sizeof(type), offsetof(type, member), sizeof_member(type, member[0])) + (ASSERT_FLEX_ABI(type, member), varena_init( \ + varena, alignof(type), offsetof(type, member), sizeof_member(type, member[0]))) + +/** + * Free an arena-allocated flexible struct. + * + * @varena + * The that allocated the object. + * @ptr + * The object to free. + * @count + * The length of the flexible array. + */ +void varena_free(struct varena *varena, void *ptr, size_t count); /** * Arena-allocate a flexible struct. * - * @param varena + * @varena * The varena to allocate from. - * @param count + * @count * The length of the flexible array. * @return * The allocated struct, or NULL on failure. */ +_malloc(varena_free, 2) void *varena_alloc(struct varena *varena, size_t count); /** * Resize a flexible struct. * - * @param varena + * @varena * The varena to allocate from. - * @param ptr + * @ptr * The object to resize. - * @param old_count - * The old array lenth. - * @param new_count + * @old_count + * The old array length. + * @new_count * The new array length. * @return * The resized struct, or NULL on failure. */ +_nodiscard void *varena_realloc(struct varena *varena, void *ptr, size_t old_count, size_t new_count); /** * Grow a flexible struct by an arbitrary amount. * - * @param varena + * @varena * The varena to allocate from. - * @param ptr + * @ptr * The object to resize. - * @param count + * @count * Pointer to the flexible array length. * @return * The resized struct, or NULL on failure. */ +_nodiscard void *varena_grow(struct varena *varena, void *ptr, size_t *count); /** - * Free an arena-allocated flexible struct. - * - * @param varena - * The that allocated the object. - * @param ptr - * The object to free. - * @param count - * The length of the flexible array. - */ -void varena_free(struct varena *varena, void *ptr, size_t count); - -/** * Free all allocations from a varena. */ void varena_clear(struct varena *varena); diff --git a/src/atomic.h b/src/atomic.h index f1a6bea..5c2826f 100644 --- a/src/atomic.h +++ b/src/atomic.h @@ -8,6 +8,8 @@ #ifndef BFS_ATOMIC_H #define BFS_ATOMIC_H +#include "bfs.h" + #include <stdatomic.h> /** @@ -18,9 +20,9 @@ /** * Shorthand for atomic_load_explicit(). * - * @param obj + * @obj * A pointer to the atomic object. - * @param order + * @order * The memory ordering to use, without the memory_order_ prefix. * @return * The loaded value. @@ -82,4 +84,35 @@ #define fetch_and(obj, arg, order) \ atomic_fetch_and_explicit(obj, arg, memory_order_##order) +/** + * Shorthand for atomic_thread_fence(). + */ +#if __SANITIZE_THREAD__ +// TSan doesn't support fences: https://github.com/google/sanitizers/issues/1415 +# define thread_fence(obj, order) \ + fetch_add(obj, 0, order) +#else +# define thread_fence(obj, order) \ + atomic_thread_fence(memory_order_##order) +#endif + +/** + * Shorthand for atomic_signal_fence(). + */ +#define signal_fence(order) \ + atomic_signal_fence(memory_order_##order) + +/** + * A hint to the CPU to relax while it spins. + */ +#if __has_builtin(__builtin_ia32_pause) +# define spin_loop() __builtin_ia32_pause() +#elif __has_builtin(__builtin_arm_yield) +# define spin_loop() __builtin_arm_yield() +#elif BFS_HAS_BUILTIN_RISCV_PAUSE +# define spin_loop() __builtin_riscv_pause() +#else +# define spin_loop() ((void)0) +#endif + #endif // BFS_ATOMIC_H @@ -2,50 +2,54 @@ // SPDX-License-Identifier: 0BSD #include "bar.h" + +#include "alloc.h" +#include "atomic.h" +#include "bfs.h" #include "bfstd.h" #include "bit.h" -#include "config.h" #include "dstring.h" +#include "sighook.h" + #include <errno.h> #include <fcntl.h> #include <signal.h> #include <stdarg.h> #include <stdio.h> +#include <stdlib.h> #include <string.h> -#include <sys/ioctl.h> +#include <termios.h> +#include <unistd.h> struct bfs_bar { int fd; - volatile sig_atomic_t width; - volatile sig_atomic_t height; -}; + atomic unsigned int width; + atomic unsigned int height; -/** The global status bar instance. */ -static struct bfs_bar the_bar = { - .fd = -1, + struct sighook *exit_hook; + struct sighook *winch_hook; }; /** Get the terminal size, if possible. */ static int bfs_bar_getsize(struct bfs_bar *bar) { -#ifdef TIOCGWINSZ struct winsize ws; - if (ioctl(bar->fd, TIOCGWINSZ, &ws) != 0) { + if (xtcgetwinsize(bar->fd, &ws) != 0) { return -1; } - bar->width = ws.ws_col; - bar->height = ws.ws_row; + store(&bar->width, ws.ws_col, relaxed); + store(&bar->height, ws.ws_row, relaxed); return 0; -#else - errno = ENOTSUP; - return -1; -#endif } -/** Async Signal Safe puts(). */ -static int ass_puts(int fd, const char *str) { - size_t len = strlen(str); - return xwrite(fd, str, len) == len ? 0 : -1; +/** Write a string to the status bar (async-signal-safe). */ +static int bfs_bar_write(struct bfs_bar *bar, const char *str, size_t len) { + return xwrite(bar->fd, str, len) == len ? 0 : -1; +} + +/** Write a string to the status bar (async-signal-safe). */ +static int bfs_bar_puts(struct bfs_bar *bar, const char *str) { + return bfs_bar_write(bar, str, strlen(str)); } /** Number of decimal digits needed for terminal sizes. */ @@ -67,140 +71,127 @@ static char *ass_itoa(char *str, unsigned int n) { return str + len; } +/** Reset the scrollable region and hide the bar. */ +static int bfs_bar_reset(struct bfs_bar *bar) { + return bfs_bar_puts(bar, + "\0337" // DECSC: Save cursor + "\033[r" // DECSTBM: Reset scrollable region + "\0338" // DECRC: Restore cursor + "\033[J" // ED: Erase display from cursor to end + ); +} + +/** Hide the bar if the terminal is shorter than this. */ +#define BFS_BAR_MIN_HEIGHT 3 + /** Update the size of the scrollable region. */ static int bfs_bar_resize(struct bfs_bar *bar) { - char esc_seq[12 + ITOA_DIGITS] = + unsigned int height = load(&bar->height, relaxed); + if (height < BFS_BAR_MIN_HEIGHT) { + return bfs_bar_reset(bar); + } + + static const char PREFIX[] = + "\033D" // IND: Line feed, possibly scrolling + "\033[1A" // CUU: Move cursor up 1 row "\0337" // DECSC: Save cursor "\033[;"; // DECSTBM: Set scrollable region + static const char SUFFIX[] = + "r" // (end of DECSTBM) + "\0338" // DECRC: Restore the cursor + "\033[J"; // ED: Erase display from cursor to end - // DECSTBM takes the height as the second argument - char *ptr = esc_seq + strlen(esc_seq); - ptr = ass_itoa(ptr, bar->height - 1); + char esc_seq[sizeof(PREFIX) + ITOA_DIGITS + sizeof(SUFFIX)]; - strcpy(ptr, - "r" // DECSTBM - "\0338" // DECRC: Restore the cursor - "\033[J" // ED: Erase display from cursor to end - ); + // DECSTBM takes the height as the second argument + char *cur = stpcpy(esc_seq, PREFIX); + cur = ass_itoa(cur, height - 1); + cur = stpcpy(cur, SUFFIX); - return ass_puts(bar->fd, esc_seq); + return bfs_bar_write(bar, esc_seq, cur - esc_seq); } #ifdef SIGWINCH /** SIGWINCH handler. */ -static void sighand_winch(int sig) { - int error = errno; - - bfs_bar_getsize(&the_bar); - bfs_bar_resize(&the_bar); - - errno = error; +static void bfs_bar_sigwinch(int sig, siginfo_t *info, void *arg) { + struct bfs_bar *bar = arg; + bfs_bar_getsize(bar); + bfs_bar_resize(bar); } #endif -/** Reset the scrollable region and hide the bar. */ -static int bfs_bar_reset(struct bfs_bar *bar) { - return ass_puts(bar->fd, - "\0337" // DECSC: Save cursor - "\033[r" // DECSTBM: Reset scrollable region - "\0338" // DECRC: Restore cursor - "\033[J" // ED: Erase display from cursor to end - ); -} - /** Signal handler for process-terminating signals. */ -static void sighand_reset(int sig) { - bfs_bar_reset(&the_bar); - raise(sig); -} - -/** Register sighand_reset() for a signal. */ -static void reset_before_death_by(int sig) { - struct sigaction sa = { - .sa_handler = sighand_reset, - .sa_flags = SA_RESETHAND, - }; - sigemptyset(&sa.sa_mask); - sigaction(sig, &sa, NULL); +static void bfs_bar_sigexit(int sig, siginfo_t *info, void *arg) { + struct bfs_bar *bar = arg; + bfs_bar_reset(bar); } /** printf() to the status bar with a single write(). */ -BFS_FORMATTER(2, 3) +_printf(2, 3) static int bfs_bar_printf(struct bfs_bar *bar, const char *format, ...) { va_list args; va_start(args, format); - char *str = dstrvprintf(format, args); + dchar *str = dstrvprintf(format, args); va_end(args); if (!str) { return -1; } - int ret = ass_puts(bar->fd, str); + int ret = bfs_bar_write(bar, str, dstrlen(str)); dstrfree(str); return ret; } struct bfs_bar *bfs_bar_show(void) { - if (the_bar.fd >= 0) { - errno = EBUSY; - goto fail; + struct bfs_bar *bar = ALLOC(struct bfs_bar); + if (!bar) { + return NULL; } - char term[L_ctermid]; - ctermid(term); - if (strlen(term) == 0) { - errno = ENOTTY; + bar->fd = open_cterm(O_RDWR | O_CLOEXEC); + if (bar->fd < 0) { goto fail; } - the_bar.fd = open(term, O_RDWR | O_CLOEXEC); - if (the_bar.fd < 0) { - goto fail; + if (bfs_bar_getsize(bar) != 0) { + goto fail_close; } - if (bfs_bar_getsize(&the_bar) != 0) { + bar->exit_hook = atsigexit(bfs_bar_sigexit, bar); + if (!bar->exit_hook) { goto fail_close; } - reset_before_death_by(SIGABRT); - reset_before_death_by(SIGINT); - reset_before_death_by(SIGPIPE); - reset_before_death_by(SIGQUIT); - reset_before_death_by(SIGTERM); - #ifdef SIGWINCH - struct sigaction sa = { - .sa_handler = sighand_winch, - .sa_flags = SA_RESTART, - }; - sigemptyset(&sa.sa_mask); - sigaction(SIGWINCH, &sa, NULL); + bar->winch_hook = sighook(SIGWINCH, bfs_bar_sigwinch, bar, 0); + if (!bar->winch_hook) { + goto fail_hook; + } #endif - bfs_bar_printf(&the_bar, - "\n" // Make space for the bar - "\0337" // DECSC: Save cursor - "\033[;%ur" // DECSTBM: Set scrollable region - "\0338" // DECRC: Restore cursor - "\033[1A", // CUU: Move cursor up 1 row - (unsigned int)(the_bar.height - 1) - ); - - return &the_bar; + bfs_bar_resize(bar); + return bar; +fail_hook: + sigunhook(bar->exit_hook); fail_close: - close_quietly(the_bar.fd); - the_bar.fd = -1; + close_quietly(bar->fd); fail: + free(bar); return NULL; } unsigned int bfs_bar_width(const struct bfs_bar *bar) { - return bar->width; + return load(&bar->width, relaxed); } int bfs_bar_update(struct bfs_bar *bar, const char *str) { + unsigned int height = load(&bar->height, relaxed); + if (height < BFS_BAR_MIN_HEIGHT) { + return 0; + } + return bfs_bar_printf(bar, "\0337" // DECSC: Save cursor "\033[%u;0f" // HVP: Move cursor to row, column @@ -209,7 +200,7 @@ int bfs_bar_update(struct bfs_bar *bar, const char *str) { "%s" "\033[27m" // SGR reverse video off "\0338", // DECRC: Restore cursor - (unsigned int)bar->height, + height, str ); } @@ -219,17 +210,11 @@ void bfs_bar_hide(struct bfs_bar *bar) { return; } - signal(SIGABRT, SIG_DFL); - signal(SIGINT, SIG_DFL); - signal(SIGPIPE, SIG_DFL); - signal(SIGQUIT, SIG_DFL); - signal(SIGTERM, SIG_DFL); -#ifdef SIGWINCH - signal(SIGWINCH, SIG_DFL); -#endif + sigunhook(bar->winch_hook); + sigunhook(bar->exit_hook); bfs_bar_reset(bar); xclose(bar->fd); - bar->fd = -1; + free(bar); } @@ -27,9 +27,9 @@ unsigned int bfs_bar_width(const struct bfs_bar *bar); /** * Update the status bar message. * - * @param bar + * @bar * The status bar to update. - * @param str + * @str * The string to display. * @return * 0 on success, -1 on failure. diff --git a/src/bfs.h b/src/bfs.h new file mode 100644 index 0000000..32dbbae --- /dev/null +++ b/src/bfs.h @@ -0,0 +1,232 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +/** + * Configuration and fundamental utilities. + */ + +#ifndef BFS_H +#define BFS_H + +// Standard versions + +/** Possible __STDC_VERSION__ values. */ +#define C95 199409L +#define C99 199901L +#define C11 201112L +#define C17 201710L +#define C23 202311L + +/** Possible _POSIX_C_SOURCE and _POSIX_<OPTION> values. */ +#define POSIX_1990 1 +#define POSIX_1992 2 +#define POSIX_1993 199309L +#define POSIX_1995 199506L +#define POSIX_2001 200112L +#define POSIX_2008 200809L +#define POSIX_2024 202405L + +// Build configuration + +#include "config.h" + +#ifndef BFS_COMMAND +# define BFS_COMMAND "bfs" +#endif + +#ifndef BFS_HOMEPAGE +# define BFS_HOMEPAGE "https://tavianator.com/projects/bfs.html" +#endif + +#ifndef BFS_LINT +# define BFS_LINT false +#endif + +// This is a symbol instead of a literal so we don't have to rebuild everything +// when the version number changes +extern const char bfs_version[]; + +extern const char bfs_confflags[]; +extern const char bfs_cc[]; +extern const char bfs_cppflags[]; +extern const char bfs_cflags[]; +extern const char bfs_ldflags[]; +extern const char bfs_ldlibs[]; + +// Get __GLIBC__ +#include <assert.h> + +// Fundamental utilities + +/** + * Get the length of an array. + */ +#define countof(...) (sizeof(__VA_ARGS__) / sizeof(0[__VA_ARGS__])) + +/** + * False sharing/destructive interference/largest cache line size. + */ +#ifdef __GCC_DESTRUCTIVE_SIZE +# define FALSE_SHARING_SIZE __GCC_DESTRUCTIVE_SIZE +#else +# define FALSE_SHARING_SIZE 64 +#endif + +/** + * True sharing/constructive interference/smallest cache line size. + */ +#ifdef __GCC_CONSTRUCTIVE_SIZE +# define TRUE_SHARING_SIZE __GCC_CONSTRUCTIVE_SIZE +#else +# define TRUE_SHARING_SIZE 64 +#endif + +/** + * Alignment specifier that avoids false sharing. + */ +#define cache_align alignas(FALSE_SHARING_SIZE) + +// Wrappers for attributes + +/** + * Silence warnings about switch/case fall-throughs. + */ +#if __has_attribute(fallthrough) +# define _fallthrough __attribute__((fallthrough)) +#else +# define _fallthrough ((void)0) +#endif + +/** + * Silence warnings about unused declarations. + */ +#if __has_attribute(unused) +# define _maybe_unused __attribute__((unused)) +#else +# define _maybe_unused +#endif + +/** + * Warn if a value is unused. + */ +#if __has_attribute(warn_unused_result) +# define _nodiscard __attribute__((warn_unused_result)) +#else +# define _nodiscard +#endif + +/** + * Hint to avoid inlining a function. + */ +#if __has_attribute(noinline) +# define _noinline __attribute__((noinline)) +#else +# define _noinline +#endif + +/** + * Marks a non-returning function. + */ +#if __STDC_VERSION__ >= C23 +# define _noreturn [[noreturn]] +#else +# define _noreturn _Noreturn +#endif + +/** + * Hint that a function is unlikely to be called. + */ +#if __has_attribute(cold) +# define _cold _noinline __attribute__((cold)) +#else +# define _cold _noinline +#endif + +/** + * Adds compiler warnings for bad printf()-style function calls, if supported. + */ +#if __has_attribute(format) +# define _printf(fmt, args) __attribute__((format(printf, fmt, args))) +#else +# define _printf(fmt, args) +#endif + +/** + * Annotates functions that potentially modify and return format strings. + */ +#if __has_attribute(format_arg) +# define _format_arg(arg) __attribute__((format_arg(arg))) +#else +# define _format_arg(arg) +#endif + +/** + * Annotates allocator-like functions. + */ +#if __has_attribute(malloc) +# if __GNUC__ >= 11 && !__OPTIMIZE__ // malloc(deallocator) disables inlining on GCC +# define _malloc(...) _nodiscard __attribute__((malloc(__VA_ARGS__))) +# else +# define _malloc(...) _nodiscard __attribute__((malloc)) +# endif +#else +# define _malloc(...) _nodiscard +#endif + +/** + * Specifies that a function returns allocations with a given alignment. + */ +#if __has_attribute(alloc_align) +# define _alloc_align(param) __attribute__((alloc_align(param))) +#else +# define _alloc_align(param) +#endif + +/** + * Specifies that a function returns allocations with a given size. + */ +#if __has_attribute(alloc_size) +# define _alloc_size(...) __attribute__((alloc_size(__VA_ARGS__))) +#else +# define _alloc_size(...) +#endif + +/** + * Shorthand for _alloc_align() and _alloc_size(). + */ +#define _aligned_alloc(align, ...) _alloc_align(align) _alloc_size(__VA_ARGS__) + +/** + * Check if function multiversioning via GNU indirect functions (ifunc) is supported. + * + * Disabled on TSan due to https://github.com/google/sanitizers/issues/342. + */ +#ifndef BFS_USE_TARGET_CLONES +# if __has_attribute(target_clones) && (__GLIBC__ || __FreeBSD__) && !__SANITIZE_THREAD__ +# define BFS_USE_TARGET_CLONES true +# else +# define BFS_USE_TARGET_CLONES false +# endif +#endif + +/** + * Apply the target_clones attribute, if available. + */ +#if BFS_USE_TARGET_CLONES +# define _target_clones(...) __attribute__((target_clones(__VA_ARGS__))) +#else +# define _target_clones(...) +#endif + +/** + * Optimization hint to not unroll a loop. + */ +#if BFS_HAS_PRAGMA_NOUNROLL +# define _nounroll _Pragma("nounroll") +#elif __GNUC__ && !__clang__ +# define _nounroll _Pragma("GCC unroll 0") +#else +# define _nounroll +#endif + +#endif // BFS_H diff --git a/src/bfstd.c b/src/bfstd.c index 1a5a67d..b78af7a 100644 --- a/src/bfstd.c +++ b/src/bfstd.c @@ -2,39 +2,80 @@ // SPDX-License-Identifier: 0BSD #include "bfstd.h" + +#include "bfs.h" #include "bit.h" -#include "config.h" #include "diag.h" #include "sanity.h" #include "thread.h" #include "xregex.h" -#include <ctype.h> + #include <errno.h> #include <fcntl.h> #include <langinfo.h> +#include <limits.h> +#include <locale.h> #include <nl_types.h> +#include <pthread.h> +#include <sched.h> +#include <stddef.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <sys/ioctl.h> +#include <sys/resource.h> #include <sys/stat.h> #include <sys/types.h> +#include <sys/wait.h> +#include <termios.h> #include <unistd.h> #include <wchar.h> -#include <wctype.h> -#if BFS_USE_SYS_SYSMACROS_H +#if __has_include(<sys/sysmacros.h>) # include <sys/sysmacros.h> -#elif BFS_USE_SYS_MKDEV_H +#elif __has_include(<sys/mkdev.h>) # include <sys/mkdev.h> #endif -#if BFS_USE_UTIL_H +#if __has_include(<util.h>) # include <util.h> #endif -bool is_nonexistence_error(int error) { - return error == ENOENT || errno == ENOTDIR; +bool error_is_like(int error, int category) { + if (error == category) { + return true; + } + + switch (category) { + case ENOENT: + return error == ENOTDIR; + + case ENOSYS: + // https://github.com/opencontainers/runc/issues/2151 + return errno == EPERM; + +#if __DragonFly__ + // https://twitter.com/tavianator/status/1742991411203485713 + case ENAMETOOLONG: + return error == EFAULT; +#endif + } + + return false; +} + +bool errno_is_like(int category) { + return error_is_like(errno, category); +} + +int try(int ret) { + if (ret >= 0) { + return ret; + } else { + bfs_assert(errno > 0, "errno should be positive, was %d\n", errno); + return -errno; + } } char *xdirname(const char *path) { @@ -150,10 +191,10 @@ char *xgetdelim(FILE *file, char delim) { const char *xgetprogname(void) { const char *cmd = NULL; -#if __GLIBC__ - cmd = program_invocation_short_name; -#elif BSD +#if BFS_HAS_GETPROGNAME cmd = getprogname(); +#elif BFS_HAS_GETPROGNAME_GNU + cmd = program_invocation_short_name; #endif if (!cmd) { @@ -163,6 +204,171 @@ const char *xgetprogname(void) { return cmd; } +/** Common prologue for xstrto*() wrappers. */ +static int xstrtox_prologue(const char *str) { + // strto*() skips leading spaces, but we want to reject them + if (xisspace(str[0])) { + errno = EINVAL; + return -1; + } + + errno = 0; + return 0; +} + +/** Common epilogue for xstrto*() wrappers. */ +static int xstrtox_epilogue(const char *str, char **end, char *endp) { + if (errno != 0) { + return -1; + } + + if (end) { + *end = endp; + } + + // If end is NULL, make sure the entire string is valid + if (endp == str || (!end && *endp != '\0')) { + errno = EINVAL; + return -1; + } + + return 0; +} + +int xstrtos(const char *str, char **end, int base, short *value) { + long n; + if (xstrtol(str, end, base, &n) != 0) { + return -1; + } + + if (n < SHRT_MIN || n > SHRT_MAX) { + errno = ERANGE; + return -1; + } + + *value = n; + return 0; +} + +int xstrtoi(const char *str, char **end, int base, int *value) { + long n; + if (xstrtol(str, end, base, &n) != 0) { + return -1; + } + + if (n < INT_MIN || n > INT_MAX) { + errno = ERANGE; + return -1; + } + + *value = n; + return 0; +} + +int xstrtol(const char *str, char **end, int base, long *value) { + if (xstrtox_prologue(str) != 0) { + return -1; + } + + char *endp; + *value = strtol(str, &endp, base); + return xstrtox_epilogue(str, end, endp); +} + +int xstrtoll(const char *str, char **end, int base, long long *value) { + if (xstrtox_prologue(str) != 0) { + return -1; + } + + char *endp; + *value = strtoll(str, &endp, base); + return xstrtox_epilogue(str, end, endp); +} + +int xstrtof(const char *str, char **end, float *value) { + if (xstrtox_prologue(str) != 0) { + return -1; + } + + char *endp; + *value = strtof(str, &endp); + return xstrtox_epilogue(str, end, endp); +} + +int xstrtod(const char *str, char **end, double *value) { + if (xstrtox_prologue(str) != 0) { + return -1; + } + + char *endp; + *value = strtod(str, &endp); + return xstrtox_epilogue(str, end, endp); +} + +int xstrtous(const char *str, char **end, int base, unsigned short *value) { + unsigned long n; + if (xstrtoul(str, end, base, &n) != 0) { + return -1; + } + + if (n > USHRT_MAX) { + errno = ERANGE; + return -1; + } + + *value = n; + return 0; +} + +int xstrtoui(const char *str, char **end, int base, unsigned int *value) { + unsigned long n; + if (xstrtoul(str, end, base, &n) != 0) { + return -1; + } + + if (n > UINT_MAX) { + errno = ERANGE; + return -1; + } + + *value = n; + return 0; +} + +/** Common epilogue for xstrtou*() wrappers. */ +static int xstrtoux_epilogue(const char *str, char **end, char *endp) { + if (xstrtox_epilogue(str, end, endp) != 0) { + return -1; + } + + if (str[0] == '-') { + errno = ERANGE; + return -1; + } + + return 0; +} + +int xstrtoul(const char *str, char **end, int base, unsigned long *value) { + if (xstrtox_prologue(str) != 0) { + return -1; + } + + char *endp; + *value = strtoul(str, &endp, base); + return xstrtoux_epilogue(str, end, endp); +} + +int xstrtoull(const char *str, char **end, int base, unsigned long long *value) { + if (xstrtox_prologue(str) != 0) { + return -1; + } + + char *endp; + *value = strtoull(str, &endp, base); + return xstrtoux_epilogue(str, end, endp); +} + /** Compile and execute a regular expression for xrpmatch(). */ static int xrpregex(nl_item item, const char *response) { const char *pattern = nl_langinfo(item); @@ -216,6 +422,80 @@ int ynprompt(void) { return ret; } +void *xmemdup(const void *src, size_t size) { + void *ret = malloc(size); + if (ret) { + memcpy(ret, src, size); + } + return ret; +} + +char *xstpecpy(char *dest, char *end, const char *src) { + return xstpencpy(dest, end, src, SIZE_MAX); +} + +char *xstpencpy(char *dest, char *end, const char *src, size_t n) { + size_t space = end - dest; + n = space < n ? space : n; + n = strnlen(src, n); + memcpy(dest, src, n); + if (n < space) { + dest[n] = '\0'; + return dest + n; + } else { + end[-1] = '\0'; + return end; + } +} + +const char *xstrerror(int errnum) { + int saved = errno; + const char *ret = NULL; + static thread_local char buf[256]; + + // On FreeBSD with MemorySanitizer, duplocale() triggers + // https://github.com/llvm/llvm-project/issues/65532 +#if BFS_HAS_STRERROR_L && !(__FreeBSD__ && __SANITIZE_MEMORY__) +# if BFS_HAS_USELOCALE + locale_t loc = uselocale((locale_t)0); +# else + locale_t loc = LC_GLOBAL_LOCALE; +# endif + + bool free_loc = false; + if (loc == LC_GLOBAL_LOCALE) { + loc = duplocale(loc); + free_loc = true; + } + + if (loc != (locale_t)0) { + ret = strerror_l(errnum, loc); + if (free_loc) { + freelocale(loc); + } + } +#elif BFS_HAS_STRERROR_R_POSIX + if (strerror_r(errnum, buf, sizeof(buf)) == 0) { + ret = buf; + } +#elif BFS_HAS_STRERROR_R_GNU + ret = strerror_r(errnum, buf, sizeof(buf)); +#endif + + if (!ret) { + // Fallback for strerror_[lr]() or duplocale() failures + snprintf(buf, sizeof(buf), "Unknown error %d", errnum); + ret = buf; + } + + errno = saved; + return ret; +} + +const char *errstr(void) { + return xstrerror(errno); +} + /** Get the single character describing the given file type. */ static char type_char(mode_t mode) { switch (mode & S_IFMT) { @@ -250,32 +530,6 @@ static char type_char(mode_t mode) { return '?'; } -void *xmemdup(const void *src, size_t size) { - void *ret = malloc(size); - if (ret) { - memcpy(ret, src, size); - } - return ret; -} - -char *xstpecpy(char *dest, char *end, const char *src) { - return xstpencpy(dest, end, src, SIZE_MAX); -} - -char *xstpencpy(char *dest, char *end, const char *src, size_t n) { - size_t space = end - dest; - n = space < n ? space : n; - n = strnlen(src, n); - memcpy(dest, src, n); - if (n < space) { - dest[n] = '\0'; - return dest + n; - } else { - end[-1] = '\0'; - return end; - } -} - void xstrmode(mode_t mode, char str[11]) { strcpy(str, "----------"); @@ -324,8 +578,42 @@ void xstrmode(mode_t mode, char str[11]) { } } +/** Check if an rlimit value is infinite. */ +static bool rlim_isinf(rlim_t r) { + // Consider RLIM_{INFINITY,SAVED_{CUR,MAX}} all equally infinite + if (r == RLIM_INFINITY) { + return true; + } + +#ifdef RLIM_SAVED_CUR + if (r == RLIM_SAVED_CUR) { + return true; + } +#endif + +#ifdef RLIM_SAVED_MAX + if (r == RLIM_SAVED_MAX) { + return true; + } +#endif + + return false; +} + +int rlim_cmp(rlim_t a, rlim_t b) { + bool a_inf = rlim_isinf(a); + bool b_inf = rlim_isinf(b); + if (a_inf || b_inf) { + return a_inf - b_inf; + } + + return (a > b) - (a < b); +} + dev_t xmakedev(int ma, int mi) { -#ifdef makedev +#if __QNX__ + return makedev(0, ma, mi); +#elif defined(makedev) return makedev(ma, mi); #else return (ma << 8) | mi; @@ -348,6 +636,40 @@ int xminor(dev_t dev) { #endif } +pid_t xwaitpid(pid_t pid, int *status, int flags) { + pid_t ret; + do { + ret = waitpid(pid, status, flags); + } while (ret < 0 && errno == EINTR); + return ret; +} + +int open_cterm(int flags) { + char path[L_ctermid]; + if (ctermid(path) == NULL || strlen(path) == 0) { + errno = ENOTTY; + return -1; + } + + return open(path, flags); +} + +int xtcgetwinsize(int fd, struct winsize *ws) { +#if BFS_HAS_TCGETWINSIZE + return tcgetwinsize(fd, ws); +#else + return ioctl(fd, TIOCGWINSZ, ws); +#endif +} + +int xtcsetwinsize(int fd, const struct winsize *ws) { +#if BFS_HAS_TCSETWINSIZE + return tcsetwinsize(fd, ws); +#else + return ioctl(fd, TIOCSWINSZ, ws); +#endif +} + int dup_cloexec(int fd) { #ifdef F_DUPFD_CLOEXEC return fcntl(fd, F_DUPFD_CLOEXEC, 0); @@ -367,7 +689,7 @@ int dup_cloexec(int fd) { } int pipe_cloexec(int pipefd[2]) { -#if __linux__ || (BSD && !__APPLE__) +#if BFS_HAS_PIPE2 return pipe2(pipefd, O_CLOEXEC); #else if (pipe(pipefd) != 0) { @@ -459,10 +781,7 @@ int xfaccessat(int fd, const char *path, int amode) { } char *xconfstr(int name) { -#if __ANDROID__ - errno = ENOTSUP; - return NULL; -#else +#if BFS_HAS_CONFSTR size_t len = confstr(name, NULL, 0); if (len == 0) { return NULL; @@ -479,7 +798,10 @@ char *xconfstr(int name) { } return str; -#endif // !__ANDROID__ +#else + errno = ENOTSUP; + return NULL; +#endif } char *xreadlinkat(int fd, const char *path, size_t size) { @@ -517,17 +839,25 @@ error: return NULL; } +#if BFS_HAS_STRTOFFLAGS +# define BFS_STRTOFFLAGS strtofflags +#elif BFS_HAS_STRING_TO_FLAGS +# define BFS_STRTOFFLAGS string_to_flags +#endif + int xstrtofflags(const char **str, unsigned long long *set, unsigned long long *clear) { -#if BSD && !__GNU__ +#ifdef BFS_STRTOFFLAGS char *str_arg = (char *)*str; - unsigned long set_arg = 0; - unsigned long clear_arg = 0; -#if __NetBSD__ - int ret = string_to_flags(&str_arg, &set_arg, &clear_arg); +#if __OpenBSD__ + typedef uint32_t bfs_fflags_t; #else - int ret = strtofflags(&str_arg, &set_arg, &clear_arg); + typedef unsigned long bfs_fflags_t; #endif + bfs_fflags_t set_arg = 0; + bfs_fflags_t clear_arg = 0; + + int ret = BFS_STRTOFFLAGS(&str_arg, &set_arg, &clear_arg); *str = str_arg; *set = set_arg; @@ -537,24 +867,139 @@ int xstrtofflags(const char **str, unsigned long long *set, unsigned long long * errno = EINVAL; } return ret; -#else // !BSD +#else // !BFS_STRTOFFLAGS errno = ENOTSUP; return -1; #endif } -/** mbrtowc() wrapper. */ -static int xmbrtowc(wchar_t *wc, size_t *i, const char *str, size_t len, mbstate_t *mb) { - size_t mblen = mbrtowc(wc, str + *i, len - *i, mb); +long xsysconf(int name) { +#if __FreeBSD__ && __SANITIZE_MEMORY__ + // Work around https://github.com/llvm/llvm-project/issues/88163 + __msan_scoped_disable_interceptor_checks(); +#endif + + long ret = sysconf(name); + +#if __FreeBSD__ && __SANITIZE_MEMORY__ + __msan_scoped_enable_interceptor_checks(); +#endif + + return ret; +} + +#if BFS_HAS_SCHED_GETAFFINITY +/** Get the CPU count in an affinity mask of the given size. */ +static long bfs_sched_getaffinity(size_t size) { + cpu_set_t set, *pset = &set; + + if (size > sizeof(set)) { + pset = malloc(size); + if (!pset) { + return -1; + } + } + + long ret = -1; + if (sched_getaffinity(0, size, pset) == 0) { +# ifdef CPU_COUNT_S + ret = CPU_COUNT_S(size, pset); +# else + bfs_assert(size <= sizeof(set)); + ret = CPU_COUNT(pset); +# endif + } + + if (pset != &set) { + free(pset); + } + return ret; +} +#endif + +long nproc(void) { + long ret = 0; + +#if BFS_HAS_SCHED_GETAFFINITY + size_t size = sizeof(cpu_set_t); + do { + ret = bfs_sched_getaffinity(size); + +# ifdef CPU_COUNT_S + // On Linux, sched_getaffinity(2) says: + // + // When working on systems with large kernel CPU affinity masks, one must + // dynamically allocate the mask argument (see CPU_ALLOC(3)). Currently, + // the only way to do this is by probing for the size of the required mask + // using sched_getaffinity() calls with increasing mask sizes (until the + // call does not fail with the error EINVAL). + size *= 2; +# else + // No support for dynamically-sized CPU masks + break; +# endif + } while (ret < 0 && errno == EINVAL); +#endif + + if (ret < 1) { + ret = xsysconf(_SC_NPROCESSORS_ONLN); + } + + if (ret < 1) { + ret = 1; + } + + return ret; +} + +size_t asciilen(const char *str) { + return asciinlen(str, strlen(str)); +} + +size_t asciinlen(const char *str, size_t n) { + const unsigned char *ustr = (const unsigned char *)str; + size_t i = 0; + + // Word-at-a-time isascii() +#define CHUNK(n) CHUNK_(uint##n##_t, load8_leu##n) +#define CHUNK_(type, load8) \ + (n - i >= sizeof(type)) { \ + type word = load8(ustr + i); \ + type mask = (((type)-1) / 0xFF) << 7; /* 0x808080.. */ \ + word &= mask; \ + i += trailing_zeros(word) / 8; \ + if (word) { \ + return i; \ + } \ + } + +#if SIZE_WIDTH >= 64 + while CHUNK(64); + if CHUNK(32); +#else + while CHUNK(32); +#endif + if CHUNK(16); + if CHUNK(8); + +#undef CHUNK_ +#undef CHUNK + + return i; +} + +wint_t xmbrtowc(const char *str, size_t *i, size_t len, mbstate_t *mb) { + wchar_t wc; + size_t mblen = mbrtowc(&wc, str + *i, len - *i, mb); switch (mblen) { case -1: // Invalid byte sequence case -2: // Incomplete byte sequence *i += 1; - memset(mb, 0, sizeof(*mb)); - return -1; + *mb = (mbstate_t){0}; + return WEOF; default: *i += mblen; - return 0; + return wc; } } @@ -562,16 +1007,27 @@ size_t xstrwidth(const char *str) { size_t len = strlen(str); size_t ret = 0; - mbstate_t mb; - memset(&mb, 0, sizeof(mb)); + size_t asclen = asciinlen(str, len); + size_t i; + for (i = 0; i < asclen; ++i) { + // Assume all ASCII printables have width 1 + if (xisprint(str[i])) { + ++ret; + } + } - for (size_t i = 0; i < len;) { - wchar_t wc; - if (xmbrtowc(&wc, &i, str, len, &mb) == 0) { - ret += wcwidth(wc); - } else { + mbstate_t mb = {0}; + while (i < len) { + wint_t wc = xmbrtowc(str, &i, len, &mb); + if (wc == WEOF) { // Assume a single-width '?' ++ret; + continue; + } + + int width = xwcwidth(wc); + if (width > 0) { + ret += width; } } @@ -591,27 +1047,18 @@ static unsigned char ctype_cache[UCHAR_MAX + 1]; /** Initialize the ctype cache. */ static void char_cache_init(void) { -#if __FreeBSD__ && SANITIZE_MEMORY -// Work around https://github.com/llvm/llvm-project/issues/65532 -# define bfs_isprint (isprint) -# define bfs_isspace (isspace) -#else -# define bfs_isprint isprint -# define bfs_isspace isspace -#endif - for (size_t c = 0; c <= UCHAR_MAX; ++c) { - if (bfs_isprint(c)) { + if (xisprint(c)) { ctype_cache[c] |= IS_PRINT; } - if (bfs_isspace(c)) { + if (xisspace(c)) { ctype_cache[c] |= IS_SPACE; } } } /** Check if a character is printable. */ -static bool xisprint(unsigned char c, enum wesc_flags flags) { +static bool wesc_isprint(unsigned char c, enum wesc_flags flags) { if (ctype_cache[c] & IS_PRINT) { return true; } @@ -627,21 +1074,12 @@ static bool xisprint(unsigned char c, enum wesc_flags flags) { } /** Check if a wide character is printable. */ -static bool xiswprint(wchar_t c, enum wesc_flags flags) { -#if __FreeBSD__ && SANITIZE_MEMORY -// Work around https://github.com/llvm/llvm-project/issues/65532 -# define bfs_iswprint (iswprint) -# define bfs_iswspace (iswspace) -#else -# define bfs_iswprint iswprint -# define bfs_iswspace iswspace -#endif - - if (bfs_iswprint(c)) { +static bool wesc_iswprint(wchar_t c, enum wesc_flags flags) { + if (xiswprint(c)) { return true; } - if (!(flags & WESC_SHELL) && bfs_iswspace(c)) { + if (!(flags & WESC_SHELL) && xiswspace(c)) { return true; } @@ -651,46 +1089,24 @@ static bool xiswprint(wchar_t c, enum wesc_flags flags) { /** Get the length of the longest printable prefix of a string. */ static size_t printable_len(const char *str, size_t len, enum wesc_flags flags) { static pthread_once_t once = PTHREAD_ONCE_INIT; - call_once(&once, char_cache_init); + invoke_once(&once, char_cache_init); // Fast path: avoid multibyte checks - size_t i, word; - for (i = 0; i + sizeof(word) <= len;) { - // Word-at-a-time isascii() - memcpy(&word, str + i, sizeof(word)); - // 0xFFFF... / 0xFF == 0x10101... - size_t mask = (SIZE_MAX / 0xFF) << 7; - if (word & mask) { - goto multibyte; - } - - for (size_t j = 0; j < sizeof(word); ++i, ++j) { - if (!xisprint(str[i], flags)) { - return i; - } - } - } - - for (; i < len; ++i) { - unsigned char c = str[i]; - if (!isascii(c)) { - goto multibyte; - } - if (!xisprint(c, flags)) { + size_t asclen = asciinlen(str, len); + size_t i; + for (i = 0; i < asclen; ++i) { + if (!wesc_isprint(str[i], flags)) { return i; } } - mbstate_t mb; -multibyte: - memset(&mb, 0, sizeof(mb)); - + mbstate_t mb = {0}; for (size_t j = i; i < len; i = j) { - wchar_t wc; - if (xmbrtowc(&wc, &j, str, len, &mb) != 0) { + wint_t wc = xmbrtowc(str, &j, len, &mb); + if (wc == WEOF) { break; } - if (!xiswprint(wc, flags)) { + if (!wesc_iswprint(wc, flags)) { break; } } @@ -731,16 +1147,14 @@ static const char *dollar_esc(char c) { static char *dollar_quote(char *dest, char *end, const char *str, size_t len, enum wesc_flags flags) { dest = xstpecpy(dest, end, "$'"); - mbstate_t mb; - memset(&mb, 0, sizeof(mb)); - + mbstate_t mb = {0}; for (size_t i = 0; i < len;) { size_t start = i; bool safe = false; - wchar_t wc; - if (xmbrtowc(&wc, &i, str, len, &mb) == 0) { - safe = xiswprint(wc, flags); + wint_t wc = xmbrtowc(str, &i, len, &mb); + if (wc != WEOF) { + safe = wesc_iswprint(wc, flags); } for (size_t j = start; safe && j < i; ++j) { @@ -772,14 +1186,14 @@ static char *dollar_quote(char *dest, char *end, const char *str, size_t len, en /** How much of this string is safe as a bare word? */ static size_t bare_len(const char *str, size_t len) { - // https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_02 - size_t ret = strcspn(str, "|&;<>()$`\\\"' *?[#˜=%!"); + // https://pubs.opengroup.org/onlinepubs/9799919799/utilities/V3_chap02.html#tag_19_02 + size_t ret = strcspn(str, "|&;<>()$`\\\"' *?[#~=%!{}"); return ret < len ? ret : len; } /** How much of this string is safe to double-quote? */ static size_t quotable_len(const char *str, size_t len) { - // https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_02_03 + // https://pubs.opengroup.org/onlinepubs/9799919799/utilities/V3_chap02.html#tag_19_02_03 size_t ret = strcspn(str, "`$\\\"!"); return ret < len ? ret : len; } diff --git a/src/bfstd.h b/src/bfstd.h index fb77399..15dd949 100644 --- a/src/bfstd.h +++ b/src/bfstd.h @@ -8,15 +8,70 @@ #ifndef BFS_BFSTD_H #define BFS_BFSTD_H -#include "config.h" +#include "bfs.h" + #include <stddef.h> +#include <ctype.h> + +/** + * Work around https://github.com/llvm/llvm-project/issues/65532 by forcing a + * function, not a macro, to be called. + */ +#if __FreeBSD__ && __SANITIZE_MEMORY__ +# define BFS_INTERCEPT(fn) (fn) +#else +# define BFS_INTERCEPT(fn) fn +#endif + +/** + * Wrap isalpha()/isdigit()/etc. + */ +#define BFS_ISCTYPE(fn, c) BFS_INTERCEPT(fn)((unsigned char)(c)) + +#define xisalnum(c) BFS_ISCTYPE(isalnum, c) +#define xisalpha(c) BFS_ISCTYPE(isalpha, c) +#define xisascii(c) BFS_ISCTYPE(isascii, c) +#define xiscntrl(c) BFS_ISCTYPE(iscntrl, c) +#define xisdigit(c) BFS_ISCTYPE(isdigit, c) +#define xislower(c) BFS_ISCTYPE(islower, c) +#define xisgraph(c) BFS_ISCTYPE(isgraph, c) +#define xisprint(c) BFS_ISCTYPE(isprint, c) +#define xispunct(c) BFS_ISCTYPE(ispunct, c) +#define xisspace(c) BFS_ISCTYPE(isspace, c) +#define xisupper(c) BFS_ISCTYPE(isupper, c) +#define xisxdigit(c) BFS_ISCTYPE(isxdigit, c) + // #include <errno.h> /** - * Return whether an error code is due to a path not existing. + * Check if an error code is "like" another one. For example, ENOTDIR is + * like ENOENT because they can both be triggered by non-existent paths. + * + * @error + * The error code to check. + * @category + * The category to test for. Known categories include ENOENT and + * ENAMETOOLONG. + * @return + * Whether the error belongs to the given category. + */ +bool error_is_like(int error, int category); + +/** + * Equivalent to error_is_like(errno, category). */ -bool is_nonexistence_error(int error); +bool errno_is_like(int category); + +/** + * Apply the "negative errno" convention. + * + * @ret + * The return value of the attempted operation. + * @return + * ret, if non-negative, otherwise -errno. + */ +int try(int ret); #include <fcntl.h> @@ -51,7 +106,7 @@ bool is_nonexistence_error(int error); /** * Re-entrant dirname() variant that always allocates a copy. * - * @param path + * @path * The path in question. * @return * The parent directory of the path. @@ -61,7 +116,7 @@ char *xdirname(const char *path); /** * Re-entrant basename() variant that always allocates a copy. * - * @param path + * @path * The path in question. * @return * The final component of the path. @@ -71,7 +126,7 @@ char *xbasename(const char *path); /** * Find the offset of the final component of a path. * - * @param path + * @path * The path in question. * @return * The offset of the basename. @@ -83,9 +138,9 @@ size_t xbaseoff(const char *path); /** * fopen() variant that takes open() style flags. * - * @param path + * @path * The path to open. - * @param flags + * @flags * Flags to pass to open(). */ FILE *xfopen(const char *path, int flags); @@ -93,9 +148,9 @@ FILE *xfopen(const char *path, int flags); /** * Convenience wrapper for getdelim(). * - * @param file + * @file * The file to read. - * @param delim + * @delim * The delimiter character to split on. * @return * The read chunk (without the delimiter), allocated with malloc(). @@ -114,6 +169,56 @@ char *xgetdelim(FILE *file, char delim); const char *xgetprogname(void); /** + * Like xstrtol(), but for short. + */ +int xstrtos(const char *str, char **end, int base, short *value); + +/** + * Like xstrtol(), but for int. + */ +int xstrtoi(const char *str, char **end, int base, int *value); + +/** + * Wrapper for strtol() that forbids leading spaces. + */ +int xstrtol(const char *str, char **end, int base, long *value); + +/** + * Wrapper for strtoll() that forbids leading spaces. + */ +int xstrtoll(const char *str, char **end, int base, long long *value); + +/** + * Like xstrtoul(), but for unsigned short. + */ +int xstrtous(const char *str, char **end, int base, unsigned short *value); + +/** + * Like xstrtoul(), but for unsigned int. + */ +int xstrtoui(const char *str, char **end, int base, unsigned int *value); + +/** + * Wrapper for strtoul() that forbids leading spaces, negatives. + */ +int xstrtoul(const char *str, char **end, int base, unsigned long *value); + +/** + * Wrapper for strtoull() that forbids leading spaces, negatives. + */ +int xstrtoull(const char *str, char **end, int base, unsigned long long *value); + +/** + * Wrapper for strtof() that forbids leading spaces. + */ +int xstrtof(const char *str, char **end, float *value); + +/** + * Wrapper for strtod() that forbids leading spaces. + */ +int xstrtod(const char *str, char **end, double *value); + +/** * Process a yes/no prompt. * * @return 1 for yes, 0 for no, and -1 for unknown. @@ -123,11 +228,26 @@ int ynprompt(void); // #include <string.h> /** + * Get the length of the pure-ASCII prefix of a string. + */ +size_t asciilen(const char *str); + +/** + * Get the length of the pure-ASCII prefix of a string. + * + * @str + * The string to check. + * @n + * The maximum prefix length. + */ +size_t asciinlen(const char *str, size_t n); + +/** * Allocate a copy of a region of memory. * - * @param src + * @src * The memory region to copy. - * @param size + * @size * The size of the memory region. * @return * A copy of the region, allocated with malloc(), or NULL on failure. @@ -137,12 +257,12 @@ void *xmemdup(const void *src, size_t size); /** * A nice string copying function. * - * @param dest + * @dest * The NUL terminator of the destination string, or `end` if it is * already truncated. - * @param end + * @end * The end of the destination buffer. - * @param src + * @src * The string to copy from. * @return * The new NUL terminator of the destination, or `end` on truncation. @@ -152,14 +272,14 @@ char *xstpecpy(char *dest, char *end, const char *src); /** * A nice string copying function. * - * @param dest + * @dest * The NUL terminator of the destination string, or `end` if it is * already truncated. - * @param end + * @end * The end of the destination buffer. - * @param src + * @src * The string to copy from. - * @param n + * @n * The maximum number of characters to copy. * @return * The new NUL terminator of the destination, or `end` on truncation. @@ -167,15 +287,38 @@ char *xstpecpy(char *dest, char *end, const char *src); char *xstpencpy(char *dest, char *end, const char *src, size_t n); /** + * Thread-safe strerror(). + * + * @errnum + * An error number. + * @return + * A string describing that error, which remains valid until the next + * xstrerror() call in the same thread. + */ +const char *xstrerror(int errnum); + +/** + * Shorthand for xstrerror(errno). + */ +const char *errstr(void); + +/** * Format a mode like ls -l (e.g. -rw-r--r--). * - * @param mode + * @mode * The mode to format. - * @param str + * @str * The string to hold the formatted mode. */ void xstrmode(mode_t mode, char str[11]); +#include <sys/resource.h> + +/** + * Compare two rlim_t values, accounting for infinite limits. + */ +int rlim_cmp(rlim_t a, rlim_t b); + #include <sys/types.h> /** @@ -195,19 +338,59 @@ int xminor(dev_t dev); // #include <sys/stat.h> -#if __APPLE__ -# define st_atim st_atimespec -# define st_ctim st_ctimespec -# define st_mtim st_mtimespec -# define st_birthtim st_birthtimespec +/** + * Get the access/change/modification time from a struct stat. + */ +#if BFS_HAS_ST_ACMTIM +# define ST_ATIM(sb) (sb).st_atim +# define ST_CTIM(sb) (sb).st_ctim +# define ST_MTIM(sb) (sb).st_mtim +#elif BFS_HAS_ST_ACMTIMESPEC +# define ST_ATIM(sb) (sb).st_atimespec +# define ST_CTIM(sb) (sb).st_ctimespec +# define ST_MTIM(sb) (sb).st_mtimespec +#else +# define ST_ATIM(sb) ((struct timespec) { .tv_sec = (sb).st_atime }) +# define ST_CTIM(sb) ((struct timespec) { .tv_sec = (sb).st_ctime }) +# define ST_MTIM(sb) ((struct timespec) { .tv_sec = (sb).st_mtime }) #endif +// #include <sys/wait.h> + +/** + * waitpid() wrapper that handles EINTR. + */ +pid_t xwaitpid(pid_t pid, int *status, int flags); + +#include <sys/ioctl.h> // May be necessary for struct winsize +#include <termios.h> + +/** + * Open the controlling terminal. + * + * @flags + * The open() flags. + * @return + * An open file descriptor, or -1 on failure. + */ +int open_cterm(int flags); + +/** + * tcgetwinsize()/ioctl(TIOCGWINSZ) wrapper. + */ +int xtcgetwinsize(int fd, struct winsize *ws); + +/** + * tcsetwinsize()/ioctl(TIOCSWINSZ) wrapper. + */ +int xtcsetwinsize(int fd, const struct winsize *ws); + // #include <unistd.h> /** * Like dup(), but set the FD_CLOEXEC flag. * - * @param fd + * @fd * The file descriptor to duplicate. * @return * A duplicated file descriptor, or -1 on failure. @@ -217,7 +400,7 @@ int dup_cloexec(int fd); /** * Like pipe(), but set the FD_CLOEXEC flag. * - * @param pipefd + * @pipefd * The array to hold the two file descriptors. * @return * 0 on success, -1 on failure. @@ -239,14 +422,14 @@ size_t xread(int fd, void *buf, size_t nbytes); * writes. * * @return - The number of bytes written. A value != nbytes indicates an error. + * The number of bytes written. A value != nbytes indicates an error. */ size_t xwrite(int fd, const void *buf, size_t nbytes); /** * close() variant that preserves errno. * - * @param fd + * @fd * The file descriptor to close. */ void close_quietly(int fd); @@ -254,7 +437,7 @@ void close_quietly(int fd); /** * close() wrapper that asserts the file descriptor is valid. * - * @param fd + * @fd * The file descriptor to close. * @return * 0 on success, or -1 on error. @@ -269,11 +452,11 @@ int xfaccessat(int fd, const char *path, int amode); /** * readlinkat() wrapper that dynamically allocates the result. * - * @param fd + * @fd * The base directory descriptor. - * @param path + * @path * The path to the link, relative to fd. - * @param size + * @size * An estimate for the size of the link name (pass 0 if unknown). * @return * The target of the link, allocated with malloc(), or NULL on failure. @@ -283,7 +466,7 @@ char *xreadlinkat(int fd, const char *path, size_t size); /** * Wrapper for confstr() that allocates with malloc(). * - * @param name + * @name * The ID of the confstr to look up. * @return * The value of the confstr, or NULL on failure. @@ -293,30 +476,94 @@ char *xconfstr(int name); /** * Portability wrapper for strtofflags(). * - * @param str + * @str * The string to parse. The pointee will be advanced to the first * invalid position on error. - * @param set + * @set * The flags that are set in the string. - * @param clear + * @clear * The flags that are cleared in the string. * @return * 0 on success, -1 on failure. */ int xstrtofflags(const char **str, unsigned long long *set, unsigned long long *clear); -// #include <wchar.h> +/** + * Wrapper for sysconf() that works around an MSan bug. + */ +long xsysconf(int name); + +/** + * Check for a POSIX option[1] at runtime. + * + * [1]: https://pubs.opengroup.org/onlinepubs/9799919799/basedefs/V1_chap02.html#tag_02_01_06 + * + * @name + * The symbolic name of the POSIX option (e.g. SPAWN). + * @return + * The value of the option, either -1 or a date like 202405. + */ +#define sysoption(name) \ + (_POSIX_##name == 0 ? xsysconf(_SC_##name) : _POSIX_##name) + +/** + * Get the number of CPU threads available to the current process. + */ +long nproc(void); + +#include <wchar.h> + +/** + * Error-recovering mbrtowc() wrapper. + * + * @str + * The string to convert. + * @i + * The current index. + * @len + * The length of the string. + * @mb + * The multi-byte decoding state. + * @return + * The wide character at index *i, or WEOF if decoding fails. In either + * case, *i will be advanced to the next multi-byte character. + */ +wint_t xmbrtowc(const char *str, size_t *i, size_t len, mbstate_t *mb); /** * wcswidth() variant that works on narrow strings. * - * @param str + * @str * The string to measure. * @return * The likely width of that string in a terminal. */ size_t xstrwidth(const char *str); +/** + * wcwidth() wrapper that works around LLVM bug #65532. + */ +#define xwcwidth BFS_INTERCEPT(wcwidth) + +#include <wctype.h> + +/** + * Wrap iswalpha()/iswdigit()/etc. + */ +#define BFS_ISWCTYPE(fn, c) BFS_INTERCEPT(fn)(c) + +#define xiswalnum(c) BFS_ISWCTYPE(iswalnum, c) +#define xiswalpha(c) BFS_ISWCTYPE(iswalpha, c) +#define xiswcntrl(c) BFS_ISWCTYPE(iswcntrl, c) +#define xiswdigit(c) BFS_ISWCTYPE(iswdigit, c) +#define xiswlower(c) BFS_ISWCTYPE(iswlower, c) +#define xiswgraph(c) BFS_ISWCTYPE(iswgraph, c) +#define xiswprint(c) BFS_ISWCTYPE(iswprint, c) +#define xiswpunct(c) BFS_ISWCTYPE(iswpunct, c) +#define xiswspace(c) BFS_ISWCTYPE(iswspace, c) +#define xiswupper(c) BFS_ISWCTYPE(iswupper, c) +#define xiswxdigit(c) BFS_ISWCTYPE(iswxdigit, c) + // #include <wordexp.h> /** @@ -338,13 +585,13 @@ enum wesc_flags { /** * Escape a string as a single shell word. * - * @param dest + * @dest * The destination string to fill. - * @param end + * @end * The end of the destination buffer. - * @param src + * @src * The string to escape. - * @param flags + * @flags * Controls which characters to escape. * @return * The new NUL terminator of the destination, or `end` on truncation. @@ -354,15 +601,15 @@ char *wordesc(char *dest, char *end, const char *str, enum wesc_flags flags); /** * Escape a string as a single shell word. * - * @param dest + * @dest * The destination string to fill. - * @param end + * @end * The end of the destination buffer. - * @param src + * @src * The string to escape. - * @param n + * @n * The maximum length of the string. - * @param flags + * @flags * Controls which characters to escape. * @return * The new NUL terminator of the destination, or `end` on truncation. @@ -9,6 +9,8 @@ * * - struct bftw_list: A linked list of bftw_file's. * + * - struct bftw_queue: A multi-stage queue of bftw_file's. + * * - struct bftw_cache: An LRU list of bftw_file's with open file descriptors, * used for openat() to minimize the amount of path re-traversals. * @@ -17,9 +19,10 @@ */ #include "bftw.h" + #include "alloc.h" +#include "bfs.h" #include "bfstd.h" -#include "config.h" #include "diag.h" #include "dir.h" #include "dstring.h" @@ -28,57 +31,138 @@ #include "mtab.h" #include "stat.h" #include "trie.h" + #include <errno.h> #include <fcntl.h> #include <stdlib.h> #include <string.h> #include <sys/stat.h> +#include <sys/types.h> + +/** Initialize a bftw_stat cache. */ +static void bftw_stat_init(struct bftw_stat *bufs, struct bfs_stat *stat_buf, struct bfs_stat *lstat_buf) { + bufs->stat_buf = stat_buf; + bufs->lstat_buf = lstat_buf; + bufs->stat_err = -1; + bufs->lstat_err = -1; +} -/** Caching bfs_stat(). */ -static const struct bfs_stat *bftw_stat_impl(struct BFTW *ftwbuf, struct bftw_stat *cache, enum bfs_stat_flags flags) { - if (!cache->buf) { - if (cache->error) { - errno = cache->error; - } else if (bfs_stat(ftwbuf->at_fd, ftwbuf->at_path, flags, &cache->storage) == 0) { - cache->buf = &cache->storage; +/** Fill a bftw_stat cache from another one. */ +static void bftw_stat_fill(struct bftw_stat *dest, const struct bftw_stat *src) { + if (dest->stat_err < 0 && src->stat_err >= 0) { + dest->stat_buf = src->stat_buf; + dest->stat_err = src->stat_err; + } + + if (dest->lstat_err < 0 && src->lstat_err >= 0) { + dest->lstat_buf = src->lstat_buf; + dest->lstat_err = src->lstat_err; + } +} + +/** Cache a bfs_stat() result. */ +static void bftw_stat_cache(struct bftw_stat *bufs, enum bfs_stat_flags flags, const struct bfs_stat *buf, int err) { + if (flags & BFS_STAT_NOFOLLOW) { + bufs->lstat_buf = buf; + bufs->lstat_err = err; + if (err || !S_ISLNK(buf->mode)) { + // Non-link, so share stat info + bufs->stat_buf = buf; + bufs->stat_err = err; + } + } else if (flags & BFS_STAT_TRYFOLLOW) { + if (err) { + bufs->stat_err = err; + } else if (S_ISLNK(buf->mode)) { + bufs->lstat_buf = buf; + bufs->lstat_err = err; + bufs->stat_err = ENOENT; } else { - cache->error = errno; + bufs->stat_buf = buf; + bufs->stat_err = err; } + } else { + bufs->stat_buf = buf; + bufs->stat_err = err; + } +} + +/** Caching bfs_stat(). */ +static const struct bfs_stat *bftw_stat_impl(struct BFTW *ftwbuf, enum bfs_stat_flags flags) { + struct bftw_stat *bufs = &ftwbuf->stat_bufs; + struct bfs_stat *buf; + + if (flags & BFS_STAT_NOFOLLOW) { + buf = (struct bfs_stat *)bufs->lstat_buf; + if (bufs->lstat_err == 0) { + return buf; + } else if (bufs->lstat_err > 0) { + errno = bufs->lstat_err; + return NULL; + } + } else { + buf = (struct bfs_stat *)bufs->stat_buf; + if (bufs->stat_err == 0) { + return buf; + } else if (bufs->stat_err > 0) { + errno = bufs->stat_err; + return NULL; + } + } + + struct bfs_stat *ret; + int err; + if (bfs_stat(ftwbuf->at_fd, ftwbuf->at_path, flags, buf) == 0) { + ret = buf; + err = 0; +#ifdef S_IFWHT + } else if (errno == ENOENT && ftwbuf->type == BFS_WHT) { + // This matches the behavior of FTS_WHITEOUT on BSD + ret = memset(buf, 0, sizeof(*buf)); + ret->mode = S_IFWHT; + err = 0; +#endif + } else { + ret = NULL; + err = errno; } - return cache->buf; + bftw_stat_cache(bufs, flags, ret, err); + return ret; } const struct bfs_stat *bftw_stat(const struct BFTW *ftwbuf, enum bfs_stat_flags flags) { struct BFTW *mutbuf = (struct BFTW *)ftwbuf; const struct bfs_stat *ret; - if (flags & BFS_STAT_NOFOLLOW) { - ret = bftw_stat_impl(mutbuf, &mutbuf->lstat_cache, BFS_STAT_NOFOLLOW); - if (ret && !S_ISLNK(ret->mode) && !mutbuf->stat_cache.buf) { - // Non-link, so share stat info - mutbuf->stat_cache.buf = ret; + if (flags & BFS_STAT_TRYFOLLOW) { + ret = bftw_stat_impl(mutbuf, BFS_STAT_FOLLOW); + if (!ret && errno_is_like(ENOENT)) { + ret = bftw_stat_impl(mutbuf, BFS_STAT_NOFOLLOW); } } else { - ret = bftw_stat_impl(mutbuf, &mutbuf->stat_cache, BFS_STAT_FOLLOW); - if (!ret && (flags & BFS_STAT_TRYFOLLOW) && is_nonexistence_error(errno)) { - ret = bftw_stat_impl(mutbuf, &mutbuf->lstat_cache, BFS_STAT_NOFOLLOW); - } + ret = bftw_stat_impl(mutbuf, flags); } return ret; } const struct bfs_stat *bftw_cached_stat(const struct BFTW *ftwbuf, enum bfs_stat_flags flags) { + const struct bftw_stat *bufs = &ftwbuf->stat_bufs; + if (flags & BFS_STAT_NOFOLLOW) { - return ftwbuf->lstat_cache.buf; - } else if (ftwbuf->stat_cache.buf) { - return ftwbuf->stat_cache.buf; - } else if ((flags & BFS_STAT_TRYFOLLOW) && is_nonexistence_error(ftwbuf->stat_cache.error)) { - return ftwbuf->lstat_cache.buf; - } else { - return NULL; + if (bufs->lstat_err == 0) { + return bufs->lstat_buf; + } + } else if (bufs->stat_err == 0) { + return bufs->stat_buf; + } else if ((flags & BFS_STAT_TRYFOLLOW) && error_is_like(bufs->stat_err, ENOENT)) { + if (bufs->lstat_err == 0) { + return bufs->lstat_buf; + } } + + return NULL; } enum bfs_type bftw_type(const struct BFTW *ftwbuf, enum bfs_stat_flags flags) { @@ -115,12 +199,26 @@ struct bftw_file { /** The root under which this file was found. */ struct bftw_file *root; - /** The next file to open/close/visit. */ + /** + * List node for: + * + * bftw_queue::buffer + * bftw_queue::waiting + * bftw_file_open()::parents + */ struct bftw_file *next; - /** The next directory to read. */ - struct { struct bftw_file *next; } to_read; - /** LRU list node. */ + /** + * List node for: + * + * bftw_queue::ready + * bftw_state::to_close + */ + struct { struct bftw_file *next; } ready; + + /** + * List node for bftw_cache. + */ struct { struct bftw_file *prev; struct bftw_file *next; @@ -147,6 +245,9 @@ struct bftw_file { /** The inode number, for cycle detection. */ ino_t ino; + /** Cached bfs_stat() info. */ + struct bftw_stat stat_bufs; + /** The offset of this file in the full path. */ size_t nameoff; /** The length of the file's name. */ @@ -164,6 +265,283 @@ struct bftw_list { }; /** + * bftw_queue flags. + */ +enum bftw_qflags { + /** Track the sync/async service balance. */ + BFTW_QBALANCE = 1 << 0, + /** Buffer files before adding them to the queue. */ + BFTW_QBUFFER = 1 << 1, + /** Use LIFO (stack/DFS) ordering. */ + BFTW_QLIFO = 1 << 2, + /** Maintain a strict order. */ + BFTW_QORDER = 1 << 3, +}; + +/** + * A queue of bftw_file's that may be serviced asynchronously. + * + * A bftw_queue comprises three linked lists each tracking different stages. + * When BFTW_QBUFFER is set, files are initially pushed to the buffer: + * + * ╔═══╗ ╔═══╦═══╗ + * buffer: ║ 𝘩 ║ ║ 𝘩 ║ 𝘪 ║ + * ╠═══╬═══╦═══╗ ╠═══╬═══╬═══╗ + * waiting: ║ e ║ f ║ g ║ → ║ e ║ f ║ g ║ + * ╠═══╬═══╬═══╬═══╗ ╠═══╬═══╬═══╬═══╗ + * ready: ║ 𝕒 ║ 𝕓 ║ 𝕔 ║ 𝕕 ║ ║ 𝕒 ║ 𝕓 ║ 𝕔 ║ 𝕕 ║ + * ╚═══╩═══╩═══╩═══╝ ╚═══╩═══╩═══╩═══╝ + * + * When bftw_queue_flush() is called, the files in the buffer are appended to + * the waiting list (or prepended, if BFTW_QLIFO is set): + * + * ╔═╗ + * buffer: ║ ║ + * ╠═╩═╦═══╦═══╦═══╦═══╗ + * waiting: ║ e ║ f ║ g ║ h ║ i ║ + * ╠═══╬═══╬═══╬═══╬═══╝ + * ready: ║ 𝕒 ║ 𝕓 ║ 𝕔 ║ 𝕕 ║ + * ╚═══╩═══╩═══╩═══╝ + * + * Using the buffer gives a more natural ordering for BFTW_QLIFO, and allows + * files to be sorted before adding them to the waiting list. If BFTW_QBUFFER + * is not set, files are pushed directly to the waiting list instead. + * + * Files on the waiting list are waiting to be "serviced" asynchronously by the + * ioq (for example, by an ioq_opendir() or ioq_stat() call). While they are + * being serviced, they are detached from the queue by bftw_queue_detach() and + * are not tracked by the queue at all: + * + * ╔═╗ + * buffer: ║ ║ + * ╠═╩═╦═══╦═══╗ ⎛ ┌───┬───┐ ⎞ + * waiting: ║ g ║ h ║ i ║ ⎜ ioq: │ 𝓮 │ 𝓯 │ ⎟ + * ╠═══╬═══╬═══╬═══╗ ⎝ └───┴───┘ ⎠ + * ready: ║ 𝕒 ║ 𝕓 ║ 𝕔 ║ 𝕕 ║ + * ╚═══╩═══╩═══╩═══╝ + * + * When their async service is complete, files are reattached to the queue by + * bftw_queue_attach(), this time on the ready list: + * + * ╔═╗ + * buffer: ║ ║ + * ╠═╩═╦═══╦═══╗ ⎛ ┌───┐ ⎞ + * waiting: ║ g ║ h ║ i ║ ⎜ ioq: │ 𝓮 │ ⎟ + * ╠═══╬═══╬═══╬═══╦═══╗ ⎝ └───┘ ⎠ + * ready: ║ 𝕒 ║ 𝕓 ║ 𝕔 ║ 𝕕 ║ 𝕗 ║ + * ╚═══╩═══╩═══╩═══╩═══╝ + * + * Files are added to the ready list in the order they are finished by the ioq. + * bftw_queue_pop() pops a file from the ready list if possible. Otherwise, it + * pops from the waiting list, and the file must be serviced synchronously. + * + * However, if BFTW_QORDER is set, files must be popped in the exact order they + * are added to the waiting list (to maintain sorted order). In this case, + * files are added to the waiting and ready lists at the same time. The + * file->ioqueued flag is set while it is in-service, so that bftw() can wait + * for it to be truly ready before using it. + * + * ╔═╗ + * buffer: ║ ║ + * ╠═╩═╦═══╦═══╗ ⎛ ┌───┐ ⎞ + * waiting: ║ g ║ h ║ i ║ ⎜ ioq: │ 𝓮 │ ⎟ + * ╠═══╬═══╬═══╬═══╦═══╦═══╦═══╦═══╦═══╗ ⎝ └───┘ ⎠ + * ready: ║ 𝕒 ║ 𝕓 ║ 𝕔 ║ 𝕕 ║ 𝓮 ║ 𝕗 ║ g ║ h ║ i ║ + * ╚═══╩═══╩═══╩═══╩═══╩═══╩═══╩═══╩═══╝ + * + * If BFTW_QBALANCE is set, queue->imbalance tracks the delta between async + * service (negative) and synchronous service (positive). The queue is + * considered "balanced" when this number is non-negative. Only a balanced + * queue will perform any async service, ensuring work is fairly distributed + * between the main thread and the ioq. + * + * BFTW_QBALANCE is only set for single-threaded ioqs. When an ioq has multiple + * threads, it is faster to wait for the ioq to complete an operation than it is + * to perform it on the main thread. + */ +struct bftw_queue { + /** Queue flags. */ + enum bftw_qflags flags; + /** A buffer of files to be enqueued together. */ + struct bftw_list buffer; + /** A list of files which are waiting to be serviced. */ + struct bftw_list waiting; + /** A list of already-serviced files. */ + struct bftw_list ready; + /** The current size of the queue. */ + size_t size; + /** The number of files currently in-service. */ + size_t ioqueued; + /** Tracks the imbalance between synchronous and async service. */ + unsigned long imbalance; +}; + +/** Initialize a queue. */ +static void bftw_queue_init(struct bftw_queue *queue, enum bftw_qflags flags) { + queue->flags = flags; + SLIST_INIT(&queue->buffer); + SLIST_INIT(&queue->waiting); + SLIST_INIT(&queue->ready); + queue->size = 0; + queue->ioqueued = 0; + queue->imbalance = 0; +} + +/** Add a file to the queue. */ +static void bftw_queue_push(struct bftw_queue *queue, struct bftw_file *file) { + if (queue->flags & BFTW_QBUFFER) { + SLIST_APPEND(&queue->buffer, file); + } else if (queue->flags & BFTW_QLIFO) { + SLIST_PREPEND(&queue->waiting, file); + if (queue->flags & BFTW_QORDER) { + SLIST_PREPEND(&queue->ready, file, ready); + } + } else { + SLIST_APPEND(&queue->waiting, file); + if (queue->flags & BFTW_QORDER) { + SLIST_APPEND(&queue->ready, file, ready); + } + } + + ++queue->size; +} + +/** Add any buffered files to the queue. */ +static void bftw_queue_flush(struct bftw_queue *queue) { + if (!(queue->flags & BFTW_QBUFFER)) { + return; + } + + if (queue->flags & BFTW_QORDER) { + // When sorting, add files to the ready list at the same time + // (and in the same order) as they are added to the waiting list + struct bftw_file **cursor = (queue->flags & BFTW_QLIFO) + ? &queue->ready.head + : queue->ready.tail; + for_slist (struct bftw_file, file, &queue->buffer) { + cursor = SLIST_INSERT(&queue->ready, cursor, file, ready); + } + } + + if (queue->flags & BFTW_QLIFO) { + SLIST_EXTEND(&queue->buffer, &queue->waiting); + } + + SLIST_EXTEND(&queue->waiting, &queue->buffer); +} + +/** Check if the queue is properly balanced for async work. */ +static bool bftw_queue_balanced(const struct bftw_queue *queue) { + if (queue->flags & BFTW_QBALANCE) { + return (long)queue->imbalance >= 0; + } else { + return true; + } +} + +/** Update the queue balance for (a)sync service. */ +static void bftw_queue_rebalance(struct bftw_queue *queue, bool async) { + if (async) { + --queue->imbalance; + } else { + ++queue->imbalance; + } +} + +/** Detach the next waiting file. */ +static void bftw_queue_detach(struct bftw_queue *queue, struct bftw_file *file, bool async) { + bfs_assert(!file->ioqueued); + + if (file == SLIST_HEAD(&queue->buffer)) { + // To maintain order, we can't detach any files until they're + // added to the waiting/ready lists + bfs_assert(!(queue->flags & BFTW_QORDER)); + SLIST_POP(&queue->buffer); + } else if (file == SLIST_HEAD(&queue->waiting)) { + SLIST_POP(&queue->waiting); + } else { + bfs_bug("Detached file was not buffered or waiting"); + } + + if (async) { + file->ioqueued = true; + ++queue->ioqueued; + bftw_queue_rebalance(queue, true); + } +} + +/** Reattach a serviced file to the queue. */ +static void bftw_queue_attach(struct bftw_queue *queue, struct bftw_file *file, bool async) { + if (async) { + bfs_assert(file->ioqueued); + file->ioqueued = false; + --queue->ioqueued; + } else { + bfs_assert(!file->ioqueued); + } + + if (!(queue->flags & BFTW_QORDER)) { + SLIST_APPEND(&queue->ready, file, ready); + } +} + +/** Make a file ready immediately. */ +static void bftw_queue_skip(struct bftw_queue *queue, struct bftw_file *file) { + bftw_queue_detach(queue, file, false); + bftw_queue_attach(queue, file, false); +} + +/** Get the next waiting file. */ +static struct bftw_file *bftw_queue_waiting(const struct bftw_queue *queue) { + if (!(queue->flags & BFTW_QBUFFER)) { + return SLIST_HEAD(&queue->waiting); + } + + if (queue->flags & BFTW_QORDER) { + // Don't detach files until they're on the waiting/ready lists + return SLIST_HEAD(&queue->waiting); + } + + const struct bftw_list *prefix = &queue->waiting; + const struct bftw_list *suffix = &queue->buffer; + if (queue->flags & BFTW_QLIFO) { + prefix = &queue->buffer; + suffix = &queue->waiting; + } + + struct bftw_file *file = SLIST_HEAD(prefix); + if (!file) { + file = SLIST_HEAD(suffix); + } + return file; +} + +/** Get the next ready file. */ +static struct bftw_file *bftw_queue_ready(const struct bftw_queue *queue) { + return SLIST_HEAD(&queue->ready); +} + +/** Pop a file from the queue. */ +static struct bftw_file *bftw_queue_pop(struct bftw_queue *queue) { + // Don't pop until we've had a chance to sort the buffer + bfs_assert(SLIST_EMPTY(&queue->buffer)); + + struct bftw_file *file = SLIST_POP(&queue->ready, ready); + + if (!file || file == SLIST_HEAD(&queue->waiting)) { + // If no files are ready, try the waiting list. Or, if + // BFTW_QORDER is set, we may need to pop from both lists. + file = SLIST_POP(&queue->waiting); + } + + if (file) { + --queue->size; + } + + return file; +} + +/** * A cache of open directories. */ struct bftw_cache { @@ -178,10 +556,14 @@ struct bftw_cache { /** bftw_file arena. */ struct varena files; + /** bfs_dir arena. */ struct arena dirs; /** Remaining bfs_dir capacity. */ - size_t dirlimit; + int dir_limit; + + /** bfs_stat arena. */ + struct arena stat_bufs; }; /** Initialize a cache. */ @@ -191,28 +573,35 @@ static void bftw_cache_init(struct bftw_cache *cache, size_t capacity) { cache->capacity = capacity; VARENA_INIT(&cache->files, struct bftw_file, name); + bfs_dir_arena(&cache->dirs); - cache->dirlimit = capacity - 1; - if (cache->dirlimit > 1024) { - cache->dirlimit = 1024; + if (cache->capacity > 1024) { + cache->dir_limit = 1024; + } else { + cache->dir_limit = capacity - 1; } + + ARENA_INIT(&cache->stat_bufs, struct bfs_stat); } /** Allocate a directory. */ -static struct bfs_dir *bftw_allocdir(struct bftw_cache *cache) { - if (cache->dirlimit == 0) { +static struct bfs_dir *bftw_allocdir(struct bftw_cache *cache, bool force) { + if (!force && cache->dir_limit <= 0) { errno = ENOMEM; return NULL; } - --cache->dirlimit; - return arena_alloc(&cache->dirs); + struct bfs_dir *dir = arena_alloc(&cache->dirs); + if (dir) { + --cache->dir_limit; + } + return dir; } /** Free a directory. */ static void bftw_freedir(struct bftw_cache *cache, struct bfs_dir *dir) { - ++cache->dirlimit; + ++cache->dir_limit; arena_free(&cache->dirs, dir); } @@ -318,12 +707,12 @@ static size_t bftw_child_nameoff(const struct bftw_file *parent) { /** Destroy a cache. */ static void bftw_cache_destroy(struct bftw_cache *cache) { - bfs_assert(!cache->head); - bfs_assert(!cache->tail); + bfs_assert(LIST_EMPTY(cache)); bfs_assert(!cache->target); - varena_destroy(&cache->files); + arena_destroy(&cache->stat_bufs); arena_destroy(&cache->dirs); + varena_destroy(&cache->files); } /** Create a new bftw_file. */ @@ -347,9 +736,9 @@ static struct bftw_file *bftw_file_new(struct bftw_cache *cache, struct bftw_fil file->nameoff = 0; } - file->next = NULL; - file->to_read.next = NULL; - file->lru.prev = file->lru.next = NULL; + SLIST_ITEM_INIT(file); + SLIST_ITEM_INIT(file, ready); + LIST_ITEM_INIT(file, lru); file->refcount = 1; file->pincount = 0; @@ -361,6 +750,8 @@ static struct bftw_file *bftw_file_new(struct bftw_cache *cache, struct bftw_fil file->dev = -1; file->ino = -1; + bftw_stat_init(&file->stat_bufs, NULL, NULL); + file->namelen = namelen; memcpy(file->name, name, namelen + 1); @@ -378,8 +769,21 @@ static void bftw_file_set_dir(struct bftw_cache *cache, struct bftw_file *file, file->fd = bfs_dirfd(dir); bftw_cache_add(cache, file); } +} - bftw_cache_pin(cache, file); +/** Free a file's cached stat() buffers. */ +static void bftw_stat_recycle(struct bftw_cache *cache, struct bftw_file *file) { + struct bftw_stat *bufs = &file->stat_bufs; + + struct bfs_stat *stat_buf = (struct bfs_stat *)bufs->stat_buf; + struct bfs_stat *lstat_buf = (struct bfs_stat *)bufs->lstat_buf; + if (stat_buf) { + arena_free(&cache->stat_bufs, stat_buf); + } else if (lstat_buf) { + arena_free(&cache->stat_bufs, lstat_buf); + } + + bftw_stat_init(bufs, NULL, NULL); } /** Free a bftw_file. */ @@ -390,6 +794,8 @@ static void bftw_file_free(struct bftw_cache *cache, struct bftw_file *file) { bftw_file_close(cache, file); } + bftw_stat_recycle(cache, file); + varena_free(&cache->files, file, file->namelen + 1); } @@ -411,6 +817,8 @@ struct bftw_state { enum bftw_strategy strategy; /** The mount table. */ const struct bfs_mtab *mtab; + /** bfs_opendir() flags. */ + enum bfs_dir_flags dir_flags; /** The appropriate errno value, if any. */ int error; @@ -423,20 +831,15 @@ struct bftw_state { /** The number of I/O threads. */ size_t nthreads; - /** The queue of directories to open. */ - struct bftw_list to_open; - /** The queue of directories to read. */ - struct bftw_list to_read; /** The queue of unpinned directories to unwrap. */ struct bftw_list to_close; - /** The queue of files to visit. */ - struct bftw_list to_visit; - /** A batch of files to enqueue. */ - struct bftw_list batch; + struct bftw_queue fileq; + /** The queue of directories to open/read. */ + struct bftw_queue dirq; /** The current path. */ - char *path; + dchar *path; /** The current file. */ struct bftw_file *file; /** The previous file. */ @@ -453,8 +856,44 @@ struct bftw_state { /** Extra data about the current file. */ struct BFTW ftwbuf; + /** stat() buffer storage. */ + struct bfs_stat stat_buf; + /** lstat() buffer storage. */ + struct bfs_stat lstat_buf; }; +/** Check if we have to buffer files before visiting them. */ +static bool bftw_must_buffer(const struct bftw_state *state) { + if (state->flags & BFTW_SORT) { + // Have to buffer the files to sort them + return true; + } + + if (state->strategy == BFTW_DFS && state->nthreads == 0) { + // Without buffering, we would get a not-quite-depth-first + // ordering: + // + // a + // b + // a/c + // a/c/d + // b/e + // b/e/f + // + // This is okay for iterative deepening, since the caller only + // sees files at the target depth. We also deem it okay for + // parallel searches, since the order is unpredictable anyway. + return true; + } + + if ((state->flags & BFTW_STAT) && state->nthreads > 1) { + // We will be buffering every file anyway for ioq_stat() + return true; + } + + return false; +} + /** Initialize the bftw() state. */ static int bftw_state_init(struct bftw_state *state, const struct bftw_args *args) { state->paths = args->paths; @@ -464,35 +903,68 @@ static int bftw_state_init(struct bftw_state *state, const struct bftw_args *arg state->flags = args->flags; state->strategy = args->strategy; state->mtab = args->mtab; - - if ((state->flags & BFTW_SORT) || state->strategy == BFTW_DFS) { - state->flags |= BFTW_BUFFER; - } - + state->dir_flags = 0; state->error = 0; - if (args->nopenfd < 1) { + if (args->nopenfd < 2) { errno = EMFILE; return -1; } - bftw_cache_init(&state->cache, args->nopenfd); - state->nthreads = args->nthreads; - if (state->nthreads > 0) { - state->ioq = ioq_create(4096, state->nthreads); + size_t nopenfd = args->nopenfd; + size_t qdepth = 4096; + size_t nthreads = args->nthreads; + +#if BFS_WITH_LIBURING + // io_uring uses one fd per ring, ioq uses one ring per thread + if (nthreads >= nopenfd - 1) { + nthreads = nopenfd - 2; + } + nopenfd -= nthreads; +#endif + + bftw_cache_init(&state->cache, nopenfd); + + if (nthreads > 0) { + state->ioq = ioq_create(qdepth, nthreads); if (!state->ioq) { return -1; } } else { state->ioq = NULL; } + state->nthreads = nthreads; + + if (bftw_must_buffer(state)) { + state->flags |= BFTW_BUFFER; + } + + if (state->flags & BFTW_WHITEOUTS) { + state->dir_flags |= BFS_DIR_WHITEOUTS; + } - SLIST_INIT(&state->to_open); - SLIST_INIT(&state->to_read); SLIST_INIT(&state->to_close); - SLIST_INIT(&state->to_visit); - SLIST_INIT(&state->batch); + enum bftw_qflags qflags = 0; + if (state->strategy != BFTW_BFS) { + qflags |= BFTW_QBUFFER | BFTW_QLIFO; + } + if (state->flags & BFTW_BUFFER) { + qflags |= BFTW_QBUFFER; + } + if (state->flags & BFTW_SORT) { + qflags |= BFTW_QORDER; + } else if (nthreads == 1) { + qflags |= BFTW_QBALANCE; + } + bftw_queue_init(&state->fileq, qflags); + + if (state->strategy == BFTW_BFS || (state->flags & BFTW_BUFFER)) { + // In breadth-first mode, or if we're already buffering files, + // directories can be queued in FIFO order + qflags &= ~(BFTW_QBUFFER | BFTW_QLIFO); + } + bftw_queue_init(&state->dirq, qflags); state->path = NULL; state->file = NULL; @@ -505,22 +977,47 @@ static int bftw_state_init(struct bftw_state *state, const struct bftw_args *arg return 0; } +/** Queue a directory for unwrapping. */ +static void bftw_delayed_unwrap(struct bftw_state *state, struct bftw_file *file) { + bfs_assert(file->dir); + + if (!SLIST_ATTACHED(&state->to_close, file, ready)) { + SLIST_APPEND(&state->to_close, file, ready); + } +} + +/** Unpin a file's parent. */ +static void bftw_unpin_parent(struct bftw_state *state, struct bftw_file *file, bool unwrap) { + struct bftw_file *parent = file->parent; + if (!parent) { + return; + } + + bftw_cache_unpin(&state->cache, parent); + + if (unwrap && parent->dir && parent->pincount == 0) { + bftw_delayed_unwrap(state, parent); + } +} + /** Pop a response from the I/O queue. */ static int bftw_ioq_pop(struct bftw_state *state, bool block) { + struct bftw_cache *cache = &state->cache; struct ioq *ioq = state->ioq; if (!ioq) { return -1; } - struct ioq_ent *ent = block ? ioq_pop(ioq) : ioq_trypop(ioq); + ioq_submit(ioq); + struct ioq_ent *ent = ioq_pop(ioq, block); if (!ent) { return -1; } - struct bftw_cache *cache = &state->cache; - struct bftw_file *file; - struct bftw_file *parent; - struct bfs_dir *dir; + struct bftw_file *file = ent->ptr; + if (file) { + bftw_unpin_parent(state, file, true); + } enum ioq_op op = ent->op; switch (op) { @@ -530,33 +1027,34 @@ static int bftw_ioq_pop(struct bftw_state *state, bool block) { case IOQ_CLOSEDIR: ++cache->capacity; - dir = ent->closedir.dir; - bftw_freedir(cache, dir); + bftw_freedir(cache, ent->closedir.dir); break; case IOQ_OPENDIR: - file = ent->ptr; - file->ioqueued = false; - ++cache->capacity; - parent = file->parent; - if (parent) { - bftw_cache_unpin(cache, parent); - if (parent->pincount == 0 && parent->dir) { - SLIST_APPEND(&state->to_close, parent); - } - } - dir = ent->opendir.dir; - if (ent->ret == 0) { - bftw_file_set_dir(cache, file, dir); + if (ent->result >= 0) { + bftw_file_set_dir(cache, file, ent->opendir.dir); } else { - bftw_freedir(cache, dir); + bftw_freedir(cache, ent->opendir.dir); } - if (!(state->flags & BFTW_SORT)) { - SLIST_APPEND(&state->to_read, file, to_read); + bftw_queue_attach(&state->dirq, file, true); + break; + + case IOQ_STAT: + if (ent->result >= 0) { + bftw_stat_cache(&file->stat_bufs, ent->stat.flags, ent->stat.buf, 0); + } else { + arena_free(&cache->stat_bufs, ent->stat.buf); + bftw_stat_cache(&file->stat_bufs, ent->stat.flags, NULL, -ent->result); } + + bftw_queue_attach(&state->fileq, file, true); + break; + + default: + bfs_bug("Unexpected ioq op %d", (int)op); break; } @@ -575,9 +1073,9 @@ static int bftw_ioq_reserve(struct bftw_state *state) { return 0; } - // With more than two threads, it is faster to wait for an I/O operation - // to complete than it is to do it ourselves - bool block = state->nthreads > 2; + // With more than one background thread, it's faster to wait on + // background I/O than it is to do it on the main thread + bool block = state->nthreads > 1; if (bftw_ioq_pop(state, block) < 0) { return -1; } @@ -661,7 +1159,7 @@ static int bftw_file_open(struct bftw_state *state, struct bftw_file *file, cons } int fd = bftw_file_openat(state, file, base, at_path); - if (fd >= 0 || errno != ENAMETOOLONG) { + if (fd >= 0 || !errno_is_like(ENAMETOOLONG)) { return fd; } @@ -669,12 +1167,13 @@ static int bftw_file_open(struct bftw_state *state, struct bftw_file *file, cons struct bftw_list parents; SLIST_INIT(&parents); - struct bftw_file *cur; - for (cur = file; cur != base; cur = cur->parent) { + // Reverse the chain of parents + for (struct bftw_file *cur = file; cur != base; cur = cur->parent) { SLIST_PREPEND(&parents, cur); } - while ((cur = SLIST_POP(&parents))) { + // Open each component relative to its parent + drain_slist (struct bftw_file, cur, &parents) { if (!cur->parent || cur->parent->fd >= 0) { bftw_file_openat(state, cur, cur->parent, cur->name); } @@ -762,8 +1261,12 @@ static int bftw_unwrapdir(struct bftw_state *state, struct bftw_file *file) { return bftw_close(state, file); } - if (bftw_cache_reserve(state) != 0) { - return -1; + // Make room for dup() + bftw_cache_pin(cache, file); + int ret = bftw_cache_reserve(state); + bftw_cache_unpin(cache, file); + if (ret != 0) { + return ret; } int fd = dup_cloexec(file->fd); @@ -777,124 +1280,285 @@ static int bftw_unwrapdir(struct bftw_state *state, struct bftw_file *file) { return bftw_ioq_closedir(state, dir); } +/** Try to pin a file's parent. */ +static int bftw_pin_parent(struct bftw_state *state, struct bftw_file *file) { + struct bftw_file *parent = file->parent; + if (!parent) { + return AT_FDCWD; + } + + int fd = parent->fd; + if (fd < 0) { + // Don't confuse failures with AT_FDCWD + return (int)AT_FDCWD == -1 ? -2 : -1; + } + + bftw_cache_pin(&state->cache, parent); + return fd; +} + /** Open a directory asynchronously. */ static int bftw_ioq_opendir(struct bftw_state *state, struct bftw_file *file) { + struct bftw_cache *cache = &state->cache; + if (bftw_ioq_reserve(state) != 0) { goto fail; } - int dfd = AT_FDCWD; - struct bftw_cache *cache = &state->cache; - struct bftw_file *parent = file->parent; - if (parent) { - dfd = parent->fd; - if (dfd < 0) { - goto fail; - } - bftw_cache_pin(cache, parent); + int dfd = bftw_pin_parent(state, file); + if (dfd < 0 && dfd != (int)AT_FDCWD) { + goto fail; } if (bftw_cache_reserve(state) != 0) { goto unpin; } - struct bfs_dir *dir = bftw_allocdir(cache); + struct bfs_dir *dir = bftw_allocdir(cache, false); if (!dir) { goto unpin; } - if (ioq_opendir(state->ioq, dir, dfd, file->name, file) != 0) { + if (ioq_opendir(state->ioq, dir, dfd, file->name, state->dir_flags, file) != 0) { goto free; } - file->ioqueued = true; --cache->capacity; return 0; free: bftw_freedir(cache, dir); unpin: - if (parent) { - bftw_cache_unpin(cache, parent); - } + bftw_unpin_parent(state, file, false); fail: return -1; } +/** Open a batch of directories asynchronously. */ +static void bftw_ioq_opendirs(struct bftw_state *state) { + while (bftw_queue_balanced(&state->dirq)) { + struct bftw_file *dir = bftw_queue_waiting(&state->dirq); + if (!dir) { + break; + } + + if (bftw_ioq_opendir(state, dir) == 0) { + bftw_queue_detach(&state->dirq, dir, true); + } else { + break; + } + } +} + /** Push a directory onto the queue. */ static void bftw_push_dir(struct bftw_state *state, struct bftw_file *file) { bfs_assert(file->type == BFS_DIR); + bftw_queue_push(&state->dirq, file); + bftw_ioq_opendirs(state); +} - SLIST_APPEND(&state->to_open, file); - - if (state->flags & BFTW_SORT) { - // When sorting, directories are kept in order on the to_read - // list; otherwise, they are only added once they are open - SLIST_APPEND(&state->to_read, file, to_read); +/** Pop a file from a queue, then activate it. */ +static bool bftw_pop(struct bftw_state *state, struct bftw_queue *queue) { + if (queue->size == 0) { + return false; } - while (state->to_open.head) { - if (bftw_ioq_opendir(state, state->to_open.head) == 0) { - SLIST_POP(&state->to_open); - } else { + while (!bftw_queue_ready(queue) && queue->ioqueued > 0) { + bool block = true; + if (bftw_queue_waiting(queue) && state->nthreads == 1) { + // With only one background thread, balance the work + // between it and the main thread + block = false; + } + + if (bftw_ioq_pop(state, block) < 0) { break; } } + + struct bftw_file *file = bftw_queue_pop(queue); + if (!file) { + return false; + } + + while (file->ioqueued) { + bftw_ioq_pop(state, true); + } + + state->file = file; + return true; } /** Pop a directory to read from the queue. */ static bool bftw_pop_dir(struct bftw_state *state) { bfs_assert(!state->file); - struct bftw_cache *cache = &state->cache; - bool have_files = state->to_visit.head; - if (state->flags & BFTW_SORT) { // Keep strict breadth-first order when sorting - if (state->strategy != BFTW_DFS && have_files) { + if (state->strategy == BFTW_BFS && bftw_queue_ready(&state->fileq)) { return false; } + } else if (!bftw_queue_ready(&state->dirq)) { + // Don't block if we have files ready to visit + if (bftw_queue_ready(&state->fileq)) { + return false; + } + } + + return bftw_pop(state, &state->dirq); +} + +/** Figure out bfs_stat() flags. */ +static enum bfs_stat_flags bftw_stat_flags(const struct bftw_state *state, size_t depth) { + enum bftw_flags mask = BFTW_FOLLOW_ALL; + if (depth == 0) { + mask |= BFTW_FOLLOW_ROOTS; + } + + if (state->flags & mask) { + return BFS_STAT_TRYFOLLOW; } else { - while (!state->to_read.head) { - // Block if we have no other files/dirs to visit, or no room in the cache - bool have_dirs = state->to_open.head; - bool have_room = cache->capacity > 0 && cache->dirlimit > 0; - bool block = !(have_dirs || have_files) || !have_room; - - if (bftw_ioq_pop(state, block) < 0) { - break; - } + return BFS_STAT_NOFOLLOW; + } +} + +/** Check if a stat() call is necessary. */ +static bool bftw_must_stat(const struct bftw_state *state, size_t depth, enum bfs_type type, const char *name) { + if (state->flags & BFTW_STAT) { + return true; + } + + switch (type) { + case BFS_UNKNOWN: + return true; + + case BFS_DIR: + return state->flags & (BFTW_DETECT_CYCLES | BFTW_SKIP_MOUNTS | BFTW_PRUNE_MOUNTS); + + case BFS_LNK: + if (!(bftw_stat_flags(state, depth) & BFS_STAT_NOFOLLOW)) { + return true; } + _fallthrough; + + default: +#if __linux__ + if (state->mtab && bfs_might_be_mount(state->mtab, name)) { + return true; + } +#endif + return false; } +} - struct bftw_file *file = SLIST_POP(&state->to_read, to_read); - if (!file || file == state->to_open.head) { - file = SLIST_POP(&state->to_open); +/** stat() a file asynchronously. */ +static int bftw_ioq_stat(struct bftw_state *state, struct bftw_file *file) { + if (bftw_ioq_reserve(state) != 0) { + goto fail; } - if (!file) { + + int dfd = bftw_pin_parent(state, file); + if (dfd < 0 && dfd != (int)AT_FDCWD) { + goto fail; + } + + struct bftw_cache *cache = &state->cache; + struct bfs_stat *buf = arena_alloc(&cache->stat_bufs); + if (!buf) { + goto unpin; + } + + enum bfs_stat_flags flags = bftw_stat_flags(state, file->depth); + if (ioq_stat(state->ioq, dfd, file->name, flags, buf, file) != 0) { + goto free; + } + + return 0; + +free: + arena_free(&cache->stat_bufs, buf); +unpin: + bftw_unpin_parent(state, file, false); +fail: + return -1; +} + +/** Check if we should stat() a file asynchronously. */ +static bool bftw_should_ioq_stat(struct bftw_state *state, struct bftw_file *file) { + // To avoid surprising users too much, process the roots in order + if (file->depth == 0) { return false; } - while (file->ioqueued) { - bftw_ioq_pop(state, true); +#ifdef S_IFWHT + // ioq_stat() does not do whiteout emulation like bftw_stat_impl() + if (file->type == BFS_WHT) { + return false; } +#endif - state->file = file; - return true; + return bftw_must_stat(state, file->depth, file->type, file->name); +} + +/** Call stat() on files that need it. */ +static void bftw_stat_files(struct bftw_state *state) { + while (true) { + struct bftw_file *file = bftw_queue_waiting(&state->fileq); + if (!file) { + break; + } + + if (!bftw_should_ioq_stat(state, file)) { + bftw_queue_skip(&state->fileq, file); + continue; + } + + if (!bftw_queue_balanced(&state->fileq)) { + break; + } + + if (bftw_ioq_stat(state, file) == 0) { + bftw_queue_detach(&state->fileq, file, true); + } else { + break; + } + } +} + +/** Push a file onto the queue. */ +static void bftw_push_file(struct bftw_state *state, struct bftw_file *file) { + bftw_queue_push(&state->fileq, file); + bftw_stat_files(state); } /** Pop a file to visit from the queue. */ static bool bftw_pop_file(struct bftw_state *state) { bfs_assert(!state->file); - state->file = SLIST_POP(&state->to_visit); - return state->file; + return bftw_pop(state, &state->fileq); +} + +/** Add a path component to the path. */ +static void bftw_prepend_path(char *path, size_t nameoff, size_t namelen, const char *name) { + if (nameoff > 0) { + path[nameoff - 1] = '/'; + } + memcpy(path + nameoff, name, namelen); } /** Build the path to the current file. */ static int bftw_build_path(struct bftw_state *state, const char *name) { const struct bftw_file *file = state->file; - size_t pathlen = file ? file->nameoff + file->namelen : 0; + size_t nameoff, namelen; + if (name) { + nameoff = file ? bftw_child_nameoff(file) : 0; + namelen = strlen(name); + } else { + nameoff = file->nameoff; + namelen = file->namelen; + } + + size_t pathlen = nameoff + namelen; if (dstresize(&state->path, pathlen) != 0) { state->error = errno; return -1; @@ -907,11 +1571,11 @@ static int bftw_build_path(struct bftw_state *state, const char *name) { } // Build the path backwards + if (name) { + bftw_prepend_path(state->path, nameoff, namelen, name); + } while (file && file != ancestor) { - if (file->nameoff > 0) { - state->path[file->nameoff - 1] = '/'; - } - memcpy(state->path + file->nameoff, file->name, file->namelen); + bftw_prepend_path(state->path, file->nameoff, file->namelen, file->name); if (ancestor && ancestor->depth == file->depth) { ancestor = ancestor->parent; @@ -920,20 +1584,6 @@ static int bftw_build_path(struct bftw_state *state, const char *name) { } state->previous = state->file; - - if (name) { - if (pathlen > 0 && state->path[pathlen - 1] != '/') { - if (dstrapp(&state->path, '/') != 0) { - state->error = errno; - return -1; - } - } - if (dstrcat(&state->path, name) != 0) { - state->error = errno; - return -1; - } - } - return 0; } @@ -945,12 +1595,12 @@ static struct bfs_dir *bftw_file_opendir(struct bftw_state *state, struct bftw_f } struct bftw_cache *cache = &state->cache; - struct bfs_dir *dir = bftw_allocdir(cache); + struct bfs_dir *dir = bftw_allocdir(cache, true); if (!dir) { return NULL; } - if (bfs_opendir(dir, fd, NULL) != 0) { + if (bfs_opendir(dir, fd, NULL, state->dir_flags) != 0) { bftw_freedir(cache, dir); return NULL; } @@ -969,18 +1619,23 @@ static int bftw_opendir(struct bftw_state *state) { struct bftw_file *file = state->file; state->dir = file->dir; if (state->dir) { - return 0; + goto pin; } if (bftw_build_path(state, NULL) != 0) { return -1; } + bftw_queue_rebalance(&state->dirq, false); + state->dir = bftw_file_opendir(state, file, state->path); if (!state->dir) { state->direrror = errno; + return 0; } +pin: + bftw_cache_pin(&state->cache, file); return 0; } @@ -1003,47 +1658,6 @@ static int bftw_readdir(struct bftw_state *state) { return ret; } -/** Check if a stat() call is needed for this visit. */ -static bool bftw_need_stat(const struct bftw_state *state) { - if (state->flags & BFTW_STAT) { - return true; - } - - const struct BFTW *ftwbuf = &state->ftwbuf; - if (ftwbuf->type == BFS_UNKNOWN) { - return true; - } - - if (ftwbuf->type == BFS_LNK && !(ftwbuf->stat_flags & BFS_STAT_NOFOLLOW)) { - return true; - } - - if (ftwbuf->type == BFS_DIR) { - if (state->flags & (BFTW_DETECT_CYCLES | BFTW_SKIP_MOUNTS | BFTW_PRUNE_MOUNTS)) { - return true; - } -#if __linux__ - } else if (state->mtab) { - // Linux fills in d_type from the underlying inode, even when - // the directory entry is a bind mount point. In that case, we - // need to stat() to get the correct type. We don't need to - // check for directories because they can only be mounted over - // by other directories. - if (bfs_might_be_mount(state->mtab, ftwbuf->path)) { - return true; - } -#endif - } - - return false; -} - -/** Initialize bftw_stat cache. */ -static void bftw_stat_init(struct bftw_stat *cache) { - cache->buf = NULL; - cache->error = 0; -} - /** Open a file if necessary. */ static int bftw_ensure_open(struct bftw_state *state, struct bftw_file *file, const char *path) { int ret = file->fd; @@ -1073,11 +1687,10 @@ static void bftw_init_ftwbuf(struct bftw_state *state, enum bftw_visit visit) { ftwbuf->visit = visit; ftwbuf->type = BFS_UNKNOWN; ftwbuf->error = state->direrror; + ftwbuf->loopoff = 0; ftwbuf->at_fd = AT_FDCWD; ftwbuf->at_path = ftwbuf->path; - ftwbuf->stat_flags = BFS_STAT_NOFOLLOW; - bftw_stat_init(&ftwbuf->lstat_cache); - bftw_stat_init(&ftwbuf->stat_cache); + bftw_stat_init(&ftwbuf->stat_bufs, &state->stat_buf, &state->lstat_buf); struct bftw_file *parent = NULL; if (de) { @@ -1090,6 +1703,7 @@ static void bftw_init_ftwbuf(struct bftw_state *state, enum bftw_visit visit) { ftwbuf->depth = file->depth; ftwbuf->type = file->type; ftwbuf->nameoff = file->nameoff; + bftw_stat_fill(&ftwbuf->stat_bufs, &file->stat_bufs); } if (parent) { @@ -1107,22 +1721,15 @@ static void bftw_init_ftwbuf(struct bftw_state *state, enum bftw_visit visit) { ftwbuf->nameoff = xbaseoff(ftwbuf->path); } + ftwbuf->stat_flags = bftw_stat_flags(state, ftwbuf->depth); + if (ftwbuf->error != 0) { ftwbuf->type = BFS_ERROR; return; } - int follow_flags = BFTW_FOLLOW_ALL; - if (ftwbuf->depth == 0) { - follow_flags |= BFTW_FOLLOW_ROOTS; - } - bool follow = state->flags & follow_flags; - if (follow) { - ftwbuf->stat_flags = BFS_STAT_TRYFOLLOW; - } - const struct bfs_stat *statbuf = NULL; - if (bftw_need_stat(state)) { + if (bftw_must_stat(state, ftwbuf->depth, ftwbuf->type, ftwbuf->path + ftwbuf->nameoff)) { statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); if (statbuf) { ftwbuf->type = bfs_mode_to_type(statbuf->mode); @@ -1138,6 +1745,7 @@ static void bftw_init_ftwbuf(struct bftw_state *state, enum bftw_visit visit) { if (ancestor->dev == statbuf->dev && ancestor->ino == statbuf->ino) { ftwbuf->type = BFS_ERROR; ftwbuf->error = ELOOP; + ftwbuf->loopoff = ancestor->nameoff + ancestor->namelen; return; } } @@ -1161,6 +1769,11 @@ static bool bftw_is_mount(struct bftw_state *state, const char *name) { return statbuf && statbuf->dev != parent->dev; } +/** Check if bfs_stat() was called from the main thread. */ +static bool bftw_stat_was_sync(const struct bftw_state *state, const struct bfs_stat *buf) { + return buf == &state->stat_buf || buf == &state->lstat_buf; +} + /** Invoke the callback. */ static enum bftw_action bftw_call_back(struct bftw_state *state, const char *name, enum bftw_visit visit) { if (visit == BFTW_POST && !(state->flags & BFTW_POST_ORDER)) { @@ -1180,31 +1793,43 @@ static enum bftw_action bftw_call_back(struct bftw_state *state, const char *nam return BFTW_STOP; } + enum bftw_action ret = BFTW_PRUNE; if ((state->flags & BFTW_SKIP_MOUNTS) && bftw_is_mount(state, name)) { - return BFTW_PRUNE; + goto done; } - enum bftw_action ret = state->callback(ftwbuf, state->ptr); + ret = state->callback(ftwbuf, state->ptr); switch (ret) { case BFTW_CONTINUE: - if (visit != BFTW_PRE) { - return BFTW_PRUNE; - } - if (ftwbuf->type != BFS_DIR) { - return BFTW_PRUNE; - } - if ((state->flags & BFTW_PRUNE_MOUNTS) && bftw_is_mount(state, name)) { - return BFTW_PRUNE; + if (visit != BFTW_PRE || ftwbuf->type != BFS_DIR) { + ret = BFTW_PRUNE; + } else if (state->flags & BFTW_PRUNE_MOUNTS) { + if (bftw_is_mount(state, name)) { + ret = BFTW_PRUNE; + } } - fallthru; + break; + case BFTW_PRUNE: case BFTW_STOP: - return ret; + break; default: state->error = EINVAL; return BFTW_STOP; } + +done: + if (state->fileq.flags & BFTW_QBALANCE) { + // Detect any main-thread stat() calls to rebalance the queue + const struct bfs_stat *buf = bftw_cached_stat(ftwbuf, BFS_STAT_FOLLOW); + const struct bfs_stat *lbuf = bftw_cached_stat(ftwbuf, BFS_STAT_NOFOLLOW); + if (bftw_stat_was_sync(state, buf) || bftw_stat_was_sync(state, lbuf)) { + bftw_queue_rebalance(&state->fileq, false); + } + } + + return ret; } /** @@ -1228,9 +1853,13 @@ static int bftw_gc(struct bftw_state *state, enum bftw_gc_flags flags) { int ret = 0; struct bftw_file *file = state->file; - if (file && file->dir) { - bftw_cache_unpin(&state->cache, file); - SLIST_APPEND(&state->to_close, file); + if (file) { + if (state->dir) { + bftw_cache_unpin(&state->cache, file); + } + if (file->dir) { + bftw_delayed_unwrap(state, file); + } } state->dir = NULL; state->de = NULL; @@ -1247,8 +1876,8 @@ static int bftw_gc(struct bftw_state *state, enum bftw_gc_flags flags) { } state->direrror = 0; - while ((file = SLIST_POP(&state->to_close))) { - bftw_unwrapdir(state, file); + drain_slist (struct bftw_file, dead, &state->to_close, ready) { + bftw_unwrapdir(state, dead); } enum bftw_gc_flags visit = BFTW_VISIT_FILE; @@ -1303,7 +1932,7 @@ static void bftw_list_sort(struct bftw_list *list) { bftw_list_sort(&right); // Merge - while (left.head && right.head) { + while (!SLIST_EMPTY(&left) && !SLIST_EMPTY(&right)) { struct bftw_file *lf = left.head; struct bftw_file *rf = right.head; @@ -1319,16 +1948,20 @@ static void bftw_list_sort(struct bftw_list *list) { SLIST_EXTEND(list, &right); } -/** Finish adding a batch of files. */ -static void bftw_batch_finish(struct bftw_state *state) { +/** Flush all the queue buffers. */ +static void bftw_flush(struct bftw_state *state) { if (state->flags & BFTW_SORT) { - bftw_list_sort(&state->batch); + bftw_list_sort(&state->fileq.buffer); } + bftw_queue_flush(&state->fileq); + bftw_stat_files(state); - if (state->strategy != BFTW_BFS) { - SLIST_EXTEND(&state->batch, &state->to_visit); + bftw_queue_flush(&state->dirq); + bftw_ioq_opendirs(state); + + if (state->ioq) { + ioq_submit(state->ioq); } - SLIST_EXTEND(&state->to_visit, &state->batch); } /** Close the current directory. */ @@ -1337,7 +1970,7 @@ static int bftw_closedir(struct bftw_state *state) { return -1; } - bftw_batch_finish(state); + bftw_flush(state); return 0; } @@ -1345,22 +1978,46 @@ static int bftw_closedir(struct bftw_state *state) { static void bftw_save_ftwbuf(struct bftw_file *file, const struct BFTW *ftwbuf) { file->type = ftwbuf->type; - const struct bfs_stat *statbuf = ftwbuf->stat_cache.buf; - if (!statbuf || (ftwbuf->stat_flags & BFS_STAT_NOFOLLOW)) { - statbuf = ftwbuf->lstat_cache.buf; - } + const struct bfs_stat *statbuf = bftw_cached_stat(ftwbuf, ftwbuf->stat_flags); if (statbuf) { file->dev = statbuf->dev; file->ino = statbuf->ino; } } +/** Check if we should buffer a file instead of visiting it. */ +static bool bftw_buffer_file(const struct bftw_state *state, const struct bftw_file *file, const char *name) { + if (!name) { + // Already buffered + return false; + } + + if (state->flags & BFTW_BUFFER) { + return true; + } + + // If we need to call stat(), and can do it async, buffer this file + if (!state->ioq) { + return false; + } + + if (!bftw_queue_balanced(&state->fileq)) { + // stat() would run synchronously anyway + return false; + } + + size_t depth = file ? file->depth + 1 : 1; + enum bfs_type type = state->de ? state->de->type : BFS_UNKNOWN; + return bftw_must_stat(state, depth, type, name); +} + /** Visit and/or enqueue the current file. */ static int bftw_visit(struct bftw_state *state, const char *name) { + struct bftw_cache *cache = &state->cache; struct bftw_file *file = state->file; - if (name && (state->flags & BFTW_BUFFER)) { - file = bftw_file_new(&state->cache, file, name); + if (bftw_buffer_file(state, file, name)) { + file = bftw_file_new(cache, file, name); if (!file) { state->error = errno; return -1; @@ -1370,14 +2027,14 @@ static int bftw_visit(struct bftw_state *state, const char *name) { file->type = state->de->type; } - SLIST_APPEND(&state->batch, file); + bftw_push_file(state, file); return 0; } switch (bftw_call_back(state, name, BFTW_PRE)) { case BFTW_CONTINUE: if (name) { - file = bftw_file_new(&state->cache, state->file, name); + file = bftw_file_new(cache, state->file, name); } else { state->file = NULL; } @@ -1387,6 +2044,7 @@ static int bftw_visit(struct bftw_state *state, const char *name) { } bftw_save_ftwbuf(file, &state->ftwbuf); + bftw_stat_recycle(cache, file); bftw_push_dir(state, file); return 0; @@ -1398,10 +2056,22 @@ static int bftw_visit(struct bftw_state *state, const char *name) { } default: + if (file && !name) { + bftw_gc(state, BFTW_VISIT_NONE); + } return -1; } } +/** Drain a bftw_queue. */ +static void bftw_drain(struct bftw_state *state, struct bftw_queue *queue) { + bftw_queue_flush(queue); + + while (bftw_pop(state, queue)) { + bftw_gc(state, BFTW_VISIT_NONE); + } +} + /** * Dispose of the bftw() state. * @@ -1418,10 +2088,9 @@ static int bftw_state_destroy(struct bftw_state *state) { state->ioq = NULL; } - SLIST_EXTEND(&state->to_visit, &state->batch); - do { - bftw_gc(state, BFTW_VISIT_NONE); - } while (bftw_pop_dir(state) || bftw_pop_file(state)); + bftw_gc(state, BFTW_VISIT_NONE); + bftw_drain(state, &state->dirq); + bftw_drain(state, &state->fileq); ioq_destroy(ioq); @@ -1440,7 +2109,7 @@ static int bftw_impl(struct bftw_state *state) { return -1; } } - bftw_batch_finish(state); + bftw_flush(state); while (true) { while (bftw_pop_dir(state)) { @@ -1461,8 +2130,9 @@ static int bftw_impl(struct bftw_state *state) { break; } if (bftw_visit(state, NULL) != 0) { - break; + return -1; } + bftw_flush(state); } return 0; @@ -10,6 +10,7 @@ #include "dir.h" #include "stat.h" + #include <stddef.h> /** @@ -26,12 +27,14 @@ enum bftw_visit { * Cached bfs_stat() info for a file. */ struct bftw_stat { - /** A pointer to the bfs_stat() buffer, if available. */ - const struct bfs_stat *buf; - /** Storage for the bfs_stat() buffer, if needed. */ - struct bfs_stat storage; - /** The cached error code, if any. */ - int error; + /** The bfs_stat(BFS_STAT_FOLLOW) buffer. */ + const struct bfs_stat *stat_buf; + /** The bfs_stat(BFS_STAT_NOFOLLOW) buffer. */ + const struct bfs_stat *lstat_buf; + /** The cached bfs_stat(BFS_STAT_FOLLOW) error. */ + int stat_err; + /** The cached bfs_stat(BFS_STAT_NOFOLLOW) error. */ + int lstat_err; }; /** @@ -52,8 +55,10 @@ struct BFTW { /** The file type. */ enum bfs_type type; - /** The errno that occurred, if type == BFTW_ERROR. */ + /** The errno that occurred, if type == BFS_ERROR. */ int error; + /** For filesystem loops, the length of the loop prefix. */ + size_t loopoff; /** A parent file descriptor for the *at() family of calls. */ int at_fd; @@ -62,19 +67,17 @@ struct BFTW { /** Flags for bfs_stat(). */ enum bfs_stat_flags stat_flags; - /** Cached bfs_stat() info for BFS_STAT_NOFOLLOW. */ - struct bftw_stat lstat_cache; - /** Cached bfs_stat() info for BFS_STAT_FOLLOW. */ - struct bftw_stat stat_cache; + /** Cached bfs_stat() info. */ + struct bftw_stat stat_bufs; }; /** * Get bfs_stat() info for a file encountered during bftw(), caching the result * whenever possible. * - * @param ftwbuf + * @ftwbuf * bftw() data for the file to stat. - * @param flags + * @flags * flags for bfs_stat(). Pass ftwbuf->stat_flags for the default flags. * @return * A pointer to a bfs_stat() buffer, or NULL if the call failed. @@ -85,9 +88,9 @@ const struct bfs_stat *bftw_stat(const struct BFTW *ftwbuf, enum bfs_stat_flags * Get bfs_stat() info for a file encountered during bftw(), if it has already * been cached. * - * @param ftwbuf + * @ftwbuf * bftw() data for the file to stat. - * @param flags + * @flags * flags for bfs_stat(). Pass ftwbuf->stat_flags for the default flags. * @return * A pointer to a bfs_stat() buffer, or NULL if no stat info is cached. @@ -99,12 +102,12 @@ const struct bfs_stat *bftw_cached_stat(const struct BFTW *ftwbuf, enum bfs_stat * whether to follow links. This function will avoid calling bfs_stat() if * possible. * - * @param ftwbuf + * @ftwbuf * bftw() data for the file to check. - * @param flags + * @flags * flags for bfs_stat(). Pass ftwbuf->stat_flags for the default flags. * @return - * The type of the file, or BFTW_ERROR if an error occurred. + * The type of the file, or BFS_ERROR if an error occurred. */ enum bfs_type bftw_type(const struct BFTW *ftwbuf, enum bfs_stat_flags flags); @@ -123,9 +126,9 @@ enum bftw_action { /** * Callback function type for bftw(). * - * @param ftwbuf + * @ftwbuf * Data about the current file. - * @param ptr + * @ptr * The pointer passed to bftw(). * @return * An action value. @@ -156,6 +159,8 @@ enum bftw_flags { BFTW_SORT = 1 << 8, /** Read each directory into memory before processing its children. */ BFTW_BUFFER = 1 << 9, + /** Include whiteouts in the search results. */ + BFTW_WHITEOUTS = 1 << 10, }; /** @@ -180,18 +185,22 @@ struct bftw_args { const char **paths; /** The number of starting paths. */ size_t npaths; + /** The callback to invoke. */ bftw_callback *callback; /** A pointer which is passed to the callback. */ void *ptr; + /** The maximum number of file descriptors to keep open. */ int nopenfd; /** The maximum number of threads to use. */ int nthreads; + /** Flags that control bftw() behaviour. */ enum bftw_flags flags; /** The search strategy to use. */ enum bftw_strategy strategy; + /** The parsed mount table, if available. */ const struct bfs_mtab *mtab; }; @@ -202,7 +211,7 @@ struct bftw_args { * Like ftw(3) and nftw(3), this function walks a directory tree recursively, * and invokes a callback for each path it encounters. * - * @param args + * @args * The arguments that control the walk. * @return * 0 on success, or -1 on failure. @@ -8,11 +8,12 @@ #ifndef BFS_BIT_H #define BFS_BIT_H -#include "config.h" +#include "bfs.h" + #include <limits.h> #include <stdint.h> -#if __STDC_VERSION__ >= 202311L +#if __has_include(<stdbit.h>) # include <stdbit.h> #endif @@ -53,56 +54,101 @@ #ifndef CHAR_WIDTH # define CHAR_WIDTH CHAR_BIT #endif + +// See https://gcc.gnu.org/onlinedocs/cpp/Common-Predefined-Macros.html + +#ifndef USHRT_WIDTH +# ifdef __SHRT_WIDTH__ +# define USHRT_WIDTH __SHRT_WIDTH__ +# else +# define USHRT_WIDTH UMAX_WIDTH(USHRT_MAX) +# endif +#endif + +#ifndef UINT_WIDTH +# ifdef __INT_WIDTH__ +# define UINT_WIDTH __INT_WIDTH__ +# else +# define UINT_WIDTH UMAX_WIDTH(UINT_MAX) +# endif +#endif + +#ifndef ULONG_WIDTH +# ifdef __LONG_WIDTH__ +# define ULONG_WIDTH __LONG_WIDTH__ +# else +# define ULONG_WIDTH UMAX_WIDTH(ULONG_MAX) +# endif +#endif + +#ifndef ULLONG_WIDTH +# ifdef __LONG_LONG_WIDTH__ +# define ULLONG_WIDTH __LONG_LONG_WIDTH__ +# elif defined(__LLONG_WIDTH__) // Clang +# define ULLONG_WIDTH __LLONG_WIDTH__ +# else +# define ULLONG_WIDTH UMAX_WIDTH(ULLONG_MAX) +# endif +#endif + +#ifndef SIZE_WIDTH +# ifdef __SIZE_WIDTH__ +# define SIZE_WIDTH __SIZE_WIDTH__ +# else +# define SIZE_WIDTH UMAX_WIDTH(SIZE_MAX) +# endif +#endif + +#ifndef PTRDIFF_WIDTH +# ifdef __PTRDIFF_WIDTH__ +# define PTRDIFF_WIDTH __PTRDIFF_WIDTH__ +# else +# define PTRDIFF_WIDTH UMAX_WIDTH(PTRDIFF_MAX) +# endif +#endif + +#ifndef UINTPTR_WIDTH +# ifdef __INTPTR_WIDTH__ +# define UINTPTR_WIDTH __INTPTR_WIDTH__ +# else +# define UINTPTR_WIDTH UMAX_WIDTH(UINTPTR_MAX) +# endif +#endif + +#ifndef UINTMAX_WIDTH +# ifdef __INTMAX_WIDTH__ +# define UINTMAX_WIDTH __INTMAX_WIDTH__ +# else +# define UINTMAX_WIDTH UMAX_WIDTH(UINTMAX_MAX) +# endif +#endif + #ifndef UCHAR_WIDTH # define UCHAR_WIDTH CHAR_WIDTH #endif #ifndef SCHAR_WIDTH # define SCHAR_WIDTH CHAR_WIDTH #endif -#ifndef USHRT_WIDTH -# define USHRT_WIDTH UMAX_WIDTH(USHRT_MAX) -#endif #ifndef SHRT_WIDTH # define SHRT_WIDTH USHRT_WIDTH #endif -#ifndef UINT_WIDTH -# define UINT_WIDTH UMAX_WIDTH(UINT_MAX) -#endif #ifndef INT_WIDTH # define INT_WIDTH UINT_WIDTH #endif -#ifndef ULONG_WIDTH -# define ULONG_WIDTH UMAX_WIDTH(ULONG_MAX) -#endif #ifndef LONG_WIDTH # define LONG_WIDTH ULONG_WIDTH #endif -#ifndef ULLONG_WIDTH -# define ULLONG_WIDTH UMAX_WIDTH(ULLONG_MAX) -#endif #ifndef LLONG_WIDTH # define LLONG_WIDTH ULLONG_WIDTH #endif -#ifndef SIZE_WIDTH -# define SIZE_WIDTH UMAX_WIDTH(SIZE_MAX) -#endif -#ifndef PTRDIFF_WIDTH -# define PTRDIFF_WIDTH (UMAX_WIDTH(PTRDIFF_MAX) + 1) -#endif -#ifndef UINTPTR_WIDTH -# define UINTPTR_WIDTH UMAX_WIDTH(UINTPTR_MAX) -#endif #ifndef INTPTR_WIDTH # define INTPTR_WIDTH UINTPTR_WIDTH #endif -#ifndef UINTMAX_WIDTH -# define UINTMAX_WIDTH UMAX_WIDTH(UINTMAX_MAX) -#endif #ifndef INTMAX_WIDTH # define INTMAX_WIDTH UINTMAX_WIDTH #endif -// C23 polyfill: byte order +// N3022 polyfill: byte order #ifdef __STDC_ENDIAN_LITTLE__ # define ENDIAN_LITTLE __STDC_ENDIAN_LITTLE__ @@ -122,49 +168,65 @@ #ifdef __STDC_ENDIAN_NATIVE__ # define ENDIAN_NATIVE __STDC_ENDIAN_NATIVE__ -#elif defined(__ORDER_NATIVE_ENDIAN__) -# define ENDIAN_NATIVE __ORDER_NATIVE_ENDIAN__ +#elif defined(__BYTE_ORDER__) +# define ENDIAN_NATIVE __BYTE_ORDER__ #else # define ENDIAN_NATIVE 0 #endif -#if __STDC_VERSION__ >= 202311L -# define bswap16 stdc_memreverse8u16 -# define bswap32 stdc_memreverse8u32 -# define bswap64 stdc_memreverse8u64 -#elif __GNUC__ -# define bswap16 __builtin_bswap16 -# define bswap32 __builtin_bswap32 -# define bswap64 __builtin_bswap64 +#if __GNUC__ +# define bswap_u16 __builtin_bswap16 +# define bswap_u32 __builtin_bswap32 +# define bswap_u64 __builtin_bswap64 #else -static inline uint16_t bswap16(uint16_t n) { +static inline uint16_t bswap_u16(uint16_t n) { return (n << 8) | (n >> 8); } -static inline uint32_t bswap32(uint32_t n) { - return ((uint32_t)bswap16(n) << 16) | bswap16(n >> 16); +static inline uint32_t bswap_u32(uint32_t n) { + return ((uint32_t)bswap_u16(n) << 16) | bswap_u16(n >> 16); } -static inline uint64_t bswap64(uint64_t n) { - return ((uint64_t)bswap32(n) << 32) | bswap32(n >> 32); +static inline uint64_t bswap_u64(uint64_t n) { + return ((uint64_t)bswap_u32(n) << 32) | bswap_u32(n >> 32); } #endif -static inline uint8_t bswap8(uint8_t n) { +static inline uint8_t bswap_u8(uint8_t n) { return n; } -/** - * Reverse the byte order of an integer. - */ -#define bswap(n) \ - _Generic((n), \ - uint8_t: bswap8, \ - uint16_t: bswap16, \ - uint32_t: bswap32, \ - uint64_t: bswap64)(n) +#if UCHAR_WIDTH == 8 +# define bswap_uc bswap_u8 +#endif + +#if USHRT_WIDTH == 16 +# define bswap_us bswap_u16 +#elif USHRT_WIDTH == 32 +# define bswap_us bswap_u32 +#elif USHRT_WIDTH == 64 +# define bswap_us bswap_u64 +#endif + +#if UINT_WIDTH == 16 +# define bswap_ui bswap_u16 +#elif UINT_WIDTH == 32 +# define bswap_ui bswap_u32 +#elif UINT_WIDTH == 64 +# define bswap_ui bswap_u64 +#endif + +#if ULONG_WIDTH == 32 +# define bswap_ul bswap_u32 +#elif ULONG_WIDTH == 64 +# define bswap_ul bswap_u64 +#endif + +#if ULLONG_WIDTH == 64 +# define bswap_ull bswap_u64 +#endif // Define an overload for each unsigned type #define UINT_OVERLOADS(macro) \ @@ -177,25 +239,74 @@ static inline uint8_t bswap8(uint8_t n) { // Select an overload based on an unsigned integer type #define UINT_SELECT(n, name) \ _Generic((n), \ - char: name##_uc, \ - signed char: name##_uc, \ unsigned char: name##_uc, \ - signed short: name##_us, \ unsigned short: name##_us, \ - signed int: name##_ui, \ unsigned int: name##_ui, \ - signed long: name##_ul, \ unsigned long: name##_ul, \ - signed long long: name##_ull, \ unsigned long long: name##_ull) +/** + * Reverse the byte order of an integer. + */ +#define bswap(n) UINT_SELECT(n, bswap)(n) + +#define LOAD8_LEU8(ptr, i, n) ((uint##n##_t)((const unsigned char *)ptr)[(i) / 8] << (i)) +#define LOAD8_BEU8(ptr, i, n) ((uint##n##_t)((const unsigned char *)ptr)[(i) / 8] << (n - (i) - 8)) + +/** Load a little-endian 8-bit word. */ +static inline uint8_t load8_leu8(const void *ptr) { + return LOAD8_LEU8(ptr, 0, 8); +} + +/** Load a big-endian 8-bit word. */ +static inline uint8_t load8_beu8(const void *ptr) { + return LOAD8_BEU8(ptr, 0, 8); +} + +#define LOAD8_LEU16(ptr, i, n) (LOAD8_LEU8(ptr, i, n) | LOAD8_LEU8(ptr, i + 8, n)) +#define LOAD8_BEU16(ptr, i, n) (LOAD8_BEU8(ptr, i, n) | LOAD8_BEU8(ptr, i + 8, n)) + +/** Load a little-endian 16-bit word. */ +static inline uint16_t load8_leu16(const void *ptr) { + return LOAD8_LEU16(ptr, 0, 16); +} + +/** Load a big-endian 16-bit word. */ +static inline uint16_t load8_beu16(const void *ptr) { + return LOAD8_BEU16(ptr, 0, 16); +} + +#define LOAD8_LEU32(ptr, i, n) (LOAD8_LEU16(ptr, i, n) | LOAD8_LEU16(ptr, i + 16, n)) +#define LOAD8_BEU32(ptr, i, n) (LOAD8_BEU16(ptr, i, n) | LOAD8_BEU16(ptr, i + 16, n)) + +/** Load a little-endian 32-bit word. */ +static inline uint32_t load8_leu32(const void *ptr) { + return LOAD8_LEU32(ptr, 0, 32); +} + +/** Load a big-endian 32-bit word. */ +static inline uint32_t load8_beu32(const void *ptr) { + return LOAD8_BEU32(ptr, 0, 32); +} + +#define LOAD8_LEU64(ptr, i, n) (LOAD8_LEU32(ptr, i, n) | LOAD8_LEU32(ptr, i + 32, n)) +#define LOAD8_BEU64(ptr, i, n) (LOAD8_BEU32(ptr, i, n) | LOAD8_BEU32(ptr, i + 32, n)) + +/** Load a little-endian 64-bit word. */ +static inline uint64_t load8_leu64(const void *ptr) { + return LOAD8_LEU64(ptr, 0, 64); +} + +/** Load a big-endian 64-bit word. */ +static inline uint64_t load8_beu64(const void *ptr) { + return LOAD8_BEU64(ptr, 0, 64); +} + // C23 polyfill: bit utilities -#if __STDC_VERSION__ >= 202311L +#if __STDC_VERSION_STDBIT_H__ >= C23 # define count_ones stdc_count_ones # define count_zeros stdc_count_zeros -# define rotate_left stdc_rotate_left -# define rotate_right stdc_rotate_right # define leading_zeros stdc_leading_zeros # define leading_ones stdc_leading_ones # define trailing_zeros stdc_trailing_zeros @@ -228,31 +339,31 @@ static inline uint8_t bswap8(uint8_t n) { #define BUILTIN_WIDTH(suffix) BUILTIN_WIDTH##suffix #define COUNT_ONES(type, suffix, width) \ - static inline int count_ones##suffix(type n) { \ + static inline unsigned int count_ones##suffix(type n) { \ return UINT_BUILTIN(popcount, suffix)(n); \ } #define LEADING_ZEROS(type, suffix, width) \ - static inline int leading_zeros##suffix(type n) { \ + static inline unsigned int leading_zeros##suffix(type n) { \ return n \ ? UINT_BUILTIN(clz, suffix)(n) - (BUILTIN_WIDTH(suffix) - width) \ : width; \ } #define TRAILING_ZEROS(type, suffix, width) \ - static inline int trailing_zeros##suffix(type n) { \ + static inline unsigned int trailing_zeros##suffix(type n) { \ return n ? UINT_BUILTIN(ctz, suffix)(n) : (int)width; \ } #define FIRST_TRAILING_ONE(type, suffix, width) \ - static inline int first_trailing_one##suffix(type n) { \ + static inline unsigned int first_trailing_one##suffix(type n) { \ return UINT_BUILTIN(ffs, suffix)(n); \ } #else // !__GNUC__ #define COUNT_ONES(type, suffix, width) \ - static inline int count_ones##suffix(type n) { \ + static inline unsigned int count_ones##suffix(type n) { \ int ret; \ for (ret = 0; n; ++ret) { \ n &= n - 1; \ @@ -261,7 +372,7 @@ static inline uint8_t bswap8(uint8_t n) { } #define LEADING_ZEROS(type, suffix, width) \ - static inline int leading_zeros##suffix(type n) { \ + static inline unsigned int leading_zeros##suffix(type n) { \ type bit = (type)1 << (width - 1); \ int ret; \ for (ret = 0; bit && !(n & bit); ++ret, bit >>= 1); \ @@ -269,7 +380,7 @@ static inline uint8_t bswap8(uint8_t n) { } #define TRAILING_ZEROS(type, suffix, width) \ - static inline int trailing_zeros##suffix(type n) { \ + static inline unsigned int trailing_zeros##suffix(type n) { \ type bit = 1; \ int ret; \ for (ret = 0; bit && !(n & bit); ++ret, bit <<= 1); \ @@ -277,7 +388,7 @@ static inline uint8_t bswap8(uint8_t n) { } #define FIRST_TRAILING_ONE(type, suffix, width) \ - static inline int first_trailing_one##suffix(type n) { \ + static inline unsigned int first_trailing_one##suffix(type n) { \ return n ? trailing_zeros##suffix(n) + 1 : 0; \ } @@ -288,37 +399,41 @@ UINT_OVERLOADS(LEADING_ZEROS) UINT_OVERLOADS(TRAILING_ZEROS) UINT_OVERLOADS(FIRST_TRAILING_ONE) -#define ROTATE_LEFT(type, suffix, width) \ - static inline type rotate_left##suffix(type n, int c) { \ - return (n << c) | (n >> ((width - c) % width)); \ +#define FIRST_LEADING_ONE(type, suffix, width) \ + static inline unsigned int first_leading_one##suffix(type n) { \ + return n ? leading_zeros##suffix(n) + 1 : 0; \ } -#define ROTATE_RIGHT(type, suffix, width) \ - static inline type rotate_right##suffix(type n, int c) { \ - return (n >> c) | (n << ((width - c) % width)); \ +#define HAS_SINGLE_BIT(type, suffix, width) \ + static inline bool has_single_bit##suffix(type n) { \ + /** Branchless n && !(n & (n - 1)) */ \ + return n - 1 < (n ^ (n - 1)); \ } -#define FIRST_LEADING_ONE(type, suffix, width) \ - static inline int first_leading_one##suffix(type n) { \ +#define BIT_WIDTH(type, suffix, width) \ + static inline unsigned int bit_width##suffix(type n) { \ return width - leading_zeros##suffix(n); \ } -#define HAS_SINGLE_BIT(type, suffix, width) \ - static inline bool has_single_bit##suffix(type n) { \ - return n && !(n & (n - 1)); \ +#define BIT_FLOOR(type, suffix, width) \ + static inline type bit_floor##suffix(type n) { \ + return n ? (type)1 << (bit_width##suffix(n) - 1) : 0; \ + } + +#define BIT_CEIL(type, suffix, width) \ + static inline type bit_ceil##suffix(type n) { \ + return (type)1 << bit_width##suffix(n - !!n); \ } -UINT_OVERLOADS(ROTATE_LEFT) -UINT_OVERLOADS(ROTATE_RIGHT) UINT_OVERLOADS(FIRST_LEADING_ONE) UINT_OVERLOADS(HAS_SINGLE_BIT) +UINT_OVERLOADS(BIT_WIDTH) +UINT_OVERLOADS(BIT_FLOOR) +UINT_OVERLOADS(BIT_CEIL) #define count_ones(n) UINT_SELECT(n, count_ones)(n) #define count_zeros(n) UINT_SELECT(n, count_ones)(~(n)) -#define rotate_left(n, c) UINT_SELECT(n, rotate_left)(n, c) -#define rotate_right(n, c) UINT_SELECT(n, rotate_right)(n, c) - #define leading_zeros(n) UINT_SELECT(n, leading_zeros)(n) #define leading_ones(n) UINT_SELECT(n, leading_zeros)(~(n)) @@ -333,23 +448,26 @@ UINT_OVERLOADS(HAS_SINGLE_BIT) #define has_single_bit(n) UINT_SELECT(n, has_single_bit)(n) -#define BIT_FLOOR(type, suffix, width) \ - static inline type bit_floor##suffix(type n) { \ - return n ? (type)1 << (first_leading_one##suffix(n) - 1) : 0; \ - } +#define bit_width(n) UINT_SELECT(n, bit_width)(n) +#define bit_floor(n) UINT_SELECT(n, bit_floor)(n) +#define bit_ceil(n) UINT_SELECT(n, bit_ceil)(n) -#define BIT_CEIL(type, suffix, width) \ - static inline type bit_ceil##suffix(type n) { \ - return (type)1 << first_leading_one##suffix(n - !!n); \ +#endif // __STDC_VERSION_STDBIT_H__ < C23 + +#define ROTATE_LEFT(type, suffix, width) \ + static inline type rotate_left##suffix(type n, int c) { \ + return (n << c) | (n >> ((width - c) % width)); \ } -UINT_OVERLOADS(BIT_FLOOR) -UINT_OVERLOADS(BIT_CEIL) +#define ROTATE_RIGHT(type, suffix, width) \ + static inline type rotate_right##suffix(type n, int c) { \ + return (n >> c) | (n << ((width - c) % width)); \ + } -#define bit_width(n) first_leading_one(n) -#define bit_floor(n) UINT_SELECT(n, bit_floor)(n) -#define bit_ceil(n) UINT_SELECT(n, bit_ceil)(n) +UINT_OVERLOADS(ROTATE_LEFT) +UINT_OVERLOADS(ROTATE_RIGHT) -#endif // __STDC_VERSION__ < 202311L +#define rotate_left(n, c) UINT_SELECT(n, rotate_left)(n, c) +#define rotate_right(n, c) UINT_SELECT(n, rotate_right)(n, c) #endif // BFS_BIT_H diff --git a/src/color.c b/src/color.c index b9a788b..588dbac 100644 --- a/src/color.c +++ b/src/color.c @@ -2,10 +2,11 @@ // SPDX-License-Identifier: 0BSD #include "color.h" + #include "alloc.h" +#include "bfs.h" #include "bfstd.h" #include "bftw.h" -#include "config.h" #include "diag.h" #include "dir.h" #include "dstring.h" @@ -13,6 +14,7 @@ #include "fsade.h" #include "stat.h" #include "trie.h" + #include <errno.h> #include <fcntl.h> #include <stdarg.h> @@ -29,7 +31,7 @@ struct esc_seq { /** The length of the escape sequence. */ size_t len; - /** The escape sequence iteself, without a terminating NUL. */ + /** The escape sequence itself, without a terminating NUL. */ char seq[]; }; @@ -141,13 +143,7 @@ static int init_esc(struct colors *colors, const char *name, const char *value, *field = esc; - struct trie_leaf *leaf = trie_insert_str(&colors->names, name); - if (leaf) { - leaf->value = field; - return 0; - } else { - return -1; - } + return trie_set_str(&colors->names, name, field); } /** Check if an escape sequence is equal to a string. */ @@ -157,12 +153,16 @@ static bool esc_eq(const struct esc_seq *esc, const char *str, size_t len) { /** Get an escape sequence from the table. */ static struct esc_seq **get_esc(const struct colors *colors, const char *name) { - const struct trie_leaf *leaf = trie_find_str(&colors->names, name); - return leaf ? leaf->value : NULL; + return trie_get_str(&colors->names, name); +} + +/** Append an escape sequence to a string. */ +static int cat_esc(dchar **dstr, const struct esc_seq *seq) { + return dstrxcat(dstr, seq->seq, seq->len); } /** Set a named escape sequence. */ -static int set_esc(struct colors *colors, const char *name, char *value) { +static int set_esc(struct colors *colors, const char *name, dchar *value) { struct esc_seq **field = get_esc(colors, name); if (!field) { return 0; @@ -209,58 +209,31 @@ static void ext_tolower(char *ext, size_t len) { } } -/** - * The "smart case" algorithm. - * - * @param ext - * The current extension being added. - * @param prev - * The previous case-sensitive match, if any, for the same extension. - * @param iprev - * The previous case-insensitive match, if any, for the same extension. - * @return - * Whether this extension should become case-sensitive. - */ -static bool ext_case_sensitive(struct ext_color *ext, struct ext_color *prev, struct ext_color *iprev) { - // This is the first case-insensitive occurrence of this extension, e.g. - // - // *.gz=01;31:*.tar.gz=01;33 - if (!iprev) { - bfs_assert(!prev); - return false; - } - - // If the last version of this extension is already case-sensitive, - // this one should be too, e.g. - // - // *.tar.gz=01;31:*.TAR.GZ=01;32:*.TAR.GZ=01;33 - if (iprev->case_sensitive) { - return true; - } - - // The case matches the last occurrence exactly, e.g. - // - // *.tar.gz=01;31:*.tar.gz=01;33 - if (iprev == prev) { - return false; - } - - // Different case, but same value, e.g. - // - // *.tar.gz=01;31:*.TAR.GZ=01;31 - if (esc_eq(iprev->esc, ext->esc->seq, ext->esc->len)) { - return false; +/** Insert an extension into a trie. */ +static int insert_ext(struct trie *trie, struct ext_color *ext) { + // A later *.x should override any earlier *.x, *.y.x, etc. + struct trie_leaf *leaf; + while ((leaf = trie_find_postfix(trie, ext->ext))) { + trie_remove(trie, leaf); } - // Different case, different value, e.g. - // - // *.tar.gz=01;31:*.TAR.GZ=01;33 - return true; + size_t len = ext->len + 1; + return trie_set_mem(trie, ext->ext, len, ext); } /** Set the color for an extension. */ -static int set_ext(struct colors *colors, char *key, char *value) { +static int set_ext(struct colors *colors, dchar *key, dchar *value) { size_t len = dstrlen(key); + + // Embedded NUL bytes in extensions can lead to a non-prefix-free + // set of strings, e.g. {".gz", "\0.gz"} would be transformed to + // {"zg.\0", "zg.\0\0"} (showing the implicit terminating NUL). + // Our trie implementation only supports prefix-free key sets, but + // luckily '\0' cannot appear in filenames so we can ignore them. + if (memchr(key, '\0', len)) { + return 0; + } + struct ext_color *ext = varena_alloc(&colors->ext_arena, len + 1); if (!ext) { return -1; @@ -274,46 +247,20 @@ static int set_ext(struct colors *colors, char *key, char *value) { goto fail; } - key = memcpy(ext->ext, key, len + 1); + memcpy(ext->ext, key, len + 1); // Reverse the extension (`*.y.x` -> `x.y.*`) so we can use trie_find_prefix() - ext_reverse(key, len); - - // Find any pre-existing exact match - struct ext_color *prev = NULL; - struct trie_leaf *leaf = trie_find_str(&colors->ext_trie, key); - if (leaf) { - prev = leaf->value; - trie_remove(&colors->ext_trie, leaf); - } - - // A later *.x should override any earlier *.x, *.y.x, etc. - while ((leaf = trie_find_postfix(&colors->ext_trie, key))) { - trie_remove(&colors->ext_trie, leaf); - } + ext_reverse(ext->ext, len); // Insert the extension into the case-sensitive trie - leaf = trie_insert_str(&colors->ext_trie, key); - if (!leaf) { + if (insert_ext(&colors->ext_trie, ext) != 0) { goto fail; } - leaf->value = ext; - // "Smart case": if the same extension is given with two different - // capitalizations (e.g. `*.y.x=31:*.Y.Z=32:`), make it case-sensitive - ext_tolower(key, len); - leaf = trie_insert_str(&colors->iext_trie, key); - if (!leaf) { - goto fail; + if (colors->ext_len < len) { + colors->ext_len = len; } - struct ext_color *iprev = leaf->value; - if (ext_case_sensitive(ext, prev, iprev)) { - iprev->case_sensitive = true; - ext->case_sensitive = true; - } - leaf->value = ext; - return 0; fail: @@ -324,32 +271,83 @@ fail: return -1; } -/** Rebuild the case-insensitive trie after all extensions have been parsed. */ +/** + * The "smart case" algorithm. + * + * @ext + * The current extension being added. + * @iext + * The previous case-insensitive match, if any, for the same extension. + * @return + * Whether this extension should become case-sensitive. + */ +static bool ext_case_sensitive(struct ext_color *ext, struct ext_color *iext) { + // This is the first case-insensitive occurrence of this extension, e.g. + // + // *.gz=01;31:*.tar.gz=01;33 + if (!iext) { + return false; + } + + // If the last version of this extension is already case-sensitive, + // this one should be too, e.g. + // + // *.tar.gz=01;31:*.TAR.GZ=01;32:*.TAR.GZ=01;33 + if (iext->case_sensitive) { + return true; + } + + // Different case, but same value, e.g. + // + // *.tar.gz=01;31:*.TAR.GZ=01;31 + if (esc_eq(iext->esc, ext->esc->seq, ext->esc->len)) { + return false; + } + + // Different case, different value, e.g. + // + // *.tar.gz=01;31:*.TAR.GZ=01;33 + return true; +} + +/** Build the case-insensitive trie, after all extensions have been parsed. */ static int build_iext_trie(struct colors *colors) { - trie_clear(&colors->iext_trie); + // Find which extensions should be case-sensitive + for_trie (leaf, &colors->ext_trie) { + struct ext_color *ext = leaf->value; + + // "Smart case": if the same extension is given with two different + // capitalizations (e.g. `*.y.x=31:*.Y.Z=32:`), make it case-sensitive + ext_tolower(ext->ext, ext->len); + + size_t len = ext->len + 1; + struct trie_leaf *ileaf = trie_insert_mem(&colors->iext_trie, ext->ext, len); + if (!ileaf) { + return -1; + } - TRIE_FOR_EACH(&colors->ext_trie, leaf) { - size_t len = leaf->length - 1; - if (colors->ext_len < len) { - colors->ext_len = len; + struct ext_color *iext = ileaf->value; + if (ext_case_sensitive(ext, iext)) { + ext->case_sensitive = true; + iext->case_sensitive = true; } + ileaf->value = ext; + } + + // Rebuild the trie with only the case-insensitive ones + trie_clear(&colors->iext_trie); + + for_trie (leaf, &colors->ext_trie) { struct ext_color *ext = leaf->value; if (ext->case_sensitive) { continue; } - // set_ext() already reversed and lowercased the extension - struct trie_leaf *ileaf; - while ((ileaf = trie_find_postfix(&colors->iext_trie, ext->ext))) { - trie_remove(&colors->iext_trie, ileaf); - } - - ileaf = trie_insert_str(&colors->iext_trie, ext->ext); - if (!ileaf) { + // We already lowercased the extension above + if (insert_ext(&colors->iext_trie, ext) != 0) { return -1; } - ileaf->value = ext; } return 0; @@ -358,9 +356,8 @@ static int build_iext_trie(struct colors *colors) { /** * Find a color by an extension. */ -static const struct esc_seq *get_ext(const struct colors *colors, const char *filename) { +static const struct esc_seq *get_ext(const struct colors *colors, const char *filename, size_t name_len) { size_t ext_len = colors->ext_len; - size_t name_len = strlen(filename); if (name_len < ext_len) { ext_len = name_len; } @@ -369,7 +366,8 @@ static const struct esc_seq *get_ext(const struct colors *colors, const char *fi char buf[256]; char *copy; if (ext_len < sizeof(buf)) { - copy = memcpy(buf, suffix, ext_len + 1); + copy = memcpy(buf, suffix, ext_len); + copy[ext_len] = '\0'; } else { copy = strndup(suffix, ext_len); if (!copy) { @@ -417,13 +415,13 @@ static const struct esc_seq *get_ext(const struct colors *colors, const char *fi * * See man dir_colors. * - * @param str + * @str * A dstring to fill with the unescaped chunk. - * @param value + * @value * The value to parse. - * @param end + * @end * The character that marks the end of the chunk. - * @param[out] next + * @next[out] * Will be set to the next chunk. * @return * 0 on success, -1 on failure. @@ -558,8 +556,8 @@ static int unescape(char **str, const char *value, char end, const char **next) /** Parse the GNU $LS_COLORS format. */ static int parse_gnu_ls_colors(struct colors *colors, const char *ls_colors) { int ret = -1; - char *key = NULL; - char *value = NULL; + dchar *key = NULL; + dchar *value = NULL; for (const char *chunk = ls_colors, *next; chunk; chunk = next) { if (chunk[0] == '*') { @@ -578,7 +576,7 @@ static int parse_gnu_ls_colors(struct colors *colors, const char *ls_colors) { break; } - if (dstrncpy(&key, chunk, equals - chunk) != 0) { + if (dstrxcpy(&key, chunk, equals - chunk) != 0) { goto fail; } if (unescape(&value, equals + 1, ':', &next) != 0) { @@ -587,8 +585,8 @@ static int parse_gnu_ls_colors(struct colors *colors, const char *ls_colors) { // All-zero values should be treated like NULL, to fall // back on any other relevant coloring for that file - char *esc = value; - if (strspn(value, "0") == strlen(value) + dchar *esc = value; + if (strspn(value, "0") == dstrlen(value) && strcmp(key, "rs") != 0 && strcmp(key, "lc") != 0 && strcmp(key, "rc") != 0 @@ -623,58 +621,58 @@ struct colors *parse_colors(void) { trie_init(&colors->ext_trie); trie_init(&colors->iext_trie); - int ret = 0; + bool fail = false; // From man console_codes - ret |= init_esc(colors, "rs", "0", &colors->reset); - ret |= init_esc(colors, "lc", "\033[", &colors->leftcode); - ret |= init_esc(colors, "rc", "m", &colors->rightcode); - ret |= init_esc(colors, "ec", NULL, &colors->endcode); - ret |= init_esc(colors, "cl", "\033[K", &colors->clear_to_eol); - - ret |= init_esc(colors, "bld", "01;39", &colors->bold); - ret |= init_esc(colors, "gry", "01;30", &colors->gray); - ret |= init_esc(colors, "red", "01;31", &colors->red); - ret |= init_esc(colors, "grn", "01;32", &colors->green); - ret |= init_esc(colors, "ylw", "01;33", &colors->yellow); - ret |= init_esc(colors, "blu", "01;34", &colors->blue); - ret |= init_esc(colors, "mag", "01;35", &colors->magenta); - ret |= init_esc(colors, "cyn", "01;36", &colors->cyan); - ret |= init_esc(colors, "wht", "01;37", &colors->white); - - ret |= init_esc(colors, "wrn", "01;33", &colors->warning); - ret |= init_esc(colors, "err", "01;31", &colors->error); + fail = fail || init_esc(colors, "rs", "0", &colors->reset); + fail = fail || init_esc(colors, "lc", "\033[", &colors->leftcode); + fail = fail || init_esc(colors, "rc", "m", &colors->rightcode); + fail = fail || init_esc(colors, "ec", NULL, &colors->endcode); + fail = fail || init_esc(colors, "cl", "\033[K", &colors->clear_to_eol); + + fail = fail || init_esc(colors, "bld", "01;39", &colors->bold); + fail = fail || init_esc(colors, "gry", "01;30", &colors->gray); + fail = fail || init_esc(colors, "red", "01;31", &colors->red); + fail = fail || init_esc(colors, "grn", "01;32", &colors->green); + fail = fail || init_esc(colors, "ylw", "01;33", &colors->yellow); + fail = fail || init_esc(colors, "blu", "01;34", &colors->blue); + fail = fail || init_esc(colors, "mag", "01;35", &colors->magenta); + fail = fail || init_esc(colors, "cyn", "01;36", &colors->cyan); + fail = fail || init_esc(colors, "wht", "01;37", &colors->white); + + fail = fail || init_esc(colors, "wrn", "01;33", &colors->warning); + fail = fail || init_esc(colors, "err", "01;31", &colors->error); // Defaults from man dir_colors // "" means fall back to ->normal - ret |= init_esc(colors, "no", NULL, &colors->normal); + fail = fail || init_esc(colors, "no", NULL, &colors->normal); - ret |= init_esc(colors, "fi", "", &colors->file); - ret |= init_esc(colors, "mh", NULL, &colors->multi_hard); - ret |= init_esc(colors, "ex", "01;32", &colors->executable); - ret |= init_esc(colors, "ca", NULL, &colors->capable); - ret |= init_esc(colors, "sg", "30;43", &colors->setgid); - ret |= init_esc(colors, "su", "37;41", &colors->setuid); + fail = fail || init_esc(colors, "fi", "", &colors->file); + fail = fail || init_esc(colors, "mh", NULL, &colors->multi_hard); + fail = fail || init_esc(colors, "ex", "01;32", &colors->executable); + fail = fail || init_esc(colors, "ca", NULL, &colors->capable); + fail = fail || init_esc(colors, "sg", "30;43", &colors->setgid); + fail = fail || init_esc(colors, "su", "37;41", &colors->setuid); - ret |= init_esc(colors, "di", "01;34", &colors->directory); - ret |= init_esc(colors, "st", "37;44", &colors->sticky); - ret |= init_esc(colors, "ow", "34;42", &colors->other_writable); - ret |= init_esc(colors, "tw", "30;42", &colors->sticky_other_writable); + fail = fail || init_esc(colors, "di", "01;34", &colors->directory); + fail = fail || init_esc(colors, "st", "37;44", &colors->sticky); + fail = fail || init_esc(colors, "ow", "34;42", &colors->other_writable); + fail = fail || init_esc(colors, "tw", "30;42", &colors->sticky_other_writable); - ret |= init_esc(colors, "ln", "01;36", &colors->link); - ret |= init_esc(colors, "or", NULL, &colors->orphan); - ret |= init_esc(colors, "mi", NULL, &colors->missing); + fail = fail || init_esc(colors, "ln", "01;36", &colors->link); + fail = fail || init_esc(colors, "or", NULL, &colors->orphan); + fail = fail || init_esc(colors, "mi", NULL, &colors->missing); colors->link_as_target = false; - ret |= init_esc(colors, "bd", "01;33", &colors->blockdev); - ret |= init_esc(colors, "cd", "01;33", &colors->chardev); - ret |= init_esc(colors, "do", "01;35", &colors->door); - ret |= init_esc(colors, "pi", "33", &colors->pipe); - ret |= init_esc(colors, "so", "01;35", &colors->socket); + fail = fail || init_esc(colors, "bd", "01;33", &colors->blockdev); + fail = fail || init_esc(colors, "cd", "01;33", &colors->chardev); + fail = fail || init_esc(colors, "do", "01;35", &colors->door); + fail = fail || init_esc(colors, "pi", "33", &colors->pipe); + fail = fail || init_esc(colors, "so", "01;35", &colors->socket); - if (ret != 0) { + if (fail) { goto fail; } @@ -693,6 +691,20 @@ struct colors *parse_colors(void) { colors->link->len = 0; } + // Pre-compute the reset escape sequence + if (!colors->endcode) { + dchar *ec = dstralloc(0); + if (!ec + || cat_esc(&ec, colors->leftcode) != 0 + || cat_esc(&ec, colors->reset) != 0 + || cat_esc(&ec, colors->rightcode) != 0 + || set_esc(colors, "ec", ec) != 0) { + dstrfree(ec); + goto fail; + } + dstrfree(ec); + } + return colors; fail: @@ -727,10 +739,11 @@ CFILE *cfwrap(FILE *file, const struct colors *colors, bool close) { } cfile->file = file; + cfile->fd = fileno(file); cfile->need_reset = false; cfile->close = close; - if (isatty(fileno(file))) { + if (isatty(cfile->fd)) { cfile->colors = colors; } else { cfile->colors = NULL; @@ -755,18 +768,196 @@ int cfclose(CFILE *cfile) { return ret; } +bool colors_need_stat(const struct colors *colors) { + return colors->setuid || colors->setgid || colors->executable || colors->multi_hard + || colors->sticky_other_writable || colors->other_writable || colors->sticky; +} + +/** A colorable file path. */ +struct cpath { + /** The full path to color. */ + const char *path; + /** The basename offset of the last valid component. */ + size_t nameoff; + /** The end offset of the last valid component. */ + size_t valid; + /** The total length of the path. */ + size_t len; + + /** The bftw() buffer. */ + const struct BFTW *ftwbuf; + /** bfs_stat() flags for the final component. */ + enum bfs_stat_flags flags; + /** A bfs_stat() buffer, filled in when 0 < valid < len. */ + struct bfs_stat statbuf; +}; + +/** Move the valid range of a path backwards. */ +static void cpath_retreat(struct cpath *cpath) { + const char *path = cpath->path; + size_t nameoff = cpath->nameoff; + size_t valid = cpath->valid; + + if (valid > 0 && path[valid - 1] == '/') { + // Try without trailing slashes, to distinguish "notdir/" from "notdir" + do { + --valid; + } while (valid > 0 && path[valid - 1] == '/'); + + nameoff = valid; + while (nameoff > 0 && path[nameoff - 1] != '/') { + --nameoff; + } + } else { + // Remove the last component and try again + valid = nameoff; + } + + cpath->nameoff = nameoff; + cpath->valid = valid; +} + +/** Initialize a struct cpath. */ +static int cpath_init(struct cpath *cpath, const char *path, const struct BFTW *ftwbuf, enum bfs_stat_flags flags) { + // Normally there are only two components to color: + // + // nameoff valid + // v v + // path/to/filename + // --------+------- + // ${di} ${fi} + // + // Error cases also usually have two components: + // + // valid, + // nameoff + // v + // path/to/nowhere + // --------+------ + // ${di} ${mi} + // + // But with ENOTDIR, there may be three: + // + // nameoff valid + // v v + // path/to/filename/nowhere + // --------+-------+------- + // ${di} ${fi} ${mi} + + cpath->path = path; + cpath->len = strlen(path); + cpath->ftwbuf = ftwbuf; + cpath->flags = flags; + + cpath->valid = cpath->len; + if (path == ftwbuf->path) { + cpath->nameoff = ftwbuf->nameoff; + } else { + cpath->nameoff = xbaseoff(path); + } + + if (bftw_type(ftwbuf, flags) != BFS_ERROR) { + return 0; + } + + cpath_retreat(cpath); + + // Find the base path. For symlinks like + // + // path/to/symlink -> nested/file + // + // this will be something like + // + // path/to/nested/file + int at_fd = AT_FDCWD; + dchar *at_path = NULL; + if (path == ftwbuf->path) { + if (ftwbuf->depth > 0) { + // The parent must have existed to get here + return 0; + } + } else { + // We're in print_link_target(), so resolve relative to the link's parent directory + at_fd = ftwbuf->at_fd; + if (at_fd == (int)AT_FDCWD && path[0] != '/') { + at_path = dstrxdup(ftwbuf->path, ftwbuf->nameoff); + if (!at_path) { + return -1; + } + } + } + + if (!at_path) { + at_path = dstralloc(cpath->valid); + if (!at_path) { + return -1; + } + } + if (dstrxcat(&at_path, path, cpath->valid) != 0) { + dstrfree(at_path); + return -1; + } + + size_t at_off = dstrlen(at_path) - cpath->valid; + + // Find the longest valid path prefix + while (cpath->valid > 0) { + if (bfs_stat(at_fd, at_path, BFS_STAT_FOLLOW, &cpath->statbuf) == 0) { + break; + } + + cpath_retreat(cpath); + dstrshrink(at_path, at_off + cpath->valid); + } + + dstrfree(at_path); + return 0; +} + +/** Get the bfs_stat() buffer for the last valid component. */ +static const struct bfs_stat *cpath_stat(const struct cpath *cpath) { + if (cpath->valid == cpath->len) { + return bftw_stat(cpath->ftwbuf, cpath->flags); + } else { + return &cpath->statbuf; + } +} + +/** Check if a path has non-trivial capabilities. */ +static bool cpath_has_capabilities(const struct cpath *cpath) { + if (cpath->valid == cpath->len) { + return bfs_check_capabilities(cpath->ftwbuf) > 0; + } else { + // TODO: implement capability checks for arbitrary paths + return false; + } +} + /** Check if a symlink is broken. */ -static bool is_link_broken(const struct BFTW *ftwbuf) { +static bool cpath_is_broken(const struct cpath *cpath) { + if (cpath->valid < cpath->len) { + // A valid parent can't be a broken link + return false; + } + + const struct BFTW *ftwbuf = cpath->ftwbuf; if (ftwbuf->stat_flags & BFS_STAT_NOFOLLOW) { return xfaccessat(ftwbuf->at_fd, ftwbuf->at_path, F_OK) != 0; } else { + // A link encountered with BFS_STAT_TRYFOLLOW must be broken return true; } } /** Get the color for a file. */ -static const struct esc_seq *file_color(const struct colors *colors, const char *filename, const struct BFTW *ftwbuf, enum bfs_stat_flags flags) { - enum bfs_type type = bftw_type(ftwbuf, flags); +static const struct esc_seq *file_color(const struct colors *colors, const struct cpath *cpath) { + enum bfs_type type; + if (cpath->valid == cpath->len) { + type = bftw_type(cpath->ftwbuf, cpath->flags); + } else { + type = bfs_mode_to_type(cpath->statbuf.mode); + } + if (type == BFS_ERROR) { goto error; } @@ -777,7 +968,7 @@ static const struct esc_seq *file_color(const struct colors *colors, const char switch (type) { case BFS_REG: if (colors->setuid || colors->setgid || colors->executable || colors->multi_hard) { - statbuf = bftw_stat(ftwbuf, flags); + statbuf = cpath_stat(cpath); if (!statbuf) { goto error; } @@ -787,7 +978,7 @@ static const struct esc_seq *file_color(const struct colors *colors, const char color = colors->setuid; } else if (colors->setgid && (statbuf->mode & 02000)) { color = colors->setgid; - } else if (colors->capable && bfs_check_capabilities(ftwbuf) > 0) { + } else if (colors->capable && cpath_has_capabilities(cpath)) { color = colors->capable; } else if (colors->executable && (statbuf->mode & 00111)) { color = colors->executable; @@ -796,7 +987,9 @@ static const struct esc_seq *file_color(const struct colors *colors, const char } if (!color) { - color = get_ext(colors, filename); + const char *name = cpath->path + cpath->nameoff; + size_t namelen = cpath->valid - cpath->nameoff; + color = get_ext(colors, name, namelen); } if (!color) { @@ -807,7 +1000,7 @@ static const struct esc_seq *file_color(const struct colors *colors, const char case BFS_DIR: if (colors->sticky_other_writable || colors->other_writable || colors->sticky) { - statbuf = bftw_stat(ftwbuf, flags); + statbuf = cpath_stat(cpath); if (!statbuf) { goto error; } @@ -826,7 +1019,7 @@ static const struct esc_seq *file_color(const struct colors *colors, const char break; case BFS_LNK: - if (colors->orphan && is_link_broken(ftwbuf)) { + if (colors->orphan && cpath_is_broken(cpath)) { color = colors->orphan; } else { color = colors->link; @@ -869,7 +1062,7 @@ error: /** Print an escape sequence chunk. */ static int print_esc_chunk(CFILE *cfile, const struct esc_seq *esc) { - return dstrxcat(&cfile->buffer, esc->seq, esc->len); + return cat_esc(&cfile->buffer, esc); } /** Print an ANSI escape sequence. */ @@ -903,12 +1096,7 @@ static int print_reset(CFILE *cfile) { } cfile->need_reset = false; - const struct colors *colors = cfile->colors; - if (colors->endcode) { - return print_esc_chunk(cfile, colors->endcode); - } else { - return print_esc(cfile, colors->reset); - } + return print_esc_chunk(cfile, cfile->colors->endcode); } /** Print a shell-escaped string. */ @@ -918,6 +1106,10 @@ static int print_wordesc(CFILE *cfile, const char *str, size_t n, enum wesc_flag /** Print a string with an optional color. */ static int print_colored(CFILE *cfile, const struct esc_seq *esc, const char *str, size_t len) { + if (len == 0) { + return 0; + } + if (print_esc(cfile, esc) != 0) { return -1; } @@ -934,112 +1126,42 @@ static int print_colored(CFILE *cfile, const struct esc_seq *esc, const char *st return 0; } -/** Find the offset of the first broken path component. */ -static ssize_t first_broken_offset(const char *path, const struct BFTW *ftwbuf, enum bfs_stat_flags flags, size_t max) { - ssize_t ret = max; - bfs_assert(ret >= 0); - - if (bftw_type(ftwbuf, flags) != BFS_ERROR) { - goto out; - } - - char *at_path; - int at_fd; - if (path == ftwbuf->path) { - if (ftwbuf->depth == 0) { - at_fd = AT_FDCWD; - at_path = dstrndup(path, max); - } else { - // The parent must have existed to get here - goto out; - } - } else { - // We're in print_link_target(), so resolve relative to the link's parent directory - at_fd = ftwbuf->at_fd; - if (at_fd == AT_FDCWD && path[0] != '/') { - at_path = dstrndup(ftwbuf->path, ftwbuf->nameoff); - if (at_path && dstrncat(&at_path, path, max) != 0) { - ret = -1; - goto out_path; - } - } else { - at_path = dstrndup(path, max); - } - } - - if (!at_path) { - ret = -1; - goto out; - } - - while (ret > 0) { - if (xfaccessat(at_fd, at_path, F_OK) == 0) { - break; - } - - size_t len = dstrlen(at_path); - while (ret && at_path[len - 1] == '/') { - --len, --ret; - } - if (errno != ENOTDIR) { - while (ret && at_path[len - 1] != '/') { - --len, --ret; - } - } - - dstresize(&at_path, len); - } - -out_path: - dstrfree(at_path); -out: - return ret; -} - /** Print a path with colors. */ static int print_path_colored(CFILE *cfile, const char *path, const struct BFTW *ftwbuf, enum bfs_stat_flags flags) { - size_t nameoff; - if (path == ftwbuf->path) { - nameoff = ftwbuf->nameoff; - } else { - nameoff = xbaseoff(path); - } - - const char *name = path + nameoff; - size_t pathlen = nameoff + strlen(name); - - ssize_t broken = first_broken_offset(path, ftwbuf, flags, nameoff); - if (broken < 0) { + struct cpath cpath; + if (cpath_init(&cpath, path, ftwbuf, flags) != 0) { return -1; } - size_t split = broken; const struct colors *colors = cfile->colors; const struct esc_seq *dirs_color = colors->directory; - const struct esc_seq *name_color; + const struct esc_seq *name_color = NULL; + const struct esc_seq *err_color = colors->missing; + if (!err_color) { + err_color = colors->orphan; + } - if (split < nameoff) { - name_color = colors->missing; - if (!name_color) { - name_color = colors->orphan; - } - } else { - name_color = file_color(cfile->colors, path + nameoff, ftwbuf, flags); + if (cpath.nameoff < cpath.valid) { + name_color = file_color(colors, &cpath); if (name_color == dirs_color) { - split = pathlen; + cpath.nameoff = cpath.valid; } } - if (split > 0) { - if (print_colored(cfile, dirs_color, path, split) != 0) { - return -1; - } + if (print_colored(cfile, dirs_color, path, cpath.nameoff) != 0) { + return -1; } - if (split < pathlen) { - if (print_colored(cfile, name_color, path + split, pathlen - split) != 0) { - return -1; - } + const char *name = path + cpath.nameoff; + size_t name_len = cpath.valid - cpath.nameoff; + if (print_colored(cfile, name_color, name, name_len) != 0) { + return -1; + } + + const char *tail = path + cpath.valid; + size_t tail_len = cpath.len - cpath.valid; + if (print_colored(cfile, err_color, tail, tail_len) != 0) { + return -1; } return 0; @@ -1047,8 +1169,18 @@ static int print_path_colored(CFILE *cfile, const char *path, const struct BFTW /** Print a file name with colors. */ static int print_name_colored(CFILE *cfile, const char *name, const struct BFTW *ftwbuf, enum bfs_stat_flags flags) { - const struct esc_seq *esc = file_color(cfile->colors, name, ftwbuf, flags); - return print_colored(cfile, esc, name, strlen(name)); + size_t len = strlen(name); + const struct cpath cpath = { + .path = name, + .nameoff = 0, + .valid = len, + .len = len, + .ftwbuf = ftwbuf, + .flags = flags, + }; + + const struct esc_seq *esc = file_color(cfile->colors, &cpath); + return print_colored(cfile, esc, name, cpath.len); } /** Print the name of a file with the appropriate colors. */ @@ -1105,68 +1237,80 @@ static int print_link_target(CFILE *cfile, const struct BFTW *ftwbuf) { } /** Format some colored output to the buffer. */ -BFS_FORMATTER(2, 3) +_printf(2, 3) static int cbuff(CFILE *cfile, const char *format, ...); -/** Dump a parsed expression tree, for debugging. */ -static int print_expr(CFILE *cfile, const struct bfs_expr *expr, bool verbose) { - if (!expr) { - return dstrcat(&cfile->buffer, "(null)"); +/** Print an expression's name, for diagnostics. */ +static int print_expr_name(CFILE *cfile, const struct bfs_expr *expr) { + switch (expr->kind) { + case BFS_FLAG: + return cbuff(cfile, "${cyn}%pq${rs}", expr->argv[0]); + case BFS_OPERATOR: + return cbuff(cfile, "${red}%pq${rs}", expr->argv[0]); + default: + return cbuff(cfile, "${blu}%pq${rs}", expr->argv[0]); } +} - if (dstrcat(&cfile->buffer, "(") != 0) { +/** Print an expression's args, for diagnostics. */ +static int print_expr_args(CFILE *cfile, const struct bfs_expr *expr) { + if (print_expr_name(cfile, expr) != 0) { return -1; } - const struct bfs_expr *lhs = NULL; - const struct bfs_expr *rhs = NULL; - - if (bfs_expr_is_parent(expr)) { - lhs = expr->lhs; - rhs = expr->rhs; - - if (cbuff(cfile, "${red}%pq${rs}", expr->argv[0]) < 0) { - return -1; - } - } else { - if (cbuff(cfile, "${blu}%pq${rs}", expr->argv[0]) < 0) { - return -1; - } - } - for (size_t i = 1; i < expr->argc; ++i) { if (cbuff(cfile, " ${bld}%pq${rs}", expr->argv[i]) < 0) { return -1; } } + return 0; +} + +/** Dump a parsed expression tree, for debugging. */ +static int print_expr(CFILE *cfile, const struct bfs_expr *expr, bool verbose, int depth) { + if (depth >= 2) { + return dstrcat(&cfile->buffer, "(...)"); + } + + if (!expr) { + return dstrcat(&cfile->buffer, "(null)"); + } + + if (dstrcat(&cfile->buffer, "(") != 0) { + return -1; + } + + if (print_expr_args(cfile, expr) != 0) { + return -1; + } + if (verbose) { double rate = 0.0, time = 0.0; if (expr->evaluations) { - rate = 100.0*expr->successes/expr->evaluations; - time = (1.0e9*expr->elapsed.tv_sec + expr->elapsed.tv_nsec)/expr->evaluations; + rate = 100.0 * expr->successes / expr->evaluations; + time = (1.0e9 * expr->elapsed.tv_sec + expr->elapsed.tv_nsec) / expr->evaluations; } if (cbuff(cfile, " [${ylw}%zu${rs}/${ylw}%zu${rs}=${ylw}%g%%${rs}; ${ylw}%gns${rs}]", - expr->successes, expr->evaluations, rate, time)) { + expr->successes, expr->evaluations, rate, time)) { return -1; } } - if (lhs) { + int count = 0; + for_expr (child, expr) { if (dstrcat(&cfile->buffer, " ") != 0) { return -1; } - if (print_expr(cfile, lhs, verbose) != 0) { - return -1; - } - } - - if (rhs) { - if (dstrcat(&cfile->buffer, " ") != 0) { - return -1; - } - if (print_expr(cfile, rhs, verbose) != 0) { - return -1; + if (++count >= 3) { + if (dstrcat(&cfile->buffer, "...") != 0) { + return -1; + } + break; + } else { + if (print_expr(cfile, child, verbose, depth + 1) != 0) { + return -1; + } } } @@ -1177,10 +1321,9 @@ static int print_expr(CFILE *cfile, const struct bfs_expr *expr, bool verbose) { return 0; } -BFS_FORMATTER(2, 0) +_printf(2, 0) static int cvbuff(CFILE *cfile, const char *format, va_list args) { const struct colors *colors = cfile->colors; - int error = errno; // Color specifier (e.g. ${blu}) state struct esc_seq **esc; @@ -1190,7 +1333,7 @@ static int cvbuff(CFILE *cfile, const char *format, va_list args) { for (const char *i = format; *i; ++i) { size_t verbatim = strcspn(i, "%$"); - if (dstrncat(&cfile->buffer, i, verbatim) != 0) { + if (dstrxcat(&cfile->buffer, i, verbatim) != 0) { return -1; } i += verbatim; @@ -1238,12 +1381,6 @@ static int cvbuff(CFILE *cfile, const char *format, va_list args) { } break; - case 'm': - if (dstrcat(&cfile->buffer, strerror(error)) != 0) { - return -1; - } - break; - case 'p': switch (*++i) { case 'q': @@ -1276,12 +1413,22 @@ static int cvbuff(CFILE *cfile, const char *format, va_list args) { break; case 'e': - if (print_expr(cfile, va_arg(args, const struct bfs_expr *), false) != 0) { + if (print_expr(cfile, va_arg(args, const struct bfs_expr *), false, 0) != 0) { return -1; } break; case 'E': - if (print_expr(cfile, va_arg(args, const struct bfs_expr *), true) != 0) { + if (print_expr(cfile, va_arg(args, const struct bfs_expr *), true, 0) != 0) { + return -1; + } + break; + case 'x': + if (print_expr_args(cfile, va_arg(args, const struct bfs_expr *)) != 0) { + return -1; + } + break; + case 'X': + if (print_expr_name(cfile, va_arg(args, const struct bfs_expr *)) != 0) { return -1; } break; @@ -1377,7 +1524,7 @@ int cvfprintf(CFILE *cfile, const char *format, va_list args) { } } - dstresize(&cfile->buffer, 0); + dstrshrink(cfile->buffer, 0); return ret; } @@ -1388,3 +1535,14 @@ int cfprintf(CFILE *cfile, const char *format, ...) { va_end(args); return ret; } + +int cfreset(CFILE *cfile) { + const struct colors *colors = cfile->colors; + if (!colors) { + return 0; + } + + const struct esc_seq *esc = colors->endcode; + size_t ret = xwrite(cfile->fd, esc->seq, esc->len); + return ret == esc->len ? 0 : -1; +} diff --git a/src/color.h b/src/color.h index 0d46c33..aac8b33 100644 --- a/src/color.h +++ b/src/color.h @@ -8,8 +8,9 @@ #ifndef BFS_COLOR_H #define BFS_COLOR_H -#include "config.h" -#include <stdarg.h> +#include "bfs.h" +#include "dstring.h" + #include <stdio.h> /** @@ -18,17 +19,17 @@ struct colors; /** - * Parse a color table. - * - * @return The parsed color table. + * Parse the color table from the environment. */ struct colors *parse_colors(void); /** + * Check if stat() info is required to color a file correctly. + */ +bool colors_need_stat(const struct colors *colors); + +/** * Free a color table. - * - * @param colors - * The color table to free. */ void free_colors(struct colors *colors); @@ -41,7 +42,9 @@ typedef struct CFILE { /** The color table to use, if any. */ const struct colors *colors; /** A buffer for colored formatting. */ - char *buffer; + dchar *buffer; + /** Cached file descriptor number. */ + int fd; /** Whether the next ${rs} is actually necessary. */ bool need_reset; /** Whether to close the underlying stream. */ @@ -51,11 +54,11 @@ typedef struct CFILE { /** * Wrap an existing file into a colored stream. * - * @param file + * @file * The underlying file. - * @param colors + * @colors * The color table to use if file is a TTY. - * @param close + * @close * Whether to close the underlying stream when this stream is closed. * @return * A colored wrapper around file. @@ -65,7 +68,7 @@ CFILE *cfwrap(FILE *file, const struct colors *colors, bool close); /** * Close a colored file. * - * @param cfile + * @cfile * The colored file to close. * @return * 0 on success, -1 on failure. @@ -75,9 +78,9 @@ int cfclose(CFILE *cfile); /** * Colored, formatted output. * - * @param cfile + * @cfile * The colored stream to print to. - * @param format + * @format * A printf()-style format string, supporting these format specifiers: * * %c: A single character @@ -85,7 +88,6 @@ int cfclose(CFILE *cfile); * %g: A double * %s: A string * %zu: A size_t - * %m: strerror(errno) * %pq: A shell-escaped string, like bash's printf %q * %pQ: A TTY-escaped string. * %pF: A colored file name, from a const struct BFTW * argument @@ -93,19 +95,26 @@ int cfclose(CFILE *cfile); * %pL: A colored link target, from a const struct BFTW * argument * %pe: Dump a const struct bfs_expr *, for debugging. * %pE: Dump a const struct bfs_expr * in verbose form, for debugging. + * %px: Print a const struct bfs_expr * with syntax highlighting. + * %pX: Print the name of a const struct bfs_expr *, without arguments. * %%: A literal '%' * ${cc}: Change the color to 'cc' * $$: A literal '$' * @return * 0 on success, -1 on failure. */ -BFS_FORMATTER(2, 3) +_printf(2, 3) int cfprintf(CFILE *cfile, const char *format, ...); /** * cfprintf() variant that takes a va_list. */ -BFS_FORMATTER(2, 0) +_printf(2, 0) int cvfprintf(CFILE *cfile, const char *format, va_list args); +/** + * Reset the TTY state when terminating abnormally (async-signal-safe). + */ +int cfreset(CFILE *cfile); + #endif // BFS_COLOR_H diff --git a/src/config.h b/src/config.h deleted file mode 100644 index 14c9305..0000000 --- a/src/config.h +++ /dev/null @@ -1,213 +0,0 @@ -// Copyright © Tavian Barnes <tavianator@tavianator.com> -// SPDX-License-Identifier: 0BSD - -/** - * Configuration and feature/platform detection. - */ - -#ifndef BFS_CONFIG_H -#define BFS_CONFIG_H - -#include <stddef.h> - -#if __STDC_VERSION__ < 202311L -# include <stdalign.h> -# include <stdbool.h> -# include <stdnoreturn.h> -#endif - -// bfs packaging configuration - -#ifndef BFS_COMMAND -# define BFS_COMMAND "bfs" -#endif -#ifndef BFS_VERSION -# define BFS_VERSION "3.0.2" -#endif -#ifndef BFS_HOMEPAGE -# define BFS_HOMEPAGE "https://tavianator.com/projects/bfs.html" -#endif - -// Check for system headers - -#ifdef __has_include - -#if __has_include(<mntent.h>) -# define BFS_HAS_MNTENT_H true -#endif -#if __has_include(<paths.h>) -# define BFS_HAS_PATHS_H true -#endif -#if __has_include(<sys/acl.h>) -# define BFS_HAS_SYS_ACL_H true -#endif -#if __has_include(<sys/capability.h>) -# define BFS_HAS_SYS_CAPABILITY_H true -#endif -#if __has_include(<sys/extattr.h>) -# define BFS_HAS_SYS_EXTATTR_H true -#endif -#if __has_include(<sys/mkdev.h>) -# define BFS_HAS_SYS_MKDEV_H true -#endif -#if __has_include(<sys/param.h>) -# define BFS_HAS_SYS_PARAM_H true -#endif -#if __has_include(<sys/sysmacros.h>) -# define BFS_HAS_SYS_SYSMACROS_H true -#endif -#if __has_include(<sys/xattr.h>) -# define BFS_HAS_SYS_XATTR_H true -#endif -#if __has_include(<util.h>) -# define BFS_HAS_UTIL_H true -#endif - -#else // !__has_include - -#define BFS_HAS_MNTENT_H __GLIBC__ -#define BFS_HAS_PATHS_H true -#define BFS_HAS_SYS_ACL_H true -#define BFS_HAS_SYS_CAPABILITY_H __linux__ -#define BFS_HAS_SYS_EXTATTR_H __FreeBSD__ -#define BFS_HAS_SYS_MKDEV_H false -#define BFS_HAS_SYS_PARAM_H true -#define BFS_HAS_SYS_SYSMACROS_H __GLIBC__ -#define BFS_HAS_SYS_XATTR_H __linux__ -#define BFS_HAS_UTIL_H __NetBSD__ - -#endif // !__has_include - -#ifndef BFS_USE_MNTENT_H -# define BFS_USE_MNTENT_H BFS_HAS_MNTENT_H -#endif -#ifndef BFS_USE_PATHS_H -# define BFS_USE_PATHS_H BFS_HAS_PATHS_H -#endif -#ifndef BFS_USE_SYS_ACL_H -# define BFS_USE_SYS_ACL_H BFS_HAS_SYS_ACL_H -#endif -#ifndef BFS_USE_SYS_CAPABILITY_H -# define BFS_USE_SYS_CAPABILITY_H BFS_HAS_SYS_CAPABILITY_H -#endif -#ifndef BFS_USE_SYS_EXTATTR_H -# define BFS_USE_SYS_EXTATTR_H BFS_HAS_SYS_EXTATTR_H -#endif -#ifndef BFS_USE_SYS_MKDEV_H -# define BFS_USE_SYS_MKDEV_H BFS_HAS_SYS_MKDEV_H -#endif -#ifndef BFS_USE_SYS_PARAM_H -# define BFS_USE_SYS_PARAM_H BFS_HAS_SYS_PARAM_H -#endif -#ifndef BFS_USE_SYS_SYSMACROS_H -# define BFS_USE_SYS_SYSMACROS_H BFS_HAS_SYS_SYSMACROS_H -#endif -#ifndef BFS_USE_SYS_XATTR_H -# define BFS_USE_SYS_XATTR_H BFS_HAS_SYS_XATTR_H -#endif -#ifndef BFS_USE_UTIL_H -# define BFS_USE_UTIL_H BFS_HAS_UTIL_H -#endif - -// Stub out feature detection on old/incompatible compilers - -#ifndef __has_feature -# define __has_feature(feat) false -#endif - -#ifndef __has_c_attribute -# define __has_c_attribute(attr) false -#endif - -#ifndef __has_attribute -# define __has_attribute(attr) false -#endif - -// Platform detection - -// Get the definition of BSD if available -#if BFS_USE_SYS_PARAM_H -# include <sys/param.h> -#endif - -#ifndef __GLIBC_PREREQ -# define __GLIBC_PREREQ(maj, min) false -#endif - -// Fundamental utilities - -/** - * Get the length of an array. - */ -#define countof(array) (sizeof(array) / sizeof(0[array])) - -/** - * False sharing/destructive interference/largest cache line size. - */ -#ifdef __GCC_DESTRUCTIVE_SIZE -# define FALSE_SHARING_SIZE __GCC_DESTRUCTIVE_SIZE -#else -# define FALSE_SHARING_SIZE 64 -#endif - -/** - * True sharing/constructive interference/smallest cache line size. - */ -#ifdef __GCC_CONSTRUCTIVE_SIZE -# define TRUE_SHARING_SIZE __GCC_CONSTRUCTIVE_SIZE -#else -# define TRUE_SHARING_SIZE 64 -#endif - -/** - * Alignment specifier that avoids false sharing. - */ -#define cache_align alignas(FALSE_SHARING_SIZE) - -// Wrappers for attributes - -/** - * Silence compiler warnings about switch/case fall-throughs. - */ -#if __has_c_attribute(fallthrough) -# define fallthru [[fallthrough]] -#elif __has_attribute(fallthrough) -# define fallthru __attribute__((fallthrough)) -#else -# define fallthru ((void)0) -#endif - -/** - * Adds compiler warnings for bad printf()-style function calls, if supported. - */ -#if __has_attribute(format) -# define BFS_FORMATTER(fmt, args) __attribute__((format(printf, fmt, args))) -#else -# define BFS_FORMATTER(fmt, args) -#endif - -/** - * Check if function multiversioning via GNU indirect functions (ifunc) is supported. - */ -#ifndef BFS_USE_TARGET_CLONES -# if __has_attribute(target_clones) && (__GLIBC__ || __FreeBSD__ || __NetBSD__) -# define BFS_USE_TARGET_CLONES true -# endif -#endif - -/** - * Ignore a particular GCC warning for a region of code. - */ -#if __GNUC__ -# define BFS_PRAGMA_STRINGIFY(...) _Pragma(#__VA_ARGS__) -# define BFS_SUPPRESS(warning) \ - _Pragma("GCC diagnostic push"); \ - BFS_PRAGMA_STRINGIFY(GCC diagnostic ignored warning) -# define BFS_UNSUPPRESS() \ - _Pragma("GCC diagnostic pop") -#else -# define BFS_SUPPRESS(warning) -# define BFS_UNSUPPRESS() -#endif - -#endif // BFS_CONFIG_H @@ -2,45 +2,27 @@ // SPDX-License-Identifier: 0BSD #include "ctx.h" + #include "alloc.h" +#include "bfstd.h" #include "color.h" -#include "darray.h" #include "diag.h" #include "expr.h" +#include "list.h" #include "mtab.h" #include "pwcache.h" +#include "sighook.h" #include "stat.h" #include "trie.h" -#include "xtime.h" + #include <errno.h> #include <limits.h> +#include <signal.h> #include <stdio.h> #include <stdlib.h> - -const char *debug_flag_name(enum debug_flags flag) { - switch (flag) { - case DEBUG_COST: - return "cost"; - case DEBUG_EXEC: - return "exec"; - case DEBUG_OPT: - return "opt"; - case DEBUG_RATES: - return "rates"; - case DEBUG_SEARCH: - return "search"; - case DEBUG_STAT: - return "stat"; - case DEBUG_TREE: - return "tree"; - - case DEBUG_ALL: - break; - } - - bfs_bug("Unrecognized debug flag"); - return "???"; -} +#include <sys/stat.h> +#include <time.h> +#include <unistd.h> struct bfs_ctx *bfs_ctx_new(void) { struct bfs_ctx *ctx = ZALLOC(struct bfs_ctx); @@ -48,19 +30,30 @@ struct bfs_ctx *bfs_ctx_new(void) { return NULL; } + SLIST_INIT(&ctx->expr_list); + ARENA_INIT(&ctx->expr_arena, struct bfs_expr); + ctx->maxdepth = INT_MAX; ctx->flags = BFTW_RECOVER; ctx->strategy = BFTW_BFS; ctx->optlevel = 3; + ctx->threads = nproc(); + if (ctx->threads > 8) { + // Not much speedup after 8 threads + ctx->threads = 8; + } + trie_init(&ctx->files); - struct rlimit rl; - if (getrlimit(RLIMIT_NOFILE, &rl) != 0) { + ctx->umask = umask(0); + umask(ctx->umask); + + if (getrlimit(RLIMIT_NOFILE, &ctx->orig_nofile) != 0) { goto fail; } - ctx->nofile_soft = rl.rlim_cur; - ctx->nofile_hard = rl.rlim_max; + ctx->cur_nofile = ctx->orig_nofile; + ctx->raise_nofile = true; ctx->users = bfs_users_new(); if (!ctx->users) { @@ -72,7 +65,7 @@ struct bfs_ctx *bfs_ctx_new(void) { goto fail; } - if (xgettime(&ctx->now) != 0) { + if (clock_gettime(CLOCK_REALTIME, &ctx->now) != 0) { goto fail; } @@ -106,13 +99,20 @@ struct bfs_ctx_file { CFILE *cfile; /** The path to the file (for diagnostics). */ const char *path; + /** Signal hook to send a reset escape sequence. */ + struct sighook *hook; /** Remembers I/O errors, to propagate them to the exit status. */ int error; }; +/** Call cfreset() on a tracked file. */ +static void cfreset_hook(int sig, siginfo_t *info, void *arg) { + cfreset(arg); +} + CFILE *bfs_ctx_dedup(struct bfs_ctx *ctx, CFILE *cfile, const char *path) { struct bfs_stat sb; - if (bfs_stat(fileno(cfile->file), NULL, 0, &sb) != 0) { + if (bfs_stat(cfile->fd, NULL, 0, &sb) != 0) { return NULL; } @@ -132,19 +132,31 @@ CFILE *bfs_ctx_dedup(struct bfs_ctx *ctx, CFILE *cfile, const char *path) { leaf->value = ctx_file = ALLOC(struct bfs_ctx_file); if (!ctx_file) { - trie_remove(&ctx->files, leaf); - return NULL; + goto fail; } ctx_file->cfile = cfile; ctx_file->path = path; ctx_file->error = 0; + ctx_file->hook = NULL; + + if (cfile->colors) { + ctx_file->hook = atsigexit(cfreset_hook, cfile); + if (!ctx_file->hook) { + goto fail; + } + } if (cfile != ctx->cout && cfile != ctx->cerr) { ++ctx->nfiles; } return cfile; + +fail: + trie_remove(&ctx->files, leaf); + free(ctx_file); + return NULL; } void bfs_ctx_flush(const struct bfs_ctx *ctx) { @@ -152,7 +164,7 @@ void bfs_ctx_flush(const struct bfs_ctx *ctx) { // - the user sees everything relevant before an -ok[dir] prompt // - output from commands is interleaved consistently with bfs // - executed commands can rely on I/O from other bfs actions - TRIE_FOR_EACH(&ctx->files, leaf) { + for_trie (leaf, &ctx->files) { struct bfs_ctx_file *ctx_file = leaf->value; CFILE *cfile = ctx_file->cfile; if (fflush(cfile->file) == 0) { @@ -164,11 +176,10 @@ void bfs_ctx_flush(const struct bfs_ctx *ctx) { const char *path = ctx_file->path; if (path) { - bfs_error(ctx, "'%s': %m.\n", path); + bfs_error(ctx, "%pq: %s.\n", path, errstr()); } else if (cfile == ctx->cout) { - bfs_error(ctx, "(standard output): %m.\n"); + bfs_error(ctx, "(standard output): %s.\n", errstr()); } - } // Flush the user/group caches, in case the executed command edits the @@ -197,30 +208,47 @@ static int bfs_ctx_fflush(CFILE *cfile) { static int bfs_ctx_fclose(struct bfs_ctx *ctx, struct bfs_ctx_file *ctx_file) { CFILE *cfile = ctx_file->cfile; - if (cfile == ctx->cout) { - // Will be checked later - return 0; - } else if (cfile == ctx->cerr) { - // Writes to stderr are allowed to fail silently, unless the same file was used by - // -fprint, -fls, etc. - if (ctx_file->path) { - return bfs_ctx_fflush(cfile); - } else { - return 0; - } - } - + // Writes to stderr are allowed to fail silently, unless the same file + // was used by -fprint, -fls, etc. + bool silent = cfile == ctx->cerr && !ctx_file->path; int ret = 0, error = 0; - if (ferror(cfile->file)) { + + if (ctx_file->error) { + // An error was previously reported during bfs_ctx_flush() ret = -1; - error = EIO; + error = ctx_file->error; } - if (cfclose(cfile) != 0) { + + // Flush the file just before we remove the hook, to maximize the chance + // we leave the TTY in a good state + if (bfs_ctx_fflush(cfile) != 0) { ret = -1; error = errno; } - errno = error; + sigunhook(ctx_file->hook); + + // Close the CFILE, except for stdio streams, which are closed later + if (cfile != ctx->cout && cfile != ctx->cerr) { + if (cfclose(cfile) != 0) { + ret = -1; + error = errno; + } + } + + if (silent) { + ret = 0; + } + + if (ret != 0 && ctx->cerr) { + if (ctx_file->path) { + bfs_error(ctx, "%pq: %s.\n", ctx_file->path, xstrerror(error)); + } else if (cfile == ctx->cout) { + bfs_error(ctx, "(standard output): %s.\n", xstrerror(error)); + } + } + + free(ctx_file); return ret; } @@ -231,50 +259,34 @@ int bfs_ctx_free(struct bfs_ctx *ctx) { CFILE *cout = ctx->cout; CFILE *cerr = ctx->cerr; - bfs_expr_free(ctx->exclude); - bfs_expr_free(ctx->expr); - bfs_mtab_free(ctx->mtab); bfs_groups_free(ctx->groups); bfs_users_free(ctx->users); - TRIE_FOR_EACH(&ctx->files, leaf) { + for_trie (leaf, &ctx->files) { struct bfs_ctx_file *ctx_file = leaf->value; - - if (ctx_file->error) { - // An error was previously reported during bfs_ctx_flush() - ret = -1; - } - if (bfs_ctx_fclose(ctx, ctx_file) != 0) { - if (cerr) { - bfs_error(ctx, "'%s': %m.\n", ctx_file->path); - } ret = -1; } - - free(ctx_file); } trie_destroy(&ctx->files); - if (cout && bfs_ctx_fflush(cout) != 0) { - if (cerr) { - bfs_error(ctx, "(standard output): %m.\n"); - } - ret = -1; - } - cfclose(cout); cfclose(cerr); - free_colors(ctx->colors); - for (size_t i = 0; i < darray_length(ctx->paths); ++i) { + for_slist (struct bfs_expr, expr, &ctx->expr_list, freelist) { + bfs_expr_clear(expr); + } + arena_destroy(&ctx->expr_arena); + + for (size_t i = 0; i < ctx->npaths; ++i) { free((char *)ctx->paths[i]); } - darray_free(ctx->paths); + free(ctx->paths); + free(ctx->kinds); free(ctx->argv); free(ctx); } @@ -8,39 +8,18 @@ #ifndef BFS_CTX_H #define BFS_CTX_H +#include "alloc.h" #include "bftw.h" -#include "config.h" +#include "diag.h" +#include "expr.h" #include "trie.h" + #include <stddef.h> #include <sys/resource.h> +#include <sys/types.h> #include <time.h> -/** - * Various debugging flags. - */ -enum debug_flags { - /** Print cost estimates. */ - DEBUG_COST = 1 << 0, - /** Print executed command details. */ - DEBUG_EXEC = 1 << 1, - /** Print optimization details. */ - DEBUG_OPT = 1 << 2, - /** Print rate information. */ - DEBUG_RATES = 1 << 3, - /** Trace the filesystem traversal. */ - DEBUG_SEARCH = 1 << 4, - /** Trace all stat() calls. */ - DEBUG_STAT = 1 << 5, - /** Print the parse tree. */ - DEBUG_TREE = 1 << 6, - /** All debug flags. */ - DEBUG_ALL = (1 << 7) - 1, -}; - -/** - * Convert a debug flag to a string. - */ -const char *debug_flag_name(enum debug_flags flag); +struct CFILE; /** * The execution context for bfs. @@ -50,13 +29,22 @@ struct bfs_ctx { size_t argc; /** The unparsed command line arguments. */ char **argv; + /** The argument token kinds. */ + enum bfs_kind *kinds; /** The root paths. */ const char **paths; + /** The number of root paths. */ + size_t npaths; + /** The main command line expression. */ struct bfs_expr *expr; /** An expression for files to filter out. */ struct bfs_expr *exclude; + /** A list of allocated expressions. */ + struct bfs_exprs expr_list; + /** bfs_expr arena. */ + struct arena expr_arena; /** -mindepth option. */ int mindepth; @@ -82,11 +70,18 @@ struct bfs_ctx { bool status; /** Whether to only return unique files (-unique). */ bool unique; - /** Whether to print warnings (-warn/-nowarn). */ - bool warn; /** Whether to only handle paths with xargs-safe characters (-X). */ bool xargs_safe; + /** Whether bfs was run interactively. */ + bool interactive; + /** Whether to print warnings (-warn/-nowarn). */ + bool warn; + /** Whether to report errors (-noerror). */ + bool ignore_errors; + /** Whether any dangerous actions (-delete/-exec) are present. */ + bool dangerous; + /** Color data. */ struct colors *colors; /** The error that occurred parsing the color table, if any. */ @@ -113,10 +108,15 @@ struct bfs_ctx { /** The number of files owned by the context. */ int nfiles; - /** The initial RLIMIT_NOFILE soft limit. */ - rlim_t nofile_soft; - /** The initial RLIMIT_NOFILE hard limit. */ - rlim_t nofile_hard; + /** The current file creation mask. */ + mode_t umask; + + /** The initial RLIMIT_NOFILE limits. */ + struct rlimit orig_nofile; + /** The current RLIMIT_NOFILE limits. */ + struct rlimit cur_nofile; + /** Whether the fd limit should be raised. */ + bool raise_nofile; /** The current time. */ struct timespec now; @@ -131,7 +131,7 @@ struct bfs_ctx *bfs_ctx_new(void); /** * Get the mount table. * - * @param ctx + * @ctx * The bfs context. * @return * The cached mount table, or NULL on failure. @@ -141,11 +141,11 @@ const struct bfs_mtab *bfs_ctx_mtab(const struct bfs_ctx *ctx); /** * Deduplicate an opened file. * - * @param ctx + * @ctx * The bfs context. - * @param cfile + * @cfile * The opened file. - * @param path + * @path * The path to the opened file (or NULL for standard streams). * @return * If the same file was opened previously, that file is returned. If cfile is a new file, @@ -156,7 +156,7 @@ struct CFILE *bfs_ctx_dedup(struct bfs_ctx *ctx, struct CFILE *cfile, const char /** * Flush any caches for consistency with external processes. * - * @param ctx + * @ctx * The bfs context. */ void bfs_ctx_flush(const struct bfs_ctx *ctx); @@ -164,9 +164,9 @@ void bfs_ctx_flush(const struct bfs_ctx *ctx); /** * Dump the parsed command line. * - * @param ctx + * @ctx * The bfs context. - * @param flag + * @flag * The -D flag that triggered the dump. */ void bfs_ctx_dump(const struct bfs_ctx *ctx, enum debug_flags flag); @@ -174,7 +174,7 @@ void bfs_ctx_dump(const struct bfs_ctx *ctx, enum debug_flags flag); /** * Free a bfs context. * - * @param ctx + * @ctx * The context to free. * @return * 0 on success, -1 if any errors occurred. diff --git a/src/darray.c b/src/darray.c deleted file mode 100644 index 42b8397..0000000 --- a/src/darray.c +++ /dev/null @@ -1,90 +0,0 @@ -// Copyright © Tavian Barnes <tavianator@tavianator.com> -// SPDX-License-Identifier: 0BSD - -#include "darray.h" -#include <stdlib.h> -#include <string.h> - -/** - * The darray header. - */ -struct darray { - /** The current capacity of the array, as a count of elements. */ - size_t capacity; - /** The current length of the array. */ - size_t length; - - // The array elements are stored after this header in memory. Not using - // a flexible array member to avoid worrying about strict aliasing. We - // assume that 2*sizeof(size_t) keeps any memory allocation suitably - // aligned for the element type. -}; - -/** Get the header for a darray. */ -static struct darray *darray_header(const void *da) { - return (struct darray *)da - 1; -} - -/** Get the array from a darray header. */ -static char *darray_data(struct darray *header) { - return (char *)(header + 1); -} - -size_t darray_length(const void *da) { - if (da) { - return darray_header(da)->length; - } else { - return 0; - } -} - -void *darray_push(void *da, const void *item, size_t size) { - struct darray *header; - if (da) { - header = darray_header(da); - } else { - header = malloc(sizeof(*header) + size); - if (!header) { - return NULL; - } - header->capacity = 1; - header->length = 0; - } - - size_t capacity = header->capacity; - size_t i = header->length++; - if (i >= capacity) { - capacity *= 2; - header = realloc(header, sizeof(*header) + capacity*size); - if (!header) { - // This failure will be detected by darray_check() - return da; - } - header->capacity = capacity; - } - - char *data = darray_data(header); - memcpy(data + i*size, item, size); - return data; -} - -int darray_check(void *da) { - if (!da) { - return -1; - } - - struct darray *header = darray_header(da); - if (header->length <= header->capacity) { - return 0; - } else { - // realloc() failed in darray_push(), so reset the length and report the failure - header->length = header->capacity; - return -1; - } -} - -void darray_free(void *da) { - if (da) { - free(darray_header(da)); - } -} diff --git a/src/darray.h b/src/darray.h deleted file mode 100644 index cc6cc42..0000000 --- a/src/darray.h +++ /dev/null @@ -1,97 +0,0 @@ -// Copyright © Tavian Barnes <tavianator@tavianator.com> -// SPDX-License-Identifier: 0BSD - -/** - * A dynamic array library. - * - * darrays are represented by a simple pointer to the array element type, like - * any other array. Behind the scenes, the capacity and current length of the - * array are stored along with it. NULL is a valid way to initialize an empty - * darray: - * - * int *darray = NULL; - * - * To append an element to a darray, use the DARRAY_PUSH macro: - * - * int e = 42; - * if (DARRAY_PUSH(&darray, &e) != 0) { - * // Report the error... - * } - * - * The length can be retrieved by darray_length(). Iterating over the array - * works like normal arrays: - * - * for (size_t i = 0; i < darray_length(darray); ++i) { - * printf("%d\n", darray[i]); - * } - * - * To free a darray, use darray_free(): - * - * darray_free(darray); - */ - -#ifndef BFS_DARRAY_H -#define BFS_DARRAY_H - -#include <stddef.h> - -/** - * Get the length of a darray. - * - * @param da - * The array in question. - * @return - * The length of the array. - */ -size_t darray_length(const void *da); - -/** - * @internal Use DARRAY_PUSH(). - * - * Push an element into a darray. - * - * @param da - * The array to append to. - * @param item - * The item to append. - * @param size - * The size of the item. - * @return - * The (new) location of the array. - */ -void *darray_push(void *da, const void *item, size_t size); - -/** - * @internal Use DARRAY_PUSH(). - * - * Check if the last darray_push() call failed. - * - * @param da - * The darray to check. - * @return - * 0 on success, -1 on failure. - */ -int darray_check(void *da); - -/** - * Free a darray. - * - * @param da - * The darray to free. - */ -void darray_free(void *da); - -/** - * Push an item into a darray. - * - * @param da - * The array to append to. - * @param item - * A pointer to the item to append. - * @return - * 0 on success, -1 on failure. - */ -#define DARRAY_PUSH(da, item) \ - (darray_check(*(da) = darray_push(*(da), (item), sizeof(**(da) = *(item))))) - -#endif // BFS_DARRAY_H @@ -2,43 +2,84 @@ // SPDX-License-Identifier: 0BSD #include "diag.h" + #include "alloc.h" +#include "bfs.h" #include "bfstd.h" -#include "ctx.h" #include "color.h" -#include "config.h" +#include "ctx.h" #include "dstring.h" #include "expr.h" -#include <errno.h> + #include <stdarg.h> +#include <stdio.h> #include <stdlib.h> -#include <string.h> - -noreturn void bfs_abortf(const struct bfs_loc *loc, const char *format, ...) { - fprintf(stderr, "%s: %s@%s:%d: ", xgetprogname(), loc->func, loc->file, loc->line); - +#include <unistd.h> + +/** + * Print an error using dprintf() if possible, because it's more likely to be + * async-signal-safe in practice. + */ +#if BFS_HAS_DPRINTF +# define veprintf(...) vdprintf(STDERR_FILENO, __VA_ARGS__) +#else +# define veprintf(...) vfprintf(stderr, __VA_ARGS__) +#endif + +void bfs_diagf(const char *format, ...) { va_list args; va_start(args, format); - vfprintf(stderr, format, args); + veprintf(format, args); va_end(args); +} - fprintf(stderr, "\n"); +_noreturn +void bfs_abortf(const char *format, ...) { + va_list args; + va_start(args, format); + veprintf(format, args); + va_end(args); abort(); } +const char *debug_flag_name(enum debug_flags flag) { + switch (flag) { + case DEBUG_COST: + return "cost"; + case DEBUG_EXEC: + return "exec"; + case DEBUG_OPT: + return "opt"; + case DEBUG_RATES: + return "rates"; + case DEBUG_SEARCH: + return "search"; + case DEBUG_STAT: + return "stat"; + case DEBUG_TREE: + return "tree"; + + case DEBUG_ALL: + break; + } + + bfs_bug("Unrecognized debug flag"); + return "???"; +} + void bfs_perror(const struct bfs_ctx *ctx, const char *str) { - bfs_error(ctx, "%s: %m.\n", str); + bfs_error(ctx, "%s: %s.\n", str, errstr()); } -void bfs_error(const struct bfs_ctx *ctx, const char *format, ...) { +void bfs_error(const struct bfs_ctx *ctx, const char *format, ...) { va_list args; va_start(args, format); bfs_verror(ctx, format, args); va_end(args); } -bool bfs_warning(const struct bfs_ctx *ctx, const char *format, ...) { +bool bfs_warning(const struct bfs_ctx *ctx, const char *format, ...) { va_list args; va_start(args, format); bool ret = bfs_vwarning(ctx, format, args); @@ -46,7 +87,7 @@ bool bfs_warning(const struct bfs_ctx *ctx, const char *format, ...) { return ret; } -bool bfs_debug(const struct bfs_ctx *ctx, enum debug_flags flag, const char *format, ...) { +bool bfs_debug(const struct bfs_ctx *ctx, enum debug_flags flag, const char *format, ...) { va_list args; va_start(args, format); bool ret = bfs_vdebug(ctx, flag, format, args); @@ -55,19 +96,12 @@ bool bfs_debug(const struct bfs_ctx *ctx, enum debug_flags flag, const char *for } void bfs_verror(const struct bfs_ctx *ctx, const char *format, va_list args) { - int error = errno; - bfs_error_prefix(ctx); - - errno = error; cvfprintf(ctx->cerr, format, args); } bool bfs_vwarning(const struct bfs_ctx *ctx, const char *format, va_list args) { - int error = errno; - if (bfs_warning_prefix(ctx)) { - errno = error; cvfprintf(ctx->cerr, format, args); return true; } else { @@ -76,10 +110,7 @@ bool bfs_vwarning(const struct bfs_ctx *ctx, const char *format, va_list args) { } bool bfs_vdebug(const struct bfs_ctx *ctx, enum debug_flags flag, const char *format, va_list args) { - int error = errno; - if (bfs_debug_prefix(ctx, flag)) { - errno = error; cvfprintf(ctx->cerr, format, args); return true; } else { @@ -133,9 +164,8 @@ static bool highlight_expr_recursive(const struct bfs_ctx *ctx, const struct bfs } } - if (bfs_expr_is_parent(expr)) { - ret |= highlight_expr_recursive(ctx, expr->lhs, args); - ret |= highlight_expr_recursive(ctx, expr->rhs, args); + for_expr (child, expr) { + ret |= highlight_expr_recursive(ctx, child, args); } return ret; @@ -158,7 +188,7 @@ static void bfs_argv_diag(const struct bfs_ctx *ctx, const bool args[], bool war bfs_error_prefix(ctx); } - char **argv = ZALLOC_ARRAY(char *, ctx->argc); + dchar **argv = ZALLOC_ARRAY(dchar *, ctx->argc); if (!argv) { return; } @@ -8,93 +8,177 @@ #ifndef BFS_DIAG_H #define BFS_DIAG_H -#include "ctx.h" -#include "config.h" +#include "bfs.h" +#include "bfstd.h" + #include <stdarg.h> /** - * static_assert() with an optional second argument. + * Wrap a diagnostic format string so it looks like + * + * bfs: func@src/file.c:0: Message */ -#if __STDC_VERSION__ >= 202311L -# define bfs_static_assert static_assert -#else -# define bfs_static_assert(...) bfs_static_assert_(__VA_ARGS__, #__VA_ARGS__, ) -# define bfs_static_assert_(expr, msg, ...) _Static_assert(expr, msg) -#endif +#define BFS_DIAG_FORMAT_(format) \ + ((format) ? "%s: %s@%s:%d: " format "%s" : "") /** - * A source code location. + * Add arguments to match a BFS_DIAG_FORMAT string. */ -struct bfs_loc { - const char *file; - int line; - const char *func; -}; +#define BFS_DIAG_ARGS_(...) \ + xgetprogname(), __func__, __FILE__, __LINE__, __VA_ARGS__ "\n" -#define BFS_LOC_INIT { .file = __FILE__, .line = __LINE__, .func = __func__ } +/** + * Print a low-level diagnostic message to standard error. + */ +_printf(1, 2) +void bfs_diagf(const char *format, ...); /** - * Get the current source code location. + * Unconditional diagnostic message. */ -#if __STDC_VERSION__ >= 202311L -# define bfs_location() (&(static const struct bfs_loc)BFS_LOC_INIT) -#else -# define bfs_location() (&(const struct bfs_loc)BFS_LOC_INIT) -#endif +#define bfs_diag(...) \ + bfs_diag_(__VA_ARGS__, ) + +#define bfs_diag_(format, ...) \ + bfs_diagf(BFS_DIAG_FORMAT_(format), BFS_DIAG_ARGS_(__VA_ARGS__)) + +/** + * Print a diagnostic message including the last error. + */ +#define bfs_ediag(...) \ + bfs_ediag_(__VA_ARGS__, ) + +#define bfs_ediag_(format, ...) \ + bfs_diag_(format "%s%s", __VA_ARGS__ (sizeof("" format) > 1 ? ": " : ""), errstr(), ) /** * Print a message to standard error and abort. */ -BFS_FORMATTER(2, 3) -noreturn void bfs_abortf(const struct bfs_loc *loc, const char *format, ...); +_cold +_printf(1, 2) +_noreturn +void bfs_abortf(const char *format, ...); /** * Unconditional abort with a message. */ -#define bfs_abort(...) bfs_abortf(bfs_location(), __VA_ARGS__) +#define bfs_abort(...) \ + bfs_abort_(__VA_ARGS__, ) + +#define bfs_abort_(format, ...) \ + bfs_abortf(BFS_DIAG_FORMAT_(format), BFS_DIAG_ARGS_(__VA_ARGS__)) + +/** + * Abort with a message including the last error. + */ +#define bfs_eabort(...) \ + bfs_eabort_(__VA_ARGS__, ) + +#define bfs_eabort_(format, ...) \ + ((format) ? bfs_abort_(format ": %s", __VA_ARGS__ errstr(), ) : (void)0) /** * Abort in debug builds; no-op in release builds. */ #ifdef NDEBUG # define bfs_bug(...) ((void)0) +# define bfs_ebug(...) ((void)0) #else # define bfs_bug bfs_abort +# define bfs_ebug bfs_eabort #endif /** + * Get the default assertion message, if no format string was specified. + */ +#define BFS_DIAG_MSG_(format, str) \ + (sizeof(format) > 1 ? "" : str) + +/** * Unconditional assert. */ #define bfs_verify(...) \ - bfs_verify_(#__VA_ARGS__, __VA_ARGS__, "", "") + bfs_verify_(#__VA_ARGS__, __VA_ARGS__, "", ) #define bfs_verify_(str, cond, format, ...) \ - ((cond) ? (void)0 : bfs_abort( \ + ((cond) ? (void)0 : bfs_verify__(format, BFS_DIAG_MSG_(format, str), __VA_ARGS__)) + +#define bfs_verify__(format, ...) \ + bfs_abortf( \ + sizeof(format) > 1 \ + ? BFS_DIAG_FORMAT_("%s" format "%s") \ + : BFS_DIAG_FORMAT_("Assertion failed: `%s`"), \ + BFS_DIAG_ARGS_(__VA_ARGS__)) + +/** + * Unconditional assert, including the last error. + */ +#define bfs_everify(...) \ + bfs_everify_(#__VA_ARGS__, __VA_ARGS__, "", ) + + +#define bfs_everify_(str, cond, format, ...) \ + ((cond) ? (void)0 : bfs_everify__(format, BFS_DIAG_MSG_(format, str), __VA_ARGS__)) + +#define bfs_everify__(format, ...) \ + bfs_abortf( \ sizeof(format) > 1 \ - ? "%.0s" format "%s%s" \ - : "Assertion failed: `%s`%s", \ - str, __VA_ARGS__)) + ? BFS_DIAG_FORMAT_("%s" format "%s: %s") \ + : BFS_DIAG_FORMAT_("Assertion failed: `%s`: %s"), \ + BFS_DIAG_ARGS_(__VA_ARGS__ errstr(), )) /** * Assert in debug builds; no-op in release builds. */ #ifdef NDEBUG # define bfs_assert(...) ((void)0) +# define bfs_eassert(...) ((void)0) #else # define bfs_assert bfs_verify +# define bfs_eassert bfs_everify #endif +struct bfs_ctx; struct bfs_expr; /** + * Various debugging flags. + */ +enum debug_flags { + /** Print cost estimates. */ + DEBUG_COST = 1 << 0, + /** Print executed command details. */ + DEBUG_EXEC = 1 << 1, + /** Print optimization details. */ + DEBUG_OPT = 1 << 2, + /** Print rate information. */ + DEBUG_RATES = 1 << 3, + /** Trace the filesystem traversal. */ + DEBUG_SEARCH = 1 << 4, + /** Trace all stat() calls. */ + DEBUG_STAT = 1 << 5, + /** Print the parse tree. */ + DEBUG_TREE = 1 << 6, + /** All debug flags. */ + DEBUG_ALL = (1 << 7) - 1, +}; + +/** + * Convert a debug flag to a string. + */ +const char *debug_flag_name(enum debug_flags flag); + +/** * Like perror(), but decorated like bfs_error(). */ +_cold void bfs_perror(const struct bfs_ctx *ctx, const char *str); /** * Shorthand for printing error messages. */ -BFS_FORMATTER(2, 3) +_cold +_printf(2, 3) void bfs_error(const struct bfs_ctx *ctx, const char *format, ...); /** @@ -102,7 +186,8 @@ void bfs_error(const struct bfs_ctx *ctx, const char *format, ...); * * @return Whether a warning was printed. */ -BFS_FORMATTER(2, 3) +_cold +_printf(2, 3) bool bfs_warning(const struct bfs_ctx *ctx, const char *format, ...); /** @@ -110,60 +195,71 @@ bool bfs_warning(const struct bfs_ctx *ctx, const char *format, ...); * * @return Whether a debug message was printed. */ -BFS_FORMATTER(3, 4) +_cold +_printf(3, 4) bool bfs_debug(const struct bfs_ctx *ctx, enum debug_flags flag, const char *format, ...); /** * bfs_error() variant that takes a va_list. */ -BFS_FORMATTER(2, 0) +_cold +_printf(2, 0) void bfs_verror(const struct bfs_ctx *ctx, const char *format, va_list args); /** * bfs_warning() variant that takes a va_list. */ -BFS_FORMATTER(2, 0) +_cold +_printf(2, 0) bool bfs_vwarning(const struct bfs_ctx *ctx, const char *format, va_list args); /** * bfs_debug() variant that takes a va_list. */ -BFS_FORMATTER(3, 0) +_cold +_printf(3, 0) bool bfs_vdebug(const struct bfs_ctx *ctx, enum debug_flags flag, const char *format, va_list args); /** * Print the error message prefix. */ +_cold void bfs_error_prefix(const struct bfs_ctx *ctx); /** * Print the warning message prefix. */ +_cold bool bfs_warning_prefix(const struct bfs_ctx *ctx); /** * Print the debug message prefix. */ +_cold bool bfs_debug_prefix(const struct bfs_ctx *ctx, enum debug_flags flag); /** * Highlight parts of the command line in an error message. */ +_cold void bfs_argv_error(const struct bfs_ctx *ctx, const bool args[]); /** * Highlight parts of an expression in an error message. */ +_cold void bfs_expr_error(const struct bfs_ctx *ctx, const struct bfs_expr *expr); /** * Highlight parts of the command line in a warning message. */ +_cold bool bfs_argv_warning(const struct bfs_ctx *ctx, const bool args[]); /** * Highlight parts of an expression in a warning message. */ +_cold bool bfs_expr_warning(const struct bfs_ctx *ctx, const struct bfs_expr *expr); #endif // BFS_DIAG_H @@ -2,11 +2,14 @@ // SPDX-License-Identifier: 0BSD #include "dir.h" + #include "alloc.h" +#include "bfs.h" #include "bfstd.h" -#include "config.h" #include "diag.h" #include "sanity.h" +#include "trie.h" + #include <dirent.h> #include <errno.h> #include <fcntl.h> @@ -16,7 +19,7 @@ #include <unistd.h> #if BFS_USE_GETDENTS -# if __linux__ +# if BFS_HAS_GETDENTS64_SYSCALL # include <sys/syscall.h> # endif @@ -24,12 +27,20 @@ static ssize_t bfs_getdents(int fd, void *buf, size_t size) { sanitize_uninit(buf, size); -#if __linux__ && __GLIBC__ && !__GLIBC_PREREQ(2, 30) - ssize_t ret = syscall(SYS_getdents64, fd, buf, size); -#elif __linux__ +#if BFS_HAS_POSIX_GETDENTS + int flags = 0; +# ifdef DT_FORCE_TYPE + flags |= DT_FORCE_TYPE; +# endif + ssize_t ret = posix_getdents(fd, buf, size, flags); +#elif BFS_HAS_GETDENTS + ssize_t ret = getdents(fd, buf, size); +#elif BFS_HAS_GETDENTS64 ssize_t ret = getdents64(fd, buf, size); +#elif BFS_HAS_GETDENTS64_SYSCALL + ssize_t ret = syscall(SYS_getdents64, fd, buf, size); #else - ssize_t ret = getdents(fd, buf, size); +# error "No getdents() implementation" #endif if (ret > 0) { @@ -41,11 +52,13 @@ static ssize_t bfs_getdents(int fd, void *buf, size_t size) { #endif // BFS_USE_GETDENTS -#if BFS_USE_GETDENTS && __linux__ /** Directory entry type for bfs_getdents() */ -typedef struct dirent64 sys_dirent; -#else +#if !BFS_USE_GETDENTS || BFS_HAS_GETDENTS typedef struct dirent sys_dirent; +#elif BFS_HAS_POSIX_GETDENTS +typedef struct posix_dent sys_dirent; +#else +typedef struct dirent64 sys_dirent; #endif enum bfs_type bfs_mode_to_type(mode_t mode) { @@ -96,18 +109,31 @@ enum bfs_type bfs_mode_to_type(mode_t mode) { } } +/** + * Private directory flags. + */ +enum { + /** We've reached the end of the directory. */ + BFS_DIR_EOF = BFS_DIR_PRIVATE << 0, + /** This directory is a union mount we need to dedup manually. */ + BFS_DIR_UNION = BFS_DIR_PRIVATE << 1, +}; + struct bfs_dir { + unsigned int flags; + #if BFS_USE_GETDENTS - alignas(sys_dirent) int fd; + int fd; unsigned short pos; unsigned short size; - // sys_dirent buf[]; +# if __FreeBSD__ + struct trie trie; +# endif + alignas(sys_dirent) char buf[]; #else DIR *dir; struct dirent *de; #endif - - bool eof; }; #if BFS_USE_GETDENTS @@ -125,7 +151,7 @@ void bfs_dir_arena(struct arena *arena) { arena_init(arena, alignof(struct bfs_dir), DIR_SIZE); } -int bfs_opendir(struct bfs_dir *dir, int at_fd, const char *at_path) { +int bfs_opendir(struct bfs_dir *dir, int at_fd, const char *at_path, enum bfs_dir_flags flags) { int fd; if (at_path) { fd = openat(at_fd, at_path, O_RDONLY | O_CLOEXEC | O_DIRECTORY); @@ -139,11 +165,20 @@ int bfs_opendir(struct bfs_dir *dir, int at_fd, const char *at_path) { return -1; } + dir->flags = flags; + #if BFS_USE_GETDENTS dir->fd = fd; dir->pos = 0; dir->size = 0; -#else + +# if __FreeBSD__ && defined(F_ISUNIONSTACK) + if (fcntl(fd, F_ISUNIONSTACK) > 0) { + dir->flags |= BFS_DIR_UNION; + trie_init(&dir->trie); + } +# endif +#else // !BFS_USE_GETDENTS dir->dir = fdopendir(fd); if (!dir->dir) { if (at_path) { @@ -154,7 +189,6 @@ int bfs_opendir(struct bfs_dir *dir, int at_fd, const char *at_path) { dir->de = NULL; #endif - dir->eof = false; return 0; } @@ -170,14 +204,14 @@ int bfs_polldir(struct bfs_dir *dir) { #if BFS_USE_GETDENTS if (dir->pos < dir->size) { return 1; - } else if (dir->eof) { + } else if (dir->flags & BFS_DIR_EOF) { return 0; } char *buf = (char *)(dir + 1); ssize_t size = bfs_getdents(dir->fd, buf, BUF_SIZE); if (size == 0) { - dir->eof = true; + dir->flags |= BFS_DIR_EOF; return 0; } else if (size < 0) { return -1; @@ -194,7 +228,7 @@ int bfs_polldir(struct bfs_dir *dir) { if (size > 0) { dir->size += size; } else if (size == 0) { - dir->eof = true; + dir->flags |= BFS_DIR_EOF; } } @@ -202,7 +236,7 @@ int bfs_polldir(struct bfs_dir *dir) { #else // !BFS_USE_GETDENTS if (dir->de) { return 1; - } else if (dir->eof) { + } else if (dir->flags & BFS_DIR_EOF) { return 0; } @@ -211,7 +245,7 @@ int bfs_polldir(struct bfs_dir *dir) { if (dir->de) { return 1; } else if (errno == 0) { - dir->eof = true; + dir->flags |= BFS_DIR_EOF; return 0; } else { return -1; @@ -236,13 +270,33 @@ static int bfs_getdent(struct bfs_dir *dir, const sys_dirent **de) { } /** Skip ".", "..", and deleted/empty dirents. */ -static bool skip_dirent(const sys_dirent *de) { -#if __FreeBSD__ +static int bfs_skipdent(struct bfs_dir *dir, const sys_dirent *de) { +#if BFS_USE_GETDENTS +# if __FreeBSD__ + // Union mounts on FreeBSD have to be de-duplicated in userspace + if (dir->flags & BFS_DIR_UNION) { + struct trie_leaf *leaf = trie_insert_str(&dir->trie, de->d_name); + if (!leaf) { + return -1; + } else if (leaf->value) { + return 1; + } else { + leaf->value = leaf; + } + } + // NFS mounts on FreeBSD can return empty dirents with inode number 0 if (de->d_ino == 0) { - return true; + return 1; } -#endif +# endif + +# ifdef DT_WHT + if (de->d_type == DT_WHT && !(dir->flags & BFS_DIR_WHITEOUTS)) { + return 1; + } +# endif +#endif // BFS_USE_GETDENTS const char *name = de->d_name; return name[0] == '.' && (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')); @@ -265,7 +319,10 @@ int bfs_readdir(struct bfs_dir *dir, struct bfs_dirent *de) { return ret; } - if (skip_dirent(sysde)) { + int skip = bfs_skipdent(dir, sysde); + if (skip < 0) { + return skip; + } else if (skip) { continue; } @@ -278,6 +335,16 @@ int bfs_readdir(struct bfs_dir *dir, struct bfs_dirent *de) { } } +static void bfs_destroydir(struct bfs_dir *dir) { +#if BFS_USE_GETDENTS && __FreeBSD__ + if (dir->flags & BFS_DIR_UNION) { + trie_destroy(&dir->trie); + } +#endif + + sanitize_uninit(dir, DIR_SIZE); +} + int bfs_closedir(struct bfs_dir *dir) { #if BFS_USE_GETDENTS int ret = xclose(dir->fd); @@ -288,7 +355,7 @@ int bfs_closedir(struct bfs_dir *dir) { } #endif - sanitize_uninit(dir, DIR_SIZE); + bfs_destroydir(dir); return ret; } @@ -296,11 +363,11 @@ int bfs_closedir(struct bfs_dir *dir) { int bfs_unwrapdir(struct bfs_dir *dir) { #if BFS_USE_GETDENTS int ret = dir->fd; -#elif __FreeBSD__ +#elif BFS_HAS_FDCLOSEDIR int ret = fdclosedir(dir->dir); #endif - sanitize_uninit(dir, DIR_SIZE); + bfs_destroydir(dir); return ret; } #endif @@ -8,8 +8,8 @@ #ifndef BFS_DIR_H #define BFS_DIR_H -#include "alloc.h" -#include "config.h" +#include "bfs.h" + #include <sys/types.h> /** @@ -17,7 +17,13 @@ * libc's readdir(). */ #ifndef BFS_USE_GETDENTS -# define BFS_USE_GETDENTS (__linux__ || __FreeBSD__) +# if BFS_HAS_POSIX_GETDENTS +# define BFS_USE_GETDENTS true +# elif __linux__ || __FreeBSD__ +# define BFS_USE_GETDENTS (BFS_HAS_GETDENTS || BFS_HAS_GETDENTS64 | BFS_HAS_GETDENTS64_SYSCALL) +# else +# define BFS_USE_GETDENTS false +# endif #endif /** @@ -78,28 +84,42 @@ struct bfs_dirent { */ struct bfs_dir *bfs_allocdir(void); +struct arena; + /** * Initialize an arena for directories. * - * @param arena + * @arena * The arena to initialize. */ void bfs_dir_arena(struct arena *arena); /** + * bfs_opendir() flags. + */ +enum bfs_dir_flags { + /** Include whiteouts in the results. */ + BFS_DIR_WHITEOUTS = 1 << 0, + /** @internal Start of private flags. */ + BFS_DIR_PRIVATE = 1 << 1, +}; + +/** * Open a directory. * - * @param dir + * @dir * The allocated directory. - * @param at_fd + * @at_fd * The base directory for path resolution. - * @param at_path + * @at_path * The path of the directory to open, relative to at_fd. Pass NULL to * open at_fd itself. + * @flags + * Flags that control which directory entries are listed. * @return * 0 on success, or -1 on failure. */ -int bfs_opendir(struct bfs_dir *dir, int at_fd, const char *at_path); +int bfs_opendir(struct bfs_dir *dir, int at_fd, const char *at_path, enum bfs_dir_flags flags); /** * Get the file descriptor for a directory. @@ -109,7 +129,7 @@ int bfs_dirfd(const struct bfs_dir *dir); /** * Performs any I/O necessary for the next bfs_readdir() call. * - * @param dir + * @dir * The directory to poll. * @return * 1 on success, 0 on EOF, or -1 on failure. @@ -119,9 +139,9 @@ int bfs_polldir(struct bfs_dir *dir); /** * Read a directory entry. * - * @param dir + * @dir * The directory to read. - * @param[out] dirent + * @dirent[out] * The directory entry to populate. * @return * 1 on success, 0 on EOF, or -1 on failure. @@ -140,14 +160,14 @@ int bfs_closedir(struct bfs_dir *dir); * Whether the bfs_unwrapdir() function is supported. */ #ifndef BFS_USE_UNWRAPDIR -# define BFS_USE_UNWRAPDIR (BFS_USE_GETDENTS || __FreeBSD__) +# define BFS_USE_UNWRAPDIR (BFS_USE_GETDENTS || BFS_HAS_FDCLOSEDIR) #endif #if BFS_USE_UNWRAPDIR /** * Detach the file descriptor from an open directory. * - * @param dir + * @dir * The directory to detach. * @return * The file descriptor of the directory. diff --git a/src/dstring.c b/src/dstring.c index 60a7df9..0f08679 100644 --- a/src/dstring.c +++ b/src/dstring.c @@ -2,124 +2,154 @@ // SPDX-License-Identifier: 0BSD #include "dstring.h" + #include "alloc.h" #include "bit.h" #include "diag.h" + #include <stdarg.h> +#include <stddef.h> +#include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <string.h> /** - * The memory representation of a dynamic string. Users get a pointer to data. + * The memory representation of a dynamic string. Users get a pointer to str. */ struct dstring { - size_t capacity; - size_t length; - char data[]; + /** Capacity of the string, *including* the terminating NUL. */ + size_t cap; + /** Length of the string, *excluding* the terminating NUL. */ + size_t len; + /** The string itself. */ + alignas(dchar) char str[]; }; -/** Get the string header from the string data pointer. */ -static struct dstring *dstrheader(const char *dstr) { - return (struct dstring *)(dstr - offsetof(struct dstring, data)); +#define DSTR_OFFSET offsetof(struct dstring, str) + +/** Back up to the header from a pointer to dstring::str. */ +static struct dstring *dstrheader(const dchar *dstr) { + return (struct dstring *)(dstr - DSTR_OFFSET); +} + +/** + * In some provenance models, the expression `header->str` has its provenance + * restricted to just the `str` field itself, making a future dstrheader() + * illegal. This alternative is guaranteed to preserve provenance for the entire + * allocation. + * + * - https://stackoverflow.com/q/25296019 + * - https://mastodon.social/@void_friend@tech.lgbt/111144859908104311 + */ +static dchar *dstrdata(struct dstring *header) { + return (char *)header + DSTR_OFFSET; } -/** Get the correct size for a dstring with the given capacity. */ -static size_t dstrsize(size_t capacity) { - return sizeof_flex(struct dstring, data, capacity + 1); +/** Set the length of a dynamic string. */ +static void dstrsetlen(struct dstring *header, size_t len) { + bfs_assert(len < header->cap); + header->len = len; + header->str[len] = '\0'; } /** Allocate a dstring with the given contents. */ -static char *dstralloc_impl(size_t capacity, size_t length, const char *data) { +static dchar *dstralloc_impl(size_t cap, size_t len, const char *str) { // Avoid reallocations for small strings - if (capacity < 7) { - capacity = 7; + if (cap < DSTR_OFFSET) { + cap = DSTR_OFFSET; } - struct dstring *header = malloc(dstrsize(capacity)); + struct dstring *header = ALLOC_FLEX(struct dstring, str, cap); if (!header) { return NULL; } - header->capacity = capacity; - header->length = length; + header->cap = cap; + dstrsetlen(header, len); - memcpy(header->data, data, length); - header->data[length] = '\0'; - return header->data; + dchar *ret = dstrdata(header); + memcpy(ret, str, len); + return ret; } -char *dstralloc(size_t capacity) { - return dstralloc_impl(capacity, 0, ""); +dchar *dstralloc(size_t cap) { + return dstralloc_impl(cap + 1, 0, ""); } -char *dstrdup(const char *str) { +dchar *dstrdup(const char *str) { return dstrxdup(str, strlen(str)); } -char *dstrndup(const char *str, size_t n) { +dchar *dstrndup(const char *str, size_t n) { return dstrxdup(str, strnlen(str, n)); } -char *dstrddup(const char *dstr) { +dchar *dstrddup(const dchar *dstr) { return dstrxdup(dstr, dstrlen(dstr)); } -char *dstrxdup(const char *str, size_t len) { - return dstralloc_impl(len, len, str); +dchar *dstrxdup(const char *str, size_t len) { + return dstralloc_impl(len + 1, len, str); } -size_t dstrlen(const char *dstr) { - return dstrheader(dstr)->length; +size_t dstrlen(const dchar *dstr) { + return dstrheader(dstr)->len; } -int dstreserve(char **dstr, size_t capacity) { +int dstreserve(dchar **dstr, size_t cap) { if (!*dstr) { - *dstr = dstralloc(capacity); + *dstr = dstralloc(cap); return *dstr ? 0 : -1; } struct dstring *header = dstrheader(*dstr); + size_t old_cap = header->cap; + size_t new_cap = cap + 1; // Terminating NUL + if (old_cap >= new_cap) { + return 0; + } - if (capacity > header->capacity) { - capacity = bit_ceil(capacity + 1) - 1; - - header = realloc(header, dstrsize(capacity)); - if (!header) { - return -1; - } - header->capacity = capacity; - - *dstr = header->data; + new_cap = bit_ceil(new_cap); + header = REALLOC_FLEX(struct dstring, str, header, old_cap, new_cap); + if (!header) { + return -1; } + header->cap = new_cap; + *dstr = dstrdata(header); return 0; } -int dstresize(char **dstr, size_t length) { - if (dstreserve(dstr, length) != 0) { +int dstresize(dchar **dstr, size_t len) { + if (dstreserve(dstr, len) != 0) { return -1; } struct dstring *header = dstrheader(*dstr); - header->length = length; - header->data[length] = '\0'; + dstrsetlen(header, len); return 0; } -int dstrcat(char **dest, const char *src) { +void dstrshrink(dchar *dstr, size_t len) { + struct dstring *header = dstrheader(dstr); + bfs_assert(len <= header->len); + dstrsetlen(header, len); +} + +int dstrcat(dchar **dest, const char *src) { return dstrxcat(dest, src, strlen(src)); } -int dstrncat(char **dest, const char *src, size_t n) { +int dstrncat(dchar **dest, const char *src, size_t n) { return dstrxcat(dest, src, strnlen(src, n)); } -int dstrdcat(char **dest, const char *src) { +int dstrdcat(dchar **dest, const dchar *src) { return dstrxcat(dest, src, dstrlen(src)); } -int dstrxcat(char **dest, const char *src, size_t len) { +int dstrxcat(dchar **dest, const char *src, size_t len) { size_t oldlen = dstrlen(*dest); size_t newlen = oldlen + len; @@ -131,23 +161,23 @@ int dstrxcat(char **dest, const char *src, size_t len) { return 0; } -int dstrapp(char **str, char c) { +int dstrapp(dchar **str, char c) { return dstrxcat(str, &c, 1); } -int dstrcpy(char **dest, const char *src) { +int dstrcpy(dchar **dest, const char *src) { return dstrxcpy(dest, src, strlen(src)); } -int dstrncpy(char **dest, const char *src, size_t n) { +int dstrncpy(dchar **dest, const char *src, size_t n) { return dstrxcpy(dest, src, strnlen(src, n)); } -int dstrdcpy(char **dest, const char *src) { +int dstrdcpy(dchar **dest, const dchar *src) { return dstrxcpy(dest, src, dstrlen(src)); } -int dstrxcpy(char **dest, const char *src, size_t len) { +int dstrxcpy(dchar **dest, const char *src, size_t len) { if (dstresize(dest, len) != 0) { return -1; } @@ -156,19 +186,19 @@ int dstrxcpy(char **dest, const char *src, size_t len) { return 0; } -char *dstrprintf(const char *format, ...) { +dchar *dstrprintf(const char *format, ...) { va_list args; va_start(args, format); - char *str = dstrvprintf(format, args); + dchar *str = dstrvprintf(format, args); va_end(args); return str; } -char *dstrvprintf(const char *format, va_list args) { +dchar *dstrvprintf(const char *format, va_list args) { // Guess a capacity to try to avoid reallocating - char *str = dstralloc(2*strlen(format)); + dchar *str = dstralloc(2 * strlen(format)); if (!str) { return NULL; } @@ -181,7 +211,7 @@ char *dstrvprintf(const char *format, va_list args) { return str; } -int dstrcatf(char **str, const char *format, ...) { +int dstrcatf(dchar **str, const char *format, ...) { va_list args; va_start(args, format); @@ -191,25 +221,25 @@ int dstrcatf(char **str, const char *format, ...) { return ret; } -int dstrvcatf(char **str, const char *format, va_list args) { +int dstrvcatf(dchar **str, const char *format, va_list args) { // Guess a capacity to try to avoid calling vsnprintf() twice size_t len = dstrlen(*str); - dstreserve(str, len + 2*strlen(format)); - size_t cap = dstrheader(*str)->capacity; + dstreserve(str, len + 2 * strlen(format)); + size_t cap = dstrheader(*str)->cap; va_list copy; va_copy(copy, args); char *tail = *str + len; - int ret = vsnprintf(tail, cap - len + 1, format, args); + size_t tail_cap = cap - len; + int ret = vsnprintf(tail, tail_cap, format, args); if (ret < 0) { goto fail; } size_t tail_len = ret; - if (tail_len > cap - len) { - cap = len + tail_len; - if (dstreserve(str, cap) != 0) { + if (tail_len >= tail_cap) { + if (dstreserve(str, len + tail_len) != 0) { goto fail; } @@ -223,20 +253,20 @@ int dstrvcatf(char **str, const char *format, va_list args) { va_end(copy); - struct dstring *header = dstrheader(*str); - header->length += tail_len; + dstrheader(*str)->len += tail_len; return 0; fail: + va_end(copy); *tail = '\0'; return -1; } -int dstrescat(char **dest, const char *str, enum wesc_flags flags) { +int dstrescat(dchar **dest, const char *str, enum wesc_flags flags) { return dstrnescat(dest, str, SIZE_MAX, flags); } -int dstrnescat(char **dest, const char *str, size_t n, enum wesc_flags flags) { +int dstrnescat(dchar **dest, const char *str, size_t n, enum wesc_flags flags) { size_t len = *dest ? dstrlen(*dest) : 0; // Worst case growth is `ccc...` => $'\xCC\xCC\xCC...' @@ -254,8 +284,25 @@ int dstrnescat(char **dest, const char *str, size_t n, enum wesc_flags flags) { return dstresize(dest, cur - *dest); } -void dstrfree(char *dstr) { +void dstrfree(dchar *dstr) { if (dstr) { free(dstrheader(dstr)); } } + +dchar *dstrepeat(const char *str, size_t n) { + size_t len = strlen(str); + dchar *ret = dstralloc(n * len); + if (!ret) { + return NULL; + } + + for (size_t i = 0; i < n; ++i) { + if (dstrxcat(&ret, str, len) < 0) { + dstrfree(ret); + return NULL; + } + } + + return ret; +} diff --git a/src/dstring.h b/src/dstring.h index 88ca79f..ce7ef86 100644 --- a/src/dstring.h +++ b/src/dstring.h @@ -8,296 +8,348 @@ #ifndef BFS_DSTRING_H #define BFS_DSTRING_H +#include "bfs.h" #include "bfstd.h" -#include "config.h" + #include <stdarg.h> #include <stddef.h> +/** Marker type for dynamic strings. */ +#if BFS_LINT && __clang__ +// Abuse __attribute__(aligned) to make a type that allows +// +// dchar * -> char * +// +// conversions, but warns (with Clang's -Walign-mismatch) on +// +// char * -> dchar * +typedef __attribute__((aligned(alignof(size_t)))) char dchar; +#else +typedef char dchar; +#endif + +/** + * Free a dynamic string. + * + * @dstr + * The string to free. + */ +void dstrfree(dchar *dstr); + /** * Allocate a dynamic string. * - * @param capacity + * @cap * The initial capacity of the string. */ -char *dstralloc(size_t capacity); +_malloc(dstrfree, 1) +dchar *dstralloc(size_t cap); /** * Create a dynamic copy of a string. * - * @param str + * @str * The NUL-terminated string to copy. */ -char *dstrdup(const char *str); +_malloc(dstrfree, 1) +dchar *dstrdup(const char *str); /** * Create a length-limited dynamic copy of a string. * - * @param str + * @str * The string to copy. - * @param n + * @n * The maximum number of characters to copy from str. */ -char *dstrndup(const char *str, size_t n); +_malloc(dstrfree, 1) +dchar *dstrndup(const char *str, size_t n); /** * Create a dynamic copy of a dynamic string. * - * @param dstr + * @dstr * The dynamic string to copy. */ -char *dstrddup(const char *dstr); +_malloc(dstrfree, 1) +dchar *dstrddup(const dchar *dstr); /** * Create an exact-sized dynamic copy of a string. * - * @param str + * @str * The string to copy. - * @param len + * @len * The length of the string, which may include internal NUL bytes. */ -char *dstrxdup(const char *str, size_t len); +_malloc(dstrfree, 1) +dchar *dstrxdup(const char *str, size_t len); /** * Get a dynamic string's length. * - * @param dstr + * @dstr * The string to measure. * @return * The length of dstr. */ -size_t dstrlen(const char *dstr); +size_t dstrlen(const dchar *dstr); /** * Reserve some capacity in a dynamic string. * - * @param dstr + * @dstr * The dynamic string to preallocate. - * @param capacity + * @cap * The new capacity for the string. * @return * 0 on success, -1 on failure. */ -int dstreserve(char **dstr, size_t capacity); +int dstreserve(dchar **dstr, size_t cap); /** * Resize a dynamic string. * - * @param dstr + * @dstr * The dynamic string to resize. - * @param length + * @len * The new length for the dynamic string. * @return * 0 on success, -1 on failure. */ -int dstresize(char **dstr, size_t length); +_nodiscard +int dstresize(dchar **dstr, size_t len); + +/** + * Shrink a dynamic string. + * + * @dstr + * The dynamic string to shrink. + * @len + * The new length. Must not be greater than the current length. + */ +void dstrshrink(dchar *dstr, size_t len); /** * Append to a dynamic string. * - * @param dest + * @dest * The destination dynamic string. - * @param src + * @src * The string to append. * @return 0 on success, -1 on failure. */ -int dstrcat(char **dest, const char *src); +_nodiscard +int dstrcat(dchar **dest, const char *src); /** * Append to a dynamic string. * - * @param dest + * @dest * The destination dynamic string. - * @param src + * @src * The string to append. - * @param n + * @n * The maximum number of characters to take from src. * @return * 0 on success, -1 on failure. */ -int dstrncat(char **dest, const char *src, size_t n); +_nodiscard +int dstrncat(dchar **dest, const char *src, size_t n); /** * Append a dynamic string to another dynamic string. * - * @param dest + * @dest * The destination dynamic string. - * @param src + * @src * The dynamic string to append. * @return * 0 on success, -1 on failure. */ -int dstrdcat(char **dest, const char *src); +_nodiscard +int dstrdcat(dchar **dest, const dchar *src); /** * Append to a dynamic string. * - * @param dest + * @dest * The destination dynamic string. - * @param src + * @src * The string to append. - * @param len + * @len * The exact number of characters to take from src. * @return * 0 on success, -1 on failure. */ -int dstrxcat(char **dest, const char *src, size_t len); +_nodiscard +int dstrxcat(dchar **dest, const char *src, size_t len); /** * Append a single character to a dynamic string. * - * @param str + * @str * The string to append to. - * @param c + * @c * The character to append. * @return * 0 on success, -1 on failure. */ -int dstrapp(char **str, char c); +_nodiscard +int dstrapp(dchar **str, char c); /** * Copy a string into a dynamic string. * - * @param dest + * @dest * The destination dynamic string. - * @param src + * @src * The string to copy. * @returns * 0 on success, -1 on failure. */ -int dstrcpy(char **dest, const char *str); +_nodiscard +int dstrcpy(dchar **dest, const char *str); /** * Copy a dynamic string into another one. * - * @param dest + * @dest * The destination dynamic string. - * @param src + * @src * The dynamic string to copy. * @returns * 0 on success, -1 on failure. */ -int dstrdcpy(char **dest, const char *str); +_nodiscard +int dstrdcpy(dchar **dest, const dchar *str); /** * Copy a string into a dynamic string. * - * @param dest + * @dest * The destination dynamic string. - * @param src + * @src * The dynamic string to copy. - * @param n + * @n * The maximum number of characters to take from src. * @returns * 0 on success, -1 on failure. */ -int dstrncpy(char **dest, const char *str, size_t n); +_nodiscard +int dstrncpy(dchar **dest, const char *str, size_t n); /** * Copy a string into a dynamic string. * - * @param dest + * @dest * The destination dynamic string. - * @param src + * @src * The dynamic string to copy. - * @param len + * @len * The exact number of characters to take from src. * @returns * 0 on success, -1 on failure. */ -int dstrxcpy(char **dest, const char *str, size_t len); +_nodiscard +int dstrxcpy(dchar **dest, const char *str, size_t len); /** * Create a dynamic string from a format string. * - * @param format + * @format * The format string to fill in. - * @param ... + * @... * Any arguments for the format string. * @return * The created string, or NULL on failure. */ -BFS_FORMATTER(1, 2) -char *dstrprintf(const char *format, ...); +_nodiscard +_printf(1, 2) +dchar *dstrprintf(const char *format, ...); /** * Create a dynamic string from a format string and a va_list. * - * @param format + * @format * The format string to fill in. - * @param args + * @args * The arguments for the format string. * @return * The created string, or NULL on failure. */ -BFS_FORMATTER(1, 0) -char *dstrvprintf(const char *format, va_list args); +_nodiscard +_printf(1, 0) +dchar *dstrvprintf(const char *format, va_list args); /** * Format some text onto the end of a dynamic string. * - * @param str + * @str * The destination dynamic string. - * @param format + * @format * The format string to fill in. - * @param ... + * @... * Any arguments for the format string. * @return * 0 on success, -1 on failure. */ -BFS_FORMATTER(2, 3) -int dstrcatf(char **str, const char *format, ...); +_nodiscard +_printf(2, 3) +int dstrcatf(dchar **str, const char *format, ...); /** * Format some text from a va_list onto the end of a dynamic string. * - * @param str + * @str * The destination dynamic string. - * @param format + * @format * The format string to fill in. - * @param args + * @args * The arguments for the format string. * @return * 0 on success, -1 on failure. */ -BFS_FORMATTER(2, 0) -int dstrvcatf(char **str, const char *format, va_list args); +_nodiscard +_printf(2, 0) +int dstrvcatf(dchar **str, const char *format, va_list args); /** * Concatenate while shell-escaping. * - * @param dest + * @dest * The destination dynamic string. - * @param str + * @str * The string to escape. - * @param flags + * @flags * Flags for wordesc(). * @return * 0 on success, -1 on failure. */ -int dstrescat(char **dest, const char *str, enum wesc_flags flags); +_nodiscard +int dstrescat(dchar **dest, const char *str, enum wesc_flags flags); /** * Concatenate while shell-escaping. * - * @param dest + * @dest * The destination dynamic string. - * @param str + * @str * The string to escape. - * @param n + * @n * The maximum length of the string. - * @param flags + * @flags * Flags for wordesc(). * @return * 0 on success, -1 on failure. */ -int dstrnescat(char **dest, const char *str, size_t n, enum wesc_flags flags); +_nodiscard +int dstrnescat(dchar **dest, const char *str, size_t n, enum wesc_flags flags); /** - * Free a dynamic string. - * - * @param dstr - * The string to free. + * Repeat a string n times. */ -void dstrfree(char *dstr); +_nodiscard +dchar *dstrepeat(const char *str, size_t n); #endif // BFS_DSTRING_H @@ -6,13 +6,14 @@ */ #include "eval.h" + +#include "atomic.h" #include "bar.h" +#include "bfs.h" #include "bfstd.h" #include "bftw.h" #include "color.h" -#include "config.h" #include "ctx.h" -#include "darray.h" #include "diag.h" #include "dir.h" #include "dstring.h" @@ -22,22 +23,28 @@ #include "mtab.h" #include "printf.h" #include "pwcache.h" +#include "sanity.h" +#include "sighook.h" #include "stat.h" #include "trie.h" #include "xregex.h" #include "xtime.h" + #include <errno.h> #include <fcntl.h> #include <fnmatch.h> #include <grp.h> #include <pwd.h> +#include <signal.h> #include <stdarg.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <strings.h> #include <sys/resource.h> -#include <sys/stat.h> +#include <sys/time.h> +#include <sys/types.h> #include <time.h> #include <unistd.h> #include <wchar.h> @@ -51,6 +58,8 @@ struct bfs_eval { enum bftw_action action; /** The bfs_eval() return value. */ int *ret; + /** The number of errors that have occurred. */ + size_t *nerrors; /** Whether to quit immediately. */ bool quit; }; @@ -58,20 +67,24 @@ struct bfs_eval { /** * Print an error message. */ -BFS_FORMATTER(2, 3) +_printf(2, 3) static void eval_error(struct bfs_eval *state, const char *format, ...) { + const struct bfs_ctx *ctx = state->ctx; + + ++*state->nerrors; + if (ctx->ignore_errors) { + return; + } + // By POSIX, any errors should be accompanied by a non-zero exit status *state->ret = EXIT_FAILURE; - int error = errno; - const struct bfs_ctx *ctx = state->ctx; CFILE *cerr = ctx->cerr; bfs_error(ctx, "%pP: ", state->ftwbuf); va_list args; va_start(args, format); - errno = error; cvfprintf(cerr, format, args); va_end(args); } @@ -81,7 +94,7 @@ static void eval_error(struct bfs_eval *state, const char *format, ...) { */ static bool eval_should_ignore(const struct bfs_eval *state, int error) { return state->ctx->ignore_races - && is_nonexistence_error(error) + && error_is_like(error, ENOENT) && state->ftwbuf->depth > 0; } @@ -90,7 +103,7 @@ static bool eval_should_ignore(const struct bfs_eval *state, int error) { */ static void eval_report_error(struct bfs_eval *state) { if (!eval_should_ignore(state, errno)) { - eval_error(state, "%m.\n"); + eval_error(state, "%s.\n", errstr()); } } @@ -99,9 +112,9 @@ static void eval_report_error(struct bfs_eval *state) { */ static void eval_io_error(const struct bfs_expr *expr, struct bfs_eval *state) { if (expr->path) { - eval_error(state, "'%s': %m.\n", expr->path); + eval_error(state, "'%s': %s.\n", expr->path, errstr()); } else { - eval_error(state, "(standard output): %m.\n"); + eval_error(state, "(standard output): %s.\n", errstr()); } // Don't report the error again in bfs_ctx_free() @@ -124,11 +137,9 @@ static const struct bfs_stat *eval_stat(struct bfs_eval *state) { * Get the difference (in seconds) between two struct timespecs. */ static time_t timespec_diff(const struct timespec *lhs, const struct timespec *rhs) { - time_t ret = lhs->tv_sec - rhs->tv_sec; - if (lhs->tv_nsec < rhs->tv_nsec) { - --ret; - } - return ret; + struct timespec diff = *lhs; + timespec_sub(&diff, rhs); + return diff.tv_sec; } bool bfs_expr_cmp(const struct bfs_expr *expr, long long n) { @@ -145,6 +156,20 @@ bool bfs_expr_cmp(const struct bfs_expr *expr, long long n) { return false; } +/** Common code for fnmatch() tests. */ +static bool eval_fnmatch(const struct bfs_expr *expr, const char *str) { + if (expr->literal) { +#ifdef FNM_CASEFOLD + if (expr->fnm_flags & FNM_CASEFOLD) { + return strcasecmp(expr->pattern, str) == 0; + } +#endif + return strcmp(expr->pattern, str) == 0; + } else { + return fnmatch(expr->pattern, str, expr->fnm_flags) == 0; + } +} + /** * -true test. */ @@ -194,12 +219,27 @@ bool eval_capable(const struct bfs_expr *expr, struct bfs_eval *state) { } /** + * -context test. + */ +bool eval_context(const struct bfs_expr *expr, struct bfs_eval *state) { + char *con = bfs_getfilecon(state->ftwbuf); + if (!con) { + eval_report_error(state); + return false; + } + + bool ret = eval_fnmatch(expr, con); + bfs_freecon(con); + return ret; +} + +/** * Get the given timespec field out of a stat buffer. */ static const struct timespec *eval_stat_time(const struct bfs_stat *statbuf, enum bfs_stat_field field, struct bfs_eval *state) { const struct timespec *ret = bfs_stat_time(statbuf, field); if (!ret) { - eval_error(state, "Couldn't get file %s: %m.\n", bfs_stat_field_name(field)); + eval_error(state, "Couldn't get file %s: %s.\n", bfs_stat_field_name(field), errstr()); } return ret; } @@ -218,8 +258,7 @@ bool eval_newer(const struct bfs_expr *expr, struct bfs_eval *state) { return false; } - return time->tv_sec > expr->reftime.tv_sec - || (time->tv_sec == expr->reftime.tv_sec && time->tv_nsec > expr->reftime.tv_nsec); + return timespec_cmp(time, &expr->reftime) > 0; } /** @@ -239,11 +278,11 @@ bool eval_time(const struct bfs_expr *expr, struct bfs_eval *state) { time_t diff = timespec_diff(&expr->reftime, time); switch (expr->time_unit) { case BFS_DAYS: - diff /= 60*24; - fallthru; + diff /= 60 * 24; + _fallthrough; case BFS_MINUTES: diff /= 60; - fallthru; + _fallthrough; case BFS_SECONDS: break; } @@ -271,7 +310,7 @@ bool eval_used(const struct bfs_expr *expr, struct bfs_eval *state) { return false; } - long long day_seconds = 60*60*24; + long long day_seconds = 60 * 60 * 24; diff = (diff + day_seconds - 1) / day_seconds; return bfs_expr_cmp(expr, diff); } @@ -369,15 +408,14 @@ static int eval_exec_finish(const struct bfs_expr *expr, const struct bfs_ctx *c if (expr->eval_fn == eval_exec) { if (bfs_exec_finish(expr->exec) != 0) { if (errno != 0) { - bfs_error(ctx, "%s %s: %m.\n", expr->argv[0], expr->argv[1]); + bfs_error(ctx, "${blu}%pq${rs} ${bld}%pq${rs}: %s.\n", expr->argv[0], expr->argv[1], errstr()); } ret = -1; } - } else if (bfs_expr_is_parent(expr)) { - if (expr->lhs && eval_exec_finish(expr->lhs, ctx) != 0) { - ret = -1; - } - if (expr->rhs && eval_exec_finish(expr->rhs, ctx) != 0) { + } + + for_expr (child, expr) { + if (eval_exec_finish(child, ctx) != 0) { ret = -1; } } @@ -391,7 +429,7 @@ static int eval_exec_finish(const struct bfs_expr *expr, const struct bfs_ctx *c bool eval_exec(const struct bfs_expr *expr, struct bfs_eval *state) { bool ret = bfs_exec(expr->exec, state->ftwbuf) == 0; if (errno != 0) { - eval_error(state, "%s %s: %m.\n", expr->argv[0], expr->argv[1]); + eval_error(state, "${blu}%pq${rs} ${bld}%pq${rs}: %s.\n", expr->argv[0], expr->argv[1], errstr()); } return ret; } @@ -417,38 +455,42 @@ bool eval_depth(const struct bfs_expr *expr, struct bfs_eval *state) { * -empty test. */ bool eval_empty(const struct bfs_expr *expr, struct bfs_eval *state) { - bool ret = false; const struct BFTW *ftwbuf = state->ftwbuf; + const struct bfs_stat *statbuf; + struct bfs_dir *dir; - if (ftwbuf->type == BFS_DIR) { - struct bfs_dir *dir = bfs_allocdir(); + switch (ftwbuf->type) { + case BFS_REG: + statbuf = eval_stat(state); + return statbuf && statbuf->size == 0; + + case BFS_DIR: + dir = bfs_allocdir(); if (!dir) { - eval_report_error(state); - return ret; + goto error; } - if (bfs_opendir(dir, ftwbuf->at_fd, ftwbuf->at_path) != 0) { - eval_report_error(state); - return ret; + if (bfs_opendir(dir, ftwbuf->at_fd, ftwbuf->at_path, 0) != 0) { + goto error; } int did_read = bfs_readdir(dir, NULL); + bfs_closedir(dir); + if (did_read < 0) { - eval_report_error(state); - } else { - ret = !did_read; + goto error; } - bfs_closedir(dir); free(dir); - } else if (ftwbuf->type == BFS_REG) { - const struct bfs_stat *statbuf = eval_stat(state); - if (statbuf) { - ret = statbuf->size == 0; - } - } + return did_read == 0; + error: + eval_report_error(state); + free(dir); + return false; - return ret; + default: + return false; + } } /** @@ -547,20 +589,6 @@ bool eval_links(const struct bfs_expr *expr, struct bfs_eval *state) { return bfs_expr_cmp(expr, statbuf->nlink); } -/** Common code for fnmatch() tests. */ -static bool eval_fnmatch(const struct bfs_expr *expr, const char *str) { - if (expr->literal) { -#ifdef FNM_CASEFOLD - if (expr->fnm_flags & FNM_CASEFOLD) { - return strcasecmp(expr->pattern, str) == 0; - } -#endif - return strcmp(expr->pattern, str) == 0; - } else { - return fnmatch(expr->pattern, str, expr->fnm_flags) == 0; - } -} - /** * -i?lname test. */ @@ -593,6 +621,7 @@ done: * -i?name test. */ bool eval_name(const struct bfs_expr *expr, struct bfs_eval *state) { + bool ret = false; const struct BFTW *ftwbuf = state->ftwbuf; const char *name = ftwbuf->path + ftwbuf->nameoff; @@ -601,9 +630,15 @@ bool eval_name(const struct bfs_expr *expr, struct bfs_eval *state) { // Any trailing slashes are not part of the name. This can only // happen for the root path. name = copy = xbasename(name); + if (!name) { + eval_report_error(state); + goto done; + } } - bool ret = eval_fnmatch(expr, name); + ret = eval_fnmatch(expr, name); + +done: free(copy); return ret; } @@ -665,6 +700,34 @@ static int print_owner(FILE *file, const char *name, uintmax_t id, int *width) { } } +/** Print a file's modification time. */ +static int print_time(FILE *file, time_t time, time_t now) { + struct tm tm; + if (!localtime_r(&time, &tm)) { + goto error; + } + + char time_str[256]; + size_t time_ret; + + time_t six_months_ago = now - 6 * 30 * 24 * 60 * 60; + time_t tomorrow = now + 24 * 60 * 60; + if (time <= six_months_ago || time >= tomorrow) { + time_ret = strftime(time_str, sizeof(time_str), "%b %e %Y", &tm); + } else { + time_ret = strftime(time_str, sizeof(time_str), "%b %e %H:%M", &tm); + } + + if (time_ret == 0) { + goto error; + } + + return fprintf(file, " %s", time_str); + +error: + return fprintf(file, " %jd", (intmax_t)time); +} + /** * -f?ls action. */ @@ -685,7 +748,7 @@ bool eval_fls(const struct bfs_expr *expr, struct bfs_eval *state) { uintmax_t ino = statbuf->ino; uintmax_t block_size = ctx->posixly_correct ? 512 : 1024; - uintmax_t blocks = ((uintmax_t)statbuf->blocks*BFS_STAT_BLKSIZE + block_size - 1)/block_size; + uintmax_t blocks = ((uintmax_t)statbuf->blocks * BFS_STAT_BLKSIZE + block_size - 1) / block_size; char mode[11]; xstrmode(statbuf->mode, mode); char acl = bfs_check_acl(ftwbuf) > 0 ? '+' : ' '; @@ -721,28 +784,11 @@ bool eval_fls(const struct bfs_expr *expr, struct bfs_eval *state) { time_t time = statbuf->mtime.tv_sec; time_t now = ctx->now.tv_sec; - time_t six_months_ago = now - 6*30*24*60*60; - time_t tomorrow = now + 24*60*60; - struct tm tm; - if (xlocaltime(&time, &tm) != 0) { - goto error; - } - char time_str[256]; - size_t time_ret; - if (time <= six_months_ago || time >= tomorrow) { - time_ret = strftime(time_str, sizeof(time_str), "%b %e %Y", &tm); - } else { - time_ret = strftime(time_str, sizeof(time_str), "%b %e %H:%M", &tm); - } - if (time_ret == 0) { - errno = EOVERFLOW; - goto error; - } - if (cfprintf(cfile, " %s${rs}", time_str) < 0) { + if (print_time(file, time, now) < 0) { goto error; } - if (cfprintf(cfile, " %pP", ftwbuf) < 0) { + if (cfprintf(cfile, "${rs} %pP", ftwbuf) < 0) { goto error; } @@ -823,7 +869,6 @@ bool eval_fprintx(const struct bfs_expr *expr, struct bfs_eval *state) { ++path; } - if (fputc('\n', file) == EOF) { goto error; } @@ -836,6 +881,19 @@ error: } /** + * -limit action. + */ +bool eval_limit(const struct bfs_expr *expr, struct bfs_eval *state) { + long long evals = expr->evaluations + 1; + if (evals >= expr->num) { + state->action = BFTW_STOP; + state->quit = true; + } + + return true; +} + +/** * -prune action. */ bool eval_prune(const struct bfs_expr *expr, struct bfs_eval *state) { @@ -865,7 +923,7 @@ bool eval_regex(const struct bfs_expr *expr, struct bfs_eval *state) { eval_error(state, "%s.\n", str); free(str); } else { - eval_error(state, "bfs_regerror(): %m.\n"); + eval_error(state, "bfs_regerror(): %s.\n", errstr()); } } @@ -905,7 +963,7 @@ bool eval_size(const struct bfs_expr *expr, struct bfs_eval *state) { }; off_t scale = scales[expr->size_unit]; - off_t size = (statbuf->size + scale - 1)/scale; // Round up + off_t size = (statbuf->size + scale - 1) / scale; // Round up return bfs_expr_cmp(expr, size); } @@ -918,7 +976,7 @@ bool eval_sparse(const struct bfs_expr *expr, struct bfs_eval *state) { return false; } - blkcnt_t expected = (statbuf->size + BFS_STAT_BLKSIZE - 1)/BFS_STAT_BLKSIZE; + blkcnt_t expected = (statbuf->size + BFS_STAT_BLKSIZE - 1) / BFS_STAT_BLKSIZE; return statbuf->blocks < expected; } @@ -962,6 +1020,13 @@ bool eval_xtype(const struct bfs_expr *expr, struct bfs_eval *state) { const struct BFTW *ftwbuf = state->ftwbuf; enum bfs_stat_flags flags = ftwbuf->stat_flags ^ (BFS_STAT_NOFOLLOW | BFS_STAT_TRYFOLLOW); enum bfs_type type = bftw_type(ftwbuf, flags); + + // GNU find treats ELOOP as a broken symbolic link for -xtype l + // (but not -L -type l) + if ((flags & BFS_STAT_TRYFOLLOW) && type == BFS_ERROR && errno == ELOOP) { + type = BFS_LNK; + } + if (type == BFS_ERROR) { eval_report_error(state); return false; @@ -970,40 +1035,23 @@ bool eval_xtype(const struct bfs_expr *expr, struct bfs_eval *state) { } } -#if _POSIX_MONOTONIC_CLOCK > 0 -# define BFS_CLOCK CLOCK_MONOTONIC -#elif _POSIX_TIMERS > 0 -# define BFS_CLOCK CLOCK_REALTIME -#endif - /** - * Call clock_gettime(), if available. + * clock_gettime() wrapper. */ static int eval_gettime(struct bfs_eval *state, struct timespec *ts) { -#ifdef BFS_CLOCK - int ret = clock_gettime(BFS_CLOCK, ts); - if (ret != 0) { - bfs_warning(state->ctx, "%pP: clock_gettime(): %m.\n", state->ftwbuf); + clockid_t clock = CLOCK_REALTIME; + +#if defined(_POSIX_MONOTONIC_CLOCK) && _POSIX_MONOTONIC_CLOCK >= 0 + if (sysoption(MONOTONIC_CLOCK) > 0) { + clock = CLOCK_MONOTONIC; } - return ret; -#else - return -1; #endif -} -/** - * Record an elapsed time. - */ -static void timespec_elapsed(struct timespec *elapsed, const struct timespec *start, const struct timespec *end) { - elapsed->tv_sec += end->tv_sec - start->tv_sec; - elapsed->tv_nsec += end->tv_nsec - start->tv_nsec; - if (elapsed->tv_nsec < 0) { - elapsed->tv_nsec += 1000000000L; - --elapsed->tv_sec; - } else if (elapsed->tv_nsec >= 1000000000L) { - elapsed->tv_nsec -= 1000000000L; - ++elapsed->tv_sec; + int ret = clock_gettime(clock, ts); + if (ret != 0) { + bfs_warning(state->ctx, "%pP: clock_gettime(): %s.\n", state->ftwbuf, errstr()); } + return ret; } /** @@ -1024,7 +1072,8 @@ static bool eval_expr(struct bfs_expr *expr, struct bfs_eval *state) { if (time) { if (eval_gettime(state, &end) == 0) { - timespec_elapsed(&expr->elapsed, &start, &end); + timespec_sub(&end, &start); + timespec_add(&expr->elapsed, &end); } } @@ -1047,67 +1096,53 @@ static bool eval_expr(struct bfs_expr *expr, struct bfs_eval *state) { * Evaluate a negation. */ bool eval_not(const struct bfs_expr *expr, struct bfs_eval *state) { - return !eval_expr(expr->rhs, state); + return !eval_expr(bfs_expr_children(expr), state); } /** * Evaluate a conjunction. */ bool eval_and(const struct bfs_expr *expr, struct bfs_eval *state) { - if (!eval_expr(expr->lhs, state)) { - return false; - } - - if (state->quit) { - return false; + for_expr (child, expr) { + if (!eval_expr(child, state) || state->quit) { + return false; + } } - return eval_expr(expr->rhs, state); + return true; } /** * Evaluate a disjunction. */ bool eval_or(const struct bfs_expr *expr, struct bfs_eval *state) { - if (eval_expr(expr->lhs, state)) { - return true; - } - - if (state->quit) { - return false; + for_expr (child, expr) { + if (eval_expr(child, state) || state->quit) { + return true; + } } - return eval_expr(expr->rhs, state); + return false; } /** * Evaluate the comma operator. */ bool eval_comma(const struct bfs_expr *expr, struct bfs_eval *state) { - eval_expr(expr->lhs, state); + bool ret uninit(false); - if (state->quit) { - return false; + for_expr (child, expr) { + ret = eval_expr(child, state); + if (state->quit) { + break; + } } - return eval_expr(expr->rhs, state); + return ret; } /** Update the status bar. */ -static void eval_status(struct bfs_eval *state, struct bfs_bar *bar, struct timespec *last_status, size_t count) { - struct timespec now; - if (eval_gettime(state, &now) == 0) { - struct timespec elapsed = {0}; - timespec_elapsed(&elapsed, last_status, &now); - - // Update every 0.1s - if (elapsed.tv_sec > 0 || elapsed.tv_nsec >= 100000000L) { - *last_status = now; - } else { - return; - } - } - +static void eval_status(struct bfs_eval *state, struct bfs_bar *bar, size_t count) { size_t width = bfs_bar_width(bar); if (width < 3) { return; @@ -1115,20 +1150,21 @@ static void eval_status(struct bfs_eval *state, struct bfs_bar *bar, struct time const struct BFTW *ftwbuf = state->ftwbuf; - char *rhs = dstrprintf(" (visited: %zu, depth: %2zu)", count, ftwbuf->depth); + dchar *status = NULL; + dchar *rhs = dstrprintf(" (visited: %'zu; depth: %2zu)", count, ftwbuf->depth); if (!rhs) { return; } - size_t rhslen = dstrlen(rhs); + size_t rhslen = xstrwidth(rhs); if (3 + rhslen > width) { - dstresize(&rhs, 0); + dstrshrink(rhs, 0); rhslen = 0; } - char *status = dstralloc(0); + status = dstralloc(0); if (!status) { - goto out_rhs; + goto out; } const char *path = ftwbuf->path; @@ -1137,26 +1173,25 @@ static void eval_status(struct bfs_eval *state, struct bfs_bar *bar, struct time pathlen = strlen(path); } + // Escape weird filename characters + if (dstrnescat(&status, path, pathlen, WESC_TTY) != 0) { + goto out; + } + pathlen = dstrlen(status); + // Try to make sure even wide characters fit in the status bar size_t pathmax = width - rhslen - 3; size_t pathwidth = 0; - mbstate_t mb; - memset(&mb, 0, sizeof(mb)); - while (pathlen > 0) { - wchar_t wc; - size_t len = mbrtowc(&wc, path, pathlen, &mb); + size_t lhslen = 0; + mbstate_t mb = {0}; + for (size_t i = lhslen; lhslen < pathlen; lhslen = i) { + wint_t wc = xmbrtowc(status, &i, pathlen, &mb); int cwidth; - if (len == (size_t)-1) { + if (wc == WEOF) { // Invalid byte sequence, assume a single-width '?' - len = 1; - cwidth = 1; - memset(&mb, 0, sizeof(mb)); - } else if (len == (size_t)-2) { - // Incomplete byte sequence, assume a single-width '?' - len = pathlen; cwidth = 1; } else { - cwidth = wcwidth(wc); + cwidth = xwcwidth(wc); if (cwidth < 0) { cwidth = 0; } @@ -1165,35 +1200,29 @@ static void eval_status(struct bfs_eval *state, struct bfs_bar *bar, struct time if (pathwidth + cwidth > pathmax) { break; } - - if (dstrncat(&status, path, len) != 0) { - goto out_rhs; - } - - path += len; - pathlen -= len; pathwidth += cwidth; } + dstrshrink(status, lhslen); if (dstrcat(&status, "...") != 0) { - goto out_rhs; + goto out; } while (pathwidth < pathmax) { if (dstrapp(&status, ' ') != 0) { - goto out_rhs; + goto out; } ++pathwidth; } - if (dstrcat(&status, rhs) != 0) { - goto out_rhs; + if (dstrdcat(&status, rhs) != 0) { + goto out; } bfs_bar_update(bar, status); +out: dstrfree(status); -out_rhs: dstrfree(rhs); } @@ -1236,11 +1265,11 @@ static bool eval_file_unique(struct bfs_eval *state, struct trie *seen) { /** * Log a stat() call. */ -static void debug_stat(const struct bfs_ctx *ctx, const struct BFTW *ftwbuf, const struct bftw_stat *cache, enum bfs_stat_flags flags) { +static void debug_stat(const struct bfs_ctx *ctx, const struct BFTW *ftwbuf, enum bfs_stat_flags flags, int err) { bfs_debug_prefix(ctx, DEBUG_STAT); fprintf(stderr, "bfs_stat("); - if (ftwbuf->at_fd == AT_FDCWD) { + if (ftwbuf->at_fd == (int)AT_FDCWD) { fprintf(stderr, "AT_FDCWD"); } else { size_t baselen = strlen(ftwbuf->path) - strlen(ftwbuf->at_path); @@ -1256,10 +1285,10 @@ static void debug_stat(const struct bfs_ctx *ctx, const struct BFTW *ftwbuf, con DEBUG_FLAG(flags, BFS_STAT_TRYFOLLOW); DEBUG_FLAG(flags, BFS_STAT_NOSYNC); - fprintf(stderr, ") == %d", cache->buf ? 0 : -1); + fprintf(stderr, ") == %d", err == 0 ? 0 : -1); - if (cache->error) { - fprintf(stderr, " [%d]", cache->error); + if (err) { + fprintf(stderr, " [%d]", err); } fprintf(stderr, "\n"); @@ -1273,14 +1302,14 @@ static void debug_stats(const struct bfs_ctx *ctx, const struct BFTW *ftwbuf) { return; } - const struct bfs_stat *statbuf = ftwbuf->stat_cache.buf; - if (statbuf || ftwbuf->stat_cache.error) { - debug_stat(ctx, ftwbuf, &ftwbuf->stat_cache, BFS_STAT_FOLLOW); + const struct bftw_stat *bufs = &ftwbuf->stat_bufs; + + if (bufs->stat_err >= 0) { + debug_stat(ctx, ftwbuf, BFS_STAT_FOLLOW, bufs->stat_err); } - const struct bfs_stat *lstatbuf = ftwbuf->lstat_cache.buf; - if ((lstatbuf && lstatbuf != statbuf) || ftwbuf->lstat_cache.error) { - debug_stat(ctx, ftwbuf, &ftwbuf->lstat_cache, BFS_STAT_NOFOLLOW); + if (bufs->lstat_err >= 0) { + debug_stat(ctx, ftwbuf, BFS_STAT_NOFOLLOW, bufs->lstat_err); } } @@ -1343,18 +1372,85 @@ struct callback_args { /** The status bar. */ struct bfs_bar *bar; - /** The time of the last status update. */ - struct timespec last_status; + /** The SIGALRM hook. */ + struct sighook *alrm_hook; + /** The interval timer. */ + struct timer *timer; + /** Flag set by SIGALRM. */ + atomic bool alrm_flag; + /** Flag set by SIGINFO. */ + atomic bool info_flag; + /** The number of files visited so far. */ size_t count; /** The set of seen files. */ struct trie *seen; + /** The number of errors that have occurred. */ + size_t nerrors; /** Eventual return value from bfs_eval(). */ int ret; }; +/** Update the status bar in response to SIGALRM. */ +static void eval_sigalrm(int sig, siginfo_t *info, void *ptr) { + struct callback_args *args = ptr; + store(&args->alrm_flag, true, relaxed); +} + +/** Show/hide the bar in response to SIGINFO. */ +static void eval_siginfo(int sig, siginfo_t *info, void *ptr) { + struct callback_args *args = ptr; + store(&args->info_flag, true, relaxed); +} + +/** Show the status bar. */ +static void eval_show_bar(struct callback_args *args) { + args->alrm_hook = sighook(SIGALRM, eval_sigalrm, args, SH_CONTINUE); + if (!args->alrm_hook) { + goto fail; + } + + args->bar = bfs_bar_show(); + if (!args->bar) { + goto fail; + } + + // Update the bar every 0.1s + struct timespec ival = { .tv_nsec = 100 * 1000 * 1000 }; + args->timer = xtimer_start(&ival); + if (!args->timer) { + goto fail; + } + + // Update the bar immediately + store(&args->alrm_flag, true, relaxed); + + return; + +fail: + bfs_warning(args->ctx, "Couldn't show status bar: %s.\n\n", errstr()); + + bfs_bar_hide(args->bar); + args->bar = NULL; + + sigunhook(args->alrm_hook); + args->alrm_hook = NULL; +} + +/** Hide the status bar. */ +static void eval_hide_bar(struct callback_args *args) { + xtimer_stop(args->timer); + args->timer = NULL; + + sigunhook(args->alrm_hook); + args->alrm_hook = NULL; + + bfs_bar_hide(args->bar); + args->bar = NULL; +} + /** * bftw() callback. */ @@ -1369,17 +1465,37 @@ static enum bftw_action eval_callback(const struct BFTW *ftwbuf, void *ptr) { state.ctx = ctx; state.action = BFTW_CONTINUE; state.ret = &args->ret; + state.nerrors = &args->nerrors; state.quit = false; - if (args->bar) { - eval_status(&state, args->bar, &args->last_status, args->count); + // Check whether SIGINFO was delivered and show/hide the bar + if (exchange(&args->info_flag, false, relaxed)) { + if (args->bar) { + eval_hide_bar(args); + } else { + eval_show_bar(args); + } + } + + if (exchange(&args->alrm_flag, false, relaxed)) { + eval_status(&state, args->bar, args->count); } if (ftwbuf->type == BFS_ERROR) { - if (!eval_should_ignore(&state, ftwbuf->error)) { - eval_error(&state, "%s.\n", strerror(ftwbuf->error)); - } state.action = BFTW_PRUNE; + + if (ftwbuf->error == ELOOP && ftwbuf->loopoff > 0) { + char *loop = strndup(ftwbuf->path, ftwbuf->loopoff); + if (loop) { + eval_error(&state, "Filesystem loop back to ${di}%pq${rs}\n", loop); + free(loop); + goto done; + } + } else if (eval_should_ignore(&state, ftwbuf->error)) { + goto done; + } + + eval_error(&state, "%s.\n", xstrerror(ftwbuf->error)); goto done; } @@ -1434,59 +1550,34 @@ done: return state.action; } -/** Check if an rlimit value is infinite. */ -static bool rlim_isinf(rlim_t r) { - // Consider RLIM_{INFINITY,SAVED_{CUR,MAX}} all equally infinite - if (r == RLIM_INFINITY) { - return true; - } - -#ifdef RLIM_SAVED_CUR - if (r == RLIM_SAVED_CUR) { - return true; - } -#endif - -#ifdef RLIM_SAVED_MAX - if (r == RLIM_SAVED_MAX) { - return true; +/** Raise RLIMIT_NOFILE if possible, and return the new limit. */ +static int raise_fdlimit(struct bfs_ctx *ctx) { + rlim_t cur = ctx->orig_nofile.rlim_cur; + rlim_t max = ctx->orig_nofile.rlim_max; + if (!ctx->raise_nofile) { + max = cur; } -#endif - - return false; -} -/** Compare two rlimit values, accounting for RLIM_INFINITY etc. */ -static int rlim_cmp(rlim_t a, rlim_t b) { - bool a_inf = rlim_isinf(a); - bool b_inf = rlim_isinf(b); - if (a_inf || b_inf) { - return a_inf - b_inf; + rlim_t target = 64 << 10; + if (rlim_cmp(target, max) > 0) { + target = max; } - return (a > b) - (a < b); -} - -/** Raise RLIMIT_NOFILE if possible, and return the new limit. */ -static int raise_fdlimit(const struct bfs_ctx *ctx) { - rlim_t target = 64 << 10; - if (rlim_cmp(target, ctx->nofile_hard) > 0) { - target = ctx->nofile_hard; + if (rlim_cmp(target, cur) <= 0) { + return target; } - int ret = target; + const struct rlimit rl = { + .rlim_cur = target, + .rlim_max = max, + }; - if (rlim_cmp(target, ctx->nofile_soft) > 0) { - const struct rlimit rl = { - .rlim_cur = target, - .rlim_max = ctx->nofile_hard, - }; - if (setrlimit(RLIMIT_NOFILE, &rl) != 0) { - ret = ctx->nofile_soft; - } + if (setrlimit(RLIMIT_NOFILE, &rl) != 0) { + return cur; } - return ret; + ctx->cur_nofile = rl; + return target; } /** Preallocate the fd table in the kernel. */ @@ -1518,8 +1609,8 @@ static int infer_fdlimit(const struct bfs_ctx *ctx, int limit) { goto done; } - if (bfs_opendir(dir, AT_FDCWD, "/proc/self/fd") != 0 - && bfs_opendir(dir, AT_FDCWD, "/dev/fd") != 0) { + if (bfs_opendir(dir, AT_FDCWD, "/proc/self/fd", 0) != 0 + && bfs_opendir(dir, AT_FDCWD, "/dev/fd", 0) != 0) { goto done; } @@ -1545,19 +1636,6 @@ done: return ret; } -static int infer_nproc(void) { - long nproc = sysconf(_SC_NPROCESSORS_ONLN); - - if (nproc < 1) { - nproc = 1; - } else if (nproc > 8) { - // Not much speedup after 8 threads - nproc = 8; - } - - return nproc; -} - /** * Dump the bftw() flags for -D search. */ @@ -1573,6 +1651,7 @@ static void dump_bftw_flags(enum bftw_flags flags) { DEBUG_FLAG(flags, BFTW_PRUNE_MOUNTS); DEBUG_FLAG(flags, BFTW_SORT); DEBUG_FLAG(flags, BFTW_BUFFER); + DEBUG_FLAG(flags, BFTW_WHITEOUTS); bfs_assert(flags == 0, "Missing bftw flag 0x%X", flags); } @@ -1606,12 +1685,8 @@ static bool eval_must_buffer(const struct bfs_expr *expr) { return true; } - if (bfs_expr_is_parent(expr)) { - if (expr->lhs && eval_must_buffer(expr->lhs)) { - return true; - } - - if (expr->rhs && eval_must_buffer(expr->rhs)) { + for_expr (child, expr) { + if (eval_must_buffer(child)) { return true; } } @@ -1620,7 +1695,7 @@ static bool eval_must_buffer(const struct bfs_expr *expr) { return false; } -int bfs_eval(const struct bfs_ctx *ctx) { +int bfs_eval(struct bfs_ctx *ctx) { if (!ctx->expr) { return EXIT_SUCCESS; } @@ -1631,12 +1706,16 @@ int bfs_eval(const struct bfs_ctx *ctx) { }; if (ctx->status) { - args.bar = bfs_bar_show(); - if (!args.bar) { - bfs_warning(ctx, "Couldn't show status bar: %m.\n\n"); - } + eval_show_bar(&args); } +#ifdef SIGINFO + int siginfo = SIGINFO; +#else + int siginfo = SIGUSR1; +#endif + struct sighook *info_hook = sighook(siginfo, eval_siginfo, &args, SH_CONTINUE); + struct trie seen; if (ctx->unique) { trie_init(&seen); @@ -1647,16 +1726,12 @@ int bfs_eval(const struct bfs_ctx *ctx) { reserve_fds(fdlimit); fdlimit = infer_fdlimit(ctx, fdlimit); - int nthreads; - if (ctx->threads > 0) { - nthreads = ctx->threads - 1; - } else { - nthreads = infer_nproc() - 1; - } + // -1 for the main thread + int nthreads = ctx->threads - 1; struct bftw_args bftw_args = { .paths = ctx->paths, - .npaths = darray_length(ctx->paths), + .npaths = ctx->npaths, .callback = eval_callback, .ptr = &args, .nopenfd = fdlimit, @@ -1708,7 +1783,14 @@ int bfs_eval(const struct bfs_ctx *ctx) { trie_destroy(&seen); } - bfs_bar_hide(args.bar); + sigunhook(info_hook); + if (args.bar) { + eval_hide_bar(&args); + } + + if (ctx->ignore_errors && args.nerrors > 0) { + bfs_warning(ctx, "Suppressed errors: %zu\n", args.nerrors); + } return args.ret; } @@ -9,8 +9,6 @@ #ifndef BFS_EVAL_H #define BFS_EVAL_H -#include "config.h" - struct bfs_ctx; struct bfs_expr; @@ -22,9 +20,9 @@ struct bfs_eval; /** * Expression evaluation function. * - * @param expr + * @expr * The current expression. - * @param state + * @state * The current evaluation state. * @return * The result of the test. @@ -34,12 +32,12 @@ typedef bool bfs_eval_fn(const struct bfs_expr *expr, struct bfs_eval *state); /** * Evaluate the command line. * - * @param ctx + * @ctx * The bfs context to evaluate. * @return * EXIT_SUCCESS on success, otherwise on failure. */ -int bfs_eval(const struct bfs_ctx *ctx); +int bfs_eval(struct bfs_ctx *ctx); // Predicate evaluation functions @@ -49,6 +47,7 @@ bool eval_false(const struct bfs_expr *expr, struct bfs_eval *state); bool eval_access(const struct bfs_expr *expr, struct bfs_eval *state); bool eval_acl(const struct bfs_expr *expr, struct bfs_eval *state); bool eval_capable(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_context(const struct bfs_expr *expr, struct bfs_eval *state); bool eval_perm(const struct bfs_expr *expr, struct bfs_eval *state); bool eval_xattr(const struct bfs_expr *expr, struct bfs_eval *state); bool eval_xattrname(const struct bfs_expr *expr, struct bfs_eval *state); @@ -88,6 +87,7 @@ bool eval_fprint(const struct bfs_expr *expr, struct bfs_eval *state); bool eval_fprint0(const struct bfs_expr *expr, struct bfs_eval *state); bool eval_fprintf(const struct bfs_expr *expr, struct bfs_eval *state); bool eval_fprintx(const struct bfs_expr *expr, struct bfs_eval *state); +bool eval_limit(const struct bfs_expr *expr, struct bfs_eval *state); bool eval_prune(const struct bfs_expr *expr, struct bfs_eval *state); bool eval_quit(const struct bfs_expr *expr, struct bfs_eval *state); @@ -2,15 +2,17 @@ // SPDX-License-Identifier: 0BSD #include "exec.h" + #include "alloc.h" +#include "bfs.h" #include "bfstd.h" #include "bftw.h" -#include "ctx.h" #include "color.h" -#include "config.h" +#include "ctx.h" #include "diag.h" #include "dstring.h" #include "xspawn.h" + #include <errno.h> #include <fcntl.h> #include <stdarg.h> @@ -22,7 +24,7 @@ #include <unistd.h> /** Print some debugging info. */ -BFS_FORMATTER(2, 3) +_printf(2, 3) static void bfs_exec_debug(const struct bfs_exec *execbuf, const char *format, ...) { const struct bfs_ctx *ctx = execbuf->ctx; @@ -56,7 +58,7 @@ static size_t bfs_exec_arg_size(const char *arg) { /** Determine the maximum argv size. */ static size_t bfs_exec_arg_max(const struct bfs_exec *execbuf) { - long arg_max = sysconf(_SC_ARG_MAX); + long arg_max = xsysconf(_SC_ARG_MAX); bfs_exec_debug(execbuf, "ARG_MAX: %ld according to sysconf()\n", arg_max); if (arg_max < 0) { arg_max = BFS_EXEC_ARG_MAX; @@ -82,7 +84,7 @@ static size_t bfs_exec_arg_max(const struct bfs_exec *execbuf) { // Assume arguments are counted with the granularity of a single page, // so allow a one page cushion to account for rounding up - long page_size = sysconf(_SC_PAGESIZE); + long page_size = xsysconf(_SC_PAGESIZE); if (page_size < 4096) { page_size = 4096; } @@ -227,14 +229,14 @@ static char *bfs_exec_format_arg(char *arg, const char *path) { return arg; } - char *ret = dstralloc(0); + dchar *ret = dstralloc(0); if (!ret) { return NULL; } char *last = arg; do { - if (dstrncat(&ret, last, match - last) != 0) { + if (dstrxcat(&ret, last, match - last) != 0) { goto err; } if (dstrcat(&ret, path) != 0) { @@ -259,7 +261,7 @@ err: /** Free a formatted argument. */ static void bfs_exec_free_arg(char *arg, const char *tmpl) { if (arg != tmpl) { - dstrfree(arg); + dstrfree((dchar *)arg); } } @@ -268,7 +270,7 @@ static int bfs_exec_openwd(struct bfs_exec *execbuf, const struct BFTW *ftwbuf) bfs_assert(execbuf->wd_fd < 0); bfs_assert(!execbuf->wd_path); - if (ftwbuf->at_fd != AT_FDCWD) { + if (ftwbuf->at_fd != (int)AT_FDCWD) { // Rely on at_fd being the immediate parent bfs_assert(xbaseoff(ftwbuf->at_path) == 0); @@ -327,8 +329,10 @@ static void bfs_exec_closewd(struct bfs_exec *execbuf, const struct BFTW *ftwbuf /** Actually spawn the process. */ static int bfs_exec_spawn(const struct bfs_exec *execbuf) { + const struct bfs_ctx *ctx = execbuf->ctx; + // Flush the context state for consistency with the external process - bfs_ctx_flush(execbuf->ctx); + bfs_ctx_flush(ctx); if (execbuf->flags & BFS_EXEC_CONFIRM) { for (size_t i = 0; i < execbuf->argc; ++i) { @@ -348,49 +352,46 @@ static int bfs_exec_spawn(const struct bfs_exec *execbuf) { if (execbuf->flags & BFS_EXEC_MULTI) { bfs_exec_debug(execbuf, "Executing '%s' ... [%zu arguments] (size %zu)\n", - execbuf->argv[0], execbuf->argc - 1, execbuf->arg_size); + execbuf->argv[0], execbuf->argc - 1, execbuf->arg_size); } else { bfs_exec_debug(execbuf, "Executing '%s' ... [%zu arguments]\n", execbuf->argv[0], execbuf->argc - 1); } pid_t pid = -1; - int error; - struct bfs_spawn ctx; - if (bfs_spawn_init(&ctx) != 0) { + struct bfs_spawn spawn; + if (bfs_spawn_init(&spawn) != 0) { return -1; } - if (bfs_spawn_setflags(&ctx, BFS_SPAWN_USEPATH) != 0) { - goto fail; - } + spawn.flags |= BFS_SPAWN_USE_PATH; - // Reset RLIMIT_NOFILE, to avoid breaking applications that use select() - struct rlimit rl = { - .rlim_cur = execbuf->ctx->nofile_soft, - .rlim_max = execbuf->ctx->nofile_hard, - }; - if (bfs_spawn_addsetrlimit(&ctx, RLIMIT_NOFILE, &rl) != 0) { - goto fail; + if (execbuf->wd_fd >= 0) { + if (bfs_spawn_addfchdir(&spawn, execbuf->wd_fd) != 0) { + goto fail; + } } - if (execbuf->wd_fd >= 0) { - if (bfs_spawn_addfchdir(&ctx, execbuf->wd_fd) != 0) { + // Reset RLIMIT_NOFILE if necessary, to avoid breaking applications that use select() + if (rlim_cmp(ctx->orig_nofile.rlim_cur, ctx->cur_nofile.rlim_cur) < 0) { + if (bfs_spawn_setrlimit(&spawn, RLIMIT_NOFILE, &ctx->orig_nofile) != 0) { goto fail; } } - pid = bfs_spawn(execbuf->argv[0], &ctx, execbuf->argv, NULL); -fail: - error = errno; - bfs_spawn_destroy(&ctx); + pid = bfs_spawn(execbuf->argv[0], &spawn, execbuf->argv, NULL); + +fail:; + int error = errno; + + bfs_spawn_destroy(&spawn); if (pid < 0) { errno = error; return -1; } int wstatus; - if (waitpid(pid, &wstatus, 0) < 0) { + if (xwaitpid(pid, &wstatus, 0) < 0) { return -1; } @@ -409,9 +410,9 @@ fail: if (!str) { str = "unknown"; } - bfs_warning(execbuf->ctx, "Command '${ex}%s${rs}' terminated by signal %d (%s)\n", execbuf->argv[0], sig, str); + bfs_warning(ctx, "Command '${ex}%s${rs}' terminated by signal %d (%s)\n", execbuf->argv[0], sig, str); } else { - bfs_warning(execbuf->ctx, "Command '${ex}%s${rs}' terminated abnormally\n", execbuf->argv[0]); + bfs_warning(ctx, "Command '${ex}%s${rs}' terminated abnormally\n", execbuf->argv[0]); } errno = 0; @@ -471,7 +472,7 @@ static bool bfs_exec_args_remain(const struct bfs_exec *execbuf) { static size_t bfs_exec_estimate_max(const struct bfs_exec *execbuf) { size_t min = execbuf->arg_min; size_t max = execbuf->arg_max; - return min + (max - min)/2; + return min + (max - min) / 2; } /** Update the ARG_MAX lower bound from a successful execution. */ @@ -486,7 +487,7 @@ static void bfs_exec_update_min(struct bfs_exec *execbuf) { size_t estimate = bfs_exec_estimate_max(execbuf); bfs_exec_debug(execbuf, "ARG_MAX between [%zu, %zu], trying %zu\n", - execbuf->arg_min, execbuf->arg_max, estimate); + execbuf->arg_min, execbuf->arg_max, estimate); } } @@ -502,7 +503,7 @@ static size_t bfs_exec_update_max(struct bfs_exec *execbuf) { // Trim a fraction off the max size to avoid repeated failures near the // top end of the working range - size -= size/16; + size -= size / 16; if (size < execbuf->arg_max) { execbuf->arg_max = size; @@ -515,7 +516,7 @@ static size_t bfs_exec_update_max(struct bfs_exec *execbuf) { // Binary search for a more precise bound size_t estimate = bfs_exec_estimate_max(execbuf); bfs_exec_debug(execbuf, "ARG_MAX between [%zu, %zu], trying %zu\n", - execbuf->arg_min, execbuf->arg_max, estimate); + execbuf->arg_min, execbuf->arg_max, estimate); return estimate; } @@ -589,7 +590,7 @@ static bool bfs_exec_would_overflow(const struct bfs_exec *execbuf, const char * size_t next_size = execbuf->arg_size + bfs_exec_arg_size(arg); if (next_size > arg_max) { bfs_exec_debug(execbuf, "Command size (%zu) would exceed maximum (%zu), executing buffered command\n", - next_size, arg_max); + next_size, arg_max); return true; } @@ -601,8 +602,8 @@ static int bfs_exec_push(struct bfs_exec *execbuf, char *arg) { execbuf->argv[execbuf->argc] = arg; if (execbuf->argc + 1 >= execbuf->argv_cap) { - size_t cap = 2*execbuf->argv_cap; - char **argv = realloc(execbuf->argv, sizeof_array(char *, cap)); + size_t cap = 2 * execbuf->argv_cap; + char **argv = REALLOC_ARRAY(char *, execbuf->argv, execbuf->argv_cap, cap); if (!argv) { return -1; } @@ -67,11 +67,11 @@ struct bfs_exec { /** * Parse an exec action. * - * @param argv + * @argv * The (bfs) command line argument to parse. - * @param flags + * @flags * Any flags for this exec action. - * @param ctx + * @ctx * The bfs context. * @return * The parsed exec action, or NULL on failure. @@ -81,9 +81,9 @@ struct bfs_exec *bfs_exec_parse(const struct bfs_ctx *ctx, char **argv, enum bfs /** * Execute the command for a file. * - * @param execbuf + * @execbuf * The parsed exec action. - * @param ftwbuf + * @ftwbuf * The bftw() data for the current file. * @return 0 if the command succeeded, -1 if it failed. If the command could * be executed, -1 is returned, and errno will be non-zero. For @@ -94,7 +94,7 @@ int bfs_exec(struct bfs_exec *execbuf, const struct BFTW *ftwbuf); /** * Finish executing any commands. * - * @param execbuf + * @execbuf * The parsed exec action. * @return 0 on success, -1 if any errors were encountered. */ diff --git a/src/expr.c b/src/expr.c new file mode 100644 index 0000000..ca37ffc --- /dev/null +++ b/src/expr.c @@ -0,0 +1,89 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +#include "expr.h" + +#include "alloc.h" +#include "ctx.h" +#include "diag.h" +#include "eval.h" +#include "exec.h" +#include "list.h" +#include "printf.h" +#include "xregex.h" + +#include <string.h> + +struct bfs_expr *bfs_expr_new(struct bfs_ctx *ctx, bfs_eval_fn *eval_fn, size_t argc, char **argv, enum bfs_kind kind) { + bfs_assert(kind != BFS_PATH); + + struct bfs_expr *expr = arena_alloc(&ctx->expr_arena); + if (!expr) { + return NULL; + } + + memset(expr, 0, sizeof(*expr)); + expr->eval_fn = eval_fn; + expr->argc = argc; + expr->argv = argv; + expr->kind = kind; + expr->probability = 0.5; + SLIST_PREPEND(&ctx->expr_list, expr, freelist); + + if (bfs_expr_is_parent(expr)) { + SLIST_INIT(&expr->children); + } + + return expr; +} + +bool bfs_expr_is_parent(const struct bfs_expr *expr) { + return expr->eval_fn == eval_and + || expr->eval_fn == eval_or + || expr->eval_fn == eval_not + || expr->eval_fn == eval_comma; +} + +struct bfs_expr *bfs_expr_children(const struct bfs_expr *expr) { + if (bfs_expr_is_parent(expr)) { + return expr->children.head; + } else { + return NULL; + } +} + +void bfs_expr_append(struct bfs_expr *expr, struct bfs_expr *child) { + bfs_assert(bfs_expr_is_parent(expr)); + + SLIST_APPEND(&expr->children, child); + + if (!child->pure) { + expr->pure = false; + } + + expr->persistent_fds += child->persistent_fds; + if (expr->ephemeral_fds < child->ephemeral_fds) { + expr->ephemeral_fds = child->ephemeral_fds; + } +} + +void bfs_expr_extend(struct bfs_expr *expr, struct bfs_exprs *children) { + drain_slist (struct bfs_expr, child, children) { + bfs_expr_append(expr, child); + } +} + +bool bfs_expr_never_returns(const struct bfs_expr *expr) { + // Expressions that never return are vacuously both always true and always false + return expr->always_true && expr->always_false; +} + +void bfs_expr_clear(struct bfs_expr *expr) { + if (expr->eval_fn == eval_exec) { + bfs_exec_free(expr->exec); + } else if (expr->eval_fn == eval_fprintf) { + bfs_printf_free(expr->printf); + } else if (expr->eval_fn == eval_regex) { + bfs_regfree(expr->regex); + } +} @@ -9,14 +9,37 @@ #define BFS_EXPR_H #include "color.h" -#include "config.h" #include "eval.h" #include "stat.h" -#include <stddef.h> + #include <sys/types.h> #include <time.h> /** + * Argument/token/expression kinds. + */ +enum bfs_kind { + /** A regular argument. */ + BFS_ARG, + + /** A flag (-H, -L, etc.). */ + BFS_FLAG, + + /** A root path. */ + BFS_PATH, + + /** An option (-follow, -mindepth, etc.). */ + BFS_OPTION, + /** A test (-name, -size, etc.). */ + BFS_TEST, + /** An action (-print, -exec, etc.). */ + BFS_ACTION, + + /** An operator (-and, -or, etc.). */ + BFS_OPERATOR, +}; + +/** * Integer comparison modes. */ enum bfs_int_cmp { @@ -75,9 +98,22 @@ enum bfs_size_unit { }; /** + * A linked list of expressions. + */ +struct bfs_exprs { + struct bfs_expr *head; + struct bfs_expr **tail; +}; + +/** * A command line expression. */ struct bfs_expr { + /** This expression's next sibling, if any. */ + struct bfs_expr *next; + /** The next allocated expression. */ + struct { struct bfs_expr *next; } freelist; + /** The function that evaluates this expression. */ bfs_eval_fn *eval_fn; @@ -85,6 +121,8 @@ struct bfs_expr { size_t argc; /** The command line arguments comprising this expression. */ char **argv; + /** The kind of expression this is. */ + enum bfs_kind kind; /** The number of files this expression keeps open between evaluations. */ int persistent_fds; @@ -97,6 +135,8 @@ struct bfs_expr { bool always_true; /** Whether this expression always evaluates to false. */ bool always_false; + /** Whether this expression uses stat(). */ + bool calls_stat; /** Estimated cost. */ float cost; @@ -109,15 +149,10 @@ struct bfs_expr { /** Total time spent running this predicate. */ struct timespec elapsed; - /** Auxilliary data for the evaluation function. */ + /** Auxiliary data for the evaluation function. */ union { /** Child expressions. */ - struct { - /** The left hand side of the expression. */ - struct bfs_expr *lhs; - /** The right hand side of the expression. */ - struct bfs_expr *rhs; - }; + struct bfs_exprs children; /** Integer comparisons. */ struct { @@ -126,21 +161,15 @@ struct bfs_expr { /** The comparison mode. */ enum bfs_int_cmp int_cmp; - /** Optional extra data. */ - union { - /** -size data. */ - enum bfs_size_unit size_unit; - - /** Timestamp comparison data. */ - struct { - /** The stat field to look at. */ - enum bfs_stat_field stat_field; - /** The reference time. */ - struct timespec reftime; - /** The time unit. */ - enum bfs_time_unit time_unit; - }; - }; + /** -size data. */ + enum bfs_size_unit size_unit; + + /** The stat field to look at. */ + enum bfs_stat_field stat_field; + /** The time unit. */ + enum bfs_time_unit time_unit; + /** The reference time. */ + struct timespec reftime; }; /** String comparisons. */ @@ -199,17 +228,34 @@ struct bfs_expr { }; }; +struct bfs_ctx; + /** * Create a new expression. */ -struct bfs_expr *bfs_expr_new(bfs_eval_fn *eval, size_t argc, char **argv); +struct bfs_expr *bfs_expr_new(struct bfs_ctx *ctx, bfs_eval_fn *eval, size_t argc, char **argv, enum bfs_kind kind); /** - * @return Whether the expression has child expressions. + * @return Whether this type of expression has children. */ bool bfs_expr_is_parent(const struct bfs_expr *expr); /** + * @return The first child of this expression, or NULL if it has none. + */ +struct bfs_expr *bfs_expr_children(const struct bfs_expr *expr); + +/** + * Add a child to an expression. + */ +void bfs_expr_append(struct bfs_expr *expr, struct bfs_expr *child); + +/** + * Add a list of children to an expression. + */ +void bfs_expr_extend(struct bfs_expr *expr, struct bfs_exprs *children); + +/** * @return Whether expr is known to always quit. */ bool bfs_expr_never_returns(const struct bfs_expr *expr); @@ -220,8 +266,14 @@ bool bfs_expr_never_returns(const struct bfs_expr *expr); bool bfs_expr_cmp(const struct bfs_expr *expr, long long n); /** - * Free an expression tree. + * Free any resources owned by an expression. + */ +void bfs_expr_clear(struct bfs_expr *expr); + +/** + * Iterate over the children of an expression. */ -void bfs_expr_free(struct bfs_expr *expr); +#define for_expr(child, expr) \ + for (struct bfs_expr *child = bfs_expr_children(expr); child; child = child->next) #endif // BFS_EXPR_H diff --git a/src/fsade.c b/src/fsade.c index c401426..dfdf125 100644 --- a/src/fsade.c +++ b/src/fsade.c @@ -2,13 +2,15 @@ // SPDX-License-Identifier: 0BSD #include "fsade.h" + #include "atomic.h" -#include "config.h" +#include "bfs.h" #include "bfstd.h" #include "bftw.h" #include "dir.h" #include "dstring.h" #include "sanity.h" + #include <errno.h> #include <fcntl.h> #include <stddef.h> @@ -22,23 +24,35 @@ # include <sys/capability.h> #endif -#if BFS_USE_SYS_EXTATTR_H +#if BFS_CAN_CHECK_CONTEXT +# include <selinux/selinux.h> +#endif + +#if __has_include(<sys/extattr.h>) # include <sys/extattr.h> -#elif BFS_USE_SYS_XATTR_H +# define BFS_USE_EXTATTR true +#elif __has_include(<sys/xattr.h>) # include <sys/xattr.h> +# define BFS_USE_XATTR true #endif -#if BFS_CAN_CHECK_ACL || BFS_CAN_CHECK_CAPABILITIES || BFS_CAN_CHECK_XATTRS +#ifndef BFS_USE_EXTATTR +# define BFS_USE_EXTATTR false +#endif +#ifndef BFS_USE_XATTR +# define BFS_USE_XATTR false +#endif /** * Many of the APIs used here don't have *at() variants, but we can try to * emulate something similar if /proc/self/fd is available. */ +_maybe_unused static const char *fake_at(const struct BFTW *ftwbuf) { static atomic int proc_works = -1; - char *path = NULL; - if (ftwbuf->at_fd == AT_FDCWD || load(&proc_works, relaxed) == 0) { + dchar *path = NULL; + if (ftwbuf->at_fd == (int)AT_FDCWD || load(&proc_works, relaxed) == 0) { goto fail; } @@ -67,15 +81,17 @@ fail: return ftwbuf->path; } +_maybe_unused static void free_fake_at(const struct BFTW *ftwbuf, const char *path) { if (path != ftwbuf->path) { - dstrfree((char *)path); + dstrfree((dchar *)path); } } /** * Check if an error was caused by the absence of support or data for a feature. */ +_maybe_unused static bool is_absence_error(int error) { // If the OS doesn't support the feature, it's obviously not enabled for // any files @@ -114,28 +130,73 @@ static bool is_absence_error(int error) { return false; } -#endif // BFS_CAN_CHECK_ACL || BFS_CAN_CHECK_CAPABILITIES || BFS_CAN_CHECK_XATTRS - #if BFS_CAN_CHECK_ACL +#if BFS_HAS_ACL_GET_FILE + +/** Unified interface for incompatible acl_get_entry() implementations. */ +static int bfs_acl_entry(acl_t acl, int which, acl_entry_t *entry) { +#if BFS_HAS_ACL_GET_ENTRY + int ret = acl_get_entry(acl, which, entry); +# if __APPLE__ + // POSIX.1e specifies a return value of 1 for success, but macOS returns 0 instead + return !ret; +# else + return ret; +# endif +#elif __DragonFly__ +# if !defined(ACL_FIRST_ENTRY) && !defined(ACL_NEXT_ENTRY) +# define ACL_FIRST_ENTRY 0 +# define ACL_NEXT_ENTRY 1 +# endif + + switch (which) { + case ACL_FIRST_ENTRY: + *entry = &acl->acl_entry[0]; + break; + case ACL_NEXT_ENTRY: + ++*entry; + break; + default: + errno = EINVAL; + return -1; + } + + acl_entry_t last = &acl->acl_entry[acl->acl_cnt]; + return *entry == last; +#else + errno = ENOTSUP; + return -1; +#endif +} + +/** Unified interface for acl_get_tag_type(). */ +_maybe_unused +static int bfs_acl_tag_type(acl_entry_t entry, acl_tag_t *tag) { +#if BFS_HAS_ACL_GET_TAG_TYPE + return acl_get_tag_type(entry, tag); +#elif __DragonFly__ + *tag = entry->ae_tag; + return 0; +#else + errno = ENOTSUP; + return -1; +#endif +} + /** Check if a POSIX.1e ACL is non-trivial. */ static int bfs_check_posix1e_acl(acl_t acl, bool ignore_required) { int ret = 0; acl_entry_t entry; - for (int status = acl_get_entry(acl, ACL_FIRST_ENTRY, &entry); -#if __APPLE__ - // POSIX.1e specifies a return value of 1 for success, but macOS - // returns 0 instead - status == 0; -#else + for (int status = bfs_acl_entry(acl, ACL_FIRST_ENTRY, &entry); status > 0; -#endif - status = acl_get_entry(acl, ACL_NEXT_ENTRY, &entry)) { + status = bfs_acl_entry(acl, ACL_NEXT_ENTRY, &entry)) + { #if defined(ACL_USER_OBJ) && defined(ACL_GROUP_OBJ) && defined(ACL_OTHER) if (ignore_required) { acl_tag_t tag; - if (acl_get_tag_type(entry, &tag) != 0) { + if (bfs_acl_tag_type(entry, &tag) != 0) { ret = -1; continue; } @@ -159,7 +220,7 @@ static int bfs_check_acl_type(acl_t acl, acl_type_t type) { return bfs_check_posix1e_acl(acl, false); } -#if __FreeBSD__ +#if BFS_HAS_ACL_IS_TRIVIAL_NP int trivial; int ret = acl_is_trivial_np(acl, &trivial); @@ -173,34 +234,40 @@ static int bfs_check_acl_type(acl_t acl, acl_type_t type) { } else { return 1; } -#else // !__FreeBSD__ +#else return bfs_check_posix1e_acl(acl, true); #endif } +#endif // BFS_HAS_ACL_GET_FILE + int bfs_check_acl(const struct BFTW *ftwbuf) { + if (ftwbuf->type == BFS_LNK) { + return 0; + } + + const char *path = fake_at(ftwbuf); + +#if BFS_HAS_ACL_TRIVIAL + int ret = acl_trivial(path); + int error = errno; +#elif BFS_HAS_ACL_GET_FILE static const acl_type_t acl_types[] = { -#if __APPLE__ +# if __APPLE__ // macOS gives EINVAL for either of the two standard ACL types, // supporting only ACL_TYPE_EXTENDED ACL_TYPE_EXTENDED, -#else +# else // The two standard POSIX.1e ACL types ACL_TYPE_ACCESS, ACL_TYPE_DEFAULT, -#endif +# endif -#ifdef ACL_TYPE_NFS4 +# ifdef ACL_TYPE_NFS4 ACL_TYPE_NFS4, -#endif +# endif }; - if (ftwbuf->type == BFS_LNK) { - return 0; - } - - const char *path = fake_at(ftwbuf); - int ret = -1, error = 0; for (size_t i = 0; i < countof(acl_types) && ret <= 0; ++i) { acl_type_t type = acl_types[i]; @@ -224,6 +291,7 @@ int bfs_check_acl(const struct BFTW *ftwbuf) { error = errno; acl_free(acl); } +#endif free_fake_at(ftwbuf, path); errno = error; @@ -287,17 +355,62 @@ int bfs_check_capabilities(const struct BFTW *ftwbuf) { #if BFS_CAN_CHECK_XATTRS +#if BFS_USE_EXTATTR + +/** Wrapper for extattr_list_{file,link}. */ +static ssize_t bfs_extattr_list(const char *path, enum bfs_type type, int namespace) { + if (type == BFS_LNK) { +#if BFS_HAS_EXTATTR_LIST_LINK + return extattr_list_link(path, namespace, NULL, 0); +#elif BFS_HAS_EXTATTR_GET_LINK + return extattr_get_link(path, namespace, "", NULL, 0); +#else + return 0; +#endif + } + +#if BFS_HAS_EXTATTR_LIST_FILE + return extattr_list_file(path, namespace, NULL, 0); +#elif BFS_HAS_EXTATTR_GET_FILE + // From man extattr(2): + // + // In earlier versions of this API, passing an empty string for the + // attribute name to extattr_get_file() would return the list of attributes + // defined for the target object. This interface has been deprecated in + // preference to using the explicit list API, and should not be used. + return extattr_get_file(path, namespace, "", NULL, 0); +#else + return 0; +#endif +} + +/** Wrapper for extattr_get_{file,link}. */ +static ssize_t bfs_extattr_get(const char *path, enum bfs_type type, int namespace, const char *name) { + if (type == BFS_LNK) { +#if BFS_HAS_EXTATTR_GET_LINK + return extattr_get_link(path, namespace, name, NULL, 0); +#else + return 0; +#endif + } + +#if BFS_HAS_EXTATTR_GET_FILE + return extattr_get_file(path, namespace, name, NULL, 0); +#else + return 0; +#endif +} + +#endif // BFS_USE_EXTATTR + int bfs_check_xattrs(const struct BFTW *ftwbuf) { const char *path = fake_at(ftwbuf); ssize_t len; -#if BFS_USE_SYS_EXTATTR_H - ssize_t (*extattr_list)(const char *, int, void*, size_t) = - ftwbuf->type == BFS_LNK ? extattr_list_link : extattr_list_file; - - len = extattr_list(path, EXTATTR_NAMESPACE_SYSTEM, NULL, 0); +#if BFS_USE_EXTATTR + len = bfs_extattr_list(path, ftwbuf->type, EXTATTR_NAMESPACE_SYSTEM); if (len <= 0) { - len = extattr_list(path, EXTATTR_NAMESPACE_USER, NULL, 0); + len = bfs_extattr_list(path, ftwbuf->type, EXTATTR_NAMESPACE_USER); } #elif __APPLE__ int options = ftwbuf->type == BFS_LNK ? XATTR_NOFOLLOW : 0; @@ -330,13 +443,10 @@ int bfs_check_xattr_named(const struct BFTW *ftwbuf, const char *name) { const char *path = fake_at(ftwbuf); ssize_t len; -#if BFS_USE_SYS_EXTATTR_H - ssize_t (*extattr_get)(const char *, int, const char *, void*, size_t) = - ftwbuf->type == BFS_LNK ? extattr_get_link : extattr_get_file; - - len = extattr_get(path, EXTATTR_NAMESPACE_SYSTEM, name, NULL, 0); +#if BFS_USE_EXTATTR + len = bfs_extattr_get(path, ftwbuf->type, EXTATTR_NAMESPACE_SYSTEM, name); if (len < 0) { - len = extattr_get(path, EXTATTR_NAMESPACE_USER, name, NULL, 0); + len = bfs_extattr_get(path, ftwbuf->type, EXTATTR_NAMESPACE_USER, name); } #elif __APPLE__ int options = ftwbuf->type == BFS_LNK ? XATTR_NOFOLLOW : 0; @@ -378,3 +488,32 @@ int bfs_check_xattr_named(const struct BFTW *ftwbuf, const char *name) { } #endif + +char *bfs_getfilecon(const struct BFTW *ftwbuf) { +#if BFS_CAN_CHECK_CONTEXT + const char *path = fake_at(ftwbuf); + + char *con; + int ret; + if (ftwbuf->type == BFS_LNK) { + ret = lgetfilecon(path, &con); + } else { + ret = getfilecon(path, &con); + } + + if (ret >= 0) { + return con; + } else { + return NULL; + } +#else + errno = ENOTSUP; + return NULL; +#endif +} + +void bfs_freecon(char *con) { +#if BFS_CAN_CHECK_CONTEXT + freecon(con); +#endif +} diff --git a/src/fsade.h b/src/fsade.h index 0d9ecaf..fbe02d8 100644 --- a/src/fsade.h +++ b/src/fsade.h @@ -9,25 +9,26 @@ #ifndef BFS_FSADE_H #define BFS_FSADE_H -#include "config.h" +#include "bfs.h" -#define BFS_CAN_CHECK_ACL BFS_USE_SYS_ACL_H +#define BFS_CAN_CHECK_ACL (BFS_HAS_ACL_GET_FILE || BFS_HAS_ACL_TRIVIAL) -#if !defined(BFS_CAN_CHECK_CAPABILITIES) && BFS_USE_SYS_CAPABILITY_H && !__FreeBSD__ -# include <sys/capability.h> -# ifdef CAP_CHOWN -# define BFS_CAN_CHECK_CAPABILITIES true -# endif -#endif +#define BFS_CAN_CHECK_CAPABILITIES BFS_WITH_LIBCAP + +#define BFS_CAN_CHECK_CONTEXT BFS_WITH_LIBSELINUX -#define BFS_CAN_CHECK_XATTRS (BFS_USE_SYS_EXTATTR_H || BFS_USE_SYS_XATTR_H) +#if __has_include(<sys/extattr.h>) || __has_include(<sys/xattr.h>) +# define BFS_CAN_CHECK_XATTRS true +#else +# define BFS_CAN_CHECK_XATTRS false +#endif struct BFTW; /** * Check if a file has a non-trivial Access Control List. * - * @param ftwbuf + * @ftwbuf * The file to check. * @return * 1 if it does, 0 if it doesn't, or -1 if an error occurred. @@ -37,7 +38,7 @@ int bfs_check_acl(const struct BFTW *ftwbuf); /** * Check if a file has a non-trivial capability set. * - * @param ftwbuf + * @ftwbuf * The file to check. * @return * 1 if it does, 0 if it doesn't, or -1 if an error occurred. @@ -47,7 +48,7 @@ int bfs_check_capabilities(const struct BFTW *ftwbuf); /** * Check if a file has any extended attributes set. * - * @param ftwbuf + * @ftwbuf * The file to check. * @return * 1 if it does, 0 if it doesn't, or -1 if an error occurred. @@ -57,13 +58,28 @@ int bfs_check_xattrs(const struct BFTW *ftwbuf); /** * Check if a file has an extended attribute with the given name. * - * @param ftwbuf + * @ftwbuf * The file to check. - * @param name + * @name * The name of the xattr to check. * @return * 1 if it does, 0 if it doesn't, or -1 if an error occurred. */ int bfs_check_xattr_named(const struct BFTW *ftwbuf, const char *name); +/** + * Get a file's SELinux context + * + * @ftwbuf + * The file to check. + * @return + * The file's SELinux context, or NULL on failure. + */ +char *bfs_getfilecon(const struct BFTW *ftwbuf); + +/** + * Free a bfs_getfilecon() result. + */ +void bfs_freecon(char *con); + #endif // BFS_FSADE_H @@ -1,20 +1,146 @@ // Copyright © Tavian Barnes <tavianator@tavianator.com> // SPDX-License-Identifier: 0BSD +/** + * An asynchronous I/O queue implementation. + * + * struct ioq is composed of two separate queues: + * + * struct ioqq *pending; // Pending I/O requests + * struct ioqq *ready; // Ready I/O responses + * + * Worker threads pop requests from `pending`, execute them, and push them back + * to the `ready` queue. The main thread pushes requests to `pending` and pops + * them from `ready`. + * + * struct ioqq is a blocking MPMC queue (though it could be SPMC/MPSC for + * pending/ready respectively). It is implemented as a circular buffer: + * + * size_t mask; // (1 << N) - 1 + * [padding] + * size_t head; // Writer index + * [padding] + * size_t tail; // Reader index + * [padding] + * ioq_slot slots[1 << N]; // Queue contents + * + * Pushes are implemented with an unconditional + * + * fetch_add(&ioqq->head, 1) + * + * which scales better on many architectures than compare-and-swap (see [1] for + * details). Pops are implemented similarly. Since the fetch-and-adds are + * unconditional, non-blocking readers can get ahead of writers: + * + * Reader Writer + * ──────────────── ────────────────────── + * head: 0 → 1 + * slots[0]: empty + * tail: 0 → 1 + * slots[0]: empty → full + * head: 1 → 2 + * slots[1]: empty! + * + * To avoid this, non-blocking reads (ioqq_pop(ioqq, false)) must mark the slots + * somehow so that writers can skip them: + * + * Reader Writer + * ─────────────────────── ─────────────────────── + * head: 0 → 1 + * slots[0]: empty → skip + * tail: 0 → 1 + * slots[0]: skip → empty + * tail: 1 → 2 + * slots[1]: empty → full + * head: 1 → 2 + * slots[1]: full → empty + * + * As well, a reader might "lap" a writer (or another reader), so slots need to + * count how many times they should be skipped: + * + * Reader Writer + * ────────────────────────── ───────────────────────── + * head: 0 → 1 + * slots[0]: empty → skip(1) + * head: 1 → 2 + * slots[1]: empty → skip(1) + * ... + * head: M → 0 + * slots[M]: empty → skip(1) + * head: 0 → 1 + * slots[0]: skip(1 → 2) + * tail: 0 → 1 + * slots[0]: skip(2 → 1) + * tail: 1 → 2 + * slots[1]: skip(1) → empty + * ... + * tail: M → 0 + * slots[M]: skip(1) → empty + * tail: 0 → 1 + * slots[0]: skip(1) → empty + * tail: 1 → 2 + * slots[1]: empty → full + * head: 1 → 2 + * slots[1]: full → empty + * + * As described in [1], this approach is susceptible to livelock if readers stay + * ahead of writers. This is okay for us because we don't retry failed non- + * blocking reads. + * + * The slot representation uses tag bits to hold either a pointer or skip(N): + * + * IOQ_SKIP (highest bit) IOQ_BLOCKED (lowest bit) + * ↓ ↓ + * 0 0 0 ... 0 0 0 + * └──────────┬──────────┘ + * │ + * value bits + * + * If IOQ_SKIP is unset, the value bits hold a pointer (or zero/NULL for empty). + * If IOQ_SKIP is set, the value bits hold a negative skip count. Writers can + * reduce the skip count by adding 1 to the value bits, and when the count hits + * zero, the carry will automatically clear IOQ_SKIP: + * + * IOQ_SKIP IOQ_BLOCKED + * ↓ ↓ + * 1 1 1 ... 1 0 0 skip(2) + * 1 1 1 ... 1 1 0 skip(1) + * 0 0 0 ... 0 0 0 empty + * + * The IOQ_BLOCKED flag is used to track sleeping waiters, futex-style. To wait + * for a slot to change, waiters call ioq_slot_wait() which sets IOQ_BLOCKED and + * goes to sleep. Whenever a slot is updated, if the old value had IOQ_BLOCKED + * set, ioq_slot_wake() must be called to wake up that waiter. + * + * Blocking/waking uses a pool of monitors (mutex, condition variable pairs). + * Slots are assigned round-robin to a monitor from the pool. + * + * [1]: https://arxiv.org/abs/2201.02179 + */ + #include "ioq.h" + #include "alloc.h" #include "atomic.h" +#include "bfs.h" #include "bfstd.h" #include "bit.h" -#include "config.h" #include "diag.h" #include "dir.h" +#include "stat.h" #include "thread.h" -#include "sanity.h" -#include <assert.h> + #include <errno.h> +#include <fcntl.h> #include <pthread.h> +#include <stdint.h> #include <stdlib.h> +#include <sys/stat.h> +#include <unistd.h> + +#if BFS_WITH_LIBURING +# include <liburing.h> +#endif /** * A monitor for an I/O queue slot. @@ -47,24 +173,17 @@ static void ioq_monitor_destroy(struct ioq_monitor *monitor) { /** A single entry in a command queue. */ typedef atomic uintptr_t ioq_slot; -/** Slot flag bit to indicate waiters. */ +/** Someone might be waiting on this slot. */ #define IOQ_BLOCKED ((uintptr_t)1) -bfs_static_assert(alignof(struct ioq_ent) > 1); -/** Check if a slot has waiters. */ -static bool ioq_slot_blocked(uintptr_t value) { - return value & IOQ_BLOCKED; -} +/** Bit for IOQ_SKIP. */ +#define IOQ_SKIP_BIT (UINTPTR_WIDTH - 1) +/** The next push(es) should skip this slot. */ +#define IOQ_SKIP ((uintptr_t)1 << IOQ_SKIP_BIT) +/** Amount to add for an additional skip. */ +#define IOQ_SKIP_ONE (~IOQ_BLOCKED) -/** Extract the pointer from a slot. */ -static struct ioq_ent *ioq_slot_ptr(uintptr_t value) { - return (struct ioq_ent *)(value & ~IOQ_BLOCKED); -} - -/** Check if a slot is empty. */ -static bool ioq_slot_empty(uintptr_t value) { - return !ioq_slot_ptr(value); -} +static_assert(alignof(struct ioq_ent) >= (1 << 2), "struct ioq_ent is underaligned"); /** * An MPMC queue of I/O commands. @@ -87,19 +206,12 @@ struct ioqq { cache_align ioq_slot slots[]; }; -// If we assign slots sequentially, threads will likely be operating on -// consecutive slots. If these slots are in the same cache line, that will -// result in false sharing. We can mitigate this by assigning slots with a -// stride larger than a cache line e.g. 0, 9, 18, ..., 1, 10, 19, ... -// As long as the stride is relatively prime to circular buffer length, we'll -// still use every available slot. Since the length is a power of two, that -// means the stride must be odd. - -#define IOQ_STRIDE ((FALSE_SHARING_SIZE / sizeof(ioq_slot)) | 1) -bfs_static_assert(IOQ_STRIDE % 2 == 1); - /** Destroy an I/O command queue. */ static void ioqq_destroy(struct ioqq *ioqq) { + if (!ioqq) { + return; + } + for (size_t i = 0; i < ioqq->monitor_mask + 1; ++i) { ioq_monitor_destroy(&ioqq->monitors[i]); } @@ -148,16 +260,45 @@ static struct ioqq *ioqq_create(size_t size) { /** Get the monitor associated with a slot. */ static struct ioq_monitor *ioq_slot_monitor(struct ioqq *ioqq, ioq_slot *slot) { - size_t i = slot - ioqq->slots; + uint32_t i = slot - ioqq->slots; + + // Hash the index to de-correlate waiters + // https://nullprogram.com/blog/2018/07/31/ + // https://github.com/skeeto/hash-prospector/issues/19#issuecomment-1120105785 + i ^= i >> 16; + i *= UINT32_C(0x21f0aaad); + i ^= i >> 15; + i *= UINT32_C(0x735a2d97); + i ^= i >> 15; + return &ioqq->monitors[i & ioqq->monitor_mask]; } /** Atomically wait for a slot to change. */ +_noinline static uintptr_t ioq_slot_wait(struct ioqq *ioqq, ioq_slot *slot, uintptr_t value) { + uintptr_t ret; + + // Try spinning a few times (with exponential backoff) before blocking + _nounroll + for (int i = 1; i < 1024; i *= 2) { + _nounroll + for (int j = 0; j < i; ++j) { + spin_loop(); + } + + // Check if the slot changed + ret = load(slot, relaxed); + if (ret != value) { + return ret; + } + } + + // Nothing changed, start blocking struct ioq_monitor *monitor = ioq_slot_monitor(ioqq, slot); mutex_lock(&monitor->mutex); - uintptr_t ret = load(slot, relaxed); + ret = load(slot, relaxed); if (ret != value) { goto done; } @@ -182,6 +323,7 @@ done: } /** Wake up any threads waiting on a slot. */ +_noinline static void ioq_slot_wake(struct ioqq *ioqq, ioq_slot *slot) { struct ioq_monitor *monitor = ioq_slot_monitor(ioqq, slot); @@ -201,85 +343,228 @@ static void ioq_slot_wake(struct ioqq *ioqq, ioq_slot *slot) { cond_broadcast(&monitor->cond); } -/** Get the next slot for writing. */ -static ioq_slot *ioqq_write(struct ioqq *ioqq) { - size_t i = fetch_add(&ioqq->head, IOQ_STRIDE, relaxed); - return &ioqq->slots[i & ioqq->slot_mask]; +/** Branch-free ((slot & IOQ_SKIP) ? skip : full) & ~IOQ_BLOCKED */ +static uintptr_t ioq_slot_blend(uintptr_t slot, uintptr_t skip, uintptr_t full) { + uintptr_t mask = -(slot >> IOQ_SKIP_BIT); + uintptr_t ret = (skip & mask) | (full & ~mask); + return ret & ~IOQ_BLOCKED; } /** Push an entry into a slot. */ -static void ioq_slot_push(struct ioqq *ioqq, ioq_slot *slot, struct ioq_ent *ent) { - uintptr_t addr = (uintptr_t)ent; - bfs_assert(!ioq_slot_blocked(addr)); +static bool ioq_slot_push(struct ioqq *ioqq, ioq_slot *slot, struct ioq_ent *ent) { + uintptr_t prev = load(slot, relaxed); + + while (true) { + uintptr_t full = ioq_slot_blend(prev, 0, prev); + if (full) { + // full(ptr) → wait + prev = ioq_slot_wait(ioqq, slot, prev); + continue; + } + + // empty → full(ptr) + uintptr_t next = (uintptr_t)ent >> 1; + // skip(1) → empty + // skip(n) → skip(n - 1) + next = ioq_slot_blend(prev, prev - IOQ_SKIP_ONE, next); + + if (compare_exchange_weak(slot, &prev, next, release, relaxed)) { + break; + } + } + + if (prev & IOQ_BLOCKED) { + ioq_slot_wake(ioqq, slot); + } + return !(prev & IOQ_SKIP); +} + +/** (Try to) pop an entry from a slot. */ +static struct ioq_ent *ioq_slot_pop(struct ioqq *ioqq, ioq_slot *slot, bool block) { uintptr_t prev = load(slot, relaxed); - do { - while (!ioq_slot_empty(prev)) { + while (true) { +#if __has_builtin(__builtin_prefetch) + // Optimistically prefetch the pointer in this slot. If this + // slot is not full, this will prefetch an invalid address, but + // experimentally this is worth it on both Intel (Alder Lake) + // and AMD (Zen 2). + __builtin_prefetch((void *)(prev << 1), 1 /* write */); +#endif + + // empty → skip(1) + // skip(n) → skip(n + 1) + // full(ptr) → full(ptr - 1) + uintptr_t next = prev + IOQ_SKIP_ONE; + // full(ptr) → 0 + next = ioq_slot_blend(next, next, 0); + + if (block && next) { prev = ioq_slot_wait(ioqq, slot, prev); + continue; + } + + if (compare_exchange_weak(slot, &prev, next, acquire, relaxed)) { + break; } - } while (!compare_exchange_weak(slot, &prev, addr, release, relaxed)); + } - if (ioq_slot_blocked(prev)) { + if (prev & IOQ_BLOCKED) { ioq_slot_wake(ioqq, slot); } + + // empty → 0 + // skip(n) → 0 + // full(ptr) → ptr + prev = ioq_slot_blend(prev, 0, prev); + return (struct ioq_ent *)(prev << 1); } /** Push an entry onto the queue. */ static void ioqq_push(struct ioqq *ioqq, struct ioq_ent *ent) { - ioq_slot *slot = ioqq_write(ioqq); - ioq_slot_push(ioqq, slot, ent); -} - -/** Get the next slot for reading. */ -static ioq_slot *ioqq_read(struct ioqq *ioqq) { - size_t i = fetch_add(&ioqq->tail, IOQ_STRIDE, relaxed); - return &ioqq->slots[i & ioqq->slot_mask]; + while (true) { + size_t i = fetch_add(&ioqq->head, 1, relaxed); + ioq_slot *slot = &ioqq->slots[i & ioqq->slot_mask]; + if (ioq_slot_push(ioqq, slot, ent)) { + break; + } + } } -/** (Try to) pop an entry from a slot. */ -static struct ioq_ent *ioq_slot_pop(struct ioqq *ioqq, ioq_slot *slot, bool block) { - uintptr_t prev = load(slot, relaxed); +/** Push a batch of entries to the queue. */ +static void ioqq_push_batch(struct ioqq *ioqq, struct ioq_ent *batch[], size_t size) { + size_t mask = ioqq->slot_mask; do { - while (ioq_slot_empty(prev)) { - if (block) { - prev = ioq_slot_wait(ioqq, slot, prev); - } else { - return NULL; + size_t i = fetch_add(&ioqq->head, size, relaxed); + for (size_t j = i + size; i != j; ++i) { + ioq_slot *slot = &ioqq->slots[i & mask]; + if (ioq_slot_push(ioqq, slot, *batch)) { + ++batch; + --size; } } - } while (!compare_exchange_weak(slot, &prev, 0, acquire, relaxed)); + } while (size > 0); +} - if (ioq_slot_blocked(prev)) { - ioq_slot_wake(ioqq, slot); +/** Pop a batch of entries from the queue. */ +static void ioqq_pop_batch(struct ioqq *ioqq, struct ioq_ent *batch[], size_t size, bool block) { + size_t mask = ioqq->slot_mask; + size_t i = fetch_add(&ioqq->tail, size, relaxed); + for (size_t j = i + size; i != j; ++i) { + ioq_slot *slot = &ioqq->slots[i & mask]; + *batch++ = ioq_slot_pop(ioqq, slot, block); + block = false; } +} + +/** Use cache-line-sized batches. */ +#define IOQ_BATCH (FALSE_SHARING_SIZE / sizeof(ioq_slot)) + +/** + * A batch of I/O queue entries. + */ +struct ioq_batch { + /** The start of the batch. */ + size_t head; + /** The end of the batch. */ + size_t tail; + /** The array of entries. */ + struct ioq_ent *entries[IOQ_BATCH]; +}; - return ioq_slot_ptr(prev); +/** Reset a batch. */ +static void ioq_batch_reset(struct ioq_batch *batch) { + batch->head = batch->tail = 0; } -/** Pop an entry from the queue. */ -static struct ioq_ent *ioqq_pop(struct ioqq *ioqq) { - ioq_slot *slot = ioqq_read(ioqq); - return ioq_slot_pop(ioqq, slot, true); +/** Check if a batch is empty. */ +static bool ioq_batch_empty(const struct ioq_batch *batch) { + return batch->head >= batch->tail; +} + +/** Send a batch to a queue. */ +static void ioq_batch_flush(struct ioqq *ioqq, struct ioq_batch *batch) { + if (batch->tail > 0) { + ioqq_push_batch(ioqq, batch->entries, batch->tail); + ioq_batch_reset(batch); + } } -/** Pop an entry from the queue if one is available. */ -static struct ioq_ent *ioqq_trypop(struct ioqq *ioqq) { - size_t i = load(&ioqq->tail, relaxed); - ioq_slot *slot = &ioqq->slots[i & ioqq->slot_mask]; +/** Push an entry to a batch, flushing if necessary. */ +static void ioq_batch_push(struct ioqq *ioqq, struct ioq_batch *batch, struct ioq_ent *ent) { + batch->entries[batch->tail++] = ent; - struct ioq_ent *ret = ioq_slot_pop(ioqq, slot, false); - if (ret) { - size_t j = exchange(&ioqq->tail, i + IOQ_STRIDE, relaxed); - bfs_assert(j == i, "Detected multiple consumers"); - (void)j; + if (batch->tail >= IOQ_BATCH) { + ioq_batch_flush(ioqq, batch); } +} - return ret; +/** Fill a batch from a queue. */ +static bool ioq_batch_fill(struct ioqq *ioqq, struct ioq_batch *batch, bool block) { + ioqq_pop_batch(ioqq, batch->entries, IOQ_BATCH, block); + + ioq_batch_reset(batch); + for (size_t i = 0; i < IOQ_BATCH; ++i) { + struct ioq_ent *ent = batch->entries[i]; + if (ent) { + batch->entries[batch->tail++] = ent; + } + } + + return batch->tail > 0; +} + +/** Pop an entry from a batch, filling it first if necessary. */ +static struct ioq_ent *ioq_batch_pop(struct ioqq *ioqq, struct ioq_batch *batch, bool block) { + if (ioq_batch_empty(batch)) { + // For non-blocking pops, make sure that each ioq_batch_pop() + // corresponds to a single (amortized) increment of ioqq->head. + // Otherwise, we start skipping many slots and batching ends up + // degrading performance. + if (!block && batch->head < IOQ_BATCH) { + ++batch->head; + return NULL; + } + + if (!ioq_batch_fill(ioqq, batch, block)) { + return NULL; + } + } + + return batch->entries[batch->head++]; } /** Sentinel stop command. */ static struct ioq_ent IOQ_STOP; +#if BFS_WITH_LIBURING +/** + * Supported io_uring operations. + */ +enum ioq_ring_ops { + IOQ_RING_OPENAT = 1 << 0, + IOQ_RING_CLOSE = 1 << 1, + IOQ_RING_STATX = 1 << 2, +}; +#endif + +/** I/O queue thread-specific data. */ +struct ioq_thread { + /** The thread handle. */ + pthread_t id; + /** Pointer back to the I/O queue. */ + struct ioq *parent; + +#if BFS_WITH_LIBURING + /** io_uring instance. */ + struct io_uring ring; + /** Any error that occurred initializing the ring. */ + int ring_err; + /** Bitmask of supported io_uring operations. */ + enum ioq_ring_ops ring_ops; +#endif +}; + struct ioq { /** The depth of the queue. */ size_t depth; @@ -290,72 +575,558 @@ struct ioq { /** ioq_ent arena. */ struct arena ents; +#if BFS_WITH_LIBURING && BFS_USE_STATX + /** struct statx arena. */ + struct arena xbufs; +#endif - /** Pending I/O requests. */ + /** Pending I/O request queue. */ struct ioqq *pending; - /** Ready I/O responses. */ + /** Ready I/O response queue. */ struct ioqq *ready; + /** Pending request batch. */ + struct ioq_batch pending_batch; + /** Ready request batch. */ + struct ioq_batch ready_batch; + /** The number of background threads. */ size_t nthreads; /** The background threads themselves. */ - pthread_t threads[]; + struct ioq_thread threads[]; }; -/** Background thread entry point. */ -static void *ioq_work(void *ptr) { - struct ioq *ioq = ptr; +/** Cancel a request if we need to. */ +static bool ioq_check_cancel(struct ioq *ioq, struct ioq_ent *ent) { + if (!load(&ioq->cancel, relaxed)) { + return false; + } - while (true) { - struct ioq_ent *ent = ioqq_pop(ioq->pending); - if (ent == &IOQ_STOP) { - break; - } + // Always close(), even if we're cancelled, just like a real EINTR + if (ent->op == IOQ_CLOSE || ent->op == IOQ_CLOSEDIR) { + return false; + } - bool cancel = load(&ioq->cancel, relaxed); + ent->result = -EINTR; + return true; +} - ent->ret = -1; +/** Dispatch a single request synchronously. */ +static void ioq_dispatch_sync(struct ioq *ioq, struct ioq_ent *ent) { + switch (ent->op) { + case IOQ_NOP: + if (ent->nop.type == IOQ_NOP_HEAVY) { + // A fast, no-op syscall + getppid(); + } + ent->result = 0; + return; - switch (ent->op) { case IOQ_CLOSE: - // Always close(), even if we're cancelled, just like a real EINTR - ent->ret = xclose(ent->close.fd); - break; + ent->result = try(xclose(ent->close.fd)); + return; + + case IOQ_OPENDIR: { + struct ioq_opendir *args = &ent->opendir; + ent->result = try(bfs_opendir(args->dir, args->dfd, args->path, args->flags)); + if (ent->result >= 0) { + bfs_polldir(args->dir); + } + return; + } + + case IOQ_CLOSEDIR: + ent->result = try(bfs_closedir(ent->closedir.dir)); + return; + + case IOQ_STAT: { + struct ioq_stat *args = &ent->stat; + ent->result = try(bfs_stat(args->dfd, args->path, args->flags, args->buf)); + return; + } + } + + bfs_bug("Unknown ioq_op %d", (int)ent->op); + ent->result = -ENOSYS; +} - case IOQ_OPENDIR: - if (!cancel) { - struct ioq_opendir *args = &ent->opendir; - ent->ret = bfs_opendir(args->dir, args->dfd, args->path); - if (ent->ret == 0) { - bfs_polldir(args->dir); - } +#if BFS_WITH_LIBURING + +/** io_uring worker state. */ +struct ioq_ring_state { + /** The I/O queue. */ + struct ioq *ioq; + /** The io_uring. */ + struct io_uring *ring; + /** Supported io_uring operations. */ + enum ioq_ring_ops ops; + /** Number of prepped, unsubmitted SQEs. */ + size_t prepped; + /** Number of submitted, unreaped SQEs. */ + size_t submitted; + /** Whether to stop the loop. */ + bool stop; + /** A batch of ready entries. */ + struct ioq_batch ready; +}; + +/** Reap a single CQE. */ +static void ioq_reap_cqe(struct ioq_ring_state *state, struct io_uring_cqe *cqe) { + struct ioq *ioq = state->ioq; + + struct ioq_ent *ent = io_uring_cqe_get_data(cqe); + ent->result = cqe->res; + + if (ent->result < 0) { + goto push; + } + + switch (ent->op) { + case IOQ_OPENDIR: { + int fd = ent->result; + if (ioq_check_cancel(ioq, ent)) { + xclose(fd); + goto push; + } + + struct ioq_opendir *args = &ent->opendir; + ent->result = try(bfs_opendir(args->dir, fd, NULL, args->flags)); + if (ent->result >= 0) { + // TODO: io_uring_prep_getdents() + bfs_polldir(args->dir); + } else { + xclose(fd); } + break; + } - case IOQ_CLOSEDIR: - ent->ret = bfs_closedir(ent->closedir.dir); +#if BFS_USE_STATX + case IOQ_STAT: { + struct ioq_stat *args = &ent->stat; + ent->result = try(bfs_statx_convert(args->buf, args->xbuf)); break; + } +#endif default: - bfs_bug("Unknown ioq_op %d", (int)ent->op); - errno = ENOSYS; break; + } + +push: + ioq_batch_push(ioq->ready, &state->ready, ent); +} + +/** Wait for submitted requests to complete. */ +static void ioq_ring_drain(struct ioq_ring_state *state, size_t wait_nr) { + struct ioq *ioq = state->ioq; + struct io_uring *ring = state->ring; + + bfs_assert(wait_nr <= state->submitted); + + while (state->submitted > 0) { + struct io_uring_cqe *cqe; + if (wait_nr > 0) { + io_uring_wait_cqes(ring, &cqe, wait_nr, NULL, NULL); + } + + unsigned int head; + size_t seen = 0; + io_uring_for_each_cqe (ring, head, cqe) { + ioq_reap_cqe(state, cqe); + ++seen; + } + + io_uring_cq_advance(ring, seen); + state->submitted -= seen; + + if (seen >= wait_nr) { + break; + } + wait_nr -= seen; + } + + ioq_batch_flush(ioq->ready, &state->ready); +} + +/** Submit prepped SQEs, and wait for some to complete. */ +static void ioq_ring_submit(struct ioq_ring_state *state) { + struct io_uring *ring = state->ring; + + size_t unreaped = state->prepped + state->submitted; + size_t wait_nr = 0; + + if (state->prepped == 0 && unreaped > 0) { + // If we have no new SQEs, wait for at least one old one to + // complete, to avoid livelock + wait_nr = 1; + } + + if (unreaped > ring->sq.ring_entries) { + // Keep the completion queue below half full + wait_nr = unreaped - ring->sq.ring_entries; + } + + // Submit all prepped SQEs + while (state->prepped > 0) { + int ret = io_uring_submit_and_wait(state->ring, wait_nr); + if (ret <= 0) { + continue; } - if (cancel) { - ent->error = EINTR; - } else if (ent->ret < 0) { - ent->error = errno; + state->submitted += ret; + state->prepped -= ret; + if (state->prepped > 0) { + // In the unlikely event of a short submission, any SQE + // links will be broken. Wait for all SQEs to complete + // to preserve any ordering requirements. + ioq_ring_drain(state, state->submitted); + wait_nr = 0; + } + } + + // Drain all the CQEs we waited for (and any others that are ready) + ioq_ring_drain(state, wait_nr); +} + +/** Reserve space for a number of SQEs, submitting if necessary. */ +static void ioq_reserve_sqes(struct ioq_ring_state *state, unsigned int count) { + while (io_uring_sq_space_left(state->ring) < count) { + ioq_ring_submit(state); + } +} + +/** Get an SQE, submitting if necessary. */ +static struct io_uring_sqe *ioq_get_sqe(struct ioq_ring_state *state) { + ioq_reserve_sqes(state, 1); + return io_uring_get_sqe(state->ring); +} + +/** Dispatch a single request asynchronously. */ +static struct io_uring_sqe *ioq_dispatch_async(struct ioq_ring_state *state, struct ioq_ent *ent) { + enum ioq_ring_ops ops = state->ops; + struct io_uring_sqe *sqe = NULL; + + switch (ent->op) { + case IOQ_NOP: + if (ent->nop.type == IOQ_NOP_HEAVY) { + sqe = ioq_get_sqe(state); + io_uring_prep_nop(sqe); + } + return sqe; + + case IOQ_CLOSE: + if (ops & IOQ_RING_CLOSE) { + sqe = ioq_get_sqe(state); + io_uring_prep_close(sqe, ent->close.fd); + } + return sqe; + + case IOQ_OPENDIR: + if (ops & IOQ_RING_OPENAT) { + sqe = ioq_get_sqe(state); + struct ioq_opendir *args = &ent->opendir; + int flags = O_RDONLY | O_CLOEXEC | O_DIRECTORY; + io_uring_prep_openat(sqe, args->dfd, args->path, flags, 0); + } + return sqe; + + case IOQ_CLOSEDIR: +#if BFS_USE_UNWRAPDIR + if (ops & IOQ_RING_CLOSE) { + sqe = ioq_get_sqe(state); + io_uring_prep_close(sqe, bfs_unwrapdir(ent->closedir.dir)); + } +#endif + return sqe; + + case IOQ_STAT: +#if BFS_USE_STATX + if (ops & IOQ_RING_STATX) { + sqe = ioq_get_sqe(state); + struct ioq_stat *args = &ent->stat; + int flags = bfs_statx_flags(args->flags); + unsigned int mask = bfs_statx_mask(); + io_uring_prep_statx(sqe, args->dfd, args->path, flags, mask, args->xbuf); + } +#endif + return sqe; + } + + bfs_bug("Unknown ioq_op %d", (int)ent->op); + return NULL; +} + +/** Check if ioq_ring_reap() has work to do. */ +static bool ioq_ring_empty(struct ioq_ring_state *state) { + return !state->prepped && !state->submitted && ioq_batch_empty(&state->ready); +} + +/** Prep a single SQE. */ +static void ioq_prep_sqe(struct ioq_ring_state *state, struct ioq_ent *ent) { + struct ioq *ioq = state->ioq; + if (ioq_check_cancel(ioq, ent)) { + ioq_batch_push(ioq->ready, &state->ready, ent); + return; + } + + struct io_uring_sqe *sqe = ioq_dispatch_async(state, ent); + if (sqe) { + io_uring_sqe_set_data(sqe, ent); + ++state->prepped; + } else { + ioq_dispatch_sync(ioq, ent); + ioq_batch_push(ioq->ready, &state->ready, ent); + } +} + +/** Prep a batch of SQEs. */ +static bool ioq_ring_prep(struct ioq_ring_state *state) { + if (state->stop) { + return false; + } + + struct ioq *ioq = state->ioq; + + struct ioq_batch pending; + ioq_batch_reset(&pending); + + while (true) { + bool block = ioq_ring_empty(state); + struct ioq_ent *ent = ioq_batch_pop(ioq->pending, &pending, block); + if (ent == &IOQ_STOP) { + ioqq_push(ioq->pending, ent); + state->stop = true; + break; + } else if (ent) { + ioq_prep_sqe(state, ent); } else { - ent->error = 0; + break; + } + } + + bfs_assert(ioq_batch_empty(&pending)); + return !ioq_ring_empty(state); +} + +/** io_uring worker loop. */ +static int ioq_ring_work(struct ioq_thread *thread) { + struct io_uring *ring = &thread->ring; + +#ifdef IORING_SETUP_R_DISABLED + if (ring->flags & IORING_SETUP_R_DISABLED) { + if (io_uring_enable_rings(ring) != 0) { + return -1; + } + } +#endif + + struct ioq_ring_state state = { + .ioq = thread->parent, + .ring = ring, + .ops = thread->ring_ops, + }; + + while (ioq_ring_prep(&state)) { + ioq_ring_submit(&state); + } + + ioq_ring_drain(&state, state.submitted); + return 0; +} + +#endif // BFS_WITH_LIBURING + +/** Synchronous syscall loop. */ +static void ioq_sync_work(struct ioq_thread *thread) { + struct ioq *ioq = thread->parent; + + struct ioq_batch pending, ready; + ioq_batch_reset(&pending); + ioq_batch_reset(&ready); + + while (true) { + if (ioq_batch_empty(&pending)) { + ioq_batch_flush(ioq->ready, &ready); + } + + struct ioq_ent *ent = ioq_batch_pop(ioq->pending, &pending, true); + if (ent == &IOQ_STOP) { + ioqq_push(ioq->pending, ent); + break; + } + + if (!ioq_check_cancel(ioq, ent)) { + ioq_dispatch_sync(ioq, ent); } + ioq_batch_push(ioq->ready, &ready, ent); + } + + bfs_assert(ioq_batch_empty(&pending)); + ioq_batch_flush(ioq->ready, &ready); +} - ioqq_push(ioq->ready, ent); +/** Background thread entry point. */ +static void *ioq_work(void *ptr) { + struct ioq_thread *thread = ptr; + +#if BFS_WITH_LIBURING + if (thread->ring_err == 0) { + if (ioq_ring_work(thread) == 0) { + return NULL; + } } +#endif + ioq_sync_work(thread); return NULL; } +#if BFS_WITH_LIBURING +/** Test whether some io_uring setup flags are supported. */ +static bool ioq_ring_probe_flags(struct io_uring_params *params, unsigned int flags) { + unsigned int saved = params->flags; + params->flags |= flags; + + struct io_uring ring; + int ret = io_uring_queue_init_params(2, &ring, params); + if (ret == 0) { + io_uring_queue_exit(&ring); + } + + if (ret == -EINVAL) { + params->flags = saved; + return false; + } + + return true; +} +#endif + +/** Initialize io_uring thread state. */ +static int ioq_ring_init(struct ioq *ioq, struct ioq_thread *thread) { +#if BFS_WITH_LIBURING + struct ioq_thread *prev = NULL; + if (thread > ioq->threads) { + prev = thread - 1; + } + + if (prev && prev->ring_err) { + thread->ring_err = prev->ring_err; + return -1; + } + + struct io_uring_params params = {0}; + + if (prev) { + // Share io-wq workers between rings + params.flags = prev->ring.flags | IORING_SETUP_ATTACH_WQ; + params.wq_fd = prev->ring.ring_fd; + } else { +#ifdef IORING_SETUP_SUBMIT_ALL + // Don't abort submission just because an inline request fails + ioq_ring_probe_flags(¶ms, IORING_SETUP_SUBMIT_ALL); +#endif + +#ifdef IORING_SETUP_R_DISABLED + // Don't enable the ring yet (needed for SINGLE_ISSUER) + if (ioq_ring_probe_flags(¶ms, IORING_SETUP_R_DISABLED)) { +# ifdef IORING_SETUP_SINGLE_ISSUER + // Allow optimizations assuming only one task submits SQEs + ioq_ring_probe_flags(¶ms, IORING_SETUP_SINGLE_ISSUER); +# endif +# ifdef IORING_SETUP_DEFER_TASKRUN + // Don't interrupt us aggressively with completion events + ioq_ring_probe_flags(¶ms, IORING_SETUP_DEFER_TASKRUN); +# endif + } +#endif + } + + // Use a page for each SQE ring + size_t entries = 4096 / sizeof(struct io_uring_sqe); + thread->ring_err = -io_uring_queue_init_params(entries, &thread->ring, ¶ms); + if (thread->ring_err) { + return -1; + } + + if (prev) { + // Initial setup already complete + thread->ring_ops = prev->ring_ops; + return 0; + } + + // Check for supported operations + struct io_uring_probe *probe = io_uring_get_probe_ring(&thread->ring); + if (probe) { + if (io_uring_opcode_supported(probe, IORING_OP_OPENAT)) { + thread->ring_ops |= IOQ_RING_OPENAT; + } + if (io_uring_opcode_supported(probe, IORING_OP_CLOSE)) { + thread->ring_ops |= IOQ_RING_CLOSE; + } +#if BFS_USE_STATX + if (io_uring_opcode_supported(probe, IORING_OP_STATX)) { + thread->ring_ops |= IOQ_RING_STATX; + } +#endif + io_uring_free_probe(probe); + } + if (!thread->ring_ops) { + io_uring_queue_exit(&thread->ring); + thread->ring_err = ENOTSUP; + return -1; + } + +#if BFS_HAS_IO_URING_MAX_WORKERS + // Limit the number of io_uring workers + unsigned int values[] = { + ioq->nthreads, // [IO_WQ_BOUND] + 0, // [IO_WQ_UNBOUND] + }; + io_uring_register_iowq_max_workers(&thread->ring, values); +#endif + +#endif // BFS_WITH_LIBURING + + return 0; +} + +/** Destroy an io_uring. */ +static void ioq_ring_exit(struct ioq_thread *thread) { +#if BFS_WITH_LIBURING + if (thread->ring_err == 0) { + io_uring_queue_exit(&thread->ring); + } +#endif +} + +/** Create an I/O queue thread. */ +static int ioq_thread_create(struct ioq *ioq, size_t i) { + struct ioq_thread *thread = &ioq->threads[i]; + thread->parent = ioq; + + ioq_ring_init(ioq, thread); + + if (thread_create(&thread->id, NULL, ioq_work, thread) != 0) { + ioq_ring_exit(thread); + return -1; + } + + char name[16]; + if (snprintf(name, sizeof(name), "ioq-%zu", i) >= 0) { + thread_setname(thread->id, name); + } + + return 0; +} + +/** Join an I/O queue thread. */ +static void ioq_thread_join(struct ioq_thread *thread) { + thread_join(thread->id, NULL); + ioq_ring_exit(thread); +} + struct ioq *ioq_create(size_t depth, size_t nthreads) { struct ioq *ioq = ZALLOC_FLEX(struct ioq, threads, nthreads); if (!ioq) { @@ -363,7 +1134,11 @@ struct ioq *ioq_create(size_t depth, size_t nthreads) { } ioq->depth = depth; + ARENA_INIT(&ioq->ents, struct ioq_ent); +#if BFS_WITH_LIBURING && BFS_USE_STATX + ARENA_INIT(&ioq->xbufs, struct statx); +#endif ioq->pending = ioqq_create(depth); if (!ioq->pending) { @@ -375,11 +1150,12 @@ struct ioq *ioq_create(size_t depth, size_t nthreads) { goto fail; } + ioq->nthreads = nthreads; for (size_t i = 0; i < nthreads; ++i) { - if (thread_create(&ioq->threads[i], NULL, ioq_work, ioq) != 0) { + if (ioq_thread_create(ioq, i) != 0) { + ioq->nthreads = i; goto fail; } - ++ioq->nthreads; } return ioq; @@ -418,6 +1194,18 @@ static struct ioq_ent *ioq_request(struct ioq *ioq, enum ioq_op op, void *ptr) { return ent; } +int ioq_nop(struct ioq *ioq, enum ioq_nop_type type, void *ptr) { + struct ioq_ent *ent = ioq_request(ioq, IOQ_NOP, ptr); + if (!ent) { + return -1; + } + + ent->nop.type = type; + + ioq_batch_push(ioq->pending, &ioq->pending_batch, ent); + return 0; +} + int ioq_close(struct ioq *ioq, int fd, void *ptr) { struct ioq_ent *ent = ioq_request(ioq, IOQ_CLOSE, ptr); if (!ent) { @@ -426,11 +1214,11 @@ int ioq_close(struct ioq *ioq, int fd, void *ptr) { ent->close.fd = fd; - ioqq_push(ioq->pending, ent); + ioq_batch_push(ioq->pending, &ioq->pending_batch, ent); return 0; } -int ioq_opendir(struct ioq *ioq, struct bfs_dir *dir, int dfd, const char *path, void *ptr) { +int ioq_opendir(struct ioq *ioq, struct bfs_dir *dir, int dfd, const char *path, enum bfs_dir_flags flags, void *ptr) { struct ioq_ent *ent = ioq_request(ioq, IOQ_OPENDIR, ptr); if (!ent) { return -1; @@ -440,8 +1228,9 @@ int ioq_opendir(struct ioq *ioq, struct bfs_dir *dir, int dfd, const char *path, args->dir = dir; args->dfd = dfd; args->path = path; + args->flags = flags; - ioqq_push(ioq->pending, ent); + ioq_batch_push(ioq->pending, &ioq->pending_batch, ent); return 0; } @@ -453,38 +1242,66 @@ int ioq_closedir(struct ioq *ioq, struct bfs_dir *dir, void *ptr) { ent->closedir.dir = dir; - ioqq_push(ioq->pending, ent); + ioq_batch_push(ioq->pending, &ioq->pending_batch, ent); return 0; } -struct ioq_ent *ioq_pop(struct ioq *ioq) { - if (ioq->size == 0) { - return NULL; +int ioq_stat(struct ioq *ioq, int dfd, const char *path, enum bfs_stat_flags flags, struct bfs_stat *buf, void *ptr) { + struct ioq_ent *ent = ioq_request(ioq, IOQ_STAT, ptr); + if (!ent) { + return -1; + } + + struct ioq_stat *args = &ent->stat; + args->dfd = dfd; + args->path = path; + args->flags = flags; + args->buf = buf; + +#if BFS_WITH_LIBURING && BFS_USE_STATX + args->xbuf = arena_alloc(&ioq->xbufs); + if (!args->xbuf) { + ioq_free(ioq, ent); + return -1; } +#endif - return ioqq_pop(ioq->ready); + ioq_batch_push(ioq->pending, &ioq->pending_batch, ent); + return 0; } -struct ioq_ent *ioq_trypop(struct ioq *ioq) { +void ioq_submit(struct ioq *ioq) { + ioq_batch_flush(ioq->pending, &ioq->pending_batch); +} + +struct ioq_ent *ioq_pop(struct ioq *ioq, bool block) { + // Don't forget to submit before popping + bfs_assert(ioq_batch_empty(&ioq->pending_batch)); + if (ioq->size == 0) { return NULL; } - return ioqq_trypop(ioq->ready); + return ioq_batch_pop(ioq->ready, &ioq->ready_batch, block); } void ioq_free(struct ioq *ioq, struct ioq_ent *ent) { bfs_assert(ioq->size > 0); --ioq->size; +#if BFS_WITH_LIBURING && BFS_USE_STATX + if (ent->op == IOQ_STAT && ent->stat.xbuf) { + arena_free(&ioq->xbufs, ent->stat.xbuf); + } +#endif + arena_free(&ioq->ents, ent); } void ioq_cancel(struct ioq *ioq) { if (!exchange(&ioq->cancel, true, relaxed)) { - for (size_t i = 0; i < ioq->nthreads; ++i) { - ioqq_push(ioq->pending, &IOQ_STOP); - } + ioq_batch_push(ioq->pending, &ioq->pending_batch, &IOQ_STOP); + ioq_submit(ioq); } } @@ -493,15 +1310,20 @@ void ioq_destroy(struct ioq *ioq) { return; } - ioq_cancel(ioq); + if (ioq->nthreads > 0) { + ioq_cancel(ioq); + } for (size_t i = 0; i < ioq->nthreads; ++i) { - thread_join(ioq->threads[i], NULL); + ioq_thread_join(&ioq->threads[i]); } ioqq_destroy(ioq->ready); ioqq_destroy(ioq->pending); +#if BFS_WITH_LIBURING && BFS_USE_STATX + arena_destroy(&ioq->xbufs); +#endif arena_destroy(&ioq->ents); free(ioq); @@ -8,6 +8,10 @@ #ifndef BFS_IOQ_H #define BFS_IOQ_H +#include "bfs.h" +#include "dir.h" +#include "stat.h" + #include <stddef.h> /** @@ -19,12 +23,26 @@ struct ioq; * I/O queue operations. */ enum ioq_op { + /** ioq_nop(). */ + IOQ_NOP, /** ioq_close(). */ IOQ_CLOSE, /** ioq_opendir(). */ IOQ_OPENDIR, /** ioq_closedir(). */ IOQ_CLOSEDIR, + /** ioq_stat(). */ + IOQ_STAT, +}; + +/** + * ioq_nop() types. + */ +enum ioq_nop_type { + /** A lightweight nop that avoids syscalls. */ + IOQ_NOP_LIGHT, + /** A heavyweight nop that involves a syscall. */ + IOQ_NOP_HEAVY, }; /** @@ -32,18 +50,20 @@ enum ioq_op { */ struct ioq_ent { /** The I/O operation. */ - enum ioq_op op; + cache_align enum ioq_op op; - /** The return value of the operation. */ - int ret; - /** The error code, if the operation failed. */ - int error; + /** The return value (on success) or negative error code (on failure). */ + int result; /** Arbitrary user data. */ void *ptr; /** Operation-specific arguments. */ union { + /** ioq_nop() args. */ + struct ioq_nop { + enum ioq_nop_type type; + } nop; /** ioq_close() args. */ struct ioq_close { int fd; @@ -51,22 +71,31 @@ struct ioq_ent { /** ioq_opendir() args. */ struct ioq_opendir { struct bfs_dir *dir; - int dfd; const char *path; + int dfd; + enum bfs_dir_flags flags; } opendir; /** ioq_closedir() args. */ struct ioq_closedir { struct bfs_dir *dir; } closedir; + /** ioq_stat() args. */ + struct ioq_stat { + const char *path; + struct bfs_stat *buf; + void *xbuf; + int dfd; + enum bfs_stat_flags flags; + } stat; }; }; /** * Create an I/O queue. * - * @param depth + * @depth * The maximum depth of the queue. - * @param nthreads + * @nthreads * The maximum number of background threads. * @return * The new I/O queue, or NULL on failure. @@ -79,13 +108,27 @@ struct ioq *ioq_create(size_t depth, size_t nthreads); size_t ioq_capacity(const struct ioq *ioq); /** + * A no-op, for benchmarking. + * + * @ioq + * The I/O queue. + * @type + * The type of operation to perform. + * @ptr + * An arbitrary pointer to associate with the request. + * @return + * 0 on success, or -1 on failure. + */ +int ioq_nop(struct ioq *ioq, enum ioq_nop_type type, void *ptr); + +/** * Asynchronous close(). * - * @param ioq + * @ioq * The I/O queue. - * @param fd + * @fd * The fd to close. - * @param ptr + * @ptr * An arbitrary pointer to associate with the request. * @return * 0 on success, or -1 on failure. @@ -95,29 +138,31 @@ int ioq_close(struct ioq *ioq, int fd, void *ptr); /** * Asynchronous bfs_opendir(). * - * @param ioq + * @ioq * The I/O queue. - * @param dir + * @dir * The allocated directory. - * @param dfd + * @dfd * The base file descriptor. - * @param path + * @path * The path to open, relative to dfd. - * @param ptr + * @flags + * Flags that control which directory entries are listed. + * @ptr * An arbitrary pointer to associate with the request. * @return * 0 on success, or -1 on failure. */ -int ioq_opendir(struct ioq *ioq, struct bfs_dir *dir, int dfd, const char *path, void *ptr); +int ioq_opendir(struct ioq *ioq, struct bfs_dir *dir, int dfd, const char *path, enum bfs_dir_flags flags, void *ptr); /** * Asynchronous bfs_closedir(). * - * @param ioq + * @ioq * The I/O queue. - * @param dir + * @dir * The directory to close. - * @param ptr + * @ptr * An arbitrary pointer to associate with the request. * @return * 0 on success, or -1 on failure. @@ -125,31 +170,46 @@ int ioq_opendir(struct ioq *ioq, struct bfs_dir *dir, int dfd, const char *path, int ioq_closedir(struct ioq *ioq, struct bfs_dir *dir, void *ptr); /** - * Pop a response from the queue. + * Asynchronous bfs_stat(). * - * @param ioq + * @ioq * The I/O queue. + * @dfd + * The base file descriptor. + * @path + * The path to stat, relative to dfd. + * @flags + * Flags that affect the lookup. + * @buf + * A place to store the stat buffer, if successful. + * @ptr + * An arbitrary pointer to associate with the request. * @return - * The next response, or NULL. + * 0 on success, or -1 on failure. + */ +int ioq_stat(struct ioq *ioq, int dfd, const char *path, enum bfs_stat_flags flags, struct bfs_stat *buf, void *ptr); + +/** + * Submit any buffered requests. */ -struct ioq_ent *ioq_pop(struct ioq *ioq); +void ioq_submit(struct ioq *ioq); /** - * Pop a response from the queue, without blocking. + * Pop a response from the queue. * - * @param ioq + * @ioq * The I/O queue. * @return * The next response, or NULL. */ -struct ioq_ent *ioq_trypop(struct ioq *ioq); +struct ioq_ent *ioq_pop(struct ioq *ioq, bool block); /** * Free a queue entry. * - * @param ioq + * @ioq * The I/O queue. - * @param ent + * @ent * The entry to free. */ void ioq_free(struct ioq *ioq, struct ioq_ent *ent); @@ -21,6 +21,7 @@ * SLIST_INIT(&list); * * struct item item; + * SLIST_ITEM_INIT(&item); * SLIST_APPEND(&list, &item); * * Doubly linked lists are similar: @@ -39,6 +40,7 @@ * LIST_INIT(&list); * * struct item item; + * LIST_ITEM_INIT(&item); * LIST_APPEND(&list, &item); * * Items can be on multiple lists at once: @@ -71,20 +73,24 @@ * LIST_INIT(&items.cache); * * struct item item; + * SLIST_ITEM_INIT(&item, chain); * SLIST_APPEND(&items.queue, &item, chain); + * LIST_ITEM_INIT(&item, lru); * LIST_APPEND(&items.cache, &item, lru); */ #ifndef BFS_LIST_H #define BFS_LIST_H +#include "diag.h" + #include <stddef.h> #include <string.h> /** * Initialize a singly-linked list. * - * @param list + * @list * The list to initialize. * * --- @@ -94,56 +100,47 @@ * don't have to. */ #define SLIST_INIT(list) \ - LIST_BLOCK_(SLIST_INIT_((list))) + SLIST_INIT_((list)) -#define SLIST_INIT_(list) \ - list->head = NULL; \ - list->tail = &list->head; +/** + * Helper for SLIST_INIT(). + */ +#define SLIST_INIT_(list) LIST_VOID_( \ + list->head = NULL, \ + list->tail = &list->head) /** - * Wraps a group of statements in a block. + * Cast a list of expressions to void. */ -#define LIST_BLOCK_(block) do { block } while (0) +#define LIST_VOID_(...) ((void)(__VA_ARGS__)) /** - * Insert an item into a singly-linked list. + * Initialize a singly-linked list item. * - * @param list - * The list to modify. - * @param cursor - * A pointer to the item to insert after, e.g. &list->head or list->tail. - * @param item - * The item to insert. - * @param node (optional) + * @item + * The item to initialize. + * @node (optional) * If specified, use item->node.next rather than item->next. * * --- * * We play some tricks with variadic macros to handle the optional parameter: * - * SLIST_INSERT(list, cursor, item) => { - * item->next = *cursor; - * *cursor = item; - * list->tail = item->next ? list->tail : &item->next; - * } - * - * SLIST_INSERT(list, cursor, item, node) => { - * item->node.next = *cursor; - * *cursor = item; - * list->tail = item->node.next ? list->tail : &item->node.next; - * } + * SLIST_ITEM_INIT(item) => item->next = NULL + * SLIST_ITEM_INIT(item, node) => item->node.next = NULL * * The first trick is that * - * #define SLIST_INSERT(list, item, cursor, ...) + * #define SLIST_ITEM_INIT(item, ...) * * won't work because both commas are required (until C23; see N3033). As a * workaround, we dispatch to another macro and add a trailing comma. * - * SLIST_INSERT(list, cursor, item) => SLIST_INSERT_(list, cursor, item, ) - * SLIST_INSERT(list, cursor, item, node) => SLIST_INSERT_(list, cursor, item, node, ) + * SLIST_ITEM_INIT(item) => SLIST_ITEM_INIT_(item, ) + * SLIST_ITEM_INIT(item, node) => SLIST_ITEM_INIT_(item, node, ) */ -#define SLIST_INSERT(list, cursor, ...) SLIST_INSERT_(list, cursor, __VA_ARGS__, ) +#define SLIST_ITEM_INIT(...) \ + SLIST_ITEM_INIT_(__VA_ARGS__, ) /** * Now we need a way to generate either ->next or ->node.next depending on @@ -160,7 +157,8 @@ * LIST_NEXT_() => LIST_NODE_(next, ) * LIST_NEXT_(node, ) => LIST_NODE_(next, node, ) */ -#define LIST_NEXT_(...) LIST_NODE_(next, __VA_ARGS__) +#define LIST_NEXT_(...) \ + LIST_NODE_(next, __VA_ARGS__) /** * LIST_NODE_() dispatches to yet another macro: @@ -168,7 +166,8 @@ * LIST_NODE_(next, ) => LIST_NODE__(next, , . , , ) * LIST_NODE_(next, node, ) => LIST_NODE__(next, node, , . , , ) */ -#define LIST_NODE_(dir, ...) LIST_NODE__(dir, __VA_ARGS__, . , , ) +#define LIST_NODE_(dir, ...) \ + LIST_NODE__(dir, __VA_ARGS__, . , , ) /** * And finally, LIST_NODE__() adds the node and the dot if necessary. @@ -180,217 +179,435 @@ * ^ ^ ^ ^ * dir node ignored dot */ -#define LIST_NODE__(dir, node, ignored, dot, ...) node dot dir +#define LIST_NODE__(dir, node, ignored, dot, ...) \ + node dot dir /** - * SLIST_INSERT_() uses LIST_NEXT_() to generate the right name for the list + * SLIST_ITEM_INIT_() uses LIST_NEXT_() to generate the right name for the list * node, and finally delegates to the actual implementation. */ +#define SLIST_ITEM_INIT_(item, ...) \ + SLIST_ITEM_INIT__((item), LIST_NEXT_(__VA_ARGS__)) + +#define SLIST_ITEM_INIT__(item, next) \ + LIST_VOID_(item->next = NULL) + +/** + * Type-checking macro for singly-linked lists. + */ +#define SLIST_CHECK_(list) \ + (void)sizeof(list->tail - &list->head) + +/** + * Get the head of a singly-linked list. + * + * @list + * The list in question. + * @return + * The first item in the list. + */ +#define SLIST_HEAD(list) \ + SLIST_HEAD_((list)) + +#define SLIST_HEAD_(list) \ + (SLIST_CHECK_(list), list->head) + +/** + * Check if a singly-linked list is empty. + */ +#define SLIST_EMPTY(list) \ + (!SLIST_HEAD(list)) + +/** + * Like container_of(), but using the head pointer instead of offsetof() since + * we don't have the type around. + */ +#define SLIST_CONTAINER_(tail, head, next) \ + (void *)((char *)tail - ((char *)&head->next - (char *)head)) + +/** + * Get the tail of a singly-linked list. + * + * @list + * The list in question. + * @node (optional) + * If specified, use item->node.next rather than item->next. + * @return + * The last item in the list. + */ +#define SLIST_TAIL(...) \ + SLIST_TAIL_(__VA_ARGS__, ) + +#define SLIST_TAIL_(list, ...) \ + SLIST_TAIL__((list), LIST_NEXT_(__VA_ARGS__)) + +#define SLIST_TAIL__(list, next) \ + (list->head ? SLIST_CONTAINER_(list->tail, list->head, next) : NULL) + +/** + * Check if an item is attached to a singly-linked list. + * + * @list + * The list to check. + * @item + * The item to check. + * @node (optional) + * If specified, use item->node.next rather than item->next. + * @return + * Whether the item is attached to the list. + */ +#define SLIST_ATTACHED(list, ...) \ + SLIST_ATTACHED_(list, __VA_ARGS__, ) + +#define SLIST_ATTACHED_(list, item, ...) \ + SLIST_ATTACHED__((list), (item), LIST_NEXT_(__VA_ARGS__)) + +#define SLIST_ATTACHED__(list, item, next) \ + (item->next || list->tail == &item->next) + +/** + * Insert an item into a singly-linked list. + * + * @list + * The list to modify. + * @cursor + * A pointer to the item to insert after, e.g. &list->head or list->tail. + * @item + * The item to insert. + * @node (optional) + * If specified, use item->node.next rather than item->next. + * @return + * A cursor for the next item. + */ +#define SLIST_INSERT(list, cursor, ...) \ + SLIST_INSERT_(list, cursor, __VA_ARGS__, ) + #define SLIST_INSERT_(list, cursor, item, ...) \ - LIST_BLOCK_(SLIST_INSERT__((list), (cursor), (item), LIST_NEXT_(__VA_ARGS__))) + SLIST_INSERT__((list), (cursor), (item), LIST_NEXT_(__VA_ARGS__)) #define SLIST_INSERT__(list, cursor, item, next) \ - item->next = *cursor; \ - *cursor = item; \ - list->tail = item->next ? list->tail : &item->next; + (bfs_assert(!SLIST_ATTACHED__(list, item, next)), \ + item->next = *cursor, \ + *cursor = item, \ + list->tail = item->next ? list->tail : &item->next, \ + &item->next) /** * Add an item to the tail of a singly-linked list. * - * @param list + * @list * The list to modify. - * @param item + * @item * The item to append. - * @param node (optional) + * @node (optional) * If specified, use item->node.next rather than item->next. */ -#define SLIST_APPEND(list, ...) SLIST_APPEND_(list, __VA_ARGS__, ) +#define SLIST_APPEND(list, ...) \ + SLIST_APPEND_(list, __VA_ARGS__, ) #define SLIST_APPEND_(list, item, ...) \ - SLIST_INSERT_(list, (list)->tail, item, __VA_ARGS__) + LIST_VOID_(SLIST_INSERT_(list, (list)->tail, item, __VA_ARGS__)) /** * Add an item to the head of a singly-linked list. * - * @param list + * @list * The list to modify. - * @param item + * @item * The item to prepend. - * @param node (optional) + * @node (optional) * If specified, use item->node.next rather than item->next. */ -#define SLIST_PREPEND(list, ...) SLIST_PREPEND_(list, __VA_ARGS__, ) +#define SLIST_PREPEND(list, ...) \ + SLIST_PREPEND_(list, __VA_ARGS__, ) #define SLIST_PREPEND_(list, item, ...) \ - SLIST_INSERT_(list, &(list)->head, item, __VA_ARGS__) + LIST_VOID_(SLIST_INSERT_(list, &(list)->head, item, __VA_ARGS__)) + +/** + * Splice a singly-linked list into another. + * + * @dest + * The destination list. + * @cursor + * A pointer to the item to splice after, e.g. &list->head or list->tail. + * @src + * The source list. + */ +#define SLIST_SPLICE(dest, cursor, src) \ + LIST_VOID_(SLIST_SPLICE_((dest), (cursor), (src))) + +#define SLIST_SPLICE_(dest, cursor, src) \ + *src->tail = *cursor, \ + *cursor = src->head, \ + dest->tail = *dest->tail ? src->tail : dest->tail, \ + SLIST_INIT(src) /** * Add an entire singly-linked list to the tail of another. * - * @param dest + * @dest * The destination list. - * @param src + * @src * The source list. */ #define SLIST_EXTEND(dest, src) \ - LIST_BLOCK_(SLIST_EXTEND_((dest), (src))) - -#define SLIST_EXTEND_(dest, src) \ - if (src->head) { \ - *dest->tail = src->head; \ - dest->tail = src->tail; \ - SLIST_INIT(src); \ - } + SLIST_SPLICE(dest, (dest)->tail, src) /** * Remove an item from a singly-linked list. * - * @param list + * @list * The list to modify. - * @param cursor + * @cursor * A pointer to the item to remove, either &list->head or &prev->next. - * @param node (optional) + * @node (optional) * If specified, use item->node.next rather than item->next. * @return * The removed item. */ -#define SLIST_REMOVE(list, ...) SLIST_REMOVE_(list, __VA_ARGS__, ) +#define SLIST_REMOVE(list, ...) \ + SLIST_REMOVE_(list, __VA_ARGS__, ) #define SLIST_REMOVE_(list, cursor, ...) \ SLIST_REMOVE__((list), (cursor), LIST_NEXT_(__VA_ARGS__)) #define SLIST_REMOVE__(list, cursor, next) \ (list->tail = (*cursor)->next ? list->tail : cursor, \ - slist_remove_impl(*cursor, cursor, &(*cursor)->next, list->tail, sizeof(*cursor))) + slist_remove_(*cursor, cursor, &(*cursor)->next, sizeof(*cursor))) // Helper for SLIST_REMOVE() -static inline void *slist_remove_impl(void *ret, void *cursor, void *next, void *tail, size_t size) { +static inline void *slist_remove_(void *ret, void *cursor, void *next, size_t size) { // ret = *cursor; // *cursor = ret->next; memcpy(cursor, next, size); - // ret->next = *list->tail; (NULL) - memcpy(next, tail, size); + // ret->next = NULL; + memset(next, 0, size); return ret; } /** * Pop the head off a singly-linked list. * - * @param list + * @list * The list to modify. - * @param node (optional) + * @node (optional) * If specified, use head->node.next rather than head->next. * @return * The popped item, or NULL if the list was empty. */ -#define SLIST_POP(...) SLIST_POP_(__VA_ARGS__, ) +#define SLIST_POP(...) \ + SLIST_POP_(__VA_ARGS__, ) #define SLIST_POP_(list, ...) \ - ((list)->head ? SLIST_REMOVE_(list, &(list)->head, __VA_ARGS__) : NULL) + SLIST_POP__((list), __VA_ARGS__) + +#define SLIST_POP__(list, ...) \ + (list->head ? SLIST_REMOVE_(list, &list->head, __VA_ARGS__) : NULL) + +/** + * Loop over the items in a singly-linked list. + * + * @type + * The list item type. + * @item + * The induction variable name. + * @list + * The list to iterate. + * @node (optional) + * If specified, use head->node.next rather than head->next. + */ +#define for_slist(type, item, ...) \ + for_slist_(type, item, __VA_ARGS__, ) + +#define for_slist_(type, item, list, ...) \ + for_slist__(type, item, (list), LIST_NEXT_(__VA_ARGS__)) + +#define for_slist__(type, item, list, next) \ + for (type *item = list->head, *_next; \ + item && (SLIST_CHECK_(list), _next = item->next, true); \ + item = _next) + +/** + * Loop over a singly-linked list, popping each item. + * + * @type + * The list item type. + * @item + * The induction variable name. + * @list + * The list to drain. + * @node (optional) + * If specified, use head->node.next rather than head->next. + */ +#define drain_slist(type, item, ...) \ + for (type *item; (item = SLIST_POP(__VA_ARGS__));) /** * Initialize a doubly-linked list. * - * @param list + * @list * The list to initialize. */ #define LIST_INIT(list) \ - LIST_BLOCK_(LIST_INIT_((list))) + LIST_INIT_((list)) #define LIST_INIT_(list) \ - list->head = list->tail = NULL; + LIST_VOID_(list->head = list->tail = NULL) /** * LIST_PREV_() => prev * LIST_PREV_(node, ) => node.prev */ -#define LIST_PREV_(...) LIST_NODE_(prev, __VA_ARGS__) +#define LIST_PREV_(...) \ + LIST_NODE_(prev, __VA_ARGS__) + +/** + * Initialize a doubly-linked list item. + * + * @item + * The item to initialize. + * @node (optional) + * If specified, use item->node.next rather than item->next. + */ +#define LIST_ITEM_INIT(...) \ + LIST_ITEM_INIT_(__VA_ARGS__, ) + +#define LIST_ITEM_INIT_(item, ...) \ + LIST_ITEM_INIT__((item), LIST_PREV_(__VA_ARGS__), LIST_NEXT_(__VA_ARGS__)) + +#define LIST_ITEM_INIT__(item, prev, next) \ + LIST_VOID_(item->prev = item->next = NULL) + +/** + * Type-checking macro for doubly-linked lists. + */ +#define LIST_CHECK_(list) \ + (void)sizeof(list->tail - list->head) + +/** + * Check if a doubly-linked list is empty. + */ +#define LIST_EMPTY(list) \ + LIST_EMPTY_((list)) + +#define LIST_EMPTY_(list) \ + (LIST_CHECK_(list), !list->head) /** * Add an item to the tail of a doubly-linked list. * - * @param list + * @list * The list to modify. - * @param item + * @item * The item to append. - * @param node (optional) + * @node (optional) * If specified, use item->node.{prev,next} rather than item->{prev,next}. */ -#define LIST_APPEND(list, ...) LIST_INSERT(list, (list)->tail, __VA_ARGS__) +#define LIST_APPEND(list, ...) \ + LIST_INSERT(list, (list)->tail, __VA_ARGS__) /** * Add an item to the head of a doubly-linked list. * - * @param list + * @list * The list to modify. - * @param item + * @item * The item to prepend. - * @param node (optional) + * @node (optional) * If specified, use item->node.{prev,next} rather than item->{prev,next}. */ -#define LIST_PREPEND(list, ...) LIST_INSERT(list, NULL, __VA_ARGS__) +#define LIST_PREPEND(list, ...) \ + LIST_INSERT(list, NULL, __VA_ARGS__) + +/** + * Check if an item is attached to a doubly-linked list. + * + * @list + * The list to check. + * @item + * The item to check. + * @node (optional) + * If specified, use item->node.{prev,next} rather than item->{prev,next}. + * @return + * Whether the item is attached to the list. + */ +#define LIST_ATTACHED(list, ...) \ + LIST_ATTACHED_(list, __VA_ARGS__, ) + +#define LIST_ATTACHED_(list, item, ...) \ + LIST_ATTACHED__((list), (item), LIST_PREV_(__VA_ARGS__), LIST_NEXT_(__VA_ARGS__)) + +#define LIST_ATTACHED__(list, item, prev, next) \ + (item->prev || item->next || list->head == item || list->tail == item) /** * Insert into a doubly-linked list after the given cursor. * - * @param list + * @list * The list to modify. - * @param cursor + * @cursor * Insert after this element. - * @param item + * @item * The item to insert. - * @param node (optional) + * @node (optional) * If specified, use item->node.{prev,next} rather than item->{prev,next}. */ -#define LIST_INSERT(list, cursor, ...) LIST_INSERT_(list, cursor, __VA_ARGS__, ) +#define LIST_INSERT(list, cursor, ...) \ + LIST_INSERT_(list, cursor, __VA_ARGS__, ) #define LIST_INSERT_(list, cursor, item, ...) \ - LIST_BLOCK_(LIST_INSERT__((list), (cursor), (item), LIST_PREV_(__VA_ARGS__), LIST_NEXT_(__VA_ARGS__))) + LIST_INSERT__((list), (cursor), (item), LIST_PREV_(__VA_ARGS__), LIST_NEXT_(__VA_ARGS__)) -#define LIST_INSERT__(list, cursor, item, prev, next) \ - item->prev = cursor; \ - item->next = cursor ? cursor->next : list->head; \ - *(item->prev ? &item->prev->next : &list->head) = item; \ - *(item->next ? &item->next->prev : &list->tail) = item; +#define LIST_INSERT__(list, cursor, item, prev, next) LIST_VOID_( \ + bfs_assert(!LIST_ATTACHED__(list, item, prev, next)), \ + item->prev = cursor, \ + item->next = cursor ? cursor->next : list->head, \ + *(item->prev ? &item->prev->next : &list->head) = item, \ + *(item->next ? &item->next->prev : &list->tail) = item) /** * Remove an item from a doubly-linked list. * - * @param list + * @list * The list to modify. - * @param item + * @item * The item to remove. - * @param node (optional) + * @node (optional) * If specified, use item->node.{prev,next} rather than item->{prev,next}. */ -#define LIST_REMOVE(list, ...) LIST_REMOVE_(list, __VA_ARGS__, ) +#define LIST_REMOVE(list, ...) \ + LIST_REMOVE_(list, __VA_ARGS__, ) #define LIST_REMOVE_(list, item, ...) \ - LIST_BLOCK_(LIST_REMOVE__((list), (item), LIST_PREV_(__VA_ARGS__), LIST_NEXT_(__VA_ARGS__))) + LIST_REMOVE__((list), (item), LIST_PREV_(__VA_ARGS__), LIST_NEXT_(__VA_ARGS__)) -#define LIST_REMOVE__(list, item, prev, next) \ - *(item->prev ? &item->prev->next : &list->head) = item->next; \ - *(item->next ? &item->next->prev : &list->tail) = item->prev; \ - item->prev = item->next = NULL; +#define LIST_REMOVE__(list, item, prev, next) LIST_VOID_( \ + *(item->prev ? &item->prev->next : &list->head) = item->next, \ + *(item->next ? &item->next->prev : &list->tail) = item->prev, \ + item->prev = item->next = NULL) /** - * Check if an item is attached to a doubly-linked list. - * - * @param list - * The list to check. - * @param item - * The item to check. - * @param node (optional) - * If specified, use item->node.{prev,next} rather than item->{prev,next}. - * @return - * Whether the item is attached to the list. + * Loop over the items in a doubly-linked list. + * + * @type + * The list item type. + * @item + * The induction variable name. + * @list + * The list to iterate. + * @node (optional) + * If specified, use head->node.next rather than head->next. */ -#define LIST_ATTACHED(list, ...) LIST_ATTACHED_(list, __VA_ARGS__, ) +#define for_list(type, item, ...) \ + for_list_(type, item, __VA_ARGS__, ) -#define LIST_ATTACHED_(list, item, ...) \ - LIST_ATTACHED__((list), (item), LIST_PREV_(__VA_ARGS__), LIST_NEXT_(__VA_ARGS__)) +#define for_list_(type, item, list, ...) \ + for_list__(type, item, (list), LIST_NEXT_(__VA_ARGS__)) -#define LIST_ATTACHED__(list, item, prev, next) \ - (item->prev || item->next || list->head == item || list->tail == item) +#define for_list__(type, item, list, next) \ + for (type *item = list->head, *_next; \ + item && (LIST_CHECK_(list), _next = item->next, true); \ + item = _next) #endif // BFS_LIST_H @@ -20,14 +20,14 @@ * - bftw.[ch] (an extended version of nftw(3)) * * - Utilities: + * - prelude.h (feature test macros; automatically included) * - alloc.[ch] (memory allocation) * - atomic.h (atomic operations) * - bar.[ch] (a terminal status bar) * - bit.h (bit manipulation) + * - bfs.h (configuration and fundamental utilities) * - bfstd.[ch] (standard library wrappers/polyfills) * - color.[ch] (for pretty terminal colors) - * - config.h (configuration and feature/platform detection) - * - darray.[ch] (a dynamic array library) * - diag.[ch] (formats diagnostic messages) * - dir.[ch] (a directory API facade) * - dstring.[ch] (a dynamic string library) @@ -37,25 +37,29 @@ * - mtab.[ch] (parses the system's mount table) * - pwcache.[ch] (a cache for the user/group tables) * - sanity.h (sanitizer interfaces) + * - sighook.[ch] (signal hooks) * - stat.[ch] (wraps stat(), or statx() on Linux) * - thread.h (multi-threading) * - trie.[ch] (a trie set/map implementation) * - typo.[ch] (fuzzy matching for typos) + * - version.c (embeds version information) * - xregex.[ch] (regular expression support) * - xspawn.[ch] (spawns processes) * - xtime.[ch] (date/time handling utilities) */ #include "bfstd.h" -#include "config.h" #include "ctx.h" +#include "diag.h" #include "eval.h" #include "parse.h" + #include <errno.h> #include <fcntl.h> #include <locale.h> #include <stdio.h> #include <stdlib.h> +#include <time.h> #include <unistd.h> /** @@ -117,15 +121,29 @@ int main(int argc, char *argv[]) { } // Use the system locale instead of "C" - setlocale(LC_ALL, ""); + int locale_err = 0; + if (!setlocale(LC_ALL, "")) { + locale_err = errno; + } + + // Apply the environment's timezone + tzset(); + // Parse the command line struct bfs_ctx *ctx = bfs_parse_cmdline(argc, argv); if (!ctx) { return EXIT_FAILURE; } + // Warn if setlocale() failed, unless there's no expression to evaluate + if (locale_err && ctx->warn && ctx->expr) { + bfs_warning(ctx, "Failed to set locale: %s\n\n", xstrerror(locale_err)); + } + + // Walk the file system tree, evaluating the expression on each file int ret = bfs_eval(ctx); + // Free the parsed command line, and detect any last-minute errors if (bfs_ctx_free(ctx) != 0 && ret == EXIT_SUCCESS) { ret = EXIT_FAILURE; } @@ -2,24 +2,27 @@ // SPDX-License-Identifier: 0BSD #include "mtab.h" + #include "alloc.h" +#include "bfs.h" #include "bfstd.h" -#include "config.h" -#include "darray.h" #include "stat.h" #include "trie.h" + #include <errno.h> #include <fcntl.h> #include <stdlib.h> #include <string.h> #include <sys/types.h> -#if !defined(BFS_USE_MNTENT) && BFS_USE_MNTENT_H -# define BFS_USE_MNTENT true -#elif !defined(BFS_USE_MNTINFO) && BSD -# define BFS_USE_MNTINFO true -#elif !defined(BFS_USE_MNTTAB) && __SVR4 -# define BFS_USE_MNTTAB true +#ifndef BFS_USE_MNTENT +# define BFS_USE_MNTENT BFS_HAS_GETMNTENT_1 +#endif +#ifndef BFS_USE_MNTINFO +# define BFS_USE_MNTINFO (!BFS_USE_MNTENT && BFS_HAS_GETMNTINFO) +#endif +#ifndef BFS_USE_MNTTAB +# define BFS_USE_MNTTAB (!BFS_USE_MNTINFO && BFS_HAS_GETMNTENT_2) #endif #if BFS_USE_MNTENT @@ -28,7 +31,6 @@ # include <stdio.h> #elif BFS_USE_MNTINFO # include <sys/mount.h> -# include <sys/ucred.h> #elif BFS_USE_MNTTAB # include <stdio.h> # include <sys/mnttab.h> @@ -37,16 +39,24 @@ /** * A mount point in the table. */ -struct bfs_mtab_entry { +struct bfs_mount { /** The path to the mount point. */ char *path; /** The filesystem type. */ char *type; + /** Buffer for the strings. */ + char buf[]; }; struct bfs_mtab { + /** Mount point arena. */ + struct varena varena; + /** The array of mount points. */ - struct bfs_mtab_entry *entries; + struct bfs_mount **mounts; + /** The number of mount points. */ + size_t nmounts; + /** The basenames of every mount point. */ struct trie names; @@ -59,31 +69,39 @@ struct bfs_mtab { /** * Add an entry to the mount table. */ -static inline int bfs_mtab_add(struct bfs_mtab *mtab, const char *path, const char *type) { - struct bfs_mtab_entry entry = { - .path = strdup(path), - .type = strdup(type), - }; - - if (!entry.path || !entry.type) { - goto fail_entry; +_maybe_unused +static int bfs_mtab_add(struct bfs_mtab *mtab, const char *path, const char *type) { + size_t path_size = strlen(path) + 1; + size_t type_size = strlen(type) + 1; + size_t size = path_size + type_size; + struct bfs_mount *mount = varena_alloc(&mtab->varena, size); + if (!mount) { + return -1; } - if (DARRAY_PUSH(&mtab->entries, &entry) != 0) { - goto fail_entry; + struct bfs_mount **ptr = RESERVE(struct bfs_mount *, &mtab->mounts, &mtab->nmounts); + if (!ptr) { + goto free; } + *ptr = mount; + + mount->path = mount->buf; + memcpy(mount->path, path, path_size); + + mount->type = mount->buf + path_size; + memcpy(mount->type, type, type_size); const char *name = path + xbaseoff(path); if (!trie_insert_str(&mtab->names, name)) { - goto fail; + goto shrink; } return 0; -fail_entry: - free(entry.type); - free(entry.path); -fail: +shrink: + --mtab->nmounts; +free: + varena_free(&mtab->varena, mount, size); return -1; } @@ -93,6 +111,8 @@ struct bfs_mtab *bfs_mtab_parse(void) { return NULL; } + VARENA_INIT(&mtab->varena, struct bfs_mount, buf); + trie_init(&mtab->names); trie_init(&mtab->types); @@ -131,7 +151,7 @@ struct bfs_mtab *bfs_mtab_parse(void) { bfs_statfs *mntbuf; int size = getmntinfo(&mntbuf, MNT_WAIT); - if (size < 0) { + if (size <= 0) { error = errno; goto fail; } @@ -193,9 +213,9 @@ static int bfs_mtab_fill_types(struct bfs_mtab *mtab) { int parent_ret = -1; struct bfs_stat parent_stat; - for (size_t i = 0; i < darray_length(mtab->entries); ++i) { - struct bfs_mtab_entry *entry = &mtab->entries[i]; - const char *path = entry->path; + for (size_t i = 0; i < mtab->nmounts; ++i) { + struct bfs_mount *mount = mtab->mounts[i]; + const char *path = mount->path; int fd = AT_FDCWD; char *dir = xdirname(path); @@ -236,10 +256,7 @@ static int bfs_mtab_fill_types(struct bfs_mtab *mtab) { continue; } - struct trie_leaf *leaf = trie_insert_mem(&mtab->types, &sb.dev, sizeof(sb.dev)); - if (leaf) { - leaf->value = entry->type; - } else { + if (trie_set_mem(&mtab->types, &sb.mnt_id, sizeof(sb.mnt_id), mount->type) != 0) { goto fail; } } @@ -262,16 +279,15 @@ const char *bfs_fstype(const struct bfs_mtab *mtab, const struct bfs_stat *statb } } - const struct trie_leaf *leaf = trie_find_mem(&mtab->types, &statbuf->dev, sizeof(statbuf->dev)); - if (leaf) { - return leaf->value; + const char *type = trie_get_mem(&mtab->types, &statbuf->mnt_id, sizeof(statbuf->mnt_id)); + if (type) { + return type; } else { return "unknown"; } } -bool bfs_might_be_mount(const struct bfs_mtab *mtab, const char *path) { - const char *name = path + xbaseoff(path); +bool bfs_might_be_mount(const struct bfs_mtab *mtab, const char *name) { return trie_find_str(&mtab->names, name); } @@ -280,11 +296,8 @@ void bfs_mtab_free(struct bfs_mtab *mtab) { trie_destroy(&mtab->types); trie_destroy(&mtab->names); - for (size_t i = 0; i < darray_length(mtab->entries); ++i) { - free(mtab->entries[i].type); - free(mtab->entries[i].path); - } - darray_free(mtab->entries); + free(mtab->mounts); + varena_destroy(&mtab->varena); free(mtab); } @@ -8,8 +8,6 @@ #ifndef BFS_MTAB_H #define BFS_MTAB_H -#include "config.h" - struct bfs_stat; /** @@ -28,9 +26,9 @@ struct bfs_mtab *bfs_mtab_parse(void); /** * Determine the file system type that a file is on. * - * @param mtab + * @mtab * The current mount table. - * @param statbuf + * @statbuf * The bfs_stat() buffer for the file in question. * @return * The type of file system containing this file, "unknown" if not known, @@ -41,14 +39,14 @@ const char *bfs_fstype(const struct bfs_mtab *mtab, const struct bfs_stat *statb /** * Check if a file could be a mount point. * - * @param mtab + * @mtab * The current mount table. - * @param path - * The path to check. + * @name + * The name of the file to check. * @return * Whether the named file could be a mount point. */ -bool bfs_might_be_mount(const struct bfs_mtab *mtab, const char *path); +bool bfs_might_be_mount(const struct bfs_mtab *mtab, const char *name); /** * Free a mount table. @@ -6,14 +6,13 @@ * * -O1: basic logical simplifications, like folding (-true -and -foo) to -foo. * - * -O2: dead code elimination and data flow analysis. struct opt_facts is used + * -O2: dead code elimination and data flow analysis. struct df_domain is used * to record data flow facts that are true at various points of evaluation. - * Specifically, struct opt_facts records the facts that must be true before an - * expression is evaluated (state->facts), and those that must be true after the - * expression is evaluated, given that it returns true (state->facts_when_true) - * or false (state->facts_when_true). Additionally, state->facts_when_impure - * records the possible data flow facts before any expressions with side effects - * are evaluated. + * Specifically, struct df_domain records the state before an expression is + * evaluated (opt->before), and after an expression returns true + * (opt->after_true) or false (opt->after_false). Additionally, opt->impure + * records the possible state before any expression with side effects is + * evaluated. * * -O3: expression re-ordering to reduce expected cost. In an expression like * (-foo -and -bar), if both -foo and -bar are pure (no side effects), they can @@ -22,40 +21,139 @@ * -bar is likely to return false. * * -O4/-Ofast: aggressive optimizations that may affect correctness in corner - * cases. The main effect is to use facts_when_impure to determine if any side- - * effects are reachable at all, and skipping the traversal if not. + * cases. The main effect is to use opt->impure to determine if any side- + * effects are reachable at all, skipping the traversal if not. */ #include "opt.h" + +#include "bfs.h" +#include "bfstd.h" +#include "bftw.h" +#include "bit.h" #include "color.h" -#include "config.h" #include "ctx.h" #include "diag.h" +#include "dir.h" #include "eval.h" #include "exec.h" #include "expr.h" +#include "list.h" #include "pwcache.h" +#include "xspawn.h" + #include <errno.h> #include <limits.h> #include <stdarg.h> #include <stdio.h> -#include <string.h> #include <unistd.h> -static char *fake_and_arg = "-a"; -static char *fake_or_arg = "-o"; -static char *fake_not_arg = "!"; +static char *fake_and_arg = "-and"; +static char *fake_or_arg = "-or"; +static char *fake_not_arg = "-not"; + +/** + * The data flow domain for predicates. + */ +enum df_pred { + /** The bottom state (unreachable). */ + PRED_BOTTOM = 0, + /** The predicate is known to be false. */ + PRED_FALSE = 1 << false, + /** The predicate is known to be true. */ + PRED_TRUE = 1 << true, + /** The top state (unknown). */ + PRED_TOP = PRED_FALSE | PRED_TRUE, +}; + +/** Make a predicate known. */ +static void constrain_pred(enum df_pred *pred, bool value) { + *pred &= 1 << value; +} + +/** Compute the join (union) of two predicates. */ +static void pred_join(enum df_pred *dest, enum df_pred src) { + *dest |= src; +} + +/** + * Types of predicates we track. + */ +enum pred_type { + /** -readable */ + READABLE_PRED, + /** -writable */ + WRITABLE_PRED, + /** -executable */ + EXECUTABLE_PRED, + /** -acl */ + ACL_PRED, + /** -capable */ + CAPABLE_PRED, + /** -empty */ + EMPTY_PRED, + /** -hidden */ + HIDDEN_PRED, + /** -nogroup */ + NOGROUP_PRED, + /** -nouser */ + NOUSER_PRED, + /** -sparse */ + SPARSE_PRED, + /** -xattr */ + XATTR_PRED, + /** The number of pred_types. */ + PRED_TYPES, +}; + +/** Predicate type names. */ +static const char *const pred_names[] = { + [READABLE_PRED] = "-readable", + [WRITABLE_PRED] = "-writable", + [EXECUTABLE_PRED] = "-executable", + [ACL_PRED] = "-acl", + [CAPABLE_PRED] = "-capable", + [EMPTY_PRED] = "-empty", + [HIDDEN_PRED] = "-hidden", + [NOGROUP_PRED] = "-nogroup", + [NOUSER_PRED] = "-nouser", + [SPARSE_PRED] = "-sparse", + [XATTR_PRED] = "-xattr", +}; /** - * A contrained integer range. + * A constrained integer range. */ -struct range { +struct df_range { /** The (inclusive) minimum value. */ long long min; /** The (inclusive) maximum value. */ long long max; }; +/** Initialize an empty range. */ +static void range_init_bottom(struct df_range *range) { + range->min = LLONG_MAX; + range->max = LLONG_MIN; +} + +/** Check if a range is empty. */ +static bool range_is_bottom(const struct df_range *range) { + return range->min > range->max; +} + +/** Initialize a full range. */ +static void range_init_top(struct df_range *range) { + // All ranges we currently track are non-negative + range->min = 0; + range->max = LLONG_MAX; +} + +/** Check for an infinite range. */ +static bool range_is_top(const struct df_range *range) { + return range->min == 0 && range->max == LLONG_MAX; +} + /** Compute the minimum of two values. */ static long long min_value(long long a, long long b) { if (a < b) { @@ -75,17 +173,23 @@ static long long max_value(long long a, long long b) { } /** Constrain the minimum of a range. */ -static void constrain_min(struct range *range, long long value) { +static void constrain_min(struct df_range *range, long long value) { range->min = max_value(range->min, value); } -/** Contrain the maximum of a range. */ -static void constrain_max(struct range *range, long long value) { +/** Constrain the maximum of a range. */ +static void constrain_max(struct df_range *range, long long value) { range->max = min_value(range->max, value); } +/** Constrain a range to a single value. */ +static void constrain_range(struct df_range *range, long long value) { + constrain_min(range, value); + constrain_max(range, value); +} + /** Remove a single value from a range. */ -static void range_remove(struct range *range, long long value) { +static void range_remove(struct df_range *range, long long value) { if (range->min == value) { if (range->min == LLONG_MAX) { range->max = LLONG_MIN; @@ -104,20 +208,9 @@ static void range_remove(struct range *range, long long value) { } /** Compute the union of two ranges. */ -static void range_union(struct range *result, const struct range *lhs, const struct range *rhs) { - result->min = min_value(lhs->min, rhs->min); - result->max = max_value(lhs->max, rhs->max); -} - -/** Check if a range contains no values. */ -static bool range_is_impossible(const struct range *range) { - return range->min > range->max; -} - -/** Set a range to contain no values. */ -static void set_range_impossible(struct range *range) { - range->min = LLONG_MAX; - range->max = LLONG_MIN; +static void range_join(struct df_range *dest, const struct df_range *src) { + dest->min = min_value(dest->min, src->min); + dest->max = max_value(dest->max, src->max); } /** @@ -140,179 +233,157 @@ enum range_type { RANGE_TYPES, }; -/** - * A possibly-known value of a predicate. - */ -enum known_pred { - /** The state is impossible to reach. */ - PRED_IMPOSSIBLE = -2, - /** The value of the predicate is not known. */ - PRED_UNKNOWN = -1, - /** The predicate is known to be false. */ - PRED_FALSE = false, - /** The predicate is known to be true. */ - PRED_TRUE = true, +/** Range type names. */ +static const char *const range_names[] = { + [DEPTH_RANGE] = "-depth", + [GID_RANGE] = "-gid", + [INUM_RANGE] = "-inum", + [LINKS_RANGE] = "-links", + [SIZE_RANGE] = "-size", + [UID_RANGE] = "-uid", }; -/** Make a predicate known. */ -static void constrain_pred(enum known_pred *pred, bool value) { - if (*pred == PRED_UNKNOWN) { - *pred = value; - } else if (*pred == !value) { - *pred = PRED_IMPOSSIBLE; - } -} - -/** Compute the union of two known predicates. */ -static enum known_pred pred_union(enum known_pred lhs, enum known_pred rhs) { - if (lhs == PRED_IMPOSSIBLE) { - return rhs; - } else if (rhs == PRED_IMPOSSIBLE) { - return lhs; - } else if (lhs == rhs) { - return lhs; - } else { - return PRED_UNKNOWN; - } -} - /** - * Types of predicates we track. + * The data flow analysis domain. */ -enum pred_type { - /** -readable */ - READABLE_PRED, - /** -writable */ - WRITABLE_PRED, - /** -executable */ - EXECUTABLE_PRED, - /** -acl */ - ACL_PRED, - /** -capable */ - CAPABLE_PRED, - /** -empty */ - EMPTY_PRED, - /** -hidden */ - HIDDEN_PRED, - /** -nogroup */ - NOGROUP_PRED, - /** -nouser */ - NOUSER_PRED, - /** -sparse */ - SPARSE_PRED, - /** -xattr */ - XATTR_PRED, - /** The number of pred_types. */ - PRED_TYPES, -}; +struct df_domain { + /** The predicates we track. */ + enum df_pred preds[PRED_TYPES]; -/** - * Data flow facts about an evaluation point. - */ -struct opt_facts { /** The value ranges we track. */ - struct range ranges[RANGE_TYPES]; + struct df_range ranges[RANGE_TYPES]; - /** The predicates we track. */ - enum known_pred preds[PRED_TYPES]; - - /** Bitmask of possible file types. */ + /** Bitmask of possible -types. */ unsigned int types; - /** Bitmask of possible link target types. */ + /** Bitmask of possible -xtypes. */ unsigned int xtypes; }; -/** Initialize some data flow facts. */ -static void facts_init(struct opt_facts *facts) { - for (int i = 0; i < RANGE_TYPES; ++i) { - struct range *range = &facts->ranges[i]; - range->min = 0; // All ranges we currently track are non-negative - range->max = LLONG_MAX; - } - +/** Set a data flow value to bottom. */ +static void df_init_bottom(struct df_domain *value) { for (int i = 0; i < PRED_TYPES; ++i) { - facts->preds[i] = PRED_UNKNOWN; + value->preds[i] = PRED_BOTTOM; } - facts->types = ~0; - facts->xtypes = ~0; -} - -/** Compute the union of two fact sets. */ -static void facts_union(struct opt_facts *result, const struct opt_facts *lhs, const struct opt_facts *rhs) { for (int i = 0; i < RANGE_TYPES; ++i) { - range_union(&result->ranges[i], &lhs->ranges[i], &rhs->ranges[i]); - } - - for (int i = 0; i < PRED_TYPES; ++i) { - result->preds[i] = pred_union(lhs->preds[i], rhs->preds[i]); + range_init_bottom(&value->ranges[i]); } - result->types = lhs->types | rhs->types; - result->xtypes = lhs->xtypes | rhs->xtypes; + value->types = 0; + value->xtypes = 0; } /** Determine whether a fact set is impossible. */ -static bool facts_are_impossible(const struct opt_facts *facts) { +static bool df_is_bottom(const struct df_domain *value) { for (int i = 0; i < RANGE_TYPES; ++i) { - if (range_is_impossible(&facts->ranges[i])) { + if (range_is_bottom(&value->ranges[i])) { return true; } } for (int i = 0; i < PRED_TYPES; ++i) { - if (facts->preds[i] == PRED_IMPOSSIBLE) { + if (value->preds[i] == PRED_BOTTOM) { return true; } } - if (!facts->types || !facts->xtypes) { + if (!value->types || !value->xtypes) { return true; } return false; } -/** Set some facts to be impossible. */ -static void set_facts_impossible(struct opt_facts *facts) { +/** Initialize some data flow value. */ +static void df_init_top(struct df_domain *value) { + for (int i = 0; i < PRED_TYPES; ++i) { + value->preds[i] = PRED_TOP; + } + + for (int i = 0; i < RANGE_TYPES; ++i) { + range_init_top(&value->ranges[i]); + } + + value->types = ~0; + value->xtypes = ~0; +} + +/** Check for the top element. */ +static bool df_is_top(const struct df_domain *value) { + for (int i = 0; i < PRED_TYPES; ++i) { + if (value->preds[i] != PRED_TOP) { + return false; + } + } + for (int i = 0; i < RANGE_TYPES; ++i) { - set_range_impossible(&facts->ranges[i]); + if (!range_is_top(&value->ranges[i])) { + return false; + } + } + + if (value->types != ~0U) { + return false; + } + + if (value->xtypes != ~0U) { + return false; } + return true; +} + +/** Compute the union of two fact sets. */ +static void df_join(struct df_domain *dest, const struct df_domain *src) { for (int i = 0; i < PRED_TYPES; ++i) { - facts->preds[i] = PRED_IMPOSSIBLE; + pred_join(&dest->preds[i], src->preds[i]); } - facts->types = 0; - facts->xtypes = 0; + for (int i = 0; i < RANGE_TYPES; ++i) { + range_join(&dest->ranges[i], &src->ranges[i]); + } + + dest->types |= src->types; + dest->xtypes |= src->xtypes; } /** * Optimizer state. */ -struct opt_state { +struct bfs_opt { /** The context we're optimizing. */ - const struct bfs_ctx *ctx; - - /** Data flow facts before this expression is evaluated. */ - struct opt_facts facts; - /** Data flow facts after this expression returns true. */ - struct opt_facts facts_when_true; - /** Data flow facts after this expression returns false. */ - struct opt_facts facts_when_false; - /** Data flow facts before any side-effecting expressions are evaluated. */ - struct opt_facts *facts_when_impure; + struct bfs_ctx *ctx; + /** Optimization level (ctx->optlevel). */ + int level; + /** Recursion depth. */ + int depth; + + /** Whether to produce warnings. */ + bool warn; + /** Whether the result of this expression is ignored. */ + bool ignore_result; + + /** Data flow state before this expression is evaluated. */ + struct df_domain before; + /** Data flow state after this expression returns true. */ + struct df_domain after_true; + /** Data flow state after this expression returns false. */ + struct df_domain after_false; + /** Data flow state before any side-effecting expressions are evaluated. */ + struct df_domain *impure; }; /** Log an optimization. */ -BFS_FORMATTER(3, 4) -static bool opt_debug(const struct opt_state *state, int level, const char *format, ...) { - bfs_assert(state->ctx->optlevel >= level); +_printf(2, 3) +static bool opt_debug(struct bfs_opt *opt, const char *format, ...) { + if (bfs_debug_prefix(opt->ctx, DEBUG_OPT)) { + for (int i = 0; i < opt->depth; ++i) { + cfprintf(opt->ctx->cerr, "│ "); + } - if (bfs_debug(state->ctx, DEBUG_OPT, "${cyn}-O%d${rs}: ", level)) { va_list args; va_start(args, format); - cvfprintf(state->ctx->cerr, format, args); + cvfprintf(opt->ctx->cerr, format, args); va_end(args); return true; } else { @@ -320,471 +391,568 @@ static bool opt_debug(const struct opt_state *state, int level, const char *form } } -/** Warn about an expression. */ -BFS_FORMATTER(3, 4) -static void opt_warning(const struct opt_state *state, const struct bfs_expr *expr, const char *format, ...) { - if (bfs_expr_warning(state->ctx, expr)) { +/** Log a recursive call. */ +_printf(2, 3) +static bool opt_enter(struct bfs_opt *opt, const char *format, ...) { + int depth = opt->depth; + if (depth > 0) { + --opt->depth; + } + + bool debug = opt_debug(opt, "%s", depth > 0 ? "├─╮ " : ""); + if (debug) { va_list args; va_start(args, format); - bfs_vwarning(state->ctx, format, args); + cvfprintf(opt->ctx->cerr, format, args); va_end(args); } + + opt->depth = depth + 1; + return debug; } -/** Create a constant expression. */ -static struct bfs_expr *opt_const(bool value) { - static bfs_eval_fn *fns[] = {eval_false, eval_true}; - static char *fake_args[] = {"-false", "-true"}; - return bfs_expr_new(fns[value], 1, &fake_args[value]); +/** Log a recursive return. */ +_printf(2, 3) +static bool opt_leave(struct bfs_opt *opt, const char *format, ...) { + bool debug = false; + int depth = opt->depth; + + if (format) { + if (depth > 1) { + opt->depth -= 2; + } + + debug = opt_debug(opt, "%s", depth > 1 ? "├─╯ " : ""); + if (debug) { + va_list args; + va_start(args, format); + cvfprintf(opt->ctx->cerr, format, args); + va_end(args); + } + } + + opt->depth = depth - 1; + return debug; } -/** Extract a child expression, freeing the outer expression. */ -static struct bfs_expr *extract_child_expr(struct bfs_expr *expr, struct bfs_expr **child) { - struct bfs_expr *ret = *child; - *child = NULL; - bfs_expr_free(expr); - return ret; +/** Log a shallow visit. */ +_printf(2, 3) +static bool opt_visit(struct bfs_opt *opt, const char *format, ...) { + int depth = opt->depth; + if (depth > 0) { + --opt->depth; + } + + bool debug = opt_debug(opt, "%s", depth > 0 ? "├─◯ " : ""); + if (debug) { + va_list args; + va_start(args, format); + cvfprintf(opt->ctx->cerr, format, args); + va_end(args); + } + + opt->depth = depth; + return debug; } -/** - * Negate an expression. - */ -static struct bfs_expr *negate_expr(struct bfs_expr *rhs, char **argv) { - if (rhs->eval_fn == eval_not) { - return extract_child_expr(rhs, &rhs->rhs); +/** Log the deletion of an expression. */ +_printf(2, 3) +static bool opt_delete(struct bfs_opt *opt, const char *format, ...) { + int depth = opt->depth; + + if (depth > 0) { + --opt->depth; } - struct bfs_expr *expr = bfs_expr_new(eval_not, 1, argv); - if (!expr) { - bfs_expr_free(rhs); - return NULL; + bool debug = opt_debug(opt, "%s", depth > 0 ? "├─✘ " : ""); + if (debug) { + va_list args; + va_start(args, format); + cvfprintf(opt->ctx->cerr, format, args); + va_end(args); } - expr->lhs = NULL; - expr->rhs = rhs; - return expr; + opt->depth = depth; + return debug; } -static struct bfs_expr *optimize_not_expr(const struct opt_state *state, struct bfs_expr *expr); -static struct bfs_expr *optimize_and_expr(const struct opt_state *state, struct bfs_expr *expr); -static struct bfs_expr *optimize_or_expr(const struct opt_state *state, struct bfs_expr *expr); +typedef bool dump_fn(struct bfs_opt *opt, const char *format, ...); -/** - * Apply De Morgan's laws. - */ -static struct bfs_expr *de_morgan(const struct opt_state *state, struct bfs_expr *expr, char **argv) { - bool debug = opt_debug(state, 1, "De Morgan's laws: %pe ", expr); +/** Print a df_pred. */ +static void pred_dump(dump_fn *dump, struct bfs_opt *opt, const struct df_domain *value, enum pred_type type) { + dump(opt, "${blu}%s${rs}: ", pred_names[type]); - struct bfs_expr *parent = negate_expr(expr, argv); - if (!parent) { - return NULL; + FILE *file = opt->ctx->cerr->file; + switch (value->preds[type]) { + case PRED_BOTTOM: + fprintf(file, "⊥\n"); + break; + case PRED_TOP: + fprintf(file, "⊤\n"); + break; + case PRED_TRUE: + fprintf(file, "true\n"); + break; + case PRED_FALSE: + fprintf(file, "false\n"); + break; } +} - bool has_parent = true; - if (parent->eval_fn != eval_not) { - expr = parent; - has_parent = false; +/** Print a df_range. */ +static void range_dump(dump_fn *dump, struct bfs_opt *opt, const struct df_domain *value, enum range_type type) { + dump(opt, "${blu}%s${rs}: ", range_names[type]); + + FILE *file = opt->ctx->cerr->file; + const struct df_range *range = &value->ranges[type]; + if (range_is_bottom(range)) { + fprintf(file, "⊥\n"); + } else if (range_is_top(range)) { + fprintf(file, "⊤\n"); + } else if (range->min == range->max) { + fprintf(file, "%lld\n", range->min); + } else { + if (range->min == LLONG_MIN) { + fprintf(file, "(-∞, "); + } else { + fprintf(file, "[%lld, ", range->min); + } + if (range->max == LLONG_MAX) { + fprintf(file, "∞)\n"); + } else { + fprintf(file, "%lld]\n", range->max); + } } +} - bfs_assert(expr->eval_fn == eval_and || expr->eval_fn == eval_or); - if (expr->eval_fn == eval_and) { - expr->eval_fn = eval_or; - expr->argv = &fake_or_arg; +/** Print a set of types. */ +static void types_dump(dump_fn *dump, struct bfs_opt *opt, const char *name, unsigned int types) { + dump(opt, "${blu}%s${rs}: ", name); + + FILE *file = opt->ctx->cerr->file; + if (types == 0) { + fprintf(file, " ⊥\n"); + } else if (types == ~0U) { + fprintf(file, " ⊤\n"); + } else if (count_ones(types) < count_ones(~types)) { + fprintf(file, " 0x%X\n", types); } else { - expr->eval_fn = eval_and; - expr->argv = &fake_and_arg; + fprintf(file, "~0x%X\n", ~types); } +} - expr->lhs = negate_expr(expr->lhs, argv); - expr->rhs = negate_expr(expr->rhs, argv); - if (!expr->lhs || !expr->rhs) { - bfs_expr_free(parent); - return NULL; +/** Calculate the number of lines of df_dump() output. */ +static int df_dump_lines(const struct df_domain *value) { + int lines = 0; + + for (int i = 0; i < PRED_TYPES; ++i) { + lines += value->preds[i] != PRED_TOP; } - if (debug) { - cfprintf(state->ctx->cerr, "<==> %pe\n", parent); + for (int i = 0; i < RANGE_TYPES; ++i) { + lines += !range_is_top(&value->ranges[i]); } - if (expr->lhs->eval_fn == eval_not) { - expr->lhs = optimize_not_expr(state, expr->lhs); + lines += value->types != ~0U; + lines += value->xtypes != ~0U; + + return lines; +} + +/** Get the right debugging function for a df_dump() line. */ +static dump_fn *df_dump_line(int lines, int *line) { + ++*line; + + if (lines == 1) { + return opt_visit; + } else if (*line == 1) { + return opt_enter; + } else if (*line == lines) { + return opt_leave; + } else { + return opt_debug; } - if (expr->rhs->eval_fn == eval_not) { - expr->rhs = optimize_not_expr(state, expr->rhs); +} + +/** Print a data flow value. */ +static void df_dump(struct bfs_opt *opt, const char *str, const struct df_domain *value) { + if (df_is_bottom(value)) { + opt_debug(opt, "%s: ⊥\n", str); + return; + } else if (df_is_top(value)) { + opt_debug(opt, "%s: ⊤\n", str); + return; } - if (!expr->lhs || !expr->rhs) { - bfs_expr_free(parent); - return NULL; + + if (!opt_debug(opt, "%s:\n", str)) { + return; } - if (expr->eval_fn == eval_and) { - expr = optimize_and_expr(state, expr); - } else { - expr = optimize_or_expr(state, expr); + int lines = df_dump_lines(value); + int line = 0; + + for (int i = 0; i < PRED_TYPES; ++i) { + if (value->preds[i] != PRED_TOP) { + pred_dump(df_dump_line(lines, &line), opt, value, i); + } } - if (has_parent) { - parent->rhs = expr; - } else { - parent = expr; + + for (int i = 0; i < RANGE_TYPES; ++i) { + if (!range_is_top(&value->ranges[i])) { + range_dump(df_dump_line(lines, &line), opt, value, i); + } } - if (!expr) { - bfs_expr_free(parent); - return NULL; + + if (value->types != ~0U) { + types_dump(df_dump_line(lines, &line), opt, "-type", value->types); } - if (has_parent) { - parent = optimize_not_expr(state, parent); + if (value->xtypes != ~0U) { + types_dump(df_dump_line(lines, &line), opt, "-xtype", value->xtypes); } - return parent; } -/** Optimize an expression recursively. */ -static struct bfs_expr *optimize_expr_recursive(struct opt_state *state, struct bfs_expr *expr); +/** Check if an expression is constant. */ +static bool is_const(const struct bfs_expr *expr) { + return expr->eval_fn == eval_true || expr->eval_fn == eval_false; +} -/** - * Optimize a negation. - */ -static struct bfs_expr *optimize_not_expr(const struct opt_state *state, struct bfs_expr *expr) { - bfs_assert(expr->eval_fn == eval_not); - - struct bfs_expr *rhs = expr->rhs; - - int optlevel = state->ctx->optlevel; - if (optlevel >= 1) { - if (rhs->eval_fn == eval_true || rhs->eval_fn == eval_false) { - struct bfs_expr *ret = opt_const(rhs->eval_fn == eval_false); - opt_debug(state, 1, "constant propagation: %pe <==> %pe\n", expr, ret); - bfs_expr_free(expr); - return ret; - } else if (rhs->eval_fn == eval_not) { - opt_debug(state, 1, "double negation: %pe <==> %pe\n", expr, rhs->rhs); - return extract_child_expr(expr, &rhs->rhs); - } else if (bfs_expr_never_returns(rhs)) { - opt_debug(state, 1, "reachability: %pe <==> %pe\n", expr, rhs); - return extract_child_expr(expr, &expr->rhs); - } else if ((rhs->eval_fn == eval_and || rhs->eval_fn == eval_or) - && (rhs->lhs->eval_fn == eval_not || rhs->rhs->eval_fn == eval_not)) { - return de_morgan(state, expr, expr->argv); - } +/** Warn about an expression. */ +_printf(3, 4) +static bool opt_warning(const struct bfs_opt *opt, const struct bfs_expr *expr, const char *format, ...) { + if (!opt->warn) { + return false; } - expr->pure = rhs->pure; - expr->always_true = rhs->always_false; - expr->always_false = rhs->always_true; - expr->cost = rhs->cost; - expr->probability = 1.0 - rhs->probability; + if (bfs_expr_is_parent(expr) || is_const(expr)) { + return false; + } - return expr; + if (!bfs_expr_warning(opt->ctx, expr)) { + return false; + } + + va_list args; + va_start(args, format); + bfs_vwarning(opt->ctx, format, args); + va_end(args); + + return true; } -/** Optimize a negation recursively. */ -static struct bfs_expr *optimize_not_expr_recursive(struct opt_state *state, struct bfs_expr *expr) { - struct opt_state rhs_state = *state; - expr->rhs = optimize_expr_recursive(&rhs_state, expr->rhs); - if (!expr->rhs) { - goto fail; - } +/** Remove and return an expression's children. */ +static void foster_children(struct bfs_expr *expr, struct bfs_exprs *children) { + bfs_assert(bfs_expr_is_parent(expr)); - state->facts_when_true = rhs_state.facts_when_false; - state->facts_when_false = rhs_state.facts_when_true; + SLIST_INIT(children); + SLIST_EXTEND(children, &expr->children); - return optimize_not_expr(state, expr); + expr->persistent_fds = 0; + expr->ephemeral_fds = 0; + expr->pure = true; +} -fail: - bfs_expr_free(expr); - return NULL; +/** Return an expression's only child. */ +static struct bfs_expr *only_child(struct bfs_expr *expr) { + bfs_assert(bfs_expr_is_parent(expr)); + struct bfs_expr *child = bfs_expr_children(expr); + bfs_assert(child && !child->next); + return child; } -/** Optimize a conjunction. */ -static struct bfs_expr *optimize_and_expr(const struct opt_state *state, struct bfs_expr *expr) { - bfs_assert(expr->eval_fn == eval_and); - - struct bfs_expr *lhs = expr->lhs; - struct bfs_expr *rhs = expr->rhs; - - const struct bfs_ctx *ctx = state->ctx; - int optlevel = ctx->optlevel; - if (optlevel >= 1) { - if (lhs->eval_fn == eval_true) { - opt_debug(state, 1, "conjunction elimination: %pe <==> %pe\n", expr, rhs); - return extract_child_expr(expr, &expr->rhs); - } else if (rhs->eval_fn == eval_true) { - opt_debug(state, 1, "conjunction elimination: %pe <==> %pe\n", expr, lhs); - return extract_child_expr(expr, &expr->lhs); - } else if (lhs->always_false) { - opt_debug(state, 1, "short-circuit: %pe <==> %pe\n", expr, lhs); - opt_warning(state, expr->rhs, "This expression is unreachable.\n\n"); - return extract_child_expr(expr, &expr->lhs); - } else if (lhs->always_true && rhs->eval_fn == eval_false) { - bool debug = opt_debug(state, 1, "strength reduction: %pe <==> ", expr); - struct bfs_expr *ret = extract_child_expr(expr, &expr->lhs); - ret = negate_expr(ret, &fake_not_arg); - if (debug && ret) { - cfprintf(ctx->cerr, "%pe\n", ret); - } - return ret; - } else if (optlevel >= 2 && lhs->pure && rhs->eval_fn == eval_false) { - opt_debug(state, 2, "purity: %pe <==> %pe\n", expr, rhs); - opt_warning(state, expr->lhs, "The result of this expression is ignored.\n\n"); - return extract_child_expr(expr, &expr->rhs); - } else if (lhs->eval_fn == eval_not && rhs->eval_fn == eval_not) { - return de_morgan(state, expr, expr->lhs->argv); +/** Foster an expression's only child. */ +static struct bfs_expr *foster_only_child(struct bfs_expr *expr) { + struct bfs_expr *child = only_child(expr); + struct bfs_exprs children; + foster_children(expr, &children); + return child; +} + +/** An expression visitor. */ +struct visitor; + +/** An expression-visiting function. */ +typedef struct bfs_expr *visit_fn(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor); + +/** An entry in a visitor lookup table. */ +struct visitor_table { + /** The evaluation function to match on. */ + bfs_eval_fn *eval_fn; + /** The visitor function. */ + visit_fn *visit; +}; + +/** Look up a visitor in a table. */ +static visit_fn *look_up_visitor(const struct bfs_expr *expr, const struct visitor_table table[]) { + for (size_t i = 0; table[i].eval_fn; ++i) { + if (expr->eval_fn == table[i].eval_fn) { + return table[i].visit; } } - expr->pure = lhs->pure && rhs->pure; - expr->always_true = lhs->always_true && rhs->always_true; - expr->always_false = lhs->always_false || rhs->always_false; - expr->cost = lhs->cost + lhs->probability*rhs->cost; - expr->probability = lhs->probability*rhs->probability; - - return expr; + return NULL; } -/** Optimize a conjunction recursively. */ -static struct bfs_expr *optimize_and_expr_recursive(struct opt_state *state, struct bfs_expr *expr) { - struct opt_state lhs_state = *state; - expr->lhs = optimize_expr_recursive(&lhs_state, expr->lhs); - if (!expr->lhs) { - goto fail; - } +struct visitor { + /** The name of this visitor. */ + const char *name; - struct opt_state rhs_state = *state; - rhs_state.facts = lhs_state.facts_when_true; - expr->rhs = optimize_expr_recursive(&rhs_state, expr->rhs); - if (!expr->rhs) { - goto fail; - } + /** A function to call before visiting children. */ + visit_fn *enter; + /** The default visitor. */ + visit_fn *visit; + /** A function to call after visiting children. */ + visit_fn *leave; - state->facts_when_true = rhs_state.facts_when_true; - facts_union(&state->facts_when_false, &lhs_state.facts_when_false, &rhs_state.facts_when_false); + /** A visitor lookup table. */ + const struct visitor_table *table; +}; - return optimize_and_expr(state, expr); +/** Recursive visitor implementation. */ +static struct bfs_expr *visit_deep(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor); -fail: - bfs_expr_free(expr); - return NULL; -} +/** Visit a negation. */ +static struct bfs_expr *visit_not(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct bfs_expr *rhs = foster_only_child(expr); -/** Optimize a disjunction. */ -static struct bfs_expr *optimize_or_expr(const struct opt_state *state, struct bfs_expr *expr) { - bfs_assert(expr->eval_fn == eval_or); - - struct bfs_expr *lhs = expr->lhs; - struct bfs_expr *rhs = expr->rhs; - - const struct bfs_ctx *ctx = state->ctx; - int optlevel = ctx->optlevel; - if (optlevel >= 1) { - if (lhs->always_true) { - opt_debug(state, 1, "short-circuit: %pe <==> %pe\n", expr, lhs); - opt_warning(state, expr->rhs, "This expression is unreachable.\n\n"); - return extract_child_expr(expr, &expr->lhs); - } else if (lhs->eval_fn == eval_false) { - opt_debug(state, 1, "disjunctive syllogism: %pe <==> %pe\n", expr, rhs); - return extract_child_expr(expr, &expr->rhs); - } else if (rhs->eval_fn == eval_false) { - opt_debug(state, 1, "disjunctive syllogism: %pe <==> %pe\n", expr, lhs); - return extract_child_expr(expr, &expr->lhs); - } else if (lhs->always_false && rhs->eval_fn == eval_true) { - bool debug = opt_debug(state, 1, "strength reduction: %pe <==> ", expr); - struct bfs_expr *ret = extract_child_expr(expr, &expr->lhs); - ret = negate_expr(ret, &fake_not_arg); - if (debug && ret) { - cfprintf(ctx->cerr, "%pe\n", ret); - } - return ret; - } else if (optlevel >= 2 && lhs->pure && rhs->eval_fn == eval_true) { - opt_debug(state, 2, "purity: %pe <==> %pe\n", expr, rhs); - opt_warning(state, expr->lhs, "The result of this expression is ignored.\n\n"); - return extract_child_expr(expr, &expr->rhs); - } else if (lhs->eval_fn == eval_not && rhs->eval_fn == eval_not) { - return de_morgan(state, expr, expr->lhs->argv); - } + struct bfs_opt nested = *opt; + rhs = visit_deep(&nested, rhs, visitor); + if (!rhs) { + return NULL; } - expr->pure = lhs->pure && rhs->pure; - expr->always_true = lhs->always_true || rhs->always_true; - expr->always_false = lhs->always_false && rhs->always_false; - expr->cost = lhs->cost + (1 - lhs->probability)*rhs->cost; - expr->probability = lhs->probability + rhs->probability - lhs->probability*rhs->probability; + opt->after_true = nested.after_false; + opt->after_false = nested.after_true; + bfs_expr_append(expr, rhs); return expr; } -/** Optimize a disjunction recursively. */ -static struct bfs_expr *optimize_or_expr_recursive(struct opt_state *state, struct bfs_expr *expr) { - struct opt_state lhs_state = *state; - expr->lhs = optimize_expr_recursive(&lhs_state, expr->lhs); - if (!expr->lhs) { - goto fail; - } +/** Visit a conjunction. */ +static struct bfs_expr *visit_and(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct bfs_exprs children; + foster_children(expr, &children); - struct opt_state rhs_state = *state; - rhs_state.facts = lhs_state.facts_when_false; - expr->rhs = optimize_expr_recursive(&rhs_state, expr->rhs); - if (!expr->rhs) { - goto fail; - } + // Base case (-and) == (-true) + df_init_bottom(&opt->after_false); + struct bfs_opt nested = *opt; - facts_union(&state->facts_when_true, &lhs_state.facts_when_true, &rhs_state.facts_when_true); - state->facts_when_false = rhs_state.facts_when_false; + drain_slist (struct bfs_expr, child, &children) { + if (SLIST_EMPTY(&children)) { + nested.ignore_result = opt->ignore_result; + } else { + nested.ignore_result = false; + } - return optimize_or_expr(state, expr); + child = visit_deep(&nested, child, visitor); + if (!child) { + return NULL; + } -fail: - bfs_expr_free(expr); - return NULL; + df_join(&opt->after_false, &nested.after_false); + nested.before = nested.after_true; + + bfs_expr_append(expr, child); + } + + opt->after_true = nested.after_true; + + return expr; } -/** Optimize an expression in an ignored-result context. */ -static struct bfs_expr *ignore_result(const struct opt_state *state, struct bfs_expr *expr) { - int optlevel = state->ctx->optlevel; - - if (optlevel >= 1) { - while (true) { - if (expr->eval_fn == eval_not) { - opt_debug(state, 1, "ignored result: %pe --> %pe\n", expr, expr->rhs); - opt_warning(state, expr, "The result of this expression is ignored.\n\n"); - expr = extract_child_expr(expr, &expr->rhs); - } else if (optlevel >= 2 - && (expr->eval_fn == eval_and || expr->eval_fn == eval_or || expr->eval_fn == eval_comma) - && expr->rhs->pure) { - opt_debug(state, 2, "ignored result: %pe --> %pe\n", expr, expr->lhs); - opt_warning(state, expr->rhs, "The result of this expression is ignored.\n\n"); - expr = extract_child_expr(expr, &expr->lhs); - } else { - break; - } +/** Visit a disjunction. */ +static struct bfs_expr *visit_or(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct bfs_exprs children; + foster_children(expr, &children); + + // Base case (-or) == (-false) + df_init_bottom(&opt->after_true); + struct bfs_opt nested = *opt; + + drain_slist (struct bfs_expr, child, &children) { + if (SLIST_EMPTY(&children)) { + nested.ignore_result = opt->ignore_result; + } else { + nested.ignore_result = false; } - if (optlevel >= 2 && expr->pure && expr->eval_fn != eval_false) { - struct bfs_expr *ret = opt_const(false); - opt_debug(state, 2, "ignored result: %pe --> %pe\n", expr, ret); - opt_warning(state, expr, "The result of this expression is ignored.\n\n"); - bfs_expr_free(expr); - return ret; + child = visit_deep(&nested, child, visitor); + if (!child) { + return NULL; } + + df_join(&opt->after_true, &nested.after_true); + nested.before = nested.after_false; + + bfs_expr_append(expr, child); } + opt->after_false = nested.after_false; + return expr; } -/** Optimize a comma expression. */ -static struct bfs_expr *optimize_comma_expr(const struct opt_state *state, struct bfs_expr *expr) { - bfs_assert(expr->eval_fn == eval_comma); +/** Visit a comma expression. */ +static struct bfs_expr *visit_comma(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct bfs_exprs children; + foster_children(expr, &children); - struct bfs_expr *lhs = expr->lhs; - struct bfs_expr *rhs = expr->rhs; + struct bfs_opt nested = *opt; - int optlevel = state->ctx->optlevel; - if (optlevel >= 1) { - lhs = expr->lhs = ignore_result(state, lhs); + drain_slist (struct bfs_expr, child, &children) { + if (SLIST_EMPTY(&children)) { + nested.ignore_result = opt->ignore_result; + } else { + nested.ignore_result = true; + } - if (bfs_expr_never_returns(lhs)) { - opt_debug(state, 1, "reachability: %pe <==> %pe\n", expr, lhs); - opt_warning(state, expr->rhs, "This expression is unreachable.\n\n"); - return extract_child_expr(expr, &expr->lhs); - } else if ((lhs->always_true && rhs->eval_fn == eval_true) - || (lhs->always_false && rhs->eval_fn == eval_false)) { - opt_debug(state, 1, "redundancy elimination: %pe <==> %pe\n", expr, lhs); - return extract_child_expr(expr, &expr->lhs); - } else if (optlevel >= 2 && lhs->pure) { - opt_debug(state, 2, "purity: %pe <==> %pe\n", expr, rhs); - opt_warning(state, expr->lhs, "The result of this expression is ignored.\n\n"); - return extract_child_expr(expr, &expr->rhs); + child = visit_deep(&nested, child, visitor); + if (!child) { + return NULL; } + + nested.before = nested.after_true; + df_join(&nested.before, &nested.after_false); + + bfs_expr_append(expr, child); } - expr->pure = lhs->pure && rhs->pure; - expr->always_true = bfs_expr_never_returns(lhs) || rhs->always_true; - expr->always_false = bfs_expr_never_returns(lhs) || rhs->always_false; - expr->cost = lhs->cost + rhs->cost; - expr->probability = rhs->probability; + opt->after_true = nested.after_true; + opt->after_false = nested.after_false; return expr; } -/** Optimize a comma expression recursively. */ -static struct bfs_expr *optimize_comma_expr_recursive(struct opt_state *state, struct bfs_expr *expr) { - struct opt_state lhs_state = *state; - expr->lhs = optimize_expr_recursive(&lhs_state, expr->lhs); - if (!expr->lhs) { - goto fail; +/** Default enter() function. */ +static struct bfs_expr *visit_enter(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + opt_enter(opt, "%pe\n", expr); + opt->after_true = opt->before; + opt->after_false = opt->before; + return expr; +} + +/** Default leave() function. */ +static struct bfs_expr *visit_leave(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + opt_leave(opt, "%pe\n", expr); + return expr; +} + +static struct bfs_expr *visit_deep(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + bool entered = false; + + visit_fn *enter = visitor->enter ? visitor->enter : visit_enter; + visit_fn *leave = visitor->leave ? visitor->leave : visit_leave; + + static const struct visitor_table table[] = { + {eval_not, visit_not}, + {eval_and, visit_and}, + {eval_or, visit_or}, + {eval_comma, visit_comma}, + {NULL, NULL}, + }; + visit_fn *recursive = look_up_visitor(expr, table); + if (recursive) { + if (!entered) { + expr = enter(opt, expr, visitor); + if (!expr) { + return NULL; + } + entered = true; + } + + expr = recursive(opt, expr, visitor); + if (!expr) { + return NULL; + } } - struct opt_state rhs_state = *state; - facts_union(&rhs_state.facts, &lhs_state.facts_when_true, &lhs_state.facts_when_false); - expr->rhs = optimize_expr_recursive(&rhs_state, expr->rhs); - if (!expr->rhs) { - goto fail; + visit_fn *general = visitor->visit; + if (general) { + if (!entered) { + expr = enter(opt, expr, visitor); + if (!expr) { + return NULL; + } + entered = true; + } + + expr = general(opt, expr, visitor); + if (!expr) { + return NULL; + } } - return optimize_comma_expr(state, expr); + visit_fn *specific = look_up_visitor(expr, visitor->table); + if (specific) { + if (!entered) { + expr = enter(opt, expr, visitor); + if (!expr) { + return NULL; + } + entered = true; + } -fail: - bfs_expr_free(expr); - return NULL; -} + expr = specific(opt, expr, visitor); + if (!expr) { + return NULL; + } + } -/** Infer data flow facts about a predicate. */ -static void infer_pred_facts(struct opt_state *state, enum pred_type pred) { - constrain_pred(&state->facts_when_true.preds[pred], true); - constrain_pred(&state->facts_when_false.preds[pred], false); + if (entered) { + expr = leave(opt, expr, visitor); + } else { + opt_visit(opt, "%pe\n", expr); + } + + return expr; } -/** Infer data flow facts about an icmp-style ([+-]N) expression. */ -static void infer_icmp_facts(struct opt_state *state, const struct bfs_expr *expr, enum range_type type) { - struct range *range_when_true = &state->facts_when_true.ranges[type]; - struct range *range_when_false = &state->facts_when_false.ranges[type]; - long long value = expr->num; +/** Visit an expression recursively. */ +static struct bfs_expr *visit(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + opt_enter(opt, "%s()\n", visitor->name); + expr = visit_deep(opt, expr, visitor); + opt_leave(opt, "\n"); + return expr; +} - switch (expr->int_cmp) { - case BFS_INT_EQUAL: - constrain_min(range_when_true, value); - constrain_max(range_when_true, value); - range_remove(range_when_false, value); - break; +/** Visit an expression non-recursively. */ +static struct bfs_expr *visit_shallow(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + visit_fn *general = visitor->visit; + if (expr && general) { + expr = general(opt, expr, visitor); + } - case BFS_INT_LESS: - constrain_min(range_when_false, value); - constrain_max(range_when_true, value); - range_remove(range_when_true, value); - break; + if (!expr) { + return NULL; + } - case BFS_INT_GREATER: - constrain_max(range_when_false, value); - constrain_min(range_when_true, value); - range_remove(range_when_true, value); - break; + visit_fn *specific = look_up_visitor(expr, visitor->table); + if (specific) { + expr = specific(opt, expr, visitor); } + + return expr; } -/** Optimize -{execut,read,writ}able. */ -static struct bfs_expr *optimize_access(struct opt_state *state, struct bfs_expr *expr) { +/** Annotate -{execut,read,writ}able. */ +static struct bfs_expr *annotate_access(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { expr->probability = 1.0; - if (expr->num & R_OK) { - infer_pred_facts(state, READABLE_PRED); expr->probability *= 0.99; } - if (expr->num & W_OK) { - infer_pred_facts(state, WRITABLE_PRED); expr->probability *= 0.8; } - if (expr->num & X_OK) { - infer_pred_facts(state, EXECUTABLE_PRED); expr->probability *= 0.2; } return expr; } -/** Optimize -empty. */ -static struct bfs_expr *optimize_empty(struct opt_state *state, struct bfs_expr *expr) { - if (state->ctx->optlevel >= 4) { +/** Annotate -empty. */ +static struct bfs_expr *annotate_empty(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + if (opt->level >= 4) { // Since -empty attempts to open and read directories, it may // have side effects such as reporting permission errors, and // thus shouldn't be re-ordered without aggressive optimizations @@ -794,8 +962,8 @@ static struct bfs_expr *optimize_empty(struct opt_state *state, struct bfs_expr return expr; } -/** Optimize -{exec,ok}{,dir}. */ -static struct bfs_expr *optimize_exec(struct opt_state *state, struct bfs_expr *expr) { +/** Annotate -exec. */ +static struct bfs_expr *annotate_exec(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { if (expr->exec->flags & BFS_EXEC_MULTI) { expr->always_true = true; } else { @@ -805,36 +973,10 @@ static struct bfs_expr *optimize_exec(struct opt_state *state, struct bfs_expr * return expr; } -/** Optimize -name/-lname/-path. */ -static struct bfs_expr *optimize_fnmatch(struct opt_state *state, struct bfs_expr *expr) { - if (strchr(expr->argv[1], '*')) { - expr->probability = 0.5; - } else { +/** Annotate -name/-lname/-path. */ +static struct bfs_expr *annotate_fnmatch(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + if (expr->literal) { expr->probability = 0.1; - } - - return expr; -} - -/** Optimize -gid. */ -static struct bfs_expr *optimize_gid(struct opt_state *state, struct bfs_expr *expr) { - struct range *range = &state->facts_when_true.ranges[GID_RANGE]; - if (range->min == range->max) { - gid_t gid = range->min; - bool nogroup = !bfs_getgrgid(state->ctx->groups, gid); - if (errno == 0) { - constrain_pred(&state->facts_when_true.preds[NOGROUP_PRED], nogroup); - } - } - - return expr; -} - -/** Optimize -inum. */ -static struct bfs_expr *optimize_inum(struct opt_state *state, struct bfs_expr *expr) { - struct range *range = &state->facts_when_true.ranges[INUM_RANGE]; - if (range->min == range->max) { - expr->probability = 0.01; } else { expr->probability = 0.5; } @@ -842,49 +984,10 @@ static struct bfs_expr *optimize_inum(struct opt_state *state, struct bfs_expr * return expr; } -/** Optimize -links. */ -static struct bfs_expr *optimize_links(struct opt_state *state, struct bfs_expr *expr) { - struct range *range = &state->facts_when_true.ranges[LINKS_RANGE]; - if (1 >= range->min && 1 <= range->max) { - expr->probability = 0.99; - } else { - expr->probability = 0.5; - } - - return expr; -} - -/** Optimize -uid. */ -static struct bfs_expr *optimize_uid(struct opt_state *state, struct bfs_expr *expr) { - struct range *range = &state->facts_when_true.ranges[UID_RANGE]; - if (range->min == range->max) { - uid_t uid = range->min; - bool nouser = !bfs_getpwuid(state->ctx->users, uid); - if (errno == 0) { - constrain_pred(&state->facts_when_true.preds[NOUSER_PRED], nouser); - } - } - - return expr; -} - -/** Optimize -samefile. */ -static struct bfs_expr *optimize_samefile(struct opt_state *state, struct bfs_expr *expr) { - struct range *range_when_true = &state->facts_when_true.ranges[INUM_RANGE]; - constrain_min(range_when_true, expr->ino); - constrain_max(range_when_true, expr->ino); - return expr; -} - -/** Optimize -size. */ -static struct bfs_expr *optimize_size(struct opt_state *state, struct bfs_expr *expr) { - struct range *range = &state->facts_when_true.ranges[SIZE_RANGE]; - if (range->min == range->max) { - expr->probability = 0.01; - } else { - expr->probability = 0.5; - } - +/** Annotate -f?print. */ +static struct bfs_expr *annotate_fprint(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + const struct colors *colors = expr->cfile->colors; + expr->calls_stat = colors && colors_need_stat(colors); return expr; } @@ -922,502 +1025,1328 @@ static void estimate_type_probability(struct bfs_expr *expr) { } } -/** Optimize -type. */ -static struct bfs_expr *optimize_type(struct opt_state *state, struct bfs_expr *expr) { - state->facts_when_true.types &= expr->num; - state->facts_when_false.types &= ~expr->num; - +/** Annotate -type. */ +static struct bfs_expr *annotate_type(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { estimate_type_probability(expr); - return expr; } -/** Optimize -xtype. */ -static struct bfs_expr *optimize_xtype(struct opt_state *state, struct bfs_expr *expr) { - if (state->ctx->optlevel >= 4) { +/** Annotate -xtype. */ +static struct bfs_expr *annotate_xtype(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + if (opt->level >= 4) { // Since -xtype dereferences symbolic links, it may have side // effects such as reporting permission errors, and thus // shouldn't be re-ordered without aggressive optimizations expr->pure = true; } - state->facts_when_true.xtypes &= expr->num; - state->facts_when_false.xtypes &= ~expr->num; - estimate_type_probability(expr); - return expr; } -/** - * Table of pure expressions. - */ -static bfs_eval_fn *const opt_pure[] = { - eval_access, - eval_acl, - eval_capable, - eval_depth, - eval_false, - eval_flags, - eval_fstype, - eval_gid, - eval_hidden, - eval_inum, - eval_links, - eval_lname, - eval_name, - eval_newer, - eval_nogroup, - eval_nouser, - eval_path, - eval_perm, - eval_regex, - eval_samefile, - eval_size, - eval_sparse, - eval_time, - eval_true, - eval_type, - eval_uid, - eval_used, - eval_xattr, - eval_xattrname, -}; +/** Annotate a negation. */ +static struct bfs_expr *annotate_not(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct bfs_expr *rhs = only_child(expr); + expr->pure = rhs->pure; + expr->always_true = rhs->always_false; + expr->always_false = rhs->always_true; + expr->cost = rhs->cost; + expr->probability = 1.0 - rhs->probability; + return expr; +} -/** - * Table of always-true expressions. - */ -static bfs_eval_fn *const opt_always_true[] = { - eval_fls, - eval_fprint, - eval_fprint0, - eval_fprintf, - eval_fprintx, - eval_prune, - eval_true, - - // Non-returning - eval_exit, - eval_quit, -}; +/** Annotate a conjunction. */ +static struct bfs_expr *annotate_and(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + expr->pure = true; + expr->always_true = true; + expr->always_false = false; + expr->cost = 0.0; + expr->probability = 1.0; -/** - * Table of always-false expressions. - */ -static bfs_eval_fn *const opt_always_false[] = { - eval_false, + for_expr (child, expr) { + expr->pure &= child->pure; + expr->always_true &= child->always_true; + expr->always_false |= child->always_false; + expr->cost += expr->probability * child->cost; + expr->probability *= child->probability; + } - // Non-returning - eval_exit, - eval_quit, -}; + return expr; +} -#define FAST_COST 40.0 -#define FNMATCH_COST 400.0 -#define STAT_COST 1000.0 -#define PRINT_COST 20000.0 +/** Annotate a disjunction. */ +static struct bfs_expr *annotate_or(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + expr->pure = true; + expr->always_true = false; + expr->always_false = true; + expr->cost = 0.0; -/** - * Table of expression costs. - */ -static const struct { - /** The evaluation function with this cost. */ - bfs_eval_fn *eval_fn; - /** The matching cost. */ - float cost; -} opt_costs[] = { - {eval_access, STAT_COST}, - {eval_acl, STAT_COST}, - {eval_capable, STAT_COST}, - {eval_empty, 2 * STAT_COST}, // readdir() is worse than stat() - {eval_fls, PRINT_COST}, - {eval_fprint, PRINT_COST}, - {eval_fprint0, PRINT_COST}, - {eval_fprintf, PRINT_COST}, - {eval_fprintx, PRINT_COST}, - {eval_fstype, STAT_COST}, - {eval_gid, STAT_COST}, - {eval_inum, STAT_COST}, - {eval_links, STAT_COST}, - {eval_lname, FNMATCH_COST}, - {eval_name, FNMATCH_COST}, - {eval_newer, STAT_COST}, - {eval_nogroup, STAT_COST}, - {eval_nouser, STAT_COST}, - {eval_path, FNMATCH_COST}, - {eval_perm, STAT_COST}, - {eval_samefile, STAT_COST}, - {eval_size, STAT_COST}, - {eval_sparse, STAT_COST}, - {eval_time, STAT_COST}, - {eval_uid, STAT_COST}, - {eval_used, STAT_COST}, - {eval_xattr, STAT_COST}, - {eval_xattrname, STAT_COST}, -}; - -/** - * Table of expression probabilities. - */ -static const struct { - /** The evaluation function with this cost. */ - bfs_eval_fn *eval_fn; - /** The matching probability. */ - float probability; -} opt_probs[] = { - {eval_acl, 0.00002}, - {eval_capable, 0.000002}, - {eval_empty, 0.01}, - {eval_false, 0.0}, - {eval_hidden, 0.01}, - {eval_nogroup, 0.01}, - {eval_nouser, 0.01}, - {eval_samefile, 0.01}, - {eval_true, 1.0}, - {eval_xattr, 0.01}, - {eval_xattrname, 0.01}, -}; + float false_prob = 1.0; + for_expr (child, expr) { + expr->pure &= child->pure; + expr->always_true |= child->always_true; + expr->always_false &= child->always_false; + expr->cost += false_prob * child->cost; + false_prob *= (1.0 - child->probability); + } + expr->probability = 1.0 - false_prob; + return expr; +} -/** - * Table of simple predicates. - */ -static const struct { - /** The evaluation function this optimizer applies to. */ - bfs_eval_fn *eval_fn; - /** The corresponding predicate. */ - enum pred_type pred; -} opt_preds[] = { - {eval_acl, ACL_PRED}, - {eval_capable, CAPABLE_PRED}, - {eval_empty, EMPTY_PRED}, - {eval_hidden, HIDDEN_PRED}, - {eval_nogroup, NOGROUP_PRED}, - {eval_nouser, NOUSER_PRED}, - {eval_sparse, SPARSE_PRED}, - {eval_xattr, XATTR_PRED}, -}; +/** Annotate a comma expression. */ +static struct bfs_expr *annotate_comma(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + expr->pure = true; + expr->cost = 0.0; -/** - * Table of simple range comparisons. - */ -static const struct { - /** The evaluation function this optimizer applies to. */ - bfs_eval_fn *eval_fn; - /** The corresponding range. */ - enum range_type range; -} opt_ranges[] = { - {eval_depth, DEPTH_RANGE}, - {eval_gid, GID_RANGE}, - {eval_inum, INUM_RANGE}, - {eval_links, LINKS_RANGE}, - {eval_size, SIZE_RANGE}, - {eval_uid, UID_RANGE}, -}; + for_expr (child, expr) { + expr->pure &= child->pure; + expr->always_true = child->always_true; + expr->always_false = child->always_false; + expr->cost += child->cost; + expr->probability = child->probability; + } -/** Signature for custom optimizer functions. */ -typedef struct bfs_expr *bfs_opt_fn(struct opt_state *state, struct bfs_expr *expr); + return expr; +} -/** Table of custom optimizer functions. */ -static const struct { - /** The evaluation function this optimizer applies to. */ - bfs_eval_fn *eval_fn; - /** The corresponding optimizer function. */ - bfs_opt_fn *opt_fn; -} opt_fns[] = { - // Primaries - {eval_access, optimize_access}, - {eval_empty, optimize_empty}, - {eval_exec, optimize_exec}, - {eval_gid, optimize_gid}, - {eval_inum, optimize_inum}, - {eval_links, optimize_links}, - {eval_lname, optimize_fnmatch}, - {eval_name, optimize_fnmatch}, - {eval_path, optimize_fnmatch}, - {eval_samefile, optimize_samefile}, - {eval_size, optimize_size}, - {eval_type, optimize_type}, - {eval_uid, optimize_uid}, - {eval_xtype, optimize_xtype}, - - // Operators - {eval_and, optimize_and_expr_recursive}, - {eval_comma, optimize_comma_expr_recursive}, - {eval_not, optimize_not_expr_recursive}, - {eval_or, optimize_or_expr_recursive}, -}; +/** Annotate an arbitrary expression. */ +static struct bfs_expr *annotate_visit(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + /** Table of pure expressions. */ + static bfs_eval_fn *const pure[] = { + eval_access, + eval_acl, + eval_capable, + eval_depth, + eval_false, + eval_flags, + eval_fstype, + eval_gid, + eval_hidden, + eval_inum, + eval_links, + eval_lname, + eval_name, + eval_newer, + eval_nogroup, + eval_nouser, + eval_path, + eval_perm, + eval_regex, + eval_samefile, + eval_size, + eval_sparse, + eval_time, + eval_true, + eval_type, + eval_uid, + eval_used, + eval_xattr, + eval_xattrname, + }; -/** - * Look up the appropriate optimizer for an expression and call it. - */ -static struct bfs_expr *optimize_expr_lookup(struct opt_state *state, struct bfs_expr *expr) { - for (size_t i = 0; i < countof(opt_pure); ++i) { - if (opt_pure[i] == expr->eval_fn) { + expr->pure = false; + for (size_t i = 0; i < countof(pure); ++i) { + if (expr->eval_fn == pure[i]) { expr->pure = true; break; } } - for (size_t i = 0; i < countof(opt_always_true); ++i) { - if (opt_always_true[i] == expr->eval_fn) { + /** Table of always-true expressions. */ + static bfs_eval_fn *const always_true[] = { + eval_fls, + eval_fprint, + eval_fprint0, + eval_fprintf, + eval_fprintx, + eval_limit, + eval_prune, + eval_true, + // Non-returning + eval_exit, + eval_quit, + }; + + expr->always_true = false; + for (size_t i = 0; i < countof(always_true); ++i) { + if (expr->eval_fn == always_true[i]) { expr->always_true = true; break; } } - for (size_t i = 0; i < countof(opt_always_false); ++i) { - if (opt_always_false[i] == expr->eval_fn) { + /** Table of always-false expressions. */ + static bfs_eval_fn *const always_false[] = { + eval_false, + // Non-returning + eval_exit, + eval_quit, + }; + + expr->always_false = false; + for (size_t i = 0; i < countof(always_false); ++i) { + if (expr->eval_fn == always_false[i]) { expr->always_false = true; break; } } - expr->cost = FAST_COST; - for (size_t i = 0; i < countof(opt_costs); ++i) { - if (opt_costs[i].eval_fn == expr->eval_fn) { - expr->cost = opt_costs[i].cost; + /** Table of stat-calling primaries. */ + static bfs_eval_fn *const calls_stat[] = { + eval_empty, + eval_flags, + eval_fls, + eval_fprintf, + eval_fstype, + eval_gid, + eval_inum, + eval_links, + eval_newer, + eval_nogroup, + eval_nouser, + eval_perm, + eval_samefile, + eval_size, + eval_sparse, + eval_time, + eval_uid, + eval_used, + eval_xattr, + eval_xattrname, + }; + + expr->calls_stat = false; + for (size_t i = 0; i < countof(calls_stat); ++i) { + if (expr->eval_fn == calls_stat[i]) { + expr->calls_stat = true; break; } } - for (size_t i = 0; i < countof(opt_probs); ++i) { - if (opt_probs[i].eval_fn == expr->eval_fn) { - expr->probability = opt_probs[i].probability; +#define FAST_COST 40.0 +#define FNMATCH_COST 400.0 +#define STAT_COST 1000.0 +#define PRINT_COST 20000.0 + + /** Table of expression costs. */ + static const struct { + bfs_eval_fn *eval_fn; + float cost; + } costs[] = { + {eval_access, STAT_COST}, + {eval_acl, STAT_COST}, + {eval_capable, STAT_COST}, + {eval_empty, 2 * STAT_COST}, // readdir() is worse than stat() + {eval_flags, STAT_COST}, + {eval_fls, PRINT_COST}, + {eval_fprint, PRINT_COST}, + {eval_fprint0, PRINT_COST}, + {eval_fprintf, PRINT_COST}, + {eval_fprintx, PRINT_COST}, + {eval_fstype, STAT_COST}, + {eval_gid, STAT_COST}, + {eval_inum, STAT_COST}, + {eval_links, STAT_COST}, + {eval_lname, FNMATCH_COST}, + {eval_name, FNMATCH_COST}, + {eval_newer, STAT_COST}, + {eval_nogroup, STAT_COST}, + {eval_nouser, STAT_COST}, + {eval_path, FNMATCH_COST}, + {eval_perm, STAT_COST}, + {eval_samefile, STAT_COST}, + {eval_size, STAT_COST}, + {eval_sparse, STAT_COST}, + {eval_time, STAT_COST}, + {eval_uid, STAT_COST}, + {eval_used, STAT_COST}, + {eval_xattr, STAT_COST}, + {eval_xattrname, STAT_COST}, + }; + + expr->cost = FAST_COST; + for (size_t i = 0; i < countof(costs); ++i) { + if (expr->eval_fn == costs[i].eval_fn) { + expr->cost = costs[i].cost; break; } } - for (size_t i = 0; i < countof(opt_preds); ++i) { - if (opt_preds[i].eval_fn == expr->eval_fn) { - infer_pred_facts(state, opt_preds[i].pred); + /** Table of expression probabilities. */ + static const struct { + /** The evaluation function with this cost. */ + bfs_eval_fn *eval_fn; + /** The matching probability. */ + float probability; + } probs[] = { + {eval_acl, 0.00002}, + {eval_capable, 0.000002}, + {eval_empty, 0.01}, + {eval_false, 0.0}, + {eval_hidden, 0.01}, + {eval_nogroup, 0.01}, + {eval_nouser, 0.01}, + {eval_samefile, 0.01}, + {eval_true, 1.0}, + {eval_xattr, 0.01}, + {eval_xattrname, 0.01}, + }; + + expr->probability = 0.5; + for (size_t i = 0; i < countof(probs); ++i) { + if (expr->eval_fn == probs[i].eval_fn) { + expr->probability = probs[i].probability; break; } } - for (size_t i = 0; i < countof(opt_ranges); ++i) { - if (opt_ranges[i].eval_fn == expr->eval_fn) { - infer_icmp_facts(state, expr, opt_ranges[i].range); - break; + return expr; +} + +/** + * Annotating visitor. + */ +static const struct visitor annotate = { + .name = "annotate", + .visit = annotate_visit, + .table = (const struct visitor_table[]) { + {eval_access, annotate_access}, + {eval_empty, annotate_empty}, + {eval_exec, annotate_exec}, + {eval_fprint, annotate_fprint}, + {eval_lname, annotate_fnmatch}, + {eval_name, annotate_fnmatch}, + {eval_path, annotate_fnmatch}, + {eval_type, annotate_type}, + {eval_xtype, annotate_xtype}, + + {eval_not, annotate_not}, + {eval_and, annotate_and}, + {eval_or, annotate_or}, + {eval_comma, annotate_comma}, + + {NULL, NULL}, + }, +}; + +/** Create a constant expression. */ +static struct bfs_expr *opt_const(struct bfs_opt *opt, bool value) { + static bfs_eval_fn *const fns[] = {eval_false, eval_true}; + static char *fake_args[] = {"-false", "-true"}; + + struct bfs_expr *expr = bfs_expr_new(opt->ctx, fns[value], 1, &fake_args[value], BFS_TEST); + return visit_shallow(opt, expr, &annotate); +} + +/** Negate an expression, keeping it canonical. */ +static struct bfs_expr *negate_expr(struct bfs_opt *opt, struct bfs_expr *expr, char **argv) { + if (expr->eval_fn == eval_not) { + return only_child(expr); + } else if (expr->eval_fn == eval_true) { + return opt_const(opt, false); + } else if (expr->eval_fn == eval_false) { + return opt_const(opt, true); + } + + struct bfs_expr *ret = bfs_expr_new(opt->ctx, eval_not, 1, argv, BFS_OPERATOR); + if (!ret) { + return NULL; + } + + bfs_expr_append(ret, expr); + return visit_shallow(opt, ret, &annotate); +} + +/** Sink negations into a conjunction/disjunction using De Morgan's laws. */ +static struct bfs_expr *sink_not_andor(struct bfs_opt *opt, struct bfs_expr *expr) { + opt_debug(opt, "De Morgan's laws\n"); + + char **argv = expr->argv; + expr = only_child(expr); + opt_enter(opt, "%pe\n", expr); + + if (expr->eval_fn == eval_and) { + expr->eval_fn = eval_or; + expr->argv = &fake_or_arg; + } else { + bfs_assert(expr->eval_fn == eval_or); + expr->eval_fn = eval_and; + expr->argv = &fake_and_arg; + } + + struct bfs_exprs children; + foster_children(expr, &children); + + drain_slist (struct bfs_expr, child, &children) { + opt_enter(opt, "%pe\n", child); + + child = negate_expr(opt, child, argv); + if (!child) { + return NULL; } + + opt_leave(opt, "%pe\n", child); + bfs_expr_append(expr, child); } - for (size_t i = 0; i < countof(opt_fns); ++i) { - if (opt_fns[i].eval_fn == expr->eval_fn) { - return opt_fns[i].opt_fn(state, expr); + opt_leave(opt, "%pe\n", expr); + return visit_shallow(opt, expr, &annotate); +} + +/** Sink a negation into a comma expression. */ +static struct bfs_expr *sink_not_comma(struct bfs_opt *opt, struct bfs_expr *expr) { + char **argv = expr->argv; + expr = only_child(expr); + opt_enter(opt, "%pe\n", expr); + + bfs_assert(expr->eval_fn == eval_comma); + + struct bfs_exprs children; + foster_children(expr, &children); + + drain_slist (struct bfs_expr, child, &children) { + if (SLIST_EMPTY(&children)) { + opt_enter(opt, "%pe\n", child); + opt_debug(opt, "sink\n"); + + child = negate_expr(opt, child, argv); + if (!child) { + return NULL; + } + + opt_leave(opt, "%pe\n", child); + } else { + opt_visit(opt, "%pe\n", child); } + + bfs_expr_append(expr, child); } - return expr; + opt_leave(opt, "%pe\n", expr); + return visit_shallow(opt, expr, &annotate); +} + +/** Canonicalize a negation. */ +static struct bfs_expr *canonicalize_not(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct bfs_expr *rhs = only_child(expr); + + if (rhs->eval_fn == eval_not) { + opt_debug(opt, "double negation\n"); + return only_child(rhs); + } else if (rhs->eval_fn == eval_and || rhs->eval_fn == eval_or) { + return sink_not_andor(opt, expr); + } else if (rhs->eval_fn == eval_comma) { + return sink_not_comma(opt, expr); + } else if (is_const(rhs)) { + opt_debug(opt, "constant propagation\n"); + return opt_const(opt, rhs->eval_fn == eval_false); + } else { + return expr; + } } -static struct bfs_expr *optimize_expr_recursive(struct opt_state *state, struct bfs_expr *expr) { - int optlevel = state->ctx->optlevel; +/** Canonicalize an associative operator. */ +static struct bfs_expr *canonicalize_assoc(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct bfs_exprs children; + foster_children(expr, &children); + + struct bfs_exprs flat; + SLIST_INIT(&flat); - state->facts_when_true = state->facts; - state->facts_when_false = state->facts; + drain_slist (struct bfs_expr, child, &children) { + if (child->eval_fn == expr->eval_fn) { + struct bfs_expr *head = SLIST_HEAD(&child->children); + struct bfs_expr *tail = SLIST_TAIL(&child->children); - if (optlevel >= 2 && facts_are_impossible(&state->facts)) { - struct bfs_expr *ret = opt_const(false); - opt_debug(state, 2, "reachability: %pe --> %pe\n", expr, ret); - opt_warning(state, expr, "This expression is unreachable.\n\n"); - bfs_expr_free(expr); - return ret; + if (!head) { + opt_delete(opt, "%pe [empty]\n", child); + } else { + opt_enter(opt, "%pe\n", child); + opt_debug(opt, "associativity\n"); + if (head == tail) { + opt_leave(opt, "%pe\n", head); + } else if (head->next == tail) { + opt_leave(opt, "%pe %pe\n", head, tail); + } else { + opt_leave(opt, "%pe ... %pe\n", head, tail); + } + } + + SLIST_EXTEND(&flat, &child->children); + } else { + opt_visit(opt, "%pe\n", child); + SLIST_APPEND(&flat, child); + } } - expr = optimize_expr_lookup(state, expr); - if (!expr) { - return NULL; + bfs_expr_extend(expr, &flat); + + return visit_shallow(opt, expr, &annotate); +} + +/** + * Canonicalizing visitor. + */ +static const struct visitor canonicalize = { + .name = "canonicalize", + .table = (const struct visitor_table[]) { + {eval_not, canonicalize_not}, + {eval_and, canonicalize_assoc}, + {eval_or, canonicalize_assoc}, + {eval_comma, canonicalize_assoc}, + {NULL, NULL}, + }, +}; + +/** Calculate the cost of an ordered pair of expressions. */ +static float expr_cost(const struct bfs_expr *parent, const struct bfs_expr *lhs, const struct bfs_expr *rhs) { + // https://cs.stackexchange.com/a/66921/21004 + float prob = lhs->probability; + if (parent->eval_fn == eval_or) { + prob = 1.0 - prob; + } + return lhs->cost + prob * rhs->cost; +} + +/** Sort a block of expressions. */ +static void sort_exprs(struct bfs_opt *opt, struct bfs_expr *parent, struct bfs_exprs *exprs) { + if (!exprs->head || !exprs->head->next) { + return; + } + + struct bfs_exprs left, right; + SLIST_INIT(&left); + SLIST_INIT(&right); + + // Split + for (struct bfs_expr *hare = exprs->head; hare && (hare = hare->next); hare = hare->next) { + struct bfs_expr *tortoise = SLIST_POP(exprs); + SLIST_APPEND(&left, tortoise); } + SLIST_EXTEND(&right, exprs); - if (bfs_expr_is_parent(expr)) { - struct bfs_expr *lhs = expr->lhs; - struct bfs_expr *rhs = expr->rhs; - if (rhs) { - expr->persistent_fds = rhs->persistent_fds; - expr->ephemeral_fds = rhs->ephemeral_fds; + // Recurse + sort_exprs(opt, parent, &left); + sort_exprs(opt, parent, &right); + + // Merge + while (!SLIST_EMPTY(&left) && !SLIST_EMPTY(&right)) { + struct bfs_expr *lhs = left.head; + struct bfs_expr *rhs = right.head; + + float cost = expr_cost(parent, lhs, rhs); + float swapped = expr_cost(parent, rhs, lhs); + + if (cost <= swapped) { + SLIST_POP(&left); + SLIST_APPEND(exprs, lhs); + } else { + opt_enter(opt, "%pe %pe [${ylw}%g${rs}]\n", lhs, rhs, cost); + SLIST_POP(&right); + SLIST_APPEND(exprs, rhs); + opt_leave(opt, "%pe %pe [${ylw}%g${rs}]\n", rhs, lhs, swapped); } - if (lhs) { - expr->persistent_fds += lhs->persistent_fds; - if (lhs->ephemeral_fds > expr->ephemeral_fds) { - expr->ephemeral_fds = lhs->ephemeral_fds; - } + } + SLIST_EXTEND(exprs, &left); + SLIST_EXTEND(exprs, &right); +} + +/** Reorder children to reduce cost. */ +static struct bfs_expr *reorder_andor(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct bfs_exprs children; + foster_children(expr, &children); + + // Split into blocks of consecutive pure/impure expressions, and sort + // the pure blocks + struct bfs_exprs pure; + SLIST_INIT(&pure); + + drain_slist (struct bfs_expr, child, &children) { + if (child->pure) { + SLIST_APPEND(&pure, child); + } else { + sort_exprs(opt, expr, &pure); + bfs_expr_extend(expr, &pure); + bfs_expr_append(expr, child); + } + } + sort_exprs(opt, expr, &pure); + bfs_expr_extend(expr, &pure); + + return visit_shallow(opt, expr, &annotate); +} + +/** + * Reordering visitor. + */ +static const struct visitor reorder = { + .name = "reorder", + .table = (const struct visitor_table[]) { + {eval_and, reorder_andor}, + {eval_or, reorder_andor}, + {NULL, NULL}, + }, +}; + +/** Transfer function for simple predicates. */ +static void data_flow_pred(struct bfs_opt *opt, enum pred_type pred, bool value) { + constrain_pred(&opt->after_true.preds[pred], value); + constrain_pred(&opt->after_false.preds[pred], !value); +} + +/** Transfer function for icmp-style ([+-]N) expressions. */ +static void data_flow_icmp(struct bfs_opt *opt, const struct bfs_expr *expr, enum range_type type) { + struct df_range *true_range = &opt->after_true.ranges[type]; + struct df_range *false_range = &opt->after_false.ranges[type]; + long long value = expr->num; + + switch (expr->int_cmp) { + case BFS_INT_EQUAL: + constrain_range(true_range, value); + range_remove(false_range, value); + break; + + case BFS_INT_LESS: + constrain_min(false_range, value); + constrain_max(true_range, value); + range_remove(true_range, value); + break; + + case BFS_INT_GREATER: + constrain_max(false_range, value); + constrain_min(true_range, value); + range_remove(true_range, value); + break; + } +} + +/** Transfer function for -{execut,read,writ}able. */ +static struct bfs_expr *data_flow_access(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + if (expr->num & R_OK) { + data_flow_pred(opt, READABLE_PRED, true); + } + if (expr->num & W_OK) { + data_flow_pred(opt, WRITABLE_PRED, true); + } + if (expr->num & X_OK) { + data_flow_pred(opt, EXECUTABLE_PRED, true); + } + + return expr; +} + +/** Transfer function for -empty. */ +static struct bfs_expr *data_flow_empty(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + opt->after_true.types &= (1 << BFS_REG) | (1 << BFS_DIR); + + if (opt->before.types == (1 << BFS_REG)) { + constrain_range(&opt->after_true.ranges[SIZE_RANGE], 0); + range_remove(&opt->after_false.ranges[SIZE_RANGE], 0); + } + + return expr; +} + +/** Transfer function for -gid. */ +static struct bfs_expr *data_flow_gid(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct df_range *range = &opt->after_true.ranges[GID_RANGE]; + if (range->min == range->max) { + gid_t gid = range->min; + bool nogroup = !bfs_getgrgid(opt->ctx->groups, gid); + if (errno == 0) { + data_flow_pred(opt, NOGROUP_PRED, nogroup); + } + } + + return expr; +} + +/** Transfer function for -inum. */ +static struct bfs_expr *data_flow_inum(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct df_range *range = &opt->after_true.ranges[INUM_RANGE]; + if (range->min == range->max) { + expr->probability = 0.01; + } else { + expr->probability = 0.5; + } + + return expr; +} + +/** Transfer function for -links. */ +static struct bfs_expr *data_flow_links(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct df_range *range = &opt->after_true.ranges[LINKS_RANGE]; + if (1 >= range->min && 1 <= range->max) { + expr->probability = 0.99; + } else { + expr->probability = 0.5; + } + + return expr; +} + +/** Transfer function for -lname. */ +static struct bfs_expr *data_flow_lname(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + opt->after_true.types &= 1 << BFS_LNK; + return expr; +} + +/** Transfer function for -samefile. */ +static struct bfs_expr *data_flow_samefile(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct df_range *true_range = &opt->after_true.ranges[INUM_RANGE]; + constrain_range(true_range, expr->ino); + + struct df_range *false_range = &opt->after_false.ranges[INUM_RANGE]; + range_remove(false_range, expr->ino); + + return expr; +} + +/** Transfer function for -size. */ +static struct bfs_expr *data_flow_size(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct df_range *range = &opt->after_true.ranges[SIZE_RANGE]; + if (range->min == range->max) { + expr->probability = 0.01; + } else { + expr->probability = 0.5; + } + + return expr; +} + +/** Transfer function for -type. */ +static struct bfs_expr *data_flow_type(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + opt->after_true.types &= expr->num; + opt->after_false.types &= ~expr->num; + return expr; +} + +/** Transfer function for -uid. */ +static struct bfs_expr *data_flow_uid(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct df_range *range = &opt->after_true.ranges[UID_RANGE]; + if (range->min == range->max) { + uid_t uid = range->min; + bool nouser = !bfs_getpwuid(opt->ctx->users, uid); + if (errno == 0) { + data_flow_pred(opt, NOUSER_PRED, nouser); } - } else if (!expr->pure) { - facts_union(state->facts_when_impure, state->facts_when_impure, &state->facts); } + return expr; +} + +/** Transfer function for -xtype. */ +static struct bfs_expr *data_flow_xtype(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + opt->after_true.xtypes &= expr->num; + opt->after_false.xtypes &= ~expr->num; + return expr; +} + +/** Data flow visitor entry. */ +static struct bfs_expr *data_flow_enter(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + visit_enter(opt, expr, visitor); + + df_dump(opt, "before", &opt->before); + + if (!bfs_expr_is_parent(expr) && !expr->pure) { + df_join(opt->impure, &opt->before); + df_dump(opt, "impure", opt->impure); + } + + return expr; +} + +/** Data flow visitor exit. */ +static struct bfs_expr *data_flow_leave(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { if (expr->always_true) { expr->probability = 1.0; - set_facts_impossible(&state->facts_when_false); + df_init_bottom(&opt->after_false); } + if (expr->always_false) { expr->probability = 0.0; - set_facts_impossible(&state->facts_when_true); + df_init_bottom(&opt->after_true); } - if (optlevel < 2 || expr->eval_fn == eval_true || expr->eval_fn == eval_false) { - return expr; + df_dump(opt, "after true", &opt->after_true); + df_dump(opt, "after false", &opt->after_false); + + if (df_is_bottom(&opt->after_false)) { + if (!expr->pure) { + expr->always_true = true; + expr->probability = 1.0; + } else if (expr->eval_fn != eval_true) { + opt_warning(opt, expr, "This expression is always true.\n\n"); + opt_debug(opt, "pure, always true\n"); + expr = opt_const(opt, true); + if (!expr) { + return NULL; + } + } } - if (facts_are_impossible(&state->facts_when_true)) { - if (expr->pure) { - struct bfs_expr *ret = opt_const(false); - opt_warning(state, expr, "This expression is always false.\n\n"); - opt_debug(state, 2, "data flow: %pe --> %pe\n", expr, ret); - bfs_expr_free(expr); - return ret; - } else { + if (df_is_bottom(&opt->after_true)) { + if (!expr->pure) { expr->always_false = true; expr->probability = 0.0; + } else if (expr->eval_fn != eval_false) { + opt_warning(opt, expr, "This expression is always false.\n\n"); + opt_debug(opt, "pure, always false\n"); + expr = opt_const(opt, false); + if (!expr) { + return NULL; + } } - } else if (facts_are_impossible(&state->facts_when_false)) { - if (expr->pure) { - struct bfs_expr *ret = opt_const(true); - opt_debug(state, 2, "data flow: %pe --> %pe\n", expr, ret); - opt_warning(state, expr, "This expression is always true.\n\n"); - bfs_expr_free(expr); - return ret; - } else { - expr->always_true = true; - expr->probability = 1.0; + } + + return visit_leave(opt, expr, visitor); +} + +/** Ignore an expression (and possibly warn/prompt). */ +static struct bfs_expr *opt_ignore(struct bfs_opt *opt, struct bfs_expr *expr, bool delete) { + if (delete) { + opt_delete(opt, "%pe [ignored result]\n", expr); + } else { + opt_debug(opt, "ignored result\n"); + } + + if (expr->kind != BFS_TEST) { + goto done; + } + + if (!opt_warning(opt, expr, "The result of this expression is ignored.\n")) { + goto done; + } + + struct bfs_ctx *ctx = opt->ctx; + if (ctx->interactive && ctx->dangerous) { + bfs_warning(ctx, "Do you want to continue? "); + if (ynprompt() <= 0) { + errno = 0; + return NULL; } } + fprintf(stderr, "\n"); + +done: + if (!delete && expr->pure) { + // If we're not deleting the expression entirely, replace it with -false + expr = opt_const(opt, false); + } return expr; } -/** Swap the children of a binary expression if it would reduce the cost. */ -static bool reorder_expr(const struct opt_state *state, struct bfs_expr *expr, float swapped_cost) { - if (swapped_cost < expr->cost) { - bool debug = opt_debug(state, 3, "cost: %pe <==> ", expr); - struct bfs_expr *lhs = expr->lhs; - expr->lhs = expr->rhs; - expr->rhs = lhs; - if (debug) { - cfprintf(state->ctx->cerr, "%pe (~${ylw}%g${rs} --> ~${ylw}%g${rs})\n", expr, expr->cost, swapped_cost); +/** Data flow visitor function. */ +static struct bfs_expr *data_flow_visit(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + if (opt->ignore_result) { + expr = opt_ignore(opt, expr, false); + if (!expr) { + return NULL; + } + } + + if (df_is_bottom(&opt->before)) { + opt_debug(opt, "unreachable\n"); + opt_warning(opt, expr, "This expression is unreachable.\n\n"); + expr = opt_const(opt, false); + if (!expr) { + return NULL; + } + } + + /** Table of simple predicates. */ + static const struct { + bfs_eval_fn *eval_fn; + enum pred_type pred; + } preds[] = { + {eval_acl, ACL_PRED}, + {eval_capable, CAPABLE_PRED}, + {eval_empty, EMPTY_PRED}, + {eval_hidden, HIDDEN_PRED}, + {eval_nogroup, NOGROUP_PRED}, + {eval_nouser, NOUSER_PRED}, + {eval_sparse, SPARSE_PRED}, + {eval_xattr, XATTR_PRED}, + }; + + for (size_t i = 0; i < countof(preds); ++i) { + if (preds[i].eval_fn == expr->eval_fn) { + data_flow_pred(opt, preds[i].pred, true); + break; } - expr->cost = swapped_cost; - return true; - } else { - return false; } + + /** Table of simple range comparisons. */ + static const struct { + bfs_eval_fn *eval_fn; + enum range_type range; + } ranges[] = { + {eval_depth, DEPTH_RANGE}, + {eval_gid, GID_RANGE}, + {eval_inum, INUM_RANGE}, + {eval_links, LINKS_RANGE}, + {eval_size, SIZE_RANGE}, + {eval_uid, UID_RANGE}, + }; + + for (size_t i = 0; i < countof(ranges); ++i) { + if (ranges[i].eval_fn == expr->eval_fn) { + data_flow_icmp(opt, expr, ranges[i].range); + break; + } + } + + return expr; } /** - * Recursively reorder sub-expressions to reduce the overall cost. - * - * @param expr - * The expression to optimize. - * @return - * Whether any subexpression was reordered. + * Data flow visitor. */ -static bool reorder_expr_recursive(const struct opt_state *state, struct bfs_expr *expr) { - if (!bfs_expr_is_parent(expr)) { - return false; +static const struct visitor data_flow = { + .name = "data_flow", + .enter = data_flow_enter, + .visit = data_flow_visit, + .leave = data_flow_leave, + .table = (const struct visitor_table[]) { + {eval_access, data_flow_access}, + {eval_empty, data_flow_empty}, + {eval_gid, data_flow_gid}, + {eval_inum, data_flow_inum}, + {eval_links, data_flow_links}, + {eval_lname, data_flow_lname}, + {eval_samefile, data_flow_samefile}, + {eval_size, data_flow_size}, + {eval_type, data_flow_type}, + {eval_uid, data_flow_uid}, + {eval_xtype, data_flow_xtype}, + {NULL, NULL}, + }, +}; + +/** Simplify a negation. */ +static struct bfs_expr *simplify_not(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + if (opt->ignore_result) { + opt_debug(opt, "ignored result\n"); + expr = only_child(expr); } - struct bfs_expr *lhs = expr->lhs; - struct bfs_expr *rhs = expr->rhs; + return expr; +} - bool ret = false; - if (lhs) { - ret |= reorder_expr_recursive(state, lhs); +/** Lift negations out of a conjunction/disjunction using De Morgan's laws. */ +static struct bfs_expr *lift_andor_not(struct bfs_opt *opt, struct bfs_expr *expr) { + // Only lift negations if it would reduce the number of (-not) expressions + size_t added = 0, removed = 0; + for_expr (child, expr) { + if (child->eval_fn == eval_not) { + ++removed; + } else { + ++added; + } + } + if (added >= removed) { + return visit_shallow(opt, expr, &annotate); } - if (rhs) { - ret |= reorder_expr_recursive(state, rhs); + + opt_debug(opt, "De Morgan's laws\n"); + + if (expr->eval_fn == eval_and) { + expr->eval_fn = eval_or; + expr->argv = &fake_or_arg; + } else { + bfs_assert(expr->eval_fn == eval_or); + expr->eval_fn = eval_and; + expr->argv = &fake_and_arg; } - if (expr->eval_fn == eval_and || expr->eval_fn == eval_or) { - if (lhs->pure && rhs->pure) { - float rhs_prob = expr->eval_fn == eval_and ? rhs->probability : 1.0 - rhs->probability; - float swapped_cost = rhs->cost + rhs_prob*lhs->cost; - ret |= reorder_expr(state, expr, swapped_cost); + struct bfs_exprs children; + foster_children(expr, &children); + + drain_slist (struct bfs_expr, child, &children) { + opt_enter(opt, "%pe\n", child); + + child = negate_expr(opt, child, &fake_not_arg); + if (!child) { + return NULL; + } + + opt_leave(opt, "%pe\n", child); + bfs_expr_append(expr, child); + } + + expr = visit_shallow(opt, expr, &annotate); + if (!expr) { + return NULL; + } + + return negate_expr(opt, expr, &fake_not_arg); +} + +/** Get the first ignorable expression in a conjunction/disjunction. */ +static struct bfs_expr *first_ignorable(struct bfs_opt *opt, struct bfs_expr *expr) { + if (opt->level < 2 || !opt->ignore_result) { + return NULL; + } + + struct bfs_expr *ret = NULL; + for_expr (child, expr) { + if (!child->pure) { + ret = NULL; + } else if (!ret) { + ret = child; } } return ret; } +/** Simplify a conjunction. */ +static struct bfs_expr *simplify_and(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct bfs_expr *ignorable = first_ignorable(opt, expr); + bool ignore = false; + + struct bfs_exprs children; + foster_children(expr, &children); + + drain_slist (struct bfs_expr, child, &children) { + if (child == ignorable) { + ignore = true; + } + + if (ignore) { + if (!opt_ignore(opt, child, true)) { + return NULL; + } + continue; + } + + if (child->eval_fn == eval_true) { + opt_delete(opt, "%pe [conjunction elimination]\n", child); + continue; + } + + opt_visit(opt, "%pe\n", child); + bfs_expr_append(expr, child); + + if (child->always_false) { + drain_slist (struct bfs_expr, dead, &children) { + opt_delete(opt, "%pe [short-circuit]\n", dead); + } + } + } + + struct bfs_expr *child = bfs_expr_children(expr); + if (!child) { + opt_debug(opt, "nullary identity\n"); + return opt_const(opt, true); + } else if (!child->next) { + opt_debug(opt, "unary identity\n"); + return only_child(expr); + } + + return lift_andor_not(opt, expr); +} + +/** Simplify a disjunction. */ +static struct bfs_expr *simplify_or(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct bfs_expr *ignorable = first_ignorable(opt, expr); + bool ignore = false; + + struct bfs_exprs children; + foster_children(expr, &children); + + drain_slist (struct bfs_expr, child, &children) { + if (child == ignorable) { + ignore = true; + } + + if (ignore) { + if (!opt_ignore(opt, child, true)) { + return NULL; + } + continue; + } + + if (child->eval_fn == eval_false) { + opt_delete(opt, "%pe [disjunctive syllogism]\n", child); + continue; + } + + opt_visit(opt, "%pe\n", child); + bfs_expr_append(expr, child); + + if (child->always_true) { + drain_slist (struct bfs_expr, dead, &children) { + opt_delete(opt, "%pe [short-circuit]\n", dead); + } + } + } + + struct bfs_expr *child = bfs_expr_children(expr); + if (!child) { + opt_debug(opt, "nullary identity\n"); + return opt_const(opt, false); + } else if (!child->next) { + opt_debug(opt, "unary identity\n"); + return only_child(expr); + } + + return lift_andor_not(opt, expr); +} + +/** Simplify a comma expression. */ +static struct bfs_expr *simplify_comma(struct bfs_opt *opt, struct bfs_expr *expr, const struct visitor *visitor) { + struct bfs_exprs children; + foster_children(expr, &children); + + drain_slist (struct bfs_expr, child, &children) { + if (opt->level >= 2 && child->pure && !SLIST_EMPTY(&children)) { + if (!opt_ignore(opt, child, true)) { + return NULL; + } + continue; + } + + opt_visit(opt, "%pe\n", child); + bfs_expr_append(expr, child); + } + + struct bfs_expr *child = bfs_expr_children(expr); + if (child && !child->next) { + opt_debug(opt, "unary identity\n"); + return only_child(expr); + } + + return expr; +} + /** - * Optimize a top-level expression. + * Logical simplification visitor. */ -static struct bfs_expr *optimize_expr(struct opt_state *state, struct bfs_expr *expr) { - struct opt_facts saved_impure = *state->facts_when_impure; +static const struct visitor simplify = { + .name = "simplify", + .table = (const struct visitor_table[]) { + {eval_not, simplify_not}, + {eval_and, simplify_and}, + {eval_or, simplify_or}, + {eval_comma, simplify_comma}, + {NULL, NULL}, + }, +}; - expr = optimize_expr_recursive(state, expr); - if (!expr) { - return NULL; - } +/** Optimize an expression. */ +static struct bfs_expr *optimize(struct bfs_opt *opt, struct bfs_expr *expr) { + opt_enter(opt, "pass 0:\n"); + expr = visit(opt, expr, &annotate); + opt_leave(opt, NULL); + + /** Table of optimization passes. */ + static const struct { + /** Minimum optlevel for this pass. */ + int level; + /** The visitor for this pass. */ + const struct visitor *visitor; + } passes[] = { + {1, &canonicalize}, + {3, &reorder}, + {2, &data_flow}, + {1, &simplify}, + }; - if (state->ctx->optlevel >= 3 && reorder_expr_recursive(state, expr)) { - // Re-do optimizations to account for the new ordering - *state->facts_when_impure = saved_impure; - expr = optimize_expr_recursive(state, expr); - if (!expr) { - return NULL; + struct df_domain impure; + df_init_top(&opt->after_true); + df_init_top(&opt->after_false); + + for (int i = 0; i < 3; ++i) { + struct bfs_opt nested = *opt; + nested.impure = &impure; + impure = *opt->impure; + + opt_enter(&nested, "pass %d:\n", i + 1); + + for (size_t j = 0; j < countof(passes); ++j) { + if (opt->level < passes[j].level) { + continue; + } + + const struct visitor *visitor = passes[j].visitor; + + // Skip reordering the first time through the passes, to + // make warnings more understandable + if (visitor == &reorder) { + if (i == 0) { + continue; + } else { + nested.warn = false; + } + } + + expr = visit(&nested, expr, visitor); + if (!expr) { + return NULL; + } + + if (visitor == &data_flow) { + opt->after_true = nested.after_true; + opt->after_false = nested.after_false; + } + } + + opt_leave(&nested, NULL); + + if (!bfs_expr_is_parent(expr)) { + break; } } + *opt->impure = impure; return expr; } +/** An expression predicate. */ +typedef bool expr_pred(const struct bfs_expr *expr); + +/** Estimate the odds that a matching expression will be evaluated. */ +static float estimate_odds(const struct bfs_expr *expr, expr_pred *pred) { + if (pred(expr)) { + return 1.0; + } + + float nonmatch_odds = 1.0; + float reached_odds = 1.0; + for_expr (child, expr) { + float child_odds = estimate_odds(child, pred); + nonmatch_odds *= 1.0 - reached_odds * child_odds; + + if (expr->eval_fn == eval_and) { + reached_odds *= child->probability; + } else if (expr->eval_fn == eval_or) { + reached_odds *= 1.0 - child->probability; + } + } + + return 1.0 - nonmatch_odds; +} + +/** Whether an expression calls stat(). */ +static bool calls_stat(const struct bfs_expr *expr) { + return expr->calls_stat; +} + +/** Estimate the odds of calling stat(). */ +static float estimate_stat_odds(struct bfs_ctx *ctx) { + if (ctx->unique) { + return 1.0; + } + + float nostat_odds = 1.0 - estimate_odds(ctx->exclude, calls_stat); + + float reached_odds = 1.0 - ctx->exclude->probability; + float expr_odds = estimate_odds(ctx->expr, calls_stat); + nostat_odds *= 1.0 - reached_odds * expr_odds; + + return 1.0 - nostat_odds; +} + +/** Matches -(exec|ok) ... \; */ +static bool single_exec(const struct bfs_expr *expr) { + return expr->eval_fn == eval_exec && !(expr->exec->flags & BFS_EXEC_MULTI); +} + int bfs_optimize(struct bfs_ctx *ctx) { bfs_ctx_dump(ctx, DEBUG_OPT); - struct opt_facts facts_when_impure; - set_facts_impossible(&facts_when_impure); + struct df_domain impure; + df_init_bottom(&impure); - struct opt_state state = { + struct bfs_opt opt = { .ctx = ctx, - .facts_when_impure = &facts_when_impure, + .level = ctx->optlevel, + .depth = 0, + .warn = ctx->warn, + .ignore_result = false, + .impure = &impure, }; - facts_init(&state.facts); + df_init_top(&opt.before); - ctx->exclude = optimize_expr(&state, ctx->exclude); + ctx->exclude = optimize(&opt, ctx->exclude); if (!ctx->exclude) { return -1; } // Only non-excluded files are evaluated - state.facts = state.facts_when_false; + opt.before = opt.after_false; + opt.ignore_result = true; - struct range *depth = &state.facts.ranges[DEPTH_RANGE]; - constrain_min(depth, ctx->mindepth); - constrain_max(depth, ctx->maxdepth); + struct df_range *depth = &opt.before.ranges[DEPTH_RANGE]; + if (ctx->mindepth > 0) { + constrain_min(depth, ctx->mindepth); + } + if (ctx->maxdepth < INT_MAX) { + constrain_max(depth, ctx->maxdepth); + } - ctx->expr = optimize_expr(&state, ctx->expr); + ctx->expr = optimize(&opt, ctx->expr); if (!ctx->expr) { return -1; } - ctx->expr = ignore_result(&state, ctx->expr); - - if (facts_are_impossible(&facts_when_impure)) { + if (opt.level >= 2 && df_is_bottom(&impure)) { bfs_warning(ctx, "This command won't do anything.\n\n"); } - const struct range *depth_when_impure = &facts_when_impure.ranges[DEPTH_RANGE]; - long long mindepth = depth_when_impure->min; - long long maxdepth = depth_when_impure->max; + const struct df_range *impure_depth = &impure.ranges[DEPTH_RANGE]; + long long mindepth = impure_depth->min; + long long maxdepth = impure_depth->max; - int optlevel = ctx->optlevel; + opt_enter(&opt, "post-process:\n"); - if (optlevel >= 2 && mindepth > ctx->mindepth) { + if (opt.level >= 2 && mindepth > ctx->mindepth) { if (mindepth > INT_MAX) { mindepth = INT_MAX; } + opt_enter(&opt, "${blu}-mindepth${rs} ${bld}%d${rs}\n", ctx->mindepth); ctx->mindepth = mindepth; - opt_debug(&state, 2, "data flow: mindepth --> %d\n", ctx->mindepth); + opt_leave(&opt, "${blu}-mindepth${rs} ${bld}%d${rs}\n", ctx->mindepth); } - if (optlevel >= 4 && maxdepth < ctx->maxdepth) { + if (opt.level >= 4 && maxdepth < ctx->maxdepth) { if (maxdepth < INT_MIN) { maxdepth = INT_MIN; } + opt_enter(&opt, "${blu}-maxdepth${rs} ${bld}%d${rs}\n", ctx->maxdepth); ctx->maxdepth = maxdepth; - opt_debug(&state, 4, "data flow: maxdepth --> %d\n", ctx->maxdepth); + opt_leave(&opt, "${blu}-maxdepth${rs} ${bld}%d${rs}\n", ctx->maxdepth); + } + + if (opt.level >= 3) { + // bfs_eval() can do lazy stat() calls, but only on one thread. + float lazy_cost = estimate_stat_odds(ctx); + // bftw() can do eager stat() calls in parallel + float eager_cost = 1.0 / ctx->threads; + + if (eager_cost <= lazy_cost) { + opt_enter(&opt, "lazy stat cost: ${ylw}%g${rs}\n", lazy_cost); + ctx->flags |= BFTW_STAT; + opt_leave(&opt, "eager stat cost: ${ylw}%g${rs}\n", eager_cost); + } + +#ifndef POSIX_SPAWN_SETRLIMIT + // If bfs_spawn_setrlimit() would force us to use fork() over + // posix_spawn(), the extra cost may outweigh the benefit of a + // higher RLIMIT_NOFILE + float single_exec_odds = estimate_odds(ctx->expr, single_exec); + if (single_exec_odds >= 0.5) { + opt_enter(&opt, "single ${blu}-exec${rs} odds: ${ylw}%g${rs}\n", single_exec_odds); + ctx->raise_nofile = false; + opt_leave(&opt, "not raising RLIMIT_NOFILE\n"); + } +#endif } + opt_leave(&opt, NULL); + return 0; } @@ -13,7 +13,7 @@ struct bfs_ctx; /** * Apply optimizations to the command line. * - * @param ctx + * @ctx * The bfs context to optimize. * @return * 0 if successful, -1 on error. @@ -21,4 +21,3 @@ struct bfs_ctx; int bfs_optimize(struct bfs_ctx *ctx); #endif // BFS_OPT_H - diff --git a/src/parse.c b/src/parse.c index 3416d9e..9c39d6b 100644 --- a/src/parse.c +++ b/src/parse.c @@ -9,19 +9,20 @@ */ #include "parse.h" + #include "alloc.h" +#include "bfs.h" #include "bfstd.h" #include "bftw.h" #include "color.h" -#include "config.h" #include "ctx.h" -#include "darray.h" #include "diag.h" #include "dir.h" #include "eval.h" #include "exec.h" #include "expr.h" #include "fsade.h" +#include "list.h" #include "opt.h" #include "printf.h" #include "pwcache.h" @@ -31,6 +32,7 @@ #include "xregex.h" #include "xspawn.h" #include "xtime.h" + #include <errno.h> #include <fcntl.h> #include <fnmatch.h> @@ -41,109 +43,18 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> -#include <sys/time.h> #include <sys/stat.h> -#include <sys/wait.h> +#include <sys/types.h> #include <time.h> #include <unistd.h> // Strings printed by -D tree for "fake" expressions -static char *fake_and_arg = "-a"; -static char *fake_false_arg = "-false"; +static char *fake_and_arg = "-and"; static char *fake_hidden_arg = "-hidden"; -static char *fake_or_arg = "-o"; +static char *fake_or_arg = "-or"; static char *fake_print_arg = "-print"; static char *fake_true_arg = "-true"; -struct bfs_expr *bfs_expr_new(bfs_eval_fn *eval_fn, size_t argc, char **argv) { - struct bfs_expr *expr = ZALLOC(struct bfs_expr); - if (!expr) { - perror("zalloc()"); - return NULL; - } - - expr->eval_fn = eval_fn; - expr->argc = argc; - expr->argv = argv; - expr->probability = 0.5; - return expr; -} - -bool bfs_expr_is_parent(const struct bfs_expr *expr) { - return expr->eval_fn == eval_and - || expr->eval_fn == eval_or - || expr->eval_fn == eval_not - || expr->eval_fn == eval_comma; -} - -bool bfs_expr_never_returns(const struct bfs_expr *expr) { - // Expressions that never return are vacuously both always true and always false - return expr->always_true && expr->always_false; -} - -void bfs_expr_free(struct bfs_expr *expr) { - if (!expr) { - return; - } - - if (bfs_expr_is_parent(expr)) { - bfs_expr_free(expr->rhs); - bfs_expr_free(expr->lhs); - } else if (expr->eval_fn == eval_exec) { - bfs_exec_free(expr->exec); - } else if (expr->eval_fn == eval_fprintf) { - bfs_printf_free(expr->printf); - } else if (expr->eval_fn == eval_regex) { - bfs_regfree(expr->regex); - } - - free(expr); -} - -/** - * Create a new unary expression. - */ -static struct bfs_expr *new_unary_expr(bfs_eval_fn *eval_fn, struct bfs_expr *rhs, char **argv) { - struct bfs_expr *expr = bfs_expr_new(eval_fn, 1, argv); - if (!expr) { - bfs_expr_free(rhs); - return NULL; - } - - expr->lhs = NULL; - expr->rhs = rhs; - bfs_assert(bfs_expr_is_parent(expr)); - - expr->persistent_fds = rhs->persistent_fds; - expr->ephemeral_fds = rhs->ephemeral_fds; - return expr; -} - -/** - * Create a new binary expression. - */ -static struct bfs_expr *new_binary_expr(bfs_eval_fn *eval_fn, struct bfs_expr *lhs, struct bfs_expr *rhs, char **argv) { - struct bfs_expr *expr = bfs_expr_new(eval_fn, 1, argv); - if (!expr) { - bfs_expr_free(rhs); - bfs_expr_free(lhs); - return NULL; - } - - expr->lhs = lhs; - expr->rhs = rhs; - bfs_assert(bfs_expr_is_parent(expr)); - - expr->persistent_fds = lhs->persistent_fds + rhs->persistent_fds; - if (lhs->ephemeral_fds > rhs->ephemeral_fds) { - expr->ephemeral_fds = lhs->ephemeral_fds; - } else { - expr->ephemeral_fds = rhs->ephemeral_fds; - } - - return expr; -} - /** * Color use flags. */ @@ -154,9 +65,9 @@ enum use_color { }; /** - * Ephemeral state for parsing the command line. + * Command line parser state. */ -struct parser_state { +struct bfs_parser { /** The command line being constructed. */ struct bfs_ctx *ctx; /** The command line arguments being parsed. */ @@ -169,14 +80,10 @@ struct parser_state { /** Whether stdout is a terminal. */ bool stdout_tty; - /** Whether this session is interactive (stdin and stderr are each a terminal). */ - bool interactive; /** Whether -color or -nocolor has been passed. */ enum use_color use_color; /** Whether a -print action is implied. */ bool implicit_print; - /** Whether the default root "." should be used. */ - bool implicit_root; /** Whether the expression has started. */ bool expr_started; /** Whether an information option like -help or -version was passed. */ @@ -186,46 +93,30 @@ struct parser_state { /** The last non-path argument. */ char **last_arg; - /** A "-depth"-type argument, if any. */ - char **depth_arg; - /** A "-prune" argument, if any. */ - char **prune_arg; - /** A "-mount" argument, if any. */ - char **mount_arg; - /** An "-xdev" argument, if any. */ - char **xdev_arg; - /** A "-files0-from -" argument, if any. */ - char **files0_stdin_arg; - /** An "-ok"-type expression, if any. */ - const struct bfs_expr *ok_expr; + /** A "-depth"-type expression, if any. */ + const struct bfs_expr *depth_expr; + /** A "-limit" expression, if any. */ + const struct bfs_expr *limit_expr; + /** A "-prune" expression, if any. */ + const struct bfs_expr *prune_expr; + /** A "-mount" expression, if any. */ + const struct bfs_expr *mount_expr; + /** An "-xdev" expression, if any. */ + const struct bfs_expr *xdev_expr; + /** A "-files0-from" expression, if any. */ + const struct bfs_expr *files0_expr; + /** An expression that consumes stdin, if any. */ + const struct bfs_expr *stdin_expr; /** The current time (maybe modified by -daystart). */ struct timespec now; }; /** - * Possible token types. - */ -enum token_type { - /** A flag. */ - T_FLAG, - /** A root path. */ - T_PATH, - /** An option. */ - T_OPTION, - /** A test. */ - T_TEST, - /** An action. */ - T_ACTION, - /** An operator. */ - T_OPERATOR, -}; - -/** * Print a low-level error message during parsing. */ -static void parse_perror(const struct parser_state *state, const char *str) { - bfs_perror(state->ctx, str); +static void parse_perror(const struct bfs_parser *parser, const char *str) { + bfs_perror(parser->ctx, str); } /** Initialize an empty highlighted range. */ @@ -247,30 +138,27 @@ static void highlight_args(const struct bfs_ctx *ctx, char **argv, size_t argc, /** * Print an error message during parsing. */ -BFS_FORMATTER(2, 3) -static void parse_error(const struct parser_state *state, const char *format, ...) { - int error = errno; - const struct bfs_ctx *ctx = state->ctx; +_printf(2, 3) +static void parse_error(const struct bfs_parser *parser, const char *format, ...) { + const struct bfs_ctx *ctx = parser->ctx; bool highlight[ctx->argc]; init_highlight(ctx, highlight); - highlight_args(ctx, state->argv, 1, highlight); + highlight_args(ctx, parser->argv, 1, highlight); bfs_argv_error(ctx, highlight); va_list args; va_start(args, format); - errno = error; - bfs_verror(state->ctx, format, args); + bfs_verror(parser->ctx, format, args); va_end(args); } /** * Print an error about some command line arguments. */ -BFS_FORMATTER(4, 5) -static void parse_argv_error(const struct parser_state *state, char **argv, size_t argc, const char *format, ...) { - int error = errno; - const struct bfs_ctx *ctx = state->ctx; +_printf(4, 5) +static void parse_argv_error(const struct bfs_parser *parser, char **argv, size_t argc, const char *format, ...) { + const struct bfs_ctx *ctx = parser->ctx; bool highlight[ctx->argc]; init_highlight(ctx, highlight); @@ -279,7 +167,6 @@ static void parse_argv_error(const struct parser_state *state, char **argv, size va_list args; va_start(args, format); - errno = error; bfs_verror(ctx, format, args); va_end(args); } @@ -287,20 +174,18 @@ static void parse_argv_error(const struct parser_state *state, char **argv, size /** * Print an error about conflicting command line arguments. */ -BFS_FORMATTER(6, 7) -static void parse_conflict_error(const struct parser_state *state, char **argv1, size_t argc1, char **argv2, size_t argc2, const char *format, ...) { - int error = errno; - const struct bfs_ctx *ctx = state->ctx; +_printf(4, 5) +static void parse_conflict_error(const struct bfs_parser *parser, const struct bfs_expr *expr1, const struct bfs_expr *expr2, const char *format, ...) { + const struct bfs_ctx *ctx = parser->ctx; bool highlight[ctx->argc]; init_highlight(ctx, highlight); - highlight_args(ctx, argv1, argc1, highlight); - highlight_args(ctx, argv2, argc2, highlight); + highlight_args(ctx, expr1->argv, expr1->argc, highlight); + highlight_args(ctx, expr2->argv, expr2->argc, highlight); bfs_argv_error(ctx, highlight); va_list args; va_start(args, format); - errno = error; bfs_verror(ctx, format, args); va_end(args); } @@ -308,16 +193,14 @@ static void parse_conflict_error(const struct parser_state *state, char **argv1, /** * Print an error about an expression. */ -BFS_FORMATTER(3, 4) -static void parse_expr_error(const struct parser_state *state, const struct bfs_expr *expr, const char *format, ...) { - int error = errno; - const struct bfs_ctx *ctx = state->ctx; +_printf(3, 4) +static void parse_expr_error(const struct bfs_parser *parser, const struct bfs_expr *expr, const char *format, ...) { + const struct bfs_ctx *ctx = parser->ctx; bfs_expr_error(ctx, expr); va_list args; va_start(args, format); - errno = error; bfs_verror(ctx, format, args); va_end(args); } @@ -325,22 +208,20 @@ static void parse_expr_error(const struct parser_state *state, const struct bfs_ /** * Print a warning message during parsing. */ -BFS_FORMATTER(2, 3) -static bool parse_warning(const struct parser_state *state, const char *format, ...) { - int error = errno; - const struct bfs_ctx *ctx = state->ctx; +_printf(2, 3) +static bool parse_warning(const struct bfs_parser *parser, const char *format, ...) { + const struct bfs_ctx *ctx = parser->ctx; bool highlight[ctx->argc]; init_highlight(ctx, highlight); - highlight_args(ctx, state->argv, 1, highlight); + highlight_args(ctx, parser->argv, 1, highlight); if (!bfs_argv_warning(ctx, highlight)) { return false; } va_list args; va_start(args, format); - errno = error; - bool ret = bfs_vwarning(state->ctx, format, args); + bool ret = bfs_vwarning(parser->ctx, format, args); va_end(args); return ret; } @@ -348,22 +229,20 @@ static bool parse_warning(const struct parser_state *state, const char *format, /** * Print a warning about conflicting command line arguments. */ -BFS_FORMATTER(6, 7) -static bool parse_conflict_warning(const struct parser_state *state, char **argv1, size_t argc1, char **argv2, size_t argc2, const char *format, ...) { - int error = errno; - const struct bfs_ctx *ctx = state->ctx; +_printf(4, 5) +static bool parse_conflict_warning(const struct bfs_parser *parser, const struct bfs_expr *expr1, const struct bfs_expr *expr2, const char *format, ...) { + const struct bfs_ctx *ctx = parser->ctx; bool highlight[ctx->argc]; init_highlight(ctx, highlight); - highlight_args(ctx, argv1, argc1, highlight); - highlight_args(ctx, argv2, argc2, highlight); + highlight_args(ctx, expr1->argv, expr1->argc, highlight); + highlight_args(ctx, expr2->argv, expr2->argc, highlight); if (!bfs_argv_warning(ctx, highlight)) { return false; } va_list args; va_start(args, format); - errno = error; bool ret = bfs_vwarning(ctx, format, args); va_end(args); return ret; @@ -372,10 +251,9 @@ static bool parse_conflict_warning(const struct parser_state *state, char **argv /** * Print a warning about an expression. */ -BFS_FORMATTER(3, 4) -static bool parse_expr_warning(const struct parser_state *state, const struct bfs_expr *expr, const char *format, ...) { - int error = errno; - const struct bfs_ctx *ctx = state->ctx; +_printf(3, 4) +static bool parse_expr_warning(const struct bfs_parser *parser, const struct bfs_expr *expr, const char *format, ...) { + const struct bfs_ctx *ctx = parser->ctx; if (!bfs_expr_warning(ctx, expr)) { return false; @@ -383,25 +261,79 @@ static bool parse_expr_warning(const struct parser_state *state, const struct bf va_list args; va_start(args, format); - errno = error; bool ret = bfs_vwarning(ctx, format, args); va_end(args); return ret; } /** + * Report an error if stdin is already consumed, then consume it. + */ +static bool consume_stdin(struct bfs_parser *parser, const struct bfs_expr *expr) { + if (parser->stdin_expr) { + parse_conflict_error(parser, parser->stdin_expr, expr, + "%pX and %pX can't both use standard input.\n", + parser->stdin_expr, expr); + return false; + } + + parser->stdin_expr = expr; + return true; +} + +/** + * Allocate a new expression. + */ +static struct bfs_expr *parse_new_expr(const struct bfs_parser *parser, bfs_eval_fn *eval_fn, size_t argc, char **argv, enum bfs_kind kind) { + struct bfs_expr *expr = bfs_expr_new(parser->ctx, eval_fn, argc, argv, kind); + if (!expr) { + parse_perror(parser, "bfs_expr_new()"); + } + return expr; +} + +/** + * Create a new unary expression. + */ +static struct bfs_expr *new_unary_expr(const struct bfs_parser *parser, bfs_eval_fn *eval_fn, struct bfs_expr *rhs, char **argv) { + struct bfs_expr *expr = parse_new_expr(parser, eval_fn, 1, argv, BFS_OPERATOR); + if (!expr) { + return NULL; + } + + bfs_assert(bfs_expr_is_parent(expr)); + bfs_expr_append(expr, rhs); + return expr; +} + +/** + * Create a new binary expression. + */ +static struct bfs_expr *new_binary_expr(const struct bfs_parser *parser, bfs_eval_fn *eval_fn, struct bfs_expr *lhs, struct bfs_expr *rhs, char **argv) { + struct bfs_expr *expr = parse_new_expr(parser, eval_fn, 1, argv, BFS_OPERATOR); + if (!expr) { + return NULL; + } + + bfs_assert(bfs_expr_is_parent(expr)); + bfs_expr_append(expr, lhs); + bfs_expr_append(expr, rhs); + return expr; +} + +/** * Fill in a "-print"-type expression. */ -static void init_print_expr(struct parser_state *state, struct bfs_expr *expr) { - expr->cfile = state->ctx->cout; +static void init_print_expr(struct bfs_parser *parser, struct bfs_expr *expr) { + expr->cfile = parser->ctx->cout; expr->path = NULL; } /** * Open a file for an expression. */ -static int expr_open(struct parser_state *state, struct bfs_expr *expr, const char *path) { - struct bfs_ctx *ctx = state->ctx; +static int expr_open(struct bfs_parser *parser, struct bfs_expr *expr, const char *path) { + struct bfs_ctx *ctx = parser->ctx; FILE *file = NULL; CFILE *cfile = NULL; @@ -411,7 +343,7 @@ static int expr_open(struct parser_state *state, struct bfs_expr *expr, const ch goto fail; } - cfile = cfwrap(file, state->use_color ? ctx->colors : NULL, true); + cfile = cfwrap(file, parser->use_color ? ctx->colors : NULL, true); if (!cfile) { goto fail; } @@ -430,7 +362,7 @@ static int expr_open(struct parser_state *state, struct bfs_expr *expr, const ch return 0; fail: - parse_expr_error(state, expr, "%m.\n"); + parse_expr_error(parser, expr, "%s.\n", errstr()); if (cfile) { cfclose(cfile); } else if (file) { @@ -442,15 +374,15 @@ fail: /** * Invoke bfs_stat() on an argument. */ -static int stat_arg(const struct parser_state *state, char **arg, struct bfs_stat *sb) { - const struct bfs_ctx *ctx = state->ctx; +static int stat_arg(const struct bfs_parser *parser, char **arg, struct bfs_stat *sb) { + const struct bfs_ctx *ctx = parser->ctx; bool follow = ctx->flags & (BFTW_FOLLOW_ROOTS | BFTW_FOLLOW_ALL); enum bfs_stat_flags flags = follow ? BFS_STAT_TRYFOLLOW : BFS_STAT_NOFOLLOW; int ret = bfs_stat(AT_FDCWD, *arg, flags, sb); if (ret != 0) { - parse_argv_error(state, arg, 1, "%m.\n"); + parse_argv_error(parser, arg, 1, "%s.\n", errstr()); } return ret; } @@ -458,52 +390,57 @@ static int stat_arg(const struct parser_state *state, char **arg, struct bfs_sta /** * Parse the expression specified on the command line. */ -static struct bfs_expr *parse_expr(struct parser_state *state); +static struct bfs_expr *parse_expr(struct bfs_parser *parser); /** * Advance by a single token. */ -static char **parser_advance(struct parser_state *state, enum token_type type, size_t argc) { - if (type != T_FLAG && type != T_PATH) { - state->expr_started = true; +static char **parser_advance(struct bfs_parser *parser, enum bfs_kind kind, size_t argc) { + struct bfs_ctx *ctx = parser->ctx; + + if (kind != BFS_FLAG && kind != BFS_PATH) { + parser->expr_started = true; } - if (type != T_PATH) { - state->last_arg = state->argv; + if (kind != BFS_PATH) { + parser->last_arg = parser->argv; } - char **argv = state->argv; - state->argv += argc; + size_t i = parser->argv - ctx->argv; + ctx->kinds[i] = kind; + + char **argv = parser->argv; + parser->argv += argc; return argv; } /** * Parse a root path. */ -static int parse_root(struct parser_state *state, const char *path) { - char *copy = strdup(path); - if (!copy) { - parse_perror(state, "strdup()"); +static int parse_root(struct bfs_parser *parser, const char *path) { + struct bfs_ctx *ctx = parser->ctx; + const char **root = RESERVE(const char *, &ctx->paths, &ctx->npaths); + if (!root) { + parse_perror(parser, "RESERVE()"); return -1; } - struct bfs_ctx *ctx = state->ctx; - if (DARRAY_PUSH(&ctx->paths, ©) != 0) { - parse_perror(state, "DARRAY_PUSH()"); - free(copy); + *root = strdup(path); + if (!*root) { + --ctx->npaths; + parse_perror(parser, "strdup()"); return -1; } - state->implicit_root = false; return 0; } /** * While parsing an expression, skip any paths and add them to ctx->paths. */ -static int skip_paths(struct parser_state *state) { +static int skip_paths(struct bfs_parser *parser) { while (true) { - const char *arg = state->argv[0]; + const char *arg = parser->argv[0]; if (!arg) { return 0; } @@ -513,7 +450,7 @@ static int skip_paths(struct parser_state *state) { // find uses -- to separate flags from the rest // of the command line. We allow mixing flags // and paths/predicates, so we just ignore --. - parser_advance(state, T_FLAG, 1); + parser_advance(parser, BFS_FLAG, 1); continue; } if (strcmp(arg, "-") != 0) { @@ -528,7 +465,7 @@ static int skip_paths(struct parser_state *state) { return 0; } - if (state->expr_started) { + if (parser->expr_started) { // By POSIX, these can be paths. We only treat them as // such at the beginning of the command line. if (strcmp(arg, ")") == 0 || strcmp(arg, ",") == 0) { @@ -536,16 +473,16 @@ static int skip_paths(struct parser_state *state) { } } - if (state->excluding) { - parse_warning(state, "This path will not be excluded. Use a test like ${blu}-name${rs} or ${blu}-path${rs}\n"); - bfs_warning(state->ctx, "within ${red}-exclude${rs} to exclude matching files.\n\n"); + if (parser->excluding) { + parse_warning(parser, "This path will not be excluded. Use a test like ${blu}-name${rs} or ${blu}-path${rs}\n"); + bfs_warning(parser->ctx, "within ${red}-exclude${rs} to exclude matching files.\n\n"); } - if (parse_root(state, arg) != 0) { + if (parse_root(parser, arg) != 0) { return -1; } - parser_advance(state, T_PATH, 1); + parser_advance(parser, BFS_PATH, 1); } } @@ -564,17 +501,15 @@ enum int_flags { /** * Parse an integer. */ -static const char *parse_int(const struct parser_state *state, char **arg, const char *str, void *result, enum int_flags flags) { - char *endptr; - +static const char *parse_int(const struct bfs_parser *parser, char **arg, const char *str, void *result, enum int_flags flags) { int base = flags & IF_BASE_MASK; if (base == 0) { base = 10; } - errno = 0; - long long value = strtoll(str, &endptr, base); - if (errno != 0) { + char *endptr; + long long value; + if (xstrtoll(str, &endptr, base, &value) != 0) { if (errno == ERANGE) { goto range; } else { @@ -582,13 +517,6 @@ static const char *parse_int(const struct parser_state *state, char **arg, const } } - // https://github.com/llvm/llvm-project/issues/64946 - sanitize_init(&endptr); - - if (endptr == str) { - goto bad; - } - if (!(flags & IF_PARTIAL_OK) && *endptr != '\0') { goto bad; } @@ -625,19 +553,19 @@ static const char *parse_int(const struct parser_state *state, char **arg, const bad: if (!(flags & IF_QUIET)) { - parse_argv_error(state, arg, 1, "${bld}%pq${rs} is not a valid integer.\n", str); + parse_argv_error(parser, arg, 1, "${bld}%pq${rs} is not a valid integer.\n", str); } return NULL; negative: if (!(flags & IF_QUIET)) { - parse_argv_error(state, arg, 1, "Negative integer ${bld}%pq${rs} is not allowed here.\n", str); + parse_argv_error(parser, arg, 1, "Negative integer ${bld}%pq${rs} is not allowed here.\n", str); } return NULL; range: if (!(flags & IF_QUIET)) { - parse_argv_error(state, arg, 1, "${bld}%pq${rs} is too large an integer.\n", str); + parse_argv_error(parser, arg, 1, "${bld}%pq${rs} is too large an integer.\n", str); } return NULL; } @@ -645,7 +573,7 @@ range: /** * Parse an integer and a comparison flag. */ -static const char *parse_icmp(const struct parser_state *state, struct bfs_expr *expr, enum int_flags flags) { +static const char *parse_icmp(const struct bfs_parser *parser, struct bfs_expr *expr, enum int_flags flags) { char **arg = &expr->argv[1]; const char *str = *arg; switch (str[0]) { @@ -662,7 +590,7 @@ static const char *parse_icmp(const struct parser_state *state, struct bfs_expr break; } - return parse_int(state, arg, str, &expr->num, flags | IF_LONG_LONG | IF_UNSIGNED); + return parse_int(parser, arg, str, &expr->num, flags | IF_LONG_LONG | IF_UNSIGNED); } /** @@ -684,153 +612,164 @@ static bool looks_like_icmp(const char *str) { /** * Parse a single flag. */ -static struct bfs_expr *parse_flag(struct parser_state *state, size_t argc) { - char **argv = parser_advance(state, T_FLAG, argc); - return bfs_expr_new(eval_true, argc, argv); +static struct bfs_expr *parse_flag(struct bfs_parser *parser, size_t argc) { + char **argv = parser_advance(parser, BFS_FLAG, argc); + return parse_new_expr(parser, eval_true, argc, argv, BFS_FLAG); } /** * Parse a flag that doesn't take a value. */ -static struct bfs_expr *parse_nullary_flag(struct parser_state *state) { - return parse_flag(state, 1); +static struct bfs_expr *parse_nullary_flag(struct bfs_parser *parser) { + return parse_flag(parser, 1); } /** * Parse a flag that takes a value. */ -static struct bfs_expr *parse_unary_flag(struct parser_state *state) { - const char *arg = state->argv[0]; - const char *value = state->argv[1]; +static struct bfs_expr *parse_unary_flag(struct bfs_parser *parser) { + const char *arg = parser->argv[0]; + char flag = arg[strlen(arg) - 1]; + + const char *value = parser->argv[1]; if (!value) { - parse_error(state, "${cyn}%s${rs} needs a value.\n", arg); + parse_error(parser, "${cyn}-%c${rs} needs a value.\n", flag); + return NULL; + } + + return parse_flag(parser, 2); +} + +/** + * Parse a prefix flag like -O3, -j8, etc. + */ +static struct bfs_expr *parse_prefix_flag(struct bfs_parser *parser, char flag, bool allow_separate, const char **value) { + const char *arg = parser->argv[0]; + + const char *suffix = strchr(arg, flag) + 1; + if (*suffix) { + *value = suffix; + return parse_nullary_flag(parser); + } + + suffix = parser->argv[1]; + if (allow_separate && suffix) { + *value = suffix; + } else { + parse_error(parser, "${cyn}-%c${rs} needs a value.\n", flag); return NULL; } - return parse_flag(state, 2); + return parse_unary_flag(parser); } /** * Parse a single option. */ -static struct bfs_expr *parse_option(struct parser_state *state, size_t argc) { - char **argv = parser_advance(state, T_OPTION, argc); - return bfs_expr_new(eval_true, argc, argv); +static struct bfs_expr *parse_option(struct bfs_parser *parser, size_t argc) { + char **argv = parser_advance(parser, BFS_OPTION, argc); + return parse_new_expr(parser, eval_true, argc, argv, BFS_OPTION); } /** * Parse an option that doesn't take a value. */ -static struct bfs_expr *parse_nullary_option(struct parser_state *state) { - return parse_option(state, 1); +static struct bfs_expr *parse_nullary_option(struct bfs_parser *parser) { + return parse_option(parser, 1); } /** * Parse an option that takes a value. */ -static struct bfs_expr *parse_unary_option(struct parser_state *state) { - const char *arg = state->argv[0]; - const char *value = state->argv[1]; +static struct bfs_expr *parse_unary_option(struct bfs_parser *parser) { + const char *arg = parser->argv[0]; + const char *value = parser->argv[1]; if (!value) { - parse_error(state, "${blu}%s${rs} needs a value.\n", arg); + parse_error(parser, "${blu}%s${rs} needs a value.\n", arg); return NULL; } - return parse_option(state, 2); + return parse_option(parser, 2); } /** * Parse a single test. */ -static struct bfs_expr *parse_test(struct parser_state *state, bfs_eval_fn *eval_fn, size_t argc) { - char **argv = parser_advance(state, T_TEST, argc); - return bfs_expr_new(eval_fn, argc, argv); +static struct bfs_expr *parse_test(struct bfs_parser *parser, bfs_eval_fn *eval_fn, size_t argc) { + char **argv = parser_advance(parser, BFS_TEST, argc); + return parse_new_expr(parser, eval_fn, argc, argv, BFS_TEST); } /** * Parse a test that doesn't take a value. */ -static struct bfs_expr *parse_nullary_test(struct parser_state *state, bfs_eval_fn *eval_fn) { - return parse_test(state, eval_fn, 1); +static struct bfs_expr *parse_nullary_test(struct bfs_parser *parser, bfs_eval_fn *eval_fn) { + return parse_test(parser, eval_fn, 1); } /** * Parse a test that takes a value. */ -static struct bfs_expr *parse_unary_test(struct parser_state *state, bfs_eval_fn *eval_fn) { - const char *arg = state->argv[0]; - const char *value = state->argv[1]; +static struct bfs_expr *parse_unary_test(struct bfs_parser *parser, bfs_eval_fn *eval_fn) { + const char *arg = parser->argv[0]; + const char *value = parser->argv[1]; if (!value) { - parse_error(state, "${blu}%s${rs} needs a value.\n", arg); + parse_error(parser, "${blu}%s${rs} needs a value.\n", arg); return NULL; } - return parse_test(state, eval_fn, 2); + return parse_test(parser, eval_fn, 2); } /** * Parse a single action. */ -static struct bfs_expr *parse_action(struct parser_state *state, bfs_eval_fn *eval_fn, size_t argc) { - char **argv = parser_advance(state, T_ACTION, argc); +static struct bfs_expr *parse_action(struct bfs_parser *parser, bfs_eval_fn *eval_fn, size_t argc) { + char **argv = parser_advance(parser, BFS_ACTION, argc); - if (state->excluding) { - parse_argv_error(state, argv, argc, "This action is not supported within ${red}-exclude${rs}.\n"); + if (parser->excluding) { + parse_argv_error(parser, argv, argc, "This action is not supported within ${red}-exclude${rs}.\n"); return NULL; } - if (eval_fn != eval_prune && eval_fn != eval_quit) { - state->implicit_print = false; + if (eval_fn != eval_limit && eval_fn != eval_prune && eval_fn != eval_quit) { + parser->implicit_print = false; } - return bfs_expr_new(eval_fn, argc, argv); + return parse_new_expr(parser, eval_fn, argc, argv, BFS_ACTION); } /** * Parse an action that takes no arguments. */ -static struct bfs_expr *parse_nullary_action(struct parser_state *state, bfs_eval_fn *eval_fn) { - return parse_action(state, eval_fn, 1); +static struct bfs_expr *parse_nullary_action(struct bfs_parser *parser, bfs_eval_fn *eval_fn) { + return parse_action(parser, eval_fn, 1); } /** * Parse an action that takes one argument. */ -static struct bfs_expr *parse_unary_action(struct parser_state *state, bfs_eval_fn *eval_fn) { - const char *arg = state->argv[0]; - const char *value = state->argv[1]; +static struct bfs_expr *parse_unary_action(struct bfs_parser *parser, bfs_eval_fn *eval_fn) { + const char *arg = parser->argv[0]; + const char *value = parser->argv[1]; if (!value) { - parse_error(state, "${blu}%s${rs} needs a value.\n", arg); + parse_error(parser, "${blu}%s${rs} needs a value.\n", arg); return NULL; } - return parse_action(state, eval_fn, 2); -} - -/** - * Add an expression to the exclusions. - */ -static int parse_exclude(struct parser_state *state, struct bfs_expr *expr) { - struct bfs_ctx *ctx = state->ctx; - ctx->exclude = new_binary_expr(eval_or, ctx->exclude, expr, &fake_or_arg); - if (ctx->exclude) { - return 0; - } else { - return -1; - } + return parse_action(parser, eval_fn, 2); } /** * Parse a test expression with integer data and a comparison flag. */ -static struct bfs_expr *parse_test_icmp(struct parser_state *state, bfs_eval_fn *eval_fn) { - struct bfs_expr *expr = parse_unary_test(state, eval_fn); +static struct bfs_expr *parse_test_icmp(struct bfs_parser *parser, bfs_eval_fn *eval_fn) { + struct bfs_expr *expr = parse_unary_test(parser, eval_fn); if (!expr) { return NULL; } - if (!parse_icmp(state, expr, 0)) { - bfs_expr_free(expr); + if (!parse_icmp(parser, expr, 0)) { return NULL; } @@ -866,10 +805,11 @@ static bool parse_debug_flag(const char *flag, size_t len, const char *expected) /** * Parse -D FLAG. */ -static struct bfs_expr *parse_debug(struct parser_state *state, int arg1, int arg2) { - struct bfs_ctx *ctx = state->ctx; +static struct bfs_expr *parse_debug(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_ctx *ctx = parser->ctx; - struct bfs_expr *expr = parse_unary_flag(state); + const char *flags; + struct bfs_expr *expr = parse_prefix_flag(parser, 'D', true, &flags); if (!expr) { cfprintf(ctx->cerr, "\n"); debug_help(ctx->cerr); @@ -878,7 +818,7 @@ static struct bfs_expr *parse_debug(struct parser_state *state, int arg1, int ar bool unrecognized = false; - for (const char *flag = expr->argv[1], *next; flag; flag = next) { + for (const char *flag = flags, *next; flag; flag = next) { size_t len = strcspn(flag, ","); if (flag[len]) { next = flag + len + 1; @@ -888,8 +828,7 @@ static struct bfs_expr *parse_debug(struct parser_state *state, int arg1, int ar if (parse_debug_flag(flag, len, "help")) { debug_help(ctx->cout); - state->just_info = true; - bfs_expr_free(expr); + parser->just_info = true; return NULL; } else if (parse_debug_flag(flag, len, "all")) { ctx->debug = DEBUG_ALL; @@ -907,7 +846,7 @@ static struct bfs_expr *parse_debug(struct parser_state *state, int arg1, int ar if (DEBUG_ALL & i) { ctx->debug |= i; } else { - if (parse_expr_warning(state, expr, "Unrecognized debug flag ${bld}")) { + if (parse_expr_warning(parser, expr, "Unrecognized debug flag ${bld}")) { fwrite(flag, 1, len, stderr); cfprintf(ctx->cerr, "${rs}.\n\n"); unrecognized = true; @@ -926,23 +865,23 @@ static struct bfs_expr *parse_debug(struct parser_state *state, int arg1, int ar /** * Parse -On. */ -static struct bfs_expr *parse_optlevel(struct parser_state *state, int arg1, int arg2) { - struct bfs_expr *expr = parse_nullary_flag(state); +static struct bfs_expr *parse_optlevel(struct bfs_parser *parser, int arg1, int arg2) { + const char *arg; + struct bfs_expr *expr = parse_prefix_flag(parser, 'O', false, &arg); if (!expr) { return NULL; } - int *optlevel = &state->ctx->optlevel; + int *optlevel = &parser->ctx->optlevel; - if (strcmp(expr->argv[0], "-Ofast") == 0) { + if (strcmp(arg, "fast") == 0) { *optlevel = 4; - } else if (!parse_int(state, expr->argv, expr->argv[0] + 2, optlevel, IF_INT | IF_UNSIGNED)) { - bfs_expr_free(expr); + } else if (!parse_int(parser, expr->argv, arg, optlevel, IF_INT | IF_UNSIGNED)) { return NULL; } if (*optlevel > 4) { - parse_expr_warning(state, expr, "${cyn}-O${bld}%s${rs} is the same as ${cyn}-O${bld}4${rs}.\n\n", expr->argv[0] + 2); + parse_expr_warning(parser, expr, "${cyn}-O${bld}%s${rs} is the same as ${cyn}-O${bld}4${rs}.\n\n", arg); } return expr; @@ -951,30 +890,30 @@ static struct bfs_expr *parse_optlevel(struct parser_state *state, int arg1, int /** * Parse -[PHL], -follow. */ -static struct bfs_expr *parse_follow(struct parser_state *state, int flags, int option) { - struct bfs_ctx *ctx = state->ctx; +static struct bfs_expr *parse_follow(struct bfs_parser *parser, int flags, int option) { + struct bfs_ctx *ctx = parser->ctx; ctx->flags &= ~(BFTW_FOLLOW_ROOTS | BFTW_FOLLOW_ALL); ctx->flags |= flags; if (option) { - return parse_nullary_option(state); + return parse_nullary_option(parser); } else { - return parse_nullary_flag(state); + return parse_nullary_flag(parser); } } /** * Parse -X. */ -static struct bfs_expr *parse_xargs_safe(struct parser_state *state, int arg1, int arg2) { - state->ctx->xargs_safe = true; - return parse_nullary_flag(state); +static struct bfs_expr *parse_xargs_safe(struct bfs_parser *parser, int arg1, int arg2) { + parser->ctx->xargs_safe = true; + return parse_nullary_flag(parser); } /** * Parse -executable, -readable, -writable */ -static struct bfs_expr *parse_access(struct parser_state *state, int flag, int arg2) { - struct bfs_expr *expr = parse_nullary_test(state, eval_access); +static struct bfs_expr *parse_access(struct bfs_parser *parser, int flag, int arg2) { + struct bfs_expr *expr = parse_nullary_test(parser, eval_access); if (expr) { expr->num = flag; } @@ -984,11 +923,11 @@ static struct bfs_expr *parse_access(struct parser_state *state, int flag, int a /** * Parse -acl. */ -static struct bfs_expr *parse_acl(struct parser_state *state, int flag, int arg2) { +static struct bfs_expr *parse_acl(struct bfs_parser *parser, int flag, int arg2) { #if BFS_CAN_CHECK_ACL - return parse_nullary_test(state, eval_acl); + return parse_nullary_test(parser, eval_acl); #else - parse_error(state, "Missing platform support.\n"); + parse_error(parser, "Missing platform support.\n"); return NULL; #endif } @@ -996,36 +935,32 @@ static struct bfs_expr *parse_acl(struct parser_state *state, int flag, int arg2 /** * Parse -[aBcm]?newer. */ -static struct bfs_expr *parse_newer(struct parser_state *state, int field, int arg2) { - struct bfs_expr *expr = parse_unary_test(state, eval_newer); +static struct bfs_expr *parse_newer(struct bfs_parser *parser, int field, int arg2) { + struct bfs_expr *expr = parse_unary_test(parser, eval_newer); if (!expr) { return NULL; } struct bfs_stat sb; - if (stat_arg(state, &expr->argv[1], &sb) != 0) { - goto fail; + if (stat_arg(parser, &expr->argv[1], &sb) != 0) { + return NULL; } expr->reftime = sb.mtime; expr->stat_field = field; return expr; - -fail: - bfs_expr_free(expr); - return NULL; } /** * Parse -[aBcm]min. */ -static struct bfs_expr *parse_min(struct parser_state *state, int field, int arg2) { - struct bfs_expr *expr = parse_test_icmp(state, eval_time); +static struct bfs_expr *parse_min(struct bfs_parser *parser, int field, int arg2) { + struct bfs_expr *expr = parse_test_icmp(parser, eval_time); if (!expr) { return NULL; } - expr->reftime = state->now; + expr->reftime = parser->now; expr->stat_field = field; expr->time_unit = BFS_MINUTES; return expr; @@ -1034,18 +969,18 @@ static struct bfs_expr *parse_min(struct parser_state *state, int field, int arg /** * Parse -[aBcm]time. */ -static struct bfs_expr *parse_time(struct parser_state *state, int field, int arg2) { - struct bfs_expr *expr = parse_unary_test(state, eval_time); +static struct bfs_expr *parse_time(struct bfs_parser *parser, int field, int arg2) { + struct bfs_expr *expr = parse_unary_test(parser, eval_time); if (!expr) { return NULL; } - expr->reftime = state->now; + expr->reftime = parser->now; expr->stat_field = field; - const char *tail = parse_icmp(state, expr, IF_PARTIAL_OK); + const char *tail = parse_icmp(parser, expr, IF_PARTIAL_OK); if (!tail) { - goto fail; + return NULL; } if (!*tail) { @@ -1060,21 +995,21 @@ static struct bfs_expr *parse_time(struct parser_state *state, int field, int ar switch (*tail) { case 'w': time *= 7; - fallthru; + _fallthrough; case 'd': time *= 24; - fallthru; + _fallthrough; case 'h': time *= 60; - fallthru; + _fallthrough; case 'm': time *= 60; - fallthru; + _fallthrough; case 's': break; default: - parse_expr_error(state, expr, "Unknown time unit ${bld}%c${rs}.\n", *tail); - goto fail; + parse_expr_error(parser, expr, "Unknown time unit ${bld}%c${rs}.\n", *tail); + return NULL; } expr->num += time; @@ -1083,32 +1018,28 @@ static struct bfs_expr *parse_time(struct parser_state *state, int field, int ar break; } - tail = parse_int(state, &expr->argv[1], tail, &time, IF_PARTIAL_OK | IF_LONG_LONG | IF_UNSIGNED); + tail = parse_int(parser, &expr->argv[1], tail, &time, IF_PARTIAL_OK | IF_LONG_LONG | IF_UNSIGNED); if (!tail) { - goto fail; + return NULL; } if (!*tail) { - parse_expr_error(state, expr, "Missing time unit.\n"); - goto fail; + parse_expr_error(parser, expr, "Missing time unit.\n"); + return NULL; } } expr->time_unit = BFS_SECONDS; return expr; - -fail: - bfs_expr_free(expr); - return NULL; } /** * Parse -capable. */ -static struct bfs_expr *parse_capable(struct parser_state *state, int flag, int arg2) { +static struct bfs_expr *parse_capable(struct bfs_parser *parser, int flag, int arg2) { #if BFS_CAN_CHECK_CAPABILITIES - return parse_nullary_test(state, eval_capable); + return parse_nullary_test(parser, eval_capable); #else - parse_error(state, "Missing platform support.\n"); + parse_error(parser, "Missing platform support.\n"); return NULL; #endif } @@ -1116,27 +1047,26 @@ static struct bfs_expr *parse_capable(struct parser_state *state, int flag, int /** * Parse -(no)?color. */ -static struct bfs_expr *parse_color(struct parser_state *state, int color, int arg2) { - struct bfs_expr *expr = parse_nullary_option(state); +static struct bfs_expr *parse_color(struct bfs_parser *parser, int color, int arg2) { + struct bfs_expr *expr = parse_nullary_option(parser); if (!expr) { return NULL; } - struct bfs_ctx *ctx = state->ctx; + struct bfs_ctx *ctx = parser->ctx; struct colors *colors = ctx->colors; if (color) { if (!colors) { - parse_expr_error(state, expr, "Error parsing $$LS_COLORS: %s.\n", strerror(ctx->colors_error)); - bfs_expr_free(expr); + parse_expr_error(parser, expr, "Error parsing $$LS_COLORS: %s.\n", xstrerror(ctx->colors_error)); return NULL; } - state->use_color = COLOR_ALWAYS; + parser->use_color = COLOR_ALWAYS; ctx->cout->colors = colors; ctx->cerr->colors = colors; } else { - state->use_color = COLOR_NEVER; + parser->use_color = COLOR_NEVER; ctx->cout->colors = NULL; ctx->cerr->colors = NULL; } @@ -1145,23 +1075,84 @@ static struct bfs_expr *parse_color(struct parser_state *state, int color, int a } /** + * Common code for fnmatch() tests. + */ +static struct bfs_expr *parse_fnmatch(const struct bfs_parser *parser, struct bfs_expr *expr, bool casefold) { + if (!expr) { + return NULL; + } + + expr->pattern = expr->argv[1]; + + if (casefold) { +#ifdef FNM_CASEFOLD + expr->fnm_flags = FNM_CASEFOLD; +#else + parse_expr_error(parser, expr, "Missing platform support.\n"); + return NULL; +#endif + } else { + expr->fnm_flags = 0; + } + + // POSIX says, about fnmatch(): + // + // If pattern ends with an unescaped <backslash>, fnmatch() shall + // return a non-zero value (indicating either no match or an error). + // + // But not all implementations obey this, so check for it ourselves. + size_t i, len = strlen(expr->pattern); + for (i = 0; i < len; ++i) { + if (expr->pattern[len - i - 1] != '\\') { + break; + } + } + if (i % 2 != 0) { + parse_expr_warning(parser, expr, "Unescaped trailing backslash.\n\n"); + expr->eval_fn = eval_false; + return expr; + } + + // strcmp() can be much faster than fnmatch() since it doesn't have to + // parse the pattern, so special-case patterns with no wildcards. + // + // https://pubs.opengroup.org/onlinepubs/9799919799/utilities/V3_chap02.html#tag_19_14_01 + expr->literal = strcspn(expr->pattern, "?*\\[") == len; + + return expr; +} + +/** + * Parse -context. + */ +static struct bfs_expr *parse_context(struct bfs_parser *parser, int flag, int arg2) { +#if BFS_CAN_CHECK_CONTEXT + struct bfs_expr *expr = parse_unary_test(parser, eval_context); + return parse_fnmatch(parser, expr, false); +#else + parse_error(parser, "Missing platform support.\n"); + return NULL; +#endif +} + +/** * Parse -{false,true}. */ -static struct bfs_expr *parse_const(struct parser_state *state, int value, int arg2) { - return parse_nullary_test(state, value ? eval_true : eval_false); +static struct bfs_expr *parse_const(struct bfs_parser *parser, int value, int arg2) { + return parse_nullary_test(parser, value ? eval_true : eval_false); } /** * Parse -daystart. */ -static struct bfs_expr *parse_daystart(struct parser_state *state, int arg1, int arg2) { +static struct bfs_expr *parse_daystart(struct bfs_parser *parser, int arg1, int arg2) { struct tm tm; - if (xlocaltime(&state->now.tv_sec, &tm) != 0) { - parse_perror(state, "xlocaltime()"); + if (!localtime_r(&parser->now.tv_sec, &tm)) { + parse_perror(parser, "localtime_r()"); return NULL; } - if (tm.tm_hour || tm.tm_min || tm.tm_sec || state->now.tv_nsec) { + if (tm.tm_hour || tm.tm_min || tm.tm_sec || parser->now.tv_nsec) { ++tm.tm_mday; } tm.tm_hour = 0; @@ -1170,60 +1161,74 @@ static struct bfs_expr *parse_daystart(struct parser_state *state, int arg1, int time_t time; if (xmktime(&tm, &time) != 0) { - parse_perror(state, "xmktime()"); + parse_perror(parser, "xmktime()"); return NULL; } - state->now.tv_sec = time; - state->now.tv_nsec = 0; + parser->now.tv_sec = time; + parser->now.tv_nsec = 0; - return parse_nullary_option(state); + return parse_nullary_option(parser); } /** * Parse -delete. */ -static struct bfs_expr *parse_delete(struct parser_state *state, int arg1, int arg2) { - state->ctx->flags |= BFTW_POST_ORDER; - state->depth_arg = state->argv; - return parse_nullary_action(state, eval_delete); +static struct bfs_expr *parse_delete(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_nullary_action(parser, eval_delete); + if (!expr) { + return NULL; + } + + struct bfs_ctx *ctx = parser->ctx; + ctx->flags |= BFTW_POST_ORDER; + ctx->dangerous = true; + + parser->depth_expr = expr; + return expr; } /** * Parse -d. */ -static struct bfs_expr *parse_depth(struct parser_state *state, int arg1, int arg2) { - state->ctx->flags |= BFTW_POST_ORDER; - state->depth_arg = state->argv; - return parse_nullary_flag(state); +static struct bfs_expr *parse_depth(struct bfs_parser *parser, int flag, int arg2) { + struct bfs_expr *expr = flag + ? parse_nullary_flag(parser) + : parse_nullary_option(parser); + if (!expr) { + return NULL; + } + + parser->ctx->flags |= BFTW_POST_ORDER; + parser->depth_expr = expr; + return expr; } /** * Parse -depth [N]. */ -static struct bfs_expr *parse_depth_n(struct parser_state *state, int arg1, int arg2) { - const char *arg = state->argv[1]; +static struct bfs_expr *parse_depth_n(struct bfs_parser *parser, int arg1, int arg2) { + const char *arg = parser->argv[1]; if (arg && looks_like_icmp(arg)) { - return parse_test_icmp(state, eval_depth); + return parse_test_icmp(parser, eval_depth); } else { - return parse_depth(state, arg1, arg2); + return parse_depth(parser, arg1, arg2); } } /** * Parse -{min,max}depth N. */ -static struct bfs_expr *parse_depth_limit(struct parser_state *state, int is_min, int arg2) { - struct bfs_expr *expr = parse_unary_option(state); +static struct bfs_expr *parse_depth_limit(struct bfs_parser *parser, int is_min, int arg2) { + struct bfs_expr *expr = parse_unary_option(parser); if (!expr) { return NULL; } - struct bfs_ctx *ctx = state->ctx; + struct bfs_ctx *ctx = parser->ctx; int *depth = is_min ? &ctx->mindepth : &ctx->maxdepth; char **arg = &expr->argv[1]; - if (!parse_int(state, arg, *arg, depth, IF_INT | IF_UNSIGNED)) { - bfs_expr_free(expr); + if (!parse_int(parser, arg, *arg, depth, IF_INT | IF_UNSIGNED)) { return NULL; } @@ -1233,8 +1238,8 @@ static struct bfs_expr *parse_depth_limit(struct parser_state *state, int is_min /** * Parse -empty. */ -static struct bfs_expr *parse_empty(struct parser_state *state, int arg1, int arg2) { - struct bfs_expr *expr = parse_nullary_test(state, eval_empty); +static struct bfs_expr *parse_empty(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_nullary_test(parser, eval_empty); if (expr) { // For opendir() expr->ephemeral_fds = 1; @@ -1245,13 +1250,15 @@ static struct bfs_expr *parse_empty(struct parser_state *state, int arg1, int ar /** * Parse -exec(dir)?/-ok(dir)?. */ -static struct bfs_expr *parse_exec(struct parser_state *state, int flags, int arg2) { - struct bfs_exec *execbuf = bfs_exec_parse(state->ctx, state->argv, flags); +static struct bfs_expr *parse_exec(struct bfs_parser *parser, int flags, int arg2) { + struct bfs_ctx *ctx = parser->ctx; + + struct bfs_exec *execbuf = bfs_exec_parse(ctx, parser->argv, flags); if (!execbuf) { return NULL; } - struct bfs_expr *expr = parse_action(state, eval_exec, execbuf->tmpl_argc + 2); + struct bfs_expr *expr = parse_action(parser, eval_exec, execbuf->tmpl_argc + 2); if (!expr) { bfs_exec_free(execbuf); return NULL; @@ -1263,6 +1270,28 @@ static struct bfs_expr *parse_exec(struct parser_state *state, int flags, int ar expr->ephemeral_fds = 2; if (execbuf->flags & BFS_EXEC_CHDIR) { + // Check for relative paths in $PATH + const char *path = getenv("PATH"); + while (path) { + if (*path != '/') { + size_t len = strcspn(path, ":"); + char *comp = strndup(path, len); + if (comp) { + parse_expr_error(parser, expr, + "This action would be unsafe, since ${bld}$$PATH${rs} contains the relative path ${bld}%pq${rs}\n", comp); + free(comp); + } else { + parse_perror(parser, "strndup()"); + } + return NULL; + } + + path = strchr(path, ':'); + if (path) { + ++path; + } + } + // To dup() the parent directory if (execbuf->flags & BFS_EXEC_MULTI) { ++expr->persistent_fds; @@ -1272,7 +1301,11 @@ static struct bfs_expr *parse_exec(struct parser_state *state, int flags, int ar } if (execbuf->flags & BFS_EXEC_CONFIRM) { - state->ok_expr = expr; + if (!consume_stdin(parser, expr)) { + return NULL; + } + } else { + ctx->dangerous = true; } return expr; @@ -1281,16 +1314,16 @@ static struct bfs_expr *parse_exec(struct parser_state *state, int flags, int ar /** * Parse -exit [STATUS]. */ -static struct bfs_expr *parse_exit(struct parser_state *state, int arg1, int arg2) { +static struct bfs_expr *parse_exit(struct bfs_parser *parser, int arg1, int arg2) { size_t argc = 1; - const char *value = state->argv[1]; + const char *value = parser->argv[1]; int status = EXIT_SUCCESS; - if (value && parse_int(state, NULL, value, &status, IF_INT | IF_UNSIGNED | IF_QUIET)) { + if (value && parse_int(parser, NULL, value, &status, IF_INT | IF_UNSIGNED | IF_QUIET)) { argc = 2; } - struct bfs_expr *expr = parse_action(state, eval_exit, argc); + struct bfs_expr *expr = parse_action(parser, eval_exit, argc); if (expr) { expr->num = status; } @@ -1300,14 +1333,19 @@ static struct bfs_expr *parse_exit(struct parser_state *state, int arg1, int arg /** * Parse -f PATH. */ -static struct bfs_expr *parse_f(struct parser_state *state, int arg1, int arg2) { - struct bfs_expr *expr = parse_unary_flag(state); +static struct bfs_expr *parse_f(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_ctx *ctx = parser->ctx; + + struct bfs_expr *expr = parse_unary_flag(parser); if (!expr) { return NULL; } - if (parse_root(state, expr->argv[1]) != 0) { - bfs_expr_free(expr); + // Mark the path as a path, not a regular argument + size_t i = expr->argv - ctx->argv; + ctx->kinds[i + 1] = BFS_PATH; + + if (parse_root(parser, expr->argv[1]) != 0) { return NULL; } @@ -1317,64 +1355,27 @@ static struct bfs_expr *parse_f(struct parser_state *state, int arg1, int arg2) /** * Parse -files0-from PATH. */ -static struct bfs_expr *parse_files0_from(struct parser_state *state, int arg1, int arg2) { - struct bfs_expr *expr = parse_unary_option(state); +static struct bfs_expr *parse_files0_from(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_unary_option(parser); if (!expr) { return NULL; } - const char *from = expr->argv[1]; - - FILE *file; - if (strcmp(from, "-") == 0) { - file = stdin; - } else { - file = xfopen(from, O_RDONLY | O_CLOEXEC); - } - if (!file) { - parse_expr_error(state, expr, "%m.\n"); - goto fail; - } - - while (true) { - char *path = xgetdelim(file, '\0'); - if (!path) { - if (errno) { - goto fail; - } else { - break; - } - } - - int ret = parse_root(state, path); - free(path); - if (ret != 0) { - goto fail; - } - } - - if (file == stdin) { - state->files0_stdin_arg = expr->argv; - } else { - fclose(file); - } - - state->implicit_root = false; + // For compatibility with GNU find, + // + // bfs -files0-from a -files0-from b + // + // should *only* use b, not a. So stash the expression here and only + // process the last one at the end of parsing. + parser->files0_expr = expr; return expr; - -fail: - if (file && file != stdin) { - fclose(file); - } - bfs_expr_free(expr); - return NULL; } /** * Parse -flags FLAGS. */ -static struct bfs_expr *parse_flags(struct parser_state *state, int arg1, int arg2) { - struct bfs_expr *expr = parse_unary_test(state, eval_flags); +static struct bfs_expr *parse_flags(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_unary_test(parser, eval_flags); if (!expr) { return NULL; } @@ -1396,11 +1397,10 @@ static struct bfs_expr *parse_flags(struct parser_state *state, int arg1, int ar if (xstrtofflags(&flags, &expr->set_flags, &expr->clear_flags) != 0) { if (errno == ENOTSUP) { - parse_expr_error(state, expr, "Missing platform support.\n"); + parse_expr_error(parser, expr, "Missing platform support.\n"); } else { - parse_expr_error(state, expr, "Invalid flags.\n"); + parse_expr_error(parser, expr, "Invalid flags.\n"); } - bfs_expr_free(expr); return NULL; } @@ -1410,107 +1410,96 @@ static struct bfs_expr *parse_flags(struct parser_state *state, int arg1, int ar /** * Parse -fls FILE. */ -static struct bfs_expr *parse_fls(struct parser_state *state, int arg1, int arg2) { - struct bfs_expr *expr = parse_unary_action(state, eval_fls); +static struct bfs_expr *parse_fls(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_unary_action(parser, eval_fls); if (!expr) { - goto fail; + return NULL; } - if (expr_open(state, expr, expr->argv[1]) != 0) { - goto fail; + if (expr_open(parser, expr, expr->argv[1]) != 0) { + return NULL; } return expr; - -fail: - bfs_expr_free(expr); - return NULL; } /** * Parse -fprint FILE. */ -static struct bfs_expr *parse_fprint(struct parser_state *state, int arg1, int arg2) { - struct bfs_expr *expr = parse_unary_action(state, eval_fprint); - if (expr) { - if (expr_open(state, expr, expr->argv[1]) != 0) { - goto fail; - } +static struct bfs_expr *parse_fprint(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_unary_action(parser, eval_fprint); + if (!expr) { + return NULL; } - return expr; -fail: - bfs_expr_free(expr); - return NULL; + if (expr_open(parser, expr, expr->argv[1]) != 0) { + return NULL; + } + + return expr; } /** * Parse -fprint0 FILE. */ -static struct bfs_expr *parse_fprint0(struct parser_state *state, int arg1, int arg2) { - struct bfs_expr *expr = parse_unary_action(state, eval_fprint0); - if (expr) { - if (expr_open(state, expr, expr->argv[1]) != 0) { - goto fail; - } +static struct bfs_expr *parse_fprint0(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_unary_action(parser, eval_fprint0); + if (!expr) { + return NULL; } - return expr; -fail: - bfs_expr_free(expr); - return NULL; + if (expr_open(parser, expr, expr->argv[1]) != 0) { + return NULL; + } + + return expr; } /** * Parse -fprintf FILE FORMAT. */ -static struct bfs_expr *parse_fprintf(struct parser_state *state, int arg1, int arg2) { - const char *arg = state->argv[0]; +static struct bfs_expr *parse_fprintf(struct bfs_parser *parser, int arg1, int arg2) { + const char *arg = parser->argv[0]; - const char *file = state->argv[1]; + const char *file = parser->argv[1]; if (!file) { - parse_error(state, "${blu}%s${rs} needs a file.\n", arg); + parse_error(parser, "${blu}%s${rs} needs a file.\n", arg); return NULL; } - const char *format = state->argv[2]; + const char *format = parser->argv[2]; if (!format) { - parse_error(state, "${blu}%s${rs} needs a format string.\n", arg); + parse_error(parser, "${blu}%s${rs} needs a format string.\n", arg); return NULL; } - struct bfs_expr *expr = parse_action(state, eval_fprintf, 3); + struct bfs_expr *expr = parse_action(parser, eval_fprintf, 3); if (!expr) { return NULL; } - if (expr_open(state, expr, file) != 0) { - goto fail; + if (expr_open(parser, expr, file) != 0) { + return NULL; } - if (bfs_printf_parse(state->ctx, expr, format) != 0) { - goto fail; + if (bfs_printf_parse(parser->ctx, expr, format) != 0) { + return NULL; } return expr; - -fail: - bfs_expr_free(expr); - return NULL; } /** * Parse -fstype TYPE. */ -static struct bfs_expr *parse_fstype(struct parser_state *state, int arg1, int arg2) { - struct bfs_expr *expr = parse_unary_test(state, eval_fstype); +static struct bfs_expr *parse_fstype(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_unary_test(parser, eval_fstype); if (!expr) { return NULL; } - if (!bfs_ctx_mtab(state->ctx)) { - parse_expr_error(state, expr, "Couldn't parse the mount table: %m.\n"); - bfs_expr_free(expr); + if (!bfs_ctx_mtab(parser->ctx)) { + parse_expr_error(parser, expr, "Couldn't parse the mount table: %s.\n", errstr()); return NULL; } @@ -1520,237 +1509,199 @@ static struct bfs_expr *parse_fstype(struct parser_state *state, int arg1, int a /** * Parse -gid/-group. */ -static struct bfs_expr *parse_group(struct parser_state *state, int arg1, int arg2) { - struct bfs_expr *expr = parse_unary_test(state, eval_gid); +static struct bfs_expr *parse_group(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_unary_test(parser, eval_gid); if (!expr) { return NULL; } - const struct group *grp = bfs_getgrnam(state->ctx->groups, expr->argv[1]); + const struct group *grp = bfs_getgrnam(parser->ctx->groups, expr->argv[1]); if (grp) { expr->num = grp->gr_gid; expr->int_cmp = BFS_INT_EQUAL; } else if (looks_like_icmp(expr->argv[1])) { - if (!parse_icmp(state, expr, 0)) { - goto fail; + if (!parse_icmp(parser, expr, 0)) { + return NULL; } } else if (errno) { - parse_expr_error(state, expr, "%m.\n"); - goto fail; + parse_expr_error(parser, expr, "%s.\n", errstr()); + return NULL; } else { - parse_expr_error(state, expr, "No such group.\n"); - goto fail; + parse_expr_error(parser, expr, "No such group.\n"); + return NULL; } return expr; - -fail: - bfs_expr_free(expr); - return NULL; } /** * Parse -unique. */ -static struct bfs_expr *parse_unique(struct parser_state *state, int arg1, int arg2) { - state->ctx->unique = true; - return parse_nullary_option(state); +static struct bfs_expr *parse_unique(struct bfs_parser *parser, int arg1, int arg2) { + parser->ctx->unique = true; + return parse_nullary_option(parser); } /** * Parse -used N. */ -static struct bfs_expr *parse_used(struct parser_state *state, int arg1, int arg2) { - return parse_test_icmp(state, eval_used); +static struct bfs_expr *parse_used(struct bfs_parser *parser, int arg1, int arg2) { + return parse_test_icmp(parser, eval_used); } /** * Parse -uid/-user. */ -static struct bfs_expr *parse_user(struct parser_state *state, int arg1, int arg2) { - struct bfs_expr *expr = parse_unary_test(state, eval_uid); +static struct bfs_expr *parse_user(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_unary_test(parser, eval_uid); if (!expr) { return NULL; } - const struct passwd *pwd = bfs_getpwnam(state->ctx->users, expr->argv[1]); + const struct passwd *pwd = bfs_getpwnam(parser->ctx->users, expr->argv[1]); if (pwd) { expr->num = pwd->pw_uid; expr->int_cmp = BFS_INT_EQUAL; } else if (looks_like_icmp(expr->argv[1])) { - if (!parse_icmp(state, expr, 0)) { - goto fail; + if (!parse_icmp(parser, expr, 0)) { + return NULL; } } else if (errno) { - parse_expr_error(state, expr, "%m.\n"); - goto fail; + parse_expr_error(parser, expr, "%s.\n", errstr()); + return NULL; } else { - parse_expr_error(state, expr, "No such user.\n"); - goto fail; + parse_expr_error(parser, expr, "No such user.\n"); + return NULL; } return expr; - -fail: - bfs_expr_free(expr); - return NULL; } /** * Parse -hidden. */ -static struct bfs_expr *parse_hidden(struct parser_state *state, int arg1, int arg2) { - return parse_nullary_test(state, eval_hidden); +static struct bfs_expr *parse_hidden(struct bfs_parser *parser, int arg1, int arg2) { + return parse_nullary_test(parser, eval_hidden); } /** * Parse -(no)?ignore_readdir_race. */ -static struct bfs_expr *parse_ignore_races(struct parser_state *state, int ignore, int arg2) { - state->ctx->ignore_races = ignore; - return parse_nullary_option(state); +static struct bfs_expr *parse_ignore_races(struct bfs_parser *parser, int ignore, int arg2) { + parser->ctx->ignore_races = ignore; + return parse_nullary_option(parser); } /** * Parse -inum N. */ -static struct bfs_expr *parse_inum(struct parser_state *state, int arg1, int arg2) { - return parse_test_icmp(state, eval_inum); +static struct bfs_expr *parse_inum(struct bfs_parser *parser, int arg1, int arg2) { + return parse_test_icmp(parser, eval_inum); } /** * Parse -j<n>. */ -static struct bfs_expr *parse_jobs(struct parser_state *state, int arg1, int arg2) { - struct bfs_expr *expr = parse_nullary_flag(state); +static struct bfs_expr *parse_jobs(struct bfs_parser *parser, int arg1, int arg2) { + const char *arg; + struct bfs_expr *expr = parse_prefix_flag(parser, 'j', false, &arg); if (!expr) { return NULL; } unsigned int n; - if (!parse_int(state, expr->argv, expr->argv[0] + 2, &n, IF_INT | IF_UNSIGNED)) { - bfs_expr_free(expr); + if (!parse_int(parser, expr->argv, arg, &n, IF_INT | IF_UNSIGNED)) { return NULL; } if (n == 0) { - parse_expr_error(state, expr, "${bld}0${rs} is not enough threads.\n"); - bfs_expr_free(expr); + parse_expr_error(parser, expr, "${bld}0${rs} is not enough threads.\n"); return NULL; } - state->ctx->threads = n; + parser->ctx->threads = n; return expr; } /** - * Parse -links N. + * Parse -limit N. */ -static struct bfs_expr *parse_links(struct parser_state *state, int arg1, int arg2) { - return parse_test_icmp(state, eval_links); -} - -/** - * Parse -ls. - */ -static struct bfs_expr *parse_ls(struct parser_state *state, int arg1, int arg2) { - struct bfs_expr *expr = parse_nullary_action(state, eval_fls); +static struct bfs_expr *parse_limit(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_unary_action(parser, eval_limit); if (!expr) { return NULL; } - init_print_expr(state, expr); + char **arg = &expr->argv[1]; + if (!parse_int(parser, arg, *arg, &expr->num, IF_LONG_LONG)) { + return NULL; + } + + if (expr->num <= 0) { + parse_expr_error(parser, expr, "The %pX must be at least ${bld}1${rs}.\n", expr); + return NULL; + } + + parser->limit_expr = expr; return expr; } /** - * Parse -mount. + * Parse -links N. + */ +static struct bfs_expr *parse_links(struct bfs_parser *parser, int arg1, int arg2) { + return parse_test_icmp(parser, eval_links); +} + +/** + * Parse -ls. */ -static struct bfs_expr *parse_mount(struct parser_state *state, int arg1, int arg2) { - struct bfs_expr *expr = parse_nullary_option(state); +static struct bfs_expr *parse_ls(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_nullary_action(parser, eval_fls); if (!expr) { return NULL; } - parse_expr_warning(state, expr, "In the future, ${blu}%s${rs} will skip mount points entirely, unlike\n", expr->argv[0]); - bfs_warning(state->ctx, "${blu}-xdev${rs}, due to http://austingroupbugs.net/view.php?id=1133.\n\n"); - - state->ctx->flags |= BFTW_PRUNE_MOUNTS; - state->mount_arg = expr->argv; + init_print_expr(parser, expr); return expr; } /** - * Common code for fnmatch() tests. + * Parse -mount. */ -static struct bfs_expr *parse_fnmatch(const struct parser_state *state, struct bfs_expr *expr, bool casefold) { +static struct bfs_expr *parse_mount(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_nullary_option(parser); if (!expr) { return NULL; } - expr->pattern = expr->argv[1]; - - if (casefold) { -#ifdef FNM_CASEFOLD - expr->fnm_flags = FNM_CASEFOLD; -#else - parse_expr_error(state, expr, "Missing platform support.\n"); - bfs_expr_free(expr); - return NULL; -#endif - } else { - expr->fnm_flags = 0; - } - - // POSIX says, about fnmatch(): - // - // If pattern ends with an unescaped <backslash>, fnmatch() shall - // return a non-zero value (indicating either no match or an error). - // - // But not all implementations obey this, so check for it ourselves. - size_t i, len = strlen(expr->pattern); - for (i = 0; i < len; ++i) { - if (expr->pattern[len - i - 1] != '\\') { - break; - } - } - if (i % 2 != 0) { - parse_expr_warning(state, expr, "Unescaped trailing backslash.\n\n"); - expr->eval_fn = eval_false; - return expr; - } - - // strcmp() can be much faster than fnmatch() since it doesn't have to - // parse the pattern, so special-case patterns with no wildcards. - // - // https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_13_01 - expr->literal = strcspn(expr->pattern, "?*\\[") == len; - + parser->ctx->flags |= BFTW_SKIP_MOUNTS; + parser->mount_expr = expr; return expr; } /** * Parse -i?name. */ -static struct bfs_expr *parse_name(struct parser_state *state, int casefold, int arg2) { - struct bfs_expr *expr = parse_unary_test(state, eval_name); - return parse_fnmatch(state, expr, casefold); +static struct bfs_expr *parse_name(struct bfs_parser *parser, int casefold, int arg2) { + struct bfs_expr *expr = parse_unary_test(parser, eval_name); + return parse_fnmatch(parser, expr, casefold); } /** * Parse -i?path, -i?wholename. */ -static struct bfs_expr *parse_path(struct parser_state *state, int casefold, int arg2) { - struct bfs_expr *expr = parse_unary_test(state, eval_path); - return parse_fnmatch(state, expr, casefold); +static struct bfs_expr *parse_path(struct bfs_parser *parser, int casefold, int arg2) { + struct bfs_expr *expr = parse_unary_test(parser, eval_path); + return parse_fnmatch(parser, expr, casefold); } /** * Parse -i?lname. */ -static struct bfs_expr *parse_lname(struct parser_state *state, int casefold, int arg2) { - struct bfs_expr *expr = parse_unary_test(state, eval_lname); - return parse_fnmatch(state, expr, casefold); +static struct bfs_expr *parse_lname(struct bfs_parser *parser, int casefold, int arg2) { + struct bfs_expr *expr = parse_unary_test(parser, eval_lname); + return parse_fnmatch(parser, expr, casefold); } /** Get the bfs_stat_field for X/Y in -newerXY. */ @@ -1770,20 +1721,20 @@ static enum bfs_stat_field parse_newerxy_field(char c) { } /** Parse an explicit reference timestamp for -newerXt and -*since. */ -static int parse_reftime(const struct parser_state *state, struct bfs_expr *expr) { +static int parse_reftime(const struct bfs_parser *parser, struct bfs_expr *expr) { if (xgetdate(expr->argv[1], &expr->reftime) == 0) { return 0; } else if (errno != EINVAL) { - parse_expr_error(state, expr, "%m.\n"); + parse_expr_error(parser, expr, "%s.\n", errstr()); return -1; } - parse_expr_error(state, expr, "Invalid timestamp.\n\n"); + parse_expr_error(parser, expr, "Invalid timestamp.\n\n"); fprintf(stderr, "Supported timestamp formats are ISO 8601-like, e.g.\n\n"); struct tm tm; - if (xlocaltime(&state->now.tv_sec, &tm) != 0) { - parse_perror(state, "xlocaltime()"); + if (!localtime_r(&parser->now.tv_sec, &tm)) { + parse_perror(parser, "localtime_r()"); return -1; } @@ -1792,18 +1743,18 @@ static int parse_reftime(const struct parser_state *state, struct bfs_expr *expr fprintf(stderr, " - %04d-%02d-%02d\n", year, month, tm.tm_mday); fprintf(stderr, " - %04d-%02d-%02dT%02d:%02d:%02d\n", year, month, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec); -#if __FreeBSD__ +#if BFS_HAS_TM_GMTOFF int gmtoff = tm.tm_gmtoff; #else int gmtoff = -timezone; #endif - int tz_hour = gmtoff/3600; - int tz_min = (labs(gmtoff)/60)%60; + int tz_hour = gmtoff / 3600; + int tz_min = (labs(gmtoff) / 60) % 60; fprintf(stderr, " - %04d-%02d-%02dT%02d:%02d:%02d%+03d:%02d\n", - year, month, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec, tz_hour, tz_min); + year, month, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec, tz_hour, tz_min); - if (xgmtime(&state->now.tv_sec, &tm) != 0) { - parse_perror(state, "xgmtime()"); + if (!gmtime_r(&parser->now.tv_sec, &tm)) { + parse_perror(parser, "gmtime_r()"); return -1; } @@ -1817,66 +1768,69 @@ static int parse_reftime(const struct parser_state *state, struct bfs_expr *expr /** * Parse -newerXY. */ -static struct bfs_expr *parse_newerxy(struct parser_state *state, int arg1, int arg2) { - const char *arg = state->argv[0]; +static struct bfs_expr *parse_newerxy(struct bfs_parser *parser, int arg1, int arg2) { + const char *arg = parser->argv[0]; if (strlen(arg) != 8) { - parse_error(state, "Expected ${blu}-newer${bld}XY${rs}; found ${blu}-newer${bld}%pq${rs}.\n", arg + 6); + parse_error(parser, "Expected ${blu}-newer${bld}XY${rs}; found ${blu}-newer${bld}%pq${rs}.\n", arg + 6); return NULL; } - struct bfs_expr *expr = parse_unary_test(state, eval_newer); + struct bfs_expr *expr = parse_unary_test(parser, eval_newer); if (!expr) { - goto fail; + return NULL; } expr->stat_field = parse_newerxy_field(arg[6]); if (!expr->stat_field) { - parse_expr_error(state, expr, - "For ${blu}-newer${bld}XY${rs}, ${bld}X${rs} should be ${bld}a${rs}, ${bld}c${rs}, ${bld}m${rs}, or ${bld}B${rs}, not ${err}%c${rs}.\n", - arg[6]); - goto fail; + parse_expr_error(parser, expr, + "For ${blu}-newer${bld}XY${rs}, ${bld}X${rs} should be ${bld}a${rs}, ${bld}c${rs}, ${bld}m${rs}, or ${bld}B${rs}, not ${err}%c${rs}.\n", + arg[6]); + return NULL; } if (arg[7] == 't') { - if (parse_reftime(state, expr) != 0) { - goto fail; + if (parse_reftime(parser, expr) != 0) { + return NULL; } } else { enum bfs_stat_field field = parse_newerxy_field(arg[7]); if (!field) { - parse_expr_error(state, expr, - "For ${blu}-newer${bld}XY${rs}, ${bld}Y${rs} should be ${bld}a${rs}, ${bld}c${rs}, ${bld}m${rs}, ${bld}B${rs}, or ${bld}t${rs}, not ${err}%c${rs}.\n", - arg[7]); - goto fail; + parse_expr_error(parser, expr, + "For ${blu}-newer${bld}XY${rs}, ${bld}Y${rs} should be ${bld}a${rs}, ${bld}c${rs}, ${bld}m${rs}, ${bld}B${rs}, or ${bld}t${rs}, not ${err}%c${rs}.\n", + arg[7]); + return NULL; } struct bfs_stat sb; - if (stat_arg(state, &expr->argv[1], &sb) != 0) { - goto fail; + if (stat_arg(parser, &expr->argv[1], &sb) != 0) { + return NULL; } - const struct timespec *reftime = bfs_stat_time(&sb, field); if (!reftime) { - parse_expr_error(state, expr, "Couldn't get file %s.\n", bfs_stat_field_name(field)); - goto fail; + parse_expr_error(parser, expr, "Couldn't get file %s.\n", bfs_stat_field_name(field)); + return NULL; } expr->reftime = *reftime; } return expr; +} -fail: - bfs_expr_free(expr); - return NULL; +/** + * Parse -noerror. + */ +static struct bfs_expr *parse_noerror(struct bfs_parser *parser, int arg1, int arg2) { + parser->ctx->ignore_errors = true; + return parse_nullary_option(parser); } /** * Parse -nogroup. */ -static struct bfs_expr *parse_nogroup(struct parser_state *state, int arg1, int arg2) { - struct bfs_expr *expr = parse_nullary_test(state, eval_nogroup); +static struct bfs_expr *parse_nogroup(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_nullary_test(parser, eval_nogroup); if (expr) { // Who knows how many FDs getgrgid_r() needs? expr->ephemeral_fds = 3; @@ -1887,33 +1841,36 @@ static struct bfs_expr *parse_nogroup(struct parser_state *state, int arg1, int /** * Parse -nohidden. */ -static struct bfs_expr *parse_nohidden(struct parser_state *state, int arg1, int arg2) { - struct bfs_expr *hidden = bfs_expr_new(eval_hidden, 1, &fake_hidden_arg); +static struct bfs_expr *parse_nohidden(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *hidden = parse_new_expr(parser, eval_hidden, 1, &fake_hidden_arg, BFS_TEST); if (!hidden) { return NULL; } - if (parse_exclude(state, hidden) != 0) { - return NULL; - } - - return parse_nullary_option(state); + bfs_expr_append(parser->ctx->exclude, hidden); + return parse_nullary_option(parser); } /** * Parse -noleaf. */ -static struct bfs_expr *parse_noleaf(struct parser_state *state, int arg1, int arg2) { - parse_warning(state, "${ex}%s${rs} does not apply the optimization that ${blu}%s${rs} inhibits.\n\n", - BFS_COMMAND, state->argv[0]); - return parse_nullary_option(state); +static struct bfs_expr *parse_noleaf(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_nullary_option(parser); + if (!expr) { + return NULL; + } + + parse_expr_warning(parser, expr, + "${ex}%s${rs} does not apply the optimization that %px inhibits.\n\n", + BFS_COMMAND, expr); + return expr; } /** * Parse -nouser. */ -static struct bfs_expr *parse_nouser(struct parser_state *state, int arg1, int arg2) { - struct bfs_expr *expr = parse_nullary_test(state, eval_nouser); +static struct bfs_expr *parse_nouser(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_nullary_test(parser, eval_nouser); if (expr) { // Who knows how many FDs getpwuid_r() needs? expr->ephemeral_fds = 3; @@ -1924,10 +1881,10 @@ static struct bfs_expr *parse_nouser(struct parser_state *state, int arg1, int a /** * Parse a permission mode like chmod(1). */ -static int parse_mode(const struct parser_state *state, const char *mode, struct bfs_expr *expr) { +static int parse_mode(const struct bfs_parser *parser, const char *mode, struct bfs_expr *expr) { if (mode[0] >= '0' && mode[0] <= '9') { unsigned int parsed; - if (!parse_int(state, NULL, mode, &parsed, 8 | IF_INT | IF_UNSIGNED | IF_QUIET)) { + if (!parse_int(parser, NULL, mode, &parsed, 8 | IF_INT | IF_UNSIGNED | IF_QUIET)) { goto fail; } if (parsed > 07777) { @@ -1939,6 +1896,8 @@ static int parse_mode(const struct parser_state *state, const char *mode, struct return 0; } + mode_t umask = parser->ctx->umask; + expr->file_mode = 0; expr->dir_mode = 0; @@ -1967,25 +1926,27 @@ static int parse_mode(const struct parser_state *state, const char *mode, struct MODE_ACTION_APPLY, MODE_OP, MODE_PERM, - } mstate = MODE_CLAUSE; + } state = MODE_CLAUSE; enum { MODE_PLUS, MODE_MINUS, MODE_EQUALS, - } op = uninit(op, MODE_EQUALS); + } op uninit(MODE_EQUALS); - mode_t who = uninit(who, 0); - mode_t file_change = uninit(file_change, 0); - mode_t dir_change = uninit(dir_change, 0); + mode_t who uninit(0); + mode_t mask uninit(0); + mode_t file_change uninit(0); + mode_t dir_change uninit(0); const char *i = mode; while (true) { - switch (mstate) { + switch (state) { case MODE_CLAUSE: who = 0; - mstate = MODE_WHO; - fallthru; + mask = 0777; + state = MODE_WHO; + _fallthrough; case MODE_WHO: switch (*i) { @@ -2002,7 +1963,7 @@ static int parse_mode(const struct parser_state *state, const char *mode, struct who |= 0777; break; default: - mstate = MODE_ACTION; + state = MODE_ACTION; continue; } break; @@ -2012,7 +1973,7 @@ static int parse_mode(const struct parser_state *state, const char *mode, struct case MODE_EQUALS: expr->file_mode &= ~who; expr->dir_mode &= ~who; - fallthru; + _fallthrough; case MODE_PLUS: expr->file_mode |= file_change; expr->dir_mode |= dir_change; @@ -2022,37 +1983,40 @@ static int parse_mode(const struct parser_state *state, const char *mode, struct expr->dir_mode &= ~dir_change; break; } - fallthru; + _fallthrough; case MODE_ACTION: if (who == 0) { who = 0777; + mask = who & ~umask; + } else { + mask = who; } switch (*i) { case '+': op = MODE_PLUS; - mstate = MODE_OP; + state = MODE_OP; break; case '-': op = MODE_MINUS; - mstate = MODE_OP; + state = MODE_OP; break; case '=': op = MODE_EQUALS; - mstate = MODE_OP; + state = MODE_OP; break; case ',': - if (mstate == MODE_ACTION_APPLY) { - mstate = MODE_CLAUSE; + if (state == MODE_ACTION_APPLY) { + state = MODE_CLAUSE; } else { goto fail; } break; case '\0': - if (mstate == MODE_ACTION_APPLY) { + if (state == MODE_ACTION_APPLY) { goto done; } else { goto fail; @@ -2081,32 +2045,32 @@ static int parse_mode(const struct parser_state *state, const char *mode, struct default: file_change = 0; dir_change = 0; - mstate = MODE_PERM; + state = MODE_PERM; continue; } file_change |= (file_change << 6) | (file_change << 3); - file_change &= who; + file_change &= mask; dir_change |= (dir_change << 6) | (dir_change << 3); - dir_change &= who; - mstate = MODE_ACTION_APPLY; + dir_change &= mask; + state = MODE_ACTION_APPLY; break; case MODE_PERM: switch (*i) { case 'r': - file_change |= who & 0444; - dir_change |= who & 0444; + file_change |= mask & 0444; + dir_change |= mask & 0444; break; case 'w': - file_change |= who & 0222; - dir_change |= who & 0222; + file_change |= mask & 0222; + dir_change |= mask & 0222; break; case 'x': - file_change |= who & 0111; - fallthru; + file_change |= mask & 0111; + _fallthrough; case 'X': - dir_change |= who & 0111; + dir_change |= mask & 0111; break; case 's': if (who & 0700) { @@ -2125,7 +2089,7 @@ static int parse_mode(const struct parser_state *state, const char *mode, struct } break; default: - mstate = MODE_ACTION_APPLY; + state = MODE_ACTION_APPLY; continue; } break; @@ -2138,15 +2102,15 @@ done: return 0; fail: - parse_expr_error(state, expr, "Invalid mode.\n"); + parse_expr_error(parser, expr, "Invalid mode.\n"); return -1; } /** * Parse -perm MODE. */ -static struct bfs_expr *parse_perm(struct parser_state *state, int field, int arg2) { - struct bfs_expr *expr = parse_unary_test(state, eval_perm); +static struct bfs_expr *parse_perm(struct bfs_parser *parser, int field, int arg2) { + struct bfs_expr *expr = parse_unary_test(parser, eval_perm); if (!expr) { return NULL; } @@ -2167,30 +2131,26 @@ static struct bfs_expr *parse_perm(struct parser_state *state, int field, int ar ++mode; break; } - fallthru; + _fallthrough; default: expr->mode_cmp = BFS_MODE_EQUAL; break; } - if (parse_mode(state, mode, expr) != 0) { - goto fail; + if (parse_mode(parser, mode, expr) != 0) { + return NULL; } return expr; - -fail: - bfs_expr_free(expr); - return NULL; } /** * Parse -print. */ -static struct bfs_expr *parse_print(struct parser_state *state, int arg1, int arg2) { - struct bfs_expr *expr = parse_nullary_action(state, eval_fprint); +static struct bfs_expr *parse_print(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_nullary_action(parser, eval_fprint); if (expr) { - init_print_expr(state, expr); + init_print_expr(parser, expr); } return expr; } @@ -2198,10 +2158,10 @@ static struct bfs_expr *parse_print(struct parser_state *state, int arg1, int ar /** * Parse -print0. */ -static struct bfs_expr *parse_print0(struct parser_state *state, int arg1, int arg2) { - struct bfs_expr *expr = parse_nullary_action(state, eval_fprint0); +static struct bfs_expr *parse_print0(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_nullary_action(parser, eval_fprint0); if (expr) { - init_print_expr(state, expr); + init_print_expr(parser, expr); } return expr; } @@ -2209,16 +2169,15 @@ static struct bfs_expr *parse_print0(struct parser_state *state, int arg1, int a /** * Parse -printf FORMAT. */ -static struct bfs_expr *parse_printf(struct parser_state *state, int arg1, int arg2) { - struct bfs_expr *expr = parse_unary_action(state, eval_fprintf); +static struct bfs_expr *parse_printf(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_unary_action(parser, eval_fprintf); if (!expr) { return NULL; } - init_print_expr(state, expr); + init_print_expr(parser, expr); - if (bfs_printf_parse(state->ctx, expr, expr->argv[1]) != 0) { - bfs_expr_free(expr); + if (bfs_printf_parse(parser->ctx, expr, expr->argv[1]) != 0) { return NULL; } @@ -2228,10 +2187,10 @@ static struct bfs_expr *parse_printf(struct parser_state *state, int arg1, int a /** * Parse -printx. */ -static struct bfs_expr *parse_printx(struct parser_state *state, int arg1, int arg2) { - struct bfs_expr *expr = parse_nullary_action(state, eval_fprintx); +static struct bfs_expr *parse_printx(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_nullary_action(parser, eval_fprintx); if (expr) { - init_print_expr(state, expr); + init_print_expr(parser, expr); } return expr; } @@ -2239,67 +2198,67 @@ static struct bfs_expr *parse_printx(struct parser_state *state, int arg1, int a /** * Parse -prune. */ -static struct bfs_expr *parse_prune(struct parser_state *state, int arg1, int arg2) { - state->prune_arg = state->argv; - return parse_nullary_action(state, eval_prune); +static struct bfs_expr *parse_prune(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_nullary_action(parser, eval_prune); + if (!expr) { + return NULL; + } + + parser->prune_expr = expr; + return expr; } /** * Parse -quit. */ -static struct bfs_expr *parse_quit(struct parser_state *state, int arg1, int arg2) { - return parse_nullary_action(state, eval_quit); +static struct bfs_expr *parse_quit(struct bfs_parser *parser, int arg1, int arg2) { + return parse_nullary_action(parser, eval_quit); } /** * Parse -i?regex. */ -static struct bfs_expr *parse_regex(struct parser_state *state, int flags, int arg2) { - struct bfs_expr *expr = parse_unary_test(state, eval_regex); +static struct bfs_expr *parse_regex(struct bfs_parser *parser, int flags, int arg2) { + struct bfs_expr *expr = parse_unary_test(parser, eval_regex); if (!expr) { - goto fail; + return NULL; } - if (bfs_regcomp(&expr->regex, expr->argv[1], state->regex_type, flags) != 0) { - if (!expr->regex) { - parse_perror(state, "bfs_regcomp()"); - goto fail; - } - - char *str = bfs_regerror(expr->regex); - if (!str) { - parse_perror(state, "bfs_regerror()"); - goto fail; + if (bfs_regcomp(&expr->regex, expr->argv[1], parser->regex_type, flags) != 0) { + if (expr->regex) { + char *str = bfs_regerror(expr->regex); + if (str) { + parse_expr_error(parser, expr, "%s.\n", str); + free(str); + } else { + parse_perror(parser, "bfs_regerror()"); + } + } else { + parse_perror(parser, "bfs_regcomp()"); } - parse_expr_error(state, expr, "%s.\n", str); - free(str); - goto fail; + return NULL; } return expr; - -fail: - bfs_expr_free(expr); - return NULL; } /** * Parse -E. */ -static struct bfs_expr *parse_regex_extended(struct parser_state *state, int arg1, int arg2) { - state->regex_type = BFS_REGEX_POSIX_EXTENDED; - return parse_nullary_flag(state); +static struct bfs_expr *parse_regex_extended(struct bfs_parser *parser, int arg1, int arg2) { + parser->regex_type = BFS_REGEX_POSIX_EXTENDED; + return parse_nullary_flag(parser); } /** * Parse -regextype TYPE. */ -static struct bfs_expr *parse_regextype(struct parser_state *state, int arg1, int arg2) { - struct bfs_ctx *ctx = state->ctx; +static struct bfs_expr *parse_regextype(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_ctx *ctx = parser->ctx; CFILE *cfile = ctx->cerr; - struct bfs_expr *expr = parse_unary_option(state); + struct bfs_expr *expr = parse_unary_option(parser); if (!expr) { cfprintf(cfile, "\n"); goto list_types; @@ -2308,23 +2267,34 @@ static struct bfs_expr *parse_regextype(struct parser_state *state, int arg1, in // See https://www.gnu.org/software/gnulib/manual/html_node/Predefined-Syntaxes.html const char *type = expr->argv[1]; if (strcmp(type, "posix-basic") == 0 + || strcmp(type, "posix-minimal-basic") == 0 || strcmp(type, "ed") == 0 || strcmp(type, "sed") == 0) { - state->regex_type = BFS_REGEX_POSIX_BASIC; + parser->regex_type = BFS_REGEX_POSIX_BASIC; } else if (strcmp(type, "posix-extended") == 0) { - state->regex_type = BFS_REGEX_POSIX_EXTENDED; -#if BFS_USE_ONIGURUMA + parser->regex_type = BFS_REGEX_POSIX_EXTENDED; +#if BFS_WITH_ONIGURUMA + } else if (strcmp(type, "awk") == 0 + || strcmp(type, "posix-awk") == 0) { + parser->regex_type = BFS_REGEX_AWK; + } else if (strcmp(type, "gnu-awk") == 0) { + parser->regex_type = BFS_REGEX_GNU_AWK; } else if (strcmp(type, "emacs") == 0) { - state->regex_type = BFS_REGEX_EMACS; + parser->regex_type = BFS_REGEX_EMACS; } else if (strcmp(type, "grep") == 0) { - state->regex_type = BFS_REGEX_GREP; + parser->regex_type = BFS_REGEX_GREP; + } else if (strcmp(type, "egrep") == 0 + || strcmp(type, "posix-egrep") == 0) { + parser->regex_type = BFS_REGEX_EGREP; + } else if (strcmp(type, "findutils-default") == 0) { + parser->regex_type = BFS_REGEX_GNU_FIND; #endif } else if (strcmp(type, "help") == 0) { - state->just_info = true; + parser->just_info = true; cfile = ctx->cout; goto list_types; } else { - parse_expr_error(state, expr, "Unsupported regex type.\n\n"); + parse_expr_error(parser, expr, "Unsupported regex type.\n\n"); goto list_types; } @@ -2332,39 +2302,45 @@ static struct bfs_expr *parse_regextype(struct parser_state *state, int arg1, in list_types: cfprintf(cfile, "Supported types are:\n\n"); - cfprintf(cfile, " ${bld}posix-basic${rs}: POSIX basic regular expressions (BRE)\n"); - cfprintf(cfile, " ${bld}posix-extended${rs}: POSIX extended regular expressions (ERE)\n"); - cfprintf(cfile, " ${bld}ed${rs}: Like ${grn}ed${rs} (same as ${bld}posix-basic${rs})\n"); -#if BFS_USE_ONIGURUMA - cfprintf(cfile, " ${bld}emacs${rs}: Like ${grn}emacs${rs}\n"); - cfprintf(cfile, " ${bld}grep${rs}: Like ${grn}grep${rs}\n"); -#endif - cfprintf(cfile, " ${bld}sed${rs}: Like ${grn}sed${rs} (same as ${bld}posix-basic${rs})\n"); + cfprintf(cfile, " ${bld}posix-basic${rs}: POSIX basic regular expressions (BRE)\n"); + cfprintf(cfile, " ${bld}ed${rs}: Like ${grn}ed${rs} (same as ${bld}posix-basic${rs})\n"); + cfprintf(cfile, " ${bld}sed${rs}: Like ${grn}sed${rs} (same as ${bld}posix-basic${rs})\n\n"); - bfs_expr_free(expr); + cfprintf(cfile, " ${bld}posix-extended${rs}: POSIX extended regular expressions (ERE)\n\n"); + +#if BFS_WITH_ONIGURUMA + cfprintf(cfile, " [${bld}posix-${rs}]${bld}awk${rs}: Like ${grn}awk${rs}\n"); + cfprintf(cfile, " ${bld}gnu-awk${rs}: Like GNU ${grn}awk${rs}\n\n"); + + cfprintf(cfile, " ${bld}emacs${rs}: Like ${grn}emacs${rs}\n\n"); + + cfprintf(cfile, " ${bld}grep${rs}: Like ${grn}grep${rs}\n"); + cfprintf(cfile, " [${bld}posix-${rs}]${bld}egrep${rs}: Like ${grn}grep${rs} ${cyn}-E${rs}\n\n"); + + cfprintf(cfile, " ${bld}findutils-default${rs}: Like GNU ${grn}find${rs}\n"); +#endif return NULL; } /** * Parse -s. */ -static struct bfs_expr *parse_s(struct parser_state *state, int arg1, int arg2) { - state->ctx->flags |= BFTW_SORT; - return parse_nullary_flag(state); +static struct bfs_expr *parse_s(struct bfs_parser *parser, int arg1, int arg2) { + parser->ctx->flags |= BFTW_SORT; + return parse_nullary_flag(parser); } /** * Parse -samefile FILE. */ -static struct bfs_expr *parse_samefile(struct parser_state *state, int arg1, int arg2) { - struct bfs_expr *expr = parse_unary_test(state, eval_samefile); +static struct bfs_expr *parse_samefile(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_unary_test(parser, eval_samefile); if (!expr) { return NULL; } struct bfs_stat sb; - if (stat_arg(state, &expr->argv[1], &sb) != 0) { - bfs_expr_free(expr); + if (stat_arg(parser, &expr->argv[1], &sb) != 0) { return NULL; } @@ -2376,17 +2352,17 @@ static struct bfs_expr *parse_samefile(struct parser_state *state, int arg1, int /** * Parse -S STRATEGY. */ -static struct bfs_expr *parse_search_strategy(struct parser_state *state, int arg1, int arg2) { - struct bfs_ctx *ctx = state->ctx; +static struct bfs_expr *parse_search_strategy(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_ctx *ctx = parser->ctx; CFILE *cfile = ctx->cerr; - struct bfs_expr *expr = parse_unary_flag(state); + const char *arg; + struct bfs_expr *expr = parse_prefix_flag(parser, 'S', true, &arg); if (!expr) { cfprintf(cfile, "\n"); goto list_strategies; } - const char *arg = expr->argv[1]; if (strcmp(arg, "bfs") == 0) { ctx->strategy = BFTW_BFS; } else if (strcmp(arg, "dfs") == 0) { @@ -2396,11 +2372,11 @@ static struct bfs_expr *parse_search_strategy(struct parser_state *state, int ar } else if (strcmp(arg, "eds") == 0) { ctx->strategy = BFTW_EDS; } else if (strcmp(arg, "help") == 0) { - state->just_info = true; + parser->just_info = true; cfile = ctx->cout; goto list_strategies; } else { - parse_expr_error(state, expr, "Unrecognized search strategy.\n\n"); + parse_expr_error(parser, expr, "Unrecognized search strategy.\n\n"); goto list_strategies; } @@ -2412,44 +2388,38 @@ list_strategies: cfprintf(cfile, " ${bld}dfs${rs}: depth-first search\n"); cfprintf(cfile, " ${bld}ids${rs}: iterative deepening search\n"); cfprintf(cfile, " ${bld}eds${rs}: exponential deepening search\n"); - - bfs_expr_free(expr); return NULL; } /** * Parse -[aBcm]?since. */ -static struct bfs_expr *parse_since(struct parser_state *state, int field, int arg2) { - struct bfs_expr *expr = parse_unary_test(state, eval_newer); +static struct bfs_expr *parse_since(struct bfs_parser *parser, int field, int arg2) { + struct bfs_expr *expr = parse_unary_test(parser, eval_newer); if (!expr) { return NULL; } - if (parse_reftime(state, expr) != 0) { - goto fail; + if (parse_reftime(parser, expr) != 0) { + return NULL; } expr->stat_field = field; return expr; - -fail: - bfs_expr_free(expr); - return NULL; } /** * Parse -size N[cwbkMGTP]?. */ -static struct bfs_expr *parse_size(struct parser_state *state, int arg1, int arg2) { - struct bfs_expr *expr = parse_unary_test(state, eval_size); +static struct bfs_expr *parse_size(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_unary_test(parser, eval_size); if (!expr) { return NULL; } - const char *unit = parse_icmp(state, expr, IF_PARTIAL_OK); + const char *unit = parse_icmp(parser, expr, IF_PARTIAL_OK); if (!unit) { - goto fail; + return NULL; } if (strlen(unit) > 1) { @@ -2484,39 +2454,37 @@ static struct bfs_expr *parse_size(struct parser_state *state, int arg1, int arg break; default: - goto bad_unit; + bad_unit: + parse_expr_error(parser, expr, "Expected a size unit (one of ${bld}cwbkMGTP${rs}); found ${err}%pq${rs}.\n", unit); + return NULL; } return expr; - -bad_unit: - parse_expr_error(state, expr, "Expected a size unit (one of ${bld}cwbkMGTP${rs}); found ${err}%pq${rs}.\n", unit); -fail: - bfs_expr_free(expr); - return NULL; } /** * Parse -sparse. */ -static struct bfs_expr *parse_sparse(struct parser_state *state, int arg1, int arg2) { - return parse_nullary_test(state, eval_sparse); +static struct bfs_expr *parse_sparse(struct bfs_parser *parser, int arg1, int arg2) { + return parse_nullary_test(parser, eval_sparse); } /** * Parse -status. */ -static struct bfs_expr *parse_status(struct parser_state *state, int arg1, int arg2) { - state->ctx->status = true; - return parse_nullary_option(state); +static struct bfs_expr *parse_status(struct bfs_parser *parser, int arg1, int arg2) { + parser->ctx->status = true; + return parse_nullary_option(parser); } /** * Parse -x?type [bcdpflsD]. */ -static struct bfs_expr *parse_type(struct parser_state *state, int x, int arg2) { +static struct bfs_expr *parse_type(struct bfs_parser *parser, int x, int arg2) { + struct bfs_ctx *ctx = parser->ctx; + bfs_eval_fn *eval = x ? eval_xtype : eval_type; - struct bfs_expr *expr = parse_unary_test(state, eval); + struct bfs_expr *expr = parse_unary_test(parser, eval); if (!expr) { return NULL; } @@ -2552,15 +2520,16 @@ static struct bfs_expr *parse_type(struct parser_state *state, int x, int arg2) break; case 'w': expr->num |= 1 << BFS_WHT; + ctx->flags |= BFTW_WHITEOUTS; break; case '\0': - parse_expr_error(state, expr, "Expected a type flag.\n"); - goto fail; + parse_expr_error(parser, expr, "Expected a type flag.\n"); + return NULL; default: - parse_expr_error(state, expr, "Unknown type flag ${err}%c${rs}; expected one of [${bld}bcdpflsD${rs}].\n", *c); - goto fail; + parse_expr_error(parser, expr, "Unknown type flag ${err}%c${rs}; expected one of [${bld}bcdpflsD${rs}].\n", *c); + return NULL; } ++c; @@ -2570,34 +2539,30 @@ static struct bfs_expr *parse_type(struct parser_state *state, int x, int arg2) ++c; continue; } else { - parse_expr_error(state, expr, "Types must be comma-separated.\n"); - goto fail; + parse_expr_error(parser, expr, "Types must be comma-separated.\n"); + return NULL; } } return expr; - -fail: - bfs_expr_free(expr); - return NULL; } /** * Parse -(no)?warn. */ -static struct bfs_expr *parse_warn(struct parser_state *state, int warn, int arg2) { - state->ctx->warn = warn; - return parse_nullary_option(state); +static struct bfs_expr *parse_warn(struct bfs_parser *parser, int warn, int arg2) { + parser->ctx->warn = warn; + return parse_nullary_option(parser); } /** * Parse -xattr. */ -static struct bfs_expr *parse_xattr(struct parser_state *state, int arg1, int arg2) { +static struct bfs_expr *parse_xattr(struct bfs_parser *parser, int arg1, int arg2) { #if BFS_CAN_CHECK_XATTRS - return parse_nullary_test(state, eval_xattr); + return parse_nullary_test(parser, eval_xattr); #else - parse_error(state, "Missing platform support.\n"); + parse_error(parser, "Missing platform support.\n"); return NULL; #endif } @@ -2605,11 +2570,11 @@ static struct bfs_expr *parse_xattr(struct parser_state *state, int arg1, int ar /** * Parse -xattrname. */ -static struct bfs_expr *parse_xattrname(struct parser_state *state, int arg1, int arg2) { +static struct bfs_expr *parse_xattrname(struct bfs_parser *parser, int arg1, int arg2) { #if BFS_CAN_CHECK_XATTRS - return parse_unary_test(state, eval_xattrname); + return parse_unary_test(parser, eval_xattrname); #else - parse_error(state, "Missing platform support.\n"); + parse_error(parser, "Missing platform support.\n"); return NULL; #endif } @@ -2617,10 +2582,15 @@ static struct bfs_expr *parse_xattrname(struct parser_state *state, int arg1, in /** * Parse -xdev. */ -static struct bfs_expr *parse_xdev(struct parser_state *state, int arg1, int arg2) { - state->ctx->flags |= BFTW_PRUNE_MOUNTS; - state->xdev_arg = state->argv; - return parse_nullary_option(state); +static struct bfs_expr *parse_xdev(struct bfs_parser *parser, int arg1, int arg2) { + struct bfs_expr *expr = parse_nullary_option(parser); + if (!expr) { + return NULL; + } + + parser->ctx->flags |= BFTW_PRUNE_MOUNTS; + parser->xdev_expr = expr; + return expr; } /** @@ -2721,21 +2691,21 @@ fail: /** * "Parse" -help. */ -static struct bfs_expr *parse_help(struct parser_state *state, int arg1, int arg2) { - CFILE *cout = state->ctx->cout; +static struct bfs_expr *parse_help(struct bfs_parser *parser, int arg1, int arg2) { + CFILE *cout = parser->ctx->cout; pid_t pager = -1; - if (state->stdout_tty) { + if (parser->stdout_tty) { cout = launch_pager(&pager, cout); } cfprintf(cout, "Usage: ${ex}%s${rs} [${cyn}flags${rs}...] [${mag}paths${rs}...] [${blu}expression${rs}...]\n\n", - state->command); + parser->command); cfprintf(cout, "${ex}%s${rs} is compatible with ${ex}find${rs}, with some extensions. " - "${cyn}Flags${rs} (${cyn}-H${rs}/${cyn}-L${rs}/${cyn}-P${rs} etc.), ${mag}paths${rs},\n" - "and ${blu}expressions${rs} may be freely mixed in any order.\n\n", - BFS_COMMAND); + "${cyn}Flags${rs} (${cyn}-H${rs}/${cyn}-L${rs}/${cyn}-P${rs} etc.), ${mag}paths${rs},\n" + "and ${blu}expressions${rs} may be freely mixed in any order.\n\n", + BFS_COMMAND); cfprintf(cout, "${bld}Flags:${rs}\n\n"); @@ -2807,14 +2777,15 @@ static struct bfs_expr *parse_help(struct parser_state *state, int arg1, int arg cfprintf(cout, " ${blu}-ignore_readdir_race${rs}\n"); cfprintf(cout, " ${blu}-noignore_readdir_race${rs}\n"); cfprintf(cout, " Whether to report an error if ${ex}%s${rs} detects that the file tree is modified\n", - BFS_COMMAND); + BFS_COMMAND); cfprintf(cout, " during the search (default: ${blu}-noignore_readdir_race${rs})\n"); cfprintf(cout, " ${blu}-maxdepth${rs} ${bld}N${rs}\n"); cfprintf(cout, " ${blu}-mindepth${rs} ${bld}N${rs}\n"); cfprintf(cout, " Ignore files deeper/shallower than ${bld}N${rs}\n"); cfprintf(cout, " ${blu}-mount${rs}\n"); - cfprintf(cout, " Don't descend into other mount points (same as ${blu}-xdev${rs} for now, but will\n"); - cfprintf(cout, " skip mount points entirely in the future)\n"); + cfprintf(cout, " Exclude mount points entirely from the results\n"); + cfprintf(cout, " ${blu}-noerror${rs}\n"); + cfprintf(cout, " Ignore any errors that occur during traversal\n"); cfprintf(cout, " ${blu}-nohidden${rs}\n"); cfprintf(cout, " Exclude hidden files\n"); cfprintf(cout, " ${blu}-noleaf${rs}\n"); @@ -2850,6 +2821,10 @@ static struct bfs_expr *parse_help(struct parser_state *state, int arg1, int arg cfprintf(cout, " ${blu}-capable${rs}\n"); cfprintf(cout, " Find files with POSIX.1e capabilities set\n"); #endif +#if BFS_CAN_CHECK_CONTEXT + cfprintf(cout, " ${blu}-context${rs} ${bld}GLOB${rs}\n"); + cfprintf(cout, " Find files with SELinux context matching a glob pattern\n"); +#endif cfprintf(cout, " ${blu}-depth${rs} ${bld}[-+]N${rs}\n"); cfprintf(cout, " Find files with depth ${bld}N${rs}\n"); cfprintf(cout, " ${blu}-empty${rs}\n"); @@ -2952,6 +2927,8 @@ static struct bfs_expr *parse_help(struct parser_state *state, int arg1, int arg cfprintf(cout, " ${blu}-fprintf${rs} ${bld}FILE${rs} ${bld}FORMAT${rs}\n"); cfprintf(cout, " Like ${blu}-ls${rs}/${blu}-print${rs}/${blu}-print0${rs}/${blu}-printf${rs}, but write to ${bld}FILE${rs} instead of standard\n" " output\n"); + cfprintf(cout, " ${blu}-limit${rs} ${bld}N${rs}\n"); + cfprintf(cout, " Quit after this action is evaluated ${bld}N${rs} times\n"); cfprintf(cout, " ${blu}-ls${rs}\n"); cfprintf(cout, " List files like ${ex}ls${rs} ${bld}-dils${rs}\n"); cfprintf(cout, " ${blu}-print${rs}\n"); @@ -2978,161 +2955,207 @@ static struct bfs_expr *parse_help(struct parser_state *state, int arg1, int arg if (pager > 0) { cfclose(cout); - waitpid(pager, NULL, 0); + xwaitpid(pager, NULL, 0); } - state->just_info = true; + parser->just_info = true; return NULL; } +/** Print the bfs "logo". */ +static void print_logo(CFILE *cout) { + if (!cout->colors) { + goto boring; + } + + size_t vwidth = xstrwidth(bfs_version); + dchar *spaces = dstrepeat(" ", vwidth); + dchar *lines = dstrepeat("─", vwidth); + if (!spaces || !lines) { + dstrfree(lines); + dstrfree(spaces); + goto boring; + } + + // We do ----\r<emoji> rather than <emoji>--- so we don't have to assume + // anything about the width of the emoji + cfprintf(cout, "╭─────%s╮\r📂\n", lines); + cfprintf(cout, "├${ex}b${rs} %s │\n", spaces); + cfprintf(cout, "╰├${ex}f${rs} ${bld}%s${rs} │\n", bfs_version); + cfprintf(cout, " ╰├${ex}s${rs} %s │\n", spaces); + cfprintf(cout, " ╰──%s─╯\n\n", lines); + + dstrfree(lines); + dstrfree(spaces); + return; + +boring: + printf("%s %s\n\n", BFS_COMMAND, bfs_version); +} + /** * "Parse" -version. */ -static struct bfs_expr *parse_version(struct parser_state *state, int arg1, int arg2) { - cfprintf(state->ctx->cout, "${ex}%s${rs} ${bld}%s${rs}\n\n", BFS_COMMAND, BFS_VERSION); +static struct bfs_expr *parse_version(struct bfs_parser *parser, int arg1, int arg2) { + print_logo(parser->ctx->cout); - printf("%s\n", BFS_HOMEPAGE); + printf("Copyright © Tavian Barnes and the bfs contributors\n"); + printf("No rights reserved (https://opensource.org/license/0BSD)\n\n"); - state->just_info = true; + printf("CONFFLAGS := %s\n", bfs_confflags); + printf("CC := %s\n", bfs_cc); + printf("CPPFLAGS := %s\n", bfs_cppflags); + printf("CFLAGS := %s\n", bfs_cflags); + printf("LDFLAGS := %s\n", bfs_ldflags); + printf("LDLIBS := %s\n", bfs_ldlibs); + + printf("\n%s\n", BFS_HOMEPAGE); + + parser->just_info = true; return NULL; } -typedef struct bfs_expr *parse_fn(struct parser_state *state, int arg1, int arg2); +/** Parser callback function type. */ +typedef struct bfs_expr *parse_fn(struct bfs_parser *parser, int arg1, int arg2); /** * An entry in the parse table for primary expressions. */ struct table_entry { char *arg; - enum token_type type; + enum bfs_kind kind; parse_fn *parse; int arg1; int arg2; bool prefix; + bool needs_arg; }; /** * The parse table for primary expressions. */ static const struct table_entry parse_table[] = { - {"--", T_FLAG}, - {"--help", T_ACTION, parse_help}, - {"--version", T_ACTION, parse_version}, - {"-Bmin", T_TEST, parse_min, BFS_STAT_BTIME}, - {"-Bnewer", T_TEST, parse_newer, BFS_STAT_BTIME}, - {"-Bsince", T_TEST, parse_since, BFS_STAT_BTIME}, - {"-Btime", T_TEST, parse_time, BFS_STAT_BTIME}, - {"-D", T_FLAG, parse_debug}, - {"-E", T_FLAG, parse_regex_extended}, - {"-H", T_FLAG, parse_follow, BFTW_FOLLOW_ROOTS, false}, - {"-L", T_FLAG, parse_follow, BFTW_FOLLOW_ALL, false}, - {"-O", T_FLAG, parse_optlevel, 0, 0, true}, - {"-P", T_FLAG, parse_follow, 0, false}, - {"-S", T_FLAG, parse_search_strategy}, - {"-X", T_FLAG, parse_xargs_safe}, - {"-a", T_OPERATOR}, - {"-acl", T_TEST, parse_acl}, - {"-amin", T_TEST, parse_min, BFS_STAT_ATIME}, - {"-and", T_OPERATOR}, - {"-anewer", T_TEST, parse_newer, BFS_STAT_ATIME}, - {"-asince", T_TEST, parse_since, BFS_STAT_ATIME}, - {"-atime", T_TEST, parse_time, BFS_STAT_ATIME}, - {"-capable", T_TEST, parse_capable}, - {"-cmin", T_TEST, parse_min, BFS_STAT_CTIME}, - {"-cnewer", T_TEST, parse_newer, BFS_STAT_CTIME}, - {"-color", T_OPTION, parse_color, true}, - {"-csince", T_TEST, parse_since, BFS_STAT_CTIME}, - {"-ctime", T_TEST, parse_time, BFS_STAT_CTIME}, - {"-d", T_FLAG, parse_depth}, - {"-daystart", T_OPTION, parse_daystart}, - {"-delete", T_ACTION, parse_delete}, - {"-depth", T_OPTION, parse_depth_n}, - {"-empty", T_TEST, parse_empty}, - {"-exclude", T_OPERATOR}, - {"-exec", T_ACTION, parse_exec, 0}, - {"-execdir", T_ACTION, parse_exec, BFS_EXEC_CHDIR}, - {"-executable", T_TEST, parse_access, X_OK}, - {"-exit", T_ACTION, parse_exit}, - {"-f", T_FLAG, parse_f}, - {"-false", T_TEST, parse_const, false}, - {"-files0-from", T_OPTION, parse_files0_from}, - {"-flags", T_TEST, parse_flags}, - {"-fls", T_ACTION, parse_fls}, - {"-follow", T_OPTION, parse_follow, BFTW_FOLLOW_ALL, true}, - {"-fprint", T_ACTION, parse_fprint}, - {"-fprint0", T_ACTION, parse_fprint0}, - {"-fprintf", T_ACTION, parse_fprintf}, - {"-fstype", T_TEST, parse_fstype}, - {"-gid", T_TEST, parse_group}, - {"-group", T_TEST, parse_group}, - {"-help", T_ACTION, parse_help}, - {"-hidden", T_TEST, parse_hidden}, - {"-ignore_readdir_race", T_OPTION, parse_ignore_races, true}, - {"-ilname", T_TEST, parse_lname, true}, - {"-iname", T_TEST, parse_name, true}, - {"-inum", T_TEST, parse_inum}, - {"-ipath", T_TEST, parse_path, true}, - {"-iregex", T_TEST, parse_regex, BFS_REGEX_ICASE}, - {"-iwholename", T_TEST, parse_path, true}, - {"-j", T_FLAG, parse_jobs, 0, 0, true}, - {"-links", T_TEST, parse_links}, - {"-lname", T_TEST, parse_lname, false}, - {"-ls", T_ACTION, parse_ls}, - {"-maxdepth", T_OPTION, parse_depth_limit, false}, - {"-mindepth", T_OPTION, parse_depth_limit, true}, - {"-mmin", T_TEST, parse_min, BFS_STAT_MTIME}, - {"-mnewer", T_TEST, parse_newer, BFS_STAT_MTIME}, - {"-mount", T_OPTION, parse_mount}, - {"-msince", T_TEST, parse_since, BFS_STAT_MTIME}, - {"-mtime", T_TEST, parse_time, BFS_STAT_MTIME}, - {"-name", T_TEST, parse_name, false}, - {"-newer", T_TEST, parse_newer, BFS_STAT_MTIME}, - {"-newer", T_TEST, parse_newerxy, 0, 0, true}, - {"-nocolor", T_OPTION, parse_color, false}, - {"-nogroup", T_TEST, parse_nogroup}, - {"-nohidden", T_TEST, parse_nohidden}, - {"-noignore_readdir_race", T_OPTION, parse_ignore_races, false}, - {"-noleaf", T_OPTION, parse_noleaf}, - {"-not", T_OPERATOR}, - {"-nouser", T_TEST, parse_nouser}, - {"-nowarn", T_OPTION, parse_warn, false}, - {"-o", T_OPERATOR}, - {"-ok", T_ACTION, parse_exec, BFS_EXEC_CONFIRM}, - {"-okdir", T_ACTION, parse_exec, BFS_EXEC_CONFIRM | BFS_EXEC_CHDIR}, - {"-or", T_OPERATOR}, - {"-path", T_TEST, parse_path, false}, - {"-perm", T_TEST, parse_perm}, - {"-print", T_ACTION, parse_print}, - {"-print0", T_ACTION, parse_print0}, - {"-printf", T_ACTION, parse_printf}, - {"-printx", T_ACTION, parse_printx}, - {"-prune", T_ACTION, parse_prune}, - {"-quit", T_ACTION, parse_quit}, - {"-readable", T_TEST, parse_access, R_OK}, - {"-regex", T_TEST, parse_regex, 0}, - {"-regextype", T_OPTION, parse_regextype}, - {"-rm", T_ACTION, parse_delete}, - {"-s", T_FLAG, parse_s}, - {"-samefile", T_TEST, parse_samefile}, - {"-since", T_TEST, parse_since, BFS_STAT_MTIME}, - {"-size", T_TEST, parse_size}, - {"-sparse", T_TEST, parse_sparse}, - {"-status", T_OPTION, parse_status}, - {"-true", T_TEST, parse_const, true}, - {"-type", T_TEST, parse_type, false}, - {"-uid", T_TEST, parse_user}, - {"-unique", T_OPTION, parse_unique}, - {"-used", T_TEST, parse_used}, - {"-user", T_TEST, parse_user}, - {"-version", T_ACTION, parse_version}, - {"-warn", T_OPTION, parse_warn, true}, - {"-wholename", T_TEST, parse_path, false}, - {"-writable", T_TEST, parse_access, W_OK}, - {"-x", T_FLAG, parse_xdev}, - {"-xattr", T_TEST, parse_xattr}, - {"-xattrname", T_TEST, parse_xattrname}, - {"-xdev", T_OPTION, parse_xdev}, - {"-xtype", T_TEST, parse_type, true}, + {"--", BFS_FLAG}, + {"--help", BFS_ACTION, parse_help}, + {"--version", BFS_ACTION, parse_version}, + {"-Bmin", BFS_TEST, parse_min, BFS_STAT_BTIME}, + {"-Bnewer", BFS_TEST, parse_newer, BFS_STAT_BTIME}, + {"-Bsince", BFS_TEST, parse_since, BFS_STAT_BTIME}, + {"-Btime", BFS_TEST, parse_time, BFS_STAT_BTIME}, + {"-D", BFS_FLAG, parse_debug, .prefix = true}, + {"-E", BFS_FLAG, parse_regex_extended}, + {"-H", BFS_FLAG, parse_follow, BFTW_FOLLOW_ROOTS, false}, + {"-L", BFS_FLAG, parse_follow, BFTW_FOLLOW_ALL, false}, + {"-O", BFS_FLAG, parse_optlevel, .prefix = true}, + {"-P", BFS_FLAG, parse_follow, 0, false}, + {"-S", BFS_FLAG, parse_search_strategy, .prefix = true}, + {"-X", BFS_FLAG, parse_xargs_safe}, + {"-a", BFS_OPERATOR}, + {"-acl", BFS_TEST, parse_acl}, + {"-amin", BFS_TEST, parse_min, BFS_STAT_ATIME}, + {"-and", BFS_OPERATOR}, + {"-anewer", BFS_TEST, parse_newer, BFS_STAT_ATIME}, + {"-asince", BFS_TEST, parse_since, BFS_STAT_ATIME}, + {"-atime", BFS_TEST, parse_time, BFS_STAT_ATIME}, + {"-capable", BFS_TEST, parse_capable}, + {"-cmin", BFS_TEST, parse_min, BFS_STAT_CTIME}, + {"-cnewer", BFS_TEST, parse_newer, BFS_STAT_CTIME}, + {"-color", BFS_OPTION, parse_color, true}, + {"-context", BFS_TEST, parse_context, true}, + {"-csince", BFS_TEST, parse_since, BFS_STAT_CTIME}, + {"-ctime", BFS_TEST, parse_time, BFS_STAT_CTIME}, + {"-d", BFS_FLAG, parse_depth, true}, + {"-daystart", BFS_OPTION, parse_daystart}, + {"-delete", BFS_ACTION, parse_delete}, + {"-depth", BFS_OPTION, parse_depth_n, false}, + {"-empty", BFS_TEST, parse_empty}, + {"-exclude", BFS_OPERATOR}, + {"-exec", BFS_ACTION, parse_exec, 0}, + {"-execdir", BFS_ACTION, parse_exec, BFS_EXEC_CHDIR}, + {"-executable", BFS_TEST, parse_access, X_OK}, + {"-exit", BFS_ACTION, parse_exit}, + {"-f", BFS_FLAG, parse_f, .needs_arg = true}, + {"-false", BFS_TEST, parse_const, false}, + {"-files0-from", BFS_OPTION, parse_files0_from}, + {"-flags", BFS_TEST, parse_flags}, + {"-fls", BFS_ACTION, parse_fls}, + {"-follow", BFS_OPTION, parse_follow, BFTW_FOLLOW_ALL, true}, + {"-fprint", BFS_ACTION, parse_fprint}, + {"-fprint0", BFS_ACTION, parse_fprint0}, + {"-fprintf", BFS_ACTION, parse_fprintf}, + {"-fstype", BFS_TEST, parse_fstype}, + {"-gid", BFS_TEST, parse_group}, + {"-group", BFS_TEST, parse_group}, + {"-help", BFS_ACTION, parse_help}, + {"-hidden", BFS_TEST, parse_hidden}, + {"-ignore_readdir_race", BFS_OPTION, parse_ignore_races, true}, + {"-ilname", BFS_TEST, parse_lname, true}, + {"-iname", BFS_TEST, parse_name, true}, + {"-inum", BFS_TEST, parse_inum}, + {"-ipath", BFS_TEST, parse_path, true}, + {"-iregex", BFS_TEST, parse_regex, BFS_REGEX_ICASE}, + {"-iwholename", BFS_TEST, parse_path, true}, + {"-j", BFS_FLAG, parse_jobs, .prefix = true}, + {"-limit", BFS_ACTION, parse_limit}, + {"-links", BFS_TEST, parse_links}, + {"-lname", BFS_TEST, parse_lname, false}, + {"-ls", BFS_ACTION, parse_ls}, + {"-maxdepth", BFS_OPTION, parse_depth_limit, false}, + {"-mindepth", BFS_OPTION, parse_depth_limit, true}, + {"-mmin", BFS_TEST, parse_min, BFS_STAT_MTIME}, + {"-mnewer", BFS_TEST, parse_newer, BFS_STAT_MTIME}, + {"-mount", BFS_OPTION, parse_mount}, + {"-msince", BFS_TEST, parse_since, BFS_STAT_MTIME}, + {"-mtime", BFS_TEST, parse_time, BFS_STAT_MTIME}, + {"-name", BFS_TEST, parse_name, false}, + {"-newer", BFS_TEST, parse_newer, BFS_STAT_MTIME}, + {"-newer", BFS_TEST, parse_newerxy, .prefix = true}, + {"-nocolor", BFS_OPTION, parse_color, false}, + {"-noerror", BFS_OPTION, parse_noerror}, + {"-nogroup", BFS_TEST, parse_nogroup}, + {"-nohidden", BFS_TEST, parse_nohidden}, + {"-noignore_readdir_race", BFS_OPTION, parse_ignore_races, false}, + {"-noleaf", BFS_OPTION, parse_noleaf}, + {"-not", BFS_OPERATOR}, + {"-nouser", BFS_TEST, parse_nouser}, + {"-nowarn", BFS_OPTION, parse_warn, false}, + {"-o", BFS_OPERATOR}, + {"-ok", BFS_ACTION, parse_exec, BFS_EXEC_CONFIRM}, + {"-okdir", BFS_ACTION, parse_exec, BFS_EXEC_CONFIRM | BFS_EXEC_CHDIR}, + {"-or", BFS_OPERATOR}, + {"-path", BFS_TEST, parse_path, false}, + {"-perm", BFS_TEST, parse_perm}, + {"-print", BFS_ACTION, parse_print}, + {"-print0", BFS_ACTION, parse_print0}, + {"-printf", BFS_ACTION, parse_printf}, + {"-printx", BFS_ACTION, parse_printx}, + {"-prune", BFS_ACTION, parse_prune}, + {"-quit", BFS_ACTION, parse_quit}, + {"-readable", BFS_TEST, parse_access, R_OK}, + {"-regex", BFS_TEST, parse_regex, 0}, + {"-regextype", BFS_OPTION, parse_regextype}, + {"-rm", BFS_ACTION, parse_delete}, + {"-s", BFS_FLAG, parse_s}, + {"-samefile", BFS_TEST, parse_samefile}, + {"-since", BFS_TEST, parse_since, BFS_STAT_MTIME}, + {"-size", BFS_TEST, parse_size}, + {"-sparse", BFS_TEST, parse_sparse}, + {"-status", BFS_OPTION, parse_status}, + {"-true", BFS_TEST, parse_const, true}, + {"-type", BFS_TEST, parse_type, false}, + {"-uid", BFS_TEST, parse_user}, + {"-unique", BFS_OPTION, parse_unique}, + {"-used", BFS_TEST, parse_used}, + {"-user", BFS_TEST, parse_user}, + {"-version", BFS_ACTION, parse_version}, + {"-warn", BFS_OPTION, parse_warn, true}, + {"-wholename", BFS_TEST, parse_path, false}, + {"-writable", BFS_TEST, parse_access, W_OK}, + {"-x", BFS_FLAG, parse_xdev}, + {"-xattr", BFS_TEST, parse_xattr}, + {"-xattrname", BFS_TEST, parse_xattrname}, + {"-xdev", BFS_OPTION, parse_xdev}, + {"-xtype", BFS_TEST, parse_type, true}, {0}, }; @@ -3153,6 +3176,83 @@ static const struct table_entry *table_lookup(const char *arg) { return NULL; } +/** Look up a single-character flag in the parse table. */ +static const struct table_entry *flag_lookup(char flag) { + for (const struct table_entry *entry = parse_table; entry->arg; ++entry) { + enum bfs_kind kind = entry->kind; + if (kind == BFS_FLAG && entry->arg[1] == flag && !entry->arg[2]) { + return entry; + } + } + + return NULL; +} + +/** Check for a multi-flag argument like -LEXO2. */ +static bool is_flag_group(const char *arg) { + // We enforce that at least one flag in a flag group must be a capital + // letter, to avoid ambiguity with primary expressions + bool has_upper = false; + + // Flags that take an argument must appear last + bool needs_arg = false; + + for (size_t i = 1; arg[i]; ++i) { + char c = arg[i]; + if (c >= 'A' && c <= 'Z') { + has_upper = true; + } + + if (needs_arg) { + return false; + } + + const struct table_entry *entry = flag_lookup(c); + if (!entry || !entry->parse) { + return false; + } + + if (entry->prefix) { + // The rest is the flag's argument + break; + } + + needs_arg |= entry->needs_arg; + } + + return has_upper; +} + +/** Parse a multi-flag argument. */ +static struct bfs_expr *parse_flag_group(struct bfs_parser *parser) { + struct bfs_expr *expr = NULL; + + char **start = parser->argv; + char **end = start; + const char *arg = start[0]; + + for (size_t i = 1; arg[i]; ++i) { + parser->argv = start; + + const struct table_entry *entry = flag_lookup(arg[i]); + expr = entry->parse(parser, entry->arg1, entry->arg2); + + if (parser->argv > end) { + end = parser->argv; + } + + if (!expr || entry->prefix) { + break; + } + } + + if (expr) { + bfs_assert(parser->argv == end, "Didn't eat enough tokens"); + } + + return expr; +} + /** Search for a fuzzy match in the parse table. */ static const struct table_entry *table_lookup_fuzzy(const char *arg) { const struct table_entry *best = NULL; @@ -3174,9 +3274,11 @@ static const struct table_entry *table_lookup_fuzzy(const char *arg) { * | TEST * | ACTION */ -static struct bfs_expr *parse_primary(struct parser_state *state) { +static struct bfs_expr *parse_primary(struct bfs_parser *parser) { + struct bfs_ctx *ctx = parser->ctx; + // Paths are already skipped at this point - const char *arg = state->argv[0]; + const char *arg = parser->argv[0]; if (arg[0] != '-') { goto unexpected; @@ -3191,15 +3293,19 @@ static struct bfs_expr *parse_primary(struct parser_state *state) { } } + if (is_flag_group(arg)) { + return parse_flag_group(parser); + } + match = table_lookup_fuzzy(arg); - CFILE *cerr = state->ctx->cerr; - parse_error(state, "Unknown argument; did you mean "); - switch (match->type) { - case T_FLAG: + CFILE *cerr = ctx->cerr; + parse_error(parser, "Unknown argument; did you mean "); + switch (match->kind) { + case BFS_FLAG: cfprintf(cerr, "${cyn}%s${rs}?", match->arg); break; - case T_OPERATOR: + case BFS_OPERATOR: cfprintf(cerr, "${red}%s${rs}?", match->arg); break; default: @@ -3207,7 +3313,7 @@ static struct bfs_expr *parse_primary(struct parser_state *state) { break; } - if (!state->interactive || !match->parse) { + if (!ctx->interactive || !match->parse) { fprintf(stderr, "\n"); goto unmatched; } @@ -3218,16 +3324,16 @@ static struct bfs_expr *parse_primary(struct parser_state *state) { } fprintf(stderr, "\n"); - state->argv[0] = match->arg; + parser->argv[0] = match->arg; matched: - return match->parse(state, match->arg1, match->arg2); + return match->parse(parser, match->arg1, match->arg2); unmatched: return NULL; unexpected: - parse_error(state, "Expected a predicate.\n"); + parse_error(parser, "Expected a predicate.\n"); return NULL; } @@ -3237,71 +3343,66 @@ unexpected: * | "-exclude" FACTOR * | PRIMARY */ -static struct bfs_expr *parse_factor(struct parser_state *state) { - if (skip_paths(state) != 0) { +static struct bfs_expr *parse_factor(struct bfs_parser *parser) { + if (skip_paths(parser) != 0) { return NULL; } - const char *arg = state->argv[0]; + const char *arg = parser->argv[0]; if (!arg) { - parse_argv_error(state, state->last_arg, 1, "Expression terminated prematurely here.\n"); + parse_argv_error(parser, parser->last_arg, 1, "Expression terminated prematurely here.\n"); return NULL; } if (strcmp(arg, "(") == 0) { - parser_advance(state, T_OPERATOR, 1); + parser_advance(parser, BFS_OPERATOR, 1); - struct bfs_expr *expr = parse_expr(state); + struct bfs_expr *expr = parse_expr(parser); if (!expr) { return NULL; } - if (skip_paths(state) != 0) { - bfs_expr_free(expr); + if (skip_paths(parser) != 0) { return NULL; } - arg = state->argv[0]; + arg = parser->argv[0]; if (!arg || strcmp(arg, ")") != 0) { - parse_argv_error(state, state->last_arg, 1, "Expected a ${red})${rs}.\n"); - bfs_expr_free(expr); + parse_argv_error(parser, parser->last_arg, 1, "Expected a ${red})${rs}.\n"); return NULL; } - parser_advance(state, T_OPERATOR, 1); + parser_advance(parser, BFS_OPERATOR, 1); return expr; } else if (strcmp(arg, "-exclude") == 0) { - if (state->excluding) { - parse_error(state, "${err}%s${rs} is not supported within ${red}-exclude${rs}.\n", arg); + if (parser->excluding) { + parse_error(parser, "${err}%s${rs} is not supported within ${red}-exclude${rs}.\n", arg); return NULL; } - char **argv = parser_advance(state, T_OPERATOR, 1); - state->excluding = true; + char **argv = parser_advance(parser, BFS_OPERATOR, 1); + parser->excluding = true; - struct bfs_expr *factor = parse_factor(state); + struct bfs_expr *factor = parse_factor(parser); if (!factor) { return NULL; } - state->excluding = false; + parser->excluding = false; - if (parse_exclude(state, factor) != 0) { - return NULL; - } - - return bfs_expr_new(eval_true, state->argv - argv, argv); + bfs_expr_append(parser->ctx->exclude, factor); + return parse_new_expr(parser, eval_true, parser->argv - argv, argv, BFS_OPERATOR); } else if (strcmp(arg, "!") == 0 || strcmp(arg, "-not") == 0) { - char **argv = parser_advance(state, T_OPERATOR, 1); + char **argv = parser_advance(parser, BFS_OPERATOR, 1); - struct bfs_expr *factor = parse_factor(state); + struct bfs_expr *factor = parse_factor(parser); if (!factor) { return NULL; } - return new_unary_expr(eval_not, factor, argv); + return new_unary_expr(parser, eval_not, factor, argv); } else { - return parse_primary(state); + return parse_primary(parser); } } @@ -3311,16 +3412,15 @@ static struct bfs_expr *parse_factor(struct parser_state *state) { * | TERM "-a" FACTOR * | TERM "-and" FACTOR */ -static struct bfs_expr *parse_term(struct parser_state *state) { - struct bfs_expr *term = parse_factor(state); +static struct bfs_expr *parse_term(struct bfs_parser *parser) { + struct bfs_expr *term = parse_factor(parser); while (term) { - if (skip_paths(state) != 0) { - bfs_expr_free(term); + if (skip_paths(parser) != 0) { return NULL; } - const char *arg = state->argv[0]; + const char *arg = parser->argv[0]; if (!arg) { break; } @@ -3333,17 +3433,16 @@ static struct bfs_expr *parse_term(struct parser_state *state) { char **argv = &fake_and_arg; if (strcmp(arg, "-a") == 0 || strcmp(arg, "-and") == 0) { - argv = parser_advance(state, T_OPERATOR, 1); + argv = parser_advance(parser, BFS_OPERATOR, 1); } struct bfs_expr *lhs = term; - struct bfs_expr *rhs = parse_factor(state); + struct bfs_expr *rhs = parse_factor(parser); if (!rhs) { - bfs_expr_free(lhs); return NULL; } - term = new_binary_expr(eval_and, lhs, rhs, argv); + term = new_binary_expr(parser, eval_and, lhs, rhs, argv); } return term; @@ -3354,16 +3453,15 @@ static struct bfs_expr *parse_term(struct parser_state *state) { * | CLAUSE "-o" TERM * | CLAUSE "-or" TERM */ -static struct bfs_expr *parse_clause(struct parser_state *state) { - struct bfs_expr *clause = parse_term(state); +static struct bfs_expr *parse_clause(struct bfs_parser *parser) { + struct bfs_expr *clause = parse_term(parser); while (clause) { - if (skip_paths(state) != 0) { - bfs_expr_free(clause); + if (skip_paths(parser) != 0) { return NULL; } - const char *arg = state->argv[0]; + const char *arg = parser->argv[0]; if (!arg) { break; } @@ -3372,16 +3470,15 @@ static struct bfs_expr *parse_clause(struct parser_state *state) { break; } - char **argv = parser_advance(state, T_OPERATOR, 1); + char **argv = parser_advance(parser, BFS_OPERATOR, 1); struct bfs_expr *lhs = clause; - struct bfs_expr *rhs = parse_term(state); + struct bfs_expr *rhs = parse_term(parser); if (!rhs) { - bfs_expr_free(lhs); return NULL; } - clause = new_binary_expr(eval_or, lhs, rhs, argv); + clause = new_binary_expr(parser, eval_or, lhs, rhs, argv); } return clause; @@ -3391,16 +3488,15 @@ static struct bfs_expr *parse_clause(struct parser_state *state) { * EXPR : CLAUSE * | EXPR "," CLAUSE */ -static struct bfs_expr *parse_expr(struct parser_state *state) { - struct bfs_expr *expr = parse_clause(state); +static struct bfs_expr *parse_expr(struct bfs_parser *parser) { + struct bfs_expr *expr = parse_clause(parser); while (expr) { - if (skip_paths(state) != 0) { - bfs_expr_free(expr); + if (skip_paths(parser) != 0) { return NULL; } - const char *arg = state->argv[0]; + const char *arg = parser->argv[0]; if (!arg) { break; } @@ -3409,90 +3505,165 @@ static struct bfs_expr *parse_expr(struct parser_state *state) { break; } - char **argv = parser_advance(state, T_OPERATOR, 1); + char **argv = parser_advance(parser, BFS_OPERATOR, 1); struct bfs_expr *lhs = expr; - struct bfs_expr *rhs = parse_clause(state); + struct bfs_expr *rhs = parse_clause(parser); if (!rhs) { - bfs_expr_free(lhs); return NULL; } - expr = new_binary_expr(eval_comma, lhs, rhs, argv); + expr = new_binary_expr(parser, eval_comma, lhs, rhs, argv); } return expr; } +/** Handle -files0-from after parsing. */ +static int parse_files0_roots(struct bfs_parser *parser) { + const struct bfs_ctx *ctx = parser->ctx; + const struct bfs_expr *expr = parser->files0_expr; + + if (ctx->npaths > 0) { + bool highlight[ctx->argc]; + init_highlight(ctx, highlight); + highlight_args(ctx, expr->argv, expr->argc, highlight); + + for (size_t i = 0; i < ctx->argc; ++i) { + if (ctx->kinds[i] == BFS_PATH) { + highlight[i] = true; + } + } + + bfs_argv_error(ctx, highlight); + bfs_error(ctx, "Cannot combine %pX with explicit root paths.\n", expr); + return -1; + } + + const char *from = expr->argv[1]; + + FILE *file; + if (strcmp(from, "-") == 0) { + if (!consume_stdin(parser, expr)) { + return -1; + } + file = stdin; + } else { + file = xfopen(from, O_RDONLY | O_CLOEXEC); + } + if (!file) { + parse_expr_error(parser, expr, "%s.\n", errstr()); + return -1; + } + + while (true) { + char *path = xgetdelim(file, '\0'); + if (!path) { + if (errno) { + goto fail; + } else { + break; + } + } + + int ret = parse_root(parser, path); + free(path); + if (ret != 0) { + goto fail; + } + } + + if (file != stdin) { + fclose(file); + } + + return 0; + +fail: + if (file != stdin) { + fclose(file); + } + return -1; +} + /** * Parse the top-level expression. */ -static struct bfs_expr *parse_whole_expr(struct parser_state *state) { - if (skip_paths(state) != 0) { +static struct bfs_expr *parse_whole_expr(struct bfs_parser *parser) { + struct bfs_ctx *ctx = parser->ctx; + + if (skip_paths(parser) != 0) { return NULL; } struct bfs_expr *expr; - if (state->argv[0]) { - expr = parse_expr(state); + if (parser->argv[0]) { + expr = parse_expr(parser); } else { - expr = bfs_expr_new(eval_true, 1, &fake_true_arg); + expr = parse_new_expr(parser, eval_true, 1, &fake_true_arg, BFS_TEST); } if (!expr) { return NULL; } - if (state->argv[0]) { - parse_error(state, "Unexpected argument.\n"); - goto fail; + if (parser->argv[0]) { + parse_error(parser, "Unexpected argument.\n"); + return NULL; } - if (state->implicit_print) { - struct bfs_expr *print = bfs_expr_new(eval_fprint, 1, &fake_print_arg); + if (parser->files0_expr) { + if (parse_files0_roots(parser) != 0) { + return NULL; + } + } else if (ctx->npaths == 0) { + if (parse_root(parser, ".") != 0) { + return NULL; + } + } + + if (parser->implicit_print) { + const struct bfs_expr *limit = parser->limit_expr; + if (limit) { + parse_expr_error(parser, limit, + "With %pX, you must specify an action explicitly; for example, ${blu}-print${rs} %px.\n", + limit, limit); + return NULL; + } + + struct bfs_expr *print = parse_new_expr(parser, eval_fprint, 1, &fake_print_arg, BFS_ACTION); if (!print) { - goto fail; + return NULL; } - init_print_expr(state, print); + init_print_expr(parser, print); - expr = new_binary_expr(eval_and, expr, print, &fake_and_arg); + expr = new_binary_expr(parser, eval_and, expr, print, &fake_and_arg); if (!expr) { - goto fail; + return NULL; } } - if (state->mount_arg && state->xdev_arg) { - parse_conflict_warning(state, state->mount_arg, 1, state->xdev_arg, 1, - "${blu}%s${rs} is redundant in the presence of ${blu}%s${rs}.\n\n", - state->xdev_arg[0], state->mount_arg[0]); + if (parser->mount_expr && parser->xdev_expr) { + parse_conflict_warning(parser, parser->mount_expr, parser->xdev_expr, + "%px is redundant in the presence of %px.\n\n", + parser->xdev_expr, parser->mount_expr); } - if (state->ctx->warn && state->depth_arg && state->prune_arg) { - parse_conflict_warning(state, state->depth_arg, 1, state->prune_arg, 1, - "${blu}%s${rs} does not work in the presence of ${blu}%s${rs}.\n", - state->prune_arg[0], state->depth_arg[0]); + if (ctx->warn && parser->depth_expr && parser->prune_expr) { + parse_conflict_warning(parser, parser->depth_expr, parser->prune_expr, + "%px does not work in the presence of %px.\n", + parser->prune_expr, parser->depth_expr); - if (state->interactive) { - bfs_warning(state->ctx, "Do you want to continue? "); - if (ynprompt() == 0) { - goto fail; + if (ctx->interactive) { + bfs_warning(ctx, "Do you want to continue? "); + if (ynprompt() <= 0) { + return NULL; } } fprintf(stderr, "\n"); } - if (state->ok_expr && state->files0_stdin_arg) { - parse_conflict_error(state, state->ok_expr->argv, state->ok_expr->argc, state->files0_stdin_arg, 2, - "${blu}%s${rs} conflicts with ${blu}%s${rs} ${bld}%s${rs}.\n", - state->ok_expr->argv[0], state->files0_stdin_arg[0], state->files0_stdin_arg[1]); - goto fail; - } - return expr; - -fail: - bfs_expr_free(expr); - return NULL; } static const char *bftw_strategy_name(enum bftw_strategy strategy) { @@ -3518,19 +3689,29 @@ static void dump_expr_multiline(const struct bfs_ctx *ctx, enum debug_flags flag cfprintf(ctx->cerr, " "); } + bool close = true; + if (bfs_expr_is_parent(expr)) { - cfprintf(ctx->cerr, "(${red}%s${rs}\n", expr->argv[0]); - if (expr->lhs) { - dump_expr_multiline(ctx, flag, expr->lhs, indent + 1, 0); + if (SLIST_EMPTY(&expr->children)) { + cfprintf(ctx->cerr, "(${red}%s${rs}", expr->argv[0]); + ++rparens; + } else { + cfprintf(ctx->cerr, "(${red}%s${rs}\n", expr->argv[0]); + for_expr (child, expr) { + int parens = child->next ? 0 : rparens + 1; + dump_expr_multiline(ctx, flag, child, indent + 1, parens); + } + close = false; } - dump_expr_multiline(ctx, flag, expr->rhs, indent + 1, rparens + 1); } else { if (flag == DEBUG_RATES) { cfprintf(ctx->cerr, "%pE", expr); } else { cfprintf(ctx->cerr, "%pe", expr); } + } + if (close) { for (int i = 0; i < rparens; ++i) { cfprintf(ctx->cerr, ")"); } @@ -3563,14 +3744,12 @@ void bfs_ctx_dump(const struct bfs_ctx *ctx, enum debug_flags flag) { cfprintf(cerr, " ${cyn}-s${rs}"); } + cfprintf(cerr, " ${cyn}-j${bld}%d${rs}", ctx->threads); + if (ctx->optlevel != 3) { cfprintf(cerr, " ${cyn}-O${bld}%d${rs}", ctx->optlevel); } - if (ctx->threads > 0) { - cfprintf(cerr, " ${cyn}-j${bld}%d${rs}", ctx->threads); - } - cfprintf(cerr, " ${cyn}-S${rs} ${bld}%s${rs}", bftw_strategy_name(ctx->strategy)); enum debug_flags debug = ctx->debug; @@ -3589,7 +3768,7 @@ void bfs_ctx_dump(const struct bfs_ctx *ctx, enum debug_flags flag) { } } - for (size_t i = 0; i < darray_length(ctx->paths); ++i) { + for (size_t i = 0; i < ctx->npaths; ++i) { const char *path = ctx->paths[i]; char c = path[0]; if (c == '-' || c == '(' || c == ')' || c == '!' || c == ',') { @@ -3630,10 +3809,8 @@ void bfs_ctx_dump(const struct bfs_ctx *ctx, enum debug_flags flag) { fputs("\n", stderr); - if (ctx->exclude->eval_fn != eval_false) { - bfs_debug(ctx, flag, "(${red}-exclude${rs}\n"); - dump_expr_multiline(ctx, flag, ctx->exclude, 1, 1); - } + bfs_debug(ctx, flag, "(${red}-exclude${rs}\n"); + dump_expr_multiline(ctx, flag, ctx->exclude, 1, 1); dump_expr_multiline(ctx, flag, ctx->expr, 0, 0); } @@ -3644,17 +3821,17 @@ void bfs_ctx_dump(const struct bfs_ctx *ctx, enum debug_flags flag) { static void dump_costs(const struct bfs_ctx *ctx) { const struct bfs_expr *expr = ctx->expr; bfs_debug(ctx, DEBUG_COST, " Cost: ~${ylw}%g${rs}\n", expr->cost); - bfs_debug(ctx, DEBUG_COST, "Probability: ~${ylw}%g%%${rs}\n", 100.0*expr->probability); + bfs_debug(ctx, DEBUG_COST, "Probability: ~${ylw}%g%%${rs}\n", 100.0 * expr->probability); } struct bfs_ctx *bfs_parse_cmdline(int argc, char *argv[]) { struct bfs_ctx *ctx = bfs_ctx_new(); if (!ctx) { - perror("bfs_new_ctx()"); + perror("bfs_ctx_new()"); goto fail; } - static char* default_argv[] = {BFS_COMMAND, NULL}; + static char *default_argv[] = {BFS_COMMAND, NULL}; if (argc < 1) { argc = 1; argv = default_argv; @@ -3667,8 +3844,15 @@ struct bfs_ctx *bfs_parse_cmdline(int argc, char *argv[]) { goto fail; } + ctx->kinds = ZALLOC_ARRAY(enum bfs_kind, argc); + if (!ctx->kinds) { + perror("zalloc()"); + goto fail; + } + enum use_color use_color = COLOR_AUTO; - if (getenv("NO_COLOR")) { + const char *no_color = getenv("NO_COLOR"); + if (no_color && *no_color) { // https://no-color.org/ use_color = COLOR_NEVER; } @@ -3704,55 +3888,50 @@ struct bfs_ctx *bfs_parse_cmdline(int argc, char *argv[]) { } else { ctx->warn = stdin_tty; } + ctx->interactive = stdin_tty && stderr_tty; - struct parser_state state = { + struct bfs_parser parser = { .ctx = ctx, .argv = ctx->argv + 1, .command = ctx->argv[0], .regex_type = BFS_REGEX_POSIX_BASIC, .stdout_tty = stdout_tty, - .interactive = stdin_tty && stderr_tty, .use_color = use_color, .implicit_print = true, - .implicit_root = true, .just_info = false, .excluding = false, .last_arg = NULL, - .depth_arg = NULL, - .prune_arg = NULL, - .mount_arg = NULL, - .xdev_arg = NULL, - .files0_stdin_arg = NULL, - .ok_expr = NULL, + .depth_expr = NULL, + .prune_expr = NULL, + .mount_expr = NULL, + .xdev_expr = NULL, + .stdin_expr = NULL, .now = ctx->now, }; - ctx->exclude = bfs_expr_new(eval_false, 1, &fake_false_arg); + ctx->exclude = parse_new_expr(&parser, eval_or, 1, &fake_or_arg, BFS_OPERATOR); if (!ctx->exclude) { goto fail; } - ctx->expr = parse_whole_expr(&state); + ctx->expr = parse_whole_expr(&parser); if (!ctx->expr) { - if (state.just_info) { + if (parser.just_info) { goto done; } else { goto fail; } } - if (state.use_color == COLOR_AUTO && !ctx->colors) { - bfs_warning(ctx, "Error parsing $$LS_COLORS: %s.\n\n", strerror(ctx->colors_error)); + if (parser.use_color == COLOR_AUTO && !ctx->colors) { + bfs_warning(ctx, "Error parsing $$LS_COLORS: %s.\n\n", xstrerror(ctx->colors_error)); } if (bfs_optimize(ctx) != 0) { - goto fail; - } - - if (darray_length(ctx->paths) == 0 && state.implicit_root) { - if (parse_root(&state, ".") != 0) { - goto fail; + if (errno != 0) { + bfs_perror(ctx, "bfs_optimize()"); } + goto fail; } if ((ctx->flags & BFTW_FOLLOW_ALL) && !ctx->unique) { diff --git a/src/parse.h b/src/parse.h index 6895c9f..fcc8234 100644 --- a/src/parse.h +++ b/src/parse.h @@ -11,9 +11,9 @@ /** * Parse the command line. * - * @param argc + * @argc * The number of arguments. - * @param argv + * @argv * The arguments to parse. * @return * A new bfs context, or NULL on failure. diff --git a/src/prelude.h b/src/prelude.h new file mode 100644 index 0000000..de89a6c --- /dev/null +++ b/src/prelude.h @@ -0,0 +1,130 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +/** + * Praeludium. + * + * This header is automatically included in every translation unit, before any + * other headers, so it can set feature test macros[1][2]. This sets up our own + * mini-dialect of C, which includes + * + * - Standard C17 and POSIX.1 2024 features + * - Portable and platform-specific extensions + * - Convenience macros like `bool`, `alignof`, etc. + * - Common compiler extensions like __has_include() + * + * Further bfs-specific utilities are defined in "bfs.h". + * + * [1]: https://www.gnu.org/software/libc/manual/html_node/Feature-Test-Macros.html + * [2]: https://pubs.opengroup.org/onlinepubs/9799919799/functions/V2_chap02.html + */ + +#ifndef BFS_PRELUDE_H +#define BFS_PRELUDE_H + +// Feature test macros + +/** + * Linux and BSD handle _POSIX_C_SOURCE differently: on Linux, it enables POSIX + * interfaces that are not visible by default. On BSD, it also *disables* most + * extensions, giving a strict POSIX environment. Since we want the extensions, + * we don't set _POSIX_C_SOURCE. + */ +// #define _POSIX_C_SOURCE 202405L + +/** openat() etc. */ +#define _ATFILE_SOURCE 1 + +/** BSD-derived extensions. */ +#define _BSD_SOURCE 1 + +/** glibc successor to _BSD_SOURCE. */ +#define _DEFAULT_SOURCE 1 + +/** GNU extensions. */ +#define _GNU_SOURCE 1 + +/** Use 64-bit off_t. */ +#define _FILE_OFFSET_BITS 64 + +/** Use 64-bit time_t. */ +#define _TIME_BITS 64 + +/** macOS extensions. */ +#if __APPLE__ +# define _DARWIN_C_SOURCE 1 +#endif + +/** Solaris extensions. */ +#if __sun +# define __EXTENSIONS__ 1 +// https://illumos.org/man/3C/getpwnam#standard-conforming +# define _POSIX_PTHREAD_SEMANTICS 1 +#endif + +/** QNX extensions. */ +#if __QNX__ +# define _QNX_SOURCE 1 +#endif + +// Get the convenience macros that became standard spellings in C23 +#if __STDC_VERSION__ < 202311L + +/** _Static_assert() => static_assert() */ +#include <assert.h> +/** _Alignas(), _Alignof() => alignas(), alignof() */ +#include <stdalign.h> +/** _Bool => bool, true, false */ +#include <stdbool.h> + +/** + * C23 deprecates `noreturn void` in favour of `[[noreturn]] void`, so we expose + * _noreturn instead with the other attributes in "bfs.h". + */ +// #include <stdnoreturn.h> + +/** Part of <threads.h>, but we don't use anything else from it. */ +#define thread_local _Thread_local + +#endif // !C23 + +// Feature detection + +// https://clang.llvm.org/docs/LanguageExtensions.html#has-attribute +#ifndef __has_attribute +# define __has_attribute(attr) false +#endif + +// https://clang.llvm.org/docs/LanguageExtensions.html#has-builtin +#ifndef __has_builtin +# define __has_builtin(builtin) false +#endif + +// https://en.cppreference.com/w/c/language/attributes#Attribute_testing +#ifndef __has_c_attribute +# define __has_c_attribute(attr) false +#endif + +// https://clang.llvm.org/docs/LanguageExtensions.html#has-feature-and-has-extension +#ifndef __has_feature +# define __has_feature(feat) false +#endif + +// https://en.cppreference.com/w/c/preprocessor/include +#ifndef __has_include +# define __has_include(header) false +#endif + +// Sanitizer macros (GCC defines these but Clang does not) + +#if __has_feature(address_sanitizer) && !defined(__SANITIZE_ADDRESS__) +# define __SANITIZE_ADDRESS__ true +#endif +#if __has_feature(memory_sanitizer) && !defined(__SANITIZE_MEMORY__) +# define __SANITIZE_MEMORY__ true +#endif +#if __has_feature(thread_sanitizer) && !defined(__SANITIZE_THREAD__) +# define __SANITIZE_THREAD__ true +#endif + +#endif // BFS_PRELUDE_H diff --git a/src/printf.c b/src/printf.c index f0910fa..30ec201 100644 --- a/src/printf.c +++ b/src/printf.c @@ -2,43 +2,47 @@ // SPDX-License-Identifier: 0BSD #include "printf.h" + +#include "alloc.h" +#include "bfs.h" #include "bfstd.h" #include "bftw.h" #include "color.h" -#include "config.h" #include "ctx.h" -#include "darray.h" #include "diag.h" #include "dir.h" #include "dstring.h" #include "expr.h" +#include "fsade.h" #include "mtab.h" #include "pwcache.h" #include "stat.h" -#include "xtime.h" + #include <errno.h> #include <grp.h> #include <pwd.h> +#include <stdarg.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <time.h> +struct bfs_fmt; + /** * A function implementing a printf directive. */ -typedef int bfs_printf_fn(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf); +typedef int bfs_printf_fn(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf); /** - * A single printf directive like %f or %#4m. The whole format string is stored - * as a darray of these. + * A single formatting directive like %f or %#4m. */ -struct bfs_printf { +struct bfs_fmt { /** The printing function to invoke. */ bfs_printf_fn *fn; /** String data associated with this directive. */ - char *str; + dchar *str; /** The stat field to print. */ enum bfs_stat_field stat_field; /** Character data associated with this directive. */ @@ -47,10 +51,20 @@ struct bfs_printf { void *ptr; }; +/** + * An entire format string. + */ +struct bfs_printf { + /** An array of formatting directives. */ + struct bfs_fmt *fmts; + /** The number of directives. */ + size_t nfmts; +}; + /** Print some text as-is. */ -static int bfs_printf_literal(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { - size_t len = dstrlen(directive->str); - if (fwrite(directive->str, 1, len, cfile->file) == len) { +static int bfs_printf_literal(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { + size_t len = dstrlen(fmt->str); + if (fwrite(fmt->str, 1, len, cfile->file) == len) { return 0; } else { return -1; @@ -58,13 +72,13 @@ static int bfs_printf_literal(CFILE *cfile, const struct bfs_printf *directive, } /** \c: flush */ -static int bfs_printf_flush(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { +static int bfs_printf_flush(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { return fflush(cfile->file); } /** Check if we can safely colorize this directive. */ -static bool should_color(CFILE *cfile, const struct bfs_printf *directive) { - return cfile->colors && strcmp(directive->str, "%s") == 0; +static bool should_color(CFILE *cfile, const struct bfs_fmt *fmt) { + return cfile->colors && strcmp(fmt->str, "%s") == 0; } /** @@ -76,23 +90,26 @@ static bool should_color(CFILE *cfile, const struct bfs_printf *directive) { bfs_assert(ret >= 0 && (size_t)ret < sizeof(buf)); \ (void)ret -/** - * Common entry point for fprintf() with a dynamic format string. - */ -static int dyn_fprintf(FILE *file, const struct bfs_printf *directive, ...) { - va_list args; - va_start(args, directive); - - BFS_SUPPRESS("-Wformat-nonliteral"); - int ret = vfprintf(file, directive->str, args); - BFS_UNSUPPRESS(); +/** Return a dynamic format string. */ +_format_arg(2) +static const char *dyn_fmt(const char *str, const char *fake) { + bfs_assert(strcmp(str + strlen(str) - strlen(fake) + 1, fake + 1) == 0, + "Mismatched format specifiers: '%s' vs. '%s'", str, fake); + return str; +} +/** Wrapper for fprintf(). */ +_printf(3, 4) +static int bfs_fprintf(CFILE *cfile, const struct bfs_fmt *fmt, const char *fake, ...) { + va_list args; + va_start(args, fake); + int ret = vfprintf(cfile->file, dyn_fmt(fmt->str, fake), args); va_end(args); return ret; } /** %a, %c, %t: ctime() */ -static int bfs_printf_ctime(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { +static int bfs_printf_ctime(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { // Not using ctime() itself because GNU find adds nanoseconds static const char *days[] = {"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"}; static const char *months[] = {"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"}; @@ -102,69 +119,69 @@ static int bfs_printf_ctime(CFILE *cfile, const struct bfs_printf *directive, co return -1; } - const struct timespec *ts = bfs_stat_time(statbuf, directive->stat_field); + const struct timespec *ts = bfs_stat_time(statbuf, fmt->stat_field); if (!ts) { return -1; } struct tm tm; - if (xlocaltime(&ts->tv_sec, &tm) != 0) { + if (!localtime_r(&ts->tv_sec, &tm)) { return -1; } BFS_PRINTF_BUF(buf, "%s %s %2d %.2d:%.2d:%.2d.%09ld0 %4d", - days[tm.tm_wday], - months[tm.tm_mon], - tm.tm_mday, - tm.tm_hour, - tm.tm_min, - tm.tm_sec, - (long)ts->tv_nsec, - 1900 + tm.tm_year); + days[tm.tm_wday], + months[tm.tm_mon], + tm.tm_mday, + tm.tm_hour, + tm.tm_min, + tm.tm_sec, + (long)ts->tv_nsec, + 1900 + tm.tm_year); - return dyn_fprintf(cfile->file, directive, buf); + return bfs_fprintf(cfile, fmt, "%s", buf); } /** %A, %B/%W, %C, %T: strftime() */ -static int bfs_printf_strftime(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { +static int bfs_printf_strftime(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); if (!statbuf) { return -1; } - const struct timespec *ts = bfs_stat_time(statbuf, directive->stat_field); + const struct timespec *ts = bfs_stat_time(statbuf, fmt->stat_field); if (!ts) { return -1; } struct tm tm; - if (xlocaltime(&ts->tv_sec, &tm) != 0) { + if (!localtime_r(&ts->tv_sec, &tm)) { return -1; } int ret; char buf[256]; char format[] = "% "; - switch (directive->c) { + switch (fmt->c) { // Non-POSIX strftime() features case '@': ret = snprintf(buf, sizeof(buf), "%lld.%09ld0", (long long)ts->tv_sec, (long)ts->tv_nsec); break; case '+': ret = snprintf(buf, sizeof(buf), "%4d-%.2d-%.2d+%.2d:%.2d:%.2d.%09ld0", - 1900 + tm.tm_year, - tm.tm_mon + 1, - tm.tm_mday, - tm.tm_hour, - tm.tm_min, - tm.tm_sec, - (long)ts->tv_nsec); + 1900 + tm.tm_year, + tm.tm_mon + 1, + tm.tm_mday, + tm.tm_hour, + tm.tm_min, + tm.tm_sec, + (long)ts->tv_nsec); break; case 'k': ret = snprintf(buf, sizeof(buf), "%2d", tm.tm_hour); break; case 'l': - ret = snprintf(buf, sizeof(buf), "%2d", (tm.tm_hour + 11)%12 + 1); + ret = snprintf(buf, sizeof(buf), "%2d", (tm.tm_hour + 11) % 12 + 1); break; case 's': ret = snprintf(buf, sizeof(buf), "%lld", (long long)ts->tv_sec); @@ -174,108 +191,113 @@ static int bfs_printf_strftime(CFILE *cfile, const struct bfs_printf *directive, break; case 'T': ret = snprintf(buf, sizeof(buf), "%.2d:%.2d:%.2d.%09ld0", - tm.tm_hour, - tm.tm_min, - tm.tm_sec, - (long)ts->tv_nsec); + tm.tm_hour, + tm.tm_min, + tm.tm_sec, + (long)ts->tv_nsec); break; // POSIX strftime() features default: - format[1] = directive->c; - BFS_SUPPRESS("-Wformat-nonliteral"); + format[1] = fmt->c; +#if __GNUC__ +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wformat-nonliteral" +#endif ret = strftime(buf, sizeof(buf), format, &tm); - BFS_UNSUPPRESS(); +#if __GNUC__ +# pragma GCC diagnostic pop +#endif break; } bfs_assert(ret >= 0 && (size_t)ret < sizeof(buf)); (void)ret; - return dyn_fprintf(cfile->file, directive, buf); + return bfs_fprintf(cfile, fmt, "%s", buf); } /** %b: blocks */ -static int bfs_printf_b(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { +static int bfs_printf_b(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); if (!statbuf) { return -1; } - uintmax_t blocks = ((uintmax_t)statbuf->blocks*BFS_STAT_BLKSIZE + 511)/512; + uintmax_t blocks = ((uintmax_t)statbuf->blocks * BFS_STAT_BLKSIZE + 511) / 512; BFS_PRINTF_BUF(buf, "%ju", blocks); - return dyn_fprintf(cfile->file, directive, buf); + return bfs_fprintf(cfile, fmt, "%s", buf); } /** %d: depth */ -static int bfs_printf_d(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { - return dyn_fprintf(cfile->file, directive, (intmax_t)ftwbuf->depth); +static int bfs_printf_d(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { + return bfs_fprintf(cfile, fmt, "%jd", (intmax_t)ftwbuf->depth); } /** %D: device */ -static int bfs_printf_D(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { +static int bfs_printf_D(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); if (!statbuf) { return -1; } BFS_PRINTF_BUF(buf, "%ju", (uintmax_t)statbuf->dev); - return dyn_fprintf(cfile->file, directive, buf); + return bfs_fprintf(cfile, fmt, "%s", buf); } /** %f: file name */ -static int bfs_printf_f(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { - if (should_color(cfile, directive)) { +static int bfs_printf_f(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { + if (should_color(cfile, fmt)) { return cfprintf(cfile, "%pF", ftwbuf); } else { - return dyn_fprintf(cfile->file, directive, ftwbuf->path + ftwbuf->nameoff); + return bfs_fprintf(cfile, fmt, "%s", ftwbuf->path + ftwbuf->nameoff); } } /** %F: file system type */ -static int bfs_printf_F(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { +static int bfs_printf_F(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); if (!statbuf) { return -1; } - const char *type = bfs_fstype(directive->ptr, statbuf); + const char *type = bfs_fstype(fmt->ptr, statbuf); if (!type) { return -1; } - return dyn_fprintf(cfile->file, directive, type); + return bfs_fprintf(cfile, fmt, "%s", type); } /** %G: gid */ -static int bfs_printf_G(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { +static int bfs_printf_G(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); if (!statbuf) { return -1; } BFS_PRINTF_BUF(buf, "%ju", (uintmax_t)statbuf->gid); - return dyn_fprintf(cfile->file, directive, buf); + return bfs_fprintf(cfile, fmt, "%s", buf); } /** %g: group name */ -static int bfs_printf_g(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { +static int bfs_printf_g(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); if (!statbuf) { return -1; } - struct bfs_groups *groups = directive->ptr; + struct bfs_groups *groups = fmt->ptr; const struct group *grp = bfs_getgrgid(groups, statbuf->gid); if (!grp) { - return bfs_printf_G(cfile, directive, ftwbuf); + return bfs_printf_G(cfile, fmt, ftwbuf); } - return dyn_fprintf(cfile->file, directive, grp->gr_name); + return bfs_fprintf(cfile, fmt, "%s", grp->gr_name); } /** %h: leading directories */ -static int bfs_printf_h(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { +static int bfs_printf_h(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { char *copy = NULL; const char *buf; @@ -297,10 +319,10 @@ static int bfs_printf_h(CFILE *cfile, const struct bfs_printf *directive, const } int ret; - if (should_color(cfile, directive)) { + if (should_color(cfile, fmt)) { ret = cfprintf(cfile, "${di}%pQ${rs}", buf); } else { - ret = dyn_fprintf(cfile->file, directive, buf); + ret = bfs_fprintf(cfile, fmt, "%s", buf); } free(copy); @@ -308,48 +330,48 @@ static int bfs_printf_h(CFILE *cfile, const struct bfs_printf *directive, const } /** %H: current root */ -static int bfs_printf_H(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { - if (should_color(cfile, directive)) { +static int bfs_printf_H(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { + if (should_color(cfile, fmt)) { if (ftwbuf->depth == 0) { return cfprintf(cfile, "%pP", ftwbuf); } else { return cfprintf(cfile, "${di}%pQ${rs}", ftwbuf->root); } } else { - return dyn_fprintf(cfile->file, directive, ftwbuf->root); + return bfs_fprintf(cfile, fmt, "%s", ftwbuf->root); } } /** %i: inode */ -static int bfs_printf_i(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { +static int bfs_printf_i(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); if (!statbuf) { return -1; } BFS_PRINTF_BUF(buf, "%ju", (uintmax_t)statbuf->ino); - return dyn_fprintf(cfile->file, directive, buf); + return bfs_fprintf(cfile, fmt, "%s", buf); } /** %k: 1K blocks */ -static int bfs_printf_k(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { +static int bfs_printf_k(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); if (!statbuf) { return -1; } - uintmax_t blocks = ((uintmax_t)statbuf->blocks*BFS_STAT_BLKSIZE + 1023)/1024; + uintmax_t blocks = ((uintmax_t)statbuf->blocks * BFS_STAT_BLKSIZE + 1023) / 1024; BFS_PRINTF_BUF(buf, "%ju", blocks); - return dyn_fprintf(cfile->file, directive, buf); + return bfs_fprintf(cfile, fmt, "%s", buf); } /** %l: link target */ -static int bfs_printf_l(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { +static int bfs_printf_l(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { char *buf = NULL; const char *target = ""; if (ftwbuf->type == BFS_LNK) { - if (should_color(cfile, directive)) { + if (should_color(cfile, fmt)) { return cfprintf(cfile, "%pL", ftwbuf); } @@ -362,23 +384,23 @@ static int bfs_printf_l(CFILE *cfile, const struct bfs_printf *directive, const } } - int ret = dyn_fprintf(cfile->file, directive, target); + int ret = bfs_fprintf(cfile, fmt, "%s", target); free(buf); return ret; } /** %m: mode */ -static int bfs_printf_m(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { +static int bfs_printf_m(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); if (!statbuf) { return -1; } - return dyn_fprintf(cfile->file, directive, (unsigned int)(statbuf->mode & 07777)); + return bfs_fprintf(cfile, fmt, "%o", (unsigned int)(statbuf->mode & 07777)); } /** %M: symbolic mode */ -static int bfs_printf_M(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { +static int bfs_printf_M(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); if (!statbuf) { return -1; @@ -386,37 +408,37 @@ static int bfs_printf_M(CFILE *cfile, const struct bfs_printf *directive, const char buf[11]; xstrmode(statbuf->mode, buf); - return dyn_fprintf(cfile->file, directive, buf); + return bfs_fprintf(cfile, fmt, "%s", buf); } /** %n: link count */ -static int bfs_printf_n(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { +static int bfs_printf_n(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); if (!statbuf) { return -1; } BFS_PRINTF_BUF(buf, "%ju", (uintmax_t)statbuf->nlink); - return dyn_fprintf(cfile->file, directive, buf); + return bfs_fprintf(cfile, fmt, "%s", buf); } /** %p: full path */ -static int bfs_printf_p(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { - if (should_color(cfile, directive)) { +static int bfs_printf_p(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { + if (should_color(cfile, fmt)) { return cfprintf(cfile, "%pP", ftwbuf); } else { - return dyn_fprintf(cfile->file, directive, ftwbuf->path); + return bfs_fprintf(cfile, fmt, "%s", ftwbuf->path); } } /** %P: path after root */ -static int bfs_printf_P(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { +static int bfs_printf_P(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { size_t offset = strlen(ftwbuf->root); if (ftwbuf->path[offset] == '/') { ++offset; } - if (should_color(cfile, directive)) { + if (should_color(cfile, fmt)) { if (ftwbuf->depth == 0) { return 0; } @@ -426,23 +448,23 @@ static int bfs_printf_P(CFILE *cfile, const struct bfs_printf *directive, const copybuf.nameoff -= offset; return cfprintf(cfile, "%pP", ©buf); } else { - return dyn_fprintf(cfile->file, directive, ftwbuf->path + offset); + return bfs_fprintf(cfile, fmt, "%s", ftwbuf->path + offset); } } /** %s: size */ -static int bfs_printf_s(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { +static int bfs_printf_s(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); if (!statbuf) { return -1; } BFS_PRINTF_BUF(buf, "%ju", (uintmax_t)statbuf->size); - return dyn_fprintf(cfile->file, directive, buf); + return bfs_fprintf(cfile, fmt, "%s", buf); } /** %S: sparseness */ -static int bfs_printf_S(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { +static int bfs_printf_S(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); if (!statbuf) { return -1; @@ -452,97 +474,86 @@ static int bfs_printf_S(CFILE *cfile, const struct bfs_printf *directive, const if (statbuf->size == 0 && statbuf->blocks == 0) { sparsity = 1.0; } else { - sparsity = (double)BFS_STAT_BLKSIZE*statbuf->blocks/statbuf->size; + sparsity = (double)BFS_STAT_BLKSIZE * statbuf->blocks / statbuf->size; } - return dyn_fprintf(cfile->file, directive, sparsity); + return bfs_fprintf(cfile, fmt, "%g", sparsity); } /** %U: uid */ -static int bfs_printf_U(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { +static int bfs_printf_U(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); if (!statbuf) { return -1; } BFS_PRINTF_BUF(buf, "%ju", (uintmax_t)statbuf->uid); - return dyn_fprintf(cfile->file, directive, buf); + return bfs_fprintf(cfile, fmt, "%s", buf); } /** %u: user name */ -static int bfs_printf_u(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { +static int bfs_printf_u(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); if (!statbuf) { return -1; } - struct bfs_users *users = directive->ptr; + struct bfs_users *users = fmt->ptr; const struct passwd *pwd = bfs_getpwuid(users, statbuf->uid); if (!pwd) { - return bfs_printf_U(cfile, directive, ftwbuf); + return bfs_printf_U(cfile, fmt, ftwbuf); } - return dyn_fprintf(cfile->file, directive, pwd->pw_name); + return bfs_fprintf(cfile, fmt, "%s", pwd->pw_name); } static const char *bfs_printf_type(enum bfs_type type) { - switch (type) { - case BFS_BLK: - return "b"; - case BFS_CHR: - return "c"; - case BFS_DIR: - return "d"; - case BFS_DOOR: - return "D"; - case BFS_FIFO: - return "p"; - case BFS_LNK: - return "l"; - case BFS_REG: - return "f"; - case BFS_SOCK: - return "s"; - default: - return "U"; + const char *const names[] = { + [BFS_BLK] = "b", + [BFS_CHR] = "c", + [BFS_DIR] = "d", + [BFS_DOOR] = "D", + [BFS_FIFO] = "p", + [BFS_LNK] = "l", + [BFS_PORT] = "P", + [BFS_REG] = "f", + [BFS_SOCK] = "s", + [BFS_WHT] = "w", + }; + + const char *name = NULL; + if ((size_t)type < countof(names)) { + name = names[type]; } + + return name ? name : "U"; } /** %y: type */ -static int bfs_printf_y(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { +static int bfs_printf_y(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { const char *type = bfs_printf_type(ftwbuf->type); - return dyn_fprintf(cfile->file, directive, type); + return bfs_fprintf(cfile, fmt, "%s", type); } /** %Y: target type */ -static int bfs_printf_Y(CFILE *cfile, const struct bfs_printf *directive, const struct BFTW *ftwbuf) { - int error = 0; - - if (ftwbuf->type != BFS_LNK) { - return bfs_printf_y(cfile, directive, ftwbuf); - } - - const char *type = "U"; +static int bfs_printf_Y(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { + enum bfs_type type = bftw_type(ftwbuf, BFS_STAT_FOLLOW); + const char *str; - const struct bfs_stat *statbuf = bftw_stat(ftwbuf, BFS_STAT_FOLLOW); - if (statbuf) { - type = bfs_printf_type(bfs_mode_to_type(statbuf->mode)); - } else { - switch (errno) { - case ELOOP: - type = "L"; - break; - case ENOENT: - case ENOTDIR: - type = "N"; - break; - default: - type = "?"; + int error = 0; + if (type == BFS_ERROR) { + if (errno == ELOOP) { + str = "L"; + } else if (errno_is_like(ENOENT)) { + str = "N"; + } else { + str = "?"; error = errno; - break; } + } else { + str = bfs_printf_type(type); } - int ret = dyn_fprintf(cfile->file, directive, type); + int ret = bfs_fprintf(cfile, fmt, "%s", str); if (error != 0) { ret = -1; errno = error; @@ -550,24 +561,36 @@ static int bfs_printf_Y(CFILE *cfile, const struct bfs_printf *directive, const return ret; } +/** %Z: SELinux context */ +_maybe_unused +static int bfs_printf_Z(CFILE *cfile, const struct bfs_fmt *fmt, const struct BFTW *ftwbuf) { + char *con = bfs_getfilecon(ftwbuf); + if (!con) { + return -1; + } + + int ret = bfs_fprintf(cfile, fmt, "%s", con); + bfs_freecon(con); + return ret; +} + /** * Append a literal string to the chain. */ -static int append_literal(const struct bfs_ctx *ctx, struct bfs_printf **format, char **literal) { +static int append_literal(const struct bfs_ctx *ctx, struct bfs_printf *format, dchar **literal) { if (dstrlen(*literal) == 0) { return 0; } - struct bfs_printf directive = { - .fn = bfs_printf_literal, - .str = *literal, - }; - - if (DARRAY_PUSH(format, &directive) != 0) { - bfs_perror(ctx, "DARRAY_PUSH()"); + struct bfs_fmt *fmt = RESERVE(struct bfs_fmt, &format->fmts, &format->nfmts); + if (!fmt) { + bfs_perror(ctx, "RESERVE()"); return -1; } + fmt->fn = bfs_printf_literal; + fmt->str = *literal; + *literal = dstralloc(0); if (!*literal) { bfs_perror(ctx, "dstralloc()"); @@ -580,23 +603,29 @@ static int append_literal(const struct bfs_ctx *ctx, struct bfs_printf **format, /** * Append a printf directive to the chain. */ -static int append_directive(const struct bfs_ctx *ctx, struct bfs_printf **format, char **literal, struct bfs_printf *directive) { +static int append_directive(const struct bfs_ctx *ctx, struct bfs_printf *format, dchar **literal, struct bfs_fmt *fmt) { if (append_literal(ctx, format, literal) != 0) { return -1; } - if (DARRAY_PUSH(format, directive) != 0) { - bfs_perror(ctx, "DARRAY_PUSH()"); + struct bfs_fmt *dest = RESERVE(struct bfs_fmt, &format->fmts, &format->nfmts); + if (!dest) { + bfs_perror(ctx, "RESERVE()"); return -1; } + *dest = *fmt; return 0; } int bfs_printf_parse(const struct bfs_ctx *ctx, struct bfs_expr *expr, const char *format) { - expr->printf = NULL; + expr->printf = ZALLOC(struct bfs_printf); + if (!expr->printf) { + bfs_perror(ctx, "zalloc()"); + return -1; + } - char *literal = dstralloc(0); + dchar *literal = dstralloc(0); if (!literal) { bfs_perror(ctx, "dstralloc()"); goto error; @@ -630,10 +659,10 @@ int bfs_printf_parse(const struct bfs_ctx *ctx, struct bfs_expr *expr, const cha case 'c': { - struct bfs_printf directive = { + struct bfs_fmt fmt = { .fn = bfs_printf_flush, }; - if (append_directive(ctx, &expr->printf, &literal, &directive) != 0) { + if (append_directive(ctx, expr->printf, &literal, &fmt) != 0) { goto error; } goto done; @@ -655,15 +684,15 @@ int bfs_printf_parse(const struct bfs_ctx *ctx, struct bfs_expr *expr, const cha goto one_char; } - struct bfs_printf directive = { + struct bfs_fmt fmt = { .str = dstralloc(2), }; - if (!directive.str) { - goto directive_error; + if (!fmt.str) { + goto fmt_error; } - if (dstrapp(&directive.str, c) != 0) { + if (dstrapp(&fmt.str, c) != 0) { bfs_perror(ctx, "dstrapp()"); - goto directive_error; + goto fmt_error; } const char *specifier = "s"; @@ -677,18 +706,18 @@ int bfs_printf_parse(const struct bfs_ctx *ctx, struct bfs_expr *expr, const cha case '#': case '0': case '+': - must_be_numeric = true; - fallthru; case ' ': + must_be_numeric = true; + _fallthrough; case '-': - if (strchr(directive.str, c)) { + if (strchr(fmt.str, c)) { bfs_expr_error(ctx, expr); bfs_error(ctx, "Duplicate flag '%c'.\n", c); - goto directive_error; + goto fmt_error; } - if (dstrapp(&directive.str, c) != 0) { + if (dstrapp(&fmt.str, c) != 0) { bfs_perror(ctx, "dstrapp()"); - goto directive_error; + goto fmt_error; } continue; } @@ -698,9 +727,9 @@ int bfs_printf_parse(const struct bfs_ctx *ctx, struct bfs_expr *expr, const cha // Parse the field width while (c >= '0' && c <= '9') { - if (dstrapp(&directive.str, c) != 0) { + if (dstrapp(&fmt.str, c) != 0) { bfs_perror(ctx, "dstrapp()"); - goto directive_error; + goto fmt_error; } c = *++i; } @@ -708,9 +737,9 @@ int bfs_printf_parse(const struct bfs_ctx *ctx, struct bfs_expr *expr, const cha // Parse the precision if (c == '.') { do { - if (dstrapp(&directive.str, c) != 0) { + if (dstrapp(&fmt.str, c) != 0) { bfs_perror(ctx, "dstrapp()"); - goto directive_error; + goto fmt_error; } c = *++i; } while (c >= '0' && c <= '9'); @@ -718,163 +747,172 @@ int bfs_printf_parse(const struct bfs_ctx *ctx, struct bfs_expr *expr, const cha switch (c) { case 'a': - directive.fn = bfs_printf_ctime; - directive.stat_field = BFS_STAT_ATIME; + fmt.fn = bfs_printf_ctime; + fmt.stat_field = BFS_STAT_ATIME; break; case 'b': - directive.fn = bfs_printf_b; + fmt.fn = bfs_printf_b; break; case 'c': - directive.fn = bfs_printf_ctime; - directive.stat_field = BFS_STAT_CTIME; + fmt.fn = bfs_printf_ctime; + fmt.stat_field = BFS_STAT_CTIME; break; case 'd': - directive.fn = bfs_printf_d; + fmt.fn = bfs_printf_d; specifier = "jd"; break; case 'D': - directive.fn = bfs_printf_D; + fmt.fn = bfs_printf_D; break; case 'f': - directive.fn = bfs_printf_f; + fmt.fn = bfs_printf_f; break; case 'F': - directive.fn = bfs_printf_F; - directive.ptr = (void *)bfs_ctx_mtab(ctx); - if (!directive.ptr) { + fmt.fn = bfs_printf_F; + fmt.ptr = (void *)bfs_ctx_mtab(ctx); + if (!fmt.ptr) { int error = errno; bfs_expr_error(ctx, expr); - bfs_error(ctx, "Couldn't parse the mount table: %s.\n", strerror(error)); - goto directive_error; + bfs_error(ctx, "Couldn't parse the mount table: %s.\n", xstrerror(error)); + goto fmt_error; } break; case 'g': - directive.fn = bfs_printf_g; - directive.ptr = ctx->groups; + fmt.fn = bfs_printf_g; + fmt.ptr = ctx->groups; break; case 'G': - directive.fn = bfs_printf_G; + fmt.fn = bfs_printf_G; break; case 'h': - directive.fn = bfs_printf_h; + fmt.fn = bfs_printf_h; break; case 'H': - directive.fn = bfs_printf_H; + fmt.fn = bfs_printf_H; break; case 'i': - directive.fn = bfs_printf_i; + fmt.fn = bfs_printf_i; break; case 'k': - directive.fn = bfs_printf_k; + fmt.fn = bfs_printf_k; break; case 'l': - directive.fn = bfs_printf_l; + fmt.fn = bfs_printf_l; break; case 'm': - directive.fn = bfs_printf_m; + fmt.fn = bfs_printf_m; specifier = "o"; break; case 'M': - directive.fn = bfs_printf_M; + fmt.fn = bfs_printf_M; break; case 'n': - directive.fn = bfs_printf_n; + fmt.fn = bfs_printf_n; break; case 'p': - directive.fn = bfs_printf_p; + fmt.fn = bfs_printf_p; break; case 'P': - directive.fn = bfs_printf_P; + fmt.fn = bfs_printf_P; break; case 's': - directive.fn = bfs_printf_s; + fmt.fn = bfs_printf_s; break; case 'S': - directive.fn = bfs_printf_S; + fmt.fn = bfs_printf_S; specifier = "g"; break; case 't': - directive.fn = bfs_printf_ctime; - directive.stat_field = BFS_STAT_MTIME; + fmt.fn = bfs_printf_ctime; + fmt.stat_field = BFS_STAT_MTIME; break; case 'u': - directive.fn = bfs_printf_u; - directive.ptr = ctx->users; + fmt.fn = bfs_printf_u; + fmt.ptr = ctx->users; break; case 'U': - directive.fn = bfs_printf_U; + fmt.fn = bfs_printf_U; break; case 'w': - directive.fn = bfs_printf_ctime; - directive.stat_field = BFS_STAT_BTIME; + fmt.fn = bfs_printf_ctime; + fmt.stat_field = BFS_STAT_BTIME; break; case 'y': - directive.fn = bfs_printf_y; + fmt.fn = bfs_printf_y; break; case 'Y': - directive.fn = bfs_printf_Y; + fmt.fn = bfs_printf_Y; + break; + case 'Z': +#if BFS_CAN_CHECK_CONTEXT + fmt.fn = bfs_printf_Z; break; +#else + bfs_expr_error(ctx, expr); + bfs_error(ctx, "Missing platform support for '%%%c'.\n", c); + goto fmt_error; +#endif case 'A': - directive.stat_field = BFS_STAT_ATIME; - goto directive_strftime; + fmt.stat_field = BFS_STAT_ATIME; + goto fmt_strftime; case 'B': case 'W': - directive.stat_field = BFS_STAT_BTIME; - goto directive_strftime; + fmt.stat_field = BFS_STAT_BTIME; + goto fmt_strftime; case 'C': - directive.stat_field = BFS_STAT_CTIME; - goto directive_strftime; + fmt.stat_field = BFS_STAT_CTIME; + goto fmt_strftime; case 'T': - directive.stat_field = BFS_STAT_MTIME; - goto directive_strftime; + fmt.stat_field = BFS_STAT_MTIME; + goto fmt_strftime; - directive_strftime: - directive.fn = bfs_printf_strftime; + fmt_strftime: + fmt.fn = bfs_printf_strftime; c = *++i; if (!c) { bfs_expr_error(ctx, expr); - bfs_error(ctx, "Incomplete time specifier '%s%c'.\n", directive.str, i[-1]); - goto directive_error; + bfs_error(ctx, "Incomplete time specifier '%s%c'.\n", fmt.str, i[-1]); + goto fmt_error; } else if (strchr("%+@aAbBcCdDeFgGhHIjklmMnprRsStTuUVwWxXyYzZ", c)) { - directive.c = c; + fmt.c = c; } else { bfs_expr_error(ctx, expr); bfs_error(ctx, "Unrecognized time specifier '%%%c%c'.\n", i[-1], c); - goto directive_error; + goto fmt_error; } break; case '\0': bfs_expr_error(ctx, expr); - bfs_error(ctx, "Incomplete format specifier '%s'.\n", directive.str); - goto directive_error; + bfs_error(ctx, "Incomplete format specifier '%s'.\n", fmt.str); + goto fmt_error; default: bfs_expr_error(ctx, expr); bfs_error(ctx, "Unrecognized format specifier '%%%c'.\n", c); - goto directive_error; + goto fmt_error; } if (must_be_numeric && strcmp(specifier, "s") == 0) { bfs_expr_error(ctx, expr); - bfs_error(ctx, "Invalid flags '%s' for string format '%%%c'.\n", directive.str + 1, c); - goto directive_error; + bfs_error(ctx, "Invalid flags '%s' for string format '%%%c'.\n", fmt.str + 1, c); + goto fmt_error; } - if (dstrcat(&directive.str, specifier) != 0) { + if (dstrcat(&fmt.str, specifier) != 0) { bfs_perror(ctx, "dstrcat()"); - goto directive_error; + goto fmt_error; } - if (append_directive(ctx, &expr->printf, &literal, &directive) != 0) { - goto directive_error; + if (append_directive(ctx, expr->printf, &literal, &fmt) != 0) { + goto fmt_error; } continue; - directive_error: - dstrfree(directive.str); + fmt_error: + dstrfree(fmt.str); goto error; } @@ -886,7 +924,7 @@ int bfs_printf_parse(const struct bfs_ctx *ctx, struct bfs_expr *expr, const cha } done: - if (append_literal(ctx, &expr->printf, &literal) != 0) { + if (append_literal(ctx, expr->printf, &literal) != 0) { goto error; } dstrfree(literal); @@ -902,9 +940,9 @@ error: int bfs_printf(CFILE *cfile, const struct bfs_printf *format, const struct BFTW *ftwbuf) { int ret = 0, error = 0; - for (size_t i = 0; i < darray_length(format); ++i) { - const struct bfs_printf *directive = &format[i]; - if (directive->fn(cfile, directive, ftwbuf) < 0) { + for (size_t i = 0; i < format->nfmts; ++i) { + const struct bfs_fmt *fmt = &format->fmts[i]; + if (fmt->fn(cfile, fmt, ftwbuf) < 0) { ret = -1; error = errno; } @@ -915,8 +953,13 @@ int bfs_printf(CFILE *cfile, const struct bfs_printf *format, const struct BFTW } void bfs_printf_free(struct bfs_printf *format) { - for (size_t i = 0; i < darray_length(format); ++i) { - dstrfree(format[i].str); + if (!format) { + return; + } + + for (size_t i = 0; i < format->nfmts; ++i) { + dstrfree(format->fmts[i].str); } - darray_free(format); + free(format->fmts); + free(format); } diff --git a/src/printf.h b/src/printf.h index 2bff087..e8d862e 100644 --- a/src/printf.h +++ b/src/printf.h @@ -22,11 +22,11 @@ struct bfs_printf; /** * Parse a -printf format string. * - * @param ctx + * @ctx * The bfs context. - * @param expr + * @expr * The expression to fill in. - * @param format + * @format * The format string to parse. * @return * 0 on success, -1 on failure. @@ -36,11 +36,11 @@ int bfs_printf_parse(const struct bfs_ctx *ctx, struct bfs_expr *expr, const cha /** * Evaluate a parsed format string. * - * @param cfile + * @cfile * The CFILE to print to. - * @param format + * @format * The parsed printf format. - * @param ftwbuf + * @ftwbuf * The bftw() data for the current file. * @return * 0 on success, -1 on failure. diff --git a/src/pwcache.c b/src/pwcache.c index 0e2f5c1..fa19dad 100644 --- a/src/pwcache.c +++ b/src/pwcache.c @@ -2,16 +2,14 @@ // SPDX-License-Identifier: 0BSD #include "pwcache.h" + #include "alloc.h" -#include "config.h" -#include "darray.h" #include "trie.h" + #include <errno.h> #include <grp.h> #include <pwd.h> #include <stdlib.h> -#include <string.h> -#include <unistd.h> /** Represents cache hits for negative results. */ static void *MISSING = &MISSING; @@ -91,7 +89,7 @@ struct bfs_users *bfs_users_new(void) { static void *bfs_getpwnam_impl(const void *key, void *ptr, size_t bufsize) { struct bfs_passwd *storage = ptr; - struct passwd *ret; + struct passwd *ret = NULL; errno = getpwnam_r(key, &storage->pwd, storage->buf, bufsize, &ret); return ret; } @@ -110,7 +108,7 @@ static void *bfs_getpwuid_impl(const void *key, void *ptr, size_t bufsize) { const uid_t *uid = key; struct bfs_passwd *storage = ptr; - struct passwd *ret; + struct passwd *ret = NULL; errno = getpwuid_r(*uid, &storage->pwd, storage->buf, bufsize, &ret); return ret; } @@ -172,7 +170,7 @@ struct bfs_groups *bfs_groups_new(void) { static void *bfs_getgrnam_impl(const void *key, void *ptr, size_t bufsize) { struct bfs_group *storage = ptr; - struct group *ret; + struct group *ret = NULL; errno = getgrnam_r(key, &storage->grp, storage->buf, bufsize, &ret); return ret; } @@ -191,7 +189,7 @@ static void *bfs_getgrgid_impl(const void *key, void *ptr, size_t bufsize) { const gid_t *gid = key; struct bfs_group *storage = ptr; - struct group *ret; + struct group *ret = NULL; errno = getgrgid_r(*gid, &storage->grp, storage->buf, bufsize, &ret); return ret; } diff --git a/src/pwcache.h b/src/pwcache.h index b6c0b67..d7c602d 100644 --- a/src/pwcache.h +++ b/src/pwcache.h @@ -27,9 +27,9 @@ struct bfs_users *bfs_users_new(void); /** * Get a user entry by name. * - * @param users + * @users * The user cache. - * @param name + * @name * The username to look up. * @return * The matching user, or NULL if not found (errno == 0) or an error @@ -40,9 +40,9 @@ const struct passwd *bfs_getpwnam(struct bfs_users *users, const char *name); /** * Get a user entry by ID. * - * @param users + * @users * The user cache. - * @param uid + * @uid * The ID to look up. * @return * The matching user, or NULL if not found (errno == 0) or an error @@ -53,7 +53,7 @@ const struct passwd *bfs_getpwuid(struct bfs_users *users, uid_t uid); /** * Flush a user cache. * - * @param users + * @users * The cache to flush. */ void bfs_users_flush(struct bfs_users *users); @@ -61,7 +61,7 @@ void bfs_users_flush(struct bfs_users *users); /** * Free a user cache. * - * @param users + * @users * The user cache to free. */ void bfs_users_free(struct bfs_users *users); @@ -82,9 +82,9 @@ struct bfs_groups *bfs_groups_new(void); /** * Get a group entry by name. * - * @param groups + * @groups * The group cache. - * @param name + * @name * The group name to look up. * @return * The matching group, or NULL if not found (errno == 0) or an error @@ -95,9 +95,9 @@ const struct group *bfs_getgrnam(struct bfs_groups *groups, const char *name); /** * Get a group entry by ID. * - * @param groups + * @groups * The group cache. - * @param uid + * @uid * The ID to look up. * @return * The matching group, or NULL if not found (errno == 0) or an error @@ -108,7 +108,7 @@ const struct group *bfs_getgrgid(struct bfs_groups *groups, gid_t gid); /** * Flush a group cache. * - * @param groups + * @groups * The cache to flush. */ void bfs_groups_flush(struct bfs_groups *groups); @@ -116,7 +116,7 @@ void bfs_groups_flush(struct bfs_groups *groups); /** * Free a group cache. * - * @param groups + * @groups * The group cache to free. */ void bfs_groups_free(struct bfs_groups *groups); diff --git a/src/sanity.h b/src/sanity.h index 29b3519..be77eef 100644 --- a/src/sanity.h +++ b/src/sanity.h @@ -8,21 +8,8 @@ #ifndef BFS_SANITY_H #define BFS_SANITY_H -#include "config.h" #include <stddef.h> -#if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__) -# define SANITIZE_ADDRESS true -#endif - -#if __has_feature(memory_sanitizer) || defined(__SANITIZE_MEMORY__) -# define SANITIZE_MEMORY true -#endif - -#if __has_feature(thread_sanitizer) || defined(__SANITIZE_THREAD__) -# define SANITIZE_THREAD true -#endif - // Call macro(ptr, size) or macro(ptr, sizeof(*ptr)) #define SANITIZE_CALL(...) \ SANITIZE_CALL_(__VA_ARGS__, ) @@ -33,7 +20,7 @@ #define SANITIZE_CALL__(macro, ptr, size, ...) \ macro(ptr, size) -#if SANITIZE_ADDRESS +#if __SANITIZE_ADDRESS__ # include <sanitizer/asan_interface.h> /** @@ -50,12 +37,30 @@ */ #define sanitize_free(...) SANITIZE_CALL(__asan_poison_memory_region, __VA_ARGS__) +/** + * Adjust the size of an allocated region, for things like dynamic arrays. + * + * @ptr + * The memory region. + * @old + * The previous usable size of the region. + * @new + * The new usable size of the region. + * @cap + * The total allocated capacity of the region. + */ +static inline void sanitize_resize(const void *ptr, size_t old, size_t new, size_t cap) { + const char *beg = ptr; + __sanitizer_annotate_contiguous_container(beg, beg + cap, beg + old, beg + new); +} + #else -# define sanitize_alloc sanitize_uninit -# define sanitize_free sanitize_uninit +# define sanitize_alloc(...) ((void)0) +# define sanitize_free(...) ((void)0) +# define sanitize_resize(ptr, old, new, cap) ((void)0) #endif -#if SANITIZE_MEMORY +#if __SANITIZE_MEMORY__ # include <sanitizer/msan_interface.h> /** @@ -73,22 +78,17 @@ #define sanitize_uninit(...) SANITIZE_CALL(__msan_allocated_memory, __VA_ARGS__) #else -# define sanitize_init(...) SANITIZE_CALL(sanitize_ignore, __VA_ARGS__) -# define sanitize_uninit(...) SANITIZE_CALL(sanitize_ignore, __VA_ARGS__) +# define sanitize_init(...) ((void)0) +# define sanitize_uninit(...) ((void)0) #endif /** - * Squelch unused variable warnings when not sanitizing. - */ -#define sanitize_ignore(ptr, size) ((void)(ptr), (void)(size)) - -/** * Initialize a variable, unless sanitizers would detect uninitialized uses. */ -#if SANITIZE_MEMORY -# define uninit(var, value) var +#if __SANITIZE_MEMORY__ +# define uninit(value) #else -# define uninit(var, value) value +# define uninit(value) = value #endif #endif // BFS_SANITY_H diff --git a/src/sighook.c b/src/sighook.c new file mode 100644 index 0000000..a87bed5 --- /dev/null +++ b/src/sighook.c @@ -0,0 +1,692 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +/** + * Dynamic (un)registration of signal handlers. + * + * Because signal handlers can interrupt any thread at an arbitrary point, they + * must be lock-free or risk deadlock. Therefore, we implement the global table + * of signal "hooks" with a simple read-copy-update (RCU) scheme. Readers get a + * reference-counted pointer (struct arc) to the table in a lock-free way, and + * release the reference count when finished. + * + * Updates are managed by struct rcu, which has two slots: one active and one + * inactive. Readers acquire a reference to the active slot. A single writer + * can safely update it by initializing the inactive slot, atomically swapping + * the slots, and waiting for the reference count of the newly inactive slot to + * drop to zero. Once it does, the old pointer can be safely freed. + */ + +#include "sighook.h" + +#include "alloc.h" +#include "atomic.h" +#include "bfs.h" +#include "bfstd.h" +#include "diag.h" +#include "thread.h" + +#include <errno.h> +#include <pthread.h> +#include <signal.h> +#include <stdlib.h> +#include <unistd.h> + +#if __linux__ +# include <sys/syscall.h> +#endif + +// NetBSD opens a file descriptor for each sem_init() +#if defined(_POSIX_SEMAPHORES) && !__NetBSD__ +# define BFS_POSIX_SEMAPHORES _POSIX_SEMAPHORES +#else +# define BFS_POSIX_SEMAPHORES (-1) +#endif + +#if BFS_POSIX_SEMAPHORES >= 0 +# include <semaphore.h> +#endif + +/** + * An atomically reference-counted pointer. + */ +struct arc { + /** The current reference count (0 means empty). */ + atomic size_t refs; + /** The reference itself. */ + void *ptr; + +#if BFS_POSIX_SEMAPHORES >= 0 + /** A semaphore for arc_wait(). */ + sem_t sem; + /** sem_init() result. */ + int sem_status; +#endif +}; + +/** Initialize an arc. */ +static void arc_init(struct arc *arc) { + bfs_verify(atomic_is_lock_free(&arc->refs)); + + atomic_init(&arc->refs, 0); + arc->ptr = NULL; + +#if BFS_POSIX_SEMAPHORES >= 0 + if (sysoption(SEMAPHORES) > 0) { + arc->sem_status = sem_init(&arc->sem, false, 0); + } else { + arc->sem_status = -1; + } +#endif +} + +/** Get the current refcount. */ +static size_t arc_refs(const struct arc *arc) { + return load(&arc->refs, relaxed); +} + +/** Set the pointer in an empty arc. */ +static void arc_set(struct arc *arc, void *ptr) { + bfs_assert(arc_refs(arc) == 0); + bfs_assert(ptr); + + arc->ptr = ptr; + store(&arc->refs, 1, release); +} + +/** Acquire a reference. */ +static void *arc_get(struct arc *arc) { + size_t refs = arc_refs(arc); + do { + if (refs < 1) { + return NULL; + } + } while (!compare_exchange_weak(&arc->refs, &refs, refs + 1, acquire, relaxed)); + + return arc->ptr; +} + +/** Release a reference. */ +static void arc_put(struct arc *arc) { + size_t refs = fetch_sub(&arc->refs, 1, release); + + if (refs == 1) { +#if BFS_POSIX_SEMAPHORES >= 0 + if (arc->sem_status == 0 && sem_post(&arc->sem) != 0) { + abort(); + } +#endif + } +} + +/** Wait on the semaphore. */ +static int arc_sem_wait(struct arc *arc) { +#if BFS_POSIX_SEMAPHORES >= 0 + if (arc->sem_status == 0) { + while (sem_wait(&arc->sem) != 0) { + bfs_everify(errno == EINTR, "sem_wait()"); + } + return 0; + } +#endif + + return -1; +} + +/** Wait for all references to be released. */ +static void *arc_wait(struct arc *arc) { + size_t refs = fetch_sub(&arc->refs, 1, relaxed); + bfs_assert(refs > 0); + + --refs; + while (refs > 0) { + if (arc_sem_wait(arc) == 0) { + bfs_assert(arc_refs(arc) == 0); + // sem_wait() provides enough ordering, so we can skip the fence + goto done; + } + + // Some platforms (like macOS) don't support unnamed semaphores, + // but we can always busy-wait + spin_loop(); + refs = arc_refs(arc); + } + + thread_fence(&arc->refs, acquire); + +done:; + void *ptr = arc->ptr; + arc->ptr = NULL; + return ptr; +} + +/** Destroy an arc. */ +static void arc_destroy(struct arc *arc) { + bfs_assert(arc_refs(arc) == 0); + +#if BFS_POSIX_SEMAPHORES >= 0 + if (arc->sem_status == 0) { + bfs_everify(sem_destroy(&arc->sem) == 0, "sem_destroy()"); + } +#endif +} + +/** + * A simple read-copy-update memory reclamation scheme. + */ +struct rcu { + /** The currently active slot. */ + atomic size_t active; + /** The two slots. */ + struct arc slots[2]; +}; + +/** Sentinel value for RCU, since arc uses NULL already. */ +static void *RCU_NULL = &RCU_NULL; + +/** Map NULL -> RCU_NULL. */ +static void *rcu_encode(void *ptr) { + return ptr ? ptr : RCU_NULL; +} + +/** Map RCU_NULL -> NULL. */ +static void *rcu_decode(void *ptr) { + bfs_assert(ptr != NULL); + return ptr == RCU_NULL ? NULL : ptr; +} + +/** Initialize an RCU block. */ +static void rcu_init(struct rcu *rcu, void *ptr) { + bfs_verify(atomic_is_lock_free(&rcu->active)); + + atomic_init(&rcu->active, 0); + arc_init(&rcu->slots[0]); + arc_init(&rcu->slots[1]); + arc_set(&rcu->slots[0], rcu_encode(ptr)); +} + +/** Get the active slot. */ +static struct arc *rcu_active(struct rcu *rcu) { + size_t i = load(&rcu->active, relaxed); + return &rcu->slots[i]; +} + +/** Destroy an RCU block. */ +static void rcu_destroy(struct rcu *rcu) { + arc_wait(rcu_active(rcu)); + arc_destroy(&rcu->slots[1]); + arc_destroy(&rcu->slots[0]); +} + +/** Read an RCU-protected pointer. */ +static void *rcu_read(struct rcu *rcu, struct arc **slot) { + while (true) { + *slot = rcu_active(rcu); + void *ptr = arc_get(*slot); + if (ptr) { + return rcu_decode(ptr); + } + // Otherwise, the other slot became active; retry + } +} + +/** Get the RCU-protected pointer without acquiring a reference. */ +static void *rcu_peek(struct rcu *rcu) { + struct arc *arc = rcu_active(rcu); + return rcu_decode(arc->ptr); +} + +/** Update an RCU-protected pointer, and return the old one. */ +static void *rcu_update(struct rcu *rcu, void *ptr) { + size_t i = load(&rcu->active, relaxed); + struct arc *prev = &rcu->slots[i]; + + size_t j = i ^ 1; + struct arc *next = &rcu->slots[j]; + + arc_set(next, rcu_encode(ptr)); + store(&rcu->active, j, relaxed); + return rcu_decode(arc_wait(prev)); +} + +/** + * An RCU-protected linked list. + */ +struct rcu_list { + /** The first node in the list. */ + struct rcu head; + /** &last->next */ + struct rcu *tail; +}; + +/** + * An rcu_list node. + */ +struct rcu_node { + /** The RCU pointer to this node. */ + struct rcu *self; + /** The next node in the list. */ + struct rcu next; +}; + +/** Initialize an rcu_list. */ +static void rcu_list_init(struct rcu_list *list) { + rcu_init(&list->head, NULL); + list->tail = &list->head; +} + +/** Append to an rcu_list. */ +static void rcu_list_append(struct rcu_list *list, struct rcu_node *node) { + node->self = list->tail; + list->tail = &node->next; + rcu_init(&node->next, NULL); + rcu_update(node->self, node); +} + +/** Remove from an rcu_list. */ +static void rcu_list_remove(struct rcu_list *list, struct rcu_node *node) { + struct rcu_node *next = rcu_peek(&node->next); + rcu_update(node->self, next); + if (next) { + next->self = node->self; + } else { + list->tail = &list->head; + } + rcu_destroy(&node->next); +} + +/** + * Iterate over an rcu_list. + * + * It is save to `break` out of this loop, but `return` or `goto` will lead to + * a missed arc_put(). + */ +#define for_rcu(type, node, list) \ + for_rcu_(type, node, (list), node##_slot_, node##_prev_, node##_done_) + +#define for_rcu_(type, node, list, slot, prev, done) \ + for (struct arc *slot, *prev, **done = NULL; !done; arc_put(slot), done = &slot) \ + for (type *node = rcu_read(&list->head, &slot); \ + node; \ + prev = slot, \ + node = rcu_read(&((struct rcu_node *)node)->next, &slot), \ + arc_put(prev)) + +struct sighook { + /** The RCU list node (must be the first field). */ + struct rcu_node node; + + /** The signal being hooked, or 0 for atsigexit(). */ + int sig; + /** Signal hook flags. */ + enum sigflags flags; + /** The function to call. */ + sighook_fn *fn; + /** An argument to pass to the function. */ + void *arg; + /** Flag for SH_ONESHOT. */ + atomic bool armed; +}; + +/** The lists of signal hooks. */ +static struct rcu_list sighooks[64]; + +/** Get the hook list for a particular signal. */ +static struct rcu_list *siglist(int sig) { + return &sighooks[sig % countof(sighooks)]; +} + +/** Mutex for initialization and RCU writer exclusion. */ +static pthread_mutex_t sigmutex = PTHREAD_MUTEX_INITIALIZER; + +/** Check if a signal was generated by userspace. */ +static bool is_user_generated(const siginfo_t *info) { + // https://pubs.opengroup.org/onlinepubs/9799919799/functions/V2_chap02.html#tag_16_04_03_03 + // + // If si_code is SI_USER or SI_QUEUE, or any value less than or + // equal to 0, then the signal was generated by a process ... + int code = info->si_code; + return code == SI_USER || code == SI_QUEUE || code <= 0; +} + +/** Check if a signal is caused by a fault. */ +static bool is_fault(const siginfo_t *info) { + int sig = info->si_signo; + if (sig == SIGBUS || sig == SIGFPE || sig == SIGILL || sig == SIGSEGV) { + return !is_user_generated(info); + } else { + return false; + } +} + +// https://pubs.opengroup.org/onlinepubs/9799919799/basedefs/signal.h.html +static const int FATAL_SIGNALS[] = { + SIGABRT, + SIGALRM, + SIGBUS, + SIGFPE, + SIGHUP, + SIGILL, + SIGINT, +#ifdef SIGIO + SIGIO, +#endif + SIGPIPE, +#ifdef SIGPOLL + SIGPOLL, +#endif +#ifdef SIGPROF + SIGPROF, +#endif +#ifdef SIGPWR + SIGPWR, +#endif + SIGQUIT, + SIGSEGV, +#ifdef SIGSTKFLT + SIGSTKFLT, +#endif +#ifdef SIGSYS + SIGSYS, +#endif + SIGTERM, + SIGTRAP, + SIGUSR1, + SIGUSR2, +#ifdef SIGVTALRM + SIGVTALRM, +#endif + SIGXCPU, + SIGXFSZ, +}; + +/** Check if a signal's default action is to terminate the process. */ +static bool is_fatal(int sig) { + for (size_t i = 0; i < countof(FATAL_SIGNALS); ++i) { + if (sig == FATAL_SIGNALS[i]) { + return true; + } + } + +#ifdef SIGRTMIN + // https://pubs.opengroup.org/onlinepubs/9799919799/functions/V2_chap02.html#tag_16_04_03_01 + // + // The default actions for the realtime signals in the range + // SIGRTMIN to SIGRTMAX shall be to terminate the process + // abnormally. + if (sig >= SIGRTMIN && sig <= SIGRTMAX) { + return true; + } +#endif + + return false; +} + +/** Reraise a fatal signal. */ +_noreturn +static void reraise(siginfo_t *info) { + int sig = info->si_signo; + + // Restore the default signal action + if (signal(sig, SIG_DFL) == SIG_ERR) { + goto fail; + } + + // Unblock the signal, since we didn't set SA_NODEFER + sigset_t mask; + if (sigemptyset(&mask) != 0 + || sigaddset(&mask, sig) != 0 + || pthread_sigmask(SIG_UNBLOCK, &mask, NULL) != 0) { + goto fail; + } + +#if __linux__ + // On Linux, try to re-raise the exact siginfo_t (since 3.9, a process can + // signal itself with any siginfo_t) + pid_t tid = syscall(SYS_gettid); + syscall(SYS_rt_tgsigqueueinfo, getpid(), tid, sig, info); +#endif + + raise(sig); +fail: + abort(); +} + +/** Check whether we should run a hook. */ +static bool should_run(int sig, struct sighook *hook) { + if (hook->sig != sig && hook->sig != 0) { + return false; + } + + if (hook->flags & SH_ONESHOT) { + if (!exchange(&hook->armed, false, relaxed)) { + return false; + } + } + + return true; +} + +/** Find any matching hooks and run them. */ +static enum sigflags run_hooks(struct rcu_list *list, int sig, siginfo_t *info) { + enum sigflags ret = 0; + + for_rcu (struct sighook, hook, list) { + if (should_run(sig, hook)) { + hook->fn(sig, info, hook->arg); + ret |= hook->flags; + } + } + + return ret; +} + +/** Dispatches a signal to the registered handlers. */ +static void sigdispatch(int sig, siginfo_t *info, void *context) { + // If we get a fault (e.g. a "real" SIGSEGV, not something like + // kill(..., SIGSEGV)), don't try to run signal hooks, since we could be + // in an arbitrarily corrupted state. + // + // POSIX says that returning normally from a signal handler for a fault + // is undefined. But in practice, it's better to uninstall the handler + // and return, which will re-run the faulting instruction and cause us + // to die "correctly" (e.g. with a core dump pointing at the faulting + // instruction, not reraise()). + if (is_fault(info)) { + // On macOS, we cannot reliably distinguish between faults and + // asynchronous signals. For example, pkill -SEGV bfs will + // result in si_code == SEGV_ACCERR. So we always re-raise the + // signal, because just returning would cause us to ignore + // asynchronous SIG{BUS,ILL,SEGV}. +#if !__APPLE__ + if (signal(sig, SIG_DFL) != SIG_ERR) { + return; + } +#endif + reraise(info); + } + + // https://pubs.opengroup.org/onlinepubs/9799919799/functions/V2_chap02.html#tag_16_04_04 + // + // After returning from a signal-catching function, the value of + // errno is unspecified if the signal-catching function or any + // function it called assigned a value to errno and the signal- + // catching function did not save and restore the original value of + // errno. + int error = errno; + + // Run the normal hooks + struct rcu_list *list = siglist(sig); + enum sigflags flags = run_hooks(list, sig, info); + + // Run the atsigexit() hooks, if we're exiting + if (!(flags & SH_CONTINUE) && is_fatal(sig)) { + list = siglist(0); + run_hooks(list, sig, info); + reraise(info); + } + + errno = error; +} + +/** A saved signal handler, for sigreset() to restore. */ +struct sigsave { + struct rcu_node node; + int sig; + struct sigaction action; +}; + +/** The list of saved signal handlers. */ +static struct rcu_list saved; +/** `saved` initialization status (since rcu_list_init() isn't atomic). */ +static atomic bool initialized = false; + +/** Make sure our signal handler is installed for a given signal. */ +static int siginit(int sig) { +#ifdef SA_RESTART +# define BFS_SA_RESTART SA_RESTART +#else +# define BFS_SA_RESTART 0 +#endif + + static struct sigaction action = { + .sa_sigaction = sigdispatch, + .sa_flags = BFS_SA_RESTART | SA_SIGINFO, + }; + + static sigset_t signals; + + if (!load(&initialized, relaxed)) { + if (sigemptyset(&signals) != 0 + || sigemptyset(&action.sa_mask) != 0) { + return -1; + } + + for (size_t i = 0; i < countof(sighooks); ++i) { + rcu_list_init(&sighooks[i]); + } + + rcu_list_init(&saved); + store(&initialized, true, release); + } + + int installed = sigismember(&signals, sig); + if (installed < 0) { + return -1; + } else if (installed) { + return 0; + } + + sigset_t updated = signals; + if (sigaddset(&updated, sig) != 0) { + return -1; + } + + struct sigaction original; + if (sigaction(sig, NULL, &original) != 0) { + return -1; + } + + struct sigsave *save = ALLOC(struct sigsave); + if (!save) { + return -1; + } + + save->sig = sig; + save->action = original; + rcu_list_append(&saved, &save->node); + + if (sigaction(sig, &action, NULL) != 0) { + rcu_list_remove(&saved, &save->node); + free(save); + return -1; + } + + signals = updated; + return 0; +} + +/** Shared sighook()/atsigexit() implementation. */ +static struct sighook *sighook_impl(int sig, sighook_fn *fn, void *arg, enum sigflags flags) { + struct sighook *hook = ALLOC(struct sighook); + if (!hook) { + return NULL; + } + + hook->sig = sig; + hook->flags = flags; + hook->fn = fn; + hook->arg = arg; + atomic_init(&hook->armed, true); + + struct rcu_list *list = siglist(sig); + rcu_list_append(list, &hook->node); + return hook; +} + +struct sighook *sighook(int sig, sighook_fn *fn, void *arg, enum sigflags flags) { + bfs_assert(sig > 0); + + mutex_lock(&sigmutex); + + struct sighook *ret = NULL; + if (siginit(sig) == 0) { + ret = sighook_impl(sig, fn, arg, flags); + } + + mutex_unlock(&sigmutex); + return ret; +} + +struct sighook *atsigexit(sighook_fn *fn, void *arg) { + mutex_lock(&sigmutex); + + for (size_t i = 0; i < countof(FATAL_SIGNALS); ++i) { + // Ignore errors; atsigexit() is best-effort anyway and things + // like sanitizer runtimes or valgrind may reserve signals for + // their own use + siginit(FATAL_SIGNALS[i]); + } + +#ifdef SIGRTMIN + for (int i = SIGRTMIN; i <= SIGRTMAX; ++i) { + siginit(i); + } +#endif + + struct sighook *ret = sighook_impl(0, fn, arg, 0); + mutex_unlock(&sigmutex); + return ret; +} + +void sigunhook(struct sighook *hook) { + if (!hook) { + return; + } + + mutex_lock(&sigmutex); + + struct rcu_list *list = siglist(hook->sig); + rcu_list_remove(list, &hook->node); + + mutex_unlock(&sigmutex); + + free(hook); +} + +int sigreset(void) { + if (!load(&initialized, acquire)) { + return 0; + } + + int ret = 0; + + for_rcu (struct sigsave, save, &saved) { + if (sigaction(save->sig, &save->action, NULL) != 0) { + ret = -1; + break; + } + } + + return ret; +} diff --git a/src/sighook.h b/src/sighook.h new file mode 100644 index 0000000..7149229 --- /dev/null +++ b/src/sighook.h @@ -0,0 +1,83 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +/** + * Signal hooks. + */ + +#ifndef BFS_SIGHOOK_H +#define BFS_SIGHOOK_H + +#include <signal.h> + +/** + * A dynamic signal hook. + */ +struct sighook; + +/** + * Signal hook flags. + */ +enum sigflags { + /** Suppress the default action for this signal. */ + SH_CONTINUE = 1 << 0, + /** Only run this hook once. */ + SH_ONESHOT = 1 << 1, +}; + +/** + * A signal hook callback. Hooks are executed from a signal handler, so must + * only call async-signal-safe functions. + * + * @sig + * The signal number. + * @info + * Additional information about the signal. + * @arg + * An arbitrary pointer passed to the hook. + */ +typedef void sighook_fn(int sig, siginfo_t *info, void *arg); + +/** + * Install a hook for a signal. + * + * @sig + * The signal to hook. + * @fn + * The function to call. + * @arg + * An argument passed to the function. + * @flags + * Flags for the new hook. + * @return + * The installed hook, or NULL on failure. + */ +struct sighook *sighook(int sig, sighook_fn *fn, void *arg, enum sigflags flags); + +/** + * On a best-effort basis, invoke the given hook just before the program is + * abnormally terminated by a signal. + * + * @fn + * The function to call. + * @arg + * An argument passed to the function. + * @return + * The installed hook, or NULL on failure. + */ +struct sighook *atsigexit(sighook_fn *fn, void *arg); + +/** + * Remove a signal hook. + */ +void sigunhook(struct sighook *hook); + +/** + * Restore all signal handlers to their original dispositions (e.g. after fork()). + * + * @return + * 0 on success, -1 on failure. + */ +int sigreset(void); + +#endif // BFS_SIGHOOK_H @@ -2,41 +2,33 @@ // SPDX-License-Identifier: 0BSD #include "stat.h" + #include "atomic.h" +#include "bfs.h" #include "bfstd.h" -#include "config.h" #include "diag.h" #include "sanity.h" + #include <errno.h> #include <fcntl.h> #include <string.h> -#include <sys/types.h> #include <sys/stat.h> +#include <sys/types.h> -#if defined(STATX_BASIC_STATS) && (!__ANDROID__ || __ANDROID_API__ >= 30) -# define BFS_HAS_LIBC_STATX true -#elif __linux__ +#if BFS_USE_STATX && !BFS_HAS_STATX # include <linux/stat.h> # include <sys/syscall.h> # include <unistd.h> #endif -#ifndef BFS_USE_STATX -# if BFS_HAS_LIBC_STATX || defined(SYS_statx) -# define BFS_USE_STATX true -# endif -#endif - const char *bfs_stat_field_name(enum bfs_stat_field field) { switch (field) { + case BFS_STAT_MODE: + return "mode"; case BFS_STAT_DEV: return "device number"; case BFS_STAT_INO: return "inode nunmber"; - case BFS_STAT_TYPE: - return "type"; - case BFS_STAT_MODE: - return "mode"; case BFS_STAT_NLINK: return "link count"; case BFS_STAT_GID: @@ -59,62 +51,85 @@ const char *bfs_stat_field_name(enum bfs_stat_field field) { return "change time"; case BFS_STAT_MTIME: return "modification time"; + case BFS_STAT_MNT_ID: + return "mount ID"; } - bfs_bug("Unrecognized stat field"); + bfs_bug("Unrecognized stat field %d", (int)field); return "???"; } -/** - * Convert a struct stat to a struct bfs_stat. - */ -static void bfs_stat_convert(const struct stat *statbuf, struct bfs_stat *buf) { - buf->mask = 0; +int bfs_fstatat_flags(enum bfs_stat_flags flags) { + int ret = 0; + + if (flags & BFS_STAT_NOFOLLOW) { + ret |= AT_SYMLINK_NOFOLLOW; + } + +#ifdef AT_NO_AUTOMOUNT + ret |= AT_NO_AUTOMOUNT; +#endif + + return ret; +} - buf->dev = statbuf->st_dev; - buf->mask |= BFS_STAT_DEV; +void bfs_stat_convert(struct bfs_stat *dest, const struct stat *src) { + dest->mask = 0; - buf->ino = statbuf->st_ino; - buf->mask |= BFS_STAT_INO; + dest->mode = src->st_mode; + dest->mask |= BFS_STAT_MODE; - buf->mode = statbuf->st_mode; - buf->mask |= BFS_STAT_TYPE | BFS_STAT_MODE; + dest->dev = src->st_dev; + dest->mask |= BFS_STAT_DEV; - buf->nlink = statbuf->st_nlink; - buf->mask |= BFS_STAT_NLINK; + dest->ino = src->st_ino; + dest->mask |= BFS_STAT_INO; - buf->gid = statbuf->st_gid; - buf->mask |= BFS_STAT_GID; + dest->nlink = src->st_nlink; + dest->mask |= BFS_STAT_NLINK; - buf->uid = statbuf->st_uid; - buf->mask |= BFS_STAT_UID; + dest->gid = src->st_gid; + dest->mask |= BFS_STAT_GID; - buf->size = statbuf->st_size; - buf->mask |= BFS_STAT_SIZE; + dest->uid = src->st_uid; + dest->mask |= BFS_STAT_UID; - buf->blocks = statbuf->st_blocks; - buf->mask |= BFS_STAT_BLOCKS; + dest->size = src->st_size; + dest->mask |= BFS_STAT_SIZE; - buf->rdev = statbuf->st_rdev; - buf->mask |= BFS_STAT_RDEV; + dest->blocks = src->st_blocks; + dest->mask |= BFS_STAT_BLOCKS; -#if BSD - buf->attrs = statbuf->st_flags; - buf->mask |= BFS_STAT_ATTRS; + dest->rdev = src->st_rdev; + dest->mask |= BFS_STAT_RDEV; + + // No mount IDs in regular stat(), so use the dev_t as an approximation + dest->mnt_id = dest->dev; + dest->mask |= BFS_STAT_MNT_ID; + +#if BFS_HAS_ST_FLAGS + dest->attrs = src->st_flags; + dest->mask |= BFS_STAT_ATTRS; #endif - buf->atime = statbuf->st_atim; - buf->mask |= BFS_STAT_ATIME; + dest->atime = ST_ATIM(*src); + dest->mask |= BFS_STAT_ATIME; - buf->ctime = statbuf->st_ctim; - buf->mask |= BFS_STAT_CTIME; + dest->ctime = ST_CTIM(*src); + dest->mask |= BFS_STAT_CTIME; - buf->mtime = statbuf->st_mtim; - buf->mask |= BFS_STAT_MTIME; + dest->mtime = ST_MTIM(*src); + dest->mask |= BFS_STAT_MTIME; -#if __APPLE__ || __FreeBSD__ || __NetBSD__ - buf->btime = statbuf->st_birthtim; - buf->mask |= BFS_STAT_BTIME; +#if BFS_HAS_ST_BIRTHTIM + dest->btime = src->st_birthtim; + dest->mask |= BFS_STAT_BTIME; +#elif BFS_HAS___ST_BIRTHTIM + dest->btime = src->__st_birthtim; + dest->mask |= BFS_STAT_BTIME; +#elif BFS_HAS_ST_BIRTHTIMESPEC + dest->btime = src->st_birthtimespec; + dest->mask |= BFS_STAT_BTIME; #endif } @@ -125,7 +140,7 @@ static int bfs_stat_impl(int at_fd, const char *at_path, int at_flags, struct bf struct stat statbuf; int ret = fstatat(at_fd, at_path, &statbuf, at_flags); if (ret == 0) { - bfs_stat_convert(&statbuf, buf); + bfs_stat_convert(buf, &statbuf); } return ret; } @@ -136,7 +151,7 @@ static int bfs_stat_impl(int at_fd, const char *at_path, int at_flags, struct bf * Wrapper for the statx() system call, which had no glibc wrapper prior to 2.28. */ static int bfs_statx(int at_fd, const char *at_path, int at_flags, unsigned int mask, struct statx *buf) { -#if BFS_HAS_LIBC_STATX +#if BFS_HAS_STATX int ret = statx(at_fd, at_path, at_flags, mask, buf); #else int ret = syscall(SYS_statx, at_fd, at_path, at_flags, mask, buf); @@ -150,82 +165,118 @@ static int bfs_statx(int at_fd, const char *at_path, int at_flags, unsigned int return ret; } -/** - * bfs_stat() implementation backed by statx(). - */ -static int bfs_statx_impl(int at_fd, const char *at_path, int at_flags, struct bfs_stat *buf) { - unsigned int mask = STATX_BASIC_STATS | STATX_BTIME; - struct statx xbuf; - int ret = bfs_statx(at_fd, at_path, at_flags, mask, &xbuf); - if (ret != 0) { - return ret; +int bfs_statx_flags(enum bfs_stat_flags flags) { + int ret = bfs_fstatat_flags(flags); + + if (flags & BFS_STAT_NOSYNC) { + ret |= AT_STATX_DONT_SYNC; } + return ret; +} + +unsigned int bfs_statx_mask(void) { + unsigned int mask = STATX_BASIC_STATS | STATX_BTIME; +#ifdef STATX_MNT_ID + mask |= STATX_MNT_ID; +#endif +#ifdef STATX_MNT_ID_UNIQUE + mask |= STATX_MNT_ID_UNIQUE; +#endif + return mask; +} + +int bfs_statx_convert(struct bfs_stat *dest, const struct statx *src) { // Callers shouldn't have to check anything except the times const unsigned int guaranteed = STATX_BASIC_STATS & ~(STATX_ATIME | STATX_CTIME | STATX_MTIME); - if ((xbuf.stx_mask & guaranteed) != guaranteed) { + if ((src->stx_mask & guaranteed) != guaranteed) { errno = ENOTSUP; return -1; } - buf->mask = 0; + dest->mask = 0; - buf->dev = xmakedev(xbuf.stx_dev_major, xbuf.stx_dev_minor); - buf->mask |= BFS_STAT_DEV; + dest->mode = src->stx_mode; + dest->mask |= BFS_STAT_MODE; - buf->ino = xbuf.stx_ino; - buf->mask |= BFS_STAT_INO; + dest->dev = xmakedev(src->stx_dev_major, src->stx_dev_minor); + dest->mask |= BFS_STAT_DEV; - buf->mode = xbuf.stx_mode; - buf->mask |= BFS_STAT_TYPE; - buf->mask |= BFS_STAT_MODE; + dest->ino = src->stx_ino; + dest->mask |= BFS_STAT_INO; - buf->nlink = xbuf.stx_nlink; - buf->mask |= BFS_STAT_NLINK; + dest->nlink = src->stx_nlink; + dest->mask |= BFS_STAT_NLINK; - buf->gid = xbuf.stx_gid; - buf->mask |= BFS_STAT_GID; + dest->gid = src->stx_gid; + dest->mask |= BFS_STAT_GID; - buf->uid = xbuf.stx_uid; - buf->mask |= BFS_STAT_UID; + dest->uid = src->stx_uid; + dest->mask |= BFS_STAT_UID; - buf->size = xbuf.stx_size; - buf->mask |= BFS_STAT_SIZE; + dest->size = src->stx_size; + dest->mask |= BFS_STAT_SIZE; - buf->blocks = xbuf.stx_blocks; - buf->mask |= BFS_STAT_BLOCKS; + dest->blocks = src->stx_blocks; + dest->mask |= BFS_STAT_BLOCKS; - buf->rdev = xmakedev(xbuf.stx_rdev_major, xbuf.stx_rdev_minor); - buf->mask |= BFS_STAT_RDEV; + dest->rdev = xmakedev(src->stx_rdev_major, src->stx_rdev_minor); + dest->mask |= BFS_STAT_RDEV; - buf->attrs = xbuf.stx_attributes; - buf->mask |= BFS_STAT_ATTRS; + dest->attrs = src->stx_attributes; + dest->mask |= BFS_STAT_ATTRS; - if (xbuf.stx_mask & STATX_ATIME) { - buf->atime.tv_sec = xbuf.stx_atime.tv_sec; - buf->atime.tv_nsec = xbuf.stx_atime.tv_nsec; - buf->mask |= BFS_STAT_ATIME; + dest->mnt_id = dest->dev; +#ifdef STATX_MNT_ID + unsigned int mnt_mask = STATX_MNT_ID; +# ifdef STATX_MNT_ID_UNIQUE + mnt_mask |= STATX_MNT_ID_UNIQUE; +# endif + if (src->stx_mask & mnt_mask) { + dest->mnt_id = src->stx_mnt_id; } +#endif + dest->mask |= BFS_STAT_MNT_ID; - if (xbuf.stx_mask & STATX_BTIME) { - buf->btime.tv_sec = xbuf.stx_btime.tv_sec; - buf->btime.tv_nsec = xbuf.stx_btime.tv_nsec; - buf->mask |= BFS_STAT_BTIME; + if (src->stx_mask & STATX_ATIME) { + dest->atime.tv_sec = src->stx_atime.tv_sec; + dest->atime.tv_nsec = src->stx_atime.tv_nsec; + dest->mask |= BFS_STAT_ATIME; } - if (xbuf.stx_mask & STATX_CTIME) { - buf->ctime.tv_sec = xbuf.stx_ctime.tv_sec; - buf->ctime.tv_nsec = xbuf.stx_ctime.tv_nsec; - buf->mask |= BFS_STAT_CTIME; + if (src->stx_mask & STATX_BTIME) { + dest->btime.tv_sec = src->stx_btime.tv_sec; + dest->btime.tv_nsec = src->stx_btime.tv_nsec; + dest->mask |= BFS_STAT_BTIME; } - if (xbuf.stx_mask & STATX_MTIME) { - buf->mtime.tv_sec = xbuf.stx_mtime.tv_sec; - buf->mtime.tv_nsec = xbuf.stx_mtime.tv_nsec; - buf->mask |= BFS_STAT_MTIME; + if (src->stx_mask & STATX_CTIME) { + dest->ctime.tv_sec = src->stx_ctime.tv_sec; + dest->ctime.tv_nsec = src->stx_ctime.tv_nsec; + dest->mask |= BFS_STAT_CTIME; } - return ret; + if (src->stx_mask & STATX_MTIME) { + dest->mtime.tv_sec = src->stx_mtime.tv_sec; + dest->mtime.tv_nsec = src->stx_mtime.tv_nsec; + dest->mask |= BFS_STAT_MTIME; + } + + return 0; +} + +/** + * bfs_stat() implementation backed by statx(). + */ +static int bfs_statx_impl(int at_fd, const char *at_path, int at_flags, struct bfs_stat *buf) { + unsigned int mask = bfs_statx_mask(); + struct statx xbuf; + int ret = bfs_statx(at_fd, at_path, at_flags, mask, &xbuf); + if (ret != 0) { + return ret; + } + + return bfs_statx_convert(buf, &xbuf); } #endif // BFS_USE_STATX @@ -233,20 +284,20 @@ static int bfs_statx_impl(int at_fd, const char *at_path, int at_flags, struct b /** * Calls the stat() implementation with explicit flags. */ -static int bfs_stat_explicit(int at_fd, const char *at_path, int at_flags, int x_flags, struct bfs_stat *buf) { +static int bfs_stat_explicit(int at_fd, const char *at_path, int at_flags, struct bfs_stat *buf) { #if BFS_USE_STATX static atomic bool has_statx = true; if (load(&has_statx, relaxed)) { - int ret = bfs_statx_impl(at_fd, at_path, at_flags | x_flags, buf); - // EPERM is commonly returned in a seccomp() sandbox that does - // not allow statx() - if (ret != 0 && (errno == ENOSYS || errno == EPERM)) { + int ret = bfs_statx_impl(at_fd, at_path, at_flags, buf); + if (ret != 0 && errno_is_like(ENOSYS)) { store(&has_statx, false, relaxed); } else { return ret; } } + + at_flags &= ~AT_STATX_DONT_SYNC; #endif return bfs_stat_impl(at_fd, at_path, at_flags, buf); @@ -255,62 +306,46 @@ static int bfs_stat_explicit(int at_fd, const char *at_path, int at_flags, int x /** * Implements the BFS_STAT_TRYFOLLOW retry logic. */ -static int bfs_stat_tryfollow(int at_fd, const char *at_path, int at_flags, int x_flags, enum bfs_stat_flags bfs_flags, struct bfs_stat *buf) { - int ret = bfs_stat_explicit(at_fd, at_path, at_flags, x_flags, buf); +static int bfs_stat_tryfollow(int at_fd, const char *at_path, int at_flags, enum bfs_stat_flags bfs_flags, struct bfs_stat *buf) { + int ret = bfs_stat_explicit(at_fd, at_path, at_flags, buf); if (ret != 0 && (bfs_flags & (BFS_STAT_NOFOLLOW | BFS_STAT_TRYFOLLOW)) == BFS_STAT_TRYFOLLOW - && is_nonexistence_error(errno)) + && errno_is_like(ENOENT)) { at_flags |= AT_SYMLINK_NOFOLLOW; - ret = bfs_stat_explicit(at_fd, at_path, at_flags, x_flags, buf); + ret = bfs_stat_explicit(at_fd, at_path, at_flags, buf); } return ret; } int bfs_stat(int at_fd, const char *at_path, enum bfs_stat_flags flags, struct bfs_stat *buf) { - int at_flags = 0; - if (flags & BFS_STAT_NOFOLLOW) { - at_flags |= AT_SYMLINK_NOFOLLOW; - } - -#if defined(AT_NO_AUTOMOUNT) && (!__GNU__ || __GLIBC_PREREQ(2, 35)) - at_flags |= AT_NO_AUTOMOUNT; -#endif - - int x_flags = 0; -#ifdef AT_STATX_DONT_SYNC - if (flags & BFS_STAT_NOSYNC) { - x_flags |= AT_STATX_DONT_SYNC; - } +#if BFS_USE_STATX + int at_flags = bfs_statx_flags(flags); +#else + int at_flags = bfs_fstatat_flags(flags); #endif if (at_path) { - return bfs_stat_tryfollow(at_fd, at_path, at_flags, x_flags, flags, buf); - } - - // Check __GNU__ to work around https://lists.gnu.org/archive/html/bug-hurd/2021-12/msg00001.html -#if defined(AT_EMPTY_PATH) && !__GNU__ - static atomic bool has_at_ep = true; - if (load(&has_at_ep, relaxed)) { - at_flags |= AT_EMPTY_PATH; - int ret = bfs_stat_explicit(at_fd, "", at_flags, x_flags, buf); - if (ret != 0 && errno == EINVAL) { - store(&has_at_ep, false, relaxed); - } else { - return ret; - } + return bfs_stat_tryfollow(at_fd, at_path, at_flags, flags, buf); } -#endif - struct stat statbuf; - if (fstat(at_fd, &statbuf) == 0) { - bfs_stat_convert(&statbuf, buf); - return 0; - } else { +#if BFS_USE_STATX + // If we have statx(), use it with AT_EMPTY_PATH for its extra features + at_flags |= AT_EMPTY_PATH; + return bfs_stat_explicit(at_fd, "", at_flags, buf); +#else + // Otherwise, just use fstat() rather than fstatat(at_fd, ""), to save + // the kernel the trouble of copying in the empty string + struct stat sb; + if (fstat(at_fd, &sb) != 0) { return -1; } + + bfs_stat_convert(buf, &sb); + return 0; +#endif } const struct timespec *bfs_stat_time(const struct bfs_stat *buf, enum bfs_stat_field field) { @@ -12,33 +12,52 @@ #ifndef BFS_STAT_H #define BFS_STAT_H -#include "config.h" +#include "bfs.h" + +#include <stdint.h> +#include <sys/stat.h> #include <sys/types.h> #include <time.h> -#if BFS_USE_SYS_PARAM_H +#if !BFS_HAS_STATX && BFS_HAS_STATX_SYSCALL +# include <linux/stat.h> +#endif + +#ifndef BFS_USE_STATX +# define BFS_USE_STATX (BFS_HAS_STATX || BFS_HAS_STATX_SYSCALL) +#endif + +#if __has_include(<sys/param.h>) # include <sys/param.h> #endif +#ifdef DEV_BSIZE +# define BFS_STAT_BLKSIZE DEV_BSIZE +#elif defined(S_BLKSIZE) +# define BFS_STAT_BLKSIZE S_BLKSIZE +#else +# define BFS_STAT_BLKSIZE 512 +#endif + /** * bfs_stat field bitmask. */ enum bfs_stat_field { - BFS_STAT_DEV = 1 << 0, - BFS_STAT_INO = 1 << 1, - BFS_STAT_TYPE = 1 << 2, - BFS_STAT_MODE = 1 << 3, - BFS_STAT_NLINK = 1 << 4, - BFS_STAT_GID = 1 << 5, - BFS_STAT_UID = 1 << 6, - BFS_STAT_SIZE = 1 << 7, - BFS_STAT_BLOCKS = 1 << 8, - BFS_STAT_RDEV = 1 << 9, - BFS_STAT_ATTRS = 1 << 10, - BFS_STAT_ATIME = 1 << 11, - BFS_STAT_BTIME = 1 << 12, - BFS_STAT_CTIME = 1 << 13, - BFS_STAT_MTIME = 1 << 14, + BFS_STAT_MODE = 1 << 0, + BFS_STAT_DEV = 1 << 1, + BFS_STAT_INO = 1 << 2, + BFS_STAT_NLINK = 1 << 3, + BFS_STAT_GID = 1 << 4, + BFS_STAT_UID = 1 << 5, + BFS_STAT_SIZE = 1 << 6, + BFS_STAT_BLOCKS = 1 << 7, + BFS_STAT_RDEV = 1 << 8, + BFS_STAT_ATTRS = 1 << 9, + BFS_STAT_ATIME = 1 << 10, + BFS_STAT_BTIME = 1 << 11, + BFS_STAT_CTIME = 1 << 12, + BFS_STAT_MTIME = 1 << 13, + BFS_STAT_MNT_ID = 1 << 14, }; /** @@ -60,14 +79,6 @@ enum bfs_stat_flags { BFS_STAT_NOSYNC = 1 << 2, }; -#ifdef DEV_BSIZE -# define BFS_STAT_BLKSIZE DEV_BSIZE -#elif defined(S_BLKSIZE) -# define BFS_STAT_BLKSIZE S_BLKSIZE -#else -# define BFS_STAT_BLKSIZE 512 -#endif - /** * Facade over struct stat. */ @@ -75,12 +86,12 @@ struct bfs_stat { /** Bitmask indicating filled fields. */ enum bfs_stat_field mask; + /** File type and access mode. */ + mode_t mode; /** Device ID containing the file. */ dev_t dev; /** Inode number. */ ino_t ino; - /** File type and access mode. */ - mode_t mode; /** Number of hard links. */ nlink_t nlink; /** Owner group ID. */ @@ -93,6 +104,8 @@ struct bfs_stat { blkcnt_t blocks; /** The device ID represented by this file. */ dev_t rdev; + /** The ID of the mount point containing this file. */ + uint64_t mnt_id; /** Attributes/flags set on the file. */ unsigned long long attrs; @@ -110,14 +123,14 @@ struct bfs_stat { /** * Facade over fstatat(). * - * @param at_fd + * @at_fd * The base file descriptor for the lookup. - * @param at_path + * @at_path * The path to stat, relative to at_fd. Pass NULL to fstat() at_fd * itself. - * @param flags + * @flags * Flags that affect the lookup. - * @param[out] buf + * @buf[out] * A place to store the stat buffer, if successful. * @return * 0 on success, -1 on error. @@ -125,6 +138,33 @@ struct bfs_stat { int bfs_stat(int at_fd, const char *at_path, enum bfs_stat_flags flags, struct bfs_stat *buf); /** + * Convert bfs_stat_flags to fstatat() flags. + */ +int bfs_fstatat_flags(enum bfs_stat_flags flags); + +/** + * Convert struct stat to struct bfs_stat. + */ +void bfs_stat_convert(struct bfs_stat *dest, const struct stat *src); + +#if BFS_USE_STATX +/** + * Convert bfs_stat_flags to statx() flags. + */ +int bfs_statx_flags(enum bfs_stat_flags flags); + +/** + * Get the default statx() mask. + */ +unsigned int bfs_statx_mask(void); + +/** + * Convert struct statx to struct bfs_stat. + */ +int bfs_statx_convert(struct bfs_stat *dest, const struct statx *src); +#endif + +/** * Get a particular time field from a bfs_stat() buffer. */ const struct timespec *bfs_stat_time(const struct bfs_stat *buf, enum bfs_stat_field field); diff --git a/src/thread.c b/src/thread.c new file mode 100644 index 0000000..b3604f8 --- /dev/null +++ b/src/thread.c @@ -0,0 +1,94 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +#include "thread.h" + +#include "bfstd.h" +#include "diag.h" + +#include <errno.h> +#include <pthread.h> + +#if __has_include(<pthread_np.h>) +# include <pthread_np.h> +#endif + +#define THREAD_FALLIBLE(expr) \ + do { \ + int err = expr; \ + if (err == 0) { \ + return 0; \ + } else { \ + errno = err; \ + return -1; \ + } \ + } while (0) + +#define THREAD_INFALLIBLE(...) \ + THREAD_INFALLIBLE_(__VA_ARGS__, 0, ) + +#define THREAD_INFALLIBLE_(expr, allowed, ...) \ + int err = expr; \ + bfs_verify(err == 0 || err == allowed, "%s: %s", #expr, xstrerror(err)); \ + (void)0 + +int thread_create(pthread_t *thread, const pthread_attr_t *attr, thread_fn *fn, void *arg) { + THREAD_FALLIBLE(pthread_create(thread, attr, fn, arg)); +} + +void thread_setname(pthread_t thread, const char *name) { +#if BFS_HAS_PTHREAD_SETNAME_NP + pthread_setname_np(thread, name); +#elif BFS_HAS_PTHREAD_SET_NAME_NP + pthread_set_name_np(thread, name); +#endif +} + +void thread_join(pthread_t thread, void **ret) { + THREAD_INFALLIBLE(pthread_join(thread, ret)); +} + +int mutex_init(pthread_mutex_t *mutex, pthread_mutexattr_t *attr) { + THREAD_FALLIBLE(pthread_mutex_init(mutex, attr)); +} + +void mutex_lock(pthread_mutex_t *mutex) { + THREAD_INFALLIBLE(pthread_mutex_lock(mutex)); +} + +bool mutex_trylock(pthread_mutex_t *mutex) { + THREAD_INFALLIBLE(pthread_mutex_trylock(mutex), EBUSY); + return err == 0; +} + +void mutex_unlock(pthread_mutex_t *mutex) { + THREAD_INFALLIBLE(pthread_mutex_unlock(mutex)); +} + +void mutex_destroy(pthread_mutex_t *mutex) { + THREAD_INFALLIBLE(pthread_mutex_destroy(mutex)); +} + +int cond_init(pthread_cond_t *cond, pthread_condattr_t *attr) { + THREAD_FALLIBLE(pthread_cond_init(cond, attr)); +} + +void cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex) { + THREAD_INFALLIBLE(pthread_cond_wait(cond, mutex)); +} + +void cond_signal(pthread_cond_t *cond) { + THREAD_INFALLIBLE(pthread_cond_signal(cond)); +} + +void cond_broadcast(pthread_cond_t *cond) { + THREAD_INFALLIBLE(pthread_cond_broadcast(cond)); +} + +void cond_destroy(pthread_cond_t *cond) { + THREAD_INFALLIBLE(pthread_cond_destroy(cond)); +} + +void invoke_once(pthread_once_t *once, once_fn *fn) { + THREAD_INFALLIBLE(pthread_once(once, fn)); +} diff --git a/src/thread.h b/src/thread.h index b2edf17..3dd8422 100644 --- a/src/thread.h +++ b/src/thread.h @@ -8,13 +8,10 @@ #ifndef BFS_THREAD_H #define BFS_THREAD_H -#include "diag.h" -#include <errno.h> #include <pthread.h> -#include <string.h> -#define thread_verify(expr, cond) \ - bfs_verify((errno = (expr), (cond)), "%s: %s", #expr, strerror(errno)) +/** Thread entry point type. */ +typedef void *thread_fn(void *arg); /** * Wrapper for pthread_create(). @@ -22,26 +19,27 @@ * @return * 0 on success, -1 on error. */ -#define thread_create(thread, attr, fn, arg) \ - ((errno = pthread_create(thread, attr, fn, arg)) ? -1 : 0) +int thread_create(pthread_t *thread, const pthread_attr_t *attr, thread_fn *fn, void *arg); + +/** + * Set the name of a thread. + */ +void thread_setname(pthread_t thread, const char *name); /** * Wrapper for pthread_join(). */ -#define thread_join(thread, ret) \ - thread_verify(pthread_join(thread, ret), errno == 0) +void thread_join(pthread_t thread, void **ret); /** * Wrapper for pthread_mutex_init(). */ -#define mutex_init(mutex, attr) \ - ((errno = pthread_mutex_init(mutex, attr)) ? -1 : 0) +int mutex_init(pthread_mutex_t *mutex, pthread_mutexattr_t *attr); /** * Wrapper for pthread_mutex_lock(). */ -#define mutex_lock(mutex) \ - thread_verify(pthread_mutex_lock(mutex), errno == 0) +void mutex_lock(pthread_mutex_t *mutex); /** * Wrapper for pthread_mutex_trylock(). @@ -49,55 +47,49 @@ * @return * Whether the mutex was locked. */ -#define mutex_trylock(mutex) \ - (thread_verify(pthread_mutex_trylock(mutex), errno == 0 || errno == EBUSY), errno == 0) +bool mutex_trylock(pthread_mutex_t *mutex); /** * Wrapper for pthread_mutex_unlock(). */ -#define mutex_unlock(mutex) \ - thread_verify(pthread_mutex_unlock(mutex), errno == 0) +void mutex_unlock(pthread_mutex_t *mutex); /** * Wrapper for pthread_mutex_destroy(). */ -#define mutex_destroy(mutex) \ - thread_verify(pthread_mutex_destroy(mutex), errno == 0) +void mutex_destroy(pthread_mutex_t *mutex); /** * Wrapper for pthread_cond_init(). */ -#define cond_init(cond, attr) \ - ((errno = pthread_cond_init(cond, attr)) ? -1 : 0) +int cond_init(pthread_cond_t *cond, pthread_condattr_t *attr); /** * Wrapper for pthread_cond_wait(). */ -#define cond_wait(cond, mutex) \ - thread_verify(pthread_cond_wait(cond, mutex), errno == 0) +void cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex); /** * Wrapper for pthread_cond_signal(). */ -#define cond_signal(cond) \ - thread_verify(pthread_cond_signal(cond), errno == 0) +void cond_signal(pthread_cond_t *cond); /** * Wrapper for pthread_cond_broadcast(). */ -#define cond_broadcast(cond) \ - thread_verify(pthread_cond_broadcast(cond), errno == 0) +void cond_broadcast(pthread_cond_t *cond); /** * Wrapper for pthread_cond_destroy(). */ -#define cond_destroy(cond) \ - thread_verify(pthread_cond_destroy(cond), errno == 0) +void cond_destroy(pthread_cond_t *cond); + +/** pthread_once() callback type. */ +typedef void once_fn(void); /** * Wrapper for pthread_once(). */ -#define call_once(once, fn) \ - thread_verify(pthread_once(once, fn), errno == 0) +void invoke_once(pthread_once_t *once, once_fn *fn); #endif // BFS_THREAD_H @@ -82,22 +82,22 @@ */ #include "trie.h" + #include "alloc.h" +#include "bfs.h" #include "bit.h" -#include "config.h" #include "diag.h" #include "list.h" -#include <limits.h> + #include <stdint.h> -#include <stdlib.h> #include <string.h> -bfs_static_assert(CHAR_WIDTH == 8); +static_assert(CHAR_WIDTH == 8, "This trie implementation assumes 8-bit bytes."); -#if BFS_USE_TARGET_CLONES && (__i386__ || __x86_64__) -# define TARGET_CLONES_POPCNT __attribute__((target_clones("popcnt", "default"))) +#if __i386__ || __x86_64__ +# define _trie_clones _target_clones("popcnt", "default") #else -# define TARGET_CLONES_POPCNT +# define _trie_clones #endif /** Number of bits for the sparse array bitmap, aka the range of a nibble. */ @@ -132,34 +132,34 @@ struct trie_node { uintptr_t children[]; }; -/** Check if an encoded pointer is to a leaf. */ -static bool trie_is_leaf(uintptr_t ptr) { +/** Check if an encoded pointer is to an internal node. */ +static bool trie_is_node(uintptr_t ptr) { return ptr & 1; } -/** Decode a pointer to a leaf. */ -static struct trie_leaf *trie_decode_leaf(uintptr_t ptr) { - bfs_assert(trie_is_leaf(ptr)); - return (struct trie_leaf *)(ptr ^ 1); +/** Decode a pointer to an internal node. */ +static struct trie_node *trie_decode_node(uintptr_t ptr) { + bfs_assert(trie_is_node(ptr)); + return (struct trie_node *)(ptr - 1); } -/** Encode a pointer to a leaf. */ -static uintptr_t trie_encode_leaf(const struct trie_leaf *leaf) { - uintptr_t ptr = (uintptr_t)leaf ^ 1; - bfs_assert(trie_is_leaf(ptr)); +/** Encode a pointer to an internal node. */ +static uintptr_t trie_encode_node(const struct trie_node *node) { + uintptr_t ptr = (uintptr_t)node + 1; + bfs_assert(trie_is_node(ptr)); return ptr; } -/** Decode a pointer to an internal node. */ -static struct trie_node *trie_decode_node(uintptr_t ptr) { - bfs_assert(!trie_is_leaf(ptr)); - return (struct trie_node *)ptr; +/** Decode a pointer to a leaf. */ +static struct trie_leaf *trie_decode_leaf(uintptr_t ptr) { + bfs_assert(!trie_is_node(ptr)); + return (struct trie_leaf *)ptr; } -/** Encode a pointer to an internal node. */ -static uintptr_t trie_encode_node(const struct trie_node *node) { - uintptr_t ptr = (uintptr_t)node; - bfs_assert(!trie_is_leaf(ptr)); +/** Encode a pointer to a leaf. */ +static uintptr_t trie_encode_leaf(const struct trie_leaf *leaf) { + uintptr_t ptr = (uintptr_t)leaf; + bfs_assert(!trie_is_node(ptr)); return ptr; } @@ -171,20 +171,32 @@ void trie_init(struct trie *trie) { } /** Extract the nibble at a certain offset from a byte sequence. */ -static unsigned char trie_key_nibble(const void *key, size_t offset) { +static unsigned char trie_key_nibble(const void *key, size_t length, size_t offset) { const unsigned char *bytes = key; - size_t byte = offset >> 1; + size_t byte = offset / 2; + bfs_assert(byte < length); // A branchless version of // if (offset & 1) { - // return bytes[byte] >> 4; - // } else { // return bytes[byte] & 0xF; + // } else { + // return bytes[byte] >> 4; // } - unsigned int shift = (offset & 1) << 2; + unsigned int shift = 4 * ((offset + 1) % 2); return (bytes[byte] >> shift) & 0xF; } +/** Extract the nibble at a certain offset from a leaf. */ +static unsigned char trie_leaf_nibble(const struct trie_leaf *leaf, size_t offset) { + return trie_key_nibble(leaf->key, leaf->length, offset); +} + +/** Get the number of children of an internal node. */ +_trie_clones +static unsigned int trie_node_size(const struct trie_node *node) { + return count_ones((unsigned int)node->bitmap); +} + /** * Finds a leaf in the trie that matches the key at every branch. If the key * exists in the trie, the representative will match the searched key. But @@ -192,25 +204,24 @@ static unsigned char trie_key_nibble(const void *key, size_t offset) { * that case, the first mismatch between the key and the representative will be * the depth at which to make a new branch to insert the key. */ -TARGET_CLONES_POPCNT +_trie_clones static struct trie_leaf *trie_representative(const struct trie *trie, const void *key, size_t length) { uintptr_t ptr = trie->root; - if (!ptr) { - return NULL; - } - size_t offset = 0; - while (!trie_is_leaf(ptr)) { + size_t offset = 0, limit = 2 * length; + while (trie_is_node(ptr)) { struct trie_node *node = trie_decode_node(ptr); offset += node->offset; unsigned int index = 0; - if ((offset >> 1) < length) { - unsigned char nibble = trie_key_nibble(key, offset); + if (offset < limit) { + unsigned char nibble = trie_key_nibble(key, length, offset); unsigned int bit = 1U << nibble; - if (node->bitmap & bit) { - index = count_ones(node->bitmap & (bit - 1)); - } + unsigned int map = node->bitmap; + unsigned int bits = map & (bit - 1); + unsigned int mask = -!!(map & bit); + // index = (map & bit) ? count_ones(bits) : 0; + index = count_ones(bits) & mask; } ptr = node->children[index]; } @@ -222,7 +233,8 @@ struct trie_leaf *trie_find_str(const struct trie *trie, const char *key) { return trie_find_mem(trie, key, strlen(key) + 1); } -struct trie_leaf *trie_find_mem(const struct trie *trie, const void *key, size_t length) { +_trie_clones +static struct trie_leaf *trie_find_mem_impl(const struct trie *trie, const void *key, size_t length) { struct trie_leaf *rep = trie_representative(trie, key, length); if (rep && rep->length == length && memcmp(rep->key, key, length) == 0) { return rep; @@ -231,7 +243,22 @@ struct trie_leaf *trie_find_mem(const struct trie *trie, const void *key, size_t } } -struct trie_leaf *trie_find_postfix(const struct trie *trie, const char *key) { +struct trie_leaf *trie_find_mem(const struct trie *trie, const void *key, size_t length) { + return trie_find_mem_impl(trie, key, length); +} + +void *trie_get_str(const struct trie *trie, const char *key) { + const struct trie_leaf *leaf = trie_find_str(trie, key); + return leaf ? leaf->value : NULL; +} + +void *trie_get_mem(const struct trie *trie, const void *key, size_t length) { + const struct trie_leaf *leaf = trie_find_mem(trie, key, length); + return leaf ? leaf->value : NULL; +} + +_trie_clones +static struct trie_leaf *trie_find_postfix_impl(const struct trie *trie, const char *key) { size_t length = strlen(key); struct trie_leaf *rep = trie_representative(trie, key, length + 1); if (rep && rep->length >= length && memcmp(rep->key, key, length) == 0) { @@ -241,6 +268,10 @@ struct trie_leaf *trie_find_postfix(const struct trie *trie, const char *key) { } } +struct trie_leaf *trie_find_postfix(const struct trie *trie, const char *key) { + return trie_find_postfix_impl(trie, key); +} + /** * Find a leaf that may end at the current node. */ @@ -252,10 +283,10 @@ static struct trie_leaf *trie_terminal_leaf(const struct trie_node *node) { } uintptr_t ptr = node->children[0]; - if (trie_is_leaf(ptr)) { - return trie_decode_leaf(ptr); - } else { + if (trie_is_node(ptr)) { node = trie_decode_node(ptr); + } else { + return trie_decode_leaf(ptr); } } @@ -271,7 +302,7 @@ static bool trie_check_prefix(struct trie_leaf *leaf, size_t skip, const char *k } } -TARGET_CLONES_POPCNT +_trie_clones static struct trie_leaf *trie_find_prefix_impl(const struct trie *trie, const char *key) { uintptr_t ptr = trie->root; if (!ptr) { @@ -282,21 +313,21 @@ static struct trie_leaf *trie_find_prefix_impl(const struct trie *trie, const ch size_t skip = 0; size_t length = strlen(key) + 1; - size_t offset = 0; - while (!trie_is_leaf(ptr)) { + size_t offset = 0, limit = 2 * length; + while (trie_is_node(ptr)) { struct trie_node *node = trie_decode_node(ptr); offset += node->offset; - if ((offset >> 1) >= length) { + if (offset >= limit) { return best; } struct trie_leaf *leaf = trie_terminal_leaf(node); if (trie_check_prefix(leaf, skip, key, length)) { best = leaf; - skip = offset >> 1; + skip = offset / 2; } - unsigned char nibble = trie_key_nibble(key, offset); + unsigned char nibble = trie_key_nibble(key, length, offset); unsigned int bit = 1U << nibble; if (node->bitmap & bit) { unsigned int index = count_ones(node->bitmap & (bit - 1)); @@ -325,6 +356,7 @@ static struct trie_leaf *trie_leaf_alloc(struct trie *trie, const void *key, siz return NULL; } + LIST_ITEM_INIT(leaf); LIST_APPEND(trie, leaf); leaf->value = NULL; @@ -355,16 +387,10 @@ static struct trie_node *trie_node_realloc(struct trie *trie, struct trie_node * /** Free a node. */ static void trie_node_free(struct trie *trie, struct trie_node *node, size_t size) { - bfs_assert(size == (size_t)count_ones(node->bitmap)); + bfs_assert(size == trie_node_size(node)); varena_free(&trie->nodes, node, size); } -#if ENDIAN_NATIVE == ENDIAN_LITTLE -# define TRIE_BSWAP(n) (n) -#elif ENDIAN_NATIVE == ENDIAN_BIG -# define TRIE_BSWAP(n) bswap(n) -#endif - /** Find the offset of the first nibble that differs between two keys. */ static size_t trie_mismatch(const struct trie_leaf *rep, const void *key, size_t length) { if (!rep) { @@ -378,32 +404,34 @@ static size_t trie_mismatch(const struct trie_leaf *rep, const void *key, size_t const char *rep_bytes = rep->key; const char *key_bytes = key; - size_t i = 0; - for (size_t chunk = sizeof(chunk); i + chunk <= length; i += chunk) { - size_t rep_chunk, key_chunk; - memcpy(&rep_chunk, rep_bytes + i, sizeof(rep_chunk)); - memcpy(&key_chunk, key_bytes + i, sizeof(key_chunk)); - - if (rep_chunk != key_chunk) { -#ifdef TRIE_BSWAP - size_t diff = TRIE_BSWAP(rep_chunk ^ key_chunk); - i *= 2; - i += trailing_zeros(diff) / 4; - return i; + size_t ret = 0, i = 0; + +#define CHUNK(n) CHUNK_(uint##n##_t, load8_beu##n) +#define CHUNK_(type, load8) \ + (length - i >= sizeof(type)) { \ + type rep_chunk = load8(rep_bytes + i); \ + type key_chunk = load8(key_bytes + i); \ + type diff = rep_chunk ^ key_chunk; \ + ret += leading_zeros(diff) / 4; \ + if (diff) { \ + return ret; \ + } \ + i += sizeof(type); \ + } + +#if SIZE_WIDTH >= 64 + while CHUNK(64); + if CHUNK(32); #else - break; + while CHUNK(32); #endif - } - } + if CHUNK(16); + if CHUNK(8); - for (; i < length; ++i) { - unsigned char diff = rep_bytes[i] ^ key_bytes[i]; - if (diff) { - return 2 * i + !(diff & 0xF); - } - } +#undef CHUNK_ +#undef CHUNK - return 2 * i; + return ret; } /** @@ -428,10 +456,10 @@ static size_t trie_mismatch(const struct trie_leaf *rep, const void *key, size_t * | Z * +--->... */ -TARGET_CLONES_POPCNT +_trie_clones static struct trie_leaf *trie_node_insert(struct trie *trie, uintptr_t *ptr, struct trie_leaf *leaf, unsigned char nibble) { struct trie_node *node = trie_decode_node(*ptr); - unsigned int size = count_ones(node->bitmap); + unsigned int size = trie_node_size(node); // Double the capacity every power of two if (has_single_bit(size)) { @@ -482,10 +510,10 @@ static struct trie_leaf *trie_node_insert(struct trie *trie, uintptr_t *ptr, str * | Y * +--->key */ -static uintptr_t *trie_jump(struct trie *trie, uintptr_t *ptr, const char *key, size_t *offset) { +static uintptr_t *trie_jump(struct trie *trie, uintptr_t *ptr, size_t *offset) { // We only ever need to jump to leaf nodes, since internal nodes are // guaranteed to be within OFFSET_MAX anyway - bfs_assert(trie_is_leaf(*ptr)); + struct trie_leaf *leaf = trie_decode_leaf(*ptr); struct trie_node *node = trie_node_alloc(trie, 1); if (!node) { @@ -495,7 +523,7 @@ static uintptr_t *trie_jump(struct trie *trie, uintptr_t *ptr, const char *key, *offset += OFFSET_MAX; node->offset = OFFSET_MAX; - unsigned char nibble = trie_key_nibble(key, *offset); + unsigned char nibble = trie_leaf_nibble(leaf, *offset); node->bitmap = 1 << nibble; node->children[0] = *ptr; @@ -521,8 +549,8 @@ static uintptr_t *trie_jump(struct trie *trie, uintptr_t *ptr, const char *key, * +--->leaf */ static struct trie_leaf *trie_split(struct trie *trie, uintptr_t *ptr, struct trie_leaf *leaf, struct trie_leaf *rep, size_t offset, size_t mismatch) { - unsigned char key_nibble = trie_key_nibble(leaf->key, mismatch); - unsigned char rep_nibble = trie_key_nibble(rep->key, mismatch); + unsigned char key_nibble = trie_leaf_nibble(leaf, mismatch); + unsigned char rep_nibble = trie_leaf_nibble(rep, mismatch); bfs_assert(key_nibble != rep_nibble); struct trie_node *node = trie_node_alloc(trie, 2); @@ -534,7 +562,7 @@ static struct trie_leaf *trie_split(struct trie *trie, uintptr_t *ptr, struct tr node->bitmap = (1 << key_nibble) | (1 << rep_nibble); size_t delta = mismatch - offset; - if (!trie_is_leaf(*ptr)) { + if (trie_is_node(*ptr)) { struct trie_node *child = trie_decode_node(*ptr); child->offset -= delta; } @@ -551,12 +579,18 @@ struct trie_leaf *trie_insert_str(struct trie *trie, const char *key) { return trie_insert_mem(trie, key, strlen(key) + 1); } -TARGET_CLONES_POPCNT +_trie_clones static struct trie_leaf *trie_insert_mem_impl(struct trie *trie, const void *key, size_t length) { struct trie_leaf *rep = trie_representative(trie, key, length); size_t mismatch = trie_mismatch(rep, key, length); - if (mismatch >= (length << 1)) { + size_t misbyte = mismatch / 2; + if (misbyte >= length) { + bfs_assert(misbyte == length); return rep; + } else if (rep && misbyte >= rep->length) { + bfs_bug("trie keys must be prefix-free"); + errno = EINVAL; + return NULL; } struct trie_leaf *leaf = trie_leaf_alloc(trie, key, length); @@ -571,14 +605,14 @@ static struct trie_leaf *trie_insert_mem_impl(struct trie *trie, const void *key size_t offset = 0; uintptr_t *ptr = &trie->root; - while (!trie_is_leaf(*ptr)) { + while (trie_is_node(*ptr)) { struct trie_node *node = trie_decode_node(*ptr); if (offset + node->offset > mismatch) { break; } offset += node->offset; - unsigned char nibble = trie_key_nibble(key, offset); + unsigned char nibble = trie_leaf_nibble(leaf, offset); unsigned int bit = 1U << nibble; if (node->bitmap & bit) { bfs_assert(offset < mismatch); @@ -591,7 +625,7 @@ static struct trie_leaf *trie_insert_mem_impl(struct trie *trie, const void *key } while (mismatch - offset > OFFSET_MAX) { - ptr = trie_jump(trie, ptr, key, &offset); + ptr = trie_jump(trie, ptr, &offset); if (!ptr) { trie_leaf_free(trie, leaf); return NULL; @@ -605,13 +639,33 @@ struct trie_leaf *trie_insert_mem(struct trie *trie, const void *key, size_t len return trie_insert_mem_impl(trie, key, length); } +int trie_set_str(struct trie *trie, const char *key, const void *value) { + struct trie_leaf *leaf = trie_insert_str(trie, key); + if (leaf) { + leaf->value = (void *)value; + return 0; + } else { + return -1; + } +} + +int trie_set_mem(struct trie *trie, const void *key, size_t length, const void *value) { + struct trie_leaf *leaf = trie_insert_mem(trie, key, length); + if (leaf) { + leaf->value = (void *)value; + return 0; + } else { + return -1; + } +} + /** Free a chain of singleton nodes. */ static void trie_free_singletons(struct trie *trie, uintptr_t ptr) { - while (!trie_is_leaf(ptr)) { + while (trie_is_node(ptr)) { struct trie_node *node = trie_decode_node(ptr); // Make sure the bitmap is a power of two, i.e. it has just one child - bfs_assert(has_single_bit(node->bitmap)); + bfs_assert(has_single_bit((size_t)node->bitmap)); ptr = node->children[0]; trie_node_free(trie, node, 1); @@ -639,7 +693,7 @@ static void trie_free_singletons(struct trie *trie, uintptr_t ptr) { */ static int trie_collapse_node(struct trie *trie, uintptr_t *parent, struct trie_node *parent_node, unsigned int child_index) { uintptr_t other = parent_node->children[child_index ^ 1]; - if (!trie_is_leaf(other)) { + if (trie_is_node(other)) { struct trie_node *other_node = trie_decode_node(other); if (other_node->offset + parent_node->offset <= OFFSET_MAX) { other_node->offset += parent_node->offset; @@ -649,22 +703,21 @@ static int trie_collapse_node(struct trie *trie, uintptr_t *parent, struct trie_ } *parent = other; - trie_node_free(trie, parent_node, 1); + trie_node_free(trie, parent_node, 2); return 0; } -TARGET_CLONES_POPCNT +_trie_clones static void trie_remove_impl(struct trie *trie, struct trie_leaf *leaf) { uintptr_t *child = &trie->root; uintptr_t *parent = NULL; unsigned int child_bit = 0, child_index = 0; size_t offset = 0; - while (!trie_is_leaf(*child)) { + while (trie_is_node(*child)) { struct trie_node *node = trie_decode_node(*child); offset += node->offset; - bfs_assert((offset >> 1) < leaf->length); - unsigned char nibble = trie_key_nibble(leaf->key, offset); + unsigned char nibble = trie_leaf_nibble(leaf, offset); unsigned int bit = 1U << nibble; unsigned int bitmap = node->bitmap; bfs_assert(bitmap & bit); @@ -689,19 +742,19 @@ static void trie_remove_impl(struct trie *trie, struct trie_leaf *leaf) { } struct trie_node *node = trie_decode_node(*parent); - child = node->children + child_index; - trie_free_singletons(trie, *child); + trie_free_singletons(trie, node->children[child_index]); - node->bitmap ^= child_bit; - unsigned int parent_size = count_ones(node->bitmap); - bfs_assert(parent_size > 0); - if (parent_size == 1 && trie_collapse_node(trie, parent, node, child_index) == 0) { + unsigned int parent_size = trie_node_size(node); + bfs_assert(parent_size > 1); + if (parent_size == 2 && trie_collapse_node(trie, parent, node, child_index) == 0) { return; } - if (child_index < parent_size) { - memmove(child, child + 1, (parent_size - child_index)*sizeof(*child)); + for (size_t i = child_index; i + 1 < parent_size; ++i) { + node->children[i] = node->children[i + 1]; } + node->bitmap &= ~child_bit; + --parent_size; if (has_single_bit(parent_size)) { node = trie_node_realloc(trie, node, 2 * parent_size, parent_size); @@ -4,8 +4,9 @@ #ifndef BFS_TRIE_H #define BFS_TRIE_H -#include "config.h" #include "alloc.h" +#include "list.h" + #include <stddef.h> #include <stdint.h> @@ -45,9 +46,9 @@ void trie_init(struct trie *trie); /** * Find the leaf for a string key. * - * @param trie + * @trie * The trie to search. - * @param key + * @key * The key to look up. * @return * The found leaf, or NULL if the key is not present. @@ -57,11 +58,11 @@ struct trie_leaf *trie_find_str(const struct trie *trie, const char *key); /** * Find the leaf for a fixed-size key. * - * @param trie + * @trie * The trie to search. - * @param key + * @key * The key to look up. - * @param length + * @length * The length of the key in bytes. * @return * The found leaf, or NULL if the key is not present. @@ -69,11 +70,37 @@ struct trie_leaf *trie_find_str(const struct trie *trie, const char *key); struct trie_leaf *trie_find_mem(const struct trie *trie, const void *key, size_t length); /** + * Get the value associated with a string key. + * + * @trie + * The trie to search. + * @key + * The key to look up. + * @return + * The found value, or NULL if the key is not present. + */ +void *trie_get_str(const struct trie *trie, const char *key); + +/** + * Get the value associated with a fixed-size key. + * + * @trie + * The trie to search. + * @key + * The key to look up. + * @length + * The length of the key in bytes. + * @return + * The found value, or NULL if the key is not present. + */ +void *trie_get_mem(const struct trie *trie, const void *key, size_t length); + +/** * Find the shortest leaf that starts with a given key. * - * @param trie + * @trie * The trie to search. - * @param key + * @key * The key to look up. * @return * A leaf that starts with the given key, or NULL. @@ -83,9 +110,9 @@ struct trie_leaf *trie_find_postfix(const struct trie *trie, const char *key); /** * Find the leaf that is the longest prefix of the given key. * - * @param trie + * @trie * The trie to search. - * @param key + * @key * The key to look up. * @return * The longest prefix match for the given key, or NULL. @@ -95,9 +122,9 @@ struct trie_leaf *trie_find_prefix(const struct trie *trie, const char *key); /** * Insert a string key into the trie. * - * @param trie + * @trie * The trie to modify. - * @param key + * @key * The key to insert. * @return * The inserted leaf, or NULL on failure. @@ -107,11 +134,11 @@ struct trie_leaf *trie_insert_str(struct trie *trie, const char *key); /** * Insert a fixed-size key into the trie. * - * @param trie + * @trie * The trie to modify. - * @param key + * @key * The key to insert. - * @param length + * @length * The length of the key in bytes. * @return * The inserted leaf, or NULL on failure. @@ -119,11 +146,41 @@ struct trie_leaf *trie_insert_str(struct trie *trie, const char *key); struct trie_leaf *trie_insert_mem(struct trie *trie, const void *key, size_t length); /** + * Set the value for a string key. + * + * @trie + * The trie to modify. + * @key + * The key to insert. + * @value + * The value to set. + * @return + * 0 on success, -1 on error. + */ +int trie_set_str(struct trie *trie, const char *key, const void *value); + +/** + * Set the value for a fixed-size key. + * + * @trie + * The trie to modify. + * @key + * The key to insert. + * @length + * The length of the key in bytes. + * @value + * The value to set. + * @return + * 0 on success, -1 on error. + */ +int trie_set_mem(struct trie *trie, const void *key, size_t length, const void *value); + +/** * Remove a leaf from a trie. * - * @param trie + * @trie * The trie to modify. - * @param leaf + * @leaf * The leaf to remove. */ void trie_remove(struct trie *trie, struct trie_leaf *leaf); @@ -141,9 +198,7 @@ void trie_destroy(struct trie *trie); /** * Iterate over the leaves of a trie. */ -#define TRIE_FOR_EACH(trie, leaf) \ - for (struct trie_leaf *leaf = (trie)->head, *_next; \ - leaf && (_next = leaf->next, true); \ - leaf = _next) +#define for_trie(leaf, trie) \ + for_list (struct trie_leaf, leaf, trie) #endif // BFS_TRIE_H @@ -2,12 +2,14 @@ // SPDX-License-Identifier: 0BSD #include "typo.h" + #include <limits.h> +#include <stdint.h> #include <stdlib.h> #include <string.h> // Assume QWERTY layout for now -static const int key_coords[UCHAR_MAX + 1][3] = { +static const int8_t key_coords[UCHAR_MAX + 1][3] = { ['`'] = { 0, 0, 0}, ['~'] = { 0, 0, 1}, ['1'] = { 3, 0, 0}, @@ -112,7 +114,7 @@ static const int key_coords[UCHAR_MAX + 1][3] = { }; static int char_distance(char a, char b) { - const int *ac = key_coords[(unsigned char)a], *bc = key_coords[(unsigned char)b]; + const int8_t *ac = key_coords[(unsigned char)a], *bc = key_coords[(unsigned char)b]; int ret = 0; for (int i = 0; i < 3; ++i) { ret += abs(ac[i] - bc[i]); @@ -7,9 +7,9 @@ /** * Find the "typo" distance between two strings. * - * @param actual + * @actual * The actual string typed by the user. - * @param expected + * @expected * The expected valid string. * @return The distance between the two strings. */ diff --git a/src/version.c b/src/version.c new file mode 100644 index 0000000..7479a9f --- /dev/null +++ b/src/version.c @@ -0,0 +1,32 @@ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD + +#include "bfs.h" + +const char bfs_version[] = { +#include "version.i" +}; + +const char bfs_confflags[] = { +#include "confflags.i" +}; + +const char bfs_cc[] = { +#include "cc.i" +}; + +const char bfs_cppflags[] = { +#include "cppflags.i" +}; + +const char bfs_cflags[] = { +#include "cflags.i" +}; + +const char bfs_ldflags[] = { +#include "ldflags.i" +}; + +const char bfs_ldlibs[] = { +#include "ldlibs.i" +}; diff --git a/src/xregex.c b/src/xregex.c index beb6676..796544e 100644 --- a/src/xregex.c +++ b/src/xregex.c @@ -2,17 +2,20 @@ // SPDX-License-Identifier: 0BSD #include "xregex.h" + #include "alloc.h" -#include "config.h" +#include "bfs.h" +#include "bfstd.h" #include "diag.h" -#include "thread.h" #include "sanity.h" +#include "thread.h" + #include <errno.h> #include <pthread.h> #include <stdlib.h> #include <string.h> -#if BFS_USE_ONIGURUMA +#if BFS_WITH_ONIGURUMA # include <langinfo.h> # include <oniguruma.h> #else @@ -20,7 +23,7 @@ #endif struct bfs_regex { -#if BFS_USE_ONIGURUMA +#if BFS_WITH_ONIGURUMA unsigned char *pattern; OnigRegex impl; int err; @@ -31,11 +34,17 @@ struct bfs_regex { #endif }; -#if BFS_USE_ONIGURUMA +#if BFS_WITH_ONIGURUMA static int bfs_onig_status; static OnigEncoding bfs_onig_enc; +static OnigSyntaxType bfs_onig_syntax_awk; +static OnigSyntaxType bfs_onig_syntax_gnu_awk; +static OnigSyntaxType bfs_onig_syntax_emacs; +static OnigSyntaxType bfs_onig_syntax_egrep; +static OnigSyntaxType bfs_onig_syntax_gnu_find; + /** pthread_once() callback. */ static void bfs_onig_once(void) { // Fall back to ASCII by default @@ -102,12 +111,41 @@ static void bfs_onig_once(void) { if (bfs_onig_status != ONIG_NORMAL) { bfs_onig_enc = NULL; } + + // Compute the GNU extensions + OnigSyntaxType *ere = ONIG_SYNTAX_POSIX_EXTENDED; + OnigSyntaxType *gnu = ONIG_SYNTAX_GNU_REGEX; + unsigned int gnu_op = gnu->op & ~ere->op; + unsigned int gnu_op2 = gnu->op2 & ~ere->op2; + unsigned int gnu_behavior = gnu->behavior & ~ere->behavior; + + onig_copy_syntax(&bfs_onig_syntax_awk, ONIG_SYNTAX_POSIX_EXTENDED); + bfs_onig_syntax_awk.behavior |= ONIG_SYN_ALLOW_INVALID_INTERVAL; + bfs_onig_syntax_awk.behavior |= ONIG_SYN_BACKSLASH_ESCAPE_IN_CC; + + onig_copy_syntax(&bfs_onig_syntax_gnu_awk, &bfs_onig_syntax_awk); + bfs_onig_syntax_gnu_awk.op |= gnu_op; + bfs_onig_syntax_gnu_awk.op2 |= gnu_op2; + bfs_onig_syntax_gnu_awk.behavior |= gnu_behavior; + bfs_onig_syntax_gnu_awk.behavior &= ~ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS; + bfs_onig_syntax_gnu_awk.behavior &= ~ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS; + + // https://github.com/kkos/oniguruma/issues/296 + onig_copy_syntax(&bfs_onig_syntax_emacs, ONIG_SYNTAX_EMACS); + bfs_onig_syntax_emacs.op2 |= ONIG_SYN_OP2_QMARK_GROUP_EFFECT; + + onig_copy_syntax(&bfs_onig_syntax_egrep, ONIG_SYNTAX_POSIX_EXTENDED); + bfs_onig_syntax_egrep.behavior |= ONIG_SYN_ALLOW_INVALID_INTERVAL; + bfs_onig_syntax_egrep.behavior &= ~ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS; + + onig_copy_syntax(&bfs_onig_syntax_gnu_find, &bfs_onig_syntax_emacs); + bfs_onig_syntax_gnu_find.options |= ONIG_OPTION_MULTILINE; } /** Initialize Oniguruma. */ static int bfs_onig_initialize(OnigEncoding *enc) { static pthread_once_t once = PTHREAD_ONCE_INIT; - call_once(&once, bfs_onig_once); + invoke_once(&once, bfs_onig_once); *enc = bfs_onig_enc; return bfs_onig_status; @@ -120,7 +158,7 @@ int bfs_regcomp(struct bfs_regex **preg, const char *pattern, enum bfs_regex_typ return -1; } -#if BFS_USE_ONIGURUMA +#if BFS_WITH_ONIGURUMA // onig_error_code_to_str() says // // don't call this after the pattern argument of onig_new() is freed @@ -142,12 +180,24 @@ int bfs_regcomp(struct bfs_regex **preg, const char *pattern, enum bfs_regex_typ case BFS_REGEX_POSIX_EXTENDED: syntax = ONIG_SYNTAX_POSIX_EXTENDED; break; + case BFS_REGEX_AWK: + syntax = &bfs_onig_syntax_awk; + break; + case BFS_REGEX_GNU_AWK: + syntax = &bfs_onig_syntax_gnu_awk; + break; case BFS_REGEX_EMACS: - syntax = ONIG_SYNTAX_EMACS; + syntax = &bfs_onig_syntax_emacs; break; case BFS_REGEX_GREP: syntax = ONIG_SYNTAX_GREP; break; + case BFS_REGEX_EGREP: + syntax = &bfs_onig_syntax_egrep; + break; + case BFS_REGEX_GNU_FIND: + syntax = &bfs_onig_syntax_gnu_find; + break; } bfs_assert(syntax, "Invalid regex type"); @@ -203,7 +253,7 @@ fail: int bfs_regexec(struct bfs_regex *regex, const char *str, enum bfs_regexec_flags flags) { size_t len = strlen(str); -#if BFS_USE_ONIGURUMA +#if BFS_WITH_ONIGURUMA const unsigned char *ustr = (const unsigned char *)str; const unsigned char *end = ustr + len; @@ -262,7 +312,7 @@ int bfs_regexec(struct bfs_regex *regex, const char *str, enum bfs_regexec_flags void bfs_regfree(struct bfs_regex *regex) { if (regex) { -#if BFS_USE_ONIGURUMA +#if BFS_WITH_ONIGURUMA onig_free(regex->impl); free(regex->pattern); #else @@ -274,10 +324,10 @@ void bfs_regfree(struct bfs_regex *regex) { char *bfs_regerror(const struct bfs_regex *regex) { if (!regex) { - return strdup(strerror(ENOMEM)); + return strdup(xstrerror(ENOMEM)); } -#if BFS_USE_ONIGURUMA +#if BFS_WITH_ONIGURUMA unsigned char *str = malloc(ONIG_MAX_ERROR_MESSAGE_LEN); if (str) { onig_error_code_to_str(str, regex->err, ®ex->einfo); diff --git a/src/xregex.h b/src/xregex.h index 998a2b0..c4504ee 100644 --- a/src/xregex.h +++ b/src/xregex.h @@ -15,8 +15,12 @@ struct bfs_regex; enum bfs_regex_type { BFS_REGEX_POSIX_BASIC, BFS_REGEX_POSIX_EXTENDED, + BFS_REGEX_AWK, + BFS_REGEX_GNU_AWK, BFS_REGEX_EMACS, BFS_REGEX_GREP, + BFS_REGEX_EGREP, + BFS_REGEX_GNU_FIND, }; /** @@ -38,13 +42,13 @@ enum bfs_regexec_flags { /** * Wrapper for regcomp() that supports additional regex types. * - * @param[out] preg + * @preg[out] * Will hold the compiled regex. - * @param pattern + * @pattern * The regular expression to compile. - * @param type + * @type * The regular expression syntax to use. - * @param flags + * @flags * Regex compilation flags. * @return * 0 on success, -1 on failure. @@ -54,11 +58,11 @@ int bfs_regcomp(struct bfs_regex **preg, const char *pattern, enum bfs_regex_typ /** * Wrapper for regexec(). * - * @param regex + * @regex * The regular expression to execute. - * @param str + * @str * The string to match against. - * @param flags + * @flags * Regex execution flags. * @return * 1 for a match, 0 for no match, -1 on failure. @@ -73,7 +77,7 @@ void bfs_regfree(struct bfs_regex *regex); /** * Get a human-readable regex error message. * - * @param regex + * @regex * The compiled regex. * @return * A human-readable description of the error, which should be free()'d. diff --git a/src/xspawn.c b/src/xspawn.c index 2cabdcc..3fa4e60 100644 --- a/src/xspawn.c +++ b/src/xspawn.c @@ -2,27 +2,36 @@ // SPDX-License-Identifier: 0BSD #include "xspawn.h" + #include "alloc.h" +#include "bfs.h" #include "bfstd.h" -#include "config.h" +#include "diag.h" #include "list.h" +#include "sighook.h" + #include <errno.h> #include <fcntl.h> +#include <signal.h> #include <stdlib.h> #include <string.h> #include <sys/resource.h> #include <sys/types.h> -#include <sys/wait.h> #include <unistd.h> -#if BFS_USE_PATHS_H +#if __has_include(<paths.h>) # include <paths.h> #endif +#if BFS_POSIX_SPAWN >= 0 +# include <spawn.h> +#endif + /** * Types of spawn actions. */ enum bfs_spawn_op { + BFS_SPAWN_OPEN, BFS_SPAWN_CLOSE, BFS_SPAWN_DUP2, BFS_SPAWN_FCHDIR, @@ -33,115 +42,509 @@ enum bfs_spawn_op { * A spawn action. */ struct bfs_spawn_action { + /** The next action in the list. */ struct bfs_spawn_action *next; + /** This action's operation. */ enum bfs_spawn_op op; + /** The input fd (or -1). */ int in_fd; + /** The output fd (or -1). */ int out_fd; - int resource; - struct rlimit rlimit; + + /** Operation-specific args. */ + union { + /** BFS_SPAWN_OPEN args. */ + struct { + const char *path; + int flags; + mode_t mode; + }; + + /** BFS_SPAWN_SETRLIMIT args. */ + struct { + int resource; + struct rlimit rlimit; + }; + }; }; int bfs_spawn_init(struct bfs_spawn *ctx) { ctx->flags = 0; SLIST_INIT(ctx); + +#if BFS_POSIX_SPAWN >= 0 + if (sysoption(SPAWN) > 0) { + ctx->flags |= BFS_SPAWN_USE_POSIX; + + errno = posix_spawn_file_actions_init(&ctx->actions); + if (errno != 0) { + return -1; + } + + errno = posix_spawnattr_init(&ctx->attr); + if (errno != 0) { + posix_spawn_file_actions_destroy(&ctx->actions); + return -1; + } + } +#endif + return 0; } +/** + * Clear the BFS_SPAWN_USE_POSIX flag and free the attributes. + */ +static void bfs_spawn_clear_posix(struct bfs_spawn *ctx) { + if (ctx->flags & BFS_SPAWN_USE_POSIX) { + ctx->flags &= ~BFS_SPAWN_USE_POSIX; + +#if BFS_POSIX_SPAWN >= 0 + posix_spawnattr_destroy(&ctx->attr); + posix_spawn_file_actions_destroy(&ctx->actions); +#endif + } +} + int bfs_spawn_destroy(struct bfs_spawn *ctx) { - while (ctx->head) { - free(SLIST_POP(ctx)); + bfs_spawn_clear_posix(ctx); + + for_slist (struct bfs_spawn_action, action, ctx) { + free(action); } return 0; } -int bfs_spawn_setflags(struct bfs_spawn *ctx, enum bfs_spawn_flags flags) { - ctx->flags = flags; +#if BFS_POSIX_SPAWN >= 0 +/** Set some posix_spawnattr flags. */ +_maybe_unused +static int bfs_spawn_addflags(struct bfs_spawn *ctx, short flags) { + short prev; + errno = posix_spawnattr_getflags(&ctx->attr, &prev); + if (errno != 0) { + return -1; + } + + short next = prev | flags; + if (next != prev) { + errno = posix_spawnattr_setflags(&ctx->attr, next); + if (errno != 0) { + return -1; + } + } + return 0; } +#endif -/** Add a spawn action to the chain. */ -static struct bfs_spawn_action *bfs_spawn_add(struct bfs_spawn *ctx, enum bfs_spawn_op op) { +/** Allocate a spawn action. */ +static struct bfs_spawn_action *bfs_spawn_action(enum bfs_spawn_op op) { struct bfs_spawn_action *action = ALLOC(struct bfs_spawn_action); if (!action) { return NULL; } - action->next = NULL; + SLIST_ITEM_INIT(action); action->op = op; action->in_fd = -1; action->out_fd = -1; + return action; +} +int bfs_spawn_addopen(struct bfs_spawn *ctx, int fd, const char *path, int flags, mode_t mode) { + struct bfs_spawn_action *action = bfs_spawn_action(BFS_SPAWN_OPEN); + if (!action) { + return -1; + } + +#if BFS_POSIX_SPAWN >= 0 + if (ctx->flags & BFS_SPAWN_USE_POSIX) { + errno = posix_spawn_file_actions_addopen(&ctx->actions, fd, path, flags, mode); + if (errno != 0) { + free(action); + return -1; + } + } +#endif + + action->out_fd = fd; + action->path = path; + action->flags = flags; + action->mode = mode; SLIST_APPEND(ctx, action); - return action; + return 0; } int bfs_spawn_addclose(struct bfs_spawn *ctx, int fd) { - if (fd < 0) { - errno = EBADF; + struct bfs_spawn_action *action = bfs_spawn_action(BFS_SPAWN_CLOSE); + if (!action) { return -1; } - struct bfs_spawn_action *action = bfs_spawn_add(ctx, BFS_SPAWN_CLOSE); - if (action) { - action->out_fd = fd; - return 0; - } else { - return -1; +#if BFS_POSIX_SPAWN >= 0 + if (ctx->flags & BFS_SPAWN_USE_POSIX) { + errno = posix_spawn_file_actions_addclose(&ctx->actions, fd); + if (errno != 0) { + free(action); + return -1; + } } +#endif + + action->out_fd = fd; + SLIST_APPEND(ctx, action); + return 0; } int bfs_spawn_adddup2(struct bfs_spawn *ctx, int oldfd, int newfd) { - if (oldfd < 0 || newfd < 0) { - errno = EBADF; + struct bfs_spawn_action *action = bfs_spawn_action(BFS_SPAWN_DUP2); + if (!action) { return -1; } - struct bfs_spawn_action *action = bfs_spawn_add(ctx, BFS_SPAWN_DUP2); - if (action) { - action->in_fd = oldfd; - action->out_fd = newfd; - return 0; - } else { - return -1; +#if BFS_POSIX_SPAWN >= 0 + if (ctx->flags & BFS_SPAWN_USE_POSIX) { + errno = posix_spawn_file_actions_adddup2(&ctx->actions, oldfd, newfd); + if (errno != 0) { + free(action); + return -1; + } } +#endif + + action->in_fd = oldfd; + action->out_fd = newfd; + SLIST_APPEND(ctx, action); + return 0; } +/** + * https://www.austingroupbugs.net/view.php?id=1208#c4830 says: + * + * ... a search of the directories passed as the environment variable + * PATH ..., using the working directory of the child process after all + * file_actions have been performed. + * + * but macOS and NetBSD resolve the PATH *before* file_actions (because there + * posix_spawn() is its own syscall). + */ +#define BFS_POSIX_SPAWNP_AFTER_FCHDIR !(__APPLE__ || __NetBSD__) + int bfs_spawn_addfchdir(struct bfs_spawn *ctx, int fd) { - if (fd < 0) { - errno = EBADF; + struct bfs_spawn_action *action = bfs_spawn_action(BFS_SPAWN_FCHDIR); + if (!action) { return -1; } - struct bfs_spawn_action *action = bfs_spawn_add(ctx, BFS_SPAWN_FCHDIR); - if (action) { - action->in_fd = fd; - return 0; +#if BFS_HAS_POSIX_SPAWN_ADDFCHDIR +# define BFS_POSIX_SPAWN_ADDFCHDIR posix_spawn_file_actions_addfchdir +#elif BFS_HAS_POSIX_SPAWN_ADDFCHDIR_NP +# define BFS_POSIX_SPAWN_ADDFCHDIR posix_spawn_file_actions_addfchdir_np +#endif + +#if BFS_POSIX_SPAWN >= 0 && defined(BFS_POSIX_SPAWN_ADDFCHDIR) + if (ctx->flags & BFS_SPAWN_USE_POSIX) { + errno = BFS_POSIX_SPAWN_ADDFCHDIR(&ctx->actions, fd); + if (errno != 0) { + free(action); + return -1; + } + } +#else + bfs_spawn_clear_posix(ctx); +#endif + + action->in_fd = fd; + SLIST_APPEND(ctx, action); + return 0; +} + +int bfs_spawn_setrlimit(struct bfs_spawn *ctx, int resource, const struct rlimit *rl) { + struct bfs_spawn_action *action = bfs_spawn_action(BFS_SPAWN_SETRLIMIT); + if (!action) { + goto fail; + } + +#ifdef POSIX_SPAWN_SETRLIMIT + if (bfs_spawn_addflags(ctx, POSIX_SPAWN_SETRLIMIT) != 0) { + goto fail; + } + + errno = posix_spawnattr_setrlimit(&ctx->attr, resource, rl); + if (errno != 0) { + goto fail; + } +#else + bfs_spawn_clear_posix(ctx); +#endif + + action->resource = resource; + action->rlimit = *rl; + SLIST_APPEND(ctx, action); + return 0; + +fail: + free(action); + return -1; +} + +/** + * Context for resolving executables in the $PATH. + */ +struct bfs_resolver { + /** The executable to spawn. */ + const char *exe; + /** The $PATH to resolve in. */ + char *path; + /** A buffer to hold the resolved path. */ + char *buf; + /** The size of the buffer. */ + size_t len; + /** Whether the executable is already resolved. */ + bool done; + /** Whether to free(path). */ + bool free; +}; + +/** Free a $PATH resolution context. */ +static void bfs_resolve_free(struct bfs_resolver *res) { + if (res->free) { + free(res->path); + } + free(res->buf); +} + +/** Get the next component in the $PATH. */ +static bool bfs_resolve_next(const char **path, const char **next, size_t *len) { + *path = *next; + if (!*path) { + return false; + } + + *next = strchr(*path, ':'); + if (*next) { + *len = *next - *path; + ++*next; } else { - return -1; + *len = strlen(*path); } + + if (*len == 0) { + // POSIX 8.3: "A zero-length prefix is a legacy feature that + // indicates the current working directory." + *path = "."; + *len = 1; + } + + return true; } -int bfs_spawn_addsetrlimit(struct bfs_spawn *ctx, int resource, const struct rlimit *rl) { - struct bfs_spawn_action *action = bfs_spawn_add(ctx, BFS_SPAWN_SETRLIMIT); - if (action) { - action->resource = resource; - action->rlimit = *rl; +/** Finish resolving an executable, potentially from the child process. */ +static int bfs_resolve_late(struct bfs_resolver *res) { + if (res->done) { return 0; + } + + char *buf = res->buf; + char *end = buf + res->len; + + const char *path; + const char *next = res->path; + size_t len; + while (bfs_resolve_next(&path, &next, &len)) { + char *cur = xstpencpy(buf, end, path, len); + cur = xstpecpy(cur, end, "/"); + cur = xstpecpy(cur, end, res->exe); + if (cur == end) { + bfs_bug("PATH resolution buffer too small"); + errno = ENOMEM; + return -1; + } + + if (xfaccessat(AT_FDCWD, buf, X_OK) == 0) { + res->exe = buf; + res->done = true; + return 0; + } + } + + errno = ENOENT; + return -1; +} + +/** Check if we can skip path resolution entirely. */ +static bool bfs_can_skip_resolve(const struct bfs_resolver *res, const struct bfs_spawn *ctx) { + if (ctx && !(ctx->flags & BFS_SPAWN_USE_PATH)) { + return true; + } + + if (strchr(res->exe, '/')) { + return true; + } + + return false; +} + +/** Check if any $PATH components are relative. */ +static bool bfs_resolve_relative(const struct bfs_resolver *res) { + const char *path; + const char *next = res->path; + size_t len; + while (bfs_resolve_next(&path, &next, &len)) { + if (path[0] != '/') { + return true; + } + } + + return false; +} + +/** Check if we can resolve the executable before file actions. */ +static bool bfs_can_resolve_early(const struct bfs_resolver *res, const struct bfs_spawn *ctx) { + if (!bfs_resolve_relative(res)) { + return true; + } + + if (ctx) { + for_slist (const struct bfs_spawn_action, action, ctx) { + if (action->op == BFS_SPAWN_FCHDIR) { + return false; + } + } + } + + return true; +} + +/** Get the required path resolution buffer size. */ +static size_t bfs_resolve_capacity(const struct bfs_resolver *res) { + size_t max = 0; + + const char *path; + const char *next = res->path; + size_t len; + while (bfs_resolve_next(&path, &next, &len)) { + if (len > max) { + max = len; + } + } + + // path + "/" + exe + '\0' + return max + 1 + strlen(res->exe) + 1; +} + +/** Begin resolving an executable, from the parent process. */ +static int bfs_resolve_early(struct bfs_resolver *res, const char *exe, const struct bfs_spawn *ctx) { + *res = (struct bfs_resolver) { + .exe = exe, + }; + + if (bfs_can_skip_resolve(res, ctx)) { + // Do this check eagerly, even though posix_spawn()/execv() also + // would, because: + // + // - faccessat() is faster than fork()/clone() + execv() + // - posix_spawn() is not guaranteed to report ENOENT + if (xfaccessat(AT_FDCWD, exe, X_OK) == 0) { + res->done = true; + return 0; + } else { + return -1; + } + } + + res->path = getenv("PATH"); + if (!res->path) { +#if defined(_CS_PATH) + res->path = xconfstr(_CS_PATH); + res->free = true; +#elif defined(_PATH_DEFPATH) + res->path = _PATH_DEFPATH; +#else + errno = ENOENT; +#endif + } + if (!res->path) { + goto fail; + } + + bool can_finish = bfs_can_resolve_early(res, ctx); + +#if BFS_POSIX_SPAWNP_AFTER_FCHDIR + bool use_posix = ctx && (ctx->flags & BFS_SPAWN_USE_POSIX); + if (!can_finish && use_posix) { + // posix_spawnp() will do the resolution, so don't bother + // allocating a buffer + return 0; + } +#endif + + res->len = bfs_resolve_capacity(res); + res->buf = malloc(res->len); + if (!res->buf) { + goto fail; + } + + if (can_finish && bfs_resolve_late(res) != 0) { + goto fail; + } + + return 0; + +fail: + bfs_resolve_free(res); + return -1; +} + +#if BFS_POSIX_SPAWN >= 0 + +/** bfs_spawn() implementation using posix_spawn(). */ +static pid_t bfs_posix_spawn(struct bfs_resolver *res, const struct bfs_spawn *ctx, char **argv, char **envp) { + pid_t ret; + + if (res->done) { + errno = posix_spawn(&ret, res->exe, &ctx->actions, &ctx->attr, argv, envp); } else { + errno = posix_spawnp(&ret, res->exe, &ctx->actions, &ctx->attr, argv, envp); + } + + if (errno != 0) { return -1; } + + return ret; } +/** Check if we can use posix_spawn(). */ +static bool bfs_use_posix_spawn(const struct bfs_resolver *res, const struct bfs_spawn *ctx) { + if (!(ctx->flags & BFS_SPAWN_USE_POSIX)) { + return false; + } + +#if !BFS_POSIX_SPAWNP_AFTER_FCHDIR + if (!res->done) { + return false; + } +#endif + + return true; +} + +#endif // BFS_POSIX_SPAWN >= 0 + /** Actually exec() the new process. */ -static void bfs_spawn_exec(const char *exe, const struct bfs_spawn *ctx, char **argv, char **envp, int pipefd[2]) { +_noreturn +static void bfs_spawn_exec(struct bfs_resolver *res, const struct bfs_spawn *ctx, char **argv, char **envp, const sigset_t *mask, int pipefd[2]) { xclose(pipefd[0]); - for (const struct bfs_spawn_action *action = ctx ? ctx->head : NULL; action; action = action->next) { + for_slist (const struct bfs_spawn_action, action, ctx) { + int fd; + // Move the error-reporting pipe out of the way if necessary... if (action->out_fd == pipefd[1]) { - int fd = dup_cloexec(pipefd[1]); + fd = dup_cloexec(pipefd[1]); if (fd < 0) { goto fail; } @@ -156,6 +559,17 @@ static void bfs_spawn_exec(const char *exe, const struct bfs_spawn *ctx, char ** } switch (action->op) { + case BFS_SPAWN_OPEN: + fd = open(action->path, action->flags, action->mode); + if (fd < 0) { + goto fail; + } + if (fd != action->out_fd) { + if (dup2(fd, action->out_fd) < 0) { + goto fail; + } + } + break; case BFS_SPAWN_CLOSE: if (close(action->out_fd) != 0) { goto fail; @@ -179,137 +593,140 @@ static void bfs_spawn_exec(const char *exe, const struct bfs_spawn *ctx, char ** } } - execve(exe, argv, envp); + if (bfs_resolve_late(res) != 0) { + goto fail; + } - int error; -fail: - error = errno; + // Reset signal handlers to their original values before we unblock + // signals, so that handlers don't run in both the parent and the child + if (sigreset() != 0) { + goto fail; + } + + // Restore the original signal mask for the child process + errno = pthread_sigmask(SIG_SETMASK, mask, NULL); + if (errno != 0) { + goto fail; + } + + execve(res->exe, argv, envp); + +fail:; + int error = errno; // In case of a write error, the parent will still see that we exited // unsuccessfully, but won't know why - (void) xwrite(pipefd[1], &error, sizeof(error)); + (void)xwrite(pipefd[1], &error, sizeof(error)); xclose(pipefd[1]); _Exit(127); } -pid_t bfs_spawn(const char *exe, const struct bfs_spawn *ctx, char **argv, char **envp) { - extern char **environ; - if (!envp) { - envp = environ; - } - - enum bfs_spawn_flags flags = ctx ? ctx->flags : 0; - char *resolved = NULL; - if (flags & BFS_SPAWN_USEPATH) { - exe = resolved = bfs_spawn_resolve(exe); - if (!resolved) { - return -1; - } - } - +/** bfs_spawn() implementation using fork()/exec(). */ +static pid_t bfs_fork_spawn(struct bfs_resolver *res, const struct bfs_spawn *ctx, char **argv, char **envp) { // Use a pipe to report errors from the child int pipefd[2]; if (pipe_cloexec(pipefd) != 0) { - free(resolved); return -1; } + // Block signals before fork() so handlers don't run in the child + sigset_t new_mask; + if (sigfillset(&new_mask) != 0) { + goto fail; + } + sigset_t old_mask; + errno = pthread_sigmask(SIG_BLOCK, &new_mask, &old_mask); + if (errno != 0) { + goto fail; + } + +#if BFS_HAS__FORK + pid_t pid = _Fork(); +#else pid_t pid = fork(); - if (pid < 0) { - close_quietly(pipefd[1]); - close_quietly(pipefd[0]); - free(resolved); - return -1; - } else if (pid == 0) { +#endif + if (pid == 0) { // Child - bfs_spawn_exec(exe, ctx, argv, envp, pipefd); + bfs_spawn_exec(res, ctx, argv, envp, &old_mask, pipefd); + } + + // Restore the original signal mask + errno = pthread_sigmask(SIG_SETMASK, &old_mask, NULL); + bfs_everify(errno == 0, "pthread_sigmask()"); + + if (pid < 0) { + // fork() failed + goto fail; } - // Parent xclose(pipefd[1]); - free(resolved); int error; ssize_t nbytes = xread(pipefd[0], &error, sizeof(error)); xclose(pipefd[0]); if (nbytes == sizeof(error)) { - int wstatus; - waitpid(pid, &wstatus, 0); + xwaitpid(pid, NULL, 0); errno = error; return -1; } return pid; + +fail: + close_quietly(pipefd[1]); + close_quietly(pipefd[0]); + return -1; } -char *bfs_spawn_resolve(const char *exe) { - if (strchr(exe, '/')) { - return strdup(exe); +/** Call the right bfs_spawn() implementation. */ +static pid_t bfs_spawn_impl(struct bfs_resolver *res, const struct bfs_spawn *ctx, char **argv, char **envp) { +#if BFS_POSIX_SPAWN >= 0 + if (bfs_use_posix_spawn(res, ctx)) { + return bfs_posix_spawn(res, ctx, argv, envp); } - - const char *path = getenv("PATH"); - - char *confpath = NULL; - if (!path) { -#if defined(_CS_PATH) - path = confpath = xconfstr(_CS_PATH); -#elif defined(_PATH_DEFPATH) - path = _PATH_DEFPATH; -#else - errno = ENOENT; #endif - } - if (!path) { - return NULL; - } - size_t cap = 0; - char *ret = NULL; - while (true) { - const char *end = strchr(path, ':'); - size_t len = end ? (size_t)(end - path) : strlen(path); - - // POSIX 8.3: "A zero-length prefix is a legacy feature that - // indicates the current working directory." - if (len == 0) { - path = "."; - len = 1; - } + return bfs_fork_spawn(res, ctx, argv, envp); +} - size_t total = len + 1 + strlen(exe) + 1; - if (cap < total) { - char *grown = realloc(ret, total); - if (!grown) { - goto fail; - } - ret = grown; - cap = total; - } +pid_t bfs_spawn(const char *exe, const struct bfs_spawn *ctx, char **argv, char **envp) { + // execvp()/posix_spawnp() are typically implemented with repeated + // execv() calls for each $PATH component until one succeeds. It's + // faster to resolve the full path ahead of time. + struct bfs_resolver res; + if (bfs_resolve_early(&res, exe, ctx) != 0) { + return -1; + } - memcpy(ret, path, len); - if (ret[len - 1] != '/') { - ret[len++] = '/'; - } - strcpy(ret + len, exe); + extern char **environ; + if (!envp) { + envp = environ; + } - if (xfaccessat(AT_FDCWD, ret, X_OK) == 0) { - break; - } + pid_t ret = bfs_spawn_impl(&res, ctx, argv, envp); + bfs_resolve_free(&res); + return ret; +} - if (!end) { - errno = ENOENT; - goto fail; - } +char *bfs_spawn_resolve(const char *exe) { + struct bfs_resolver res; + if (bfs_resolve_early(&res, exe, NULL) != 0) { + return NULL; + } + if (bfs_resolve_late(&res) != 0) { + bfs_resolve_free(&res); + return NULL; + } - path = end + 1; + char *ret; + if (res.exe == res.buf) { + ret = res.buf; + res.buf = NULL; + } else { + ret = strdup(res.exe); } - free(confpath); + bfs_resolve_free(&res); return ret; - -fail: - free(confpath); - free(ret); - return NULL; } diff --git a/src/xspawn.h b/src/xspawn.h index d9b4a2e..3c74ccd 100644 --- a/src/xspawn.h +++ b/src/xspawn.h @@ -10,83 +10,112 @@ #include <sys/resource.h> #include <sys/types.h> +#include <unistd.h> + +#ifdef _POSIX_SPAWN +# define BFS_POSIX_SPAWN _POSIX_SPAWN +#else +# define BFS_POSIX_SPAWN (-1) +#endif + +#if BFS_POSIX_SPAWN >= 0 +# include <spawn.h> +#endif /** * bfs_spawn() flags. */ enum bfs_spawn_flags { /** Use the PATH variable to resolve the executable (like execvp()). */ - BFS_SPAWN_USEPATH = 1 << 0, + BFS_SPAWN_USE_PATH = 1 << 0, + /** Whether posix_spawn() can be used. */ + BFS_SPAWN_USE_POSIX = 1 << 1, }; /** * bfs_spawn() attributes, controlling the context of the new process. */ struct bfs_spawn { + /** Spawn flags. */ enum bfs_spawn_flags flags; + + /** Linked list of actions. */ struct bfs_spawn_action *head; struct bfs_spawn_action **tail; + +#if BFS_POSIX_SPAWN >= 0 + /** posix_spawn() context, for when we can use it. */ + posix_spawn_file_actions_t actions; + posix_spawnattr_t attr; +#endif }; /** * Create a new bfs_spawn() context. * - * @return 0 on success, -1 on failure. + * @return + * 0 on success, -1 on failure. */ int bfs_spawn_init(struct bfs_spawn *ctx); /** * Destroy a bfs_spawn() context. * - * @return 0 on success, -1 on failure. + * @return + * 0 on success, -1 on failure. */ int bfs_spawn_destroy(struct bfs_spawn *ctx); /** - * Set the flags for a bfs_spawn() context. + * Add an open() action to a bfs_spawn() context. * - * @return 0 on success, -1 on failure. + * @return + * 0 on success, -1 on failure. */ -int bfs_spawn_setflags(struct bfs_spawn *ctx, enum bfs_spawn_flags flags); +int bfs_spawn_addopen(struct bfs_spawn *ctx, int fd, const char *path, int flags, mode_t mode); /** * Add a close() action to a bfs_spawn() context. * - * @return 0 on success, -1 on failure. + * @return + * 0 on success, -1 on failure. */ int bfs_spawn_addclose(struct bfs_spawn *ctx, int fd); /** * Add a dup2() action to a bfs_spawn() context. * - * @return 0 on success, -1 on failure. + * @return + * 0 on success, -1 on failure. */ int bfs_spawn_adddup2(struct bfs_spawn *ctx, int oldfd, int newfd); /** * Add an fchdir() action to a bfs_spawn() context. * - * @return 0 on success, -1 on failure. + * @return + * 0 on success, -1 on failure. */ int bfs_spawn_addfchdir(struct bfs_spawn *ctx, int fd); /** - * Add a setrlimit() action to a bfs_spawn() context. + * Apply setrlimit() to a bfs_spawn() context. * - * @return 0 on success, -1 on failure. + * @return + * 0 on success, -1 on failure. */ -int bfs_spawn_addsetrlimit(struct bfs_spawn *ctx, int resource, const struct rlimit *rl); +int bfs_spawn_setrlimit(struct bfs_spawn *ctx, int resource, const struct rlimit *rl); /** * Spawn a new process. * - * @param exe + * @exe * The executable to run. - * @param ctx + * @ctx * The context for the new process. - * @param argv + * @argv * The arguments for the new process. - * @param envp + * @envp * The environment variables for the new process (NULL for the current * environment). * @return @@ -95,10 +124,10 @@ int bfs_spawn_addsetrlimit(struct bfs_spawn *ctx, int resource, const struct rli pid_t bfs_spawn(const char *exe, const struct bfs_spawn *ctx, char **argv, char **envp); /** - * Look up an executable in the current PATH, as BFS_SPAWN_USEPATH or execvp() + * Look up an executable in the current PATH, as BFS_SPAWN_USE_PATH or execvp() * would do. * - * @param exe + * @exe * The name of the binary to execute. Bare names without a '/' will be * searched on the provided PATH. * @return diff --git a/src/xtime.c b/src/xtime.c index 79dafad..6b8a141 100644 --- a/src/xtime.c +++ b/src/xtime.c @@ -2,55 +2,54 @@ // SPDX-License-Identifier: 0BSD #include "xtime.h" -#include "atomic.h" -#include "config.h" + +#include "alloc.h" +#include "bfs.h" +#include "bfstd.h" +#include "diag.h" +#include "sanity.h" + #include <errno.h> #include <limits.h> -#include <stdlib.h> #include <sys/time.h> #include <time.h> #include <unistd.h> -/** Call tzset() if necessary. */ -static void xtzset(void) { - static atomic bool is_set = false; +int xmktime(struct tm *tm, time_t *timep) { + time_t time = mktime(tm); - if (!load(&is_set, relaxed)) { - tzset(); - store(&is_set, true, relaxed); - } -} + if (time == -1) { + int error = errno; -int xlocaltime(const time_t *timep, struct tm *result) { - // Should be called before localtime_r() according to POSIX.1-2004 - xtzset(); + struct tm tmp; + if (!localtime_r(&time, &tmp)) { + bfs_ebug("localtime_r(-1)"); + return -1; + } - if (localtime_r(timep, result)) { - return 0; - } else { - return -1; + if (tm->tm_year != tmp.tm_year || tm->tm_yday != tmp.tm_yday + || tm->tm_hour != tmp.tm_hour || tm->tm_min != tmp.tm_min || tm->tm_sec != tmp.tm_sec) { + errno = error; + return -1; + } } -} -int xgmtime(const time_t *timep, struct tm *result) { - // Should be called before gmtime_r() according to POSIX.1-2004 - xtzset(); - - if (gmtime_r(timep, result)) { - return 0; - } else { - return -1; - } + *timep = time; + return 0; } -int xmktime(struct tm *tm, time_t *timep) { - *timep = mktime(tm); +// FreeBSD is missing an interceptor +#if BFS_HAS_TIMEGM && !(__FreeBSD__ && __SANITIZE_MEMORY__) - if (*timep == -1) { +int xtimegm(struct tm *tm, time_t *timep) { + time_t time = timegm(tm); + + if (time == -1) { int error = errno; struct tm tmp; - if (xlocaltime(timep, &tmp) != 0) { + if (!gmtime_r(&time, &tmp)) { + bfs_ebug("gmtime_r(-1)"); return -1; } @@ -61,9 +60,12 @@ int xmktime(struct tm *tm, time_t *timep) { } } + *timep = time; return 0; } +#else + static int safe_add(int *value, int delta) { if (*value >= 0) { if (delta > INT_MAX - *value) { @@ -81,7 +83,7 @@ static int safe_add(int *value, int delta) { static int floor_div(int n, int d) { int a = n < 0; - return (n + a)/d - a; + return (n + a) / d - a; } static int wrap(int *value, int max, int *next) { @@ -93,80 +95,84 @@ static int wrap(int *value, int max, int *next) { static int month_length(int year, int month) { static const int month_lengths[] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; int ret = month_lengths[month]; - if (month == 1 && year%4 == 0 && (year%100 != 0 || (year + 300)%400 == 0)) { + if (month == 1 && year % 4 == 0 && (year % 100 != 0 || (year + 300) % 400 == 0)) { ++ret; } return ret; } int xtimegm(struct tm *tm, time_t *timep) { - tm->tm_isdst = 0; + struct tm copy = *tm; + copy.tm_isdst = 0; - if (wrap(&tm->tm_sec, 60, &tm->tm_min) != 0) { + if (wrap(©.tm_sec, 60, ©.tm_min) != 0) { goto overflow; } - if (wrap(&tm->tm_min, 60, &tm->tm_hour) != 0) { + if (wrap(©.tm_min, 60, ©.tm_hour) != 0) { goto overflow; } - if (wrap(&tm->tm_hour, 24, &tm->tm_mday) != 0) { + if (wrap(©.tm_hour, 24, ©.tm_mday) != 0) { goto overflow; } // In order to wrap the days of the month, we first need to know what // month it is - if (wrap(&tm->tm_mon, 12, &tm->tm_year) != 0) { + if (wrap(©.tm_mon, 12, ©.tm_year) != 0) { goto overflow; } - if (tm->tm_mday < 1) { + if (copy.tm_mday < 1) { do { - --tm->tm_mon; - if (wrap(&tm->tm_mon, 12, &tm->tm_year) != 0) { + --copy.tm_mon; + if (wrap(©.tm_mon, 12, ©.tm_year) != 0) { goto overflow; } - tm->tm_mday += month_length(tm->tm_year, tm->tm_mon); - } while (tm->tm_mday < 1); + copy.tm_mday += month_length(copy.tm_year, copy.tm_mon); + } while (copy.tm_mday < 1); } else { while (true) { - int days = month_length(tm->tm_year, tm->tm_mon); - if (tm->tm_mday <= days) { + int days = month_length(copy.tm_year, copy.tm_mon); + if (copy.tm_mday <= days) { break; } - tm->tm_mday -= days; - ++tm->tm_mon; - if (wrap(&tm->tm_mon, 12, &tm->tm_year) != 0) { + copy.tm_mday -= days; + ++copy.tm_mon; + if (wrap(©.tm_mon, 12, ©.tm_year) != 0) { goto overflow; } } } - tm->tm_yday = 0; - for (int i = 0; i < tm->tm_mon; ++i) { - tm->tm_yday += month_length(tm->tm_year, i); + copy.tm_yday = 0; + for (int i = 0; i < copy.tm_mon; ++i) { + copy.tm_yday += month_length(copy.tm_year, i); } - tm->tm_yday += tm->tm_mday - 1; + copy.tm_yday += copy.tm_mday - 1; int leap_days; // Compute floor((year - 69)/4) - floor((year - 1)/100) + floor((year + 299)/400) without overflows - if (tm->tm_year >= 0) { - leap_days = floor_div(tm->tm_year - 69, 4) - floor_div(tm->tm_year - 1, 100) + floor_div(tm->tm_year - 101, 400) + 1; + if (copy.tm_year >= 0) { + leap_days = floor_div(copy.tm_year - 69, 4) - floor_div(copy.tm_year - 1, 100) + floor_div(copy.tm_year - 101, 400) + 1; } else { - leap_days = floor_div(tm->tm_year + 3, 4) - floor_div(tm->tm_year + 99, 100) + floor_div(tm->tm_year + 299, 400) - 17; + leap_days = floor_div(copy.tm_year + 3, 4) - floor_div(copy.tm_year + 99, 100) + floor_div(copy.tm_year + 299, 400) - 17; } - long long epoch_days = 365LL*(tm->tm_year - 70) + leap_days + tm->tm_yday; - tm->tm_wday = (epoch_days + 4)%7; - if (tm->tm_wday < 0) { - tm->tm_wday += 7; + long long epoch_days = 365LL * (copy.tm_year - 70) + leap_days + copy.tm_yday; + copy.tm_wday = (epoch_days + 4) % 7; + if (copy.tm_wday < 0) { + copy.tm_wday += 7; } - long long epoch_time = tm->tm_sec + 60*(tm->tm_min + 60*(tm->tm_hour + 24*epoch_days)); - *timep = (time_t)epoch_time; - if ((long long)*timep != epoch_time) { + long long epoch_time = copy.tm_sec + 60 * (copy.tm_min + 60 * (copy.tm_hour + 24 * epoch_days)); + time_t time = (time_t)epoch_time; + if ((long long)time != epoch_time) { goto overflow; } + + *tm = copy; + *timep = time; return 0; overflow: @@ -174,23 +180,52 @@ overflow: return -1; } +#endif // !BFS_HAS_TIMEGM + +/** Parse a decimal digit. */ +static int xgetdigit(char c) { + int ret = c - '0'; + if (ret < 0 || ret > 9) { + return -1; + } else { + return ret; + } +} + /** Parse some digits from a timestamp. */ static int xgetpart(const char **str, size_t n, int *result) { - char buf[n + 1]; + *result = 0; + for (size_t i = 0; i < n; ++i, ++*str) { - char c = **str; - if (c < '0' || c > '9') { + int dig = xgetdigit(**str); + if (dig < 0) { return -1; } - buf[i] = c; + *result *= 10; + *result += dig; } - buf[n] = '\0'; - *result = atoi(buf); return 0; } int xgetdate(const char *str, struct timespec *result) { + // Handle @epochseconds + if (str[0] == '@') { + long long value; + if (xstrtoll(str + 1, NULL, 10, &value) != 0) { + goto error; + } + + time_t time = (time_t)value; + if ((long long)time != value) { + errno = ERANGE; + goto error; + } + + result->tv_sec = time; + goto done; + } + struct tm tm = { .tm_isdst = -1, }; @@ -239,6 +274,8 @@ int xgetdate(const char *str, struct timespec *result) { goto end; } else if (*str == ':') { ++str; + } else if (xgetdigit(*str) < 0) { + goto zone; } if (xgetpart(&str, 2, &tm.tm_min) != 0) { goto invalid; @@ -249,11 +286,14 @@ int xgetdate(const char *str, struct timespec *result) { goto end; } else if (*str == ':') { ++str; + } else if (xgetdigit(*str) < 0) { + goto zone; } if (xgetpart(&str, 2, &tm.tm_sec) != 0) { goto invalid; } +zone: if (!*str) { goto end; } else if (*str == 'Z') { @@ -296,14 +336,15 @@ end: goto error; } - int offset = 60*tz_hour + tz_min; + int offset = (tz_hour * 60 + tz_min) * 60; if (tz_negative) { - result->tv_sec -= offset; - } else { result->tv_sec += offset; + } else { + result->tv_sec -= offset; } } +done: result->tv_nsec = 0; return 0; @@ -313,16 +354,150 @@ error: return -1; } -int xgettime(struct timespec *result) { -#if _POSIX_TIMERS > 0 - return clock_gettime(CLOCK_REALTIME, result); +/** One nanosecond. */ +static const long NS = 1000L * 1000 * 1000; + +void timespec_add(struct timespec *lhs, const struct timespec *rhs) { + lhs->tv_sec += rhs->tv_sec; + lhs->tv_nsec += rhs->tv_nsec; + if (lhs->tv_nsec >= NS) { + lhs->tv_nsec -= NS; + lhs->tv_sec += 1; + } +} + +void timespec_sub(struct timespec *lhs, const struct timespec *rhs) { + lhs->tv_sec -= rhs->tv_sec; + lhs->tv_nsec -= rhs->tv_nsec; + if (lhs->tv_nsec < 0) { + lhs->tv_nsec += NS; + lhs->tv_sec -= 1; + } +} + +int timespec_cmp(const struct timespec *lhs, const struct timespec *rhs) { + if (lhs->tv_sec < rhs->tv_sec) { + return -1; + } else if (lhs->tv_sec > rhs->tv_sec) { + return 1; + } + + if (lhs->tv_nsec < rhs->tv_nsec) { + return -1; + } else if (lhs->tv_nsec > rhs->tv_nsec) { + return 1; + } + + return 0; +} + +void timespec_min(struct timespec *dest, const struct timespec *src) { + if (timespec_cmp(src, dest) < 0) { + *dest = *src; + } +} + +void timespec_max(struct timespec *dest, const struct timespec *src) { + if (timespec_cmp(src, dest) > 0) { + *dest = *src; + } +} + +double timespec_ns(const struct timespec *ts) { + return 1.0e9 * ts->tv_sec + ts->tv_nsec; +} + +#if defined(_POSIX_TIMERS) && BFS_HAS_TIMER_CREATE +# define BFS_POSIX_TIMERS _POSIX_TIMERS #else - struct timeval tv; - int ret = gettimeofday(&tv, NULL); - if (ret == 0) { - result->tv_sec = tv.tv_sec; - result->tv_nsec = tv.tv_usec * 1000L; +# define BFS_POSIX_TIMERS (-1) +#endif + +struct timer { +#if BFS_POSIX_TIMERS >= 0 + /** The POSIX timer. */ + timer_t timer; +#endif + /** Whether to use timer_create() or setitimer(). */ + bool legacy; +}; + +struct timer *xtimer_start(const struct timespec *interval) { + struct timer *timer = ALLOC(struct timer); + if (!timer) { + return NULL; } - return ret; + +#if BFS_POSIX_TIMERS >= 0 + if (sysoption(TIMERS)) { + clockid_t clock = CLOCK_REALTIME; + +#if defined(_POSIX_MONOTONIC_CLOCK) && _POSIX_MONOTONIC_CLOCK >= 0 + if (sysoption(MONOTONIC_CLOCK) > 0) { + clock = CLOCK_MONOTONIC; + } #endif + + if (timer_create(clock, NULL, &timer->timer) != 0) { + goto fail; + } + + // https://github.com/llvm/llvm-project/issues/111847 + sanitize_init(&timer->timer); + + struct itimerspec spec = { + .it_value = *interval, + .it_interval = *interval, + }; + if (timer_settime(timer->timer, 0, &spec, NULL) != 0) { + timer_delete(timer->timer); + goto fail; + } + + timer->legacy = false; + return timer; + } +#endif + +#if BFS_POSIX_TIMERS <= 0 + struct timeval tv = { + .tv_sec = interval->tv_sec, + .tv_usec = (interval->tv_nsec + 999) / 1000, + }; + struct itimerval ival = { + .it_value = tv, + .it_interval = tv, + }; + if (setitimer(ITIMER_REAL, &ival, NULL) != 0) { + goto fail; + } + + timer->legacy = true; + return timer; +#endif + +fail: + free(timer); + return NULL; +} + +void xtimer_stop(struct timer *timer) { + if (!timer) { + return; + } + + if (timer->legacy) { +#if BFS_POSIX_TIMERS <= 0 + struct itimerval ival = {0}; + int ret = setitimer(ITIMER_REAL, &ival, NULL); + bfs_everify(ret == 0, "setitimer()"); +#endif + } else { +#if BFS_POSIX_TIMERS >= 0 + int ret = timer_delete(timer->timer); + bfs_everify(ret == 0, "timer_delete()"); +#endif + } + + free(timer); } diff --git a/src/xtime.h b/src/xtime.h index 75d1f4e..b76fef2 100644 --- a/src/xtime.h +++ b/src/xtime.h @@ -11,73 +11,98 @@ #include <time.h> /** - * localtime_r() wrapper that calls tzset() first. + * mktime() wrapper that reports errors more reliably. * - * @param[in] timep - * The time_t to convert. - * @param[out] result - * Buffer to hold the result. + * @tm[in,out] + * The struct tm to convert and normalize. + * @timep[out] + * A pointer to the result. * @return * 0 on success, -1 on failure. */ -int xlocaltime(const time_t *timep, struct tm *result); +int xmktime(struct tm *tm, time_t *timep); /** - * gmtime_r() wrapper that calls tzset() first. + * A portable timegm(), the inverse of gmtime(). * - * @param[in] timep - * The time_t to convert. - * @param[out] result - * Buffer to hold the result. + * @tm[in,out] + * The struct tm to convert and normalize. + * @timep[out] + * A pointer to the result. * @return * 0 on success, -1 on failure. */ -int xgmtime(const time_t *timep, struct tm *result); +int xtimegm(struct tm *tm, time_t *timep); /** - * mktime() wrapper that reports errors more reliably. + * Parse an ISO 8601-style timestamp. * - * @param[in,out] tm - * The struct tm to convert. - * @param[out] timep + * @str + * The string to parse. + * @result[out] * A pointer to the result. * @return * 0 on success, -1 on failure. */ -int xmktime(struct tm *tm, time_t *timep); +int xgetdate(const char *str, struct timespec *result); /** - * A portable timegm(), the inverse of gmtime(). + * Add to a timespec. + */ +void timespec_add(struct timespec *lhs, const struct timespec *rhs); + +/** + * Subtract from a timespec. + */ +void timespec_sub(struct timespec *lhs, const struct timespec *rhs); + +/** + * Compare two timespecs. * - * @param[in,out] tm - * The struct tm to convert. - * @param[out] timep - * A pointer to the result. * @return - * 0 on success, -1 on failure. + * An integer with the sign of (*lhs - *rhs). */ -int xtimegm(struct tm *tm, time_t *timep); +int timespec_cmp(const struct timespec *lhs, const struct timespec *rhs); /** - * Parse an ISO 8601-style timestamp. + * Update a minimum timespec. + */ +void timespec_min(struct timespec *dest, const struct timespec *src); + +/** + * Update a maximum timespec. + */ +void timespec_max(struct timespec *dest, const struct timespec *src); + +/** + * Convert a timespec to floating point. * - * @param[in] str - * The string to parse. - * @param[out] result - * A pointer to the result. * @return - * 0 on success, -1 on failure. + * The value in nanoseconds. */ -int xgetdate(const char *str, struct timespec *result); +double timespec_ns(const struct timespec *ts); /** - * Get the current time. + * A timer. + */ +struct timer; + +/** + * Start a timer. * - * @param[out] result - * A pointer to the result. + * @interval + * The regular interval at which to send SIGALRM. * @return - * 0 on success, -1 on failure. + * The new timer on success, otherwise NULL. + */ +struct timer *xtimer_start(const struct timespec *interval); + +/** + * Stop a timer. + * + * @timer + * The timer to stop. */ -int xgettime(struct timespec *result); +void xtimer_stop(struct timer *timer); #endif // BFS_XTIME_H |